Opened 2 years ago

Closed 9 months ago

#12497 closed Bugs (fixed)

Errors with QI and Karma rules managing 64 bit integers

Reported by: Gilles Brunet <gillesb68@…> Owned by: Joel de Guzman
Milestone: To Be Determined Component: spirit
Version: Boost 1.61.0 Severity: Showstopper
Keywords: Cc:

Description

MSVC++ 14 is used to build the following source code, which combines a trio of Spirit components (Lex, QI and Karma) performing three steps:

  1. Uses “Spirit Lex” to make tokens from input text;
  2. Uses “Spirit QI” for parsing tokens to generate an AST; and
  3. Uses “Spirit Karma” for generating back the original text.

The “QI” and “Karma” grammars compile and run fine with tokens holding 32-bit integers. However, the compiler generates thousands of errors with tokens holding 64-bit integers.

To reproduce the problem, move the comment markers around the alias declarations named “int_lit_type” so that it is defined as a 64-bit integer. The code compiles and runs fine with 32-bit integers, and does not compile with 64-bit ones. The result is the same whether the compiler is configured to target “x86” or “x64”.

Here is the source code:

#include <boost/spirit/include/lex_lexertl.hpp>
#include <boost/spirit/include/qi.hpp>
#include <boost/spirit/include/karma.hpp>
#include <boost/date_time/posix_time/posix_time.hpp>
#include <boost/variant.hpp>
#include <boost/fusion/adapted/struct.hpp>
#include <boost/phoenix.hpp>
#include <boost/phoenix/object/construct.hpp>

#include <cstdint>
#include <iostream>
#include <iterator>
#include <string>
#include <vector>

namespace spirit = boost::spirit;
namespace phoenix = boost::phoenix;
namespace lex = spirit::lex;
namespace qi = spirit::qi;
namespace karma = spirit::karma;

///////////////////////////////////
// Lexer

// Attribute type carried by real-literal tokens.
using real_lit_type = long double;
// Attribute type carried by integer-literal tokens. Exactly one of the
// aliases below is meant to be active at a time; per the report, the build
// fails once a 64-bit alias (long long / std::int64_t) is enabled.
//using int_lit_type = long;
using int_lit_type = std::int32_t;
//using int_lit_type = long long;
//using int_lit_type = std::int64_t;

// Token definitions for the sample language: real literals, integer
// literals, identifiers, the punctuation ( ) = ; and ignored whitespace.
//
// Lexer is the underlying Spirit.Lex lexer type; this grammar derives from
// lex::lexer<Lexer>.
template <typename Lexer>
struct lexer_grammar : lex::lexer<Lexer>
{
	// Sets up the token patterns and registers them with the lexer.
	//
	// The initializers are listed in member declaration order, which is the
	// order members are actually initialized in (and avoids -Wreorder).
	//
	// NOTE(review): the exponent digits of real_lit are optional ([0-9]*), so
	// text such as "1.5e" also lexes as a real literal - confirm this is
	// intended.
	lexer_grammar()
		: real_lit{"[0-9]+\\.([0-9]+([Ee][\\-\\+]?[0-9]*)?)?"}
		, int_lit{"[0-9]+"}
		, identifier{"[_a-zA-Z][_a-zA-Z0-9]*"}
	{
		// Whitespace is matched but flagged pass_ignore. The order of the
		// alternatives is kept exactly as it was.
		this->self
			= lex::token_def<>("[ \\t\\n\\r]+")[lex::_pass = lex::pass_flags::pass_ignore]
			| real_lit
			| int_lit
			| identifier
			| lex::token_def<>('(') | ')' | '=' | ';';
	}

	// Token carrying a real_lit_type attribute.
	lex::token_def<real_lit_type> real_lit;
	// Token carrying an int_lit_type attribute.
	lex::token_def<int_lit_type> int_lit;
	// Token carrying the identifier text.
	lex::token_def<std::string> identifier;
};

// Bundles the type choices needed to instantiate the lexer for a given
// input iterator type (main uses std::string::const_iterator).
template <typename Iterator>
struct tokens_factory
{
	// Iterator over the raw input.
	using iterator_type = Iterator;
	// Attribute types a token may carry; they mirror the attribute types of
	// the token_def members declared in lexer_grammar.
	using value_type = boost::mpl::vector<int_lit_type, real_lit_type, std::string>;
	// NOTE(review): the boost::mpl::true_ argument is presumably the token's
	// "has state" flag - confirm against the Spirit.Lex token documentation.
	using token_type = lex::lexertl::token<iterator_type, value_type, boost::mpl::true_>;
	// Concrete lexer: lexer_grammar layered on the lexertl actor_lexer.
	using lexer_type = lexer_grammar<lex::lexertl::actor_lexer<token_type>>;
};

///////////////////////////////////
// AST

// AST node types produced by the parser and consumed by the generator.
namespace ast {

	// A literal value: either an integer literal or a real literal.
	using literal_type = boost::variant<int_lit_type, real_lit_type>;
	// A variable name.
	using identifier_type = std::string;

	// An expression: a literal or a variable name.
	using expr_type = boost::variant<literal_type, identifier_type>;

	// Assignment statement holding the target variable and the assigned
	// expression.
	struct assign_stmt_type
	{
		assign_stmt_type() {}

		// Explicit converting constructor. It is only declared here; the one
		// definition in this file is the specialization for stmt_type below.
		template <typename Stmt>
		explicit assign_stmt_type(const Stmt& stmt);

		// Name of the variable being assigned.
		identifier_type var_ref;
		// Expression assigned to the variable.
		expr_type expr;
	};

	// A statement; assignment is the only alternative.
	using stmt_type = boost::variant<assign_stmt_type>;
	// A sequence of statements.
	using stmt_list_type = std::vector<stmt_type>;

	// Builds an assignment from a stmt_type by copying the assign_stmt_type
	// alternative it holds.
	// NOTE(review): presumably needed so the generator can turn a stmt_type
	// into the attribute of its assign_stmt rule - confirm.
	template <>
	inline assign_stmt_type::assign_stmt_type(const stmt_type& stmt)
	{
		*this = boost::get<assign_stmt_type>(stmt);
	}

}

// Adapts ast::assign_stmt_type as a Fusion sequence of (var_ref, expr).
// NOTE(review): presumably this is what lets the assign_stmt rules map their
// two-element sequences onto the struct's members - confirm.
BOOST_FUSION_ADAPT_STRUCT(
	ast::assign_stmt_type,
	var_ref,
	expr
)

// Qi grammar that turns the token stream into an ast::stmt_list_type.
//
// Iterator is the token iterator type the rules are parameterized on; Lexer
// supplies the identifier, int_lit and real_lit token definitions used as
// terminals.
template <typename Iterator, typename Lexer>
struct parser_grammar : qi::grammar<Iterator, ast::stmt_list_type()>
{
	// Defines every rule; stmt_list is the start rule. The rules are written
	// bottom-up, from terminals to the start rule.
	explicit parser_grammar(const Lexer& tokens)
		: parser_grammar::base_type{stmt_list}
	{
		// Terminals come straight from the lexer's token definitions.
		literal_expr = tokens.int_lit | tokens.real_lit;
		var_ref_expr = tokens.identifier;

		// Parenthesized expression, variable reference, or literal.
		expr = ('(' >> expr >> ')') | var_ref_expr | literal_expr;

		// identifier '=' expression
		assign_stmt = var_ref_expr >> '=' >> expr;
		stmt = assign_stmt;

		// Zero or more statements, each followed by ';'.
		stmt_list = *(stmt >> ';');
	}

	// Start rule: the whole statement list.
	qi::rule<Iterator, ast::stmt_list_type()> stmt_list;
	// A single statement.
	qi::rule<Iterator, ast::stmt_type()> stmt;
	// An assignment statement.
	qi::rule<Iterator, ast::assign_stmt_type()> assign_stmt;
	// An expression.
	qi::rule<Iterator, ast::expr_type()> expr;
	// A variable reference (identifier token).
	qi::rule<Iterator, ast::identifier_type()> var_ref_expr;
	// An integer or real literal.
	qi::rule<Iterator, ast::literal_type()> literal_expr;
};

///////////////////////////////////
// Generator

// Karma grammar that writes an ast::stmt_list_type back out as text.
//
// Iterator is the output iterator type the rules are parameterized on.
template <typename Iterator>
struct generator_grammar
	: karma::grammar<Iterator, ast::stmt_list_type()>
{
	// Defines every rule; stmt_list is the start rule. The rules are written
	// bottom-up, from leaf generators to the start rule.
	generator_grammar() : generator_grammar::base_type(stmt_list)
	{
		// Leaf generators.
		var_ref_expr = karma::string;
		real_literal_expr = karma::long_double;
		int_literal_expr = karma::long_long | karma::long_;

		// A literal is an integer or a real; an expression is a variable
		// reference or a literal.
		literal_expr = int_literal_expr | real_literal_expr;
		expr = var_ref_expr | literal_expr;

		// identifier " = " expression
		assign_stmt = var_ref_expr << karma::lit(" = ") << expr;
		stmt = assign_stmt;

		// Every statement is followed by ';' and an end-of-line.
		stmt_list = *(stmt << karma::lit(';') << karma::eol);
	}

	// Start rule: the whole statement list.
	karma::rule<Iterator, ast::stmt_list_type()> stmt_list;
	// A single statement.
	karma::rule<Iterator, ast::stmt_type()> stmt;
	// An assignment statement.
	karma::rule<Iterator, ast::assign_stmt_type()> assign_stmt;
	// An expression.
	karma::rule<Iterator, ast::expr_type()> expr;
	// A variable reference.
	karma::rule<Iterator, ast::identifier_type()> var_ref_expr;
	// An integer or real literal.
	karma::rule<Iterator, ast::literal_type()> literal_expr;
	// An integer literal.
	karma::rule<Iterator, int_lit_type()> int_literal_expr;
	// A real literal.
	karma::rule<Iterator, real_lit_type()> real_literal_expr;
};

///////////////////////////////////
// main

// Round-trips a small script through the lexer, the Qi parser and the Karma
// generator, printing either the regenerated text or a failure message.
//
// Returns 0 in every case; failures are reported on standard output only.
int main()
{
	const std::string text{
		"var1 = 25.3;\n"
		"var2 = 455;\n"
		"var3 = var2;"
	};

	using lexer_type = tokens_factory<std::string::const_iterator>::lexer_type;
	lexer_type lexer{};
	parser_grammar<lexer_type::iterator_type, lexer_type> parser{lexer};

	// lexer.begin() is handed a named iterator (not a temporary) for the
	// start of the input.
	auto it = text.begin();
	auto lex_it = lexer.begin(it, text.end());
	auto lex_end = lexer.end();

	auto stmt_list = ast::stmt_list_type{};
	const bool parsed = qi::parse(lex_it, lex_end, parser, stmt_list);
	// The start rule is a Kleene star, which succeeds even when it matches
	// nothing, so the parse only counts as a success when every token has
	// been consumed as well.
	const bool parser_success = parsed && lex_it == lex_end;

	if (!parser_success) {
		std::cout << "parser failure" << '\n';
		return 0;
	}

	std::cout << "parser success. Then, generates back the source." << '\n' << '\n';

	using output_iterator_type = std::back_insert_iterator<std::string>;
	using generator_type = generator_grammar<output_iterator_type>;

	// The generator appends its output to this string.
	std::string generated;
	output_iterator_type outit(generated);
	generator_type generator{};
	const bool generator_success = karma::generate(outit, generator, stmt_list);

	if (!generator_success) {
		std::cout << "generator failure" << '\n';
		return 0;
	}

	std::cout
		<< "generator success:" << '\n'
		<< generated << '\n';

	return 0;
}

Change History (4)

comment:1 Changed 2 years ago by Agustín K-ballo Bergé <kaballo86@…>

Reduced test case, fails with all of gcc, clang, msvc:

#include <boost/spirit/include/qi.hpp>

// Reduced reproducer: a single call to boost::spirit::traits::assign_to with
// an iterator pair and a boost::long_long_type attribute. Reported to fail
// with gcc, clang and msvc.
int main()
{
    // Both pointers are null; the snippet is presumably only meant to be
    // compiled, not run.
    char *first = nullptr, *last = nullptr;
    boost::long_long_type attr;
    boost::spirit::traits::assign_to(first, last, attr);
}

comment:2 Changed 22 months ago by bugs@…

I've created a [pull request](https://github.com/boostorg/spirit/pull/228) with the fix for Spirit V2.

I don't think X3 is impacted (as http://boost.2283326.n4.nabble.com/Design-structure-X3-parser-more-like-Qi-parser-tp4690205p4690404.html claims)

Thanks @K-ballo for the leg-work there, that made it much easier to spot.

comment:3 Changed 9 months ago by Nikita Kniazev <nok.raven@…>

comment:4 Changed 9 months ago by Joel de Guzman

Resolution: fixed
Status: new → closed
Note: See TracTickets for help on using tickets.