I have the following three rules:
unary_expression =
( '(' > expression > ')' )
| int_;
operator_expression =
unary_expression >> *(operators > expression);
expression =
( '(' > expression > ')' )
| operator_expression;
Obviously this is recursive, so I use boost::recursive_wrapper
and created the following AST:
struct expression;
using unary_expression_node = boost::variant<boost::recursive_wrapper<expression>, int>;
struct unary_expression
{
unary_expression_node m_unary_expression;
};
enum operators { op_eq, op_ne };
struct expression;
struct operator_expression
{
unary_expression first;
using second_type = std::vector<std::pair<operators, expression>>;
second_type second;
};
using expression_node =
boost::variant<boost::recursive_wrapper<expression>, operator_expression>;
struct expression
{
expression_node m_expression;
};
This compiles (see full example below), but when the code attempts to construct an expression
object the constructor gets into an infinite loop of calling these three constructors:
#11 0x0000000000466066 in ast::expression::expression ...
#12 0x00000000004682e0 in boost::recursive_wrapper<ast::expression>::recursive_wrapper ...
#13 0x000000000046718d in boost::variant<boost::recursive_wrapper<ast::expression>, ast::operator_expression>::variant
...
Thus, Creating an expression creates a boost::variant<boost::recursive_wrapper<ast::expression>, ast::operator_expression>
(aka, an expression_node
) which creates a boost::recursive_wrapper<ast::expression>
which creates an expression which creates... and so on.
How can I solve this?
Here is a full example that compiles, but segfaults when the stack runs full:
#include <boost/config/warning_disable.hpp>
#include <boost/spirit/include/qi.hpp>
#include <boost/fusion/include/adapt_struct.hpp>
#include <boost/fusion/include/std_pair.hpp>
#include <iostream>
#include <string>
#include <vector>
namespace ast {
struct expression;
using unary_expression_node = boost::variant<boost::recursive_wrapper<expression>, int>;
struct unary_expression
{
unary_expression_node m_unary_expression;
};
enum operators { op_eq, op_ne };
struct expression;
struct operator_expression
{
unary_expression first;
using second_type = std::vector<std::pair<operators, expression>>;
second_type second;
};
using expression_node = boost::variant<boost::recursive_wrapper<expression>, operator_expression>;
struct expression
{
expression_node m_expression;
};
std::ostream& operator<<(std::ostream& os, expression const& expression)
{
return os << expression.m_expression;
}
std::ostream& operator<<(std::ostream& os, unary_expression const& unary_expression)
{
return os << unary_expression.m_unary_expression;
}
std::ostream& operator<<(std::ostream& os, operator_expression const& operator_expression)
{
os << operator_expression.first;
for (auto& l : operator_expression.second)
{
os << ' ' << l.first << ' ' << l.second;
}
return os;
}
} // namespace ast
BOOST_FUSION_ADAPT_STRUCT(
ast::expression,
(ast::expression_node, m_expression)
)
BOOST_FUSION_ADAPT_STRUCT(
ast::unary_expression,
(ast::unary_expression_node, m_unary_expression)
)
BOOST_FUSION_ADAPT_STRUCT(
ast::operator_expression,
(ast::unary_expression, first),
(ast::operator_expression::second_type, second)
)
namespace client
{
namespace qi = boost::spirit::qi;
namespace ascii = boost::spirit::ascii;
template <typename Iterator>
class expression_grammar : public qi::grammar<Iterator, ast::expression(), qi::space_type>
{
private:
qi::symbols<char, ast::operators> operators;
qi::rule<Iterator, ast::unary_expression(), qi::space_type> unary_expression;
qi::rule<Iterator, ast::operator_expression(), qi::space_type> operator_expression;
qi::rule<Iterator, ast::expression(), qi::space_type> expression;
public:
expression_grammar() : expression_grammar::base_type(expression, "expression_grammar")
{
using qi::double_;
using qi::char_;
using qi::int_;
operators.add
("==", ast::op_eq)
("!=", ast::op_ne)
;
unary_expression =
( '(' > expression > ')' )
| int_;
operator_expression =
unary_expression >> *(operators > expression);
expression =
( '(' > expression > ')' )
| operator_expression;
}
};
} // namespace client
int main()
{
std::string const input{"1 == 1 != 0"};
using iterator_type = std::string::const_iterator;
using expression_grammar = client::expression_grammar<iterator_type>;
namespace qi = boost::spirit::qi;
expression_grammar program;
iterator_type iter{input.begin()};
iterator_type const end{input.end()};
ast::expression out;
bool r = qi::phrase_parse(iter, end, program, qi::space, out);
if (!r || iter != end)
{
std::cerr << "Parsing failed." << std::endl;
return 1;
}
std::cout << "Parsed: " << out << std::endl;
}
EDIT:
I tried simplifying things to just two rules (and two 'ast's):
struct expression;
using unary_expression = boost::variant<boost::recursive_wrapper<expression>, int>;
enum operators { op_eq, op_ne };
struct expression
{
unary_expression first;
using second_type = std::vector<std::pair<operators, expression>>;
second_type second;
};
BOOST_FUSION_ADAPT_STRUCT(
ast::expression,
(ast::unary_expression, first),
(ast::expression::second_type, second)
)
[...]
unary_expression =
( '(' > expression > ')' )
| int_;
expression =
unary_expression >> *(operators > expression);
but also this result in an infinite loop.
#18 0x00000000004646f2 in ast::expression::expression
#19 0x00000000004669ac in boost::recursive_wrapper<ast::expression>::recursive_wrapper
#20 0x0000000000465821 in boost::variant<boost::recursive_wrapper<ast::expression>, int>::variant
...
Variants default-construct to their first element type.
This indeed directly leads to an infinite loop. (Demo)
The way to solve it is to make the default variant element not re-entrant or to make it lazily constructed. In this case, you can simply re-arrange to make int
the first element.
Better yet, there doesn't seem to be a need to reflect the operator precedence hieararchy (as it is expressed in the rules) in the resultant tree, so why not simplify to:
struct unary_expression;
struct binary_expression;
enum operators { op_eq, op_ne };
using expression = boost::variant<
int,
boost::recursive_wrapper<unary_expression>,
boost::recursive_wrapper<binary_expression>
>;
struct unary_expression {
expression expr;
};
struct binary_expression {
expression first;
std::vector<std::pair<operators, expression>> other;
};
This no longer crashes and seems a bit simpler in adaptation and usage.
This full demo uses that AST, but adds a true unary expression. A few style things have been fixed:
>> qi::eoi
)Note: I might have changed the precedence rules (specifically, associativity of binary operators). I'm not sure which version you require.
//#define BOOST_SPIRIT_DEBUG
#include <boost/spirit/include/qi.hpp>
#include <boost/fusion/include/adapt_struct.hpp>
#include <boost/fusion/include/std_pair.hpp>
#include <iostream>
#include <string>
#include <vector>
namespace ast {
struct unary_expression;
struct binary_expression;
enum operators { op_eq, op_ne };
using expression = boost::variant<
int,
boost::recursive_wrapper<unary_expression>,
boost::recursive_wrapper<binary_expression>
>;
struct unary_expression {
bool negated = false;
expression expr;
};
struct binary_expression {
expression first;
std::vector<std::pair<operators, expression>> other;
};
}
BOOST_FUSION_ADAPT_STRUCT(ast::unary_expression, negated, expr)
BOOST_FUSION_ADAPT_STRUCT(ast::binary_expression, first, other)
namespace ast {
static inline std::ostream& operator<<(std::ostream& os, operators op) { return os << (op==op_eq?"==":"!="); }
static inline std::ostream& operator<<(std::ostream& os, binary_expression const& e) {
os << e.first;
for (auto& oe : e.other)
os << " " << oe.first << " " << oe.second;
return os;
}
static inline std::ostream& operator<<(std::ostream& os, unary_expression const& e) {
return os << (e.negated?"!":"") << "(" << e.expr << ")";
}
}
namespace client
{
namespace qi = boost::spirit::qi;
template <typename Iterator>
class expression_grammar : public qi::grammar<Iterator, ast::expression()> {
private:
qi::symbols<char, ast::operators> operators;
qi::rule<Iterator, ast::expression()> start;
qi::rule<Iterator, ast::expression(), qi::space_type> simple_expression;
qi::rule<Iterator, ast::unary_expression(), qi::space_type> unary_expression;
qi::rule<Iterator, ast::binary_expression(), qi::space_type> binary_expression;
qi::rule<Iterator, ast::expression(), qi::space_type> expression;
public:
expression_grammar() : expression_grammar::base_type(start, "expression") {
using namespace qi;
operators.add
("==", ast::op_eq)
("!=", ast::op_ne)
;
simple_expression =
( '(' > expression > ')' )
| int_;
unary_expression =
matches['!'] >> simple_expression;
binary_expression =
unary_expression >> *(operators > expression);
expression = binary_expression;
start = skip(space) [ expression ];
BOOST_SPIRIT_DEBUG_NODES((expression)(binary_expression)(unary_expression)(simple_expression))
}
};
} // namespace client
int main() {
using It = std::string::const_iterator;
client::expression_grammar<It> const program;
std::string const input{"1 == !(1 != 0)"};
It iter = input.begin(), end = input.end();
ast::expression out;
if (parse(iter, end, program, out)) {
std::cout << "Parsed: " << out << std::endl;
} else {
std::cerr << "Parsing failed." << std::endl;
return 1;
}
if (iter != end) {
std::cout << "Remaining unparsed input: '" << std::string(iter, end) << "'\n";
}
}
Prints
Parsed: (1) == !((1) != (0))