Search code examples
c++boost-spiritboost-spirit-qiboost-variantrecursive-datastructures

Why is boost::recursive_wrapper not working in this case


I have the following three rules:

unary_expression =
    ( '(' > expression > ')' )
    | int_;

operator_expression =
    unary_expression >> *(operators > expression);

expression =
    ( '(' > expression > ')' )
    | operator_expression;

Obviously this is recursive, so I use boost::recursive_wrapper and created the following AST:

struct expression;
using unary_expression_node = boost::variant<boost::recursive_wrapper<expression>, int>;

struct unary_expression
{
  unary_expression_node m_unary_expression;
};

enum operators { op_eq, op_ne };
struct expression;
struct operator_expression
{
  unary_expression first;
  using second_type = std::vector<std::pair<operators, expression>>;
  second_type second;
};

using expression_node = 
boost::variant<boost::recursive_wrapper<expression>, operator_expression>;

struct expression
{
  expression_node m_expression;
};

This compiles (see full example below), but when the code attempts to construct an expression object the constructor gets into an infinite loop of calling these three constructors:

#11 0x0000000000466066 in ast::expression::expression ...
#12 0x00000000004682e0 in boost::recursive_wrapper<ast::expression>::recursive_wrapper ...
#13 0x000000000046718d in boost::variant<boost::recursive_wrapper<ast::expression>, ast::operator_expression>::variant
...

Thus, Creating an expression creates a boost::variant<boost::recursive_wrapper<ast::expression>, ast::operator_expression> (aka, an expression_node) which creates a boost::recursive_wrapper<ast::expression> which creates an expression which creates... and so on.

How can I solve this?

Here is a full example that compiles, but segfaults when the stack runs full:

#include <boost/config/warning_disable.hpp>
#include <boost/spirit/include/qi.hpp>
#include <boost/fusion/include/adapt_struct.hpp>
#include <boost/fusion/include/std_pair.hpp>
#include <iostream>
#include <string>
#include <vector>

namespace ast {

struct expression;
using unary_expression_node = boost::variant<boost::recursive_wrapper<expression>, int>;

struct unary_expression
{
  unary_expression_node m_unary_expression;
};

enum operators { op_eq, op_ne };
struct expression;
struct operator_expression
{
  unary_expression first;
  using second_type = std::vector<std::pair<operators, expression>>;
  second_type second;
};

using expression_node = boost::variant<boost::recursive_wrapper<expression>, operator_expression>;

struct expression
{
  expression_node m_expression;
};

std::ostream& operator<<(std::ostream& os, expression const& expression)
{
  return os << expression.m_expression;
}

std::ostream& operator<<(std::ostream& os, unary_expression const& unary_expression)
{
  return os << unary_expression.m_unary_expression;
}

std::ostream& operator<<(std::ostream& os, operator_expression const& operator_expression)
{
  os << operator_expression.first;
  for (auto& l : operator_expression.second)
  {
    os << ' ' << l.first << ' ' << l.second;
  }
  return os;
}

} // namespace ast

BOOST_FUSION_ADAPT_STRUCT(
  ast::expression,
  (ast::expression_node, m_expression)
)

BOOST_FUSION_ADAPT_STRUCT(
  ast::unary_expression,
  (ast::unary_expression_node, m_unary_expression)
)

BOOST_FUSION_ADAPT_STRUCT(
  ast::operator_expression,
  (ast::unary_expression, first),
  (ast::operator_expression::second_type, second)
)

namespace client
{

namespace qi = boost::spirit::qi;
namespace ascii = boost::spirit::ascii;

template <typename Iterator>
class expression_grammar : public qi::grammar<Iterator, ast::expression(), qi::space_type>
{
 private:
  qi::symbols<char, ast::operators> operators;
  qi::rule<Iterator, ast::unary_expression(), qi::space_type> unary_expression;
  qi::rule<Iterator, ast::operator_expression(), qi::space_type> operator_expression;
  qi::rule<Iterator, ast::expression(), qi::space_type> expression;

 public:
  expression_grammar() : expression_grammar::base_type(expression, "expression_grammar")
  {
    using qi::double_;
    using qi::char_;
    using qi::int_;

    operators.add
        ("==", ast::op_eq)
        ("!=", ast::op_ne)
    ;

    unary_expression =
        ( '(' > expression > ')' )
        | int_;

    operator_expression =
        unary_expression >> *(operators > expression);

    expression =
        ( '(' > expression > ')' )
        | operator_expression;
  }
};

} // namespace client

int main()
{
  std::string const input{"1 == 1 != 0"};
  using iterator_type = std::string::const_iterator;
  using expression_grammar = client::expression_grammar<iterator_type>;
  namespace qi = boost::spirit::qi;

  expression_grammar program;
  iterator_type iter{input.begin()};
  iterator_type const end{input.end()};
  ast::expression out;
  bool r = qi::phrase_parse(iter, end, program, qi::space, out);

  if (!r || iter != end)
  {
    std::cerr << "Parsing failed." << std::endl;
    return 1;
  }
  std::cout << "Parsed: " << out << std::endl;
}

EDIT:

I tried simplifying things to just two rules (and two 'ast's):

struct expression;
using unary_expression = boost::variant<boost::recursive_wrapper<expression>, int>;

enum operators { op_eq, op_ne };
struct expression
{
  unary_expression first;
  using second_type = std::vector<std::pair<operators, expression>>;
  second_type second;
};

BOOST_FUSION_ADAPT_STRUCT(
  ast::expression,
  (ast::unary_expression, first),
  (ast::expression::second_type, second)
)

[...]

  unary_expression =
    ( '(' > expression > ')' )
    | int_;

  expression =
    unary_expression >> *(operators > expression);

but also this result in an infinite loop.

#18 0x00000000004646f2 in ast::expression::expression
#19 0x00000000004669ac in boost::recursive_wrapper<ast::expression>::recursive_wrapper
#20 0x0000000000465821 in boost::variant<boost::recursive_wrapper<ast::expression>, int>::variant
...

Solution

  • Variants default-construct to their first element type.

    This indeed directly leads to an infinite loop. (Demo)

    The way to solve it is to make the default variant element not re-entrant or to make it lazily constructed. In this case, you can simply re-arrange to make int the first element.

    Better yet, there doesn't seem to be a need to reflect the operator precedence hieararchy (as it is expressed in the rules) in the resultant tree, so why not simplify to:

    struct unary_expression;
    struct binary_expression;
    
    enum operators { op_eq, op_ne };
    
    using expression = boost::variant<
        int, 
        boost::recursive_wrapper<unary_expression>,
        boost::recursive_wrapper<binary_expression>
    >;
    
    struct unary_expression {
        expression expr;
    };
    
    struct binary_expression {
        expression                                    first;
        std::vector<std::pair<operators, expression>> other;
    };
    

    This no longer crashes and seems a bit simpler in adaptation and usage.

    Simplified Full Demo

    This full demo uses that AST, but adds a true unary expression. A few style things have been fixed:

    • don't expose the skipper unless you intend for the caller to change it
    • make the parser const
    • show unparsed trailing data (or instead assert >> qi::eoi)

    Note: I might have changed the precedence rules (specifically, associativity of binary operators). I'm not sure which version you require.

    Live On Coliru

    //#define BOOST_SPIRIT_DEBUG
    #include <boost/spirit/include/qi.hpp>
    #include <boost/fusion/include/adapt_struct.hpp>
    #include <boost/fusion/include/std_pair.hpp>
    #include <iostream>
    #include <string>
    #include <vector>
    
    namespace ast {
        struct unary_expression;
        struct binary_expression;
    
        enum operators { op_eq, op_ne };
    
        using expression = boost::variant<
            int, 
            boost::recursive_wrapper<unary_expression>,
            boost::recursive_wrapper<binary_expression>
        >;
    
        struct unary_expression {
            bool negated = false;
            expression expr;
        };
    
        struct binary_expression {
            expression                                    first;
            std::vector<std::pair<operators, expression>> other;
        };
    
    }
    
    BOOST_FUSION_ADAPT_STRUCT(ast::unary_expression, negated, expr)
    BOOST_FUSION_ADAPT_STRUCT(ast::binary_expression, first, other)
    
    namespace ast {
        static inline std::ostream& operator<<(std::ostream& os, operators op) { return os << (op==op_eq?"==":"!="); }
        static inline std::ostream& operator<<(std::ostream& os, binary_expression const& e) { 
            os << e.first;
            for (auto& oe : e.other)
                os << " " << oe.first << " " << oe.second;
            return os;
        }
        static inline std::ostream& operator<<(std::ostream& os, unary_expression const& e) {
            return os << (e.negated?"!":"") << "(" << e.expr << ")";
        }
    }
    
    namespace client
    {
        namespace qi = boost::spirit::qi;
    
        template <typename Iterator>
        class expression_grammar : public qi::grammar<Iterator, ast::expression()> {
          private:
            qi::symbols<char, ast::operators> operators;
            qi::rule<Iterator, ast::expression()> start;
    
            qi::rule<Iterator, ast::expression(),        qi::space_type> simple_expression;
            qi::rule<Iterator, ast::unary_expression(),  qi::space_type> unary_expression;
            qi::rule<Iterator, ast::binary_expression(), qi::space_type> binary_expression;
            qi::rule<Iterator, ast::expression(),        qi::space_type> expression;
    
          public:
            expression_grammar() : expression_grammar::base_type(start, "expression") {
                using namespace qi;
    
                operators.add
                    ("==", ast::op_eq)
                    ("!=", ast::op_ne)
                    ;
    
                simple_expression =
                    ( '(' > expression > ')' )
                    | int_;
    
                unary_expression = 
                    matches['!'] >> simple_expression;
    
                binary_expression =
                    unary_expression >> *(operators > expression);
    
                expression = binary_expression;
    
                start = skip(space) [ expression ];
    
                BOOST_SPIRIT_DEBUG_NODES((expression)(binary_expression)(unary_expression)(simple_expression))
            }
        };
    
    } // namespace client
    
    int main() {
        using It = std::string::const_iterator;
        client::expression_grammar<It> const program;
    
        std::string const input{"1 == !(1 != 0)"};
        It iter = input.begin(), end = input.end();
        ast::expression out;
    
        if (parse(iter, end, program, out)) {
            std::cout << "Parsed: " << out << std::endl;
        } else {
            std::cerr << "Parsing failed." << std::endl;
            return 1;
        }
    
        if (iter != end) {
            std::cout << "Remaining unparsed input: '" << std::string(iter, end) << "'\n";
        }
    }
    

    Prints

    Parsed: (1) == !((1) != (0))