Search code examples
c++recursionstaticboost-spiritforward-declaration

Declaration of cross-recursive rules


I declared rules of my grammar as static const. That worked fine till I tried to use cross-recursive rules (rule1 is defined using rule2 which is defined using rule1). The source code still can be built, but segfaults on parsing source containing such cross-recursive case. Here's a simplified code of the grammar:

template < typename Iterator >
class Skipper : public qi::grammar<Iterator> {
public:
    Skipper ( ) : Skipper::base_type(_skip_rule) { }
private:
    static qi::rule<Iterator> const
        _comment,
        _skip_rule;
};

template < typename Iterator >
typename qi::rule<Iterator> const
    Skipper<Iterator>::_comment(
        boost::spirit::repository::confix("/*", "*/")[*(qi::char_ - "*/")]          // Multi-line
        | boost::spirit::repository::confix("//", qi::eol)[*(qi::char_ - qi::eol)]   // Single-line
    );

template < typename Iterator >
typename qi::rule<Iterator> const
     Skipper<Iterator>::_skip_rule(qi::ascii::space | _comment);

template < typename Iterator, typename Skipper >
class Grammar : public qi::grammar<Iterator, Skipper > {
public:
    Grammar ( ) : Grammar::base_type(expression) { }
private:
    static qi::rule<Iterator, Skipper> const
        // Tokens
        scalar_literal,
        identifier,
        // Rules
        operand,
        expression;
};

template < typename Iterator, typename Skipper >
typename qi::rule<Iterator, Skipper> const
    Grammar<Iterator, Skipper>::scalar_literal(qi::uint_ | qi::int_);

template < typename Iterator, typename Skipper >
typename qi::rule<Iterator, Skipper> const
    Grammar<Iterator, Skipper>::identifier(qi::lexeme[(qi::alpha | '_') >> *(qi::alnum | '_')]);

template < typename Iterator, typename Skipper >
typename qi::rule<Iterator, Skipper> const
    Grammar<Iterator, Skipper>::operand((scalar_literal | identifier | ('(' >> expression >> ')')));

template < typename Iterator, typename Skipper >
typename qi::rule<Iterator, Skipper> const
    Grammar<Iterator, Skipper>::expression(operand);

(expression rule is made identical to operand to make the code easier to understand; of course it should be more complicated yet based on operand). operand declaration uses expression one and vice versa. That segfaults when trying to parse_phrase for example (123). I suppose that it's because of "forward" using of expression; same happens if I put expression declaration before the operand one. So in what way should these rules be declared to avoid runtime error?


Solution

    1. First off, the static has nothing to do with it:

      Live On Coliru fails just as badly:

      #include <boost/spirit/include/qi.hpp>
      #include <boost/spirit/repository/include/qi.hpp>
      
      namespace qi = boost::spirit::qi;
      
      template <typename Iterator>
      struct Skipper : qi::grammar<Iterator> {
          Skipper() : Skipper::base_type(_skip_rule) { }
      private:
          qi::rule<Iterator> const
              _comment { 
                  boost::spirit::repository::confix("/*", "*/")    [*(qi::char_ - "*/")]     // Multi-line
              | boost::spirit::repository::confix("//", qi::eol) [*(qi::char_ - qi::eol)]  // Single-line
              },
              _skip_rule {
                  qi::ascii::space | _comment
              };
      };
      
      template <typename Iterator, typename Skipper>
      struct Grammar : qi::grammar<Iterator, Skipper> {
          Grammar() : Grammar::base_type(expression) { }
      private:
          qi::rule<Iterator, Skipper> const
              // Tokens
              scalar_literal { qi::uint_ | qi::int_ },
              identifier     { qi::lexeme[(qi::alpha | '_') >> *(qi::alnum | '_')] },
              // Rules
              operand        { (scalar_literal | identifier | ('(' >> expression >> ')')) },
              expression     { operand };
      };
      
      int main() {
          using It = std::string::const_iterator;
          Skipper<It> s;
          Grammar<It, Skipper<It> > p;
          std::string const input = "(123)";
      
          It f = input.begin(), l = input.end();
      
          bool ok = qi::phrase_parse(f,l,p,s);
      
          if (ok)   std::cout << "Parse success\n";
          else      std::cout << "Parse failed\n";
          if (f!=l) std::cout << "Remaining input: '" << std::string(f,l) << "'\n";
      }
      
    2. Secondly, the skipper has nothing to with things:

      Live On Coliru fails just as badly:

      #include <boost/spirit/include/qi.hpp>
      #include <boost/spirit/repository/include/qi.hpp>
      
      namespace qi = boost::spirit::qi;
      
      template <typename Iterator, typename Skipper = qi::ascii::space_type>
      struct Grammar : qi::grammar<Iterator, Skipper> {
          Grammar() : Grammar::base_type(expression) { }
      private:
          qi::rule<Iterator, Skipper> const
              // Tokens
              scalar_literal { qi::uint_ | qi::int_ },
              identifier     { qi::lexeme[(qi::alpha | '_') >> *(qi::alnum | '_')] },
              // Rules
              operand        { (scalar_literal | identifier | ('(' >> expression >> ')')) },
              expression     { operand };
      };
      
      int main() {
          using It = std::string::const_iterator;
          Grammar<It> p;
          std::string const input = "(123)";
      
          It f = input.begin(), l = input.end();
      
          bool ok = qi::phrase_parse(f,l,p,qi::ascii::space);
      
          if (ok)   std::cout << "Parse success\n";
          else      std::cout << "Parse failed\n";
          if (f!=l) std::cout << "Remaining input: '" << std::string(f,l) << "'\n";
      }
      
    3. Thirdly, the timing of initialization has to do with it:

      Live On Coliru succeeds:

      #include <boost/spirit/include/qi.hpp>
      #include <boost/spirit/repository/include/qi.hpp>
      
      namespace qi = boost::spirit::qi;
      
      template <typename Iterator, typename Skipper = qi::ascii::space_type>
      struct Grammar : qi::grammar<Iterator, Skipper> {
          Grammar() : Grammar::base_type(expression) { 
              scalar_literal = qi::uint_ | qi::int_;
              identifier     = (qi::alpha | '_') >> *(qi::alnum | '_');
              // Rules
              operand        = (scalar_literal | identifier | ('(' >> expression >> ')'));
              expression     = operand;
          }
      private:
          qi::rule<Iterator>          scalar_literal, identifier; // Tokens
          qi::rule<Iterator, Skipper> operand,        expression; // Rules
      };
      
      int main() {
          using It = std::string::const_iterator;
          Grammar<It> p;
          std::string const input = "(123)";
      
          It f = input.begin(), l = input.end();
      
          bool ok = qi::phrase_parse(f,l,p,qi::ascii::space);
      
          if (ok)   std::cout << "Parse success\n";
          else      std::cout << "Parse failed\n";
          if (f!=l) std::cout << "Remaining input: '" << std::string(f,l) << "'\n";
      }
      

      Prints

      Parse success
      
    4. Finally, you can have all the cake and eat it too:

      Live On Coliru

      #include <boost/spirit/include/qi.hpp>
      #include <boost/spirit/repository/include/qi.hpp>
      
      namespace qi = boost::spirit::qi;
      
      namespace parsing {
          namespace detail {
              template <typename Iterator>
              struct Skipper : qi::grammar<Iterator> {
                  Skipper() : Skipper::base_type(_skip_rule) {
                      _comment  = boost::spirit::repository::confix("/*", "*/")    [*(qi::char_ - "*/")]     // Multi-line
                              | boost::spirit::repository::confix("//", qi::eol) [*(qi::char_ - qi::eol)]  // Single-line
                              ;
      
                      _skip_rule = qi::ascii::space | _comment;
                  }
              private:
                  qi::rule<Iterator> _comment, _skip_rule;
              };
      
              template <typename Iterator, typename Skipper = Skipper<Iterator> >
              struct Grammar : qi::grammar<Iterator, Skipper> {
                  Grammar() : Grammar::base_type(expression) {
                      scalar_literal = qi::uint_ | qi::int_;
                      identifier     = (qi::alpha | '_') >> *(qi::alnum | '_');
                      // Rules
                      operand        = (scalar_literal | identifier | ('(' >> expression >> ')'));
                      expression     = operand;
                  }
              private:
                  qi::rule<Iterator>          scalar_literal, identifier; // Tokens
                  qi::rule<Iterator, Skipper> operand,        expression; // Rules
              };
          }
      
          template <typename Iterator, typename Skipper = detail::Skipper<Iterator> >
          struct facade {
              template <typename Range> static bool parse(Range const& input) {
                  Iterator f = boost::begin(input), l = boost::end(input);
                  bool ok = qi::phrase_parse(f, l, _parser, _skipper);
      
                  if (f!=l)
                      std::cout << "Remaining input: '" << std::string(f,l) << "'\n";
      
                  return ok;
              }
      
          private:
              static const detail::Skipper<Iterator>          _skipper;
              static const detail::Grammar<Iterator, Skipper> _parser;
          };
      
          template <class I, class S> const detail::Skipper<I>    facade<I,S>::_skipper = {};
          template <class I, class S> const detail::Grammar<I, S> facade<I,S>::_parser  = {};
      }
      
      int main() {
          using It = std::string::const_iterator;
          std::string const input = "(123)";
      
          bool ok = parsing::facade<It>::parse(input);
      
          if (ok)   std::cout << "Parse success\n";
          else      std::cout << "Parse failed\n";
      }
      

    Note that the result is the same, the parser/skipper are every bit as static and const as in the original code, the code is a lot easier to maintain (and has a bit more structure to it at the same time).


    This is basically where the Singletons-are-bad theme meets the inner-const-is-problematic theme. You don't need to make the fields const. You don't need to make the instances static.

    Just, create only one instance if you prefer. Also, it's not a problem that the parser is now copyable (you don't have to copy it; but now you can).