Search code examples
c++boostboost-spirit-qi

Boost::spirit::qi parser not consuming entire string


I'm creating a grammar for a simple calculator, but I'm having trouble hammering out the reason why one specific test case is not working. Here is a functional example of my parser:

#include <iostream>
#include <vector>
#include <string>
#include <boost/spirit/include/qi.hpp>
#include <boost/spirit/include/qi_char.hpp>
#include <boost/spirit/include/qi_parse.hpp>
#include <boost/spirit/include/phoenix_bind.hpp>
using namespace boost::spirit;
using namespace boost::phoenix;
using std::endl;
using std::cout;
using std::string;
using std::vector;

// Trace hook for operators: writes "PushOp: <op>" plus a newline to standard
// output and flushes the stream.
void fPushOp(const std::string& op){
  std::ostream& out = std::cout;
  out << "PushOp: " << op << '\n';
  out.flush();
}

// Trace hook for integer literals: writes "PushInt: <my_str>" plus a newline
// to standard output and flushes the stream.
//
// The text is taken by const reference because it is only read; this also
// lets callers pass const strings and temporaries, which a non-const
// reference parameter would reject.
void fPushInt(const std::string& my_str){
  std::cout << "PushInt: " << my_str << std::endl;
}

// Calculator grammar exactly as posted in the question.
//
// The grammar and every rule are declared with only the Iterator template
// argument, i.e. without a skipper type. The start rule is `result`.
//
// Each operator level has the shape
//     operand >> *( (op >> operand)[action] | ... )
// where the action calls fPushOp with the operator's name; `number` calls
// fPushInt with the matched text. Levels, from tightest to loosest binding:
//   number -> complement_factor -> term -> multiplicative -> additive
//   -> shift -> relational -> equality -> and -> exclusive_or
//   -> inclusive_or -> series -> logical_and -> logical_or -> expression
template<class Iterator>
struct Calculator : public qi::grammar<Iterator> {

    // All rules share one declaration. negate_expression, factor, integer,
    // variable, variable_combo and word are declared but never assigned in
    // the constructor below; single_expression is assigned but not referenced
    // by any other rule.
    qi::rule<Iterator>  
      expression, logical_or_expression, logical_and_expression, negate_expression, series_expression,
      single_expression, inclusive_or_expression, exclusive_or_expression, and_expression, equality_expression, 
      relational_expression, shift_expression, additive_expression, multiplicative_expression, 
      term, complement_factor, factor, number, integer, variable, variable_combo, word, result;

    Calculator() : Calculator::base_type(result)
    {
          // Integer literal: hexadecimal ("0x"), binary ("0b"), octal
          // (leading "0") or decimal digits. `lexeme` and `bind` are found
          // unqualified through the using-directives at the top of the file.
          // NOTE(review): the "0x"/"0b"/"0" prefixes are plain string
          // literals, so they presumably do not end up in the string handed
          // to fPushInt - confirm.
          number = 
              lexeme[
                qi::as_string[
                    ("0x" >> +qi::char_("0-9a-fA-F"))     
                  | ("0b" >> +qi::char_("0-1"))
                  | ("0" >>  +qi::char_("0-7"))
                  | +qi::char_("0-9")
                ] [bind(&fPushInt, qi::_1)]
              ] 
             ;

          // A number, optionally prefixed by '~' (OP_COMPLEMENT) or '!'
          // (OP_NEGATE). The second ';' below is an empty statement.
          complement_factor = number
              | ('~' >> number)[bind(&fPushOp, "OP_COMPLEMENT")]
              | ('!' >> number)[bind(&fPushOp, "OP_NEGATE")];
              ;
          // ".." (OP_LEGER) and '\' (OP_MASK).
          term = complement_factor
            >> *( (".." >> complement_factor)[bind(&fPushOp, "OP_LEGER")]
                | ('\\' >> complement_factor)[bind(&fPushOp, "OP_MASK")]
                ); 
          // '/', '%' and '*'.
          multiplicative_expression = term
            >> *( ('/' >> term)[bind(&fPushOp, "OP_DIV")]
                | ('%' >> term)[bind(&fPushOp, "OP_MOD")]
                | ('*' >> term)[bind(&fPushOp, "OP_MUL")]
                );
          // '+' and '-'.
          additive_expression = multiplicative_expression
            >> *( ('+' >> multiplicative_expression)[bind(&fPushOp, "OP_ADD")]
                | ('-' >> multiplicative_expression)[bind(&fPushOp, "OP_SUB")]
                );
          // ">>" and "<<".
          shift_expression = additive_expression
            >> *( (">>" >> additive_expression)[bind(&fPushOp, "OP_SRL")]
                | ("<<" >> additive_expression)[bind(&fPushOp, "OP_SLL")]
                );
          // '<', '>', "<=" and ">="; the one-character alternatives are
          // listed (and therefore tried) before the two-character ones.
          relational_expression = shift_expression
            >> *( ('<' >> shift_expression)[bind(&fPushOp, "OP_LT")]
                | ('>' >> shift_expression)[bind(&fPushOp, "OP_GT")]
                | ("<=" >> shift_expression)[bind(&fPushOp, "OP_LET")]
                | (">=" >> shift_expression)[bind(&fPushOp, "OP_GET")]
                );
          // "==" and "!=".
          equality_expression = relational_expression 
            >> *( ("==" >> relational_expression)[bind(&fPushOp, "OP_EQ")]
                | ("!=" >> relational_expression)[bind(&fPushOp, "OP_NEQ")] 
                );
          // Bitwise '&', '^' and '|', each on its own level.
          and_expression = equality_expression 
            >> *(('&' >> equality_expression)[bind(&fPushOp, "OP_AND")]); 
          exclusive_or_expression = and_expression 
            >> *(('^' >> and_expression)[bind(&fPushOp, "OP_XOR")]); 
          inclusive_or_expression = exclusive_or_expression 
            >> *(('|' >> exclusive_or_expression)[bind(&fPushOp, "OP_OR")]); 
          single_expression = inclusive_or_expression;
          // ',' (OP_SERIES) sits between the bitwise and the logical levels.
          series_expression = inclusive_or_expression 
            >> *((',' >> inclusive_or_expression)[bind(&fPushOp, "OP_SERIES")]);
          // "&&" and "||".
          logical_and_expression = series_expression
            >> *(("&&" >> series_expression)[bind(&fPushOp, "OP_LOGICAL_AND")]); 
          logical_or_expression = logical_and_expression 
            >> *(("||" >> logical_and_expression)[bind(&fPushOp, "OP_LOGICAL_OR")]);
          expression = logical_or_expression;

          result = expression;
    }
};

// Driver from the question: runs the grammar over a fixed expression and
// prints whatever part of the input was left unconsumed.
int main(){
  Calculator<string::const_iterator> calc;
  const string expr("!3 && 0,1");

  string::const_iterator it = expr.begin();
  const string::const_iterator last = expr.end();

  // NOTE(review): this is the plain parse() entry point, not phrase_parse();
  // the Solution section explains why qi::space has no skipping effect here.
  parse(it, last, calc, qi::space);

  // `it` was advanced by parse(); [it, last) is the unparsed tail.
  const string leftover(it, last);
  cout << "Remaining: " << leftover << endl;

  return 0;
}

The expected output is the following:

PushInt: 3
PushOp: OP_NEGATE
PushInt: 0
PushInt: 1
PushOp: OP_SERIES
PushOp: OP_LOGICAL_AND
Remaining: 

The current output when expr is !3 && 0,1 seems to indicate that && 0,1 does not get consumed:

PushInt: 3
PushOp: OP_NEGATE
Remaining:  && 0,1

If expr is !3&&0,1, then it works just fine. With a qi::space skipper being used when invoking qi::parse, I don't see how these two strings are seen differently. Can anyone point me towards the problem?


Solution

  • Your rules don't declare the skipper:

    qi::rule<Iterator>  
    

    and hence they are implicit lexemes: no skipping takes place inside them. For background on lexeme[] in relation to skippers, see "Boost spirit skipper issues".

    To correctly apply a skipper

    • you need to declare the skipper at the grammar and rule definitions

      template<class Iterator, typename Skipper = qi::space_type>
      struct Calculator : public qi::grammar<Iterator, Skipper> {
      
          qi::rule<Iterator, Skipper>  
            expression, logical_or_expression, logical_and_expression, negate_expression, series_expression,
            single_expression, inclusive_or_expression, exclusive_or_expression, and_expression, equality_expression, 
            relational_expression, shift_expression, additive_expression, multiplicative_expression, 
            term, complement_factor, factor, result;
      
          qi::rule<Iterator>  
            number, integer, variable, variable_combo, word;
      
    • you need to use phrase_parse (instead of parse), passing an instance of the skipper type

      phrase_parse(it, expr.end(), calc, qi::space);
      

    Fixed Code

    Further notes:

    • cleaned up the includes (prefer to include the full phoenix.hpp because you will get bitten by inexplicable bugs if you are missing subtle bits. Of course, if you know which bits, feel free to reduce compile times by selectively including sub-headers)
    • I strongly advise against using namespace unless absolutely necessary. In this case, you easily invite confusion among the many flavours of bind. And, no, just saying using boost::phoenix::ref is not enough, because

      using boost::phoenix::ref;
      std::string s;
      bind(foo, ref(s))(); 
      

      ends up using std::ref, not boost::phoenix::ref, due to ADL (argument-dependent lookup)

    #include <iostream>
    #include <string>
    #include <boost/spirit/include/qi.hpp>
    #include <boost/spirit/include/phoenix.hpp>
    namespace qi = boost::spirit::qi;
    namespace phx = boost::phoenix;
    
    // Trace hook for operators: writes "PushOp: <op>" plus a newline to
    // standard output and flushes the stream.
    void fPushOp(const std::string& op){
        std::ostream& out = std::cout;
        out << "PushOp: " << op << '\n';
        out.flush();
    }
    
    // Trace hook for integer literals: writes "PushInt: <my_str>" plus a
    // newline to standard output and flushes the stream.
    //
    // The text is taken by const reference because it is only read; this
    // also lets callers pass const strings and temporaries, which a
    // non-const reference parameter would reject.
    void fPushInt(const std::string& my_str){
        std::cout << "PushInt: " << my_str << std::endl;
    }
    
    // Calculator grammar with skipper support.
    //
    // Template parameters:
    //   Iterator - iterator type of the input being parsed
    //   Skipper  - type of the skip parser (defaults to qi::space_type); an
    //              instance of it must be passed to qi::phrase_parse
    //
    // Each operator level has the shape
    //     operand >> *( (op >> operand)[action] | ... )
    // where the action calls fPushOp with the operator's name; `number`
    // calls fPushInt with the literal's text. Levels, from tightest to
    // loosest binding:
    //   number -> complement_factor -> term -> multiplicative -> additive
    //   -> shift -> relational -> equality -> and -> exclusive_or
    //   -> inclusive_or -> series -> logical_and -> logical_or -> expression
    template<class Iterator, typename Skipper = qi::space_type>
    struct Calculator : public qi::grammar<Iterator, Skipper> {

        // Rules that declare the skipper: skipping happens between their
        // tokens. negate_expression and factor are declared but not assigned
        // in the constructor; single_expression is assigned but not
        // referenced by any other rule.
        qi::rule<Iterator, Skipper>
          expression, logical_or_expression, logical_and_expression,
            negate_expression, series_expression, single_expression,
            inclusive_or_expression, exclusive_or_expression, and_expression,
            equality_expression, relational_expression, shift_expression,
            additive_expression, multiplicative_expression, term,
            complement_factor, factor, result;

        // Rules without a skipper are implicit lexemes, so a token such as
        // a number cannot contain embedded whitespace. Only `number` is
        // assigned in the constructor.
        qi::rule<Iterator>
            number, integer, variable, variable_combo, word;

        Calculator() : Calculator::base_type(result)
        {
            // Integer literal: hexadecimal ("0x"), binary ("0b"), octal
            // (leading "0") or decimal digits.
            //
            // qi::raw[] exposes the complete matched input range, so the
            // string handed to fPushInt keeps its radix prefix. Without it
            // the "0x"/"0b"/"0" literals contribute no attribute and
            // "0x10", "010" and "10" would all be reported as "10".
            // No lexeme[] is needed: the rule has no skipper.
            number =
                qi::as_string[
                  qi::raw[
                      ("0x" >> +qi::char_("0-9a-fA-F"))
                    | ("0b" >> +qi::char_("0-1"))
                    | ("0" >>  +qi::char_("0-7"))
                    | +qi::char_("0-9")
                  ]
                ] [phx::bind(&fPushInt, qi::_1)]
               ;

            // A number, optionally prefixed by '~' or '!'.
            complement_factor = number
                | ('~' >> number)[phx::bind(&fPushOp, "OP_COMPLEMENT")]
                | ('!' >> number)[phx::bind(&fPushOp, "OP_NEGATE")]
                ;
            // ".." and '\'.
            term = complement_factor
              >> *( (".." >> complement_factor)[phx::bind(&fPushOp, "OP_LEGER")]
                  | ('\\' >> complement_factor)[phx::bind(&fPushOp, "OP_MASK")]
                  );
            // '/', '%' and '*'.
            multiplicative_expression = term
              >> *( ('/' >> term)[phx::bind(&fPushOp, "OP_DIV")]
                  | ('%' >> term)[phx::bind(&fPushOp, "OP_MOD")]
                  | ('*' >> term)[phx::bind(&fPushOp, "OP_MUL")]
                  );
            // '+' and '-'.
            additive_expression = multiplicative_expression
              >> *( ('+' >> multiplicative_expression)[phx::bind(&fPushOp, "OP_ADD")]
                  | ('-' >> multiplicative_expression)[phx::bind(&fPushOp, "OP_SUB")]
                  );
            // ">>" and "<<".
            shift_expression = additive_expression
              >> *( (">>" >> additive_expression)[phx::bind(&fPushOp, "OP_SRL")]
                  | ("<<" >> additive_expression)[phx::bind(&fPushOp, "OP_SLL")]
                  );
            // '<', '>', "<=" and ">=". The one-character alternatives are
            // tried first; for "<=" / ">=" they fail on the '=' (no operand
            // starts with it) and the parser falls through to the
            // two-character alternatives.
            relational_expression = shift_expression
              >> *( ('<' >> shift_expression)[phx::bind(&fPushOp, "OP_LT")]
                  | ('>' >> shift_expression)[phx::bind(&fPushOp, "OP_GT")]
                  | ("<=" >> shift_expression)[phx::bind(&fPushOp, "OP_LET")]
                  | (">=" >> shift_expression)[phx::bind(&fPushOp, "OP_GET")]
                  );
            // "==" and "!=".
            equality_expression = relational_expression
              >> *( ("==" >> relational_expression)[phx::bind(&fPushOp, "OP_EQ")]
                  | ("!=" >> relational_expression)[phx::bind(&fPushOp, "OP_NEQ")]
                  );
            // Bitwise '&', '^' and '|', each on its own level.
            and_expression = equality_expression
              >> *(('&' >> equality_expression)[phx::bind(&fPushOp, "OP_AND")]);
            exclusive_or_expression = and_expression
              >> *(('^' >> and_expression)[phx::bind(&fPushOp, "OP_XOR")]);
            inclusive_or_expression = exclusive_or_expression
              >> *(('|' >> exclusive_or_expression)[phx::bind(&fPushOp, "OP_OR")]);
            single_expression = inclusive_or_expression;
            // ',' (OP_SERIES) sits between the bitwise and logical levels.
            series_expression = inclusive_or_expression
              >> *((',' >> inclusive_or_expression)[phx::bind(&fPushOp, "OP_SERIES")]);
            // "&&" and "||".
            logical_and_expression = series_expression
              >> *(("&&" >> series_expression)[phx::bind(&fPushOp, "OP_LOGICAL_AND")]);
            logical_or_expression = logical_and_expression
              >> *(("||" >> logical_and_expression)[phx::bind(&fPushOp, "OP_LOGICAL_OR")]);
            expression = logical_or_expression;

            result = expression;
        }
    };
    
    // Driver: parses a fixed expression with the skipper-aware grammar and
    // prints whatever part of the input was left unconsumed.
    //
    // Returns 0 when the grammar matched and 1 when phrase_parse reported
    // failure.
    int main(){
      Calculator<std::string::const_iterator> calc;

      const std::string expr("!3 && 0,1");
      std::string::const_iterator it = expr.begin();

      // phrase_parse (unlike parse) applies the skipper passed as the last
      // argument. Its result is checked instead of being discarded, so a
      // failed match is not mistaken for a successful one.
      const bool ok = phrase_parse(it, expr.end(), calc, qi::space);
      if (!ok)
          std::cerr << "Parse failed" << std::endl;

      // `it` was advanced by phrase_parse; [it, end) is the unparsed tail.
      std::cout << "Remaining: " << std::string(it,expr.end()) << std::endl;

      return ok ? 0 : 1;
    }