Search code examples
c++parsingboost-spiritboost-spirit-qiboost-spirit-lex

Spirit Fails to Parse After only Appearing to get First symbol From the Lexer


Recently, I asked a question here: Boost Spirit Segfault In Parser

In this post it was pointed out the grammar I was working with was absolutely left recursive and that spirit is a PEG parser generator, meaning left recursion is impossible.

I converted the grammar to a non-left recursive grammar, using the rule from the section dealing with left recursion in the Dragon Book.

Given a left recursive grammar

A -> A >> alpha | beta

It can be converted to the equivalent right recursive grammar by doing the following:

A -> beta >> A'

A' -> alpha >> A' | epsilon 

Here is the resulting parser, with what I believe to be the the non-left recursive productions:

namespace interpreter {
namespace qi = boost::spirit::qi;
template <typename Iterator, typename Skipper>
struct InterpreterGrammar : qi::grammar<Iterator, Skipper>
{           
    template <typename TokenDef>
    InterpreterGrammar(TokenDef const& tok)
        : InterpreterGrammar::base_type(start)
    {
        using boost::phoenix::ref;

        start %= functionList >> endList >> qi::eoi;

        // different expressions
        exp %= 
               qi::token(k_alphaTerminal) >> qi::token(k_equalTo) >> qi::token(k_alphaTerminal) >> expPrime
               |
               qi::token(k_numericTerminal) >> expPrime
               |
               qi::token(k_trueTok) >> expPrime
               |
               qi::token(k_falseTok) >> expPrime;

        expPrime %=     
               qi::token(k_equalTo) >> exp >> expPrime
               |
               qi::token(k_notEq) >> exp >> expPrime
               |
               qi::token(k_less) >> exp >> expPrime
               |
               qi::token(k_lessEq) >> exp >> expPrime
               |
               qi::token(k_greater) >> exp >> expPrime
               |
               qi::token(k_greaterEq) >> exp >> expPrime
               |
               qi::token(k_andTok) >> exp >> expPrime
               |
               qi::token(k_orTok) >> exp >> expPrime
               |
               qi::token(k_notTok) >> exp 
               |
               qi::token(k_plues) >> exp >> expPrime
               |
               qi::token(k_minus) >> exp >> expPrime
               |
               qi::token(k_mult) >> exp >> expPrime
               |
               qi::token(k_minus) >> exp
               |
               qi::token(k_leftParen) >> exp >> qi::token(k_rightParen)
               |
               qi::token(k_alphaTerminal) >> qi::token(k_leftBracket) >> exp >> qi::token(k_rightBracket) 
               |
               qi::token(k_alphaTerminal) >> qi::token(k_leftParen) >> qi::token(k_rightParen)
               |
               qi::token(k_alphaTerminal) >> qi::token(k_leftParen) >> exp >> qi::token(k_rightParen)
               | 
               qi::eps;

        // parameter list
        paramList %= exp >> paramListPrime;

        paramListPrime %= qi::token(k_comma) >> exp >> paramListPrime
                          |
                          qi::eps;

        // return statements
        returnStatement %= qi::token(k_returnTok) >> exp
                           |
                           qi::token(k_returnTok);

        // function call statements
        callStatement %= qi::token(k_alphaTerminal) >> qi::token(k_leftParen) >> qi::token(k_rightParen)
                         |
                         qi::token(k_alphaTerminal) >> qi::token(k_leftParen) >> paramList >> qi::token(k_rightParen);

        // variable assignment
        assignmentStatement %= qi::token(k_alphaTerminal) >> qi::token(k_assign) >> exp
                               |
                               qi::token(k_alphaTerminal) >> qi::token(k_leftBracket) >> exp
                                   >> qi::token(k_rightBracket) >> qi::token(k_assign) >> exp;

        // list of integers
        intList %= qi::token(k_numericTerminal) >> intListPrime;

        intListPrime %= 
                  qi::token(k_comma) >> qi::token(k_numericTerminal) >> intListPrime
                  |
                  qi::eps;

        // print out a variable
        printStatement %= qi::token(k_print) >> exp;

        // take input
        inputStatement %= qi::token(k_alphaTerminal) >> qi::token(k_input);

        // conditional statement
        conditionStatement %= qi::token(k_ifTok) >> exp >> qi::token(k_colon) >> statements >> optionalElse;

        // consitions have optional else
        optionalElse %= qi::token(k_elseTok) >> qi::token(k_colon) >> statements
                        |
                        qi::eps;

        // while loop
        whileStatement %= qi::token(k_whileTok) >> exp >> qi::token(k_colon) >> statements >> qi::token(k_elihw);

        // actual program statements
        endList %= end >> endListPrime;

        endListPrime %= end >> endListPrime
                        |
                        qi::eps;

        // end possibilities of program in global space
        end %= callStatement
               |
               printStatement
               |
               qi::token(k_alphaTerminal) >> qi::token(k_assign) >> qi::token(k_input)
               |
               qi::token(k_alphaTerminal) >> qi::token(k_assign) >> exp
               |
               qi::token(k_alphaTerminal) >> qi::token(k_assign) >> qi::token(k_leftBracket) >> intList
                   >> qi::token(k_rightBracket)
               |
               qi::token(k_alphaTerminal) >> qi::token(k_leftBracket) >> exp >> qi::token(k_rightBracket)
                   >> qi::token(k_assign) >> exp;

        // function parameters
        param %=
                qi::token(k_alphaTerminal) >> paramPrime
                |
                qi::token(k_alphaTerminal) >> qi::token(k_leftBracket) >> qi::token(k_rightBracket)
                    >> paramPrime;

        // for handling left recursion in paramlist
        paramPrime %= 
                    qi::token(k_comma) >> qi::token(k_alphaTerminal) >> paramPrime
                    | 
                    qi::eps;


        // define a statement as assignment print input condition while or call
        statement %= 
                    assignmentStatement
                    |
                    printStatement
                    |
                    inputStatement
                    |
                    conditionStatement
                    |
                    whileStatement
                    |
                    callStatement
                    |
                    returnStatement;

        // general statement list
        statements %= statement >> statementsPrime;

        // for handling left recursion in statements
        statementsPrime %= statement >> statementsPrime
                           |
                           qi::eps;

        // functions
        functionList %= qi::token(k_def) >> qi::token(k_alphaTerminal) >> qi::token(k_leftParen)
                            >> param >> qi::token(k_rightParen) >> qi::token(k_colon)
                            >> statements >> qi::token(k_fed)
                        |
                        qi::token(k_def) >> qi::token(k_alphaTerminal) >> qi::token(k_leftParen)
                            >> qi::token(k_rightParen) >> qi::token(k_colon) >> statements >> qi::token(k_fed)
                        | qi::eps;

        BOOST_SPIRIT_DEBUG_NODES((start)(functionList));
        debug(start);
    }
    qi::rule<Iterator, Skipper> start;
    qi::rule<Iterator, Skipper> functionList;
    qi::rule<Iterator, Skipper> endList;
    qi::rule<Iterator, Skipper> endListPrime;
    qi::rule<Iterator, Skipper> param;
    qi::rule<Iterator, Skipper> paramPrime;
    qi::rule<Iterator, Skipper> paramList;
    qi::rule<Iterator, Skipper> paramListPrime;
    qi::rule<Iterator, Skipper> statements;
    qi::rule<Iterator, Skipper> statementsPrime;
    qi::rule<Iterator, Skipper> statement;
    qi::rule<Iterator, Skipper> assignmentStatement;
    qi::rule<Iterator, Skipper> printStatement;
    qi::rule<Iterator, Skipper> inputStatement;
    qi::rule<Iterator, Skipper> conditionStatement;
    qi::rule<Iterator, Skipper> whileStatement;
    qi::rule<Iterator, Skipper> callStatement;
    qi::rule<Iterator, Skipper> returnStatement;
    qi::rule<Iterator, Skipper> exp;
    qi::rule<Iterator, Skipper> expPrime;
    qi::rule<Iterator, Skipper> intList;
    qi::rule<Iterator, Skipper> intListPrime;
    qi::rule<Iterator, Skipper> optionalElse;
    qi::rule<Iterator, Skipper> end;
};

}

Here is the lexer

#include <boost/spirit/include/qi.hpp>
#include <boost/spirit/include/lex_lexertl.hpp>
#include <boost/spirit/include/phoenix_operator.hpp>
#include <boost/spirit/include/phoenix_statement.hpp>
#include <boost/spirit/include/phoenix_container.hpp>
#include <iostream>
#include <fstream>
#include <streambuf>

#include <boost/bind.hpp>
#include <boost/ref.hpp>

namespace interpreter
{
    namespace lex = boost::spirit::lex;

    enum Tokens
    {
        k_andTok = 1,
        k_def = 2,
        k_elihw = 3,
        k_elseTok = 4,
        k_falseTok = 5,
        k_fed = 6,
        k_fi = 7,
        k_ifTok = 8,
        k_input = 9,
        k_notTok = 10,
        k_orTok = 11,
        k_print = 12,
        k_returnTok = 13,
        k_trueTok = 14,
        k_whileTok = 15,
        k_plues = 16,
        k_minus = 17,
        k_mult = 18,
        k_div = 19,
        k_bang = 20,
        k_equalTo = 21,
        k_greaterEq = 22,
        k_lessEq = 23,
        k_notEq = 24,
        k_less = 25,
        k_greater = 26,
        k_assign = 27,
        k_comma = 28,
        k_colon = 29,
        k_leftParen = 30,
        k_rightParen = 31,
        k_leftBracket = 32,
        k_rightBracket = 33,
        k_alphaTerminal = 34,
        k_numericTerminal = 35
    };

    template <typename Lexer>
    struct LexerTokens : lex::lexer<Lexer>
    {
        LexerTokens() :
           whiteSpace("[ \\t\\n]"),
           andTok("and"),
           def("def"),
           elihw("elihw"),
           elseTok("else"),
           falseTok("false"),
           fed("fed"),
           fi("fi"),
           ifTok("if"),
           input("input"),
           notTok("not"),
           orTok("or"),
           print("print"),
           returnTok("return"),
           trueTok("true"),
           whileTok("while"),
           plus("\\+"),
           minus("\\-"),
           mult("\\*"),
           div("\\/"),
           bang("\\!"),
           equalTo("=="),
           greaterEq(">="),
           lessEq("<="),
           notEq("!="),
           less("<"),
           greater(">"),
           assign("="),
           comma(","),
           colon(":"),
           leftParen("\\("),
           rightParen("\\)"),
           leftBracket("\\["),
           rightBracket("\\["),
           alphaTerminal("[a-z][a-zA-Z0-9]*"),
           numericTerminal("[0-9]*")
        {
            this->self("WHITESPACE") = whiteSpace;

            this->self.add
                (andTok, k_andTok)
                (def, k_def)
                (elihw, k_elihw)
                (elseTok, k_elseTok)
                (falseTok, k_falseTok)
                (fed, k_fed)
                (fi, k_fi)
                (ifTok, k_ifTok)
                (andTok, k_andTok)
                (input, k_input)
                (notTok, k_notTok)
                (orTok, k_orTok)
                (print, k_print)
                (returnTok, k_returnTok)
                (trueTok, k_trueTok)
                (whileTok, k_whileTok)
                (plus, k_plues)
                (minus, k_minus)
                (mult, k_mult)
                (div, k_div)
                (bang, k_bang)
                (equalTo, k_equalTo)
                (greaterEq, k_greaterEq)
                (lessEq, k_lessEq)
                (notEq, k_notEq)
                (less, k_less)
                (greater, k_greater)
                (assign, k_assign)
                (comma, k_comma)
                (colon, k_colon)
                (leftParen, k_leftParen)
                (rightParen, k_rightParen)
                (leftBracket, k_leftBracket)
                (rightBracket, k_rightBracket)
                (alphaTerminal, k_alphaTerminal)
                (numericTerminal, k_numericTerminal);
        }

        lex::token_def<lex::omit> whiteSpace;
        lex::token_def<std::string> andTok;
        lex::token_def<std::string> def;
        lex::token_def<std::string> elihw;
        lex::token_def<std::string> elseTok;
        lex::token_def<std::string> falseTok;
        lex::token_def<std::string> fed;
        lex::token_def<std::string> fi;
        lex::token_def<std::string> ifTok;
        lex::token_def<std::string> input;
        lex::token_def<std::string> notTok;
        lex::token_def<std::string> orTok;
        lex::token_def<std::string> print;
        lex::token_def<std::string> returnTok;
        lex::token_def<std::string> trueTok;
        lex::token_def<std::string> whileTok;
        lex::token_def<std::string> plus;
        lex::token_def<std::string> minus;
        lex::token_def<std::string> mult;
        lex::token_def<std::string> div;
        lex::token_def<std::string> bang;
        lex::token_def<std::string> equalTo;
        lex::token_def<std::string> greaterEq;
        lex::token_def<std::string> lessEq;
        lex::token_def<std::string> notEq;
        lex::token_def<std::string> less;
        lex::token_def<std::string> greater;
        lex::token_def<std::string> assign;
        lex::token_def<std::string> comma;
        lex::token_def<std::string> colon;
        lex::token_def<std::string> leftParen;
        lex::token_def<std::string> rightParen;
        lex::token_def<std::string> leftBracket;
        lex::token_def<std::string> rightBracket;
        lex::token_def<std::string> alphaTerminal;
        lex::token_def<std::string> numericTerminal;
    };

And here is my example test program

    int main(int argc, char** argv)
    {
        namespace lex = boost::spirit::lex;
        namespace qi = boost::spirit::qi;

        typedef lex::lexertl::token< char const*, lex::omit, boost::mpl::true_ > token_type;
        typedef lex::lexertl::lexer<token_type> lexer_type;
        typedef interpreter::LexerTokens<lexer_type>::iterator_type iterator_type;
        typedef qi::in_state_skipper<interpreter::LexerTokens<lexer_type>::lexer_def> skipper_type;

        interpreter::LexerTokens< lexer_type > lexer;
        interpreter::InterpreterGrammar< iterator_type, skipper_type > parser(lexer);
        std::string sourceCode("def print_it(x, y): print 3*x + y return fed print_it(8,1) x = 3 print_it(x, x)"); 

        char const* first = sourceCode.c_str();
        char const* last = &first[sourceCode.size()];
        bool r = lex::tokenize_and_phrase_parse(first, last, lexer, parser, qi::in_state("WHITESPACE")[lexer.self]);

        std::cout << "Remaining " << std::string(first,last) << std::endl;
        std::cout << "R is " << r << std::endl;
    }

These revisions have given rise to a few questions, first, by looking at the grammar informally, without constructing the full LL(1) parsing table, I don't believe this grammar is LL(1). I still need to verify this, but I was wondering will spirit, be able to parse this grammar? I know PEGs typically use the / operator to do lookahead, does spirit do this currently? I read in another post that spirit may not?

Second, this grammar fails, I notice that when, in the start production, I simplify the start production and make it:

start %= functionList;

and then change the input to be:

def print_it(x, y): print 3*x + y return fed

the grammar debug statement states that the parse was successful. However, I see the string remaining is:

print_it(x, y): print 3*x + y return fed

So only the first token was actually parsed. After a bit of debugging I am unsure why the parse is successful and why only a single symbol is consumed, could this be an issue with the lexer?

Additionally, I see similar results when I change the start production to be:

start %= endList;

and using the input

y = x

This however, fails to parse and only consumes the character y.

Finally, the output of my debug statement is not very helpful, when running with the debug statement the output that is produced is:

<start>
  <try>[][][][][][][][][][][][][][][][][][][][]</try>
  <fail/>
</start>
Remaining  print_it(x, y): print 3*x + y return fed print_it(8,1) x = 3 print_it(x, x)
R is 0

Which I assume to mean twenty productions are attempted in the grammar, from the twenty empty [], is that a correct assumption? Also why are the [] empty? I typically see them with some text that is useful in debugging. Is it because the grammar is automatically a success if the regular expression is matched? If that is the case, is there a way to get the debug statement to print helpful output when using the token enum token as opposed to adding expressions using macros?

Any help or points in the correct direction is appreciated, thank you.


Solution

  • Which I assume to mean twenty productions are attempted in the grammar, from the twenty empty [], is that a correct assumption?

    No. The [] indicate input tokens.

    Also why are the [] empty?

    There's likely no useful way to print them so they show up as empty.

    If that is the case, is there a way to get the debug statement to print helpful output when using the token enum token as opposed to adding expressions using macros?

    I'd think so. But I never use Lex. So, It could be a while to figure it out.

    First thing that catches my attention is this:

    typedef lex::lexertl::token< char const*, lex::omit, boost::mpl::true_ > token_type;
    

    Your AttributeTypes says omit. Indeed, changing to

    typedef lex::lexertl::token< char const*, boost::mpl::vector<lex::omit, std::string>, boost::mpl::true_ > token_type;
    

    Does show signs of life. For the input x=y (no whitespace!) it prints:

    Live On Coliru

    <start>
      <try>[y][=][x][][][][][][][][][][][][][][][][][]</try>
      <fail/>
    </start>
    Remaining 
    
    R is false
    

    Now, for the def print_it(x, y): print 3*x + y return fed input, the output still is:

    <start>
      <try>[def][][][][][][][][][][][][][][][][][][][]</try>
      <fail/>
    </start>
    Remaining  print_it(x, y): print 3*x + y return fed
    
    R is false
    

    Slightly more informative. Interestingly, it also seems to fail on the first whitespace. The whiteSpace regex looks okay, so I searched for lex in_state in my answers to remember what I once learned about skipping and Lex.

    I played around with some suggestions. Following the second post lead me to this comment:

    Nice! You can add to your answer that having a separate state inside the lexer for skipping white spaces has another considerable drawback: lack of debugging support. If you use a separate state for skipping and then use BOOST_SPIRIT_DEBUG_NODE, you won't get the full output of the token stream. This is because the default debug handler advances the lexer iterator to get tokens, the lexer is of course in the INITIAL state. As soon as it meets a white space, the iteration stops, because the lexer cannot find a match. The token stream will be cut at the first white space in the rule's trace. – noxmetus Sep 20 '17 at 18:28

    Let's, just for fun, try without the separate state and instead use the one from Troubles with boost::spirit::lex & whitespace.

    Now, the expression doesn't parse because

    • print_it is not a valid identifier. Let's add _ to alphaTerminal
    • alphaTerminal requires = to follow. Fixing the expression a bit:

      exp = (
                  qi::token(k_alphaTerminal)
                | qi::token(k_numericTerminal)
                | qi::token(k_trueTok)
                | qi::token(k_falseTok))
          >> expPrime
          ;
      
    • In fact, those token ids arent valid. You need to start with min_token_id (which is 0x10000):

      enum Tokens
      {
          k_andTok = lex::tokenids::min_token_id + 1,
          k_def, k_elihw, k_elseTok, k_falseTok, k_fed, k_fi, k_ifTok, k_input,
          k_notTok, k_orTok, k_print, k_returnTok, k_trueTok, k_whileTok,
          k_plues, k_minus, k_mult, k_div, k_bang, k_equalTo, k_greaterEq,
          k_lessEq, k_notEq, k_less, k_greater, k_assign, k_comma, k_colon,
          k_leftParen, k_rightParen, k_leftBracket, k_rightBracket,
          k_alphaTerminal, k_numericTerminal,
      };
      
    • Why even do you want to dictate the token IDs? Guessing from the fact you are passing a reference to the token definitions to the parser, I would think you were going to use them?

      exp
          = (tok.alphaTerminal | tok.numericTerminal | tok.trueTok | tok.falseTok) >> expPrime
          ;
      
    • Also, let's enable debugging for all the rules:

      BOOST_SPIRIT_DEBUG_NODES(
          (start) (functionList) (endList) (endListPrime) (param)
          (paramPrime) (paramList) (paramListPrime) (statements)
          (statementsPrime) (statement) (assignmentStatement)
          (printStatement) (inputStatement) (conditionStatement)
          (whileStatement) (callStatement) (returnStatement) (exp)
          (expPrime) (intList) (intListPrime) (optionalElse) (end)
      )
      

    And, after spending entirely too much time trying to understand why the state-switching skipper seems not to work, I'm giving up. Here's the upshot of my tinkering:

    Live On Coliru

    //#define USE_STATES
    #define BOOST_SPIRIT_DEBUG
    #include <boost/spirit/include/lex.hpp>
    #include <boost/spirit/include/lex_lexertl.hpp>
    #include <boost/spirit/include/qi.hpp>
    #include <boost/phoenix.hpp>
    namespace lex = boost::spirit::lex;
    
    namespace interpreter {
        enum Tokens
        {
            k_andTok = lex::tokenids::min_token_id + 1,
            k_def, k_elihw, k_elseTok, k_falseTok, k_fed, k_fi, k_ifTok, k_input,
            k_notTok, k_orTok, k_print, k_returnTok, k_trueTok, k_whileTok,
            k_plues, k_minus, k_mult, k_div, k_bang, k_equalTo, k_greaterEq,
            k_lessEq, k_notEq, k_less, k_greater, k_assign, k_comma, k_colon,
            k_leftParen, k_rightParen, k_leftBracket, k_rightBracket,
            k_alphaTerminal, k_numericTerminal,
        };
    
        template <typename Lexer>
        struct LexerTokens : lex::lexer<Lexer>
        {
            LexerTokens() :
               whiteSpace("[ \\t\\n]"),
               andTok("and"),
               def("def"),
               elihw("elihw"),
               elseTok("else"),
               falseTok("false"),
               fed("fed"),
               fi("fi"),
               ifTok("if"),
               input("input"),
               notTok("not"),
               orTok("or"),
               print("print"),
               returnTok("return"),
               trueTok("true"),
               whileTok("while"),
               plus("\\+"),
               minus("\\-"),
               mult("\\*"),
               div("\\/"),
               bang("\\!"),
               equalTo("=="),
               greaterEq(">="),
               lessEq("<="),
               notEq("!="),
               less("<"),
               greater(">"),
               assign("="),
               comma(","),
               colon(":"),
               leftParen("\\("),
               rightParen("\\)"),
               leftBracket("\\["),
               rightBracket("\\["),
               alphaTerminal("[a-z][a-zA-Z0-9_]*"),
               numericTerminal("[0-9]*")
            {
                this->self.add
                    (andTok, k_andTok)
                    (def, k_def)
                    (elihw, k_elihw)
                    (elseTok, k_elseTok)
                    (falseTok, k_falseTok)
                    (fed, k_fed)
                    (fi, k_fi)
                    (ifTok, k_ifTok)
                    (andTok, k_andTok)
                    (input, k_input)
                    (notTok, k_notTok)
                    (orTok, k_orTok)
                    (print, k_print)
                    (returnTok, k_returnTok)
                    (trueTok, k_trueTok)
                    (whileTok, k_whileTok)
                    (plus, k_plues)
                    (minus, k_minus)
                    (mult, k_mult)
                    (div, k_div)
                    (bang, k_bang)
                    (equalTo, k_equalTo)
                    (greaterEq, k_greaterEq)
                    (lessEq, k_lessEq)
                    (notEq, k_notEq)
                    (less, k_less)
                    (greater, k_greater)
                    (assign, k_assign)
                    (comma, k_comma)
                    (colon, k_colon)
                    (leftParen, k_leftParen)
                    (rightParen, k_rightParen)
                    (leftBracket, k_leftBracket)
                    (rightBracket, k_rightBracket)
                    (alphaTerminal, k_alphaTerminal)
                    (numericTerminal, k_numericTerminal);
    
    #ifdef USE_STATES
                this->self("WHITESPACE") = whiteSpace;
    #else
                this->self += whiteSpace [ lex::_pass = lex::pass_flags::pass_ignore ];
    #endif
            }
    
            lex::token_def<lex::omit> whiteSpace;
            lex::token_def<std::string> andTok;
            lex::token_def<std::string> def;
            lex::token_def<std::string> elihw;
            lex::token_def<std::string> elseTok;
            lex::token_def<std::string> falseTok;
            lex::token_def<std::string> fed;
            lex::token_def<std::string> fi;
            lex::token_def<std::string> ifTok;
            lex::token_def<std::string> input;
            lex::token_def<std::string> notTok;
            lex::token_def<std::string> orTok;
            lex::token_def<std::string> print;
            lex::token_def<std::string> returnTok;
            lex::token_def<std::string> trueTok;
            lex::token_def<std::string> whileTok;
            lex::token_def<std::string> plus;
            lex::token_def<std::string> minus;
            lex::token_def<std::string> mult;
            lex::token_def<std::string> div;
            lex::token_def<std::string> bang;
            lex::token_def<std::string> equalTo;
            lex::token_def<std::string> greaterEq;
            lex::token_def<std::string> lessEq;
            lex::token_def<std::string> notEq;
            lex::token_def<std::string> less;
            lex::token_def<std::string> greater;
            lex::token_def<std::string> assign;
            lex::token_def<std::string> comma;
            lex::token_def<std::string> colon;
            lex::token_def<std::string> leftParen;
            lex::token_def<std::string> rightParen;
            lex::token_def<std::string> leftBracket;
            lex::token_def<std::string> rightBracket;
            lex::token_def<std::string> alphaTerminal;
            lex::token_def<std::string> numericTerminal;
        };
    
        namespace qi = boost::spirit::qi;
        template <typename Iterator, typename Skipper>
        struct InterpreterGrammar : qi::grammar<Iterator, Skipper>
        {
            template <typename TokenDef>
            InterpreterGrammar(TokenDef const& tok)
                : InterpreterGrammar::base_type(start)
            {
                start
                    = functionList >> endList >> qi::eoi
                    ;
    
                // different expressions
                exp
                    = (tok.alphaTerminal | tok.numericTerminal | tok.trueTok | tok.falseTok) >> expPrime
                    ;
    
                expPrime
                    = tok.equalTo   >> exp >> expPrime
                    | tok.notEq     >> exp >> expPrime
                    | tok.less      >> exp >> expPrime
                    | tok.lessEq    >> exp >> expPrime
                    | tok.greater   >> exp >> expPrime
                    | tok.greaterEq >> exp >> expPrime
                    | tok.andTok    >> exp >> expPrime
                    | tok.orTok     >> exp >> expPrime
                    | tok.notTok    >> exp
                    | tok.plus      >> exp >> expPrime
                    | tok.minus     >> exp >> expPrime
                    | tok.mult      >> exp >> expPrime
                    | tok.minus >> exp
                    | tok.leftParen >> exp >> tok.rightParen
                    | tok.alphaTerminal >> tok.leftBracket >> exp >> tok.rightBracket
                    | tok.alphaTerminal >> tok.leftParen >> tok.rightParen
                    | tok.alphaTerminal >> tok.leftParen >> exp >> tok.rightParen
                    | qi::eps
                    ;
    
                // parameter list
                paramList
                    = exp >> paramListPrime
                    ;
    
                paramListPrime
                    = tok.comma >> exp >> paramListPrime
                    | qi::eps
                    ;
    
                // return statements
                returnStatement
                    = tok.returnTok >> exp
                    | tok.returnTok
                    ;
    
                // function call statements
                callStatement
                    = tok.alphaTerminal >> tok.leftParen >> tok.rightParen
                    | tok.alphaTerminal >> tok.leftParen >> paramList >> tok.rightParen
                    ;
    
                // variable assignment
                assignmentStatement
                    = tok.alphaTerminal >> tok.assign >> exp
                    | tok.alphaTerminal >> tok.leftBracket >> exp
                        >> tok.rightBracket >> tok.assign >> exp
                    ;
    
                // list of integers
                intList
                    = tok.numericTerminal >> intListPrime
                    ;
    
                intListPrime
                    = tok.comma >> tok.numericTerminal >> intListPrime
                    | qi::eps
                    ;
    
                // print out a variable
                printStatement
                    = tok.print >> exp
                    ;
    
                // take input
                inputStatement
                    = tok.alphaTerminal >> tok.input
                    ;
    
                // conditional statement
                conditionStatement
                    = tok.ifTok >> exp >> tok.colon >> statements >> optionalElse
                    ;
    
                // consitions have optional else
                optionalElse
                    = tok.elseTok >> tok.colon >> statements
                    | qi::eps
                    ;
    
                // while loop
                whileStatement
                    = tok.whileTok >> exp >> tok.colon >> statements >> tok.elihw
                    ;
    
                // actual program statements
                endList
                    = end >> endListPrime
                    ;
    
                endListPrime
                    = end >> endListPrime
                    | qi::eps
                    ;
    
                // end possibilities of program in global space
                end
                    = callStatement
                    | printStatement
                    | tok.alphaTerminal >> tok.assign >> tok.input
                    | tok.alphaTerminal >> tok.assign >> exp
                    | tok.alphaTerminal >> tok.assign >> tok.leftBracket >> intList
                        >> tok.rightBracket
                    | tok.alphaTerminal >> tok.leftBracket >> exp >> tok.rightBracket
                        >> tok.assign >> exp
                    ;
    
                // function parameters
                param
                    = tok.alphaTerminal >> paramPrime
                    | tok.alphaTerminal >> tok.leftBracket >> tok.rightBracket
                        >> paramPrime
                    ;
    
                // for handling left recursion in paramlist
                paramPrime
                    = tok.comma >> tok.alphaTerminal >> paramPrime
                    | qi::eps
                    ;
    
                // define a statement as assignment print input condition while or call
                statement
                    = assignmentStatement
                    | printStatement
                    | inputStatement
                    | conditionStatement
                    | whileStatement
                    | callStatement
                    | returnStatement
                    ;
    
                // general statement list
                statements
                    = statement >> statementsPrime
                    ;
    
                // for handling left recursion in statements
                statementsPrime
                    = statement >> statementsPrime
                    | qi::eps
                    ;
    
                // functions
                functionList
                    = tok.def >> tok.alphaTerminal >> tok.leftParen
                        >> param >> tok.rightParen >> tok.colon
                        >> statements >> tok.fed
                    | tok.def >> tok.alphaTerminal >> tok.leftParen
                        >> tok.rightParen >> tok.colon >> statements >> tok.fed
                    | qi::eps
                    ;
    
                BOOST_SPIRIT_DEBUG_NODES(
                    (start) (functionList) (endList) (endListPrime) (param)
                    (paramPrime) (paramList) (paramListPrime) (statements)
                    (statementsPrime) (statement) (assignmentStatement)
                    (printStatement) (inputStatement) (conditionStatement)
                    (whileStatement) (callStatement) (returnStatement) (exp)
                    (expPrime) (intList) (intListPrime) (optionalElse) (end)
                )
    
            }
          private:
            qi::rule<Iterator, Skipper> start;
            qi::rule<Iterator, Skipper> functionList;
            qi::rule<Iterator, Skipper> endList;
            qi::rule<Iterator, Skipper> endListPrime;
            qi::rule<Iterator, Skipper> param;
            qi::rule<Iterator, Skipper> paramPrime;
            qi::rule<Iterator, Skipper> paramList;
            qi::rule<Iterator, Skipper> paramListPrime;
            qi::rule<Iterator, Skipper> statements;
            qi::rule<Iterator, Skipper> statementsPrime;
            qi::rule<Iterator, Skipper> statement;
            qi::rule<Iterator, Skipper> assignmentStatement;
            qi::rule<Iterator, Skipper> printStatement;
            qi::rule<Iterator, Skipper> inputStatement;
            qi::rule<Iterator, Skipper> conditionStatement;
            qi::rule<Iterator, Skipper> whileStatement;
            qi::rule<Iterator, Skipper> callStatement;
            qi::rule<Iterator, Skipper> returnStatement;
            qi::rule<Iterator, Skipper> exp;
            qi::rule<Iterator, Skipper> expPrime;
            qi::rule<Iterator, Skipper> intList;
            qi::rule<Iterator, Skipper> intListPrime;
            qi::rule<Iterator, Skipper> optionalElse;
            qi::rule<Iterator, Skipper> end;
        };
    }
    
    #include <fstream>
    #include <iterator>
    
    int main(int argc, char** argv) {
        namespace lex = boost::spirit::lex;
        namespace qi = boost::spirit::qi;
    
        typedef lex::lexertl::token<char const*, boost::mpl::vector<lex::omit, std::string>, boost::mpl::true_> token_type;
    #ifdef USE_STATES
        typedef lex::lexertl::actor_lexer<token_type> lexer_type;
        typedef qi::in_state_skipper<interpreter::LexerTokens<lexer_type>::lexer_def> skipper_type;
        typedef interpreter::LexerTokens<lexer_type>::iterator_type iterator_type;
    #else
        typedef lex::lexertl::actor_lexer<token_type> lexer_type;
        typedef interpreter::LexerTokens<lexer_type>::iterator_type iterator_type;
        typedef qi::unused_type skipper_type;
    #endif
    
        interpreter::LexerTokens<lexer_type> lexer;
        interpreter::InterpreterGrammar<iterator_type, skipper_type> parser(lexer);
    
        // read the file
        if (argc != 2)
        {
            std::cout << "File required" << std::endl;
            return 1;
        }
    
        std::ifstream t(argv[1]);
        std::string const sourceCode { std::istreambuf_iterator<char>(t), {} };
    
        char const* first = sourceCode.data();
        char const* last = first + sourceCode.size();
    #ifdef USE_STATES
        bool ok = lex::tokenize_and_phrase_parse(first, last, lexer, parser, qi::in_state("WHITESPACE")[lexer.self]);
    #else
        bool ok = lex::tokenize_and_parse(first, last, lexer, parser);
    #endif
    
        std::cout << "Remaining '" << std::string(first,last) << "'" << std::endl;
        std::cout << "R is " << std::boolalpha << ok << std::endl;
    }
    

    Prints

    <start>
      <try>[def][print_it][(][x][,][y][)][:][print][3][*][x][+][y][return][fed]</try>
      <functionList>
        <try>[def][print_it][(][x][,][y][)][:][print][3][*][x][+][y][return][fed]</try>
        <param>
          <try>[x][,][y][)][:][print][3][*][x][+][y][return][fed]</try>
          <paramPrime>
            <try>[,][y][)][:][print][3][*][x][+][y][return][fed]</try>
            <paramPrime>
              <try>[)][:][print][3][*][x][+][y][return][fed]</try>
              <success>[)][:][print][3][*][x][+][y][return][fed]</success>
              <attributes>[]</attributes>
            </paramPrime>
            <success>[)][:][print][3][*][x][+][y][return][fed]</success>
            <attributes>[]</attributes>
          </paramPrime>
          <success>[)][:][print][3][*][x][+][y][return][fed]</success>
          <attributes>[]</attributes>
        </param>
        <statements>
          <try>[print][3][*][x][+][y][return][fed]</try>
          <statement>
            <try>[print][3][*][x][+][y][return][fed]</try>
            <assignmentStatement>
              <try>[print][3][*][x][+][y][return][fed]</try>
              <fail/>
            </assignmentStatement>
            <printStatement>
              <try>[print][3][*][x][+][y][return][fed]</try>
              <exp>
                <try>[3][*][x][+][y][return][fed]</try>
                <expPrime>
                  <try>[*][x][+][y][return][fed]</try>
                  <exp>
                    <try>[x][+][y][return][fed]</try>
                    <expPrime>
                      <try>[+][y][return][fed]</try>
                      <exp>
                        <try>[y][return][fed]</try>
                        <expPrime>
                          <try>[return][fed]</try>
                          <success>[return][fed]</success>
                          <attributes>[]</attributes>
                        </expPrime>
                        <success>[return][fed]</success>
                        <attributes>[]</attributes>
                      </exp>
                      <expPrime>
                        <try>[return][fed]</try>
                        <success>[return][fed]</success>
                        <attributes>[]</attributes>
                      </expPrime>
                      <success>[return][fed]</success>
                      <attributes>[]</attributes>
                    </expPrime>
                    <success>[return][fed]</success>
                    <attributes>[]</attributes>
                  </exp>
                  <expPrime>
                    <try>[return][fed]</try>
                    <success>[return][fed]</success>
                    <attributes>[]</attributes>
                  </expPrime>
                  <success>[return][fed]</success>
                  <attributes>[]</attributes>
                </expPrime>
                <success>[return][fed]</success>
                <attributes>[]</attributes>
              </exp>
              <success>[return][fed]</success>
              <attributes>[]</attributes>
            </printStatement>
            <success>[return][fed]</success>
            <attributes>[]</attributes>
          </statement>
          <statementsPrime>
            <try>[return][fed]</try>
            <statement>
              <try>[return][fed]</try>
              <assignmentStatement>
                <try>[return][fed]</try>
                <fail/>
              </assignmentStatement>
              <printStatement>
                <try>[return][fed]</try>
                <fail/>
              </printStatement>
              <inputStatement>
                <try>[return][fed]</try>
                <fail/>
              </inputStatement>
              <conditionStatement>
                <try>[return][fed]</try>
                <fail/>
              </conditionStatement>
              <whileStatement>
                <try>[return][fed]</try>
                <fail/>
              </whileStatement>
              <callStatement>
                <try>[return][fed]</try>
                <fail/>
              </callStatement>
              <returnStatement>
                <try>[return][fed]</try>
                <exp>
                  <try>[fed]</try>
                  <fail/>
                </exp>
                <success>[fed]</success>
                <attributes>[]</attributes>
              </returnStatement>
              <success>[fed]</success>
              <attributes>[]</attributes>
            </statement>
            <statementsPrime>
              <try>[fed]</try>
              <statement>
                <try>[fed]</try>
                <assignmentStatement>
                  <try>[fed]</try>
                  <fail/>
                </assignmentStatement>
                <printStatement>
                  <try>[fed]</try>
                  <fail/>
                </printStatement>
                <inputStatement>
                  <try>[fed]</try>
                  <fail/>
                </inputStatement>
                <conditionStatement>
                  <try>[fed]</try>
                  <fail/>
                </conditionStatement>
                <whileStatement>
                  <try>[fed]</try>
                  <fail/>
                </whileStatement>
                <callStatement>
                  <try>[fed]</try>
                  <fail/>
                </callStatement>
                <returnStatement>
                  <try>[fed]</try>
                  <fail/>
                </returnStatement>
                <fail/>
              </statement>
              <success>[fed]</success>
              <attributes>[]</attributes>
            </statementsPrime>
            <success>[fed]</success>
            <attributes>[]</attributes>
          </statementsPrime>
          <success>[fed]</success>
          <attributes>[]</attributes>
        </statements>
        <success></success>
        <attributes>[]</attributes>
      </functionList>
      <endList>
        <try></try>
        <end>
          <try></try>
          <callStatement>
            <try></try>
            <fail/>
          </callStatement>
          <printStatement>
            <try></try>
            <fail/>
          </printStatement>
          <fail/>
        </end>
        <fail/>
      </endList>
      <fail/>
    </start>
    Remaining ''
    R is false