Search code examples
c++boostboost-spirit

How to print the variables matched by the symbol table in Boost spirit parser?


I am a beginner with Boost.Spirit.

Say that I have the following code that parses a simple arithmetic expression with variables:

#include <boost/config/warning_disable.hpp>
#include <boost/spirit/include/qi.hpp>
#include <boost/variant/recursive_variant.hpp>
#include <boost/variant/apply_visitor.hpp>
#include <boost/fusion/include/adapt_struct.hpp>
#include <boost/spirit/include/phoenix_function.hpp>
#include <boost/foreach.hpp>

#include <iostream>
#include <string>

namespace client { namespace ast
{
    // Empty placeholder alternative of the operand variant.
    struct nil {};

    // Defined further down; forward-declared because the operand variant
    // has to name them before they are complete types.
    struct signed_;
    struct program;

    // One operand of an expression: nothing, a number, a signed operand,
    // or a whole nested expression.
    typedef boost::variant<
        nil,
        double,
        boost::recursive_wrapper<signed_>,
        boost::recursive_wrapper<program>
    > operand;

    // An operand preceded by a sign character.
    struct signed_
    {
        char    sign;
        operand operand_;
    };

    // Right-hand half of a binary operation: operator character + operand.
    struct operation
    {
        char    operator_;
        operand operand_;
    };

    // A leading operand followed by any number of operations applied to it.
    struct program
    {
        operand              first;
        std::list<operation> rest;
    };
}}

// Adapt the AST structs as Boost.Fusion sequences. Each adaptation lists the
// struct's members as (type, name) pairs, in declaration order.

BOOST_FUSION_ADAPT_STRUCT(client::ast::signed_,
    (char,                 sign)
    (client::ast::operand, operand_))

BOOST_FUSION_ADAPT_STRUCT(client::ast::operation,
    (char,                 operator_)
    (client::ast::operand, operand_))

BOOST_FUSION_ADAPT_STRUCT(client::ast::program,
    (client::ast::operand,              first)
    (std::list<client::ast::operation>, rest))

namespace client {
    namespace ast
    {
        struct eval
        {
            typedef double result_type;

            double operator()(nil) const { BOOST_ASSERT(0); return 0; }
            double operator()(double n) const { return n; }

            double operator()(operation const& x, double lhs) const
            {
                double rhs = boost::apply_visitor(*this, x.operand_);
                switch (x.operator_)
                {
                case '+': return lhs + rhs;
                case '-': return lhs - rhs;
                case '*': return lhs * rhs;
                case '/': return lhs / rhs;
                }
                BOOST_ASSERT(0);
                return 0;
            }

            double operator()(signed_ const& x) const
            {
                double rhs = boost::apply_visitor(*this, x.operand_);
                switch (x.sign)
                {
                case '-': return -rhs;
                case '+': return +rhs;
                }
                BOOST_ASSERT(0);
                return 0;
            }

            double operator()(program const& x) const
            {
                double state = boost::apply_visitor(*this, x.first);
                BOOST_FOREACH(operation const& oper, x.rest)
                {
                    state = (*this)(oper, state);
                }
                return state;
            }
        };
    }
}

namespace client
{
    namespace qi = boost::spirit::qi;
    namespace ascii = boost::spirit::ascii;
    using boost::phoenix::function;

    // Grammar for arithmetic expressions over numbers and a fixed set of
    // named variables. Whitespace is skipped with ascii::space; the parse
    // result is an ast::program.
    //
    //   expression := term   (('+' | '-') term)*
    //   term       := factor (('*' | '/') factor)*
    //   factor     := number | variable | '(' expression ')' | ('-' | '+') factor
    //
    // After an operator or an opening parenthesis the grammar uses
    // expectation points (operator>), so a missing operand or ')' is
    // reported by a thrown qi::expectation_failure rather than by a plain
    // parse failure.
    template <typename Iterator>
    struct calculator : qi::grammar<Iterator, ast::program(), ascii::space_type>
    {
        calculator() : calculator::base_type(expression)
        {
            qi::char_type char_;
            qi::double_type doubleParser_;

            // Known variable names and the values they are replaced with.
            // (The original spelled this `symboleTable`, which does not name
            // any declared member and therefore did not compile.)
            symbolTable.add("var1", 2);
            symbolTable.add("var2", 15);
            symbolTable.add("var4", 5);
            symbolTable.add("var", 5);
            symbolTable.add("x", 5);

            expression =
                term
                >> *((char_('+') > term)
                | (char_('-') > term)
                )
                ;

            term =
                factor
                >> *((char_('*') > factor)
                | (char_('/') > factor)
                )
                ;

            // Alternatives are tried in the order written. The parentheses
            // around the '(' ... ')' branch only make explicit the grouping
            // that operator precedence (> binds tighter than |) already gives.
            factor =
                doubleParser_
                | symbolTable
                | ('(' > expression > ')')
                | (char_('-') > factor)
                | (char_('+') > factor)
                ;
        }

        qi::symbols<char, double> symbolTable;                              // variable name -> value
        qi::rule<Iterator, ast::program(), ascii::space_type> expression;
        qi::rule<Iterator, ast::program(), ascii::space_type> term;
        qi::rule<Iterator, ast::operand(), ascii::space_type> factor;
    };
}

/////////////////////////////////////////////////////////////////////////////
//  Main program
/////////////////////////////////////////////////////////////////////////////
int
main()
{
    std::cout << "/////////////////////////////////////////////////////////\n\n";
    std::cout << "Expression parser...\n\n";
    std::cout << "/////////////////////////////////////////////////////////\n\n";
    std::cout << "Type an expression...or [q or Q] to quit\n\n";

    typedef std::string::const_iterator iterator_type;
    typedef client::calculator<iterator_type> calculator;
    typedef client::ast::program ast_program;
    typedef client::ast::eval ast_eval;

    std::string str;
    while (std::getline(std::cin, str))
    {
        if (str.empty() || str[0] == 'q' || str[0] == 'Q')
            break;

        calculator calc;        // Our grammar
        ast_program program;    // Our program (AST)
        ast_eval eval;          // Evaluates the program

        std::string::const_iterator iter = str.begin();
        std::string::const_iterator end = str.end();

        boost::spirit::ascii::space_type space;
        bool r = phrase_parse(iter, end, calc, space, program);

        if (r && iter == end)
        {
            std::cout << "-------------------------\n";
            std::cout << "Parsing succeeded\n";
            std::cout << "\nResult: " << eval(program) << std::endl;
            std::cout << "-------------------------\n";
        }
        else
        {
            std::string rest(iter, end);
            std::cout << "-------------------------\n";
            std::cout << "Parsing failed\n";
            std::cout << "-------------------------\n";
        }
    }

    std::cout << "Bye... :-) \n\n";
    return 0;
}

I want to print the variables (not their values) when they are matched by the symbol table (declared in the grammar).

So for example when the input is

var* 2 - 3 +x*var2 - 2

the output should be:

var
x
var2

any help?


Solution

  • The AST used doesn't store the original variable referenced.

    Hence after parsing the information is no longer available (the AST just contains value nodes instead of the original reference).

    There are two ways about this:

    • enrich the AST so you resolve variables at evaluation time only (keeping the variable reference names)

      UPDATE I have added another answer that actually implements this, more elaborate, approach

    • have the parser collect variable references "out-of-band" during parsing.

    The latter requires vastly smaller effort (if you know the tricks of Spirit + Phoenix). So let's show that:

            // A factor is tried, in this order, as: a numeric literal, a
            // known variable (the new `variable` rule), a parenthesised
            // expression, or a signed factor.
            factor =
                doubleParser_
                | variable
                | '(' > expression > ')'
                | (char_('-') > factor)
                | (char_('+') > factor)
                ;
    

    Here I replaced the symbolTable by a new rule: variable:

        // Exposes the matched variable's value as a double. The rule type
        // names no skipper, unlike the other rules of the grammar.
        qi::rule<Iterator, double()> variable; // NOTE: also made it a lexeme (no skipper)
    

    That rule still exposes just the value but as a side effect we will have it collect the reference into a set of variable names:

            // operator%= (auto-rule assignment) is used so that the rule still
            // exposes the symbol's value although a semantic action is attached.
            variable %=  
                   // Look-ahead (&): match a symbol without consuming it, take
                   // the matched text as a std::string and insert it into
                   // collect_references.
                   &qi::as_string[qi::raw[symbolTable]] 
                         [ px::insert(px::ref(collect_references), qi::_1) ] 
                // Second parse of the same input: consume the symbol and
                // yield its value.
                >> symbolTable
                ;
    

    As you can see, it is a quick-and-dirty approach leveraging a lot of Spirit tricks: operator%= auto-rule assignment, the qi::raw and qi::as_string directives, phoenix::insert, and a second parse made possible by the positive look-ahead assertion (operator&).

    Now, we just need to pass in a collect_references container to the grammar, and we can print the references after successful parsing:

        // Receives the names of the variables matched during parsing; the
        // grammar is given a reference to it.
        std::set<std::string> collected_references;
        calculator calc(collected_references); // Our grammar
    
        if (r && iter == end)
        {
            std::cout << "-------------------------\n";
            std::cout << "Parsing succeeded\n";
            std::cout << "References: ";
            // A std::set holds each name once and iterates in sorted order,
            // so the names are printed de-duplicated and alphabetically.
            std::copy(collected_references.begin(), collected_references.end(),
                    std::ostream_iterator<std::string>(std::cout, " "));
    
            std::cout << "\nResult: " << eval(program) << std::endl;
            std::cout << "-------------------------\n";
        }
    

    It prints:

    Type an expression...or [q or Q] to quit
    
    var* 2 - 3 +x*var2 - 2
    -------------------------
    Parsing succeeded
    References: var var2 x 
    Result: 80
    -------------------------
    Bye... :-) 
    

    DEMO CODE

    Live On Coliru

    #include <boost/config/warning_disable.hpp>
    #include <boost/spirit/include/qi.hpp>
    #include <boost/variant/recursive_variant.hpp>
    #include <boost/variant/apply_visitor.hpp>
    #include <boost/fusion/include/adapt_struct.hpp>
    #include <boost/spirit/include/phoenix.hpp>
    #include <boost/foreach.hpp>
    
    #include <iostream>
    #include <string>
    #include <set>
    
    namespace client { namespace ast
    {
        // Empty placeholder alternative of the operand variant.
        struct nil {};
    
        // Defined further down; forward-declared because the operand variant
        // has to name them before they are complete types.
        struct signed_;
        struct program;
    
        // One operand of an expression: nothing, a number, a signed operand,
        // or a whole nested expression.
        typedef boost::variant<
            nil,
            double,
            boost::recursive_wrapper<signed_>,
            boost::recursive_wrapper<program>
        > operand;
    
        // An operand preceded by a sign character.
        struct signed_
        {
            char    sign;
            operand operand_;
        };
    
        // Right-hand half of a binary operation: operator character + operand.
        struct operation
        {
            char    operator_;
            operand operand_;
        };
    
        // A leading operand followed by any number of operations applied to it.
        struct program
        {
            operand              first;
            std::list<operation> rest;
        };
    }}
    
    // Adapt the AST structs as Boost.Fusion sequences. Each adaptation lists
    // the struct's members as (type, name) pairs, in declaration order.
    
    BOOST_FUSION_ADAPT_STRUCT(client::ast::signed_,
        (char,                 sign)
        (client::ast::operand, operand_))
    
    BOOST_FUSION_ADAPT_STRUCT(client::ast::operation,
        (char,                 operator_)
        (client::ast::operand, operand_))
    
    BOOST_FUSION_ADAPT_STRUCT(client::ast::program,
        (client::ast::operand,              first)
        (std::list<client::ast::operation>, rest))
    
    namespace client {
        namespace ast
        {
            struct eval
            {
                typedef double result_type;
    
                double operator()(nil) const { BOOST_ASSERT(0); return 0; }
                double operator()(double n) const { return n; }
    
                double operator()(operation const& x, double lhs) const
                {
                    double rhs = boost::apply_visitor(*this, x.operand_);
                    switch (x.operator_)
                    {
                    case '+': return lhs + rhs;
                    case '-': return lhs - rhs;
                    case '*': return lhs * rhs;
                    case '/': return lhs / rhs;
                    }
                    BOOST_ASSERT(0);
                    return 0;
                }
    
                double operator()(signed_ const& x) const
                {
                    double rhs = boost::apply_visitor(*this, x.operand_);
                    switch (x.sign)
                    {
                    case '-': return -rhs;
                    case '+': return +rhs;
                    }
                    BOOST_ASSERT(0);
                    return 0;
                }
    
                double operator()(program const& x) const
                {
                    double state = boost::apply_visitor(*this, x.first);
                    BOOST_FOREACH(operation const& oper, x.rest)
                    {
                        state = (*this)(oper, state);
                    }
                    return state;
                }
            };
        }
    }
    
    namespace client
    {
        namespace qi = boost::spirit::qi;
        namespace ascii = boost::spirit::ascii;
    
        // Arithmetic-expression grammar that, as a side effect of parsing,
        // records the name of every variable it matches.
        //
        // collect_references is captured by reference inside the `variable`
        // rule (px::ref), so the set has to stay alive for as long as this
        // grammar is used for parsing.
        template <typename Iterator>
        struct calculator : qi::grammar<Iterator, ast::program(), ascii::space_type>
        {
            calculator(std::set<std::string>& collect_references) : calculator::base_type(expression)
            {
                namespace px = boost::phoenix;
    
                qi::double_type doubleParser_;
                qi::char_type char_;
    
                // Known variable names and the values they are replaced with.
                symbolTable.add
                    ("var1", 2)
                    ("var2", 15)
                    ("var4", 5)
                    ("var",  5)
                    ("x",    5)
                    ;
    
                // Rules are defined bottom-up here; they refer to each other
                // by reference, so definition order does not matter.
    
                // The look-ahead (&) matches a symbol without consuming it and
                // inserts its text into collect_references; the trailing
                // symbolTable then consumes the symbol and yields its value.
                // operator%= keeps that value as the rule's attribute despite
                // the attached semantic action.
                variable %=
                       &qi::as_string[qi::raw[symbolTable]]
                             [ px::insert(px::ref(collect_references), qi::_1) ]
                    >> symbolTable
                    ;
    
                // number | variable | '(' expression ')' | signed factor
                factor =
                      doubleParser_
                    | variable
                    | ('(' > expression > ')')
                    | (char_('-') > factor)
                    | (char_('+') > factor)
                    ;
    
                // factor (('*' | '/') factor)*
                term =
                    factor >> *( (char_('*') > factor) | (char_('/') > factor) );
    
                // term (('+' | '-') term)*
                expression =
                    term >> *( (char_('+') > term) | (char_('-') > term) );
            }
    
          private:
            qi::symbols<char, double> symbolTable;                              // variable name -> value
            qi::rule<Iterator, double()> variable;                              // declared without a skipper (lexeme)
            qi::rule<Iterator, ast::program(), ascii::space_type> expression;
            qi::rule<Iterator, ast::program(), ascii::space_type> term;
            qi::rule<Iterator, ast::operand(), ascii::space_type> factor;
        };
    }
    
    /////////////////////////////////////////////////////////////////////////////
    //  Main program
    /////////////////////////////////////////////////////////////////////////////
    int
    main()
    {
        std::cout << "/////////////////////////////////////////////////////////\n\n";
        std::cout << "Expression parser...\n\n";
        std::cout << "/////////////////////////////////////////////////////////\n\n";
        std::cout << "Type an expression...or [q or Q] to quit\n\n";
    
        typedef std::string::const_iterator iterator_type;
        typedef client::calculator<iterator_type> calculator;
        typedef client::ast::program ast_program;
        typedef client::ast::eval ast_eval;
    
        std::string str;
        while (std::getline(std::cin, str))
        {
            if (str.empty() || str[0] == 'q' || str[0] == 'Q')
                break;
    
            std::set<std::string> collected_references;
            calculator calc(collected_references); // Our grammar
            ast_program program;                   // Our program (AST)
            ast_eval eval;                         // Evaluates the program
    
            std::string::const_iterator iter = str.begin();
            std::string::const_iterator end = str.end();
    
            boost::spirit::ascii::space_type space;
            bool r = phrase_parse(iter, end, calc, space, program);
    
            if (r && iter == end)
            {
                std::cout << "-------------------------\n";
                std::cout << "Parsing succeeded\n";
                std::cout << "References: ";
                std::copy(collected_references.begin(), collected_references.end(),
                        std::ostream_iterator<std::string>(std::cout, " "));
    
                std::cout << "\nResult: " << eval(program) << std::endl;
                std::cout << "-------------------------\n";
            }
            else
            {
                std::string rest(iter, end);
                std::cout << "-------------------------\n";
                std::cout << "Parsing failed\n";
                std::cout << "-------------------------\n";
            }
        }
    
        std::cout << "Bye... :-) \n\n";
        return 0;
    }