Search code examples
c++parsingexpression-treesboost-spiritboost-spirit-qi

Building a Custom Expression Tree in Spirit:Qi (Without Utree or Boost::Variant)


First of all, if it is much easier using either Boost Variant or Utree, then I will settle with them, and i will try to solve my issues with them in another topic. However, i would very much like to be able to build a tree like i have below.

Background, ignore if you would like to go straight to the issue: I would like to be able to build an expression tree which parses something like

"({a} == 0) && ({b} > 5)"

or a standard mathmatic expression

"(2 * a) + b"

I will then define what a and b are before i evaluate my tree, something like this:

a = 10;
double val = myExpression->Evaluate();

My issue comes from when i try to build the try to parse the string into my Expression Tree. I am using an abstract class "Expression" which then derives "Variable", "Constant" and "Binary" expressions (it will also do unary, but it shouldnt effect my problem. I keep having problems with adding to the tree using my rules, so im clearly doing something wrong. Im having a hard time wrapping my head around the attributes.

My Tree is as follows (Tree.h):

class BinaryExpression;
typedef double (*func)(double, double);

class Expression
{
public:
    virtual double Evaluate() = 0;
};

class BinaryExpression : public Expression
{
private:
    Expression* lhs;
    Expression* rhs;
    func method;

    double Evaluate();

public:
    BinaryExpression(void);
    BinaryExpression(char op, Expression* lhs, Expression* rhs);
    BinaryExpression(char op);
    void operator()(Expression* lhs, Expression* rhs);
};

class ConstantExpression : public Expression
{
private:
    double value;
public:
    ConstantExpression(void);
    ConstantExpression(char op);
    ConstantExpression(double val);

    double Evaluate();
};

// Require as many types as there are fields in expression?
static double a;
static double b;
class VariableExpression : public Expression
{
private:
    char op;

public:
    VariableExpression(char op);

    double Evaluate();
};

BOOST_FUSION_ADAPT_STRUCT(
    BinaryExpression,
    (Expression*, lhs)
    (Expression*, rhs)
    (func, method)
)

BOOST_FUSION_ADAPT_STRUCT(
    VariableExpression,
    (char, op)
)

BOOST_FUSION_ADAPT_STRUCT(
    ConstantExpression,
    (double, op)
)

Tree.cpp

typedef double (*func)(double, double);

/////////////////////////////////////////////////////////////////////////////
// BINARY EXPRESSION
////////////////////////////////////////////////////////////////////////////

BinaryExpression::BinaryExpression(void) {}

BinaryExpression::BinaryExpression(char op, Expression* lhs, Expression* rhs)
{
    this->lhs = lhs;
    this->rhs = rhs;

    // Example, methods are held in another header
    if (op == '+')
        method = Add;
    else if (op == '-')
        method = Subtract;

}

double BinaryExpression::Evaluate()
{
    return method(lhs->Evaluate(), rhs->Evaluate());
}

BinaryExpression::BinaryExpression(char op)
{
    if (op == '+')
        method = Add;
    else if (op == '-')
        method = Subtract;
}

void BinaryExpression::operator()(Expression* lhs, Expression* rhs)
{
    this->lhs = lhs;
    this->rhs = rhs;
}

/////////////////////////////////////////////////////////////////////////////
// CONSTANT EXPRESSION
////////////////////////////////////////////////////////////////////////////

ConstantExpression::ConstantExpression() {}

ConstantExpression::ConstantExpression(char op)
{
    this->value = op - 48;
}
ConstantExpression::ConstantExpression(double val)
{
    value = val;
}

double ConstantExpression::Evaluate()
{
    return value;
}

/////////////////////////////////////////////////////////////////////////////
// VARIABLE EXPRESSION
////////////////////////////////////////////////////////////////////////////

VariableExpression::VariableExpression(char op)
{
    this->op = op;
}

double VariableExpression::Evaluate()
{
    // a and b are defined in the header, and are used to fill in the variables we     want to evaluate
    if (op == 'a')
        return a;
    if (op == 'b')
        return b;
    return 0;
}

Now if i build the tree manually it all works fine, so i dont think theres an issue with the way it is structured.

Here is Grammar.h (Lots of comments from where i tried various things, i could remove them, but i may be worth showing what i've tried / where i want to go with it)

#include "Tree.h"

#include <boost/spirit/include/qi.hpp>
#include <boost/spirit/include/phoenix_function.hpp>

namespace qi = boost::spirit::qi;
namespace ascii = boost::spirit::ascii;

qi::_1_type _1;
qi::_2_type _2;

// Pass functions to boost
boost::phoenix::function<BinaryExpression> plus = BinaryExpression('+');
boost::phoenix::function<BinaryExpression> minus = BinaryExpression('-');

template <typename Iterator>
struct ExpressionParser : qi::grammar<Iterator, BinaryExpression(), ascii::space_type>
{
    ExpressionParser() : ExpressionParser::base_type(expression)
    {
        qi::_3_type _3;
        qi::_4_type _4;

        qi::char_type char_;
        qi::uint_type uint_;
        qi::_val_type _val;
        qi::raw_type raw;
        qi::lexeme_type lexeme;
        qi::alpha_type alpha;
        qi::alnum_type alnum;
        qi::bool_type bool_;
        qi::double_type double_;


        expression = //?
            additive_expr                       [_val = _1]
            ;

        //equality_expr = 
        //      relational_expr >> 
        //      *(lit("==") > relational_expr)      [/*Semantice action to add to tree*/]
        //      ;

        additive_expr =
            primary_expr >>
            ( '+' > primary_expr)               [plus(_val, _1)]   
            | ( '-' > primary_expr)             [minus(_val, _1)]
            ;
        // Also tried "_val = plus(_1, _2)"

        primary_expr =
            constant                                [_val = _1]
            | variable                          [_val = _1]
            //| '(' > expression > ')'          [_val = _1]
            ;

        string %=
            '{' >> *(char_ - '}') >> '}'
            ;

        // Returns ConstantExpression
        constant =
            double_                                 [_val = _1];

        // Returns VariableExpression
        variable =
            char_                                   [_val = _1]
            ;
    }

    // constant expression = double
    // variable expression = string
    qi::rule<Iterator, BinaryExpression(), ascii::space_type>
        expression;

    qi::rule<Iterator, BinaryExpression(), ascii::space_type>
        // eventually will deal with all these rules
        equality_expr,
        relational_expr,        
        logical_expr,
        additive_expr,
        multiplicative_expr,
        primary_expr
            ;

    qi::rule<Iterator, ConstantExpression(), ascii::space_type>
        constant
        ;

    qi::rule<Iterator, VariableExpression(), ascii::space_type>
        variable
        ;

    qi::rule<Iterator, std::string(), ascii::space_type>
        string
        ;
};

So this is a really hacked apart, but hopefully it will show what im trying to achieve. Any advice or tips would be really appreciated. Is there an example where someone has built a tree like this without using variant or utree.

Also sorry if ive broken convention, and for my formatting, i tried to make it as readable as possible.


Solution

  • It isn't clear to me what your gripe with (recursive) variants are, but here is a variation that goes along with your wish to use 'old fashioned' tree building using dynamically allocated nodes:

    I have purposefully sidestepped the issue of operator precedence in your grammar because

    Note how I

    • removed ubiquitous memory leaks by using shared_ptr (you can use the Boost one if you don't have a TR1 library)
    • I removed the misguided reuse of a specific BinaryExpression instance as a phoenix lazy actor. Instead I made a local makebinary actor now.
    • Note how chains of operators (1+2+5+6-10) are now supported:

      additive_expr =
          primary_expr                         [ _val = _1 ]
          >> *(char_("-+*/") >> primary_expr)  [ _val = makebinary(_1, _val, _2)]
          ;
      
    • I added {var}, /, * and (expr) support

    • added serialization for display (Print virtual method, operator<<) (for display convenience, BinaryExpression stores the operator instead of the resultant method now)

    • Therefore now you can use BOOST_SPIRIT_DEBUG (uncomment first line)
    • I have renamed Expression to AbstractExpression (and made de constructor protected)
    • I have renamed PrimaryExpression to Expression (and this is now your main expression datatype)
    • I show how to store simplistically variables in a static map
    • Uses far less fusion struct adaptation (only for variable now)
    • Uses the templated constructor trick to make it very easy to construct an expression from disparate parsed types:

      struct Expression : AbstractExpression {
          template <typename E>
          Expression(E const& e) : _e(make_from(e)) { } // cloning the expression
          // ...
      };
      

      is enough to efficiently support e.g.:

      primary_expr =
            ( '(' > expression > ')' )         [ _val = _1 ]
          | constant                           [ _val = _1 ]
          | variable                           [ _val = _1 ]
          ;
      
    • for fun have included a few more test cases:

      Input:                3*8 + 6
      Expression:           Expression(BinaryExpression(BinaryExpression(ConstantExpression(3) * ConstantExpression(8)) + ConstantExpression(6)))
      Parse success:        true
      Remaining unparsed:  ''
      (a, b):               0, 0
      Evaluation result:    30
      ----------------------------------------
      Input:                3*(8+6)
      Expression:           Expression(BinaryExpression(ConstantExpression(3) * BinaryExpression(ConstantExpression(8) + ConstantExpression(6))))
      Parse success:        true
      Remaining unparsed:  ''
      (a, b):               0, 0
      Evaluation result:    42
      ----------------------------------------
      Input:                0x1b
      Expression:           Expression(ConstantExpression(27))
      Parse success:        true
      Remaining unparsed:  ''
      (a, b):               0, 0
      Evaluation result:    27
      ----------------------------------------
      Input:                1/3
      Expression:           Expression(BinaryExpression(ConstantExpression(1) / ConstantExpression(3)))
      Parse success:        true
      Remaining unparsed:  ''
      (a, b):               0, 0
      Evaluation result:    0.333333
      ----------------------------------------
      Input:                .3333 * 8e12
      Expression:           Expression(BinaryExpression(ConstantExpression(0.3333) * ConstantExpression(8e+12)))
      Parse success:        true
      Remaining unparsed:  ''
      (a, b):               0, 0
      Evaluation result:    2.6664e+12
      ----------------------------------------
      Input:                (2 * a) + b
      Expression:           Expression(BinaryExpression(BinaryExpression(ConstantExpression(2) * VariableExpression('a')) + VariableExpression('b')))
      Parse success:        true
      Remaining unparsed:  ''
      (a, b):               10, 7
      Evaluation result:    27
      ----------------------------------------
      Input:                (2 * a) + b
      Expression:           Expression(BinaryExpression(BinaryExpression(ConstantExpression(2) * VariableExpression('a')) + VariableExpression('b')))
      Parse success:        true
      Remaining unparsed:  ''
      (a, b):               -10, 800
      Evaluation result:    780
      ----------------------------------------
      Input:                (2 * {a}) + b
      Expression:           Expression(BinaryExpression(BinaryExpression(ConstantExpression(2) * VariableExpression('a')) + VariableExpression('b')))
      Parse success:        true
      Remaining unparsed:  ''
      (a, b):               -10, 800
      Evaluation result:    780
      ----------------------------------------
      Input:                {names with spaces}
      Expression:           Expression(VariableExpression('names with spaces'))
      Parse success:        true
      Remaining unparsed:  ''
      (a, b):               0, 0
      Evaluation result:    0
      ----------------------------------------
      

    Full Code

    // #define BOOST_SPIRIT_DEBUG
    // #define BOOST_RESULT_OF_USE_DECLTYPE
    // #define BOOST_SPIRIT_USE_PHOENIX_V3
    
    #include <cassert>
    #include <memory>
    #include <iostream>
    #include <map>
    
    struct AbstractExpression;
    typedef std::shared_ptr<AbstractExpression> Ptr;
    
    struct AbstractExpression {
        virtual ~AbstractExpression() {}
        virtual double Evaluate() const = 0;
        virtual std::ostream& Print(std::ostream& os) const = 0;
    
        friend std::ostream& operator<<(std::ostream& os, AbstractExpression const& e)
            { return e.Print(os); }
    
        protected: AbstractExpression() {}
    };
    
    template <typename Expr> // general purpose, static Expression cloner
        static Ptr make_from(Expr const& t) { return std::make_shared<Expr>(t); }
    
    struct BinaryExpression : AbstractExpression 
    {
        BinaryExpression() {}
    
        template<typename L, typename R>
        BinaryExpression(char op, L const& l, R const& r) 
            : _op(op), _lhs(make_from(l)), _rhs(make_from(r)) 
        {}
    
        double Evaluate() const {
            func f = Method(_op);
            assert(f && _lhs && _rhs);
            return f(_lhs->Evaluate(), _rhs->Evaluate());
        }
    
      private:
        char _op;
        Ptr _lhs, _rhs;
    
        typedef double(*func)(double, double);
    
        static double Add(double a, double b)      { return a+b; }
        static double Subtract(double a, double b) { return a-b; }
        static double Multuply(double a, double b) { return a*b; }
        static double Divide(double a, double b)   { return a/b; }
    
        static BinaryExpression::func Method(char op)
        {
            switch(op) {
                case '+': return Add;
                case '-': return Subtract;
                case '*': return Multuply;
                case '/': return Divide;
                default:  return nullptr;
            }
        }
        std::ostream& Print(std::ostream& os) const
            { return os << "BinaryExpression(" << *_lhs << " " << _op << " " << *_rhs << ")"; }
    };
    
    struct ConstantExpression : AbstractExpression {
        double value;
        ConstantExpression(double v = 0) : value(v) {}
    
        double Evaluate() const { return value; }
    
        virtual std::ostream& Print(std::ostream& os) const
            { return os << "ConstantExpression(" << value << ")"; }
    };
    
    struct VariableExpression : AbstractExpression {
        std::string _name;
    
        static double& get(std::string const& name) {
            static std::map<std::string, double> _symbols;
            return _symbols[name];
            /*switch(name) {
             *    case 'a': static double a; return a;
             *    case 'b': static double b; return b;
             *    default:  throw "undefined variable";
             *}
             */
        }
    
        double Evaluate() const { return get(_name); }
    
        virtual std::ostream& Print(std::ostream& os) const
            { return os << "VariableExpression('" << _name << "')"; }
    };
    
    struct Expression : AbstractExpression
    {
        Expression() { }
    
        template <typename E>
        Expression(E const& e) : _e(make_from(e)) { } // cloning the expression
    
        double Evaluate() const { assert(_e); return _e->Evaluate(); }
    
        // special purpose overload to avoid unnecessary wrapping
        friend Ptr make_from(Expression const& t) { return t._e; }
      private:
        Ptr _e;
        virtual std::ostream& Print(std::ostream& os) const
            { return os << "Expression(" << *_e << ")"; }
    };
    
    //Tree.cpp
    
    /////////////////////////////////////////////////////////////////////////////
    // BINARY EXPRESSION
    ////////////////////////////////////////////////////////////////////////////
    
    //#include "Tree.h"
    #include <boost/spirit/include/qi.hpp>
    #include <boost/spirit/include/phoenix.hpp>
    #include <boost/fusion/adapted.hpp>
    
    BOOST_FUSION_ADAPT_STRUCT(VariableExpression, (std::string, _name))
    
    namespace qi    = boost::spirit::qi;
    namespace ascii = boost::spirit::ascii;
    namespace phx   = boost::phoenix;
    
    // Pass functions to boost
    template <typename Iterator>
    struct ExpressionParser : qi::grammar<Iterator, Expression(), ascii::space_type> 
    {
        struct MakeBinaryExpression {
            template<typename,typename,typename> struct result { typedef BinaryExpression type; };
    
            template<typename C, typename L, typename R>
                BinaryExpression operator()(C op, L const& lhs, R const& rhs) const 
                { return BinaryExpression(op, lhs, rhs); }
        };
    
        phx::function<MakeBinaryExpression> makebinary;
    
        ExpressionParser() : ExpressionParser::base_type(expression) 
        {
            using namespace qi;
            expression =
                additive_expr                        [ _val = _1]
                ;
    
            additive_expr =
                primary_expr                         [ _val = _1 ]
                >> *(char_("-+*/") >> primary_expr)  [ _val = makebinary(_1, _val, _2)]
                ;
    
            primary_expr =
                  ( '(' > expression > ')' )         [ _val = _1 ]
                | constant                           [ _val = _1 ]
                | variable                           [ _val = _1 ]
                ;
    
            constant = lexeme ["0x" >> hex] | double_ | int_;
            string   = '{' >> lexeme [ *~char_("}") ] > '}';
            variable = string | as_string [ alpha ];
    
            BOOST_SPIRIT_DEBUG_NODE(expression);
            BOOST_SPIRIT_DEBUG_NODE(additive_expr);
    
            BOOST_SPIRIT_DEBUG_NODE(primary_expr);
            BOOST_SPIRIT_DEBUG_NODE(constant);
            BOOST_SPIRIT_DEBUG_NODE(variable);
            BOOST_SPIRIT_DEBUG_NODE(string);
        }
    
        qi::rule<Iterator, Expression()        , ascii::space_type> expression;
        qi::rule<Iterator, Expression()        , ascii::space_type> additive_expr;
    
        qi::rule<Iterator, Expression()        , ascii::space_type> primary_expr;
        qi::rule<Iterator, ConstantExpression(), ascii::space_type> constant;
        qi::rule<Iterator, VariableExpression(), ascii::space_type> variable;
        qi::rule<Iterator, std::string()       , ascii::space_type> string;
    };
    
    void test(const std::string input, double a=0, double b=0)
    {
        typedef std::string::const_iterator It;
        ExpressionParser<It> p;
    
        Expression e;
        It f(input.begin()), l(input.end());
        bool ok = qi::phrase_parse(f,l,p,ascii::space,e);
    
        std::cout << "Input:                "  << input            << "\n";
        std::cout << "Expression:           "  << e                << "\n";
        std::cout << "Parse success:        "  << std::boolalpha   << ok << "\n";
        std::cout << "Remaining unparsed:  '"  << std::string(f,l) << "'\n";
    
        std::cout << "(a, b):               "  << a << ", " << b   << "\n";
    
        VariableExpression::get("a") = a;
        VariableExpression::get("b") = b;
        std::cout << "Evaluation result:    "  << e.Evaluate()     << "\n";
        std::cout << "----------------------------------------\n";
    }
    
    int main() 
    {
        test("3*8 + 6"); 
        test("3*(8+6)"); 
        test("0x1b"); 
        test("1/3"); 
        test(".3333 * 8e12");
        test("(2 * a) + b",    10,   7);
        test("(2 * a) + b",   -10, 800);
        test("(2 * {a}) + b", -10, 800);
        test("{names with spaces}");
    }