Search code examples
c++boostboost-spiritboost-spirit-qi

How to access attribute of spirit rule, if attribute types of rule and parser don't match?


Using boost::spirit::qi for parsing, there's the possibility to use semantic actions to call functions via phoenix. The result can then be assigned to the rule's attribute, using boost::qi::_val, in case the attribute type of the rule and the assigned parser match. In case the types differ, the label qi::_val stands for the top attribute. This is the case in the following short working example, as the parsers for the rules add and mul return tuple<std::size_t, std::size_t>, while the rules themself expect vector<std::size_t>:

#include <boost/spirit/include/phoenix.hpp>
#include <boost/spirit/include/qi.hpp>

#include <iostream>
#include <string>
#include <vector>

/* useful abbreviations */
namespace ascii = boost::spirit::ascii;
namespace ph = boost::phoenix;
namespace qi = boost::spirit::qi;
namespace ql = qi::labels;


namespace parser
{

/* these functions are called by the semantic actions */
std::vector<std::size_t> add(std::size_t a, std::size_t b)
{
    return std::vector<std::size_t>{a, b, a+b};
}

std::vector<std::size_t> mul(std::size_t a, std::size_t b)
{
    return std::vector<std::size_t>{a, b, a*b};
}

/* actual grammar */
template<typename Iterator>
class Parser : public boost::spirit::qi::grammar<Iterator, std::vector<std::size_t>(), boost::spirit::ascii::space_type>
{
public:
    Parser() : Parser::base_type(packed)
    {
        packed %= *(add | mul | val) >> qi::eoi;
        add = (qi::uint_ >> '+' >> qi::uint_ >> ';')[ qi::_val = ph::bind(&parser::add, ql::_1, ql::_2) ];
        mul = (qi::uint_ >> '*' >> qi::uint_ >> ';')[ qi::_val = ph::bind(&parser::mul, ql::_1, ql::_2) ];
        val %= qi::uint_ >> ';';
    }

private:
    using Rule = boost::spirit::qi::rule<Iterator, std::vector<std::size_t>(), boost::spirit::ascii::space_type>;
    Rule packed;
    Rule add;
    Rule mul;
    Rule val;
};

} /* namespace parser */

/* MAIN PROGRAM */
int main()
{
    using Iterator = std::string::const_iterator;
    parser::Parser<Iterator> parser;
    std::vector<std::size_t> result;
    
    std::string string = "1; 2*4; 5; 6; 7+9; 10;";
    Iterator it = string.begin(), end = string.end();
    qi::phrase_parse(it, end, parser, ascii::space, result);
    
    std::cout << "[ ";
    for(auto i: result) std::cout << i << ", ";
    std::cout << "]\n";
    
    return 0;
}

Output:

[ 7, 9, 16, 10, ]

The not-expected-but-really-wanted output would be:

[ 1, 2, 4, 8, 5, 6, 7, 9, 16, 10, ]

As you can see, the assignment qi::_val = ph::bind(&add, ql::_1, ql::_2) simply overwrites everything in the top rules attribute. I know I can work around this, by appending to the top rules attribute, but I'd like to keep things clean and have this done by the parser of the rule packed.

Is there a nice and easy way to write the result of the calls to parser::add and parser::mul to their respective rules' attributes?


Solution

  • My usual suggestion here is to simply not conflate parsing and evaluation. That would make this much simpler, since you could just print all operands during evaluation.

    See the SEPARATION OF CONCERNS section below.

    Do It Anyways

    You can of course do it manually, by making the functions do what you want:

    void add(Vec& vec, size_t a, size_t b) {
        vec.insert(vec.end(), {a, b, a+b});
    }
    
    void mul(Vec& vec, size_t a, size_t b) {
        vec.insert(vec.end(), {a, b, a*b});
    }
    

    And then call them as such:

    packed %= *((add | mul | val) >> ';') >> qi::eoi;
    add = (qi::uint_ >> '+' >> qi::uint_) [add_(_val, _1, _2)];
    mul = (qi::uint_ >> '*' >> qi::uint_) [mul_(_val, _1, _2)];
    val = qi::repeat(1) [ qi::uint_ ];
    

    Note a few minor tweaks, including the more elegant ph::function<> instead of ph::bind:

        ph::function<PackedF> 
            add_{&parser::add},
            mul_{&parser::mul};
    

    See it Live On Coliru

    Prints

    [ 1, 2, 4, 8, 5, 6, 7, 9, 16, 10, ]
    

    More Simpler?

    You could do all the inserts in the semantic action directly:

    add = (qi::uint_ >> '+' >> qi::uint_) [ (
         ph::push_back(_val, _1),
         ph::push_back(_val, _2),
         ph::push_back(_val, _1 + _2)
        )
    ];
    mul = (qi::uint_ >> '*' >> qi::uint_) [ (
         ph::push_back(_val, _1),
         ph::push_back(_val, _2),
         ph::push_back(_val, _1 * _2)
        )
    ];
    val = qi::repeat(1) [ qi::uint_ ];
    

    Arguably this would make more sense with something more dynamic:

    _binop.add("+", std::plus<size_t>{});
    _binop.add("*", std::multiplies<size_t>{});
    _binop.add("-", std::minus<size_t>{});
    _binop.add("/", std::divides<size_t>{});
    _binop.add("^", static_cast<double(*)(double, double)>(&std::pow));
    
    bin = (qi::uint_ >> _binop >> qi::uint_) [ (
         ph::push_back(_val, _1),
         ph::push_back(_val, _3),
         ph::push_back(_val, ph::bind(_2, _1, _3))
        ) ];
    

    Here, _binop is a symbol table:

    qi::symbols<char, std::function<size_t(size_t, size_t)> > _binop;
    

    See it Live On Coliru

    #include <boost/spirit/include/phoenix.hpp>
    #include <boost/spirit/include/qi.hpp>
    
    #include <iostream>
    #include <string>
    #include <vector>
    
    /* useful abbreviations */
    namespace ascii = boost::spirit::ascii;
    namespace ph = boost::phoenix;
    namespace qi = boost::spirit::qi;
    namespace ql = qi::labels;
    
    using Vec = std::vector<size_t>;
    
    namespace parser
    {
        template <typename Iterator>
        class Parser : public qi::grammar<Iterator, Vec(), ascii::space_type> {
          public:
            Parser() : Parser::base_type(packed) {
                using namespace ql;
                packed %= *((bin | val) >> ';') >> qi::eoi;
    
                _binop.add("+", std::plus<size_t>{});
                _binop.add("*", std::multiplies<size_t>{});
                _binop.add("-", std::minus<size_t>{});
                _binop.add("/", std::divides<size_t>{});
                _binop.add("^", static_cast<double(*)(double, double)>(&std::pow));
    
                bin = (qi::uint_ >> _binop >> qi::uint_) [ (
                     ph::push_back(_val, _1),
                     ph::push_back(_val, _3),
                     ph::push_back(_val, ph::bind(_2, _1, _3))
                    ) ];
                val = qi::repeat(1) [ qi::uint_ ];
            }
    
          private:
            using Rule = qi::rule<Iterator, Vec(), ascii::space_type>;
            Rule packed, bin, val;
            qi::symbols<char, std::function<size_t(size_t, size_t)> > _binop;
        };
    
    } /* namespace parser */
    
    /* MAIN PROGRAM */
    int main() {
        using Iterator = std::string::const_iterator;
        parser::Parser<Iterator> parser;
        Vec result;
    
        std::string string = "1; 2*4; 5; 6; 7+9; 10;";
        Iterator it = string.begin(), end = string.end();
        qi::phrase_parse(it, end, parser, ascii::space, result);
    
        std::cout << "[ ";
        for (auto i : result)
            std::cout << i << ", ";
        std::cout << "]\n";
    }
    

    Prints (again):

    [ 1, 2, 4, 8, 5, 6, 7, 9, 16, 10, ]
    

    Even more simpler?

    You can avoid backtracking the values by rewording the rules:

    expr_list = expr % ';' >> qi::eol;
    
    auto val = qi::copy(qi::uint_ [ ph::push_back(_val, _1) ]);
    auto penultimate = *(ph::rbegin(_val)+1);
    
    expr = val >> *(_binop >> val) 
        [ ph::push_back(_val, ph::bind(_1, penultimate, _2)) ];
    

    This is widely more expressive:

    std::string const string = "1; 2*4; 2^7-1; 4-1*2";
    

    Result in: Live On Coliru:

    [ 1, 2, 4, 8, 2, 7, 128, 1, 127, 4, 1, 3, 2, 6, ]
    

    As you can see this is getting more towards general arithmetic expression evaluation. If you wanted this, make sure you go the extra step of doing operator precedence rules. I have many answers up on this site showing some approaches.

    SEPARATION OF CONCERNS INSTEAD

    So, if you didn't want to "do it anyways", I'd parse into a list of elements first:

    namespace parser {
        using Operand = std::size_t;
        struct Binary {
            Operand lhs;
            char operator_;
            Operand rhs;
        };
        using Element = boost::variant<Operand, Binary>;
        using Elements = std::vector<Element>;
        using boost::fusion::operator<<;
    } // namespace parser
    

    This is mostly simpler because we don't need a single semantic action anymore, nor do we need to evaluate anything:

    template <typename Iterator>
    class Parser : public qi::grammar<Iterator, Elements(), qi::space_type> {
      public:
        Parser() : Parser::base_type(packed) {
            packed = (bin | qi::uint_) % ';' >> qi::eoi;
            bin = qi::uint_ >> qi::char_("-+*/^") >> qi::uint_;
        }
    
      private:
        qi::rule<Iterator, Elements(), qi::space_type> packed;
        qi::rule<Iterator, Binary(), qi::space_type> bin;
    };
    

    That's what I call simple. Now, using it is as simple:

    parser::Elements elements;
    
    std::string string = "1; 2*4; 5; 6; 7+9; 10;";
    Iterator it = string.begin(), end = string.end();
    qi::phrase_parse(it, end, parser, qi::space, elements);
    
    for (auto& element : elements)
        std::cout << element << "; ";
    

    This prints

    1; (2 * 4); 5; 6; (7 + 9); 10; 
    

    As you can see, we know have the input parsed, we can evaluate it. In fact, we can evaluate the same elements in different ways:

    std::vector<Operand> evaluate(Elements const& elements, bool include_literals = true) {
        struct {
            bool literals;
            std::vector<Operand> result;
            void operator()(Operand const& oper) { result.push_back(oper); }
            void operator()(Binary const& bin) { 
                if (literals) {
                    operator()(bin.lhs);
                    operator()(bin.rhs);
                }
                switch(bin.operator_) {
                    case '+': operator()(bin.lhs + bin.rhs); break;
                    case '-': operator()(bin.lhs - bin.rhs); break;
                    case '*': operator()(bin.lhs * bin.rhs); break;
                    case '/': operator()(bin.lhs / bin.rhs); break;
                    case '^': operator()(std::pow(bin.lhs, bin.rhs)); break;
                }
            }
        } vis;
    
        vis.literals = include_literals;
    
        for (auto& el : elements)
            apply_visitor(vis, el);
    
        return vis.result;
    }
    

    This evaluation is pretty straight-forward, but uses a technique that might be new: the variant visitor. The standard library calls apply_visitor visit.

    Now we can choose whether to include the literal operands or not:

    std::cout << "\nwithout literals [ ";
    for (auto i : evaluate(elements, false)) std::cout << i << ", ";
    std::cout << "]\n";
    
    std::cout << "with literals [ ";
    for (auto i : evaluate(elements, true)) std::cout << i << ", ";
    std::cout << "]\n";
    

    Prints

    without literals [ 1, 8, 5, 6, 16, 10, ]
    with literals [ 1, 2, 4, 8, 5, 6, 7, 9, 16, 10, ]
    

    OTHER BENEFITS

    Another (non obvious) benefit of this approach is that it makes it easier to have some kind of operator precedence. This is getting off-topic, bit let me indulge a few tweaks and sample evaluations (without going as far as making the parser aware of precedence):

    Live On Coliru

    //#define BOOST_SPIRIT_DEBUG
    #include <boost/fusion/adapted.hpp>
    #include <boost/fusion/adapted.hpp>
    #include <boost/spirit/include/qi.hpp>
    
    #include <iostream>
    #include <string>
    #include <vector>
    
    /* useful abbreviations */
    namespace qi = boost::spirit::qi;
    
    namespace parser {
        using Operand = std::size_t;
        struct Binary;
        using Element = boost::variant<Operand, boost::recursive_wrapper<Binary> >;
    
        struct Binary {
            Element lhs;
            char operator_;
            Element rhs;
        };
    
        using Elements = std::vector<Element>;
        using boost::fusion::operator<<;
    } // namespace parser
    BOOST_FUSION_ADAPT_STRUCT(parser::Binary, lhs, operator_, rhs)
    
    namespace parser {
        /* actual grammar */
        template <typename Iterator>
        class Parser : public qi::grammar<Iterator, Elements(), qi::space_type> {
          public:
            Parser() : Parser::base_type(packed) {
                packed = elem % ';' >> qi::eoi;
                elem   = bin | qi::uint_;
                bin    = '(' >> elem >> qi::char_("-+*/^") >> elem >> ')';
    
                BOOST_SPIRIT_DEBUG_NODES((packed)(elem)(bin))
            }
    
          private:
            qi::rule<Iterator, Elements(), qi::space_type> packed;
            qi::rule<Iterator, Element(), qi::space_type> elem;
            qi::rule<Iterator, Binary(), qi::space_type> bin;
        };
    
        Elements parse(std::string const& input) {
            using Iterator = std::string::const_iterator;
            static const parser::Parser<Iterator> parser;
            Elements elements;
            qi::phrase_parse(input.begin(), input.end(), parser, qi::space, elements);
            // TODO error handling please
            return elements;
        }
    
        std::vector<Operand> evaluate(Elements const& elements) {
            struct {
                Operand operator()(Element const& el) { return boost::apply_visitor(*this, el); }
                Operand operator()(Operand const& oper) { return oper; }
                Operand operator()(Binary const& bin) { 
                    auto lhs = operator()(bin.lhs);
                    auto rhs = operator()(bin.rhs);
                    switch(bin.operator_) {
                        case '+': return operator()(lhs + rhs);
                        case '-': return operator()(lhs - rhs);
                        case '*': return operator()(lhs * rhs);
                        case '/': return operator()(lhs / rhs);
                        case '^': return operator()(std::pow(lhs, rhs));
                    }
                    throw std::invalid_argument("operator not implemented");
                }
            } vis;
    
            std::vector<Operand> result;
            for (auto& el : elements) {
                result.push_back(apply_visitor(vis, el));
            }
    
            return result;
        }
    
    } /* namespace parser */
    
    /* MAIN PROGRAM */
    int main() {
        auto const elements = parser::parse("1; ((2*4)+7); (2*(4+7)); ((7-4)^4);");
    
        for (auto& element : elements)
            std::cout << element << "; ";
    
        std::cout << "\nevaluated ";
        for (auto i : evaluate(elements))
            std::cout << i << ", ";
    }
    

    Prints

    1; ((2 * 4) + 7); (2 * (4 + 7)); ((7 - 4) ^ 4);
    evaluated 1, 15, 22, 81,