What is the right way to transform some expression to AST using Boost.Spirit?
I tried to build it, but I think its messy and can be simplified a lot.
#include <boost/spirit/include/qi.hpp>
#include <boost/spirit/include/phoenix.hpp>
namespace ast {
struct unary_operator;
struct binary_operator;
struct expression {
typedef boost::variant<
double,
std::string,
boost::recursive_wrapper<unary_operator>,
boost::recursive_wrapper<binary_operator>,
boost::recursive_wrapper<expression>
> type;
expression() {
}
template<typename Expr>
expression(const Expr &expr)
: expr(expr) {
}
expression &operator+=(expression rhs);
expression &operator-=(expression rhs);
expression &operator*=(expression rhs);
expression &operator/=(expression rhs);
expression &and_(expression rhs);
expression &or_(expression rhs);
expression &equals(expression rhs);
expression ¬_equals(expression rhs);
expression &less_than(expression rhs);
expression &less_equals(expression rhs);
expression &greater_than(expression rhs);
expression &greater_equals(expression rhs);
expression &factor(expression rhs);
expression &dot(expression rhs);
type expr;
};
struct unary_operator {
std::string op;
expression rhs;
unary_operator() {}
unary_operator(std::string op, expression rhs)
: op(std::move(op)), rhs(std::move(rhs)) {
}
};
struct binary_operator {
std::string op;
expression lhs;
expression rhs;
binary_operator() {}
binary_operator(std::string op, expression lhs, expression rhs)
: op(std::move(op)), lhs(std::move(lhs)), rhs(std::move(rhs)) {
}
};
expression &expression::operator+=(expression rhs) {
expr = binary_operator("+", std::move(expr), std::move(rhs));
return *this;
}
expression &expression::operator-=(expression rhs) {
expr = binary_operator("-", std::move(expr), std::move(rhs));
return *this;
}
expression &expression::operator*=(expression rhs) {
expr = binary_operator("*", std::move(expr), std::move(rhs));
return *this;
}
expression &expression::operator/=(expression rhs) {
expr = binary_operator("/", std::move(expr), std::move(rhs));
return *this;
}
expression &expression::and_(expression rhs) {
expr = binary_operator("&&", std::move(expr), std::move(rhs));
return *this;
}
expression &expression::or_(expression rhs) {
expr = binary_operator("||", std::move(expr), std::move(rhs));
return *this;
}
expression &expression::equals(expression rhs) {
expr = binary_operator("==", std::move(expr), std::move(rhs));
return *this;
}
expression &expression::not_equals(expression rhs) {
expr = binary_operator("!=", std::move(expr), std::move(rhs));
return *this;
}
expression &expression::less_than(expression rhs) {
expr = binary_operator("<", std::move(expr), std::move(rhs));
return *this;
}
expression &expression::less_equals(expression rhs) {
expr = binary_operator("<=", std::move(expr), std::move(rhs));
return *this;
}
expression &expression::greater_than(expression rhs) {
expr = binary_operator(">", std::move(expr), std::move(rhs));
return *this;
}
expression &expression::greater_equals(expression rhs) {
expr = binary_operator(">=", std::move(expr), std::move(rhs));
return *this;
}
expression &expression::factor(expression rhs) {
expr = binary_operator("**", std::move(expr), std::move(rhs));
return *this;
}
expression &expression::dot(expression rhs) {
expr = binary_operator(".", std::move(expr), std::move(rhs));
return *this;
}
struct printer {
void operator()(const double n) const {
std::cout << n;
}
void operator()(const std::string &s) const {
std::cout << s;
}
void operator()(const expression &ast) const {
boost::apply_visitor(*this, ast.expr);
}
void operator()(const binary_operator &expr) const {
std::cout << "op:" << expr.op << "(";
boost::apply_visitor(*this, expr.lhs.expr);
std::cout << ", ";
boost::apply_visitor(*this, expr.rhs.expr);
std::cout << ')';
}
void operator()(const unary_operator &expr) const {
std::cout << "op:" << expr.op << "(";
boost::apply_visitor(*this, expr.rhs.expr);
std::cout << ')';
}
};
struct operators {
struct and_ {
};
struct or_ {
};
struct equals {
};
struct not_equals {
};
struct less_than {
};
struct less_equals {
};
struct greater_than {
};
struct greater_equals {
};
struct factor {
};
struct dot {
};
expression &operator()(expression &lhs, expression rhs, and_) const {
return lhs.and_(std::move(rhs));
}
expression &operator()(expression &lhs, expression rhs, or_) const {
return lhs.or_(std::move(rhs));
}
expression &operator()(expression &lhs, expression rhs, equals) const {
return lhs.equals(std::move(rhs));
}
expression &operator()(expression &lhs, expression rhs, not_equals) const {
return lhs.not_equals(std::move(rhs));
}
expression &operator()(expression &lhs, expression rhs, less_than) const {
return lhs.less_than(std::move(rhs));
}
expression &operator()(expression &lhs, expression rhs, less_equals) const {
return lhs.less_equals(std::move(rhs));
}
expression &operator()(expression &lhs, expression rhs, greater_than) const {
return lhs.greater_than(std::move(rhs));
}
expression &operator()(expression &lhs, expression rhs, greater_equals) const {
return lhs.greater_equals(std::move(rhs));
}
expression &operator()(expression &lhs, expression rhs, factor) const {
return lhs.factor(std::move(rhs));
}
expression &operator()(expression &lhs, expression rhs, dot) const {
return lhs.dot(std::move(rhs));
}
};
}
namespace qi = boost::spirit::qi;
struct expectation_handler {
template<typename Iterator>
void operator()(Iterator first, Iterator last, const boost::spirit::info &info) const {
std::stringstream msg;
msg << "Expected " << info << " at \"" << std::string(first, last) << "\"";
throw std::runtime_error(msg.str());
}
};
template<typename Iterator>
struct grammar : qi::grammar<Iterator, ast::expression(), qi::ascii::space_type> {
grammar()
: grammar::base_type(expression) {
variable = qi::lexeme[qi::alpha >> *(qi::alnum | '_')];
expression = logical.alias() > qi::eoi;
logical = equality[qi::_val = qi::_1]
>> *(
((qi::lit("&&") > equality[op(qi::_val, qi::_1, ast::operators::and_{})]) |
(qi::lit("||") > equality[op(qi::_val, qi::_1, ast::operators::or_{})]))
);
equality = relational[qi::_val = qi::_1]
>> *(
((qi::lit("==") > relational[op(qi::_val, qi::_1, ast::operators::equals{})]) |
(qi::lit("!=") > relational[op(qi::_val, qi::_1, ast::operators::not_equals{})]))
);
relational = additive[qi::_val = qi::_1]
>> *(
((qi::lit("<") > relational[op(qi::_val, qi::_1, ast::operators::less_than{})]) |
(qi::lit("<=") > relational[op(qi::_val, qi::_1, ast::operators::less_equals{})]) |
(qi::lit(">") > relational[op(qi::_val, qi::_1, ast::operators::greater_than{})]) |
(qi::lit(">=") > relational[op(qi::_val, qi::_1, ast::operators::greater_equals{})]))
);
additive = multiplicative[qi::_val = qi::_1]
>> *(
((qi::lit("+") > multiplicative[qi::_val += qi::_1]) |
(qi::lit("-") > multiplicative[qi::_val -= qi::_1]))
);
multiplicative = factor[qi::_val = qi::_1]
>> *(
((qi::lit("*") > factor[qi::_val *= qi::_1]) |
(qi::lit("/") > factor[qi::_val /= qi::_1]))
);
factor = primary[qi::_val = qi::_1]
>> *((qi::lit("**")) > primary[op(qi::_val, qi::_1, ast::operators::factor{})]);
primary =
qi::double_[qi::_val = qi::_1]
| ('(' > expression[qi::_val = qi::_1] > ')')
>> *(qi::char_('.') > variable[qi::_val = op(qi::_val, qi::_1, ast::operators::dot{})])
| variable[qi::_val = qi::_1]
>> *(qi::char_('.') > variable[qi::_val = op(qi::_val, qi::_1, ast::operators::dot{})]);
qi::on_error<qi::fail>(
expression,
boost::phoenix::bind(boost::phoenix::ref(err_handler), qi::_3, qi::_2, qi::_4));
}
qi::rule<Iterator, ast::expression(), qi::ascii::space_type> expression, logical, equality, relational, additive, multiplicative, factor, unary, binary, primary;
qi::rule<Iterator, std::string()> variable;
boost::phoenix::function<ast::operators> op;
expectation_handler err_handler;
};
int main(int argc, const char *argv[]) {
std::string input("2 + 5 + t.a");
auto it_begin(input.begin()), it_end(input.end());
grammar<decltype(it_begin)> parser;
ast::expression expression;
qi::phrase_parse(it_begin, it_end, parser, qi::ascii::space, expression);
ast::printer printer;
printer(expression);
return 0;
}
Prints
op:+(op:+(2, 5), op:.(t, a))
I'll narrate this in the order I "discover" your code. Then I'll present some tweaks which I think mattered the most in the end.
I'm liking a lot of what you've done.
A few names could (should?) be improved. E.g. ast::operators
does nothing to suggest its purpose. It's a lazy factory for binary operator expressions.
So, name it make_binary
or similar.
Same with the phoenix::function<>
wrapper that wraps it. op
in the semantic action doesn't express what happens there very well.
Instead of having the op
(alias make_binary
) actor be side-effectful on the _val argument, consider making it return a different value. Then everything can become immutable and the semantic action better expresses intent:
rule = expr [ _val = foo(_val, _1, _2, _3) ];
Expresses that _val is updated to something created from the given parameters.
At the level of the grammar, things don't look "tidy". A lot of it could be improved by simply using namespace qi::labels
, and getting rid of redundant qi::lit()
wrappers, which changes e.g.
logical = equality[qi::_val = qi::_1]
>> *(
((qi::lit("&&") > equality[op(qi::_val, qi::_1, ast::operators::and_{})]) |
(qi::lit("||") > equality[op(qi::_val, qi::_1, ast::operators::or_{})]))
);
into
using ast::operators;
using namespace qi::labels;
logical = equality[_val = _1]
>> *(
(("&&" > equality[op(_val, _1, operators::and_{})]) |
("||" > equality[op(_val, _1, operators::or_{})]))
);
You check for eoi
in your grammar (Good for you!). However, it's put inside a recursed rule:
expression = logical.alias() > qi::eoi;
This means that (a+b)*3
would never parse, because )
is found where eoi
is required. Fix it by putting eoi
on the top level.
You have a skipper at the grammar level, which means people have to pass in the correct skipper. If they don't, they can wreck the grammar. Instead, make the skipper internal so you control it, and the interface is easier to use (correctly):
start = qi::skip(qi::ascii::space) [ expression ];
Usage:
if (qi::parse(it_begin, it_end, parser, expression)) {
Or perhaps:
if (qi::parse(it_begin, it_end, parser > qi::eoi, expression)) {
I realize that the driver code (main
) might be out of scope for your review, but I'll show you the missing error-handling, because it can be pretty subtle w.r.t. partial parses:
int main() {
ast::printer printer;
grammar<std::string::const_iterator> parser;
for (std::string const input : {
"2 + 5 + t.a",
"(2 + 5) + t.a", // note the removed eoi constraint
"2 + 5 * t.a",
"2 * 5 - t.a",
"partial match",
"uhoh *",
})
try {
std::cout << "----- " << std::quoted(input) << " ---- \n";
auto it_begin(input.begin()), it_end(input.end());
ast::expression expression;
if (qi::parse(it_begin, it_end, parser, expression)) {
printer(expression);
std::cout << std::endl;
} else {
std::cout << "Not matched\n";
}
if (it_begin != it_end) {
std::string tail(it_begin, it_end);
std::cout << "Remaining unparsed input: " << std::quoted(tail) << "\n";
}
} catch(std::exception const& e) {
std::cout << "Exception: " << std::quoted(e.what()) << "\n";
}
}
Note that the expectations will not give useful messages unless you named your rules.
Exception: Expected <unnamed-rule> at ""
The idiomatic way to name them is to use the DEBUG macros:
BOOST_SPIRIT_DEBUG_NODES(
(start)
(expression)(logical)(equality)
(relational)(additive)(multiplicative)
(factor)(unary)(binary)(primary)
(variable)
)
Now:
Exception: Expected <factor> at ""
Intermission: superficial changes to here: Live On Coliru
In the printer there's a lot of repetition (apply_visitor(*this
...) and it's slightly less than readable due to operator()
. My preference is to relay to a call
or apply
function
Also in the printer, don't hardcode the output stream. One day(TM) you will want to format to a string. Or std::cerr
, or a file
Combining these notes on the printer: Live On Coliru
struct printer { std::ostream& _os; template <typename T> std::ostream& operator()(T const& v) const { return call(v); } private: std::ostream& call(expression const& ast) const { return boost::apply_visitor(*this, ast.expr); } std::ostream& call(binary_operator const& expr) const { _os << "op:" << expr.op << "("; call(expr.lhs) << ", "; return call(expr.rhs) << ')'; } std::ostream& call(unary_operator const& expr) const { _os << "op:" << expr.op << "("; return call(expr.rhs) << ')'; } template <typename Lit> std::ostream& call(Lit const& v) const { return _os << v; } };
The logical extension of this is to make it an actual output manipulator:
std::cout << "Parsed: " << fmt_expr{expression} << std::endl;
Again, Live On Coliru, also simplified the
printer
visitor again:std::ostream& call(binary_operator const& expr) const { return _os << "op:" << expr.op << "(" << fmt_expr{expr.lhs} << ", " << fmt_expr{expr.rhs} << ')'; }
In the AST you store the actual operator dynamically, as a string. It seems to me that there is not a lot of value to encode the operator statically just for all the ast-building overloads (ast::operator::operator()
as well as all the delegated members of ast::expr
). Instead, just pass a string everytime?
Now the builder namespace can vanish, the asymmetrical factory members, and the whole phoenix function is grammar-local:
struct make_binary_f {
ast::binary_operator operator()(ast::expression lhs, ast::expression rhs, std::string op) const {
return { op, lhs, rhs };
}
};
boost::phoenix::function<make_binary_f> make;
Another in-between station Live On Coliru
ACHIEVEMENT UNLOCKED
Code down 113 lines of code (now 218 instead of 331 lines of code)
Random spot:
variable = qi::lexeme[qi::alpha >> *(qi::alnum | '_')];
'_'
is equivalent to qi::lit('_')
, not qi::char_('_')
so this would remove all underscores. Either use the char_, or use raw[]
to directly construct the argument from source iterators.
Now we're getting into details: instead of [_val=_1]
we can use automatic attribute propagation (see Boost Spirit: "Semantic actions are evil"? and operator %=
rule init).
Factor out more common subexpressions. Together with previous bullet:
primary
= qi::double_[_val = _1]
| ('(' > expression[_val = _1] > ')')
>> *("." > variable[_val = make(_val, _1, ".")])
| variable[_val = _1]
>> *("." > variable[_val = make(_val, _1, ".")]);
Becomes:
primary %= qi::double_
| (('(' > expression > ')') | variable)
>> *("." > variable[_val = make(_val, _1, ".")])
;
Lift variant type outside expression
so you can implement the recursive types before expression
. Also, consider expression
deriving from the variant (LSK). In your case, there is no real need for nested expressions because the unary/binary nodes impose order already. So your entire AST can be:
struct unary_operator;
struct binary_operator;
typedef boost::variant<
double,
std::string,
boost::recursive_wrapper<unary_operator>,
boost::recursive_wrapper<binary_operator>
> expr_variant;
struct expression : expr_variant {
using expr_variant::expr_variant;
using expr_variant::operator=;
};
struct unary_operator { expression rhs; std::string op; } ;
struct binary_operator { expression lhs; expression rhs; std::string op; } ;
Move expectation_handler
inside the grammar class (it's of no use to anything else), and optionally modernize it with phoenix::function? Regardless, since the functor is stateless, no need to ref
(and certainly not ref
instead of cref
):
qi::on_error<qi::fail>(
expression,
boost::phoenix::bind(expectation_handler{}, _3, _2, _4));
Actually, just make it
auto handler = [](Iterator first, Iterator last, const boost::spirit::info &info) {
std::stringstream msg;
msg << "Expected " << info << " at \"" << std::string(first, last) << "\"";
throw std::runtime_error(msg.str());
};
qi::on_error<qi::fail>(
expression,
boost::phoenix::bind(handler, _3, _2, _4));
Minor nit: use std::quoted
instead of "fake" quoting :)
Late brainwave, you can extract the bulk of that semantic action:
auto make_bin =
_val = px::bind(make_<ast::binary_expr>{}, _val, _2, _1);
As long as all the limbs are stateless/by value, that's not a problem (contrast with Assigning parsers to auto variables though!). Now just make the operators expose attributes:
expression %= equality
>> *(
(qi::string("&&") > equality)[make_bin] |
(qi::string("||") > equality)[make_bin]
);
equality %= relational
>> *(
(qi::string("==") > relational)[make_bin] |
(qi::string("!=") > relational)[make_bin]
);
relational %= additive
>> *(
(qi::string("<") > relational)[make_bin] |
(qi::string("<=") > relational)[make_bin] |
(qi::string(">") > relational)[make_bin] |
(qi::string(">=") > relational)[make_bin]
);
additive %= multiplicative
>> *(
(qi::string("+") > multiplicative)[make_bin] |
(qi::string("-") > multiplicative)[make_bin]
);
multiplicative %= factor
>> *(
(qi::string("*") > factor)[make_bin] |
(qi::string("/") > factor)[make_bin]
);
factor %= primary
>> *(
(qi::string("**") > primary)[make_bin]
);
primary %= qi::double_
| (('(' > expression > ')') | variable)
>> *(qi::string(".") > variable)[make_bin]
;
Actually, just checked and phoenix::construct
can do aggregates:
auto make_bin =
_val = boost::phoenix::construct<ast::binary_expr>(_1, _val, _2);
Also dropped the unused unary_*
machinery, moved the IO manipulator into io
namespace (instead of ast
) and reintroduced eoi
checking in the main
driver...
Heck, with some c++17 you can combine the branches of each production:
auto op = [](auto... sym) { return qi::copy((qi::string(sym) | ...)); };
expression %= equality >> *(op("&&","||") > equality)[make_bin];
equality %= relational >> *(op("==","!=") > relational)[make_bin];
relational %= additive >> *(op("<","<=",">",">=") > relational)[make_bin];
additive %= multiplicative >> *(op("+","-") > multiplicative)[make_bin];
multiplicative %= factor >> *(op("*","/") > factor)[make_bin];
factor %= primary >> *(op("**") > primary)[make_bin];
Only just didn't manage to bring it under 100 LoC, but I added more test cases in the process.
Live Demo On Coliru (where I found out that phoenix::construct<>
for aggregates requires either GCC or recent boost or both, so added a constructor)
#include <boost/spirit/include/qi.hpp>
#include <boost/spirit/include/phoenix.hpp>
#include <iostream>
#include <iomanip>
namespace qi = boost::spirit::qi;
namespace ast {
struct binary_expr;
typedef boost::variant<
double,
std::string,
boost::recursive_wrapper<binary_expr>
> expr_variant;
struct expression : expr_variant {
using expr_variant::expr_variant;
using expr_variant::operator=;
};
struct binary_expr {
binary_expr(std::string op, expression lhs, expression rhs)
: op(std::move(op)), lhs(std::move(lhs)), rhs(std::move(rhs)) {}
std::string op; expression lhs; expression rhs;
};
}
namespace io {
struct fmt_expr { // io manipulator
ast::expression const& _ref;
friend std::ostream& operator<<(std::ostream& os, fmt_expr manip);
};
struct formatter_visitor {
std::ostream& _os;
template <typename T> std::ostream& operator()(T const& v) const
{ return call(v); }
private:
std::ostream& call(ast::expression const& v) const {
return boost::apply_visitor(*this, v);
}
std::ostream& call(ast::binary_expr const& expr) const {
return _os << "op:" << expr.op
<< "(" << fmt_expr{expr.lhs} << ", " << fmt_expr{expr.rhs} << ')';
}
template <typename Lit>
std::ostream& call(Lit const& v) const { return _os << v; }
};
std::ostream& operator<<(std::ostream& os, fmt_expr manip) {
return formatter_visitor{os}(manip._ref);
}
}
template<typename Iterator>
struct grammar : qi::grammar<Iterator, ast::expression()> {
grammar() : grammar::base_type(start) {
using namespace qi::labels;
auto make_bin = _val = boost::phoenix::construct<ast::binary_expr>(_1, _val, _2);
auto op = [](auto... sym) { return qi::copy((qi::string(sym) | ...)); };
expression %= equality >> *(op("&&","||") > equality)[make_bin];
equality %= relational >> *(op("==","!=") > relational)[make_bin];
relational %= additive >> *(op("<","<=",">",">=") > relational)[make_bin];
additive %= multiplicative >> *(op("+","-") > multiplicative)[make_bin];
multiplicative %= factor >> *(op("*","/") > factor)[make_bin];
factor %= primary >> *(op("**") > primary)[make_bin];
variable = qi::lexeme[qi::alpha >> *(qi::alnum | qi::char_('_'))];
primary %= qi::double_ | (('(' > expression > ')') | variable)
>> *(op(".") > variable)[make_bin];
start = qi::skip(qi::ascii::space) [ qi::eps > expression ] > qi::eoi;
qi::on_error<qi::fail>(start, boost::phoenix::bind([](auto first, auto last, auto const& info) {
std::stringstream msg;
msg << "Expected " << info << " at " << std::quoted(std::string(first, last));
throw std::runtime_error(msg.str());
}, _3, _2, _4));
BOOST_SPIRIT_DEBUG_NODES((expression)(equality)(relational)(additive)
(multiplicative)(factor)(unary)(binary)(primary)(variable))
}
private:
qi::rule<Iterator, ast::expression()> start;
qi::rule<Iterator, ast::expression(), qi::ascii::space_type> expression, equality, relational, additive, multiplicative, factor, unary, binary, primary;
qi::rule<Iterator, std::string()> variable; // lexeme
};
int main() {
using io::fmt_expr;
grammar<std::string::const_iterator> parser;
for (std::string const s : { "2 + 5 + t.a", "(2 + 5) + t.a", "2 + 5 * t.a",
"2 * 5 - t.a", "partial match", "uhoh *", "under_scores", "" })
try {
ast::expression expression;
qi::parse(s.begin(), s.end(), parser, expression);
std::cout << std::quoted(s) << " -> " << fmt_expr{expression} << "\n";
} catch(std::exception const& e) {
std::cout << "Exception: " << e.what() << "\n";
}
}
Prints
"2 + 5 + t.a" -> op:+(op:+(2, 5), op:.(t, a))
"(2 + 5) + t.a" -> op:+(op:+(2, 5), op:.(t, a))
"2 + 5 * t.a" -> op:+(2, op:*(5, op:.(t, a)))
"2 * 5 - t.a" -> op:-(op:*(2, 5), op:.(t, a))
Exception: Expected <eoi> at " match"
Exception: Expected <factor> at ""
"under_scores" -> under_scores
The things I'll consider beyond the scope are related to your grammar/ast semantics.
Operator precedence is a bit noisy. What you'd like to have is some meta data that allows you to "just combine" the binary operands and have the correct precedence emerge, like so:
expression %= primary
>> *(
(binop > expression) [_val = make_bin(_1, _val, _2)]
);
I've applied this strategy on an extended chat at this answer and the resulting code is on github: https://github.com/sehe/qi-extended-parser-evaluator
Consider using X3 if you have C++14 support. The compile times will be much lower.