I am trying to use qi::on_success
callback (here) to set a field when a rule is matched. The code below is slightly adapted from this code, though my slight changes to the rules/AST classes mean it no longer recognizes _rule_name
. My intention is commented in the code below. I want to set the field term_type
to TermType::literal
if the _literal
rule is matched or to TermType::rule_name
if _rule_name
rule is matched.
//#define BOOST_SPIRIT_DEBUG
#include <boost/spirit/include/qi.hpp>
#include <boost/fusion/adapted.hpp>
#include <iomanip>
namespace qi = boost::spirit::qi;
namespace Ast {
    /// Tag telling which kind of symbol a Term holds.
    enum class TermType {
        literal,   ///< quoted text
        rule_name  ///< name of a rule, written between angle brackets in the input
    };

    /// One symbol on the right-hand side of a rule (also used for the rule's own name).
    struct Term {
        std::string data;  ///< the symbol text
        // Only `data` is adapted for Fusion, so the parser never assigns this
        // member; give it a defined value instead of leaving it indeterminate.
        TermType term_type = TermType::literal;
    };

    /// A sequence of terms that must appear one after another.
    using List = std::list<Term>;

    /// Alternatives of a rule: each element is one List.
    using Expression = std::list<List>;

    /// A single definition: `name ::= rhs`.
    struct Rule {
        Term name;  // lhs
        Expression rhs;
    };

    /// The whole parsed grammar: every rule in input order.
    using Syntax = std::list<Rule>;
}
// Fusion adaptation of the AST structs. Note that only `data` is listed for
// Term, so `term_type` is not part of the adapted sequence.
BOOST_FUSION_ADAPT_STRUCT(Ast::Term, data)
BOOST_FUSION_ADAPT_STRUCT(Ast::Rule, name, rhs)
namespace Parser {
// Qi grammar for a BNF description; its attribute is an Ast::Syntax.
template<typename Iterator>
struct BNF : qi::grammar<Iterator, Ast::Syntax()> {
// Defines every rule; `start` is the rule handed to the grammar base.
BNF() : BNF::base_type(start) {
using namespace qi;
_blank = blank;
// Skipper: blanks, plus an end-of-line that is NOT followed by something
// that parses as a rule (checked with the negative predicate `!`).
_skipper = blank | (eol >> !skip(_blank.alias())[_rule]);
// The whole input: rules separated by one or more end-of-lines.
start = skip(_skipper.alias())[_rule % +eol];
// name ::= expression
_rule = _rule_name >> "::=" >> _expression;
// Alternatives separated by '|'.
_expression = _list % '|';
// One alternative: one or more literals or rule names.
_list = +(_literal | _rule_name);
// Text enclosed in double quotes or in single quotes.
_literal = '"' >> *(_character - '"') >> '"'
| "'" >> *(_character - "'") >> "'";
_character = alnum | char_("\"'| !#$%&()*+,./:;>=<?@]\\^_`{}~[-");
// '<' letter, then letters/digits/'-', then '>'.
_rule_name = '<' >> (alpha >> *(alnum | char_('-'))) >> '>';
BOOST_SPIRIT_DEBUG_NODES(
(_rule)(_expression)(_list)(_literal)
(_character)
(_rule_name))
}
/* Intended behaviour, as pseudo-code only (there is no `_term` rule in this grammar):
qi::on_success(_term, setTermTypeHandler());
setTermTypeHandler(){
if term is literal
term.term_type = TermType::literal
else
term.term_type = TermType::rule_name
}
*/
private:
// Skipper rules carry no attribute.
using Skipper = qi::rule<Iterator>;
Skipper _skipper, _blank;
qi::rule<Iterator, Ast::Syntax()> start;
qi::rule<Iterator, Ast::Rule(), Skipper> _rule;
qi::rule<Iterator, Ast::Expression(), Skipper> _expression;
qi::rule<Iterator, Ast::List(), Skipper> _list;
// lexemes (declared without a Skipper)
qi::rule<Iterator, Ast::Term()> _literal;
qi::rule<Iterator, Ast::Term()> _rule_name;
// qi::rule<Iterator, std::string()> _literal;
qi::rule<Iterator, char()> _character;
};
}
// Parses a small BNF sample and prints each rule back, or "Failed" when the
// parse does not succeed; any unconsumed input is reported afterwards.
int main() {
    Parser::BNF<std::string::const_iterator> const parser;
    std::string const input = R"(<code> ::= <letter><digit> | <letter><digit><code>
<letter> ::= "a" | "b" | "c" | "d" | "e"
| "f" | "g" | "h" | "i"
<digit> ::= "0" | "1" | "2" | "3" |
"4"
)";
    auto cursor = input.begin(), last = input.end();
    Ast::Syntax syntax;

    bool const matched = parse(cursor, last, parser, syntax);
    if (!matched) {
        std::cout << "Failed\n";
    } else {
        for (auto const& rule : syntax) {
            std::cout << rule.name.data << " ::= ";
            // Empty before the first alternative, " | " before every later one.
            std::string separator;
            for (auto const& alternative : rule.rhs) {
                std::cout << separator;
                for (auto const& term : alternative) {
                    std::cout << term.data;
                }
                separator = " | ";
            }
            std::cout << "\n";
        }
    }

    // `cursor` was advanced by parse(); whatever lies beyond it was not consumed.
    if (cursor != last) {
        std::cout << "Remaining: " << std::quoted(std::string(cursor, last)) << "\n";
    }
}
Since your struct Term
has become a discriminated union of Name/Literal emulated by a tuple (std::string, TermType) I would make it so that both _literal
and _rule_name
just create a string, and append the TermType with qi::attr
.
So,
// Term as a (text, kind) pair: `data` is the symbol text, `term_type` its kind.
struct Term {
std::string data;
TermType term_type;
};
Adapting both members
// Both members are listed, so the adapted sequence is (data, term_type).
BOOST_FUSION_ADAPT_STRUCT(Ast::Term, data, term_type)
Declaring relevant rules:
// _term produces a complete Term; the two rules after it produce only text.
qi::rule<Iterator, Ast::Term()> _term;
qi::rule<Iterator, std::string()> _literal;
qi::rule<Iterator, std::string()> _rule_name;
which are initialized as
// One alternative: one or more terms.
_list = +_term;
// Each branch pairs the matched text with the TermType constant for that branch.
_term = _literal >> attr(Ast::TermType::literal)
| _rule_name >> attr(Ast::TermType::rule_name);
// Text enclosed in double quotes or in single quotes.
_literal = '"' >> *(_character - '"') >> '"'
| "'" >> *(_character - "'") >> "'";
_character = alnum | char_("\"'| !#$%&()*+,./:;>=<?@]\\^_`{}~[-");
// '<' letter, then letters/digits/'-', then '>'.
_rule_name = '<' >> (alpha >> *(alnum | char_('-'))) >> '>';
This keeps with my creed that you should try to avoid semantic actions (Boost Spirit: "Semantic actions are evil"?) and keeps the complexity to a minimum.
on_success
I think the idea to use on_success
was ill-advised here because it works well for non-context-dependent actions (like binding source location to each AST node, regardless of the type).
In this case you explicitly want to add different information (the variant discriminator), so you're better served injecting that into the particular branch of the parser expression it applies to.
You seem to have complicated things for yourself by promoting the type of
Rule::name
to Term
(instead of std::string
, where it used to be Name
).
The name of a rule cannot be any other literal, so I'd suggest
either reverting it down to std::string
(stripping it from the extra type
info that Name
had)
// A single definition `name ::= rhs`, with the name held as a plain string.
struct Rule {
std::string name; // lhs
Expression rhs;
};
or making _rule_name
synthesize into Term
directly (including the
TermType
into its rule) https://godbolt.org/z/Kbb9dP
or keeping the best of both worlds, where Term
has a conversion
constructor that takes Name
:
// Builds a Term from a Name: the text is moved into `data` and the kind is
// set to TermType::rule_name.
explicit Term(Name other)
: data(std::move(other))
, term_type(TermType::rule_name)
{ }
Note that the loss of the Name
literate type was NOT without cost, because
the output became very wrong. I'd suggest the
last approach (bullet 3. above) adding a custom operator<<
for your own variant emulation:
// Streams a Term according to its kind: a rule name goes through Name's
// inserter, a literal through std::quoted, anything else prints "?".
friend std::ostream& operator<<(std::ostream& os, Term const& term) {
switch(term.term_type) {
case TermType::rule_name: return os << Name(term.data);
case TermType::literal: return os << std::quoted(term.data);
default: return os << "?";
}
}
Now you can enjoy your own variant type and correct output again:
//#define BOOST_SPIRIT_DEBUG
#include <boost/spirit/include/qi.hpp>
#include <boost/fusion/adapted.hpp>
#include <iomanip>
namespace qi = boost::spirit::qi;
namespace Ast {
    /// A rule name: a std::string that streams as `<text>`.
    struct Name : std::string {
        using std::string::string;
        using std::string::operator=;

        /// Wraps an existing string (explicit, so a plain string never silently becomes a Name).
        explicit Name(std::string s) : std::string(std::move(s)) {}

        /// Writes the name between angle brackets.
        friend std::ostream& operator<<(std::ostream& os, Name const& n) {
            return os << '<' << n.c_str() << '>';
        }
    };

    /// Tag telling which kind of symbol a Term holds.
    enum class TermType { literal, rule_name };

    /// One right-hand-side symbol: its text plus a tag saying what the text is.
    struct Term {
        std::string data;
        // Initialised so that a default-constructed Term never carries an
        // indeterminate tag (the defaulted constructor would not set it).
        TermType term_type = TermType::literal;

        Term() = default;

        /// Converts a rule name into a Term tagged TermType::rule_name.
        explicit Term(Name other)
            : data(std::move(other))
            , term_type(TermType::rule_name)
        { }

        /// Streams a rule name as `<text>`, a literal via std::quoted, and
        /// "?" for any other tag value.
        friend std::ostream& operator<<(std::ostream& os, Term const& term) {
            switch (term.term_type) {
                case TermType::rule_name: return os << Name(term.data);
                case TermType::literal:   return os << std::quoted(term.data);
                default:                  return os << "?";
            }
        }
    };

    /// A sequence of terms that must appear one after another.
    using List = std::list<Term>;

    /// Alternatives of a rule: each element is one List.
    using Expression = std::list<List>;

    /// A single definition: `name ::= rhs`.
    struct Rule {
        Name name;  // lhs
        Expression rhs;
    };

    /// The whole parsed grammar: every rule in input order.
    using Syntax = std::list<Rule>;
}
// Fusion adaptation of the AST structs; Term is adapted as (data, term_type)
// and Rule as (name, rhs).
BOOST_FUSION_ADAPT_STRUCT(Ast::Term, data, term_type)
BOOST_FUSION_ADAPT_STRUCT(Ast::Rule, name, rhs)
namespace Parser {
// Qi grammar for a BNF description; its attribute is an Ast::Syntax.
template <typename Iterator>
struct BNF : qi::grammar<Iterator, Ast::Syntax()> {
// Defines every rule; `start` is the rule handed to the grammar base.
BNF()
: BNF::base_type(start)
{
using namespace qi;
// clang-format off
_blank = blank;
// Skipper: blanks, plus an end-of-line that is NOT followed by something
// that parses as a rule (checked with the negative predicate `!`).
_skipper = blank | (eol >> !skip(_blank.alias()) [ _rule ]);
// The whole input: rules separated by one or more end-of-lines.
start = skip(_skipper.alias()) [ _rule % +eol ];
// name ::= expression
_rule = _rule_name >> "::=" >> _expression;
// Alternatives separated by '|'.
_expression = _list % '|';
// One alternative: one or more terms.
_list = +_term;
// A literal is paired with the TermType::literal constant; a rule name
// yields an Ast::Name, which Term can be constructed from.
_term = _literal >> attr(Ast::TermType::literal)
| _rule_name;
// Text enclosed in double quotes or in single quotes.
_literal = '"' >> *(_character - '"') >> '"'
| "'" >> *(_character - "'") >> "'";
_character = alnum | char_("\"'| !#$%&()*+,./:;>=<?@]\\^_`{}~[-");
// '<' name '>', where raw[] supplies the matched name text as the attribute.
_rule_name = '<' >> qi::raw[ (alpha >> *(alnum | char_('-'))) ] >> '>';
// clang-format on
BOOST_SPIRIT_DEBUG_NODES(
(_rule)(_expression)(_list)(_literal)(_character)(_rule_name))
}
private:
// Skipper rules carry no attribute.
using Skipper = qi::rule<Iterator>;
Skipper _skipper, _blank;
qi::rule<Iterator, Ast::Syntax()> start;
qi::rule<Iterator, Ast::Rule(), Skipper> _rule;
qi::rule<Iterator, Ast::Expression(), Skipper> _expression;
qi::rule<Iterator, Ast::List(), Skipper> _list;
// lexemes (declared without a Skipper)
qi::rule<Iterator, Ast::Term()> _term;
qi::rule<Iterator, std::string()> _literal;
qi::rule<Iterator, Ast::Name()> _rule_name;
qi::rule<Iterator, char()> _character;
};
}
// Parses a small BNF sample and prints each rule back using the AST's own
// stream inserters, or "Failed" when the parse does not succeed; any
// unconsumed input is reported afterwards.
int main() {
    Parser::BNF<std::string::const_iterator> const parser;
    std::string const input = R"(<code> ::= <letter><digit> | <letter><digit><code>
<letter> ::= "a" | "b" | "c" | "d" | "e"
| "f" | "g" | "h" | "i"
<digit> ::= "0" | "1" | "2" | "3" |
"4"
)";
    auto cursor = input.begin(), last = input.end();
    Ast::Syntax syntax;

    bool const matched = parse(cursor, last, parser, syntax);
    if (!matched) {
        std::cout << "Failed\n";
    } else {
        for (auto const& rule : syntax) {
            std::cout << rule.name << " ::= ";
            // Starts empty; std::exchange hands back the current value and
            // installs " | " for every later alternative.
            std::string separator;
            for (auto const& alternative : rule.rhs) {
                std::cout << std::exchange(separator, " | ");
                for (auto const& term : alternative) {
                    std::cout << term;
                }
            }
            std::cout << "\n";
        }
    }

    // `cursor` was advanced by parse(); whatever lies beyond it was not consumed.
    if (cursor != last) {
        std::cout << "Remaining: " << std::quoted(std::string(cursor, last)) << "\n";
    }
}
Prints
<code> ::= <letter><digit> | <letter><digit><code>
<letter> ::= "a" | "b" | "c" | "d" | "e" | "f" | "g" | "h" | "i"
<digit> ::= "0" | "1" | "2" | "3" | "4"