Search code examples
c++boost-spiritboost-spirit-qi

Using Spirit qi::success callbacks to set fields after a rule match


I am trying to use the qi::on_success callback (here) to set a field when a rule is matched. The code below is slightly adapted from this code, though my slight changes to the rules/AST classes have caused it not to recognize _rule_name. My intention is commented in the code below. I want to set the field term_type to TermType::literal if the _literal rule is matched, or to TermType::rule_name if the _rule_name rule is matched.

//#define BOOST_SPIRIT_DEBUG
#include <boost/spirit/include/qi.hpp>
#include <boost/fusion/adapted.hpp>
#include <iomanip>

namespace qi = boost::spirit::qi;

// AST types produced when parsing a BNF grammar description.
namespace Ast {
    // Tells which kind of symbol a Term holds.
    enum class TermType {
        literal,
        rule_name
    };


    // One symbol of a production: its text and its kind.
    struct Term {
        std::string data;
        // Default member initializer: a Term whose kind is never assigned
        // would otherwise hold an indeterminate value, and reading it is
        // undefined behaviour. Term remains an aggregate.
        TermType term_type = TermType::literal;
    };

    // A sequence of terms (one alternative of a production).
    using List = std::list<Term>;
    // The alternatives of a production.
    using Expression = std::list<List>;

    // A single production: left-hand side name and right-hand side alternatives.
    struct Rule {
        Term name; // lhs
        Expression rhs;
    };

    // A whole grammar: the list of its productions.
    using Syntax = std::list<Rule>;
}
// Adapt the AST structs as Boost.Fusion sequences.
// Note: only `data` is listed for Ast::Term; `term_type` is not part of the
// adapted sequence.
BOOST_FUSION_ADAPT_STRUCT(Ast::Term, data)
BOOST_FUSION_ADAPT_STRUCT(Ast::Rule, name, rhs)

namespace Parser {
    // Qi grammar for BNF text. Its start rule synthesizes an Ast::Syntax.
    template<typename Iterator>
    struct BNF : qi::grammar<Iterator, Ast::Syntax()> {
        BNF() : BNF::base_type(start) {
            using namespace qi;
            _blank = blank;
            // Skip blanks, and also a line break unless a rule can be parsed
            // right after it (so a production may continue on the next line).
            _skipper = blank | (eol >> !skip(_blank.alias())[_rule]);
            // The input is a list of rules separated by one or more line breaks.
            start = skip(_skipper.alias())[_rule % +eol];

            // <name> ::= alternatives
            _rule = _rule_name >> "::=" >> _expression;
            // Alternatives are lists separated by '|'.
            _expression = _list % '|';
            // A list is one or more literals or rule names.
            _list = +(_literal | _rule_name);
            // A literal is quoted with either double or single quotes; the
            // closing quote character is excluded from the content.
            _literal = '"' >> *(_character - '"') >> '"'
                    | "'" >> *(_character - "'") >> "'";
            _character = alnum | char_("\"'| !#$%&()*+,./:;>=<?@]\\^_`{}~[-");
            // A rule name is a letter followed by letters, digits or '-',
            // enclosed in angle brackets.
            _rule_name = '<' >> (alpha >> *(alnum | char_('-'))) >> '>';

            BOOST_SPIRIT_DEBUG_NODES(
                    (_rule)(_expression)(_list)(_literal)
                            (_character)
                            (_rule_name))
        }

        // Intended behaviour (pseudo-code, not compiled):
        /*qi::on_success(_term, setTermTypeHandler());

        setTermTypeHandler(){
             if term is literal
                term.symbol_type = TermType::literal
            else
                term.term_type = TermType::rule_name
        }
        */

    private:
        // The skipper is itself a rule without attribute or skipper.
        using Skipper = qi::rule<Iterator>;
        Skipper _skipper, _blank;

        qi::rule<Iterator, Ast::Syntax()> start;
        qi::rule<Iterator, Ast::Rule(), Skipper> _rule;
        qi::rule<Iterator, Ast::Expression(), Skipper> _expression;
        qi::rule<Iterator, Ast::List(), Skipper> _list;
        // lexemes (declared without a skipper)
        qi::rule<Iterator, Ast::Term()> _literal;
        qi::rule<Iterator, Ast::Term()> _rule_name;
        //  qi::rule<Iterator, std::string()>     _literal;
        qi::rule<Iterator, char()> _character;
    };
}

int main() {
    Parser::BNF<std::string::const_iterator> const parser;

    std::string const input = R"(<code>   ::=  <letter><digit> | <letter><digit><code>
<letter> ::= "a" | "b" | "c" | "d" | "e"
           | "f" | "g" | "h" | "i"
<digit>  ::= "0" | "1" | "2" | "3" |
             "4"
    )";

    auto it = input.begin(), itEnd = input.end();

    Ast::Syntax syntax;
    if (parse(it, itEnd, parser, syntax)) {
        for (auto &rule : syntax) {
            std::cout << rule.name.data << " ::= ";
            std::string sep;
            for (auto &list : rule.rhs) {
                std::cout << sep;
                for (auto &term: list) { std::cout << term.data; }
                sep = " | ";
            };
            std::cout << "\n";
        }
    } else {
        std::cout << "Failed\n";
    }

    if (it != itEnd)
        std::cout << "Remaining: " << std::quoted(std::string(it, itEnd)) << "\n";
}


Solution

  • Since your struct Term has become a discriminated union of Name/Literal emulated by a tuple (std::string, TermType) I would make it so that both _literal and _rule_name just create a string, and append the TermType with qi::attr.

    So,

    // A term is its text plus a TermType tag saying which kind it is.
    struct Term {
        std::string data;
        TermType term_type;
    };
    

    Adapting both members

    // Both members are listed, so Term is adapted as a two-element sequence.
    BOOST_FUSION_ADAPT_STRUCT(Ast::Term, data, term_type)
    

    Declaring relevant rules:

    // _term yields a complete Term; the two lexeme rules yield only the text.
    qi::rule<Iterator, Ast::Term()>   _term;
    qi::rule<Iterator, std::string()> _literal;
    qi::rule<Iterator, std::string()> _rule_name;
    

    which are initialized as

    _list       = +_term;
    // Each alternative supplies its own TermType through attr(...).
    _term       = _literal >> attr(Ast::TermType::literal)
                | _rule_name >> attr(Ast::TermType::rule_name);
    // Double- or single-quoted text; the closing quote is excluded from the content.
    _literal    = '"' >> *(_character - '"') >> '"'
                | "'" >> *(_character - "'") >> "'";
    
    _character = alnum | char_("\"'| !#$%&()*+,./:;>=<?@]\\^_`{}~[-");
    // A letter followed by letters, digits or '-', enclosed in angle brackets.
    _rule_name = '<' >> (alpha >> *(alnum | char_('-'))) >> '>';
    

    This keeps with my creed that you should try to avoid semantic actions (Boost Spirit: "Semantic actions are evil"?) and keeps the complexity to a minimum.

    on_success

    I think the idea to use on_success was ill-advised here, because on_success works well for non-context-dependent actions (like binding a source location to each AST node, regardless of its type).

    In this case you explicitly want to add different information (the variant discriminator), so you're better served injecting that into the particular branch of the parser expression it applies to.

    Sidenotes?

    You seem to have complicated things for yourself by promoting the type of Rule::name to Term (instead of std::string, where it used to be Name).

    The name of a rule cannot be any other literal, so I'd suggest

    1. either reverting it down to std::string (stripping it from the extra type info that Name had)

      // A production whose left-hand side is stored as a plain string.
      struct Rule {
          std::string name; // lhs 
          Expression rhs;
      };
      
    2. or making _rule_name synthesize into Term directly (including the TermType into its rule) https://godbolt.org/z/Kbb9dP

    3. or keeping the best of both worlds, where Term has a conversion constructor that takes Name:

      // Builds a Term from a Name: takes over its text and tags it as rule_name.
      explicit Term(Name other)
          : data(std::move(other))
          , term_type(TermType::rule_name)
      { }
      

    Literate Programming with ADTs

    Note that the loss of the Name literate type was NOT without cost, because the output became very wrong. I'd suggest the last approach (bullet 3. above) adding a custom operator<< for your own variant emulation:

    // Streams a Term according to its kind: a rule name is streamed as a Name,
    // a literal through std::quoted, and any other TermType value as "?".
    friend std::ostream& operator<<(std::ostream& os, Term const& term) {
        switch(term.term_type) {
            case TermType::rule_name: return os << Name(term.data);
            case TermType::literal:   return os << std::quoted(term.data);
            default:                  return os << "?";
        }
    }
    

    Now you can enjoy your own variant type and correct output again:

    Live On Compiler Explorer

    //#define BOOST_SPIRIT_DEBUG
    #include <boost/spirit/include/qi.hpp>
    #include <boost/fusion/adapted.hpp>
    #include <iomanip>
    
    namespace qi = boost::spirit::qi;
    
    // AST types produced when parsing a BNF grammar description.
    namespace Ast {
        // A rule name: a std::string that streams itself wrapped in angle brackets.
        struct Name : std::string {
            using std::string::string;
            using std::string::operator=;
            explicit Name(std::string s) : std::string(std::move(s)) {}
    
            // Writes the name as `<name>`.
            friend std::ostream& operator<<(std::ostream& os, Name const& n) {
                // Stream the character data rather than `n` itself; streaming
                // `n` would select this same overload again.
                return os << '<' << n.c_str() << '>';
            }
        };
    
        // Tells which kind of symbol a Term holds.
        enum class TermType { literal, rule_name };
    
        // One symbol of a production: its text and its kind.
        struct Term {
            std::string data;
            // Default member initializer: with `Term() = default` alone, a
            // default-constructed Term would carry an indeterminate kind, and
            // operator<< below switches on it.
            TermType term_type = TermType::literal;
    
            Term() = default;
            // Builds a Term from a Name: takes over its text and tags it as rule_name.
            explicit Term(Name other)
                : data(std::move(other))
                , term_type(TermType::rule_name)
            { }
    
            // Streams a Term according to its kind: a rule name is streamed as
            // a Name, a literal through std::quoted, anything else as "?".
            friend std::ostream& operator<<(std::ostream& os, Term const& term) {
                switch(term.term_type) {
                    case TermType::rule_name: return os << Name(term.data);
                    case TermType::literal:   return os << std::quoted(term.data);
                    default:                  return os << "?";
                }
            }
        };
    
        // A sequence of terms (one alternative of a production).
        using List = std::list<Term>;
        // The alternatives of a production.
        using Expression = std::list<List>;
    
        // A single production: left-hand side name and right-hand side alternatives.
        struct Rule {
            Name name; // lhs
            Expression rhs;
        };
    
        // A whole grammar: the list of its productions.
        using Syntax = std::list<Rule>;
    }
    // Adapt the AST structs as Boost.Fusion sequences; Term lists both of its members.
    BOOST_FUSION_ADAPT_STRUCT(Ast::Term, data, term_type)
    BOOST_FUSION_ADAPT_STRUCT(Ast::Rule, name, rhs)
    
    namespace Parser {
        // Qi grammar for BNF text. Its start rule synthesizes an Ast::Syntax.
        template <typename Iterator>
        struct BNF : qi::grammar<Iterator, Ast::Syntax()> {
            BNF()
                : BNF::base_type(start)
            {
                using namespace qi;
                // clang-format off
                _blank      = blank;
                // Skip blanks, and also a line break unless a rule can be parsed
                // right after it (so a production may continue on the next line).
                _skipper    = blank | (eol >> !skip(_blank.alias()) [ _rule ]);
                // The input is a list of rules separated by one or more line breaks.
                start       = skip(_skipper.alias()) [ _rule % +eol ];
    
                // <name> ::= alternatives
                _rule       = _rule_name >> "::=" >> _expression;
                // Alternatives are lists separated by '|'.
                _expression = _list % '|';
                _list       = +_term;
                // A literal gets its TermType from attr(...); the _rule_name
                // branch yields an Ast::Name, and Term's constructor taking a
                // Name sets TermType::rule_name.
                _term       = _literal >> attr(Ast::TermType::literal)
                            | _rule_name;
                // Double- or single-quoted text; the closing quote is excluded
                // from the content.
                _literal    = '"' >> *(_character - '"') >> '"'
                            | "'" >> *(_character - "'") >> "'";
    
                _character = alnum | char_("\"'| !#$%&()*+,./:;>=<?@]\\^_`{}~[-");
                // A letter followed by letters, digits or '-', enclosed in angle
                // brackets; raw[] is used so the matched text becomes the Name.
                _rule_name = '<' >> qi::raw[ (alpha >> *(alnum | char_('-'))) ] >> '>';
    
                // clang-format on
                BOOST_SPIRIT_DEBUG_NODES(
                    (_rule)(_expression)(_list)(_literal)(_character)(_rule_name))
            }
    
          private:
            // The skipper is itself a rule without attribute or skipper.
            using Skipper = qi::rule<Iterator>;
            Skipper _skipper, _blank;
    
            qi::rule<Iterator, Ast::Syntax()>     start;
            qi::rule<Iterator, Ast::Rule(),       Skipper> _rule;
            qi::rule<Iterator, Ast::Expression(), Skipper> _expression;
            qi::rule<Iterator, Ast::List(),       Skipper> _list;
            // lexemes (declared without a skipper)
            qi::rule<Iterator, Ast::Term()>   _term;
            qi::rule<Iterator, std::string()> _literal;
            qi::rule<Iterator, Ast::Name()>   _rule_name;
            qi::rule<Iterator, char()>        _character;
        };
    }
    
    int main() {
        Parser::BNF<std::string::const_iterator> const parser;
    
        std::string const input = R"(<code>   ::=  <letter><digit> | <letter><digit><code>
    <letter> ::= "a" | "b" | "c" | "d" | "e"
               | "f" | "g" | "h" | "i"
    <digit>  ::= "0" | "1" | "2" | "3" |
                 "4"
        )";
    
        auto it = input.begin(), itEnd = input.end();
    
        Ast::Syntax syntax;
        if (parse(it, itEnd, parser, syntax)) {
            for (auto &rule : syntax) {
                std::cout << rule.name << " ::= ";
                std::string sep;
                for (auto &list : rule.rhs) {
                    std::cout << std::exchange(sep, " | ");
                    for (auto &term: list) { std::cout << term; }
                };
                std::cout << "\n";
            }
        } else {
            std::cout << "Failed\n";
        }
    
        if (it != itEnd)
            std::cout << "Remaining: " << std::quoted(std::string(it, itEnd)) << "\n";
    }
    

    Prints

    <code> ::= <letter><digit> | <letter><digit><code>
    <letter> ::= "a" | "b" | "c" | "d" | "e" | "f" | "g" | "h" | "i"
    <digit> ::= "0" | "1" | "2" | "3" | "4"