Search code examples
Tags: c++17, boost-spirit, boost-spirit-x3

Using Boost.Spirit X3 with a custom lexer


How can an X3 parser consume an already generated vector of tokens? How could the rules be defined, given something like the following?

// Sketch of the desired usage, not compilable as written: scoped enums have no
// operator>> / unary operator-, and `parse` is a placeholder for the parser entry point.
enum class token { aa, bb, cc};
auto rule = token::aa >> token::bb >> -token::cc;
std::vector<token> tokens{token::aa, token::bb, token::cc};
auto ok = parse(tokens.cbegin(), tokens.cend(), rule);

I'm only interested in validating the input. The idea is to avoid any lexical analysis inside the parser (x3::lit, x3::char_, x3::lexeme, x3::alpha, etc.), following the zero-overhead principle of C++.


Solution

  • Based on @sehe's reply, the implementation could be:

    #include <boost/fusion/adapted.hpp>
    #include <boost/spirit/home/x3.hpp>
    #include <cassert>
    #include <iostream>
    #include <optional>
    #include <utility>
    #include <vector>
    
    namespace my_parser
    {
        /// Kinds of pre-lexed tokens that the parsers in this namespace match against.
        /// `unknown` is the zero value, so a value-initialised `token{}` compares equal to it.
        enum class token
        {
            unknown = 0,
            aa = 1,
            bb = 2,
            cc = 3
        };
    
        namespace ast
        {
            struct my_seq
            {
                token m1 {};
                token m2 {};
                std::optional<token> m3 {};
            };
        }
    
        namespace x3 = boost::spirit::x3;
    
        /// X3 primitive parser that matches exactly one token of a given kind.
        ///
        /// It works directly on a range whose elements compare equal to `token`
        /// (e.g. iterators into a std::vector<token>), so no character-level
        /// lexing happens inside the grammar.
        struct dummy_token_parser: x3::parser<dummy_token_parser>
        {
            /// Attribute type exposed by this parser: the matched token.
            using attribute_type = token;
            static constexpr bool has_attribute = true;

            /// @param tok the token kind this parser instance accepts
            constexpr dummy_token_parser(const token tok): tok(tok) {}

            /// Consumes one element if the input is not exhausted and the current
            /// element equals `tok`.
            ///
            /// The rule-context parameter is a deduced type (and ignored), so the
            /// parser does not depend on whatever X3 passes there being implicitly
            /// convertible to x3::unused_type.
            ///
            /// @param first  current position; advanced by one on success, untouched on failure
            /// @param last   end of the input range
            /// @param attr   receives the matched token on success
            /// @return true if a token was matched and consumed, false otherwise
            template <typename It, typename Ctx, typename RCtx, typename A>
            bool parse(It& first, const It& last, const Ctx& /*ctx*/, const RCtx& /*rctx*/, A& attr) const
            {
                if (first != last && *first == tok)
                {
                    // move_to is X3's attribute-assignment customisation point.
                    x3::traits::move_to(*first, attr);
                    ++first;
                    return true;
                }

                return false;
            }

            token tok;  ///< token kind accepted by this parser
        };
    
        /// Runs `rule` over `input`, checks the outcome against `expected_result`
        /// and prints the outcome to std::cout.
        ///
        /// The parse counts as successful only if the rule matches AND every token
        /// of `input` was consumed. A matching prefix followed by trailing tokens is
        /// reported as a failure, since the goal is to validate the whole input.
        ///
        /// @param input            token sequence to validate
        /// @param rule             X3 parser/rule whose attribute can be stored in ast::my_seq
        /// @param my_seq_data      reset to a default value first, then filled by the parse
        /// @param expected_result  outcome the caller expects; a mismatch trips `assert`
        template <typename Rule>
        void parse_tokens(const std::vector<token>& input, const Rule& rule, ast::my_seq& my_seq_data, bool expected_result)
        {
            my_seq_data = {};

            auto first = input.cbegin();
            const auto last = input.cend();
            const bool matched = x3::parse(first, last, rule, my_seq_data);
            const bool result = matched && first == last;

            assert(result == expected_result);
            // Keep std::endl: the flush makes sure this line is emitted even if a
            // later assert aborts the process.
            std::cout << result << std::endl;
        }
    
        // Shorthand so the grammar below reads as tp(token::x).
        using tp = dummy_token_parser;
    
        // Rule declaration: `my_seq_rule` is the rule's tag type, ast::my_seq its attribute,
        // and "my_seq" the name handed to the rule's constructor.
        const x3::rule<struct my_seq_rule, ast::my_seq> my_seq {"my_seq"};
        // Grammar: token aa, then token bb, then an optional token cc.
        // The name must be `<rule>_def`, which is what BOOST_SPIRIT_DEFINE looks for.
        const auto my_seq_def = tp(token::aa) >> tp(token::bb) >> -tp(token::cc);
    
        // Connects the rule `my_seq` to its definition `my_seq_def`.
        BOOST_SPIRIT_DEFINE(my_seq);
    }
    
    // Adapts ast::my_seq as a Boost.Fusion sequence over its members m1, m2, m3 (in that
    // order), which is how the three parts of the grammar's sequence map onto the struct.
    BOOST_FUSION_ADAPT_STRUCT(my_parser::ast::my_seq, m1, m2, m3);
    
    int main()
    {
        using namespace my_parser;
    
        ast::my_seq my_seq_data {};
        parse_tokens({token::aa, token::bb, token::cc}, my_seq, my_seq_data, true);
        parse_tokens({token::aa, token::cc}, my_seq, my_seq_data, false);
        parse_tokens({token::bb, token::cc}, my_seq, my_seq_data, false);
        parse_tokens({token::aa, token::bb}, my_seq, my_seq_data, true);
    
        return 0;
    }