Search code examples
boost-spirit, boost-spirit-qi, boost-spirit-lex

Retrieving the ID of a token_def<>


Is it possible to retrieve the ID of a token defined in the lexer like this: token_def<> tok; from inside the semantic action of a Spirit parser?

What I'm trying to do is use the token ID of every operator (like +, -, *, etc.) to retrieve a corresponding name for that operator from inside the parser semantic action (like add, subtract, times, etc.).

As far as I understand, in a production like:

toks.symbol >> toks.plus >> toks.symbol;

If toks.plus is of type token_def<> (declared as token_def<> plus;), _1 will refer to the first toks.symbol and _2 will refer to the second toks.symbol. Is that true? If so, how can I access the middle token (just to retrieve the ID)?

Thanks!


Solution

  • You can use the built-in lazy placeholder lex::_tokenid (see the Spirit.Lex documentation on lexer semantic actions).

    I adapted the second word-count sample from the tutorials to print the token ids on the fly:

    #include <boost/config/warning_disable.hpp>
    #include <boost/spirit/include/lex_lexertl.hpp>
    #include <boost/spirit/include/phoenix_operator.hpp>
    #include <boost/spirit/include/phoenix_statement.hpp>
    #include <boost/spirit/include/phoenix_algorithm.hpp>
    #include <boost/spirit/include/phoenix_core.hpp>
    
    #include <iostream>
    #include <string>
    
    namespace lex = boost::spirit::lex;
    namespace phx = boost::phoenix;
    
    // Function object that measures how far apart two iterators are.
    // It is wrapped in a boost::phoenix::function right after this
    // definition so it can be called from lexer semantic actions.
    struct distance_func
    {
        // Nested result-type metafunction: the result type is the
        // difference type of the first iterator argument.
        template <typename First, typename Last>
        struct result : boost::iterator_difference<First> {};

        // Returns std::distance(begin, end).
        template <typename First, typename Last>
        typename result<First, Last>::type
        operator()(First& begin, Last& end) const
        {
            typedef typename result<First, Last>::type difference_type;

            difference_type const length = std::distance(begin, end);
            return length;
        }
    };
    boost::phoenix::function<distance_func> const distance = distance_func();
    
    // Token definitions for the word-count lexer.
    //
    // Each token carries a semantic action that updates the counters below
    // and writes the id of the matched token, followed by ';', to std::cout.
    template <typename Lexer>
    struct word_count_tokens : lex::lexer<Lexer>
    {
        word_count_tokens()
          : c(0)
          , w(0)
          , l(0)
          , word("[^ \t\n]+")     // run of characters that are not space, tab or newline
          , eol("\n")             // a newline
          , any(".")              // the "." pattern
        {
            // Register the tokens with the lexer and attach their actions.
            this->self
                =   word
                    [
                        ++phx::ref(w),
                        phx::ref(c) += distance(lex::_start, lex::_end),
                        phx::ref(std::cout) << lex::_tokenid << ";"
                    ]
                |   eol
                    [
                        ++phx::ref(c),
                        ++phx::ref(l),
                        phx::ref(std::cout) << lex::_tokenid << ";"
                    ]
                |   any
                    [
                        ++phx::ref(c),
                        phx::ref(std::cout) << lex::_tokenid << ";"
                    ]
                ;
        }

        // Counters updated by the semantic actions; main() reports them as
        // characters (c), words (w) and lines (l).
        std::size_t c;
        std::size_t w;
        std::size_t l;

        // The token definitions themselves.
        lex::token_def<> word;
        lex::token_def<> eol;
        lex::token_def<> any;
    };
    
    ///////////////////////////////////////////////////////////////////////////////
    int main(int argc, char* argv[])
    {
        typedef 
            lex::lexertl::token<char const*, lex::omit, boost::mpl::false_> 
            token_type;
    
        typedef lex::lexertl::actor_lexer<token_type> lexer_type;
    
        word_count_tokens<lexer_type> word_count_lexer;
    
        std::string str ("the lazy moon jumped over the brazen mold");
        char const* first = str.c_str();
        char const* last = &first[str.size()];
    
        lexer_type::iterator_type iter = word_count_lexer.begin(first, last);
        lexer_type::iterator_type end = word_count_lexer.end();
    
        while (iter != end && token_is_valid(*iter))
            ++iter;
    
        if (iter == end) {
            std::cout << "\nlines: " << word_count_lexer.l 
                << ", words: " << word_count_lexer.w 
                << ", characters: " << word_count_lexer.c 
                << "\n";
        }
        else {
            std::string rest(first, last);
            std::cout << "Lexical analysis failed\n" << "stopped at: \"" 
                << rest << "\"\n";
        }
        return 0;
    }
    

    Output:

    65536;65538;65536;65538;65536;65538;65536;65538;65536;65538;65536;65538;65536;65538;65536;
    lines: 0, words: 8, characters: 41