boost-spirit boost-spirit-qi boost-spirit-lex

Retrieving the ID of a token_def<>

Is it possible to retrieve the ID of a token defined in the lexer like this: token_def<> tok; from inside the semantic action of a Spirit parser?

What I'm trying to do is to use the token ID of every operator (like +, -, *, etc.) and retrieve a corresponding name for that operator (like add, subtract, times, etc.) from inside the parser semantic action.

As far as I understand, in a production like:

toks.symbol >> toks.plus >> toks.symbol;

If toks.plus is of type token_def<> plus;, _1 will refer to the first toks.symbol and _2 will refer to the second toks.symbol. Is that true? If so, how can I access the middle token (just to retrieve the ID)?

Thanks!

Solution

You can use the builtin lazy placeholder lex::_tokenid, see docs:

Lexer Semantic Actions Using Phoenix

I adapted the second word-count sample from the tutorials to print the token ids on the fly:

#include <boost/config/warning_disable.hpp>
#include <boost/spirit/include/lex_lexertl.hpp>
#include <boost/spirit/include/phoenix_operator.hpp>
#include <boost/spirit/include/phoenix_statement.hpp>
#include <boost/spirit/include/phoenix_algorithm.hpp>
#include <boost/spirit/include/phoenix_core.hpp>

#include <iostream>
#include <string>

namespace lex = boost::spirit::lex;
namespace phx = boost::phoenix;

// Function object that forwards to std::distance so the computation can be
// wrapped in a boost::phoenix::function and used lazily inside semantic
// actions (see `distance` below).
struct distance_func
{
    // Return-type metafunction: the difference type of the first iterator.
    template <typename Iterator1, typename Iterator2>
    struct result : boost::iterator_difference<Iterator1> {};

    // Returns std::distance(from, to). Both iterators are taken by
    // non-const reference, exactly as the call sites supply them.
    template <typename Iterator1, typename Iterator2>
    typename result<Iterator1, Iterator2>::type
    operator()(Iterator1& from, Iterator2& to) const
    {
        typedef typename result<Iterator1, Iterator2>::type difference_type;
        difference_type const span = std::distance(from, to);
        return span;
    }
};

// Lazy, Phoenix-callable wrapper around distance_func.
boost::phoenix::function<distance_func> const distance = distance_func();

// Lexer definition that counts characters, words and lines while tokenizing,
// and prints the ID of every matched token to std::cout followed by ';'.
//
// Lexer is the underlying lexer type handed to lex::lexer<>; main() in this
// file instantiates it with lex::lexertl::actor_lexer<>.
template <typename Lexer>
struct word_count_tokens : lex::lexer<Lexer>
{
    // Zeroes the three counters, defines the three token patterns and attaches
    // a semantic action to each of them. No explicit token IDs are passed to
    // the token_def<> constructors here, only the patterns.
    word_count_tokens()
      : c(0), w(0), l(0)
      , word("[^ \t\n]+")     // define tokens
      , eol("\n")
      , any(".")
    {
        using boost::spirit::lex::_start;
        using boost::spirit::lex::_end;
        using boost::spirit::lex::_tokenid;
        using boost::phoenix::ref;

        // associate tokens with the lexer
        // Each bracketed action is a comma-separated sequence of lazy Phoenix
        // expressions:
        //   word: ++w, c += distance(_start, _end), print _tokenid and ";"
        //   eol : ++c, ++l, print _tokenid and ";"
        //   any : ++c, print _tokenid and ";"
        // distance(_start, _end) is presumably the length of the matched
        // text -- confirm against the Spirit.Lex documentation for
        // _start/_end.
        this->self 
            =   word  [++ref(w), ref(c) += distance(_start, _end), phx::ref(std::cout) << _tokenid << ";" ]
            |   eol   [++ref(c), ++ref(l), phx::ref(std::cout) << _tokenid << ";" ] 
            |   any   [++ref(c), phx::ref(std::cout) << _tokenid << ";" ]
            ;
    }

    // Counters updated by the semantic actions above; main() reports them as
    // characters (c), words (w) and lines (l).
    std::size_t c, w, l;
    // Token definitions referenced in the lexer definition above.
    lex::token_def<> word, eol, any;
};

///////////////////////////////////////////////////////////////////////////////
// Tokenizes a fixed sentence with word_count_tokens. The lexer's semantic
// actions print each token ID while the input is consumed; afterwards the
// line/word/character totals are reported, or a failure message if the
// lexer stopped on an invalid token before reaching the end.
int main(int argc, char* argv[])
{
    // Token type: iterates over char const*, carries no attribute value
    // (lex::omit); the last parameter is boost::mpl::false_.
    typedef lex::lexertl::token<
        char const*, lex::omit, boost::mpl::false_
    > token_type;

    // actor_lexer is the lexer type used here, since the token definitions
    // carry semantic actions.
    typedef lex::lexertl::actor_lexer<token_type> lexer_type;

    word_count_tokens<lexer_type> word_count_lexer;

    std::string str ("the lazy moon jumped over the brazen mold");
    char const* first = str.c_str();
    char const* last = first + str.size();

    lexer_type::iterator_type iter = word_count_lexer.begin(first, last);
    lexer_type::iterator_type end = word_count_lexer.end();

    // Advance token by token; stop early on the first invalid token.
    for (; iter != end; ++iter) {
        if (!token_is_valid(*iter))
            break;
    }

    if (iter != end) {
        // Lexing stopped before the end of the token stream.
        std::string rest(first, last);
        std::cout << "Lexical analysis failed\n";
        std::cout << "stopped at: \"" << rest << "\"\n";
        return 0;
    }

    std::cout << "\nlines: " << word_count_lexer.l;
    std::cout << ", words: " << word_count_lexer.w;
    std::cout << ", characters: " << word_count_lexer.c;
    std::cout << "\n";
    return 0;
}

Output:

65536;65538;65536;65538;65536;65538;65536;65538;65536;65538;65536;65538;65536;65538;65536;
lines: 0, words: 8, characters: 41