Is it possible to retrieve the ID of a token defined in the lexer like this: token_def<> tok;
from inside the semantic action of a Spirit parser?
What I'm trying to do is to use the token ID of every operator (like +, -, *, etc.) and retrieve a corresponding name for that operator (like add, subtract, times, etc.) from inside the parser semantic action.
As far as I understand, in a production like:
toks.symbol >> toks.plus >> toks.symbol;
if toks.plus
is of type token_def<> plus;
, _1 will refer to the first toks.symbol
and _2 will refer to the second toks.symbol
. Is that true? If so, how can I access the middle token (just to retrieve the ID)?
Thanks!
You can use the built-in lazy placeholder lex::_tokenid
(see the Boost.Spirit.Lex documentation on lexer semantic actions).
I adapted the second word-count sample from the tutorials to print the token ids on the fly:
#include <boost/config/warning_disable.hpp>
#include <boost/spirit/include/lex_lexertl.hpp>
#include <boost/spirit/include/phoenix_operator.hpp>
#include <boost/spirit/include/phoenix_statement.hpp>
#include <boost/spirit/include/phoenix_algorithm.hpp>
#include <boost/spirit/include/phoenix_core.hpp>
#include <iostream>
#include <string>
namespace lex = boost::spirit::lex;
namespace phx = boost::phoenix;
struct distance_func
{
template <typename Iterator1, typename Iterator2>
struct result : boost::iterator_difference<Iterator1> {};
template <typename Iterator1, typename Iterator2>
typename result<Iterator1, Iterator2>::type
operator()(Iterator1& begin, Iterator2& end) const
{
return std::distance(begin, end);
}
};
boost::phoenix::function<distance_func> const distance = distance_func();
// Lexer definition for the word-count example.
//
// Three token definitions are registered: `word` (a run of characters other
// than space, tab and newline), `eol` (a newline) and `any` (any single
// character). Each semantic action updates the public counters and then
// writes the id of the matched token, followed by ';', to std::cout.
template <typename Lexer>
struct word_count_tokens : lex::lexer<Lexer>
{
    word_count_tokens()
      : c(0)
      , w(0)
      , l(0)
      , word("[^ \t\n]+")
      , eol("\n")
      , any(".")
    {
        // Attach the token definitions, together with their actions, to the
        // lexer. The alternatives are listed in the order word, eol, any.
        this->self
            =   word [ ++phx::ref(w),
                       phx::ref(c) += distance(lex::_start, lex::_end),
                       phx::ref(std::cout) << lex::_tokenid << ";" ]
            |   eol  [ ++phx::ref(c),
                       ++phx::ref(l),
                       phx::ref(std::cout) << lex::_tokenid << ";" ]
            |   any  [ ++phx::ref(c),
                       phx::ref(std::cout) << lex::_tokenid << ";" ]
            ;
    }

    // Running totals maintained by the semantic actions:
    // c = characters, w = words, l = lines.
    std::size_t c, w, l;

    // Token definitions; no attribute type is specified for them.
    lex::token_def<> word, eol, any;
};
///////////////////////////////////////////////////////////////////////////////
int main(int argc, char* argv[])
{
typedef
lex::lexertl::token<char const*, lex::omit, boost::mpl::false_>
token_type;
typedef lex::lexertl::actor_lexer<token_type> lexer_type;
word_count_tokens<lexer_type> word_count_lexer;
std::string str ("the lazy moon jumped over the brazen mold");
char const* first = str.c_str();
char const* last = &first[str.size()];
lexer_type::iterator_type iter = word_count_lexer.begin(first, last);
lexer_type::iterator_type end = word_count_lexer.end();
while (iter != end && token_is_valid(*iter))
++iter;
if (iter == end) {
std::cout << "\nlines: " << word_count_lexer.l
<< ", words: " << word_count_lexer.w
<< ", characters: " << word_count_lexer.c
<< "\n";
}
else {
std::string rest(first, last);
std::cout << "Lexical analysis failed\n" << "stopped at: \""
<< rest << "\"\n";
}
return 0;
}
Output:
65536;65538;65536;65538;65536;65538;65536;65538;65536;65538;65536;65538;65536;65538;65536;
lines: 0, words: 8, characters: 41