This is my small sample. I have a language, and when parsing I have input like:
foo()
nextfoo() <-- here an error appears because of the keyword "next"
So the grammar is:
// Type of a deep-copied `ascii::no_case[qi::lit(...)]` parser expression.
// BOOST_TYPEOF is applied to the same expression shape that nocaselit() builds,
// so the typedef and the function's return value always agree.
typedef boost::proto::result_of::deep_copy<BOOST_TYPEOF(ascii::no_case[qi::lit(std::wstring())])>::type nocaselit_return_type;

// Builds a case-insensitive literal parser for `keyword`.
//
// The expression is deep-copied so the returned parser holds its operands by
// value instead of referencing temporaries that die at the end of this call.
nocaselit_return_type nocaselit(const std::wstring& keyword)
{
    return boost::proto::deep_copy(ascii::no_case[qi::lit(keyword)]);
}
// Reserved words of the language: an ordered alternative of case-insensitive
// literals, each built by nocaselit().
keywords = nocaselit(L"next")
| nocaselit(L"else")
| nocaselit(L"if")
| nocaselit(L"then")
| nocaselit(L"for")
| nocaselit(L"to")
| nocaselit(L"dim")
| nocaselit(L"true")
| nocaselit(L"false")
| nocaselit(L"as")
| nocaselit(L"class")
| nocaselit(L"end")
| nocaselit(L"function")
| nocaselit(L"new")
| nocaselit(L"sub");
// An identifier: one or more letters/underscores followed by any number of
// alphanumerics/underscores, matched as a lexeme.
// The `!keywords` negative lookahead fails whenever the input merely *starts*
// with a keyword, so a name such as "nextfoo" is rejected as well — this is
// the problem the question describes.
name_valid = !keywords>> lexeme[+(boost::spirit::standard_wide::alpha | '_') >> *(boost::spirit::standard_wide::alnum | '_')];
I learned from the documentation and Google that I have to write something like the following to make the parser work correctly with keywords:
name_valid = distinct(Keywords)[ lexeme[+(boost::spirit::standard_wide::alpha | '_') >> *(boost::spirit::standard_wide::alnum | '_')] ];
But this doesn't work. Can somebody explain to me why?
Special question: as long as I use the syntax above I get a template compiler error; the working sample must be written as follows (with the keywords list inline instead of as a rule). I assume this has something to do with the type specification of the rule, but what is the correct one?
name_valid = distinct(nocaselit(L"next")| nocaselit(L"else") | ... )
[ lexeme[+(boost::spirit::standard_wide::alpha | '_') >> *(boost
thank you
The `distinct` directive takes the subject parser inside the `[]` block instead of the `()`. Inside the `()`, specify the exclusion to disallow at the boundary (most often a character set comprising identifier characters).
Also consider using `qi::symbols`, which works well with `qi::no_case` but uses a trie internally, which removes the need for any backtracking.
When I'm close to computer I'll provide a working example. Meanwhile feel free to look here for existing examples: How to parse reserved words correctly in boost spirit
#include <iostream>
#include <string>

#include <boost/spirit/include/qi.hpp>
#include <boost/spirit/repository/include/qi_distinct.hpp>
namespace qi = boost::spirit::qi;
namespace qr = boost::spirit::repository::qi;
namespace enc = boost::spirit::standard_wide;
/// Grammar for a single parameterless call such as `foo()`, with whitespace
/// skipped between tokens.
///
/// Identifiers may not be reserved words, but they may *begin* with one
/// (`nextfoo` is valid): a keyword only counts as a keyword when it is not
/// immediately followed by another identifier character.
///
/// @tparam It  Forward iterator over `wchar_t` input.
template <typename It>
struct Grammar : qi::grammar<It> {
    Grammar() : Grammar::base_type(start) {
        using namespace qi;

        // distinct(tail)[subject] matches `subject` only when it is NOT
        // directly followed by `tail`. The tail here is the identifier
        // character set, which gives keywords a proper word boundary.
        // copy() stores the tail expression by value inside `kw`.
        auto kw = qr::distinct(copy(enc::alnum | L'_'));

        start         = skip(enc::space) [function_call];
        function_call = identifier >> L'(' >> L')';
        // raw[] exposes the matched source range as the identifier text.
        identifier    = !keyword >> raw[(enc::alpha|L'_') >> *(enc::alnum|L'_')];
        // Use the wide-character no_case so case folding is done with the same
        // encoding as the input; the symbol table entries are lower-case, as
        // no_case lookup requires.
        keyword       = kw[ enc::no_case[keywords] ];

        // Expands to nothing unless BOOST_SPIRIT_DEBUG is defined.
        // NOTE(review): debug output of wide attributes has not been checked.
        BOOST_SPIRIT_DEBUG_NODES((start)(function_call)(identifier)(keyword));
    }

  private:
    qi::rule<It> start;
    qi::rule<It, enc::space_type> function_call;

    /// Reserved words, stored lower-case in a symbol table (trie lookup, so
    /// no backtracking across alternatives).
    struct keywords_t : qi::symbols<wchar_t> {
        keywords_t() {
            this->add
                (L"as")(L"class")(L"dim")(L"else")(L"end")(L"false")
                (L"for")(L"function")(L"if")(L"new")(L"next")(L"sub")
                (L"then")(L"to")(L"true");
        }
    } keywords;

    // Implicit lexemes: declared without a skipper, so no whitespace is
    // skipped inside them. The attribute is a wide string to match the
    // wchar_t input instead of narrowing each character to char.
    qi::rule<It, std::wstring()> identifier, keyword;
};
int main() {
using It = std::wstring::const_iterator;
Grammar<It> const g;
for (std::wstring input : {
L"foo()",
L"nextfoo()",
})
{
It f=input.begin(), l=input.end();
if (parse(f, l, g)) {
std::wcout << L"Parse success\n";
} else {
std::wcout << L"Parse failed\n";
}
if (f!=l) {
std::wcout << L"Remaining unparsed input: '" << std::wstring(f,l) << L"\n";
}
}
}
Prints
Parse success
Parse success
As expected