I'm trying to parse a command template that can take any of the following forms:
SendCmd SomeCommand Left_Side = "Some Value";
SendCmd AnotherCmd "Some Literal" = Some_Value;
SendCmd AnotherCmd "Some Literal" = Some_Value "Other Literal" = "Something";
SendCmd SomeCommand Just_object_name;
This is what I have; it successfully parses all but the 1st case:
//#define BOOST_SPIRIT_DEBUG 1
#include <boost/fusion/adapted.hpp>
#include <boost/phoenix.hpp>
#include <boost/spirit/include/qi.hpp>
#include <iomanip>
namespace qi = boost::spirit::qi;
namespace px = boost::phoenix;
namespace Ast {
    using boost::recursive_wrapper;

    // Traits class that inherits everything from std::char_traits<char>; the only
    // purpose of the template parameter is to make each instantiation a distinct type.
    template <typename Tag> struct custom_string : std::char_traits<char> {};

    // String type that is distinct per Tag because its traits type is distinct per Tag.
    template <typename Tag>
    using String = std::basic_string<char, custom_string<Tag>>;

    using Ident   = String<struct TagIdent>;
    using Literal = String<struct TagLiteral>;
    using Number  = double;

    // represent ident[i][j]
    struct Object {
        Ident              id;
        std::vector<Ident> subscrpt;
    };

    // One `left = right` pair.
    struct GenericAssignment {
        boost::variant<Literal, Object>         left;
        boost::variant<Literal, Number, Object> right;
    };

    using GenAssignments = std::vector<GenericAssignment>;

    // A whole SendCmd statement: a literal or object, followed by either a single
    // object or a list of assignments.
    struct SendCmd {
        boost::variant<Literal, Object>        litobj;
        boost::variant<Object, GenAssignments> objasgn;
    };
} // namespace Ast
// Adapt the AST structs as Boost.Fusion sequences, listing the members in declaration order.
// No trailing ';' after the macro: it would be an empty declaration at namespace scope,
// which compilers flag under -Wextra-semi.
BOOST_FUSION_ADAPT_STRUCT(Ast::Object, id, subscrpt)
BOOST_FUSION_ADAPT_STRUCT(Ast::GenericAssignment, left, right)
BOOST_FUSION_ADAPT_STRUCT(Ast::SendCmd, litobj, objasgn)
namespace client {
// Qi grammar for one `SendCmd ... ;` statement; its synthesized attribute is Ast::SendCmd.
// The grammar itself is declared without a skipper: `start` wraps send_cmd_ in
// skip(space)[...] so whitespace skipping is set up internally.
template <typename Itr> struct DML : qi::grammar<Itr, Ast::SendCmd()> {
DML() : DML::base_type(start) {
using namespace qi;
start = skip(space)[send_cmd_];
// Identifier: a letter followed by any number of letters, digits or '_'.
// raw[] exposes the matched source range as the attribute.
ident_ = raw[alpha >> *(alnum | '_')];
number_ = double_;
// Double-quoted literal with backslash escapes. '>' is an expectation point: once the
// opening quote has matched, a missing closing quote throws qi::expectation_failure.
literal_ = '"' > *('\\' >> char_ | ~char_('"')) > '"';
// An identifier followed by zero or more `[ident]` subscripts.
object_ = ident_ >> *('[' >> ident_ >> ']');
// `left = right`, where left is a literal or object and right is a literal, number or object.
gen_asgn_ = (literal_ | object_) >> '=' >> (literal_ | number_ | object_);
// Kleene star: zero or more assignments, so this rule also succeeds without consuming input.
gen_asgns_ = *gen_asgn_;
// Alternatives are tried left to right and the first one that matches is kept, so
// object_ is attempted before gen_asgns_ here.
send_cmd_ = no_case["sendcmd"] >> (literal_ | object_) >> (object_ | gen_asgns_)
>> ';'
;
// Names the listed rules (and enables tracing for them when BOOST_SPIRIT_DEBUG is defined).
BOOST_SPIRIT_DEBUG_NODES(
(ident_)(literal_)(number_)(object_)(gen_asgn_)(send_cmd_)
)
}
private:
qi::rule<Itr, Ast::SendCmd()> start;
using Skipper = qi::space_type;
// Rules declared without Skipper: whitespace is not skipped inside them.
qi::rule<Itr, Ast::Literal()> literal_;
qi::rule<Itr, Ast::Number()> number_;
qi::rule<Itr, Ast::Ident()> ident_;
// Rules declared with Skipper: whitespace is skipped between their components.
qi::rule<Itr, Ast::GenericAssignment(), Skipper> gen_asgn_;
qi::rule<Itr, Ast::GenAssignments(), Skipper> gen_asgns_;
qi::rule<Itr, Ast::SendCmd(), Skipper> send_cmd_;
qi::rule<Itr, Ast::Object(), Skipper> object_;
};
} // namespace client
// Sample statements that main() runs through the grammar, in output order.
static std::string const test_cases[]{
    R"(SendCmd SomeCommand Left_Side = "Some Value";)",
    R"(SendCmd AnotherCmd "Some Literal" = Some_Value;)",
    R"(SendCmd AnotherCmd "Some Literal" = Some_Value "Other Literal" = "Something";)",
    R"(SendCmd SomeCommand Just_object_name;)",
};
int main() {
using It = std::string::const_iterator;
static const client::DML<It> p;
int i = 0;
for (std::string const& input : test_cases) {
try {
Ast::SendCmd sc;
std::cout << "Case #" << ++i << std::endl;
std::cout << input;
if (qi::parse(begin(input), end(input), p, sc)) {
std::cout << " [Success]" << std::endl;
}
else {
std::cout << " [INVALID]" << std::endl;
}
}
catch (qi::expectation_failure<It> const& ef) {
auto f = begin(input);
auto p = ef.first - input.begin();
//#pragma GCC diagnostic push
//#pragma GCC diagnostic ignored "-Wsign-conversion"
auto bol = input.find_last_of("\r\n", p) + 1;
auto line = std::count(f, f + bol, '\n') + 1;
auto eol = input.find_first_of("\r\n", p);
std::cerr << " -> EXPECTED " << ef.what_ << " in line:" << line << "\n"
<< input.substr(bol, eol - bol) << "\n"
<< std::setw(static_cast<int>(p - bol)) << ""
<< "^--- here" << std::endl;
//#pragma GCC diagnostic pop
}
}
}
And the result of running this code is
Case #1
SendCmd SomeCommand Left_Side = "Some Value"; [INVALID]
Case #2
SendCmd AnotherCmd "Some Literal" = Some_Value; [Success]
Case #3
SendCmd AnotherCmd "Some Literal" = Some_Value "Other Literal" = "Something"; [Success]
Case #4
SendCmd SomeCommand Just_object_name; [Success]
The 1st case fails because, when the rule below parses Left_Side = "Some Value", it successfully parses Left_Side as object_, then expects the ';' to follow and flags the input as invalid. It does not try the next alternative at all.
send_cmd_ = no_case["sendcmd"] >> (literal_ | object_) >> (object_ | gen_asgns_)
>> ';'
;
My question is: is it possible to make it try object_ and, if that fails, try gen_asgns_ before flagging the input as invalid?
By the way, switching the order of (object_ | gen_asgns_) makes the 4th case fail.
EDIT
I did try to use qi::hold[object_] | gen_asgns_, but it made no difference.
Thanks for your help
First of all, what a beautiful testbed for the reproducer :)
Second of all, your analysis is spot on.
So, I'd say the real question is: why is #4 not parsed with (gen_asgns_ | object_)?
The problem is that gen_asgns_ ALWAYS matches: *p always matches, because it also accepts the empty string. To avoid that, use +p, which requires at least a single match:
//#define BOOST_SPIRIT_DEBUG 1
#include <boost/fusion/adapted.hpp>
#include <boost/spirit/include/qi.hpp>
#include <iomanip>
namespace qi = boost::spirit::qi;
namespace Ast {
    // Tag-parameterised traits class; it adds nothing to std::char_traits<char> and exists
    // only so that every Tag produces a different traits type.
    template <typename Tag> struct custom_string : std::char_traits<char> {};

    // A std::basic_string whose type differs per Tag (via the traits parameter).
    template <typename Tag>
    using String = std::basic_string<char, custom_string<Tag>>;

    using Ident   = String<struct TagIdent>;
    using Literal = String<struct TagLiteral>;
    using Number  = double;

    // represent ident[i][j]
    struct Object {
        Ident              id;
        std::vector<Ident> subscrpt;
    };

    // A single `left = right` pair.
    struct GenericAssignment {
        boost::variant<Literal, Object>         left;
        boost::variant<Literal, Number, Object> right;
    };

    using GenAssignments = std::vector<GenericAssignment>;

    // A full SendCmd statement: a literal or object, then either one object or a list
    // of assignments.
    struct SendCmd {
        boost::variant<Literal, Object>        litobj;
        boost::variant<Object, GenAssignments> objasgn;
    };
} // namespace Ast
// Adapt the AST structs as Boost.Fusion sequences; members are listed in declaration order.
BOOST_FUSION_ADAPT_STRUCT(Ast::Object, id, subscrpt)
BOOST_FUSION_ADAPT_STRUCT(Ast::GenericAssignment, left, right)
BOOST_FUSION_ADAPT_STRUCT(Ast::SendCmd, litobj, objasgn)
namespace client {
    // Qi grammar for one `SendCmd ... ;` statement; its synthesized attribute is Ast::SendCmd.
    // The grammar is declared without a skipper: `start` wraps send_cmd_ in skip(space)[...]
    // so whitespace skipping is set up internally.
    template <typename It> struct DML : qi::grammar<It, Ast::SendCmd()> {
        DML() : DML::base_type(start) {
            using namespace qi;

            start = skip(space)[send_cmd_];

            // Identifier: a letter followed by any number of letters, digits or '_'.
            ident_  = raw[alpha >> *(alnum | '_')];
            number_ = double_;
            // Double-quoted literal with backslash escapes. '>' is an expectation point:
            // after the opening quote, a missing closing quote throws qi::expectation_failure.
            literal_ = '"' > *('\\' >> char_ | ~char_('"')) > '"';

            // An identifier followed by zero or more `[ident]` subscripts.
            object_ = ident_ >> *('[' >> ident_ >> ']');

            gen_asgn_ = (literal_ | object_) >> '=' >> (literal_ | number_ | object_);
            // '+' rather than '*': at least one assignment is required, so this rule fails
            // (instead of matching the empty string) when no assignment is present.
            gen_asgns_ = +gen_asgn_;

            // gen_asgns_ is tried first; because it fails when there is no assignment, the
            // parser can then fall back to a plain object_.
            send_cmd_ = no_case["sendcmd"] >> (literal_ | object_) //
                //>> (object_ | gen_asgns_) //
                >> (gen_asgns_ | object_) //
                >> ';'                    //
                ;

            // Name every rule, including start and gen_asgns_, so that each one shows up
            // in the trace when BOOST_SPIRIT_DEBUG is defined.
            BOOST_SPIRIT_DEBUG_NODES(
                (start)(ident_)(literal_)(number_)(object_)(gen_asgn_)(gen_asgns_)(send_cmd_))
        }

      private:
        qi::rule<It, Ast::SendCmd()> start;

        using Skipper = qi::space_type;

        // Rules declared without Skipper: whitespace is not skipped inside them.
        qi::rule<It, Ast::Literal()> literal_;
        qi::rule<It, Ast::Number()>  number_;
        qi::rule<It, Ast::Ident()>   ident_;

        // Rules declared with Skipper: whitespace is skipped between their components.
        qi::rule<It, Ast::GenericAssignment(), Skipper> gen_asgn_;
        qi::rule<It, Ast::GenAssignments(), Skipper>    gen_asgns_;
        qi::rule<It, Ast::SendCmd(), Skipper>           send_cmd_;
        qi::rule<It, Ast::Object(), Skipper>            object_;
    };
} // namespace client
// The four sample statements exercised by main(); the order matches the printed case numbers.
static std::string const test_cases[]{
    R"(SendCmd SomeCommand Left_Side = "Some Value";)",
    R"(SendCmd AnotherCmd "Some Literal" = Some_Value;)",
    R"(SendCmd AnotherCmd "Some Literal" = Some_Value "Other Literal" = "Something";)",
    R"(SendCmd SomeCommand Just_object_name;)",
};
int main() {
using It = std::string::const_iterator;
static const client::DML<It> p;
for (int i = 0; std::string const& input : test_cases) {
try {
Ast::SendCmd sc;
std::cout << "Case #" << ++i << std::endl;
std::cout << input;
if (qi::parse(begin(input), end(input), p, sc)) {
std::cout << " [Success]" << std::endl;
}
else {
std::cout << " [INVALID]" << std::endl;
}
} catch (qi::expectation_failure<It> const& ef) {
auto f = begin(input);
auto p = ef.first - input.begin();
#pragma GCC diagnostic push
#pragma GCC diagnostic ignored "-Wsign-conversion"
auto bol = input.find_last_of("\r\n", p) + 1;
auto line = std::count(f, f + bol, '\n') + 1;
auto eol = input.find_first_of("\r\n", p);
std::cerr << " -> EXPECTED " << ef.what_ << " in line:" << line << "\n"
<< input.substr(bol, eol - bol) << "\n"
<< std::setw(static_cast<int>(p - bol)) << ""
<< "^--- here" << std::endl;
#pragma GCC diagnostic pop
}
}
}
Printing
Case #1
SendCmd SomeCommand Left_Side = "Some Value"; [Success]
Case #2
SendCmd AnotherCmd "Some Literal" = Some_Value; [Success]
Case #3
SendCmd AnotherCmd "Some Literal" = Some_Value "Other Literal" = "Something"; [Success]
Case #4
SendCmd SomeCommand Just_object_name; [Success]