Boost Spirit Qi parsing certainly is a unique application of C++, one that comes with a steep learning curve. In this case I am attempting to parse a string containing syntactically correct C++ list-initialization of a struct
containing a std::vector
of std::tuple<std::string, int>
. Here is the declaration of the struct
:
// List of labels; each element pairs a std::string with an int.
typedef std::vector<std::tuple<std::string, int>> label_t;
// Record that the parser is meant to populate. The member order matches the
// order of the values in the list-initialization text being parsed.
struct BulkDataParmas
{
std::string strUUID;
short subcam;
long long pts_beg;
long long pts_len;
long long pts_gap;
// The member whose presence makes the attribute binding fail, per the surrounding text.
label_t labels;
};
And here is my failing attempt to bind such a structure to a Qi attribute. The commented out start
works as expected if I also comment out the vector
member of the struct
. (I've also tried std::pair
instead of std::tuple
).
// Adapt BulkDataParmas as a Boost.Fusion sequence so that Qi can assign the
// parsed values member by member. Each entry is a (type, member-name) pair,
// listed in the same order as the members are declared.
BOOST_FUSION_ADAPT_STRUCT
(
BulkDataParmas,
(std::string, strUUID)
(short, subcam)
(long long, pts_beg)
(long long, pts_len)
(long long, pts_gap)
(label_t, labels)
)
// Qi grammar intended to parse a brace-initialised BulkDataParmas record.
// The grammar is declared with an ascii::space_type skipper, so it has to be
// invoked with a matching skipper.
template <typename Iterator>
struct load_parser : boost::spirit::qi::grammar<Iterator, BulkDataParmas(), boost::spirit::ascii::space_type>
{
load_parser() : load_parser::base_type(start)
{
namespace qi = boost::spirit::qi;
namespace ascii = boost::spirit::ascii;
using qi::attr;
using qi::short_;
using qi::int_;
using qi::long_long;
using qi::lit;
using qi::xdigit;
using qi::lexeme;
using ascii::char_;
using boost::proto::deep_copy;
// Building blocks for the UUID text. Despite the names, these match
// 4, 8 and 12 hexadecimal digits respectively (hex2_ is four xdigit parsers).
// deep_copy is used because the sub-expressions are held in `auto` variables.
auto hex2_ = deep_copy(xdigit >> xdigit >> xdigit >> xdigit);
auto hex4_ = deep_copy(hex2_ >> hex2_);
auto hex6_ = deep_copy(hex4_ >> hex2_);
// Double-quoted 8-4-4-4-12 hex layout. The dashes are matched with char_('-'),
// so they are part of the exposed attribute; the quotes are plain literals.
auto fmt_ = deep_copy('"' >> hex4_ >> char_('-') >> hex2_ >> char_('-') >> hex2_ >> char_('-') >> hex2_ >> char_('-') >> hex6_ >> '"');
// Collect the matched characters into a single std::string.
uuid = qi::as_string[fmt_];
// One or more non-quote characters between double quotes.
quoted_string %= lexeme['"' >> +(char_ - '"') >> '"'];
// One label entry: { "text", number }.
// NOTE(review): the body yields a string followed by an int, yet `label` is
// declared below with a std::string() attribute rather than
// label_t::value_type() -- likely related to the reported failure; verify.
label = '{' >> quoted_string >> ',' >> int_ >> '}';
// Whole record: the five scalar fields followed by an optional,
// comma-separated list of labels inside its own braces.
start = '{' >> uuid >> ',' >> short_ >> ',' >> long_long >> ',' >> long_long >> ',' >> long_long >> ',' >> '{' >> -(label >> *(',' >> label)) >>'}' >> '}';
// start = '{' >> uuid >> ',' >> short_ >> ',' >> long_long >> ',' >> long_long >> ',' >> long_long >> '}';
}
private:
// uuid and quoted_string are declared without a skipper.
boost::spirit::qi::rule<Iterator, std::string()> uuid;
boost::spirit::qi::rule<Iterator, std::string()> quoted_string;
boost::spirit::qi::rule<Iterator, std::string(), boost::spirit::ascii::space_type> label;
boost::spirit::qi::rule<Iterator, BulkDataParmas(), boost::spirit::ascii::space_type> start;
};
Here is an example string to parse:
"{ \"68965363-2d87-46d4-b05d-f293f2c8403b\", 0, 1583798400000000, 86400000000, 600000000, { { \"motorbike\", 5 }, { \"aeroplane\", 6 } } };"
Beyond the two things you mentioned (which are correct), I'd suggest
some simplifications:
uuid = '"' >> qi::raw [
hex_<4>{} >> qi::repeat(3)['-' >> hex_<2>{}] >> '-' >> hex_<6>{}
] >> '"';
Note, this removes all the sub-expressions, as-string and deepcopy, instead using the integer parser:
// Matches exactly 2*N hexadecimal digits (i.e. N bytes). An unsigned parser is
// used on purpose: qi::int_parser would also accept a leading '+' or '-' sign,
// letting malformed UUID groups such as "--2d87" slip through.
template<int N> using hex_ = boost::spirit::qi::uint_parser<std::uintmax_t, 16, 2*N, 2*N>;
The raw[]
parser will nicely expose the source string matched.
Next up,
quoted_string = '"' >> *~qi::char_('"') >> '"';
Here I'd suggest using *
to accept empty strings (this is frequently
"the point" of quoted strings, so we can be explicit about embedded
whitespace or intentionally empty strings). Also, use ~charset
, which is
more efficient.
Also dropped the lexeme[]
because the rule is already declared without a skipper anyways.
Finishing up:
label = '{' >> quoted_string >> ',' >> qi::int_ >> '}';
start = qi::skip(ascii::space) [ '{'
>> uuid >> ','
>> qi::auto_ >> ','
>> qi::auto_ >> ','
>> qi::auto_ >> ','
>> qi::auto_ >> ','
>> '{' >> -(label % ',') >> '}'
>> '}' >> ';'
];
Note that I incorporated the choice of skipper, so you don't have to tediously pass the correct thing in phrase_parse
. The skipper is usually not something the caller should be able to change anyways.
Now let's also modernize the adaptation:
// Adaptation that lists only the member names (no member types are spelled out).
BOOST_FUSION_ADAPT_STRUCT(BulkDataParams, strUUID, subcam, pts_beg, pts_len, pts_gap, labels)
After which you can respell the types in modern fashion without risking any compatibility issues. Note this is also a reason to prefer qi::auto_
in the start rule there, so you don't get painful surprises when e.g. the parser results get implicitly converted to the target type in unexpected ways.
// Record filled in by the parser. The member order matches the order of the
// values in the parsed text. Scalar members carry value-initialising default
// member initializers so a default-constructed object never holds
// indeterminate values (e.g. when a parse fails part-way through).
struct BulkDataParams {
    std::string strUUID;
    int16_t subcam{};
    int64_t pts_beg{};
    int64_t pts_len{};
    int64_t pts_gap{};
    label_t labels;
};
Now let's throw in debug output and a test body:
#define BOOST_SPIRIT_DEBUG
#include <boost/spirit/include/qi.hpp>
#include <boost/fusion/adapted/std_tuple.hpp>
#include <cstdint>
#include <iomanip>
#include <iostream>
// List of labels; each element pairs a std::string with an int.
using label_t = std::vector<std::tuple<std::string, int>>;

// Stream inserters used for the debug/test output.
// NOTE: they are placed in namespace std so that argument-dependent lookup can
// find them for these std-only types. The standard does not sanction adding
// such overloads to namespace std, so treat this as a demo-only shortcut.
namespace std {
    // Prints a single label as ["text",number]; the text is written via std::quoted.
    ostream& operator<<(ostream& os, label_t::value_type const& entry) {
        os << "[" << std::quoted(std::get<0>(entry)) << ",";
        os << std::get<1>(entry) << "]";
        return os;
    }

    // Prints every label followed by a comma, all wrapped in braces.
    // The comma after the last element is intentional (matches the sample output).
    ostream& operator<<(ostream& os, label_t const& labels) {
        os << "{";
        for (auto const& entry : labels) {
            os << entry;
            os << ",";
        }
        os << "}";
        return os;
    }
}
// Record filled in by the parser. The member order matches the order of the
// values in the parsed text. Scalar members carry value-initialising default
// member initializers so a default-constructed object never holds
// indeterminate values (e.g. when a parse fails part-way through).
struct BulkDataParams {
    std::string strUUID;
    int16_t subcam{};
    int64_t pts_beg{};
    int64_t pts_len{};
    int64_t pts_gap{};
    label_t labels;
};

// Adapt the struct as a Boost.Fusion sequence so Qi can assign it member by
// member; only the member names are listed, in declaration order.
BOOST_FUSION_ADAPT_STRUCT(BulkDataParams, strUUID, subcam, pts_beg, pts_len, pts_gap, labels)
// Qi grammar that parses one brace-initialised BulkDataParams record followed
// by ';', for example:
//   { "<uuid>", 0, 1583798400000000, 86400000000, 600000000, { { "motorbike", 5 } } };
// Whitespace skipping (ascii::space) is set up inside the grammar itself, so
// callers invoke it with qi::parse and do not pass a skipper.
template <typename Iterator> struct load_parser : boost::spirit::qi::grammar<Iterator, BulkDataParams()> {
    load_parser() : load_parser::base_type(start) {
        namespace qi = boost::spirit::qi;
        namespace ascii = boost::spirit::ascii;

        // Double-quoted 8-4-4-4-12 hex layout. raw[] exposes the matched source
        // text (dashes included, quotes excluded) instead of the parsed numbers.
        uuid = '"' >> qi::raw [
                hex_<4>{} >> qi::repeat(3)['-' >> hex_<2>{}] >> '-' >> hex_<6>{}
            ] >> '"';

        // Possibly empty run of non-quote characters between double quotes.
        // The rule has no skipper, so embedded whitespace is preserved.
        quoted_string = '"' >> *~qi::char_('"') >> '"';

        // One label entry: { "text", number }.
        label = '{' >> quoted_string >> ',' >> qi::int_ >> '}';

        // Whole record. qi::auto_ picks the numeric parser from the type of the
        // corresponding struct member, so the grammar follows the struct.
        start = qi::skip(ascii::space) [ '{'
            >> uuid >> ','
            >> qi::auto_ >> ','
            >> qi::auto_ >> ','
            >> qi::auto_ >> ','
            >> qi::auto_ >> ','
            >> '{' >> -(label % ',') >> '}'
            >> '}' >> ';'
        ];

        BOOST_SPIRIT_DEBUG_NODES(
            (uuid) (quoted_string) (label) (start)
        )
    }

    // Matches exactly 2*N hexadecimal digits (i.e. N bytes). An unsigned parser
    // is used on purpose: qi::int_parser would also accept a leading '+' or '-'
    // sign, letting malformed UUID groups such as "--2d87" slip through.
    template<int N> using hex_ = boost::spirit::qi::uint_parser<std::uintmax_t, 16, 2*N, 2*N>;

private:
    // uuid and quoted_string deliberately have no skipper (whitespace is significant).
    boost::spirit::qi::rule<Iterator, std::string()> uuid;
    boost::spirit::qi::rule<Iterator, std::string()> quoted_string;
    // label is used inside the skip[] directive of start, hence the space_type skipper.
    boost::spirit::qi::rule<Iterator, label_t::value_type(), boost::spirit::ascii::space_type> label;
    boost::spirit::qi::rule<Iterator, BulkDataParams()> start;
};
int main() {
for (std::string const input : {
R"({ "68965363-2d87-46d4-b05d-f293f2c8403b", 0, 1583798400000000, 86400000000, 600000000, { { "motorbike", 5 }, { "aeroplane", 6 } } };)",
})
{
auto f = begin(input), l = end(input);
BulkDataParams bdp;
load_parser<std::string::const_iterator> p;
if (parse(f, l, p, bdp)) {
std::cout << "Parsed: " << boost::fusion::as_vector(bdp) << "\n";
} else {
std::cout << "Parse Failed\n";
}
if (f != l) {
std::cout << "Remaining unparsed: " << std::quoted(std::string(f,l)) << "\n";
}
}
}
Regular output:
Parsed: (68965363-2d87-46d4-b05d-f293f2c8403b 0 1583798400000000 86400000000 600000000 {["motorbike",5],["aeroplane",6],})
Debug output:
<start>
<try>{ "68965363-2d87-46d</try>
<uuid>
<try>"68965363-2d87-46d4-</try>
<success>, 0, 158379840000000</success>
<attributes>[[6, 8, 9, 6, 5, 3, 6, 3, -, 2, d, 8, 7, -, 4, 6, d, 4, -, b, 0, 5, d, -, f, 2, 9, 3, f, 2, c, 8, 4, 0, 3, b]]</attributes>
</uuid>
<label>
<try> { "motorbike", 5 },</try>
<quoted_string>
<try>"motorbike", 5 }, { </try>
<success>, 5 }, { "aeroplane"</success>
<attributes>[[m, o, t, o, r, b, i, k, e]]</attributes>
</quoted_string>
<success>, { "aeroplane", 6 }</success>
<attributes>[[[m, o, t, o, r, b, i, k, e], 5]]</attributes>
</label>
<label>
<try> { "aeroplane", 6 } </try>
<quoted_string>
<try>"aeroplane", 6 } } }</try>
<success>, 6 } } };</success>
<attributes>[[a, e, r, o, p, l, a, n, e]]</attributes>
</quoted_string>
<success> } };</success>
<attributes>[[[a, e, r, o, p, l, a, n, e], 6]]</attributes>
</label>
<success></success>
<attributes>[[[6, 8, 9, 6, 5, 3, 6, 3, -, 2, d, 8, 7, -, 4, 6, d, 4, -, b, 0, 5, d, -, f, 2, 9, 3, f, 2, c, 8, 4, 0, 3, b], 0, 1583798400000000, 86400000000, 600000000, [[[m, o, t, o, r, b, i, k, e], 5], [[a, e, r, o, p, l, a, n, e], 6]]]]</attributes>
</start>