I have a simple parser which can parse lists of ints or quoted strings.
If I do the SIMPLE_CASE where I take the input to be:
std::string input1 = "{ INT: 42, 24 STR: \"Smith\", \"John\" }";
it parses correctly into my_record
, which contains a list of ints and a list of std::string.
I want to modify this code to be generic so that it can take zero or more INT lists and zero or more STR lists in arbitrary order and stuff them into my_record
in the proper order. I would like my second, more generic test case:
std::string input1 = "{ STR: \"Joe\" INT: 42, 24 STR: \"Smith\", \"John\" }";
to parse as:
client::my_record expected1 { { 42, 24 }, {"Joe", "Smith", "John"} };
The code below works fine if I run:
/tmp$ g++ -DSIMPLE_CASE -g -std=c++11 sandbox.cpp -o sandbox && ./sandbox
but I'm not sure how to get the general case to work when running this:
/tmp$ g++ -g -std=c++11 sandbox.cpp -o sandbox && ./sandbox
Code for sandbox.cpp
#include <boost/config/warning_disable.hpp>
#include <boost/spirit/include/qi.hpp>
#include <boost/spirit/include/phoenix_core.hpp>
#include <boost/spirit/include/phoenix_operator.hpp>
#include <boost/spirit/include/phoenix_object.hpp>
#include <boost/fusion/include/adapt_struct.hpp>
#include <boost/fusion/include/io.hpp>
#include <string>
#include <complex>
#include <algorithm>
namespace client
{
namespace qi = boost::spirit::qi;
namespace ascii = boost::spirit::ascii;
struct my_record
{
std::vector<int> m_ints;
std::vector<std::string> m_strs;
bool operator==( const my_record& other ) const
{
return std::equal( m_ints.begin(), m_ints.end(), other.m_ints.begin() )
&& std::equal( m_strs.begin(), m_strs.end(), other.m_strs.begin() );
}
bool operator!=( const my_record& other ) const
{
return ! operator==( other );
}
friend std::ostream& operator<<( std::ostream& os, const my_record& rec );
};
std::ostream& operator<<( std::ostream& os, const my_record& rec )
{
for( const auto& x : rec.m_ints )
std::cerr << x << ' ';
std::cerr << std::endl;
for( const auto& x : rec.m_strs )
std::cerr << x << ' ';
std::cerr << std::endl;
}
}
BOOST_FUSION_ADAPT_STRUCT(
client::my_record,
(std::vector<int>, m_ints)
(std::vector<std::string>, m_strs)
)
namespace client
{
template <typename Iterator>
struct employee_parser : qi::grammar<Iterator, my_record(), ascii::space_type>
{
employee_parser() : employee_parser::base_type(start)
{
using qi::int_;
using qi::lit;
using qi::double_;
using qi::lexeme;
using ascii::char_;
quoted_string %= lexeme['"' >> +(char_ - '"') >> '"'];
#ifdef SIMPLE_CASE
start %=
'{'
>> int_list
>> str_list
>> '}'
;
#else
// not sure how to approach this
start %=
'{'
>> *(int_list) // want zero or more of these, in any order
>> *(str_list) // want zero or more of these, in any order
>> '}'
;
#endif
str_list %=
lit( "STR:" ) >> quoted_string % ','
;
int_list %=
lit( "INT:" ) >> int_ % ','
;
}
qi::rule<Iterator, std::string(), ascii::space_type> quoted_string;
qi::rule<Iterator, std::vector<std::string>(), ascii::space_type> str_list;
qi::rule<Iterator, std::vector<int>(), ascii::space_type> int_list;
qi::rule<Iterator, my_record(), ascii::space_type> start;
};
}
static int
TryParse( const std::string& input, const client::my_record& expected )
{
using boost::spirit::ascii::space;
client::my_record rec;
auto iter = input.begin(), end = input.end();
client::employee_parser<decltype(iter)> g;
phrase_parse( iter, end, g, space, rec );
if ( iter!=end )
{
std::cerr << "failed to parse completely" << std::endl;
return -1;
} else if ( rec!=expected ) {
std::cerr << "unexpected result in parse" << std::endl;
std::cerr << rec;
return -1;
}
return 0;
}
int
main(int argc, char* argv[])
{
#ifdef SIMPLE_CASE
client::my_record expected1 { { 42, 24 }, {"Smith", "John"} }, emp;
std::string input1 = "{ INT: 42, 24 STR: \"Smith\", \"John\" }";
return TryParse( input1, expected1 );
#else
client::my_record expected1 { { 42, 24 }, {"Joe", "Smith", "John"} }, emp;
std::string input1 = "{ STR: \"Joe\" INT: 42, 24 STR: \"Smith\", \"John\" }";
return TryParse( input1, expected1 );
#endif
}
You grammar is wrong,
start %=
'{'
>> *(int_list) // want zero or more of these, in any order
>> *(str_list) // want zero or more of these, in any order
>> '}'
;
This means accept any number of int
s followed by any number of string
. You can no have int
, string
, int
, or any other combination.
You need something like
start %=
'{'
>> *( int_list // want zero or more of these, in any order
| str_list // want zero or more of these, in any order
)
>>
'}'
;
But obviously you need to plum that into you data structure, bewarned you may have to use semantic actions.
whilst I am here, I can't let this slide:
std::ostream& operator<<( std::ostream& os, const my_record& rec )
{
for( const auto& x : rec.m_ints )
std::cerr << x << ' ';
std::cerr << std::endl;
for( const auto& x : rec.m_strs )
std::cerr << x << ' ';
std::cerr << std::endl;
}
should be straeming to os
like:
for( const auto& x : rec.m_ints )
os << x << ' ';
os << '\n';
Also try and avoid endl
ing in stream insertion operator, use \n
if you need a new line.
What was need in the end was to use phoenix functions, push_back and a binder.
template<typename Iterator>
struct my_grammar
: qi::grammar<Iterator, my_record(), ascii::space_type> {
my_grammar()
: my_grammar::base_type(start) {
quoted_string %= qi::lexeme['"' >> +(qi::char_ - '"') >> '"'];
start = qi::lit("{")
>>
*( "INT:" >> qi::int_
[
phx::push_back(
phx::at_c<0>(
qi::_val
),
qi::_1
)
] % ","
| "STR:" >> quoted_string
[
phx::push_back(
phx::bind(
&my_record::m_strs,
qi::_val
),
qi::_1
)
] % ","
)
>>
"}"
;
}
qi::rule<Iterator, std::string(), ascii::space_type> quoted_string;
qi::rule<Iterator, my_record(), ascii::space_type> start;
};
The whole code listing can be seen here: