I want to parse something like the following:
1;2
=1200
3;4
5;6
Lines can appear in any order. There can be more than one line starting with the = sign, in which case only the last one matters; lines containing a ; represent a pair of values that I want to store in a map. After reading the answer to this question, I came up with some code that should be good enough (sorry, but I'm still a noob with Spirit) and should do what I'm trying to achieve. Here's the code:
#define BOOST_SPIRIT_USE_PHOENIX_V3
#define DATAPAIR_PAIR
#include <iostream>
#include <boost/spirit/include/qi.hpp>
#include <boost/fusion/adapted/std_pair.hpp>
#include <boost/mpl/bool.hpp>
#include <map>
#if !defined(DATAPAIR_PAIR)
#include <vector>
#endif
// Sample input fed to the parser: one record per line, each terminated by '\n'.
static const char g_data[] =
    "1;2\n"
    "=1200\n"
    "3;4\n"
    "5;6\n";
// Type used for map keys (and, in the vector fallback, for every parsed field).
using DataTypeFirst = std::string;
#ifdef DATAPAIR_PAIR
// Pair flavour: a parsed "key;value" line becomes a std::pair.
using DataTypeSecond = std::string;
using DataPair = std::pair<DataTypeFirst, DataTypeSecond>;
using DataMap = std::map<DataTypeFirst, DataTypeSecond>;
#else
// Vector flavour: a parsed "key;value" line becomes a vector of strings.
using DataPair = std::vector<DataTypeFirst>;
using DataMap = std::map<DataTypeFirst, DataTypeFirst>;
#endif
// Parse result: the collected key/value entries plus a single number.
struct MyContainer {
    // Entries gathered from the "key;value" lines.
    DataMap data;
    // Value taken from an "=<number>" line. Initialized to 0.0 so that reading
    // it is well-defined even when the input contains no such line.
    double number = 0.0;
};
// Spirit attribute-customization specializations for MyContainer, so that a
// repeated (kleene) parser can "append" parsed elements to it.
namespace boost { namespace spirit { namespace traits {
// Declare MyContainer to be a container as far as Spirit is concerned.
template<> struct is_container<MyContainer> : boost::mpl::true_ {};
// Element type of the pseudo-container: either a number or one data pair.
template<>
struct container_value<MyContainer> {
typedef boost::variant<double, DataPair> type;
};
// Appending a double overwrites MyContainer::number, so the most recently
// appended value is the one that is kept.
template <>
struct push_back_container<MyContainer, double> {
static bool call ( MyContainer& parContainer, double parValue ) {
parContainer.number = parValue;
return true;
}
};
// Appending a DataPair stores it in MyContainer::data; operator[] is used, so
// a later pair with the same key replaces the earlier value.
// NOTE(review): with DATAPAIR_PAIR defined the build reportedly fails inside
// Spirit's container.hpp looking for MyContainer::insert, which suggests this
// specialization is not the one selected for std::pair attributes - confirm.
template <>
struct push_back_container<MyContainer, DataPair> {
static bool call ( MyContainer& parContainer, const DataPair& parValue ) {
#if defined(DATAPAIR_PAIR)
parContainer.data[parValue.first] = parValue.second;
#else
// Vector flavour: element 0 is the key, element 1 the value.
// NOTE(review): assumes the vector holds at least two elements - confirm.
parContainer.data[parValue[0]] = parValue[1];
#endif
return true;
}
};
} } }
// Qi grammar whose synthesized attribute is a MyContainer.
template <typename Iterator>
struct TestGrammar : boost::spirit::qi::grammar<Iterator, MyContainer()> {
    // Defines the rules below and registers `start` as the entry rule.
    TestGrammar();

    // Entry rule: the whole input.
    boost::spirit::qi::rule<Iterator, MyContainer()> start;
    // One "key;value" line, exposed as a DataPair.
    boost::spirit::qi::rule<Iterator, DataPair()> data;
    // One "=<number>" line, exposed as a double.
    boost::spirit::qi::rule<Iterator, double()> num;
};
// Wires up the grammar: each record is either "=<double>" or
// "<alnum+>;<alnum+>", and is followed by an end-of-line or end-of-input.
template <typename Iterator>
TestGrammar<Iterator>::TestGrammar() :
    TestGrammar::base_type(start)
{
    namespace qi = boost::spirit::qi;

    // '=' followed by a floating-point value.
    num = '=' >> qi::double_;

    // Two alphanumeric runs separated by a semicolon.
    data = +qi::alnum >> qi::lit(";") >> +qi::alnum;

    // Any number of records; %= propagates the parsed elements into the
    // rule's MyContainer attribute.
    start %= *((num | data) >> (qi::eol | qi::eoi));
}
// Parses the built-in sample input and prints the resulting number and map.
// Returns 0 on success, 1 if the input could not be parsed completely.
int main() {
    std::cout << "Parsing data:\n" << g_data << "\n";

    TestGrammar<const char*> gramm;
    // Value-initialize so `number` is well-defined even if no "=" line is parsed.
    MyContainer result{};

    // Keep the begin iterator in a named variable: qi::parse advances it, which
    // lets us detect input that was left unconsumed. The grammar's start rule is
    // a kleene star, so parse() alone reports success even on a partial match.
    const char* first = g_data;
    // sizeof counts the terminating NUL; exclude it from the parsed range.
    const char* const last = g_data + sizeof(g_data) / sizeof(g_data[0]) - 1;

    const bool matched = boost::spirit::qi::parse(first, last, gramm, result);
    if (!matched || first != last) {
        std::cerr << "Parse failed: unparsed input starts at offset "
                  << (first - g_data) << "\n";
        return 1;
    }

    std::cout << "Parsed data:\n";
    std::cout << "Result: " << result.number << "\n";
    for (const auto& p : result.data) {
        std::cout << p.first << " = " << p.second << '\n';
    }
    return 0;
}
I'm developing this on Gentoo Linux, using dev-libs/boost-1.55.0-r2:0/1.55.0 and gcc (Gentoo 4.8.3 p1.1, pie-0.5.9) 4.8.3. Compiling the above code I get an error like
/usr/include/boost/spirit/home/support/container.hpp:278:13: error: ‘struct MyContainer’ has no member named ‘insert’
As a workaround, I came up with the alternative code you get by commenting out the "#define DATAPAIR_PAIR" line. In that case the code compiles and works, but what I really want is a pair in which I can, for example, mix std::string and int values. Why does using std::pair as the attribute for my data rule cause the compiler to miss the correct specialization of push_back_container? Is it possible to fix the code and get it working, either with std::pair or with anything equivalent?
I'd simplify this by /just/ not treating things like a container and not-a-container at the same time. So for this particular situation I might deviate from my usual mantra (avoid semantic actions) and use them¹:
// Grammar that fills a MyContainer through semantic actions instead of
// container-attribute customization. Uses a skipper (qi::blank by default).
// NOTE(review): `qi`/`phx` are presumably namespace aliases for
// boost::spirit::qi and boost::phoenix, and MyContainer::Pair is declared
// outside this snippet - confirm.
template <typename It, typename Skipper = qi::blank_type>
struct grammar : qi::grammar<It, MyContainer(), Skipper> {
grammar() : grammar::base_type(start) {
// '=' then a double; the action assigns the parsed value to the rule's first
// inherited attribute (_r1), a double& supplied by the caller.
update_number = '=' > qi::double_ [ qi::_r1 = qi::_1 ];
// "<int>;<int>", exposed as MyContainer::Pair.
map_entry = qi::int_ > ';' > qi::int_;
// Lazy accessors for the members of the start rule's attribute (_val).
auto number = phx::bind(&MyContainer::number, qi::_val);
auto data = phx::bind(&MyContainer::data, qi::_val);
// Zero or more entries, each followed by qi::eol.
start = *(
// "=<double>": write the value into _val.number via the inherited attribute.
( update_number(number)
// "<int>;<int>": insert the parsed pair into _val.data.
| map_entry [ phx::insert(data, phx::end(data), qi::_1) ]
)
>> qi::eol);
}
private:
// No synthesized attribute; takes the destination double by reference.
qi::rule<It, void(double&), Skipper> update_number;
qi::rule<It, MyContainer::Pair(), Skipper> map_entry;
qi::rule<It, MyContainer(), Skipper> start;
};
If you can afford a (0;0) entry in your map, you can even dispense with the grammar:
// Grammar-free variant: the map is passed directly as the parser's attribute.
std::map<int, int> data;
// Set by the semantic action on "=<double>" lines; not initialized here, so it
// holds no defined value if the input contains no such line.
double number;
// NOTE(review): f and l are presumably the begin/end iterators of the input;
// they are defined outside this snippet - confirm.
bool ok = qi::phrase_parse(f, l,
*(
// "=<double>": store the value in `number`; omit[] discards the attribute.
(qi::omit['=' > qi::double_ [phx::ref(number)=qi::_1]]
// "<int>;<int>": becomes an element of `data`.
| (qi::int_ > ';' > qi::int_)
// Each entry must be followed by an end-of-line.
) >> qi::eol)
// qi::blank is the skipper; `data` receives the parsed pairs.
, qi::blank, data);
I can try to make your "advanced spirit" approach work too, but it might take a while :)
¹ I use auto for readability, but of course you don't need to use that; just repeat the subexpressions inline or use BOOST_AUTO. Note that this is not generically good advice for stateful parser expressions (see BOOST_SPIRIT_AUTO).