I want to parse a C-struct-like declaration whose members are scalars or arrays, so that a C++ header file containing the corresponding struct definition can be generated for HDF5 serialization. However, I ran into difficulties when trying to handle scalars and arrays at the same time with boost::spirit.
#include <iostream>
#include <fstream>
#include <string>
#include <vector>
#include <boost/foreach.hpp>
#include <boost/spirit/include/qi.hpp>
#include <boost/spirit/include/phoenix_core.hpp>
#include <boost/spirit/include/phoenix_operator.hpp>
#include <boost/spirit/include/phoenix_stl.hpp>
#include <boost/spirit/include/phoenix_fusion.hpp>
#include <boost/fusion/include/adapt_struct.hpp>
namespace qi = boost::spirit::qi;
namespace ascii = boost::spirit::ascii;
namespace phoenix = boost::phoenix;
namespace fusion = boost::fusion;
/// One member of a parsed declaration: its type, its name and its array extent.
struct struct_field {
    std::string type;  ///< Type name as written in the input, e.g. "int".
    std::string name;  ///< Member name.
    int dim;           ///< Number read from the "[N]" suffix of the member.
};
/// A complete parsed declaration: the declared name plus its members in input order.
struct struct_body {
    std::string name;                  ///< Text found between "declare(" and ")".
    std::vector<struct_field> fields;  ///< One entry per parsed member line.
};
// Expose struct_field to Boost.Fusion. The members are listed in the order
// type, name, dim, which is the order the grammar's at_c<0>, at_c<1> and
// at_c<2> accesses on a struct_field refer to.
BOOST_FUSION_ADAPT_STRUCT(
struct_field,
(std::string, type)
(std::string, name)
(int, dim)
)
// Expose struct_body to Boost.Fusion: index 0 is the name, index 1 the
// vector of fields (the grammar uses at_c<0> and at_c<1> on a struct_body).
BOOST_FUSION_ADAPT_STRUCT(
struct_body,
(std::string, name)
(std::vector<struct_field>, fields)
)
// Spirit.Qi grammar for input of the form
//   declare(NAME) <eol> ( <eol> FIELD... ) [<eol>]
// Its synthesized attribute is a struct_body; every rule fills its attribute
// through explicit semantic actions.
//   Iterator - iterator type of the input being parsed
//   Skipper  - type of the skip parser that the caller hands to phrase_parse
template <typename Iterator, typename Skipper>
struct preprocessor :
qi::grammar<Iterator, struct_body(), Skipper>
{
// Defines all rules; `body` is the start rule.
preprocessor() :
preprocessor::base_type(body)
{
using namespace qi::labels;
using qi::eol;
using qi::lit;
using qi::lexeme;
using qi::int_;
using ascii::char_;
using phoenix::at_c;
using phoenix::push_back;
// Type name: any literal spaces, then every character up to (not including)
// the next ' ' is appended to the result string.
vartype =
*lit(' ') >> lexeme[+(char_ - ' ') [_val += _1]];
// Member name, two alternatives tried in order:
//   1. every character that is not '['
//   2. every character that is not ';', followed by a literal ';'
// NOTE(review): alternative 1 also accepts ';' and line-break characters, so
// for a scalar such as "int id;" it appears to keep consuming until the next
// '[' (or the end of the input); alternative 2 is only reached when
// alternative 1 matches nothing at all.
varname =
(*lit(' ') >> lexeme[+(char_ - '[') [_val += _1]]) |
(*lit(' ') >> lexeme[+(char_ - ';') [_val += _1]] >> ';');
// Array extent: '[' <integer> "];" - the terminating ';' of an array member
// is consumed here, not in `field`.
vardim = '[' >> int_ [_val += _1] >> "];";
// Declaration header: "declare(" NAME ')' followed by an end of line; NAME is
// every character up to the closing ')'.
strucname =
"declare(" >>
lexeme[+(char_ - ')')[_val += _1]] >>
')' >>
eol;
// One member line: type, name, optional extent, end of line. The results are
// stored into members 0, 1 and 2 of the struct_field attribute.
field =
vartype [at_c<0>(_val) = _1] >>
varname [at_c<1>(_val) = _1] >>
-vardim [at_c<2>(_val) = _1] >>
eol;
// Whole declaration: header, '(' on its own line, any number of member lines
// (each appended to struct_body::fields), ')' and an optional end of line.
body =
strucname [at_c<0>(_val) = _1] >>
'(' >> eol >>
*(field [push_back(at_c<1>(_val), _1)]) >>
')' >> -eol;
}
// Start rule and the rules it is built from; all of them use the Skipper.
qi::rule<Iterator, struct_body(), Skipper> body;
qi::rule<Iterator, struct_field(), Skipper> field;
qi::rule<Iterator, std::string(), Skipper> strucname;
qi::rule<Iterator, std::string(), Skipper> vartype, varname;
qi::rule<Iterator, int(), Skipper> vardim;
};
template<typename Iterator, typename Skipper>
bool parse(Iterator &first, Iterator end, Skipper const &skipper, struct_body &mystruct)
{
preprocessor<Iterator, Skipper> g;
return qi::phrase_parse(first, end, g, skipper, mystruct);
}
/// Parses a hard-coded sample declaration with the `preprocessor` grammar and
/// lists the fields that were found.
///
/// @return 0 if the whole input was parsed, 1 if parsing failed or stopped
///         before the end of the input.
int main(int argc, char **argv)
{
    // The command-line arguments are not used by this sample.
    static_cast<void>(argc);
    static_cast<void>(argv);

    std::string storage = "declare(grid_point)\r\n(\r\n int id[1];\r\n int cp[1];\r\n double pos[3];\r\n)";
    std::string::const_iterator iter = storage.begin();
    std::string::const_iterator end = storage.end();
    struct_body mystruct;
    bool result = parse(iter, end, qi::blank, mystruct);

    // Success needs both a match and fully consumed input. Report anything
    // else instead of exiting silently with status 0.
    if (!result || iter != end)
    {
        std::cerr << "Parsing failed";
        if (iter != end)
        {
            std::cerr << "; unparsed input: '" << std::string(iter, end) << "'";
        }
        std::cerr << std::endl;
        return 1;
    }

    std::cout << mystruct.fields.size() << " fields are parsed." << std::endl;
    BOOST_FOREACH(struct_field const& field, mystruct.fields)
    {
        std::cout << field.type << " : " << field.name << " [ " << field.dim << " ] ;" << std::endl;
    }
    return 0;
}
In the input above, every member is declared as an array. As soon as a member is declared as a scalar instead, the input is no longer parsed correctly. For example:
declare(grid_point)
(
int id;
int cp;
double pos[3];
)
The above declaration cannot be parsed. It seems that boost::spirit always performs an aggressive match on [dim], although [dim] is only needed for arrays, not for scalars. How can I fix this problem?
First off, all your semantic actions are redundant, because they merely duplicate the standard attribute propagation rules. (Boost Spirit: "Semantic actions are evil"?). The following is exactly equivalent: http://paste.ubuntu.com/10049892/
You seem confused about the skipper. You can't usefully use `*lit(' ')`, because blanks are already skipped by the skipper before that parser gets to see them.
The varname rule
varname =
(*lit(' ') >> lexeme[+(char_ - '[') ]) |
(*lit(' ') >> lexeme[+(char_ - ';') ] >> ';');
If the member has no `[`, the first alternative eats everything that is not a `[`: it runs straight through the `;` and past the end of the line. Fix it, e.g. like
// Stop at either '[' or ';'. char_("[;") is a character set; a bare "[;" would
// be subtracted as the two-character sequence and ';' would still be consumed.
varname = lexeme[+(char_ - char_("[;"))];
With respect to the skipper confusion spotted, I'd suggest simplifying:
vartype = +graph;
varname = +(graph - char_("[;"));
vardim = '[' >> int_ >> "]";
Instead of bolting `lexeme[]` on, I just dropped the `Skipper` from the declarations of the `vartype` and `varname` rules (see also "Boost spirit skipper issues").
Note that I also dropped the `';'` from the `varname` and `vardim` rules. I mean, seriously: the `';'` was never part of either of them anyway!
Instead, just put the `';'` in the `field` rule, and make `vardim` optional there:
field =
vartype >>
varname >>
-vardim >>
';' >>
eol;
Use spirit to debug your rules!
// Define this before the Spirit headers are included to enable rule tracing.
#define BOOST_SPIRIT_DEBUG
// Inside the grammar constructor: register every rule exactly once.
BOOST_SPIRIT_DEBUG_NODES((body)(field)(strucname)(vartype)(varname)(vardim))
General observation: the grammar seems to be whitespace-agnostic, so it's a bit of an antipattern to use `qi::blank` as the skipper. (For example, I ran into a surprise when I used a raw string literal: it didn't parse because it started with a newline.) Fixing this is left as an exercise for the reader :)
All in all, here's the modified sample that works:
//#define BOOST_SPIRIT_DEBUG
#include <iostream>
#include <fstream>
#include <string>
#include <vector>
#include <boost/foreach.hpp>
#include <boost/spirit/include/qi.hpp>
#include <boost/spirit/include/phoenix.hpp>
#include <boost/fusion/include/adapt_struct.hpp>
namespace qi = boost::spirit::qi;
namespace ascii = boost::spirit::ascii;
/// One parsed member: type name, member name and array extent.
struct struct_field {
    std::string type;  ///< Type name as written in the input, e.g. "double".
    std::string name;  ///< Member name, without any "[N]" suffix or ';'.
    int dim;           ///< Number read from an optional "[N]" suffix.
};
/// A parsed declaration: its name and its members in the order they appeared.
struct struct_body {
    std::string name;                  ///< Text between "declare(" and ")".
    std::vector<struct_field> fields;  ///< One entry per parsed member line.
};
// Expose struct_field to Boost.Fusion. The members are listed in the order
// type, name, dim - the same order as the elements parsed by the grammar's
// `field` rule (vartype, varname, optional vardim).
BOOST_FUSION_ADAPT_STRUCT(
struct_field,
(std::string, type)
(std::string, name)
(int, dim)
)
// Expose struct_body to Boost.Fusion: name first, then the vector of fields,
// matching the `body` rule (strucname followed by *field).
BOOST_FUSION_ADAPT_STRUCT(
struct_body,
(std::string, name)
(std::vector<struct_field>, fields)
)
/// Spirit.Qi grammar for input of the form
///
///     declare(NAME) <eol> ( <eol> { TYPE NAME [ '[' N ']' ] ';' <eol> } ) [<eol>]
///
/// Its synthesized attribute is a struct_body. No semantic actions are used:
/// the rules' attributes are assigned to the Fusion-adapted structs directly.
///
/// @tparam Iterator Iterator type of the input being parsed.
/// @tparam Skipper  Type of the skip parser the caller hands to phrase_parse.
template <typename Iterator, typename Skipper>
struct preprocessor :
    qi::grammar<Iterator, struct_body(), Skipper>
{
    /// Defines all rules; `body` is the start rule.
    preprocessor() :
        preprocessor::base_type(body)
    {
        using qi::eol;
        using qi::graph;
        using qi::lit;
        using qi::int_;
        using ascii::char_;

        // vartype and varname are declared without a Skipper (see the member
        // declarations), so nothing is skipped between their characters.
        vartype = +graph;
        // A name ends at '[' or ';' (char_("[;") is a character set).
        varname = +(graph - char_("[;"));
        // Array extent; the ';' is deliberately not part of this rule.
        vardim = '[' >> int_ >> "]";

        // Header line: declare(NAME) followed by an end of line.
        strucname =
            "declare" >> lit('(') >> +~char_(')') >> ')' >>
            eol;

        // One member line. The extent is optional, so scalars ("int id;")
        // and arrays ("double pos[3];") are both accepted.
        field =
            vartype >>
            varname >>
            -vardim >>
            ';' >>
            eol;

        // Whole declaration: header, '(' line, member lines, ')' and an
        // optional trailing end of line.
        body =
            strucname >>
            '(' >> eol >>
            *field >>
            ')' >> -eol;

        // Each rule is registered exactly once. Expected to do nothing
        // unless BOOST_SPIRIT_DEBUG is defined before the Spirit headers.
        BOOST_SPIRIT_DEBUG_NODES((body)(field)(strucname)(vartype)(varname)(vardim))
    }

    qi::rule<Iterator, struct_body(), Skipper> body;
    qi::rule<Iterator, struct_field(), Skipper> field;
    qi::rule<Iterator, std::string(), Skipper> strucname;
    qi::rule<Iterator, int(), Skipper> vardim;
    // lexemes
    qi::rule<Iterator, std::string()> vartype, varname;
};
template<typename Iterator, typename Skipper>
bool parse(Iterator &first, Iterator end, Skipper const &skipper, struct_body &mystruct)
{
preprocessor<Iterator, Skipper> g;
return qi::phrase_parse(first, end, g, skipper, mystruct);
}
/// Parses a hard-coded sample declaration (two scalars and one array) with
/// the `preprocessor` grammar and lists the fields that were found.
///
/// @return 0 if the whole input was parsed, 1 if parsing failed or stopped
///         before the end of the input.
int main()
{
    std::string const storage = "declare(grid_point)\r\n(\r\n int id;\r\n int cp;\r\n double pos[3];\r\n)";
    std::string::const_iterator iter = storage.begin();
    std::string::const_iterator end = storage.end();
    struct_body mystruct;
    bool result = parse(iter, end, qi::blank, mystruct);

    // Success needs both a match and fully consumed input. Report anything
    // else instead of exiting silently with status 0.
    if (!result || iter != end)
    {
        std::cerr << "Parsing failed";
        if (iter != end)
        {
            std::cerr << "; unparsed input: '" << std::string(iter, end) << "'";
        }
        std::cerr << std::endl;
        return 1;
    }

    std::cout << mystruct.fields.size() << " fields are parsed." << std::endl;
    BOOST_FOREACH(struct_field const& field, mystruct.fields)
    {
        std::cout << field.type << " : " << field.name << " [ " << field.dim << " ] ;" << std::endl;
    }
    return 0;
}
Prints
3 fields are parsed.
int : id [ 0 ] ;
int : cp [ 0 ] ;
double : pos [ 3 ] ;
To have a default value, make it
vardim = '[' >> int_ >> "]" | qi::attr(1);
field = vartype >> varname >> vardim >> ';' >> eol;
In this case the output becomes
3 fields are parsed.
int : id [ 1 ] ;
int : cp [ 1 ] ;
double : pos [ 3 ] ;