Parse c-struct-like declaration with boost::spirit

I want to parse a C-struct-like declaration whose members are scalars or arrays, so that a C++ header file containing the corresponding C struct definition can be generated for HDF5 serialization. However, I ran into difficulty when trying to handle scalars and arrays at the same time using boost::spirit.

#include <iostream>
#include <fstream>
#include <string>
#include <vector>

#include <boost/foreach.hpp>
#include <boost/spirit/include/qi.hpp>
#include <boost/spirit/include/phoenix_core.hpp>
#include <boost/spirit/include/phoenix_operator.hpp>
#include <boost/spirit/include/phoenix_stl.hpp>
#include <boost/spirit/include/phoenix_fusion.hpp>
#include <boost/fusion/include/adapt_struct.hpp>

namespace qi = boost::spirit::qi;
namespace ascii = boost::spirit::ascii;
namespace phoenix = boost::phoenix;
namespace fusion = boost::fusion;

/// One member of a parsed declaration, e.g. `double pos[3];`.
struct struct_field
{
    std::string type;   ///< Type token of the member.
    std::string name;   ///< Identifier of the member.
    /// Array extent taken from `[N]`. Initialised to 0 so that an instance
    /// created outside the parser never carries an indeterminate value.
    int dim = 0;
};

// Result of parsing one whole declaration: its name plus its member list.
struct struct_body
{
    std::string name;                   // text captured between "declare(" and ")"
    std::vector<struct_field> fields;   // members, in the order they were parsed
};

// Expose struct_field as a Fusion sequence (type, name, dim) so the grammar
// can address its members by index via at_c<0..2>.
BOOST_FUSION_ADAPT_STRUCT(
    struct_field,
    (std::string, type)
    (std::string, name)
    (int, dim)
)

// Expose struct_body as a Fusion sequence (name, fields) so the grammar
// can address its members by index via at_c<0..1>.
BOOST_FUSION_ADAPT_STRUCT(
    struct_body,
    (std::string, name)
    (std::vector<struct_field>, fields)
)

// Qi grammar for a block of the form
//
//     declare(<name>)
//     (
//         <type> <name>[<dim>];
//         ...
//     )
//
// The synthesized attribute is a struct_body. Every rule is declared with the
// Skipper, and all attributes are filled in by explicit semantic actions.
template <typename Iterator, typename Skipper>
struct preprocessor :
    qi::grammar<Iterator, struct_body(), Skipper>
{
    preprocessor() :
        preprocessor::base_type(body)
    {
        using namespace qi::labels;
        using qi::eol;
        using qi::lit;
        using qi::lexeme;
        using qi::int_;
        using ascii::char_;
        using phoenix::at_c;
        using phoenix::push_back;

        // Type token: any literal spaces, then one or more characters other
        // than ' ', each appended to the rule's string.
        vartype =
            *lit(' ') >> lexeme[+(char_ - ' ') [_val += _1]];
        // Member name, two alternatives tried in order:
        //   1. characters up to (not including) '['
        //   2. characters up to ';', followed by the ';' itself
        // NOTE(review): for a scalar such as `id;` the first alternative has
        // no '[' to stop at; per the accompanying discussion it consumes the
        // ';' and the rest of the line, which is why scalars fail to parse.
        varname =
            (*lit(' ') >> lexeme[+(char_ - '[') [_val += _1]]) |
            (*lit(' ') >> lexeme[+(char_ - ';') [_val += _1]] >> ';');
        // Array extent: '[' integer "];" -- the terminating ';' is part of
        // this rule.
        vardim = '[' >> int_ [_val += _1] >> "];";

        // Header line: "declare(" <name> ")" followed by an end-of-line.
        strucname =
            "declare(" >>
            lexeme[+(char_ - ')')[_val += _1]] >>
            ')' >>
            eol;

        // One member line: type, name, optional extent, end-of-line. Each
        // sub-result is stored into the matching struct_field member.
        field =
            vartype [at_c<0>(_val) = _1] >>
            varname [at_c<1>(_val) = _1] >>
            -vardim [at_c<2>(_val) = _1] >>
            eol;

        // Whole declaration: header, '(' line, any number of member lines,
        // closing ')' with an optional trailing end-of-line.
        body =
            strucname [at_c<0>(_val) = _1] >>
            '(' >> eol >>
            *(field [push_back(at_c<1>(_val), _1)]) >>
            ')' >> -eol;
    }

    qi::rule<Iterator, struct_body(), Skipper> body;
    qi::rule<Iterator, struct_field(), Skipper> field;
    qi::rule<Iterator, std::string(), Skipper> strucname;
    qi::rule<Iterator, std::string(), Skipper> vartype, varname;
    qi::rule<Iterator, int(), Skipper> vardim;
};

template<typename Iterator, typename Skipper>
bool parse(Iterator &first, Iterator end, Skipper const &skipper, struct_body &mystruct)
{
    preprocessor<Iterator, Skipper> g;
    return qi::phrase_parse(first, end, g, skipper, mystruct);
}

// Parses a hard-coded sample declaration and prints the fields found.
// Returns 0 when the whole input was consumed, 1 otherwise.
int main(int argc, char **argv)
{
    std::string storage = "declare(grid_point)\r\n(\r\n    int id[1];\r\n    int cp[1];\r\n    double pos[3];\r\n)";
    std::string::const_iterator iter = storage.begin();
    std::string::const_iterator end = storage.end();

    struct_body mystruct;
    bool result = parse(iter, end, qi::blank, mystruct);

    // A match that stops before the end of the input is treated as a failure
    // too, and is reported instead of being silently ignored.
    if (!result || iter != end)
    {
        std::cerr << "Parsing failed; " << (end - iter)
                  << " character(s) left unparsed." << std::endl;
        return 1;
    }

    std::cout << mystruct.fields.size() << " fields are parsed." << std::endl;
    BOOST_FOREACH(struct_field const& field, mystruct.fields)
    {
        std::cout << field.type << " : " << field.name << " [ " << field.dim << " ] ;" << std::endl;
    }
    return 0;
}

As we can see, all members are declared as arrays. Otherwise, scalars cannot be parsed correctly.

declare(grid_point)
(
    int         id;
    int         cp;
    double      pos[3];
)

The above declaration cannot be parsed. It seems boost::spirit always performs an aggressive match on [dim], although [dim] is only needed for arrays, not for scalars. How can I fix this problem?

Solution

First off, all your semantic actions are redundant, because they merely duplicate the standard attribute propagation rules. (Boost Spirit: "Semantic actions are evil"?). The following is exactly equivalent: http://paste.ubuntu.com/10049892/
You seem confused about the skipper. You can't usefully use
```
*lit(' ')
```
because blanks are already skipped by the skipper.

The varname rule

// The rule as written in the question (semantic actions removed). The first
// alternative stops only at '[', the second only at ';'.
varname =
    (*lit(' ') >> lexeme[+(char_ - '[') ]) |
    (*lit(' ') >> lexeme[+(char_ - ';') ] >> ';');

This eats everything up to the end of the line if there is no [ — including even the ;. Fix it, e.g. like this:

// Stop at either '[' or ';'. A character set is required here: a bare string
// literal such as "[;" would be treated as the two-character sequence "[;"
// and would not exclude the individual characters.
varname = lexeme[+(char_ - char_("[;")) ];

With respect to the skipper confusion spotted, I'd suggest simplifying:
```
// Type token: one or more printable, non-space characters.
vartype = +graph;
// Member name: printable characters up to, but not including, '[' or ';'.
varname = +(graph - char_("[;"));
// Array extent: an integer in square brackets; the ';' is no longer part of it.
vardim = '[' >> int_  >> "]";
```
Instead of bolting lexeme[] on, I just dropped the Skipper from the declarations of the vartype and varname rules (see also Boost spirit skipper issues).
Note that I also dropped the ';' from the varname and vardim rules. Seriously, the ';' was never part of either of them anyway!

Instead, just put the ';' in the field rule, making vardim optional:

// One member line: type, name, optional array extent, then the terminating
// ';' (now owned by this rule) and the end-of-line.
field =
    vartype >>
    varname >>
    -vardim >>
    ';' >> 
    eol;

Use spirit to debug your rules!

// Define this before the Spirit headers are included (it is the first line of
// the complete listing). Each rule is named exactly once: a rule listed twice
// gets its debug handler installed twice.
#define BOOST_SPIRIT_DEBUG
BOOST_SPIRIT_DEBUG_NODES((body)(field)(strucname)(vartype)(varname)(vardim))

General observation: the grammar seems to be whitespace-agnostic, so it's a bit of an antipattern to use qi::blank as the skipper. (For example, I ran into a surprise because I used a raw string literal, but it didn't parse because it started with a newline.) Fixing this is left as an exercise for the reader :)

All in all, here's the modified sample that works:

Live On Coliru

//#define BOOST_SPIRIT_DEBUG
#include <iostream>
#include <fstream>
#include <string>
#include <vector>

#include <boost/foreach.hpp>
#include <boost/spirit/include/qi.hpp>
#include <boost/spirit/include/phoenix.hpp>
#include <boost/fusion/include/adapt_struct.hpp>

namespace qi    = boost::spirit::qi;
namespace ascii = boost::spirit::ascii;

/// One member of a parsed declaration, e.g. `double pos[3];`.
struct struct_field
{
    std::string type;   ///< Type token of the member.
    std::string name;   ///< Identifier of the member.
    /// Array extent taken from `[N]`. Initialised to 0 so that an instance
    /// created outside the parser never carries an indeterminate value; 0 is
    /// also what the sample run prints for members without an extent.
    int dim = 0;
};

// Result of parsing one whole declaration: its name plus its member list.
struct struct_body
{
    std::string name;                   // text captured between "declare(" and ")"
    std::vector<struct_field> fields;   // members, in the order they were parsed
};

// Expose struct_field as a Fusion sequence (type, name, dim). The member
// order given here is the order in which the `field` rule's sub-attributes
// are assigned.
BOOST_FUSION_ADAPT_STRUCT(
    struct_field,
    (std::string, type)
    (std::string, name)
    (int, dim)
)

// Expose struct_body as a Fusion sequence (name, fields). The member order
// given here is the order in which the `body` rule's sub-attributes are
// assigned.
BOOST_FUSION_ADAPT_STRUCT(
    struct_body,
    (std::string, name)
    (std::vector<struct_field>, fields)
)

// Qi grammar for a block of the form
//
//     declare(<name>)
//     (
//         <type> <name>;          // scalar
//         <type> <name>[<dim>];   // array
//     )
//
// The synthesized attribute is a struct_body. No semantic actions are used:
// the Fusion-adapted structs are filled by automatic attribute propagation.
template <typename Iterator, typename Skipper>
struct preprocessor :
    qi::grammar<Iterator, struct_body(), Skipper>
{
    preprocessor() :
        preprocessor::base_type(body)
    {
        using qi::eol;
        using qi::graph;
        using qi::lit;
        using qi::int_;
        using ascii::char_;

        // vartype and varname are declared without a Skipper below, so no
        // skipping happens inside them (they act as lexemes).
        vartype = +graph;                     // printable, non-space characters
        varname = +(graph - char_("[;"));     // stops before '[' or ';'
        vardim  = '[' >> int_ >> "]";

        // Header line. This rule does use the skipper, so skipped characters
        // between the parentheses do not end up in the captured name.
        strucname =
            "declare" >> lit('(') >> +~char_(')') >> ')' >>
            eol;

        // One member line. The extent is optional; the ';' belongs to the
        // field, not to the name or the extent.
        field =
            vartype >>
            varname >>
            -vardim >>
            ';' >>
            eol;

        body =
            strucname >>
            '(' >> eol >>
            *field >>
            ')' >> -eol;

        // Each rule is listed exactly once; listing a rule twice would
        // install its debug handler twice.
        BOOST_SPIRIT_DEBUG_NODES((body)(field)(strucname)(vartype)(varname)(vardim))
    }

    qi::rule<Iterator, struct_body(),  Skipper> body;
    qi::rule<Iterator, struct_field(), Skipper> field;
    qi::rule<Iterator, std::string(),  Skipper> strucname;
    qi::rule<Iterator, int(),          Skipper> vardim;
    // lexemes
    qi::rule<Iterator, std::string()> vartype, varname;
};

template<typename Iterator, typename Skipper>
bool parse(Iterator &first, Iterator end, Skipper const &skipper, struct_body &mystruct)
{
    preprocessor<Iterator, Skipper> g;
    return qi::phrase_parse(first, end, g, skipper, mystruct);
}

// Parses a hard-coded sample declaration (two scalars and one array) and
// prints the fields found. Returns 0 when the whole input was consumed,
// 1 otherwise.
int main()
{
    std::string const storage = "declare(grid_point)\r\n(\r\n    int    id;\r\n    int    cp;\r\n    double pos[3];\r\n)";
    std::string::const_iterator iter = storage.begin();
    std::string::const_iterator end = storage.end();

    struct_body mystruct;
    bool result = parse(iter, end, qi::blank, mystruct);

    // A match that stops before the end of the input is treated as a failure
    // too, and is reported instead of being silently ignored.
    if (!result || iter != end)
    {
        std::cerr << "Parsing failed; " << (end - iter)
                  << " character(s) left unparsed." << std::endl;
        return 1;
    }

    std::cout << mystruct.fields.size() << " fields are parsed." << std::endl;
    BOOST_FOREACH(struct_field const& field, mystruct.fields)
    {
        std::cout << field.type << " : " << field.name << " [ " << field.dim << " ] ;" << std::endl;
    }
    return 0;
}

Prints

3 fields are parsed.
int : id [ 0 ] ;
int : cp [ 0 ] ;
double : pos [ 3 ] ;

To have a default value, make it

    // When no "[N]" is present, the second alternative supplies the value 1
    // instead; vardim therefore always yields a value and is no longer
    // optional inside the field rule.
    vardim = '[' >> int_  >> "]" | qi::attr(1);
    field  = vartype >> varname >> vardim >> ';' >> eol;

In this case the output becomes

3 fields are parsed.
int : id [ 1 ] ;
int : cp [ 1 ] ;
double : pos [ 3 ] ;