Search code examples
c++ boost boost-spirit boost-spirit-qi

Parse c-struct-like declaration with boost::spirit


I want to parse a C-struct-like declaration that has scalars or arrays as members, so that a C++ header file containing this struct definition can be generated for HDF5 serialization. However, I ran into difficulty when trying to handle scalars and arrays at the same time using boost::spirit.

#include <iostream>
#include <fstream>
#include <string>
#include <vector>

#include <boost/foreach.hpp>
#include <boost/spirit/include/qi.hpp>
#include <boost/spirit/include/phoenix_core.hpp>
#include <boost/spirit/include/phoenix_operator.hpp>
#include <boost/spirit/include/phoenix_stl.hpp>
#include <boost/spirit/include/phoenix_fusion.hpp>
#include <boost/fusion/include/adapt_struct.hpp>

namespace qi = boost::spirit::qi;
namespace ascii = boost::spirit::ascii;
namespace phoenix = boost::phoenix;
namespace fusion = boost::fusion;

/// One member of a parsed struct declaration, e.g. `double pos[3];`.
struct struct_field
{
    std::string type;  ///< Type token as written in the declaration (e.g. "double").
    std::string name;  ///< Member name (e.g. "pos").
    int dim = 0;       ///< Array dimension; starts at 0 so a default-constructed field never holds an indeterminate value.
};

// Result of parsing one `declare(NAME)` block: the declared name and its member list.
struct struct_body
{
    std::string name;                  // text found between "declare(" and ")"
    std::vector<struct_field> fields;  // one entry per parsed field, appended in input order
};

// Adapt struct_field as a Fusion sequence (type, name, dim) so the grammar can
// address its members by position with at_c<0..2>.
BOOST_FUSION_ADAPT_STRUCT(
    struct_field,
    (std::string, type)
    (std::string, name)
    (int, dim)
)

// Adapt struct_body as a Fusion sequence (name, fields); the grammar addresses
// these as at_c<0> and at_c<1>.
BOOST_FUSION_ADAPT_STRUCT(
    struct_body,
    (std::string, name)
    (std::vector<struct_field>, fields)
)

// Qi grammar that parses
//
//     declare(NAME)
//     (
//         TYPE FIELD[DIM];
//         ...
//     )
//
// into a struct_body. The start rule is `body`. Line ends are matched
// explicitly with `eol`, so the skipper passed in is expected not to consume
// them (main uses qi::blank).
template <typename Iterator, typename Skipper>
struct preprocessor :
    qi::grammar<Iterator, struct_body(), Skipper>
{
    preprocessor() :
        preprocessor::base_type(body)
    {
        using namespace qi::labels;
        using qi::eol;
        using qi::lit;
        using qi::lexeme;
        using qi::int_;
        using ascii::char_;
        using phoenix::at_c;
        using phoenix::push_back;

        // Type token: optional literal spaces, then every character up to the next space.
        vartype =
            *lit(' ') >> lexeme[+(char_ - ' ') [_val += _1]];
        // Field name. The first alternative accepts any character except '[',
        // so it does not stop at ';' (or at a line end) when the field has no
        // '['. The second alternative is only tried if the first one fails.
        varname =
            (*lit(' ') >> lexeme[+(char_ - '[') [_val += _1]]) |
            (*lit(' ') >> lexeme[+(char_ - ';') [_val += _1]] >> ';');
        // Array dimension: '[' INT "];" -- the terminating ';' is consumed here.
        vardim = '[' >> int_ [_val += _1] >> "];";

        // Header line: "declare(" NAME ")" followed by a line end; NAME is
        // everything up to the first ')'.
        strucname =
            "declare(" >>
            lexeme[+(char_ - ')')[_val += _1]] >>
            ')' >>
            eol;

        // One field line. Postfix [] binds tighter than unary '-', so the
        // action on the third line is attached to vardim itself and runs only
        // when a dimension was actually parsed.
        field =
            vartype [at_c<0>(_val) = _1] >>
            varname [at_c<1>(_val) = _1] >>
            -vardim [at_c<2>(_val) = _1] >>
            eol;

        // Whole declaration: header, '(' line, zero or more fields, ')' and an
        // optional trailing line end. Each parsed field is appended to
        // struct_body::fields.
        body =
            strucname [at_c<0>(_val) = _1] >>
            '(' >> eol >>
            *(field [push_back(at_c<1>(_val), _1)]) >>
            ')' >> -eol;
    }

    // Rule storage; each rule's synthesized attribute is the type in the signature.
    qi::rule<Iterator, struct_body(), Skipper> body;
    qi::rule<Iterator, struct_field(), Skipper> field;
    qi::rule<Iterator, std::string(), Skipper> strucname;
    qi::rule<Iterator, std::string(), Skipper> vartype, varname;
    qi::rule<Iterator, int(), Skipper> vardim;
};

template<typename Iterator, typename Skipper>
bool parse(Iterator &first, Iterator end, Skipper const &skipper, struct_body &mystruct)
{
    preprocessor<Iterator, Skipper> g;
    return qi::phrase_parse(first, end, g, skipper, mystruct);
}

/// Parses a hard-coded sample declaration and prints every field that was found.
///
/// @return 0 when the grammar matched and consumed the whole input, 1 otherwise.
int main(int /*argc*/, char ** /*argv*/)
{
    std::string storage = "declare(grid_point)\r\n(\r\n    int id[1];\r\n    int cp[1];\r\n    double pos[3];\r\n)";
    std::string::const_iterator iter = storage.begin();
    std::string::const_iterator end = storage.end();

    struct_body mystruct;
    bool result = parse(iter, end, qi::blank, mystruct);
    if (!result || iter != end)
    {
        // A failed or partial match must not pass silently: show where the
        // parser stopped and signal the failure through the exit code.
        std::cerr << "Parsing failed; unparsed input: '" << std::string(iter, end) << "'\n";
        return 1;
    }

    std::cout << mystruct.fields.size() << " fields are parsed." << '\n';
    BOOST_FOREACH(struct_field const& field, mystruct.fields)
    {
        std::cout << field.type << " : " << field.name << " [ " << field.dim << " ] ;" << '\n';
    }
    return 0;
}

As we can see, all members are declared as arrays. Otherwise, scalars cannot be parsed correctly.

declare(grid_point)
(
    int         id;
    int         cp;
    double      pos[3];
)

The above declaration cannot be parsed. It seems boost::spirit always performs an aggressive match on [dim], even though [dim] is only needed for arrays, not for scalars. How can I fix this problem?


Solution

    • First off, all your semantic actions are redundant, because they merely duplicate the standard attribute propagation rules. (Boost Spirit: "Semantic actions are evil"?). The following is exactly equivalent: http://paste.ubuntu.com/10049892/

    • You seem confused about the skipper. You can't usefully use

      *lit(' ')
      

      because blanks are already skipped

    • The varname rule

      varname =
          (*lit(' ') >> lexeme[+(char_ - '[') ]) |
          (*lit(' ') >> lexeme[+(char_ - ';') ] >> ';');
      

      this eats till the end of the line if you don't have the [. This includes even the ;. Fix it, e.g. like

      // char_("[;") is the character set {'[', ';'}; a bare "[;" would be the two-character literal.
      varname = lexeme[+(char_ - char_("[;")) ];
      
    • With respect to the skipper confusion spotted, I'd suggest simplifying:

      vartype = +graph;
      varname = +(graph - char_("[;"));
      vardim = '[' >> int_  >> "]";
      

      Instead of bolting lexeme[] on, I just dropped the Skipper from vartype and varname (see also Boost spirit skipper issues)

    • Note that I also dropped the ';' from the varname and vardim rules. I mean. Seriously, the ';' was never part of that anyway!

    • Instead, just put the ';' in the field rule, making vardim optional:

      field =
          vartype >>
          varname >>
          -vardim >>
          ';' >> 
          eol;
      
    • Use spirit to debug your rules!

      #define BOOST_SPIRIT_DEBUG  // define this before the Spirit headers are included
      // List each rule exactly once: every entry installs another debug handler on that rule.
      BOOST_SPIRIT_DEBUG_NODES((body)(field)(strucname)(vartype)(varname)(vardim))
      
    • General observation: the language being parsed seems to be whitespace-agnostic, so it's a bit of an antipattern to use qi::blank as the skipper and match every line end explicitly with eol. (For example, I ran into a surprise because I used a raw string literal, but it didn't parse because it started with a newline.) Fixing this is left as an exercise for the reader :)

    All in all, here's the modified sample that works:

    Live On Coliru

    //#define BOOST_SPIRIT_DEBUG
    #include <iostream>
    #include <fstream>
    #include <string>
    #include <vector>
    
    #include <boost/foreach.hpp>
    #include <boost/spirit/include/qi.hpp>
    #include <boost/spirit/include/phoenix.hpp>
    #include <boost/fusion/include/adapt_struct.hpp>
    
    namespace qi    = boost::spirit::qi;
    namespace ascii = boost::spirit::ascii;
    
    /// One member of a parsed struct declaration, e.g. `double pos[3];`.
    struct struct_field
    {
        std::string type;  ///< Type token as written in the declaration (e.g. "double").
        std::string name;  ///< Member name (e.g. "pos").
        int dim = 0;       ///< Array dimension; starts at 0 so a default-constructed field never holds an indeterminate value.
    };
    
    // Result of parsing one `declare(NAME)` block: the declared name and its member list.
    struct struct_body
    {
        std::string name;                  // text found between "declare(" and ")"
        std::vector<struct_field> fields;  // one entry per parsed field, in input order
    };
    
    // Adapt struct_field as a Fusion sequence (type, name, dim) so it can serve
    // as the attribute of the `field` rule.
    BOOST_FUSION_ADAPT_STRUCT(
        struct_field,
        (std::string, type)
        (std::string, name)
        (int, dim)
    )
    
    // Adapt struct_body as a Fusion sequence (name, fields) so it can serve as
    // the attribute of the `body` rule.
    BOOST_FUSION_ADAPT_STRUCT(
        struct_body,
        (std::string, name)
        (std::vector<struct_field>, fields)
    )
    
    /// Qi grammar that parses
    ///
    ///     declare(NAME)
    ///     (
    ///         TYPE FIELD[DIM];
    ///         TYPE FIELD;
    ///     )
    ///
    /// into a struct_body, relying on automatic attribute propagation instead of
    /// semantic actions. The start rule is `body`. Line ends are matched
    /// explicitly with `eol`, so the skipper is expected not to consume them
    /// (main uses qi::blank).
    template <typename Iterator, typename Skipper>
    struct preprocessor :
        qi::grammar<Iterator, struct_body(), Skipper>
    {
        preprocessor() :
            preprocessor::base_type(body)
        {
            using qi::eol;
            using qi::graph;
            using qi::lit;
            using qi::int_;
            using ascii::char_;

            // Lexemes: both rules are declared without a skipper (see the
            // members below), so they match contiguous characters only.
            vartype = +graph;                   // a run of printable, non-space characters
            varname = +(graph - char_("[;"));   // same, but stops at '[' or ';'
            vardim  = '[' >> int_  >> "]";      // array dimension, without the trailing ';'

            // Header line: declare(NAME) followed by a line end.
            strucname =
                "declare" >> lit('(') >> +~char_(')') >> ')' >>
                eol;

            // One field line. The dimension is optional; when it is absent
            // nothing is assigned to `dim` (the sample output shows 0).
            field =
                vartype >>
                varname >>
                -vardim >>
                ';' >>
                eol;

            // Whole declaration: header, '(' line, any number of fields, ')'
            // and an optional trailing line end.
            body =
                strucname  >>
                '(' >> eol >>
                *field >>
                ')' >> -eol;

            // Each rule is listed exactly once: every entry installs another
            // debug handler on that rule when BOOST_SPIRIT_DEBUG is defined.
            BOOST_SPIRIT_DEBUG_NODES((body)(field)(strucname)(vartype)(varname)(vardim))
        }

        qi::rule<Iterator, struct_body(),  Skipper> body;
        qi::rule<Iterator, struct_field(), Skipper> field;
        qi::rule<Iterator, std::string(),  Skipper> strucname;
        qi::rule<Iterator, int(),          Skipper> vardim;
        // lexemes
        qi::rule<Iterator, std::string()> vartype, varname;
    };
    
    template<typename Iterator, typename Skipper>
    bool parse(Iterator &first, Iterator end, Skipper const &skipper, struct_body &mystruct)
    {
        preprocessor<Iterator, Skipper> g;
        return qi::phrase_parse(first, end, g, skipper, mystruct);
    }
    
    /// Parses a hard-coded sample declaration (two scalars and one array) and
    /// prints every field that was found.
    ///
    /// @return 0 when the grammar matched and consumed the whole input, 1 otherwise.
    int main()
    {
        std::string const storage = "declare(grid_point)\r\n(\r\n    int    id;\r\n    int    cp;\r\n    double pos[3];\r\n)";
        std::string::const_iterator iter = storage.begin();
        std::string::const_iterator end = storage.end();

        struct_body mystruct;
        bool result = parse(iter, end, qi::blank, mystruct);
        if (!result || iter != end)
        {
            // A failed or partial match must not pass silently: show where the
            // parser stopped and signal the failure through the exit code.
            std::cerr << "Parsing failed; unparsed input: '" << std::string(iter, end) << "'\n";
            return 1;
        }

        std::cout << mystruct.fields.size() << " fields are parsed." << '\n';
        BOOST_FOREACH(struct_field const& field, mystruct.fields)
        {
            std::cout << field.type << " : " << field.name << " [ " << field.dim << " ] ;" << '\n';
        }
        return 0;
    }
    

    Prints

    3 fields are parsed.
    int : id [ 0 ] ;
    int : cp [ 0 ] ;
    double : pos [ 3 ] ;
    

    To have a default value, make it

        vardim = '[' >> int_  >> "]" | qi::attr(1);
        field  = vartype >> varname >> vardim >> ';' >> eol;
    

    In this case the output becomes

    3 fields are parsed.
    int : id [ 1 ] ;
    int : cp [ 1 ] ;
    double : pos [ 3 ] ;