Search code examples
c++ boost boost-spirit boost-spirit-x3

Cannot parse an empty C++ struct with Boost Spirit X3


I'm trying to parse a C++ struct defined in a header file. I've started defining the grammar, but I've run into a problem.

This is my code:

#include <boost/spirit/home/x3.hpp>

// Builds a Boost.Spirit X3 grammar for a header-guarded file that may contain
// empty `typedef struct { } Name;` declarations, then runs it over three
// sample inputs. Each result is stored in `r`; `r` is overwritten by the next
// parse and is never printed or returned.
int main() {
  namespace x3 = boost::spirit::x3;

  // Opening keyword of a header guard: "#ifndef" or "#if !defined".
  // x3::skip(x3::space) lets whitespace separate the tokens (e.g. "#  ifndef").
  auto Ifndef = x3::skip(x3::space)[(x3::lit('#') >> (x3::lit("ifndef") | (x3::lit("if") >> x3::lit("!defined"))))];
  // Guard keyword followed by the macro name.
  // NOTE: the name parser is not wrapped in x3::lexeme[], so the phrase
  // skipper runs before every character it tries to match.
  auto HeaderGuardFirstRow = Ifndef >> +(x3::alnum | '_');

  // Parse "#define XXX_X" or "#  define XXX_X"
  auto Define = x3::skip(x3::space)[(x3::lit('#') >> x3::lit("define"))];
  // NOTE: same missing x3::lexeme[] as above. Because the skipper also skips
  // line ends, this match does not stop at the end of the macro name: it
  // continues into the following words ("typedef", "struct"), which is why
  // the third sample below fails to parse.
  auto HeaderGuardSecondRow = Define >> +(x3::alnum | '_');

  // Parse
  // "
  //  #if !defined XXX_X_
  //  #define XXX_X
  // "
  auto HeaderGuardBegin = HeaderGuardFirstRow >> HeaderGuardSecondRow;

  // Parse "#endif" or "#  endif"
  auto HeaderGuardEnd = x3::skip(x3::space)[x3::lit('#') >> (x3::lit("endif"))];

  // Parse variable name like "xxx" or "my_var".
  // x3::lexeme[] keeps the identifier contiguous: first a letter or '_',
  // then any number of alphanumerics or '_'.
  auto VariableName = x3::lexeme[x3::char_("a-zA-Z_") >> *(x3::alnum | x3::lit("_"))];

  // Skipper for C++ comments (nested /* */ are not handled for now).
  // A single-line comment runs to the end of the line or the end of input;
  // a block comment runs to the first "*/".
  auto SingleLineComment = "//" >> *(x3::char_ - x3::eol) >> (x3::eol | x3::eoi);
  auto BlockComment = "/*" >> *(x3::char_ - "*/") >> "*/";
  auto Skipper = SingleLineComment | BlockComment | x3::ascii::space;

  // Parse
  // "
  // typedef struct {
  // } MyStruct;
  // "
  // ERROR: This parse does not work
  // The leading "typedef" is optional. Both x3::skip(Skipper)[...] groups
  // name the same skipper that phrase_parse is already given below.
  auto StructType = -x3::lit("typedef") >> x3::skip(Skipper)[x3::lit("struct") >> x3::lit('{')] >>
  x3::skip(Skipper)[x3::lit('}') >> VariableName >> x3::lit(";")];

  // Header grammar. Should parse
  // "
  // #if !defined XXX_H
  //  #define XXX_H
  //  typedef struct {
  //  } MyStruct;
  //  #endif
  // "
  // Zero or more struct definitions are allowed between the guards.
  auto grammar = HeaderGuardBegin >> *(StructType) >> HeaderGuardEnd;

  // Sample 1: header guards only.
  std::string data01(R"xx(
    #if !defined XXX_H
    #define XXX_H
    #endif
  )xx");

  bool r = phrase_parse(
    data01.begin(),
    data01.end(),
    grammar,
    Skipper
    );

    // Sample 2: header guards with single-line comments mixed in.
    std::string data02(R"xx(
    #if !defined XXX_H
    // Single line comment
    #define XXX_H
    #endif // !XXX_H
  )xx");

  r = phrase_parse(
    data02.begin(),
    data02.end(),
    grammar,
    Skipper
    );

  // Sample 3: header guards around one empty typedef'd struct.
  std::string data03(R"xx(
    #if !defined XXX_H
    #define XXX_H
    typedef struct {
    } MyStruct;
    #endif
  )xx");

  // r = false: This parsing does not work.
  r = phrase_parse(
    data03.begin(),
    data03.end(),
    grammar,
    Skipper
    );
  return 0;
}

In the code there are three strings to parse: one with only header guards, the second like the first but with some C++ comments, and the third with an empty struct.

It's the last one that fails to parse, and I don't understand why. In the StructType grammar I first check for an optional typedef, then the keyword struct followed by the { character (which may or may not be directly attached to it), and then the } character followed by a variable name and a ;.

I don't understand where the error is. What am I doing wrong when parsing the empty struct?


Solution

  • Few things:

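    • skippers are inherited from the surrounding context, so the rules do not need to repeat x3::skip(...) when phrase_parse already supplies a skipper
    • you didn't have lexeme[] around the "tokens" in the header guards, so the guard-name parser +(x3::alnum | '_') would keep matching past XXX_H and swallow typedef and struct as well ("typedefstruct"), because the skipper also skips line-ends.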

    You can simplify things:

    Live On Coliru

    #include <iostream>
    #include <boost/spirit/home/x3.hpp>
    #include <iomanip>
    
    int main() {
        namespace x3 = boost::spirit::x3;
    
        // Skipper: whitespace plus C++ comments (nested /* */ are not handled).
        auto const LineComment  = x3::lit("//") >> *(x3::char_ - x3::eol) >> (x3::eol | x3::eoi);
        auto const BlockComment = x3::lit("/*") >> *(x3::char_ - "*/") >> x3::lit("*/");
        auto const Skipper      = LineComment | BlockComment | x3::ascii::space;
    
        // Contiguous tokens. lexeme[] suspends skipping inside the brackets, so
        // a name ends at the first character that is not part of it.
        auto const GuardName  = x3::lexeme[+(x3::alnum | '_')];
        auto const Identifier = x3::lexeme[x3::char_("a-zA-Z_") >> *(x3::alnum | '_')];
    
        // Preprocessor lines. The skipper handed to phrase_parse takes care of
        // whitespace between '#' and the directive ("#  define").
        auto const Hash         = x3::lit('#');
        auto const IfNotDefined = Hash >> (x3::lit("ifndef") | (x3::lit("if") >> x3::lit("!defined")));
    
        // "#if !defined XXX_H" (or "#ifndef XXX_H") followed by "#define XXX_H".
        auto const HeaderGuardBegin = IfNotDefined >> GuardName >> Hash >> x3::lit("define") >> GuardName;
    
        // "#endif" or "#  endif".
        auto const HeaderGuardEnd = Hash >> x3::lit("endif");
    
        // An empty struct with an optional leading typedef:
        //   typedef struct { } Name;
        auto const StructType =
            -x3::lit("typedef") >> x3::lit("struct") >> x3::lit('{') >> x3::lit('}') >> Identifier >> x3::lit(";");
    
        // Whole header: opening guard, any number of structs, closing guard.
        auto const grammar = HeaderGuardBegin >> *StructType >> HeaderGuardEnd;
    
        char const* const samples[] = {
                R"xx(
        #if !defined XXX_H
        #define XXX_H
        #endif
      )xx",
                R"xx(
        #if !defined XXX_H
        // Single line comment
        #define XXX_H
        #endif // !XXX_H
      )xx",
                R"xx(
        #if !defined XXX_H
        #define XXX_H
        typedef struct {
            // aloha
        } MyStruct;
    
        typedef struct { /* caramba */ } MyOtherStruct
    ;
        #endif
      )xx" };
    
        for (char const* const text : samples) {
            std::string const input(text);
            auto cursor    = input.begin();
            auto const end = input.end();
    
            std::cout << "Parsing " << std::quoted(input) << "\n";
    
            bool const ok = x3::phrase_parse(cursor, end, grammar, Skipper);
            std::cout << (ok ? "Parsed\n" : "Failed to parse\n");
    
            // Anything the grammar (and trailing skipper) did not consume.
            if (cursor != end) {
                std::cout << "Remaining unparsed: " << std::quoted(std::string(cursor, end)) << "\n";
            }
        }
    }
    

    Prints

    Parsing "
        #if !defined XXX_H
        #define XXX_H
        #endif
      "
    Parsed
    Parsing "
        #if !defined XXX_H
        // Single line comment
        #define XXX_H
        #endif // !XXX_H
      "
    Parsed
    Parsing "
        #if !defined XXX_H
        #define XXX_H
        typedef struct {
            // aloha
        } MyStruct;
    
        typedef struct { /* caramba */ } MyOtherStruct
    ;
        #endif
      "
    Parsed