Search code examples
c++parsingboost-spiritboost-spirit-qiebnf

Why does this boost spirit qi rule fail to parse when a part of a composite rule but succeed when used on its own?


I am trying to combine multiple boost::spirit parse rules into bigger, composite rules, and have the following code:

#define BOOST_SPIRIT_DEBUG
#include <boost/spirit/include/qi.hpp>
#include <boost/fusion/adapted/adt/adapt_adt.hpp>
#include <boost/fusion/include/adapt_adt.hpp>
#include <boost/variant.hpp>

#include <string> 
#include <vector>
#include <fstream>


namespace qi = boost::spirit::qi;
namespace ascii = boost::spirit::ascii;

struct PdCanvas {
    int topX;
    int topY;
    int wX;
    int wY;
    std::string name;
    int openOnLoad; 
};


struct PdArrayData {
    std::vector<float> data; 
};


BOOST_FUSION_ADAPT_STRUCT(
    PdCanvas,
    (int, topX)
    (int, topY)
    (int, wX) 
    (int, wY)
    (std::string, name)
    (int, openOnLoad));


BOOST_FUSION_ADAPT_STRUCT(
    PdArrayData,
    (std::vector<float>, data)); 


using PdRecord = boost::variant<
PdArrayData,
PdCanvas
>;


template <typename Iterator> struct PdCanvasGrammar : qi::grammar<Iterator, PdCanvas()> {
    PdCanvasGrammar() : PdCanvasGrammar::base_type(start) {
        using namespace qi;
        start      = skip(space)[canvasRule >> eoi];
        name       = +(('\\' >> space) |graph);
        canvasRule = "#N canvas" >> int_ >> int_ >> int_ >> int_ >> name >> int_ >> ';';

        BOOST_SPIRIT_DEBUG_NODES((start)(canvasRule)(name))
    }

  private:
    qi::rule<Iterator, PdCanvas()> start;
    qi::rule<Iterator, PdCanvas(), qi::space_type> canvasRule;
    qi::rule<Iterator, std::string()> name;
};


template <typename Iterator> 
struct PdRecordGrammar : qi::grammar<Iterator, PdRecord(), ascii::space_type> {
    PdRecordGrammar () : PdRecordGrammar::base_type(recordRule) {         
        using namespace qi;
        
        arrayDataRule = (lit("#A") >>  +(qi::float_) >> ";");
        canvasRule = PdCanvasGrammar<Iterator>();
        recordRule = (canvasRule |  (arrayDataRule)  );
        BOOST_SPIRIT_DEBUG_NODES((arrayDataRule)(canvasRule)(recordRule))
    }

    
    qi::rule<Iterator, PdArrayData(), ascii::space_type> arrayDataRule; 
    qi::rule<Iterator, PdCanvas()> canvasRule;     
    qi::rule<Iterator, PdRecord(), ascii::space_type> recordRule;     
};



int main(int argc, char** argv)
{
  if(argc != 2)
    {
        std::cout << "Usage: "  <<argv[0] << " <PatchFile>" << std::endl;
        exit(1); 
    }

    std::ifstream inputFile(argv[1]); 
    std::string inputString(std::istreambuf_iterator<char>(inputFile), {}); 

    PdRecord root;
    PdRecordGrammar<std::string::iterator> parser;
    std::cout << "Loaded file:\n " << inputString << std::endl;


    PdCanvas canvObj;
    PdCanvasGrammar<std::string::iterator> canvParse;
    bool canvSuccess = qi::phrase_parse(inputString.begin(), inputString.end(), canvParse, boost::spirit::ascii::space, canvObj);
    std::cout << "Canvas success: " << canvSuccess << std::endl; 

    bool success = qi::phrase_parse(inputString.begin(), inputString.end(), parser, boost::spirit::ascii::space, root); 
    std::cout << "Success: " << success << std::endl;

    return 0; 

}

I then tested the code on the following string, which should parse:

#N canvas 0 0 400 300 moo 1;

Running the code gives the following output:

Loaded file:
 #N canvas 0 0 400 300 moo 1;

<start>
  <try>#N canvas 0 0 400 30</try>
  <canvasRule>
    <try>#N canvas 0 0 400 30</try>
    <name>
      <try>moo 1;\n</try>
      <success> 1;\n</success>
      <attributes>[[m, o, o]]</attributes>
    </name>
    <success>\n</success>
    <attributes>[[0, 0, 400, 300, [m, o, o], 1]]</attributes>
  </canvasRule>
  <success></success>
  <attributes>[[0, 0, 400, 300, [m, o, o], 1]]</attributes>
</start>
Canvas success: 1
<recordRule>
  <try>#N canvas 0 0 400 30</try>
  <canvasRule>
    <try>#N canvas 0 0 400 30</try>
    <fail/>
  </canvasRule>
  <arrayDataRule>
    <try>#N canvas 0 0 400 30</try>
    <fail/>
  </arrayDataRule>
  <fail/>
</recordRule>
Success: 0

As one can see, the file is successfully parsed by the standalone PdCanvasGrammar rule, but fails to parse when I attempt to use the composite PdRecordGrammar rule.

I assume I'm doing something wrong in combining the rules together, but I don't know what.

As an aside, the arrayDataRule, which is defined as part of the PdRecordGrammar, successfully parses its input:

Loaded file:
 #A 1 2 3 4 5;
<start>
  <try>#A 1 2 3 4 5;</try>
  <canvasRule>
    <try>#A 1 2 3 4 5;</try>
    <fail/>
  </canvasRule>
  <fail/>
</start>
Canvas success: 0
<recordRule>
  <try>#A 1 2 3 4 5;</try>
  <canvasRule>
    <try>#A 1 2 3 4 5;</try>
    <fail/>
  </canvasRule>
  <arrayDataRule>
    <try>#A 1 2 3 4 5;</try>
    <success></success>
    <attributes>[[[1, 2, 3, 4, 5]]]</attributes>
  </arrayDataRule>
  <success></success>
  <attributes>[[[1, 2, 3, 4, 5]]]</attributes>
</recordRule>
Success: 1

So my assumption is that if I just took the PdCanvasGrammar rule and defined it intrinsically to the PdRecord rule, that it would work. But I'm trying to understand how to properly combine rules so that no single rule gets too large and unwieldy.

So my question is thus: Why does this rule as defined not work, and what is the proper way to combine rules into a composite rule?

Versions:

Boost: 1.75.0

C++ standard: 11

GCC: gcc version 4.8.5 20150623 (Red Hat 4.8.5-44) (GCC)


Solution

  • This:

    canvasRule    = PdCanvasGrammar<Iterator>();
    

    creates a reference to a dangling temporary. Fix it:

    template <typename Iterator>
    struct PdRecordGrammar : qi::grammar<Iterator, PdRecord(), qi::space_type> {
        PdRecordGrammar() : PdRecordGrammar::base_type(recordRule) {
            arrayDataRule = "#A" >> +qi::float_ >> ";";
            recordRule    = canvasRule | arrayDataRule;
    
            BOOST_SPIRIT_DEBUG_NODES((arrayDataRule)(recordRule))
        }
    
      private:
        qi::rule<Iterator, PdArrayData(), qi::space_type> arrayDataRule;
        PdCanvasGrammar<Iterator>                         canvasRule;
        qi::rule<Iterator, PdRecord(), qi::space_type>    recordRule;
    };
    

    Simplify

    • I've simplified some parts before. There's no reason to undo that.
    • You're not using the adapt_adt header (and that's for the best). Why include that?
    • Why use the pre-c++11 versions of the Fusion ADAPT_XXXX macros?
    • Why use phrase_parse when the grammar has no skipper (or overrides it internally). See Boost spirit skipper issues again for context.
    • There's redundant lit(), parentheses and ; in quite a few places.
    • You have using namespace qi but still qualify qi::float_.
    • One grammar declares rules private, the other doesn't
    • The scopes of variables doesn't match the use (in main). This leads to lifetime/variable re-use bugs. Or just code that's hard to maintain.
    • Your input can use const_iterator. It should.

    Live Demo

    // #define BOOST_SPIRIT_DEBUG
    #include <boost/spirit/include/qi.hpp>
    #include <boost/fusion/adapted.hpp>
    #include <iomanip>
    
    namespace qi = boost::spirit::qi;
    
    struct PdCanvas { int topX, topY, wX, wY, openOnLoad; std::string name; };
    struct PdArrayData { std::vector<float> data; };
    
    BOOST_FUSION_ADAPT_STRUCT(PdCanvas, , topX, topY, wX, wY, name, openOnLoad)
    BOOST_FUSION_ADAPT_STRUCT(PdArrayData, data)
    
    using PdRecord = boost::variant<PdArrayData, PdCanvas>;
    
    template <typename Iterator> struct PdCanvasGrammar : qi::grammar<Iterator, PdCanvas()> {
        PdCanvasGrammar() : PdCanvasGrammar::base_type(start) {
            using namespace qi;
            start      = skip(space)[canvasRule >> eoi];
            name       = +('\\' >> space | graph);
            canvasRule = "#N canvas" >> int_ >> int_ >> int_ >> int_ >> name >> int_ >> ';';
    
            BOOST_SPIRIT_DEBUG_NODES((start)(canvasRule)(name))
        }
    
      private:
        qi::rule<Iterator, PdCanvas()>                 start;
        qi::rule<Iterator, PdCanvas(), qi::space_type> canvasRule;
        qi::rule<Iterator, std::string()>              name;
    };
    
    template <typename Iterator>
    struct PdRecordGrammar : qi::grammar<Iterator, PdRecord(), qi::space_type> {
        PdRecordGrammar() : PdRecordGrammar::base_type(recordRule) {
            arrayDataRule = "#A" >> +qi::float_ >> ";";
            recordRule    = canvasRule | arrayDataRule;
    
            BOOST_SPIRIT_DEBUG_NODES((arrayDataRule)(recordRule))
        }
    
      private:
        qi::rule<Iterator, PdArrayData(), qi::space_type> arrayDataRule;
        PdCanvasGrammar<Iterator>                         canvasRule;
        qi::rule<Iterator, PdRecord(), qi::space_type>    recordRule;
    };
    
    int main() {
        std::string const input = "#N canvas 0 0 400 300 moo 1; ";
        std::cout << "Input:\n " << quoted(input) << std::endl;
    
        {
            PdCanvas obj;
            PdCanvasGrammar<std::string::const_iterator> const canvParse;
            std::cout << "Canvas success: " << parse(input.begin(), input.end(), canvParse, obj) << "\n";
        }
    
        {
            PdRecord                                     obj;
            PdRecordGrammar<std::string::const_iterator> const parser;
            std::cout << "Success: " << phrase_parse(input.begin(), input.end(), parser, qi::space, obj) << "\n";
        }
    }
    

    Output:

    Input:
     "#N canvas 0 0 400 300 moo 1; "
    Canvas success: 1
    Success: 1