Search code examples
c++ parsing boost boost-spirit boost-spirit-qi

boost::spirit::qi matches[] trouble


I am having trouble with parsing in my project. I have managed to reduce it to a minimal code sample in which the problem still appears. For now, it tries to parse a string like

"bool func1; const bool func2;"

Here is the minimal sample code (note that it uses the similarly shaped input "aaa aaaa; const bbb bbbb;"):

namespace qi = boost::spirit::qi;
using qi::lexeme;
using qi::string;
typedef boost::spirit::ascii::space_type TSkipper;

// Attribute type produced by the `field` rule: one parsed declaration of the
// form "[const] <type> <name>".
struct MyField
{
    bool isConst;       // receives the result of qi::matches[string("const")]
    std::string mtype;  // receives the first valid_symbols match
    std::string name;   // receives the second valid_symbols match
};

// Expose MyField as a Boost.Fusion sequence so that a rule declared with the
// MyField() attribute can store its sub-parser results into the members,
// in the order listed here.
BOOST_FUSION_ADAPT_STRUCT
    (
    MyField,
    (bool, isConst)
    (std::string, mtype)
    (std::string, name)
    )

// Minimal reproduction: parses a sequence of "[const] <type> <name>;"
// declarations into a list of MyField and asserts that parsing succeeded.
void SpiritTestSimple()
{
    qi::rule<std::string::const_iterator, std::string(), TSkipper> valid_symbols;
    qi::rule<std::string::const_iterator, MyField(), TSkipper> field;

    // Kleene star: zero or more letters, so this rule also matches an empty string.
    valid_symbols %= lexeme[*qi::char_("a-zA-Z")];

    // matches[] always succeeds and yields true/false for the "const" keyword.
    field %= qi::matches[string("const")] >> valid_symbols >> valid_symbols;

    std::string data = "aaa aaaa; const bbb bbbb;";

    //-----------------------------------------------------------------
    std::string::const_iterator first = data.begin(), last = data.end();
    std::list<MyField> parsed_vals;
    // Start from false: if phrase_parse throws, the assignment below never
    // happens and the assertion must not read an indeterminate value.
    bool is_parsed = false;
    try
    {
        // `>` is an expectation point: once `field` has matched, a missing ';'
        // throws qi::expectation_failure instead of merely failing the parse.
        is_parsed = qi::phrase_parse(   first, last,
                                        +(field > ';'),
                                        boost::spirit::ascii::space, parsed_vals);
    }
    catch(const qi::expectation_failure<std::string::const_iterator>& e)
    {
        // [e.first, e.last) is the unconsumed input at the point of failure.
        std::string frag(e.first, e.last);
        std::cout << e.what() << "'" << frag << "'" << std::endl;
    }

    BOOST_ASSERT(is_parsed && "the example not parsed");
}

I wrote

+(field > ';')

because in my project this block can be absent. Each function declaration must end with ';'. When the above code sample runs, an exception is thrown and the following record appears in the console:

boost::spirit::qi::expectation_failure''

If I understand correctly, after the last semicolon the parser tries to parse the next field rule. The "matches" part succeeds (because it returns a value in any case), but the following parse of valid_symbols fails because there is no data after the last semicolon. So parsing fails.

How can I avoid the parse failure in my case?


Solution

  • The biggest problem is with

    valid_symbols = qi::lexeme[*qi::char_("a-zA-Z")];
    

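    The trouble here is that it can match the empty string. So the whole production for "field" can be an empty string: after the last ';' the + repetition tries "field" once more, it succeeds without consuming any input, and the expectation point (> ';') then throws because no ';' follows. If you change it to (using qi::alpha as a shorthand):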

    valid_symbols = qi::lexeme[+qi::alpha];
    

    You will find it does parse the input correctly.

    Further notes:

    • when you say "I wrote +(field > ';') because in my project this block can be absent" that seems contradictory to me. If you wanted to make the whole block optional, you'd write

      *(field > ';')
      

      or, if you want to allow repeated ;; (empty statements), I'd expect

      *(-field > ';')
      

      (or perhaps +(-field > ';')).

    • you will want to learn to use BOOST_SPIRIT_DEBUG to pinpoint your problems on your own

    See the complete working example for a demonstration of how to use Spirit's built-in debug capabilities: Live On Coliru

    #define BOOST_SPIRIT_DEBUG
    #include <boost/fusion/adapted/struct.hpp>
    #include <boost/spirit/include/qi.hpp>
    
    namespace qi = boost::spirit::qi;
    typedef boost::spirit::ascii::space_type TSkipper;
    
    // Attribute type produced by the `field` rule: one parsed declaration of
    // the form "[const] <type> <name>".
    struct MyField
    {
        bool isConst;       // receives the result of qi::matches[qi::string("const")]
        std::string mtype;  // receives the first valid_symbols match
        std::string name;   // receives the second valid_symbols match
    };
    
    // Expose MyField as a Boost.Fusion sequence so that a rule declared with
    // the MyField() attribute can store its sub-parser results into the
    // members, in the order listed here.
    BOOST_FUSION_ADAPT_STRUCT
        (
         MyField,
         (bool, isConst)
         (std::string, mtype)
         (std::string, name)
        )
    
    // Grammar for one or more "[const] <type> <name>;" declarations.
    //
    // Template parameters:
    //   It       - iterator type of the input being parsed; used by the grammar
    //              base class and by every rule, so the grammar works for any
    //              iterator type rather than only std::string::const_iterator.
    //   TSkipper - skip parser type (defaults to ASCII whitespace).
    //
    // Synthesized attribute: std::list<MyField>, one element per declaration.
    template <typename It, typename TSkipper = boost::spirit::ascii::space_type>
    struct MyGrammar : qi::grammar<It, std::list<MyField>(), TSkipper>
    {
        MyGrammar() : MyGrammar::base_type(start) {
            // One or more letters; `+` (not `*`) so the rule can never match
            // an empty string. lexeme[] keeps the letters contiguous.
            valid_symbols = qi::lexeme[+qi::alpha];
            // matches[] always succeeds and yields whether "const" was present.
            field         = qi::matches[qi::string("const")] >> valid_symbols >> valid_symbols;
            // `>` is an expectation point: a field that is not followed by ';'
            // throws qi::expectation_failure.
            start         = +(field > ";");

            // Names the rules and enables tracing when BOOST_SPIRIT_DEBUG is defined.
            BOOST_SPIRIT_DEBUG_NODES((valid_symbols)(field)(start))
        }
      private:
        // The iterator type must agree with the grammar's own `It`.
        qi::rule<It, std::string(),        TSkipper> valid_symbols;
        qi::rule<It, MyField(),            TSkipper> field;
        qi::rule<It, std::list<MyField>(), TSkipper> start;
    };
    
    int main()
    {
        std::string const data = "aaa aaaa; const bbb bbbb;";
    
        //-----------------------------------------------------------------
        std::string::const_iterator first = data.begin(), last = data.end();
        std::list<MyField> parsed_vals;
    
        bool is_parsed = false;
    
        try
        {
            MyGrammar<std::string::const_iterator> grammar;
    
            is_parsed = qi::phrase_parse(   first, last,  
                                            grammar, 
                                            boost::spirit::ascii::space, 
                                            parsed_vals);
        }
        catch(const qi::expectation_failure<std::string::const_iterator>& e)
        {
            std::string frag(e.first, e.last);
            std::cout << "Expectation failure: " << e.what() << " at '" << frag << "'" << std::endl;
        }
    
        BOOST_ASSERT(is_parsed && "the example not parsed");
    }
    

    Which prints:

    <start>
    <try>aaa aaaa; const bbb </try>
    <field>
        <try>aaa aaaa; const bbb </try>
        <valid_symbols>
        <try>aaa aaaa; const bbb </try>
        <success> aaaa; const bbb bbb</success>
        <attributes>[[a, a, a]]</attributes>
        </valid_symbols>
        <valid_symbols>
        <try> aaaa; const bbb bbb</try>
        <success>; const bbb bbbb;</success>
        <attributes>[[a, a, a, a]]</attributes>
        </valid_symbols>
        <success>; const bbb bbbb;</success>
        <attributes>[[0, [a, a, a], [a, a, a, a]]]</attributes>
    </field>
    <field>
        <try> const bbb bbbb;</try>
        <valid_symbols>
        <try> bbb bbbb;</try>
        <success> bbbb;</success>
        <attributes>[[b, b, b]]</attributes>
        </valid_symbols>
        <valid_symbols>
        <try> bbbb;</try>
        <success>;</success>
        <attributes>[[b, b, b, b]]</attributes>
        </valid_symbols>
        <success>;</success>
        <attributes>[[1, [b, b, b], [b, b, b, b]]]</attributes>
    </field>
    <field>
        <try></try>
        <valid_symbols>
        <try></try>
        <fail/>
        </valid_symbols>
        <fail/>
    </field>
    <success></success>
    <attributes>[[[0, [a, a, a], [a, a, a, a]], [1, [b, b, b], [b, b, b, b]]]]</attributes>
    </start>