Search code examples
c++ boost-spirit boost-spirit-qi

Rule syntax to parse string decorated by double braces in Spirit Qi


I'm trying to parse multiline content that includes the following pattern: {{some_dynamic_string}}

I tried defining the rule as follows, but the parse fails:

token_rule = qi::lit("{{") >> +qi::char_ >> qi::lit("}}");

The debug output for parsing shows the following:

<start_rule>
  <try>{{foo|bar}}\n</try>
  <token_rule>
    <try>{{foo|bar}}\n</try>
    <fail/>
  </token_rule>
  <fail/>
</start_rule>

It is trying to parse the first line of the multiline content, which happens to consist of nothing but the pattern. What is wrong with the rule? Do I need to exclude {{ and }} from the second item in the sequence? If so, how do I do that?


Solution

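  • You have to prevent char_ from matching where }} would match. As written, +qi::char_ is greedy: it consumes every remaining character, including the closing }}, so the final qi::lit("}}") has nothing left to match and the whole rule fails.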

    The simplest way:

    // (qi::char_ - "}}") accepts a character only where "}}" does not match,
    // so the repetition stops right before the closing braces.
    token_rule = "{{" >> +(qi::char_ - "}}") >> "}}";
    

    DEMO

    Full demo with debug info for comparison:

    Live On Coliru

    // Defined before the Spirit header is included so that the
    // BOOST_SPIRIT_DEBUG_NODES call in the grammar produces trace output.
    #define BOOST_SPIRIT_DEBUG
    // Standard headers for std::cout, std::string and std::vector, which this
    // program uses directly; do not rely on Spirit pulling them in.
    #include <iostream>
    #include <string>
    #include <vector>
    #include <boost/spirit/include/qi.hpp>
    namespace qi = boost::spirit::qi;
    
    // Attribute types: the text of one token, and the list of all tokens.
    using string = std::string;
    using strings = std::vector<std::string>;
    
    // Grammar that collects the text of every {{...}} token in the input into
    // a `strings` attribute, skipping whitespace between tokens.
    template <typename Iterator>
    struct Grammar : qi::grammar<Iterator, strings()>
    {
        Grammar() : Grammar::base_type(start)
        {
            // One token: the characters between "{{" and the next "}}".
            // The difference parser keeps char_ from eating the closing braces.
            token_rule = qi::lit("{{") >> +(qi::char_ - qi::lit("}}")) >> qi::lit("}}");

            // Zero or more tokens in a row.
            tokens_rule = *token_rule;

            // Entry point: declared without a skipper, so qi::space is
            // installed here for the rule it delegates to.
            start = qi::skip(qi::space)[tokens_rule];

            BOOST_SPIRIT_DEBUG_NODES((start)(tokens_rule)(token_rule))
        }

      private:
        qi::rule<Iterator, strings()> start;
        // Uses the space skipper, so whitespace between tokens is ignored.
        qi::rule<Iterator, strings(), qi::space_type> tokens_rule;
        // No skipper (an implicit lexeme): whitespace inside the braces is kept.
        qi::rule<Iterator, string()> token_rule;
    };
    
    int main() {
        using It = std::string::const_iterator;
        Grammar<It> g;
        for (std::string const input : { "{{a}}", " {{a b}} {{more}}\n", "{{a{b} }}" })
        {
            It f = input.begin(), l = input.end();
            strings parsed;
            if (parse(f, l, g, parsed)) {
                std::cout << "Parsed '" << input << "'\n";
                for(auto& tok : parsed)
                    std::cout << " -- '" << tok << "'\n";
            } else {
                std::cout << "Parse failed\n";
            }
    
            if (f != l)
                std::cout << "Remaining unparsed: '" << std::string(f,l) << "'\n";
        }
    }
    

    Printing

    Parsed '{{a}}'
     -- 'a'
    Parsed ' {{a b}} {{more}}
    '
     -- 'a b'
     -- 'more'
    Parsed '{{a{b} }}'
     -- 'a{b} '
    

    With debug info:

    <start>
      <try>{{a}}</try>
      <tokens_rule>
        <try>{{a}}</try>
        <token_rule>
          <try>{{a}}</try>
          <success></success>
          <attributes>[[a]]</attributes>
        </token_rule>
        <token_rule>
          <try></try>
          <fail/>
        </token_rule>
        <success></success>
        <attributes>[[[a]]]</attributes>
      </tokens_rule>
      <success></success>
      <attributes>[[[a]]]</attributes>
    </start>
    Parsed '{{a}}'
     -- 'a'
    <start>
      <try> {{a b}} {{more}}\n</try>
      <tokens_rule>
        <try> {{a b}} {{more}}\n</try>
        <token_rule>
          <try>{{a b}} {{more}}\n</try>
          <success> {{more}}\n</success>
          <attributes>[[a,  , b]]</attributes>
        </token_rule>
        <token_rule>
          <try>{{more}}\n</try>
          <success>\n</success>
          <attributes>[[m, o, r, e]]</attributes>
        </token_rule>
        <token_rule>
          <try></try>
          <fail/>
        </token_rule>
        <success></success>
        <attributes>[[[a,  , b], [m, o, r, e]]]</attributes>
      </tokens_rule>
      <success></success>
      <attributes>[[[a,  , b], [m, o, r, e]]]</attributes>
    </start>
    Parsed ' {{a b}} {{more}}
    '
     -- 'a b'
     -- 'more'
    <start>
      <try>{{a{b} }}</try>
      <tokens_rule>
        <try>{{a{b} }}</try>
        <token_rule>
          <try>{{a{b} }}</try>
          <success></success>
          <attributes>[[a, {, b, },  ]]</attributes>
        </token_rule>
        <token_rule>
          <try></try>
          <fail/>
        </token_rule>
        <success></success>
        <attributes>[[[a, {, b, },  ]]]</attributes>
      </tokens_rule>
      <success></success>
      <attributes>[[[a, {, b, },  ]]]</attributes>
    </start>
    Parsed '{{a{b} }}'
     -- 'a{b} '