Search code examples
c++boostboost-spiritboost-spirit-qi

Boost Spirit - Skip unwanted lines among key value pairs


I have a list of key-value pairs, separated by EOL. I got Boost Spirit to do what I want for properly formatted lines (e.g. "MyKey : MyValue \r\n MyKey2 : MyValue2").

Now my problem is that I want to skip lines that do not comply. For example:

This is some title line! 
Key1:Value1
Some more gibberish to skip
Key2:Value2

I came up with the following code that I thought would work, but instead, the resulting map is empty and parsing fails.

  • In my KeyRule, I added '- qi::eol' to avoid eating up the invalid line until the first KeyValue separator is encountered.
  • In my ItemRule, both PairRules are made optional, and the eol is matched one or more times to handle consecutive line breaks.

I read the following thread: "Why does parsing a blank line with Spirit produce an empty key value pair in map?" It skips comment lines (starting with #) via a custom skipper, but in my case I want to skip ANY line that does not contain the key-value separator ':'. There has to be an elegant way to do this.

#include <iostream>
#include <string>
#include <map>

#include <boost/spirit/include/qi.hpp>
#include <boost/fusion/include/std_pair.hpp>

namespace qi = boost::spirit::qi;

// Qi grammar that parses "key : value" lines into a std::map<std::string, std::string>.
// Every rule is declared with the Skipper type (qi::blank_type by default), so the
// skipper supplied to phrase_parse is active inside all four rules.
//
// NOTE(review): ItemRule has no branch that consumes the text of a line lacking ':'.
// When PairRule fails on such a line, the only remaining parsers are qi::eol, so the
// line's characters are never consumed and the input is left unparsed from that point.
template <typename Iterator, typename Skipper = qi::blank_type>
struct KeyValueParser : qi::grammar<Iterator, std::map<std::string, std::string>(), Skipper> {
    KeyValueParser() : KeyValueParser::base_type(ItemRule) {
        // An optional pair, then any number of (one or more eol + optional pair),
        // then an optional trailing eol.
        ItemRule = -PairRule >> *(+qi::eol >> -PairRule) >> -qi::eol;
        // A pair is a key and a value separated by a literal ':'.
        PairRule = KeyRule >> ':' >> ValueRule;
        // Key: one or more characters that are neither ':' nor an end-of-line;
        // raw[] exposes the matched source range as the attribute.
        KeyRule = qi::raw[+(qi::char_ - ':' - qi::eol)];
        // Value: one or more characters up to (not including) the end-of-line.
        ValueRule = qi::raw[+(qi::char_ - qi::eol)];
    }
    // Start rule; its attribute is the resulting map.
    qi::rule<Iterator, std::map<std::string, std::string>(), Skipper> ItemRule;
    // Produces one (key, value) entry.
    qi::rule<Iterator, std::pair<std::string, std::string>(), Skipper> PairRule;
    // Text to the left of the ':' separator.
    qi::rule<Iterator, std::string(), Skipper> KeyRule;
    // Text to the right of the ':' separator.
    qi::rule<Iterator, std::string(), Skipper> ValueRule;
};

// Runs KeyValueParser over a fixed sample and reports either the parsed
// entries or the portion of the input that was left unconsumed.
int main() {
    typedef std::map<std::string, std::string> MyMap;

    const std::string input = " Line To Skip! \r\n  My Key : Value \r\n My2ndKey : Long    Value \r\n";

    std::string::const_iterator cursor = input.begin();
    std::string::const_iterator last = input.end();

    KeyValueParser<std::string::const_iterator> parser;
    MyMap entries;

    // The run only counts as a success when the parser matched AND consumed everything.
    const bool matched = qi::phrase_parse(cursor, last, parser, qi::blank, entries);
    const bool fully_consumed = matched && (cursor == last);

    if (!fully_consumed) {
        std::cout << "Something failed. Unparsed: ->|" << std::string(cursor, last) << "|<-" << std::endl;
    } else {
        std::cout << "Success." << std::endl;
        MyMap::const_iterator entry = entries.begin();
        while (entry != entries.end()) {
            std::cout << "\"" << entry->first << "\" : \"" << entry->second << "\"" << std::endl;
            ++entry;
        }
    }

    // Wait for a key press so the console output stays visible.
    getchar();
    return 0;
}

Solution

  • The most elegant approach I can think of is to parse a key-value pair /optionally/, followed by any gibberish until the end of the line.

    You could write:

    ItemRule  = -PairRule % (*~char_("\r\n") >> eol);
    

    The only caveat is that on gibberish lines, the "default" pair (empty key & value) will be inserted, so you'd have to remove that post-parse.

    An equivalent way to write it (but less elegant) would be:

    ItemRule  = (hold[PairRule] | omit[ *~char_("\r\n") ]) % eol;
    

    DEMO

    Here's a full demo. Note I also moved the skipper knowledge inside the grammar (it's essential to the correct operation of the grammar).

    Finally, I used BOOST_SPIRIT_DEBUG to print debug output.

    Live On Coliru

    #define BOOST_SPIRIT_DEBUG
    #include <boost/spirit/include/qi.hpp>
    #include <boost/fusion/include/std_pair.hpp>
    #include <map>
    
    namespace qi = boost::spirit::qi;
    
    // Qi grammar that collects "key : value" lines into a std::map<std::string, std::string>
    // and tolerates lines that do not form a valid pair.
    //
    // The grammar itself has no skipper parameter: blank skipping is applied internally
    // through skip(blank), so it is driven with qi::parse rather than qi::phrase_parse.
    //
    // Lines without a valid pair still contribute a default-constructed (empty key, empty
    // value) entry to the map; callers are expected to erase the "" key after parsing.
    template <typename Iterator>
    struct KeyValueParser : qi::grammar<Iterator, std::map<std::string, std::string>()> {
        KeyValueParser() : KeyValueParser::base_type(ItemRule) {

            using namespace qi;

            // One element per line, elements separated by eol. Each element is either a
            // pair or, failing that, the rest of the line, whose text is discarded by omit[].
            // hold[] keeps a partially filled pair from leaking into the result when
            // PairRule fails midway (e.g. a key was read but no ':' followed).
            ItemRule  = skip(blank) [ hold[PairRule] | omit[ *~char_("\r\n") ] ] % eol;
            // Equivalent formulation (optional pair, with "rest of line + eol" as the
            // list separator). Kept as a comment: assigning ItemRule twice would only
            // build a parser that is immediately overwritten.
            //   ItemRule = skip(blank) [ -PairRule % (*~char_("\r\n") >> eol) ];
            PairRule  = KeyRule >> ':' >> ValueRule;
            // Key: one or more characters other than CR, LF or ':'.
            KeyRule   = +~char_("\r\n:");
            // Value: one or more characters other than CR or LF.
            ValueRule = +~char_("\r\n");

            BOOST_SPIRIT_DEBUG_NODES((ItemRule)(PairRule)(KeyRule)(ValueRule))
        }
      private:
        qi::rule<Iterator, std::map<std::string, std::string>()> ItemRule;
        // Declared with the blank skipper so blanks around the key, ':' and value are skipped.
        qi::rule<Iterator, std::pair<std::string, std::string>(), qi::blank_type> PairRule;
        // lexemes: declared without a skipper, so blanks inside (and trailing) a key or
        // value are kept in the attribute
        qi::rule<Iterator, std::string()> KeyRule, ValueRule;
    };
    
    // Demo driver: parses a sample containing both valid pairs and junk lines,
    // then prints the surviving key/value entries.
    int main() {
        const std::string input = R"(
     Line To Skip! 
      My Key : Value 
    Some more gibberish to skip
     My2ndKey : Long    Value 
    )";

        std::string::const_iterator cursor = input.begin();
        std::string::const_iterator last = input.end();

        KeyValueParser<std::string::const_iterator> parser;
        std::map<std::string, std::string> entries;

        // Success requires both a match and full consumption of the input.
        const bool matched = qi::parse(cursor, last, parser, entries);
        if (!(matched && cursor == last)) {
            std::cout << "Something failed. Unparsed: ->|" << std::string(cursor, last) << "|<-\n";
            return 0;
        }

        std::cout << "Success.\n";

        // Non-pair lines leave behind an entry with an empty key; remove it.
        entries.erase("");

        for (auto& entry : entries)
            std::cout << "\"" << entry.first << "\" : \"" << entry.second << "\"\n";
    }
    

    Prints

    Success.
    "My Key " : "Value "
    "My2ndKey " : "Long    Value "
    

    With debug information

    <ItemRule>
      <try>\n Line To Skip! \n  M</try>
      <PairRule>
        <try>\n Line To Skip! \n  M</try>
        <KeyRule>
          <try>\n Line To Skip! \n  M</try>
          <fail/>
        </KeyRule>
        <fail/>
      </PairRule>
      <PairRule>
        <try> Line To Skip! \n  My</try>
        <KeyRule>
          <try>Line To Skip! \n  My </try>
          <success>\n  My Key : Value \nS</success>
          <attributes>[[L, i, n, e,  , T, o,  , S, k, i, p, !,  ]]</attributes>
        </KeyRule>
        <fail/>
      </PairRule>
      <PairRule>
        <try>  My Key : Value \nSo</try>
        <KeyRule>
          <try>My Key : Value \nSome</try>
          <success>: Value \nSome more g</success>
          <attributes>[[M, y,  , K, e, y,  ]]</attributes>
        </KeyRule>
        <ValueRule>
          <try>Value \nSome more gib</try>
          <success>\nSome more gibberish</success>
          <attributes>[[V, a, l, u, e,  ]]</attributes>
        </ValueRule>
        <success>\nSome more gibberish</success>
        <attributes>[[[M, y,  , K, e, y,  ], [V, a, l, u, e,  ]]]</attributes>
      </PairRule>
      <PairRule>
        <try>Some more gibberish </try>
        <KeyRule>
          <try>Some more gibberish </try>
          <success>\n My2ndKey : Long   </success>
          <attributes>[[S, o, m, e,  , m, o, r, e,  , g, i, b, b, e, r, i, s, h,  , t, o,  , s, k, i, p]]</attributes>
        </KeyRule>
        <fail/>
      </PairRule>
      <PairRule>
        <try> My2ndKey : Long    </try>
        <KeyRule>
          <try>My2ndKey : Long    V</try>
          <success>: Long    Value \n</success>
          <attributes>[[M, y, 2, n, d, K, e, y,  ]]</attributes>
        </KeyRule>
        <ValueRule>
          <try>Long    Value \n</try>
          <success>\n</success>
          <attributes>[[L, o, n, g,  ,  ,  ,  , V, a, l, u, e,  ]]</attributes>
        </ValueRule>
        <success>\n</success>
        <attributes>[[[M, y, 2, n, d, K, e, y,  ], [L, o, n, g,  ,  ,  ,  , V, a, l, u, e,  ]]]</attributes>
      </PairRule>
      <PairRule>
        <try></try>
        <KeyRule>
          <try></try>
          <fail/>
        </KeyRule>
        <fail/>
      </PairRule>
      <success></success>
      <attributes>[[[[], []], [[M, y,  , K, e, y,  ], [V, a, l, u, e,  ]], [[M, y, 2, n, d, K, e, y,  ], [L, o, n, g,  ,  ,  ,  , V, a, l, u, e,  ]]]]</attributes>
    </ItemRule>