Search code examples
c++parsingboostboost-spirit

Parsing list of 'key = value' with unknown keys with Boost::Spirit


I've a string like the following one:

[GENERAL]
FMax Antenna = 3000
FMin Antenna = 2000
Invalid key  = Invalid value
EMin Antenna = -50
EMax Antenna = 80

I want to parse it in order to save the values of FMin Antenna, FMax Antenna, EMin Antenna and EMax Antenna in a structure. I've created a Spirit parser, but it only partially works. Since the file can contain many key = value rows, I need to parse only the pairs I need (the keys whose values I must read) and ignore the others.

Both key and value can be alphanumeric strings with spaces and tabs.

I've defined inside the parser the keys that I want to read, but when I encounter an unknown key, I cannot read the keys that follow it (in the example, I can't read EMin Antenna and EMax Antenna because they are defined after an unknown key).

I've tried the code below. If I parse file1, which contains only the keys that I want to read, it works; but if I add unknown key = value pairs in the middle of the file, as in file2, it stops reading all subsequent lines.

How can I fix it and continue to parse the file after unknown key-value pairs?

#include <boost/optional/optional_io.hpp>
#include <boost/date_time/posix_time/posix_time.hpp>
#include <boost/date_time/posix_time/posix_time_io.hpp>
#include <boost/fusion/adapted/struct.hpp>
#include <boost/spirit/include/qi.hpp>
#include <boost/spirit/include/phoenix.hpp>

namespace qi = boost::spirit::qi;

// Sample input that contains only the four keys the grammar looks for.
// Note: the raw string literal starts with a line break before [GENERAL].
const std::string file1 = R"xx(
[GENERAL]
FMax Antenna = 3000
FMin Antenna = 2000
EMin Antenna = -50
EMax Antenna = 80
)xx";

// Same four settings plus two extra pairs ("pappa pio" and "Ciao") that the
// grammar has no dedicated alternative for; the first of them precedes
// "EMax Antenna".
const std::string file2 = R"xx(
[GENERAL]
FMax Antenna = 3000
FMin Antenna = 2000
EMin Antenna = -50
pappa pio = po po
EMax Antenna = 80
Ciao = 55
)xx";

// Antenna limits collected by the parser; every member starts at zero.
struct Data {
  double minFrequency{0.0};  // set from "FMin Antenna"
  double maxFrequency{0.0};  // set from "FMax Antenna"
  double minElevation{0.0};  // set from "EMin Antenna"
  double maxElevation{0.0};  // set from "EMax Antenna"
};

// Adapts Data as a Boost.Fusion sequence of its four double members, listed in
// declaration order.
// NOTE(review): the grammar below fills Data through semantic actions rather
// than automatic attribute propagation, so this adaptation looks unused here —
// confirm before removing.
BOOST_FUSION_ADAPT_STRUCT(
  Data,
  (double, minFrequency)
  (double, maxFrequency)
  (double, minElevation)
  (double, maxElevation)
)

// Qi grammar that reads a "[GENERAL]" section and stores the four antenna
// settings in a Data value.
//   It      - iterator type of the input
//   Skipper - skip parser type; defaults to qi::space_type
template <typename It, typename Skipper = qi::space_type>
struct grammar : qi::grammar<It, Data(), Skipper> {

  grammar() : grammar::base_type(start) {

    // Lazy accessors for the members of the rule's attribute (qi::_val); they
    // are the assignment targets of the semantic actions below.
    auto minFrequency = bind(&Data::minFrequency, qi::_val);
    auto maxFrequency = bind(&Data::maxFrequency, qi::_val);
    auto minElevation = bind(&Data::minElevation, qi::_val);
    auto maxElevation = bind(&Data::maxElevation, qi::_val);

    // Case-insensitive section header followed by any number of settings.
    start = qi::no_case["[GENERAL]"] >> *(
      // Known keys: each stores the parsed integer in the matching Data member.
      ("FMin Antenna" >> qi::lit('=') >> qi::int_)[minFrequency = qi::_1] |
      ("FMax Antenna" >> qi::lit('=') >> qi::int_)[maxFrequency = qi::_1] |
      ("EMin Antenna" >> qi::lit('=') >> qi::int_)[minElevation = qi::_1] |
      ("EMax Antenna" >> qi::lit('=') >> qi::int_)[maxElevation = qi::_1] |
      // Fallback meant to consume any other "key = value" pair.
      // NOTE(review): with the default qi::space skipper, line breaks are
      // skipped as well and nothing in this alternative marks the end of a
      // line — likely the reason parsing stops after an unknown key.
      (+(qi::alnum | qi::blank) >> qi::lit('=') >> +(qi::alnum | qi::blank)) // Issue here?
    );
  }

private:

  // Entry rule; its attribute is the Data value being filled.
  qi::rule<It, Data(), Skipper> start;
};

int main() {
  using It = std::string::const_iterator;
  Data parsed1, parsed2;
  bool ok = qi::phrase_parse(file1.begin(), file1.end(), grammar<It>(), qi::space, parsed1);
  std::cout << "--- File 1 ---" << std::endl;
  std::cout << "parsed   = " << std::boolalpha << ok << std::endl;
  std::cout << "min freq = " << parsed1.minFrequency << std::endl;
  std::cout << "max freq = " << parsed1.maxFrequency << std::endl;
  std::cout << "min elev = " << parsed1.minElevation << std::endl;
  std::cout << "max elev = " << parsed1.maxElevation << std::endl;
  std::cout << "--- File 2 ---" << std::endl;
  ok = qi::phrase_parse(file2.begin(), file2.end(), grammar<It>(), qi::space, parsed2);
  std::cout << "parsed   = " << std::boolalpha << ok << std::endl;
  std::cout << "min freq = " << parsed2.minFrequency << std::endl;
  std::cout << "max freq = " << parsed2.maxFrequency << std::endl;
  std::cout << "min elev = " << parsed2.minElevation << std::endl;
  std::cout << "max elev = " << parsed2.maxElevation << std::endl;
  return 0;
}

Output:

--- File 1 ---
parsed   = true
min freq = 2000
max freq = 3000
min elev = -50
max elev = 80
--- File 2 ---
parsed   = true
min freq = 2000
max freq = 3000
min elev = -50
max elev = 0  <-- This should be 80 like in the first parsing

Solution

  • You're confused about skippers.

    • Newlines are significant in your grammar, which is why you need a skipper that doesn't eat them
    • In your rules, you match +(alnum|blank) which is never gonna work because the skipper eats everything that matches blank anyways

    (See Boost spirit skipper issues for background)

    Other notes:

    • You don't need Fusion adaptation unless you want auto-magic attribute propagation. You're not using it right now.

    Solving It

    I'd make things very explicit:

    // Keys of interest: each alternative stores the parsed integer in the
    // matching Data member.
    known =
        ("FMin Antenna" >> lit('=') >> int_)[minFrequency = _1] |
        ("FMax Antenna" >> lit('=') >> int_)[maxFrequency = _1] |
        ("EMin Antenna" >> lit('=') >> int_)[minElevation = _1] |
        ("EMax Antenna" >> lit('=') >> int_)[maxElevation = _1]
        ;
    
    // Any other alphanumeric "key = value" pair; it is matched, nothing is stored.
    unknown = +alnum >> '=' >> +alnum;
    
    // One setting per line: a known or unknown pair followed by at least one
    // end-of-line. The inherited attribute (_r1) is handed on to known.
    setting = (known(_r1) | unknown) >> +eol;
    
    // Section header on its own line, then any number of settings, each given
    // the rule's own attribute (_val) as inherited attribute.
    start =
        no_case["[GENERAL]"] >> eol 
        >> *setting(_val);
    

    Splitting up in rules is a little tricky, because *setting would try to synthesize a container attribute, making it impossible to propagate to the actual Data attribute.

    I solved it by passing the attribute by reference in an inherited attribute, which disables automatic attribute propagation.

    Alternatively, you could add a semantic action of any kind to inhibit automatic attribute propagation

    DEMO

    Live On Coliru

    #include <boost/spirit/include/phoenix.hpp>
    #include <boost/spirit/include/qi.hpp>
    #include <cctype>
    #include <iomanip>
    #include <iostream>
    #include <string>
    
    namespace qi = boost::spirit::qi;
    
    // Antenna limits collected by the parser; every member starts at zero.
    struct Data {
        double minFrequency{0.0};  // set from "FMin Antenna"
        double maxFrequency{0.0};  // set from "FMax Antenna"
        double minElevation{0.0};  // set from "EMin Antenna"
        double maxElevation{0.0};  // set from "EMax Antenna"
    };
    
    /// Qi grammar for a "[GENERAL]" section made of one `key = value` setting
    /// per line.
    ///
    /// The four known antenna keys are stored in the Data attribute; any other
    /// alphanumeric pair is matched and discarded. Line breaks before the
    /// header are tolerated, and the last line does not need a trailing newline.
    ///
    /// @tparam It       iterator type of the input
    /// @tparam Skipper  skip parser type (default qi::blank_type); it must not
    ///                  consume line breaks, because they terminate a setting
    template <typename It, typename Skipper = qi::blank_type>
    struct grammar : qi::grammar<It, Data(), Skipper> {
    
        grammar() : grammar::base_type(start) {
    
            using namespace qi;
    
            // Lazy accessors for the members of the Data passed in as inherited
            // attribute (_r1); they are the targets of the semantic actions.
            auto minFrequency = bind(&Data::minFrequency, _r1);
            auto maxFrequency = bind(&Data::maxFrequency, _r1);
            auto minElevation = bind(&Data::minElevation, _r1);
            auto maxElevation = bind(&Data::maxElevation, _r1);
    
            // Keys of interest: each alternative stores the parsed integer in
            // the matching Data member.
            known =
                ("FMin Antenna" >> lit('=') >> int_)[minFrequency = _1] |
                ("FMax Antenna" >> lit('=') >> int_)[maxFrequency = _1] |
                ("EMin Antenna" >> lit('=') >> int_)[minElevation = _1] |
                ("EMax Antenna" >> lit('=') >> int_)[maxElevation = _1]
                ;
    
            // Any other alphanumeric pair; matched, but nothing is stored.
            unknown = +alnum >> '=' >> +alnum;
    
            // A setting ends at one or more line breaks, or at end of input so
            // that a final line without a trailing newline is accepted as well.
            setting = (known(_r1) | unknown) >> (+eol | eoi);
    
            start =
                *eol  // tolerate line breaks before the section header
                >> no_case["[GENERAL]"] >> (+eol | eoi)
                >> *setting(_val);
        }
    
      private:
        // Entry rule; its attribute is the Data value being filled.
        qi::rule<It, Data(), Skipper> start;
        // Rules that receive the target Data by reference as inherited attribute.
        qi::rule<It, void(Data&), Skipper> setting, known;
        // Rule without attribute: unknown pairs are consumed and dropped.
        qi::rule<It, Skipper> unknown;
    };
    
    /// Runs the grammar over two sample inputs and prints, for each of them,
    /// the parse status, the extracted values and any input left unparsed
    /// (non-printable bytes are shown as \xNN escapes).
    int main() {
        using It = std::string::const_iterator;
        grammar<It> const g;
    
        for (std::string const file : {
                "[GENERAL]\nFMax Antenna = 3000\nFMin Antenna = 2000\nEMin Antenna = -50\nEMax Antenna = 80\n",
                "[GENERAL]\nFMax Antenna = 3000\nFMin Antenna = 2000\nEMin Antenna = -50\npappa pio = po po\nEMax Antenna = 80\nCiao = 55\n",
            })
        {
            Data parsed;
            It f = begin(file);
            It const l = end(file);
            bool const ok = qi::phrase_parse(f, l, g, qi::blank, parsed);
    
            std::cout << "--- File ---" << "\n";
            std::cout << "parsed   = " << std::boolalpha << ok << "\n";
            if (ok) {
                std::cout << "min freq = " << parsed.minFrequency << "\n";
                std::cout << "max freq = " << parsed.maxFrequency << "\n";
                std::cout << "min elev = " << parsed.minElevation << "\n";
                std::cout << "max elev = " << parsed.maxElevation << "\n";
            }
    
            // phrase_parse advances f; anything between f and l was not consumed.
            if (f != l) {
                std::cout << "Remaining unparsed: ";
                for (; f != l; ++f) {
                    // Go through unsigned char: passing a negative char value to
                    // std::isprint is undefined, and it would sign-extend in the
                    // hexadecimal escape.
                    auto const byte = static_cast<unsigned char>(*f);
                    if (std::isprint(byte)) {
                        std::cout << *f;
                    } else {
                        std::cout << "\\x" << std::setw(2) << std::setfill('0') << std::hex
                                  << static_cast<int>(byte);
                    }
                }
                // std::hex and the fill character are sticky: restore the stream
                // defaults and end the line so the next report starts cleanly.
                std::cout << std::dec << std::setfill(' ') << "\n";
            }
        }
    }
    

    Prints

    --- File ---
    parsed   = true
    min freq = 2000
    max freq = 3000
    min elev = -50
    max elev = 80
    --- File ---
    parsed   = true
    min freq = 2000
    max freq = 3000
    min elev = -50
    max elev = 80