Search code examples
c++boostboost-spiritboost-spirit-qi

Resolve ambiguous boost::spirit::qi grammar with lookahead


I want to parse a list of name-value pairs. Each list is terminated by a '.' followed by EOL. Within a pair, the name is separated from the value by a ':'. The pairs in a list are separated from each other by a ';'. E.g.

NAME1: VALUE1; NAME2: VALUE2; NAME3: VALUE3.<EOL>

The problem I have is that the values contain '.' and the last value always consumes the '.' at the EOL. Can I use some sort of lookahead to ensure the last '.' before the EOL is treated differently?


Solution

  • I have created a sample that presumably looks like what you have. The tweak is in the following line:

    value = lexeme [ *(char_ - ';' - ("." >> (eol|eoi))) ];
    

    Note how - ("." >> (eol|eoi)) means: exclude any . that is immediately followed by end-of-line or end-of-input.

    Test case (also live on http://liveworkspace.org/code/949b1d711772828606ddc507acf4fb4b):

    const std::string input = 
        "name1: value 1; other name : value #2.\n" 
        "name.sub1: value.with.periods; other.sub2: \"more fun!\"....\n";
    bool ok = doParse(input, qi::blank);
    

    Output:

    parse success
    data: name1 : value 1 ; other name  : value #2 . 
    data: name.sub1 : value.with.periods ; other.sub2 : "more fun!"... . 
    

    Full code:

    #include <iostream>
    #include <iterator>
    #include <map>
    #include <string>
    #include <vector>

    #include <boost/fusion/adapted.hpp>
    #include <boost/spirit/include/karma.hpp>
    #include <boost/spirit/include/qi.hpp>
    
    // Short aliases for the Boost namespaces used throughout this sample.
    namespace qi    = boost::spirit::qi;
    namespace karma = boost::spirit::karma;
    namespace phx   = boost::phoenix;

    // One parsed line: name -> value.
    using map_t  = std::map<std::string, std::string>;
    // The whole input: one map per line, in input order.
    using maps_t = std::vector<map_t>;
    
    /**
     * Qi grammar for a sequence of lines, each holding `name: value` pairs.
     *
     *   start := line (eol line)*
     *   line  := pair (';' pair)* '.'
     *   pair  := name ':' value
     *
     * A name is one or more characters other than ':'. A value is any run of
     * characters that stops at a ';' or at a '.' that is directly followed by
     * end-of-line or end-of-input, so periods inside a value are kept and only
     * the terminating one is left for `line` to consume.
     *
     * Both `name` and `value` are lexemes: the skipper is not applied inside
     * them, so embedded (and trailing) blanks become part of the attribute.
     *
     * NOTE(review): the default Skipper (qi::space_type) appears to skip line
     * breaks itself, which would prevent the explicit `eol` separator in
     * `start` from matching; the sample driver passes qi::blank instead.
     * Confirm before relying on the default.
     *
     * @tparam It       forward iterator over the input characters
     * @tparam Skipper  type of the skip parser handed to qi::phrase_parse
     */
    template <typename It, typename Skipper = qi::space_type>
        struct parser : qi::grammar<It, maps_t(), Skipper>
    {
        parser() : parser::base_type(start)
        {
            // Key: everything up to (not including) the ':' separator.
            name = qi::lexeme[ +(qi::char_ - ':') ];

            // Value: stop at a pair separator or at the line-terminating '.'.
            value = qi::lexeme[
                *(qi::char_ - (qi::lit(';') | ('.' >> (qi::eol | qi::eoi))))
            ];

            // One record: ';'-separated pairs closed by a '.'.
            line = (name >> ':' >> value) % ';' >> '.';

            // Whole input: records separated by line breaks.
            start = line % qi::eol;
        }

      private:
        template <typename Attr>
            using rule_t = qi::rule<It, Attr(), Skipper>;

        rule_t<std::string> name, value;
        rule_t<map_t>       line;
        rule_t<maps_t>      start;
    };
    
    /**
     * Parses `input` with the `parser` grammar and prints the result.
     *
     * On success, writes "parse success" followed by one "data: ..." line per
     * parsed map to std::cout. On failure, writes a diagnostic containing the
     * unconsumed input to std::cerr.
     *
     * The parse only counts as successful when the whole input was consumed,
     * apart from trailing whitespace. Previously any successfully parsed
     * prefix was reported as success and the remainder was silently dropped.
     *
     * @param input    character container to parse (anything usable with
     *                 std::begin / std::end)
     * @param skipper  skip parser forwarded to qi::phrase_parse
     * @return true if the complete input matched the grammar, false otherwise
     */
    template <typename C, typename Skipper>
        bool doParse(const C& input, const Skipper& skipper)
    {
        auto f(std::begin(input)), l(std::end(input));

        parser<decltype(f), Skipper> p;
        maps_t data;

        try
        {
            if (!qi::phrase_parse(f, l, p, skipper, data))
            {
                std::cerr << "parse failed: '" << std::string(f,l) << "'\n";
                return false;
            }

            // `line % eol` gives back a final line break that is not followed
            // by another line, so a terminating newline is still unconsumed
            // here. Eat trailing whitespace before checking for leftovers.
            qi::parse(f, l, *qi::space);

            if (f != l)
            {
                std::cerr << "trailing unparsed: '" << std::string(f,l) << "'\n";
                return false;
            }

            std::cout << "parse success\n";
            for (const auto& line : data)
                std::cout << "data: " << karma::format_delimited((karma::string << ':' << karma::string) % ';' << '.', ' ', line) << '\n';

            return true;
        } catch(const qi::expectation_failure<decltype(f)>& e)
        {
            // Only reachable if the grammar uses expectation points ('>').
            std::string frag(e.first, e.last);
            std::cerr << e.what() << "'" << frag << "'\n";
        }

        return false;
    }
    
    /**
     * Runs the parser on a two-line sample whose values contain periods.
     * Uses qi::blank as skipper so that line breaks stay visible to the
     * grammar. Exit status is 0 when doParse reports success, 255 otherwise.
     */
    int main()
    {
        const std::string sample =
            "name1: value 1; other name : value #2.\n"
            "name.sub1: value.with.periods; other.sub2: \"more fun!\"....\n";

        if (doParse(sample, qi::blank))
            return 0;

        return 255;
    }