Search code examples
boost-spirit, boost-spirit-qi

boost::spirit::qi lookahead to match last occurrence in string


Is it possible to use boost::spirit::qi to parse the following?

 A_B       --> (A, B)
 A_B_C     --> (A_B, C)
 A_B_C_D   --> (A_B_C, D)
 A_B_C_D_E --> (A_B_C_D, E)

I would like to generate rules such that I can match the last '_' in a string and split the string on it.


Solution

  • This looks like a strange grammar, and a better job for a specialized function (especially since the source "tokens" are actually very short in your samples).

    However, without further ado, here's what you could do with Spirit:

    name  = lexeme [ +(graph - '_') ]; // or just char_("ABCDEFG") if it's that simple
    // The key is the first segment plus every further segment that is still
    // followed by another '_' (checked, not consumed, by the &lit('_') lookahead).
    // The last '_' is therefore matched outside raw[] and is not part of the key:
    // A_B_C --> ("A_B", "C").
    entry = lexeme [ raw [ name >> *('_' >> name >> &lit('_')) ] >> '_' >> name ];
    

    See the code running live at http://liveworkspace.org/code/15d970571a61dd39faedc8b7231ef67d; the complete program follows:

    // #define BOOST_SPIRIT_DEBUG
    #include <boost/fusion/adapted.hpp>
    #include <boost/spirit/include/qi.hpp>
    #include <boost/spirit/include/karma.hpp>
    #include <iostream>
    #include <map>
    #include <string>
    
    // Short aliases for the Boost namespaces used by this program.
    namespace qi    = boost::spirit::qi;
    namespace karma = boost::spirit::karma;
    namespace phx   = boost::phoenix; // NOTE(review): not referenced in the code shown here
    
    // Result container filled by the grammar: one (first, second) string pair per parsed entry.
    typedef std::map<std::string, std::string> pairs_t;
    
    // Grammar for a sequence of entries of the form `A_B_..._Y_Z`.
    //
    // Every entry is split at its LAST underscore: the text before it becomes the
    // first element of the pair and the final segment becomes the second element,
    // e.g. `A_B` -> ("A", "B") and `A_B_C` -> ("A_B", "C"). The pairs are collected
    // into a `pairs_t`.
    //
    // Template parameters:
    //   It       iterator type of the input being parsed
    //   Skipper  skip parser used between entries (defaults to qi::space_type)
    template <typename It, typename Skipper = qi::space_type>
        struct parser : qi::grammar<It, pairs_t(), Skipper>
    {
        parser() : parser::base_type(start)
        {
            using namespace qi;
    
            // A single segment: one or more printable characters other than '_'.
            name  = lexeme [ +(graph - '_') ]; // or just char_("ABCDEFG") if it's that simple
    
            // raw[] exposes the matched input range as the key. Inside it, a further
            // "_segment" is only accepted while another '_' follows (the &lit('_')
            // and-predicate looks ahead without consuming), so the last '_' is left
            // for the literal after raw[] and never ends up in the key. The previous
            // form, raw[ +(name >> '_') ], included that trailing '_' in the key.
            // lexeme[] keeps the skipper from running inside an entry.
            entry = lexeme [ raw [ name >> *('_' >> name >> &lit('_')) ] >> '_' >> name ];
    
            // Zero or more entries; the skipper separates them.
            start = *entry;
    
            BOOST_SPIRIT_DEBUG_NODE(name);
            BOOST_SPIRIT_DEBUG_NODE(entry);
            BOOST_SPIRIT_DEBUG_NODE(start);
        }
    
      private:
        qi::rule<It, std::string()> name;                                   // no skipper: segments are contiguous
        qi::rule<It, std::pair<std::string, std::string>(), Skipper> entry; // (prefix, last segment)
        qi::rule<It, pairs_t(), Skipper> start;                             // all entries
    };
    
    template <typename C, typename Skipper>
        bool doParse(const C& input, const Skipper& skipper)
    {
        auto f(std::begin(input)), l(std::end(input));
    
        parser<decltype(f), Skipper> p;
        pairs_t data;
    
        try
        {
            bool ok = qi::phrase_parse(f,l,p,skipper,data);
            if (ok)   
            {
                std::cout << "parse success\n";
                std::cout << "data: " << karma::format_delimited(karma::auto_ % ';', ' ', data) << '\n';
            }
            else    std::cerr << "parse failed: '" << std::string(f,l) << "'\n";
    
            if (f!=l) std::cerr << "trailing unparsed: '" << std::string(f,l) << "'\n";
            return ok;
        } catch(const qi::expectation_failure<decltype(f)>& e)
        {
            std::string frag(e.first, e.last);
            std::cerr << e.what() << "'" << frag << "'\n";
        }
    
        return false;
    }
    
    // Runs doParse on a fixed sample with qi::blank as the skipper.
    // The exit status is 0 when doParse returns true and 255 otherwise.
    int main()
    {
        const std::string sample("A_B A_B_C A_B_C_D A_B_C_D_E");
    
        if (doParse(sample, qi::blank))
        {
            return 0;
        }
        return 255;
    }