Search code examples
c++ parsing boost-spirit boost-spirit-qi

Boost Spirit Parser optional expression evaluation


I am trying to parse a line from a text file which is of the form:

[int_:] [int_/int_] [(int_, string)] string [string:int_]...

Here [] denotes an optional parameter which, when present, contains delimiters such as ":", "(", ")" and "/". The last part is a repeating "key:value" combination, e.g.:

10: 0x1/2 (8, INC) rd API:2 SI:100

I am able to parse the whole line when all the parameters are present. But if any of the leading optional parameters is missing, the parser fails.

How can I ignore the optional parameters in the Boost Spirit library? (i.e. skip the assignment of optional variables to default values.)

These are the qi grammar rules:

// One or more characters, stopping before a space or ')'.
quoted_string = lexeme[+(char_ -(lit(' ') | lit(')')))];
// Either a "0x"/"0X"-prefixed hexadecimal number or a plain unsigned number.
hex_num = ((lit("0x") | lit("0X")) >> hex) | uint_;

// Every component below is chained with >> and none of them is marked
// optional, so the sequence as written requires all of them to be present.
start = (hex_num >> lit(":"))
    >> (hex_num >> lit("/") >> hex_num )
    >> lit("(") >> hex_num >> lit(",") >> quoted_string >> lit(")")
    >> quoted_string
    >> quoted_string;

// Declarations of the rules used above: iterator type, attribute type and
// the ascii::space_type skipper.
qi::rule<Iterator, std::string(), ascii::space_type> quoted_string;
qi::rule<Iterator, uint32_t(), ascii::space_type> hex_num;
qi::rule<Iterator, employee(), ascii::space_type> start;

Solution

  • Model your AST node to reflect the Parser tree:

    // The two numbers of an "a/b" group.
    struct ratio_t { uint32_t a,b; };
    // The number and name of an "(id, name)" group.
    struct opcode_t { uint32_t id; std::string name; };
    
    // One parsed input line; the two bracketed groups may be absent.
    struct Node {
        uint32_t label; // prefix:
    
        boost::optional<ratio_t> ratio; // a/b
        boost::optional<opcode_t> opcode; // (id, name)
    
        std::string extra; // the mandatory free-standing string
        std::multimap<std::string, uint32_t> params; // trailing key:value pairs
    };
    

    (Just making stuff up as I go, because I can only guess what the data means. I'm assuming employee, hex_num and quoted_string are somehow remnants from sample code you started with).

    Now when you adapt these structures:

    // Adapt each AST struct as a Fusion sequence; members are listed in the
    // same order in which the grammar produces the corresponding values.
    BOOST_FUSION_ADAPT_STRUCT(AST::ratio_t, a, b)
    BOOST_FUSION_ADAPT_STRUCT(AST::opcode_t, id, name)
    BOOST_FUSION_ADAPT_STRUCT(AST::Node, label, ratio, opcode, extra, params)
    

    You can simply parse into it with an analogous parse tree:

        // lexemes
        unquoted_string = +(graph - ')');
        num = (no_case[ "0x" ] >> hex) | uint_;
        param = +(graph - ':') >> ':' >> num;
    
        // skipping productions
        opcode = '(' >> num >> ',' >> unquoted_string >> ')';
        ratio  = num >> '/' >> num;
        prefix = (num >> ':') | attr(0);                      // defaults to 0
        start  = prefix
                >> -ratio
                >> -opcode
                >> unquoted_string
                >> *param;
    

    Now when you parse these test cases:

    // Try the grammar on lines that progressively drop the optional parts.
    for (std::string const input : {
            "10: 0x1/2 (8, INC) rd API:2 SI:100",
            "10: 0x1/2 (8, INC) rd API:2",
            "10: 0x1/2 (8, INC) rd",
            "10: 0x1/2 rd API:2 SI:100",
            "10: rd API:2 SI:100",
            "0x1/2 rd API:2 SI:100",
            "rd API:2 SI:100",
        })
    {
        It f = input.begin(), l = input.end();
        AST::Node data;
        // phrase_parse advances f past whatever the grammar consumed.
        bool ok = qi::phrase_parse(f, l, p, qi::ascii::space, data);
        if (ok) {
            std::cout << "Parse success: " << data << "\n";
        }
        else {
            std::cout << "Parse failure ('" <<  input << "')\n";
        }
    
        // Report any input that was left unconsumed.
        if (f!=l) {
            std::cout << "Remaining unparsed input: '" << std::string(f,l) << "'\n";
        }
    }
    

    you get:

    Parse success: 10:  1/2  (8, 'INC') rd API:2 SI:100
    Parse success: 10:  1/2  (8, 'INC') rd API:2
    Parse success: 10:  1/2  (8, 'INC') rd
    Parse success: 10:  1/2 -- rd API:2 SI:100
    Parse success: 10: -- -- rd API:2 SI:100
    Parse success: 0:  1/2 -- rd API:2 SI:100
    Parse success: 0: -- -- rd API:2 SI:100
    

    FULL DEMO

    Live On Coliru

    #include <cstdint>
    #include <iostream>
    #include <map>
    #include <string>
    #include <utility>
    #include <boost/spirit/include/qi.hpp>
    #include <boost/fusion/adapted.hpp>
    #include <boost/optional/optional_io.hpp>
    
    namespace qi = boost::spirit::qi;
    
    /// Data model for one parsed input line, plus stream output for each type.
    namespace AST {
        /// The two numbers of an `a/b` group.
        struct ratio_t {
            std::uint32_t a = 0;
            std::uint32_t b = 0;
        };

        /// The number and name of an `(id, name)` group.
        struct opcode_t {
            std::uint32_t id = 0;
            std::string name;
        };

        /// One parsed line. Scalars are value-initialized so that a
        /// default-constructed Node never holds indeterminate values.
        struct Node {
            std::uint32_t label = 0; // prefix:

            boost::optional<ratio_t> ratio;   // a/b
            boost::optional<opcode_t> opcode; // (id, name)

            std::string extra;
            std::multimap<std::string, std::uint32_t> params; // key:value pairs
        };

        /// Writes a ratio as `a/b`.
        inline std::ostream& operator<<(std::ostream& os, ratio_t const& v) {
            return os << v.a << "/" << v.b;
        }

        /// Writes an opcode as `(id, 'name')`.
        inline std::ostream& operator<<(std::ostream& os, opcode_t const& v) {
            return os << "(" << v.id << ", '" << v.name << "')";
        }

        /// Writes the whole node on one line: label, ratio, opcode, extra,
        /// then every key:value parameter. The optionals are streamed
        /// directly, so their rendering comes from the optional's own
        /// stream operator.
        inline std::ostream& operator<<(std::ostream& os, Node const& v) {
            os << v.label << ": " << v.ratio << " " << v.opcode << " " << v.extra;
            for (auto const& entry : v.params) {
                os << " " << entry.first << ":" << entry.second;
            }
            return os;
        }
    }
    
    // Adapt each AST struct as a Fusion sequence; members are listed in the
    // same order in which the grammar rules below produce their values.
    BOOST_FUSION_ADAPT_STRUCT(AST::ratio_t, a, b)
    BOOST_FUSION_ADAPT_STRUCT(AST::opcode_t, id, name)
    BOOST_FUSION_ADAPT_STRUCT(AST::Node, label, ratio, opcode, extra, params)
    
    /// Qi grammar for a line of the form
    ///     [num:] [num/num] [(num, name)] name [key:num]...
    /// producing an AST::Node.
    ///
    /// @tparam It       iterator type of the input
    /// @tparam Skipper  skipper used between tokens (qi::ascii::space_type by default)
    template <typename It, typename Skipper = qi::ascii::space_type>
    struct P : qi::grammar<It, AST::Node(), Skipper> {
        P() : P::base_type(start)
        {
            // lexemes (their rules are declared without a skipper)
            // one or more printable characters other than ')'
            unquoted_string = +(qi::graph - ')');
            // "0x"-prefixed (any case) hexadecimal number, or a plain unsigned number
            num = (qi::no_case[ "0x" ] >> qi::hex) | qi::uint_;
            // key:value pair -- the key is everything up to the ':'
            param = +(qi::graph - ':') >> ':' >> num;

            // skipping productions
            opcode = '(' >> num >> ',' >> unquoted_string >> ')';
            ratio  = num >> '/' >> num;
            prefix = (num >> ':') | qi::attr(0);                  // defaults to 0
            // ratio and opcode are optional; params may repeat zero or more times
            start  = prefix
                    >> -ratio
                    >> -opcode
                    >> unquoted_string
                    >> *param;

            BOOST_SPIRIT_DEBUG_NODES((start)(unquoted_string)(num)(prefix)(ratio)(opcode)(param))
        }

      private:
        qi::rule<It, AST::ratio_t(),  Skipper> ratio;
        qi::rule<It, AST::opcode_t(), Skipper> opcode;
        qi::rule<It, AST::Node(),     Skipper> start;
        qi::rule<It, std::uint32_t(), Skipper> prefix;

        // lexemes: no Skipper argument. All rules use the signature form T().
        qi::rule<It, std::string()> unquoted_string;
        qi::rule<It, std::uint32_t()> num;
        qi::rule<It, std::pair<std::string, std::uint32_t>()> param;
    };
    
    /// Runs the grammar over a fixed list of sample lines and prints, for each,
    /// either the parsed node or a failure message, followed by any input the
    /// parser left unconsumed.
    int main() {
        using It = std::string::const_iterator;
        P<It> const p;

        // Samples progressively drop the optional parts of the line.
        char const* const samples[] = {
            "10: 0x1/2 (8, INC) rd API:2 SI:100",
            "10: 0x1/2 (8, INC) rd API:2",
            "10: 0x1/2 (8, INC) rd",
            "10: 0x1/2 rd API:2 SI:100",
            "10: rd API:2 SI:100",
            "0x1/2 rd API:2 SI:100",
            "rd API:2 SI:100",
        };

        for (char const* sample : samples) {
            std::string const input(sample);
            It first = input.begin();
            It const last = input.end();

            AST::Node data;
            // phrase_parse advances `first` past whatever was consumed.
            bool const parsed = qi::phrase_parse(first, last, p, qi::ascii::space, data);

            if (!parsed) {
                std::cout << "Parse failure ('" << input << "')\n";
            } else {
                std::cout << "Parse success: " << data << "\n";
            }

            if (first != last) {
                std::cout << "Remaining unparsed input: '" << std::string(first, last) << "'\n";
            }
        }
    }