Search code examples
boostc++14boost-spirit

How do I make a Spirit Qi rule try every alternative in an OR (|) expression?


I'm trying to parse this command template that can take the following:

SendCmd SomeCommand Left_Side = "Some Value";
SendCmd AnotherCmd "Some Literal" = Some_Value;
SendCmd AnotherCmd "Some Literal" = Some_Value "Other Literal" = "Something";
SendCmd SomeCommand Just_object_name;

This is what I have so far; it successfully parses every case except the 1st:

//#define BOOST_SPIRIT_DEBUG 1 
#include <boost/fusion/adapted.hpp>
#include <boost/phoenix.hpp>
#include <boost/spirit/include/qi.hpp>
#include <iomanip>

namespace qi = boost::spirit::qi;
namespace px = boost::phoenix;

// AST types that the parser fills in for one SendCmd statement.
namespace Ast {
    // Imported into the namespace; not referenced by the code shown here.
    using boost::recursive_wrapper;

    // Tagged char-traits: each Tag produces a different traits type, so every
    // String<Tag> below is its own std::basic_string specialization.
    template <typename> struct custom_string : std::char_traits<char> {};
    template <typename Tag>
    using String = std::basic_string<char, custom_string<Tag> >;

    // Ident and Literal are distinct string types, which is what lets the
    // variants below tell a quoted literal apart from an identifier-based Object.
    using Ident = String<struct TagIdent>;
    using Literal = String<struct TagLiteral>;
    using Number = double;
    
    // represent ident[i][j]
    struct Object {
        Ident id;                       // the leading identifier
        std::vector<Ident> subscrpt;    // the bracketed identifiers, in source order
    };

    // One `left = right` pair.
    struct GenericAssignment {
        boost::variant<Literal, Object> left;
        boost::variant<Literal, Number, Object> right;
    };
    using GenAssignments = std::vector<GenericAssignment>;
    // A whole command: the command name (literal or object) followed by either
    // a single object or a list of assignments.
    struct SendCmd {
        boost::variant<Literal, Object> litobj;
        boost::variant<Object, GenAssignments> objasgn;
    };
}
// Adapt the AST structs as Fusion sequences. The members are listed in
// declaration order, which is also the order of the sub-parsers in the
// corresponding grammar rules.
BOOST_FUSION_ADAPT_STRUCT(Ast::Object, id, subscrpt);
BOOST_FUSION_ADAPT_STRUCT(Ast::GenericAssignment, left, right);
BOOST_FUSION_ADAPT_STRUCT(Ast::SendCmd, litobj, objasgn);

namespace client {

    // Qi grammar for a single `SendCmd ... ;` statement, producing an
    // Ast::SendCmd. The grammar is declared without a skipper; whitespace
    // skipping is switched on inside `start` through skip(space).
    template <typename Itr> struct DML : qi::grammar<Itr, Ast::SendCmd()> {

        DML() : DML::base_type(start) {
            using namespace qi;

            // Entry point: run send_cmd_ with `space` as the skipper.
            start = skip(space)[send_cmd_];
            // Identifier: a letter followed by any number of alphanumerics or '_'.
            ident_ = raw[alpha >> *(alnum | '_')];
            number_ = double_;

            // Double-quoted text. A backslash takes the next character with it,
            // so \" does not end the literal. The expectation operators (>)
            // raise qi::expectation_failure when the opening quote matched but
            // the remainder does not.
            literal_ = '"' > *('\\' >> char_ | ~char_('"')) > '"';
            // Identifier followed by zero or more [identifier] subscripts.
            object_ = ident_ >> *('[' >> ident_ >> ']');
            // (literal | object) '=' (literal | number | object)
            gen_asgn_ = (literal_ | object_) >> '=' >> (literal_ | number_ | object_);
            // Zero or more assignments; note that this also matches empty input.
            gen_asgns_ = *gen_asgn_;

            // "sendcmd" (any letter case), a literal or object, then an object
            // or assignment list, then ';'. The alternatives are ordered: the
            // first branch that matches is kept, and a later failure of ';'
            // fails the whole sequence without trying the other branch.
            send_cmd_ = no_case["sendcmd"] >> (literal_ | object_) >> (object_ | gen_asgns_)
                >> ';'
                ;

            // Names these rules for BOOST_SPIRIT_DEBUG tracing; start and
            // gen_asgns_ are not in the list.
            BOOST_SPIRIT_DEBUG_NODES(
                (ident_)(literal_)(number_)(object_)(gen_asgn_)(send_cmd_)
            )
        }

    private:
        qi::rule<Itr, Ast::SendCmd()> start;

        using Skipper = qi::space_type;
        // Declared without a skipper: no whitespace is skipped inside these.
        qi::rule<Itr, Ast::Literal()>       literal_;
        qi::rule<Itr, Ast::Number()>        number_;
        qi::rule<Itr, Ast::Ident()>         ident_;

        // Declared with the space skipper: whitespace is allowed between their elements.
        qi::rule<Itr, Ast::GenericAssignment(), Skipper>    gen_asgn_;
        qi::rule<Itr, Ast::GenAssignments(), Skipper>           gen_asgns_;
        qi::rule<Itr, Ast::SendCmd(), Skipper>                      send_cmd_;
        qi::rule<Itr, Ast::Object(), Skipper>                           object_;
    };
} // namespace client


// Sample statements fed to the parser, one per command form from the question.
static const std::string test_cases[] = {
    // object on the left of a single assignment
    R"(SendCmd SomeCommand Left_Side = "Some Value";)",
    // literal on the left of a single assignment
    R"(SendCmd AnotherCmd "Some Literal" = Some_Value;)",
    // two assignments in a row
    R"(SendCmd AnotherCmd "Some Literal" = Some_Value "Other Literal" = "Something";)",
    // a bare object name, no assignment
    R"(SendCmd SomeCommand Just_object_name;)",
};

int main() {
    using It = std::string::const_iterator;
    static const client::DML<It> p;
    
    int i = 0;
    for (std::string const& input : test_cases) {
        try {
            Ast::SendCmd sc;
            std::cout << "Case #" << ++i << std::endl;
            std::cout << input;
            if (qi::parse(begin(input), end(input), p, sc)) {
                std::cout << " [Success]" << std::endl;
            }
            else {
                std::cout << " [INVALID]" << std::endl;
            }
        }
        catch (qi::expectation_failure<It> const& ef) {
            auto f = begin(input);
            auto p = ef.first - input.begin();
            //#pragma GCC diagnostic push
            //#pragma GCC diagnostic ignored "-Wsign-conversion"
            auto bol = input.find_last_of("\r\n", p) + 1;
            auto line = std::count(f, f + bol, '\n') + 1;
            auto eol = input.find_first_of("\r\n", p);

            std::cerr << " -> EXPECTED " << ef.what_ << " in line:" << line << "\n"
                << input.substr(bol, eol - bol) << "\n"
                << std::setw(static_cast<int>(p - bol)) << ""
                << "^--- here" << std::endl;
            //#pragma GCC diagnostic pop
        }
    }   
}

And the result of running this code is

Case #1
SendCmd SomeCommand Left_Side = "Some Value"; [INVALID]
Case #2
SendCmd AnotherCmd "Some Literal" = Some_Value; [Success]
Case #3
SendCmd AnotherCmd "Some Literal" = Some_Value "Other Literal" = "Something"; [Success]
Case #4
SendCmd SomeCommand Just_object_name; [Success]

The 1st case fails because of the rule below: when parsing Left_Side = "Some Value", it successfully parses Left_Side as object_, then expects the ';' to follow, and flags the input as invalid. It does not try the next alternative at all.

send_cmd_ = no_case["sendcmd"] >> (literal_ | object_) >> (object_ | gen_asgns_)
                >> ';'
                ;

My question is: is it possible to make it try object_ and, if what follows fails, try gen_asgns_ before flagging the input as invalid?

By the way, switching the order to (gen_asgns_ | object_) makes the 4th case fail.

EDIT: I did try qi::hold[object_] | gen_asgns_, but it made no difference.

Thanks for your help


Solution

  • First of all, what a beautiful testbed for the reproducer :)

    Second of all, your analysis is spot on.

    So, I'd say the real question is: why is #4 not parsed with (gen_asgns_ | object_)?

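    The problem is that gen_asgns_ ALWAYS matches: *p also matches the empty string, so in (gen_asgns_ | object_) the first branch succeeds without consuming Just_object_name, object_ is never tried, and the following ';' fails. To avoid that, use +p, which requires at least a single match: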

    Live On Coliru

    //#define BOOST_SPIRIT_DEBUG 1 
    #include <boost/fusion/adapted.hpp>
    #include <boost/spirit/include/qi.hpp>
    #include <iomanip>
    namespace qi = boost::spirit::qi;
    
    // AST types that the parser fills in for one SendCmd statement.
    namespace Ast {
        // Tagged char-traits: each Tag produces a different traits type, so
        // every String<Tag> below is its own std::basic_string specialization.
        template <typename> struct custom_string : std::char_traits<char> {};
        template <typename Tag>
        using String = std::basic_string<char, custom_string<Tag> >;
    
        // Ident and Literal are distinct string types, which is what lets the
        // variants below tell a quoted literal apart from an identifier-based Object.
        using Ident   = String<struct TagIdent>;
        using Literal = String<struct TagLiteral>;
        using Number  = double;
    
        // represent ident[i][j]
        struct Object {
            Ident id;                    // the leading identifier
            std::vector<Ident> subscrpt; // the bracketed identifiers, in source order
        };
    
        // One `left = right` pair.
        struct GenericAssignment {
            boost::variant<Literal, Object>         left;
            boost::variant<Literal, Number, Object> right;
        };
        using GenAssignments = std::vector<GenericAssignment>;
    
        // A whole command: the command name (literal or object) followed by
        // either a single object or a list of assignments.
        struct SendCmd {
            boost::variant<Literal, Object>        litobj;
            boost::variant<Object, GenAssignments> objasgn;
        };
    }
    // Adapt the AST structs as Fusion sequences. The members are listed in
    // declaration order, which is also the order of the sub-parsers in the
    // corresponding grammar rules.
    BOOST_FUSION_ADAPT_STRUCT(Ast::Object, id, subscrpt)
    BOOST_FUSION_ADAPT_STRUCT(Ast::GenericAssignment, left, right)
    BOOST_FUSION_ADAPT_STRUCT(Ast::SendCmd, litobj, objasgn)
    
    namespace client {
    
        // Qi grammar for a single `SendCmd ... ;` statement, producing an
        // Ast::SendCmd. The grammar is declared without a skipper; whitespace
        // skipping is switched on inside `start` through skip(space).
        template <typename It> struct DML : qi::grammar<It, Ast::SendCmd()> {
    
            DML() : DML::base_type(start) {
                using namespace qi;
    
                // Entry point: run send_cmd_ with `space` as the skipper.
                start   = skip(space)[send_cmd_];
                // Identifier: a letter followed by any number of alphanumerics or '_'.
                ident_  = raw[alpha >> *(alnum | '_')];
                number_ = double_;
    
                // Double-quoted text. A backslash takes the next character with
                // it, so \" does not end the literal. The expectation operators
                // (>) raise qi::expectation_failure when the opening quote
                // matched but the remainder does not.
                literal_   = '"' > *('\\' >> char_ | ~char_('"')) > '"';
                // Identifier followed by zero or more [identifier] subscripts.
                object_    = ident_ >> *('[' >> ident_ >> ']');
                // (literal | object) '=' (literal | number | object)
                gen_asgn_  = (literal_ | object_) >> '=' >> (literal_ | number_ | object_);
                // One or more assignments. Using + rather than * means this
                // rule fails on a bare object name instead of matching nothing.
                gen_asgns_ = +gen_asgn_;
    
                // "sendcmd" (any letter case), a literal or object, then an
                // assignment list or a single object, then ';'.
                send_cmd_ = no_case["sendcmd"] >> (literal_ | object_) //
                    // The assignment list is tried first and object_ is the
                    // fallback; the previous order was (object_ | gen_asgns_).
                    >> (gen_asgns_ | object_) //
                    >> ';'                    //
                    ;
    
                // Names these rules for BOOST_SPIRIT_DEBUG tracing; start and
                // gen_asgns_ are not in the list.
                BOOST_SPIRIT_DEBUG_NODES((ident_)(literal_)(number_)(object_)(gen_asgn_)(send_cmd_))
            }
    
        private:
            qi::rule<It, Ast::SendCmd()> start;
    
            using Skipper = qi::space_type;
            // Declared without a skipper: no whitespace is skipped inside these.
            qi::rule<It, Ast::Literal()> literal_;
            qi::rule<It, Ast::Number()>  number_;
            qi::rule<It, Ast::Ident()>   ident_;
    
            // Declared with the space skipper: whitespace is allowed between their elements.
            qi::rule<It, Ast::GenericAssignment(), Skipper> gen_asgn_;
            qi::rule<It, Ast::GenAssignments(), Skipper>    gen_asgns_;
            qi::rule<It, Ast::SendCmd(), Skipper>           send_cmd_;
            qi::rule<It, Ast::Object(), Skipper>            object_;
        };
    } // namespace client
    
    
    // Sample statements fed to the parser, one per command form from the question.
    static const std::string test_cases[] = {
        // object on the left of a single assignment
        R"(SendCmd SomeCommand Left_Side = "Some Value";)",
        // literal on the left of a single assignment
        R"(SendCmd AnotherCmd "Some Literal" = Some_Value;)",
        // two assignments in a row
        R"(SendCmd AnotherCmd "Some Literal" = Some_Value "Other Literal" = "Something";)",
        // a bare object name, no assignment
        R"(SendCmd SomeCommand Just_object_name;)",
    };
    
    int main() {
        using It = std::string::const_iterator;
        static const client::DML<It> p;
    
        for (int i = 0; std::string const& input : test_cases) {
            try {
                Ast::SendCmd sc;
                std::cout << "Case #" << ++i << std::endl;
                std::cout << input;
                if (qi::parse(begin(input), end(input), p, sc)) {
                    std::cout << " [Success]" << std::endl;
                }
                else {
                    std::cout << " [INVALID]" << std::endl;
                }
            } catch (qi::expectation_failure<It> const& ef) {
                auto f = begin(input);
                auto p = ef.first - input.begin();
                #pragma GCC diagnostic push
                #pragma GCC diagnostic ignored "-Wsign-conversion"
                auto bol  = input.find_last_of("\r\n", p) + 1;
                auto line = std::count(f, f + bol, '\n') + 1;
                auto eol  = input.find_first_of("\r\n", p);
    
                std::cerr << " -> EXPECTED " << ef.what_ << " in line:" << line << "\n"
                    << input.substr(bol, eol - bol) << "\n"
                    << std::setw(static_cast<int>(p - bol)) << ""
                    << "^--- here" << std::endl;
                #pragma GCC diagnostic pop
            }
        }
    }
    

    Printing

    Case #1
    SendCmd SomeCommand Left_Side = "Some Value"; [Success]
    Case #2
    SendCmd AnotherCmd "Some Literal" = Some_Value; [Success]
    Case #3
    SendCmd AnotherCmd "Some Literal" = Some_Value "Other Literal" = "Something"; [Success]
    Case #4
    SendCmd SomeCommand Just_object_name; [Success]