Search code examples
c++boostboost-spirit-x3

boost spirit x3 - parse tokens in any order


This is basically a follow-up to a question I asked earlier, which @sehe so graciously answered!

Question: How do I parse multiple command parsers using Boost Spirit X3? Here is the code given by @sehe: https://coliru.stacked-crooked.com/a/5879831b11c51f84

The follow up question is how to parse the command arguments in any order:

i.e. parse the following successfully

cmd1 param1=<value> param2=<value> OR
cmd1 param2=<value> param1=<value>

and so on


Solution

  • I feel I have to mention we're not ChatGPT. You're in luck though, I like doing X3 finger exercises.

    First, let's observe that Spirit Qi has a parser operator that comes close out of the box: Permutation Parser

    Live On Coliru

    #include <boost/fusion/adapted.hpp>
    #include <boost/fusion/include/io.hpp>
    #include <boost/optional/optional_io.hpp>
    #include <boost/spirit/include/qi.hpp>
    #include <iomanip>
    #include <iostream>
    
    namespace qi = boost::spirit::qi;
    
    namespace ast {
        // Arguments of `cmd1`.
        // Both members are value-initialized: the grammar combines the two
        // parameters with Qi's permutation operator (`^`), which also matches
        // when only one of them is present. An omitted parameter then reads
        // as 0.0 instead of an indeterminate value.
        struct cmd1 {
            double param1{};
            double param2{};
        };

        // Arguments of `cmd2`.
        struct cmd2 {
            std::string param1;
        };

        // Result of parsing one input line: exactly one of the known commands.
        using Command = boost::variant<cmd1, cmd2>;

        // Bring Fusion's sequence stream operator into this namespace so that
        // streaming the adapted command structs can find it.
        using boost::fusion::operator<<;
    } // namespace ast
    
    // Expose the command structs as Fusion sequences so the grammar can fill
    // their members directly. The member order listed here is the order in
    // which the parser's synthesized attributes are assigned.
    BOOST_FUSION_ADAPT_STRUCT(ast::cmd1, param1, param2)
    BOOST_FUSION_ADAPT_STRUCT(ast::cmd2, param1)
    
    // Qi grammar that parses one command line into an ast::Command.
    //
    // `It` is the iterator type of the input. The grammar applies its own
    // whitespace skipper in `start`, so the grammar itself is declared
    // without a skipper.
    template <typename It> struct CommandParser : qi::grammar<It, ast::Command()> {
        CommandParser() : CommandParser::base_type(start) {
            using namespace qi;
            // Double-quoted text without escape sequences; lexeme[] disables
            // skipping so whitespace inside the quotes is preserved.
            quoted_string = lexeme['"' >> *~char_('"') >> '"'];
    
            // `^` is the permutation operator: the two assignments may appear
            // in either order.
            cmd1  = lit("cmd1") >> ((lit("param1") >> '=' >> double_) ^ //
                                   (lit("param2") >> '=' >> double_));
            cmd2  = lit("cmd2") >> ((lit("param1") >> '=' >> quoted_string));
            // Try each command in turn, skipping whitespace between tokens.
            start = qi::skip(qi::space)[cmd1 | cmd2];
    
            // Registers the rule names for debug output.
            BOOST_SPIRIT_DEBUG_NODES((cmd1)(cmd2)(start))
        }
    
      private:
        using Skipper = qi::space_type;
        qi::rule<It, ast::Command()>          start;         // entry point, no skipper of its own
        qi::rule<It, ast::cmd1(), Skipper>    cmd1;          // `cmd1` with its two numeric parameters
        qi::rule<It, ast::cmd2(), Skipper>    cmd2;          // `cmd2` with its single string parameter
        qi::rule<It, std::string(), Skipper>  quoted_string; // contents of a double-quoted string
    };
    
    // Parses the range [first, last) as a single command.
    // Returns the parsed command, or an empty optional when the input does not
    // match a command in its entirety (trailing input is rejected via qi::eoi).
    template <typename It> boost::optional<ast::Command> parse_line(It first, It last) {
        // The grammar is stateless with respect to the input, so build it once per iterator type.
        static CommandParser<It> const grammar;

        ast::Command result;
        bool const matched = phrase_parse(first, last, grammar >> qi::eoi, qi::space, result);
        if (!matched)
            return {};
        return result;
    }
    
    // Convenience overload: parses a whole string view as one command line.
    auto parse_line(std::string_view input) {
        return parse_line(input.begin(), input.end());
    }
    
    // Runs a fixed set of sample lines through the parser and prints one result row per line.
    int main() {
        // To read lines interactively instead, loop on getline(std::cin, line) until an empty line.
        auto const samples = {
            R"()",
            R"(cmd1 param1 = 3.14 param2 = 8e-9)",
            R"(cmd1 param2 = 8e-9 param1 = 3.14)", // flipped order
            R"(cmd1 param1 = 3.14 param2 = -inf)",
            R"(cmd1 param2 = -inf param1 = 3.14)", // flipped order
            R"(cmd2 param1 = " hello world " )",

            // things that would not have parsed with question code:
            R"(cmd2 param1 = "" )",

            // things that should not parse
            R"(cmd2 param1 = 3.14 param2 = 8e-9)",
            R"(cmd1 param1 = " hello world " )",
            R"(cmd2 param1 = "" trailing rubbish)",
            R"(trailing rubbish)",
        };

        for (std::string const input : samples) {
            std::cout << std::left << std::setw(40) << std::quoted(input);
            try {
                // Parse before printing the arrow so a throwing parse emits only the ERROR row.
                auto outcome = parse_line(input);
                std::cout << " -> " << outcome << std::endl;
            } catch (std::exception const& err) {
                std::cout << " -> ERROR " << err.what() << std::endl;
            }
        }
    }
    

    Printing

    ""                                       -> --
    "cmd1 param1 = 3.14 param2 = 8e-9"       ->  (3.14 8e-09)
    "cmd1 param2 = 8e-9 param1 = 3.14"       ->  (3.14 8e-09)
    "cmd1 param1 = 3.14 param2 = -inf"       ->  (3.14 -inf)
    "cmd1 param2 = -inf param1 = 3.14"       ->  (3.14 -inf)
    "cmd2 param1 = \" hello world \" "       ->  ( hello world )
    "cmd2 param1 = \"\" "                    ->  ()
    "cmd2 param1 = 3.14 param2 = 8e-9"       -> --
    "cmd1 param1 = \" hello world \" "       -> --
    "cmd2 param1 = \"\" trailing rubbish"    -> --
    "trailing rubbish"                       -> --
    

    You might consider staying with Qi for this.

    Other Approaches

    To get similar things done in X3, some heroics will be required. Let me try by:

    • ignoring missing or repeated assignments (cmd1 param1 = 3 param1 = 4 is fine)
    • using latest PFR additions for c++20 member name reflection
    • dropping the need for/use of Fusion adaptation

    Note that this builds on the ideas developed in "Boost Spirit x3: parse into structs", and specifically on the workaround mentioned in the last comment there.

    Here are the quick-and-dirty heroics:

    namespace detail {
        // Maps a member's value type to the X3 parser used for the right-hand
        // side of `name = value`. The primary template is x3::eps (matches
        // without consuming input); the specializations below parse a value.
        template <typename Attr> auto member_parser = x3::eps;

        // Strings: double-quoted text without escapes; lexeme[] keeps inner whitespace.
        template <>
        auto member_parser<std::string> = x3::rule<struct quoted_string, std::string>{"quoted_string"} =
            x3::lexeme['"' >> *~x3::char_('"') >> '"'];

        // Doubles: X3's built-in real parser.
        template <> auto member_parser<double> = x3::double_;

        // Builds the parser for one `name = value` assignment of member II of T.
        //
        // `tied` is only used to deduce the member's value type. On a match the
        // semantic action stores the parsed value into member II of the T object
        // whose address was placed in the parse context under tag T.
        template <size_t II, typename T, typename Tuple> auto handle_member(Tuple const& tied) {
            auto&& val = std::get<II>(tied);
            using Attr = std::decay_t<decltype(val)>;

            // x3::rule keeps the `char const*` it is given rather than copying
            // the text, so the name must outlive the returned parser. A local
            // std::string would be destroyed on return and leave the rule with a
            // dangling pointer; a function-local static (one per instantiation)
            // lives for the rest of the program.
            static std::string const name{boost::pfr::get_name<II, T>()};

            auto assign = [](auto& ctx) { boost::pfr::get<II>(*x3::get<T>(ctx)) = _attr(ctx); };
            return x3::rule<struct _>{name.c_str()} = (x3::lit(name) >> '=' >> member_parser<Attr>)[assign];
        }

        // Combines the per-member parsers into "zero or more assignments, in any
        // order". Missing and repeated assignments are both accepted.
        template <typename T, typename Tuple, size_t... I>
        auto params_impl(Tuple const& tied, std::integer_sequence<size_t, I...>) {
            return *(handle_member<I, T, Tuple>(tied) | ...);
        }
    } // namespace detail
    
    // Builds the parser for command type T: the command keyword (T's
    // unqualified type name) followed by any number of `member = value`
    // assignments in any order.
    //
    // `v` is only used to obtain a tuple of references from which the member
    // types are deduced; it defaults to a value-initialized T.
    template <typename T> auto make_parser(T const& v = {}) {
        // x3::rule keeps the `char const*` it is given, so the name needs static
        // lifetime; a local std::string would dangle once this function returns.
        static std::string const tname = [] {
            std::string qualified = boost::typeindex::type_id<T>().pretty_name();
            // Strip any namespace qualification ("ast::cmd1" -> "cmd1").
            return qualified.substr(qualified.find_last_of(":") + 1);
        }();

        // Point the context slot tagged T at the rule's attribute so that the
        // member actions can write into it.
        auto set_context = [](auto& ctx) { x3::get<T>(ctx) = &_val(ctx); };

        return x3::rule<struct _, T>{tname.c_str()} = //
            x3::with<T>(static_cast<T*>(nullptr))     //
                [x3::eps[set_context]                 //
                 >> x3::lit(tname)                    //
                 >> detail::params_impl<T>(boost::pfr::structure_tie(v),
                                           std::make_index_sequence<boost::pfr::tuple_size<T>::value>{})];
    }
    

    I would probably clean it up to use static type info instead of requiring default-constructibility, but in the interest of speed let's keep it as is. Now, use it:

    namespace parser {
        // Top-level grammar: cmd1 is tried first, then cmd2.
        auto const command =         //
            make_parser<ast::cmd1>() //
            | make_parser<ast::cmd2>();
    } // namespace parser
    

    Or indeed, with some more factory help:

    // Folds one generated parser per command type into a single alternative.
    template <typename... Cmds> auto commands() {
        return (make_parser<Cmds>() | ...);
    }

    // Grammar accepting any of the listed commands.
    auto const command = commands<ast::cmd1, ast::cmd2>();
    

    Integrating in the example test cases:

    Live On Coliru

    #include <boost/pfr.hpp>
    #include <boost/spirit/home/x3.hpp>
    #include <boost/type_index.hpp>
    #include <iomanip>
    #include <iostream>
    #include <optional>
    
    namespace x3 = boost::spirit::x3;
    
    namespace ast {
        // Arguments of `cmd1`.
        // The generated parser accepts any number of assignments, including
        // none, so each member is value-initialized: an unassigned parameter
        // then reads as 0.0 instead of an indeterminate value.
        struct cmd1 {
            double param1{};
            double param2{};
        };

        // Arguments of `cmd2`.
        struct cmd2 {
            std::string param1;
        };

        // Result of parsing one input line: exactly one of the known commands.
        using Command = boost::variant<cmd1, cmd2>;
    } // namespace ast
    
    namespace parser {
        namespace detail {
            template <typename Attr> auto member_parser = x3::eps;
            template <>
            auto member_parser<std::string> = x3::rule<struct quoted_string, std::string>{"quoted_string"} =
                x3::lexeme['"' >> *~x3::char_('"') >> '"'];
    
            template <> auto member_parser<double> = x3::double_;
    
            template <size_t II, typename T, typename Tuple> auto handle_member(Tuple const& tied) {
                auto&&      val = std::get<II>(tied);
                std::string name{boost::pfr::get_name<II, T>()};
    
                using Attr = std::decay_t<decltype(val)>;
    
                auto assign = [name](auto& ctx) { boost::pfr::get<II>(*x3::get<T>(ctx)) = _attr(ctx); };
                return x3::rule<struct _>{name.c_str()} = (x3::lit(name) >> '=' >> member_parser<Attr>)[assign];
            }
    
            template <typename T, typename Tuple, size_t... I>
            auto params_impl(Tuple const& tied, std::integer_sequence<size_t, I...>) {
                return *(handle_member<I, T, Tuple>(tied) | ...);
            }
        } // namespace detail
    
        template <typename T> auto make_parser(T const& v = {}) {
            std::string tname = boost::typeindex::type_id<T>().pretty_name();
            tname             = tname.substr(tname.find_last_of(":") + 1);
    
            auto set_context = [](auto& ctx) { x3::get<T>(ctx) = &_val(ctx); };
    
            return x3::rule<struct _, T>{tname.c_str()} = //
                x3::with<T>(static_cast<T*>(nullptr))     //
                    [x3::eps[set_context]                 //
                     >> x3::lit(tname)                    //
                     >> detail::params_impl<T>(boost::pfr::structure_tie(v),
                                               std::make_index_sequence<boost::pfr::tuple_size<T>::value>{})];
        }
    
        template <typename... Cmd> auto commands() { return (make_parser<Cmd>() | ...); }
    
        auto const command = commands<ast::cmd1, ast::cmd2>();
    } // namespace parser
    
    template <typename It> std::optional<ast::Command> parse_line(It first, It last) {
        ast::Command attr;
    
        // if (phrase_parse(first, last, x3::expect[parser::command >> x3::eoi], x3::space, attr))
        if (phrase_parse(first, last, parser::command >> x3::eoi, x3::space, attr))
            return attr;
        return std::nullopt;
    }
    
    // Convenience overload: parses a whole string view as one command line.
    auto parse_line(std::string_view input) {
        return parse_line(input.begin(), input.end());
    }
    
    // Runs a fixed set of sample lines through the parser and prints one result row per line.
    int main() {
        // To read lines interactively instead, loop on getline(std::cin, line) until an empty line.
        auto const samples = {
            R"()",
            R"(cmd1 param1 = 3.14 param2 = 8e-9)",
            R"(cmd1 param2 = 8e-9 param1 = 3.14)", // flipped
            R"(cmd1 param1 = 3.14 param2 = -inf)",
            R"(cmd1 param2 = -inf param1 = 3.14)", // flipped
            R"(cmd2 param1 = " hello world " )",

            // things that would not have parsed with question code:
            R"(cmd2 param1 = "" )",

            // things that should not parse
            R"(cmd2 param1 = 3.14 param2 = 8e-9)",
            R"(cmd1 param1 = " hello world " )",
            R"(cmd2 param1 = "" trailing rubbish)",
            R"(trailing rubbish)",
        };

        for (std::string const input : samples) {
            std::cout << std::left << std::setw(37) << std::quoted(input);
            try {
                auto outcome = parse_line(input);
                if (!outcome) {
                    // No match: print the placeholder row and move on.
                    std::cout << " -> --" << std::endl;
                    continue;
                }

                // Print the concrete command type followed by its members.
                apply_visitor(
                    [](auto const& cmd) {
                        std::cout << " -> " << boost::typeindex::type_id_runtime(cmd).pretty_name()
                                  << boost::pfr::io(cmd) << std::endl;
                    },
                    *outcome);
            } catch (std::exception const& err) {
                std::cout << " -> ERROR " << err.what() << std::endl;
            }
        }
    }
    

    Printing

    ""                                    -> --
    "cmd1 param1 = 3.14 param2 = 8e-9"    -> ast::cmd1{3.14, 8e-09}
    "cmd1 param2 = 8e-9 param1 = 3.14"    -> ast::cmd1{3.14, 8e-09}
    "cmd1 param1 = 3.14 param2 = -inf"    -> ast::cmd1{3.14, -inf}
    "cmd1 param2 = -inf param1 = 3.14"    -> ast::cmd1{3.14, -inf}
    "cmd2 param1 = \" hello world \" "    -> ast::cmd2{" hello world "}
    "cmd2 param1 = \"\" "                 -> ast::cmd2{""}
    "cmd2 param1 = 3.14 param2 = 8e-9"    -> --
    "cmd1 param1 = \" hello world \" "    -> --
    "cmd2 param1 = \"\" trailing rubbish" -> --
    "trailing rubbish"                    -> --
    

    Summarizing

    I would probably make a general grammar and AST like

    // Sketch of a generic AST (not compilable as written: the enumerator list is elided).
    // Which command was named on the line.
    enum class CmdType { cmd1, cmd2, ... };
    // Parameter name exactly as written in the input.
    using Param = std::string;
    // A parameter value: either a number or a string.
    using Value = variant<double, std::string>;
    // All `name = value` pairs of one command; a multimap also retains repeated names.
    using Args  = std::multimap<Param, Value>;
    
    // One parsed command line, to be validated after parsing.
    struct Cmd {
        CmdType cmd;
        Args    args;
    };
    

    And create a validator function that validates the correctness of the commands after parsing. This way you get a very simple grammar that's easy to maintain, and way more flexibility regarding validation logic.