Search code examples
c++boostboost-spirit-x3

How do i parse multiple command parsers using boost spirit x3


I have a use case where I need to accept multiple commands on a CLI. Each command has its own set of parameters. To date I have used an ad-hoc, regex-style parser, and I want to do a bit better using Boost Spirit X3.

In the following cpp file I am trying to parse cmd1 and cmd2. How do I get the parsed results into the appropriate ASTs? How do I even pass multiple ASTs to phrase_parse?

I am a beginner with Boost Spirit and cobbled together this basic experimental program from the examples, but I am finding it a bit difficult to close the gap!

#include <iostream>
#include <string>
#include <tuple>
#include <variant>
#include <boost/fusion/adapted/std_tuple.hpp>
#include <boost/fusion/include/std_tuple.hpp>
#include <boost/fusion/adapted/std_pair.hpp>
#include <boost/fusion/include/std_pair.hpp>
#include <boost/fusion/include/adapt_struct.hpp>
#include <boost/spirit/home/x3.hpp>
#include <boost/spirit/home/x3/char/char_parser.hpp>
#include <fmt/format.h>

namespace x3 = boost::spirit::x3;

namespace client::ast
{
    /// AST node for the `cmd1` command: two numeric parameters.
    struct cmd1
    {
        // Value-initialised so that a default-constructed cmd1 (as declared in
        // parse_line before parsing) never holds indeterminate values.
        double param1{};
        double param2{};
    };

    /// AST node for the `cmd2` command: a single string parameter.
    struct cmd2
    {
        std::string param1;
    };
}

// Adapt the AST structs as Boost.Fusion sequences; the members are listed in
// the order in which the corresponding rule produces its attributes.
BOOST_FUSION_ADAPT_STRUCT(client::ast::cmd1, param1, param2);
BOOST_FUSION_ADAPT_STRUCT(client::ast::cmd2, param1);

// Grammar for the two commands. Each rule's attribute type is the AST struct
// it is meant to fill.
namespace parser
{
    using x3::lit;
    using x3::lexeme;
    using x3::char_;
    using x3::double_;
    using x3::phrase_parse;
    using x3::ascii::space;

    // Rule declarations; the string is the rule's name.
    x3::rule<class cmd1_class, client::ast::cmd1> const cmd1 = "cmd1";
    x3::rule<class cmd2_class, client::ast::cmd2> const cmd2 = "cmd2";

    // A double-quoted string. lexeme[] disables the skipper between the quotes.
    // NOTE: +(...) requires at least one character, so "" is not accepted.
    auto const quoted_string = lexeme['"' >> +(char_ - '"') >> '"'];
    // cmd1 param1 = <double> param2 = <double>
    auto const cmd1_def = lit("cmd1") >> lit("param1") >> lit("=") >> double_ >> lit("param2") >> lit("=") >> double_;
    // cmd2 param1 = "<text>"
    auto const cmd2_def = lit("cmd2") >> lit("param1") >> lit("=") >> quoted_string;

    // Ties each rule to its <name>_def definition above.
    BOOST_SPIRIT_DEFINE(cmd1, cmd2);
}

/// Tries to parse the range [first, last) as either cmd1 or cmd2, skipping
/// ASCII whitespace between tokens.
///
/// @param first  iterator to the start of the input (taken by value)
/// @param last   iterator one past the end of the input
/// @return true only if one of the rules matched AND the whole input was consumed
///
/// No attribute is passed to phrase_parse yet, so cmd1_ast, cmd2_ast and
/// cmd_ast are declared but never filled.
template <typename Iterator>
bool parse_line(Iterator first, Iterator last)
{
    using x3::lit;
    using x3::lexeme;
    using x3::char_;
    using x3::double_;
    using x3::phrase_parse;
    using x3::ascii::space;
    using parser::cmd1;
    using parser::cmd2;

    // Candidate destinations for the parse result (currently unused).
    client::ast::cmd1 cmd1_ast;
    client::ast::cmd2 cmd2_ast;
    std::variant<client::ast::cmd1, client::ast::cmd2> cmd_ast;

    /// Open question: how do I parse the matching rule into the correct AST?
    bool r = phrase_parse(
        first,                          //  Start Iterator
        last,                           //  End Iterator
        cmd1 | cmd2,
        space                          //  The Skip-Parser
        /// cmd_ast);                       //  The AST (not passed yet)
        );

    if (first != last) // fail if we did not get a full match
        return false;
    return r;
}

int main()
{
    // Process stdin one line at a time; an empty line or end of input ends the loop.
    for (std::string line; std::getline(std::cin, line) && !line.empty();)
    {
        fmt::print("line: {}\n", line);

        bool const matched = parse_line(line.begin(), line.end());
        fmt::print("parse_line: {}\n", matched);
    }

    return 0;
}

I expect cmd1 and cmd2 to be parsed into their respective ASTs.


Solution

  • You are close. I'd use boost::variant:

    // AST: one struct per command, plus a variant that can hold either of them.
    namespace ast {
        struct cmd1 { double param1, param2; };
        struct cmd2 { std::string param1; };
    
        // Attribute type for "cmd1 | cmd2".
        using Command = boost::variant<cmd1, cmd2>;
    }
    

    I'd also simplify the rules and add one for cmd1 | cmd2:

    // Grammar: one rule per command and an alternative that accepts either.
    namespace parser {
        x3::rule<class cmd1_class, ast::cmd1> const cmd1 = "cmd1";
        x3::rule<class cmd2_class, ast::cmd2> const cmd2 = "cmd2";
    
        // Double-quoted string; *~char_('"') also accepts the empty string "".
        auto const quoted_string = x3::lexeme['"' >> *~x3::char_('"') >> '"'];
        // cmd1 param1 = <double> param2 = <double>
        auto const cmd1_def = x3::lit("cmd1") >> "param1" >> '=' >> x3::double_ >> "param2" >> '=' >> x3::double_;
        // cmd2 param1 = "<text>"
        auto const cmd2_def = x3::lit("cmd2") >> "param1" >> '=' >> quoted_string;
    
        BOOST_SPIRIT_DEFINE(cmd1, cmd2);
        // Either command.
        auto const command = cmd1 | cmd2;
    } // namespace parser
    

    Now you can make a function that returns the parsed command if any:

    /// Parses [first, last) as exactly one command, skipping whitespace.
    /// Returns the parsed command, or an empty optional if the input is not a
    /// complete, valid command.
    template <typename It> boost::optional<ast::Command> parse_line(It first, It last) {
        ast::Command attr;
    
        // Plain sequence (>>) with x3::eoi: trailing input makes the parse fail and
        // yields an empty optional. The expectation operator (>) would instead throw
        // x3::expectation_failure when a command is followed by trailing input.
        if (phrase_parse(first, last, parser::command >> x3::eoi, x3::space, attr))
            return attr;
        return {};
    }
    

    See it Live On Coliru with test cases:

    #include <boost/fusion/adapted.hpp>
    #include <boost/fusion/include/io.hpp>
    #include <boost/optional/optional_io.hpp>
    #include <boost/spirit/home/x3.hpp>
    #include <iomanip>
    #include <iostream>
    
    namespace x3 = boost::spirit::x3;
    
    // AST: one struct per command, plus a variant that can hold either of them.
    namespace ast {
        struct cmd1 { double param1, param2; };
        struct cmd2 { std::string param1; };
    
        // Attribute type for "cmd1 | cmd2".
        using Command = boost::variant<cmd1, cmd2>;
    
        // Bring Fusion's stream operator into this namespace so the adapted
        // structs can be printed with << (used when main prints the result).
        using boost::fusion::operator<<;
    } // namespace ast
    
    // Adapt the AST structs as Fusion sequences; members are listed in the
    // order in which the corresponding rule produces its attributes.
    BOOST_FUSION_ADAPT_STRUCT(ast::cmd1, param1, param2)
    BOOST_FUSION_ADAPT_STRUCT(ast::cmd2, param1)
    
    // Grammar: one rule per command and an alternative that accepts either.
    namespace parser {
        x3::rule<class cmd1_class, ast::cmd1> const cmd1 = "cmd1";
        x3::rule<class cmd2_class, ast::cmd2> const cmd2 = "cmd2";
    
        // Double-quoted string; *~char_('"') also accepts the empty string "".
        auto const quoted_string = x3::lexeme['"' >> *~x3::char_('"') >> '"'];
        // cmd1 param1 = <double> param2 = <double>
        auto const cmd1_def = x3::lit("cmd1") >> "param1" >> '=' >> x3::double_ >> "param2" >> '=' >> x3::double_;
        // cmd2 param1 = "<text>"
        auto const cmd2_def = x3::lit("cmd2") >> "param1" >> '=' >> quoted_string;
    
        BOOST_SPIRIT_DEFINE(cmd1, cmd2);
        // Either command; its attribute is compatible with ast::Command.
        auto const command = cmd1 | cmd2;
    } // namespace parser
    
    /// Parses [first, last) as exactly one command followed by end of input,
    /// skipping whitespace. Returns the command, or an empty optional on failure.
    template <typename It> boost::optional<ast::Command> parse_line(It first, It last) {
        boost::optional<ast::Command> result;
        ast::Command                  parsed;
    
        // Alternative grammar: x3::expect[parser::command >> x3::eoi]
        auto const grammar = parser::command >> x3::eoi;
        if (phrase_parse(first, last, grammar, x3::space, parsed))
            result = parsed;
        return result;
    }
    
    /// Convenience overload: parses the whole string view.
    auto parse_line(std::string_view input) { return parse_line(input.begin(), input.end()); }
    
    // Runs parse_line over a fixed list of sample inputs and prints each input
    // next to its result (or the exception message if parsing throws).
    int main() {
        // for (std::string line; getline(std::cin, line) && !line.empty();) {
        auto const inputs = {
            R"()",
            R"(cmd1 param1 = 3.14 param2 = 8e-9)",
            R"(cmd1 param1 = 3.14 param2 = -inf)",
            R"(cmd2 param1 = " hello world " )",
    
            // things that would not have parsed with question code:
            R"(cmd2 param1 = "" )",
    
            // things that should not parse
            R"(cmd2 param1 = 3.14 param2 = 8e-9)",
            R"(cmd1 param1 = " hello world " )",
            R"(cmd2 param1 = "" trailing rubbish)",
            R"(trailing rubbish)",
        };
    
        for (std::string line : inputs) {
            std::cout << std::left << std::setw(40) << quoted(line);
            try {
                auto result = parse_line(line);
                std::cout << " -> " << result << std::endl;
            } catch (std::exception const& error) {
                std::cout << " -> ERROR " << error.what() << std::endl;
            }
        }
    }
    

    Printing

    ""                                       -> --
    "cmd1 param1 = 3.14 param2 = 8e-9"       ->  (3.14 8e-09)
    "cmd1 param1 = 3.14 param2 = -inf"       ->  (3.14 -inf)
    "cmd2 param1 = \" hello world \" "       ->  ( hello world )
    "cmd2 param1 = \"\" "                    ->  ()
    "cmd2 param1 = 3.14 param2 = 8e-9"       -> --
    "cmd1 param1 = \" hello world \" "       -> --
    "cmd2 param1 = \"\" trailing rubbish"    -> --
    "trailing rubbish"                       -> --
    

    To make it stricter, you can make it throw on unrecognized input as well:

    if (phrase_parse(first, last, x3::expect[parser::command > x3::eoi], x3::space, attr))
    

    Printing (Live):

    ""                                       -> ERROR boost::spirit::x3::expectation_failure
    "cmd1 param1 = 3.14 param2 = 8e-9"       ->  (3.14 8e-09)
    "cmd1 param1 = 3.14 param2 = -inf"       ->  (3.14 -inf)
    "cmd2 param1 = \" hello world \" "       ->  ( hello world )
    "cmd2 param1 = \"\" "                    ->  ()
    "cmd2 param1 = 3.14 param2 = 8e-9"       -> ERROR boost::spirit::x3::expectation_failure
    "cmd1 param1 = \" hello world \" "       -> ERROR boost::spirit::x3::expectation_failure
    "cmd2 param1 = \"\" trailing rubbish"    -> ERROR boost::spirit::x3::expectation_failure
    "trailing rubbish"                       -> ERROR boost::spirit::x3::expectation_failure
    

    The expectation_failure can be used to create more helpful diagnostic output (you may search my answers for many examples).

    Other Ideas

    std::variant does not currently have good support in Spirit, see e.g.