Search code examples
c++boost-spiritboost-spirit-qi

How to push all the arguments into result vector when parsing with Spirit::Qi?


I have several commands in a script language that I need to parse. While parsing, I would like to check that the syntax is correct and validate each command's type and arguments (the number of arguments varies per command type, so I store them in a std::vector<std::string>).

My problem is that, when parsing, only the first string is stored in the vector, no matter how many strings are actually present.

Also, I had to wrap every argument in qi::as_string to get the code to compile.

A minimal working example of my project is shown next:

//#define BOOST_SPIRIT_DEBUG
#include <boost/algorithm/string.hpp>
#include <boost/algorithm/string/predicate.hpp>
#include <boost/spirit/include/qi.hpp>
#include <iomanip>
#include <iostream>
#include <sstream>
#include <stdexcept>
#include <string>
#include <vector>

namespace qi = boost::spirit::qi;

// Classification of a single script line.
enum class TYPE { NONE, CMD1, CMD2, FAIL };

// One parsed script line: what kind of command it is, plus its arguments
// kept as text (the number of arguments depends on the command).
struct Command {
    TYPE                     type{TYPE::NONE};
    std::vector<std::string> args;
};

// The parse result for a whole script.
using Commands = std::vector<Command>;

// Adapt Command as a Boost.Fusion sequence of its two members, in this order
// (type, args), so that Qi rules can use it as their attribute.
BOOST_FUSION_ADAPT_STRUCT(Command, type, args)

// Qi grammar that turns a whole script into a list of Command records,
// one per line.
//
// NOTE: this is the version that exhibits the problem being asked about.
// Command is adapted as a two-element sequence (type, args), while cmd2
// produces three elements (type, string, string); only the first string
// ends up in args.
template <typename It>
class Parser : public qi::grammar<It, Commands()>
{
  private:
    // Per-line rules: each one yields a Command and uses a blank skipper.
    qi::rule<It, Command(), qi::blank_type> none, cmd1, cmd2, fail;
    // Entry rule: declared without a skipper; it installs one itself via
    // skip(blank) in its definition.
    qi::rule<It, Commands()> start;

  public:
    Parser() : Parser::base_type(start)
    {
        using namespace qi;

        // Empty or blank-only line: the end-of-line / end-of-input is only
        // looked ahead at (and-predicate), not consumed. Yields TYPE::NONE
        // with an empty argument list.
        none = omit[*blank] >> &(eol | eoi)
            >> attr(TYPE::NONE)
            >> attr(std::vector<std::string>{});

        // CMD1(<text>): the text is everything up to ')' or a line break,
        // taken without skipping (lexeme).
        cmd1 = lit("CMD1") >> '('
            >> attr(TYPE::CMD1)
            >> as_string[lexeme[+~char_(")\r\n")]] >> ')';

        // CMD2(<text>, <number>): the number must parse as a double but is
        // kept as its source text (raw). This is the rule whose second
        // argument does not reach Command::args.
        cmd2 = lit("CMD2") >> '('
            >> attr(TYPE::CMD2)
            >> as_string[lexeme[+~char_(",)\r\n")]] >> ','
            >> as_string[raw[double_]] >> ')';


        // Catch-all: discards the rest of the line and reports TYPE::FAIL.
        fail = omit[*~char_("\r\n")] //
            >> attr(TYPE::FAIL);

        // One alternative per line, lines separated by eol; the list must be
        // followed by end of input (expectation point).
        start = skip(blank)[(none | cmd1 | cmd2 | fail) % eol] > eoi;
    }
};

// Parses `text` as a script and returns one Command per line.
//
// Throws std::runtime_error when the grammar reports failure, instead of
// silently returning whatever was collected up to that point.
Commands parse(std::string text)
{
    std::istringstream in(std::move(text));
    using It = boost::spirit::istream_iterator;

    static const Parser<It> parser;

    Commands commands;
    // noskipws: the stream must hand whitespace through unchanged, because
    // the grammar deals with blanks and line breaks itself.
    It first(in >> std::noskipws), last;

    if (!qi::parse(first, last, parser, commands))
        throw std::runtime_error("command parse error");

    return commands;
}

// Parses a two-line sample script and prints how many arguments each command
// received.
int main()
{
    std::string test{
R"(CMD1(some ad hoc text)
CMD2(identity, 25.5))"};

    try {
        auto commands = parse(test);
        std::cout << "elements: " << commands.size() << '\n';
        // at() rather than operator[]: if fewer commands than expected were
        // parsed, this throws std::out_of_range (reported by the handler
        // below) instead of reading past the end of the vector.
        std::cout << "CMD1 args: " << commands.at(0).args.size() << '\n';
        std::cout << "CMD2 args: " << commands.at(1).args.size() << '\n';// Error! Should be 2!!!!!

    } catch (std::exception const& e) {
        std::cout << e.what() << "\n";
    }
}

Also, here is a link to compiler explorer: https://godbolt.org/z/qM6KTcTTK

Any help fixing this? Thanks in advance


Solution

  • Enabling your debugging shows: https://godbolt.org/z/o3nvjz9bG

    Not clear enough for me. Let's add an argument rule:

    // One parsed script line: its command type plus its textual arguments.
    struct Command {
        enum TYPE { NONE, CMD1, CMD2, FAIL };
        using Arg  = std::string;
        using Args = std::vector<Arg>;
    
        TYPE type{NONE};
        Args args;
    };
    
    qi::rule<It, Command::Arg()> arg;
    

    And

    none = omit[*blank] >> &(eol | eoi)
        >> attr(Command::NONE)
        /*>> attr(Command::Args{})*/;
    
    arg  = raw[double_] | +~char_(",)\r\n");
    
    cmd1 = lit("CMD1") >> attr(Command::CMD1) //
        >> '(' >> arg >> ')';
    
    cmd2 = lit("CMD2") >> attr(Command::CMD2) //
        >> '(' >> arg >> ',' >> arg >> ')';
    
    fail = omit[*~char_("\r\n")] //
        >> attr(Command::FAIL);
    

    Now we can see https://godbolt.org/z/3Kqr3K41v

      <cmd2>
        <try>CMD2(identity, 25.5)</try>
        <arg>
          <try>identity, 25.5)</try>
          <success>, 25.5)</success>
          <attributes>[[i, d, e, n, t, i, t, y]]</attributes>
        </arg>
        <arg>
          <try>25.5)</try>
          <success>)</success>
          <attributes>[[2, 5, ., 5]]</attributes>
        </arg>
        <success></success>
        <attributes>[[CMD2, [[i, d, e, n, t, i, t, y]]]]</attributes>
      </cmd2>
    

    Clearly, both arguments are parsed, but only one is assigned. The sad fact is that you're actively confusing the rule, by adapting a two-element struct and parsing a sequence of 3 elements.

    You can get this to work, but you'd have to help it (e.g. with transform_attribute, attr_cast<> or a separate rule):

        arg  = raw[double_] | +~char_(",)\r\n");
        args = arg % ',';
    
        cmd1 = lit("CMD1") >> attr(Command::CMD1) //
            >> '(' >> arg >> ')';
    
        cmd2 = lit("CMD2") >> attr(Command::CMD2) //
            >> '(' >> args >> ')';
    

    Now you get:

      <cmd2>
        <try>CMD2(identity, 25.5)</try>
        <args>
          <try>identity, 25.5)</try>
          <arg>
            <try>identity, 25.5)</try>
            <success>, 25.5)</success>
            <attributes>[[i, d, e, n, t, i, t, y]]</attributes>
          </arg>
          <arg>
            <try> 25.5)</try>
            <success>)</success>
            <attributes>[[ , 2, 5, ., 5]]</attributes>
          </arg>
          <success>)</success>
          <attributes>[[[i, d, e, n, t, i, t, y], [ , 2, 5, ., 5]]]</attributes>
        </args>
        <success></success>
        <attributes>[[CMD2, [[i, d, e, n, t, i, t, y], [ , 2, 5, ., 5]]]]</attributes>
      </cmd2>
    

    Now this hints at an obvious improvement: improve the grammar by simplifying:

        none  = omit[*blank] >> &(eol | eoi) >> attr(Command{Command::NONE, {}});
        fail  = omit[*~char_("\r\n")] >> attr(Command::FAIL);
    
    arg   = raw[double_] | +~char_(",)\r\n");
    args  = '(' >> arg % ',' >> ')';
    cmd   = no_case[type_] >> -args;
    
    start = skip(blank)[(cmd|fail) % eol] > eoi;
    

    Then add validation to the commands after the fact.

    Demo

    Live On Compiler Explorer

    //#define BOOST_SPIRIT_DEBUG
    #include <boost/spirit/include/qi.hpp>
    #include <iomanip>
    #include <iostream>
    
    namespace qi = boost::spirit::qi;
    
    // One parsed script line: its command type plus its textual arguments.
    // Streams as TYPE("arg1", "arg2", ...), with each argument quoted.
    struct Command {
        using Arg  = std::string;
        using Args = std::vector<Arg>;
        enum Type { NONE, CMD1, CMD2, FAIL };
    
        Type type = NONE;
        Args args;
    
        // Writes the symbolic name of `type`, or "???" for an unknown value.
        friend std::ostream& operator<<(std::ostream& os, Type type) {
            char const* label = "???";
            if (type == NONE)
                label = "NONE";
            else if (type == CMD1)
                label = "CMD1";
            else if (type == CMD2)
                label = "CMD2";
            else if (type == FAIL)
                label = "FAIL";
            return os << label;
        }
    
        // Writes the type name followed by the comma-separated, quoted
        // arguments in parentheses.
        friend std::ostream& operator<<(std::ostream& os, Command const& cmd) {
            os << cmd.type << "(";
            bool first = true;
            for (auto const& arg : cmd.args) {
                if (!first)
                    os << ", ";
                first = false;
                os << std::quoted(arg);
            }
            return os << ")";
        }
    };
    using Commands = std::vector<Command>;
    
    // Adapt Command as a Boost.Fusion sequence of its two members, in this
    // order (type, args), so that Qi rules can use it as their attribute.
    BOOST_FUSION_ADAPT_STRUCT(Command, type, args)
    
    // Qi grammar producing one Command per input line.
    //
    // Each line is tried, in order, as: a known command name with an optional
    // parenthesised argument list (cmd), an empty/blank line (none), or
    // anything else (fail).
    template <typename It> struct Parser : qi::grammar<It, Commands()> {
        Parser() : Parser::base_type(start) {
            using namespace qi;
    
            // Blank line: only blanks before an end-of-line / end-of-input that
            // is looked ahead at, not consumed. Yields a NONE command.
            none  = omit[*blank] >> &(eol | eoi) >> attr(Command{Command::NONE, {}});
            // Catch-all: discards the rest of the line and reports FAIL.
            fail  = omit[*~char_("\r\n")] >> attr(Command::FAIL);
    
            // A single argument, kept as text: either the source text of a
            // number, or any run of characters up to ',', ')' or a line break.
            // NOTE(review): arg is declared without a skipper, so a blank
            // following ',' becomes part of the argument text (the result
            // contains " 25.5" with a leading space) -- confirm this is intended.
            arg   = raw[double_] | +~char_(",)\r\n");
            // Parenthesised, comma-separated argument list.
            args  = '(' >> arg % ',' >> ')';
            // Command name looked up case-insensitively in the symbol table;
            // the argument list is optional.
            cmd   = no_case[type] >> -args;
    
            // Lines are separated by eol and parsed with blanks skipped; the
            // list must be followed by end of input (expectation point).
            start = skip(blank)[(cmd|none|fail) % eol] > eoi;
    
            BOOST_SPIRIT_DEBUG_NODES((start)(fail)(none)(cmd)(arg)(args))
        }
    
    private:
        // Symbol table mapping (lower-case) command names to Command::Type.
        struct type_sym : qi::symbols<char, Command::Type> {
            type_sym() { this->add//
                ("cmd1", Command::CMD1)
                ("cmd2", Command::CMD2);
            }
        } type;
        // arg/args/start take no skipper; the per-line rules skip blanks.
        qi::rule<It, Command::Arg()>            arg;
        qi::rule<It, Command::Args()>           args;
        qi::rule<It, Command(), qi::blank_type> cmd, none, fail;
        qi::rule<It, Commands()>                start;
    };
    
    // Runs the grammar over `text` and returns the parsed commands, one per
    // line. Throws std::runtime_error("command parse error") when the grammar
    // reports failure.
    Commands parse(std::string const& text)
    {
        using It = std::string::const_iterator;
        static const Parser<It> parser;
    
        It       cursor = text.begin();
        It const end    = text.end();
    
        Commands result;
        bool const matched = qi::parse(cursor, end, parser, result);
        if (!matched)
            throw std::runtime_error("command parse error");
    
        return result;
    }
    
    int main()
    {
        try {
            for (auto& cmd : parse(R"(
    CMD1(some ad hoc text)
    this is a bogus line
    cmd2(identity, 25.5))"))
                std::cout << cmd << "\n";
        } catch (std::exception const& e) {
            std::cout << e.what() << "\n";
        }
    }
    

    Prints

    NONE()
    CMD1("some ad hoc text")
    FAIL()
    CMD2("identity", " 25.5")