Search code examples
c++parsingboostboost-spiritqi

Boost::Spirit struggle with parsing a String


I'm trying to parse a string with Boost::Spirit, but I just cannot get it to work. I had no experience with Boost::Spirit before today.

The string is composed of commands separated by a ';'. The commands are:

"INC someInteger"

"BOMB firstInteger secondInteger"

"MOVE firstInteger secondInteger thirdInteger"

"MSG someString"

"WAIT"

I managed to get this far:

#include <boost/spirit/include/qi.hpp>
#include <boost/phoenix/phoenix.hpp>

using namespace boost::spirit;

// Asker's work-in-progress: tries to parse a ';'-separated command string with
// Spirit Qi. Only the INC rule is active; the BOMB/MOVE/combined rules below
// are commented out.
int main() {    
    std::string testInput = "MOVE 1 2 43;BOMB 0 3;INC 6;MOVE 2 3 99;MOVE 1 2 6";

    typedef std::string::iterator iter;
    // Imported but not used anywhere in the visible code.
    using boost::phoenix::ref;

    // phrase_parse advances this iterator past whatever it consumed.
    iter start = testInput.begin();

    // Intended destinations, one container per command kind. Only IncCommands
    // is passed to the parser below; the other two are never written.
    std::vector<int> IncCommands;
    std::vector<std::pair<int, int>> BombCommands;
    std::vector<std::tuple<int, int, int>> MoveCommands;

    // Matches the literal "INC " followed by a single integer.
    qi::rule<iter, std::vector<int>(), ascii::space_type> nextIncrease = ("INC " >> qi::int_);
    //qi::rule<iter, std::vector<std::pair<int, int>>(), ascii::space_type> nextBomb = ("BOMB " >> qi::int_ >> qi::int_);
    //qi::rule<iter, std::vector<int>(), ascii::space_type> nextMove = ("MOVE " >> qi::int_ >> qi::int_ >> qi::int_);

    //qi::rule<iter, std::string, ascii::space_type> nextAction = (nextMove | nextBomb | nextIncrease) % ';';

    // Applies the INC rule once from the start of the input, skipping ASCII
    // whitespace between tokens.
    // NOTE(review): testInput begins with "MOVE", so this INC-only rule cannot
    // match at the first position; `match` is also never inspected.
    bool match = qi::phrase_parse(
        start,
        testInput.end(),
        nextIncrease,
        ascii::space,
        IncCommands
    );

    return 0;
}

The problems that I have now:

  1. I don't know how I can extract more than one integer.

  2. I don't know how I can merge everything into a proper grammar so that everything is parsed into several vectors.

  3. I haven't considered MSG and WAIT yet.


Solution

  • I'd suggest starting out with the desired AST as always.

    Spirit works well with static polymorphism, so I'd use a variant to represent commands:

    // Abstract syntax tree for a parsed script.
    namespace AST {
        // One plain struct per command keyword.
        namespace Cmd {
            // "MOVE" carries three integers.
            struct Move {
                int x;
                int y;
                int z;
            };

            // "BOMB" carries two integers.
            struct Bomb {
                int x;
                int y;
            };

            // "INC" carries a single integer.
            struct Inc {
                int amount;
            };

            // "MSG" carries free-form text.
            struct Msg {
                std::string text;
            };

            // "WAIT" carries no data.
            struct Wait {};
        }

        // A single command: exactly one of the command structs above.
        using Command = boost::variant<
            Cmd::Move,
            Cmd::Bomb,
            Cmd::Inc,
            Cmd::Msg,
            Cmd::Wait
        >;

        // A whole script: the commands in input order.
        using Commands = std::vector<Command>;
    }
    

    Now, write the most straight-forward grammar to match it:

    /// Qi grammar that parses a ';'-separated list of commands into AST::Commands.
    ///
    /// @tparam It  iterator type over the input characters.
    ///
    /// The grammar type itself carries no skipper (see the base-class template
    /// arguments); whitespace skipping is installed internally by `start`.
    /// The command structs must be Fusion-adapted for the attribute assignment
    /// in the per-command rules to work.
    template <typename It>
    struct ScriptGrammar : qi::grammar<It, AST::Commands()>
    {
        ScriptGrammar() : ScriptGrammar::base_type(start) {
            using namespace qi;
            // Entry point: run `script` with the `space` skipper enabled.
            start   = skip(space) [ script ];
            // One or more commands separated by ';'.
            script  = command % ";";
            // Alternatives are tried left to right; the first match wins.
            command = move|bomb|inc|msg|wait;
    
            // Each command is its keyword literal followed by its arguments.
            move = "MOVE" >> int_ >> int_ >> int_;
            bomb = "BOMB" >> int_ >> int_;
            inc  = "INC"  >> int_;
            msg  = "MSG"  >> text;
            // WAIT has no arguments, so attr() supplies an empty Wait value as
            // the rule's attribute without consuming input.
            wait = "WAIT" >> qi::attr(AST::Cmd::Wait{});
    
            // Message text: one or more characters that are not ';'.
            text  = +~char_(";");
            // Names the rules for tracing when BOOST_SPIRIT_DEBUG is defined.
            BOOST_SPIRIT_DEBUG_NODES((start)(script)(command)(move)(bomb)(inc)(msg)(wait)(text))
        }
      private:
        using Skipper = qi::space_type;
        // Rules that run under the whitespace skipper.
        qi::rule<It, AST::Commands(), Skipper>  script;
        qi::rule<It, AST::Command(), Skipper>   command;
        qi::rule<It, AST::Cmd::Move(), Skipper> move;
        qi::rule<It, AST::Cmd::Bomb(), Skipper> bomb;
        qi::rule<It, AST::Cmd::Inc(), Skipper>  inc;
        qi::rule<It, AST::Cmd::Msg(), Skipper>  msg;
        qi::rule<It, AST::Cmd::Wait(), Skipper> wait;
        // Rules declared without a skipper: `start` installs the skipper itself,
        // and `text` behaves as a lexeme so whitespace inside a message is kept.
        qi::rule<It, AST::Commands()>  start;
        qi::rule<It, std::string()>  text;
    };
    

    Add in some glue for debug (Fusion adaptation and output streaming), and we have a working sample:

    Live On Coliru

    #define BOOST_SPIRIT_DEBUG
    #include <iostream>
    #include <vector>
    #include <string>
    #include <iterator>
    #include <iomanip>
    #include <boost/spirit/include/qi.hpp>
    #include <boost/fusion/include/adapt_struct.hpp>
    #include <boost/phoenix/phoenix.hpp>
    
    // Abstract syntax tree for a parsed script.
    namespace AST {
        // One plain struct per command keyword.
        namespace Cmd {
            // "MOVE" carries three integers.
            struct Move {
                int x;
                int y;
                int z;
            };

            // "BOMB" carries two integers.
            struct Bomb {
                int x;
                int y;
            };

            // "INC" carries a single integer.
            struct Inc {
                int amount;
            };

            // "MSG" carries free-form text.
            struct Msg {
                std::string text;
            };

            // "WAIT" carries no data.
            struct Wait {};
        }

        // A single command: exactly one of the command structs above.
        using Command = boost::variant<
            Cmd::Move,
            Cmd::Bomb,
            Cmd::Inc,
            Cmd::Msg,
            Cmd::Wait
        >;

        // A whole script: the commands in input order.
        using Commands = std::vector<Command>;
    }
    
    // Expose each command struct to Boost.Fusion as a sequence of the listed
    // members. The grammar relies on this to assign parsed values to the
    // structs, and the operator<< helpers use it via fusion::as_vector.
    // Wait has no members, so it is adapted with an empty member list.
    BOOST_FUSION_ADAPT_STRUCT(AST::Cmd::Move, x,y,z)
    BOOST_FUSION_ADAPT_STRUCT(AST::Cmd::Bomb, x,y)
    BOOST_FUSION_ADAPT_STRUCT(AST::Cmd::Inc, amount)
    BOOST_FUSION_ADAPT_STRUCT(AST::Cmd::Msg, text)
    BOOST_FUSION_ADAPT_STRUCT(AST::Cmd::Wait)
    
    // Stream output for each command type (demo/debug only): the command
    // keyword followed by the Fusion rendering of the struct's members.
    namespace AST {
    namespace Cmd {
        std::ostream& operator<<(std::ostream& os, Move const& move) {
            os << "MOVE " << boost::fusion::as_vector(move);
            return os;
        }

        std::ostream& operator<<(std::ostream& os, Bomb const& bomb) {
            os << "BOMB " << boost::fusion::as_vector(bomb);
            return os;
        }

        std::ostream& operator<<(std::ostream& os, Inc const& inc) {
            os << "INC " << boost::fusion::as_vector(inc);
            return os;
        }

        std::ostream& operator<<(std::ostream& os, Msg const& msg) {
            os << "MSG " << boost::fusion::as_vector(msg);
            return os;
        }

        std::ostream& operator<<(std::ostream& os, Wait const& wait) {
            os << "WAIT " << boost::fusion::as_vector(wait);
            return os;
        }
    } // namespace Cmd
    } // namespace AST
    
    namespace qi = boost::spirit::qi;
    
    /// Qi grammar that parses a ';'-separated list of commands into AST::Commands.
    ///
    /// @tparam It  iterator type over the input characters.
    ///
    /// The grammar type itself carries no skipper (see the base-class template
    /// arguments); whitespace skipping is installed internally by `start`, which
    /// is why it can be driven with plain `qi::parse`.
    template <typename It>
    struct ScriptGrammar : qi::grammar<It, AST::Commands()>
    {
        ScriptGrammar() : ScriptGrammar::base_type(start) {
            using namespace qi;
            // Entry point: run `script` with the `space` skipper enabled.
            start   = skip(space) [ script ];
            // One or more commands separated by ';'.
            script  = command % ";";
            // Alternatives are tried left to right; the first match wins.
            command = move|bomb|inc|msg|wait;
    
            // Each command is its keyword literal followed by its arguments;
            // the parsed values fill the Fusion-adapted struct members in order.
            move = "MOVE" >> int_ >> int_ >> int_;
            bomb = "BOMB" >> int_ >> int_;
            inc  = "INC"  >> int_;
            msg  = "MSG"  >> text;
            // WAIT has no arguments, so attr() supplies an empty Wait value as
            // the rule's attribute without consuming input.
            wait = "WAIT" >> qi::attr(AST::Cmd::Wait{});
    
            // Message text: one or more characters that are not ';'.
            text  = +~char_(";");
            // Names the rules for tracing when BOOST_SPIRIT_DEBUG is defined.
            BOOST_SPIRIT_DEBUG_NODES((start)(script)(command)(move)(bomb)(inc)(msg)(wait)(text))
        }
      private:
        using Skipper = qi::space_type;
        // Rules that run under the whitespace skipper.
        qi::rule<It, AST::Commands(), Skipper>  script;
        qi::rule<It, AST::Command(), Skipper>   command;
        qi::rule<It, AST::Cmd::Move(), Skipper> move;
        qi::rule<It, AST::Cmd::Bomb(), Skipper> bomb;
        qi::rule<It, AST::Cmd::Inc(), Skipper>  inc;
        qi::rule<It, AST::Cmd::Msg(), Skipper>  msg;
        qi::rule<It, AST::Cmd::Wait(), Skipper> wait;
        // Rules declared without a skipper: `start` installs the skipper itself,
        // and `text` behaves as a lexeme so whitespace inside a message is kept.
        qi::rule<It, AST::Commands()>  start;
        qi::rule<It, std::string()>  text;
    };
    
    int main() {    
        std::string const testInput = "MOVE 1 2 43;BOMB 0 3;INC 6;MOVE 2 3 99;MSG MOVE ZIG;WAIT;MSG FOR GREAT JUSTICE!;MOVE 1 2 6";
    
        typedef std::string::const_iterator iter;
    
        iter start = testInput.begin(), end = testInput.end();
    
        AST::Commands script;
    
        bool match = qi::parse(start, testInput.end(), ScriptGrammar<iter>(), script);
    
        if (match) {
            std::cout << "Parsed " << script.size() << " commands\n";
            std::copy(script.begin(), script.end(), std::ostream_iterator<AST::Command>(std::cout, ";"));
        } else {
            std::cout << "Parse failed\n";
        }
    
        if (start != end)
            std::cout << "Remaining unparsed input: '" << std::string(start, end) << "'\n";
    }
    

    Which prints:

    Parsed 8 commands
    MOVE (1 2 43);BOMB (0 3);INC (6);MOVE (2 3 99);MSG (MOVE ZIG);WAIT ();MSG (FOR GREAT JUSTICE!);MOVE (1 2 6);
    

    And optionally the BOOST_SPIRIT_DEBUG output:

    <start>
      <try>MOVE 1 2 43;BOMB 0 3</try>
      <script>
        <try>MOVE 1 2 43;BOMB 0 3</try>
        <command>
          <try>MOVE 1 2 43;BOMB 0 3</try>
          <move>
            <try>MOVE 1 2 43;BOMB 0 3</try>
            <success>;BOMB 0 3;INC 6;MOVE</success>
            <attributes>[[1, 2, 43]]</attributes>
          </move>
          <success>;BOMB 0 3;INC 6;MOVE</success>
          <attributes>[[1, 2, 43]]</attributes>
        </command>
        <command>
          <try>BOMB 0 3;INC 6;MOVE </try>
          <move>
            <try>BOMB 0 3;INC 6;MOVE </try>
            <fail/>
          </move>
          <bomb>
            <try>BOMB 0 3;INC 6;MOVE </try>
            <success>;INC 6;MOVE 2 3 99;M</success>
            <attributes>[[0, 3]]</attributes>
          </bomb>
          <success>;INC 6;MOVE 2 3 99;M</success>
          <attributes>[[0, 3]]</attributes>
        </command>
        <command>
          <try>INC 6;MOVE 2 3 99;MS</try>
          <move>
            <try>INC 6;MOVE 2 3 99;MS</try>
            <fail/>
          </move>
          <bomb>
            <try>INC 6;MOVE 2 3 99;MS</try>
            <fail/>
          </bomb>
          <inc>
            <try>INC 6;MOVE 2 3 99;MS</try>
            <success>;MOVE 2 3 99;MSG MOV</success>
            <attributes>[[6]]</attributes>
          </inc>
          <success>;MOVE 2 3 99;MSG MOV</success>
          <attributes>[[6]]</attributes>
        </command>
        <command>
          <try>MOVE 2 3 99;MSG MOVE</try>
          <move>
            <try>MOVE 2 3 99;MSG MOVE</try>
            <success>;MSG MOVE ZIG;WAIT;M</success>
            <attributes>[[2, 3, 99]]</attributes>
          </move>
          <success>;MSG MOVE ZIG;WAIT;M</success>
          <attributes>[[2, 3, 99]]</attributes>
        </command>
        <command>
          <try>MSG MOVE ZIG;WAIT;MS</try>
          <move>
            <try>MSG MOVE ZIG;WAIT;MS</try>
            <fail/>
          </move>
          <bomb>
            <try>MSG MOVE ZIG;WAIT;MS</try>
            <fail/>
          </bomb>
          <inc>
            <try>MSG MOVE ZIG;WAIT;MS</try>
            <fail/>
          </inc>
          <msg>
            <try>MSG MOVE ZIG;WAIT;MS</try>
            <text>
              <try>MOVE ZIG;WAIT;MSG FO</try>
              <success>;WAIT;MSG FOR GREAT </success>
              <attributes>[[M, O, V, E,  , Z, I, G]]</attributes>
            </text>
            <success>;WAIT;MSG FOR GREAT </success>
            <attributes>[[[M, O, V, E,  , Z, I, G]]]</attributes>
          </msg>
          <success>;WAIT;MSG FOR GREAT </success>
          <attributes>[[[M, O, V, E,  , Z, I, G]]]</attributes>
        </command>
        <command>
          <try>WAIT;MSG FOR GREAT J</try>
          <move>
            <try>WAIT;MSG FOR GREAT J</try>
            <fail/>
          </move>
          <bomb>
            <try>WAIT;MSG FOR GREAT J</try>
            <fail/>
          </bomb>
          <inc>
            <try>WAIT;MSG FOR GREAT J</try>
            <fail/>
          </inc>
          <msg>
            <try>WAIT;MSG FOR GREAT J</try>
            <fail/>
          </msg>
          <wait>
            <try>WAIT;MSG FOR GREAT J</try>
            <success>;MSG FOR GREAT JUSTI</success>
            <attributes>[[]]</attributes>
          </wait>
          <success>;MSG FOR GREAT JUSTI</success>
          <attributes>[[]]</attributes>
        </command>
        <command>
          <try>MSG FOR GREAT JUSTIC</try>
          <move>
            <try>MSG FOR GREAT JUSTIC</try>
            <fail/>
          </move>
          <bomb>
            <try>MSG FOR GREAT JUSTIC</try>
            <fail/>
          </bomb>
          <inc>
            <try>MSG FOR GREAT JUSTIC</try>
            <fail/>
          </inc>
          <msg>
            <try>MSG FOR GREAT JUSTIC</try>
            <text>
              <try>FOR GREAT JUSTICE!;M</try>
              <success>;MOVE 1 2 6</success>
              <attributes>[[F, O, R,  , G, R, E, A, T,  , J, U, S, T, I, C, E, !]]</attributes>
            </text>
            <success>;MOVE 1 2 6</success>
            <attributes>[[[F, O, R,  , G, R, E, A, T,  , J, U, S, T, I, C, E, !]]]</attributes>
          </msg>
          <success>;MOVE 1 2 6</success>
          <attributes>[[[F, O, R,  , G, R, E, A, T,  , J, U, S, T, I, C, E, !]]]</attributes>
        </command>
        <command>
          <try>MOVE 1 2 6</try>
          <move>
            <try>MOVE 1 2 6</try>
            <success></success>
            <attributes>[[1, 2, 6]]</attributes>
          </move>
          <success></success>
          <attributes>[[1, 2, 6]]</attributes>
        </command>
        <success></success>
        <attributes>[[[1, 2, 43], [0, 3], [6], [2, 3, 99], [[M, O, V, E,  , Z, I, G]], [], [[F, O, R,  , G, R, E, A, T,  , J, U, S, T, I, C, E, !]], [1, 2, 6]]]</attributes>
      </script>
      <success></success>
      <attributes>[[[1, 2, 43], [0, 3], [6], [2, 3, 99], [[M, O, V, E,  , Z, I, G]], [], [[F, O, R,  , G, R, E, A, T,  , J, U, S, T, I, C, E, !]], [1, 2, 6]]]</attributes>
    </start>