Search code examples
c++ boost-spirit boost-spirit-qi

Boost-Spirit: Parsing lists into different fields of a struct


I'm trying to get started with Boost.Spirit; however, I'm stuck, and its error messages are not exactly easy to understand.

First off, I have the following text format:

(111, 222, 333, ...) {
    X 231
    Y 227
    X 54
    Z 41156
    Y 1112
    ...
}

That is, a header containing a list of ints, followed by a body containing an unordered sequence of entries, each consisting of one of X, Y, or Z followed by a number. I want to parse it into the following struct:

// Target of the parse: the header list plus one list per body tag.
struct my_struct {
        std::vector<int> head;  // ints from the parenthesised header
        std::vector<int> X;     // numbers that follow an 'X' entry in the body
        std::vector<int> Y;     // numbers that follow a 'Y' entry in the body
        std::vector<int> Z;     // numbers that follow a 'Z' entry in the body
};

So far I've got this parser:

// Adapts my_struct as a Boost.Fusion sequence whose elements are, in order,
// head, X, Y and Z. The parser below uses my_struct as a rule attribute.
BOOST_FUSION_ADAPT_STRUCT(
        my_struct,
        (std::vector<int>, head)
        (std::vector<int>, X)
        (std::vector<int>, Y)
        (std::vector<int>, Z)
)

namespace qi = boost::spirit::qi;
namespace ascii = boost::spirit::ascii;

// First attempt: relies purely on automatic attribute propagation.
// The start rule exposes a my_struct, while head, body, X, Y and Z each
// expose a std::vector<int>.
template<typename Iterator, typename Skipper = ascii::space_type>
struct my_parser : qi::grammar<Iterator, my_struct(), Skipper> {
        my_parser() : my_parser::base_type(start) {
                // The sequence supplies only two vector attributes (head and
                // body) for a struct that has four vector members.
                start %= '(' >> head >> ')' >> '{' >> body >> '}';
                head = int_ % ',';
                // body has a single std::vector<int> attribute, so whichever of
                // X, Y or Z matches, its number is appended to that same vector.
                // Nothing here routes values to separate my_struct members.
                body = +(X | Y | Z);
                X = 'X' >> int_;
                Y = 'Y' >> int_;
                Z = 'Z' >> int_;
        }

        qi::rule<Iterator, my_struct(), Skipper> start;
        qi::rule<Iterator, std::vector<int>(), Skipper> head;
        qi::rule<Iterator, std::vector<int>(), Skipper> body;
        qi::rule<Iterator, std::vector<int>(), Skipper> X;
        qi::rule<Iterator, std::vector<int>(), Skipper> Y;
        qi::rule<Iterator, std::vector<int>(), Skipper> Z;
};

This compiles nicely; however, the results are obviously wrong. Using the above example, the result is:

my_struct {
    header = [ 111, 222, 333 ]
         X = [ 231, 227, 54, 41156, 1112 ]
         Y = []
         Z = []
}

I'm pretty sure what I need is something along the lines of the following, but I can't get it to compile and I don't understand why.

// Second attempt: pass the destination down as an inherited attribute (_r1)
// and push each parsed int into it from a semantic action.
// body receives the struct; X, Y and Z each receive one of its vectors.
template<typename Iterator, typename Skipper = ascii::space_type>
struct my_parser : qi::grammar<Iterator, my_struct(), Skipper> {
        my_parser() : my_parser::base_type(start) {
                start %= '(' >> head >> ')' >> '{' >> body(_val) >> '}';
                head = int_ % ',';
                // Each alternative hands the matching member of the inherited
                // struct on to the corresponding sub-rule.
                body = +( X(bind(&my_struct::X, _r1))
                        | Y(bind(&my_struct::Y, _r1))
                        | Z(bind(&my_struct::Z, _r1))
                        );
                X = 'X' >> int_[push_back(_r1, _1)];
                Y = 'Y' >> int_[push_back(_r1, _1)];
                Z = 'Z' >> int_[push_back(_r1, _1)];
        }

        qi::rule<Iterator, my_struct(), Skipper> start;
        qi::rule<Iterator, std::vector<int>(), Skipper> head;
        // NOTE(review): the inherited attributes below are declared by value
        // (my_struct, std::vector<int>). Mutating the caller's object through
        // _r1 presumably needs reference types (my_struct&, std::vector<int>&)
        // -- confirm against the Spirit Qi rule documentation.
        qi::rule<Iterator, void(my_struct), Skipper> body;
        qi::rule<Iterator, void(std::vector<int>), Skipper> X;
        qi::rule<Iterator, void(std::vector<int>), Skipper> Y;
        qi::rule<Iterator, void(std::vector<int>), Skipper> Z;
};

Solution

  • I don't really like using semantic actions, but given the choice of AST and input, I don't think there's much of a choice.

    I'd simplify the grammar:

        // One rule for the whole input. Every int_ carries a semantic action
        // that calls the matching push functor (head_, X_, Y_ or Z_) with _val
        // and the parsed integer (_1).
        rule  = '(' >> (int_[head_(_val, _1)] % ',') >> ')'
            >> '{' >> +(
                    'X' >> int_[X_(_val, _1)]
                  | 'Y' >> int_[Y_(_val, _1)]
                  | 'Z' >> int_[Z_(_val, _1)]
              )
            >> '}';
    

    And I'd create phoenix functions to push the elements:

    // Function object that appends an int to one vector member of my_struct.
    // The member is chosen at compile time through the pointer-to-member
    // template argument.
    template <std::vector<int> my_struct::*Member>
    struct push {
        void operator()(my_struct& target, int value) const {
            std::vector<int>& field = target.*Member;
            field.push_back(value);
        }
    };
    

    Now, it's as simple as:

    // One px::function wrapper per my_struct member, each instantiating push
    // with the pointer to the member it appends to.
    px::function<push<&my_struct::head> > head_;
    px::function<push<&my_struct::X> > X_;
    px::function<push<&my_struct::Y> > Y_;
    px::function<push<&my_struct::Z> > Z_;
    

    Demo

    Live On Coliru

    #include <boost/spirit/include/qi.hpp>
    #include <boost/spirit/include/phoenix.hpp>
    
    namespace qi = boost::spirit::qi;
    namespace px = boost::phoenix;
    
    // Parse result: the header list plus one list per body tag.
    struct my_struct {
        std::vector<int> head;  // ints from the parenthesised header
        std::vector<int> X;     // numbers that follow an 'X' entry
        std::vector<int> Y;     // numbers that follow a 'Y' entry
        std::vector<int> Z;     // numbers that follow a 'Z' entry
    };
    
    /// Grammar for input of the form `(1, 2, 3) { X 4 Y 5 Z 6 }`.
    ///
    /// The synthesized attribute is a my_struct. It is filled entirely by
    /// semantic actions: each parsed integer is appended to the member that
    /// corresponds to where it was found (header list, or after X / Y / Z).
    ///
    /// The grammar itself is declared without a skipper; the start rule applies
    /// `skip(space)` internally, so callers use plain `qi::parse`.
    ///
    /// @tparam Iterator  iterator type of the input being parsed
    template<typename Iterator>
    struct my_parser : qi::grammar<Iterator, my_struct()> {
        my_parser() : my_parser::base_type(start) {
            using namespace qi;

            // Assigned with plain `=`: the semantic actions attached to every
            // int_ do all the work of populating _val.
            rule  = '(' >> (int_[head_(_val, _1)] % ',') >> ')'
                >> '{' >> +(
                        'X' >> int_[X_(_val, _1)]
                      | 'Y' >> int_[Y_(_val, _1)]
                      | 'Z' >> int_[Z_(_val, _1)]
                  )
                >> '}';

            // Wrap the skipper-aware rule so the grammar's public interface
            // stays skipper-free.
            start = skip(space) [rule];
        }
      private:
        /// Function object that appends `v` to the vector member `m` of `ms`.
        template <std::vector<int> my_struct::*m> struct push {
            void operator()(my_struct& ms, int v) const { (ms.*m).push_back(v); }
        };
        // One lazy (Phoenix) wrapper per destination member.
        px::function<push<&my_struct::head> > head_;
        px::function<push<&my_struct::X> > X_;
        px::function<push<&my_struct::Y> > Y_;
        px::function<push<&my_struct::Z> > Z_;

        qi::rule<Iterator, my_struct()> start;                 // entry point, no skipper
        qi::rule<Iterator, my_struct(), qi::space_type> rule;  // actual grammar, skips whitespace
    };
    
    int main() {
        using It = boost::spirit::istream_iterator;
        It f(std::cin >> std::noskipws), l;
    
        my_struct data;
        if (parse(f, l, my_parser<It>{}, data)) {
            std::cout << "Parsed:";
            std::copy(data.head.begin(), data.head.end(), std::ostream_iterator<int>(std::cout << "\nhead: ", " " ));
            std::copy(data.X.begin(), data.X.end(), std::ostream_iterator<int>(std::cout << "\nX: ", " " ));
            std::copy(data.Y.begin(), data.Y.end(), std::ostream_iterator<int>(std::cout << "\nY: ", " " ));
            std::copy(data.Z.begin(), data.Z.end(), std::ostream_iterator<int>(std::cout << "\nZ: ", " " ));
            std::cout << "\n";
        } else {
            std::cout << "Parse failed\n";
        }
    
    
        if (f != l)
            std::cout << "Remaining unparsed input: '" << std::string(f,l) << "'\n";
    }
    

    For the input `(111, 222, 333) { X 231 Y 227 X 54 Z 41156 Y 1112 }`, this prints:

    Parsed:
    head: 111 222 333 
    X: 231 54 
    Y: 227 1112 
    Z: 41156 
    Remaining unparsed input: '
    '