Search code examples
c++parsingboostboost-spirit-qi

Boost.Spirit qi value sequence vector


The following code fails to compile with this error:

/usr/include/boost/spirit/home/qi/detail/assign_to.hpp:153:20: error: no matching conversion for static_cast from 'const char' to 'boost::fusion::vector<char,
      std::vector<double, std::allocator<double> > >'
            attr = static_cast<Attribute>(val);
                   ^~~~~~~~~~~~~~~~~~~~~~~~~~~

I can't figure out why, because it works as expected when I change it to auto grammar = boost::spirit::no_skip[drawto_commands];.

The types of what moveto and lineto parse are the same.

The Qi operator >> has the type rule a: A, b: vector<A> --> (a >> b): vector<A>, which should make the types of what drawto_commands and moveto_drawto_command_group parse the same.

What am I missing?

#include <string>
#include <boost/spirit/include/qi.hpp>
#include <boost/fusion/include/boost_tuple.hpp>

// Attribute for one path command: element 0 is the command letter,
// element 1 holds the numbers that follow it.
typedef boost::fusion::vector<char, std::vector<double>> Arc;

// Runs `grammar` over `string` using an ASCII-space skipper and stores the
// parsed result in `attr`.
//
// Returns true when the parser matched and, if `full_match` is set, the
// whole input was consumed.
template <typename P, typename T>
bool test_phrase_parser_attr(const std::string &string, P const& grammar, T& attr, bool full_match = true)
{
    using boost::spirit::qi::ascii::space;
    using boost::spirit::qi::phrase_parse;

    auto cursor = string.begin();
    const auto finish = string.end();

    const bool matched = phrase_parse(cursor, finish, grammar, space, attr);
    if (!matched)
        return false;

    // Leftover input is acceptable only when a full match was not requested.
    return !full_match || cursor == finish;
}

// Reads lines from standard input until end of input or an empty line,
// parses each line as path data and prints the commands that were parsed.
int main()
{
    using boost::spirit::omit;
    using boost::spirit::qi::ascii::char_;
    using boost::spirit::qi::ascii::space;
    using boost::spirit::qi::attr;
    using boost::spirit::qi::double_;
    using boost::spirit::qi::copy;

    // Whitespace is matched explicitly by the grammar; omit[] drops its attribute.
    auto wsp = copy(omit[boost::spirit::ascii::space]);
    // Separator: a comma followed by optional whitespace, or whitespace that
    // may contain one comma.
    auto comma_wsp = copy(omit[(char_(',') >> *wsp) | (+wsp >> -char_(',') >> *wsp)]);
    auto coordinate = copy(double_);
    auto coordinate_pair = copy(coordinate >> -comma_wsp >> coordinate);
    // closepath takes no numbers, so attr() supplies an empty argument list.
    auto closepath = copy(char_("Zz") >> attr(std::vector<double>()));
    auto vertical_lineto = copy(char_("Vv") >> *wsp >> (coordinate % -comma_wsp));
    auto lineto = copy(char_("Ll") >> *wsp >> (coordinate_pair % -comma_wsp));
    auto moveto = copy(char_("Mm") >> *wsp >> (coordinate_pair % -comma_wsp));
    auto drawto_command = copy(closepath | vertical_lineto | lineto);
    auto drawto_commands = copy(*(*wsp >> drawto_command >> *wsp));
    auto moveto_drawto_command_group = copy(moveto >> drawto_commands);

    // NOTE: this is the expression reported to fail to compile; wrapping
    // drawto_commands instead of moveto_drawto_command_group is reported to work.
    auto grammar = boost::spirit::no_skip[moveto_drawto_command_group];
    std::vector<Arc> attribute;
    std::string str;
    // Printed once before the first line is read.
    std::cout << "*\n";
    while (getline(std::cin, str))
    {
        // An empty line ends the session.
        if (str.empty())
            break;
        // Discard the commands parsed from the previous line.
        attribute = {};
        bool r = test_phrase_parser_attr(str, grammar, attribute, true);
        if (r)
        {
            std::cout << "Parsing succeeded, got: " << std::endl;
            // One output line per command: the letter, then its arguments.
            for (auto &command: attribute){
                char line_type = boost::fusion::at_c<0>(command);
                std::cout << line_type;
                const std::vector<double> arguments = boost::fusion::at_c<1>(command);
                for (size_t i = 0; i < arguments.size(); ++i)
                {
                    std::cout << ' ' << arguments[i];
                }
                std::cout << std::endl;
            }
        }
        else
        {
            std::cout << "Parsing failed\n";
        }
    }
}
`

Solution

  • Okay, as tends to happen, I was looking at it and at the SVG specs, and felt it more worthwhile to share some ideas on

    1. style
    2. Qi convention
    3. advanced ideas

    that you might be interested in. Fair warning: I did not try to address your question as posed.

    Be specific with your types

    You seem to "always auto" in an already heuristics-based parser framework. I'm not surprised that sometimes things "don't magic out the right way". Assuming you want to keep using Qi, let's have a Qi parser:

    Live On Coliru

    #define BOOST_SPIRIT_DEBUG
    #include <string>
    #include <iostream>
    #include <boost/spirit/home/qi.hpp>
    #include <boost/fusion/include/adapt_struct.hpp>
    
    namespace AST {
        /// Numeric arguments of a single path command.
        using Coordinates = std::vector<double>;
    
        /// One parsed path command: the command letter plus its arguments.
        struct Arc {
            char command;            ///< command letter as matched in the input
            Coordinates coordinates; ///< numbers following the letter (empty for closepath)
        };
    
        /// A whole path: its commands in input order.
        using PathData = std::vector<Arc>;
    }
    
    BOOST_FUSION_ADAPT_STRUCT(AST::Arc, command, coordinates)
    
    namespace qi = boost::spirit::qi;
    namespace Parsers {
    
        /// Qi grammar for path data: one moveto command followed by any number
        /// of drawto commands, synthesized as an AST::PathData.
        ///
        /// The grammar itself is declared without a skipper; whitespace skipping
        /// is set up internally by the `start` rule.
        template <typename It>
        struct PathData : qi::grammar<It, AST::PathData()> {
            PathData() : PathData::base_type(start) {
                using namespace qi;
    
                // Building blocks shared by the command rules below.
                opt_comma       = -lit(',');
                coordinate      = double_;
                coordinate_pair = coordinate >> opt_comma >> coordinate;
    
                // Each command is its letter (either case) followed by its arguments.
                moveto          = char_("Mm") >> (coordinate_pair % opt_comma);
                // closepath has no arguments, so attr() supplies an empty Coordinates.
                closepath       = char_("Zz") >> attr(AST::Coordinates{});
                vertical_lineto = char_("Vv") >> (coordinate % opt_comma);
                lineto          = char_("Ll") >> (coordinate_pair % opt_comma);
                drawto_command  = closepath | vertical_lineto | lineto;
    
                drawto_commands = *drawto_command;
                // skip(space) establishes the whitespace skipper for everything inside it.
                start           = skip(space) [ moveto >> drawto_commands ];
    
    
                // Registers the rules for debug tracing.
                // NOTE(review): presumably only active while BOOST_SPIRIT_DEBUG is defined — confirm.
                BOOST_SPIRIT_DEBUG_NODES((opt_comma)(coordinate)(coordinate_pair)
                        (moveto)(closepath)(vertical_lineto)(lineto)(drawto_command)
                        (drawto_commands))
            }
          private:
            using Skipper = qi::space_type;
            // These two rules are declared without a skipper.
            qi::rule<It> opt_comma;
            qi::rule<It, double()> coordinate;
            // A coordinate pair is exposed as a flat list of numbers.
            qi::rule<It, AST::Coordinates(), Skipper> coordinate_pair;
    
            // Every command rule synthesizes an AST::Arc (letter + numbers).
            qi::rule<It, AST::Arc(), Skipper> moveto, closepath, vertical_lineto, lineto, drawto_command;
            qi::rule<It, AST::PathData(), Skipper> drawto_commands;
    
            // Entry point; declared without a skipper (see skip(space) in the constructor).
            qi::rule<It, AST::PathData()> start;
        };
    }
    
    /// Parses `text` with `grammar` (no skipper is passed) and stores the
    /// result in `attr`.
    ///
    /// With `full_match` set the parse must also reach the end of input;
    /// otherwise a match of a prefix is enough.
    template <typename P, typename T>
    bool test_parse_attr(const std::string &text, P const& grammar, T& attr, bool full_match = true) {
        auto const first = text.cbegin();
        auto const last  = text.cend();
        // The parser expression is deliberately built inside the call itself.
        return parse(first, last, grammar >> (qi::eps(!full_match) | qi::eoi), attr);
    }
    
    int main() {
        const Parsers::PathData<std::string::const_iterator> grammar;
    
        for (std::string const str : { "M 100 100 L 300 100 L 200 300 z" }) {
            AST::PathData attribute;
            if (test_parse_attr(str, grammar, attribute, true)) {
                std::cout << "Parsing succeeded, got: " << std::endl;
    
                for (auto &command: attribute) {
                    std::cout << command.command;
                    for (auto const& arg : command.coordinates) {
                        std::cout << ' ' << arg;
                    }
                    std::cout << std::endl;
                }
            } else {
                std::cout << "Parsing failed\n";
            }
        }
    }
    

    Prints

    Parsing succeeded, got: 
    M 100 100
    L 300 100
    L 200 300
    z
    

    Notes:

    • the Skipper is the responsibility of the parser, not the caller
    • don't meddle with fusion::vector (or even tuple), to keep your code maintainable:

      namespace AST {
          /// Numeric arguments of a single path command.
          using Coordinates = std::vector<double>;
      
          /// One parsed path command: the command letter plus its arguments.
          struct Arc {
              char command;            ///< command letter
              Coordinates coordinates; ///< numbers following the letter
          };
      
          /// A whole path: its commands in input order.
          using PathData = std::vector<Arc>;
      }
      

      And later:

       for (auto &command: attribute) {
            std::cout << command.command;
            for (auto const& arg : command.coordinates) { std::cout << ' ' << arg; }
            std::cout << std::endl;
        }
      
    • It defers all the optional whitespace matching to a Skipper. I know this changes behaviour (we'd parse "L100,200" while "L 100,200" would be required). If you insist on diagnosing this case, spell it out:

          command_letter  = no_case [ char_(_r1) ] >> &(space|eoi);
          moveto          = command_letter('m') >> (coordinate_pair % opt_comma);
          closepath       = command_letter('z') >> attr(AST::Coordinates{});
          vertical_lineto = command_letter('v') >> (coordinate % opt_comma);
          lineto          = command_letter('l') >> (coordinate_pair % opt_comma);
      

      Where command_letter is a rule that takes an inherited attribute:

      qi::rule<It, char(char)> command_letter;
      

    Be Specific With More Types

    Maybe you want to be specific about your AST types as well. Depending on your domain logic you really shouldn't treat all arguments as just a vector, probably.

    namespace AST {
        using Coordinate = double;
        using Coordinates = std::vector<Coordinate>;
    
        /// A pair of coordinates.
        struct Point { Coordinate x, y; };
        using Points = std::vector<Point>;
    
        /// One struct per command, each holding only the arguments that command takes.
        namespace Cmds {
            struct MoveTo         { Points points; } ;
            // ClosePath carries no arguments.
            struct ClosePath      {                } ;
            // NOTE(review): SVG's vertical lineto ("V") takes y values, so the
            // member name `x` looks like a misnomer — confirm before relying on it.
            struct VerticalLineTo { Coordinates x; } ;
            struct LineTo         { Points points; } ;
        }
    
        /// A path command is exactly one of the command structs above.
        using Cmd = boost::variant<
                Cmds::MoveTo,
                Cmds::ClosePath,
                Cmds::VerticalLineTo,
                Cmds::LineTo
            >;
    
        /// A whole path: its commands in input order.
        using PathData = std::vector<Cmd>;
    }
    

    Adapt them all:

    BOOST_FUSION_ADAPT_STRUCT(AST::Point, x, y)
    BOOST_FUSION_ADAPT_STRUCT(AST::Cmds::MoveTo, points)
    BOOST_FUSION_ADAPT_STRUCT(AST::Cmds::LineTo, points)
    

    You might consider the Nabialek Trick to parse them. See here for an example: Parsing a command language using Boost Spirit

    More Advanced Ideas

    Perhaps using X3 emulates your original code organization more cleanly:

    Live On Coliru

    #include <string>
    #include <iostream>
    #include <boost/spirit/home/x3.hpp>
    #include <boost/fusion/include/adapt_struct.hpp>
    
    namespace AST {
        /// Numeric arguments of a single path command.
        using Coordinates = std::vector<double>;
    
        /// One parsed path command: the command letter plus its arguments.
        struct Arc {
            char command;            ///< command letter as matched in the input
            Coordinates coordinates; ///< numbers following the letter (empty for closepath)
        };
    
        /// A whole path: its commands in input order.
        using PathData = std::vector<Arc>;
    }
    
    BOOST_FUSION_ADAPT_STRUCT(AST::Arc, command, coordinates)
    
    namespace x3 = boost::spirit::x3;
    // X3 parsers for path data, defined as namespace-scope objects.
    // `path_data` is the entry point and synthesizes an AST::PathData.
    namespace Parsers {
        using namespace x3;
    
        // Building blocks shared by the command parsers below.
        auto const opt_comma       = -lit(',');
        auto const coordinate      = double_;
        auto const coordinate_pair = coordinate >> opt_comma >> coordinate;
    
        // as<T>(p): wraps parser p in a rule whose attribute type is T.
        template <typename T> auto as = [](auto p) { return rule<struct _, T>{} = p; };
    
        // command_letter(c): matches c in either case inside a lexeme, and requires
        // whitespace or end of input right after it (the &(...) check).
        auto const command_letter  = [](auto p) { return lexeme [ no_case [ char_(p) ] >> &(space|eoi) ]; };
        auto const moveto          = command_letter('m') >> as<AST::Coordinates>(coordinate_pair % opt_comma);
        auto const lineto          = command_letter('l') >> as<AST::Coordinates>(coordinate_pair % opt_comma);
        auto const vertical_lineto = command_letter('v') >> as<AST::Coordinates>(coordinate % opt_comma);
        // closepath has no arguments, so attr() supplies an empty Coordinates.
        auto const closepath       = command_letter('z') >> attr(AST::Coordinates{});
        auto const drawto_command  = as<AST::Arc>(closepath | vertical_lineto | lineto);
    
        auto const drawto_commands = as<AST::PathData>(*drawto_command);
        // skip(space) establishes the whitespace skipper for the whole path.
        auto const path_data       = as<AST::PathData>(skip(space) [ moveto >> drawto_commands ]);
    }
    
    /// Parses `text` with `grammar` (no skipper is passed) and stores the
    /// result in `attr`.
    ///
    /// With `full_match` set the parse must also reach the end of input;
    /// otherwise a match of a prefix is enough.
    template <typename P, typename T>
    bool test_parse_attr(const std::string &text, P const& grammar, T& attr, bool full_match = true) {
        auto const first = text.cbegin();
        auto const last  = text.cend();
        return parse(first, last, grammar >> (x3::eps(!full_match) | x3::eoi), attr);
    }
    
    /// Parses a sample path string and prints each parsed command on its own line.
    int main() {
        for (std::string const input : { "M 100 100 L 300 100 L 200 300 z" }) {
            AST::PathData attribute;
    
            // Report the failure and move on to the next sample.
            if (!test_parse_attr(input, Parsers::path_data, attribute, true)) {
                std::cout << "Parsing failed\n";
                continue;
            }
    
            std::cout << "Parsing succeeded, got: " << std::endl;
    
            // One output line per command: the letter, then its numbers.
            for (auto &entry : attribute) {
                std::cout << entry.command;
                for (auto const& value : entry.coordinates) {
                    std::cout << ' ' << value;
                }
                std::cout << std::endl;
            }
        }
    }
    

    Also prints:

    Parsing succeeded, got: 
    M 100 100
    L 300 100
    L 200 300
    z