This is long with a lot of code, so I hope Stack Overflow can cope with it. :P
I'm trying to write an SVG parser with Boost Spirit. I have a grammar that populates a vector with "Contours," which are vectors of "BezierPoints," which may represent either regular points or points with bezier controls.
So far I have this (not handling relative draw commands yet):
#ifndef SVG_PARSER_HPP
#define SVG_PARSER_HPP
#include <vector>
#include "boost/spirit/include/qi.hpp"
#include "boost/spirit/include/phoenix.hpp"
#include "boost/fusion/include/adapt_struct.hpp"
#include "boost/fusion/include/std_pair.hpp"
namespace qi = boost::spirit::qi;
namespace phoenix = boost::phoenix;
namespace ascii = boost::spirit::ascii;
// A plain 2-D coordinate. Both components default to 0.0, and because the
// constructor is not explicit a single double converts to Point(value, 0.0).
struct Point
{
    double x;
    double y;

    Point(const double nx = 0.0, const double ny = 0.0)
        : x(nx),
          y(ny)
    {
    }
};
// Adapt Point as a Fusion sequence (x, then y) so that a Qi rule declared
// with a Point attribute, such as coordinate_pair, can fill it from two
// parsed doubles.
BOOST_FUSION_ADAPT_STRUCT(
Point,
(double, x)
(double, y)
)
// One vertex of a contour: an end point plus two bezier control points.
// Both constructors leave the control points at the origin.
struct BezierPoint
{
    // End point. The start point is the end point of the BezierPoint that
    // came before it.
    Point point;

    // Todo: Set these to be coincident with point for non-curve points.
    Point control1;
    Point control2;

    // Build from raw coordinates of the end point.
    BezierPoint(const double x = 0.0, const double y = 0.0)
        : point(x, y),
          control1(),
          control2()
    {
    }

    // Build from an existing end point.
    BezierPoint(const Point &p)
        : point(p),
          control1(),
          control2()
    {
    }
};
// Adapt BezierPoint as a Fusion sequence. The sequence order here
// (control1, control2, point) intentionally differs from the member
// declaration order: it is the order in which curveto_argument parses its
// three coordinate pairs.
BOOST_FUSION_ADAPT_STRUCT(
BezierPoint,
(Point, control1)
(Point, control2)
(Point, point)
)
// A single contour: the ordered points collected for one command group.
typedef std::vector<BezierPoint> BezierVec;
// Everything the grammar produces: one BezierVec per command group.
typedef std::vector<BezierVec> Contours;
// Spirit.Qi grammar whose start rule (path_data) is declared to produce
// Contours: a vector of BezierVec, one per moveto/drawto command group.
// The rules are declared without a skipper, so whitespace is matched
// explicitly with *space throughout.
// Upper- and lower-case command letters are accepted by the same rules and
// nothing records which one was seen, so relative commands are not
// distinguished from absolute ones.
template <typename Iterator>
struct PathGrammar : qi::grammar<Iterator, Contours()>
{
///////////////////////////
// SVG is a damn monster //
///////////////////////////
// Defines every rule of the grammar; parsing starts at path_data.
PathGrammar() : PathGrammar::base_type(path_data)
{
using qi::char_;
using qi::double_;
using qi::_val;
using qi::_1;
using phoenix::push_back;
using phoenix::insert;
using phoenix::begin;
using phoenix::end;
using phoenix::construct;
using phoenix::val;
using ascii::space;
// Whole input: optional whitespace, an optional list of command groups,
// optional trailing whitespace.
path_data = *space >> -moveto_drawto_command_groups
>> *space;
// One or more command groups separated by any amount of whitespace.
moveto_drawto_command_groups = moveto_drawto_command_group
% *space;
// A moveto followed by optional drawto commands. Each semantic action
// appends the sub-rule's vector (_1) to the end of this rule's result
// (_val), so the group yields one flat BezierVec.
moveto_drawto_command_group = moveto[
insert(_val, end(_val), begin(_1), end(_1))
] >> *space
>> -drawto_commands[
insert(_val, end(_val), begin(_1), end(_1))
];
// Draw commands are (optionally) followed by a closepath
// command.
// The semantic action flattens each drawto_command's vector into _val.
drawto_commands = (drawto_command[
insert(_val, end(_val), begin(_1), end(_1))
] % *space) >> *space >> -closepath;
// Any one of the supported drawing commands.
drawto_command = lineto | horizontal_lineto
| vertical_lineto | curveto | smooth_curveto;
// 'M'/'m' followed by a list of coordinate pairs (shares
// lineto_argument_sequence with lineto).
// NOTE(review): char_(...) exposes the matched command letter as a char
// attribute, unlike lit(...) or a bare character literal. That attribute
// may end up in the BezierVec result (e.g. 'M' == 77 showing up as a
// point) -- verify. The same applies to the other command rules below.
moveto = ( char_('M') | char_('m') ) >> *space
>> lineto_argument_sequence;
// 'Z' or 'z'; the rule is declared without an attribute.
closepath = (char_('Z') | char_('z'));
// 'L'/'l' followed by a list of coordinate pairs.
lineto = ( char_('L') | char_('l') ) >> *space
>> lineto_argument_sequence;
// Each coordinate pair (a Point) is wrapped in a BezierPoint and pushed
// onto the result; pairs may be separated by an optional comma_space.
lineto_argument_sequence = coordinate_pair[
push_back(_val, construct<BezierPoint>(_1))
] % -comma_space;
// 'H'/'h' followed by a list of single coordinates.
horizontal_lineto = ( char_('H') | char_('h') ) >> *space
>> horizontal_lineto_argument_sequence;
// Each coordinate becomes BezierPoint(value, 0.0): y is always set to 0.0.
horizontal_lineto_argument_sequence = coordinate[
push_back(_val, construct<BezierPoint>(_1, val(0.0)))
] % -comma_space;
// 'V'/'v' followed by a list of single coordinates.
vertical_lineto = ( char_('V') | char_('v') ) >> *space
>> vertical_lineto_argument_sequence;
// Each coordinate becomes BezierPoint(0.0, value): x is always set to 0.0.
vertical_lineto_argument_sequence = coordinate[
push_back(_val, construct<BezierPoint>(val(0.0), _1))
] % -comma_space;
// 'C'/'c' followed by a list of curveto arguments.
curveto = ( char_('C') | char_('c') ) >> *space
>> curveto_argument_sequence;
curveto_argument_sequence = curveto_argument % -comma_space;
// Three coordinate pairs; the rule's attribute is a BezierPoint, whose
// adapted sequence order is control1, control2, point.
curveto_argument = coordinate_pair >> -comma_space
>> coordinate_pair >> -comma_space >> coordinate_pair;
// 'S'/'s' followed by a list of smooth-curveto arguments.
smooth_curveto = ( char_('S') | char_('s') ) >> *space
>> smooth_curveto_argument_sequence;
smooth_curveto_argument_sequence = smooth_curveto_argument
% -comma_space;
// Two coordinate pairs with a BezierPoint attribute.
// NOTE(review): only two pairs are parsed for the three-element adapted
// BezierPoint sequence -- confirm how they are meant to map onto
// control1/control2/point.
smooth_curveto_argument = coordinate_pair >> -comma_space
>> coordinate_pair;
// Two doubles with an optional separator between them, yielding a Point.
coordinate_pair = (double_ >> -comma_space >> double_);
coordinate = double_;
// Separator: one or more spaces optionally followed by a comma and more
// spaces, or a comma followed by optional spaces.
comma_space = (+space >> -char_(',') >> *space)
| (char_(',') >> *space);
}
// Quadratic curves are not supported
// Rule declarations; the type in the second template argument is the
// attribute each rule is declared to produce.
qi::rule<Iterator, Contours()> path_data;
qi::rule<Iterator, Contours()> moveto_drawto_command_groups;
qi::rule<Iterator, BezierVec()> moveto_drawto_command_group;
qi::rule<Iterator, BezierVec()> drawto_commands;
qi::rule<Iterator, BezierVec()> drawto_command;
qi::rule<Iterator, BezierVec()> moveto;
// NOTE(review): declared but never assigned a parser in the constructor;
// moveto uses lineto_argument_sequence instead.
qi::rule<Iterator, BezierVec()> moveto_argument_sequence;
qi::rule<Iterator> closepath;
qi::rule<Iterator, BezierVec()> lineto;
qi::rule<Iterator, BezierVec()> lineto_argument_sequence;
qi::rule<Iterator, BezierVec()> horizontal_lineto;
qi::rule<Iterator, BezierVec()>
horizontal_lineto_argument_sequence;
qi::rule<Iterator, BezierVec()> vertical_lineto;
qi::rule<Iterator, BezierVec()> vertical_lineto_argument_sequence;
qi::rule<Iterator, BezierVec()> curveto;
qi::rule<Iterator, BezierVec()> curveto_argument_sequence;
qi::rule<Iterator, BezierPoint()> curveto_argument;
qi::rule<Iterator, BezierVec()> smooth_curveto;
qi::rule<Iterator, BezierVec()> smooth_curveto_argument_sequence;
qi::rule<Iterator, BezierPoint()> smooth_curveto_argument;
qi::rule<Iterator, Point()> coordinate_pair;
qi::rule<Iterator, double()> coordinate;
qi::rule<Iterator> comma_space;
};
#endif
The grammar is invoked like this:
typedef string::const_iterator StrItr;
PathGrammar<StrItr> grammar;
Contours paths;
StrItr startIt = pathData.begin();
StrItr endIt = pathData.end();
qi::parse(startIt, endIt, grammar, paths);
BOOST_FOREACH(BezierVec v, paths)
{
cout << "Path:" << endl;
BOOST_FOREACH(BezierPoint p, v)
{
cout << '\t' << p.point.x << ", " << p.point.y << endl;
}
}
And this is my current test string:
M26.591,0L0,22.348l25.46,23.479L12.306,100l36.067-23.619L85.008,28.43L26.591,0z M30.553,34.23
l-8.487-10.467l9.052-5.234l25.601,8.77l-3.109,12.729L30.553,34.23z
The string reformatted to make it easier to read:
M 26.591, 0
L 0 , 22.348
l 25.46 , 23.479
L 12.306, 100
l 36.067, -23.619
L 85.008, 28.43
L 26.591, 0
z
M 30.553, 34.23
l -8.487, -10.467
l 9.052, -5.234
l 25.601, 8.77
l -3.109, 12.729
L 30.553, 34.23
z
Here is the output:
Path:
77, 0
26.591, 0
76, 0
0, 22.348
108, 0
25.46, 23.479
76, 0
12.306, 100
108, 0
36.067, -23.619
76, 0
85.008, 28.43
76, 0
26.591, 0
Path:
77, 0
30.553, 34.23
108, 0
-8.487, -10.467
108, 0
9.052, -5.234
108, 0
25.601, 8.77
108, 0
-3.109, 12.729
76, 0
30.553, 34.23
The grammar is seeing the points, but it keeps putting in all these extra points and I have no idea where they're coming from.
I'm also wondering about a couple of my rules. First there's this rule:
// Excerpt: both rules are declared to yield a BezierVec.
qi::rule<Iterator, BezierVec()> drawto_commands;
qi::rule<Iterator, BezierVec()> drawto_command;
...
// The semantic action appends each drawto_command's vector (_1) to the end
// of this rule's own result (_val).
drawto_commands = (drawto_command[
insert(_val, end(_val), begin(_1), end(_1))
] % *space) >> *space >> -closepath;
I want to have the results of (drawto_command % *space)
as a single vector instead of a vector of vectors. As far as I can tell, I have to do this manually with Phoenix. Is that the case?
I have a similar thing with my moveto rules:
// Excerpt: all three rules are declared to yield a BezierVec.
qi::rule<Iterator, BezierVec()> moveto_drawto_command_group;
qi::rule<Iterator, BezierVec()> moveto;
qi::rule<Iterator, BezierVec()> moveto_argument_sequence;
...
// The vectors from moveto and from the optional drawto_commands are each
// appended to the end of this rule's result (_val).
moveto_drawto_command_group = moveto[
insert(_val, end(_val), begin(_1), end(_1))
] >> *space
>> -drawto_commands[
insert(_val, end(_val), begin(_1), end(_1))
];
I have two rules that give a BezierVec, which I want to combine into a single BezierVec for the third rule. So far the only way to do this appears to be manual insertion with Phoenix. Is there no simpler way?
The additional values in the output are generated from the ASCII characters 'M' == 77
, 'L' == 76
, 'l' == 108
, etc. This happens as you're matching those using char_('M')
, etc., which exposes the matched value as a char
attribute. The compiler happily assigns this to the double
values in the output array. In order to avoid this, use either lit('M')
, or just 'M'
. Neither of those exposes any attribute, making the expressions neutral in terms of generated values.
A second thing which could be improved is to remove the *space
constructs from all over the place and to switch to phrase parsing (see the documentation of the phrase_parse
API function here). If you supply a space
parser component as the skipper parameter and add a space_type
skipper template type parameter to all of your rules, you'll get the same effect as having the *space
constructs interspersed in your grammar. For instance:
// The extra template argument names the skipper type used for this rule
// when the grammar is run through phrase_parse.
qi::rule<Iterator, Contours(), space_type> path_data;
If parts of the input are not allowed to contain whitespace, they can still be wrapped in lexeme[]
directives. See here for more information.
Your P.S:
In order to combine all vectors returned from drawto_command
you could employ a trick forcing Spirit.Qi to append to the provided (left hand side attribute) vector:
// Written as a sequence so that Qi appends every drawto_command's BezierVec
// into this rule's BezierVec; no semantic action is involved.
drawto_commands = +drawto_command >> -closepath;
which is already assuming you switched to phrase parsing, thus I removed the *space
constructs. Why does this work? Well, Spirit.Qi has a special attribute propagation rule for sequences, forcing it to append all attributes exposed by its elements into a provided container if all elements of this sequence either expose this attribute type or a container of those attribute types. No semantic action is required here. Note, this works only for sequences, not single element right hand side constructs.
Your second related question can be solved in a similar manner:
// Same sequence rule: the BezierVec from moveto and the one from the
// optional drawto_commands are appended into this rule's BezierVec.
moveto_drawto_command_group = moveto >> -drawto_commands;
again, no semantic actions are required.