To parse expressions like the following:
"asd {img} {ref I}sdkflsdlk {img} wmrwerml"
I have code like this:
/// One parsed element: the kind of element plus the text captured for it.
struct CMyTag
{
    std::string tagName;  ///< Element kind, e.g. "img", "ref" or "text".
    std::string tagData;  ///< Text captured for the element (may be empty).
};
// Adapt CMyTag as a Boost.Fusion sequence: tagName first, then tagData.
BOOST_FUSION_ADAPT_STRUCT(::CMyTag, (std::string, tagName) (std::string, tagData));
/// Function object that fills a CMyTag from a tag name and its captured data.
/// It is wrapped in boost::phoenix::function so it can be called lazily from
/// semantic actions.
struct fillMyTag
{
    // Nested result<> metafunction: the call operator returns void for every
    // argument combination.
    template <typename A, typename B = boost::spirit::unused_type, typename C = boost::spirit::unused_type, typename D = boost::spirit::unused_type>
    struct result
    {
        typedef void type;
    };

    /// Overwrite both fields of _tag.
    /// @param _tag   tag to fill
    /// @param _name  value stored in tagName
    /// @param _type  value stored in tagData
    void operator()(::CMyTag& _tag, const std::string& _name, const std::string& _type) const
    {
        _tag.tagData = _type;
        _tag.tagName = _name;
    }
};
// Grammar from the question: turns the whole input into a std::vector<CMyTag>.
// Neither the grammar nor any rule below declares a skipper, so the lexeme[]
// directives have no purpose here and spaces are matched by hand with
// qi::lit(' ').
template <typename Iterator>
struct testTag_grammar : qi::grammar<Iterator, std::vector<CMyTag>()>
{
testTag_grammar() :
testTag_grammar::base_type(data)
{
// One element followed by a recursive reference to `data`. This is the rule
// the reported C2440 (std::vector<CMyTag> -> CMyTag) traces back to: written
// as +( ... ) the same repetition propagates its attribute into the vector.
data = (text | imgtag | vartag | inctag | blktag | reftag) >> *data;
// Each "...raw" rule matches '{', a fixed keyword, optional space-separated
// payload and '}', exposing a std::string. The paired rule then calls `op`
// to fill the CMyTag attribute with a fixed name and that string.
imgtagraw %= '{' >> qi::lit("img") >> *(+qi::lit(' ') >> lexeme[+(char_ - '{' - '}')]) >> '}';
imgtag = imgtagraw[op(qi::labels::_val, "img", boost::spirit::_1)];
vartagraw %= '{' >> qi::lit("var") >> *(+qi::lit(' ') >> lexeme[+(char_ - '{' - '}')]) >> '}';
vartag = vartagraw[op(qi::labels::_val, "var", boost::spirit::_1)];
inctagraw %= '{' >> qi::lit("inc") >> *(+qi::lit(' ') >> lexeme[+(char_ - '{' - '}')]) >> '}';
inctag = inctagraw[op(qi::labels::_val, "inc", boost::spirit::_1)];
// The keyword in the input is "blank", but the stored tag name is "blk".
blktagraw %= '{' >> qi::lit("blank") >> *(+qi::lit(' ') >> lexeme[+(char_ - '{' - '}')]) >> '}';
blktag = blktagraw[op(qi::labels::_val, "blk", boost::spirit::_1)];
// Unlike its siblings, this rule spells the keyword as lexeme[("ref")]
// instead of qi::lit("ref").
reftagraw %= '{' >> lexeme[("ref")] >> *(+qi::lit(' ') >> lexeme[+(char_ - '{' - '}')]) >> '}';
reftag = reftagraw[op(qi::labels::_val, "ref", boost::spirit::_1)];
// Plain text: one or more characters that are neither '{' nor '}'.
textraw %= lexeme[+(char_ - '{' - '}')];
text = textraw[op(qi::labels::_val, "text", boost::spirit::_1)];
}
// String-valued rules that capture the raw payload of each element kind.
qi::rule<Iterator, std::string()> imgtagraw, vartagraw, inctagraw, blktagraw, reftagraw, textraw;
// Rules that expose a filled CMyTag for each element kind.
qi::rule<Iterator, CMyTag()> imgtag, vartag, inctag, blktag, reftag, text;
// Start rule: the full list of elements.
qi::rule<Iterator, std::vector<CMyTag>()> data;
// Lazy wrapper that lets fillMyTag be called from the semantic actions above.
boost::phoenix::function<fillMyTag> op;
};
Usage of parser:
// Instantiate the grammar for std::string iterators and run it over a sample input.
testTag_grammar<std::string::iterator> l_gramar;
std::string l_test = "asd {img} {ref I}sdkflsdlk {img} wmrwerml";
std::vector<CMyTag> l_result;
// l_result receives the parsed elements; `result` reports whether the parse matched.
// NOTE(review): qi::parse appears to take its first iterator by non-const
// reference, so passing the temporary l_test.begin() likely relies on a
// compiler extension - confirm against the Boost.Spirit documentation.
bool result = qi::parse(l_test.begin(), l_test.end(), l_gramar, l_result);
As a result I expect to get a list of CMyTag structs, but compilation of the code fails:
Error 1 error C2440: 'static_cast' : cannot convert from 'const std::vector<CMyTag,std::allocator<_Ty>>' to 'CMyTag' d:\lib\boost\include\boost-1_57\boost\spirit\home\qi\detail\assign_to.hpp 152 1 TestSpiritParser
When I change the data rule to the following:
data = ((text | imgtag | vartag | inctag | blktag | reftag)[opvec(qi::labels::_val, boost::spirit::_1)]) >> *data;
with the following definition of opvec:
/// Function object that appends parsed tags to a result vector.
struct fillMyVec
{
    // Nested result<> metafunction: the call operators return void for every
    // argument combination.
    template <typename A, typename B = boost::spirit::unused_type, typename C = boost::spirit::unused_type, typename D = boost::spirit::unused_type>
    struct result
    {
        typedef void type;
    };

    /// Append the single tag _name to the end of _tagvec.
    void operator()(std::vector<CMyTag>& _tagvec, const CMyTag& _name) const
    {
        _tagvec.insert(_tagvec.end(), _name);
    }

    /// Append every tag held in _name, in order, to the end of _tagvec.
    void operator()(std::vector<CMyTag>& _tagvec, std::vector<CMyTag>& _name) const
    {
        _tagvec.insert(_tagvec.end(),
                       _name.begin(),
                       _name.end());
    }
};

// Lazy wrapper that lets fillMyVec be called from semantic actions.
boost::phoenix::function<fillMyVec> opvec;
The code now compiles successfully, but when I run it I get a list with only one item in it. Before this modification, when the CMyTag type was just std::string, I got a std::string list with the correct number of items in it.
Now I have no idea what is wrong or what to do to fix the issue.
Firstly, I can only assume
data = (text | imgtag | vartag | inctag | blktag | reftag) >> *data;
was /meant/ as 1-or-more repeats of the (...) expression. Writing it as
data = +(text | imgtag | vartag | inctag | blktag | reftag);
expresses the same thing, but allows attribute propagation to match the exposed attribute type.
There are a number of lexeme[]
directives that have no purpose when not using a skipper
There is a suspicious manual skipping of whitespace that might be better served by using a skipper
Insofar as you do wish to require a mandatory space after the "tag name", consider using the and-predicate operator&
(a lookahead that consumes no input). That way you can still use a skipper.
Anyhow, it's possible you were looking for something like the Qi Repository distinct()[]
parser directive
Even with a skipper
*(+lit(' ') >> lexeme[+(char_ - '{' - '}')])
doesn't make sense as lexeme[...]
would eat any space up to closing '}' and hence the second repeat of the *()
would never apply.
See also Boost spirit skipper issues
There is a lot of manual repetition between rules. Consider using qi::symbols
to map the input to tag types.
If you do, it becomes easier to avoid semantic actions (Good Thing: Boost Spirit: "Semantic actions are evil"?). Even if you didn't, you could use qi::attr
to expose a specific value as the type
value.
Consider adding debug information (see BOOST_SPIRIT_DEBUG
in the demo below)
I'd reduce the whole grammar to just this:
data = +( ('{' >> tag >> '}') | text );
tag = lexeme[type >> &char_(" {}")] >> lexeme[*~char_("{}")];
text = attr("text") >> lexeme[+~char_("{}")];
Done! No more semantic actions, no more dozens of rules doing basically the same. No more complicated nested repeats with unclear multiplicities. type
is a qi::symbols
parser now, that contains the mapping of tag names:
type.add
("img", "img")
("var", "var")
("inc", "inc")
("blank", "blk")
("ref", "ref");
And here's a complete demo:
//#define BOOST_SPIRIT_DEBUG
#include <cstdlib>
#include <iostream>
#include <string>
#include <vector>
#include <boost/fusion/adapted.hpp>
#include <boost/spirit/include/qi.hpp>
namespace qi = boost::spirit::qi;
/// A single parsed element of the input.
struct CMyTag
{
    /// Kind of element: a tag keyword's mapped name, or "text" for plain text.
    std::string tagName;
    /// Characters captured for the element; empty when the tag carries no data.
    std::string tagData;
};
// Adapt CMyTag as a Boost.Fusion sequence (tagName, then tagData); the rules
// below expose CMyTag attributes without any semantic actions.
BOOST_FUSION_ADAPT_STRUCT(::CMyTag, (std::string, tagName) (std::string, tagData))
// Grammar producing a std::vector<CMyTag> from plain text interleaved with
// brace-delimited tags such as "{img}" or "{ref I}".
// The skipper type defaults to qi::space_type.
template <typename Iterator, typename Skipper = qi::space_type>
struct testTag_grammar : qi::grammar<Iterator, std::vector<CMyTag>(), Skipper>
{
testTag_grammar() :
testTag_grammar::base_type(data)
{
using namespace qi;
// One or more elements, each either a '{' tag '}' group or a run of plain text.
data = +( ('{' >> tag >> '}') | text );
// Input keyword -> value stored in tagName. Note "blank" is recorded as "blk".
type.add
("img", "img")
("var", "var")
("inc", "inc")
("blank", "blk")
("ref", "ref");
// tag: a known keyword that must be followed (lookahead only, nothing consumed)
// by ' ', '{' or '}', then any run of non-brace characters as tagData.
tag = lexeme[type >> &char_(" {}")] >> lexeme[*~char_("{}")];
// text: tagName is the constant "text"; tagData is one or more non-brace characters.
text = attr("text") >> lexeme[+~char_("{}")];
// Registers the rules for debug output; see the commented-out BOOST_SPIRIT_DEBUG
// define at the top of the demo.
BOOST_SPIRIT_DEBUG_NODES( (data) (tag) (text))
}
private:
// Symbol table mapping tag keywords to tagName values (filled in the constructor).
qi::symbols<char, std::string> type;
// Rules that each expose one CMyTag.
qi::rule<Iterator, CMyTag(), Skipper> tag, text;
// Start rule: the full list of elements.
qi::rule<Iterator, std::vector<CMyTag>(), Skipper> data;
};
/// Demo driver: parses a sample string with testTag_grammar and prints every
/// element that was recognised.
///
/// @return EXIT_SUCCESS when the parse succeeded and consumed the whole input;
///         EXIT_FAILURE when the parse failed or left input unparsed, so that
///         callers and scripts can detect the problem from the exit status.
int main() {
    testTag_grammar<std::string::const_iterator> l_gramar;
    std::string const l_test = "asd {img} {ref I}sdkflsdlk {img} wmrwerml";
    std::vector<CMyTag> l_result;

    // `f` is passed to the parser as an lvalue; after the call, [f, l) is
    // reported below as the part of the input that was not parsed.
    auto f = l_test.begin(), l = l_test.end();
    bool const result = qi::phrase_parse(f, l, l_gramar, qi::space, l_result);

    if (result) {
        std::cout << "Parse success: " << l_result.size() << "\n";
        for (auto const& v : l_result)
            std::cout << "Name '" << v.tagName << "', Data '" << v.tagData << "'\n";
    }
    else {
        std::cout << "Parse failed\n";
    }

    bool const consumed_all = (f == l);
    if (!consumed_all) {
        std::cout << "Remaining unparsed: '" << std::string(f,l) << "'\n";
    }

    return (result && consumed_all) ? EXIT_SUCCESS : EXIT_FAILURE;
}
Prints
Parse success: 6
Name 'text', Data 'asd '
Name 'img', Data ''
Name 'ref', Data 'I'
Name 'text', Data 'sdkflsdlk '
Name 'img', Data ''
Name 'text', Data 'wmrwerml'