Search code examples
c++parsingboostboost-spirit

Strange static_cast compilation error while compile boost spirit parser


To parse expressions like follow:

"asd {img} {ref I}sdkflsdlk {img} wmrwerml"

I have code like this:

struct CMyTag
{
    std::string tagName;
    std::string tagData;
};
BOOST_FUSION_ADAPT_STRUCT(::CMyTag, (std::string, tagName) (std::string, tagData));

struct fillMyTag
{
    template <typename A, typename B = boost::spirit::unused_type, typename C = boost::spirit::unused_type, typename D = boost::spirit::unused_type>
    struct result { typedef void type; };


    void operator()(::CMyTag& _tag, const std::string& _name, const std::string& _type) const
    {
        _tag.tagName = _name;
        _tag.tagData = _type;
    }
};

template <typename Iterator>
struct testTag_grammar : qi::grammar<Iterator, std::vector<CMyTag>()>
{
    testTag_grammar() :
        testTag_grammar::base_type(data)
    {
        data = (text | imgtag | vartag | inctag | blktag | reftag) >> *data;

        imgtagraw %= '{' >> qi::lit("img") >> *(+qi::lit(' ') >> lexeme[+(char_ - '{' - '}')]) >> '}';
        imgtag = imgtagraw[op(qi::labels::_val, "img", boost::spirit::_1)];

        vartagraw %= '{' >> qi::lit("var") >> *(+qi::lit(' ') >> lexeme[+(char_ - '{' - '}')]) >> '}';
        vartag = vartagraw[op(qi::labels::_val, "var", boost::spirit::_1)];

        inctagraw %= '{' >> qi::lit("inc") >> *(+qi::lit(' ') >> lexeme[+(char_ - '{' - '}')]) >> '}';
        inctag = inctagraw[op(qi::labels::_val, "inc", boost::spirit::_1)];

        blktagraw %= '{' >> qi::lit("blank") >> *(+qi::lit(' ') >> lexeme[+(char_ - '{' - '}')]) >> '}';
        blktag = blktagraw[op(qi::labels::_val, "blk", boost::spirit::_1)];

        reftagraw %= '{' >> lexeme[("ref")] >> *(+qi::lit(' ') >> lexeme[+(char_ - '{' - '}')]) >> '}';
        reftag = reftagraw[op(qi::labels::_val, "ref", boost::spirit::_1)];

        textraw %= lexeme[+(char_ - '{' - '}')];
        text = textraw[op(qi::labels::_val, "text", boost::spirit::_1)];
    }

    qi::rule<Iterator, std::string()> imgtagraw, vartagraw, inctagraw, blktagraw, reftagraw, textraw;
    qi::rule<Iterator, CMyTag()> imgtag, vartag, inctag, blktag, reftag, text;
    qi::rule<Iterator, std::vector<CMyTag>()> data;

    boost::phoenix::function<fillMyTag> op;
};

Usage of parser:

testTag_grammar<std::string::iterator> l_gramar;
std::string l_test = "asd {img} {ref I}sdkflsdlk {img} wmrwerml";

std::vector<CMyTag> l_result;
bool result = qi::parse(l_test.begin(), l_test.end(), l_gramar, l_result);

As a result I expect to get a list of CmyTag structs, but compilation of code failed:

Error   1   error C2440: 'static_cast' : cannot convert from 'const std::vector<CMyTag,std::allocator<_Ty>>' to 'CMyTag'    d:\lib\boost\include\boost-1_57\boost\spirit\home\qi\detail\assign_to.hpp   152 1   TestSpiritParser

When I change data rule to follow:

data = ((text | imgtag | vartag | inctag | blktag | reftag)[opvec(qi::labels::_val, boost::spirit::_1)]) >> *data;

with definition of opvec:

struct fillMyVec
{
    template <typename A, typename B = boost::spirit::unused_type, typename C = boost::spirit::unused_type, typename D = boost::spirit::unused_type>
    struct result { typedef void type; };


    void operator()(std::vector<CMyTag>& _tagvec, const CMyTag& _name) const
    {
        _tagvec.push_back(_name);
    }

    void operator()(std::vector<CMyTag>& _tagvec, std::vector<CMyTag>& _name) const
    {
        _tagvec.insert(_tagvec.end(), _name.begin(), _name.end());
    }
};

boost::phoenix::function<fillMyVec> opvec;

Code begin compile success, but as result of run I got list with only one item in it. Also before modification, when CMytag type was only std::string, I got a std::string list with correct numbers of items in it,

Now I have no idea what wrong, and what to do for fix issue


Solution

    1. Firstly, I can only assume

      data = (text | imgtag | vartag | inctag | blktag | reftag) >> *data;
      

      was /meant/ as 1-or-more repeats of the (...) expression. Writing it as

      data = +(text | imgtag | vartag | inctag | blktag | reftag);
      

      expresses the same, but allows attribute propagation to match the exposes attribute type.

    2. There are a number of lexeme[] directives that have no purpose when not using a skipper

    3. There is a suspicious manual skipping of whitespace that might be better served by using a skipper

    4. Insofar as you do wish to require a mandatory space after the "tag name", consider using operator& operator. That way you can still use a skipper.

      Anyhow, it's possible you were looking for something like the Qi Repository distinct()[] parser directive

    5. Even with a skipper

      *(+lit(' ') >> lexeme[+(char_ - '{' - '}')])
      

      doesn't make sense as lexeme[...] would eat any space up to closing '}' and hence the second repeat of the *() would never apply.

      See also Boost spirit skipper issues

    6. There is a lot of manual repetition between rules. Consider using qi::symbols to map the input to tag types.

    7. If you do, it becomes easier to avoid semantic actions (Good Thing: Boost Spirit: "Semantic actions are evil"?). Even if you didn't, you could use qi::attr to expose a specific value as the type value.

    8. Consider adding debug information (see BOOST_SPIRIT_DEBUG in the demo below)

    The grammar simplified

    I'd reduce the whole grammar to just this:

    data = +( ('{' >> tag >> '}') | text );
    
    tag  = lexeme[type >> &char_(" {}")] >> lexeme[*~char_("{}")];
    text = attr("text")                  >> lexeme[+~char_("{}")];
    

    Done! No more semantic actions, no more dozens of rules doing basically the same. No more complicated nested repeats with unclear multiplicities. type is a qi::symbols parser now, that contains the mapping of tag names:

    type.add
        ("img",   "img")
        ("var",   "var")
        ("inc",   "inc")
        ("blank", "blk")
        ("ref",   "ref");
    

    And here's a complete demo:

    DEMO

    Live On Coliru

    //#define BOOST_SPIRIT_DEBUG
    #include <boost/fusion/adapted.hpp>
    #include <boost/spirit/include/qi.hpp>
    namespace qi  = boost::spirit::qi;
    
    struct CMyTag
    {
        std::string tagName;
        std::string tagData;
    };
    BOOST_FUSION_ADAPT_STRUCT(::CMyTag, (std::string, tagName) (std::string, tagData))
    
    template <typename Iterator, typename Skipper = qi::space_type>
    struct testTag_grammar : qi::grammar<Iterator, std::vector<CMyTag>(), Skipper>
    {
        testTag_grammar() :
            testTag_grammar::base_type(data)
        {
            using namespace qi;
    
            data = +( ('{' >> tag >> '}') | text );
    
            type.add
                ("img",   "img")
                ("var",   "var")
                ("inc",   "inc")
                ("blank", "blk")
                ("ref",   "ref");
    
            tag  = lexeme[type >> &char_(" {}")] >> lexeme[*~char_("{}")];
            text = attr("text")                  >> lexeme[+~char_("{}")];
    
            BOOST_SPIRIT_DEBUG_NODES( (data) (tag) (text))
        }
    
      private:
        qi::symbols<char, std::string> type;
        qi::rule<Iterator, CMyTag(), Skipper>              tag, text;
        qi::rule<Iterator, std::vector<CMyTag>(), Skipper> data;
    };
    
    int main() {
        testTag_grammar<std::string::const_iterator> l_gramar;
        std::string const l_test = "asd {img} {ref I}sdkflsdlk {img} wmrwerml";
    
        std::vector<CMyTag> l_result;
        auto f = l_test.begin(), l = l_test.end();
        bool result = qi::phrase_parse(f, l, l_gramar, qi::space, l_result);
    
        if (result) {
            std::cout << "Parse success: " << l_result.size() << "\n";
    
            for (auto& v : l_result)
                std::cout << "Name '" << v.tagName << "', Data '" << v.tagData << "'\n";
        }
        else {
            std::cout << "Parse failed\n";
        }
    
        if (f!=l) {
            std::cout << "Remaining unparsed: '" << std::string(f,l) << "'\n";
        }
    }
    

    Prints

    Parse success: 6
    Name 'text', Data 'asd '
    Name 'img', Data ''
    Name 'ref', Data 'I'
    Name 'text', Data 'sdkflsdlk '
    Name 'img', Data ''
    Name 'text', Data 'wmrwerml'