Search code examples
c++ boost-spirit boost-spirit-qi boost-spirit-lex

Why does Boost.Spirit Lex hang instead of reporting a parse error?


I haven't worked with boost::spirit for a long time and have now come back to it. I am already stuck on a simple case (sometimes I want to kill this library... why is such a simple task so complex with Boost?).

#include <iostream>
#include <string>

#include <boost/bind.hpp>
#include <boost/ref.hpp>

#include <boost/spirit/include/qi.hpp>
#include <boost/spirit/include/lex_lexertl.hpp>
#include <boost/spirit/include/phoenix.hpp>

namespace bs = boost::spirit;
namespace lex = boost::spirit::lex;
namespace qi = boost::spirit::qi;
namespace phx = boost::phoenix;

//---------------------------------------------------------------------------------
// configuration

// Iterator over the characters of the input std::string.
using base_iterator_type = std::string::iterator;
// lexertl token type built on that character iterator.
using token_type = lex::lexertl::token<base_iterator_type>;
// lexertl lexer type that works with token_type.
using lexer_type = lex::lexertl::lexer<token_type>;

//---------------------------------------------------------------------------------

// Token definitions for the sample lexer.
//
// Lexer is the lexer type this definition derives from via lex::lexer<Lexer>
// (instantiated below with lexer_type).
template <typename Lexer>
struct cpp_tokens : lex::lexer<Lexer>
{
    cpp_tokens()
    {
        // Patterns: the literal keyword "class" and a C-style identifier.
        // Neither pattern (nor the "WS" one below) matches a digit.
        class_ = "class";
        identifier = "[a-zA-Z_][a-zA-Z0-9_]*";

        // Both tokens are registered on this->self without a state name.
        // NOTE(review): class_ is listed before identifier, presumably so the
        // keyword wins over the identifier pattern -- confirm against the
        // Spirit.Lex matching rules.
        this->self += class_ | identifier;
        // Runs of blanks/tabs are registered under the separate state "WS".
        this->self("WS") = lex::token_def<>("[ \\t]+");
    }

    // Token for the "class" keyword.
    lex::token_def<> class_;
    // Token for identifiers, declared with std::string as its template argument.
    lex::token_def<std::string> identifier;
};

// Token definitions instantiated for lexer_type, and the iterator type that
// the resulting lexer exposes.
using cpp_lex = cpp_tokens<lexer_type>;
using cpp_iterator = cpp_lex::iterator_type;

//---------------------------------------------------------------------------------

// Qi grammar whose start rule is `program`.
//
// Iterator is the iterator type the grammar parses over; Lexer is the type
// used to form the in_state_skipper of both the grammar and its rule.
template <typename Iterator, typename Lexer>
struct cpp_grammar_impl : bs::qi::grammar<Iterator, bs::qi::in_state_skipper<Lexer>>
{
    // tok must expose the members `identifier` and `class_`.
    // `program` is handed to the base class as the start rule, named "program".
    template <typename TokenDef>
    cpp_grammar_impl(TokenDef const& tok) : cpp_grammar_impl::base_type(program, "program")
    {
        // The whole grammar: tok.identifier followed by tok.class_.
        program = tok.identifier >> tok.class_;
    }

private:
    using skipper_type = bs::qi::in_state_skipper<Lexer>;
    using simple_rule = qi::rule<Iterator, skipper_type>;

    // Start rule; declares no attribute in its signature.
    simple_rule program;
};

// Grammar instantiated for this file's token iterator and lexer definition.
using cpp_grammar = cpp_grammar_impl<cpp_iterator, cpp_lex::lexer_def>;

//---------------------------------------------------------------------------------

// Lexes and parses the fixed input "111 class", then prints whether the
// parse succeeded and reached the end of the token range.
int main()
{
    std::string str("111 class");

    cpp_lex cppLexer;
    cpp_grammar cppGrammar(cppLexer);

    // `it` is a character iterator into str; iter/end are the lexer's
    // iterators (cpp_iterator) obtained from cppLexer.
    auto it = str.begin();
    cpp_iterator iter = cppLexer.begin(it, str.end());
    cpp_iterator end = cppLexer.end();

    // Parse using the lexer's "WS" state as the skipper.
    bool r = qi::phrase_parse(iter, end, cppGrammar, bs::qi::in_state("WS")[cppLexer.self]);

    // Success requires both a match and that iter reached end.
    if (r && iter == end)
    {
        std::cout << "-------------------------\n";
        std::cout << "Parsing succeeded\n";
        std::cout << "-------------------------\n";
    }
    else
    {
        // NOTE(review): iter and end are cpp_iterator values (lexer
        // iterators), not character iterators, so this does not copy the
        // remaining input text. This looks like the statement that never
        // finishes, rather than phrase_parse itself -- confirm in a
        // debugger. Building the string from `it` and str.end() would use
        // character iterators instead.
        std::string rest(iter, end);
        std::cout << "-------------------------\n";
        std::cout << "Parsing failed\n";
        std::cout << "stopped at: \"" << rest << "\"\n";
        std::cout << "-------------------------\n";
    }

    std::cout << "Bye... :-) \n\n";
}

I expected a parse error, but with the sample above boost::spirit hangs, pegging the processor and consuming all available memory. Example on Coliru.

What am I doing wrong, and how can I fix it?


Solution

  • Using the debugger you can easily find that parsing is not hanging.

    Instead it's the line

      std::string rest(iter, end);
    

    that is hanging. I suspect it is really undefined behaviour rather than a genuine hang, because the iter and end iterators do not dereference to characters but to tokens.

    So the simple fix is to use the base-iterators:

       std::string rest(it, str.end());
    

    Live On Coliru

    #include <boost/spirit/include/lex_lexertl.hpp>
    #include <boost/spirit/include/qi.hpp>
    #include <iostream>
    #include <string>
    
    namespace lex = boost::spirit::lex;
    namespace qi = boost::spirit::qi;
    
    //---------------------------------------------------------------------------------
    // configuration
    
    // Iterator over the characters of the input std::string.
    using base_iterator_type = std::string::iterator;
    // lexertl token type built on that character iterator.
    using token_type = lex::lexertl::token<base_iterator_type>;
    // lexertl lexer type that works with token_type.
    using lexer_type = lex::lexertl::lexer<token_type>;
    
    //---------------------------------------------------------------------------------
    
    // Token table for the sample lexer: the "class" keyword and identifiers
    // are registered on this->self, blanks/tabs under the separate "WS" state.
    template <typename Lexer>
    struct cpp_tokens : lex::lexer<Lexer>
    {
        lex::token_def<> class_;                 // "class" keyword
        lex::token_def<std::string> identifier;  // C-style identifier

        cpp_tokens()
            : class_("class")
            , identifier("[a-zA-Z_][a-zA-Z0-9_]*")
        {
            // Registration order is kept: class_ first, then identifier.
            this->self += class_ | identifier;
            this->self("WS") = lex::token_def<>("[ \\t]+");
        }
    };
    
    using cpp_lex = cpp_tokens<lexer_type>;
    using cpp_iterator = cpp_lex::iterator_type;
    
    //---------------------------------------------------------------------------------
    
    // Grammar matching tokens.identifier followed by tokens.class_.
    // Skipping is done by an in_state_skipper built from Lexer; the caller
    // supplies the actual skipper instance when parsing.
    template <typename Iterator, typename Lexer>
    struct cpp_grammar_impl : qi::grammar<Iterator, qi::in_state_skipper<Lexer> >
    {
        // Tokens must expose the members `identifier` and `class_`.
        template <typename Tokens>
        cpp_grammar_impl(Tokens const &tokens)
            : cpp_grammar_impl::base_type(program, "program")
        {
            program = tokens.identifier >> tokens.class_;
        }

      private:
        using skipper_type = qi::in_state_skipper<Lexer>;

        // Start rule handed to the base class under the name "program".
        qi::rule<Iterator, skipper_type> program;
    };
    
    using cpp_grammar = cpp_grammar_impl<cpp_iterator, cpp_lex::lexer_def>;
    
    //---------------------------------------------------------------------------------
    
    int main() {
        std::string str("111 class");
    
        cpp_lex cppLexer;
        cpp_grammar cppGrammar(cppLexer);
    
        auto it = str.begin();
        cpp_iterator iter = cppLexer.begin(it, str.end());
        cpp_iterator end = cppLexer.end();
    
        bool r = qi::phrase_parse(iter, end, cppGrammar, qi::in_state("WS")[cppLexer.self]);
    
        if (r && iter == end) {
            std::cout << "-------------------------\n";
            std::cout << "Parsing succeeded\n";
            std::cout << "-------------------------\n";
        } else {
            std::string rest(it, str.end());
            std::cout << "-------------------------\n";
            std::cout << "Parsing failed\n";
            std::cout << "stopped at: \"" << rest << "\"\n";
            std::cout << "-------------------------\n";
        }
    
        std::cout << "Bye... :-) \n\n";
    }
    

    Prints:

    -------------------------
    Parsing failed
    stopped at: "111 class"
    -------------------------
    Bye... :-)