Search code examples
boostc++11boost-spiritvariadic-templatesboost-fusion

Generating Spirit parser expressions from a variadic list of alternative parser expressions


I'm looking for the simplest way to implement a variadic function that takes a list of boost::spirit::qi rules and expands the list into an expression of the form: rule1 | rule2 | rule3 | .... Let's assume that the rules synthesize no attribute. Your kind help is very much appreciated.

#include <boost/config/warning_disable.hpp>
#include <boost/spirit/include/qi.hpp>
#include <string>
#include <iostream>
#include <boost/spirit/include/phoenix_operator.hpp>

namespace qi    = boost::spirit::qi;
namespace ph    = boost::phoenix;
namespace ascii = boost::spirit::ascii;
using boost::spirit::qi::phrase_parse;
using boost::spirit::qi::ascii::space;
using boost::spirit::iso8859_1::char_;

typedef qi::rule<std::string::const_iterator,ascii::space_type> mrule_t;
typedef qi::rule< std::string::const_iterator,std::string() >   wrule_t;

//How to deduce expandBitwise() return type ?

// Base case of the variadic expansion: a single holder contributes only its
// own rule_ member.
// NOTE(review): the declared return type is the holder type T itself, so the
// rule_ member is converted to T on return; this only compiles when such a
// conversion exists.
template<typename T>
T expandBitwise(T& t)
{
    auto& onlyRule = t.rule_;
    return onlyRule;
}

// Recursive case: combines the rule_ member of the first holder with the
// expansion of the remaining holders using operator|.
// NOTE(review): the declared return type T is the type of the first holder,
// not the type of the `|` expression; deducing the right return type is the
// open question raised in the comment preceding these overloads.
template<typename T,typename ...Tail>
T expandBitwise(T& t,Tail& ...tail)
{
    return t.rule_ | expandBitwise(tail...);
}

// Bundles a literal keyword with the rule that parses the value following it.
struct TStruct
{
    // Matches the keyword followed by the value rule; mrule_t is declared with
    // ascii::space_type as its skipper.
    mrule_t     rule_;

    // Builds rule_ so that it matches the literal `name` followed by `rule`,
    // and assigns the attribute produced by `rule` (qi::_1) to `rVar`.
    // rVar is wrapped with ph::ref, so it is written through a reference when
    // the semantic action runs; presumably both rVar and rule must outlive
    // this object - confirm against the Spirit documentation.
    template<typename Target,typename ValueRule>
    TStruct( Target& rVar,const std::string&name, ValueRule& rule )
        : rule_( qi::lit( name ) >> rule[ ph::ref( rVar )=qi::_1 ] )
    {
    }
};

// Parses `line` with the rule of the first holder only, skipping whitespace,
// and reports on std::cout whether the whole line was consumed.
// The remaining holders (`tail`) are accepted but not used yet.
template<typename T,typename ...Tail>
void mparse(const std::string& line,T& t,Tail& ...tail)
{
    std::string::const_iterator first = line.begin();
    const std::string::const_iterator last = line.end();

    // I would like to expand the rules here ...
    //if(phrase_parse(first,last,expandBitwise(t,tail...),space ) && first==last)

    // Success requires both a match and full consumption of the input.
    const bool matched = phrase_parse(first, last, t.rule_, space ) && first==last;

    std::cout << ( matched ? "Parsed:" : "Syntax error:" ) << line << std::endl;
}

// Builds four keyword rules (-a .. -d) sharing one value rule and runs the
// sample inputs through mparse.
int main()
{
    // Value parser: one or more non-space characters, exposed as std::string.
    wrule_t rword=+~space;

    // Destinations written by the semantic actions of r1..r4.
    std::string par1;
    std::string par2;
    std::string par3;
    std::string par4;

    TStruct r1( par1,"-a", rword );
    TStruct r2( par2,"-b", rword );
    TStruct r3( par3,"-c", rword );
    TStruct r4( par4,"-d", rword );

    // Sample inputs, processed in this order.
    const char* const inputs[] = {
        "abc 8.81",
        "-a atoken",
        "-b btoken",
        "-c ctoken",
        "-d dtoken"
    };

    for( const char* input : inputs )
        mparse(input,r1,r2,r3,r4);

    return 0;
}

Solution

  • Thank you for a quick hint! I've just tried your code and unless I do something wrong ... I get this output: Syntax error:abc 8.81 Parsed:-a atoken Syntax error:-b btoken Syntax error:-c ctoken Syntax error:-d dtoken — G. Civardi 2 hours ago

    Okay, so, I couldn't leave it alone :/

    Turns out there was Undefined Behaviour involved, because of the way in which parser expressions were being passed to expandBitwise and being copied: Boost Proto expression templates weren't designed to be copied as they may contain references to temporaries, whose lifetime ends at the end of their containing full-expression.

    For more background, see the discussion at "Zero to 60 MPH in 2 seconds!".

    After a long (long) time of tweaking with rule_.alias() and boost::proto::deep_copy, I have reached the following solution (which, incidentally, no longer needs a helper function at all):

    // Combines the rule_ members of all holders into one alternative parser,
    // parses `line` with it (skipping ASCII whitespace) and reports the
    // outcome on std::cout, followed by any input left unconsumed.
    template<typename ...Tail>
    void mparse(const std::string& line,Tail& ...tail)
    {
        // Fold over a tuple of references to the rules, seeded with
        // qi::eps(false); every step deep-copies the combined expression so no
        // reference to a temporary expression node survives the statement.
        // NOTE(review): `arg2 | arg1` places each newly visited rule in front
        // of the accumulated alternatives - confirm the resulting order.
        auto parser = boost::fusion::fold(
                    boost::tie(ph::bind(&TStruct::rule_, arg1)(tail)...),
                    qi::eps(false),
                    deepcopy_(arg2 | arg1)
                );

        std::string::const_iterator cursor = line.begin();
        const std::string::const_iterator stop = line.end();

        const bool matched = qi::phrase_parse(cursor, stop, parser, ascii::space );
        std::cout << ( matched ? "Parsed:" : "Syntax error:" ) << line << std::endl;

        // Reported independently of the match result.
        if (cursor != stop)
            std::cout << "Remaining unparsed: '" << std::string(cursor, stop) << "'\n";
    }
    

    The protection against UB is the deepcopy_() invocation, which is a trivial polymorphic callable adaptor for boost::proto::deep_copy:

    // Function object that forwards its argument to boost::proto::deep_copy.
    struct DeepCopy
    {
        // Result type for an argument of type E, as computed by
        // boost::proto::result_of::deep_copy.
        template<typename E>
        struct result
        {
            using type = typename boost::proto::result_of::deep_copy<E>::type;
        };

        // Returns a deep copy of the expression `source`.
        template<typename E>
        typename result<E>::type operator()(const E& source) const
        {
            return boost::proto::deep_copy(source);
        }
    };
    
    // Phoenix function wrapper around DeepCopy, so that deepcopy_(...) can be
    // used inside a Phoenix expression such as deepcopy_(arg2 | arg1).
    static const ph::function<DeepCopy> deepcopy_;
    

    With this code, lo and behold, the output becomes:

    Syntax error:abc 8.81
    Remaining unparsed: 'abc 8.81'
    Parsed:-a atoken
    Parsed:-b btoken
    Parsed:-c ctoken
    Parsed:-d dtoken
    Bye
    

    As a bonus, the code now allows you to use Spirit's built-in debug() capabilities (uncomment the `// debug(rule_);` line in the TStruct constructor):

    <-d>
      <try>abc 8.81</try>
      <fail/>
    </-d>
    <-c>
      <try>abc 8.81</try>
      <fail/>
    </-c>
    <-b>
      <try>abc 8.81</try>
      <fail/>
    </-b>
    <-a>
      <try>abc 8.81</try>
      <fail/>
    </-a>
    Syntax error:abc 8.81
    Remaining unparsed: 'abc 8.81'
    

    Tested with

    • Boost 1_54_0
    • GCC 4.7.2, 4.8.x, Clang 3.2
    • Note the #defines which are significant.

    FULL CODE

    #define BOOST_RESULT_OF_USE_DECLTYPE
    #define BOOST_SPIRIT_USE_PHOENIX_V3
    #include <boost/fusion/adapted/boost_tuple.hpp>
    #include <boost/fusion/include/fold.hpp>
    #include <boost/spirit/include/qi.hpp>
    #include <boost/spirit/include/phoenix.hpp>
    
    namespace qi    = boost::spirit::qi;
    namespace ph    = boost::phoenix;
    namespace ascii = boost::spirit::ascii;
    using namespace ph::arg_names;
    
    typedef qi::rule<std::string::const_iterator,ascii::space_type> mrule_t;
    typedef qi::rule<std::string::const_iterator,std::string() >    wrule_t;
    
    // Bundles a literal keyword with the rule that parses the value following it.
    struct TStruct
    {
        // Matches the keyword followed by the value rule; mrule_t is declared
        // with ascii::space_type as its skipper.
        mrule_t     rule_;

        // Builds rule_ so that it matches the literal `name` followed by
        // `rule`, and assigns the attribute produced by `rule` (qi::_1) to
        // `rVar`. The rule is also given `name` as its name.
        // rVar is wrapped with ph::ref, so it is written through a reference
        // when the semantic action runs; presumably both rVar and rule must
        // outlive this object - confirm against the Spirit documentation.
        template<typename Target,typename ValueRule>
        TStruct( Target& rVar,const std::string&name, ValueRule& rule )
            : rule_( qi::lit(name) >> rule[ ph::ref(rVar) = qi::_1 ] )
        {
            rule_.name(name);
            // debug(rule_);
        }
    };
    
    // Function object that forwards its argument to boost::proto::deep_copy.
    struct DeepCopy
    {
        // Result type for an argument of type E, as computed by
        // boost::proto::result_of::deep_copy.
        template<typename E>
        struct result
        {
            using type = typename boost::proto::result_of::deep_copy<E>::type;
        };

        // Returns a deep copy of the expression `source`.
        template<typename E>
        typename result<E>::type operator()(const E& source) const
        {
            return boost::proto::deep_copy(source);
        }
    };
    
    // Phoenix function wrapper around DeepCopy, so that deepcopy_(...) can be
    // used inside a Phoenix expression such as deepcopy_(arg2 | arg1).
    static const ph::function<DeepCopy> deepcopy_;
    
    // Combines the rule_ members of all holders into one alternative parser,
    // parses `line` with it (skipping ASCII whitespace) and reports the
    // outcome on std::cout, followed by any input left unconsumed.
    template<typename ...Tail>
    void mparse(const std::string& line,Tail& ...tail)
    {
        // Fold over a tuple of references to the rules, seeded with
        // qi::eps(false); every step deep-copies the combined expression so no
        // reference to a temporary expression node survives the statement.
        // NOTE(review): `arg2 | arg1` places each newly visited rule in front
        // of the accumulated alternatives - confirm the resulting order.
        auto parser = boost::fusion::fold(
                    boost::tie(ph::bind(&TStruct::rule_, arg1)(tail)...),
                    qi::eps(false),
                    deepcopy_(arg2 | arg1)
                );

        std::string::const_iterator cursor = line.begin();
        const std::string::const_iterator stop = line.end();

        const bool matched = qi::phrase_parse(cursor, stop, parser, ascii::space );
        std::cout << ( matched ? "Parsed:" : "Syntax error:" ) << line << std::endl;

        // Reported independently of the match result.
        if (cursor != stop)
            std::cout << "Remaining unparsed: '" << std::string(cursor, stop) << "'\n";
    }
    
    // Builds four keyword rules (-a .. -d) sharing one value rule, runs the
    // sample inputs through mparse and prints a closing line.
    int main()
    {
        // Value parser: one or more non-space characters, exposed as std::string.
        wrule_t rword=+~ascii::space;

        // Destinations written by the semantic actions of r1..r4.
        std::string par1;
        std::string par2;
        std::string par3;
        std::string par4;

        TStruct r1( par1, "-a", rword );
        TStruct r2( par2, "-b", rword );
        TStruct r3( par3, "-c", rword );
        TStruct r4( par4, "-d", rword );

        // Sample inputs, processed in this order.
        const char* const inputs[] = {
            "abc 8.81",
            "-a atoken",
            "-b btoken",
            "-c ctoken",
            "-d dtoken"
        };

        for (const char* input : inputs)
            mparse(input, r1, r2, r3, r4);

        std::cout << "Bye\n";
    }