Search code examples
c++parsingboost-spiritboost-spirit-qi

How to pass the iterator to a function in spirit qi


// Grammar from the question: one double, then zero or more ",<double>" groups,
// then end of input.
// NOTE(review): the grammar and all three rules declare a std::string attribute
// although the elements are matched with qi::double_ — confirm this is intended.
template <typename Iterator>
struct parse_grammar
: qi::grammar<Iterator, std::string()>
{
    // Wires up the rules; start_p is both the start rule and the grammar's name.
    parse_grammar()
        : parse_grammar::base_type(start_p, "start_p"){
            // a_p: a ',' that must be followed by a double ('>' is Qi's expectation operator)
            a_p = ',' > qi::double_;
            // b_p: any number of a_p groups, including none
            b_p = *a_p;
            // start_p: leading double, the trailing groups, then end of input
            start_p = qi::double_ > b_p >> qi::eoi;
        }

    qi::rule<Iterator, std::string()> a_p;     // one ",<double>" group
    qi::rule<Iterator, std::string()> b_p;     // repetition of a_p
    qi::rule<Iterator, std::string()> start_p; // whole input
};


// implementation

// Runs parse_grammar over `input`, using `filename` only as the label in
// position reports. An expectation failure is rethrown as std::runtime_error
// carrying file/line/column and the offending line.
// NOTE(review): `output` is never passed to phrase_parse nor written anywhere
// in this function, so as written the returned vector is always empty.
std::vector<double> parse(std::istream& input, const std::string& filename)

{

// iterate over stream input

    typedef std::istreambuf_iterator<char> base_iterator_type;
    base_iterator_type in_begin(input);

    // convert input iterator to forward iterator, usable by spirit parser
    typedef boost::spirit::multi_pass<base_iterator_type> forward_iterator_type;
    forward_iterator_type fwd_begin = boost::spirit::make_default_multi_pass(in_begin);
    forward_iterator_type fwd_end;

    // prepare output
    std::vector<double> output;
    // wrap forward iterator with position iterator, to record the position
    typedef classic::position_iterator2<forward_iterator_type> pos_iterator_type;
    pos_iterator_type position_begin(fwd_begin, fwd_end, filename);
    pos_iterator_type position_end;

    parse_grammar<pos_iterator_type> gram;

    // parse
    // NOTE(review): the bool returned by phrase_parse is discarded, so a plain
    // (non-expectation) parse failure goes unnoticed here.
    try
    {
        qi::phrase_parse(
                position_begin, position_end,                     // iterators over input
                gram,                                         // recognize list of doubles
                ascii::space);                                         // skipper: ascii::space
    }
    catch(const qi::expectation_failure<pos_iterator_type>& e)
    {
        // e.first is the position iterator at which the expectation failed
        const classic::file_position_base<std::string>& pos = e.first.get_position();
        std::stringstream msg;
        msg <<
            "parse error at file " << pos.file <<
            " line " << pos.line << " column " << pos.column << std::endl <<
            "'" << e.first.get_currentline() << "'" << std::endl <<
            " " << "^- here";
        throw std::runtime_error(msg.str());
    }

    // return result
    return output;
}

I have the sample code above (adapted from an example on the Boost.Spirit website).

In the grammar's rule a_p, I want to attach a semantic action that calls a method and passes the iterators to it, something like this:

a_p = ',' > qi::double_[boost::bind(&parse_grammar::doStuff(), this, 
    boost::ref(position_begin), boost::ref(position_end)];

and if the signature of the method doStuff is like this:

void doStuff(pos_iterator_type const& first, pos_iterator_type const& last);

Any ideas how to do this? I don't mind which approach is used (boost::phoenix or something else; I'm not sure how), as long as the iterators are passed to the method with their current state.


Solution

  • I'm not completely sure why you think you 'need' what you describe. I'm afraid the solution to your actual task might be very simple:

    start_p = qi::double_ % ',' > qi::eoi;
    

    However, since the actual question is quite interesting, and the use of position iterators in combination with std::istreambuf_iterator (rather than just the usual (slower) boost::spirit::istream_iterator) has its merits, I'll show you how to do it with the semantic action as well.

    For a simple (but rather complete) test main of

    int main()
    {
        std::istringstream iss(
                "1, -3.4 ,3.1415926\n"
                ",+inF,-NaN  ,\n"
                "2,-.4,4.14e7\n");
    
        data_t parsed = parse(iss, "<inline-test>");
    
        std::cout << "Done, parsed " << parsed.size() << " values ("
            << "min: " << *std::min_element(parsed.begin(), parsed.end()) << ", "
            << "max: " << *std::max_element(parsed.begin(), parsed.end()) << ")\n";
    }
    

    The output with the semantic action now becomes:

    debug ('start_p') at <inline-test>:1:[1..2] '1'  = 1
    debug ('start_p') at <inline-test>:1:[4..8] '-3.4'   = -3.4
    debug ('start_p') at <inline-test>:1:[10..19]   '3.1415926'  = 3.14159
    debug ('start_p') at <inline-test>:2:[2..6] '+inF'   = inf
    debug ('start_p') at <inline-test>:2:[7..11]    '-NaN'   = -nan
    debug ('start_p') at <inline-test>:3:[1..2] '2'  = 2
    debug ('start_p') at <inline-test>:3:[3..6] '-.4'    = -0.4
    debug ('start_p') at <inline-test>:3:[7..13]    '4.14e7'     = 4.14e+07
    Done, parsed 8 values (min: -3.4, max: inf)
    

    See it live at http://liveworkspace.org/code/8a874ef3...

    Note how it

    • demonstrates access to the name of the actual parser instance ('start_p')
    • demonstrates access to the full source iterator range
    • shows how to do specialized processing inside the semantic action
    • I still suggest using qi::double_ to parse the raw input, because it is the only thing I know that easily handles all cases (see test data and this other question: Is it possible to read infinity or NaN values using input streams?)
    • demonstrates parsing the actual data into the vector efficiently by displaying statistics of the parsed values

    Full Code

    Here is the full code for future reference:

    #define BOOST_SPIRIT_USE_PHOENIX_V3
    #include <algorithm>
    #include <exception>
    #include <iostream>
    #include <iterator>
    #include <sstream>
    #include <stdexcept>
    #include <string>
    #include <vector>

    #include <boost/spirit/include/qi.hpp>
    #include <boost/spirit/include/phoenix.hpp>
    #include <boost/spirit/include/support_multi_pass.hpp>
    #include <boost/spirit/include/classic_position_iterator.hpp>
    #include <boost/phoenix/function/adapt_function.hpp>
    
    namespace qi      = boost::spirit::qi;
    namespace phx     = boost::phoenix;
    namespace classic = boost::spirit::classic;
    namespace ascii   = boost::spirit::ascii;
    
    typedef std::vector<double> data_t;
    
    ///////// USING A FREE FUNCTION
    //
    /// Semantic-action helper: re-parses the matched source range as a double,
    /// prints one debug line describing the match, and returns the value.
    ///
    /// @param grammar    object whose name() is echoed in the debug line
    /// @param pos_range  matched range; its iterators must offer get_position()
    /// @return the double read from pos_range (stays 0 if the re-parse stores nothing)
    template <typename Grammar, typename Range>
        double doStuff_(Grammar &grammar, Range pos_range)
    {
        typedef typename Range::iterator range_iterator;

        // Constructed on first call and reused afterwards; the grammar wrapper
        // may be redundant (the rule alone might suffice).
        static const qi::rule   <range_iterator, double()> r_double = qi::double_;
        static const qi::grammar<range_iterator, double()> g_double(r_double);

        double parsed = 0;
        qi::parse(pos_range.begin(), pos_range.end(), g_double, parsed);

        // Const copies of the range ends, used purely for reporting.
        const range_iterator first = pos_range.begin();
        const range_iterator last  = pos_range.end();
        const std::string matched(first, last);

        std::cout << "debug ('" << grammar.name() << "') at ";
        std::cout << first.get_position().file << ":"
                  << first.get_position().line << ":["
                  << first.get_position().column << ".."
                  << last.get_position().column << "]\t";
        std::cout << "'" << matched << "'\t = " << parsed << '\n';

        return parsed;
    }
    
    // Adapts the free function template doStuff_ into a Phoenix lazy function
    // named doStuff (return type double, 2 arguments) so it can be invoked
    // from inside a Qi semantic action.
    BOOST_PHOENIX_ADAPT_FUNCTION(double, doStuff, doStuff_, 2)
    
    /// Grammar for a comma-separated list of doubles that must run to end of input.
    ///
    /// Each element is matched through raw[double_], so the semantic action is
    /// handed the matched source range; doStuff converts that range into the
    /// element's value.
    template <typename Iterator, typename Skipper>
    struct parse_grammar : qi::grammar<Iterator, data_t(), Skipper>
    {
        qi::rule<Iterator, data_t::value_type(), Skipper> value_p;  ///< one element
        qi::rule<Iterator, data_t(), Skipper> start_p;              ///< whole list

        parse_grammar()
            : parse_grammar::base_type(start_p, "start_p")
        {
            // raw[] exposes the matched iterator range as _1; the action assigns
            // whatever doStuff returns for that range to the rule's attribute.
            value_p = qi::raw [ qi::double_ ] [ qi::_val = doStuff(phx::ref(*this), qi::_1) ];

            // elements separated by ',', and end of input is required afterwards
            start_p = value_p % ',' > qi::eoi;

            // // Alternative without the semantic action (more efficient):
            // start_p = qi::double_ % ',' >> qi::eoi;
        }
    };
    
    // implementation
    /// Parses a comma-separated list of doubles from @p input.
    ///
    /// The stream is read through std::istreambuf_iterator, adapted to a forward
    /// iterator with multi_pass, and wrapped in classic::position_iterator2 so
    /// that file/line/column are available for diagnostics.
    ///
    /// @param input     stream to read from
    /// @param filename  label used in position reports
    /// @return the values phrase_parse stored; when phrase_parse reports failure
    ///         a message goes to std::cerr and whatever was stored is returned
    /// @throws std::runtime_error when the grammar's expectation point fails
    data_t parse(std::istream& input, const std::string& filename)
    {
        // iterate over stream input
        typedef std::istreambuf_iterator<char> base_iterator_type;
        base_iterator_type in_begin(input);

        // convert input iterator to forward iterator, usable by spirit parser
        typedef boost::spirit::multi_pass<base_iterator_type> forward_iterator_type;
        forward_iterator_type fwd_begin = boost::spirit::make_default_multi_pass(in_begin);
        forward_iterator_type fwd_end;

        // wrap forward iterator with position iterator, to record the position
        typedef classic::position_iterator2<forward_iterator_type> pos_iterator_type;
        pos_iterator_type position_begin(fwd_begin, fwd_end, filename);
        pos_iterator_type position_end;

        parse_grammar<pos_iterator_type, ascii::space_type> gram;

        data_t output;
        // parse
        try
        {
            if (!qi::phrase_parse(
                    position_begin, position_end,  // iterators over input
                    gram,                          // recognize list of doubles
                    ascii::space,                  // skipper: ascii::space
                    output)                        // <-- attribute reference
               )
            {
                std::cerr << "Parse failed at "
                   << position_begin.get_position().file   << ":"
                   << position_begin.get_position().line   << ":"
                   << position_begin.get_position().column << "\n";
            }
        }
        catch(const qi::expectation_failure<pos_iterator_type>& e)
        {
            const classic::file_position_base<std::string>& pos = e.first.get_position();

            // Columns are 1-based and the echoed line is preceded by a single
            // quote, so the character at `column` sits `column` cells past the
            // tab. The marker line already starts with one space, hence only
            // column - 1 further pad spaces are needed to put '^' under it.
            const std::string::size_type pad =
                pos.column > 0 ? static_cast<std::string::size_type>(pos.column - 1) : 0;

            std::stringstream msg;
            msg << "parse error at file " << pos.file
                << " line "               << pos.line
                << " column "             << pos.column
                << "\n\t'"                << e.first.get_currentline()
                << "'\n\t "               << std::string(pad, ' ') << "^-- here";

            throw std::runtime_error(msg.str());
        }

        return output;
    }
    
    int main()
    {
        std::istringstream iss(
                "1, -3.4 ,3.1415926\n"
                ",+inF,-NaN  ,\n"
                "2,-.4,4.14e7\n");
    
        data_t parsed = parse(iss, "<inline-test>");
    
        std::cout << "Done, parsed " << parsed.size() << " values ("
            << "min: " << *std::min_element(parsed.begin(), parsed.end()) << ", "
            << "max: " << *std::max_element(parsed.begin(), parsed.end()) << ")\n";
    }