Search code examples
c++ boost boost-spirit boost-spirit-qi

How do you output the original unparsed code (as a comment) from a spirit parser


Given the input string: A = 23; B = 5, I currently get the (expected) output:

Output: 0x50000023
Output: 0x50010005
-------------------------

I would like to see this instead:

Output: 0x50000023           // A = 23
Output: 0x50010005           // B = 5
-------------------------
-------------------------

The core line of code is:

statement   = eps[_val = 0x50000000] >> identifier[_val += _1<<16] >>
                     "=" >> hex[_val += (_1 & 0x0000FFFF)];

Where identifier is a qi::symbols table lookup.

The rest of my code looks like this:

#include <boost/config/warning_disable.hpp>
#include <boost/spirit/include/qi.hpp>
#include <boost/spirit/include/phoenix_core.hpp>
#include <boost/spirit/include/phoenix_operator.hpp>
#include <boost/spirit/include/phoenix_object.hpp>
#include <boost/fusion/include/adapt_struct.hpp>
#include <boost/fusion/include/io.hpp>

#include <iostream>
#include <iomanip>
#include <ios>
#include <string>
#include <complex>

namespace qi = boost::spirit::qi;
namespace ascii = boost::spirit::ascii;

// Symbol table mapping the names "A".."D" to the values 0..3.
// A single global instance, reg16, is defined together with the type.
struct reg16_ : qi::symbols<char,unsigned> {
    reg16_() {
        add("A", 0);
        add("B", 1);
        add("C", 2);
        add("D", 3);
    }
} reg16;

// Grammar for a ';'-separated list of "<identifier> = <hex>" statements.
// Each statement is folded into one unsigned value; the values are collected
// into a std::vector<unsigned>. ascii::space is used as the skipper.
template <typename Iterator>
struct dash_script_parser
    : qi::grammar<Iterator, std::vector<unsigned>(), ascii::space_type>
{
    qi::rule<Iterator, std::vector<unsigned>(), ascii::space_type> start;
    qi::rule<Iterator, unsigned(), ascii::space_type> statement;
    qi::rule<Iterator, unsigned()> identifier;

    dash_script_parser() : dash_script_parser::base_type(start)
    {
        using qi::_1;
        using qi::_val;
        using qi::eps;
        using qi::hex;

        // An identifier is looked up in the reg16 symbol table.
        identifier %= reg16;

        // Start from 0x50000000, add the identifier's value shifted left by
        // 16 bits, then add the low 16 bits of the hex number.
        statement =
               eps        [ _val  = 0x50000000 ]
            >> identifier [ _val += _1 << 16 ]
            >> "="
            >> hex        [ _val += (_1 & 0x0000FFFF) ];

        start %= statement % ";";
    }
};

int
main()
{
    std::cout << "\t\tA parser for Spirit...\n\n" << "Type [q or Q] to quit\n\n";

    dash_script_parser<std::string::const_iterator> g;
    std::string str;
    while (getline(std::cin, str))

    {
        if (str.empty() || str[0] == 'q' || str[0] == 'Q') break;

        std::string::const_iterator iter = str.begin();
        std::string::const_iterator end = str.end();
        std::vector<unsigned> strs;
        bool r = phrase_parse(iter, end, g, boost::spirit::ascii::space, strs);
        if (r && iter == end) {
            for(std::vector<unsigned>::const_iterator it=strs.begin(); it<strs.end(); ++it)
                std::cout << "Output: 0x" << std::setw(8) << std::setfill('0') << std::hex <<*it << "\n";
        } else
            std::cout << "Parsing failed\n";
    }
    return 0;
}

Solution

  • Update: A newer answer brought iter_pos (from the Boost Spirit Repository) to my attention:

    This basically does the same as below, but without 'abusing' semantic actions (making it a much better fit, especially with automatic attribute propagation).


    My gut feeling says that it will probably be easier to isolate statements into raw source iterator ranges first, and then parse the statements in isolation. That way, you'll have the corresponding source text at the start.

    With that out of the way, here is an approach I tested to work without subverting your sample code too much:


    1. Make the attribute type a struct

    Replace the primitive unsigned with a struct that also contains the source snippet, verbatim, as a string:

    // Result of parsing one statement: the computed number together with the
    // source text it was parsed from.
    struct statement_t {
        unsigned    value;   // numeric result of the statement
        std::string source;  // verbatim source snippet of the statement
    };
    
    // Adapt statement_t for Boost.Fusion so that at_c<0> refers to value and
    // at_c<1> refers to source.
    BOOST_FUSION_ADAPT_STRUCT(statement_t, (unsigned, value)(std::string, source));
    

    2. Make the parser fill both fields

    The good thing is, you were already using semantic actions, so it is merely building onto that. Note that the result is not very pretty, and would benefit hugely from being converted into a (fused) functor. But it shows the technique very clearly:

    // A script is a ';'-separated list of statements.
    start      %= (statement % ";" );
    // The inner semantic actions accumulate the number in field 0 of the
    // result. The outer raw[] hands its action the matched source range (_1),
    // which is copied into field 1 as a std::string.
    statement   = qi::raw [ 
            raw[eps]      [ at_c<0>(_val)  = 0x50000000 ] 
            >> identifier [ at_c<0>(_val) += _1<<16 ]
            >> "=" >> hex [ at_c<0>(_val) += (_1 & 0x0000FFFF) ]
        ] 
        [ at_c<1>(_val) = construct<std::string>(begin(_1), end(_1)) ]
    ;
    

    3. Print

    So, at_c<0>(_val) corresponds to statement_t::value, and at_c<1>(_val) corresponds to statement_t::source. This slightly modified output loop:

    for(std::vector<statement_t>::const_iterator it=strs.begin(); it<strs.end(); ++it)
        std::cout << "Output: 0x" << std::setw(8) << std::setfill('0') << std::hex << it->value << " // " << it->source << "\n";
    

    outputs:

    Output: 0x50000023 // A = 23
    Output: 0x50010005 // B = 5
    

    Full sample

    #include <boost/config/warning_disable.hpp>
    #include <boost/spirit/include/qi.hpp>
    #include <boost/spirit/include/phoenix_core.hpp>
    #include <boost/spirit/include/phoenix_operator.hpp>
    #include <boost/spirit/include/phoenix_object.hpp>
    #include <boost/fusion/include/adapt_struct.hpp>
    #include <boost/fusion/include/io.hpp>
    
    #include <iostream>
    #include <iomanip>
    #include <ios>
    #include <string>
    #include <complex>
    
    namespace qi    = boost::spirit::qi;
    namespace ascii = boost::spirit::ascii;
    
    #include <boost/spirit/include/phoenix_fusion.hpp>
    #include <boost/spirit/include/phoenix_stl.hpp>
    namespace phx   = boost::phoenix;
    
    // Symbol table mapping the names "A".."D" to the values 0..3.
    // A single global instance, reg16, is defined together with the type.
    struct reg16_ : qi::symbols<char,unsigned> {
        reg16_() {
            add("A", 0);
            add("B", 1);
            add("C", 2);
            add("D", 3);
        }
    } reg16;
    
    // Result of parsing one statement: the computed number together with the
    // source text it was parsed from.
    struct statement_t {
        unsigned    value;   // numeric result of the statement
        std::string source;  // verbatim source snippet of the statement
    };
    
    // Adapt statement_t for Boost.Fusion so that at_c<0> refers to value and
    // at_c<1> refers to source.
    BOOST_FUSION_ADAPT_STRUCT(statement_t, (unsigned, value)(std::string, source));
    
    // Grammar for a ';'-separated list of statements. Each statement produces a
    // statement_t holding the computed number and the matched source text; the
    // results are collected into a std::vector<statement_t>. ascii::space is
    // used as the skipper.
    template <typename Iterator>
    struct dash_script_parser : qi::grammar<Iterator, std::vector<statement_t>(), ascii::space_type> {
        dash_script_parser() : dash_script_parser::base_type(start) {
            using qi::hex;
            using qi::_val;
            using qi::_1;
            using qi::eps;
            using qi::raw;
    
            // An identifier is looked up in the reg16 symbol table.
            identifier %= reg16;
    
            using phx::begin;
            using phx::end;
            using phx::at_c;
            using phx::construct;
    
            // A script is a ';'-separated list of statements.
            start      %= (statement % ";" );
            // The inner semantic actions accumulate the number in field 0
            // (value): start from 0x50000000, add the identifier's value shifted
            // left by 16 bits, then add the low 16 bits of the hex number.
            // The outer raw[] hands its action the matched source range (_1),
            // which is copied into field 1 (source) as a std::string.
            // NOTE(review): the inner raw[eps] only carries the initialising
            // action; confirm whether plain eps would behave the same before
            // simplifying it.
            statement   = raw [ 
                    raw[eps]      [ at_c<0>(_val)  = 0x50000000 ] 
                    >> identifier [ at_c<0>(_val) += _1<<16 ]
                    >> "=" >> hex [ at_c<0>(_val) += (_1 & 0x0000FFFF) ]
                ] 
                [ at_c<1>(_val) = construct<std::string>(begin(_1), end(_1)) ]
            ;
        }
        // Rules are declared after the constructor; start is the grammar's entry rule.
        qi::rule<Iterator, std::vector<statement_t>(), ascii::space_type> start;
        qi::rule<Iterator, statement_t(), ascii::space_type> statement;
        // identifier is declared without a skipper type.
        qi::rule<Iterator, unsigned()> identifier;
    };
    
    int
    main()
    {
        std::cout << "\t\tA parser for Spirit...\n\n" << "Type [q or Q] to quit\n\n";
    
        dash_script_parser<std::string::const_iterator> g;
        std::string str;
        while (getline(std::cin, str))
    
        {
            if (str.empty() || str[0] == 'q' || str[0] == 'Q') break;
    
            std::string::const_iterator iter = str.begin();
            std::string::const_iterator end = str.end();
            std::vector<statement_t> strs;
            bool r = phrase_parse(iter, end, g, boost::spirit::ascii::space, strs);
            if (r && iter == end) {
                for(std::vector<statement_t>::const_iterator it=strs.begin(); it<strs.end(); ++it)
                    std::cout << "Output: 0x" << std::setw(8) << std::setfill('0') << std::hex << it->value << " // " << it->source << "\n";
            } else
                std::cout << "Parsing failed\n";
        }
        return 0;
    }