Followup: Using boost::spirit::qi to parse numbers with separators

This is a followup question to Using boost::spirit::qi to parse numbers with separators.

Following sehe's very good suggestions, I managed to get number parsing to work. I then attempted to update it to have a secondary parser which handled numbers with an optional sign. This second attempt failed. I suspect I have dome something incorrect with respect to how to handle a sub-grammar. Code is as follows:

#include <boost/spirit/include/qi.hpp>
#include <boost/spirit/include/phoenix.hpp>

namespace qi = boost::spirit::qi;
namespace ascii = boost::spirit::ascii;
namespace phoenix = boost::phoenix;

template <typename Iterator, typename Num>
struct unsigned_parser : qi::grammar<Iterator, Num()> {
    unsigned_parser() : unsigned_parser::base_type(start) {
        using qi::_val;
        using qi::_1;
        using qi::eps;
        using qi::debug;
        using ascii::char_;

        bin = eps[_val=0] >> *(char_("01")[_val = _val * 2 + dval(_1)] | '_');
        oct = eps[_val=0] >> *(char_("0-7")[_val = _val * 8 + dval(_1)] | '_');
        dec = eps[_val=0]
              >> *(char_("0-9")[_val = _val * 10 + dval(_1)] | '_');
        hex = eps[_val=0]
              >> *(char_("0-9a-fA-F")[_val = _val * 16 + dval(_1)] | '_');
        start = (char_('0') >>
                 ((char_("xXhH") >> hex[_val=_1])
                  | (char_("bByY") >> bin[_val=_1])
                  | (char_("oOqQ") >> oct[_val=_1])
                  | (char_("dDtT") >> dec[_val=_1])))
                | (hex[_val=_1] >> char_("xXhH"))
                | (bin[_val=_1] >> char_("bByY"))
                | (oct[_val=_1] >> char_("oOqQ"))
                | (dec[_val=_1] >> -char_("dDtT"));
        start.name("unum");
        hex.name("hex");
        oct.name("oct");
        dec.name("dec");
        bin.name("bin");

        debug(start);
        debug(hex);
        debug(oct);
        debug(dec);
        debug(bin);
    }
    qi::rule<Iterator, Num()> start;
    qi::rule<Iterator, Num()> hex;
    qi::rule<Iterator, Num()> oct;
    qi::rule<Iterator, Num()> dec;
    qi::rule<Iterator, Num()> bin;
    struct _dval {
        template <typename> struct result { typedef uint8_t type; };
        template <typename T> uint8_t operator()(T ch) const {
            if (ch >= '0' || ch <= '9') {
                return ch - '0';
            }
            ch = std::tolower(ch);
            if (ch >= 'a' || ch <= 'f') {
                return ch - 'a' + 10;
            }
            assert(false);
        }
    };
    boost::phoenix::function<_dval> dval;
};

template <typename Iterator, typename Num>
struct signed_parser : qi::grammar<Iterator, Num()> {
    signed_parser() : signed_parser::base_type(start) {
        using qi::eps;
        using qi::_val;
        using qi::_1;
        using ascii::char_;
        using phoenix::static_cast_;
        unum = unsigned_parser<Iterator, Num>();
        start = (char_('-') >> unum[_val=-_1])
                | (-char_('+') >> unum[_val=_1]);
        unum.name("unum");
        start.name("snum");
        debug(start);
        /* debug(unum); */
    }
    qi::rule<Iterator, Num()> start;
    qi::rule<Iterator, Num()> unum;
};

int main(int argv, const char *argc[]) {
    using phoenix::ref;
    using qi::eoi;
    using qi::_1;

    typedef std::string::const_iterator iter;
    signed_parser<iter, int64_t> sp;
    int64_t val;
    if (argv != 2) {
        std::cerr << "Usage: " << argc[0] << " <input>" << std::endl;
        return 1;
    }
    std::string test(argc[1]);
    iter i = test.begin();
    iter end = test.end();
    bool rv = phrase_parse(i, end, sp[ref(val)=_1] >> eoi, ascii::space);
    if (rv) {
        assert(i == end);
        std::cout << "Succeeded: " << val << std::endl;
        return 0;
    }
    std::cout << "Failed." << std::endl;
    return 1;
}

With the signed_parser, every parse fails. Moreover, if I uncomment the commented-out debug(), the program segfaults.

I feel as if I am close to beginning to understand how to use this, so any help would be appreciated.

Solution

Using all those separate rules kills the opportunity for the compiler to optimize the parsing.

You cannot refer to a temporary grammar/rule. You need to have the grammar instance around:

template <typename Iterator, typename Num>
struct signed_parser : qi::grammar<Iterator, Num()> {
    signed_parser() : signed_parser::base_type(snum) {
        using namespace qi;

        snum = lit('-') >> unum
            | -lit('+') >> unum
            ;

        BOOST_SPIRIT_DEBUG_NODES((snum))
    }
private:
    qi::rule<Iterator, Num()> snum;
    unsigned_parser<Iterator, Num> unum;
};

Here's some cleanup for you:

swap argc and argv will you :)

use BOOST_SPIRIT_DEBUG* macros

BOOST_SPIRIT_DEBUG_NODES((unum) (hex) (oct) (dec) (bin));

use bare literals instead if lit() or (worse!) char_()
prefer using automatic attribute propagation (Boost Spirit: "Semantic actions are evil"?). E.g. the rules can be much simpler:
```
    snum = lit('-') >> unum
        | -lit('+') >> unum
        ;
```

use %= to preserve automatic propagation in the presence of semantic actions:

    snum %= lit('-') >> unum [ _val = -_1 ]
         | -lit('+') >> unum
         ;

same thing goes for the phrase_parse call itself: you can pass bound references for the attributes. No need for semantic actions
doing tolower(ch) is likely slower (since you know it's ASCII), possibly incorrect (you get sign extension if your compiler has signed char)

UPDATE there was a rather gruesome bug in your dval actor. The range checks were wrong! Here's my fixed version:

struct accum_f {
    template <typename...> struct result { typedef void type; };
    void operator()(char ch, Num& accum, int base) const {
        accum *= base;

        if      (ch >= '0' && ch <= '9') accum += ch - '0';
        else if (ch >= 'a' && ch <= 'f') accum += ch - 'a' + 10;
        else if (ch >= 'A' && ch <= 'F') accum += ch - 'A' + 10;
        else assert(false);
    }
};
boost::phoenix::function<accum_f> _accum;

See below for consequential changes/simplifications to the semantic action

you can use the builting int_parser in the prefix branches; this could be (much) faster

caveat: when you write the unum semantic-action-less, it becomes essential that you don't "capture" the '0' with qi::char_ like you did. Otherwise, you'll be wondering why the result of any prefix-formatted number is always 48.

unum = ('0' >>
            ( (omit[ char_("xXhH") ] >> hex)
            | (omit[ char_("bByY") ] >> bin)
            | (omit[ char_("oOqQ") ] >> oct)
            | (omit[ char_("dDtT") ] >> dec))
        )
    | (hex >> omit[  char_("xXhH") ])
    | (bin >> omit[  char_("bByY") ])
    | (oct >> omit[  char_("oOqQ") ])
    | (dec >> omit[ -char_("dDtT") ]);

using phrase_parse and a skipper has little effect as long as you're using parser expressions that don't use a skipper (see Boost spirit skipper issues)

Live On Coliru

//#define BOOST_SPIRIT_DEBUG
#include <boost/spirit/include/qi.hpp>
#include <boost/spirit/include/phoenix.hpp>

namespace qi      = boost::spirit::qi;
namespace ascii   = boost::spirit::ascii;

template <typename Iterator, typename Num>
struct unsigned_parser : qi::grammar<Iterator, Num()> {
    unsigned_parser() : unsigned_parser::base_type(unum) {
        using namespace qi;

        bin  = eps[_val=0] >> *(char_("01")        [ _accum(_1, _val, 2 )] | '_');
        oct  = eps[_val=0] >> *(char_("0-7")       [ _accum(_1, _val, 8 )] | '_');
        dec  = eps[_val=0] >> *(char_("0-9")       [ _accum(_1, _val, 10)] | '_');
        hex  = eps[_val=0] >> *(char_("0-9a-fA-F") [ _accum(_1, _val, 16)] | '_');
        unum = ('0' >>
                    ( (omit[ char_("xXhH") ] >> hex)
                    | (omit[ char_("bByY") ] >> bin)
                    | (omit[ char_("oOqQ") ] >> oct)
                    | (omit[ char_("dDtT") ] >> dec))
                )
            | (hex >> omit[  char_("xXhH") ])
            | (bin >> omit[  char_("bByY") ])
            | (oct >> omit[  char_("oOqQ") ])
            | (dec >> omit[ -char_("dDtT") ]);

        BOOST_SPIRIT_DEBUG_NODES((unum) (hex) (oct) (dec) (bin));
    }

  private:
    qi::rule<Iterator, Num()> unum,  hex, oct, dec, bin;

    struct accum_f {
        template <typename...> struct result { typedef void type; };
        void operator()(char ch, Num& accum, int base) const {
            accum *= base;

            if      (ch >= '0' && ch <= '9') accum += ch - '0';
            else if (ch >= 'a' && ch <= 'f') accum += ch - 'a' + 10;
            else if (ch >= 'A' && ch <= 'F') accum += ch - 'A' + 10;
            else assert(false);
        }
    };
    boost::phoenix::function<accum_f> _accum;
};

    template <typename Iterator, typename Num>
    struct signed_parser : qi::grammar<Iterator, Num()> {
        signed_parser() : signed_parser::base_type(snum) {
            using namespace qi;

            snum %= lit('-') >> unum [ _val = -_1 ]
                 | -lit('+') >> unum
                 ;

            BOOST_SPIRIT_DEBUG_NODES((snum))
        }
    private:
        qi::rule<Iterator, Num()> snum;
        unsigned_parser<Iterator, Num> unum;
    };

int main(int argc, const char *argv[]) {
    typedef std::string::const_iterator iter;
    signed_parser<iter, int64_t> const sp;

    for (std::string const& s : boost::make_iterator_range(argv+1, argv+argc))
    {
        std::cout << "\n-----------------------------\nParsing '" << s << "':\n";

        int64_t val;
        iter i = s.begin(), end = s.end();
        bool rv = phrase_parse(i, end, sp >> qi::eoi, ascii::space, val);

        if (rv) {
            std::cout << "Succeeded: " << val << std::endl;
        } else {
            std::cout << "Failed." << std::endl;
        }

        if (i!=end) {
            std::cout << "Remaining unparsed: '" << std::string(i,end) << "'\n";
        }
    }
}

Output:

-----------------------------
Parsing '-124_456d':
Succeeded: -124456

-----------------------------
Parsing '123_456D':
Succeeded: 123456

-----------------------------
Parsing '-123_456T':
Succeeded: -123456

-----------------------------
Parsing '123456t':
Succeeded: 123456

-----------------------------
Parsing '+1_bh':
Succeeded: 27

-----------------------------
Parsing '0_010Q':
Succeeded: 8

-----------------------------
Parsing '+1010_1010_0111_0111_b':
Succeeded: 43639

-----------------------------
Parsing '123_456':
Succeeded: 123456

-----------------------------
Parsing '-123456':
Succeeded: -123456

-----------------------------
Parsing '1_bh':
Succeeded: 27

-----------------------------
Parsing '-0_010Q':
Succeeded: -8

-----------------------------
Parsing '1010_1010_0111_0111_b':
Succeeded: 43639

-----------------------------
Parsing '+0d124_456':
Succeeded: 124456

-----------------------------
Parsing '0D123_456':
Succeeded: 123456

-----------------------------
Parsing '+0T123_456':
Succeeded: 123456

-----------------------------
Parsing '0t123456':
Succeeded: 123456

-----------------------------
Parsing '0h1_b':
Succeeded: 27

-----------------------------
Parsing '0Q0_010':
Succeeded: 8

-----------------------------
Parsing '0b1010_1010_0111_0111_':
Succeeded: 43639

-----------------------------
Parsing '06123_45':
Succeeded: 612345

-----------------------------
Parsing '0612345':
Succeeded: 612345

-----------------------------
Parsing '0h1_b':
Succeeded: 27

-----------------------------
Parsing '-0Q0_010':
Succeeded: -8

-----------------------------
Parsing '0b1010_1010_0111_0111_':
Succeeded: 43639