Search code examples
c++boost-spiritboost-spirit-qi

Spirit QI parser end eom


My data is defined as:

std::string data("START34*23*43**");

My grammar:

"START" >> boost::spirit::hex % '*'

Question: How do I parse an end-of-message (EOM) marker that consists of two stars?

https://wandbox.org/permlink/oDYjbBDb8fy79zQV


Solution

  • It's not immediately clear what you are asking. Assuming you just want to "ignore" (or accept) trailing asterisks, here's your culprit:

    if (first != last) // fail if we did not get a full match
        return false;
    

    Just removing those lines does the trick:

    Live On Coliru (note greatly simplified):

    #include <boost/spirit/include/qi.hpp>
    #include <iomanip>
    
    namespace qi = boost::spirit::qi;
    
    /// Parses the literal "START" followed by a '*'-separated list of hexadecimal
    /// numbers, skipping whitespace, and stores the numbers in `v`.
    ///
    /// `first` is taken by reference so the caller can compare it with `last`
    /// afterwards to see whether any input was left unparsed.
    ///
    /// @return the result of qi::phrase_parse.
    template <typename Iterator>
    bool parse_numbers(Iterator& first, Iterator last, std::vector<unsigned>& v) {
        bool const matched = qi::phrase_parse(
            first, last,
            "START" >> qi::hex % '*',
            qi::space,
            v);
        return matched;
    }
    
    /// Runs parse_numbers over a set of sample inputs and prints, for each one,
    /// the parsed values (or a failure notice) and any input left unconsumed.
    int main() {
        for (std::string const data : {
                 "START34*23*43",
                 "START34 * 23 * 43",
                 "START34 * 23 * 43 *",
                 "START34 * 23 * 43**",
                 "START34 * 23 * 43* *",
             })
        {
            auto f = data.begin(), l = data.end();
            std::vector<unsigned> v;
    
            if (parse_numbers(f, l, v)) {
                // '\n' instead of std::endl: same output without flushing the
                // stream after every single line.
                std::cout << std::quoted(data) << " Parses OK: " << '\n';
    
                for (auto i = 0u; i < v.size(); ++i)
                    std::cout << i << ": " << v[i] << '\n';
            } else {
                std::cout << "Parsing failed\n";
            }
            // f is passed by reference to parse_numbers; whatever lies between f
            // and l afterwards is reported as unparsed.
            if (f != l) {
                std::cout << "Remaining unparsed: "
                          << std::quoted(std::string(f, l)) << "\n";
            }
        }
    }
    

    Prints

    "START34*23*43" Parses OK: 
    0: 52
    1: 35
    2: 67
    "START34 * 23 * 43" Parses OK: 
    0: 52
    1: 35
    2: 67
    "START34 * 23 * 43 *" Parses OK: 
    0: 52
    1: 35
    2: 67
    Remaining unparsed: "*"
    "START34 * 23 * 43**" Parses OK: 
    0: 52
    1: 35
    2: 67
    Remaining unparsed: "**"
    "START34 * 23 * 43* *" Parses OK: 
    0: 52
    1: 35
    2: 67
    Remaining unparsed: "* *"
    

    Alternatively

    If you actually want to ignore adjacent ** but still continue parsing, then the useful change would be to say -qi::hex % '*' instead of qi::hex % '*' which simply makes the hex optional.

    Live On Coliru

    #include <boost/spirit/include/qi.hpp>
    #include <iomanip>
    
    namespace qi = boost::spirit::qi;
    
    /// Parses the literal "START" followed by a '*'-separated list in which each
    /// hexadecimal number is optional (so adjacent '*' separators are accepted),
    /// skipping whitespace, and stores the parsed numbers in `v`.
    ///
    /// `first` is taken by reference so the caller can compare it with `last`
    /// afterwards to see whether any input was left unparsed.
    ///
    /// @return the result of qi::phrase_parse.
    template <typename Iterator>
    bool parse_numbers(Iterator& first, Iterator last, std::vector<unsigned>& v) {
        bool const matched = qi::phrase_parse(
            first, last,
            "START" >> -qi::hex % '*',
            qi::space,
            v);
        return matched;
    }
    
    /// Runs parse_numbers over a set of sample inputs and prints, for each one,
    /// the parsed values (or a failure notice) and any input left unconsumed.
    int main() {
        for (std::string const data : {
                 "START34**23*43",
                 "START34 * 23 * 43**",
                 "START*******",
                 "START*******1 BOGUS",
             })
        {
            auto f = data.begin(), l = data.end();
            std::vector<unsigned> v;
    
            if (parse_numbers(f, l, v)) {
                // '\n' instead of std::endl: same output without flushing the
                // stream after every single line.
                std::cout << std::quoted(data) << " Parses OK: " << '\n';
    
                for (auto i = 0u; i < v.size(); ++i)
                    std::cout << i << ": " << v[i] << '\n';
            } else {
                std::cout << "Parsing failed\n";
            }
            // f is passed by reference to parse_numbers; whatever lies between f
            // and l afterwards is reported as unparsed.
            if (f != l) {
                std::cout << "Remaining unparsed: "
                          << std::quoted(std::string(f, l)) << "\n";
            }
        }
    }
    

    Prints

    "START34**23*43" Parses OK: 
    0: 52
    1: 35
    2: 67
    "START34 * 23 * 43**" Parses OK: 
    0: 52
    1: 35
    2: 67
    "START*******" Parses OK: 
    "START*******1 BOGUS" Parses OK: 
    0: 1
    Remaining unparsed: "BOGUS"
    

    In this case you might want to re-assert that all input is parsed with >> qi::eoi (which beats checking the iterators manually), see Live On Coliru:

    "START34**23*43" OK: 
    0: 52
    1: 35
    2: 67
    "START34 * 23 * 43**" OK: 
    0: 52
    1: 35
    2: 67
    "START*******" OK: 
    "START*******1 BOGUS" Failed
    

    UPDATE

    To the updated question in your comment:

    @sehe So someting like this: coliru.stacked-crooked.com/a/5ecc5462a8dc0081 – user3314011 19 mins ago

    You need a negative look-ahead to exclude **:

    "START" >> (qi::hex % (qi::lit('*') - "**")) >> "**"
    

    Actually, let's add some expectation points (> instead of >>):

        // The grammar uses expectation points (`>`); a failed expectation is
        // reported as qi::expectation_failure, which is caught below.
        try {
            return qi::parse(first, last, "START" > (qi::hex % (qi::lit('*') - "**")) > "**" > qi::eoi, v);
        } catch (qi::expectation_failure<Iterator> const& ef) {
            // Build a message from what was expected (ef.what_) and the input
            // range [ef.first, ef.last), quoted with single quotes.
            std::ostringstream msg;
            msg << "Expected " << ef.what_ << " at " << std::quoted(std::string(ef.first, ef.last), '\'');
            throw ParseError(msg.str());
        }
    

    Now you can get some decent error messages too:

    Live On Coliru

    #include <boost/spirit/include/qi.hpp>
    #include <iomanip>
    
    namespace qi = boost::spirit::qi;
    
    /// Exception type thrown by parse_numbers when an expectation point fails.
    /// Carries a human-readable description retrievable through what().
    struct ParseError : std::runtime_error {
        /// @param msg description of the failure.
        ///
        /// Taken by const reference: std::runtime_error only offers
        /// `const std::string&` / `const char*` constructors, so accepting the
        /// string by value and std::move-ing it only added an extra copy/move.
        ParseError(std::string const& msg) : std::runtime_error(msg) {}
    };
    
    /// Parses "START", a '*'-separated list of hexadecimal numbers, the "**"
    /// end-of-message marker and end of input, storing the numbers in `v`.
    ///
    /// No skipper is used (qi::parse), so whitespace is not skipped.
    ///
    /// @return the result of qi::parse.
    /// @throws ParseError when an expectation point fails; the message names what
    ///         was expected and quotes the input from the failure position on.
    template <typename Iterator>
    bool parse_numbers(Iterator& first, Iterator last, std::vector<unsigned>& v) {
        try {
            bool const matched = qi::parse(
                first, last,
                "START" > (qi::hex % (qi::lit('*') - "**")) > "**" > qi::eoi,
                v);
            return matched;
        } catch (qi::expectation_failure<Iterator> const& failure) {
            // Input range reported by the failure, shown single-quoted in the message.
            std::string const remaining(failure.first, failure.last);
    
            std::ostringstream description;
            description << "Expected " << failure.what_ << " at " << std::quoted(remaining, '\'');
            throw ParseError(description.str());
        }
    }
    
    int main() {
        for (std::string const data : {
                 "START34*23*43",       // Fail no EOM
                 "START34 * 23 * 43",   // Fail spaces
                 "START34*23*43*",      // Fail no EOM
                 "START34*23*43**",     // OK
                 "START34*23*43**1",    // Fail extra number
             })
        {
            std::cout << std::quoted(data) << " -> ";
    
            auto f = data.begin(), l = data.end();
            std::vector<unsigned> v;
    
            try {
                if (parse_numbers(f, l, v)) {
                    std::cout << " OK:";
    
                    for (auto i : v)
                        std::cout << " " << i;
                    std::cout << "\n";
                } else {
                    std::cout << "Not matched\n";
                }
            } catch(ParseError const& pe) {
                std::cout << "Error: " << pe.what() << "\n";
            }
        }
    }
    

    Printing:

    "START34*23*43" -> Error: Expected "**" at ''
    "START34 * 23 * 43" -> Error: Expected "**" at ' * 23 * 43'
    "START34*23*43*" -> Error: Expected "**" at '*'
    "START34*23*43**" ->  OK: 52 35 67
    "START34*23*43**1" -> Error: Expected <eoi> at '1'