Search code examples
c++boost-spiritboost-spirit-qi

What's the appropriate way to indicate a Qi transform attribute fail?


What's the proper way to indicate a parse failure from within a boost::spirit::traits::transform_attribute specialization? Can I throw any old exception, or is there a specific mechanism it expects me to use?

namespace boost
{
    namespace spirit
    {
        namespace traits
        {
            template <>
            struct transform_attribute<TwoNums, std::vector<char>, qi::domain>
            {
                typedef std::vector<char> type;

                static type pre(TwoWords&) { return{}; }

                static void post(TwoWords& val, type const& attr) {
                    std::string stringed(attr.begin(), attr.end());
                    //https://stackoverflow.com/questions/236129/the-most-elegant-way-to-iterate-the-words-of-a-string
                    std::vector<std::string> strs;
                    boost::split(strs, stringed, ",");
                    if(strs.size()!=2) 
                    { 
                        //What do I do here?
                    }
                    val = TwoWords(strs[0],strs[1]);
                }
                static void fail(FDate&) { }
            };
        }
    }
}

Solution

    1. Yes, raising an exception seems the only out-of-band way.

    2. You could use qi::on_error to trap and respond to it.

    3. However, it's a bit unclear what you need this for. It seems a bit upside down to use split inside a parser. Splitting is basically a poor version of parsing.

      Why not have a rule for the sub-parsing?

    1. Simple Throw...

    Live On Coliru

    #include <boost/algorithm/string.hpp>
    #include <boost/spirit/include/qi.hpp>
    #include <iomanip>
    
    namespace qi = boost::spirit::qi;
    
    // Empty tag type thrown by the attribute transform below when the input
    // does not split into exactly two comma-separated fields.
    struct Invalid {};
    
    // Parse result: the two comma-separated fields of the input.
    struct TwoWords {
        std::string one;
        std::string two;
    };
    
    namespace boost {
    namespace spirit {
    namespace traits {

        // Adapts a parser that synthesizes std::vector<char> so that it can
        // expose a TwoWords attribute. The collected characters are split on
        // ','; anything other than exactly two fields raises Invalid.
        template <>
        struct transform_attribute<TwoWords, std::vector<char>, qi::domain> {
            using type = std::vector<char>;

            // Empty buffer for the wrapped parser to append into.
            static type pre(TwoWords &) { return type(); }

            // Builds the TwoWords from the parsed characters.
            // Throws Invalid when the field count is not 2.
            static void post(TwoWords &val, type const &attr) {
                std::string text(attr.begin(), attr.end());

                std::vector<std::string> fields;
                boost::split(fields, text, boost::is_any_of(","));

                if (fields.size() == 2) {
                    val = TwoWords{ fields[0], fields[1] };
                    return;
                }
                throw Invalid{};
            }

            // Nothing to undo when the wrapped parser fails.
            static void fail(TwoWords &) {}
        };

    } // namespace traits
    } // namespace spirit
    } // namespace boost
    
    // Grammar over iterator type It whose start rule exposes a TwoWords attribute.
    template <typename It>
    struct Demo1 : qi::grammar<It, TwoWords()> {
        Demo1() : Demo1::base_type(start) {
            // +qi::char_ gathers one or more input characters; attr_cast<TwoWords>
            // routes them through the transform_attribute specialization above,
            // whose post() may throw Invalid.
            start = qi::attr_cast<TwoWords>(+qi::char_);
        }
      private:
        qi::rule<It, TwoWords()> start;
    };
    
    int main() {
        Demo1<std::string::const_iterator> parser;
    
        for (std::string const input : { ",", "a,b", "a,b,c" }) {
            std::cout << "Parsing " << std::quoted(input) << " -> ";
    
            TwoWords tw;
            try {
                if (parse(input.begin(), input.end(), parser, tw)) {
                    std::cout << std::quoted(tw.one) << ", " << std::quoted(tw.two) << "\n";
                } else {
                    std::cout << "Failed\n";
                }
            } catch(Invalid) {
                std::cout << "Input invalid\n";
            }
        }
    }
    

    Prints

    Parsing "," -> "", ""
    Parsing "a,b" -> "a", "b"
    Parsing "a,b,c" -> Input invalid
    

    2. Handling Errors Inside The Parser

    This feels a bit hacky because it requires you to throw an expectation_failure.

    This is not optimal since it assumes you know the iterator the parser is going to be instantiated with.

    on_error was designed for use with expectation points

    Live On Coliru

    #include <boost/algorithm/string.hpp>
    #include <boost/spirit/include/qi.hpp>
    #include <iomanip>
    
    namespace qi = boost::spirit::qi;
    
    // Empty tag type. Nothing in this listing throws it: the attribute
    // transform below signals failure with qi::expectation_failure instead.
    struct Invalid {};
    
    // Parse result: the two comma-separated fields of the input.
    struct TwoWords {
        std::string one;
        std::string two;
    };
    
    namespace boost {
    namespace spirit {
    namespace traits {

        // Adapts a parser that synthesizes std::vector<char> so that it can
        // expose a TwoWords attribute. The collected characters are split on
        // ','; anything other than exactly two fields is reported by throwing
        // a qi::expectation_failure, which a qi::on_error handler can intercept.
        template <>
        struct transform_attribute<TwoWords, std::vector<char>, qi::domain> {
            using type = std::vector<char>;

            // Empty buffer for the wrapped parser to append into.
            static type pre(TwoWords &) { return type(); }

            // Builds the TwoWords from the parsed characters.
            // The exception type hard-codes std::string::const_iterator, so it
            // only matches grammars instantiated with that iterator.
            static void post(TwoWords &val, type const &attr) {
                std::string text(attr.begin(), attr.end());

                std::vector<std::string> fields;
                boost::split(fields, text, boost::is_any_of(","));

                if (fields.size() == 2) {
                    val = TwoWords{ fields[0], fields[1] };
                    return;
                }
                throw qi::expectation_failure<std::string::const_iterator>({}, {}, info("test"));
            }

            // Nothing to undo when the wrapped parser fails.
            static void fail(TwoWords &) {}
        };

    } // namespace traits
    } // namespace spirit
    } // namespace boost
    
    // Grammar over iterator type It whose start rule exposes a TwoWords attribute
    // and turns an expectation_failure raised during the rule into a parse failure.
    template <typename It>
    struct Demo2 : qi::grammar<It, TwoWords()> {
        Demo2() : Demo2::base_type(start) {
            // +qi::char_ gathers one or more input characters; attr_cast<TwoWords>
            // routes them through the transform_attribute specialization above.
            start = qi::attr_cast<TwoWords>(+qi::char_);
    
            // Install a do-nothing error handler on the rule; the sample output
            // shows the offending input reported as "Failed" instead of an
            // exception escaping from parse().
            qi::on_error(start, [](auto&&...){});
            // more verbose spelling:
            // qi::on_error<qi::error_handler_result::fail> (start, [](auto&&...){/*no-op*/});
        }
      private:
        qi::rule<It, TwoWords()> start;
    };
    
    // Parses three sample inputs and prints either the two extracted words or
    // "Failed" when the parser rejects the input.
    int main() {
        Demo2<std::string::const_iterator> parser;

        for (std::string const input : { ",", "a,b", "a,b,c" }) {
            std::cout << "Parsing " << std::quoted(input) << " -> ";

            TwoWords tw;
            // No try/catch here: nothing in this listing throws Invalid, and the
            // expectation_failure raised by the attribute transform is handled
            // inside the grammar by its on_error handler, so bad input shows up
            // as an ordinary parse failure.
            if (parse(input.begin(), input.end(), parser, tw)) {
                std::cout << std::quoted(tw.one) << ", " << std::quoted(tw.two) << "\n";
            } else {
                std::cout << "Failed\n";
            }
        }
    }
    

    Prints

    Parsing "," -> "", ""
    Parsing "a,b" -> "a", "b"
    Parsing "a,b,c" -> Failed
    

    3. Finally: Sub-rules Rule!

    Let's assume a slightly more interesting grammar in which the input is a list of TwoWords entries separated by ';':

    "foo,bar;a,b"
    

    We parse into a vector of TwoWords:

    // A single parsed word.
    using Word = std::string;
    // One "first,second" entry.
    struct TwoWords {
        std::string one;
        std::string two;
    };
    // The whole ';'-separated list of entries.
    using TwoWordses = std::vector<TwoWords>;
    

    Instead of using traits to "coerce" attributes, we just adapt the struct and rely on automatic attribute propagation:

    // Adapt TwoWords as a Fusion sequence of its members `one` and `two`, so the
    // attributes of a two-element sequence parser can be stored into it directly.
    BOOST_FUSION_ADAPT_STRUCT(TwoWords, one, two)
    

    The parser mimics the data-types:

    // Grammar over iterator type It that parses a ';'-separated list of
    // "word,word" entries into a TwoWordses.
    template <typename It>
    struct Demo3 : qi::grammar<It, TwoWordses()> {
        Demo3() : Demo3::base_type(start) {
            using namespace qi;
            // A word is any run (possibly empty) of `graph` characters other than
            // the two separators.
            word     = *(graph - ',' - ';');
            // Exactly two words around a single ','.
            twowords = word >> ',' >> word;
            // One or more entries separated by ';'.
            start    = twowords % ';';
        }
      private:
        qi::rule<It, Word()>       word;
        qi::rule<It, TwoWords()>   twowords;
        qi::rule<It, TwoWordses()> start;
    };
    

    And the full test is Live On Coliru

    #include <boost/fusion/adapted/struct.hpp>
    #include <boost/spirit/include/qi.hpp>
    #include <iomanip>
    
    namespace qi = boost::spirit::qi;
    
    // A single parsed word.
    using Word = std::string;
    // One "first,second" entry.
    struct TwoWords {
        std::string one;
        std::string two;
    };
    // The whole ';'-separated list of entries.
    using TwoWordses = std::vector<TwoWords>;
    
    // Adapt TwoWords as a Fusion sequence of its members `one` and `two`, so the
    // attributes of a two-element sequence parser can be stored into it directly.
    // The macro invocation is complete on its own; no trailing ';' is needed.
    BOOST_FUSION_ADAPT_STRUCT(TwoWords, one, two)
    
    // Grammar over iterator type It that parses a ';'-separated list of
    // "word,word" entries into a TwoWordses.
    template <typename It>
    struct Demo3 : qi::grammar<It, TwoWordses()> {
        Demo3() : Demo3::base_type(start) {
            using namespace qi;
            // A word is any run (possibly empty) of `graph` characters other than
            // the two separators.
            word     = *(graph - ',' - ';');
            // Exactly two words around a single ','.
            twowords = word >> ',' >> word;
            // One or more entries separated by ';'.
            start    = twowords % ';';
        }
      private:
        qi::rule<It, Word()>       word;
        qi::rule<It, TwoWords()>   twowords;
        qi::rule<It, TwoWordses()> start;
    };
    
    int main() {
        using It = std::string::const_iterator;
        Demo3<It> parser;
    
        for (std::string const input : {
                ",", 
                "foo,bar",
                "foo,bar;qux,bax",
                "foo,bar;qux,bax;err,;,ful",
    
                // failing cases or cases with trailing input:
                "",
                "foo,bar;",
                "foo,bar,qux",
                })
        {
            std::cout << "Parsing " << std::quoted(input) << " ->\n";
    
            TwoWordses tws;
            It f = input.begin(), l = input.end();
            if (parse(f, l, parser, tws)) {
                for(auto& tw : tws) {
                    std::cout << " - " << std::quoted(tw.one) << ", " << std::quoted(tw.two) << "\n";
                }
            } else {
                std::cout << "Failed\n";
            }
    
            if (f != l) {
                std::cout << "Remaining unparsed input: " << std::quoted(std::string(f,l)) << "\n";
            }
    
        }
    }
    

    Prints

    Parsing "," ->
     - "", ""
    Parsing "foo,bar" ->
     - "foo", "bar"
    Parsing "foo,bar;qux,bax" ->
     - "foo", "bar"
     - "qux", "bax"
    Parsing "foo,bar;qux,bax;err,;,ful" ->
     - "foo", "bar"
     - "qux", "bax"
     - "err", ""
     - "", "ful"
    Parsing "" ->
    Failed
    Parsing "foo,bar;" ->
     - "foo", "bar"
    Remaining unparsed input: ";"
    Parsing "foo,bar,qux" ->
     - "foo", "bar"
    Remaining unparsed input: ",qux"