Search code examples
c++ error-handling boost-spirit boost-spirit-qi

How can I get the possible symbols instead of the built-in 'distinct' error message in Boost.Spirit?


I have a grammar with keywords at the entry point of a rule set.

Some pseudo code is below. If somebody now writes "XPUBLIC" as input, the parser throws an exception, and in the catch handler 'boost::spirit::qi::expectation_failure<parser::Iterator>::what_' only says 'distinct'. This is OK, but the parser could also return the list of possible entries at this node. The on_error handler receives the same information. Is there a way to get the possible entry symbols when the parser fails? In this sample I would like to get "PUBLIC", "PRIVATE", etc.

// Keyword helper: wraps p in no_case[...] and then in the distinct(...)
// directive, whose argument is "a wide alphanumeric character or '_'".
// NOTE(review): presumably this keeps a keyword from matching when it is
// directly followed by an identifier character — confirm against the
// Spirit repository documentation.
#define nocaselit(p) distinct(boost::spirit::standard_wide::alnum | L'_')[no_case[p]]

// Pseudo code: the rule is an alternative over the four keywords.
rule=  
  nocaselit(L"PUBLIC")
| nocaselit(L"PRIVATE")
| nocaselit(L"PROTECTED")
| nocaselit(L"SHARED")

Solution

  • When we simplify the situation to skip the distinct directive, then this is how you could deal with the info from the expectation failure to extract the alternatives:

    Live On Coliru

    #include <boost/spirit/include/qi.hpp>
    namespace enc = boost::spirit::standard_wide;
    namespace qi = boost::spirit::qi;
    using It = std::wstring::const_iterator;
    
    #define kw(p) qi::lit(p)
    
    int main() {
        qi::rule<It> rule;
    
        rule
            = qi::eps > 
                ( kw(L"PUBLIC")
                | kw(L"PRIVATE")
                | kw(L"PROTECTED")
                | kw(L"SHARED")
                )
            ;
    
        for (std::wstring const input : {
                L"PUBLIC",
                L"PRIVATE",
                L"PROTECTED",
                L"SHARED",
                L"XPUBLIC", // obviously no match
                L"PUBLICX", // actually test of distinct
            }) 
        try {
            It f = begin(input), l = end(input);
            auto ok = qi::parse(f, l, rule);
            std::wcout << input << " " << std::boolalpha << ok << std::endl;
        } catch(qi::expectation_failure<It> const& ef) {
    
            auto value = ef.what_.value;
            auto elements = boost::get<std::list<boost::spirit::info> >(value);
    
            std::ostringstream oss;
            oss << ef.what_.tag << "(";
            for (auto el : elements) oss << " " << el;
            oss << " )";
    
            std::wcout << input << " -> Expected " << oss.str().c_str() << std::endl;
        }
    }
    

    Prints

    PUBLIC true
    PRIVATE true
    PROTECTED true
    SHARED true
    XPUBLIC -> Expected alternative( "PUBLIC" "PRIVATE" "PROTECTED" "SHARED" )
    PUBLICX true
    

    Note that the internals of spirit::info assume std::string in UTF8 encoding. I'm not overly explicit here with codecvt facets. I'll leave a reliable conversion to wide strings as an exercise to the reader.

    With distinct()

    The naive approach will result in this output:

    PUBLIC true
    PRIVATE true
    PROTECTED true
    SHARED true
    XPUBLIC -> Expected alternative( <distinct> <distinct> <distinct> <distinct> )
    PUBLICX -> Expected alternative( <distinct> <distinct> <distinct> <distinct> )
    

    Sadly, that is probably not what you want. Even worse, that's also hardcoded in the repository directive:

    // Quoted from the repository `distinct` directive: the description is the
    // fixed tag "distinct". The context argument is unused and the wrapped
    // subject parser is not consulted, which is why the keyword text is lost.
    template <typename Context>
    info what(Context& /*ctx*/) const
    {
        return info("distinct");
    }
    

    If we could just change that to read:

    // Proposed variant: keep the "distinct" tag but attach the subject's own
    // description (subject.what(ctx)) as the info value instead of dropping it.
    template <typename Context>
    info what(Context& ctx) const
    {
        return info("distinct", subject.what(ctx));
    }
    

    We would have solved things. In order to not impinge on the library implementation details too much, let's subclass the distinct directive into my_distinct to do that:

    // Variant of distinct_parser that changes only the error description:
    // everything else is inherited from the base class unchanged.
    template <typename Subject, typename Tail, typename Modifier>
    struct my_distinct_parser
      : distinct_parser<Subject, Tail, Modifier>
    {
        // Inherit the base class constructors as they are.
        using distinct_parser<Subject, Tail, Modifier>::distinct_parser;
    
        // Describes this parser as "my_distinct" carrying the subject's own
        // description. `this->` is required because `subject` lives in a
        // dependent base class.
        template <typename Context> info what(Context& ctx) const {
            return info("my_distinct", this->subject.what(ctx));
        }
    };
    

    Sadly, we need some more red-tape to get things registered with the parser compilation and composition machinery:

    my_distinct.hpp

    #pragma once
    #include <boost/spirit/repository/home/qi/directive/distinct.hpp>
    
    namespace boost::spirit::repository {
        // Declares the new terminal. The rest of this header refers to
        // repository::my_distinct, repository::my_distinct_type and
        // repository::tag::my_distinct, which this macro is expected to
        // introduce — NOTE(review): confirm against the Spirit terminal macros.
        BOOST_SPIRIT_DEFINE_TERMINALS_NAME_EX(( my_distinct, my_distinct_type ))
    }
    
    namespace boost::spirit {
        // Opt-in for the Qi domain: specializes use_directive to mpl::true_ for
        // the my_distinct terminal called with exactly one argument (Tail).
        template <typename Tail>
        struct use_directive<qi::domain
              , terminal_ex<repository::tag::my_distinct, fusion::vector1<Tail> > >
          : mpl::true_ {};
    
        // Same opt-in for the lazy directive form.
        // NOTE(review): the trailing `1` presumably is the argument count of
        // the lazy call — confirm against the Spirit documentation.
        template <>
        struct use_lazy_directive<qi::domain, repository::tag::my_distinct, 1> 
          : mpl::true_ {};
    }
    
    namespace boost::spirit::repository::qi {
        // Re-export the terminal and its type into repository::qi so that
        // user code can spell them through this namespace.
        using repository::my_distinct;
        using repository::my_distinct_type;
    
        // Variant of distinct_parser that changes only the error description;
        // all other members are inherited from the base class unchanged.
        template <typename Subject, typename Tail, typename Modifier>
        struct my_distinct_parser
          : distinct_parser<Subject, Tail, Modifier>
        {
            // Inherit the base class constructors as they are.
            using distinct_parser<Subject, Tail, Modifier>::distinct_parser;
    
            // Describes this parser as "my_distinct" carrying the subject's own
            // description. `this->` is required because `subject` lives in a
            // dependent base class.
            template <typename Context> info what(Context& ctx) const {
                return info("my_distinct", this->subject.what(ctx));
            }
        };
    }
    
    namespace boost::spirit::qi {
        // Factory specialization selected for the one-argument my_distinct
        // terminal: it produces a my_distinct_parser from the directive's
        // subject and its tail argument.
        template <typename Tail, typename Subject, typename Modifiers>
        struct make_directive<
            terminal_ex<repository::tag::my_distinct, fusion::vector1<Tail> >
          , Subject, Modifiers>
        {
            // Type obtained by compiling the Tail expression in the Qi domain.
            typedef typename result_of::compile<qi::domain, Tail, Modifiers>::type
                tail_type;
    
            // The parser type this factory returns.
            typedef repository::qi::my_distinct_parser<
                Subject, tail_type, Modifiers> result_type;
    
            // Compiles the terminal's first (and only) argument into the tail
            // parser and constructs the result from the subject and that tail.
            template <typename Terminal>
            result_type operator()(Terminal const& term, Subject const& subject
              , Modifiers const& modifiers) const
            {
                return result_type(subject
                  , compile<qi::domain>(fusion::at_c<0>(term.args), modifiers));
            }
        };
    }
    
    namespace boost::spirit::traits {
        // The has_semantic_action trait for my_distinct_parser defers to
        // unary_has_semantic_action<Subject>, i.e. the answer is taken from
        // the wrapped subject parser.
        template <typename Subject, typename Tail, typename Modifier>
        struct has_semantic_action<
                repository::qi::my_distinct_parser<Subject, Tail, Modifier> >
          : unary_has_semantic_action<Subject> {};
    }
    

    LIVE DEMO

    Live On Wandbox

    #include <boost/spirit/include/qi.hpp>
    #include "my_distinct.hpp"
    namespace enc = boost::spirit::standard_wide;
    namespace qi = boost::spirit::qi;
    namespace qr = boost::spirit::repository::qi;
    using It = std::wstring::const_iterator;
    
    #define kw(p) qr::my_distinct(enc::alnum | L'_') \
            [ enc::no_case[p] ]
    
    int main() {
        qr::my_distinct(enc::alnum | L'_');
        qi::rule<It> rule;
    
        rule
            = qi::eps > 
                ( kw(L"public")
                | kw(L"private")
                | kw(L"protected")
                | kw(L"shared")
                )
            ;
    
        for (std::wstring const input : {
                L"PUBLIC",
                L"private",
                L"PROTECTED",
                L"shared",
                L"XPUBLIC", // obviously no match
                L"PUBLICX", // actually test of my_distinct
            }) 
        try {
            It f = begin(input), l = end(input);
            auto ok = qi::parse(f, l, rule);
            std::wcout << input << " " << std::boolalpha << ok << std::endl;
        } catch(qi::expectation_failure<It> const& ef) {
    
            auto value = ef.what_.value;
            auto elements = boost::get<std::list<boost::spirit::info> >(value);
    
            std::ostringstream oss;
            oss << ef.what_.tag << "(";
            for (auto el : elements) oss << " " << el;
            oss << " )";
    
            std::wcout << input << " -> Expected " << oss.str().c_str() << std::endl;
        }
    }
        
    

    Prints

    PUBLIC true
    private true
    PROTECTED true
    shared true
    XPUBLIC -> Expected alternative( "public" "private" "protected" "shared" )
    PUBLICX -> Expected alternative( "public" "private" "protected" "shared" )