Search code examples
c++boostattrboost-spirit-qi

Mandatory boost::spirit::qi::attr(0) in rule is being skipped


I am struggling with the implementation of a rule where all elements are part of a list of sorts. The types of elements cannot be mixed.

This is valid: 1/1/ 2/2/ 3/3/ and should be parsed with a mandatory zero like 1/1/0 2/2/0 3/3/0

The example below includes a rule where I think an attribute is created. It works for the int_/0/int_ variation but not for the int_/int_/0 version. In fact the attribute zero seems to be skipped entirely and the next int_ (the number 2) is used instead.

Here is some commented output:

enter image description here

This is a longer example that includes all tests and rules used:

#pragma once
#include <string>
#include <vector>
#include <regex>
#include <list>
#define BOOST_TEST_MAIN
#include <boost/test/unit_test.hpp>
#include <boost/spirit/include/qi.hpp>

// Plain aggregate holding the three integer fields of one parsed triplet.
// It is a named struct (not a typedef of an unnamed one) and stays free of
// constructors so that brace initialisation such as `{ 1, 0, 1 }` keeps working.
struct int_triplet { int a; int b; int c; };
// Sequence of triplets in input order.
typedef std::vector<int_triplet> int_triplet_vector;
BOOST_FUSION_ADAPT_STRUCT(int_triplet, a, b, c)

namespace qi = boost::spirit::qi;

// Iterator type every rule below is instantiated for.
typedef std::string::iterator Iterator;

// Rule types without an attribute: one for the skipper, one for line terminators.
typedef qi::rule<Iterator> skipper_rule;
typedef qi::rule<Iterator> line_rule;
// Rule types that produce an attribute and are declared with skipper_rule as their skipper.
typedef qi::rule<Iterator, std::string(), skipper_rule> string_rule;
typedef qi::rule<Iterator, int_triplet(), skipper_rule> int_triplet_rule;
typedef qi::rule<Iterator, int_triplet_vector(), skipper_rule> int_triplet_vector_rule;

// Line terminator: "\r\n", "\n\r" or a lone '\n'.
line_rule endofline = qi::lit("\r\n") | qi::lit("\n\r") | qi::lit('\n');
// Skipped input: space, tab, form feed, vertical tab, or a backslash followed by a line terminator.
skipper_rule skip = qi::lit(' ') | qi::lit('\t') | qi::lit('\f') | qi::lit('\v') | (qi::lit('\\') >> endofline);

// "v//vn" spelling: int '/' '/' int, with qi::attr supplying 0 for the absent middle field.
int_triplet_rule faceIndex_v_0_vn = qi::int_ >> qi::lit('/') >> qi::attr((int)0) >> qi::lit('/') >> qi::int_;
//int_triplet_rule faceIndex_v_vt__0 = qi::int_ >> qi::lit('/') >> qi::int_ >> qi::lit('/') >> qi::attr(0);
// "v/vt/" spelling inside lexeme[]: int '/' int '/' followed by (!int_ | attr(0)).
// NOTE(review): `!qi::int_` is the first alternative and succeeds exactly when no
// integer follows, so qi::attr(0) is presumably never reached for input like "1/1/ "
// and the third field is then not filled by this rule -- verify.
int_triplet_rule faceIndex_v_vt_0 = qi::lexeme[qi::int_ >> qi::lit('/') >> qi::int_ >> qi::lit('/') >> ((!qi::int_) | qi::attr((int)0))];
// "v/vt/vn" spelling: all three integers present.
int_triplet_rule faceIndex_v_vt_vn = qi::int_ >> qi::lit('/') >> qi::int_ >> qi::lit('/') >> qi::int_;
// Face line: 'f', one or more triplets that all use the same spelling, then a line terminator.
int_triplet_vector_rule f = 'f' >> (+faceIndex_v_vt_vn | +faceIndex_v_0_vn | +faceIndex_v_vt_0) >> endofline;
//int_triplet_vector_rule f = 'f' >> (+faceIndex_v_vt_vn | +faceIndex_v__0_vn | +faceIndex_v_vt__0 ) >> endofline;
//int_triplet_vector_rule f = 'f' >> *( faceIndex_v__0_vn | faceIndex_v_vt__0 | faceIndex_v_vt_vn ) >> endofline;

// Returns a copy of `str` in which tab, carriage-return, newline, vertical-tab
// and form-feed characters are replaced by their two-character escape
// spellings (backslash plus letter), so that test input prints on one line.
std::string ReplaceTRNVF(std::string str)
{
    // Each entry is used twice: as the regex (an escape that matches the
    // control character) and as the replacement text (backslash + letter).
    const std::list<std::string> escapes = { "\\t", "\\r", "\\n", "\\v", "\\f" };

    std::string result(str);
    for (std::list<std::string>::const_iterator e = escapes.begin(); e != escapes.end(); ++e)
    {
        const std::regex control(*e);
        result = std::regex_replace(result, control, *e);
    }
    return result;
}

// Reports a Boost.Test failure unless `it` has reached `end`, i.e. unless the
// parser consumed all input. The failure message shows the unparsed remainder
// with control characters escaped by ReplaceTRNVF.
// The iterators are taken by value so that a temporary such as `test.end()`
// can be passed; an rvalue cannot bind to a non-const reference in standard C++.
void CHECK(std::string::iterator it, std::string::iterator end)
{
    BOOST_CHECK_MESSAGE(it == end, ReplaceTRNVF(std::string("Remaining:'") + std::string(it, end) + "'"));
}

// Checks the three fields of `actual` against `expected`, one
// BOOST_CHECK_EQUAL per field.
void CHECK(int_triplet &expected, int_triplet &actual)
{
    BOOST_CHECK_EQUAL(expected.a, actual.a);
    BOOST_CHECK_EQUAL(expected.b, actual.b);
    BOOST_CHECK_EQUAL(expected.c, actual.c);
}

// Checks that both vectors have the same size, then compares the triplets
// pairwise. The loop stops at the shorter vector, so a size mismatch never
// indexes out of range.
void CHECK(int_triplet_vector &expected, int_triplet_vector &actual)
{
    BOOST_CHECK_EQUAL(expected.size(), actual.size());
    for (size_t n = 0; n < expected.size() && n < actual.size(); ++n)
        CHECK(expected[n], actual[n]);
}


// Every input line must be accepted, consumed completely, and yield the
// expected triplets.
BOOST_AUTO_TEST_CASE(Rules_FaceIndex_Pass)
{
    // Input line -> expected result; a field absent from the input is expected as 0.
    std::map<std::string, int_triplet_vector> data;
    data["f 1/1/1 2/2/2 3/3/3 4/4/4\r\n"] = int_triplet_vector{ { 1, 1, 1 },{ 2, 2, 2 },{ 3, 3, 3 },{ 4, 4, 4 } };
    data["f 1//1 2//2 3//3 4//4\r\n"] = int_triplet_vector{ { 1, 0, 1 },{ 2, 0, 2 },{ 3, 0, 3 },{ 4, 0, 4 } };
    data["f 1/1/ 2/2/ 3/3/ 4/4/\r\n"] = int_triplet_vector{ { 1, 1, 0 },{ 2, 2, 0 },{ 3, 3, 0 },{ 4, 4, 0 } };

    for (auto kvp : data)
    {
        int_triplet_vector result;
        // Mutable copy of the key: the rules are instantiated for std::string::iterator.
        std::string test = kvp.first;
        auto it = test.begin();
        std::cout << ReplaceTRNVF("Test: " + test) << std::endl;
        BOOST_CHECK(true == qi::phrase_parse(it, test.end(), f, skip, result));
        // The whole line must have been consumed.
        CHECK(it, test.end());
        CHECK(kvp.second, result);
    }
}

// Lines that switch triplet spelling part-way through; phrase_parse is
// expected to return false for each of them.
BOOST_AUTO_TEST_CASE(Rules_FaceIndex_Fail)
{
    std::list<std::string> data = 
    { 
        "f 1/1/1 2/2/2 3//3 4//4\r\n", 
        "f 1/1/ 2/2/ 3/3/ 4/4/4\r\n",
    };

    for (auto t : data)
    {
        int_triplet_vector result;
        std::string test = t;
        std::cout << ReplaceTRNVF("Test: " + test) << std::endl;
        auto it = test.begin();
        BOOST_CHECK(false == qi::phrase_parse(it, test.end(), f, skip, result));
    }
}

How can I make the attribute unskippable so that it is always used to fill the struct?


Solution

  • In the end, I spent way too much time cleaning up some of the code.

    The problem likely resided in !int_ | attr(0), because that literally says "either you don't see another int, or you use the implied 0". That's exactly what you didn't want to say, I think.

    Note: positive lookahead would be &int_ - but even then that would not be what you wanted, because then you would want to actually parse it, and &int_ >> int_ is very much equivalent to int_.

    Also, in general I didn't see the reason to complicate the triplet rules with skippers (there are no spaces in any of the sample triplets; also, allowing them does make the grammar ambiguous, because there would be no way to tell whether a given integer was the trailing number from the previous triplet, or the leading one from the next).

    Removing the skipper from the actual triplet removes the pain:

    using namespace qi;
    
    // Skipper: space, tab, form feed, vertical tab, or a backslash followed by end-of-line.
    skip_    = char_(" \t\f\v") | '\\' >> eol;
    
    // The three triplet spellings; attr(0) supplies the field absent from the input.
    t1       = int_ >> '/' >> int_    >> '/' >> int_;
    t2       = int_ >> '/' >> attr(0) >> '/' >> int_;
    t3       = int_ >> '/' >> int_    >> '/' >> attr(0);
    // One or more triplets, all of the same spelling.
    triplets = +t1 | +t2 | +t3;
    
    // 'f', the triplets, end-of-line; `start` installs the skipper around it.
    f        = 'f' >> triplets >> eol;
    start    = skip(copy(skip_)) [ f ];
    

    Here's the end result of my transfrobnications:

    Live On Coliru

    //#define BOOST_SPIRIT_DEBUG
    //#pragma once
    #include <string>
    #include <vector>
    #include <regex>
    #include <list>
    #define BOOST_TEST_MAIN
    #include <boost/test/included/unit_test.hpp>
    #include <boost/spirit/include/qi.hpp>
    
    namespace std {
        // Streams every element of `v`, each followed by one space.
        // NOTE(review): adding overloads to namespace std is formally undefined
        // behaviour; presumably done so argument-dependent lookup finds this
        // operator for std::vector -- confirm before reusing.
        template <typename T>
        ostream& operator<<(ostream& os, vector<T> const& v) {
            for (T const& element : v) {
                os << element;
                os << " ";
            }
            return os;
        }
    }
    
    // Aggregate of the three integer fields that make up one parsed triplet.
    struct int_triplet {
        int a;
        int b;
        int c;
    
        // Member-wise equality over a, b and c.
        bool operator==(int_triplet const& other) const {
            if (a != other.a) return false;
            if (b != other.b) return false;
            return c == other.c;
        }
    
        // Prints the triplet as "a/b/c".
        friend std::ostream& operator<<(std::ostream& os, int_triplet const& t) {
            os << t.a << '/' << t.b << '/' << t.c;
            return os;
        }
    };
    
    // Attribute type of a whole face line: the triplets in input order.
    typedef std::vector<int_triplet> int_triplet_vector;
    
    // Adapts int_triplet as a Fusion sequence over a, b, c, which lets the
    // Spirit rules below use it as their attribute.
    BOOST_FUSION_ADAPT_STRUCT(int_triplet, a, b, c)
    
    // Parser for a single `f` line.
    namespace parsing {
        namespace qi = boost::spirit::qi;
    
        // Grammar whose start rule yields an int_triplet_vector. The start rule
        // is declared without a skipper; it installs skip_ itself through the
        // skip(...)[...] directive.
        template <typename Iterator> 
            struct grammar : qi::grammar<Iterator, int_triplet_vector()> {
                grammar() : grammar::base_type(start) {
                    using namespace qi;
    
                    // Skipped input: space, tab, form feed, vertical tab, or
                    // a backslash followed by an end-of-line.
                    skip_    = char_(" \t\f\v") | '\\' >> eol;
    
                    // The three accepted triplet spellings; attr(0) supplies
                    // the value of the field that is absent from the input.
                    t1       = int_ >> '/' >> int_    >> '/' >> int_;
                    t2       = int_ >> '/' >> attr(0) >> '/' >> int_;
                    t3       = int_ >> '/' >> int_    >> '/' >> attr(0);
                    // One or more triplets, all of the same spelling.
                    triplets = +t1 | +t2 | +t3;
    
                    // 'f', the triplets, then an end-of-line.
                    f        = 'f' >> triplets >> eol;
                    start    = skip(copy(skip_)) [ f ];
    
                    // Presumably only has an effect when BOOST_SPIRIT_DEBUG is defined.
                    BOOST_SPIRIT_DEBUG_NODES((skip_)(t1)(t2)(t3)(triplets)(f)(start))
                }
    
              private:
                qi::rule<Iterator, int_triplet_vector()> start;
    
                typedef qi::rule<Iterator> Skipper;
    
                Skipper skip_;
                // Declared without a skipper, so nothing is skipped inside a triplet.
                qi::rule<Iterator, int_triplet()> t1, t2, t3;
                qi::rule<Iterator, int_triplet_vector(), Skipper> f, triplets;
            };
    
        // Grammar instance over const std::string iterators, used by the test cases.
        static grammar<std::string::const_iterator> const f {};
    }
    
    // Returns `str` with tab, carriage return, newline, vertical tab and form
    // feed replaced by the two-character sequences \t, \r, \n, \v and \f, so
    // that test input prints on one line. All other characters, including
    // backslashes already present, are copied unchanged.
    // Works in a single pass over the characters; no std::regex objects are
    // compiled per call.
    std::string escape(std::string str) {
        std::string escaped;
        escaped.reserve(str.size());
        for (char const ch : str) {
            switch (ch) {
                case '\t': escaped += "\\t"; break;
                case '\r': escaped += "\\r"; break;
                case '\n': escaped += "\\n"; break;
                case '\v': escaped += "\\v"; break;
                case '\f': escaped += "\\f"; break;
                default:   escaped += ch;    break;
            }
        }
        return escaped;
    }
    
    // Reports a Boost.Test failure unless `it` has reached `end`; the failure
    // message shows the unparsed remainder, passed through escape().
    void CHECK(std::string::const_iterator it, std::string::const_iterator end) {
        BOOST_CHECK_MESSAGE(it == end, escape((std::string) "Remaining:'" + std::string(it, end) + "'"));
    }
    
    // Compares two triplets as a whole with BOOST_CHECK_EQUAL (which relies on
    // int_triplet's operator== and operator<<).
    void CHECK(int_triplet const&expected, int_triplet const&actual) {
        BOOST_CHECK_EQUAL(expected, actual);
    }
    
    // Checks that both vectors have the same size, then compares the triplets
    // pairwise. The loop stops at the shorter vector, so a size mismatch never
    // indexes out of range.
    void CHECK(int_triplet_vector const&expected, int_triplet_vector const&actual) {
        BOOST_CHECK_EQUAL(expected.size(), actual.size());
        for (size_t n = 0; n < expected.size() && n < actual.size(); ++n)
            CHECK(expected[n], actual[n]);
    }
    
    // Every input line must be accepted, consumed completely, and yield the
    // expected triplets.
    BOOST_AUTO_TEST_CASE(Rules_FaceIndex_Pass) {
        // Input line -> expected result; a field absent from the input is expected as 0.
        std::map<std::string, int_triplet_vector> const data {
            {"f 1/1/1 2/2/2 3/3/3 4/4/4\r\n", int_triplet_vector{ { 1, 1, 1 }, { 2, 2, 2 }, { 3, 3, 3 }, { 4, 4, 4 } }},
            {"f 1//1 2//2 3//3 4//4\r\n"    , int_triplet_vector{ { 1, 0, 1 }, { 2, 0, 2 }, { 3, 0, 3 }, { 4, 0, 4 } }},
            {"f 1/1/ 2/2/ 3/3/ 4/4/\r\n"    , int_triplet_vector{ { 1, 1, 0 }, { 2, 2, 0 }, { 3, 3, 0 }, { 4, 4, 0 } }},
        };
    
        for (auto const& kvp : data) {
    
            auto& test = kvp.first;
            std::cout << "Test: " << escape(test) << std::endl;
    
            auto it = test.begin();
            int_triplet_vector result;
            // Plain parse(): the grammar installs its skipper itself.
            BOOST_CHECK(true == parse(it, test.end(), parsing::f, result));
    
            // The whole line must have been consumed.
            CHECK(it, test.cend());
            CHECK(kvp.second, result);
        }
    }
    
    // Lines that switch triplet spelling part-way through; parse is expected
    // to return false for each of them.
    BOOST_AUTO_TEST_CASE(Rules_FaceIndex_Fail) {
        std::list<std::string> const data = {
            "f 1/1/1 2/2/2 3//3 4//4\r\n", "f 1/1/ 2/2/ 3/3/ 4/4/4\r\n",
        };
    
        for (std::string const& t : data) {
            int_triplet_vector result;
            std::cout << "Test: " << escape(t) << std::endl;
    
            auto it = t.begin();
    
            BOOST_CHECK(false == parse(it, t.end(), parsing::f, result));
        }
    }
    

    Output:

    Running 2 test cases...
    Test: f 1//1 2//2 3//3 4//4\r\n
    Test: f 1/1/ 2/2/ 3/3/ 4/4/\r\n
    Test: f 1/1/1 2/2/2 3/3/3 4/4/4\r\n
    Test: f 1/1/1 2/2/2 3//3 4//4\r\n
    Test: f 1/1/ 2/2/ 3/3/ 4/4/4\r\n
    
    *** No errors detected
    

    (That's with debug information disabled, of course)