Search code examples
c++boost-spirit

generic rule for parsing custom group of strings using boost::spirit::x3


I am trying to write a generic rule for my use case using boost::spirit::x3.

The use cases are:

  1. input "string,string" output [string, string]
  2. input "string.string,string" output [string.string, string]
  3. input "string.0.string.1.string,string" output [string.0.string.1.string, string]

coliru

//#define BOOST_SPIRIT_X3_DEBUG
//#define DEBUG_SYMBOLS
#include <cassert>
#include <chrono>
#include <iostream>
#include <string>
#include <vector>

#include <boost/spirit/home/x3.hpp>

using namespace boost::spirit;

// Comma-separated list of heroes. Unary + binds tighter than %, so this groups
// as (+x3::lexeme[...]) % (+x3::char_(',')).
// Each x3::lexeme[+x3::alnum >> -x3::char_('.')] match is a single alnum run
// with an optional trailing '.', so a dotted name such as "hero.0.mirana" is
// matched as several consecutive pieces ("hero.", "0.", "mirana"), and each
// piece becomes its own element of the result rather than one joined string.
auto myRule = +x3::lexeme[ +x3::alnum >> -x3::char_('.')] % +x3::char_(',');

/// Prints one test's parsed heroes to stdout, framed by a header and a footer.
///
/// @param test       Label written right after the "----" header marker.
/// @param allHeroes  Parsed hero names; written one per line, in order.
///
/// Both arguments are only read, so they are taken by const reference: this
/// avoids copying the label and lets callers pass const or temporary vectors.
void print(const std::string& test, const std::vector<std::string>& allHeroes)
{
    std::cout << "----" << test << '\n';
    for (const auto& hero : allHeroes)
    {
        std::cout << hero << '\n';
    }
    // Flush once at the end (instead of after every line) so the report is
    // still written out if the caller aborts right afterwards, e.g. on assert.
    std::cout << "-----------------------------" << std::endl;
}

/// Scenario 1: plain names without dots ("mirana,slark").
///
/// Prints the parsed heroes, then PASS only when the parse succeeded and
/// produced exactly {"mirana", "slark"}; otherwise FAIL.
void test_1()
{
    const std::string heroSelections = "mirana,slark";
    const std::vector<std::string> expected{"mirana", "slark"};

    std::vector<std::string> allHeroes;
    // Keep the parse result: a failed parse must never be reported as PASS.
    const bool parsed = boost::spirit::x3::parse(
        heroSelections.begin(), heroSelections.end(), myRule, allHeroes);
    print("test_1", allHeroes);

    // Compare at run time rather than with assert(): assert() is compiled out
    // under NDEBUG (which would report PASS for wrong content) and otherwise
    // aborts the program before the remaining tests can run.
    std::cout << ((parsed && allHeroes == expected) ? "PASS" : "FAIL") << std::endl;
}

/// Scenario 2: a name with several dotted segments followed by a plain name
/// ("hero.0.range.1.mirana,slark").
///
/// Prints the parsed heroes, then PASS only when the parse succeeded and
/// produced exactly {"hero.0.range.1.mirana", "slark"}; otherwise FAIL.
void test_2()
{
    const std::string heroSelections = "hero.0.range.1.mirana,slark";
    const std::vector<std::string> expected{"hero.0.range.1.mirana", "slark"};

    std::vector<std::string> allHeroes;
    // Keep the parse result: a failed parse must never be reported as PASS.
    const bool parsed = boost::spirit::x3::parse(
        heroSelections.begin(), heroSelections.end(), myRule, allHeroes);
    print("test_2", allHeroes);

    // Compare at run time rather than with assert(): assert() is compiled out
    // under NDEBUG (which would report PASS for wrong content) and otherwise
    // aborts the program before the remaining tests can run.
    std::cout << ((parsed && allHeroes == expected) ? "PASS" : "FAIL") << std::endl;
}

/// Scenario 3: a name with two dotted segments followed by a plain name
/// ("hero.0.mirana,slark").
///
/// Prints the parsed heroes, then PASS only when the parse succeeded and
/// produced exactly {"hero.0.mirana", "slark"}; otherwise FAIL.
void test_3()
{
    const std::string heroSelections = "hero.0.mirana,slark";
    const std::vector<std::string> expected{"hero.0.mirana", "slark"};

    std::vector<std::string> allHeroes;
    // Keep the parse result: a failed parse must never be reported as PASS.
    const bool parsed = boost::spirit::x3::parse(
        heroSelections.begin(), heroSelections.end(), myRule, allHeroes);
    print("test_3", allHeroes);

    // Compare at run time rather than with assert(): assert() is compiled out
    // under NDEBUG (which would report PASS for wrong content) and otherwise
    // aborts the program before the remaining tests can run.
    std::cout << ((parsed && allHeroes == expected) ? "PASS" : "FAIL") << std::endl;
}

// Entry point: runs every scenario once, in declaration order.
int main()
{
    void (*const scenarios[])() = {test_1, test_2, test_3};
    for (const auto scenario : scenarios)
    {
        scenario();
    }
    return 0;
}

I have tried the following rules:

  1. auto myRule = +x3::lexeme[ +x3::alnum >> -x3::char_('.')] % +x3::char_(',');

    My understanding:

    +x3::alnum >> -x3::char_('.') matches the formats string, string.string, and string.string.string, until the separator given by % +x3::char_(',') is reached.

  2. auto myRule = +x3::alnum >> +x3::lexeme[ +x3::alnum >> -x3::char_('.')] % +x3::char_(',');

+x3::alnum matches a plain string, and then +x3::lexeme[ +x3::alnum >> -x3::char_('.')] matches every string.string form, until the separator given by % +x3::char_(',') is reached.

But clearly my understanding is incorrect here.

What am I doing wrong?


Solution

  • It's documented, just check your understanding:

    The list operator, a % b, is a binary operator that matches a list of one or more repetitions of a separated by occurrences of b. This is equivalent to a >> *(b >> a).

    Simplifying the test program to make it readable (without all the repetition):

    Live On Coliru

    #include <boost/spirit/home/x3.hpp>
    #include <fmt/ranges.h>
    
    // Parsed result type: one string per hero.
    using Heroes = std::vector<std::string>;
    // Table of test cases: each entry pairs an input text with the hero list
    // that parsing it is expected to produce.
    struct {
        std::string txt; // input handed to the parser
        Heroes      expected; // heroes the parse should yield, in order
    } testcases[]{
        {
            "mirana,slark",
            {"mirana", "slark"},
        },
        {
            "hero.0.range.1.mirana,slark",
            {"hero.0.range.1.mirana", "slark"},
        },
        {
            "hero.0.mirana,slark",
            {"hero.0.mirana", "slark"},
        },
    };
    
    // Parses every entry of `testcases` with the rule under test and prints one
    // PASS/FAIL line per entry, together with the input and the actual result.
    int main() {
        namespace x3 = boost::spirit::x3;
        // Rule under test: one or more lexeme[alnum run, optional '.'] items,
        // separated by one or more commas.
        auto const myRule = +x3::lexeme[+x3::alnum >> -x3::char_('.')] % +x3::lit(',');
    
        // Bind by const reference: a by-value binding would copy each test
        // case's input string and expected vector on every iteration.
        for (auto const& [test, expected] : testcases) {
            Heroes actual;
            parse(test.begin(), test.end(), myRule, actual);
    
            fmt::print("{}\t'{}' -> {}\n", (actual == expected ? "PASS" : "FAIL"), test, actual);
        }
    }
    

    Prints

    PASS    'mirana,slark' -> ["mirana", "slark"]
    FAIL    'hero.0.range.1.mirana,slark' -> ["hero.", "0.", "range.", "1.", "mirana", "slark"]
    FAIL    'hero.0.mirana,slark' -> ["hero.", "0.", "mirana", "slark"]
    

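    The failing cases show why: each lexeme match (an alnum run plus its optional '.') is stored as its own string, and the leading + simply collects those strings, so a dotted name is split into several elements. I'd suggest a simplification like: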

    // A hero is any non-empty run of letters, digits and dots. This is lenient:
    // it places no constraint on where the dots appear.
    auto const hero   = +x3::char_("a-zA-Z0-9.");
    // Heroes are separated by single commas.
    auto const myRule = hero % ',';
    

    Printing:

    PASS    'mirana,slark' -> ["mirana", "slark"]
    PASS    'hero.0.range.1.mirana,slark' -> ["hero.0.range.1.mirana", "slark"]
    PASS    'hero.0.mirana,slark' -> ["hero.0.mirana", "slark"]
    

    However, if you actually want to be stricter than the tests suggest, consider:

    // Stricter: alnum runs joined by single dots, so a hero cannot start or end
    // with a dot or contain two dots in a row. The '.' literal contributes no
    // attribute, so x3::raw is used to expose the whole matched input range,
    // dots included, as the hero's value.
    auto const hero   = x3::raw[ +x3::alnum % '.' ];
    auto const myRule = hero % ','; // or % +x3::lit(',');
    

    This also prints the same output:

    PASS    'mirana,slark' -> ["mirana", "slark"]
    PASS    'hero.0.range.1.mirana,slark' -> ["hero.0.range.1.mirana", "slark"]
    PASS    'hero.0.mirana,slark' -> ["hero.0.mirana", "slark"]