I am trying to parse some text files with the Boost.Spirit X3 parser, and I have found some differences between using a lambda function (semantic action) and using "direct" parsing rules (attribute propagation).
My sample code is as follows:
#include <iostream>
#include <boost/config/warning_disable.hpp>
#include <boost/spirit/home/x3.hpp>
namespace x3 = boost::spirit::x3;
// Two grammars that look for the file name following each
// "[FILE_TO_BE_SEARCHED]" marker in the sample text:
//   - `lambda` collects the names through a semantic action,
//   - `direct` relies on attribute propagation into a caller-supplied container.
namespace parsers
{
    namespace lambda
    {
        using namespace boost::spirit::x3;

        // Sink written by the semantic action `f`; namespace-scope, so it
        // persists across parse calls until the caller takes its contents.
        std::vector<std::string> files;

        // Semantic action: appends the attribute of the parser it is attached to.
        // The capture list is empty on purpose: `files` has static storage
        // duration and needs no capture, and a capture-default (`[&]`) is not
        // allowed on a lambda that lives outside block scope.
        auto f = [](const auto& ctx) { files.push_back(_attr(ctx)); };

        // Marker line, an empty line, then the rest of the next line (may be
        // empty) which is handed to `f`.
        auto const line = "[FILE_TO_BE_SEARCHED]" >> eol >> eol >> (*~char_("\r\n"))[f];

        // Any other line: consume everything up to the line break.
        auto const ignore = *~char_("\r\n");

        // eol-separated list of records or ignorable lines, skipping blanks.
        auto const start = skip(blank)[(line | ignore) % eol];
    }

    namespace direct
    {
        using namespace boost::spirit::x3;

        // Same record shape as lambda::line, but the file name is exposed as
        // the parser's attribute instead of going through an action.
        auto const line = "[FILE_TO_BE_SEARCHED]" >> eol >> eol >> *~char_("\r\n");

        // Any other line, with its attribute suppressed via omit[].
        auto const ignore = omit[*~char_("\r\n")];

        // NOTE: kept exactly as posted. With a std::vector<std::string>
        // attribute this yields one element per list item, including empty
        // strings for the ignored lines (see the program output below).
        // Presumably the list operator appends an element for every matched
        // alternative even when omit[] contributes nothing - TODO confirm.
        auto const start = skip(blank)[(line | ignore) % eol];
    }
}
// Sample input shared by both parsers: header/comment lines plus three
// "[FILE_TO_BE_SEARCHED]" records, each followed by an empty line and a name.
// Written as adjacent string literals, one per input line.
std::string file(
    " -- HEADER\n"
    "-- Version: 0.1.0\n"
    "-- Author: J. A.\n"
    "-- Copyright:\n"
    "\n"
    "-----------------\n"
    "-- Comments\n"
    "-----------------\n"
    "\n"
    "[FILE_TO_BE_SEARCHED]\n"
    "\n"
    "File1.txt\n"
    "\n"
    "[FILE_TO_BE_SEARCHED]\n"
    "\n"
    "File2.txt\n"
    "\n"
    "[FILE_TO_BE_SEARCHED]\n"
    "\n"
    "File3.txt\n"
    "\n"
    "-- Comments...\n"
    "\n"
    "[END]\n"
    "\n"
    "-- MD5: 0x1AF3\n"
);
// Parses the sample text with the semantic-action grammar and returns the
// file names the action collected.
// The return type is a plain (non-const) value: a const-qualified by-value
// return only prevents callers from moving out of the result.
std::vector<std::string> parse_lambda()
{
    // Results are produced as a side effect on parsers::lambda::files.
    x3::parse(file.begin(), file.end(), parsers::lambda::start);
    // The source is a namespace-scope object, not a local, so an explicit
    // move is required to avoid copying it.
    return std::move(parsers::lambda::files);
}
// Parses the sample text with the attribute-propagating grammar and returns
// whatever the parser stored in the container attribute.
// Returned as a plain value: `const` on a by-value return blocks moves at the
// call site, and `return std::move(local)` would disable copy elision (NRVO).
std::vector<std::string> parse_direct()
{
    std::vector<std::string> files;
    x3::parse(file.begin(), file.end(), parsers::direct::start, files);
    return files;
}
// Writes the number of entries followed by a 1-based, single-quoted listing
// of each entry to std::cout.
// The vector is taken by const reference so that printing does not deep-copy
// every string.
void print(const std::vector<std::string>& files)
{
    std::cout << "Files found: " << files.size() << '\n';
    int i = 0;
    // Loop variable is deliberately not called `file`, to avoid shadowing the
    // global sample text of the same name.
    for (const auto& name : files)
        std::cout << ++i << " - '" << name << "'\n";
}
int main()
{
std::cout << "Lambda parser:" << '\n';
const auto files_lambda = parse_lambda();
print(files_lambda);
std::cout << "\nDirect parser:" << '\n';
const auto files_direct = parse_direct();
print(files_direct);
return 0;
}
It outputs these lines:
Lambda parser:
Files found: 3
1 - 'File1.txt'
2 - 'File2.txt'
3 - 'File3.txt'
Direct parser:
Files found: 21
1 - ''
2 - ''
3 - ''
4 - ''
5 - ''
6 - ''
7 - ''
8 - ''
9 - ''
10 - 'File1.txt'
11 - ''
12 - 'File2.txt'
13 - ''
14 - 'File3.txt'
15 - ''
16 - ''
17 - ''
18 - ''
19 - ''
20 - ''
21 - ''
Here is a Coliru link to this same source code: https://coliru.stacked-crooked.com/a/bad4dd5002eb3fec or using the next Coliru command line: cat /Archive2/ba/d4dd5002eb3fec/main.cpp
I would like to ask the following questions:
1) How can I do direct parsing (without a lambda function) so that it produces the same output as the lambda parser?
2) Why is the omit directive failing and giving empty strings when the ignore rule is matched?
3) Last but not least, is X3 a significant improvement over Qi (I mean beyond compilation speed and the C++14-style syntax)? Is it stable and production-ready?
Thank you very much, Pablo.
I'd just use seek[]
to greatly simplify attribute propagation:
// Attribute-propagating grammar rewritten around seek[].
namespace direct {
using namespace boost::spirit::x3;
// One record: the marker line, an empty line, then a non-empty file name
// (one or more characters other than CR/LF) terminated by a line break.
auto const line = "[FILE_TO_BE_SEARCHED]" >> eol >> eol >> +~char_("\r\n") >> eol;
// Zero or more records, each located with seek[] while skipping blanks.
// There is no separate "ignore" alternative, so only `line` feeds the
// container attribute.
auto const start = *skip(blank) [ seek[line] ];
}
That works without further ado.
Note the various modernizations/simplifications here: Live On Coliru
#include <boost/config/warning_disable.hpp>
#include <boost/spirit/home/x3.hpp>
#include <iostream>
#include <iomanip>
namespace x3 = boost::spirit::x3;
// Two grammars that extract the file name following each
// "[FILE_TO_BE_SEARCHED]" marker:
//   - `lambda` collects names through a semantic action,
//   - `direct` uses seek[] so that only matched records propagate attributes.
namespace parsers {
    namespace lambda {
        using namespace boost::spirit::x3;

        // Sink written by the semantic action `f`.
        std::vector<std::string> files;

        // Semantic action: appends the attribute of the attached parser.
        // Empty capture list on purpose: `files` has static storage duration,
        // and a capture-default (`[&]`) is not permitted on a lambda outside
        // block scope.
        auto f = [](const auto &ctx) { files.push_back(_attr(ctx)); };

        // Marker line, an empty line, then the rest of the next line -> `f`.
        auto const line = "[FILE_TO_BE_SEARCHED]" >> eol >> eol >> (*~char_("\r\n"))[f];
        // Any other line: consume up to the line break.
        auto const ignore = *~char_("\r\n");
        // eol-separated list of records or ignorable lines, skipping blanks.
        auto const start = skip(blank)[(line | ignore) % eol];
    }

    namespace direct {
        using namespace boost::spirit::x3;

        // One record: marker line, empty line, non-empty file name, line break.
        auto const line = "[FILE_TO_BE_SEARCHED]" >> eol >> eol >> +~char_("\r\n") >> eol;
        // Zero or more records, each located with seek[] while skipping
        // blanks; there is no "ignore" alternative to add empty elements.
        auto const start = *skip(blank) [ seek[line] ];
    }
}
// Sample input shared by both parsers. The empty lines are significant: both
// grammars require the marker line to be followed by an empty line and then
// the file name, so without them no record would match.
std::string const file = R"( -- HEADER
-- Version: 0.1.0
-- Author: J. A.
-- Copyright:

-----------------
-- Comments
-----------------

[FILE_TO_BE_SEARCHED]

File1.txt

[FILE_TO_BE_SEARCHED]

File2.txt

[FILE_TO_BE_SEARCHED]

File3.txt

-- Comments...

[END]

-- MD5: 0x1AF3
)";
// Runs the semantic-action grammar over the sample text and returns the names
// it collected, leaving the namespace-scope sink empty afterwards.
// (Plain value return: a const-qualified one would be a pessimization.)
std::vector<std::string> parse_lambda() {
    x3::parse(file.begin(), file.end(), parsers::lambda::start);

    // Take ownership of the collected names by swapping them into a local.
    std::vector<std::string> collected;
    collected.swap(parsers::lambda::files);
    return collected;
}
std::vector<std::string> parse_direct() { // const is a pessimization here
std::vector<std::string> files;
x3::parse(file.begin(), file.end(), parsers::direct::start, files);
return files; // std::move is a pessimization here
}
// Prints the entry count, then each entry on its own line as
// `<1-based index> - <quoted entry>` using std::quoted.
void print(std::vector<std::string> const& files) {
    std::cout << "Files found: " << files.size() << "\n";

    int ordinal = 1;
    for (std::string const& name : files) {
        std::cout << ordinal << " - " << std::quoted(name) << "\n";
        ++ordinal;
    }
}
// Runs both parser variants over the sample text and prints what each found.
int main() {
    std::cout << "Lambda parser:\n";
    auto const via_lambda = parse_lambda();
    print(via_lambda);

    std::cout << "\nDirect parser:\n";
    auto const via_direct = parse_direct();
    print(via_direct);
}
Prints
Lambda parser:
Files found: 3
1 - "File1.txt"
2 - "File2.txt"
3 - "File3.txt"
Direct parser:
Files found: 3
1 - "File1.txt"
2 - "File2.txt"
3 - "File3.txt"
To the other questions: