I have a parser in which I want to capture certain types of whitespace as enum values and preserve the spaces for the "text" values.
My whitespace parser is pretty basic (Note: I've only added the pipe character here for test/dev purposes):
struct whitespace_p : x3::symbols<Whitespace>
{
whitespace_p()
{
add
("\n", Whitespace::NEWLINE)
("\t", Whitespace::TAB)
("|", Whitespace::PIPE)
;
}
} whitespace;
And I want to capture everything either into my enum or into std::string
s:
struct Element : x3::variant<Whitespace, std::string>
{
using base_type::base_type;
using base_type::operator=;
};
And to parse my input I use something like this:
const auto contentParser
= x3::rule<class ContentParserID, Element, true> { "contentParser" }
= x3::no_skip[+(x3::char_ - (whitespace))]
| whitespace
;
using Elements = std::vector<Element>;
const auto elementsParser
= x3::rule<class ContentParserID, Elements, true> { "elementsParser" }
= contentParser >> *(contentParser);
The problem though is that the parser stops at the first tab or newline it hits.
Code: http://coliru.stacked-crooked.com/a/d2cda4ce721279a4
#include <boost/spirit/home/x3.hpp>
#include <boost/spirit/home/x3/support/ast/variant.hpp>
#include <iostream>
namespace x3 = boost::spirit::x3;
enum Whitespace
{
NEWLINE,
TAB,
PIPE
};
struct whitespace_p : x3::symbols<Whitespace>
{
whitespace_p()
{
add
("\n", Whitespace::NEWLINE)
("\t", Whitespace::TAB)
("|", Whitespace::PIPE)
;
}
} whitespace;
struct Element : x3::variant<Whitespace, std::string>
{
using base_type::base_type;
using base_type::operator=;
};
const auto contentParser
= x3::rule<class ContentParserID, Element, true> { "contentParser" }
= x3::no_skip[+(x3::char_ - (whitespace))]
| whitespace
;
using Elements = std::vector<Element>;
const auto elementsParser
= x3::rule<class ContentParserID, Elements, true> { "elementsParser" }
= contentParser >> *(contentParser);
struct print_visitor
: public boost::static_visitor<std::string>
{
std::string operator()(const Whitespace& ws) const
{
if (ws == Whitespace::NEWLINE)
{
return "newline";
}
else if (ws == Whitespace::PIPE)
{
return "pipe";
}
else
{
return "tab";
}
}
std::string operator()(const std::string& str) const
{
return str;
}
};
int main()
{
const std::string text = "Hello \n World";
std::string::const_iterator start = std::begin(text);
const std::string::const_iterator stop = std::end(text);
Elements elements{};
bool result =
phrase_parse(start, stop, elementsParser, x3::ascii::space, elements);
if (!result)
{
std::cout << "failed to parse!\n";
}
else if (start != stop)
{
std::cout << "unparsed: " << std::string{start, stop} << '\n';
}
else
{
for (const auto& e : elements)
{
std::cout << "element: [" << boost::apply_visitor(print_visitor{}, e) << "]\n";
}
}
}
If I parse the text Hello | World
then I get the results I'm expecting. But if I instead use Hello \n World
the whitespace after the \n
is swallowed and the World
is never parsed. Ideally I'd like to see this output:
element: [Hello ]
element: [newline]
element: [ World]
How can I accomplish this? Thank you!
My goto reference on skipper issues: Boost spirit skipper issues
In this case you made it work with no_skip[]
. That's correct.
no_skip
is like lexeme
except it doesn't pre-skip, from the source (boost/spirit/home/x3/directive/no_skip.hpp):
// same as lexeme[], but does not pre-skip
In your case I would flip the logic: just adjust the skipper itself.
Also, don't supply the skipper with phrase_parse
, because your grammar is highly sensitive to the correct value of the skipper.
Your whole grammar could be:
const auto p = x3::skip(x3::space - whitespace) [
*(+x3::graph | whitespace)
];
Here's a Live Demo On Coliru
#include <boost/spirit/home/x3.hpp>
#include <boost/spirit/home/x3/support/ast/variant.hpp>
#include <iostream>
#include <iomanip>
namespace x3 = boost::spirit::x3;
enum Whitespace { NEWLINE, TAB, PIPE };
struct whitespace_p : x3::symbols<Whitespace> {
whitespace_p() {
add
("\n", Whitespace::NEWLINE)
("\t", Whitespace::TAB)
("|", Whitespace::PIPE)
;
}
} static const whitespace;
struct Element : x3::variant<Whitespace, std::string> {
using base_type::base_type;
using base_type::operator=;
};
using Elements = std::vector<Element>;
static inline std::ostream& operator<<(std::ostream& os, Element const& el) {
struct print_visitor {
std::ostream& os;
auto& operator()(Whitespace ws) const {
switch(ws) {
case Whitespace::NEWLINE: return os << "[newline]";
case Whitespace::PIPE: return os << "[pipe]";
case Whitespace::TAB: return os << "[tab]";
}
return os << "?";
}
auto& operator()(const std::string& str) const { return os << std::quoted(str); }
} vis{os};
return boost::apply_visitor(vis, el);
}
int main() {
std::string const text = "\tHello \n World";
auto start = begin(text), stop = end(text);
const auto p = x3::skip(x3::space - whitespace) [
*(+x3::graph | whitespace)
];
Elements elements;
if (!parse(start, stop, p, elements)) {
std::cout << "failed to parse!\n";
} else {
std::copy(begin(elements), end(elements), std::ostream_iterator<Element>(std::cout, "\n"));
}
if (start != stop) {
std::cout << "unparsed: " << std::quoted(std::string(start, stop)) << '\n';
}
}
Prints
[tab]
"Hello"
[newline]
"World"
It doesn't seem like you'd need any skipper here at all. Why not:
const auto p = *(+~x3::char_("\n\t|") | whitespace);
While we're at it, there's no need for symbols to map enums:
struct Element : x3::variant<char, std::string> {
// ...
};
using Elements = std::vector<Element>;
And then
const auto p
= x3::rule<struct ID, Element> {}
= +~x3::char_("\n\t|") | x3::char_;
#include <boost/spirit/home/x3.hpp>
#include <boost/spirit/home/x3/support/ast/variant.hpp>
#include <iostream>
#include <iomanip>
namespace x3 = boost::spirit::x3;
struct Element : x3::variant<char, std::string> {
using variant = x3::variant<char, std::string>;
using variant::variant;
using variant::operator=;
friend std::ostream& operator<<(std::ostream& os, Element const& el) {
struct print_visitor {
std::ostream& os;
auto& operator()(char ws) const {
switch(ws) {
case '\n': return os << "[newline]";
case '\t': return os << "[pipe]";
case '|': return os << "[tab]";
}
return os << "?";
}
auto& operator()(const std::string& str) const { return os << std::quoted(str); }
} vis{os};
return boost::apply_visitor(vis, el);
}
};
using Elements = std::vector<Element>;
int main() {
std::string const text = "\tHello \n World";
auto start = begin(text);
auto const stop = end(text);
Elements elements;
const auto p
= x3::rule<struct ID, Element> {}
= +~x3::char_("\n\t|") | x3::char_;
if (!parse(start, stop, *p, elements)) {
std::cout << "failed to parse!\n";
} else {
std::copy(begin(elements), end(elements), std::ostream_iterator<Element>(std::cout, "\n"));
}
if (start != stop) {
std::cout << "unparsed: " << std::quoted(std::string(start, stop)) << '\n';
}
}
Prints
[pipe]
"Hello "
[newline]
" World"