Developping with Boost Spirit 2, I am trying to follow example in order to get progression (will add semantic actions later) in my pgn parser (see also related previous question). But I can't manage to avoid compilation errors : cpp
#include "pgn_games_extractor.h"
#include <boost/spirit/include/qi.hpp>
#include <boost/fusion/include/adapt_struct.hpp>
#include <tuple>
#include <iostream>
BOOST_FUSION_ADAPT_STRUCT(loloof64::pgn_tag, key, value)
BOOST_FUSION_ADAPT_STRUCT(loloof64::game_move, move_number, white_move, black_move, result)
BOOST_FUSION_ADAPT_STRUCT(loloof64::pgn_game, header, moves)
namespace loloof64 {
namespace qi = boost::spirit::qi;
typedef std::tuple<std::size_t, game_move> move_t;
typedef std::tuple<std::vector<pgn_tag>, std::vector<move_t>> game_t;
typedef std::tuple<std::size_t, std::vector<game_t>> pgn_t;
template <typename Iterator> struct pgn_parser : qi::grammar<Iterator, std::vector<pgn_game>, qi::space_type> {
pgn_parser() : pgn_parser::base_type(games) {
using namespace qi;
CurrentPos<Iterator> filepos;
const std::string no_move;
result.add
("1-0", result_t::white_won)
("0-1", result_t::black_won)
("1/2-1/2", result_t::draw)
("*", result_t::undecided);
quoted_string = '"' >> *~char_('"') >> '"';
tag = '[' >> +alnum >> quoted_string >> ']';
header = +tag;
regular_move = lit("O-O-O") | "O-O" | (+char_("a-hNBRQK") >> +char_("a-h1-8x=NBRQK") >> -lit("e.p."));
single_move = raw [ regular_move >> -char_("+#") ];
full_move = filepos.current_pos >> uint_
>> (lexeme["..." >> attr(no_move)] | "." >> single_move)
>> (single_move | attr(no_move))
>> -result;
game_description = +full_move;
single_game = -header >> game_description;
games = filepos.save_start_pos >> *single_game;
BOOST_SPIRIT_DEBUG_NODES(
(tag)(header)(quoted_string)(regular_move)(single_move)
(full_move)(game_description)(single_game)(games)
)
}
private:
qi::rule<Iterator, pgn_tag(), qi::space_type> tag;
qi::rule<Iterator, std::vector<pgn_tag>, qi::space_type> header;
qi::rule<Iterator, move_t(), qi::space_type> full_move;
qi::rule<Iterator, std::vector<move_t>, qi::space_type> game_description;
qi::rule<Iterator, game_t(), qi::space_type> single_game;
qi::rule<Iterator, pgn_t(), qi::space_type> games;
// lexemes
qi::symbols<char, result_t> result;
qi::rule<Iterator, std::string()> quoted_string;
qi::rule<Iterator> regular_move;
qi::rule<Iterator, std::string()> single_move;
};
}
loloof64::PgnGamesExtractor::PgnGamesExtractor(std::string inputFilePath) {
std::ifstream inputFile(inputFilePath);
parseInput(inputFile);
}
loloof64::PgnGamesExtractor::PgnGamesExtractor(std::istream &inputFile) { parseInput(inputFile); }
loloof64::PgnGamesExtractor::~PgnGamesExtractor() {
// dtor
}
void loloof64::PgnGamesExtractor::parseInput(std::istream &inputFile) {
if (inputFile.fail() || inputFile.bad())
throw new InputFileException("Could not read the input file !");
typedef boost::spirit::istream_iterator It;
loloof64::pgn_parser<It> parser;
std::vector<loloof64::pgn_game> temp_games;
It iter(inputFile >> std::noskipws), end;
//////////////////////////////////
std::cout << "About to parse the file" << std::endl;
//////////////////////////////////
bool success = boost::spirit::qi::phrase_parse(iter, end, parser, boost::spirit::qi::space, temp_games);
//////////////////////////////////
std::cout << "Finished to parse the file" << std::endl;
//////////////////////////////////
if (success && iter == end) {
games.swap(temp_games);
} else {
std::string error_fragment(iter, end);
throw PgnParsingException("Failed to parse the input at :'" + error_fragment + "' !");
}
}
and the header file : header.
#ifndef PGNGAMESEXTRACTOR_HPP
#define PGNGAMESEXTRACTOR_HPP
#include <string>
#include <vector>
#include <fstream>
#include <stdexcept>
#include <boost/fusion/adapted/std_tuple.hpp>
#include <boost/spirit/include/phoenix.hpp>
#include <boost/spirit/include/qi.hpp>
#include <boost/spirit/repository/include/qi_iter_pos.hpp>
namespace loloof64 {
namespace phx = boost::phoenix;
namespace qi = boost::spirit::qi;
/*
* This class has been taken from http://marko-editor.com/articles/position_tracking/
*/
template<typename Iterator>
struct CurrentPos {
CurrentPos() {
save_start_pos = qi::omit[boost::spirit::repository::qi::iter_pos[
phx::bind(&CurrentPos::setStartPos, this, qi::_1)]];
current_pos = boost::spirit::repository::qi::iter_pos[
qi::_val = phx::bind(&CurrentPos::getCurrentPos, this, qi::_1)];
}
qi::rule<Iterator> save_start_pos;
qi::rule<Iterator, std::size_t()> current_pos;
private:
void setStartPos(const Iterator &iterator) {
start_pos_ = iterator;
}
std::size_t getCurrentPos(const Iterator &iterator) {
return std::distance(start_pos_, iterator);
}
Iterator start_pos_;
};
enum result_t { white_won, black_won, draw, undecided };
struct pgn_tag {
std::string key;
std::string value;
};
struct game_move {
unsigned move_number;
std::string white_move;
std::string black_move;
result_t result;
};
struct pgn_game {
std::vector<pgn_tag> header;
std::vector<game_move> moves;
};
class PgnGamesExtractor {
public:
PgnGamesExtractor(std::string inputFilePath);
PgnGamesExtractor(std::istream &inputFile);
/*
Both constructos may throw PgnParsingException (if bad pgn format) and InputFileException (if missing file)
*/
std::vector<pgn_game> getGames() const { return games; }
virtual ~PgnGamesExtractor();
protected:
private:
std::vector<pgn_game> games;
void parseInput(std::istream &inputFile);
};
class PgnParsingException : public virtual std::runtime_error {
public:
PgnParsingException(std::string message) : std::runtime_error(message) {}
};
class InputFileException : public virtual std::runtime_error {
public:
InputFileException(std::string message) : std::runtime_error(message) {}
};
}
#endif // PGNGAMESEXTRACTOR_HPP
I did not post the compilation errors as there are too many and as the files can be easily tested.
Of course, it's not gonna work well with a streaming interface. You can retain the start iterator, but
you won't know the stream length ahead of time (unless you get it out-of-band)
calculating the current position (distance from the start iterator) each time is going to be horrendously inefficient.
Since you mentioned in a comment you were parsing files, you should consider using memory mapping (boost::iostream::mapped_file_source
or mmap
e.g.). That way, the distance calculation is instantaneous, using pointer arithmetic on the random-access iterators.
Here's a working example, with the following changes/notes:
omit[]
in save_start_pos
is useless (there is no declared attribute)getCurrentPos
was horrifically inefficient (to the extent that just using omit[current_pos]
in the full_move
rule slowed the parsing down several orders of magnitude.
This is because
boost::spirit::istream_iterator
holds on to all previously read state in a deque and traversing them doesn't come for free when doingstd::distance
Your CurrentPos<Iterator> filepos;
instance goes out of scope after construction! This means that invoking save_start_pos
/current_pos
is Undefined Behaviour¹. Move it out of the constructor.
A subtler point is to use full_move %= ...
when you add the semantic action (see docs and blog)
You changed the types on some of the rules to include position information, alongside the AST types. That's both unnecessary and flawed: the AST types would not be compatible with the tuple<size_t, T>
versions of the rules.
Besides, e.g. the games
rule didn't even expose a position, because save_start_pos
synthesizes unused_type
(no attribute).
So, drop the whole tuple business, and just work with the state of the filepos
member inside your semantic action:
full_move %=
omit[filepos.current_pos [ reportProgress(_1) ]] >>
uint_
>> (lexeme["..." >> attr(no_move)] | "." >> single_move)
>> (single_move | attr(no_move))
>> -result;
Finally, as a demonstration on how to report strictly increasing progress indications², I included a simple phoenix actor:
struct reportProgress_f {
size_t total_;
mutable double pct = 0.0;
reportProgress_f(size_t total) : total_(total) {}
template<typename T>
void operator()(T pos) const {
double newpct = pos * 100.0 / total_;
if ((newpct - pct) > 10) {
//sleep(1); // because it's way too fast otherwise...
pct = newpct;
std::cerr << "\rProgress " << std::fixed << std::setprecision(1) << pct << std::flush;
};
}
};
phx::function<reportProgress_f> reportProgress;
Note
reportProgress
needs to be constructed with knowledge about start and end iterators, see the constructor forpgn_parser
¹ in the recorded live stream you can see I spotted the error on the first reading, then forgot about after I made it to compile. The program crashed, dutifully :) Then I remembered.
² even in the face of backtracking
3 (not strictly required, but I guess the goal wasn't to simply make it so slow you actually need the progress indicator?)
#ifndef PGNGAMESEXTRACTOR_HPP
#define PGNGAMESEXTRACTOR_HPP
#include <string>
#include <vector>
#include <fstream>
#include <stdexcept>
#include <boost/spirit/include/qi.hpp>
#include <boost/spirit/include/phoenix.hpp>
#include <boost/spirit/repository/include/qi_iter_pos.hpp>
namespace loloof64 {
namespace phx = boost::phoenix;
namespace qi = boost::spirit::qi;
namespace qr = boost::spirit::repository::qi;
/*
* This class has been taken from http://marko-editor.com/articles/position_tracking/
*/
template<typename Iterator>
struct CurrentPos {
CurrentPos() {
save_start_pos = qr::iter_pos [phx::bind(&CurrentPos::setStartPos, this, qi::_1)] >> qi::eps;
current_pos = qr::iter_pos [qi::_val = phx::bind(&CurrentPos::getCurrentPos, this, qi::_1)] >> qi::eps;
}
qi::rule<Iterator> save_start_pos;
qi::rule<Iterator, std::size_t()> current_pos;
private:
void setStartPos(const Iterator &iterator) {
start_pos_ = iterator;
}
std::size_t getCurrentPos(const Iterator &iterator) {
return std::distance(start_pos_, iterator);
}
Iterator start_pos_;
};
enum result_t { white_won, black_won, draw, undecided };
struct pgn_tag {
std::string key;
std::string value;
};
struct game_move {
unsigned move_number;
std::string white_move;
std::string black_move;
result_t result;
};
struct pgn_game {
std::vector<pgn_tag> header;
std::vector<game_move> moves;
};
class PgnGamesExtractor {
public:
PgnGamesExtractor(std::string const& inputFilePath);
/*
Both constructos may throw PgnParsingException (if bad pgn format) and InputFileException (if missing file)
*/
std::vector<pgn_game> getGames() const { return games; }
virtual ~PgnGamesExtractor();
protected:
private:
std::vector<pgn_game> games;
void parseInput(std::string const&);
};
class PgnParsingException : public virtual std::runtime_error {
public:
PgnParsingException(std::string message) : std::runtime_error(message) {}
};
class InputFileException : public virtual std::runtime_error {
public:
InputFileException(std::string message) : std::runtime_error(message) {}
};
}
#endif // PGNGAMESEXTRACTOR_HPP
//#include "pgn_games_extractor.h"
#include <boost/spirit/include/qi.hpp>
#include <boost/fusion/include/adapt_struct.hpp>
#include <iostream>
#include <iomanip>
BOOST_FUSION_ADAPT_STRUCT(loloof64::pgn_tag, key, value)
BOOST_FUSION_ADAPT_STRUCT(loloof64::game_move, move_number, white_move, black_move, result)
BOOST_FUSION_ADAPT_STRUCT(loloof64::pgn_game, header, moves)
namespace loloof64 {
namespace qi = boost::spirit::qi;
template <typename Iterator> struct pgn_parser : qi::grammar<Iterator, std::vector<pgn_game>(), qi::space_type> {
pgn_parser(Iterator start, Iterator end)
: pgn_parser::base_type(games),
reportProgress(std::distance(start, end))
{
using namespace qi;
const std::string no_move;
result.add
("1-0", result_t::white_won)
("0-1", result_t::black_won)
("1/2-1/2", result_t::draw)
("*", result_t::undecided);
quoted_string = '"' >> *~char_('"') >> '"';
tag = '[' >> +alnum >> quoted_string >> ']';
header = +tag;
regular_move = lit("O-O-O") | "O-O" | (+char_("a-hNBRQK") >> +char_("a-h1-8x=NBRQK") >> -lit("e.p."));
single_move = raw [ regular_move >> -char_("+#") ];
full_move %=
omit[filepos.current_pos [ reportProgress(_1) ]] >>
uint_
>> (lexeme["..." >> attr(no_move)] | "." >> single_move)
>> (single_move | attr(no_move))
>> -result;
game_description = +full_move;
single_game = -header >> game_description;
games = filepos.save_start_pos >> *single_game;
BOOST_SPIRIT_DEBUG_NODES(
(tag)(header)(quoted_string)(regular_move)(single_move)
(full_move)(game_description)(single_game)(games)
)
}
private:
struct reportProgress_f {
size_t total_;
mutable double pct = 0.0;
reportProgress_f(size_t total) : total_(total) {}
template<typename T>
void operator()(T pos) const {
double newpct = pos * 100.0 / total_;
if ((newpct - pct) > 10) {
//sleep(1); // because it's way too fast otherwise...
pct = newpct;
std::cerr << "\rProgress " << std::fixed << std::setprecision(1) << pct << " " << std::flush;
};
}
};
phx::function<reportProgress_f> reportProgress;
CurrentPos<Iterator> filepos;
qi::rule<Iterator, pgn_tag(), qi::space_type> tag;
qi::rule<Iterator, std::vector<pgn_tag>, qi::space_type> header;
qi::rule<Iterator, game_move(), qi::space_type> full_move;
qi::rule<Iterator, std::vector<game_move>, qi::space_type> game_description;
qi::rule<Iterator, pgn_game(), qi::space_type> single_game;
qi::rule<Iterator, std::vector<pgn_game>(), qi::space_type> games;
// lexemes
qi::symbols<char, result_t> result;
qi::rule<Iterator, std::string()> quoted_string;
qi::rule<Iterator> regular_move;
qi::rule<Iterator, std::string()> single_move;
};
}
#include <boost/iostreams/device/mapped_file.hpp>
loloof64::PgnGamesExtractor::~PgnGamesExtractor() {
// dtor
}
loloof64::PgnGamesExtractor::PgnGamesExtractor(std::string const& inputFilePath) {
parseInput(inputFilePath);
}
void loloof64::PgnGamesExtractor::parseInput(std::string const& inputFilePath) {
boost::iostreams::mapped_file_source mf(inputFilePath);
//if (inputFile.fail() || inputFile.bad())
//throw new InputFileException("Could not read the input file !");
typedef char const* It;
std::vector<loloof64::pgn_game> temp_games;
/* It iter(inputFile >> std::noskipws), end; */
auto iter = mf.begin();
auto end = mf.end();
loloof64::pgn_parser<It> parser(iter, end);
//////////////////////////////////
//std::cout << "About to parse the file" << std::endl;
//////////////////////////////////
bool success = boost::spirit::qi::phrase_parse(iter, end, parser, boost::spirit::qi::space, temp_games);
//////////////////////////////////
//std::cout << "Finished to parse the file" << std::endl;
//////////////////////////////////
if (success && iter == end) {
games.swap(temp_games);
} else {
std::string error_fragment(iter, end);
throw PgnParsingException("Failed to parse the input at :'" + error_fragment + "' !");
}
}
int main() {
loloof64::PgnGamesExtractor pge("ScotchGambit.pgn");
std::cout << "Parsed " << pge.getGames().size() << " games\n";
for (auto& g : pge.getGames())
for (auto& m : g.moves)
std::cout << m.move_number << ".\t" << m.white_move << "\t" << m.black_move << "\n";
}
With sample output
Progress 32.6
Progress 44.5
Progress 55.5
Progress 67.2
Progress 77.2
Progress 89.1
Progress 100.0Parsed 1 games
1. e4 e5
2. Nf3 Nc6
3. d4 exd4
4. Bc4 Qf6
5. O-O d6
6. Ng5 Nh6
7. f4 Be7
8. e5 Qg6
9. exd6 cxd6
10. c3 dxc3
11. Nxc3 O-O
12. Nd5 Bd7
13. Rf3 Bg4
14. Bd3 Bxf3
15. Qxf3 f5
16. Bc4 Kh8
17. Nxe7 Nxe7
18. Qxb7 Qf6
19. Be3 Rfb8
20. Qd7 Rd8
21. Qb7 d5
22. Bb3 Nc6
23. Bxd5 Nd4
24. Rd1 Ne2+
25. Kf1 Rab8
26. Qxa7 Rxb2
27. Ne6 Qxe6
28. Bxe6 Rxd1+
29. Kf2
Note that on a terminal, the progress indication will self-update using a carriage-return instead of printing separate lines