I'm using boost::spirit to parse csv input (please don't suggest an alternative, this is just testing). When I read the contents of stdin to a string and iterate over that, the parsing succeeds; however, when the contents of std::cin
are read directly (through a wrapper that I wrote myself because phrase_parse requires an iterator inheriting from std::iterator<std::forward_iterator_tag, T>
, and std::istream_iterator<T>
doesn't do that), the parsing fails, and I can't figure out why, since the debugging output seems to suggest that the same text is parsed in both situations, with different results.
I even tried iterating over std::cin
and putting that into a string, and that parsed correctly; I don't understand why the type of iterator provided is affecting the result. Here's the example I'm working off of (sorry it's so large, but you can plug it in and compile it easily). Try defining the macros SECTION_STRINGSTREAM
(succeeds) or SECTION_CIN
(fails) to observe the strange behavior (the default behavior (succeeds) is when std::cin
is read to a string).
If you compile and run this with echo "\"f\",111,222,333,\"ref_type\",\"spc\",\"type\",\"lan\",\"name\",\"scop\"" | ./spirit_csv
, the debug output clearly shows the entire string is being parsed. I also added if (++start == end) std::cerr << "woah";
and that is being tripped in all situations, so it seems that it definitely is parsing to the end of the input.
// following example from:
// http://www.boost.org/doc/libs/1_58_0/libs/spirit/example/qi/employee.cpp, and
// num_list4.cpp, and others
#define BOOST_SPIRIT_DEBUG 1
#define BOOST_SPIRIT_DEBUG_PRINT_SOME 200
#define BOOST_SPIRIT_DEBUG_OUT std::cerr
// std includes
#include <iostream>
#include <string>
// boost includes
#include <boost/spirit/include/qi.hpp>
#include <boost/spirit/include/phoenix_stl.hpp>
namespace frontend {
namespace spirit = boost::spirit;
namespace qi = spirit::qi;
namespace ascii = spirit::ascii;
struct cursor {
std::string file;
unsigned long long offset;
unsigned long long line;
unsigned long long col;
// verify inputs using enum
// decl/ref/defn/call
std::string reference_type;
// variable/function/scope/label/type
std::string specifier;
// if variable/function, then type
std::string type;
std::string language;
std::string name;
std::string scope;
};
}
// adapt struct to boost fusion
BOOST_FUSION_ADAPT_STRUCT(frontend::cursor, (std::string, file),
(unsigned long long, offset),
(unsigned long long, line), (unsigned long long, col),
(std::string, reference_type),
(std::string, specifier), (std::string, type),
(std::string, language), (std::string, name),
(std::string, scope));
// note: blank_type is so that newlines aren't counted as skippable, because
// they are significant for csv! however, typically you'll be wanting to use
// boost::spirit::ascii::space as your whitespace operator if you really do not
// care about whitespace
namespace frontend {
template <typename Iterator>
struct cursor_parser
: public qi::grammar<Iterator, std::vector<cursor>(), qi::blank_type> {
qi::rule<Iterator, std::string(), qi::blank_type> quoted_string;
qi::rule<Iterator, cursor(), qi::blank_type> start;
qi::rule<Iterator, std::vector<cursor>(), qi::blank_type> vec;
cursor_parser() : cursor_parser::base_type(vec) {
using qi::uint_;
using qi::eol;
using qi::lexeme;
using qi::_1;
using ascii::char_;
using boost::phoenix::push_back;
using boost::phoenix::ref;
using boost::spirit::_val;
quoted_string %= lexeme['"' >> *(char_ - '"') >> '"'];
start %=
// file
quoted_string >> ',' >>
// offset
uint_ >> ',' >>
// line
uint_ >> ',' >>
// col
uint_ >> ',' >>
// reference_type
quoted_string >> ',' >>
// specifier
quoted_string >> ',' >>
// type
quoted_string >> ',' >>
// language
quoted_string >> ',' >>
// name
quoted_string >> ',' >>
// scope
quoted_string;
vec %= start % eol;
quoted_string.name("qs");
debug(quoted_string);
start.name("s");
debug(start);
vec.name("v");
debug(vec);
}
};
template <typename T>
class cin_forward_iterator : std::iterator<std::forward_iterator_tag, T> {
private:
std::istream_iterator<T> i;
public:
cin_forward_iterator() : i(std::istream_iterator<T>()) {}
cin_forward_iterator(std::istream &in) : i(std::istream_iterator<T>(in)) {}
const T &operator*() const { return *i; }
cin_forward_iterator<T> operator++() {
++i;
return *this;
};
cin_forward_iterator<T> operator++(int) {
cin_forward_iterator<T> tmp = *this;
i++;
return tmp;
};
bool operator==(const cin_forward_iterator<T> &rhs) const {
return i == rhs.i;
}
bool operator!=(const cin_forward_iterator<T> &rhs) const {
return not(*this == rhs);
}
};
}
namespace std {
template <typename T> class iterator_traits<frontend::cin_forward_iterator<T>> {
public:
typedef typename std::istream_iterator<T>::value_type value_type;
typedef typename std::istream_iterator<T>::difference_type difference_type;
typedef typename std::istream_iterator<T>::reference reference;
typedef typename std::istream_iterator<T>::pointer pointer;
typedef std::forward_iterator_tag iterator_category;
};
}
/* try:
echo \
"\"f\",111,222,333,\"ref_type\",\"spc\",\"type\",\"lan\",\"name\",\"scop\"" \
| ./spirit_csv
*/
int main() {
std::vector<frontend::cursor> v;
// succeeds
#ifdef SECTION_STRINGSTREAM
std::stringstream ss;
ss << std::cin.rdbuf();
std::string s(ss.str());
auto start = s.cbegin();
auto end = s.cend();
// fails
#elif SECTION_CIN
noskipws(std::cin);
frontend::cin_forward_iterator<char> start(std::cin);
frontend::cin_forward_iterator<char> end;
// succeeds
#else
noskipws(std::cin);
frontend::cin_forward_iterator<char> start_in(std::cin);
frontend::cin_forward_iterator<char> end_in;
std::string s;
for (; start_in != end_in; ++start_in) {
s += *start_in;
}
auto start = s.begin();
auto end = s.end();
#endif
if (phrase_parse(start, end,
#ifdef SECTION_STRINGSTREAM
frontend::cursor_parser<std::string::const_iterator>(),
#elif SECTION_CIN
frontend::cursor_parser<
frontend::cin_forward_iterator<char>>(),
#else
frontend::cursor_parser<std::string::iterator>(),
#endif
boost::spirit::qi::blank, v)) {
for (auto &c : v) {
std::cout << boost::fusion::as_vector(c) << std::endl;
}
std::cerr << "success!" << std::endl;
return 0;
} else {
std::cerr << "failure!" << std::endl;
return 1;
}
}
Why do you have your own iterator?
It's hard to get right, and it certainly doesn't look like you made it multi-pass aware.
There is a reason why input iterators have a different category than forward iterators! Just papering over it doesn't help. Forward iterators MUST be copyable and have repeatable values on dereference. Input iterators don't satisfy those criteria.
In fact you should either just use
boost::spirit::istream_iterator
or you could compose an iterator using Spirit's multi_pass adaptor:
Here's a fixed and cleaned-up version:
#define BOOST_SPIRIT_DEBUG 1
#define BOOST_SPIRIT_DEBUG_PRINT_SOME 200
#define BOOST_SPIRIT_DEBUG_OUT std::cerr
// std includes
#include <iostream>
#include <string>
// boost includes
#include <boost/spirit/include/qi.hpp>
#include <boost/fusion/adapted.hpp>
#include <boost/fusion/include/as_vector.hpp>
namespace frontend {
namespace qi = boost::spirit::qi;
struct cursor {
std::string file;
unsigned long long offset;
unsigned long long line;
unsigned long long col;
// verify inputs using enum
// decl/ref/defn/call
std::string reference_type;
// variable/function/scope/label/type
std::string specifier;
// if variable/function, then type
std::string type;
std::string language;
std::string name;
std::string scope;
};
}
// adapt struct to boost fusion
BOOST_FUSION_ADAPT_STRUCT(frontend::cursor,
(std::string, file)
(unsigned long long, offset)
(unsigned long long, line)
(unsigned long long, col)
(std::string, reference_type)
(std::string, specifier)
(std::string, type)
(std::string, language)
(std::string, name)
(std::string, scope))
namespace frontend {
// NOTE: blank_type doesn't skip newlines
template <typename Iterator>
struct cursor_parser : public qi::grammar<Iterator, std::vector<cursor>(), qi::blank_type> {
cursor_parser() : cursor_parser::base_type(vec) {
using qi::uint_;
using qi::eol;
using qi::lexeme;
using qi::char_;
quoted_string %= lexeme['"' >> *(char_ - '"') >> '"'];
start %=
quoted_string >> ',' >> // file
uint_ >> ',' >> // offset
uint_ >> ',' >> // line
uint_ >> ',' >> // col
quoted_string >> ',' >> // reference_type
quoted_string >> ',' >> // specifier
quoted_string >> ',' >> // type
quoted_string >> ',' >> // language
quoted_string >> ',' >> // name
quoted_string; // scope
vec %= start % eol;
BOOST_SPIRIT_DEBUG_NODES((quoted_string)(start)(vec))
}
private:
qi::rule<Iterator, std::string() , qi::blank_type> quoted_string;
qi::rule<Iterator, cursor() , qi::blank_type> start;
qi::rule<Iterator, std::vector<cursor>(), qi::blank_type> vec;
};
}
int main() {
// '"f",111,222,333,"ref_type","spc","type","lan","name","scop"'
using It = boost::spirit::istream_iterator;
It start_in(std::cin >> std::noskipws), end_in;
std::vector<frontend::cursor> v;
if (phrase_parse(start_in, end_in, frontend::cursor_parser<It>(), frontend::qi::blank, v)) {
for (auto &c : v) {
std::cout << boost::fusion::as_vector(c) << std::endl;
}
std::cerr << "success!" << std::endl;
} else {
std::cerr << "failure!" << std::endl;
return 1;
}
}
Output
(f 111 222 333 ref_type spc type lan name scop)
success!
Debug output:
<vec>
<try>"f",111,222,333,"ref_type","spc","type","lan","name","scop"\n</try>
<start>
<try>"f",111,222,333,"ref_type","spc","type","lan","name","scop"\n</try>
<quoted_string>
<try>"f",111,222,333,"ref_type","spc","type","lan","name","scop"\n</try>
<success>,111,222,333,"ref_type","spc","type","lan","name","scop"\n</success>
<attributes>[[f]]</attributes>
</quoted_string>
<quoted_string>
<try>"ref_type","spc","type","lan","name","scop"\n</try>
<success>,"spc","type","lan","name","scop"\n</success>
<attributes>[[r, e, f, _, t, y, p, e]]</attributes>
</quoted_string>
<quoted_string>
<try>"spc","type","lan","name","scop"\n</try>
<success>,"type","lan","name","scop"\n</success>
<attributes>[[s, p, c]]</attributes>
</quoted_string>
<quoted_string>
<try>"type","lan","name","scop"\n</try>
<success>,"lan","name","scop"\n</success>
<attributes>[[t, y, p, e]]</attributes>
</quoted_string>
<quoted_string>
<try>"lan","name","scop"\n</try>
<success>,"name","scop"\n</success>
<attributes>[[l, a, n]]</attributes>
</quoted_string>
<quoted_string>
<try>"name","scop"\n</try>
<success>,"scop"\n</success>
<attributes>[[n, a, m, e]]</attributes>
</quoted_string>
<quoted_string>
<try>"scop"\n</try>
<success>\n</success>
<attributes>[[s, c, o, p]]</attributes>
</quoted_string>
<success>\n</success>
<attributes>[[[f], 111, 222, 333, [r, e, f, _, t, y, p, e], [s, p, c], [t, y, p, e], [l, a, n], [n, a, m, e], [s, c, o, p]]]</attributes>
</start>
<start>
<try></try>
<quoted_string>
<try></try>
<fail/>
</quoted_string>
<fail/>
</start>
<success>\n</success>
<attributes>[[[[f], 111, 222, 333, [r, e, f, _, t, y, p, e], [s, p, c], [t, y, p, e], [l, a, n], [n, a, m, e], [s, c, o, p]]]]</attributes>
</vec>
Notes:
BOOST_FUSION_ADAPT_STRUCT
macro invocation (too many commas)