Search code examples
c++boostboost-tokenizer

Use boost::tokenizer with boost::iterator_range


I'm using boost::tokenizer to read a CSV-like file. I'm storing the tokens in a std::vector. It works well, but I want to store only a boost::iterator_range for each token instead of a copy of the string.

I tried:

#include <string>
#include <boost/tokenizer.hpp>
#include <boost/range/iterator_range.hpp>

// Read-only iterator into a std::string.
using string_iter = std::string::const_iterator;
// Non-owning [begin, end) pair of string iterators describing one token.
using string_view = boost::iterator_range<string_iter>;

int main(){
    std::string line;

    std::vector<string_view> contents;

    boost::tokenizer<boost::escaped_list_separator<char>, string_iter, string_view> tok(line.begin(), line.end());
    contents.assing(tok.begin(), tok.end());
}

But it fails to compile:

/usr/include/boost/token_functions.hpp: In instantiation of ‘bool boost::escaped_list_separator::operator()(InputIterator&, InputIterator, Token&) [with InputIterator = __gnu_cxx::__normal_iterator >; Token = boost::iterator_range<__gnu_cxx::__normal_iterator > >; Char = char; Traits = std::char_traits]’: /usr/include/boost/token_iterator.hpp:70:11: required from ‘void boost::token_iterator::initialize() [with TokenizerFunc = boost::escaped_list_separator; Iterator = __gnu_cxx::__normal_iterator >; Type = boost::iterator_range<__gnu_cxx::__normal_iterator > >]’ /usr/include/boost/token_iterator.hpp:77:63: required from ‘boost::token_iterator::token_iterator(TokenizerFunc, Iterator, Iterator) [with TokenizerFunc = boost::escaped_list_separator; Iterator = __gnu_cxx::__normal_iterator >; Type = boost::iterator_range<__gnu_cxx::__normal_iterator > >]’ /usr/include/boost/tokenizer.hpp:86:53: required from ‘boost::tokenizer::iter boost::tokenizer::begin() const [with TokenizerFunc = boost::escaped_list_separator; Iterator = __gnu_cxx::__normal_iterator >; Type = boost::iterator_range<__gnu_cxx::__normal_iterator > >; boost::tokenizer::iter = boost::token_iterator, __gnu_cxx::__normal_iterator >, boost::iterator_range<__gnu_cxx::__normal_iterator > > >]’ /home/wichtounet/dev/gooda-to-afdo-converter/src/gooda_reader.cpp:58:37: required from here /usr/include/boost/token_functions.hpp:187:16: error: no match for ‘operator+=’ in ‘tok += (& next)->__gnu_cxx::__normal_iterator<_Iterator, _Container>::operator* >()’ /usr/include/boost/token_functions.hpp:193:11: error: no match for ‘operator+=’ in ‘tok += (& next)->__gnu_cxx::__normal_iterator<_Iterator, _Container>::operator* >()’ /usr/include/boost/token_functions.hpp: In instantiation of ‘void boost::escaped_list_separator::do_escape(iterator&, iterator, Token&) [with iterator = __gnu_cxx::__normal_iterator >; Token = boost::iterator_range<__gnu_cxx::__normal_iterator > >; Char = char; Traits = std::char_traits]’: 
/usr/include/boost/token_functions.hpp:176:11: required from ‘bool boost::escaped_list_separator::operator()(InputIterator&, InputIterator, Token&) [with InputIterator = __gnu_cxx::__normal_iterator >; Token = boost::iterator_range<__gnu_cxx::__normal_iterator > >; Char = char; Traits = std::char_traits]’ /usr/include/boost/token_iterator.hpp:70:11: required from ‘void boost::token_iterator::initialize() [with TokenizerFunc = boost::escaped_list_separator; Iterator = __gnu_cxx::__normal_iterator >; Type = boost::iterator_range<__gnu_cxx::__normal_iterator > >]’ /usr/include/boost/token_iterator.hpp:77:63: required from ‘boost::token_iterator::token_iterator(TokenizerFunc, Iterator, Iterator) [with TokenizerFunc = boost::escaped_list_separator; Iterator = __gnu_cxx::__normal_iterator >; Type = boost::iterator_range<__gnu_cxx::__normal_iterator > >]’ /usr/include/boost/tokenizer.hpp:86:53: required from ‘boost::tokenizer::iter boost::tokenizer::begin() const [with TokenizerFunc = boost::escaped_list_separator; Iterator = __gnu_cxx::__normal_iterator >; Type = boost::iterator_range<__gnu_cxx::__normal_iterator > >; boost::tokenizer::iter = boost::token_iterator, __gnu_cxx::__normal_iterator >, boost::iterator_range<__gnu_cxx::__normal_iterator > > >]’ /home/wichtounet/dev/gooda-to-afdo-converter/src/gooda_reader.cpp:58:37: required from here /usr/include/boost/token_functions.hpp:130:9: error: no match for ‘operator+=’ in ‘tok += '\012'’ /usr/include/boost/token_functions.hpp:134:9: error: no match for ‘operator+=’ in ‘tok += (& next)->__gnu_cxx::__normal_iterator<_Iterator, _Container>::operator* >()’ /usr/include/boost/token_functions.hpp:138:9: error: no match for ‘operator+=’ in ‘tok += (& next)->__gnu_cxx::__normal_iterator<_Iterator, _Container>::operator* >()’ /usr/include/boost/token_functions.hpp:142:9: error: no match for ‘operator+=’ in ‘tok += (& next)->__gnu_cxx::__normal_iterator<_Iterator, _Container>::operator* >()’

I also simply tried to compute the two iterators by myself using the boost::token_iterator, but I haven't been successful so far.

Is there a way to get only the iterator range of each token, instead of a string copy, in order to improve performance?


Solution

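  • This can't work. The tokenizer expects a token type (the third template argument) to which the tokenizer function can append characters. Specifically, the type must provide an operator+= that accepts the character type of the input (the value_type of the underlying string iterator), because escaped_list_separator builds each token one character at a time with `tok += *next`; boost::iterator_range has no such operator. The code snippet below should take you a step further, though I am not sure if it's worth the effort...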

    #include <algorithm>
    #include <cstddef>
    #include <iostream>
    #include <iterator>
    #include <string>
    #include <vector>
    #include <boost/range/iterator_range.hpp>
    #include <boost/tokenizer.hpp>
    
    // Read-only iterator into a std::string.
    using string_iter = std::string::const_iterator;
    // Non-owning [begin, end) pair of string iterators describing one token.
    using string_view = boost::iterator_range<string_iter>;
    
    // Fixed-capacity character accumulator that can serve as the token type of
    // boost::tokenizer. It stores at most CSize characters; every character
    // appended once the buffer is full is silently discarded.
    template< std::size_t CSize >
    class assignable_view {
       std::size_t m_size;      // number of characters currently stored
       char m_buffer[CSize];    // storage; only the first m_size entries are meaningful
    
       // Writes the stored characters to p_out, one formatted char insertion each.
       friend std::ostream& operator << (std::ostream& p_out, assignable_view const & p_view)
       {
          for (std::size_t t_pos = 0u; t_pos != p_view.m_size; ++t_pos) {
             p_out << p_view.m_buffer[t_pos];
          }
          return p_out;
       }
    
    public:
       // Starts out empty.
       assignable_view() 
          : m_size(0u) {}
    
       // Appends one character (anything assignable to char); no-op when full.
       template <class TChar>
       void operator += (TChar p_input) 
       {
          if (m_size >= CSize) {
             return;
          }
          m_buffer[m_size] = p_input;
          ++m_size;
       }   
    };
    
    int main(){
        std::string line
            = "Field 1,\"putting quotes around fields, allows commas\",Field 3";
    
        std::vector<string_view> contents;
    
        boost::tokenizer<
           boost::escaped_list_separator<char>, 
           string_iter, 
           assignable_view<11>    
        > tok(line.begin(), line.end());
    
        for (auto const & t_s : tok) {
           std::cout << t_s << std::endl;
        }
        //contents.assing(tok.begin(), tok.end());
    }