Search code examples
c++boost

Flattening iterator for boost multi_array of linked lists


I have implemented a custom container type that stores integers within a boost::multi_array<std::list<int>, 2>. I want to be able to use a single range-based for loop to iterate through all int in this nested data structure (see code below for the desired loop construct).

To do so, I'm trying to adapt this solution. Here is the code from that solution plus a container definition and test code:

#include <iostream>
#include <vector>
#include <list>
#include <boost/multi_array.hpp>

template <typename OuterIterator>
class flattening_iterator
{
public:

    typedef OuterIterator                                outer_iterator;
    typedef typename OuterIterator::value_type::iterator inner_iterator;

    typedef std::forward_iterator_tag                iterator_category;
    typedef typename inner_iterator::value_type      value_type;
    typedef typename inner_iterator::difference_type difference_type;
    typedef typename inner_iterator::pointer         pointer;
    typedef typename inner_iterator::reference       reference;

    flattening_iterator() { }
    flattening_iterator(outer_iterator it) : outer_it_(it), outer_end_(it) { }
    flattening_iterator(outer_iterator it, outer_iterator end)
        : outer_it_(it),
          outer_end_(end)
    {
        if (outer_it_ == outer_end_) { return; }

        inner_it_ = outer_it_->begin();
        advance_past_empty_inner_containers();
    }

    reference operator*()  const { return *inner_it_;  }
    pointer   operator->() const { return &*inner_it_; }

    flattening_iterator& operator++()
    {
        ++inner_it_;
        if (inner_it_ == outer_it_->end())
            advance_past_empty_inner_containers();
        return *this;
    }

    flattening_iterator operator++(int)
    {
        flattening_iterator it(*this);
        ++*this;
        return it;
    }

    friend bool operator==(const flattening_iterator& a,
                           const flattening_iterator& b)
    {
        if (a.outer_it_ != b.outer_it_)
            return false;

        if (a.outer_it_ != a.outer_end_ &&
            b.outer_it_ != b.outer_end_ &&
            a.inner_it_ != b.inner_it_)
            return false;

        return true;
    }

    friend bool operator!=(const flattening_iterator& a,
                           const flattening_iterator& b)
    {
        return !(a == b);
    }

private:

    void advance_past_empty_inner_containers()
    {
        while (outer_it_ != outer_end_ && inner_it_ == outer_it_->end())
        {
            ++outer_it_;
            if (outer_it_ != outer_end_)
                inner_it_ = outer_it_->begin();
        }
    }

    outer_iterator outer_it_;
    outer_iterator outer_end_;
    inner_iterator inner_it_;
};

template <typename Iterator>
flattening_iterator<Iterator> flatten(Iterator it)
{
    return flattening_iterator<Iterator>(it, it);
}

template <typename Iterator>
flattening_iterator<Iterator> flatten(Iterator first, Iterator last)
{
    return flattening_iterator<Iterator>(first, last);
}

template <typename T, typename E>
class NestedContainer {
    T data;
    E extent;
    public:
        NestedContainer(E extent) : extent(extent) {
            // fill the container with sample data
            data.resize(extent);
            std::size_t i = 0;
            for ( auto &d: data ) {
                for ( std::size_t j = i; j < i+3; j++ ) {
                    d.push_back(i);
                }
                i += 3;
            }
        }
        auto begin() { return flatten(data.begin(), data.end()); }
        auto end() { return data.end(); }
};

int main()
{
    auto c1 = NestedContainer<std::vector<std::vector<int>>, std::size_t>(3); // works
    auto c2 = NestedContainer<std::vector<std::list<int>>, std::size_t>(3); // works
    // auto c3 = NestedContainer<boost::multi_array<std::list<int>, 2>, boost::multi_array_types::extent_gen::gen_type<2>::type>(boost::extents[2][2]); // does not compile

    auto c = c1;
    // loop
    for ( auto &val: c ) {
        std::cout << val << " ";
    }
    std::cout << "\n";
}

By above code, the approach works for a vector of vectors and a vector of lists, but not for a multi_array of lists:

fiter.cpp: In instantiation of ‘NestedContainer<T, E>::NestedContainer(E) [with T = boost::multi_array<std::__cxx11::list<int>, 2>; E = boost::detail::multi_array::extent_gen<2>]’:
fiter.cpp:123:147:   required from here
fiter.cpp:108:13: error: cannot bind non-const lvalue reference of type ‘boost::detail::multi_array::sub_array<std::__cxx11::list<int>, 1>&’ to an rvalue of type ‘boost::iterators::detail::iterator_facade_base<boost::detail::multi_array::array_iterator<std::__cxx11::list<int>, std::__cxx11::list<int>*, mpl_::size_t<2>, boost::detail::multi_array::sub_array<std::__cxx11::list<int>, 1>, boost::iterators::random_access_traversal_tag>, boost::multi_array<std::__cxx11::list<int>, 1, std::allocator<std::__cxx11::list<int> > >, boost::iterators::random_access_traversal_tag, boost::detail::multi_array::sub_array<std::__cxx11::list<int>, 1>, long int, false, false>::reference’ {aka ‘boost::detail::multi_array::sub_array<std::__cxx11::list<int>, 1>’}
  108 |             for ( auto &d: data ) {
      |             ^~~
fiter.cpp:110:23: error: ‘class boost::detail::multi_array::sub_array<std::__cxx11::list<int>, 1>’ has no member named ‘push_back’
  110 |                     d.push_back(i);
      |                     ~~^~~~~~~~~

It seems that the iterators returned by the begin() and end() member functions of boost::multi_array do not behave analogously as the ones from vector. Somehow they point to a sub_array instead of a std::list<int>, but I can't understand my underlying misconception from the concise reference documentation of multi_array and figuring it out from the source code of multi_array is currently beyond my proficiency.


Solution

  • It seems that the iterators returned by the begin() and end() member functions of boost::multi_array do not behave analogously as the ones from vector. Somehow they point to a sub_array instead of a std::list

    Yes. That's the reason it's called multi_array, not array. A multi-array is an abstraction that behaves like a container of sub-arrays (incidentally on top of some contiguous storage). At the lowest dimension these are themselves going to be containers of the element type.

    To behave this way with minimal overhead (unlike your vector<list> and vector<vector>) the library uses Proxy Type: https://en.wikibooks.org/wiki/More_C%2B%2B_Idioms/Temporary_Proxy

    Because the proxy is a temporary, you cannot bind a mutable reference with auto&. Instead, in generic code you often prefer to bind lvalue reference with auto&& which will extend the lifetime of the temporary: Lifetime of rvalue reference

     for (auto&& d : data)
    

    However, it still won't work with your push_back, obviously.

    Rethink #1

    Instead of assuming 80% of the data interface, and leaving a few nuts-and-bolts (like extent) fuzzy, make the entire Data type configurable:

    template <typename Data> class NestedContainer {
        Data data;
    
      public:
        template <typename E> NestedContainer(E extent) : data(extent) {}
        auto begin() { return flatten(data.begin(), data.end()); }
        auto end()   { return data.end(); }
    };
    

    Now you can define storage to cater for your vector/list based approach:

    #include <vector>
    template <typename T, typename Row = std::vector<T>> //
    struct VMatrix : std::vector<Row> {
        using Base = std::vector<Row>;
        using Base::operator=;
        VMatrix(VMatrix const&) = default;
        VMatrix(size_t extent = 0) : Base(extent, Row(extent, T{})) {
            for (int i = 0; auto&& row : *this)
                for (auto& el : row)
                    el = i++;
        }
    };
    

    Just like you can define one for the multi_array appraoch:

    #include <boost/multi_array.hpp>
    template <typename T> //
    struct MAMatrix : boost::multi_array<T, 2> {
        using Base = boost::multi_array<T, 2>;
        using Base::operator=;
    
        MAMatrix(MAMatrix const&) = default;
        MAMatrix(int extent = 0) : Base(boost::extents[extent][extent]) {
            for (int i = 0; auto&& row : *this)
                for (auto& el : row)
                    el = i++;
        }
    };
    

    Which also just works (note the auto&& row loop variable!)

    Live On Coliru

    template <typename Data> class NestedContainer {
        Data data;
    
      public:
        template <typename E> NestedContainer(E extent) : data(extent) {}
        auto begin() { return flatten(data.begin(), data.end()); }
        auto end()   { return data.end(); }
    };
    
    #include <vector>
    template <typename T, typename Row = std::vector<T>> //
    struct VMatrix : std::vector<Row> {
        using Base = std::vector<Row>;
        using Base::operator=;
        VMatrix(VMatrix const&) = default;
        VMatrix(size_t extent = 0) : Base(extent, Row(extent, T{})) {
            for (int i = 0; auto&& row : *this)
                for (auto& el : row)
                    el = i++;
        }
    };
    
    #include <boost/multi_array.hpp>
    template <typename T> //
    struct MAMatrix : boost::multi_array<T, 2> {
        using Base = boost::multi_array<T, 2>;
        using Base::operator=;
    
        MAMatrix(MAMatrix const&) = default;
        MAMatrix(int extent = 0) : Base(boost::extents[extent][extent]) {
            for (int i = 0; auto&& row : *this)
                for (auto& el : row)
                    el = i++;
        }
    };
    
    #include <iostream>
    #include <list>
    int main() {
        auto c1 = NestedContainer<VMatrix<int>>(3);                 // works
        auto c2 = NestedContainer<VMatrix<int, std::list<int>>>(3); // works
        auto c3 = NestedContainer<MAMatrix<int>>(3);                // works
    
        for (auto sep = '\n'; auto& val : c1) std::cout << sep << val, sep = ' ';
        for (auto sep = '\n'; auto& val : c2) std::cout << sep << val, sep = ' ';
        for (auto sep = '\n'; auto& val : c3) std::cout << sep << val, sep = ' ';
    }
    

    Printing

    0 1 2 3 4 5 6 7 8
    0 1 2 3 4 5 6 7 8
    0 1 2 3 4 5 6 7 8
    

    Rethink #2

    You will correctly observe that NestedContainer doesn't add much value. In fact, it mostly has cost because it abuses flattening_iterator instead of leveraging the contiguous storage that multi_array already natively affords!

    So, I'd lose the middle man and the cost overhead it introduced:

    Note it also removed the dependency on push_back which not provided by all container types

    Live On Coliru

    template <typename T, typename Row = std::vector<T>> //
    struct VMatrix : std::vector<Row> {
        using Base = std::vector<Row>;
        using Base::operator=;
        VMatrix(VMatrix const&) = default;
        VMatrix(size_t extent = 0) : Base(extent, Row(extent, T{})) {
            std::iota(begin(), end(), 0);
        }
    
        auto begin() { return flatten(Base::begin(), Base::end()); }
        auto end() { return flatten(Base::end(), Base::end()); }
    };
    
    #include <boost/multi_array.hpp>
    template <typename T> //
    struct MAMatrix : boost::multi_array<T, 2> {
        using Base = boost::multi_array<T, 2>;
        using Base::operator=;
    
        MAMatrix(MAMatrix const&) = default;
        MAMatrix(int n = 0) : Base(boost::extents[n][n]) { //
            std::iota(begin(), end(), 0);
        }
    
        auto begin() { return Base::origin(); }
        auto end() { return Base::origin() + Base::num_elements(); }
    };
    
    #include <fmt/ranges.h>
    #include <list>
    int main() {
        VMatrix<int>                 c1(3);
        VMatrix<int, std::list<int>> c2(3);
        MAMatrix<int>                c3(3);
    
        fmt::print("c1: {}\nc2: {}\nc3: {}\n", c1, c2, c3);
    }
    

    Printing

    c1: [0, 1, 2, 3, 4, 5, 6, 7, 8]
    c2: [0, 1, 2, 3, 4, 5, 6, 7, 8]
    c3: [0, 1, 2, 3, 4, 5, 6, 7, 8]
    

    UPDATE

    In response to the comments, I'd suggest not using a node-based container for efficiency. Instead, you could perhaps trade off storage by using a bitset or by optimizing for the nth percentile, so extra allocation/indirection only occurs when needed. Here's a simple example with number of particles per cell distributed uniformly on [1, 6], and extra allocation only happens when a cell's population exceeds 4:

    Live On Coliru

    #include <boost/multi_array.hpp>
    #include <boost/range/iterator_range.hpp>
    template <typename T> //
    struct MAMatrix : boost::multi_array<T, 2> {
        using Base = boost::multi_array<T, 2>;
        using Base::operator=;
    
        MAMatrix(MAMatrix const&) = default;
        MAMatrix(long n1 = 0, long n2 = 0) : Base(boost::extents[n1][n2 ? n2 : n1]) {}
    
        auto lists_size() const { return Base::num_elements(); }
        auto lists_begin() { return Base::origin(); }
        auto lists_end() { return lists_begin() + lists_size(); }
        auto lists() { return boost::make_iterator_range(lists_begin(), lists_end()); }
    
        auto begin() { return flatten(lists_begin(), lists_end()); }
        auto end() { return flatten(lists_end(), lists_end()); }
    };
    
    #include <fmt/ranges.h>
    #include <boost/container/small_vector.hpp>
    #include <random>
    using boost::container::small_vector;
    
    int main() {
        auto dist = bind(std::uniform_int_distribution(1, 6), std::mt19937{std::random_device{}()});
    
        MAMatrix<small_vector<int, 4>> cells(3, 4);
        for (int i = 0; auto& l : cells.lists())
            generate_n(std::back_inserter(l), dist(), [&] { return i++; });
    
        fmt::print("c1 lists:     {}\n", cells.lists());
        fmt::print("c1 flattened: {}\n", cells);
    }
    

    Printing outputs like

    for a in {1..4}; do (set -x; ./a.out); done
    + ./a.out
    c1 lists:     [[0, 1, 2], [3, 4, 5, 6, 7, 8], [9, 10], [11], [12, 13, 14, 15, 16, 17], [18, 19, 20, 21, 22], [23, 24, 25], [26], [27], [28, 29, 30, 31, 32, 33], [34, 35, 36, 37], [38, 39, 40, 41]]
    c1 flattened: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41]
    + ./a.out
    c1 lists:     [[0, 1, 2, 3, 4, 5], [6, 7, 8, 9, 10, 11], [12, 13, 14, 15, 16, 17], [18, 19], [20, 21, 22, 23, 24], [25, 26, 27, 28], [29, 30, 31, 32, 33, 34], [35, 36, 37, 38], [39], [40], [41], [42]]
    c1 flattened: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42]
    + ./a.out
    c1 lists:     [[0, 1, 2, 3, 4], [5, 6, 7], [8, 9, 10, 11, 12, 13], [14], [15, 16, 17, 18, 19, 20], [21, 22, 23, 24], [25], [26, 27, 28, 29, 30, 31], [32], [33, 34, 35, 36, 37, 38], [39, 40, 41], [42, 43, 44]]
    c1 flattened: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44]
    + ./a.out
    c1 lists:     [[0], [1, 2], [3, 4, 5, 6], [7, 8], [9, 10, 11], [12, 13], [14, 15, 16, 17, 18], [19, 20, 21, 22], [23, 24, 25, 26, 27], [28, 29], [30, 31, 32, 33], [34]]
    c1 flattened: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34]