Search code examples
c++ boost boost-multi-array

How to store values in the boost multi_array container?


I'm struggling to access the values and store them in a boost multi_array container. I've tried to access the elements using the indexing methods ([] and .at()), but compilation fails with error: no matching function for call to 'boost::multi_array<float, 2>::data(int)'. I can print the data (see the code), but I have no idea how to store it and access it again for further computations. The data is two-dimensional (11214, 3), but for the time being I just want to flatten it and have a sequence of values. So my question is: how do I access the elements, and how do I store them in the container?

#include <boost/multi_array.hpp>
#include <boost/timer/timer.hpp>
#include <boost/range/irange.hpp>
#include <h5xx/h5xx.hpp>
#include <iostream>
#include <vector>
#include <algorithm>
#include <iterator>
#include <string>

using array_2d_t = boost::multi_array<float, 2>;
//using array_2d_t = boost::multi_array<float, 3>;
// Writes a two-level range to stdout: one output line per row, each value
// formatted with "%10f " (minimum field width 10, followed by one space).
template <typename Array>
void print_array(Array const& array)
{
    for (auto row_it = std::begin(array); row_it != std::end(array); ++row_it) {
        auto const& row = *row_it;
        for (auto cell_it = std::begin(row); cell_it != std::end(row); ++cell_it) {
            auto value = *cell_it;
            printf("%10f ", value);
        }
        printf("\n"); // end of the current row
    }
}

// Opens `filename` in input mode and returns the "value" dataset located in
// the "particles/lipids/box/positions" group of that file.
h5xx::dataset open_dataset(std::string const& filename) {
    char const* const group_path = "particles/lipids/box/positions";
    char const* const dataset_name = "value";

    h5xx::file h5_file(filename, h5xx::file::mode::in);
    h5xx::group positions(h5_file, group_path);

    // constructed directly in the return statement, as in the original
    return h5xx::dataset(positions, dataset_name);
}

// Reads the single cell at (row, col) from every frame of `ds` and returns
// the values as one vector with one entry per frame.
std::vector<float> cell_from_all_frames(h5xx::dataset& ds, size_t row, size_t col) {
    // the three extents are: frames, particle count, space dimension
    auto const shape = h5xx::dataspace(ds).extents<3>();
    auto const frame_count = shape[0];

    // selection starts at (0, row, col) and spans frame_count x 1 x 1 cells
    std::vector<hsize_t> start{0, row, col};
    std::vector<hsize_t> extent{frame_count, 1, 1};
    h5xx::slice selection(start, extent);

    std::vector<float> result(frame_count);
    h5xx::read_dataset(ds, result, selection);
    return result;
}

// Opens `filename` in input mode and reads frame `frame_no` of the "value"
// dataset in the "particles/lipids/box/positions" group into a 2-D array.
array_2d_t read_frame(std::string const& filename, unsigned frame_no) {
    h5xx::file h5_file(filename, h5xx::file::mode::in);
    h5xx::group positions(h5_file, "particles/lipids/box/positions");
    h5xx::dataset values(positions, "value");

    // the three extents are: frames, particle count, space dimension
    auto const shape = h5xx::dataspace(values).extents<3>();
    auto const particle_count = shape[1];
    auto const space_dim = shape[2];

    // one frame is a (particle count) x (space dimension) table
    array_2d_t frame(boost::extents[particle_count][space_dim]);

    // select exactly one frame, covering the whole table
    std::vector<hsize_t> start{frame_no, 0, 0};
    std::vector<hsize_t> extent{1, frame.shape()[0], frame.shape()[1]};
    h5xx::slice selection(start, extent);

    h5xx::read_dataset(values, frame, selection);
    return frame;
}

// Entry point: expects the path of an HDF5 input file as the first argument,
// opens the positions dataset and reads cell (0, 0) from every frame.
int main(int argc, char const* argv[])
{
    bool const has_input_path = (argc >= 2);
    if (!has_input_path) {
        // without an input file, only print the usage line and exit with -1
        std::cout << "Usage: " << argv[0] << " input.h5" << std::endl;
        return -1;
    }

    auto ds = open_dataset(argv[1]);
    std::vector<float> first_cells = cell_from_all_frames(ds, 0, 0);

    // set up multi-tau correlator for the computation of time correlation functions
    // FIXME: obtain these parameters from the HDF5 file. The 10 samples are meant
    // to be the first element of the first row, then the first element of row
    // 11214, of row 11214*2, of row 11214*3, ..., of row 11214*10.
    size_t nsamples = 10;
    return 0;
}

In main() I want to read the data with the function read_frame and try to pass it to nsamples. I've tried a few things, but it doesn't work!


Solution

  • You can choose.

    Store it?

    // keep the array returned by read_frame in a local variable
    array_2d_t  frame = read_frame(filename, 1);
    

    Access an element?

    // access individual elements
    // (a frame is (11214, 3) here, so the valid column indices are 0..2):
    float ele = frame[0][2];
    
    // or with index list:
    std::array<int, 2> indices{0,2};
    ele = frame(indices);
    

    Or, as you seem to want, provide a flat view of the array:

    // 1-D multi_array_ref built from frame.origin(), with a single extent of
    // frame.num_elements()
    boost::multi_array_ref<float, 1> sequence(frame.origin(), boost::extents[frame.num_elements()]);
    // the 0.f seed makes std::accumulate sum in float
    fmt::print("Sum of all {} elements: {}\n",
            sequence.size(),
            std::accumulate(sequence.begin(), sequence.end(), 0.f));
    

    In fact you might reshape in-place, but then you cannot change the dimensionality, so you get 1 "row" of all cells:

    // reshape to 1 x num_elements; the array keeps its two dimensions
    frame.reshape(std::array<size_t, 2> {1, frame.num_elements()});
    // now the first "row" is the full sequence:
    auto&& sequence = frame[0];
    // the 0.f seed makes std::accumulate sum in float
    fmt::print("Sum of all {} elements: {}\n",
            sequence.size(),
            std::accumulate(sequence.begin(), sequence.end(), 0.f));
    

    There are legion options for slicing/reindexing, with or without strides, but I refer to the Boost documentation to avoid needlessly complicating things here.

    Live Demo

    #include <boost/multi_array.hpp>
    #include <fmt/ranges.h>
    #include <h5xx/h5xx.hpp>
    #include <iostream>
    #include <iterator>
    
    using array_2d_t = boost::multi_array<float, 2>;
    
    // Opens `filename` in input mode and returns the "value" dataset located in
    // the "particles/lipids/box/positions" group of that file.
    h5xx::dataset open_dataset(std::string const& filename) {
        char const* const group_path = "particles/lipids/box/positions";
        char const* const dataset_name = "value";

        h5xx::file h5_file(filename, h5xx::file::mode::in);
        h5xx::group positions(h5_file, group_path);

        // constructed directly in the return statement, as in the original
        return h5xx::dataset(positions, dataset_name);
    }
    
    // Reads frame `frame_no` of the already opened dataset `ds` into a 2-D
    // array sized from the dataset's second and third extents.
    array_2d_t read_frame(h5xx::dataset& ds, unsigned frame_no) {
        auto const shape = h5xx::dataspace(ds).extents<3>();
        auto const row_count = shape[1];
        auto const col_count = shape[2];

        array_2d_t frame(boost::extents[row_count][col_count]);

        // select exactly one frame, covering the whole table
        std::vector<hsize_t> start{frame_no, 0, 0};
        std::vector<hsize_t> extent{1, frame.shape()[0], frame.shape()[1]};
        h5xx::slice selection(start, extent);

        h5xx::read_dataset(ds, frame, selection);
        return frame;
    }
    
    // Reads the single cell at (row, col) from every frame of `ds` and returns
    // the values as one vector with one entry per frame.
    std::vector<float> cell_from_all_frames(h5xx::dataset& ds, size_t row, size_t col) {
        // the three extents are: frames, particle count, space dimension
        auto const shape = h5xx::dataspace(ds).extents<3>();
        auto const frame_count = shape[0];

        // selection starts at (0, row, col) and spans frame_count x 1 x 1 cells
        std::vector<hsize_t> start{0, row, col};
        std::vector<hsize_t> extent{frame_count, 1, 1};
        h5xx::slice selection(start, extent);

        std::vector<float> result(frame_count);
        h5xx::read_dataset(ds, result, selection);
        return result;
    }
    
    int main(int argc, char const* argv[])
    {
        if (argc < 2) {
            std::cout << "Usage: " << argv[0] << " input.h5" << std::endl;
            return -1;
        }
    
        auto ds = open_dataset(argv[1]);
        array_2d_t  frame = read_frame(ds, 1);
    
        // access individual elements:
        [[maybe_unused]] float ele = frame[0][2];
    
        // or with index list:
        std::array<int, 2> indices{0,2};
        ele = frame(indices);
    
        {
            boost::multi_array_ref<float, 1> sequence(frame.origin(), boost::extents[frame.num_elements()]);
            fmt::print("Sum of all {} elements: {}\n",
                    sequence.size(),
                    std::accumulate(sequence.begin(), sequence.end(), 0.f));
        }
    
        {
            // in fact yuou might reshape in-place, but then you cannot change dimensiaonality
            frame.reshape(std::array<size_t, 2> {1, frame.num_elements()});
            // now the first "row" is the full sequence:
            auto&& sequence = frame[0];
            fmt::print("Sum of all {} elements: {}\n",
                    sequence.size(),
                    std::accumulate(sequence.begin(), sequence.end(), 0.f));
        }
    
        {
            std::vector<float> first_cells = cell_from_all_frames(ds, 0, 0);
    
            fmt::print("Sum of all {} first cells: {}\n",
                    first_cells.size(),
                    std::accumulate(first_cells.begin(), first_cells.end(), 0.f));
        }
    }
    

    With your xaa.h5 from a while ago:

    Sum of all 33642 elements: 737589.1
    Sum of all 33642 elements: 737589.1
    Sum of all 75 first cells: 6053.3496