Search code examples
c++multidimensional-arraymemmove

Apply memmove function to a 3d array


I am trying to achieve the fftshift function (from MATLAB) in c++ with for loop and it's really time-consuming. here is my code:

const int a = 3;
    const int b = 4;
    const int c = 5;
    int i, j, k;
    int aa = a / 2;
    int bb = b / 2;
    int cc = c / 2;

    double ***te, ***tempa;
    te = new double **[a];
    tempa = new double **[a];
    for (i = 0; i < a; i++)
    {
        te[i] = new double *[b];
        tempa[i] = new double *[b];
        for (j = 0; j < b; j++)
        {
            te[i][j] = new double [c];
            tempa[i][j] = new double [c];
            for (k = 0; k < c; k++)
            {
                te[i][j][k] = i + j+k;
            }

        }
    }
    /*for the row*/
    if (c % 2 == 1)
    {
        for (i = 0; i < a; i++)
        {
            for (j = 0; j < b; j++)
            {
                for (k = 0; k < cc; k++)
                {
                    tempa[i][j][k] = te[i][j][k + cc + 1];
                    tempa[i][j][k + cc] = te[i][j][k];
                    tempa[i][j][c - 1] = te[i][j][cc];
                }
            }
        }
    }
    else
    {
        for (i = 0; i < a; i++)
        {
            for (j = 0; j < b; j++)
            {
                for (k = 0; k < cc; k++)
                {
                    tempa[i][j][k] = te[i][j][k + cc];
                    tempa[i][j][k + cc] = te[i][j][k];
                }
            }
        }
    }

    for (i = 0; i < a; i++)
    {
        for (j = 0; j < b; j++)
        {
            for (k = 0; k < c; k++)
            {
                te[i][j][k] = tempa[i][j][k];
            }
        }
    }

    /*for the column*/
    if (b % 2 == 1)
    {
        for (i = 0; i < a; i++)
        {
            for (j = 0; j < bb; j++)
            {
                for (k = 0; k < c; k++)
                {
                    tempa[i][j][k] = te[i][j + bb + 1][k];
                    tempa[i][j + bb][k] = te[i][j][k];
                    tempa[i][b - 1][k] = te[i][bb][k];
                }
            }
        }
    }
    else
    {
        for (i = 0; i < a; i++)
        {
            for (j = 0; j < bb; j++)
            {
                for (k = 0; k < c; k++)
                {
                    tempa[i][j][k] = te[i][j + bb][k];
                    tempa[i][j + bb][k] = te[i][j][k];
                }
            }
        }
    }

    for (i = 0; i < a; i++)
    {
        for (j = 0; j < b; j++)
        {
            for (k = 0; k < c; k++)
            {
                te[i][j][k] = tempa[i][j][k];
            }
        }
    }

    /*for the third dimension*/
    if (a % 2 == 1)
    {

        for ( i = 0; i < aa; i++)
        {
            for (j = 0; j < b; j++)
            {
                for ( k = 0; k < c; k++)
                {
                    tempa[i][j][k] = te[i + aa + 1][j][k];
                    tempa[i + aa][j][k] = te[i][j][k];
                    tempa[a - 1][j][k] = te[aa][j][k];
                }
            }
        }
    }
    else
    {
        for (i = 0; i < aa; i++)
        {
            for ( j = 0; j < b; j++)
            {
                for ( k = 0; k < c; k++)
                {
                    tempa[i][j][k] = te[i + aa][j][k];
                    tempa[i + aa][j][k] = te[i][j][k];

                }
            }
        }
    }

    for (i = 0; i < a; i++)
    {
        for (j = 0; j < b; j++)
        {
            for (k = 0; k < c; k++)
            {
                cout << te[i][j][k] << ' ';
            }
            cout << endl;
        }
        cout << "\n";
    }
    cout << "and then" << endl;
    for (i = 0; i < a; i++)
    {
        for (j = 0; j < b; j++)
        {
            for (k = 0; k < c; k++)
            {
                cout << tempa[i][j][k] << ' ';
            }
            cout << endl;
        }
        cout << "\n";
    }

now I want to rewrite it with memmove to improve the running efficiency. For the 3rd dimension, I use:

    memmove(tempa, te + aa, sizeof(double)*(a - aa));
    memmove(tempa + aa+1, te, sizeof(double)* aa);

this code can works well with 1d and 2d array, but doesn't work for the 3d array. Also, I do not know how to move the column and row elements with memmove. Anyone can help me with all of these? thanks so much!!

Now I have modified the code as below:

double ***te, ***tempa1,***tempa2, ***tempa3;

    te = new double **[a];
    tempa1 = new double **[a];
    tempa2 = new double **[a];
    tempa3 = new double **[a];
    for (i = 0; i < a; i++)
    {
        te[i] = new double *[b];
        tempa1[i] = new double *[b];
        tempa2[i] = new double *[b];
        tempa3[i] = new double *[b];
        for (j = 0; j < b; j++)
        {
            te[i][j] = new double [c];
            tempa1[i][j] = new double [c];
            tempa2[i][j] = new double [c];
            tempa3[i][j] = new double [c];
            for (k = 0; k < c; k++)
            {
                te[i][j][k] = i + j+k;
            }

        }
    }

    /*for the third dimension*/
    memmove(tempa1, te + (a-aa), sizeof(double**)*aa);
    memmove(tempa1 + aa, te, sizeof(double**)* (a-aa));
    //memmove(te, tempa, sizeof(double)*a);
    /*for the row*/
    for (i = 0; i < a; i++)
    {
        memmove(tempa2[i], tempa1[i] + (b - bb), sizeof(double*)*bb);
        memmove(tempa2[i] + bb, tempa1[i], sizeof(double*)*(b - bb));
    }

    /*for the column*/
    for (j = 0; i < a; i++)
    {
        for (k = 0; j < b; j++)
        {
            memmove(tempa3[i][j], tempa2[i][j] + (c - cc), sizeof(double)*cc);
            memmove(tempa3[i][j] + cc, tempa2[i][j], sizeof(double)*(c-cc));
        }
    }

but the problem is that I define too much new dynamic arrays and also the results for tempa3 are incorrect. could anyone give some suggestions?


Solution

  • I believe you want something like that:

    memmove(tempa, te + (a - aa), sizeof(double**) * aa);
    memmove(tempa + aa, te, sizeof(double**) * (a - aa));
    

    or

    memmove(tempa, te + aa, sizeof(double**) * (a - aa));
    memmove(tempa + (a - aa), te, sizeof(double**) * aa);
    

    depending on whether you want to swap the first half "rounded up or down" (I assume you want it rounded up, it's the first version then).


    I don't really like your code's design though:

    First and foremost, avoid dynamic allocation and use std::vector or std::array when possible. You could argue it would prevent you from safely using memmove instead of swap for the first dimensions (well, it should work, but I'm not 100% sure it isn't implementation defined) but I don't think that would improve that much the efficiency. Besides, if you want to have a N-dimensional array, I usually prefer avoiding "chaining pointers" (although with your algorithm, you can actually use this structure, so it's not that bad).
    For instance, if you're adamant about dynamically allocating your array with new, you might use something like that instead to reduce memory usage (the difference might be neglectible though; it's also probably slightly faster but again, probably neglectible):

    #include <cstddef>
    #include <iostream>
    
    typedef std::size_t index_t;
    
    constexpr index_t width = 3;
    constexpr index_t height = 4;
    constexpr index_t depth = 5;
    
    // the cells (i, j, k) and (i, j, k+1) are adjacent in memory
    // the rows (i, j, _) and (i, j+1, _) are adjacent in memory
    // the "slices" (i, _, _) and (i+1, _, _) are adjacent in memory
    constexpr index_t cell_index(index_t i, index_t j, index_t k) {
      return (i * height + j) * depth + k;
    }
    
    int main() {
      int* array = new int[width * height * depth]();
      for( index_t i = 0 ; i < width ; ++i )
        for( index_t j = 0 ; j < height ; ++j )
          for( index_t k = 0 ; k < depth ; ++k ) {
            // do something on the cell (i, j, k)
            array[cell_index(i, j, k)] = i + j + k;
            std::cout << array[cell_index(i, j, k)] << ' ';
          }
      std::cout << '\n';
      // alternatively you can do this:
      //*
      for( index_t index = 0 ; index < width * height * depth ; ++index) {
        index_t i = index / (height * depth);
        index_t j = (index / depth) % height;
        index_t k = index % depth;
        array[index] = i + j + k;
        std::cout << array[index] << ' ';
      }
      std::cout << '\n';
      //*/
      delete[] array;
    }
    

    The difference is the organization in memory. Here you have a big block of 60*sizeof(int) bytes (usually 240 or 480 bytes), whereas with your method you would have: - 1 block of 3*sizeof(int**) bytes - 3 blocks of 4*sizeof(int*) bytes - 12 blocks of 5*sizeof(int) bytes (120 more bytes on a 64 bit architecture, two additional indirections for each cell access, and more code for allocating/deallocating all that memory) Granted, you can't do array[i][j][k] anymore, but still...

    The same stands with vectors (you can either make an std::vector<std::vector<std::vector<int>>> or a std::vector<int>)

    There is also a bit too much code repetition: your algorithm basically swaps the two halves of your table three times (once for each dimension), but you rewrote 3 times the same thing with a few differences.

    There is also too much memory allocation/copy (your algorithm works and can exploit the structure of array of pointers by simply swapping pointers to swap whole rows/slices, in that specific case, you can exploit this data structure to avoid copies with your algorithm... but you don't)

    You should choose more explicit variable names, that helps. For instance use width, height, depth instead of a, b, c.

    For instance, here is an implementation with vectors (I didn't know matlab's fftshift function though, but according to your code and this page, I assume it's basically "swapping the corners"):

    (also, compile with -std=c++11)

    #include <cstddef>
    #include <iostream>
    #include <vector>
    #include <algorithm>
    
    typedef std::size_t index_t;
    typedef double element_t;
    typedef std::vector<element_t> row_t;
    typedef std::vector<row_t> slice_t;
    typedef std::vector<slice_t> array_3d_t;
    
    // for one dimension
    // you might overload this for a std::vector<double>& and use memmove
    // as you originally wanted to do here
    template<class T>
    void fftshift_dimension(std::vector<T>& row)
    {
      using std::swap;
      const index_t size = row.size();
      if(size <= 1)
        return;
      const index_t halved_size = size / 2;
      // swap the two halves
      for(index_t i = 0, j = size - halved_size ; i < halved_size ; ++i, ++j)
        swap(row[i], row[j]);
      // if the size is odd, rotate the right part
      if(size % 2)
      {
        swap(row[halved_size], row[size - 1]);
        const index_t n = size - 2;
        for(index_t i = halved_size ; i < n ; ++i)
          swap(row[i], row[i + 1]);
      }
    }
    
    // base case
    template<class T>
    void fftshift(std::vector<T>& array) {
      fftshift_dimension(array);
    }
    
    // reduce the problem for a dimension N+1 to a dimension N
    template<class T>
    void fftshift(std::vector<std::vector<T>>& array) {
      fftshift_dimension(array);
      for(auto& slice : array)
        fftshift(slice);
    }
    
    // overloads operator<< to print a 3-dimensional array
    std::ostream& operator<<(std::ostream& output, const array_3d_t& input) {
      const index_t width = input.size();
      for(index_t i = 0; i < width ; i++)
      {
        const index_t height = input[i].size();
        for(index_t j = 0; j < height ; j++)
        {
          const index_t depth = input[i][j].size();
          for(index_t k = 0; k < depth; k++)
            output << input[i][j][k] << ' ';
          output << '\n';
        }
        output << '\n';
      }
      return output;
    }
    
    int main()
    {
      constexpr index_t width = 3;
      constexpr index_t height = 4;
      constexpr index_t depth = 5;
    
      array_3d_t input(width, slice_t(height, row_t(depth)));
    
      // initialization
      for(index_t i = 0 ; i < width ; ++i)
        for(index_t j = 0 ; j < height ; ++j)
          for(index_t k = 0 ; k < depth ; ++k)
            input[i][j][k] = i + j + k;
      std::cout << input;
    
      // in place fftshift
      fftshift(input);
    
      std::cout << "and then" << '\n' << input;
    }
    

    live example

    You could probably make a slightly more efficient algorithm by avoiding to swap multiple times the same cell and/or using memmove, but I think it's already fast enough for many uses (on my machine fftshift takes roughly 130ms for a 1000x1000x100 table).