Search code examples
cudathrust

Use thrust reduce_by_key on a struct


I'm fairly new to CUDA and I'm trying to apply a reduce_by_key operation on a struct.

struct index_and_loc {
  int index;
  int3 location;
}

What I would like to do is, I have a vector of index_and_loc where elements would have the same index

thrust::host_vector<index_and_loc> my_vector;
my_vector= { {0, {0, 2, 5}},
             {0, {1, 3, 4}},
             {0, {0, 1, 3}},
             {1, {2, 1, 0}},
             {1, {2, 2, 2}}
           }

And I want to have two outputs, one would be the sum of all the elements that have the same index, and another vector that stores how many instances of each index. So my resulting outputs would be:

// Sum all elements with index 0 and 1
sum_vector = {{0, {1, 6, 12}},
              {1, {4, 3, 2}}
             }

instances_vector = {3, // Number of elements with index = 0
                    2} // Number of elements with index = 1

Reading the methods in thrust documentation, I believe I should be using reduce_by_key. I can declare my input1 be my_vector and input2 be a vector of 1`s with the same length as input1 and I can reduce using the index of my struct and use thrust::plus<int> as my BinaryFunction to the vector of 1`s. However this wouldn't be able to apply the sum of the int3 elements in my input1 vector, as the BinaryFunction gets applied on input2.

Is there any way to accomplish this? Please let me know if my question isn't very clear.

EDIT:

I reworked the problem and brought it down to an easier approach. I have instead added an instance column to my_vector and set them to 1. Now, using talonmies answer, I can get the both answers I was looking for. Here's my code

#include <thrust/extrema.h>
#include <thrust/reduce.h>
#include <thrust/execution_policy.h>
#include <thrust/functional.h>

struct index_and_loc {
  int index;
  int3 location;
  int instance;

  index_and_loc() = default;

  __host__ __device__
  index_and_loc(int index_, int3 location_, int instance_) :
  index(index_), instance(instance_) {
    location.x = location_.x; 
    location.y = location_.y;
    location.z = location_.z;
  };

  __host__ __device__
  index_and_loc& operator=(const index_and_loc& y) {
    index = y.index;
    location.x = y.location.x;
    location.y = y.location.y;
    location.z = y.location.z;
    instance = y.instance;
    return *this;
  };

  __host__ __device__
  bool operator==(const index_and_loc& y) const {
    return index == y.index;
  };

  __host__ __device__
  index_and_loc operator+(const index_and_loc& y) const {
    return index_and_loc(index,
      make_int3(location.x + y.location.x, 
                location.y + y.location.y, 
                location.z + y.location.z),
      instance + y.instance);
  };

};

int main() {

  int num_points = 5;
  thrust::device_vector<index_and_loc> my_vector(num_points);
  my_vector.push_back(index_and_loc(0, {0, 2, 5}, 1));
  my_vector.push_back(index_and_loc(0, {1, 3, 4}, 1));
  my_vector.push_back(index_and_loc(0, {0, 1, 3}, 1));
  my_vector.push_back(index_and_loc(1, {2, 1, 0}, 1));
  my_vector.push_back(index_and_loc(1, {2, 2, 2}, 1));

  thrust::device_vector<index_and_loc> same_idx(num_points);
  thrust::device_vector<index_and_loc> sum_locs(num_points);

  thrust::equal_to<index_and_loc> pred;
  thrust::plus<index_and_loc> op;

  auto res = thrust::reduce_by_key(
    thrust::device,
    my_vector.begin(),
    my_vector.end(),
    my_vector.begin(),
    same_idx.begin(),
    sum_locs.begin(),
    pred,
    op);

  for(int i=0; i<2; i++) {
    index_and_loc y = same_idx[i];
    index_and_loc x = sum_locs[i];
    std::cout << y.index << " {" << x.location.x 
                         << " " << x.location.y 
                         << " " << x.location.z 
                         << "}, " << x.instance << std::endl;
  }

  return 0;
}

Running this will in fact give me

0 {1 6 12}, 3
1 {4 3 2}, 2

Solution

  • There are many ways to potentially do what you want. I have no feeling for what will and will not be efficient, that is up to you to decide.

    One approach is to define the necessary comparison and addition operators in your class and then use those wrapped in the pre-canned thrust binary functors for the required binary operator and predicate for the reduce_by_keycall. For example:

    #include <thrust/functional.h>
    #include <thrust/host_vector.h>
    #include <thrust/device_vector.h>
    #include <iostream>
    
    struct index_and_loc {
      int index;
      int3 location;
    
      index_and_loc() = default;
    
      __host__ __device__
      index_and_loc(int index_, int3 location_) {
          index = index_;
          location.x = location_.x;
          location.y = location_.y;
          location.z = location_.z;
      };
    
      __host__ __device__
      index_and_loc& operator=(const index_and_loc& y) {
          index = y.index;
          location.x = y.location.x;
          location.y = y.location.y;
          location.z = y.location.z;
          return *this;
      };
    
      __host__ __device__
      bool operator==(const index_and_loc& y) const {
          return index == y.index;
      };
    
      __host__ __device__
      index_and_loc operator+(const index_and_loc& y) const {
          return index_and_loc(index, make_int3(location.x + y.location.x, 
                                                location.y + y.location.y, 
                                                location.z + y.location.z));
      };
    };
    
    int main()
    {
        thrust::host_vector<index_and_loc> my_vector(5); 
        my_vector[0] =  {0, {0, 2, 5}};
        my_vector[1] =  {0, {1, 3, 4}};
        my_vector[2] =  {0, {0, 1, 3}};
        my_vector[3] =  {1, {2, 1, 0}};
        my_vector[4] =  {1, {2, 2, 2}};
    
        thrust::device_vector<index_and_loc> d_vector = my_vector; 
        thrust::device_vector<index_and_loc> keys_out(5);
        thrust::device_vector<index_and_loc> data_out(5);
    
        thrust::equal_to<index_and_loc> pred;
        thrust::plus<index_and_loc> op;
    
        auto res = thrust::reduce_by_key(
                d_vector.begin(),
                d_vector.end(),
                d_vector.begin(),
                keys_out.begin(),
                data_out.begin(),
                pred,
                op);
    
        for(int i=0; i<2; i++) {
            index_and_loc y = keys_out[i];
            index_and_loc x = data_out[i];
            std::cout << y.index << " {" << x.location.x 
                                 << " " << x.location.y 
                                 << " " << x.location.z 
                                 << "}" << std::endl;
        }
    
        return 0;
    }
    

    They key things is the correct definition of operator== for the predicate and operator+ for the reduction. The rest is just required for copy assignment and construction.

    This appears to do what you want:

    $ nvcc -arch=sm_52 -std=c++11 -o improbable improbable.cu
    $ ./improbable 
    0 {1 6 12}
    1 {4 3 2}
    

    Note that because thrust expects that key and data are separate iterators, you must have separate output iterators for the reduced keys and data. That means you effectively get two copies of the solution. Whether that is a big deal is for you to decide.