Search code examples
cudathrust

Retain Duplicates with Set Intersection in CUDA


I'm using CUDA and THRUST to perform paired set operations. I would like to retain duplicates, however. For example:

int keys[6] = {1, 1, 1, 3, 4, 5, 5};
int vals[6] = {1, 2, 3, 4, 5, 6, 7};
int comp[2] = {1, 5};

thrust::set_intersection_by_key(keys, keys + 6, comp, comp + 2, vals, rk, rv);

Desired result

rk[1, 1, 1, 5, 5]
rv[1, 2, 3, 6, 7]

Actual Result

rk[1, 5]
rv[5, 7]

I want all of the vals where the corresponding key is contained in comp.

Is there any way to achieve this using thrust, or do I have to write my own kernel or thrust function?

I'm using this function: set_intersection_by_key.


Solution

  • Quoting from the thrust documentation:

    The generalization is that if an element appears m times in [keys_first1, keys_last1) and n times in [keys_first2, keys_last2) (where m may be zero), then it appears min(m,n) times in the keys output range

    Since comp does only contain each key once, n=1 and therefore min(m,1) = 1.

    In order to get "all of the vals where the corresponding key is contained in comp", you can use the approach of my answer to a similar problem.

    Similarly, the example code does the following steps:

    1. Get the largest element of d_comp. This assumes that d_comp is already sorted.

    2. Create vector d_map of size largest_element+1. Copy 1 to all positions of the entries of d_comp in d_map.

    3. Copy all entries from d_vals for which there is a 1 entry in d_map into d_result.

      #include <thrust/device_vector.h>
      #include <thrust/iterator/constant_iterator.h>
      #include <thrust/iterator/permutation_iterator.h>
      #include <thrust/functional.h>
      #include <thrust/copy.h>
      #include <thrust/scatter.h>
      #include <iostream>
      
      
      #define PRINTER(name) print(#name, (name))
      void print(const char* name, const thrust::device_vector<int>& v)
      {
          std::cout << name << ":\t";
          thrust::copy(v.begin(), v.end(), std::ostream_iterator<int>(std::cout, "\t"));
          std::cout << std::endl;
      }
      
      int main()
      {
          int keys[] = {1, 1, 1, 3, 4, 5, 5};
          int vals[] = {1, 2, 3, 4, 5, 6, 7};
          int comp[] = {1, 5};
      
          const int size_data = sizeof(keys)/sizeof(keys[0]);
          const int size_comp = sizeof(comp)/sizeof(comp[0]);
      
          // copy data to GPU
          thrust::device_vector<int> d_keys (keys, keys+size_data);
          thrust::device_vector<int> d_vals (vals, vals+size_data);
          thrust::device_vector<int> d_comp (comp, comp+size_comp);
      
          PRINTER(d_keys);
          PRINTER(d_vals);
          PRINTER(d_comp);
      
          int largest_element = d_comp.back();
      
          thrust::device_vector<int> d_map(largest_element+1);
      
          thrust::constant_iterator<int> one(1);
          thrust::scatter(one, one+size_comp, d_comp.begin(), d_map.begin());
          PRINTER(d_map);
      
          thrust::device_vector<int> d_result(size_data);
          using namespace thrust::placeholders;
          int final_size = thrust::copy_if(d_vals.begin(),
                                          d_vals.end(),
                                          thrust::make_permutation_iterator(d_map.begin(), d_keys.begin()),
                                          d_result.begin(),
                                          _1
                                          ) - d_result.begin();
          d_result.resize(final_size);
      
          PRINTER(d_result);
      
          return 0;
      }
      

    output:

    d_keys:     1   1   1   3   4   5   5   
    d_vals:     1   2   3   4   5   6   7   
    d_comp:     1   5   
    d_map:      0   1   0   0   0   1   
    d_result:   1   2   3   6   7