Search code examples
c++cudathrust

How to use thrust to copy values with the same keys from one array to another?


I have a sparse histogram with keys as bins and values as count. I would like to use the keys and counts in the histogram to create another key value array. The keys in this array are bins but repeated count times, and in the end each key would have value equal to the corresponding count. Keys in both the histogram and the array are sorted in ascending order.

For example if the histogram looked like this:

    Key/Bins:       0 2 4 5 6 7
    Values/Counts:  3 2 1 4 2 3 

The key array whose values I would like to find looks like this:

 Key:     0 0 0 2 2 4 5 5 5 5 6 6 7 7 7

After populating the values using the histogram the new key-value array would look like this:

   Key:     0 0 0 2 2 4 5 5 5 5 6 6 7 7 7
   Values:  3 3 3 2 2 1 4 4 4 4 2 2 3 3 3 

I can do this using loops and checking if two keys are the same but is there an efficient way to do this using Thrust?

Thanks!


Solution

  • Here is one possible method:

    1. use thrust::transform using two offset versions of the key array to mark the beginning of each segment

      Key:     0 0 0 2 2 4 5 5 5 5 6 6 7 7 7
      seg:     0 0 0 1 0 1 1 0 0 0 1 0 1 0 0
      
    2. with the beginning of each segment marked, perform a thrust::inclusive_scan to turn the segment markers into a set of lookup indices

      Key:     0 0 0 2 2 4 5 5 5 5 6 6 7 7 7
      seg:     0 0 0 1 0 1 1 0 0 0 1 0 1 0 0
      idx:     0 0 0 1 1 2 3 3 3 3 4 4 5 5 5
      
    3. these lookup indices can then be used with a thrust::permutation_iterator to copy the indexed values from the values/counts array into the desired output

      Key:     0 0 0 2 2 4 5 5 5 5 6 6 7 7 7
      seg:     0 0 0 1 0 1 1 0 0 0 1 0 1 0 0
      idx:     0 0 0 1 1 2 3 3 3 3 4 4 5 5 5
      Val:     3 3 3 2 2 1 4 4 4 4 2 2 3 3 3
      

    Here is a worked example:

    $ cat t1299.cu
    #include <thrust/device_vector.h>
    #include <thrust/iterator/zip_iterator.h>
    #include <thrust/iterator/permutation_iterator.h>
    #include <thrust/transform.h>
    #include <thrust/remove.h>
    #include <thrust/copy.h>
    #include <iostream>
    
    struct my_remove
    {
      template <typename T>
      __host__ __device__
      bool operator()(const T &t)
      {
        return (thrust::get<0>(t) == 0);
      }
    };
    
    struct my_cmp
    {
      template <typename T>
      __host__ __device__
      int operator()(const T &t)
      {
        if (thrust::get<0>(t) != thrust::get<1>(t)) return 1;
        return 0;
      }
    };
    
    using namespace thrust::placeholders;
    
    int main(){
    
      int kb[] = {0,2,4,5,6,7};
      int vc[] = {3,2,1,4,2,3};
      int key[] = {0,0,0,2,2,4,5,5,5,5,6,6,7,7,7};
      int kbsize = sizeof(kb)/sizeof(int);
      int keysize = sizeof(key)/sizeof(int);
    
      thrust::device_vector<int> dkb(kb, kb+kbsize);
      thrust::device_vector<int> dvc(vc, vc+kbsize);
      thrust::device_vector<int> dkey(key, key+keysize);
      thrust::device_vector<int> dval(keysize);
      thrust::copy(dkey.begin(), dkey.end(), std::ostream_iterator<int>(std::cout, ","));
      std::cout << std::endl;
    
      //first remove any zero count values from kb/vc
    
    //  thrust::remove_if(thrust::make_zip_iterator(thrust::make_tuple(dvc.begin(), dkb.begin())), thrust::make_zip_iterator(thrust::make_tuple(dvc.end(), dkb.end())), my_remove());
    
      // next, mark the segments in the key array
    
      thrust::transform(thrust::make_zip_iterator(thrust::make_tuple(dkey.begin(), dkey.begin()+1)), thrust::make_zip_iterator(thrust::make_tuple(dkey.end()-1, dkey.end())), dval.begin()+1, my_cmp());
    
      // prefix sum to generate lookup indices
      thrust::device_vector<int> didx(keysize);
      thrust::inclusive_scan(dval.begin(), dval.end(), didx.begin());
    
      // use lookup indices to populate values vector
    
      thrust::copy(thrust::make_permutation_iterator(dvc.begin(), didx.begin()), thrust::make_permutation_iterator(dvc.begin(), didx.end()), dval.begin());
    
      // display results
    
      thrust::copy(dval.begin(), dval.end(), std::ostream_iterator<int>(std::cout, ","));
      std::cout << std::endl;
      return 0;
    }
    $ nvcc -arch=sm_35 -o t1299 t1299.cu
    $ ./t1299
    0,0,0,2,2,4,5,5,5,5,6,6,7,7,7,
    3,3,3,2,2,1,4,4,4,4,2,2,3,3,3,
    $
    

    A typical example of thrust optimization is to look for "fusion" of operations. In this case, since we have a transform operation immediately followed by an inclusive scan, a simple example of fusion would be to replace these with a thrust::transform_inclusive_scan