I have a sparse histogram with keys as bins and values as count. I would like to use the keys and counts in the histogram to create another key value array. The keys in this array are bins but repeated count times, and in the end each key would have value equal to the corresponding count. Keys in both the histogram and the array are sorted in ascending order.
For example if the histogram looked like this:
Key/Bins: 0 2 4 5 6 7
Values/Counts: 3 2 1 4 2 3
The key array whose values I would like to find looks like this:
Key: 0 0 0 2 2 4 5 5 5 5 6 6 7 7 7
After populating the values using the histogram the new key-value array would look like this:
Key: 0 0 0 2 2 4 5 5 5 5 6 6 7 7 7
Values: 3 3 3 2 2 1 4 4 4 4 2 2 3 3 3
I can do this using loops and checking if two keys are the same but is there an efficient way to do this using Thrust?
Thanks!
Here is one possible method:
use thrust::transform
using two offset versions of the key
array to mark the beginning of each segment
Key: 0 0 0 2 2 4 5 5 5 5 6 6 7 7 7
seg: 0 0 0 1 0 1 1 0 0 0 1 0 1 0 0
with the beginning of each segment marked, perform a thrust::inclusive_scan
to turn the segment markers into a set of lookup indices
Key: 0 0 0 2 2 4 5 5 5 5 6 6 7 7 7
seg: 0 0 0 1 0 1 1 0 0 0 1 0 1 0 0
idx: 0 0 0 1 1 2 3 3 3 3 4 4 5 5 5
these lookup indices can then be used with a thrust::permutation_iterator
to copy the indexed values from the values/counts array into the desired output
Key: 0 0 0 2 2 4 5 5 5 5 6 6 7 7 7
seg: 0 0 0 1 0 1 1 0 0 0 1 0 1 0 0
idx: 0 0 0 1 1 2 3 3 3 3 4 4 5 5 5
Val: 3 3 3 2 2 1 4 4 4 4 2 2 3 3 3
Here is a worked example:
$ cat t1299.cu
#include <thrust/device_vector.h>
#include <thrust/iterator/zip_iterator.h>
#include <thrust/iterator/permutation_iterator.h>
#include <thrust/transform.h>
#include <thrust/remove.h>
#include <thrust/copy.h>
#include <iostream>
struct my_remove
{
template <typename T>
__host__ __device__
bool operator()(const T &t)
{
return (thrust::get<0>(t) == 0);
}
};
struct my_cmp
{
template <typename T>
__host__ __device__
int operator()(const T &t)
{
if (thrust::get<0>(t) != thrust::get<1>(t)) return 1;
return 0;
}
};
using namespace thrust::placeholders;
int main(){
int kb[] = {0,2,4,5,6,7};
int vc[] = {3,2,1,4,2,3};
int key[] = {0,0,0,2,2,4,5,5,5,5,6,6,7,7,7};
int kbsize = sizeof(kb)/sizeof(int);
int keysize = sizeof(key)/sizeof(int);
thrust::device_vector<int> dkb(kb, kb+kbsize);
thrust::device_vector<int> dvc(vc, vc+kbsize);
thrust::device_vector<int> dkey(key, key+keysize);
thrust::device_vector<int> dval(keysize);
thrust::copy(dkey.begin(), dkey.end(), std::ostream_iterator<int>(std::cout, ","));
std::cout << std::endl;
//first remove any zero count values from kb/vc
// thrust::remove_if(thrust::make_zip_iterator(thrust::make_tuple(dvc.begin(), dkb.begin())), thrust::make_zip_iterator(thrust::make_tuple(dvc.end(), dkb.end())), my_remove());
// next, mark the segments in the key array
thrust::transform(thrust::make_zip_iterator(thrust::make_tuple(dkey.begin(), dkey.begin()+1)), thrust::make_zip_iterator(thrust::make_tuple(dkey.end()-1, dkey.end())), dval.begin()+1, my_cmp());
// prefix sum to generate lookup indices
thrust::device_vector<int> didx(keysize);
thrust::inclusive_scan(dval.begin(), dval.end(), didx.begin());
// use lookup indices to populate values vector
thrust::copy(thrust::make_permutation_iterator(dvc.begin(), didx.begin()), thrust::make_permutation_iterator(dvc.begin(), didx.end()), dval.begin());
// display results
thrust::copy(dval.begin(), dval.end(), std::ostream_iterator<int>(std::cout, ","));
std::cout << std::endl;
return 0;
}
$ nvcc -arch=sm_35 -o t1299 t1299.cu
$ ./t1299
0,0,0,2,2,4,5,5,5,5,6,6,7,7,7,
3,3,3,2,2,1,4,4,4,4,2,2,3,3,3,
$
A typical example of thrust optimization is to look for "fusion" of operations. In this case, since we have a transform operation immediately followed by an inclusive scan, a simple example of fusion would be to replace these with a thrust::transform_inclusive_scan