I am using thrust to generate some random permutations on the GPU as follows:
// Compute a random list on CPU
// (iterations * 4 entries, each an index into an array of src_size elements)
int iterations = 500;
int src_size = 2048;
thrust::host_vector<int> cpu_rand_list (iterations * 4);
for(size_t i = 0; i < cpu_rand_list.size(); ++i) {
// rand()/(1.0 + RAND_MAX) lies in [0, 1), so after truncation to int
// every entry is a valid index in [0, src_size)
cpu_rand_list[i] = src_size * (rand()/(1.0 + RAND_MAX));
}
// Copy the random list to GPU
thrust::device_vector<int> gpu_rand_list = cpu_rand_list;
Now gpu_rand_list
contains integer indices, and I have another array like:
// Device-side array that gpu_rand_list indexes into
thrust::device_vector<float> values(2048);
// These are now filled with some values
...
What I would like to do is create another list containing only those entries of gpu_rand_list
whose corresponding entries in values
are not equal to -1. In CPU code, it would be something like:
// Desired result: the entries of gpu_rand_list, in their original order,
// whose looked-up value is not the -1 sentinel.
std::vector<int> refined;
for (int i = 0; i < gpu_rand_list.size(); ++i) {
// gpu_rand_list[i] is used as an index into values
if (values[gpu_rand_list[i]] != -1)
refined.push_back(gpu_rand_list[i]);
}
Is there a way to achieve this in thrust? I tried to use the copy_if construct but could not make it work with these multiple arrays.
thrust::copy_if (specifically the stencil version, I would think) is a reasonable starting point. The only other complexity I see is the indexing "through" gpu_rand_list.
This can be accomplished with a permutation iterator.
(aside: using a float
array for the stencil when you want to do an exact comparison to -1 seems a little odd to me, but perhaps it makes sense.)
Something like this may work for you:
$ cat t881.cu
#include <thrust/host_vector.h>
#include <thrust/device_vector.h>
#include <thrust/copy.h>
#include <thrust/iterator/permutation_iterator.h>
#include <stdlib.h>
#include <vector>
#include <iostream>
using namespace thrust::placeholders;
int main(){
// Compute a random list on CPU
int iterations = 500;
int src_size = 2048;
thrust::host_vector<int> cpu_rand_list (iterations * 4);
for(size_t i = 0; i < cpu_rand_list.size(); ++i) {
cpu_rand_list[i] = src_size * (rand()/(1.0 + RAND_MAX));
}
// Copy the random list to GPU
thrust::device_vector<int> gpu_rand_list = cpu_rand_list;
thrust::device_vector<float> values(src_size, -1.0f);
// pick some values to copy
values[2] = 0; values[3] = 0; values[5] = 0;
thrust::device_vector<int> result(iterations * 4);
thrust::copy_if(gpu_rand_list.begin(), gpu_rand_list.end(),thrust::make_permutation_iterator(values.begin(), gpu_rand_list.begin()), result.begin(), _1 != -1.0f);
std::vector<float> h_values(src_size);
thrust::copy(values.begin(), values.end(), h_values.begin());
thrust::host_vector<int> h_result = result;
std::vector<int> refined;
for (int i = 0; i < cpu_rand_list.size(); ++i) {
if (h_values[cpu_rand_list[i]] != -1)
refined.push_back(gpu_rand_list[i]);
}
for (int i = 0; i < refined.size(); i++)
if (refined[i] != h_result[i]) { std::cout << "mismatch at: " << i << "was: " << h_result[i] << "should be: " << refined[i] << std::endl; return 1;}
else std::cout << refined[i] << std::endl;
return 0;
}
$ nvcc -o t881 t881.cu
$ ./t881
2
5
5
$
(I'm using thrust placeholders so I don't have to create an explicit functor for the copy_if operation.)