I would like to conditional copy data from vector, basing on stencil vector, which is N times shorter. Every element in stencil would be responsible for N elements in data vector. Suppose that the vectors look as follows (N=3)
data = {1,2,3,4,5,6,7,8,9}
stencil = {1,0,1}
What I would like to get in result:
result = {1,2,3,7,8,9}
Is there a way to achieve this using functions from Thrust library?
I know, that there is:
thrust::copy_if (InputIterator1 first, InputIterator1 last, InputIterator2 stencil, OutputIterator result, Predicate pred)
but this doesn't allow me to copy N values from data vector basing on one element from stencil.
As is often the case, I imagine there are many possible ways to do this.
The approach which occurs to me (using copy_if
) is to use the stencil
vector as part of a thrust::permutation_iterator
, that takes the stencil
vector and generates the index into it using a thrust::transform_iterator
. If we imagine a copying index that goes from 0..8 for this example, then we can index into the "source" (i.e. stencil) vector using a "map" index calculated using a thrust::counting_iterator
with integer division by N
(using thrust placeholders). The copying predicate just tests if the stencil value == 1.
The thrust quick start guide gives a concise description of how to use these fancy iterators.
Here is a worked example:
$ cat t471.cu
#include <thrust/copy.h>
#include <thrust/device_vector.h>
#include <thrust/iterator/permutation_iterator.h>
#include <thrust/iterator/transform_iterator.h>
#include <thrust/iterator/counting_iterator.h>
#include <iostream>
using namespace thrust::placeholders;
int main(){
int data[] = {1,2,3,4,5,6,7,8,9};
int stencil[] = {1,0,1};
int ds = sizeof(data)/sizeof(data[0]);
int ss = sizeof(stencil)/sizeof(stencil[0]);
int N = ds/ss; // assume this whole number divisible
thrust::device_vector<int> d_data(data, data+ds);
thrust::device_vector<int> d_stencil(stencil, stencil+ss);
thrust::device_vector<int> d_result(ds);
int rs = thrust::copy_if(d_data.begin(), d_data.end(), thrust::make_permutation_iterator(d_stencil.begin(), thrust::make_transform_iterator(thrust::counting_iterator<int>(0), _1 / N)), d_result.begin(), _1 == 1) - d_result.begin();
thrust::copy_n(d_result.begin(), rs, std::ostream_iterator<int>(std::cout, ","));
std::cout << std::endl;
return 0;
}
$ nvcc -o t471 t471.cu
$ ./t471
1,2,3,7,8,9,
$
With the assumptions about stencil
organization made here, we could also pre-compute the result size rs
with thrust::reduce
, and use that to allocate the result vector size:
$ cat t471.cu
#include <thrust/copy.h>
#include <thrust/reduce.h>
#include <thrust/device_vector.h>
#include <thrust/iterator/permutation_iterator.h>
#include <thrust/iterator/transform_iterator.h>
#include <thrust/iterator/counting_iterator.h>
#include <iostream>
using namespace thrust::placeholders;
int main(){
int data[] = {1,2,3,4,5,6,7,8,9};
int stencil[] = {1,0,1};
int ds = sizeof(data)/sizeof(data[0]);
int ss = sizeof(stencil)/sizeof(stencil[0]);
int N = ds/ss; // assume this whole number divisible
thrust::device_vector<int> d_data(data, data+ds);
thrust::device_vector<int> d_stencil(stencil, stencil+ss);
int rs = thrust::reduce(d_stencil.begin(), d_stencil.end())*N;
thrust::device_vector<int> d_result(rs);
thrust::copy_if(d_data.begin(), d_data.end(), thrust::make_permutation_iterator(d_stencil.begin(), thrust::make_transform_iterator(thrust::counting_iterator<int>(0), _1 / N)), d_result.begin(), _1 == 1) - d_result.begin();
thrust::copy_n(d_result.begin(), rs, std::ostream_iterator<int>(std::cout, ","));
std::cout << std::endl;
return 0;
}
$ nvcc -o t471 t471.cu
$ ./t471
1,2,3,7,8,9,
$