I am trying to solve a problem using CUDA Thrust.
I have a host array with 3
elements. Is it possible, using Thrust, to create a device array of 384
elements in which the 3
elements in my host array is repeated 128
times (128 x 3 = 384
)?
Generally speaking, starting from an array of 3
elements, how can I use Thrust to generate a device array of size X
, where X = Y x 3
, i.e. Y
is the number of repetitions?
One possible approach:
This code is a trivial modification of the strided range example to demonstrate. You can change the REPS
define to 128 to see the full expansion to 384 output elements:
#include <thrust/iterator/counting_iterator.h>
#include <thrust/iterator/transform_iterator.h>
#include <thrust/iterator/permutation_iterator.h>
#include <thrust/functional.h>
#include <thrust/fill.h>
#include <thrust/device_vector.h>
#include <thrust/host_vector.h>
// for printing
#include <thrust/copy.h>
#include <ostream>
#define STRIDE 3
#define REPS 15 // change to 128 if you like
#define DSIZE (STRIDE*REPS)
// this example illustrates how to make strided access to a range of values
// examples:
// strided_range([0, 1, 2, 3, 4, 5, 6], 1) -> [0, 1, 2, 3, 4, 5, 6]
// strided_range([0, 1, 2, 3, 4, 5, 6], 2) -> [0, 2, 4, 6]
// strided_range([0, 1, 2, 3, 4, 5, 6], 3) -> [0, 3, 6]
// ...
template <typename Iterator>
class strided_range
{
public:
typedef typename thrust::iterator_difference<Iterator>::type difference_type;
struct stride_functor : public thrust::unary_function<difference_type,difference_type>
{
difference_type stride;
stride_functor(difference_type stride)
: stride(stride) {}
__host__ __device__
difference_type operator()(const difference_type& i) const
{
return stride * i;
}
};
typedef typename thrust::counting_iterator<difference_type> CountingIterator;
typedef typename thrust::transform_iterator<stride_functor, CountingIterator> TransformIterator;
typedef typename thrust::permutation_iterator<Iterator,TransformIterator> PermutationIterator;
// type of the strided_range iterator
typedef PermutationIterator iterator;
// construct strided_range for the range [first,last)
strided_range(Iterator first, Iterator last, difference_type stride)
: first(first), last(last), stride(stride) {}
iterator begin(void) const
{
return PermutationIterator(first, TransformIterator(CountingIterator(0), stride_functor(stride)));
}
iterator end(void) const
{
return begin() + ((last - first) + (stride - 1)) / stride;
}
protected:
Iterator first;
Iterator last;
difference_type stride;
};
int main(void)
{
thrust::host_vector<int> h_data(STRIDE);
h_data[0] = 1;
h_data[1] = 2;
h_data[2] = 3;
thrust::device_vector<int> data(DSIZE);
typedef thrust::device_vector<int>::iterator Iterator;
strided_range<Iterator> pos1(data.begin(), data.end(), STRIDE);
strided_range<Iterator> pos2(data.begin()+1, data.end(), STRIDE);
strided_range<Iterator> pos3(data.begin()+2, data.end(), STRIDE);
thrust::fill(pos1.begin(), pos1.end(), h_data[0]);
thrust::fill(pos2.begin(), pos2.end(), h_data[1]);
thrust::fill(pos3.begin(), pos3.end(), h_data[2]);
// print the generated data
std::cout << "data: ";
thrust::copy(data.begin(), data.end(), std::ostream_iterator<int>(std::cout, " ")); std::cout << std::endl;
return 0;
}