Search code examples
c++ cuda thrust

Copy specific elements of an array with CUDA Thrust permutation iterator


I have an array of glm::vec3 with count * 3 elements, in which every three consecutive elements form one "item". I have another array which contains the int indices of the items to copy. An example:

thrust::device_vector<glm::vec3> vals(9);
// vals contains 9 vec3, which represent 3 "items"
// vals[0], vals[1], vals[2] are the first "item", 
// vals[3], vals[4], vals[5] are the second "item"...

int idcs[] = {0, 2};
// index 0 and 2 should be copied, i.e. 
// vals[0..2] and vals[6..8]

I tried to use permutation iterators, but I cannot get it to work. My approach is:

thrust::copy(
    thrust::make_permutation_iterator(vals, idcs),
    thrust::make_permutation_iterator(vals, idcs + 2),
    target.begin()
);

But of course this will only copy vals[0] and vals[2] instead of vals[0], vals[1], vals[2] and vals[6], vals[7], vals[8].

Is it possible to copy the desired values from one buffer to another with Thrust?


Solution

  • We can combine the idea of strided ranges with your permutation iterator approach to achieve what you want, I think.

    The basic idea is to use your permutation iterator method to select the "groups" of items to copy, and we will select the 3 items in each group using a set of 3 strided range iterators combined into a zip iterator. We need a zip iterator for the input, and a zip iterator for the output. Here is a fully worked example, using uint3 as a proxy for glm::vec3:

    $ cat t484.cu
    #include <cstddef>
    #include <iostream>
    #include <vector_types.h>
    #include <thrust/copy.h>
    #include <thrust/device_vector.h>
    #include <thrust/functional.h>
    #include <thrust/host_vector.h>
    #include <thrust/iterator/counting_iterator.h>
    #include <thrust/iterator/permutation_iterator.h>
    #include <thrust/iterator/transform_iterator.h>
    #include <thrust/iterator/zip_iterator.h>
    #include <thrust/tuple.h>
    
    
    #define DSIZE 18
    
    
    template <typename Iterator>
    class strided_range
    {
        public:
    
        typedef typename thrust::iterator_difference<Iterator>::type difference_type;
    
        struct stride_functor : public thrust::unary_function<difference_type,difference_type>
        {
            difference_type stride;
    
            stride_functor(difference_type stride)
                : stride(stride) {}
    
            __host__ __device__
            difference_type operator()(const difference_type& i) const
            {
                return stride * i;
            }
        };
    
        typedef typename thrust::counting_iterator<difference_type>                   CountingIterator;
        typedef typename thrust::transform_iterator<stride_functor, CountingIterator> TransformIterator;
        typedef typename thrust::permutation_iterator<Iterator,TransformIterator>     PermutationIterator;
    
        // type of the strided_range iterator
        typedef PermutationIterator iterator;
    
        // construct strided_range for the range [first,last)
        strided_range(Iterator first, Iterator last, difference_type stride)
            : first(first), last(last), stride(stride) {}
    
        iterator begin(void) const
        {
            return PermutationIterator(first, TransformIterator(CountingIterator(0), stride_functor(stride)));
        }
    
        iterator end(void) const
        {
            return begin() + ((last - first) + (stride - 1)) / stride;
        }
    
        protected:
        Iterator first;
        Iterator last;
        difference_type stride;
    };
    
    typedef thrust::device_vector<uint3>::iterator Iter;
    
    int main(){
    // set up test data
      int idcs[] = {0, 2, 5};
      unsigned num_idcs = sizeof(idcs)/sizeof(int);
      thrust::host_vector<uint3> h_vals(DSIZE);
      for (int i = 0; i < DSIZE; i ++) {
        h_vals[i].x = i;
        h_vals[i].y = 100+i;
        h_vals[i].z = 1000+i;}
      thrust::device_vector<uint3> d_target(num_idcs*3);
      thrust::host_vector<int> h_idcs(idcs, idcs + num_idcs);
      thrust::device_vector<int> d_idcs = h_idcs;
      thrust::device_vector<uint3> d_vals = h_vals;
    // set up strided ranges for input, output
      strided_range<Iter> item_1(d_vals.begin()  , d_vals.end(), 3);
      strided_range<Iter> item_2(d_vals.begin()+1, d_vals.end(), 3);
      strided_range<Iter> item_3(d_vals.begin()+2, d_vals.end(), 3);
    // set up strided ranges for output
      strided_range<Iter> out_1(d_target.begin()  , d_target.end(), 3);
      strided_range<Iter> out_2(d_target.begin()+1, d_target.end(), 3);
      strided_range<Iter> out_3(d_target.begin()+2, d_target.end(), 3);
    // copy from input to output
      thrust::copy(thrust::make_permutation_iterator(thrust::make_zip_iterator(thrust::make_tuple(item_1.begin(), item_2.begin(), item_3.begin())), d_idcs.begin()), thrust::make_permutation_iterator(thrust::make_zip_iterator(thrust::make_tuple(item_1.begin(), item_2.begin(), item_3.begin())), d_idcs.end()), thrust::make_zip_iterator(thrust::make_tuple(out_1.begin(), out_2.begin(), out_3.begin())));
    // print out results
      thrust::host_vector<uint3> h_target = d_target;
      for (int i = 0; i < h_target.size(); i++)
        std::cout << "index: " << i << " x: " << h_target[i].x << " y: " << h_target[i].y << " z: " << h_target[i].z << std::endl;
      return 0;
    }
    $ nvcc -arch=sm_20 -o t484 t484.cu
    $ ./t484
    index: 0 x: 0 y: 100 z: 1000
    index: 1 x: 1 y: 101 z: 1001
    index: 2 x: 2 y: 102 z: 1002
    index: 3 x: 6 y: 106 z: 1006
    index: 4 x: 7 y: 107 z: 1007
    index: 5 x: 8 y: 108 z: 1008
    index: 6 x: 15 y: 115 z: 1015
    index: 7 x: 16 y: 116 z: 1016
    index: 8 x: 17 y: 117 z: 1017
    $