Search code examples
cuda thrust

CUDA Thrust copy transformed result only if it satisfies a predicate


I want to perform a transformation on an input thrust::device_vector and only copy the result to the output vector if the result satisfies a predicate. So the number of results could be less than the size of the input device_vector (similar to the output vector of thrust::copy_if). I have not found a way to do this with thrust::transform_if. Currently I can do this with thrust::transform and thrust::remove_if as shown in the example below:

#include <thrust/random.h>
#include <thrust/iterator/counting_iterator.h>
#include <thrust/device_vector.h>
#include <thrust/transform.h>
#include <thrust/remove.h>
#include <iostream>

// Mixes the bits of `a` through six add/xor/shift rounds and returns the
// scrambled value. Callable from both host and device code; add_random uses
// it to derive a per-index seed for its random engine.
__host__ __device__ unsigned int hash(unsigned int a) {
  // Unsigned arithmetic wraps, so folding the constant and the shifted term
  // into one compound assignment yields the same value as adding/xoring them
  // one after the other.
  a += 0x7ed55d16u + (a << 12);
  a ^= 0xc761c23cu ^ (a >> 19);
  a += 0x165667b1u + (a << 5);
  a = (a + 0xd3a2646cu) ^ (a << 9);  // mixed add/xor round: kept in long form
  a += 0xfd7046c5u + (a << 3);
  a ^= 0xb55a4f09u ^ (a >> 16);
  return a;
}

// Binary functor for thrust::transform: adds a pseudo-random offset to x.
//
// The random engine is re-seeded with hash(n) on every call, so the result
// depends only on (n, x) and is reproducible from run to run.
struct add_random {
  __host__ __device__ add_random() {}

  // n: element index, used only to derive the seed.
  // x: value the random offset is added to.
  // Returns x plus one draw from uniform_int_distribution<int>(0, 11).
  //
  // Marked __host__ __device__ (it was __device__ only) so that it matches
  // the constructor and hash(), and so the functor can also be used with
  // host-side ranges.
  __host__ __device__ int operator()(const int n, const int x) const {
    thrust::default_random_engine rng(hash(n));
    thrust::uniform_int_distribution<int> uniform(0, 11);
    return uniform(rng) + x;
  }
};

// Unary predicate: true when x is strictly greater than 6.
// main() passes it to thrust::remove_if, so elements for which it returns
// true are the ones that get removed.
struct is_greater {
  // const-qualified so the predicate can be invoked through a const object
  // or const reference; it holds no state to modify.
  __host__ __device__ bool operator()(const int x) const {
    return x > 6;
  }
};

// Two-pass reference version: transform d_x in place, then compact it with
// remove_if. Prints the vector after each pass.
int main(void) {
  // The element count is derived from the array so the ranges below cannot
  // drift out of sync with the data.
  const int x[] = {10, 2, 5, 3, 0};
  const int n = static_cast<int>(sizeof(x) / sizeof(x[0]));
  thrust::device_vector<int> d_x(x, x + n);

  // Pass 1: d_x[i] = add_random(i, d_x[i]), written back in place.
  thrust::transform(
      thrust::counting_iterator<int>(0),
      thrust::counting_iterator<int>(n),
      d_x.begin(),
      d_x.begin(),
      add_random());

  std::cout << "after adding random number:" << std::endl;
  std::ostream_iterator<int> o(std::cout, " ");
  thrust::copy(d_x.begin(), d_x.end(), o);
  std::cout << std::endl;

  // Pass 2: compact the vector. remove_if returns the end of the kept range;
  // d_x itself is not resized, so only [begin, new_end) is printed.
  thrust::device_vector<int>::iterator new_end =
      thrust::remove_if(d_x.begin(), d_x.end(), is_greater());

  std::cout << "after removing values greater than 6:" << std::endl;
  thrust::copy(d_x.begin(), new_end, o);
  std::cout << std::endl;

  return 0;
}

Which gives the output:

after adding random number:
18 4 8 7 11 
after removing values greater than 6:
4 

I would like to avoid copying the results to memory twice, first by thrust::transform and then by thrust::remove_if in the above example. Is it possible to get the above output with a single transformation function? How can I do this? My biggest concern is the computational cost, so any optimized solution, even if it doesn't use the Thrust library would be great.


Solution

  • Welcome to the world of thrust fancy iterators. You can get a quick overview of some fancy iterator types by looking at the thrust quick start guide. In particular, a thrust transform iterator can frequently be used to replace a thrust transform operation that is applied to the input of another thrust algorithm, "fusing" the two algorithms into a single operation.

    Here's a worked example applied to your case:

    $ cat t1254.cu
    #include <thrust/random.h>
    #include <thrust/iterator/counting_iterator.h>
    #include <thrust/iterator/transform_iterator.h>
    #include <thrust/device_vector.h>
    #include <thrust/transform.h>
    #include <thrust/remove.h>
    #include <iostream>
    
    // Mixes the bits of `a` through six add/xor/shift rounds and returns the
    // scrambled value. Callable from both host and device code; add_random
    // uses it to derive a per-index seed for its random engine.
    __host__ __device__ unsigned int hash(unsigned int a) {
      // Unsigned arithmetic wraps, so folding the constant and the shifted
      // term into one compound assignment yields the same value as applying
      // them one after the other.
      a += 0x7ed55d16u + (a << 12);
      a ^= 0xc761c23cu ^ (a >> 19);
      a += 0x165667b1u + (a << 5);
      a = (a + 0xd3a2646cu) ^ (a << 9);  // mixed add/xor round: kept in long form
      a += 0xfd7046c5u + (a << 3);
      a ^= 0xb55a4f09u ^ (a >> 16);
      return a;
    }
    
    // Unary functor for a transform_iterator placed over a zip of
    // (index, value): adds a pseudo-random offset to the value.
    //
    // The random engine is re-seeded with hash(index) on every call, so the
    // result depends only on the tuple contents. Dereferencing the transform
    // iterator more than once for the same element yields the same number.
    struct add_random {
      // Nested typedefs describe the call signature to adaptors that look for
      // them. They replace inheriting from thrust::unary_function, which is
      // deprecated in recent Thrust/CCCL releases, and provide the same two
      // member types.
      typedef thrust::tuple<int, int> argument_type;
      typedef int result_type;

      // t: (n, x) where n is the element index (seed source) and x the value.
      // Returns x plus one draw from uniform_int_distribution<int>(0, 11).
      __host__ __device__ int operator()(thrust::tuple<int, int> t) const {
        const int n = thrust::get<0>(t);
        const int x = thrust::get<1>(t);
        thrust::default_random_engine rng(hash(n));
        thrust::uniform_int_distribution<int> uniform(0, 11);
        return uniform(rng) + x;
      }
    };
    
    // Keep-predicate for thrust::copy_if.
    //
    // NOTE: despite its name (kept so existing call sites still compile), this
    // returns true for the values that are NOT greater than 6, i.e. the ones
    // copy_if should copy. It is the exact logical inverse of the `x > 6`
    // predicate that the remove_if version used.
    struct is_greater {
      // `<=` rather than `<`: the inverse of `x > 6` is `x <= 6`. With `x < 6`
      // a value of exactly 6 would be dropped even though it is not greater
      // than 6. const-qualified so it can be called through a const object.
      __host__ __device__ bool operator()(const int x) const {
        return x <= 6;
      }
    };
    
    // Fused version: the add_random transform is applied lazily through a
    // transform iterator while copy_if filters, so the transformed values are
    // never stored in an intermediate vector.
    int main(void) {
      // The element count is derived from the array so every range below
      // stays in sync with the data.
      const int x[] = {10, 2, 5, 3, 0};
      const int n = static_cast<int>(sizeof(x) / sizeof(x[0]));
      thrust::device_vector<int> d_x(x, x+n);
      // At most n elements can pass the predicate.
      thrust::device_vector<int> d_r(n);

      // Input range: add_random applied to zip(index, d_x[index]) for index in
      // [0, n). copy_if returns the end of the written output range; its
      // distance from d_r.begin() is the number of elements kept.
      const int rsize = static_cast<int>(
          thrust::copy_if(
              thrust::make_transform_iterator(
                  thrust::make_zip_iterator(thrust::make_tuple(
                      thrust::counting_iterator<int>(0), d_x.begin())),
                  add_random()),
              thrust::make_transform_iterator(
                  thrust::make_zip_iterator(thrust::make_tuple(
                      thrust::counting_iterator<int>(n), d_x.end())),
                  add_random()),
              d_r.begin(),
              is_greater())
          - d_r.begin());

      std::cout << "after removing values greater than 6:" << std::endl;
      // Only the first rsize entries of d_r were written by copy_if.
      thrust::copy_n(d_r.begin(), rsize, std::ostream_iterator<int>(std::cout, " "));
      std::cout << std::endl;

      return 0;
    }
    $ nvcc -o t1254 t1254.cu
    $ ./t1254
    after removing values greater than 6:
    4
    $
    
    1. We've replaced your transform operation with a transform iterator applied to the same two inputs. Since you have two inputs to your transform operation, we're using a zip iterator to combine these, and the transform functor has also been reworked slightly to accept that tuple as its input.

    2. We've converted your remove_if to a copy_if so that it can take the transform iterator as its input. Because remove_if discards the elements for which the predicate is true, whereas copy_if keeps them, the predicate's logic has to be inverted.