Search code examples
c++cudathrust

Using CURAND inside a Thrust functor


Is it possible to use CURAND together with Thrust inside a device functor? A minimum code example can be:

#include <thrust/device_vector.h>

struct Move
{
    Move() {}

    using Position = thrust::tuple<double, double>;

    __host__ __device__
    Position operator()(Position p)
    {
        thrust::get<0>(p) += 1.0; // use CURAND to add a random N(0,1)
        thrust::get<1>(p) += 1.0; // use CURAND to add a random N(0,1)
        return p;
    }
};

int main()
{
    // Create vectors on device
    thrust::device_vector<double> p1(10, 0.0);
    thrust::device_vector<double> p2(10, 0.0);

    // Create zip iterators
    auto pBeg = thrust::make_zip_iterator(thrust::make_tuple(p1.begin(), p2.begin()));
    auto pEnd = thrust::make_zip_iterator(thrust::make_tuple(p1.end(),   p2.end()  ));

    // Move points in the vectors
    thrust::transform(pBeg, pEnd, pBeg, Move());

    // Print result (just for debug)
    thrust::copy(p1.begin(), p1.end(), std::ostream_iterator<double>(std::cout, "\n"));
    thrust::copy(p2.begin(), p2.end(), std::ostream_iterator<double>(std::cout, "\n"));

    return 0;
}

What is the right way to create random numbers inside the operator function?


Solution

  • Is it possible to use CURAND together with Thrust inside a device functor?

    Yes, it's possible. As indicated by @m.s. most of what you need from curand can be gotten from the curand device api example in the curand documentation. (In fact, there is even a full thrust/curand sample code in the documentation here)

    We can mimic the behavior of the setup kernel indicated there with a thrust algorithm call, eg. thrust::for_each_n to setup the initial curand state variables for each device vector element.

    After that, it is only necessary to pass the initialized curand state to your Move functor via an additional iterator in your zip iterators, and then call curand_uniform (for example) within the functor.

    Here is a fully worked example, based on your code:

    $ cat t20.cu
    #include <thrust/device_vector.h>
    #include <curand_kernel.h>
    #include <iostream>
    #include <thrust/iterator/counting_iterator.h>
    #include <thrust/transform.h>
    #include <thrust/for_each.h>
    
    const int seed = 1234;
    const int ds = 10;
    const int offset = 0;
    
    struct Move
    {
        Move() {}
    
        using Position = thrust::tuple<double, double, curandState>;
    
        __device__
        Position operator()(Position p)
        {
            curandState s = thrust::get<2>(p);
            thrust::get<0>(p) += curand_uniform(&s); // use CURAND to add a random N(0,1)
            thrust::get<1>(p) += curand_uniform(&s); // use CURAND to add a random N(0,1)
            thrust::get<2>(p) = s;
            return p;
        }
    };
    
    struct curand_setup
    {
        using init_tuple = thrust::tuple<int, curandState &>;
        __device__
        void operator()(init_tuple t){
          curandState s;
          int id = thrust::get<0>(t);
          curand_init(seed, id, offset, &s);
          thrust::get<1>(t) = s;
          }
    };
    
    int main()
    {
        // Create vectors on device
        thrust::device_vector<double> p1(ds, 0.0);
        thrust::device_vector<double> p2(ds, 0.0);
        thrust::device_vector<curandState> s1(ds);
    
        // Create zip iterators
        auto pBeg = thrust::make_zip_iterator(thrust::make_tuple(p1.begin(), p2.begin(), s1.begin()));
        auto pEnd = thrust::make_zip_iterator(thrust::make_tuple(p1.end(),   p2.end(), s1.end()  ));
        auto pInit = thrust::make_zip_iterator(thrust::make_tuple(thrust::counting_iterator<int>(0), s1.begin()));
        // initialize random generator
        thrust::for_each_n(pInit, ds, curand_setup());
        // Move points in the vectors
        thrust::transform(pBeg, pEnd, pBeg, Move());
    
        // Print result (just for debug)
        thrust::copy(p1.begin(), p1.end(), std::ostream_iterator<double>(std::cout, "\n"));
        thrust::copy(p2.begin(), p2.end(), std::ostream_iterator<double>(std::cout, "\n"));
    
        return 0;
    }
    $ nvcc -arch=sm_61 -std=c++11 t20.cu -o t20 -lcurand
    $ ./t20
    0.145468
    0.820181
    0.550399
    0.29483
    0.914733
    0.868979
    0.321921
    0.782857
    0.0113023
    0.28545
    0.434899
    0.926417
    0.811845
    0.308556
    0.557235
    0.501246
    0.206681
    0.123377
    0.539587
    0.198575
    $
    

    Regarding this question:

    What is the right way to create random numbers inside the operator function?

    There is no problem with using curand in thrust, but you might also want to be aware that thrust has a built in RNG facility and there is a fully worked usage example here.