Search code examples
c++ cuda thrust

Cuda lambda vs functor usage


I've got a simple function in CUDA that uses a functor:

// Binary functor for thrust::transform: combines two floats as
// _beta1 * op + _mb1 * gradient, with both coefficients fixed at construction.
struct MT {
    const float _beta1;  // multiplies the first operand (op)
    const float _mb1;    // multiplies the second operand (gradient)

    // Stores the two coefficients. The constructor carries no execution-space
    // annotation, so under nvcc it is callable from host code only.
    MT(const float beta1, const float mb1) : _beta1(beta1), _mb1(mb1) { }

    // Returns _beta1 * op + _mb1 * gradient.
    // const-qualified because the call reads the members and never modifies them;
    // this also lets the functor be invoked through a const object or reference.
    __device__
    float operator()(const float& op, const float& gradient) const {
        return _beta1 * op + _mb1 * gradient;
    }
};


// Functor version: overwrites each element of _mt with the MT blend of that
// element and the matching element of gradients.
// _mt, _beta1 and _mb1 are not declared in this snippet, and d_weights is not
// referenced in the body.
void example(const thrust::device_vector<float>& gradients, thrust::device_vector<float>& d_weights)
{
    MT blend(_beta1, _mb1);
    thrust::transform(_mt.begin(), _mt.end(),  // first input range
                      gradients.begin(),        // second input, read in step with the first
                      _mt.begin(),              // output: written back over the first input
                      blend);
}

However, this equivalent example crashes (it compiles fine with the --extended-lambda flag). Is there another flag, or a different way of expressing this, that would make it run? Functors are fine, but lambdas look neater.

// Lambda version of example(): same transform call, but the blend is written as
// a lambda that captures `this` to reach _beta1 and _mb1.
// This is the variant reported to throw thrust::system::system_error at run time.
void example_crash(const thrust::device_vector<float>& gradients, thrust::device_vector<float>& d_weights)
{
    auto blend = [this](const float& op, const float& gradient) {
        return _beta1 * op + _mb1 * gradient;
    };
    thrust::transform(_mt.begin(), _mt.end(),  // first input range
                      gradients.begin(),        // second input, read in step with the first
                      _mt.begin(),              // output: written back over the first input
                      blend);
}

The error is:

Exception thrown at 0x00007FFA833D4FD9 in Optioniser.exe: Microsoft C++ exception: thrust::system::system_error at memory location 0x00000031ED7FCDD0.
Exception thrown: 'System.Runtime.InteropServices.SEHException' in AARC.Optimisation.dll
An exception of type 'System.Runtime.InteropServices.SEHException' occurred in AARC.Optimisation.dll but was not handled in user code
External component has thrown an exception.

Solution

  • Your example and example_crash functions don't make sense to me because I don't know what _mt is and you don't seem to be using d_weights.

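    If we fix that, then there are at least a couple of issues with your lambda. One is that it has no __device__ decoration, which is necessary here. Another is the [this] capture: it copies only a pointer to the host-side object into the lambda, so the device code would dereference host memory when it reads _beta1 and _mb1.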

    Making various changes, and fixing things you haven't shown, this works for me:

    $ cat t2093.cu
    #include <thrust/copy.h>
    #include <thrust/device_vector.h>
    #include <thrust/host_vector.h>
    #include <thrust/transform.h>
    #include <iostream>
    #include <iterator>
    
    // Binary functor for thrust::transform: combines two floats as
    // _beta1 * op + _mb1 * gradient, with both coefficients fixed at construction.
    struct MT {
        const float _beta1;  // multiplies the first operand (op)
        const float _mb1;    // multiplies the second operand (gradient)

        // Stores the two coefficients. The constructor carries no execution-space
        // annotation, so under nvcc it is callable from host code only.
        MT(const float beta1, const float mb1) : _beta1(beta1), _mb1(mb1) { }

        // Returns _beta1 * op + _mb1 * gradient.
        // const-qualified because the call reads the members and never modifies them;
        // this also lets the functor be invoked through a const object or reference.
        __device__
        float operator()(const float& op, const float& gradient) const {
            return _beta1 * op + _mb1 * gradient;
        }
    };
    
    // File-scope coefficients read by example() and example_crash() below.
    const float _beta1{1.0f};  // multiplier for the first operand
    const float _mb1{1.0f};    // multiplier for the second operand
    // Functor version: overwrites each element of _mt with
    // _beta1 * _mt[i] + _mb1 * gradients[i], computed by an MT instance.
    void example(const thrust::device_vector<float>& gradients, thrust::device_vector<float>& _mt)
    {
        MT blend(_beta1, _mb1);
        thrust::transform(_mt.begin(), _mt.end(),  // first input range
                          gradients.begin(),        // second input, read in step with the first
                          _mt.begin(),              // output: written back over the first input
                          blend);
    }
    
    // Lambda version: same in-place transform as example(), with the blend written
    // as a __device__ lambda (requires building with --extended-lambda).
    void example_crash(const thrust::device_vector<float>& gradients, thrust::device_vector<float>& _mt)
    {
        auto blend = [=] __device__ (const float& op, const float& gradient) {
            return _beta1 * op + _mb1 * gradient;
        };
        thrust::transform(_mt.begin(), _mt.end(),  // first input range
                          gradients.begin(),        // second input, read in step with the first
                          _mt.begin(),              // output: written back over the first input
                          blend);
    }
    
    const int len = 1000;
    int main(){
    
      thrust::device_vector<float> g1(len, 1.0f);
      thrust::device_vector<float> mt1(len, 2.0f);
      example(g1, mt1);
      thrust::host_vector<float> h_mt1 = mt1;
      thrust::copy_n(h_mt1.begin(), 2, std::ostream_iterator<float>(std::cout, ","));
      std::cout << std::endl;
      thrust::device_vector<float> g2(len, 1.0f);
      thrust::device_vector<float> mt2(len, 2.0f);
      example_crash(g2, mt2);
      thrust::host_vector<float> h_mt2 = mt2;
      thrust::copy_n(h_mt2.begin(), 2, std::ostream_iterator<float>(std::cout, ","));
      std::cout << std::endl;
    }
    $ nvcc -o t2093 t2093.cu --extended-lambda
    $ compute-sanitizer ./t2093
    ========= COMPUTE-SANITIZER
    3,3,
    3,3,
    ========= ERROR SUMMARY: 0 errors
    $