CUDA lambda vs functor usage

I've got a simple function in CUDA using a functor

// Binary functor for thrust::transform: combines two floats as
// _beta1 * op + _mb1 * gradient.
struct MT {
    const float _beta1;  // weight applied to the first operand (op)
    const float _mb1;    // weight applied to the second operand (gradient)

    // Stores the two weights. Not marked __device__, so it is constructed in host code.
    MT(const float beta1, const float mb1) : _beta1(beta1), _mb1(mb1) { }

    // const-qualified: the call only reads the two weights, so the functor
    // can also be invoked through a const object or a const reference.
    __device__
    float operator()(const float& op, const float& gradient) const {
        return _beta1 * op + _mb1 * gradient;
    }
};


// Functor-based update: overwrites each _mt[i] with
// _beta1 * _mt[i] + _mb1 * gradients[i].
// NOTE(review): _mt, _beta1 and _mb1 are not declared in this snippet
// (presumably members of an enclosing class), and d_weights is never used.
void example(const thrust::device_vector<float>& gradients, thrust::device_vector<float>& d_weights)
{
    MT blend(_beta1, _mb1);
    thrust::transform(_mt.begin(), _mt.end(),   // first input range
                      gradients.begin(),        // second input range
                      _mt.begin(),              // output, written in place over _mt
                      blend);
}

However, this equivalent example crashes (it compiles fine with the --extended-lambda flag). Is there another flag, or a different way of expressing this, to make it run? Functors are fine, but lambdas look neater.

// Lambda-based variant of example(); this is the version reported to throw
// thrust::system::system_error at run time.
// NOTE(review): the lambda has no __device__ annotation and captures `this`;
// presumably `this` points to a host-side object — confirm against the enclosing class.
// NOTE(review): d_weights is never used, and _mt is not declared in this snippet.
void example_crash(const thrust::device_vector<float>& gradients, thrust::device_vector<float>& d_weights)
{
    thrust::transform(_mt.begin(), _mt.end(), gradients.begin(), _mt.begin(), [this](const float& op,const float& gradient) { return _beta1 * op + _mb1 * gradient; });
}

Error is

Exception thrown at 0x00007FFA833D4FD9 in Optioniser.exe: Microsoft C++ exception: thrust::system::system_error at memory location 0x00000031ED7FCDD0.
Exception thrown: 'System.Runtime.InteropServices.SEHException' in AARC.Optimisation.dll
An exception of type 'System.Runtime.InteropServices.SEHException' occurred in AARC.Optimisation.dll but was not handled in user code
External component has thrown an exception.

Solution

Your example and example_crash functions don't make sense to me because I don't know what _mt is and you don't seem to be using d_weights.

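If we fix that, then there are at least a couple of issues with your lambda. One is that it has no __device__ decoration (which is necessary here). Another is the [this] capture: the closure then carries a pointer to a host object, which device code cannot dereference. The version below is a free function whose lambda uses [=] and does not touch this.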

Making various changes, and fixing things you haven't shown, this works for me:

$ cat t2093.cu
#include <thrust/device_vector.h>
#include <thrust/transform.h>
#include <thrust/host_vector.h>
#include <thrust/copy.h>
#include <iostream>

// Binary functor for thrust::transform: combines two floats as
// _beta1 * op + _mb1 * gradient.
struct MT {
    const float _beta1;  // weight applied to the first operand (op)
    const float _mb1;    // weight applied to the second operand (gradient)

    // Stores the two weights. Not marked __device__, so it is constructed in host code.
    MT(const float beta1, const float mb1) : _beta1(beta1), _mb1(mb1) { }

    // const-qualified: the call only reads the two weights, so the functor
    // can also be invoked through a const object or a const reference.
    __device__
    float operator()(const float& op, const float& gradient) const {
        return _beta1 * op + _mb1 * gradient;
    }
};

// Weights read by example() and example_crash(). With both set to 1.0f,
// each output element is simply op + gradient.
const float _beta1 = 1.0f;
const float _mb1 = 1.0f;
// Functor-based update: overwrites each _mt[i] with
// _beta1 * _mt[i] + _mb1 * gradients[i].
// Per thrust::transform's contract, gradients must hold at least _mt.size() elements.
void example(const thrust::device_vector<float>& gradients, thrust::device_vector<float>& _mt)
{
    MT blend(_beta1, _mb1);
    thrust::transform(_mt.begin(), _mt.end(),   // first input range
                      gradients.begin(),        // second input range
                      _mt.begin(),              // output, written in place over _mt
                      blend);
}

// Lambda-based equivalent of example(): overwrites each _mt[i] with
// _beta1 * _mt[i] + _mb1 * gradients[i].
// The lambda carries the __device__ annotation and must be built with
// nvcc's --extended-lambda option.
void example_crash(const thrust::device_vector<float>& gradients, thrust::device_vector<float>& _mt)
{
    auto blend = [=] __device__ (const float& op, const float& gradient) {
        return _beta1 * op + _mb1 * gradient;
    };
    thrust::transform(_mt.begin(), _mt.end(),   // first input range
                      gradients.begin(),        // second input range
                      _mt.begin(),              // output, written in place over _mt
                      blend);
}

const int len = 1000;
int main(){

  thrust::device_vector<float> g1(len, 1.0f);
  thrust::device_vector<float> mt1(len, 2.0f);
  example(g1, mt1);
  thrust::host_vector<float> h_mt1 = mt1;
  thrust::copy_n(h_mt1.begin(), 2, std::ostream_iterator<float>(std::cout, ","));
  std::cout << std::endl;
  thrust::device_vector<float> g2(len, 1.0f);
  thrust::device_vector<float> mt2(len, 2.0f);
  example_crash(g2, mt2);
  thrust::host_vector<float> h_mt2 = mt2;
  thrust::copy_n(h_mt2.begin(), 2, std::ostream_iterator<float>(std::cout, ","));
  std::cout << std::endl;
}
$ nvcc -o t2093 t2093.cu --extended-lambda
$ compute-sanitizer ./t2093
========= COMPUTE-SANITIZER
3,3,
3,3,
========= ERROR SUMMARY: 0 errors
$