I've got a simple function in CUDA using a functor
// Functor for an elementwise blend: result = _beta1 * op + _mb1 * gradient
// (a momentum-style update; coefficients are fixed at construction).
struct MT {
const float _beta1;
const float _mb1;
MT(const float beta1, const float mb1) : _beta1(beta1), _mb1(mb1) { }
// __device__: callable from device code, which thrust::transform over
// device_vector iterators requires.
__device__
float operator()(const float& op, const float& gradient) {
return _beta1 * op + _mb1 * gradient;
}
};
// Elementwise, in place on _mt: _mt[i] = _beta1 * _mt[i] + _mb1 * gradients[i].
// NOTE(review): _mt, _beta1 and _mb1 are not declared in this snippet —
// presumably members of an enclosing class; d_weights is unused here. Confirm
// against the full source.
void example(const thrust::device_vector<float>& gradients, thrust::device_vector<float>& d_weights)
{
thrust::transform(_mt.begin(), _mt.end(), gradients.begin(), _mt.begin(), MT(_beta1, _mb1));
}
However, this equivalent example crashes (it compiles fine with the --extended-lambda flag). Is there another flag, or a different way of expressing this, to make it run? Functors are fine, but lambdas look neater.
// Intended to be equivalent to example(), but expressed with a lambda.
// NOTE(review): the lambda below carries no __device__ annotation, so it is a
// host-only callable handed to thrust::transform over device iterators — the
// likely cause of the thrust::system::system_error quoted further down.
void example_crash(const thrust::device_vector<float>& gradients, thrust::device_vector<float>& d_weights)
{
thrust::transform(_mt.begin(), _mt.end(), gradients.begin(), _mt.begin(), [this](const float& op,const float& gradient) { return _beta1 * op + _mb1 * gradient; });
}
Error is
Exception thrown at 0x00007FFA833D4FD9 in Optioniser.exe: Microsoft C++ exception: thrust::system::system_error at memory location 0x00000031ED7FCDD0.
Exception thrown: 'System.Runtime.InteropServices.SEHException' in AARC.Optimisation.dll
An exception of type 'System.Runtime.InteropServices.SEHException' occurred in AARC.Optimisation.dll but was not handled in user code
External component has thrown an exception.
Your `example` and `example_crash` functions don't make sense to me, because I don't know what `_mt` is, and you don't seem to be using `d_weights`.
If we fix that, then there are at least a couple of issues with your lambda, one of them being that there is no `__device__` decoration (which is necessary here).
Making various changes, and fixing things you haven't shown, this works for me:
$ cat t2093.cu
#include <thrust/device_vector.h>
#include <thrust/transform.h>
#include <thrust/host_vector.h>
#include <thrust/copy.h>
#include <iostream>
// Functor computing the momentum-style blend beta1 * op + mb1 * gradient;
// both coefficients are bound once at construction time.
struct MT {
    const float _beta1;
    const float _mb1;

    MT(const float beta1, const float mb1)
        : _beta1(beta1), _mb1(mb1)
    {
    }

    // Device-callable, as required when Thrust dispatches over device iterators.
    __device__ float operator()(const float& op, const float& gradient)
    {
        const float kept = _beta1 * op;
        const float added = _mb1 * gradient;
        return kept + added;
    }
};
// Stand-ins for the member coefficients the question didn't show. With both
// equal to 1.0f, the test data below (mt=2, grad=1) yields 1*2 + 1*1 = 3.
const float _beta1 = 1.0f;
const float _mb1 = 1.0f;
// In-place elementwise update via the functor:
// _mt[i] = _beta1 * _mt[i] + _mb1 * gradients[i].
void example(const thrust::device_vector<float>& gradients, thrust::device_vector<float>& _mt)
{
    const MT blend(_beta1, _mb1);
    thrust::transform(_mt.begin(), _mt.end(), gradients.begin(), _mt.begin(), blend);
}
// Lambda version of example(). Two changes versus the crashing original:
// the lambda is marked __device__ (requires nvcc's --extended-lambda flag),
// and it captures by value ([=]) instead of [this].
// NOTE(review): _beta1/_mb1 are namespace-scope const floats in this repro,
// which the device lambda can read; when they are class members, [this] would
// be a host pointer — capture copies of the members instead. Confirm in the
// real code.
void example_crash(const thrust::device_vector<float>& gradients, thrust::device_vector<float>& _mt)
{
thrust::transform(_mt.begin(), _mt.end(), gradients.begin(), _mt.begin(), [=] __device__ (const float& op,const float& gradient) { return _beta1 * op + _mb1 * gradient; });
};
const int len = 1000;
int main(){
thrust::device_vector<float> g1(len, 1.0f);
thrust::device_vector<float> mt1(len, 2.0f);
example(g1, mt1);
thrust::host_vector<float> h_mt1 = mt1;
thrust::copy_n(h_mt1.begin(), 2, std::ostream_iterator<float>(std::cout, ","));
std::cout << std::endl;
thrust::device_vector<float> g2(len, 1.0f);
thrust::device_vector<float> mt2(len, 2.0f);
example_crash(g2, mt2);
thrust::host_vector<float> h_mt2 = mt2;
thrust::copy_n(h_mt2.begin(), 2, std::ostream_iterator<float>(std::cout, ","));
std::cout << std::endl;
}
$ nvcc -o t2093 t2093.cu --extended-lambda
$ compute-sanitizer ./t2093
========= COMPUTE-SANITIZER
3,3,
3,3,
========= ERROR SUMMARY: 0 errors
$