An error "operation not permitted" is generated when running the following code. Is there anything I am missing?
I'm running it on a device with compute capability 7.5, compiled with the command `nvcc test.cu -rdc=true`. It works when compiled without `-rdc=true` (relocatable device code).
#include <cooperative_groups.h>
#include <iostream>
// Minimal reproducer kernel: every thread requests sizeof(int) bytes through
// the device-side cudaMalloc. The returned pointer is never read, written or
// freed, and the call's status code is deliberately left unchecked.
//
// NOTE(review): according to the report accompanying this code, launching this
// kernel via cudaLaunchCooperativeKernel from a binary built with -rdc=true
// fails with "operation not permitted"; the same code works without -rdc.
// The suggested workaround is in-kernel malloc() instead of cudaMalloc().
__global__ void kernel() {
    void* devPtr;
    const size_t numBytes = sizeof(int);
    cudaMalloc(&devPtr, numBytes);
}
// Host entry point: verifies that device 0 supports cooperative launches, then
// launches `kernel` cooperatively with a grid of 2 blocks of 1 thread each and
// waits for it to finish.
//
// Returns 0 when the kernel ran to completion, or when the device reports no
// cooperative-launch support (nothing is launched in that case). Returns 1
// when any CUDA runtime call fails, after printing
// "CUDA runtime error <description>" to stderr.
int main() {
    const int dev = 0;

    // Query cooperative-launch support. The status is checked so that a failed
    // query is not mistaken for "feature unsupported".
    int supportsCoopLaunch = 0;
    cudaError_t e = cudaDeviceGetAttribute(&supportsCoopLaunch, cudaDevAttrCooperativeLaunch, dev);
    if (e != cudaSuccess) {
        std::cerr << "CUDA runtime error " << cudaGetErrorString(e) << std::endl;
        return 1;
    }
    if (supportsCoopLaunch == 0) {
        std::cout << "Device does not support cooperative launch, required to synchronize globally on the grid." << std::endl;
        return 0;
    }

    // `kernel` takes no parameters, so no element of this array is consumed.
    // A one-element placeholder is used because a zero-length array
    // (`void* args[] = {};`) is not valid ISO C++.
    void* args[] = {nullptr};
    const dim3 dimBlock(1, 1, 1);
    const dim3 dimGrid(2, 1, 1);

    // Launch errors (including "operation not permitted") are reported here.
    e = cudaLaunchCooperativeKernel((void*)kernel, dimGrid, dimBlock, args);
    if (e != cudaSuccess) {
        std::cerr << "CUDA runtime error " << cudaGetErrorString(e) << std::endl;
        return 1;
    }

    // Errors raised while the kernel executes only surface at a synchronizing
    // call, so the result of the synchronize must be checked as well.
    e = cudaDeviceSynchronize();
    if (e != cudaSuccess) {
        std::cerr << "CUDA runtime error " << cudaGetErrorString(e) << std::endl;
        return 1;
    }
    return 0;
}
``
Use the in-kernel `malloc` instead of the device-side `cudaMalloc`. The failure is probably due to a bug (see the comments by Robert Crovella above).