In CUDA, we can't throw exceptions; but - we can and do occasionally reach exceptional situations in which we can't proceed, and on the host we would have thrown an exception.
So, as a second best, we can at least trigger a runtime error to stop doing unreasonable work and indicate that something went wrong.
What's a good way to do so in a CUDA kernel, which:
?
All presently supported GPUs include an in kernel assertion mechanism, described here.
Directly from the documentation:
#include <assert.h>
__global__ void testAssert(void)
{
int is_one = 1;
int should_be_one = 0;
// This will have no effect
assert(is_one);
// This will halt kernel execution
assert(should_be_one);
}
int main(int argc, char* argv[])
{
testAssert<<<1,1>>>();
cudaDeviceSynchronize();
return 0;
}
There is a dedicated CUDA runtime error cudaErrorAssert
which will be reported by any kernel which fires an assertion call during execution. As per all other device side runtime errors, the context will be destroyed on the error and a new context will need to be created (by calling cudaDeviceReset()
).
Note that is (unfortunately) not supported on MacOS because of driver limitations.
You can use inline ptx and asm("trap;") to trigger an illegal instruction.
Here is some code demonstrating that:
#include <cuda_runtime.h>
#include <device_launch_parameters.h>
#include <cstdio>
#include <cstdlib>
__global__ void kernel(int i) {
if(i > 0) {
asm("trap;");
}
::printf("%d\n", i);
}
inline void error_check(cudaError_t err, const char* file, int line) {
if(err != cudaSuccess) {
::fprintf(stderr, "CUDA ERROR at %s[%d] : %s\n", file, line, cudaGetErrorString(err));
abort();
}
}
#define CUDA_CHECK(err) do { error_check(err, __FILE__, __LINE__); } while(0)
int main() {
kernel<<<1, 1>>>(0);
CUDA_CHECK(cudaGetLastError());
CUDA_CHECK(cudaDeviceSynchronize());
kernel<<<1, 1>>>(1);
CUDA_CHECK(cudaGetLastError());
CUDA_CHECK(cudaDeviceSynchronize());
}
which outputs:
0
CUDA ERROR at ...kernel.cu[31] : an illegal instruction was encountered