Suppose I have a system with a single GPU installed, and suppose I've also installed a recent version of CUDA.
I want to determine the compute capability of my GPU. If I could compile code, that would be easy:
#include <stdio.h>
int main() {
cudaDeviceProp prop;
cudaGetDeviceProperties(&prop, 0);
printf("%d", prop.major * 10 + prop.minor);
}
but - suppose I want to do that without compiling. Can I? I thought nvidia-smi
might help me, since it lets you query all sorts of information about devices, but it seems it doesn't let you obtain the compute capability. Maybe there's something else I can do? Maybe something visible via /proc
or system logs?
Edit: This is intended to run before a build, on a system which I don't control. So it must have minimal dependencies, run on a command-line and not require root privileges.
Edit: This answer is useful for CUDA versions 11.5 and earlier; for 11.6 and later see @idy002's answer.
Unfortunately, it looks like the answer at the moment is "No", and that one needs to either compile a program or use a binary compiled elsewhere.
I have adapted a workaround for this issue - a self-contained bash script which compiles a small built-in C program to determine the compute capability. (It is particularly useful to call from within CMake, but can also be run independently.)
Also, I've filed a feature-requesting bug report at nVIDIA about this.
Here's the script, in a version assuming that nvcc
is on your path:
/usr/bin/env nvcc --run "$0" ${1:+--run-args "${@:1}"} ; exit $?
#include <cstdio>
#include <cstdlib>
#include <cuda_runtime_api.h>
/*
 * Prints the compute capability of a CUDA device as the integer major * 10 + minor,
 * followed by a newline.
 *
 * argv[1] (optional): zero-based index of the device to query; defaults to 0.
 *
 * Returns 0 on success. On any failure (malformed index, index out of range, or a
 * CUDA runtime error) a diagnostic is written to stderr and -1 is returned.
 */
int main(int argc, char *argv[])
{
    long device_index = 0;

    if (argc > 1) {
        /* strtol with an end pointer, so that non-numeric input is rejected rather
           than silently becoming device 0 (which is what atoi would do). */
        char *end = NULL;
        device_index = strtol(argv[1], &end, 10);
        if (end == argv[1] || *end != '\0' || device_index < 0) {
            fprintf(stderr, "Invalid device index '%s' (expected a non-negative integer)\n", argv[1]);
            return -1;
        }
    }

    int device_count = 0;
    cudaError_t status = cudaGetDeviceCount(&device_count);
    if (status != cudaSuccess) {
        fprintf(stderr, "cudaGetDeviceCount() failed: %s\n", cudaGetErrorString(status));
        return -1;
    }

    /* Compared as long: an out-of-range value clamped by strtol is also caught here. */
    if (device_index >= device_count) {
        fprintf(stderr, "Specified device index %ld exceeds the maximum (the device count on this system is %d)\n", device_index, device_count);
        return -1;
    }

    cudaDeviceProp prop;
    status = cudaGetDeviceProperties(&prop, (int) device_index);
    if (status != cudaSuccess) {
        /* Report the actual index value, not the variable's name. */
        fprintf(stderr, "cudaGetDeviceProperties() for device %ld failed: %s\n", device_index, cudaGetErrorString(status));
        return -1;
    }

    printf("%d\n", prop.major * 10 + prop.minor);
    return 0;
}