Search code examples
c++cuda

Do I need to redo cudaHostRegister after switching GPU devices with C++?


For example, it looks like I don't need to do cudaHostUnregister and cudaHostRegister if I switch the device.

The performance and GPU shared memory do not change much.

// Demonstrates that a host buffer registered (page-locked) once can be used as
// the source of cudaMemcpy for two different devices without being
// unregistered and re-registered in between.
//
// Error handling: cudaStatus carries the first failure. Every later step that
// depends on earlier ones runs only while cudaStatus == cudaSuccess; cleanup
// always runs. After the snippet, cudaStatus is either cudaSuccess or the
// first error encountered (cudaGetErrorString(cudaStatus) gives its text).

// Set Device to 0 
cudaError_t cudaStatus = cudaSetDevice(0);

// Allocate buffers 
int size = 8192;
// sizeof comes first so the whole product is evaluated in size_t rather than
// int; the element count is derived from it to avoid an int overflow in
// size * size for larger sizes.
const auto totalBytes = sizeof(float) * size * size;
float* cpuBuffer = new float[totalBytes / sizeof(float)];

// Page-lock the existing host allocation. Per the CUDA runtime documentation,
// cudaHostRegisterPortable makes the registration count as pinned memory for
// all CUDA contexts, not only the one that is current here.
bool hostRegistered = false;
if (cudaStatus == cudaSuccess) {
    cudaStatus = cudaHostRegister(cpuBuffer, totalBytes, cudaHostRegisterPortable);
    hostRegistered = (cudaStatus == cudaSuccess);
}

float* gpuBuffer = nullptr;
if (cudaStatus == cudaSuccess) {
    cudaStatus = cudaMalloc((void**)&gpuBuffer, totalBytes);
}

// Transfer 
if (cudaStatus == cudaSuccess) {
    cudaStatus = cudaMemcpy(gpuBuffer, cpuBuffer, totalBytes, cudaMemcpyHostToDevice);
}

// Free gpu
// cudaFree is a no-op for a null pointer, so this is safe even if the
// allocation above was skipped or failed.
cudaError_t freeStatus = cudaFree(gpuBuffer);
// Drop the stale address so a failed allocation on device 1 cannot lead to a
// copy into, or a second free of, device 0's released memory.
gpuBuffer = nullptr;
if (cudaStatus == cudaSuccess) {
    cudaStatus = freeStatus;
}
// Intentionally NOT unregistering here: the registration is reused below.
//cudaHostUnregister(cpuBuffer);

// Switch to Device 1
if (cudaStatus == cudaSuccess) {
    cudaStatus = cudaSetDevice(1);
}

if (cudaStatus == cudaSuccess) {
    cudaStatus = cudaMalloc((void**)&gpuBuffer, totalBytes);
}
// Intentionally NOT re-registering: cpuBuffer is still page-locked.
//cudaHostRegister((void*)cpuBuffer, totalBytes, cudaHostRegisterPortable);

// Transfer 
if (cudaStatus == cudaSuccess) {
    cudaStatus = cudaMemcpy(gpuBuffer, cpuBuffer, totalBytes, cudaMemcpyHostToDevice);
}

// Cleanup always runs; only the first error is kept in cudaStatus.
freeStatus = cudaFree(gpuBuffer);
gpuBuffer = nullptr;
if (cudaStatus == cudaSuccess) {
    cudaStatus = freeStatus;
}

// Unregister only if the registration actually succeeded, and always before
// the host memory itself is released.
if (hostRegistered) {
    const cudaError_t unregisterStatus = cudaHostUnregister(cpuBuffer);
    if (cudaStatus == cudaSuccess) {
        cudaStatus = unregisterStatus;
    }
}
delete[] cpuBuffer;

Solution

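  • No. cudaHostRegister just pins the CPU memory of an already allocated CPU buffer. It is not dependent on the current GPU. Host memory is always accessible by all GPUs in cudaMemcpy calls. Note that the example passes cudaHostRegisterPortable, which the CUDA documentation describes as making the registered memory count as pinned for all CUDA contexts, not just the one that was current when it was registered.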