Consider the following sample program:
#include <iostream>
using namespace std;
// Pointer declared in constant memory; it is not referenced by any of the code shown in this sample.
__device__ __constant__ float* data;
// Requests numElem elements of type T from cudaMalloc and prints the error
// code if the call fails.
// NOTE: deviceDest is received by value, so &deviceDest is the address of this
// function's own copy of the pointer. cudaMalloc stores the new address in
// that copy only; the pointer variable in the caller is left unchanged.
template<class T> void allocOnly(T* deviceDest, size_t numElem)
{
cudaError_t errCode = cudaMalloc((void**)&deviceDest, numElem*sizeof(T));
if(errCode != cudaSuccess)
cout << "Got error with code " << errCode << endl;
}
// Reproduces the problem: allocates once through allocOnly and once by calling
// cudaMalloc directly, printing the resulting pointer value after each attempt.
int main()
{
float* test(0);
// test is passed by value here, so allocOnly operates on a copy of the pointer.
allocOnly<float>(test,10);
cout << "test = " << test << endl;
float* test2(0);
// Direct call: cudaMalloc receives the address of test2 itself.
cudaError_t errCode = cudaMalloc((void**)&test2, 10*sizeof(float));
if(errCode != cudaSuccess)
cout << "Got error with code " << errCode << endl;
cout << "test2 = " << test2 << endl;
return 0;
}
When compiled with nvcc test.cu -o testBin
and run, it prints:
test = 0
test2 = 0x310100
Why is test not modified when cudaMalloc is called through the template function? cudaMalloc is supposed to set it to point to the newly allocated device memory!
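The pointer is not modified because allocOnly receives its argument deviceDest by value.
Inside allocOnly, cudaMalloc writes the address of the new device allocation into deviceDest,
which is a copy of the pointer that is local to the function, so the caller's variable test is never touched
(and the allocated memory is leaked, because its address is lost when allocOnly returns).
To fix this, change allocOnly so that it takes the address of the caller's pointer
and passes it straight through to cudaMalloc, as follows: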
// Allocates room for numElem elements of type T on the device.
//
// deviceDest: address of the caller's pointer; on success cudaMalloc stores
//             the device address of the new allocation in *deviceDest.
// numElem:    number of elements of type T to allocate.
//
// On failure the error code and its description are printed and *deviceDest
// is set to null, so the caller can detect the failure even though this
// function returns void.
template<class T> void allocOnly(T** deviceDest, size_t numElem)
{
    // deviceDest already is the address of the caller's pointer, so it is
    // forwarded as-is (no extra '&'); cudaMalloc writes through it.
    cudaError_t errCode = cudaMalloc((void**)deviceDest, numElem * sizeof(T));
    if (errCode != cudaSuccess)
    {
        cout << "Got error with code " << errCode
             << " (" << cudaGetErrorString(errCode) << ")" << endl;
        // Do not leave the caller holding an unspecified pointer value.
        if (deviceDest)
            *deviceDest = 0;
    }
}
Inside the main function, pass the address of the pointer (&test) instead of the pointer itself:
// Call site for the allocOnly overload that takes T**: the address of the
// pointer is passed so that the allocation result is visible in main.
int main()
{
    float* test(0);
    // &test lets allocOnly (and therefore cudaMalloc) write into test itself.
    allocOnly<float>(&test, 10);
    cout << "test = " << test << endl;
    // ... rest of the program: use the buffer, then release it with cudaFree(test) ...
    return 0;
}