I want to pass a vector to my kernel. I have a vector of floats, h_vec.
thrust::device_vector<float> d_vec = h_vec;
float* pd_vec = thrust::raw_pointer_cast(d_vec.data());
...
kernel<<<grid, block>>>(pd_vec)
However in my kernel the vector seems to be filled with zeros and I have no idea why.
__global__ void kernel (float* pd_vec)
Is it required that I specifically allocate memory for this vector in host with cudaMalloc and cudaMemcpy?
Is it required that I specifically allocate memory for this vector in host with cudaMalloc and cudaMemcpy?
No. Copy construction and copy assignment both work just fine between thrust containers (host or device) and std::vector.
For example:
$ module load cuda/10.1
$ cat notreallyno.cu
#include <thrust/device_vector.h>
#include <vector>
#include <cstdio>
// Debug kernel: each thread whose index is below n prints its index and the
// matching element of pd_vec.
//
// Launch layout: only threadIdx.x is used, so this is meant for a single 1D
// block; with several blocks every block would print the same leading
// elements again.
//
// pd_vec  device pointer to at least n floats
// n       number of valid elements; n <= 0 prints nothing
__global__ void kernel (float* pd_vec, int n)
{
    // Convert to a signed index once. Comparing the unsigned threadIdx.x
    // directly against n would convert a negative n to a huge unsigned value,
    // letting every thread through the guard and reading out of bounds. The
    // signed index also matches the %d conversion below.
    const int i = static_cast<int>(threadIdx.x);
    if (i < n)
        printf("%d %f \n", i, pd_vec[i]);
}
// Copies a std::vector<float> into a thrust::device_vector<float>, passes the
// raw device pointer to `kernel`, and reports any launch or execution error.
// Returns 0 on success and 1 if the launch or the kernel itself failed.
int main()
{
    int status = 0;
    {
        // Inner scope: d_vec is destroyed at the closing brace, i.e. before
        // cudaDeviceReset() is called below.
        std::vector<float> h_vec = { 1.1f, 2.2f, 3.3f, 4.4f, 5.5f, 6.6f, 7.7f, 8.8f, 9.9f, 10.01f };

        // Constructing the device_vector from the host vector performs the
        // allocation and the host-to-device copy; no explicit
        // cudaMalloc/cudaMemcpy is needed.
        thrust::device_vector<float> d_vec = h_vec;
        float* pd_vec = thrust::raw_pointer_cast(d_vec.data());
        int n = static_cast<int>(h_vec.size());

        kernel<<<1, 32>>>(pd_vec, n);

        // A launch does not return an error itself: configuration errors show
        // up through cudaGetLastError(), faults during execution through the
        // next synchronizing call.
        cudaError_t err = cudaGetLastError();
        if (err == cudaSuccess)
            err = cudaDeviceSynchronize();  // also flushes device-side printf output
        if (err != cudaSuccess) {
            fprintf(stderr, "CUDA error: %s\n", cudaGetErrorString(err));
            status = 1;
        }
    }
    cudaDeviceReset();
    return status;
}
$ nvcc -std=c++11 -arch=sm_52 -o notreallyno notreallyno.cu
$ ./notreallyno
0 1.100000
1 2.200000
2 3.300000
3 4.400000
4 5.500000
5 6.600000
6 7.700000
7 8.800000
8 9.900000
9 10.010000