CUDA Device Vector

I want to pass a vector to my kernel. I have a vector of floats, h_vec.

// Construct a device_vector from h_vec; Thrust copies the elements into device memory.
thrust::device_vector<float> d_vec = h_vec;
// Obtain a raw float* to d_vec's storage so it can be passed as a kernel argument.
float* pd_vec = thrust::raw_pointer_cast(d_vec.data());
...
// NOTE(review): no element count is passed to the kernel here — confirm how it bounds its reads.
kernel<<<grid, block>>>(pd_vec)

However, in my kernel the vector seems to be filled with zeros, and I have no idea why.

__global__ void kernel (float* pd_vec)

Do I need to explicitly allocate memory for this vector from the host code with cudaMalloc and copy the data with cudaMemcpy?

Solution

Do I need to explicitly allocate memory for this vector from the host code with cudaMalloc and copy the data with cudaMemcpy?

No. Copy construction and copy assignment both work just fine between thrust containers (host or device) and std::vector; thrust allocates the device memory and copies the data for you.

For example:

$ module load cuda/10.1

$ cat notreallyno.cu 

#include <thrust/device_vector.h>
#include <vector>
#include <cstdio>

/**
 * Debug kernel: prints elements of a device array, one line per element,
 * formatted as "<index> <value>".
 *
 * Launch layout: 1D grid of 1D blocks. Each thread handles the element at its
 * global index, so the launch must supply at least n threads in total for all
 * n elements to be printed. No shared memory is used.
 *
 * @param pd_vec  device pointer to at least n floats (only read)
 * @param n       number of elements to print; n <= 0 prints nothing
 */
__global__ void kernel (float* pd_vec, int n)
{
    // Use a signed global index. Comparing the unsigned threadIdx.x directly
    // against n would convert a negative n to a huge unsigned value, letting
    // every thread pass the guard and read out of bounds. A signed index also
    // matches the "%d" conversion below.
    const int idx = static_cast<int>(blockIdx.x * blockDim.x + threadIdx.x);

    if (idx < n)
        printf("%d %f \n", idx, pd_vec[idx]);
}

/**
 * Copies a host std::vector into a thrust::device_vector, launches `kernel`
 * on the raw device pointer so the device prints the contents, and reports
 * any CUDA error from the launch or the kernel's execution.
 *
 * @return 0 on success, 1 if the kernel launch or execution failed
 */
int main()
{
    int status = 0;

    // Inner scope: d_vec goes out of scope (releasing its device allocation)
    // before cudaDeviceReset() is called below.
    {
        const std::vector<float> h_vec = { 1.1f, 2.2f, 3.3f, 4.4f, 5.5f, 6.6f, 7.7f, 8.8f, 9.9f, 10.01f };
        thrust::device_vector<float> d_vec = h_vec;
        float* pd_vec = thrust::raw_pointer_cast(d_vec.data());

        const int n = static_cast<int>(h_vec.size());
        kernel<<<1, 32>>>(pd_vec, n);

        // A kernel launch returns no status itself: launch failures are
        // reported by cudaGetLastError(), execution failures by the next
        // synchronizing call. Without these checks a failed launch would
        // silently print nothing and still exit with 0.
        cudaError_t err = cudaGetLastError();
        if (err == cudaSuccess)
            err = cudaDeviceSynchronize();   // also flushes device-side printf output
        if (err != cudaSuccess) {
            fprintf(stderr, "CUDA error: %s\n", cudaGetErrorString(err));
            status = 1;
        }
    }
    cudaDeviceReset();

    return status;
}

$ nvcc -std=c++11 -arch=sm_52 -o notreallyno notreallyno.cu 

$ ./notreallyno 
0 1.100000 
1 2.200000 
2 3.300000 
3 4.400000 
4 5.500000 
5 6.600000 
6 7.700000 
7 8.800000 
8 9.900000 
9 10.010000