Search code examples
c++cudathrust

CUDA Device Vector


I want to pass a vector to my kernel. I have a vector of floats, h_vec.

thrust::device_vector<float> d_vec = h_vec;
float* pd_vec = thrust::raw_pointer_cast(d_vec.data());
...
kernel<<<grid, block>>>(pd_vec)

However, in my kernel the vector seems to be filled with zeros, and I have no idea why.

__global__ void kernel (float* pd_vec)

Is it required that I specifically allocate memory for this vector in host with cudaMalloc and cudaMemcpy?


Solution

  • Is it required that I specifically allocate memory for this vector in host with cudaMalloc and cudaMemcpy?

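    No. Copy construction and copy assignment both work just fine between Thrust containers (host or device) and std::vector, so the one-line initialization in the question already allocates the device memory and copies the data into it.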

    For example:

    $ module load cuda/10.1
    
    $ cat notreallyno.cu 
    
    #include <thrust/device_vector.h>
    #include <vector>
    #include <cstdio>
    
    // Prints "<index> <value>" for each of the first n floats in pd_vec.
    //
    // Launch layout: any 1D grid of 1D blocks. The grid-stride loop visits every
    // index in [0, n) exactly once regardless of how many blocks/threads are
    // launched, so multi-block launches no longer print duplicate rows and n may
    // exceed the block size. No shared memory is used.
    //
    // pd_vec must be a device-accessible pointer to at least n floats.
    // Device-side printf is serialized and slow; this is for debugging only.
    __global__ void kernel (const float* pd_vec, int n)
    {
        // 64-bit index arithmetic so blockIdx.x * blockDim.x and the stride
        // increment cannot wrap for large launches.
        const long long stride = static_cast<long long>(gridDim.x) * blockDim.x;
        const long long first  = static_cast<long long>(blockIdx.x) * blockDim.x + threadIdx.x;

        for (long long i = first; i < n; i += stride)
            printf("%d %f \n", static_cast<int>(i), pd_vec[i]);  // i < n, so it fits in int
    }
    
    // Returns true when `status` is cudaSuccess; otherwise reports the failed
    // step (`what`) and the CUDA error string on stderr and returns false.
    static bool checkCuda(cudaError_t status, const char* what)
    {
        if (status == cudaSuccess)
            return true;
        fprintf(stderr, "%s failed: %s\n", what, cudaGetErrorString(status));
        return false;
    }

    // Copies a std::vector<float> into a thrust::device_vector, hands the raw
    // device pointer to `kernel`, and waits for the device-side printf output.
    // Returns 0 on success and 1 if any checked CUDA call reported an error.
    int main()
    {
        bool ok = true;
        {
            // The inner scope ends before cudaDeviceReset() so that d_vec is
            // destroyed (and its device storage released) while the CUDA
            // context still exists.
            std::vector<float> h_vec = { 1.1f, 2.2f, 3.3f, 4.4f, 5.5f, 6.6f, 7.7f, 8.8f, 9.9f, 10.01f };
            thrust::device_vector<float> d_vec = h_vec;   // allocates on the device and copies h_vec
            float* pd_vec = thrust::raw_pointer_cast(d_vec.data());

            int n = static_cast<int>(h_vec.size());
            kernel<<<1, 32>>>(pd_vec, n);

            // A kernel launch returns nothing: configuration errors show up via
            // cudaGetLastError(), execution errors at the next synchronization.
            ok = checkCuda(cudaGetLastError(), "kernel launch")
              && checkCuda(cudaDeviceSynchronize(), "kernel execution");
        }
        // Always attempt the reset, but keep any earlier failure in the exit code.
        ok = checkCuda(cudaDeviceReset(), "cudaDeviceReset") && ok;

        return ok ? 0 : 1;
    }
    
    $ nvcc -std=c++11 -arch=sm_52 -o notreallyno notreallyno.cu 
    
    $ ./notreallyno 
    0 1.100000 
    1 2.200000 
    2 3.300000 
    3 4.400000 
    4 5.500000 
    5 6.600000 
    6 7.700000 
    7 8.800000 
    8 9.900000 
    9 10.010000