How do I convert a std::vector<thrust::device_vector<int>> to int**?

I am working on an application in which previous processing has produced a (shortish but variable-length) std::vector of (big) thrust::device_vectors, each with the same length (but that length is also variable). I need to convert it to a raw pointer on the device to pass it to a cuda kernel.

I followed the process below, which as far as I can see should leave rawNumberSquare as a pointer on the device, with rawNumberSquare[0] and rawNumberSquare[1] holding pointers to numberSquareDevice[0][0] and numberSquareDevice[1][0] respectively. So it seems to me that rawNumberSquare[i][j] (i, j = 0, 1) are all locations allocated by this program and legal to access.

However, when a kernel tries to access these locations, the values are wrong and the program crashes with an illegal memory access.

#include "cuda_runtime.h"
#include "device_launch_parameters.h"
#include <stdio.h>
#include <iostream>
#include <vector>
#include <thrust/device_vector.h>

// Debug kernel: each thread prints one element from every row of `in`.
//
//   in        - array of `dimension` row pointers; both `in` and every in[row]
//               are dereferenced in device code.
//   dimension - number of row pointers stored in `in`.
//
// Only threadIdx.x is used to choose the element, and it is not range-checked,
// so the caller must not launch more threads per block than a row has elements.
__global__ void talkKernel(int** in, int dimension)
{
    const int column = threadIdx.x;
    int row = 0;
    while (row < dimension) {
        printf("in[%d][%d] = %d\n", row, column, in[row][column]);
        ++row;
    }
}

// Minimal reproducer: builds a 2x2 table of ints on the host, copies each row
// into its own thrust::device_vector, gathers one raw pointer per row into a
// device-side array of int*, and launches talkKernel on the resulting int**.
// The sizes (2 rows, 2 threads) are hard-coded to match the 2x2 initializer.
int main()
{
    //print out name of GPU in case it is helpful
    //(the return codes of both runtime calls are ignored here)
    int deviceNumber;
    cudaGetDevice(&deviceNumber);
    cudaDeviceProp prop;
    cudaGetDeviceProperties(&prop, deviceNumber);
    std::cout << prop.name << "\n";
    //make a std::vector of std::vectors of ints
    std::vector<std::vector<int>> numberSquareOnHost{ {1,2}, {3,4} };
    //copy the values of each vector to the device
    std::vector<thrust::device_vector<int>> numberSquareDevice;
    for (auto& vector : numberSquareOnHost)
        numberSquareDevice.push_back(thrust::device_vector<int>(vector));
    //intent: copy the raw pointers to the start of the device vectors to a std::vector
    //NOTE(review): the loop below indexes numberSquareOnHost, whose rows are
    //std::vector<int>, so the pointers it stores are addresses of host storage;
    //numberSquareDevice is never read after being filled above.
    std::vector<int*> halfRawNumberSquareOnHost(2);
    for ( int i = 0; i < 2 ; ++i)
        halfRawNumberSquareOnHost[i] = (thrust::raw_pointer_cast(numberSquareOnHost[i].data()));
    //copy the raw pointers to the device
    thrust::device_vector<int*> halfRawNumberSquareOnDevice(halfRawNumberSquareOnHost);
    //get raw pointer (on the device) to the raw pointers (on the device)
    int** rawNumberSquare = thrust::raw_pointer_cast(halfRawNumberSquareOnDevice.data());
    //call the kernel: one block of two threads, two row pointers
    talkKernel <<<1,2 >>> ( rawNumberSquare, 2);
    //wait for the kernel to finish (the status returned by the sync is discarded)
    cudaDeviceSynchronize();
    //print the error string for whatever cudaGetLastError reports
    std::cout << cudaGetErrorString(cudaGetLastError()) << "\n";
    return 0;

   /*output:
   * Quadro M2200
    in[0][0] = 0
    in[0][1] = 0
    in[1][0] = 0
    in[1][1] = 0
    an illegal memory access was encountered

    ...\vectorOfVectors.exe (process 6428) exited with code -1073740791.
        */
}

I also tried alternatives, such as allocating a host array of (raw device) int* with new rather than using the std::vector<int*> halfRawNumberSquareOnHost, and allocating the device int** rawSquareOnDevice with cudaMalloc (and filling it with cudaMemcpy). This didn't make a difference.

Solution

Your error is here:

halfRawNumberSquareOnHost[i] = (thrust::raw_pointer_cast(numberSquareOnHost[i].data()));

it should be:

halfRawNumberSquareOnHost[i] = (thrust::raw_pointer_cast(numberSquareDevice[i].data()));

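The first grabs a host pointer (not what you want at that point); the second grabs a device pointer. Both versions compile, because thrust::raw_pointer_cast also accepts a plain int*, so the mistake only shows up at run time, when the kernel dereferences a host address. Stated another way, you built numberSquareDevice for a reason, but your code as posted doesn't actually use it.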