Search code examples
Tags: c, cuda, textures, extern

CUDA extern texture declaration


I want to declare my texture once and use it in all my kernels and files. Therefore, I declare it as extern in a header and include that header in all other files (following the SO question "How do I use extern to share variables between source files?").

I have a header cudaHeader.cuh file containing my texture:

extern texture<uchar4, 2, cudaReadModeElementType> texImage;

In my file1.cu, I allocate my CUDA array and bind it to the texture:

// Describe the texel format (uchar4) and allocate a width x height CUDA array for it.
cudaChannelFormatDesc channelDesc = cudaCreateChannelDesc< uchar4 >( );
cudaStatus=cudaMallocArray( &cu_array_image, &channelDesc, width, height ); 
if (cudaStatus != cudaSuccess) {
    fprintf(stderr, "cudaMallocArray failed! cu_array_image couldn't be created.\n");
    return cudaStatus;
}

// Copy the host buffer `image` into the CUDA array, starting at offset (0, 0).
// NOTE(review): size_image is presumably the byte count width * height * sizeof(uchar4) -- confirm at the caller.
cudaStatus=cudaMemcpyToArray( cu_array_image, 0, 0, image, size_image, cudaMemcpyHostToDevice);
if (cudaStatus != cudaSuccess) {
    fprintf(stderr, "cudaMemcpyToArray failed! Copy from the host memory to the device texture memory failed.\n");
    return cudaStatus;
}


// Set texture parameters: wrap addressing in both dimensions, point (nearest) filtering.
texImage.addressMode[0] = cudaAddressModeWrap;
texImage.addressMode[1] = cudaAddressModeWrap;
texImage.filterMode = cudaFilterModePoint;
texImage.normalized = false;    // access with unnormalized (texel-index) texture coordinates

// Bind the array to the texture reference so kernels can sample it through texImage.
cudaStatus=cudaBindTextureToArray( texImage, cu_array_image, channelDesc);
if (cudaStatus != cudaSuccess) {
    fprintf(stderr, "cudaBindTextureToArray failed! cu_array couldn't be bind to texImage.\n");
    return cudaStatus;
}

In file2.cu, I use the texture in the kernel function as follows:

/**
 * Copies the x/y/z channels of the 2D texture texImage into a packed
 * 3-bytes-per-pixel output buffer.
 *
 * Launch layout: 2D grid of 2D blocks; each thread handles the pixel
 * (x, y) derived from its block and thread indices. The grid may be
 * larger than the image: threads outside width x height do nothing.
 *
 * @param width     image width in pixels (one output row is width * 3 bytes)
 * @param height    image height in pixels
 * @param dev_image device output buffer; must hold at least width * height * 3 bytes
 */
__global__ void kernel(int width, int height, unsigned char *dev_image) {
    int x = blockIdx.x*blockDim.x + threadIdx.x;
    int y = blockIdx.y*blockDim.y + threadIdx.y;

    // Guard BOTH dimensions: the grid rarely divides the image evenly, and a
    // thread with x >= width would otherwise write into the next row (or past
    // the end of dev_image on the last row).
    if (x >= width || y >= height) {
        return;
    }

    uchar4 tempcolor = tex2D(texImage, x, y);

    //if(tempcolor.x==0)
    //  printf("tempcolor.x %d \n", tempcolor.x);

    // Byte offset of this pixel in the packed output; computed once in size_t
    // so large images cannot overflow int arithmetic.
    size_t base = ((size_t)y * (size_t)width + (size_t)x) * 3;
    dev_image[base]     = tempcolor.x;
    dev_image[base + 1] = tempcolor.y;
    dev_image[base + 2] = tempcolor.z;
}

The problem is that my texture contains nothing or corrupt values when I use it in my file2.cu. Even if I use the function kernel directly in file1.cu, the data are not correct.

If I add: texture<uchar4, 2, cudaReadModeElementType> texImage; in file1.cu and file2.cu, the compiler says that there is a redefinition.

EDIT:

I tried the same thing with CUDA version 5.0, but the same problem appears. If I print the address of texImage in file1.cu and file2.cu, I do not get the same address. There must be a problem with the declaration of the variable texImage.


Solution

  • This is a very old question, and answers were provided in the comments by talonmies and Tom. Before CUDA 5.0, extern textures were not feasible because there was no true device linker, and therefore no possibility of external linkage for device symbols. As a consequence, and as mentioned by Tom,

    you can have different compilation units, but they cannot reference each other

    Since CUDA 5.0, extern textures are possible. Below is a simple example demonstrating this, in the hope that it will be useful to other users.

    kernel.cu compilation unit

    #include <stdio.h>
    
    texture<int, 1, cudaReadModeElementType> texture_test;
    
    /********************/
    /* CUDA ERROR CHECK */
    /********************/
    // gpuErrchk(ans): evaluates a CUDA runtime call and forwards its status,
    // together with the call site's file and line, to gpuAssert.
    #define gpuErrchk(ans) { gpuAssert((ans), __FILE__, __LINE__); }

    // Reports a failed CUDA status on stderr and, when `abort` is true,
    // terminates the process using the status as the exit code.
    //   code  - status returned by a CUDA runtime call
    //   file  - source file of the call site
    //   line  - source line of the call site
    //   abort - exit after reporting (default) or just report and continue
    inline void gpuAssert(cudaError_t code, const char *file, int line, bool abort=true)
    {
       // Nothing to report on success.
       if (code == cudaSuccess) return;

       fprintf(stderr,"GPUassert: %s %s %d\n", cudaGetErrorString(code), file, line);
       if (abort) exit(code);
    }
    
    /*************************/
    /* LOCAL KERNEL FUNCTION */
    /*************************/
    // Each thread fetches the element of texture_test at index threadIdx.x
    // and prints it together with its thread index.
    __global__ void kernel1() {
        const int tid   = threadIdx.x;
        const int texel = tex1Dfetch(texture_test, tid);

        printf("ThreadID = %i; Texture value = %i\n", tid, texel);
    }
    
    __global__ void kernel2();
    
    /********/
    /* MAIN */
    /********/
    // Fills a host array with 0..N-1, copies it to the device, binds it to
    // texture_test, then launches kernel1 (this compilation unit) and kernel2
    // (defined in another compilation unit) so both read the same texture.
    // Returns 0 on success, 1 if the host allocation fails; any CUDA error
    // terminates the process through gpuErrchk.
    int main() {

        const int N = 16;

        // --- Host data allocation and initialization
        int *h_data = (int*)malloc(N * sizeof(int));
        if (h_data == NULL) {
            fprintf(stderr, "Host allocation of %d ints failed\n", N);
            return 1;
        }
        for (int i=0; i<N; i++) h_data[i] = i;

        // --- Device data allocation and host->device memory transfer
        int *d_data; gpuErrchk(cudaMalloc((void**)&d_data, N * sizeof(int)));
        gpuErrchk(cudaMemcpy(d_data, h_data, N * sizeof(int), cudaMemcpyHostToDevice));

        // --- Bind the linear device buffer to the texture reference
        gpuErrchk(cudaBindTexture(NULL, texture_test, d_data, N * sizeof(int)));

        // --- One thread per element; peek catches launch errors, the
        //     synchronize surfaces errors raised while the kernel runs
        kernel1<<<1, N>>>();
        gpuErrchk(cudaPeekAtLastError());
        gpuErrchk(cudaDeviceSynchronize());

        kernel2<<<1, N>>>();
        gpuErrchk(cudaPeekAtLastError());
        gpuErrchk(cudaDeviceSynchronize());

        gpuErrchk(cudaUnbindTexture(texture_test));

        // --- Release device and host memory (previously leaked)
        gpuErrchk(cudaFree(d_data));
        free(h_data);

        return 0;
    }
    

    kernel2.cu compilation unit

    #include <stdio.h>
    
    extern texture<int, 1, cudaReadModeElementType> texture_test;
    
    /**********************************************/
    /* DIFFERENT COMPILATION UNIT KERNEL FUNCTION */
    /**********************************************/
    // Each thread fetches the element of the externally defined texture_test
    // at index threadIdx.x and prints it.
    __global__ void kernel2() {
        const int texel = tex1Dfetch(texture_test, threadIdx.x);

        printf("Texture value = %i\n", texel);
    }
    

    Remember to compile with relocatable device code enabled, namely with -rdc=true, to enable external linkage.