Search code examples
Tags: c, cuda, gpu, constant-memory

Error in cudaMemcpyToSymbol using CUDA 5


The Problem

I have prepared a sample CUDA program that uses constant memory. It runs successfully when built with CUDA 4.2, but when built with CUDA 5 the call to cudaMemcpyToSymbol fails at run time with "invalid device symbol". The sample code is included below.

The Code

#include <iostream>
#include <stdio.h>
#include <cuda_runtime.h>
#include <cuda.h>

// Parameter block shared between host and device code.
// A is the only field the kernel in this file reads; B, C and D are
// filled in on the host but are not referenced by the kernel.
struct CParameter
{
    int   A;        // integer factor applied to each array element by square_array
    float B, C, D;  // additional parameters, assigned in main()
};

// Device-side pointer to the parameter block. The pointer itself lives in
// constant memory; the host stores a device address into it with
// cudaMemcpyToSymbol before launching the kernel.
__constant__ CParameter * CONSTANT_PARAMETER;
// Handle used by host code to refer to the symbol above. It expands to the
// symbol itself rather than to its name as a string: looking device symbols
// up by character string was deprecated in CUDA 4.1 and removed in CUDA 5.0,
// where it fails with "invalid device symbol".
#define PARAMETER CONSTANT_PARAMETER

// Reports a CUDA runtime status.
// Returns true when Status is cudaSuccess; otherwise prints the runtime's
// description of the error to stdout and returns false.
bool ERROR_CHECK(cudaError_t Status)
{
    if(Status != cudaSuccess)
    {
        // Print through an explicit "%s" format so the error text is never
        // interpreted as a format string. The trailing newline keeps the
        // message from running into whatever is printed next.
        printf("%s\n", cudaGetErrorString(Status));
        return false;
    }
    return true;
}

// Kernel that executes on the CUDA device.
// Multiplies each of the first N elements of a, in place, by the integer
// field A of the struct that CONSTANT_PARAMETER points to.
// One thread handles one element; threads whose index is N or greater do
// nothing, so the launch may cover more threads than there are elements.
__global__ void square_array(float *a, int N)
{
  const int i = threadIdx.x + blockDim.x * blockIdx.x;
  if (i >= N)
      return;

  a[i] *= CONSTANT_PARAMETER->A;
}
////Main Function/////
// Host driver for the sample:
//   1. fills a host array with 0..N-1 and a host CParameter with test values,
//   2. copies both to device memory,
//   3. stores the device address of the CParameter into CONSTANT_PARAMETER,
//   4. launches square_array and prints the resulting array.
// Returns 0 on success and 1 if any CUDA call reported an error.
int main(void)
{
    /////Variable Definition
    const int N = 10;
    const size_t size = N * sizeof(float);

    // Device pointers start out null so the cleanup below is safe even when
    // an allocation fails (cudaFree on a null pointer is a no-op).
    CParameter * m_dParameter = NULL;
    float * m_D = NULL;

    //Memory Allocation Host
    CParameter * m_hParameter = new CParameter;
    float * m_H = new float[N];

    ////Data Initialization
    for (int i = 0; i < N; i++)
        m_H[i] = (float)i;

    m_hParameter->A = 5;
    m_hParameter->B = 3;
    m_hParameter->C = 98;
    m_hParameter->D = 100;

    //Memory Allocation Device
    // Each step runs only if every earlier one succeeded (short-circuit &&).
    bool ok = ERROR_CHECK(cudaMalloc((void **) &m_D, size));
    ok = ok && ERROR_CHECK(cudaMalloc((void **) &m_dParameter, sizeof(CParameter)));

    //Memory Copy from Host To Device
    ok = ok && ERROR_CHECK(cudaMemcpy(m_D, m_H, size, cudaMemcpyHostToDevice));
    ok = ok && ERROR_CHECK(cudaMemcpy(m_dParameter, m_hParameter, sizeof(CParameter), cudaMemcpyHostToDevice));

    // Store the device address of the parameter block in the __constant__
    // pointer. The symbol is passed directly, not as the string
    // "CONSTANT_PARAMETER": string lookup was removed in CUDA 5.0 and fails
    // there with "invalid device symbol". The size is that of the pointer,
    // because the pointer value is what gets copied.
    ok = ok && ERROR_CHECK(cudaMemcpyToSymbol(CONSTANT_PARAMETER, &m_dParameter, sizeof(m_dParameter)));

    if (ok)
    {
        // Do calculation on device:
        const int block_size = 4;
        const int n_blocks = (N + block_size - 1) / block_size;   // round up

        square_array <<<n_blocks, block_size>>>(m_D, N);

        // A kernel launch returns no status; launch failures are reported
        // through cudaGetLastError.
        ok = ERROR_CHECK(cudaGetLastError());
    }

    // Retrieve result from device and store it in host array.
    // This blocking copy also reports errors raised while the kernel ran.
    ok = ok && ERROR_CHECK(cudaMemcpy(m_H, m_D, size, cudaMemcpyDeviceToHost));

    // Print results
    if (ok)
    {
        for (int i = 0; i < N; i++)
            printf("%d %f\n", i, m_H[i]);
    }

    // Cleanup
    // Host memory came from new[] / new, so it is released with
    // delete[] / delete rather than free().
    delete[] m_H;
    delete m_hParameter;
    cudaFree(m_dParameter);
    cudaFree(m_D);
    return ok ? 0 : 1;
}

I tested this on Windows with the CUDA 5.0 Production Release; the graphics card is a GTX 590.
Any help will be appreciated.


Solution

  • In an effort to avoid being "Stringly Typed", the use of character strings to refer to device symbols was deprecated in CUDA runtime API functions in CUDA 4.1, and removed in CUDA 5.0.

    The CUDA 5 release notes read:

    > "The use of a character string to indicate a device symbol, which was possible with certain API functions, is no longer supported. Instead, the symbol should be used directly."

    If you change your code to the following, it should work.

    // Pass the __constant__ variable itself rather than its name as a string.
    Status = cudaMemcpyToSymbol(CONSTANT_PARAMETER, &m_dParameter, sizeof(m_dParameter));
    ERROR_CHECK(Status);
    

    enter image description here