Search code examples
cudamemcpy

CUDA device to host data transfer error


I have a problem retrieving device data back to the host. In the sample program below, I create a hostData array on the host and fill it with values. I then transfer hostData to the device buffer deviceData, and transfer the same data back into a new host array, hostDataFinal. When I print the values stored in hostDataFinal, only the first few values are correct; the rest are garbage. All the values should be the same as in the hostData array. Please help me with this.

#include "cuda_runtime.h"
#include "device_launch_parameters.h"
#include <iostream>

int main()
{
int totalData = 20;
int *hostData;
hostData = new int[totalData];
std::cout<<std::endl<<std::endl<<"HostData Values : "<<std::endl;
for (int i=0; i<totalData ;i++)
{
    hostData[i]= i+1;
    std::cout<<hostData[i]<<" "; 
}

int *deviceData;
int *hostDataFinal;
cudaMalloc((void**)&deviceData,totalData*sizeof(int));
hostDataFinal = new int[totalData];

cudaMemcpy(deviceData,hostData,totalData,cudaMemcpyHostToDevice);
cudaMemcpy(hostDataFinal,deviceData,totalData,cudaMemcpyDeviceToHost);

std::cout<<std::endl<<std::endl<<"HostDataFinal After Device Transfer Values : "<<std::endl;
for (int i=0; i<totalData ;i++)
{
    std::cout<<hostDataFinal[i]<<" "; 
}

free(hostData);
free(hostDataFinal);
cudaFree(deviceData);
return 0;

}

Output of the Program :

HostData Values : 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20

HostDataFinal After Device Transfer Values :

1 2 3 4 5 -842150451 -842150451 -842150451 -842150451 -842150451 -842150451 -842150451 -842150451 -842150451 -842150451 -842150451 -842150451 -842150451 -842150451 -842150451

Actual Output should be :

HostData Values : 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20

HostDataFinal After Device Transfer Values :

1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20

Thank you for your help.


Solution

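  • Your problem is in these lines. The third parameter of cudaMemcpy is the number of bytes to copy, not the number of elements. Passing totalData (20) copies only 20 bytes, i.e. 20 / sizeof(int) = 5 ints, which is why only the first five values are correct.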

    cudaMemcpy(deviceData,hostData,totalData,cudaMemcpyHostToDevice);
    cudaMemcpy(hostDataFinal,deviceData,totalData,cudaMemcpyDeviceToHost);
    

    should be

    cudaMemcpy(deviceData,hostData,totalData *sizeof(int),cudaMemcpyHostToDevice);
    cudaMemcpy(hostDataFinal,deviceData,totalData*sizeof(int),cudaMemcpyDeviceToHost);
    

    Also make sure you check the error code returned by every CUDA runtime call (compare it against cudaSuccess and report failures with cudaGetErrorString).