Search code examples
cudansight

why am I seeing a black screen when I try this code in cuda?


I'm using Win8 and Nsight in "visual studio 2010" and I installed "310.90-notebook-win8-win7-winvista-32bit-international-whql" for my Graphic card(9300m Gs).but when I try the code below,I see a black screen!and an error :"Display driver stoped responding and has recoverd"! I know that the problem is with "cudaMemcpy",but I don't why!?

#include "cuda_runtime.h"
#include "device_launch_parameters.h"

#include <stdio.h>

#define N 8
__global__ void kernel(int *a)
{
int x = threadIdx.x + blockIdx.x * blockDim.x;
int step = x;
while(step<N){
    a[step] =  threadIdx.x;
    step += x;
}
}

int main()
{
int a[N],i=N,j=0;
for(;j<N;j++)
    a[j]=i--;

int *dev_a;
cudaMalloc( (void**)&dev_a, N * sizeof(int) );
cudaMemcpy( dev_a, a, N * sizeof(int), cudaMemcpyHostToDevice);

    kernel<<<2,2>>>(dev_a);

cudaError_t cudaStatus = cudaMemcpy(a, dev_a,N-1 * sizeof(int), cudaMemcpyDeviceToHost);
if (cudaStatus != cudaSuccess) {
    fprintf(stderr, "cudaMemcpy failed!");
    //goto Error;
}

for(j=0;j<N;j++)printf("\n%d",a[j]);

int t;
scanf("%d",&t);
}

Solution

  • In the kernel, the thread with threadIdx.x = 0 and blockIdx.x = 0 i.e. the first thread of the first block will run indefinitely, causing the kernel to crash.

    When threadIdx.x = 0 and blockIdx.x = 0 the kernel code will become:

    int x = 0;
    int step = 0;
    while(step<N)
    {
        a[step] =  0;
        step += 0; //This will create infinite loop
    }
    

    Also (May be its a typo), there is a logical error in the following line of your code:

    cudaError_t cudaStatus = cudaMemcpy(a, dev_a,N-1 * sizeof(int), cudaMemcpyDeviceToHost);
    

    Considering the operator precedence in C, the expression N-1 * sizeof(int) will evaluate to N-4 (if sizeof(int) is 4).