hi i have a cuda program which run successfully here is code for cuda program
#include <stdio.h>
#include <cuda.h>
__global__ void square_array(float *a, int N)
{
int idx = blockIdx.x * blockDim.x + threadIdx.x;
if (idx<N)
a[idx] = a[idx] * a[idx];
}
int main(void)
{
float *a_h, *a_d;
const int N = 10;
size_t size = N * sizeof(float);
a_h = (float *)malloc(size);
cudaMalloc((void **) &a_d, size);
for (int i=0; i<N; i++) a_h[i] = (float)i;
cudaMemcpy(a_d, a_h, size, cudaMemcpyHostToDevice);
int block_size = 4;
int n_blocks = N/block_size + (N%block_size == 0 ? 0:1);
square_array <<< n_blocks, block_size >>> (a_d, N);
cudaMemcpy(a_h, a_d, sizeof(float)*N, cudaMemcpyDeviceToHost);
// Print results
for (int i=0; i<N; i++) printf("%d %f\n", i, a_h[i]);
free(a_h);
cudaFree(a_d);
}
now i want to split this code into two files means there should be two file one for c++ code or c code and other one .cu file for kernel. i just wanat to do it for learning and i don't want to write same kernel code again and again. can any one tell me how to do this ? how to split this code into two different file? than how to compile it? how to write makefile for it ? how to
Code which has CUDA C extensions has to be in *.cu file, rest can be in c++ file.
So here your kernel code can be moved to separate *.cu file.
To have main function implementation in c++ file you need to wrap invocation of kernel (code with square_array<<<...>>>(...);
) with c++ function which implementation needs to be in *cu file as well.
Functions cudaMalloc etc. can be left in c++ file as long as you include proper cuda headers.