I have stored the text in a char array a[textLength], and the pattern in an array b[patternLength].
// Host-side staging copy of the text: one cl_char per element of text[].
// NOTE(review): the malloc result is used without a NULL check.
cl_char *a = (cl_char *) malloc(textLength*sizeof(cl_char));
for(int i =0; i<textLength;i++)
{
a[i]=text[i];
}
// A buffer object is a handle to a region of memory.
// a_buffer is created from the contents of a[] at creation time
// (CL_MEM_COPY_HOST_PTR).
cl_mem a_buffer = clCreateBuffer(context,
CL_MEM_READ_ONLY | // buffer object read only for kernel
CL_MEM_COPY_HOST_PTR, // copy data from memory referenced
// by host pointer
textLength*sizeof(cl_char), // size in bytes of buffer object
a, // host pointer
NULL); // errcode_ret is NULL, so no error code is returned
// NOTE(review): with errcode_ret == NULL a failed creation can only be
// detected by checking a_buffer itself, which the code shown does not do.
// for text and pattern kernel arguments
// Host-side staging copy of the pattern: one cl_char per element of pattern[].
// NOTE(review): the malloc result is used without a NULL check.
cl_char *b = (cl_char *) malloc(patternLength*sizeof(cl_char));
for(int i =0; i<patternLength;i++)
{
b[i]=pattern[i];
}
// The global buffer for the pattern is disabled below, so in the code shown
// the contents of b[] are never handed to OpenCL.
// A buffer object is a handle to a region of memory
/*cl_mem b_buffer = clCreateBuffer(context,
CL_MEM_READ_ONLY | // buffer object read only for kernel
CL_MEM_COPY_HOST_PTR, // copy data from memory referenced
// by host pointer
patternLength*sizeof(cl_char), // size in bytes of buffer object
b, // host pointer
NULL); // no error code returned */
// b_buffer is a NULL handle: it refers to no memory object and holds no data.
cl_mem b_buffer = NULL;
// Bind the kernel arguments. The return values of clSetKernelArg are ignored
// here, so a rejected argument goes unnoticed until the launch.
// Argument 0: the read-only text buffer.
clSetKernelArg(kernel, 0, sizeof(a_buffer), (void*) &a_buffer);
// Argument 1: arg_value is NULL with arg_size == sizeof(cl_mem).
// NOTE(review): the kernel signature is not shown; if argument 1 is a __local
// pointer this reserves only sizeof(cl_mem) bytes of local memory -- confirm.
clSetKernelArg(kernel, 1, sizeof(cl_mem), NULL);
// Argument n: passes the address of the NULL cl_mem handle b_buffer.
// NOTE(review): if argument n is a __local pointer, OpenCL expects
// arg_value == NULL and arg_size == the number of bytes of local memory to
// allocate; a cl_mem handle (NULL or not) is not valid for it.
clSetKernelArg(kernel, n, sizeof(cl_mem), &b_buffer);
// One-dimensional launch with numberofWorkItem global work-items.
size_t global_work_size = numberofWorkItem;
// The status is stored in error; it is not checked in the code shown.
cl_int error= clEnqueueNDRangeKernel(queue, kernel,
1, NULL, // global work items dimensions and offset
&global_work_size, // number of global work items
// NOTE(review): local_work_size must point to a size_t; the declaration of
// patternLength is not shown -- confirm its type.
&patternLength, // local work size (work-items per work-group)
0, NULL, // don't wait on any events to complete
&timeEvent); // event object for this launch is returned in timeEvent
I have read that in clSetKernelArg, for arguments declared with the __local qualifier, the arg_value should be NULL. I have done that by setting b_buffer = NULL.
But doing that prevents b_buffer from storing the contents of b[] (the pattern). How do I get the pattern into local memory?
Also, if I am not wrong, the local_work_size cannot be greater than the value given by CL_DEVICE_MAX_WORK_ITEM_SIZES, because the local_work_size is constrained by the underlying device/hardware. The global_work_size, on the other hand, can be as big as one wants. Does it have to be a multiple of local_work_size? If yes, why?
Your error is in the clSetKernelArg line:
// incorrect: passes the address of a cl_mem handle as the value of a
// __local argument
clSetKernelArg(kernel, n, sizeof(cl_mem), &b_buffer);
// correct: for a __local argument, arg_value must be NULL and arg_size is
// the number of bytes of local memory to allocate for the argument
clSetKernelArg(kernel, n, sizeof(cl_char)*patternLength, NULL);
Local memory does not persist after kernel execution, so you won't be able to get a copy of b_buffer using your method. Also, local memory cannot be initialized by the host. You need to copy from a global parameter inside the kernel to get the data into local memory (the LDS, or local data share).
To get data out of local memory, you need to pass in a global cl_mem as well as the local parameter. The copy back to the global buffer can be done at the end of the kernel, and the result read back to the host with clEnqueueReadBuffer.
Update
Here's a specific example of how to use a dynamic local buffer, and assign it the contents of a global buffer.
/*
 * Cooperatively copies the first copyCount ints of srcBuff into localBuff so
 * that the whole work-group can then work on the data from local memory.
 *
 * srcBuff   - source data in global memory; must hold at least copyCount ints.
 * localBuff - per-work-group local buffer; must hold at least copyCount ints.
 *             The host sizes it with
 *             clSetKernelArg(kernel, index, copyCount * sizeof(cl_int), NULL).
 * copyCount - number of elements to copy; values <= 0 copy nothing.
 */
__kernel void copyBufferExample(__global int* srcBuff, __local int* localBuff, const int copyCount)
{
    int lid = get_local_id(0);
    int ls = get_local_size(0);
    int i;

    // Each work-item copies elements lid, lid + ls, lid + 2*ls, ... so the
    // work-group as a whole covers [0, copyCount).
    for (i = lid; i < copyCount; i += ls) {
        localBuff[i] = srcBuff[i];
    }

    // Each work-item filled only its own slice of localBuff. Wait until every
    // work-item in the group has finished its writes, and those writes are
    // visible, before any work-item reads elements copied by another one.
    // The barrier is outside the loop so that all work-items reach it.
    barrier(CLK_LOCAL_MEM_FENCE);

    //use localBuff here
    //copy result back to global memory if needed
}