Search code examples
printfopenclosx-mountain-liondouble-precision

Double Precision variations in OpenCL printf


I have a daft problem that I'd like someone to explain to me please. I have a simple OpenCL kernel that just takes in a double, prints it out inside the kernel, and then copies it back to the host. I've noticed that when I printf on the device (obviously using the CPU as the device rather than GPU) then the value printed is that of float precision rather than double precision. Is there something about printf in OpenCL ? (im wondering if it does an implicit cast to float?) Here is some test code. (This is running on a Macbook Pro (retina), OSX 10.8.4).

#include <stdio.h>
#include <OpenCL/opencl.h>
#include <math.h>
#define CL_CHECK(_expr)                                                      \
do {                                                                         \
    cl_int _err = _expr;                                                     \
    if (_err == CL_SUCCESS)                                                  \
        break;                                                               \
    fprintf(stderr, "OpenCL Error: '%s' returned %d!\n", #_expr, (int)_err); \
    abort();                                                                 \
 } while (0)

 #define CL_CHECK_ERR(_expr)                                                      \
 ({                                                                               \
     cl_int _err = CL_INVALID_VALUE;                                              \
     typeof(_expr) _ret = _expr;                                                  \
     if (_err != CL_SUCCESS) {                                                    \
         fprintf(stderr, "OpenCL Error: '%s' returned %d!\n", #_expr, (int)_err); \
         abort();                                                                 \
     }                                                                            \
     _ret;                                                                        \
 })

void pfn_notify(const char *errinfo, const void *private_info, size_t cb, void *user_data{
    fprintf(stderr, "OpenCL Error (via pfn_notify): %s\n", errinfo);
}

int main(int argc, const char * argv[]){

    cl_platform_id platforms[100];
    cl_uint platforms_n = 0;
    CL_CHECK(clGetPlatformIDs(100, platforms, &platforms_n));
    if (platforms_n == 0)return 1;

    cl_device_id devices[100];
    cl_uint devices_n = 0;
    CL_CHECK(clGetDeviceIDs(platforms[0], CL_DEVICE_TYPE_CPU, 100, devices, &devices_n));
    if (devices_n == 0)return 1;

    cl_context context;
    context = CL_CHECK_ERR(clCreateContext(NULL, 1, devices, &pfn_notify, NULL, &_err));

    double num = M_PI ;
    printf("number before is : %1.17e\n",num);

    const char *program_source[] = {
        "#pragma OPENCL EXTENSION cl_khr_fp64 : enable\n",
        "__kernel void simple_demo(__global double *src, __global double *dst)\n",
        "{\n",
        "   int i = get_global_id(0);\n",
        "   printf(\"src on device is : %1.17e\\n\",src[i]);\n",
        "   dst[i] = src[i];\n",
        "}\n"
     };

     cl_program program;
     program = CL_CHECK_ERR(clCreateProgramWithSource(context, 
         sizeof(program_source)/sizeof(*program_source), program_source, NULL, &_err));
     if (clBuildProgram(program, 1, devices, "", NULL, NULL) != CL_SUCCESS) {
            char buffer[10240];
            clGetProgramBuildInfo(program, devices[0], CL_PROGRAM_BUILD_LOG,
                sizeof(buffer), buffer, NULL);
            fprintf(stderr, "CL Compilation failed:\n%s", buffer);
            abort();
     }

    cl_mem input_buffer;
    input_buffer = CL_CHECK_ERR(clCreateBuffer(context, CL_MEM_READ_ONLY, sizeof(double), NULL, &_err));

    cl_mem output_buffer;
    output_buffer = CL_CHECK_ERR(clCreateBuffer(context, CL_MEM_WRITE_ONLY, sizeof(double), NULL, &_err));

    cl_kernel kernel;
    kernel = CL_CHECK_ERR(clCreateKernel(program, "simple_demo", &_err));
    CL_CHECK(clSetKernelArg(kernel, 0, sizeof(input_buffer), &input_buffer));
    CL_CHECK(clSetKernelArg(kernel, 1, sizeof(output_buffer), &output_buffer));

    cl_command_queue queue;
    queue = CL_CHECK_ERR(clCreateCommandQueue(context, devices[0], 0, &_err));

    CL_CHECK(clEnqueueWriteBuffer(queue, input_buffer, CL_TRUE, 0, sizeof(double), &num, 0, NULL, NULL));

    cl_event kernel_completion;
    size_t global_work_size[1] = { 1 };
    CL_CHECK(clEnqueueNDRangeKernel(queue, kernel, 1, NULL, global_work_size, NULL, 0, NULL, &kernel_completion));
    CL_CHECK(clWaitForEvents(1, &kernel_completion));
    CL_CHECK(clReleaseEvent(kernel_completion));

    printf("number after is  :");
    double data;
    CL_CHECK(clEnqueueReadBuffer(queue, output_buffer, CL_TRUE, 0, sizeof(double), &data, 0, NULL, NULL));
    printf(" %1.17e", data);
    printf("\n");

    CL_CHECK(clReleaseMemObject(input_buffer));
    CL_CHECK(clReleaseMemObject(output_buffer));

    CL_CHECK(clReleaseKernel(kernel));
    CL_CHECK(clReleaseProgram(program));
    CL_CHECK(clReleaseContext(context));

    return 0;
}

If you copy this and just compile it and run it you should get:

number before is : 3.14159265358979312e+00
src is           : 3.14159274101257324e+00
number after is  : 3.14159265358979312e+00

Any ideas?


Solution

  • It looks like a bug in the OpenCL library or runtime environment you are using. It (essentially) works properly on Windows with Intel Core-i7 and AMD A6-3650:

    OpenCL 1.2 AMD-APP (1124.2) + AMD A8-3650:
    number before is : 3.14159265358979312e+000
    src on device is : 3.14159265358979310e+000
    number after is  : 3.14159265358979312e+000
    
    Intel OpenCL 1.2 + Intel Core-i7:
    number before is : 3.14159265358979312e+000
    src on device is : 3.14159265358979310e+00
    number after is  : 3.14159265358979312e+000
    

    Not sure why some libraries use 3 digits for the 'e' format. The C99 spec states "The exponent always contains at least two digits, and only as many more digits as necessary to represent the exponent."

    Also not sure why the printed values do not match exactly, but at least they are close.