I have a OpenCL 1.2 program that converts an equirectangular hdr image to a cubemap.
On my i7-9750H CPU everything works correctly, but when I run the program on my GTX 1650 GPU the result is consistently incorrect.
In case it is relevant, my GPU driver version is 536.99 and 7.6.0.0228 for my CPU.
Here is the code:
constant sampler_t srcSampler =
CLK_NORMALIZED_COORDS_TRUE | CLK_ADDRESS_CLAMP_TO_EDGE | CLK_FILTER_LINEAR;
// These transforms specify the directions based on the cube face
// They are based on
// https://www.khronos.org/opengl/wiki_opengl/images/CubeMapAxes.png The order
// is +X, -X, +Y, -Y, +Z, -Z Using the reference image the transforms are
// (horizontal face axis, vertical face axis, face direction)
__constant float3 xTransforms[6] = {
(float3)(0.0f, 0.0f, 1.0f), (float3)(0.0f, 0.0f, -1.0f),
(float3)(1.0f, 0.0f, 0.0f), (float3)(1.0f, 0.0f, 0.0f),
(float3)(1.0f, 0.0f, 0.0f), (float3)(-1.0f, 0.0f, 0.0f)};
__constant float3 yTransforms[6] = {
(float3)(0.0, -1.0f, 0.0f), (float3)(0.0f, -1.0f, 0.0f),
(float3)(0.0f, 0.0f, 1.0f), (float3)(0.0f, 0.0f, -1.0f),
(float3)(0.0f, -1.0f, 0.0f), (float3)(0.0f, -1.0f, 0.0f)};
__constant float3 zTransforms[6] = {
(float3)(-1.0f, 0.0f, 0.0f), (float3)(1.0f, 0.0f, 0.0f),
(float3)(0.0f, 1.0f, 0.0f), (float3)(0.0f, -1.0f, 0.0f),
(float3)(0.0f, 0.0f, 1.0f), (float3)(0.0f, 0.0f, -1.0f)};
float2 projectSphericalMap(float3 dir) {
float2 uv = (float2)(atan2pi(dir.z, dir.x) * 0.5, asinpi(dir.y));
uv += (float2)(0.5f, 0.5f);
return uv;
}
// The kernel is invoked for every pixel on every face of the cubemap
// 'size' is the size of a cube map face
// 'sizefac' is 2/(size-1) precomputed
__kernel void reproject_environment(__read_only image2d_t srcImage,
__write_only image2d_t dstImage, int size,
float sizefac) {
int outu = get_global_id(0);
int outv = get_global_id(1);
int face = get_global_id(2);
// This check is probably unnecessary
if (outu >= size || outv >= size || face >= 6) {
return;
}
// The value range is [-1, 1]
float horizontal = (float)(outu)*sizefac - 1.0;
float vertical = (float)(outv)*sizefac - 1.0;
float3 vec = (float3)(horizontal, vertical, 1.0f);
float x = dot(vec, xTransforms[face]);
float y = dot(vec, yTransforms[face]);
float z = dot(vec, zTransforms[face]);
float3 dir = (float3)(x, y, z);
float2 uv = projectSphericalMap(normalize(dir));
float4 color = read_imagef(srcImage, srcSampler, uv);
// the cube map faces are stacked vertically
write_imagef(dstImage, (int2)(outu, outv + size * face), color);
}
This is my first time using OpenCL so the mistake might be very obvious.
After adding some printf
s for debugging I noticed that on my GPU the float3
s read from the Transform
arrays were wrong.
The issue is the compiler was aligning the array elements to 4 floats which resulted in a 1 float shift when indexing the elements. Changing float3
to float4
fixed the problem.