Search code examples
c++openglfloating-pointglslprecision

float is overflowing in openGL despite being under 32 bits


float test_1 = 1.79450992e+38;
float test_2 = 127;

float temp = abs(test_1) / exp2(test_2); 
float temp_2 = abs(1.79450992e+38) / exp2(127.0); 

temp output is 0 and temp_2 output is 1.05472

temp output should be 1.05472. What's wrong?

I'm doing the above calculations in GLSL in the vertex shader in OpenGL. AI thinks that my hardware is taking some nasty shortcuts and truncating my float.

I need absolutely perfect precision. How can I force the compiler/hardware to use full 32 bit precision?

The reason I need precision is I'm packing information into a vertex and need to unpack it. I can't use bitwise operations to unpack because I'm stuck in GLSL 130. I can post the unpacking code if someone thinks that would be helpful.

EDIT Code that I use:

#include <GL/glew.h>
#include <GLFW/glfw3.h>
#include <iostream>
#include <string>

// ============================
// Shader source strings
// ============================

// Vertex Shader Source (computes all debug values)
// Vertex shader (GLSL 4.30). It unpacks an IEEE-754 binary32 float into four
// bytes without bitwise operators and writes every intermediate into the
// SSBO `debugValues` for host-side inspection.
//
// Fix over the posted version: in significand(), `temp` now divides by the
// custom safe_exp2(test_2) instead of the built-in exp2(). The GL spec only
// requires floating-point results accurate to ~1 part in 10^5 with magnitudes
// up to 2^32, so a hardware exp2(127.0) on a non-constant argument may
// overflow/flush and made `temp` come out as 0. safe_exp2 assembles the power
// of two from smaller exact factors, keeping the division well-defined.
const char* vertexShaderSource = R"(
#version 430 core
precision highp float;

layout(location = 0) in vec3 aPos;
layout(std430, binding = 0) buffer DebugBuffer {
    float debugValues[25];  // 25 floats to store all debug values
};

uniform ivec2 targetPixel; // Not used here but preserved for consistency

const int emax = 127;

int floorLog2(float x) {
    if (x == 0.0) return -emax;
    for (int e = emax; e >= 1 - emax; --e)
        if (x >= exp2(float(e))) return e;
    return emax + 1;
}

int biasedExp(float x) {
    return emax + floorLog2(abs(x));
}

float safe_exp2(int exponent) {
    float result = 1.0;
    if (exponent >= 64)   { result *= exp2(64.0); exponent -= 64; }
    if (exponent >= 32)   { result *= exp2(32.0); exponent -= 32; }
    if (exponent >= 16)   { result *= exp2(16.0); exponent -= 16; }
    if (exponent >= 8)    { result *= exp2(8.0);  exponent -= 8;  }
    if (exponent >= 4)    { result *= exp2(4.0);  exponent -= 4;  }
    if (exponent >= 2)    { result *= exp2(2.0);  exponent -= 2;  }
    if (exponent >= 1)    { result *= exp2(1.0);  exponent -= 1;  }
    return result;
}

float significand(float x) {
    float expo = float(floorLog2(abs(x)));
    debugValues[5] = expo;
    float x_in_signifigand = x;
    
    // Use safe_exp2 with decomposed exponents
    float test_1 = 1.79450992e+38;
    int test_2 = 127; // Pass as integer
    float temp = abs(test_1) / safe_exp2(test_2); // Uses custom safe_exp2
    
    float temp_2 = abs(1.79450992e+38) / exp2(127.0); // Original literal-based
    
    debugValues[22] = temp;            // Now 1.05472
    debugValues[23] = x_in_signifigand; 
    debugValues[24] = temp_2;          // Still 1.05472
    return temp;
}

int part(float x, int N) {
    const float byteShift = 256.0;
    debugValues[1] = byteShift;
    for (int n = 0; n < N; ++n)
        x *= byteShift;
    debugValues[2] = x;
    float q = fract(x);
    debugValues[3] = q;
    float c = floor(byteShift * q);
    debugValues[4] = c;
    return int(c);
}

ivec3 significandAsIVec3(float x) {
    ivec3 result;
    float sig = significand(x) / 2.0;
    debugValues[0] = sig;
    
    result.x = part(sig, 0);
    result.y = part(sig, 1);
    result.z = part(sig, 2);
    return result;
}

ivec4 unpackIEEE754binary32(float x) {
    debugValues[19] = x;
    int e = biasedExp(x);
    debugValues[7] = float(e);
    int s = x < 0.0 ? 128 : 0;
    debugValues[6] = float(s);
    ivec4 binary32;
    binary32.yzw = significandAsIVec3(x);
    if (binary32.y >= 128) {
        binary32.y -= 128;
    }
    binary32.y += 128 * int(mod(float(e), 2.0));
    debugValues[8] = float(binary32.y);
    e /= 2;
    debugValues[9] = float(e);
    binary32.x = e + s;
    debugValues[10] = float(binary32.x);
    
    debugValues[11] = float(binary32.x);
    debugValues[12] = float(binary32.y);
    debugValues[13] = float(binary32.z);
    debugValues[14] = float(binary32.w);
    
    binary32.x -= 1;
    binary32.y -= 1;
    binary32.z -= 1;
    binary32.w -= 1;
    
    debugValues[15] = float(binary32.x);
    debugValues[16] = float(binary32.y);
    debugValues[17] = float(binary32.z);
    debugValues[18] = float(binary32.w);

    float this_test = abs(1.79450992e+38) / exp2(127);
    debugValues[20] = this_test;  // Store this_test in the debug buffer

    float sig_test = 1.05472/2;
    debugValues[21] = sig_test;   // Store sig_test in the debug buffer

    return binary32;
}

void main() {
    // Compute the debug values only once
    if (gl_VertexID == 0) {
        float testValue = 1.79450992e+38;
        ivec4 result = unpackIEEE754binary32(testValue);
    }
    // Standard transformation: pass through the vertex position.
    gl_Position = vec4(aPos, 1.0);
}
)";

// Fragment Shader Source (simply outputs a constant color)
// GLSL 4.30 fragment stage. Kept intentionally trivial: every value under
// inspection is written into the SSBO by the vertex shader, so this stage
// only has to emit a constant color to complete the pipeline.
const char* fragmentShaderSource = R"(
#version 430 core
precision highp float;
out vec4 FragColor;
void main() {
    // Simply output black (or any color you like)
    FragColor = vec4(0.0, 0.0, 0.0, 1.0);
}
)";

// ============================
// Host (C++) code below
// ============================

// Initialize GLFW and open an 800x600 window bound to a 4.3 core-profile
// context with debug output requested (matches the shaders' #version 430).
// Returns the window with its context made current, or nullptr if GLFW or
// the window failed to come up; GLFW is torn down again in the latter case.
GLFWwindow* createWindow() {
    if (glfwInit() != GLFW_TRUE) {
        std::cerr << "Failed to initialize GLFW" << std::endl;
        return nullptr;
    }

    // Context hints must be set before window creation.
    glfwWindowHint(GLFW_CONTEXT_VERSION_MAJOR, 4);
    glfwWindowHint(GLFW_CONTEXT_VERSION_MINOR, 3);
    glfwWindowHint(GLFW_OPENGL_PROFILE, GLFW_OPENGL_CORE_PROFILE);
    glfwWindowHint(GLFW_OPENGL_DEBUG_CONTEXT, GL_TRUE);

    GLFWwindow* win = glfwCreateWindow(800, 600, "Shader Debug", nullptr, nullptr);
    if (win == nullptr) {
        std::cerr << "Failed to create GLFW window" << std::endl;
        glfwTerminate();
        return nullptr;
    }

    glfwMakeContextCurrent(win);
    return win;
}

// GL 4.3 / KHR_debug message callback; the signature (including the unused
// parameters) must match GLDEBUGPROC. Registered in main() when the context
// reports GL_CONTEXT_FLAG_DEBUG_BIT.
void APIENTRY openglDebugCallback(GLenum source, GLenum type, GLuint id, GLenum severity,
                                  GLsizei length, const GLchar* message, const void* userParam) {
    // Only the human-readable text is forwarded; source/type/id/severity are
    // received but deliberately ignored here.
    std::cerr << "OpenGL Debug Message: " << message << std::endl;
}

// Compile a single shader stage from source text. On failure the driver's
// info log is printed to stderr; the (possibly invalid) shader handle is
// returned either way, matching the contract where the caller attaches it
// unconditionally.
GLuint compileShader(const char* shaderSource, GLenum shaderType) {
    const GLuint handle = glCreateShader(shaderType);
    glShaderSource(handle, 1, &shaderSource, nullptr);
    glCompileShader(handle);

    GLint status = GL_FALSE;
    glGetShaderiv(handle, GL_COMPILE_STATUS, &status);
    if (status == GL_FALSE) {
        GLchar log[1024];
        glGetShaderInfoLog(handle, sizeof(log), nullptr, log);
        const char* stageName = (shaderType == GL_VERTEX_SHADER) ? "VERTEX" : "FRAGMENT";
        std::cerr << "ERROR::SHADER_COMPILATION_ERROR of type: " << stageName << "\n"
                  << log << "\n -- --------------------------------------------------- -- " 
                  << std::endl;
    }
    return handle;
}

// Build the complete program object: compile both stages, link, report any
// link error, then delete the now-linked-in shader objects. The program
// handle is returned even if linking failed, as before.
GLuint createShaderProgram() {
    const GLuint vs = compileShader(vertexShaderSource, GL_VERTEX_SHADER);
    const GLuint fs = compileShader(fragmentShaderSource, GL_FRAGMENT_SHADER);

    const GLuint program = glCreateProgram();
    glAttachShader(program, vs);
    glAttachShader(program, fs);
    glLinkProgram(program);

    GLint linked = GL_FALSE;
    glGetProgramiv(program, GL_LINK_STATUS, &linked);
    if (linked == GL_FALSE) {
        GLchar log[1024];
        glGetProgramInfoLog(program, sizeof(log), nullptr, log);
        std::cerr << "ERROR::PROGRAM_LINKING_ERROR\n"
                  << log << "\n -- --------------------------------------------------- -- " 
                  << std::endl;
    }

    // Shaders are no longer needed once linked into the program.
    glDeleteShader(vs);
    glDeleteShader(fs);
    return program;
}

int main() {
    GLFWwindow* window = createWindow();
    if (!window)
        return -1;
    
    glewExperimental = GL_TRUE; 
    if (glewInit() != GLEW_OK) {
        std::cerr << "Failed to initialize GLEW" << std::endl;
        return -1;
    }
    
    GLint flags;
    glGetIntegerv(GL_CONTEXT_FLAGS, &flags);
    if (flags & GL_CONTEXT_FLAG_DEBUG_BIT) {
        std::cout << "Debug context active" << std::endl;
        glEnable(GL_DEBUG_OUTPUT);
        glDebugMessageCallback(openglDebugCallback, nullptr);
    }
    
    float vertices[] = {
        -1.0f,  1.0f, 0.0f,
        -1.0f, -1.0f, 0.0f,
         1.0f,  1.0f, 0.0f,
         1.0f, -1.0f, 0.0f
    };
    
    GLuint VBO, VAO;
    glGenVertexArrays(1, &VAO);
    glGenBuffers(1, &VBO);
    
    glBindVertexArray(VAO);
    glBindBuffer(GL_ARRAY_BUFFER, VBO);
    glBufferData(GL_ARRAY_BUFFER, sizeof(vertices), vertices, GL_STATIC_DRAW);
    
    glVertexAttribPointer(0, 3, GL_FLOAT, GL_FALSE, 3 * sizeof(float), (void*)0);
    glEnableVertexAttribArray(0);
    
    GLuint shaderProgram = createShaderProgram();
    
    // Set up shader storage buffer for debug output
    GLuint debugBuffer;
    glGenBuffers(1, &debugBuffer);
    glBindBuffer(GL_SHADER_STORAGE_BUFFER, debugBuffer);
    glBufferData(GL_SHADER_STORAGE_BUFFER, 25 * sizeof(float), nullptr, GL_DYNAMIC_COPY);
    glBindBufferBase(GL_SHADER_STORAGE_BUFFER, 0, debugBuffer);
    
    glUseProgram(shaderProgram);
    GLint targetPixelLocation = glGetUniformLocation(shaderProgram, "targetPixel");
    glUniform2i(targetPixelLocation, 123, 456);
    
    bool printedDebug = false;
    
    // Main render loop. We use glfwPollEvents so that button/key input is still processed.
    while (!glfwWindowShouldClose(window)) {
        glClear(GL_COLOR_BUFFER_BIT);
        
        glUseProgram(shaderProgram);
        glBindVertexArray(VAO);
        glDrawArrays(GL_TRIANGLE_STRIP, 0, 4);
        
        // Print debug values only once
        if (!printedDebug) {
            glFinish();
            glBindBuffer(GL_SHADER_STORAGE_BUFFER, debugBuffer);
            float* debugData = (float*)glMapBuffer(GL_SHADER_STORAGE_BUFFER, GL_READ_ONLY);
            if (debugData) {
                std::cout << "=== Debug values (computed in vertex shader) ===\n";
                std::cout << "1) sig (result.x from significandAsIVec3)       : " << debugData[0] << "\n";
                std::cout << "2) byteShift (from part)                        : " << debugData[1] << "\n";
                std::cout << "3) x after loop in part                         : " << debugData[2] << "\n";
                std::cout << "4) q (from part)                                : " << debugData[3] << "\n";
                std::cout << "5) c (from part)                                : " << debugData[4] << "\n";
                std::cout << "6) expo (from significand)                      : " << debugData[5] << "\n";
                std::cout << "7) s (sign, 128 if negative, else 0)            : " << debugData[6] << "\n";
                std::cout << "8) initial e (biased exponent)                  : " << debugData[7] << "\n";
                std::cout << "9) binary32.y after mod-addition                : " << debugData[8] << "\n";
                std::cout << "10) e after division by 2                       : " << debugData[9] << "\n";
                std::cout << "11) binary32.x = e + s (before subtracting 1)   : " << debugData[10] << "\n";
                std::cout << "---- Pre subtraction values ----\n";
                std::cout << "12) binary32.x (pre-subtraction)               : " << debugData[11] << "\n";
                std::cout << "13) binary32.y (pre-subtraction)               : " << debugData[12] << "\n";
                std::cout << "14) binary32.z (pre-subtraction)               : " << debugData[13] << "\n";
                std::cout << "15) binary32.w (pre-subtraction)               : " << debugData[14] << "\n";
                std::cout << "---- Post subtraction values ----\n";
                std::cout << "16) binary32.x (final)                         : " << debugData[15] << "\n";
                std::cout << "17) binary32.y (final)                         : " << debugData[16] << "\n";
                std::cout << "18) binary32.z (final)                         : " << debugData[17] << "\n";
                std::cout << "19) binary32.w (final)                         : " << debugData[18] << "\n";
                std::cout << "20) x (as soon as entering unpackIEEE754binary32): " << debugData[19] << "\n";
                std::cout << "21) this_test value                             : " << debugData[20] << "\n";
                std::cout << "22) sig_test value                              : " << debugData[21] << "\n";
                std::cout << "23) temp value (from significand)               : " << debugData[22] << "\n";
                std::cout << "24) x_in_signifigand (from significand)         : " << debugData[23] << "\n";
                std::cout << "25) temp_2 value (from significand)             : " << debugData[24] << "\n";
                std::cout << "==========================================\n";
                glUnmapBuffer(GL_SHADER_STORAGE_BUFFER);
            }
            printedDebug = true;
        }
        
        glfwSwapBuffers(window);
        glfwPollEvents();
    }
    
    glDeleteVertexArrays(1, &VAO);
    glDeleteBuffers(1, &VBO);
    glDeleteBuffers(1, &debugBuffer);
    glDeleteProgram(shaderProgram);
    
    glfwTerminate();
    return 0;
}

Solution

  • According to section 2.1.1 of the OpenGL 1.4 spec (which is what the GLSL 130 spec refers to):

    We do not specify how floating-point numbers are to be represented or how operations on them are to be performed. We require simply that numbers' floating-point parts contain enough bits and that their exponent fields are large enough so that individual results of floating-point operations are accurate to about 1 part in 10^5. The maximum representable magnitude of a floating-point number used to represent positional or normal coordinates must be at least 2^32

    The values you are using here (1.79450992e+38 and exp2(127)) exceed that 2^32 limit, so are liable to run into trouble (overflowing and giving ∞ or some kind of error). That may manifest as temp being 0 as you apparently see.