c algorithm formula ieee-754 fixed-point

Round to IEEE 754 precision but keep binary format

If I convert the decimal number 3120.0005 to float (32-bit) representation, the number gets rounded down to 3120.00048828125.

Assuming we're using a fixed point number with a scale of 10^12 then 1000000000000 = 1.0 and 3120000500000000 = 3120.0005.

What would the formula/algorithm be to round down to the nearest IEEE 754 precision to get 3120000488281250? I would also need a way to get the result of rounding up (3120000732421875).

Solution

If you divide by the decimal scaling factor and convert the quotient to float, you get the nearest representable float. To get the neighbouring float on the other side of the original value, nextafterf (from <math.h>) can be used:

#include <float.h>
#include <math.h>
#include <stdio.h>

long long scale_to_fixed(float f)
{
    float intf = truncf(f);
    long long result = 1000000000000LL;
    result *= (long long)intf;
    result += round((f - intf) * 1.0e12);
    return result;
}

/*
 * Convert a fixed-point value with a decimal scale of 10^12
 * (1000000000000 == 1.0) to a float.
 *
 * The quotient is formed once in double precision and narrowed to float
 * once. Converting the fractional part to float on its own and then adding
 * the integer part in float rounds twice; the first rounding can land the
 * fraction exactly on a halfway point, after which the second rounding
 * picks the wrong neighbour. For example 3120000610351563 would come out
 * as 3120.00048828125 instead of the nearer 3120.000732421875.
 *
 * n: fixed-point value in units of 10^-12.
 * Returns: n / 10^12 as a float.
 *
 * NOTE(review): assuming double is IEEE 754 binary64, n converts to double
 * exactly only while |n| <= 2^53; beyond that the result may not be the
 * nearest float in rare near-tie cases.
 */
float scale_from_fixed(long long n)
{
    return (float)((double)n / 1.0e12);
}

/*
 * Demo: print the fixed-point images of the two floats that bracket the
 * fixed-point value 3120000500000000, with the value itself in between.
 */
int main()
{
    const long long target = 3120000500000000;
    const float approx = scale_from_fixed(target);
    const long long approx_fixed = scale_to_fixed(approx);

    /* Start with both bounds on the converted value; at most one moves. */
    long long lower = approx_fixed;
    long long upper = approx_fixed;

    if (approx_fixed > target) {
        /* Conversion landed above the target: step one float down. */
        lower = scale_to_fixed(nextafterf(approx, -FLT_MAX));
    } else if (approx_fixed < target) {
        /* Conversion landed below the target: step one float up. */
        upper = scale_to_fixed(nextafterf(approx, FLT_MAX));
    }

    printf("%lld\n%lld\n%lld\n", lower, target, upper);
    return 0;
}

Results:

3120000488281250

3120000500000000

3120000732421875