Search code examples
c++floating-pointcudaconstexprhalf-precision-float

How can I convert an integer to CUDA's __half FP16 type, in a constexpr fashion?


I'm the developer of aerobus and I'm facing difficulties with half precision arithmetic.

At some point in the library, I need to convert a IntType to related FloatType (same bit count) in a constexpr function (to evaluate polynomials).

I have this function (in a larger type aerobus::i16::val<x>) :

template<typename valueType>
static constexpr valueType get() { return static_cast<valueType>(x); }

in an enclosing type.

It works well when valueType is float or double and x is int32_t or int64_t but not when valueType is __half and x is int16_t.

nvcc 12.6.r12.6 generates the following error :

../src/aerobus.h:576:55: note: ‘static constexpr valueType aerobus::i16::val<x>::get() [with valueType = __half; short int x = 64]’ is not usable as a ‘constexpr’ function because:
  576 |             static constexpr INLINED DEVICE valueType get() { return (valueType)x; }
      |                                                       ^~~
../src/aerobus.h:576:55: error: call to non-‘constexpr’ function ‘__half::__half(short int)’
In file included from cuda_fp16.cpp:1:
/usr/local/cuda-12.6/bin/../targets/x86_64-linux/include/cuda_fp16.h:4652:25: note: ‘__half::__half(short int)’ declared here
 4652 |     __CUDA_HOSTDEVICE__ __half(const short val) { __x = __short2half_rn(val).__x; }

How can I work around that? Am I forced to implement a constexpr conversion function from int16_t to __half or not?


Solution

  • Nothing in the cuda_fp16.hpp header has constexpr in it, so you'll have to do the hard work yourself. The following compiles for me (with cuda 12.5.1 at least and options --expt-relaxed-constexpr and -std=c++20):

    static constexpr unsigned short my_internal_float2half(const float f, unsigned int& sign, unsigned int& remainder) {
        unsigned int x;
        unsigned int u;
        unsigned int result;
        x = std::bit_cast<int>(f); //c++20 
        ////(void)std::memcpy(&x, &f, sizeof(f)); //not constexpr
        u = (x & 0x7fffffffU);
        sign = ((x >> 16U) & 0x8000U);
        // NaN/+Inf/-Inf
        if (u >= 0x7f800000U) {
            remainder = 0U;
            result = ((u == 0x7f800000U) ? (sign | 0x7c00U) : 0x7fffU);
        } else if (u > 0x477fefffU) { // Overflows
            remainder = 0x80000000U;
            result = (sign | 0x7bffU);
        } else if (u >= 0x38800000U) { // Normal numbers
            remainder = u << 19U;
            u -= 0x38000000U;
            result = (sign | (u >> 13U));
        } else if (u < 0x33000001U) { // +0/-0
            remainder = u;
            result = sign;
        } else { // Denormal numbers
            const unsigned int exponent = u >> 23U;
            const unsigned int shift = 0x7eU - exponent;
            unsigned int mantissa = (u & 0x7fffffU);
            mantissa |= 0x800000U;
            remainder = mantissa << (32U - shift);
            result = (sign | (mantissa >> shift));
            result &= 0x0000FFFFU;
        }
        return static_cast<unsigned short>(result);
    }
    
    static constexpr __half my_float2half_rn(const float a) {
        __half val;
        __half_raw r;
        unsigned int sign = 0U;
        unsigned int remainder = 0U;
        r.x = my_internal_float2half(a, sign, remainder);
        if ((remainder > 0x80000000U) || ((remainder == 0x80000000U) && ((r.x & 0x1U) != 0U))) {
            r.x++;
        }
        val = std::bit_cast<__half>(r); //allowed, see operator= for __raw_half -> __half
        return val;
    }
    
    static constexpr __half my_int2half_rn(const int i) {
        __half h;
            // double-rounding is not a problem here: if integer
            // has more than 24 bits, it is already too large to
            // be represented in half precision, and result will
            // be infinity.
            const float  f = static_cast<float>(i);
        h = my_float2half_rn(f);
        return h;
    }
    
    __device__ consteval __half convert_int16_to_half(int16_t i) {
        return my_float2half_rn(static_cast<float>(i));
    }
    
    #ifdef INSIDE_OPS_CLASS_DEF
    //in your class header
    template<typename VT>
    static constexpr VT get() { 
        static_assert(std::is_same<int16_t, decltype(this->x)>::value);
        if constexpr (std::is_same<VT, __half>::value) {
            return convert_int16_to_half(this->x);
        } else {
            return static_cast<valueType>(x); 
        }
    }
    #endif
     
    

    Note that outside of consteval, you'll want to use the built-in version, as they use a single assembly statement, letting the GPU do the conversion.

    Do note that if you are worried about efficiency you really should not be mucking about with a single __half, but process them in pairs instead.