unity-game-engine, hlsl, compute-shader

Unpack/Pack SNorm16 in HLSL compute shader


I'm trying to unpack SNorm16 values in an HLSL compute shader. An SNorm16x4 tangent is 8 bytes in total, and a single Load/Store moves 4 bytes, so I use Load2 to read two 4-byte words (each holding two packed SNorm16 values), unpack them into four floats, work with the result, then pack everything back and store the two words with Store2.

The code is:

float2 UnpackFromSNorm16x2(uint v)
{
    uint2 tempU = asuint(uint2(v, v >> 16) & 0xFFFF);
    int2 tempI = int2(tempU.x - 32767, tempU.y - 32767);
    return float2( tempI * float(1.0 / 32767.0));
}

int FloatToSNorm16(float v)
{
    //According to D3D10 rules, the value "-1.0f" has two representations:
    //  0x8000 and 0x8001
    //This allows everyone to convert by just multiplying by 32767 instead
    //of multiplying negative values by 32768 and positive values by 32767.
    return int(clamp(v >= 0.0f ? (v * 32767.0f + 0.5f) : (v * 32767.0f - 0.5f), -32768.0f, 32767.0f));
}

uint PackToSNorm16x2(float2 v)
{
    int intX = int(FloatToSNorm16(v.x));
    int intY = int(FloatToSNorm16(v.y));
    uint2 uintXY = uint2(clamp(intX + 32767, 0, 65535), clamp(intY + 32767, 0, 65535));
    uint x = (uintXY.x << 0) & 0x0000FFFF;
    uint y = (uintXY.y << 16) & 0xFFFF0000;
    return x | y;
}

uint2 inputTangentUInt = asuint(vertices.Load2(baseOffset + tangentOffset));
float4 qTangentUnpacked = float4(UnpackFromSNorm16x2(inputTangentUInt.x), UnpackFromSNorm16x2(inputTangentUInt.y));

//Do some work with qTangentUnpacked

uint2 qTangentPacked = uint2(PackToSNorm16x2(qTangentUnpacked.xy), PackToSNorm16x2(qTangentUnpacked.zw));
vertices.Store2(baseOffset + tangentOffset, asuint(qTangentPacked));

But the final result is wrong; it looks like some data is being lost. What am I doing wrong?


Solution

  • The root problem is the encoding: DXGI SNORM stores two's-complement integers, not bias-offset values. Unpacking must sign-extend the low 16 bits instead of subtracting 32767, and packing must mask the signed integer instead of adding 32767. There is a collection of HLSL routines for exactly these conversions called D3DX_DXGIFormatConvert.inl. It is documented on Microsoft Learn, and used to ship in the legacy DirectX SDK.

    typedef int INT;
    typedef uint UINT;
    typedef float FLOAT;
    typedef int2 XMINT2;
    typedef float2 XMFLOAT2;
    
    #define D3DX11INLINE
    #define D3DX_Truncate_FLOAT(_V) trunc(_V)
    #define hlsl_precise precise
    
    D3DX11INLINE FLOAT D3DX_INT_to_FLOAT(INT _V,
                                         FLOAT _Scale)
    {
        FLOAT Scaled = (FLOAT)_V / _Scale;
        // The integer is a two's-complement signed
        // number so the negative range is slightly
        // larger than the positive range, meaning
        // the scaled value can be slightly less than -1.
        // Clamp to keep the float range [-1, 1].
        return max(Scaled, -1.0f);
    }
    
    D3DX11INLINE INT D3DX_FLOAT_to_INT(FLOAT _V,
                                       FLOAT _Scale)
    {
        return (INT)D3DX_Truncate_FLOAT(_V * _Scale + (_V >= 0 ? 0.5f : -0.5f));
    }
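
    // D3DX_FLOAT2_to_R16G16_SNORM below also needs D3DX_SaturateSigned_FLOAT,
    // which the header defines as a clamp to [-1, 1] that maps NaN to zero:
    D3DX11INLINE FLOAT D3DX_SaturateSigned_FLOAT(FLOAT _V)
    {
        if (isnan(_V))
            return 0;
        return min(max(_V, -1), 1);
    }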
    
    D3DX11INLINE XMFLOAT2 D3DX_R16G16_SNORM_to_FLOAT2(UINT packedInput)
    {
        hlsl_precise XMFLOAT2 unpackedOutput;
        XMINT2 signExtendedBits;
        signExtendedBits.x =  (INT)(packedInput << 16) >> 16;
        signExtendedBits.y =  (INT)(packedInput & 0xffff0000) >> 16;
        unpackedOutput.x = D3DX_INT_to_FLOAT(signExtendedBits.x, 32767);
        unpackedOutput.y = D3DX_INT_to_FLOAT(signExtendedBits.y, 32767);
        return unpackedOutput;
    }
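
    // Example: packedInput = 0x00008001 holds (-32767, 0) in two's complement,
    // so this returns (-1.0f, 0.0f); a biased decode (0x8001 - 32767 = +2)
    // would instead produce +2/32767, which is the kind of corruption the
    // question describes.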
    
    D3DX11INLINE UINT D3DX_FLOAT2_to_R16G16_SNORM(hlsl_precise XMFLOAT2 unpackedInput)
    {
        UINT packedOutput;
        packedOutput = ( (D3DX_FLOAT_to_INT(D3DX_SaturateSigned_FLOAT(unpackedInput.x), 32767) & 0x0000ffff)      |
                         (D3DX_FLOAT_to_INT(D3DX_SaturateSigned_FLOAT(unpackedInput.y), 32767)              <<16) );
        return packedOutput;
    }
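
    Wired into the original snippet, the round trip looks like this (a sketch,
    where vertices, baseOffset, and tangentOffset are the buffer and offsets
    from the question):

    // Load 8 bytes = two packed R16G16_SNORM words.
    uint2 packedTangent = vertices.Load2(baseOffset + tangentOffset);
    float4 qTangent = float4(D3DX_R16G16_SNORM_to_FLOAT2(packedTangent.x),
                             D3DX_R16G16_SNORM_to_FLOAT2(packedTangent.y));

    //Do some work with qTangent

    // Repack with masking (no bias) and store both words again.
    uint2 repackedTangent = uint2(D3DX_FLOAT2_to_R16G16_SNORM(qTangent.xy),
                                  D3DX_FLOAT2_to_R16G16_SNORM(qTangent.zw));
    vertices.Store2(baseOffset + tangentOffset, repackedTangent);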
    

    The latest version is on GitHub.

    See this blog post for more information.