Search code examples
c++armsignalssetjmp

Runtime feature testing, setjmp, longjmp, and signal mask


According to The Open Group Base Specifications and longjump docs:

It is unspecified whether longjmp() restores the signal mask, leaves the signal mask unchanged, or restores it to its value at the time setjmp() was called.

I think I am encountering that caveat on an ARMv8 Mustang board. We perform runtime feature detection by trapping a SIGILL. The first SIGILL trap executes as expected when testing for CRC32 extensions. The second SIGILL trap does not execute as expected when testing for AES extensions. Below is what it looks like under the debugger.

I don't believe the code falls into the undefined behavior called out by the docs. For example, nested signal handlers are not used, the same thread performs the setjmp and longjmp magic, etc.

My question is, how can I safely perform the runtime feature test multiple times?


gdb ./test.exe
...

(gdb) b TryCRC32() 
Breakpoint 1 at 0x401034: file test.cc, line 92.
(gdb) b TryAES() 
Breakpoint 2 at 0x401120: file test.cc, line 120.
...

(gdb) r
Starting program: /home/cryptopp/test.exe v

Breakpoint 1, TryCRC32 () at test.cc:92
92      volatile bool result = true;
(gdb) n
94      SigHandler oldHandler = signal(SIGILL, SigIllHandlerCRC32);
(gdb) 
95      if (oldHandler == SIG_ERR)
(gdb) 
98      if (setjmp(s_jmpNoCRC32))
(gdb) 
102         word32 w=0, x=0; word16 y=0; byte z=0;
(gdb) 
103         w = __crc32cw(w,x);
(gdb) 

Program received signal SIGILL, Illegal instruction.
0x00000000004010b4 in __crc32cw (__b=0, __a=0)
    at /usr/lib/gcc/aarch64-linux-gnu/4.9/include/arm_acle.h:57
57    return __builtin_aarch64_crc32cw (__a, __b);
(gdb) c
Continuing.

Breakpoint 2, TryAES () at test.cc:120
120     volatile bool result = true;
(gdb) n
122     SigHandler oldHandler = signal(SIGILL, SigIllHandlerAES);
(gdb) 
123     if (oldHandler == SIG_ERR)
(gdb) 
126     if (setjmp(s_jmpNoAES))
(gdb) 
130         uint8x16_t data = vdupq_n_u8(0), key = vdupq_n_u8(0);
(gdb) 
131         uint8x16_t r1 = vaeseq_u8(data, key);
(gdb) 

Program received signal SIGILL, Illegal instruction.
0x0000000000400a64 in vaeseq_u8 (data=..., key=...)
    at /usr/lib/gcc/aarch64-linux-gnu/4.9/include/arm_neon.h:13731
13731     return __builtin_aarch64_crypto_aesev16qi_uuu (data, key);
(gdb) c
Continuing.

Program terminated with signal SIGILL, Illegal instruction.
The program no longer exists.

Here's the test program. It is compiled with:

$ export CXXFLAGS="-g3 -O0 -march=armv8-a+crc+crypto"
$ g++ $CXXFLAGS test.cc -o test.exe

The -march=armv8-a+crc+crypto means preprocessor symbols like __ARM_NEON and __ARM_FEATURE_CRYPTO are defined.

Declarations like static volatile bool TryNEON() is another problem the Mustang revealed (namely, GCC was optimizing away the checks). It caused the program to die with a SIGILL. Don't get distracted with it because its just a stop gap for now.

#include <signal.h>
#include <setjmp.h>

#include <stdint.h>
#include <arm_neon.h>
#include <arm_acle.h>

#include <iostream>

#define UNUSED(x) ((void)(x))

typedef uint8_t byte;
typedef uint16_t word16;
typedef uint32_t word32;
typedef uint64_t word64;

typedef void (*SigHandler)(int);

extern "C" {

    static jmp_buf s_jmpNoNEON;
    static void SigIllHandlerNEON(int)
    {
        longjmp(s_jmpNoNEON, 1);
    }

    static jmp_buf s_jmpNoCRC32;
    static void SigIllHandlerCRC32(int)
    {
        longjmp(s_jmpNoCRC32, 1);
    }

    static jmp_buf s_jmpNoAES;
    static void SigIllHandlerAES(int)
    {
        longjmp(s_jmpNoAES, 1);
    }

    static jmp_buf s_jmpNoSHA1;
    static void SigIllHandlerSHA1(int)
    {
        longjmp(s_jmpNoSHA1, 1);
    }

    static jmp_buf s_jmpNoSHA2;
    static void SigIllHandlerSHA2(int)
    {
        longjmp(s_jmpNoSHA2, 1);
    }
};

static volatile bool TryNEON()
{
#if defined(__ARM_NEON)
    volatile bool result = true;

    SigHandler oldHandler = signal(SIGILL, SigIllHandlerNEON);
    if (oldHandler == SIG_ERR)
        result = false;

    if (setjmp(s_jmpNoNEON))
        result = false;
    else
    {
        uint32_t v1[4] = {1,1,1,1};
        uint32x4_t x1 = vld1q_u32(v1);
        uint64_t v2[2] = {1,1};
        uint64x2_t x2 = vld1q_u64(v2);

        uint32x4_t x3 = vdupq_n_u32(0);
        x3 = vsetq_lane_u32(vgetq_lane_u32(x1,0),x3,0);
        x3 = vsetq_lane_u32(vgetq_lane_u32(x1,3),x3,3);
        uint64x2_t x4 = vdupq_n_u64(0);
        x4 = vsetq_lane_u64(vgetq_lane_u64(x2,0),x4,0);
        x4 = vsetq_lane_u64(vgetq_lane_u64(x2,1),x4,1);
    }

    signal(SIGILL, oldHandler);
    return result;
#else
    return false;
#endif  // __ARM_NEON
}

static volatile bool TryCRC32()
{
#if defined(__ARM_FEATURE_CRC32)
    volatile bool result = true;

    SigHandler oldHandler = signal(SIGILL, SigIllHandlerCRC32);
    if (oldHandler == SIG_ERR)
        result = false;

    if (setjmp(s_jmpNoCRC32))
        result = false;
    else
    {
        word32 w=0, x=0; word16 y=0; byte z=0;
        w = __crc32cw(w,x);
        w = __crc32ch(w,y);
        w = __crc32cb(w,z);
    }

    signal(SIGILL, oldHandler);
    return result;
#else
    return false;
#endif  // __ARM_FEATURE_CRC32
}

static volatile bool TryAES()
{
#if defined(__ARM_FEATURE_CRYPTO)
    volatile bool result = true;

    SigHandler oldHandler = signal(SIGILL, SigIllHandlerAES);
    if (oldHandler == SIG_ERR)
        result = false;

    if (setjmp(s_jmpNoAES))
        result = false;
    else
    {
        uint8x16_t data = vdupq_n_u8(0), key = vdupq_n_u8(0);
        uint8x16_t r1 = vaeseq_u8(data, key);
        uint8x16_t r2 = vaesdq_u8(data, key);
        UNUSED(r1), UNUSED(r2);
    }

    signal(SIGILL, oldHandler);
    return result;
#else
    return false;
#endif  // __ARM_FEATURE_CRYPTO
}

static volatile bool TrySHA1()
{
#if defined(__ARM_FEATURE_CRYPTO)
    volatile bool result = true;

    SigHandler oldHandler = signal(SIGILL, SigIllHandlerSHA1);
    if (oldHandler == SIG_ERR)
        result = false;

    if (setjmp(s_jmpNoSHA1))
        result = false;
    else
    {
        uint32x4_t data = vdupq_n_u32(0);
        uint32_t hash = 0x0;

        uint32x4_t r1 = vsha1cq_u32 (data, hash, data);
        uint32x4_t r2 = vsha1mq_u32 (data, hash, data);
        uint32x4_t r3 = vsha1pq_u32 (data, hash, data);
        UNUSED(r1), UNUSED(r2), UNUSED(r3);
    }

    signal(SIGILL, oldHandler);
    return result;
#else
    return false;
#endif  // __ARM_FEATURE_CRYPTO
}

static volatile bool TrySHA2()
{
#if defined(__ARM_FEATURE_CRYPTO)
    volatile bool result = true;

    SigHandler oldHandler = signal(SIGILL, SigIllHandlerSHA2);
    if (oldHandler == SIG_ERR)
        result = false;

    if (setjmp(s_jmpNoSHA2))
        result = false;
    else
    {
        uint32x4_t data = vdupq_n_u32(0);
        uint32x4_t hash = vdupq_n_u32(0);

        uint32x4_t r1 = vsha256hq_u32 (hash, hash, data);
        uint32x4_t r2 = vsha256h2q_u32 (hash, hash, data);
        uint32x4_t r3 = vsha256su0q_u32 (data, data);
        uint32x4_t r4 = vsha256su1q_u32 (data, data, data);
        UNUSED(r1), UNUSED(r2), UNUSED(r3), UNUSED(r4);
    }

    signal(SIGILL, oldHandler);
    return result;
#else
    return false;
#endif  // __ARM_FEATURE_CRYPTO
}

bool hasNEON = TryNEON();
bool hasCRC32 = TryCRC32();
bool hasAES = TryAES();
bool hasSHA1 = TrySHA1();
bool hasSHA2 = TrySHA2();

int main(int argc, char* argv[])
{
    std::cout << "Has NEON: " << hasNEON << std::endl;
    std::cout << "Has CRC32: " << hasCRC32 << std::endl;
    std::cout << "Has AES: " << hasAES << std::endl;
    std::cout << "Has SHA1: " << hasSHA1 << std::endl;
    std::cout << "Has SHA2: " << hasSHA2 << std::endl;

    return 0;
}

Solution

  • There were two issues with the code. First, all variables were made volatile. Second, the process mask needed to be saved and restored. The second issue only surfaced when a feature was not present, and on the second (or subsequent) failed feature test. The problem did not surface if the feature was available.

    Here's an example:

    static bool TryNEON()
    {
    #if defined(__ARM_NEON)
        volatile bool result = true;
        volatile SigHandler oldHandler = signal(SIGILL, SigIllHandlerNEON);
        if (oldHandler == SIG_ERR)
            return false;
    
        volatile sigset_t oldMask;
        if (sigprocmask(0, NULL, (sigset_t*)&oldMask))
            return false;
    
        if (setjmp(s_jmpNoNEON))
            result = false;
        else
        {
            uint32_t v1[4] = {1,1,1,1};
            uint32x4_t x1 = vld1q_u32(v1);
            uint64_t v2[2] = {1,1};
            uint64x2_t x2 = vld1q_u64(v2);
    
            uint32x4_t x3 = {0,0,0,0};
            x3 = vsetq_lane_u32(vgetq_lane_u32(x1,0),x3,0);
            x3 = vsetq_lane_u32(vgetq_lane_u32(x1,3),x3,3);
            uint64x2_t x4 = {0,0};
            x4 = vsetq_lane_u64(vgetq_lane_u64(x2,0),x4,0);
            x4 = vsetq_lane_u64(vgetq_lane_u64(x2,1),x4,1);
    
            // Hack... GCC optimizes away the code and returns true
            result = !!(vgetq_lane_u32(x3,0) | vgetq_lane_u64(x4,1));
        }
    
        sigprocmask(SIG_SETMASK, (sigset_t*)&oldMask, NULL);
        signal(SIGILL, oldHandler);
        return result;
    #else
        return false;
    #endif  // __ARM_NEON
    }