Search code examples
Tags: c, macos, arm, arm64, stack-unwinding

On ARM macOS when explicitly raise()-ing a signal, some return addresses are garbled on the stack


Here's a simple program for ARM macOS that installs a signal handler for SIGSEGV, then generates one. In the signal handler function, the stack is walked with the usual frame pointer chasing algorithm, then the symbolized version is printed out:

#include <stdio.h>
#include <signal.h>
#include <unistd.h>
#include <execinfo.h>
#include <stdlib.h>

// Signal handler (SA_SIGINFO form) that prints the interrupted thread's call
// stack and then aborts.
//
// The walk starts from the program counter and frame pointer saved in the
// machine context and follows the frame-pointer chain: for each frame record,
// the word at [fp] is the caller's frame pointer and the word at [fp + 8] is
// the return address.
//
// Parameters:
//   signum  - signal number (unused)
//   siginfo - signal details (unused)
//   context - ucontext of the interrupted thread, as delivered by the kernel
//
// Note: return addresses are stored as found on the stack, so on Apple
// silicon they may still carry a pointer-authentication signature in their
// upper bits.
void handler(int signum, siginfo_t* siginfo, void* context)
{
    (void) signum;
    (void) siginfo;

    __darwin_ucontext* ucontext = (__darwin_ucontext*) context;
    __darwin_mcontext64* machineContext = ucontext->uc_mcontext;
    
    uint64_t programCounter = machineContext->__ss.__pc;
    uint64_t framePointer = machineContext->__ss.__fp;
    
    // Capacity of the backtrace buffer. The walk must stop here: a deep or
    // corrupted (e.g. cyclic) frame chain would otherwise write past bt[].
    enum { kMaxFrames = 100 };

    void* bt[kMaxFrames];
    int n = 0;
    while (framePointer != 0 && n < kMaxFrames) {
        bt[n] = (void*)programCounter;
        
        programCounter = *(uint64_t*)(framePointer + 8);
        framePointer = *(uint64_t*)(framePointer);
        
        ++n;
    }

    // backtrace_symbols() allocates its result and returns NULL on failure;
    // fall back to raw addresses in that case instead of dereferencing NULL.
    char** symbols = backtrace_symbols(bt, n);
    printf ("Call stack:\n");
    for (int i = 0; i < n; ++i) {
        if (symbols) {
            printf ("\t %s\n", symbols[i]);
        } else {
            printf ("\t %p\n", bt[i]);
        }
    }

    free (symbols);
    
    abort ();
}

// Provokes the installed signal handler by sending SIGSEGV through raise().
// The commented-out null write is the alternative that produces a genuine
// memory fault instead of an explicitly raised signal.
void Crash ()
{
    const int signalToSend = SIGSEGV;
    raise (signalToSend);
    //*(volatile int*)0 = 0;
}

// Entry point: installs `handler` as the SIGSEGV handler, then calls Crash()
// to trigger it.
int main()
{
    // Value-initialize the whole struct: leaving it uninitialized would hand
    // indeterminate sa_mask contents to sigaction().
    struct sigaction sigAction = {};
    sigemptyset (&sigAction.sa_mask);      // block no additional signals in the handler
    sigAction.sa_sigaction = handler;
    sigAction.sa_flags = SA_SIGINFO;       // use the three-argument sa_sigaction form
    if (sigaction (SIGSEGV, &sigAction, nullptr) != 0) {
        perror ("sigaction");
        return EXIT_FAILURE;
    }
    
    Crash ();
    return 0;
}

This works fine when a "regular" SIGSEGV happens, but when the signal is raised explicitly, some return addresses on the stack seem garbled; specifically, their upper bits seem to contain garbage:

Call stack:
     0   libsystem_kernel.dylib              0x0000000185510e68 __pthread_kill + 8
     1   libsystem_c.dylib                   0x116a000185422e14 raise + [...] // Should be 0x0000000185422e14
     2   SignalHandlerTest                   0x8f6a000104bc3eb8 _Z5Crashv + [...] // Should be 0x0000000104bc3eb8
     3   SignalHandlerTest                   0x0000000104bc3ef8 main + 56
     4   libdyld.dylib                       0x0000000185561450 start + 4

The behavior is the same regardless of which signal is raised. What am I missing?


Solution

  • As @Codo has correctly identified, this is PAC.
    The upper bits of the address are not garbled, but rather contain a salted hash of the register's lower bits.

    And contrary to your claims, this happens with regular segfaults too. For example, calling fprintf(NULL, "a"); results in:

    Call stack:
         0   libsystem_c.dylib                   0x000000019139d8a0 flockfile + 28
         1   libsystem_c.dylib                   0x1d550001913a5870 vfprintf_l + 2113595600120315944
         2   libsystem_c.dylib                   0x341c80019139efd0 fprintf + 3755016926808506440
         3   t                                   0x5f29000100483e9c Crash + 6857011907648290844
         4   t                                   0x0000000100483edc main + 56
         5   libdyld.dylib                       0x00000001914b1430 start + 4
    

    This is because all system binaries, including libraries, are compiled for the arm64e ABI and will make use of PAC. Now, your binary is running as a regular old arm64 binary and would crash if it passed an unsigned function pointer to a library function, or got a signed one returned. So the kernel actually disables 3 of the 4 keys that your process can use (IA, IB, DA and DB). But one of those, IB, is used solely for stack frames and so that one is left enabled even in arm64 binaries.

    The reason why some return addresses are still not signed though is:

    • The main + 56 and start + 4 were pushed by your code, which is arm64 and hence doesn't sign them.
    • The flockfile + 28 is the instruction that crashed, whose address was never pushed to the stack, but extracted from the thread state.

    So everything's working exactly as it's supposed to.


    Edit:

    After trying to use this in my own debugging, I find the PAC'ed addresses to be annoying after all. You commented about ptrauth_strip in ptrauth.h, but that will actually not work inside an arm64 process (it's aliased to a macro that does nothing), nor will __builtin_ptrauth_strip (the compiler will error out).
    The compiler won't even let you use a raw xpaci instruction when targeting arm64, but nothing on the hardware level prevents the instruction from working, so you can still manually inject the opcode.

    Based on this, I wrote a signal handler that properly strips PAC signatures from an arm64 process:

    #include <errno.h>
    #include <execinfo.h>
    #include <signal.h>
    #include <stdint.h>
    #include <stdio.h>
    #include <stdlib.h>
    #include <string.h>
    
    #ifdef __arm64__
    
    /*
     * xpaci: returns `pc` after running it through the XPACLRI instruction,
     * which operates on the link register (x30). The routine is written as
     * file-scope assembly with the opcode emitted as a raw word, so it does
     * not depend on the assembler accepting the mnemonic for this target.
     *
     * Argument arrives in x0 and the result is returned in x0; x1 is used as
     * scratch to preserve the real return address.
     */
    extern void* xpaci(uint64_t pc);
    __asm__
    (
        "_xpaci:\n"
        "    mov x1, x30\n"           // stash our own return address in x1
        "    mov x30, x0\n"           // move the argument into the link register
        "    .4byte 0xd50320ff\n" // xpaclri
        "    mov x0, x30\n"           // link register now holds the result; return it in x0
        "    ret x1\n"                // return via the address saved in x1
    );
    
    #else
    
    /* Fallback used when not building for arm64: the address is handed back
     * unchanged, merely converted to a pointer. */
    static inline void* xpaci(uint64_t pc)
    {
        void *unchanged = (void*)pc;
        return unchanged;
    }
    
    #endif
    
    /*
     * Signal handler (SA_SIGINFO form) that prints a symbolized call stack of
     * the interrupted thread to stderr and then exits with status -1.
     *
     * The walk starts at the pc/fp saved in the machine context and follows
     * the frame-pointer chain: word [0] of a frame record is the caller's
     * frame pointer, word [1] is the return address. Every address is passed
     * through xpaci() before being tested or stored. The walk ends at the
     * first address that is zero after xpaci(), at a zero frame pointer, or
     * after MAX_FRAMES frames.
     *
     * signum and siginfo are unused; ctx is the ucontext passed by the kernel.
     *
     * NOTE(review): malloc/fprintf/backtrace_symbols/exit are not
     * async-signal-safe; this is acceptable for a last-gasp crash reporter
     * but can deadlock if the fault happened inside the allocator or stdio.
     */
    static void handler(int signum, siginfo_t *siginfo, void *ctx)
    {
        /* Upper bound on walked frames: stops a corrupted (cyclic) frame chain
         * from looping forever and keeps the count within the int that
         * backtrace_symbols() accepts. */
        enum { MAX_FRAMES = 4096 };

        (void)signum;
        (void)siginfo;

        _STRUCT_MCONTEXT64 *mctx = ((_STRUCT_UCONTEXT*)ctx)->uc_mcontext;
    #ifdef __arm64__
        uint64_t orig_pc = mctx->__ss.__pc;
        uint64_t orig_fp = mctx->__ss.__fp;
    #elif defined(__x86_64__)
        uint64_t orig_pc = mctx->__ss.__rip;
        uint64_t orig_fp = mctx->__ss.__rbp;
    #else
    #   error "Unknown arch"
    #endif
    
        /* Pass 1: count the frames so the buffer can be sized exactly. */
        uint64_t pc = orig_pc;
        uint64_t fp = orig_fp;
        size_t n = 0;
        while(n < MAX_FRAMES)
        {
            if(!xpaci(pc))
            {
                break;
            }
            ++n;
            if(!fp)
            {
                break;
            }
            pc = ((uint64_t*)fp)[1];
            fp = ((uint64_t*)fp)[0];
        }

        /* Request at least one element: a zero-sized allocation may return
         * NULL, which would be misreported as an out-of-memory error.
         * calloc also leaves any slot pass 2 does not reach as NULL. */
        void **bt = calloc(n ? n : 1, sizeof *bt);
        if(!bt)
        {
            fprintf(stderr, "malloc: %s\n", strerror(errno));
            exit(-1);
        }

        /* Pass 2: repeat the same walk, recording the stripped addresses. */
        pc = orig_pc;
        fp = orig_fp;
        for(size_t i = 0; i < n; ++i)
        {
            bt[i] = xpaci(pc);
            if(!fp)
            {
                break;
            }
            pc = ((uint64_t*)fp)[1];
            fp = ((uint64_t*)fp)[0];
        }

        /* n <= MAX_FRAMES, so the narrowing cast is lossless. */
        char **sym = backtrace_symbols(bt, (int)n);
        fprintf(stderr, "Caught signal with call stack:\n");
        for(size_t i = 0; i < n; ++i)
        {
            if(sym)
            {
                fprintf(stderr, "%s\n", sym[i]);
            }
            else
            {
                /* Symbolization failed: still report the raw addresses. */
                fprintf(stderr, "%p\n", bt[i]);
            }
        }
        free(sym);
        free(bt);
        exit(-1);
    }
    

    It uses xpaclri rather than xpaci, since the former is a NOP on arm64 (non-arm64e) hardware while the latter would be undefined.