Search code examples
Tags: c, linux-kernel, system-calls, ptrace

Linux syscalls: PTRACE_O_TRACECLONE causes indefinite hanging


I have a binary from which I need to intercept a certain syscall--in this case unlinkat--and make it do nothing. I have the following code which works fine for a single process; however, with PTRACE_O_TRACECLONE added to the ptrace opts, after the tracee makes a call to clone, the waitpid call hangs forever. I've been pulling my hair out for days on different parts of the internet, to the point where I was going through the source of strace, and had in fact straced strace to see what the strace I had straced was ptracing.

Here's the source--I removed some stuff to make it as minimal as possible for readability.

#define _POSIX_C_SOURCE 200112L

// std (i think)
#include <errno.h>
#include <signal.h>
#include <stddef.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>

// linux
#include <sys/ptrace.h>
#include <sys/reg.h>
#include <sys/syscall.h>
#include <sys/types.h>
#include <sys/user.h>
#include <sys/wait.h>
#include <unistd.h>

/* Option mask handed to PTRACE_SETOPTIONS in do_trace(). Only
   PTRACE_O_TRACESYSGOOD is active; the flags in the trailing comment are
   disabled. Enabling PTRACE_O_TRACECLONE is what triggers the reported
   waitpid() hang. */
#define OPTS PTRACE_O_TRACESYSGOOD // | PTRACE_O_TRACECLONE | PTRACE_O_TRACEVFORK | PTRACE_O_TRACEFORK
/* Flags passed to every waitpid() call made by the tracer. */
#define WOPTS 0

/*
 * The TRACEE. Asks to be traced by its parent, stops itself so the parent can
 * configure tracing, then replaces the process image with the target program.
 *
 * argc/argv describe the target command line; argv[0] is the program name and
 * is resolved by execvp. The function only returns when something failed, and
 * the return value is then -1 (a diagnostic has been written to stderr).
 */
int do_child(int argc, char **argv) {
    if (argc < 1) {
        fprintf(stderr, "do_child: no program to execute\n");
        return -1;
    }

    /* execvp needs a NULL-terminated vector; build one instead of assuming
       that argv[argc] is NULL. */
    char *args[argc + 1];
    memcpy(args, argv, (size_t)argc * sizeof(char *));
    args[argc] = NULL;

    /* ptrace is variadic: pass all four arguments so none is read as garbage. */
    if (ptrace(PTRACE_TRACEME, 0, NULL, NULL) == -1) {
        perror("ptrace(PTRACE_TRACEME)");
        return -1;
    }

    /* Stop here so the parent's first waitpid() returns before the exec. */
    if (raise(SIGSTOP) != 0) {
        perror("raise(SIGSTOP)");
        return -1;
    }

    execvp(args[0], args);

    /* Reaching this line means the exec failed. */
    perror(args[0]);
    return -1;
}

/*
 * Resumes the tracee with PTRACE_SYSCALL and blocks until it reaches its next
 * syscall stop.
 *
 * Returns  0 when `child` is stopped at a syscall entry or exit,
 *          1 when `child` has terminated (exited or killed by a signal),
 *         -1 when ptrace() or waitpid() itself failed.
 *
 * Stops caused by ordinary signals are not syscall stops: the signal is
 * handed back to the tracee on the next restart so the traced program still
 * sees it. SIGTRAP is deliberately not re-injected because ptrace generates
 * it itself (for example after a successful exec) and delivering it would
 * kill the tracee.
 */
int wait_for_syscall(pid_t child) {
    int pending_sig = 0;

    while (1) {
        if (ptrace(PTRACE_SYSCALL, child, NULL, (void *)(long)pending_sig) == -1) {
            return -1;
        }
        pending_sig = 0;

        int status = 0;
        pid_t got;
        do {
            got = waitpid(child, &status, WOPTS);
        } while (got == -1 && errno == EINTR);
        if (got == -1) {
            return -1;
        }

        /* A tracee killed by a signal never reports WIFEXITED; without this
           check the loop would spin forever on a dead pid. */
        if (WIFEXITED(status) || WIFSIGNALED(status)) {
            return 1;
        }
        if (!WIFSTOPPED(status)) {
            continue;
        }

        int sig = WSTOPSIG(status);
        if (sig & 0x80) {
            /* Bit 7 is set on syscall stops by PTRACE_O_TRACESYSGOOD. */
            return 0;
        }
        if (sig != SIGTRAP) {
            pending_sig = sig;
        }
    }
}

/*
 * The TRACER. Takes the pid of the child that called PTRACE_TRACEME, follows
 * it from syscall stop to syscall stop, and turns every unlinkat() into a
 * no-op that reports success.
 *
 * Each syscall produces two stops: one on entry and one on exit. On x86_64
 * the call can only be cancelled at the entry stop, by replacing the syscall
 * number in ORIG_RAX with -1; the return value can only be set at the exit
 * stop, by writing RAX. Writing RAX at entry alone leaves the real unlinkat
 * to run.
 *
 * Only `child` itself is waited for. NOTE(review): if OPTS is extended with
 * PTRACE_O_TRACECLONE/FORK/VFORK, the new tasks are traced automatically and
 * start out stopped; nothing here resumes them, so the loop would need to
 * wait on every tracee (waitpid(-1, ..., __WALL)) - confirm against ptrace(2).
 *
 * Returns 0 once the tracee is gone, 1 if tracing could not be set up.
 */
int do_trace(pid_t child) {
    int status = 0;

    /* The tracee stops itself with SIGSTOP right after PTRACE_TRACEME. */
    if (waitpid(child, &status, WOPTS) == -1) {
        perror("waitpid");
        return 1;
    }
    if (!WIFSTOPPED(status)) {
        fprintf(stderr, "do_trace: tracee did not stop for tracing\n");
        return 1;
    }
    if (ptrace(PTRACE_SETOPTIONS, child, NULL, (void *)(unsigned long)(OPTS)) == -1) {
        perror("ptrace(PTRACE_SETOPTIONS)");
        kill(child, SIGKILL); /* do not leave a stopped, half-traced child behind */
        return 1;
    }

    void *const orig_rax_off = (void *)(sizeof(long) * ORIG_RAX);
    void *const rax_off = (void *)(sizeof(long) * RAX);
    int at_entry = 0;    /* toggled per stop; the first stop seen is an entry */
    int fake_result = 0; /* set at entry when the current call was cancelled */

    while (wait_for_syscall(child) == 0) {
        at_entry = !at_entry;

        if (!at_entry) {
            if (fake_result) {
                /* The cancelled call left -ENOSYS in RAX; report success. */
                ptrace(PTRACE_POKEUSER, child, rax_off, (void *)0L);
                fake_result = 0;
            }
            continue;
        }

        /* PEEKUSER returns the register value itself, so -1 is only an
           error when errno is set as well. */
        errno = 0;
        long nr = ptrace(PTRACE_PEEKUSER, child, orig_rax_off, NULL);
        if (nr == -1 && errno != 0) {
            perror("ptrace(PTRACE_PEEKUSER)");
            break;
        }

        switch (nr) {
            case SYS_clone:
                fprintf(stderr, "DEBUG: clone detected\n");
                break;
            case SYS_unlinkat:
                fprintf(stderr, "DEBUG: unlinkat detected\n");
                /* An invalid syscall number makes the kernel run nothing. */
                if (ptrace(PTRACE_POKEUSER, child, orig_rax_off, (void *)-1L) == 0) {
                    fake_result = 1;
                }
                break;
            default:
                break;
        }
    }

    return 0;
}

/*
 * Entry point: forks, runs the given command line in the child under
 * PTRACE_TRACEME and traces it from the parent.
 *
 * Usage: <this program> prog [args...]
 * Exits with 1 on a usage error or when fork() fails; otherwise with the
 * value returned by do_trace() (parent) or do_child() (child, exec failure).
 */
int main(int argc, char **argv) {
    if (argc < 2) {
        fprintf(stderr, "Usage: %s prog args\n", argc > 0 ? argv[0] : "trace");
        exit(1);
    }

    pid_t child = fork();
    if (child == -1) {
        /* Without this check the tracer would be started on pid -1. */
        perror("fork");
        return 1;
    }
    if (child == 0) {
        return do_child(argc - 1, argv + 1);
    }
    return do_trace(child);
}

Just as a disclaimer, I am NOT a C developer, these days I mainly write Python, so a lot of this was just copied and pasted from different tutorials I found and I basically added/removed random shit until gcc didn't give me that many warnings.

Based on what I've read, I suspect the issue is something about raising signals to the processes involved and waiting for a SIGTRAP, I just have no real intuition on what to do at that level.


Solution

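  • The solution was to use libseccomp instead of ptrace. A seccomp filter that is loaded before the exec stays in force in the target program and in every process it forks or clones, so no tracer loop (and no waitpid bookkeeping) is needed.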

    #include <stdio.h>
    #include <stdlib.h>
    #include <stddef.h>
    #include <unistd.h>
    #include <string.h>
    #include <fcntl.h>
    #include <seccomp.h>
    #include <sys/stat.h>
    #include <sys/types.h>
    #include <errno.h>
    
    /*
     * Replaces the current process image with the target program.
     *
     * argc/argv describe the target command line; argv[0] is the program name
     * and is resolved by execvp. Only returns on failure: the result is -1,
     * errno describes the problem and a diagnostic is written to stderr.
     */
    int do_child(int argc, char **argv)
    {
        if (argc < 1)
        {
            /* execvp must not be called with a NULL program name. */
            errno = EINVAL;
            perror("do_child");
            return -1;
        }

        /* execvp needs a NULL-terminated vector; build one instead of
           assuming that argv[argc] is NULL. */
        char *args[argc + 1];
        memcpy(args, argv, (size_t)argc * sizeof(char *));
        args[argc] = NULL;

        execvp(args[0], args);

        /* Reaching this line means the exec failed. */
        perror(args[0]);
        return -1;
    }
    
    /*
     * Entry point: installs a seccomp filter that allows every syscall except
     * unlinkat, which is made to return 0 without running, then executes the
     * given command line under that filter.
     *
     * The program is exec'd in this process rather than in a forked child, so
     * the caller gets the target's own exit status instead of an immediate 0
     * from a parent that never waited for its child.
     *
     * Usage: <this program> prog [args...]
     * Exits with 1 on a usage error or when the filter cannot be installed.
     */
    int main(int argc, char **argv)
    {
        if (argc < 2)
        {
            fprintf(stderr, "Usage: %s prog args\n", argc > 0 ? argv[0] : "nounlink");
            exit(1);
        }

        // Init the filter: anything without an explicit rule is allowed
        scmp_filter_ctx ctx = seccomp_init(SCMP_ACT_ALLOW);
        if (ctx == NULL)
        {
            fprintf(stderr, "seccomp_init failed\n");
            return 1;
        }

        // The single exception: unlinkat "succeeds" (errno 0) without executing
        int rc = seccomp_rule_add(ctx, SCMP_ACT_ERRNO(0), SCMP_SYS(unlinkat), 0);
        if (rc == 0)
        {
            // build and load the filter into the kernel
            rc = seccomp_load(ctx);
        }

        // The context is only needed to build the filter; free it either way
        seccomp_release(ctx);

        if (rc < 0)
        {
            // libseccomp reports failures as negative errno values
            fprintf(stderr, "installing seccomp filter failed: %s\n", strerror(-rc));
            return 1;
        }

        return do_child(argc - 1, argv + 1);
    }