Search code examples
clinuxpathptracereadlink

Problems with finding out file path via descriptor


The challenge is to fish out the "write" and find out where and how many bytes were written. I use "ptrace" to catch it, but I have problems with further path detection. I use "readlink" to get the path descriptor. But it returns strings like "pipe:[766279]". What could be the problem? How to find out the full path to the file in this case?

    #define _GNU_SOURCE
    #include <fcntl.h>
    #include <stdio.h>
    #include <stdlib.h>
    #include <string.h>
    #include <sys/ptrace.h>
    #include <sys/wait.h>
    #include <unistd.h>
    #include <linux/limits.h>
    #include <sys/user.h>
    
    typedef struct Counter {
        char filename[PATH_MAX];
        int counter;
        struct Counter* next;
    } Counter;
    
    typedef struct Counters {
        struct Counter* head;
    } Counters;
    
    void increment(Counters* counters, const char* filename, int value) {
        Counter* current = counters->head;
        while (current != NULL) {
            if (strcmp(current->filename, filename) == 0) {
                current->counter += value;
                return;
            }
            current = current->next;
        }
    
        Counter* new_entry = malloc(sizeof(Counter));
        new_entry->next = counters->head;
        new_entry->counter = value;
        strncpy(new_entry->filename, filename, PATH_MAX - 1);
        new_entry->filename[PATH_MAX - 1] = '\0';
        counters->head = new_entry;
    }
    
    void print(Counters* counters) {
        Counter* current = counters->head;
        while (current != NULL) {
            printf("%s:%d\n", current->filename, current->counter);
            current = current->next;
        }
    }
    
    void get_file_path(int fd, char* path, size_t path_size) {
        char fd_path[PATH_MAX];
        snprintf(fd_path, sizeof(fd_path), "/proc/self/fd/%d", fd);
        int size = readlink(fd_path, path, path_size - 1);
        if (size < 0) {
            perror("readlink");
            exit(EXIT_FAILURE);
        }
        path[size] = '\0';
    }
    
    int main(int argc, char *argv[]) {
        Counters counters;
        counters.head = NULL;
    
        pid_t child_pid = fork();
    
        if (child_pid == -1) {
            perror("fork");
            exit(EXIT_FAILURE);
        }
        if (child_pid == 0) {
            ptrace(PTRACE_TRACEME, 0, NULL, NULL);
            execvp(argv[1], &argv[1]);
            perror("execvp");
            exit(EXIT_FAILURE);
        } else {
            int status;
            waitpid(child_pid, &status, 0);
    
            while (WIFSTOPPED(status)) {
                ptrace(PTRACE_SYSCALL, child_pid, NULL, NULL);
                waitpid(child_pid, &status, 0);
                struct user_regs_struct regs;
                ptrace(PTRACE_GETREGS, child_pid, NULL, &regs);
                if (regs.orig_rax == 1) {
                    fprintf(stderr, "we search %lld\n", regs.rdi);
                    char filename[PATH_MAX];
                    long int addr = regs.rdi;
                    char data[PATH_MAX];
                    get_file_path(regs.rdi, data, PATH_MAX);
                    data[PATH_MAX - 1] = '\0';
                    increment(&counters, filename, regs.rdx);
                }
            }
            print(&counters);
            Counter* current = counters.head;
            while (current != NULL) {
                Counter* next = current->next;
                free(current);
                current = next;
            }
        }
        return 0;
    }

I read that this can happen if it is a pipe-channel or an irregular file. But in the testing system they use a Python script, where writing to files of extension ".txt" goes through "echo" and everything should be OK. I'm sure I'm catching the right "write" system call, because the number of bytes written is the same. In the tests, a temporary directory is created in a loop and the command that writes looks like this "echo -n aaaaaaaaa 1>/tmp/tmp2baliyc8/uhjpiewboo". Realised that I get a file descriptor on "FIFO/pipe". And in response I want a string like '/tmp/tmp2baliyc8/uhjpiewboo'. But I still haven't figured out how to get it


Solution

  • By reading the links in /proc/self/fd, you will read what the parent process is opening. Since you want to trace the child, you should replace self with the process id of the child.

    Example:

    void get_file_path(int fd, pid_t pid, char* path, size_t path_size) {
        char fd_path[PATH_MAX];
        snprintf(fd_path, sizeof(fd_path), "/proc/%d/fd/%d", (int)pid, fd);
        ssize_t size = readlink(fd_path, path, path_size - 1);
        if(size < 0) {
            perror("readlink");
            path[0] = '\0';
            exit(EXIT_FAILURE);
        }
        path[size] = '\0';
        fprintf(stderr, "WE GOT [%s]\n", path); // debug print to see that it works
    }
    

    The call to the function in the parent process would then need to be:

    get_file_path(regs.rdi, child_pid, data, PATH_MAX);
    //                      ^^^^^^^^^
    

    You can now see the program correctly printing out the name of the file the child process opened in the WE GOT [...] debug printout.

    This does not fix everything else in the program, like char filename[PATH_MAX]; being uninitialized and never changed, but used in comparisons in increment - or the fact that you populate data in the get_file_path call, but never use it.