Search code examples
c linux linux-kernel

Hooking syscall by modifying sys_call_table does not work


I'm trying to do basic hooking by locating sys_call_table and modifying the entry for the sys_read syscall so that it points to a function in my own kernel module. I have already tried kprobes; here I'm specifically interested in doing it through sys_call_table.

Below is my code:

#include <linux/kernel.h>
#include <linux/module.h>
#include <linux/kprobes.h>
#include <linux/syscalls.h>
#include <linux/version.h>



typedef asmlinkage long (*t_syscall)(const struct pt_regs *);
unsigned long cr0;
unsigned long **__sys_call_table;
typedef unsigned long (*kallsyms_lookup_name_t)(const char *name);
typedef asmlinkage int (*orig_getdents64_t)(unsigned int,
        struct linux_dirent64 *, unsigned int);   
asmlinkage long (*original_syscall)(const struct pt_regs *);
static struct kprobe kp = {
    .symbol_name = "kallsyms_lookup_name"
};
static kallsyms_lookup_name_t kallsyms_lookup_name_ptr;

static struct kprobe kp2 = {
    .symbol_name = "__x64_sys_read"
};

unsigned long *get_syscall_address(unsigned long *sys_call_table, int syscall_number);
asmlinkage long hooked_syscall(const struct pt_regs *regs);


#if LINUX_VERSION_CODE > KERNEL_VERSION(4, 16, 0)
/*
 * write_cr0_forced() - load @val into CR0 with a raw "mov".
 * @val: the complete CR0 value to install.
 *
 * Used instead of write_cr0() on kernels newer than 4.16.0.
 * NOTE(review): presumably because write_cr0() on recent kernels refuses to
 * clear pinned bits such as WP -- confirm against the target kernel.
 *
 * The "memory" clobber makes this a compiler barrier, so ordinary stores
 * (for example the write to the syscall table) cannot be moved across the
 * CR0 update. The previous dummy "+m" operand on an uninitialised local
 * gave no such guarantee.
 */
static inline void
write_cr0_forced(unsigned long val)
{
    asm volatile(
        "mov %0, %%cr0"
        : "+r"(val)
        :
        : "memory");
}
#endif

/*
 * unprotect_memory() - allow writes to read-only kernel data.
 *
 * x86: installs the CR0 value cached in the file-level `cr0` with bit 16
 * (the write-protect flag, CR0.WP) cleared.
 * arm64: remaps the rodata range as PAGE_KERNEL.
 * NOTE(review): start_rodata and section_size are not defined anywhere in
 * the visible source, so the arm64 branch cannot build as shown.
 */
static inline void
unprotect_memory(void)
{
#if IS_ENABLED(CONFIG_X86) || IS_ENABLED(CONFIG_X86_64)
    const unsigned long wp_bit = 1UL << 16;
    const unsigned long relaxed = cr0 & ~wp_bit;

#if LINUX_VERSION_CODE <= KERNEL_VERSION(4, 16, 0)
    write_cr0(relaxed);
#else
    write_cr0_forced(relaxed);
#endif
#elif IS_ENABLED(CONFIG_ARM64)
    update_mapping_prot(__pa_symbol(start_rodata), (unsigned long)start_rodata,
            section_size, PAGE_KERNEL);
#endif
}

/*
 * protect_memory() - undo unprotect_memory().
 *
 * x86: writes the CR0 value cached in the file-level `cr0` back unchanged.
 * arm64: remaps the rodata range as PAGE_KERNEL_RO.
 * NOTE(review): start_rodata and section_size are not defined anywhere in
 * the visible source, so the arm64 branch cannot build as shown.
 */
static inline void
protect_memory(void)
{
#if IS_ENABLED(CONFIG_X86) || IS_ENABLED(CONFIG_X86_64)
    const unsigned long saved = cr0;

#if LINUX_VERSION_CODE <= KERNEL_VERSION(4, 16, 0)
    write_cr0(saved);
#else
    write_cr0_forced(saved);
#endif
#elif IS_ENABLED(CONFIG_ARM64)
    update_mapping_prot(__pa_symbol(start_rodata), (unsigned long)start_rodata,
            section_size, PAGE_KERNEL_RO);
#endif
}

/*
 * hooked_syscall() - replacement handler written into the __NR_read slot.
 * @regs: register snapshot handed over by the syscall entry code.
 *
 * Logs one line, then forwards @regs to the handler saved in
 * original_syscall and returns that handler's result unchanged.
 */
asmlinkage long hooked_syscall(const struct pt_regs *regs)
{
    long result;

    pr_info("Syscall hooked!\n");
    result = original_syscall(regs);

    return result;
}

/*
 * find_sys_call_table() - resolve the "sys_call_table" symbol.
 *
 * Goes through kallsyms_lookup_name_ptr, which must already be set.
 *
 * Return: the value the lookup produced, cast to a table pointer. The
 * caller treats NULL as "symbol not found".
 */
static unsigned long **find_sys_call_table(void)
{
    return (unsigned long **)kallsyms_lookup_name_ptr("sys_call_table");
}


/*
 * kprobe_init() - module entry point: locate sys_call_table and redirect
 * its __NR_read slot to hooked_syscall().
 *
 * Steps:
 *   1. Register a kprobe on kallsyms_lookup_name to learn its address,
 *      then drop the probe straight away so no error path can leak it.
 *   2. Resolve sys_call_table through that function.
 *   3. Save the current __NR_read entry exactly once, then overwrite it.
 *   4. Cross-check by printing the address of __x64_sys_read.
 *
 * On kernels that dispatch syscalls through x64_sys_call() (v6.9 and the
 * stable backports), the table is no longer consulted for dispatch. The
 * write succeeds there but hooked_syscall() is never called.
 *
 * Return: 0 on success, a negative errno otherwise. On every failure path
 * the syscall table is left (or put back) in its original state.
 */
static int __init kprobe_init(void)
{
    int ret;

    cr0 = read_cr0();

    ret = register_kprobe(&kp);
    if (ret < 0)
        return ret;

    kallsyms_lookup_name_ptr = (kallsyms_lookup_name_t)kp.addr;
    /* Only the address was needed; the probe itself has no handlers. */
    unregister_kprobe(&kp);

    __sys_call_table = find_sys_call_table();
    if (!__sys_call_table) {
        pr_err("Couldn't find sys_call_table.\n");
        return -ENOENT;
    }

    pr_info("__sys_call_table address : %px\n", __sys_call_table);

    /* Save the genuine handler once; it must never be overwritten later. */
    original_syscall = (void *)__sys_call_table[__NR_read];
    pr_info("__NR_READ : %px\n", original_syscall);
    pr_info("HOOKED FUNCTION : %px\n", (void *)hooked_syscall);

    unprotect_memory();
    __sys_call_table[__NR_read] = (unsigned long *)hooked_syscall;
    protect_memory();

    /*
     * Double check: read the slot back for logging only. Storing this value
     * in original_syscall would make the hook call itself forever.
     */
    pr_info("__NR_READ : %px\n", __sys_call_table[__NR_read]);

    /* Extra check: where does the real sys_read live? */
    ret = register_kprobe(&kp2);
    if (ret < 0) {
        /*
         * Failing init unloads the module, so the table must not keep
         * pointing into it.
         */
        unprotect_memory();
        __sys_call_table[__NR_read] = (unsigned long *)original_syscall;
        protect_memory();
        return ret;
    }

    pr_info("%px\n", kp2.addr);
    unregister_kprobe(&kp2);

    return 0;
}

/*
 * kprobe_exit() - module exit point: restore the saved __NR_read entry so
 * the table no longer points at code that is about to be freed.
 *
 * NOTE(review): a task may still be executing inside hooked_syscall() when
 * the module text is released; nothing here waits for such callers.
 */
static void __exit kprobe_exit(void)
{
    unprotect_memory();
    __sys_call_table[__NR_read] = (unsigned long *)original_syscall;
    protect_memory();
}

/* Register kprobe_init()/kprobe_exit() as the module's load and unload hooks. */
module_init(kprobe_init)
module_exit(kprobe_exit)
/* NOTE(review): presumably required because the kprobe API is exported GPL-only -- confirm. */
MODULE_LICENSE("GPL");

and the Makefile,

# Name of the kernel module: kbuild compiles sct.c into sct.o and links sct.ko
obj-m += sct.o

# No "<module>-objs" list is needed for a single-source module. The former
# "hello_world-objs := sct.c" named a module that is not built here and
# listed a .c file where kbuild expects object files, so it had no effect.

# Path to the kernel source tree
KDIR := /lib/modules/$(shell uname -r)/build

.PHONY: all clean

# Recipe lines must start with a TAB. $(MAKE) passes flags and the
# jobserver on to the kernel build.
all:
	$(MAKE) -C $(KDIR) M=$(PWD) modules

clean:
	$(MAKE) -C $(KDIR) M=$(PWD) clean

I get the address of kallsyms_lookup_name() by registering a kprobe on it and then reading the probe's .addr field. Once I have the address of sys_call_table, I can read the address of the sys_read syscall from it. I checked that address by grepping /proc/kallsyms, and it appears to be correct. Then I change the __NR_read entry to point to a function in my LKM. I have some debug prints afterwards, and they confirm that the sys_call_table entry has changed.

    /* Log the address that the lookup returned for sys_call_table. */
    printk("__sys_call_table address : %px\n", __sys_call_table);

    /* Make the table writable before patching it (see unprotect_memory()). */
    unprotect_memory();
    /* Remember the current __NR_read entry so the hook can forward to it. */
    original_syscall = (void *)__sys_call_table[__NR_read];
    printk("__NR_READ : %px\n", original_syscall);
    printk("HOOKED FUNCTION : %px\n", (unsigned long *)hooked_syscall);
    /* Point the __NR_read slot at the hook. */
    __sys_call_table[__NR_read] = (unsigned long *)hooked_syscall;
    
    /*
     * Double check: re-read the slot that was just written.
     * NOTE(review): this assignment replaces the saved original handler with
     * the value now in the table, i.e. hooked_syscall itself, so a later
     * original_syscall(regs) call from the hook would call the hook again.
     */
    original_syscall = (void *)__sys_call_table[__NR_read];
    printk("__NR_READ : %px\n", original_syscall);

Unfortunately, after modifying sys_call_table entry I don't get any printk showing in dmesg, or any crash or anything!

To do extra checking, I installed a kprobe on sys_read and got the addr but even after modifying sys_call_table the kprobe still shows the original address of sys_read.

I'm on Ubuntu 24.04, 6.8.0-35-generic. I also tried Ubuntu 22.04 but I got the same result! Both with stock kernel with default configuration. Tried in VMware VM as well as physical hardware.

I searched a bit to see if any security mechanism might cause issues with this, but couldn't find anything :(

It's pretty confusing for me why my modification to sys_call_table doesn't seem to take effect.

Can you please tell me what I am missing here? Is hooking via sys_call_table still possible? I'm new to this and learning about different Linux kernel features, so I'd like to know whether modifying sys_call_table to hook a syscall still works.

I tried to include enough information to help reproducing the same result.


Solution

  • Surprise, surprise! You cannot do this anymore since Linux v6.9. Commit 1e3ad78334a69b36e107232e337f9d693dcc9df2 introduced a security mitigation against speculative execution on x86 that completely removed the use of syscall tables for dispatch, and it has been backported to v6.8.5+, v6.6.26+, v6.1.85+, and v5.15.154+. Ubuntu 24.04 uses the v6.8 stable branch, and Ubuntu 22.04 uses the v5.15 stable branch, so the patch is present there too.

    The sys_call_table symbol still exists and still contains valid function pointers, but it is only used for tracing purposes (CONFIG_FTRACE_SYSCALLS=y). The actual syscall dispatch code is now implemented as a huge inlined switch case (source):

    /* Each __SYSCALL(nr, sym) use expands to one switch case that calls __x64_<sym>(regs) directly. */
    #define __SYSCALL(nr, sym) case nr: return __x64_##sym(regs);
    
    /* Dispatch syscall number nr to its handler, passing regs through; no table lookup is involved. */
    long x64_sys_call(const struct pt_regs *regs, unsigned int nr)
    {
        switch (nr) {
        /* presumably a list of __SYSCALL(nr, sym) lines, one per syscall -- TODO confirm */
        #include <asm/syscalls_64.h>
        /* Any number without a case above goes to __x64_sys_ni_syscall(). */
        default: return __x64_sys_ni_syscall(regs);
        }
    };
    

    I see you already mention you tried kprobes (the real solution) so I assume you know how to use those. I'm just going to leave this here for whoever comes across this post and might find it useful. Using kprobes is significantly easier than doing things manually and in a "dirty" way by editing sys_call_table.

    In order to find the appropriate symbol to hook you can take a look at the kernel symbols directly with readelf -s and grep for the syscall name you are interested in. Usually, they are prefixed with an arch-specific prefix. In case of x86 it's __x64_sys_ for 64-bit syscalls.

    I also maintain syscalls.mebeim.net where you can find a list of syscall symbol names for various architectures and kernel versions, which you may find useful.

    Here's an example of how this could be done:

    #include <linux/kprobes.h>
    #include <linux/module.h>
    #include <linux/ptrace.h>
    // ...
    
    /*
     * Runs just before the first instruction of __x64_sys_read executes.
     * p is the probe that fired; regs holds the CPU registers at that point.
     * NOTE(review): on x86-64 the syscall's own pt_regs is presumably the
     * first argument of __x64_sys_read (i.e. reachable through regs->di) --
     * confirm for the target kernel.
     * Returning 0 lets the probed function continue normally.
     */
    static int sys_read_kprobe_pre_handler(struct kprobe *p, struct pt_regs *regs)
    {
        // Do something here...
        return 0;
    }
    
    /* Probe on the x86-64 read() entry point; static, as it is private to this module. */
    static struct kprobe syscall_kprobe = {
        .symbol_name = "__x64_sys_read",
        .pre_handler = sys_read_kprobe_pre_handler,
    };
    
    /* Module load: install the probe and pass any registration error on to the loader. */
    static int __init my_module_init(void)
    {
        int err;
    
        err = register_kprobe(&syscall_kprobe);
        if (err) {
            pr_err("register_kprobe() failed: %d\n", err);
            return err;
        }
    
        return 0;
    }
    
    /* Module unload: remove the probe so the handler is no longer invoked. */
    static void __exit my_module_exit(void)
    {
        unregister_kprobe(&syscall_kprobe);
    }
    
    /* Without these the module has no entry/exit points; the kprobe API is GPL-only. */
    module_init(my_module_init);
    module_exit(my_module_exit);
    MODULE_LICENSE("GPL");
    

    Note: I did not test the above code so don't expect it to run perfectly as is, but you can use it as a starting point.