Search code examples
Tags: c, assembly, rust, x86, osdev

What causes an x86-64 page fault with only the Write bit set when a hardware interrupt happens while CPL=3?


I am writing my own kernel in Rust (loosely based on phil-opp's blog, https://os.phil-opp.com/). So far I have succeeded in duplicating the level 4 page table, creating new tables for the user-mode application's code and data, mapping a naked function to virtual address 0x40000000000, setting up the stack, and jumping to the code. I also have a system call handler working using syscall/sysret; it simply prints a message whenever it encounters a system call. I have noticed that a PIC timer interrupt always occurs while CPL=0, because the system call handler is many times longer than the user-mode app, which simply makes a system call in an endless loop. If I disable the printing (which accounts for most of the instructions), then after a couple hundred iterations of the loop a timer interrupt happens while CPL=3. However, instead of invoking the interrupt handler, the CPU raises a page fault with error code 2 (which corresponds to only the Write bit being set). This does not make any sense to me. What could the problem be?

GDT:

// The GDT paired with the selectors returned by each `add_entry` call.
//
// Entries are added in this order: kernel code, kernel data, TSS, user data, user code.
// Each selector stored in `Selectors` is exactly what `add_entry` returned for that entry.
// NOTE(review): user data is added before user code; this is presumably deliberate to
// match the selector layout that syscall/sysret expect — confirm against the STAR MSR
// setup, which is not shown here.
static ref GDT: (gdt::GlobalDescriptorTable, Selectors) = {
    let mut gdt = gdt::GlobalDescriptorTable::new();
    let kernel_code_selector = gdt.add_entry(gdt::Descriptor::kernel_code_segment());
    let kernel_data_selector = gdt.add_entry(gdt::Descriptor::kernel_data_segment());
    // The TSS descriptor is built from the `TSS` static defined elsewhere.
    let tss_selector = gdt.add_entry(gdt::Descriptor::tss_segment(&TSS));
    let user_data_selector = gdt.add_entry(gdt::Descriptor::user_data_segment());
    let user_code_selector = gdt.add_entry(gdt::Descriptor::user_code_segment());
    (gdt, Selectors { kernel_code_selector, kernel_data_selector, tss_selector, user_code_selector, user_data_selector })
    };

IDT:

// IST slot index passed to `set_stack_index` for the double fault handler below.
const DOUBLE_FAULT_IST_INDEX: u16 = 0;
// Build the IDT: register the exception handlers and the timer interrupt handler,
// then load the table.
let mut IDT: idt::InterruptDescriptorTable = idt::InterruptDescriptorTable::new();
IDT.breakpoint.set_handler_fn(interrupts::breakpoint::breakpoint_handler);
// The double fault entry is the only one in this listing that is given a stack index;
// every other entry is registered without one.
IDT.double_fault.set_handler_fn(interrupts::double_fault::double_fault_handler).set_stack_index(DOUBLE_FAULT_IST_INDEX);
IDT.page_fault.set_handler_fn(interrupts::page_fault::page_fault_handler);
IDT.general_protection_fault.set_handler_fn(interrupts::general_protection_fault::general_protection_fault_handler);
IDT.stack_segment_fault.set_handler_fn(interrupts::stack_segment_fault::stack_segment_fault_handler);
IDT.segment_not_present.set_handler_fn(interrupts::segment_not_present::segment_not_present_handler);
IDT.invalid_tss.set_handler_fn(interrupts::invalid_tss::invalid_tss_handler);
IDT.debug.set_handler_fn(interrupts::debug::debug_handler);
// Hardware timer interrupt, indexed by the vector number of `HardwareInterrupt::Timer`.
IDT[interrupts::HardwareInterrupt::Timer.as_usize()].set_handler_fn(interrupts::timer::timer_handler);
IDT.load();

TSS:

// Create the TSS and fill in the one IST slot used by the double fault handler.
// NOTE(review): nothing in this snippet assigns `privilege_stack_table`; the Solution
// further down attributes the fault to the TSS setup — confirm whether a ring-0 stack
// (RSP0) is configured anywhere else.
let mut tss = tss::TaskStateSegment::new();
    tss.interrupt_stack_table[DOUBLE_FAULT_IST_INDEX as usize] = {
        // Backing storage for the stack: five 4096-byte units in a static buffer.
        const STACK_SIZE: usize = 4096 * 5;
        static mut STACK: [u8; STACK_SIZE] = [0; STACK_SIZE];
        let stack_start = x86_64::VirtAddr::from_ptr(unsafe { &STACK });
        // The slot receives the address just past the end of the buffer, not its start.
        let stack_end = stack_start + STACK_SIZE;
        stack_end
    };

Timer interrupt handler:

/// Timer interrupt handler registered in the IDT for `HardwareInterrupt::Timer`.
///
/// Prints a single "." and then calls `cpu::pic_end_of_interrupt(0x20)`; the
/// interrupt stack frame argument is not used.
pub extern "x86-interrupt" fn timer_handler(_stack_frame: idt::InterruptStackFrame) {
    print!(".");
    // NOTE(review): presumably acknowledges the interrupt at the PIC so further timer
    // interrupts can be delivered — confirm against `cpu::pic_end_of_interrupt`.
    cpu::pic_end_of_interrupt(0x20);
}

Userspace app:

/// User-mode test program: an endless loop that issues `syscall` with a running
/// counter in `rdi`.
///
/// A zero is pushed before the loop. Each iteration loads 1234h into `rax`, pops the
/// counter into `rdi`, increments it, pushes it back, sets `rsi` = 3, `rdx` = 4 and
/// `r8` = 5, executes `syscall`, and jumps back to `prog1start`. The asm block is
/// marked `options(noreturn)`, so control never leaves the loop.
///
/// # Safety
///
/// NOTE(review): per the question text this naked function is mapped into user space
/// and jumped to rather than called from Rust — confirm before calling it directly.
#[naked]
#[no_mangle]
#[allow(named_asm_labels)]
pub unsafe fn userspace_app_1() {
    // The counter lives on the stack between iterations because it is popped into
    // `rdi` at the top of the loop and pushed back right after the increment.
    asm!("\
        push 0
        prog1start:
        mov rax, 1234h
        pop rdi
        inc rdi
        push rdi
        mov rsi, 3
        mov rdx, 4
        mov r8, 5
        syscall
        jmp prog1start
    ", options(noreturn));
}

QEMU interrupt log:

     7: v=20 e=0000 i=0 cpl=3 IP=0033:0000040000000066 pc=0000040000000066 SP=002b:0000060000000ff8 env->regs[R_EAX]=00000000515ca11a
RAX=0000000000001234 RBX=0000000000006062 RCX=0000040000000066 RDX=0000000000000004
RSI=0000000000000003 RDI=00000000001e91c5 RBP=0000008040201000 RSP=0000060000000ff8
R8 =0000000000000005 R9 =0000060000000f78 R10=0000000000203080 R11=0000000000000206
R12=0000000100000000 R13=0000000000005fea R14=0000018000000000 R15=0000000000006692
RIP=0000040000000066 RFL=00000206 [-----P-] CPL=3 II=0 A20=1 SMM=0 HLT=0
ES =0000 0000000000000000 ffffffff 00cf1300
CS =0033 0000000000000000 ffffffff 00a0fb00 DPL=3 CS64 [-RA]
SS =002b 0000000000000000 ffffffff 00c0f300 DPL=3 DS   [-WA]
DS =002b 0000000000000000 ffffffff 00cff300 DPL=3 DS   [-WA]
FS =0000 0000000000000000 0000ffff 00009300 DPL=0 DS   [-WA]
GS =0000 0000000000000000 0000ffff 00009300 DPL=0 DS   [-WA]
LDT=0000 0000000000000000 0000ffff 00008200 DPL=0 LDT
TR =0018 0000000000276014 00000067 00008900 DPL=0 TSS64-avl
GDT=     0000000000276090 00000037
IDT=     000000000026dd80 00000fff
CR0=80010011 CR2=0000000000000000 CR3=00000000002b4018 CR4=00000020
DR0=0000000000000000 DR1=0000000000000000 DR2=0000000000000000 DR3=0000000000000000 
DR6=00000000ffff0ff0 DR7=0000000000000400
CCS=0000000000000004 CCD=0000060000000fa8 CCO=EFLAGS
EFER=0000000000000d01
check_exception old: 0xffffffff new 0xe
     8: v=0e e=0002 i=0 cpl=3 IP=0033:0000040000000066 pc=0000040000000066 SP=002b:0000060000000ff8 CR2=fffffffffffffff8
RAX=0000000000001234 RBX=0000000000006062 RCX=0000040000000066 RDX=0000000000000004
RSI=0000000000000003 RDI=00000000001e91c5 RBP=0000008040201000 RSP=0000060000000ff8
R8 =0000000000000005 R9 =0000060000000f78 R10=0000000000203080 R11=0000000000000206
R12=0000000100000000 R13=0000000000005fea R14=0000018000000000 R15=0000000000006692
RIP=0000040000000066 RFL=00000206 [-----P-] CPL=3 II=0 A20=1 SMM=0 HLT=0
ES =0000 0000000000000000 ffffffff 00cf1300
CS =0033 0000000000000000 ffffffff 00a0fb00 DPL=3 CS64 [-RA]
SS =002b 0000000000000000 ffffffff 00c0f300 DPL=3 DS   [-WA]
DS =002b 0000000000000000 ffffffff 00cff300 DPL=3 DS   [-WA]
FS =0000 0000000000000000 0000ffff 00009300 DPL=0 DS   [-WA]
GS =0000 0000000000000000 0000ffff 00009300 DPL=0 DS   [-WA]
LDT=0000 0000000000000000 0000ffff 00008200 DPL=0 LDT
TR =0018 0000000000276014 00000067 00008900 DPL=0 TSS64-avl
GDT=     0000000000276090 00000037
IDT=     000000000026dd80 00000fff
CR0=80010011 CR2=fffffffffffffff8 CR3=00000000002b4018 CR4=00000020
DR0=0000000000000000 DR1=0000000000000000 DR2=0000000000000000 DR3=0000000000000000 
DR6=00000000ffff0ff0 DR7=0000000000000400
CCS=0000000000000004 CCD=0000060000000fa8 CCO=EFLAGS
EFER=0000000000000d01

Solution

  • I have fixed the problem! As @sj95126 suggested, the problem was with my TSS, which had an IST entry set up only for the double fault handler. That makes sense looking back: previously, any interrupt or exception that happened in user mode caused a double fault because, as far as I can tell, the CPU could not know which stack to switch to. (This is consistent with the log above, where the faulting address CR2=fffffffffffffff8 lies just below address 0, as if the CPU had tried to push onto a stack pointer of 0.) I fixed it by setting the IST index of every interrupt handler to that TSS entry, and now my code works flawlessly.

    IDT:

    // Every handler registered here, including double fault and the timer interrupt,
    // is given the same IST slot (`INTERRUPT_IST_INDEX`; its definition is not shown).
    // NOTE(review): with one shared IST slot, a nested exception would presumably start
    // again at the same stack top and overwrite the frame of the handler it interrupted
    // — confirm; giving the TSS a ring-0 stack (`privilege_stack_table[0]`) and keeping
    // a dedicated IST slot for double fault only is the usual alternative.
    // NOTE(review): the `invalid_tss` handler from the question's IDT listing does not
    // appear here — confirm whether that omission is intentional.
    IDT.breakpoint.set_handler_fn(interrupts::breakpoint::breakpoint_handler).set_stack_index(INTERRUPT_IST_INDEX);
    IDT.double_fault.set_handler_fn(interrupts::double_fault::double_fault_handler).set_stack_index(INTERRUPT_IST_INDEX);
    IDT.page_fault.set_handler_fn(interrupts::page_fault::page_fault_handler).set_stack_index(INTERRUPT_IST_INDEX);
    IDT.general_protection_fault.set_handler_fn(interrupts::general_protection_fault::general_protection_fault_handler).set_stack_index(INTERRUPT_IST_INDEX);
    IDT.stack_segment_fault.set_handler_fn(interrupts::stack_segment_fault::stack_segment_fault_handler).set_stack_index(INTERRUPT_IST_INDEX);
    IDT.segment_not_present.set_handler_fn(interrupts::segment_not_present::segment_not_present_handler).set_stack_index(INTERRUPT_IST_INDEX);
    IDT.debug.set_handler_fn(interrupts::debug::debug_handler).set_stack_index(INTERRUPT_IST_INDEX);
    IDT[interrupts::HardwareInterrupt::Timer.as_usize()].set_handler_fn(interrupts::timer::timer_handler).set_stack_index(INTERRUPT_IST_INDEX);
    IDT.load();