Search code examples
assembly, segmentation-fault, x86-64, osdev, gdt

Triple fault when jumping to long mode x86_64


I have a hobby OS and I want it to jump to 64-bit long mode. Everything works fine up to the far jump to the 64-bit long mode entry point, and paging works correctly, but the QEMU log file shows a triple fault even though the EFER value has LMA set:

Triple fault
CPU Reset (CPU 0)
RAX=0000000000000100 RBX=0000000080000011 RCX=00000000c0000080 RDX=0000000000000000
RSI=0000000000000015 RDI=000000000020102d RBP=0000000000000000 RSP=000000000020b000
R8 =0000000000000000 R9 =0000000000000000 R10=0000000000000000 R11=0000000000000000
R12=0000000000000000 R13=0000000000000000 R14=0000000000000000 R15=0000000000000000
RIP=000000008020015b RFL=00000086 [--S--P-] CPL=0 II=0 A20=1 SMM=0 HLT=0
ES =0018 0000000000000000 ffffffff 00cf9300 DPL=0 DS   [-WA]
CS =0008 0000000000000000 00000000 00209a00 DPL=0 CS64 [-R-]
SS =0018 0000000000000000 ffffffff 00cf9300 DPL=0 DS   [-WA]
DS =0018 0000000000000000 ffffffff 00cf9300 DPL=0 DS   [-WA]
FS =0018 0000000000000000 ffffffff 00cf9300 DPL=0 DS   [-WA]
GS =0018 0000000000000000 ffffffff 00cf9300 DPL=0 DS   [-WA]
LDT=0000 0000000000000000 0000ffff 00008200 DPL=0 LDT
TR =0000 0000000000000000 0000ffff 00008b00 DPL=0 TSS64-busy
GDT=     000000000020103b 00000017
IDT=     0000000000000000 00000000
CR0=80000011 CR2=000000008020015b CR3=0000000000202000 CR4=00000020
DR0=0000000000000000 DR1=0000000000000000 DR2=0000000000000000 DR3=0000000000000000 
DR6=00000000ffff0ff0 DR7=0000000000000400
CCS=0000000000000400 CCD=ffffffff80000011 CCO=LOGICL
EFER=0000000000000500
FCW=037f FSW=0000 [ST=0] FTW=00 MXCSR=00001f80
FPR0=0000000000000000 0000 FPR1=0000000000000000 0000
FPR2=0000000000000000 0000 FPR3=0000000000000000 0000
FPR4=0000000000000000 0000 FPR5=0000000000000000 0000
FPR6=0000000000000000 0000 FPR7=0000000000000000 0000
XMM00=0000000000000000 0000000000000000 XMM01=0000000000000000 0000000000000000
XMM02=0000000000000000 0000000000000000 XMM03=0000000000000000 0000000000000000
XMM04=0000000000000000 0000000000000000 XMM05=0000000000000000 0000000000000000
XMM06=0000000000000000 0000000000000000 XMM07=0000000000000000 0000000000000000
XMM08=0000000000000000 0000000000000000 XMM09=0000000000000000 0000000000000000
XMM10=0000000000000000 0000000000000000 XMM11=0000000000000000 0000000000000000
XMM12=0000000000000000 0000000000000000 XMM13=0000000000000000 0000000000000000
XMM14=0000000000000000 0000000000000000 XMM15=0000000000000000 0000000000000000

The CR2 register shows that a page fault happened at virtual address 0x20015b. This is my code:

; Difference between the link-time (virtual) address of the higher-half
; sections and the physical address they are loaded at. The 32-bit bootstrap
; code subtracts it from higher-half symbols to obtain physical addresses.
%define KERNEL_VIRTUAL_ADDR 0xFFFFFFFF80000000
; Multiboot2 header: magic, architecture, header length and checksum, followed
; by the terminating end tag.
section .multiboot_header
header_start:
    align 8
    dd 0xE85250D6                       ; Multiboot2 header magic
    dd 0                                ; architecture 0 (i386, 32-bit protected mode)
    dd header_end - header_start        ; total header length in bytes
    ; checksum: makes magic + architecture + length + checksum == 0 (mod 2^32)
    dd 0x100000000 - (0xE85250D6 + 0 + (header_end - header_start))
    ; end tag: type = 0, flags = 0, size = 8
    dw 0
    dw 0
    dd 8
header_end:
; 32-bit bootstrap code. The linker script places this section at its low
; (2 MiB) address, so labels defined here need no KERNEL_VIRTUAL_ADDR adjustment.
section .multiboot.text
global start
bits 32
;functions
; check_cpuid - report whether the CPUID instruction is available.
; Tries to flip bit 21 of EFLAGS (the ID flag); if the change sticks, CPUID is
; supported. Prints cpuid_av or cpuid_err via print and returns in both cases.
; In:    esi = text-cell index handed on to print
; Clobb: eax, ecx, edi, plus whatever print changes (ecx, edx, esi), flags
check_cpuid:
        pushfd
        pop eax                 ; eax = current EFLAGS
        mov ecx, eax            ; ecx = original EFLAGS, restored below
        xor eax, 1 << 21        ; toggle bit 21
        push eax
        popfd                   ; try to write the modified value
        pushfd
        pop eax                 ; eax = EFLAGS as actually accepted by the CPU
        push ecx
        popfd                   ; put the original EFLAGS back
        xor eax, ecx            ; zero => bit 21 could not be changed
        jz .no_cpuid
        mov edi, cpuid_av - KERNEL_VIRTUAL_ADDR     ; physical address of the string
        call print
        .cont:
        ret
.no_cpuid:
        mov edi, cpuid_err - KERNEL_VIRTUAL_ADDR
        call print
        ; NOTE(review): the failure is only reported; the caller keeps booting.
        jmp check_cpuid.cont

; check_long_mode - report whether the CPU advertises long mode.
; Queries CPUID leaf 0x80000000 for the highest extended leaf, then tests bit 29
; of EDX from leaf 0x80000001. Prints lm or no_lm via print; returns either way.
; In:    esi = text-cell index handed on to print
; Clobb: eax, ebx, ecx, edx (cpuid), edi, esi (advanced by print), flags
check_long_mode:
        mov eax, 0x80000000
        cpuid                   ; eax = highest supported extended leaf
        cmp eax, 0x80000001
        jb .no_long             ; leaf 0x80000001 is not available

        mov eax, 0x80000001
        cpuid
        test edx, 1 << 29       ; long-mode feature bit
        jz .no_long
        
        mov edi, lm - KERNEL_VIRTUAL_ADDR
        call print
        .cont:
        ret
.no_long:
        mov edi, no_lm - KERNEL_VIRTUAL_ADDR
        call print
        ; NOTE(review): the failure is only reported; the caller keeps booting.
        jmp check_long_mode.cont

; print - write a NUL-terminated string to the VGA text buffer at 0xb8000.
; In:    edi = address of the string
;        esi = index of the first text cell to write (each cell is 2 bytes)
; Out:   esi = index of the cell that received the terminator, so the next
;        call continues right after the last visible character
; Clobb: ecx, edx, flags
; The terminating 0 byte is also stored to the screen before the loop exits.
; NOTE(review): the first byte is stored without being tested, so an empty
; string makes the loop run past its terminator.
print:
    mov dh, 0x0f                        ; attribute byte 0x0f (white on black)
    xor ecx, ecx                        ; ecx = index into the string
    mov dl, [edi + ecx]
    mov word [0xb8000 + esi*2], dx      ; first character
    .loopx:
        inc ecx
        inc esi
        mov dl, [edi + ecx]
        mov word [0xb8000 + esi*2], dx
        cmp byte [edi + ecx], 0         ; stop once the terminator has been stored
        jnz .loopx
    ret
; start - entry point named by ENTRY(start) in the linker script.
; Sets up a stack, enables PAE, builds page tables that identity-map the first
; 4 MiB, sets EFER.LME, turns paging on, loads the GDT and far-jumps to 64-bit
; code.
; NOTE(review): both outcomes of the magic comparison reach loader, so the
; check currently has no effect.
start:
    cmp eax, 0x36d76289
    je loader
loader:
    mov esp, stack.top - KERNEL_VIRTUAL_ADDR    ; physical address of the stack top
    ;disable paging
    ; NOTE(review): (0 << 31) is 0, so the OR below leaves CR0 unchanged.
    cli                                  
    mov eax, cr0
    or eax, 0 << 31
    mov cr0, eax

    ; CR4 bit 5 = PAE, required before long mode can be activated
    mov eax, cr4
    or eax, 1 << 5
    mov cr4, eax
    
    xor esi, esi                        ; print starts at text cell 0
    call check_cpuid
    call check_long_mode
    ; p4_table[0] -> p3_table (0b11 = present + writable)
    mov eax, p3_table - KERNEL_VIRTUAL_ADDR
    or eax, 0b11
    mov dword [p4_table - KERNEL_VIRTUAL_ADDR], eax
    ; p3_table[0] -> p2_table
    mov eax, p2_table - KERNEL_VIRTUAL_ADDR
    or eax, 0b11
    mov dword [p3_table - KERNEL_VIRTUAL_ADDR], eax
    ; p2_table[0] -> first page table
    mov eax, p1_table_1 - KERNEL_VIRTUAL_ADDR
    or eax, 0b11
    mov dword [p2_table - KERNEL_VIRTUAL_ADDR], eax

    ; p2_table[1] -> second page table (second half of p1_table_1)
    mov eax, p1_table_1 - KERNEL_VIRTUAL_ADDR + 4096
    or eax, 0b11
    mov dword [p2_table - KERNEL_VIRTUAL_ADDR + 8], eax
    
    ; Fill both page tables: entry i maps physical page i (1024 * 4 KiB = 4 MiB).
    ; Only the low dword of each 8-byte entry is written.
    mov ecx, 0
    .map_p1_table:
        mov eax, 4096
        mul ecx                         ; eax = i * 4096 (edx is overwritten too)
        or eax, 0b11
        mov [p1_table_1 - KERNEL_VIRTUAL_ADDR + ecx*8], eax
        inc ecx
        cmp ecx, 1024
        jne .map_p1_table

    mov eax, p4_table - KERNEL_VIRTUAL_ADDR
    mov cr3, eax                        ; CR3 = physical address of the top-level table

    mov ecx, 0xC0000080                 ; EFER MSR
    rdmsr
    or eax, (1 << 8)                    ; EFER.LME
    wrmsr

    mov ebx, cr0 
    or ebx, 1 << 31                     ; CR0.PG: turn paging on
    mov cr0, ebx 
    lgdt [gdt64.pointer_low - KERNEL_VIRTUAL_ADDR] 
    ; NOTE(review): kernel_jumper is in .multiboot.text, which the linker script
    ; places at its low address, so subtracting KERNEL_VIRTUAL_ADDR here produces
    ; 0x8020015b (the RIP/CR2 value in the dump), which is not mapped.
    jmp (0x8):(kernel_jumper - KERNEL_VIRTUAL_ADDR) 
[bits 64]
kernel_jumper:
    .h:
        jmp .h ;this is 0x20015b that made page fault
section .data
; NUL-terminated status strings shown by print.
; os_err is not referenced by the code shown here.
cpuid_err: db "CPUID:0 ", 0
cpuid_av: db "CPUID:1 ", 0
os_err: db " multiboot: ", 0
no_lm: db "long-mode: 0 ", 0
lm: db "long-mode: 1 ", 0
; GDT used for the switch to long mode: null, code and data descriptors.
gdt64:
    dq  0   ;first entry = 0
    .code equ $ - gdt64
        ; equ defines an assemble-time constant: here the byte offset of this
        ; descriptor from gdt64, i.e. its selector value.
        ; The descriptor sets the following bits:
        ; descriptor type: bit 44 has to be 1 for code and data segments
        ; present: bit 47 has to be 1 if the entry is valid
        ; read/write: bit 41, 1 means the code segment is readable
        ; executable: bit 43 has to be 1 for code segments
        ; long mode: bit 53, 1 marks a 64-bit code segment
        dq (1 <<44) | (1 << 47) | (1 << 41) | (1 << 43) | (1 << 53)  ;second entry=code=0x8
    .data equ $ - gdt64
        dq (1 << 44) | (1 << 47) | (1 << 41)    ;third entry = data = 0x10
; GDT pointer holding the higher-half (link-time) address of gdt64.
.pointer:
    dw .pointer - gdt64 - 1             ; limit = size of the table - 1
    dq gdt64
; GDT pointer holding the physical address of gdt64; this is the one loaded by
; the 32-bit bootstrap code.
.pointer_low:
    dw .pointer - gdt64 - 1             ; same limit as above
    dq gdt64 - KERNEL_VIRTUAL_ADDR
section .bss
; Page tables, each 4 KiB and page aligned.
; NOTE(review): NASM documents alignb as the form to use in a BSS section.
align 4096
p4_table:                       ; top-level table, its address is loaded into CR3
        resb 4096
p3_table:
        resb 4096
p2_table:
        resb 4096
p1_table_1:                     ; two consecutive page tables (2 * 512 entries)
        resb 8192
align 16
; 16 KiB stack; stack.top is the initial stack pointer.
stack:
    resb 16384
    .top:

this is my linker.ld file:

/* NOTE(review): in GNU ld, OUTPUT(name) sets the output file name. If the
   intent was to select the object format, the command for that is
   OUTPUT_FORMAT (e.g. elf64-x86-64) - confirm. */
OUTPUT(X86-64)
ENTRY(start)

SECTIONS {
    /* The image starts at 2 MiB. */
    . = 2M;

    /* Start address of the kernel image. */
    _kernel_start = .;
    /* Offset added to the link addresses of the higher-half sections. */
    _kern_virtual_offset = 0xffffffff80000000;
    .multiboot_header :
    {
        /* Be sure that multiboot header is at the beginning */
        *(.multiboot_header)
    }

    /* 32-bit bootstrap code; linked at its low address (no offset applied). */
    .multiboot.text :
    {
        *(.multiboot.text)
    }

    . += _kern_virtual_offset;
    /* From here on symbols get higher-half virtual addresses, while AT() keeps
       each section's load address at (virtual address - _kern_virtual_offset). */
    .text ALIGN (4K) : AT (ADDR (.text) - _kern_virtual_offset)
    {
        *(.text)
        *(.text.*)
    }
    .rodata ALIGN (4K) : AT (ADDR (.rodata) - _kern_virtual_offset)
    {
        *(.rodata)
        *(.rodata.*)
    }
    .data ALIGN (4K) : AT (ADDR (.data) - _kern_virtual_offset)
    {
        *(.data)
        *(.data.*)
    }
    .bss ALIGN (4K) : AT (ADDR (.bss) - _kern_virtual_offset)
    {
        *(.bss)
    }

    /* End of the kernel image as a virtual and as a physical address. */
    _kernel_end = .;
    _kernel_physical_end = . - _kern_virtual_offset;
}

From the given information it seems that a page fault happened, but if the OS had not mapped the second megabyte, the code before that point could not have been fetched either; my OS entry point is at 2 MB.


Solution

  • Seeing your linker script I can confirm that the issue is this FAR JMP to the kernel_jumper label:

        jmp (0x8):(kernel_jumper - KERNEL_VIRTUAL_ADDR)
    
    [bits 64]
    kernel_jumper:
        .h:
            jmp .h ;this is 0x20015b that made page fault
    

    The problem is that kernel_jumper is still a lower half address and requires no adjustment. It should have been:

    jmp (0x8):(kernel_jumper)
    

    There is a hint in the debug info where it shows RIP=000000008020015b. This is neither in the first 4MiB nor in the higher-half KERNEL_VIRTUAL_ADDR address space. It appears this address was computed in 32-bit arithmetic (modulo 2^32) as 0x20015b - 0xFFFFFFFF80000000 = 0x8020015B.


    I believe this is beyond the scope of what is being asked, but the code is incomplete as it doesn't have the higher half addresses mapped and it doesn't transition from the lower half to the higher half. The following code adds additional support to handle mapping the higher half addresses; transitioning from the lower half to higher half; removing the lower half mapping; reloading the GDT from higher half; and setting the segment registers:

    ; Distance between the higher-half link address of the kernel sections and
    ; the physical address they are loaded at.
    %define KERNEL_VIRTUAL_ADDR 0xFFFFFFFF80000000
    ; Values of the fixed Multiboot2 header fields.
    %define MB2_MAGIC         0xE85250D6
    %define MB2_ARCH_I386     0
    %define MB2_HEADER_LENGTH (header_end - header_start)
    section .multiboot_header
    header_start:
        align 8
        dd MB2_MAGIC
        dd MB2_ARCH_I386
        dd MB2_HEADER_LENGTH
        ; checksum: the four fields must add up to 0 modulo 2^32
        dd 0x100000000 - (MB2_MAGIC + MB2_ARCH_I386 + MB2_HEADER_LENGTH)
        ; end tag: type 0, flags 0, size 8
        dw 0
        dw 0
        dd 8
    header_end:
    ; 32-bit bootstrap code, linked at its low address by the linker script.
    section .multiboot.text
    global start
    bits 32
    ;functions
    ; check_cpuid - verify that the CPUID instruction is available.
    ; Tries to flip bit 21 of EFLAGS (the ID flag); if the change sticks, CPUID
    ; is supported and cpuid_av is printed. Otherwise cpuid_err is printed and
    ; the CPU is halted: the boot path executes cpuid next, which would fault
    ; with no IDT installed.
    ; In:    esi = text-cell index handed on to print
    ; Out:   returns only when CPUID is available
    ; Clobb: eax, ecx, edi, plus whatever print changes (ecx, edx, esi), flags
    check_cpuid:
            pushfd
            pop eax                 ; eax = current EFLAGS
            mov ecx, eax            ; ecx = original EFLAGS, restored below
            xor eax, 1 << 21        ; toggle the ID flag
            push eax
            popfd                   ; try to write the modified value
            pushfd
            pop eax                 ; eax = EFLAGS as actually accepted
            push ecx
            popfd                   ; put the original EFLAGS back
            xor eax, ecx            ; zero => the ID flag could not be changed
            jz .no_cpuid
            mov edi, cpuid_av - KERNEL_VIRTUAL_ADDR
            call print
            .cont:
            ret
    .no_cpuid:
            mov edi, cpuid_err - KERNEL_VIRTUAL_ADDR
            call print
    .halt:
            ; Stop here with the message on screen instead of booting further.
            cli
            hlt
            jmp .halt
    
    ; check_long_mode - verify that the CPU supports long mode.
    ; Queries CPUID leaf 0x80000000 for the highest extended leaf, then tests
    ; bit 29 of EDX from leaf 0x80000001. On success lm is printed. Otherwise
    ; no_lm is printed and the CPU is halted: the boot path sets EFER.LME next,
    ; which would fault with no IDT installed.
    ; In:    esi = text-cell index handed on to print
    ; Out:   returns only when long mode is available
    ; Clobb: eax, ebx, ecx, edx (cpuid), edi, esi (advanced by print), flags
    check_long_mode:
            mov eax, 0x80000000
            cpuid                   ; eax = highest supported extended leaf
            cmp eax, 0x80000001
            jb .no_long             ; leaf 0x80000001 is not available
    
            mov eax, 0x80000001
            cpuid
            test edx, 1 << 29       ; long-mode feature bit
            jz .no_long
    
            mov edi, lm - KERNEL_VIRTUAL_ADDR
            call print
            .cont:
            ret
    .no_long:
            mov edi, no_lm - KERNEL_VIRTUAL_ADDR
            call print
    .halt:
            ; Stop here with the message on screen instead of booting further.
            cli
            hlt
            jmp .halt
    
    ; print - write a NUL-terminated string to the VGA text buffer at 0xb8000.
    ; In:    edi = address of the string
    ;        esi = index of the first text cell to write (each cell is 2 bytes)
    ; Out:   esi = index of the cell after the last character written
    ;        ecx = number of characters written
    ; Clobb: ecx, edx, flags
    ; Each byte is tested before it is stored, so an empty string writes nothing
    ; and the terminator itself is never put on the screen.
    print:
        mov dh, 0x0f                        ; attribute byte 0x0f (white on black)
        xor ecx, ecx                        ; ecx = index into the string
        .loopx:
            mov dl, [edi + ecx]
            test dl, dl
            jz .done                        ; terminator reached
            mov word [0xb8000 + esi*2], dx
            inc ecx
            inc esi
            jmp .loopx
        .done:
        ret
    ; start - Multiboot2 entry point (32-bit protected mode).
    ; In:    eax = boot-loader magic, 0x36d76289 when started by a Multiboot2 loader
    ; Flow:  verify the magic -> set up the stack -> enable PAE -> build page
    ;        tables that map the first 4 MiB both at address 0 and in the higher
    ;        half -> set EFER.LME -> enable paging -> load the GDT -> far jump to
    ;        64-bit code -> jump to the higher half.
    ; NOTE(review): ebx as set by the boot loader is overwritten by cpuid and by
    ;        the CR0 update below; save it first if the kernel needs it.
    start:
        cmp eax, 0x36d76289
        je loader
    .no_multiboot:
        ; Not started by a Multiboot2 loader: the machine state is unknown, stop.
        cli
        hlt
        jmp .no_multiboot
    loader:
        mov esp, stack.top - KERNEL_VIRTUAL_ADDR    ; physical address of the stack top
        ; Make sure paging is disabled (clear CR0.PG) while the tables are built
        cli
        mov eax, cr0
        and eax, 0x7FFFFFFF
        mov cr0, eax
    
        ; CR4 bit 5 = PAE, required before long mode can be activated
        mov eax, cr4
        or eax, 1 << 5
        mov cr4, eax
    
        xor esi, esi                        ; print starts at text cell 0
        call check_cpuid
        call check_long_mode
    
        ; 0b11 = present + writable
        mov eax, p3_table - KERNEL_VIRTUAL_ADDR
        or eax, 0b11
        ; Map the lower half addresses
        mov dword [p4_table - KERNEL_VIRTUAL_ADDR], eax
        ; Map the higher half addresses
        mov dword [p4_table+511*8 - KERNEL_VIRTUAL_ADDR], eax
    
        mov eax, p2_table - KERNEL_VIRTUAL_ADDR
        or eax, 0b11
        ; Map the lower half addresses
        mov dword [p3_table - KERNEL_VIRTUAL_ADDR], eax
        ; Map the higher half addresses
        mov dword [p3_table+510*8 - KERNEL_VIRTUAL_ADDR], eax
    
        ; p2_table[0] and p2_table[1] -> the two page tables in p1_table_1
        mov eax, p1_table_1 - KERNEL_VIRTUAL_ADDR
        or eax, 0b11
        mov dword [p2_table - KERNEL_VIRTUAL_ADDR], eax
        mov eax, p1_table_1 - KERNEL_VIRTUAL_ADDR + 4096
        or eax, 0b11
        mov dword [p2_table - KERNEL_VIRTUAL_ADDR + 8], eax
    
        ; Fill both page tables: entry i maps physical page i (1024 * 4 KiB =
        ; 4 MiB). Only the low dword of each 8-byte entry is written.
        xor ecx, ecx                        ; ecx = entry index
        mov eax, 0b11                       ; eax = page 0 | present + writable
        .map_p1_table:
            mov [p1_table_1 - KERNEL_VIRTUAL_ADDR + ecx*8], eax
            add eax, 4096                   ; next physical page, flags unchanged
            inc ecx
            cmp ecx, 1024
            jne .map_p1_table
    
        mov eax, p4_table - KERNEL_VIRTUAL_ADDR
        mov cr3, eax                        ; CR3 = physical address of the top-level table
    
        mov ecx, 0xC0000080                 ; EFER MSR
        rdmsr
        or eax, (1 << 8)                    ; EFER.LME
        wrmsr
    
        mov ebx, cr0
        or ebx, 1 << 31                     ; CR0.PG: turn paging on
        mov cr0, ebx
        lgdt [gdt64.pointer_low - KERNEL_VIRTUAL_ADDR]
    
        ; We need to reload CS by a FAR JMP to the lower half label kernel_jumper
        jmp (0x8):(kernel_jumper)
    
    [bits 64]
    kernel_jumper:
        ; Jump to the higher half entry point kernel_jumper_high
        mov rax, kernel_jumper_high
        jmp rax
    
    ; Section .text has higher half addresses
    section .text
    ; kernel_jumper_high - first code executed at a higher-half address.
    ; Reloads the GDT and stack through higher-half addresses, nulls the data
    ; segment registers, drops the lower-half page mappings, writes "HHLM" to
    ; the text screen and then spins forever.
    kernel_jumper_high:
        ; Load the GDT from the higher half
        lgdt [gdt64.pointer]
    
        ; Set a higher half stack
        lea rsp, [stack.top]
    
        ; Initialize the segment registers to NULL segment
        xor eax, eax
        mov ds, eax
        mov es, eax
        mov ss, eax
        mov fs, eax
        mov gs, eax
    
        ; Remove the lower half page mappings
        ; (entry 0 of p4_table and of p3_table; only the low dword of each
        ; entry was ever written, so clearing a dword is enough)
        mov rax, p4_table
        mov dword [rax], 0
        mov rax, p3_table
        mov dword [rax], 0
    
        ; Flush the TLB by reloading CR3
        mov rax, cr3
        mov cr3, rax
    
        ; Add higher half long mode code here
    
        ; Print HHLM to upper right of screen (white on magenta)
        ; The text buffer at physical 0xb8000 is reached through the higher-half
        ; mapping; cells 76..79 are the last four columns of the first row.
        lea rax, [0xb8000 + KERNEL_VIRTUAL_ADDR]
        mov word [rax+76*2], 0x57 << 8 | 'H'
        mov word [rax+77*2], 0x57 << 8 | 'H'
        mov word [rax+78*2], 0x57 << 8 | 'L'
        mov word [rax+79*2], 0x57 << 8 | 'M'
    
        ;
        ; Infinite loop
        .h:
            jmp .h
    
    
    section .data
    ; NUL-terminated status strings shown by print.
    ; os_err is not referenced by the code shown here.
    cpuid_err: db "CPUID:0 ", 0
    cpuid_av: db "CPUID:1 ", 0
    os_err: db " multiboot: ", 0
    no_lm: db "long-mode: 0 ", 0
    lm: db "long-mode: 1 ", 0
    ; Segment descriptor bits (bit positions within the 8-byte entry).
    %define GDT_READ_WRITE  (1 << 41)   ; code: readable / data: writable
    %define GDT_EXECUTABLE  (1 << 43)   ; 1 for code segments
    %define GDT_CODE_DATA   (1 << 44)   ; 1 for code and data segments
    %define GDT_PRESENT     (1 << 47)   ; entry is valid
    %define GDT_LONG_MODE   (1 << 53)   ; 64-bit code segment
    ; GDT used for the switch to long mode. .code and .data are assemble-time
    ; constants holding each descriptor's byte offset from gdt64, i.e. its
    ; selector value.
    gdt64:
        dq 0                                    ; entry 0: null descriptor
        .code equ $ - gdt64                     ; selector 0x8
        dq GDT_CODE_DATA | GDT_PRESENT | GDT_READ_WRITE | GDT_EXECUTABLE | GDT_LONG_MODE
        .data equ $ - gdt64                     ; selector 0x10
        dq GDT_CODE_DATA | GDT_PRESENT | GDT_READ_WRITE
    ; GDT pointer with the higher-half address of the table.
    .pointer:
        dw .pointer - gdt64 - 1                 ; limit = table size - 1
        dq gdt64
    ; GDT pointer with the physical address of the table, used before the jump
    ; to the higher half.
    .pointer_low:
        dw .pointer - gdt64 - 1                 ; same limit
        dq gdt64 - KERNEL_VIRTUAL_ADDR
    section .bss
    ; Page tables, each 4 KiB and page aligned. alignb pads with reserved
    ; (uninitialised) bytes, which is the form NASM documents for BSS sections;
    ; plain align pads with NOP data, which a nobits section cannot hold.
    alignb 4096
    p4_table:                       ; top-level table, its address is loaded into CR3
            resb 4096
    p3_table:
            resb 4096
    p2_table:
            resb 4096
    p1_table_1:                     ; two consecutive page tables (2 * 512 entries)
            resb 8192
    alignb 16
    ; 16 KiB stack; stack.top is the initial stack pointer.
    stack:
        resb 16384
        .top:
    

    I have added appropriate comments in the code where I made the changes.

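    The astute observer may have noticed that the page mappings in the code above identity map the lower half, map the higher half, and also happen to map two additional regions. This happens because PML4 entries 0 and 511 point to the same p3_table, and p3_table entries 0 and 510 point to the same p2_table, so all four combinations resolve to the same 4MiB of physical memory. This is okay as the extra mappings will disappear when the lower half is unmapped. The initial mappings actually look like this: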

    0x0000000000000000-0x00000000003fffff -> 0x000000000000-0x0000003fffff
    0x0000007f80000000-0x0000007f803fffff -> 0x000000000000-0x0000003fffff
    0xffffff8000000000-0xffffff80003fffff -> 0x000000000000-0x0000003fffff
    0xffffffff80000000-0xffffffff803fffff -> 0x000000000000-0x0000003fffff
    

    After the lower half is unmapped it should look like this:

    0xffffffff80000000-0xffffffff803fffff -> 0x000000000000-0x0000003fffff
    

    This is okay, as the extra mappings simplified the code a bit and ultimately do no harm.