Search code examples
assembly x86 nasm bootloader osdev

Cannot set GS register in Protected Mode


I am writing an operating system based on the information from the OSDev Wiki and some related books. I found that after jumping into Protected Mode, the segment initialization routine was not properly executed. Here is my second stage loader and kernel code:

; stage2
; still -f bin
[org 0xd200] ; calculated by first stage loader

; GDTs are defined here
; The jump skips the descriptor data below so it is not executed as code.
jmp entry

; Descriptor table: four 8-byte entries (null, code, data, video).
; GDT_DESC_START / GDT_DESC_END bracket the table so its size can be computed.
GDT_DESC_START:

; First entry: every field is zero.
GDT0:
dw 0 ; Limit 0~15
dw 0 ; Base 0~15
db 0 ; Base 16~23
db 0 ; Access Byte
db 0 ; Limit 16~19 + Flags
db 0 ; Base 24~31

; Code descriptor. The three labelled base fields are zero here and are
; overwritten at run time by `entry` with the address of entry_32.
DESCRIPTOR_CODE:
dw 0xffff ; Limit 0~15
CODE_BASE_AX dw 0 ; Base 0~15
CODE_BASE_EAHL db 0 ; Base 16~23
db 0x9a ; 1001 1011 - Vaild, ring 0, executable, strict, readable,
; accessed
; NOTE(review): 0x9a is 1001 1010b (lowest bit clear), so the bit pattern and
; the "accessed" remark in the comment above do not match the value.
db 0x9f; 1100 1111 - paging, 32-bit
; NOTE(review): 0x9f is 1001 1111b, not the 1100 1111b written in the comment
; (1100 1111b would be 0xcf) - confirm which value was intended.
CODE_BASE_EAHH db 0 ; Base 24~31

; Data descriptor with base 0.
DESCRIPTOR_DATA: 
dw 0xffff ; Limit 0~15
dw 0x0000 ; Base 0~15
db 0x00 ; Base 16~23
db 0x92 ; 1001 0011 - vaild, ring 0, data, forward, writable, accessed
; NOTE(review): 0x92 is 1001 0010b (lowest bit clear); the comment's bit
; pattern and "accessed" remark do not match the value.
db 0x9F ; 1100 1111 - 4KB, 32-bits
; NOTE(review): 0x9F is 1001 1111b, not 1100 1111b (which would be 0xCF) -
; confirm which value was intended.
db 0x0 ; Base 24~31

; Note that in further development, VRAM descriptor should be
; created dynamically
; Base = 0x000b8000
DESCRIPTOR_VIDEO:
dw 0xffff ; Limit 0~15
dw 0x8000 ; Base 0~15
db 0x0b ; Base 16~23
db 0x92 ; Access Byte (same value as the data descriptor)
db 0x4F ; Limit 16~19 + Flags (0100 1111b)
db 0x00 ; Base 24~31

GDT_DESC_END:

; Operand for `lgdt`: 16-bit limit (table size in bytes minus 1) followed by
; the 32-bit address of the table.
GDTR0:
        dw GDT_DESC_END - GDT_DESC_START - 1
        dd GDT_DESC_START

; Selectors are the byte offsets of each descriptor from the start of the
; table: 0x08 (code), 0x10 (data) and 0x18 (video) with the layout above.
SELECTOR_CODE equ DESCRIPTOR_CODE - GDT0
SELECTOR_DATA equ DESCRIPTOR_DATA - GDT0
SELECTOR_VIDEO equ DESCRIPTOR_VIDEO - GDT0

; --- code ------------

[bits 16]

; entry: 16-bit entry point of stage 2.
; Disables interrupts, sets the A20 bit on port 0x92, writes the address of
; entry_32 into the base fields of the code descriptor, loads the GDT, sets
; bit 0 of CR0 and far-jumps to SELECTOR_CODE:0.
entry:

cli
; open a20 gate
in al, 0x92
or al, 0x02 ; set bit 1
and al, ~1 ; clear bit 0
out 0x92, al
; goto protected mode
; rewrite code descriptor
xor eax, eax ; redundant: EAX is overwritten by the next instruction
mov eax, entry_32 ; EAX = address of the 32-bit entry point
mov [CODE_BASE_AX], ax ; base 0~15
shr eax, 16 ; AL = base 16~23, AH = base 24~31
mov [CODE_BASE_EAHH], ah
mov [CODE_BASE_EAHL], al
; load the gdt
lgdt [GDTR0]
mov eax, cr0
or eax, 1 ; set bit 0 of CR0
mov cr0, eax
; Offset 0 is used because the code descriptor base was set to entry_32 above.
jmp dword SELECTOR_CODE:0 ; -> protected mode!!!

[bits 32]
; entry_32: code assembled as 32-bit; its address is used as the base of the
; code descriptor, so a far jump to SELECTOR_CODE:0 is meant to land here.
; Loads DS and SS with the data selector and GS with the video selector,
; stores one character cell through GS, then halts forever.
entry_32:
mov ax, SELECTOR_DATA
mov ds, ax
mov ss, ax
mov ax, SELECTOR_VIDEO
mov gs, ax

; print a 'P'
xor ax, ax
mov edi, 0 ; offset 0 within the GS segment
mov ah, 0x07 ; AH = attribute byte
mov al, 'P' ; AL = character
mov [gs:edi], ax ; store character + attribute as one word

; Halt; if execution resumes after hlt, halt again.
.sleep:
        hlt
        jmp .sleep

The P was not printed on the screen and I found out that GS=0 rather than GS=0x18. The P was written to the linear address 0x0. Strangely, the HLT loop works normally. I am new to x86 assembly so I couldn't figure out where the problem was. What is wrong with the code? How can it be fixed?


Solution

  • Potential Bugs

    I believe most of your problems stem from issues with the flags in some of your GDT descriptors. In the CODE descriptor table you have:

    db 0x9a ; 1001 1011 - Vaild, ring 0, executable, strict, readable,
    db 0x9f ; 1100 1111 - paging, 32-bit
    

    It should have been:

    db 0x9a ; 1001 1010 - Valid, ring 0, executable, strict, readable,
    db 0xcf ; 1100 1111 - paging, 32-bit
    

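    In particular, 1100 1111 is 0xcf, not 0x9f. The value 0x9f is 1001 1111, which leaves the D/B (default operand size) bit clear, so the CPU treats the code segment as 16-bit and decodes the 32-bit instructions at entry_32 incorrectly. That explains why GS never received the selector 0x18. You could have also defined the DBs with binary values like: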

    db 10011010b  ; Valid, ring 0, executable, strict, readable,
    db 11001111b  ; paging, 32-bit
    

    Writing the values in binary makes them less error-prone and a bit more readable.

    In the DATA descriptor you had:

    db 0x92 ; 1001 0011 - vaild, ring 0, data, forward, writable, accessed
    db 0x9F ; 1100 1111 - 4KB, 32-bits
    

    I believe it should have been:

    db 10010010b  ; valid, ring 0, data, forward, writable, accessed
    db 11001111b  ; 4KB, 32-bits
    

    Your 0x9F should have been 0xCF (11001111)


    Other Considerations

    It is a good idea to set up SS:ESP after entering protected mode. Code like this would work:

    mov es, ax                       ; Set ES as well
    mov ss, ax
    mov esp,0x90000                  ; We should specify a stack pointer
    cld                              ; Set direction flag forward
    

    The value 0x90000 is only an example; any valid linear address that does not collide with your code or data can be used for the stack. Since some instructions, such as MOVSB, require ES to be set, it is a good idea to have ES=DS. If you use any instruction that depends on the direction flag, it is a good idea to set the flag explicitly. Most code assumes forward movement, which is selected with the CLD instruction.

    As an experiment you can add this code to display a string from memory. It tests that the offsets generated for DS work as expected, and that the GS segment points to the base of the color text video memory at 0xb8000.

    ; print "Hello, World!"
    ; Register roles:
    ;   ECX = index of the current character in mystr
    ;   EDI = offset of the first character cell within the GS segment
    ;   AH  = attribute byte, AL = current character
        xor     ecx, ecx
        xor     edi, edi
        mov     ax, 0x0700              ; AH = 0x07, AL = 0
    
    ; Copy the nul terminated string at mystr into video memory, one
    ; character + attribute word per cell (hence the *2 scale on the index).
    strloop:
        mov     al, [mystr + ecx]       ; fetch the next character through DS
        test    al, al
        jz      .endstrloop             ; terminator reached: stop
        mov     [gs:edi + ecx*2], ax    ; write the cell through GS
        inc     ecx
        jmp     strloop
    .endstrloop:
    

    Revised Code

    The revised stage2 code could look like this:

    ; stage2 -- second stage loader
    ; assembled as a flat binary (still -f bin)
    [org 0xd200]                        ; load address, calculated by first stage loader
    
    jmp entry                           ; step over the descriptor data that follows
    
    ; Access bytes and flag bytes used by the descriptors below
    ACCESS_CODE      equ 10011010b      ; valid, ring 0, executable, strict, readable (lowest bit clear)
    ACCESS_DATA      equ 10010010b      ; valid, ring 0, data, forward, writable (lowest bit clear)
    FLAGS_4KB_32BIT  equ 11001111b      ; high nibble: 4KB granularity, 32-bit; low nibble: limit 16~19
    FLAGS_BYTE_32BIT equ 01001111b      ; same as above with the 4KB granularity bit clear
    VIDEO_BASE       equ 0x000b8000     ; base address of the video descriptor
    
    ; Descriptor table: null, code, data and video entries, 8 bytes each
    GDT_DESC_START:
    
    GDT0:                               ; first entry: all eight bytes are zero
        dq 0
    
    DESCRIPTOR_CODE:                    ; base fields are filled in at run time by entry
                     dw 0xffff          ; limit 0~15
    CODE_BASE_AX     dw 0               ; base 0~15
    CODE_BASE_EAHL   db 0               ; base 16~23
                     db ACCESS_CODE
                     db FLAGS_4KB_32BIT
    CODE_BASE_EAHH   db 0               ; base 24~31
    
    DESCRIPTOR_DATA:                    ; base 0
        dw 0xffff, 0x0000               ; limit 0~15, base 0~15
        db 0x00                         ; base 16~23
        db ACCESS_DATA
        db FLAGS_4KB_32BIT
        db 0x0                          ; base 24~31
    
    ; Note that in further development, the VRAM descriptor should be
    ; created dynamically
    DESCRIPTOR_VIDEO:                   ; base = VIDEO_BASE
        dw 0xffff                       ; limit 0~15
        dw VIDEO_BASE & 0xffff          ; base 0~15  (0x8000)
        db (VIDEO_BASE >> 16) & 0xff    ; base 16~23 (0x0b)
        db ACCESS_DATA
        db FLAGS_BYTE_32BIT
        db (VIDEO_BASE >> 24) & 0xff    ; base 24~31 (0x00)
    
    GDT_DESC_END:
    
    ; Operand for lgdt: 16-bit limit (table size minus 1), then 32-bit table address
    GDTR0:
        dw GDT_DESC_END - GDT_DESC_START - 1
        dd GDT_DESC_START
    
    ; Selectors are the byte offsets of the descriptors from the start of the table
    SELECTOR_CODE  equ DESCRIPTOR_CODE  - GDT0
    SELECTOR_DATA  equ DESCRIPTOR_DATA  - GDT0
    SELECTOR_VIDEO equ DESCRIPTOR_VIDEO - GDT0
    
    ; --- code ------------
    
    [bits 16]
    
    PORT_A20 equ 0x92                   ; port used below to open the A20 gate
    CR0_PM   equ 1                      ; CR0 bit 0, set to enter protected mode
    
    ; entry: 16-bit entry point of stage 2.
    ; Opens the A20 gate, points the code descriptor at entry_32, loads the
    ; GDT and switches to protected mode with a far jump to SELECTOR_CODE:0.
    entry:
        cli
    
        ; open a20 gate: clear bit 0 and set bit 1 of the port value
        in      al, PORT_A20
        and     al, 0xfe
        or      al, 0x02
        out     PORT_A20, al
    
        ; rewrite code descriptor: base = address of entry_32
        mov     eax, entry_32
        mov     [CODE_BASE_AX], ax      ; base 0~15
        shr     eax, 16                 ; AL = base 16~23, AH = base 24~31
        mov     [CODE_BASE_EAHL], al
        mov     [CODE_BASE_EAHH], ah
    
        ; load the gdt and set the protected-mode bit
        lgdt    [GDTR0]
        mov     eax, cr0
        or      eax, CR0_PM
        mov     cr0, eax
    
        ; Offset 0 because the descriptor base already equals entry_32
        jmp     dword SELECTOR_CODE:0x0 ; -> protected mode!!!
    
    [bits 32]
    
    STACK_TOP equ 0x90000               ; initial ESP
    
    ; entry_32: first 32-bit code, reached through the far jump to
    ; SELECTOR_CODE:0. Sets up the data, stack and video segments, prints
    ; mystr at the start of the GS segment and then halts forever.
    entry_32:
        cld                             ; Set direction flag forward
    
        mov     ax, SELECTOR_VIDEO
        mov     gs, ax                  ; GS = video descriptor
    
        mov     ax, SELECTOR_DATA
        mov     ds, ax
        mov     es, ax                  ; Set ES as well
        mov     ss, ax
        mov     esp, STACK_TOP          ; We should specify a stack pointer
    
        ; print "Hello, World!"
        ;   ECX = index of the current character in mystr
        ;   EDI = offset of the first character cell within the GS segment
        ;   AH  = attribute byte, AL = current character
        xor     ecx, ecx
        xor     edi, edi
        mov     ax, 0x0700              ; AH = 0x07, AL = 0
    
    ; Copy the nul terminated string at mystr into video memory, one
    ; character + attribute word per cell (hence the *2 scale on the index).
    strloop:
        mov     al, [mystr + ecx]       ; fetch the next character through DS
        test    al, al
        jz      .endstrloop             ; terminator reached: stop
        mov     [gs:edi + ecx*2], ax    ; write the cell through GS
        inc     ecx
        jmp     strloop
    .endstrloop:
    
    ; Halt; if execution resumes after hlt, halt again.
    .sleep:
        hlt
        jmp     .sleep
    
    mystr: db "Hello, World!",0
    

    Although your question doesn't have a first stage, I provide a simplistic one for readers who may wish to test:

    [BITS 16]
    org 0x7C00
    
            STAGE2OFFSET  equ 0xd200    ; offset in segment 0 where stage 2 is loaded and entered
            STAGE2SECTORS equ 4         ; number of sectors to read
            STAGE2START   equ 2         ; first sector of stage 2 (sector 1 is this boot sector)
            READ_RETRIES  equ 3         ; attempts before giving up
    
    ; start: first-stage boot sector.
    ; Sets DS=ES=SS=0 with the stack just below 0x7c00, reads STAGE2SECTORS
    ; sectors (cylinder 0, head 0, from sector STAGE2START) of the boot drive to
    ; 0x0000:STAGE2OFFSET and far-jumps there. A failed read is retried after a
    ; disk reset; if every attempt fails the CPU is halted instead of jumping
    ; into memory that was never loaded.
    start:
            ; This section of code is added based on Michael Petch's bootloader tips
            ; See http://stackoverflow.com/a/32705076/3857942
            xor ax,ax          ; We want a segment of 0 for DS for this question
            mov ds,ax          ;     Set AX to appropriate segment value for your situation
            mov es,ax          ; In this case we'll default to ES=DS
    
            cli                ; Disable interrupts to circumvent bug on early 8088 CPUs
            mov ss,ax
            mov sp,0x7c00      ; This places top of the stack @ 0x0000:0x7c00
                               ;    just below the bootsector.
            sti                ; Re-enable interrupts
            cld                ; Set the direction flag to be positive direction
    
            mov [boot_drive],dl ; Keep the drive number the BIOS passed in DL
    
            mov si,READ_RETRIES ; SI = read attempts left
    .read:
            mov ah,0x02        ; Read sectors from drive
            mov al,STAGE2SECTORS
            mov ch,0           ; Cylinder 0
            mov cl,STAGE2START ; Starting sector
            mov dh,0           ; Head 0
            mov dl,[boot_drive]
            mov bx,STAGE2OFFSET ; ES:BX = 0x0000:0xd200 (start of memory to read into)
            int 0x13
            jnc .loaded        ; Carry clear = read succeeded
    
            xor ah,ah          ; Read failed: reset the disk system, then retry
            mov dl,[boot_drive]
            int 0x13
            dec si
            jnz .read
    
    .failed:                   ; Out of retries: stop here rather than run garbage
            cli
            hlt
            jmp .failed
    
    .loaded:
            jmp 0x0000:STAGE2OFFSET ; Far Jump to second stage. Sets CS=0
    
    boot_drive: db 0                ; Drive number saved from DL at entry
    
    times   510-($-$$) db 0         ; Create padding to fill out to 510 bytes
    dw      0xaa55                  ; Magic number in the trailer of a boot sector
    

    These files could be assembled and built into a disk image with these commands:

    # Assemble both stages as flat binaries
    nasm -f bin bootload.asm -o bootload.bin
    nasm -f bin stage2.asm -o stage2.bin
    
    # Create a zero-filled image of 1440 blocks of 1024 bytes
    dd if=/dev/zero of=floppy.img bs=1024 count=1440
    
    # Write the boot sector at the start of the image and stage 2 right after
    # it (seek=1 skips one 512-byte block); conv=notrunc keeps the image size
    dd if=bootload.bin of=floppy.img conv=notrunc
    dd if=stage2.bin of=floppy.img bs=512 seek=1 conv=notrunc
    

    Code Segment Recommendation

    Although the way you set up your CODE descriptor will work, another alternative would have been to use a base of zero. This can be accomplished by:

    • remove these lines:
    ; rewrite code descriptor
    xor eax, eax
    mov eax, entry_32
    mov [CODE_BASE_AX], ax
    shr eax, 16
    mov [CODE_BASE_EAHH], ah
    mov [CODE_BASE_EAHL], al
    
    • Modify the ptr16:32 FAR JMP to use the offset of entry_32:
    jmp dword SELECTOR_CODE:entry_32 ; -> protected mode!!!
    

    Rather than a 0 offset and a non-zero descriptor base, we simply use a descriptor base of 0. We can then use the offset of the label entry_32 for the ptr16:32 FAR JMP. By giving the CS, DS, ES and SS descriptors the same base of 0, you simplify the code: every segment covers the same 4 GiB linear address space starting at 0. Keeping the segments consistent may avoid potential coding bugs in the future.