Search code examples
assembly x86 kernel bootloader osdev

How to make the kernel for my bootloader?


I'm trying to make my own custom OS and I need some help with my code. This is my bootloader.asm:

[ORG 0x7c00]

start:
    cli
    xor ax, ax
    mov ds, ax
    mov ss, ax
    mov es, ax
    mov [BOOT_DRIVE], dl
    mov bp, 0x8000
    mov sp, bp
    mov bx, 0x9000
    mov dh, 5
    mov dl, [BOOT_DRIVE]
    call load_kernel
    call enable_A20
    call graphics_mode
    lgdt [gdtr]
    mov eax, cr0
    or al, 1
    mov cr0, eax
    jmp CODE_SEG:init_pm

[bits 32]
init_pm:
    mov ax, DATA_SEG
    mov ds, ax
    mov ss, ax
    mov es, ax
    mov fs, ax
    mov gs, ax

    mov ebp, 0x90000
    mov esp, ebp

    jmp 0x9000

[BITS 16]
graphics_mode:
    mov ax, 0013h
    int 10h
    ret

load_kernel:
                        ; load DH sectors to ES:BX from drive DL
    push dx             ; Store DX on stack so later we can recall
                        ; how many sectors were request to be read ,
                        ; even if it is altered in the meantime
    mov ah , 0x02       ; BIOS read sector function
    mov al , dh         ; Read DH sectors
    mov ch , 0x00       ; Select cylinder 0
    mov dh , 0x00       ; Select head 0
    mov cl , 0x02       ; Start reading from second sector ( i.e.
                        ; after the boot sector )
    int 0x13            ; BIOS interrupt
    jc disk_error       ; Jump if error ( i.e. carry flag set )
    pop dx              ; Restore DX from the stack
    cmp dh , al         ; if AL ( sectors read ) != DH ( sectors expected )
    jne disk_error      ; display error message
    ret
disk_error :
    mov bx , ERROR_MSG
    call print_string
    hlt

[bits 32]
    ; print_string: copy a null-terminated string into video memory.
    ;   In:  EBX = address of the string (NOT EDX; EDX is used internally
    ;              as the video-memory cursor).
    ;   Out: nothing; pusha/popa restore the general-purpose registers.
    ; NOTE: this routine is assembled as 32-bit code, but disk_error calls
    ;       it from 16-bit code after loading only BX.
print_string :
    pusha
    mov edx , VIDEO_MEMORY ; EDX = cursor, starts at the first character cell.
print_string_loop :
    mov al , [ ebx ] ; AL = current character of the string
    mov ah , WHITE_ON_BLACK ; AH = attribute byte for that character
    cmp al , 0 ; a zero byte marks the end of the string , so
    je print_string_done ; stop before writing it
    mov [edx] , ax ; Write character + attribute to the current
        ; character cell (one 16-bit store).
    add ebx , 1 ; Advance to the next character of the string.
    add edx , 2 ; Advance to the next character cell (2 bytes per cell).
    jmp print_string_loop ; Repeat for the next character.
print_string_done :
    popa
    ret ; Return to the caller with registers restored

[bits 16]
; Variables 
ERROR_MSG db "Error!" , 0
BOOT_DRIVE: db 0
VIDEO_MEMORY equ 0xb8000
WHITE_ON_BLACK equ 0x0f

%include "a20.inc"
%include "gdt.inc"

times 510-($-$$) db 0
db 0x55
db 0xAA

I compile it with this:

nasm -f bin -o boot.bin bootloader.asm

This is kernel.c:

call_main(){main();}
void main(){}

I compile it with this:

gcc -ffreestanding -o kernel.bin kernel.c

and then:

cat boot.bin kernel.bin > os.bin

I want to know what I am doing wrong because when I test with QEMU it doesn't work. Can someone give some tips to improve kernel.c so I don't have to use the call_main() function?

When testing I use:

qemu-system-i386 -kernel os.bin

My Other Files

a20.inc:

   enable_A20:
call check_a20
cmp ax, 1
je enabled
call a20_bios
call check_a20
cmp ax, 1
je enabled
call a20_keyboard
call check_a20
cmp ax, 1
je enabled
call a20_fast
call check_a20
cmp ax, 1
je enabled
mov bx, [ERROR]
call print_string
   enabled:
ret


  check_a20:
pushf
push ds
push es
push di
push si

cli

xor ax, ax ; ax = 0
mov es, ax

not ax ; ax = 0xFFFF
mov ds, ax

mov di, 0x0500
mov si, 0x0510

mov al, byte [es:di]
push ax

mov al, byte [ds:si]
push ax

mov byte [es:di], 0x00
mov byte [ds:si], 0xFF

cmp byte [es:di], 0xFF

pop ax
mov byte [ds:si], al

pop ax
mov byte [es:di], al

mov ax, 0
je check_a20__exit

mov ax, 1

 check_a20__exit:
pop si
pop di
pop es
pop ds
popf

ret

    a20_bios:
mov ax, 0x2401
int 0x15
ret

    a20_fast:
in al, 0x92
or al, 2
out 0x92, al
ret

    [bits 32]
    [section .text]

    a20_keyboard:
    cli

    call    a20wait
    mov     al,0xAD
    out     0x64,al

    call    a20wait
    mov     al,0xD0
    out     0x64,al

    call    a20wait2
    in      al,0x60
    push    eax

    call    a20wait
    mov     al,0xD1
    out     0x64,al

    call    a20wait
    pop     eax
    or      al,2
    out     0x60,al

    call    a20wait
    mov     al,0xAE
    out     0x64,al

    call    a20wait
    sti
    ret

    a20wait:
    in      al,0x64
    test    al,2
    jnz     a20wait
    ret


    a20wait2:
    in      al,0x64
    test    al,1
    jz      a20wait2
    ret

gdt.inc:

 gdt_start:
dd 0                ; null descriptor--just fill 8 bytes    dd 0 

 gdt_code:
dw 0FFFFh           ; limit low
dw 0                ; base low
db 0                ; base middle
db 10011010b            ; access
db 11001111b            ; granularity
db 0                ; base high

 gdt_data:
dw 0FFFFh           ; limit low (Same as code)
dw 0                ; base low
db 0                ; base middle
db 10010010b            ; access
db 11001111b            ; granularity
db 0                ; base high
  end_of_gdt:

  gdtr: 
dw end_of_gdt - gdt_start - 1   ; limit (Size of GDT)
dd gdt_start            ; base of GDT

   CODE_SEG equ gdt_code - gdt_start
   DATA_SEG equ gdt_data - gdt_start

Solution

  • There are a number of issues, but in general your assembly code does work. I have written a StackOverflow answer that has tips for general bootloader development.

    Don't Assume the Segment Registers are Set Properly

    The original code in your question didn't set the SS stack segment register. Tip #1 I give is:

    When the BIOS jumps to your code you can't rely on CS,DS,ES,SS,SP registers having valid or expected values. They should be set up appropriately when your bootloader starts.

    If you need ES it should be set as well. Although in your code it doesn't appear to be the case (except in the print_string function which I'll discuss later).

    Properly Define the GDT

    The single largest bug that would have prevented you from getting far into protected mode was that you set up the global descriptor table (GDT) in gdt.inc starting with:

    gdt_start:
        dd 0                ; null descriptor--just fill 8 bytes    dd 0
    

    Each global descriptor needs to be 8 bytes but dd 0 defines just 4 bytes (double word). It should be:

    gdt_start:
        dd 0                ; null descriptor--just fill 8 bytes    
        dd 0
    

    It actually appears that the second dd 0 was accidentally added to the end of the comment on the previous line.

    When in 16-bit Real Mode Don't Use 32-bit Code

    You have written some print_string code but it is 32-bit code:

    [bits 32]
        ; prints a null - terminated string pointed to by EBX
    print_string :
        pusha
        mov edx , VIDEO_MEMORY ; Set edx to the start of vid mem.
    print_string_loop :
        mov al , [ ebx ] ; Store the char at EBX in AL
        mov ah , WHITE_ON_BLACK ; Store the attributes in AH
        cmp al , 0 ; if (al == 0) , at end of string , so
        je print_string_done ; jump to done
        mov [edx] , ax ; Store char and attributes at current
            ; character cell.
        add ebx , 1 ; Increment EBX to the next char in string.
        add edx , 2 ; Move to next character cell in vid mem.
        jmp print_string_loop ; loop around to print the next char.
    print_string_done :
        popa
        ret ; Return from the function
    

    You call print_string as an error handler in 16-bit code so what you are doing here will likely force a reboot of the computer. You can't use the 32-bit registers and addressing. The code can be made 16-bit with some adjustments:

        ; print_string (16-bit): copy a null-terminated string to text-mode
        ; video memory, starting at the top-left character cell.
        ;   In:  DS:BX = address of the string
        ;   Out: nothing; ES and the general-purpose registers are restored.
    print_string :
        pusha
        push es                   ;Save ES on stack and restore when we finish
    
        push VIDEO_MEMORY_SEG     ;Video mem segment 0xb800
        pop es
        xor di, di                ;Video mem offset (start at 0)
    print_string_loop :
        mov al , [ bx ] ; AL = current character of the string
        mov ah , WHITE_ON_BLACK ; AH = attribute byte for that character
        cmp al , 0 ; a zero byte marks the end of the string , so
        je print_string_done ; stop before writing it
        mov word [es:di], ax ; Write character + attribute to the current
            ; character cell.
        add bx , 1 ; Advance BX to the next character of the string.
        add di , 2 ; Advance DI to the next character cell (2 bytes per cell).
        jmp print_string_loop ; Repeat for the next character.
    
    print_string_done :
        pop es                    ;Restore ES that was saved on entry
        popa
        ret ; Return to the caller with registers restored
    

    The primary difference (in 16-bit code) is that we no longer use the 32-bit EBX and EDX registers. In order to access the video RAM at 0xb8000 we need to use a segment:offset pair that represents the same address. 0xb8000 can be represented as segment:offset 0xb800:0x0 (computed as (0xb800<<4)+0x0 = 0xb8000 physical address). We can use this knowledge to store 0xb800 in the ES register and use the DI register as the offset to update video memory. We now use:

    mov word [es:di], ax
    

    To move a word into video ram.

    Assembling and Linking the Kernel and Bootloader

    One of the issues you have in building your Kernel is that you don't properly generate a flat binary image that can be loaded into memory directly. Rather than using gcc -ffreestanding -o kernel.bin kernel.c I recommend doing it this way:

    gcc -g -m32 -c -ffreestanding -o kernel.o kernel.c -lgcc
    ld -melf_i386 -Tlinker.ld -nostdlib --nmagic -o kernel.elf kernel.o
    objcopy -O binary kernel.elf kernel.bin
    

    This compiles kernel.c to kernel.o with debugging info (-g). The linker then takes kernel.o (a 32-bit ELF object) and produces an ELF executable called kernel.elf (this file will be handy if you want to debug your kernel). We then use objcopy to take the ELF32 executable file kernel.elf and convert it into a flat binary image kernel.bin that can be loaded into memory through the BIOS. A key thing to note is that with the -Tlinker.ld option we are asking the linker (ld) to read its options from the file linker.ld. This is a simple linker.ld you can use to get started:

    OUTPUT_FORMAT(elf32-i386)
    ENTRY(main)
    
    SECTIONS
    {
        /* The bootloader reads the kernel to 0x9000 and transfers control
         * there, so link everything to run from that address. */
        . = 0x9000;
        /* .text.start holds the `jmp main` stub emitted by kernel.c; listing
         * it first guarantees the stub is the very first byte of the image. */
        .text : { *(.text.start) *(.text) }
        .data : { *(.data) }
        .bss  : { *(.bss) *(COMMON) }
    }
    

    The thing to note here is that . = 0x9000 is telling the linker that it should produce an executable that will be loaded at memory address 0x9000 . 0x9000 is where you seem to have placed your kernel in your question. The rest of the lines make available the C sections that will need to be included into your kernel to work properly.

    I recommend doing something similar when using NASM so rather than doing nasm -f bin -o boot.bin bootloader.asm do it this way:

    nasm -g -f elf32 -F dwarf -o boot.o bootloader.asm
    ld -melf_i386 -Ttext=0x7c00 -nostdlib --nmagic -o boot.elf boot.o
    objcopy -O binary boot.elf boot.bin
    

    This is similar to compiling the C kernel. We don't use a linker script here, but we do tell the linker to produce our code assuming the code (bootloader) will be loaded at 0x7c00 .

    For this to work you will need to remove this line from bootloader.asm :

    [ORG 0x7c00]
    

    Cleanup The Kernel (kernel.c)

    Modify your kernel.c file to be:

    /* This code will be placed at the beginning of the object by the linker script */    
    __asm__ (".pushsection .text.start\r\n" \
             "jmp main\r\n" \
             ".popsection\r\n"
             );
    
    /* Place main as the first function defined in kernel.c so
     * that it will be at the entry point where our bootloader
     * will call. In our case it will be at 0x9000 */
    
    int main(){
        /* Do Stuff Here*/
    
        return 0; /* return back to bootloader */
    }
    

    In bootloader.asm we should be calling the main function (that will be placed at 0x9000) rather than jumping to it. Instead of:

    jmp 0x9000
    

    Change it to:

        call 0x9000
        cli
    loopend:                ;Infinite loop when finished
        hlt
        jmp loopend
    

    The code after the call will be executed when the C function main returns. It is a simple loop that halts the processor and keeps it halted indefinitely, since we have nowhere to go back to.

    Code After Making All Recommended Changes

    bootloader.asm:

    [bits 16]
    
    global _start
    ; _start: boot-sector entry point (16-bit real mode).
    ;   In:  DL = boot drive number supplied by the BIOS.
    ;   Sets up flat real-mode segments and a stack, loads the kernel to
    ;   0x0000:0x9000, enables A20, then switches to 32-bit protected mode
    ;   and far-jumps to init_pm.  Never returns.
    _start:
        cli                 ; No interrupts while segments and stack are changed
        xor ax, ax
        mov ds, ax
        mov es, ax
        mov ss, ax
        mov sp, 0x8000      ; Stack pointer at SS:SP = 0x0000:0x8000
        mov [BOOT_DRIVE], dl; Boot drive passed to us by the BIOS
        mov dh, 17          ; Number of sectors (kernel.bin) to read from disk
                            ; 17*512 allows for a kernel.bin up to 8704 bytes
        mov bx, 0x9000      ; Load Kernel to ES:BX = 0x0000:0x9000
    
        call load_kernel
        call enable_A20
    
    ;   call graphics_mode  ; Uncomment if you want to switch to graphics mode 0x13
        cli                 ; BIOS services or the A20 routines may have turned
                            ; interrupts back on; there is no IDT for protected
                            ; mode, so they must be off before PE is set
        lgdt [gdtr]
        mov eax, cr0
        or al, 1            ; Set CR0.PE (protection enable)
        mov cr0, eax
        jmp CODE_SEG:init_pm; Far jump reloads CS and flushes the prefetch queue
    
    ; graphics_mode: ask the BIOS video service to switch to mode 13h.
    ;   In:  nothing.   Out: nothing.   Clobbers: AX.
    graphics_mode:
        mov ax, 0x0013      ; AH = 00h (set video mode), AL = 13h (mode number)
        int 0x10            ; BIOS video services
        ret
    
    ; load_kernel: read sectors from disk with BIOS INT 13h, AH=02h.
    ;   In:  DH    = number of sectors to read
    ;        DL    = drive number
    ;        ES:BX = destination buffer
    ;   Out: returns only on success; on any failure control goes to
    ;        disk_error, which never returns.
    ;   Reads from cylinder 0, head 0, starting at sector 2 (the sector
    ;   right after the boot sector).  Clobbers AX, CX.
    load_kernel:
        push dx             ; Remember the requested sector count (DH);
                            ; DH is reused below for the head number
        mov ah , 0x02       ; BIOS read sector function
        mov al , dh         ; Read DH sectors
        mov ch , 0x00       ; Select cylinder 0
        mov dh , 0x00       ; Select head 0
        mov cl , 0x02       ; Start reading from second sector ( i.e.
                            ; after the boot sector )
        int 0x13            ; BIOS interrupt
        jc disk_error       ; Jump if error ( i.e. carry flag set )
        pop dx              ; Restore DX from the stack
        cmp dh , al         ; if AL ( sectors read ) != DH ( sectors expected )
        jne disk_error      ; display error message
        ret

    ; disk_error: print ERROR_MSG and stop the machine.  Never returns, so
    ; the DX left on the stack by the `jc` path is harmless.
    disk_error :
        mov bx , ERROR_MSG
        call print_string
    .halt :
        cli                 ; Keep interrupts from waking the CPU
        hlt
        jmp .halt           ; If the CPU wakes anyway (e.g. NMI), halt again
                            ; instead of falling through into print_string
    
    ; print_string (16-bit): copy a null-terminated string to text-mode
    ; video memory, starting at the top-left character cell.
    ;   In:  DS:BX = address of the string
    ;   Out: nothing; ES and the general-purpose registers are restored.
    print_string :
        pusha
        push es                   ;Save ES on stack and restore when we finish
    
        push VIDEO_MEMORY_SEG     ;Video mem segment 0xb800
        pop es
        xor di, di                ;Video mem offset (start at 0)
    print_string_loop :
        mov al , [ bx ] ; AL = current character of the string
        mov ah , WHITE_ON_BLACK ; AH = attribute byte for that character
        cmp al , 0 ; a zero byte marks the end of the string , so
        je print_string_done ; stop before writing it
        mov word [es:di], ax ; Write character + attribute to the current
            ; character cell.
        add bx , 1 ; Advance BX to the next character of the string.
        add di , 2 ; Advance DI to the next character cell (2 bytes per cell).
        jmp print_string_loop ; Repeat for the next character.
    
    print_string_done :
        pop es                    ;Restore ES that was saved on entry
        popa
        ret ; Return to the caller with registers restored
    
    %include "a20.inc"
    %include "gdt.inc"
    
    [bits 32]
    ; init_pm: first code executed in 32-bit protected mode (reached through
    ; the far jump `jmp CODE_SEG:init_pm`).  Loads every data segment
    ; register with DATA_SEG, sets up a stack at 0x90000, calls the kernel
    ; entry at 0x9000 and halts forever if the kernel returns.
    init_pm:
        mov ax, DATA_SEG        ; Selector of the flat data descriptor in the GDT
        mov ds, ax
        mov ss, ax
        mov es, ax
        mov fs, ax
        mov gs, ax
    
        mov ebp, 0x90000        ; Stack top for the kernel
        mov esp, ebp
    
        call 0x9000             ; Kernel was loaded at 0x0000:0x9000 by load_kernel
        cli                     ; Kernel returned: make sure interrupts are off
    loopend:                                ;Infinite loop when finished
        hlt
        jmp loopend             ; Halt again if the CPU is ever woken up
    
    [bits 16]
    ; Variables
    ERROR            db "A20 Error!" , 0
    ERROR_MSG        db "Error!" , 0
    BOOT_DRIVE:      db 0
    
    VIDEO_MEMORY_SEG equ 0xb800
    WHITE_ON_BLACK   equ 0x0f
    
    times 510-($-$$) db 0
    db 0x55
    db 0xAA
    

    gdt.inc:

    ; Global Descriptor Table: a mandatory null descriptor followed by one
    ; flat code descriptor and one flat data descriptor (base 0, limit
    ; 0xFFFFF in 4 KiB units).  Every descriptor is exactly 8 bytes.
    gdt_start:
        dd 0                ; null descriptor, low 4 bytes
        dd 0                ; null descriptor, high 4 bytes (8 bytes total)
    
    gdt_code:
        dw 0FFFFh           ; limit bits 0-15
        dw 0                ; base bits 0-15
        db 0                ; base bits 16-23
        db 10011010b        ; access: present, ring 0, code segment, execute/read
        db 11001111b        ; flags: 4 KiB granularity, 32-bit; limit bits 16-19 = 0xF
        db 0                ; base bits 24-31
    
    gdt_data:
        dw 0FFFFh           ; limit bits 0-15 (same as code)
        dw 0                ; base bits 0-15
        db 0                ; base bits 16-23
        db 10010010b        ; access: present, ring 0, data segment, read/write
        db 11001111b        ; flags: 4 KiB granularity, 32-bit; limit bits 16-19 = 0xF
        db 0                ; base bits 24-31
    end_of_gdt:
    
    ; Operand for LGDT: 16-bit limit followed by the 32-bit linear base.
    gdtr:
        dw end_of_gdt - gdt_start - 1   ; limit = size of the GDT in bytes, minus 1
        dd gdt_start        ; linear base address of the GDT
    
        ; Segment selectors = byte offset of each descriptor within the GDT
        CODE_SEG equ gdt_code - gdt_start
        DATA_SEG equ gdt_data - gdt_start
    

    a20.inc:

    ; enable_A20: make sure the A20 address line is enabled.
    ;   Tries, in order: nothing (already on), the BIOS service, the
    ;   keyboard controller, and the "fast A20" port.  After each attempt
    ;   check_a20 is consulted (AX = 1 means enabled).  If every method
    ;   fails, the ERROR message is printed and the routine still returns.
    ;   In:  nothing.   Clobbers: AX, BX.
    enable_A20:
        call check_a20
        cmp ax, 1
        je enabled
        call a20_bios
        call check_a20
        cmp ax, 1
        je enabled
        call a20_keyboard
        call check_a20
        cmp ax, 1
        je enabled
        call a20_fast
        call check_a20
        cmp ax, 1
        je enabled
        mov bx, ERROR           ; BX = address of the message; `[ERROR]` would
                                ; load the first two characters instead
        call print_string
    enabled:
        ret
    
    ; check_a20: test whether the A20 line is enabled.
    ;   Writes different values to 0x0000:0x0500 and 0xFFFF:0x0510.  The
    ;   second address is 1 MiB above the first, so with A20 disabled it
    ;   wraps around onto the same byte.  The original contents of both
    ;   bytes are restored before returning.
    ;   In:  nothing.
    ;   Out: AX = 1 if A20 is enabled, AX = 0 if the addresses alias.
    ;   FLAGS, DS, ES, DI and SI are preserved.
    check_a20:
        pushf
        push ds
        push es
        push di
        push si
    
        cli                     ; No interrupts while DS/ES point elsewhere
        xor ax, ax ; ax = 0
        mov es, ax
        not ax ; ax = 0xFFFF
        mov ds, ax
        mov di, 0x0500          ; ES:DI = 0x0000:0x0500
        mov si, 0x0510          ; DS:SI = 0xFFFF:0x0510
        mov al, byte [es:di]
        push ax                 ; Save the original low-memory byte
        mov al, byte [ds:si]
        push ax                 ; Save the original high-memory byte
        mov byte [es:di], 0x00
        mov byte [ds:si], 0xFF
        cmp byte [es:di], 0xFF  ; Equal => the high write landed on the low byte
        pop ax
        mov byte [ds:si], al    ; Restore the high-memory byte
        pop ax
        mov byte [es:di], al    ; Restore the low-memory byte
        mov ax, 0               ; pop/mov leave FLAGS untouched, so the je
        je check_a20__exit      ; below still tests the cmp above
        mov ax, 1
    
    check_a20__exit:
        pop si
        pop di
        pop es
        pop ds
        popf                    ; Restores the caller's interrupt flag as well
        ret
    
    ; a20_bios: ask the BIOS to enable A20 (INT 15h with AX = 0x2401).
    ;   The BIOS result is not inspected here; enable_A20 calls check_a20
    ;   afterwards to find out whether it worked.
    ;   NOTE(review): the BIOS call presumably alters AX and FLAGS - confirm.
    a20_bios:
        mov ax, 0x2401
        int 0x15
        ret
    
    ; a20_fast: enable A20 through System Control Port A (I/O port 0x92).
    ;   Bit 1 of the port gates A20; bit 0 triggers a fast CPU reset, so it
    ;   is always cleared before writing back.  The port is left untouched
    ;   when A20 is already enabled.
    ;   In:  nothing.   Clobbers: AL, FLAGS.
    a20_fast:
        in al, 0x92
        test al, 2          ; A20 already on? Then avoid a needless write
        jnz .done
        or al, 2            ; Set the A20 gate bit
        and al, 0xFE        ; Never write 1 to bit 0 (fast reset)
        out 0x92, al
    .done:
        ret
    
        [bits 16]               ; enable_A20 calls this from 16-bit real mode;
                                ; 32-bit encodings of `call` would be mis-decoded
        [section .text]
    
    ; a20_keyboard: enable A20 through the keyboard controller.
    ;   Disables the keyboard, reads the controller output port, sets the
    ;   A20 bit (bit 1), writes the port back and re-enables the keyboard.
    ;   In:  nothing.   Clobbers: AL.
    ;   The caller's interrupt flag is preserved (pushf/popf) rather than
    ;   being forced on, matching check_a20.
    a20_keyboard:
        pushf                   ; Remember the caller's interrupt state
        cli
    
        call    a20wait
        mov     al,0xAD         ; Command: disable keyboard
        out     0x64,al
        call    a20wait
        mov     al,0xD0         ; Command: read controller output port
        out     0x64,al
        call    a20wait2
        in      al,0x60         ; AL = current output port value
        push    ax
        call    a20wait
        mov     al,0xD1         ; Command: write controller output port
        out     0x64,al
        call    a20wait
        pop     ax
        or      al,2            ; Set the A20 gate bit
        out     0x60,al
        call    a20wait
        mov     al,0xAE         ; Command: enable keyboard
        out     0x64,al
        call    a20wait
        popf                    ; Restore the caller's interrupt state
        ret
    
    ; a20wait: spin until the controller's input buffer is empty
    ; (status bit 1 clear), i.e. it can accept a command or data byte.
    a20wait:
        in      al,0x64
        test    al,2
        jnz     a20wait
        ret
    
    ; a20wait2: spin until the controller's output buffer is full
    ; (status bit 0 set), i.e. a byte is ready to be read from port 0x60.
    a20wait2:
        in      al,0x64
        test    al,1
        jz      a20wait2
        ret
    

    kernel.c:

    /* Entry stub. It is emitted into its own section, .text.start, which the
     * linker script lists before .text, so this `jmp main` is the first
     * instruction of kernel.bin (address 0x9000). */
    __asm__ (".pushsection .text.start\r\n" \
             "jmp main\r\n" \
             ".popsection\r\n"
             );
    
    /* Kernel entry point. The bootloader calls address 0x9000, which holds
     * the stub above; the stub jumps here, so main does not itself have to
     * be the first function in the file. Returning hands control back to
     * the bootloader's halt loop. */
    
    int main(){
        /* Do Stuff Here*/
    
        return 0; /* return back to bootloader */
    }
    

    linker.ld

    OUTPUT_FORMAT(elf32-i386)
    ENTRY(main)
    
    SECTIONS
    {
        . = 0x9000;
        .text : { *(.text.start) *(.text) }
        .data : { *(.data) }
        .bss  : { *(.bss) *(COMMON) }
    }
    

    Create Disk Image Using DD / Debugging with QEMU

    If you use the files above, and produce the required bootloader and kernel files using these commands (as mentioned previously)

    nasm -g -f elf32 -F dwarf -o boot.o bootloader.asm
    ld -melf_i386 -Ttext=0x7c00 -nostdlib --nmagic -o boot.elf boot.o
    objcopy -O binary boot.elf boot.bin
    
    gcc -g -m32 -c -ffreestanding -o kernel.o kernel.c -lgcc
    ld -melf_i386 -Tlinker.ld -nostdlib --nmagic -o kernel.elf kernel.o
    objcopy -O binary kernel.elf kernel.bin
    

    You can produce a disk image (in this case we'll make it the size of a floppy) with these commands:

    dd if=/dev/zero of=disk.img bs=512 count=2880
    dd if=boot.bin of=disk.img bs=512 conv=notrunc
    dd if=kernel.bin of=disk.img bs=512 seek=1 conv=notrunc
    

    This creates a zero filled disk image of size 512*2880 bytes (The size of a 1.44 megabyte floppy). dd if=boot.bin of=disk.img bs=512 conv=notrunc writes boot.bin to the first sector of the file without truncating the disk image. dd if=kernel.bin of=disk.img bs=512 seek=1 conv=notrunc places kernel.bin into the disk image starting at the second sector. The seek=1 skips over the first block (bs=512) before writing.

    If you wish to run your kernel you can launch it as floppy drive A: (-fda) in QEMU like this:

    qemu-system-i386 -fda disk.img
    

    You can also debug your 32-bit kernel using QEMU and the GNU Debugger (GDB) with the debug information we generated when compiling/assembling the code with the instructions above.

    qemu-system-i386 -fda disk.img -S -s &
    gdb kernel.elf  \
            -ex 'target remote localhost:1234' \
            -ex 'layout src' \
            -ex 'layout reg' \
            -ex 'break main' \
            -ex 'continue'
    

    This example launches QEMU with the remote debugger and emulating a floppy disk using the file disk.img(that we created with DD). GDB launches using kernel.elf (a file we generated with debug info), then connects to QEMU, and sets a breakpoint at function main() in the C code. When the debugger finally is ready you'll be prompted to press <return> to continue. With any luck you should be viewing function main in the debugger.