Search code examples
assembly x86 qemu bootloader osdev

How to fix "qemu: fatal: Trying to execute code outside RAM or ROM at 0x000a0000"


I'm developing my own bootloader + kernel. I create a project and placed it on github: https://github.com/rprata/ubootlua (branch tmp-libc-implemenation)

I tried to run my boot.bin using QEMU:

qemu-system-i386 -fda boot.bin -nographic -serial stdio -monitor none

However a crash happens:

> qemu-system-i386 -fda ./deploy/boot.bin -nographic -serial stdio -monitor none
> WARNING: Image format was not specified for './deploy/boot.bin' and probing guessed raw.
>         Automatically detecting the format is dangerous for raw images, write operations on block 0 will be restricted.
>         Specify the 'raw' format explicitly to remove the restrictions.
> qemu: fatal: Trying to execute code outside RAM or ROM at 0x000a0000
> 
> EAX=00000055 EBX=00018eb4 ECX=00018eb3 EDX=00000000
ESI=00000001 EDI=00000000 EBP=00016058 ESP=00015f94
EIP=0009ffae EFL=00000896 [-OS-AP-] CPL=0 II=0 A20=1 SMM=0 HLT=0
ES =0010 00000000 ffffffff 00cf9300 DPL=0 DS   [-WA]
CS =0008 00000000 ffffffff 00cf9a00 DPL=0 CS32 [-R-]
SS =0010 00000000 ffffffff 00cf9300 DPL=0 DS   [-WA]
DS =0010 00000000 ffffffff 00cf9300 DPL=0 DS   [-WA]
FS =0010 00000000 ffffffff 00cf9300 DPL=0 DS   [-WA]
GS =0010 00000000 ffffffff 00cf9300 DPL=0 DS   [-WA]
LDT=0000 00000000 0000ffff 00008200 DPL=0 LDT
TR =0000 00000000 0000ffff 00008b00 DPL=0 TSS32-busy
GDT=     00007c36 00000018
IDT=     00000000 000003ff
CR0=00000011 CR2=00000000 CR3=00000000 CR4=00000000
DR0=00000000 DR1=00000000 DR2=00000000 DR3=00000000 
DR6=ffff0ff0 DR7=00000400
CCS=00000055 CCD=000000d1 CCO=ADDB    
EFER=0000000000000000
FCW=037f FSW=0000 [ST=0] FTW=00 MXCSR=00001f80
FPR0=0000000000000000 0000 FPR1=0000000000000000 0000
FPR2=0000000000000000 0000 FPR3=0000000000000000 0000
FPR4=0000000000000000 0000 FPR5=0000000000000000 0000
FPR6=0000000000000000 0000 FPR7=0000000000000000 0000
XMM00=00000000000000000000000000000000 XMM01=00000000000000000000000000000000
XMM02=00000000000000000000000000000000 XMM03=00000000000000000000000000000000
XMM04=00000000000000000000000000000000 XMM05=00000000000000000000000000000000
XMM06=00000000000000000000000000000000 XMM07=00000000000000000000000000000000
> makefile:26: recipe for target 'run' failed
> make: *** [run] Aborted (core dumped)

My boot.asm and linker.ld:

section .boot
bits 16                     ; We're working at 16-bit mode here
global boot

; boot: entry point of the boot sector, assembled as 16-bit code.
; Enables A20, selects video mode 3, reads 60 sectors starting at CHS 0/0/2
; to copy_target, then sets CR0.PE and far-jumps to boot32.
; NOTE(review): DS, ES and SS:SP are never initialised in this routine, and the
; carry flag returned by int 0x13 is not checked before continuing.
boot:
    mov ax, 0x2401          
    int 0x15                ; BIOS call: enable the A20 line

    mov ax, 0x3             ; AH=0x00, AL=0x03: select VGA text mode 3
    int 0x10                ; BIOS video call: apply the mode selected above

    mov [disk],dl           ; Keep a copy of the drive number found in DL

    mov ah, 0x2             ; Int 13h function 0x02: read sectors
    mov al, 60              ; number of sectors to read
    mov ch, 0               ; cylinder index
    mov dh, 0               ; head index
    mov cl, 2               ; sector index (sector 2 follows the boot sector)
    mov dl, [disk]          ; drive index
    mov bx, copy_target     ; destination offset (paired with ES, which is not set here)
    int 0x13

    cli                     ; Disable the interrupts
    lgdt [gdt_pointer]      ; Load the GDT register from gdt_pointer
    mov eax, cr0            ; Read CR0...
    or eax,0x1              ; ...set the protected mode enable bit (bit 0)...
    mov cr0, eax            ; ...and write it back
    jmp CODE_SEG:boot32     ; Far jump: loads CS with the code selector


; base a 32 bit value describing where the segment begins
; limit a 20 bit value describing where the segment ends, can be multiplied by 4096 if granularity = 1
; present must be 1 for the entry to be valid
; ring level an int between 0-3 indicating the kernel Ring Level
; direction:
;  > 0 = segment grows up from base, 1 = segment grows down for a data segment
;  > 0 = can only execute from ring level, 1 = prevent jumping to higher ring levels
; read/write if you can read/write to this segment
; accessed if the CPU has accessed this segment
; granularity 0 = limit is in 1 byte blocks, 1 = limit is multiples of 4KB blocks
; size 0 = 16 bit mode, 1 = 32 bit protected mode
gdt_start:
    dq 0x0
gdt_code:
    dw 0xFFFF
    dw 0x0
    db 0x0
    db 10011010b
    db 11001111b
    db 0x0
gdt_data:
    dw 0xFFFF
    dw 0x0
    db 0x0
    db 10010010b
    db 11001111b
    db 0x0
gdt_end:
gdt_pointer:
    dw gdt_end - gdt_start
    dd gdt_start
disk:
    db 0x0

CODE_SEG equ gdt_code - gdt_start
DATA_SEG equ gdt_data - gdt_start

;; Magic numbers
times 510 - ($ - $$) db 0

dw 0xaa55
copy_target:
bits 32
    msg:    db "Hello, World more than 512 bytes!", 0

; boot32: target of the far jump in boot, assembled as 32-bit code.
; Loads DATA_SEG into every data segment register, runs a loop that copies a
; NUL-terminated string from [esi] to [ebx] as character/attribute words, then
; jumps to halt.
; NOTE(review): the two instructions that would initialise ESI and EBX are
; commented out, so the loop runs with whatever values those registers hold.
boot32:
    mov ax, DATA_SEG
    mov ds, ax
    mov es, ax
    mov fs, ax
    mov gs, ax
    mov ss, ax  
    ;mov esi, msg            ; SI now points to our message
    ;mov ebx, 0xb8000       ; vga memory position (0) 

.loop   lodsb               ; AL = byte at [ESI], then ESI advances to the next char
    or al, al               ; Sets ZF when AL is the terminating zero
    jz halt                 ; End of string: continue at halt
    or eax,0x0200           ; OR 0x02 into AH: the high byte of the word written below is the colour attribute
                            ; The bottom byte defines an ASCII code point
    mov word [ebx], ax      ; Store attribute + character at [EBX]
    add ebx, 2              ; Each cell is two bytes wide
    jmp .loop               ; Next iteration of the loop

; halt: load ESP with kernel_stack_top, call the external __start, then
; disable interrupts and halt if it returns.
halt:   
    mov esp, kernel_stack_top
    extern __start
    call __start
    cli
    hlt                     ; CPU command to halt the execution

section .bss
align 4
kernel_stack_bottom: equ $
    resb 16384 ; 16 KB
kernel_stack_top:

    ENTRY(boot)
    OUTPUT_FORMAT("binary")
    SECTIONS {
        . = 0x7c00;
        .text :
        {
            *(.boot)
            *(.text)
        }

        .rodata :
        {
            *(.rodata)
        }

        .data :
        {
            *(.data)
        }

        .bss :
        {
            *(.bss)
        }
    }

The relevant part of my makefile is:

NASM:=nasm
CC:=gcc
SRC_NASM:=./src/init/boot.asm
SRC_C:=./src/init/boot.c ./src/init/init.c ./src/init/version.c
LINKER:=./src/init/linker.ld
DEPLOY=./deploy
BUILD:=./build
BIN:=$(DEPLOY)/boot.bin
OBJ_NASM:=$(BUILD)/boot.o
CFLAGS:=-Wall -Werror -m32 -fno-pie -ffreestanding -mno-red-zone -fno-exceptions -nostdlib -I./src/include
LDFLAGS:=

export ARCH:=i386
export ZLIB_SUPPORT:=false

DEPENDENCIES:=libc
ifeq ($(ZLIB_SUPPORT),true)
DEPENDENCIES:=$(DEPENDENCIES) zlib
endif

all: $(DEPENDENCIES)
    mkdir -p $(DEPLOY)
    mkdir -p $(BUILD)
    $(NASM) $(SRC_NASM) -f elf32 -o $(OBJ_NASM)
    $(CC) $(SRC_C) $(OBJ_NASM) -o $(BIN) $(CFLAGS) -T $(LINKER) $(LDFLAGS)

run:
    qemu-system-i386 -fda $(BIN) -nographic -serial stdio -monitor none

Why is it failing this way and how can I fix it?


Solution

  • The primary issue is that you aren't reading your entire kernel into memory. Your code eventually ends up executing uninitialised memory (most likely filled with zeroes), reaches the Extended BIOS Data Area (just below video memory at 0xa0000), and then eventually starts executing video memory at 0xa0000. QEMU doesn't permit executing video memory, thus the source of the error you get.

    Fixing this isn't as easy as it may first seem. Your code on my system was about 47300 bytes. 1 sector for the MBR and 92 for the kernel. The first problem is that not all hardware (and emulators) can read 92 sectors at once. QEMU and BOCHs max out at 72 for floppy drives and 128 for hard drives. This number can be smaller for some hardware (as low as the number of sectors per track).

    Some hardware will not read sectors:

    • That extend beyond a 64KiB segment limit.
    • That span more than one track. Not all BIOSes support multi-track reads and writes. QEMU and BOCHS do support them.
    • If the BIOS uses Direct Memory Access (DMA) transfers for disk access you may not be able to write a number of sectors that traverses a 64KiB boundary (in physical memory). This means you can't guarantee a write is successful if it starts before physical address 0x10000 and ends after. Same for 0x20000, 0x30000, 0x40000 ... 0x90000. QEMU and BOCHS do not allow disk transfers across such boundaries.

    A simple hack to load a kernel up to 64KiB with BOCHS and QEMU is to read 64 sectors (32KiB) to 0x0000:0x8000 (physical address 0x08000) and then do a second read of 64 sectors to 0x1000:0x0000 (physical address 0x10000). You could read a bigger kernel by reading additional 32KiB chunks. The 512 bytes between 0x0000:0x7e00 and 0x0000:0x8000 would be unused. The only real catch is determining the Cylinder Head Sector (CHS) values [1] to use for the Int 13h/AH=02 disk reads.

    Other issues:

    • When reading disk sectors into memory you should set the stack (SS:SP) to a location that you won't inadvertently overwrite. If you load the kernel after the bootloader, a good location is SS:SP = 0x0000:0x7c00, so that the stack grows down from just below the bootloader. To avoid interrupts occurring while setting SS:SP, set SP in the instruction immediately following the instruction that loads SS (the CPU holds off interrupts for one instruction after SS is loaded).
    • Never rely on the value of any general purpose register or segment register containing the value you expect. DL is an exception since in almost all cases on modern hardware it will contain the boot drive number. See my bootloader tips for more information.
    • QEMU and other emulators may not read sectors that don't exist in the file. If you read more sectors than what is in the disk image the read sector may fail. To get around this create a disk image (a 1.44MiB floppy image is convenient) and copy the contents of the kernel and bootloader to the beginning of the file without truncating the disk image. DD can be used for this purpose.
    • To aid debugging rather than have your linker script output as binary, have it default to outputting in ELF. Use OBJCOPY to copy the ELF file to a binary file. The ELF file can be used to store debug information. This is useful if using QEMU and GDB as a remote debugger.
    • You can't rely on memory containing zeroes. GCC requires the .bss section be zeroed filled. Use a linker script to determine the extents of the .bss section and zero out the memory prior to calling your C entry point.
    • Before calling the C entry point, GCC requires the Direction Flag (DF) be cleared so that string instructions default to forward movement.
    • In your makefile you use GCC to do linking. If not using a cross compiler GCC may generate a special section called .note.gnu.build-id that can interfere with your linker script. To fix this you can tell GCC to suppress this special section with LDFLAGS:=-Wl,--build-id=none. If you linked with LD directly this section wouldn't be created.

    Taking all these changes into account:

    linker.ld:

    ENTRY(boot)
    SECTIONS {
        . = 0x7c00;
        .boot :
        {
            *(.boot)
        }
        /* Place kernel right after boot sector on disk but set the
         * VMA (ORiGin point) to 0x8000 */
        . = 0x8000;
        __kernel_start = .;
        __kernel_start_seg = __kernel_start >> 4;
        .text : AT(0x7e00)
        {
            *(.text.start)
            *(.text*)
        }
        .rodata :
        {
            *(.rodata*)
        }
        .data :
        {
            *(.data)
        }
        /* Compute number of sectors that the kernel uses */
        __kernel_end = .;
        __kernel_size_sectors = (__kernel_end - __kernel_start + 511) / 512;
    
        .bss :
        {
            __bss_start = .;
            *(COMMON)
            *(.bss)
            . = ALIGN(4);
            __bss_end = .;
            /* Compute number of DWORDS that BSS section uses */
            __bss_sizel = (__bss_end - __bss_start) / 4;
        }
    }
    

    boot.asm:

    section .boot
    bits 16                     ; We're working at 16-bit mode here
    global boot
    
    ; boot: boot-sector entry point, 16-bit real mode.
    ; In:    DL = drive number supplied by the BIOS
    ; Does:  sets DS and SS:SP, enables A20, selects text mode 3, reads two
    ;        64-sector chunks (LBA 1-64 and LBA 65-128) to 0x0800:0 and 0x1000:0,
    ;        then enters 32-bit protected mode and far-jumps to boot32.
    ; Error: if either Int 13h read reports failure (CF=1) the CPU is parked in
    ;        .disk_error instead of jumping into memory that was never loaded.
    boot:
        xor ax, ax
        mov ds, ax
        mov ss, ax
        mov sp, 0x7c00          ; Set SS:SP just below bootloader
    
        cld                     ; DF=0 : string instruction forward movement
        mov [disk],dl           ; Save the boot drive before any BIOS call can alter DL
    
        mov ax, 0x2401
        int 0x15                ; Enable A20 bit
    
        mov ax, 0x3             ; Set VGA text mode 3
        int 0x10                ; BIOS video service: apply the mode in AL
    
        ; Read 64 sectors from LBA 1, CHS=0,0,2 to address 0x0800:0
        mov ax, 0x0800
        mov es, ax              ;ES = 0x800
    
        mov ah, 0x2             ;read sectors
        mov al, 64              ;sectors to read
        mov ch, 0               ;cylinder idx
        mov dh, 0               ;head idx
        mov cl, 2               ;sector idx
        mov dl, [disk]          ;disk idx
        mov bx, 0               ;target pointer, ES:BX=0x0800:0x0000
        int 0x13
        jc .disk_error          ;CF=1 : BIOS reported a read failure
    
        ; Read 64 sectors from LBA 65, CHS=1,1,12 to address 0x1000:0
        mov ax, 0x1000
        mov es, ax              ;ES=0x1000
    
        mov ah, 0x2             ;read sectors
        mov al, 64              ;sectors to read
        mov ch, 1               ;cylinder idx
        mov dh, 1               ;head idx
        mov cl, 12              ;sector idx
        mov dl, [disk]          ;disk idx
        mov bx, 0x0000          ;target pointer, ES:BX=0x1000:0x0000
        int 0x13
        jc .disk_error          ;CF=1 : BIOS reported a read failure
    
        cli                     ; Disable the interrupts
        lgdt [gdt_pointer]      ; Load the gdt table
        mov eax, cr0            ; Read CR0...
        or eax,0x1              ; Set the protected mode bit on special CPU reg cr0
        mov cr0, eax
        jmp CODE_SEG:boot32     ; Far jump to the code segment
    
    ; The kernel could not be read: stop here rather than execute unloaded memory.
    .disk_error:
        cli
    .hang:
        hlt
        jmp .hang               ; HLT can be left by an NMI; keep the CPU parked
    
    
    ; base a 32 bit value describing where the segment begins
    ; limit a 20 bit value describing where the segment ends, can be multiplied by 4096
    ; if granularity = 1
    ; present must be 1 for the entry to be valid
    ; ring level an int between 0-3 indicating the kernel Ring Level
    ; direction:
    ;  > 0 = segment grows up from base, 1 = segment grows down for a data segment
    ;  > 0 = can only execute from ring level, 1 = prevent jumping to higher ring levels
    ; read/write if you can read/write to this segment
    ; accessed if the CPU has accessed this segment
    ; granularity 0 = limit is in 1 byte blocks, 1 = limit is multiples of 4KB blocks
    ; size 0 = 16 bit mode, 1 = 32 bit protected mode
    ; Flat-model GDT: a null descriptor followed by one code and one data
    ; descriptor, each with base 0 and limit 0xFFFFF in 4KB units.
    gdt_start:
        dq 0x0                  ; Entry 0: null descriptor
    gdt_code:
        dw 0xFFFF               ; Limit bits 0-15
        dw 0x0                  ; Base bits 0-15
        db 0x0                  ; Base bits 16-23
        db 10011010b            ; Access: present, ring 0, code segment, readable
        db 11001111b            ; Flags: 4KB granularity, 32-bit; limit bits 16-19 = 0xF
        db 0x0                  ; Base bits 24-31
    gdt_data:
        dw 0xFFFF               ; Limit bits 0-15
        dw 0x0                  ; Base bits 0-15
        db 0x0                  ; Base bits 16-23
        db 10010010b            ; Access: present, ring 0, data segment, writable
        db 11001111b            ; Flags: 4KB granularity, 32-bit; limit bits 16-19 = 0xF
        db 0x0                  ; Base bits 24-31
    gdt_end:
    ; Operand for LGDT: 16-bit limit followed by the 32-bit linear base address.
    gdt_pointer:
        dw gdt_end - gdt_start - 1  ; Limit is the offset of the last valid byte (size - 1)
        dd gdt_start
    disk:
        db 0x0                  ; Boot drive number saved by boot
    
    ; Selectors are the byte offsets of the descriptors within the GDT.
    CODE_SEG equ gdt_code - gdt_start
    DATA_SEG equ gdt_data - gdt_start
    
    ;; Magic numbers
    times 510 - ($ - $$) db 0
    dw 0xaa55
    
    section .data
    msg: db "Hello, World more than 512 bytes!", 0
    
    bits 32
    section .text.start
    ; boot32: first code executed in 32-bit protected mode (target of the far
    ; jump in boot). Loads the flat data selector into every data segment
    ; register, prints msg to VGA text memory, then continues at halt.
    boot32:
        mov ax, DATA_SEG
        mov ds, ax
        mov es, ax
        mov fs, ax
        mov gs, ax
        mov ss, ax
        cld                 ; BIOS calls ran after the real-mode CLD; make DF=0 certain
                            ; for LODSB, REP STOSD and the C code called below
        mov esi, msg        ; ESI points to our message
        mov ebx, 0xb8000    ; vga memory position (0)
        mov ah, 0x02        ; Attribute byte 0x02, set once: LODSB only replaces AL,
                            ; so the colour no longer depends on stale EAX bits
    
    .loop:
        lodsb               ; AL = byte at [ESI], ESI advances to the next char
        test al, al         ; Checks if the end of the string
        jz halt             ; Jump to halt if the end
        mov word [ebx], ax  ; High byte = colour attribute, low byte = ASCII code point
        add ebx, 2
        jmp .loop           ; Next iteration of the loop
    
    ; halt: switch to the kernel stack, clear .bss and call the C entry point.
    ; If __start ever returns the CPU is parked with interrupts disabled.
    halt:
        mov esp, kernel_stack_top
        extern __start
        extern __bss_start
        extern __bss_sizel
    
        ; Zero the BSS section (the stack is reserved in .bss; nothing is on it yet)
        mov ecx, __bss_sizel
        mov edi, __bss_start
        xor eax, eax
        rep stosd
    
        ; Call C entry point
        call __start
        cli
    .hang:
        hlt                 ; CPU command to halt the execution
        jmp .hang           ; HLT can be left by an NMI; never run past this point
    
    section .bss
    align 4
    kernel_stack_bottom:
        resb 16384          ; 16 KB stack
    kernel_stack_top:
    

    Modify makefile by adding these make variables:

    OC:=objcopy
    DD:=dd
    ELF:=$(DEPLOY)/boot.elf
    

    Modify makefile by changing LDFLAGS to:

    LDFLAGS:=-Wl,--build-id=none
    

    Modify makefile by changing the all rule to:

    all: $(DEPENDENCIES)
            mkdir -p $(DEPLOY)
            mkdir -p $(BUILD)
            $(NASM) $(SRC_NASM) -f elf32 -o $(OBJ_NASM)
            $(CC) $(SRC_C) $(OBJ_NASM) -o $(ELF) $(CFLAGS) -T $(LINKER) $(LDFLAGS)
            $(OC) -O binary $(ELF) $(BIN)
            $(DD) if=/dev/zero of=$(BIN).tmp count=1440 bs=1024
            $(DD) if=$(BIN) of=$(BIN).tmp conv=notrunc
            mv $(BIN).tmp $(BIN)
    

    Alternative Solution

    Given there are many ways that reading with Int 13/AH=2 can fail, one can avoid most of the issues by reading one sector at a time and always reading to a memory location evenly divisible by 512.

    When using a linker script to build a bootloader along side the kernel you can use the linker to determine the size of the kernel and compute the number of sectors needed to be read.

    A revision of the previous code above that could do the needed job could be as follows.

    linker.ld

    ENTRY(boot)
    SECTIONS {
        . = 0x7c00;
        .boot :
        {
            *(.boot)
        }
        __kernel_start = .;
        __kernel_start_seg = __kernel_start >> 4;
        .text :
        {
            *(.text.start)
            *(.text*)
        }
        .rodata :
        {
            *(.rodata*)
        }
        .data :
        {
            *(.data)
        }
        /* Compute number of sectors that the kernel uses */
        __kernel_end = .;
        __kernel_size_sectors = (__kernel_end - __kernel_start + 511) / 512;
    
        .bss :
        {
            __bss_start = .;
            *(COMMON)
            *(.bss)
            . = ALIGN(4);
            __bss_end = .;
            /* Compute number of DWORDS that BSS section uses */
            __bss_sizel = (__bss_end - __bss_start) / 4;
        }
    }
    

    The main difference is that this linker script starts loading the kernel into physical memory at 0x07e00 instead of 0x08000. A more refined boot.asm can use the values generated by the linker to loop through the needed sectors reading them one at a time until complete:

    ; Both symbols below are computed by the linker script.
    extern __kernel_size_sectors    ; Size of kernel in 512 byte sectors
    extern __kernel_start_seg       ; Real-mode segment the kernel will be loaded at
    
    global boot
    
    STAGE2_LBA_START equ 1          ; Logical Block Address(LBA) Stage2 starts on
                                    ;     LBA 1 = sector after boot sector
                                    ; First LBA past the end of Stage2 (exclusive bound:
                                    ;     the read loop runs while LBA < STAGE2_LBA_END)
    STAGE2_LBA_END   equ STAGE2_LBA_START + __kernel_size_sectors
    DISK_RETRIES     equ 3          ; Number of times to retry on disk error
    
    bits 16
    section .boot
    
    boot:
    ; Include a BPB (1.44MB floppy with FAT12) to be more compatible with USB floppy media
    ;%include "src/init/bpb.inc"
    
    ; boot_start / load_stage2: 16-bit real-mode loader.
    ; In:    DL = drive number supplied by the BIOS
    ; Does:  sets DS and SS:SP, then reads stage2 one sector at a time
    ;        (LBA STAGE2_LBA_START up to, not including, STAGE2_LBA_END) to
    ;        consecutive 512-byte slots starting at segment __kernel_start_seg,
    ;        and finally jumps to stage2.
    ; Regs:  SI = current LBA, DI = destination segment, BP = retries left
    ; Error: after DISK_RETRIES failed retries of one sector, prints
    ;        diskErrorMsg and parks the CPU in error_end.
    boot_start:
        xor ax, ax                  ; DS=SS=0 (ES is loaded per sector in the read loop)
        mov ds, ax
        mov ss, ax                  ; Stack at 0x0000:0x7c00
        mov sp, 0x7c00
        cld                         ; Set string instructions to use forward movement
    
        ; Read Stage2 1 sector at a time until stage2 is completely loaded
    load_stage2:
        mov [bootDevice], dl        ; Save boot drive
        mov di, __kernel_start_seg  ; DI = Current segment to read into
        mov si, STAGE2_LBA_START    ; SI = LBA that stage2 starts at
        jmp .chk_for_last_lba       ; Check to see if we are last sector in stage2
    
    .read_sector_loop:
        mov bp, DISK_RETRIES        ; Set disk retry count
    
        call lba_to_chs             ; Convert current LBA to CHS
        mov es, di                  ; Set ES to current segment number to read into
        xor bx, bx                  ; Offset zero in segment
    
    .retry:
        mov ax, 0x0201              ; Call function 0x02 of int 13h (read sectors)
                                    ;     AL = 1 = Sectors to read
        int 0x13                    ; BIOS Disk interrupt call
        jc .disk_error              ; If CF set then disk error
    
    .success:
        add di, 512>>4              ; Advance to next 512 byte segment (0x20*16=512)
        inc si                      ; Next LBA
    
    .chk_for_last_lba:
        cmp si, STAGE2_LBA_END      ; Have we reached the last stage2 sector?
        jb .read_sector_loop        ;     LBAs are unsigned: use JB, not the signed JL
    
    .stage2_loaded:
        jmp stage2                  ; Jump to second stage
    
    .disk_error:
        xor ah, ah                  ; Int13h/AH=0 is drive reset
        int 0x13
        dec bp                      ; Decrease retry count
        jge .retry                  ; If retry count not exceeded then try again
    
    error_end:
        ; Unrecoverable error; print drive error; enter infinite loop
        mov si, diskErrorMsg        ; Display disk error message
        call print_string
        cli
    .error_loop:
        hlt
        jmp .error_loop
    
    ; print_string: write a NUL-terminated string through the BIOS teletype
    ;               service (Int 10h/AH=0x0e) on display page 0.
    ;
    ; In:       SI = offset of the string (read through DS by LODSB)
    ; Clobbers: AX, BX, SI
    
    print_string:
        xor bx, bx                  ; BH = display page 0 (BL is zeroed as well)
        mov ah, 0x0e                ; Select the BIOS teletype output function
    .next_char:
        lodsb                       ; AL = next byte of the string, SI advances
        test al, al
        jz .done                    ; A zero byte terminates the string
        int 0x10                    ; Print the character held in AL
        jmp .next_char
    .done:
        ret
    
    ; lba_to_chs: convert a Logical Block Address into the Cylinder/Head/Sector
    ;             register layout that Int 13h/AH=2 takes as input, using the
    ;             geometry stored in sectorsPerTrack and numHeads.
    ;
    ;     Sector   = (LBA mod SPT) + 1
    ;     Head     = (LBA / SPT) mod HEADS
    ;     Cylinder = (LBA / SPT) / HEADS
    ;
    ; References: http://www.ctyme.com/intr/rb-0607.htm
    ;             https://en.wikipedia.org/wiki/Logical_block_addressing#CHS_conversion
    ;             https://stackoverflow.com/q/45434899/3857942
    ;
    ; In:   SI = LBA
    ; Out:  CH = low 8 bits of the 10-bit cylinder
    ;       CL = sector in bits 0-5, cylinder bits 8-9 in bits 6-7
    ;       DH = head
    ;       DL = boot drive number (copied from bootDevice)
    ; AX is preserved; flags are modified.
    ;
    lba_to_chs:
        push ax                     ; AX is used as the dividend, keep the caller's copy
        xor dx, dx                  ; DX:AX = zero-extended LBA
        mov ax, si
        div word [sectorsPerTrack]  ; AX = LBA / SPT, DX = LBA mod SPT
        mov cl, dl
        inc cl                      ; CL = sector number (sectors are 1-based)
        xor dx, dx                  ; DX:AX = LBA / SPT
        div word [numHeads]         ; AX = cylinder, DX = head
        mov ch, al                  ; CH = cylinder bits 0-7
        shl ah, 6                   ; Move cylinder bits 8-9 up to bits 6-7...
        or  cl, ah                  ;     ...and merge them into the sector byte
        mov dh, dl                  ; DH = head
        mov dl, [bootDevice]        ; DL = drive, so the caller can issue Int 13h directly
        pop ax                      ; Hand back the caller's AX
        ret
    
    ; Uncomment these lines if not using a BPB (via bpb.inc)
    %ifndef WITH_BPB
    numHeads:        dw 2           ; 1.44MB Floppy has 2 heads & 18 sector per track
    sectorsPerTrack: dw 18
    %endif
    
    bootDevice:      db 0x00
    diskErrorMsg:    db "Unrecoverable disk error!", 0
    
    ; Pad boot sector to 510 bytes and add 2 byte boot signature for 512 total bytes
    TIMES 510-($-$$) db  0
    dw 0xaa55
    
    section .data
    msg: db "Hello, World more than 512 bytes!", 0
    
    ; base a 32 bit value describing where the segment begins
    ; limit a 20 bit value describing where the segment ends, can be multiplied by 4096
    ; if granularity = 1
    ; present must be 1 for the entry to be valid
    ; ring level an int between 0-3 indicating the kernel Ring Level
    ; direction:
    ;  > 0 = segment grows up from base, 1 = segment grows down for a data segment
    ;  > 0 = can only execute from ring level, 1 = prevent jumping to higher ring levels
    ; read/write if you can read/write to this segment
    ; accessed if the CPU has accessed this segment
    ; granularity 0 = limit is in 1 byte blocks, 1 = limit is multiples of 4KB blocks
    ; size 0 = 16 bit mode, 1 = 32 bit protected mode
    ; Flat-model GDT: a null descriptor followed by one code and one data
    ; descriptor, each with base 0 and limit 0xFFFFF in 4KB units.
    gdt_start:
        dq 0x0                  ; Entry 0: null descriptor
    gdt_code:
        dw 0xFFFF               ; Limit bits 0-15
        dw 0x0                  ; Base bits 0-15
        db 0x0                  ; Base bits 16-23
        db 10011010b            ; Access: present, ring 0, code segment, readable
        db 11001111b            ; Flags: 4KB granularity, 32-bit; limit bits 16-19 = 0xF
        db 0x0                  ; Base bits 24-31
    gdt_data:
        dw 0xFFFF               ; Limit bits 0-15
        dw 0x0                  ; Base bits 0-15
        db 0x0                  ; Base bits 16-23
        db 10010010b            ; Access: present, ring 0, data segment, writable
        db 11001111b            ; Flags: 4KB granularity, 32-bit; limit bits 16-19 = 0xF
        db 0x0                  ; Base bits 24-31
    gdt_end:
    ; Operand for LGDT: 16-bit limit followed by the 32-bit linear base address.
    gdt_pointer:
        dw gdt_end - gdt_start - 1  ; Limit is the offset of the last valid byte (size - 1)
        dd gdt_start
    disk:
        db 0x0                  ; NOTE(review): not referenced in this listing (bootDevice is used instead)
    
    ; Selectors are the byte offsets of the descriptors within the GDT.
    CODE_SEG equ gdt_code - gdt_start
    DATA_SEG equ gdt_data - gdt_start
    
    bits 16
    section .text.start
    ; stage2: second-stage entry, still 16-bit real mode; reached by `jmp stage2`
    ; once every kernel sector has been read. Enables A20, loads the GDT, sets
    ; CR0.PE and far-jumps to startpm. Does not return.
    stage2:
        mov ax, 0x2401
        int 0x15                    ; Enable A20 bit (BIOS called with interrupts still on)
    
        cli                         ; Disable the interrupts right before the mode
                                    ;     switch: no protected-mode IDT is set up
        lgdt [gdt_pointer]          ; Load the gdt table
        mov eax, cr0                ; Read CR0...
        or eax,0x1                  ; Set the protected mode bit on special CPU reg cr0
        mov cr0, eax
        jmp CODE_SEG:startpm        ; FAR JMP to the code segment
    
    bits  32
    ; startpm: first code executed in 32-bit protected mode (target of the far
    ; jump in stage2). Loads the flat data selector into every data segment
    ; register, prints msg to VGA text memory, then continues at halt.
    startpm:
        mov ax, DATA_SEG
        mov ds, ax
        mov es, ax
        mov fs, ax
        mov gs, ax
        mov ss, ax
        cld                         ; BIOS calls ran after the real-mode CLD; make DF=0
                                    ; certain for LODSB, REP STOSD and the C code below
        mov esi, msg                ; ESI points to our message
        mov ebx, 0xb8000            ; vga memory position (0)
        mov ah, 0x02                ; Attribute byte 0x02, set once: LODSB only replaces
                                    ; AL, so the colour no longer depends on stale EAX bits
    
    .loop:
        lodsb                       ; AL = byte at [ESI], ESI advances to the next char
        test al, al                 ; Checks if the end of the string
        jz halt                     ; Jump to halt if the end
        mov word [ebx], ax          ; High byte = colour attribute,
                                    ; low byte = ASCII code point
        add ebx, 2
        jmp .loop                   ; Next iteration of the loop
    
    ; halt: switch to the kernel stack, clear .bss and call the C entry point.
    ; If __start ever returns the CPU is parked with interrupts disabled.
    halt:
        mov esp, kernel_stack_top
        extern __start
        extern __bss_start
        extern __bss_sizel
    
        ; Zero the BSS section (the stack is reserved in .bss; nothing is on it yet)
        mov ecx, __bss_sizel
        mov edi, __bss_start
        xor eax, eax
        rep stosd
    
        ; Call C entry point
        call __start
        cli
    .hang:
        hlt                         ; CPU command to halt the execution
        jmp .hang                   ; HLT can be left by an NMI; never run past this point
    
    section .bss
    align 4
    kernel_stack_bottom:
        resb 16384                  ; 16 KB stack
    kernel_stack_top:
    

    This boot.asm is loosely based on the bootloader I proposed in another Stackoverflow question and answer. The main difference is that the linker computes much of the needed information through a linker script rather than being coded/included directly in the assembly file. This code also moves the enabling of the A20 line and entering protected mode to the second stage. This frees up space if you need to expand on the capabilities in the bootloader in the future.

    If you are building your bootloader to be used on real hardware as unpartitioned media - a copy of a 1.44MiB BIOS Parameter Block (BPB) can be found in the file bpb.inc. This can be useful for booting on USB media using Floppy Disk Emulation (FDD). To enable it just remove the ; from this line:

    ; %include "src/init/bpb.inc"
    

    Footnotes

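    • [1] There is a formula to convert a zero-based Logical Block Address (LBA) to a set of CHS values, where HPC is the number of heads per cylinder and SPT is the number of sectors per track: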

      C = LBA ÷ (HPC × SPT)
      H = (LBA ÷ SPT) mod HPC
      S = (LBA mod SPT) + 1
      

      LBA 0 is the boot sector. If the kernel is in the contiguous sectors after the bootloader then the start of the kernel is at LBA 1. The second 32KiB chunk of the kernel would be at LBA 65 (64+1). For a 1.44MiB floppy HPC=2 and SPT=18. From the calculation, LBA 1 = CHS (0,0,2) and LBA 65 = CHS (1,1,12). Those are the values used by the 64-sector disk reads in the first version of boot.asm.