So, I've been working on a hobby project: creating my own operating system. I started a while back but dropped it until a couple of nights ago. I just fixed an oversight that caused nothing to be read from the sectors I wanted to read. With that error out of the way, a new one has come about, and I honestly don't even know where to begin debugging it.
I am coding a Master Boot Record and debugging it with GDB and QEMU. Here is the code for my master boot record (it was assembled using YASM):
Sorry if my code is not very good. I am not an expert at assembly language...
; Build: yasm boot.asm -fbin
; Everything in this file is 16-bit code.
bits 16
; part(n,l): open section n with vstart=l, i.e. labels inside it are computed as
; if the section were located at address l, wherever its bytes land in the output.
%define part(n,l) section n vstart=l align=1
; rpart(n,l): open section n with start=l, i.e. the section's bytes are placed at
; position l of the output. The code adds the load base (0x7c00 or 0x600) by hand
; whenever it addresses a label from such a section.
%define rpart(n,l) section n start=l align=1
; ----------------------- ;
part(entry, 0x7c00) ;
; --ENTRY---------------- ;
; _start -- MBR entry point, executed at 0000h:7C00h.
;   In:  DL = BIOS boot drive. No segment register is assumed to be valid.
;   Out: the 512-byte sector is copied to 0000h:0600h and execution continues
;        in that copy at the first byte after this section (the strap code).
_start:
        xor     ax, ax
        mov     ds, ax                  ; DS must be valid before any memory store below
        mov     es, ax
        mov     ss, ax                  ; interrupts are inhibited for one instruction after
        mov     sp, _start              ; a write to SS, so SP is loaded right behind it
        mov     bp, _start
        mov     [boot_drive+0x7c00], dl ; save boot drive in the not-yet-relocated copy
        cld                             ; string copy must run forward; DF is not guaranteed clear
        mov     cx, 512                 ; whole sector, in bytes
        mov     si, _start              ; DS:SI = 0000h:7C00h
        mov     di, _strap              ; ES:DI = 0000h:0600h
        rep movsb
        ; The strap section follows this one in the sector, so inside the copy it
        ; starts at 0600h plus the size of this section.
        jmp     0:_strap+(b_boot_strapper-$$)
b_boot_strapper:
; ----------------------- ;
part(strap, 0x0600) ;
; --BOOT STRAPPER-------- ;
PART_ENTRY_SIZE equ 16                  ; bytes per partition-table entry
PART_TABLE_SIZE equ 4 * PART_ENTRY_SIZE ; four entries
PART_ACTIVE     equ 1 << 7              ; bit 7 of an entry's first byte marks it active

; _strap -- relocated part of the MBR: find the first active partition, read it
; to 0000h:7C00h with int 13h AH=42h, and jump to it.
;   In:  DS = ES = SS = 0 and the sector copy at 0000h:0600h (both set up by _start).
;   Out: does not return. On success jumps to 0000h:7C00h with DL = boot drive;
;        otherwise prints an error message and halts.
;
; Addressing note: labels from the rpart sections (partition_1, dap, boot_drive)
; hold their position inside the sector, hence the explicit +0x600. Labels of
; this section are computed as 0600h + offset-within-this-section, but in the
; relocated copy this section sits after the entry section, so data labels of
; this section need the entry section's size (b_boot_strapper-_start) added.
_strap:
        xor     bx, bx                  ; BX = byte offset of the entry being tested
.find_active_part:
        cmp     bx, PART_TABLE_SIZE
        jae     .no_active_part         ; all four entries tested, none active
        test    byte [partition_1+0x600+bx], PART_ACTIVE
        jnz     .load_active_part
        add     bx, PART_ENTRY_SIZE
        jmp     .find_active_part

.load_active_part:
        xor     ax, ax
        mov     ds, ax
        mov     es, ax                  ; keep ES a real segment (0), not the DAP offset
        ; Copy the full 32-bit start LBA (entry offset 8) into the DAP.
        mov     ax, [partition_1+0x600+bx+8]
        mov     [dap_startlba+0x600], ax
        mov     ax, [partition_1+0x600+bx+10]
        mov     [dap_startlba+0x600+2], ax
        ; NOTE(review): the sector count is still taken from the low word of the
        ; entry's size field (offset 12), i.e. the whole partition is read to
        ; 7C00h. That only works for very small partitions; a conventional MBR
        ; reads exactly one sector here. Left as-is so the loaded image is unchanged.
        mov     ax, [partition_1+0x600+bx+12]
        mov     [dap_sectors+0x600], ax
        mov     si, dap+0x600           ; DS:SI -> Disk Address Packet
        mov     dl, [boot_drive+0x600]
        mov     ah, 42h                 ; extended read
        int     13h
        jc      .disk_error

        ; Hand over with flat zero segments, stack just below 7C00h, DL = boot drive.
        xor     ax, ax
        mov     ds, ax
        mov     es, ax
        mov     ss, ax
        mov     sp, _start
        mov     bp, _start
        mov     dl, [boot_drive+0x600]
        jmp     0:0x7c00

.no_active_part:
        mov     si, msg_no_part+(b_boot_strapper-_start)
        call    print
        jmp     halt

.disk_error:
        mov     si, msg_er_read+(b_boot_strapper-_start)
        call    print
        jmp     halt
; print -- write a NUL-terminated string using BIOS teletype output (int 10h, AH=0Eh).
;   In:    DS:SI -> string; DF must be clear so lodsb walks forward.
;   Out:   SI points just past the terminating NUL.
;   Clobb: AX, BX, flags. DX is left untouched (the former `mov dx, ax` was dead code).
print:
        mov     ah, 0Eh                 ; teletype output function
        xor     bh, bh                  ; display page 0
        mov     bl, 0Fh                 ; foreground colour for graphics modes
.rep:
        lodsb                           ; AL = next character
        or      al, al                  ; NUL terminator?
        jz      .done
        int     10h
        jmp     .rep
.done:
        ret
; halt -- disable maskable interrupts and park the CPU; never returns.
halt:
        cli
.parked:
        hlt
        jmp     .parked                 ; re-halt if anything wakes the CPU
; NUL-terminated messages consumed by print.
msg_er_read:    db "Disk Read Error....", 0
msg_no_part:    db "No Active Partition....", 0
; ----------------------- ;
rpart(variables, 300)
; Runtime variables, placed at byte 300 of the sector.
boot_drive:     db 0                    ; drive number saved from DL at entry
dap:                                    ; Disk Address Packet passed to int 13h, AH=42h
                db 16                   ; first byte: 16 (the packet is 16 bytes long)
                db 0                    ; second byte: 0
dap_sectors:    dw 0                    ; sector count, filled in before the read
dap_offset:     dw 0x7c00               ; destination offset
dap_segment:    dw 0                    ; destination segment
dap_startlba:   dq 0                    ; first sector to read, filled in before the read
dap_end:
; ----------------------- ;
rpart(partitions, 446) ; partition table: four 16-byte entries at bytes 446..509 of the sector
; --PARTITION TABLE------ ;
partition_1: ; part_n1.asm supplies the following 16 bytes:
; 0x80, 0x01, 0x00, 0x05, 0x17, 0x01, 0x03, 0x01, 0x04, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00
; (byte 0 has bit 7 set, which the strap code treats as "active"; the words at
;  offsets 8 and 12 are what it copies into dap_startlba and dap_sectors)
%include "part_n1.asm"
partition_2: ; The remaining three files are just 16 null bytes each.
%include "part_n2.asm"
partition_3:
%include "part_n3.asm"
partition_4:
%include "part_n4.asm"
; ------------------------------- ;
rpart(signature, 510) ; last two bytes of the 512-byte sector
db 0x55, 0xAA ; two-byte signature ending the sector
; ------------------------------- ;
This code works! However, I don't know whether this is an issue with QEMU or not, but when it reads from the sectors there is a bit of corruption or data loss...
These are the bytes that were expected to be at 0x7c00
EB 1B B4 0E 30 FF B3 0F
AC 74 04 CD 10 EB F9 C3
48 65 6C 6C 6F 20 57 6F
72 6C 64 21 00 BE 10 7C
E8 DF FF F4
(It's a basic function that prints "Hello World!")
This is what ended up actually being in memory at that location:
EB 1B B4 0E 30 FF B3 0F
AC 74 04 CD 10 EB F9 C3
48 65 6C 6C 6F 20 57 6F
72 6C 64 21 00 BE 10 7C
F0 DF FF F4
If you look closely, the 4th byte from the end was changed from E8 to F0; I have no idea why this happened. In the previous run the "E" in "Hello World" was also changed, but it wasn't in this debug run.
I need help with even where to begin debugging this...
I realized that my function to print hello world had a few issues; whether or not they were related to this odd thing, I don't really know. In the repeating part of the print function (the one in the code I was loading, not in the MBR code above) I forgot to add `or al, al` after the `lodsb` and before the `jz .done`, which might have been interfering with things. I am not completely sure, but after I updated that code and ran a few more debug sessions, it seems this issue doesn't occur anymore...
There are a number of problems with your code, but it may well be that the issue is in the volume boot record you didn't show. Some of the problems in the MBR that should be resolved:
There are some things in your code that are nice to have:
Some of these tips can be found in my Stackoverflow General Bootloader Tips.
A modified version of your relocatable bootloader that chain loads a Volume Boot Record (VBR) could be coded as:
boot.asm:
; Initial value of the read-retry counter. The read loop repeats while the
; counter is still >= 0 after being decremented, so a failing read is attempted
; DISK_RETRY + 1 times in total.
DISK_RETRY EQU 3
; Address the boot sector copies itself to and then runs from.
BOOT_ORG_RELOC EQU 0x0600
; Address the sector is first executed at, and where the VBR is loaded afterwards.
BOOT_ORG EQU 0x7c00
; Size of the boot sector in bytes.
MBR_SIZE EQU 512
; SECTION(n,l): open section n starting l bytes past BOOT_ORG_RELOC, i.e. at
; offset l within the sector.
%define SECTION(n,l) section n start=l+BOOT_ORG_RELOC align=1
; All labels are assembled for the relocated copy.
ORG BOOT_ORG_RELOC
; _start -- relocating MBR: copy this sector from BOOT_ORG to BOOT_ORG_RELOC,
; find an active partition, read its first sector (the VBR) to 0x0000:BOOT_ORG
; and jump to it.
;   In:  DL = boot drive as passed by the BIOS.
;   Out: does not return. On success jumps to 0x0000:BOOT_ORG with DL unchanged
;        and DS:SI pointing at the active partition entry; otherwise prints an
;        error message and halts.
_start:
        ; Until the far jump below, the code is still executing from its original
        ; location, so it must not use any label address (labels are assembled
        ; for BOOT_ORG_RELOC).
        xor     ax, ax
        mov     ds, ax
        mov     es, ax
        mov     ss, ax
        mov     sp, BOOT_ORG            ; stack grows down from 0x0000:0x7c00

        cld                             ; string instructions move forward
        mov     si, BOOT_ORG            ; DS:SI = source      (0x0000:0x7c00)
        mov     di, BOOT_ORG_RELOC      ; ES:DI = destination (0x0000:0x0600)
        mov     cx, MBR_SIZE/2          ; count in words: MBR_SIZE bytes in total
        rep movsw

        jmp     0x0000:.relocated       ; continue in the copy with CS = 0

.relocated:
        ; Walk the partition table from its last entry back to its first,
        ; stopping at the first entry whose status byte is 0x80.
        mov     bx, PARTITION_SIZE      ; BX = offset just past the last entry
        mov     si, partition_start     ; SI = base of the table
.scan:
        sub     bx, 16                  ; step back one entry
        jl      .none_active            ; went below entry 0: nothing bootable
        cmp     byte [si + bx], 0x80
        jne     .scan

.found:
        lea     di, [si + bx]           ; DI = address of the active entry, kept for the VBR
        mov     ax, [si + bx + 8]       ; start LBA, low word
        mov     [dap_startlba], ax
        mov     ax, [si + bx + 10]      ; start LBA, high word
        mov     [dap_startlba + 2], ax

        ; DL still holds the boot drive and ES is still 0.
        mov     cx, DISK_RETRY          ; retry counter
        mov     bx, BOOT_ORG            ; ES:BX mirrors the DAP buffer for BIOSes that want it
        mov     si, dap                 ; DS:SI -> Disk Address Packet
.read:
        mov     ah, 0x42                ; extended read; reloaded on every attempt
        int     0x13
        jnc     .chain                  ; CF clear: sector is at 0x0000:0x7c00
        dec     cx
        jge     .read                   ; try again while the counter is >= 0

.read_failed:
        mov     si, msg_er_read
        call    print
        jmp     halt

.none_active:
        mov     si, msg_no_part
        call    print
        jmp     halt

.chain:
        ; DL is still the value originally passed by the BIOS.
        mov     si, di                  ; DS:SI = active partition entry, expected by some old OSes
        jmp     0x0000:BOOT_ORG         ; run the chain-loaded VBR
; halt -- end of the line for the bootloader: interrupts off, HLT forever.
halt:
        cli
.spin:
        hlt
        jmp     .spin
; print -- emit a NUL-terminated string through BIOS teletype output (int 0x10, AH=0x0e).
;   In:    DS:SI -> string (DF clear)
;   Out:   SI just past the terminator
;   Clobb: AX, BH, flags
print:
        xor     bh, bh                  ; display page 0
        mov     ah, 0x0e                ; teletype function
.next:
        lodsb                           ; AL = next character
        test    al, al                  ; terminator reached?
        jz      .end
        int     0x10
        jmp     .next
.end:
        ret
; Disk Address Packet handed to int 0x13, AH=0x42.
dap:
                db 16                   ; packet size in bytes
                db 0                    ; second byte is always 0
dap_sectors:    dw 1                    ; read one sector (the VBR)
dap_offset:     dw BOOT_ORG             ; destination 0x0000:0x7c00
dap_segment:    dw 0
dap_startlba:   dq 0                    ; low 32 bits are filled in at runtime
dap_end:

; NUL-terminated error messages for print.
msg_er_read:    db "Disk Read Error....", 0
msg_no_part:    db "No Active Partition....", 0
; Partition table: four 16-byte entries starting at offset 446 of the sector.
SECTION(parttbl, 446)
partition_start:
partition_1:
%include "part_n1.asm"
partition_2:
%include "part_n2.asm"
partition_3:
%include "part_n3.asm"
partition_4:
%include "part_n4.asm"
partition_end:
; Size of the table in bytes; _start uses it as the initial offset of its backwards scan.
PARTITION_SIZE EQU partition_end - partition_start
; Two-byte signature at offset 510; the word 0xaa55 is stored as the bytes 0x55, 0xAA.
SECTION(bootsig, 510)
dw 0xaa55
part_n1.asm:
; One 16-byte partition entry. Offsets 0, 8 and 10 are the ones the MBR code reads;
; the other field names follow the conventional MBR entry layout.
db 0x80                 ; +0  status: 0x80 = bootable/active
db 0x01, 0x00, 0x05     ; +1  CHS address of first sector
db 0x17                 ; +4  partition type
db 0x01, 0x03, 0x01     ; +5  CHS address of last sector
dd 4                    ; +8  start LBA
dd 4                    ; +12 size in sectors
part_n2.asm:
times 16 db 0           ; unused partition entry: 16 zero bytes
part_n3.asm:
times 16 db 0           ; unused partition entry: 16 zero bytes
part_n4.asm:
times 16 db 0           ; unused partition entry: 16 zero bytes
A simple Volume Boot Record (VBR) to test could be:
; Address this sector is assembled for and run at.
BOOT_ORG EQU 0x7c00
; SECTION(n,l): open section n starting l bytes past BOOT_ORG, i.e. at offset l
; within the sector.
%define SECTION(n,l) section n start=l+BOOT_ORG align=1
ORG BOOT_ORG
; vbr_start -- minimal test VBR: set up flat zero segments and a stack, print a
; message, then fall through into halt. Never returns.
vbr_start:
        xor     ax, ax
        mov     ds, ax
        mov     es, ax
        mov     ss, ax
        mov     sp, BOOT_ORG            ; stack grows down from 0x0000:0x7c00
        cld                             ; lodsb in print must walk forward

        mov     si, vbr_run_msg         ; announce that the VBR is running
        call    print
        ; falls through into halt

; halt -- interrupts off, HLT forever.
halt:
        cli
.idle:
        hlt
        jmp     .idle
; print -- write the NUL-terminated string at DS:SI via int 0x10, AH=0x0e.
;   Clobb: AX, BH, SI, flags
print:
        xor     bh, bh                  ; display page 0
        mov     ah, 0x0e                ; BIOS teletype output
.char:
        lodsb
        test    al, al                  ; stop at the terminating NUL
        jz      .finished
        int     0x10
        jmp     .char
.finished:
        ret
vbr_run_msg:    db "VBR running", 13, 10, 0    ; text, CR, LF, NUL terminator

; Two-byte signature at offset 510 of the sector.
SECTION(bootsig, 510)
                db 0x55, 0xaa
You can build and run this code as a 10 megabyte disk image with these commands:
# Assemble the MBR and the test VBR as flat binaries
nasm -f bin -o boot.bin boot.asm
nasm -f bin -o vbr.bin vbr.asm
# Create a zero-filled 10 MiB disk image
dd if=/dev/zero of=disk.img bs=1M count=10
# Write the boot sector to LBA 0 (512-byte sectors) without truncating the image
dd if=boot.bin of=disk.img bs=512 seek=0 conv=notrunc
# Write the VBR to LBA 4 without truncating the image
dd if=vbr.bin of=disk.img bs=512 seek=4 conv=notrunc
In QEMU you can run it with the command:
# Attach disk.img as the first hard disk. The format is stated explicitly: with
# a bare -hda, QEMU probes a raw image's format, warns about it, and restricts
# writes to block 0.
qemu-system-i386 -drive file=disk.img,format=raw,index=0,media=disk
If it works, the output should look similar to: