Search code examples
assemblynasmx86-16bootloader

x86-16: INT 13h does not read file correctly from the disk


I tried to load kernel stored on a floppy disk formatted to FAT12. When I try to read sectors where the kernel is located, I don't have any errors (carry flag is not set), but the memory where I try to load the kernel has zeroes.

org 0x7C00
bits 16

%define endl            0x0D, 0x0A
%define dirEntrySize        32
%define fatLocation     0x7E00
%define rootdirLocation     0x9000
%define kernelLocation      0x9E00
%define filenameLength      11

jmp short main
nop

bpb_oemId:                  db "mkfs.fat"
bpb_bytesPerSector:         dw 512
bpb_sectorsPerCluster:      db 1
bpb_reservedSectors:        dw 1
bpb_fatCount:               db 2
bpb_rootdirEntries:         dw 224
bpb_sectorCount:            dw 2880
bpb_mediaDescriptor:        db 0xF0
bpb_sectorsPerFat:          dw 9
bpb_sectorsPerTrack:        dw 18
bpb_headCount:              dw 2
bpb_hiddenSectors:          dd 0
bpb_largeSectorCount:       dd 0
ebr_driveNumber:            db 0x00
ebr_ntFlags:                db 0
ebr_driveSignature:         db 0x29
ebr_volumeId:               dd 0
ebr_volumeLabel:            db "NO NAME    "
ebr_systemId:               db "FAT12   "

rootdirStart:           dw 0
dataStart:          dw 0

main:
    xor ax, ax
    mov ds, ax
    mov es, ax
    mov ss, ax
    mov sp, 0x7C00

    mov [ebr_driveNumber], dl

    mov si, msg_loading
    call puts

    ; Read FAT
    mov ax, [bpb_reservedSectors]
    mov bx, fatLocation
    mov cx, [bpb_sectorsPerFat]
    call readSectors
    
    ; Read Root Directory
    mov ax, [bpb_rootdirEntries]
    mov bx, dirEntrySize
    mul bx
    div word [bpb_bytesPerSector]
    mov [dataStart], ax
    xchg ax, cx
    mul byte [bpb_fatCount]
    add ax, [bpb_reservedSectors]
    mov bx, rootdirLocation
    call readSectors
    mov [rootdirStart], ax
    add [dataStart], ax
    
    ; Find and read the kernel
    mov si, kernelFilename
    call findFile
    mov bx, kernelLocation
    call readFile

    ; Load kernel
    mov dl, [ebr_driveNumber]
    jmp kernelLocation

    cli
    hlt

putc:
    push ax
    push bx
    mov ah, 0x0E
    xor bx, bx
    int 0x10
    pop bx
    pop ax
    ret
    
puts:
    push ax
    push si
.loop:
    lodsb
    test al, al
    jz .done
    call putc
    jmp .loop
.done:
    pop si
    pop ax
    ret

lbaToChs:
    push ax
    push dx
    xor dx, dx
    div word [bpb_sectorsPerTrack]
    inc dx
    mov cx, dx
    xor dx, dx
    div word [bpb_headCount]
    mov dh, dl
    mov ch, al
    shl ah, 6
    or cl, ah
    pop ax
    mov dl, al
    pop ax
    ret

readSectors:
    push ax
    push cx
    push dx
    push si
    push di
    push cx
    call lbaToChs
    pop si
    mov di, 3
    mov dl, [ebr_driveNumber]
.loop:
    mov ah, 0x02
    int 0x13
    mov ah, 0x01
    int 0x13
    jnc .done
    mov ah, 0x00
    int 0x13
    dec di
    jz .fail
    dec si
    jnz .loop
.done:
    pop di
    pop si
    pop dx
    pop cx
    pop ax
    ret
.fail:
    mov si, msg_diskError
    call puts
    cli
    hlt

findFile:
    push bx
    push si
    push di
    mov ax, 0
    mov di, rootdirLocation
.loop:
    mov bx, filenameLength
.compare:
    cmpsb
    jnz .skip
    dec bx
    jz .done
    jmp .compare
.skip:
    sub si, filenameLength
    add di, dirEntrySize - filenameLength
    inc ax
    cmp ax, word [bpb_rootdirEntries]
    jnz .loop
.fail:
    mov si, msg_findFailed
    call puts
    cli
    hlt
.done:
    mov si, rootdirLocation
    mov bx, dirEntrySize
    mul dx
    add si, ax
    add si, 0x1A
    lodsw
    pop di
    pop si
    pop bx
    ret

readFile:
    push ax
    push bx
    push cx
    push dx
.loop:
    push ax
    sub ax, 2
    mul byte [bpb_sectorsPerCluster]
    add ax, word [dataStart]
    mov cx, 1
    call readSectors
    mov ax, [bpb_bytesPerSector]
    mul byte [bpb_sectorsPerCluster]
    add bx, ax
    pop ax
    mov cl, 3
    mul cx
    dec cl
    div cx
    mov si, fatLocation
    add si, ax
    mov ax, [si]
    or dx, dx
    jnz .odd
.even:
    and ax, 0x0FFF
    jmp .update
.odd:
    shr ax, 4
.update:
    cmp ax, 0xFF0
    jnl .loop
.done:
    pop dx
    pop cx
    pop bx
    pop ax
    ret
.fail:
    mov si, msg_loadFailed
    call puts
    cli
    hlt

kernelFilename: db "KERNEL  BIN"
msg_loading: db "Loading...", endl, 0
msg_findFailed: db "Cannot find kernel", 0
msg_loadFailed: db "Cannot load kernel", 0
msg_diskError: db "Disk error", 0

times 510+$$-$ db 0
dw 0xAA55

I tried debugging with GDB and checked the registers, but their value was correct. I also checked the memory where I stored the FAT and the Root Directory, but this data also was correct. Only the memory where I wanted to load kernel was filled with zeroes.

Register values after lba-to-chs conversion:

eax            0x21                33
ecx            0x10                16
edx            0x100               256
ebx            0x9e00              40448
esp            0x7be6              0x7be6
ebp            0x0                 0x0
esi            0x7d98              32152
edi            0x0                 0
eip            0x7ce6              0x7ce6
eflags         0x202               [ IOPL=0 IF ]
cs             0x0                 0
ss             0x0                 0
ds             0x0                 0
es             0x0                 0
fs             0x0                 0
gs             0x0                 0

AX value is correct, should be lba of start of data: reserved_sectors + sectors_per_fat * fat_count + root_dir_entries * dir_entry_size / bytes_per_sectors = 1 + 9 * 2 + 224 * 32 / 512 = 1 + 18 + 14 = 33. In floppy image opened with hex editor, that points to start of the kernel.

Loaded file allocation table:

0x7e00: 0xf0    0xff    0xff    0xff    0x0f    0x00    0x00    0x00
0x7e08: 0x00    0x00    0x00    0x00    0x00    0x00    0x00    0x00
...

Loaded root directory:

0x9000: 0x4b    0x45    0x52    0x4e    0x45    0x4c    0x20    0x20
0x9008: 0x42    0x49    0x4e    0x20    0x18    0x00    0x28    0x61
0x9010: 0xaa    0x58    0xaa    0x58    0x00    0x00    0x28    0x61
0x9018: 0xaa    0x58    0x02    0x00    0x25    0x01    0x00    0x00
0x9020: 0x00    0x00    0x00    0x00    0x00    0x00    0x00    0x00
0x9028: 0x00    0x00    0x00    0x00    0x00    0x00    0x00    0x00
0x9030: 0x00    0x00    0x00    0x00    0x00    0x00    0x00    0x00
0x9038: 0x00    0x00    0x00    0x00    0x00    0x00    0x00    0x00
...

*Loaded* kernel:

0x9e00: 0x00    0x00    0x00    0x00    0x00    0x00    0x00    0x00
0x9e08: 0x00    0x00    0x00    0x00    0x00    0x00    0x00    0x00
0x9e10: 0x00    0x00    0x00    0x00    0x00    0x00    0x00    0x00
0x9e18: 0x00    0x00    0x00    0x00    0x00    0x00    0x00    0x00
...

Solution

  • org 0x7C00
    bits 16
    

    You could add cpu 8086 here to insure that conditional jumps use the short jump encodings only.

    
    %define endl            0x0D, 0x0A
    %define dirEntrySize        32
    %define fatLocation     0x7E00
    %define rootdirLocation     0x9000
    %define kernelLocation      0x9E00
    %define filenameLength      11
    

    The FAT on a 1440 KiB diskette image has 9 sectors, 1200h bytes. So it just so fits in the memory you allocated. This would blow up if you wanted to boot off a larger file system.

    A root directory with 224 entries needs 1C00h bytes. However, you don't actually need the root directory any longer after you found the file to search, so the overlap between directory location and kernel location is not a problem.

    jmp short main
    nop
    
    bpb_oemId:                  db "mkfs.fat"
    bpb_bytesPerSector:         dw 512
    bpb_sectorsPerCluster:      db 1
    bpb_reservedSectors:        dw 1
    bpb_fatCount:               db 2
    bpb_rootdirEntries:         dw 224
    bpb_sectorCount:            dw 2880
    bpb_mediaDescriptor:        db 0xF0
    bpb_sectorsPerFat:          dw 9
    bpb_sectorsPerTrack:        dw 18
    bpb_headCount:              dw 2
    bpb_hiddenSectors:          dd 0
    bpb_largeSectorCount:       dd 0
    ebr_driveNumber:            db 0x00
    ebr_ntFlags:                db 0
    ebr_driveSignature:         db 0x29
    ebr_volumeId:               dd 0
    ebr_volumeLabel:            db "NO NAME    "
    ebr_systemId:               db "FAT12   "
    
    rootdirStart:           dw 0
    dataStart:          dw 0
    

    For size optimisation you don't want to put these as zero-initialised bytes into the loader because you never need them to be zero-initialised. However, while it is not optimal, this is not incorrect.

    main:
        xor ax, ax
        mov ds, ax
        mov es, ax
        mov ss, ax
        mov sp, 0x7C00
    
        mov [ebr_driveNumber], dl
    
        mov si, msg_loading
        call puts
    
        ; Read FAT
        mov ax, [bpb_reservedSectors]
        mov bx, fatLocation
        mov cx, [bpb_sectorsPerFat]
        call readSectors
        
        ; Read Root Directory
        mov ax, [bpb_rootdirEntries]
        mov bx, dirEntrySize
        mul bx
        div word [bpb_bytesPerSector]
    

    This should round up the sectors per root directory. However, a lot of loaders and drivers make this error.

        mov [dataStart], ax
    

    This is what I listed for the variables, you unconditionally overwrite it so it needn't be zero-initialised.

        xchg ax, cx
        mul byte [bpb_fatCount]
    

    Assumes that sectors per FAT is < 256.

        add ax, [bpb_reservedSectors]
    

    Assumes that the root directory start is < 65_536.

        mov bx, rootdirLocation
        call readSectors
        mov [rootdirStart], ax
    

    You don't seem to read this variable anywhere so why set it?

        add [dataStart], ax
        
        ; Find and read the kernel
        mov si, kernelFilename
        call findFile
    

    As find file never is called with another name, you can hardcode the kernel filename address within the find file function.

        mov bx, kernelLocation
        call readFile
    
        ; Load kernel
        mov dl, [ebr_driveNumber]
        jmp kernelLocation
    

    The comment is incorrect, at this point the kernel is already loaded (if the code works correctly). What you're trying to do here is transfer control to the kernel.

    You're assuming cs:ip equal to 0:7C00h at the beginning of your loader. It would be better to make a far jump like jmp 0:kernelLocation instead to insure cs is reset to zero, because the prior loader may enter your code at 7C0h:0 instead. (Immediate short and near calls and jumps are always done using rel8 or rel16 so they're not affected by this difference.)

        cli
        hlt
    

    This is dead code, never reached. Remove it to save some space.

    putc:
        push ax
        push bx
        mov ah, 0x0E
        xor bx, bx
        int 0x10
        pop bx
        pop ax
        ret
    

    If you used bp you should also push and pop it here because this function may destroy bp.

    puts:
        push ax
        push si
    .loop:
        lodsb
        test al, al
        jz .done
        call putc
        jmp .loop
    .done:
        pop si
        pop ax
        ret
    

    The put character function doesn't need to be a function if you only ever call it from one spot. This is not a correctness problem however.

    lbaToChs:
        push ax
        push dx
        xor dx, dx
        div word [bpb_sectorsPerTrack]
        inc dx
        mov cx, dx
        xor dx, dx
        div word [bpb_headCount]
        mov dh, dl
        mov ch, al
        shl ah, 6
        or cl, ah
        pop ax
        mov dl, al
        pop ax
        ret
    

    push dx and the companion pop and mov are not needed as is because you always overwrite dl after calling this function.

    readSectors:
        push ax
        push cx
        push dx
        push si
        push di
        push cx
        call lbaToChs
        pop si
        mov di, 3
        mov dl, [ebr_driveNumber]
    .loop:
        mov ah, 0x02
        int 0x13
    

    You didn't initialise al to the number of sectors to read! The naive solution would be to init it to the total number of sectors you want to read per readSectors function call. However, if you cross a 64 KiB boundary or a "track" boundary then reading more than 1 sector may fail. Therefore in my boot sector loaders the read_sector function reads a single sector unconditionally. It does the LBA to CHS calculation for every sector that it wants to read, rather than once per continuous read. The callers have to take care of looping if they want to read multiple sectors.

    So if you wanted to do this you should do the LBA to CHS calculation in a single-sector read function, and call this in a loop which increments the LBA and adds to bx after reading every single sector. (This would also mean you needn't calculate the bytes per cluster and add it to your pointer later in readFile.)

        mov ah, 0x01
        int 0x13
        jnc .done
    

    This call is useless at best. The jnc will jump if the read succeeded. But if that happens you will skip your dec si loop so you never continue to read subsequent sectors.

        mov ah, 0x00
        int 0x13
        dec di
        jz .fail
        dec si
        jnz .loop
    

    The dec si loop is very confused. The dec di loop appears to be intended as an error retry. But why loop for the next sector only if an error retry is happening? Also, if you do loop for si > 1 then you do not calculate the next CHS tuple nor transfer address, so you cannot possibly read multiple sectors correctly.

    .done:
        pop di
        pop si
        pop dx
        pop cx
        pop ax
        ret
    .fail:
        mov si, msg_diskError
        call puts
        cli
        hlt
    

    It is nicer to run int 16h function 00h then int 19h to return control to the prior loader. Or use sti \ halt: \ hlt \ jmp halt which allows to reboot eg using Ctrl-Alt-Del.

    findFile:
        push bx
        push si
        push di
        mov ax, 0
        mov di, rootdirLocation
    .loop:
        mov bx, filenameLength
    .compare:
        cmpsb
        jnz .skip
        dec bx
        jz .done
        jmp .compare
    

    This loop works if the (SFN) KERNEL.BIN is in the very first directory entry.

    .skip:
        sub si, filenameLength
        add di, dirEntrySize - filenameLength
    

    This is very confused. You don't know at which point di and si are pointing (like if KERNEL.SYS is found as a mismatch then di will point at the Y in "SYS" and si at the I in "BIN") so these hardcoded adjustments cannot possibly be correct. If I insert a deleted directory entry (first byte is a mismatch) your instructions here also do not work.

        inc ax
        cmp ax, word [bpb_rootdirEntries]
        jnz .loop
    

    This is odd, but not wrong. I prefer to initialise to root directory entries and count down in a register.

    .fail:
        mov si, msg_findFailed
        call puts
        cli
        hlt
    

    You can share most of the code for this with another error display function.

    
    .done:
        mov si, rootdirLocation
        mov bx, dirEntrySize
        mul dx
    

    This seems wrong, perhaps you meant mul bx ?

        add si, ax
        add si, 0x1A
        lodsw
        pop di
        pop si
        pop bx
        ret
    
    readFile:
        push ax
        push bx
        push cx
        push dx
    .loop:
        push ax
        sub ax, 2
        mul byte [bpb_sectorsPerCluster]
    

    This assumes your current cluster is < 256. This is not necessarily true on a 1440 KiB diskette image.

        add ax, word [dataStart]
        mov cx, 1
        call readSectors
    

    If readSectors worked this would hardcode the cluster size to 1 sector.

        mov ax, [bpb_bytesPerSector]
        mul byte [bpb_sectorsPerCluster]
        add bx, ax
    

    This assumes Bytes per Sector < 256, which is not true on the typical image. So you will add zero to bx, which is why you overwrite the first sector of the kernel later.

        pop ax
        mov cl, 3
        mul cx
        dec cl
        div cx
        mov si, fatLocation
        add si, ax
        mov ax, [si]
        or dx, dx
        jnz .odd
    .even:
        and ax, 0x0FFF
        jmp .update
    .odd:
        shr ax, 4
    

    Oh, this is a 186 instruction. Not a problem for qemu of course.

    .update:
        cmp ax, 0xFF0
        jnl .loop
    

    This should be jb .loop (or jl .loop). If the next cluster is below 0FF0h then loop.

    .done:
        pop dx
        pop cx
        pop bx
        pop ax
        ret
    .fail:
        mov si, msg_loadFailed
        call puts
        cli
        hlt
    

    This is dead code, never reached.

    kernelFilename: db "KERNEL  BIN"
    msg_loading: db "Loading...", endl, 0
    msg_findFailed: db "Cannot find kernel", 0
    msg_loadFailed: db "Cannot load kernel", 0
    msg_diskError: db "Disk error", 0
    
    times 510+$$-$ db 0
    dw 0xAA55
    

    Finally, I debugged this using boot loaded lDebug running in qemu. I created a 1440 KiB diskette image using my bootimg script. And here's the command scriptlet I used to debug your loader:

    test$ nasm kernel.asm -o kernel.bin && nasm test.asm -o test.bin -l test.lst && nasm -I ~/proj/lmacros/ -I ~/proj/bootimg/ ~/proj/bootimg/bootimg.asm -D_PAYLOADFILE="kernel.bin" -D_BOOTPATCHFILE="test.bin" -o diskette.img && nasm -I ~/proj/lmacros/ -I ~/proj/ldosboot/ ~/proj/ldosboot/boot.asm -o boot12.bin -D_LOAD_NAME="'LDEBUG'" && nasm -I ~/proj/lmacros/ -I ~/proj/ldosmbr/ ~/proj/ldosmbr/oldmbr.asm -o oldmbr.bin && nasm -I ~/proj/ldebug/bin/ -I ~/proj/lmacros/ -I ~/proj/bootimg/ ~/proj/bootimg/bootimg.asm -D_PAYLOADFILE="ldebug.com,extlib.eld" -D_BOOTPATCHFILE="boot12.bin" -D_MBR -D_MBRPATCHFILE="oldmbr.bin" -o hdimage.img && qemu-system-i386 -hda hdimage.img -fda diskette.img -boot order=c -display curses -chardev serial,id=serial2,path=/tmp/vptty-dos -serial null -serial chardev:serial2; stty sane

    I changed a switch to -D_PAYLOADFILE="::directorypad,1,kernel.bin" to insert a deleted directory entry, which makes your code fail even after correcting the jnl error.