I know this question gets asked a lot but every answer I find doesn't work for me. I'm trying to load stage 2
of my OS, located at the second sector of my image file(0x200
)
This is the code I tried to use:
bits 16 ; Starting at 16 bits
org 0x0 ; And starting at 0
jmp main ; Hop to main!
; TODO: copy comment from prev. loader
; args: SI
print:
lodsb ; Load the next/first character to AL
or al, al ; Is it 0?
jz donePrint ; Yes - Done.
mov ah, 0eh ; No - keep going.
int 10h ; Print character.
jmp print ; Repeat
donePrint:
ret ; Return
; todo: args
readSector:
mov ah, 02h
mov al, 1
mov dl, 0x80
mov ch, 0
mov dh, 0
mov cl, 2
mov bx, 0x500
int 13h
jnc good
jmp fail
main:
; First, setup some registers.
cli ; Clear interrupts
mov ax, 0x07C0 ; Point all registers to segment
mov ds, ax
mov es, ax
mov fs, ax
mov gs, ax
; Create the stack(0x0000-0xFFFF).
mov ax, 0x0000
mov ss, ax ; Point SS to 0x0000
mov sp, 0xFFFF ; Stack pointer at 0xFFFF
sti ; Restore interrupts
mov si, LOADING
call print
call readSector
fail:
mov si, FAILURE_MSG
call print
good:
mov si, LOADOK
call print
jmp 0x500
LOADING db 0x0D, 0x0A, "Booting loader...", 0x0D, 0x0A, 0x00
FAILURE_MSG db 0x0D, 0x0A, "ERROR: Press any key to reboot.", 0x0A, 0x00
LOADOK db 0x0D, 0x0A, "load ok", 0x0A, 0x00
TIMES 510 - ($-$$) DB 0
DW 0xAA55
But it just bootloops. I tried other solutions to no avail. What am I doing wrong? If I need to update the question please tell me.
Thank you!
EDIT #1: According to Sep Roland's answer, I updated my code, but it is still not working. I'm putting the updated code here if it's any help. Also, if asked for it, I can post my 2nd stage code. It should be using 0x500 as org. NEW CODE:
bits 16 ; Starting at 16 bits
org 0x0 ; And starting at 0
jmp main ; Hop to main!
; TODO: copy comment from prev. loader
; args: SI
print:
lodsb ; Load the next/first character to AL
or al, al ; Is it 0?
jz donePrint ; Yes - Done.
mov ah, 0eh ; No - keep going.
int 10h ; Print character.
jmp print ; Repeat
donePrint:
ret ; Return
; todo: args
readSector:
mov ah, 02h
mov al, 1
mov ch, 0
mov dh, 0
mov cl, 2
mov bx, 0x500
int 13h
jnc good
jmp fail
main:
; First, setup some registers.
cli ; Clear interrupts
mov ax, 0x07C0 ; Point all registers to segment
mov ds, ax
mov es, ax
mov fs, ax
mov gs, ax
; Create the stack(0x0000-0xFFFF).
mov ax, 0x0000
mov ss, ax ; Point SS to 0x0000
mov sp, 0xFFFE ; Stack pointer at 0xFFFE
sti ; Restore interrupts
mov si, LOADING
call print
call readSector
fail:
mov si, FAILURE_MSG
call print
end:
cli
hlt
jmp end
good:
mov si, LOADOK
call print
jmp 0x07C0:0x0500
LOADING db 0x0D, 0x0A, "Booting loader...", 0x0D, 0x0A, 0x00
FAILURE_MSG db 0x0D, 0x0A, "ERROR: Press any key to reboot.", 0x0A, 0x00
LOADOK db 0x0D, 0x0A, "load ok", 0x0A, 0x00
TIMES 510 - ($-$$) DB 0
DW 0xAA55
EDIT #2: Posting second stage code including gdt.inc
because someone mentioned LGDT
may have been causing a problem:
MAIN CODE(SOME PARTS HAVE BEEN CUT OUT BUT THEY ARE NOT REQUIRED, like strings)
bits 16 ; We start at 16 bits
org 0x500 ; We are loaded in at 0x500
jmp main ; Jump to main code.
; ----------------------------------------
; Includes
; ----------------------------------------
%include "include/stdio.inc"
%include "include/gdt.inc"
%include "include/a20.inc"
; ---------------------------------------
; Data and strings
; ---------------------------------------
stringhidden db "Not showing string.", 0x0D, 0x0A, 0x00
stringhidden db "Not showing string.", 0x0D, 0x0A, 0x00
; ---------------------------------------------------------------------
; main - 16-bit entry point
; Installing GDT, storing BIOS info, and enabling protected mode
; ---------------------------------------------------------------------
main:
; Our goal is jump to main32 to become 32-bit
; Setup segments and stack
cli ; Clear interrupts
xor ax, ax ; Null segments AX, DS, and ES
mov ds, ax
mov es, ax
mov ax, 0x9000 ; Stack begins at 0x9000-0xFFFF
mov ss, ax
mov sp, 0xFFFF ; Stack pointer is 0xFFFF
sti ; Enable interrupts
; Install the GDT
call installGDT ; Install the GDT
; Enable A20
call enableA20_KKbrd_Out ; Enable A20 through output port
; Print loading messages
mov si, msg1
call print16 ; Print the message
mov si, msg2 ; A message
call print16 ; Print the message
; Enter protected mode
cli ; Clear interrupts
mov eax, cr0 ; Set bit 0 in CR0--ENTER protected mode
or eax, 1
mov cr0, eax
jmp CODE_DESC:main32 ; Far jump to fix CS
; We can't re-enable interrupts because that would triple-fault. This will be fixed in main32.
bits 32 ; We are now 32 bit!
%include "include/stdio32.inc"
main32:
; Set registers up
mov ax, 0x10 ; Setup data segments to 0x10(data selector)
mov ds, ax
mov ss, ax
mov es, ax
mov esp, 90000h ; Stack begins from 90000h
call clear32 ; Clear screen
mov ebx, MSGHIDDEN ; Setup params for our message
call puts32 ; Call puts32 to print
cli ; Clear interrupts
hlt ; Halt the processor
LGDT CODE:
%ifndef __GDT_INC_67343546FDCC56AAB872_INCLUDED__
%define __GDT_INC_67343546FDCC56AAB872_INCLUDED__
bits 16 ; We are in 16-bit mode
; -----------------------------------------
; installGDT - install the GDT
; -----------------------------------------
installGDT:
cli ; Clear interrupts
pusha ; Save the registers
lgdt [toc] ; Load GDT into GDTR
sti ; Re-enable interrupts
popa ; Restore registers
ret ; Return!
; ----------------------------------------
; Global Descriptor Table data
; ----------------------------------------
gdt_data:
dd 0 ; Null descriptor
dd 0
; GDT code starts here
dw 0FFFFh ; Limit low
dw 0 ; Base low
db 0 ; Base middle
db 10011010b ; Access
db 11001111b ; Granularity
db 0 ; Base high
; GDT data starts here(mostly same as code, only difference is access)
dw 0FFFFh ; Limit low, again.
dw 0 ; Base low
db 0 ; Base middle
db 10010010b ; Access - different
db 11001111b ; Granularity
db 0
gdt_end:
toc:
dw gdt_end - gdt_data - 1
dd gdt_data ; Base of GDT
; Descriptor offsets names
%define NULL_DESC 0
%define CODE_DESC 0x8
%define DATA_DESC 0x10
; End of GDT code.
%endif ;__GDT_INC_67343546FDCC56AAB872_INCLUDED__
EDIT #3: Possible problems with stdio and stdio32 so putting those here
stdio.inc:
; ==============================================
; stdio.inc - IO routines
; Thanks to BrokenThorn Entertainment
; ==============================================
; First, show that we are defining stdio.inc
%ifndef __STDIO_INC_67343546FDCC56AAB872_INCLUDED__
%define __STDIO_INC_67343546FDCC56AAB872_INCLUDED__
; ------------------------------------------------
; Print16 - printing a null terminated string
; SI - 0 terminated string
; ------------------------------------------------
print16:
pusha ; Save registers for later
.loop1:
lodsb ; Load the next byte from the string into AL
or al, al ; Is AL 0?
jz print16done ; Yes - we are done.
mov ah, 0eh ; No - print next character
int 10h ; Call BIOS
jmp .loop1 ; Repeat!
print16done:
popa ; Restore registers
ret ; Return
%endif ;__STDIO_INC_67343546FDCC56AAB872_INCLUDED__
stdio32.inc:
; ==================================================
; stdio32.inc - Handles 32-bit graphics
; ==================================================
%ifndef __GFX_INC_67343546FDCC56AAB872_INCLUDED__
%define __GFX_INC_67343546FDCC56AAB872_INCLUDED__
bits 32 ; 32-bits
%define VIDEO_MEMORY 0xB8000 ; Video memory address
%define COLS 80 ; Width of the screen
%define LINES 25 ; Height of the string
%define CHARACTER_ATTRIBURE 63 ; White text on Cyan background
_CurrentXPos db 0
_CurrentYPos db 0
; ---------------------------------------------------------
; char32 - Print a character to the screen(32-bit)
; BL - Character to print
; ---------------------------------------------------------
char32:
pusha ; Save registers
mov edi, VIDEO_MEMORY ; Get the pointer to the video memory
; Get current position
xor eax, eax ; Zero-out EAX
mov ecx, COLS*2 ; Mode 7 has 2 bytes per character - and so COLS*2 bytes per line.
mov al, byte [_CurrentYPos] ; Get Y position
mul ecx ; Multiply COLS * Y
push eax ; Save EAX--the multiplication
mov al, byte [_CurrentXPos] ; Multiply _CurrentXPos by 2 because 2 bytes per char(Mode 7)
mov cl, 2
mul cl
pop ecx ; Pop Y*COLS result
add eax, ecx
xor ecx, ecx
add edi, eax ; Add to base address
; Watch for a new line!
cmp bl, 0x0A ; 0x0A - newline character.
je .row ; Jump to .row if newline char
; Print the character
mov dl, bl ; Get character
mov dh, CHARACTER_ATTRIBURE ; Change DH to Character Attribute
mov word [edi], dx ; Write to video memory
; Update next pos
inc byte [_CurrentXPos] ; Go to next character
;cmp byte [_CurrentXPos], COLS ; EOL?
;je .row ; Yep - move to next row
jmp .done ; Nope - BAIL!
.row:
; Goto next row.
mov byte [_CurrentXPos], 0 ; Return to col 0
inc byte [_CurrentYPos] ; Go to next row.
.done:
; Return
popa
ret
; ---------------------------------------------------------
; puts32 - print a null terminated string
; EBX - String to print
; ---------------------------------------------------------
puts32:
; Store registers(EBX and EDI)
pusha ; Save registers
push ebx ; Copy string
pop edi
.loop:
mov bl, byte [edi] ; Get next character
cmp bl, 0 ; Check if it's null
je .done ; It is - done printing.
call char32 ; It isn't - print the character
inc edi ; Increment EDI for next character
jmp .loop ; Restart loop
.done:
; Update the hardware cursor
mov bh, byte [_CurrentXPos] ; BH and BL are the params for movecursor
mov bl, byte [_CurrentYPos]
call movecursor ; Update cursor position
popa ; Restore registers
ret ; Return!
bits 32
; ---------------------------------------------------------
; movecursor - Move the cursor to an X and Y position
; BH - X position
; BL - Y position
; ---------------------------------------------------------
movecursor:
pusha ; Save registers
; Get current position(BH and BL are relative to the current position on screen, not memory)
xor eax, eax ; Clear EAX
mov ecx, COLS ; Store COLS in ECX for multiplication
mov al, bh ; Get Y position
mul ecx ; Multiply Y by cols
add al, bl ; Add X
mov ebx, eax
; Set low byte index to VGA register
mov al, 0x0f
mov dx, 0x03D4
out dx, al
mov al, bl
mov dx, 0x03D5
out dx, al
; Do the same but for high byte
xor eax, eax
mov al, 0x0e
mov dx, 0x03D4
out dx, al
mov al, bl
mov dx, 0x03D5
out dx, al
popa ; Restore registers
ret ; Return
; ---------------------------------------------------------
; clear32 - clearing the screen
; ---------------------------------------------------------
clear32:
pusha ; Save registers
cld
mov edi, VIDEO_MEMORY ; Set EDI to video memory
mov cx, 2000
mov ah, CHARACTER_ATTRIBURE ; Clear screen with character attribute
mov al, ' ' ; Replace all chars with space
rep stosw
mov byte [_CurrentXPos], 0 ; Reset X and Y position
mov byte [_CurrentYPos], 0
popa ; Restore registers
ret
; ---------------------------------------------------------
; gotoxy - Set X and Y position
; AL - X position
; AH - Y position
; ---------------------------------------------------------
gotoxy:
pusha
mov [_CurrentXPos], al ; Set X and Y position
mov [_CurrentYPos], ah
popa
ret
%endif ;__STDIO_INC_67343546FDCC56AAB872_INCLUDED__
EDIT0 reviews the 1st stage
EDIT1 reviews the 2nd stage
EDIT2 reviews the included stdio32.inc
[EDIT0]
The sector that you load yourself, was loaded at offset 0x500 in the extra segment at 0x7C0.
The jmp 0x500
instruction jumps to offset 0x500 in the code segment.
There's no guarantee that CS==0x7C0. Use a far jump instead:
jmp 0x07C0:0x0500
mov dl, 0x80
Are you sure about this drive number? It's always better to use the value that BIOS provided in the DL register when the bootloader is given control.
mov sp, 0xFFFF
A word-aligned stack pointer will be so much nicer!
Use mov sp, 0xFFFE
or even xor sp, sp
(trusting wraparound).
fail: mov si, FAILURE_MSG call print good: mov si, LOADOK call print jmp 0x500
In case loading the sector failed, you jump to fail, but after displaying the message you happily continue (fall through) with the code at good. You need to halt:
fail:
mov si, FAILURE_MSG
call print
theEnd:
cli
hlt
jmp theEnd
good:
mov si, LOADOK
call print
jmp 0x07C0:0x0500
[EDIT1]
xor ax, ax ; Null segments AX, DS, and ES mov ds, ax mov es, ax
You have applied corrections to the first stage bootloader. Control has successfully passed to 0x07C0:0x0500. Because the second stage uses an org 0x500
it is vital that the segment registers (at least DS) remain at 0x07C0. But the first thing I see is that you reload DS and ES with 0. This is not going to work since it creates a mismatch between the offsets (in accordance with the org
) generated by the assembler and the offsets (relative DS) where the actual data resides.
The first manifestation of this mismatch is with the lgdt [toc]
instruction.
All the data from the second stage resides in memory above the 0x8100 mark (0x7C00 + 0x0500).
The toc label on the other hand will have been translated by the assembler to an offset address of little over 0x0500. With DS=0, this addresses memory much lower than that 0x8100 mark. There's simply no valid data to act upon, hence the crash (or similar)!
The problem is not only with the lgdt [toc]
instruction. mov si, msg1
and mov si, msg2
will fail in the same way, and mov ebx, MSGHIDDEN
and dd gdt_data
will fail on not being linear addresses at all. dw gdt_end - gdt_data - 1
is not impaired since the difference does not change when both magnitudes are wrong (in the same manner).
What you should do is either keep the segment registers at 0x07C0 in accordance with the org 0x0500
, or much better zero the segment registers already in the first stage bootloader and use an org 0x7C00
for the first stage and an org 0x8100
for the second stage. These settings will leave everything at the same place in memory, with a 768-bytes gap between the first and second stages which is something we don't see everyday.
The much preferred way however is to zero the segment registers already in the first stage bootloader and use an org 0x7C00
for the first stage and an org 0x0600
for the second stage. This puts the second stage right behind the low memory BIOS variables. MS-DOS being our great example.
installGDT: cli ; Clear interrupts pusha ; Save the registers lgdt [toc] ; Load GDT into GDTR sti ; Re-enable interrupts popa ; Restore registers ret ; Return!
It is redundant to preserve the general purpose registers in this code. The lgdt
instruction doesn't change any of them.
; Create the stack(0x0000-0xFFFF). mov ax, 0x0000 mov ss, ax ; Point SS to 0x0000 mov sp, 0xFFFE ; Stack pointer at 0xFFFE
mov ax, 0x9000 ; Stack begins at 0x9000-0xFFFF mov ss, ax mov sp, 0xFFFF ; Stack pointer is 0xFFFF
mov ax, 0x10 ; Setup data segments to 0x10(data selector) mov ds, ax mov ss, ax mov es, ax mov esp, 90000h ; Stack begins from 90000h
(1) Don't use an odd value for SP.
(2) Don't put anything between loading SS and ESP.
(3) Don't use the word "begin" both for the low end and the high end of the stack.
You are setting up the stack 3 times and each time in a different place and size! The first time it runs from 0x00000000 to 0x0000FFFD, the second time it runs from 0x00090000 to 0x0009FFFE, and the third time it runs from 0x00000000 to 0x0008FFFF.
I would advice to setup the stack such that the real mode addresses correspond to the protected mode addresses. At least for the top 64KB.
In the first stage use:
mov ax, 0x8000
mov ss, ax
xor sp, sp ; 0x00080000 - 0x0008FFFF (64KB)
In the second stage use:
mov ax, DATA_DESC
mov ss, ax
mov esp, 0x00090000 ; 0x00000000 - 0x0008FFFF (576KB)
bits 16
org 0x7C00
jmp main
; args: SI
print:
lodsb
or al, al
jz donePrint
mov bx, 0007h
mov ah, 0Eh
int 10h
jmp print
donePrint:
ret
main:
cli
xor ax, ax
mov ds, ax
mov es, ax
mov fs, ax
mov gs, ax
mov ax, 0x8000 ; Stack between 0x8000:0x0000
mov ss, ax ; and 0x8000:0xFFFF (64KB)
xor sp, sp
sti
mov si, LOADING
call print
readSector:
mov dh, 0
mov cx, 0002h
mov bx, 0x0600 ; Sector buffer at 0x0000:0x0600
mov ax, 0201h
int 13h
jnc good
fail:
mov si, FAILURE_MSG
call print
end:
cli
hlt
jmp end
good:
mov si, LOADOK
call print
jmp 0x0000:0x0600 ; Start second stage
LOADING db 13, 10, "Booting loader...", 13, 10, 0
FAILURE_MSG db 13, 10, "ERROR: Press any key to reboot.", 10, 0
LOADOK db 13, 10, "load ok", 10, 0
TIMES 510 - ($-$$) DB 0
DW 0xAA55
bits 16
org 0x0600
jmp main
; ---------------------------------------
; Includes
; ---------------------------------------
%include "include/stdio.inc"
%include "include/gdt.inc"
%include "include/a20.inc"
; ---------------------------------------
; Data and strings
; ---------------------------------------
stringhidden db "Not showing string.", 0x0D, 0x0A, 0x00
stringhidden db "Not showing string.", 0x0D, 0x0A, 0x00
; ---------------------------------------
; main - 16-bit entry point
; Installing GDT, storing BIOS info, and enabling protected mode
; ---------------------------------------
main:
call installGDT
call enableA20_KKbrd_Out
mov si, msg1
call print16
mov si, msg2
call print16
; Enter protected mode
cli
mov eax, cr0
or eax, 1
mov cr0, eax
jmp CODE_DESC:main32 ; Far jump to fix CS
; We can't re-enable interrupts because that would triple-fault. This will be fixed in main32.
bits 32 ; We are now 32 bit!
%include "include/stdio32.inc"
main32:
; Set registers up
mov ax, DATA_DESC
mov ds, ax
mov es, ax
mov fs, ax
mov gs, ax
mov ss, ax ; Stack between 0x00000000
mov esp, 0x00090000 ; and 0x0008FFFF (576KB)
call clear32 ; Clear screen
mov ebx, MSGHIDDEN
call puts32 ; Call puts32 to print
cli
hlt
...
[EDIT2]
@Sep Roland Your changes mostly work, but unfortunately there might be something wrong with my video code as well.. The system appears to stop typing after a little bit(24 bytes) and just gives up. I posted video code, but if this seems a like too much to ask I can stop here. Thank you!
I reviewed your stdio32.inc and have found a number of errors in it!
rep stosw
will be using ECX. The garbage in the high word of ECX can do a lot of harm. Use mov ecx, 2000
.out
twice the low byte instead of doing low byte then high byte.The improved stdio32.inc
; ==================================================
; stdio32.inc - Handles 32-bit graphics
; ==================================================
%ifndef __GFX_INC_67343546FDCC56AAB872_INCLUDED__
%define __GFX_INC_67343546FDCC56AAB872_INCLUDED__
bits 32
%define VIDEO_MEMORY 0xB8000 ; Video memory address
%define COLS 80 ; Width of the screen
%define LINES 25 ; Height of the screen
%define ATTRIB 0x3F ; WhiteOnCyan
_CurrentXPos db 0
_CurrentYPos db 0
; ---------------------------------------------------------
; char32 - Print a character to the screen(32-bit)
; BL - Character to print
; ---------------------------------------------------------
char32:
cmp bl, 10
je .row
push eax
push edi
movzx edi, byte [_CurrentYPos]
imul edi, COLS*2
movzx eax, byte [_CurrentXPos]
lea edi, [VIDEO_MEMORY + edi + eax * 2]
mov al, bl
mov ah, ATTRIB
mov [edi], ax
pop edi
pop eax
inc byte [_CurrentXPos]
cmp byte [_CurrentXPos], COLS ; EOL?
je .row
ret
.row:
mov byte [_CurrentXPos], 0
inc byte [_CurrentYPos]
ret
; ---------------------------------------------------------
; puts32 - print a null terminated string
; EBX - String to print
; ---------------------------------------------------------
puts32:
push ebx
push edi
mov edi, ebx
jmp .start
.loop:
call char32
inc edi
.start:
mov bl, [edi]
test bl, bl
jnz .loop
movzx ebx, word [_CurrentXPos] ; Load XPos and YPos together!
call movecursor ; Update hardware cursor
pop edi
pop ebx
ret
; ---------------------------------------------------------
; movecursor - Move the cursor to an X and Y position
; BL - X position
; BH - Y position
; BH and BL are relative to the current position on screen
; ---------------------------------------------------------
movecursor:
pushad
movzx eax, bh ; BH * COLS + BL
imul eax, COLS
movzx ebx, bl
add ebx, eax
; Set low byte index to VGA register
mov al, 0x0F
mov dx, 0x03D4
out dx, al
mov al, bl
inc dx
out dx, al
; Do the same but for high byte
mov al, 0x0E
dec dx
out dx, al
mov al, bh
inc dx
out dx, al
popad
ret
; ---------------------------------------------------------
; clear32 - clearing the screen
; ---------------------------------------------------------
clear32:
pushad
mov edi, VIDEO_MEMORY
mov ecx, 1000 ; 2000 words
mov eax, ((ATTRIB * 256 + 32) * 256 + ATTRIB) * 256 + 32
rep stosd
mov [_CurrentXPos], cx ; Reset XPos and YPos together!
popad
ret
; ---------------------------------------------------------
; gotoxy - Set X and Y position
; AL - X position
; AH - Y position
; ---------------------------------------------------------
gotoxy:
mov [_CurrentXPos], ax ; Set XPos and YPos together!
ret
%endif ;__STDIO_INC_67343546FDCC56AAB872_INCLUDED__
While optimizing the code, it has become much shorter.
What I am thinking now is that your code had gotten longer than 512 bytes and that the single-sector load from the first stage did not bring all of it into memory. That could certainly explain some partial string output that you are experiencing.
I never remember (and my manual doesn't mention it), but for NASM is pusha
the same as pushad
when bits 32
is active?
I deal with it this way:
pushad
in 32-bit code.pushad
and preferring to push
individual registers as this is faster.