Search code examples
assemblyoperating-systemmemory-segmentation

Problems in understanding segmentation (Trying to write a simple OS)


I'm trying to learn low-level programming, so I thought that learning how an OS works would be the best choice, and I started learning from this tutorial: https://github.com/cfenollosa/os-tutorial

This is the code:

main.asm

; main.asm -- boot sector (first 512 bytes) followed by one extra 512-byte
; sector that holds test_load.
; Flow: read 1 sector from disk to offset 0x1000, switch to 32-bit protected
; mode, print two messages, then jump to the code that was read to 0x1000.
; [org 0x7c00] makes NASM compute every label below as if the whole image
; starts at address 0x7c00.
[org 0x7c00]
; 16-bit stack: bp = sp = 0x9000
mov bp, 0x9000
mov sp, bp
; disk_load arguments: bx = destination offset, dh = number of sectors
mov bx,0x1000
mov dh,1
call disk_load
; switch_to_pm never returns here: it far-jumps to init_pm, which calls BEGIN_PM
call switch_to_pm

; The included routines are only reached through call/jmp, never by
; falling through from the code above.
%include "print.asm"
%include "gdt.asm"
%include "32print.asm"
%include "32switch.asm"
%include "disk_func.asm"

[bits 32]
; BEGIN_PM -- called by init_pm after the switch to 32-bit protected mode.
; print_string arguments as used here: ebx = string address, eax = byte
; offset into video memory, cl = color selector (1 = red, 2 = green,
; anything else = white), ch = 1 to clear instead of print.
; This code runs at the address NASM assumed (org 0x7c00), so the plain near
; calls to print_string get a correct relative displacement.
BEGIN_PM:
mov ebx, MSG_PROT_MODE
mov eax,0
mov ch,1
mov cl,0
; ch = 1: print_string only blanks video memory starting at offset 0
call print_string
mov ch,0
; same arguments with ch = 0: now MSG_PROT_MODE is printed in white
call print_string
mov ebx,MSG_LOADED_Kernel
; byte offset 30 = 15 character cells (each cell is 2 bytes)
mov eax,30
mov cl,1
mov ch,0
call print_string
; continue in the sector that disk_load placed at 0x1000 (test_load)
jmp 0x1000

MSG_PROT_MODE db "Test White ... ",0
MSG_LOADED_Kernel db " Test Red ... ",0
; bootsector: pad to 510 bytes, then the 2-byte boot signature
times 510-($-$$) db 0
dw 0xaa55

; ---- second sector -------------------------------------------------------
; test_load is read from disk to 0x1000 at run time, but NASM still
; assembles it as if it sat directly after the boot sector (0x7c00 + 512).
; Consequences:
;   * label values such as MSG_PROMPT are wrong here and are rebased by hand;
;   * a near "call print_string" encodes a relative displacement computed
;     for the assumed position, so it would land at the wrong address;
;   * an absolute target (far call with selector:offset, or a call through
;     a register holding the address) does not depend on where this code
;     actually runs, so it works.
test_load:
mov ebx,MSG_PROMPT
; ebx = offset of MSG_PROMPT inside this sector ...
sub ebx,test_load
; ... plus the address the sector was really loaded to
add ebx,0x1000
mov cl,2
mov ch,0
mov eax,58
; far call: absolute CODE_SEG:offset. It pushes CS as well as the return
; address, while print_string returns with a near ret, so the saved CS
; stays on the stack; harmless here because execution stops at jmp $.
call CODE_SEG:print_string
; hang forever
jmp $

MSG_PROMPT db "Test Green ... ",0
; pad the second sector to exactly 512 bytes
times 512-($-test_load) db 0

print.asm

;-----------------------------------------------------------------------
; print -- write a zero-terminated string with BIOS INT 10h, AH=0Eh
;          (16-bit code; called before the switch to protected mode)
; In:    bx = address of the string
; Out:   nothing; all general registers are restored (pusha/popa)
; NOTE(review): INT 10h/AH=0Eh also takes a page number in BH, which here
;               holds the high byte of the string pointer -- confirm this
;               is acceptable on the target BIOS.
;-----------------------------------------------------------------------
print:
        pusha
        mov     ah, 0x0e                ; BIOS teletype output function
.next_char:
        mov     al, [bx]                ; al = current character
        test    al, al                  ; terminating zero reached?
        jz      .finished
        int     0x10                    ; print al
        add     bx, 1                   ; advance to the next character
        jmp     .next_char
.finished:
        popa
        ret

disk_func.asm

;-----------------------------------------------------------------------
; disk_load -- read sectors with BIOS INT 13h, AH=02h (16-bit code)
; In:    dh = number of sectors to read
;        bx = destination offset (the BIOS stores the data at ES:BX)
;        dl = drive number; not set in this file
;             NOTE(review): presumably still the value the BIOS left in
;             dl at boot -- confirm against the caller.
; Out:   on success returns with all general registers restored
;        on failure prints a message and hangs (never returns)
; Reads from cylinder 0, head 0, starting at sector 2.
;-----------------------------------------------------------------------
disk_load:
        pusha
        mov     ah, 0x02                ; BIOS function: read sectors
        mov     al, dh                  ; al = sectors to read
        mov     cl, 0x02                ; start sector
        mov     ch, 0x00                ; cylinder
        push    dx                      ; keep requested count; dh is reused below
        mov     dh, 0x00                ; head number
        int     0x13
        pop     dx                      ; dh = requested count again (flags untouched)
        jc      disk_error              ; carry set => BIOS reported an error
        cmp     al, dh                  ; al = sectors actually read
        jne     sectors_error
        popa
        ret

disk_error:
        mov     bx, DISK_ERROR          ; print expects the string address in bx
        call    print
        mov     dh, ah                  ; keep the BIOS status code in dh for inspection
        jmp     disk_loop

sectors_error:
        mov     bx, SECTORS_ERROR       ; print expects the string address in bx
        call    print

disk_loop:
        jmp     $                       ; unrecoverable: hang here

DISK_ERROR:
        db      "Disk read error", 0

SECTORS_ERROR:
        db      "Incorrect number of sectors read", 0

gdt.asm

; Global Descriptor Table used for the switch to protected mode:
; one null descriptor followed by a code and a data descriptor that both
; have base 0x00000000 and limit 0xfffff.
gdt_start:
        dq      0x0                     ; null descriptor: 8 zero bytes

; Code segment descriptor
gdt_code:
        dw      0xffff                  ; limit, bits 0-15
        dw      0x0000                  ; base, bits 0-15
        db      0x00                    ; base, bits 16-23
        db      0x9a                    ; 10011010b: access byte (present, ring 0, code, readable)
        db      0xcf                    ; 11001111b: flags nibble (4 KiB granularity, 32-bit) + limit bits 16-19
        db      0x00                    ; base, bits 24-31

; Data segment descriptor: same base and limit as the code segment
gdt_data:
        dw      0xffff                  ; limit, bits 0-15
        dw      0x0000                  ; base, bits 0-15
        db      0x00                    ; base, bits 16-23
        db      0x92                    ; 10010010b: access byte (present, ring 0, data, writable)
        db      0xcf                    ; 11001111b: flags nibble + limit bits 16-19
        db      0x00                    ; base, bits 24-31

gdt_end:

; Operand for lgdt: 16-bit size minus one, then 32-bit table address
gdt_desc:
        dw      (gdt_end - gdt_start) - 1
        dd      gdt_start

; Segment selectors = byte offset of each descriptor within the table
CODE_SEG        equ     gdt_code - gdt_start    ; 0x08
DATA_SEG        equ     gdt_data - gdt_start    ; 0x10

32switch.asm

[bits 16]
;-----------------------------------------------------------------------
; switch_to_pm -- leave 16-bit mode and enter 32-bit protected mode.
; Does not return to its caller: control ends up in BEGIN_PM.
;-----------------------------------------------------------------------
switch_to_pm:
        cli                             ; no interrupts while switching modes
        lgdt    [gdt_desc]              ; load the GDT size and address
        mov     eax, cr0
        or      eax, 0x1                ; set bit 0 of CR0 (protection enable)
        mov     cr0, eax
        jmp     CODE_SEG:init_pm        ; far jump: loads CS with the 32-bit code selector

[bits 32]
; init_pm -- first 32-bit code. CS already holds CODE_SEG from the far jump
; above, so from here on ordinary near calls and jumps stay inside that
; segment; that is why BEGIN_PM is called without a selector.
init_pm:
        mov     ax, DATA_SEG            ; every data segment register gets the flat data selector
        mov     ds, ax
        mov     es, ax
        mov     fs, ax
        mov     gs, ax
        mov     ss, ax

        mov     esp, 0x90000            ; 32-bit stack top
        mov     ebp, esp

        call    BEGIN_PM                ; BEGIN_PM ends in a jmp and never returns here

32print.asm

[bits 32]

VIDEO_MEMORY    equ 0xb8000
WHITE_ON_BLACK  equ 0x0f
RED_ON_BLACK    equ 0x0c
GREEN_ON_BLACK  equ 0x0a
; Address at which clear mode stops.
; NOTE(review): 0x492 bytes past VIDEO_MEMORY is 585 character cells; a full
; 80x25 text screen would end at 0xb8fa0 -- confirm the intended extent.
CLEAR_SCRN_END  equ 0xb8492

;-----------------------------------------------------------------------
; print_string -- write a zero-terminated string straight into text-mode
;                 video memory, or blank part of it (32-bit code)
; In:    ebx = address of the string
;        eax = byte offset from VIDEO_MEMORY where output starts
;              (each character cell is 2 bytes: character + attribute)
;        cl  = color: 1 = red, 2 = green, anything else = white
;        ch  = 1: clear mode -- fill cells with white-on-black spaces from
;              VIDEO_MEMORY+eax up to CLEAR_SCRN_END and return WITHOUT
;              printing the string; any other value prints the string
; Out:   nothing; all general registers are restored (pushad/popad)
; Returns with a near ret.
;-----------------------------------------------------------------------
print_string:
        pusha
        cmp     cl, 1
        je      skip_print32_red
        cmp     cl, 2
        je      skip_print32_red
        mov     cl, 0                   ; any other selector means white
skip_print32_red:
        mov     edx, VIDEO_MEMORY
        add     edx, eax                ; edx = address of the first cell to write
        cmp     ch, 1
        je      clear_scrn

print_string_loop:
        mov     al, [ebx]               ; al = current character
print32_color:
        cmp     cl, 1
        je      print_with_red
        cmp     cl, 2
        je      print_with_green
        mov     ah, WHITE_ON_BLACK
        jmp     print32_skip_red
print_with_red:
        mov     ah, RED_ON_BLACK
        jmp     print32_skip_red
print_with_green:
        mov     ah, GREEN_ON_BLACK
print32_skip_red:
        cmp     al, 0                   ; end of string?
        je      print_string_done
        mov     [edx], ax               ; store character + attribute
        add     ebx, 1                  ; next character
        add     edx, 2                  ; next cell
        jmp     print_string_loop
print_string_done:
        popa
        ret

; Clear mode. At least one cell is always written. The bound is an unsigned
; "below" check rather than an exact equality test, so an odd start offset
; or one already past CLEAR_SCRN_END still terminates instead of writing
; through memory indefinitely.
clear_scrn:
        mov     al, 0x20                ; space character
        mov     ah, WHITE_ON_BLACK
.fill:
        mov     [edx], ax               ; blank one cell
        add     edx, 2                  ; next cell
        cmp     edx, CLEAR_SCRN_END
        jb      .fill
        jmp     print_string_done

; NOTE(review): nothing in the visible sources jumps to this label.
make_32print_red:
        mov     cl, 1
        jmp     print32_color

In the 32switch.asm file, I understand that because init_pm starts with [bits 32] and uses 32-bit registers, the jump to it needs CODE_SEG, because CODE_SEG enables 32-bit mode. But then I don't understand why BEGIN_PM doesn't need to be called with CODE_SEG, since BEGIN_PM also uses 32-bit registers. The calls to print_string in BEGIN_PM and the jump to test_load don't need CODE_SEG either, but in test_load it is necessary to use CODE_SEG when calling print_string; otherwise it does not do anything.

(test_load is read from the disk into memory at 0x1000, and the jump to 0x1000 in BEGIN_PM is the jump to test_load.)

This really confuses me. Can someone explain why CODE_SEG is needed in those places and not in the others? And why does calling print_string from test_load require CODE_SEG, while calling it from BEGIN_PM doesn't?

I'm using NASM for assembling and QEMU for running the binary.

I'm new to assembly and low-level programming, so I may have misunderstood everything.

EDIT: While trying to solve and understand this, I found that in test_load, if I do

mov edx,print_string
call edx

it works but

call print_string

doesnt


Solution

  • I figured it out. Because test_load and print_string are assembled together, NASM computes addresses as if they will keep the same relative positions when executed. But test_load is actually read from the disk and placed in memory at 0x1000, so its distance to print_string differs from what NASM assumed. When I write

    call print_string
    

    and NASM assembles it, it emits this:

    e8 21 fe ff ff          call   0xfffffe26
    

    0xfffffe26 is -474 as a signed 32-bit value: the distance from the call instruction to print_string. That would work perfectly well if the code were placed in memory exactly the way it was written, but since test_load is loaded at 0x1000 instead of where NASM assumed it would be (0x7c00 + 512 = 0x7e00), the distance would have to be -474 + (0x7e00 - 0x1000), so

    call print_string
    

    will not work

    The reason why

    mov edx,print_string
    call edx
    

    works is that it calls the absolute address itself and no longer depends on the distance. The same is true of the version with CODE_SEG: because that is a far call, it uses the absolute address instead of a relative distance.