Search code examples
stringassemblyx86-16subroutine

Why are we switching segments while calculating string length?


I have provided 2 different implementations of switching segments when calculating the string length. I don't understand why we are switching the extra segment and data segment or more precisely copying data segment (register DS) contents into extra segment (register ES). If I remove the lines push ds and pop es the result does not change, but when I replace les di, [bp+4] by mov di, [bp+4] the message is displayed on the screen but with a bunch of other different characters.

Why is it happening? I am new in assembly.

org 0x0100
jmp start

message: db 'Hello world!', 0      ; First string to be concatenated


; Subroutine to calculate the length of a string
; Takes the segment and offset of a string as parameters
strlen:
    push bp
    mov bp, sp
    push es
    push cx
    push di
    les di, [bp+4]        ; Point es:di to string
    mov cx, 0xffff        ; Load maximum number in cx
    xor al, al            ; Load a zero in al
    repne scasb           ; Find zero in the string
    mov ax, 0xffff        ; Load maximum number in ax
    sub ax, cx            ; Find change in cx
    dec ax                ; Exclude null from length
    pop di
    pop cx
    pop es
    pop bp
    ret 4

; Subroutine to print a string
; Takes the x position, y position, attribute, and address of a null-terminated string as parameters
printstr:
    push bp
    mov bp, sp
    pusha
    push di
    push ds               ; Push segment of string
    mov ax, [bp+4]
    push ax               ; Push offset of string
    call strlen           ; Calculate string length

    cmp ax, 0             ; Is the string empty?
    jz exit               ; No printing if string is empty

    mov cx, ax            ; Save length in cx
    mov ax, 0xb800        ; Video memory base address
    mov es, ax            ; Point es to video base
    mov al, 80            ; Load al with columns per row
    mul byte [bp+8]       ; Multiply with y position
    add ax, [bp+10]       ; Add x position
    shl ax, 1             ; Turn into byte offset
    mov di, ax            ; Point di to required location
    mov si, [bp+4]        ; Point si to string
    mov ah, [bp+6]        ; Load attribute in ah
    cld                   ; Clear direction flag for auto-increment mode

 nextchar:
    lodsb                 ; Load next char in al
    stosw                 ; Print char/attribute pair
    loop nextchar         ; Repeat for the whole string

 exit:
    pop ds
    pop di
    popa
    pop bp
    ret 8

start:
    mov ax, 30
    push ax               ; Push x position
    mov ax, 20
    push ax               ; Push y position
    mov ax, 0x7           ; Blue on white attribute
    push ax               ; Push attribute
    mov ax, message
    push ax               ; Push address of message
    call printstr         ; Call the printstr subroutine

    mov ax, 0x4c00        ; Terminate program
    int 0x21

Alternative version:

    push bp
    mov bp, sp
    pusha

    push ds
    pop es         ; load ds in es
    mov di, [bp+4] ; point di to string
    mov cx, 0xffff ; load maximum number in cx
    xor al, al 
    repne scasb 
    mov ax, 0xffff 
    sub ax, cx 
    dec ax 
    jz done 

    mov cx, ax
    ...

Solution

  • when i chnage LES with mov di, [bp+4] the message is still displayed on the screen

    Key is here that the segment registers in your program are all equal to each other from the start. That's because you are creating a .COM program (ORG 0x100 does that). All that changing of DS or ES does not really count, nor is it necessary in this case.

    I dont understand why are we sitcing the extra segment and data segment or more precisely copying data segemnt contents into extra segment

    Setting up the ES segment register is required for using the scasb instruction that always depends on ES:DI.

    Now, there is an important mismatch in the way you return from the strlen subroutine (ret 4) and the way you exit from the printstr subroutine (exit: pop ds).

    • If you choose to keep ret 4 (that discards both the offset and the segment parts of the far pointer) then you need to remove that pop ds.
    • If you want to keep that pop ds then you need to write ret 2 (that only discards the offset part of the far pointer).

    Tip: printstr does change ES different to DS (setting it to 0xB800). There you would normally preserve ES on the stack.