Search code examples
assemblytasm

change string in TASM


I'm trying to write a program that finds the shortest word in an input string, using the algorithm below.

My Algorithm:

Read character from input, but not echo
If character is space:
    current_string_length = 0;
    current_string = "";
    echo character
Else If character belong to English alphabet:
    current_string_length++;
    current_string += character;
    if current_string_length < max_string_length:
       max_string = current_string;
       max_string_length = current_string_length;
    echo character
Else If character is "\n":
    print max_string

But I'm new to assembly and can't find a way to append a character to a string or to clear a string. How can I do this, or do I need to choose a different algorithm for this task?

My code:

; Program header: small memory model with one data segment, which the code
; loads into DS at start-up (mov ax, @data / mov ds, ax).
.model small
.stack 100h                             ; reserves 100h bytes for stack

.data
;---------------------------------------------------------------------------------- 
; Variables
; Both buffers are pre-filled with '$', the terminator expected by the DOS
; "print string" service (int 21h, ah = 9), so any prefix written into them
; is already terminated as long as at least one '$' byte is left untouched.
; NOTE(review): the "max" names look historical - the program's stated goal
; is the SHORTEST word, and maxString is what gets printed as the result.
maxString           db 128 dup('$')     ; best word found so far (printed at the end)
currentString       db 128 dup('$')     ; word currently being typed
maxLength           dw 0                ; length associated with maxString
currentLength       dw 0                ; length associated with currentString
;---------------------------------------------------------------------------------- 
; Messages
; '$'-terminated texts for int 21h / ah = 9. 10,13 = line feed, carriage return.
; NOTE(review): the usage text promises words of up to 128 characters, but a
; 128-byte buffer holding a 128-letter word has no room left for the '$'
; terminator - confirm the intended limit.
helloMessage  db 10,13,'Assembly Shortest Word Finder Version 1.0 Copyright (c) 2016 RodionSoft',10,13,10,13,'Usage: enter string with length of words not more then 128 characters',10,13,10,13,10,13,10,13,'Enter string: $'
resultMessage db 10,13,"Shortest word: $"
;---------------------------------------------------------------------------------- 
; Program
.code

MAX_WORD_LEN    equ 127                 ; 128-byte buffers minus one byte for '$'
KEY_ENTER       equ 0Dh                 ; carriage return (13 decimal, NOT 13h)
KEY_SPACE       equ 20h

;----------------------------------------------------------------------------------
; Program entry point.
;   Reads keys without echo until Enter is pressed. Letters (A-Z, a-z) are
;   appended to currentString and echoed; a space ends the current word and
;   is echoed; every other key is ignored. Each time a word ends, it replaces
;   maxString if it is the first word or strictly shorter than the word stored
;   there. On Enter the result message and maxString are printed.
;   Letters beyond MAX_WORD_LEN in a single word are ignored (and not echoed).
start:
    mov     ax, @data
    mov     ds, ax
    mov     es, ax                      ; rep movsb in commitWord writes to es:di
    cld                                 ; string instructions count upwards

;----------------------------------------------------------------------------------
; Print helloMessage
    lea     dx, helloMessage            ; LEA - Load Effective Address
    mov     ah, 9                       ; DOS: print '$'-terminated string at ds:dx
    int     21h

;----------------------------------------------------------------------------------
; main loop
readChar:
    mov     ah, 08h                     ; DOS: read character without echo -> al
    int     21h

    cmp     al, KEY_ENTER               ; if (al == enter)
    je      printResult                 ;   finish
    cmp     al, KEY_SPACE               ; if (al == space)
    je      spaceInput                  ;   end of word

    ; Character codes are unsigned, so use jb/jbe/ja rather than jl/jg.
    cmp     al, 'A'
    jb      readChar                    ; below 'A'          -> ignore
    cmp     al, 'Z'
    jbe     goodInput                   ; 'A'..'Z'           -> letter
    cmp     al, 'a'
    jb      readChar                    ; '[' .. '`'         -> ignore
    cmp     al, 'z'
    ja      readChar                    ; above 'z'          -> ignore

goodInput:                              ; currentString += al
    mov     bx, currentLength
    cmp     bx, MAX_WORD_LEN
    jae     readChar                    ; buffer full: drop the letter
    mov     currentString[bx], al
    inc     currentLength
    jmp     echoChar

spaceInput:
    call    commitWord                  ; word finished; al is preserved

echoChar:
    mov     dl, al                      ; int 21h / ah = 2 prints the char in DL
    mov     ah, 2
    int     21h
    jmp     readChar

;----------------------------------------------------------------------------------
printResult:
    call    commitWord                  ; the last word has no trailing space
    lea     dx, resultMessage
    mov     ah, 9
    int     21h
    lea     dx, maxString               ; all '$' (prints nothing) if no word was typed
    mov     ah, 9
    int     21h
;----------------------------------------------------------------------------------
exit:
    mov     ax, 4C00h                   ; DOS: terminate with return code 0
    int     21h

;----------------------------------------------------------------------------------
; commitWord
;   Ends the word held in currentString. If the word is non-empty and either
;   no word has been stored yet (maxLength = 0) or it is strictly shorter than
;   the stored one, it is copied to maxString, '$'-terminated, and its length
;   is stored in maxLength. currentLength is reset to 0 in every case.
;   In:    ds = es = data segment, direction flag clear
;   Out:   maxString / maxLength / currentLength updated
;   Clobb: flags only (cx, si, di are saved and restored; ax is not touched)
commitWord proc near
    push    cx
    push    si
    push    di

    mov     cx, currentLength
    jcxz    commitDone                  ; no pending word (e.g. repeated spaces)
    mov     currentLength, 0            ; "clear" currentString for the next word

    cmp     maxLength, 0
    je      commitStore                 ; first word always wins
    cmp     cx, maxLength
    jae     commitDone                  ; not shorter: keep the stored word

commitStore:
    mov     maxLength, cx
    lea     si, currentString
    lea     di, maxString
    rep     movsb                       ; copy cx bytes ds:si -> es:di
    mov     byte ptr [di], '$'          ; cut off leftovers of a longer old word

commitDone:
    pop     di
    pop     si
    pop     cx
    ret
commitWord endp

;----------------------------------------------------------------------------------
; StringComparison
;   Placeholder: compares maxLength with currentLength (signed compare) and
;   reaches currentBigger when currentLength is the larger one. The copy
;   "maxString = currentString" is not implemented yet, so both paths end up
;   at the same place.
;   In:    maxLength, currentLength (word variables in the data segment)
;   Out:   nothing; every register pushed on entry is restored before ret
StringComparison proc 
    push cx                             ; save general-purpose registers
    push dx
    push bx
    push ax
    push bp
    push si
    push di

    mov dx, currentLength               ; dx = currentLength
    mov cx, maxLength                   ; cx = maxLength
    cmp cx, dx                          ; if(!(maxLength < currentLength))
    jnl return                          ;   return

    currentBigger:
        ; maxString = currentString
    return:
    pop di                              ; restore in reverse push order
    pop si
    pop bp
    pop ax
    pop bx
    pop dx
    pop cx
    ret 
endp
end start

Solution

  • can't find way to add character to string and clean string.

    Well, in the first place it depends on your definition of what a string is (this is a common theme in assembly: deciding how you store your data, i.e. which bits/bytes are used for what and what meaning you give them).

    Look for example at resultMessage. It's composed of consecutive bytes with ASCII encoded values, ending with value '$' used as terminator for the DOS service.

    In C/C++ the classic string literal is similar, but for terminator the value 0 is used.

    In (old 16-bit) Pascal the first byte contains the length (0-255) of the string, the following "length" bytes contain the ASCII letters, and there is no terminator at the end.

    In Linux the system call that writes a string to the console takes a pointer to the letters, as in the DOS/C definitions, but without any terminator; the length of the string has to be provided separately as another argument, and it's up to the programmer how to obtain it.

    So, for such a simple thing as a string, you already have 4 different ways to store it in memory.

    But in your case you don't only need to work with a final string; you need to build it up and alter it, so probably the easiest way is to allocate a byte array in memory: currentString db 128 dup('$')

    And keep an end() pointer (the address just past the last stored character) in some register, let's say si.

    Then common tasks can be achieved like this:

    ; all callable subroutines below expect the register "si"
    ; to point beyond last character of currentString
    ; (except the clearString of course, which works always)
    
    appendLetterInAL:
        ; Appends the byte in al at [si] and advances si by one, unless si
        ; has already reached currentString+127 (the last byte is kept free
        ; for the '$' terminator); in that case nothing is stored.
        ; In:  al = letter, si = end() of currentString
        ; Out: si = new end() when the letter was stored
        cmp     si,OFFSET currentString+127
        jb      appendLetterInAL_store
        ret                      ; buffer is full -> ignore the letter
    appendLetterInAL_store:
        mov     BYTE PTR [si],al ; new letter goes right after the last one
        inc     si               ; end() moves one byte forward
        ret
    
    clearString:
        ; Empties the current string by resetting the end() pointer in si
        ; to the first byte of currentString. The buffer contents are left
        ; untouched. Also usable as initialisation at program start.
        mov     si,OFFSET currentString
        ret
    
    prepareStringForDOSOutput:
        ; Makes currentString ready for a '$'-terminated DOS print call.
        ; In:  si = end() of currentString
        ; Out: dx = address of currentString, byte at [si] = '$'
        mov     dx,OFFSET currentString ; dx = pointer to string
        mov     BYTE PTR [si],'$'       ; terminator goes at end()
        ret
    
    getLengthOfString:  ; sets cx to length of current string
        ; In:  si = end() pointer (one past the last stored character)
        ; Out: cx = si - OFFSET currentString, i.e. number of stored bytes
        ;      (flags are left as set by the sub instruction)
        ; lea     cx,[si - currentString] ; probably not allowed in 16b?
        ; NOTE(review): whether the assembler accepts a negated symbol
        ; offset in the lea form above is unverified - hence the variant below.
        ; other variant
        mov     cx,si                     ; cx = end()
        sub     cx,OFFSET currentString   ; cx = end() - start = length
        ret
    
    copyCurrentStringToDI:
      ; Copies the bytes from currentString up to (not including) si into
      ; the buffer at di, then writes a '$' terminator after the copy.
      ; In:  si = end() of currentString, di = destination buffer
      ; Out: di and bx hold their original values on return;
      ;      al is overwritten whenever at least one byte is copied
        push    bx
        push    di
        lea     bx,[currentString]  ; bx = read pointer
        jmp     copyCurrentStringToDI_check
    copyCurrentStringToDI_next:
        mov     al,[bx]             ; move one byte via al
        mov     [di],al
        inc     bx
        inc     di
    copyCurrentStringToDI_check:
        cmp     bx,si               ; keep going while read pointer < end()
        jb      copyCurrentStringToDI_next
        mov     BYTE PTR [di],'$'   ; DOS terminator after the copied bytes
        pop     di                  ; back to the start of the destination
        pop     bx
        ret
    

    So basically two pointers (the current end() in si, and the start of the string, fixed at compile time as currentString) are enough to do many manipulations with it.

    I hope the algorithms and the data structure used are easy to understand from the code and comments.