Search code examples
assemblyx86-16tasm

Writing to a file using TASM


At my university, I was given the following task:

Compare character by character the contents of two text files. Write to a new text file only those characters from the first file and their position number in the file that do not match the corresponding characters from the second. Using the TASM assembler.

Well, I wrote the program, ran it through the Turbo assembler, but nothing is written to the output.txt file. All files are in the same directory.
The contents of file1.txt are abobabiba1, the contents of file2.txt are abobapipa1.
In theory, something like this should be written to a file:

5 7

bb
.model large
.stack 1000h
.data
    infile1 db "file1.txt", 0
    infile2 db "file2.txt", 0
    outfile db "output.txt", 0
    buffer1 db 1000h dup(?)
    buffer2 db 1000h dup(?)
    outfile_char db 0
    position db 0
    end_of_file db 0
.code
main proc
    ; open the first input file
    mov ah, 3dh
    mov al, 0
    lea dx, infile1
    int 21h
    mov bx, ax ; bx contains the first file descriptor

    ; open second input file
    mov ah, 3dh
    mov al, 0
    lea dx, infile2
    int 21h
    mov cx, ax ; cx contains the second file descriptor

    ; create output file
    mov ah, 3ch
    mov al, 0
    lea dx, outfile
    int 21h
    mov dx, ax ; dx contains the output file descriptor

    ; read the contents of the first file into buffer1
    mov ah, 3fh
    mov bx, ax
    lea dx, buffer1
    mov cx, 1000h
    int 21h
    ; read the contents of the first file into buffer2
    mov ah, 3fh
    mov bx, cx
    lea dx, buffer2
    mov cx, 1000h
    int 21h

    ; compare the contents of the buffers character by character
    mov si, offset buffer1
    mov di, offset buffer2
    mov cx, 1000h
    mov position, 0
    
loop_start:
    mov al, [si]
    mov bl, [di]
    cmp al, bl
    je skip

    ; set the file pointer to the beginning of the file
    mov ah, 42h
    mov al, 0
    mov bx, dx
    int 21h

    ; write information to the output file
    mov ah, 02h
    mov dl, al
    mov ah, 40h
    mov bx, dx
    mov cx, 1
    mov dx, offset outfile_char
    int 21h
    mov ah, 02h
    mov dl, position
    add dl, '0'
    mov ah, 40h
    mov bx, dx
    mov cx, 1
    mov dx, offset outfile_char
    int 21h
    mov ah, 40h
    mov bx, dx
    mov cx, 1000h
    lea dx, buffer1
    int 21h
    mov ah, 40h
    mov bx, dx
    mov cx, 1000h
    lea dx, buffer2
    int 21h
skip:
    ; increment the position and move on to the next character
    inc si
    inc di
    inc position
    ; check for end of file
    cmp position, 10h
    jne loop_start

    ; close the output file
    mov ah, 3eh
    mov bx,dx
    int 21h

    ; close the second input file
    mov ah, 3eh
    mov bx, cx
    int 21h

    ; close the first input file
    mov ah, 3eh
    mov bx, ax
    int 21h

    ; we complete the program
    mov ah, 4ch
    int 21h

main endp
end main  

Solution

  • Are you sure that DS points to your .data section? Don't remember if .code takes care of that...


    The most important error in your program is that you are using several registers for multiple purposes each, and that you do this without taking precautions like preserving their pre-existing values on the stack.
    Another approach would be to store some data in memory-based variables. You definitely should choose this for your file handles, like in next code:

        ; open the first input file
        mov ah, 3dh
        mov al, 0
        lea dx, infile1
        int 21h
        jc  SomeError
        mov handle1, ax   ; the first file descriptor
    
        ; open second input file
        mov ah, 3dh
        mov al, 0
        lea dx, infile2
        int 21h
        jc  SomeError
        mov handle2, ax   ; the second file descriptor
    
        ; create output file
        mov ah, 3ch
        mov al, 0         <<<< THIS IS AN ERROR: NEEDS TO BE CX=0
        lea dx, outfile
        int 21h
        jc  SomeError
        mov handle3, ax   ; the output file descriptor
    
        ; read the contents of the first file into buffer1
        mov ah, 3fh
        mov bx, handle1
        mov cx, 16
        lea dx, buffer1
        int 21h
        jc  SomeError
    
        ; read the contents of the first file into buffer2
        mov ah, 3fh
        mov bx, handle2
        mov cx, 16
        lea dx, buffer2
        int 21h
        jc  SomeError
    
        ...
    
        ; close the output file
        mov ah, 3eh
        mov bx, handle3
        int 21h
    
        ; close the second input file
        mov ah, 3eh
        mov bx, handle2
        int 21h
    
        ; close the first input file
        mov ah, 3eh
        mov bx, handle1
        int 21h
    
    ; set the file pointer to the beginning of the file
    mov ah, 42h
    mov al, 0
    mov bx, dx
    int 21h
    

    In the middle part of the program where you compare buffers, you are also trying to reset the output filepointer. I say 'trying' because you don't setup the parameters correctly! But why would you need to reset anything? Right after a file is created, the filepointer is already at the start. Moreover resetting the filepointer with each find will keep overwriting the previous results!

    mov ah, 02h
    mov dl, al
    ...
    mov ah, 02h
    mov dl, position
    add dl, '0'
    

    In the part that 'writes information to the output file', you are mixing character output to the screen (but incomplete) with buffer output to a file. If the character output to the screen was there for testing purposes, then document this with suitable comments in the text, else remove these lines as they are very disturbing.

    This is how you would output the character. Notice that the character is at the address SI:

    ; write information to the output file
    mov ah, 40h
    mov bx, handle3
    mov cx, 1
    mov dx, si
    int 21h
    jc  SomeError
    

    And here you can learn how to write the position:

    mov al, position       ; [0,9] will work fine for your input files
    add al, '0'
    mov outfile_char, al
    mov ah, 40h
    mov bx, handle3
    mov cx, 1
    mov dx, offset outfile_char
    int 21h
    jc  SomeError
    

    Processing input files 'abobabiba1' and 'abobapipa1', I would expect a 4-byte output file 'b5b7'.