Search code examples
assembly x86 yasm

How to work with string from file and sort that in YASM 8086


I got stuck on a task.

I have to write a program that reads strings from a file, sorts the last 10 strings by the sixth column, and then outputs the sorted strings to a new file.

The first two columns each contain a string of [1; 20] characters, columns 3-5 contain integers in the range [-100; 100], and the 6th column contains real numbers in the range [-9.99; 9.99]. The columns are separated by the ';' symbol.

Input file:

48CBC0h ucbe5u F bc6; 6  laY xdU;-62;-29;11;-6.34
AJKCvwHUvmL CjWRl;WQc1 E2wzyTU;3;-20;24;8.26
9jicRCI8S b ; p9m R7iHqOj  9ig h;-93;19;-92;6.18
Xv ;IufLkIUp;-23;-94;76;2.63
A o8P8T 26;Zy J IVg6;-80;-58;-42;-5.96
hlwCw   Z8ChU;KX0 w C3 N60 KZV;-94;61;-71;-3.31
Rtqn58 2 l BTWdVgl H;rd U ;9;70;10;8.66
M x91pVZ6UQ Nb;p;-5;-27;74;4.04
rq8s3Gc Bj x 2XG ;8E cTH  a ZF VLp2E;-4;21;-89;7.9
 ca yfUpVXuC7  ;sXvttLZs4  nqcv 5fTg;-34;-40;14;-5.19
 j;yL7  G dG   C vR  B;70;-89;-87;-9.52
 2n 5 O F MMc16; Awcsl2sI;-97;-82;34;1.01
EeLVLB;qR q4i  D5q ;70;49;-11;-5.43
 nsWW j9;AGBGVXO N;59;97;74;6.22
ou 7vCvBUc;yVW30Jwcv Qtj;18;-10;77;9.38

I tried to save the pointers to these lines, as well as the sixth column, but I haven't started to think about a sorting function yet.

Here is what I wrote so far:

%include 'yasmmac.inc'

org 100h

section .text

   ; Copy the file name from the command tail (count byte at 0x80, text after it)
   ; towards readFile.
   ; NOTE(review): no readFile label is declared in the data section of this
   ; file (it declares readingFile) — confirm which buffer is intended.
   start:

        mov si, 0x80            ; SI -> the byte the code uses as the character count
    mov di, readFile
    dec di                      ; DI = readFile - 1, so the first copied byte lands just before the buffer
    mov cl, byte [si]           ; only CL is loaded; CH keeps whatever it held
    cmp cl, 01
    jg .name                    ; the target is the very next instruction, so .name runs either way
    .name:
        inc si
        mov al, byte [si]
        mov byte [di], al
        inc di
    loop .name                  ; LOOP decrements and tests the whole CX, not just CL

   ; Prompt for the output file name, then open the input file for reading.
   macPutString 'Input name for writing file', crlf, '$'
   mov al, 128                  ; presumably the maximum input length for procGetStr — TODO confirm in yasmlib
   mov dx, writeFile            ; NOTE(review): the data section declares writingFile, not writeFile
   call procGetStr
   macNewLine

   mov dx, readFile
   call procFOpenForReading     ; CF is tested below; BX is saved at .reading as the file handle
   jnc .reading
   macPutString 'Error occured', crlf, '$'
   jmp .end

   
   ; First pass: count the lines and store one word per line in the pointers table.
   ; DX is the running byte offset. It starts at -1 and is stored before it is
   ; advanced, so each entry is the offset just before that line's first byte.
   ; procFGetChar results as used here: AX = 0 is treated as end of file, CL as
   ; the character read. Assumes it preserves DX and DI — TODO confirm in yasmlib.
   .reading:    ; BX holds the handle returned by the open call
    mov [readingD], bx  ; remember the input handle
    mov di, pointers
    xor dx, dx
    dec dx                      ; DX = -1
        .whileNotEndOfFile:
        push bx
        mov bx, numberOfLines
        inc word [bx]           ; counted before any byte of the line has been read
        pop bx
        mov [di], dx            ; no bounds check against the size of the pointers table
        inc di
        inc di
        .tillLineEnds:
            inc dx
            call procFGetChar
            cmp ax, 0x0
            je .readingClose    ; end of file
            cmp cl, 0x0a
            jne .tillLineEnds
            jmp .whileNotEndOfFile

   ; End of the first pass: close the input, patch the first table entry and
   ; create the output file.
   .readingClose:
   call procFClose              ; BX is presumably still the input handle — TODO confirm procFGetChar preserves BX
   mov di, pointers
   inc word[di]                 ; the first entry was stored as -1; this makes it 0

   mov dx, writeFile
   call procFCreateOrTruncate   ; CF is tested below; BX is saved at .filter as the output handle
   jnc .filter
   macPutString 'Error occured', crlf, '$'
   jmp .end

   ; Require at least 10 lines; otherwise write the error text to the output
   ; file, close both handles and quit.
   .filter:
    mov [writingD], bx
    push bx
    mov bx, numberOfLines
    dec word [bx]               ; undo the increment made before the read that hit end of file
    cmp word [bx], 0x000a
    jl .errorInfo               ; fewer than 10 lines
    pop bx                      ; POP leaves the flags from the CMP intact for the JGE below
    jge .continue


    .errorInfo:
    pop bx                      ; balances the PUSH done in .filter
    mov bx, [writingD]
    mov cx, 0x0050              ; presumably the byte count for procFWrite — TODO confirm; the text at error is 18 bytes long
    mov dx, error
    call procFWrite
    mov bx, [readingD]
    call procFClose             ; NOTE(review): this handle was already closed at .readingClose
    mov bx, [writingD]
    call procFClose
    jmp .end


    ; Second pass setup: reopen the input and skip all but the last 10 lines.
    .continue:
    mov dx, readingFile         ; NOTE(review): the first pass opened readFile, not readingFile
        call procFOpenForReading
    jnc .secondReading
    macPutString 'Error occured', crlf, '$'
    jmp .pabaiga                ; NOTE(review): no .pabaiga label appears in this file; the other error paths jump to .end

    .secondReading:
    mov [readingD], bx

    mov dx, numberOfLines       ; no brackets: this loads the address of numberOfLines, not the stored count
    sub dx, 0x000a
    xor cx, cx
    mov cx, dx                  ; CX = number of lines to skip
    xor ax, ax
    cmp cx, 0x0
    jg .skipLines
    mov di, sixth               ; DI is set only on this path; the .skipLines path falls into .sixthColumn without it
    jmp .sixthColumn

    .skipLines:
       push cx                  ; saved because the character comes back in CL
        .skip:
        call procFGetChar
        cmp cl, 0x0a
        jne .skip               ; AX (end of file) is not checked in this loop
       pop cx
    loop .skipLines
    
    ; For each remaining line: skip five ';' separators, then copy the sixth
    ; column to [DI], leaving out the '.' character.
    .sixthColumn:
       xor si, si               ; SI = separators seen on this line
        .skipSign:
        call procFGetChar
        cmp cl, ';'
        jne .skipSign           ; AX (end of file) is not checked in this loop
        inc si
        cmp si, 0x0005
        jl .skipSign

        .save:
        call procFGetChar
        cmp ax, 0x0
        je .ifNotEnd            ; end of file
        cmp cl, 0x0a
        je .ifNotEnd            ; end of line
        cmp cl, '.'
        je .save                ; drop the decimal point
        mov [di], cl            ; every other byte is stored, including '-' and a CR if the line ends in CR LF
        inc di
        jmp .save
    .ifNotEnd:
    cmp ax, 0x0
    je .endReading
    jmp .sixthColumn

.endReading:
call procFClose

; NOTE(review): the label below is written without a trailing colon
.end
   exit                         ; presumably a macro from yasmmac.inc that terminates the program — TODO confirm

;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;     
%include 'yasmlib.asm'
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
section .data
   
; Buffers and variables for the program above.
; NOTE(review): the code also refers to readFile and writeFile, which are not
; declared here.
readingFile:
   times 255 db 00 ; 255 zero bytes; passed in DX to procFOpenForReading at .continue
writingFile:
   times 255 db 00
readingD:
   dw 0000                      ; input file handle (BX stored after opening)
writingD:
   dw 0000                      ; output file handle (BX stored after creating)
error:
   db 'Not enough data', 0x0d, 0x0a, '$'
numberOfLines:
   times 128 db 00              ; 128 bytes reserved, but the code only accesses the first word
pointers:
   times 3000 db 00             ; one word per line is written here during the first pass
sixth:
   times 20000 db 00            ; receives the sixth-column characters copied at .save

;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
section .bss

Thank you in advance.


Solution

  • Review of your work (the most important part)

    mov si, 0x80
    mov di, readFile
    dec di
    mov cl, byte [si]
    cmp cl, 01
    jg .name
    .name:
       inc si
       mov al, byte [si]
       mov byte [di], al
       inc di
    loop .name
    

    I see 3 problems in this code that fetches a filespec from the commandline:

    • Because of the decrement in mov di, readFile dec di, the loop that follows will be writing in a byte that lies outside of the readFile buffer!

    • You always run the .name loop

      jg .name
      .name:
      

      This construct is nonsense in assembly programming. If the condition is greater, you jump to .name; if the condition is not greater, you fall through into .name. Either way, you will always run the code at .name.

    • The loop .name instruction depends on the whole CX register, yet you only initialized its lowest 8 bits (mov cl, byte [si]).


    In .reading you have next code to increment the numberOfLines variable:

    push bx
    mov bx, numberOfLines
    inc word [bx]
    pop bx
    

    Firstly, you can do this in 1 instruction inc word [numberOfLines], and secondly, why do you increment the line count before even knowing that a following line will be found? Later in the program you decrement the count unconditionally. Something similar happens to the first pointer that you store: you begin with assigning -1 to it, and then at some later point you unconditionally set it to 0.


    At .continue, you re-open the sourcefile after having closed it, but you are using a different filespec now!

    .continue:
       mov dx, readingFile
       call procFOpenForReading
    

    Decide where it is. At readFile or at readingFile.
    The alternative solution that I propose below, will not require reading the source file twice.


    At .secondReading, you forgot the square brackets for dereferencing.

    mov dx, numberOfLines
    sub dx, 0x000a
    xor cx, cx
    mov cx, dx
    xor ax, ax
    cmp cx, 0x0
    jg .skipLines
    mov di, sixth
    jmp .sixthColumn
    

    Because of this, .skipLines that follows will run for way too many iterations! And because of where you buried that mov di, sixth instruction, the code at .sixthColumn will write to wherever the DI register happens to point!


    numberOfLines:
       times 128 db 00
    pointers:
       times 3000 db 00
    sixth:
       times 20000 db 00
    

    Why do you reserve this enormous amount of bytes just for processing 10 lines? More is definitely not better!

    numberOfLines: dw 0
    pointers:      times 10 dw 0
    sixth:         times 10 dw 0
    

    My solution (important if you study it carefully)

    Hint: 20 + 1 + 20 + 1 + 4 + 1 + 4 + 1 + 4 + 1 + 5 + 2 = 64 (longest line)

      ; Read the whole file once, keeping only the most recent SLOT_COUNT lines
      ; in a ring of SLOT_COUNT fixed-size slots that starts at Buffers.
      ; In:   [readingD] = handle of the file opened for reading
      ; Out:  at .eof, SI = number of complete (LF-terminated) lines read
      ; Uses: BX, BP, DI, SI, plus AX, CL and CF as returned by procFGetChar
      ;
      ; SLOT_SIZE is the longest possible line, CR LF included:
      ; 20 + 1 + 20 + 1 + 4 + 1 + 4 + 1 + 4 + 1 + 5 + 2 = 64
      %define SLOT_SIZE  64
      ; SLOT_COUNT is the number of trailing lines to keep
      %define SLOT_COUNT 10

      xor  si, si                ; NumberOfLines
      mov  bx, [readingD]        ; Handle (the program's label is readingD; labels are case-sensitive)
    .reuseBuffers:
      mov  bp, Buffers           ; SLOT_COUNT slots of SLOT_SIZE bytes each
    .nextLine:
      mov  di, bp                ; Begin of current slot
      lea  bp, [di + SLOT_SIZE]  ; End of current slot
    .nextByte:
      call procFGetChar          ; -> AX CL CF
      jc   .err                  ; A DOS error occurred
      test ax, ax
      jz   .eof                  ; Reached the end of the file
      cmp  di, bp
      jnb  .err                  ; Line is longer than expected
      mov  [di], cl
      inc  di
      cmp  cl, 10                ; Check for end of the line (LF)
      jne  .nextByte             ; No
      inc  si                    ; Yes, NumberOfLines++
      cmp  bp, Buffers + SLOT_COUNT * SLOT_SIZE ; Check for the end of the buffer chain
      jb   .nextLine             ; No
      jmp  .reuseBuffers         ; Yes, go recycle
    .err:
    
      ...
    
    .eof:
      cmp  si, SLOT_COUNT        ; NumberOfLines
      jb   .err                  ; Fewer than SLOT_COUNT lines in the file
    
    
    
    Buffers: times SLOT_COUNT * SLOT_SIZE db 0
    

    The last 10 lines are now held in the 10 64-byte buffers. The order is not important since the task asks you to sort them anyway (using the 6th column).

    I'm looking forward to seeing how you will be sorting the records. Personally I would convert the fixed point (*) numbers from the 6th column into integers in the range [-999,999] (ignoring the decimal point) and store them in a word-sized array ready for sorting. Instead of a secondary array containing pointers, I would use an array with indexes from 0 to 9.
    The conditional swaps in the first array must be matched by unconditional swaps in the second array. There's a nice bubble sort to be found in the answer at segmentation fault in x86 trying to do bubble sort, but feel free to use any sorting algorithm of your liking...

    (*) Beware of the line that reads
    rq8s3Gc Bj x 2XG ;8E cTH  a ZF VLp2E;-4;21;-89;7.9
    From previous similar Q/A's, I was under the impression that the numbers in the 6th column would all have a 2-digit fraction. This line does not use "7.90". Either it's a typo you made or you will have to take this into account in your program.