Search code examples
assembly, x86, histogram

Counting character frequencies in an array of characters - x86 Assembly


I'm trying to count the occurrences of characters in a string. My code is below:

; Question's original attempt: count how often each character occurs in
; `text` by using the character's value as an index into `freqTable`.
; The asker reports a "wrong parameters" error on the increment in the loop.
data segment 'DSEG'
    text        db  "This is a sentence.",0     ; zero-terminated input string
    textSize    dw  $ - text - 1                ; size of string, - 1 to account for null-termination character
    freqTable   dd  256 DUP(0)                  ; 256 dword counters, all initialised to zero
ends 'DSEG'

code segment 'CSEG'
start:                          
mov ax, data        ; load the data segment address; ds/es cannot be loaded from an immediate
mov ds, ax
mov es, ax
;---------------------------------------

sub cx, cx              ; clears cx, but the next instruction overwrites it anyway
mov cx, textSize        ; number of times to loop
L1:
    ; NOTE(review): cx appears inside the address expression and a full word
    ; is loaded into ax although `text` holds bytes - 16-bit addressing
    ; normally accepts only bx/bp/si/di; confirm against the assembler manual.
    mov ax, [OFFSET text + cx - 1]  ; loop from back using cx, put character in ax
    ; NOTE(review): this is the increment the asker says is rejected; it uses
    ; ax with a *4 scale as the index and gives no operand size (byte/word/dword).
    inc [OFFSET freqTable + 4*ax]   ; increment table's index at the ascii value of character
    LOOP L1                         ; decrement cx and repeat until it reaches zero

;---------------------------------------
mov ax, 4c00h       ; return to OS
int 21h

ends 'CSEG'
end start           ; set entry point

I made an array of DWORDs in which each index represents a character. I then loop through the string and try to increment the array entry at the ASCII value of each character.

However, I get a wrong parameters error when I try to increment in the loop. I'm not sure what is causing this error. I am guessing I can't just increment the way I'm trying to. How do I properly create the frequency table? Am I missing something small?


Solution

  • To help you understand how to count characters, I wrote the following small program with EMU8086 (compatible with your assembler): the program asks the user for a filename, opens the file, reads all of its characters while counting them, and closes the file.

    Next image shows how it works : there is an array of frequencies ("freq_array") with 256 positions. Each position is the counter of the corresponding char, for example, the position 65 is the counter for 'A' (chr(65)).

    enter image description here

    Every time a char is read from the file, the char itself is used as the index of its own counter. For example, if char 48 ('0') is read from the file, counter number 48 is incremented; because each counter is 2 bytes wide, its address is the array offset plus 48 * 2. When the file ends, all of its chars have been counted.

    Now the code :

    .model small
    .stack 100h
    
    ;-----------------------------------------
    ;PROGRAM DATA: THE FREQUENCY TABLE, THE PROMPT, THE KEYBOARD INPUT
    ;BUFFER FOR THE FILE NAME, AND THE STATE USED WHILE READING THE FILE.
    
    .data
    
    freq_array   dw 256 dup(0) ;ONE 16-BIT COUNTER PER CHARACTER CODE (0-255), ALL STARTING AT ZERO.
    
    msj          db 13,10,'Enter name of file: $' ;PROMPT SHOWN BY GET_SOURCE_FILE (CR, LF, TEXT, '$' END MARK).
    
    ;INPUT BUFFER FILLED BY INT 21H / AH=0AH IN GET_SOURCE_FILE.
    ;THE FILE NAME TEXT ITSELF STARTS AT FILENAME + 2.
    filename     db 99        ;MAX NUMBER OF CHARACTERS ALLOWED (98).
                 db ?         ;LENGTH (NUMBER OF CHARACTERS ENTERED BY USER).
                 db 99 dup(0) ;CHARACTERS ENTERED BY USER. END WITH CHR(13).
    
    filehandler  dw ?         ;FILE HANDLE STORED BY COUNT_CHARS AFTER OPENING THE FILE.
    
    the_char     db ?         ;ONE-BYTE BUFFER FOR THE CHAR READ FROM FILE.
    
    ;-----------------------------------------
    
    .code
    start:
    
    ;MAKE DS POINT TO THE PROGRAM'S DATA SO ITS VARIABLES CAN BE ADDRESSED.
      mov  ax, @data
      mov  ds, ax
    
    ;ASK FOR THE FILE NAME, THEN TALLY EVERY CHARACTER OF THAT FILE.
      call get_source_file
      call count_chars
    
    ;PAUSE UNTIL A KEY IS PRESSED.
      mov  ah, 07h
      int  21h
    
    ;TERMINATE THE PROGRAM WITH EXIT CODE 0.
      mov  ah, 4ch
      mov  al, 0
      int  21h
    
    ;-----------------------------------------
    
    get_source_file proc
    ;PROMPT THE USER FOR A FILE NAME, READ IT FROM THE KEYBOARD INTO
    ;FILENAME, AND TURN THE RESULT INTO A ZERO-TERMINATED STRING.
    ;OUT:   FILENAME + 2 = FILE NAME ENDING IN CHR(0).
    ;CLOBB: AX, CX, DX, SI, FLAGS.
    
    ;SHOW THE PROMPT.
      lea  dx, msj
      mov  ah, 9
      int  21h
    
    ;READ A LINE FROM THE KEYBOARD INTO THE FILENAME BUFFER.
      lea  dx, filename
      mov  ah, 0Ah
      int  21h
    
    ;THE TYPED TEXT ENDS WITH CHR(13), BUT THE OPEN-FILE SERVICE WANTS
    ;THE NAME TO END WITH CHR(0): LOCATE THE CHR(13) AND OVERWRITE IT.
      xor  al, al           ;AL = 0, THE NEW TERMINATOR.
      mov  si, offset filename + 1 ;SI -> LENGTH BYTE.
      xor  cx, cx
      mov  cl, [ si ]       ;CX = NUMBER OF CHARACTERS TYPED.
      inc  cx               ;SKIP THE LENGTH BYTE ITSELF.
      add  si, cx           ;SI -> CHR(13) RIGHT AFTER THE TEXT.
      mov  [ si ], al       ;CHR(13) BECOMES CHR(0).
    
      ret
    get_source_file endp
    
    ;-----------------------------------------
    ;READ ALL CHARACTERS FROM THE FILE NAMED AT FILENAME + 2, INCREASING
    ;THE COUNTER OF EACH CHARACTER IN THE ARRAY OF FREQUENCIES. EACH
    ;CHARACTER IS USED AS THE INDEX OF ITS OWN COUNTER, EXAMPLE: THE
    ;COUNTER FOR 'A' IS ENTRY 65 OF FREQ_ARRAY (BYTE OFFSET 65 * 2).
    ;
    ;IN:    FILENAME + 2 = ZERO-TERMINATED FILE NAME.
    ;OUT:   FREQ_ARRAY UPDATED. COUNTERS ARE 16 BITS AND WRAP AFTER 65535.
    ;CLOBB: AX, BX, CX, DX, SI, FLAGS.
    ;ERRORS: IF THE FILE CANNOT BE OPENED NOTHING IS COUNTED AND NOTHING
    ;        IS CLOSED. IF A READ FAILS, COUNTING STOPS AND THE FILE IS
    ;        CLOSED. DOS REPORTS BOTH FAILURES THROUGH THE CARRY FLAG.
    
    count_chars proc
    ;OPEN FILE.
      mov  ah, 3dh          ;SERVICE TO OPEN FILE.
      mov  al, 0            ;OPEN AS READ ONLY.
      mov  dx, offset filename + 2
      int  21h
      jc   count_done       ;CARRY SET: AX IS AN ERROR CODE, NOT A HANDLE.
      mov  filehandler, ax  ;NECESSARY FOR OPERATIONS ON FILE.
    
    ;COUNT CHARACTERS.
    reading:
    ;READ ONE CHAR FROM FILE.
      mov  ah, 3fh          ;SERVICE TO READ FROM FILE.
      mov  bx, filehandler
      mov  cx, 1            ;HOW MANY BYTES TO READ.
      mov  dx, offset the_char ;WHERE TO STORE THE READ BYTES.
      int  21h
      jc   end_reading      ;CARRY SET: READ ERROR, AX IS NOT A BYTE COUNT.
    
    ;CHECK END OF FILE.
      cmp  ax, 0
      je   end_reading      ;IF READ ZERO BYTES, FINISH.
    
    ;INCREASE COUNTER. THE CHAR ITSELF IS USED AS INDEX: THE
    ;COUNTER FOR CHAR 65 ('A') IS THE 65th ENTRY OF THE ARRAY.
      mov  al, the_char     ;USE CHAR AS INDEX OF ITS OWN COUNTER.
      mov  ah, 0            ;CLEAR AH TO USE AX.
      shl  ax, 1            ;AX * 2, BECAUSE EVERY COUNTER IS 2 BYTES.
      mov  si, offset freq_array
      add  si, ax           ;SI POINTS TO COUNTER POSITION.
      inc  word ptr [ si ]  ;INCREMENT COUNTER FOR CURRENT CHAR.
      jmp  reading          ;REPEAT PROCESS.
    
    end_reading:
    ;CLOSE FILE.
      mov  ah, 3eh          ;SERVICE TO CLOSE FILE.
      mov  bx, filehandler
      int  21h
    
    count_done:
      ret
    count_chars endp
    
    ;-----------------------------------------
    
    end start
    

    Hope this helps.

    The counters above are 16 bits wide, because the array is declared with DW. To use 32-bit counters (array declared with DD), change the following lines:

    freq_array   dd 256 dup(0) ;EVERY COUNTER IS NOW 4 BYTES.
    
    shl  ax, 1             ;AX * 2.
    shl  ax, 1             ;AX * 4, BECAUSE EVERY COUNTER IS 4 BYTES.
                           ;(THE 8086 ONLY SHIFTS BY 1 OR BY CL.)
    
    ;THE 8086 HAS NO 32-BIT OPERANDS, SO THE COUNTER IS INCREMENTED
    ;AS TWO 16-BIT HALVES (LOW WORD FIRST, LITTLE ENDIAN).
    add  word ptr [ si ], 1     ;INCREMENT LOW WORD; CARRY SET ON WRAP.
    adc  word ptr [ si + 2 ], 0 ;ADD THAT CARRY INTO THE HIGH WORD.