Search code examples
assemblyx86nasm

Reading a number from STDIN, converting it to actual number and printing it back to STDOUT


I am learning assembly. I am working on a simple program which should do the following:

  • read a 2-digit number from STDIN
  • decrement the read number
  • send the result of the decrement to STDOUT

Here is the program I have so far:

section .data
    max_number_size: equ 2 ; maximum number of input bytes to read (also the size of input_buffer)

section .bss
    input_buffer: resb max_number_size ; receives the raw ASCII bytes read from STDIN

section .text
    global _start
    ; Entry point: read up to max_number_size bytes from STDIN into input_buffer,
    ; then fall through to atoi with ecx still holding the buffer address.
    _start:
        mov eax, 3                  ; syscall number 3 = read
        mov ebx, 0                  ; file descriptor 0 = STDIN
        mov ecx, input_buffer       ; address of the buffer that receives the bytes
        mov edx, max_number_size    ; maximum number of bytes to read
        int 80H                     ; invoke the kernel; the result of read comes back in eax

    ; Intended to convert the ASCII digits at ecx into an integer accumulated in eax.
    atoi:
        mov eax, 0                  ; running total starts at 0 (this overwrites the result of read)
        mov ebx, 0                  ; number of characters processed so far
        
        atoi_convert:
            mov esi, [ecx]              ; NOTE(review): no size override, so this loads 4 bytes from ecx, not one character
            test ebx, max_number_size   ; NOTE(review): test is a bitwise AND (ebx & 2), not a comparison; the result is 0 while ebx is 0
            je _stdout                  ; taken when the AND result is zero, which is already the case on the first pass


            cmp esi, 48                 ; anything below '0' (ASCII 48) is not a digit
            jl _exit_1

            cmp esi, 57                 ; anything above '9' (ASCII 57) is not a digit
            jg _exit_1

            sub esi, 48                 ; ASCII digit -> numeric value ('0' is 48)
            imul eax, 10                ; shift the running total one decimal place
            add eax, esi                ; append the current digit

            inc ecx                     ; advance to the next input byte
            inc ebx                     ; one more character processed
            jmp atoi_convert

    ; Issues a write syscall for the result, then falls through to _exit_0.
    _stdout:
        mov ecx, eax                ; NOTE(review): write expects a buffer address in ecx; this passes the integer value itself
        mov eax, 4                  ; syscall number 4 = write
        mov ebx, 1                  ; file descriptor 1 = STDOUT
        mov edx, 32                 ; number of bytes requested to be written
        int 80h

    ; Terminate with exit status 0.
    _exit_0:
        mov eax, 1                  ; syscall number 1 = exit
        mov ebx, 0                  ; exit status 0
        int 80H

    ; Terminate with exit status 1 (reached when a non-digit character is found).
    _exit_1:
        mov eax, 1                  ; syscall number 1 = exit
        mov ebx, 1                  ; exit status 1
        int 80H

Notice that, in the code above, there is no decrement yet.

What I am having trouble understanding is what is actually sent to STDOUT at the _stdout label here.

What I understand is that, using the atoi, the ASCII chars read from stdin are transformed into an actual decimal value (if input is 12, then the value in eax after atoi would be 0000 1100 (binary)).

So, when reaching _stdout, we have 12d (0000 1100b) in eax. In order to send it to STDOUT, I move the value into ecx, configure eax, ebx, edx for the syscall and then, boom, syscall.

However, there is no output at all.

See, for example:

/app # make
nasm -f elf -g -F stabs main.asm
ld -o main.exe main.o -m elf_i386
/app # ./main.exe > stdout
49
/app #
/app # ls -l | grep stdout
-rwxr-xr-x    1 root     root             0 Feb  7 12:05 stdout

The stdout file is empty. Not even a single byte.

What am I doing wrong in this program and, consequently, what am I not understanding correctly?


Solution

  • UPDATE: As @Peter Cordes pointed out, the first version of this answer's code did not convert the number back to ASCII before printing it. The current code adds an ASCII number-printing routine based on the reference link given by @Peter Cordes. Thanks!


    How about using debugger like GDB to step execution and see how the registers change?

    If you set a breakpoint just before the int 0x80 in _stdout using GDB, you will notice that ecx is 0 (NULL).

    (gdb) catch syscall 4
    Catchpoint 1 (syscall 'write' [4])
    (gdb) r
    49 <- user's input
    
    Catchpoint 1 (call to syscall write), 0x08049053 in _exit_0 ()
    (gdb) p/x $ecx
    $1 = 0x0
    

    Let's find out why. _stdout is reached from atoi_convert, so let's set a breakpoint on atoi_convert and single-step (using the si command). Notice that je _stdout is always taken, even on the first iteration.
    test ebx, max_number_size actually performs a bitwise AND of its operands and only sets the flags; it is normally used to check whether a register is zero. Because ebx is 0 here, the result is zero and je always jumps. To compare the counter against the limit, use cmp ebx, max_number_size instead.

    In atoi_convert, mov esi, [ecx] copies the 4 bytes stored at the address in ecx into esi.

    (gdb) b atoi_convert
    Breakpoint 1 at 0x8049020
    (gdb) r
    49 <- user's input
    
    Breakpoint 1, 0x08049020 in atoi_convert ()
    (gdb) ni <- to run mov esi, [ecx]
    0x08049022 in atoi_convert ()
    (gdb) p/x $esi
    $1 = 0x3934
    (gdb) x/wx $ecx
    0x804a000 <input_buffer>:       0x00003934
    

    As you can see, $esi is 0x3934 when I input 49, i.e. it holds both characters at once. But the rest of the code (cmp esi, 48 etc.) assumes esi holds a single byte. As @Jester said, you should use movzx esi, byte [ecx] to load exactly one byte, zero-extended, and you should use read's return value as the loop limit instead of max_number_size.

    Moreover, if the user enters only one character on the terminal, the newline character that follows it is also read into the buffer. I think it would be better to jump to _stdout instead of _exit_1 when an unrecognized character is encountered after at least one digit.

    Let's breakpoint before the int 0x80 again.

    (gdb) catch syscall write
    Catchpoint 1 (syscall 'write' [4])
    (gdb) r
    49 <- user's input
    
    Catchpoint 1 (call to syscall write), 0x08049053 in _exit_0 ()
    (gdb) p $ecx
    $1 = 49
    

    The second argument of write (ecx) must be a pointer to the bytes to output, but here it holds the integer value itself. The value has to be converted to ASCII digits stored in memory, and a pointer to those digits passed instead. Have a look at this: How do I print an integer in Assembly Level Programming without printf from the c library?

    Anyway, the following code will work. I've also added fixes I didn't mention, such as error checking for read. Also, I added an ASCII number printing function based on reference link.

    section .data
        max_number_size: equ 2              ; size of input_buffer = max bytes requested from read
    
    section .bss
        input_buffer: resb max_number_size  ; raw ASCII bytes read from stdin
    
    section .text
        global _start

        ; Program entry point (NASM syntax, 32-bit Linux, int 0x80 syscall ABI).
        ; Reads up to max_number_size bytes from stdin into input_buffer.
        ; On success falls through to atoi with:
        ;   ecx = address of input_buffer (int 0x80 only overwrites eax)
        ;   edi = number of bytes actually read (0 at end-of-file)
        ; On a failed read, exits through _exit_1.
        _start:
            mov eax, 3                      ; sys_read
            xor ebx, ebx                    ; fd 0 = stdin
            mov ecx, input_buffer
            mov edx, max_number_size
            int 80H
            test eax, eax                   ; a raw syscall returns -errno on failure: any negative
            js _exit_1                      ; value, not only -1, so test the sign bit
            mov edi, eax                    ; keep the byte count; eax is reused as the accumulator
    
        ; atoi: convert the leading ASCII decimal digits of the input to an integer.
        ; In:   ecx = address of the first input byte
        ;       edi = number of valid bytes at ecx (the count returned by read)
        ; Out:  eax = parsed value, control transfers to print_uint32
        ;       jumps to _exit_1 if the input is empty or does not start with a digit
        ; Clobbers: ebx, ecx, esi, flags
        atoi:
            xor eax, eax                ; running total
            xor ebx, ebx                ; bytes consumed so far
    
            ; when input is completely wrong (like `AA`) or empty, don't show output.
            cmp ebx, edi
            jge _exit_1                 ; signed compare: a zero or negative count means no data
            movzx esi, byte [ecx]
            sub esi, '0'
            cmp esi, 9
            ja _exit_1                  ; one unsigned compare rejects both < '0' and > '9'
    
            atoi_convert:
                cmp ebx, edi            ; check the bound BEFORE loading, so no byte past
                jge print_uint32        ; the valid input is ever read
    
                movzx esi, byte [ecx]   ; current character, zero-extended
                sub esi, '0'            ; esi = digit value if the character is '0'..'9'
                cmp esi, 9
                ja print_uint32         ; stop at the first non-digit (e.g. a trailing newline)
    
                imul eax, 10            ; total = total * 10 + digit
                add eax, esi
    
                inc ecx                 ; next input byte
                inc ebx
                jmp atoi_convert
    
        ; This code is referenced from <https://stackoverflow.com/a/46301894>.
        ;
        ; print_uint32: write eax to stdout as unsigned decimal ASCII (no trailing newline).
        ; In:   eax = value to print
        ; Out:  falls through to _exit_0 after the write
        ; Clobbers: eax, ebx, ecx, edx, esi, flags
        print_uint32:
            sub esp, 16                 ; reserve a scratch buffer ABOVE esp: 32-bit code has no
                                        ; red zone, so bytes below esp must not hold live data
                                        ; (a uint32 needs at most 10 digits)
            lea esi, [esp + 16]         ; esi = one past the buffer end; digits are stored backwards
            mov ecx, 10                 ; divisor
            xor ebx, ebx                ; number of digits produced
        .toascii_digit:
            xor edx, edx                ; div divides edx:eax, so the high half must be zero
            div ecx                     ; eax = quotient, edx = remainder (next low-order digit)
            add edx, '0'
            dec esi
            mov [esi], dl
            inc ebx
    
            test eax, eax
            jnz .toascii_digit          ; loop until the quotient is 0 (always emits at least "0")
    
            mov edx, ebx                ; length; copied first because ebx is reused for the fd
            mov eax, 4                  ; sys_write
            mov ebx, 1                  ; fd 1 = stdout
            mov ecx, esi                ; address of the most significant digit
            int 80h
            add esp, 16                 ; release the scratch buffer
    
        ; Terminate the process with exit status 0 (success).
        _exit_0:
            xor ebx, ebx                ; exit status = 0
            mov eax, 1                  ; sys_exit
            int 0x80
    
        ; Terminate the process with exit status 1 (failure).
        _exit_1:
            mov ebx, 1                  ; exit status = 1
            mov eax, 1                  ; sys_exit
            int 0x80