Search code examples
linuxassemblylinux-kernelgnu32-bit

Comparing Inputted Word to Array in x86 GNU GAS Assembly


This is actually my second question about this particular problem today, but the other one was answered pretty quickly.

In essence, I am trying to take in a string of letters (no numbers or symbols) and then compare each inputted letter to an array of .asciz values that represent the NATO Military Phonetic Alphabet (Alpha, Bravo, Charlie, etc.) and output the representative Military equivalent to the letter.

This is where I am stuck. I'm fairly new to Assembly and this is a homework assignment, so help is much needed and appreciated. My professor is not great at offering resources to learn this stuff and it's difficult to find good resources for exact problems online.

Any help would be much appreciated. Specifically on how to compare each letter input to the array. I've already successfully stored the input in a variable.

Below is a C# representation of what I am attempting to do.

class MilAlpha
{
    static void Main(string[] args)
    {
        string input;
        string[] miliAlpha = { "Alpha", "Beta", "Charlie", "Delta", "Echo", "Foxtrot", "Golf", 
                               "Hotel", "India", "Juliet", "Kilo", "Lima", "Mike", "November",
                               "Oscar", "Papa", "Quebec", "Romeo", "Sierra", "Tango", "Uniform",
                               "Victor", "Whiskey", "X-Ray", "Yankee", "Zulu" };

        Console.WriteLine("Enter a string of text: ");
        input = Console.ReadLine();

        for (int i = 0; i < input.Length; i++) {
            for (int j = 0; j < miliAlpha.Length; j++) {

                if (input[i] == ' ')
                    Console.WriteLine("\n")
                
                string temp = miliAlpha[j].ToLower();

                if (input[i] == temp[0])
                    Console.WriteLine("\n" + miliAlpha[j] + "\n");

            }
        }

        Console.ReadKey();
    }
}

EDIT:

So I believe this should do what I am trying to do, but it doesn't seem to work as intended. It compares the correct things in the debugger, but when it goes to print the respective portion of the array, it simply doesn't print anything.

.section .data

MAlpha:
.asciz  "Alpha    \n"
.equ    ElementLen, .-MAlpha
.asciz  "Bravo    \n"
.asciz  "Charlie  \n"
.asciz  "Delta    \n"
.asciz  "Echo     \n"
.asciz  "Foxtrot  \n"
.asciz  "Golf     \n"
.asciz  "Hotel    \n"
.asciz  "India    \n"
.asciz  "Juliet   \n"
.asciz  "Kilo     \n"
.asciz  "Lima     \n"
.asciz  "Mike     \n"
.asciz  "November \n"
.asciz  "Oscar    \n"
.asciz  "Papa     \n"
.asciz  "Quebec   \n"
.asciz  "Romeo    \n"
.asciz  "Sierra   \n"
.asciz  "Tango    \n"
.asciz  "Uniform  \n"
.asciz  "Victor   \n"
.asciz  "Whiskey  \n"
.asciz  "X-Ray    \n"
.asciz  "Yankee   \n"
.asciz  "Zulu     \n"
.asciz  "         \n"
.equ    MAlphaLen, .-MAlpha

Input:
.fill   80
.equ    InputLen, .-Input

InputMSG:
.ascii  "Please enter a word: "
.equ    InputMSGLen, .-InputMSG

BlankLine:
.ascii  "\n"
.equ    BlankLineLen, .-BlankLine

Converting:
.ascii  "\nConverting to NATO Alphabet...\n\n"
.equ    ConvertingLen, .-Converting

.section .bss
.section .text
.globl   _start

GetInput:
    movl    $4, %eax
    movl    $1, %ebx
    movl    $InputMSG, %ecx
    movl    $InputMSGLen, %edx
    int     $0x80

    movl    $3, %eax
    movl    $0, %ebx
    movl    $Input, %ecx
    movl    $InputLen, %edx
    int     $0x80
    ret

PrintInput:
    movl    $4, %eax
    movl    $1, %ebx
    movl    $BlankLine, %ecx
    movl    $BlankLineLen, %edx
    int     $0x80

    movl    $4, %eax
    movl    $1, %ebx
    movl    $Input, %ecx
    movl    $InputLen, %edx
    int     $0x80

    movl    $4, %eax
    movl    $1, %ebx
    movl    $Converting, %ecx
    movl    $ConvertingLen, %edx
    int     $0x80
    ret

Convert:
    # Get first letter of input string
    # Compare letter to first letter of each array entry
    # When match is found, print Array entry to screen
    # Repeat until end of input string

    movl    $Input, %eax
    movl    $MAlpha, %edi
    call    Loop
    ret

Loop:
    movb    (%eax), %al
    cmp     $0x0A, %al
    je      Finished

    call    CompareAlpha

    jmp     Loop

CompareAlpha:
    movb    (%edi), %bl
    cmpb    %bl, %al
    je      PrintWord
    addl    $ElementLen, %edi
    jmp     CompareAlpha

PrintWord:
    movl    $4, %eax
    movl    $1, %ebx
    movl    (%edi), %eax
    movl    $ElementLen, %edx
    int     $0x80

Finished:
    call    ExitProg

_start:
    call    GetInput
    call    PrintInput
    call    Convert
    call    ExitProg

PrintMAlpha:
    movl    $4, %eax
    movl    $1, %ebx
    movl    $MAlpha, %ecx
    movl    $MAlphaLen, %edx
    int     $0x80

ExitProg:
    movl    $1, %eax
    movl    $0, %ebx
    int     $0x80

Solution

  • Here's a few bugs to get you started:

    1. In Loop, you are keeping the pointer to the input string in %eax, but you load the character into %al which is the low byte of %eax, thus trashing its value. Pick another register for one of them.

    2. You never increment your pointer in Loop, so it will loop forever (if it doesn't crash first due to one of your other bugs).

    3. CompareAlpha doesn't reset %edi on successive calls. So if the first character is 'H', %edi will be left pointing to "Hotel" after the call. If the next character is E, CompareAlpha will search forward for it starting from "Hotel". Of course it won't find it, so it runs off the end of the array and crashes.

    4. PrintWord loads four bytes of the string into %eax (overwriting the system call number), whereas it should be loading the address of the string into %ecx. Replace movl (%edi), %eax by movl %edi, %ecx (note it is now a register-to-register move and not a load from memory).

    5. PrintWord clobbers registers %eax, %ebx, %ecx, %edx, some of which its caller is expecting to remain unchanged. Either push and pop those registers, or rewrite CompareAlpha to do so before calling it.

    6. PrintWord is missing a ret at the end, so it falls through into ExitProg.

    After fixing these, I was able to successfully convert the string "HELLO".

    These were all findable by single-stepping the code in the gdb debugger (si command) and watching the contents of registers (display $eax) and what they point to (display/s $edi, etc). I suggest practicing this.

    Note that a more efficient design, instead of linear search through the array of codewords, would be to simply index into it. Take your character and subtract the ASCII code of 'A' (0x41), multiply by $ElementLen, and add to $MAlpha. Now you have a pointer to the desired codeword without looping. If you use an auxiliary array of pointers as in your other post, this is even easier as each pointer has length 4, so you can use the SIB addressing mode and do movl MAlpha(,%eax,4), %edi; make sure the high 24 bits of %eax are zeroed. This also avoids the need for padding all the code words with spaces (though then you'll need to write your own strlen to compute the length, or have a separate array of lengths, or write out one byte at a time until you see the 0 at the end).

    Also, as a general tip, it would be wise to document each of your subroutines: what exactly does it do, in which registers does it expect its inputs and leave its outputs, and which registers does it clobber? You may want to try to have some commonality between these, perhaps even creating your own standard calling conventions.