Search code examples
assemblymipsspimqtspimsmips

Finding Kth distinct element in an array MIPS


I am trying to write MIPS equivalent of the C code below.

int arrayData[5] = { 1,2,1,3,4 };
int K = 3;
int KCtr = 0;
int result;
bool isUnique;
for (int o = 1; o < 5; o++)
{
    isUnique = true;
    for (int i = 0; i < o; i++)
    {
        if (arrayData[o] == arrayData[i])
        {
            isUnique = false;
            break; // goto outer loop 
        }
    }
    if (isUnique)
    {
        KCtr++;
    }
    if (KCtr==K)
    {
        result = arrayData[o];
    }
}

I want to store the result in $s1 with the code below.

.data
Array: .word 1, 2, 4, 8, 16, 32, 64, 128
sze  : .word 8
K    : .word 3
.text
.globl main

main: lw $s0,K($0) # K 
      addi $t0,$zero,0 # index for outer loop
      addi $t1,$zero,0 # index for inner loop
      lw   $t2,sze($0)
      lw   $s1,Array($0)
      addi $t6,$zero,0
      #addi $t7,$zero,1
      #t3 for array[o] 
      #t4 for array[i]
      #t5 for unique flag
      #t6 for counter to reach K
      #t7 for storing 1
outerloop:
    beq $t0,$t2,finish
    lw $t3,Array($t0)
    addi $t5,$zero,0 
innerloop:
    lw $t4,Array($t1)    
    bne $t3,$t4,else
    addi $t5,$zero,1
else:

checkifunique:
    beq $t5,$t7,isnotuniquebypass
    addi $t6,$zero,1
isnotuniquebypass:
    addi $t1,$t1,4
    blt $t1,$t0,innerloop

    bne $t6,$s0,notreachedKbypass
    lw $s1,Array($t0)

notreachedKbypass:

    addi $t0,$t0,4
    b outerloop  


finish:
      li $v0,10
      syscall

While I am expecting to see 8 in $s1 register, I am getting 1. What is wrong my assembly code ?


Solution

  • A few things were wrong.

    Your C code was incomplete. By examining your asm code, I was able to add the missing stuff to the C code.

    You didn't set up some registers correctly.

    You were also comparing array offsets (your iteration variables that were incremented by 4) against an array index/count, so the compares wouldn't work.

    You were setting the register for KCtr to 1, instead of doing the asm equivalent of KCtr++

    I created three examples: the fixed C code, An annotated version of your asm showing some/most of the bugs. And, a cleaned up, refactored, and working version.


    Here's the C code:

    #include <stdio.h>
    
    #if 0
    int Array[] = { 1, 2, 1, 3, 4 };
    #else
    int Array[] = { 1, 2, 4, 8, 16, 32, 64, 128 };
    #endif
    
    int
    main(void)
    {
        int result;
        int K = 3;
        int KCtr = 0;
        int count = sizeof(Array) / sizeof(Array[0]);
        int uniqflg;
    
        result = -1;
    
        for (int o = 1; o < count; o++) {
            uniqflg = 1;
            for (int i = 0; i < o; i++) {
                if (Array[o] == Array[i]) {
                    uniqflg = 0;
                    break;
                }
            }
            if (! uniqflg)
                continue;
    
            KCtr++;
            if (KCtr == K) {
                result = Array[o];
            }
        }
    
        printf("result=%d\n",result);
    
        return result;
    }
    

    Here's the annotated version:

        .data
    Array:      .word       1,2,4,8,16,32,64,128
        sze     :
        K       :
    
        .text
        .globl  main
    
    # s1 for result
    # t0 for o
    # t1 for i
    # t2 for array count
    # t3 for array[o]
    # t4 for array[i]
    # t5 for unique flag
    # t6 for counter to reach K (i.e. KCtr?)
    # t7 for storing 1
    main:
        lw      $s0,K($0)               # K
        addi    $t0,$zero,0             # index for outer loop
    
    # NOTE/BUG: this is misplaced it needs to be moved to just above innerloop
        addi    $t1,$zero,0             # index for inner loop
    
        lw      $t2,sze($0)             # get array count
        lw      $s1,Array($0)           # result = Array[0]
        addi    $t6,$zero,0             # KCtr = 0
    # NOTE/BUG: this was commented out:
        addi    $t7,$zero,1
    
    outerloop:
        # NOTE/BUG: based on the increment by 4 to $t0 below, this is comparing an
        # offset against a count
        beq     $t0,$t2,finish          # o < count? if no, fly
        lw      $t3,Array($t0)          # get Array[o]
        addi    $t5,$zero,0             # uniqflg = 0
    
    innerloop:
        lw      $t4,Array($t1)          # get Array[i]
        bne     $t3,$t4,inner_neq       # Array[o] == Array[i]? if no, fly
    
    # NOTE/BUG: this is just setting one (i.e. KCtr = 1 instead of KCtr++)
        addi    $t5,$zero,1             # uniqflg = 1
    
    inner_neq:
    
    checkifunique:
        beq     $t5,$t7,notuniq         # ???
        addi    $t6,$zero,1
    
    notuniq:
        addi    $t1,$t1,4               # advance array offset
    
        # NOTE/BUG: this compares an array offset against an index
        blt     $t1,$t0,innerloop       # at end? if no, loop
    
        bne     $t6,$s0,inner_next      # KCtr == K? if no, interate
        lw      $s1,Array($t0)          # get better result
    
    inner_next:
        addi    $t0,$t0,4               # advance o [as offset]
        b       outerloop
    
    finish:
        li      $v0,10
        syscall
    

    Here's the refactored version. It's somewhat different than yours because in order to get it to work, I simplified it a bit. It also uses pointers instead of indexes or offsets for array access. Also note that once KCtr == K, there is no need to continue the loop.

        .data
    Array:      .word       1,2,4,8,16,32,64,128
    ArrEnd:
    K:          .word       3
    
        .text
        .globl  main
    
    # s0 for K
    # s1 for result
    # t0 for o (as pointer)
    # t1 for i (as pointer)
    # t2 for array count
    # t3 for array[o]
    # t4 for array[i]
    # t6 for counter to reach K (i.e. KCtr?)
    main:
        lw      $s0,K                   # K
        li      $t6,0                   # KCtr = 0
    
        la      $t2,ArrEnd              # point to end of array
        la      $t0,Array               # o = &Array[0]
        lw      $s1,0($t0)              # result = Array[0]
    
    outerloop:
        addiu   $t0,$t0,4               # advance o [as pointer]
        bgeu    $t0,$t2,finish          # o < ArrEnd? if no, fly
        lw      $t3,0($t0)              # get Array[o]
    
        la      $t1,Array               # i = &Array[0]
    
    innerloop:
        lw      $t4,0($t1)              # get Array[i]
        beq     $t3,$t4,outerloop       # Array[o] == Array[i]? if yes, not unique
    
        addiu   $t1,$t1,4               # advance array pointer for i
        bltu    $t1,$t0,innerloop       # at end? if no, loop
    
        # current array value is unique
        addi    $t6,$t6,1               # KCtr++
        bne     $t6,$s0,outerloop       # KCtr == K? if no, wait for next unique
        lw      $s1,0($t0)              # get better result -- no need to loop more
    
    finish:
        li      $v0,1
        move    $a0,$s1
        syscall
    
        li      $v0,10
        syscall
    

    UPDATE:

    Your code seems to be working in spim. But I didn't understand "ArrayEnd". There is no value set for it,but it works. How ?

    ArrEnd is a "trick" of sorts. It is the address of the last element of Array + 1. That is, in C, if we have int Array[5], then ArrEnd is &Array[5].

    Just like in C, we can choose how to iterate over arrays. We can use an index and do: Array[i]. Or, we can use int * pointers. In asm, we can use offsets from the start of the array (i.e. an index << 2).

    Let's recode the C program to only use pointers [and not indexes] to iterate across the array. This isn't used as often in C code, but it comes in handy for asm. The following is actually a more accurate C code representation of what my second asm example did:

    #include <stdio.h>
    
    #if 0
    int Array[] = { 1, 2, 1, 3, 4 };
    #else
    int Array[] = { 1, 2, 4, 8, 16, 32, 64, 128 };
    #endif
    
    int
    main(void)
    {
        int result;
        int K = 3;
        int KCtr = 0;
        int *ArrEnd = &Array[sizeof(Array) / sizeof(Array[0])];
        int uniqflg;
    
        result = -1;
    
        for (int *o = &Array[1]; o < ArrEnd; o++) {
            uniqflg = 1;
            for (int *i = &Array[0]; i < o; i++) {
                if (*o == *i) {
                    uniqflg = 0;
                    break;
                }
            }
            if (! uniqflg)
                continue;
    
            KCtr++;
            if (KCtr == K) {
                result = *o;
                break;
            }
        }
    
        printf("result=%d\n",result);
    
        return result;
    }
    

    Here are some idiomatic uses for ArrEnd:

        la      $s0,Array               # get array address
        la      $s1,ArrEnd              # address of array end [one past last]
        subu    $s2,$s1,$s0             # get byte length of array
        srl     $s3,$s2,2               # get array count
    

    Now that we have these values, we can iterate through the array by any of the above three methods. Using the offset version or the pointer version can usually be more efficient.

    To see how easy ArrEnd makes things, comment out the larger array and add the shorter array from the C code. The ArrEnd trick will adjust things automatically without needing to manually count the number of elements in Array


    UPDATE #2:

    I may be wrong about this, but upon further reflection, to meet the requirements of the question title, the C code [and hence the asm] may need to be changed.

    I think the inner loop has to scan the entire array [skipping the element where i == o] when looking for duplicates. Otherwise, it may hit a false positive.

    Also, it appears that the original never thinks the index 0 element is unique, even if it is. This is because the o loop started with index 1.

    I've created two algorithms. The original as above. And one that does the full scan. I've also added some more test cases to illustrate the issue I'm thinking of.

    Here's the test program:

    #include <stdio.h>
    
    int Array0[] = { 1, 2, 1, 3, 4 };
    int Array1[] = { 1, 2, 4, 8, 16, 32, 64, 128 };
    int Array2[] = { 1, 2, 4, 4, 8, 16, 32, 64, 128 };
    int Array3[] = { 1, 2, 4, 8, 16, 32, 64, 128, 2 };
    int Array4[] = { 1, 2, 4, 8, 16, 32, 64, 128, 2, 4 };
    int Array5[] = { 1, 2, 3, 4, 5, 6 };
    int Array6[] = { 1, 2, 3, 4, 5, 6, 1, 3 };
    
    int K = 3;
    int sepflg;
    int tstno;
    
    int
    orig(int *Array,int *ArrEnd)
    {
        int result;
        long residx;
        int KCtr = 0;
        int uniqflg;
    
        result = -1;
        residx = -1;
    
        for (int *o = &Array[1]; o < ArrEnd; o++) {
            uniqflg = 1;
            for (int *i = &Array[0]; i < o; i++) {
                if (*o == *i) {
                    uniqflg = 0;
                    break;
                }
            }
            if (! uniqflg)
                continue;
    
            printf("  orig: MATCH value=%d index=%ld\n",*o,o - Array);
    
            KCtr++;
            if (KCtr == K) {
                result = *o;
                residx = o - Array;
                break;
            }
        }
    
        printf("  orig: FINAL result=%d residx=%ld\n",result,residx);
    
        return result;
    }
    
    int
    full(int *Array,int *ArrEnd)
    {
        int result;
        long residx;
        int KCtr = 0;
        int uniqflg;
    
        result = -1;
        residx = -1;
    
        for (int *o = &Array[0]; o < ArrEnd; o++) {
            uniqflg = 1;
            for (int *i = &Array[0]; i < ArrEnd; i++) {
                if (o == i)
                    continue;
                if (*o == *i) {
                    uniqflg = 0;
                    break;
                }
            }
            if (! uniqflg)
                continue;
    
            printf("  full: MATCH value=%d index=%ld\n",*o,o - Array);
    
            KCtr++;
            if (KCtr == K) {
                result = *o;
                residx = o - Array;
                break;
            }
        }
    
        printf("  full: FINAL result=%d residx=%ld\n",result,residx);
    
        return result;
    }
    
    void
    test(int *Array,long siz)
    {
        int *ArrEnd = &Array[siz / sizeof(int)];
        int oval;
        int fval;
    
        if (sepflg)
            printf("\n");
        sepflg = 1;
    
        printf("Array%d:",tstno);
        for (int *ptr = Array;  ptr < ArrEnd;  ++ptr)
            printf(" %4d",*ptr);
        printf("\n");
    
        oval = orig(Array,ArrEnd);
        printf("\n");
        fval = full(Array,ArrEnd);
    
        printf("\n");
        printf("  test: %s orig=%d full=%d\n",
            (oval == fval) ? "PASS" : "FAIL",oval,fval);
    
        ++tstno;
    }
    
    int
    main(void)
    {
    
        test(Array0,sizeof(Array0));
        test(Array1,sizeof(Array1));
        test(Array2,sizeof(Array2));
        test(Array3,sizeof(Array3));
        test(Array4,sizeof(Array4));
        test(Array5,sizeof(Array5));
        test(Array6,sizeof(Array6));
    
        return 0;
    }
    

    Here is the program output:

    Array0:    1    2    1    3    4
      orig: MATCH value=2 index=1
      orig: MATCH value=3 index=3
      orig: MATCH value=4 index=4
      orig: FINAL result=4 residx=4
    
      full: MATCH value=2 index=1
      full: MATCH value=3 index=3
      full: MATCH value=4 index=4
      full: FINAL result=4 residx=4
    
      test: PASS orig=4 full=4
    
    Array1:    1    2    4    8   16   32   64  128
      orig: MATCH value=2 index=1
      orig: MATCH value=4 index=2
      orig: MATCH value=8 index=3
      orig: FINAL result=8 residx=3
    
      full: MATCH value=1 index=0
      full: MATCH value=2 index=1
      full: MATCH value=4 index=2
      full: FINAL result=4 residx=2
    
      test: FAIL orig=8 full=4
    
    Array2:    1    2    4    4    8   16   32   64  128
      orig: MATCH value=2 index=1
      orig: MATCH value=4 index=2
      orig: MATCH value=8 index=4
      orig: FINAL result=8 residx=4
    
      full: MATCH value=1 index=0
      full: MATCH value=2 index=1
      full: MATCH value=8 index=4
      full: FINAL result=8 residx=4
    
      test: PASS orig=8 full=8
    
    Array3:    1    2    4    8   16   32   64  128    2
      orig: MATCH value=2 index=1
      orig: MATCH value=4 index=2
      orig: MATCH value=8 index=3
      orig: FINAL result=8 residx=3
    
      full: MATCH value=1 index=0
      full: MATCH value=4 index=2
      full: MATCH value=8 index=3
      full: FINAL result=8 residx=3
    
      test: PASS orig=8 full=8
    
    Array4:    1    2    4    8   16   32   64  128    2    4
      orig: MATCH value=2 index=1
      orig: MATCH value=4 index=2
      orig: MATCH value=8 index=3
      orig: FINAL result=8 residx=3
    
      full: MATCH value=1 index=0
      full: MATCH value=8 index=3
      full: MATCH value=16 index=4
      full: FINAL result=16 residx=4
    
      test: FAIL orig=8 full=16
    
    Array5:    1    2    3    4    5    6
      orig: MATCH value=2 index=1
      orig: MATCH value=3 index=2
      orig: MATCH value=4 index=3
      orig: FINAL result=4 residx=3
    
      full: MATCH value=1 index=0
      full: MATCH value=2 index=1
      full: MATCH value=3 index=2
      full: FINAL result=3 residx=2
    
      test: FAIL orig=4 full=3
    
    Array6:    1    2    3    4    5    6    1    3
      orig: MATCH value=2 index=1
      orig: MATCH value=3 index=2
      orig: MATCH value=4 index=3
      orig: FINAL result=4 residx=3
    
      full: MATCH value=2 index=1
      full: MATCH value=4 index=3
      full: MATCH value=5 index=4
      full: FINAL result=5 residx=4
    
      test: FAIL orig=4 full=5
    

    The simplest tests that illustrate both issues are Array5 and Array6

    Here's the corresponding asm code:

        .data
    Array:      .word       1, 2, 3, 4, 5, 6, 1, 3
    ArrEnd:
    K:          .word       3
    
        .text
        .globl  main
    
    # s0 for K
    # s1 for result
    # t0 for o (as pointer)
    # t1 for i (as pointer)
    # t2 for array count
    # t3 for array[o]
    # t4 for array[i]
    # t6 for counter to reach K (i.e. KCtr?)
    main:
        lw      $s0,K                   # K
        li      $t6,0                   # KCtr = 0
    
        la      $t2,ArrEnd              # point to end of array
        la      $t0,Array               # o = &Array[0]
        li      $s1,-1                  # get fail value
        b       outstart
    
    outerloop:
        addiu   $t0,$t0,4               # advance o [as pointer]
    outstart:
        bgeu    $t0,$t2,finish          # o < ArrEnd? if no, fly
        lw      $t3,0($t0)              # get Array[o]
    
        la      $t1,Array               # i = &Array[0]
    
    innerloop:
        lw      $t4,0($t1)              # get Array[i]
        beq     $t1,$t0,innernext       # o == i? if yes, skip test
        beq     $t3,$t4,outerloop       # Array[o] == Array[i]? if yes, not unique
    innernext:
        addiu   $t1,$t1,4               # advance array pointer for i
        bltu    $t1,$t2,innerloop       # at end? if no, loop
    
        # current array value is unique
        addi    $t6,$t6,1               # KCtr++
        bne     $t6,$s0,outerloop       # KCtr == K? if no, wait for next unique
        lw      $s1,0($t0)              # get better result -- no need to loop more
    
    finish:
        li      $v0,1
        move    $a0,$s1
        syscall
    
        li      $v0,10
        syscall