Search code examples
assemblymipsmatrix-multiplication

4x4 matrix multiplication: Exception 4: Unaligned Address in inst/data fetch: 0x100100bb


I'm trying to do a 4x4 matrix multiplication in MIPS assembly using a MIPS simulator (QtMips). QtMips gives me Exception 4: Unaligned Address in inst/data fetch: 0x100100bb.

This is where I get the error when I single step.

    [00400070] c52b0000  lwc1 $f11, 0($9) ; 80: lwc1 $f11 0($t1) #load float from array1

The error happens when counter k = 2, meaning when it is at the third loop. I'm assuming something is wrong with 32-bit alignment at my third load, lwc1

Here's what I tried/read but didn't work:

  1. This suggests that I put .align 2 or .align 4 before my array (matrix) declaration in .data. Didn't work.
  2. This suggests that it could be the issue of the size value (defined after array3). But I'm loading this to s1 by lw $s1 size so I don't see this being a real issue for me.

I'm very lost on what to do. Please impart me some wisdom.

Below is my whole code:

    # Matrix data: two input matrices (array1, array2) and a zero-filled
    # result matrix (array3), each stored as 16 consecutive .float values.
    .data
    .globl array1
    .globl array2
    .globl array3

    .align 5 # pad to a 2^5 = 32-byte boundary (SPIM-style ".align n" means 2^n bytes)
array1: .float 1.00, 0.00, 3.14, 2.72, 2.72, 1.00, 0.00, 3.14, 1.00, 1.00, 1.00, 1.00, 1.00, 2.00, 3.00, 4.00
    .align 5 # same 32-byte alignment for the second operand
array2: .float 1.00, 1.00, 0.00, 3.14, 0.00, 1.00, 3.14, 2.72, 0.00, 1.00, 1.00, 0.00, 4.00, 3.00, 2.00, 1.00
    .align 5 # and for the result matrix
array3: .float 0.00, 0.00, 0.00, 0.00, 0.00, 0.00, 0.00, 0.00, 0.00, 0.00, 0.00, 0.00, 0.00, 0.00, 0.00, 0.00

size: .word 4 # matrix dimension as an integer word; main loads it into $s1 (lw $s1 size)

    .text
    .globl main
main:
    # Save the return address in memory.
    # NOTE(review): saved_ret_pc is not defined anywhere in the visible
    # listing; confirm the label exists elsewhere.
    sw $31 saved_ret_pc

    # Banner string, plus two data items (lbd_, lbd1_) that no visible
    # instruction references.
    .data
lb_:    .asciiz "Vector Multiplication\n"
lbd_:   .byte 1, -1, 0, 128
lbd1_:  .word 0x76543210, 0xfedcba98
    .text
    li $v0 4    # syscall 4 (print_str)
    la $a0 lb_
    syscall

# main program: multiply matrix 1 and 2, store in array3

la $t1 array1
la $t2 array2
la $t3 array3 ### load the array base addresses into registers


# NOTE(review): the three counters are initialised only here, outside the
# loops; nothing inside the loops sets them back to 4.
li $t4 4 # i loop counter (counts down)
li $t5 4 # j loop counter (counts down)
li $t6 4 # k loop counter (counts down)

lw $s1 size # load matrix(array) size


i_loop:
    j j_loop        # target is the label on the very next line
j_loop:
    j k_loop        # likewise
k_loop:
    # One multiply-accumulate step. Registers used:
    #   $f11 = element of array1 at $t1
    #   $f12 = element of array2 at $t2
    #   $f13 = element of array3 at $t3 (running result)
    #   $f10 = product $f11 * $f12

    lwc1 $f11 0($t1) #load float from array1
    lwc1 $f12 0($t2) #load float from array2
    lwc1 $f13 0($t3) #load float from result array3
    nop 
    mul.s $f10 $f11 $f12 #multiply floats, store result as temp in $f10
    nop

    add.s $f13 $f13 $f10 #add to multiplication result to resulting array3

    swc1 $f13 0($t3) #store the resulting float in array3

#call index_of_A
    # jal overwrites $ra, so the current value is parked in $s0 around the call
    move $s0 $ra    #save return address into s0
    nop
    jal index_of_A  #get addr offset for array1
    nop
    move $ra $s0    #restore return address that was saved into s0

#call index_of_B
    move $s0 $ra    #save return address into s0
    nop
    jal index_of_B  #get addr offset for array2
    nop
    move $ra $s0    #restore return address that was saved into s0

    # NOTE(review): $s2 and $s3 are element indices (size*i + k and
    # size*k + j), not byte offsets, and they are added on top of the
    # current pointers rather than to the array bases. With $s1 = 4 and
    # $t4 = 4, the first two increments of $t1 are 20 and 19 bytes, so the
    # third lwc1 from array1 reads base+39, which is not 4-byte aligned.
    add $t1 $t1 $s2 # next address in the array1
    add $t2 $t2 $s3 # next address in the array2
    addi $t3 $t3 4 # next address in the array3 (advances on every k iteration)

    addi $t6 $t6 -1 #decrease k counter
    bne $t6 $0 k_loop #repeat k_loop

    # NOTE(review): $t6 is 0 here and is not reloaded before k_loop runs again.
    addi $t5 $t5 -1 #decrease j counter
    bne $t5 $0 j_loop #repeat j_loop

    addi $t4 $t4 -1 #decrease i counter
    bne $t4 $0 i_loop #repeat i_loop

# Index helpers. Both read $s1 (matrix size) and the loop counters and leave
# an element index in an $s register; neither touches memory or FP registers.
# NOTE(review): no jump separates the end of the i loop from index_of_A, so
# when the last bne falls through, execution runs into index_of_A and leaves
# through its jr $ra.
index_of_A: # $s2 = size*i + k  (inputs: $s1 = size, $t4 = i, $t6 = k)
    #size*i + k
    # NOTE(review): the result is an element index; it is not multiplied by
    # the 4-byte float size before the caller adds it to an address.
    mul $s2 $s1 $t4 # 4*i, 
    add $s2 $s2 $t6 # + k, store in $s2
    jr $ra #jump back to the caller


index_of_B: # $s3 = size*k + j  (inputs: $s1 = size, $t6 = k, $t5 = j)
    #4*k + j
    mul $s3 $s1 $t6 # 4*k, 
    add $s3 $s3 $t5 # + j, store in $s3
    jr $ra #jump back to the caller


# Done multiplying...
# NOTE(review): no visible instruction branches or jumps to print_and_end,
# and the code just before it ends in "jr $ra".
    .data
sm: .asciiz "Done multiplying\n"
    .text
print_and_end:
    li $v0 4    # syscall 4 (print_str)
    la $a0 sm
    syscall

# Done with the program!
    # Reload the return address stored at the top of main and return.
    lw $31 saved_ret_pc
    jr $31      # Return from main

#Terminate the program
    # NOTE(review): unreachable as written - it follows an unconditional
    # jr and has no label.
    li $v0, 10  # syscall 10 (exit)
    syscall

.end main

But I don't understand what's wrong, since the exact same code works in another example of mine here:


Solution

  • 4x4 Matrix multiplication Okay, so I figured it out so I am answering my own question.

    I learned many things along the way and those include

    1. .align is not necessary to run the code. It works without them. Perhaps I didn't need it for this specific situation.
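    2. The print_float syscall (code 2) prints whatever is in $f12, so the value to print must be loaded or moved into $f12 first ($f13 only matters as the other half of the register pair when printing a double). If you leave the float in some other register, it won't print.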
    3. The first offset calculation I made is 0, which is why I need to add it at the top of the loop, instead of at the end. That's what was causing all the trouble.
    4. Be sure to calculate your index correctly. Look at my code comment to see what I do :)

    Here is the final version of my code that works! You can see my GitHub for the matrix multiplication in Python, C, and Assembly: https://github.com/leochoo/cmpa

    .data
    
    #define matrices: A and B are the inputs, R receives the product.
    #Each holds 16 consecutive single-precision floats.
        .globl A
        .globl B
        .globl R
    
        .align 4 #pad to a 2^4 = 16-byte boundary (SPIM-style ".align n" means 2^n bytes)
        A: .float 1.00, 0.00, 3.14, 2.72, 2.72, 1.00, 0.00, 3.14, 1.00, 1.00, 1.00, 1.00, 1.00, 2.00, 3.00, 4.00
        .align 4 
        B: .float 1.00, 1.00, 0.00, 3.14, 0.00, 1.00, 3.14, 2.72, 0.00, 1.00, 1.00, 0.00, 4.00, 3.00, 2.00, 1.00
        .align 4 
        R: .float 0.00, 0.00, 0.00, 0.00, 0.00, 0.00, 0.00, 0.00, 0.00, 0.00, 0.00, 0.00, 0.00, 0.00, 0.00, 0.00
    
    
        matrix_size: .word 4 #row and column size
        float_size: .word 4 #size of one .float in bytes (4 bytes = 32 bits).
                            #Consecutive elements are therefore 4 bytes apart:
                            #element n of a matrix lives at base + 4*n.
                            #The code multiplies every element index by this value.
    
        tempSum: .float 0.00 #constant 0.0; loaded into $f5 to start each dot-product sum
    
        lineBrk: .asciiz "\n"
    
    
        #For debugging: labels printed in front of the traced values
        arr_1: .asciiz "A: "
        arr_2: .asciiz " B: "
        arr_3: .asciiz " R: "
        i_:     .asciiz " i:"
        j_:     .asciiz "j:"
        k_:     .asciiz "k:"
        space_: .asciiz " "
        bar_:   .asciiz " | "
    
    
    
    
    #TEXT (MAIN) SECTION - multiply matrix A by matrix B, store the product in R
    #
    #Register roles set up here:
    #   $t1, $t2 = cursors into A and B (recomputed from the bases before every load)
    #   $t3      = cursor into R (advanced by float_size after each stored element)
    #   $s1      = matrix_size (row/column count, also the loop bound)
    #   $s2      = float_size (bytes per element)
    #   $s6, $s7 = base addresses of A and B
    #   $f5      = running dot-product sum, starts at tempSum (0.0)
    .text
        .globl main
    main:
    
        #print title
            .data
        lb_:    .asciiz "Vector Multiplication\n"
        lbd_:   .byte 1, -1, 0, 128
        lbd1_:  .word 0x76543210, 0xfedcba98
            .text
        li $v0 4    # syscall 4 (print_str)
        la $a0 lb_
        syscall
    
    #load matrix base addresses
    la $t1 A
    la $t2 B
    la $t3 R
    
    #load variables
    #(the former "li $s1 0" was a dead store: $s1 is loaded on the next line,
    # and the per-element offset is kept in $s0, not $s1)
    lw $s1 matrix_size # $s1 = matrix_size
    lw $s2 float_size # $s2 = float_size
    l.s $f5 tempSum # $f5 = 0.0, the initial running sum
    
    
    #keep the base addresses so each element address can be formed as base + offset
    move $s6 $t1 # $s6 = base address of matrix A
    move $s7 $t2 # $s7 = base address of matrix B
    
    
    
    #Triple loop computing R = A * B, one dot product per (i, j):
    #for i in 0 .. matrix_size-1:
        #for j in 0 .. matrix_size-1:
            #for k in 0 .. matrix_size-1:
    #Each counter is reset right before its loop label, so every pass starts at 0.
        li $t4 0 # i counter
    i_loop:
            li $t5 0 # j counter
        j_loop:
                li $t6 0 # k counter
            k_loop:
                #update index of A[i:t4][k:t6]
                    # $s0 = offset result
                    # $s1 = matrix_size: 4
                    # $s2 = float_size: 4
                    # $s6 = base address of A
    
                    #calculate offset
                    mul $s0 $s1 $t4 # s0 = matrix_size*i
                    add $s0 $s0 $t6 # s0 = s0 + k
                    mul $s0 $s0 $s2 # s0 = float_size*s0  (element index -> byte offset)
    
                    #address = base + byte offset
                    add $t1 $s6 $s0 # $t1 = &A[i][k]; the offset is 0 on the very first pass (i = k = 0)
    
                #update index of B[k:t6][j:t5]
                    # $s0 = offset result
                    # $s1 = matrix_size: 4
                    # $s2 = float_size: 4
                    # $s7 = base address of B
    
                    #calculate offset
                    mul $s0 $s1 $t6 # s0 = matrix_size*k
                    add $s0 $s0 $t5 # s0 = s0 + j
                    mul $s0 $s0 $s2 # s0 = float_size*s0  (element index -> byte offset)
    
                    #address = base + byte offset
                    add $t2 $s7 $s0 # $t2 = &B[k][j]
    
    
                #load matrix A and B
                lwc1 $f1 0($t1) #load float from matrix A
                lwc1 $f2 0($t2) #load float from matrix B
                nop             #filler after the loads; presumably for a load delay - TODO confirm it is needed
                    #print i, j, k  (debug trace)
    
                    li $v0 4        
                    la $a0 i_
                    syscall         # "i"
    
                    li $v0 1 
                    move $a0 $t4
                    syscall         # value of i
    
                    li $v0 4        
                    la $a0 j_
                    syscall         # "j"
    
                    li $v0 1 
                    move $a0 $t5
                    syscall         # value of j
    
    
                    li $v0 4        
                    la $a0 k_
                    syscall         # "k"
    
                    li $v0 1 
                    move $a0 $t6
                    syscall         # value of k
    
                    li $v0 4        # " | "
                    la $a0 bar_
                    syscall 
    
                    #print A and B
                    #syscall 2 (print_float) prints $f12, so each element is
                    #loaded a second time into $f12; $f1/$f2 keep the operands.
                    li $v0 4    
                    la $a0 arr_1
                    syscall
    
                    lwc1 $f12 0($t1) #A
                    li $v0 2
                    syscall
    
                    li $v0 4    
                    la $a0 arr_2
                    syscall
    
                    lwc1 $f12 0($t2) #B
                    li $v0 2
                    syscall
    
    
                #Break down: tempSum += A[i][k] * B[k][j]
                #(float_size only scales the address offsets above; it is not part of the product)
                #### first result: (1*1)+(0*0)+(3.14*0)+(2.72*4)
    
                # (A * B)
                nop
                mul.s $f0 $f1 $f2 # (a*b)
                nop
                #tempSum:$f5 = tempSum + (A * B)
                add.s $f5 $f5 $f0
                nop
                    ####1st = (A*B)
                    ####2nd = (A*B) + (A*B)         
    
    
                #DON'T UPDATE index of R here
                #you only need to update it 16 times, hence in j_loop
    
            #k_loop end condition
            addi $t6 $t6 1 # k++
            bne $t6 $s1 k_loop #if k != matrix_size, repeat k_loop
    
    
        #store R[i][j] = tempSum:$f5
        swc1 $f5 0($t3) #store the finished dot product in R
        nop
    
        #reset tempSum = 0 by reloading the 0.0 constant
        l.s $f5 tempSum
    
        #load and print element in R
        li $v0 4    
        la $a0 arr_3 # " R "
        syscall     
    
        lwc1 $f12 0($t3)
        li $v0 2
        syscall 
    
        li $v0 4
        la $a0 lineBrk #print( '\n' )
        syscall
    
        #advance the R cursor by one float. Unlike A and B it is not recomputed
        #from i and j: it moves once per j iteration, so R is filled in the
        #order R[0][0], R[0][1], ...
        add $t3 $t3 $s2
    
    
        #j_loop end condition
        addi $t5 $t5 1 
        bne $t5 $s1 j_loop 
    
    #i_loop end condition
    addi $t4 $t4 1 
    bne $t4 $s1 i_loop
    
    
    # Epilogue: announce completion, then leave through the exit syscall.
        .data
    sm: .asciiz "Done multiplying\n"
        .text
    print_and_end:
        la $a0 sm       # $a0 = address of the completion message
        li $v0 4        # service 4: print_str
        syscall
    
        li $v0 10       # service 10: exit
        syscall
    
    .end main