Search code examples
c pointers assembly multidimensional-array function-parameter

Does a 2D array need to know its size beforehand in C?


Compare these two versions of the same function:

/* Prints ar[rows - 1][cols - 1], where ar is declared as a pointer to int *. */
void foo(int rows, int cols, int **ar)
{
  /* ar[rows - 1] is read as an int * (a row pointer) and then indexed by cols - 1. */
  printf("%d\n", ar[rows - 1][cols - 1]);
}

and

/* Prints ar[rows - 1][cols - 1], where ar is declared with the variable
   dimensions rows x cols, so each row is known to hold cols ints. */
void foo(int rows, int cols, int ar[rows][cols])
{
  /* ar[rows - 1] designates a row of cols ints; [cols - 1] selects its last element. */
  printf("%d\n", ar[rows - 1][cols - 1]);
}

for

/* Builds a 3x2 int array and passes it to foo together with its dimensions. */
int main()
{
  int ar[3][2] = {{1, 2}, {3, 4}, {5, 6}};
  foo(3, 2, ar);  /* asks foo for ar[2][1], whose value is 6 */
}

With the first foo, which takes just a double pointer, the program crashes (terminates abnormally). With the second one, where the sizes are specified, it prints the correct result. Why is that? Isn't an array passed to a function as a pointer anyway?

According to the assembly output, both lead to the same result. The point is calculating the offset from the beginning of the array. In the assembly, the first number (1) is stored at -32(%rbp), and the wanted result (6) is stored at -12(%rbp). So both assembly listings arrive at -32(%rbp) + 20 (after the calculations involved).

The assembly of first:

.text
    .section    .rodata
.LC0:
    .string "%d\n"
    .text
    .globl  foo
    .type   foo, @function
# void foo(int rows, int cols, int **ar)  -- the int ** version.
# In:  %edi = rows, %esi = cols, %rdx = ar (all three are spilled to the stack below).
# The element is reached with two memory loads: an 8-byte value is read from
# ar + 8*(rows-1) and used as a pointer, then the int at that pointer + 4*(cols-1)
# is read and passed to printf.
foo:
    endbr64 
    pushq   %rbp    #
    movq    %rsp, %rbp  #,
    subq    $16, %rsp   #,
    movl    %edi, -4(%rbp)  # rows, rows
    movl    %esi, -8(%rbp)  # cols, cols
    movq    %rdx, -16(%rbp) # ar, ar
# b.c:5:   printf("%d\n", ar[rows - 1][cols - 1]);
    movl    -4(%rbp), %eax  # rows, tmp92
    cltq
# %rdx = 8*rows - 8: the index rows-1 scaled by the 8-byte size of a pointer
    salq    $3, %rax    #, _2
    leaq    -8(%rax), %rdx  #, _3
    movq    -16(%rbp), %rax # ar, tmp93
    addq    %rdx, %rax  # _3, _4
# load the 8 bytes stored at ar + 8*(rows-1); they are used as the row pointer
    movq    (%rax), %rdx    # *_4, _5
# b.c:5:   printf("%d\n", ar[rows - 1][cols - 1]);
    movl    -8(%rbp), %eax  # cols, tmp94
    cltq
# %rax = 4*cols - 4: the index cols-1 scaled by the 4-byte size of an int
    salq    $2, %rax    #, _7
    subq    $4, %rax    #, _8
    addq    %rdx, %rax  # _5, _9

# FINAL ADDRESS RESOLUTION (IN REGISTER %rax) IS `-32(%rbp) + 20` (WHICH IS CORRECT ADDRESS OF NUMBER `6`)
# NOTE(review): the instructions above do not compute `-32(%rbp) + 20`. The
# `movq (%rax), %rdx` loads 8 bytes from ar + 8*(rows-1) and uses them as a
# pointer, so %rax here is that loaded value + 4*(cols-1).

# b.c:5:   printf("%d\n", ar[rows - 1][cols - 1]);
    movl    (%rax), %eax    # *_9, _10
    movl    %eax, %esi  # _10,
    leaq    .LC0(%rip), %rdi    #,
# variadic call: %eax = 0 because no vector registers carry arguments
    movl    $0, %eax    #,
    call    printf@PLT  #
# b.c:6: }
    nop 
    leave   
    ret 
    .size   foo, .-foo
    .globl  main
    .type   main, @function
# int main()
# Stores the six ints of ar[3][2] contiguously at -32(%rbp) .. -12(%rbp),
# calls foo(3, 2, ar) with the array's address in %rdx, and returns 0.
main:
    endbr64 
    pushq   %rbp    #
    movq    %rsp, %rbp  #,
    subq    $32, %rsp   #,
# b.c:9: {
# copy the guard value at %fs:40 to -8(%rbp); it is compared again before returning
    movq    %fs:40, %rax    # MEM[(<address-space-1> long unsigned int *)40B], tmp86
    movq    %rax, -8(%rbp)  # tmp86, D.2350
    xorl    %eax, %eax  # tmp86
# b.c:10:   int ar[3][2] = {{1, 2}, {3, 4}, {5, 6}};
    movl    $1, -32(%rbp)   #, ar[0][0]
    movl    $2, -28(%rbp)   #, ar[0][1]
    movl    $3, -24(%rbp)   #, ar[1][0]
    movl    $4, -20(%rbp)   #, ar[1][1]
    movl    $5, -16(%rbp)   #, ar[2][0]
    movl    $6, -12(%rbp)   #, ar[2][1]
# b.c:11:   foo(3, 2, ar);
# third argument (%rdx) = address of ar[0][0]
    leaq    -32(%rbp), %rax #, tmp84
    movq    %rax, %rdx  # tmp84,
    movl    $2, %esi    #,
    movl    $3, %edi    #,
    call    foo #
    movl    $0, %eax    #, _10
# b.c:12: }
# if the saved guard value no longer matches %fs:40, call __stack_chk_fail
    movq    -8(%rbp), %rcx  # D.2350, tmp87
    subq    %fs:40, %rcx    # MEM[(<address-space-1> long unsigned int *)40B], tmp87
    je  .L4 #,
    call    __stack_chk_fail@PLT    #
.L4:
    leave   
    ret 

And the second assembly is:

.text
    .section    .rodata
.LC0:
    .string "%d\n"
    .text
    .globl  foo
    .type   foo, @function
# void foo(int rows, int cols, int ar[rows][cols])  -- the variable-dimension version.
# In:  %edi = rows, %esi = cols, %rdx = ar (all three are spilled to the stack below).
# The element is reached with a single memory load from
# ar + 4*(rows-1)*cols + 4*(cols-1); no pointer is read from the array itself.
foo:
    endbr64 
    pushq   %rbp    #
    movq    %rsp, %rbp  #,
# %rbx is saved at -8(%rbp) and restored from there before returning
    pushq   %rbx    #
    subq    $40, %rsp   #,
    movl    %edi, -36(%rbp) # rows, rows
    movl    %esi, -40(%rbp) # cols, cols
    movq    %rdx, -48(%rbp) # ar, ar
# b.c:3: void foo(int rows, int cols, int ar[rows][cols])
    movl    -40(%rbp), %eax # cols, cols.0_6
    movslq  %eax, %rdx  # cols.0_6, _1
    subq    $1, %rdx    #, _2
# b.c:3: void foo(int rows, int cols, int ar[rows][cols])
# cols - 1 is stored at -24(%rbp); that slot, %rcx and %ebx are not read again below
    movq    %rdx, -24(%rbp) # _2, D.2346
    movslq  %eax, %rdx  # cols.0_6, _4
    movq    %rdx, %rcx  # _4, _5
    movl    $0, %ebx    #, _5
# b.c:5:   printf("%d\n", ar[rows - 1][cols - 1]);
    movl    -36(%rbp), %edx # rows, tmp99
    subl    $1, %edx    #, _9
    movslq  %edx, %rdx  # _9, _10
# b.c:5:   printf("%d\n", ar[rows - 1][cols - 1]);
# %eax still holds cols here; %rdx = ar + 4*(rows-1)*cols, the start of the last row
    cltq
    imulq   %rdx, %rax  # _10, _12
    leaq    0(,%rax,4), %rdx    #, _13
    movq    -48(%rbp), %rax # ar, tmp100
    addq    %rax, %rdx  # tmp100, _14
# b.c:5:   printf("%d\n", ar[rows - 1][cols - 1]);
    movl    -40(%rbp), %eax # cols, tmp101
    subl    $1, %eax    #, _15
# b.c:5:   printf("%d\n", ar[rows - 1][cols - 1]);
    cltq
# load the int at (start of last row) + 4*(cols-1)
    movl    (%rdx,%rax,4), %eax # (*_14)[_15], _16

# AGAIN, THE FINAL ADDRESS (FORMED BY THE OPERAND `(%rdx,%rax,4)` ABOVE) IS main's `-32(%rbp) + 20` (WHICH IS THE CORRECT ADDRESS OF NUMBER `6`); %eax RECEIVES THE LOADED VALUE

    movl    %eax, %esi  # _16,
    leaq    .LC0(%rip), %rdi    #,
# variadic call: %eax = 0 because no vector registers carry arguments
    movl    $0, %eax    #,
    call    printf@PLT  #
# b.c:6: }
    nop 
    movq    -8(%rbp), %rbx  #,
    leave   
    ret 
    .size   foo, .-foo
    .globl  main
    .type   main, @function
# int main()
# Stores the six ints of ar[3][2] contiguously at -32(%rbp) .. -12(%rbp),
# calls foo(3, 2, ar) with the array's address in %rdx, and returns 0.
main:
    endbr64 
    pushq   %rbp    #
    movq    %rsp, %rbp  #,
    subq    $32, %rsp   #,
# b.c:9: {
# copy the guard value at %fs:40 to -8(%rbp); it is compared again before returning
    movq    %fs:40, %rax    # MEM[(<address-space-1> long unsigned int *)40B], tmp86
    movq    %rax, -8(%rbp)  # tmp86, D.2355
    xorl    %eax, %eax  # tmp86
# b.c:10:   int ar[3][2] = {{1, 2}, {3, 4}, {5, 6}};
    movl    $1, -32(%rbp)   #, ar[0][0]
    movl    $2, -28(%rbp)   #, ar[0][1]
    movl    $3, -24(%rbp)   #, ar[1][0]
    movl    $4, -20(%rbp)   #, ar[1][1]
    movl    $5, -16(%rbp)   #, ar[2][0]
    movl    $6, -12(%rbp)   #, ar[2][1]
# b.c:11:   foo(3, 2, ar);
# third argument (%rdx) = address of ar[0][0]
    leaq    -32(%rbp), %rax #, tmp84
    movq    %rax, %rdx  # tmp84,
    movl    $2, %esi    #,
    movl    $3, %edi    #,
    call    foo #
    movl    $0, %eax    #, _10
# b.c:12: }
# if the saved guard value no longer matches %fs:40, call __stack_chk_fail
    movq    -8(%rbp), %rcx  # D.2355, tmp87
    subq    %fs:40, %rcx    # MEM[(<address-space-1> long unsigned int *)40B], tmp87
    je  .L4 #,
    call    __stack_chk_fail@PLT    #
.L4:
    leave   
    ret 

So why do both assembly listings use the same address to yield the number 6, and yet one crashes while the other prints?


Solution

  • The declared array

    int ar[3][2] = {{1, 2}, {3, 4}, {5, 6}};
    

    When used in an expression as a function argument, it is converted to the type int ( * )[2]. The types int ** and int ( * )[2] are not compatible pointer types, so the first function call is incorrect and the function invokes undefined behavior.

    Note that in both function calls the same address is passed: the address of the first element of the array.

    But within the first function, the expression ar[rows - 1] is expected to yield a pointer of the type int *, while what is actually stored at that memory location are int values of the array's elements (here ar[2][0] and ar[2][1], i.e. 5 and 6).

    Here is a demonstration program.

    #include <stdio.h>
    
    /* Prints the value found at ar[rows - 1] as a pointer, without dereferencing it further. */
    void foo(int rows, int cols, int **ar)
    {
        printf( "%p\n", ( void * )ar[rows - 1] );
    }
    
    /* Passes the 3x2 int array to foo through an explicit cast to int **. */
    int main(void) 
    {
        int ar[3][2] = {{1, 2}, {3, 4}, {5, 6}};
        foo(3, 2, ( int ** )ar);  /* the cast makes the incompatible pointer conversion explicit */
      
        return 0;
    }
    

    Its output might look like

    0x600000005
    

    That is, after dereferencing the pointer ar, elements of the array are interpreted as a pointer (here the ints 5 and 6 are read together as the 8-byte value 0x600000005). So dereferencing that pointer once more results in accessing arbitrary memory.

    The assembly code is generated in such a way that it interprets memory and values according to the type of the object stored there. Giving the same memory address a different type results in different generated assembly code.