Search code examples
c assembly x86 got

Is memcpy really a function with a symbol?


This simple C program:

#include <stdio.h>
#include <string.h>
int *add(int a, int b){                 /* adds a and b; hands back a pointer to the local array ar */
    int ar[1];                          /* automatic storage: exists only until add() returns */
    int result = a+b;
    memcpy(ar, &result, sizeof(int));   /* copy the sum into ar[0] */
    return ar;                          /* pointer to a local: dangling once the caller receives it */
}

int main(){
    int a = add(1,2)[0];                /* reads through the pointer that add() returned */
    printf("%i\n",a);                   /* print the value that was read */
}

is compiled into this:

.text
# add -- compiler output for `int *add(int a, int b)` (AT&T syntax).
#   In:  %edi = a, %esi = b
#   Out: %eax = 0 (a null pointer is returned, not the address of `ar`)
#   Locals are addressed below %rbp without adjusting %rsp -- presumably the
#   System V red zone, which a leaf function such as this one may use.
    .globl  add
    .type   add, @function
add:
    pushq   %rbp    # save the caller's frame pointer
    movq    %rsp, %rbp  # set up this function's frame pointer
    movl    %edi, -20(%rbp) # a, a
    movl    %esi, -24(%rbp) # b, b
# a.c:5:    int result = a+b;
    movl    -20(%rbp), %edx # a, tmp91
    movl    -24(%rbp), %eax # b, tmp92
    addl    %edx, %eax  # tmp91, _1
# a.c:5:    int result = a+b;
    movl    %eax, -8(%rbp)  # _1, result
# a.c:6:    memcpy(ar, &result, sizeof(int)); ---I SEE NO CALL INSTRUCTION---
# The two movl instructions below are the whole copy: a 4-byte load from
# `result` followed by a 4-byte store to `ar`.
    movl    -8(%rbp), %eax  # MEM[(char * {ref-all})&result], _6
    movl    %eax, -4(%rbp)  # _6, MEM[(char * {ref-all})&ar]
# a.c:7:    return ar;
    movl    $0, %eax    #--THE FUNCTION SHOULD RETURN ADDRESS OF ARRAY, NOT 0. OTHERWISE command terminated
#   lea -4(%rbp), %rax  #--ONLY THIS IS CORRECT, NOT `0`
# a.c:8: }
    popq    %rbp    # restore the caller's frame pointer
    ret 
    .size   add, .-add
    .section    .rodata
# Format string handed to printf by main.
.LC0:
    .string "%i\n"
    .text
# main -- compiler output for `int main()` (AT&T syntax).
#   Calls add(1, 2), loads an int through the returned pointer, prints it
#   with printf, and returns 0 in %eax.
    .globl  main
    .type   main, @function
main:
    pushq   %rbp    # save the caller's frame pointer
    movq    %rsp, %rbp  # set up this function's frame pointer
    subq    $16, %rsp   # reserve 16 bytes of locals; `a` is kept at -4(%rbp)
# a.c:11:   int a = add(1,2)[0];
    movl    $2, %esi    # second argument: b = 2
    movl    $1, %edi    # first argument: a = 1
    call    add # function defined in this file: plain call, no @PLT suffix
# a.c:11:   int a = add(1,2)[0];
# Loads through the pointer returned in %rax. add ends with `movl $0, %eax`,
# so that pointer is null here -- presumably the access that terminates the program.
    movl    (%rax), %eax    # *_1, tmp90
    movl    %eax, -4(%rbp)  # tmp90, a
# a.c:12:   printf("%i\n",a);
    movl    -4(%rbp), %eax  # a, tmp91
    movl    %eax, %esi  # tmp91, second printf argument: the value of a
    leaq    .LC0(%rip), %rdi    # first printf argument: RIP-relative address of the format string
    movl    $0, %eax    # %al = 0: no vector-register arguments for the variadic call
    call    printf@PLT  # library function: called through the PLT
    movl    $0, %eax    #, _6  (main's return value: 0)
# a.c:13: }
    leave   
    ret 
    .size   main, .-main
    .ident  "GCC: (Debian 8.3.0-6) 8.3.0"
    .section    .note.GNU-stack,"",@progbits

Every function from the standard library, such as printf or puts, is called through the GOT (i.e. the %rip register holds the address of the GOT). But memcpy is not: it appears as inline assembly instructions instead of a regular call to an address. So is memcpy even a symbol? If so, why does it not appear as the operand of a call instruction? Is memcpy in the GOT? If so, what is the offset from the GOT to that symbol?


Solution

  • So first off, you have a bug:

    $ cc -O2 -S test.c
    test.c: In function ‘add’:
    test.c:7:12: warning: function returns address of local variable
    

    Returning the address of a local variable has undefined behavior, if and only if the caller uses that value; this is why your compiler generated code that returned a null pointer, which will crash the program if used but be harmless otherwise. In fact, my copy of GCC generates only this for add:

    add:                                # the entire function as emitted for the original add()
            xorl    %eax, %eax          # %eax = 0: return a null pointer
            ret
    

    because that treatment of the return value makes the other operations in add be dead code.

    (The "only if used" restriction is also why my compiler generates a warning, not a hard error.)

    Now, if I modify your program to have well-defined behavior, e.g.

    #include <stdio.h>
    #include <string.h>
    
    /* Writes a + b to *sum. The value is transferred with memcpy rather than
       with a plain assignment. */
    void add(int *sum, int a, int b)
    {
        const int total = a + b;
        memcpy(sum, &total, sizeof total);
    }
    
    /* Calls add() with 1 and 2, then prints the sum it stored in `total`. */
    int main(void)
    {
        int total;

        add(&total, 1, 2);
        printf("%i\n",total);
        return 0;
    }
    

    then I do indeed see assembly code in which the memcpy call has been replaced by inline code:

    add:                                # void add(int *sum, int a, int b): %rdi = sum, %esi = a, %edx = b
        addl    %edx, %esi              # %esi = a + b
        movl    %esi, (%rdi)            # *sum = a + b -- this 4-byte store is the inlined memcpy
        ret
    

    This is a feature of many modern C compilers: they know what some of the C library's functions do, and can inline them when that makes sense. (You can see that in this case the generated code is both smaller and faster than it would have been with an actual call to memcpy.)

    GCC lets me turn this feature off with a command-line option:

    $ gcc -O2 -ffreestanding -S test.c   # -S: stop after compiling and write test.s for sed to read
    $ sed -ne '/^add:/,/cfi_endproc/{; /^\.LF[BE]/d; /\.cfi_/d; p; }' test.s
    add:                                # void add(int *sum, int a, int b): %rdi = sum, %esi = a, %edx = b
        subq    $24, %rsp               # room for the spilled sum; also leaves %rsp 16-byte aligned at the call
        addl    %edx, %esi              # %esi = a + b
        movl    $4, %edx                # 3rd argument: byte count, sizeof(int) = 4
        movl    %esi, 12(%rsp)          # spill the sum to the stack so it has an address
        leaq    12(%rsp), %rsi          # 2nd argument: address of the spilled sum
        call    memcpy@PLT              # real call through the PLT; 1st argument (%rdi) is still sum
        addq    $24, %rsp               # release the stack space
        ret
    

    In this mode, the call to memcpy in add is treated the same as the call to printf in main. Your compiler may have similar options.