Search code examples
c assembly gdb nasm

GDB Disassembler shows assembly code - but in a different way than I expected


I recently started using the gdb disassembler and wanted to see how it really displays the assembly code, and whether the output is logical. I tried it on a C program, debugging a function that calculates the length of a linked list.

This is the C code (Not mine, have to credit this site):

/*
 * Return the number of nodes in the list that starts at `head`.
 * `head` is not a parameter; it is declared elsewhere in the program.
 */
int length() {
   int count = 0;
   struct node *cursor = head;

   /* Advance one link per iteration until the end of the list (NULL). */
   while (cursor != NULL) {
      count++;
      cursor = cursor->next;
   }

   return count;
}

Compiled:

gcc linkedlist.c -o linkedlist

This is the resulting disassembly (Intel syntax):

   0x00000000000012a8 <+0>: endbr64 
   0x00000000000012ac <+4>: push   rbp
   0x00000000000012ad <+5>: mov    rbp,rsp
   0x00000000000012b0 <+8>: mov    DWORD PTR [rbp-0xc],0x0
   0x00000000000012b7 <+15>:    mov    rax,QWORD PTR [rip+0x2d5a]        # 0x4018 <head>
   0x00000000000012be <+22>:    mov    QWORD PTR [rbp-0x8],rax
   0x00000000000012c2 <+26>:    jmp    0x12d4 <length+44>
   0x00000000000012c4 <+28>:    add    DWORD PTR [rbp-0xc],0x1
   0x00000000000012c8 <+32>:    mov    rax,QWORD PTR [rbp-0x8]
   0x00000000000012cc <+36>:    mov    rax,QWORD PTR [rax+0x8]
   0x00000000000012d0 <+40>:    mov    QWORD PTR [rbp-0x8],rax
   0x00000000000012d4 <+44>:    cmp    QWORD PTR [rbp-0x8],0x0
   0x00000000000012d9 <+49>:    jne    0x12c4 <length+28>
   0x00000000000012db <+51>:    mov    eax,DWORD PTR [rbp-0xc]
   0x00000000000012de <+54>:    pop    rbp
   0x00000000000012df <+55>:    ret  

What really bothers me (a small thing that I noticed; maybe you will notice more) is that this is not the kind of assembly code I was taught. I remember teachers/professors saying over and over again: "Don't use mov reg, 0x0, just xor reg, reg."

But here it does: mov DWORD PTR [rbp-0xc],0x0

Which I assume is the initialization of the variable int length = 0;

My questions are: why doesn't it show the most efficient code? And if it can't always do that (nothing is perfect), why doesn't it at least detect an initialization to 0 and automatically emit xor instead of mov? Does that really matter for performance, and if so, by what factor?

Maybe there are more lines that could have been replaced or omitted entirely, but as a beginner I do not notice them; this specific one I did. Any explanation?


Solution

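  • By default, gcc compiles at -O0, i.e. with optimization disabled. At that level it translates each source statement more or less literally and keeps every local variable in memory on the stack, which is why `int length = 0;` becomes a store of 0 to [rbp-0xc]. The xor idiom only zeroes a register, so it does not apply to a store to memory. Once optimization is enabled the counter is kept in a register, and at -O3 and -Os the compiler does emit `xor eax, eax`. The example below compiles the same function at several optimization levels: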

    /* Singly linked list node; the only member is the link to the next node. */
    struct node {
        struct node *next;
    };

    /*
     * Return the number of nodes reachable from head by following next.
     * A NULL head yields 0. This version carries no optimize attribute, so it
     * is compiled at whatever level the command line selects.
     */
    int length(struct node *head) {
        int count = 0;
        struct node *cursor = head;

        /* Advance one link per iteration until the end of the list (NULL). */
        while (cursor != NULL) {
            count++;
            cursor = cursor->next;
        }

        return count;
    }
    
    /*
     * Same node-counting algorithm as length(); the GCC optimize attribute
     * requests -O3 for this one function only.
     */
    int __attribute__((optimize("-O3"))) length1(struct node *head) {
        int count = 0;
        struct node *cursor = head;

        /* Advance one link per iteration until the end of the list (NULL). */
        while (cursor != NULL) {
            count++;
            cursor = cursor->next;
        }

        return count;
    }
    
    /*
     * Same node-counting algorithm as length(); the GCC optimize attribute
     * requests -Os (optimize for size) for this one function only.
     */
    int __attribute__((optimize("-Os"))) length2(struct node *head) {
        int count = 0;
        struct node *cursor = head;

        /* Advance one link per iteration until the end of the list (NULL). */
        while (cursor != NULL) {
            count++;
            cursor = cursor->next;
        }

        return count;
    }
    
    /*
     * Same node-counting algorithm as length(); the GCC optimize attribute
     * requests -Og (debug-friendly optimization) for this one function only.
     */
    int __attribute__((optimize("-Og"))) length3(struct node *head) {
        int count = 0;
        struct node *cursor = head;

        /* Advance one link per iteration until the end of the list (NULL). */
        while (cursor != NULL) {
            count++;
            cursor = cursor->next;
        }

        return count;
    }
    

    and the assembly generated for each function:

    # length(): compiled without optimization; both locals live on the stack.
    #   [rbp-4]  = length (int)    [rbp-16] = current    [rbp-24] = saved head
    length:
            push    rbp
            mov     rbp, rsp
            mov     QWORD PTR [rbp-24], rdi     # store the head argument (arrives in rdi)
            mov     DWORD PTR [rbp-4], 0        # length = 0 (memory store, so mov rather than xor)
            mov     rax, QWORD PTR [rbp-24]
            mov     QWORD PTR [rbp-16], rax     # current = head
            jmp     .L2                         # test the loop condition first
    .L3:
            add     DWORD PTR [rbp-4], 1        # length++
            mov     rax, QWORD PTR [rbp-16]
            mov     rax, QWORD PTR [rax]        # load current->next (offset 0)
            mov     QWORD PTR [rbp-16], rax     # current = current->next
    .L2:
            cmp     QWORD PTR [rbp-16], 0       # current != NULL ?
            jne     .L3
            mov     eax, DWORD PTR [rbp-4]      # return length
            pop     rbp
            ret
    # length1(): -O3 output; the counter is kept in eax and current in rdi.
    length1:
            xor     eax, eax                    # length = 0 (register zeroing idiom)
            test    rdi, rdi                    # head == NULL ?
            je      .L8                         # empty list: return 0
    .L7:
            mov     rdi, QWORD PTR [rdi]        # current = current->next
            add     eax, 1                      # length++
            test    rdi, rdi
            jne     .L7                         # repeat while current != NULL
            ret
    .L8:
            ret
    # length2(): -Os output; the counter is kept in eax and current in rdi.
    length2:
            xor     eax, eax                    # length = 0 (register zeroing idiom)
    .L12:
            test    rdi, rdi                    # current == NULL ?
            je      .L14                        # yes: done
            mov     rdi, QWORD PTR [rdi]        # current = current->next
            inc     eax                         # length++
            jmp     .L12
    .L14:
            ret
    # length3(): -Og output; the counter is kept in eax and current in rdi.
    length3:
            mov     eax, 0                      # length = 0 (plain mov here, not xor)
            jmp     .L16                        # test the loop condition first
    .L17:
            add     eax, 1                      # length++
            mov     rdi, QWORD PTR [rdi]        # current = current->next
    .L16:
            test    rdi, rdi                    # current != NULL ?
            jne     .L17
            ret