Search code examples
c++optimization

How smart is my compiler?


I am trying to estimate how many cycles my computer takes to perform different operations, so I perform the same operation 100K times and calculate the average. I am using loop unrolling (unwinding) to be a little more accurate: I perform 10 basic operations in each iteration and increase my index by 10, resulting in fewer loop-control operations.

None of this really matters for my question: is there any way the compiler can figure out that I'm doing the same operation several times and perform it only once? Here's my loop:

// Timing loop from the question: the same store to `result` is written out
// ten times per iteration and the index advances by LOOP_FACTOR each pass.
// `i`, `iterations`, `result` and LOOP_FACTOR are declared elsewhere (not
// shown here); LOOP_FACTOR is presumably 10 to match the ten stores -- confirm.
for (i=0; i<iterations; i+=LOOP_FACTOR)
{
    result = -1;
    // The nine stores below repeat the one above verbatim; they are the
    // redundant operations the question asks whether a compiler may drop.
    result = -1;
    result = -1;
    result = -1;
    result = -1;
    result = -1;
    result = -1;
    result = -1;
    result = -1;
    result = -1;
}

Also, I don't know if it matters - I am using Eclipse. I thought it might matter as there are different compilers out there.


Solution

  • With GCC and no optimization, the loop is compiled exactly as written, with all ten stores emitted:

    (gdb) disas main
    Dump of assembler code for function main:
       0x00000000004004e4 <+0>: push   rbp
       0x00000000004004e5 <+1>: mov    rbp,rsp
       0x00000000004004e8 <+4>: mov    DWORD PTR [rip+0x200482],0x0        # 0x600974 <i>
       0x00000000004004f2 <+14>:    jmp    0x400567 <main+131>
       0x00000000004004f4 <+16>:    mov    DWORD PTR [rip+0x200472],0xffffffff        # 0x600970 <result>
       0x00000000004004fe <+26>:    mov    DWORD PTR [rip+0x200468],0xffffffff        # 0x600970 <result>
       0x0000000000400508 <+36>:    mov    DWORD PTR [rip+0x20045e],0xffffffff        # 0x600970 <result>
       0x0000000000400512 <+46>:    mov    DWORD PTR [rip+0x200454],0xffffffff        # 0x600970 <result>
       0x000000000040051c <+56>:    mov    DWORD PTR [rip+0x20044a],0xffffffff        # 0x600970 <result>
       0x0000000000400526 <+66>:    mov    DWORD PTR [rip+0x200440],0xffffffff        # 0x600970 <result>
       0x0000000000400530 <+76>:    mov    DWORD PTR [rip+0x200436],0xffffffff        # 0x600970 <result>
       0x000000000040053a <+86>:    mov    DWORD PTR [rip+0x20042c],0xffffffff        # 0x600970 <result>
       0x0000000000400544 <+96>:    mov    DWORD PTR [rip+0x200422],0xffffffff        # 0x600970 <result>
       0x000000000040054e <+106>:   mov    DWORD PTR [rip+0x200418],0xffffffff        # 0x600970 <result>
       0x0000000000400558 <+116>:   mov    eax,DWORD PTR [rip+0x200416]        # 0x600974 <i>
       0x000000000040055e <+122>:   add    eax,0x1
       0x0000000000400561 <+125>:   mov    DWORD PTR [rip+0x20040d],eax        # 0x600974 <i>
       0x0000000000400567 <+131>:   mov    eax,DWORD PTR [rip+0x200407]        # 0x600974 <i>
       0x000000000040056d <+137>:   cmp    eax,0x3e7
       0x0000000000400572 <+142>:   jle    0x4004f4 <main+16>
       0x0000000000400574 <+144>:   mov    eax,DWORD PTR [rip+0x2003f6]        # 0x600970 <result>
       0x000000000040057a <+150>:   mov    esi,eax
       0x000000000040057c <+152>:   mov    edi,0x40067c
       0x0000000000400581 <+157>:   mov    eax,0x0
       0x0000000000400586 <+162>:   call   0x4003e0 <printf@plt>
       0x000000000040058b <+167>:   pop    rbp
       0x000000000040058c <+168>:   ret
    

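    But if you compile with basic optimization (gcc -O), the ten stores are collapsed into a single write that is performed once, after the loop (note that the now-empty counting loop is still emitted):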

    Dump of assembler code for function main:
       0x00000000004004e4 <+0>: sub    rsp,0x8
       0x00000000004004e8 <+4>: mov    eax,0x3e8
       0x00000000004004ed <+9>: sub    eax,0x1
       0x00000000004004f0 <+12>:    jne    0x4004ed <main+9>
       0x00000000004004f2 <+14>:    mov    DWORD PTR [rip+0x2003fc],0xffffffff        # 0x6008f8 <result>
       0x00000000004004fc <+24>:    mov    DWORD PTR [rip+0x2003f6],0x3e8        # 0x6008fc <i>
       0x0000000000400506 <+34>:    mov    esi,0xffffffff
       0x000000000040050b <+39>:    mov    edi,0x40060c
       0x0000000000400510 <+44>:    mov    eax,0x0
       0x0000000000400515 <+49>:    call   0x4003e0 <printf@plt>
       0x000000000040051a <+54>:    add    rsp,0x8
       0x000000000040051e <+58>:    ret  
    

    My testing code is:

    /* Declares printf. Without this header the call below is an implicit
       function declaration, which is invalid in C99 and later and in C++. */
    #include <stdio.h>
    
    /* Number of loop iterations (0x3e8 in the disassembly above). */
    #define TIMES 1000
    
    /* Both variables are globals, so every assignment is a store to memory. */
    int result, i;
    
    /* Test program: performs ten identical stores to `result` per iteration,
       then prints the final value of `result`. */
    int main() {
        for (i=0; i<TIMES; i++)
        {
            /* Ten identical stores, written out by hand on purpose: they are
               what the compiler is being tested on. */
            result = -1;
            result = -1;
            result = -1;
            result = -1;
            result = -1;
            result = -1;
            result = -1;
            result = -1;
            result = -1;
            result = -1;
        }
        /* Print the final value so that `result` is actually used. */
        printf("%d", result);
    }