Search code examples
c performance gnu

Why does applying the const attribute to a pure function not reduce the elapsed time?


According to the GNU document:

int square (int) __attribute__ ((const)) tells GCC that subsequent calls to function square with the same argument value can be replaced by the result of the first call regardless of the statements in between.

I expected the following code to slow down when __attribute__((const)) is removed from the function declaration.

#include <stdio.h>
#include <limits.h>

// The const attribute tells GCC that calls to my_double with the same
// argument value can be replaced by the result of the first call.
int my_double(int b) __attribute__((const));

// Variant of the declaration without the attribute (swap it in to compare):
//int my_double(int b);


int main(void) {
  /* Running sum of the values returned by my_double(5). */
  long total = 0;
  int i = 0;

  /* INT_MAX/2 passes; each pass adds one my_double(5) result to the sum. */
  while (i < INT_MAX/2) {
    total += my_double(5);
    i++;
  }

  printf("%ld\n", total);
  return 0;
}

/* Returns twice the value of b. */
int my_double(int b) {
  int doubled = b + b;
  return doubled;
}

However, the experiments show that __attribute__((const)) does not affect timing results significantly. Does anyone know the reason? Thanks.

By the way, I use the following commands to clear any cache that might pollute the timing results of each experiment.

  # Write 3 to /proc/sys/vm/drop_caches to ask the kernel to drop its caches.
  sudo sh -c 'echo 3 > /proc/sys/vm/drop_caches'
  # Turn swap off and then back on again.
  sudo swapoff -a
  sudo swapon -a

And use /usr/bin/time to time the experiments.

PS. The corresponding assembly is as follows: (I am unfamiliar with assembly)

    .file   "attribute-o.c"
    .text
    .section    .rodata
.LC0:
    .string "%ld\n"
    .text
    .globl  main
    .type   main, @function
# main: sums the return value of my_double(5) in a loop, then prints the sum
# with printf. Both locals live in stack slots and are read and written in
# memory on every iteration:
#   -8(%rbp)  : 64-bit running sum
#   -12(%rbp) : 32-bit loop counter
main:
.LFB0:
    .cfi_startproc
    endbr64
    # Set up a frame pointer and reserve 16 bytes for the two locals.
    pushq   %rbp
    .cfi_def_cfa_offset 16
    .cfi_offset 6, -16
    movq    %rsp, %rbp
    .cfi_def_cfa_register 6
    subq    $16, %rsp
    # sum = 0, counter = 0, then jump straight to the loop condition.
    movq    $0, -8(%rbp)
    movl    $0, -12(%rbp)
    jmp .L2
.L3:
    # Loop body: call my_double with 5 in %edi (first integer argument).
    # The call is executed on every iteration.
    movl    $5, %edi
    call    my_double
    # Sign-extend the 32-bit result in %eax to 64 bits and add it to the sum.
    cltq
    addq    %rax, -8(%rbp)
    # counter++
    addl    $1, -12(%rbp)
.L2:
    # Loop condition: keep going while counter <= 1073741822.
    cmpl    $1073741822, -12(%rbp)
    jle .L3
    # printf(.LC0, sum): sum goes in %rsi, format string address in %rdi.
    movq    -8(%rbp), %rax
    movq    %rax, %rsi
    leaq    .LC0(%rip), %rax
    movq    %rax, %rdi
    # %al = 0: no vector registers are used for this variadic call.
    movl    $0, %eax
    call    printf@PLT
    # Return 0 from main.
    movl    $0, %eax
    leave
    .cfi_def_cfa 7, 8
    ret
    .cfi_endproc
.LFE0:
    .size   main, .-main
    .globl  my_double
    .type   my_double, @function
# my_double: takes a 32-bit integer in %edi and returns twice that value
# in %eax.
my_double:
.LFB1:
    .cfi_startproc
    endbr64
    pushq   %rbp
    .cfi_def_cfa_offset 16
    .cfi_offset 6, -16
    movq    %rsp, %rbp
    .cfi_def_cfa_register 6
    # Store the argument at -4(%rbp) (%rsp is not adjusted), then reload it.
    movl    %edi, -4(%rbp)
    movl    -4(%rbp), %eax
    # eax = eax + eax, i.e. the argument doubled.
    addl    %eax, %eax
    popq    %rbp
    .cfi_def_cfa 7, 8
    ret
    .cfi_endproc
.LFE1:
    .size   my_double, .-my_double
    .ident  "GCC: (Ubuntu 11.2.0-7ubuntu2) 11.2.0"
    .section    .note.GNU-stack,"",@progbits
    .section    .note.gnu.property,"a"
    .align 8
    .long   1f - 0f
    .long   4f - 1f
    .long   5
0:
    .string "GNU"
1:
    .align 8
    .long   0xc0000002
    .long   3f - 2f
2:
    .long   0x3
3:
    .align 8
4:

Solution

  • __attribute__ ((const)) is about common subexpression elimination inside the same expression.

    There is no common subexpression in the expression my_double(5), therefore nothing can be eliminated and the generated code is the same.

    If you replace my_double(5) with my_double(5) + my_double(5), then the same subexpression occurs twice. As the compiler does not know what my_double(5) does (there may be side effects), my_double(5) must be called twice unless the compiler knows that my_double has no side effects (because of __attribute__ ((const))); in that case my_double(5) needs to be called only once and its result can be added to itself.

    Check here: https://www.godbolt.org/z/bGnfq9zM5