Search code examples
c, strcmp

Why are the return values of strcmp different?


Here is the C code, which I compiled with gcc:

/* a and d point at the one-character string literals "a" and "d". */
char *a="a";
char *d="d";
/* The same comparison written three ways. Reported output:
   -1 / -3 / -1 when compiled with -O, and -1 / -3 / -3 without it. */
printf("%d\n", strcmp("a", "d"));   /* both arguments are string literals */
printf("%d\n", strcmp(a, "d"));     /* pointer variable vs. string literal */
printf("%d\n", strcmp(a, d));       /* both arguments are pointer variables */

When I compile with -O, the output is:

-1
-3
-1

When I compile without -O, the output is:

-1
-3
-3

Why is the output different, and what is the code of strcmp?


Solution

  • Why the output is different

    Because all that matters is the sign (positive, negative or zero) of the return value. strcmp() is not required to return +1 or -1, nor does it have to return the same magnitude every time it is given the same arguments. I suspect that in the cases that print -1 (the first and third with -O, only the first without it), the compiler optimizes away the call to strcmp() and puts -1 in place of the return value. In the cases that print -3, I think the library function is actually called.

  • What is the code of strcmp?

    Judging from the fact that it seemingly returns the difference between the character codes of the first pair of differing characters ('a' - 'd' = 97 - 100 = -3), I'd say this is glibc's strcmp():

    /* Compare the NUL-terminated strings P1 and P2.

       Returns a value less than, equal to, or greater than zero if P1 is
       lexicographically less than, equal to, or greater than P2.  The value
       is the difference between the first pair of bytes that differ (or
       between the terminator and the corresponding byte of the longer
       string), with both bytes taken as unsigned char.  Callers should rely
       only on the sign.  */
    int
    strcmp (const char *p1, const char *p2)
    {
      /* Walk both strings as unsigned bytes so that bytes above 0x7f
         compare greater than ASCII characters.  */
      const unsigned char *s1 = (const unsigned char *) p1;
      const unsigned char *s2 = (const unsigned char *) p2;
      unsigned char c1, c2;

      do
        {
          c1 = *s1++;
          c2 = *s2++;
          /* Stop at the end of P1; c2 is either the terminator as well
             (result 0) or a non-zero byte (negative result).  */
          if (c1 == '\0')
            return c1 - c2;
        }
      while (c1 == c2);

      /* First mismatch: both operands promote to int, so the result can be
         negative.  */
      return c1 - c2;
    }
    

    Edit: @AndreyT doesn't believe me, so here's the assembly GCC 4.2 generated for me (OS X 10.7.5 64-bit Intel, default optimization level - no flags):

        .section    __TEXT,__text,regular,pure_instructions
    #-----------------------------------------------------------------------
    # int main(void)  -- compiler output, AT&T syntax, Mach-O, no optimization
    #
    # Prints strcmp("a", "d"), strcmp(a, "d") and strcmp(a, d) with "%d\n".
    # Annotations use '#': in x86 GNU/Apple as, ';' separates statements and
    # does not start a comment.
    #
    # Frame slots (offsets from %rbp):
    #   -16   a  = address of L_.str   ("a")
    #   -24   d  = address of L_.str1  ("d")
    #   -32   saved address of L_.str1, used for the literal "d" argument
    #   -8,-4 temporaries for the return value 0
    #-----------------------------------------------------------------------
        .globl  _main
        .align  4, 0x90
    _main:
    Leh_func_begin1:
        pushq   %rbp
    Ltmp0:
        movq    %rsp, %rbp
    Ltmp1:
        subq    $32, %rsp
    Ltmp2:
        leaq    L_.str(%rip), %rax
        movq    %rax, -16(%rbp)       # a = "a"
        leaq    L_.str1(%rip), %rax
        movq    %rax, -24(%rbp)       # d = "d"
        movl    $-1, %ecx             # <- THIS! strcmp("a", "d") is already the constant -1
        xorb    %dl, %dl
        leaq    L_.str2(%rip), %rsi
        movq    %rsi, %rdi            # printf arg 1: format "%d\n"
        movl    %ecx, %esi            # printf arg 2: -1
        movq    %rax, -32(%rbp)       # keep the address of "d" for the next strcmp
        movb    %dl, %al              # al = 0: no vector registers in this variadic call
        callq   _printf               # <- no call to `strcmp()` so far!
        movq    -16(%rbp), %rax
        movq    %rax, %rdi            # strcmp arg 1: a
        movq    -32(%rbp), %rsi       # strcmp arg 2: saved address of "d"
        callq   _strcmp               # <- strcmp()
        movl    %eax, %ecx
        xorb    %dl, %dl
        leaq    L_.str2(%rip), %rdi
        movl    %ecx, %esi            # printf arg 2: strcmp result
        movb    %dl, %al
        callq   _printf               # <- printf()
        movq    -16(%rbp), %rax
        movq    -24(%rbp), %rcx
        movq    %rax, %rdi            # strcmp arg 1: a
        movq    %rcx, %rsi            # strcmp arg 2: d
        callq   _strcmp               # <- strcmp()
        movl    %eax, %ecx
        xorb    %dl, %dl
        leaq    L_.str2(%rip), %rdi
        movl    %ecx, %esi            # printf arg 2: strcmp result
        movb    %dl, %al
        callq   _printf               # <- printf()
        movl    $0, -8(%rbp)          # return 0, routed through two stack temporaries
        movl    -8(%rbp), %eax
        movl    %eax, -4(%rbp)
        movl    -4(%rbp), %eax
        addq    $32, %rsp
        popq    %rbp
        ret
    Leh_func_end1:
    
    # String constants referenced by main.
        .section    __TEXT,__cstring,cstring_literals
    L_.str:
        .asciz   "a"
    
    L_.str1:
        .asciz   "d"
    
    L_.str2:
        .asciz   "%d\n"
    
    # Compiler-emitted __eh_frame data for _main; bytes kept exactly as generated.
        .section    __TEXT,__eh_frame,coalesced,no_toc+strip_static_syms+live_support
    EH_frame0:
    Lsection_eh_frame:
    Leh_frame_common:
    Lset0 = Leh_frame_common_end-Leh_frame_common_begin
        .long   Lset0
    Leh_frame_common_begin:
        .long   0
        .byte   1
        .asciz   "zR"
        .byte   1
        .byte   120
        .byte   16
        .byte   1
        .byte   16
        .byte   12
        .byte   7
        .byte   8
        .byte   144
        .byte   1
        .align  3
    Leh_frame_common_end:
        .globl  _main.eh
    _main.eh:
    Lset1 = Leh_frame_end1-Leh_frame_begin1
        .long   Lset1
    Leh_frame_begin1:
    Lset2 = Leh_frame_begin1-Leh_frame_common
        .long   Lset2
    Ltmp3:
        .quad   Leh_func_begin1-Ltmp3
    Lset3 = Leh_func_end1-Leh_func_begin1
        .quad   Lset3
        .byte   0
        .byte   4
    Lset4 = Ltmp0-Leh_func_begin1
        .long   Lset4
        .byte   14
        .byte   16
        .byte   134
        .byte   2
        .byte   4
    Lset5 = Ltmp1-Ltmp0
        .long   Lset5
        .byte   13
        .byte   6
        .align  3
    Leh_frame_end1:
    
    
    .subsections_via_symbols
    

    And the original source code:

    /* Demonstration program: prints the result of strcmp() for the same two
       one-character strings, passed in three different ways. */
    #include <stdio.h>
    #include <string.h>
    
    int main()
    {
        const char *a = "a";
        const char *d = "d";
        /* Both arguments are string literals. In the unoptimized listing shown
           with this program, this call is replaced by the constant -1. */
        printf("%d\n", strcmp("a", "d"));
        /* Pointer variable vs. string literal: the listing calls _strcmp. */
        printf("%d\n", strcmp(a, "d"));
        /* Two pointer variables: the listing calls _strcmp. */
        printf("%d\n", strcmp(a, d));
    
        return 0;
    }
    

    And the output it generated (a screenshot, as better proof):

    enter image description here