Search code examples
c++performancegccmingwtdm-mingw

tdm gcc 5.1 slower than 4.7


I'm using the TDM-GCC compilers to build my WinAPI projects. When I tested some simple Mandelbrot SSE code (it may be the same with other projects, but I have not tested everything), 5.1 generated a larger executable, 330 kB against 270 kB (though I did not recompile everything, only the hot-loop module, and linked it with modules already compiled in 4.7), and also noticeably slower code: 23.5 ms/frame against 20 ms in 4.7.

It is scary. What can I do about it? (The rest of the settings etc. are the same; I only rename the compiler folder from one to another.)

I'm using C-like code but compiling in C++ mode.

Does anyone know how to resolve it? (By "resolve" I mean making the 5.1 build run at least as fast as the 4.7 one; I would also prefer the executable to be smaller.)

//edit

P.S. I have since made a quick test,

since I can compile the loop module with one compiler version and link everything with the other (e.g. compile the loop in 5.1 and link in 4.7):

compile loop 47 link 47: size 270k speed 20 ms

compile loop 51 link 51: size 330k speed 23.5 ms

compile loop 47 link 51: size 330k speed 20 ms

compile loop 51 link 47: size 270k speed 23.5 ms

This shows that the speed drop comes from compiling with 5.1, and the size bloat comes from linking with 5.1.


Solution

  • I checked the assembly, and it shows some changes in the generated code, though they are slight:

    4.7

    # GCC 4.7 output for mandelbrot_n_sse (32-bit x86, AT&T syntax: op src, dst).
    # Inputs as read by this code: %xmm0 and %xmm1 (used without being set here)
    # and a 32-bit integer at 8(%ebp), i.e. the first stack argument.
    # The result is left in %xmm0 as packed 32-bit integers (cvtps2dq at L10).
    # LC4 and LC5 are constants defined outside this excerpt.
    # Register roles inside the loop:
    #   %xmm4 = a, %xmm2 = b            (both start at 0)
    #   %xmm6 = a*a, %xmm5 = b*b
    #   %xmm0 = running per-lane sum of (LC5 & compare mask)
    #   %eax  = iteration counter, %ecx = iteration limit
    # Each trip computes a' = a*a - b*b + saved %xmm0 and b' = 2*a*b + %xmm1.
    __Z16mandelbrot_n_sseU8__vectorfS_i: 
        pushl        %ebp 
        movl        %esp, %ebp 
        andl        $-16, %esp          # round %esp down to a 16-byte boundary (movaps needs it)
        subl        $16, %esp           # reserve one 16-byte slot
        movl        8(%ebp), %ecx       # %ecx = integer argument (iteration limit)
        movaps        %xmm0, (%esp)     # spill incoming %xmm0; %xmm0 is reused as the accumulator
        testl        %ecx, %ecx 
        js        L12                   # negative limit: return all zeros
        xorps        %xmm0, %xmm0       # accumulator = 0
        xorl        %eax, %eax          # counter = 0
        movaps        %xmm0, %xmm2      # b = 0
        movaps        %xmm0, %xmm4      # a = 0
        jmp        L11                  # enter the loop at the test
        .p2align 4,,7 
    L19: 
        mulps        %xmm4, %xmm2       # b = a*b (uses old a before it is overwritten below)
        addl        $1, %eax            # ++counter
        subps        %xmm5, %xmm6       # %xmm6 = a*a - b*b
        movaps        (%esp), %xmm4     # reload the spilled %xmm0 argument with a separate load
        cmpl        %eax, %ecx          # flags for the jl below; the SSE ops in between leave EFLAGS alone
        addps        %xmm6, %xmm4       # a' = saved arg + (a*a - b*b), produced directly in %xmm4
        addps        %xmm2, %xmm2       # 2*a*b
        addps        %xmm1, %xmm2       # b' = 2*a*b + %xmm1
        jl        L10                   # limit < counter (signed): leave the loop
    L11: 
        movaps        %xmm4, %xmm6 
        movaps        %xmm2, %xmm5 
        movaps        LC5, %xmm7        # per-lane increment constant
        mulps        %xmm4, %xmm6       # %xmm6 = a*a
        mulps        %xmm2, %xmm5       # %xmm5 = b*b
        movaps        %xmm6, %xmm3 
        addps        %xmm5, %xmm3       # %xmm3 = a*a + b*b
        cmpltps        LC4, %xmm3       # %xmm3 = per-lane mask of (a*a + b*b < LC4)
        andps        %xmm3, %xmm7       # keep the increment only in lanes that passed the compare
        movmskps        %xmm3, %edx     # %edx = sign bits of the mask, one bit per lane
        testl        %edx, %edx 
        addps        %xmm7, %xmm0       # accumulate; does not disturb the flags from testl
        jne        L19                  # at least one lane passed: iterate again
    L10: 
        cvtps2dq        %xmm0, %xmm0    # convert the float sums to packed 32-bit integers
        leave 
        ret 
    L12: 
        xorps        %xmm0, %xmm0       # zero result for the negative-limit case
        jmp        L10 
        .globl        __Z16mandelbrot_n_sseDv4_fS_i 
    

    5.1

    # GCC 5.1 output for the same function (32-bit x86, AT&T syntax: op src, dst).
    # Inputs as read by this code: %xmm0 and %xmm1 (used without being set here)
    # and a 32-bit integer at 8(%ebp), i.e. the first stack argument.
    # The result is left in %xmm0 as packed 32-bit integers (cvtps2dq at L9).
    # LC6 and LC7 are constants defined outside this excerpt.
    # Register roles inside the loop:
    #   %xmm2 = a, %xmm5 = b            (both start at 0)
    #   %xmm4 = a*a (then a'), %xmm6 = b*b
    #   %xmm0 = running per-lane sum of (LC7 & compare mask)
    #   %edx  = iteration counter, %ecx = iteration limit
    # Differences from the 4.7 listing visible here: the spilled argument is
    # folded into addps as a memory operand instead of a separate movaps load,
    # a' is produced in %xmm4 and then copied to %xmm2 by an extra reg-reg
    # movaps at the bottom of the loop body, pxor replaces xorps for zeroing,
    # and the loop alignment directive is ".p2align 4,,10" rather than "4,,7".
    # NOTE(review): which of these accounts for the measured slowdown is not
    # established by the listing alone.
    __Z16mandelbrot_n_sseDv4_fS_i: 
        pushl        %ebp 
        movl        %esp, %ebp 
        andl        $-16, %esp          # round %esp down to a 16-byte boundary (movaps needs it)
        subl        $16, %esp           # reserve one 16-byte slot
        movl        8(%ebp), %ecx       # %ecx = integer argument (iteration limit)
        movaps        %xmm0, (%esp)     # spill incoming %xmm0; %xmm0 is reused as the accumulator
        testl        %ecx, %ecx 
        js        L11                   # negative limit: return all zeros
        pxor        %xmm0, %xmm0        # accumulator = 0
        xorl        %edx, %edx          # counter = 0
        movaps        %xmm0, %xmm5      # b = 0
        movaps        %xmm0, %xmm2      # a = 0
        jmp        L10                  # enter the loop at the test
        .p2align 4,,10 
    L18: 
        mulps        %xmm2, %xmm5       # b = a*b
        addl        $1, %edx            # ++counter
        subps        %xmm6, %xmm4       # %xmm4 = a*a - b*b
        cmpl        %edx, %ecx          # flags for the jl below; the SSE ops in between leave EFLAGS alone
        addps        %xmm5, %xmm5       # 2*a*b
        addps        (%esp), %xmm4      # a' = (a*a - b*b) + saved arg, read straight from the stack slot
        addps        %xmm1, %xmm5       # b' = 2*a*b + %xmm1
        jl        L9                    # limit < counter (signed): leave the loop
        movaps        %xmm4, %xmm2      # extra copy: move a' into the register L10 expects
    L10: 
        movaps        %xmm2, %xmm4 
        movaps        %xmm5, %xmm6 
        movaps        LC7, %xmm7        # per-lane increment constant
        mulps        %xmm2, %xmm4       # %xmm4 = a*a
        mulps        %xmm5, %xmm6       # %xmm6 = b*b
        movaps        %xmm4, %xmm3 
        addps        %xmm6, %xmm3       # %xmm3 = a*a + b*b
        cmpltps        LC6, %xmm3       # %xmm3 = per-lane mask of (a*a + b*b < LC6)
        andps        %xmm3, %xmm7       # keep the increment only in lanes that passed the compare
        movmskps        %xmm3, %eax     # %eax = sign bits of the mask, one bit per lane
        testl        %eax, %eax 
        addps        %xmm7, %xmm0       # accumulate; does not disturb the flags from testl
        jne        L18                  # at least one lane passed: iterate again
    L9: 
        cvtps2dq        %xmm0, %xmm0    # convert the float sums to packed 32-bit integers
        leave 
        ret 
    L11: 
        pxor        %xmm0, %xmm0        # zero result for the negative-limit case
        jmp        L9 
        .section        .text.unlikely,"x" 
    LCOLDE8: 
        .text 
    

    It seems that the 5.1 version is the unlucky one, and it costs a 15% slowdown.