I'm using the TDM-GCC compilers to build my WinAPI projects. When I tested some simple Mandelbrot SSE code (it may be the same with other projects, but I have not tested everything), GCC 5.1 generated a larger executable, 330 kB against 270 kB (though I did not recompile everything, only the hot-loop module, and linked it with modules already compiled in 4.7), and also noticeably slower code: 23.5 ms/frame against 20 ms in 4.7.
It is scary. What can I do about it? (The rest of the settings etc. are the same; I only renamed the compiler folder from one to the other.)
I'm using C-like code but compiling it in C++ mode.
Does anyone know how to resolve this? (By "resolve" I mean making the 5.1 build run at least as fast as the 4.7 one; I would also prefer the executable to be smaller.)
//edit
P.S. I have now run a quick test,
since I can compile the loop module with one version and link everything with the other (e.g. compile the loop in 5.1 and link in 4.7):
compile loop 47 link 47: size 270k speed 20 ms
compile loop 51 link 51: size 330k speed 23.5 ms
compile loop 47 link 51: size 330k speed 20 ms
compile loop 51 link 47: size 270k speed 23.5 ms
This shows that the speed drop comes from compiling with 5.1 and the size bloat comes from linking with 5.1.
I checked the assembly, and it shows some changes in the generated code, though they are slight.
4.7
# mandelbrot_n_sse as emitted by GCC 4.7 (AT&T syntax, 32-bit x86 with SSE).
# The mangled name decodes to mandelbrot_n_sse(float vector, float vector, int).
# Inputs as used below: %xmm0 and %xmm1 are read before being written, and
# the int argument is loaded from 8(%ebp).
# Result: %xmm0, converted to packed 32-bit integers just before returning.
# Register roles inside the loop:
#   %xmm4, %xmm2 = the two iterated vectors (both start at zero)
#   %xmm6, %xmm5 = their lane-wise squares
#   %xmm3        = sum of the squares, then the per-lane compare mask
#   %xmm0        = per-lane accumulator
#   %eax         = loop counter, %ecx = the int argument
# NOTE(review): presumably %xmm4/%xmm2 are the real/imaginary parts of z and
# the incoming %xmm0/%xmm1 are the real/imaginary parts of c - confirm against
# the C source, which is not shown here.
__Z16mandelbrot_n_sseU8__vectorfS_i:
pushl %ebp
movl %esp, %ebp
andl $-16, %esp        # round the stack pointer down to a 16-byte boundary
subl $16, %esp         # reserve one 16-byte slot for the movaps spill below
movl 8(%ebp), %ecx     # ecx = int argument
movaps %xmm0, (%esp)   # spill incoming xmm0; xmm0 is reused as the accumulator
testl %ecx, %ecx
js L12                 # negative int argument: return an all-zero vector
xorps %xmm0, %xmm0     # accumulator = 0
xorl %eax, %eax        # counter = 0
movaps %xmm0, %xmm2    # second iterated vector = 0
movaps %xmm0, %xmm4    # first iterated vector = 0
jmp L11                # enter the loop at its test half
.p2align 4,,7
# Update half of the loop: compute the next pair of iterated vectors.
L19:
mulps %xmm4, %xmm2     # xmm2 = xmm4 * xmm2 (product of the two vectors)
addl $1, %eax          # counter += 1
subps %xmm5, %xmm6     # xmm6 = xmm4^2 - xmm2^2 (squares from the test half)
movaps (%esp), %xmm4   # reload the spilled first vector argument
cmpl %eax, %ecx        # flags for the jl below; SSE ops in between leave EFLAGS alone
addps %xmm6, %xmm4     # new xmm4 = spilled argument + difference of squares
addps %xmm2, %xmm2     # double the product
addps %xmm1, %xmm2     # new xmm2 = 2 * product + second vector argument
jl L10                 # stop once the int argument is less than the counter (signed)
# Test half of the loop: magnitude check and accumulation.
L11:
movaps %xmm4, %xmm6
movaps %xmm2, %xmm5
movaps LC5, %xmm7      # NOTE(review): LC5 is defined outside this listing - presumably 1.0 per lane
mulps %xmm4, %xmm6     # xmm6 = xmm4^2
mulps %xmm2, %xmm5     # xmm5 = xmm2^2
movaps %xmm6, %xmm3
addps %xmm5, %xmm3     # xmm3 = sum of squares
cmpltps LC4, %xmm3     # per-lane mask: sum < LC4 (NOTE(review): presumably the escape threshold)
andps %xmm3, %xmm7     # keep the LC5 value only in lanes where the compare was true
movmskps %xmm3, %edx   # edx = one bit per lane of the mask
testl %edx, %edx       # flags for the jne below
addps %xmm7, %xmm0     # accumulator += masked constant
jne L19                # at least one lane passed the compare: iterate again
L10:
cvtps2dq %xmm0, %xmm0  # convert the float accumulator to packed 32-bit integers
leave                  # restore esp from ebp and pop ebp
ret
L12:
xorps %xmm0, %xmm0     # zero result for the negative-argument case
jmp L10
.globl __Z16mandelbrot_n_sseDv4_fS_i
5.1
# mandelbrot_n_sse as emitted by GCC 5.1 (AT&T syntax, 32-bit x86 with SSE).
# The mangled name decodes to mandelbrot_n_sse(4 x float vector, same type, int).
# Inputs as used below: %xmm0 and %xmm1 are read before being written, and
# the int argument is loaded from 8(%ebp).
# Result: %xmm0, converted to packed 32-bit integers just before returning.
# Register roles inside the loop:
#   %xmm2, %xmm5 = the two iterated vectors (both start at zero)
#   %xmm4, %xmm6 = their lane-wise squares
#   %xmm3        = sum of the squares, then the per-lane compare mask
#   %xmm0        = per-lane accumulator
#   %edx         = loop counter, %ecx = the int argument
# NOTE(review): presumably %xmm2/%xmm5 are the real/imaginary parts of z and
# the incoming %xmm0/%xmm1 are the real/imaginary parts of c - confirm against
# the C source, which is not shown here.
__Z16mandelbrot_n_sseDv4_fS_i:
pushl %ebp
movl %esp, %ebp
andl $-16, %esp        # round the stack pointer down to a 16-byte boundary
subl $16, %esp         # reserve one 16-byte slot for the movaps spill below
movl 8(%ebp), %ecx     # ecx = int argument
movaps %xmm0, (%esp)   # spill incoming xmm0; xmm0 is reused as the accumulator
testl %ecx, %ecx
js L11                 # negative int argument: return an all-zero vector
pxor %xmm0, %xmm0      # accumulator = 0
xorl %edx, %edx        # counter = 0
movaps %xmm0, %xmm5    # second iterated vector = 0
movaps %xmm0, %xmm2    # first iterated vector = 0
jmp L10                # enter the loop at its test half
.p2align 4,,10
# Update half of the loop: compute the next pair of iterated vectors.
L18:
mulps %xmm2, %xmm5     # xmm5 = xmm2 * xmm5 (product of the two vectors)
addl $1, %edx          # counter += 1
subps %xmm6, %xmm4     # xmm4 = xmm2^2 - xmm5^2 (squares from the test half)
cmpl %edx, %ecx        # flags for the jl below; SSE ops in between leave EFLAGS alone
addps %xmm5, %xmm5     # double the product
addps (%esp), %xmm4    # add the spilled first vector argument straight from memory
addps %xmm1, %xmm5     # new xmm5 = 2 * product + second vector argument
jl L9                  # stop once the int argument is less than the counter (signed)
movaps %xmm4, %xmm2    # copy the updated vector into xmm2, which L10 reads next
# NOTE(review): L10 immediately copies xmm2 back into xmm4, so the updated value
# passes through two register moves before the multiply. Whether this accounts
# for the timing difference reported in the post is unverified.
# Test half of the loop: magnitude check and accumulation.
L10:
movaps %xmm2, %xmm4
movaps %xmm5, %xmm6
movaps LC7, %xmm7      # NOTE(review): LC7 is defined outside this listing - presumably 1.0 per lane
mulps %xmm2, %xmm4     # xmm4 = xmm2^2
mulps %xmm5, %xmm6     # xmm6 = xmm5^2
movaps %xmm4, %xmm3
addps %xmm6, %xmm3     # xmm3 = sum of squares
cmpltps LC6, %xmm3     # per-lane mask: sum < LC6 (NOTE(review): presumably the escape threshold)
andps %xmm3, %xmm7     # keep the LC7 value only in lanes where the compare was true
movmskps %xmm3, %eax   # eax = one bit per lane of the mask
testl %eax, %eax       # flags for the jne below
addps %xmm7, %xmm0     # accumulator += masked constant
jne L18                # at least one lane passed the compare: iterate again
L9:
cvtps2dq %xmm0, %xmm0  # convert the float accumulator to packed 32-bit integers
leave                  # restore esp from ebp and pop ebp
ret
L11:
pxor %xmm0, %xmm0      # zero result for the negative-argument case
jmp L9
.section .text.unlikely,"x"
LCOLDE8:
.text
It seems that the 5.1 version is the unlucky one, and it costs a 15% slowdown.