Wrong calling conventions with float values with MinGW

I am trying to make my assembly functions work with C code, and found that MinGW does not follow the calling conventions for floating-point values, and that the behavior differs between versions of MinGW.

Test file with a single function, test.c:

#include <xmmintrin.h>

/*
 * Multiplies v1 and v2 lane by lane and returns the lowest lane of the
 * product as a scalar float. The other three products are discarded.
 */
float vector_dot(__m128 v1, __m128 v2)
{
    return _mm_cvtss_f32(_mm_mul_ps(v1, v2));
}

Compiled to assembly with:

gcc -O -S test.c -msse4.1

    /* 32-bit output for vector_dot from the GCC build named in .ident below. */
    .file   "test.c"
    .text
    .globl  _vector_dot
    .def    _vector_dot;    .scl    2;  .type   32; .endef
_vector_dot:
LFB503:
    .cfi_startproc
    /* Standard frame-pointer prologue. */
    pushl   %ebp
    .cfi_def_cfa_offset 8
    .cfi_offset 5, -8
    movl    %esp, %ebp
    .cfi_def_cfa_register 5
    /* Round esp down to a 16-byte boundary and reserve 16 bytes of scratch. */
    andl    $-16, %esp
    subl    $16, %esp
    /* Both vector operands are taken directly from xmm0 and xmm1. */
    mulps   %xmm1, %xmm0
    /* Spill the lowest lane to the stack, then load it onto the x87 stack: */
    /* the float leaves the function in st(0), not in xmm0. */
    movss   %xmm0, 12(%esp)
    flds    12(%esp)
    /* leave undoes both the alignment and the frame set up above. */
    leave
    .cfi_restore 5
    .cfi_def_cfa 4, 4
    ret
    .cfi_endproc
LFE503:
    .ident  "GCC: (MinGW.org GCC-6.3.0-1) 6.3.0"

The float result is stored in the ST0 register, not in xmm0 as it is on Linux.

But with a different version of MinGW:

    /* 64-bit output for vector_dot from the GCC build named in .ident below. */
    .file   "test.c"
    .text
    .globl  vector_dot
    .def    vector_dot; .scl    2;  .type   32; .endef
    .seh_proc   vector_dot
vector_dot:
    .seh_endprologue
    /* Both operands are read from memory: rdx and rcx hold ADDRESSES of the */
    /* two vectors; the values do not arrive in xmm0/xmm1. */
    /* NOTE(review): this looks like the Microsoft x64 rule that __m128 */
    /* arguments are passed by reference - confirm against the ABI docs. */
    movaps  (%rdx), %xmm0
    mulps   (%rcx), %xmm0
    /* The product stays in xmm0; its lowest lane is the float result. */
    ret
    .seh_endproc
    .ident  "GCC: (x86_64-posix-seh-rev0, Built by MinGW-W64 project) 7.3.0"

just 2 instructions, args in xmm0, xmm1 and result in xmm0, as expected.

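BUT with this MinGW, calls to my function from C code pass the arguments the wrong way: as addresses in the rcx and rdx registers (rax is only used as a temporary) instead of in xmm registers.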

My assembly:

/*
 * vector_dot
 *   In:  xmm0 = vector, xmm1 = vector
 *   Out: xmm0 = float
 * Multiplies the two vectors lane by lane. There is no ret here, so
 * execution falls through into vector_horizontal_sum, which adds the
 * four products together.
 * NOTE(review): the 64-bit MinGW call site shown in this document passes
 * addresses in rcx/rdx rather than values in xmm0/xmm1 - confirm the
 * calling convention before calling this from C.
 */
vector_dot:
        mulps       %xmm1, %xmm0        /* xmm0 = lane-wise products */
        /* fall through */

/*
 * vector_horizontal_sum
 *   In:  xmm0 = vector (x0, x1, x2, x3)
 *   Out: xmm0 = float, x0 + x1 + x2 + x3 in the lowest lane
 *   Clobbers: xmm1
 */
vector_horizontal_sum:
        movshdup    %xmm0, %xmm1        /* xmm1 = (x1, x1, x3, x3) */
        addps       %xmm1, %xmm0        /* lane 0 = x0+x1, lane 2 = x2+x3 */
        movhlps     %xmm0, %xmm1        /* xmm1 lane 0 = x2+x3 */
        addss       %xmm1, %xmm0        /* lane 0 = (x0+x1) + (x2+x3) */
        ret

Function header in C

/* Declares the hand-written assembly routine. The asm("vector_dot") label
   gives the exact assembler-level symbol name to link against. */
extern float vector_dot(__m128 vector_1, __m128 vector_2) asm("vector_dot");

Calling my function

    /* Source arrays are 16-byte aligned because _mm_load_ps is the aligned load. */
    float values1[] __attribute__((aligned(16))) = { 1.3f, 5.4f, -4.f, 5. } ;
    __m128 vec1 = _mm_load_ps(values1);

    float values2[] __attribute__((aligned(16))) = {0.5f, -43.5f, 0, 0 };
    __m128 vec2 = _mm_load_ps(values2);

    /* Call into the assembly routine declared via the extern prototype. */
    float dot = vector_dot(vec1, vec2);

Disassembly of the function call: the compiler did not use xmm registers for the arguments, which causes my program to crash.

0x401614  <+  180>        0f 28 45 e0                 movaps -0x20(%rbp),%xmm0
0x401618  <+  184>        0f 29 85 40 ff ff ff        movaps %xmm0,-0xc0(%rbp)
0x40161f  <+  191>        0f 28 45 d0                 movaps -0x30(%rbp),%xmm0
0x401623  <+  195>        0f 29 85 30 ff ff ff        movaps %xmm0,-0xd0(%rbp)
0x40162a  <+  202>        48 8d 95 30 ff ff ff        lea    -0xd0(%rbp),%rdx
0x401631  <+  209>        48 8d 85 40 ff ff ff        lea    -0xc0(%rbp),%rax
0x401638  <+  216>        48 89 c1                    mov    %rax,%rcx
0x40163b  <+  219>        e8 d4 29 00 00              callq  0x404014 <vector_dot>
0x401640  <+  224>        66 0f 7e c0                 movd   %xmm0,%eax
0x401644  <+  228>        89 45 cc                    mov    %eax,-0x34(%rbp)

What is wrong with my code? Is it the MinGW compiler's fault?

Solution

It was wrong to compare a 32-bit MinGW binary with a 64-bit MinGW binary; when everything is compiled with the newest mingw-w64, all calling conventions are as expected.