c assembly arm calling-convention c-standard-library

How do I correctly call a function with float parameters from an ARM assembly program?

I'm writing a program that mixes C and assembly. The details of the program are shown below.
I use the C standard library function printf("%a", value), but I get incorrect output.

This is part of the assembly code. Written in file mys.s:

    ...
    @ Excerpt from main: multiplies 3.14159274f by 1.1f twice in single
    @ precision and hands the product to putfloat in S0.
    FLDS    S1, =1078530011         @ 0x40490FDB = bit pattern of 3.14159274f
    FLDS    S2, =1066192077         @ 0x3F8CCCCD = bit pattern of 1.1f
    FMULS   S3, S1, S2              @ S3 = S1 * S2
    FMRS    R4, S3                  @ copy the raw float bits into R4
    FMSR    S1, R4                  @ ...and straight back into S1
    FLDS    S2, =1066192077         @ 1.1f again
    FMULS   S3, S1, S2              @ S3 = (first product) * 1.1f
    FMRS    R5, S3                  @ raw bits of the final product
    FMSR    S0, R5                  @ S0 carries the float argument for putfloat
    BL  putfloat
    MOV R13, R7                     @ reload SP from R7 after the call
    ...

It implements something like:

putfloat(1.1*1.1*3.1415927410125732)

The implementation of 'putfloat()' is shown below:

void putfloat(float a) {
  printf("%a", a);
}

Written in file myc.c

The output should be:

0x1.e691e6p+1

But I actually got:

0x1.00000420a3ae7p+513

I use the GCC toolchain to compile, assemble, and link my project.
gcc version is 10.2.1 20210110 (Raspbian 10.2.1-6+rpi1)
The command to build my project is like:

gcc -march=armv7-a -mfloat-abi=hard myc.c mys.s -g -o run

I suppose it is enough to focus only on the function-call part of my program, unless I am overlooking something else.

I use gdb to debug my program, setting a breakpoint on 'BL putfloat'.
Since I selected the hard-float procedure call standard (-mfloat-abi=hard), the parameter should be passed in s0, a register of the floating-point coprocessor in the ARMv7-A architecture.

(gdb) c
Continuing.

Breakpoint 5, mainentry () at ./output/95_float_new.s:292
292             BL      putfloat
(gdb) print $s0
$2 = 3.80132747

The value in s0 is correct.

If putfloat() printed the value correctly, the output shown on the terminal would be

0x1.e691e6p+1

But I got a definitely wrong result.

(gdb) n
0x1.00000420a3ae7p+513
293             MOV     R13, R7

I would appreciate it if someone could point out where I went wrong.
What else affects the result of a C library function besides the argument list?
Or am I not following the ARM procedure call standard?

This is the disassembly of putfloat() and some information about the registers.

Breakpoint 2, mainentry () at ./output/95_float_new.s:292
292             BL      putfloat
(gdb) s
putfloat (a=3.80132747) at ./sysy/sylib.c:37
37        printf("%a\n", a);
(gdb) disassemble 
Dump of assembler code for function putfloat:
   0x00010aa4 <+0>:     push    {r7, lr}
   0x00010aa6 <+2>:     sub     sp, #8
   0x00010aa8 <+4>:     add     r7, sp, #0
   0x00010aaa <+6>:     vstr    s0, [r7, #4]
   0x00010aae <+10>:    vldr    s15, [r7, #4]
   0x00010ab2 <+14>:    vcvt.f64.f32    d7, s15
   0x00010ab6 <+18>:    vmov    r2, r3, d7
   0x00010aba <+22>:    movw    r0, #4576       ; 0x11e0
   0x00010abe <+26>:    movt    r0, #1
=> 0x00010ac2 <+30>:    blx     0x1040c <printf@plt>
   0x00010ac6 <+34>:    nop
   0x00010ac8 <+36>:    adds    r7, #8
   0x00010aca <+38>:    mov     sp, r7
   0x00010acc <+40>:    pop     {r7, pc}
End of assembler dump.
(gdb) print (char*)0x111e0
$1 = 0x111e0 "%a\n"
(gdb) info r
r0             0x111e0             70112
r1             0x420a3ae7          1107966695
r2             0x60000000          1610612736
r3             0x400e691e          1074686238
r4             0x405d2b0b          1079847691
r5             0x407348f3          1081297139
r6             0x10478             66680
r7             0xbefffb0c          3204446988
r8             0x0                 0
r9             0x0                 0
r10            0xb6fff000          3070226432
r11            0x0                 0
r12            0xbefffb48          3204447048
sp             0xbefffb0c          0xbefffb0c
lr             0x108cc             67788
pc             0x10ac2             0x10ac2 <putfloat+30>
cpsr           0x60000030          1610612784
fpscr          0x80000010          -2147483632

This is the complete code of mys.s

@ Data section: fifteen zero-initialised 4-byte slots, global1 .. global15.
@ None of them is referenced by the code in this listing.
.section .data
global1:
.zero   4
global2:
.zero   4
global3:
.zero   4
global4:
.zero   4
global5:
.zero   4
global6:
.zero   4
global7:
.zero   4
global8:
.zero   4
global9:
.zero   4
global10:
.zero   4
global11:
.zero   4
global12:
.zero   4
global13:
.zero   4
global14:
.zero   4
global15:
.zero   4
@ .bss is declared but left empty.
.section .bss
.section .text
.global main
@ Runtime helpers defined outside this file. putfloat is called further down
@ without a matching .extern line; GNU as treats any undefined symbol as
@ external, so the omission does not affect assembly or linking.
.extern putint
.extern getint
.extern getch
.extern putch
@ Everything below is assembled as ARM (A32) code for ARMv7-A.
.arch armv7-a
.arm
@ error: passes the character codes for "error\n" to putch, one call per
@ character. Takes no arguments. putch is external; presumably it prints the
@ character whose code is in R0.
@ R7 holds the SP value taken right after the prologue and is copied back into
@ SP after every call.
error:
    PUSH    {R7, R14}               @ save caller's R7 and the return address
    MOV R7, R13                     @ R7 = SP after the push
errorentry:
    LDR R0, =101                    @ 'e'
    BL  putch
    MOV R13, R7                     @ reload SP from R7
    LDR R0, =114                    @ 'r'
    BL  putch
    MOV R13, R7
    LDR R0, =114                    @ 'r'
    BL  putch
    MOV R13, R7
    LDR R0, =111                    @ 'o'
    BL  putch
    MOV R13, R7
    LDR R0, =114                    @ 'r'
    BL  putch
    MOV R13, R7
    LDR R0, =10                     @ '\n'
    BL  putch
    MOV R13, R7
    B   errorendlabel
errorendlabel:
    POP {R7, R15}                   @ restore R7 and return (saved LR is popped into PC)
    POP {R7, R15}                   @ unreachable: the POP above already loaded PC
@ Literal pool for the LDR Rn, =const loads above.
.pool
@ ok: passes the character codes for "ok\n" to putch, one call per character.
@ Takes no arguments. Same frame handling as error: R7 keeps the post-prologue
@ SP and is copied back into SP after each call.
ok:
    PUSH    {R7, R14}               @ save caller's R7 and the return address
    MOV R7, R13                     @ R7 = SP after the push
okentry:
    LDR R0, =111                    @ 'o'
    BL  putch
    MOV R13, R7                     @ reload SP from R7
    LDR R0, =107                    @ 'k'
    BL  putch
    MOV R13, R7
    LDR R0, =10                     @ '\n'
    BL  putch
    MOV R13, R7
    B   okendlabel
okendlabel:
    POP {R7, R15}                   @ restore R7 and return
    POP {R7, R15}                   @ unreachable: the POP above already loaded PC
@ Literal pool for the LDR Rn, =const loads above.
.pool
@ assert: calls error when the integer in R0 is zero; otherwise returns without
@ doing anything else.
@ In:  R0 = condition value
@ R4, R5 and R7 are saved in the prologue and restored before returning.
assert:
    PUSH    {R7, R14}               @ save caller's R7 and the return address
    PUSH    {R4, R5}                @ save the registers used as temporaries
    MOV R7, R13                     @ R7 = SP after both pushes
    MOV R4, R0                      @ keep the argument in R4
assertentry:
    LDR R0, =0
    CMP R4, R0
    LDR R5, =0
    MOVEQ   R5, #1                  @ R5 = (argument == 0)
    LDR R0, =0
    CMP R5, R0
    BEQ assertlabel1                @ R5 == 0, i.e. argument non-zero: skip the error call
    B   assertlabel2
assertlabel2:
    BL  error                       @ argument was zero: report the failure
    MOV R13, R7                     @ reload SP from R7
    B   assertlabel3
assertlabel3:
    B   assertendlabel
assertendlabel:
    POP {R4, R5}
    POP {R7, R15}                   @ restore R7 and return
assertlabel1:
    B   assertlabel3                @ nothing to do; go to the common exit
    POP {R4, R5}                    @ unreachable: follows an unconditional branch
    POP {R7, R15}                   @ unreachable
@ Literal pool for the LDR Rn, =const loads above.
.pool
@ float_abs: returns x when x is not less than 0.0, otherwise 0.0 - x.
@ In:  R0 = raw bits of a single-precision float x (passed in a core register,
@           not in S0)
@ Out: R0 = raw bits of the result
@ Uses S1-S3 as temporaries; saves and restores R4, R5 and R7. The 4 bytes
@ reserved by the SUB are never accessed in the visible code. Makes no calls.
float_abs:
    PUSH    {R7, R14}               @ save caller's R7 and the return address
    PUSH    {R4, R5}                @ save the registers used as temporaries
    SUB R13, R13, #4                @ reserve 4 bytes of frame space
    MOV R7, R13                     @ R7 = SP after the frame is set up
    MOV R4, R0                      @ keep x (raw bits) in R4
float_absentry:
    FMSR    S1, R4                  @ S1 = x
    FLDS    S2, =0                  @ S2 = 0.0f (all-zero bit pattern)
    FCMPS   S1, S2                  @ compare x with 0.0
    FMSTAT                          @ copy the VFP compare flags into the CPSR
    LDR R5, =0
    MOVLT   R5, #1                  @ R5 = 1 when the LT condition holds (x < 0.0)
    LDR R0, =0
    CMP R5, R0
    BEQ float_abslabel4             @ R5 == 0: return x unchanged
    B   float_abslabel5
float_abslabel5:
    FLDS    S1, =0                  @ S1 = 0.0f
    FMSR    S2, R4                  @ S2 = x
    FSUBS   S3, S1, S2              @ S3 = 0.0 - x
    FMRS    R5, S3
    MOV R0, R5                      @ return the negated value's bits
    ADD R13, R13, #4                @ release the 4-byte frame
    POP {R4, R5}
    POP {R7, R15}                   @ restore R7 and return
float_absendlabel:
    @ No visible branch targets this label, and the POP above never falls through.
    ADD R13, R13, #4
    POP {R4, R5}
    POP {R7, R15}
float_abslabel4:
    MOV R0, R4                      @ return x as it came in
    ADD R13, R13, #4                @ release the 4-byte frame
    POP {R4, R5}
    POP {R7, R15}                   @ restore R7 and return
    ADD R13, R13, #4                @ unreachable: the POP above already loaded PC
    POP {R4, R5}                    @ unreachable
    POP {R7, R15}                   @ unreachable
@ Literal pool for the =const loads above.
.pool
@ float_eq: returns 1 when float_abs(a - b) compares less than the constant
@ with bit pattern 897988541 (0x358637BD, about 1e-6f), otherwise 0.
@ In:  R0 = raw bits of float a, R1 = raw bits of float b (core registers,
@      not S0/S1)
@ Out: R0 = 1 or 0 (integer)
@ Uses S1-S3 as temporaries; saves and restores R4-R6 and R7. The 12 bytes
@ reserved by the SUB are never accessed in the visible code.
float_eq:
    PUSH    {R7, R14}               @ save caller's R7 and the return address
    PUSH    {R4, R5, R6}            @ save the registers used as temporaries
    SUB R13, R13, #12               @ reserve 12 bytes of frame space
    MOV R7, R13                     @ R7 = SP after the frame is set up
    MOV R4, R0                      @ R4 = a
    MOV R5, R1                      @ R5 = b
float_eqentry:
    FMSR    S1, R4
    FMSR    S2, R5
    FSUBS   S3, S1, S2              @ S3 = a - b
    FMRS    R6, S3
    MOV R0, R6                      @ pass the difference's bits in R0
    BL  float_abs
    MOV R13, R7                     @ reload SP from R7
    MOV R4, R0                      @ R4 = float_abs(a - b), raw bits
    FMSR    S1, R4
    FLDS    S2, =897988541          @ tolerance: 0x358637BD, about 1e-6f
    FCMPS   S1, S2                  @ compare the absolute difference with the tolerance
    FMSTAT                          @ copy the VFP compare flags into the CPSR
    LDR R5, =0
    MOVLT   R5, #1                  @ R5 = 1 when the LT condition holds
    LDR R0, =0
    CMP R5, R0
    BEQ float_eqlabel7              @ R5 == 0: not within tolerance, return 0
    B   float_eqlabel8
float_eqlabel8:
    FLDS    S1, =1065353216         @ 0x3F800000 = 1.0f
    vcvt.s32.f32    S2, S1          @ convert 1.0f to the signed integer 1
    FMRS    R0, S2                  @ return value 1
    ADD R13, R13, #12               @ release the 12-byte frame
    POP {R4, R5, R6}
    POP {R7, R15}                   @ restore R7 and return
float_eqendlabel:
    @ No visible branch targets this label, and the POP above never falls through.
    ADD R13, R13, #12
    POP {R4, R5, R6}
    POP {R7, R15}
float_eqlabel7:
    LDR R0, =0                      @ return value 0
    ADD R13, R13, #12               @ release the 12-byte frame
    POP {R4, R5, R6}
    POP {R7, R15}                   @ restore R7 and return
    ADD R13, R13, #12               @ unreachable: the POP above already loaded PC
    POP {R4, R5, R6}                @ unreachable
    POP {R7, R15}                   @ unreachable
@ Literal pool for the =const loads above.
.pool
@ main: runs assert(float_eq(c, c)) for the constant c = 1107966695, then
@ computes 3.14159274f * 1.1f * 1.1f and passes it to putfloat in S0, then
@ sends ' ' and '\n' to putch and returns 0.
@
@ NOTE(review): the prologue moves SP down by 8 + 8 + 12 = 28 bytes. If SP was
@ 8-byte aligned on entry, it is therefore NOT a multiple of 8 at any of the BL
@ instructions below, including the call into the C function putfloat.
main:
    PUSH    {R7, R14}               @ 8 bytes: caller's R7 and the return address
    PUSH    {R4, R5}                @ 8 bytes: registers used as temporaries
    SUB R13, R13, #12               @ 12 bytes of frame; total 28, not a multiple of 8
    MOV R7, R13                     @ R7 = SP after the frame is set up
mainentry:
    LDR R1, =1107966695             @ 0x420A3AE7, second argument (raw float bits)
    LDR R0, =1107966695             @ same bits as the first argument
    BL  float_eq
    MOV R13, R7                     @ reload SP from R7
    MOV R4, R0
    MOV R0, R4                      @ pass float_eq's result to assert
    BL  assert
    MOV R13, R7
    FLDS    S1, =1078530011         @ 0x40490FDB = bit pattern of 3.14159274f
    FLDS    S2, =1066192077         @ 0x3F8CCCCD = bit pattern of 1.1f
    FMULS   S3, S1, S2              @ S3 = S1 * S2
    FMRS    R4, S3
    FMSR    S1, R4                  @ first product back into S1
    FLDS    S2, =1066192077         @ 1.1f again
    FMULS   S3, S1, S2              @ S3 = (first product) * 1.1f
    FMRS    R5, S3
    FMSR    S0, R5                  @ S0 carries the float argument for putfloat
    BL  putfloat
    MOV R13, R7
    LDR R0, =32                     @ ' '
    BL  putch
    MOV R13, R7
    LDR R0, =10                     @ '\n'
    BL  putch
    MOV R13, R7
    LDR R0, =0                      @ return value 0
    ADD R13, R13, #12               @ release the 12-byte frame
    POP {R4, R5}
    POP {R7, R15}                   @ restore R7 and return
mainendlabel:
    @ No visible branch targets this label, and the POP above never falls through.
    ADD R13, R13, #12
    POP {R4, R5}
    POP {R7, R15}
    ADD R13, R13, #12               @ unreachable: the POP above already loaded PC
    POP {R4, R5}                    @ unreachable
    POP {R7, R15}                   @ unreachable
@ Literal pool for the =const loads above.
.pool

Solution

Stack misalignment

You need to align the stack pointer to 8 bytes at any "public interface", which includes any call to a C function. Your SUB R13, R13, #12 messes this up because 12 is not a multiple of 8.

It's common for library functions to misbehave when called with a misaligned stack, though in other cases the symptom is usually just a crash, e.g. glibc scanf Segmentation faults when called from a function that doesn't align RSP on x86-64. It's interesting that here, the result is not a crash but instead incorrect output. One could probably find out why by disassembling the code of printf. If I do so I'll post back.