Search code examples
c, gcc, assembly, inline-assembly, register-allocation

GCC asm inline constraints, conflicting register allocation


I've written some ARM inline assembler code.
Looking at the generated Semaphore.s, I see that gcc is using register r3 for two different operands: "success" and "change". Is there a problem with my constraints?
First, the most relevant code lines:

asm inline:

"1: MVN %[success], #0 @ success=TRUE=~FALSE\n\t"
"LDREX   %[value], %[signal] @ try to get exclusive access\n\t"
"ADDS    %[newValue], %[value], %[change]   @ new value = value + change\n\t"

constraints:

: [signal] "+m" (signal), [success] "=r" (success), [locked] "=r" (locked), [newValue] "=r" (newValue), [value] "=r" (value)
: [borderValue] "r" (borderValue),  [change] "r" (change)
: "cc"

generated assembly (Semaphore.s):

1: MVN  r3, #0 @ success=TRUE=~FALSE
LDREX   r0, [r7, #12] @ try to get exclusive access
ADDS    r1, r0, r3  @ new value = value + change

The full source and the generated assembly are below.

/*
 * Semaphore_exclusiveChange: adjust a semaphore word by `change` with an
 * LDREX/STREX retry loop written as ARM inline assembly (Thumb IT blocks).
 *
 * signal      - pointer to the semaphore word
 * change      - signed amount added to the loaded value
 * borderValue - upper limit the new value is compared against (unsigned, HI)
 *
 * Template logic:
 *   - success is preset to ~0 (TRUE);
 *   - value is loaded exclusively, newValue = value + change;
 *   - if the sum is negative (MI) newValue is forced to 0;
 *   - otherwise newValue is compared with borderValue; if it is higher,
 *     success is set to 0 and the unchanged value is stored back, else
 *     newValue is stored;
 *   - a nonzero STREX status in `locked` branches back to label 1 and retries;
 *   - a DMB is issued once the store has succeeded.
 *
 * Returns success.
 *
 * NOTE(review): no output operand is marked early-clobber ('&'), although
 * success, value and newValue are written before change/borderValue are last
 * read, and locked is written before the retry (BNE 1b) reads them again.
 * GCC may therefore give an output the same register as an input.
 * NOTE(review): the memory operand is `signal` (the pointer variable itself),
 * not `*signal`, so LDREX/STREX address the variable holding the pointer.
 */
BOOLEAN Semaphore_exclusiveChange (INT32U * signal, INT32S change, INT32U borderValue)
{
BOOLEAN success;
INT32U locked;// STREX status result; nonzero makes the loop retry
INT32U newValue;
INT32U value;

asm (
// Label 1 is the retry target: every pass re-initialises success and reloads value.
"1: MVN %[success], #0 @ success=TRUE=~FALSE\n\t"
"LDREX   %[value], %[signal] @ new to get exclusive access\n\t"
"ADDS    %[newValue], %[value], %[change]   @ new value = value + change\n\t"
// ITE MI: the SUBS runs when the sum is negative, the CMP otherwise; both set the flags
// tested by the following ITE HI.
"ITE MI @ if (new value<0) \n\t"
" SUBSMI %[newValue], %[newValue] @ (new value<0): new value=0, set zero flag \n\t"
"@ else\n\t"
" CMPPL %[newValue], %[borderValue]  @ (new value>=0): if new value > border value \n\t"
"\n\t@ zero flag is either: new value=0 or =bordervalue\n\t"
"ITE HI @ if new signal level > border value \n\t"  //
" MOVHI     %[success], #0 @ fail to raise signal, success=FALSE \n\t"
"\t@ else\n\t"
" MOVLS     %[value], %[newValue] @ use new value \n\t" // ok
// On the HI path value is untouched, so the STREX writes back what was loaded.
"STREX  %[locked], %[value], %[signal] @ new exclusive store of value\n\t"
"TST %[locked],%[locked] @ is locked? \n\t"
"IT NE @ if locked \n\t"
"BNE 1b @ try again\n\t"
"DMB    @ memory barrier\n\t"   //

// Outputs: the memory operand is read-write ("+m"); the four register results are write-only ("=r").
: [signal] "+m" (signal), [success] "=r" (success), [locked] "=r" (locked), [newValue] "=r" (newValue), [value] "=r" (value)
// Inputs: both are read-only register operands.
: [borderValue] "r" (borderValue),  [change] "r" (change)
// Clobbers: the template modifies the condition flags.
: "cc"  );
return success;
}

Relevant text from the generated assembly file (Semaphore.s):

@ GCC output for Semaphore_exclusiveChange (Thumb code, r7 used as frame pointer).
@ Registers substituted for the inline-asm operands in this listing:
@   %[signal]  = [r7, #12]   %[change] = r3   %[borderValue] = r2
@   %[success] = r3          %[value]  = r0   %[newValue] = r1   %[locked] = r2
@ r3 and r2 are each shared by an output operand and an input operand.
Semaphore_exclusiveChange:
.LFB2:
    .loc 1 10 0
    @ args = 0, pretend = 0, frame = 32
    @ frame_needed = 1, uses_anonymous_args = 0
    @ link register save eliminated.
    push    {r7}
.LCFI0:
    sub sp, sp, #36
.LCFI1:
    add r7, sp, #0
.LCFI2:
    @ Spill the three incoming arguments to the frame:
    @ [r7, #12] = signal (pointer), [r7, #8] = change, [r7, #4] = borderValue.
    str r0, [r7, #12]
    str r1, [r7, #8]
    str r2, [r7, #4]
    .loc 1 16 0
    @ Load the register inputs of the asm: r2 = borderValue, r3 = change.
    ldr r2, [r7, #4]
    ldr r3, [r7, #8]
@ 16 "../drivers/Semaphore.c" 1
    @ The MVN below overwrites r3 (change) with success before the ADDS reads it.
    1: MVN  r3, #0 @ success=TRUE=~FALSE
    @ [r7, #12] is the frame slot where the pointer was spilled above.
    LDREX   r0, [r7, #12] @ new to get exclusive access
    ADDS    r1, r0, r3  @ new value = value + change
    ITE MI @ if (new value<0) 
     SUBSMI r1, r1 @ (new value<0): new value=0, set zero flag 
    @ else
     CMPPL  r1, r2  @ (new value>=0): if new value > border value 

    @ zero flag is either: new value=0 or =bordervalue
    ITE HI @ if new signal level > border value 
     MOVHI      r3, #0 @ fail to raise signal, success=FALSE 
        @ else
     MOVLS     r0, r1 @ use new value 
    @ The STREX status lands in r2, replacing borderValue; a retry then compares against it.
    STREX   r2, r0, [r7, #12] @ new exclusive store of value
    TST r2,r2 @ is locked? 
    IT NE @ if locked 
    BNE 1b  @ try again
    DMB @ memory barrier

@ 0 "" 2
    .thumb
    @ Store the asm outputs to the locals:
    @ [r7, #19] = success (byte), [r7, #20] = locked, [r7, #24] = newValue, [r7, #28] = value.
    strb    r3, [r7, #19]
    str r2, [r7, #20]
    str r1, [r7, #24]
    str r0, [r7, #28]
    .loc 1 38 0
    @ Reload success, zero-extended, and return it in r0.
    ldrb    r3, [r7, #19]   @ zero_extendqisi2
    .loc 1 39 0
    mov r0, r3
    @ Release the 36-byte frame and restore r7.
    add r7, r7, #36
    mov sp, r7
    pop {r7}
    bx  lr

Solution

  • You need to constrain "success" further with '&':

    /* Every register output is written before the template's last read of an
       input (the BNE 1b retry reads change/borderValue again), so each one is
       marked early-clobber.  "+Q" (*signal) names the semaphore word itself,
       addressed through a single base register as LDREX/STREX require;
       "+m" (signal) named the pointer variable instead. */
    : [signal] "+Q" (*signal), [success] "=&r" (success), [locked] "=&r" (locked), [newValue] "=&r" (newValue), [value] "=&r" (value)
    

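    which marks it as an 'early clobber'. Otherwise the compiler assumes that every output is written only after all inputs have been consumed, so it is free to assign the same register to an output and to an unrelated input. The same reasoning applies to any other output that the template writes before its last read of an input: "value" and "newValue" are written before "borderValue" is read, and "locked" is written before the retry loop (BNE 1b) reads the inputs again, so they should be marked "=&r" as well. If you have an "input/output" value, use the '+' modifier or a matching (digit) constraint that ties the input to the output operand.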