Search code examples
gcc arm cortex-m thumb

Does arm-none-eabi-ld rewrite the bl instruction?


I'm trying to understand why some Cortex-M0 code behaves differently when it is linked versus unlinked. In both cases it is loaded to 0x20000000. It looks like despite my best efforts to generate position independent code by passing -fPIC to the compiler, the bl instruction appears to differ after the code has passed through the linker. Am I reading this correctly, is that just a part of the linker's job in ARM Thumb, and is there a better way to generate a position independent function call?

Linked:

20000000:
20000000:       0003            movs    r3, r0
20000002:       4852            ldr     r0, [pc, #328]
20000004:       4685            mov     sp, r0
20000006:       0018            movs    r0, r3
20000008:       f000 f802       bl      20000010
2000000c:       46c0            nop                     ; (mov r8, r8)
2000000e:       46c0            nop                     ; (mov r8, r8)

Unlinked:

00000000:
   0:   0003            movs    r3, r0
   2:   4852            ldr     r0, [pc, #328]
   4:   4685            mov     sp, r0
   6:   0018            movs    r0, r3
   8:   f7ff fffe       bl      10
   c:   46c0            nop                     ; (mov r8, r8)
   e:   46c0            nop                     ; (mov r8, r8)

Solution

  • start.s

    /* start.s: a table of four words followed by the reset and hang handlers. */
    /* This file is assembled directly (not compiled with -fPIC); the .word    */
    /* entries hold absolute addresses, so the table is not position           */
    /* independent.                                                            */
    .globl _start
    _start:
    .word 0x20001000    /* equals ORIGIN + LENGTH of "ram" in memmap; presumably the initial stack pointer -- confirm against the Cortex-M vector table layout */
    .word reset         /* absolute address of reset; shows up as 0x20000011 in the linked listing (reset is at 0x20000010) */
    .word hang          /* shows up as 0x20000015 in the linked listing (hang is at 0x20000014) */
    .word hang
    
    .thumb
    
    .thumb_func
    reset:
        bl notmain      /* call into the C code; if notmain returns, execution falls through into hang */
    .thumb_func
    hang:
        b .             /* branch to self: spin here forever */
    

    notmain.c

    unsigned int x;
    
    unsigned int fun ( unsigned int );
    void notmain ( void )
    {
        x=fun(x+5);
    }
    

    fun.c

    /* Global with no initializer; the linked listing shows it in .bss at 0x2000006c. */
    unsigned int y;
    /* Returns y + z + 1. */
    unsigned int fun ( unsigned int z )
    {
        return(y+z+1);
    }
    

    memmap

    /* Linker script: a single 0x1000-byte region named "ram" at 0x20000000 holds everything. */
    MEMORY
    {
        ram : ORIGIN = 0x20000000, LENGTH = 0x1000
    }
    
    SECTIONS
    {
        /* All input .text* sections go into ram; in the linked listing _start is at 0x20000000. */
        .text : { *(.text*) } > ram
        /* All input .bss* sections go into ram as well. */
        .bss : { *(.bss*) } > ram
        /* .got and .got.plt are not named here; in the linked listing they appear between .text and .bss. */
    }
    

    build

    # start.s is assembled directly (no -fPIC involved).
    arm-none-eabi-as start.s -o start.o
    # Both C files are compiled to Thumb objects with -fPIC and -O2.
    arm-none-eabi-gcc -fPIC -O2 -c -mthumb fun.c -o fun.o
    arm-none-eabi-gcc -fPIC -O2 -c -mthumb notmain.c -o notmain.o
    # Link with the memmap script; start.o is listed first and _start ends up at 0x20000000 in the listing.
    arm-none-eabi-ld -T memmap start.o notmain.o fun.o -o so.elf
    

    produces

    20000000 <_start>:
    20000000:   20001000    andcs   r1, r0, r0
    20000004:   20000011    andcs   r0, r0, r1, lsl r0
    20000008:   20000015    andcs   r0, r0, r5, lsl r0
    2000000c:   20000015    andcs   r0, r0, r5, lsl r0
    
    20000010 <reset>:
    20000010:   f000 f802   bl  20000018 <notmain>
    
    20000014 <hang>:
    20000014:   e7fe        b.n 20000014 <hang>
        ...
    
    20000018 <notmain>:
    20000018:   b510        push    {r4, lr}
    2000001a:   4b06        ldr r3, [pc, #24]   ; (20000034 <notmain+0x1c>)
    2000001c:   4a06        ldr r2, [pc, #24]   ; (20000038 <notmain+0x20>)
    2000001e:   447b        add r3, pc
    20000020:   589c        ldr r4, [r3, r2]
    20000022:   6823        ldr r3, [r4, #0]
    20000024:   1d58        adds    r0, r3, #5
    20000026:   f000 f809   bl  2000003c <fun>
    2000002a:   6020        str r0, [r4, #0]
    2000002c:   bc10        pop {r4}
    2000002e:   bc01        pop {r0}
    20000030:   4700        bx  r0
    20000032:   46c0        nop         ; (mov r8, r8)
    20000034:   00000032    andeq   r0, r0, r2, lsr r0
    20000038:   00000000    andeq   r0, r0, r0
    
    2000003c <fun>:
    2000003c:   4b03        ldr r3, [pc, #12]   ; (2000004c <fun+0x10>)
    2000003e:   4a04        ldr r2, [pc, #16]   ; (20000050 <fun+0x14>)
    20000040:   447b        add r3, pc
    20000042:   589b        ldr r3, [r3, r2]
    20000044:   681b        ldr r3, [r3, #0]
    20000046:   3301        adds    r3, #1
    20000048:   1818        adds    r0, r3, r0
    2000004a:   4770        bx  lr
    2000004c:   00000010    andeq   r0, r0, r0, lsl r0
    20000050:   00000004    andeq   r0, r0, r4
    
    Disassembly of section .got:
    
    20000054 <.got>:
    20000054:   20000068    andcs   r0, r0, r8, rrx
    20000058:   2000006c    andcs   r0, r0, ip, rrx
    
    Disassembly of section .got.plt:
    
    2000005c <_GLOBAL_OFFSET_TABLE_>:
        ...
    
    Disassembly of section .bss:
    
    20000068 <x>:
    20000068:   00000000    andeq   r0, r0, r0
    
    2000006c <y>:
    2000006c:   00000000    andeq   r0, r0, r0
    

    When the code wants to access the global variable x, it takes the program counter plus a linker-supplied/modified offset (0x32) to find the global offset table, then uses a second offset (0 for x) to load the address of x from its entry in that table. The same is done for y (PC-relative offset 0x10, table offset 4). Because the table entries hold absolute addresses (0x20000068 and 0x2000006c), it appears that when you relocate the code you will need to modify the global offset table at run time or load time, depending on how the code is loaded.

    If I get rid of those global variables, then everything is position independent except the vector table, which is hardcoded and not PIC (and wasn't compiled anyway).

    20000000 <_start>:
    20000000:   20001000    andcs   r1, r0, r0
    20000004:   20000011    andcs   r0, r0, r1, lsl r0
    20000008:   20000015    andcs   r0, r0, r5, lsl r0
    2000000c:   20000015    andcs   r0, r0, r5, lsl r0
    
    20000010 <reset>:
    20000010:   f000 f802   bl  20000018 <notmain>
    
    20000014 <hang>:
    20000014:   e7fe        b.n 20000014 <hang>
        ...
    
    20000018 <notmain>:
    20000018:   b508        push    {r3, lr}
    2000001a:   2005        movs    r0, #5
    2000001c:   f000 f804   bl  20000028 <fun>
    20000020:   3006        adds    r0, #6
    20000022:   bc08        pop {r3}
    20000024:   bc02        pop {r1}
    20000026:   4708        bx  r1
    
    20000028 <fun>:
    20000028:   3001        adds    r0, #1
    2000002a:   4770        bx  lr
    

    back to this version

    /* Global defined in this file; the relocation tables further down show how it is referenced: */
    /* R_ARM_GOT_BREL (via the global offset table) with -fPIC, R_ARM_ABS32 without.             */
    unsigned int y;
    /* Returns y + z + 1. */
    unsigned int fun ( unsigned int z )
    {
        return(y+z+1);
    }
    

    position independent

    00000000 <fun>:
       0:   4b03        ldr r3, [pc, #12]   ; (10 <fun+0x10>)
       2:   4a04        ldr r2, [pc, #16]   ; (14 <fun+0x14>)
       4:   447b        add r3, pc
       6:   589b        ldr r3, [r3, r2]
       8:   681b        ldr r3, [r3, #0]
       a:   3301        adds    r3, #1
       c:   1818        adds    r0, r3, r0
       e:   4770        bx  lr
      10:   00000008    andeq   r0, r0, r8
      14:   00000000    andeq   r0, r0, r0
    

    not position independent

    00000000 <fun>:
       0:   4b02        ldr r3, [pc, #8]    ; (c <fun+0xc>)
       2:   681b        ldr r3, [r3, #0]
       4:   3301        adds    r3, #1
       6:   1818        adds    r0, r3, r0
       8:   4770        bx  lr
       a:   46c0        nop         ; (mov r8, r8)
       c:   00000000    andeq   r0, r0, r0
    

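    The position-independent code has to do a bit more work to access the external variable: it goes through the global offset table. The position-dependent code still needs some work because the variable is external, but not as much: it loads the variable's absolute address from a literal word placed after the function. Either way, the linker fills in the required values to make it work, i.e. to link it.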

    The ELF object file contains relocation entries that tell the linker which locations to patch and how. For the position-independent build of fun.c:

    Relocation section '.rel.text' at offset 0x1a4 contains 2 entries:
     Offset     Info    Type            Sym.Value  Sym. Name
    00000010  00000a19 R_ARM_BASE_PREL   00000000   _GLOBAL_OFFSET_TABLE_
    00000014  00000b1a R_ARM_GOT_BREL    00000004   y
    

    or, for the build that is not position independent:

    Relocation section '.rel.text' at offset 0x174 contains 1 entries:
     Offset     Info    Type            Sym.Value  Sym. Name
    0000000c  00000a02 R_ARM_ABS32       00000004   y
    

    notmain.o had these relocations when built as PIC:

    Relocation section '.rel.text' at offset 0x1cc contains 3 entries:
     Offset     Info    Type            Sym.Value  Sym. Name
    0000000e  00000a0a R_ARM_THM_CALL    00000000   fun
    0000001c  00000b19 R_ARM_BASE_PREL   00000000   _GLOBAL_OFFSET_TABLE_
    00000020  00000c1a R_ARM_GOT_BREL    00000004   x
    

    and these when built without -fPIC:

    Relocation section '.rel.text' at offset 0x198 contains 2 entries:
     Offset     Info    Type            Sym.Value  Sym. Name
    00000008  00000a0a R_ARM_THM_CALL    00000000   fun
    00000014  00000b02 R_ARM_ABS32       00000004   x
    

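    So in short, the toolchain is doing its job; you don't need to redo it. The bl encoding differs between the unlinked object and the linked output because the linker resolves the call relocation (R_ARM_THM_CALL above) and fills in the PC-relative offset to the callee; the resulting branch is still position independent. Note that this has nothing to do with ARM or Thumb specifically: any time you use the object-and-linker model and allow external references from an object, the linker has to patch things up to glue the code together. That's just how it works.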