Search code examples
assemblyarm

Branch Link to putchar causes segmentation fault in ARM


I've isolated the fact that I cannot branch link to putchar no matter how hard I try.

Even two lines alone like

mov r0,$48
bl putchar

Will always segfault when I'm expecting it to print ASCII 0

I can branch to putchar, and it will work, but I cannot branch link. Meaning

mov r0,$48
b putchar

will work

I feel like I'm missing something incredibly basic, but I cannot figure out why. I can only assume it has something to do with the return from putchar, but I have no idea what.

Sorry if this seems like a dumb question, but I honestly could not find a resource on this.

Edit: Although the above statements are true for even a standalone program for me, I am ultimately implementing this in a subroutine, which I figured might be important


Solution

  • This is difficult to say because you did not provide enough code, but you may be missing the code required for being compliant with the ARM calling conventions.
    The complete code should save fp and lr on the stack, then call putchar, then either restore fp and lr and return, or restore fp and pc directly, which is basically the same.

    Create a file named example.s with the following content:

            .syntax unified
            .arch   armv7-a
            .arm
            .text
            .align  2
            .globl  main
            .type   main, %function

    @-------------------------------------------------------------------
    @ int main(void)
    @ ABI:   AAPCS (32-bit ARM EABI), A32 instruction set, GNU as syntax
    @ Out:   r0 = whatever putchar left in r0; main does not touch it,
    @        so it becomes the process exit status
    @ Clobb: r0-r3, ip, lr, flags (caller-saved registers, via putchar)
    @ Stack: 8 bytes (fp, lr); pushing a pair keeps sp 8-byte aligned
    @-------------------------------------------------------------------
    main:
            push    {fp, lr}            @ bl overwrites lr, so save our own return address first
            mov     r0, #48             @ r0 = first argument = ASCII '0'
            bl      putchar             @ lr = address of the next instruction; call putchar
            pop     {fp, pc}            @ restore fp and return by loading the saved lr into pc
            .size   main, .-main
    

    Compile and link it - I compiled a static version because I tested with qemu-arm:

    /opt/arm/gcc-arm-8.3-2019.03-x86_64-arm-linux-gnueabihf/bin/arm-linux-gnueabihf-gcc -static -O0 -o example  example.s
    

    Execute it - using qemu-arm in my case

    /opt/qemu-3.1.0-static/bin/qemu-arm example
    0
    

    Please note that:

    pop     {fp, pc}
    

    is equivalent to:

    pop     {fp, lr}        @ restore the caller's fp and the saved return address
    bx      lr              @ return to the caller (A32 has no `ret` mnemonic; that is AArch64)
    

    I hope this helps.

    Update

    putchar() returns either the character that was passed or EOF in r0. Since r0 is not modified in main after the call, the value it contains is returned to the caller, i.e. bash, and can be seen using the echo $? command:

    /opt/qemu-3.1.0/bin/qemu-arm example
    0
    echo $?
    48
    

    According to page 15 of the ARM calling conventions, r4-r8 are preserved across subroutine calls, but r0-r3 may not be.

    Using objdump for disassembling the example program:

    /opt/arm/gcc-arm-8.3-2019.03-x86_64-arm-linux-gnueabihf/bin/arm-linux-gnueabihf-objdump -D example > example.lst
    

    In example.lst, you can see that putchar() is:
    1) preserving r4, r5, r6, r7, r8, lr according to the ARM Calling Convention,
    2) making use of the registers you are mentioning as having been modified:

    00016f50 <putchar>:
       16f50:   e92d41f0    push    {r4, r5, r6, r7, r8, lr}
       16f54:   e30354a8    movw    r5, #13480  ; 0x34a8
       16f58:   e3405008    movt    r5, #8
       16f5c:   e1a06000    mov r6, r0
       16f60:   e5954000    ldr r4, [r5]
       16f64:   e5943000    ldr r3, [r4]
       16f68:   e3130902    tst r3, #32768  ; 0x8000
       16f6c:   1a000015    bne 16fc8 <putchar+0x78>
       16f70:   e5943048    ldr r3, [r4, #72]   ; 0x48
       16f74:   ee1d7f70    mrc 15, 0, r7, cr13, cr0, {3}
       16f78:   e2477d13    sub r7, r7, #1216   ; 0x4c0
       16f7c:   e5932008    ldr r2, [r3, #8]
       16f80:   e1520007    cmp r2, r7
       16f84:   0a000030    beq 1704c <putchar+0xfc>
       16f88:   e3a02001    mov r2, #1
       16f8c:   e1931f9f    ldrex   r1, [r3]
       16f90:   e3510000    cmp r1, #0
       16f94:   1a000003    bne 16fa8 <putchar+0x58>
       16f98:   e1830f92    strex   r0, r2, [r3]
       16f9c:   e3500000    cmp r0, #0
       16fa0:   1afffff9    bne 16f8c <putchar+0x3c>
       16fa4:   f57ff05b    dmb ish
       16fa8:   1a00002d    bne 17064 <putchar+0x114>
       16fac:   e5943048    ldr r3, [r4, #72]   ; 0x48
       16fb0:   e5950000    ldr r0, [r5]
       16fb4:   e5837008    str r7, [r3, #8]
       16fb8:   e5932004    ldr r2, [r3, #4]
       16fbc:   e2822001    add r2, r2, #1
       16fc0:   e5832004    str r2, [r3, #4]
       16fc4:   ea000000    b   16fcc <putchar+0x7c>
       16fc8:   e1a00004    mov r0, r4
       16fcc:   e5903014    ldr r3, [r0, #20]
       16fd0:   e6efc076    uxtb    ip, r6
       16fd4:   e5902018    ldr r2, [r0, #24]
       16fd8:   e1530002    cmp r3, r2
       16fdc:   32832001    addcc   r2, r3, #1
       16fe0:   35802014    strcc   r2, [r0, #20]
       16fe4:   35c36000    strbcc  r6, [r3]
       16fe8:   2a000019    bcs 17054 <putchar+0x104>
       16fec:   e5943000    ldr r3, [r4]
       16ff0:   e3130902    tst r3, #32768  ; 0x8000
       16ff4:   1a000005    bne 17010 <putchar+0xc0>
       16ff8:   e5940048    ldr r0, [r4, #72]   ; 0x48
       16ffc:   e5903004    ldr r3, [r0, #4]
       17000:   e2433001    sub r3, r3, #1
       17004:   e5803004    str r3, [r0, #4]
       17008:   e3530000    cmp r3, #0
       1700c:   0a000001    beq 17018 <putchar+0xc8>
       17010:   e1a0000c    mov r0, ip
       17014:   e8bd81f0    pop {r4, r5, r6, r7, r8, pc}
       17018:   e5803008    str r3, [r0, #8]
       1701c:   f57ff05b    dmb ish
       17020:   e1902f9f    ldrex   r2, [r0]
       17024:   e1801f93    strex   r1, r3, [r0]
       17028:   e3510000    cmp r1, #0
       1702c:   1afffffb    bne 17020 <putchar+0xd0>
       17030:   e3520001    cmp r2, #1
       17034:   dafffff5    ble 17010 <putchar+0xc0>
       17038:   e3a01081    mov r1, #129    ; 0x81
       1703c:   e3a02001    mov r2, #1
       17040:   e3a070f0    mov r7, #240    ; 0xf0
       17044:   ef000000    svc 0x00000000
       17048:   eafffff0    b   17010 <putchar+0xc0>
       1704c:   e1a00004    mov r0, r4
       17050:   eaffffd8    b   16fb8 <putchar+0x68>
       ...