Search code examples
assembly, jit, disassembly, powerpc, objdump

Why does powerpc need this additional fluff to make a raw machine code function work?


I'm working on a simple JIT compiler for PowerPC. I followed the examples in https://github.com/spencertipping/jit-tutorial to get the hang of how to work with it.

The problem is that the identity function in the second example, "jitproto.c", can't really be ported to PowerPC as is: a version using only the "LWA" and "BLR" instructions just causes segfaults when executed.

In the end I used the machine code output of the SLJIT compiler (https://github.com/linux-on-ibm-z/sljit) to see what I'm doing wrong, and I see it generates 12 instruction words before what I thought would be the function.

So what are those instructions doing? Why can't I just start the function directly like in x86?

The code can be compiled with a C99 compiler on PPC64 (tested on a Power Mac and a POWER8 server).

#include <stdio.h>
#include <stdlib.h>
#include <endian.h>
#include <sys/mman.h>
typedef long(*fn0)(void);
typedef long(*fn1)(long);

// Instruction stream for the identity function, dumped from SLJIT.
// Each word is annotated with the mnemonic shown for it in the GDB disassembly
// of this same stream (listed later in this post). The asker's original question
// about the first twelve words was: "what do all these instructions do? I guess
// this is loading something from the R2 register?"
unsigned int code[] = 
{
0x7c0802a6,   // mflr  r0            - copy the link register (return address) into r0
0xfbe1fff8 ,  // std   r31,-8(r1)    - save r31 below the stack pointer
0xfbc1fff0 ,  // std   r30,-16(r1)   - save r30 below the stack pointer
0xf8010010 ,  // std   r0,16(r1)     - store the saved link register value at 16(r1)

0x3be00000,   // li    r31,0         - r31 = 0
0x7c7e1b78 ,  // mr    r30,r3        - copy the argument (r3) into r30
0xf821ff81,   // stdu  r1,-128(r1)   - store r1 at r1-128 and move r1 down by 128
0x38210080,   // addi  r1,r1,128     - move r1 back up by 128

0xe8010010 ,  // ld    r0,16(r1)     - reload the saved link register value
0xebc1fff0,   // ld    r30,-16(r1)   - restore r30
0xebe1fff8 ,  // ld    r31,-8(r1)    - restore r31 (end of the instructions the asker did not understand)
0x7c0803a6 ,  // mtlr  r0            - put the return address back into the link register

0x4e800020,   // blr                 - return to the caller
0x00000000,   // padding word (copied as-is by compile_identity)
0x00000000,   // words 14 and 15 are not copied; compile_identity overwrites that
0x00000000};  // doubleword in the mapped buffer with the buffer's start address


/*
 * Copy the SLJIT-generated identity function into freshly mapped
 * read/write/execute memory and return a pointer to the start of that buffer.
 *
 * Buffer layout (16 words of sizeof(int) each):
 *   words 0..13   - copied from code[]
 *   doubleword 7  - the buffer's own start address; main() uses this slot as
 *                   the thing to call on big-endian builds
 *
 * Returns NULL (after reporting the error with perror) if the mapping fails.
 * The caller releases the buffer with munmap(ptr, 16*sizeof(int)).
 */
fn1 compile_identity(void) {
  //allocate exec memory
  unsigned int *memory = mmap(NULL,             // address
                      16*sizeof(int),             // size
                      PROT_READ | PROT_WRITE | PROT_EXEC,
                      MAP_PRIVATE | MAP_ANONYMOUS,
                      -1,               // fd (not used here)
                      0);               // offset (not used here)
  //mmap reports failure with MAP_FAILED, not NULL; do not write through it
  if (memory == MAP_FAILED) {
    perror("mmap");
    return NULL;
  }
  //copy instructions
  for (int i = 0; i <14; ++i){
    memory[i] = code[i];
  }
  //copy start address to last pointer, else it only works in ppc64le
  ((unsigned long long*)memory)[7] = (unsigned long long)memory;

  //the words above were stored as data; make them visible to instruction
  //fetch before anything tries to execute them
#if defined(__GNUC__)
  __builtin___clear_cache((char*)memory, (char*)memory + 16*sizeof(int));
#endif

  return (fn1) memory;
}

int main() {
  void * test = compile_identity();
  //print stuff to check if its right
  printf("Pointer %p\n%p\n",test,((char*)test)[0]);
  for (int i = 0; i< 16; ++i){
    printf("INS %8x\n",((unsigned int*)test)[i]);
  }
  //load pointer containing function start address, for ppc64 BE and LE
#if __BYTE_ORDER == __BIG_ENDIAN
  fn1 f = (fn1*) ((unsigned long long*)test+7);
#elif __BYTE_ORDER == __LITTLE_ENDIAN
  fn1 f = test;
#endif
  //test function
  printf("%d\n",f(4));
  //free exec memory
  munmap(test, 16*sizeof(int));
  return 0;
}

objdump output of the SLJIT raw code

asm.bin:     file format binary


Disassembly of section .data:

0000000000000000 <.data>:
   0:   7c 08 02 a6     lhzu    r16,2172(r2)
   4:   fb e1 ff f8     .long 0xf8ffe1fb
   8:   fb c1 ff f0     xxsel   vs39,vs31,vs56,vs39
   c:   fb a1 ff e8     .long 0xe8ffa1fb
  10:   fb 81 ff e0     lq      r6,-32272(r31)
  14:   f8 01 00 10     ps_msub f0,f0,f7,f0
  18:   3b e0 00 00     .long 0xe03b
  1c:   7c 7e 1b 78     .long 0x781b7e7c
  20:   7c 9d 23 78     .long 0x78239d7c
  24:   7c bc 2b 78     .long 0x782bbc7c
  28:   f8 21 ff 71     andi.   r31,r15,8696
  2c:   7f a3 eb 78     .long 0x78eba37f
  30:   38 21 00 90     stw     r0,8504(0)
  34:   e8 01 00 10     vmsumshm v0,v0,v0,v7
  38:   eb 81 ff e0     lq      r6,-32288(r31)
  3c:   eb a1 ff e8     .long 0xe8ffa1eb
  40:   eb c1 ff f0     psq_st  f7,491(r31),1,4
  44:   eb e1 ff f8     .long 0xf8ffe1eb
  48:   7c 08 03 a6     lhzu    r16,2172(r3)  #These two instructions should have been enough in x86
  4c:   4e 80 00 20     subfic  r0,r0,-32690  #

GDB disassembler output

   0x00003ffff7ff9000:  mflr    r0
   0x00003ffff7ff9004:  std     r31,-8(r1)
   0x00003ffff7ff9008:  std     r30,-16(r1)
   0x00003ffff7ff900c:  std     r0,16(r1)
   0x00003ffff7ff9010:  li      r31,0
   0x00003ffff7ff9014:  mr      r30,r3
   0x00003ffff7ff9018:  stdu    r1,-128(r1)
   0x00003ffff7ff901c:  addi    r1,r1,128
   0x00003ffff7ff9020:  ld      r0,16(r1)
   0x00003ffff7ff9024:  ld      r30,-16(r1)
   0x00003ffff7ff9028:  ld      r31,-8(r1)
   0x00003ffff7ff902c:  mtlr    r0
   0x00003ffff7ff9030:  blr


Solution

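  • The instructions are required for setting up the stack layout of the PPC64 ABI: as the GDB disassembly above shows, they save the link register and the r30/r31 registers, create a stack frame, and then undo all of that before returning with blr. See here: http://refspecs.linuxfoundation.org/ELF/ppc64/PPC-elf64abi.html#STACK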