I'm working on a simple JIT compiler for PowerPC. I followed the examples in https://github.com/spencertipping/jit-tutorial to get the hang of how to work with it.
The problem is that the identity function in the second example, "jitproto.c", can't really be ported to PowerPC as is: a version that uses just the "LWA" and "BLR" instructions segfaults when executed.
In the end I used the machine code output of the SLJIT compiler (https://github.com/linux-on-ibm-z/sljit) to see what I was doing wrong, and I saw that it generates 12 instruction words before what I thought would be the function.
So what are those instructions doing? Why can't I just start the function directly like in x86?
The code below can be compiled with a C99 compiler on PPC64 (tested on a Power Mac and a POWER8 server).
#include <stdio.h>
#include <stdlib.h>
#include <endian.h>
#include <sys/mman.h>
typedef long(*fn0)(void);
typedef long(*fn1)(long);
/*
 * Instruction stream for the identity function, dumped from SLJIT.
 * One 32-bit PowerPC instruction word per element; the mnemonics are the ones
 * GDB shows when it disassembles the copied buffer.
 */
unsigned int code[] = {
    /* entry sequence */
    0x7c0802a6, /* mflr r0            copy the link register into r0        */
    0xfbe1fff8, /* std  r31,-8(r1)    store r31 at r1-8                     */
    0xfbc1fff0, /* std  r30,-16(r1)   store r30 at r1-16                    */
    0xf8010010, /* std  r0,16(r1)     store r0 (the LR copy) at r1+16       */
    0x3be00000, /* li   r31,0         r31 = 0                               */
    0x7c7e1b78, /* mr   r30,r3        r30 = r3 (r3 itself is not modified)  */
    0xf821ff81, /* stdu r1,-128(r1)   store r1 at r1-128, then r1 -= 128    */

    /* exit sequence: undo the above in reverse */
    0x38210080, /* addi r1,r1,128     r1 += 128                             */
    0xe8010010, /* ld   r0,16(r1)     reload the LR copy into r0            */
    0xebc1fff0, /* ld   r30,-16(r1)   reload r30                            */
    0xebe1fff8, /* ld   r31,-8(r1)    reload r31                            */
    0x7c0803a6, /* mtlr r0            move r0 back into the link register   */
    0x4e800020, /* blr                branch to the link register (return)  */

    /* zero padding up to 16 words */
    0x00000000,
    0x00000000,
    0x00000000
};
/*
 * Copy the pre-assembled identity function from `code` into a fresh
 * read/write/execute mapping and return the mapping's base address.
 *
 * Layout of the mapping (16 32-bit words):
 *   words  0..13  instructions copied from `code`
 *   words 14..15  one 64-bit slot holding the address of word 0, so that a
 *                 caller needing a pointer-to-entry-address (see main) can
 *                 point at this slot instead of at the code itself
 *
 * Returns NULL (after reporting the error with perror) if the mapping cannot
 * be created. The caller releases the mapping with munmap().
 */
fn1 compile_identity(void) {
    enum {
        MAP_WORDS  = 16, /* size of the mapping in 32-bit words           */
        CODE_WORDS = 14, /* number of words copied from `code`            */
        ENTRY_SLOT = 7   /* 64-bit slot index that receives the entry address */
    };

    /* allocate exec memory */
    unsigned int *memory = mmap(NULL,                    /* address */
                                MAP_WORDS * sizeof(int), /* size */
                                PROT_READ | PROT_WRITE | PROT_EXEC,
                                MAP_PRIVATE | MAP_ANONYMOUS,
                                -1,                      /* fd (not used here) */
                                0);                      /* offset (not used here) */
    if (memory == MAP_FAILED) {
        /* mmap signals failure with MAP_FAILED, not NULL; never write to it */
        perror("mmap");
        return NULL;
    }

    /* copy instructions */
    for (int i = 0; i < CODE_WORDS; ++i) {
        memory[i] = code[i];
    }

    /* copy start address to last pointer, else it only works in ppc64le */
    ((unsigned long long *)memory)[ENTRY_SLOT] = (unsigned long long)memory;

#if defined(__GNUC__) || defined(__clang__)
    /* The instructions were written as data; make sure instruction fetch sees
     * them before the buffer is executed. */
    __builtin___clear_cache((char *)memory, (char *)(memory + MAP_WORDS));
#endif

    return (fn1)memory;
}
int main() {
void * test = compile_identity();
//print stuff to check if its right
printf("Pointer %p\n%p\n",test,((char*)test)[0]);
for (int i = 0; i< 16; ++i){
printf("INS %8x\n",((unsigned int*)test)[i]);
}
//load pointer containing function start address, for ppc64 BE and LE
#if __BYTE_ORDER == __BIG_ENDIAN
fn1 f = (fn1*) ((unsigned long long*)test+7);
#elif __BYTE_ORDER == __LITTLE_ENDIAN
fn1 f = test;
#endif
//test function
printf("%d\n",f(4));
//free exec memory
munmap(test, 16*sizeof(int));
return 0;
}
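objdump output of the raw SLJIT code (note: the words were decoded with the wrong byte order — the `.long` values are the byte-swapped instruction words — so the mnemonics shown here are not the real instructions; compare with the GDB output further down):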
asm.bin: file format binary
Disassembly of section .data:
0000000000000000 <.data>:
0: 7c 08 02 a6 lhzu r16,2172(r2)
4: fb e1 ff f8 .long 0xf8ffe1fb
8: fb c1 ff f0 xxsel vs39,vs31,vs56,vs39
c: fb a1 ff e8 .long 0xe8ffa1fb
10: fb 81 ff e0 lq r6,-32272(r31)
14: f8 01 00 10 ps_msub f0,f0,f7,f0
18: 3b e0 00 00 .long 0xe03b
1c: 7c 7e 1b 78 .long 0x781b7e7c
20: 7c 9d 23 78 .long 0x78239d7c
24: 7c bc 2b 78 .long 0x782bbc7c
28: f8 21 ff 71 andi. r31,r15,8696
2c: 7f a3 eb 78 .long 0x78eba37f
30: 38 21 00 90 stw r0,8504(0)
34: e8 01 00 10 vmsumshm v0,v0,v0,v7
38: eb 81 ff e0 lq r6,-32288(r31)
3c: eb a1 ff e8 .long 0xe8ffa1eb
40: eb c1 ff f0 psq_st f7,491(r31),1,4
44: eb e1 ff f8 .long 0xf8ffe1eb
48: 7c 08 03 a6 lhzu r16,2172(r3) #These two instructions should have been enough in x86
4c: 4e 80 00 20 subfic r0,r0,-32690 #
GDB disassembler output:
0x00003ffff7ff9000: mflr r0
0x00003ffff7ff9004: std r31,-8(r1)
0x00003ffff7ff9008: std r30,-16(r1)
0x00003ffff7ff900c: std r0,16(r1)
0x00003ffff7ff9010: li r31,0
0x00003ffff7ff9014: mr r30,r3
0x00003ffff7ff9018: stdu r1,-128(r1)
0x00003ffff7ff901c: addi r1,r1,128
0x00003ffff7ff9020: ld r0,16(r1)
0x00003ffff7ff9024: ld r30,-16(r1)
0x00003ffff7ff9028: ld r31,-8(r1)
0x00003ffff7ff902c: mtlr r0
0x00003ffff7ff9030: blr
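These instructions are the function's prologue and epilogue, which set up and tear down the stack layout required by the PPC64 ABI: they save the link register and r30/r31, allocate a stack frame with `stdu`, and then restore everything before `blr`. See here: http://refspecs.linuxfoundation.org/ELF/ppc64/PPC-elf64abi.html#STACK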