PPCJITInfo.cpp revision 263508
1//===-- PPCJITInfo.cpp - Implement the JIT interfaces for the PowerPC -----===//
2//
3//                     The LLVM Compiler Infrastructure
4//
5// This file is distributed under the University of Illinois Open Source
6// License. See LICENSE.TXT for details.
7//
8//===----------------------------------------------------------------------===//
9//
10// This file implements the JIT interfaces for the 32-bit PowerPC target.
11//
12//===----------------------------------------------------------------------===//
13
14#define DEBUG_TYPE "jit"
15#include "PPCJITInfo.h"
16#include "PPCRelocations.h"
17#include "PPCTargetMachine.h"
18#include "llvm/IR/Function.h"
19#include "llvm/Support/Debug.h"
20#include "llvm/Support/ErrorHandling.h"
21#include "llvm/Support/Memory.h"
22#include "llvm/Support/raw_ostream.h"
23using namespace llvm;
24
// JIT compiler entry point registered through
// PPCJITInfo::getLazyResolverFunction().  LLVMPPCCompilationCallback() calls
// it with the address of the call instruction inside the stub and uses the
// returned pointer as the address to branch to.
static TargetJITInfo::JITCompilerFn JITCompilerFunction;
26
// Encoders for the handful of 32-bit PowerPC instruction words this file
// writes directly into memory.  Each macro expands to an integer expression
// holding the instruction; register operands are plain register numbers.

// addis RD, RS, IMM16
#define BUILD_ADDIS(RD,RS,IMM16) \
  ((15 << 26) | ((RD) << 21) | ((RS) << 16) | ((IMM16) & 65535))
// ori RD, RS, UIMM16
#define BUILD_ORI(RD,RS,UIMM16) \
  ((24 << 26) | ((RS) << 21) | ((RD) << 16) | ((UIMM16) & 65535))
// oris RD, RS, UIMM16
#define BUILD_ORIS(RD,RS,UIMM16) \
  ((25 << 26) | ((RS) << 21) | ((RD) << 16) | ((UIMM16) & 65535))
// rldicr RD, RS, SH, ME (MD-form).  Both 6-bit operands are split fields: the
// low five bits of SH go to bits 11-15 and its high bit to bit 1; the low five
// bits of ME go to bits 6-10 and its high bit to bit 5.  Placing ME unsplit
// would spill its high bit into the SH field for any ME >= 32.
#define BUILD_RLDICR(RD,RS,SH,ME) \
  ((30 << 26) | ((RS) << 21) | ((RD) << 16) | (((SH) & 31) << 11) | \
   (((ME) & 31) << 6) | ((((ME) >> 5) & 1) << 5) | (1 << 2) | \
   ((((SH) >> 5) & 1) << 1))
// mtspr SPR, RS.  The 10-bit SPR number is stored with its two 5-bit halves
// swapped: low half at bits 16-20, high half at bits 11-15.
#define BUILD_MTSPR(RS,SPR)      \
  ((31 << 26) | ((RS) << 21) | (((SPR) & 31) << 16) | \
   ((((SPR) >> 5) & 31) << 11) | (467 << 1))
// bcctr / bcctrl BO, BI (LINK selects the linking form)
#define BUILD_BCCTRx(BO,BI,LINK) \
  ((19 << 26) | ((BO) << 21) | ((BI) << 16) | (528 << 1) | ((LINK) & 1))
// b / bl with TARGET given as a signed displacement in instruction words
#define BUILD_B(TARGET, LINK) \
  ((18 << 26) | (((TARGET) & 0x00FFFFFF) << 2) | ((LINK) & 1))

// Pseudo-ops
#define BUILD_LIS(RD,IMM16)    BUILD_ADDIS(RD,0,IMM16)
#define BUILD_SLDI(RD,RS,IMM6) BUILD_RLDICR(RD,RS,IMM6,63-(IMM6))
#define BUILD_MTCTR(RS)        BUILD_MTSPR(RS,9)
#define BUILD_BCTR(LINK)       BUILD_BCCTRx(20,0,LINK)
48
49static void EmitBranchToAt(uint64_t At, uint64_t To, bool isCall, bool is64Bit){
50  intptr_t Offset = ((intptr_t)To - (intptr_t)At) >> 2;
51  unsigned *AtI = (unsigned*)(intptr_t)At;
52
53  if (Offset >= -(1 << 23) && Offset < (1 << 23)) {   // In range?
54    AtI[0] = BUILD_B(Offset, isCall);     // b/bl target
55  } else if (!is64Bit) {
56    AtI[0] = BUILD_LIS(12, To >> 16);     // lis r12, hi16(address)
57    AtI[1] = BUILD_ORI(12, 12, To);       // ori r12, r12, lo16(address)
58    AtI[2] = BUILD_MTCTR(12);             // mtctr r12
59    AtI[3] = BUILD_BCTR(isCall);          // bctr/bctrl
60  } else {
61    AtI[0] = BUILD_LIS(12, To >> 48);      // lis r12, hi16(address)
62    AtI[1] = BUILD_ORI(12, 12, To >> 32);  // ori r12, r12, lo16(address)
63    AtI[2] = BUILD_SLDI(12, 12, 32);       // sldi r12, r12, 32
64    AtI[3] = BUILD_ORIS(12, 12, To >> 16); // oris r12, r12, hi16(address)
65    AtI[4] = BUILD_ORI(12, 12, To);        // ori r12, r12, lo16(address)
66    AtI[5] = BUILD_MTCTR(12);              // mtctr r12
67    AtI[6] = BUILD_BCTR(isCall);           // bctr/bctrl
68  }
69}
70
71extern "C" void PPC32CompilationCallback();
72extern "C" void PPC64CompilationCallback();
73
74// The first clause of the preprocessor directive looks wrong, but it is
75// necessary when compiling this code on non-PowerPC hosts.
76#if (!defined(__ppc__) && !defined(__powerpc__)) || defined(__powerpc64__) || defined(__ppc64__)
77void PPC32CompilationCallback() {
78  llvm_unreachable("This is not a 32bit PowerPC, you can't execute this!");
79}
80#elif !defined(__ELF__)
81// CompilationCallback stub - We can't use a C function with inline assembly in
82// it, because we the prolog/epilog inserted by GCC won't work for us.  Instead,
83// write our own wrapper, which does things our way, so we have complete control
84// over register saving and restoring.
85asm(
86    ".text\n"
87    ".align 2\n"
88    ".globl _PPC32CompilationCallback\n"
89"_PPC32CompilationCallback:\n"
90    // Make space for 8 ints r[3-10] and 13 doubles f[1-13] and the
91    // FIXME: need to save v[0-19] for altivec?
92    // FIXME: could shrink frame
93    // Set up a proper stack frame
94    // FIXME Layout
95    //   PowerPC32 ABI linkage    -  24 bytes
96    //                 parameters -  32 bytes
97    //   13 double registers      - 104 bytes
98    //   8 int registers          -  32 bytes
99    "mflr r0\n"
100    "stw r0,  8(r1)\n"
101    "stwu r1, -208(r1)\n"
102    // Save all int arg registers
103    "stw r10, 204(r1)\n"    "stw r9,  200(r1)\n"
104    "stw r8,  196(r1)\n"    "stw r7,  192(r1)\n"
105    "stw r6,  188(r1)\n"    "stw r5,  184(r1)\n"
106    "stw r4,  180(r1)\n"    "stw r3,  176(r1)\n"
107    // Save all call-clobbered FP regs.
108    "stfd f13, 168(r1)\n"   "stfd f12, 160(r1)\n"
109    "stfd f11, 152(r1)\n"   "stfd f10, 144(r1)\n"
110    "stfd f9,  136(r1)\n"   "stfd f8,  128(r1)\n"
111    "stfd f7,  120(r1)\n"   "stfd f6,  112(r1)\n"
112    "stfd f5,  104(r1)\n"   "stfd f4,   96(r1)\n"
113    "stfd f3,   88(r1)\n"   "stfd f2,   80(r1)\n"
114    "stfd f1,   72(r1)\n"
115    // Arguments to Compilation Callback:
116    // r3 - our lr (address of the call instruction in stub plus 4)
117    // r4 - stub's lr (address of instruction that called the stub plus 4)
118    // r5 - is64Bit - always 0.
119    "mr   r3, r0\n"
120    "lwz  r2, 208(r1)\n" // stub's frame
121    "lwz  r4, 8(r2)\n" // stub's lr
122    "li   r5, 0\n"       // 0 == 32 bit
123    "bl _LLVMPPCCompilationCallback\n"
124    "mtctr r3\n"
125    // Restore all int arg registers
126    "lwz r10, 204(r1)\n"    "lwz r9,  200(r1)\n"
127    "lwz r8,  196(r1)\n"    "lwz r7,  192(r1)\n"
128    "lwz r6,  188(r1)\n"    "lwz r5,  184(r1)\n"
129    "lwz r4,  180(r1)\n"    "lwz r3,  176(r1)\n"
130    // Restore all FP arg registers
131    "lfd f13, 168(r1)\n"    "lfd f12, 160(r1)\n"
132    "lfd f11, 152(r1)\n"    "lfd f10, 144(r1)\n"
133    "lfd f9,  136(r1)\n"    "lfd f8,  128(r1)\n"
134    "lfd f7,  120(r1)\n"    "lfd f6,  112(r1)\n"
135    "lfd f5,  104(r1)\n"    "lfd f4,   96(r1)\n"
136    "lfd f3,   88(r1)\n"    "lfd f2,   80(r1)\n"
137    "lfd f1,   72(r1)\n"
138    // Pop 3 frames off the stack and branch to target
139    "lwz  r1, 208(r1)\n"
140    "lwz  r2, 8(r1)\n"
141    "mtlr r2\n"
142    "bctr\n"
143    );
144
145#else
146// ELF PPC 32 support
147
148// CompilationCallback stub - We can't use a C function with inline assembly in
149// it, because we the prolog/epilog inserted by GCC won't work for us.  Instead,
150// write our own wrapper, which does things our way, so we have complete control
151// over register saving and restoring.
152asm(
153    ".text\n"
154    ".align 2\n"
155    ".globl PPC32CompilationCallback\n"
156"PPC32CompilationCallback:\n"
157    // Make space for 8 ints r[3-10] and 8 doubles f[1-8] and the
158    // FIXME: need to save v[0-19] for altivec?
159    // FIXME: could shrink frame
160    // Set up a proper stack frame
161    // FIXME Layout
162    //   8 double registers       -  64 bytes
163    //   8 int registers          -  32 bytes
164    "mflr 0\n"
165    "stw 0,  4(1)\n"
166    "stwu 1, -104(1)\n"
167    // Save all int arg registers
168    "stw 10, 100(1)\n"   "stw 9,  96(1)\n"
169    "stw 8,  92(1)\n"    "stw 7,  88(1)\n"
170    "stw 6,  84(1)\n"    "stw 5,  80(1)\n"
171    "stw 4,  76(1)\n"    "stw 3,  72(1)\n"
172    // Save all call-clobbered FP regs.
173    "stfd 8,  64(1)\n"
174    "stfd 7,  56(1)\n"   "stfd 6,  48(1)\n"
175    "stfd 5,  40(1)\n"   "stfd 4,  32(1)\n"
176    "stfd 3,  24(1)\n"   "stfd 2,  16(1)\n"
177    "stfd 1,  8(1)\n"
178    // Arguments to Compilation Callback:
179    // r3 - our lr (address of the call instruction in stub plus 4)
180    // r4 - stub's lr (address of instruction that called the stub plus 4)
181    // r5 - is64Bit - always 0.
182    "mr   3, 0\n"
183    "lwz  5, 104(1)\n" // stub's frame
184    "lwz  4, 4(5)\n" // stub's lr
185    "li   5, 0\n"       // 0 == 32 bit
186    "bl LLVMPPCCompilationCallback\n"
187    "mtctr 3\n"
188    // Restore all int arg registers
189    "lwz 10, 100(1)\n"   "lwz 9,  96(1)\n"
190    "lwz 8,  92(1)\n"    "lwz 7,  88(1)\n"
191    "lwz 6,  84(1)\n"    "lwz 5,  80(1)\n"
192    "lwz 4,  76(1)\n"    "lwz 3,  72(1)\n"
193    // Restore all FP arg registers
194    "lfd 8,  64(1)\n"
195    "lfd 7,  56(1)\n"    "lfd 6,  48(1)\n"
196    "lfd 5,  40(1)\n"    "lfd 4,  32(1)\n"
197    "lfd 3,  24(1)\n"    "lfd 2,  16(1)\n"
198    "lfd 1,  8(1)\n"
199    // Pop 3 frames off the stack and branch to target
200    "lwz  1, 104(1)\n"
201    "lwz  0, 4(1)\n"
202    "mtlr 0\n"
203    "bctr\n"
204    );
205#endif
206
207#if !defined(__powerpc64__) && !defined(__ppc64__)
208void PPC64CompilationCallback() {
209  llvm_unreachable("This is not a 64bit PowerPC, you can't execute this!");
210}
211#else
212#  ifdef __ELF__
213asm(
214    ".text\n"
215    ".align 2\n"
216    ".globl PPC64CompilationCallback\n"
217    ".section \".opd\",\"aw\",@progbits\n"
218    ".align 3\n"
219"PPC64CompilationCallback:\n"
220    ".quad .L.PPC64CompilationCallback,.TOC.@tocbase,0\n"
221    ".size PPC64CompilationCallback,24\n"
222    ".previous\n"
223    ".align 4\n"
224    ".type PPC64CompilationCallback,@function\n"
225".L.PPC64CompilationCallback:\n"
226#  else
227asm(
228    ".text\n"
229    ".align 2\n"
230    ".globl _PPC64CompilationCallback\n"
231"_PPC64CompilationCallback:\n"
232#  endif
233    // Make space for 8 ints r[3-10] and 13 doubles f[1-13] and the
234    // FIXME: need to save v[0-19] for altivec?
235    // Set up a proper stack frame
236    // Layout
237    //   PowerPC64 ABI linkage    -  48 bytes
238    //                 parameters -  64 bytes
239    //   13 double registers      - 104 bytes
240    //   8 int registers          -  64 bytes
241    "mflr 0\n"
242    "std  0,  16(1)\n"
243    "stdu 1, -280(1)\n"
244    // Save all int arg registers
245    "std 10, 272(1)\n"    "std 9,  264(1)\n"
246    "std 8,  256(1)\n"    "std 7,  248(1)\n"
247    "std 6,  240(1)\n"    "std 5,  232(1)\n"
248    "std 4,  224(1)\n"    "std 3,  216(1)\n"
249    // Save all call-clobbered FP regs.
250    "stfd 13, 208(1)\n"    "stfd 12, 200(1)\n"
251    "stfd 11, 192(1)\n"    "stfd 10, 184(1)\n"
252    "stfd 9,  176(1)\n"    "stfd 8,  168(1)\n"
253    "stfd 7,  160(1)\n"    "stfd 6,  152(1)\n"
254    "stfd 5,  144(1)\n"    "stfd 4,  136(1)\n"
255    "stfd 3,  128(1)\n"    "stfd 2,  120(1)\n"
256    "stfd 1,  112(1)\n"
257    // Arguments to Compilation Callback:
258    // r3 - our lr (address of the call instruction in stub plus 4)
259    // r4 - stub's lr (address of instruction that called the stub plus 4)
260    // r5 - is64Bit - always 1.
261    "mr   3, 0\n"      // return address (still in r0)
262    "ld   5, 280(1)\n" // stub's frame
263    "ld   4, 16(5)\n"  // stub's lr
264    "li   5, 1\n"      // 1 == 64 bit
265#  ifdef __ELF__
266    "bl LLVMPPCCompilationCallback\n"
267    "nop\n"
268#  else
269    "bl _LLVMPPCCompilationCallback\n"
270#  endif
271    "mtctr 3\n"
272    // Restore all int arg registers
273    "ld 10, 272(1)\n"    "ld 9,  264(1)\n"
274    "ld 8,  256(1)\n"    "ld 7,  248(1)\n"
275    "ld 6,  240(1)\n"    "ld 5,  232(1)\n"
276    "ld 4,  224(1)\n"    "ld 3,  216(1)\n"
277    // Restore all FP arg registers
278    "lfd 13, 208(1)\n"    "lfd 12, 200(1)\n"
279    "lfd 11, 192(1)\n"    "lfd 10, 184(1)\n"
280    "lfd 9,  176(1)\n"    "lfd 8,  168(1)\n"
281    "lfd 7,  160(1)\n"    "lfd 6,  152(1)\n"
282    "lfd 5,  144(1)\n"    "lfd 4,  136(1)\n"
283    "lfd 3,  128(1)\n"    "lfd 2,  120(1)\n"
284    "lfd 1,  112(1)\n"
285    // Pop 3 frames off the stack and branch to target
286    "ld  1, 280(1)\n"
287    "ld  0, 16(1)\n"
288    "mtlr 0\n"
289    // XXX: any special TOC handling in the ELF case for JIT?
290    "bctr\n"
291    );
292#endif
293
294extern "C" {
295LLVM_LIBRARY_VISIBILITY void *
296LLVMPPCCompilationCallback(unsigned *StubCallAddrPlus4,
297                           unsigned *OrigCallAddrPlus4,
298                           bool is64Bit) {
299  // Adjust the pointer to the address of the call instruction in the stub
300  // emitted by emitFunctionStub, rather than the instruction after it.
301  unsigned *StubCallAddr = StubCallAddrPlus4 - 1;
302  unsigned *OrigCallAddr = OrigCallAddrPlus4 - 1;
303
304  void *Target = JITCompilerFunction(StubCallAddr);
305
306  // Check to see if *OrigCallAddr is a 'bl' instruction, and if we can rewrite
307  // it to branch directly to the destination.  If so, rewrite it so it does not
308  // need to go through the stub anymore.
309  unsigned OrigCallInst = *OrigCallAddr;
310  if ((OrigCallInst >> 26) == 18) {     // Direct call.
311    intptr_t Offset = ((intptr_t)Target - (intptr_t)OrigCallAddr) >> 2;
312
313    if (Offset >= -(1 << 23) && Offset < (1 << 23)) {   // In range?
314      // Clear the original target out.
315      OrigCallInst &= (63 << 26) | 3;
316      // Fill in the new target.
317      OrigCallInst |= (Offset & ((1 << 24)-1)) << 2;
318      // Replace the call.
319      *OrigCallAddr = OrigCallInst;
320    }
321  }
322
323  // Assert that we are coming from a stub that was created with our
324  // emitFunctionStub.
325  if ((*StubCallAddr >> 26) == 18)
326    StubCallAddr -= 3;
327  else {
328  assert((*StubCallAddr >> 26) == 19 && "Call in stub is not indirect!");
329    StubCallAddr -= is64Bit ? 9 : 6;
330  }
331
332  // Rewrite the stub with an unconditional branch to the target, for any users
333  // who took the address of the stub.
334  EmitBranchToAt((intptr_t)StubCallAddr, (intptr_t)Target, false, is64Bit);
335  sys::Memory::InvalidateInstructionCache(StubCallAddr, 7*4);
336
337  // Put the address of the target function to call and the address to return to
338  // after calling the target function in a place that is easy to get on the
339  // stack after we restore all regs.
340  return Target;
341}
342}
343
344
345
346TargetJITInfo::LazyResolverFn
347PPCJITInfo::getLazyResolverFunction(JITCompilerFn Fn) {
348  JITCompilerFunction = Fn;
349  return is64Bit ? PPC64CompilationCallback : PPC32CompilationCallback;
350}
351
352TargetJITInfo::StubLayout PPCJITInfo::getStubLayout() {
353  // The stub contains up to 10 4-byte instructions, aligned at 4 bytes: 3
354  // instructions to save the caller's address if this is a lazy-compilation
355  // stub, plus a 1-, 4-, or 7-instruction sequence to load an arbitrary address
356  // into a register and jump through it.
357  StubLayout Result = {10*4, 4};
358  return Result;
359}
360
// Declaration of sys_icache_invalidate, visible only for PowerPC builds on
// Apple platforms.
// NOTE(review): nothing in the visible part of this file calls it (cache
// flushing goes through sys::Memory::InvalidateInstructionCache) - confirm
// whether this declaration is still needed.
#if (defined(__POWERPC__) || defined (__ppc__) || defined(_POWER)) && \
defined(__APPLE__)
extern "C" void sys_icache_invalidate(const void *Addr, size_t len);
#endif
365
366void *PPCJITInfo::emitFunctionStub(const Function* F, void *Fn,
367                                   JITCodeEmitter &JCE) {
368  // If this is just a call to an external function, emit a branch instead of a
369  // call.  The code is the same except for one bit of the last instruction.
370  if (Fn != (void*)(intptr_t)PPC32CompilationCallback &&
371      Fn != (void*)(intptr_t)PPC64CompilationCallback) {
372    void *Addr = (void*)JCE.getCurrentPCValue();
373    JCE.emitWordBE(0);
374    JCE.emitWordBE(0);
375    JCE.emitWordBE(0);
376    JCE.emitWordBE(0);
377    JCE.emitWordBE(0);
378    JCE.emitWordBE(0);
379    JCE.emitWordBE(0);
380    EmitBranchToAt((intptr_t)Addr, (intptr_t)Fn, false, is64Bit);
381    sys::Memory::InvalidateInstructionCache(Addr, 7*4);
382    return Addr;
383  }
384
385  void *Addr = (void*)JCE.getCurrentPCValue();
386  if (is64Bit) {
387    JCE.emitWordBE(0xf821ffb1);     // stdu r1,-80(r1)
388    JCE.emitWordBE(0x7d6802a6);     // mflr r11
389    JCE.emitWordBE(0xf9610060);     // std r11, 96(r1)
390  } else if (TM.getSubtargetImpl()->isDarwinABI()){
391    JCE.emitWordBE(0x9421ffe0);     // stwu r1,-32(r1)
392    JCE.emitWordBE(0x7d6802a6);     // mflr r11
393    JCE.emitWordBE(0x91610028);     // stw r11, 40(r1)
394  } else {
395    JCE.emitWordBE(0x9421ffe0);     // stwu r1,-32(r1)
396    JCE.emitWordBE(0x7d6802a6);     // mflr r11
397    JCE.emitWordBE(0x91610024);     // stw r11, 36(r1)
398  }
399  intptr_t BranchAddr = (intptr_t)JCE.getCurrentPCValue();
400  JCE.emitWordBE(0);
401  JCE.emitWordBE(0);
402  JCE.emitWordBE(0);
403  JCE.emitWordBE(0);
404  JCE.emitWordBE(0);
405  JCE.emitWordBE(0);
406  JCE.emitWordBE(0);
407  EmitBranchToAt(BranchAddr, (intptr_t)Fn, true, is64Bit);
408  sys::Memory::InvalidateInstructionCache(Addr, 10*4);
409  return Addr;
410}
411
412
413void PPCJITInfo::relocate(void *Function, MachineRelocation *MR,
414                          unsigned NumRelocs, unsigned char* GOTBase) {
415  for (unsigned i = 0; i != NumRelocs; ++i, ++MR) {
416    unsigned *RelocPos = (unsigned*)Function + MR->getMachineCodeOffset()/4;
417    intptr_t ResultPtr = (intptr_t)MR->getResultPointer();
418    switch ((PPC::RelocationType)MR->getRelocationType()) {
419    default: llvm_unreachable("Unknown relocation type!");
420    case PPC::reloc_pcrel_bx:
421      // PC-relative relocation for b and bl instructions.
422      ResultPtr = (ResultPtr-(intptr_t)RelocPos) >> 2;
423      assert(ResultPtr >= -(1 << 23) && ResultPtr < (1 << 23) &&
424             "Relocation out of range!");
425      *RelocPos |= (ResultPtr & ((1 << 24)-1))  << 2;
426      break;
427    case PPC::reloc_pcrel_bcx:
428      // PC-relative relocation for BLT,BLE,BEQ,BGE,BGT,BNE, or other
429      // bcx instructions.
430      ResultPtr = (ResultPtr-(intptr_t)RelocPos) >> 2;
431      assert(ResultPtr >= -(1 << 13) && ResultPtr < (1 << 13) &&
432             "Relocation out of range!");
433      *RelocPos |= (ResultPtr & ((1 << 14)-1))  << 2;
434      break;
435    case PPC::reloc_absolute_high:     // high bits of ref -> low 16 of instr
436    case PPC::reloc_absolute_low: {    // low bits of ref  -> low 16 of instr
437      ResultPtr += MR->getConstantVal();
438
439      // If this is a high-part access, get the high-part.
440      if (MR->getRelocationType() == PPC::reloc_absolute_high) {
441        // If the low part will have a carry (really a borrow) from the low
442        // 16-bits into the high 16, add a bit to borrow from.
443        if (((int)ResultPtr << 16) < 0)
444          ResultPtr += 1 << 16;
445        ResultPtr >>= 16;
446      }
447
448      // Do the addition then mask, so the addition does not overflow the 16-bit
449      // immediate section of the instruction.
450      unsigned LowBits  = (*RelocPos + ResultPtr) & 65535;
451      unsigned HighBits = *RelocPos & ~65535;
452      *RelocPos = LowBits | HighBits;  // Slam into low 16-bits
453      break;
454    }
455    case PPC::reloc_absolute_low_ix: {  // low bits of ref  -> low 14 of instr
456      ResultPtr += MR->getConstantVal();
457      // Do the addition then mask, so the addition does not overflow the 16-bit
458      // immediate section of the instruction.
459      unsigned LowBits  = (*RelocPos + ResultPtr) & 0xFFFC;
460      unsigned HighBits = *RelocPos & 0xFFFF0003;
461      *RelocPos = LowBits | HighBits;  // Slam into low 14-bits.
462      break;
463    }
464    }
465  }
466}
467
468void PPCJITInfo::replaceMachineCodeForFunction(void *Old, void *New) {
469  EmitBranchToAt((intptr_t)Old, (intptr_t)New, false, is64Bit);
470  sys::Memory::InvalidateInstructionCache(Old, 7*4);
471}
472