sharedRuntime_ppc.cpp revision 9751:4a24de859a87
/*
 * Copyright (c) 1997, 2015, Oracle and/or its affiliates. All rights reserved.
 * Copyright 2012, 2015 SAP AG. All rights reserved.
 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
 *
 * This code is free software; you can redistribute it and/or modify it
 * under the terms of the GNU General Public License version 2 only, as
 * published by the Free Software Foundation.
 *
 * This code is distributed in the hope that it will be useful, but WITHOUT
 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
 * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
 * version 2 for more details (a copy is included in the LICENSE file that
 * accompanied this code).
 *
 * You should have received a copy of the GNU General Public License version
 * 2 along with this work; if not, write to the Free Software Foundation,
 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
 *
 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
 * or visit www.oracle.com if you need additional information or have any
 * questions.
 *
 */

#include "precompiled.hpp"
#include "asm/macroAssembler.inline.hpp"
#include "code/debugInfoRec.hpp"
#include "code/icBuffer.hpp"
#include "code/vtableStubs.hpp"
#include "frame_ppc.hpp"
#include "interpreter/interpreter.hpp"
#include "interpreter/interp_masm.hpp"
#include "oops/compiledICHolder.hpp"
#include "prims/jvmtiRedefineClassesTrace.hpp"
#include "runtime/sharedRuntime.hpp"
#include "runtime/vframeArray.hpp"
#include "vmreg_ppc.inline.hpp"
#ifdef COMPILER1
#include "c1/c1_Runtime1.hpp"
#endif
#ifdef COMPILER2
#include "adfiles/ad_ppc_64.hpp"
#include "opto/runtime.hpp"
#endif

#define __ masm->

#ifdef PRODUCT
#define BLOCK_COMMENT(str) // nothing
#else
#define BLOCK_COMMENT(str) __ block_comment(str)
#endif

#define BIND(label) bind(label); BLOCK_COMMENT(#label ":")

class RegisterSaver {
 // Used for saving volatile registers.
 public:

  // Support different return pc locations.
  enum ReturnPCLocation {
    return_pc_is_lr,
    return_pc_is_pre_saved,
    return_pc_is_thread_saved_exception_pc
  };

  static OopMap* push_frame_reg_args_and_save_live_registers(MacroAssembler* masm,
                         int* out_frame_size_in_bytes,
                         bool generate_oop_map,
                         int return_pc_adjustment,
                         ReturnPCLocation return_pc_location);
  static void    restore_live_registers_and_pop_frame(MacroAssembler* masm,
                         int frame_size_in_bytes,
                         bool restore_ctr);

  static void push_frame_and_save_argument_registers(MacroAssembler* masm,
                         Register r_temp,
                         int frame_size,
                         int total_args,
                         const VMRegPair *regs, const VMRegPair *regs2 = NULL);
  static void restore_argument_registers_and_pop_frame(MacroAssembler* masm,
                         int frame_size,
                         int total_args,
                         const VMRegPair *regs, const VMRegPair *regs2 = NULL);

  // During deoptimization only the result registers need to be restored;
  // all the other values have already been extracted.
  static void restore_result_registers(MacroAssembler* masm, int frame_size_in_bytes);

  // Constants and data structures:

  typedef enum {
    int_reg           = 0,
    float_reg         = 1,
    special_reg       = 2
  } RegisterType;

  typedef enum {
    reg_size          = 8,
    half_reg_size     = reg_size / 2
  } RegisterConstants;

  typedef struct {
    RegisterType        reg_type;
    int                 reg_num;
    VMReg               vmreg;
  } LiveRegType;
};



#define RegisterSaver_LiveSpecialReg(regname) \
  { RegisterSaver::special_reg, regname->encoding(), regname->as_VMReg() }

#define RegisterSaver_LiveIntReg(regname) \
  { RegisterSaver::int_reg,     regname->encoding(), regname->as_VMReg() }

#define RegisterSaver_LiveFloatReg(regname) \
  { RegisterSaver::float_reg,   regname->encoding(), regname->as_VMReg() }
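
// For illustration: assuming encoding() returns the register's number
// (e.g. R3->encoding() == 3), RegisterSaver_LiveIntReg(R3) expands to the
// initializer
//   { RegisterSaver::int_reg, 3, R3->as_VMReg() }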

static const RegisterSaver::LiveRegType RegisterSaver_LiveRegs[] = {
  // Live registers which get spilled to the stack. Register
  // positions in this array correspond directly to the stack layout.

  //
  // live special registers:
  //
  RegisterSaver_LiveSpecialReg(SR_CTR),
  //
  // live float registers:
  //
  RegisterSaver_LiveFloatReg( F0  ),
  RegisterSaver_LiveFloatReg( F1  ),
  RegisterSaver_LiveFloatReg( F2  ),
  RegisterSaver_LiveFloatReg( F3  ),
  RegisterSaver_LiveFloatReg( F4  ),
  RegisterSaver_LiveFloatReg( F5  ),
  RegisterSaver_LiveFloatReg( F6  ),
  RegisterSaver_LiveFloatReg( F7  ),
  RegisterSaver_LiveFloatReg( F8  ),
  RegisterSaver_LiveFloatReg( F9  ),
  RegisterSaver_LiveFloatReg( F10 ),
  RegisterSaver_LiveFloatReg( F11 ),
  RegisterSaver_LiveFloatReg( F12 ),
  RegisterSaver_LiveFloatReg( F13 ),
  RegisterSaver_LiveFloatReg( F14 ),
  RegisterSaver_LiveFloatReg( F15 ),
  RegisterSaver_LiveFloatReg( F16 ),
  RegisterSaver_LiveFloatReg( F17 ),
  RegisterSaver_LiveFloatReg( F18 ),
  RegisterSaver_LiveFloatReg( F19 ),
  RegisterSaver_LiveFloatReg( F20 ),
  RegisterSaver_LiveFloatReg( F21 ),
  RegisterSaver_LiveFloatReg( F22 ),
  RegisterSaver_LiveFloatReg( F23 ),
  RegisterSaver_LiveFloatReg( F24 ),
  RegisterSaver_LiveFloatReg( F25 ),
  RegisterSaver_LiveFloatReg( F26 ),
  RegisterSaver_LiveFloatReg( F27 ),
  RegisterSaver_LiveFloatReg( F28 ),
  RegisterSaver_LiveFloatReg( F29 ),
  RegisterSaver_LiveFloatReg( F30 ),
  RegisterSaver_LiveFloatReg( F31 ),
  //
  // live integer registers:
  //
  RegisterSaver_LiveIntReg(   R0  ),
  //RegisterSaver_LiveIntReg( R1  ), // stack pointer
  RegisterSaver_LiveIntReg(   R2  ),
  RegisterSaver_LiveIntReg(   R3  ),
  RegisterSaver_LiveIntReg(   R4  ),
  RegisterSaver_LiveIntReg(   R5  ),
  RegisterSaver_LiveIntReg(   R6  ),
  RegisterSaver_LiveIntReg(   R7  ),
  RegisterSaver_LiveIntReg(   R8  ),
  RegisterSaver_LiveIntReg(   R9  ),
  RegisterSaver_LiveIntReg(   R10 ),
  RegisterSaver_LiveIntReg(   R11 ),
  RegisterSaver_LiveIntReg(   R12 ),
  //RegisterSaver_LiveIntReg( R13 ), // system thread id
  RegisterSaver_LiveIntReg(   R14 ),
  RegisterSaver_LiveIntReg(   R15 ),
  RegisterSaver_LiveIntReg(   R16 ),
  RegisterSaver_LiveIntReg(   R17 ),
  RegisterSaver_LiveIntReg(   R18 ),
  RegisterSaver_LiveIntReg(   R19 ),
  RegisterSaver_LiveIntReg(   R20 ),
  RegisterSaver_LiveIntReg(   R21 ),
  RegisterSaver_LiveIntReg(   R22 ),
  RegisterSaver_LiveIntReg(   R23 ),
  RegisterSaver_LiveIntReg(   R24 ),
  RegisterSaver_LiveIntReg(   R25 ),
  RegisterSaver_LiveIntReg(   R26 ),
  RegisterSaver_LiveIntReg(   R27 ),
  RegisterSaver_LiveIntReg(   R28 ),
  RegisterSaver_LiveIntReg(   R29 ),
  RegisterSaver_LiveIntReg(   R30 ),
  RegisterSaver_LiveIntReg(   R31 ), // must be the last register (see save/restore functions below)
};
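
// Size check (illustrative): the table above holds 1 special + 32 float +
// 30 integer registers = 63 entries, so with reg_size == 8 the register
// save area occupies 63 * 8 = 504 bytes before frame alignment.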

OopMap* RegisterSaver::push_frame_reg_args_and_save_live_registers(MacroAssembler* masm,
                         int* out_frame_size_in_bytes,
                         bool generate_oop_map,
                         int return_pc_adjustment,
                         ReturnPCLocation return_pc_location) {
  // Push an abi_reg_args-frame and store all registers which may be live.
  // If requested, create an OopMap: Record volatile registers as
  // callee-save values in an OopMap so their save locations will be
  // propagated to the RegisterMap of the caller frame during
  // StackFrameStream construction (needed for deoptimization; see
  // compiledVFrame::create_stack_value).
  // If return_pc_adjustment != 0 adjust the return pc by return_pc_adjustment.

  int offset;

  // Calculate the frame size.
  const int regstosave_num       = sizeof(RegisterSaver_LiveRegs) /
                                   sizeof(RegisterSaver::LiveRegType);
  const int register_save_size   = regstosave_num * reg_size;
  const int frame_size_in_bytes  = round_to(register_save_size, frame::alignment_in_bytes)
                                   + frame::abi_reg_args_size;
  *out_frame_size_in_bytes       = frame_size_in_bytes;
  const int frame_size_in_slots  = frame_size_in_bytes / sizeof(jint);
  const int register_save_offset = frame_size_in_bytes - register_save_size;

  // OopMap frame size is in c2 stack slots (sizeof(jint)) not bytes or words.
  OopMap* map = generate_oop_map ? new OopMap(frame_size_in_slots, 0) : NULL;

  BLOCK_COMMENT("push_frame_reg_args_and_save_live_registers {");

  // Save R31 in the last slot of the not yet pushed frame so that we
  // can use it as scratch reg.
  __ std(R31, -reg_size, R1_SP);
  assert(-reg_size == register_save_offset - frame_size_in_bytes + ((regstosave_num-1)*reg_size),
         "consistency check");

  // Save the flags.
  // Do the save_LR_CR by hand and adjust the return pc if requested.
  __ mfcr(R31);
  __ std(R31, _abi(cr), R1_SP);
  switch (return_pc_location) {
    case return_pc_is_lr: __ mflr(R31); break;
    case return_pc_is_pre_saved: assert(return_pc_adjustment == 0, "unsupported"); break;
    case return_pc_is_thread_saved_exception_pc: __ ld(R31, thread_(saved_exception_pc)); break;
    default: ShouldNotReachHere();
  }
  if (return_pc_location != return_pc_is_pre_saved) {
    if (return_pc_adjustment != 0) {
      __ addi(R31, R31, return_pc_adjustment);
    }
    __ std(R31, _abi(lr), R1_SP);
  }

  // Push a new frame.
  __ push_frame(frame_size_in_bytes, R31);

  // Save all registers (ints and floats).
  offset = register_save_offset;
  for (int i = 0; i < regstosave_num; i++) {
    int reg_num  = RegisterSaver_LiveRegs[i].reg_num;
    int reg_type = RegisterSaver_LiveRegs[i].reg_type;

    switch (reg_type) {
      case RegisterSaver::int_reg: {
        if (reg_num != 31) { // We spilled R31 right at the beginning.
          __ std(as_Register(reg_num), offset, R1_SP);
        }
        break;
      }
      case RegisterSaver::float_reg: {
        __ stfd(as_FloatRegister(reg_num), offset, R1_SP);
        break;
      }
      case RegisterSaver::special_reg: {
        if (reg_num == SR_CTR_SpecialRegisterEnumValue) {
          __ mfctr(R31);
          __ std(R31, offset, R1_SP);
        } else {
          Unimplemented();
        }
        break;
      }
      default:
        ShouldNotReachHere();
    }

    if (generate_oop_map) {
      map->set_callee_saved(VMRegImpl::stack2reg(offset>>2),
                            RegisterSaver_LiveRegs[i].vmreg);
      map->set_callee_saved(VMRegImpl::stack2reg((offset + half_reg_size)>>2),
                            RegisterSaver_LiveRegs[i].vmreg->next());
    }
    offset += reg_size;
  }

  BLOCK_COMMENT("} push_frame_reg_args_and_save_live_registers");

  // And we're done.
  return map;
}
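
// Usage sketch (hypothetical caller, matching the declared signatures):
//
//   int frame_size_in_bytes;
//   OopMap* map = RegisterSaver::push_frame_reg_args_and_save_live_registers(
//                     masm, &frame_size_in_bytes, /*generate_oop_map=*/true,
//                     /*return_pc_adjustment=*/0,
//                     RegisterSaver::return_pc_is_lr);
//   // ... code that may clobber the volatile registers ...
//   RegisterSaver::restore_live_registers_and_pop_frame(masm, frame_size_in_bytes,
//                                                       /*restore_ctr=*/true);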


// Pop the current frame and restore all the registers that we
// saved.
void RegisterSaver::restore_live_registers_and_pop_frame(MacroAssembler* masm,
                                                         int frame_size_in_bytes,
                                                         bool restore_ctr) {
  int offset;
  const int regstosave_num       = sizeof(RegisterSaver_LiveRegs) /
                                   sizeof(RegisterSaver::LiveRegType);
  const int register_save_size   = regstosave_num * reg_size;
  const int register_save_offset = frame_size_in_bytes - register_save_size;

  BLOCK_COMMENT("restore_live_registers_and_pop_frame {");

  // Restore all registers (ints and floats).
  offset = register_save_offset;
  for (int i = 0; i < regstosave_num; i++) {
    int reg_num  = RegisterSaver_LiveRegs[i].reg_num;
    int reg_type = RegisterSaver_LiveRegs[i].reg_type;

    switch (reg_type) {
      case RegisterSaver::int_reg: {
        if (reg_num != 31) // R31 is restored at the end, it's the tmp reg!
          __ ld(as_Register(reg_num), offset, R1_SP);
        break;
      }
      case RegisterSaver::float_reg: {
        __ lfd(as_FloatRegister(reg_num), offset, R1_SP);
        break;
      }
      case RegisterSaver::special_reg: {
        if (reg_num == SR_CTR_SpecialRegisterEnumValue) {
          if (restore_ctr) { // Nothing to do here if ctr already contains the next address.
            __ ld(R31, offset, R1_SP);
            __ mtctr(R31);
          }
        } else {
          Unimplemented();
        }
        break;
      }
      default:
        ShouldNotReachHere();
    }
    offset += reg_size;
  }

  // Pop the frame.
  __ pop_frame();

  // Restore the flags.
  __ restore_LR_CR(R31);

  // Restore scratch register's value.
  __ ld(R31, -reg_size, R1_SP);

  BLOCK_COMMENT("} restore_live_registers_and_pop_frame");
}

void RegisterSaver::push_frame_and_save_argument_registers(MacroAssembler* masm, Register r_temp,
                                                           int frame_size, int total_args, const VMRegPair *regs,
                                                           const VMRegPair *regs2) {
  __ push_frame(frame_size, r_temp);
  int st_off = frame_size - wordSize;
  for (int i = 0; i < total_args; i++) {
    VMReg r_1 = regs[i].first();
    VMReg r_2 = regs[i].second();
    if (!r_1->is_valid()) {
      assert(!r_2->is_valid(), "");
      continue;
    }
    if (r_1->is_Register()) {
      Register r = r_1->as_Register();
      __ std(r, st_off, R1_SP);
      st_off -= wordSize;
    } else if (r_1->is_FloatRegister()) {
      FloatRegister f = r_1->as_FloatRegister();
      __ stfd(f, st_off, R1_SP);
      st_off -= wordSize;
    }
  }
  if (regs2 != NULL) {
    for (int i = 0; i < total_args; i++) {
      VMReg r_1 = regs2[i].first();
      VMReg r_2 = regs2[i].second();
      if (!r_1->is_valid()) {
        assert(!r_2->is_valid(), "");
        continue;
      }
      if (r_1->is_Register()) {
        Register r = r_1->as_Register();
        __ std(r, st_off, R1_SP);
        st_off -= wordSize;
      } else if (r_1->is_FloatRegister()) {
        FloatRegister f = r_1->as_FloatRegister();
        __ stfd(f, st_off, R1_SP);
        st_off -= wordSize;
      }
    }
  }
}

void RegisterSaver::restore_argument_registers_and_pop_frame(MacroAssembler* masm, int frame_size,
                                                             int total_args, const VMRegPair *regs,
                                                             const VMRegPair *regs2) {
  int st_off = frame_size - wordSize;
  for (int i = 0; i < total_args; i++) {
    VMReg r_1 = regs[i].first();
    VMReg r_2 = regs[i].second();
    if (r_1->is_Register()) {
      Register r = r_1->as_Register();
      __ ld(r, st_off, R1_SP);
      st_off -= wordSize;
    } else if (r_1->is_FloatRegister()) {
      FloatRegister f = r_1->as_FloatRegister();
      __ lfd(f, st_off, R1_SP);
      st_off -= wordSize;
    }
  }
  if (regs2 != NULL) {
    for (int i = 0; i < total_args; i++) {
      VMReg r_1 = regs2[i].first();
      VMReg r_2 = regs2[i].second();
      if (r_1->is_Register()) {
        Register r = r_1->as_Register();
        __ ld(r, st_off, R1_SP);
        st_off -= wordSize;
      } else if (r_1->is_FloatRegister()) {
        FloatRegister f = r_1->as_FloatRegister();
        __ lfd(f, st_off, R1_SP);
        st_off -= wordSize;
      }
    }
  }
  __ pop_frame();
}

// Restore the registers that might be holding a result.
void RegisterSaver::restore_result_registers(MacroAssembler* masm, int frame_size_in_bytes) {
  int offset;
  const int regstosave_num       = sizeof(RegisterSaver_LiveRegs) /
                                   sizeof(RegisterSaver::LiveRegType);
  const int register_save_size   = regstosave_num * reg_size;
  const int register_save_offset = frame_size_in_bytes - register_save_size;

  // Restore all result registers (ints and floats).
  offset = register_save_offset;
  for (int i = 0; i < regstosave_num; i++) {
    int reg_num  = RegisterSaver_LiveRegs[i].reg_num;
    int reg_type = RegisterSaver_LiveRegs[i].reg_type;
    switch (reg_type) {
      case RegisterSaver::int_reg: {
        if (as_Register(reg_num) == R3_RET) // int result_reg
          __ ld(as_Register(reg_num), offset, R1_SP);
        break;
      }
      case RegisterSaver::float_reg: {
        if (as_FloatRegister(reg_num) == F1_RET) // float result_reg
          __ lfd(as_FloatRegister(reg_num), offset, R1_SP);
        break;
      }
      case RegisterSaver::special_reg: {
        // Special registers don't hold a result.
        break;
      }
      default:
        ShouldNotReachHere();
    }
    offset += reg_size;
  }
}

// Is vector's size (in bytes) bigger than a size saved by default?
bool SharedRuntime::is_wide_vector(int size) {
  // Note, MaxVectorSize == 8 on PPC64.
  assert(size <= 8, "%d bytes vectors are not supported", size);
  return size > 8;
}

#ifdef COMPILER2
static int reg2slot(VMReg r) {
  return r->reg2stack() + SharedRuntime::out_preserve_stack_slots();
}

static int reg2offset(VMReg r) {
  return (r->reg2stack() + SharedRuntime::out_preserve_stack_slots()) * VMRegImpl::stack_slot_size;
}
#endif
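
// Worked example (illustrative): stack slots are 4 bytes wide, so for a
// VMReg living in stack slot n these helpers yield
//   reg2slot:   n + SharedRuntime::out_preserve_stack_slots()
//   reg2offset: (n + SharedRuntime::out_preserve_stack_slots()) * 4
// i.e. both skip over the out-preserve area at the bottom of the frame.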

// ---------------------------------------------------------------------------
// Read the array of BasicTypes from a signature, and compute where the
// arguments should go. Values in the VMRegPair regs array refer to 4-byte
// quantities. Values less than VMRegImpl::stack0 are registers, those above
// refer to 4-byte stack slots. All stack slots are based off of the stack pointer
// as framesizes are fixed.
// VMRegImpl::stack0 refers to the first slot 0(sp), and VMRegImpl::stack0+1
// refers to the memory word 4 bytes higher. Register values up to
// RegisterImpl::number_of_registers are the 64-bit integer registers.

// Note: the INPUTS in sig_bt are in units of Java argument words, which are
// either 32-bit or 64-bit depending on the build. The OUTPUTS are in 32-bit
// units regardless of build. Of course, for i486 there is no 64-bit build.

// The Java calling convention is a "shifted" version of the C ABI.
// By skipping the first C ABI register we can call non-static jni methods
// with small numbers of arguments without having to shuffle the arguments
// at all. Since we control the java ABI we ought to at least get some
// advantage out of it.

const VMReg java_iarg_reg[8] = {
  R3->as_VMReg(),
  R4->as_VMReg(),
  R5->as_VMReg(),
  R6->as_VMReg(),
  R7->as_VMReg(),
  R8->as_VMReg(),
  R9->as_VMReg(),
  R10->as_VMReg()
};

const VMReg java_farg_reg[13] = {
  F1->as_VMReg(),
  F2->as_VMReg(),
  F3->as_VMReg(),
  F4->as_VMReg(),
  F5->as_VMReg(),
  F6->as_VMReg(),
  F7->as_VMReg(),
  F8->as_VMReg(),
  F9->as_VMReg(),
  F10->as_VMReg(),
  F11->as_VMReg(),
  F12->as_VMReg(),
  F13->as_VMReg()
};

const int num_java_iarg_registers = sizeof(java_iarg_reg) / sizeof(java_iarg_reg[0]);
const int num_java_farg_registers = sizeof(java_farg_reg) / sizeof(java_farg_reg[0]);

int SharedRuntime::java_calling_convention(const BasicType *sig_bt,
                                           VMRegPair *regs,
                                           int total_args_passed,
                                           int is_outgoing) {
  // C2C calling conventions for compiled-to-compiled calls.
  // Put 8 ints/longs into registers _AND_ 13 float/doubles into
  // registers _AND_ put the rest on the stack.

  const int inc_stk_for_intfloat   = 1; // 1 slot for ints and floats
  const int inc_stk_for_longdouble = 2; // 2 slots for longs and doubles

  VMReg reg;
  int stk = 0;
  int ireg = 0;
  int freg = 0;

  // We put the first 8 arguments into registers and the rest on the
  // stack. Float arguments are already in their argument registers
  // due to c2c calling conventions (see calling_convention).
  for (int i = 0; i < total_args_passed; ++i) {
    switch(sig_bt[i]) {
    case T_BOOLEAN:
    case T_CHAR:
    case T_BYTE:
    case T_SHORT:
    case T_INT:
      if (ireg < num_java_iarg_registers) {
        // Put int/ptr in register.
        reg = java_iarg_reg[ireg];
        ++ireg;
      } else {
        // Put int/ptr on stack.
        reg = VMRegImpl::stack2reg(stk);
        stk += inc_stk_for_intfloat;
      }
      regs[i].set1(reg);
      break;
    case T_LONG:
      assert(sig_bt[i+1] == T_VOID, "expecting half");
      if (ireg < num_java_iarg_registers) {
        // Put long in register.
        reg = java_iarg_reg[ireg];
        ++ireg;
      } else {
        // Put long on stack. They must be aligned to 2 slots.
        if (stk & 0x1) ++stk;
        reg = VMRegImpl::stack2reg(stk);
        stk += inc_stk_for_longdouble;
      }
      regs[i].set2(reg);
      break;
    case T_OBJECT:
    case T_ARRAY:
    case T_ADDRESS:
      if (ireg < num_java_iarg_registers) {
        // Put ptr in register.
        reg = java_iarg_reg[ireg];
        ++ireg;
      } else {
        // Put ptr on stack. Objects must be aligned to 2 slots too,
        // because "64-bit pointers record oop-ishness on 2 aligned
        // adjacent registers." (see OopFlow::build_oop_map).
        if (stk & 0x1) ++stk;
        reg = VMRegImpl::stack2reg(stk);
        stk += inc_stk_for_longdouble;
      }
      regs[i].set2(reg);
      break;
    case T_FLOAT:
      if (freg < num_java_farg_registers) {
        // Put float in register.
        reg = java_farg_reg[freg];
        ++freg;
      } else {
        // Put float on stack.
        reg = VMRegImpl::stack2reg(stk);
        stk += inc_stk_for_intfloat;
      }
      regs[i].set1(reg);
      break;
    case T_DOUBLE:
      assert(sig_bt[i+1] == T_VOID, "expecting half");
      if (freg < num_java_farg_registers) {
        // Put double in register.
        reg = java_farg_reg[freg];
        ++freg;
      } else {
        // Put double on stack. They must be aligned to 2 slots.
        if (stk & 0x1) ++stk;
        reg = VMRegImpl::stack2reg(stk);
        stk += inc_stk_for_longdouble;
      }
      regs[i].set2(reg);
      break;
    case T_VOID:
      // Do not count halves.
      regs[i].set_bad();
      break;
    default:
      ShouldNotReachHere();
    }
  }
  return round_to(stk, 2);
}
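
// Example (illustrative): for the Java signature (int, long, Object, float,
// double) the loop above assigns
//   int    -> R3 (ireg 0)      float  -> F1 (freg 0)
//   long   -> R4 (ireg 1)      double -> F2 (freg 1)
//   Object -> R5 (ireg 2)
// Int/long/oop arguments share the ireg counter while float/double arguments
// use the independent freg counter, so no argument registers are wasted.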

#if defined(COMPILER1) || defined(COMPILER2)
// Calling convention for calling C code.
int SharedRuntime::c_calling_convention(const BasicType *sig_bt,
                                        VMRegPair *regs,
                                        VMRegPair *regs2,
                                        int total_args_passed) {
  // Calling conventions for C runtime calls and calls to JNI native methods.
  //
  // PPC64 convention: Hoist the first 8 int/ptr/long's in the first 8
  // int regs, leaving int regs undefined if the arg is flt/dbl. Hoist
  // the first 13 flt/dbl's in the first 13 fp regs but additionally
  // copy flt/dbl to the stack if they are beyond the 8th argument.

  const VMReg iarg_reg[8] = {
    R3->as_VMReg(),
    R4->as_VMReg(),
    R5->as_VMReg(),
    R6->as_VMReg(),
    R7->as_VMReg(),
    R8->as_VMReg(),
    R9->as_VMReg(),
    R10->as_VMReg()
  };

  const VMReg farg_reg[13] = {
    F1->as_VMReg(),
    F2->as_VMReg(),
    F3->as_VMReg(),
    F4->as_VMReg(),
    F5->as_VMReg(),
    F6->as_VMReg(),
    F7->as_VMReg(),
    F8->as_VMReg(),
    F9->as_VMReg(),
    F10->as_VMReg(),
    F11->as_VMReg(),
    F12->as_VMReg(),
    F13->as_VMReg()
  };

  // Check calling conventions consistency.
  assert(sizeof(iarg_reg) / sizeof(iarg_reg[0]) == Argument::n_int_register_parameters_c &&
         sizeof(farg_reg) / sizeof(farg_reg[0]) == Argument::n_float_register_parameters_c,
         "consistency");

  // `Stk' counts stack slots. Due to alignment, 32 bit values occupy
  // 2 such slots, like 64 bit values do.
  const int inc_stk_for_intfloat   = 2; // 2 slots for ints and floats
  const int inc_stk_for_longdouble = 2; // 2 slots for longs and doubles

  VMReg reg;
  // Leave room for C-compatible ABI_REG_ARGS.
  int stk = (frame::abi_reg_args_size - frame::jit_out_preserve_size) / VMRegImpl::stack_slot_size;
  int arg = 0;
  int freg = 0;

  // Avoid passing C arguments in the wrong stack slots.
#if defined(ABI_ELFv2)
  assert((SharedRuntime::out_preserve_stack_slots() + stk) * VMRegImpl::stack_slot_size == 96,
         "passing C arguments in wrong stack slots");
#else
  assert((SharedRuntime::out_preserve_stack_slots() + stk) * VMRegImpl::stack_slot_size == 112,
         "passing C arguments in wrong stack slots");
#endif
  // We fill out regs AND regs2 if an argument must be passed in a
  // register AND in a stack slot. If regs2 is NULL in such a
  // situation, we bail out with a fatal error.
  for (int i = 0; i < total_args_passed; ++i, ++arg) {
    // Initialize regs2 to BAD.
    if (regs2 != NULL) regs2[i].set_bad();

    switch(sig_bt[i]) {

    //
    // If arguments 0-7 are integers, they are passed in integer registers.
    // Argument i is placed in iarg_reg[i].
    //
    case T_BOOLEAN:
    case T_CHAR:
    case T_BYTE:
    case T_SHORT:
    case T_INT:
      // We must cast ints to longs and use full 64 bit stack slots
      // here.  Thus fall through, handle as long.
    case T_LONG:
    case T_OBJECT:
    case T_ARRAY:
    case T_ADDRESS:
    case T_METADATA:
      // Oops are already boxed if required (JNI).
      if (arg < Argument::n_int_register_parameters_c) {
        reg = iarg_reg[arg];
      } else {
        reg = VMRegImpl::stack2reg(stk);
        stk += inc_stk_for_longdouble;
      }
      regs[i].set2(reg);
      break;

    //
    // Floats are treated differently from int regs: The first 13 float arguments
    // are passed in registers (not the float args among the first 13 args).
    // Thus argument i is NOT passed in farg_reg[i] if it is float. It is passed
    // in farg_reg[j] if argument i is the j-th float argument of this call.
    //
    case T_FLOAT:
#if defined(LINUX)
      // Linux uses ELF ABI. Both original ELF and ELFv2 ABIs have float
      // in the least significant word of an argument slot.
#if defined(VM_LITTLE_ENDIAN)
#define FLOAT_WORD_OFFSET_IN_SLOT 0
#else
#define FLOAT_WORD_OFFSET_IN_SLOT 1
#endif
#elif defined(AIX)
      // Although AIX runs on big endian CPU, float is in the most
      // significant word of an argument slot.
#define FLOAT_WORD_OFFSET_IN_SLOT 0
#else
#error "unknown OS"
#endif
      if (freg < Argument::n_float_register_parameters_c) {
        // Put float in register ...
        reg = farg_reg[freg];
        ++freg;

        // Argument i for i > 8 is placed on the stack even if it's
        // placed in a register (if it's a float arg). AIX disassembly
        // shows that xlC places these float args on the stack AND in
        // a register. This is not documented, but we follow this
        // convention, too.
        if (arg >= Argument::n_regs_not_on_stack_c) {
          // ... and on the stack.
          guarantee(regs2 != NULL, "must pass float in register and stack slot");
          VMReg reg2 = VMRegImpl::stack2reg(stk + FLOAT_WORD_OFFSET_IN_SLOT);
          regs2[i].set1(reg2);
          stk += inc_stk_for_intfloat;
        }

      } else {
        // Put float on stack.
        reg = VMRegImpl::stack2reg(stk + FLOAT_WORD_OFFSET_IN_SLOT);
        stk += inc_stk_for_intfloat;
      }
      regs[i].set1(reg);
      break;
    case T_DOUBLE:
      assert(sig_bt[i+1] == T_VOID, "expecting half");
      if (freg < Argument::n_float_register_parameters_c) {
        // Put double in register ...
        reg = farg_reg[freg];
        ++freg;

        // Argument i for i > 8 is placed on the stack even if it's
        // placed in a register (if it's a double arg). AIX disassembly
        // shows that xlC places these double args on the stack AND in
        // a register. This is not documented, but we follow this
        // convention, too.
        if (arg >= Argument::n_regs_not_on_stack_c) {
          // ... and on the stack.
          guarantee(regs2 != NULL, "must pass double in register and stack slot");
          VMReg reg2 = VMRegImpl::stack2reg(stk);
          regs2[i].set2(reg2);
          stk += inc_stk_for_longdouble;
        }
      } else {
        // Put double on stack.
        reg = VMRegImpl::stack2reg(stk);
        stk += inc_stk_for_longdouble;
      }
      regs[i].set2(reg);
      break;

    case T_VOID:
      // Do not count halves.
      regs[i].set_bad();
      --arg;
      break;
    default:
      ShouldNotReachHere();
    }
  }

  return round_to(stk, 2);
}
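
// Example (illustrative): for a native signature (jint, jdouble, jint) the
// loop above assigns
//   jint    (arg 0) -> R3
//   jdouble (arg 1) -> F1  (argument position 1 is still consumed)
//   jint    (arg 2) -> R5  (R4 stays unused)
// because, unlike the Java convention above, every argument consumes an
// `arg' position regardless of its type.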
#endif // COMPILER1 || COMPILER2

static address gen_c2i_adapter(MacroAssembler *masm,
                               int total_args_passed,
                               int comp_args_on_stack,
                               const BasicType *sig_bt,
                               const VMRegPair *regs,
                               Label& call_interpreter,
                               const Register& ientry) {

  address c2i_entrypoint;

  const Register sender_SP = R21_sender_SP; // == R21_tmp1
  const Register code      = R22_tmp2;
  //const Register ientry  = R23_tmp3;
  const Register value_regs[] = { R24_tmp4, R25_tmp5, R26_tmp6 };
  const int num_value_regs = sizeof(value_regs) / sizeof(Register);
  int value_regs_index = 0;

  const Register return_pc = R27_tmp7;
  const Register tmp       = R28_tmp8;

  assert_different_registers(sender_SP, code, ientry, return_pc, tmp);

  // Adapter needs TOP_IJAVA_FRAME_ABI.
  const int adapter_size = frame::top_ijava_frame_abi_size +
                           round_to(total_args_passed * wordSize, frame::alignment_in_bytes);

  // Regular (verified) c2i entry point.
  c2i_entrypoint = __ pc();

  // Does compiled code exist? If yes, patch the caller's callsite.
  __ ld(code, method_(code));
  __ cmpdi(CCR0, code, 0);
  __ ld(ientry, method_(interpreter_entry)); // preloaded
  __ beq(CCR0, call_interpreter);

  // Patch the caller's callsite; method_(code) was not NULL, which means
  // that compiled code exists.
  __ mflr(return_pc);
  __ std(return_pc, _abi(lr), R1_SP);
  RegisterSaver::push_frame_and_save_argument_registers(masm, tmp, adapter_size, total_args_passed, regs);

  __ call_VM_leaf(CAST_FROM_FN_PTR(address, SharedRuntime::fixup_callers_callsite), R19_method, return_pc);

  RegisterSaver::restore_argument_registers_and_pop_frame(masm, adapter_size, total_args_passed, regs);
  __ ld(return_pc, _abi(lr), R1_SP);
  __ ld(ientry, method_(interpreter_entry)); // preloaded
  __ mtlr(return_pc);

  // Call the interpreter.
  __ BIND(call_interpreter);
  __ mtctr(ientry);

  // Get a copy of the current SP for loading caller's arguments.
  __ mr(sender_SP, R1_SP);

  // Add space for the adapter.
  __ resize_frame(-adapter_size, R12_scratch2);

  int st_off = adapter_size - wordSize;

  // Write the args into the outgoing interpreter space.
  for (int i = 0; i < total_args_passed; i++) {
    VMReg r_1 = regs[i].first();
    VMReg r_2 = regs[i].second();
    if (!r_1->is_valid()) {
      assert(!r_2->is_valid(), "");
      continue;
    }
    if (r_1->is_stack()) {
      Register tmp_reg = value_regs[value_regs_index];
      value_regs_index = (value_regs_index + 1) % num_value_regs;
      // The calling convention produces OptoRegs that ignore the out
      // preserve area (JIT's ABI). We must account for it here.
      int ld_off = (r_1->reg2stack() + SharedRuntime::out_preserve_stack_slots()) * VMRegImpl::stack_slot_size;
      if (!r_2->is_valid()) {
        __ lwz(tmp_reg, ld_off, sender_SP);
      } else {
        __ ld(tmp_reg, ld_off, sender_SP);
      }
      // Pretend stack targets were loaded into tmp_reg.
      r_1 = tmp_reg->as_VMReg();
    }

    if (r_1->is_Register()) {
      Register r = r_1->as_Register();
      if (!r_2->is_valid()) {
        __ stw(r, st_off, R1_SP);
        st_off -= wordSize;
      } else {
        // Longs are given 2 64-bit slots in the interpreter, but the
        // data is passed in only 1 slot.
        if (sig_bt[i] == T_LONG || sig_bt[i] == T_DOUBLE) {
          DEBUG_ONLY( __ li(tmp, 0); __ std(tmp, st_off, R1_SP); )
          st_off -= wordSize;
        }
        __ std(r, st_off, R1_SP);
        st_off -= wordSize;
      }
    } else {
      assert(r_1->is_FloatRegister(), "");
      FloatRegister f = r_1->as_FloatRegister();
      if (!r_2->is_valid()) {
        __ stfs(f, st_off, R1_SP);
        st_off -= wordSize;
      } else {
        // In 64 bit, doubles are given 2 64-bit slots in the interpreter,
        // but the data is passed in only 1 slot.
        // One of these should get known junk...
        DEBUG_ONLY( __ li(tmp, 0); __ std(tmp, st_off, R1_SP); )
        st_off -= wordSize;
        __ stfd(f, st_off, R1_SP);
        st_off -= wordSize;
      }
    }
  }

  // Jump to the interpreter just as if interpreter was doing it.

#ifdef CC_INTERP
  const Register tos = R17_tos;
#else
  const Register tos = R15_esp;
  __ load_const_optimized(R25_templateTableBase, (address)Interpreter::dispatch_table((TosState)0), R11_scratch1);
#endif

  // Load TOS.
  __ addi(tos, R1_SP, st_off);

  // Frame_manager expects initial_caller_sp (= SP without resize by c2i) in R21_tmp1.
  assert(sender_SP == R21_sender_SP, "passing initial caller's SP in wrong register");
  __ bctr();

  return c2i_entrypoint;
}
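
// Argument layout produced above (illustrative): arguments are written top
// down, starting at R1_SP + adapter_size - wordSize, so the first Java
// argument ends up at the highest address. After the loop, st_off points one
// word below the last slot written, and that address is what gets loaded
// into tos for the interpreter.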

void SharedRuntime::gen_i2c_adapter(MacroAssembler *masm,
                                    int total_args_passed,
                                    int comp_args_on_stack,
                                    const BasicType *sig_bt,
                                    const VMRegPair *regs) {

  // Load method's entry-point from method.
  __ ld(R12_scratch2, in_bytes(Method::from_compiled_offset()), R19_method);
  __ mtctr(R12_scratch2);

  // We will only enter here from an interpreted frame and never from after
  // passing thru a c2i. Azul allowed this but we do not. If we lose the
  // race and use a c2i we will remain interpreted for the race loser(s).
  // This removes all sorts of headaches on the x86 side and also eliminates
  // the possibility of having c2i -> i2c -> c2i -> ... endless transitions.

  // Note: the sender SP must be preserved on entry since we may do an
  // i2c -> c2i transition if we lose a race where compiled code goes
  // non-entrant while we get the args ready. In addition, the stack has
  // to stay 16-byte aligned on an i2c entry, or we lose the alignment
  // that all compiled code expects.

#ifdef CC_INTERP
  const Register ld_ptr = R17_tos;
#else
  const Register ld_ptr = R15_esp;
#endif

  const Register value_regs[] = { R22_tmp2, R23_tmp3, R24_tmp4, R25_tmp5, R26_tmp6 };
  const int num_value_regs = sizeof(value_regs) / sizeof(Register);
  int value_regs_index = 0;

  int ld_offset = total_args_passed*wordSize;

  // Cut-out for having no stack args. Since up to 8 int/oop args are passed
  // in registers, we will occasionally have no stack args.
  int comp_words_on_stack = 0;
  if (comp_args_on_stack) {
    // Sig words on the stack are greater than VMRegImpl::stack0. Those in
    // registers are below. By subtracting stack0, we either get a negative
    // number (all values in registers) or the maximum stack slot accessed.

    // Convert 4-byte c2 stack slots to words.
    comp_words_on_stack = round_to(comp_args_on_stack*VMRegImpl::stack_slot_size, wordSize)>>LogBytesPerWord;
    // Round up to minimum stack alignment, in wordSize.
    comp_words_on_stack = round_to(comp_words_on_stack, 2);
    __ resize_frame(-comp_words_on_stack * wordSize, R11_scratch1);
  }

  // Now generate the shuffle code. Pick up all register args and move the
  // rest through the value_regs temporaries.
  BLOCK_COMMENT("Shuffle arguments");
  for (int i = 0; i < total_args_passed; i++) {
    if (sig_bt[i] == T_VOID) {
      assert(i > 0 && (sig_bt[i-1] == T_LONG || sig_bt[i-1] == T_DOUBLE), "missing half");
      continue;
    }

    // Pick up 0, 1 or 2 words from ld_ptr.
    assert(!regs[i].second()->is_valid() || regs[i].first()->next() == regs[i].second(),
           "scrambled load targets?");
    VMReg r_1 = regs[i].first();
    VMReg r_2 = regs[i].second();
    if (!r_1->is_valid()) {
      assert(!r_2->is_valid(), "");
      continue;
    }
    if (r_1->is_FloatRegister()) {
      if (!r_2->is_valid()) {
        __ lfs(r_1->as_FloatRegister(), ld_offset, ld_ptr);
        ld_offset -= wordSize;
      } else {
        // Skip the unused interpreter slot.
        __ lfd(r_1->as_FloatRegister(), ld_offset-wordSize, ld_ptr);
        ld_offset -= 2*wordSize;
      }
    } else {
      Register r;
      if (r_1->is_stack()) {
        // Must do a memory to memory move thru "value".
        r = value_regs[value_regs_index];
        value_regs_index = (value_regs_index + 1) % num_value_regs;
      } else {
        r = r_1->as_Register();
      }
      if (!r_2->is_valid()) {
        // Not sure we need to do this but it shouldn't hurt.
        if (sig_bt[i] == T_OBJECT || sig_bt[i] == T_ADDRESS || sig_bt[i] == T_ARRAY) {
          __ ld(r, ld_offset, ld_ptr);
          ld_offset -= wordSize;
        } else {
          __ lwz(r, ld_offset, ld_ptr);
          ld_offset -= wordSize;
        }
      } else {
        // In 64 bit, longs are given 2 64-bit slots in the interpreter, but
        // the data is passed in only 1 slot.
        if (sig_bt[i] == T_LONG || sig_bt[i] == T_DOUBLE) {
          ld_offset -= wordSize;
        }
        __ ld(r, ld_offset, ld_ptr);
        ld_offset -= wordSize;
      }

      if (r_1->is_stack()) {
        // Now store value where the compiler expects it.
        int st_off = (r_1->reg2stack() + SharedRuntime::out_preserve_stack_slots())*VMRegImpl::stack_slot_size;

        if (sig_bt[i] == T_INT   || sig_bt[i] == T_FLOAT || sig_bt[i] == T_BOOLEAN ||
            sig_bt[i] == T_SHORT || sig_bt[i] == T_CHAR  || sig_bt[i] == T_BYTE) {
          __ stw(r, st_off, R1_SP);
        } else {
          __ std(r, st_off, R1_SP);
        }
      }
    }
  }

  BLOCK_COMMENT("Store method");
  // Store method into thread->callee_target.
  // We might end up in handle_wrong_method if the callee is
  // deoptimized as we race thru here. If that happens we don't want
  // to take a safepoint because the caller frame will look
  // interpreted and arguments are now "compiled" so it is much better
  // to make this transition invisible to the stack walking
  // code. Unfortunately if we try and find the callee by normal means
  // a safepoint is possible. So we stash the desired callee in the
  // thread and the vm will find it there should this case occur.
  __ std(R19_method, thread_(callee_target));

  // Jump to the compiled code just as if compiled code was doing it.
  __ bctr();
}

AdapterHandlerEntry* SharedRuntime::generate_i2c2i_adapters(MacroAssembler *masm,
                                                            int total_args_passed,
                                                            int comp_args_on_stack,
                                                            const BasicType *sig_bt,
                                                            const VMRegPair *regs,
                                                            AdapterFingerPrint* fingerprint) {
  address i2c_entry;
  address c2i_unverified_entry;
  address c2i_entry;

  // entry: i2c

  __ align(CodeEntryAlignment);
  i2c_entry = __ pc();
  gen_i2c_adapter(masm, total_args_passed, comp_args_on_stack, sig_bt, regs);

  // entry: c2i unverified

  __ align(CodeEntryAlignment);
  BLOCK_COMMENT("c2i unverified entry");
  c2i_unverified_entry = __ pc();

  // inline_cache contains a CompiledICHolder.
  const Register ic             = R19_method;
  const Register ic_klass       = R11_scratch1;
  const Register receiver_klass = R12_scratch2;
  const Register code           = R21_tmp1;
  const Register ientry         = R23_tmp3;

  assert_different_registers(ic, ic_klass, receiver_klass, R3_ARG1, code, ientry);
  assert(R11_scratch1 == R11, "need prologue scratch register");

  Label call_interpreter;

  assert(!MacroAssembler::needs_explicit_null_check(oopDesc::klass_offset_in_bytes()),
         "klass offset should reach into any page");
  // Check for NULL argument if we don't have implicit null checks.
  if (!ImplicitNullChecks || !os::zero_page_read_protected()) {
    if (TrapBasedNullChecks) {
      __ trap_null_check(R3_ARG1);
    } else {
      Label valid;
      __ cmpdi(CCR0, R3_ARG1, 0);
      __ bne_predict_taken(CCR0, valid);
      // We have a null argument, branch to ic_miss_stub.
      __ b64_patchable((address)SharedRuntime::get_ic_miss_stub(),
                       relocInfo::runtime_call_type);
      __ BIND(valid);
    }
  }
  // Assume argument is not NULL, load klass from receiver.
  __ load_klass(receiver_klass, R3_ARG1);

  __ ld(ic_klass, CompiledICHolder::holder_klass_offset(), ic);

  if (TrapBasedICMissChecks) {
    __ trap_ic_miss_check(receiver_klass, ic_klass);
  } else {
    Label valid;
    __ cmpd(CCR0, receiver_klass, ic_klass);
    __ beq_predict_taken(CCR0, valid);
    // We have an unexpected klass, branch to ic_miss_stub.
    __ b64_patchable((address)SharedRuntime::get_ic_miss_stub(),
                     relocInfo::runtime_call_type);
    __ BIND(valid);
  }

  // Argument is valid and klass is as expected, continue.

  // Extract method from inline cache, verified entry point needs it.
  __ ld(R19_method, CompiledICHolder::holder_method_offset(), ic);
  assert(R19_method == ic, "the inline cache register is dead here");

  __ ld(code, method_(code));
  __ cmpdi(CCR0, code, 0);
  __ ld(ientry, method_(interpreter_entry)); // preloaded
  __ beq_predict_taken(CCR0, call_interpreter);

  // Branch to ic_miss_stub.
  __ b64_patchable((address)SharedRuntime::get_ic_miss_stub(), relocInfo::runtime_call_type);

  // entry: c2i

  c2i_entry = gen_c2i_adapter(masm, total_args_passed, comp_args_on_stack, sig_bt, regs, call_interpreter, ientry);

  return AdapterHandlerLibrary::new_entry(fingerprint, i2c_entry, c2i_entry, c2i_unverified_entry);
}

#ifdef COMPILER2
// An oop arg. Must pass a handle, not the oop itself.
static void object_move(MacroAssembler* masm,
                        int frame_size_in_slots,
                        OopMap* oop_map, int oop_handle_offset,
                        bool is_receiver, int* receiver_offset,
                        VMRegPair src, VMRegPair dst,
                        Register r_caller_sp, Register r_temp_1, Register r_temp_2) {
  assert(!is_receiver || (is_receiver && (*receiver_offset == -1)),
         "receiver has already been moved");

  // We must pass a handle. First figure out the location we use as a handle.

  if (src.first()->is_stack()) {
    // stack to stack or reg

    const Register r_handle = dst.first()->is_stack() ? r_temp_1 : dst.first()->as_Register();
    Label skip;
    const int oop_slot_in_callers_frame = reg2slot(src.first());

    guarantee(!is_receiver, "expecting receiver in register");
    oop_map->set_oop(VMRegImpl::stack2reg(oop_slot_in_callers_frame + frame_size_in_slots));

    __ addi(r_handle, r_caller_sp, reg2offset(src.first()));
    __ ld(  r_temp_2, reg2offset(src.first()), r_caller_sp);
    __ cmpdi(CCR0, r_temp_2, 0);
    __ bne(CCR0, skip);
    // Use a NULL handle if oop is NULL.
    __ li(r_handle, 0);
    __ bind(skip);

    if (dst.first()->is_stack()) {
      // stack to stack
      __ std(r_handle, reg2offset(dst.first()), R1_SP);
    } else {
      // stack to reg
      // Nothing to do, r_handle is already the dst register.
    }
  } else {
    // reg to stack or reg
    const Register r_oop      = src.first()->as_Register();
    const Register r_handle   = dst.first()->is_stack() ? r_temp_1 : dst.first()->as_Register();
    const int oop_slot        = (r_oop->encoding()-R3_ARG1->encoding()) * VMRegImpl::slots_per_word
                                + oop_handle_offset; // in slots
    const int oop_offset = oop_slot * VMRegImpl::stack_slot_size;
    Label skip;

    if (is_receiver) {
      *receiver_offset = oop_offset;
    }
    oop_map->set_oop(VMRegImpl::stack2reg(oop_slot));

    __ std( r_oop,    oop_offset, R1_SP);
    __ addi(r_handle, R1_SP, oop_offset);

    __ cmpdi(CCR0, r_oop, 0);
    __ bne(CCR0, skip);
    // Use a NULL handle if oop is NULL.
    __ li(r_handle, 0);
    __ bind(skip);

    if (dst.first()->is_stack()) {
      // reg to stack
      __ std(r_handle, reg2offset(dst.first()), R1_SP);
    } else {
      // reg to reg
      // Nothing to do, r_handle is already the dst register.
    }
  }
}
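
// Net effect of object_move (illustrative): for a non-NULL oop the callee
// receives the address of the spill slot holding the oop (a handle); for a
// NULL oop it receives NULL itself, matching JNI's convention that a NULL
// jobject denotes the null reference.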

static void int_move(MacroAssembler* masm,
                     VMRegPair src, VMRegPair dst,
                     Register r_caller_sp, Register r_temp) {
  assert(src.first()->is_valid(), "incoming must be int");
  assert(dst.first()->is_valid() && dst.second() == dst.first()->next(), "outgoing must be long");

  if (src.first()->is_stack()) {
    if (dst.first()->is_stack()) {
      // stack to stack
      __ lwa(r_temp, reg2offset(src.first()), r_caller_sp);
      __ std(r_temp, reg2offset(dst.first()), R1_SP);
    } else {
      // stack to reg
      __ lwa(dst.first()->as_Register(), reg2offset(src.first()), r_caller_sp);
    }
  } else if (dst.first()->is_stack()) {
    // reg to stack
    __ extsw(r_temp, src.first()->as_Register());
    __ std(r_temp, reg2offset(dst.first()), R1_SP);
  } else {
    // reg to reg
    __ extsw(dst.first()->as_Register(), src.first()->as_Register());
  }
}

static void long_move(MacroAssembler* masm,
                      VMRegPair src, VMRegPair dst,
                      Register r_caller_sp, Register r_temp) {
  assert(src.first()->is_valid() && src.second() == src.first()->next(), "incoming must be long");
  assert(dst.first()->is_valid() && dst.second() == dst.first()->next(), "outgoing must be long");

  if (src.first()->is_stack()) {
    if (dst.first()->is_stack()) {
      // stack to stack
      __ ld( r_temp, reg2offset(src.first()), r_caller_sp);
      __ std(r_temp, reg2offset(dst.first()), R1_SP);
    } else {
      // stack to reg
      __ ld(dst.first()->as_Register(), reg2offset(src.first()), r_caller_sp);
    }
  } else if (dst.first()->is_stack()) {
    // reg to stack
    __ std(src.first()->as_Register(), reg2offset(dst.first()), R1_SP);
  } else {
    // reg to reg
    if (dst.first()->as_Register() != src.first()->as_Register())
      __ mr(dst.first()->as_Register(), src.first()->as_Register());
  }
}

static void float_move(MacroAssembler* masm,
                       VMRegPair src, VMRegPair dst,
                       Register r_caller_sp, Register r_temp) {
  assert(src.first()->is_valid() && !src.second()->is_valid(), "incoming must be float");
  assert(dst.first()->is_valid() && !dst.second()->is_valid(), "outgoing must be float");

  if (src.first()->is_stack()) {
    if (dst.first()->is_stack()) {
      // stack to stack
      __ lwz(r_temp, reg2offset(src.first()), r_caller_sp);
      __ stw(r_temp, reg2offset(dst.first()), R1_SP);
    } else {
      // stack to reg
      __ lfs(dst.first()->as_FloatRegister(), reg2offset(src.first()), r_caller_sp);
    }
  } else if (dst.first()->is_stack()) {
    // reg to stack
    __ stfs(src.first()->as_FloatRegister(), reg2offset(dst.first()), R1_SP);
  } else {
    // reg to reg
    if (dst.first()->as_FloatRegister() != src.first()->as_FloatRegister())
      __ fmr(dst.first()->as_FloatRegister(), src.first()->as_FloatRegister());
  }
}

static void double_move(MacroAssembler* masm,
                        VMRegPair src, VMRegPair dst,
                        Register r_caller_sp, Register r_temp) {
  assert(src.first()->is_valid() && src.second() == src.first()->next(), "incoming must be double");
  assert(dst.first()->is_valid() && dst.second() == dst.first()->next(), "outgoing must be double");

  if (src.first()->is_stack()) {
    if (dst.first()->is_stack()) {
      // stack to stack
      __ ld( r_temp, reg2offset(src.first()), r_caller_sp);
      __ std(r_temp, reg2offset(dst.first()), R1_SP);
    } else {
      // stack to reg
      __ lfd(dst.first()->as_FloatRegister(), reg2offset(src.first()), r_caller_sp);
    }
  } else if (dst.first()->is_stack()) {
    // reg to stack
    __ stfd(src.first()->as_FloatRegister(), reg2offset(dst.first()), R1_SP);
  } else {
    // reg to reg
    if (dst.first()->as_FloatRegister() != src.first()->as_FloatRegister())
      __ fmr(dst.first()->as_FloatRegister(), src.first()->as_FloatRegister());
  }
}

void SharedRuntime::save_native_result(MacroAssembler *masm, BasicType ret_type, int frame_slots) {
  switch (ret_type) {
    case T_BOOLEAN:
    case T_CHAR:
    case T_BYTE:
    case T_SHORT:
    case T_INT:
      __ stw(R3_RET,  frame_slots*VMRegImpl::stack_slot_size, R1_SP);
      break;
    case T_ARRAY:
    case T_OBJECT:
    case T_LONG:
      __ std(R3_RET,  frame_slots*VMRegImpl::stack_slot_size, R1_SP);
      break;
    case T_FLOAT:
      __ stfs(F1_RET, frame_slots*VMRegImpl::stack_slot_size, R1_SP);
      break;
    case T_DOUBLE:
      __ stfd(F1_RET, frame_slots*VMRegImpl::stack_slot_size, R1_SP);
      break;
    case T_VOID:
      break;
    default:
      ShouldNotReachHere();
      break;
  }
}

void SharedRuntime::restore_native_result(MacroAssembler *masm, BasicType ret_type, int frame_slots) {
  switch (ret_type) {
    case T_BOOLEAN:
    case T_CHAR:
    case T_BYTE:
    case T_SHORT:
    case T_INT:
      __ lwz(R3_RET,  frame_slots*VMRegImpl::stack_slot_size, R1_SP);
      break;
    case T_ARRAY:
    case T_OBJECT:
    case T_LONG:
      __ ld(R3_RET,   frame_slots*VMRegImpl::stack_slot_size, R1_SP);
      break;
    case T_FLOAT:
      __ lfs(F1_RET, frame_slots*VMRegImpl::stack_slot_size, R1_SP);
      break;
    case T_DOUBLE:
      __ lfd(F1_RET, frame_slots*VMRegImpl::stack_slot_size, R1_SP);
      break;
    case T_VOID:
      break;
    default:
      ShouldNotReachHere();
      break;
  }
}
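
// Usage sketch (hypothetical caller, for illustration only): a native wrapper
// that must call into the VM after the native call typically brackets that
// call with
//   save_native_result(masm, ret_type, workspace_slot_offset);
//   ... runtime call that may clobber R3_RET / F1_RET ...
//   restore_native_result(masm, ret_type, workspace_slot_offset);
// where workspace_slot_offset names a scratch slot in the current frame.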

static void save_or_restore_arguments(MacroAssembler* masm,
                                      const int stack_slots,
                                      const int total_in_args,
                                      const int arg_save_area,
                                      OopMap* map,
                                      VMRegPair* in_regs,
                                      BasicType* in_sig_bt) {
  // If map is non-NULL then the code should store the values,
  // otherwise it should load them.
  int slot = arg_save_area;
  // Save down double-word registers first.
  for (int i = 0; i < total_in_args; i++) {
    if (in_regs[i].first()->is_FloatRegister() && in_sig_bt[i] == T_DOUBLE) {
      int offset = slot * VMRegImpl::stack_slot_size;
      slot += VMRegImpl::slots_per_word;
      assert(slot <= stack_slots, "overflow (after DOUBLE stack slot)");
      if (map != NULL) {
        __ stfd(in_regs[i].first()->as_FloatRegister(), offset, R1_SP);
      } else {
        __ lfd(in_regs[i].first()->as_FloatRegister(), offset, R1_SP);
      }
    } else if (in_regs[i].first()->is_Register() &&
        (in_sig_bt[i] == T_LONG || in_sig_bt[i] == T_ARRAY)) {
      int offset = slot * VMRegImpl::stack_slot_size;
      if (map != NULL) {
        __ std(in_regs[i].first()->as_Register(), offset, R1_SP);
        if (in_sig_bt[i] == T_ARRAY) {
          map->set_oop(VMRegImpl::stack2reg(slot));
        }
      } else {
        __ ld(in_regs[i].first()->as_Register(), offset, R1_SP);
      }
      slot += VMRegImpl::slots_per_word;
      assert(slot <= stack_slots, "overflow (after LONG/ARRAY stack slot)");
    }
  }
  // Save or restore single word registers.
  for (int i = 0; i < total_in_args; i++) {
    // PPC64: pass ints as longs: must only deal with floats here.
    if (in_regs[i].first()->is_FloatRegister()) {
      if (in_sig_bt[i] == T_FLOAT) {
        int offset = slot * VMRegImpl::stack_slot_size;
        slot++;
        assert(slot <= stack_slots, "overflow (after FLOAT stack slot)");
        if (map != NULL) {
          __ stfs(in_regs[i].first()->as_FloatRegister(), offset, R1_SP);
        } else {
          __ lfs(in_regs[i].first()->as_FloatRegister(), offset, R1_SP);
        }
      }
    } else if (in_regs[i].first()->is_stack()) {
      if (in_sig_bt[i] == T_ARRAY && map != NULL) {
        int offset_in_older_frame = in_regs[i].first()->reg2stack() + SharedRuntime::out_preserve_stack_slots();
        map->set_oop(VMRegImpl::stack2reg(offset_in_older_frame + stack_slots));
      }
    }
  }
}
1487
1488// Check GC_locker::needs_gc and enter the runtime if it's true. This
1489// keeps a new JNI critical region from starting until a GC has been
1490// forced. Save down any oops in registers and describe them in an
1491// OopMap.
1492static void check_needs_gc_for_critical_native(MacroAssembler* masm,
1493                                               const int stack_slots,
1494                                               const int total_in_args,
1495                                               const int arg_save_area,
1496                                               OopMapSet* oop_maps,
1497                                               VMRegPair* in_regs,
1498                                               BasicType* in_sig_bt,
1499                                               Register tmp_reg) {
1500  __ block_comment("check GC_locker::needs_gc");
1501  Label cont;
1502  __ lbz(tmp_reg, (RegisterOrConstant)(intptr_t)GC_locker::needs_gc_address());
1503  __ cmplwi(CCR0, tmp_reg, 0);
1504  __ beq(CCR0, cont);
1505
1506  // Save down any values that are live in registers and call into the
1507  // runtime to halt for a GC.
1508  OopMap* map = new OopMap(stack_slots * 2, 0 /* arg_slots*/);
1509  save_or_restore_arguments(masm, stack_slots, total_in_args,
1510                            arg_save_area, map, in_regs, in_sig_bt);
1511
1512  __ mr(R3_ARG1, R16_thread);
1513  __ set_last_Java_frame(R1_SP, noreg);
1514
1515  __ block_comment("block_for_jni_critical");
1516  address entry_point = CAST_FROM_FN_PTR(address, SharedRuntime::block_for_jni_critical);
1517#if defined(ABI_ELFv2)
1518  __ call_c(entry_point, relocInfo::runtime_call_type);
1519#else
1520  __ call_c(CAST_FROM_FN_PTR(FunctionDescriptor*, entry_point), relocInfo::runtime_call_type);
1521#endif
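  // On ELFv1, a C function pointer addresses a function descriptor (entry
  // point, TOC, environment) rather than code, so the call goes through a
  // FunctionDescriptor*; ELFv2 branches to the entry point directly.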
1522  address start           = __ pc() - __ offset(),
1523          calls_return_pc = __ last_calls_return_pc();
1524  oop_maps->add_gc_map(calls_return_pc - start, map);
1525
1526  __ reset_last_Java_frame();
1527
1528  // Reload all the register arguments.
1529  save_or_restore_arguments(masm, stack_slots, total_in_args,
1530                            arg_save_area, NULL, in_regs, in_sig_bt);
1531
1532  __ BIND(cont);
1533
1534#ifdef ASSERT
1535  if (StressCriticalJNINatives) {
1536    // Stress register saving.
1537    OopMap* map = new OopMap(stack_slots * 2, 0 /* arg_slots*/);
1538    save_or_restore_arguments(masm, stack_slots, total_in_args,
1539                              arg_save_area, map, in_regs, in_sig_bt);
1540    // Destroy argument registers.
1541    for (int i = 0; i < total_in_args; i++) {
1542      if (in_regs[i].first()->is_Register()) {
1543        const Register reg = in_regs[i].first()->as_Register();
1544        __ neg(reg, reg);
1545      } else if (in_regs[i].first()->is_FloatRegister()) {
1546        __ fneg(in_regs[i].first()->as_FloatRegister(), in_regs[i].first()->as_FloatRegister());
1547      }
1548    }
1549
1550    save_or_restore_arguments(masm, stack_slots, total_in_args,
1551                              arg_save_area, NULL, in_regs, in_sig_bt);
1552  }
1553#endif
1554}
1555
1556static void move_ptr(MacroAssembler* masm, VMRegPair src, VMRegPair dst, Register r_caller_sp, Register r_temp) {
1557  if (src.first()->is_stack()) {
1558    if (dst.first()->is_stack()) {
1559      // stack to stack
1560      __ ld(r_temp, reg2offset(src.first()), r_caller_sp);
1561      __ std(r_temp, reg2offset(dst.first()), R1_SP);
1562    } else {
1563      // stack to reg
1564      __ ld(dst.first()->as_Register(), reg2offset(src.first()), r_caller_sp);
1565    }
1566  } else if (dst.first()->is_stack()) {
1567    // reg to stack
1568    __ std(src.first()->as_Register(), reg2offset(dst.first()), R1_SP);
1569  } else {
1570    if (dst.first() != src.first()) {
1571      __ mr(dst.first()->as_Register(), src.first()->as_Register());
1572    }
1573  }
1574}
1575
1576// Unpack an array argument into a pointer to the body and the length
1577// if the array is non-null, otherwise pass 0 for both.
1578static void unpack_array_argument(MacroAssembler* masm, VMRegPair reg, BasicType in_elem_type,
1579                                  VMRegPair body_arg, VMRegPair length_arg, Register r_caller_sp,
1580                                  Register tmp_reg, Register tmp2_reg) {
1581  assert(!body_arg.first()->is_Register() || body_arg.first()->as_Register() != tmp_reg,
1582         "possible collision");
1583  assert(!length_arg.first()->is_Register() || length_arg.first()->as_Register() != tmp_reg,
1584         "possible collision");
1585
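  // The emitted code is equivalent to this C sketch:
  //   if (array == NULL) { body = NULL; length = 0; }
  //   else { body = array + base_offset(elem_type); length = array->length; }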
1586  // Pass the length, ptr pair.
1587  Label set_out_args;
1588  VMRegPair tmp, tmp2;
1589  tmp.set_ptr(tmp_reg->as_VMReg());
1590  tmp2.set_ptr(tmp2_reg->as_VMReg());
1591  if (reg.first()->is_stack()) {
1592    // Load the arg up from the stack.
1593    move_ptr(masm, reg, tmp, r_caller_sp, /*unused*/ R0);
1594    reg = tmp;
1595  }
1596  __ li(tmp2_reg, 0); // Pass zeros if array is NULL.
1597  if (tmp_reg != reg.first()->as_Register()) __ li(tmp_reg, 0);
1598  __ cmpdi(CCR0, reg.first()->as_Register(), 0);
1599  __ beq(CCR0, set_out_args);
1600  __ lwa(tmp2_reg, arrayOopDesc::length_offset_in_bytes(), reg.first()->as_Register());
1601  __ addi(tmp_reg, reg.first()->as_Register(), arrayOopDesc::base_offset_in_bytes(in_elem_type));
1602  __ bind(set_out_args);
1603  move_ptr(masm, tmp, body_arg, r_caller_sp, /*unused*/ R0);
1604  move_ptr(masm, tmp2, length_arg, r_caller_sp, /*unused*/ R0); // Same as move32_64 on PPC64.
1605}
1606
1607static void verify_oop_args(MacroAssembler* masm,
1608                            methodHandle method,
1609                            const BasicType* sig_bt,
1610                            const VMRegPair* regs) {
1611  Register temp_reg = R19_method;  // not part of any compiled calling seq
1612  if (VerifyOops) {
1613    for (int i = 0; i < method->size_of_parameters(); i++) {
1614      if (sig_bt[i] == T_OBJECT ||
1615          sig_bt[i] == T_ARRAY) {
1616        VMReg r = regs[i].first();
1617        assert(r->is_valid(), "bad oop arg");
1618        if (r->is_stack()) {
1619          __ ld(temp_reg, reg2offset(r), R1_SP);
1620          __ verify_oop(temp_reg);
1621        } else {
1622          __ verify_oop(r->as_Register());
1623        }
1624      }
1625    }
1626  }
1627}
1628
1629static void gen_special_dispatch(MacroAssembler* masm,
1630                                 methodHandle method,
1631                                 const BasicType* sig_bt,
1632                                 const VMRegPair* regs) {
1633  verify_oop_args(masm, method, sig_bt, regs);
1634  vmIntrinsics::ID iid = method->intrinsic_id();
1635
1636  // Now write the args into the outgoing interpreter space
1637  bool     has_receiver   = false;
1638  Register receiver_reg   = noreg;
1639  int      member_arg_pos = -1;
1640  Register member_reg     = noreg;
1641  int      ref_kind       = MethodHandles::signature_polymorphic_intrinsic_ref_kind(iid);
1642  if (ref_kind != 0) {
1643    member_arg_pos = method->size_of_parameters() - 1;  // trailing MemberName argument
1644    member_reg = R19_method;  // known to be free at this point
1645    has_receiver = MethodHandles::ref_kind_has_receiver(ref_kind);
1646  } else if (iid == vmIntrinsics::_invokeBasic) {
1647    has_receiver = true;
1648  } else {
1649    fatal("unexpected intrinsic id %d", iid);
1650  }
1651
1652  if (member_reg != noreg) {
1653    // Load the member_arg into register, if necessary.
1654    SharedRuntime::check_member_name_argument_is_last_argument(method, sig_bt, regs);
1655    VMReg r = regs[member_arg_pos].first();
1656    if (r->is_stack()) {
1657      __ ld(member_reg, reg2offset(r), R1_SP);
1658    } else {
1659      // no data motion is needed
1660      member_reg = r->as_Register();
1661    }
1662  }
1663
1664  if (has_receiver) {
1665    // Make sure the receiver is loaded into a register.
1666    assert(method->size_of_parameters() > 0, "oob");
1667    assert(sig_bt[0] == T_OBJECT, "receiver argument must be an object");
1668    VMReg r = regs[0].first();
1669    assert(r->is_valid(), "bad receiver arg");
1670    if (r->is_stack()) {
1671      // Porting note:  This assumes that compiled calling conventions always
1672      // pass the receiver oop in a register.  If this is not true on some
1673      // platform, pick a temp and load the receiver from stack.
1674      fatal("receiver always in a register");
1675      receiver_reg = R11_scratch1;  // TODO (hs24): is R11_scratch1 really free at this point?
1676      __ ld(receiver_reg, reg2offset(r), R1_SP);
1677    } else {
1678      // no data motion is needed
1679      receiver_reg = r->as_Register();
1680    }
1681  }
1682
1683  // Figure out which address we are really jumping to:
1684  MethodHandles::generate_method_handle_dispatch(masm, iid,
1685                                                 receiver_reg, member_reg, /*for_compiler_entry:*/ true);
1686}
1687
1688#endif // COMPILER2
1689
1690// ---------------------------------------------------------------------------
1691// Generate a native wrapper for a given method. The method takes arguments
1692// in the Java compiled code convention, marshals them to the native
1693// convention (handlizes oops, etc), transitions to native, makes the call,
1694// returns to java state (possibly blocking), unhandlizes any result and
1695// returns.
1696//
1697// Critical native functions are a shorthand for the use of
1698// GetPrimitiveArrayCritical and disallow the use of any other JNI
1699// functions.  The wrapper is expected to unpack the arguments before
1700// passing them to the callee and perform checks before and after the
1701// native call to ensure that the GC_locker
1702// lock_critical/unlock_critical semantics are followed.  Some other
1703// parts of JNI setup are skipped, like the tear down of the JNI handle
1704// block and the check for pending exceptions, since it's impossible for
1705// them to be thrown.
1706//
1707// They are roughly structured like this:
1708//   if (GC_locker::needs_gc())
1709//     SharedRuntime::block_for_jni_critical();
1710//   transition to thread_in_native
1711//   unpack array arguments and call native entry point
1712//   check for safepoint in progress
1713//   check if any thread suspend flags are set
1714//     call into JVM and possibly unlock the JNI critical
1715//     if a GC was suppressed while in the critical native.
1716//   transition back to thread_in_Java
1717//   return to caller
1718//
1719nmethod *SharedRuntime::generate_native_wrapper(MacroAssembler *masm,
1720                                                const methodHandle& method,
1721                                                int compile_id,
1722                                                BasicType *in_sig_bt,
1723                                                VMRegPair *in_regs,
1724                                                BasicType ret_type) {
1725#ifdef COMPILER2
1726  if (method->is_method_handle_intrinsic()) {
1727    vmIntrinsics::ID iid = method->intrinsic_id();
1728    intptr_t start = (intptr_t)__ pc();
1729    int vep_offset = ((intptr_t)__ pc()) - start;
1730    gen_special_dispatch(masm,
1731                         method,
1732                         in_sig_bt,
1733                         in_regs);
1734    int frame_complete = ((intptr_t)__ pc()) - start;  // not complete, period
1735    __ flush();
1736    int stack_slots = SharedRuntime::out_preserve_stack_slots();  // no out slots at all, actually
1737    return nmethod::new_native_nmethod(method,
1738                                       compile_id,
1739                                       masm->code(),
1740                                       vep_offset,
1741                                       frame_complete,
1742                                       stack_slots / VMRegImpl::slots_per_word,
1743                                       in_ByteSize(-1),
1744                                       in_ByteSize(-1),
1745                                       (OopMapSet*)NULL);
1746  }
1747
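  // A critical native entry (method->critical_native_function()) is only
  // non-NULL if native linking found a JavaCritical_-prefixed variant of
  // the native entry point.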
1748  bool is_critical_native = true;
1749  address native_func = method->critical_native_function();
1750  if (native_func == NULL) {
1751    native_func = method->native_function();
1752    is_critical_native = false;
1753  }
1754  assert(native_func != NULL, "must have function");
1755
1756  // First, create signature for outgoing C call
1757  // --------------------------------------------------------------------------
1758
1759  int total_in_args = method->size_of_parameters();
1760  // We have received a description of where all the java args are located
1761  // on entry to the wrapper. We need to convert these args to where
1762  // the jni function will expect them. To figure out where they go
1763  // we convert the java signature to a C signature by inserting
1764  // the hidden arguments as arg[0] and possibly arg[1] (static method)
1765
1766  // Calculate the total number of C arguments and create arrays for the
1767  // signature and the outgoing registers.
1768  // On ppc64, we have two arrays for the outgoing registers, because
1769  // some floating-point arguments must be passed in registers _and_
1770  // in stack locations.
1771  bool method_is_static = method->is_static();
1772  int  total_c_args     = total_in_args;
1773
1774  if (!is_critical_native) {
1775    int n_hidden_args = method_is_static ? 2 : 1;
1776    total_c_args += n_hidden_args;
1777  } else {
1778    // No JNIEnv*, no this*, but unpacked arrays (base+length).
1779    for (int i = 0; i < total_in_args; i++) {
1780      if (in_sig_bt[i] == T_ARRAY) {
1781        total_c_args++;
1782      }
1783    }
1784  }
1785
1786  BasicType *out_sig_bt = NEW_RESOURCE_ARRAY(BasicType, total_c_args);
1787  VMRegPair *out_regs   = NEW_RESOURCE_ARRAY(VMRegPair, total_c_args);
1788  VMRegPair *out_regs2  = NEW_RESOURCE_ARRAY(VMRegPair, total_c_args);
1789  BasicType* in_elem_bt = NULL;
1790
1791  // Create the signature for the C call:
1792  //   1) add the JNIEnv*
1793  //   2) add the class if the method is static
1794  //   3) copy the rest of the incoming signature (shifted by the number of
1795  //      hidden arguments).
1796
1797  int argc = 0;
1798  if (!is_critical_native) {
1799    out_sig_bt[argc++] = T_ADDRESS;
1800    if (method->is_static()) {
1801      out_sig_bt[argc++] = T_OBJECT;
1802    }
1803
1804    for (int i = 0; i < total_in_args ; i++ ) {
1805      out_sig_bt[argc++] = in_sig_bt[i];
1806    }
1807  } else {
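    // A local named THREAD is required by the CHECK_NULL macro used with
    // ss.as_symbol() below.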
1808    Thread* THREAD = Thread::current();
1809    in_elem_bt = NEW_RESOURCE_ARRAY(BasicType, total_c_args);
1810    SignatureStream ss(method->signature());
1811    int o = 0;
1812    for (int i = 0; i < total_in_args ; i++, o++) {
1813      if (in_sig_bt[i] == T_ARRAY) {
1814        // Arrays are passed as int, elem* pair
1815        Symbol* atype = ss.as_symbol(CHECK_NULL);
1816        const char* at = atype->as_C_string();
1817        if (strlen(at) == 2) {
1818          assert(at[0] == '[', "must be");
1819          switch (at[1]) {
1820            case 'B': in_elem_bt[o] = T_BYTE; break;
1821            case 'C': in_elem_bt[o] = T_CHAR; break;
1822            case 'D': in_elem_bt[o] = T_DOUBLE; break;
1823            case 'F': in_elem_bt[o] = T_FLOAT; break;
1824            case 'I': in_elem_bt[o] = T_INT; break;
1825            case 'J': in_elem_bt[o] = T_LONG; break;
1826            case 'S': in_elem_bt[o] = T_SHORT; break;
1827            case 'Z': in_elem_bt[o] = T_BOOLEAN; break;
1828            default: ShouldNotReachHere();
1829          }
1830        }
1831      } else {
1832        in_elem_bt[o] = T_VOID;
1833      }
1834      if (in_sig_bt[i] != T_VOID) {
1835        assert(in_sig_bt[i] == ss.type(), "must match");
1836        ss.next();
1837      }
1838    }
1839
1840    for (int i = 0; i < total_in_args ; i++ ) {
1841      if (in_sig_bt[i] == T_ARRAY) {
1842        // Arrays are passed as int, elem* pair.
1843        out_sig_bt[argc++] = T_INT;
1844        out_sig_bt[argc++] = T_ADDRESS;
1845      } else {
1846        out_sig_bt[argc++] = in_sig_bt[i];
1847      }
1848    }
1849  }
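  // For example (illustrative): a critical native static byte foo(int[] a,
  // long l) gets the C signature (jint a_length, jint* a_body, jlong l);
  // each array is split into its T_INT length and T_ADDRESS body pointer.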
1850
1851
1852  // Compute the wrapper's frame size.
1853  // --------------------------------------------------------------------------
1854
1855  // Now figure out where the args must be stored and how much stack space
1856  // they require.
1857  //
1858  // Compute framesize for the wrapper. We need to handlize all oops in
1859  // incoming registers.
1860  //
1861  // Calculate the total number of stack slots we will need:
1862  //   1) abi requirements
1863  //   2) outgoing arguments
1864  //   3) space for inbound oop handle area
1865  //   4) space for handlizing a klass if static method
1866  //   5) space for a lock if synchronized method
1867  //   6) workspace for saving return values, int <-> float reg moves, etc.
1868  //   7) alignment
1869  //
1870  // Layout of the native wrapper frame:
1871  // (stack grows upwards, memory grows downwards)
1872  //
1873  // NW     [ABI_REG_ARGS]             <-- 1) R1_SP
1874  //        [outgoing arguments]       <-- 2) R1_SP + out_arg_slot_offset
1875  //        [oopHandle area]           <-- 3) R1_SP + oop_handle_offset (save area for critical natives)
1876  //        klass                      <-- 4) R1_SP + klass_offset
1877  //        lock                       <-- 5) R1_SP + lock_offset
1878  //        [workspace]                <-- 6) R1_SP + workspace_offset
1879  //        [alignment] (optional)     <-- 7)
1880  // caller [JIT_TOP_ABI_48]           <-- r_callers_sp
1881  //
1882  // - *_slot_offset Indicates offset from SP in number of stack slots.
1883  // - *_offset      Indicates offset from SP in bytes.
1884
1885  int stack_slots = c_calling_convention(out_sig_bt, out_regs, out_regs2, total_c_args) // 1+2)
1886                  + SharedRuntime::out_preserve_stack_slots(); // See c_calling_convention.
1887
1888  // Now the space for the inbound oop handle area.
1889  int total_save_slots = num_java_iarg_registers * VMRegImpl::slots_per_word;
1890  if (is_critical_native) {
1891    // Critical natives may have to call out so they need a save area
1892    // for register arguments.
1893    int double_slots = 0;
1894    int single_slots = 0;
1895    for (int i = 0; i < total_in_args; i++) {
1896      if (in_regs[i].first()->is_Register()) {
1897        const Register reg = in_regs[i].first()->as_Register();
1898        switch (in_sig_bt[i]) {
1899          case T_BOOLEAN:
1900          case T_BYTE:
1901          case T_SHORT:
1902          case T_CHAR:
1903          case T_INT:
1904          // Fall through.
1905          case T_ARRAY:
1906          case T_LONG: double_slots++; break;
1907          default:  ShouldNotReachHere();
1908        }
1909      } else if (in_regs[i].first()->is_FloatRegister()) {
1910        switch (in_sig_bt[i]) {
1911          case T_FLOAT:  single_slots++; break;
1912          case T_DOUBLE: double_slots++; break;
1913          default:  ShouldNotReachHere();
1914        }
1915      }
1916    }
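    // A VMReg stack slot is 4 bytes (VMRegImpl::stack_slot_size), so each
    // 64-bit value needs two slots; the float slots are rounded up to an
    // even count to keep the save area doubleword aligned.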
1917    total_save_slots = double_slots * 2 + round_to(single_slots, 2); // round to even
1918  }
1919
1920  int oop_handle_slot_offset = stack_slots;
1921  stack_slots += total_save_slots;                                                // 3)
1922
1923  int klass_slot_offset = 0;
1924  int klass_offset      = -1;
1925  if (method_is_static && !is_critical_native) {                                  // 4)
1926    klass_slot_offset  = stack_slots;
1927    klass_offset       = klass_slot_offset * VMRegImpl::stack_slot_size;
1928    stack_slots       += VMRegImpl::slots_per_word;
1929  }
1930
1931  int lock_slot_offset = 0;
1932  int lock_offset      = -1;
1933  if (method->is_synchronized()) {                                                // 5)
1934    lock_slot_offset   = stack_slots;
1935    lock_offset        = lock_slot_offset * VMRegImpl::stack_slot_size;
1936    stack_slots       += VMRegImpl::slots_per_word;
1937  }
1938
1939  int workspace_slot_offset = stack_slots;                                        // 6)
1940  stack_slots         += 2;
1941
1942  // Now compute actual number of stack words we need.
1943  // Rounding to make stack properly aligned.
1944  stack_slots = round_to(stack_slots,                                             // 7)
1945                         frame::alignment_in_bytes / VMRegImpl::stack_slot_size);
1946  int frame_size_in_bytes = stack_slots * VMRegImpl::stack_slot_size;
1947
1948
1949  // Now we can start generating code.
1950  // --------------------------------------------------------------------------
1951
1952  intptr_t start_pc = (intptr_t)__ pc();
1953  intptr_t vep_start_pc;
1954  intptr_t frame_done_pc;
1955  intptr_t oopmap_pc;
1956
1957  Label    ic_miss;
1958  Label    handle_pending_exception;
1959
1960  Register r_callers_sp = R21;
1961  Register r_temp_1     = R22;
1962  Register r_temp_2     = R23;
1963  Register r_temp_3     = R24;
1964  Register r_temp_4     = R25;
1965  Register r_temp_5     = R26;
1966  Register r_temp_6     = R27;
1967  Register r_return_pc  = R28;
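  // All of the registers above are nonvolatile (callee-saved) on PPC64, so
  // their contents survive the C calls made below, including the native
  // call itself.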
1968
1969  Register r_carg1_jnienv        = noreg;
1970  Register r_carg2_classorobject = noreg;
1971  if (!is_critical_native) {
1972    r_carg1_jnienv        = out_regs[0].first()->as_Register();
1973    r_carg2_classorobject = out_regs[1].first()->as_Register();
1974  }
1975
1976
1977  // Generate the Unverified Entry Point (UEP).
1978  // --------------------------------------------------------------------------
1979  assert(start_pc == (intptr_t)__ pc(), "uep must be at start");
1980
1981  // Check ic: object class == cached class?
1982  if (!method_is_static) {
1983    Register ic = as_Register(Matcher::inline_cache_reg_encode());
1984    Register receiver_klass = r_temp_1;
1985
1986    __ cmpdi(CCR0, R3_ARG1, 0);
1987    __ beq(CCR0, ic_miss);
1988    __ verify_oop(R3_ARG1);
1989    __ load_klass(receiver_klass, R3_ARG1);
1990
1991    __ cmpd(CCR0, receiver_klass, ic);
1992    __ bne(CCR0, ic_miss);
1993  }
1994
1995
1996  // Generate the Verified Entry Point (VEP).
1997  // --------------------------------------------------------------------------
1998  vep_start_pc = (intptr_t)__ pc();
1999
2000  __ save_LR_CR(r_temp_1);
2001  __ generate_stack_overflow_check(frame_size_in_bytes); // Check before creating frame.
2002  __ mr(r_callers_sp, R1_SP);                            // Remember frame pointer.
2003  __ push_frame(frame_size_in_bytes, r_temp_1);          // Push the c2n adapter's frame.
2004  frame_done_pc = (intptr_t)__ pc();
2005
2006  __ verify_thread();
2007
2008  // Native nmethod wrappers never take possession of the oop arguments.
2009  // So the caller will GC the arguments.
2010  // The only thing we need an oopMap for is if the call is static.
2011  //
2012  // An OopMap for lock (and class if static), and one for the VM call itself.
2013  OopMapSet *oop_maps = new OopMapSet();
2014  OopMap    *oop_map  = new OopMap(stack_slots * 2, 0 /* arg_slots*/);
2015
2016  if (is_critical_native) {
2017    check_needs_gc_for_critical_native(masm, stack_slots, total_in_args, oop_handle_slot_offset, oop_maps, in_regs, in_sig_bt, r_temp_1);
2018  }
2019
2020  // Move arguments from register/stack to register/stack.
2021  // --------------------------------------------------------------------------
2022  //
2023  // We immediately shuffle the arguments so that for any vm call we have
2024  // to make from here on out (sync slow path, jvmti, etc.) we will have
2025  // captured the oops from our caller and have a valid oopMap for them.
2026  //
2027  // Natives require 1 or 2 extra arguments over the normal ones: the JNIEnv*
2028  // (derived from JavaThread* which is in R16_thread) and, if static,
2029  // the class mirror instead of a receiver. This pretty much guarantees that
2030  // register layout will not match. We ignore these extra arguments during
2031  // the shuffle. The shuffle is described by the two calling convention
2032  // vectors we have in our possession. We simply walk the java vector to
2033  // get the source locations and the c vector to get the destinations.
2034
2035  // Record sp-based slot for receiver on stack for non-static methods.
2036  int receiver_offset = -1;
2037
2038  // We move the arguments backward because the floating point
2039  // destination will always be a register with a number greater than or
2040  // equal to the source register's, or a stack location.
2041  //   in  is the index of the incoming Java arguments
2042  //   out is the index of the outgoing C arguments
2043
2044#ifdef ASSERT
2045  bool reg_destroyed[RegisterImpl::number_of_registers];
2046  bool freg_destroyed[FloatRegisterImpl::number_of_registers];
2047  for (int r = 0 ; r < RegisterImpl::number_of_registers ; r++) {
2048    reg_destroyed[r] = false;
2049  }
2050  for (int f = 0 ; f < FloatRegisterImpl::number_of_registers ; f++) {
2051    freg_destroyed[f] = false;
2052  }
2053#endif // ASSERT
2054
2055  for (int in = total_in_args - 1, out = total_c_args - 1; in >= 0 ; in--, out--) {
2056
2057#ifdef ASSERT
2058    if (in_regs[in].first()->is_Register()) {
2059      assert(!reg_destroyed[in_regs[in].first()->as_Register()->encoding()], "ack!");
2060    } else if (in_regs[in].first()->is_FloatRegister()) {
2061      assert(!freg_destroyed[in_regs[in].first()->as_FloatRegister()->encoding()], "ack!");
2062    }
2063    if (out_regs[out].first()->is_Register()) {
2064      reg_destroyed[out_regs[out].first()->as_Register()->encoding()] = true;
2065    } else if (out_regs[out].first()->is_FloatRegister()) {
2066      freg_destroyed[out_regs[out].first()->as_FloatRegister()->encoding()] = true;
2067    }
2068    if (out_regs2[out].first()->is_Register()) {
2069      reg_destroyed[out_regs2[out].first()->as_Register()->encoding()] = true;
2070    } else if (out_regs2[out].first()->is_FloatRegister()) {
2071      freg_destroyed[out_regs2[out].first()->as_FloatRegister()->encoding()] = true;
2072    }
2073#endif // ASSERT
2074
2075    switch (in_sig_bt[in]) {
2076      case T_BOOLEAN:
2077      case T_CHAR:
2078      case T_BYTE:
2079      case T_SHORT:
2080      case T_INT:
2081        // Move int and do sign extension.
2082        int_move(masm, in_regs[in], out_regs[out], r_callers_sp, r_temp_1);
2083        break;
2084      case T_LONG:
2085        long_move(masm, in_regs[in], out_regs[out], r_callers_sp, r_temp_1);
2086        break;
2087      case T_ARRAY:
2088        if (is_critical_native) {
2089          int body_arg = out;
2090          out -= 1; // Point to length arg.
2091          unpack_array_argument(masm, in_regs[in], in_elem_bt[in], out_regs[body_arg], out_regs[out],
2092                                r_callers_sp, r_temp_1, r_temp_2);
2093          break;
2094        }
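        // Not a critical native: deliberate fall through to T_OBJECT.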
2095      case T_OBJECT:
2096        assert(!is_critical_native, "no oop arguments");
2097        object_move(masm, stack_slots,
2098                    oop_map, oop_handle_slot_offset,
2099                    ((in == 0) && (!method_is_static)), &receiver_offset,
2100                    in_regs[in], out_regs[out],
2101                    r_callers_sp, r_temp_1, r_temp_2);
2102        break;
2103      case T_VOID:
2104        break;
2105      case T_FLOAT:
2106        float_move(masm, in_regs[in], out_regs[out], r_callers_sp, r_temp_1);
2107        if (out_regs2[out].first()->is_valid()) {
2108          float_move(masm, in_regs[in], out_regs2[out], r_callers_sp, r_temp_1);
2109        }
2110        break;
2111      case T_DOUBLE:
2112        double_move(masm, in_regs[in], out_regs[out], r_callers_sp, r_temp_1);
2113        if (out_regs2[out].first()->is_valid()) {
2114          double_move(masm, in_regs[in], out_regs2[out], r_callers_sp, r_temp_1);
2115        }
2116        break;
2117      case T_ADDRESS:
2118        fatal("found type (T_ADDRESS) in java args");
2119        break;
2120      default:
2121        ShouldNotReachHere();
2122        break;
2123    }
2124  }
2125
2126  // Pre-load a static method's oop into ARG2.
2127  // Used both by locking code and the normal JNI call code.
2128  if (method_is_static && !is_critical_native) {
2129    __ set_oop_constant(JNIHandles::make_local(method->method_holder()->java_mirror()),
2130                        r_carg2_classorobject);
2131
2132    // Now handlize the static class mirror in carg2. It's known not-null.
2133    __ std(r_carg2_classorobject, klass_offset, R1_SP);
2134    oop_map->set_oop(VMRegImpl::stack2reg(klass_slot_offset));
2135    __ addi(r_carg2_classorobject, R1_SP, klass_offset);
2136  }
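  // "Handlizing" means passing the address of a stack slot holding the oop
  // (a JNI handle) instead of the raw oop itself, so the GC can find and
  // update the oop via the oop map while the native code runs.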
2137
2138  // Get JNIEnv* which is first argument to native.
2139  if (!is_critical_native) {
2140    __ addi(r_carg1_jnienv, R16_thread, in_bytes(JavaThread::jni_environment_offset()));
2141  }
2142
2143  // NOTE:
2144  //
2145  // We have all of the arguments setup at this point.
2146  // We MUST NOT touch any outgoing regs from this point on.
2147  // So if we must call out we must push a new frame.
2148
2149  // Get current pc for oopmap, and load it patchable relative to global toc.
2150  oopmap_pc = (intptr_t) __ pc();
2151  __ calculate_address_from_global_toc(r_return_pc, (address)oopmap_pc, true, true, true, true);
2152
2153  // We use the same pc/oopMap repeatedly when we call out.
2154  oop_maps->add_gc_map(oopmap_pc - start_pc, oop_map);
2155
2156  // r_return_pc now holds the pc that we will use when we finally call
2157  // out to native code.
2158
2159  // Make sure that thread is non-volatile; it crosses a bunch of VM calls below.
2160  assert(R16_thread->is_nonvolatile(), "thread must be in non-volatile register");
2161
2162# if 0
2163  // DTrace method entry
2164# endif
2165
2166  // Lock a synchronized method.
2167  // --------------------------------------------------------------------------
2168
2169  if (method->is_synchronized()) {
2170    assert(!is_critical_native, "unhandled");
2171    ConditionRegister r_flag = CCR1;
2172    Register          r_oop  = r_temp_4;
2173    const Register    r_box  = r_temp_5;
2174    Label             done, locked;
2175
2176    // Load the oop for the object or class. r_carg2_classorobject contains
2177    // either the handlized oop from the incoming arguments or the handlized
2178    // class mirror (if the method is static).
2179    __ ld(r_oop, 0, r_carg2_classorobject);
2180
2181    // Get the lock box slot's address.
2182    __ addi(r_box, R1_SP, lock_offset);
2183
2184#   ifdef ASSERT
2185    if (UseBiasedLocking) {
2186      // Making the box point to itself will make it clear it went unused
2187      // but also be obviously invalid.
2188      __ std(r_box, 0, r_box);
2189    }
2190#   endif // ASSERT
2191
2192    // Try fastpath for locking.
2193    // fast_lock kills r_temp_1, r_temp_2, r_temp_3.
2194    __ compiler_fast_lock_object(r_flag, r_oop, r_box, r_temp_1, r_temp_2, r_temp_3);
2195    __ beq(r_flag, locked);
2196
2197    // None of the above fast optimizations worked so we have to get into the
2198    // slow case of monitor enter. Inline a special case of call_VM that
2199    // disallows any pending_exception.
2200
2201    // Save argument registers and leave room for C-compatible ABI_REG_ARGS.
2202    int frame_size = frame::abi_reg_args_size +
2203                     round_to(total_c_args * wordSize, frame::alignment_in_bytes);
2204    __ mr(R11_scratch1, R1_SP);
2205    RegisterSaver::push_frame_and_save_argument_registers(masm, R12_scratch2, frame_size, total_c_args, out_regs, out_regs2);
2206
2207    // Do the call.
2208    __ set_last_Java_frame(R11_scratch1, r_return_pc);
2209    assert(r_return_pc->is_nonvolatile(), "expecting return pc to be in non-volatile register");
2210    __ call_VM_leaf(CAST_FROM_FN_PTR(address, SharedRuntime::complete_monitor_locking_C), r_oop, r_box, R16_thread);
2211    __ reset_last_Java_frame();
2212
2213    RegisterSaver::restore_argument_registers_and_pop_frame(masm, frame_size, total_c_args, out_regs, out_regs2);
2214
2215    __ asm_assert_mem8_is_zero(thread_(pending_exception),
2216       "no pending exception allowed on exit from SharedRuntime::complete_monitor_locking_C", 0);
2217
2218    __ bind(locked);
2219  }
2220
2221
2222  // Publish thread state
2223  // --------------------------------------------------------------------------
2224
2225  // Use that pc we placed in r_return_pc a while back as the current frame anchor.
2226  __ set_last_Java_frame(R1_SP, r_return_pc);
2227
2228  // Transition from _thread_in_Java to _thread_in_native.
2229  __ li(R0, _thread_in_native);
2230  __ release();
2231  // TODO: PPC port assert(4 == JavaThread::sz_thread_state(), "unexpected field size");
2232  __ stw(R0, thread_(thread_state));
2233  if (UseMembar) {
2234    __ fence();
2235  }
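  // With UseMembar the fence publishes the state change immediately;
  // otherwise visibility is enforced through the memory serialization page
  // protocol (see the "Must we block?" section below).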
2236
2237
2238  // The JNI call
2239  // --------------------------------------------------------------------------
2240#if defined(ABI_ELFv2)
2241  __ call_c(native_func, relocInfo::runtime_call_type);
2242#else
2243  FunctionDescriptor* fd_native_method = (FunctionDescriptor*) native_func;
2244  __ call_c(fd_native_method, relocInfo::runtime_call_type);
2245#endif
2246
2247
2248  // Now, we are back from the native code.
2249
2250
2251  // Unpack the native result.
2252  // --------------------------------------------------------------------------
2253
2254  // For int-types, we do any needed sign-extension required.
2255  // Care must be taken that the return values (R3_RET and F1_RET)
2256  // will survive any VM calls for blocking or unlocking.
2257  // An OOP result (handle) is done specially in the slow-path code.
2258
2259  switch (ret_type) {
2260    case T_VOID:    break;        // Nothing to do!
2261    case T_FLOAT:   break;        // Got it where we want it (unless slow-path).
2262    case T_DOUBLE:  break;        // Got it where we want it (unless slow-path).
2263    case T_LONG:    break;        // Got it where we want it (unless slow-path).
2264    case T_OBJECT:  break;        // Really a handle.
2265                                  // Cannot de-handlize until after reclaiming jvm_lock.
2266    case T_ARRAY:   break;
2267
2268    case T_BOOLEAN: {             // 0 -> false(0); !0 -> true(1)
2269      Label skip_modify;
2270      __ cmpwi(CCR0, R3_RET, 0);
2271      __ beq(CCR0, skip_modify);
2272      __ li(R3_RET, 1);
2273      __ bind(skip_modify);
2274      break;
2275      }
2276    case T_BYTE: {                // sign extension
2277      __ extsb(R3_RET, R3_RET);
2278      break;
2279      }
2280    case T_CHAR: {                // unsigned result
2281      __ andi(R3_RET, R3_RET, 0xffff);
2282      break;
2283      }
2284    case T_SHORT: {               // sign extension
2285      __ extsh(R3_RET, R3_RET);
2286      break;
2287      }
2288    case T_INT:                   // nothing to do
2289      break;
2290    default:
2291      ShouldNotReachHere();
2292      break;
2293  }
2294
2295
2296  // Publish thread state
2297  // --------------------------------------------------------------------------
2298
2299  // Switch thread to "native transition" state before reading the
2300  // synchronization state. This additional state is necessary because reading
2301  // and testing the synchronization state is not atomic w.r.t. GC, as this
2302  // scenario demonstrates:
2303  //   - Java thread A, in _thread_in_native state, loads _not_synchronized
2304  //     and is preempted.
2305  //   - VM thread changes sync state to synchronizing and suspends threads
2306  //     for GC.
2307  //   - Thread A is resumed to finish this native method, but doesn't block
2308  //     here since it didn't see any synchronization in progress, and escapes.
2309
2310  // Transition from _thread_in_native to _thread_in_native_trans.
2311  __ li(R0, _thread_in_native_trans);
2312  __ release();
2313  // TODO: PPC port assert(4 == JavaThread::sz_thread_state(), "unexpected field size");
2314  __ stw(R0, thread_(thread_state));
2315
2316
2317  // Must we block?
2318  // --------------------------------------------------------------------------
2319
2320  // Block, if necessary, before resuming in _thread_in_Java state.
2321  // In order for GC to work, don't clear the last_Java_sp until after blocking.
2322  Label after_transition;
2323  {
2324    Label no_block, sync;
2325
2326    if (os::is_MP()) {
2327      if (UseMembar) {
2328        // Force this write out before the read below.
2329        __ fence();
2330      } else {
2331        // Write serialization page so VM thread can do a pseudo remote membar.
2332        // We use the current thread pointer to calculate a thread specific
2333        // offset to write to within the page. This minimizes bus traffic
2334        // due to cache line collision.
2335        __ serialize_memory(R16_thread, r_temp_4, r_temp_5);
2336      }
2337    }
2338
2339    Register sync_state_addr = r_temp_4;
2340    Register sync_state      = r_temp_5;
2341    Register suspend_flags   = r_temp_6;
2342
2343    __ load_const(sync_state_addr, SafepointSynchronize::address_of_state(), /*temp*/ sync_state);
2344
2345    // TODO: PPC port assert(4 == SafepointSynchronize::sz_state(), "unexpected field size");
2346    __ lwz(sync_state, 0, sync_state_addr);
2347
2348    // TODO: PPC port assert(4 == Thread::sz_suspend_flags(), "unexpected field size");
2349    __ lwz(suspend_flags, thread_(suspend_flags));
2350
2351    __ acquire();
2352
2353    Label do_safepoint;
2354    // No synchronization in progress nor yet synchronized.
2355    __ cmpwi(CCR0, sync_state, SafepointSynchronize::_not_synchronized);
2356    // Not suspended.
2357    __ cmpwi(CCR1, suspend_flags, 0);
2358
2359    __ bne(CCR0, sync);
2360    __ beq(CCR1, no_block);
2361
2362    // Block. Save any potential method result value before the operation and
2363    // use a leaf call to leave the last_Java_frame setup undisturbed. Doing this
2364    // lets us share the oopMap we used when we went native rather than create
2365    // a distinct one for this pc.
2366    __ bind(sync);
2367
2368    address entry_point = is_critical_native
2369      ? CAST_FROM_FN_PTR(address, JavaThread::check_special_condition_for_native_trans_and_transition)
2370      : CAST_FROM_FN_PTR(address, JavaThread::check_special_condition_for_native_trans);
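    // The critical-native variant also performs the transition back to
    // _thread_in_Java itself, which is why the branch below then skips the
    // normal transition code.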
2371    save_native_result(masm, ret_type, workspace_slot_offset);
2372    __ call_VM_leaf(entry_point, R16_thread);
2373    restore_native_result(masm, ret_type, workspace_slot_offset);
2374
2375    if (is_critical_native) {
2376      __ b(after_transition); // No thread state transition here.
2377    }
2378    __ bind(no_block);
2379  }
2380
2381  // Publish thread state.
2382  // --------------------------------------------------------------------------
2383
2384  // Thread state is thread_in_native_trans. Any safepoint blocking has
2385  // already happened so we can now change state to _thread_in_Java.
2386
2387  // Transition from _thread_in_native_trans to _thread_in_Java.
2388  __ li(R0, _thread_in_Java);
2389  __ release();
2390  // TODO: PPC port assert(4 == JavaThread::sz_thread_state(), "unexpected field size");
2391  __ stw(R0, thread_(thread_state));
2392  if (UseMembar) {
2393    __ fence();
2394  }
2395  __ bind(after_transition);
2396
2397  // Reguard any pages if necessary.
2398  // --------------------------------------------------------------------------
2399
2400  Label no_reguard;
2401  __ lwz(r_temp_1, thread_(stack_guard_state));
2402  __ cmpwi(CCR0, r_temp_1, JavaThread::stack_guard_yellow_disabled);
2403  __ bne(CCR0, no_reguard);
2404
2405  save_native_result(masm, ret_type, workspace_slot_offset);
2406  __ call_VM_leaf(CAST_FROM_FN_PTR(address, SharedRuntime::reguard_yellow_pages));
2407  restore_native_result(masm, ret_type, workspace_slot_offset);
2408
2409  __ bind(no_reguard);
2410
2411
2412  // Unlock
2413  // --------------------------------------------------------------------------
2414
2415  if (method->is_synchronized()) {
2416
2417    ConditionRegister r_flag   = CCR1;
2418    const Register r_oop       = r_temp_4;
2419    const Register r_box       = r_temp_5;
2420    const Register r_exception = r_temp_6;
2421    Label done;
2422
2423    // Get oop and address of lock object box.
2424    if (method_is_static) {
2425      assert(klass_offset != -1, "");
2426      __ ld(r_oop, klass_offset, R1_SP);
2427    } else {
2428      assert(receiver_offset != -1, "");
2429      __ ld(r_oop, receiver_offset, R1_SP);
2430    }
2431    __ addi(r_box, R1_SP, lock_offset);
2432
2433    // Try fastpath for unlocking.
2434    __ compiler_fast_unlock_object(r_flag, r_oop, r_box, r_temp_1, r_temp_2, r_temp_3);
2435    __ beq(r_flag, done);
2436
2437    // Save and restore any potential method result value around the unlocking operation.
2438    save_native_result(masm, ret_type, workspace_slot_offset);
2439
2440    // Must save pending exception around the slow-path VM call. Since it's a
2441    // leaf call, the pending exception (if any) can be kept in a register.
2442    __ ld(r_exception, thread_(pending_exception));
2443    assert(r_exception->is_nonvolatile(), "exception register must be non-volatile");
2444    __ li(R0, 0);
2445    __ std(R0, thread_(pending_exception));
2446
2447    // Slow case of monitor exit.
2448    // Inline a special case of call_VM that disallows any pending_exception.
2449    // Arguments are (oop obj, BasicLock* lock, JavaThread* thread).
2450    __ call_VM_leaf(CAST_FROM_FN_PTR(address, SharedRuntime::complete_monitor_unlocking_C), r_oop, r_box, R16_thread);
2451
2452    __ asm_assert_mem8_is_zero(thread_(pending_exception),
2453       "no pending exception allowed on exit from SharedRuntime::complete_monitor_unlocking_C", 0);
2454
2455    restore_native_result(masm, ret_type, workspace_slot_offset);
2456
2457    // Check_forward_pending_exception jumps to forward_exception if any pending
2458    // exception is set. The forward_exception routine expects to see the
2459    // exception in pending_exception and not in a register. Kind of clumsy,
2460    // since all folks who branch to forward_exception must have tested
2461    // pending_exception first and hence have it in a register already.
2462    __ std(r_exception, thread_(pending_exception));
2463
2464    __ bind(done);
2465  }
2466
2467# if 0
2468  // DTrace method exit
2469# endif
2470
2471  // Clear "last Java frame" SP and PC.
2472  // --------------------------------------------------------------------------
2473
2474  __ reset_last_Java_frame();
2475
2476  // Unpack oop result.
2477  // --------------------------------------------------------------------------
2478
2479  if (ret_type == T_OBJECT || ret_type == T_ARRAY) {
2480    Label skip_unboxing;
2481    __ cmpdi(CCR0, R3_RET, 0);
2482    __ beq(CCR0, skip_unboxing);
2483    __ ld(R3_RET, 0, R3_RET);
2484    __ bind(skip_unboxing);
2485    __ verify_oop(R3_RET);
2486  }
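  // R3_RET held a JNI handle (the address of an oop slot); the load above
  // dereferences it to yield the raw oop, with a NULL handle mapping to NULL.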
2487
2488
2489  // Reset handle block.
2490  // --------------------------------------------------------------------------
2491  if (!is_critical_native) {
2492    __ ld(r_temp_1, thread_(active_handles));
2493    // TODO: PPC port assert(4 == JNIHandleBlock::top_size_in_bytes(), "unexpected field size");
2494    __ li(r_temp_2, 0);
2495    __ stw(r_temp_2, JNIHandleBlock::top_offset_in_bytes(), r_temp_1);
2496
2497
2498    // Check for pending exceptions.
2499    // --------------------------------------------------------------------------
2500    __ ld(r_temp_2, thread_(pending_exception));
2501    __ cmpdi(CCR0, r_temp_2, 0);
2502    __ bne(CCR0, handle_pending_exception);
2503  }
2504
2505  // Return
2506  // --------------------------------------------------------------------------
2507
2508  __ pop_frame();
2509  __ restore_LR_CR(R11);
2510  __ blr();
2511
2512
2513  // Handler for pending exceptions (out-of-line).
2514  // --------------------------------------------------------------------------
2515
2516  // Since this is a native call, we know the proper exception handler
2517  // is the empty function. We just pop this frame and then jump to
2518  // forward_exception_entry.
2519  if (!is_critical_native) {
2520    __ align(InteriorEntryAlignment);
2521    __ bind(handle_pending_exception);
2522
2523    __ pop_frame();
2524    __ restore_LR_CR(R11);
2525    __ b64_patchable((address)StubRoutines::forward_exception_entry(),
2526                         relocInfo::runtime_call_type);
2527  }
2528
2529  // Handler for an inline cache miss (out-of-line).
2530  // --------------------------------------------------------------------------
2531
2532  if (!method_is_static) {
2533    __ align(InteriorEntryAlignment);
2534    __ bind(ic_miss);
2535
2536    __ b64_patchable((address)SharedRuntime::get_ic_miss_stub(),
2537                         relocInfo::runtime_call_type);
2538  }
2539
2540  // Done.
2541  // --------------------------------------------------------------------------
2542
2543  __ flush();
2544
2545  nmethod *nm = nmethod::new_native_nmethod(method,
2546                                            compile_id,
2547                                            masm->code(),
2548                                            vep_start_pc-start_pc,
2549                                            frame_done_pc-start_pc,
2550                                            stack_slots / VMRegImpl::slots_per_word,
2551                                            (method_is_static ? in_ByteSize(klass_offset) : in_ByteSize(receiver_offset)),
2552                                            in_ByteSize(lock_offset),
2553                                            oop_maps);
2554
2555  if (is_critical_native) {
2556    nm->set_lazy_critical_native(true);
2557  }
2558
2559  return nm;
2560#else
2561  ShouldNotReachHere();
2562  return NULL;
2563#endif // COMPILER2
2564}
2565
2566// This function returns the adjustment size (in number of words) to a c2i
2567// adapter activation for use during deoptimization.
2568int Deoptimization::last_frame_adjust(int callee_parameters, int callee_locals) {
2569  return round_to((callee_locals - callee_parameters) * Interpreter::stackElementWords, frame::alignment_in_bytes);
2570}
2571
2572uint SharedRuntime::out_preserve_stack_slots() {
2573#if defined(COMPILER1) || defined(COMPILER2)
2574  return frame::jit_out_preserve_size / VMRegImpl::stack_slot_size;
2575#else
2576  return 0;
2577#endif
2578}
2579
2580#if defined(COMPILER1) || defined(COMPILER2)
2581// Frame generation for deopt and uncommon trap blobs.
2582static void push_skeleton_frame(MacroAssembler* masm, bool deopt,
2583                                /* Read */
2584                                Register unroll_block_reg,
2585                                /* Update */
2586                                Register frame_sizes_reg,
2587                                Register number_of_frames_reg,
2588                                Register pcs_reg,
2589                                /* Invalidate */
2590                                Register frame_size_reg,
2591                                Register pc_reg) {
2592
2593  __ ld(pc_reg, 0, pcs_reg);
2594  __ ld(frame_size_reg, 0, frame_sizes_reg);
2595  __ std(pc_reg, _abi(lr), R1_SP);
2596  __ push_frame(frame_size_reg, R0/*tmp*/);
2597#ifdef CC_INTERP
2598  __ std(R1_SP, _parent_ijava_frame_abi(initial_caller_sp), R1_SP);
2599#else
2600#ifdef ASSERT
2601  __ load_const_optimized(pc_reg, 0x5afe);
2602  __ std(pc_reg, _ijava_state_neg(ijava_reserved), R1_SP);
2603#endif
2604  __ std(R1_SP, _ijava_state_neg(sender_sp), R1_SP);
2605#endif // CC_INTERP
2606  __ addi(number_of_frames_reg, number_of_frames_reg, -1);
2607  __ addi(frame_sizes_reg, frame_sizes_reg, wordSize);
2608  __ addi(pcs_reg, pcs_reg, wordSize);
2609}
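
// A skeleton frame has the correct size and return pc but otherwise
// uninitialized interpreter state; Deoptimization::unpack_frames fills in
// the locals, expression stack and monitors once all frames are pushed.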
2610
2611// Loop through the UnrollBlock info and create new frames.
2612static void push_skeleton_frames(MacroAssembler* masm, bool deopt,
2613                                 /* read */
2614                                 Register unroll_block_reg,
2615                                 /* invalidate */
2616                                 Register frame_sizes_reg,
2617                                 Register number_of_frames_reg,
2618                                 Register pcs_reg,
2619                                 Register frame_size_reg,
2620                                 Register pc_reg) {
2621  Label loop;
2622
2623  // _number_of_frames is of type int (deoptimization.hpp)
2624  __ lwa(number_of_frames_reg,
2625             Deoptimization::UnrollBlock::number_of_frames_offset_in_bytes(),
2626             unroll_block_reg);
2627  __ ld(pcs_reg,
2628            Deoptimization::UnrollBlock::frame_pcs_offset_in_bytes(),
2629            unroll_block_reg);
2630  __ ld(frame_sizes_reg,
2631            Deoptimization::UnrollBlock::frame_sizes_offset_in_bytes(),
2632            unroll_block_reg);
2633
2634  // stack: (caller_of_deoptee, ...).
2635
2636  // At this point we either have an interpreter frame or a compiled
2637  // frame on top of the stack. If it is a compiled frame, we push a new
2638  // c2i adapter here.
2639
2640  // Memorize top-frame stack-pointer.
2641  __ mr(frame_size_reg/*old_sp*/, R1_SP);
2642
2643  // Resize interpreter top frame OR C2I adapter.
2644
2645  // At this moment, the top frame (which is the caller of the deoptee) is
2646  // an interpreter frame or a newly pushed C2I adapter or an entry frame.
2647  // The top frame has a TOP_IJAVA_FRAME_ABI and the frame contains the
2648  // outgoing arguments.
2649  //
2650  // In order to push the interpreter frame for the deoptee, we need to
2651  // resize the top frame such that we are able to place the deoptee's
2652  // locals in the frame.
2653  // Additionally, we have to turn the top frame's TOP_IJAVA_FRAME_ABI
2654  // into a valid PARENT_IJAVA_FRAME_ABI.
2655
2656  __ lwa(R11_scratch1,
2657             Deoptimization::UnrollBlock::caller_adjustment_offset_in_bytes(),
2658             unroll_block_reg);
2659  __ neg(R11_scratch1, R11_scratch1);
2660
2661  // R11_scratch1 contains size of locals for frame resizing.
2662  // R12_scratch2 contains top frame's lr.
2663
2664  // Resizing by the complete frame size prevents the TOC from being
2665  // overwritten by locals. A more stack-space-saving way would be
2666  // to copy the TOC to its location in the new ABI.
2667  __ addi(R11_scratch1, R11_scratch1, - frame::parent_ijava_frame_abi_size);
2668
2669  // now, resize the frame
2670  __ resize_frame(R11_scratch1, pc_reg/*tmp*/);
2671
2672  // In the case where we have resized a c2i frame above, the optional
2673  // alignment below the locals has size 32 (why?).
2674  __ std(R12_scratch2, _abi(lr), R1_SP);
2675
2676  // Initialize initial_caller_sp.
2677#ifdef CC_INTERP
2678  __ std(frame_size_reg/*old_sp*/, _parent_ijava_frame_abi(initial_caller_sp), R1_SP);
2679#else
2680#ifdef ASSERT
2681  __ load_const_optimized(pc_reg, 0x5afe);
2682  __ std(pc_reg, _ijava_state_neg(ijava_reserved), R1_SP);
2683#endif
2684  __ std(frame_size_reg, _ijava_state_neg(sender_sp), R1_SP);
2685#endif // CC_INTERP
2686
2687#ifdef ASSERT
2688  // Make sure that there is at least one entry in the array.
2689  __ cmpdi(CCR0, number_of_frames_reg, 0);
2690  __ asm_assert_ne("array_size must be > 0", 0x205);
2691#endif
2692
2693  // Now push the new interpreter frames.
2694  //
2695  __ bind(loop);
2696  // Allocate a new frame, fill in the pc.
2697  push_skeleton_frame(masm, deopt,
2698                      unroll_block_reg,
2699                      frame_sizes_reg,
2700                      number_of_frames_reg,
2701                      pcs_reg,
2702                      frame_size_reg,
2703                      pc_reg);
2704  __ cmpdi(CCR0, number_of_frames_reg, 0);
2705  __ bne(CCR0, loop);
2706
2707  // Get the return address pointing into the frame manager.
2708  __ ld(R0, 0, pcs_reg);
2709  // Store it in the top interpreter frame.
2710  __ std(R0, _abi(lr), R1_SP);
2711  // Initialize frame_manager_lr of interpreter top frame.
2712#ifdef CC_INTERP
2713  __ std(R0, _top_ijava_frame_abi(frame_manager_lr), R1_SP);
2714#endif
2715}
2716#endif
2717
2718void SharedRuntime::generate_deopt_blob() {
2719  // Allocate space for the code
2720  ResourceMark rm;
2721  // Setup code generation tools
2722  CodeBuffer buffer("deopt_blob", 2048, 1024);
2723  InterpreterMacroAssembler* masm = new InterpreterMacroAssembler(&buffer);
2724  Label exec_mode_initialized;
2725  int frame_size_in_words;
2726  OopMap* map = NULL;
2727  OopMapSet *oop_maps = new OopMapSet();
2728
2729  // size of ABI112 plus spill slots for R3_RET and F1_RET.
2730  const int frame_size_in_bytes = frame::abi_reg_args_spill_size;
2731  const int frame_size_in_slots = frame_size_in_bytes / sizeof(jint);
2732  int first_frame_size_in_bytes = 0; // frame size of "unpack frame" for call to fetch_unroll_info.
2733
2734  const Register exec_mode_reg = R21_tmp1;
2735
2736  const address start = __ pc();
2737
2738#if defined(COMPILER1) || defined(COMPILER2)
2739  // --------------------------------------------------------------------------
2740  // Prolog for the non-exception case!
2741
2742  // We have been called from the deopt handler of the deoptee.
2743  //
2744  // deoptee:
2745  //                      ...
2746  //                      call X
2747  //                      ...
2748  //  deopt_handler:      call_deopt_stub
2749  //  cur. return pc  --> ...
2750  //
2751  // So currently SR_LR points behind the call in the deopt handler.
2752  // We adjust it such that it points to the start of the deopt handler.
2753  // The return_pc has been stored in the frame of the deoptee and
2754  // will replace the address of the deopt_handler in the call
2755  // to Deoptimization::fetch_unroll_info below.
2756  // We can't grab a free register here, because all registers may
2757  // contain live values, so let the RegisterSaver do the adjustment
2758  // of the return pc.
2759  const int return_pc_adjustment_no_exception = -HandlerImpl::size_deopt_handler();
2760
2761  // Push the "unpack frame"
2762  // Save everything in sight.
2763  map = RegisterSaver::push_frame_reg_args_and_save_live_registers(masm,
2764                                                                   &first_frame_size_in_bytes,
2765                                                                   /*generate_oop_map=*/ true,
2766                                                                   return_pc_adjustment_no_exception,
2767                                                                   RegisterSaver::return_pc_is_lr);
2768  assert(map != NULL, "OopMap must have been created");
2769
2770  __ li(exec_mode_reg, Deoptimization::Unpack_deopt);
2771  // Save exec mode for unpack_frames.
2772  __ b(exec_mode_initialized);
2773
2774  // --------------------------------------------------------------------------
2775  // Prolog for exception case
2776
2777  // An exception is pending.
2778  // We have been called with a return (interpreter) or a jump (exception blob).
2779  //
2780  // - R3_ARG1: exception oop
2781  // - R4_ARG2: exception pc
2782
2783  int exception_offset = __ pc() - start;
2784
2785  BLOCK_COMMENT("Prolog for exception case");
2786
2787  // Store exception oop and pc in thread (location known to GC).
2788  // This is needed since the call to "fetch_unroll_info()" may safepoint.
2789  __ std(R3_ARG1, in_bytes(JavaThread::exception_oop_offset()), R16_thread);
2790  __ std(R4_ARG2, in_bytes(JavaThread::exception_pc_offset()),  R16_thread);
2791  __ std(R4_ARG2, _abi(lr), R1_SP);
2792
2793  // Vanilla deoptimization with an exception pending in exception_oop.
2794  int exception_in_tls_offset = __ pc() - start;
2795
2796  // Push the "unpack frame".
2797  // Save everything in sight.
2798  RegisterSaver::push_frame_reg_args_and_save_live_registers(masm,
2799                                                             &first_frame_size_in_bytes,
2800                                                             /*generate_oop_map=*/ false,
2801                                                             /*return_pc_adjustment_exception=*/ 0,
2802                                                             RegisterSaver::return_pc_is_pre_saved);
2803
2804  // Deopt during an exception. Save exec mode for unpack_frames.
2805  __ li(exec_mode_reg, Deoptimization::Unpack_exception);
2806
2807  // fall through
2808
2809  int reexecute_offset = 0;
2810#ifdef COMPILER1
2811  __ b(exec_mode_initialized);
2812
2813  // Reexecute entry, similar to c2 uncommon trap
2814  reexecute_offset = __ pc() - start;
2815
2816  RegisterSaver::push_frame_reg_args_and_save_live_registers(masm,
2817                                                             &first_frame_size_in_bytes,
2818                                                             /*generate_oop_map=*/ false,
2819                                                             /*return_pc_adjustment_reexecute=*/ 0,
2820                                                             RegisterSaver::return_pc_is_pre_saved);
2821  __ li(exec_mode_reg, Deoptimization::Unpack_reexecute);
2822#endif
2823
2824  // --------------------------------------------------------------------------
2825  __ BIND(exec_mode_initialized);
2826
2827  {
2828  const Register unroll_block_reg = R22_tmp2;
2829
2830  // We need to set `last_Java_frame' because `fetch_unroll_info' will
2831  // call `last_Java_frame()'. The value of the pc in the frame is not
2832  // particularly important. It just needs to identify this blob.
2833  __ set_last_Java_frame(R1_SP, noreg);
2834
2835  // With EscapeAnalysis turned on, this call may safepoint!
2836  __ call_VM_leaf(CAST_FROM_FN_PTR(address, Deoptimization::fetch_unroll_info), R16_thread, exec_mode_reg);
2837  address calls_return_pc = __ last_calls_return_pc();
2838  // Set an oopmap for the call site that describes all our saved registers.
2839  oop_maps->add_gc_map(calls_return_pc - start, map);
2840
2841  __ reset_last_Java_frame();
2842  // Save the return value.
2843  __ mr(unroll_block_reg, R3_RET);
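      // R3_RET held the UnrollBlock* returned by fetch_unroll_info; it
      // describes the number, sizes and pcs of the interpreter frames that
      // must be created.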
2844
2845  // Restore only the result registers that have been saved
2846  // by push_frame_reg_args_and_save_live_registers(...).
2847  RegisterSaver::restore_result_registers(masm, first_frame_size_in_bytes);
2848
2849  // Reload the exec mode from the UnrollBlock (it might have changed).
2850  __ lwz(exec_mode_reg, Deoptimization::UnrollBlock::unpack_kind_offset_in_bytes(), unroll_block_reg);
2851  // In excp_deopt_mode, restore and clear exception oop which we
2852  // stored in the thread during exception entry above. The exception
2853  // oop will be the return value of this stub.
2854  Label skip_restore_excp;
2855  __ cmpdi(CCR0, exec_mode_reg, Deoptimization::Unpack_exception);
2856  __ bne(CCR0, skip_restore_excp);
2857  __ ld(R3_RET, in_bytes(JavaThread::exception_oop_offset()), R16_thread);
2858  __ ld(R4_ARG2, in_bytes(JavaThread::exception_pc_offset()), R16_thread);
2859  __ li(R0, 0);
2860  __ std(R0, in_bytes(JavaThread::exception_pc_offset()),  R16_thread);
2861  __ std(R0, in_bytes(JavaThread::exception_oop_offset()), R16_thread);
2862  __ BIND(skip_restore_excp);
2863
2864  __ pop_frame();
2865
2866  // stack: (deoptee, optional i2c, caller of deoptee, ...).
2867
2868  // pop the deoptee's frame
2869  __ pop_frame();
2870
2871  // stack: (caller_of_deoptee, ...).
2872
2873  // Loop through the `UnrollBlock' info and create interpreter frames.
2874  push_skeleton_frames(masm, true/*deopt*/,
2875                       unroll_block_reg,
2876                       R23_tmp3,
2877                       R24_tmp4,
2878                       R25_tmp5,
2879                       R26_tmp6,
2880                       R27_tmp7);
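      // push_skeleton_frames is expected to walk the UnrollBlock's per-frame
      // size and pc arrays, pushing one still-empty frame per virtual frame;
      // Deoptimization::unpack_frames fills them in below.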
2881
2882  // stack: (skeletal interpreter frame, ..., optional skeletal
2883  // interpreter frame, optional c2i, caller of deoptee, ...).
2884  }
2885
2886  // Push an `unpack_frame' taking care of float / int return values.
2887  __ push_frame(frame_size_in_bytes, R0/*tmp*/);
2888
2889  // stack: (unpack frame, skeletal interpreter frame, ..., optional
2890  // skeletal interpreter frame, optional c2i, caller of deoptee,
2891  // ...).
2892
2893  // Spill live volatile registers since we'll do a call.
2894  __ std( R3_RET, _abi_reg_args_spill(spill_ret),  R1_SP);
2895  __ stfd(F1_RET, _abi_reg_args_spill(spill_fret), R1_SP);
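      // The deoptee's result may live in either R3_RET or F1_RET; since the
      // return type is not known here, both registers are preserved across
      // the call.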
2896
2897  // Let the unpacker lay out the information in the skeletal frames
2898  // just allocated.
2899  __ get_PC_trash_LR(R3_RET);
2900  __ set_last_Java_frame(/*sp*/R1_SP, /*pc*/R3_RET);
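      // Unlike the fetch_unroll_info call above, a real pc is recorded here,
      // presumably so the frame anchor stays walkable while the skeletal
      // frames are still being filled in.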
2901  // This is a call to a LEAF method, so no oop map is required.
2902  __ call_VM_leaf(CAST_FROM_FN_PTR(address, Deoptimization::unpack_frames),
2903                  R16_thread/*thread*/, exec_mode_reg/*exec_mode*/);
2904  __ reset_last_Java_frame();
2905
2906  // Restore the volatiles saved above.
2907  __ ld( R3_RET, _abi_reg_args_spill(spill_ret),  R1_SP);
2908  __ lfd(F1_RET, _abi_reg_args_spill(spill_fret), R1_SP);
2909
2910  // Pop the unpack frame.
2911  __ pop_frame();
2912  __ restore_LR_CR(R0);
2913
2914  // stack: (top interpreter frame, ..., optional interpreter frame,
2915  // optional c2i, caller of deoptee, ...).
2916
2917  // Initialize R14_state.
2918#ifdef CC_INTERP
2919  __ ld(R14_state, 0, R1_SP);
2920  __ addi(R14_state, R14_state, -frame::interpreter_frame_cinterpreterstate_size_in_bytes());
2921  // Also initialize R15_prev_state.
2922  __ restore_prev_state();
2923#else
2924  __ restore_interpreter_state(R11_scratch1);
2925  __ load_const_optimized(R25_templateTableBase, (address)Interpreter::dispatch_table((TosState)0), R11_scratch1);
2926#endif // CC_INTERP
2927
2928
2929  // Return to the interpreter entry point.
2930  __ blr();
2931  __ flush();
2932#else // COMPILER1 || COMPILER2
2933  __ unimplemented("deopt blob needed only with compiler");
2934  int exception_offset = __ pc() - start;
2935#endif // COMPILER1 || COMPILER2
2936
2937  _deopt_blob = DeoptimizationBlob::create(&buffer, oop_maps, 0, exception_offset,
2938                                           reexecute_offset, first_frame_size_in_bytes / wordSize);
2939  _deopt_blob->set_unpack_with_exception_in_tls_offset(exception_in_tls_offset);
2940}
2941
2942#ifdef COMPILER2
2943void SharedRuntime::generate_uncommon_trap_blob() {
2944  // Allocate space for the code.
2945  ResourceMark rm;
2946  // Setup code generation tools.
2947  CodeBuffer buffer("uncommon_trap_blob", 2048, 1024);
2948  InterpreterMacroAssembler* masm = new InterpreterMacroAssembler(&buffer);
2949  address start = __ pc();
2950
2951  Register unroll_block_reg = R21_tmp1;
2952  Register klass_index_reg  = R22_tmp2;
2953  Register unc_trap_reg     = R23_tmp3;
2954
2955  OopMapSet* oop_maps = new OopMapSet();
2956  int frame_size_in_bytes = frame::abi_reg_args_size;
2957  OopMap* map = new OopMap(frame_size_in_bytes / sizeof(jint), 0);
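      // An OopMap's frame size is given in slots (jint-sized), hence the
      // division by sizeof(jint). The map itself stays empty: this dummy
      // frame saves no registers that could hold oops.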
2958
2959  // stack: (deoptee, optional i2c, caller_of_deoptee, ...).
2960
2961  // Push a dummy `unpack_frame' and call
2962  // `Deoptimization::uncommon_trap' to pack the compiled frame into a
2963  // vframe array and return the `UnrollBlock' information.
2964
2965  // Save LR to compiled frame.
2966  __ save_LR_CR(R11_scratch1);
2967
2968  // Push an "uncommon_trap" frame.
2969  __ push_frame_reg_args(0, R11_scratch1);
2970
2971  // stack: (unpack frame, deoptee, optional i2c, caller_of_deoptee, ...).
2972
2973  // Set the `unpack_frame' as last_Java_frame.
2974  // `Deoptimization::uncommon_trap' expects it and considers its
2975  // sender frame as the deoptee frame.
2976  // Remember the offset of the instruction whose address will be
2977  // moved to R11_scratch1.
2978  address gc_map_pc = __ get_PC_trash_LR(R11_scratch1);
2979
2980  __ set_last_Java_frame(/*sp*/R1_SP, /*pc*/R11_scratch1);
2981
2982  __ mr(klass_index_reg, R3);
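      // Note: klass_index_reg receives the trap request that the compiled
      // code passed in R3; for unloaded-class traps this value is a class
      // index, which is presumably where the register's name comes from.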
2983  __ li(R5_ARG3, Deoptimization::Unpack_uncommon_trap);
2984  __ call_VM_leaf(CAST_FROM_FN_PTR(address, Deoptimization::uncommon_trap),
2985                  R16_thread, klass_index_reg, R5_ARG3);
2986
2987  // Set an oopmap for the call site.
2988  oop_maps->add_gc_map(gc_map_pc - start, map);
2989
2990  __ reset_last_Java_frame();
2991
2992  // Pop the `unpack frame'.
2993  __ pop_frame();
2994
2995  // stack: (deoptee, optional i2c, caller_of_deoptee, ...).
2996
2997  // Save the return value.
2998  __ mr(unroll_block_reg, R3_RET);
2999
3000  // Pop the uncommon_trap frame.
3001  __ pop_frame();
3002
3003  // stack: (caller_of_deoptee, ...).
3004
3005#ifdef ASSERT
3006  __ lwz(R22_tmp2, Deoptimization::UnrollBlock::unpack_kind_offset_in_bytes(), unroll_block_reg);
3007  __ cmpdi(CCR0, R22_tmp2, (unsigned)Deoptimization::Unpack_uncommon_trap);
3008  __ asm_assert_eq("SharedRuntime::generate_uncommon_trap_blob: expected Unpack_uncommon_trap", 0);
3009#endif
3010
3011  // Allocate new interpreter frame(s) and possibly a c2i adapter
3012  // frame.
3013  push_skeleton_frames(masm, false/*deopt*/,
3014                       unroll_block_reg,
3015                       R22_tmp2,
3016                       R23_tmp3,
3017                       R24_tmp4,
3018                       R25_tmp5,
3019                       R26_tmp6);
3020
3021  // stack: (skeletal interpreter frame, ..., optional skeletal
3022  // interpreter frame, optional c2i, caller of deoptee, ...).
3023
3024  // Push a dummy `unpack_frame' taking care of float return values.
3025  // Call `Deoptimization::unpack_frames' to lay out the information in
3026  // the interpreter frames just created.
3027
3028  // Push a simple "unpack frame" here.
3029  __ push_frame_reg_args(0, R11_scratch1);
3030
3031  // stack: (unpack frame, skeletal interpreter frame, ..., optional
3032  // skeletal interpreter frame, optional c2i, caller of deoptee,
3033  // ...).
3034
3035  // Set the "unpack_frame" as last_Java_frame.
3036  __ get_PC_trash_LR(R11_scratch1);
3037  __ set_last_Java_frame(/*sp*/R1_SP, /*pc*/R11_scratch1);
3038
3039  // Indicate it is the uncommon trap case.
3040  __ li(unc_trap_reg, Deoptimization::Unpack_uncommon_trap);
3041  // Let the unpacker lay out the information in the skeletal frames
3042  // just allocated.
3043  __ call_VM_leaf(CAST_FROM_FN_PTR(address, Deoptimization::unpack_frames),
3044                  R16_thread, unc_trap_reg);
3045
3046  __ reset_last_Java_frame();
3047  // Pop the `unpack frame'.
3048  __ pop_frame();
3049  // Restore LR from top interpreter frame.
3050  __ restore_LR_CR(R11_scratch1);
3051
3052  // stack: (top interpreter frame, ..., optional interpreter frame,
3053  // optional c2i, caller of deoptee, ...).
3054
3055#ifdef CC_INTERP
3056  // Initialize R14_state, ...
3057  __ ld(R11_scratch1, 0, R1_SP);
3058  __ addi(R14_state, R11_scratch1, -frame::interpreter_frame_cinterpreterstate_size_in_bytes());
3059  // also initialize R15_prev_state.
3060  __ restore_prev_state();
3061#else
3062  __ restore_interpreter_state(R11_scratch1);
3063  __ load_const_optimized(R25_templateTableBase, (address)Interpreter::dispatch_table((TosState)0), R11_scratch1);
3064#endif // CC_INTERP
3065
3066  // Return to the interpreter entry point.
3067  __ blr();
3068
3069  masm->flush();
3070
3071  _uncommon_trap_blob = UncommonTrapBlob::create(&buffer, oop_maps, frame_size_in_bytes/wordSize);
3072}
3073#endif // COMPILER2
3074
3075// Generate a special Compile2Runtime blob that saves all registers and sets up the oopmap.
3076SafepointBlob* SharedRuntime::generate_handler_blob(address call_ptr, int poll_type) {
3077  assert(StubRoutines::forward_exception_entry() != NULL,
3078         "must be generated before");
3079
3080  ResourceMark rm;
3081  OopMapSet *oop_maps = new OopMapSet();
3082  OopMap* map;
3083
3084  // Allocate space for the code. Setup code generation tools.
3085  CodeBuffer buffer("handler_blob", 2048, 1024);
3086  MacroAssembler* masm = new MacroAssembler(&buffer);
3087
3088  address start = __ pc();
3089  int frame_size_in_bytes = 0;
3090
3091  RegisterSaver::ReturnPCLocation return_pc_location;
3092  bool cause_return = (poll_type == POLL_AT_RETURN);
3093  if (cause_return) {
3094    // Nothing to do here. The frame has already been popped in MachEpilogNode.
3095    // Register LR already contains the return pc.
3096    return_pc_location = RegisterSaver::return_pc_is_lr;
3097  } else {
3098    // Use thread()->saved_exception_pc() as return pc.
3099    return_pc_location = RegisterSaver::return_pc_is_thread_saved_exception_pc;
3100  }
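      // Rationale: for a poll at return the frame is already popped and LR is
      // accurate, while for a poll inside the method the signal handler has
      // stashed the faulting pc in thread->saved_exception_pc().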
3101
3102  // Save registers, fpu state, and flags.
3103  map = RegisterSaver::push_frame_reg_args_and_save_live_registers(masm,
3104                                                                   &frame_size_in_bytes,
3105                                                                   /*generate_oop_map=*/ true,
3106                                                                   /*return_pc_adjustment=*/0,
3107                                                                   return_pc_location);
3108
3109  // The following is basically a call_VM. However, we need the precise
3110  // address of the call in order to generate an oopmap. Hence, we do all the
3111  // work ourselves.
3112  __ set_last_Java_frame(/*sp=*/R1_SP, /*pc=*/noreg);
3113
3114  // The return address must always be correct so that the frame constructor
3115  // never sees an invalid pc.
3116
3117  // Do the call
3118  __ call_VM_leaf(call_ptr, R16_thread);
3119  address calls_return_pc = __ last_calls_return_pc();
3120
3121  // Set an oopmap for the call site. This oopmap will map all
3122  // oop-registers and debug-info registers as callee-saved. This
3123  // will allow deoptimization at this safepoint to find all possible
3124  // debug-info recordings, as well as let GC find all oops.
3125  oop_maps->add_gc_map(calls_return_pc - start, map);
3126
3127  Label noException;
3128
3129  // Clear the last Java frame.
3130  __ reset_last_Java_frame();
3131
3132  BLOCK_COMMENT("  Check pending exception.");
3133  const Register pending_exception = R0;
3134  __ ld(pending_exception, thread_(pending_exception));
3135  __ cmpdi(CCR0, pending_exception, 0);
3136  __ beq(CCR0, noException);
3137
3138  // Exception pending
3139  RegisterSaver::restore_live_registers_and_pop_frame(masm,
3140                                                      frame_size_in_bytes,
3141                                                      /*restore_ctr=*/true);
3142
3143  BLOCK_COMMENT("  Jump to forward_exception_entry.");
3144  // Jump to forward_exception_entry, with the issuing PC in LR
3145  // so it looks like the original nmethod called forward_exception_entry.
3146  __ b64_patchable(StubRoutines::forward_exception_entry(), relocInfo::runtime_call_type);
3147
3148  // No exception case.
3149  __ BIND(noException);
3150
3151
3152  // Normal exit, restore registers and exit.
3153  RegisterSaver::restore_live_registers_and_pop_frame(masm,
3154                                                      frame_size_in_bytes,
3155                                                      /*restore_ctr=*/true);
3156
3157  __ blr();
3158
3159  // Make sure all code is generated
3160  masm->flush();
3161
3162  // Fill out other meta info.
3163  // CodeBlob frame size is in words.
3164  return SafepointBlob::create(&buffer, oop_maps, frame_size_in_bytes / wordSize);
3165}
3166
3167// generate_resolve_blob - call resolution (static/virtual/opt-virtual/ic-miss)
3168//
3169// Generate a stub that calls into the VM to find the proper destination
3170// of a Java call. All the argument registers are live at this point,
3171// but since this is generic code we don't know what they are; the caller
3172// must do any GC of the args.
3173//
3174RuntimeStub* SharedRuntime::generate_resolve_blob(address destination, const char* name) {
3175
3176  // allocate space for the code
3177  ResourceMark rm;
3178
3179  CodeBuffer buffer(name, 1000, 512);
3180  MacroAssembler* masm = new MacroAssembler(&buffer);
3181
3182  int frame_size_in_bytes;
3183
3184  OopMapSet *oop_maps = new OopMapSet();
3185  OopMap* map = NULL;
3186
3187  address start = __ pc();
3188
3189  map = RegisterSaver::push_frame_reg_args_and_save_live_registers(masm,
3190                                                                   &frame_size_in_bytes,
3191                                                                   /*generate_oop_map*/ true,
3192                                                                   /*return_pc_adjustment*/ 0,
3193                                                                   RegisterSaver::return_pc_is_lr);
3194
3195  // Use noreg as last_Java_pc; the return pc will be reconstructed
3196  // from the physical frame.
3197  __ set_last_Java_frame(/*sp*/R1_SP, noreg);
3198
3199  int frame_complete = __ offset();
3200
3201  // Pass R19_method as 2nd (optional) argument, used by
3202  // counter_overflow_stub.
3203  __ call_VM_leaf(destination, R16_thread, R19_method);
3204  address calls_return_pc = __ last_calls_return_pc();
3205  // Set an oopmap for the call site.
3206  // We need this not only for callee-saved registers, but also for volatile
3207  // registers that the compiler might be keeping live across a safepoint.
3208  // Create the oopmap for the call's return pc.
3209  oop_maps->add_gc_map(calls_return_pc - start, map);
3210
3211  // R3_RET contains the address we are going to jump to assuming no exception got installed.
3212
3213  // Clear last_Java_frame.
3214  __ reset_last_Java_frame();
3215
3216  // Check for pending exceptions.
3217  BLOCK_COMMENT("Check for pending exceptions.");
3218  Label pending;
3219  __ ld(R11_scratch1, thread_(pending_exception));
3220  __ cmpdi(CCR0, R11_scratch1, 0);
3221  __ bne(CCR0, pending);
3222
3223  __ mtctr(R3_RET); // Ctr will not be touched by restore_live_registers_and_pop_frame.
3224
3225  RegisterSaver::restore_live_registers_and_pop_frame(masm, frame_size_in_bytes, /*restore_ctr*/ false);
3226
3227  // Get the returned method.
3228  __ get_vm_result_2(R19_method);
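      // get_vm_result_2 loads the callee Method* that the resolve call
      // deposited in the thread's vm_result_2 field; this must happen after
      // the register restore, which rewound R19_method to its saved value.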
3229
3230  __ bctr();
3231
3232
3233  // Pending exception after the safepoint.
3234  __ BIND(pending);
3235
3236  RegisterSaver::restore_live_registers_and_pop_frame(masm, frame_size_in_bytes, /*restore_ctr*/ true);
3237
3238  // exception pending => remove activation and forward to exception handler
3239
3240  __ li(R11_scratch1, 0);
3241  __ ld(R3_ARG1, thread_(pending_exception));
3242  __ std(R11_scratch1, in_bytes(JavaThread::vm_result_offset()), R16_thread);
3243  __ b64_patchable(StubRoutines::forward_exception_entry(), relocInfo::runtime_call_type);
3244
3245  // -------------
3246  // Make sure all code is generated.
3247  masm->flush();
3248
3249  // Return the blob.
3250  // Note: new_runtime_stub expects the frame size in words, hence frame_size_in_bytes/wordSize.
3251  return RuntimeStub::new_runtime_stub(name, &buffer, frame_complete, frame_size_in_bytes/wordSize,
3252                                       oop_maps, true);
3253}
3254