/*
 * Copyright (c) 1997, 2017, Oracle and/or its affiliates. All rights reserved.
 * Copyright (c) 2012, 2017 SAP SE. All rights reserved.
 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
 *
 * This code is free software; you can redistribute it and/or modify it
 * under the terms of the GNU General Public License version 2 only, as
 * published by the Free Software Foundation.
 *
 * This code is distributed in the hope that it will be useful, but WITHOUT
 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
 * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
 * version 2 for more details (a copy is included in the LICENSE file that
 * accompanied this code).
 *
 * You should have received a copy of the GNU General Public License version
 * 2 along with this work; if not, write to the Free Software Foundation,
 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
 *
 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
 * or visit www.oracle.com if you need additional information or have any
 * questions.
 *
 */

#include "precompiled.hpp"
#include "asm/macroAssembler.inline.hpp"
#include "code/debugInfoRec.hpp"
#include "code/icBuffer.hpp"
#include "code/vtableStubs.hpp"
#include "frame_ppc.hpp"
#include "interpreter/interpreter.hpp"
#include "interpreter/interp_masm.hpp"
#include "memory/resourceArea.hpp"
#include "oops/compiledICHolder.hpp"
#include "runtime/sharedRuntime.hpp"
#include "runtime/vframeArray.hpp"
#include "utilities/align.hpp"
#include "vmreg_ppc.inline.hpp"
#ifdef COMPILER1
#include "c1/c1_Runtime1.hpp"
#endif
#ifdef COMPILER2
#include "opto/ad.hpp"
#include "opto/runtime.hpp"
#endif

#include <alloca.h>

#define __ masm->

#ifdef PRODUCT
#define BLOCK_COMMENT(str) // nothing
#else
#define BLOCK_COMMENT(str) __ block_comment(str)
#endif

#define BIND(label) bind(label); BLOCK_COMMENT(#label ":")


class RegisterSaver {
 // Used for saving volatile registers.
 public:

  // Support different return pc locations.
  enum ReturnPCLocation {
    return_pc_is_lr,
    return_pc_is_pre_saved,
    return_pc_is_thread_saved_exception_pc
  };

  static OopMap* push_frame_reg_args_and_save_live_registers(MacroAssembler* masm,
                         int* out_frame_size_in_bytes,
                         bool generate_oop_map,
                         int return_pc_adjustment,
                         ReturnPCLocation return_pc_location);
  static void    restore_live_registers_and_pop_frame(MacroAssembler* masm,
                         int frame_size_in_bytes,
                         bool restore_ctr);

  static void push_frame_and_save_argument_registers(MacroAssembler* masm,
                         Register r_temp,
                         int frame_size,
                         int total_args,
                         const VMRegPair *regs, const VMRegPair *regs2 = NULL);
  static void restore_argument_registers_and_pop_frame(MacroAssembler*masm,
                         int frame_size,
                         int total_args,
                         const VMRegPair *regs, const VMRegPair *regs2 = NULL);

  // During deoptimization only the result registers need to be restored
  // all the other values have already been extracted.
  static void restore_result_registers(MacroAssembler* masm, int frame_size_in_bytes);

  // Constants and data structures:

  typedef enum {
    int_reg           = 0,
    float_reg         = 1,
    special_reg       = 2
  } RegisterType;

  typedef enum {
    reg_size          = 8,
    half_reg_size     = reg_size / 2,
  } RegisterConstants;

  typedef struct {
    RegisterType        reg_type;
    int                 reg_num;
    VMReg               vmreg;
  } LiveRegType;
};


#define RegisterSaver_LiveSpecialReg(regname) \
  { RegisterSaver::special_reg, regname->encoding(), regname->as_VMReg() }

#define RegisterSaver_LiveIntReg(regname) \
  { RegisterSaver::int_reg,     regname->encoding(), regname->as_VMReg() }

#define RegisterSaver_LiveFloatReg(regname) \
  { RegisterSaver::float_reg,   regname->encoding(), regname->as_VMReg() }

static const RegisterSaver::LiveRegType RegisterSaver_LiveRegs[] = {
  // Live registers which get spilled to the stack. Register
  // positions in this array correspond directly to the stack layout.

  //
  // live special registers:
  //
  RegisterSaver_LiveSpecialReg(SR_CTR),
  //
  // live float registers:
  //
  RegisterSaver_LiveFloatReg( F0  ),
  RegisterSaver_LiveFloatReg( F1  ),
  RegisterSaver_LiveFloatReg( F2  ),
  RegisterSaver_LiveFloatReg( F3  ),
  RegisterSaver_LiveFloatReg( F4  ),
  RegisterSaver_LiveFloatReg( F5  ),
  RegisterSaver_LiveFloatReg( F6  ),
  RegisterSaver_LiveFloatReg( F7  ),
  RegisterSaver_LiveFloatReg( F8  ),
  RegisterSaver_LiveFloatReg( F9  ),
  RegisterSaver_LiveFloatReg( F10 ),
  RegisterSaver_LiveFloatReg( F11 ),
  RegisterSaver_LiveFloatReg( F12 ),
  RegisterSaver_LiveFloatReg( F13 ),
  RegisterSaver_LiveFloatReg( F14 ),
  RegisterSaver_LiveFloatReg( F15 ),
  RegisterSaver_LiveFloatReg( F16 ),
  RegisterSaver_LiveFloatReg( F17 ),
  RegisterSaver_LiveFloatReg( F18 ),
  RegisterSaver_LiveFloatReg( F19 ),
  RegisterSaver_LiveFloatReg( F20 ),
  RegisterSaver_LiveFloatReg( F21 ),
  RegisterSaver_LiveFloatReg( F22 ),
  RegisterSaver_LiveFloatReg( F23 ),
  RegisterSaver_LiveFloatReg( F24 ),
  RegisterSaver_LiveFloatReg( F25 ),
  RegisterSaver_LiveFloatReg( F26 ),
  RegisterSaver_LiveFloatReg( F27 ),
  RegisterSaver_LiveFloatReg( F28 ),
  RegisterSaver_LiveFloatReg( F29 ),
  RegisterSaver_LiveFloatReg( F30 ),
  RegisterSaver_LiveFloatReg( F31 ),
  //
  // live integer registers:
  //
  RegisterSaver_LiveIntReg(   R0  ),
  //RegisterSaver_LiveIntReg( R1  ), // stack pointer
  RegisterSaver_LiveIntReg(   R2  ),
  RegisterSaver_LiveIntReg(   R3  ),
  RegisterSaver_LiveIntReg(   R4  ),
  RegisterSaver_LiveIntReg(   R5  ),
  RegisterSaver_LiveIntReg(   R6  ),
  RegisterSaver_LiveIntReg(   R7  ),
  RegisterSaver_LiveIntReg(   R8  ),
  RegisterSaver_LiveIntReg(   R9  ),
  RegisterSaver_LiveIntReg(   R10 ),
  RegisterSaver_LiveIntReg(   R11 ),
  RegisterSaver_LiveIntReg(   R12 ),
  //RegisterSaver_LiveIntReg( R13 ), // system thread id
  RegisterSaver_LiveIntReg(   R14 ),
  RegisterSaver_LiveIntReg(   R15 ),
  RegisterSaver_LiveIntReg(   R16 ),
  RegisterSaver_LiveIntReg(   R17 ),
  RegisterSaver_LiveIntReg(   R18 ),
  RegisterSaver_LiveIntReg(   R19 ),
  RegisterSaver_LiveIntReg(   R20 ),
  RegisterSaver_LiveIntReg(   R21 ),
  RegisterSaver_LiveIntReg(   R22 ),
  RegisterSaver_LiveIntReg(   R23 ),
  RegisterSaver_LiveIntReg(   R24 ),
  RegisterSaver_LiveIntReg(   R25 ),
  RegisterSaver_LiveIntReg(   R26 ),
  RegisterSaver_LiveIntReg(   R27 ),
  RegisterSaver_LiveIntReg(   R28 ),
  RegisterSaver_LiveIntReg(   R29 ),
  RegisterSaver_LiveIntReg(   R30 ),
  RegisterSaver_LiveIntReg(   R31 ), // must be the last register (see save/restore functions below)
};
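
// Note (illustrative, added for clarity): with the table above, regstosave_num
// works out to 63 entries (1 special + 32 float + 30 integer registers), i.e.
// a register save area of 63 * reg_size = 504 bytes. The frame pushed by
// push_frame_reg_args_and_save_live_registers() below is then
// align_up(504, frame::alignment_in_bytes) + frame::abi_reg_args_size;
// assuming 16-byte alignment and the 96/112 byte ABI area asserted in
// c_calling_convention(), that is roughly 608 (ELFv2) resp. 624 (ELFv1) bytes.
// The ABI area sits at the low end of the new frame and the save area above
// it, so the last table entry (R31) lands in the frame's topmost slot, which
// matches the early spill of R31 at -reg_size off the old SP.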

OopMap* RegisterSaver::push_frame_reg_args_and_save_live_registers(MacroAssembler* masm,
                         int* out_frame_size_in_bytes,
                         bool generate_oop_map,
                         int return_pc_adjustment,
                         ReturnPCLocation return_pc_location) {
  // Push an abi_reg_args-frame and store all registers which may be live.
  // If requested, create an OopMap: Record volatile registers as
  // callee-save values in an OopMap so their save locations will be
  // propagated to the RegisterMap of the caller frame during
  // StackFrameStream construction (needed for deoptimization; see
  // compiledVFrame::create_stack_value).
  // If return_pc_adjustment != 0 adjust the return pc by return_pc_adjustment.

  int i;
  int offset;

  // calculate frame size
  const int regstosave_num       = sizeof(RegisterSaver_LiveRegs) /
                                   sizeof(RegisterSaver::LiveRegType);
  const int register_save_size   = regstosave_num * reg_size;
  const int frame_size_in_bytes  = align_up(register_save_size, frame::alignment_in_bytes)
                                   + frame::abi_reg_args_size;
  *out_frame_size_in_bytes       = frame_size_in_bytes;
  const int frame_size_in_slots  = frame_size_in_bytes / sizeof(jint);
  const int register_save_offset = frame_size_in_bytes - register_save_size;

  // OopMap frame size is in c2 stack slots (sizeof(jint)) not bytes or words.
  OopMap* map = generate_oop_map ? new OopMap(frame_size_in_slots, 0) : NULL;

  BLOCK_COMMENT("push_frame_reg_args_and_save_live_registers {");

  // Save r31 in the last slot of the not yet pushed frame so that we
  // can use it as scratch reg.
  __ std(R31, -reg_size, R1_SP);
  assert(-reg_size == register_save_offset - frame_size_in_bytes + ((regstosave_num-1)*reg_size),
         "consistency check");

  // save the flags
  // Do the save_LR_CR by hand and adjust the return pc if requested.
  __ mfcr(R31);
  __ std(R31, _abi(cr), R1_SP);
  switch (return_pc_location) {
    case return_pc_is_lr: __ mflr(R31); break;
    case return_pc_is_pre_saved: assert(return_pc_adjustment == 0, "unsupported"); break;
    case return_pc_is_thread_saved_exception_pc: __ ld(R31, thread_(saved_exception_pc)); break;
    default: ShouldNotReachHere();
  }
  if (return_pc_location != return_pc_is_pre_saved) {
    if (return_pc_adjustment != 0) {
      __ addi(R31, R31, return_pc_adjustment);
    }
    __ std(R31, _abi(lr), R1_SP);
  }

  // push a new frame
  __ push_frame(frame_size_in_bytes, R31);

  // save all registers (ints and floats)
  offset = register_save_offset;
  for (int i = 0; i < regstosave_num; i++) {
    int reg_num  = RegisterSaver_LiveRegs[i].reg_num;
    int reg_type = RegisterSaver_LiveRegs[i].reg_type;

    switch (reg_type) {
      case RegisterSaver::int_reg: {
        if (reg_num != 31) { // We spilled R31 right at the beginning.
          __ std(as_Register(reg_num), offset, R1_SP);
        }
        break;
      }
      case RegisterSaver::float_reg: {
        __ stfd(as_FloatRegister(reg_num), offset, R1_SP);
        break;
      }
      case RegisterSaver::special_reg: {
        if (reg_num == SR_CTR_SpecialRegisterEnumValue) {
          __ mfctr(R31);
          __ std(R31, offset, R1_SP);
        } else {
          Unimplemented();
        }
        break;
      }
      default:
        ShouldNotReachHere();
    }

    if (generate_oop_map) {
      map->set_callee_saved(VMRegImpl::stack2reg(offset>>2),
                            RegisterSaver_LiveRegs[i].vmreg);
      map->set_callee_saved(VMRegImpl::stack2reg((offset + half_reg_size)>>2),
                            RegisterSaver_LiveRegs[i].vmreg->next());
    }
    offset += reg_size;
  }

  BLOCK_COMMENT("} push_frame_reg_args_and_save_live_registers");

  // And we're done.
  return map;
}


// Pop the current frame and restore all the registers that we
// saved.
void RegisterSaver::restore_live_registers_and_pop_frame(MacroAssembler* masm,
                                                         int frame_size_in_bytes,
                                                         bool restore_ctr) {
  int i;
  int offset;
  const int regstosave_num       = sizeof(RegisterSaver_LiveRegs) /
                                   sizeof(RegisterSaver::LiveRegType);
  const int register_save_size   = regstosave_num * reg_size;
  const int register_save_offset = frame_size_in_bytes - register_save_size;

  BLOCK_COMMENT("restore_live_registers_and_pop_frame {");

  // restore all registers (ints and floats)
  offset = register_save_offset;
  for (int i = 0; i < regstosave_num; i++) {
    int reg_num  = RegisterSaver_LiveRegs[i].reg_num;
    int reg_type = RegisterSaver_LiveRegs[i].reg_type;

    switch (reg_type) {
      case RegisterSaver::int_reg: {
        if (reg_num != 31) // R31 restored at the end, it's the tmp reg!
          __ ld(as_Register(reg_num), offset, R1_SP);
        break;
      }
      case RegisterSaver::float_reg: {
        __ lfd(as_FloatRegister(reg_num), offset, R1_SP);
        break;
      }
      case RegisterSaver::special_reg: {
        if (reg_num == SR_CTR_SpecialRegisterEnumValue) {
          if (restore_ctr) { // Nothing to do here if ctr already contains the next address.
            __ ld(R31, offset, R1_SP);
            __ mtctr(R31);
          }
        } else {
          Unimplemented();
        }
        break;
      }
      default:
        ShouldNotReachHere();
    }
    offset += reg_size;
  }

  // pop the frame
  __ pop_frame();

  // restore the flags
  __ restore_LR_CR(R31);

  // restore scratch register's value
  __ ld(R31, -reg_size, R1_SP);

  BLOCK_COMMENT("} restore_live_registers_and_pop_frame");
}

void RegisterSaver::push_frame_and_save_argument_registers(MacroAssembler* masm, Register r_temp,
                                                           int frame_size,int total_args, const VMRegPair *regs,
                                                           const VMRegPair *regs2) {
  __ push_frame(frame_size, r_temp);
  int st_off = frame_size - wordSize;
  for (int i = 0; i < total_args; i++) {
    VMReg r_1 = regs[i].first();
    VMReg r_2 = regs[i].second();
    if (!r_1->is_valid()) {
      assert(!r_2->is_valid(), "");
      continue;
    }
    if (r_1->is_Register()) {
      Register r = r_1->as_Register();
      __ std(r, st_off, R1_SP);
      st_off -= wordSize;
    } else if (r_1->is_FloatRegister()) {
      FloatRegister f = r_1->as_FloatRegister();
      __ stfd(f, st_off, R1_SP);
      st_off -= wordSize;
    }
  }
  if (regs2 != NULL) {
    for (int i = 0; i < total_args; i++) {
      VMReg r_1 = regs2[i].first();
      VMReg r_2 = regs2[i].second();
      if (!r_1->is_valid()) {
        assert(!r_2->is_valid(), "");
        continue;
      }
      if (r_1->is_Register()) {
        Register r = r_1->as_Register();
        __ std(r, st_off, R1_SP);
        st_off -= wordSize;
      } else if (r_1->is_FloatRegister()) {
        FloatRegister f = r_1->as_FloatRegister();
        __ stfd(f, st_off, R1_SP);
        st_off -= wordSize;
      }
    }
  }
}

void RegisterSaver::restore_argument_registers_and_pop_frame(MacroAssembler*masm, int frame_size,
                                                             int total_args, const VMRegPair *regs,
                                                             const VMRegPair *regs2) {
  int st_off = frame_size - wordSize;
  for (int i = 0; i < total_args; i++) {
    VMReg r_1 = regs[i].first();
    VMReg r_2 = regs[i].second();
    if (r_1->is_Register()) {
      Register r = r_1->as_Register();
      __ ld(r, st_off, R1_SP);
      st_off -= wordSize;
    } else if (r_1->is_FloatRegister()) {
      FloatRegister f = r_1->as_FloatRegister();
      __ lfd(f, st_off, R1_SP);
      st_off -= wordSize;
    }
  }
  if (regs2 != NULL)
    for (int i = 0; i < total_args; i++) {
      VMReg r_1 = regs2[i].first();
      VMReg r_2 = regs2[i].second();
      if (r_1->is_Register()) {
        Register r = r_1->as_Register();
        __ ld(r, st_off, R1_SP);
        st_off -= wordSize;
      } else if (r_1->is_FloatRegister()) {
        FloatRegister f = r_1->as_FloatRegister();
        __ lfd(f, st_off, R1_SP);
        st_off -= wordSize;
      }
    }
  __ pop_frame();
}

// Restore the registers that might be holding a result.
void RegisterSaver::restore_result_registers(MacroAssembler* masm, int frame_size_in_bytes) {
  int i;
  int offset;
  const int regstosave_num       = sizeof(RegisterSaver_LiveRegs) /
                                   sizeof(RegisterSaver::LiveRegType);
  const int register_save_size   = regstosave_num * reg_size;
  const int register_save_offset = frame_size_in_bytes - register_save_size;

  // restore all result registers (ints and floats)
  offset = register_save_offset;
  for (int i = 0; i < regstosave_num; i++) {
    int reg_num  = RegisterSaver_LiveRegs[i].reg_num;
    int reg_type = RegisterSaver_LiveRegs[i].reg_type;
    switch (reg_type) {
      case RegisterSaver::int_reg: {
        if (as_Register(reg_num)==R3_RET) // int result_reg
          __ ld(as_Register(reg_num), offset, R1_SP);
        break;
      }
      case RegisterSaver::float_reg: {
        if (as_FloatRegister(reg_num)==F1_RET) // float result_reg
          __ lfd(as_FloatRegister(reg_num), offset, R1_SP);
        break;
      }
      case RegisterSaver::special_reg: {
        // Special registers don't hold a result.
        break;
      }
      default:
        ShouldNotReachHere();
    }
    offset += reg_size;
  }
}

// Is vector's size (in bytes) bigger than a size saved by default?
bool SharedRuntime::is_wide_vector(int size) {
  // Note, MaxVectorSize == 8 on PPC64.
  assert(size <= 8, "%d bytes vectors are not supported", size);
  return size > 8;
}

size_t SharedRuntime::trampoline_size() {
  return Assembler::load_const_size + 8;
}

void SharedRuntime::generate_trampoline(MacroAssembler *masm, address destination) {
  Register Rtemp = R12;
  __ load_const(Rtemp, destination);
  __ mtctr(Rtemp);
  __ bctr();
}
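
// Note (illustrative, added for clarity): the trampoline emitted above is a
// load_const of the 64-bit destination followed by mtctr/bctr, so
// trampoline_size() is Assembler::load_const_size plus 8 bytes for those two
// 4-byte instructions. Assuming load_const materializes the constant with a
// 5-instruction sequence (20 bytes), a trampoline occupies 28 bytes of code.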

#ifdef COMPILER2
static int reg2slot(VMReg r) {
  return r->reg2stack() + SharedRuntime::out_preserve_stack_slots();
}

static int reg2offset(VMReg r) {
  return (r->reg2stack() + SharedRuntime::out_preserve_stack_slots()) * VMRegImpl::stack_slot_size;
}
#endif

// ---------------------------------------------------------------------------
// Read the array of BasicTypes from a signature, and compute where the
// arguments should go. Values in the VMRegPair regs array refer to 4-byte
// quantities. Values less than VMRegImpl::stack0 are registers, those above
// refer to 4-byte stack slots. All stack slots are based off of the stack pointer
// as framesizes are fixed.
// VMRegImpl::stack0 refers to the first stack slot 0(sp),
// and VMRegImpl::stack0+1 refers to the memory word 4 bytes higher. Register
// values (up to RegisterImpl::number_of_registers) are the 64-bit
// integer registers.

// Note: the INPUTS in sig_bt are in units of Java argument words, which are
// either 32-bit or 64-bit depending on the build. The OUTPUTS are in 32-bit
// units regardless of build. Of course for i486 there is no 64 bit build

// The Java calling convention is a "shifted" version of the C ABI.
// By skipping the first C ABI register we can call non-static jni methods
// with small numbers of arguments without having to shuffle the arguments
// at all. Since we control the java ABI we ought to at least get some
// advantage out of it.

const VMReg java_iarg_reg[8] = {
  R3->as_VMReg(),
  R4->as_VMReg(),
  R5->as_VMReg(),
  R6->as_VMReg(),
  R7->as_VMReg(),
  R8->as_VMReg(),
  R9->as_VMReg(),
  R10->as_VMReg()
};

const VMReg java_farg_reg[13] = {
  F1->as_VMReg(),
  F2->as_VMReg(),
  F3->as_VMReg(),
  F4->as_VMReg(),
  F5->as_VMReg(),
  F6->as_VMReg(),
  F7->as_VMReg(),
  F8->as_VMReg(),
  F9->as_VMReg(),
  F10->as_VMReg(),
  F11->as_VMReg(),
  F12->as_VMReg(),
  F13->as_VMReg()
};

const int num_java_iarg_registers = sizeof(java_iarg_reg) / sizeof(java_iarg_reg[0]);
const int num_java_farg_registers = sizeof(java_farg_reg) / sizeof(java_farg_reg[0]);

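// Worked example (illustrative, added for clarity): for a Java signature
// (int, long, double, float, Object), java_calling_convention() below assigns
//   int    -> R3  (ireg 0)
//   long   -> R4  (ireg 1)
//   double -> F1  (freg 0)
//   float  -> F2  (freg 1)
//   Object -> R5  (ireg 2)
// because ints/longs/oops consume java_iarg_reg[] and floats/doubles consume
// java_farg_reg[] independently. Only arguments beyond 8 int/oop or 13
// float/double values spill to (2-slot aligned) stack slots.
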
int SharedRuntime::java_calling_convention(const BasicType *sig_bt,
                                           VMRegPair *regs,
                                           int total_args_passed,
                                           int is_outgoing) {
  // C2c calling conventions for compiled-compiled calls.
  // Put 8 ints/longs into registers _AND_ 13 float/doubles into
  // registers _AND_ put the rest on the stack.

  const int inc_stk_for_intfloat   = 1; // 1 slots for ints and floats
  const int inc_stk_for_longdouble = 2; // 2 slots for longs and doubles

  int i;
  VMReg reg;
  int stk = 0;
  int ireg = 0;
  int freg = 0;

  // We put the first 8 arguments into registers and the rest on the
  // stack, float arguments are already in their argument registers
  // due to c2c calling conventions (see calling_convention).
  for (int i = 0; i < total_args_passed; ++i) {
    switch(sig_bt[i]) {
    case T_BOOLEAN:
    case T_CHAR:
    case T_BYTE:
    case T_SHORT:
    case T_INT:
      if (ireg < num_java_iarg_registers) {
        // Put int/ptr in register
        reg = java_iarg_reg[ireg];
        ++ireg;
      } else {
        // Put int/ptr on stack.
        reg = VMRegImpl::stack2reg(stk);
        stk += inc_stk_for_intfloat;
      }
      regs[i].set1(reg);
      break;
    case T_LONG:
      assert((i + 1) < total_args_passed && sig_bt[i+1] == T_VOID, "expecting half");
      if (ireg < num_java_iarg_registers) {
        // Put long in register.
        reg = java_iarg_reg[ireg];
        ++ireg;
      } else {
        // Put long on stack. They must be aligned to 2 slots.
        if (stk & 0x1) ++stk;
        reg = VMRegImpl::stack2reg(stk);
        stk += inc_stk_for_longdouble;
      }
      regs[i].set2(reg);
      break;
    case T_OBJECT:
    case T_ARRAY:
    case T_ADDRESS:
      if (ireg < num_java_iarg_registers) {
        // Put ptr in register.
        reg = java_iarg_reg[ireg];
        ++ireg;
      } else {
        // Put ptr on stack. Objects must be aligned to 2 slots too,
        // because "64-bit pointers record oop-ishness on 2 aligned
        // adjacent registers." (see OopFlow::build_oop_map).
        if (stk & 0x1) ++stk;
        reg = VMRegImpl::stack2reg(stk);
        stk += inc_stk_for_longdouble;
      }
      regs[i].set2(reg);
      break;
    case T_FLOAT:
      if (freg < num_java_farg_registers) {
        // Put float in register.
        reg = java_farg_reg[freg];
        ++freg;
      } else {
        // Put float on stack.
        reg = VMRegImpl::stack2reg(stk);
        stk += inc_stk_for_intfloat;
      }
      regs[i].set1(reg);
      break;
    case T_DOUBLE:
      assert((i + 1) < total_args_passed && sig_bt[i+1] == T_VOID, "expecting half");
      if (freg < num_java_farg_registers) {
        // Put double in register.
        reg = java_farg_reg[freg];
        ++freg;
      } else {
        // Put double on stack. They must be aligned to 2 slots.
        if (stk & 0x1) ++stk;
        reg = VMRegImpl::stack2reg(stk);
        stk += inc_stk_for_longdouble;
      }
      regs[i].set2(reg);
      break;
    case T_VOID:
      // Do not count halves.
      regs[i].set_bad();
      break;
    default:
      ShouldNotReachHere();
    }
  }
  return align_up(stk, 2);
}

#if defined(COMPILER1) || defined(COMPILER2)
// Calling convention for calling C code.
int SharedRuntime::c_calling_convention(const BasicType *sig_bt,
                                        VMRegPair *regs,
                                        VMRegPair *regs2,
                                        int total_args_passed) {
  // Calling conventions for C runtime calls and calls to JNI native methods.
  //
  // PPC64 convention: Hoist the first 8 int/ptr/long's in the first 8
  // int regs, leaving int regs undefined if the arg is flt/dbl. Hoist
  // the first 13 flt/dbl's in the first 13 fp regs but additionally
  // copy flt/dbl to the stack if they are beyond the 8th argument.

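  // Worked example (illustrative, added for clarity): under this C convention
  // the same signature (int, long, double, float, Object) uses one argument
  // position per value, so
  //   int    -> R3 (arg 0),  long -> R4 (arg 1),
  //   double -> F1 (arg 2, R5 left undefined),
  //   float  -> F2 (arg 3, R6 left undefined),
  //   Object -> R7 (arg 4),
  // and, unlike the Java convention above, float/double arguments beyond the
  // 8th argument position are additionally stored to their stack slot via
  // regs2 (see below).
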
  const VMReg iarg_reg[8] = {
    R3->as_VMReg(),
    R4->as_VMReg(),
    R5->as_VMReg(),
    R6->as_VMReg(),
    R7->as_VMReg(),
    R8->as_VMReg(),
    R9->as_VMReg(),
    R10->as_VMReg()
  };

  const VMReg farg_reg[13] = {
    F1->as_VMReg(),
    F2->as_VMReg(),
    F3->as_VMReg(),
    F4->as_VMReg(),
    F5->as_VMReg(),
    F6->as_VMReg(),
    F7->as_VMReg(),
    F8->as_VMReg(),
    F9->as_VMReg(),
    F10->as_VMReg(),
    F11->as_VMReg(),
    F12->as_VMReg(),
    F13->as_VMReg()
  };

  // Check calling conventions consistency.
  assert(sizeof(iarg_reg) / sizeof(iarg_reg[0]) == Argument::n_int_register_parameters_c &&
         sizeof(farg_reg) / sizeof(farg_reg[0]) == Argument::n_float_register_parameters_c,
         "consistency");

  // `Stk' counts stack slots. Due to alignment, 32 bit values occupy
  // 2 such slots, like 64 bit values do.
  const int inc_stk_for_intfloat   = 2; // 2 slots for ints and floats
  const int inc_stk_for_longdouble = 2; // 2 slots for longs and doubles

  int i;
  VMReg reg;
  // Leave room for C-compatible ABI_REG_ARGS.
  int stk = (frame::abi_reg_args_size - frame::jit_out_preserve_size) / VMRegImpl::stack_slot_size;
  int arg = 0;
  int freg = 0;

  // Avoid passing C arguments in the wrong stack slots.
#if defined(ABI_ELFv2)
  assert((SharedRuntime::out_preserve_stack_slots() + stk) * VMRegImpl::stack_slot_size == 96,
         "passing C arguments in wrong stack slots");
#else
  assert((SharedRuntime::out_preserve_stack_slots() + stk) * VMRegImpl::stack_slot_size == 112,
         "passing C arguments in wrong stack slots");
#endif
  // We fill-out regs AND regs2 if an argument must be passed in a
  // register AND in a stack slot. If regs2 is NULL in such a
  // situation, we bail-out with a fatal error.
  for (int i = 0; i < total_args_passed; ++i, ++arg) {
    // Initialize regs2 to BAD.
    if (regs2 != NULL) regs2[i].set_bad();

    switch(sig_bt[i]) {

    //
    // If arguments 0-7 are integers, they are passed in integer registers.
    // Argument i is placed in iarg_reg[i].
    //
    case T_BOOLEAN:
    case T_CHAR:
    case T_BYTE:
    case T_SHORT:
    case T_INT:
      // We must cast ints to longs and use full 64 bit stack slots
      // here.  Thus fall through, handle as long.
    case T_LONG:
    case T_OBJECT:
    case T_ARRAY:
    case T_ADDRESS:
    case T_METADATA:
      // Oops are already boxed if required (JNI).
      if (arg < Argument::n_int_register_parameters_c) {
        reg = iarg_reg[arg];
      } else {
        reg = VMRegImpl::stack2reg(stk);
        stk += inc_stk_for_longdouble;
      }
      regs[i].set2(reg);
      break;

    //
    // Floats are treated differently from int regs:  The first 13 float arguments
    // are passed in registers (not the float args among the first 13 args).
    // Thus argument i is NOT passed in farg_reg[i] if it is float.  It is passed
    // in farg_reg[j] if argument i is the j-th float argument of this call.
    //
    case T_FLOAT:
#if defined(LINUX)
      // Linux uses ELF ABI. Both original ELF and ELFv2 ABIs have float
      // in the least significant word of an argument slot.
#if defined(VM_LITTLE_ENDIAN)
#define FLOAT_WORD_OFFSET_IN_SLOT 0
#else
#define FLOAT_WORD_OFFSET_IN_SLOT 1
#endif
#elif defined(AIX)
      // Although AIX runs on big endian CPU, float is in the most
      // significant word of an argument slot.
#define FLOAT_WORD_OFFSET_IN_SLOT 0
#else
#error "unknown OS"
#endif
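      // Note (illustrative, added for clarity): a float occupies only one
      // 32-bit word of its 64-bit argument slot, and FLOAT_WORD_OFFSET_IN_SLOT
      // selects that word. On little-endian Linux the float sits in the first
      // word (offset 0), on big-endian Linux in the second word (offset 1),
      // while AIX expects it in the most significant word (offset 0).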
      if (freg < Argument::n_float_register_parameters_c) {
        // Put float in register ...
        reg = farg_reg[freg];
        ++freg;

        // Argument i for i > 8 is placed on the stack even if it's
        // placed in a register (if it's a float arg). Aix disassembly
        // shows that xlC places these float args on the stack AND in
        // a register. This is not documented, but we follow this
        // convention, too.
        if (arg >= Argument::n_regs_not_on_stack_c) {
          // ... and on the stack.
          guarantee(regs2 != NULL, "must pass float in register and stack slot");
          VMReg reg2 = VMRegImpl::stack2reg(stk + FLOAT_WORD_OFFSET_IN_SLOT);
          regs2[i].set1(reg2);
          stk += inc_stk_for_intfloat;
        }

      } else {
        // Put float on stack.
        reg = VMRegImpl::stack2reg(stk + FLOAT_WORD_OFFSET_IN_SLOT);
        stk += inc_stk_for_intfloat;
      }
      regs[i].set1(reg);
      break;
    case T_DOUBLE:
      assert((i + 1) < total_args_passed && sig_bt[i+1] == T_VOID, "expecting half");
      if (freg < Argument::n_float_register_parameters_c) {
        // Put double in register ...
        reg = farg_reg[freg];
        ++freg;

        // Argument i for i > 8 is placed on the stack even if it's
        // placed in a register (if it's a double arg). Aix disassembly
        // shows that xlC places these float args on the stack AND in
        // a register. This is not documented, but we follow this
        // convention, too.
        if (arg >= Argument::n_regs_not_on_stack_c) {
          // ... and on the stack.
          guarantee(regs2 != NULL, "must pass float in register and stack slot");
          VMReg reg2 = VMRegImpl::stack2reg(stk);
          regs2[i].set2(reg2);
          stk += inc_stk_for_longdouble;
        }
      } else {
        // Put double on stack.
        reg = VMRegImpl::stack2reg(stk);
        stk += inc_stk_for_longdouble;
      }
      regs[i].set2(reg);
      break;

    case T_VOID:
      // Do not count halves.
      regs[i].set_bad();
      --arg;
      break;
    default:
      ShouldNotReachHere();
    }
  }

  return align_up(stk, 2);
}
#endif // COMPILER1 || COMPILER2

static address gen_c2i_adapter(MacroAssembler *masm,
                            int total_args_passed,
                            int comp_args_on_stack,
                            const BasicType *sig_bt,
                            const VMRegPair *regs,
                            Label& call_interpreter,
                            const Register& ientry) {

  address c2i_entrypoint;

  const Register sender_SP = R21_sender_SP; // == R21_tmp1
  const Register code      = R22_tmp2;
  //const Register ientry  = R23_tmp3;
  const Register value_regs[] = { R24_tmp4, R25_tmp5, R26_tmp6 };
  const int num_value_regs = sizeof(value_regs) / sizeof(Register);
  int value_regs_index = 0;

  const Register return_pc = R27_tmp7;
  const Register tmp       = R28_tmp8;

  assert_different_registers(sender_SP, code, ientry, return_pc, tmp);

  // Adapter needs TOP_IJAVA_FRAME_ABI.
  const int adapter_size = frame::top_ijava_frame_abi_size +
      align_up(total_args_passed * wordSize, frame::alignment_in_bytes);

  // regular (verified) c2i entry point
  c2i_entrypoint = __ pc();

  // Does compiled code exist? If yes, patch the caller's callsite.
  __ ld(code, method_(code));
  __ cmpdi(CCR0, code, 0);
  __ ld(ientry, method_(interpreter_entry)); // preloaded
  __ beq(CCR0, call_interpreter);


  // Patch caller's callsite, method_(code) was not NULL which means that
  // compiled code exists.
  __ mflr(return_pc);
  __ std(return_pc, _abi(lr), R1_SP);
  RegisterSaver::push_frame_and_save_argument_registers(masm, tmp, adapter_size, total_args_passed, regs);

  __ call_VM_leaf(CAST_FROM_FN_PTR(address, SharedRuntime::fixup_callers_callsite), R19_method, return_pc);

  RegisterSaver::restore_argument_registers_and_pop_frame(masm, adapter_size, total_args_passed, regs);
  __ ld(return_pc, _abi(lr), R1_SP);
  __ ld(ientry, method_(interpreter_entry)); // preloaded
  __ mtlr(return_pc);


  // Call the interpreter.
  __ BIND(call_interpreter);
  __ mtctr(ientry);

  // Get a copy of the current SP for loading caller's arguments.
  __ mr(sender_SP, R1_SP);

  // Add space for the adapter.
  __ resize_frame(-adapter_size, R12_scratch2);

  int st_off = adapter_size - wordSize;

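  // Note (illustrative, added for clarity): the loop below copies the
  // compiler's outgoing arguments into the interpreter's expected layout,
  // walking st_off downwards from the top of the adapter area. Each Java
  // argument word gets a 64-bit slot; for a (long, int) signature the long
  // occupies two slots with the data in the lower one (the upper one is
  // junk-filled in debug builds) and the int occupies one slot. R15_esp is
  // then pointed at the resulting top of the expression stack.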
  // Write the args into the outgoing interpreter space.
  for (int i = 0; i < total_args_passed; i++) {
    VMReg r_1 = regs[i].first();
    VMReg r_2 = regs[i].second();
    if (!r_1->is_valid()) {
      assert(!r_2->is_valid(), "");
      continue;
    }
    if (r_1->is_stack()) {
      Register tmp_reg = value_regs[value_regs_index];
      value_regs_index = (value_regs_index + 1) % num_value_regs;
      // The calling convention produces OptoRegs that ignore the out
      // preserve area (JIT's ABI). We must account for it here.
      int ld_off = (r_1->reg2stack() + SharedRuntime::out_preserve_stack_slots()) * VMRegImpl::stack_slot_size;
      if (!r_2->is_valid()) {
        __ lwz(tmp_reg, ld_off, sender_SP);
      } else {
        __ ld(tmp_reg, ld_off, sender_SP);
      }
      // Pretend stack targets were loaded into tmp_reg.
      r_1 = tmp_reg->as_VMReg();
    }

    if (r_1->is_Register()) {
      Register r = r_1->as_Register();
      if (!r_2->is_valid()) {
        __ stw(r, st_off, R1_SP);
        st_off-=wordSize;
      } else {
        // Longs are given 2 64-bit slots in the interpreter, but the
        // data is passed in only 1 slot.
        if (sig_bt[i] == T_LONG || sig_bt[i] == T_DOUBLE) {
          DEBUG_ONLY( __ li(tmp, 0); __ std(tmp, st_off, R1_SP); )
          st_off-=wordSize;
        }
        __ std(r, st_off, R1_SP);
        st_off-=wordSize;
      }
    } else {
      assert(r_1->is_FloatRegister(), "");
      FloatRegister f = r_1->as_FloatRegister();
      if (!r_2->is_valid()) {
        __ stfs(f, st_off, R1_SP);
        st_off-=wordSize;
      } else {
        // In 64bit, doubles are given 2 64-bit slots in the interpreter, but the
        // data is passed in only 1 slot.
        // One of these should get known junk...
        DEBUG_ONLY( __ li(tmp, 0); __ std(tmp, st_off, R1_SP); )
        st_off-=wordSize;
        __ stfd(f, st_off, R1_SP);
        st_off-=wordSize;
      }
    }
  }

  // Jump to the interpreter just as if interpreter was doing it.

  __ load_const_optimized(R25_templateTableBase, (address)Interpreter::dispatch_table((TosState)0), R11_scratch1);

  // load TOS
  __ addi(R15_esp, R1_SP, st_off);

  // Frame_manager expects initial_caller_sp (= SP without resize by c2i) in R21_tmp1.
  assert(sender_SP == R21_sender_SP, "passing initial caller's SP in wrong register");
  __ bctr();

  return c2i_entrypoint;
}

void SharedRuntime::gen_i2c_adapter(MacroAssembler *masm,
                                    int total_args_passed,
                                    int comp_args_on_stack,
                                    const BasicType *sig_bt,
                                    const VMRegPair *regs) {

  // Load method's entry-point from method.
  __ ld(R12_scratch2, in_bytes(Method::from_compiled_offset()), R19_method);
  __ mtctr(R12_scratch2);

  // We will only enter here from an interpreted frame and never from after
  // passing thru a c2i. Azul allowed this but we do not. If we lose the
  // race and use a c2i we will remain interpreted for the race loser(s).
  // This removes all sorts of headaches on the x86 side and also eliminates
  // the possibility of having c2i -> i2c -> c2i -> ... endless transitions.

  // Note: r13 contains the senderSP on entry. We must preserve it since
  // we may do a i2c -> c2i transition if we lose a race where compiled
  // code goes non-entrant while we get args ready.
  // In addition we use r13 to locate all the interpreter args as
  // we must align the stack to 16 bytes on an i2c entry else we
  // lose alignment we expect in all compiled code and register
  // save code can segv when fxsave instructions find improperly
  // aligned stack pointer.

  const Register ld_ptr = R15_esp;
  const Register value_regs[] = { R22_tmp2, R23_tmp3, R24_tmp4, R25_tmp5, R26_tmp6 };
  const int num_value_regs = sizeof(value_regs) / sizeof(Register);
  int value_regs_index = 0;

  int ld_offset = total_args_passed*wordSize;

  // Cut-out for having no stack args. Since up to 8 int/oop args are passed
  // in registers, we will occasionally have no stack args.
  int comp_words_on_stack = 0;
  if (comp_args_on_stack) {
    // Sig words on the stack are greater-than VMRegImpl::stack0. Those in
    // registers are below. By subtracting stack0, we either get a negative
    // number (all values in registers) or the maximum stack slot accessed.

    // Convert 4-byte c2 stack slots to words.
    comp_words_on_stack = align_up(comp_args_on_stack*VMRegImpl::stack_slot_size, wordSize)>>LogBytesPerWord;
    // Round up to minimum stack alignment, in wordSize.
    comp_words_on_stack = align_up(comp_words_on_stack, 2);
    __ resize_frame(-comp_words_on_stack * wordSize, R11_scratch1);
  }
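  // Worked example for the resize above (illustrative, added for clarity):
  // with comp_args_on_stack == 5 (five 4-byte VMReg slots),
  // align_up(5 * 4, wordSize) >> LogBytesPerWord yields 3 words, which
  // align_up(3, 2) rounds to 4 words, so the frame is extended by 32 bytes
  // and the expected 16-byte stack alignment is preserved.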

  // Now generate the shuffle code.  Pick up all register args and move the
  // rest through the value_regs scratch registers.
  BLOCK_COMMENT("Shuffle arguments");
  for (int i = 0; i < total_args_passed; i++) {
    if (sig_bt[i] == T_VOID) {
      assert(i > 0 && (sig_bt[i-1] == T_LONG || sig_bt[i-1] == T_DOUBLE), "missing half");
      continue;
    }

    // Pick up 0, 1 or 2 words from ld_ptr.
    assert(!regs[i].second()->is_valid() || regs[i].first()->next() == regs[i].second(),
            "scrambled load targets?");
    VMReg r_1 = regs[i].first();
    VMReg r_2 = regs[i].second();
    if (!r_1->is_valid()) {
      assert(!r_2->is_valid(), "");
      continue;
    }
    if (r_1->is_FloatRegister()) {
      if (!r_2->is_valid()) {
        __ lfs(r_1->as_FloatRegister(), ld_offset, ld_ptr);
        ld_offset-=wordSize;
      } else {
        // Skip the unused interpreter slot.
        __ lfd(r_1->as_FloatRegister(), ld_offset-wordSize, ld_ptr);
        ld_offset-=2*wordSize;
      }
    } else {
      Register r;
      if (r_1->is_stack()) {
        // Must do a memory to memory move thru "value".
        r = value_regs[value_regs_index];
        value_regs_index = (value_regs_index + 1) % num_value_regs;
      } else {
        r = r_1->as_Register();
      }
      if (!r_2->is_valid()) {
        // Not sure we need to do this but it shouldn't hurt.
        if (sig_bt[i] == T_OBJECT || sig_bt[i] == T_ADDRESS || sig_bt[i] == T_ARRAY) {
          __ ld(r, ld_offset, ld_ptr);
          ld_offset-=wordSize;
        } else {
          __ lwz(r, ld_offset, ld_ptr);
          ld_offset-=wordSize;
        }
      } else {
        // In 64bit, longs are given 2 64-bit slots in the interpreter, but the
        // data is passed in only 1 slot.
        if (sig_bt[i] == T_LONG || sig_bt[i] == T_DOUBLE) {
          ld_offset-=wordSize;
        }
        __ ld(r, ld_offset, ld_ptr);
        ld_offset-=wordSize;
      }

      if (r_1->is_stack()) {
        // Now store value where the compiler expects it
        int st_off = (r_1->reg2stack() + SharedRuntime::out_preserve_stack_slots())*VMRegImpl::stack_slot_size;

        if (sig_bt[i] == T_INT   || sig_bt[i] == T_FLOAT ||sig_bt[i] == T_BOOLEAN ||
            sig_bt[i] == T_SHORT || sig_bt[i] == T_CHAR  || sig_bt[i] == T_BYTE) {
          __ stw(r, st_off, R1_SP);
        } else {
          __ std(r, st_off, R1_SP);
        }
      }
    }
  }

  BLOCK_COMMENT("Store method");
  // Store method into thread->callee_target.
  // We might end up in handle_wrong_method if the callee is
  // deoptimized as we race thru here. If that happens we don't want
  // to take a safepoint because the caller frame will look
  // interpreted and arguments are now "compiled" so it is much better
  // to make this transition invisible to the stack walking
  // code. Unfortunately if we try and find the callee by normal means
  // a safepoint is possible. So we stash the desired callee in the
  // thread and the vm will find it there should this case occur.
  __ std(R19_method, thread_(callee_target));

  // Jump to the compiled code just as if compiled code was doing it.
  __ bctr();
}

AdapterHandlerEntry* SharedRuntime::generate_i2c2i_adapters(MacroAssembler *masm,
                                                            int total_args_passed,
                                                            int comp_args_on_stack,
                                                            const BasicType *sig_bt,
                                                            const VMRegPair *regs,
                                                            AdapterFingerPrint* fingerprint) {
  address i2c_entry;
  address c2i_unverified_entry;
  address c2i_entry;


  // entry: i2c

  __ align(CodeEntryAlignment);
  i2c_entry = __ pc();
  gen_i2c_adapter(masm, total_args_passed, comp_args_on_stack, sig_bt, regs);


  // entry: c2i unverified

  __ align(CodeEntryAlignment);
  BLOCK_COMMENT("c2i unverified entry");
  c2i_unverified_entry = __ pc();

  // inline_cache contains a compiledICHolder
  const Register ic             = R19_method;
  const Register ic_klass       = R11_scratch1;
  const Register receiver_klass = R12_scratch2;
  const Register code           = R21_tmp1;
  const Register ientry         = R23_tmp3;

  assert_different_registers(ic, ic_klass, receiver_klass, R3_ARG1, code, ientry);
  assert(R11_scratch1 == R11, "need prologue scratch register");

  Label call_interpreter;

  assert(!MacroAssembler::needs_explicit_null_check(oopDesc::klass_offset_in_bytes()),
         "klass offset should reach into any page");
  // Check for NULL argument if we don't have implicit null checks.
  if (!ImplicitNullChecks || !os::zero_page_read_protected()) {
    if (TrapBasedNullChecks) {
      __ trap_null_check(R3_ARG1);
    } else {
      Label valid;
      __ cmpdi(CCR0, R3_ARG1, 0);
      __ bne_predict_taken(CCR0, valid);
      // We have a null argument, branch to ic_miss_stub.
      __ b64_patchable((address)SharedRuntime::get_ic_miss_stub(),
                       relocInfo::runtime_call_type);
      __ BIND(valid);
    }
  }
  // Assume argument is not NULL, load klass from receiver.
  __ load_klass(receiver_klass, R3_ARG1);

  __ ld(ic_klass, CompiledICHolder::holder_klass_offset(), ic);

  if (TrapBasedICMissChecks) {
    __ trap_ic_miss_check(receiver_klass, ic_klass);
  } else {
    Label valid;
    __ cmpd(CCR0, receiver_klass, ic_klass);
    __ beq_predict_taken(CCR0, valid);
    // We have an unexpected klass, branch to ic_miss_stub.
    __ b64_patchable((address)SharedRuntime::get_ic_miss_stub(),
                     relocInfo::runtime_call_type);
    __ BIND(valid);
  }

  // Argument is valid and klass is as expected, continue.

  // Extract method from inline cache, verified entry point needs it.
  __ ld(R19_method, CompiledICHolder::holder_method_offset(), ic);
  assert(R19_method == ic, "the inline cache register is dead here");

  __ ld(code, method_(code));
  __ cmpdi(CCR0, code, 0);
  __ ld(ientry, method_(interpreter_entry)); // preloaded
  __ beq_predict_taken(CCR0, call_interpreter);

  // Branch to ic_miss_stub.
  __ b64_patchable((address)SharedRuntime::get_ic_miss_stub(), relocInfo::runtime_call_type);

  // entry: c2i

  c2i_entry = gen_c2i_adapter(masm, total_args_passed, comp_args_on_stack, sig_bt, regs, call_interpreter, ientry);

  return AdapterHandlerLibrary::new_entry(fingerprint, i2c_entry, c2i_entry, c2i_unverified_entry);
}

#ifdef COMPILER2
// An oop arg. Must pass a handle not the oop itself.
static void object_move(MacroAssembler* masm,
                        int frame_size_in_slots,
                        OopMap* oop_map, int oop_handle_offset,
                        bool is_receiver, int* receiver_offset,
                        VMRegPair src, VMRegPair dst,
                        Register r_caller_sp, Register r_temp_1, Register r_temp_2) {
  assert(!is_receiver || (is_receiver && (*receiver_offset == -1)),
         "receiver has already been moved");

  // We must pass a handle. First figure out the location we use as a handle.

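  // Note (illustrative, added for clarity): JNI receives object arguments as
  // handles, i.e. as the address of a slot containing the oop rather than the
  // oop itself. The two cases below therefore either reuse the caller's stack
  // slot as the handle (oop already in memory) or spill the oop into this
  // frame's handle area and pass that slot's address; a NULL oop is passed as
  // a NULL handle.
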
  if (src.first()->is_stack()) {
    // stack to stack or reg

    const Register r_handle = dst.first()->is_stack() ? r_temp_1 : dst.first()->as_Register();
    Label skip;
    const int oop_slot_in_callers_frame = reg2slot(src.first());

    guarantee(!is_receiver, "expecting receiver in register");
    oop_map->set_oop(VMRegImpl::stack2reg(oop_slot_in_callers_frame + frame_size_in_slots));

    __ addi(r_handle, r_caller_sp, reg2offset(src.first()));
    __ ld(  r_temp_2, reg2offset(src.first()), r_caller_sp);
    __ cmpdi(CCR0, r_temp_2, 0);
    __ bne(CCR0, skip);
    // Use a NULL handle if oop is NULL.
    __ li(r_handle, 0);
    __ bind(skip);

    if (dst.first()->is_stack()) {
      // stack to stack
      __ std(r_handle, reg2offset(dst.first()), R1_SP);
    } else {
      // stack to reg
      // Nothing to do, r_handle is already the dst register.
    }
  } else {
    // reg to stack or reg
    const Register r_oop      = src.first()->as_Register();
    const Register r_handle   = dst.first()->is_stack() ? r_temp_1 : dst.first()->as_Register();
    const int oop_slot        = (r_oop->encoding()-R3_ARG1->encoding()) * VMRegImpl::slots_per_word
                                + oop_handle_offset; // in slots
    const int oop_offset = oop_slot * VMRegImpl::stack_slot_size;
    Label skip;

    if (is_receiver) {
      *receiver_offset = oop_offset;
    }
    oop_map->set_oop(VMRegImpl::stack2reg(oop_slot));

    __ std( r_oop,    oop_offset, R1_SP);
    __ addi(r_handle, R1_SP, oop_offset);

    __ cmpdi(CCR0, r_oop, 0);
    __ bne(CCR0, skip);
    // Use a NULL handle if oop is NULL.
    __ li(r_handle, 0);
    __ bind(skip);

    if (dst.first()->is_stack()) {
      // reg to stack
      __ std(r_handle, reg2offset(dst.first()), R1_SP);
    } else {
      // reg to reg
      // Nothing to do, r_handle is already the dst register.
    }
  }
}

static void int_move(MacroAssembler*masm,
                     VMRegPair src, VMRegPair dst,
                     Register r_caller_sp, Register r_temp) {
  assert(src.first()->is_valid(), "incoming must be int");
  assert(dst.first()->is_valid() && dst.second() == dst.first()->next(), "outgoing must be long");

  if (src.first()->is_stack()) {
    if (dst.first()->is_stack()) {
      // stack to stack
      __ lwa(r_temp, reg2offset(src.first()), r_caller_sp);
      __ std(r_temp, reg2offset(dst.first()), R1_SP);
    } else {
      // stack to reg
      __ lwa(dst.first()->as_Register(), reg2offset(src.first()), r_caller_sp);
    }
  } else if (dst.first()->is_stack()) {
    // reg to stack
    __ extsw(r_temp, src.first()->as_Register());
    __ std(r_temp, reg2offset(dst.first()), R1_SP);
  } else {
    // reg to reg
    __ extsw(dst.first()->as_Register(), src.first()->as_Register());
  }
}

static void long_move(MacroAssembler*masm,
                      VMRegPair src, VMRegPair dst,
                      Register r_caller_sp, Register r_temp) {
  assert(src.first()->is_valid() && src.second() == src.first()->next(), "incoming must be long");
  assert(dst.first()->is_valid() && dst.second() == dst.first()->next(), "outgoing must be long");

  if (src.first()->is_stack()) {
    if (dst.first()->is_stack()) {
      // stack to stack
      __ ld( r_temp, reg2offset(src.first()), r_caller_sp);
      __ std(r_temp, reg2offset(dst.first()), R1_SP);
    } else {
      // stack to reg
      __ ld(dst.first()->as_Register(), reg2offset(src.first()), r_caller_sp);
    }
  } else if (dst.first()->is_stack()) {
    // reg to stack
    __ std(src.first()->as_Register(), reg2offset(dst.first()), R1_SP);
  } else {
    // reg to reg
    if (dst.first()->as_Register() != src.first()->as_Register())
      __ mr(dst.first()->as_Register(), src.first()->as_Register());
  }
}

static void float_move(MacroAssembler*masm,
                       VMRegPair src, VMRegPair dst,
                       Register r_caller_sp, Register r_temp) {
  assert(src.first()->is_valid() && !src.second()->is_valid(), "incoming must be float");
  assert(dst.first()->is_valid() && !dst.second()->is_valid(), "outgoing must be float");

  if (src.first()->is_stack()) {
    if (dst.first()->is_stack()) {
      // stack to stack
      __ lwz(r_temp, reg2offset(src.first()), r_caller_sp);
      __ stw(r_temp, reg2offset(dst.first()), R1_SP);
    } else {
      // stack to reg
      __ lfs(dst.first()->as_FloatRegister(), reg2offset(src.first()), r_caller_sp);
    }
  } else if (dst.first()->is_stack()) {
    // reg to stack
    __ stfs(src.first()->as_FloatRegister(), reg2offset(dst.first()), R1_SP);
  } else {
    // reg to reg
    if (dst.first()->as_FloatRegister() != src.first()->as_FloatRegister())
      __ fmr(dst.first()->as_FloatRegister(), src.first()->as_FloatRegister());
  }
}

static void double_move(MacroAssembler*masm,
                        VMRegPair src, VMRegPair dst,
                        Register r_caller_sp, Register r_temp) {
  assert(src.first()->is_valid() && src.second() == src.first()->next(), "incoming must be double");
  assert(dst.first()->is_valid() && dst.second() == dst.first()->next(), "outgoing must be double");

  if (src.first()->is_stack()) {
    if (dst.first()->is_stack()) {
      // stack to stack
      __ ld( r_temp, reg2offset(src.first()), r_caller_sp);
      __ std(r_temp, reg2offset(dst.first()), R1_SP);
    } else {
      // stack to reg
      __ lfd(dst.first()->as_FloatRegister(), reg2offset(src.first()), r_caller_sp);
    }
  } else if (dst.first()->is_stack()) {
    // reg to stack
    __ stfd(src.first()->as_FloatRegister(), reg2offset(dst.first()), R1_SP);
  } else {
    // reg to reg
    if (dst.first()->as_FloatRegister() != src.first()->as_FloatRegister())
      __ fmr(dst.first()->as_FloatRegister(), src.first()->as_FloatRegister());
  }
}

void SharedRuntime::save_native_result(MacroAssembler *masm, BasicType ret_type, int frame_slots) {
  switch (ret_type) {
    case T_BOOLEAN:
    case T_CHAR:
    case T_BYTE:
    case T_SHORT:
    case T_INT:
      __ stw (R3_RET,  frame_slots*VMRegImpl::stack_slot_size, R1_SP);
      break;
    case T_ARRAY:
    case T_OBJECT:
    case T_LONG:
      __ std (R3_RET,  frame_slots*VMRegImpl::stack_slot_size, R1_SP);
      break;
    case T_FLOAT:
      __ stfs(F1_RET, frame_slots*VMRegImpl::stack_slot_size, R1_SP);
      break;
    case T_DOUBLE:
      __ stfd(F1_RET, frame_slots*VMRegImpl::stack_slot_size, R1_SP);
      break;
    case T_VOID:
      break;
    default:
      ShouldNotReachHere();
      break;
  }
}

void SharedRuntime::restore_native_result(MacroAssembler *masm, BasicType ret_type, int frame_slots) {
  switch (ret_type) {
    case T_BOOLEAN:
    case T_CHAR:
    case T_BYTE:
    case T_SHORT:
    case T_INT:
      __ lwz(R3_RET,  frame_slots*VMRegImpl::stack_slot_size, R1_SP);
      break;
    case T_ARRAY:
    case T_OBJECT:
    case T_LONG:
      __ ld (R3_RET,  frame_slots*VMRegImpl::stack_slot_size, R1_SP);
      break;
    case T_FLOAT:
      __ lfs(F1_RET, frame_slots*VMRegImpl::stack_slot_size, R1_SP);
      break;
    case T_DOUBLE:
      __ lfd(F1_RET, frame_slots*VMRegImpl::stack_slot_size, R1_SP);
      break;
    case T_VOID:
      break;
    default:
      ShouldNotReachHere();
      break;
  }
}

static void save_or_restore_arguments(MacroAssembler* masm,
                                      const int stack_slots,
                                      const int total_in_args,
                                      const int arg_save_area,
                                      OopMap* map,
                                      VMRegPair* in_regs,
                                      BasicType* in_sig_bt) {
  // If map is non-NULL then the code should store the values,
  // otherwise it should load them.
  int slot = arg_save_area;
  // Save down double word first.
  for (int i = 0; i < total_in_args; i++) {
    if (in_regs[i].first()->is_FloatRegister() && in_sig_bt[i] == T_DOUBLE) {
      int offset = slot * VMRegImpl::stack_slot_size;
      slot += VMRegImpl::slots_per_word;
      assert(slot <= stack_slots, "overflow (after DOUBLE stack slot)");
      if (map != NULL) {
        __ stfd(in_regs[i].first()->as_FloatRegister(), offset, R1_SP);
      } else {
        __ lfd(in_regs[i].first()->as_FloatRegister(), offset, R1_SP);
      }
    } else if (in_regs[i].first()->is_Register() &&
        (in_sig_bt[i] == T_LONG || in_sig_bt[i] == T_ARRAY)) {
      int offset = slot * VMRegImpl::stack_slot_size;
      if (map != NULL) {
        __ std(in_regs[i].first()->as_Register(), offset, R1_SP);
        if (in_sig_bt[i] == T_ARRAY) {
          map->set_oop(VMRegImpl::stack2reg(slot));
        }
      } else {
        __ ld(in_regs[i].first()->as_Register(), offset, R1_SP);
      }
      slot += VMRegImpl::slots_per_word;
      assert(slot <= stack_slots, "overflow (after LONG/ARRAY stack slot)");
    }
  }
  // Save or restore single word registers.
  for (int i = 0; i < total_in_args; i++) {
    // PPC64: pass ints as longs: must only deal with floats here.
    if (in_regs[i].first()->is_FloatRegister()) {
      if (in_sig_bt[i] == T_FLOAT) {
        int offset = slot * VMRegImpl::stack_slot_size;
        slot++;
        assert(slot <= stack_slots, "overflow (after FLOAT stack slot)");
        if (map != NULL) {
          __ stfs(in_regs[i].first()->as_FloatRegister(), offset, R1_SP);
        } else {
          __ lfs(in_regs[i].first()->as_FloatRegister(), offset, R1_SP);
        }
      }
    } else if (in_regs[i].first()->is_stack()) {
      if (in_sig_bt[i] == T_ARRAY && map != NULL) {
        int offset_in_older_frame = in_regs[i].first()->reg2stack() + SharedRuntime::out_preserve_stack_slots();
        map->set_oop(VMRegImpl::stack2reg(offset_in_older_frame + stack_slots));
      }
    }
  }
}

// Check GCLocker::needs_gc and enter the runtime if it's true. This
// keeps a new JNI critical region from starting until a GC has been
// forced. Save down any oops in registers and describe them in an
// OopMap.
static void check_needs_gc_for_critical_native(MacroAssembler* masm,
                                               const int stack_slots,
                                               const int total_in_args,
                                               const int arg_save_area,
                                               OopMapSet* oop_maps,
                                               VMRegPair* in_regs,
                                               BasicType* in_sig_bt,
                                               Register tmp_reg ) {
  __ block_comment("check GCLocker::needs_gc");
  Label cont;
  __ lbz(tmp_reg, (RegisterOrConstant)(intptr_t)GCLocker::needs_gc_address());
  __ cmplwi(CCR0, tmp_reg, 0);
  __ beq(CCR0, cont);

  // Save down any values that are live in registers and call into the
  // runtime to halt for a GC.
  OopMap* map = new OopMap(stack_slots * 2, 0 /* arg_slots*/);
  save_or_restore_arguments(masm, stack_slots, total_in_args,
                            arg_save_area, map, in_regs, in_sig_bt);

  __ mr(R3_ARG1, R16_thread);
  __ set_last_Java_frame(R1_SP, noreg);

1520  __ block_comment("block_for_jni_critical");
1521  address entry_point = CAST_FROM_FN_PTR(address, SharedRuntime::block_for_jni_critical);
1522#if defined(ABI_ELFv2)
1523  __ call_c(entry_point, relocInfo::runtime_call_type);
1524#else
1525  __ call_c(CAST_FROM_FN_PTR(FunctionDescriptor*, entry_point), relocInfo::runtime_call_type);
1526#endif
1527  address start           = __ pc() - __ offset(),
1528          calls_return_pc = __ last_calls_return_pc();
1529  oop_maps->add_gc_map(calls_return_pc - start, map);
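  // Register the oop map at the runtime call's return pc (as an offset from
  // the start of the generated code) so a GC that occurs while the thread is
  // blocked can find the saved argument oops.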
1530
1531  __ reset_last_Java_frame();
1532
1533  // Reload all the register arguments.
1534  save_or_restore_arguments(masm, stack_slots, total_in_args,
1535                            arg_save_area, NULL, in_regs, in_sig_bt);
1536
1537  __ BIND(cont);
1538
1539#ifdef ASSERT
1540  if (StressCriticalJNINatives) {
1541    // Stress register saving.
1542    OopMap* map = new OopMap(stack_slots * 2, 0 /* arg_slots*/);
1543    save_or_restore_arguments(masm, stack_slots, total_in_args,
1544                              arg_save_area, map, in_regs, in_sig_bt);
1545    // Destroy argument registers.
1546    for (int i = 0; i < total_in_args; i++) {
1547      if (in_regs[i].first()->is_Register()) {
1548        const Register reg = in_regs[i].first()->as_Register();
1549        __ neg(reg, reg);
1550      } else if (in_regs[i].first()->is_FloatRegister()) {
1551        __ fneg(in_regs[i].first()->as_FloatRegister(), in_regs[i].first()->as_FloatRegister());
1552      }
1553    }
1554
1555    save_or_restore_arguments(masm, stack_slots, total_in_args,
1556                              arg_save_area, NULL, in_regs, in_sig_bt);
1557  }
1558#endif
1559}
1560
1561static void move_ptr(MacroAssembler* masm, VMRegPair src, VMRegPair dst, Register r_caller_sp, Register r_temp) {
1562  if (src.first()->is_stack()) {
1563    if (dst.first()->is_stack()) {
1564      // stack to stack
1565      __ ld(r_temp, reg2offset(src.first()), r_caller_sp);
1566      __ std(r_temp, reg2offset(dst.first()), R1_SP);
1567    } else {
1568      // stack to reg
1569      __ ld(dst.first()->as_Register(), reg2offset(src.first()), r_caller_sp);
1570    }
1571  } else if (dst.first()->is_stack()) {
1572    // reg to stack
1573    __ std(src.first()->as_Register(), reg2offset(dst.first()), R1_SP);
1574  } else {
1575    if (dst.first() != src.first()) {
1576      __ mr(dst.first()->as_Register(), src.first()->as_Register());
1577    }
1578  }
1579}
1580
1581// Unpack an array argument into a pointer to the body and the length
1582// if the array is non-null, otherwise pass 0 for both.
1583static void unpack_array_argument(MacroAssembler* masm, VMRegPair reg, BasicType in_elem_type,
1584                                  VMRegPair body_arg, VMRegPair length_arg, Register r_caller_sp,
1585                                  Register tmp_reg, Register tmp2_reg) {
1586  assert(!body_arg.first()->is_Register() || body_arg.first()->as_Register() != tmp_reg,
1587         "possible collision");
1588  assert(!length_arg.first()->is_Register() || length_arg.first()->as_Register() != tmp_reg,
1589         "possible collision");
1590
1591  // Pass the length, ptr pair.
1592  Label set_out_args;
1593  VMRegPair tmp, tmp2;
1594  tmp.set_ptr(tmp_reg->as_VMReg());
1595  tmp2.set_ptr(tmp2_reg->as_VMReg());
1596  if (reg.first()->is_stack()) {
1597    // Load the arg up from the stack.
1598    move_ptr(masm, reg, tmp, r_caller_sp, /*unused*/ R0);
1599    reg = tmp;
1600  }
1601  __ li(tmp2_reg, 0); // Pass zeros if Array=null.
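  // Only pre-clear tmp_reg if it does not already hold the array oop;
  // otherwise the pointer would be destroyed before the null check below.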
1602  if (tmp_reg != reg.first()->as_Register()) __ li(tmp_reg, 0);
1603  __ cmpdi(CCR0, reg.first()->as_Register(), 0);
1604  __ beq(CCR0, set_out_args);
1605  __ lwa(tmp2_reg, arrayOopDesc::length_offset_in_bytes(), reg.first()->as_Register());
1606  __ addi(tmp_reg, reg.first()->as_Register(), arrayOopDesc::base_offset_in_bytes(in_elem_type));
1607  __ bind(set_out_args);
1608  move_ptr(masm, tmp, body_arg, r_caller_sp, /*unused*/ R0);
1609  move_ptr(masm, tmp2, length_arg, r_caller_sp, /*unused*/ R0); // Same as move32_64 on PPC64.
1610}
1611
1612static void verify_oop_args(MacroAssembler* masm,
1613                            methodHandle method,
1614                            const BasicType* sig_bt,
1615                            const VMRegPair* regs) {
1616  Register temp_reg = R19_method;  // not part of any compiled calling seq
1617  if (VerifyOops) {
1618    for (int i = 0; i < method->size_of_parameters(); i++) {
1619      if (sig_bt[i] == T_OBJECT ||
1620          sig_bt[i] == T_ARRAY) {
1621        VMReg r = regs[i].first();
1622        assert(r->is_valid(), "bad oop arg");
1623        if (r->is_stack()) {
1624          __ ld(temp_reg, reg2offset(r), R1_SP);
1625          __ verify_oop(temp_reg);
1626        } else {
1627          __ verify_oop(r->as_Register());
1628        }
1629      }
1630    }
1631  }
1632}
1633
1634static void gen_special_dispatch(MacroAssembler* masm,
1635                                 methodHandle method,
1636                                 const BasicType* sig_bt,
1637                                 const VMRegPair* regs) {
1638  verify_oop_args(masm, method, sig_bt, regs);
1639  vmIntrinsics::ID iid = method->intrinsic_id();
1640
1641  // Now write the args into the outgoing interpreter space
1642  bool     has_receiver   = false;
1643  Register receiver_reg   = noreg;
1644  int      member_arg_pos = -1;
1645  Register member_reg     = noreg;
1646  int      ref_kind       = MethodHandles::signature_polymorphic_intrinsic_ref_kind(iid);
1647  if (ref_kind != 0) {
1648    member_arg_pos = method->size_of_parameters() - 1;  // trailing MemberName argument
1649    member_reg = R19_method;  // known to be free at this point
1650    has_receiver = MethodHandles::ref_kind_has_receiver(ref_kind);
1651  } else if (iid == vmIntrinsics::_invokeBasic) {
1652    has_receiver = true;
1653  } else {
1654    fatal("unexpected intrinsic id %d", iid);
1655  }
1656
1657  if (member_reg != noreg) {
1658    // Load the member_arg into register, if necessary.
1659    SharedRuntime::check_member_name_argument_is_last_argument(method, sig_bt, regs);
1660    VMReg r = regs[member_arg_pos].first();
1661    if (r->is_stack()) {
1662      __ ld(member_reg, reg2offset(r), R1_SP);
1663    } else {
1664      // no data motion is needed
1665      member_reg = r->as_Register();
1666    }
1667  }
1668
1669  if (has_receiver) {
1670    // Make sure the receiver is loaded into a register.
1671    assert(method->size_of_parameters() > 0, "oob");
1672    assert(sig_bt[0] == T_OBJECT, "receiver argument must be an object");
1673    VMReg r = regs[0].first();
1674    assert(r->is_valid(), "bad receiver arg");
1675    if (r->is_stack()) {
1676      // Porting note:  This assumes that compiled calling conventions always
1677      // pass the receiver oop in a register.  If this is not true on some
1678      // platform, pick a temp and load the receiver from stack.
1679      fatal("receiver always in a register");
1680      receiver_reg = R11_scratch1;  // TODO (hs24): is R11_scratch1 really free at this point?
1681      __ ld(receiver_reg, reg2offset(r), R1_SP);
1682    } else {
1683      // no data motion is needed
1684      receiver_reg = r->as_Register();
1685    }
1686  }
1687
1688  // Figure out which address we are really jumping to:
1689  MethodHandles::generate_method_handle_dispatch(masm, iid,
1690                                                 receiver_reg, member_reg, /*for_compiler_entry:*/ true);
1691}
1692
1693#endif // COMPILER2
1694
1695// ---------------------------------------------------------------------------
1696// Generate a native wrapper for a given method. The method takes arguments
1697// in the Java compiled code convention, marshals them to the native
1698// convention (handlizes oops, etc), transitions to native, makes the call,
1699// returns to java state (possibly blocking), unhandlizes any result and
1700// returns.
1701//
1702// Critical native functions are a shorthand for the use of
1703// GetPrimitiveArrayCritical and disallow the use of any other JNI
1704// functions.  The wrapper is expected to unpack the arguments before
1705// passing them to the callee and to perform checks before and after the
1706// native call to ensure that the GCLocker
1707// lock_critical/unlock_critical semantics are followed.  Some other
1708// parts of JNI setup are skipped, like the tear down of the JNI handle
1709// block and the check for pending exceptions, since it's impossible for them
1710// to be thrown.
1711//
1712// They are roughly structured like this:
1713//   if (GCLocker::needs_gc())
1714//     SharedRuntime::block_for_jni_critical();
1715//   transition to thread_in_native
1716//   unpack array arguments and call native entry point
1717//   check for safepoint in progress
1718//   check if any thread suspend flags are set
1719//     call into JVM and possibly unlock the JNI critical
1720//     if a GC was suppressed while in the critical native.
1721//   transition back to thread_in_Java
1722//   return to caller
1723//
1724nmethod *SharedRuntime::generate_native_wrapper(MacroAssembler *masm,
1725                                                const methodHandle& method,
1726                                                int compile_id,
1727                                                BasicType *in_sig_bt,
1728                                                VMRegPair *in_regs,
1729                                                BasicType ret_type) {
1730#ifdef COMPILER2
1731  if (method->is_method_handle_intrinsic()) {
1732    vmIntrinsics::ID iid = method->intrinsic_id();
1733    intptr_t start = (intptr_t)__ pc();
1734    int vep_offset = ((intptr_t)__ pc()) - start;
1735    gen_special_dispatch(masm,
1736                         method,
1737                         in_sig_bt,
1738                         in_regs);
1739    int frame_complete = ((intptr_t)__ pc()) - start;  // not complete, period
1740    __ flush();
1741    int stack_slots = SharedRuntime::out_preserve_stack_slots();  // no out slots at all, actually
1742    return nmethod::new_native_nmethod(method,
1743                                       compile_id,
1744                                       masm->code(),
1745                                       vep_offset,
1746                                       frame_complete,
1747                                       stack_slots / VMRegImpl::slots_per_word,
1748                                       in_ByteSize(-1),
1749                                       in_ByteSize(-1),
1750                                       (OopMapSet*)NULL);
1751  }
1752
1753  bool is_critical_native = true;
1754  address native_func = method->critical_native_function();
1755  if (native_func == NULL) {
1756    native_func = method->native_function();
1757    is_critical_native = false;
1758  }
1759  assert(native_func != NULL, "must have function");
1760
1761  // First, create signature for outgoing C call
1762  // --------------------------------------------------------------------------
1763
1764  int total_in_args = method->size_of_parameters();
1765  // We have received a description of where all the java args are located
1766  // on entry to the wrapper. We need to convert these args to where
1767  // the jni function will expect them. To figure out where they go
1768  // we convert the java signature to a C signature by inserting
1769  // the hidden arguments as arg[0] and possibly arg[1] (static method)
1770
1771  // Calculate the total number of C arguments and create arrays for the
1772  // signature and the outgoing registers.
1773  // On ppc64, we have two arrays for the outgoing registers, because
1774  // some floating-point arguments must be passed in registers _and_
1775  // in stack locations.
1776  bool method_is_static = method->is_static();
1777  int  total_c_args     = total_in_args;
1778
1779  if (!is_critical_native) {
1780    int n_hidden_args = method_is_static ? 2 : 1;
1781    total_c_args += n_hidden_args;
1782  } else {
1783    // No JNIEnv*, no this*, but unpacked arrays (base+length).
1784    for (int i = 0; i < total_in_args; i++) {
1785      if (in_sig_bt[i] == T_ARRAY) {
1786        total_c_args++;
1787      }
1788    }
1789  }
1790
1791  BasicType *out_sig_bt = NEW_RESOURCE_ARRAY(BasicType, total_c_args);
1792  VMRegPair *out_regs   = NEW_RESOURCE_ARRAY(VMRegPair, total_c_args);
1793  VMRegPair *out_regs2  = NEW_RESOURCE_ARRAY(VMRegPair, total_c_args);
1794  BasicType* in_elem_bt = NULL;
1795
1796  // Create the signature for the C call:
1797  //   1) add the JNIEnv*
1798  //   2) add the class if the method is static
1799  //   3) copy the rest of the incoming signature (shifted by the number of
1800  //      hidden arguments).
1801
1802  int argc = 0;
1803  if (!is_critical_native) {
1804    out_sig_bt[argc++] = T_ADDRESS;
1805    if (method->is_static()) {
1806      out_sig_bt[argc++] = T_OBJECT;
1807    }
1808
1809    for (int i = 0; i < total_in_args ; i++ ) {
1810      out_sig_bt[argc++] = in_sig_bt[i];
1811    }
1812  } else {
1813    Thread* THREAD = Thread::current();
1814    in_elem_bt = NEW_RESOURCE_ARRAY(BasicType, total_c_args);
1815    SignatureStream ss(method->signature());
1816    int o = 0;
1817    for (int i = 0; i < total_in_args ; i++, o++) {
1818      if (in_sig_bt[i] == T_ARRAY) {
1819        // Arrays are passed as int, elem* pair
1820        Symbol* atype = ss.as_symbol(CHECK_NULL);
1821        const char* at = atype->as_C_string();
1822        if (strlen(at) == 2) {
1823          assert(at[0] == '[', "must be");
1824          switch (at[1]) {
1825            case 'B': in_elem_bt[o] = T_BYTE; break;
1826            case 'C': in_elem_bt[o] = T_CHAR; break;
1827            case 'D': in_elem_bt[o] = T_DOUBLE; break;
1828            case 'F': in_elem_bt[o] = T_FLOAT; break;
1829            case 'I': in_elem_bt[o] = T_INT; break;
1830            case 'J': in_elem_bt[o] = T_LONG; break;
1831            case 'S': in_elem_bt[o] = T_SHORT; break;
1832            case 'Z': in_elem_bt[o] = T_BOOLEAN; break;
1833            default: ShouldNotReachHere();
1834          }
1835        }
1836      } else {
1837        in_elem_bt[o] = T_VOID;
1838      }
1839      if (in_sig_bt[i] != T_VOID) {
1840        assert(in_sig_bt[i] == ss.type(), "must match");
1841        ss.next();
1842      }
1843    }
1844
1845    for (int i = 0; i < total_in_args ; i++ ) {
1846      if (in_sig_bt[i] == T_ARRAY) {
1847        // Arrays are passed as int, elem* pair.
1848        out_sig_bt[argc++] = T_INT;
1849        out_sig_bt[argc++] = T_ADDRESS;
1850      } else {
1851        out_sig_bt[argc++] = in_sig_bt[i];
1852      }
1853    }
1854  }
1855
1856
1857  // Compute the wrapper's frame size.
1858  // --------------------------------------------------------------------------
1859
1860  // Now figure out where the args must be stored and how much stack space
1861  // they require.
1862  //
1863  // Compute framesize for the wrapper. We need to handlize all oops in
1864  // incoming registers.
1865  //
1866  // Calculate the total number of stack slots we will need:
1867  //   1) abi requirements
1868  //   2) outgoing arguments
1869  //   3) space for inbound oop handle area
1870  //   4) space for handlizing a klass if static method
1871  //   5) space for a lock if synchronized method
1872  //   6) workspace for saving return values, int <-> float reg moves, etc.
1873  //   7) alignment
1874  //
1875  // Layout of the native wrapper frame:
1876  // (stack grows upwards, memory grows downwards)
1877  //
1878  // NW     [ABI_REG_ARGS]             <-- 1) R1_SP
1879  //        [outgoing arguments]       <-- 2) R1_SP + out_arg_slot_offset
1880  //        [oopHandle area]           <-- 3) R1_SP + oop_handle_offset (save area for critical natives)
1881  //        klass                      <-- 4) R1_SP + klass_offset
1882  //        lock                       <-- 5) R1_SP + lock_offset
1883  //        [workspace]                <-- 6) R1_SP + workspace_offset
1884  //        [alignment] (optional)     <-- 7)
1885  // caller [JIT_TOP_ABI_48]           <-- r_callers_sp
1886  //
1887  // - *_slot_offset Indicates offset from SP in number of stack slots.
1888  // - *_offset      Indicates offset from SP in bytes.
1889
1890  int stack_slots = c_calling_convention(out_sig_bt, out_regs, out_regs2, total_c_args) // 1+2)
1891                  + SharedRuntime::out_preserve_stack_slots(); // See c_calling_convention.
1892
1893  // Now the space for the inbound oop handle area.
1894  int total_save_slots = num_java_iarg_registers * VMRegImpl::slots_per_word;
1895  if (is_critical_native) {
1896    // Critical natives may have to call out so they need a save area
1897    // for register arguments.
1898    int double_slots = 0;
1899    int single_slots = 0;
1900    for (int i = 0; i < total_in_args; i++) {
1901      if (in_regs[i].first()->is_Register()) {
1902        const Register reg = in_regs[i].first()->as_Register();
1903        switch (in_sig_bt[i]) {
1904          case T_BOOLEAN:
1905          case T_BYTE:
1906          case T_SHORT:
1907          case T_CHAR:
1908          case T_INT:
1909          // Fall through.
1910          case T_ARRAY:
1911          case T_LONG: double_slots++; break;
1912          default:  ShouldNotReachHere();
1913        }
1914      } else if (in_regs[i].first()->is_FloatRegister()) {
1915        switch (in_sig_bt[i]) {
1916          case T_FLOAT:  single_slots++; break;
1917          case T_DOUBLE: double_slots++; break;
1918          default:  ShouldNotReachHere();
1919        }
1920      }
1921    }
1922    total_save_slots = double_slots * 2 + align_up(single_slots, 2); // round to even
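    // Example: 2 double-word and 3 single-word register arguments need
    // 2 * 2 + align_up(3, 2) = 8 save slots.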
1923  }
1924
1925  int oop_handle_slot_offset = stack_slots;
1926  stack_slots += total_save_slots;                                                // 3)
1927
1928  int klass_slot_offset = 0;
1929  int klass_offset      = -1;
1930  if (method_is_static && !is_critical_native) {                                  // 4)
1931    klass_slot_offset  = stack_slots;
1932    klass_offset       = klass_slot_offset * VMRegImpl::stack_slot_size;
1933    stack_slots       += VMRegImpl::slots_per_word;
1934  }
1935
1936  int lock_slot_offset = 0;
1937  int lock_offset      = -1;
1938  if (method->is_synchronized()) {                                                // 5)
1939    lock_slot_offset   = stack_slots;
1940    lock_offset        = lock_slot_offset * VMRegImpl::stack_slot_size;
1941    stack_slots       += VMRegImpl::slots_per_word;
1942  }
1943
1944  int workspace_slot_offset = stack_slots;                                        // 6)
1945  stack_slots         += 2;
1946
1947  // Now compute the actual number of stack slots we need.
1948  // Round up to keep the stack properly aligned.
1949  stack_slots = align_up(stack_slots,                                             // 7)
1950                         frame::alignment_in_bytes / VMRegImpl::stack_slot_size);
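  // For example, assuming 16-byte frame alignment and 4-byte stack slots
  // (typical values, stated here only for illustration), this rounds
  // stack_slots up to a multiple of 4.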
1951  int frame_size_in_bytes = stack_slots * VMRegImpl::stack_slot_size;
1952
1953
1954  // Now we can start generating code.
1955  // --------------------------------------------------------------------------
1956
1957  intptr_t start_pc = (intptr_t)__ pc();
1958  intptr_t vep_start_pc;
1959  intptr_t frame_done_pc;
1960  intptr_t oopmap_pc;
1961
1962  Label    ic_miss;
1963  Label    handle_pending_exception;
1964
1965  Register r_callers_sp = R21;
1966  Register r_temp_1     = R22;
1967  Register r_temp_2     = R23;
1968  Register r_temp_3     = R24;
1969  Register r_temp_4     = R25;
1970  Register r_temp_5     = R26;
1971  Register r_temp_6     = R27;
1972  Register r_return_pc  = R28;
1973
1974  Register r_carg1_jnienv        = noreg;
1975  Register r_carg2_classorobject = noreg;
1976  if (!is_critical_native) {
1977    r_carg1_jnienv        = out_regs[0].first()->as_Register();
1978    r_carg2_classorobject = out_regs[1].first()->as_Register();
1979  }
1980
1981
1982  // Generate the Unverified Entry Point (UEP).
1983  // --------------------------------------------------------------------------
1984  assert(start_pc == (intptr_t)__ pc(), "uep must be at start");
1985
1986  // Check ic: object class == cached class?
1987  if (!method_is_static) {
1988  Register ic = as_Register(Matcher::inline_cache_reg_encode());
1989  Register receiver_klass = r_temp_1;
1990
1991  __ cmpdi(CCR0, R3_ARG1, 0);
1992  __ beq(CCR0, ic_miss);
1993  __ verify_oop(R3_ARG1);
1994  __ load_klass(receiver_klass, R3_ARG1);
1995
1996  __ cmpd(CCR0, receiver_klass, ic);
1997  __ bne(CCR0, ic_miss);
1998  }
1999
2000
2001  // Generate the Verified Entry Point (VEP).
2002  // --------------------------------------------------------------------------
2003  vep_start_pc = (intptr_t)__ pc();
2004
2005  __ save_LR_CR(r_temp_1);
2006  __ generate_stack_overflow_check(frame_size_in_bytes); // Check before creating frame.
2007  __ mr(r_callers_sp, R1_SP);                            // Remember frame pointer.
2008  __ push_frame(frame_size_in_bytes, r_temp_1);          // Push the c2n adapter's frame.
2009  frame_done_pc = (intptr_t)__ pc();
2010
2011  __ verify_thread();
2012
2013  // Native nmethod wrappers never take possession of the oop arguments.
2014  // So the caller will gc the arguments.
2015  // The only thing we need an oopMap for is if the call is static.
2016  //
2017  // An OopMap for lock (and class if static), and one for the VM call itself.
2018  OopMapSet *oop_maps = new OopMapSet();
2019  OopMap    *oop_map  = new OopMap(stack_slots * 2, 0 /* arg_slots*/);
2020
2021  if (is_critical_native) {
2022    check_needs_gc_for_critical_native(masm, stack_slots, total_in_args, oop_handle_slot_offset, oop_maps, in_regs, in_sig_bt, r_temp_1);
2023  }
2024
2025  // Move arguments from register/stack to register/stack.
2026  // --------------------------------------------------------------------------
2027  //
2028  // We immediately shuffle the arguments so that for any vm call we have
2029  // to make from here on out (sync slow path, jvmti, etc.) we will have
2030  // captured the oops from our caller and have a valid oopMap for them.
2031  //
2032  // Natives require 1 or 2 extra arguments over the normal ones: the JNIEnv*
2033  // (derived from JavaThread* which is in R16_thread) and, if static,
2034  // the class mirror instead of a receiver. This pretty much guarantees that
2035  // register layout will not match. We ignore these extra arguments during
2036  // the shuffle. The shuffle is described by the two calling convention
2037  // vectors we have in our possession. We simply walk the java vector to
2038  // get the source locations and the c vector to get the destinations.
2039
2040  // Record sp-based slot for receiver on stack for non-static methods.
2041  int receiver_offset = -1;
2042
2043  // We move the arguments backward because a floating point argument's
2044  // destination is always a register with a greater or equal register
2045  // number (or a stack slot), so no source is clobbered before it is moved.
2046  //   in  is the index of the incoming Java arguments
2047  //   out is the index of the outgoing C arguments
2048
2049#ifdef ASSERT
2050  bool reg_destroyed[RegisterImpl::number_of_registers];
2051  bool freg_destroyed[FloatRegisterImpl::number_of_registers];
2052  for (int r = 0 ; r < RegisterImpl::number_of_registers ; r++) {
2053    reg_destroyed[r] = false;
2054  }
2055  for (int f = 0 ; f < FloatRegisterImpl::number_of_registers ; f++) {
2056    freg_destroyed[f] = false;
2057  }
2058#endif // ASSERT
2059
2060  for (int in = total_in_args - 1, out = total_c_args - 1; in >= 0 ; in--, out--) {
2061
2062#ifdef ASSERT
2063    if (in_regs[in].first()->is_Register()) {
2064      assert(!reg_destroyed[in_regs[in].first()->as_Register()->encoding()], "ack!");
2065    } else if (in_regs[in].first()->is_FloatRegister()) {
2066      assert(!freg_destroyed[in_regs[in].first()->as_FloatRegister()->encoding()], "ack!");
2067    }
2068    if (out_regs[out].first()->is_Register()) {
2069      reg_destroyed[out_regs[out].first()->as_Register()->encoding()] = true;
2070    } else if (out_regs[out].first()->is_FloatRegister()) {
2071      freg_destroyed[out_regs[out].first()->as_FloatRegister()->encoding()] = true;
2072    }
2073    if (out_regs2[out].first()->is_Register()) {
2074      reg_destroyed[out_regs2[out].first()->as_Register()->encoding()] = true;
2075    } else if (out_regs2[out].first()->is_FloatRegister()) {
2076      freg_destroyed[out_regs2[out].first()->as_FloatRegister()->encoding()] = true;
2077    }
2078#endif // ASSERT
2079
2080    switch (in_sig_bt[in]) {
2081      case T_BOOLEAN:
2082      case T_CHAR:
2083      case T_BYTE:
2084      case T_SHORT:
2085      case T_INT:
2086        // Move int and do sign extension.
2087        int_move(masm, in_regs[in], out_regs[out], r_callers_sp, r_temp_1);
2088        break;
2089      case T_LONG:
2090        long_move(masm, in_regs[in], out_regs[out], r_callers_sp, r_temp_1);
2091        break;
2092      case T_ARRAY:
2093        if (is_critical_native) {
2094          int body_arg = out;
2095          out -= 1; // Point to length arg.
2096          unpack_array_argument(masm, in_regs[in], in_elem_bt[in], out_regs[body_arg], out_regs[out],
2097                                r_callers_sp, r_temp_1, r_temp_2);
2098          break;
2099        }
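        // Not a critical native: fall through and treat the array like any
        // other oop argument.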
2100      case T_OBJECT:
2101        assert(!is_critical_native, "no oop arguments");
2102        object_move(masm, stack_slots,
2103                    oop_map, oop_handle_slot_offset,
2104                    ((in == 0) && (!method_is_static)), &receiver_offset,
2105                    in_regs[in], out_regs[out],
2106                    r_callers_sp, r_temp_1, r_temp_2);
2107        break;
2108      case T_VOID:
2109        break;
2110      case T_FLOAT:
2111        float_move(masm, in_regs[in], out_regs[out], r_callers_sp, r_temp_1);
2112        if (out_regs2[out].first()->is_valid()) {
2113          float_move(masm, in_regs[in], out_regs2[out], r_callers_sp, r_temp_1);
2114        }
2115        break;
2116      case T_DOUBLE:
2117        double_move(masm, in_regs[in], out_regs[out], r_callers_sp, r_temp_1);
2118        if (out_regs2[out].first()->is_valid()) {
2119          double_move(masm, in_regs[in], out_regs2[out], r_callers_sp, r_temp_1);
2120        }
2121        break;
2122      case T_ADDRESS:
2123        fatal("found type (T_ADDRESS) in java args");
2124        break;
2125      default:
2126        ShouldNotReachHere();
2127        break;
2128    }
2129  }
2130
2131  // Pre-load a static method's oop into ARG2.
2132  // Used both by locking code and the normal JNI call code.
2133  if (method_is_static && !is_critical_native) {
2134    __ set_oop_constant(JNIHandles::make_local(method->method_holder()->java_mirror()),
2135                        r_carg2_classorobject);
2136
2137    // Now handlize the static class mirror in carg2. It's known not-null.
2138    __ std(r_carg2_classorobject, klass_offset, R1_SP);
2139    oop_map->set_oop(VMRegImpl::stack2reg(klass_slot_offset));
2140    __ addi(r_carg2_classorobject, R1_SP, klass_offset);
2141  }
2142
2143  // Get JNIEnv* which is first argument to native.
2144  if (!is_critical_native) {
2145    __ addi(r_carg1_jnienv, R16_thread, in_bytes(JavaThread::jni_environment_offset()));
2146  }
2147
2148  // NOTE:
2149  //
2150  // We have all of the arguments setup at this point.
2151  // We MUST NOT touch any outgoing regs from this point on.
2152  // So if we must call out we must push a new frame.
2153
2154  // Get current pc for oopmap, and load it patchable relative to global toc.
2155  oopmap_pc = (intptr_t) __ pc();
2156  __ calculate_address_from_global_toc(r_return_pc, (address)oopmap_pc, true, true, true, true);
2157
2158  // We use the same pc/oopMap repeatedly when we call out.
2159  oop_maps->add_gc_map(oopmap_pc - start_pc, oop_map);
2160
2161  // r_return_pc now has the pc loaded that we will use when we finally call
2162  // to native.
2163
2164  // Make sure that thread is non-volatile; it crosses a bunch of VM calls below.
2165  assert(R16_thread->is_nonvolatile(), "thread must be in non-volatile register");
2166
2167# if 0
2168  // DTrace method entry
2169# endif
2170
2171  // Lock a synchronized method.
2172  // --------------------------------------------------------------------------
2173
2174  if (method->is_synchronized()) {
2175    assert(!is_critical_native, "unhandled");
2176    ConditionRegister r_flag = CCR1;
2177    Register          r_oop  = r_temp_4;
2178    const Register    r_box  = r_temp_5;
2179    Label             done, locked;
2180
2181    // Load the oop for the object or class. r_carg2_classorobject contains
2182    // either the handlized oop from the incoming arguments or the handlized
2183    // class mirror (if the method is static).
2184    __ ld(r_oop, 0, r_carg2_classorobject);
2185
2186    // Get the lock box slot's address.
2187    __ addi(r_box, R1_SP, lock_offset);
2188
2189#   ifdef ASSERT
2190    if (UseBiasedLocking) {
2191      // Making the box point to itself will make it clear it went unused
2192      // but also be obviously invalid.
2193      __ std(r_box, 0, r_box);
2194    }
2195#   endif // ASSERT
2196
2197    // Try fastpath for locking.
2198    // fast_lock kills r_temp_1, r_temp_2, r_temp_3.
2199    __ compiler_fast_lock_object(r_flag, r_oop, r_box, r_temp_1, r_temp_2, r_temp_3);
2200    __ beq(r_flag, locked);
2201
2202    // None of the above fast optimizations worked so we have to get into the
2203    // slow case of monitor enter. Inline a special case of call_VM that
2204    // disallows any pending_exception.
2205
2206    // Save argument registers and leave room for C-compatible ABI_REG_ARGS.
2207    int frame_size = frame::abi_reg_args_size +
2208        align_up(total_c_args * wordSize, frame::alignment_in_bytes);
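    // This reserves the C-compatible ABI area plus one word of save space per
    // outgoing C argument, rounded up to the frame alignment.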
2209    __ mr(R11_scratch1, R1_SP);
2210    RegisterSaver::push_frame_and_save_argument_registers(masm, R12_scratch2, frame_size, total_c_args, out_regs, out_regs2);
2211
2212    // Do the call.
2213    __ set_last_Java_frame(R11_scratch1, r_return_pc);
2214    assert(r_return_pc->is_nonvolatile(), "expecting return pc to be in non-volatile register");
2215    __ call_VM_leaf(CAST_FROM_FN_PTR(address, SharedRuntime::complete_monitor_locking_C), r_oop, r_box, R16_thread);
2216    __ reset_last_Java_frame();
2217
2218    RegisterSaver::restore_argument_registers_and_pop_frame(masm, frame_size, total_c_args, out_regs, out_regs2);
2219
2220    __ asm_assert_mem8_is_zero(thread_(pending_exception),
2221       "no pending exception allowed on exit from SharedRuntime::complete_monitor_locking_C", 0);
2222
2223    __ bind(locked);
2224  }
2225
2226
2227  // Publish thread state
2228  // --------------------------------------------------------------------------
2229
2230  // Use that pc we placed in r_return_pc a while back as the current frame anchor.
2231  __ set_last_Java_frame(R1_SP, r_return_pc);
2232
2233  // Transition from _thread_in_Java to _thread_in_native.
2234  __ li(R0, _thread_in_native);
2235  __ release();
2236  // TODO: PPC port assert(4 == JavaThread::sz_thread_state(), "unexpected field size");
2237  __ stw(R0, thread_(thread_state));
2238  if (UseMembar) {
2239    __ fence();
2240  }
2241
2242
2243  // The JNI call
2244  // --------------------------------------------------------------------------
2245#if defined(ABI_ELFv2)
2246  __ call_c(native_func, relocInfo::runtime_call_type);
2247#else
2248  FunctionDescriptor* fd_native_method = (FunctionDescriptor*) native_func;
2249  __ call_c(fd_native_method, relocInfo::runtime_call_type);
2250#endif
2251
2252
2253  // Now, we are back from the native code.
2254
2255
2256  // Unpack the native result.
2257  // --------------------------------------------------------------------------
2258
2259  // For int-types, we do any needed sign extension.
2260  // Care must be taken that the return values (R3_RET and F1_RET)
2261  // will survive any VM calls for blocking or unlocking.
2262  // An OOP result (handle) is done specially in the slow-path code.
2263
2264  switch (ret_type) {
2265    case T_VOID:    break;        // Nothing to do!
2266    case T_FLOAT:   break;        // Got it where we want it (unless slow-path).
2267    case T_DOUBLE:  break;        // Got it where we want it (unless slow-path).
2268    case T_LONG:    break;        // Got it where we want it (unless slow-path).
2269    case T_OBJECT:  break;        // Really a handle.
2270                                  // Cannot de-handlize until after reclaiming jvm_lock.
2271    case T_ARRAY:   break;
2272
2273    case T_BOOLEAN: {             // 0 -> false(0); !0 -> true(1)
2274      Label skip_modify;
2275      __ cmpwi(CCR0, R3_RET, 0);
2276      __ beq(CCR0, skip_modify);
2277      __ li(R3_RET, 1);
2278      __ bind(skip_modify);
2279      break;
2280      }
2281    case T_BYTE: {                // sign extension
2282      __ extsb(R3_RET, R3_RET);
2283      break;
2284      }
2285    case T_CHAR: {                // unsigned result
2286      __ andi(R3_RET, R3_RET, 0xffff);
2287      break;
2288      }
2289    case T_SHORT: {               // sign extension
2290      __ extsh(R3_RET, R3_RET);
2291      break;
2292      }
2293    case T_INT:                   // nothing to do
2294      break;
2295    default:
2296      ShouldNotReachHere();
2297      break;
2298  }
2299
2300
2301  // Publish thread state
2302  // --------------------------------------------------------------------------
2303
2304  // Switch thread to "native transition" state before reading the
2305  // synchronization state. This additional state is necessary because reading
2306  // and testing the synchronization state is not atomic w.r.t. GC, as this
2307  // scenario demonstrates:
2308  //   - Java thread A, in _thread_in_native state, loads _not_synchronized
2309  //     and is preempted.
2310  //   - VM thread changes sync state to synchronizing and suspends threads
2311  //     for GC.
2312  //   - Thread A is resumed to finish this native method, but doesn't block
2313  //     here since it didn't see any synchronization in progress, and escapes.
2314
2315  // Transition from _thread_in_native to _thread_in_native_trans.
2316  __ li(R0, _thread_in_native_trans);
2317  __ release();
2318  // TODO: PPC port assert(4 == JavaThread::sz_thread_state(), "unexpected field size");
2319  __ stw(R0, thread_(thread_state));
2320
2321
2322  // Must we block?
2323  // --------------------------------------------------------------------------
2324
2325  // Block, if necessary, before resuming in _thread_in_Java state.
2326  // In order for GC to work, don't clear the last_Java_sp until after blocking.
2327  Label after_transition;
2328  {
2329    Label no_block, sync;
2330
2331    if (os::is_MP()) {
2332      if (UseMembar) {
2333        // Force this write out before the read below.
2334        __ fence();
2335      } else {
2336        // Write serialization page so VM thread can do a pseudo remote membar.
2337        // We use the current thread pointer to calculate a thread specific
2338        // offset to write to within the page. This minimizes bus traffic
2339        // due to cache line collision.
2340        __ serialize_memory(R16_thread, r_temp_4, r_temp_5);
2341      }
2342    }
2343
2344    Register sync_state_addr = r_temp_4;
2345    Register sync_state      = r_temp_5;
2346    Register suspend_flags   = r_temp_6;
2347
2348    __ load_const(sync_state_addr, SafepointSynchronize::address_of_state(), /*temp*/ sync_state);
2349
2350    // TODO: PPC port assert(4 == SafepointSynchronize::sz_state(), "unexpected field size");
2351    __ lwz(sync_state, 0, sync_state_addr);
2352
2353    // TODO: PPC port assert(4 == Thread::sz_suspend_flags(), "unexpected field size");
2354    __ lwz(suspend_flags, thread_(suspend_flags));
2355
2356    __ acquire();
2357
2358    Label do_safepoint;
2359    // No synchronization in progress nor yet synchronized.
2360    __ cmpwi(CCR0, sync_state, SafepointSynchronize::_not_synchronized);
2361    // Not suspended.
2362    __ cmpwi(CCR1, suspend_flags, 0);
2363
2364    __ bne(CCR0, sync);
2365    __ beq(CCR1, no_block);
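    // Fall through: no safepoint is pending, but a suspend flag is set, so we
    // must block as well.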
2366
2367    // Block. Save any potential method result value before the operation and
2368    // use a leaf call to leave the last_Java_frame setup undisturbed. Doing this
2369    // lets us share the oopMap we used when we went native rather than create
2370    // a distinct one for this pc.
2371    __ bind(sync);
2372
2373    address entry_point = is_critical_native
2374      ? CAST_FROM_FN_PTR(address, JavaThread::check_special_condition_for_native_trans_and_transition)
2375      : CAST_FROM_FN_PTR(address, JavaThread::check_special_condition_for_native_trans);
2376    save_native_result(masm, ret_type, workspace_slot_offset);
2377    __ call_VM_leaf(entry_point, R16_thread);
2378    restore_native_result(masm, ret_type, workspace_slot_offset);
2379
2380    if (is_critical_native) {
2381      __ b(after_transition); // No thread state transition here.
2382    }
2383    __ bind(no_block);
2384  }
2385
2386  // Publish thread state.
2387  // --------------------------------------------------------------------------
2388
2389  // Thread state is thread_in_native_trans. Any safepoint blocking has
2390  // already happened so we can now change state to _thread_in_Java.
2391
2392  // Transition from _thread_in_native_trans to _thread_in_Java.
2393  __ li(R0, _thread_in_Java);
2394  __ release();
2395  // TODO: PPC port assert(4 == JavaThread::sz_thread_state(), "unexpected field size");
2396  __ stw(R0, thread_(thread_state));
2397  if (UseMembar) {
2398    __ fence();
2399  }
2400  __ bind(after_transition);
2401
2402  // Reguard any pages if necessary.
2403  // --------------------------------------------------------------------------
2404
2405  Label no_reguard;
2406  __ lwz(r_temp_1, thread_(stack_guard_state));
2407  __ cmpwi(CCR0, r_temp_1, JavaThread::stack_guard_yellow_reserved_disabled);
2408  __ bne(CCR0, no_reguard);
2409
2410  save_native_result(masm, ret_type, workspace_slot_offset);
2411  __ call_VM_leaf(CAST_FROM_FN_PTR(address, SharedRuntime::reguard_yellow_pages));
2412  restore_native_result(masm, ret_type, workspace_slot_offset);
2413
2414  __ bind(no_reguard);
2415
2416
2417  // Unlock
2418  // --------------------------------------------------------------------------
2419
2420  if (method->is_synchronized()) {
2421
2422    ConditionRegister r_flag   = CCR1;
2423    const Register r_oop       = r_temp_4;
2424    const Register r_box       = r_temp_5;
2425    const Register r_exception = r_temp_6;
2426    Label done;
2427
2428    // Get oop and address of lock object box.
2429    if (method_is_static) {
2430      assert(klass_offset != -1, "");
2431      __ ld(r_oop, klass_offset, R1_SP);
2432    } else {
2433      assert(receiver_offset != -1, "");
2434      __ ld(r_oop, receiver_offset, R1_SP);
2435    }
2436    __ addi(r_box, R1_SP, lock_offset);
2437
2438    // Try fastpath for unlocking.
2439    __ compiler_fast_unlock_object(r_flag, r_oop, r_box, r_temp_1, r_temp_2, r_temp_3);
2440    __ beq(r_flag, done);
2441
2442    // Save and restore any potential method result value around the unlocking operation.
2443    save_native_result(masm, ret_type, workspace_slot_offset);
2444
2445    // Must save pending exception around the slow-path VM call. Since it's a
2446    // leaf call, the pending exception (if any) can be kept in a register.
2447    __ ld(r_exception, thread_(pending_exception));
2448    assert(r_exception->is_nonvolatile(), "exception register must be non-volatile");
2449    __ li(R0, 0);
2450    __ std(R0, thread_(pending_exception));
2451
2452    // Slow case of monitor exit.
2453    // Inline a special case of call_VM that disallows any pending_exception.
2454    // Arguments are (oop obj, BasicLock* lock, JavaThread* thread).
2455    __ call_VM_leaf(CAST_FROM_FN_PTR(address, SharedRuntime::complete_monitor_unlocking_C), r_oop, r_box, R16_thread);
2456
2457    __ asm_assert_mem8_is_zero(thread_(pending_exception),
2458       "no pending exception allowed on exit from SharedRuntime::complete_monitor_unlocking_C", 0);
2459
2460    restore_native_result(masm, ret_type, workspace_slot_offset);
2461
2462    // check_forward_pending_exception jumps to forward_exception if any pending
2463    // exception is set. The forward_exception routine expects to see the
2464    // exception in pending_exception and not in a register. Kind of clumsy,
2465    // since all folks who branch to forward_exception must have tested
2466    // pending_exception first and hence have it in a register already.
2467    __ std(r_exception, thread_(pending_exception));
2468
2469    __ bind(done);
2470  }
2471
2472# if 0
2473  // DTrace method exit
2474# endif
2475
2476  // Clear "last Java frame" SP and PC.
2477  // --------------------------------------------------------------------------
2478
2479  __ reset_last_Java_frame();
2480
2481  // Unbox oop result, e.g. JNIHandles::resolve value.
2482  // --------------------------------------------------------------------------
2483
2484  if (ret_type == T_OBJECT || ret_type == T_ARRAY) {
2485    __ resolve_jobject(R3_RET, r_temp_1, r_temp_2, /* needs_frame */ false); // kills R31
2486  }
2487
2488  if (CheckJNICalls) {
2489    // clear_pending_jni_exception_check
2490    __ load_const_optimized(R0, 0L);
2491    __ st_ptr(R0, JavaThread::pending_jni_exception_check_fn_offset(), R16_thread);
2492  }
2493
2494  // Reset handle block.
2495  // --------------------------------------------------------------------------
2496  if (!is_critical_native) {
2497  __ ld(r_temp_1, thread_(active_handles));
2498  // TODO: PPC port assert(4 == JNIHandleBlock::top_size_in_bytes(), "unexpected field size");
2499  __ li(r_temp_2, 0);
2500  __ stw(r_temp_2, JNIHandleBlock::top_offset_in_bytes(), r_temp_1);
2501
2502
2503  // Check for pending exceptions.
2504  // --------------------------------------------------------------------------
2505  __ ld(r_temp_2, thread_(pending_exception));
2506  __ cmpdi(CCR0, r_temp_2, 0);
2507  __ bne(CCR0, handle_pending_exception);
2508  }
2509
2510  // Return
2511  // --------------------------------------------------------------------------
2512
2513  __ pop_frame();
2514  __ restore_LR_CR(R11);
2515  __ blr();
2516
2517
2518  // Handler for pending exceptions (out-of-line).
2519  // --------------------------------------------------------------------------
2520
2521  // Since this is a native call, we know the proper exception handler
2522  // is the empty function. We just pop this frame and then jump to
2523  // forward_exception_entry.
2524  if (!is_critical_native) {
2525  __ align(InteriorEntryAlignment);
2526  __ bind(handle_pending_exception);
2527
2528  __ pop_frame();
2529  __ restore_LR_CR(R11);
2530  __ b64_patchable((address)StubRoutines::forward_exception_entry(),
2531                       relocInfo::runtime_call_type);
2532  }
2533
2534  // Handler for a cache miss (out-of-line).
2535  // --------------------------------------------------------------------------
2536
2537  if (!method_is_static) {
2538  __ align(InteriorEntryAlignment);
2539  __ bind(ic_miss);
2540
2541  __ b64_patchable((address)SharedRuntime::get_ic_miss_stub(),
2542                       relocInfo::runtime_call_type);
2543  }
2544
2545  // Done.
2546  // --------------------------------------------------------------------------
2547
2548  __ flush();
2549
2550  nmethod *nm = nmethod::new_native_nmethod(method,
2551                                            compile_id,
2552                                            masm->code(),
2553                                            vep_start_pc-start_pc,
2554                                            frame_done_pc-start_pc,
2555                                            stack_slots / VMRegImpl::slots_per_word,
2556                                            (method_is_static ? in_ByteSize(klass_offset) : in_ByteSize(receiver_offset)),
2557                                            in_ByteSize(lock_offset),
2558                                            oop_maps);
2559
2560  if (is_critical_native) {
2561    nm->set_lazy_critical_native(true);
2562  }
2563
2564  return nm;
2565#else
2566  ShouldNotReachHere();
2567  return NULL;
2568#endif // COMPILER2
2569}
2570
2571// This function returns the adjustment size (in number of words) to a c2i adapter
2572// activation for use during deoptimization.
2573int Deoptimization::last_frame_adjust(int callee_parameters, int callee_locals) {
2574  return align_up((callee_locals - callee_parameters) * Interpreter::stackElementWords, frame::alignment_in_bytes);
2575}
2576
2577uint SharedRuntime::out_preserve_stack_slots() {
2578#if defined(COMPILER1) || defined(COMPILER2)
2579  return frame::jit_out_preserve_size / VMRegImpl::stack_slot_size;
2580#else
2581  return 0;
2582#endif
2583}
2584
2585#if defined(COMPILER1) || defined(COMPILER2)
2586// Frame generation for deopt and uncommon trap blobs.
2587static void push_skeleton_frame(MacroAssembler* masm, bool deopt,
2588                                /* Read */
2589                                Register unroll_block_reg,
2590                                /* Update */
2591                                Register frame_sizes_reg,
2592                                Register number_of_frames_reg,
2593                                Register pcs_reg,
2594                                /* Invalidate */
2595                                Register frame_size_reg,
2596                                Register pc_reg) {
2597
2598  __ ld(pc_reg, 0, pcs_reg);
2599  __ ld(frame_size_reg, 0, frame_sizes_reg);
2600  __ std(pc_reg, _abi(lr), R1_SP);
2601  __ push_frame(frame_size_reg, R0/*tmp*/);
2602#ifdef ASSERT
2603  __ load_const_optimized(pc_reg, 0x5afe);
2604  __ std(pc_reg, _ijava_state_neg(ijava_reserved), R1_SP);
2605#endif
2606  __ std(R1_SP, _ijava_state_neg(sender_sp), R1_SP);
2607  __ addi(number_of_frames_reg, number_of_frames_reg, -1);
2608  __ addi(frame_sizes_reg, frame_sizes_reg, wordSize);
2609  __ addi(pcs_reg, pcs_reg, wordSize);
2610}
2611
2612// Loop through the UnrollBlock info and create new frames.
2613static void push_skeleton_frames(MacroAssembler* masm, bool deopt,
2614                                 /* read */
2615                                 Register unroll_block_reg,
2616                                 /* invalidate */
2617                                 Register frame_sizes_reg,
2618                                 Register number_of_frames_reg,
2619                                 Register pcs_reg,
2620                                 Register frame_size_reg,
2621                                 Register pc_reg) {
2622  Label loop;
2623
2624  // _number_of_frames is of type int (deoptimization.hpp)
2625  __ lwa(number_of_frames_reg,
2626             Deoptimization::UnrollBlock::number_of_frames_offset_in_bytes(),
2627             unroll_block_reg);
2628  __ ld(pcs_reg,
2629            Deoptimization::UnrollBlock::frame_pcs_offset_in_bytes(),
2630            unroll_block_reg);
2631  __ ld(frame_sizes_reg,
2632            Deoptimization::UnrollBlock::frame_sizes_offset_in_bytes(),
2633            unroll_block_reg);
2634
2635  // stack: (caller_of_deoptee, ...).
2636
2637  // At this point we either have an interpreter frame or a compiled
2638  // frame on top of stack. If it is a compiled frame we push a new c2i
2639  // adapter here.
2640
2641  // Memorize top-frame stack-pointer.
2642  __ mr(frame_size_reg/*old_sp*/, R1_SP);
2643
2644  // Resize interpreter top frame OR C2I adapter.
2645
2646  // At this moment, the top frame (which is the caller of the deoptee) is
2647  // an interpreter frame or a newly pushed C2I adapter or an entry frame.
2648  // The top frame has a TOP_IJAVA_FRAME_ABI and the frame contains the
2649  // outgoing arguments.
2650  //
2651  // In order to push the interpreter frame for the deoptee, we need to
2652  // resize the top frame such that we are able to place the deoptee's
2653  // locals in the frame.
2654  // Additionally, we have to turn the top frame's TOP_IJAVA_FRAME_ABI
2655  // into a valid PARENT_IJAVA_FRAME_ABI.
2656
2657  __ lwa(R11_scratch1,
2658             Deoptimization::UnrollBlock::caller_adjustment_offset_in_bytes(),
2659             unroll_block_reg);
2660  __ neg(R11_scratch1, R11_scratch1);
2661
2662  // R11_scratch1 contains size of locals for frame resizing.
2663  // R12_scratch2 contains top frame's lr.
2664
2665  // Resizing the frame by the complete frame size prevents the TOC from being
2666  // overwritten by locals. A more stack-space-saving way would be
2667  // to copy the TOC to its location in the new abi.
2668  __ addi(R11_scratch1, R11_scratch1, - frame::parent_ijava_frame_abi_size);
2669
2670  // now, resize the frame
2671  __ resize_frame(R11_scratch1, pc_reg/*tmp*/);
2672
2673  // In the case where we have resized a c2i frame above, the optional
2674  // alignment below the locals has size 32 (why?).
2675  __ std(R12_scratch2, _abi(lr), R1_SP);
2676
2677  // Initialize initial_caller_sp.
2678#ifdef ASSERT
2679 __ load_const_optimized(pc_reg, 0x5afe);
2680 __ std(pc_reg, _ijava_state_neg(ijava_reserved), R1_SP);
2681#endif
2682 __ std(frame_size_reg, _ijava_state_neg(sender_sp), R1_SP);
2683
2684#ifdef ASSERT
2685  // Make sure that there is at least one entry in the array.
2686  __ cmpdi(CCR0, number_of_frames_reg, 0);
2687  __ asm_assert_ne("array_size must be > 0", 0x205);
2688#endif
2689
2690  // Now push the new interpreter frames.
2691  //
2692  __ bind(loop);
2693  // Allocate a new frame, fill in the pc.
2694  push_skeleton_frame(masm, deopt,
2695                      unroll_block_reg,
2696                      frame_sizes_reg,
2697                      number_of_frames_reg,
2698                      pcs_reg,
2699                      frame_size_reg,
2700                      pc_reg);
2701  __ cmpdi(CCR0, number_of_frames_reg, 0);
2702  __ bne(CCR0, loop);
2703
2704  // Get the return address pointing into the frame manager.
2705  __ ld(R0, 0, pcs_reg);
2706  // Store it in the top interpreter frame.
2707  __ std(R0, _abi(lr), R1_SP);
2708  // Initialize frame_manager_lr of interpreter top frame.
2709}
2710#endif
2711
2712void SharedRuntime::generate_deopt_blob() {
2713  // Allocate space for the code
2714  ResourceMark rm;
2715  // Setup code generation tools
2716  CodeBuffer buffer("deopt_blob", 2048, 1024);
2717  InterpreterMacroAssembler* masm = new InterpreterMacroAssembler(&buffer);
2718  Label exec_mode_initialized;
2719  int frame_size_in_words;
2720  OopMap* map = NULL;
2721  OopMapSet *oop_maps = new OopMapSet();
2722
2723  // size of ABI112 plus spill slots for R3_RET and F1_RET.
2724  const int frame_size_in_bytes = frame::abi_reg_args_spill_size;
2725  const int frame_size_in_slots = frame_size_in_bytes / sizeof(jint);
2726  int first_frame_size_in_bytes = 0; // frame size of "unpack frame" for call to fetch_unroll_info.
2727
2728  const Register exec_mode_reg = R21_tmp1;
2729
2730  const address start = __ pc();
2731
2732#if defined(COMPILER1) || defined(COMPILER2)
2733  // --------------------------------------------------------------------------
2734  // Prolog for non exception case!

  // We have been called from the deopt handler of the deoptee.
  //
  // deoptee:
  //                      ...
  //                      call X
  //                      ...
  //  deopt_handler:      call_deopt_stub
  //  cur. return pc  --> ...
  //
  // So currently SR_LR points behind the call in the deopt handler.
  // We adjust it such that it points to the start of the deopt handler.
  // The return_pc has been stored in the frame of the deoptee and
  // will replace the address of the deopt_handler in the call
  // to Deoptimization::fetch_unroll_info below.
  // We can't grab a free register here, because all registers may
  // contain live values, so let the RegisterSaver do the adjustment
  // of the return pc.
  const int return_pc_adjustment_no_exception = -HandlerImpl::size_deopt_handler();

  // Push the "unpack frame"
  // Save everything in sight.
  map = RegisterSaver::push_frame_reg_args_and_save_live_registers(masm,
                                                                   &first_frame_size_in_bytes,
                                                                   /*generate_oop_map=*/ true,
                                                                   return_pc_adjustment_no_exception,
                                                                   RegisterSaver::return_pc_is_lr);
  assert(map != NULL, "OopMap must have been created");

  __ li(exec_mode_reg, Deoptimization::Unpack_deopt);
  // Save exec mode for unpack_frames.
  __ b(exec_mode_initialized);

  // --------------------------------------------------------------------------
  // Prolog for exception case

  // An exception is pending.
  // We have been called with a return (interpreter) or a jump (exception blob).
  //
  // - R3_ARG1: exception oop
  // - R4_ARG2: exception pc

  int exception_offset = __ pc() - start;

  BLOCK_COMMENT("Prolog for exception case");

  // Store exception oop and pc in thread (location known to GC).
  // This is needed since the call to "fetch_unroll_info()" may safepoint.
  __ std(R3_ARG1, in_bytes(JavaThread::exception_oop_offset()), R16_thread);
  __ std(R4_ARG2, in_bytes(JavaThread::exception_pc_offset()),  R16_thread);
  __ std(R4_ARG2, _abi(lr), R1_SP);

  // Vanilla deoptimization with an exception pending in exception_oop.
  int exception_in_tls_offset = __ pc() - start;

  // Push the "unpack frame".
  // Save everything in sight.
  RegisterSaver::push_frame_reg_args_and_save_live_registers(masm,
                                                             &first_frame_size_in_bytes,
                                                             /*generate_oop_map=*/ false,
                                                             /*return_pc_adjustment_exception=*/ 0,
                                                             RegisterSaver::return_pc_is_pre_saved);

  // Deopt during an exception. Save exec mode for unpack_frames.
  __ li(exec_mode_reg, Deoptimization::Unpack_exception);

  // fall through

  int reexecute_offset = 0;
#ifdef COMPILER1
  __ b(exec_mode_initialized);

  // Reexecute entry, similar to c2 uncommon trap
  reexecute_offset = __ pc() - start;

  RegisterSaver::push_frame_reg_args_and_save_live_registers(masm,
                                                             &first_frame_size_in_bytes,
                                                             /*generate_oop_map=*/ false,
                                                             /*return_pc_adjustment_reexecute=*/ 0,
                                                             RegisterSaver::return_pc_is_pre_saved);
  __ li(exec_mode_reg, Deoptimization::Unpack_reexecute);
#endif

  // --------------------------------------------------------------------------
  __ BIND(exec_mode_initialized);

  {
  const Register unroll_block_reg = R22_tmp2;

  // We need to set `last_Java_frame' because `fetch_unroll_info' will
  // call `last_Java_frame()'. The value of the pc in the frame is not
  // particularly important. It just needs to identify this blob.
  __ set_last_Java_frame(R1_SP, noreg);

  // With EscapeAnalysis turned on, this call may safepoint!
  __ call_VM_leaf(CAST_FROM_FN_PTR(address, Deoptimization::fetch_unroll_info), R16_thread, exec_mode_reg);
  address calls_return_pc = __ last_calls_return_pc();
  // Set an oopmap for the call site that describes all our saved registers.
  oop_maps->add_gc_map(calls_return_pc - start, map);

  __ reset_last_Java_frame();
  // Save the return value.
  __ mr(unroll_block_reg, R3_RET);

  // Restore only the result registers that have been saved
  // by push_frame_reg_args_and_save_live_registers(...).
  RegisterSaver::restore_result_registers(masm, first_frame_size_in_bytes);

  // reload the exec mode from the UnrollBlock (it might have changed)
  __ lwz(exec_mode_reg, Deoptimization::UnrollBlock::unpack_kind_offset_in_bytes(), unroll_block_reg);
  // In excp_deopt_mode, restore and clear exception oop which we
  // stored in the thread during exception entry above. The exception
  // oop will be the return value of this stub.
  Label skip_restore_excp;
  __ cmpdi(CCR0, exec_mode_reg, Deoptimization::Unpack_exception);
  __ bne(CCR0, skip_restore_excp);
  __ ld(R3_RET, in_bytes(JavaThread::exception_oop_offset()), R16_thread);
  __ ld(R4_ARG2, in_bytes(JavaThread::exception_pc_offset()), R16_thread);
  __ li(R0, 0);
  __ std(R0, in_bytes(JavaThread::exception_pc_offset()),  R16_thread);
  __ std(R0, in_bytes(JavaThread::exception_oop_offset()), R16_thread);
  __ BIND(skip_restore_excp);

  __ pop_frame();

  // stack: (deoptee, optional i2c, caller of deoptee, ...).

  // pop the deoptee's frame
  __ pop_frame();

  // stack: (caller_of_deoptee, ...).

  // Loop through the `UnrollBlock' info and create interpreter frames.
  push_skeleton_frames(masm, true/*deopt*/,
                       unroll_block_reg,
                       R23_tmp3,
                       R24_tmp4,
                       R25_tmp5,
                       R26_tmp6,
                       R27_tmp7);

  // stack: (skeletal interpreter frame, ..., optional skeletal
  // interpreter frame, optional c2i, caller of deoptee, ...).
  }

  // push an `unpack_frame' taking care of float / int return values.
  __ push_frame(frame_size_in_bytes, R0/*tmp*/);

  // stack: (unpack frame, skeletal interpreter frame, ..., optional
  // skeletal interpreter frame, optional c2i, caller of deoptee,
  // ...).

  // Spill live volatile registers since we'll do a call.
  __ std( R3_RET, _abi_reg_args_spill(spill_ret),  R1_SP);
  __ stfd(F1_RET, _abi_reg_args_spill(spill_fret), R1_SP);

  // Let the unpacker lay out information in the skeletal frames just
  // allocated.
  __ get_PC_trash_LR(R3_RET);
  __ set_last_Java_frame(/*sp*/R1_SP, /*pc*/R3_RET);
  // This is a call to a LEAF method, so no oop map is required.
  __ call_VM_leaf(CAST_FROM_FN_PTR(address, Deoptimization::unpack_frames),
                  R16_thread/*thread*/, exec_mode_reg/*exec_mode*/);
  __ reset_last_Java_frame();

  // Restore the volatiles saved above.
  __ ld( R3_RET, _abi_reg_args_spill(spill_ret),  R1_SP);
  __ lfd(F1_RET, _abi_reg_args_spill(spill_fret), R1_SP);

  // Pop the unpack frame.
  __ pop_frame();
  __ restore_LR_CR(R0);

  // stack: (top interpreter frame, ..., optional interpreter frame,
  // optional c2i, caller of deoptee, ...).

  // Initialize R14_state.
  __ restore_interpreter_state(R11_scratch1);
  __ load_const_optimized(R25_templateTableBase, (address)Interpreter::dispatch_table((TosState)0), R11_scratch1);

  // Return to the interpreter entry point.
  __ blr();
  __ flush();
#else // COMPILER1 || COMPILER2
  __ unimplemented("deopt blob needed only with compiler");
  int exception_offset = __ pc() - start;
#endif // COMPILER1 || COMPILER2

  _deopt_blob = DeoptimizationBlob::create(&buffer, oop_maps, 0, exception_offset,
                                           reexecute_offset, first_frame_size_in_bytes / wordSize);
  _deopt_blob->set_unpack_with_exception_in_tls_offset(exception_in_tls_offset);
}

#ifdef COMPILER2
void SharedRuntime::generate_uncommon_trap_blob() {
  // Allocate space for the code.
  ResourceMark rm;
  // Setup code generation tools.
  CodeBuffer buffer("uncommon_trap_blob", 2048, 1024);
  InterpreterMacroAssembler* masm = new InterpreterMacroAssembler(&buffer);
  address start = __ pc();

  Register unroll_block_reg = R21_tmp1;
  Register klass_index_reg  = R22_tmp2;
  Register unc_trap_reg     = R23_tmp3;

  OopMapSet* oop_maps = new OopMapSet();
  int frame_size_in_bytes = frame::abi_reg_args_size;
  OopMap* map = new OopMap(frame_size_in_bytes / sizeof(jint), 0);

  // stack: (deoptee, optional i2c, caller_of_deoptee, ...).

  // Push a dummy `unpack_frame' and call
  // `Deoptimization::uncommon_trap' to pack the compiled frame into a
  // vframe array and return the `UnrollBlock' information.

  // Save LR to compiled frame.
  __ save_LR_CR(R11_scratch1);

  // Push an "uncommon_trap" frame.
  __ push_frame_reg_args(0, R11_scratch1);

  // stack: (unpack frame, deoptee, optional i2c, caller_of_deoptee, ...).

  // Set the `unpack_frame' as last_Java_frame.
  // `Deoptimization::uncommon_trap' expects it and considers its
  // sender frame as the deoptee frame.
  // Remember the offset of the instruction whose address will be
  // moved to R11_scratch1.
  address gc_map_pc = __ get_PC_trash_LR(R11_scratch1);

  __ set_last_Java_frame(/*sp*/R1_SP, /*pc*/R11_scratch1);

  __ mr(klass_index_reg, R3);
  __ li(R5_ARG3, Deoptimization::Unpack_uncommon_trap);
  __ call_VM_leaf(CAST_FROM_FN_PTR(address, Deoptimization::uncommon_trap),
                  R16_thread, klass_index_reg, R5_ARG3);

  // Set an oopmap for the call site.
  oop_maps->add_gc_map(gc_map_pc - start, map);

  __ reset_last_Java_frame();

  // Pop the `unpack frame'.
  __ pop_frame();

  // stack: (deoptee, optional i2c, caller_of_deoptee, ...).

  // Save the return value.
  __ mr(unroll_block_reg, R3_RET);

  // Pop the uncommon_trap frame.
  __ pop_frame();

  // stack: (caller_of_deoptee, ...).

#ifdef ASSERT
  __ lwz(R22_tmp2, Deoptimization::UnrollBlock::unpack_kind_offset_in_bytes(), unroll_block_reg);
  __ cmpdi(CCR0, R22_tmp2, (unsigned)Deoptimization::Unpack_uncommon_trap);
  __ asm_assert_eq("SharedRuntime::generate_uncommon_trap_blob: expected Unpack_uncommon_trap", 0);
#endif

  // Allocate new interpreter frame(s) and possibly a c2i adapter
  // frame.
  push_skeleton_frames(masm, false/*deopt*/,
                       unroll_block_reg,
                       R22_tmp2,
                       R23_tmp3,
                       R24_tmp4,
                       R25_tmp5,
                       R26_tmp6);

  // stack: (skeletal interpreter frame, ..., optional skeletal
  // interpreter frame, optional c2i, caller of deoptee, ...).

  // Push a dummy `unpack_frame' taking care of float return values.
  // Call `Deoptimization::unpack_frames' to lay out information in the
  // interpreter frames just created.

  // Push a simple "unpack frame" here.
  __ push_frame_reg_args(0, R11_scratch1);

  // stack: (unpack frame, skeletal interpreter frame, ..., optional
  // skeletal interpreter frame, optional c2i, caller of deoptee,
  // ...).

  // Set the "unpack_frame" as last_Java_frame.
  __ get_PC_trash_LR(R11_scratch1);
  __ set_last_Java_frame(/*sp*/R1_SP, /*pc*/R11_scratch1);

  // Indicate it is the uncommon trap case.
  __ li(unc_trap_reg, Deoptimization::Unpack_uncommon_trap);
  // Let the unpacker lay out information in the skeletal frames just
  // allocated.
  __ call_VM_leaf(CAST_FROM_FN_PTR(address, Deoptimization::unpack_frames),
                  R16_thread, unc_trap_reg);

  __ reset_last_Java_frame();
  // Pop the `unpack frame'.
  __ pop_frame();
  // Restore LR from top interpreter frame.
  __ restore_LR_CR(R11_scratch1);

  // stack: (top interpreter frame, ..., optional interpreter frame,
  // optional c2i, caller of deoptee, ...).

  __ restore_interpreter_state(R11_scratch1);
  __ load_const_optimized(R25_templateTableBase, (address)Interpreter::dispatch_table((TosState)0), R11_scratch1);

  // Return to the interpreter entry point.
  __ blr();

  masm->flush();

  _uncommon_trap_blob = UncommonTrapBlob::create(&buffer, oop_maps, frame_size_in_bytes/wordSize);
}
#endif // COMPILER2

// Generate a special Compile2Runtime blob that saves all registers and sets up the oopmap.
SafepointBlob* SharedRuntime::generate_handler_blob(address call_ptr, int poll_type) {
  assert(StubRoutines::forward_exception_entry() != NULL,
         "must be generated before");

  ResourceMark rm;
  OopMapSet *oop_maps = new OopMapSet();
  OopMap* map;

  // Allocate space for the code. Setup code generation tools.
  CodeBuffer buffer("handler_blob", 2048, 1024);
  MacroAssembler* masm = new MacroAssembler(&buffer);

  address start = __ pc();
  int frame_size_in_bytes = 0;

  RegisterSaver::ReturnPCLocation return_pc_location;
  bool cause_return = (poll_type == POLL_AT_RETURN);
  if (cause_return) {
    // Nothing to do here. The frame has already been popped in MachEpilogNode.
    // Register LR already contains the return pc.
    return_pc_location = RegisterSaver::return_pc_is_lr;
  } else {
    // Use thread()->saved_exception_pc() as return pc.
    return_pc_location = RegisterSaver::return_pc_is_thread_saved_exception_pc;
  }

  // Save registers, fpu state, and flags.
  map = RegisterSaver::push_frame_reg_args_and_save_live_registers(masm,
                                                                   &frame_size_in_bytes,
                                                                   /*generate_oop_map=*/ true,
                                                                   /*return_pc_adjustment=*/0,
                                                                   return_pc_location);

  // The following is basically a call_VM. However, we need the precise
  // address of the call in order to generate an oopmap. Hence, we do all the
  // work ourselves.
  __ set_last_Java_frame(/*sp=*/R1_SP, /*pc=*/noreg);

  // The return address must always be correct so that the frame constructor
  // never sees an invalid pc.

  // Do the call
  __ call_VM_leaf(call_ptr, R16_thread);
  address calls_return_pc = __ last_calls_return_pc();

  // Set an oopmap for the call site. This oopmap will map all
  // oop-registers and debug-info registers as callee-saved. This
  // will allow deoptimization at this safepoint to find all possible
  // debug-info recordings, as well as let GC find all oops.
  oop_maps->add_gc_map(calls_return_pc - start, map);

  Label noException;

  // Clear the last Java frame.
  __ reset_last_Java_frame();

  BLOCK_COMMENT("  Check pending exception.");
  const Register pending_exception = R0;
  __ ld(pending_exception, thread_(pending_exception));
  __ cmpdi(CCR0, pending_exception, 0);
  __ beq(CCR0, noException);

  // Exception pending
  RegisterSaver::restore_live_registers_and_pop_frame(masm,
                                                      frame_size_in_bytes,
                                                      /*restore_ctr=*/true);

  BLOCK_COMMENT("  Jump to forward_exception_entry.");
  // Jump to forward_exception_entry, with the issuing PC in LR
  // so it looks like the original nmethod called forward_exception_entry.
  __ b64_patchable(StubRoutines::forward_exception_entry(), relocInfo::runtime_call_type);

  // No exception case.
  __ BIND(noException);


  // Normal exit, restore registers and exit.
  RegisterSaver::restore_live_registers_and_pop_frame(masm,
                                                      frame_size_in_bytes,
                                                      /*restore_ctr=*/true);

  __ blr();

  // Make sure all code is generated
  masm->flush();

  // Fill-out other meta info
  // CodeBlob frame size is in words.
  return SafepointBlob::create(&buffer, oop_maps, frame_size_in_bytes / wordSize);
}

// generate_resolve_blob - call resolution (static/virtual/opt-virtual/ic-miss)
//
// Generate a stub that calls into the VM to find out the proper destination
// of a Java call. All the argument registers are live at this point,
// but since this is generic code we don't know what they are, and the
// caller must do any GC of the args.
//
RuntimeStub* SharedRuntime::generate_resolve_blob(address destination, const char* name) {

  // allocate space for the code
  ResourceMark rm;

  CodeBuffer buffer(name, 1000, 512);
  MacroAssembler* masm = new MacroAssembler(&buffer);

  int frame_size_in_bytes;

  OopMapSet *oop_maps = new OopMapSet();
  OopMap* map = NULL;

  address start = __ pc();

  map = RegisterSaver::push_frame_reg_args_and_save_live_registers(masm,
                                                                   &frame_size_in_bytes,
                                                                   /*generate_oop_map*/ true,
                                                                   /*return_pc_adjustment*/ 0,
                                                                   RegisterSaver::return_pc_is_lr);

  // Use noreg as last_Java_pc, the return pc will be reconstructed
  // from the physical frame.
  __ set_last_Java_frame(/*sp*/R1_SP, noreg);

  int frame_complete = __ offset();

  // Pass R19_method as 2nd (optional) argument, used by
  // counter_overflow_stub.
  __ call_VM_leaf(destination, R16_thread, R19_method);
  address calls_return_pc = __ last_calls_return_pc();
  // Set an oopmap for the call site.
  // We need this not only for callee-saved registers, but also for volatile
  // registers that the compiler might be keeping live across a safepoint.
  // Create the oopmap for the call's return pc.
  oop_maps->add_gc_map(calls_return_pc - start, map);

  // R3_RET contains the address we are going to jump to assuming no exception got installed.

  // clear last_Java_sp
  __ reset_last_Java_frame();

  // Check for pending exceptions.
  BLOCK_COMMENT("Check for pending exceptions.");
  Label pending;
  __ ld(R11_scratch1, thread_(pending_exception));
  __ cmpdi(CCR0, R11_scratch1, 0);
  __ bne(CCR0, pending);

  __ mtctr(R3_RET); // Ctr will not be touched by restore_live_registers_and_pop_frame.

  RegisterSaver::restore_live_registers_and_pop_frame(masm, frame_size_in_bytes, /*restore_ctr*/ false);

  // Get the returned method.
  __ get_vm_result_2(R19_method);

  __ bctr();


  // Pending exception after the safepoint.
  __ BIND(pending);

  RegisterSaver::restore_live_registers_and_pop_frame(masm, frame_size_in_bytes, /*restore_ctr*/ true);

  // exception pending => remove activation and forward to exception handler

  __ li(R11_scratch1, 0);
  __ ld(R3_ARG1, thread_(pending_exception));
  __ std(R11_scratch1, in_bytes(JavaThread::vm_result_offset()), R16_thread);
  __ b64_patchable(StubRoutines::forward_exception_entry(), relocInfo::runtime_call_type);

  // -------------
  // Make sure all code is generated.
  masm->flush();

  // return the blob
  // The frame size is passed in words (frame_size_in_bytes / wordSize).
  return RuntimeStub::new_runtime_stub(name, &buffer, frame_complete, frame_size_in_bytes/wordSize,
                                       oop_maps, true);
}


//------------------------------Montgomery multiplication------------------------
//

// Subtract 0:b from carry:a. Return carry.
static unsigned long
sub(unsigned long a[], unsigned long b[], unsigned long carry, long len) {
  long i = 0;
  unsigned long tmp, tmp2;
  __asm__ __volatile__ (
    "subfc  %[tmp], %[tmp], %[tmp]   \n" // pre-set CA
    "mtctr  %[len]                   \n"
    "0:                              \n"
    "ldx    %[tmp], %[i], %[a]       \n"
    "ldx    %[tmp2], %[i], %[b]      \n"
    "subfe  %[tmp], %[tmp2], %[tmp]  \n" // subtract extended
    "stdx   %[tmp], %[i], %[a]       \n"
    "addi   %[i], %[i], 8            \n"
    "bdnz   0b                       \n"
    "addme  %[tmp], %[carry]         \n" // carry + CA - 1
    : [i]"+b"(i), [tmp]"=&r"(tmp), [tmp2]"=&r"(tmp2)
    : [a]"r"(a), [b]"r"(b), [carry]"r"(carry), [len]"r"(len)
    : "ctr", "xer", "memory"
  );
  return tmp;
}
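
#if 0
// Illustrative sketch only (not part of the build): a portable C++ equivalent
// of the inline-assembly sub() above. It subtracts the len-limb value 0:b from
// the (len+1)-limb value carry:a in place and returns the new top limb.
// The name sub_reference is hypothetical and exists only for this example.
static unsigned long
sub_reference(unsigned long a[], unsigned long b[], unsigned long carry, long len) {
  unsigned long borrow = 0;
  for (long i = 0; i < len; i++) {
    unsigned long ai = a[i];
    a[i] = ai - b[i] - borrow;
    // Borrow out of this limb if a[i] was too small to cover b[i] plus the
    // incoming borrow.
    borrow = (ai < b[i] || (ai == b[i] && borrow)) ? 1 : 0;
  }
  return carry - borrow;
}
#endif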

// Multiply (unsigned) Long A by Long B, accumulating the double-
// length result into the accumulator formed of T0, T1, and T2.
inline void MACC(unsigned long A, unsigned long B, unsigned long &T0, unsigned long &T1, unsigned long &T2) {
  unsigned long hi, lo;
  __asm__ __volatile__ (
    "mulld  %[lo], %[A], %[B]    \n"
    "mulhdu %[hi], %[A], %[B]    \n"
    "addc   %[T0], %[T0], %[lo]  \n"
    "adde   %[T1], %[T1], %[hi]  \n"
    "addze  %[T2], %[T2]         \n"
    : [hi]"=&r"(hi), [lo]"=&r"(lo), [T0]"+r"(T0), [T1]"+r"(T1), [T2]"+r"(T2)
    : [A]"r"(A), [B]"r"(B)
    : "xer"
  );
}

// As above, but add twice the double-length result into the
// accumulator.
inline void MACC2(unsigned long A, unsigned long B, unsigned long &T0, unsigned long &T1, unsigned long &T2) {
  unsigned long hi, lo;
  __asm__ __volatile__ (
    "mulld  %[lo], %[A], %[B]    \n"
    "mulhdu %[hi], %[A], %[B]    \n"
    "addc   %[T0], %[T0], %[lo]  \n"
    "adde   %[T1], %[T1], %[hi]  \n"
    "addze  %[T2], %[T2]         \n"
    "addc   %[T0], %[T0], %[lo]  \n"
    "adde   %[T1], %[T1], %[hi]  \n"
    "addze  %[T2], %[T2]         \n"
    : [hi]"=&r"(hi), [lo]"=&r"(lo), [T0]"+r"(T0), [T1]"+r"(T1), [T2]"+r"(T2)
    : [A]"r"(A), [B]"r"(B)
    : "xer"
  );
}
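
#if 0
// Illustrative sketch only (not part of the build): what MACC/MACC2 compute,
// written with GCC's unsigned __int128 instead of inline assembly. T2:T1:T0 is
// treated as a 192-bit accumulator; MACC adds A*B once and MACC2 adds it twice.
// The name MACC_reference is hypothetical.
inline void MACC_reference(unsigned long A, unsigned long B,
                           unsigned long &T0, unsigned long &T1, unsigned long &T2) {
  unsigned __int128 prod = (unsigned __int128)A * B;
  unsigned __int128 sum  = (unsigned __int128)T0 + (unsigned long)prod;  // add low half
  T0 = (unsigned long)sum;
  sum = (unsigned __int128)T1 + (unsigned long)(prod >> 64) + (unsigned long)(sum >> 64);
  T1 = (unsigned long)sum;            // add high half plus carry out of T0
  T2 += (unsigned long)(sum >> 64);   // propagate the final carry
}
#endif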

// Fast Montgomery multiplication. The derivation of the algorithm is
// in "A Cryptographic Library for the Motorola DSP56000",
// Dusse and Kaliski, Proc. EUROCRYPT '90, pp. 230-237.
static void
montgomery_multiply(unsigned long a[], unsigned long b[], unsigned long n[],
                    unsigned long m[], unsigned long inv, int len) {
  unsigned long t0 = 0, t1 = 0, t2 = 0; // Triple-precision accumulator
  int i;

  assert(inv * n[0] == -1UL, "broken inverse in Montgomery multiply");

  for (i = 0; i < len; i++) {
    int j;
    for (j = 0; j < i; j++) {
      MACC(a[j], b[i-j], t0, t1, t2);
      MACC(m[j], n[i-j], t0, t1, t2);
    }
    MACC(a[i], b[0], t0, t1, t2);
    m[i] = t0 * inv;
    MACC(m[i], n[0], t0, t1, t2);

    assert(t0 == 0, "broken Montgomery multiply");

    t0 = t1; t1 = t2; t2 = 0;
  }

  for (i = len; i < 2*len; i++) {
    int j;
    for (j = i-len+1; j < len; j++) {
      MACC(a[j], b[i-j], t0, t1, t2);
      MACC(m[j], n[i-j], t0, t1, t2);
    }
    m[i-len] = t0;
    t0 = t1; t1 = t2; t2 = 0;
  }

  while (t0) {
    t0 = sub(m, n, t0, len);
  }
}
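
#if 0
// Illustrative sketch only (not part of the build): a single-limb check of the
// invariant maintained above. With R = 2^64 and inv == -n[0]^-1 mod R, the
// routine leaves m congruent to a * b * R^-1 (mod n); a final conditional
// subtraction of n may still be needed. The helper below is hypothetical.
static void montgomery_multiply_example() {
  unsigned long n[1] = { 0xfffffffffffffff1UL };  // odd modulus
  unsigned long a[1] = { 0x0123456789abcdefUL };
  unsigned long b[1] = { 0xfedcba9876543210UL };
  unsigned long m[1] = { 0 };
  // Compute inv = -n[0]^-1 mod 2^64 by Newton iteration; each step doubles the
  // number of correct low-order bits (1, 2, 4, ..., 64).
  unsigned long inv = 1;
  for (int k = 0; k < 6; k++) {
    inv *= 2 - n[0] * inv;
  }
  inv = -inv;                              // now inv * n[0] == -1 mod 2^64
  montgomery_multiply(a, b, n, m, inv, 1);
  // m[0] is now congruent to a[0] * b[0] * 2^-64 (mod n[0]).
}
#endif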

// Fast Montgomery squaring. This uses asymptotically 25% fewer
// multiplies so it should be up to 25% faster than Montgomery
// multiplication. However, its loop control is more complex and it
// may actually run slower on some machines.
static void
montgomery_square(unsigned long a[], unsigned long n[],
                  unsigned long m[], unsigned long inv, int len) {
  unsigned long t0 = 0, t1 = 0, t2 = 0; // Triple-precision accumulator
  int i;

  assert(inv * n[0] == -1UL, "broken inverse in Montgomery multiply");

  for (i = 0; i < len; i++) {
    int j;
    int end = (i+1)/2;
    for (j = 0; j < end; j++) {
      MACC2(a[j], a[i-j], t0, t1, t2);
      MACC(m[j], n[i-j], t0, t1, t2);
    }
    if ((i & 1) == 0) {
      MACC(a[j], a[j], t0, t1, t2);
    }
    for (; j < i; j++) {
      MACC(m[j], n[i-j], t0, t1, t2);
    }
    m[i] = t0 * inv;
    MACC(m[i], n[0], t0, t1, t2);

    assert(t0 == 0, "broken Montgomery square");

    t0 = t1; t1 = t2; t2 = 0;
  }

  for (i = len; i < 2*len; i++) {
    int start = i-len+1;
    int end = start + (len - start)/2;
    int j;
    for (j = start; j < end; j++) {
      MACC2(a[j], a[i-j], t0, t1, t2);
      MACC(m[j], n[i-j], t0, t1, t2);
    }
    if ((i & 1) == 0) {
      MACC(a[j], a[j], t0, t1, t2);
    }
    for (; j < len; j++) {
      MACC(m[j], n[i-j], t0, t1, t2);
    }
    m[i-len] = t0;
    t0 = t1; t1 = t2; t2 = 0;
  }

  while (t0) {
    t0 = sub(m, n, t0, len);
  }
}

// The threshold at which squaring is advantageous was determined
// experimentally on an i7-3930K (Sandy Bridge-E) CPU @ 3.5GHz.
// It doesn't seem to matter much on Power8, so we use the same value.
#define MONTGOMERY_SQUARING_THRESHOLD 64

// Copy len longwords from s to d, word-swapping as we go. The
// destination array is reversed.
static void reverse_words(unsigned long *s, unsigned long *d, int len) {
  d += len;
  while(len-- > 0) {
    d--;
    unsigned long s_val = *s;
    // Swap words in a longword on little endian machines.
#ifdef VM_LITTLE_ENDIAN
     s_val = (s_val << 32) | (s_val >> 32);
#endif
    *d = s_val;
    s++;
  }
}
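
#if 0
// Illustrative sketch only (not part of the build): the callers pass arrays of
// jints in most-significant-first order (as in a BigInteger magnitude array).
// On a little-endian machine the jint pair {0x00112233, 0x44556677} is loaded
// as the longword 0x4455667700112233; reverse_words() swaps the two halves and
// reverses the limb order, so the least significant 64-bit limb lands at
// index 0. The name reverse_words_example is hypothetical.
static void reverse_words_example() {
  unsigned long src[2] = { 0x4455667700112233UL,    // jints {0x00112233, 0x44556677}
                           0xccddeeff8899aabbUL };  // jints {0x8899aabb, 0xccddeeff}
  unsigned long dst[2];
  reverse_words(src, dst, 2);
  // On little-endian: dst[0] == 0x8899aabbccddeeffUL (least significant limb),
  //                    dst[1] == 0x0011223344556677UL (most significant limb).
}
#endif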

void SharedRuntime::montgomery_multiply(jint *a_ints, jint *b_ints, jint *n_ints,
                                        jint len, jlong inv,
                                        jint *m_ints) {
  len = len & 0x7fffFFFF; // C2 does not respect int to long conversion for stub calls.
  assert(len % 2 == 0, "array length in montgomery_multiply must be even");
  int longwords = len/2;

  // Make very sure we don't use so much space that the stack might
  // overflow. 512 jints correspond to a 16384-bit integer and
  // will use a total of 8k bytes of stack space here.
  int total_allocation = longwords * sizeof (unsigned long) * 4;
  guarantee(total_allocation <= 8192, "must be");
  unsigned long *scratch = (unsigned long *)alloca(total_allocation);

  // Local scratch arrays
  unsigned long
    *a = scratch + 0 * longwords,
    *b = scratch + 1 * longwords,
    *n = scratch + 2 * longwords,
    *m = scratch + 3 * longwords;

  reverse_words((unsigned long *)a_ints, a, longwords);
  reverse_words((unsigned long *)b_ints, b, longwords);
  reverse_words((unsigned long *)n_ints, n, longwords);

  ::montgomery_multiply(a, b, n, m, (unsigned long)inv, longwords);

  reverse_words(m, (unsigned long *)m_ints, longwords);
}

void SharedRuntime::montgomery_square(jint *a_ints, jint *n_ints,
                                      jint len, jlong inv,
                                      jint *m_ints) {
  len = len & 0x7fffFFFF; // C2 does not respect int to long conversion for stub calls.
  assert(len % 2 == 0, "array length in montgomery_square must be even");
  int longwords = len/2;

  // Make very sure we don't use so much space that the stack might
  // overflow. 512 jints correspond to a 16384-bit integer and
  // will use a total of 6k bytes of stack space here.
  int total_allocation = longwords * sizeof (unsigned long) * 3;
  guarantee(total_allocation <= 8192, "must be");
  unsigned long *scratch = (unsigned long *)alloca(total_allocation);

  // Local scratch arrays
  unsigned long
    *a = scratch + 0 * longwords,
    *n = scratch + 1 * longwords,
    *m = scratch + 2 * longwords;

  reverse_words((unsigned long *)a_ints, a, longwords);
  reverse_words((unsigned long *)n_ints, n, longwords);

  if (len >= MONTGOMERY_SQUARING_THRESHOLD) {
    ::montgomery_square(a, n, m, (unsigned long)inv, longwords);
  } else {
    ::montgomery_multiply(a, a, n, m, (unsigned long)inv, longwords);
  }

  reverse_words(m, (unsigned long *)m_ints, longwords);
}
