sharedRuntime_x86_32.cpp revision 3602:da91efe96a93
1/*
2 * Copyright (c) 2003, 2012, Oracle and/or its affiliates. All rights reserved.
3 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
4 *
5 * This code is free software; you can redistribute it and/or modify it
6 * under the terms of the GNU General Public License version 2 only, as
7 * published by the Free Software Foundation.
8 *
9 * This code is distributed in the hope that it will be useful, but WITHOUT
10 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
11 * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
12 * version 2 for more details (a copy is included in the LICENSE file that
13 * accompanied this code).
14 *
15 * You should have received a copy of the GNU General Public License version
16 * 2 along with this work; if not, write to the Free Software Foundation,
17 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
18 *
19 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
20 * or visit www.oracle.com if you need additional information or have any
21 * questions.
22 *
23 */
24
25#include "precompiled.hpp"
26#include "asm/assembler.hpp"
27#include "assembler_x86.inline.hpp"
28#include "code/debugInfoRec.hpp"
29#include "code/icBuffer.hpp"
30#include "code/vtableStubs.hpp"
31#include "interpreter/interpreter.hpp"
32#include "oops/compiledICHolder.hpp"
33#include "prims/jvmtiRedefineClassesTrace.hpp"
34#include "runtime/sharedRuntime.hpp"
35#include "runtime/vframeArray.hpp"
36#include "vmreg_x86.inline.hpp"
37#ifdef COMPILER1
38#include "c1/c1_Runtime1.hpp"
39#endif
40#ifdef COMPILER2
41#include "opto/runtime.hpp"
42#endif
43
44#define __ masm->
45
46const int StackAlignmentInSlots = StackAlignmentInBytes / VMRegImpl::stack_slot_size;
47
48class RegisterSaver {
49  enum { FPU_regs_live = 8 /*for the FPU stack*/+8/*eight more for XMM registers*/ };
50  // Capture info about frame layout
51  enum layout {
52                fpu_state_off = 0,
53                fpu_state_end = fpu_state_off+FPUStateSizeInWords-1,
54                st0_off, st0H_off,
55                st1_off, st1H_off,
56                st2_off, st2H_off,
57                st3_off, st3H_off,
58                st4_off, st4H_off,
59                st5_off, st5H_off,
60                st6_off, st6H_off,
61                st7_off, st7H_off,
62
63                xmm0_off, xmm0H_off,
64                xmm1_off, xmm1H_off,
65                xmm2_off, xmm2H_off,
66                xmm3_off, xmm3H_off,
67                xmm4_off, xmm4H_off,
68                xmm5_off, xmm5H_off,
69                xmm6_off, xmm6H_off,
70                xmm7_off, xmm7H_off,
71                flags_off,
72                rdi_off,
73                rsi_off,
74                ignore_off,  // extra copy of rbp
75                rsp_off,
76                rbx_off,
77                rdx_off,
78                rcx_off,
79                rax_off,
80                // The frame sender code expects that rbp will be in the "natural" place and
81                // will override any oopMap setting for it. We must therefore force the layout
82                // so that it agrees with the frame sender code.
83                rbp_off,
84                return_off,      // slot for return address
85                reg_save_size };
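  // A sketch of the resulting save-area layout (derived from the enum above;
  // assumes FPUStateSizeInWords == 27 as asserted in save_live_registers),
  // listed from low to high addresses:
  //   [FPU state]          27 words written by push_FPU_state
  //   [st0..st7]           eight x87 values in deopt-able form, two words each
  //   [xmm0..xmm7]         eight XMM values, two words each
  //   [flags]              pushed by pushf
  //   [rdi rsi rbp(copy) rsp rbx rdx rcx rax]   pushed by pusha
  //   [rbp]                pushed by enter()
  //   [return address]     already on the stack when we are called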
86
87
88  public:
89
90  static OopMap* save_live_registers(MacroAssembler* masm, int additional_frame_words,
91                                     int* total_frame_words, bool verify_fpu = true);
92  static void restore_live_registers(MacroAssembler* masm);
93
94  static int rax_offset() { return rax_off; }
95  static int rbx_offset() { return rbx_off; }
96
97  // Offsets into the register save area
98  // Used by deoptimization when it is managing result register
99  // values on its own
100
101  static int raxOffset(void) { return rax_off; }
102  static int rdxOffset(void) { return rdx_off; }
103  static int rbxOffset(void) { return rbx_off; }
104  static int xmm0Offset(void) { return xmm0_off; }
105  // This really returns a slot in the fp save area; which one is not important.
106  static int fpResultOffset(void) { return st0_off; }
107
108  // During deoptimization only the result registers need to be restored;
109  // all the other values have already been extracted.
110
111  static void restore_result_registers(MacroAssembler* masm);
112
113};
114
115OopMap* RegisterSaver::save_live_registers(MacroAssembler* masm, int additional_frame_words,
116                                           int* total_frame_words, bool verify_fpu) {
117
118  int frame_size_in_bytes =  (reg_save_size + additional_frame_words) * wordSize;
119  int frame_words = frame_size_in_bytes / wordSize;
120  *total_frame_words = frame_words;
121
122  assert(FPUStateSizeInWords == 27, "update stack layout");
123
124  // save registers, fpu state, and flags
125  // We assume the caller already has a return address slot on the stack.
126  // We push ebp twice in this sequence because we want the real rbp
127  // to be under the return address like a normal enter, and we want to use pusha.
128  // We reserve the FPU register save space by hand (subptr) rather than pushing it.
129  __ enter();
130  __ pusha();
131  __ pushf();
132  __ subptr(rsp,FPU_regs_live*sizeof(jdouble)); // Push FPU registers space
133  __ push_FPU_state();          // Save FPU state & init
134
135  if (verify_fpu) {
136    // Some stubs may have non-standard FPU control word settings, so
137    // only check and reset the value when it is required to be the
138    // standard value.  The safepoint blob in particular can be used
139    // in methods which are using the 24-bit control word for
140    // optimized float math.
141
142#ifdef ASSERT
143    // Make sure the control word has the expected value
144    Label ok;
145    __ cmpw(Address(rsp, 0), StubRoutines::fpu_cntrl_wrd_std());
146    __ jccb(Assembler::equal, ok);
147    __ stop("corrupted control word detected");
148    __ bind(ok);
149#endif
150
151    // Reset the control word to guard against exceptions being unmasked
152    // since fstp_d can cause FPU stack underflow exceptions.  Write it
153    // into the on stack copy and then reload that to make sure that the
154    // current and future values are correct.
155    __ movw(Address(rsp, 0), StubRoutines::fpu_cntrl_wrd_std());
156  }
157
158  __ frstor(Address(rsp, 0));
159  if (!verify_fpu) {
160    // Set the control word so that exceptions are masked for the
161    // following code.
162    __ fldcw(ExternalAddress(StubRoutines::addr_fpu_cntrl_wrd_std()));
163  }
164
165  // Save the FPU registers in de-opt-able form
166
167  __ fstp_d(Address(rsp, st0_off*wordSize)); // st(0)
168  __ fstp_d(Address(rsp, st1_off*wordSize)); // st(1)
169  __ fstp_d(Address(rsp, st2_off*wordSize)); // st(2)
170  __ fstp_d(Address(rsp, st3_off*wordSize)); // st(3)
171  __ fstp_d(Address(rsp, st4_off*wordSize)); // st(4)
172  __ fstp_d(Address(rsp, st5_off*wordSize)); // st(5)
173  __ fstp_d(Address(rsp, st6_off*wordSize)); // st(6)
174  __ fstp_d(Address(rsp, st7_off*wordSize)); // st(7)
175
176  if( UseSSE == 1 ) {           // Save the XMM state
177    __ movflt(Address(rsp,xmm0_off*wordSize),xmm0);
178    __ movflt(Address(rsp,xmm1_off*wordSize),xmm1);
179    __ movflt(Address(rsp,xmm2_off*wordSize),xmm2);
180    __ movflt(Address(rsp,xmm3_off*wordSize),xmm3);
181    __ movflt(Address(rsp,xmm4_off*wordSize),xmm4);
182    __ movflt(Address(rsp,xmm5_off*wordSize),xmm5);
183    __ movflt(Address(rsp,xmm6_off*wordSize),xmm6);
184    __ movflt(Address(rsp,xmm7_off*wordSize),xmm7);
185  } else if( UseSSE >= 2 ) {
186    __ movdbl(Address(rsp,xmm0_off*wordSize),xmm0);
187    __ movdbl(Address(rsp,xmm1_off*wordSize),xmm1);
188    __ movdbl(Address(rsp,xmm2_off*wordSize),xmm2);
189    __ movdbl(Address(rsp,xmm3_off*wordSize),xmm3);
190    __ movdbl(Address(rsp,xmm4_off*wordSize),xmm4);
191    __ movdbl(Address(rsp,xmm5_off*wordSize),xmm5);
192    __ movdbl(Address(rsp,xmm6_off*wordSize),xmm6);
193    __ movdbl(Address(rsp,xmm7_off*wordSize),xmm7);
194  }
195
196  // Set an oopmap for the call site.  This oopmap will map all
197  // oop-registers and debug-info registers as callee-saved.  This
198  // will allow deoptimization at this safepoint to find all possible
199  // debug-info recordings, as well as let GC find all oops.
200
201  OopMapSet *oop_maps = new OopMapSet();
202  OopMap* map =  new OopMap( frame_words, 0 );
203
204#define STACK_OFFSET(x) VMRegImpl::stack2reg((x) + additional_frame_words)
205
206  map->set_callee_saved(STACK_OFFSET( rax_off), rax->as_VMReg());
207  map->set_callee_saved(STACK_OFFSET( rcx_off), rcx->as_VMReg());
208  map->set_callee_saved(STACK_OFFSET( rdx_off), rdx->as_VMReg());
209  map->set_callee_saved(STACK_OFFSET( rbx_off), rbx->as_VMReg());
210  // rbp location is known implicitly; no oopMap entry needed
211  map->set_callee_saved(STACK_OFFSET( rsi_off), rsi->as_VMReg());
212  map->set_callee_saved(STACK_OFFSET( rdi_off), rdi->as_VMReg());
213  map->set_callee_saved(STACK_OFFSET(st0_off), as_FloatRegister(0)->as_VMReg());
214  map->set_callee_saved(STACK_OFFSET(st1_off), as_FloatRegister(1)->as_VMReg());
215  map->set_callee_saved(STACK_OFFSET(st2_off), as_FloatRegister(2)->as_VMReg());
216  map->set_callee_saved(STACK_OFFSET(st3_off), as_FloatRegister(3)->as_VMReg());
217  map->set_callee_saved(STACK_OFFSET(st4_off), as_FloatRegister(4)->as_VMReg());
218  map->set_callee_saved(STACK_OFFSET(st5_off), as_FloatRegister(5)->as_VMReg());
219  map->set_callee_saved(STACK_OFFSET(st6_off), as_FloatRegister(6)->as_VMReg());
220  map->set_callee_saved(STACK_OFFSET(st7_off), as_FloatRegister(7)->as_VMReg());
221  map->set_callee_saved(STACK_OFFSET(xmm0_off), xmm0->as_VMReg());
222  map->set_callee_saved(STACK_OFFSET(xmm1_off), xmm1->as_VMReg());
223  map->set_callee_saved(STACK_OFFSET(xmm2_off), xmm2->as_VMReg());
224  map->set_callee_saved(STACK_OFFSET(xmm3_off), xmm3->as_VMReg());
225  map->set_callee_saved(STACK_OFFSET(xmm4_off), xmm4->as_VMReg());
226  map->set_callee_saved(STACK_OFFSET(xmm5_off), xmm5->as_VMReg());
227  map->set_callee_saved(STACK_OFFSET(xmm6_off), xmm6->as_VMReg());
228  map->set_callee_saved(STACK_OFFSET(xmm7_off), xmm7->as_VMReg());
229  // %%% This is really a waste but we'll keep things as they were for now
230  if (true) {
231#define NEXTREG(x) (x)->as_VMReg()->next()
232    map->set_callee_saved(STACK_OFFSET(st0H_off), NEXTREG(as_FloatRegister(0)));
233    map->set_callee_saved(STACK_OFFSET(st1H_off), NEXTREG(as_FloatRegister(1)));
234    map->set_callee_saved(STACK_OFFSET(st2H_off), NEXTREG(as_FloatRegister(2)));
235    map->set_callee_saved(STACK_OFFSET(st3H_off), NEXTREG(as_FloatRegister(3)));
236    map->set_callee_saved(STACK_OFFSET(st4H_off), NEXTREG(as_FloatRegister(4)));
237    map->set_callee_saved(STACK_OFFSET(st5H_off), NEXTREG(as_FloatRegister(5)));
238    map->set_callee_saved(STACK_OFFSET(st6H_off), NEXTREG(as_FloatRegister(6)));
239    map->set_callee_saved(STACK_OFFSET(st7H_off), NEXTREG(as_FloatRegister(7)));
240    map->set_callee_saved(STACK_OFFSET(xmm0H_off), NEXTREG(xmm0));
241    map->set_callee_saved(STACK_OFFSET(xmm1H_off), NEXTREG(xmm1));
242    map->set_callee_saved(STACK_OFFSET(xmm2H_off), NEXTREG(xmm2));
243    map->set_callee_saved(STACK_OFFSET(xmm3H_off), NEXTREG(xmm3));
244    map->set_callee_saved(STACK_OFFSET(xmm4H_off), NEXTREG(xmm4));
245    map->set_callee_saved(STACK_OFFSET(xmm5H_off), NEXTREG(xmm5));
246    map->set_callee_saved(STACK_OFFSET(xmm6H_off), NEXTREG(xmm6));
247    map->set_callee_saved(STACK_OFFSET(xmm7H_off), NEXTREG(xmm7));
248#undef NEXTREG
249#undef STACK_OFFSET
250  }
251
252  return map;
253
254}
255
256void RegisterSaver::restore_live_registers(MacroAssembler* masm) {
257
258  // Recover XMM & FPU state
259  if( UseSSE == 1 ) {
260    __ movflt(xmm0,Address(rsp,xmm0_off*wordSize));
261    __ movflt(xmm1,Address(rsp,xmm1_off*wordSize));
262    __ movflt(xmm2,Address(rsp,xmm2_off*wordSize));
263    __ movflt(xmm3,Address(rsp,xmm3_off*wordSize));
264    __ movflt(xmm4,Address(rsp,xmm4_off*wordSize));
265    __ movflt(xmm5,Address(rsp,xmm5_off*wordSize));
266    __ movflt(xmm6,Address(rsp,xmm6_off*wordSize));
267    __ movflt(xmm7,Address(rsp,xmm7_off*wordSize));
268  } else if( UseSSE >= 2 ) {
269    __ movdbl(xmm0,Address(rsp,xmm0_off*wordSize));
270    __ movdbl(xmm1,Address(rsp,xmm1_off*wordSize));
271    __ movdbl(xmm2,Address(rsp,xmm2_off*wordSize));
272    __ movdbl(xmm3,Address(rsp,xmm3_off*wordSize));
273    __ movdbl(xmm4,Address(rsp,xmm4_off*wordSize));
274    __ movdbl(xmm5,Address(rsp,xmm5_off*wordSize));
275    __ movdbl(xmm6,Address(rsp,xmm6_off*wordSize));
276    __ movdbl(xmm7,Address(rsp,xmm7_off*wordSize));
277  }
278  __ pop_FPU_state();
279  __ addptr(rsp, FPU_regs_live*sizeof(jdouble)); // Pop FPU registers
280
281  __ popf();
282  __ popa();
283  // Get rbp, which is described implicitly by the frame sender code (no oopMap)
284  __ pop(rbp);
285
286}
287
288void RegisterSaver::restore_result_registers(MacroAssembler* masm) {
289
290  // Just restore result registers. Only used by deoptimization. By
291  // now any callee-save register that needs to be restored to a c2
292  // caller of the deoptee has been extracted into the vframeArray
293  // and will be stuffed into the c2i adapter we create for later
294  // restoration, so only result registers need to be restored here.
295  //
296
297  __ frstor(Address(rsp, 0));      // Restore fpu state
298
299  // Recover XMM & FPU state
300  if( UseSSE == 1 ) {
301    __ movflt(xmm0, Address(rsp, xmm0_off*wordSize));
302  } else if( UseSSE >= 2 ) {
303    __ movdbl(xmm0, Address(rsp, xmm0_off*wordSize));
304  }
305  __ movptr(rax, Address(rsp, rax_off*wordSize));
306  __ movptr(rdx, Address(rsp, rdx_off*wordSize));
307  // Pop all of the register save area off the stack except the return address
308  __ addptr(rsp, return_off * wordSize);
309}
310
311// The java_calling_convention describes stack locations as ideal slots on
312// a frame with no abi restrictions. Since we must observe abi restrictions
313// (like the placement of the register window) the slots must be biased by
314// the following value.
315static int reg2offset_in(VMReg r) {
316  // Account for saved rbp, and return address
317  // This should really be in_preserve_stack_slots
318  return (r->reg2stack() + 2) * VMRegImpl::stack_slot_size;
319}
320
321static int reg2offset_out(VMReg r) {
322  return (r->reg2stack() + SharedRuntime::out_preserve_stack_slots()) * VMRegImpl::stack_slot_size;
323}
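// A worked example of the two helpers above (a sketch; assumes the usual
// 4-byte VMRegImpl::stack_slot_size on this 32-bit port): an incoming stack
// argument in slot 0 is read at Address(rbp, 8) and slot 1 at Address(rbp, 12),
// just above the saved rbp and the return address, while an outgoing argument
// in slot 0 is written at Address(rsp, 0) plus whatever
// out_preserve_stack_slots() reserves.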
324
325// ---------------------------------------------------------------------------
326// Read the array of BasicTypes from a signature, and compute where the
327// arguments should go.  Values in the VMRegPair regs array refer to 4-byte
328// quantities.  Values less than VMRegImpl::stack0 are registers; those above
329// refer to 4-byte stack slots.  All stack slots are based off of the stack pointer,
330// as frame sizes are fixed.
331// VMRegImpl::stack0 refers to the first slot, 0(sp),
332// and VMRegImpl::stack0+1 refers to the memory word 4 bytes higher.  Registers
333// (up to RegisterImpl::number_of_registers) are the 32-bit
334// integer registers.
335
336// Pass first two oop/int args in registers ECX and EDX.
337// Pass first two float/double args in registers XMM0 and XMM1.
338// Doubles have precedence, so if you pass a mix of floats and doubles
339// the doubles will grab the registers before the floats will.
340
341// Note: the INPUTS in sig_bt are in units of Java argument words, which are
342// either 32-bit or 64-bit depending on the build.  The OUTPUTS are in 32-bit
343// units regardless of build. Of course for i486 there is no 64 bit build
344
345
346// ---------------------------------------------------------------------------
347// The compiled Java calling convention.
348// Pass first two oop/int args in registers ECX and EDX.
349// Pass first two float/double args in registers XMM0 and XMM1.
350// Doubles have precedence, so if you pass a mix of floats and doubles
351// the doubles will grab the registers before the floats will.
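// A worked example (illustrative only; assumes UseSSE >= 2 and a hypothetical
// signature (Object a, int b, int c, double d), i.e.
// sig_bt = {T_OBJECT, T_INT, T_INT, T_DOUBLE, T_VOID}):
//   a (T_OBJECT) -> ECX          b (T_INT)    -> EDX
//   c (T_INT)    -> stack slot 0
//   d (T_DOUBLE) -> XMM0         (its trailing T_VOID half is set_bad())
// and the routine returns round_to(1, 2) == 2 outgoing stack slots.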
352int SharedRuntime::java_calling_convention(const BasicType *sig_bt,
353                                           VMRegPair *regs,
354                                           int total_args_passed,
355                                           int is_outgoing) {
356  uint    stack = 0;          // Starting stack position for args on stack
357
358
359  // Pass first two oop/int args in registers ECX and EDX.
360  uint reg_arg0 = 9999;
361  uint reg_arg1 = 9999;
362
363  // Pass first two float/double args in registers XMM0 and XMM1.
364  // Doubles have precedence, so if you pass a mix of floats and doubles
365  // the doubles will grab the registers before the floats will.
366  // CNC - TURNED OFF FOR non-SSE.
367  //       On Intel we have to round all doubles (and most floats) at
368  //       call sites by storing to the stack in any case.
369  // UseSSE=0 ==> Don't Use ==> 9999+0
370  // UseSSE=1 ==> Floats only ==> 9999+1
371  // UseSSE>=2 ==> Floats or doubles ==> 9999+2
372  enum { fltarg_dontuse = 9999+0, fltarg_float_only = 9999+1, fltarg_flt_dbl = 9999+2 };
373  uint fargs = (UseSSE>=2) ? 2 : UseSSE;
374  uint freg_arg0 = 9999+fargs;
375  uint freg_arg1 = 9999+fargs;
376
377  // Pass doubles & longs aligned on the stack.  First count stack slots for doubles
378  int i;
379  for( i = 0; i < total_args_passed; i++) {
380    if( sig_bt[i] == T_DOUBLE ) {
381      // first 2 doubles go in registers
382      if( freg_arg0 == fltarg_flt_dbl ) freg_arg0 = i;
383      else if( freg_arg1 == fltarg_flt_dbl ) freg_arg1 = i;
384      else // Else double is passed low on the stack to be aligned.
385        stack += 2;
386    } else if( sig_bt[i] == T_LONG ) {
387      stack += 2;
388    }
389  }
390  int dstack = 0;             // Separate counter for placing doubles
391
392  // Now pick where all else goes.
393  for( i = 0; i < total_args_passed; i++) {
394    // From the type and the argument number (count) compute the location
395    switch( sig_bt[i] ) {
396    case T_SHORT:
397    case T_CHAR:
398    case T_BYTE:
399    case T_BOOLEAN:
400    case T_INT:
401    case T_ARRAY:
402    case T_OBJECT:
403    case T_ADDRESS:
404      if( reg_arg0 == 9999 )  {
405        reg_arg0 = i;
406        regs[i].set1(rcx->as_VMReg());
407      } else if( reg_arg1 == 9999 )  {
408        reg_arg1 = i;
409        regs[i].set1(rdx->as_VMReg());
410      } else {
411        regs[i].set1(VMRegImpl::stack2reg(stack++));
412      }
413      break;
414    case T_FLOAT:
415      if( freg_arg0 == fltarg_flt_dbl || freg_arg0 == fltarg_float_only ) {
416        freg_arg0 = i;
417        regs[i].set1(xmm0->as_VMReg());
418      } else if( freg_arg1 == fltarg_flt_dbl || freg_arg1 == fltarg_float_only ) {
419        freg_arg1 = i;
420        regs[i].set1(xmm1->as_VMReg());
421      } else {
422        regs[i].set1(VMRegImpl::stack2reg(stack++));
423      }
424      break;
425    case T_LONG:
426      assert(sig_bt[i+1] == T_VOID, "missing Half" );
427      regs[i].set2(VMRegImpl::stack2reg(dstack));
428      dstack += 2;
429      break;
430    case T_DOUBLE:
431      assert(sig_bt[i+1] == T_VOID, "missing Half" );
432      if( freg_arg0 == (uint)i ) {
433        regs[i].set2(xmm0->as_VMReg());
434      } else if( freg_arg1 == (uint)i ) {
435        regs[i].set2(xmm1->as_VMReg());
436      } else {
437        regs[i].set2(VMRegImpl::stack2reg(dstack));
438        dstack += 2;
439      }
440      break;
441    case T_VOID: regs[i].set_bad(); break;
442
443    default:
444      ShouldNotReachHere();
445      break;
446    }
447  }
448
449  // The return value can be an odd number of VMRegImpl stack slots; make it a multiple of 2.
450  return round_to(stack, 2);
451}
452
453// Patch the caller's callsite with the entry to compiled code if it exists.
454static void patch_callers_callsite(MacroAssembler *masm) {
455  Label L;
456  __ cmpptr(Address(rbx, in_bytes(Method::code_offset())), (int32_t)NULL_WORD);
457  __ jcc(Assembler::equal, L);
458  // Schedule the branch target address early.
459  // Call into the VM to patch the caller, then jump to compiled callee
460  // rax isn't live, so capture the return address while we easily can
461  __ movptr(rax, Address(rsp, 0));
462  __ pusha();
463  __ pushf();
464
465  if (UseSSE == 1) {
466    __ subptr(rsp, 2*wordSize);
467    __ movflt(Address(rsp, 0), xmm0);
468    __ movflt(Address(rsp, wordSize), xmm1);
469  }
470  if (UseSSE >= 2) {
471    __ subptr(rsp, 4*wordSize);
472    __ movdbl(Address(rsp, 0), xmm0);
473    __ movdbl(Address(rsp, 2*wordSize), xmm1);
474  }
475#ifdef COMPILER2
476  // C2 may leave the stack dirty if not in SSE2+ mode
477  if (UseSSE >= 2) {
478    __ verify_FPU(0, "c2i transition should have clean FPU stack");
479  } else {
480    __ empty_FPU_stack();
481  }
482#endif /* COMPILER2 */
483
484  // VM needs caller's callsite
485  __ push(rax);
486  // VM needs target method
487  __ push(rbx);
488  __ call(RuntimeAddress(CAST_FROM_FN_PTR(address, SharedRuntime::fixup_callers_callsite)));
489  __ addptr(rsp, 2*wordSize);
490
491  if (UseSSE == 1) {
492    __ movflt(xmm0, Address(rsp, 0));
493    __ movflt(xmm1, Address(rsp, wordSize));
494    __ addptr(rsp, 2*wordSize);
495  }
496  if (UseSSE >= 2) {
497    __ movdbl(xmm0, Address(rsp, 0));
498    __ movdbl(xmm1, Address(rsp, 2*wordSize));
499    __ addptr(rsp, 4*wordSize);
500  }
501
502  __ popf();
503  __ popa();
504  __ bind(L);
505}
506
507
508static void move_c2i_double(MacroAssembler *masm, XMMRegister r, int st_off) {
509  int next_off = st_off - Interpreter::stackElementSize;
510  __ movdbl(Address(rsp, next_off), r);
511}
512
513static void gen_c2i_adapter(MacroAssembler *masm,
514                            int total_args_passed,
515                            int comp_args_on_stack,
516                            const BasicType *sig_bt,
517                            const VMRegPair *regs,
518                            Label& skip_fixup) {
519  // Before we get into the guts of the C2I adapter, see if we should be here
520  // at all.  We've come from compiled code and are attempting to jump to the
521  // interpreter, which means the caller made a static call to get here
522  // (vcalls always get a compiled target if there is one).  Check for a
523  // compiled target.  If there is one, we need to patch the caller's call.
524  patch_callers_callsite(masm);
525
526  __ bind(skip_fixup);
527
528#ifdef COMPILER2
529  // C2 may leave the stack dirty if not in SSE2+ mode
530  if (UseSSE >= 2) {
531    __ verify_FPU(0, "c2i transition should have clean FPU stack");
532  } else {
533    __ empty_FPU_stack();
534  }
535#endif /* COMPILER2 */
536
537  // Since all args are passed on the stack,
538  // total_args_passed * Interpreter::stackElementSize
539  // is the space we need.
540  int extraspace = total_args_passed * Interpreter::stackElementSize;
541
542  // Get return address
543  __ pop(rax);
544
545  // set senderSP value
546  __ movptr(rsi, rsp);
547
548  __ subptr(rsp, extraspace);
549
550  // Now write the args into the outgoing interpreter space
551  for (int i = 0; i < total_args_passed; i++) {
552    if (sig_bt[i] == T_VOID) {
553      assert(i > 0 && (sig_bt[i-1] == T_LONG || sig_bt[i-1] == T_DOUBLE), "missing half");
554      continue;
555    }
556
557    // st_off points to lowest address on stack.
558    int st_off = ((total_args_passed - 1) - i) * Interpreter::stackElementSize;
559    int next_off = st_off - Interpreter::stackElementSize;
560
561    // Say 4 args:
562    // i   st_off
563    // 0   12 T_LONG
564    // 1    8 T_VOID
565    // 2    4 T_OBJECT
566    // 3    0 T_BOOL
567    VMReg r_1 = regs[i].first();
568    VMReg r_2 = regs[i].second();
569    if (!r_1->is_valid()) {
570      assert(!r_2->is_valid(), "");
571      continue;
572    }
573
574    if (r_1->is_stack()) {
575      // memory to memory; copy via rdi as a scratch register
576      int ld_off = r_1->reg2stack() * VMRegImpl::stack_slot_size + extraspace;
577
578      if (!r_2->is_valid()) {
579        __ movl(rdi, Address(rsp, ld_off));
580        __ movptr(Address(rsp, st_off), rdi);
581      } else {
582
583        // ld_off == LSW, ld_off+VMRegImpl::stack_slot_size == MSW
584        // st_off == MSW, st_off-wordSize == LSW
585
586        __ movptr(rdi, Address(rsp, ld_off));
587        __ movptr(Address(rsp, next_off), rdi);
588#ifndef _LP64
589        __ movptr(rdi, Address(rsp, ld_off + wordSize));
590        __ movptr(Address(rsp, st_off), rdi);
591#else
592#ifdef ASSERT
593        // Overwrite the unused slot with known junk
594        __ mov64(rax, CONST64(0xdeadffffdeadaaaa));
595        __ movptr(Address(rsp, st_off), rax);
596#endif /* ASSERT */
597#endif // _LP64
598      }
599    } else if (r_1->is_Register()) {
600      Register r = r_1->as_Register();
601      if (!r_2->is_valid()) {
602        __ movl(Address(rsp, st_off), r);
603      } else {
604        // long/double in gpr
605        NOT_LP64(ShouldNotReachHere());
606        // Two VMRegs can be T_OBJECT, T_ADDRESS, T_DOUBLE, T_LONG
607        // T_DOUBLE and T_LONG use two slots in the interpreter
608        if ( sig_bt[i] == T_LONG || sig_bt[i] == T_DOUBLE) {
609          // long/double in gpr
610#ifdef ASSERT
611          // Overwrite the unused slot with known junk
612          LP64_ONLY(__ mov64(rax, CONST64(0xdeadffffdeadaaab)));
613          __ movptr(Address(rsp, st_off), rax);
614#endif /* ASSERT */
615          __ movptr(Address(rsp, next_off), r);
616        } else {
617          __ movptr(Address(rsp, st_off), r);
618        }
619      }
620    } else {
621      assert(r_1->is_XMMRegister(), "");
622      if (!r_2->is_valid()) {
623        __ movflt(Address(rsp, st_off), r_1->as_XMMRegister());
624      } else {
625        assert(sig_bt[i] == T_DOUBLE || sig_bt[i] == T_LONG, "wrong type");
626        move_c2i_double(masm, r_1->as_XMMRegister(), st_off);
627      }
628    }
629  }
630
631  // Schedule the branch target address early.
632  __ movptr(rcx, Address(rbx, in_bytes(Method::interpreter_entry_offset())));
633  // And repush original return address
634  __ push(rax);
635  __ jmp(rcx);
636}
637
638
639static void move_i2c_double(MacroAssembler *masm, XMMRegister r, Register saved_sp, int ld_off) {
640  int next_val_off = ld_off - Interpreter::stackElementSize;
641  __ movdbl(r, Address(saved_sp, next_val_off));
642}
643
644static void range_check(MacroAssembler* masm, Register pc_reg, Register temp_reg,
645                        address code_start, address code_end,
646                        Label& L_ok) {
647  Label L_fail;
648  __ lea(temp_reg, ExternalAddress(code_start));
649  __ cmpptr(pc_reg, temp_reg);
650  __ jcc(Assembler::belowEqual, L_fail);
651  __ lea(temp_reg, ExternalAddress(code_end));
652  __ cmpptr(pc_reg, temp_reg);
653  __ jcc(Assembler::below, L_ok);
654  __ bind(L_fail);
655}
656
657static void gen_i2c_adapter(MacroAssembler *masm,
658                            int total_args_passed,
659                            int comp_args_on_stack,
660                            const BasicType *sig_bt,
661                            const VMRegPair *regs) {
662
663  // Note: rsi contains the senderSP on entry. We must preserve it since
664  // we may do an i2c -> c2i transition if we lose a race where compiled
665  // code goes non-entrant while we get the args ready.
666
667  // Adapters can be frameless because they do not require the caller
668  // to perform additional cleanup work, such as correcting the stack pointer.
669  // An i2c adapter is frameless because the *caller* frame, which is interpreted,
670  // routinely repairs its own stack pointer (from interpreter_frame_last_sp),
671  // even if a callee has modified the stack pointer.
672  // A c2i adapter is frameless because the *callee* frame, which is interpreted,
673  // routinely repairs its caller's stack pointer (from sender_sp, which is set
674  // up via the senderSP register).
675  // In other words, if *either* the caller or callee is interpreted, we can
676  // get the stack pointer repaired after a call.
677  // This is why c2i and i2c adapters cannot be indefinitely composed.
678  // In particular, if a c2i adapter were to somehow call an i2c adapter,
679  // both caller and callee would be compiled methods, and neither would
680  // clean up the stack pointer changes performed by the two adapters.
681  // If this happens, control eventually transfers back to the compiled
682  // caller, but with an uncorrected stack, causing delayed havoc.
683
684  // Pick up the return address
685  __ movptr(rax, Address(rsp, 0));
686
687  if (VerifyAdapterCalls &&
688      (Interpreter::code() != NULL || StubRoutines::code1() != NULL)) {
689    // So, let's test for cascading c2i/i2c adapters right now.
690    //  assert(Interpreter::contains($return_addr) ||
691    //         StubRoutines::contains($return_addr),
692    //         "i2c adapter must return to an interpreter frame");
693    __ block_comment("verify_i2c { ");
694    Label L_ok;
695    if (Interpreter::code() != NULL)
696      range_check(masm, rax, rdi,
697                  Interpreter::code()->code_start(), Interpreter::code()->code_end(),
698                  L_ok);
699    if (StubRoutines::code1() != NULL)
700      range_check(masm, rax, rdi,
701                  StubRoutines::code1()->code_begin(), StubRoutines::code1()->code_end(),
702                  L_ok);
703    if (StubRoutines::code2() != NULL)
704      range_check(masm, rax, rdi,
705                  StubRoutines::code2()->code_begin(), StubRoutines::code2()->code_end(),
706                  L_ok);
707    const char* msg = "i2c adapter must return to an interpreter frame";
708    __ block_comment(msg);
709    __ stop(msg);
710    __ bind(L_ok);
711    __ block_comment("} verify_i2c ");
712  }
713
714  // Must preserve original SP for loading incoming arguments because
715  // we need to align the outgoing SP for compiled code.
716  __ movptr(rdi, rsp);
717
718  // Cut-out for having no stack args.  Since up to 2 int/oop args are passed
719  // in registers, we will occasionally have no stack args.
720  int comp_words_on_stack = 0;
721  if (comp_args_on_stack) {
722    // Sig words on the stack are greater-than VMRegImpl::stack0.  Those in
723    // registers are below.  By subtracting stack0, we either get a negative
724    // number (all values in registers) or the maximum stack slot accessed.
725    // int comp_args_on_stack = VMRegImpl::reg2stack(max_arg);
726    // Convert 4-byte stack slots to words.
727    comp_words_on_stack = round_to(comp_args_on_stack*4, wordSize)>>LogBytesPerWord;
728    // Round up to minimum stack alignment, in wordSize
729    comp_words_on_stack = round_to(comp_words_on_stack, 2);
730    __ subptr(rsp, comp_words_on_stack * wordSize);
731  }
732
733  // Align the outgoing SP
734  __ andptr(rsp, -(StackAlignmentInBytes));
735
736  // push the return address on the stack (note that pushing, rather
737  // than storing it, yields the correct frame alignment for the callee)
738  __ push(rax);
739
740  // Put saved SP in another register
741  const Register saved_sp = rax;
742  __ movptr(saved_sp, rdi);
743
744
745  // Will jump to the compiled code just as if compiled code was doing it.
746  // Pre-load the register-jump target early, to schedule it better.
747  __ movptr(rdi, Address(rbx, in_bytes(Method::from_compiled_offset())));
748
749  // Now generate the shuffle code.  Pick up all register args and move the
750  // rest through the floating point stack top.
751  for (int i = 0; i < total_args_passed; i++) {
752    if (sig_bt[i] == T_VOID) {
753      // Longs and doubles are passed in native word order, but misaligned
754      // in the 32-bit build.
755      assert(i > 0 && (sig_bt[i-1] == T_LONG || sig_bt[i-1] == T_DOUBLE), "missing half");
756      continue;
757    }
758
759    // Pick up 0, 1 or 2 words from SP+offset.
760
761    assert(!regs[i].second()->is_valid() || regs[i].first()->next() == regs[i].second(),
762            "scrambled load targets?");
763    // Load in argument order going down.
764    int ld_off = (total_args_passed - i) * Interpreter::stackElementSize;
765    // Point to interpreter value (vs. tag)
766    int next_off = ld_off - Interpreter::stackElementSize;
767    //
768    //
769    //
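    // Worked offsets (illustrative only; assumes total_args_passed == 4 and
    // Interpreter::stackElementSize == 4): i == 0 gives ld_off 16, i == 1
    // gives 12, i == 2 gives 8 and i == 3 gives 4; a long or double at
    // position i picks up its (low) word from next_off == ld_off - 4.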
770    VMReg r_1 = regs[i].first();
771    VMReg r_2 = regs[i].second();
772    if (!r_1->is_valid()) {
773      assert(!r_2->is_valid(), "");
774      continue;
775    }
776    if (r_1->is_stack()) {
777      // Convert stack slot to an SP offset (+ wordSize to account for return address )
778      int st_off = regs[i].first()->reg2stack()*VMRegImpl::stack_slot_size + wordSize;
779
780      // We can use rsi as a temp here because compiled code doesn't need rsi as an input,
781      // and if we end up going through a c2i because of a miss, a reasonable value of rsi
782      // will be generated.
783      if (!r_2->is_valid()) {
784        // __ fld_s(Address(saved_sp, ld_off));
785        // __ fstp_s(Address(rsp, st_off));
786        __ movl(rsi, Address(saved_sp, ld_off));
787        __ movptr(Address(rsp, st_off), rsi);
788      } else {
789        // Interpreter local[n] == MSW, local[n+1] == LSW however locals
790        // are accessed as negative so LSW is at LOW address
791
792        // ld_off is MSW so get LSW
793        // st_off is LSW (i.e. reg.first())
794        // __ fld_d(Address(saved_sp, next_off));
795        // __ fstp_d(Address(rsp, st_off));
796        //
797        // We are using two VMRegs. This can be either T_OBJECT, T_ADDRESS, T_LONG, or T_DOUBLE.
798        // The interpreter allocates two slots but only uses one for the T_LONG or T_DOUBLE case,
799        // so we must adjust where to pick up the data to match the interpreter.
800        //
801        // Interpreter local[n] == MSW, local[n+1] == LSW however locals
802        // are accessed as negative so LSW is at LOW address
803
804        // ld_off is MSW so get LSW
805        const int offset = (NOT_LP64(true ||) sig_bt[i]==T_LONG||sig_bt[i]==T_DOUBLE)?
806                           next_off : ld_off;
807        __ movptr(rsi, Address(saved_sp, offset));
808        __ movptr(Address(rsp, st_off), rsi);
809#ifndef _LP64
810        __ movptr(rsi, Address(saved_sp, ld_off));
811        __ movptr(Address(rsp, st_off + wordSize), rsi);
812#endif // _LP64
813      }
814    } else if (r_1->is_Register()) {  // Register argument
815      Register r = r_1->as_Register();
816      assert(r != rax, "must be different");
817      if (r_2->is_valid()) {
818        //
819        // We are using two VMRegs. This can be either T_OBJECT, T_ADDRESS, T_LONG, or T_DOUBLE.
820        // The interpreter allocates two slots but only uses one for the T_LONG or T_DOUBLE case,
821        // so we must adjust where to pick up the data to match the interpreter.
822
823        const int offset = (NOT_LP64(true ||) sig_bt[i]==T_LONG||sig_bt[i]==T_DOUBLE)?
824                           next_off : ld_off;
825
826        // this can be a misaligned move
827        __ movptr(r, Address(saved_sp, offset));
828#ifndef _LP64
829        assert(r_2->as_Register() != rax, "need another temporary register");
830        // Remember r_1 is low address (and LSB on x86)
831        // So r_2 gets loaded from high address regardless of the platform
832        __ movptr(r_2->as_Register(), Address(saved_sp, ld_off));
833#endif // _LP64
834      } else {
835        __ movl(r, Address(saved_sp, ld_off));
836      }
837    } else {
838      assert(r_1->is_XMMRegister(), "");
839      if (!r_2->is_valid()) {
840        __ movflt(r_1->as_XMMRegister(), Address(saved_sp, ld_off));
841      } else {
842        move_i2c_double(masm, r_1->as_XMMRegister(), saved_sp, ld_off);
843      }
844    }
845  }
846
847  // 6243940 We might end up in handle_wrong_method if
848  // the callee is deoptimized as we race thru here. If that
849  // happens we don't want to take a safepoint because the
850  // caller frame will look interpreted and arguments are now
851  // "compiled" so it is much better to make this transition
852  // invisible to the stack walking code. Unfortunately if
853  // we try and find the callee by normal means a safepoint
854  // is possible. So we stash the desired callee in the thread,
855  // and the VM will find it there should this case occur.
856
857  __ get_thread(rax);
858  __ movptr(Address(rax, JavaThread::callee_target_offset()), rbx);
859
860  // move Method* to rax in case we end up in a c2i adapter.
861  // The c2i adapters expect Method* in rax (c2) because c2's
862  // resolve stubs return the result (the method) in rax.
863  // I'd love to fix this.
864  __ mov(rax, rbx);
865
866  __ jmp(rdi);
867}
868
869// ---------------------------------------------------------------
870AdapterHandlerEntry* SharedRuntime::generate_i2c2i_adapters(MacroAssembler *masm,
871                                                            int total_args_passed,
872                                                            int comp_args_on_stack,
873                                                            const BasicType *sig_bt,
874                                                            const VMRegPair *regs,
875                                                            AdapterFingerPrint* fingerprint) {
876  address i2c_entry = __ pc();
877
878  gen_i2c_adapter(masm, total_args_passed, comp_args_on_stack, sig_bt, regs);
879
880  // -------------------------------------------------------------------------
881  // Generate a C2I adapter.  On entry we know rbx holds the Method* during calls
882  // to the interpreter.  The args start out packed in the compiled layout.  They
883  // need to be unpacked into the interpreter layout.  This will almost always
884  // require some stack space.  We grow the current (compiled) stack, then repack
885  // the args.  We finally end in a jump to the generic interpreter entry point.
886  // On exit from the interpreter, the interpreter will restore our SP (lest the
887  // compiled code, which relies solely on SP and not EBP, get sick).
888
889  address c2i_unverified_entry = __ pc();
890  Label skip_fixup;
891
892  Register holder = rax;
893  Register receiver = rcx;
894  Register temp = rbx;
895
896  {
897
898    Label missed;
899    __ movptr(temp, Address(receiver, oopDesc::klass_offset_in_bytes()));
900    __ cmpptr(temp, Address(holder, CompiledICHolder::holder_klass_offset()));
901    __ movptr(rbx, Address(holder, CompiledICHolder::holder_method_offset()));
902    __ jcc(Assembler::notEqual, missed);
903    // Method might have been compiled since the call site was patched to
904    // interpreted; if that is the case, treat it as a miss so we can get
905    // the call site corrected.
906    __ cmpptr(Address(rbx, in_bytes(Method::code_offset())), (int32_t)NULL_WORD);
907    __ jcc(Assembler::equal, skip_fixup);
908
909    __ bind(missed);
910    __ jump(RuntimeAddress(SharedRuntime::get_ic_miss_stub()));
911  }
912
913  address c2i_entry = __ pc();
914
915  gen_c2i_adapter(masm, total_args_passed, comp_args_on_stack, sig_bt, regs, skip_fixup);
916
917  __ flush();
918  return AdapterHandlerLibrary::new_entry(fingerprint, i2c_entry, c2i_entry, c2i_unverified_entry);
919}
920
921int SharedRuntime::c_calling_convention(const BasicType *sig_bt,
922                                         VMRegPair *regs,
923                                         int total_args_passed) {
924// We return the amount of VMRegImpl stack slots we need to reserve for all
925// the arguments NOT counting out_preserve_stack_slots.
926
927  uint    stack = 0;        // All arguments on stack
928
929  for( int i = 0; i < total_args_passed; i++) {
930    // From the type and the argument number (count) compute the location
931    switch( sig_bt[i] ) {
932    case T_BOOLEAN:
933    case T_CHAR:
934    case T_FLOAT:
935    case T_BYTE:
936    case T_SHORT:
937    case T_INT:
938    case T_OBJECT:
939    case T_ARRAY:
940    case T_ADDRESS:
941      regs[i].set1(VMRegImpl::stack2reg(stack++));
942      break;
943    case T_LONG:
944    case T_DOUBLE: // The stack numbering is reversed from Java
945      // Since C arguments do not get reversed, the ordering for
946      // doubles on the stack must be opposite the Java convention
947      assert(sig_bt[i+1] == T_VOID, "missing Half" );
948      regs[i].set2(VMRegImpl::stack2reg(stack));
949      stack += 2;
950      break;
951    case T_VOID: regs[i].set_bad(); break;
952    default:
953      ShouldNotReachHere();
954      break;
955    }
956  }
957  return stack;
958}
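// A worked example (illustrative only) for sig_bt = {T_INT, T_LONG, T_VOID, T_FLOAT}:
// the int lands in stack slot 0, the long in slots 1-2, the T_VOID half is
// set_bad(), and the float lands in slot 3, so the routine returns 4.  Unlike
// the Java convention above, nothing is passed in registers and longs/doubles
// keep their natural (unreversed) C ordering.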
959
960// A simple move of an integer-like type
961static void simple_move32(MacroAssembler* masm, VMRegPair src, VMRegPair dst) {
962  if (src.first()->is_stack()) {
963    if (dst.first()->is_stack()) {
964      // stack to stack
965      // __ ld(FP, reg2offset(src.first()) + STACK_BIAS, L5);
966      // __ st(L5, SP, reg2offset(dst.first()) + STACK_BIAS);
967      __ movl2ptr(rax, Address(rbp, reg2offset_in(src.first())));
968      __ movptr(Address(rsp, reg2offset_out(dst.first())), rax);
969    } else {
970      // stack to reg
971      __ movl2ptr(dst.first()->as_Register(),  Address(rbp, reg2offset_in(src.first())));
972    }
973  } else if (dst.first()->is_stack()) {
974    // reg to stack
975    // no need to sign extend on 64bit
976    __ movptr(Address(rsp, reg2offset_out(dst.first())), src.first()->as_Register());
977  } else {
978    if (dst.first() != src.first()) {
979      __ mov(dst.first()->as_Register(), src.first()->as_Register());
980    }
981  }
982}
983
984// An oop arg. Must pass a handle not the oop itself
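// (In JNI terms the callee receives a jobject that is really the address of a
// stack slot holding the oop, or NULL for a null reference, so the GC can
// relocate the object and update that slot while the native call is in
// progress. Explanatory note based on the code below.)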
985static void object_move(MacroAssembler* masm,
986                        OopMap* map,
987                        int oop_handle_offset,
988                        int framesize_in_slots,
989                        VMRegPair src,
990                        VMRegPair dst,
991                        bool is_receiver,
992                        int* receiver_offset) {
993
994  // Because of the calling conventions we know that src can be a
995  // register or a stack location. dst can only be a stack location.
996
997  assert(dst.first()->is_stack(), "must be stack");
998  // must pass a handle. First figure out the location we use as a handle
999
1000  if (src.first()->is_stack()) {
1001    // Oop is already on the stack as an argument
1002    Register rHandle = rax;
1003    Label nil;
1004    __ xorptr(rHandle, rHandle);
1005    __ cmpptr(Address(rbp, reg2offset_in(src.first())), (int32_t)NULL_WORD);
1006    __ jcc(Assembler::equal, nil);
1007    __ lea(rHandle, Address(rbp, reg2offset_in(src.first())));
1008    __ bind(nil);
1009    __ movptr(Address(rsp, reg2offset_out(dst.first())), rHandle);
1010
1011    int offset_in_older_frame = src.first()->reg2stack() + SharedRuntime::out_preserve_stack_slots();
1012    map->set_oop(VMRegImpl::stack2reg(offset_in_older_frame + framesize_in_slots));
1013    if (is_receiver) {
1014      *receiver_offset = (offset_in_older_frame + framesize_in_slots) * VMRegImpl::stack_slot_size;
1015    }
1016  } else {
1017    // Oop is in a register; we must store it to the space we reserve
1018    // on the stack for oop_handles
1019    const Register rOop = src.first()->as_Register();
1020    const Register rHandle = rax;
1021    int oop_slot = (rOop == rcx ? 0 : 1) * VMRegImpl::slots_per_word + oop_handle_offset;
1022    int offset = oop_slot*VMRegImpl::stack_slot_size;
1023    Label skip;
1024    __ movptr(Address(rsp, offset), rOop);
1025    map->set_oop(VMRegImpl::stack2reg(oop_slot));
1026    __ xorptr(rHandle, rHandle);
1027    __ cmpptr(rOop, (int32_t)NULL_WORD);
1028    __ jcc(Assembler::equal, skip);
1029    __ lea(rHandle, Address(rsp, offset));
1030    __ bind(skip);
1031    // Store the handle parameter
1032    __ movptr(Address(rsp, reg2offset_out(dst.first())), rHandle);
1033    if (is_receiver) {
1034      *receiver_offset = offset;
1035    }
1036  }
1037}
1038
1039// A float arg may have to do a float-reg to int-reg conversion
1040static void float_move(MacroAssembler* masm, VMRegPair src, VMRegPair dst) {
1041  assert(!src.second()->is_valid() && !dst.second()->is_valid(), "bad float_move");
1042
1043  // Because of the calling convention we know that src is either a stack location
1044  // or an xmm register. dst can only be a stack location.
1045
1046  assert(dst.first()->is_stack() && ( src.first()->is_stack() || src.first()->is_XMMRegister()), "bad parameters");
1047
1048  if (src.first()->is_stack()) {
1049    __ movl(rax, Address(rbp, reg2offset_in(src.first())));
1050    __ movptr(Address(rsp, reg2offset_out(dst.first())), rax);
1051  } else {
1052    // reg to stack
1053    __ movflt(Address(rsp, reg2offset_out(dst.first())), src.first()->as_XMMRegister());
1054  }
1055}
1056
1057// A long move
1058static void long_move(MacroAssembler* masm, VMRegPair src, VMRegPair dst) {
1059
1060  // The only legal possibility for a long_move VMRegPair is:
1061  // 1: two stack slots (possibly unaligned)
1062  // as neither the Java nor the C calling convention will use registers
1063  // for longs.
1064
1065  if (src.first()->is_stack() && dst.first()->is_stack()) {
1066    assert(src.second()->is_stack() && dst.second()->is_stack(), "must be all stack");
1067    __ movptr(rax, Address(rbp, reg2offset_in(src.first())));
1068    NOT_LP64(__ movptr(rbx, Address(rbp, reg2offset_in(src.second()))));
1069    __ movptr(Address(rsp, reg2offset_out(dst.first())), rax);
1070    NOT_LP64(__ movptr(Address(rsp, reg2offset_out(dst.second())), rbx));
1071  } else {
1072    ShouldNotReachHere();
1073  }
1074}
1075
1076// A double move
1077static void double_move(MacroAssembler* masm, VMRegPair src, VMRegPair dst) {
1078
1079  // The only legal possibilities for a double_move VMRegPair are listed below.
1080  // The painful thing here is that, as with long_move, a VMRegPair might be two stack slots.
1081
1082  // Because of the calling convention we know that src is either
1083  //   1: a single physical register (xmm registers only)
1084  //   2: two stack slots (possibly unaligned)
1085  // dst can only be a pair of stack slots.
1086
1087  assert(dst.first()->is_stack() && (src.first()->is_XMMRegister() || src.first()->is_stack()), "bad args");
1088
1089  if (src.first()->is_stack()) {
1090    // source is all stack
1091    __ movptr(rax, Address(rbp, reg2offset_in(src.first())));
1092    NOT_LP64(__ movptr(rbx, Address(rbp, reg2offset_in(src.second()))));
1093    __ movptr(Address(rsp, reg2offset_out(dst.first())), rax);
1094    NOT_LP64(__ movptr(Address(rsp, reg2offset_out(dst.second())), rbx));
1095  } else {
1096    // reg to stack
1097    // No worries about stack alignment
1098    __ movdbl(Address(rsp, reg2offset_out(dst.first())), src.first()->as_XMMRegister());
1099  }
1100}
1101
1102
1103void SharedRuntime::save_native_result(MacroAssembler *masm, BasicType ret_type, int frame_slots) {
1104  // We always ignore the frame_slots arg and just use the space just below the frame pointer,
1105  // which by this time is free to use.
1106  switch (ret_type) {
1107  case T_FLOAT:
1108    __ fstp_s(Address(rbp, -wordSize));
1109    break;
1110  case T_DOUBLE:
1111    __ fstp_d(Address(rbp, -2*wordSize));
1112    break;
1113  case T_VOID:  break;
1114  case T_LONG:
1115    __ movptr(Address(rbp, -wordSize), rax);
1116    NOT_LP64(__ movptr(Address(rbp, -2*wordSize), rdx));
1117    break;
1118  default: {
1119    __ movptr(Address(rbp, -wordSize), rax);
1120    }
1121  }
1122}
1123
1124void SharedRuntime::restore_native_result(MacroAssembler *masm, BasicType ret_type, int frame_slots) {
1125  // We always ignore the frame_slots arg and just use the space just below the frame pointer,
1126  // which by this time is free to use.
1127  switch (ret_type) {
1128  case T_FLOAT:
1129    __ fld_s(Address(rbp, -wordSize));
1130    break;
1131  case T_DOUBLE:
1132    __ fld_d(Address(rbp, -2*wordSize));
1133    break;
1134  case T_LONG:
1135    __ movptr(rax, Address(rbp, -wordSize));
1136    NOT_LP64(__ movptr(rdx, Address(rbp, -2*wordSize)));
1137    break;
1138  case T_VOID:  break;
1139  default: {
1140    __ movptr(rax, Address(rbp, -wordSize));
1141    }
1142  }
1143}
1144
1145
1146static void save_or_restore_arguments(MacroAssembler* masm,
1147                                      const int stack_slots,
1148                                      const int total_in_args,
1149                                      const int arg_save_area,
1150                                      OopMap* map,
1151                                      VMRegPair* in_regs,
1152                                      BasicType* in_sig_bt) {
1153  // if map is non-NULL then the code should store the values,
1154  // otherwise it should load them.
1155  int handle_index = 0;
1156  // Save down double word first
1157  for ( int i = 0; i < total_in_args; i++) {
1158    if (in_regs[i].first()->is_XMMRegister() && in_sig_bt[i] == T_DOUBLE) {
1159      int slot = handle_index * VMRegImpl::slots_per_word + arg_save_area;
1160      int offset = slot * VMRegImpl::stack_slot_size;
1161      handle_index += 2;
1162      assert(handle_index <= stack_slots, "overflow");
1163      if (map != NULL) {
1164        __ movdbl(Address(rsp, offset), in_regs[i].first()->as_XMMRegister());
1165      } else {
1166        __ movdbl(in_regs[i].first()->as_XMMRegister(), Address(rsp, offset));
1167      }
1168    }
1169    if (in_regs[i].first()->is_Register() && in_sig_bt[i] == T_LONG) {
1170      int slot = handle_index * VMRegImpl::slots_per_word + arg_save_area;
1171      int offset = slot * VMRegImpl::stack_slot_size;
1172      handle_index += 2;
1173      assert(handle_index <= stack_slots, "overflow");
1174      if (map != NULL) {
1175        __ movl(Address(rsp, offset), in_regs[i].first()->as_Register());
1176        if (in_regs[i].second()->is_Register()) {
1177          __ movl(Address(rsp, offset + 4), in_regs[i].second()->as_Register());
1178        }
1179      } else {
1180        __ movl(in_regs[i].first()->as_Register(), Address(rsp, offset));
1181        if (in_regs[i].second()->is_Register()) {
1182          __ movl(in_regs[i].second()->as_Register(), Address(rsp, offset + 4));
1183        }
1184      }
1185    }
1186  }
1187  // Save or restore single word registers
1188  for ( int i = 0; i < total_in_args; i++) {
1189    if (in_regs[i].first()->is_Register()) {
1190      int slot = handle_index++ * VMRegImpl::slots_per_word + arg_save_area;
1191      int offset = slot * VMRegImpl::stack_slot_size;
1192      assert(handle_index <= stack_slots, "overflow");
1193      if (in_sig_bt[i] == T_ARRAY && map != NULL) {
1194        map->set_oop(VMRegImpl::stack2reg(slot));
1195      }
1196
1197      // Value is in an input register, so we must flush it to the stack
1198      const Register reg = in_regs[i].first()->as_Register();
1199      switch (in_sig_bt[i]) {
1200        case T_ARRAY:
1201          if (map != NULL) {
1202            __ movptr(Address(rsp, offset), reg);
1203          } else {
1204            __ movptr(reg, Address(rsp, offset));
1205          }
1206          break;
1207        case T_BOOLEAN:
1208        case T_CHAR:
1209        case T_BYTE:
1210        case T_SHORT:
1211        case T_INT:
1212          if (map != NULL) {
1213            __ movl(Address(rsp, offset), reg);
1214          } else {
1215            __ movl(reg, Address(rsp, offset));
1216          }
1217          break;
1218        case T_OBJECT:
1219        default: ShouldNotReachHere();
1220      }
1221    } else if (in_regs[i].first()->is_XMMRegister()) {
1222      if (in_sig_bt[i] == T_FLOAT) {
1223        int slot = handle_index++ * VMRegImpl::slots_per_word + arg_save_area;
1224        int offset = slot * VMRegImpl::stack_slot_size;
1225        assert(handle_index <= stack_slots, "overflow");
1226        if (map != NULL) {
1227          __ movflt(Address(rsp, offset), in_regs[i].first()->as_XMMRegister());
1228        } else {
1229          __ movflt(in_regs[i].first()->as_XMMRegister(), Address(rsp, offset));
1230        }
1231      }
1232    } else if (in_regs[i].first()->is_stack()) {
1233      if (in_sig_bt[i] == T_ARRAY && map != NULL) {
1234        int offset_in_older_frame = in_regs[i].first()->reg2stack() + SharedRuntime::out_preserve_stack_slots();
1235        map->set_oop(VMRegImpl::stack2reg(offset_in_older_frame + stack_slots));
1236      }
1237    }
1238  }
1239}
1240
1241// Check GC_locker::needs_gc and enter the runtime if it's true.  This
1242// keeps a new JNI critical region from starting until a GC has been
1243// forced.  Save down any oops in registers and describe them in an
1244// OopMap.
1245static void check_needs_gc_for_critical_native(MacroAssembler* masm,
1246                                               Register thread,
1247                                               int stack_slots,
1248                                               int total_c_args,
1249                                               int total_in_args,
1250                                               int arg_save_area,
1251                                               OopMapSet* oop_maps,
1252                                               VMRegPair* in_regs,
1253                                               BasicType* in_sig_bt) {
1254  __ block_comment("check GC_locker::needs_gc");
1255  Label cont;
1256  __ cmp8(ExternalAddress((address)GC_locker::needs_gc_address()), false);
1257  __ jcc(Assembler::equal, cont);
1258
1259  // Save down any incoming oops and call into the runtime to halt for a GC
1260
1261  OopMap* map = new OopMap(stack_slots * 2, 0 /* arg_slots*/);
1262
1263  save_or_restore_arguments(masm, stack_slots, total_in_args,
1264                            arg_save_area, map, in_regs, in_sig_bt);
1265
1266  address the_pc = __ pc();
1267  oop_maps->add_gc_map( __ offset(), map);
1268  __ set_last_Java_frame(thread, rsp, noreg, the_pc);
1269
1270  __ block_comment("block_for_jni_critical");
1271  __ push(thread);
1272  __ call(RuntimeAddress(CAST_FROM_FN_PTR(address, SharedRuntime::block_for_jni_critical)));
1273  __ increment(rsp, wordSize);
1274
1275  __ get_thread(thread);
1276  __ reset_last_Java_frame(thread, false, true);
1277
1278  save_or_restore_arguments(masm, stack_slots, total_in_args,
1279                            arg_save_area, NULL, in_regs, in_sig_bt);
1280
1281  __ bind(cont);
1282#ifdef ASSERT
1283  if (StressCriticalJNINatives) {
1284    // Stress register saving
1285    OopMap* map = new OopMap(stack_slots * 2, 0 /* arg_slots*/);
1286    save_or_restore_arguments(masm, stack_slots, total_in_args,
1287                              arg_save_area, map, in_regs, in_sig_bt);
1288    // Destroy argument registers
1289    for (int i = 0; i < total_in_args - 1; i++) {
1290      if (in_regs[i].first()->is_Register()) {
1291        const Register reg = in_regs[i].first()->as_Register();
1292        __ xorptr(reg, reg);
1293      } else if (in_regs[i].first()->is_XMMRegister()) {
1294        __ xorpd(in_regs[i].first()->as_XMMRegister(), in_regs[i].first()->as_XMMRegister());
1295      } else if (in_regs[i].first()->is_FloatRegister()) {
1296        ShouldNotReachHere();
1297      } else if (in_regs[i].first()->is_stack()) {
1298        // Nothing to do
1299      } else {
1300        ShouldNotReachHere();
1301      }
1302      if (in_sig_bt[i] == T_LONG || in_sig_bt[i] == T_DOUBLE) {
1303        i++;
1304      }
1305    }
1306
1307    save_or_restore_arguments(masm, stack_slots, total_in_args,
1308                              arg_save_area, NULL, in_regs, in_sig_bt);
1309  }
1310#endif
1311}
1312
1313// Unpack an array argument into a pointer to the body and the length
1314// if the array is non-null; otherwise pass 0 for both.
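// (For illustration: under the critical-native convention an array parameter
// is expanded into a raw element pointer plus an int length, so a hypothetical
// byte[] argument reaches the native code as something like
// (jint len, jbyte* body), with both zero/NULL when the array reference is null.)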
1315static void unpack_array_argument(MacroAssembler* masm, VMRegPair reg, BasicType in_elem_type, VMRegPair body_arg, VMRegPair length_arg) {
1316  Register tmp_reg = rax;
1317  assert(!body_arg.first()->is_Register() || body_arg.first()->as_Register() != tmp_reg,
1318         "possible collision");
1319  assert(!length_arg.first()->is_Register() || length_arg.first()->as_Register() != tmp_reg,
1320         "possible collision");
1321
1322  // Pass the length, ptr pair
1323  Label is_null, done;
1324  VMRegPair tmp(tmp_reg->as_VMReg());
1325  if (reg.first()->is_stack()) {
1326    // Load the arg up from the stack
1327    simple_move32(masm, reg, tmp);
1328    reg = tmp;
1329  }
1330  __ testptr(reg.first()->as_Register(), reg.first()->as_Register());
1331  __ jccb(Assembler::equal, is_null);
1332  __ lea(tmp_reg, Address(reg.first()->as_Register(), arrayOopDesc::base_offset_in_bytes(in_elem_type)));
1333  simple_move32(masm, tmp, body_arg);
1334  // load the length relative to the body.
1335  __ movl(tmp_reg, Address(tmp_reg, arrayOopDesc::length_offset_in_bytes() -
1336                           arrayOopDesc::base_offset_in_bytes(in_elem_type)));
1337  simple_move32(masm, tmp, length_arg);
1338  __ jmpb(done);
1339  __ bind(is_null);
1340  // Pass zeros
1341  __ xorptr(tmp_reg, tmp_reg);
1342  simple_move32(masm, tmp, body_arg);
1343  simple_move32(masm, tmp, length_arg);
1344  __ bind(done);
1345}
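// A rough sketch (illustrative only; the class and method names are hypothetical)
// of what a critical native entry point ends up being handed for a Java method
// like "static int sum(byte[] a)": instead of (JNIEnv*, jclass, jbyteArray) the
// callee receives the (length, body) pair produced by the unpacking above, e.g.
//
//   JNIEXPORT jint JNICALL JavaCritical_Example_sum(jint a_len, jbyte* a_body) {
//     jint s = 0;
//     for (jint i = 0; i < a_len; i++) s += a_body[i];  // body is the raw array data
//     return s;
//   }
//
// and a null array arrives as the pair (0, NULL).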
1346
1347static void verify_oop_args(MacroAssembler* masm,
1348                            int total_args_passed,
1349                            const BasicType* sig_bt,
1350                            const VMRegPair* regs) {
1351  Register temp_reg = rbx;  // not part of any compiled calling seq
1352  if (VerifyOops) {
1353    for (int i = 0; i < total_args_passed; i++) {
1354      if (sig_bt[i] == T_OBJECT ||
1355          sig_bt[i] == T_ARRAY) {
1356        VMReg r = regs[i].first();
1357        assert(r->is_valid(), "bad oop arg");
1358        if (r->is_stack()) {
1359          __ movptr(temp_reg, Address(rsp, r->reg2stack() * VMRegImpl::stack_slot_size + wordSize));
1360          __ verify_oop(temp_reg);
1361        } else {
1362          __ verify_oop(r->as_Register());
1363        }
1364      }
1365    }
1366  }
1367}
1368
1369static void gen_special_dispatch(MacroAssembler* masm,
1370                                 int total_args_passed,
1371                                 int comp_args_on_stack,
1372                                 vmIntrinsics::ID special_dispatch,
1373                                 const BasicType* sig_bt,
1374                                 const VMRegPair* regs) {
1375  verify_oop_args(masm, total_args_passed, sig_bt, regs);
1376
1377  // Now write the args into the outgoing interpreter space
1378  bool     has_receiver   = false;
1379  Register receiver_reg   = noreg;
1380  int      member_arg_pos = -1;
1381  Register member_reg     = noreg;
1382  int      ref_kind       = MethodHandles::signature_polymorphic_intrinsic_ref_kind(special_dispatch);
1383  if (ref_kind != 0) {
1384    member_arg_pos = total_args_passed - 1;  // trailing MemberName argument
1385    member_reg = rbx;  // known to be free at this point
1386    has_receiver = MethodHandles::ref_kind_has_receiver(ref_kind);
1387  } else if (special_dispatch == vmIntrinsics::_invokeBasic) {
1388    has_receiver = true;
1389  } else {
1390    guarantee(false, err_msg("special_dispatch=%d", special_dispatch));
1391  }
1392
1393  if (member_reg != noreg) {
1394    // Load the member_arg into register, if necessary.
1395    assert(member_arg_pos >= 0 && member_arg_pos < total_args_passed, "oob");
1396    assert(sig_bt[member_arg_pos] == T_OBJECT, "dispatch argument must be an object");
1397    VMReg r = regs[member_arg_pos].first();
1398    assert(r->is_valid(), "bad member arg");
1399    if (r->is_stack()) {
1400      __ movptr(member_reg, Address(rsp, r->reg2stack() * VMRegImpl::stack_slot_size + wordSize));
1401    } else {
1402      // no data motion is needed
1403      member_reg = r->as_Register();
1404    }
1405  }
1406
1407  if (has_receiver) {
1408    // Make sure the receiver is loaded into a register.
1409    assert(total_args_passed > 0, "oob");
1410    assert(sig_bt[0] == T_OBJECT, "receiver argument must be an object");
1411    VMReg r = regs[0].first();
1412    assert(r->is_valid(), "bad receiver arg");
1413    if (r->is_stack()) {
1414      // Porting note:  This assumes that compiled calling conventions always
1415      // pass the receiver oop in a register.  If this is not true on some
1416      // platform, pick a temp and load the receiver from stack.
1417      assert(false, "receiver always in a register");
1418      receiver_reg = rcx;  // known to be free at this point
1419      __ movptr(receiver_reg, Address(rsp, r->reg2stack() * VMRegImpl::stack_slot_size + wordSize));
1420    } else {
1421      // no data motion is needed
1422      receiver_reg = r->as_Register();
1423    }
1424  }
1425
1426  // Figure out which address we are really jumping to:
1427  MethodHandles::generate_method_handle_dispatch(masm, special_dispatch,
1428                                                 receiver_reg, member_reg, /*for_compiler_entry:*/ true);
1429}
1430
1431// ---------------------------------------------------------------------------
1432// Generate a native wrapper for a given method.  The method takes arguments
1433// in the Java compiled code convention, marshals them to the native
1434// convention (handlizes oops, etc), transitions to native, makes the call,
1435// returns to java state (possibly blocking), unhandlizes any result and
1436// returns.
1437//
1438// Critical native functions are a shorthand for the use of
1439// GetPrimitiveArrayCritical and disallow the use of any other JNI
1440// functions.  The wrapper is expected to unpack the arguments before
1441// passing them to the callee and perform checks before and after the
1442// native call to ensure that the GC_locker
1443// lock_critical/unlock_critical semantics are followed.  Some other
1444// parts of JNI setup are skipped, like the tear down of the JNI handle
1445// block and the check for pending exceptions, since it's impossible for
1446// them to be thrown.
1447//
1448// They are roughly structured like this:
1449//    if (GC_locker::needs_gc())
1450//      SharedRuntime::block_for_jni_critical();
1451//    transition to thread_in_native
1452//    unpack array arguments and call native entry point
1453//    check for safepoint in progress
1454//    check if any thread suspend flags are set
1455//      call into JVM and possibly unlock the JNI critical
1456//      if a GC was suppressed while in the critical native.
1457//    transition back to thread_in_Java
1458//    return to caller
1459//
1460nmethod* SharedRuntime::generate_native_wrapper(MacroAssembler* masm,
1461                                                methodHandle method,
1462                                                int compile_id,
1463                                                int total_in_args,
1464                                                int comp_args_on_stack,
1465                                                BasicType* in_sig_bt,
1466                                                VMRegPair* in_regs,
1467                                                BasicType ret_type) {
1468  if (method->is_method_handle_intrinsic()) {
1469    vmIntrinsics::ID iid = method->intrinsic_id();
1470    intptr_t start = (intptr_t)__ pc();
1471    int vep_offset = ((intptr_t)__ pc()) - start;
1472    gen_special_dispatch(masm,
1473                         total_in_args,
1474                         comp_args_on_stack,
1475                         method->intrinsic_id(),
1476                         in_sig_bt,
1477                         in_regs);
1478    int frame_complete = ((intptr_t)__ pc()) - start;  // not complete, period
1479    __ flush();
1480    int stack_slots = SharedRuntime::out_preserve_stack_slots();  // no out slots at all, actually
1481    return nmethod::new_native_nmethod(method,
1482                                       compile_id,
1483                                       masm->code(),
1484                                       vep_offset,
1485                                       frame_complete,
1486                                       stack_slots / VMRegImpl::slots_per_word,
1487                                       in_ByteSize(-1),
1488                                       in_ByteSize(-1),
1489                                       (OopMapSet*)NULL);
1490  }
1491  bool is_critical_native = true;
1492  address native_func = method->critical_native_function();
1493  if (native_func == NULL) {
1494    native_func = method->native_function();
1495    is_critical_native = false;
1496  }
1497  assert(native_func != NULL, "must have function");
1498
1499  // An OopMap for lock (and class if static)
1500  OopMapSet *oop_maps = new OopMapSet();
1501
1502  // We have received a description of where all the java args are located
1503  // on entry to the wrapper. We need to convert these args to where
1504  // the jni function will expect them. To figure out where they go
1505  // we convert the java signature to a C signature by inserting
1506  // the hidden arguments as arg[0] and possibly arg[1] (static method)
1507
1508  int total_c_args = total_in_args;
1509  if (!is_critical_native) {
1510    total_c_args += 1;
1511    if (method->is_static()) {
1512      total_c_args++;
1513    }
1514  } else {
1515    for (int i = 0; i < total_in_args; i++) {
1516      if (in_sig_bt[i] == T_ARRAY) {
1517        total_c_args++;
1518      }
1519    }
1520  }
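  // Worked example (illustrative): a static, non-critical native taking
  // (int, Object) gets total_c_args = 2 + 1 (JNIEnv*) + 1 (class mirror) = 4,
  // while a critical native taking (byte[], int) gets total_c_args = 2 + 1 = 3
  // because the array expands into a (length, body) pair and no JNIEnv* or
  // mirror argument is added.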
1521
1522  BasicType* out_sig_bt = NEW_RESOURCE_ARRAY(BasicType, total_c_args);
1523  VMRegPair* out_regs   = NEW_RESOURCE_ARRAY(VMRegPair, total_c_args);
1524  BasicType* in_elem_bt = NULL;
1525
1526  int argc = 0;
1527  if (!is_critical_native) {
1528    out_sig_bt[argc++] = T_ADDRESS;
1529    if (method->is_static()) {
1530      out_sig_bt[argc++] = T_OBJECT;
1531    }
1532
1533    for (int i = 0; i < total_in_args ; i++ ) {
1534      out_sig_bt[argc++] = in_sig_bt[i];
1535    }
1536  } else {
1537    Thread* THREAD = Thread::current();
1538    in_elem_bt = NEW_RESOURCE_ARRAY(BasicType, total_in_args);
1539    SignatureStream ss(method->signature());
1540    for (int i = 0; i < total_in_args ; i++ ) {
1541      if (in_sig_bt[i] == T_ARRAY) {
1542        // Arrays are passed as int, elem* pair
1543        out_sig_bt[argc++] = T_INT;
1544        out_sig_bt[argc++] = T_ADDRESS;
1545        Symbol* atype = ss.as_symbol(CHECK_NULL);
1546        const char* at = atype->as_C_string();
1547        if (strlen(at) == 2) {
1548          assert(at[0] == '[', "must be");
1549          switch (at[1]) {
1550            case 'B': in_elem_bt[i]  = T_BYTE; break;
1551            case 'C': in_elem_bt[i]  = T_CHAR; break;
1552            case 'D': in_elem_bt[i]  = T_DOUBLE; break;
1553            case 'F': in_elem_bt[i]  = T_FLOAT; break;
1554            case 'I': in_elem_bt[i]  = T_INT; break;
1555            case 'J': in_elem_bt[i]  = T_LONG; break;
1556            case 'S': in_elem_bt[i]  = T_SHORT; break;
1557            case 'Z': in_elem_bt[i]  = T_BOOLEAN; break;
1558            default: ShouldNotReachHere();
1559          }
1560        }
1561      } else {
1562        out_sig_bt[argc++] = in_sig_bt[i];
1563        in_elem_bt[i] = T_VOID;
1564      }
1565      if (in_sig_bt[i] != T_VOID) {
1566        assert(in_sig_bt[i] == ss.type(), "must match");
1567        ss.next();
1568      }
1569    }
1570  }
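  // Illustrative mapping (continuing the critical (byte[], int) example):
  // in_sig_bt = { T_ARRAY, T_INT } becomes out_sig_bt = { T_INT, T_ADDRESS, T_INT }
  // and in_elem_bt = { T_BYTE, T_VOID }, i.e. each array argument turns into a
  // (length, element-pointer) pair in the C signature.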
1571
1572  // Now figure out where the args must be stored and how much stack space
1573  // they require.
1574  int out_arg_slots;
1575  out_arg_slots = c_calling_convention(out_sig_bt, out_regs, total_c_args);
1576
1577  // Compute framesize for the wrapper.  We need to handlize all oops in
1578  // registers (a max of 2 on x86).
1579
1580  // Calculate the total number of stack slots we will need.
1581
1582  // First count the abi requirement plus all of the outgoing args
1583  int stack_slots = SharedRuntime::out_preserve_stack_slots() + out_arg_slots;
1584
1585  // Now the space for the inbound oop handle area
1586  int total_save_slots = 2 * VMRegImpl::slots_per_word; // 2 arguments passed in registers
1587  if (is_critical_native) {
1588    // Critical natives may have to call out so they need a save area
1589    // for register arguments.
1590    int double_slots = 0;
1591    int single_slots = 0;
1592    for ( int i = 0; i < total_in_args; i++) {
1593      if (in_regs[i].first()->is_Register()) {
1594        const Register reg = in_regs[i].first()->as_Register();
1595        switch (in_sig_bt[i]) {
1596          case T_ARRAY:  // critical array (uses 2 slots on LP64)
1597          case T_BOOLEAN:
1598          case T_BYTE:
1599          case T_SHORT:
1600          case T_CHAR:
1601          case T_INT:  single_slots++; break;
1602          case T_LONG: double_slots++; break;
1603          default:  ShouldNotReachHere();
1604        }
1605      } else if (in_regs[i].first()->is_XMMRegister()) {
1606        switch (in_sig_bt[i]) {
1607          case T_FLOAT:  single_slots++; break;
1608          case T_DOUBLE: double_slots++; break;
1609          default:  ShouldNotReachHere();
1610        }
1611      } else if (in_regs[i].first()->is_FloatRegister()) {
1612        ShouldNotReachHere();
1613      }
1614    }
1615    total_save_slots = double_slots * 2 + single_slots;
1616    // align the save area
1617    if (double_slots != 0) {
1618      stack_slots = round_to(stack_slots, 2);
1619    }
1620  }
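  // Illustrative count: if, say, one int and one long were to arrive in
  // registers, then single_slots = 1 and double_slots = 1, so
  // total_save_slots = 1*2 + 1 = 3 and the running stack_slots count is
  // first rounded up to an even value because double_slots != 0.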
1621
1622  int oop_handle_offset = stack_slots;
1623  stack_slots += total_save_slots;
1624
1625  // Now any space we need for handlizing a klass if this is a static method
1626
1627  int klass_slot_offset = 0;
1628  int klass_offset = -1;
1629  int lock_slot_offset = 0;
1630  bool is_static = false;
1631
1632  if (method->is_static()) {
1633    klass_slot_offset = stack_slots;
1634    stack_slots += VMRegImpl::slots_per_word;
1635    klass_offset = klass_slot_offset * VMRegImpl::stack_slot_size;
1636    is_static = true;
1637  }
1638
1639  // Plus a lock if needed
1640
1641  if (method->is_synchronized()) {
1642    lock_slot_offset = stack_slots;
1643    stack_slots += VMRegImpl::slots_per_word;
1644  }
1645
1646  // Now a place (+2) to save return values or temp during shuffling
1647  // + 2 for return address (which we own) and saved rbp,
1648  stack_slots += 4;
1649
1650  // Ok The space we have allocated will look like:
1651  //
1652  //
1653  // FP-> |                     |
1654  //      |---------------------|
1655  //      | 2 slots for moves   |
1656  //      |---------------------|
1657  //      | lock box (if sync)  |
1658  //      |---------------------| <- lock_slot_offset  (-lock_slot_rbp_offset)
1659  //      | klass (if static)   |
1660  //      |---------------------| <- klass_slot_offset
1661  //      | oopHandle area      |
1662  //      |---------------------| <- oop_handle_offset (a max of 2 registers)
1663  //      | outbound memory     |
1664  //      | based arguments     |
1665  //      |                     |
1666  //      |---------------------|
1667  //      |                     |
1668  // SP-> | out_preserved_slots |
1669  //
1670  //
1671  // ****************************************************************************
1672  // WARNING - on Windows Java Natives use pascal calling convention and pop the
1673  // arguments off of the stack after the jni call. Before the call we can use
1674  // instructions that are SP relative. After the jni call we switch to FP
1675  // relative instructions instead of re-adjusting the stack on windows.
1676  // ****************************************************************************
1677
1678
1679  // Now compute actual number of stack words we need rounding to make
1680  // stack properly aligned.
1681  stack_slots = round_to(stack_slots, StackAlignmentInSlots);
1682
1683  int stack_size = stack_slots * VMRegImpl::stack_slot_size;
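  // For example (illustrative numbers): with 16-byte stack alignment,
  // StackAlignmentInSlots = 16 / 4 = 4, so a raw count of 21 slots is rounded
  // up to 24, giving stack_size = 24 * 4 = 96 bytes.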
1684
1685  intptr_t start = (intptr_t)__ pc();
1686
1687  // First thing make an ic check to see if we should even be here
1688
1689  // We are free to use all registers as temps without saving them and
1690  // restoring them except rbp. rbp is the only callee save register
1691  // as far as the interpreter and the compiler(s) are concerned.
1692
1693
1694  const Register ic_reg = rax;
1695  const Register receiver = rcx;
1696  Label hit;
1697  Label exception_pending;
1698
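  // The inline cache at the call site loads the expected Klass* into ic_reg
  // (rax); if it does not match the receiver's actual klass we jump to the
  // IC miss stub, which re-resolves the call.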
1699  __ verify_oop(receiver);
1700  __ cmpptr(ic_reg, Address(receiver, oopDesc::klass_offset_in_bytes()));
1701  __ jcc(Assembler::equal, hit);
1702
1703  __ jump(RuntimeAddress(SharedRuntime::get_ic_miss_stub()));
1704
1705  // verified entry must be aligned for code patching.
1706  // and the first 5 bytes must be in the same cache line
1707  // if we align at 8 then we will be sure 5 bytes are in the same line
1708  __ align(8);
1709
1710  __ bind(hit);
1711
1712  int vep_offset = ((intptr_t)__ pc()) - start;
1713
1714#ifdef COMPILER1
1715  if (InlineObjectHash && method->intrinsic_id() == vmIntrinsics::_hashCode) {
1716    // Object.hashCode can pull the hashCode from the header word
1717    // instead of doing a full VM transition once it's been computed.
1718    // Since hashCode is usually polymorphic at call sites we can't do
1719    // this optimization at the call site without a lot of work.
1720    Label slowCase;
1721    Register receiver = rcx;
1722    Register result = rax;
1723    __ movptr(result, Address(receiver, oopDesc::mark_offset_in_bytes()));
1724
1725    // check if locked
1726    __ testptr(result, markOopDesc::unlocked_value);
1727    __ jcc (Assembler::zero, slowCase);
1728
1729    if (UseBiasedLocking) {
1730      // Check if biased and fall through to runtime if so
1731      __ testptr(result, markOopDesc::biased_lock_bit_in_place);
1732      __ jcc (Assembler::notZero, slowCase);
1733    }
1734
1735    // get hash
1736    __ andptr(result, markOopDesc::hash_mask_in_place);
1737    // test if hashCode exists
1738    __ jcc  (Assembler::zero, slowCase);
1739    __ shrptr(result, markOopDesc::hash_shift);
1740    __ ret(0);
1741    __ bind (slowCase);
1742  }
1743#endif // COMPILER1
1744
1745  // The instruction at the verified entry point must be 5 bytes or longer
1746  // because it can be patched on the fly by make_non_entrant. The stack bang
1747  // instruction fits that requirement.
1748
1749  // Generate stack overflow check
1750
1751  if (UseStackBanging) {
1752    __ bang_stack_with_offset(StackShadowPages*os::vm_page_size());
1753  } else {
1754    // need a 5 byte instruction to allow MT safe patching to non-entrant
1755    __ fat_nop();
1756  }
1757
1758  // Generate a new frame for the wrapper.
1759  __ enter();
1760  // -2 because return address is already present and so is saved rbp
1761  __ subptr(rsp, stack_size - 2*wordSize);
1762
1763  // Frame is now completed as far as size and linkage.
1764  int frame_complete = ((intptr_t)__ pc()) - start;
1765
1766  // Calculate the difference between rsp and rbp. We need to know it
1767  // after the native call because on windows Java Natives will pop
1768  // the arguments and it is painful to do rsp relative addressing
1769  // in a platform independent way. So after the call we switch to
1770  // rbp, relative addressing.
1771
1772  int fp_adjustment = stack_size - 2*wordSize;
1773
1774#ifdef COMPILER2
1775  // C2 may leave the stack dirty if not in SSE2+ mode
1776  if (UseSSE >= 2) {
1777    __ verify_FPU(0, "c2i transition should have clean FPU stack");
1778  } else {
1779    __ empty_FPU_stack();
1780  }
1781#endif /* COMPILER2 */
1782
1783  // Compute the rbp, offset for any slots used after the jni call
1784
1785  int lock_slot_rbp_offset = (lock_slot_offset*VMRegImpl::stack_slot_size) - fp_adjustment;
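  // Illustrative arithmetic: with stack_size = 96 bytes and lock_slot_offset = 8,
  // lock_slot_rbp_offset = 8*4 - (96 - 2*4) = -56, i.e. the lock box sits 56
  // bytes below the saved rbp, which is the same slot as rsp-relative slot 8.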
1786
1787  // We use rdi as a thread pointer because it is callee save and
1788  // if we load it once it is usable thru the entire wrapper
1789  const Register thread = rdi;
1790
1791  // We use rsi as the oop handle for the receiver/klass
1792  // It is callee save so it survives the call to native
1793
1794  const Register oop_handle_reg = rsi;
1795
1796  __ get_thread(thread);
1797
1798  if (is_critical_native) {
1799    check_needs_gc_for_critical_native(masm, thread, stack_slots, total_c_args, total_in_args,
1800                                       oop_handle_offset, oop_maps, in_regs, in_sig_bt);
1801  }
1802
1803  //
1804  // We immediately shuffle the arguments so that any vm call we have to
1805  // make from here on out (sync slow path, jvmti, etc.) we will have
1806  // captured the oops from our caller and have a valid oopMap for
1807  // them.
1808
1809  // -----------------
1810  // The Grand Shuffle
1811  //
1812  // Natives require 1 or 2 extra arguments over the normal ones: the JNIEnv*
1813  // and, if static, the class mirror instead of a receiver.  This pretty much
1814  // guarantees that register layout will not match (and x86 doesn't use reg
1815  // parms though amd does).  Since the native abi doesn't use register args
1816// and the java convention does, so we don't have to worry about collisions.
1817// All of our moves are reg->stack or stack->stack.
1818  // We ignore the extra arguments during the shuffle and handle them at the
1819  // last moment. The shuffle is described by the two calling convention
1820  // vectors we have in our possession. We simply walk the java vector to
1821  // get the source locations and the c vector to get the destinations.
1822
1823  int c_arg = is_critical_native ? 0 : (method->is_static() ? 2 : 1 );
1824
1825  // Record rsp-based slot for receiver on stack for non-static methods
1826  int receiver_offset = -1;
1827
1828  // This is a trick. We double the stack slots so we can claim
1829  // the oops in the caller's frame. Since we are sure to have
1830  // more args than the caller doubling is enough to make
1831  // sure we can capture all the incoming oop args from the
1832  // caller.
1833  //
1834  OopMap* map = new OopMap(stack_slots * 2, 0 /* arg_slots*/);
1835
1836  // Mark location of rbp,
1837  // map->set_callee_saved(VMRegImpl::stack2reg( stack_slots - 2), stack_slots * 2, 0, rbp->as_VMReg());
1838
1839  // We know that we only have args in at most two integer registers (rcx, rdx). So rax, rbx
1840  // are free as temporaries if we have to do stack to stack moves.
1841  // All inbound args are referenced based on rbp, and all outbound args via rsp.
1842
1843  for (int i = 0; i < total_in_args ; i++, c_arg++ ) {
1844    switch (in_sig_bt[i]) {
1845      case T_ARRAY:
1846        if (is_critical_native) {
1847          unpack_array_argument(masm, in_regs[i], in_elem_bt[i], out_regs[c_arg + 1], out_regs[c_arg]);
1848          c_arg++;
1849          break;
1850        }
1851      case T_OBJECT:
1852        assert(!is_critical_native, "no oop arguments");
1853        object_move(masm, map, oop_handle_offset, stack_slots, in_regs[i], out_regs[c_arg],
1854                    ((i == 0) && (!is_static)),
1855                    &receiver_offset);
1856        break;
1857      case T_VOID:
1858        break;
1859
1860      case T_FLOAT:
1861        float_move(masm, in_regs[i], out_regs[c_arg]);
1862          break;
1863
1864      case T_DOUBLE:
1865        assert( i + 1 < total_in_args &&
1866                in_sig_bt[i + 1] == T_VOID &&
1867                out_sig_bt[c_arg+1] == T_VOID, "bad arg list");
1868        double_move(masm, in_regs[i], out_regs[c_arg]);
1869        break;
1870
1871      case T_LONG :
1872        long_move(masm, in_regs[i], out_regs[c_arg]);
1873        break;
1874
1875      case T_ADDRESS: assert(false, "found T_ADDRESS in java args");
1876
1877      default:
1878        simple_move32(masm, in_regs[i], out_regs[c_arg]);
1879    }
1880  }
1881
1882  // Pre-load a static method's oop into rsi.  Used both by locking code and
1883  // the normal JNI call code.
1884  if (method->is_static() && !is_critical_native) {
1885
1886    // load oop into a register
1887    __ movoop(oop_handle_reg, JNIHandles::make_local(Klass::cast(method->method_holder())->java_mirror()));
1888
1889    // Now handlize the static class mirror; it's known to be non-null.
1890    __ movptr(Address(rsp, klass_offset), oop_handle_reg);
1891    map->set_oop(VMRegImpl::stack2reg(klass_slot_offset));
1892
1893    // Now get the handle
1894    __ lea(oop_handle_reg, Address(rsp, klass_offset));
1895    // store the klass handle as second argument
1896    __ movptr(Address(rsp, wordSize), oop_handle_reg);
1897  }
1898
1899  // Change state to native (we save the return address in the thread, since it might not
1900  // be pushed on the stack when we do a stack traversal). It is enough that the pc()
1901  // points into the right code segment. It does not have to be the correct return pc.
1902  // We use the same pc/oopMap repeatedly when we call out
1903
1904  intptr_t the_pc = (intptr_t) __ pc();
1905  oop_maps->add_gc_map(the_pc - start, map);
1906
1907  __ set_last_Java_frame(thread, rsp, noreg, (address)the_pc);
1908
1909
1910  // We have all of the arguments set up at this point. We must not touch any of the
1911  // argument registers from here on (if we had to save/restore them there would be no oopMap covering them).
1912
1913  {
1914    SkipIfEqual skip_if(masm, &DTraceMethodProbes, 0);
1915    __ mov_metadata(rax, method());
1916    __ call_VM_leaf(
1917         CAST_FROM_FN_PTR(address, SharedRuntime::dtrace_method_entry),
1918         thread, rax);
1919  }
1920
1921  // RedefineClasses() tracing support for obsolete method entry
1922  if (RC_TRACE_IN_RANGE(0x00001000, 0x00002000)) {
1923    __ mov_metadata(rax, method());
1924    __ call_VM_leaf(
1925         CAST_FROM_FN_PTR(address, SharedRuntime::rc_trace_method_entry),
1926         thread, rax);
1927  }
1928
1929  // These are register definitions we need for locking/unlocking
1930  const Register swap_reg = rax;  // Must use rax, for cmpxchg instruction
1931  const Register obj_reg  = rcx;  // Will contain the oop
1932  const Register lock_reg = rdx;  // Address of compiler lock object (BasicLock)
1933
1934  Label slow_path_lock;
1935  Label lock_done;
1936
1937  // Lock a synchronized method
1938  if (method->is_synchronized()) {
1939    assert(!is_critical_native, "unhandled");
1940
1941
1942    const int mark_word_offset = BasicLock::displaced_header_offset_in_bytes();
1943
1944    // Get the handle (the 2nd argument)
1945    __ movptr(oop_handle_reg, Address(rsp, wordSize));
1946
1947    // Get address of the box
1948
1949    __ lea(lock_reg, Address(rbp, lock_slot_rbp_offset));
1950
1951    // Load the oop from the handle
1952    __ movptr(obj_reg, Address(oop_handle_reg, 0));
1953
1954    if (UseBiasedLocking) {
1955      // Note that oop_handle_reg is trashed during this call
1956      __ biased_locking_enter(lock_reg, obj_reg, swap_reg, oop_handle_reg, false, lock_done, &slow_path_lock);
1957    }
1958
1959    // Load immediate 1 into swap_reg %rax,
1960    __ movptr(swap_reg, 1);
1961
1962    // Load (object->mark() | 1) into swap_reg %rax,
1963    __ orptr(swap_reg, Address(obj_reg, 0));
1964
1965    // Save (object->mark() | 1) into BasicLock's displaced header
1966    __ movptr(Address(lock_reg, mark_word_offset), swap_reg);
1967
1968    if (os::is_MP()) {
1969      __ lock();
1970    }
1971
1972    // src -> dest iff dest == rax, else rax, <- dest
1973    // *obj_reg = lock_reg iff *obj_reg == rax, else rax, = *(obj_reg)
1974    __ cmpxchgptr(lock_reg, Address(obj_reg, 0));
1975    __ jcc(Assembler::equal, lock_done);
1976
1977    // Test if the oopMark is an obvious stack pointer, i.e.,
1978    //  1) (mark & 3) == 0, and
1979    //  2) rsp <= mark < rsp + os::pagesize()
1980    // These 3 tests can be done by evaluating the following
1981    // expression: ((mark - rsp) & (3 - os::vm_page_size())),
1982    // assuming both stack pointer and pagesize have their
1983    // least significant 2 bits clear.
1984    // NOTE: the oopMark is in swap_reg %rax, as the result of cmpxchg
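    // Illustrative check: with a 4096-byte page the mask 3 - 4096 is 0xfffff003,
    // so the andptr below yields zero exactly when (mark - rsp) is in [0, 4096)
    // with its low two bits clear, i.e. the displaced mark is a stack address
    // just above rsp and this is a recursive lock.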
1985
1986    __ subptr(swap_reg, rsp);
1987    __ andptr(swap_reg, 3 - os::vm_page_size());
1988
1989    // Save the test result, for recursive case, the result is zero
1990    __ movptr(Address(lock_reg, mark_word_offset), swap_reg);
1991    __ jcc(Assembler::notEqual, slow_path_lock);
1992    // Slow path will re-enter here
1993    __ bind(lock_done);
1994
1995    if (UseBiasedLocking) {
1996      // Re-fetch oop_handle_reg as we trashed it above
1997      __ movptr(oop_handle_reg, Address(rsp, wordSize));
1998    }
1999  }
2000
2001
2002  // Finally just about ready to make the JNI call
2003
2004
2005  // get JNIEnv* which is first argument to native
2006  if (!is_critical_native) {
2007    __ lea(rdx, Address(thread, in_bytes(JavaThread::jni_environment_offset())));
2008    __ movptr(Address(rsp, 0), rdx);
2009  }
2010
2011  // Now set thread in native
2012  __ movl(Address(thread, JavaThread::thread_state_offset()), _thread_in_native);
2013
2014  __ call(RuntimeAddress(native_func));
2015
2016  // WARNING - on Windows Java Natives use pascal calling convention and pop the
2017  // arguments off of the stack. We could just re-adjust the stack pointer here
2018  // and continue to do SP relative addressing but we instead switch to FP
2019  // relative addressing.
2020
2021  // Unpack native results.
2022  switch (ret_type) {
2023  case T_BOOLEAN: __ c2bool(rax);            break;
2024  case T_CHAR   : __ andptr(rax, 0xFFFF);    break;
2025  case T_BYTE   : __ sign_extend_byte (rax); break;
2026  case T_SHORT  : __ sign_extend_short(rax); break;
2027  case T_INT    : /* nothing to do */        break;
2028  case T_DOUBLE :
2029  case T_FLOAT  :
2030    // Result is in st0; we'll save it as needed
2031    break;
2032  case T_ARRAY:                 // Really a handle
2033  case T_OBJECT:                // Really a handle
2034      break; // can't de-handlize until after safepoint check
2035  case T_VOID: break;
2036  case T_LONG: break;
2037  default       : ShouldNotReachHere();
2038  }
2039
2040  // Switch thread to "native transition" state before reading the synchronization state.
2041  // This additional state is necessary because reading and testing the synchronization
2042  // state is not atomic w.r.t. GC, as this scenario demonstrates:
2043  //     Java thread A, in _thread_in_native state, loads _not_synchronized and is preempted.
2044  //     VM thread changes sync state to synchronizing and suspends threads for GC.
2045  //     Thread A is resumed to finish this native method, but doesn't block here since it
2046  //     didn't see any synchronization in progress, and escapes.
2047  __ movl(Address(thread, JavaThread::thread_state_offset()), _thread_in_native_trans);
2048
2049  if(os::is_MP()) {
2050    if (UseMembar) {
2051      // Force this write out before the read below
2052      __ membar(Assembler::Membar_mask_bits(
2053           Assembler::LoadLoad | Assembler::LoadStore |
2054           Assembler::StoreLoad | Assembler::StoreStore));
2055    } else {
2056      // Write serialization page so VM thread can do a pseudo remote membar.
2057      // We use the current thread pointer to calculate a thread specific
2058      // offset to write to within the page. This minimizes bus traffic
2059      // due to cache line collision.
2060      __ serialize_memory(thread, rcx);
2061    }
2062  }
2063
2064  if (AlwaysRestoreFPU) {
2065    // Make sure the control word is correct.
2066    __ fldcw(ExternalAddress(StubRoutines::addr_fpu_cntrl_wrd_std()));
2067  }
2068
2069  Label after_transition;
2070
2071  // check for safepoint operation in progress and/or pending suspend requests
2072  { Label Continue;
2073
2074    __ cmp32(ExternalAddress((address)SafepointSynchronize::address_of_state()),
2075             SafepointSynchronize::_not_synchronized);
2076
2077    Label L;
2078    __ jcc(Assembler::notEqual, L);
2079    __ cmpl(Address(thread, JavaThread::suspend_flags_offset()), 0);
2080    __ jcc(Assembler::equal, Continue);
2081    __ bind(L);
2082
2083    // Don't use call_VM as it will see a possible pending exception and forward it
2084    // and never return here preventing us from clearing _last_native_pc down below.
2085    // Also can't use call_VM_leaf either as it will check to see if rsi & rdi are
2086    // preserved and correspond to the bcp/locals pointers. So we do a runtime call
2087    // by hand.
2088    //
2089    save_native_result(masm, ret_type, stack_slots);
2090    __ push(thread);
2091    if (!is_critical_native) {
2092      __ call(RuntimeAddress(CAST_FROM_FN_PTR(address,
2093                                              JavaThread::check_special_condition_for_native_trans)));
2094    } else {
2095      __ call(RuntimeAddress(CAST_FROM_FN_PTR(address,
2096                                              JavaThread::check_special_condition_for_native_trans_and_transition)));
2097    }
2098    __ increment(rsp, wordSize);
2099    // Restore any method result value
2100    restore_native_result(masm, ret_type, stack_slots);
2101
2102    if (is_critical_native) {
2103      // The call above performed the transition to thread_in_Java so
2104      // skip the transition logic below.
2105      __ jmpb(after_transition);
2106    }
2107
2108    __ bind(Continue);
2109  }
2110
2111  // change thread state
2112  __ movl(Address(thread, JavaThread::thread_state_offset()), _thread_in_Java);
2113  __ bind(after_transition);
2114
2115  Label reguard;
2116  Label reguard_done;
2117  __ cmpl(Address(thread, JavaThread::stack_guard_state_offset()), JavaThread::stack_guard_yellow_disabled);
2118  __ jcc(Assembler::equal, reguard);
2119
2120  // slow path reguard  re-enters here
2121  __ bind(reguard_done);
2122
2123  // Handle possible exception (will unlock if necessary)
2124
2125  // native result if any is live
2126
2127  // Unlock
2128  Label slow_path_unlock;
2129  Label unlock_done;
2130  if (method->is_synchronized()) {
2131
2132    Label done;
2133
2134    // Get locked oop from the handle we passed to jni
2135    __ movptr(obj_reg, Address(oop_handle_reg, 0));
2136
2137    if (UseBiasedLocking) {
2138      __ biased_locking_exit(obj_reg, rbx, done);
2139    }
2140
2141    // Simple recursive lock?
2142
2143    __ cmpptr(Address(rbp, lock_slot_rbp_offset), (int32_t)NULL_WORD);
2144    __ jcc(Assembler::equal, done);
2145
2146    // Must save rax if it is live now because cmpxchg must use it
2147    if (ret_type != T_FLOAT && ret_type != T_DOUBLE && ret_type != T_VOID) {
2148      save_native_result(masm, ret_type, stack_slots);
2149    }
2150
2151    //  get old displaced header
2152    __ movptr(rbx, Address(rbp, lock_slot_rbp_offset));
2153
2154    // get address of the stack lock
2155    __ lea(rax, Address(rbp, lock_slot_rbp_offset));
2156
2157    // Atomic swap old header if oop still contains the stack lock
2158    if (os::is_MP()) {
2159    __ lock();
2160    }
2161
2162    // src -> dest iff dest == rax, else rax, <- dest
2163    // *obj_reg = rbx, iff *obj_reg == rax, else rax, = *(obj_reg)
2164    __ cmpxchgptr(rbx, Address(obj_reg, 0));
2165    __ jcc(Assembler::notEqual, slow_path_unlock);
2166
2167    // slow path re-enters here
2168    __ bind(unlock_done);
2169    if (ret_type != T_FLOAT && ret_type != T_DOUBLE && ret_type != T_VOID) {
2170      restore_native_result(masm, ret_type, stack_slots);
2171    }
2172
2173    __ bind(done);
2174
2175  }
2176
2177  {
2178    SkipIfEqual skip_if(masm, &DTraceMethodProbes, 0);
2179    // Tell dtrace about this method exit
2180    save_native_result(masm, ret_type, stack_slots);
2181    __ mov_metadata(rax, method());
2182    __ call_VM_leaf(
2183         CAST_FROM_FN_PTR(address, SharedRuntime::dtrace_method_exit),
2184         thread, rax);
2185    restore_native_result(masm, ret_type, stack_slots);
2186  }
2187
2188  // We can finally stop using that last_Java_frame we setup ages ago
2189
2190  __ reset_last_Java_frame(thread, false, true);
2191
2192  // Unpack oop result
2193  if (ret_type == T_OBJECT || ret_type == T_ARRAY) {
2194      Label L;
2195      __ cmpptr(rax, (int32_t)NULL_WORD);
2196      __ jcc(Assembler::equal, L);
2197      __ movptr(rax, Address(rax, 0));
2198      __ bind(L);
2199      __ verify_oop(rax);
2200  }
2201
2202  if (!is_critical_native) {
2203    // reset handle block
2204    __ movptr(rcx, Address(thread, JavaThread::active_handles_offset()));
2205    __ movptr(Address(rcx, JNIHandleBlock::top_offset_in_bytes()), NULL_WORD);
2206
2207    // Any exception pending?
2208    __ cmpptr(Address(thread, in_bytes(Thread::pending_exception_offset())), (int32_t)NULL_WORD);
2209    __ jcc(Assembler::notEqual, exception_pending);
2210  }
2211
2212  // no exception, we're almost done
2213
2214  // check that only result value is on FPU stack
2215  __ verify_FPU(ret_type == T_FLOAT || ret_type == T_DOUBLE ? 1 : 0, "native_wrapper normal exit");
2216
2217  // Fixup floating point results so that the result looks like a return from a compiled method
2218  if (ret_type == T_FLOAT) {
2219    if (UseSSE >= 1) {
2220      // Pop st0 and store as float and reload into xmm register
2221      __ fstp_s(Address(rbp, -4));
2222      __ movflt(xmm0, Address(rbp, -4));
2223    }
2224  } else if (ret_type == T_DOUBLE) {
2225    if (UseSSE >= 2) {
2226      // Pop st0 and store as double and reload into xmm register
2227      __ fstp_d(Address(rbp, -8));
2228      __ movdbl(xmm0, Address(rbp, -8));
2229    }
2230  }
2231
2232  // Return
2233
2234  __ leave();
2235  __ ret(0);
2236
2237  // Unexpected paths are out of line and go here
2238
2239  // Slow path locking & unlocking
2240  if (method->is_synchronized()) {
2241
2242    // BEGIN Slow path lock
2243
2244    __ bind(slow_path_lock);
2245
2246    // has last_Java_frame setup. No exceptions so do vanilla call not call_VM
2247    // args are (oop obj, BasicLock* lock, JavaThread* thread)
2248    __ push(thread);
2249    __ push(lock_reg);
2250    __ push(obj_reg);
2251    __ call(RuntimeAddress(CAST_FROM_FN_PTR(address, SharedRuntime::complete_monitor_locking_C)));
2252    __ addptr(rsp, 3*wordSize);
2253
2254#ifdef ASSERT
2255    { Label L;
2256    __ cmpptr(Address(thread, in_bytes(Thread::pending_exception_offset())), (int)NULL_WORD);
2257    __ jcc(Assembler::equal, L);
2258    __ stop("no pending exception allowed on exit from monitorenter");
2259    __ bind(L);
2260    }
2261#endif
2262    __ jmp(lock_done);
2263
2264    // END Slow path lock
2265
2266    // BEGIN Slow path unlock
2267    __ bind(slow_path_unlock);
2268
2269    // Slow path unlock
2270
2271    if (ret_type == T_FLOAT || ret_type == T_DOUBLE ) {
2272      save_native_result(masm, ret_type, stack_slots);
2273    }
2274    // Save pending exception around call to VM (which contains an EXCEPTION_MARK)
2275
2276    __ pushptr(Address(thread, in_bytes(Thread::pending_exception_offset())));
2277    __ movptr(Address(thread, in_bytes(Thread::pending_exception_offset())), NULL_WORD);
2278
2279
2280    // should be a peal
2281    // +wordSize because of the push above
2282    __ lea(rax, Address(rbp, lock_slot_rbp_offset));
2283    __ push(rax);
2284
2285    __ push(obj_reg);
2286    __ call(RuntimeAddress(CAST_FROM_FN_PTR(address, SharedRuntime::complete_monitor_unlocking_C)));
2287    __ addptr(rsp, 2*wordSize);
2288#ifdef ASSERT
2289    {
2290      Label L;
2291      __ cmpptr(Address(thread, in_bytes(Thread::pending_exception_offset())), (int32_t)NULL_WORD);
2292      __ jcc(Assembler::equal, L);
2293      __ stop("no pending exception allowed on exit complete_monitor_unlocking_C");
2294      __ bind(L);
2295    }
2296#endif /* ASSERT */
2297
2298    __ popptr(Address(thread, in_bytes(Thread::pending_exception_offset())));
2299
2300    if (ret_type == T_FLOAT || ret_type == T_DOUBLE ) {
2301      restore_native_result(masm, ret_type, stack_slots);
2302    }
2303    __ jmp(unlock_done);
2304    // END Slow path unlock
2305
2306  }
2307
2308  // SLOW PATH Reguard the stack if needed
2309
2310  __ bind(reguard);
2311  save_native_result(masm, ret_type, stack_slots);
2312  {
2313    __ call(RuntimeAddress(CAST_FROM_FN_PTR(address, SharedRuntime::reguard_yellow_pages)));
2314  }
2315  restore_native_result(masm, ret_type, stack_slots);
2316  __ jmp(reguard_done);
2317
2318
2319  // BEGIN EXCEPTION PROCESSING
2320
2321  if (!is_critical_native) {
2322    // Forward  the exception
2323    __ bind(exception_pending);
2324
2325    // remove possible return value from FPU register stack
2326    __ empty_FPU_stack();
2327
2328    // pop our frame
2329    __ leave();
2330    // and forward the exception
2331    __ jump(RuntimeAddress(StubRoutines::forward_exception_entry()));
2332  }
2333
2334  __ flush();
2335
2336  nmethod *nm = nmethod::new_native_nmethod(method,
2337                                            compile_id,
2338                                            masm->code(),
2339                                            vep_offset,
2340                                            frame_complete,
2341                                            stack_slots / VMRegImpl::slots_per_word,
2342                                            (is_static ? in_ByteSize(klass_offset) : in_ByteSize(receiver_offset)),
2343                                            in_ByteSize(lock_slot_offset*VMRegImpl::stack_slot_size),
2344                                            oop_maps);
2345
2346  if (is_critical_native) {
2347    nm->set_lazy_critical_native(true);
2348  }
2349
2350  return nm;
2351
2352}
2353
2354#ifdef HAVE_DTRACE_H
2355// ---------------------------------------------------------------------------
2356// Generate a dtrace nmethod for a given signature.  The method takes arguments
2357// in the Java compiled code convention, marshals them to the native
2358// abi and then leaves nops at the position you would expect to call a native
2359// function. When the probe is enabled the nops are replaced with a trap
2360// instruction that dtrace inserts and the trace will cause a notification
2361// to dtrace.
2362//
2363// The probes are only able to take primitive types and java/lang/String as
2364// arguments.  No other java types are allowed. Strings are converted to utf8
2365// strings so that from dtrace point of view java strings are converted to C
2366// strings. There is an arbitrary fixed limit on the total space that a method
2367// can use for converting the strings. (256 chars per string in the signature).
2368// So any java string larger than this is truncated.
2369
2370nmethod *SharedRuntime::generate_dtrace_nmethod(
2371    MacroAssembler *masm, methodHandle method) {
2372
2373  // generate_dtrace_nmethod is guarded by a mutex so we are sure to
2374  // be single threaded in this method.
2375  assert(AdapterHandlerLibrary_lock->owned_by_self(), "must be");
2376
2377  // Fill in the signature array, for the calling-convention call.
2378  int total_args_passed = method->size_of_parameters();
2379
2380  BasicType* in_sig_bt  = NEW_RESOURCE_ARRAY(BasicType, total_args_passed);
2381  VMRegPair  *in_regs   = NEW_RESOURCE_ARRAY(VMRegPair, total_args_passed);
2382
2383  // The signature we are going to use for the trap that dtrace will see
2384  // java/lang/String is converted. We drop "this"; any other object
2385  // reference is converted to NULL.  (A one-slot java/lang/Long object reference
2386  // is converted to a two-slot long, which is why we double the allocation).
2387  BasicType* out_sig_bt = NEW_RESOURCE_ARRAY(BasicType, total_args_passed * 2);
2388  VMRegPair* out_regs   = NEW_RESOURCE_ARRAY(VMRegPair, total_args_passed * 2);
2389
2390  int i=0;
2391  int total_strings = 0;
2392  int first_arg_to_pass = 0;
2393  int total_c_args = 0;
2394
2395  if( !method->is_static() ) {  // Pass in receiver first
2396    in_sig_bt[i++] = T_OBJECT;
2397    first_arg_to_pass = 1;
2398  }
2399
2400  // We need to convert the java args to where a native (non-jni) function
2401  // would expect them. To figure out where they go we convert the java
2402  // signature to a C signature.
2403
2404  SignatureStream ss(method->signature());
2405  for ( ; !ss.at_return_type(); ss.next()) {
2406    BasicType bt = ss.type();
2407    in_sig_bt[i++] = bt;  // Collect remaining bits of signature
2408    out_sig_bt[total_c_args++] = bt;
2409    if( bt == T_OBJECT) {
2410      Symbol* s = ss.as_symbol_or_null();   // symbol is created
2411      if (s == vmSymbols::java_lang_String()) {
2412        total_strings++;
2413        out_sig_bt[total_c_args-1] = T_ADDRESS;
2414      } else if (s == vmSymbols::java_lang_Boolean() ||
2415                 s == vmSymbols::java_lang_Character() ||
2416                 s == vmSymbols::java_lang_Byte() ||
2417                 s == vmSymbols::java_lang_Short() ||
2418                 s == vmSymbols::java_lang_Integer() ||
2419                 s == vmSymbols::java_lang_Float()) {
2420        out_sig_bt[total_c_args-1] = T_INT;
2421      } else if (s == vmSymbols::java_lang_Long() ||
2422                 s == vmSymbols::java_lang_Double()) {
2423        out_sig_bt[total_c_args-1] = T_LONG;
2424        out_sig_bt[total_c_args++] = T_VOID;
2425      }
2426    } else if ( bt == T_LONG || bt == T_DOUBLE ) {
2427      in_sig_bt[i++] = T_VOID;   // Longs & doubles take 2 Java slots
2428      out_sig_bt[total_c_args++] = T_VOID;
2429    }
2430  }
2431
2432  assert(i==total_args_passed, "validly parsed signature");
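  // Worked example (illustrative): for a virtual method taking
  // (String, long, Integer) the loop above produces
  //   in_sig_bt  = { T_OBJECT /*this*/, T_OBJECT, T_LONG, T_VOID, T_OBJECT }
  //   out_sig_bt = { T_ADDRESS /*utf8*/, T_LONG, T_VOID, T_INT /*unboxed*/ }
  // with total_args_passed = 5, total_c_args = 4 and total_strings = 1; the
  // receiver is dropped from the C signature.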
2433
2434  // Now get the compiled-Java layout as input arguments
2435  int comp_args_on_stack;
2436  comp_args_on_stack = SharedRuntime::java_calling_convention(
2437      in_sig_bt, in_regs, total_args_passed, false);
2438
2439  // Now figure out where the args must be stored and how much stack space
2440  // they require (neglecting out_preserve_stack_slots).
2441
2442  int out_arg_slots;
2443  out_arg_slots = c_calling_convention(out_sig_bt, out_regs, total_c_args);
2444
2445  // Calculate the total number of stack slots we will need.
2446
2447  // First count the abi requirement plus all of the outgoing args
2448  int stack_slots = SharedRuntime::out_preserve_stack_slots() + out_arg_slots;
2449
2450  // Now space for the string(s) we must convert
2451
2452  int* string_locs   = NEW_RESOURCE_ARRAY(int, total_strings + 1);
2453  for (i = 0; i < total_strings ; i++) {
2454    string_locs[i] = stack_slots;
2455    stack_slots += max_dtrace_string_size / VMRegImpl::stack_slot_size;
2456  }
2457
2458  // + 2 for return address (which we own) and saved rbp,
2459
2460  stack_slots += 2;
2461
2462  // Ok The space we have allocated will look like:
2463  //
2464  //
2465  // FP-> |                     |
2466  //      |---------------------|
2467  //      | string[n]           |
2468  //      |---------------------| <- string_locs[n]
2469  //      | string[n-1]         |
2470  //      |---------------------| <- string_locs[n-1]
2471  //      | ...                 |
2472  //      | ...                 |
2473  //      |---------------------| <- string_locs[1]
2474  //      | string[0]           |
2475  //      |---------------------| <- string_locs[0]
2476  //      | outbound memory     |
2477  //      | based arguments     |
2478  //      |                     |
2479  //      |---------------------|
2480  //      |                     |
2481  // SP-> | out_preserved_slots |
2482  //
2483  //
2484
2485  // Now compute actual number of stack words we need rounding to make
2486  // stack properly aligned.
2487  stack_slots = round_to(stack_slots, 2 * VMRegImpl::slots_per_word);
2488
2489  int stack_size = stack_slots * VMRegImpl::stack_slot_size;
2490
2491  intptr_t start = (intptr_t)__ pc();
2492
2493  // First thing make an ic check to see if we should even be here
2494
2495  // We are free to use all registers as temps without saving them and
2496  // restoring them except rbp. rbp, is the only callee save register
2497  // as far as the interpreter and the compiler(s) are concerned.
2498
2499  const Register ic_reg = rax;
2500  const Register receiver = rcx;
2501  Label hit;
2502  Label exception_pending;
2503
2504
2505  __ verify_oop(receiver);
2506  __ cmpl(ic_reg, Address(receiver, oopDesc::klass_offset_in_bytes()));
2507  __ jcc(Assembler::equal, hit);
2508
2509  __ jump(RuntimeAddress(SharedRuntime::get_ic_miss_stub()));
2510
2511  // verified entry must be aligned for code patching.
2512  // and the first 5 bytes must be in the same cache line
2513  // if we align at 8 then we will be sure 5 bytes are in the same line
2514  __ align(8);
2515
2516  __ bind(hit);
2517
2518  int vep_offset = ((intptr_t)__ pc()) - start;
2519
2520
2521  // The instruction at the verified entry point must be 5 bytes or longer
2522  // because it can be patched on the fly by make_non_entrant. The stack bang
2523  // instruction fits that requirement.
2524
2525  // Generate stack overflow check
2526
2527
2528  if (UseStackBanging) {
2529    if (stack_size <= StackShadowPages*os::vm_page_size()) {
2530      __ bang_stack_with_offset(StackShadowPages*os::vm_page_size());
2531    } else {
2532      __ movl(rax, stack_size);
2533      __ bang_stack_size(rax, rbx);
2534    }
2535  } else {
2536    // need a 5 byte instruction to allow MT safe patching to non-entrant
2537    __ fat_nop();
2538  }
2539
2540  assert(((int)__ pc() - start - vep_offset) >= 5,
2541         "valid size for make_non_entrant");
2542
2543  // Generate a new frame for the wrapper.
2544  __ enter();
2545
2546  // -2 because return address is already present and so is saved rbp,
2547  if (stack_size - 2*wordSize != 0) {
2548    __ subl(rsp, stack_size - 2*wordSize);
2549  }
2550
2551  // Frame is now completed as far as size and linkage.
2552
2553  int frame_complete = ((intptr_t)__ pc()) - start;
2554
2555  // First thing we do store all the args as if we are doing the call.
2556  // Since the C calling convention is stack based that ensures that
2557  // all the Java register args are stored before we need to convert any
2558  // string we might have.
2559
2560  int sid = 0;
2561  int c_arg, j_arg;
2562  int string_reg = 0;
2563
2564  for (j_arg = first_arg_to_pass, c_arg = 0 ;
2565       j_arg < total_args_passed ; j_arg++, c_arg++ ) {
2566
2567    VMRegPair src = in_regs[j_arg];
2568    VMRegPair dst = out_regs[c_arg];
2569    assert(dst.first()->is_stack() || in_sig_bt[j_arg] == T_VOID,
2570           "stack based abi assumed");
2571
2572    switch (in_sig_bt[j_arg]) {
2573
2574      case T_ARRAY:
2575      case T_OBJECT:
2576        if (out_sig_bt[c_arg] == T_ADDRESS) {
2577          // Any register based arg for a java string after the first
2578          // will be destroyed by the call to get_utf so we store
2579          // the original value in the location the utf string address
2580          // will eventually be stored.
2581          if (src.first()->is_reg()) {
2582            if (string_reg++ != 0) {
2583              simple_move32(masm, src, dst);
2584            }
2585          }
2586        } else if (out_sig_bt[c_arg] == T_INT || out_sig_bt[c_arg] == T_LONG) {
2587          // need to unbox a one-word value
2588          Register in_reg = rax;
2589          if ( src.first()->is_reg() ) {
2590            in_reg = src.first()->as_Register();
2591          } else {
2592            simple_move32(masm, src, in_reg->as_VMReg());
2593          }
2594          Label skipUnbox;
2595          __ movl(Address(rsp, reg2offset_out(dst.first())), NULL_WORD);
2596          if ( out_sig_bt[c_arg] == T_LONG ) {
2597            __ movl(Address(rsp, reg2offset_out(dst.second())), NULL_WORD);
2598          }
2599          __ testl(in_reg, in_reg);
2600          __ jcc(Assembler::zero, skipUnbox);
2601          assert(dst.first()->is_stack() &&
2602                 (!dst.second()->is_valid() || dst.second()->is_stack()),
2603                 "value(s) must go into stack slots");
2604
2605          BasicType bt = out_sig_bt[c_arg];
2606          int box_offset = java_lang_boxing_object::value_offset_in_bytes(bt);
2607          if ( bt == T_LONG ) {
2608            __ movl(rbx, Address(in_reg,
2609                                 box_offset + VMRegImpl::stack_slot_size));
2610            __ movl(Address(rsp, reg2offset_out(dst.second())), rbx);
2611          }
2612          __ movl(in_reg,  Address(in_reg, box_offset));
2613          __ movl(Address(rsp, reg2offset_out(dst.first())), in_reg);
2614          __ bind(skipUnbox);
2615        } else {
2616          // Convert the arg to NULL
2617          __ movl(Address(rsp, reg2offset_out(dst.first())), NULL_WORD);
2618        }
2619        if (out_sig_bt[c_arg] == T_LONG) {
2620          assert(out_sig_bt[c_arg+1] == T_VOID, "must be");
2621          ++c_arg; // Move over the T_VOID To keep the loop indices in sync
2622        }
2623        break;
2624
2625      case T_VOID:
2626        break;
2627
2628      case T_FLOAT:
2629        float_move(masm, src, dst);
2630        break;
2631
2632      case T_DOUBLE:
2633        assert( j_arg + 1 < total_args_passed &&
2634                in_sig_bt[j_arg + 1] == T_VOID, "bad arg list");
2635        double_move(masm, src, dst);
2636        break;
2637
2638      case T_LONG :
2639        long_move(masm, src, dst);
2640        break;
2641
2642      case T_ADDRESS: assert(false, "found T_ADDRESS in java args");
2643
2644      default:
2645        simple_move32(masm, src, dst);
2646    }
2647  }
2648
2649  // Now we must convert any string we have to utf8
2650  //
2651
2652  for (sid = 0, j_arg = first_arg_to_pass, c_arg = 0 ;
2653       sid < total_strings ; j_arg++, c_arg++ ) {
2654
2655    if (out_sig_bt[c_arg] == T_ADDRESS) {
2656
2657      Address utf8_addr = Address(
2658          rsp, string_locs[sid++] * VMRegImpl::stack_slot_size);
2659      __ leal(rax, utf8_addr);
2660
2661      // The first string we find might still be in the original java arg
2662      // register
2663      VMReg orig_loc = in_regs[j_arg].first();
2664      Register string_oop;
2665
2666      // This is where the argument will eventually reside
2667      Address dest = Address(rsp, reg2offset_out(out_regs[c_arg].first()));
2668
2669      if (sid == 1 && orig_loc->is_reg()) {
2670        string_oop = orig_loc->as_Register();
2671        assert(string_oop != rax, "smashed arg");
2672      } else {
2673
2674        if (orig_loc->is_reg()) {
2675          // Get the copy of the jls object
2676          __ movl(rcx, dest);
2677        } else {
2678          // arg is still in the original location
2679          __ movl(rcx, Address(rbp, reg2offset_in(orig_loc)));
2680        }
2681        string_oop = rcx;
2682
2683      }
2684      Label nullString;
2685      __ movl(dest, NULL_WORD);
2686      __ testl(string_oop, string_oop);
2687      __ jcc(Assembler::zero, nullString);
2688
2689      // Now we can store the address of the utf string as the argument
2690      __ movl(dest, rax);
2691
2692      // And do the conversion
2693      __ call_VM_leaf(CAST_FROM_FN_PTR(
2694             address, SharedRuntime::get_utf), string_oop, rax);
2695      __ bind(nullString);
2696    }
2697
2698    if (in_sig_bt[j_arg] == T_OBJECT && out_sig_bt[c_arg] == T_LONG) {
2699      assert(out_sig_bt[c_arg+1] == T_VOID, "must be");
2700      ++c_arg; // Move over the T_VOID To keep the loop indices in sync
2701    }
2702  }
2703
2704
2705  // Ok now we are done. Need to place the nop that dtrace wants in order to
2706  // patch in the trap
2707
2708  int patch_offset = ((intptr_t)__ pc()) - start;
2709
2710  __ nop();
2711
2712
2713  // Return
2714
2715  __ leave();
2716  __ ret(0);
2717
2718  __ flush();
2719
2720  nmethod *nm = nmethod::new_dtrace_nmethod(
2721      method, masm->code(), vep_offset, patch_offset, frame_complete,
2722      stack_slots / VMRegImpl::slots_per_word);
2723  return nm;
2724
2725}
2726
2727#endif // HAVE_DTRACE_H
2728
2729// this function returns the adjustment size (in number of words) to a c2i adapter
2730// activation for use during deoptimization
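// For example (illustrative numbers): a callee with 2 parameters and 5 locals
// needs an adjustment of (5 - 2) * Interpreter::stackElementWords, i.e. 3 words
// when stackElementWords is 1.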
2731int Deoptimization::last_frame_adjust(int callee_parameters, int callee_locals ) {
2732  return (callee_locals - callee_parameters) * Interpreter::stackElementWords;
2733}
2734
2735
2736uint SharedRuntime::out_preserve_stack_slots() {
2737  return 0;
2738}
2739
2740
2741//------------------------------generate_deopt_blob----------------------------
2742void SharedRuntime::generate_deopt_blob() {
2743  // allocate space for the code
2744  ResourceMark rm;
2745  // setup code generation tools
2746  CodeBuffer   buffer("deopt_blob", 1024, 1024);
2747  MacroAssembler* masm = new MacroAssembler(&buffer);
2748  int frame_size_in_words;
2749  OopMap* map = NULL;
2750  // Account for the extra args we place on the stack
2751  // by the time we call fetch_unroll_info
2752  const int additional_words = 2; // deopt kind, thread
2753
2754  OopMapSet *oop_maps = new OopMapSet();
2755
2756  // -------------
2757  // This code enters when returning to a de-optimized nmethod.  A return
2758// address has been pushed on the stack, and return values are in
2759// registers.
2760// If we are doing a normal deopt then we were called from the patched
2761// nmethod from the point we returned to the nmethod. So the return
2762// address on the stack is wrong by NativeCall::instruction_size.
2763// We will adjust the value so it looks like we have the original return
2764// address on the stack (like when we eagerly deoptimized).
2765// In the case of an exception pending when we deoptimize, we enter
2766  // with a return address on the stack that points after the call we patched
2767  // into the exception handler. We have the following register state:
2768  //    rax,: exception
2769  //    rbx,: exception handler
2770  //    rdx: throwing pc
2771  // So in this case we simply jam rdx into the useless return address and
2772  // the stack looks just like we want.
2773  //
2774  // At this point we need to de-opt.  We save the argument return
2775  // registers.  We call the first C routine, fetch_unroll_info().  This
2776  // routine captures the return values and returns a structure which
2777  // describes the current frame size and the sizes of all replacement frames.
2778  // The current frame is compiled code and may contain many inlined
2779  // functions, each with their own JVM state.  We pop the current frame, then
2780  // push all the new frames.  Then we call the C routine unpack_frames() to
2781  // populate these frames.  Finally unpack_frames() returns us the new target
2782  // address.  Notice that callee-save registers are BLOWN here; they have
2783  // already been captured in the vframeArray at the time the return PC was
2784  // patched.
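//
// Roughly, the code below has four entry points and makes two calls into the VM:
//   - normal deopt entry:     save all registers, push Unpack_deopt, jump to cont
//   - reexecute entry:        save all registers, push Unpack_reexecute, jump to cont
//   - exception entry:        stash the exception oop and pc in the thread, fall through
//   - exception-in-tls entry: push a dummy return slot, save all registers,
//                             push Unpack_exception and patch in the throwing pc
// At cont we call fetch_unroll_info(), pop the deoptee frame, push the skeletal
// interpreter frames described by the UnrollBlock, and finally call
// unpack_frames() to fill them in before returning into the interpreter.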
2785  address start = __ pc();
2786  Label cont;
2787
2788  // Prolog for non exception case!
2789
2790  // Save everything in sight.
2791
2792  map = RegisterSaver::save_live_registers(masm, additional_words, &frame_size_in_words, false);
2793  // Normal deoptimization
2794  __ push(Deoptimization::Unpack_deopt);
2795  __ jmp(cont);
2796
2797  int reexecute_offset = __ pc() - start;
2798
2799  // Reexecute case
2800  // return address is the pc describes what bci to do re-execute at
2801
2802  // No need to update map as each call to save_live_registers will produce identical oopmap
2803  (void) RegisterSaver::save_live_registers(masm, additional_words, &frame_size_in_words, false);
2804
2805  __ push(Deoptimization::Unpack_reexecute);
2806  __ jmp(cont);
2807
2808  int exception_offset = __ pc() - start;
2809
2810  // Prolog for exception case
2811
2812  // all registers are dead at this entry point, except for rax, and
2813  // rdx which contain the exception oop and exception pc
2814  // respectively.  Set them in TLS and fall thru to the
2815  // unpack_with_exception_in_tls entry point.
2816
2817  __ get_thread(rdi);
2818  __ movptr(Address(rdi, JavaThread::exception_pc_offset()), rdx);
2819  __ movptr(Address(rdi, JavaThread::exception_oop_offset()), rax);
2820
2821  int exception_in_tls_offset = __ pc() - start;
2822
2823  // new implementation because exception oop is now passed in JavaThread
2824
2825  // Prolog for exception case
2826  // All registers must be preserved because they might be used by LinearScan
2827  // Exception oop and throwing PC are passed in JavaThread
2828  // tos: stack at point of call to method that threw the exception (i.e. only
2829  // args are on the stack, no return address)
2830
2831  // make room on stack for the return address
2832  // It will be patched later with the throwing pc. The correct value is not
2833  // available now because loading it from memory would destroy registers.
2834  __ push(0);
2835
2836  // Save everything in sight.
2837
2838  // No need to update map as each call to save_live_registers will produce identical oopmap
2839  (void) RegisterSaver::save_live_registers(masm, additional_words, &frame_size_in_words, false);
2840
2841  // Now it is safe to overwrite any register
2842
2843  // store the correct deoptimization type
2844  __ push(Deoptimization::Unpack_exception);
2845
2846  // load throwing pc from JavaThread and patch it as the return address
2847  // of the current frame. Then clear the field in JavaThread
2848  __ get_thread(rdi);
2849  __ movptr(rdx, Address(rdi, JavaThread::exception_pc_offset()));
2850  __ movptr(Address(rbp, wordSize), rdx);
2851  __ movptr(Address(rdi, JavaThread::exception_pc_offset()), NULL_WORD);
2852
2853#ifdef ASSERT
2854  // verify that there is really an exception oop in JavaThread
2855  __ movptr(rax, Address(rdi, JavaThread::exception_oop_offset()));
2856  __ verify_oop(rax);
2857
2858  // verify that there is no pending exception
2859  Label no_pending_exception;
2860  __ movptr(rax, Address(rdi, Thread::pending_exception_offset()));
2861  __ testptr(rax, rax);
2862  __ jcc(Assembler::zero, no_pending_exception);
2863  __ stop("must not have pending exception here");
2864  __ bind(no_pending_exception);
2865#endif
2866
2867  __ bind(cont);
2868
2869  // Compiled code leaves the floating point stack dirty, empty it.
2870  __ empty_FPU_stack();
2871
2872
2873  // Call C code.  Need thread and this frame, but NOT official VM entry
2874  // crud.  We cannot block on this call, no GC can happen.
2875  __ get_thread(rcx);
2876  __ push(rcx);
2877  // fetch_unroll_info needs to call last_java_frame()
2878  __ set_last_Java_frame(rcx, noreg, noreg, NULL);
2879
2880  __ call(RuntimeAddress(CAST_FROM_FN_PTR(address, Deoptimization::fetch_unroll_info)));
2881
2882  // Need to have an oopmap that tells fetch_unroll_info where to
2883  // find any register it might need.
2884
2885  oop_maps->add_gc_map( __ pc()-start, map);
2886
2887  // Discard arg to fetch_unroll_info
2888  __ pop(rcx);
2889
2890  __ get_thread(rcx);
2891  __ reset_last_Java_frame(rcx, false, false);
2892
2893  // Load UnrollBlock into EDI
2894  __ mov(rdi, rax);
2895
2896  // Move the unpack kind to a safe place in the UnrollBlock because
2897  // we are very short of registers
2898
2899  Address unpack_kind(rdi, Deoptimization::UnrollBlock::unpack_kind_offset_in_bytes());
2900  // retrieve the deopt kind from where we left it.
2901  __ pop(rax);
2902  __ movl(unpack_kind, rax);                      // save the unpack_kind value
2903
2904  Label noException;
2905  __ cmpl(rax, Deoptimization::Unpack_exception);   // Was exception pending?
2906  __ jcc(Assembler::notEqual, noException);
2907  __ movptr(rax, Address(rcx, JavaThread::exception_oop_offset()));
2908  __ movptr(rdx, Address(rcx, JavaThread::exception_pc_offset()));
2909  __ movptr(Address(rcx, JavaThread::exception_oop_offset()), NULL_WORD);
2910  __ movptr(Address(rcx, JavaThread::exception_pc_offset()), NULL_WORD);
2911
2912  __ verify_oop(rax);
2913
2914  // Overwrite the result registers with the exception results.
2915  __ movptr(Address(rsp, RegisterSaver::raxOffset()*wordSize), rax);
2916  __ movptr(Address(rsp, RegisterSaver::rdxOffset()*wordSize), rdx);
2917
2918  __ bind(noException);
2919
2920  // Stack is back to only having register save data on the stack.
2921  // Now restore the result registers. Everything else is either dead or captured
2922  // in the vframeArray.
2923
2924  RegisterSaver::restore_result_registers(masm);
2925
2926  // A non-standard FPU control word may be leaked out through a safepoint blob, and we
2927  // can deopt at a poll point with that non-standard control word. We must therefore make
2928  // sure the control word is correct after restore_result_registers.
2929  __ fldcw(ExternalAddress(StubRoutines::addr_fpu_cntrl_wrd_std()));
2930
2931  // All of the register save area has been popped off the stack. Only the
2932  // return address remains.
2933
2934  // Pop all the frames we must move/replace.
2935  //
2936  // Frame picture (youngest to oldest)
2937  // 1: self-frame (no frame link)
2938  // 2: deopting frame  (no frame link)
2939  // 3: caller of deopting frame (could be compiled/interpreted).
2940  //
2941  // Note: by leaving the return address of the self-frame on the stack
2942  // and using the size of frame 2 to adjust the stack,
2943  // the return address to frame 3 will still be on the stack when we are done.
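// For illustration: right after the addptr below, rsp points at the return
// address into frame 3 (with frame 3 itself beneath it); the loop further
// down then lays the skeletal interpreter frames on top of that, and a
// re-pushed self-frame goes on last for the unpack_frames() call.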
2944
2945  // Pop deoptimized frame
2946  __ addptr(rsp, Address(rdi,Deoptimization::UnrollBlock::size_of_deoptimized_frame_offset_in_bytes()));
2947
2948  // sp should be pointing at the return address to the caller (3)
2949
2950  // Stack bang to make sure there's enough room for these interpreter frames.
2951  if (UseStackBanging) {
2952    __ movl(rbx, Address(rdi ,Deoptimization::UnrollBlock::total_frame_sizes_offset_in_bytes()));
2953    __ bang_stack_size(rbx, rcx);
2954  }
2955
2956  // Load array of frame pcs into ECX
2957  __ movptr(rcx,Address(rdi,Deoptimization::UnrollBlock::frame_pcs_offset_in_bytes()));
2958
2959  __ pop(rsi); // trash the old pc
2960
2961  // Load array of frame sizes into ESI
2962  __ movptr(rsi,Address(rdi,Deoptimization::UnrollBlock::frame_sizes_offset_in_bytes()));
2963
2964  Address counter(rdi, Deoptimization::UnrollBlock::counter_temp_offset_in_bytes());
2965
2966  __ movl(rbx, Address(rdi, Deoptimization::UnrollBlock::number_of_frames_offset_in_bytes()));
2967  __ movl(counter, rbx);
2968
2969  // Pick up the initial fp we should save
2970  __ movptr(rbp, Address(rdi, Deoptimization::UnrollBlock::initial_info_offset_in_bytes()));
2971
2972  // Now adjust the caller's stack to make up for the extra locals
2973  // but record the original sp so that we can save it in the skeletal interpreter
2974  // frame and the stack walking of interpreter_sender will get the unextended sp
2975  // value and not the "real" sp value.
2976
2977  Address sp_temp(rdi, Deoptimization::UnrollBlock::sender_sp_temp_offset_in_bytes());
2978  __ movptr(sp_temp, rsp);
2979  __ movl2ptr(rbx, Address(rdi, Deoptimization::UnrollBlock::caller_adjustment_offset_in_bytes()));
2980  __ subptr(rsp, rbx);
2981
2982  // Push interpreter frames in a loop
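// Each iteration reads one frame size from the sizes array (rsi) and one
// return pc from the pcs array (rcx), pushes that pc, establishes a new rbp
// with enter(), extends the frame by the requested size, and records the
// sender sp so the skeletal frame is walkable until layout_activation_impl
// fills in the rest.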
2983  Label loop;
2984  __ bind(loop);
2985  __ movptr(rbx, Address(rsi, 0));      // Load frame size
2986#ifdef CC_INTERP
2987  __ subptr(rbx, 4*wordSize);           // we'll push pc and ebp by hand and
2988#ifdef ASSERT
2989  __ push(0xDEADDEAD);                  // Make a recognizable pattern
2990  __ push(0xDEADDEAD);
2991#else /* ASSERT */
2992  __ subptr(rsp, 2*wordSize);           // skip the "static long no_param"
2993#endif /* ASSERT */
2994#else /* CC_INTERP */
2995  __ subptr(rbx, 2*wordSize);           // we'll push pc and rbp, by hand
2996#endif /* CC_INTERP */
2997  __ pushptr(Address(rcx, 0));          // save return address
2998  __ enter();                           // save old & set new rbp,
2999  __ subptr(rsp, rbx);                  // Prolog!
3000  __ movptr(rbx, sp_temp);              // sender's sp
3001#ifdef CC_INTERP
3002  __ movptr(Address(rbp,
3003                  -(sizeof(BytecodeInterpreter)) + in_bytes(byte_offset_of(BytecodeInterpreter, _sender_sp))),
3004          rbx); // Make it walkable
3005#else /* CC_INTERP */
3006  // This value is corrected by layout_activation_impl
3007  __ movptr(Address(rbp, frame::interpreter_frame_last_sp_offset * wordSize), NULL_WORD);
3008  __ movptr(Address(rbp, frame::interpreter_frame_sender_sp_offset * wordSize), rbx); // Make it walkable
3009#endif /* CC_INTERP */
3010  __ movptr(sp_temp, rsp);              // pass to next frame
3011  __ addptr(rsi, wordSize);             // Bump array pointer (sizes)
3012  __ addptr(rcx, wordSize);             // Bump array pointer (pcs)
3013  __ decrementl(counter);             // decrement counter
3014  __ jcc(Assembler::notZero, loop);
3015  __ pushptr(Address(rcx, 0));          // save final return address
3016
3017  // Re-push self-frame
3018  __ enter();                           // save old & set new rbp,
3019
3020  //  Return address and rbp, are in place
3021  // We'll push additional args later. Just allocate a full sized
3022  // register save area
3023  __ subptr(rsp, (frame_size_in_words-additional_words - 2) * wordSize);
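  // (frame_size_in_words counts the whole save frame, including the two
  //  additional argument words pushed later and the return-address/rbp pair
  //  already in place, hence the subtractions here.)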
3024
3025  // Restore frame locals after moving the frame
3026  __ movptr(Address(rsp, RegisterSaver::raxOffset()*wordSize), rax);
3027  __ movptr(Address(rsp, RegisterSaver::rdxOffset()*wordSize), rdx);
3028  __ fstp_d(Address(rsp, RegisterSaver::fpResultOffset()*wordSize));   // Pop float stack and store in local
3029  if( UseSSE>=2 ) __ movdbl(Address(rsp, RegisterSaver::xmm0Offset()*wordSize), xmm0);
3030  if( UseSSE==1 ) __ movflt(Address(rsp, RegisterSaver::xmm0Offset()*wordSize), xmm0);
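  // (The x87 top-of-stack is always spilled; with SSE in use the result may
  //  instead be in xmm0, so that is spilled as well and the matching slot is
  //  reloaded further down once the return type is known.)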
3031
3032  // Set up the args to unpack_frame
3033
3034  __ pushl(unpack_kind);                     // get the unpack_kind value
3035  __ get_thread(rcx);
3036  __ push(rcx);
3037
3038  // set last_Java_sp, last_Java_fp
3039  __ set_last_Java_frame(rcx, noreg, rbp, NULL);
3040
3041  // Call C code.  Need thread but NOT official VM entry
3042  // crud.  We cannot block on this call, no GC can happen.  Call should
3043  // restore return values to their stack-slots with the new SP.
3044  __ call(RuntimeAddress(CAST_FROM_FN_PTR(address, Deoptimization::unpack_frames)));
3045  // Set an oopmap for the call site
3046  oop_maps->add_gc_map( __ pc()-start, new OopMap( frame_size_in_words, 0 ));
3047
3048  // rax, contains the return result type
3049  __ push(rax);
3050
3051  __ get_thread(rcx);
3052  __ reset_last_Java_frame(rcx, false, false);
3053
3054  // Collect return values
3055  __ movptr(rax,Address(rsp, (RegisterSaver::raxOffset() + additional_words + 1)*wordSize));
3056  __ movptr(rdx,Address(rsp, (RegisterSaver::rdxOffset() + additional_words + 1)*wordSize));
3057
3058  // Clear floating point stack before returning to interpreter
3059  __ empty_FPU_stack();
3060
3061  // Check if we should push the float or double return value.
3062  Label results_done, yes_double_value;
3063  __ cmpl(Address(rsp, 0), T_DOUBLE);
3064  __ jcc (Assembler::zero, yes_double_value);
3065  __ cmpl(Address(rsp, 0), T_FLOAT);
3066  __ jcc (Assembler::notZero, results_done);
3067
3068  // return float value as expected by interpreter
3069  if( UseSSE>=1 ) __ movflt(xmm0, Address(rsp, (RegisterSaver::xmm0Offset() + additional_words + 1)*wordSize));
3070  else            __ fld_d(Address(rsp, (RegisterSaver::fpResultOffset() + additional_words + 1)*wordSize));
3071  __ jmp(results_done);
3072
3073  // return double value as expected by interpreter
3074  __ bind(yes_double_value);
3075  if( UseSSE>=2 ) __ movdbl(xmm0, Address(rsp, (RegisterSaver::xmm0Offset() + additional_words + 1)*wordSize));
3076  else            __ fld_d(Address(rsp, (RegisterSaver::fpResultOffset() + additional_words + 1)*wordSize));
3077
3078  __ bind(results_done);
3079
3080  // Pop self-frame.
3081  __ leave();                              // Epilog!
3082
3083  // Jump to interpreter
3084  __ ret(0);
3085
3086  // -------------
3087  // make sure all code is generated
3088  masm->flush();
3089
3090  _deopt_blob = DeoptimizationBlob::create( &buffer, oop_maps, 0, exception_offset, reexecute_offset, frame_size_in_words);
3091  _deopt_blob->set_unpack_with_exception_in_tls_offset(exception_in_tls_offset);
3092}
3093
3094
3095#ifdef COMPILER2
3096//------------------------------generate_uncommon_trap_blob--------------------
3097void SharedRuntime::generate_uncommon_trap_blob() {
3098  // allocate space for the code
3099  ResourceMark rm;
3100  // setup code generation tools
3101  CodeBuffer   buffer("uncommon_trap_blob", 512, 512);
3102  MacroAssembler* masm = new MacroAssembler(&buffer);
3103
3104  enum frame_layout {
3105    arg0_off,      // thread                     sp + 0 // Arg location for
3106    arg1_off,      // unloaded_class_index       sp + 1 // calling C
3107    // The frame sender code expects that rbp will be in the "natural" place and
3108    // will override any oopMap setting for it. We must therefore force the layout
3109    // so that it agrees with the frame sender code.
3110    rbp_off,       // callee saved register      sp + 2
3111    return_off,    // slot for return address    sp + 3
3112    framesize
3113  };
3114
3115  address start = __ pc();
3116  // Push self-frame.
3117  __ subptr(rsp, return_off*wordSize);     // Prolog!
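  // (return_off == 3, so this reserves three words: the two C argument slots
  //  and the rbp slot; the return-address slot already exists because this
  //  blob is reached by a call.)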
3118
3119  // rbp, is an implicitly saved callee saved register (i.e. the calling
3120  // convention will save/restore it in prolog/epilog). Other than that
3121  // there are no callee save registers now that adapter frames are gone.
3122  __ movptr(Address(rsp, rbp_off*wordSize), rbp);
3123
3124  // Clear the floating point exception stack
3125  __ empty_FPU_stack();
3126
3127  // set last_Java_sp
3128  __ get_thread(rdx);
3129  __ set_last_Java_frame(rdx, noreg, noreg, NULL);
3130
3131  // Call C code.  Need thread but NOT official VM entry
3132  // crud.  We cannot block on this call, no GC can happen.  Call should
3133  // capture callee-saved registers as well as return values.
3134  __ movptr(Address(rsp, arg0_off*wordSize), rdx);
3135  // argument already in ECX
3136  __ movl(Address(rsp, arg1_off*wordSize),rcx);
3137  __ call(RuntimeAddress(CAST_FROM_FN_PTR(address, Deoptimization::uncommon_trap)));
3138
3139  // Set an oopmap for the call site
3140  OopMapSet *oop_maps = new OopMapSet();
3141  OopMap* map =  new OopMap( framesize, 0 );
3142  // No oopMap for rbp, it is known implicitly
3143
3144  oop_maps->add_gc_map( __ pc()-start, map);
3145
3146  __ get_thread(rcx);
3147
3148  __ reset_last_Java_frame(rcx, false, false);
3149
3150  // Load UnrollBlock into EDI
3151  __ movptr(rdi, rax);
3152
3153  // Pop all the frames we must move/replace.
3154  //
3155  // Frame picture (youngest to oldest)
3156  // 1: self-frame (no frame link)
3157  // 2: deopting frame  (no frame link)
3158  // 3: caller of deopting frame (could be compiled/interpreted).
3159
3160  // Pop self-frame.  We have no frame, and must rely only on EAX and ESP.
3161  __ addptr(rsp,(framesize-1)*wordSize);     // Epilog!
3162
3163  // Pop deoptimized frame
3164  __ movl2ptr(rcx, Address(rdi,Deoptimization::UnrollBlock::size_of_deoptimized_frame_offset_in_bytes()));
3165  __ addptr(rsp, rcx);
3166
3167  // sp should be pointing at the return address to the caller (3)
3168
3169  // Stack bang to make sure there's enough room for these interpreter frames.
3170  if (UseStackBanging) {
3171    __ movl(rbx, Address(rdi ,Deoptimization::UnrollBlock::total_frame_sizes_offset_in_bytes()));
3172    __ bang_stack_size(rbx, rcx);
3173  }
3174
3175
3176  // Load array of frame pcs into ECX
3177  __ movl(rcx,Address(rdi,Deoptimization::UnrollBlock::frame_pcs_offset_in_bytes()));
3178
3179  __ pop(rsi); // trash the pc
3180
3181  // Load array of frame sizes into ESI
3182  __ movptr(rsi,Address(rdi,Deoptimization::UnrollBlock::frame_sizes_offset_in_bytes()));
3183
3184  Address counter(rdi, Deoptimization::UnrollBlock::counter_temp_offset_in_bytes());
3185
3186  __ movl(rbx, Address(rdi, Deoptimization::UnrollBlock::number_of_frames_offset_in_bytes()));
3187  __ movl(counter, rbx);
3188
3189  // Pick up the initial fp we should save
3190  __ movptr(rbp, Address(rdi, Deoptimization::UnrollBlock::initial_info_offset_in_bytes()));
3191
3192  // Now adjust the caller's stack to make up for the extra locals
3193  // but record the original sp so that we can save it in the skeletal interpreter
3194  // frame and the stack walking of interpreter_sender will get the unextended sp
3195  // value and not the "real" sp value.
3196
3197  Address sp_temp(rdi, Deoptimization::UnrollBlock::sender_sp_temp_offset_in_bytes());
3198  __ movptr(sp_temp, rsp);
3199  __ movl(rbx, Address(rdi, Deoptimization::UnrollBlock::caller_adjustment_offset_in_bytes()));
3200  __ subptr(rsp, rbx);
3201
3202  // Push interpreter frames in a loop
3203  Label loop;
3204  __ bind(loop);
3205  __ movptr(rbx, Address(rsi, 0));      // Load frame size
3206#ifdef CC_INTERP
3207  __ subptr(rbx, 4*wordSize);           // we'll push pc and ebp by hand and
3208#ifdef ASSERT
3209  __ push(0xDEADDEAD);                  // Make a recognizable pattern
3210  __ push(0xDEADDEAD);                  // (parm to RecursiveInterpreter...)
3211#else /* ASSERT */
3212  __ subptr(rsp, 2*wordSize);           // skip the "static long no_param"
3213#endif /* ASSERT */
3214#else /* CC_INTERP */
3215  __ subptr(rbx, 2*wordSize);           // we'll push pc and rbp, by hand
3216#endif /* CC_INTERP */
3217  __ pushptr(Address(rcx, 0));          // save return address
3218  __ enter();                           // save old & set new rbp,
3219  __ subptr(rsp, rbx);                  // Prolog!
3220  __ movptr(rbx, sp_temp);              // sender's sp
3221#ifdef CC_INTERP
3222  __ movptr(Address(rbp,
3223                  -(sizeof(BytecodeInterpreter)) + in_bytes(byte_offset_of(BytecodeInterpreter, _sender_sp))),
3224          rbx); // Make it walkable
3225#else /* CC_INTERP */
3226  // This value is corrected by layout_activation_impl
3227  __ movptr(Address(rbp, frame::interpreter_frame_last_sp_offset * wordSize), NULL_WORD );
3228  __ movptr(Address(rbp, frame::interpreter_frame_sender_sp_offset * wordSize), rbx); // Make it walkable
3229#endif /* CC_INTERP */
3230  __ movptr(sp_temp, rsp);              // pass to next frame
3231  __ addptr(rsi, wordSize);             // Bump array pointer (sizes)
3232  __ addptr(rcx, wordSize);             // Bump array pointer (pcs)
3233  __ decrementl(counter);             // decrement counter
3234  __ jcc(Assembler::notZero, loop);
3235  __ pushptr(Address(rcx, 0));            // save final return address
3236
3237  // Re-push self-frame
3238  __ enter();                           // save old & set new rbp,
3239  __ subptr(rsp, (framesize-2) * wordSize);   // Prolog!
3240
3241
3242  // set last_Java_sp, last_Java_fp
3243  __ get_thread(rdi);
3244  __ set_last_Java_frame(rdi, noreg, rbp, NULL);
3245
3246  // Call C code.  Need thread but NOT official VM entry
3247  // crud.  We cannot block on this call, no GC can happen.  Call should
3248  // restore return values to their stack-slots with the new SP.
3249  __ movptr(Address(rsp,arg0_off*wordSize),rdi);
3250  __ movl(Address(rsp,arg1_off*wordSize), Deoptimization::Unpack_uncommon_trap);
3251  __ call(RuntimeAddress(CAST_FROM_FN_PTR(address, Deoptimization::unpack_frames)));
3252  // Set an oopmap for the call site
3253  oop_maps->add_gc_map( __ pc()-start, new OopMap( framesize, 0 ) );
3254
3255  __ get_thread(rdi);
3256  __ reset_last_Java_frame(rdi, true, false);
3257
3258  // Pop self-frame.
3259  __ leave();     // Epilog!
3260
3261  // Jump to interpreter
3262  __ ret(0);
3263
3264  // -------------
3265  // make sure all code is generated
3266  masm->flush();
3267
3268   _uncommon_trap_blob = UncommonTrapBlob::create(&buffer, oop_maps, framesize);
3269}
3270#endif // COMPILER2
3271
3272//------------------------------generate_handler_blob------
3273//
3274// Generate a special Compile2Runtime blob that saves all registers,
3275  // sets up the oopmap, and calls the safepoint code to stop the compiled
3276  // code at a safepoint.
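// call_ptr is typically the VM's polling-page safepoint handler; compiled
// code that traps on a safepoint poll is redirected here so that all of its
// registers can be described to the GC/deoptimization machinery via the
// oopmap recorded below.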
3277//
3278SafepointBlob* SharedRuntime::generate_handler_blob(address call_ptr, bool cause_return) {
3279
3280  // Account for thread arg in our frame
3281  const int additional_words = 1;
3282  int frame_size_in_words;
3283
3284  assert (StubRoutines::forward_exception_entry() != NULL, "must be generated before");
3285
3286  ResourceMark rm;
3287  OopMapSet *oop_maps = new OopMapSet();
3288  OopMap* map;
3289
3290  // allocate space for the code
3291  // setup code generation tools
3292  CodeBuffer   buffer("handler_blob", 1024, 512);
3293  MacroAssembler* masm = new MacroAssembler(&buffer);
3294
3295  const Register java_thread = rdi; // callee-saved for VC++
3296  address start   = __ pc();
3297  address call_pc = NULL;
3298
3299  // If cause_return is true we are at a poll_return, and the return
3300  // address to the caller of the nmethod being safepointed is already
3301  // on the stack. We can leave this return address on the stack and
3302  // effectively complete the return and safepoint in the caller.
3303  // Otherwise we push space for a return address that the safepoint
3304  // handler will install later to make the stack walking sensible.
3305  if( !cause_return )
3306    __ push(rbx);                // Make room for return address (or push it again)
3307
3308  map = RegisterSaver::save_live_registers(masm, additional_words, &frame_size_in_words, false);
3309
3310  // The following is basically a call_VM. However, we need the precise
3311  // address of the call in order to generate an oopmap. Hence, we do all the
3312  // work ourselves.
3313
3314  // Push thread argument and setup last_Java_sp
3315  __ get_thread(java_thread);
3316  __ push(java_thread);
3317  __ set_last_Java_frame(java_thread, noreg, noreg, NULL);
3318
3319  // if this was not a poll_return then we need to correct the return address now.
3320  if( !cause_return ) {
3321    __ movptr(rax, Address(java_thread, JavaThread::saved_exception_pc_offset()));
3322    __ movptr(Address(rbp, wordSize), rax);
3323  }
3324
3325  // do the call
3326  __ call(RuntimeAddress(call_ptr));
3327
3328  // Set an oopmap for the call site.  This oopmap will map all
3329  // oop-registers and debug-info registers as callee-saved.  This
3330  // will allow deoptimization at this safepoint to find all possible
3331  // debug-info recordings, as well as let GC find all oops.
3332
3333  oop_maps->add_gc_map( __ pc() - start, map);
3334
3335  // Discard arg
3336  __ pop(rcx);
3337
3338  Label noException;
3339
3340  // Clear last_Java_sp again
3341  __ get_thread(java_thread);
3342  __ reset_last_Java_frame(java_thread, false, false);
3343
3344  __ cmpptr(Address(java_thread, Thread::pending_exception_offset()), (int32_t)NULL_WORD);
3345  __ jcc(Assembler::equal, noException);
3346
3347  // Exception pending
3348
3349  RegisterSaver::restore_live_registers(masm);
3350
3351  __ jump(RuntimeAddress(StubRoutines::forward_exception_entry()));
3352
3353  __ bind(noException);
3354
3355  // Normal exit, register restoring and exit
3356  RegisterSaver::restore_live_registers(masm);
3357
3358  __ ret(0);
3359
3360  // make sure all code is generated
3361  masm->flush();
3362
3363  // Fill-out other meta info
3364  return SafepointBlob::create(&buffer, oop_maps, frame_size_in_words);
3365}
3366
3367//
3368  // generate_resolve_blob - call resolution (static/virtual/opt-virtual/ic-miss)
3369//
3370// Generate a stub that calls into vm to find out the proper destination
3371// of a java call. All the argument registers are live at this point
3372// but since this is generic code we don't know what they are and the caller
3373// must do any gc of the args.
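// The destination is typically one of the SharedRuntime resolve-call entry
// points (or handle_wrong_method for the wrong-method/ic-miss stubs). It is
// expected to return the real entry address in rax and the callee Method*
// via vm_result_2; both are written back into the register save area below
// before the registers are restored and we jump to rax.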
3374//
3375RuntimeStub* SharedRuntime::generate_resolve_blob(address destination, const char* name) {
3376  assert (StubRoutines::forward_exception_entry() != NULL, "must be generated before");
3377
3378  // allocate space for the code
3379  ResourceMark rm;
3380
3381  CodeBuffer buffer(name, 1000, 512);
3382  MacroAssembler* masm                = new MacroAssembler(&buffer);
3383
3384  int frame_size_words;
3385  enum frame_layout {
3386                thread_off,
3387                extra_words };
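  // extra_words (== 1) accounts for the thread argument pushed just before
  // the call below; it is passed to save_live_registers as the number of
  // additional frame words.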
3388
3389  OopMapSet *oop_maps = new OopMapSet();
3390  OopMap* map = NULL;
3391
3392  int start = __ offset();
3393
3394  map = RegisterSaver::save_live_registers(masm, extra_words, &frame_size_words);
3395
3396  int frame_complete = __ offset();
3397
3398  const Register thread = rdi;
3399  __ get_thread(rdi);
3400
3401  __ push(thread);
3402  __ set_last_Java_frame(thread, noreg, rbp, NULL);
3403
3404  __ call(RuntimeAddress(destination));
3405
3406
3407  // Set an oopmap for the call site.
3408  // We need this not only for callee-saved registers, but also for volatile
3409  // registers that the compiler might be keeping live across a safepoint.
3410
3411  oop_maps->add_gc_map( __ offset() - start, map);
3412
3413  // rax, contains the address we are going to jump to assuming no exception got installed
3414
3415  __ addptr(rsp, wordSize);
3416
3417  // clear last_Java_sp
3418  __ reset_last_Java_frame(thread, true, false);
3419  // check for pending exceptions
3420  Label pending;
3421  __ cmpptr(Address(thread, Thread::pending_exception_offset()), (int32_t)NULL_WORD);
3422  __ jcc(Assembler::notEqual, pending);
3423
3424  // get the returned Method*
3425  __ get_vm_result_2(rbx, thread);
3426  __ movptr(Address(rsp, RegisterSaver::rbx_offset() * wordSize), rbx);
3427
3428  __ movptr(Address(rsp, RegisterSaver::rax_offset() * wordSize), rax);
3429
3430  RegisterSaver::restore_live_registers(masm);
3431
3432  // We are back to the original state on entry and ready to go.
3433
3434  __ jmp(rax);
3435
3436  // Pending exception after the safepoint
3437
3438  __ bind(pending);
3439
3440  RegisterSaver::restore_live_registers(masm);
3441
3442  // exception pending => remove activation and forward to exception handler
3443
3444  __ get_thread(thread);
3445  __ movptr(Address(thread, JavaThread::vm_result_offset()), NULL_WORD);
3446  __ movptr(rax, Address(thread, Thread::pending_exception_offset()));
3447  __ jump(RuntimeAddress(StubRoutines::forward_exception_entry()));
3448
3449  // -------------
3450  // make sure all code is generated
3451  masm->flush();
3452
3453  // return the  blob
3454  // frame_size_words or bytes??
3455  return RuntimeStub::new_runtime_stub(name, &buffer, frame_complete, frame_size_words, oop_maps, true);
3456}
3457