// sharedRuntime_x86_64.cpp revision 1472:c18cbe5936b8
/*
 * Copyright (c) 2003, 2010, Oracle and/or its affiliates. All rights reserved.
 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
 *
 * This code is free software; you can redistribute it and/or modify it
 * under the terms of the GNU General Public License version 2 only, as
 * published by the Free Software Foundation.
 *
 * This code is distributed in the hope that it will be useful, but WITHOUT
 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
 * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
 * version 2 for more details (a copy is included in the LICENSE file that
 * accompanied this code).
 *
 * You should have received a copy of the GNU General Public License version
 * 2 along with this work; if not, write to the Free Software Foundation,
 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
 *
 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
 * or visit www.oracle.com if you need additional information or have any
 * questions.
 *
 */

#include "incls/_precompiled.incl"
#include "incls/_sharedRuntime_x86_64.cpp.incl"

DeoptimizationBlob *SharedRuntime::_deopt_blob;
#ifdef COMPILER2
UncommonTrapBlob   *SharedRuntime::_uncommon_trap_blob;
ExceptionBlob      *OptoRuntime::_exception_blob;
#endif // COMPILER2

SafepointBlob      *SharedRuntime::_polling_page_safepoint_handler_blob;
SafepointBlob      *SharedRuntime::_polling_page_return_handler_blob;
RuntimeStub*       SharedRuntime::_wrong_method_blob;
RuntimeStub*       SharedRuntime::_ic_miss_blob;
RuntimeStub*       SharedRuntime::_resolve_opt_virtual_call_blob;
RuntimeStub*       SharedRuntime::_resolve_virtual_call_blob;
RuntimeStub*       SharedRuntime::_resolve_static_call_blob;

const int StackAlignmentInSlots = StackAlignmentInBytes / VMRegImpl::stack_slot_size;

#define __ masm->

class SimpleRuntimeFrame {

  public:

  // Most of the runtime stubs have this simple frame layout.
  // This class exists to make the layout shared in one place.
  // Offsets are for compiler stack slots, which are jints.
  enum layout {
    // The frame sender code expects that rbp will be in the "natural" place and
    // will override any oopMap setting for it. We must therefore force the layout
    // so that it agrees with the frame sender code.
    rbp_off = frame::arg_reg_save_area_bytes/BytesPerInt,
    rbp_off2,
    return_off, return_off2,
    framesize
  };
};

class RegisterSaver {
  // Capture info about frame layout.  Layout offsets are in jint
  // units because compiler frame slots are jints.
#define DEF_XMM_OFFS(regnum) xmm ## regnum ## _off = xmm_off + (regnum)*16/BytesPerInt, xmm ## regnum ## H_off
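// For illustration, DEF_XMM_OFFS(0) expands to:
//   xmm0_off = xmm_off + (0)*16/BytesPerInt, xmm0H_off
// i.e. each xmm register owns a 16-byte (four-jint-slot) region of the
// fxsave area, with the low two slots named as the _off/H_off pair.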
  enum layout {
    fpu_state_off = frame::arg_reg_save_area_bytes/BytesPerInt, // fxsave save area
    xmm_off       = fpu_state_off + 160/BytesPerInt,            // offset in fxsave save area
    DEF_XMM_OFFS(0),
    DEF_XMM_OFFS(1),
    DEF_XMM_OFFS(2),
    DEF_XMM_OFFS(3),
    DEF_XMM_OFFS(4),
    DEF_XMM_OFFS(5),
    DEF_XMM_OFFS(6),
    DEF_XMM_OFFS(7),
    DEF_XMM_OFFS(8),
    DEF_XMM_OFFS(9),
    DEF_XMM_OFFS(10),
    DEF_XMM_OFFS(11),
    DEF_XMM_OFFS(12),
    DEF_XMM_OFFS(13),
    DEF_XMM_OFFS(14),
    DEF_XMM_OFFS(15),
    fpu_state_end = fpu_state_off + ((FPUStateSizeInWords-1)*wordSize / BytesPerInt),
    fpu_stateH_end,
    r15_off, r15H_off,
    r14_off, r14H_off,
    r13_off, r13H_off,
    r12_off, r12H_off,
    r11_off, r11H_off,
    r10_off, r10H_off,
    r9_off,  r9H_off,
    r8_off,  r8H_off,
    rdi_off, rdiH_off,
    rsi_off, rsiH_off,
    ignore_off, ignoreH_off,  // extra copy of rbp
    rsp_off, rspH_off,
    rbx_off, rbxH_off,
    rdx_off, rdxH_off,
    rcx_off, rcxH_off,
    rax_off, raxH_off,
    // 16-byte stack alignment fill word: see MacroAssembler::push/pop_IU_state
    align_off, alignH_off,
    flags_off, flagsH_off,
    // The frame sender code expects that rbp will be in the "natural" place and
    // will override any oopMap setting for it. We must therefore force the layout
    // so that it agrees with the frame sender code.
    rbp_off, rbpH_off,        // copy of rbp we will restore
    return_off, returnH_off,  // slot for return address
    reg_save_size             // size in compiler stack slots
  };

 public:
  static OopMap* save_live_registers(MacroAssembler* masm, int additional_frame_words, int* total_frame_words);
  static void restore_live_registers(MacroAssembler* masm);

  // Offsets into the register save area
  // Used by deoptimization when it is managing result register
  // values on its own

  static int rax_offset_in_bytes(void)    { return BytesPerInt * rax_off; }
  static int rdx_offset_in_bytes(void)    { return BytesPerInt * rdx_off; }
  static int rbx_offset_in_bytes(void)    { return BytesPerInt * rbx_off; }
  static int xmm0_offset_in_bytes(void)   { return BytesPerInt * xmm0_off; }
  static int return_offset_in_bytes(void) { return BytesPerInt * return_off; }

  // During deoptimization only the result registers need to be restored,
  // all the other values have already been extracted.
  static void restore_result_registers(MacroAssembler* masm);
};

OopMap* RegisterSaver::save_live_registers(MacroAssembler* masm, int additional_frame_words, int* total_frame_words) {

  // Always make the frame size 16-byte aligned
  int frame_size_in_bytes = round_to(additional_frame_words*wordSize +
                                     reg_save_size*BytesPerInt, 16);
  // OopMap frame size is in compiler stack slots (jint's) not bytes or words
  int frame_size_in_slots = frame_size_in_bytes / BytesPerInt;
  // The caller will allocate additional_frame_words
  int additional_frame_slots = additional_frame_words*wordSize / BytesPerInt;
  // CodeBlob frame size is in words.
  int frame_size_in_words = frame_size_in_bytes / wordSize;
  *total_frame_words = frame_size_in_words;
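
  // Worked example (illustrative numbers only): with additional_frame_words == 0
  // and, say, reg_save_size == 168 jint slots, frame_size_in_bytes ==
  // round_to(168*4, 16) == 672, frame_size_in_slots == 168 and
  // frame_size_in_words == 84.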

  // Save registers, fpu state, and flags.
  // We assume caller has already pushed the return address onto the
  // stack, so rsp is 8-byte aligned here.
  // We push rbp twice in this sequence because we want the real rbp
  // to be under the return like a normal enter.

  __ enter();          // rsp becomes 16-byte aligned here
  __ push_CPU_state(); // Push a multiple of 16 bytes
  if (frame::arg_reg_save_area_bytes != 0) {
    // Allocate argument register save area
    __ subptr(rsp, frame::arg_reg_save_area_bytes);
  }

  // Set an oopmap for the call site.  This oopmap will map all
  // oop-registers and debug-info registers as callee-saved.  This
  // will allow deoptimization at this safepoint to find all possible
  // debug-info recordings, as well as let GC find all oops.

  OopMapSet *oop_maps = new OopMapSet();
  OopMap* map = new OopMap(frame_size_in_slots, 0);
  map->set_callee_saved(VMRegImpl::stack2reg( rax_off  + additional_frame_slots), rax->as_VMReg());
  map->set_callee_saved(VMRegImpl::stack2reg( rcx_off  + additional_frame_slots), rcx->as_VMReg());
  map->set_callee_saved(VMRegImpl::stack2reg( rdx_off  + additional_frame_slots), rdx->as_VMReg());
  map->set_callee_saved(VMRegImpl::stack2reg( rbx_off  + additional_frame_slots), rbx->as_VMReg());
  // rbp location is known implicitly by the frame sender code, needs no oopmap,
  // and the location where rbp was saved is ignored
  map->set_callee_saved(VMRegImpl::stack2reg( rsi_off  + additional_frame_slots), rsi->as_VMReg());
  map->set_callee_saved(VMRegImpl::stack2reg( rdi_off  + additional_frame_slots), rdi->as_VMReg());
  map->set_callee_saved(VMRegImpl::stack2reg( r8_off   + additional_frame_slots), r8->as_VMReg());
  map->set_callee_saved(VMRegImpl::stack2reg( r9_off   + additional_frame_slots), r9->as_VMReg());
  map->set_callee_saved(VMRegImpl::stack2reg( r10_off  + additional_frame_slots), r10->as_VMReg());
  map->set_callee_saved(VMRegImpl::stack2reg( r11_off  + additional_frame_slots), r11->as_VMReg());
  map->set_callee_saved(VMRegImpl::stack2reg( r12_off  + additional_frame_slots), r12->as_VMReg());
  map->set_callee_saved(VMRegImpl::stack2reg( r13_off  + additional_frame_slots), r13->as_VMReg());
  map->set_callee_saved(VMRegImpl::stack2reg( r14_off  + additional_frame_slots), r14->as_VMReg());
  map->set_callee_saved(VMRegImpl::stack2reg( r15_off  + additional_frame_slots), r15->as_VMReg());
  map->set_callee_saved(VMRegImpl::stack2reg(xmm0_off  + additional_frame_slots), xmm0->as_VMReg());
  map->set_callee_saved(VMRegImpl::stack2reg(xmm1_off  + additional_frame_slots), xmm1->as_VMReg());
  map->set_callee_saved(VMRegImpl::stack2reg(xmm2_off  + additional_frame_slots), xmm2->as_VMReg());
  map->set_callee_saved(VMRegImpl::stack2reg(xmm3_off  + additional_frame_slots), xmm3->as_VMReg());
  map->set_callee_saved(VMRegImpl::stack2reg(xmm4_off  + additional_frame_slots), xmm4->as_VMReg());
  map->set_callee_saved(VMRegImpl::stack2reg(xmm5_off  + additional_frame_slots), xmm5->as_VMReg());
  map->set_callee_saved(VMRegImpl::stack2reg(xmm6_off  + additional_frame_slots), xmm6->as_VMReg());
  map->set_callee_saved(VMRegImpl::stack2reg(xmm7_off  + additional_frame_slots), xmm7->as_VMReg());
  map->set_callee_saved(VMRegImpl::stack2reg(xmm8_off  + additional_frame_slots), xmm8->as_VMReg());
  map->set_callee_saved(VMRegImpl::stack2reg(xmm9_off  + additional_frame_slots), xmm9->as_VMReg());
  map->set_callee_saved(VMRegImpl::stack2reg(xmm10_off + additional_frame_slots), xmm10->as_VMReg());
  map->set_callee_saved(VMRegImpl::stack2reg(xmm11_off + additional_frame_slots), xmm11->as_VMReg());
  map->set_callee_saved(VMRegImpl::stack2reg(xmm12_off + additional_frame_slots), xmm12->as_VMReg());
  map->set_callee_saved(VMRegImpl::stack2reg(xmm13_off + additional_frame_slots), xmm13->as_VMReg());
  map->set_callee_saved(VMRegImpl::stack2reg(xmm14_off + additional_frame_slots), xmm14->as_VMReg());
  map->set_callee_saved(VMRegImpl::stack2reg(xmm15_off + additional_frame_slots), xmm15->as_VMReg());

  // %%% These should all be a waste but we'll keep things as they were for now
  if (true) {
    map->set_callee_saved(VMRegImpl::stack2reg( raxH_off  + additional_frame_slots),
                          rax->as_VMReg()->next());
    map->set_callee_saved(VMRegImpl::stack2reg( rcxH_off  + additional_frame_slots),
                          rcx->as_VMReg()->next());
    map->set_callee_saved(VMRegImpl::stack2reg( rdxH_off  + additional_frame_slots),
                          rdx->as_VMReg()->next());
    map->set_callee_saved(VMRegImpl::stack2reg( rbxH_off  + additional_frame_slots),
                          rbx->as_VMReg()->next());
    // rbp location is known implicitly by the frame sender code, needs no oopmap
    map->set_callee_saved(VMRegImpl::stack2reg( rsiH_off  + additional_frame_slots),
                          rsi->as_VMReg()->next());
    map->set_callee_saved(VMRegImpl::stack2reg( rdiH_off  + additional_frame_slots),
                          rdi->as_VMReg()->next());
    map->set_callee_saved(VMRegImpl::stack2reg( r8H_off   + additional_frame_slots),
                          r8->as_VMReg()->next());
    map->set_callee_saved(VMRegImpl::stack2reg( r9H_off   + additional_frame_slots),
                          r9->as_VMReg()->next());
    map->set_callee_saved(VMRegImpl::stack2reg( r10H_off  + additional_frame_slots),
                          r10->as_VMReg()->next());
    map->set_callee_saved(VMRegImpl::stack2reg( r11H_off  + additional_frame_slots),
                          r11->as_VMReg()->next());
    map->set_callee_saved(VMRegImpl::stack2reg( r12H_off  + additional_frame_slots),
                          r12->as_VMReg()->next());
    map->set_callee_saved(VMRegImpl::stack2reg( r13H_off  + additional_frame_slots),
                          r13->as_VMReg()->next());
    map->set_callee_saved(VMRegImpl::stack2reg( r14H_off  + additional_frame_slots),
                          r14->as_VMReg()->next());
    map->set_callee_saved(VMRegImpl::stack2reg( r15H_off  + additional_frame_slots),
                          r15->as_VMReg()->next());
    map->set_callee_saved(VMRegImpl::stack2reg(xmm0H_off  + additional_frame_slots),
                          xmm0->as_VMReg()->next());
    map->set_callee_saved(VMRegImpl::stack2reg(xmm1H_off  + additional_frame_slots),
                          xmm1->as_VMReg()->next());
    map->set_callee_saved(VMRegImpl::stack2reg(xmm2H_off  + additional_frame_slots),
                          xmm2->as_VMReg()->next());
    map->set_callee_saved(VMRegImpl::stack2reg(xmm3H_off  + additional_frame_slots),
                          xmm3->as_VMReg()->next());
    map->set_callee_saved(VMRegImpl::stack2reg(xmm4H_off  + additional_frame_slots),
                          xmm4->as_VMReg()->next());
    map->set_callee_saved(VMRegImpl::stack2reg(xmm5H_off  + additional_frame_slots),
                          xmm5->as_VMReg()->next());
    map->set_callee_saved(VMRegImpl::stack2reg(xmm6H_off  + additional_frame_slots),
                          xmm6->as_VMReg()->next());
    map->set_callee_saved(VMRegImpl::stack2reg(xmm7H_off  + additional_frame_slots),
                          xmm7->as_VMReg()->next());
    map->set_callee_saved(VMRegImpl::stack2reg(xmm8H_off  + additional_frame_slots),
                          xmm8->as_VMReg()->next());
    map->set_callee_saved(VMRegImpl::stack2reg(xmm9H_off  + additional_frame_slots),
                          xmm9->as_VMReg()->next());
    map->set_callee_saved(VMRegImpl::stack2reg(xmm10H_off + additional_frame_slots),
                          xmm10->as_VMReg()->next());
    map->set_callee_saved(VMRegImpl::stack2reg(xmm11H_off + additional_frame_slots),
                          xmm11->as_VMReg()->next());
    map->set_callee_saved(VMRegImpl::stack2reg(xmm12H_off + additional_frame_slots),
                          xmm12->as_VMReg()->next());
    map->set_callee_saved(VMRegImpl::stack2reg(xmm13H_off + additional_frame_slots),
                          xmm13->as_VMReg()->next());
    map->set_callee_saved(VMRegImpl::stack2reg(xmm14H_off + additional_frame_slots),
                          xmm14->as_VMReg()->next());
    map->set_callee_saved(VMRegImpl::stack2reg(xmm15H_off + additional_frame_slots),
                          xmm15->as_VMReg()->next());
  }

  return map;
}

void RegisterSaver::restore_live_registers(MacroAssembler* masm) {
  if (frame::arg_reg_save_area_bytes != 0) {
    // Pop arg register save area
    __ addptr(rsp, frame::arg_reg_save_area_bytes);
  }
  // Recover CPU state
  __ pop_CPU_state();
  // Get the rbp described implicitly by the calling convention (no oopMap)
  __ pop(rbp);
}

void RegisterSaver::restore_result_registers(MacroAssembler* masm) {

  // Just restore result registers. Only used by deoptimization. By
  // now any callee save register that needs to be restored to a c2
  // caller of the deoptee has been extracted into the vframeArray
  // and will be stuffed into the c2i adapter we create for later
  // restoration, so only result registers need to be restored here.

  // Restore fp result register
  __ movdbl(xmm0, Address(rsp, xmm0_offset_in_bytes()));
  // Restore integer result register
  __ movptr(rax, Address(rsp, rax_offset_in_bytes()));
  __ movptr(rdx, Address(rsp, rdx_offset_in_bytes()));

  // Pop all of the register save area off the stack except the return address
  __ addptr(rsp, return_offset_in_bytes());
}

// The java_calling_convention describes stack locations as ideal slots on
// a frame with no abi restrictions. Since we must observe abi restrictions
// (like the placement of the register window) the slots must be biased by
// the following value.
static int reg2offset_in(VMReg r) {
  // Account for saved rbp and return address
  // This should really be in_preserve_stack_slots
  return (r->reg2stack() + 4) * VMRegImpl::stack_slot_size;
}

static int reg2offset_out(VMReg r) {
  return (r->reg2stack() + SharedRuntime::out_preserve_stack_slots()) * VMRegImpl::stack_slot_size;
}
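
// Worked example (illustrative): stack_slot_size is 4, so for an incoming
// argument in caller stack slot 0, reg2offset_in yields (0 + 4) * 4 == 16
// bytes off rbp, exactly skipping the saved rbp and the return address
// (two 8-byte words).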

// ---------------------------------------------------------------------------
// Read the array of BasicTypes from a signature, and compute where the
// arguments should go.  Values in the VMRegPair regs array refer to 4-byte
// quantities.  Values less than VMRegImpl::stack0 are registers, those above
// refer to 4-byte stack slots.  All stack slots are based off of the stack pointer
// as framesizes are fixed.
// VMRegImpl::stack0 refers to the first slot 0(sp), and VMRegImpl::stack0+1
// refers to the memory word 4 bytes higher.  Register numbers up to
// RegisterImpl::number_of_registers are the 64-bit integer registers.

// Note: the INPUTS in sig_bt are in units of Java argument words, which are
// either 32-bit or 64-bit depending on the build.  The OUTPUTS are in 32-bit
// units regardless of build.  Of course for i486 there is no 64-bit build.

// The Java calling convention is a "shifted" version of the C ABI.
// By skipping the first C ABI register we can call non-static jni methods
// with small numbers of arguments without having to shuffle the arguments
// at all. Since we control the java ABI we ought to at least get some
// advantage out of it.
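
// For illustration (a sketch of what the loop below computes): for the
// signature (long, int) the incoming sig_bt is { T_LONG, T_VOID, T_INT },
// and the convention assigns
//   regs[0].set2(j_rarg0->as_VMReg());  // the long, one 64-bit register
//   regs[1].set_bad();                  // the T_VOID half
//   regs[2].set1(j_rarg1->as_VMReg());  // the int
// returning 0 because no stack slots were needed.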

int SharedRuntime::java_calling_convention(const BasicType *sig_bt,
                                           VMRegPair *regs,
                                           int total_args_passed,
                                           int is_outgoing) {

  // Create the mapping between argument positions and
  // registers.
  static const Register INT_ArgReg[Argument::n_int_register_parameters_j] = {
    j_rarg0, j_rarg1, j_rarg2, j_rarg3, j_rarg4, j_rarg5
  };
  static const XMMRegister FP_ArgReg[Argument::n_float_register_parameters_j] = {
    j_farg0, j_farg1, j_farg2, j_farg3,
    j_farg4, j_farg5, j_farg6, j_farg7
  };


  uint int_args = 0;
  uint fp_args = 0;
  uint stk_args = 0; // inc by 2 each time

  for (int i = 0; i < total_args_passed; i++) {
    switch (sig_bt[i]) {
    case T_BOOLEAN:
    case T_CHAR:
    case T_BYTE:
    case T_SHORT:
    case T_INT:
      if (int_args < Argument::n_int_register_parameters_j) {
        regs[i].set1(INT_ArgReg[int_args++]->as_VMReg());
      } else {
        regs[i].set1(VMRegImpl::stack2reg(stk_args));
        stk_args += 2;
      }
      break;
    case T_VOID:
      // halves of T_LONG or T_DOUBLE
      assert(i != 0 && (sig_bt[i - 1] == T_LONG || sig_bt[i - 1] == T_DOUBLE), "expecting half");
      regs[i].set_bad();
      break;
    case T_LONG:
      assert(sig_bt[i + 1] == T_VOID, "expecting half");
      // fall through
    case T_OBJECT:
    case T_ARRAY:
    case T_ADDRESS:
      if (int_args < Argument::n_int_register_parameters_j) {
        regs[i].set2(INT_ArgReg[int_args++]->as_VMReg());
      } else {
        regs[i].set2(VMRegImpl::stack2reg(stk_args));
        stk_args += 2;
      }
      break;
    case T_FLOAT:
      if (fp_args < Argument::n_float_register_parameters_j) {
        regs[i].set1(FP_ArgReg[fp_args++]->as_VMReg());
      } else {
        regs[i].set1(VMRegImpl::stack2reg(stk_args));
        stk_args += 2;
      }
      break;
    case T_DOUBLE:
      assert(sig_bt[i + 1] == T_VOID, "expecting half");
      if (fp_args < Argument::n_float_register_parameters_j) {
        regs[i].set2(FP_ArgReg[fp_args++]->as_VMReg());
      } else {
        regs[i].set2(VMRegImpl::stack2reg(stk_args));
        stk_args += 2;
      }
      break;
    default:
      ShouldNotReachHere();
      break;
    }
  }

  return round_to(stk_args, 2);
}

// Patch the caller's call site with entry to compiled code if it exists.
static void patch_callers_callsite(MacroAssembler *masm) {
  Label L;
  __ verify_oop(rbx);
  __ cmpptr(Address(rbx, in_bytes(methodOopDesc::code_offset())), (int32_t)NULL_WORD);
  __ jcc(Assembler::equal, L);

  // Save the current stack pointer
  __ mov(r13, rsp);
  // Schedule the branch target address early.
  // Call into the VM to patch the caller, then jump to compiled callee.
  // rax isn't live so capture the return address while we easily can.
  __ movptr(rax, Address(rsp, 0));

  // align stack so push_CPU_state doesn't fault
  __ andptr(rsp, -(StackAlignmentInBytes));
  __ push_CPU_state();


  __ verify_oop(rbx);
  // VM needs caller's callsite
  // VM needs target method
  // This needs to be a long call since we will relocate this adapter to
  // the codeBuffer and it may not reach

  // Allocate argument register save area
  if (frame::arg_reg_save_area_bytes != 0) {
    __ subptr(rsp, frame::arg_reg_save_area_bytes);
  }
  __ mov(c_rarg0, rbx);
  __ mov(c_rarg1, rax);
  __ call(RuntimeAddress(CAST_FROM_FN_PTR(address, SharedRuntime::fixup_callers_callsite)));

  // De-allocate argument register save area
  if (frame::arg_reg_save_area_bytes != 0) {
    __ addptr(rsp, frame::arg_reg_save_area_bytes);
  }

  __ pop_CPU_state();
  // restore sp
  __ mov(rsp, r13);
  __ bind(L);
}


static void gen_c2i_adapter(MacroAssembler *masm,
                            int total_args_passed,
                            int comp_args_on_stack,
                            const BasicType *sig_bt,
                            const VMRegPair *regs,
                            Label& skip_fixup) {
  // Before we get into the guts of the C2I adapter, see if we should be here
  // at all.  We've come from compiled code and are attempting to jump to the
  // interpreter, which means the caller made a static call to get here
  // (vcalls always get a compiled target if there is one).  Check for a
  // compiled target.  If there is one, we need to patch the caller's call.
  patch_callers_callsite(masm);

  __ bind(skip_fixup);

  // Since all args are passed on the stack, total_args_passed *
  // Interpreter::stackElementSize is the space we need.  Plus one word
  // for the return address location, since we store it first rather
  // than holding it in rax across all the shuffling.

  int extraspace = (total_args_passed * Interpreter::stackElementSize) + wordSize;

  // stack is aligned, keep it that way
  extraspace = round_to(extraspace, 2*wordSize);
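
  // Worked example (illustrative; stackElementSize is 8 on this 64-bit port):
  // with 4 args, extraspace = 4*8 + 8 = 40, rounded up to 48 so the 16-byte
  // stack alignment is preserved.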

  // Get return address
  __ pop(rax);

  // set senderSP value
  __ mov(r13, rsp);

  __ subptr(rsp, extraspace);

  // Store the return address in the expected location
  __ movptr(Address(rsp, 0), rax);

  // Now write the args into the outgoing interpreter space
  for (int i = 0; i < total_args_passed; i++) {
    if (sig_bt[i] == T_VOID) {
      assert(i > 0 && (sig_bt[i-1] == T_LONG || sig_bt[i-1] == T_DOUBLE), "missing half");
      continue;
    }

    // offset to start of parameters
    int st_off   = (total_args_passed - i) * Interpreter::stackElementSize;
    int next_off = st_off - Interpreter::stackElementSize;

    // Say 4 args:
    // i   st_off
    // 0   32 T_LONG
    // 1   24 T_VOID
    // 2   16 T_OBJECT
    // 3    8 T_BOOL
    // -    0 return address
    //
    // However, to make things extra confusing: because we can fit a long/double
    // in a single slot on a 64-bit VM and it would be silly to break it up, the
    // interpreter leaves one slot empty and only stores to a single slot. In
    // this case the slot that is occupied is the T_VOID slot. See, I said it
    // was confusing.

    VMReg r_1 = regs[i].first();
    VMReg r_2 = regs[i].second();
    if (!r_1->is_valid()) {
      assert(!r_2->is_valid(), "");
      continue;
    }
    if (r_1->is_stack()) {
      // memory to memory; use rax
      int ld_off = r_1->reg2stack() * VMRegImpl::stack_slot_size + extraspace;
      if (!r_2->is_valid()) {
        // sign extend??
        __ movl(rax, Address(rsp, ld_off));
        __ movptr(Address(rsp, st_off), rax);

      } else {

        __ movq(rax, Address(rsp, ld_off));

        // Two VMRegs|OptoRegs can be T_OBJECT, T_ADDRESS, T_DOUBLE, T_LONG
        // T_DOUBLE and T_LONG use two slots in the interpreter
        if ( sig_bt[i] == T_LONG || sig_bt[i] == T_DOUBLE) {
          // ld_off == LSW, ld_off+wordSize == MSW
          // st_off == MSW, next_off == LSW
          __ movq(Address(rsp, next_off), rax);
#ifdef ASSERT
          // Overwrite the unused slot with known junk
          __ mov64(rax, CONST64(0xdeadffffdeadaaaa));
          __ movptr(Address(rsp, st_off), rax);
#endif /* ASSERT */
        } else {
          __ movq(Address(rsp, st_off), rax);
        }
      }
    } else if (r_1->is_Register()) {
      Register r = r_1->as_Register();
      if (!r_2->is_valid()) {
        // must be only an int (or smaller), so move only 32 bits to the slot
        // why not sign extend??
        __ movl(Address(rsp, st_off), r);
      } else {
        // Two VMRegs|OptoRegs can be T_OBJECT, T_ADDRESS, T_DOUBLE, T_LONG
        // T_DOUBLE and T_LONG use two slots in the interpreter
        if ( sig_bt[i] == T_LONG || sig_bt[i] == T_DOUBLE) {
          // long/double in gpr
#ifdef ASSERT
          // Overwrite the unused slot with known junk
          __ mov64(rax, CONST64(0xdeadffffdeadaaab));
          __ movptr(Address(rsp, st_off), rax);
#endif /* ASSERT */
          __ movq(Address(rsp, next_off), r);
        } else {
          __ movptr(Address(rsp, st_off), r);
        }
      }
    } else {
      assert(r_1->is_XMMRegister(), "");
      if (!r_2->is_valid()) {
        // only a float; use just part of the slot
        __ movflt(Address(rsp, st_off), r_1->as_XMMRegister());
      } else {
#ifdef ASSERT
        // Overwrite the unused slot with known junk
        __ mov64(rax, CONST64(0xdeadffffdeadaaac));
        __ movptr(Address(rsp, st_off), rax);
#endif /* ASSERT */
        __ movdbl(Address(rsp, next_off), r_1->as_XMMRegister());
      }
    }
  }

  // Schedule the branch target address early.
  __ movptr(rcx, Address(rbx, in_bytes(methodOopDesc::interpreter_entry_offset())));
  __ jmp(rcx);
}

static void gen_i2c_adapter(MacroAssembler *masm,
                            int total_args_passed,
                            int comp_args_on_stack,
                            const BasicType *sig_bt,
                            const VMRegPair *regs) {

  //
  // We will only enter here from an interpreted frame and never from after
  // passing thru a c2i. Azul allowed this but we do not. If we lose the
  // race and use a c2i we will remain interpreted for the race loser(s).
  // This removes all sorts of headaches on the x86 side and also eliminates
  // the possibility of having c2i -> i2c -> c2i -> ... endless transitions.


  // Note: r13 contains the senderSP on entry. We must preserve it since
  // we may do an i2c -> c2i transition if we lose a race where compiled
  // code goes non-entrant while we get args ready.
  // In addition we use r13 to locate all the interpreter args because
  // we must align the stack to 16 bytes on an i2c entry, or we lose the
  // alignment expected in all compiled code, and the register save code
  // can segv when fxsave instructions find an improperly aligned stack
  // pointer.

  __ movptr(rax, Address(rsp, 0));

  // Must preserve original SP for loading incoming arguments because
  // we need to align the outgoing SP for compiled code.
  __ movptr(r11, rsp);

  // Cut-out for having no stack args.  Since up to 6 int/oop args are passed
  // in registers, we will occasionally have no stack args.
  int comp_words_on_stack = 0;
  if (comp_args_on_stack) {
    // Sig words on the stack are greater-than VMRegImpl::stack0.  Those in
    // registers are below.  By subtracting stack0, we either get a negative
    // number (all values in registers) or the maximum stack slot accessed.

    // Convert 4-byte c2 stack slots to words.
    comp_words_on_stack = round_to(comp_args_on_stack*VMRegImpl::stack_slot_size, wordSize)>>LogBytesPerWord;
    // Round up to minimum stack alignment, in wordSize
    comp_words_on_stack = round_to(comp_words_on_stack, 2);
    __ subptr(rsp, comp_words_on_stack * wordSize);
  }


  // Ensure compiled code always sees stack at proper alignment
  __ andptr(rsp, -16);

  // Push the return address, misaligning the stack so that the youngest
  // frame sees it exactly as it would immediately after a call instruction.
  __ push(rax);
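
  // For illustration: after the andptr above rsp is 0 mod 16; the push makes
  // it 8 mod 16, which is exactly what a compiled callee expects to see
  // immediately after a call instruction.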

  // Put saved SP in another register
  const Register saved_sp = rax;
  __ movptr(saved_sp, r11);

  // Will jump to the compiled code just as if compiled code was doing it.
  // Pre-load the register-jump target early, to schedule it better.
  __ movptr(r11, Address(rbx, in_bytes(methodOopDesc::from_compiled_offset())));

  // Now generate the shuffle code.  Pick up all register args and move the
  // rest through the floating point stack top.
  for (int i = 0; i < total_args_passed; i++) {
    if (sig_bt[i] == T_VOID) {
      // Longs and doubles are passed in native word order, but misaligned
      // in the 32-bit build.
      assert(i > 0 && (sig_bt[i-1] == T_LONG || sig_bt[i-1] == T_DOUBLE), "missing half");
      continue;
    }

    // Pick up 0, 1 or 2 words from SP+offset.

    assert(!regs[i].second()->is_valid() || regs[i].first()->next() == regs[i].second(),
            "scrambled load targets?");
    // Load in argument order going down.
    int ld_off = (total_args_passed - i)*Interpreter::stackElementSize;
    // Point to interpreter value (vs. tag)
    int next_off = ld_off - Interpreter::stackElementSize;

    VMReg r_1 = regs[i].first();
    VMReg r_2 = regs[i].second();
    if (!r_1->is_valid()) {
      assert(!r_2->is_valid(), "");
      continue;
    }
    if (r_1->is_stack()) {
      // Convert stack slot to an SP offset (+ wordSize to account for return address )
      int st_off = regs[i].first()->reg2stack()*VMRegImpl::stack_slot_size + wordSize;

      // We can use r13 as a temp here because compiled code doesn't need r13 as an input,
      // and if we end up going thru a c2i because of a miss a reasonable value of r13
      // will be generated.
      if (!r_2->is_valid()) {
        // sign extend???
        __ movl(r13, Address(saved_sp, ld_off));
        __ movptr(Address(rsp, st_off), r13);
      } else {
        //
        // We are using two optoregs. This can be either T_OBJECT, T_ADDRESS, T_LONG, or T_DOUBLE.
        // The interpreter allocates two slots but only uses one for the T_LONG or T_DOUBLE case,
        // so we must adjust where to pick up the data to match the interpreter.
        //
        // Interpreter local[n] == MSW, local[n+1] == LSW; however, locals
        // are accessed as negative so LSW is at LOW address

        // ld_off is MSW so get LSW
        const int offset = (sig_bt[i]==T_LONG||sig_bt[i]==T_DOUBLE)?
                           next_off : ld_off;
        __ movq(r13, Address(saved_sp, offset));
        // st_off is LSW (i.e. reg.first())
        __ movq(Address(rsp, st_off), r13);
      }
    } else if (r_1->is_Register()) {  // Register argument
      Register r = r_1->as_Register();
      assert(r != rax, "must be different");
      if (r_2->is_valid()) {
        //
        // We are using two VMRegs. This can be either T_OBJECT, T_ADDRESS, T_LONG, or T_DOUBLE.
        // The interpreter allocates two slots but only uses one for the T_LONG or T_DOUBLE case,
        // so we must adjust where to pick up the data to match the interpreter.

        const int offset = (sig_bt[i]==T_LONG||sig_bt[i]==T_DOUBLE)?
                           next_off : ld_off;

        // this can be a misaligned move
        __ movq(r, Address(saved_sp, offset));
      } else {
        // sign extend and use a full word?
        __ movl(r, Address(saved_sp, ld_off));
      }
    } else {
      if (!r_2->is_valid()) {
        __ movflt(r_1->as_XMMRegister(), Address(saved_sp, ld_off));
      } else {
        __ movdbl(r_1->as_XMMRegister(), Address(saved_sp, next_off));
      }
    }
  }

  // 6243940 We might end up in handle_wrong_method if
  // the callee is deoptimized as we race thru here. If that
  // happens we don't want to take a safepoint because the
  // caller frame will look interpreted and arguments are now
  // "compiled" so it is much better to make this transition
  // invisible to the stack walking code. Unfortunately if
  // we try and find the callee by normal means a safepoint
  // is possible. So we stash the desired callee in the thread
  // and the vm will find it there should this case occur.

  __ movptr(Address(r15_thread, JavaThread::callee_target_offset()), rbx);

  // Put the methodOop where a c2i would expect it should we end up there;
  // only needed because c2's resolve stubs return the methodOop as a result
  // in rax.
  __ mov(rax, rbx);
  __ jmp(r11);
}

// ---------------------------------------------------------------
AdapterHandlerEntry* SharedRuntime::generate_i2c2i_adapters(MacroAssembler *masm,
                                                            int total_args_passed,
                                                            int comp_args_on_stack,
                                                            const BasicType *sig_bt,
                                                            const VMRegPair *regs,
                                                            AdapterFingerPrint* fingerprint) {
  address i2c_entry = __ pc();

  gen_i2c_adapter(masm, total_args_passed, comp_args_on_stack, sig_bt, regs);

  // -------------------------------------------------------------------------
  // Generate a C2I adapter.  On entry we know rbx holds the methodOop during calls
  // to the interpreter.  The args start out packed in the compiled layout.  They
  // need to be unpacked into the interpreter layout.  This will almost always
  // require some stack space.  We grow the current (compiled) stack, then repack
  // the args.  We finally end in a jump to the generic interpreter entry point.
  // On exit from the interpreter, the interpreter will restore our SP (lest the
  // compiled code, which relies solely on SP and not RBP, get sick).

  address c2i_unverified_entry = __ pc();
  Label skip_fixup;
  Label ok;

  Register holder = rax;
  Register receiver = j_rarg0;
  Register temp = rbx;

  {
    __ verify_oop(holder);
    __ load_klass(temp, receiver);
    __ verify_oop(temp);

    __ cmpptr(temp, Address(holder, compiledICHolderOopDesc::holder_klass_offset()));
    __ movptr(rbx, Address(holder, compiledICHolderOopDesc::holder_method_offset()));
    __ jcc(Assembler::equal, ok);
    __ jump(RuntimeAddress(SharedRuntime::get_ic_miss_stub()));

    __ bind(ok);
    // Method might have been compiled since the call site was patched to
    // interpreted; if that is the case treat it as a miss so we can get
    // the call site corrected.
    __ cmpptr(Address(rbx, in_bytes(methodOopDesc::code_offset())), (int32_t)NULL_WORD);
    __ jcc(Assembler::equal, skip_fixup);
    __ jump(RuntimeAddress(SharedRuntime::get_ic_miss_stub()));
  }

  address c2i_entry = __ pc();

  gen_c2i_adapter(masm, total_args_passed, comp_args_on_stack, sig_bt, regs, skip_fixup);

  __ flush();
  return AdapterHandlerLibrary::new_entry(fingerprint, i2c_entry, c2i_entry, c2i_unverified_entry);
}

int SharedRuntime::c_calling_convention(const BasicType *sig_bt,
                                         VMRegPair *regs,
                                         int total_args_passed) {
// We return the amount of VMRegImpl stack slots we need to reserve for all
// the arguments NOT counting out_preserve_stack_slots.

// NOTE: These arrays will have to change when c1 is ported
#ifdef _WIN64
    static const Register INT_ArgReg[Argument::n_int_register_parameters_c] = {
      c_rarg0, c_rarg1, c_rarg2, c_rarg3
    };
    static const XMMRegister FP_ArgReg[Argument::n_float_register_parameters_c] = {
      c_farg0, c_farg1, c_farg2, c_farg3
    };
#else
    static const Register INT_ArgReg[Argument::n_int_register_parameters_c] = {
      c_rarg0, c_rarg1, c_rarg2, c_rarg3, c_rarg4, c_rarg5
    };
    static const XMMRegister FP_ArgReg[Argument::n_float_register_parameters_c] = {
      c_farg0, c_farg1, c_farg2, c_farg3,
      c_farg4, c_farg5, c_farg6, c_farg7
    };
#endif // _WIN64


    uint int_args = 0;
    uint fp_args = 0;
    uint stk_args = 0; // inc by 2 each time

    for (int i = 0; i < total_args_passed; i++) {
      switch (sig_bt[i]) {
      case T_BOOLEAN:
      case T_CHAR:
      case T_BYTE:
      case T_SHORT:
      case T_INT:
        if (int_args < Argument::n_int_register_parameters_c) {
          regs[i].set1(INT_ArgReg[int_args++]->as_VMReg());
#ifdef _WIN64
          fp_args++;
          // Allocate slots for the callee to spill register args to the stack.
          stk_args += 2;
#endif
        } else {
          regs[i].set1(VMRegImpl::stack2reg(stk_args));
          stk_args += 2;
        }
        break;
      case T_LONG:
        assert(sig_bt[i + 1] == T_VOID, "expecting half");
        // fall through
      case T_OBJECT:
      case T_ARRAY:
      case T_ADDRESS:
        if (int_args < Argument::n_int_register_parameters_c) {
          regs[i].set2(INT_ArgReg[int_args++]->as_VMReg());
#ifdef _WIN64
          fp_args++;
          stk_args += 2;
#endif
        } else {
          regs[i].set2(VMRegImpl::stack2reg(stk_args));
          stk_args += 2;
        }
        break;
      case T_FLOAT:
        if (fp_args < Argument::n_float_register_parameters_c) {
          regs[i].set1(FP_ArgReg[fp_args++]->as_VMReg());
#ifdef _WIN64
          int_args++;
          // Allocate slots for the callee to spill register args to the stack.
          stk_args += 2;
#endif
        } else {
          regs[i].set1(VMRegImpl::stack2reg(stk_args));
          stk_args += 2;
        }
        break;
      case T_DOUBLE:
        assert(sig_bt[i + 1] == T_VOID, "expecting half");
        if (fp_args < Argument::n_float_register_parameters_c) {
          regs[i].set2(FP_ArgReg[fp_args++]->as_VMReg());
#ifdef _WIN64
          int_args++;
          // Allocate slots for the callee to spill register args to the stack.
          stk_args += 2;
#endif
        } else {
          regs[i].set2(VMRegImpl::stack2reg(stk_args));
          stk_args += 2;
        }
        break;
      case T_VOID: // Halves of longs and doubles
        assert(i != 0 && (sig_bt[i - 1] == T_LONG || sig_bt[i - 1] == T_DOUBLE), "expecting half");
        regs[i].set_bad();
        break;
      default:
        ShouldNotReachHere();
        break;
      }
    }
#ifdef _WIN64
  // The Windows ABI requires that we always allocate enough stack space
  // for four 64-bit registers to be stored down.
  if (stk_args < 8) {
    stk_args = 8;
  }
#endif // _WIN64

  return stk_args;
}
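
// Worked example (illustrative) for a native signature (jint, jdouble):
// on linux the loop above assigns c_rarg0 and c_farg0 and returns 0 stack
// slots; on win64 the int and fp counters shadow each other, so the args
// land in c_rarg0 and c_farg1, and stk_args is raised to the 8-slot
// (32-byte) minimum that gives the callee its register home area.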

// On 64-bit we will store integer-like items to the stack as 64-bit items
// (as the sparc abi does) even though java would only store 32 bits for a
// parameter.  On 32-bit it would simply be 32 bits.
// So this routine does 32->32 on 32-bit and 32->64 on 64-bit.
static void move32_64(MacroAssembler* masm, VMRegPair src, VMRegPair dst) {
  if (src.first()->is_stack()) {
    if (dst.first()->is_stack()) {
      // stack to stack
      __ movslq(rax, Address(rbp, reg2offset_in(src.first())));
      __ movq(Address(rsp, reg2offset_out(dst.first())), rax);
    } else {
      // stack to reg
      __ movslq(dst.first()->as_Register(), Address(rbp, reg2offset_in(src.first())));
    }
  } else if (dst.first()->is_stack()) {
    // reg to stack
    // Do we really have to sign extend???
    // __ movslq(src.first()->as_Register(), src.first()->as_Register());
    __ movq(Address(rsp, reg2offset_out(dst.first())), src.first()->as_Register());
  } else {
    // Do we really have to sign extend???
    // __ movslq(dst.first()->as_Register(), src.first()->as_Register());
    if (dst.first() != src.first()) {
      __ movq(dst.first()->as_Register(), src.first()->as_Register());
    }
  }
}
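
// Illustrative note: movslq sign-extends its 32-bit source to 64 bits, so an
// int argument of -1 (0xffffffff) arrives in the outgoing slot or register
// as 0xffffffffffffffff rather than being zero-extended.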


// An oop arg. Must pass a handle, not the oop itself.
static void object_move(MacroAssembler* masm,
                        OopMap* map,
                        int oop_handle_offset,
                        int framesize_in_slots,
                        VMRegPair src,
                        VMRegPair dst,
                        bool is_receiver,
                        int* receiver_offset) {

  // must pass a handle. First figure out the location we use as a handle

  Register rHandle = dst.first()->is_stack() ? rax : dst.first()->as_Register();

  // See if the oop is NULL; if it is, we need no handle

  if (src.first()->is_stack()) {

    // Oop is already on the stack as an argument
    int offset_in_older_frame = src.first()->reg2stack() + SharedRuntime::out_preserve_stack_slots();
    map->set_oop(VMRegImpl::stack2reg(offset_in_older_frame + framesize_in_slots));
    if (is_receiver) {
      *receiver_offset = (offset_in_older_frame + framesize_in_slots) * VMRegImpl::stack_slot_size;
    }

    __ cmpptr(Address(rbp, reg2offset_in(src.first())), (int32_t)NULL_WORD);
    __ lea(rHandle, Address(rbp, reg2offset_in(src.first())));
    // conditionally move a NULL
    __ cmovptr(Assembler::equal, rHandle, Address(rbp, reg2offset_in(src.first())));
  } else {

    // Oop is in a register; we must store it to the space we reserve
    // on the stack for oop_handles and pass a handle if the oop is non-NULL

    const Register rOop = src.first()->as_Register();
    int oop_slot;
    if (rOop == j_rarg0)
      oop_slot = 0;
    else if (rOop == j_rarg1)
      oop_slot = 1;
    else if (rOop == j_rarg2)
      oop_slot = 2;
    else if (rOop == j_rarg3)
      oop_slot = 3;
    else if (rOop == j_rarg4)
      oop_slot = 4;
    else {
      assert(rOop == j_rarg5, "wrong register");
      oop_slot = 5;
    }

    oop_slot = oop_slot * VMRegImpl::slots_per_word + oop_handle_offset;
    int offset = oop_slot*VMRegImpl::stack_slot_size;

    map->set_oop(VMRegImpl::stack2reg(oop_slot));
    // Store oop in handle area, may be NULL
    __ movptr(Address(rsp, offset), rOop);
    if (is_receiver) {
      *receiver_offset = offset;
    }

    __ cmpptr(rOop, (int32_t)NULL_WORD);
    __ lea(rHandle, Address(rsp, offset));
    // conditionally move a NULL from the handle area where it was just stored
    __ cmovptr(Assembler::equal, rHandle, Address(rsp, offset));
  }

  // If arg is on the stack then place it, otherwise it is already in the correct reg.
  if (dst.first()->is_stack()) {
    __ movptr(Address(rsp, reg2offset_out(dst.first())), rHandle);
  }
}
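
// For illustration: after object_move the native callee receives either NULL
// (when the oop itself was NULL) or the address of a stack slot holding the
// oop, i.e. the usual JNI handle indirection where the VM later reads
// *(oop*)handle to recover the object.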

// A float arg may have to do float-reg-to-int-reg conversion
static void float_move(MacroAssembler* masm, VMRegPair src, VMRegPair dst) {
  assert(!src.second()->is_valid() && !dst.second()->is_valid(), "bad float_move");

  // The calling convention assures us that each VMRegPair is either
  // all really one physical register or adjacent stack slots.
  // This greatly simplifies the cases here compared to sparc.

  if (src.first()->is_stack()) {
    if (dst.first()->is_stack()) {
      __ movl(rax, Address(rbp, reg2offset_in(src.first())));
      __ movptr(Address(rsp, reg2offset_out(dst.first())), rax);
    } else {
      // stack to reg
      assert(dst.first()->is_XMMRegister(), "only expect xmm registers as parameters");
      __ movflt(dst.first()->as_XMMRegister(), Address(rbp, reg2offset_in(src.first())));
    }
  } else if (dst.first()->is_stack()) {
    // reg to stack
    assert(src.first()->is_XMMRegister(), "only expect xmm registers as parameters");
    __ movflt(Address(rsp, reg2offset_out(dst.first())), src.first()->as_XMMRegister());
  } else {
    // reg to reg
    // In theory these overlap but the ordering is such that this is likely a nop
    if ( src.first() != dst.first()) {
      __ movdbl(dst.first()->as_XMMRegister(),  src.first()->as_XMMRegister());
    }
  }
}

// A long move
static void long_move(MacroAssembler* masm, VMRegPair src, VMRegPair dst) {

  // The calling convention assures us that each VMRegPair is either
  // all really one physical register or adjacent stack slots.
  // This greatly simplifies the cases here compared to sparc.

  if (src.is_single_phys_reg() ) {
    if (dst.is_single_phys_reg()) {
      if (dst.first() != src.first()) {
        __ mov(dst.first()->as_Register(), src.first()->as_Register());
      }
    } else {
      assert(dst.is_single_reg(), "not a stack pair");
      __ movq(Address(rsp, reg2offset_out(dst.first())), src.first()->as_Register());
    }
  } else if (dst.is_single_phys_reg()) {
    assert(src.is_single_reg(),  "not a stack pair");
    __ movq(dst.first()->as_Register(), Address(rbp, reg2offset_out(src.first())));
  } else {
    assert(src.is_single_reg() && dst.is_single_reg(), "not stack pairs");
    __ movq(rax, Address(rbp, reg2offset_in(src.first())));
    __ movq(Address(rsp, reg2offset_out(dst.first())), rax);
  }
}

// A double move
static void double_move(MacroAssembler* masm, VMRegPair src, VMRegPair dst) {

  // The calling convention assures us that each VMRegPair is either
  // all really one physical register or adjacent stack slots.
  // This greatly simplifies the cases here compared to sparc.

  if (src.is_single_phys_reg() ) {
    if (dst.is_single_phys_reg()) {
      // In theory these overlap but the ordering is such that this is likely a nop
      if ( src.first() != dst.first()) {
        __ movdbl(dst.first()->as_XMMRegister(), src.first()->as_XMMRegister());
      }
    } else {
      assert(dst.is_single_reg(), "not a stack pair");
      __ movdbl(Address(rsp, reg2offset_out(dst.first())), src.first()->as_XMMRegister());
    }
  } else if (dst.is_single_phys_reg()) {
    assert(src.is_single_reg(),  "not a stack pair");
    __ movdbl(dst.first()->as_XMMRegister(), Address(rbp, reg2offset_out(src.first())));
  } else {
    assert(src.is_single_reg() && dst.is_single_reg(), "not stack pairs");
    __ movq(rax, Address(rbp, reg2offset_in(src.first())));
    __ movq(Address(rsp, reg2offset_out(dst.first())), rax);
  }
}


void SharedRuntime::save_native_result(MacroAssembler *masm, BasicType ret_type, int frame_slots) {
  // We always ignore the frame_slots arg and just use the space just below the
  // frame pointer, which by this time is free to use.
  switch (ret_type) {
  case T_FLOAT:
    __ movflt(Address(rbp, -wordSize), xmm0);
    break;
  case T_DOUBLE:
    __ movdbl(Address(rbp, -wordSize), xmm0);
    break;
  case T_VOID:  break;
  default: {
    __ movptr(Address(rbp, -wordSize), rax);
    }
  }
}

void SharedRuntime::restore_native_result(MacroAssembler *masm, BasicType ret_type, int frame_slots) {
  // We always ignore the frame_slots arg and just use the space just below the
  // frame pointer, which by this time is free to use.
  switch (ret_type) {
  case T_FLOAT:
    __ movflt(xmm0, Address(rbp, -wordSize));
    break;
  case T_DOUBLE:
    __ movdbl(xmm0, Address(rbp, -wordSize));
    break;
  case T_VOID:  break;
  default: {
    __ movptr(rax, Address(rbp, -wordSize));
    }
  }
}

static void save_args(MacroAssembler *masm, int arg_count, int first_arg, VMRegPair *args) {
    for ( int i = first_arg ; i < arg_count ; i++ ) {
      if (args[i].first()->is_Register()) {
        __ push(args[i].first()->as_Register());
      } else if (args[i].first()->is_XMMRegister()) {
        __ subptr(rsp, 2*wordSize);
        __ movdbl(Address(rsp, 0), args[i].first()->as_XMMRegister());
      }
    }
}

static void restore_args(MacroAssembler *masm, int arg_count, int first_arg, VMRegPair *args) {
    for ( int i = arg_count - 1 ; i >= first_arg ; i-- ) {
      if (args[i].first()->is_Register()) {
        __ pop(args[i].first()->as_Register());
      } else if (args[i].first()->is_XMMRegister()) {
        __ movdbl(args[i].first()->as_XMMRegister(), Address(rsp, 0));
        __ addptr(rsp, 2*wordSize);
      }
    }
}

// ---------------------------------------------------------------------------
// Generate a native wrapper for a given method.  The method takes arguments
// in the Java compiled code convention, marshals them to the native
// convention (handlizes oops, etc), transitions to native, makes the call,
// returns to java state (possibly blocking), unhandlizes any result and
// returns.
nmethod *SharedRuntime::generate_native_wrapper(MacroAssembler *masm,
                                                methodHandle method,
                                                int total_in_args,
                                                int comp_args_on_stack,
                                                BasicType *in_sig_bt,
                                                VMRegPair *in_regs,
                                                BasicType ret_type) {
  // Native nmethod wrappers never take possession of the oop arguments.
  // So the caller will gc the arguments.  The only thing we need an
  // oopMap for is if the call is static.
  //
  // An OopMap for lock (and class if static)
  OopMapSet *oop_maps = new OopMapSet();
  intptr_t start = (intptr_t)__ pc();

  // We have received a description of where all the java args are located
  // on entry to the wrapper. We need to convert these args to where
  // the jni function will expect them. To figure out where they go
  // we convert the java signature to a C signature by inserting
  // the hidden arguments as arg[0] and possibly arg[1] (static method).

  int total_c_args = total_in_args + 1;
  if (method->is_static()) {
    total_c_args++;
  }

  BasicType* out_sig_bt = NEW_RESOURCE_ARRAY(BasicType, total_c_args);
  VMRegPair* out_regs   = NEW_RESOURCE_ARRAY(VMRegPair,   total_c_args);

  int argc = 0;
  out_sig_bt[argc++] = T_ADDRESS;
  if (method->is_static()) {
    out_sig_bt[argc++] = T_OBJECT;
  }

  for (int i = 0; i < total_in_args ; i++ ) {
    out_sig_bt[argc++] = in_sig_bt[i];
  }

  // Now figure out where the args must be stored and how much stack space
  // they require.
  //
  int out_arg_slots;
  out_arg_slots = c_calling_convention(out_sig_bt, out_regs, total_c_args);

  // Compute framesize for the wrapper.  We need to handlize all oops in
  // incoming registers.

  // Calculate the total number of stack slots we will need.

  // First count the abi requirement plus all of the outgoing args
  int stack_slots = SharedRuntime::out_preserve_stack_slots() + out_arg_slots;

  // Now the space for the inbound oop handle area

  int oop_handle_offset = stack_slots;
  stack_slots += 6*VMRegImpl::slots_per_word;

  // Now any space we need for handlizing a klass if static method

  int oop_temp_slot_offset = 0;
  int klass_slot_offset = 0;
  int klass_offset = -1;
  int lock_slot_offset = 0;
  bool is_static = false;

  if (method->is_static()) {
    klass_slot_offset = stack_slots;
    stack_slots += VMRegImpl::slots_per_word;
    klass_offset = klass_slot_offset * VMRegImpl::stack_slot_size;
    is_static = true;
  }

  // Plus a lock if needed

  if (method->is_synchronized()) {
    lock_slot_offset = stack_slots;
    stack_slots += VMRegImpl::slots_per_word;
  }

  // Now a place (+2 slots) to save return values or temps during shuffling,
  // plus 4 slots for the return address (which we own) and saved rbp
  stack_slots += 6;

  // Ok, the space we have allocated will look like:
  //
  //
  // FP-> |                     |
  //      |---------------------|
  //      | 2 slots for moves   |
  //      |---------------------|
  //      | lock box (if sync)  |
  //      |---------------------| <- lock_slot_offset
  //      | klass (if static)   |
  //      |---------------------| <- klass_slot_offset
  //      | oopHandle area      |
  //      |---------------------| <- oop_handle_offset (6 java arg registers)
  //      | outbound memory     |
  //      | based arguments     |
  //      |                     |
  //      |---------------------|
  //      |                     |
  // SP-> | out_preserved_slots |
  //
  //

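  // Worked example (illustrative; assumes a non-static, non-synchronized
  // method with no outgoing stack args): stack_slots = 0 (abi) + 0 (out args)
  // + 12 (oop handle area, 6 words) + 6 (moves/return address/rbp) = 18,
  // rounded just below to 20 slots, i.e. an 80-byte frame.
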
1272
1273  // Now compute actual number of stack words we need rounding to make
1274  // stack properly aligned.
1275  stack_slots = round_to(stack_slots, StackAlignmentInSlots);
1276
1277  int stack_size = stack_slots * VMRegImpl::stack_slot_size;
1278
1279
1280  // First thing make an ic check to see if we should even be here
1281
1282  // We are free to use all registers as temps without saving them and
1283  // restoring them except rbp. rbp is the only callee save register
1284  // as far as the interpreter and the compiler(s) are concerned.
1285
1286
1287  const Register ic_reg = rax;
1288  const Register receiver = j_rarg0;
1289
1290  Label ok;
1291  Label exception_pending;
1292
1293  assert_different_registers(ic_reg, receiver, rscratch1);
1294  __ verify_oop(receiver);
1295  __ load_klass(rscratch1, receiver);
1296  __ cmpq(ic_reg, rscratch1);
1297  __ jcc(Assembler::equal, ok);
1298
1299  __ jump(RuntimeAddress(SharedRuntime::get_ic_miss_stub()));
1300
1301  __ bind(ok);
1302
1303  // Verified entry point must be aligned
1304  __ align(8);
1305
1306  int vep_offset = ((intptr_t)__ pc()) - start;
1307
1308  // The instruction at the verified entry point must be 5 bytes or longer
1309  // because it can be patched on the fly by make_non_entrant. The stack bang
1310  // instruction fits that requirement.
1311
1312  // Generate stack overflow check
1313
1314  if (UseStackBanging) {
1315    __ bang_stack_with_offset(StackShadowPages*os::vm_page_size());
1316  } else {
1317    // need a 5 byte instruction to allow MT safe patching to non-entrant
1318    __ fat_nop();
1319  }
1320
1321  // Generate a new frame for the wrapper.
1322  __ enter();
1323  // -2 because return address is already present and so is saved rbp
1324  __ subptr(rsp, stack_size - 2*wordSize);
1325
1326  // Frame is now completed as far as size and linkage.
1327
1328  int frame_complete = ((intptr_t)__ pc()) - start;
1329
1330#ifdef ASSERT
1331  {
1332    Label L;
1333    __ mov(rax, rsp);
1334    __ andptr(rax, -16); // must be 16 byte boundary (see amd64 ABI)
1335    __ cmpptr(rax, rsp);
1336    __ jcc(Assembler::equal, L);
1337    __ stop("improperly aligned stack");
1338    __ bind(L);
1339  }
1340#endif /* ASSERT */
1341
1342
1343  // We use r14 as the oop handle for the receiver/klass
1344  // It is callee save so it survives the call to native
1345
1346  const Register oop_handle_reg = r14;
1347
1348
1349
1350  //
1351  // We immediately shuffle the arguments so that, for any vm call we have to
1352  // make from here on out (sync slow path, jvmti, etc.), we will have
1353  // captured the oops from our caller and have a valid oopMap for
1354  // them.
1355
1356  // -----------------
1357  // The Grand Shuffle
1358
1359  // The Java calling convention is either equal (linux) or denser (win64) than the
1360  // C calling convention. However, because of the jni_env argument, the C calling
1361  // convention always has at least one more (and two for static) arguments than Java.
1362  // Therefore if we move the args from java -> c backwards then we will never have
1363  // a register->register conflict and we don't have to build a dependency graph
1364  // and figure out how to break any cycles.
1365  //
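  // (An analogy, not HotSpot code: when every source index sits at or
  //  below its destination index, an overlapping copy is only safe done
  //  back-to-front, exactly like an overlapping memmove:
  //
  //      for (int k = n - 1; k >= 0; k--) dst[k + 1] = src[k];
  //
  //  Here the C argument list is the Java list shifted by the JNIEnv*
  //  slot, so the same back-to-front order never clobbers a source
  //  register before it has been read.)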
1366
1367  // Record esp-based slot for receiver on stack for non-static methods
1368  int receiver_offset = -1;
1369
1370  // This is a trick. We double the stack slots so we can claim
1371  // the oops in the caller's frame. Since we are sure to have
1372  // more args than the caller, doubling is enough to make
1373  // sure we can capture all the incoming oop args from the
1374  // caller.
1375  //
1376  OopMap* map = new OopMap(stack_slots * 2, 0 /* arg_slots*/);
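  // (Illustration of the doubling: if this frame is 32 slots, an incoming
  //  oop still on the caller's stack lives at slot 32 + k for some k; since
  //  we are sure to pass at least as many args as we received, k < 32, so
  //  a 64-slot map can always name that caller-frame location.)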
1377
1378  // Mark location of rbp (someday)
1379  // map->set_callee_saved(VMRegImpl::stack2reg( stack_slots - 2), stack_slots * 2, 0, vmreg(rbp));
1380
1381  // Use eax, ebx as temporaries during any memory-memory moves we have to do
1382  // All inbound args are referenced based on rbp and all outbound args via rsp.
1383
1384
1385#ifdef ASSERT
1386  bool reg_destroyed[RegisterImpl::number_of_registers];
1387  bool freg_destroyed[XMMRegisterImpl::number_of_registers];
1388  for ( int r = 0 ; r < RegisterImpl::number_of_registers ; r++ ) {
1389    reg_destroyed[r] = false;
1390  }
1391  for ( int f = 0 ; f < XMMRegisterImpl::number_of_registers ; f++ ) {
1392    freg_destroyed[f] = false;
1393  }
1394
1395#endif /* ASSERT */
1396
1397
1398  int c_arg = total_c_args - 1;
1399  for ( int i = total_in_args - 1; i >= 0 ; i--, c_arg-- ) {
1400#ifdef ASSERT
1401    if (in_regs[i].first()->is_Register()) {
1402      assert(!reg_destroyed[in_regs[i].first()->as_Register()->encoding()], "destroyed reg!");
1403    } else if (in_regs[i].first()->is_XMMRegister()) {
1404      assert(!freg_destroyed[in_regs[i].first()->as_XMMRegister()->encoding()], "destroyed reg!");
1405    }
1406    if (out_regs[c_arg].first()->is_Register()) {
1407      reg_destroyed[out_regs[c_arg].first()->as_Register()->encoding()] = true;
1408    } else if (out_regs[c_arg].first()->is_XMMRegister()) {
1409      freg_destroyed[out_regs[c_arg].first()->as_XMMRegister()->encoding()] = true;
1410    }
1411#endif /* ASSERT */
1412    switch (in_sig_bt[i]) {
1413      case T_ARRAY:
1414      case T_OBJECT:
1415        object_move(masm, map, oop_handle_offset, stack_slots, in_regs[i], out_regs[c_arg],
1416                    ((i == 0) && (!is_static)),
1417                    &receiver_offset);
1418        break;
1419      case T_VOID:
1420        break;
1421
1422      case T_FLOAT:
1423        float_move(masm, in_regs[i], out_regs[c_arg]);
1424        break;
1425
1426      case T_DOUBLE:
1427        assert( i + 1 < total_in_args &&
1428                in_sig_bt[i + 1] == T_VOID &&
1429                out_sig_bt[c_arg+1] == T_VOID, "bad arg list");
1430        double_move(masm, in_regs[i], out_regs[c_arg]);
1431        break;
1432
1433      case T_LONG :
1434        long_move(masm, in_regs[i], out_regs[c_arg]);
1435        break;
1436
1437      case T_ADDRESS: assert(false, "found T_ADDRESS in java args"); // fall through (assert compiles away in product)
1438
1439      default:
1440        move32_64(masm, in_regs[i], out_regs[c_arg]);
1441    }
1442  }
1443
1444  // point c_arg at the first arg that is already loaded in case we
1445  // need to spill before we call out
1446  c_arg++;
1447
1448  // Pre-load a static method's oop into r14.  Used both by locking code and
1449  // the normal JNI call code.
1450  if (method->is_static()) {
1451
1452    //  load oop into a register
1453    __ movoop(oop_handle_reg, JNIHandles::make_local(Klass::cast(method->method_holder())->java_mirror()));
1454
1455    // Now handlize the static class mirror; it's known to be not-null.
1456    __ movptr(Address(rsp, klass_offset), oop_handle_reg);
1457    map->set_oop(VMRegImpl::stack2reg(klass_slot_offset));
1458
1459    // Now get the handle
1460    __ lea(oop_handle_reg, Address(rsp, klass_offset));
1461    // store the klass handle as second argument
1462    __ movptr(c_rarg1, oop_handle_reg);
1463    // and protect the arg if we must spill
1464    c_arg--;
1465  }
1466
1467  // Change state to native (we save the return address in the thread, since it might not
1468  // be pushed on the stack when we do a stack traversal). It is enough that the pc()
1469  // points into the right code segment. It does not have to be the correct return pc.
1470  // We use the same pc/oopMap repeatedly when we call out
1471
1472  intptr_t the_pc = (intptr_t) __ pc();
1473  oop_maps->add_gc_map(the_pc - start, map);
1474
1475  __ set_last_Java_frame(rsp, noreg, (address)the_pc);
1476
1477
1478  // We have all of the arguments set up at this point. We must not touch any register
1479  // argument registers from here on (no oopMap describes any oops they might hold).
1480
1481  {
1482    SkipIfEqual skip(masm, &DTraceMethodProbes, false);
1483    // protect the args we've loaded
1484    save_args(masm, total_c_args, c_arg, out_regs);
1485    __ movoop(c_rarg1, JNIHandles::make_local(method()));
1486    __ call_VM_leaf(
1487      CAST_FROM_FN_PTR(address, SharedRuntime::dtrace_method_entry),
1488      r15_thread, c_rarg1);
1489    restore_args(masm, total_c_args, c_arg, out_regs);
1490  }
1491
1492  // RedefineClasses() tracing support for obsolete method entry
1493  if (RC_TRACE_IN_RANGE(0x00001000, 0x00002000)) {
1494    // protect the args we've loaded
1495    save_args(masm, total_c_args, c_arg, out_regs);
1496    __ movoop(c_rarg1, JNIHandles::make_local(method()));
1497    __ call_VM_leaf(
1498      CAST_FROM_FN_PTR(address, SharedRuntime::rc_trace_method_entry),
1499      r15_thread, c_rarg1);
1500    restore_args(masm, total_c_args, c_arg, out_regs);
1501  }
1502
1503  // Lock a synchronized method
1504
1505  // Register definitions used by locking and unlocking
1506
1507  const Register swap_reg = rax;  // Must use rax for cmpxchg instruction
1508  const Register obj_reg  = rbx;  // Will contain the oop
1509  const Register lock_reg = r13;  // Address of compiler lock object (BasicLock)
1510  const Register old_hdr  = r13;  // value of old header at unlock time
1511
1512  Label slow_path_lock;
1513  Label lock_done;
1514
1515  if (method->is_synchronized()) {
1516
1517
1518    const int mark_word_offset = BasicLock::displaced_header_offset_in_bytes();
1519
1520    // Get the handle (the 2nd argument)
1521    __ mov(oop_handle_reg, c_rarg1);
1522
1523    // Get address of the box
1524
1525    __ lea(lock_reg, Address(rsp, lock_slot_offset * VMRegImpl::stack_slot_size));
1526
1527    // Load the oop from the handle
1528    __ movptr(obj_reg, Address(oop_handle_reg, 0));
1529
1530    if (UseBiasedLocking) {
1531      __ biased_locking_enter(lock_reg, obj_reg, swap_reg, rscratch1, false, lock_done, &slow_path_lock);
1532    }
1533
1534    // Load immediate 1 into swap_reg %rax
1535    __ movl(swap_reg, 1);
1536
1537    // Load (object->mark() | 1) into swap_reg %rax
1538    __ orptr(swap_reg, Address(obj_reg, 0));
1539
1540    // Save (object->mark() | 1) into BasicLock's displaced header
1541    __ movptr(Address(lock_reg, mark_word_offset), swap_reg);
1542
1543    if (os::is_MP()) {
1544      __ lock();
1545    }
1546
1547    // src -> dest iff dest == rax else rax <- dest
1548    __ cmpxchgptr(lock_reg, Address(obj_reg, 0));
1549    __ jcc(Assembler::equal, lock_done);
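    // (The fast path just attempted, sketched as hedged pseudo-C:
    //
    //    mark = obj->mark | 1;              // expected unlocked mark word
    //    box->displaced_header = mark;      // saved in the BasicLock
    //    if (CAS(&obj->mark, mark, box))    // header now points at our box
    //      goto lock_done;
    //
    //  What follows handles the recursive case and otherwise takes the
    //  slow path.)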
1550
1551    // Hmm should this move to the slow path code area???
1552
1553    // Test if the oopMark is an obvious stack pointer, i.e.,
1554    //  1) (mark & 3) == 0, and
1555    //  2) rsp <= mark < rsp + os::vm_page_size()
1556    // These 3 tests can be done by evaluating the following
1557    // expression: ((mark - rsp) & (3 - os::vm_page_size())),
1558    // assuming both stack pointer and pagesize have their
1559    // least significant 2 bits clear.
1560    // NOTE: the oopMark is in swap_reg %rax as the result of cmpxchg
1561
1562    __ subptr(swap_reg, rsp);
1563    __ andptr(swap_reg, 3 - os::vm_page_size());
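    // (Worked example, assuming a 4K page: 3 - 4096 == -4093, i.e. the
    //  64-bit mask 0xffff'ffff'ffff'f003.  (mark - rsp) & mask is zero
    //  exactly when bits 0-1 are clear -- mark is 4-byte aligned, given
    //  rsp is -- and bits 12 and up are clear, i.e. 0 <= mark - rsp < 4096:
    //  precisely the stack-pointer test described above.)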
1564
1565    // Save the test result, for recursive case, the result is zero
1566    __ movptr(Address(lock_reg, mark_word_offset), swap_reg);
1567    __ jcc(Assembler::notEqual, slow_path_lock);
1568
1569    // Slow path will re-enter here
1570
1571    __ bind(lock_done);
1572  }
1573
1574
1575  // Finally just about ready to make the JNI call
1576
1577
1578  // get JNIEnv* which is first argument to native
1579
1580  __ lea(c_rarg0, Address(r15_thread, in_bytes(JavaThread::jni_environment_offset())));
1581
1582  // Now set thread in native
1583  __ movl(Address(r15_thread, JavaThread::thread_state_offset()), _thread_in_native);
1584
1585  __ call(RuntimeAddress(method->native_function()));
1586
1587  // Either restore the MXCSR register after returning from the JNI call
1588  // or verify that it wasn't changed.
1589  if (RestoreMXCSROnJNICalls) {
1590    __ ldmxcsr(ExternalAddress(StubRoutines::x86::mxcsr_std()));
1592  } else if (CheckJNICalls) {
1594    __ call(RuntimeAddress(CAST_FROM_FN_PTR(address, StubRoutines::x86::verify_mxcsr_entry())));
1595  }
1596
1597
1598  // Unpack native results.
1599  switch (ret_type) {
1600  case T_BOOLEAN: __ c2bool(rax);             break;
1601  case T_CHAR   : __ movzwl(rax, rax);        break;
1602  case T_BYTE   : __ sign_extend_byte(rax);   break;
1603  case T_SHORT  : __ sign_extend_short(rax);  break;
1604  case T_INT    : /* nothing to do */        break;
1605  case T_DOUBLE :
1606  case T_FLOAT  :
1607    // Result is in xmm0 we'll save as needed
1608    break;
1609  case T_ARRAY:                 // Really a handle
1610  case T_OBJECT:                // Really a handle
1611      break; // can't de-handlize until after safepoint check
1612  case T_VOID: break;
1613  case T_LONG: break;
1614  default       : ShouldNotReachHere();
1615  }
1616
1617  // Switch thread to "native transition" state before reading the synchronization state.
1618  // This additional state is necessary because reading and testing the synchronization
1619  // state is not atomic w.r.t. GC, as this scenario demonstrates:
1620  //     Java thread A, in _thread_in_native state, loads _not_synchronized and is preempted.
1621  //     VM thread changes sync state to synchronizing and suspends threads for GC.
1622  //     Thread A is resumed to finish this native method, but doesn't block here since it
1623  //     didn't see any synchronization in progress, and escapes.
1624  __ movl(Address(r15_thread, JavaThread::thread_state_offset()), _thread_in_native_trans);
1625
1626  if (os::is_MP()) {
1627    if (UseMembar) {
1628      // Force this write out before the read below
1629      __ membar(Assembler::Membar_mask_bits(
1630           Assembler::LoadLoad | Assembler::LoadStore |
1631           Assembler::StoreLoad | Assembler::StoreStore));
1632    } else {
1633      // Write serialization page so VM thread can do a pseudo remote membar.
1634      // We use the current thread pointer to calculate a thread specific
1635      // offset to write to within the page. This minimizes bus traffic
1636      // due to cache line collision.
1637      __ serialize_memory(r15_thread, rcx);
1638    }
1639  }
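  // (A hedged modern-C++ analogy, not HotSpot code: either branch enforces
  //  that the thread-state store becomes visible before the safepoint-state
  //  load, the classic Dekker-style StoreLoad requirement:
  //
  //      std::atomic<int> thread_state, safepoint_state;
  //      thread_state.store(_thread_in_native_trans, std::memory_order_relaxed);
  //      std::atomic_thread_fence(std::memory_order_seq_cst); // StoreLoad
  //      int s = safepoint_state.load(std::memory_order_relaxed);
  //
  //  The UseMembar branch pays for the fence locally; the serialization
  //  page defers the cost to the VM thread's pseudo remote membar.)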
1640
1641
1642  // check for safepoint operation in progress and/or pending suspend requests
1643  {
1644    Label Continue;
1645
1646    __ cmp32(ExternalAddress((address)SafepointSynchronize::address_of_state()),
1647             SafepointSynchronize::_not_synchronized);
1648
1649    Label L;
1650    __ jcc(Assembler::notEqual, L);
1651    __ cmpl(Address(r15_thread, JavaThread::suspend_flags_offset()), 0);
1652    __ jcc(Assembler::equal, Continue);
1653    __ bind(L);
1654
1655    // Don't use call_VM as it will see a possible pending exception and forward it
1656    // and never return here preventing us from clearing _last_native_pc down below.
1657    // Also can't use call_VM_leaf either as it will check to see if rsi & rdi are
1658    // preserved and correspond to the bcp/locals pointers. So we do a runtime call
1659    // by hand.
1660    //
1661    save_native_result(masm, ret_type, stack_slots);
1662    __ mov(c_rarg0, r15_thread);
1663    __ mov(r12, rsp); // remember sp
1664    __ subptr(rsp, frame::arg_reg_save_area_bytes); // windows
1665    __ andptr(rsp, -16); // align stack as required by ABI
1666    __ call(RuntimeAddress(CAST_FROM_FN_PTR(address, JavaThread::check_special_condition_for_native_trans)));
1667    __ mov(rsp, r12); // restore sp
1668    __ reinit_heapbase();
1669    // Restore any method result value
1670    restore_native_result(masm, ret_type, stack_slots);
1671    __ bind(Continue);
1672  }
1673
1674  // change thread state
1675  __ movl(Address(r15_thread, JavaThread::thread_state_offset()), _thread_in_Java);
1676
1677  Label reguard;
1678  Label reguard_done;
1679  __ cmpl(Address(r15_thread, JavaThread::stack_guard_state_offset()), JavaThread::stack_guard_yellow_disabled);
1680  __ jcc(Assembler::equal, reguard);
1681  __ bind(reguard_done);
1682
1683  // native result if any is live
1684
1685  // Unlock
1686  Label unlock_done;
1687  Label slow_path_unlock;
1688  if (method->is_synchronized()) {
1689
1690    // Get locked oop from the handle we passed to jni
1691    __ movptr(obj_reg, Address(oop_handle_reg, 0));
1692
1693    Label done;
1694
1695    if (UseBiasedLocking) {
1696      __ biased_locking_exit(obj_reg, old_hdr, done);
1697    }
1698
1699    // Simple recursive lock?
1700
1701    __ cmpptr(Address(rsp, lock_slot_offset * VMRegImpl::stack_slot_size), (int32_t)NULL_WORD);
1702    __ jcc(Assembler::equal, done);
1703
1704    // Must save rax if it is live now because cmpxchg must use it
1705    if (ret_type != T_FLOAT && ret_type != T_DOUBLE && ret_type != T_VOID) {
1706      save_native_result(masm, ret_type, stack_slots);
1707    }
1708
1709
1710    // get address of the stack lock
1711    __ lea(rax, Address(rsp, lock_slot_offset * VMRegImpl::stack_slot_size));
1712    //  get old displaced header
1713    __ movptr(old_hdr, Address(rax, 0));
1714
1715    // Atomic swap old header if oop still contains the stack lock
1716    if (os::is_MP()) {
1717      __ lock();
1718    }
1719    __ cmpxchgptr(old_hdr, Address(obj_reg, 0));
1720    __ jcc(Assembler::notEqual, slow_path_unlock);
1721
1722    // slow path re-enters here
1723    __ bind(unlock_done);
1724    if (ret_type != T_FLOAT && ret_type != T_DOUBLE && ret_type != T_VOID) {
1725      restore_native_result(masm, ret_type, stack_slots);
1726    }
1727
1728    __ bind(done);
1729
1730  }
1731  {
1732    SkipIfEqual skip(masm, &DTraceMethodProbes, false);
1733    save_native_result(masm, ret_type, stack_slots);
1734    __ movoop(c_rarg1, JNIHandles::make_local(method()));
1735    __ call_VM_leaf(
1736         CAST_FROM_FN_PTR(address, SharedRuntime::dtrace_method_exit),
1737         r15_thread, c_rarg1);
1738    restore_native_result(masm, ret_type, stack_slots);
1739  }
1740
1741  __ reset_last_Java_frame(false, true);
1742
1743  // Unpack oop result
1744  if (ret_type == T_OBJECT || ret_type == T_ARRAY) {
1745      Label L;
1746      __ testptr(rax, rax);
1747      __ jcc(Assembler::zero, L);
1748      __ movptr(rax, Address(rax, 0));
1749      __ bind(L);
1750      __ verify_oop(rax);
1751  }
1752
1753  // reset handle block
1754  __ movptr(rcx, Address(r15_thread, JavaThread::active_handles_offset()));
1755  __ movptr(Address(rcx, JNIHandleBlock::top_offset_in_bytes()), (int32_t)NULL_WORD);
1756
1757  // pop our frame
1758
1759  __ leave();
1760
1761  // Any exception pending?
1762  __ cmpptr(Address(r15_thread, in_bytes(Thread::pending_exception_offset())), (int32_t)NULL_WORD);
1763  __ jcc(Assembler::notEqual, exception_pending);
1764
1765  // Return
1766
1767  __ ret(0);
1768
1769  // Unexpected paths are out of line and go here
1770
1771  // forward the exception
1772  __ bind(exception_pending);
1773
1774  // and forward the exception
1775  __ jump(RuntimeAddress(StubRoutines::forward_exception_entry()));
1776
1777
1778  // Slow path locking & unlocking
1779  if (method->is_synchronized()) {
1780
1781    // BEGIN Slow path lock
1782    __ bind(slow_path_lock);
1783
1784    // has last_Java_frame setup. No exceptions so do vanilla call not call_VM
1785    // args are (oop obj, BasicLock* lock, JavaThread* thread)
1786
1787    // protect the args we've loaded
1788    save_args(masm, total_c_args, c_arg, out_regs);
1789
1790    __ mov(c_rarg0, obj_reg);
1791    __ mov(c_rarg1, lock_reg);
1792    __ mov(c_rarg2, r15_thread);
1793
1794    // Not a leaf but we have last_Java_frame setup as we want
1795    __ call_VM_leaf(CAST_FROM_FN_PTR(address, SharedRuntime::complete_monitor_locking_C), 3);
1796    restore_args(masm, total_c_args, c_arg, out_regs);
1797
1798#ifdef ASSERT
1799    { Label L;
1800    __ cmpptr(Address(r15_thread, in_bytes(Thread::pending_exception_offset())), (int32_t)NULL_WORD);
1801    __ jcc(Assembler::equal, L);
1802    __ stop("no pending exception allowed on exit from monitorenter");
1803    __ bind(L);
1804    }
1805#endif
1806    __ jmp(lock_done);
1807
1808    // END Slow path lock
1809
1810    // BEGIN Slow path unlock
1811    __ bind(slow_path_unlock);
1812
1813    // If we haven't already saved the native result we must save it now as xmm registers
1814    // are still exposed.
1815
1816    if (ret_type == T_FLOAT || ret_type == T_DOUBLE ) {
1817      save_native_result(masm, ret_type, stack_slots);
1818    }
1819
1820    __ lea(c_rarg1, Address(rsp, lock_slot_offset * VMRegImpl::stack_slot_size));
1821
1822    __ mov(c_rarg0, obj_reg);
1823    __ mov(r12, rsp); // remember sp
1824    __ subptr(rsp, frame::arg_reg_save_area_bytes); // windows
1825    __ andptr(rsp, -16); // align stack as required by ABI
1826
1827    // Save pending exception around call to VM (which contains an EXCEPTION_MARK)
1828    // NOTE that obj_reg == rbx currently
1829    __ movptr(rbx, Address(r15_thread, in_bytes(Thread::pending_exception_offset())));
1830    __ movptr(Address(r15_thread, in_bytes(Thread::pending_exception_offset())), (int32_t)NULL_WORD);
1831
1832    __ call(RuntimeAddress(CAST_FROM_FN_PTR(address, SharedRuntime::complete_monitor_unlocking_C)));
1833    __ mov(rsp, r12); // restore sp
1834    __ reinit_heapbase();
1835#ifdef ASSERT
1836    {
1837      Label L;
1838      __ cmpptr(Address(r15_thread, in_bytes(Thread::pending_exception_offset())), (int)NULL_WORD);
1839      __ jcc(Assembler::equal, L);
1840      __ stop("no pending exception allowed on exit complete_monitor_unlocking_C");
1841      __ bind(L);
1842    }
1843#endif /* ASSERT */
1844
1845    __ movptr(Address(r15_thread, in_bytes(Thread::pending_exception_offset())), rbx);
1846
1847    if (ret_type == T_FLOAT || ret_type == T_DOUBLE ) {
1848      restore_native_result(masm, ret_type, stack_slots);
1849    }
1850    __ jmp(unlock_done);
1851
1852    // END Slow path unlock
1853
1854  } // synchronized
1855
1856  // SLOW PATH Reguard the stack if needed
1857
1858  __ bind(reguard);
1859  save_native_result(masm, ret_type, stack_slots);
1860  __ mov(r12, rsp); // remember sp
1861  __ subptr(rsp, frame::arg_reg_save_area_bytes); // windows
1862  __ andptr(rsp, -16); // align stack as required by ABI
1863  __ call(RuntimeAddress(CAST_FROM_FN_PTR(address, SharedRuntime::reguard_yellow_pages)));
1864  __ mov(rsp, r12); // restore sp
1865  __ reinit_heapbase();
1866  restore_native_result(masm, ret_type, stack_slots);
1867  // and continue
1868  __ jmp(reguard_done);
1869
1870
1871
1872  __ flush();
1873
1874  nmethod *nm = nmethod::new_native_nmethod(method,
1875                                            masm->code(),
1876                                            vep_offset,
1877                                            frame_complete,
1878                                            stack_slots / VMRegImpl::slots_per_word,
1879                                            (is_static ? in_ByteSize(klass_offset) : in_ByteSize(receiver_offset)),
1880                                            in_ByteSize(lock_slot_offset*VMRegImpl::stack_slot_size),
1881                                            oop_maps);
1882  return nm;
1883
1884}
1885
1886#ifdef HAVE_DTRACE_H
1887// ---------------------------------------------------------------------------
1888// Generate a dtrace nmethod for a given signature.  The method takes arguments
1889// in the Java compiled code convention, marshals them to the native
1890// abi and then leaves nops at the position you would expect to call a native
1891// function. When the probe is enabled the nops are replaced with a trap
1892// instruction that dtrace inserts and the trace will cause a notification
1893// to dtrace.
1894//
1895// The probes are only able to take primitive types and java/lang/String as
1896// arguments.  No other java types are allowed. Strings are converted to utf8
1897  // strings so that, from dtrace's point of view, java strings are converted to C
1898  // strings. There is an arbitrary fixed limit on the total space that a method
1899  // can use for converting the strings (256 chars per string in the signature),
1900  // so any java string longer than this is truncated.
1901
1902static int  fp_offset[ConcreteRegisterImpl::number_of_registers] = { 0 };
1903static bool offsets_initialized = false;
1904
1905
1906nmethod *SharedRuntime::generate_dtrace_nmethod(MacroAssembler *masm,
1907                                                methodHandle method) {
1908
1909
1910  // generate_dtrace_nmethod is guarded by a mutex so we are sure to
1911  // be single threaded in this method.
1912  assert(AdapterHandlerLibrary_lock->owned_by_self(), "must be");
1913
1914  if (!offsets_initialized) {
1915    fp_offset[c_rarg0->as_VMReg()->value()] = -1 * wordSize;
1916    fp_offset[c_rarg1->as_VMReg()->value()] = -2 * wordSize;
1917    fp_offset[c_rarg2->as_VMReg()->value()] = -3 * wordSize;
1918    fp_offset[c_rarg3->as_VMReg()->value()] = -4 * wordSize;
1919    fp_offset[c_rarg4->as_VMReg()->value()] = -5 * wordSize;
1920    fp_offset[c_rarg5->as_VMReg()->value()] = -6 * wordSize;
1921
1922    fp_offset[c_farg0->as_VMReg()->value()] = -7 * wordSize;
1923    fp_offset[c_farg1->as_VMReg()->value()] = -8 * wordSize;
1924    fp_offset[c_farg2->as_VMReg()->value()] = -9 * wordSize;
1925    fp_offset[c_farg3->as_VMReg()->value()] = -10 * wordSize;
1926    fp_offset[c_farg4->as_VMReg()->value()] = -11 * wordSize;
1927    fp_offset[c_farg5->as_VMReg()->value()] = -12 * wordSize;
1928    fp_offset[c_farg6->as_VMReg()->value()] = -13 * wordSize;
1929    fp_offset[c_farg7->as_VMReg()->value()] = -14 * wordSize;
1930
1931    offsets_initialized = true;
1932  }
1933  // Fill in the signature array, for the calling-convention call.
1934  int total_args_passed = method->size_of_parameters();
1935
1936  BasicType* in_sig_bt  = NEW_RESOURCE_ARRAY(BasicType, total_args_passed);
1937  VMRegPair  *in_regs   = NEW_RESOURCE_ARRAY(VMRegPair, total_args_passed);
1938
1939  // The signature we are going to use for the trap that dtrace will see
1940  // java/lang/String is converted. We drop "this" and any other object
1941  // is converted to NULL.  (A one-slot java/lang/Long object reference
1942  // is converted to a two-slot long, which is why we double the allocation).
1943  BasicType* out_sig_bt = NEW_RESOURCE_ARRAY(BasicType, total_args_passed * 2);
1944  VMRegPair* out_regs   = NEW_RESOURCE_ARRAY(VMRegPair, total_args_passed * 2);
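  // (For example, tracing through the conversion loop below, an instance
  //  method with signature (Ljava/lang/Long;I)V yields
  //      in_sig_bt  = { T_OBJECT /* this */, T_OBJECT, T_INT }
  //      out_sig_bt = { T_LONG, T_VOID, T_INT }
  //  -- the boxed Long widens to a two-slot long, which is why out_sig_bt
  //  may need up to twice total_args_passed entries.)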
1945
1946  int i=0;
1947  int total_strings = 0;
1948  int first_arg_to_pass = 0;
1949  int total_c_args = 0;
1950
1951  // Skip the receiver as dtrace doesn't want to see it
1952  if (!method->is_static()) {
1953    in_sig_bt[i++] = T_OBJECT;
1954    first_arg_to_pass = 1;
1955  }
1956
1957  // We need to convert the java args to where a native (non-jni) function
1958  // would expect them. To figure out where they go we convert the java
1959  // signature to a C signature.
1960
1961  SignatureStream ss(method->signature());
1962  for ( ; !ss.at_return_type(); ss.next()) {
1963    BasicType bt = ss.type();
1964    in_sig_bt[i++] = bt;  // Collect remaining bits of signature
1965    out_sig_bt[total_c_args++] = bt;
1966    if (bt == T_OBJECT) {
1967      symbolOop s = ss.as_symbol_or_null();
1968      if (s == vmSymbols::java_lang_String()) {
1969        total_strings++;
1970        out_sig_bt[total_c_args-1] = T_ADDRESS;
1971      } else if (s == vmSymbols::java_lang_Boolean() ||
1972                 s == vmSymbols::java_lang_Character() ||
1973                 s == vmSymbols::java_lang_Byte() ||
1974                 s == vmSymbols::java_lang_Short() ||
1975                 s == vmSymbols::java_lang_Integer() ||
1976                 s == vmSymbols::java_lang_Float()) {
1977        out_sig_bt[total_c_args-1] = T_INT;
1978      } else if (s == vmSymbols::java_lang_Long() ||
1979                 s == vmSymbols::java_lang_Double()) {
1980        out_sig_bt[total_c_args-1] = T_LONG;
1981        out_sig_bt[total_c_args++] = T_VOID;
1982      }
1983    } else if (bt == T_LONG || bt == T_DOUBLE) {
1984      in_sig_bt[i++] = T_VOID;   // Longs & doubles take 2 Java slots
1985      // We convert double to long
1986      out_sig_bt[total_c_args-1] = T_LONG;
1987      out_sig_bt[total_c_args++] = T_VOID;
1988    } else if (bt == T_FLOAT) {
1989      // We convert float to int
1990      out_sig_bt[total_c_args-1] = T_INT;
1991    }
1992  }
1993
1994  assert(i==total_args_passed, "validly parsed signature");
1995
1996  // Now get the compiled-Java layout as input arguments
1997  int comp_args_on_stack;
1998  comp_args_on_stack = SharedRuntime::java_calling_convention(
1999      in_sig_bt, in_regs, total_args_passed, false);
2000
2001  // Now figure out where the args must be stored and how much stack space
2002  // they require (neglecting out_preserve_stack_slots, but including space for
2003  // storing the first six register arguments). The layout is odd; see int_stk_helper.
2004
2005  int out_arg_slots;
2006  out_arg_slots = c_calling_convention(out_sig_bt, out_regs, total_c_args);
2007
2008  // Calculate the total number of stack slots we will need.
2009
2010  // First count the abi requirement plus all of the outgoing args
2011  int stack_slots = SharedRuntime::out_preserve_stack_slots() + out_arg_slots;
2012
2013  // Now space for the string(s) we must convert
2014  int* string_locs   = NEW_RESOURCE_ARRAY(int, total_strings + 1);
2015  for (i = 0; i < total_strings ; i++) {
2016    string_locs[i] = stack_slots;
2017    stack_slots += max_dtrace_string_size / VMRegImpl::stack_slot_size;
2018  }
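  // (Assuming max_dtrace_string_size == 256, matching the per-string limit
  //  mentioned above, and 4-byte stack slots, each string reserves
  //  256 / 4 == 64 slots here.)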
2019
2020  // Plus the temps we might need to juggle register args
2021  // regs take two slots each
2022  stack_slots += (Argument::n_int_register_parameters_c +
2023                  Argument::n_float_register_parameters_c) * 2;
2024
2025
2026  // + 4 for return address (which we own) and saved rbp
2027
2028  stack_slots += 4;
2029
2030  // Ok The space we have allocated will look like:
2031  //
2032  //
2033  // FP-> |                     |
2034  //      |---------------------|
2035  //      | string[n]           |
2036  //      |---------------------| <- string_locs[n]
2037  //      | string[n-1]         |
2038  //      |---------------------| <- string_locs[n-1]
2039  //      | ...                 |
2040  //      | ...                 |
2041  //      |---------------------| <- string_locs[1]
2042  //      | string[0]           |
2043  //      |---------------------| <- string_locs[0]
2044  //      | outbound memory     |
2045  //      | based arguments     |
2046  //      |                     |
2047  //      |---------------------|
2048  //      |                     |
2049  // SP-> | out_preserved_slots |
2050  //
2051  //
2052
2053  // Now compute actual number of stack words we need rounding to make
2054  // stack properly aligned.
2055  stack_slots = round_to(stack_slots, 4 * VMRegImpl::slots_per_word);
2056
2057  int stack_size = stack_slots * VMRegImpl::stack_slot_size;
2058
2059  intptr_t start = (intptr_t)__ pc();
2060
2061  // First thing make an ic check to see if we should even be here
2062
2063  // We are free to use all registers as temps without saving them and
2064  // restoring them except rbp. rbp, is the only callee save register
2065  // as far as the interpreter and the compiler(s) are concerned.
2066
2067  const Register ic_reg = rax;
2068  const Register receiver = rcx;
2069  Label hit;
2070  Label exception_pending;
2071
2072
2073  __ verify_oop(receiver);
2074  __ cmpl(ic_reg, Address(receiver, oopDesc::klass_offset_in_bytes()));
2075  __ jcc(Assembler::equal, hit);
2076
2077  __ jump(RuntimeAddress(SharedRuntime::get_ic_miss_stub()));
2078
2079  // Verified entry must be aligned for code patching,
2080  // and the first 5 bytes must be in the same cache line.
2081  // If we align at 8 then we can be sure the 5 bytes are in the same line.
2082  __ align(8);
2083
2084  __ bind(hit);
2085
2086  int vep_offset = ((intptr_t)__ pc()) - start;
2087
2088
2089  // The instruction at the verified entry point must be 5 bytes or longer
2090  // because it can be patched on the fly by make_non_entrant. The stack bang
2091  // instruction fits that requirement.
2092
2093  // Generate stack overflow check
2094
2095  if (UseStackBanging) {
2096    if (stack_size <= StackShadowPages*os::vm_page_size()) {
2097      __ bang_stack_with_offset(StackShadowPages*os::vm_page_size());
2098    } else {
2099      __ movl(rax, stack_size);
2100      __ bang_stack_size(rax, rbx);
2101    }
2102  } else {
2103    // need a 5 byte instruction to allow MT safe patching to non-entrant
2104    __ fat_nop();
2105  }
2106
2107  assert(((uintptr_t)__ pc() - start - vep_offset) >= 5,
2108         "valid size for make_non_entrant");
2109
2110  // Generate a new frame for the wrapper.
2111  __ enter();
2112
2113  // -2 words because the return address is already present and so is saved rbp
2114  if (stack_size - 2*wordSize != 0) {
2115    __ subq(rsp, stack_size - 2*wordSize);
2116  }
2117
2118  // Frame is now completed as far as size and linkage.
2119
2120  int frame_complete = ((intptr_t)__ pc()) - start;
2121
2122  int c_arg, j_arg;
2123
2124  // State of input register args
2125
2126  bool  live[ConcreteRegisterImpl::number_of_registers];
2127
2128  live[j_rarg0->as_VMReg()->value()] = false;
2129  live[j_rarg1->as_VMReg()->value()] = false;
2130  live[j_rarg2->as_VMReg()->value()] = false;
2131  live[j_rarg3->as_VMReg()->value()] = false;
2132  live[j_rarg4->as_VMReg()->value()] = false;
2133  live[j_rarg5->as_VMReg()->value()] = false;
2134
2135  live[j_farg0->as_VMReg()->value()] = false;
2136  live[j_farg1->as_VMReg()->value()] = false;
2137  live[j_farg2->as_VMReg()->value()] = false;
2138  live[j_farg3->as_VMReg()->value()] = false;
2139  live[j_farg4->as_VMReg()->value()] = false;
2140  live[j_farg5->as_VMReg()->value()] = false;
2141  live[j_farg6->as_VMReg()->value()] = false;
2142  live[j_farg7->as_VMReg()->value()] = false;
2143
2144
2145  bool rax_is_zero = false;
2146
2147  // All args (except strings) destined for the stack are moved first
2148  for (j_arg = first_arg_to_pass, c_arg = 0 ;
2149       j_arg < total_args_passed ; j_arg++, c_arg++ ) {
2150    VMRegPair src = in_regs[j_arg];
2151    VMRegPair dst = out_regs[c_arg];
2152
2153    // Get the real reg value or a dummy (rsp)
2154
2155    int src_reg = src.first()->is_reg() ?
2156                  src.first()->value() :
2157                  rsp->as_VMReg()->value();
2158
2159    bool useless =  in_sig_bt[j_arg] == T_ARRAY ||
2160                    (in_sig_bt[j_arg] == T_OBJECT &&
2161                     out_sig_bt[c_arg] != T_INT &&
2162                     out_sig_bt[c_arg] != T_ADDRESS &&
2163                     out_sig_bt[c_arg] != T_LONG);
2164
2165    live[src_reg] = !useless;
2166
2167    if (dst.first()->is_stack()) {
2168
2169      // Even though a string arg in a register is still live after this loop,
2170      // it will be dead after the string conversion loop (next), so we take
2171      // advantage of that now to keep the liveness bookkeeping simpler.
2172
2173      live[src_reg] = false;
2174      switch (in_sig_bt[j_arg]) {
2175
2176        case T_ARRAY:
2177        case T_OBJECT:
2178          {
2179            Address stack_dst(rsp, reg2offset_out(dst.first()));
2180
2181            if (out_sig_bt[c_arg] == T_INT || out_sig_bt[c_arg] == T_LONG) {
2182              // need to unbox a one-word value
2183              Register in_reg = rax;
2184              if ( src.first()->is_reg() ) {
2185                in_reg = src.first()->as_Register();
2186              } else {
2187                __ movq(rax, Address(rbp, reg2offset_in(src.first())));
2188                rax_is_zero = false;
2189              }
2190              Label skipUnbox;
2191              __ movptr(Address(rsp, reg2offset_out(dst.first())),
2192                        (int32_t)NULL_WORD);
2193              __ testq(in_reg, in_reg);
2194              __ jcc(Assembler::zero, skipUnbox);
2195
2196              BasicType bt = out_sig_bt[c_arg];
2197              int box_offset = java_lang_boxing_object::value_offset_in_bytes(bt);
2198              Address src1(in_reg, box_offset);
2199              if ( bt == T_LONG ) {
2200                __ movq(in_reg,  src1);
2201                __ movq(stack_dst, in_reg);
2202                assert(out_sig_bt[c_arg+1] == T_VOID, "must be");
2203                ++c_arg; // skip over T_VOID to keep the loop indices in sync
2204              } else {
2205                __ movl(in_reg,  src1);
2206                __ movl(stack_dst, in_reg);
2207              }
2208
2209              __ bind(skipUnbox);
2210            } else if (out_sig_bt[c_arg] != T_ADDRESS) {
2211              // Convert the arg to NULL
2212              if (!rax_is_zero) {
2213                __ xorq(rax, rax);
2214                rax_is_zero = true;
2215              }
2216              __ movq(stack_dst, rax);
2217            }
2218          }
2219          break;
2220
2221        case T_VOID:
2222          break;
2223
2224        case T_FLOAT:
2225          // This does the right thing since we know it is destined for the
2226          // stack
2227          float_move(masm, src, dst);
2228          break;
2229
2230        case T_DOUBLE:
2231          // This does the right thing since we know it is destined for the
2232          // stack
2233          double_move(masm, src, dst);
2234          break;
2235
2236        case T_LONG :
2237          long_move(masm, src, dst);
2238          break;
2239
2240        case T_ADDRESS: assert(false, "found T_ADDRESS in java args"); // fall through (assert compiles away in product)
2241
2242        default:
2243          move32_64(masm, src, dst);
2244      }
2245    }
2246
2247  }
2248
2249  // If we have any strings we must store any register based arg to the stack
2250  // This includes any still live xmm registers too.
2251
2252  int sid = 0;
2253
2254  if (total_strings > 0 ) {
2255    for (j_arg = first_arg_to_pass, c_arg = 0 ;
2256         j_arg < total_args_passed ; j_arg++, c_arg++ ) {
2257      VMRegPair src = in_regs[j_arg];
2258      VMRegPair dst = out_regs[c_arg];
2259
2260      if (src.first()->is_reg()) {
2261        Address src_tmp(rbp, fp_offset[src.first()->value()]);
2262
2263        // string oops were left untouched by the previous loop even if the
2264        // eventual (converted) arg is destined for the stack, so park them
2265        // away now (except for the first)
2266
2267        if (out_sig_bt[c_arg] == T_ADDRESS) {
2268          Address utf8_addr = Address(
2269              rsp, string_locs[sid++] * VMRegImpl::stack_slot_size);
2270          if (sid != 1) {
2271            // The first string arg won't be killed until after the utf8
2272            // conversion
2273            __ movq(utf8_addr, src.first()->as_Register());
2274          }
2275        } else if (dst.first()->is_reg()) {
2276          if (in_sig_bt[j_arg] == T_FLOAT || in_sig_bt[j_arg] == T_DOUBLE) {
2277
2278            // Convert the xmm register to an int and store it in the reserved
2279            // location for the eventual c register arg
2280            XMMRegister f = src.first()->as_XMMRegister();
2281            if (in_sig_bt[j_arg] == T_FLOAT) {
2282              __ movflt(src_tmp, f);
2283            } else {
2284              __ movdbl(src_tmp, f);
2285            }
2286          } else {
2287            // If the arg is an oop type we don't support, don't bother to store
2288            // it; remember, strings were handled above.
2289            bool useless =  in_sig_bt[j_arg] == T_ARRAY ||
2290                            (in_sig_bt[j_arg] == T_OBJECT &&
2291                             out_sig_bt[c_arg] != T_INT &&
2292                             out_sig_bt[c_arg] != T_LONG);
2293
2294            if (!useless) {
2295              __ movq(src_tmp, src.first()->as_Register());
2296            }
2297          }
2298        }
2299      }
2300      if (in_sig_bt[j_arg] == T_OBJECT && out_sig_bt[c_arg] == T_LONG) {
2301        assert(out_sig_bt[c_arg+1] == T_VOID, "must be");
2302        ++c_arg; // skip over T_VOID to keep the loop indices in sync
2303      }
2304    }
2305
2306    // Now that the volatile registers are safe, convert all the strings
2307    sid = 0;
2308
2309    for (j_arg = first_arg_to_pass, c_arg = 0 ;
2310         j_arg < total_args_passed ; j_arg++, c_arg++ ) {
2311      if (out_sig_bt[c_arg] == T_ADDRESS) {
2312        // It's a string
2313        Address utf8_addr = Address(
2314            rsp, string_locs[sid++] * VMRegImpl::stack_slot_size);
2315        // The first string we find might still be in the original java arg
2316        // register
2317
2318        VMReg src = in_regs[j_arg].first();
2319
2320        // We will need to eventually save the final argument to the trap
2321        // in the non-volatile location dedicated to src. This is the offset
2322        // from fp we will use.
2323        int src_off = src->is_reg() ?
2324            fp_offset[src->value()] : reg2offset_in(src);
2325
2326        // This is where the argument will eventually reside
2327        VMRegPair dst = out_regs[c_arg];
2328
2329        if (src->is_reg()) {
2330          if (sid == 1) {
2331            __ movq(c_rarg0, src->as_Register());
2332          } else {
2333            __ movq(c_rarg0, utf8_addr);
2334          }
2335        } else {
2336          // arg is still in the original location
2337          __ movq(c_rarg0, Address(rbp, reg2offset_in(src)));
2338        }
2339        Label done, convert;
2340
2341        // see if the oop is NULL
2342        __ testq(c_rarg0, c_rarg0);
2343        __ jcc(Assembler::notEqual, convert);
2344
2345        if (dst.first()->is_reg()) {
2346          // Save the ptr to the utf8 string in the original src location or
2347          // the temp dedicated to it
2348          __ movq(Address(rbp, src_off), c_rarg0);
2349        } else {
2350          __ movq(Address(rsp, reg2offset_out(dst.first())), c_rarg0);
2351        }
2352        __ jmp(done);
2353
2354        __ bind(convert);
2355
2356        __ lea(c_rarg1, utf8_addr);
2357        if (dst.first()->is_reg()) {
2358          __ movq(Address(rbp, src_off), c_rarg1);
2359        } else {
2360          __ movq(Address(rsp, reg2offset_out(dst.first())), c_rarg1);
2361        }
2362        // And do the conversion
2363        __ call(RuntimeAddress(
2364                CAST_FROM_FN_PTR(address, SharedRuntime::get_utf)));
2365
2366        __ bind(done);
2367      }
2368      if (in_sig_bt[j_arg] == T_OBJECT && out_sig_bt[c_arg] == T_LONG) {
2369        assert(out_sig_bt[c_arg+1] == T_VOID, "must be");
2370        ++c_arg; // skip over T_VOID to keep the loop indices in sync
2371      }
2372    }
2373    // The get_utf call killed all the c_arg registers
2374    live[c_rarg0->as_VMReg()->value()] = false;
2375    live[c_rarg1->as_VMReg()->value()] = false;
2376    live[c_rarg2->as_VMReg()->value()] = false;
2377    live[c_rarg3->as_VMReg()->value()] = false;
2378    live[c_rarg4->as_VMReg()->value()] = false;
2379    live[c_rarg5->as_VMReg()->value()] = false;
2380
2381    live[c_farg0->as_VMReg()->value()] = false;
2382    live[c_farg1->as_VMReg()->value()] = false;
2383    live[c_farg2->as_VMReg()->value()] = false;
2384    live[c_farg3->as_VMReg()->value()] = false;
2385    live[c_farg4->as_VMReg()->value()] = false;
2386    live[c_farg5->as_VMReg()->value()] = false;
2387    live[c_farg6->as_VMReg()->value()] = false;
2388    live[c_farg7->as_VMReg()->value()] = false;
2389  }
2390
2391  // Now we can finally move the register args to their desired locations
2392
2393  rax_is_zero = false;
2394
2395  for (j_arg = first_arg_to_pass, c_arg = 0 ;
2396       j_arg < total_args_passed ; j_arg++, c_arg++ ) {
2397
2398    VMRegPair src = in_regs[j_arg];
2399    VMRegPair dst = out_regs[c_arg];
2400
2401    // Only need to look for args destined for the integer registers (since we
2402    // convert float/double args to look like int/long outbound)
2403    if (dst.first()->is_reg()) {
2404      Register r =  dst.first()->as_Register();
2405
2406      // Check if the java arg is unsupported and therefore useless
2407      bool useless =  in_sig_bt[j_arg] == T_ARRAY ||
2408                      (in_sig_bt[j_arg] == T_OBJECT &&
2409                       out_sig_bt[c_arg] != T_INT &&
2410                       out_sig_bt[c_arg] != T_ADDRESS &&
2411                       out_sig_bt[c_arg] != T_LONG);
2412
2413
2414      // If we're going to kill an existing arg save it first
2415      if (live[dst.first()->value()]) {
2416        // you can't kill yourself
2417        if (src.first() != dst.first()) {
2418          __ movq(Address(rbp, fp_offset[dst.first()->value()]), r);
2419        }
2420      }
2421      if (src.first()->is_reg()) {
2422        if (live[src.first()->value()] ) {
2423          if (in_sig_bt[j_arg] == T_FLOAT) {
2424            __ movdl(r, src.first()->as_XMMRegister());
2425          } else if (in_sig_bt[j_arg] == T_DOUBLE) {
2426            __ movdq(r, src.first()->as_XMMRegister());
2427          } else if (r != src.first()->as_Register()) {
2428            if (!useless) {
2429              __ movq(r, src.first()->as_Register());
2430            }
2431          }
2432        } else {
2433          // If the arg is an oop type we don't support, don't bother to store
2434          // it
2435          if (!useless) {
2436            if (in_sig_bt[j_arg] == T_DOUBLE ||
2437                in_sig_bt[j_arg] == T_LONG  ||
2438                in_sig_bt[j_arg] == T_OBJECT ) {
2439              __ movq(r, Address(rbp, fp_offset[src.first()->value()]));
2440            } else {
2441              __ movl(r, Address(rbp, fp_offset[src.first()->value()]));
2442            }
2443          }
2444        }
2445        live[src.first()->value()] = false;
2446      } else if (!useless) {
2447        // full sized move even for int should be ok
2448        __ movq(r, Address(rbp, reg2offset_in(src.first())));
2449      }
2450
2451      // At this point r has the original java arg in the final location
2452      // (assuming it wasn't useless). If the java arg was an oop
2453      // we have a bit more to do
2454
2455      if (in_sig_bt[j_arg] == T_ARRAY || in_sig_bt[j_arg] == T_OBJECT ) {
2456        if (out_sig_bt[c_arg] == T_INT || out_sig_bt[c_arg] == T_LONG) {
2457          // need to unbox a one-word value
2458          Label skip;
2459          __ testq(r, r);
2460          __ jcc(Assembler::equal, skip);
2461          BasicType bt = out_sig_bt[c_arg];
2462          int box_offset = java_lang_boxing_object::value_offset_in_bytes(bt);
2463          Address src1(r, box_offset);
2464          if ( bt == T_LONG ) {
2465            __ movq(r, src1);
2466          } else {
2467            __ movl(r, src1);
2468          }
2469          __ bind(skip);
2470
2471        } else if (out_sig_bt[c_arg] != T_ADDRESS) {
2472          // Convert the arg to NULL
2473          __ xorq(r, r);
2474        }
2475      }
2476
2477      // dst can no longer be holding an input value
2478      live[dst.first()->value()] = false;
2479    }
2480    if (in_sig_bt[j_arg] == T_OBJECT && out_sig_bt[c_arg] == T_LONG) {
2481      assert(out_sig_bt[c_arg+1] == T_VOID, "must be");
2482      ++c_arg; // skip over T_VOID to keep the loop indices in sync
2483    }
2484  }
2485
2486
2487  // OK, now we are done. We need to place the nop that dtrace wants in order to
2488  // patch in the trap.
2489  int patch_offset = ((intptr_t)__ pc()) - start;
2490
2491  __ nop();
2492
2493
2494  // Return
2495
2496  __ leave();
2497  __ ret(0);
2498
2499  __ flush();
2500
2501  nmethod *nm = nmethod::new_dtrace_nmethod(
2502      method, masm->code(), vep_offset, patch_offset, frame_complete,
2503      stack_slots / VMRegImpl::slots_per_word);
2504  return nm;
2505
2506}
2507
2508#endif // HAVE_DTRACE_H
2509
2510// This function returns the adjustment size (in number of words) to a c2i adapter
2511// activation for use during deoptimization
2512int Deoptimization::last_frame_adjust(int callee_parameters, int callee_locals ) {
2513  return (callee_locals - callee_parameters) * Interpreter::stackElementWords;
2514}
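// (For instance, an interpreted callee with 3 parameters and 7 locals gets
//  an adjustment of (7 - 3) * Interpreter::stackElementWords words: the
//  extra non-parameter locals are carved out of the caller's frame when the
//  skeletal interpreter frames are laid out during deoptimization.)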
2515
2516
2517uint SharedRuntime::out_preserve_stack_slots() {
2518  return 0;
2519}
2520
2521
2522//------------------------------generate_deopt_blob----------------------------
2523void SharedRuntime::generate_deopt_blob() {
2524  // Allocate space for the code
2525  ResourceMark rm;
2526  // Setup code generation tools
2527  CodeBuffer buffer("deopt_blob", 2048, 1024);
2528  MacroAssembler* masm = new MacroAssembler(&buffer);
2529  int frame_size_in_words;
2530  OopMap* map = NULL;
2531  OopMapSet *oop_maps = new OopMapSet();
2532
2533  // -------------
2534  // This code enters when returning to a de-optimized nmethod.  A return
2535  // address has been pushed on the the stack, and return values are in
2536  // registers.
2537  // If we are doing a normal deopt then we were called from the patched
2538  // nmethod from the point we returned to the nmethod. So the return
2539  // address on the stack is wrong by NativeCall::instruction_size
2540  // We will adjust the value so it looks like we have the original return
2541  // address on the stack (like when we eagerly deoptimized).
2542  // In the case of an exception pending when deoptimizing, we enter
2543  // with a return address on the stack that points after the call we patched
2544  // into the exception handler. We have the following register state from,
2545  // e.g., the forward exception stub (see stubGenerator_x86_64.cpp).
2546  //    rax: exception oop
2547  //    rbx: exception handler
2548  //    rdx: throwing pc
2549  // So in this case we simply jam rdx into the useless return address and
2550  // the stack looks just like we want.
2551  //
2552  // At this point we need to de-opt.  We save the argument return
2553  // registers.  We call the first C routine, fetch_unroll_info().  This
2554  // routine captures the return values and returns a structure which
2555  // describes the current frame size and the sizes of all replacement frames.
2556  // The current frame is compiled code and may contain many inlined
2557  // functions, each with their own JVM state.  We pop the current frame, then
2558  // push all the new frames.  Then we call the C routine unpack_frames() to
2559  // populate these frames.  Finally unpack_frames() returns us the new target
2560  // address.  Notice that callee-save registers are BLOWN here; they have
2561  // already been captured in the vframeArray at the time the return PC was
2562  // patched.
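  // (A rough pseudo-code summary of the blob generated below:
  //
  //    save_live_registers();
  //    info = Deoptimization::fetch_unroll_info(thread);    // C call #1
  //    restore result registers; pop the deoptimized frame;
  //    for each replacement frame, outermost first:
  //      push a skeletal interpreter frame;
  //    Deoptimization::unpack_frames(thread, exec_mode);    // C call #2:
  //        fills the skeletal frames from the vframeArray;
  //    return into the topmost new frame.)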
2563  address start = __ pc();
2564  Label cont;
2565
2566  // Prolog for non exception case!
2567
2568  // Save everything in sight.
2569  map = RegisterSaver::save_live_registers(masm, 0, &frame_size_in_words);
2570
2571  // Normal deoptimization.  Save exec mode for unpack_frames.
2572  __ movl(r14, Deoptimization::Unpack_deopt); // callee-saved
2573  __ jmp(cont);
2574
2575  int reexecute_offset = __ pc() - start;
2576
2577  // Reexecute case
2578  // return address is the pc describes what bci to do re-execute at
2579
2580  // No need to update map as each call to save_live_registers will produce identical oopmap
2581  (void) RegisterSaver::save_live_registers(masm, 0, &frame_size_in_words);
2582
2583  __ movl(r14, Deoptimization::Unpack_reexecute); // callee-saved
2584  __ jmp(cont);
2585
2586  int exception_offset = __ pc() - start;
2587
2588  // Prolog for exception case
2589
2590  // all registers are dead at this entry point, except for rax, and
2591  // rdx which contain the exception oop and exception pc
2592  // respectively.  Set them in TLS and fall thru to the
2593  // unpack_with_exception_in_tls entry point.
2594
2595  __ movptr(Address(r15_thread, JavaThread::exception_pc_offset()), rdx);
2596  __ movptr(Address(r15_thread, JavaThread::exception_oop_offset()), rax);
2597
2598  int exception_in_tls_offset = __ pc() - start;
2599
2600  // new implementation because exception oop is now passed in JavaThread
2601
2602  // Prolog for exception case
2603  // All registers must be preserved because they might be used by LinearScan
2604  // Exception oop and throwing PC are passed in JavaThread
2605  // tos: stack at point of call to method that threw the exception (i.e. only
2606  // args are on the stack, no return address)
2607
2608  // make room on stack for the return address
2609  // It will be patched later with the throwing pc. The correct value is not
2610  // available now because loading it from memory would destroy registers.
2611  __ push(0);
2612
2613  // Save everything in sight.
2614  map = RegisterSaver::save_live_registers(masm, 0, &frame_size_in_words);
2615
2616  // Now it is safe to overwrite any register
2617
2618  // Deopt during an exception.  Save exec mode for unpack_frames.
2619  __ movl(r14, Deoptimization::Unpack_exception); // callee-saved
2620
2621  // load throwing pc from JavaThread and patch it as the return address
2622  // of the current frame. Then clear the field in JavaThread
2623
2624  __ movptr(rdx, Address(r15_thread, JavaThread::exception_pc_offset()));
2625  __ movptr(Address(rbp, wordSize), rdx);
2626  __ movptr(Address(r15_thread, JavaThread::exception_pc_offset()), (int32_t)NULL_WORD);
2627
2628#ifdef ASSERT
2629  // verify that there is really an exception oop in JavaThread
2630  __ movptr(rax, Address(r15_thread, JavaThread::exception_oop_offset()));
2631  __ verify_oop(rax);
2632
2633  // verify that there is no pending exception
2634  Label no_pending_exception;
2635  __ movptr(rax, Address(r15_thread, Thread::pending_exception_offset()));
2636  __ testptr(rax, rax);
2637  __ jcc(Assembler::zero, no_pending_exception);
2638  __ stop("must not have pending exception here");
2639  __ bind(no_pending_exception);
2640#endif
2641
2642  __ bind(cont);
2643
2644  // Call C code.  Need thread and this frame, but NOT official VM entry
2645  // crud.  We cannot block on this call, no GC can happen.
2646  //
2647  // UnrollBlock* fetch_unroll_info(JavaThread* thread)
2648
2649  // fetch_unroll_info needs to call last_java_frame().
2650
2651  __ set_last_Java_frame(noreg, noreg, NULL);
2652#ifdef ASSERT
2653  { Label L;
2654    __ cmpptr(Address(r15_thread,
2655                    JavaThread::last_Java_fp_offset()),
2656            (int32_t)0);
2657    __ jcc(Assembler::equal, L);
2658    __ stop("SharedRuntime::generate_deopt_blob: last_Java_fp not cleared");
2659    __ bind(L);
2660  }
2661#endif // ASSERT
2662  __ mov(c_rarg0, r15_thread);
2663  __ call(RuntimeAddress(CAST_FROM_FN_PTR(address, Deoptimization::fetch_unroll_info)));
2664
2665  // Need to have an oopmap that tells fetch_unroll_info where to
2666  // find any register it might need.
2667  oop_maps->add_gc_map(__ pc() - start, map);
2668
2669  __ reset_last_Java_frame(false, false);
2670
2671  // Load UnrollBlock* into rdi
2672  __ mov(rdi, rax);
2673
2674  Label noException;
2675  __ cmpl(r14, Deoptimization::Unpack_exception);   // Was exception pending?
2676  __ jcc(Assembler::notEqual, noException);
2677  __ movptr(rax, Address(r15_thread, JavaThread::exception_oop_offset()));
2678  // QQQ this is useless; it was NULL above
2679  __ movptr(rdx, Address(r15_thread, JavaThread::exception_pc_offset()));
2680  __ movptr(Address(r15_thread, JavaThread::exception_oop_offset()), (int32_t)NULL_WORD);
2681  __ movptr(Address(r15_thread, JavaThread::exception_pc_offset()), (int32_t)NULL_WORD);
2682
2683  __ verify_oop(rax);
2684
2685  // Overwrite the result registers with the exception results.
2686  __ movptr(Address(rsp, RegisterSaver::rax_offset_in_bytes()), rax);
2687  // I think this is useless
2688  __ movptr(Address(rsp, RegisterSaver::rdx_offset_in_bytes()), rdx);
2689
2690  __ bind(noException);
2691
2692  // Only register save data is on the stack.
2693  // Now restore the result registers.  Everything else is either dead
2694  // or captured in the vframeArray.
2695  RegisterSaver::restore_result_registers(masm);
2696
2697  // All of the register save area has been popped off the stack. Only the
2698  // return address remains.
2699
2700  // Pop all the frames we must move/replace.
2701  //
2702  // Frame picture (youngest to oldest)
2703  // 1: self-frame (no frame link)
2704  // 2: deopting frame  (no frame link)
2705  // 3: caller of deopting frame (could be compiled/interpreted).
2706  //
2707  // Note: by leaving the return address of self-frame on the stack
2708  // and using the size of frame 2 to adjust the stack
2709  // when we are done the return to frame 3 will still be on the stack.
2710
2711  // Pop deoptimized frame
2712  __ movl(rcx, Address(rdi, Deoptimization::UnrollBlock::size_of_deoptimized_frame_offset_in_bytes()));
2713  __ addptr(rsp, rcx);
2714
2715  // rsp should be pointing at the return address to the caller (3)
2716
2717  // Stack bang to make sure there's enough room for these interpreter frames.
2718  if (UseStackBanging) {
2719    __ movl(rbx, Address(rdi, Deoptimization::UnrollBlock::total_frame_sizes_offset_in_bytes()));
2720    __ bang_stack_size(rbx, rcx);
2721  }
2722
2723  // Load address of array of frame pcs into rcx
2724  __ movptr(rcx, Address(rdi, Deoptimization::UnrollBlock::frame_pcs_offset_in_bytes()));
2725
2726  // Trash the old pc
2727  __ addptr(rsp, wordSize);
2728
2729  // Load address of array of frame sizes into rsi
2730  __ movptr(rsi, Address(rdi, Deoptimization::UnrollBlock::frame_sizes_offset_in_bytes()));
2731
2732  // Load counter into rdx
2733  __ movl(rdx, Address(rdi, Deoptimization::UnrollBlock::number_of_frames_offset_in_bytes()));
2734
2735  // Pick up the initial fp we should save
2736  __ movptr(rbp, Address(rdi, Deoptimization::UnrollBlock::initial_fp_offset_in_bytes()));
2737
2738  // Now adjust the caller's stack to make up for the extra locals
2739  // but record the original sp so that we can save it in the skeletal interpreter
2740  // frame and the stack walking of interpreter_sender will get the unextended sp
2741  // value and not the "real" sp value.
2742
2743  const Register sender_sp = r8;
2744
2745  __ mov(sender_sp, rsp);
2746  __ movl(rbx, Address(rdi,
2747                       Deoptimization::UnrollBlock::
2748                       caller_adjustment_offset_in_bytes()));
2749  __ subptr(rsp, rbx);
2750
2751  // Push interpreter frames in a loop
2752  Label loop;
2753  __ bind(loop);
2754  __ movptr(rbx, Address(rsi, 0));      // Load frame size
2755#ifdef CC_INTERP
2756  __ subptr(rbx, 4*wordSize);           // we'll push pc and ebp by hand, plus two filler words
2757#ifdef ASSERT
2758  __ push(0xDEADDEAD);                  // Make a recognizable pattern
2759  __ push(0xDEADDEAD);
2760#else /* ASSERT */
2761  __ subptr(rsp, 2*wordSize);           // skip the "static long no_param"
2762#endif /* ASSERT */
2763#else
2764  __ subptr(rbx, 2*wordSize);           // We'll push pc and ebp by hand
2765#endif // CC_INTERP
2766  __ pushptr(Address(rcx, 0));          // Save return address
2767  __ enter();                           // Save old & set new ebp
2768  __ subptr(rsp, rbx);                  // Prolog
2769#ifdef CC_INTERP
2770  __ movptr(Address(rbp,
2771                  -(sizeof(BytecodeInterpreter)) + in_bytes(byte_offset_of(BytecodeInterpreter, _sender_sp))),
2772            sender_sp); // Make it walkable
2773#else /* CC_INTERP */
2774  // This value is corrected by layout_activation_impl
2775  __ movptr(Address(rbp, frame::interpreter_frame_last_sp_offset * wordSize), (int32_t)NULL_WORD );
2776  __ movptr(Address(rbp, frame::interpreter_frame_sender_sp_offset * wordSize), sender_sp); // Make it walkable
2777#endif /* CC_INTERP */
2778  __ mov(sender_sp, rsp);               // Pass sender_sp to next frame
2779  __ addptr(rsi, wordSize);             // Bump array pointer (sizes)
2780  __ addptr(rcx, wordSize);             // Bump array pointer (pcs)
2781  __ decrementl(rdx);                   // Decrement counter
2782  __ jcc(Assembler::notZero, loop);
2783  __ pushptr(Address(rcx, 0));          // Save final return address
2784
2785  // Re-push self-frame
2786  __ enter();                           // Save old & set new ebp
2787
2788  // Allocate a full sized register save area.
2789  // Return address and rbp are in place, so we allocate two fewer words.
2790  __ subptr(rsp, (frame_size_in_words - 2) * wordSize);
2791
2792  // Restore frame locals after moving the frame
2793  __ movdbl(Address(rsp, RegisterSaver::xmm0_offset_in_bytes()), xmm0);
2794  __ movptr(Address(rsp, RegisterSaver::rax_offset_in_bytes()), rax);
2795
2796  // Call C code.  Need thread but NOT official VM entry
2797  // crud.  We cannot block on this call, no GC can happen.  Call should
2798  // restore return values to their stack-slots with the new SP.
2799  //
2800  // void Deoptimization::unpack_frames(JavaThread* thread, int exec_mode)
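  //
  // (Hedged note: exec_mode is whatever Unpack_* constant arrived in r14
  // (it was compared against Deoptimization::Unpack_exception above); the
  // uncommon-trap blob passes Unpack_uncommon_trap explicitly.)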
2801
2802  // Use rbp because the frames look interpreted now
2803  __ set_last_Java_frame(noreg, rbp, NULL);
2804
2805  __ mov(c_rarg0, r15_thread);
2806  __ movl(c_rarg1, r14); // second arg: exec_mode
2807  __ call(RuntimeAddress(CAST_FROM_FN_PTR(address, Deoptimization::unpack_frames)));
2808
2809  // Set an oopmap for the call site
2810  oop_maps->add_gc_map(__ pc() - start,
2811                       new OopMap( frame_size_in_words, 0 ));
2812
2813  __ reset_last_Java_frame(true, false);
2814
2815  // Collect return values
2816  __ movdbl(xmm0, Address(rsp, RegisterSaver::xmm0_offset_in_bytes()));
2817  __ movptr(rax, Address(rsp, RegisterSaver::rax_offset_in_bytes()));
2818  // This load is probably useless (it would be the throwing pc)
2819  __ movptr(rdx, Address(rsp, RegisterSaver::rdx_offset_in_bytes()));
2820
2821  // Pop self-frame.
2822  __ leave();                           // Epilog
2823
2824  // Jump to interpreter
2825  __ ret(0);
2826
2827  // Make sure all code is generated
2828  masm->flush();
2829
2830  _deopt_blob = DeoptimizationBlob::create(&buffer, oop_maps, 0, exception_offset, reexecute_offset, frame_size_in_words);
2831  _deopt_blob->set_unpack_with_exception_in_tls_offset(exception_in_tls_offset);
2832}
2833
2834#ifdef COMPILER2
2835//------------------------------generate_uncommon_trap_blob--------------------
2836void SharedRuntime::generate_uncommon_trap_blob() {
2837  // Allocate space for the code
2838  ResourceMark rm;
2839  // Setup code generation tools
2840  CodeBuffer buffer("uncommon_trap_blob", 2048, 1024);
2841  MacroAssembler* masm = new MacroAssembler(&buffer);
2842
2843  assert(SimpleRuntimeFrame::framesize % 4 == 0, "sp not 16-byte aligned");
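  // (Hedged arithmetic: framesize counts 4-byte compiler slots, so the
  // assert above says "framesize * 4 bytes is a multiple of 16"; a
  // framesize of, say, 6 slots would be 24 bytes and would fire it.)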
2844
2845  address start = __ pc();
2846
2847  // Push self-frame.  We get here with a return address on the
2848  // stack, so rsp is 8-byte aligned until we allocate our frame.
2849  __ subptr(rsp, SimpleRuntimeFrame::return_off << LogBytesPerInt); // Prolog!
2850
2851  // No callee saved registers. rbp is assumed implicitly saved
2852  __ movptr(Address(rsp, SimpleRuntimeFrame::rbp_off << LogBytesPerInt), rbp);
2853
2854  // The compiler left unloaded_class_index in j_rarg0; move it to where the
2855  // runtime expects it.
2856  __ movl(c_rarg1, j_rarg0);
2857
2858  __ set_last_Java_frame(noreg, noreg, NULL);
2859
2860  // Call C code.  Need thread but NOT official VM entry
2861  // crud.  We cannot block on this call, no GC can happen.  Call should
2862  // capture callee-saved registers as well as return values.
2863  // Thread is in rdi already.
2864  //
2865  // UnrollBlock* uncommon_trap(JavaThread* thread, jint unloaded_class_index);
2866
2867  __ mov(c_rarg0, r15_thread);
2868  __ call(RuntimeAddress(CAST_FROM_FN_PTR(address, Deoptimization::uncommon_trap)));
2869
2870  // Set an oopmap for the call site
2871  OopMapSet* oop_maps = new OopMapSet();
2872  OopMap* map = new OopMap(SimpleRuntimeFrame::framesize, 0);
2873
2874  // location of rbp is known implicitly by the frame sender code
2875
2876  oop_maps->add_gc_map(__ pc() - start, map);
2877
2878  __ reset_last_Java_frame(false, false);
2879
2880  // Load UnrollBlock* into rdi
2881  __ mov(rdi, rax);
2882
2883  // Pop all the frames we must move/replace.
2884  //
2885  // Frame picture (youngest to oldest)
2886  // 1: self-frame (no frame link)
2887  // 2: deopting frame  (no frame link)
2888  // 3: caller of deopting frame (could be compiled/interpreted).
2889
2890  // Pop self-frame.  We have no frame, and must rely only on rax and rsp.
2891  __ addptr(rsp, (SimpleRuntimeFrame::framesize - 2) << LogBytesPerInt); // Epilog!
2892
2893  // Pop deoptimized frame (int)
2894  __ movl(rcx, Address(rdi,
2895                       Deoptimization::UnrollBlock::
2896                       size_of_deoptimized_frame_offset_in_bytes()));
2897  __ addptr(rsp, rcx);
2898
2899  // rsp should be pointing at the return address to the caller (3)
2900
2901  // Stack bang to make sure there's enough room for these interpreter frames.
2902  if (UseStackBanging) {
2903    __ movl(rbx, Address(rdi, Deoptimization::UnrollBlock::total_frame_sizes_offset_in_bytes()));
2904    __ bang_stack_size(rbx, rcx);
2905  }
2906
2907  // Load address of array of frame pcs into rcx (address*)
2908  __ movptr(rcx,
2909            Address(rdi,
2910                    Deoptimization::UnrollBlock::frame_pcs_offset_in_bytes()));
2911
2912  // Trash the return pc
2913  __ addptr(rsp, wordSize);
2914
2915  // Load address of array of frame sizes into rsi (intptr_t*)
2916  __ movptr(rsi, Address(rdi,
2917                         Deoptimization::UnrollBlock::
2918                         frame_sizes_offset_in_bytes()));
2919
2920  // Counter
2921  __ movl(rdx, Address(rdi,
2922                       Deoptimization::UnrollBlock::
2923                       number_of_frames_offset_in_bytes())); // (int)
2924
2925  // Pick up the initial fp we should save
2926  __ movptr(rbp,
2927            Address(rdi,
2928                    Deoptimization::UnrollBlock::initial_fp_offset_in_bytes()));
2929
2930  // Now adjust the caller's stack to make up for the extra locals but
2931  // record the original sp so that we can save it in the skeletal
2932  // interpreter frame and the stack walking of interpreter_sender
2933  // will get the unextended sp value and not the "real" sp value.
2934
2935  const Register sender_sp = r8;
2936
2937  __ mov(sender_sp, rsp);
2938  __ movl(rbx, Address(rdi,
2939                       Deoptimization::UnrollBlock::
2940                       caller_adjustment_offset_in_bytes())); // (int)
2941  __ subptr(rsp, rbx);
2942
2943  // Push interpreter frames in a loop
2944  Label loop;
2945  __ bind(loop);
2946  __ movptr(rbx, Address(rsi, 0)); // Load frame size
2947  __ subptr(rbx, 2 * wordSize);    // We'll push pc and rbp by hand
2948  __ pushptr(Address(rcx, 0));     // Save return address
2949  __ enter();                      // Save old & set new rbp
2950  __ subptr(rsp, rbx);             // Prolog
2951#ifdef CC_INTERP
2952  __ movptr(Address(rbp,
2953                  -(sizeof(BytecodeInterpreter)) + in_bytes(byte_offset_of(BytecodeInterpreter, _sender_sp))),
2954            sender_sp); // Make it walkable
2955#else // CC_INTERP
2956  __ movptr(Address(rbp, frame::interpreter_frame_sender_sp_offset * wordSize),
2957            sender_sp);            // Make it walkable
2958  // This value is corrected by layout_activation_impl
2959  __ movptr(Address(rbp, frame::interpreter_frame_last_sp_offset * wordSize), (int32_t)NULL_WORD );
2960#endif // CC_INTERP
2961  __ mov(sender_sp, rsp);          // Pass sender_sp to next frame
2962  __ addptr(rsi, wordSize);        // Bump array pointer (sizes)
2963  __ addptr(rcx, wordSize);        // Bump array pointer (pcs)
2964  __ decrementl(rdx);              // Decrement counter
2965  __ jcc(Assembler::notZero, loop);
2966  __ pushptr(Address(rcx, 0));     // Save final return address
2967
2968  // Re-push self-frame
2969  __ enter();                 // Save old & set new rbp
2970  __ subptr(rsp, (SimpleRuntimeFrame::framesize - 4) << LogBytesPerInt);
2971                              // Prolog
2972
2973  // Use rbp because the frames look interpreted now
2974  __ set_last_Java_frame(noreg, rbp, NULL);
2975
2976  // Call C code.  Need thread but NOT official VM entry
2977  // crud.  We cannot block on this call, no GC can happen.  Call should
2978  // restore return values to their stack-slots with the new SP.
2979  // Thread is in rdi already.
2980  //
2981  // BasicType unpack_frames(JavaThread* thread, int exec_mode);
2982
2983  __ mov(c_rarg0, r15_thread);
2984  __ movl(c_rarg1, Deoptimization::Unpack_uncommon_trap);
2985  __ call(RuntimeAddress(CAST_FROM_FN_PTR(address, Deoptimization::unpack_frames)));
2986
2987  // Set an oopmap for the call site
2988  oop_maps->add_gc_map(__ pc() - start, new OopMap(SimpleRuntimeFrame::framesize, 0));
2989
2990  __ reset_last_Java_frame(true, false);
2991
2992  // Pop self-frame.
2993  __ leave();                 // Epilog
2994
2995  // Jump to interpreter
2996  __ ret(0);
2997
2998  // Make sure all code is generated
2999  masm->flush();
3000
3001  _uncommon_trap_blob = UncommonTrapBlob::create(&buffer, oop_maps,
3002                                                 SimpleRuntimeFrame::framesize >> 1);
3003}
3004#endif // COMPILER2
3005
3006
3007//------------------------------generate_handler_blob------
3008//
3009// Generate a special Compile2Runtime blob that saves all registers,
3010  // and sets up an oopmap.
3011//
3012static SafepointBlob* generate_handler_blob(address call_ptr, bool cause_return) {
3013  assert(StubRoutines::forward_exception_entry() != NULL,
3014         "must be generated before");
3015
3016  ResourceMark rm;
3017  OopMapSet *oop_maps = new OopMapSet();
3018  OopMap* map;
3019
3020  // Allocate space for the code.  Setup code generation tools.
3021  CodeBuffer buffer("handler_blob", 2048, 1024);
3022  MacroAssembler* masm = new MacroAssembler(&buffer);
3023
3024  address start   = __ pc();
3025  address call_pc = NULL;
3026  int frame_size_in_words;
3027
3028  // Make room for the return address (the dummy is overwritten with the real pc below)
3029  if (!cause_return) {
3030    __ push(rbx);
3031  }
3032
3033  // Save registers, fpu state, and flags
3034  map = RegisterSaver::save_live_registers(masm, 0, &frame_size_in_words);
3035
3036  // The following is basically a call_VM.  However, we need the precise
3037  // address of the call in order to generate an oopmap. Hence, we do all the
3038  // work ourselves.
3039
3040  __ set_last_Java_frame(noreg, noreg, NULL);
3041
3042  // The return address must always be correct so that the frame constructor never
3043  // sees an invalid pc.
3044
3045  if (!cause_return) {
3046    // overwrite the dummy value we pushed on entry
3047    __ movptr(c_rarg0, Address(r15_thread, JavaThread::saved_exception_pc_offset()));
3048    __ movptr(Address(rbp, wordSize), c_rarg0);
3049  }
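  // (Hedged note: in the branch above, Address(rbp, wordSize) is the
  // return-address slot of the frame built by save_live_registers, so the
  // frame now reports the real faulting pc instead of the dummy.)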
3050
3051  // Do the call
3052  __ mov(c_rarg0, r15_thread);
3053  __ call(RuntimeAddress(call_ptr));
3054
3055  // Set an oopmap for the call site.  This oopmap will map all
3056  // oop-registers and debug-info registers as callee-saved.  This
3057  // will allow deoptimization at this safepoint to find all possible
3058  // debug-info recordings, as well as let GC find all oops.
3059
3060  oop_maps->add_gc_map( __ pc() - start, map);
3061
3062  Label noException;
3063
3064  __ reset_last_Java_frame(false, false);
3065
3066  __ cmpptr(Address(r15_thread, Thread::pending_exception_offset()), (int32_t)NULL_WORD);
3067  __ jcc(Assembler::equal, noException);
3068
3069  // Exception pending
3070
3071  RegisterSaver::restore_live_registers(masm);
3072
3073  __ jump(RuntimeAddress(StubRoutines::forward_exception_entry()));
3074
3075  // No exception case
3076  __ bind(noException);
3077
3078  // Normal exit, restore registers and exit.
3079  RegisterSaver::restore_live_registers(masm);
3080
3081  __ ret(0);
3082
3083  // Make sure all code is generated
3084  masm->flush();
3085
3086  // Fill-out other meta info
3087  return SafepointBlob::create(&buffer, oop_maps, frame_size_in_words);
3088}
3089
3090//
3091  // generate_resolve_blob - call resolution (static/virtual/opt-virtual/ic-miss)
3092//
3093  // Generate a stub that calls into the VM to find out the proper destination
3094  // of a Java call. All the argument registers are live at this point,
3095  // but since this is generic code we don't know what they are; the caller
3096  // must do any GC of the args.
3097//
3098static RuntimeStub* generate_resolve_blob(address destination, const char* name) {
3099  assert (StubRoutines::forward_exception_entry() != NULL, "must be generated before");
3100
3101  // allocate space for the code
3102  ResourceMark rm;
3103
3104  CodeBuffer buffer(name, 1000, 512);
3105  MacroAssembler* masm = new MacroAssembler(&buffer);
3106
3107  int frame_size_in_words;
3108
3109  OopMapSet *oop_maps = new OopMapSet();
3110  OopMap* map = NULL;
3111
3112  int start = __ offset();
3113
3114  map = RegisterSaver::save_live_registers(masm, 0, &frame_size_in_words);
3115
3116  int frame_complete = __ offset();
3117
3118  __ set_last_Java_frame(noreg, noreg, NULL);
3119
3120  __ mov(c_rarg0, r15_thread);
3121
3122  __ call(RuntimeAddress(destination));
3123
3124
3125  // Set an oopmap for the call site.
3126  // We need this not only for callee-saved registers, but also for volatile
3127  // registers that the compiler might be keeping live across a safepoint.
3128
3129  oop_maps->add_gc_map( __ offset() - start, map);
3130
3131  // rax contains the address we are going to jump to, assuming no exception was installed
3132
3133  // clear last_Java_sp
3134  __ reset_last_Java_frame(false, false);
3135  // check for pending exceptions
3136  Label pending;
3137  __ cmpptr(Address(r15_thread, Thread::pending_exception_offset()), (int32_t)NULL_WORD);
3138  __ jcc(Assembler::notEqual, pending);
3139
3140  // get the returned methodOop
3141  __ movptr(rbx, Address(r15_thread, JavaThread::vm_result_offset()));
3142  __ movptr(Address(rsp, RegisterSaver::rbx_offset_in_bytes()), rbx);
3143
3144  __ movptr(Address(rsp, RegisterSaver::rax_offset_in_bytes()), rax);
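  // (Hedged note: rbx and rax are written back into the register save area
  // so that restore_live_registers below reloads them with the resolved
  // methodOop and destination address rather than their stale entry values.)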
3145
3146  RegisterSaver::restore_live_registers(masm);
3147
3148  // We are back to the original state on entry and ready to go.
3149
3150  __ jmp(rax);
3151
3152  // Pending exception after the safepoint
3153
3154  __ bind(pending);
3155
3156  RegisterSaver::restore_live_registers(masm);
3157
3158  // exception pending => remove activation and forward to exception handler
3159
3160  __ movptr(Address(r15_thread, JavaThread::vm_result_offset()), (int)NULL_WORD);
3161
3162  __ movptr(rax, Address(r15_thread, Thread::pending_exception_offset()));
3163  __ jump(RuntimeAddress(StubRoutines::forward_exception_entry()));
3164
3165  // -------------
3166  // make sure all code is generated
3167  masm->flush();
3168
3169  // return the blob
3170  // (frame size is passed in words, which is what new_runtime_stub expects)
3171  return RuntimeStub::new_runtime_stub(name, &buffer, frame_complete, frame_size_in_words, oop_maps, true);
3172}
3173
3174
3175void SharedRuntime::generate_stubs() {
3176
3177  _wrong_method_blob = generate_resolve_blob(CAST_FROM_FN_PTR(address, SharedRuntime::handle_wrong_method),
3178                                        "wrong_method_stub");
3179  _ic_miss_blob =      generate_resolve_blob(CAST_FROM_FN_PTR(address, SharedRuntime::handle_wrong_method_ic_miss),
3180                                        "ic_miss_stub");
3181  _resolve_opt_virtual_call_blob = generate_resolve_blob(CAST_FROM_FN_PTR(address, SharedRuntime::resolve_opt_virtual_call_C),
3182                                        "resolve_opt_virtual_call");
3183
3184  _resolve_virtual_call_blob = generate_resolve_blob(CAST_FROM_FN_PTR(address, SharedRuntime::resolve_virtual_call_C),
3185                                        "resolve_virtual_call");
3186
3187  _resolve_static_call_blob = generate_resolve_blob(CAST_FROM_FN_PTR(address, SharedRuntime::resolve_static_call_C),
3188                                        "resolve_static_call");
3189  _polling_page_safepoint_handler_blob =
3190    generate_handler_blob(CAST_FROM_FN_PTR(address,
3191                   SafepointSynchronize::handle_polling_page_exception), false);
3192
3193  _polling_page_return_handler_blob =
3194    generate_handler_blob(CAST_FROM_FN_PTR(address,
3195                   SafepointSynchronize::handle_polling_page_exception), true);
3196
3197  generate_deopt_blob();
3198
3199#ifdef COMPILER2
3200  generate_uncommon_trap_blob();
3201#endif // COMPILER2
3202}
3203
3204
3205#ifdef COMPILER2
3206// This is here instead of runtime_x86_64.cpp because it uses SimpleRuntimeFrame
3207//
3208//------------------------------generate_exception_blob---------------------------
3209  // Creates the exception blob.
3210  // Compiled code jumps to this blob when an exception is thrown
3211// (see emit_exception_handler in x86_64.ad file)
3212//
3213  // Given an exception pc at a call, we call into the runtime for the
3214// handler in this method. This handler might merely restore state
3215  // (i.e. callee-saved registers), unwind the frame, and jump to the
3216  // exception handler for the nmethod if there is no Java-level handler
3217// for the nmethod.
3218//
3219// This code is entered with a jmp.
3220//
3221// Arguments:
3222//   rax: exception oop
3223//   rdx: exception pc
3224//
3225// Results:
3226//   rax: exception oop
3227//   rdx: exception pc in caller or ???
3228//   destination: exception handler of caller
3229//
3230// Note: the exception pc MUST be at a call (precise debug information)
3231//       Registers rax, rdx, rcx, rsi, rdi, r8-r11 are not callee saved.
3232//
3233
3234void OptoRuntime::generate_exception_blob() {
3235  assert(!OptoRuntime::is_callee_saved_register(RDX_num), "");
3236  assert(!OptoRuntime::is_callee_saved_register(RAX_num), "");
3237  assert(!OptoRuntime::is_callee_saved_register(RCX_num), "");
3238
3239  assert(SimpleRuntimeFrame::framesize % 4 == 0, "sp not 16-byte aligned");
3240
3241  // Allocate space for the code
3242  ResourceMark rm;
3243  // Setup code generation tools
3244  CodeBuffer buffer("exception_blob", 2048, 1024);
3245  MacroAssembler* masm = new MacroAssembler(&buffer);
3246
3247
3248  address start = __ pc();
3249
3250  // Exception pc is 'return address' for stack walker
3251  __ push(rdx);
3252  __ subptr(rsp, SimpleRuntimeFrame::return_off << LogBytesPerInt); // Prolog
3253
3254  // Save callee-saved registers.  See x86_64.ad.
3255
3256  // rbp is an implicitly saved callee-saved register (i.e. the calling
3257  // convention will save/restore it in the prolog/epilog). Other than that
3258  // there are no callee-saved registers now that adapter frames are gone.
3259
3260  __ movptr(Address(rsp, SimpleRuntimeFrame::rbp_off << LogBytesPerInt), rbp);
3261
3262  // Store exception in Thread object. We cannot pass any arguments to the
3263  // handle_exception call, since we do not want to make any assumption
3264  // about the size of the frame in which the exception happened.
3265  // c_rarg0 is either rdi (Linux) or rcx (Windows).
3266  __ movptr(Address(r15_thread, JavaThread::exception_oop_offset()),rax);
3267  __ movptr(Address(r15_thread, JavaThread::exception_pc_offset()), rdx);
3268
3269  // This call does all the hard work.  It checks if an exception handler
3270  // exists in the method.
3271  // If so, it returns the handler address.
3272  // If not, it prepares for stack-unwinding, restoring the callee-save
3273  // registers of the frame being removed.
3274  //
3275  // address OptoRuntime::handle_exception_C(JavaThread* thread)
3276
3277  __ set_last_Java_frame(noreg, noreg, NULL);
3278  __ mov(c_rarg0, r15_thread);
3279  __ call(RuntimeAddress(CAST_FROM_FN_PTR(address, OptoRuntime::handle_exception_C)));
3280
3281  // Set an oopmap for the call site.  This oopmap will only be used if we
3282  // are unwinding the stack.  Hence, all locations will be dead.
3283  // Callee-saved registers will be the same as the frame above (i.e.,
3284  // handle_exception_stub), since they were restored when we got the
3285  // exception.
3286
3287  OopMapSet* oop_maps = new OopMapSet();
3288
3289  oop_maps->add_gc_map( __ pc()-start, new OopMap(SimpleRuntimeFrame::framesize, 0));
3290
3291  __ reset_last_Java_frame(false, false);
3292
3293  // Restore callee-saved registers
3294
3295  // rbp is an implicitly saved callee-saved register (i.e. the calling
3296  // convention will save/restore it in the prolog/epilog). Other than that
3297  // there are no callee-saved registers now that adapter frames are gone.
3298
3299  __ movptr(rbp, Address(rsp, SimpleRuntimeFrame::rbp_off << LogBytesPerInt));
3300
3301  __ addptr(rsp, SimpleRuntimeFrame::return_off << LogBytesPerInt); // Epilog
3302  __ pop(rdx);                  // No need for exception pc anymore
3303
3304  // rax: exception handler
3305
3306  // Restore SP from BP if the exception PC is a MethodHandle call site.
3307  __ cmpl(Address(r15_thread, JavaThread::is_method_handle_return_offset()), 0);
3308  __ cmovptr(Assembler::notEqual, rsp, rbp);
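  // (Hedged paraphrase: if (thread->is_method_handle_return != 0) rsp = rbp;)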
3309
3310  // We have a handler in rax (could be deopt blob).
3311  __ mov(r8, rax);
3312
3313  // Get the exception oop
3314  __ movptr(rax, Address(r15_thread, JavaThread::exception_oop_offset()));
3315  // Get the exception pc in case we are deoptimized
3316  __ movptr(rdx, Address(r15_thread, JavaThread::exception_pc_offset()));
3317#ifdef ASSERT
3318  __ movptr(Address(r15_thread, JavaThread::exception_handler_pc_offset()), (int)NULL_WORD);
3319  __ movptr(Address(r15_thread, JavaThread::exception_pc_offset()), (int)NULL_WORD);
3320#endif
3321  // Clear the exception oop so GC no longer processes it as a root.
3322  __ movptr(Address(r15_thread, JavaThread::exception_oop_offset()), (int)NULL_WORD);
3323
3324  // rax: exception oop
3325  // r8:  exception handler
3326  // rdx: exception pc
3327  // Jump to handler
3328
3329  __ jmp(r8);
3330
3331  // Make sure all code is generated
3332  masm->flush();
3333
3334  // Set exception blob
3335  _exception_blob = ExceptionBlob::create(&buffer, oop_maps, SimpleRuntimeFrame::framesize >> 1);
3336}
3337#endif // COMPILER2
3338