/*
 * Copyright 2003-2008 Sun Microsystems, Inc.  All Rights Reserved.
 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
 *
 * This code is free software; you can redistribute it and/or modify it
 * under the terms of the GNU General Public License version 2 only, as
 * published by the Free Software Foundation.
 *
 * This code is distributed in the hope that it will be useful, but WITHOUT
 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
 * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
 * version 2 for more details (a copy is included in the LICENSE file that
 * accompanied this code).
 *
 * You should have received a copy of the GNU General Public License version
 * 2 along with this work; if not, write to the Free Software Foundation,
 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
 *
 * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
 * CA 95054 USA or visit www.sun.com if you need additional information or
 * have any questions.
 *
 */

#include "incls/_precompiled.incl"
#include "incls/_sharedRuntime_x86_64.cpp.incl"

DeoptimizationBlob *SharedRuntime::_deopt_blob;
#ifdef COMPILER2
UncommonTrapBlob   *SharedRuntime::_uncommon_trap_blob;
ExceptionBlob      *OptoRuntime::_exception_blob;
#endif // COMPILER2

SafepointBlob      *SharedRuntime::_polling_page_safepoint_handler_blob;
SafepointBlob      *SharedRuntime::_polling_page_return_handler_blob;
RuntimeStub*       SharedRuntime::_wrong_method_blob;
RuntimeStub*       SharedRuntime::_ic_miss_blob;
RuntimeStub*       SharedRuntime::_resolve_opt_virtual_call_blob;
RuntimeStub*       SharedRuntime::_resolve_virtual_call_blob;
RuntimeStub*       SharedRuntime::_resolve_static_call_blob;

const int StackAlignmentInSlots = StackAlignmentInBytes / VMRegImpl::stack_slot_size;

#define __ masm->

class SimpleRuntimeFrame {

  public:

  // Most of the runtime stubs have this simple frame layout.
  // This class exists to make the layout shared in one place.
  // Offsets are for compiler stack slots, which are jints.
  enum layout {
    // The frame sender code expects that rbp will be in the "natural" place and
    // will override any oopMap setting for it. We must therefore force the layout
    // so that it agrees with the frame sender code.
    rbp_off = frame::arg_reg_save_area_bytes/BytesPerInt,
    rbp_off2,
    return_off, return_off2,
    framesize
  };
};

class RegisterSaver {
  // Capture info about frame layout.  Layout offsets are in jint
  // units because compiler frame slots are jints.
#define DEF_XMM_OFFS(regnum) xmm ## regnum ## _off = xmm_off + (regnum)*16/BytesPerInt, xmm ## regnum ## H_off
  enum layout {
    fpu_state_off = frame::arg_reg_save_area_bytes/BytesPerInt, // fxsave save area
    xmm_off       = fpu_state_off + 160/BytesPerInt,            // offset in fxsave save area
    DEF_XMM_OFFS(0),
    DEF_XMM_OFFS(1),
    DEF_XMM_OFFS(2),
    DEF_XMM_OFFS(3),
    DEF_XMM_OFFS(4),
    DEF_XMM_OFFS(5),
    DEF_XMM_OFFS(6),
    DEF_XMM_OFFS(7),
    DEF_XMM_OFFS(8),
    DEF_XMM_OFFS(9),
    DEF_XMM_OFFS(10),
    DEF_XMM_OFFS(11),
    DEF_XMM_OFFS(12),
    DEF_XMM_OFFS(13),
    DEF_XMM_OFFS(14),
    DEF_XMM_OFFS(15),
    fpu_state_end = fpu_state_off + ((FPUStateSizeInWords-1)*wordSize / BytesPerInt),
    fpu_stateH_end,
    r15_off, r15H_off,
    r14_off, r14H_off,
    r13_off, r13H_off,
    r12_off, r12H_off,
    r11_off, r11H_off,
    r10_off, r10H_off,
    r9_off,  r9H_off,
    r8_off,  r8H_off,
    rdi_off, rdiH_off,
    rsi_off, rsiH_off,
    ignore_off, ignoreH_off,  // extra copy of rbp
    rsp_off, rspH_off,
    rbx_off, rbxH_off,
    rdx_off, rdxH_off,
    rcx_off, rcxH_off,
    rax_off, raxH_off,
    // 16-byte stack alignment fill word: see MacroAssembler::push/pop_IU_state
    align_off, alignH_off,
    flags_off, flagsH_off,
    // The frame sender code expects that rbp will be in the "natural" place and
    // will override any oopMap setting for it. We must therefore force the layout
    // so that it agrees with the frame sender code.
    rbp_off, rbpH_off,        // copy of rbp we will restore
    return_off, returnH_off,  // slot for return address
    reg_save_size             // size in compiler stack slots
  };

 public:
  static OopMap* save_live_registers(MacroAssembler* masm, int additional_frame_words, int* total_frame_words);
  static void restore_live_registers(MacroAssembler* masm);

  // Offsets into the register save area
  // Used by deoptimization when it is managing result register
  // values on its own

  static int rax_offset_in_bytes(void)    { return BytesPerInt * rax_off; }
  static int rdx_offset_in_bytes(void)    { return BytesPerInt * rdx_off; }
  static int rbx_offset_in_bytes(void)    { return BytesPerInt * rbx_off; }
  static int xmm0_offset_in_bytes(void)   { return BytesPerInt * xmm0_off; }
  static int return_offset_in_bytes(void) { return BytesPerInt * return_off; }

  // During deoptimization only the result registers need to be restored,
  // all the other values have already been extracted.
  static void restore_result_registers(MacroAssembler* masm);
};

OopMap* RegisterSaver::save_live_registers(MacroAssembler* masm, int additional_frame_words, int* total_frame_words) {

  // Always make the frame size 16-byte aligned
  int frame_size_in_bytes = round_to(additional_frame_words*wordSize +
                                     reg_save_size*BytesPerInt, 16);
  // OopMap frame size is in compiler stack slots (jints), not bytes or words
  int frame_size_in_slots = frame_size_in_bytes / BytesPerInt;
  // The caller will allocate additional_frame_words
  int additional_frame_slots = additional_frame_words*wordSize / BytesPerInt;
  // CodeBlob frame size is in words.
  int frame_size_in_words = frame_size_in_bytes / wordSize;
  *total_frame_words = frame_size_in_words;

  // Save registers, fpu state, and flags.
  // We assume the caller has already pushed the return address onto the
  // stack, so rsp is 8-byte aligned here.
  // We push rbp twice in this sequence because we want the real rbp
  // to be under the return address, like a normal enter.

  __ enter();          // rsp becomes 16-byte aligned here
  __ push_CPU_state(); // Push a multiple of 16 bytes
  if (frame::arg_reg_save_area_bytes != 0) {
    // Allocate argument register save area
    __ subptr(rsp, frame::arg_reg_save_area_bytes);
  }

  // Set an oopmap for the call site.  This oopmap will map all
  // oop-registers and debug-info registers as callee-saved.  This
  // will allow deoptimization at this safepoint to find all possible
  // debug-info recordings, as well as let GC find all oops.

  OopMapSet *oop_maps = new OopMapSet();
  OopMap* map = new OopMap(frame_size_in_slots, 0);
  map->set_callee_saved(VMRegImpl::stack2reg( rax_off  + additional_frame_slots), rax->as_VMReg());
  map->set_callee_saved(VMRegImpl::stack2reg( rcx_off  + additional_frame_slots), rcx->as_VMReg());
  map->set_callee_saved(VMRegImpl::stack2reg( rdx_off  + additional_frame_slots), rdx->as_VMReg());
  map->set_callee_saved(VMRegImpl::stack2reg( rbx_off  + additional_frame_slots), rbx->as_VMReg());
  // rbp location is known implicitly by the frame sender code, needs no oopmap
  // and the location where rbp was saved is ignored
  map->set_callee_saved(VMRegImpl::stack2reg( rsi_off  + additional_frame_slots), rsi->as_VMReg());
  map->set_callee_saved(VMRegImpl::stack2reg( rdi_off  + additional_frame_slots), rdi->as_VMReg());
  map->set_callee_saved(VMRegImpl::stack2reg( r8_off   + additional_frame_slots), r8->as_VMReg());
  map->set_callee_saved(VMRegImpl::stack2reg( r9_off   + additional_frame_slots), r9->as_VMReg());
  map->set_callee_saved(VMRegImpl::stack2reg( r10_off  + additional_frame_slots), r10->as_VMReg());
  map->set_callee_saved(VMRegImpl::stack2reg( r11_off  + additional_frame_slots), r11->as_VMReg());
  map->set_callee_saved(VMRegImpl::stack2reg( r12_off  + additional_frame_slots), r12->as_VMReg());
  map->set_callee_saved(VMRegImpl::stack2reg( r13_off  + additional_frame_slots), r13->as_VMReg());
  map->set_callee_saved(VMRegImpl::stack2reg( r14_off  + additional_frame_slots), r14->as_VMReg());
  map->set_callee_saved(VMRegImpl::stack2reg( r15_off  + additional_frame_slots), r15->as_VMReg());
  map->set_callee_saved(VMRegImpl::stack2reg(xmm0_off  + additional_frame_slots), xmm0->as_VMReg());
  map->set_callee_saved(VMRegImpl::stack2reg(xmm1_off  + additional_frame_slots), xmm1->as_VMReg());
  map->set_callee_saved(VMRegImpl::stack2reg(xmm2_off  + additional_frame_slots), xmm2->as_VMReg());
  map->set_callee_saved(VMRegImpl::stack2reg(xmm3_off  + additional_frame_slots), xmm3->as_VMReg());
  map->set_callee_saved(VMRegImpl::stack2reg(xmm4_off  + additional_frame_slots), xmm4->as_VMReg());
  map->set_callee_saved(VMRegImpl::stack2reg(xmm5_off  + additional_frame_slots), xmm5->as_VMReg());
  map->set_callee_saved(VMRegImpl::stack2reg(xmm6_off  + additional_frame_slots), xmm6->as_VMReg());
  map->set_callee_saved(VMRegImpl::stack2reg(xmm7_off  + additional_frame_slots), xmm7->as_VMReg());
  map->set_callee_saved(VMRegImpl::stack2reg(xmm8_off  + additional_frame_slots), xmm8->as_VMReg());
  map->set_callee_saved(VMRegImpl::stack2reg(xmm9_off  + additional_frame_slots), xmm9->as_VMReg());
  map->set_callee_saved(VMRegImpl::stack2reg(xmm10_off + additional_frame_slots), xmm10->as_VMReg());
  map->set_callee_saved(VMRegImpl::stack2reg(xmm11_off + additional_frame_slots), xmm11->as_VMReg());
  map->set_callee_saved(VMRegImpl::stack2reg(xmm12_off + additional_frame_slots), xmm12->as_VMReg());
  map->set_callee_saved(VMRegImpl::stack2reg(xmm13_off + additional_frame_slots), xmm13->as_VMReg());
  map->set_callee_saved(VMRegImpl::stack2reg(xmm14_off + additional_frame_slots), xmm14->as_VMReg());
  map->set_callee_saved(VMRegImpl::stack2reg(xmm15_off + additional_frame_slots), xmm15->as_VMReg());

  // %%% These should all be a waste but we'll keep things as they were for now
  if (true) {
    map->set_callee_saved(VMRegImpl::stack2reg( raxH_off  + additional_frame_slots),
                          rax->as_VMReg()->next());
    map->set_callee_saved(VMRegImpl::stack2reg( rcxH_off  + additional_frame_slots),
                          rcx->as_VMReg()->next());
    map->set_callee_saved(VMRegImpl::stack2reg( rdxH_off  + additional_frame_slots),
                          rdx->as_VMReg()->next());
    map->set_callee_saved(VMRegImpl::stack2reg( rbxH_off  + additional_frame_slots),
                          rbx->as_VMReg()->next());
    // rbp location is known implicitly by the frame sender code, needs no oopmap
    map->set_callee_saved(VMRegImpl::stack2reg( rsiH_off  + additional_frame_slots),
                          rsi->as_VMReg()->next());
    map->set_callee_saved(VMRegImpl::stack2reg( rdiH_off  + additional_frame_slots),
                          rdi->as_VMReg()->next());
    map->set_callee_saved(VMRegImpl::stack2reg( r8H_off   + additional_frame_slots),
                          r8->as_VMReg()->next());
    map->set_callee_saved(VMRegImpl::stack2reg( r9H_off   + additional_frame_slots),
                          r9->as_VMReg()->next());
    map->set_callee_saved(VMRegImpl::stack2reg( r10H_off  + additional_frame_slots),
                          r10->as_VMReg()->next());
    map->set_callee_saved(VMRegImpl::stack2reg( r11H_off  + additional_frame_slots),
                          r11->as_VMReg()->next());
    map->set_callee_saved(VMRegImpl::stack2reg( r12H_off  + additional_frame_slots),
                          r12->as_VMReg()->next());
    map->set_callee_saved(VMRegImpl::stack2reg( r13H_off  + additional_frame_slots),
                          r13->as_VMReg()->next());
    map->set_callee_saved(VMRegImpl::stack2reg( r14H_off  + additional_frame_slots),
                          r14->as_VMReg()->next());
    map->set_callee_saved(VMRegImpl::stack2reg( r15H_off  + additional_frame_slots),
                          r15->as_VMReg()->next());
    map->set_callee_saved(VMRegImpl::stack2reg(xmm0H_off  + additional_frame_slots),
                          xmm0->as_VMReg()->next());
    map->set_callee_saved(VMRegImpl::stack2reg(xmm1H_off  + additional_frame_slots),
                          xmm1->as_VMReg()->next());
    map->set_callee_saved(VMRegImpl::stack2reg(xmm2H_off  + additional_frame_slots),
                          xmm2->as_VMReg()->next());
    map->set_callee_saved(VMRegImpl::stack2reg(xmm3H_off  + additional_frame_slots),
                          xmm3->as_VMReg()->next());
    map->set_callee_saved(VMRegImpl::stack2reg(xmm4H_off  + additional_frame_slots),
                          xmm4->as_VMReg()->next());
    map->set_callee_saved(VMRegImpl::stack2reg(xmm5H_off  + additional_frame_slots),
                          xmm5->as_VMReg()->next());
    map->set_callee_saved(VMRegImpl::stack2reg(xmm6H_off  + additional_frame_slots),
                          xmm6->as_VMReg()->next());
    map->set_callee_saved(VMRegImpl::stack2reg(xmm7H_off  + additional_frame_slots),
                          xmm7->as_VMReg()->next());
    map->set_callee_saved(VMRegImpl::stack2reg(xmm8H_off  + additional_frame_slots),
                          xmm8->as_VMReg()->next());
    map->set_callee_saved(VMRegImpl::stack2reg(xmm9H_off  + additional_frame_slots),
                          xmm9->as_VMReg()->next());
    map->set_callee_saved(VMRegImpl::stack2reg(xmm10H_off + additional_frame_slots),
                          xmm10->as_VMReg()->next());
    map->set_callee_saved(VMRegImpl::stack2reg(xmm11H_off + additional_frame_slots),
                          xmm11->as_VMReg()->next());
    map->set_callee_saved(VMRegImpl::stack2reg(xmm12H_off + additional_frame_slots),
                          xmm12->as_VMReg()->next());
    map->set_callee_saved(VMRegImpl::stack2reg(xmm13H_off + additional_frame_slots),
                          xmm13->as_VMReg()->next());
    map->set_callee_saved(VMRegImpl::stack2reg(xmm14H_off + additional_frame_slots),
                          xmm14->as_VMReg()->next());
    map->set_callee_saved(VMRegImpl::stack2reg(xmm15H_off + additional_frame_slots),
                          xmm15->as_VMReg()->next());
  }

  return map;
}

void RegisterSaver::restore_live_registers(MacroAssembler* masm) {
  if (frame::arg_reg_save_area_bytes != 0) {
    // Pop arg register save area
    __ addptr(rsp, frame::arg_reg_save_area_bytes);
  }
  // Recover CPU state
  __ pop_CPU_state();
  // Get the rbp described implicitly by the calling convention (no oopMap)
  __ pop(rbp);
}

void RegisterSaver::restore_result_registers(MacroAssembler* masm) {

  // Just restore the result registers. Only used by deoptimization. By
  // now any callee-save register that needs to be restored to a c2
  // caller of the deoptee has been extracted into the vframeArray
  // and will be stuffed into the c2i adapter we create for later
  // restoration so only result registers need to be restored here.

  // Restore fp result register
  __ movdbl(xmm0, Address(rsp, xmm0_offset_in_bytes()));
  // Restore integer result register
  __ movptr(rax, Address(rsp, rax_offset_in_bytes()));
  __ movptr(rdx, Address(rsp, rdx_offset_in_bytes()));

  // Pop all of the register save area off the stack except the return address
  __ addptr(rsp, return_offset_in_bytes());
}

// The java_calling_convention describes stack locations as ideal slots on
// a frame with no abi restrictions. Since we must observe abi restrictions
// (like the placement of the register window) the slots must be biased by
// the following value.
static int reg2offset_in(VMReg r) {
  // Account for saved rbp and return address
  // This should really be in_preserve_stack_slots
  return (r->reg2stack() + 4) * VMRegImpl::stack_slot_size;
}

static int reg2offset_out(VMReg r) {
  return (r->reg2stack() + SharedRuntime::out_preserve_stack_slots()) * VMRegImpl::stack_slot_size;
}
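
// A worked example of the biasing above (illustrative comment only): with
// VMRegImpl::stack_slot_size == 4, the incoming argument in caller stack
// slot 0 maps to reg2offset_in == (0 + 4) * 4 == 16 bytes off rbp, i.e.
// just past the 8-byte saved rbp and the 8-byte return address that sit
// between rbp and the caller's outgoing args. reg2offset_out applies only
// the out_preserve_stack_slots() bias, since outgoing args are addressed
// off rsp where no such save area intervenes.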

// ---------------------------------------------------------------------------
// Read the array of BasicTypes from a signature, and compute where the
// arguments should go.  Values in the VMRegPair regs array refer to 4-byte
// quantities.  Values less than VMRegImpl::stack0 are registers, those above
// refer to 4-byte stack slots.  All stack slots are based off of the stack pointer
// as framesizes are fixed.
// VMRegImpl::stack0 refers to the first slot 0(sp),
// and VMRegImpl::stack0+1 refers to the memory word 4 bytes higher.  Register
// values up to RegisterImpl::number_of_registers are the 64-bit
// integer registers.

// Note: the INPUTS in sig_bt are in units of Java argument words, which are
// either 32-bit or 64-bit depending on the build.  The OUTPUTS are in 32-bit
// units regardless of build. Of course for i486 there is no 64 bit build.

// The Java calling convention is a "shifted" version of the C ABI.
// By skipping the first C ABI register we can call non-static jni methods
// with small numbers of arguments without having to shuffle the arguments
// at all. Since we control the java ABI we ought to at least get some
// advantage out of it.

int SharedRuntime::java_calling_convention(const BasicType *sig_bt,
                                           VMRegPair *regs,
                                           int total_args_passed,
                                           int is_outgoing) {

  // Create the mapping between argument positions and
  // registers.
  static const Register INT_ArgReg[Argument::n_int_register_parameters_j] = {
    j_rarg0, j_rarg1, j_rarg2, j_rarg3, j_rarg4, j_rarg5
  };
  static const XMMRegister FP_ArgReg[Argument::n_float_register_parameters_j] = {
    j_farg0, j_farg1, j_farg2, j_farg3,
    j_farg4, j_farg5, j_farg6, j_farg7
  };


  uint int_args = 0;
  uint fp_args = 0;
  uint stk_args = 0; // inc by 2 each time

  for (int i = 0; i < total_args_passed; i++) {
    switch (sig_bt[i]) {
    case T_BOOLEAN:
    case T_CHAR:
    case T_BYTE:
    case T_SHORT:
    case T_INT:
      if (int_args < Argument::n_int_register_parameters_j) {
        regs[i].set1(INT_ArgReg[int_args++]->as_VMReg());
      } else {
        regs[i].set1(VMRegImpl::stack2reg(stk_args));
        stk_args += 2;
      }
      break;
    case T_VOID:
      // halves of T_LONG or T_DOUBLE
      assert(i != 0 && (sig_bt[i - 1] == T_LONG || sig_bt[i - 1] == T_DOUBLE), "expecting half");
      regs[i].set_bad();
      break;
    case T_LONG:
      assert(sig_bt[i + 1] == T_VOID, "expecting half");
      // fall through
    case T_OBJECT:
    case T_ARRAY:
    case T_ADDRESS:
      if (int_args < Argument::n_int_register_parameters_j) {
        regs[i].set2(INT_ArgReg[int_args++]->as_VMReg());
      } else {
        regs[i].set2(VMRegImpl::stack2reg(stk_args));
        stk_args += 2;
      }
      break;
    case T_FLOAT:
      if (fp_args < Argument::n_float_register_parameters_j) {
        regs[i].set1(FP_ArgReg[fp_args++]->as_VMReg());
      } else {
        regs[i].set1(VMRegImpl::stack2reg(stk_args));
        stk_args += 2;
      }
      break;
    case T_DOUBLE:
      assert(sig_bt[i + 1] == T_VOID, "expecting half");
      if (fp_args < Argument::n_float_register_parameters_j) {
        regs[i].set2(FP_ArgReg[fp_args++]->as_VMReg());
      } else {
        regs[i].set2(VMRegImpl::stack2reg(stk_args));
        stk_args += 2;
      }
      break;
    default:
      ShouldNotReachHere();
      break;
    }
  }

  return round_to(stk_args, 2);
}
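
// A quick example of the convention above (illustrative comment only): for
// sig_bt = { T_OBJECT, T_INT, T_LONG, T_VOID, T_FLOAT } the loop assigns
// T_OBJECT -> j_rarg0 (set2), T_INT -> j_rarg1 (set1), T_LONG -> j_rarg2
// (set2) with its T_VOID half marked set_bad(), and T_FLOAT -> j_farg0
// (set1). Nothing spills, so stk_args stays 0 and the routine returns 0.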

// Patch the caller's call site with entry to compiled code if it exists.
static void patch_callers_callsite(MacroAssembler *masm) {
  Label L;
  __ verify_oop(rbx);
  __ cmpptr(Address(rbx, in_bytes(methodOopDesc::code_offset())), (int32_t)NULL_WORD);
  __ jcc(Assembler::equal, L);

  // Save the current stack pointer
  __ mov(r13, rsp);
  // Schedule the branch target address early.
  // Call into the VM to patch the caller, then jump to compiled callee
  // rax isn't live so capture return address while we easily can
  __ movptr(rax, Address(rsp, 0));

  // align stack so push_CPU_state doesn't fault
  __ andptr(rsp, -(StackAlignmentInBytes));
  __ push_CPU_state();

  __ verify_oop(rbx);
  // VM needs caller's callsite
  // VM needs target method
  // This needs to be a long call since we will relocate this adapter to
  // the codeBuffer and it may not reach

  // Allocate argument register save area
  if (frame::arg_reg_save_area_bytes != 0) {
    __ subptr(rsp, frame::arg_reg_save_area_bytes);
  }
  __ mov(c_rarg0, rbx);
  __ mov(c_rarg1, rax);
  __ call(RuntimeAddress(CAST_FROM_FN_PTR(address, SharedRuntime::fixup_callers_callsite)));

  // De-allocate argument register save area
  if (frame::arg_reg_save_area_bytes != 0) {
    __ addptr(rsp, frame::arg_reg_save_area_bytes);
  }

  __ pop_CPU_state();
  // restore sp
  __ mov(rsp, r13);
  __ bind(L);
}

// Helper function to put tags in interpreter stack.
static void tag_stack(MacroAssembler *masm, const BasicType sig, int st_off) {
  if (TaggedStackInterpreter) {
    int tag_offset = st_off + Interpreter::expr_tag_offset_in_bytes(0);
    if (sig == T_OBJECT || sig == T_ARRAY) {
      __ movptr(Address(rsp, tag_offset), (int32_t) frame::TagReference);
    } else if (sig == T_LONG || sig == T_DOUBLE) {
      int next_tag_offset = st_off + Interpreter::expr_tag_offset_in_bytes(1);
      __ movptr(Address(rsp, next_tag_offset), (int32_t) frame::TagValue);
      __ movptr(Address(rsp, tag_offset), (int32_t) frame::TagValue);
    } else {
      __ movptr(Address(rsp, tag_offset), (int32_t) frame::TagValue);
    }
  }
}
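
// Note (illustrative comment only): for the tagged interpreter a T_LONG or
// T_DOUBLE thus gets frame::TagValue written for both of its slots, a
// T_OBJECT/T_ARRAY gets a single frame::TagReference, and everything else a
// single frame::TagValue; when TaggedStackInterpreter is false this helper
// emits nothing at all.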


static void gen_c2i_adapter(MacroAssembler *masm,
                            int total_args_passed,
                            int comp_args_on_stack,
                            const BasicType *sig_bt,
                            const VMRegPair *regs,
                            Label& skip_fixup) {
  // Before we get into the guts of the C2I adapter, see if we should be here
  // at all.  We've come from compiled code and are attempting to jump to the
  // interpreter, which means the caller made a static call to get here
  // (vcalls always get a compiled target if there is one).  Check for a
  // compiled target.  If there is one, we need to patch the caller's call.
  patch_callers_callsite(masm);

  __ bind(skip_fixup);

  // Since all args are passed on the stack, total_args_passed *
  // Interpreter::stackElementSize is the space we need. Plus 1 because
  // we also account for the return address location since
  // we store it first rather than hold it in rax across all the shuffling

  int extraspace = (total_args_passed * Interpreter::stackElementSize()) + wordSize;

  // stack is aligned, keep it that way
  extraspace = round_to(extraspace, 2*wordSize);

  // Get return address
  __ pop(rax);

  // set senderSP value
  __ mov(r13, rsp);

  __ subptr(rsp, extraspace);

  // Store the return address in the expected location
  __ movptr(Address(rsp, 0), rax);

  // Now write the args into the outgoing interpreter space
  for (int i = 0; i < total_args_passed; i++) {
    if (sig_bt[i] == T_VOID) {
      assert(i > 0 && (sig_bt[i-1] == T_LONG || sig_bt[i-1] == T_DOUBLE), "missing half");
      continue;
    }

    // offset to start parameters
    int st_off   = (total_args_passed - i) * Interpreter::stackElementSize() +
                   Interpreter::value_offset_in_bytes();
    int next_off = st_off - Interpreter::stackElementSize();

    // Say 4 args:
    // i   st_off
    // 0   32 T_LONG
    // 1   24 T_VOID
    // 2   16 T_OBJECT
    // 3    8 T_BOOL
    // -    0 return address
    //
    // However, to make things extra confusing: because we can fit a long/double in
    // a single slot on a 64-bit VM and it would be silly to break them up, the interpreter
    // leaves one slot empty and only stores to a single slot. In this case the
    // slot that is occupied is the T_VOID slot. See, I said it was confusing.
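    //
    // Concretely, for the T_LONG above st_off == 32 and next_off == 24; the
    // code below stores the 64-bit value once at next_off (the T_VOID slot)
    // and, in debug builds, fills the unused slot at st_off with known junk.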

    VMReg r_1 = regs[i].first();
    VMReg r_2 = regs[i].second();
    if (!r_1->is_valid()) {
      assert(!r_2->is_valid(), "");
      continue;
    }
    if (r_1->is_stack()) {
      // memory to memory use rax
      int ld_off = r_1->reg2stack() * VMRegImpl::stack_slot_size + extraspace;
      if (!r_2->is_valid()) {
        // sign extend??
        __ movl(rax, Address(rsp, ld_off));
        __ movptr(Address(rsp, st_off), rax);
        tag_stack(masm, sig_bt[i], st_off);

      } else {

        __ movq(rax, Address(rsp, ld_off));

        // Two VMRegs|OptoRegs can be T_OBJECT, T_ADDRESS, T_DOUBLE, T_LONG
        // T_DOUBLE and T_LONG use two slots in the interpreter
        if ( sig_bt[i] == T_LONG || sig_bt[i] == T_DOUBLE) {
          // ld_off == LSW, ld_off+wordSize == MSW
          // st_off == MSW, next_off == LSW
          __ movq(Address(rsp, next_off), rax);
#ifdef ASSERT
          // Overwrite the unused slot with known junk
          __ mov64(rax, CONST64(0xdeadffffdeadaaaa));
          __ movptr(Address(rsp, st_off), rax);
#endif /* ASSERT */
          tag_stack(masm, sig_bt[i], next_off);
        } else {
          __ movq(Address(rsp, st_off), rax);
          tag_stack(masm, sig_bt[i], st_off);
        }
      }
    } else if (r_1->is_Register()) {
      Register r = r_1->as_Register();
      if (!r_2->is_valid()) {
        // must be only an int (or less) so move only 32 bits to the slot
        // why not sign extend??
        __ movl(Address(rsp, st_off), r);
        tag_stack(masm, sig_bt[i], st_off);
      } else {
        // Two VMRegs|OptoRegs can be T_OBJECT, T_ADDRESS, T_DOUBLE, T_LONG
        // T_DOUBLE and T_LONG use two slots in the interpreter
        if ( sig_bt[i] == T_LONG || sig_bt[i] == T_DOUBLE) {
          // long/double in gpr
#ifdef ASSERT
          // Overwrite the unused slot with known junk
          __ mov64(rax, CONST64(0xdeadffffdeadaaab));
          __ movptr(Address(rsp, st_off), rax);
#endif /* ASSERT */
          __ movq(Address(rsp, next_off), r);
          tag_stack(masm, sig_bt[i], next_off);
        } else {
          __ movptr(Address(rsp, st_off), r);
          tag_stack(masm, sig_bt[i], st_off);
        }
      }
    } else {
      assert(r_1->is_XMMRegister(), "");
      if (!r_2->is_valid()) {
        // only a float, use just part of the slot
        __ movflt(Address(rsp, st_off), r_1->as_XMMRegister());
        tag_stack(masm, sig_bt[i], st_off);
      } else {
#ifdef ASSERT
        // Overwrite the unused slot with known junk
        __ mov64(rax, CONST64(0xdeadffffdeadaaac));
        __ movptr(Address(rsp, st_off), rax);
#endif /* ASSERT */
        __ movdbl(Address(rsp, next_off), r_1->as_XMMRegister());
        tag_stack(masm, sig_bt[i], next_off);
      }
    }
  }

  // Schedule the branch target address early.
  __ movptr(rcx, Address(rbx, in_bytes(methodOopDesc::interpreter_entry_offset())));
  __ jmp(rcx);
}

static void gen_i2c_adapter(MacroAssembler *masm,
                            int total_args_passed,
                            int comp_args_on_stack,
                            const BasicType *sig_bt,
                            const VMRegPair *regs) {

  //
  // We will only enter here from an interpreted frame and never from after
  // passing thru a c2i. Azul allowed this but we do not. If we lose the
  // race and use a c2i we will remain interpreted for the race loser(s).
  // This removes all sorts of headaches on the x86 side and also eliminates
  // the possibility of having c2i -> i2c -> c2i -> ... endless transitions.


  // Note: r13 contains the senderSP on entry. We must preserve it since
  // we may do an i2c -> c2i transition if we lose a race where compiled
  // code goes non-entrant while we get args ready.
  // In addition we use r13 to locate all the interpreter args as
  // we must align the stack to 16 bytes on an i2c entry else we
  // lose alignment we expect in all compiled code and register
  // save code can segv when fxsave instructions find improperly
  // aligned stack pointer.

  __ movptr(rax, Address(rsp, 0));

  // Cut-out for having no stack args.  Since up to 2 int/oop args are passed
  // in registers, we will occasionally have no stack args.
  int comp_words_on_stack = 0;
  if (comp_args_on_stack) {
    // Sig words on the stack are greater than VMRegImpl::stack0.  Those in
    // registers are below.  By subtracting stack0, we either get a negative
    // number (all values in registers) or the maximum stack slot accessed.

    // Convert 4-byte c2 stack slots to words.
    comp_words_on_stack = round_to(comp_args_on_stack*VMRegImpl::stack_slot_size, wordSize)>>LogBytesPerWord;
    // Round up to minimum stack alignment, in wordSize
    comp_words_on_stack = round_to(comp_words_on_stack, 2);
    __ subptr(rsp, comp_words_on_stack * wordSize);
  }


  // Ensure compiled code always sees stack at proper alignment
  __ andptr(rsp, -16);

  // push the return address and misalign the stack so that the youngest
  // frame always sees it as far as the placement of the call instruction
  __ push(rax);

  // Will jump to the compiled code just as if compiled code was doing it.
  // Pre-load the register-jump target early, to schedule it better.
  __ movptr(r11, Address(rbx, in_bytes(methodOopDesc::from_compiled_offset())));

  // Now generate the shuffle code.  Pick up all register args and move the
  // rest through the floating point stack top.
  for (int i = 0; i < total_args_passed; i++) {
    if (sig_bt[i] == T_VOID) {
      // Longs and doubles are passed in native word order, but misaligned
      // in the 32-bit build.
      assert(i > 0 && (sig_bt[i-1] == T_LONG || sig_bt[i-1] == T_DOUBLE), "missing half");
      continue;
    }

    // Pick up 0, 1 or 2 words from SP+offset.

    assert(!regs[i].second()->is_valid() || regs[i].first()->next() == regs[i].second(),
            "scrambled load targets?");
    // Load in argument order going down.
    // int ld_off = (total_args_passed + comp_words_on_stack -i)*wordSize;
    // base ld_off on r13 (sender_sp) as the stack alignment makes offsets from rsp
    // unpredictable
    int ld_off = ((total_args_passed - 1) - i)*Interpreter::stackElementSize();

    // Point to interpreter value (vs. tag)
    int next_off = ld_off - Interpreter::stackElementSize();

    VMReg r_1 = regs[i].first();
    VMReg r_2 = regs[i].second();
    if (!r_1->is_valid()) {
      assert(!r_2->is_valid(), "");
      continue;
    }
    if (r_1->is_stack()) {
      // Convert stack slot to an SP offset (+ wordSize to account for return address )
      int st_off = regs[i].first()->reg2stack()*VMRegImpl::stack_slot_size + wordSize;
      if (!r_2->is_valid()) {
        // sign extend???
        __ movl(rax, Address(r13, ld_off));
        __ movptr(Address(rsp, st_off), rax);
      } else {
        //
        // We are using two optoregs. This can be either T_OBJECT, T_ADDRESS, T_LONG, or T_DOUBLE.
        // The interpreter allocates two slots but only uses one for the T_LONG or T_DOUBLE case,
        // so we must adjust where to pick up the data to match the interpreter.
        //
        // Interpreter local[n] == MSW, local[n+1] == LSW; however, locals
        // are accessed as negative so LSW is at LOW address

        // ld_off is MSW so get LSW
        const int offset = (sig_bt[i]==T_LONG||sig_bt[i]==T_DOUBLE)?
                           next_off : ld_off;
        __ movq(rax, Address(r13, offset));
        // st_off is LSW (i.e. reg.first())
        __ movq(Address(rsp, st_off), rax);
      }
    } else if (r_1->is_Register()) {  // Register argument
      Register r = r_1->as_Register();
      assert(r != rax, "must be different");
      if (r_2->is_valid()) {
        //
        // We are using two VMRegs. This can be either T_OBJECT, T_ADDRESS, T_LONG, or T_DOUBLE.
        // The interpreter allocates two slots but only uses one for the T_LONG or T_DOUBLE case,
        // so we must adjust where to pick up the data to match the interpreter.

        const int offset = (sig_bt[i]==T_LONG||sig_bt[i]==T_DOUBLE)?
                           next_off : ld_off;

        // this can be a misaligned move
        __ movq(r, Address(r13, offset));
      } else {
        // sign extend and use a full word?
        __ movl(r, Address(r13, ld_off));
      }
    } else {
      if (!r_2->is_valid()) {
        __ movflt(r_1->as_XMMRegister(), Address(r13, ld_off));
      } else {
        __ movdbl(r_1->as_XMMRegister(), Address(r13, next_off));
      }
    }
  }

  // 6243940 We might end up in handle_wrong_method if
  // the callee is deoptimized as we race thru here. If that
  // happens we don't want to take a safepoint because the
  // caller frame will look interpreted and arguments are now
  // "compiled" so it is much better to make this transition
  // invisible to the stack walking code. Unfortunately if
  // we try and find the callee by normal means a safepoint
  // is possible. So we stash the desired callee in the thread
  // and the vm will find it there should this case occur.

  __ movptr(Address(r15_thread, JavaThread::callee_target_offset()), rbx);

  // put methodOop where a c2i would expect it should we end up there;
  // only needed because c2 resolve stubs return methodOop as a result in
  // rax
  __ mov(rax, rbx);
  __ jmp(r11);
}

// ---------------------------------------------------------------
AdapterHandlerEntry* SharedRuntime::generate_i2c2i_adapters(MacroAssembler *masm,
                                                            int total_args_passed,
                                                            int comp_args_on_stack,
                                                            const BasicType *sig_bt,
                                                            const VMRegPair *regs) {
  address i2c_entry = __ pc();

  gen_i2c_adapter(masm, total_args_passed, comp_args_on_stack, sig_bt, regs);

  // -------------------------------------------------------------------------
  // Generate a C2I adapter.  On entry we know rbx holds the methodOop during calls
  // to the interpreter.  The args start out packed in the compiled layout.  They
  // need to be unpacked into the interpreter layout.  This will almost always
  // require some stack space.  We grow the current (compiled) stack, then repack
  // the args.  We finally end in a jump to the generic interpreter entry point.
  // On exit from the interpreter, the interpreter will restore our SP (lest the
  // compiled code, which relies solely on SP and not RBP, get sick).

  address c2i_unverified_entry = __ pc();
  Label skip_fixup;
  Label ok;

  Register holder = rax;
  Register receiver = j_rarg0;
  Register temp = rbx;

  {
    __ verify_oop(holder);
    __ load_klass(temp, receiver);
    __ verify_oop(temp);

    __ cmpptr(temp, Address(holder, compiledICHolderOopDesc::holder_klass_offset()));
    __ movptr(rbx, Address(holder, compiledICHolderOopDesc::holder_method_offset()));
    __ jcc(Assembler::equal, ok);
    __ jump(RuntimeAddress(SharedRuntime::get_ic_miss_stub()));

    __ bind(ok);
    // Method might have been compiled since the call site was patched to
    // interpreted; if that is the case treat it as a miss so we can get
    // the call site corrected.
    __ cmpptr(Address(rbx, in_bytes(methodOopDesc::code_offset())), (int32_t)NULL_WORD);
    __ jcc(Assembler::equal, skip_fixup);
    __ jump(RuntimeAddress(SharedRuntime::get_ic_miss_stub()));
  }

  address c2i_entry = __ pc();

  gen_c2i_adapter(masm, total_args_passed, comp_args_on_stack, sig_bt, regs, skip_fixup);

  __ flush();
  return new AdapterHandlerEntry(i2c_entry, c2i_entry, c2i_unverified_entry);
}

int SharedRuntime::c_calling_convention(const BasicType *sig_bt,
                                         VMRegPair *regs,
                                         int total_args_passed) {
// We return the number of VMRegImpl stack slots we need to reserve for all
// the arguments NOT counting out_preserve_stack_slots.

// NOTE: These arrays will have to change when c1 is ported
#ifdef _WIN64
    static const Register INT_ArgReg[Argument::n_int_register_parameters_c] = {
      c_rarg0, c_rarg1, c_rarg2, c_rarg3
    };
    static const XMMRegister FP_ArgReg[Argument::n_float_register_parameters_c] = {
      c_farg0, c_farg1, c_farg2, c_farg3
    };
#else
    static const Register INT_ArgReg[Argument::n_int_register_parameters_c] = {
      c_rarg0, c_rarg1, c_rarg2, c_rarg3, c_rarg4, c_rarg5
    };
    static const XMMRegister FP_ArgReg[Argument::n_float_register_parameters_c] = {
      c_farg0, c_farg1, c_farg2, c_farg3,
      c_farg4, c_farg5, c_farg6, c_farg7
    };
#endif // _WIN64


    uint int_args = 0;
    uint fp_args = 0;
    uint stk_args = 0; // inc by 2 each time

    for (int i = 0; i < total_args_passed; i++) {
      switch (sig_bt[i]) {
      case T_BOOLEAN:
      case T_CHAR:
      case T_BYTE:
      case T_SHORT:
      case T_INT:
        if (int_args < Argument::n_int_register_parameters_c) {
          regs[i].set1(INT_ArgReg[int_args++]->as_VMReg());
#ifdef _WIN64
          fp_args++;
          // Allocate slots for the callee to stuff register args on the stack.
          stk_args += 2;
#endif
        } else {
          regs[i].set1(VMRegImpl::stack2reg(stk_args));
          stk_args += 2;
        }
        break;
      case T_LONG:
        assert(sig_bt[i + 1] == T_VOID, "expecting half");
        // fall through
      case T_OBJECT:
      case T_ARRAY:
      case T_ADDRESS:
        if (int_args < Argument::n_int_register_parameters_c) {
          regs[i].set2(INT_ArgReg[int_args++]->as_VMReg());
#ifdef _WIN64
          fp_args++;
          stk_args += 2;
#endif
        } else {
          regs[i].set2(VMRegImpl::stack2reg(stk_args));
          stk_args += 2;
        }
        break;
      case T_FLOAT:
        if (fp_args < Argument::n_float_register_parameters_c) {
          regs[i].set1(FP_ArgReg[fp_args++]->as_VMReg());
#ifdef _WIN64
          int_args++;
          // Allocate slots for the callee to stuff register args on the stack.
          stk_args += 2;
#endif
        } else {
          regs[i].set1(VMRegImpl::stack2reg(stk_args));
          stk_args += 2;
        }
        break;
      case T_DOUBLE:
        assert(sig_bt[i + 1] == T_VOID, "expecting half");
        if (fp_args < Argument::n_float_register_parameters_c) {
          regs[i].set2(FP_ArgReg[fp_args++]->as_VMReg());
#ifdef _WIN64
          int_args++;
          // Allocate slots for the callee to stuff register args on the stack.
          stk_args += 2;
#endif
        } else {
          regs[i].set2(VMRegImpl::stack2reg(stk_args));
          stk_args += 2;
        }
        break;
      case T_VOID: // Halves of longs and doubles
        assert(i != 0 && (sig_bt[i - 1] == T_LONG || sig_bt[i - 1] == T_DOUBLE), "expecting half");
        regs[i].set_bad();
        break;
      default:
        ShouldNotReachHere();
        break;
      }
    }
#ifdef _WIN64
  // The Windows ABI requires that we always allocate enough stack space
  // for 4 64-bit registers to be stored down.
  if (stk_args < 8) {
    stk_args = 8;
  }
#endif // _WIN64

  return stk_args;
}
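
// Example of the positional effect on Windows (illustrative comment only):
// for sig_bt = { T_INT, T_DOUBLE, T_VOID, T_INT } the _WIN64 branches bump
// the "other" counter on every register arg, so the assignment becomes
// c_rarg0, c_farg1, c_rarg2 (register index equals argument position),
// whereas the System V branch yields c_rarg0, c_farg0, c_rarg1 because the
// int and fp counters advance independently.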

// On 64-bit we will store integer-like items to the stack as
// 64-bit items (sparc abi) even though java would only store
// 32 bits for a parameter. On 32-bit it will simply be 32 bits.
// So this routine will do 32->32 on 32-bit and 32->64 on 64-bit.
static void move32_64(MacroAssembler* masm, VMRegPair src, VMRegPair dst) {
  if (src.first()->is_stack()) {
    if (dst.first()->is_stack()) {
      // stack to stack
      __ movslq(rax, Address(rbp, reg2offset_in(src.first())));
      __ movq(Address(rsp, reg2offset_out(dst.first())), rax);
    } else {
      // stack to reg
      __ movslq(dst.first()->as_Register(), Address(rbp, reg2offset_in(src.first())));
    }
  } else if (dst.first()->is_stack()) {
    // reg to stack
    // Do we really have to sign extend???
    // __ movslq(src.first()->as_Register(), src.first()->as_Register());
    __ movq(Address(rsp, reg2offset_out(dst.first())), src.first()->as_Register());
  } else {
    // Do we really have to sign extend???
    // __ movslq(dst.first()->as_Register(), src.first()->as_Register());
    if (dst.first() != src.first()) {
      __ movq(dst.first()->as_Register(), src.first()->as_Register());
    }
  }
}
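
// So on this 64-bit build move32_64 always widens a 32-bit value to a full
// 64-bit slot: stack sources are sign-extended with movslq, while the
// register paths copy all 64 bits and leave open (see the commented-out
// movslq lines above) whether an explicit sign extension is ever needed.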


// An oop arg. Must pass a handle, not the oop itself.
static void object_move(MacroAssembler* masm,
                        OopMap* map,
                        int oop_handle_offset,
                        int framesize_in_slots,
                        VMRegPair src,
                        VMRegPair dst,
                        bool is_receiver,
                        int* receiver_offset) {

  // must pass a handle. First figure out the location we use as a handle

  Register rHandle = dst.first()->is_stack() ? rax : dst.first()->as_Register();

  // See if the oop is NULL; if it is, we need no handle

  if (src.first()->is_stack()) {

    // Oop is already on the stack as an argument
    int offset_in_older_frame = src.first()->reg2stack() + SharedRuntime::out_preserve_stack_slots();
    map->set_oop(VMRegImpl::stack2reg(offset_in_older_frame + framesize_in_slots));
    if (is_receiver) {
      *receiver_offset = (offset_in_older_frame + framesize_in_slots) * VMRegImpl::stack_slot_size;
    }

    __ cmpptr(Address(rbp, reg2offset_in(src.first())), (int32_t)NULL_WORD);
    __ lea(rHandle, Address(rbp, reg2offset_in(src.first())));
    // conditionally move a NULL
    __ cmovptr(Assembler::equal, rHandle, Address(rbp, reg2offset_in(src.first())));
  } else {

    // Oop is in a register; we must store it to the space we reserve
    // on the stack for oop_handles and pass a handle if the oop is non-NULL

    const Register rOop = src.first()->as_Register();
    int oop_slot;
    if (rOop == j_rarg0)
      oop_slot = 0;
    else if (rOop == j_rarg1)
      oop_slot = 1;
    else if (rOop == j_rarg2)
      oop_slot = 2;
    else if (rOop == j_rarg3)
      oop_slot = 3;
    else if (rOop == j_rarg4)
      oop_slot = 4;
    else {
      assert(rOop == j_rarg5, "wrong register");
      oop_slot = 5;
    }

    oop_slot = oop_slot * VMRegImpl::slots_per_word + oop_handle_offset;
    int offset = oop_slot*VMRegImpl::stack_slot_size;

    map->set_oop(VMRegImpl::stack2reg(oop_slot));
    // Store oop in handle area, may be NULL
    __ movptr(Address(rsp, offset), rOop);
    if (is_receiver) {
      *receiver_offset = offset;
    }

    __ cmpptr(rOop, (int32_t)NULL_WORD);
    __ lea(rHandle, Address(rsp, offset));
    // conditionally move a NULL from the handle area where it was just stored
    __ cmovptr(Assembler::equal, rHandle, Address(rsp, offset));
  }

  // If arg is on the stack then place it, otherwise it is already in the correct reg.
  if (dst.first()->is_stack()) {
    __ movptr(Address(rsp, reg2offset_out(dst.first())), rHandle);
  }
}
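
// Note on the cmovptr idiom above (illustrative comment only): rHandle is
// first loaded with the address of the saved oop; if the oop itself was
// NULL the cmov then overwrites rHandle with the NULL that was just stored
// there, so the native callee sees either a valid handle or a plain NULL
// without any branch being taken.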

// A float arg may have to do float reg to int reg conversion
static void float_move(MacroAssembler* masm, VMRegPair src, VMRegPair dst) {
  assert(!src.second()->is_valid() && !dst.second()->is_valid(), "bad float_move");

  // The calling convention assures us that each VMRegPair is either
  // all really one physical register or adjacent stack slots.
  // This greatly simplifies the cases here compared to sparc.

  if (src.first()->is_stack()) {
    if (dst.first()->is_stack()) {
      __ movl(rax, Address(rbp, reg2offset_in(src.first())));
      __ movptr(Address(rsp, reg2offset_out(dst.first())), rax);
    } else {
      // stack to reg
      assert(dst.first()->is_XMMRegister(), "only expect xmm registers as parameters");
      __ movflt(dst.first()->as_XMMRegister(), Address(rbp, reg2offset_in(src.first())));
    }
  } else if (dst.first()->is_stack()) {
    // reg to stack
    assert(src.first()->is_XMMRegister(), "only expect xmm registers as parameters");
    __ movflt(Address(rsp, reg2offset_out(dst.first())), src.first()->as_XMMRegister());
  } else {
    // reg to reg
    // In theory these overlap but the ordering is such that this is likely a nop
    if ( src.first() != dst.first()) {
      __ movdbl(dst.first()->as_XMMRegister(),  src.first()->as_XMMRegister());
    }
  }
}

// A long move
static void long_move(MacroAssembler* masm, VMRegPair src, VMRegPair dst) {

  // The calling convention assures us that each VMRegPair is either
  // all really one physical register or adjacent stack slots.
  // This greatly simplifies the cases here compared to sparc.

  if (src.is_single_phys_reg() ) {
    if (dst.is_single_phys_reg()) {
      if (dst.first() != src.first()) {
        __ mov(dst.first()->as_Register(), src.first()->as_Register());
      }
    } else {
      assert(dst.is_single_reg(), "not a stack pair");
      __ movq(Address(rsp, reg2offset_out(dst.first())), src.first()->as_Register());
    }
  } else if (dst.is_single_phys_reg()) {
    assert(src.is_single_reg(),  "not a stack pair");
    // src is an incoming stack arg addressed off rbp, so use the in-bias
    __ movq(dst.first()->as_Register(), Address(rbp, reg2offset_in(src.first())));
  } else {
    assert(src.is_single_reg() && dst.is_single_reg(), "not stack pairs");
    __ movq(rax, Address(rbp, reg2offset_in(src.first())));
    __ movq(Address(rsp, reg2offset_out(dst.first())), rax);
  }
}

// A double move
static void double_move(MacroAssembler* masm, VMRegPair src, VMRegPair dst) {

  // The calling convention assures us that each VMRegPair is either
  // all really one physical register or adjacent stack slots.
  // This greatly simplifies the cases here compared to sparc.

  if (src.is_single_phys_reg() ) {
    if (dst.is_single_phys_reg()) {
      // In theory these overlap but the ordering is such that this is likely a nop
      if ( src.first() != dst.first()) {
        __ movdbl(dst.first()->as_XMMRegister(), src.first()->as_XMMRegister());
      }
    } else {
      assert(dst.is_single_reg(), "not a stack pair");
      __ movdbl(Address(rsp, reg2offset_out(dst.first())), src.first()->as_XMMRegister());
    }
  } else if (dst.is_single_phys_reg()) {
    assert(src.is_single_reg(),  "not a stack pair");
    // src is an incoming stack arg addressed off rbp, so use the in-bias
    __ movdbl(dst.first()->as_XMMRegister(), Address(rbp, reg2offset_in(src.first())));
  } else {
    assert(src.is_single_reg() && dst.is_single_reg(), "not stack pairs");
    __ movq(rax, Address(rbp, reg2offset_in(src.first())));
    __ movq(Address(rsp, reg2offset_out(dst.first())), rax);
  }
}


void SharedRuntime::save_native_result(MacroAssembler *masm, BasicType ret_type, int frame_slots) {
  // We always ignore the frame_slots arg and just use the space just below the
  // frame pointer, which by this time is free to use
  switch (ret_type) {
  case T_FLOAT:
    __ movflt(Address(rbp, -wordSize), xmm0);
    break;
  case T_DOUBLE:
    __ movdbl(Address(rbp, -wordSize), xmm0);
    break;
  case T_VOID:  break;
  default: {
    __ movptr(Address(rbp, -wordSize), rax);
    }
  }
}

void SharedRuntime::restore_native_result(MacroAssembler *masm, BasicType ret_type, int frame_slots) {
  // We always ignore the frame_slots arg and just use the space just below the
  // frame pointer, which by this time is free to use
  switch (ret_type) {
  case T_FLOAT:
    __ movflt(xmm0, Address(rbp, -wordSize));
    break;
  case T_DOUBLE:
    __ movdbl(xmm0, Address(rbp, -wordSize));
    break;
  case T_VOID:  break;
  default: {
    __ movptr(rax, Address(rbp, -wordSize));
    }
  }
}

static void save_args(MacroAssembler *masm, int arg_count, int first_arg, VMRegPair *args) {
    for ( int i = first_arg ; i < arg_count ; i++ ) {
      if (args[i].first()->is_Register()) {
        __ push(args[i].first()->as_Register());
      } else if (args[i].first()->is_XMMRegister()) {
        __ subptr(rsp, 2*wordSize);
        __ movdbl(Address(rsp, 0), args[i].first()->as_XMMRegister());
      }
    }
}

static void restore_args(MacroAssembler *masm, int arg_count, int first_arg, VMRegPair *args) {
    for ( int i = arg_count - 1 ; i >= first_arg ; i-- ) {
      if (args[i].first()->is_Register()) {
        __ pop(args[i].first()->as_Register());
      } else if (args[i].first()->is_XMMRegister()) {
        __ movdbl(args[i].first()->as_XMMRegister(), Address(rsp, 0));
        __ addptr(rsp, 2*wordSize);
      }
    }
}

// ---------------------------------------------------------------------------
// Generate a native wrapper for a given method.  The method takes arguments
// in the Java compiled code convention, marshals them to the native
// convention (handlizes oops, etc), transitions to native, makes the call,
// returns to java state (possibly blocking), unhandlizes any result and
// returns.
nmethod *SharedRuntime::generate_native_wrapper(MacroAssembler *masm,
                                                methodHandle method,
                                                int total_in_args,
                                                int comp_args_on_stack,
                                                BasicType *in_sig_bt,
                                                VMRegPair *in_regs,
                                                BasicType ret_type) {
  // Native nmethod wrappers never take possession of the oop arguments.
  // So the caller will gc the arguments. The only thing we need an
  // oopMap for is if the call is static.
  //
  // An OopMap for lock (and class if static)
  OopMapSet *oop_maps = new OopMapSet();
  intptr_t start = (intptr_t)__ pc();

  // We have received a description of where all the java args are located
  // on entry to the wrapper. We need to convert these args to where
  // the jni function will expect them. To figure out where they go
  // we convert the java signature to a C signature by inserting
  // the hidden arguments as arg[0] and possibly arg[1] (static method).

  int total_c_args = total_in_args + 1;
  if (method->is_static()) {
    total_c_args++;
  }

  BasicType* out_sig_bt = NEW_RESOURCE_ARRAY(BasicType, total_c_args);
  VMRegPair* out_regs   = NEW_RESOURCE_ARRAY(VMRegPair,   total_c_args);

  int argc = 0;
  out_sig_bt[argc++] = T_ADDRESS;
  if (method->is_static()) {
    out_sig_bt[argc++] = T_OBJECT;
  }

  for (int i = 0; i < total_in_args ; i++ ) {
    out_sig_bt[argc++] = in_sig_bt[i];
  }

  // Now figure out where the args must be stored and how much stack space
  // they require.
  //
  int out_arg_slots;
  out_arg_slots = c_calling_convention(out_sig_bt, out_regs, total_c_args);
1226
1227  // Compute framesize for the wrapper.  We need to handlize all oops in
1228  // incoming registers
1229
1230  // Calculate the total number of stack slots we will need.
1231
1232  // First count the abi requirement plus all of the outgoing args
1233  int stack_slots = SharedRuntime::out_preserve_stack_slots() + out_arg_slots;
1234
1235  // Now the space for the inbound oop handle area
1236
1237  int oop_handle_offset = stack_slots;
1238  stack_slots += 6*VMRegImpl::slots_per_word;
1239
1240  // Now any space we need for handlizing a klass if static method
1241
1242  int oop_temp_slot_offset = 0;
1243  int klass_slot_offset = 0;
1244  int klass_offset = -1;
1245  int lock_slot_offset = 0;
1246  bool is_static = false;
1247
1248  if (method->is_static()) {
1249    klass_slot_offset = stack_slots;
1250    stack_slots += VMRegImpl::slots_per_word;
1251    klass_offset = klass_slot_offset * VMRegImpl::stack_slot_size;
1252    is_static = true;
1253  }
1254
1255  // Plus a lock if needed
1256
1257  if (method->is_synchronized()) {
1258    lock_slot_offset = stack_slots;
1259    stack_slots += VMRegImpl::slots_per_word;
1260  }
1261
1262  // Now a place (+2) to save return values or temps during shuffling
1263  // + 4 for return address (which we own) and saved rbp
1264  stack_slots += 6;
1265
1266  // Ok The space we have allocated will look like:
1267  //
1268  //
1269  // FP-> |                     |
1270  //      |---------------------|
1271  //      | 2 slots for moves   |
1272  //      |---------------------|
1273  //      | lock box (if sync)  |
1274  //      |---------------------| <- lock_slot_offset
1275  //      | klass (if static)   |
1276  //      |---------------------| <- klass_slot_offset
1277  //      | oopHandle area      |
1278  //      |---------------------| <- oop_handle_offset (6 java arg registers)
1279  //      | outbound memory     |
1280  //      | based arguments     |
1281  //      |                     |
1282  //      |---------------------|
1283  //      |                     |
1284  // SP-> | out_preserved_slots |
1285  //
1286  //
1287
1288
1289  // Now compute actual number of stack words we need rounding to make
1290  // stack properly aligned.
1291  stack_slots = round_to(stack_slots, StackAlignmentInSlots);
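  // E.g., assuming the usual 16-byte ABI alignment and 4-byte stack slots,
  // StackAlignmentInSlots is 4, so a raw count of 37 slots would round up
  // to 40 slots (160 bytes); round_to always rounds upward.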
1292
1293  int stack_size = stack_slots * VMRegImpl::stack_slot_size;
1294
1295
1296  // First thing: make an IC check to see if we should even be here
1297
1298  // We are free to use all registers as temps without saving them and
1299  // restoring them except rbp. rbp is the only callee save register
1300  // as far as the interpreter and the compiler(s) are concerned.
1301
1302
1303  const Register ic_reg = rax;
1304  const Register receiver = j_rarg0;
1305  const Register tmp = rdx;
1306
1307  Label ok;
1308  Label exception_pending;
1309
1310  __ verify_oop(receiver);
1311  __ push(tmp); // spill (any other registers free here???)
1312  __ load_klass(tmp, receiver);
1313  __ cmpq(ic_reg, tmp);
1314  __ jcc(Assembler::equal, ok);
1315
1316  __ pop(tmp);
1317  __ jump(RuntimeAddress(SharedRuntime::get_ic_miss_stub()));
1318
1319  __ bind(ok);
1320  __ pop(tmp);
1321
1322  // Verified entry point must be aligned
1323  __ align(8);
1324
1325  int vep_offset = ((intptr_t)__ pc()) - start;
1326
1327  // The instruction at the verified entry point must be 5 bytes or longer
1328  // because it can be patched on the fly by make_non_entrant. The stack bang
1329  // instruction fits that requirement.
1330
1331  // Generate stack overflow check
1332
1333  if (UseStackBanging) {
1334    __ bang_stack_with_offset(StackShadowPages*os::vm_page_size());
1335  } else {
1336    // need a 5 byte instruction to allow MT safe patching to non-entrant
1337    __ fat_nop();
1338  }
1339
1340  // Generate a new frame for the wrapper.
1341  __ enter();
1342  // -2 because return address is already present and so is saved rbp
1343  __ subptr(rsp, stack_size - 2*wordSize);
1344
1345    // Frame is now completed as far as size and linkage.
1346
1347    int frame_complete = ((intptr_t)__ pc()) - start;
1348
1349#ifdef ASSERT
1350    {
1351      Label L;
1352      __ mov(rax, rsp);
1353      __ andptr(rax, -16); // must be 16 byte boundary (see amd64 ABI)
1354      __ cmpptr(rax, rsp);
1355      __ jcc(Assembler::equal, L);
1356      __ stop("improperly aligned stack");
1357      __ bind(L);
1358    }
1359#endif /* ASSERT */
1360
1361
1362  // We use r14 as the oop handle for the receiver/klass
1363  // It is callee save so it survives the call to native
1364
1365  const Register oop_handle_reg = r14;
1366
1367
1368
1369  //
1370  // We immediately shuffle the arguments so that any vm call we have to
1371  // make from here on out (sync slow path, jvmti, etc.) we will have
1372  // captured the oops from our caller and have a valid oopMap for
1373  // them.
1374
1375  // -----------------
1376  // The Grand Shuffle
1377
1378  // The Java calling convention is either equal (linux) or denser (win64) than the
1379  // C calling convention. However, because of the JNIEnv argument, the C calling
1380  // convention always has at least one more argument (and two more for static) than Java.
1381  // Therefore if we move the args from java -> c backwards then we will never have
1382  // a register->register conflict and we don't have to build a dependency graph
1383  // and figure out how to break any cycles.
1384  //
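  // Roughly: with the one hidden JNIEnv* argument, Java arg i lands in C
  // slot i + 1 (i + 2 for static methods). Moving i from last to first means
  // the destination for arg i either never held a Java arg or held arg i + 1,
  // which has already been moved, so nothing live is ever clobbered.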
1385
1386  // Record esp-based slot for receiver on stack for non-static methods
1387  int receiver_offset = -1;
1388
1389  // This is a trick. We double the stack slots so we can claim
1390  // the oops in the caller's frame. Since we are sure to have
1391  // more args than the caller, doubling is enough to make
1392  // sure we can capture all the incoming oop args from the
1393  // caller.
1394  //
1395  OopMap* map = new OopMap(stack_slots * 2, 0 /* arg_slots*/);
1396
1397  // Mark location of rbp (someday)
1398  // map->set_callee_saved(VMRegImpl::stack2reg( stack_slots - 2), stack_slots * 2, 0, vmreg(rbp));
1399
1400  // Use eax, ebx as temporaries during any memory-memory moves we have to do
1401  // All inbound args are referenced based on rbp and all outbound args via rsp.
1402
1403
1404#ifdef ASSERT
1405  bool reg_destroyed[RegisterImpl::number_of_registers];
1406  bool freg_destroyed[XMMRegisterImpl::number_of_registers];
1407  for ( int r = 0 ; r < RegisterImpl::number_of_registers ; r++ ) {
1408    reg_destroyed[r] = false;
1409  }
1410  for ( int f = 0 ; f < XMMRegisterImpl::number_of_registers ; f++ ) {
1411    freg_destroyed[f] = false;
1412  }
1413
1414#endif /* ASSERT */
1415
1416
1417  int c_arg = total_c_args - 1;
1418  for ( int i = total_in_args - 1; i >= 0 ; i--, c_arg-- ) {
1419#ifdef ASSERT
1420    if (in_regs[i].first()->is_Register()) {
1421      assert(!reg_destroyed[in_regs[i].first()->as_Register()->encoding()], "destroyed reg!");
1422    } else if (in_regs[i].first()->is_XMMRegister()) {
1423      assert(!freg_destroyed[in_regs[i].first()->as_XMMRegister()->encoding()], "destroyed reg!");
1424    }
1425    if (out_regs[c_arg].first()->is_Register()) {
1426      reg_destroyed[out_regs[c_arg].first()->as_Register()->encoding()] = true;
1427    } else if (out_regs[c_arg].first()->is_XMMRegister()) {
1428      freg_destroyed[out_regs[c_arg].first()->as_XMMRegister()->encoding()] = true;
1429    }
1430#endif /* ASSERT */
1431    switch (in_sig_bt[i]) {
1432      case T_ARRAY:
1433      case T_OBJECT:
1434        object_move(masm, map, oop_handle_offset, stack_slots, in_regs[i], out_regs[c_arg],
1435                    ((i == 0) && (!is_static)),
1436                    &receiver_offset);
1437        break;
1438      case T_VOID:
1439        break;
1440
1441      case T_FLOAT:
1442        float_move(masm, in_regs[i], out_regs[c_arg]);
1443        break;
1444
1445      case T_DOUBLE:
1446        assert( i + 1 < total_in_args &&
1447                in_sig_bt[i + 1] == T_VOID &&
1448                out_sig_bt[c_arg+1] == T_VOID, "bad arg list");
1449        double_move(masm, in_regs[i], out_regs[c_arg]);
1450        break;
1451
1452      case T_LONG :
1453        long_move(masm, in_regs[i], out_regs[c_arg]);
1454        break;
1455
1456      case T_ADDRESS: assert(false, "found T_ADDRESS in java args");
1457
1458      default:
1459        move32_64(masm, in_regs[i], out_regs[c_arg]);
1460    }
1461  }
1462
1463  // point c_arg at the first arg that is already loaded in case we
1464  // need to spill before we call out
1465  c_arg++;
1466
1467  // Pre-load a static method's oop into r14.  Used both by locking code and
1468  // the normal JNI call code.
1469  if (method->is_static()) {
1470
1471    //  load oop into a register
1472    __ movoop(oop_handle_reg, JNIHandles::make_local(Klass::cast(method->method_holder())->java_mirror()));
1473
1474    // Now handlize the static class mirror; it's known to be non-null.
1475    __ movptr(Address(rsp, klass_offset), oop_handle_reg);
1476    map->set_oop(VMRegImpl::stack2reg(klass_slot_offset));
1477
1478    // Now get the handle
1479    __ lea(oop_handle_reg, Address(rsp, klass_offset));
1480    // store the klass handle as second argument
1481    __ movptr(c_rarg1, oop_handle_reg);
1482    // and protect the arg if we must spill
1483    c_arg--;
1484  }
1485
1486  // Change state to native (we save the return address in the thread, since it might not
1487  // be pushed on the stack when we do a stack traversal). It is enough that the pc()
1488  // points into the right code segment. It does not have to be the correct return pc.
1489  // We use the same pc/oopMap repeatedly when we call out
1490
1491  intptr_t the_pc = (intptr_t) __ pc();
1492  oop_maps->add_gc_map(the_pc - start, map);
1493
1494  __ set_last_Java_frame(rsp, noreg, (address)the_pc);
1495
1496
1497  // We have all of the arguments set up at this point. We must not touch any of the
1498  // argument registers from here on (if we save/restore them there is no oopMap describing any oops they hold).
1499
1500  {
1501    SkipIfEqual skip(masm, &DTraceMethodProbes, false);
1502    // protect the args we've loaded
1503    save_args(masm, total_c_args, c_arg, out_regs);
1504    __ movoop(c_rarg1, JNIHandles::make_local(method()));
1505    __ call_VM_leaf(
1506      CAST_FROM_FN_PTR(address, SharedRuntime::dtrace_method_entry),
1507      r15_thread, c_rarg1);
1508    restore_args(masm, total_c_args, c_arg, out_regs);
1509  }
1510
1511  // Lock a synchronized method
1512
1513  // Register definitions used by locking and unlocking
1514
1515  const Register swap_reg = rax;  // Must use rax for cmpxchg instruction
1516  const Register obj_reg  = rbx;  // Will contain the oop
1517  const Register lock_reg = r13;  // Address of compiler lock object (BasicLock)
1518  const Register old_hdr  = r13;  // value of old header at unlock time
1519
1520  Label slow_path_lock;
1521  Label lock_done;
1522
1523  if (method->is_synchronized()) {
1524
1525
1526    const int mark_word_offset = BasicLock::displaced_header_offset_in_bytes();
1527
1528    // Get the handle (the 2nd argument)
1529    __ mov(oop_handle_reg, c_rarg1);
1530
1531    // Get address of the box
1532
1533    __ lea(lock_reg, Address(rsp, lock_slot_offset * VMRegImpl::stack_slot_size));
1534
1535    // Load the oop from the handle
1536    __ movptr(obj_reg, Address(oop_handle_reg, 0));
1537
1538    if (UseBiasedLocking) {
1539      __ biased_locking_enter(lock_reg, obj_reg, swap_reg, rscratch1, false, lock_done, &slow_path_lock);
1540    }
1541
1542    // Load immediate 1 into swap_reg %rax
1543    __ movl(swap_reg, 1);
1544
1545    // Load (object->mark() | 1) into swap_reg %rax
1546    __ orptr(swap_reg, Address(obj_reg, 0));
1547
1548    // Save (object->mark() | 1) into BasicLock's displaced header
1549    __ movptr(Address(lock_reg, mark_word_offset), swap_reg);
1550
1551    if (os::is_MP()) {
1552      __ lock();
1553    }
1554
1555    // src -> dest iff dest == rax else rax <- dest
1556    __ cmpxchgptr(lock_reg, Address(obj_reg, 0));
1557    __ jcc(Assembler::equal, lock_done);
1558
1559    // Hmm should this move to the slow path code area???
1560
1561    // Test if the oopMark is an obvious stack pointer, i.e.,
1562    //  1) (mark & 3) == 0, and
1563    //  2) rsp <= mark < mark + os::pagesize()
1564    // These 3 tests can be done by evaluating the following
1565    // expression: ((mark - rsp) & (3 - os::vm_page_size())),
1566    // assuming both stack pointer and pagesize have their
1567    // least significant 2 bits clear.
1568    // NOTE: the oopMark is in swap_reg %rax as the result of cmpxchg
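    // Worked example, assuming 4K pages: 3 - 4096 == 0xfffffffffffff003, so the
    // AND below is zero only when bits 0-1 of (mark - rsp) are clear (the mark
    // is word aligned and rsp's low bits are clear) and no bit at or above
    // bit 12 is set (0 <= mark - rsp < 4096).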
1569
1570    __ subptr(swap_reg, rsp);
1571    __ andptr(swap_reg, 3 - os::vm_page_size());
1572
1573    // Save the test result, for recursive case, the result is zero
1574    __ movptr(Address(lock_reg, mark_word_offset), swap_reg);
1575    __ jcc(Assembler::notEqual, slow_path_lock);
1576
1577    // Slow path will re-enter here
1578
1579    __ bind(lock_done);
1580  }
1581
1582
1583  // Finally just about ready to make the JNI call
1584
1585
1586  // get JNIEnv* which is first argument to native
1587
1588  __ lea(c_rarg0, Address(r15_thread, in_bytes(JavaThread::jni_environment_offset())));
1589
1590  // Now set thread in native
1591  __ movl(Address(r15_thread, JavaThread::thread_state_offset()), _thread_in_native);
1592
1593  __ call(RuntimeAddress(method->native_function()));
1594
1595    // Either restore the MXCSR register after returning from the JNI Call
1596    // or verify that it wasn't changed.
1597    if (RestoreMXCSROnJNICalls) {
1598      __ ldmxcsr(ExternalAddress(StubRoutines::x86::mxcsr_std()));
1599
1600    }
1601    else if (CheckJNICalls) {
1602      __ call(RuntimeAddress(CAST_FROM_FN_PTR(address, StubRoutines::x86::verify_mxcsr_entry())));
1603    }
1604
1605
1606  // Unpack native results.
1607  switch (ret_type) {
1608  case T_BOOLEAN: __ c2bool(rax);            break;
1609  case T_CHAR   : __ movzwl(rax, rax);      break;
1610  case T_BYTE   : __ sign_extend_byte (rax); break;
1611  case T_SHORT  : __ sign_extend_short(rax); break;
1612  case T_INT    : /* nothing to do */        break;
1613  case T_DOUBLE :
1614  case T_FLOAT  :
1615    // Result is in xmm0; we'll save as needed
1616    break;
1617  case T_ARRAY:                 // Really a handle
1618  case T_OBJECT:                // Really a handle
1619      break; // can't de-handlize until after safepoint check
1620  case T_VOID: break;
1621  case T_LONG: break;
1622  default       : ShouldNotReachHere();
1623  }
1624
1625  // Switch thread to "native transition" state before reading the synchronization state.
1626  // This additional state is necessary because reading and testing the synchronization
1627  // state is not atomic w.r.t. GC, as this scenario demonstrates:
1628  //     Java thread A, in _thread_in_native state, loads _not_synchronized and is preempted.
1629  //     VM thread changes sync state to synchronizing and suspends threads for GC.
1630  //     Thread A is resumed to finish this native method, but doesn't block here since it
1631  //     didn't see any synchronization in progress, and escapes.
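  // In other words, the transition here is
  //   _thread_in_native -> _thread_in_native_trans -> (safepoint check) -> _thread_in_Java
  // and a thread caught in _thread_in_native_trans blocks in
  // check_special_condition_for_native_trans below instead of escaping.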
1632  __ movl(Address(r15_thread, JavaThread::thread_state_offset()), _thread_in_native_trans);
1633
1634  if (os::is_MP()) {
1635    if (UseMembar) {
1636      // Force this write out before the read below
1637      __ membar(Assembler::Membar_mask_bits(
1638           Assembler::LoadLoad | Assembler::LoadStore |
1639           Assembler::StoreLoad | Assembler::StoreStore));
1640    } else {
1641      // Write serialization page so VM thread can do a pseudo remote membar.
1642      // We use the current thread pointer to calculate a thread specific
1643      // offset to write to within the page. This minimizes bus traffic
1644      // due to cache line collision.
1645      __ serialize_memory(r15_thread, rcx);
1646    }
1647  }
1648
1649
1650  // check for safepoint operation in progress and/or pending suspend requests
1651  {
1652    Label Continue;
1653
1654    __ cmp32(ExternalAddress((address)SafepointSynchronize::address_of_state()),
1655             SafepointSynchronize::_not_synchronized);
1656
1657    Label L;
1658    __ jcc(Assembler::notEqual, L);
1659    __ cmpl(Address(r15_thread, JavaThread::suspend_flags_offset()), 0);
1660    __ jcc(Assembler::equal, Continue);
1661    __ bind(L);
1662
1663    // Don't use call_VM as it will see a possible pending exception and forward it
1664    // and never return here, preventing us from clearing _last_native_pc down below.
1665    // Also can't use call_VM_leaf either as it will check to see if rsi & rdi are
1666    // preserved and correspond to the bcp/locals pointers. So we do a runtime call
1667    // by hand.
1668    //
1669    save_native_result(masm, ret_type, stack_slots);
1670    __ mov(c_rarg0, r15_thread);
1671    __ mov(r12, rsp); // remember sp
1672    __ subptr(rsp, frame::arg_reg_save_area_bytes); // windows
1673    __ andptr(rsp, -16); // align stack as required by ABI
1674    __ call(RuntimeAddress(CAST_FROM_FN_PTR(address, JavaThread::check_special_condition_for_native_trans)));
1675    __ mov(rsp, r12); // restore sp
1676    __ reinit_heapbase();
1677    // Restore any method result value
1678    restore_native_result(masm, ret_type, stack_slots);
1679    __ bind(Continue);
1680  }
1681
1682  // change thread state
1683  __ movl(Address(r15_thread, JavaThread::thread_state_offset()), _thread_in_Java);
1684
1685  Label reguard;
1686  Label reguard_done;
1687  __ cmpl(Address(r15_thread, JavaThread::stack_guard_state_offset()), JavaThread::stack_guard_yellow_disabled);
1688  __ jcc(Assembler::equal, reguard);
1689  __ bind(reguard_done);
1690
1691  // native result if any is live
1692
1693  // Unlock
1694  Label unlock_done;
1695  Label slow_path_unlock;
1696  if (method->is_synchronized()) {
1697
1698    // Get locked oop from the handle we passed to jni
1699    __ movptr(obj_reg, Address(oop_handle_reg, 0));
1700
1701    Label done;
1702
1703    if (UseBiasedLocking) {
1704      __ biased_locking_exit(obj_reg, old_hdr, done);
1705    }
1706
1707    // Simple recursive lock?
1708
1709    __ cmpptr(Address(rsp, lock_slot_offset * VMRegImpl::stack_slot_size), (int32_t)NULL_WORD);
1710    __ jcc(Assembler::equal, done);
1711
1712    // Must save rax if it is live now because cmpxchg must use it
1713    if (ret_type != T_FLOAT && ret_type != T_DOUBLE && ret_type != T_VOID) {
1714      save_native_result(masm, ret_type, stack_slots);
1715    }
1716
1717
1718    // get address of the stack lock
1719    __ lea(rax, Address(rsp, lock_slot_offset * VMRegImpl::stack_slot_size));
1720    //  get old displaced header
1721    __ movptr(old_hdr, Address(rax, 0));
1722
1723    // Atomic swap old header if oop still contains the stack lock
1724    if (os::is_MP()) {
1725      __ lock();
1726    }
1727    __ cmpxchgptr(old_hdr, Address(obj_reg, 0));
1728    __ jcc(Assembler::notEqual, slow_path_unlock);
1729
1730    // slow path re-enters here
1731    __ bind(unlock_done);
1732    if (ret_type != T_FLOAT && ret_type != T_DOUBLE && ret_type != T_VOID) {
1733      restore_native_result(masm, ret_type, stack_slots);
1734    }
1735
1736    __ bind(done);
1737
1738  }
1739  {
1740    SkipIfEqual skip(masm, &DTraceMethodProbes, false);
1741    save_native_result(masm, ret_type, stack_slots);
1742    __ movoop(c_rarg1, JNIHandles::make_local(method()));
1743    __ call_VM_leaf(
1744         CAST_FROM_FN_PTR(address, SharedRuntime::dtrace_method_exit),
1745         r15_thread, c_rarg1);
1746    restore_native_result(masm, ret_type, stack_slots);
1747  }
1748
1749  __ reset_last_Java_frame(false, true);
1750
1751  // Unpack oop result
1752  if (ret_type == T_OBJECT || ret_type == T_ARRAY) {
1753      Label L;
1754      __ testptr(rax, rax);
1755      __ jcc(Assembler::zero, L);
1756      __ movptr(rax, Address(rax, 0));
1757      __ bind(L);
1758      __ verify_oop(rax);
1759  }
1760
1761  // reset handle block
1762  __ movptr(rcx, Address(r15_thread, JavaThread::active_handles_offset()));
1763  __ movptr(Address(rcx, JNIHandleBlock::top_offset_in_bytes()), (int32_t)NULL_WORD);
1764
1765  // pop our frame
1766
1767  __ leave();
1768
1769  // Any exception pending?
1770  __ cmpptr(Address(r15_thread, in_bytes(Thread::pending_exception_offset())), (int32_t)NULL_WORD);
1771  __ jcc(Assembler::notEqual, exception_pending);
1772
1773  // Return
1774
1775  __ ret(0);
1776
1777  // Unexpected paths are out of line and go here
1778
1779  // forward the exception
1780  __ bind(exception_pending);
1781
1782  // and forward the exception
1783  __ jump(RuntimeAddress(StubRoutines::forward_exception_entry()));
1784
1785
1786  // Slow path locking & unlocking
1787  if (method->is_synchronized()) {
1788
1789    // BEGIN Slow path lock
1790    __ bind(slow_path_lock);
1791
1792    // We have last_Java_frame set up. No exceptions, so do a vanilla call, not call_VM.
1793    // args are (oop obj, BasicLock* lock, JavaThread* thread)
1794
1795    // protect the args we've loaded
1796    save_args(masm, total_c_args, c_arg, out_regs);
1797
1798    __ mov(c_rarg0, obj_reg);
1799    __ mov(c_rarg1, lock_reg);
1800    __ mov(c_rarg2, r15_thread);
1801
1802    // Not a leaf but we have last_Java_frame setup as we want
1803    __ call_VM_leaf(CAST_FROM_FN_PTR(address, SharedRuntime::complete_monitor_locking_C), 3);
1804    restore_args(masm, total_c_args, c_arg, out_regs);
1805
1806#ifdef ASSERT
1807    { Label L;
1808    __ cmpptr(Address(r15_thread, in_bytes(Thread::pending_exception_offset())), (int32_t)NULL_WORD);
1809    __ jcc(Assembler::equal, L);
1810    __ stop("no pending exception allowed on exit from monitorenter");
1811    __ bind(L);
1812    }
1813#endif
1814    __ jmp(lock_done);
1815
1816    // END Slow path lock
1817
1818    // BEGIN Slow path unlock
1819    __ bind(slow_path_unlock);
1820
1821    // If we haven't already saved the native result we must save it now as xmm registers
1822    // are still exposed.
1823
1824    if (ret_type == T_FLOAT || ret_type == T_DOUBLE ) {
1825      save_native_result(masm, ret_type, stack_slots);
1826    }
1827
1828    __ lea(c_rarg1, Address(rsp, lock_slot_offset * VMRegImpl::stack_slot_size));
1829
1830    __ mov(c_rarg0, obj_reg);
1831    __ mov(r12, rsp); // remember sp
1832    __ subptr(rsp, frame::arg_reg_save_area_bytes); // windows
1833    __ andptr(rsp, -16); // align stack as required by ABI
1834
1835    // Save pending exception around call to VM (which contains an EXCEPTION_MARK)
1836    // NOTE that obj_reg == rbx currently
1837    __ movptr(rbx, Address(r15_thread, in_bytes(Thread::pending_exception_offset())));
1838    __ movptr(Address(r15_thread, in_bytes(Thread::pending_exception_offset())), (int32_t)NULL_WORD);
1839
1840    __ call(RuntimeAddress(CAST_FROM_FN_PTR(address, SharedRuntime::complete_monitor_unlocking_C)));
1841    __ mov(rsp, r12); // restore sp
1842    __ reinit_heapbase();
1843#ifdef ASSERT
1844    {
1845      Label L;
1846      __ cmpptr(Address(r15_thread, in_bytes(Thread::pending_exception_offset())), (int)NULL_WORD);
1847      __ jcc(Assembler::equal, L);
1848      __ stop("no pending exception allowed on exit complete_monitor_unlocking_C");
1849      __ bind(L);
1850    }
1851#endif /* ASSERT */
1852
1853    __ movptr(Address(r15_thread, in_bytes(Thread::pending_exception_offset())), rbx);
1854
1855    if (ret_type == T_FLOAT || ret_type == T_DOUBLE ) {
1856      restore_native_result(masm, ret_type, stack_slots);
1857    }
1858    __ jmp(unlock_done);
1859
1860    // END Slow path unlock
1861
1862  } // synchronized
1863
1864  // SLOW PATH Reguard the stack if needed
1865
1866  __ bind(reguard);
1867  save_native_result(masm, ret_type, stack_slots);
1868  __ mov(r12, rsp); // remember sp
1869  __ subptr(rsp, frame::arg_reg_save_area_bytes); // windows
1870  __ andptr(rsp, -16); // align stack as required by ABI
1871  __ call(RuntimeAddress(CAST_FROM_FN_PTR(address, SharedRuntime::reguard_yellow_pages)));
1872  __ mov(rsp, r12); // restore sp
1873  __ reinit_heapbase();
1874  restore_native_result(masm, ret_type, stack_slots);
1875  // and continue
1876  __ jmp(reguard_done);
1877
1878
1879
1880  __ flush();
1881
1882  nmethod *nm = nmethod::new_native_nmethod(method,
1883                                            masm->code(),
1884                                            vep_offset,
1885                                            frame_complete,
1886                                            stack_slots / VMRegImpl::slots_per_word,
1887                                            (is_static ? in_ByteSize(klass_offset) : in_ByteSize(receiver_offset)),
1888                                            in_ByteSize(lock_slot_offset*VMRegImpl::stack_slot_size),
1889                                            oop_maps);
1890  return nm;
1891
1892}
1893
1894#ifdef HAVE_DTRACE_H
1895// ---------------------------------------------------------------------------
1896// Generate a dtrace nmethod for a given signature.  The method takes arguments
1897// in the Java compiled code convention, marshals them to the native
1898// abi and then leaves nops at the position you would expect to call a native
1899// function. When the probe is enabled the nops are replaced with a trap
1900  // instruction that dtrace inserts, and the trap will cause a notification
1901// to dtrace.
1902//
1903// The probes are only able to take primitive types and java/lang/String as
1904// arguments.  No other java types are allowed. Strings are converted to utf8
1905// strings so that from dtrace point of view java strings are converted to C
1906// strings. There is an arbitrary fixed limit on the total space that a method
1907// can use for converting the strings. (256 chars per string in the signature).
1908  // So any java string larger than this is truncated.
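// E.g., a probe generated for foo(String s, int n) would observe
// (const char* utf8_of_s, int n); an unsupported object argument simply
// shows up as NULL, and boxed primitives arrive as their primitive values.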
1909
1910static int  fp_offset[ConcreteRegisterImpl::number_of_registers] = { 0 };
1911static bool offsets_initialized = false;
1912
1913
1914nmethod *SharedRuntime::generate_dtrace_nmethod(MacroAssembler *masm,
1915                                                methodHandle method) {
1916
1917
1918  // generate_dtrace_nmethod is guarded by a mutex so we are sure to
1919  // be single threaded in this method.
1920  assert(AdapterHandlerLibrary_lock->owned_by_self(), "must be");
1921
1922  if (!offsets_initialized) {
1923    fp_offset[c_rarg0->as_VMReg()->value()] = -1 * wordSize;
1924    fp_offset[c_rarg1->as_VMReg()->value()] = -2 * wordSize;
1925    fp_offset[c_rarg2->as_VMReg()->value()] = -3 * wordSize;
1926    fp_offset[c_rarg3->as_VMReg()->value()] = -4 * wordSize;
1927    fp_offset[c_rarg4->as_VMReg()->value()] = -5 * wordSize;
1928    fp_offset[c_rarg5->as_VMReg()->value()] = -6 * wordSize;
1929
1930    fp_offset[c_farg0->as_VMReg()->value()] = -7 * wordSize;
1931    fp_offset[c_farg1->as_VMReg()->value()] = -8 * wordSize;
1932    fp_offset[c_farg2->as_VMReg()->value()] = -9 * wordSize;
1933    fp_offset[c_farg3->as_VMReg()->value()] = -10 * wordSize;
1934    fp_offset[c_farg4->as_VMReg()->value()] = -11 * wordSize;
1935    fp_offset[c_farg5->as_VMReg()->value()] = -12 * wordSize;
1936    fp_offset[c_farg6->as_VMReg()->value()] = -13 * wordSize;
1937    fp_offset[c_farg7->as_VMReg()->value()] = -14 * wordSize;
1938
1939    offsets_initialized = true;
1940  }
1941  // Fill in the signature array, for the calling-convention call.
1942  int total_args_passed = method->size_of_parameters();
1943
1944  BasicType* in_sig_bt  = NEW_RESOURCE_ARRAY(BasicType, total_args_passed);
1945  VMRegPair  *in_regs   = NEW_RESOURCE_ARRAY(VMRegPair, total_args_passed);
1946
1947  // The signature we are going to use for the trap that dtrace will see:
1948  // java/lang/String is converted to a utf8 pointer. We drop "this", and any
1949  // other object is converted to NULL.  (A one-slot java/lang/Long object
1950  // reference is converted to a two-slot long, which is why we double the allocation).
1951  BasicType* out_sig_bt = NEW_RESOURCE_ARRAY(BasicType, total_args_passed * 2);
1952  VMRegPair* out_regs   = NEW_RESOURCE_ARRAY(VMRegPair, total_args_passed * 2);
1953
1954  int i=0;
1955  int total_strings = 0;
1956  int first_arg_to_pass = 0;
1957  int total_c_args = 0;
1958
1959  // Skip the receiver as dtrace doesn't want to see it
1960  if( !method->is_static() ) {
1961    in_sig_bt[i++] = T_OBJECT;
1962    first_arg_to_pass = 1;
1963  }
1964
1965  // We need to convert the java args to where a native (non-jni) function
1966  // would expect them. To figure out where they go we convert the java
1967  // signature to a C signature.
1968
1969  SignatureStream ss(method->signature());
1970  for ( ; !ss.at_return_type(); ss.next()) {
1971    BasicType bt = ss.type();
1972    in_sig_bt[i++] = bt;  // Collect remaining bits of signature
1973    out_sig_bt[total_c_args++] = bt;
1974    if( bt == T_OBJECT) {
1975      symbolOop s = ss.as_symbol_or_null();
1976      if (s == vmSymbols::java_lang_String()) {
1977        total_strings++;
1978        out_sig_bt[total_c_args-1] = T_ADDRESS;
1979      } else if (s == vmSymbols::java_lang_Boolean() ||
1980                 s == vmSymbols::java_lang_Character() ||
1981                 s == vmSymbols::java_lang_Byte() ||
1982                 s == vmSymbols::java_lang_Short() ||
1983                 s == vmSymbols::java_lang_Integer() ||
1984                 s == vmSymbols::java_lang_Float()) {
1985        out_sig_bt[total_c_args-1] = T_INT;
1986      } else if (s == vmSymbols::java_lang_Long() ||
1987                 s == vmSymbols::java_lang_Double()) {
1988        out_sig_bt[total_c_args-1] = T_LONG;
1989        out_sig_bt[total_c_args++] = T_VOID;
1990      }
1991    } else if ( bt == T_LONG || bt == T_DOUBLE ) {
1992      in_sig_bt[i++] = T_VOID;   // Longs & doubles take 2 Java slots
1993      // We convert double to long
1994      out_sig_bt[total_c_args-1] = T_LONG;
1995      out_sig_bt[total_c_args++] = T_VOID;
1996    } else if ( bt == T_FLOAT) {
1997      // We convert float to int
1998      out_sig_bt[total_c_args-1] = T_INT;
1999    }
2000  }
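  // Worked example (illustration only): for (Ljava/lang/String;DLjava/lang/Long;)V
  // on an instance method,
  //   in_sig_bt  = { T_OBJECT /* this */, T_OBJECT, T_DOUBLE, T_VOID, T_OBJECT }
  //   out_sig_bt = { T_ADDRESS, T_LONG, T_VOID, T_LONG, T_VOID }
  // and total_c_args == 5.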
2001
2002  assert(i==total_args_passed, "validly parsed signature");
2003
2004  // Now get the compiled-Java layout as input arguments
2005  int comp_args_on_stack;
2006  comp_args_on_stack = SharedRuntime::java_calling_convention(
2007      in_sig_bt, in_regs, total_args_passed, false);
2008
2009  // Now figure out where the args must be stored and how much stack space
2010  // they require (neglecting out_preserve_stack_slots, but including space for
2011  // storing the 1st six register arguments). It's weird; see int_stk_helper.
2012
2013  int out_arg_slots;
2014  out_arg_slots = c_calling_convention(out_sig_bt, out_regs, total_c_args);
2015
2016  // Calculate the total number of stack slots we will need.
2017
2018  // First count the abi requirement plus all of the outgoing args
2019  int stack_slots = SharedRuntime::out_preserve_stack_slots() + out_arg_slots;
2020
2021  // Now space for the string(s) we must convert
2022  int* string_locs   = NEW_RESOURCE_ARRAY(int, total_strings + 1);
2023  for (i = 0; i < total_strings ; i++) {
2024    string_locs[i] = stack_slots;
2025    stack_slots += max_dtrace_string_size / VMRegImpl::stack_slot_size;
2026  }
2027
2028  // Plus the temps we might need to juggle register args
2029  // regs take two slots each
2030  stack_slots += (Argument::n_int_register_parameters_c +
2031                  Argument::n_float_register_parameters_c) * 2;
2032
2033
2034  // + 4 for return address (which we own) and saved rbp
2035
2036  stack_slots += 4;
2037
2038  // Ok The space we have allocated will look like:
2039  //
2040  //
2041  // FP-> |                     |
2042  //      |---------------------|
2043  //      | string[n]           |
2044  //      |---------------------| <- string_locs[n]
2045  //      | string[n-1]         |
2046  //      |---------------------| <- string_locs[n-1]
2047  //      | ...                 |
2048  //      | ...                 |
2049  //      |---------------------| <- string_locs[1]
2050  //      | string[0]           |
2051  //      |---------------------| <- string_locs[0]
2052  //      | outbound memory     |
2053  //      | based arguments     |
2054  //      |                     |
2055  //      |---------------------|
2056  //      |                     |
2057  // SP-> | out_preserved_slots |
2058  //
2059  //
2060
2061  // Now compute actual number of stack words we need rounding to make
2062  // stack properly aligned.
2063  stack_slots = round_to(stack_slots, 4 * VMRegImpl::slots_per_word);
2064
2065  int stack_size = stack_slots * VMRegImpl::stack_slot_size;
2066
2067  intptr_t start = (intptr_t)__ pc();
2068
2069  // First thing: make an IC check to see if we should even be here
2070
2071  // We are free to use all registers as temps without saving them and
2072  // restoring them except rbp. rbp is the only callee save register
2073  // as far as the interpreter and the compiler(s) are concerned.
2074
2075  const Register ic_reg = rax;
2076  const Register receiver = rcx;
2077  Label hit;
2078  Label exception_pending;
2079
2080
2081  __ verify_oop(receiver);
2082  __ cmpl(ic_reg, Address(receiver, oopDesc::klass_offset_in_bytes()));
2083  __ jcc(Assembler::equal, hit);
2084
2085  __ jump(RuntimeAddress(SharedRuntime::get_ic_miss_stub()));
2086
2087  // The verified entry must be aligned for code patching,
2088  // and the first 5 bytes must be in the same cache line.
2089  // If we align at 8 then we can be sure the 5 bytes are in the same line.
2090  __ align(8);
2091
2092  __ bind(hit);
2093
2094  int vep_offset = ((intptr_t)__ pc()) - start;
2095
2096
2097  // The instruction at the verified entry point must be 5 bytes or longer
2098  // because it can be patched on the fly by make_non_entrant. The stack bang
2099  // instruction fits that requirement.
2100
2101  // Generate stack overflow check
2102
2103  if (UseStackBanging) {
2104    if (stack_size <= StackShadowPages*os::vm_page_size()) {
2105      __ bang_stack_with_offset(StackShadowPages*os::vm_page_size());
2106    } else {
2107      __ movl(rax, stack_size);
2108      __ bang_stack_size(rax, rbx);
2109    }
2110  } else {
2111    // need a 5 byte instruction to allow MT safe patching to non-entrant
2112    __ fat_nop();
2113  }
2114
2115  assert(((uintptr_t)__ pc() - start - vep_offset) >= 5,
2116         "valid size for make_non_entrant");
2117
2118  // Generate a new frame for the wrapper.
2119  __ enter();
2120
2121  // -4 slots (2 words) because return address is already present and so is saved rbp
2122  if (stack_size - 2*wordSize != 0) {
2123    __ subq(rsp, stack_size - 2*wordSize);
2124  }
2125
2126  // Frame is now completed as far as size and linkage.
2127
2128  int frame_complete = ((intptr_t)__ pc()) - start;
2129
2130  int c_arg, j_arg;
2131
2132  // State of input register args
2133
2134  bool  live[ConcreteRegisterImpl::number_of_registers];
2135
2136  live[j_rarg0->as_VMReg()->value()] = false;
2137  live[j_rarg1->as_VMReg()->value()] = false;
2138  live[j_rarg2->as_VMReg()->value()] = false;
2139  live[j_rarg3->as_VMReg()->value()] = false;
2140  live[j_rarg4->as_VMReg()->value()] = false;
2141  live[j_rarg5->as_VMReg()->value()] = false;
2142
2143  live[j_farg0->as_VMReg()->value()] = false;
2144  live[j_farg1->as_VMReg()->value()] = false;
2145  live[j_farg2->as_VMReg()->value()] = false;
2146  live[j_farg3->as_VMReg()->value()] = false;
2147  live[j_farg4->as_VMReg()->value()] = false;
2148  live[j_farg5->as_VMReg()->value()] = false;
2149  live[j_farg6->as_VMReg()->value()] = false;
2150  live[j_farg7->as_VMReg()->value()] = false;
2151
2152
2153  bool rax_is_zero = false;
2154
2155  // All args (except strings) destined for the stack are moved first
2156  for (j_arg = first_arg_to_pass, c_arg = 0 ;
2157       j_arg < total_args_passed ; j_arg++, c_arg++ ) {
2158    VMRegPair src = in_regs[j_arg];
2159    VMRegPair dst = out_regs[c_arg];
2160
2161    // Get the real reg value or a dummy (rsp)
2162
2163    int src_reg = src.first()->is_reg() ?
2164                  src.first()->value() :
2165                  rsp->as_VMReg()->value();
2166
2167    bool useless =  in_sig_bt[j_arg] == T_ARRAY ||
2168                    (in_sig_bt[j_arg] == T_OBJECT &&
2169                     out_sig_bt[c_arg] != T_INT &&
2170                     out_sig_bt[c_arg] != T_ADDRESS &&
2171                     out_sig_bt[c_arg] != T_LONG);
2172
2173    live[src_reg] = !useless;
2174
2175    if (dst.first()->is_stack()) {
2176
2177      // Even though a string arg in a register is still live after this loop,
2178      // it will be dead after the string conversion loop (next), so we take
2179      // advantage of that now for simpler management of the live array.
2180
2181      live[src_reg] = false;
2182      switch (in_sig_bt[j_arg]) {
2183
2184        case T_ARRAY:
2185        case T_OBJECT:
2186          {
2187            Address stack_dst(rsp, reg2offset_out(dst.first()));
2188
2189            if (out_sig_bt[c_arg] == T_INT || out_sig_bt[c_arg] == T_LONG) {
2190              // need to unbox a one-word value
2191              Register in_reg = rax;
2192              if ( src.first()->is_reg() ) {
2193                in_reg = src.first()->as_Register();
2194              } else {
2195                __ movq(rax, Address(rbp, reg2offset_in(src.first())));
2196                rax_is_zero = false;
2197              }
2198              Label skipUnbox;
2199              __ movptr(Address(rsp, reg2offset_out(dst.first())),
2200                        (int32_t)NULL_WORD);
2201              __ testq(in_reg, in_reg);
2202              __ jcc(Assembler::zero, skipUnbox);
2203
2204              BasicType bt = out_sig_bt[c_arg];
2205              int box_offset = java_lang_boxing_object::value_offset_in_bytes(bt);
2206              Address src1(in_reg, box_offset);
2207              if ( bt == T_LONG ) {
2208                __ movq(in_reg,  src1);
2209                __ movq(stack_dst, in_reg);
2210                assert(out_sig_bt[c_arg+1] == T_VOID, "must be");
2211                ++c_arg; // skip over T_VOID to keep the loop indices in sync
2212              } else {
2213                __ movl(in_reg,  src1);
2214                __ movl(stack_dst, in_reg);
2215              }
2216
2217              __ bind(skipUnbox);
2218            } else if (out_sig_bt[c_arg] != T_ADDRESS) {
2219              // Convert the arg to NULL
2220              if (!rax_is_zero) {
2221                __ xorq(rax, rax);
2222                rax_is_zero = true;
2223              }
2224              __ movq(stack_dst, rax);
2225            }
2226          }
2227          break;
2228
2229        case T_VOID:
2230          break;
2231
2232        case T_FLOAT:
2233          // This does the right thing since we know it is destined for the
2234          // stack
2235          float_move(masm, src, dst);
2236          break;
2237
2238        case T_DOUBLE:
2239          // This does the right thing since we know it is destined for the
2240          // stack
2241          double_move(masm, src, dst);
2242          break;
2243
2244        case T_LONG :
2245          long_move(masm, src, dst);
2246          break;
2247
2248        case T_ADDRESS: assert(false, "found T_ADDRESS in java args");
2249
2250        default:
2251          move32_64(masm, src, dst);
2252      }
2253    }
2254
2255  }
2256
2257  // If we have any strings we must store any register based arg to the stack
2258  // This includes any still live xmm registers too.
2259
2260  int sid = 0;
2261
2262  if (total_strings > 0 ) {
2263    for (j_arg = first_arg_to_pass, c_arg = 0 ;
2264         j_arg < total_args_passed ; j_arg++, c_arg++ ) {
2265      VMRegPair src = in_regs[j_arg];
2266      VMRegPair dst = out_regs[c_arg];
2267
2268      if (src.first()->is_reg()) {
2269        Address src_tmp(rbp, fp_offset[src.first()->value()]);
2270
2271        // string oops were left untouched by the previous loop even if the
2272        // eventual (converted) arg is destined for the stack, so park them
2273        // away now (except for the first)
2274
2275        if (out_sig_bt[c_arg] == T_ADDRESS) {
2276          Address utf8_addr = Address(
2277              rsp, string_locs[sid++] * VMRegImpl::stack_slot_size);
2278          if (sid != 1) {
2279            // The first string arg won't be killed until after the utf8
2280            // conversion
2281            __ movq(utf8_addr, src.first()->as_Register());
2282          }
2283        } else if (dst.first()->is_reg()) {
2284          if (in_sig_bt[j_arg] == T_FLOAT || in_sig_bt[j_arg] == T_DOUBLE) {
2285
2286            // Convert the xmm register to an int and store it in the reserved
2287            // location for the eventual c register arg
2288            XMMRegister f = src.first()->as_XMMRegister();
2289            if (in_sig_bt[j_arg] == T_FLOAT) {
2290              __ movflt(src_tmp, f);
2291            } else {
2292              __ movdbl(src_tmp, f);
2293            }
2294          } else {
2295            // If the arg is an oop type we don't support, don't bother to store
2296            // it; remember, strings were handled above.
2297            bool useless =  in_sig_bt[j_arg] == T_ARRAY ||
2298                            (in_sig_bt[j_arg] == T_OBJECT &&
2299                             out_sig_bt[c_arg] != T_INT &&
2300                             out_sig_bt[c_arg] != T_LONG);
2301
2302            if (!useless) {
2303              __ movq(src_tmp, src.first()->as_Register());
2304            }
2305          }
2306        }
2307      }
2308      if (in_sig_bt[j_arg] == T_OBJECT && out_sig_bt[c_arg] == T_LONG) {
2309        assert(out_sig_bt[c_arg+1] == T_VOID, "must be");
2310        ++c_arg; // skip over T_VOID to keep the loop indices in sync
2311      }
2312    }
2313
2314    // Now that the volatile registers are safe, convert all the strings
2315    sid = 0;
2316
2317    for (j_arg = first_arg_to_pass, c_arg = 0 ;
2318         j_arg < total_args_passed ; j_arg++, c_arg++ ) {
2319      if (out_sig_bt[c_arg] == T_ADDRESS) {
2320        // It's a string
2321        Address utf8_addr = Address(
2322            rsp, string_locs[sid++] * VMRegImpl::stack_slot_size);
2323        // The first string we find might still be in the original java arg
2324        // register
2325
2326        VMReg src = in_regs[j_arg].first();
2327
2328        // We will need to eventually save the final argument to the trap
2329        // in the non-volatile location dedicated to src. This is the offset
2330        // from fp we will use.
2331        int src_off = src->is_reg() ?
2332            fp_offset[src->value()] : reg2offset_in(src);
2333
2334        // This is where the argument will eventually reside
2335        VMRegPair dst = out_regs[c_arg];
2336
2337        if (src->is_reg()) {
2338          if (sid == 1) {
2339            __ movq(c_rarg0, src->as_Register());
2340          } else {
2341            __ movq(c_rarg0, utf8_addr);
2342          }
2343        } else {
2344          // arg is still in the original location
2345          __ movq(c_rarg0, Address(rbp, reg2offset_in(src)));
2346        }
2347        Label done, convert;
2348
2349        // see if the oop is NULL
2350        __ testq(c_rarg0, c_rarg0);
2351        __ jcc(Assembler::notEqual, convert);
2352
2353        if (dst.first()->is_reg()) {
2354          // Save the ptr to the utf8 string in the original src loc or the tmp
2355          // dedicated to it
2356          __ movq(Address(rbp, src_off), c_rarg0);
2357        } else {
2358          __ movq(Address(rsp, reg2offset_out(dst.first())), c_rarg0);
2359        }
2360        __ jmp(done);
2361
2362        __ bind(convert);
2363
2364        __ lea(c_rarg1, utf8_addr);
2365        if (dst.first()->is_reg()) {
2366          __ movq(Address(rbp, src_off), c_rarg1);
2367        } else {
2368          __ movq(Address(rsp, reg2offset_out(dst.first())), c_rarg1);
2369        }
2370        // And do the conversion
2371        __ call(RuntimeAddress(
2372                CAST_FROM_FN_PTR(address, SharedRuntime::get_utf)));
2373
2374        __ bind(done);
2375      }
2376      if (in_sig_bt[j_arg] == T_OBJECT && out_sig_bt[c_arg] == T_LONG) {
2377        assert(out_sig_bt[c_arg+1] == T_VOID, "must be");
2378        ++c_arg; // skip over T_VOID to keep the loop indices in sync
2379      }
2380    }
2381    // The get_utf call killed all the c_arg registers
2382    live[c_rarg0->as_VMReg()->value()] = false;
2383    live[c_rarg1->as_VMReg()->value()] = false;
2384    live[c_rarg2->as_VMReg()->value()] = false;
2385    live[c_rarg3->as_VMReg()->value()] = false;
2386    live[c_rarg4->as_VMReg()->value()] = false;
2387    live[c_rarg5->as_VMReg()->value()] = false;
2388
2389    live[c_farg0->as_VMReg()->value()] = false;
2390    live[c_farg1->as_VMReg()->value()] = false;
2391    live[c_farg2->as_VMReg()->value()] = false;
2392    live[c_farg3->as_VMReg()->value()] = false;
2393    live[c_farg4->as_VMReg()->value()] = false;
2394    live[c_farg5->as_VMReg()->value()] = false;
2395    live[c_farg6->as_VMReg()->value()] = false;
2396    live[c_farg7->as_VMReg()->value()] = false;
2397  }
2398
2399  // Now we can finally move the register args to their desired locations
2400
2401  rax_is_zero = false;
2402
2403  for (j_arg = first_arg_to_pass, c_arg = 0 ;
2404       j_arg < total_args_passed ; j_arg++, c_arg++ ) {
2405
2406    VMRegPair src = in_regs[j_arg];
2407    VMRegPair dst = out_regs[c_arg];
2408
2409    // Only need to look for args destined for the integer registers (since we
2410    // convert float/double args to look like int/long outbound)
2411    if (dst.first()->is_reg()) {
2412      Register r =  dst.first()->as_Register();
2413
2414      // Check if the java arg is unsupported and therefore useless
2415      bool useless =  in_sig_bt[j_arg] == T_ARRAY ||
2416                      (in_sig_bt[j_arg] == T_OBJECT &&
2417                       out_sig_bt[c_arg] != T_INT &&
2418                       out_sig_bt[c_arg] != T_ADDRESS &&
2419                       out_sig_bt[c_arg] != T_LONG);
2420
2421
2422      // If we're going to kill an existing arg, save it first
2423      if (live[dst.first()->value()]) {
2424        // you can't kill yourself
2425        if (src.first() != dst.first()) {
2426          __ movq(Address(rbp, fp_offset[dst.first()->value()]), r);
2427        }
2428      }
2429      if (src.first()->is_reg()) {
2430        if (live[src.first()->value()] ) {
2431          if (in_sig_bt[j_arg] == T_FLOAT) {
2432            __ movdl(r, src.first()->as_XMMRegister());
2433          } else if (in_sig_bt[j_arg] == T_DOUBLE) {
2434            __ movdq(r, src.first()->as_XMMRegister());
2435          } else if (r != src.first()->as_Register()) {
2436            if (!useless) {
2437              __ movq(r, src.first()->as_Register());
2438            }
2439          }
2440        } else {
2441          // If the arg is an oop type we don't support, don't bother to store
2442          // it
2443          if (!useless) {
2444            if (in_sig_bt[j_arg] == T_DOUBLE ||
2445                in_sig_bt[j_arg] == T_LONG  ||
2446                in_sig_bt[j_arg] == T_OBJECT ) {
2447              __ movq(r, Address(rbp, fp_offset[src.first()->value()]));
2448            } else {
2449              __ movl(r, Address(rbp, fp_offset[src.first()->value()]));
2450            }
2451          }
2452        }
2453        live[src.first()->value()] = false;
2454      } else if (!useless) {
2455        // full sized move even for int should be ok
2456        __ movq(r, Address(rbp, reg2offset_in(src.first())));
2457      }
2458
2459      // At this point r has the original java arg in the final location
2460      // (assuming it wasn't useless). If the java arg was an oop
2461      // we have a bit more to do
2462
2463      if (in_sig_bt[j_arg] == T_ARRAY || in_sig_bt[j_arg] == T_OBJECT ) {
2464        if (out_sig_bt[c_arg] == T_INT || out_sig_bt[c_arg] == T_LONG) {
2465          // need to unbox a one-word value
2466          Label skip;
2467          __ testq(r, r);
2468          __ jcc(Assembler::equal, skip);
2469          BasicType bt = out_sig_bt[c_arg];
2470          int box_offset = java_lang_boxing_object::value_offset_in_bytes(bt);
2471          Address src1(r, box_offset);
2472          if ( bt == T_LONG ) {
2473            __ movq(r, src1);
2474          } else {
2475            __ movl(r, src1);
2476          }
2477          __ bind(skip);
2478
2479        } else if (out_sig_bt[c_arg] != T_ADDRESS) {
2480          // Convert the arg to NULL
2481          __ xorq(r, r);
2482        }
2483      }
2484
2485      // dst can no longer be holding an input value
2486      live[dst.first()->value()] = false;
2487    }
2488    if (in_sig_bt[j_arg] == T_OBJECT && out_sig_bt[c_arg] == T_LONG) {
2489      assert(out_sig_bt[c_arg+1] == T_VOID, "must be");
2490      ++c_arg; // skip over T_VOID to keep the loop indices in sync
2491    }
2492  }
2493
2494
2495  // OK, now we are done. We need to place the nop that dtrace wants in order to
2496  // patch in the trap.
2497  int patch_offset = ((intptr_t)__ pc()) - start;
2498
2499  __ nop();
2500
2501
2502  // Return
2503
2504  __ leave();
2505  __ ret(0);
2506
2507  __ flush();
2508
2509  nmethod *nm = nmethod::new_dtrace_nmethod(
2510      method, masm->code(), vep_offset, patch_offset, frame_complete,
2511      stack_slots / VMRegImpl::slots_per_word);
2512  return nm;
2513
2514}
2515
2516#endif // HAVE_DTRACE_H
2517
2518  // This function returns the adjustment (in number of words) to a c2i adapter
2519// activation for use during deoptimization
2520int Deoptimization::last_frame_adjust(int callee_parameters, int callee_locals ) {
2521  return (callee_locals - callee_parameters) * Interpreter::stackElementWords();
2522}
2523
2524
2525uint SharedRuntime::out_preserve_stack_slots() {
2526  return 0;
2527}
2528
2529
2530//------------------------------generate_deopt_blob----------------------------
2531void SharedRuntime::generate_deopt_blob() {
2532  // Allocate space for the code
2533  ResourceMark rm;
2534  // Setup code generation tools
2535  CodeBuffer buffer("deopt_blob", 2048, 1024);
2536  MacroAssembler* masm = new MacroAssembler(&buffer);
2537  int frame_size_in_words;
2538  OopMap* map = NULL;
2539  OopMapSet *oop_maps = new OopMapSet();
2540
2541  // -------------
2542  // This code enters when returning to a de-optimized nmethod.  A return
2543  // address has been pushed on the stack, and return values are in
2544  // registers.
2545  // If we are doing a normal deopt then we were called from the patched
2546  // nmethod from the point we returned to the nmethod. So the return
2547  // address on the stack is wrong by NativeCall::instruction_size
2548  // We will adjust the value so it looks like we have the original return
2549  // address on the stack (like when we eagerly deoptimized).
2550  // In the case of an exception pending when deoptimizing, we enter
2551  // with a return address on the stack that points after the call we patched
2552  // into the exception handler. We have the following register state from,
2553  // e.g., the forward exception stub (see stubGenerator_x86_64.cpp).
2554  //    rax: exception oop
2555  //    rbx: exception handler
2556  //    rdx: throwing pc
2557  // So in this case we simply jam rdx into the useless return address and
2558  // the stack looks just like we want.
2559  //
2560  // At this point we need to de-opt.  We save the argument return
2561  // registers.  We call the first C routine, fetch_unroll_info().  This
2562  // routine captures the return values and returns a structure which
2563  // describes the current frame size and the sizes of all replacement frames.
2564  // The current frame is compiled code and may contain many inlined
2565  // functions, each with their own JVM state.  We pop the current frame, then
2566  // push all the new frames.  Then we call the C routine unpack_frames() to
2567  // populate these frames.  Finally unpack_frames() returns us the new target
2568  // address.  Notice that callee-save registers are BLOWN here; they have
2569  // already been captured in the vframeArray at the time the return PC was
2570  // patched.
2571  address start = __ pc();
2572  Label cont;
2573
2574  // Prolog for non exception case!
2575
2576  // Save everything in sight.
2577  map = RegisterSaver::save_live_registers(masm, 0, &frame_size_in_words);
2578
2579  // Normal deoptimization.  Save exec mode for unpack_frames.
2580  __ movl(r14, Deoptimization::Unpack_deopt); // callee-saved
2581  __ jmp(cont);
2582
2583  int reexecute_offset = __ pc() - start;
2584
2585  // Reexecute case
2586  // the return address is the pc that describes what bci to re-execute at
2587
2588  // No need to update map as each call to save_live_registers will produce identical oopmap
2589  (void) RegisterSaver::save_live_registers(masm, 0, &frame_size_in_words);
2590
2591  __ movl(r14, Deoptimization::Unpack_reexecute); // callee-saved
2592  __ jmp(cont);
2593
2594  int exception_offset = __ pc() - start;
2595
2596  // Prolog for exception case
2597
2598  // all registers are dead at this entry point, except for rax and
2599  // rdx, which contain the exception oop and exception pc
2600  // respectively.  Set them in TLS and fall thru to the
2601  // unpack_with_exception_in_tls entry point.
2602
2603  __ movptr(Address(r15_thread, JavaThread::exception_pc_offset()), rdx);
2604  __ movptr(Address(r15_thread, JavaThread::exception_oop_offset()), rax);
2605
2606  int exception_in_tls_offset = __ pc() - start;
2607
2608  // new implementation because exception oop is now passed in JavaThread
2609
2610  // Prolog for exception case
2611  // All registers must be preserved because they might be used by LinearScan
2612  // Exception oop and throwing PC are passed in JavaThread
2613  // tos: stack at point of call to method that threw the exception (i.e. only
2614  // args are on the stack, no return address)
2615
2616  // make room on stack for the return address
2617  // It will be patched later with the throwing pc. The correct value is not
2618  // available now because loading it from memory would destroy registers.
2619  __ push(0);
2620
2621  // Save everything in sight.
2622  map = RegisterSaver::save_live_registers(masm, 0, &frame_size_in_words);
2623
2624  // Now it is safe to overwrite any register
2625
2626  // Deopt during an exception.  Save exec mode for unpack_frames.
2627  __ movl(r14, Deoptimization::Unpack_exception); // callee-saved
2628
2629  // load throwing pc from JavaThread and patch it as the return address
2630  // of the current frame. Then clear the field in JavaThread
2631
2632  __ movptr(rdx, Address(r15_thread, JavaThread::exception_pc_offset()));
2633  __ movptr(Address(rbp, wordSize), rdx);
2634  __ movptr(Address(r15_thread, JavaThread::exception_pc_offset()), (int32_t)NULL_WORD);
2635
2636#ifdef ASSERT
2637  // verify that there is really an exception oop in JavaThread
2638  __ movptr(rax, Address(r15_thread, JavaThread::exception_oop_offset()));
2639  __ verify_oop(rax);
2640
2641  // verify that there is no pending exception
2642  Label no_pending_exception;
2643  __ movptr(rax, Address(r15_thread, Thread::pending_exception_offset()));
2644  __ testptr(rax, rax);
2645  __ jcc(Assembler::zero, no_pending_exception);
2646  __ stop("must not have pending exception here");
2647  __ bind(no_pending_exception);
2648#endif
2649
2650  __ bind(cont);
2651
2652  // Call C code.  Need thread and this frame, but NOT official VM entry
2653  // crud.  We cannot block on this call, no GC can happen.
2654  //
2655  // UnrollBlock* fetch_unroll_info(JavaThread* thread)
2656
2657  // fetch_unroll_info needs to call last_java_frame().
2658
2659  __ set_last_Java_frame(noreg, noreg, NULL);
2660#ifdef ASSERT
2661  { Label L;
2662    __ cmpptr(Address(r15_thread,
2663                    JavaThread::last_Java_fp_offset()),
2664            (int32_t)0);
2665    __ jcc(Assembler::equal, L);
2666    __ stop("SharedRuntime::generate_deopt_blob: last_Java_fp not cleared");
2667    __ bind(L);
2668  }
2669#endif // ASSERT
2670  __ mov(c_rarg0, r15_thread);
2671  __ call(RuntimeAddress(CAST_FROM_FN_PTR(address, Deoptimization::fetch_unroll_info)));
2672
2673  // Need to have an oopmap that tells fetch_unroll_info where to
2674  // find any register it might need.
2675  oop_maps->add_gc_map(__ pc() - start, map);
2676
2677  __ reset_last_Java_frame(false, false);
2678
2679  // Load UnrollBlock* into rdi
2680  __ mov(rdi, rax);
2681
2682  Label noException;
2683  __ cmpl(r14, Deoptimization::Unpack_exception);   // Was exception pending? (exec mode was saved in r14 above)
2684  __ jcc(Assembler::notEqual, noException);
2685  __ movptr(rax, Address(r15_thread, JavaThread::exception_oop_offset()));
2686  // QQQ this is useless it was NULL above
2687  __ movptr(rdx, Address(r15_thread, JavaThread::exception_pc_offset()));
2688  __ movptr(Address(r15_thread, JavaThread::exception_oop_offset()), (int32_t)NULL_WORD);
2689  __ movptr(Address(r15_thread, JavaThread::exception_pc_offset()), (int32_t)NULL_WORD);
2690
2691  __ verify_oop(rax);
2692
2693  // Overwrite the result registers with the exception results.
2694  __ movptr(Address(rsp, RegisterSaver::rax_offset_in_bytes()), rax);
2695  // I think this is useless
2696  __ movptr(Address(rsp, RegisterSaver::rdx_offset_in_bytes()), rdx);
2697
2698  __ bind(noException);
2699
2700  // Only register save data is on the stack.
2701  // Now restore the result registers.  Everything else is either dead
2702  // or captured in the vframeArray.
2703  RegisterSaver::restore_result_registers(masm);
2704
2705  // All of the register save area has been popped off the stack. Only the
2706  // return address remains.
2707
2708  // Pop all the frames we must move/replace.
2709  //
2710  // Frame picture (youngest to oldest)
2711  // 1: self-frame (no frame link)
2712  // 2: deopting frame  (no frame link)
2713  // 3: caller of deopting frame (could be compiled/interpreted).
2714  //
2715  // Note: by leaving the return address of self-frame on the stack
2716  // and using the size of frame 2 to adjust the stack
2717  // when we are done the return to frame 3 will still be on the stack.
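  //
  // Illustrative sketch, assuming frame 2 is replaced by two interpreter
  // frames: after the code below, the stack reads (youngest to oldest)
  // re-pushed self-frame, skeletal interpreter frame, skeletal interpreter
  // frame, then the unchanged frame 3.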

  // Pop deoptimized frame
  __ movl(rcx, Address(rdi, Deoptimization::UnrollBlock::size_of_deoptimized_frame_offset_in_bytes()));
  __ addptr(rsp, rcx);

  // rsp should be pointing at the return address to the caller (3)

  // Stack bang to make sure there's enough room for these interpreter frames.
  if (UseStackBanging) {
    __ movl(rbx, Address(rdi, Deoptimization::UnrollBlock::total_frame_sizes_offset_in_bytes()));
    __ bang_stack_size(rbx, rcx);
  }

  // Load address of array of frame pcs into rcx
  __ movptr(rcx, Address(rdi, Deoptimization::UnrollBlock::frame_pcs_offset_in_bytes()));

  // Trash the old pc
  __ addptr(rsp, wordSize);

  // Load address of array of frame sizes into rsi
  __ movptr(rsi, Address(rdi, Deoptimization::UnrollBlock::frame_sizes_offset_in_bytes()));

  // Load counter into rdx
  __ movl(rdx, Address(rdi, Deoptimization::UnrollBlock::number_of_frames_offset_in_bytes()));

  // Pick up the initial fp we should save
  __ movptr(rbp, Address(rdi, Deoptimization::UnrollBlock::initial_fp_offset_in_bytes()));

  // Now adjust the caller's stack to make up for the extra locals
  // but record the original sp so that we can save it in the skeletal interpreter
  // frame and the stack walking of interpreter_sender will get the unextended sp
  // value and not the "real" sp value.

  const Register sender_sp = r8;

  __ mov(sender_sp, rsp);
  __ movl(rbx, Address(rdi,
                       Deoptimization::UnrollBlock::
                       caller_adjustment_offset_in_bytes()));
  __ subptr(rsp, rbx);
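  // Arithmetic sketch (illustrative numbers only): caller_adjustment is the
  // extra space, in bytes, that the rebuilt interpreter frames need from
  // the caller beyond what the deoptimized frame provided; e.g. a value of
  // 16 grows the stack by two 8-byte words here.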

  // Push interpreter frames in a loop
  Label loop;
  __ bind(loop);
  __ movptr(rbx, Address(rsi, 0));      // Load frame size
#ifdef CC_INTERP
  __ subptr(rbx, 4*wordSize);           // we'll push pc and ebp by hand, plus the two filler words below
#ifdef ASSERT
  __ push(0xDEADDEAD);                  // Make a recognizable pattern
  __ push(0xDEADDEAD);
#else /* ASSERT */
  __ subptr(rsp, 2*wordSize);           // skip the "static long no_param"
#endif /* ASSERT */
#else
  __ subptr(rbx, 2*wordSize);           // We'll push pc and ebp by hand
#endif // CC_INTERP
  __ pushptr(Address(rcx, 0));          // Save return address
  __ enter();                           // Save old & set new ebp
  __ subptr(rsp, rbx);                  // Prolog
#ifdef CC_INTERP
  __ movptr(Address(rbp,
                  -(sizeof(BytecodeInterpreter)) + in_bytes(byte_offset_of(BytecodeInterpreter, _sender_sp))),
            sender_sp); // Make it walkable
#else /* CC_INTERP */
  // This value is corrected by layout_activation_impl
  __ movptr(Address(rbp, frame::interpreter_frame_last_sp_offset * wordSize), (int32_t)NULL_WORD);
  __ movptr(Address(rbp, frame::interpreter_frame_sender_sp_offset * wordSize), sender_sp); // Make it walkable
#endif /* CC_INTERP */
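  // The frame just built is only skeletal: a saved rbp, a return pc, and a
  // recorded sender sp, which is just enough for stack walking.  The full
  // interpreter state is filled in later by the unpack_frames call (see the
  // layout_activation_impl note above).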
  __ mov(sender_sp, rsp);               // Pass sender_sp to next frame
  __ addptr(rsi, wordSize);             // Bump array pointer (sizes)
  __ addptr(rcx, wordSize);             // Bump array pointer (pcs)
  __ decrementl(rdx);                   // Decrement counter
  __ jcc(Assembler::notZero, loop);
  __ pushptr(Address(rcx, 0));          // Save final return address
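  // The final pushptr above installs one extra pc from the frame_pcs array;
  // it becomes the return address of the self-frame re-pushed below, i.e.
  // where the closing ret(0) of this blob will land.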

  // Re-push self-frame
  __ enter();                           // Save old & set new ebp

  // Allocate a full sized register save area.
  // Return address and rbp are in place, so we allocate two fewer words.
  __ subptr(rsp, (frame_size_in_words - 2) * wordSize);

  // Restore frame locals after moving the frame
  __ movdbl(Address(rsp, RegisterSaver::xmm0_offset_in_bytes()), xmm0);
  __ movptr(Address(rsp, RegisterSaver::rax_offset_in_bytes()), rax);

  // Call C code.  Need thread but NOT official VM entry
  // crud.  We cannot block on this call, no GC can happen.  Call should
  // restore return values to their stack-slots with the new SP.
  //
  // void Deoptimization::unpack_frames(JavaThread* thread, int exec_mode)

  // Use rbp because the frames look interpreted now
  __ set_last_Java_frame(noreg, rbp, NULL);

  __ mov(c_rarg0, r15_thread);
  __ movl(c_rarg1, r14); // second arg: exec_mode
  __ call(RuntimeAddress(CAST_FROM_FN_PTR(address, Deoptimization::unpack_frames)));

  // Set an oopmap for the call site
  oop_maps->add_gc_map(__ pc() - start,
                       new OopMap( frame_size_in_words, 0 ));

  __ reset_last_Java_frame(true, false);

  // Collect return values
  __ movdbl(xmm0, Address(rsp, RegisterSaver::xmm0_offset_in_bytes()));
  __ movptr(rax, Address(rsp, RegisterSaver::rax_offset_in_bytes()));
  // This reload is likely useless (rdx would only hold the throwing pc)
  __ movptr(rdx, Address(rsp, RegisterSaver::rdx_offset_in_bytes()));

  // Pop self-frame.
  __ leave();                           // Epilog

  // Jump to interpreter
  __ ret(0);

  // Make sure all code is generated
  masm->flush();

  _deopt_blob = DeoptimizationBlob::create(&buffer, oop_maps, 0, exception_offset, reexecute_offset, frame_size_in_words);
  _deopt_blob->set_unpack_with_exception_in_tls_offset(exception_in_tls_offset);
}

#ifdef COMPILER2
//------------------------------generate_uncommon_trap_blob--------------------
void SharedRuntime::generate_uncommon_trap_blob() {
  // Allocate space for the code
  ResourceMark rm;
  // Setup code generation tools
  CodeBuffer buffer("uncommon_trap_blob", 2048, 1024);
  MacroAssembler* masm = new MacroAssembler(&buffer);

  assert(SimpleRuntimeFrame::framesize % 4 == 0, "sp not 16-byte aligned");
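  // framesize is counted in 4-byte slots (compiler stack slots are jints),
  // so framesize % 4 == 0 means the frame is a whole multiple of 16 bytes,
  // keeping rsp 16-byte aligned at calls as the AMD64 ABI requires.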

  address start = __ pc();

  // Push self-frame.  We get here with a return address on the
  // stack, so rsp is 8-byte aligned until we allocate our frame.
  __ subptr(rsp, SimpleRuntimeFrame::return_off << LogBytesPerInt); // Prolog

  // No callee saved registers. rbp is assumed implicitly saved
  __ movptr(Address(rsp, SimpleRuntimeFrame::rbp_off << LogBytesPerInt), rbp);

  // The compiler left unloaded_class_index in j_rarg0; move it to where the
  // runtime expects it.
  __ movl(c_rarg1, j_rarg0);

  __ set_last_Java_frame(noreg, noreg, NULL);

  // Call C code.  Need thread but NOT official VM entry
  // crud.  We cannot block on this call, no GC can happen.  Call should
  // capture callee-saved registers as well as return values.
  // Thread is in rdi already.
  //
  // UnrollBlock* uncommon_trap(JavaThread* thread, jint unloaded_class_index);

  __ mov(c_rarg0, r15_thread);
  __ call(RuntimeAddress(CAST_FROM_FN_PTR(address, Deoptimization::uncommon_trap)));

  // Set an oopmap for the call site
  OopMapSet* oop_maps = new OopMapSet();
  OopMap* map = new OopMap(SimpleRuntimeFrame::framesize, 0);

  // location of rbp is known implicitly by the frame sender code

  oop_maps->add_gc_map(__ pc() - start, map);

  __ reset_last_Java_frame(false, false);

  // Load UnrollBlock* into rdi
  __ mov(rdi, rax);

  // Pop all the frames we must move/replace.
  //
  // Frame picture (youngest to oldest)
  // 1: self-frame (no frame link)
  // 2: deopting frame  (no frame link)
  // 3: caller of deopting frame (could be compiled/interpreted).

  // Pop self-frame.  We have no frame, and must rely only on rax and rsp.
  __ addptr(rsp, (SimpleRuntimeFrame::framesize - 2) << LogBytesPerInt); // Epilog!

  // Pop deoptimized frame (int)
  __ movl(rcx, Address(rdi,
                       Deoptimization::UnrollBlock::
                       size_of_deoptimized_frame_offset_in_bytes()));
  __ addptr(rsp, rcx);

  // rsp should be pointing at the return address to the caller (3)

  // Stack bang to make sure there's enough room for these interpreter frames.
  if (UseStackBanging) {
    __ movl(rbx, Address(rdi, Deoptimization::UnrollBlock::total_frame_sizes_offset_in_bytes()));
    __ bang_stack_size(rbx, rcx);
  }

  // Load address of array of frame pcs into rcx (address*)
  __ movptr(rcx,
            Address(rdi,
                    Deoptimization::UnrollBlock::frame_pcs_offset_in_bytes()));

  // Trash the return pc
  __ addptr(rsp, wordSize);

  // Load address of array of frame sizes into rsi (intptr_t*)
  __ movptr(rsi, Address(rdi,
                         Deoptimization::UnrollBlock::
                         frame_sizes_offset_in_bytes()));

  // Counter
  __ movl(rdx, Address(rdi,
                       Deoptimization::UnrollBlock::
                       number_of_frames_offset_in_bytes())); // (int)

  // Pick up the initial fp we should save
  __ movptr(rbp,
            Address(rdi,
                    Deoptimization::UnrollBlock::initial_fp_offset_in_bytes()));

  // Now adjust the caller's stack to make up for the extra locals but
  // record the original sp so that we can save it in the skeletal
  // interpreter frame and the stack walking of interpreter_sender
  // will get the unextended sp value and not the "real" sp value.

  const Register sender_sp = r8;

  __ mov(sender_sp, rsp);
  __ movl(rbx, Address(rdi,
                       Deoptimization::UnrollBlock::
                       caller_adjustment_offset_in_bytes())); // (int)
  __ subptr(rsp, rbx);

  // Push interpreter frames in a loop
  Label loop;
  __ bind(loop);
  __ movptr(rbx, Address(rsi, 0)); // Load frame size
  __ subptr(rbx, 2 * wordSize);    // We'll push pc and rbp by hand
  __ pushptr(Address(rcx, 0));     // Save return address
  __ enter();                      // Save old & set new rbp
  __ subptr(rsp, rbx);             // Prolog
#ifdef CC_INTERP
  __ movptr(Address(rbp,
                  -(sizeof(BytecodeInterpreter)) + in_bytes(byte_offset_of(BytecodeInterpreter, _sender_sp))),
            sender_sp); // Make it walkable
#else // CC_INTERP
  __ movptr(Address(rbp, frame::interpreter_frame_sender_sp_offset * wordSize),
            sender_sp);            // Make it walkable
  // This value is corrected by layout_activation_impl
  __ movptr(Address(rbp, frame::interpreter_frame_last_sp_offset * wordSize), (int32_t)NULL_WORD);
#endif // CC_INTERP
  __ mov(sender_sp, rsp);          // Pass sender_sp to next frame
  __ addptr(rsi, wordSize);        // Bump array pointer (sizes)
  __ addptr(rcx, wordSize);        // Bump array pointer (pcs)
  __ decrementl(rdx);              // Decrement counter
  __ jcc(Assembler::notZero, loop);
  __ pushptr(Address(rcx, 0));     // Save final return address

  // Re-push self-frame
  __ enter();                 // Save old & set new rbp
  __ subptr(rsp, (SimpleRuntimeFrame::framesize - 4) << LogBytesPerInt);
                              // Prolog
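  // Arithmetic note: framesize is in 4-byte slots.  The return pc pushed
  // just above and the rbp pushed by enter() already account for 4 slots
  // (2 words), so framesize - 4 slots remain to be allocated here.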

  // Use rbp because the frames look interpreted now
  __ set_last_Java_frame(noreg, rbp, NULL);

  // Call C code.  Need thread but NOT official VM entry
  // crud.  We cannot block on this call, no GC can happen.  Call should
  // restore return values to their stack-slots with the new SP.
  // Thread is in rdi already.
  //
  // BasicType unpack_frames(JavaThread* thread, int exec_mode);

  __ mov(c_rarg0, r15_thread);
  __ movl(c_rarg1, Deoptimization::Unpack_uncommon_trap);
  __ call(RuntimeAddress(CAST_FROM_FN_PTR(address, Deoptimization::unpack_frames)));

  // Set an oopmap for the call site
  oop_maps->add_gc_map(__ pc() - start, new OopMap(SimpleRuntimeFrame::framesize, 0));

  __ reset_last_Java_frame(true, false);

  // Pop self-frame.
  __ leave();                 // Epilog

  // Jump to interpreter
  __ ret(0);

  // Make sure all code is generated
  masm->flush();

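  // SimpleRuntimeFrame::framesize is in 4-byte slots; the blob's frame size
  // is expected in 8-byte words, hence the >> 1 below.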
  _uncommon_trap_blob = UncommonTrapBlob::create(&buffer, oop_maps,
                                                 SimpleRuntimeFrame::framesize >> 1);
}
#endif // COMPILER2


//------------------------------generate_handler_blob------------------------
//
// Generate a special Compile2Runtime blob that saves all registers,
// and sets up an oopmap.
//
static SafepointBlob* generate_handler_blob(address call_ptr, bool cause_return) {
  assert(StubRoutines::forward_exception_entry() != NULL,
         "must be generated before");

  ResourceMark rm;
  OopMapSet *oop_maps = new OopMapSet();
  OopMap* map;

  // Allocate space for the code.  Setup code generation tools.
  CodeBuffer buffer("handler_blob", 2048, 1024);
  MacroAssembler* masm = new MacroAssembler(&buffer);

  address start   = __ pc();
  address call_pc = NULL;
  int frame_size_in_words;

  // Make room for return address (or push it again)
  if (!cause_return) {
    __ push(rbx);
  }

  // Save registers, fpu state, and flags
  map = RegisterSaver::save_live_registers(masm, 0, &frame_size_in_words);

  // The following is basically a call_VM.  However, we need the precise
  // address of the call in order to generate an oopmap. Hence, we do all the
  // work ourselves.

  __ set_last_Java_frame(noreg, noreg, NULL);

  // The return address must always be correct so that the frame constructor
  // never sees an invalid pc.

  if (!cause_return) {
    // overwrite the dummy value we pushed on entry
    __ movptr(c_rarg0, Address(r15_thread, JavaThread::saved_exception_pc_offset()));
    __ movptr(Address(rbp, wordSize), c_rarg0);
  }
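  // When cause_return is false we got here via a polling page fault and
  // pushed a dummy word on entry; the store above replaces it with the pc
  // recorded in JavaThread::saved_exception_pc before this blob was
  // entered, so the frame has a valid return address for stack walking.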

  // Do the call
  __ mov(c_rarg0, r15_thread);
  __ call(RuntimeAddress(call_ptr));

  // Set an oopmap for the call site.  This oopmap will map all
  // oop-registers and debug-info registers as callee-saved.  This
  // will allow deoptimization at this safepoint to find all possible
  // debug-info recordings, as well as let GC find all oops.

  oop_maps->add_gc_map( __ pc() - start, map);

  Label noException;

  __ reset_last_Java_frame(false, false);

  __ cmpptr(Address(r15_thread, Thread::pending_exception_offset()), (int32_t)NULL_WORD);
  __ jcc(Assembler::equal, noException);

  // Exception pending

  RegisterSaver::restore_live_registers(masm);

  __ jump(RuntimeAddress(StubRoutines::forward_exception_entry()));

  // No exception case
  __ bind(noException);

  // Normal exit, restore registers and exit.
  RegisterSaver::restore_live_registers(masm);

  __ ret(0);

  // Make sure all code is generated
  masm->flush();

  // Fill-out other meta info
  return SafepointBlob::create(&buffer, oop_maps, frame_size_in_words);
}

//
// generate_resolve_blob - call resolution (static/virtual/opt-virtual/ic-miss)
//
// Generate a stub that calls into the vm to find out the proper destination
// of a java call. All the argument registers are live at this point,
// but since this is generic code we don't know what they are and the caller
// must do any gc of the args.
//
static RuntimeStub* generate_resolve_blob(address destination, const char* name) {
  assert(StubRoutines::forward_exception_entry() != NULL, "must be generated before");

  // allocate space for the code
  ResourceMark rm;

  CodeBuffer buffer(name, 1000, 512);
  MacroAssembler* masm = new MacroAssembler(&buffer);

  int frame_size_in_words;

  OopMapSet *oop_maps = new OopMapSet();
  OopMap* map = NULL;

  int start = __ offset();

  map = RegisterSaver::save_live_registers(masm, 0, &frame_size_in_words);

  int frame_complete = __ offset();

  __ set_last_Java_frame(noreg, noreg, NULL);

  __ mov(c_rarg0, r15_thread);

  __ call(RuntimeAddress(destination));

  // Set an oopmap for the call site.
  // We need this not only for callee-saved registers, but also for volatile
  // registers that the compiler might be keeping live across a safepoint.

  oop_maps->add_gc_map( __ offset() - start, map);

  // rax contains the address we are going to jump to, assuming no exception got installed

  // clear last_Java_sp
  __ reset_last_Java_frame(false, false);
  // check for pending exceptions
  Label pending;
  __ cmpptr(Address(r15_thread, Thread::pending_exception_offset()), (int32_t)NULL_WORD);
  __ jcc(Assembler::notEqual, pending);

  // get the returned methodOop
  __ movptr(rbx, Address(r15_thread, JavaThread::vm_result_offset()));
  __ movptr(Address(rsp, RegisterSaver::rbx_offset_in_bytes()), rbx);

  __ movptr(Address(rsp, RegisterSaver::rax_offset_in_bytes()), rax);

  RegisterSaver::restore_live_registers(masm);

  // We are back to the original state on entry and ready to go.

  __ jmp(rax);
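  // Tail-jump pattern: all argument registers were restored by
  // restore_live_registers above (rax from the slot we just patched with
  // the resolved address), so jumping through rax enters the destination
  // exactly as if the original call had gone there directly.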

  // Pending exception after the safepoint

  __ bind(pending);

  RegisterSaver::restore_live_registers(masm);

  // exception pending => remove activation and forward to exception handler

  __ movptr(Address(r15_thread, JavaThread::vm_result_offset()), (int32_t)NULL_WORD);

  __ movptr(rax, Address(r15_thread, Thread::pending_exception_offset()));
  __ jump(RuntimeAddress(StubRoutines::forward_exception_entry()));

  // -------------
  // make sure all code is generated
  masm->flush();

  // return the blob
  // frame_size_in_words is in words, as new_runtime_stub expects
  return RuntimeStub::new_runtime_stub(name, &buffer, frame_complete, frame_size_in_words, oop_maps, true);
}


void SharedRuntime::generate_stubs() {

  _wrong_method_blob = generate_resolve_blob(CAST_FROM_FN_PTR(address, SharedRuntime::handle_wrong_method),
                                        "wrong_method_stub");
  _ic_miss_blob =      generate_resolve_blob(CAST_FROM_FN_PTR(address, SharedRuntime::handle_wrong_method_ic_miss),
                                        "ic_miss_stub");
  _resolve_opt_virtual_call_blob = generate_resolve_blob(CAST_FROM_FN_PTR(address, SharedRuntime::resolve_opt_virtual_call_C),
                                        "resolve_opt_virtual_call");

  _resolve_virtual_call_blob = generate_resolve_blob(CAST_FROM_FN_PTR(address, SharedRuntime::resolve_virtual_call_C),
                                        "resolve_virtual_call");

  _resolve_static_call_blob = generate_resolve_blob(CAST_FROM_FN_PTR(address, SharedRuntime::resolve_static_call_C),
                                        "resolve_static_call");
  _polling_page_safepoint_handler_blob =
    generate_handler_blob(CAST_FROM_FN_PTR(address,
                   SafepointSynchronize::handle_polling_page_exception), false);

  _polling_page_return_handler_blob =
    generate_handler_blob(CAST_FROM_FN_PTR(address,
                   SafepointSynchronize::handle_polling_page_exception), true);

  generate_deopt_blob();

#ifdef COMPILER2
  generate_uncommon_trap_blob();
#endif // COMPILER2
}


#ifdef COMPILER2
// This is here instead of runtime_x86_64.cpp because it uses SimpleRuntimeFrame
//
//------------------------------generate_exception_blob---------------------------
// Creates the exception blob.
// This code is jumped to from a compiled method
// (see emit_exception_handler in the x86_64.ad file).
//
// Given an exception pc at a call we call into the runtime for the
// handler in this method. This handler might merely restore state
// (i.e. callee-save registers), unwind the frame, and jump to the
// exception handler for the nmethod if there is no Java level handler
// for the nmethod.
//
// This code is entered with a jmp.
//
// Arguments:
//   rax: exception oop
//   rdx: exception pc
//
// Results:
//   rax: exception oop
//   rdx: exception pc in caller or ???
//   destination: exception handler of caller
//
// Note: the exception pc MUST be at a call (precise debug information)
//       Registers rax, rdx, rcx, rsi, rdi, r8-r11 are not callee saved.
//

void OptoRuntime::generate_exception_blob() {
  assert(!OptoRuntime::is_callee_saved_register(RDX_num), "");
  assert(!OptoRuntime::is_callee_saved_register(RAX_num), "");
  assert(!OptoRuntime::is_callee_saved_register(RCX_num), "");

  assert(SimpleRuntimeFrame::framesize % 4 == 0, "sp not 16-byte aligned");

  // Allocate space for the code
  ResourceMark rm;
  // Setup code generation tools
  CodeBuffer buffer("exception_blob", 2048, 1024);
  MacroAssembler* masm = new MacroAssembler(&buffer);

  address start = __ pc();

  // Exception pc is 'return address' for stack walker
  __ push(rdx);
  __ subptr(rsp, SimpleRuntimeFrame::return_off << LogBytesPerInt); // Prolog

  // Save callee-saved registers.  See x86_64.ad.

  // rbp is an implicitly saved callee saved register (i.e. the calling
  // convention will save/restore it in the prolog/epilog). Other than that
  // there are no callee save registers now that adapter frames are gone.

  __ movptr(Address(rsp, SimpleRuntimeFrame::rbp_off << LogBytesPerInt), rbp);

  // Store exception in Thread object. We cannot pass any arguments to the
  // handle_exception call, since we do not want to make any assumptions
  // about the size of the frame in which the exception happened.
  // c_rarg0 is either rdi (Linux) or rcx (Windows).
  __ movptr(Address(r15_thread, JavaThread::exception_oop_offset()), rax);
  __ movptr(Address(r15_thread, JavaThread::exception_pc_offset()), rdx);

  // This call does all the hard work.  It checks if an exception handler
  // exists in the method.
  // If so, it returns the handler address.
  // If not, it prepares for stack-unwinding, restoring the callee-save
  // registers of the frame being removed.
  //
  // address OptoRuntime::handle_exception_C(JavaThread* thread)

  __ set_last_Java_frame(noreg, noreg, NULL);
  __ mov(c_rarg0, r15_thread);
  __ call(RuntimeAddress(CAST_FROM_FN_PTR(address, OptoRuntime::handle_exception_C)));

  // Set an oopmap for the call site.  This oopmap will only be used if we
  // are unwinding the stack.  Hence, all locations will be dead.
  // Callee-saved registers will be the same as the frame above (i.e.,
  // handle_exception_stub), since they were restored when we got the
  // exception.

  OopMapSet* oop_maps = new OopMapSet();

  oop_maps->add_gc_map(__ pc() - start, new OopMap(SimpleRuntimeFrame::framesize, 0));

  __ reset_last_Java_frame(false, false);

  // Restore callee-saved registers

  // rbp is an implicitly saved callee saved register (i.e. the calling
  // convention will save/restore it in the prolog/epilog). Other than that
  // there are no callee save registers now that adapter frames are gone.

  __ movptr(rbp, Address(rsp, SimpleRuntimeFrame::rbp_off << LogBytesPerInt));

  __ addptr(rsp, SimpleRuntimeFrame::return_off << LogBytesPerInt); // Epilog
  __ pop(rdx);                  // No need for exception pc anymore

  // rax: exception handler

  // We have a handler in rax (could be deopt blob).
  __ mov(r8, rax);

  // Get the exception oop
  __ movptr(rax, Address(r15_thread, JavaThread::exception_oop_offset()));
  // Get the exception pc in case we are deoptimized
  __ movptr(rdx, Address(r15_thread, JavaThread::exception_pc_offset()));
#ifdef ASSERT
  __ movptr(Address(r15_thread, JavaThread::exception_handler_pc_offset()), (int32_t)NULL_WORD);
  __ movptr(Address(r15_thread, JavaThread::exception_pc_offset()), (int32_t)NULL_WORD);
#endif
  // Clear the exception oop so GC no longer processes it as a root.
  __ movptr(Address(r15_thread, JavaThread::exception_oop_offset()), (int32_t)NULL_WORD);

  // rax: exception oop
  // r8:  exception handler
  // rdx: exception pc
  // Jump to handler

  __ jmp(r8);

  // Make sure all code is generated
  masm->flush();

  // Set exception blob
  _exception_blob = ExceptionBlob::create(&buffer, oop_maps, SimpleRuntimeFrame::framesize >> 1);
}
#endif // COMPILER2