sharedRuntime_sparc.cpp revision 3602:da91efe96a93
/*
 * Copyright (c) 2003, 2012, Oracle and/or its affiliates. All rights reserved.
 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
 *
 * This code is free software; you can redistribute it and/or modify it
 * under the terms of the GNU General Public License version 2 only, as
 * published by the Free Software Foundation.
 *
 * This code is distributed in the hope that it will be useful, but WITHOUT
 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
 * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
 * version 2 for more details (a copy is included in the LICENSE file that
 * accompanied this code).
 *
 * You should have received a copy of the GNU General Public License version
 * 2 along with this work; if not, write to the Free Software Foundation,
 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
 *
 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
 * or visit www.oracle.com if you need additional information or have any
 * questions.
 *
 */

#include "precompiled.hpp"
#include "asm/assembler.hpp"
#include "assembler_sparc.inline.hpp"
#include "code/debugInfoRec.hpp"
#include "code/icBuffer.hpp"
#include "code/vtableStubs.hpp"
#include "interpreter/interpreter.hpp"
#include "oops/compiledICHolder.hpp"
#include "prims/jvmtiRedefineClassesTrace.hpp"
#include "runtime/sharedRuntime.hpp"
#include "runtime/vframeArray.hpp"
#include "vmreg_sparc.inline.hpp"
#ifdef COMPILER1
#include "c1/c1_Runtime1.hpp"
#endif
#ifdef COMPILER2
#include "opto/runtime.hpp"
#endif
#ifdef SHARK
#include "compiler/compileBroker.hpp"
#include "shark/sharkCompiler.hpp"
#endif

#define __ masm->
class RegisterSaver {

  // Used for saving volatile registers. This is Gregs, Fregs, I/L/O.
  // The Oregs are problematic. In the 32bit build the compiler can
  // have O registers live with 64 bit quantities. A window save will
  // cut the heads off of the registers. We have to do a very extensive
  // stack dance to save and restore these properly.

  // Note that the Oregs problem only exists if we block at either a polling
  // page exception, a compiled code safepoint that was not originally a call,
  // or deoptimize following one of these kinds of safepoints.

  // Lots of registers to save. For all builds, a window save will preserve
  // the %i and %l registers. For the 32-bit longs-in-two-entries and 64-bit
  // builds a window-save will preserve the %o registers. In the LION build
  // we need to save the 64-bit %o registers, which requires we save them
  // before the window-save (as then they become %i registers and get their
  // heads chopped off on interrupt). We have to save some %g registers here
  // as well.
  enum {
    // This frame's save area. Includes extra space for the native call:
    // vararg's layout space and the like. Briefly holds the caller's
    // register save area.
    call_args_area = frame::register_save_words_sp_offset +
                     frame::memory_parameter_word_sp_offset*wordSize,
    // Make sure save locations are always 8 byte aligned.
    // Can't use round_to because it doesn't produce a compile-time constant.
    start_of_extra_save_area = ((call_args_area + 7) & ~7),
    g1_offset = start_of_extra_save_area, // g-regs needing saving
    g3_offset = g1_offset+8,
    g4_offset = g3_offset+8,
    g5_offset = g4_offset+8,
    o0_offset = g5_offset+8,
    o1_offset = o0_offset+8,
    o2_offset = o1_offset+8,
    o3_offset = o2_offset+8,
    o4_offset = o3_offset+8,
    o5_offset = o4_offset+8,
    start_of_flags_save_area = o5_offset+8,
    ccr_offset = start_of_flags_save_area,
    fsr_offset = ccr_offset + 8,
    d00_offset = fsr_offset+8,  // Start of float save area
    register_save_size = d00_offset+8*32
  };


  public:

  static int Oexception_offset() { return o0_offset; };
  static int G3_offset() { return g3_offset; };
  static int G5_offset() { return g5_offset; };
  static OopMap* save_live_registers(MacroAssembler* masm, int additional_frame_words, int* total_frame_words);
  static void restore_live_registers(MacroAssembler* masm);

  // During deoptimization only the result registers need to be restored;
  // all the other values have already been extracted.

  static void restore_result_registers(MacroAssembler* masm);
};
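// A minimal illustration (an expository sketch, not load-bearing code) of
// the mask expression used for start_of_extra_save_area above: it is the
// usual round-up-to-a-multiple-of-8 idiom, spelled out by hand because an
// enum initializer must be a compile-time constant while round_to() is a
// runtime helper. Assuming a hypothetical call_args_area of 92:
//
//   ((92 + 7) & ~7) == 96    // same result as round_to(92, 8)
//   ((96 + 7) & ~7) == 96    // already-aligned values are unchanged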
OopMap* RegisterSaver::save_live_registers(MacroAssembler* masm, int additional_frame_words, int* total_frame_words) {
  // Record volatile registers as callee-save values in an OopMap so their save locations will be
  // propagated to the caller frame's RegisterMap during StackFrameStream construction (needed for
  // deoptimization; see compiledVFrame::create_stack_value). The caller's I, L and O registers
  // are saved in register windows - I's and L's in the caller's frame and O's in the stub frame
  // (as the stub's I's) when the runtime routine called by the stub creates its frame.
  int i;
  // Always make the frame size 16 byte aligned.
  int frame_size = round_to(additional_frame_words + register_save_size, 16);
  // OopMap frame size is in c2 stack slots (sizeof(jint)) not bytes or words
  int frame_size_in_slots = frame_size / sizeof(jint);
  // CodeBlob frame size is in words.
  *total_frame_words = frame_size / wordSize;
  // OopMap* map = new OopMap(*total_frame_words, 0);
  OopMap* map = new OopMap(frame_size_in_slots, 0);

#if !defined(_LP64)

  // Save 64-bit O registers; they will get their heads chopped off on a 'save'.
  __ stx(O0, G2_thread, JavaThread::o_reg_temps_offset_in_bytes()+0*8);
  __ stx(O1, G2_thread, JavaThread::o_reg_temps_offset_in_bytes()+1*8);
  __ stx(O2, G2_thread, JavaThread::o_reg_temps_offset_in_bytes()+2*8);
  __ stx(O3, G2_thread, JavaThread::o_reg_temps_offset_in_bytes()+3*8);
  __ stx(O4, G2_thread, JavaThread::o_reg_temps_offset_in_bytes()+4*8);
  __ stx(O5, G2_thread, JavaThread::o_reg_temps_offset_in_bytes()+5*8);
#endif /* _LP64 */

  __ save(SP, -frame_size, SP);

#ifndef _LP64
  // Reload the 64 bit Oregs. Although they are now Iregs we load them
  // to Oregs here to avoid interrupts cutting off their heads.

  __ ldx(G2_thread, JavaThread::o_reg_temps_offset_in_bytes()+0*8, O0);
  __ ldx(G2_thread, JavaThread::o_reg_temps_offset_in_bytes()+1*8, O1);
  __ ldx(G2_thread, JavaThread::o_reg_temps_offset_in_bytes()+2*8, O2);
  __ ldx(G2_thread, JavaThread::o_reg_temps_offset_in_bytes()+3*8, O3);
  __ ldx(G2_thread, JavaThread::o_reg_temps_offset_in_bytes()+4*8, O4);
  __ ldx(G2_thread, JavaThread::o_reg_temps_offset_in_bytes()+5*8, O5);

  __ stx(O0, SP, o0_offset+STACK_BIAS);
  map->set_callee_saved(VMRegImpl::stack2reg((o0_offset + 4)>>2), O0->as_VMReg());

  __ stx(O1, SP, o1_offset+STACK_BIAS);
  map->set_callee_saved(VMRegImpl::stack2reg((o1_offset + 4)>>2), O1->as_VMReg());

  __ stx(O2, SP, o2_offset+STACK_BIAS);
  map->set_callee_saved(VMRegImpl::stack2reg((o2_offset + 4)>>2), O2->as_VMReg());

  __ stx(O3, SP, o3_offset+STACK_BIAS);
  map->set_callee_saved(VMRegImpl::stack2reg((o3_offset + 4)>>2), O3->as_VMReg());

  __ stx(O4, SP, o4_offset+STACK_BIAS);
  map->set_callee_saved(VMRegImpl::stack2reg((o4_offset + 4)>>2), O4->as_VMReg());

  __ stx(O5, SP, o5_offset+STACK_BIAS);
  map->set_callee_saved(VMRegImpl::stack2reg((o5_offset + 4)>>2), O5->as_VMReg());
#endif /* _LP64 */


#ifdef _LP64
  int debug_offset = 0;
#else
  int debug_offset = 4;
#endif
  // Save the G's
  __ stx(G1, SP, g1_offset+STACK_BIAS);
  map->set_callee_saved(VMRegImpl::stack2reg((g1_offset + debug_offset)>>2), G1->as_VMReg());

  __ stx(G3, SP, g3_offset+STACK_BIAS);
  map->set_callee_saved(VMRegImpl::stack2reg((g3_offset + debug_offset)>>2), G3->as_VMReg());

  __ stx(G4, SP, g4_offset+STACK_BIAS);
  map->set_callee_saved(VMRegImpl::stack2reg((g4_offset + debug_offset)>>2), G4->as_VMReg());

  __ stx(G5, SP, g5_offset+STACK_BIAS);
  map->set_callee_saved(VMRegImpl::stack2reg((g5_offset + debug_offset)>>2), G5->as_VMReg());

  // This is really a waste but we'll keep things as they were for now
  if (true) {
#ifndef _LP64
    map->set_callee_saved(VMRegImpl::stack2reg((o0_offset)>>2), O0->as_VMReg()->next());
    map->set_callee_saved(VMRegImpl::stack2reg((o1_offset)>>2), O1->as_VMReg()->next());
    map->set_callee_saved(VMRegImpl::stack2reg((o2_offset)>>2), O2->as_VMReg()->next());
    map->set_callee_saved(VMRegImpl::stack2reg((o3_offset)>>2), O3->as_VMReg()->next());
    map->set_callee_saved(VMRegImpl::stack2reg((o4_offset)>>2), O4->as_VMReg()->next());
    map->set_callee_saved(VMRegImpl::stack2reg((o5_offset)>>2), O5->as_VMReg()->next());
    map->set_callee_saved(VMRegImpl::stack2reg((g1_offset)>>2), G1->as_VMReg()->next());
    map->set_callee_saved(VMRegImpl::stack2reg((g3_offset)>>2), G3->as_VMReg()->next());
    map->set_callee_saved(VMRegImpl::stack2reg((g4_offset)>>2), G4->as_VMReg()->next());
    map->set_callee_saved(VMRegImpl::stack2reg((g5_offset)>>2), G5->as_VMReg()->next());
#endif /* _LP64 */
  }


  // Save the flags
  __ rdccr( G5 );
  __ stx(G5, SP, ccr_offset+STACK_BIAS);
  __ stxfsr(SP, fsr_offset+STACK_BIAS);

  // Save all the FP registers: 32 doubles (32 floats correspond to the 2 halves of the first 16 doubles)
  int offset = d00_offset;
  for( int i=0; i<FloatRegisterImpl::number_of_registers; i+=2 ) {
    FloatRegister f = as_FloatRegister(i);
    __ stf(FloatRegisterImpl::D, f, SP, offset+STACK_BIAS);
    // Record as callee saved both halves of double registers (2 float registers).
    map->set_callee_saved(VMRegImpl::stack2reg(offset>>2), f->as_VMReg());
    map->set_callee_saved(VMRegImpl::stack2reg((offset + sizeof(float))>>2), f->as_VMReg()->next());
    offset += sizeof(double);
  }

  // And we're done.

  return map;
}
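// A worked example (illustrative, based on the arithmetic above) of the
// byte-offset to OopMap-slot mapping. OopMap slots are 4-byte units
// (VMRegImpl::stack_slot_size), so a byte offset becomes a slot via ">> 2".
// SPARC is big-endian, so for a 64-bit stx at byte offset X the high word
// lives at X and the low word at X+4. On a 32-bit build the 32-bit value
// of interest is the low word, hence debug_offset == 4 there; on a 64-bit
// build the whole doubleword is the value, hence debug_offset == 0:
//
//   32-bit: slot = (g1_offset + 4) >> 2   // names the low (value) word
//   64-bit: slot = (g1_offset + 0) >> 2   // names the full doubleword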
// Pop the current frame and restore all the registers that we
// saved.
void RegisterSaver::restore_live_registers(MacroAssembler* masm) {

  // Restore all the FP registers
  for( int i=0; i<FloatRegisterImpl::number_of_registers; i+=2 ) {
    __ ldf(FloatRegisterImpl::D, SP, d00_offset+i*sizeof(float)+STACK_BIAS, as_FloatRegister(i));
  }

  __ ldx(SP, ccr_offset+STACK_BIAS, G1);
  __ wrccr (G1) ;

  // Restore the G's
  // Note that G2 (AKA GThread) must be saved and restored separately.
  // TODO-FIXME: save and restore some of the other ASRs, viz., %asi and %gsr.

  __ ldx(SP, g1_offset+STACK_BIAS, G1);
  __ ldx(SP, g3_offset+STACK_BIAS, G3);
  __ ldx(SP, g4_offset+STACK_BIAS, G4);
  __ ldx(SP, g5_offset+STACK_BIAS, G5);


#if !defined(_LP64)
  // Restore the 64-bit O's.
  __ ldx(SP, o0_offset+STACK_BIAS, O0);
  __ ldx(SP, o1_offset+STACK_BIAS, O1);
  __ ldx(SP, o2_offset+STACK_BIAS, O2);
  __ ldx(SP, o3_offset+STACK_BIAS, O3);
  __ ldx(SP, o4_offset+STACK_BIAS, O4);
  __ ldx(SP, o5_offset+STACK_BIAS, O5);

  // And temporarily place them in TLS

  __ stx(O0, G2_thread, JavaThread::o_reg_temps_offset_in_bytes()+0*8);
  __ stx(O1, G2_thread, JavaThread::o_reg_temps_offset_in_bytes()+1*8);
  __ stx(O2, G2_thread, JavaThread::o_reg_temps_offset_in_bytes()+2*8);
  __ stx(O3, G2_thread, JavaThread::o_reg_temps_offset_in_bytes()+3*8);
  __ stx(O4, G2_thread, JavaThread::o_reg_temps_offset_in_bytes()+4*8);
  __ stx(O5, G2_thread, JavaThread::o_reg_temps_offset_in_bytes()+5*8);
#endif /* _LP64 */

  // Restore flags

  __ ldxfsr(SP, fsr_offset+STACK_BIAS);

  __ restore();

#if !defined(_LP64)
  // Now reload the 64bit Oregs after we've restored the window.
  __ ldx(G2_thread, JavaThread::o_reg_temps_offset_in_bytes()+0*8, O0);
  __ ldx(G2_thread, JavaThread::o_reg_temps_offset_in_bytes()+1*8, O1);
  __ ldx(G2_thread, JavaThread::o_reg_temps_offset_in_bytes()+2*8, O2);
  __ ldx(G2_thread, JavaThread::o_reg_temps_offset_in_bytes()+3*8, O3);
  __ ldx(G2_thread, JavaThread::o_reg_temps_offset_in_bytes()+4*8, O4);
  __ ldx(G2_thread, JavaThread::o_reg_temps_offset_in_bytes()+5*8, O5);
#endif /* _LP64 */

}

// Pop the current frame and restore the registers that might be holding
// a result.
void RegisterSaver::restore_result_registers(MacroAssembler* masm) {

#if !defined(_LP64)
  // 32bit build returns longs in G1
  __ ldx(SP, g1_offset+STACK_BIAS, G1);

  // Retrieve the 64-bit O's.
  __ ldx(SP, o0_offset+STACK_BIAS, O0);
  __ ldx(SP, o1_offset+STACK_BIAS, O1);
  // and save to TLS
  __ stx(O0, G2_thread, JavaThread::o_reg_temps_offset_in_bytes()+0*8);
  __ stx(O1, G2_thread, JavaThread::o_reg_temps_offset_in_bytes()+1*8);
#endif /* _LP64 */

  __ ldf(FloatRegisterImpl::D, SP, d00_offset+STACK_BIAS, as_FloatRegister(0));

  __ restore();

#if !defined(_LP64)
  // Now reload the 64bit Oregs after we've restored the window.
  __ ldx(G2_thread, JavaThread::o_reg_temps_offset_in_bytes()+0*8, O0);
  __ ldx(G2_thread, JavaThread::o_reg_temps_offset_in_bytes()+1*8, O1);
#endif /* _LP64 */

}

// The java_calling_convention describes stack locations as ideal slots on
// a frame with no abi restrictions. Since we must observe abi restrictions
// (like the placement of the register window) the slots must be biased by
// the following value.
static int reg2offset(VMReg r) {
  return (r->reg2stack() + SharedRuntime::out_preserve_stack_slots()) * VMRegImpl::stack_slot_size;
}

static VMRegPair reg64_to_VMRegPair(Register r) {
  VMRegPair ret;
  if (wordSize == 8) {
    ret.set2(r->as_VMReg());
  } else {
    ret.set_pair(r->successor()->as_VMReg(), r->as_VMReg());
  }
  return ret;
}
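// Usage sketch for reg2offset() with a hypothetical slot count, purely for
// illustration: a stack VMReg's slot index is biased by the ABI-reserved
// area before being scaled to bytes. If out_preserve_stack_slots() were,
// say, 16, then a VMReg in stack slot 3 would land at
//
//   (3 + 16) * VMRegImpl::stack_slot_size == 19 * 4 == 76
//
// bytes above the window top (before STACK_BIAS is added by the caller).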
// ---------------------------------------------------------------------------
// Read the array of BasicTypes from a signature, and compute where the
// arguments should go. Values in the VMRegPair regs array refer to 4-byte
// (VMRegImpl::stack_slot_size) quantities. Values less than VMRegImpl::stack0
// are registers, those above refer to 4-byte stack slots. All stack slots are
// based off of the window top. VMRegImpl::stack0 refers to the first slot
// past the 16-word window, and VMRegImpl::stack0+1 refers to the memory word
// 4 bytes higher. Register values 0-63 (up to RegisterImpl::number_of_registers)
// are the 64-bit integer registers. Values 64-95 are the (32-bit only) float
// registers. Each 32-bit quantity is given its own number, so the integer
// registers (in either 32- or 64-bit builds) use 2 numbers. For example,
// there is an O0-low and an O0-high. Essentially, all int register numbers
// are doubled.

// Register results are passed in O0-O5, for outgoing call arguments. To
// convert to incoming arguments, convert all O's to I's. The regs array
// refers to the low and hi 32-bit words of 64-bit registers or stack slots.
// If the regs[].second() field is set to VMRegImpl::Bad(), it means it's
// unused (a 32-bit value was passed). If both are VMRegImpl::Bad(), it means
// no value was passed (used as a placeholder for the other half of longs and
// doubles in the 64-bit build). regs[].second() is either VMRegImpl::Bad()
// or regs[].first()+1 (regs[].first() may be misaligned in the C calling
// convention). Sparc never passes a value in regs[].second() but not
// regs[].first() (i.e., regs[].first() == VMRegImpl::Bad() &&
// regs[].second() != VMRegImpl::Bad()) nor unrelated values in the same
// VMRegPair.

// Note: the INPUTS in sig_bt are in units of Java argument words, which are
// either 32-bit or 64-bit depending on the build. The OUTPUTS are in 32-bit
// units regardless of build.


// ---------------------------------------------------------------------------
// The compiled Java calling convention. The Java convention always passes
// 64-bit values in adjacent aligned locations (either registers or stack),
// floats in float registers and doubles in aligned float pairs. Values are
// packed in the registers. There is no backing varargs store for values in
// registers. In the 32-bit build, longs are passed in G1 and G4 (cannot be
// passed in I's, because longs in I's get their heads chopped off at
// interrupt).
int SharedRuntime::java_calling_convention(const BasicType *sig_bt,
                                           VMRegPair *regs,
                                           int total_args_passed,
                                           int is_outgoing) {
  assert(F31->as_VMReg()->is_reg(), "overlapping stack/register numbers");

  // Convention is to pack the first 6 int/oop args into the first 6 registers
  // (I0-I5), extras spill to the stack. Then pack the first 8 float args
  // into F0-F7, extras spill to the stack. Then pad all register sets to
  // align. Then put longs and doubles into the same registers as they fit,
  // else spill to the stack.
  const int int_reg_max = SPARC_ARGS_IN_REGS_NUM;
  const int flt_reg_max = 8;
  //
  // Where 32-bit 1-reg longs start being passed.
  // In tiered we must pass on stack because c1 can't use a "pair" in a single reg.
  // So make it look like we've filled all the G regs that c2 wants to use.
  Register g_reg = TieredCompilation ? noreg : G1;

  // Count int/oop and float args. See how many stack slots we'll need and
  // where the longs & doubles will go.
  int int_reg_cnt = 0;
  int flt_reg_cnt = 0;
  // int stk_reg_pairs = frame::register_save_words*(wordSize>>2);
  // int stk_reg_pairs = SharedRuntime::out_preserve_stack_slots();
  int stk_reg_pairs = 0;
  for (int i = 0; i < total_args_passed; i++) {
    switch (sig_bt[i]) {
    case T_LONG:                // LP64, longs compete with int args
      assert(sig_bt[i+1] == T_VOID, "");
#ifdef _LP64
      if (int_reg_cnt < int_reg_max) int_reg_cnt++;
#endif
      break;
    case T_OBJECT:
    case T_ARRAY:
    case T_ADDRESS: // Used, e.g., in slow-path locking for the lock's stack address
      if (int_reg_cnt < int_reg_max) int_reg_cnt++;
#ifndef _LP64
      else                           stk_reg_pairs++;
#endif
      break;
    case T_INT:
    case T_SHORT:
    case T_CHAR:
    case T_BYTE:
    case T_BOOLEAN:
      if (int_reg_cnt < int_reg_max) int_reg_cnt++;
      else                           stk_reg_pairs++;
      break;
    case T_FLOAT:
      if (flt_reg_cnt < flt_reg_max) flt_reg_cnt++;
      else                           stk_reg_pairs++;
      break;
    case T_DOUBLE:
      assert(sig_bt[i+1] == T_VOID, "");
      break;
    case T_VOID:
      break;
    default:
      ShouldNotReachHere();
    }
  }

  // This is where the longs/doubles start on the stack.
  stk_reg_pairs = (stk_reg_pairs+1) & ~1; // Round

  int flt_reg_pairs = (flt_reg_cnt+1) & ~1;

  // int stk_reg = frame::register_save_words*(wordSize>>2);
  // int stk_reg = SharedRuntime::out_preserve_stack_slots();
  int stk_reg = 0;
  int int_reg = 0;
  int flt_reg = 0;
  // Now do the signature layout
  for (int i = 0; i < total_args_passed; i++) {
    switch (sig_bt[i]) {
    case T_INT:
    case T_SHORT:
    case T_CHAR:
    case T_BYTE:
    case T_BOOLEAN:
#ifndef _LP64
    case T_OBJECT:
    case T_ARRAY:
    case T_ADDRESS: // Used, e.g., in slow-path locking for the lock's stack address
#endif // _LP64
      if (int_reg < int_reg_max) {
        Register r = is_outgoing ? as_oRegister(int_reg++) : as_iRegister(int_reg++);
        regs[i].set1(r->as_VMReg());
      } else {
        regs[i].set1(VMRegImpl::stack2reg(stk_reg++));
      }
      break;

#ifdef _LP64
    case T_OBJECT:
    case T_ARRAY:
    case T_ADDRESS: // Used, e.g., in slow-path locking for the lock's stack address
      if (int_reg < int_reg_max) {
        Register r = is_outgoing ? as_oRegister(int_reg++) : as_iRegister(int_reg++);
        regs[i].set2(r->as_VMReg());
      } else {
        regs[i].set2(VMRegImpl::stack2reg(stk_reg_pairs));
        stk_reg_pairs += 2;
      }
      break;
#endif // _LP64

    case T_LONG:
      assert(sig_bt[i+1] == T_VOID, "expecting VOID in other half");
#ifdef _LP64
      if (int_reg < int_reg_max) {
        Register r = is_outgoing ? as_oRegister(int_reg++) : as_iRegister(int_reg++);
        regs[i].set2(r->as_VMReg());
      } else {
        regs[i].set2(VMRegImpl::stack2reg(stk_reg_pairs));
        stk_reg_pairs += 2;
      }
#else
#ifdef COMPILER2
      // For 32-bit build, can't pass longs in O-regs because they become
      // I-regs and get trashed. Use G-regs instead. G1 and G4 are almost
      // spare and available. This convention isn't used by the Sparc ABI or
      // anywhere else. If we're tiered then we don't use G-regs because c1
      // can't deal with them as a "pair". (Tiered makes this code think g's are filled.)
      // G0: zero
      // G1: 1st Long arg
      // G2: global allocated to TLS
      // G3: used in inline cache check
      // G4: 2nd Long arg
      // G5: used in inline cache check
      // G6: used by OS
      // G7: used by OS

      if (g_reg == G1) {
        regs[i].set2(G1->as_VMReg()); // This long arg in G1
        g_reg = G4;                   // Where the next arg goes
      } else if (g_reg == G4) {
        regs[i].set2(G4->as_VMReg()); // The 2nd long arg in G4
        g_reg = noreg;                // No more longs in registers
      } else {
        regs[i].set2(VMRegImpl::stack2reg(stk_reg_pairs));
        stk_reg_pairs += 2;
      }
#else // COMPILER2
      regs[i].set2(VMRegImpl::stack2reg(stk_reg_pairs));
      stk_reg_pairs += 2;
#endif // COMPILER2
#endif // _LP64
      break;

    case T_FLOAT:
      if (flt_reg < flt_reg_max) regs[i].set1(as_FloatRegister(flt_reg++)->as_VMReg());
      else                       regs[i].set1(VMRegImpl::stack2reg(stk_reg++));
      break;
    case T_DOUBLE:
      assert(sig_bt[i+1] == T_VOID, "expecting half");
      if (flt_reg_pairs + 1 < flt_reg_max) {
        regs[i].set2(as_FloatRegister(flt_reg_pairs)->as_VMReg());
        flt_reg_pairs += 2;
      } else {
        regs[i].set2(VMRegImpl::stack2reg(stk_reg_pairs));
        stk_reg_pairs += 2;
      }
      break;
    case T_VOID: regs[i].set_bad(); break; // Halves of longs & doubles
    default:
      ShouldNotReachHere();
    }
  }

  // Return the amount of stack space these arguments will need.
  return stk_reg_pairs;

}
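// A worked example of the layout above (illustrative, assuming a 64-bit
// build and outgoing arguments). For a Java signature (int, long, double)
// the caller passes
//
//   sig_bt = { T_INT, T_LONG, T_VOID, T_DOUBLE, T_VOID }
//
// and java_calling_convention() fills regs[] roughly as
//
//   regs[0] = O0     (set1: 32-bit int)
//   regs[1] = O1     (set2: 64-bit long in one register)
//   regs[2] = bad    (T_VOID placeholder for the long's other half)
//   regs[3] = F0:F1  (set2: double in an aligned float pair)
//   regs[4] = bad    (T_VOID placeholder)
//
// No stack slots are needed, so the return value is 0.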
// Helper class mostly to avoid passing masm everywhere, and handle
// store displacement overflow logic.
class AdapterGenerator {
  MacroAssembler *masm;
  Register Rdisp;
  void set_Rdisp(Register r) { Rdisp = r; }

  void patch_callers_callsite();

  // base+st_off points to top of argument
  int arg_offset(const int st_off) { return st_off; }
  int next_arg_offset(const int st_off) {
    return st_off - Interpreter::stackElementSize;
  }

  // Argument slot values may be loaded first into a register because
  // they might not fit into the displacement field.
  RegisterOrConstant arg_slot(const int st_off);
  RegisterOrConstant next_arg_slot(const int st_off);

  // Stores long into offset pointed to by base
  void store_c2i_long(Register r, Register base,
                      const int st_off, bool is_stack);
  void store_c2i_object(Register r, Register base,
                        const int st_off);
  void store_c2i_int(Register r, Register base,
                     const int st_off);
  void store_c2i_double(VMReg r_2,
                        VMReg r_1, Register base, const int st_off);
  void store_c2i_float(FloatRegister f, Register base,
                       const int st_off);

 public:
  void gen_c2i_adapter(int total_args_passed,
                       // VMReg max_arg,
                       int comp_args_on_stack, // VMRegStackSlots
                       const BasicType *sig_bt,
                       const VMRegPair *regs,
                       Label& skip_fixup);
  void gen_i2c_adapter(int total_args_passed,
                       // VMReg max_arg,
                       int comp_args_on_stack, // VMRegStackSlots
                       const BasicType *sig_bt,
                       const VMRegPair *regs);

  AdapterGenerator(MacroAssembler *_masm) : masm(_masm) {}
};


// Patch the caller's callsite with entry to compiled code if it exists.
void AdapterGenerator::patch_callers_callsite() {
  Label L;
  __ ld_ptr(G5_method, in_bytes(Method::code_offset()), G3_scratch);
  __ br_null(G3_scratch, false, Assembler::pt, L);
  // Schedule the branch target address early.
  __ delayed()->ld_ptr(G5_method, in_bytes(Method::interpreter_entry_offset()), G3_scratch);
  // Call into the VM to patch the caller, then jump to compiled callee
  __ save_frame(4);     // Args in compiled layout; do not blow them

  // Must save all the live Gregs. The list is:
  // G1: 1st Long arg (32bit build)
  // G2: global allocated to TLS
  // G3: used in inline cache check (scratch)
  // G4: 2nd Long arg (32bit build);
  // G5: used in inline cache check (Method*)

  // The longs must go to the stack by hand since in the 32 bit build they can be trashed by window ops.

#ifdef _LP64
  // mov(s,d)
  __ mov(G1, L1);
  __ mov(G4, L4);
  __ mov(G5_method, L5);
  __ mov(G5_method, O0);         // VM needs target method
  __ mov(I7, O1);                // VM needs caller's callsite
  // Must be a leaf call...
  // can be very far once the blob has been relocated
  AddressLiteral dest(CAST_FROM_FN_PTR(address, SharedRuntime::fixup_callers_callsite));
  __ relocate(relocInfo::runtime_call_type);
  __ jumpl_to(dest, O7, O7);
  __ delayed()->mov(G2_thread, L7_thread_cache);
  __ mov(L7_thread_cache, G2_thread);
  __ mov(L1, G1);
  __ mov(L4, G4);
  __ mov(L5, G5_method);
#else
  __ stx(G1, FP, -8 + STACK_BIAS);
  __ stx(G4, FP, -16 + STACK_BIAS);
  __ mov(G5_method, L5);
  __ mov(G5_method, O0);         // VM needs target method
  __ mov(I7, O1);                // VM needs caller's callsite
  // Must be a leaf call...
  __ call(CAST_FROM_FN_PTR(address, SharedRuntime::fixup_callers_callsite), relocInfo::runtime_call_type);
  __ delayed()->mov(G2_thread, L7_thread_cache);
  __ mov(L7_thread_cache, G2_thread);
  __ ldx(FP, -8 + STACK_BIAS, G1);
  __ ldx(FP, -16 + STACK_BIAS, G4);
  __ mov(L5, G5_method);
  __ ld_ptr(G5_method, in_bytes(Method::interpreter_entry_offset()), G3_scratch);
#endif /* _LP64 */

  __ restore();      // Restore args
  __ bind(L);
}


RegisterOrConstant AdapterGenerator::arg_slot(const int st_off) {
  RegisterOrConstant roc(arg_offset(st_off));
  return __ ensure_simm13_or_reg(roc, Rdisp);
}

RegisterOrConstant AdapterGenerator::next_arg_slot(const int st_off) {
  RegisterOrConstant roc(next_arg_offset(st_off));
  return __ ensure_simm13_or_reg(roc, Rdisp);
}
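// Why arg_slot()/next_arg_slot() go through ensure_simm13_or_reg (an
// explanatory sketch): SPARC load/store instructions encode immediate
// displacements as signed 13-bit values, i.e. -4096..4095. An interpreter
// frame offset outside that range cannot be encoded in the instruction, so
// the offset is first materialized into Rdisp and the register-displacement
// addressing form is used instead. Schematically:
//
//   offset fits:     stx %r, [%base + offset]
//   offset too big:  set offset, %Rdisp
//                    stx %r, [%base + %Rdisp]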
// Stores long into offset pointed to by base
void AdapterGenerator::store_c2i_long(Register r, Register base,
                                      const int st_off, bool is_stack) {
#ifdef _LP64
  // In V9, longs are given 2 64-bit slots in the interpreter, but the
  // data is passed in only 1 slot.
  __ stx(r, base, next_arg_slot(st_off));
#else
#ifdef COMPILER2
  // Misaligned store of 64-bit data
  __ stw(r, base, arg_slot(st_off));      // lo bits
  __ srlx(r, 32, r);
  __ stw(r, base, next_arg_slot(st_off)); // hi bits
#else
  if (is_stack) {
    // Misaligned store of 64-bit data
    __ stw(r, base, arg_slot(st_off));      // lo bits
    __ srlx(r, 32, r);
    __ stw(r, base, next_arg_slot(st_off)); // hi bits
  } else {
    __ stw(r->successor(), base, arg_slot(st_off)     ); // lo bits
    __ stw(r             , base, next_arg_slot(st_off)); // hi bits
  }
#endif // COMPILER2
#endif // _LP64
}

void AdapterGenerator::store_c2i_object(Register r, Register base,
                                        const int st_off) {
  __ st_ptr(r, base, arg_slot(st_off));
}

void AdapterGenerator::store_c2i_int(Register r, Register base,
                                     const int st_off) {
  __ st(r, base, arg_slot(st_off));
}

// Stores into offset pointed to by base
void AdapterGenerator::store_c2i_double(VMReg r_2,
                                        VMReg r_1, Register base, const int st_off) {
#ifdef _LP64
  // In V9, doubles are given 2 64-bit slots in the interpreter, but the
  // data is passed in only 1 slot.
  __ stf(FloatRegisterImpl::D, r_1->as_FloatRegister(), base, next_arg_slot(st_off));
#else
  // Need to marshal 64-bit value from misaligned Lesp loads
  __ stf(FloatRegisterImpl::S, r_1->as_FloatRegister(), base, next_arg_slot(st_off));
  __ stf(FloatRegisterImpl::S, r_2->as_FloatRegister(), base, arg_slot(st_off)     );
#endif
}

void AdapterGenerator::store_c2i_float(FloatRegister f, Register base,
                                       const int st_off) {
  __ stf(FloatRegisterImpl::S, f, base, arg_slot(st_off));
}
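// A worked example (illustrative) of the misaligned 64-bit store above.
// For a hypothetical value r == 0x1122334455667788, the 32-bit COMPILER2
// path emits
//
//   stw  r, [base + arg_slot]        // stores the low word  0x55667788
//   srlx r, 32, r                    // r now holds 0x0000000011223344
//   stw  r, [base + next_arg_slot]   // stores the high word 0x11223344
//
// producing the (hi, lo) word order the interpreter expects, using two
// 32-bit stores because the interpreter slots may not be 8-byte aligned.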
void AdapterGenerator::gen_c2i_adapter(
                            int total_args_passed,
                            // VMReg max_arg,
                            int comp_args_on_stack, // VMRegStackSlots
                            const BasicType *sig_bt,
                            const VMRegPair *regs,
                            Label& skip_fixup) {

  // Before we get into the guts of the C2I adapter, see if we should be here
  // at all. We've come from compiled code and are attempting to jump to the
  // interpreter, which means the caller made a static call to get here
  // (vcalls always get a compiled target if there is one). Check for a
  // compiled target. If there is one, we need to patch the caller's call.
  // However we will run interpreted if we come thru here. The next pass
  // thru the call site will run compiled. If we ran compiled here then
  // we can (theoretically) do endless i2c->c2i->i2c transitions during
  // deopt/uncommon trap cycles. If we always go interpreted here then
  // we can have at most one and don't need to play any tricks to keep
  // from endlessly growing the stack.
  //
  // Actually if we detected that we had an i2c->c2i transition here we
  // ought to be able to reset the world back to the state of the interpreted
  // call and not bother building another interpreter arg area. We don't
  // do that at this point.

  patch_callers_callsite();

  __ bind(skip_fixup);

  // Since all args are passed on the stack, total_args_passed*wordSize is the
  // space we need. Add in varargs area needed by the interpreter. Round up
  // to stack alignment.
  const int arg_size = total_args_passed * Interpreter::stackElementSize;
  const int varargs_area =
                 (frame::varargs_offset - frame::register_save_words)*wordSize;
  const int extraspace = round_to(arg_size + varargs_area, 2*wordSize);

  int bias = STACK_BIAS;
  const int interp_arg_offset = frame::varargs_offset*wordSize +
                        (total_args_passed-1)*Interpreter::stackElementSize;

  Register base = SP;

#ifdef _LP64
  // In the 64bit build because of wider slots and STACKBIAS we can run
  // out of bits in the displacement to do loads and stores. Use g3 as
  // temporary displacement.
  if (!Assembler::is_simm13(extraspace)) {
    __ set(extraspace, G3_scratch);
    __ sub(SP, G3_scratch, SP);
  } else {
    __ sub(SP, extraspace, SP);
  }
  set_Rdisp(G3_scratch);
#else
  __ sub(SP, extraspace, SP);
#endif // _LP64

  // First write G1 (if used) to wherever it must go
  for (int i=0; i<total_args_passed; i++) {
    const int st_off = interp_arg_offset - (i*Interpreter::stackElementSize) + bias;
    VMReg r_1 = regs[i].first();
    VMReg r_2 = regs[i].second();
    if (r_1 == G1_scratch->as_VMReg()) {
      if (sig_bt[i] == T_OBJECT || sig_bt[i] == T_ARRAY) {
        store_c2i_object(G1_scratch, base, st_off);
      } else if (sig_bt[i] == T_LONG) {
        assert(!TieredCompilation, "should not use register args for longs");
        store_c2i_long(G1_scratch, base, st_off, false);
      } else {
        store_c2i_int(G1_scratch, base, st_off);
      }
    }
  }

  // Now write the args into the outgoing interpreter space
  for (int i=0; i<total_args_passed; i++) {
    const int st_off = interp_arg_offset - (i*Interpreter::stackElementSize) + bias;
    VMReg r_1 = regs[i].first();
    VMReg r_2 = regs[i].second();
    if (!r_1->is_valid()) {
      assert(!r_2->is_valid(), "");
      continue;
    }
    // Skip G1 if found as we did it first in order to free it up
    if (r_1 == G1_scratch->as_VMReg()) {
      continue;
    }
#ifdef ASSERT
    bool G1_forced = false;
#endif // ASSERT
    if (r_1->is_stack()) {        // Pretend stack targets are loaded into G1
#ifdef _LP64
      Register ld_off = Rdisp;
      __ set(reg2offset(r_1) + extraspace + bias, ld_off);
#else
      int ld_off = reg2offset(r_1) + extraspace + bias;
#endif // _LP64
#ifdef ASSERT
      G1_forced = true;
#endif // ASSERT
      r_1 = G1_scratch->as_VMReg();// as part of the load/store shuffle
      if (!r_2->is_valid()) __ ld (base, ld_off, G1_scratch);
      else                  __ ldx(base, ld_off, G1_scratch);
    }

    if (r_1->is_Register()) {
      Register r = r_1->as_Register()->after_restore();
      if (sig_bt[i] == T_OBJECT || sig_bt[i] == T_ARRAY) {
        store_c2i_object(r, base, st_off);
      } else if (sig_bt[i] == T_LONG || sig_bt[i] == T_DOUBLE) {
#ifndef _LP64
        if (TieredCompilation) {
          assert(G1_forced || sig_bt[i] != T_LONG, "should not use register args for longs");
        }
#endif // _LP64
        store_c2i_long(r, base, st_off, r_2->is_stack());
      } else {
        store_c2i_int(r, base, st_off);
      }
    } else {
      assert(r_1->is_FloatRegister(), "");
      if (sig_bt[i] == T_FLOAT) {
        store_c2i_float(r_1->as_FloatRegister(), base, st_off);
      } else {
        assert(sig_bt[i] == T_DOUBLE, "wrong type");
        store_c2i_double(r_2, r_1, base, st_off);
      }
    }
  }

#ifdef _LP64
  // Need to reload G3_scratch, used for temporary displacements.
  __ ld_ptr(G5_method, in_bytes(Method::interpreter_entry_offset()), G3_scratch);

  // Pass O5_savedSP as an argument to the interpreter.
  // The interpreter will restore SP to this value before returning.
  __ set(extraspace, G1);
  __ add(SP, G1, O5_savedSP);
#else
  // Pass O5_savedSP as an argument to the interpreter.
  // The interpreter will restore SP to this value before returning.
  __ add(SP, extraspace, O5_savedSP);
#endif // _LP64

  __ mov((frame::varargs_offset)*wordSize -
         1*Interpreter::stackElementSize+bias+BytesPerWord, G1);
  // Jump to the interpreter just as if interpreter was doing it.
  __ jmpl(G3_scratch, 0, G0);
  // Setup Lesp for the call. Cannot actually set Lesp as the current Lesp
  // (really L0) is in use by the compiled frame as a generic temp. However,
  // the interpreter does not know where its args are without some kind of
  // arg pointer being passed in. Pass it in Gargs.
  __ delayed()->add(SP, G1, Gargs);
}

static void range_check(MacroAssembler* masm, Register pc_reg, Register temp_reg, Register temp2_reg,
                        address code_start, address code_end,
                        Label& L_ok) {
  Label L_fail;
  __ set(ExternalAddress(code_start), temp_reg);
  __ set(pointer_delta(code_end, code_start, 1), temp2_reg);
  __ cmp(pc_reg, temp_reg);
  __ brx(Assembler::lessEqualUnsigned, false, Assembler::pn, L_fail);
  __ delayed()->add(temp_reg, temp2_reg, temp_reg);
  __ cmp(pc_reg, temp_reg);
  __ cmp_and_brx_short(pc_reg, temp_reg, Assembler::lessUnsigned, Assembler::pt, L_ok);
  __ bind(L_fail);
}
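// Equivalent logic for range_check() in C form (an illustrative sketch):
// the generated instructions perform an unsigned range test against the
// code blob, falling through to L_fail when the return PC is outside it:
//
//   if (pc > code_start && pc < code_start + size) goto L_ok;
//   // ...fall through to L_fail
//
// Note that the first branch (lessEqualUnsigned) also rejects
// pc == code_start, so the accepted interval is exclusive at both ends.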
void AdapterGenerator::gen_i2c_adapter(
                            int total_args_passed,
                            // VMReg max_arg,
                            int comp_args_on_stack, // VMRegStackSlots
                            const BasicType *sig_bt,
                            const VMRegPair *regs) {

  // Generate an I2C adapter: adjust the I-frame to make space for the C-frame
  // layout. Lesp was saved by the calling I-frame and will be restored on
  // return. Meanwhile, outgoing arg space is all owned by the callee
  // C-frame, so we can mangle it at will. After adjusting the frame size,
  // hoist register arguments and repack other args according to the compiled
  // code convention. Finally, end in a jump to the compiled code. The entry
  // point address is the start of the buffer.

  // We will only enter here from an interpreted frame and never from after
  // passing thru a c2i. Azul allowed this but we do not. If we lose the
  // race and use a c2i we will remain interpreted for the race loser(s).
  // This removes all sorts of headaches on the x86 side and also eliminates
  // the possibility of having c2i -> i2c -> c2i -> ... endless transitions.

  // More detail:
  // Adapters can be frameless because they do not require the caller
  // to perform additional cleanup work, such as correcting the stack pointer.
  // An i2c adapter is frameless because the *caller* frame, which is interpreted,
  // routinely repairs its own stack pointer (from interpreter_frame_last_sp),
  // even if a callee has modified the stack pointer.
  // A c2i adapter is frameless because the *callee* frame, which is interpreted,
  // routinely repairs its caller's stack pointer (from sender_sp, which is set
  // up via the senderSP register).
  // In other words, if *either* the caller or callee is interpreted, we can
  // get the stack pointer repaired after a call.
  // This is why c2i and i2c adapters cannot be indefinitely composed.
  // In particular, if a c2i adapter were to somehow call an i2c adapter,
  // both caller and callee would be compiled methods, and neither would
  // clean up the stack pointer changes performed by the two adapters.
  // If this happens, control eventually transfers back to the compiled
  // caller, but with an uncorrected stack, causing delayed havoc.

  if (VerifyAdapterCalls &&
      (Interpreter::code() != NULL || StubRoutines::code1() != NULL)) {
    // So, let's test for cascading c2i/i2c adapters right now.
    // assert(Interpreter::contains($return_addr) ||
    //        StubRoutines::contains($return_addr),
    //        "i2c adapter must return to an interpreter frame");
    __ block_comment("verify_i2c { ");
    Label L_ok;
    if (Interpreter::code() != NULL)
      range_check(masm, O7, O0, O1,
                  Interpreter::code()->code_start(), Interpreter::code()->code_end(),
                  L_ok);
    if (StubRoutines::code1() != NULL)
      range_check(masm, O7, O0, O1,
                  StubRoutines::code1()->code_begin(), StubRoutines::code1()->code_end(),
                  L_ok);
    if (StubRoutines::code2() != NULL)
      range_check(masm, O7, O0, O1,
                  StubRoutines::code2()->code_begin(), StubRoutines::code2()->code_end(),
                  L_ok);
    const char* msg = "i2c adapter must return to an interpreter frame";
    __ block_comment(msg);
    __ stop(msg);
    __ bind(L_ok);
    __ block_comment("} verify_i2c ");
  }

  // As you can see from the list of inputs & outputs there are not a lot
  // of temp registers to work with: mostly G1, G3 & G4.

  // Inputs:
  // G2_thread    - TLS
  // G5_method    - Method oop
  // G4 (Gargs)   - Pointer to interpreter's args
  // O0..O4       - free for scratch
  // O5_savedSP   - Caller's saved SP, to be restored if needed
  // O6           - Current SP!
  // O7           - Valid return address
  // L0-L7, I0-I7 - Caller's temps (no frame pushed yet)

  // Outputs:
  // G2_thread    - TLS
  // G1, G4       - Outgoing long args in 32-bit build
  // O0-O5        - Outgoing args in compiled layout
  // O6           - Adjusted or restored SP
  // O7           - Valid return address
  // L0-L7, I0-I7 - Caller's temps (no frame pushed yet)
  // F0-F7        - more outgoing args


  // Gargs is the incoming argument base, and also an outgoing argument.
  __ sub(Gargs, BytesPerWord, Gargs);

  // ON ENTRY TO THE CODE WE ARE MAKING, WE HAVE AN INTERPRETED FRAME
  // WITH O7 HOLDING A VALID RETURN PC
  //
  // |              |
  // :  java stack  :
  // |              |
  // +--------------+ <--- start of outgoing args
  // | receiver     |   |
  // : rest of args :   |---size is java-arg-words
  // |              |   |
  // +--------------+ <--- O4_args (misaligned) and Lesp if prior is not C2I
  // |              |   |
  // :    unused    :   |---Space for max Java stack, plus stack alignment
  // |              |   |
  // +--------------+ <--- SP + 16*wordsize
  // |              |
  // :    window    :
  // |              |
  // +--------------+ <--- SP

  // WE REPACK THE STACK. We use the common calling convention layout as
  // discovered by calling SharedRuntime::calling_convention. We assume it
  // causes an arbitrary shuffle of memory, which may require some register
  // temps to do the shuffle. We hope for (and optimize for) the case where
  // temps are not needed. We may have to resize the stack slightly, in case
  // we need alignment padding (32-bit interpreter can pass longs & doubles
  // misaligned, but the compilers expect them aligned).
  //
  // |              |
  // :  java stack  :
  // |              |
  // +--------------+ <--- start of outgoing args
  // | pad, align   |   |
  // +--------------+   |
  // | ints, floats |   |---Outgoing stack args, packed low.
  // +--------------+   |   First few args in registers.
  // :    doubles   :   |
  // |    longs     |   |
  // +--------------+ <--- SP' + 16*wordsize
  // |              |
  // :    window    :
  // |              |
  // +--------------+ <--- SP'

  // ON EXIT FROM THE CODE WE ARE MAKING, WE STILL HAVE AN INTERPRETED FRAME
  // WITH O7 HOLDING A VALID RETURN PC - IT'S JUST THAT THE ARGS ARE NOW SET UP
  // FOR COMPILED CODE AND THE FRAME SLIGHTLY GROWN.

  // Cut-out for having no stack args. Since up to 6 args are passed
  // in registers, we will commonly have no stack args.
  if (comp_args_on_stack > 0) {

    // Convert VMReg stack slots to words.
    int comp_words_on_stack = round_to(comp_args_on_stack*VMRegImpl::stack_slot_size, wordSize)>>LogBytesPerWord;
    // Round up to minimum stack alignment, in wordSize
    comp_words_on_stack = round_to(comp_words_on_stack, 2);
    // Now compute the distance from Lesp to SP. This calculation does not
    // include the space for total_args_passed because Lesp has not yet popped
    // the arguments.
    __ sub(SP, (comp_words_on_stack)*wordSize, SP);
  }
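  // A worked example of the sizing above (illustrative, assuming a 64-bit
  // build with comp_args_on_stack == 5 VMReg slots):
  //   5 * 4 bytes = 20  ->  round_to(20, 8) >> 3  =  3 words
  //   round_to(3, 2)                              =  4 words
  // so SP is dropped by 4 * wordSize, keeping the 16-byte stack alignment.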

  // Will jump to the compiled code just as if compiled code was doing it.
  // Pre-load the register-jump target early, to schedule it better.
  __ ld_ptr(G5_method, in_bytes(Method::from_compiled_offset()), G3);

  // Now generate the shuffle code. Pick up all register args and move the
  // rest through G1_scratch.
  for (int i=0; i<total_args_passed; i++) {
    if (sig_bt[i] == T_VOID) {
      // Longs and doubles are passed in native word order, but misaligned
      // in the 32-bit build.
      assert(i > 0 && (sig_bt[i-1] == T_LONG || sig_bt[i-1] == T_DOUBLE), "missing half");
      continue;
    }

    // Pick up 0, 1 or 2 words from Lesp+offset. Assume mis-aligned in the
    // 32-bit build and aligned in the 64-bit build. Look for the obvious
    // ldx/lddf optimizations.

    // Load in argument order going down.
    const int ld_off = (total_args_passed-i)*Interpreter::stackElementSize;
    set_Rdisp(G1_scratch);

    VMReg r_1 = regs[i].first();
    VMReg r_2 = regs[i].second();
    if (!r_1->is_valid()) {
      assert(!r_2->is_valid(), "");
      continue;
    }
    if (r_1->is_stack()) {        // Pretend stack targets are loaded into F8/F9
      r_1 = F8->as_VMReg();       // as part of the load/store shuffle
      if (r_2->is_valid()) r_2 = r_1->next();
    }
    if (r_1->is_Register()) {     // Register argument
      Register r = r_1->as_Register()->after_restore();
      if (!r_2->is_valid()) {
        __ ld(Gargs, arg_slot(ld_off), r);
      } else {
#ifdef _LP64
        // In V9, longs are given 2 64-bit slots in the interpreter, but the
        // data is passed in only 1 slot.
        RegisterOrConstant slot = (sig_bt[i] == T_LONG) ?
              next_arg_slot(ld_off) : arg_slot(ld_off);
        __ ldx(Gargs, slot, r);
#else
        // Need to load a 64-bit value into G1/G4, but G1/G4 is being used in the
        // stack shuffle. Load the first 2 longs into G1/G4 later.
#endif
      }
    } else {
      assert(r_1->is_FloatRegister(), "");
      if (!r_2->is_valid()) {
        __ ldf(FloatRegisterImpl::S, Gargs, arg_slot(ld_off), r_1->as_FloatRegister());
      } else {
#ifdef _LP64
        // In V9, doubles are given 2 64-bit slots in the interpreter, but the
        // data is passed in only 1 slot. This code also handles longs that
        // are passed on the stack, but need a stack-to-stack move through a
        // spare float register.
        RegisterOrConstant slot = (sig_bt[i] == T_LONG || sig_bt[i] == T_DOUBLE) ?
              next_arg_slot(ld_off) : arg_slot(ld_off);
        __ ldf(FloatRegisterImpl::D, Gargs, slot, r_1->as_FloatRegister());
#else
        // Need to marshal 64-bit value from misaligned Lesp loads
        __ ldf(FloatRegisterImpl::S, Gargs, next_arg_slot(ld_off), r_1->as_FloatRegister());
        __ ldf(FloatRegisterImpl::S, Gargs, arg_slot(ld_off), r_2->as_FloatRegister());
#endif
      }
    }
    // Was the argument really intended to be on the stack, but was loaded
    // into F8/F9?
    if (regs[i].first()->is_stack()) {
      assert(r_1->as_FloatRegister() == F8, "fix this code");
      // Convert stack slot to an SP offset
      int st_off = reg2offset(regs[i].first()) + STACK_BIAS;
      // Store down the shuffled stack word. Target address _is_ aligned.
      RegisterOrConstant slot = __ ensure_simm13_or_reg(st_off, Rdisp);
      if (!r_2->is_valid()) __ stf(FloatRegisterImpl::S, r_1->as_FloatRegister(), SP, slot);
      else                  __ stf(FloatRegisterImpl::D, r_1->as_FloatRegister(), SP, slot);
    }
  }
  bool made_space = false;
#ifndef _LP64
  // May need to pick up a few long args in G1/G4
  bool g4_crushed = false;
  bool g3_crushed = false;
  for (int i=0; i<total_args_passed; i++) {
    if (regs[i].first()->is_Register() && regs[i].second()->is_valid()) {
      // Load in argument order going down
      int ld_off = (total_args_passed-i)*Interpreter::stackElementSize;
      // Need to marshal 64-bit value from misaligned Lesp loads
      Register r = regs[i].first()->as_Register()->after_restore();
      if (r == G1 || r == G4) {
        assert(!g4_crushed, "ordering problem");
        if (r == G4){
          g4_crushed = true;
          __ lduw(Gargs, arg_slot(ld_off)     , G3_scratch); // Load lo bits
          __ ld  (Gargs, next_arg_slot(ld_off), r);          // Load hi bits
        } else {
          // better schedule this way
          __ ld  (Gargs, next_arg_slot(ld_off), r);          // Load hi bits
          __ lduw(Gargs, arg_slot(ld_off)     , G3_scratch); // Load lo bits
        }
        g3_crushed = true;
        __ sllx(r, 32, r);
        __ or3(G3_scratch, r, r);
      } else {
        assert(r->is_out(), "longs passed in two O registers");
        __ ld  (Gargs, arg_slot(ld_off)     , r->successor()); // Load lo bits
        __ ld  (Gargs, next_arg_slot(ld_off), r);              // Load hi bits
      }
    }
  }
#endif

  // Jump to the compiled code just as if compiled code was doing it.
  //
#ifndef _LP64
  if (g3_crushed) {
    // Rats, the load was wasted; at least it is in cache...
    __ ld_ptr(G5_method, Method::from_compiled_offset(), G3);
  }
#endif /* _LP64 */

  // 6243940 We might end up in handle_wrong_method if
  // the callee is deoptimized as we race thru here. If that
  // happens we don't want to take a safepoint because the
  // caller frame will look interpreted and arguments are now
  // "compiled" so it is much better to make this transition
  // invisible to the stack walking code. Unfortunately if
  // we try and find the callee by normal means a safepoint
  // is possible. So we stash the desired callee in the thread
  // and the VM will find it there should this case occur.
  Address callee_target_addr(G2_thread, JavaThread::callee_target_offset());
  __ st_ptr(G5_method, callee_target_addr);

  if (StressNonEntrant) {
    // Open a big window for deopt failure
    __ save_frame(0);
    __ mov(G0, L0);
    Label loop;
    __ bind(loop);
    __ sub(L0, 1, L0);
    __ br_null_short(L0, Assembler::pt, loop);

    __ restore();
  }


  __ jmpl(G3, 0, G0);
  __ delayed()->nop();
}

// ---------------------------------------------------------------
AdapterHandlerEntry* SharedRuntime::generate_i2c2i_adapters(MacroAssembler *masm,
                                                            int total_args_passed,
                                                            // VMReg max_arg,
                                                            int comp_args_on_stack, // VMRegStackSlots
                                                            const BasicType *sig_bt,
                                                            const VMRegPair *regs,
                                                            AdapterFingerPrint* fingerprint) {
  address i2c_entry = __ pc();

  AdapterGenerator agen(masm);

  agen.gen_i2c_adapter(total_args_passed, comp_args_on_stack, sig_bt, regs);


  // -------------------------------------------------------------------------
  // Generate a C2I adapter. On entry we know G5 holds the Method*. The
  // args start out packed in the compiled layout. They need to be unpacked
  // into the interpreter layout. This will almost always require some stack
  // space. We grow the current (compiled) stack, then repack the args. We
  // finally end in a jump to the generic interpreter entry point. On exit
  // from the interpreter, the interpreter will restore our SP (lest the
  // compiled code, which relies solely on SP and not FP, get sick).

  address c2i_unverified_entry = __ pc();
  Label skip_fixup;
  {
#if !defined(_LP64) && defined(COMPILER2)
    Register R_temp = L0;   // another scratch register
#else
    Register R_temp = G1;   // another scratch register
#endif

    AddressLiteral ic_miss(SharedRuntime::get_ic_miss_stub());

    __ verify_oop(O0);
    __ load_klass(O0, G3_scratch);

#if !defined(_LP64) && defined(COMPILER2)
    __ save(SP, -frame::register_save_words*wordSize, SP);
    __ ld_ptr(G5_method, CompiledICHolder::holder_klass_offset(), R_temp);
    __ cmp(G3_scratch, R_temp);
    __ restore();
#else
    __ ld_ptr(G5_method, CompiledICHolder::holder_klass_offset(), R_temp);
    __ cmp(G3_scratch, R_temp);
#endif

    Label ok, ok2;
    __ brx(Assembler::equal, false, Assembler::pt, ok);
    __ delayed()->ld_ptr(G5_method, CompiledICHolder::holder_method_offset(), G5_method);
    __ jump_to(ic_miss, G3_scratch);
    __ delayed()->nop();

    __ bind(ok);
    // Method might have been compiled since the call site was patched to
    // interpreted; if that is the case treat it as a miss so we can get
    // the call site corrected.
    __ ld_ptr(G5_method, in_bytes(Method::code_offset()), G3_scratch);
    __ bind(ok2);
    __ br_null(G3_scratch, false, Assembler::pt, skip_fixup);
    __ delayed()->ld_ptr(G5_method, in_bytes(Method::interpreter_entry_offset()), G3_scratch);
    __ jump_to(ic_miss, G3_scratch);
    __ delayed()->nop();

  }

  address c2i_entry = __ pc();

  agen.gen_c2i_adapter(total_args_passed, comp_args_on_stack, sig_bt, regs, skip_fixup);

  __ flush();
  return AdapterHandlerLibrary::new_entry(fingerprint, i2c_entry, c2i_entry, c2i_unverified_entry);

}

// Helper function for native calling conventions
static VMReg int_stk_helper( int i ) {
  // Bias any stack based VMReg we get by ignoring the window area
  // but not the register parameter save area.
  //
  // This is strange for the following reasons. We'd normally expect
  // the calling convention to return a VMReg for a stack slot
  // completely ignoring any abi reserved area. C2 thinks of that
  // abi area as only out_preserve_stack_slots. This does not include
  // the area allocated by the C abi to store down integer arguments
  // because the java calling convention does not use it. So,
  // since c2 assumes that there are only out_preserve_stack_slots
  // to bias the optoregs (which impacts VMRegs), when actually
  // referencing any stack location the c calling convention must add
  // in this bias amount to make up for the fact that
  // out_preserve_stack_slots is insufficient for C calls. What a
  // mess. I sure hope those 6 stack words were worth it on every
  // java call!

  // Another way of cleaning this up would be for out_preserve_stack_slots
  // to take a parameter to say whether it was C or java calling conventions.
  // Then things might look a little better (but not much).

  int mem_parm_offset = i - SPARC_ARGS_IN_REGS_NUM;
  if( mem_parm_offset < 0 ) {
    return as_oRegister(i)->as_VMReg();
  } else {
    int actual_offset = (mem_parm_offset + frame::memory_parameter_word_sp_offset) * VMRegImpl::slots_per_word;
    // Now return a biased offset that will be correct when out_preserve_slots is added back in
    return VMRegImpl::stack2reg(actual_offset - SharedRuntime::out_preserve_stack_slots());
  }
}
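// Usage sketch for int_stk_helper() (symbolic, since the frame constants
// are platform-defined): the first SPARC_ARGS_IN_REGS_NUM (6) arguments
// map straight to outgoing registers, later ones to biased stack slots:
//
//   int_stk_helper(0)  ->  O0
//   int_stk_helper(5)  ->  O5
//   int_stk_helper(6)  ->  stack2reg(frame::memory_parameter_word_sp_offset
//                                      * VMRegImpl::slots_per_word
//                                    - SharedRuntime::out_preserve_stack_slots())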

int SharedRuntime::c_calling_convention(const BasicType *sig_bt,
                                        VMRegPair *regs,
                                        int total_args_passed) {

  // Return the number of VMReg stack_slots needed for the args.
  // This value does not include an abi space (like register window
  // save area).

  // The native convention is V8 if !LP64.
  // The LP64 convention is the V9 convention, which is slightly more sane.

  // We return the amount of VMReg stack slots we need to reserve for all
  // the arguments NOT counting out_preserve_stack_slots. Since we always
  // have space for storing at least 6 registers to memory we start with that.
  // See int_stk_helper for a further discussion.
  int max_stack_slots = (frame::varargs_offset * VMRegImpl::slots_per_word) - SharedRuntime::out_preserve_stack_slots();

#ifdef _LP64
  // V9 convention: All things "as-if" on double-wide stack slots.
  // Hoist any int/ptr/long's in the first 6 to int regs.
  // Hoist any flt/dbl's in the first 16 dbl regs.
  int j = 0;                    // Count of actual args, not HALVES
  for( int i=0; i<total_args_passed; i++, j++ ) {
    switch( sig_bt[i] ) {
    case T_BOOLEAN:
    case T_BYTE:
    case T_CHAR:
    case T_INT:
    case T_SHORT:
      regs[i].set1( int_stk_helper( j ) ); break;
    case T_LONG:
      assert( sig_bt[i+1] == T_VOID, "expecting half" );
    case T_ADDRESS: // raw pointers, like current thread, for VM calls
    case T_ARRAY:
    case T_OBJECT:
      regs[i].set2( int_stk_helper( j ) );
      break;
    case T_FLOAT:
      if ( j < 16 ) {
        // V9ism: floats go in ODD registers
        regs[i].set1(as_FloatRegister(1 + (j<<1))->as_VMReg());
      } else {
        // V9ism: floats go in ODD stack slot
        regs[i].set1(VMRegImpl::stack2reg(1 + (j<<1)));
      }
      break;
    case T_DOUBLE:
      assert( sig_bt[i+1] == T_VOID, "expecting half" );
      if ( j < 16 ) {
        // V9ism: doubles go in EVEN/ODD regs
        regs[i].set2(as_FloatRegister(j<<1)->as_VMReg());
      } else {
        // V9ism: doubles go in EVEN/ODD stack slots
        regs[i].set2(VMRegImpl::stack2reg(j<<1));
      }
      break;
    case T_VOID: regs[i].set_bad(); j--; break; // Do not count HALVES
    default:
      ShouldNotReachHere();
    }
    if (regs[i].first()->is_stack()) {
      int off = regs[i].first()->reg2stack();
      if (off > max_stack_slots) max_stack_slots = off;
    }
    if (regs[i].second()->is_stack()) {
      int off = regs[i].second()->reg2stack();
      if (off > max_stack_slots) max_stack_slots = off;
    }
  }

#else // _LP64
  // V8 convention: first 6 things in O-regs, rest on stack.
  // Alignment is willy-nilly.
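  // A worked example of the V9 branch above (illustrative): for a native
  // signature (double, float, int), j counts whole arguments, so
  //
  //   j == 0, T_DOUBLE -> as_FloatRegister(0<<1)       = F0  (F0:F1 pair)
  //   j == 1, T_FLOAT  -> as_FloatRegister(1 + (1<<1)) = F3  (odd register)
  //   j == 2, T_INT    -> int_stk_helper(2)            = O2
  //
  // matching the "doubles in EVEN/ODD regs, floats in ODD regs" rules.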
  for( int i=0; i<total_args_passed; i++ ) {
    switch( sig_bt[i] ) {
    case T_ADDRESS: // raw pointers, like current thread, for VM calls
    case T_ARRAY:
    case T_BOOLEAN:
    case T_BYTE:
    case T_CHAR:
    case T_FLOAT:
    case T_INT:
    case T_OBJECT:
    case T_SHORT:
      regs[i].set1( int_stk_helper( i ) );
      break;
    case T_DOUBLE:
    case T_LONG:
      assert( sig_bt[i+1] == T_VOID, "expecting half" );
      regs[i].set_pair( int_stk_helper( i+1 ), int_stk_helper( i ) );
      break;
    case T_VOID: regs[i].set_bad(); break;
    default:
      ShouldNotReachHere();
    }
    if (regs[i].first()->is_stack()) {
      int off = regs[i].first()->reg2stack();
      if (off > max_stack_slots) max_stack_slots = off;
    }
    if (regs[i].second()->is_stack()) {
      int off = regs[i].second()->reg2stack();
      if (off > max_stack_slots) max_stack_slots = off;
    }
  }
#endif // _LP64

  return round_to(max_stack_slots + 1, 2);

}


// ---------------------------------------------------------------------------
void SharedRuntime::save_native_result(MacroAssembler *masm, BasicType ret_type, int frame_slots) {
  switch (ret_type) {
  case T_FLOAT:
    __ stf(FloatRegisterImpl::S, F0, SP, frame_slots*VMRegImpl::stack_slot_size - 4+STACK_BIAS);
    break;
  case T_DOUBLE:
    __ stf(FloatRegisterImpl::D, F0, SP, frame_slots*VMRegImpl::stack_slot_size - 8+STACK_BIAS);
    break;
  }
}

void SharedRuntime::restore_native_result(MacroAssembler *masm, BasicType ret_type, int frame_slots) {
  switch (ret_type) {
  case T_FLOAT:
    __ ldf(FloatRegisterImpl::S, SP, frame_slots*VMRegImpl::stack_slot_size - 4+STACK_BIAS, F0);
    break;
  case T_DOUBLE:
    __ ldf(FloatRegisterImpl::D, SP, frame_slots*VMRegImpl::stack_slot_size - 8+STACK_BIAS, F0);
    break;
  }
}

// Check and forward any pending exception. The thread is stored in
// L7_thread_cache and possibly NOT in G2_thread. Since this is a native call,
// there is no exception handler. We merely pop this frame off and throw the
// exception in the caller's frame.
static void check_forward_pending_exception(MacroAssembler *masm, Register Rex_oop) {
  Label L;
  __ br_null(Rex_oop, false, Assembler::pt, L);
  __ delayed()->mov(L7_thread_cache, G2_thread); // restore in case we have exception
  // Since this is a native call, we *know* the proper exception handler
  // without calling into the VM: it's the empty function. Just pop this
  // frame and then jump to forward_exception_entry; O7 will contain the
  // native caller's return PC.
  AddressLiteral exception_entry(StubRoutines::forward_exception_entry());
  __ jump_to(exception_entry, G3_scratch);
  __ delayed()->restore();      // Pop this frame off.
1457 __ bind(L); 1458} 1459 1460// A simple move of integer like type 1461static void simple_move32(MacroAssembler* masm, VMRegPair src, VMRegPair dst) { 1462 if (src.first()->is_stack()) { 1463 if (dst.first()->is_stack()) { 1464 // stack to stack 1465 __ ld(FP, reg2offset(src.first()) + STACK_BIAS, L5); 1466 __ st(L5, SP, reg2offset(dst.first()) + STACK_BIAS); 1467 } else { 1468 // stack to reg 1469 __ ld(FP, reg2offset(src.first()) + STACK_BIAS, dst.first()->as_Register()); 1470 } 1471 } else if (dst.first()->is_stack()) { 1472 // reg to stack 1473 __ st(src.first()->as_Register(), SP, reg2offset(dst.first()) + STACK_BIAS); 1474 } else { 1475 __ mov(src.first()->as_Register(), dst.first()->as_Register()); 1476 } 1477} 1478 1479// On 64 bit we will store integer like items to the stack as 1480// 64 bits items (sparc abi) even though java would only store 1481// 32bits for a parameter. On 32bit it will simply be 32 bits 1482// So this routine will do 32->32 on 32bit and 32->64 on 64bit 1483static void move32_64(MacroAssembler* masm, VMRegPair src, VMRegPair dst) { 1484 if (src.first()->is_stack()) { 1485 if (dst.first()->is_stack()) { 1486 // stack to stack 1487 __ ld(FP, reg2offset(src.first()) + STACK_BIAS, L5); 1488 __ st_ptr(L5, SP, reg2offset(dst.first()) + STACK_BIAS); 1489 } else { 1490 // stack to reg 1491 __ ld(FP, reg2offset(src.first()) + STACK_BIAS, dst.first()->as_Register()); 1492 } 1493 } else if (dst.first()->is_stack()) { 1494 // reg to stack 1495 __ st_ptr(src.first()->as_Register(), SP, reg2offset(dst.first()) + STACK_BIAS); 1496 } else { 1497 __ mov(src.first()->as_Register(), dst.first()->as_Register()); 1498 } 1499} 1500 1501 1502static void move_ptr(MacroAssembler* masm, VMRegPair src, VMRegPair dst) { 1503 if (src.first()->is_stack()) { 1504 if (dst.first()->is_stack()) { 1505 // stack to stack 1506 __ ld_ptr(FP, reg2offset(src.first()) + STACK_BIAS, L5); 1507 __ st_ptr(L5, SP, reg2offset(dst.first()) + STACK_BIAS); 1508 } else { 1509 // stack to reg 1510 __ ld_ptr(FP, reg2offset(src.first()) + STACK_BIAS, dst.first()->as_Register()); 1511 } 1512 } else if (dst.first()->is_stack()) { 1513 // reg to stack 1514 __ st_ptr(src.first()->as_Register(), SP, reg2offset(dst.first()) + STACK_BIAS); 1515 } else { 1516 __ mov(src.first()->as_Register(), dst.first()->as_Register()); 1517 } 1518} 1519 1520 1521// An oop arg. Must pass a handle not the oop itself 1522static void object_move(MacroAssembler* masm, 1523 OopMap* map, 1524 int oop_handle_offset, 1525 int framesize_in_slots, 1526 VMRegPair src, 1527 VMRegPair dst, 1528 bool is_receiver, 1529 int* receiver_offset) { 1530 1531 // must pass a handle. First figure out the location we use as a handle 1532 1533 if (src.first()->is_stack()) { 1534 // Oop is already on the stack 1535 Register rHandle = dst.first()->is_stack() ? 
L5 : dst.first()->as_Register(); 1536 __ add(FP, reg2offset(src.first()) + STACK_BIAS, rHandle); 1537 __ ld_ptr(rHandle, 0, L4); 1538#ifdef _LP64 1539 __ movr( Assembler::rc_z, L4, G0, rHandle ); 1540#else 1541 __ tst( L4 ); 1542 __ movcc( Assembler::zero, false, Assembler::icc, G0, rHandle ); 1543#endif 1544 if (dst.first()->is_stack()) { 1545 __ st_ptr(rHandle, SP, reg2offset(dst.first()) + STACK_BIAS); 1546 } 1547 int offset_in_older_frame = src.first()->reg2stack() + SharedRuntime::out_preserve_stack_slots(); 1548 if (is_receiver) { 1549 *receiver_offset = (offset_in_older_frame + framesize_in_slots) * VMRegImpl::stack_slot_size; 1550 } 1551 map->set_oop(VMRegImpl::stack2reg(offset_in_older_frame + framesize_in_slots)); 1552 } else { 1553 // Oop is in an input register pass we must flush it to the stack 1554 const Register rOop = src.first()->as_Register(); 1555 const Register rHandle = L5; 1556 int oop_slot = rOop->input_number() * VMRegImpl::slots_per_word + oop_handle_offset; 1557 int offset = oop_slot*VMRegImpl::stack_slot_size; 1558 Label skip; 1559 __ st_ptr(rOop, SP, offset + STACK_BIAS); 1560 if (is_receiver) { 1561 *receiver_offset = oop_slot * VMRegImpl::stack_slot_size; 1562 } 1563 map->set_oop(VMRegImpl::stack2reg(oop_slot)); 1564 __ add(SP, offset + STACK_BIAS, rHandle); 1565#ifdef _LP64 1566 __ movr( Assembler::rc_z, rOop, G0, rHandle ); 1567#else 1568 __ tst( rOop ); 1569 __ movcc( Assembler::zero, false, Assembler::icc, G0, rHandle ); 1570#endif 1571 1572 if (dst.first()->is_stack()) { 1573 __ st_ptr(rHandle, SP, reg2offset(dst.first()) + STACK_BIAS); 1574 } else { 1575 __ mov(rHandle, dst.first()->as_Register()); 1576 } 1577 } 1578} 1579 1580// A float arg may have to do float reg int reg conversion 1581static void float_move(MacroAssembler* masm, VMRegPair src, VMRegPair dst) { 1582 assert(!src.second()->is_valid() && !dst.second()->is_valid(), "bad float_move"); 1583 1584 if (src.first()->is_stack()) { 1585 if (dst.first()->is_stack()) { 1586 // stack to stack the easiest of the bunch 1587 __ ld(FP, reg2offset(src.first()) + STACK_BIAS, L5); 1588 __ st(L5, SP, reg2offset(dst.first()) + STACK_BIAS); 1589 } else { 1590 // stack to reg 1591 if (dst.first()->is_Register()) { 1592 __ ld(FP, reg2offset(src.first()) + STACK_BIAS, dst.first()->as_Register()); 1593 } else { 1594 __ ldf(FloatRegisterImpl::S, FP, reg2offset(src.first()) + STACK_BIAS, dst.first()->as_FloatRegister()); 1595 } 1596 } 1597 } else if (dst.first()->is_stack()) { 1598 // reg to stack 1599 if (src.first()->is_Register()) { 1600 __ st(src.first()->as_Register(), SP, reg2offset(dst.first()) + STACK_BIAS); 1601 } else { 1602 __ stf(FloatRegisterImpl::S, src.first()->as_FloatRegister(), SP, reg2offset(dst.first()) + STACK_BIAS); 1603 } 1604 } else { 1605 // reg to reg 1606 if (src.first()->is_Register()) { 1607 if (dst.first()->is_Register()) { 1608 // gpr -> gpr 1609 __ mov(src.first()->as_Register(), dst.first()->as_Register()); 1610 } else { 1611 // gpr -> fpr 1612 __ st(src.first()->as_Register(), FP, -4 + STACK_BIAS); 1613 __ ldf(FloatRegisterImpl::S, FP, -4 + STACK_BIAS, dst.first()->as_FloatRegister()); 1614 } 1615 } else if (dst.first()->is_Register()) { 1616 // fpr -> gpr 1617 __ stf(FloatRegisterImpl::S, src.first()->as_FloatRegister(), FP, -4 + STACK_BIAS); 1618 __ ld(FP, -4 + STACK_BIAS, dst.first()->as_Register()); 1619 } else { 1620 // fpr -> fpr 1621 // In theory these overlap but the ordering is such that this is likely a nop 1622 if ( src.first() != dst.first()) { 1623 __ 
fmov(FloatRegisterImpl::S, src.first()->as_FloatRegister(), dst.first()->as_FloatRegister());
1624      }
1625    }
1626  }
1627}
1628
1629static void split_long_move(MacroAssembler* masm, VMRegPair src, VMRegPair dst) {
1630  VMRegPair src_lo(src.first());
1631  VMRegPair src_hi(src.second());
1632  VMRegPair dst_lo(dst.first());
1633  VMRegPair dst_hi(dst.second());
1634  simple_move32(masm, src_lo, dst_lo);
1635  simple_move32(masm, src_hi, dst_hi);
1636}
1637
1638// A long move
1639static void long_move(MacroAssembler* masm, VMRegPair src, VMRegPair dst) {
1640
1641  // Do the simple ones here else do two int moves
1642  if (src.is_single_phys_reg() ) {
1643    if (dst.is_single_phys_reg()) {
1644      __ mov(src.first()->as_Register(), dst.first()->as_Register());
1645    } else {
1646      // split src into two separate registers
1647      // Remember hi means hi address or lsw on sparc
1648      // Move msw to lsw
1649      if (dst.second()->is_reg()) {
1650        // MSW -> MSW
1651        __ srax(src.first()->as_Register(), 32, dst.first()->as_Register());
1652        // Now LSW -> LSW
1653        // this will only move lo -> lo and ignore hi
1654        VMRegPair split(dst.second());
1655        simple_move32(masm, src, split);
1656      } else {
1657        VMRegPair split(src.first(), L4->as_VMReg());
1658        // MSW -> MSW (lo ie. first word)
1659        __ srax(src.first()->as_Register(), 32, L4);
1660        split_long_move(masm, split, dst);
1661      }
1662    }
1663  } else if (dst.is_single_phys_reg()) {
1664    if (src.is_adjacent_aligned_on_stack(2)) {
1665      __ ldx(FP, reg2offset(src.first()) + STACK_BIAS, dst.first()->as_Register());
1666    } else {
1667      // dst is a single reg.
1668      // Remember lo is low address not msb for stack slots
1669      // and lo is the "real" register for registers
1670      // src is either {reg lo, reg-or-stack hi} or entirely on the stack
1671
1672      VMRegPair split;
1673
1674      if (src.first()->is_reg()) {
1675        // src.lo (msw) is a reg, src.hi is stk/reg
1676        // we will move: src.hi (LSW) -> dst.lo, src.lo (MSW) -> src.lo [the MSW is in the LSW of the reg]
1677        split.set_pair(dst.first(), src.first());
1678      } else {
1679        // msw is stack move to L5
1680        // lsw is stack move to dst.lo (real reg)
1681        // we will move: src.hi (LSW) -> dst.lo, src.lo (MSW) -> L5
1682        split.set_pair(dst.first(), L5->as_VMReg());
1683      }
1684
1685      // src.lo -> src.lo/L5, src.hi -> dst.lo (the real reg)
1686      // msw -> src.lo/L5, lsw -> dst.lo
1687      split_long_move(masm, src, split);
1688
1689      // dst now has the low-order word in the correct position; shift the
1690      // msw half up and or it in to complete the 64-bit value
1691      __ sllx(split.first()->as_Register(), 32, L5);
1692
1693      const Register d = dst.first()->as_Register();
1694      __ or3(L5, d, d);
1695    }
1696  } else {
1697    // For LP64 we can probably do better.
1698    split_long_move(masm, src, dst);
1699  }
1700}
1701
1702// A double move
1703static void double_move(MacroAssembler* masm, VMRegPair src, VMRegPair dst) {
1704
1705  // The painful thing here is that like long_move a VMRegPair might be
1706  // 1: a single physical register
1707  // 2: two physical registers (v8)
1708  // 3: a physical reg [lo] and a stack slot [hi] (v8)
1709  // 4: two stack slots
1710
1711  // Since src is always a java calling convention we know that the src pair
1712  // is always either all registers or all stack (and aligned?)
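  // For example (illustrative): under V8 a Java double arriving as two
  // physical registers (shape 2) may have to land in a C destination that is
  // a register plus a stack slot (shape 3) or two stack slots (shape 4); the
  // branches below handle each combination one 32-bit word at a time.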
1713
1714  // (dst, however, can be any of the four shapes above)
1715  if (src.first()->is_stack()) {
1716    if (dst.first()->is_stack()) {
1717      // stack to stack the easiest of the bunch
1718      // ought to be a way to do this where if alignment is ok we use ldd/std when possible
1719      __ ld(FP, reg2offset(src.first()) + STACK_BIAS, L5);
1720      __ ld(FP, reg2offset(src.second()) + STACK_BIAS, L4);
1721      __ st(L5, SP, reg2offset(dst.first()) + STACK_BIAS);
1722      __ st(L4, SP, reg2offset(dst.second()) + STACK_BIAS);
1723    } else {
1724      // stack to reg
1725      if (dst.second()->is_stack()) {
1726        // stack -> reg, stack -> stack
1727        __ ld(FP, reg2offset(src.second()) + STACK_BIAS, L4);
1728        if (dst.first()->is_Register()) {
1729          __ ld(FP, reg2offset(src.first()) + STACK_BIAS, dst.first()->as_Register());
1730        } else {
1731          __ ldf(FloatRegisterImpl::S, FP, reg2offset(src.first()) + STACK_BIAS, dst.first()->as_FloatRegister());
1732        }
1733        // This was missing. (very rare case)
1734        __ st(L4, SP, reg2offset(dst.second()) + STACK_BIAS);
1735      } else {
1736        // stack -> reg
1737        // Eventually optimize for alignment QQQ
1738        if (dst.first()->is_Register()) {
1739          __ ld(FP, reg2offset(src.first()) + STACK_BIAS, dst.first()->as_Register());
1740          __ ld(FP, reg2offset(src.second()) + STACK_BIAS, dst.second()->as_Register());
1741        } else {
1742          __ ldf(FloatRegisterImpl::S, FP, reg2offset(src.first()) + STACK_BIAS, dst.first()->as_FloatRegister());
1743          __ ldf(FloatRegisterImpl::S, FP, reg2offset(src.second()) + STACK_BIAS, dst.second()->as_FloatRegister());
1744        }
1745      }
1746    }
1747  } else if (dst.first()->is_stack()) {
1748    // reg to stack
1749    if (src.first()->is_Register()) {
1750      // Eventually optimize for alignment QQQ
1751      __ st(src.first()->as_Register(), SP, reg2offset(dst.first()) + STACK_BIAS);
1752      if (src.second()->is_stack()) {
1753        __ ld(FP, reg2offset(src.second()) + STACK_BIAS, L4);
1754        __ st(L4, SP, reg2offset(dst.second()) + STACK_BIAS);
1755      } else {
1756        __ st(src.second()->as_Register(), SP, reg2offset(dst.second()) + STACK_BIAS);
1757      }
1758    } else {
1759      // fpr to stack
1760      if (src.second()->is_stack()) {
1761        ShouldNotReachHere();
1762      } else {
1763        // Is the stack aligned?
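        // reg2offset() yields a byte offset from SP; stack slots are only
        // 4 bytes wide, so (offset & 0x7) != 0 means the destination pair is
        // not 8-byte aligned and a single 8-byte stf(D) would trap; in that
        // case the double is stored as two 4-byte halves instead.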
1764        if (reg2offset(dst.first()) & 0x7) {
1765          // No; do as pairs
1766          __ stf(FloatRegisterImpl::S, src.first()->as_FloatRegister(), SP, reg2offset(dst.first()) + STACK_BIAS);
1767          __ stf(FloatRegisterImpl::S, src.second()->as_FloatRegister(), SP, reg2offset(dst.second()) + STACK_BIAS);
1768        } else {
1769          __ stf(FloatRegisterImpl::D, src.first()->as_FloatRegister(), SP, reg2offset(dst.first()) + STACK_BIAS);
1770        }
1771      }
1772    }
1773  } else {
1774    // reg to reg
1775    if (src.first()->is_Register()) {
1776      if (dst.first()->is_Register()) {
1777        // gpr -> gpr
1778        __ mov(src.first()->as_Register(), dst.first()->as_Register());
1779        __ mov(src.second()->as_Register(), dst.second()->as_Register());
1780      } else {
1781        // gpr -> fpr
1782        // ought to be able to do a single store
1783        __ stx(src.first()->as_Register(), FP, -8 + STACK_BIAS);
1784        __ stx(src.second()->as_Register(), FP, -4 + STACK_BIAS);
1785        // ought to be able to do a single load
1786        __ ldf(FloatRegisterImpl::S, FP, -8 + STACK_BIAS, dst.first()->as_FloatRegister());
1787        __ ldf(FloatRegisterImpl::S, FP, -4 + STACK_BIAS, dst.second()->as_FloatRegister());
1788      }
1789    } else if (dst.first()->is_Register()) {
1790      // fpr -> gpr
1791      // ought to be able to do a single store
1792      __ stf(FloatRegisterImpl::D, src.first()->as_FloatRegister(), FP, -8 + STACK_BIAS);
1793      // ought to be able to do a single load
1794      // REMEMBER first() is low address not LSB
1795      __ ld(FP, -8 + STACK_BIAS, dst.first()->as_Register());
1796      if (dst.second()->is_Register()) {
1797        __ ld(FP, -4 + STACK_BIAS, dst.second()->as_Register());
1798      } else {
1799        __ ld(FP, -4 + STACK_BIAS, L4);
1800        __ st(L4, SP, reg2offset(dst.second()) + STACK_BIAS);
1801      }
1802    } else {
1803      // fpr -> fpr
1804      // In theory these overlap but the ordering is such that this is likely a nop
1805      if ( src.first() != dst.first()) {
1806        __ fmov(FloatRegisterImpl::D, src.first()->as_FloatRegister(), dst.first()->as_FloatRegister());
1807      }
1808    }
1809  }
1810}
1811
1812// Creates an inner frame if one hasn't already been created, and
1813// saves a copy of the thread in L7_thread_cache
1814static void create_inner_frame(MacroAssembler* masm, bool* already_created) {
1815  if (!*already_created) {
1816    __ save_frame(0);
1817    // Save thread in L7 (INNER FRAME); it crosses a bunch of VM calls below
1818    // Don't use save_thread because it smashes G2 and we merely want to save a
1819    // copy
1820    __ mov(G2_thread, L7_thread_cache);
1821    *already_created = true;
1822  }
1823}
1824
1825
1826static void save_or_restore_arguments(MacroAssembler* masm,
1827                                      const int stack_slots,
1828                                      const int total_in_args,
1829                                      const int arg_save_area,
1830                                      OopMap* map,
1831                                      VMRegPair* in_regs,
1832                                      BasicType* in_sig_bt) {
1833  // if map is non-NULL then the code should store the values,
1834  // otherwise it should load them.
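  // Usage sketch: check_needs_gc_for_critical_native() below invokes this
  // twice around its blocking runtime call --
  //   save_or_restore_arguments(masm, ..., map,  in_regs, in_sig_bt);  // save, fill OopMap
  //   ... call into the runtime ...
  //   save_or_restore_arguments(masm, ..., NULL, in_regs, in_sig_bt);  // reload
  // (the "..." arguments are elided here for brevity).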
1835 if (map != NULL) { 1836 // Fill in the map 1837 for (int i = 0; i < total_in_args; i++) { 1838 if (in_sig_bt[i] == T_ARRAY) { 1839 if (in_regs[i].first()->is_stack()) { 1840 int offset_in_older_frame = in_regs[i].first()->reg2stack() + SharedRuntime::out_preserve_stack_slots(); 1841 map->set_oop(VMRegImpl::stack2reg(offset_in_older_frame + stack_slots)); 1842 } else if (in_regs[i].first()->is_Register()) { 1843 map->set_oop(in_regs[i].first()); 1844 } else { 1845 ShouldNotReachHere(); 1846 } 1847 } 1848 } 1849 } 1850 1851 // Save or restore double word values 1852 int handle_index = 0; 1853 for (int i = 0; i < total_in_args; i++) { 1854 int slot = handle_index + arg_save_area; 1855 int offset = slot * VMRegImpl::stack_slot_size; 1856 if (in_sig_bt[i] == T_LONG && in_regs[i].first()->is_Register()) { 1857 const Register reg = in_regs[i].first()->as_Register(); 1858 if (reg->is_global()) { 1859 handle_index += 2; 1860 assert(handle_index <= stack_slots, "overflow"); 1861 if (map != NULL) { 1862 __ stx(reg, SP, offset + STACK_BIAS); 1863 } else { 1864 __ ldx(SP, offset + STACK_BIAS, reg); 1865 } 1866 } 1867 } else if (in_sig_bt[i] == T_DOUBLE && in_regs[i].first()->is_FloatRegister()) { 1868 handle_index += 2; 1869 assert(handle_index <= stack_slots, "overflow"); 1870 if (map != NULL) { 1871 __ stf(FloatRegisterImpl::D, in_regs[i].first()->as_FloatRegister(), SP, offset + STACK_BIAS); 1872 } else { 1873 __ ldf(FloatRegisterImpl::D, SP, offset + STACK_BIAS, in_regs[i].first()->as_FloatRegister()); 1874 } 1875 } 1876 } 1877 // Save floats 1878 for (int i = 0; i < total_in_args; i++) { 1879 int slot = handle_index + arg_save_area; 1880 int offset = slot * VMRegImpl::stack_slot_size; 1881 if (in_sig_bt[i] == T_FLOAT && in_regs[i].first()->is_FloatRegister()) { 1882 handle_index++; 1883 assert(handle_index <= stack_slots, "overflow"); 1884 if (map != NULL) { 1885 __ stf(FloatRegisterImpl::S, in_regs[i].first()->as_FloatRegister(), SP, offset + STACK_BIAS); 1886 } else { 1887 __ ldf(FloatRegisterImpl::S, SP, offset + STACK_BIAS, in_regs[i].first()->as_FloatRegister()); 1888 } 1889 } 1890 } 1891 1892} 1893 1894 1895// Check GC_locker::needs_gc and enter the runtime if it's true. This 1896// keeps a new JNI critical region from starting until a GC has been 1897// forced. Save down any oops in registers and describe them in an 1898// OopMap. 
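// In outline, the code generated below behaves like this (pseudo-C sketch,
// not literal VM code):
//   if (GC_locker::needs_gc()) {
//     save argument registers and record any oops in an OopMap;
//     SharedRuntime::block_for_jni_critical(thread);  // returns once GC can proceed
//     reload the saved argument registers;
//   }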
1899static void check_needs_gc_for_critical_native(MacroAssembler* masm, 1900 const int stack_slots, 1901 const int total_in_args, 1902 const int arg_save_area, 1903 OopMapSet* oop_maps, 1904 VMRegPair* in_regs, 1905 BasicType* in_sig_bt) { 1906 __ block_comment("check GC_locker::needs_gc"); 1907 Label cont; 1908 AddressLiteral sync_state(GC_locker::needs_gc_address()); 1909 __ load_bool_contents(sync_state, G3_scratch); 1910 __ cmp_zero_and_br(Assembler::equal, G3_scratch, cont); 1911 __ delayed()->nop(); 1912 1913 // Save down any values that are live in registers and call into the 1914 // runtime to halt for a GC 1915 OopMap* map = new OopMap(stack_slots * 2, 0 /* arg_slots*/); 1916 save_or_restore_arguments(masm, stack_slots, total_in_args, 1917 arg_save_area, map, in_regs, in_sig_bt); 1918 1919 __ mov(G2_thread, L7_thread_cache); 1920 1921 __ set_last_Java_frame(SP, noreg); 1922 1923 __ block_comment("block_for_jni_critical"); 1924 __ call(CAST_FROM_FN_PTR(address, SharedRuntime::block_for_jni_critical), relocInfo::runtime_call_type); 1925 __ delayed()->mov(L7_thread_cache, O0); 1926 oop_maps->add_gc_map( __ offset(), map); 1927 1928 __ restore_thread(L7_thread_cache); // restore G2_thread 1929 __ reset_last_Java_frame(); 1930 1931 // Reload all the register arguments 1932 save_or_restore_arguments(masm, stack_slots, total_in_args, 1933 arg_save_area, NULL, in_regs, in_sig_bt); 1934 1935 __ bind(cont); 1936#ifdef ASSERT 1937 if (StressCriticalJNINatives) { 1938 // Stress register saving 1939 OopMap* map = new OopMap(stack_slots * 2, 0 /* arg_slots*/); 1940 save_or_restore_arguments(masm, stack_slots, total_in_args, 1941 arg_save_area, map, in_regs, in_sig_bt); 1942 // Destroy argument registers 1943 for (int i = 0; i < total_in_args; i++) { 1944 if (in_regs[i].first()->is_Register()) { 1945 const Register reg = in_regs[i].first()->as_Register(); 1946 if (reg->is_global()) { 1947 __ mov(G0, reg); 1948 } 1949 } else if (in_regs[i].first()->is_FloatRegister()) { 1950 __ fneg(FloatRegisterImpl::D, in_regs[i].first()->as_FloatRegister(), in_regs[i].first()->as_FloatRegister()); 1951 } 1952 } 1953 1954 save_or_restore_arguments(masm, stack_slots, total_in_args, 1955 arg_save_area, NULL, in_regs, in_sig_bt); 1956 } 1957#endif 1958} 1959 1960// Unpack an array argument into a pointer to the body and the length 1961// if the array is non-null, otherwise pass 0 for both. 
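// Seen from the native side this implements the critical-native convention:
// a Java array parameter such as byte[] buf arrives as the pair
// (jint length, jbyte* body) -- matching the T_INT/T_ADDRESS pair emitted
// per T_ARRAY during signature conversion below -- with both values zero
// when the array reference is null. (Illustrative parameter names.)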
1962static void unpack_array_argument(MacroAssembler* masm, VMRegPair reg, BasicType in_elem_type, VMRegPair body_arg, VMRegPair length_arg) { 1963 // Pass the length, ptr pair 1964 Label is_null, done; 1965 if (reg.first()->is_stack()) { 1966 VMRegPair tmp = reg64_to_VMRegPair(L2); 1967 // Load the arg up from the stack 1968 move_ptr(masm, reg, tmp); 1969 reg = tmp; 1970 } 1971 __ cmp(reg.first()->as_Register(), G0); 1972 __ brx(Assembler::equal, false, Assembler::pt, is_null); 1973 __ delayed()->add(reg.first()->as_Register(), arrayOopDesc::base_offset_in_bytes(in_elem_type), L4); 1974 move_ptr(masm, reg64_to_VMRegPair(L4), body_arg); 1975 __ ld(reg.first()->as_Register(), arrayOopDesc::length_offset_in_bytes(), L4); 1976 move32_64(masm, reg64_to_VMRegPair(L4), length_arg); 1977 __ ba_short(done); 1978 __ bind(is_null); 1979 // Pass zeros 1980 move_ptr(masm, reg64_to_VMRegPair(G0), body_arg); 1981 move32_64(masm, reg64_to_VMRegPair(G0), length_arg); 1982 __ bind(done); 1983} 1984 1985static void verify_oop_args(MacroAssembler* masm, 1986 int total_args_passed, 1987 const BasicType* sig_bt, 1988 const VMRegPair* regs) { 1989 Register temp_reg = G5_method; // not part of any compiled calling seq 1990 if (VerifyOops) { 1991 for (int i = 0; i < total_args_passed; i++) { 1992 if (sig_bt[i] == T_OBJECT || 1993 sig_bt[i] == T_ARRAY) { 1994 VMReg r = regs[i].first(); 1995 assert(r->is_valid(), "bad oop arg"); 1996 if (r->is_stack()) { 1997 RegisterOrConstant ld_off = reg2offset(r) + STACK_BIAS; 1998 ld_off = __ ensure_simm13_or_reg(ld_off, temp_reg); 1999 __ ld_ptr(SP, ld_off, temp_reg); 2000 __ verify_oop(temp_reg); 2001 } else { 2002 __ verify_oop(r->as_Register()); 2003 } 2004 } 2005 } 2006 } 2007} 2008 2009static void gen_special_dispatch(MacroAssembler* masm, 2010 int total_args_passed, 2011 int comp_args_on_stack, 2012 vmIntrinsics::ID special_dispatch, 2013 const BasicType* sig_bt, 2014 const VMRegPair* regs) { 2015 verify_oop_args(masm, total_args_passed, sig_bt, regs); 2016 2017 // Now write the args into the outgoing interpreter space 2018 bool has_receiver = false; 2019 Register receiver_reg = noreg; 2020 int member_arg_pos = -1; 2021 Register member_reg = noreg; 2022 int ref_kind = MethodHandles::signature_polymorphic_intrinsic_ref_kind(special_dispatch); 2023 if (ref_kind != 0) { 2024 member_arg_pos = total_args_passed - 1; // trailing MemberName argument 2025 member_reg = G5_method; // known to be free at this point 2026 has_receiver = MethodHandles::ref_kind_has_receiver(ref_kind); 2027 } else if (special_dispatch == vmIntrinsics::_invokeBasic) { 2028 has_receiver = true; 2029 } else { 2030 fatal(err_msg("special_dispatch=%d", special_dispatch)); 2031 } 2032 2033 if (member_reg != noreg) { 2034 // Load the member_arg into register, if necessary. 2035 assert(member_arg_pos >= 0 && member_arg_pos < total_args_passed, "oob"); 2036 assert(sig_bt[member_arg_pos] == T_OBJECT, "dispatch argument must be an object"); 2037 VMReg r = regs[member_arg_pos].first(); 2038 assert(r->is_valid(), "bad member arg"); 2039 if (r->is_stack()) { 2040 RegisterOrConstant ld_off = reg2offset(r) + STACK_BIAS; 2041 ld_off = __ ensure_simm13_or_reg(ld_off, member_reg); 2042 __ ld_ptr(SP, ld_off, member_reg); 2043 } else { 2044 // no data motion is needed 2045 member_reg = r->as_Register(); 2046 } 2047 } 2048 2049 if (has_receiver) { 2050 // Make sure the receiver is loaded into a register. 
2051    assert(total_args_passed > 0, "oob");
2052    assert(sig_bt[0] == T_OBJECT, "receiver argument must be an object");
2053    VMReg r = regs[0].first();
2054    assert(r->is_valid(), "bad receiver arg");
2055    if (r->is_stack()) {
2056      // Porting note: This assumes that compiled calling conventions always
2057      // pass the receiver oop in a register. If this is not true on some
2058      // platform, pick a temp and load the receiver from stack.
2059      assert(false, "receiver always in a register");
2060      receiver_reg = G3_scratch; // known to be free at this point
2061      RegisterOrConstant ld_off = reg2offset(r) + STACK_BIAS;
2062      ld_off = __ ensure_simm13_or_reg(ld_off, member_reg);
2063      __ ld_ptr(SP, ld_off, receiver_reg);
2064    } else {
2065      // no data motion is needed
2066      receiver_reg = r->as_Register();
2067    }
2068  }
2069
2070  // Figure out which address we are really jumping to:
2071  MethodHandles::generate_method_handle_dispatch(masm, special_dispatch,
2072                                                 receiver_reg, member_reg, /*for_compiler_entry:*/ true);
2073}
2074
2075// ---------------------------------------------------------------------------
2076// Generate a native wrapper for a given method. The method takes arguments
2077// in the Java compiled code convention, marshals them to the native
2078// convention (handlizes oops, etc), transitions to native, makes the call,
2079// returns to java state (possibly blocking), unhandlizes any result and
2080// returns.
2081//
2082// Critical native functions are a shorthand for the use of
2083// GetPrimitiveArrayCritical and disallow the use of any other JNI
2084// functions. The wrapper is expected to unpack the arguments before
2085// passing them to the callee and perform checks before and after the
2086// native call to ensure that the GC_locker
2087// lock_critical/unlock_critical semantics are followed. Some other
2088// parts of JNI setup are skipped, like the tear-down of the JNI handle
2089// block and the check for pending exceptions, since it's impossible for them
2090// to be thrown.
2091//
2092// They are roughly structured like this:
2093//   if (GC_locker::needs_gc())
2094//     SharedRuntime::block_for_jni_critical();
2095//   transition to thread_in_native
2096//   unpack array arguments and call native entry point
2097//   check for safepoint in progress
2098//   check if any thread suspend flags are set
2099//   call into JVM and possibly unlock the JNI critical
2100//   if a GC was suppressed while in the critical native.
2101// transition back to thread_in_Java 2102// return to caller 2103// 2104nmethod *SharedRuntime::generate_native_wrapper(MacroAssembler* masm, 2105 methodHandle method, 2106 int compile_id, 2107 int total_in_args, 2108 int comp_args_on_stack, // in VMRegStackSlots 2109 BasicType* in_sig_bt, 2110 VMRegPair* in_regs, 2111 BasicType ret_type) { 2112 if (method->is_method_handle_intrinsic()) { 2113 vmIntrinsics::ID iid = method->intrinsic_id(); 2114 intptr_t start = (intptr_t)__ pc(); 2115 int vep_offset = ((intptr_t)__ pc()) - start; 2116 gen_special_dispatch(masm, 2117 total_in_args, 2118 comp_args_on_stack, 2119 method->intrinsic_id(), 2120 in_sig_bt, 2121 in_regs); 2122 int frame_complete = ((intptr_t)__ pc()) - start; // not complete, period 2123 __ flush(); 2124 int stack_slots = SharedRuntime::out_preserve_stack_slots(); // no out slots at all, actually 2125 return nmethod::new_native_nmethod(method, 2126 compile_id, 2127 masm->code(), 2128 vep_offset, 2129 frame_complete, 2130 stack_slots / VMRegImpl::slots_per_word, 2131 in_ByteSize(-1), 2132 in_ByteSize(-1), 2133 (OopMapSet*)NULL); 2134 } 2135 bool is_critical_native = true; 2136 address native_func = method->critical_native_function(); 2137 if (native_func == NULL) { 2138 native_func = method->native_function(); 2139 is_critical_native = false; 2140 } 2141 assert(native_func != NULL, "must have function"); 2142 2143 // Native nmethod wrappers never take possesion of the oop arguments. 2144 // So the caller will gc the arguments. The only thing we need an 2145 // oopMap for is if the call is static 2146 // 2147 // An OopMap for lock (and class if static), and one for the VM call itself 2148 OopMapSet *oop_maps = new OopMapSet(); 2149 intptr_t start = (intptr_t)__ pc(); 2150 2151 // First thing make an ic check to see if we should even be here 2152 { 2153 Label L; 2154 const Register temp_reg = G3_scratch; 2155 AddressLiteral ic_miss(SharedRuntime::get_ic_miss_stub()); 2156 __ verify_oop(O0); 2157 __ load_klass(O0, temp_reg); 2158 __ cmp_and_brx_short(temp_reg, G5_inline_cache_reg, Assembler::equal, Assembler::pt, L); 2159 2160 __ jump_to(ic_miss, temp_reg); 2161 __ delayed()->nop(); 2162 __ align(CodeEntryAlignment); 2163 __ bind(L); 2164 } 2165 2166 int vep_offset = ((intptr_t)__ pc()) - start; 2167 2168#ifdef COMPILER1 2169 if (InlineObjectHash && method->intrinsic_id() == vmIntrinsics::_hashCode) { 2170 // Object.hashCode can pull the hashCode from the header word 2171 // instead of doing a full VM transition once it's been computed. 2172 // Since hashCode is usually polymorphic at call sites we can't do 2173 // this optimization at the call site without a lot of work. 2174 Label slowCase; 2175 Register receiver = O0; 2176 Register result = O0; 2177 Register header = G3_scratch; 2178 Register hash = G3_scratch; // overwrite header value with hash value 2179 Register mask = G1; // to get hash field from header 2180 2181 // Read the header and build a mask to get its hash field. Give up if the object is not unlocked. 2182 // We depend on hash_mask being at most 32 bits and avoid the use of 2183 // hash_mask_in_place because it could be larger than 32 bits in a 64-bit 2184 // vm: see markOop.hpp. 
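    // In effect the fast path below computes (sketch in C terms):
    //   intptr_t mark = receiver->mark();
    //   if (!(mark & unlocked_value)) goto slowCase;   // locked (or biased)
    //   intptr_t hash = (mark >> hash_shift) & hash_mask;
    //   if (hash == 0) goto slowCase;                  // no hash assigned yet
    //   return hash;                                   // leaf return, no frame
    // The slow case falls through into the ordinary native wrapper.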
2185 __ ld_ptr(receiver, oopDesc::mark_offset_in_bytes(), header); 2186 __ sethi(markOopDesc::hash_mask, mask); 2187 __ btst(markOopDesc::unlocked_value, header); 2188 __ br(Assembler::zero, false, Assembler::pn, slowCase); 2189 if (UseBiasedLocking) { 2190 // Check if biased and fall through to runtime if so 2191 __ delayed()->nop(); 2192 __ btst(markOopDesc::biased_lock_bit_in_place, header); 2193 __ br(Assembler::notZero, false, Assembler::pn, slowCase); 2194 } 2195 __ delayed()->or3(mask, markOopDesc::hash_mask & 0x3ff, mask); 2196 2197 // Check for a valid (non-zero) hash code and get its value. 2198#ifdef _LP64 2199 __ srlx(header, markOopDesc::hash_shift, hash); 2200#else 2201 __ srl(header, markOopDesc::hash_shift, hash); 2202#endif 2203 __ andcc(hash, mask, hash); 2204 __ br(Assembler::equal, false, Assembler::pn, slowCase); 2205 __ delayed()->nop(); 2206 2207 // leaf return. 2208 __ retl(); 2209 __ delayed()->mov(hash, result); 2210 __ bind(slowCase); 2211 } 2212#endif // COMPILER1 2213 2214 2215 // We have received a description of where all the java arg are located 2216 // on entry to the wrapper. We need to convert these args to where 2217 // the jni function will expect them. To figure out where they go 2218 // we convert the java signature to a C signature by inserting 2219 // the hidden arguments as arg[0] and possibly arg[1] (static method) 2220 2221 int total_c_args = total_in_args; 2222 int total_save_slots = 6 * VMRegImpl::slots_per_word; 2223 if (!is_critical_native) { 2224 total_c_args += 1; 2225 if (method->is_static()) { 2226 total_c_args++; 2227 } 2228 } else { 2229 for (int i = 0; i < total_in_args; i++) { 2230 if (in_sig_bt[i] == T_ARRAY) { 2231 // These have to be saved and restored across the safepoint 2232 total_c_args++; 2233 } 2234 } 2235 } 2236 2237 BasicType* out_sig_bt = NEW_RESOURCE_ARRAY(BasicType, total_c_args); 2238 VMRegPair* out_regs = NEW_RESOURCE_ARRAY(VMRegPair, total_c_args); 2239 BasicType* in_elem_bt = NULL; 2240 2241 int argc = 0; 2242 if (!is_critical_native) { 2243 out_sig_bt[argc++] = T_ADDRESS; 2244 if (method->is_static()) { 2245 out_sig_bt[argc++] = T_OBJECT; 2246 } 2247 2248 for (int i = 0; i < total_in_args ; i++ ) { 2249 out_sig_bt[argc++] = in_sig_bt[i]; 2250 } 2251 } else { 2252 Thread* THREAD = Thread::current(); 2253 in_elem_bt = NEW_RESOURCE_ARRAY(BasicType, total_in_args); 2254 SignatureStream ss(method->signature()); 2255 for (int i = 0; i < total_in_args ; i++ ) { 2256 if (in_sig_bt[i] == T_ARRAY) { 2257 // Arrays are passed as int, elem* pair 2258 out_sig_bt[argc++] = T_INT; 2259 out_sig_bt[argc++] = T_ADDRESS; 2260 Symbol* atype = ss.as_symbol(CHECK_NULL); 2261 const char* at = atype->as_C_string(); 2262 if (strlen(at) == 2) { 2263 assert(at[0] == '[', "must be"); 2264 switch (at[1]) { 2265 case 'B': in_elem_bt[i] = T_BYTE; break; 2266 case 'C': in_elem_bt[i] = T_CHAR; break; 2267 case 'D': in_elem_bt[i] = T_DOUBLE; break; 2268 case 'F': in_elem_bt[i] = T_FLOAT; break; 2269 case 'I': in_elem_bt[i] = T_INT; break; 2270 case 'J': in_elem_bt[i] = T_LONG; break; 2271 case 'S': in_elem_bt[i] = T_SHORT; break; 2272 case 'Z': in_elem_bt[i] = T_BOOLEAN; break; 2273 default: ShouldNotReachHere(); 2274 } 2275 } 2276 } else { 2277 out_sig_bt[argc++] = in_sig_bt[i]; 2278 in_elem_bt[i] = T_VOID; 2279 } 2280 if (in_sig_bt[i] != T_VOID) { 2281 assert(in_sig_bt[i] == ss.type(), "must match"); 2282 ss.next(); 2283 } 2284 } 2285 } 2286 2287 // Now figure out where the args must be stored and how much stack space 2288 // they require 
(neglecting out_preserve_stack_slots but including space for storing
2289  // the 1st six register arguments). It's weird; see int_stk_helper.
2290  //
2291  int out_arg_slots;
2292  out_arg_slots = c_calling_convention(out_sig_bt, out_regs, total_c_args);
2293
2294  if (is_critical_native) {
2295    // Critical natives may have to call out so they need a save area
2296    // for register arguments.
2297    int double_slots = 0;
2298    int single_slots = 0;
2299    for ( int i = 0; i < total_in_args; i++) {
2300      if (in_regs[i].first()->is_Register()) {
2301        const Register reg = in_regs[i].first()->as_Register();
2302        switch (in_sig_bt[i]) {
2303          case T_ARRAY:
2304          case T_BOOLEAN:
2305          case T_BYTE:
2306          case T_SHORT:
2307          case T_CHAR:
2308          case T_INT:  assert(reg->is_in(), "don't need to save these"); break;
2309          case T_LONG: if (reg->is_global()) double_slots++; break;
2310          default:  ShouldNotReachHere();
2311        }
2312      } else if (in_regs[i].first()->is_FloatRegister()) {
2313        switch (in_sig_bt[i]) {
2314          case T_FLOAT:  single_slots++; break;
2315          case T_DOUBLE: double_slots++; break;
2316          default:  ShouldNotReachHere();
2317        }
2318      }
2319    }
2320    total_save_slots = double_slots * 2 + single_slots;
2321  }
2322
2323  // Compute framesize for the wrapper. We need to handlize all oops in
2324  // registers. We must create space for them here that is disjoint from
2325  // the windowed save area because we have no control over when we might
2326  // flush the window again and overwrite values that gc has since modified.
2327  // (The live window race)
2328  //
2329  // We always just allocate 6 words for storing down these objects. This allows
2330  // us to simply record the base and use the Ireg number to decide which
2331  // slot to use. (Note that the reg number is the inbound number, not the
2332  // outbound number.)
2333  // We must shuffle args to match the native convention, and include var-args space.
2334
2335  // Calculate the total number of stack slots we will need.
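  // Worked example (made-up signature): a critical native taking a long that
  // arrives in a G-register plus a double and a float in F-registers gives
  // double_slots = 2 and single_slots = 1 above, hence
  // total_save_slots = 2 * 2 + 1 = 5 stack slots of save area; the
  // computation below then adds this on top of the ABI area, the outgoing
  // args, and the final alignment rounding.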
2336 2337 // First count the abi requirement plus all of the outgoing args 2338 int stack_slots = SharedRuntime::out_preserve_stack_slots() + out_arg_slots; 2339 2340 // Now the space for the inbound oop handle area 2341 2342 int oop_handle_offset = round_to(stack_slots, 2); 2343 stack_slots += total_save_slots; 2344 2345 // Now any space we need for handlizing a klass if static method 2346 2347 int klass_slot_offset = 0; 2348 int klass_offset = -1; 2349 int lock_slot_offset = 0; 2350 bool is_static = false; 2351 2352 if (method->is_static()) { 2353 klass_slot_offset = stack_slots; 2354 stack_slots += VMRegImpl::slots_per_word; 2355 klass_offset = klass_slot_offset * VMRegImpl::stack_slot_size; 2356 is_static = true; 2357 } 2358 2359 // Plus a lock if needed 2360 2361 if (method->is_synchronized()) { 2362 lock_slot_offset = stack_slots; 2363 stack_slots += VMRegImpl::slots_per_word; 2364 } 2365 2366 // Now a place to save return value or as a temporary for any gpr -> fpr moves 2367 stack_slots += 2; 2368 2369 // Ok The space we have allocated will look like: 2370 // 2371 // 2372 // FP-> | | 2373 // |---------------------| 2374 // | 2 slots for moves | 2375 // |---------------------| 2376 // | lock box (if sync) | 2377 // |---------------------| <- lock_slot_offset 2378 // | klass (if static) | 2379 // |---------------------| <- klass_slot_offset 2380 // | oopHandle area | 2381 // |---------------------| <- oop_handle_offset 2382 // | outbound memory | 2383 // | based arguments | 2384 // | | 2385 // |---------------------| 2386 // | vararg area | 2387 // |---------------------| 2388 // | | 2389 // SP-> | out_preserved_slots | 2390 // 2391 // 2392 2393 2394 // Now compute actual number of stack words we need rounding to make 2395 // stack properly aligned. 2396 stack_slots = round_to(stack_slots, 2 * VMRegImpl::slots_per_word); 2397 2398 int stack_size = stack_slots * VMRegImpl::stack_slot_size; 2399 2400 // Generate stack overflow check before creating frame 2401 __ generate_stack_overflow_check(stack_size); 2402 2403 // Generate a new frame for the wrapper. 2404 __ save(SP, -stack_size, SP); 2405 2406 int frame_complete = ((intptr_t)__ pc()) - start; 2407 2408 __ verify_thread(); 2409 2410 if (is_critical_native) { 2411 check_needs_gc_for_critical_native(masm, stack_slots, total_in_args, 2412 oop_handle_offset, oop_maps, in_regs, in_sig_bt); 2413 } 2414 2415 // 2416 // We immediately shuffle the arguments so that any vm call we have to 2417 // make from here on out (sync slow path, jvmti, etc.) we will have 2418 // captured the oops from our caller and have a valid oopMap for 2419 // them. 2420 2421 // ----------------- 2422 // The Grand Shuffle 2423 // 2424 // Natives require 1 or 2 extra arguments over the normal ones: the JNIEnv* 2425 // (derived from JavaThread* which is in L7_thread_cache) and, if static, 2426 // the class mirror instead of a receiver. This pretty much guarantees that 2427 // register layout will not match. We ignore these extra arguments during 2428 // the shuffle. The shuffle is described by the two calling convention 2429 // vectors we have in our possession. We simply walk the java vector to 2430 // get the source locations and the c vector to get the destinations. 2431 // Because we have a new window and the argument registers are completely 2432 // disjoint ( I0 -> O1, I1 -> O2, ...) we have nothing to worry about 2433 // here. 2434 2435 // This is a trick. We double the stack slots so we can claim 2436 // the oops in the caller's frame. 
Since we are sure to have 2437 // more args than the caller doubling is enough to make 2438 // sure we can capture all the incoming oop args from the 2439 // caller. 2440 // 2441 OopMap* map = new OopMap(stack_slots * 2, 0 /* arg_slots*/); 2442 // Record sp-based slot for receiver on stack for non-static methods 2443 int receiver_offset = -1; 2444 2445 // We move the arguments backward because the floating point registers 2446 // destination will always be to a register with a greater or equal register 2447 // number or the stack. 2448 2449#ifdef ASSERT 2450 bool reg_destroyed[RegisterImpl::number_of_registers]; 2451 bool freg_destroyed[FloatRegisterImpl::number_of_registers]; 2452 for ( int r = 0 ; r < RegisterImpl::number_of_registers ; r++ ) { 2453 reg_destroyed[r] = false; 2454 } 2455 for ( int f = 0 ; f < FloatRegisterImpl::number_of_registers ; f++ ) { 2456 freg_destroyed[f] = false; 2457 } 2458 2459#endif /* ASSERT */ 2460 2461 for ( int i = total_in_args - 1, c_arg = total_c_args - 1; i >= 0 ; i--, c_arg-- ) { 2462 2463#ifdef ASSERT 2464 if (in_regs[i].first()->is_Register()) { 2465 assert(!reg_destroyed[in_regs[i].first()->as_Register()->encoding()], "ack!"); 2466 } else if (in_regs[i].first()->is_FloatRegister()) { 2467 assert(!freg_destroyed[in_regs[i].first()->as_FloatRegister()->encoding(FloatRegisterImpl::S)], "ack!"); 2468 } 2469 if (out_regs[c_arg].first()->is_Register()) { 2470 reg_destroyed[out_regs[c_arg].first()->as_Register()->encoding()] = true; 2471 } else if (out_regs[c_arg].first()->is_FloatRegister()) { 2472 freg_destroyed[out_regs[c_arg].first()->as_FloatRegister()->encoding(FloatRegisterImpl::S)] = true; 2473 } 2474#endif /* ASSERT */ 2475 2476 switch (in_sig_bt[i]) { 2477 case T_ARRAY: 2478 if (is_critical_native) { 2479 unpack_array_argument(masm, in_regs[i], in_elem_bt[i], out_regs[c_arg], out_regs[c_arg - 1]); 2480 c_arg--; 2481 break; 2482 } 2483 case T_OBJECT: 2484 assert(!is_critical_native, "no oop arguments"); 2485 object_move(masm, map, oop_handle_offset, stack_slots, in_regs[i], out_regs[c_arg], 2486 ((i == 0) && (!is_static)), 2487 &receiver_offset); 2488 break; 2489 case T_VOID: 2490 break; 2491 2492 case T_FLOAT: 2493 float_move(masm, in_regs[i], out_regs[c_arg]); 2494 break; 2495 2496 case T_DOUBLE: 2497 assert( i + 1 < total_in_args && 2498 in_sig_bt[i + 1] == T_VOID && 2499 out_sig_bt[c_arg+1] == T_VOID, "bad arg list"); 2500 double_move(masm, in_regs[i], out_regs[c_arg]); 2501 break; 2502 2503 case T_LONG : 2504 long_move(masm, in_regs[i], out_regs[c_arg]); 2505 break; 2506 2507 case T_ADDRESS: assert(false, "found T_ADDRESS in java args"); 2508 2509 default: 2510 move32_64(masm, in_regs[i], out_regs[c_arg]); 2511 } 2512 } 2513 2514 // Pre-load a static method's oop into O1. Used both by locking code and 2515 // the normal JNI call code. 2516 if (method->is_static() && !is_critical_native) { 2517 __ set_oop_constant(JNIHandles::make_local(Klass::cast(method->method_holder())->java_mirror()), O1); 2518 2519 // Now handlize the static class mirror in O1. It's known not-null. 2520 __ st_ptr(O1, SP, klass_offset + STACK_BIAS); 2521 map->set_oop(VMRegImpl::stack2reg(klass_slot_offset)); 2522 __ add(SP, klass_offset + STACK_BIAS, O1); 2523 } 2524 2525 2526 const Register L6_handle = L6; 2527 2528 if (method->is_synchronized()) { 2529 assert(!is_critical_native, "unhandled"); 2530 __ mov(O1, L6_handle); 2531 } 2532 2533 // We have all of the arguments setup at this point. We MUST NOT touch any Oregs 2534 // except O6/O7. 
So if we must call out we must push a new frame. We immediately 2535 // push a new frame and flush the windows. 2536#ifdef _LP64 2537 intptr_t thepc = (intptr_t) __ pc(); 2538 { 2539 address here = __ pc(); 2540 // Call the next instruction 2541 __ call(here + 8, relocInfo::none); 2542 __ delayed()->nop(); 2543 } 2544#else 2545 intptr_t thepc = __ load_pc_address(O7, 0); 2546#endif /* _LP64 */ 2547 2548 // We use the same pc/oopMap repeatedly when we call out 2549 oop_maps->add_gc_map(thepc - start, map); 2550 2551 // O7 now has the pc loaded that we will use when we finally call to native. 2552 2553 // Save thread in L7; it crosses a bunch of VM calls below 2554 // Don't use save_thread because it smashes G2 and we merely 2555 // want to save a copy 2556 __ mov(G2_thread, L7_thread_cache); 2557 2558 2559 // If we create an inner frame once is plenty 2560 // when we create it we must also save G2_thread 2561 bool inner_frame_created = false; 2562 2563 // dtrace method entry support 2564 { 2565 SkipIfEqual skip_if( 2566 masm, G3_scratch, &DTraceMethodProbes, Assembler::zero); 2567 // create inner frame 2568 __ save_frame(0); 2569 __ mov(G2_thread, L7_thread_cache); 2570 __ set_metadata_constant(method(), O1); 2571 __ call_VM_leaf(L7_thread_cache, 2572 CAST_FROM_FN_PTR(address, SharedRuntime::dtrace_method_entry), 2573 G2_thread, O1); 2574 __ restore(); 2575 } 2576 2577 // RedefineClasses() tracing support for obsolete method entry 2578 if (RC_TRACE_IN_RANGE(0x00001000, 0x00002000)) { 2579 // create inner frame 2580 __ save_frame(0); 2581 __ mov(G2_thread, L7_thread_cache); 2582 __ set_metadata_constant(method(), O1); 2583 __ call_VM_leaf(L7_thread_cache, 2584 CAST_FROM_FN_PTR(address, SharedRuntime::rc_trace_method_entry), 2585 G2_thread, O1); 2586 __ restore(); 2587 } 2588 2589 // We are in the jni frame unless saved_frame is true in which case 2590 // we are in one frame deeper (the "inner" frame). If we are in the 2591 // "inner" frames the args are in the Iregs and if the jni frame then 2592 // they are in the Oregs. 2593 // If we ever need to go to the VM (for locking, jvmti) then 2594 // we will always be in the "inner" frame. 2595 2596 // Lock a synchronized method 2597 int lock_offset = -1; // Set if locked 2598 if (method->is_synchronized()) { 2599 Register Roop = O1; 2600 const Register L3_box = L3; 2601 2602 create_inner_frame(masm, &inner_frame_created); 2603 2604 __ ld_ptr(I1, 0, O1); 2605 Label done; 2606 2607 lock_offset = (lock_slot_offset * VMRegImpl::stack_slot_size); 2608 __ add(FP, lock_offset+STACK_BIAS, L3_box); 2609#ifdef ASSERT 2610 if (UseBiasedLocking) { 2611 // making the box point to itself will make it clear it went unused 2612 // but also be obviously invalid 2613 __ st_ptr(L3_box, L3_box, 0); 2614 } 2615#endif // ASSERT 2616 // 2617 // Compiler_lock_object (Roop, Rmark, Rbox, Rscratch) -- kills Rmark, Rbox, Rscratch 2618 // 2619 __ compiler_lock_object(Roop, L1, L3_box, L2); 2620 __ br(Assembler::equal, false, Assembler::pt, done); 2621 __ delayed() -> add(FP, lock_offset+STACK_BIAS, L3_box); 2622 2623 2624 // None of the above fast optimizations worked so we have to get into the 2625 // slow case of monitor enter. Inline a special case of call_VM that 2626 // disallows any pending_exception. 2627 __ mov(Roop, O0); // Need oop in O0 2628 __ mov(L3_box, O1); 2629 2630 // Record last_Java_sp, in case the VM code releases the JVM lock. 
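  // (set_last_Java_frame below publishes this frame's SP and PC so the stack
  // can be walked while we are off in the VM; it is undone by the matching
  // reset_last_Java_frame once the call returns.)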
2631 2632 __ set_last_Java_frame(FP, I7); 2633 2634 // do the call 2635 __ call(CAST_FROM_FN_PTR(address, SharedRuntime::complete_monitor_locking_C), relocInfo::runtime_call_type); 2636 __ delayed()->mov(L7_thread_cache, O2); 2637 2638 __ restore_thread(L7_thread_cache); // restore G2_thread 2639 __ reset_last_Java_frame(); 2640 2641#ifdef ASSERT 2642 { Label L; 2643 __ ld_ptr(G2_thread, in_bytes(Thread::pending_exception_offset()), O0); 2644 __ br_null_short(O0, Assembler::pt, L); 2645 __ stop("no pending exception allowed on exit from IR::monitorenter"); 2646 __ bind(L); 2647 } 2648#endif 2649 __ bind(done); 2650 } 2651 2652 2653 // Finally just about ready to make the JNI call 2654 2655 __ flush_windows(); 2656 if (inner_frame_created) { 2657 __ restore(); 2658 } else { 2659 // Store only what we need from this frame 2660 // QQQ I think that non-v9 (like we care) we don't need these saves 2661 // either as the flush traps and the current window goes too. 2662 __ st_ptr(FP, SP, FP->sp_offset_in_saved_window()*wordSize + STACK_BIAS); 2663 __ st_ptr(I7, SP, I7->sp_offset_in_saved_window()*wordSize + STACK_BIAS); 2664 } 2665 2666 // get JNIEnv* which is first argument to native 2667 if (!is_critical_native) { 2668 __ add(G2_thread, in_bytes(JavaThread::jni_environment_offset()), O0); 2669 } 2670 2671 // Use that pc we placed in O7 a while back as the current frame anchor 2672 __ set_last_Java_frame(SP, O7); 2673 2674 // We flushed the windows ages ago now mark them as flushed before transitioning. 2675 __ set(JavaFrameAnchor::flushed, G3_scratch); 2676 __ st(G3_scratch, G2_thread, JavaThread::frame_anchor_offset() + JavaFrameAnchor::flags_offset()); 2677 2678 // Transition from _thread_in_Java to _thread_in_native. 2679 __ set(_thread_in_native, G3_scratch); 2680 2681#ifdef _LP64 2682 AddressLiteral dest(native_func); 2683 __ relocate(relocInfo::runtime_call_type); 2684 __ jumpl_to(dest, O7, O7); 2685#else 2686 __ call(native_func, relocInfo::runtime_call_type); 2687#endif 2688 __ delayed()->st(G3_scratch, G2_thread, JavaThread::thread_state_offset()); 2689 2690 __ restore_thread(L7_thread_cache); // restore G2_thread 2691 2692 // Unpack native results. For int-types, we do any needed sign-extension 2693 // and move things into I0. The return value there will survive any VM 2694 // calls for blocking or unlocking. An FP or OOP result (handle) is done 2695 // specially in the slow-path code. 2696 switch (ret_type) { 2697 case T_VOID: break; // Nothing to do! 2698 case T_FLOAT: break; // Got it where we want it (unless slow-path) 2699 case T_DOUBLE: break; // Got it where we want it (unless slow-path) 2700 // In 64 bits build result is in O0, in O0, O1 in 32bit build 2701 case T_LONG: 2702#ifndef _LP64 2703 __ mov(O1, I1); 2704#endif 2705 // Fall thru 2706 case T_OBJECT: // Really a handle 2707 case T_ARRAY: 2708 case T_INT: 2709 __ mov(O0, I0); 2710 break; 2711 case T_BOOLEAN: __ subcc(G0, O0, G0); __ addc(G0, 0, I0); break; // !0 => true; 0 => false 2712 case T_BYTE : __ sll(O0, 24, O0); __ sra(O0, 24, I0); break; 2713 case T_CHAR : __ sll(O0, 16, O0); __ srl(O0, 16, I0); break; // cannot use and3, 0xFFFF too big as immediate value! 2714 case T_SHORT : __ sll(O0, 16, O0); __ sra(O0, 16, I0); break; 2715 break; // Cannot de-handlize until after reclaiming jvm_lock 2716 default: 2717 ShouldNotReachHere(); 2718 } 2719 2720 Label after_transition; 2721 // must we block? 2722 2723 // Block, if necessary, before resuming in _thread_in_Java state. 
2724  // In order for GC to work, don't clear the last_Java_sp until after blocking.
2725  { Label no_block;
2726    AddressLiteral sync_state(SafepointSynchronize::address_of_state());
2727
2728    // Switch thread to "native transition" state before reading the synchronization state.
2729    // This additional state is necessary because reading and testing the synchronization
2730    // state is not atomic w.r.t. GC, as this scenario demonstrates:
2731    //     Java thread A, in _thread_in_native state, loads _not_synchronized and is preempted.
2732    //     VM thread changes sync state to synchronizing and suspends threads for GC.
2733    //     Thread A is resumed to finish this native method, but doesn't block here since it
2734    //     didn't see any synchronization in progress, and escapes.
2735    __ set(_thread_in_native_trans, G3_scratch);
2736    __ st(G3_scratch, G2_thread, JavaThread::thread_state_offset());
2737    if(os::is_MP()) {
2738      if (UseMembar) {
2739        // Force this write out before the read below
2740        __ membar(Assembler::StoreLoad);
2741      } else {
2742        // Write serialization page so VM thread can do a pseudo remote membar.
2743        // We use the current thread pointer to calculate a thread specific
2744        // offset to write to within the page. This minimizes bus traffic
2745        // due to cache line collision.
2746        __ serialize_memory(G2_thread, G1_scratch, G3_scratch);
2747      }
2748    }
2749    __ load_contents(sync_state, G3_scratch);
2750    __ cmp(G3_scratch, SafepointSynchronize::_not_synchronized);
2751
2752    Label L;
2753    Address suspend_state(G2_thread, JavaThread::suspend_flags_offset());
2754    __ br(Assembler::notEqual, false, Assembler::pn, L);
2755    __ delayed()->ld(suspend_state, G3_scratch);
2756    __ cmp_and_br_short(G3_scratch, 0, Assembler::equal, Assembler::pt, no_block);
2757    __ bind(L);
2758
2759    // Block. Save any potential method result value before the operation and
2760    // use a leaf call to leave the last_Java_frame setup undisturbed. Doing this
2761    // lets us share the oopMap we used when we went native rather than create
2762    // a distinct one for this pc.
2763    //
2764    save_native_result(masm, ret_type, stack_slots);
2765    if (!is_critical_native) {
2766      __ call_VM_leaf(L7_thread_cache,
2767                      CAST_FROM_FN_PTR(address, JavaThread::check_special_condition_for_native_trans),
2768                      G2_thread);
2769    } else {
2770      __ call_VM_leaf(L7_thread_cache,
2771                      CAST_FROM_FN_PTR(address, JavaThread::check_special_condition_for_native_trans_and_transition),
2772                      G2_thread);
2773    }
2774
2775    // Restore any method result value
2776    restore_native_result(masm, ret_type, stack_slots);
2777
2778    if (is_critical_native) {
2779      // The call above performed the transition to thread_in_Java so
2780      // skip the transition logic below.
2781      __ ba(after_transition);
2782      __ delayed()->nop();
2783    }
2784
2785    __ bind(no_block);
2786  }
2787
2788  // thread state is thread_in_native_trans. Any safepoint blocking has already
2789  // happened so we can now change state to _thread_in_Java.
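  // In outline, the transition protocol generated above is (sketch):
  //   thread->state = _thread_in_native_trans;
  //   StoreLoad membar (or serialization-page write);
  //   if (safepoint_state != _not_synchronized || thread->suspend_flags != 0)
  //     check_special_condition_for_native_trans(thread);  // may block
  //   thread->state = _thread_in_Java;                     // done just below
  // Critical natives use the *_and_transition variant and skip the last store.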
2790 __ set(_thread_in_Java, G3_scratch); 2791 __ st(G3_scratch, G2_thread, JavaThread::thread_state_offset()); 2792 __ bind(after_transition); 2793 2794 Label no_reguard; 2795 __ ld(G2_thread, JavaThread::stack_guard_state_offset(), G3_scratch); 2796 __ cmp_and_br_short(G3_scratch, JavaThread::stack_guard_yellow_disabled, Assembler::notEqual, Assembler::pt, no_reguard); 2797 2798 save_native_result(masm, ret_type, stack_slots); 2799 __ call(CAST_FROM_FN_PTR(address, SharedRuntime::reguard_yellow_pages)); 2800 __ delayed()->nop(); 2801 2802 __ restore_thread(L7_thread_cache); // restore G2_thread 2803 restore_native_result(masm, ret_type, stack_slots); 2804 2805 __ bind(no_reguard); 2806 2807 // Handle possible exception (will unlock if necessary) 2808 2809 // native result if any is live in freg or I0 (and I1 if long and 32bit vm) 2810 2811 // Unlock 2812 if (method->is_synchronized()) { 2813 Label done; 2814 Register I2_ex_oop = I2; 2815 const Register L3_box = L3; 2816 // Get locked oop from the handle we passed to jni 2817 __ ld_ptr(L6_handle, 0, L4); 2818 __ add(SP, lock_offset+STACK_BIAS, L3_box); 2819 // Must save pending exception around the slow-path VM call. Since it's a 2820 // leaf call, the pending exception (if any) can be kept in a register. 2821 __ ld_ptr(G2_thread, in_bytes(Thread::pending_exception_offset()), I2_ex_oop); 2822 // Now unlock 2823 // (Roop, Rmark, Rbox, Rscratch) 2824 __ compiler_unlock_object(L4, L1, L3_box, L2); 2825 __ br(Assembler::equal, false, Assembler::pt, done); 2826 __ delayed()-> add(SP, lock_offset+STACK_BIAS, L3_box); 2827 2828 // save and restore any potential method result value around the unlocking 2829 // operation. Will save in I0 (or stack for FP returns). 2830 save_native_result(masm, ret_type, stack_slots); 2831 2832 // Must clear pending-exception before re-entering the VM. Since this is 2833 // a leaf call, pending-exception-oop can be safely kept in a register. 2834 __ st_ptr(G0, G2_thread, in_bytes(Thread::pending_exception_offset())); 2835 2836 // slow case of monitor enter. Inline a special case of call_VM that 2837 // disallows any pending_exception. 2838 __ mov(L3_box, O1); 2839 2840 __ call(CAST_FROM_FN_PTR(address, SharedRuntime::complete_monitor_unlocking_C), relocInfo::runtime_call_type); 2841 __ delayed()->mov(L4, O0); // Need oop in O0 2842 2843 __ restore_thread(L7_thread_cache); // restore G2_thread 2844 2845#ifdef ASSERT 2846 { Label L; 2847 __ ld_ptr(G2_thread, in_bytes(Thread::pending_exception_offset()), O0); 2848 __ br_null_short(O0, Assembler::pt, L); 2849 __ stop("no pending exception allowed on exit from IR::monitorexit"); 2850 __ bind(L); 2851 } 2852#endif 2853 restore_native_result(masm, ret_type, stack_slots); 2854 // check_forward_pending_exception jump to forward_exception if any pending 2855 // exception is set. The forward_exception routine expects to see the 2856 // exception in pending_exception and not in a register. Kind of clumsy, 2857 // since all folks who branch to forward_exception must have tested 2858 // pending_exception first and hence have it in a register already. 
2859 __ st_ptr(I2_ex_oop, G2_thread, in_bytes(Thread::pending_exception_offset())); 2860 __ bind(done); 2861 } 2862 2863 // Tell dtrace about this method exit 2864 { 2865 SkipIfEqual skip_if( 2866 masm, G3_scratch, &DTraceMethodProbes, Assembler::zero); 2867 save_native_result(masm, ret_type, stack_slots); 2868 __ set_metadata_constant(method(), O1); 2869 __ call_VM_leaf(L7_thread_cache, 2870 CAST_FROM_FN_PTR(address, SharedRuntime::dtrace_method_exit), 2871 G2_thread, O1); 2872 restore_native_result(masm, ret_type, stack_slots); 2873 } 2874 2875 // Clear "last Java frame" SP and PC. 2876 __ verify_thread(); // G2_thread must be correct 2877 __ reset_last_Java_frame(); 2878 2879 // Unpack oop result 2880 if (ret_type == T_OBJECT || ret_type == T_ARRAY) { 2881 Label L; 2882 __ addcc(G0, I0, G0); 2883 __ brx(Assembler::notZero, true, Assembler::pt, L); 2884 __ delayed()->ld_ptr(I0, 0, I0); 2885 __ mov(G0, I0); 2886 __ bind(L); 2887 __ verify_oop(I0); 2888 } 2889 2890 if (!is_critical_native) { 2891 // reset handle block 2892 __ ld_ptr(G2_thread, in_bytes(JavaThread::active_handles_offset()), L5); 2893 __ st_ptr(G0, L5, JNIHandleBlock::top_offset_in_bytes()); 2894 2895 __ ld_ptr(G2_thread, in_bytes(Thread::pending_exception_offset()), G3_scratch); 2896 check_forward_pending_exception(masm, G3_scratch); 2897 } 2898 2899 2900 // Return 2901 2902#ifndef _LP64 2903 if (ret_type == T_LONG) { 2904 2905 // Must leave proper result in O0,O1 and G1 (c2/tiered only) 2906 __ sllx(I0, 32, G1); // Shift bits into high G1 2907 __ srl (I1, 0, I1); // Zero extend O1 (harmless?) 2908 __ or3 (I1, G1, G1); // OR 64 bits into G1 2909 } 2910#endif 2911 2912 __ ret(); 2913 __ delayed()->restore(); 2914 2915 __ flush(); 2916 2917 nmethod *nm = nmethod::new_native_nmethod(method, 2918 compile_id, 2919 masm->code(), 2920 vep_offset, 2921 frame_complete, 2922 stack_slots / VMRegImpl::slots_per_word, 2923 (is_static ? in_ByteSize(klass_offset) : in_ByteSize(receiver_offset)), 2924 in_ByteSize(lock_offset), 2925 oop_maps); 2926 2927 if (is_critical_native) { 2928 nm->set_lazy_critical_native(true); 2929 } 2930 return nm; 2931 2932} 2933 2934#ifdef HAVE_DTRACE_H 2935// --------------------------------------------------------------------------- 2936// Generate a dtrace nmethod for a given signature. The method takes arguments 2937// in the Java compiled code convention, marshals them to the native 2938// abi and then leaves nops at the position you would expect to call a native 2939// function. When the probe is enabled the nops are replaced with a trap 2940// instruction that dtrace inserts and the trace will cause a notification 2941// to dtrace. 2942// 2943// The probes are only able to take primitive types and java/lang/String as 2944// arguments. No other java types are allowed. Strings are converted to utf8 2945// strings so that from dtrace point of view java strings are converted to C 2946// strings. There is an arbitrary fixed limit on the total space that a method 2947// can use for converting the strings. (256 chars per string in the signature). 2948// So any java string larger then this is truncated. 2949 2950static int fp_offset[ConcreteRegisterImpl::number_of_registers] = { 0 }; 2951static bool offsets_initialized = false; 2952 2953nmethod *SharedRuntime::generate_dtrace_nmethod( 2954 MacroAssembler *masm, methodHandle method) { 2955 2956 2957 // generate_dtrace_nmethod is guarded by a mutex so we are sure to 2958 // be single threaded in this method. 
  assert(AdapterHandlerLibrary_lock->owned_by_self(), "must be");

  // Fill in the signature array, for the calling-convention call.
  int total_args_passed = method->size_of_parameters();

  BasicType* in_sig_bt  = NEW_RESOURCE_ARRAY(BasicType, total_args_passed);
  VMRegPair  *in_regs   = NEW_RESOURCE_ARRAY(VMRegPair, total_args_passed);

  // The signature we are going to use for the trap that dtrace will see:
  // java/lang/String is converted, we drop "this", and any other object
  // is converted to NULL.  (A one-slot java/lang/Long object reference
  // is converted to a two-slot long, which is why we double the allocation).
  BasicType* out_sig_bt = NEW_RESOURCE_ARRAY(BasicType, total_args_passed * 2);
  VMRegPair* out_regs   = NEW_RESOURCE_ARRAY(VMRegPair, total_args_passed * 2);

  int i=0;
  int total_strings = 0;
  int first_arg_to_pass = 0;
  int total_c_args = 0;

  // Skip the receiver as dtrace doesn't want to see it
  if( !method->is_static() ) {
    in_sig_bt[i++] = T_OBJECT;
    first_arg_to_pass = 1;
  }

  SignatureStream ss(method->signature());
  for ( ; !ss.at_return_type(); ss.next()) {
    BasicType bt = ss.type();
    in_sig_bt[i++] = bt;  // Collect remaining bits of signature
    out_sig_bt[total_c_args++] = bt;
    if( bt == T_OBJECT) {
      Symbol* s = ss.as_symbol_or_null();
      if (s == vmSymbols::java_lang_String()) {
        total_strings++;
        out_sig_bt[total_c_args-1] = T_ADDRESS;
      } else if (s == vmSymbols::java_lang_Boolean() ||
                 s == vmSymbols::java_lang_Byte()) {
        out_sig_bt[total_c_args-1] = T_BYTE;
      } else if (s == vmSymbols::java_lang_Character() ||
                 s == vmSymbols::java_lang_Short()) {
        out_sig_bt[total_c_args-1] = T_SHORT;
      } else if (s == vmSymbols::java_lang_Integer() ||
                 s == vmSymbols::java_lang_Float()) {
        out_sig_bt[total_c_args-1] = T_INT;
      } else if (s == vmSymbols::java_lang_Long() ||
                 s == vmSymbols::java_lang_Double()) {
        out_sig_bt[total_c_args-1] = T_LONG;
        out_sig_bt[total_c_args++] = T_VOID;
      }
    } else if ( bt == T_LONG || bt == T_DOUBLE ) {
      in_sig_bt[i++] = T_VOID;   // Longs & doubles take 2 Java slots
      // We convert double to long
      out_sig_bt[total_c_args-1] = T_LONG;
      out_sig_bt[total_c_args++] = T_VOID;
    } else if ( bt == T_FLOAT) {
      // We convert float to int
      out_sig_bt[total_c_args-1] = T_INT;
    }
  }

  assert(i==total_args_passed, "validly parsed signature");

  // Now get the compiled-Java layout as input arguments
  int comp_args_on_stack;
  comp_args_on_stack = SharedRuntime::java_calling_convention(
      in_sig_bt, in_regs, total_args_passed, false);

  // We have received a description of where all the java args are located
  // on entry to the wrapper. We need to convert these args to where
  // a native (non-jni) function would expect them. To figure out
  // where they go we convert the java signature to a C signature and remove
  // T_VOID for any long/double we might have received.


  // Now figure out where the args must be stored and how much stack space
  // they require (neglecting out_preserve_stack_slots but including space
  // for storing the 1st six register arguments).  It's weird; see
  // int_stk_helper.
  //
  int out_arg_slots;
  out_arg_slots = c_calling_convention(out_sig_bt, out_regs, total_c_args);

  // Calculate the total number of stack slots we will need.
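  // Illustrative aside (not part of the original file): stack space here is
  // accounted in VMRegImpl::stack_slot_size units, and the final slot count
  // is rounded up so the frame keeps its required alignment.  A minimal
  // sketch of that arithmetic, assuming 4-byte slots:
#if 0
  // round 'slots' up to a multiple of 'align_slots' (a power of two);
  // the same shape as the round_to() used further down
  int round_to_slots(int slots, int align_slots) {
    return (slots + align_slots - 1) & ~(align_slots - 1);
  }
  // e.g. with 4-byte slots and an 8-slot (32-byte) alignment:
  //   round_to_slots(23, 8) == 24  ->  24 * 4 == 96 bytes of frame
#endif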

  // First count the abi requirement plus all of the outgoing args
  int stack_slots = SharedRuntime::out_preserve_stack_slots() + out_arg_slots;

  // Plus a temp for possible conversion of float/double/long register args

  int conversion_temp = stack_slots;
  stack_slots += 2;


  // Now space for the string(s) we must convert

  int string_locs = stack_slots;
  stack_slots += total_strings *
                   (max_dtrace_string_size / VMRegImpl::stack_slot_size);

  // OK, the space we have allocated will look like:
  //
  //
  // FP-> |                     |
  //      |---------------------|
  //      | string[n]           |
  //      |---------------------| <- string_locs[n]
  //      | string[n-1]         |
  //      |---------------------| <- string_locs[n-1]
  //      |  ...                |
  //      | ...                 |
  //      |---------------------| <- string_locs[1]
  //      | string[0]           |
  //      |---------------------| <- string_locs[0]
  //      | temp                |
  //      |---------------------| <- conversion_temp
  //      | outbound memory     |
  //      | based arguments     |
  //      |                     |
  //      |---------------------|
  //      |                     |
  // SP-> | out_preserved_slots |
  //
  //

  // Now compute actual number of stack words we need, rounding to make
  // the stack properly aligned.
  stack_slots = round_to(stack_slots, 4 * VMRegImpl::slots_per_word);

  int stack_size = stack_slots * VMRegImpl::stack_slot_size;

  intptr_t start = (intptr_t)__ pc();

  // First thing make an ic check to see if we should even be here

  {
    Label L;
    const Register temp_reg = G3_scratch;
    AddressLiteral ic_miss(SharedRuntime::get_ic_miss_stub());
    __ verify_oop(O0);
    __ ld_ptr(O0, oopDesc::klass_offset_in_bytes(), temp_reg);
    __ cmp_and_brx_short(temp_reg, G5_inline_cache_reg, Assembler::equal, Assembler::pt, L);

    __ jump_to(ic_miss, temp_reg);
    __ delayed()->nop();
    __ align(CodeEntryAlignment);
    __ bind(L);
  }

  int vep_offset = ((intptr_t)__ pc()) - start;


  // The instruction at the verified entry point must be 5 bytes or longer
  // because it can be patched on the fly by make_non_entrant. The stack bang
  // instruction fits that requirement.

  // Generate stack overflow check before creating frame
  __ generate_stack_overflow_check(stack_size);

  assert(((intptr_t)__ pc() - start - vep_offset) >= 5,
         "valid size for make_non_entrant");

  // Generate a new frame for the wrapper.
  __ save(SP, -stack_size, SP);

  // Frame is now completed as far as size and linkage.

  int frame_complete = ((intptr_t)__ pc()) - start;

#ifdef ASSERT
  bool reg_destroyed[RegisterImpl::number_of_registers];
  bool freg_destroyed[FloatRegisterImpl::number_of_registers];
  for ( int r = 0 ; r < RegisterImpl::number_of_registers ; r++ ) {
    reg_destroyed[r] = false;
  }
  for ( int f = 0 ; f < FloatRegisterImpl::number_of_registers ; f++ ) {
    freg_destroyed[f] = false;
  }

#endif /* ASSERT */

  VMRegPair zero;
  const Register g0 = G0; // without this we get a compiler warning (why??)
  zero.set2(g0->as_VMReg());

  int c_arg, j_arg;

  Register conversion_off = noreg;

  for (j_arg = first_arg_to_pass, c_arg = 0 ;
       j_arg < total_args_passed ; j_arg++, c_arg++ ) {

    VMRegPair src = in_regs[j_arg];
    VMRegPair dst = out_regs[c_arg];

#ifdef ASSERT
    if (src.first()->is_Register()) {
      assert(!reg_destroyed[src.first()->as_Register()->encoding()], "ack!");
    } else if (src.first()->is_FloatRegister()) {
      assert(!freg_destroyed[src.first()->as_FloatRegister()->encoding(
                                               FloatRegisterImpl::S)], "ack!");
    }
    if (dst.first()->is_Register()) {
      reg_destroyed[dst.first()->as_Register()->encoding()] = true;
    } else if (dst.first()->is_FloatRegister()) {
      freg_destroyed[dst.first()->as_FloatRegister()->encoding(
                                                 FloatRegisterImpl::S)] = true;
    }
#endif /* ASSERT */

    switch (in_sig_bt[j_arg]) {
      case T_ARRAY:
      case T_OBJECT:
        {
          if (out_sig_bt[c_arg] == T_BYTE  || out_sig_bt[c_arg] == T_SHORT ||
              out_sig_bt[c_arg] == T_INT ||  out_sig_bt[c_arg] == T_LONG) {
            // need to unbox a one-slot value
            Register in_reg = L0;
            Register tmp = L2;
            if ( src.first()->is_reg() ) {
              in_reg = src.first()->as_Register();
            } else {
              assert(Assembler::is_simm13(reg2offset(src.first()) + STACK_BIAS),
                     "must be");
              __ ld_ptr(FP, reg2offset(src.first()) + STACK_BIAS, in_reg);
            }
            // If the final destination is an acceptable register
            if ( dst.first()->is_reg() ) {
              if ( dst.is_single_phys_reg() || out_sig_bt[c_arg] != T_LONG ) {
                tmp = dst.first()->as_Register();
              }
            }

            Label skipUnbox;
            if ( wordSize == 4 && out_sig_bt[c_arg] == T_LONG ) {
              __ mov(G0, tmp->successor());
            }
            __ br_null(in_reg, true, Assembler::pn, skipUnbox);
            __ delayed()->mov(G0, tmp);

            BasicType bt = out_sig_bt[c_arg];
            int box_offset = java_lang_boxing_object::value_offset_in_bytes(bt);
            switch (bt) {
                case T_BYTE:
                  __ ldub(in_reg, box_offset, tmp); break;
                case T_SHORT:
                  __ lduh(in_reg, box_offset, tmp); break;
                case T_INT:
                  __ ld(in_reg, box_offset, tmp); break;
                case T_LONG:
                  __ ld_long(in_reg, box_offset, tmp); break;
                default: ShouldNotReachHere();
            }

            __ bind(skipUnbox);
            // If tmp wasn't the final destination, copy to the final destination
            if (tmp == L2) {
              VMRegPair tmp_as_VM = reg64_to_VMRegPair(L2);
              if (out_sig_bt[c_arg] == T_LONG) {
                long_move(masm, tmp_as_VM, dst);
              } else {
                move32_64(masm, tmp_as_VM, out_regs[c_arg]);
              }
            }
            if (out_sig_bt[c_arg] == T_LONG) {
              assert(out_sig_bt[c_arg+1] == T_VOID, "must be");
              ++c_arg; // move over the T_VOID to keep the loop indices in sync
            }
          } else if (out_sig_bt[c_arg] == T_ADDRESS) {
            Register s =
                src.first()->is_reg() ? src.first()->as_Register() : L2;
            Register d =
                dst.first()->is_reg() ? dst.first()->as_Register() : L2;

            // We store the oop now so that the conversion pass can reach it
            // while in the inner frame. This will be the only store if
            // the oop is NULL.
3235 if (s != L2) { 3236 // src is register 3237 if (d != L2) { 3238 // dst is register 3239 __ mov(s, d); 3240 } else { 3241 assert(Assembler::is_simm13(reg2offset(dst.first()) + 3242 STACK_BIAS), "must be"); 3243 __ st_ptr(s, SP, reg2offset(dst.first()) + STACK_BIAS); 3244 } 3245 } else { 3246 // src not a register 3247 assert(Assembler::is_simm13(reg2offset(src.first()) + 3248 STACK_BIAS), "must be"); 3249 __ ld_ptr(FP, reg2offset(src.first()) + STACK_BIAS, d); 3250 if (d == L2) { 3251 assert(Assembler::is_simm13(reg2offset(dst.first()) + 3252 STACK_BIAS), "must be"); 3253 __ st_ptr(d, SP, reg2offset(dst.first()) + STACK_BIAS); 3254 } 3255 } 3256 } else if (out_sig_bt[c_arg] != T_VOID) { 3257 // Convert the arg to NULL 3258 if (dst.first()->is_reg()) { 3259 __ mov(G0, dst.first()->as_Register()); 3260 } else { 3261 assert(Assembler::is_simm13(reg2offset(dst.first()) + 3262 STACK_BIAS), "must be"); 3263 __ st_ptr(G0, SP, reg2offset(dst.first()) + STACK_BIAS); 3264 } 3265 } 3266 } 3267 break; 3268 case T_VOID: 3269 break; 3270 3271 case T_FLOAT: 3272 if (src.first()->is_stack()) { 3273 // Stack to stack/reg is simple 3274 move32_64(masm, src, dst); 3275 } else { 3276 if (dst.first()->is_reg()) { 3277 // freg -> reg 3278 int off = 3279 STACK_BIAS + conversion_temp * VMRegImpl::stack_slot_size; 3280 Register d = dst.first()->as_Register(); 3281 if (Assembler::is_simm13(off)) { 3282 __ stf(FloatRegisterImpl::S, src.first()->as_FloatRegister(), 3283 SP, off); 3284 __ ld(SP, off, d); 3285 } else { 3286 if (conversion_off == noreg) { 3287 __ set(off, L6); 3288 conversion_off = L6; 3289 } 3290 __ stf(FloatRegisterImpl::S, src.first()->as_FloatRegister(), 3291 SP, conversion_off); 3292 __ ld(SP, conversion_off , d); 3293 } 3294 } else { 3295 // freg -> mem 3296 int off = STACK_BIAS + reg2offset(dst.first()); 3297 if (Assembler::is_simm13(off)) { 3298 __ stf(FloatRegisterImpl::S, src.first()->as_FloatRegister(), 3299 SP, off); 3300 } else { 3301 if (conversion_off == noreg) { 3302 __ set(off, L6); 3303 conversion_off = L6; 3304 } 3305 __ stf(FloatRegisterImpl::S, src.first()->as_FloatRegister(), 3306 SP, conversion_off); 3307 } 3308 } 3309 } 3310 break; 3311 3312 case T_DOUBLE: 3313 assert( j_arg + 1 < total_args_passed && 3314 in_sig_bt[j_arg + 1] == T_VOID && 3315 out_sig_bt[c_arg+1] == T_VOID, "bad arg list"); 3316 if (src.first()->is_stack()) { 3317 // Stack to stack/reg is simple 3318 long_move(masm, src, dst); 3319 } else { 3320 Register d = dst.first()->is_reg() ? dst.first()->as_Register() : L2; 3321 3322 // Destination could be an odd reg on 32bit in which case 3323 // we can't load direct to the destination. 

          if (!d->is_even() && wordSize == 4) {
            d = L2;
          }
          int off = STACK_BIAS + conversion_temp * VMRegImpl::stack_slot_size;
          if (Assembler::is_simm13(off)) {
            __ stf(FloatRegisterImpl::D, src.first()->as_FloatRegister(),
                   SP, off);
            __ ld_long(SP, off, d);
          } else {
            if (conversion_off == noreg) {
              __ set(off, L6);
              conversion_off = L6;
            }
            __ stf(FloatRegisterImpl::D, src.first()->as_FloatRegister(),
                   SP, conversion_off);
            __ ld_long(SP, conversion_off, d);
          }
          if (d == L2) {
            long_move(masm, reg64_to_VMRegPair(L2), dst);
          }
        }
        break;

      case T_LONG :
        // 32bit can't do a split move of something like g1 -> O0, O1
        // so use a memory temp
        if (src.is_single_phys_reg() && wordSize == 4) {
          Register tmp = L2;
          if (dst.first()->is_reg() &&
              (wordSize == 8 || dst.first()->as_Register()->is_even())) {
            tmp = dst.first()->as_Register();
          }

          int off = STACK_BIAS + conversion_temp * VMRegImpl::stack_slot_size;
          if (Assembler::is_simm13(off)) {
            __ stx(src.first()->as_Register(), SP, off);
            __ ld_long(SP, off, tmp);
          } else {
            if (conversion_off == noreg) {
              __ set(off, L6);
              conversion_off = L6;
            }
            __ stx(src.first()->as_Register(), SP, conversion_off);
            __ ld_long(SP, conversion_off, tmp);
          }

          if (tmp == L2) {
            long_move(masm, reg64_to_VMRegPair(L2), dst);
          }
        } else {
          long_move(masm, src, dst);
        }
        break;

      case T_ADDRESS: assert(false, "found T_ADDRESS in java args");

      default:
        move32_64(masm, src, dst);
    }
  }


  // If we have any strings we must store any register based arg to the stack.
  // This includes any still-live float registers too.

  if (total_strings > 0 ) {

    // protect all the arg registers
    __ save_frame(0);
    __ mov(G2_thread, L7_thread_cache);
    const Register L2_string_off = L2;

    // Get first string offset
    __ set(string_locs * VMRegImpl::stack_slot_size, L2_string_off);

    for (c_arg = 0 ; c_arg < total_c_args ; c_arg++ ) {
      if (out_sig_bt[c_arg] == T_ADDRESS) {

        VMRegPair dst = out_regs[c_arg];
        const Register d = dst.first()->is_reg() ?
            dst.first()->as_Register()->after_save() : noreg;

        // It's a string; the oop was already copied to the out arg
        // position
        if (d != noreg) {
          __ mov(d, O0);
        } else {
          assert(Assembler::is_simm13(reg2offset(dst.first()) + STACK_BIAS),
                 "must be");
          __ ld_ptr(FP,  reg2offset(dst.first()) + STACK_BIAS, O0);
        }
        Label skip;

        __ br_null(O0, false, Assembler::pn, skip);
        __ delayed()->add(FP, L2_string_off, O1);

        if (d != noreg) {
          __ mov(O1, d);
        } else {
          assert(Assembler::is_simm13(reg2offset(dst.first()) + STACK_BIAS),
                 "must be");
          __ st_ptr(O1, FP,  reg2offset(dst.first()) + STACK_BIAS);
        }

        __ call(CAST_FROM_FN_PTR(address, SharedRuntime::get_utf),
                relocInfo::runtime_call_type);
        __ delayed()->add(L2_string_off, max_dtrace_string_size, L2_string_off);

        __ bind(skip);

      }

    }
    __ mov(L7_thread_cache, G2_thread);
    __ restore();

  }


  // Ok now we are done.
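  // Illustrative aside (not part of the original file): the loop above
  // reserves max_dtrace_string_size bytes per string argument and calls
  // SharedRuntime::get_utf to fill each slot.  The truncation the comments
  // describe amounts to a bounded copy, roughly like this C sketch
  // (bounded_utf8_copy is a hypothetical stand-in for get_utf):
#if 0
#include <string.h>

  static void bounded_utf8_copy(const char* utf8_src, char* dst, size_t cap) {
    // copy at most cap-1 bytes and always NUL-terminate; anything longer
    // than the fixed per-string budget is silently truncated
    size_t n = strlen(utf8_src);
    if (n >= cap) n = cap - 1;
    memcpy(dst, utf8_src, n);
    dst[n] = '\0';
  }
#endif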
  // Need to place the nop that dtrace wants in order to
  // patch in the trap

  int patch_offset = ((intptr_t)__ pc()) - start;

  __ nop();


  // Return

  __ ret();
  __ delayed()->restore();

  __ flush();

  nmethod *nm = nmethod::new_dtrace_nmethod(
      method, masm->code(), vep_offset, patch_offset, frame_complete,
      stack_slots / VMRegImpl::slots_per_word);
  return nm;

}

#endif // HAVE_DTRACE_H

// this function returns the adjusted size (in number of words) of a c2i adapter
// activation for use during deoptimization
int Deoptimization::last_frame_adjust(int callee_parameters, int callee_locals) {
  assert(callee_locals >= callee_parameters,
         "test and remove; got more parms than locals");
  if (callee_locals < callee_parameters)
    return 0;                   // No adjustment for negative locals
  int diff = (callee_locals - callee_parameters) * Interpreter::stackElementWords;
  return round_to(diff, WordsPerLong);
}

// "Top of Stack" slots that may be unused by the calling convention but must
// otherwise be preserved.
// On Intel these are not necessary and the value can be zero.
// On Sparc this describes the words reserved for storing a register window
// when an interrupt occurs.
uint SharedRuntime::out_preserve_stack_slots() {
  return frame::register_save_words * VMRegImpl::slots_per_word;
}

static void gen_new_frame(MacroAssembler* masm, bool deopt) {
//
// Common out the new frame generation for deopt and uncommon trap
//
  Register        G3pcs              = G3_scratch; // Array of new pcs (input)
  Register        Oreturn0           = O0;
  Register        Oreturn1           = O1;
  Register        O2UnrollBlock      = O2;
  Register        O3array            = O3;         // Array of frame sizes (input)
  Register        O4array_size       = O4;         // number of frames (input)
  Register        O7frame_size       = O7;         // the current frame's size (scratch)

  __ ld_ptr(O3array, 0, O7frame_size);
  __ sub(G0, O7frame_size, O7frame_size);
  __ save(SP, O7frame_size, SP);
  __ ld_ptr(G3pcs, 0, I7);                      // load frame's new pc

  #ifdef ASSERT
  // make sure that the frames are aligned properly
#ifndef _LP64
  __ btst(wordSize*2-1, SP);
  __ breakpoint_trap(Assembler::notZero, Assembler::ptr_cc);
#endif
  #endif

  // Deopt needs to pass some extra live values from frame to frame

  if (deopt) {
    __ mov(Oreturn0->after_save(), Oreturn0);
    __ mov(Oreturn1->after_save(), Oreturn1);
  }

  __ mov(O4array_size->after_save(), O4array_size);
  __ sub(O4array_size, 1, O4array_size);
  __ mov(O3array->after_save(), O3array);
  __ mov(O2UnrollBlock->after_save(), O2UnrollBlock);
  __ add(G3pcs, wordSize, G3pcs);               // point to next pc value

  #ifdef ASSERT
  // trash registers to show a clear pattern in backtraces
  __ set(0xDEAD0000, I0);
  __ add(I0,  2, I1);
  __ add(I0,  4, I2);
  __ add(I0,  6, I3);
  __ add(I0,  8, I4);
  // Don't touch I5; it could hold the valuable savedSP
  __ set(0xDEADBEEF, L0);
  __ mov(L0, L1);
  __ mov(L0, L2);
  __ mov(L0, L3);
  __ mov(L0, L4);
  __ mov(L0, L5);

  // trash the return value as there is nothing to return yet
  __ set(0xDEAD0001, O7);
  #endif

  __ mov(SP, O5_savedSP);
}

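// Illustrative aside (not part of the original file): make_new_frames below
// drives gen_new_frame in a loop over the UnrollBlock data.  Stripped of the
// register choreography, the control flow amounts to the following C sketch,
// where UnrollBlockShim is a hypothetical stand-in for the UnrollBlock
// fields actually read:
#if 0
struct UnrollBlockShim {
  int          number_of_frames;
  const long*  frame_sizes;   // one size per interpreter frame, in bytes
  void* const* frame_pcs;     // number_of_frames + 1 entries; the extra, final
                              // entry is the pc used to enter the interpreter
};

static void push_interpreter_frames(const UnrollBlockShim* info) {
  for (int i = 0; i < info->number_of_frames; i++) {
    // gen_new_frame: 'save' a frame of frame_sizes[i] bytes and make
    // frame_pcs[i] its return address, keeping the deopt return values live
    // push_frame(info->frame_sizes[i], info->frame_pcs[i]);
  }
}
#endif
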
static void make_new_frames(MacroAssembler* masm, bool deopt) {
  //
  // loop through the UnrollBlock info and create new frames
  //
  Register        G3pcs              = G3_scratch;
  Register        Oreturn0           = O0;
  Register        Oreturn1           = O1;
  Register        O2UnrollBlock      = O2;
  Register        O3array            = O3;
  Register        O4array_size       = O4;
  Label           loop;

  // Before we make new frames, check to see if stack is available.
  // Do this after the caller's return address is on top of stack
  if (UseStackBanging) {
    // Get total frame size for interpreted frames
    __ ld(O2UnrollBlock, Deoptimization::UnrollBlock::total_frame_sizes_offset_in_bytes(), O4);
    __ bang_stack_size(O4, O3, G3_scratch);
  }

  __ ld(O2UnrollBlock, Deoptimization::UnrollBlock::number_of_frames_offset_in_bytes(), O4array_size);
  __ ld_ptr(O2UnrollBlock, Deoptimization::UnrollBlock::frame_pcs_offset_in_bytes(), G3pcs);
  __ ld_ptr(O2UnrollBlock, Deoptimization::UnrollBlock::frame_sizes_offset_in_bytes(), O3array);

  // Adjust old interpreter frame to make space for new frame's extra java locals
  //
  // We capture the original sp for the transition frame only because it is needed in
  // order to properly calculate interpreter_sp_adjustment. Even though in real life
  // every interpreter frame captures a savedSP, it is only needed at the transition
  // (fortunately). If we had to have it correct everywhere then we would need to
  // be told the sp_adjustment for each frame we create. If the frame size array
  // were to have twice the frame count entries then we could have pairs
  // [sp_adjustment, frame_size] for each frame we create and keep up the
  // illusion everywhere.
  //

  __ ld(O2UnrollBlock, Deoptimization::UnrollBlock::caller_adjustment_offset_in_bytes(), O7);
  __ mov(SP, O5_savedSP);       // remember initial sender's original sp before adjustment
  __ sub(SP, O7, SP);

#ifdef ASSERT
  // make sure that there is at least one entry in the array
  __ tst(O4array_size);
  __ breakpoint_trap(Assembler::zero, Assembler::icc);
#endif

  // Now push the new interpreter frames
  __ bind(loop);

  // allocate a new frame, filling the registers

  gen_new_frame(masm, deopt);        // allocate an interpreter frame

  __ cmp_zero_and_br(Assembler::notZero, O4array_size, loop);
  __ delayed()->add(O3array, wordSize, O3array);
  __ ld_ptr(G3pcs, 0, O7);           // load final frame new pc

}

//------------------------------generate_deopt_blob----------------------------
// Ought to generate an ideal graph & compile, but here's some SPARC ASM
// instead.
void SharedRuntime::generate_deopt_blob() {
  // allocate space for the code
  ResourceMark rm;
  // setup code generation tools
  int pad = VerifyThread ? 512 : 0;  // Extra slop space for more verify code
  if (UseStackBanging) {
    pad += StackShadowPages*16 + 32;
  }
#ifdef _LP64
  CodeBuffer buffer("deopt_blob", 2100+pad, 512);
#else
  // Measured 8/7/03 at 1212 in 32bit debug build (no VerifyThread)
  // Measured 8/7/03 at 1396 in 32bit debug build (VerifyThread)
  CodeBuffer buffer("deopt_blob", 1600+pad, 512);
#endif /* _LP64 */
  MacroAssembler* masm               = new MacroAssembler(&buffer);
  FloatRegister   Freturn0           = F0;
  Register        Greturn1           = G1;
  Register        Oreturn0           = O0;
  Register        Oreturn1           = O1;
  Register        O2UnrollBlock      = O2;
  Register        L0deopt_mode       = L0;
  Register        G4deopt_mode       = G4_scratch;
  int             frame_size_words;
  Address         saved_Freturn0_addr(FP, -sizeof(double) + STACK_BIAS);
#if !defined(_LP64) && defined(COMPILER2)
  Address         saved_Greturn1_addr(FP, -sizeof(double) -sizeof(jlong) + STACK_BIAS);
#endif
  Label           cont;

  OopMapSet *oop_maps = new OopMapSet();

  //
  // This is the entry point for code which is returning to a de-optimized
  // frame.
  // The steps taken by this frame are as follows:
  //   - push a dummy "register_save" and save the return values (O0, O1, F0/F1, G1)
  //     and all potentially live registers (at a pollpoint many registers can be live).
  //
  //   - call the C routine: Deoptimization::fetch_unroll_info (this function
  //     returns information about the number and size of interpreter frames
  //     which are equivalent to the frame which is being deoptimized)
  //   - deallocate the unpack frame, restoring only result values.  Other
  //     volatile registers will now be captured in the vframeArray as needed.
  //   - deallocate the deoptimization frame
  //   - in a loop using the information returned in the previous step
  //     push new interpreter frames (take care to propagate the return
  //     values through each new frame pushed)
  //   - create a dummy "unpack_frame" and save the return values (O0, O1, F0)
  //   - call the C routine: Deoptimization::unpack_frames (this function
  //     lays out values on the interpreter frame which was just created)
  //   - deallocate the dummy unpack_frame
  //   - ensure that all the return values are correctly set and then do
  //     a return to the interpreter entry point
  //
  // Refer to the following methods for more information:
  //   - Deoptimization::fetch_unroll_info
  //   - Deoptimization::unpack_frames

  OopMap* map = NULL;

  int start = __ offset();

  // restore G2, the trampoline destroyed it
  __ get_thread();

  // On entry we have been called by the deoptimized nmethod with a call that
  // replaced the original call (or safepoint polling location) so the deoptimizing
  // pc is now in O7. Return values are still in the expected places

  map = RegisterSaver::save_live_registers(masm, 0, &frame_size_words);
  __ ba(cont);
  __ delayed()->mov(Deoptimization::Unpack_deopt, L0deopt_mode);

  int exception_offset = __ offset() - start;

  // restore G2, the trampoline destroyed it
  __ get_thread();

  // On entry we have been jumped to by the exception handler (or exception_blob
  // for server).  O0 contains the exception oop and O7 contains the original
  // exception pc.  So if we push a frame here it will look to the
  // stack walking code (fetch_unroll_info) just like a normal call so
  // state will be extracted normally.
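  // Illustrative aside (not part of the original file): the blob exposes
  // several entry points that differ only in the Unpack_* mode they select
  // and in how the exception state is staged.  Schematically:
#if 0
  enum UnpackModeShim {        // mirrors the Deoptimization::Unpack_* values
    deopt_entry,               // fall-through entry: the deoptimizing pc is in O7
    exception_entry,           // the exception oop arrives in O0
    reexecute_entry            // the trapping bytecode must be reexecuted
  };
  // All entries converge on 'cont' below with the chosen mode in
  // L0deopt_mode; the register state they save is identical, so one OopMap
  // covers the fetch_unroll_info call site for every entry.
#endif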

  // save exception oop in JavaThread and fall through into the
  // exception_in_tls case since they are handled in same way except
  // for where the pending exception is kept.
  __ st_ptr(Oexception, G2_thread, JavaThread::exception_oop_offset());

  //
  // Vanilla deoptimization with an exception pending in exception_oop
  //
  int exception_in_tls_offset = __ offset() - start;

  // No need to update oop_map as each call to save_live_registers will produce identical oopmap
  (void) RegisterSaver::save_live_registers(masm, 0, &frame_size_words);

  // Restore G2_thread
  __ get_thread();

#ifdef ASSERT
  {
    // verify that there is really an exception oop in exception_oop
    Label has_exception;
    __ ld_ptr(G2_thread, JavaThread::exception_oop_offset(), Oexception);
    __ br_notnull_short(Oexception, Assembler::pt, has_exception);
    __ stop("no exception in thread");
    __ bind(has_exception);

    // verify that there is no pending exception
    Label no_pending_exception;
    Address exception_addr(G2_thread, Thread::pending_exception_offset());
    __ ld_ptr(exception_addr, Oexception);
    __ br_null_short(Oexception, Assembler::pt, no_pending_exception);
    __ stop("must not have pending exception here");
    __ bind(no_pending_exception);
  }
#endif

  __ ba(cont);
  __ delayed()->mov(Deoptimization::Unpack_exception, L0deopt_mode);

  //
  // Reexecute entry, similar to c2 uncommon trap
  //
  int reexecute_offset = __ offset() - start;

  // No need to update oop_map as each call to save_live_registers will produce identical oopmap
  (void) RegisterSaver::save_live_registers(masm, 0, &frame_size_words);

  __ mov(Deoptimization::Unpack_reexecute, L0deopt_mode);

  __ bind(cont);

  __ set_last_Java_frame(SP, noreg);

  // do the call by hand so we can get the oopmap

  __ mov(G2_thread, L7_thread_cache);
  __ call(CAST_FROM_FN_PTR(address, Deoptimization::fetch_unroll_info), relocInfo::runtime_call_type);
  __ delayed()->mov(G2_thread, O0);

  // Set an oopmap for the call site; this describes all our saved volatile registers

  oop_maps->add_gc_map( __ offset()-start, map);

  __ mov(L7_thread_cache, G2_thread);

  __ reset_last_Java_frame();

  // NOTE: we know that only O0/O1 will be reloaded by restore_result_registers
  // so this move will survive

  __ mov(L0deopt_mode, G4deopt_mode);

  __ mov(O0, O2UnrollBlock->after_save());

  RegisterSaver::restore_result_registers(masm);

  Label noException;
  __ cmp_and_br_short(G4deopt_mode, Deoptimization::Unpack_exception, Assembler::notEqual, Assembler::pt, noException);

  // Move the pending exception from exception_oop to Oexception so
  // the pending exception will be picked up by the interpreter.
3775 __ ld_ptr(G2_thread, in_bytes(JavaThread::exception_oop_offset()), Oexception); 3776 __ st_ptr(G0, G2_thread, in_bytes(JavaThread::exception_oop_offset())); 3777 __ bind(noException); 3778 3779 // deallocate the deoptimization frame taking care to preserve the return values 3780 __ mov(Oreturn0, Oreturn0->after_save()); 3781 __ mov(Oreturn1, Oreturn1->after_save()); 3782 __ mov(O2UnrollBlock, O2UnrollBlock->after_save()); 3783 __ restore(); 3784 3785 // Allocate new interpreter frame(s) and possible c2i adapter frame 3786 3787 make_new_frames(masm, true); 3788 3789 // push a dummy "unpack_frame" taking care of float return values and 3790 // call Deoptimization::unpack_frames to have the unpacker layout 3791 // information in the interpreter frames just created and then return 3792 // to the interpreter entry point 3793 __ save(SP, -frame_size_words*wordSize, SP); 3794 __ stf(FloatRegisterImpl::D, Freturn0, saved_Freturn0_addr); 3795#if !defined(_LP64) 3796#if defined(COMPILER2) 3797 // 32-bit 1-register longs return longs in G1 3798 __ stx(Greturn1, saved_Greturn1_addr); 3799#endif 3800 __ set_last_Java_frame(SP, noreg); 3801 __ call_VM_leaf(L7_thread_cache, CAST_FROM_FN_PTR(address, Deoptimization::unpack_frames), G2_thread, G4deopt_mode); 3802#else 3803 // LP64 uses g4 in set_last_Java_frame 3804 __ mov(G4deopt_mode, O1); 3805 __ set_last_Java_frame(SP, G0); 3806 __ call_VM_leaf(L7_thread_cache, CAST_FROM_FN_PTR(address, Deoptimization::unpack_frames), G2_thread, O1); 3807#endif 3808 __ reset_last_Java_frame(); 3809 __ ldf(FloatRegisterImpl::D, saved_Freturn0_addr, Freturn0); 3810 3811#if !defined(_LP64) && defined(COMPILER2) 3812 // In 32 bit, C2 returns longs in G1 so restore the saved G1 into 3813 // I0/I1 if the return value is long. 3814 Label not_long; 3815 __ cmp_and_br_short(O0,T_LONG, Assembler::notEqual, Assembler::pt, not_long); 3816 __ ldd(saved_Greturn1_addr,I0); 3817 __ bind(not_long); 3818#endif 3819 __ ret(); 3820 __ delayed()->restore(); 3821 3822 masm->flush(); 3823 _deopt_blob = DeoptimizationBlob::create(&buffer, oop_maps, 0, exception_offset, reexecute_offset, frame_size_words); 3824 _deopt_blob->set_unpack_with_exception_in_tls_offset(exception_in_tls_offset); 3825} 3826 3827#ifdef COMPILER2 3828 3829//------------------------------generate_uncommon_trap_blob-------------------- 3830// Ought to generate an ideal graph & compile, but here's some SPARC ASM 3831// instead. 3832void SharedRuntime::generate_uncommon_trap_blob() { 3833 // allocate space for the code 3834 ResourceMark rm; 3835 // setup code generation tools 3836 int pad = VerifyThread ? 512 : 0; 3837 if (UseStackBanging) { 3838 pad += StackShadowPages*16 + 32; 3839 } 3840#ifdef _LP64 3841 CodeBuffer buffer("uncommon_trap_blob", 2700+pad, 512); 3842#else 3843 // Measured 8/7/03 at 660 in 32bit debug build (no VerifyThread) 3844 // Measured 8/7/03 at 1028 in 32bit debug build (VerifyThread) 3845 CodeBuffer buffer("uncommon_trap_blob", 2000+pad, 512); 3846#endif 3847 MacroAssembler* masm = new MacroAssembler(&buffer); 3848 Register O2UnrollBlock = O2; 3849 Register O2klass_index = O2; 3850 3851 // 3852 // This is the entry point for all traps the compiler takes when it thinks 3853 // it cannot handle further execution of compilation code. 
The frame is 3854 // deoptimized in these cases and converted into interpreter frames for 3855 // execution 3856 // The steps taken by this frame are as follows: 3857 // - push a fake "unpack_frame" 3858 // - call the C routine Deoptimization::uncommon_trap (this function 3859 // packs the current compiled frame into vframe arrays and returns 3860 // information about the number and size of interpreter frames which 3861 // are equivalent to the frame which is being deoptimized) 3862 // - deallocate the "unpack_frame" 3863 // - deallocate the deoptimization frame 3864 // - in a loop using the information returned in the previous step 3865 // push interpreter frames; 3866 // - create a dummy "unpack_frame" 3867 // - call the C routine: Deoptimization::unpack_frames (this function 3868 // lays out values on the interpreter frame which was just created) 3869 // - deallocate the dummy unpack_frame 3870 // - return to the interpreter entry point 3871 // 3872 // Refer to the following methods for more information: 3873 // - Deoptimization::uncommon_trap 3874 // - Deoptimization::unpack_frame 3875 3876 // the unloaded class index is in O0 (first parameter to this blob) 3877 3878 // push a dummy "unpack_frame" 3879 // and call Deoptimization::uncommon_trap to pack the compiled frame into 3880 // vframe array and return the UnrollBlock information 3881 __ save_frame(0); 3882 __ set_last_Java_frame(SP, noreg); 3883 __ mov(I0, O2klass_index); 3884 __ call_VM_leaf(L7_thread_cache, CAST_FROM_FN_PTR(address, Deoptimization::uncommon_trap), G2_thread, O2klass_index); 3885 __ reset_last_Java_frame(); 3886 __ mov(O0, O2UnrollBlock->after_save()); 3887 __ restore(); 3888 3889 // deallocate the deoptimized frame taking care to preserve the return values 3890 __ mov(O2UnrollBlock, O2UnrollBlock->after_save()); 3891 __ restore(); 3892 3893 // Allocate new interpreter frame(s) and possible c2i adapter frame 3894 3895 make_new_frames(masm, false); 3896 3897 // push a dummy "unpack_frame" taking care of float return values and 3898 // call Deoptimization::unpack_frames to have the unpacker layout 3899 // information in the interpreter frames just created and then return 3900 // to the interpreter entry point 3901 __ save_frame(0); 3902 __ set_last_Java_frame(SP, noreg); 3903 __ mov(Deoptimization::Unpack_uncommon_trap, O3); // indicate it is the uncommon trap case 3904 __ call_VM_leaf(L7_thread_cache, CAST_FROM_FN_PTR(address, Deoptimization::unpack_frames), G2_thread, O3); 3905 __ reset_last_Java_frame(); 3906 __ ret(); 3907 __ delayed()->restore(); 3908 3909 masm->flush(); 3910 _uncommon_trap_blob = UncommonTrapBlob::create(&buffer, NULL, __ total_frame_size_in_bytes(0)/wordSize); 3911} 3912 3913#endif // COMPILER2 3914 3915//------------------------------generate_handler_blob------------------- 3916// 3917// Generate a special Compile2Runtime blob that saves all registers, and sets 3918// up an OopMap. 3919// 3920// This blob is jumped to (via a breakpoint and the signal handler) from a 3921// safepoint in compiled code. On entry to this blob, O7 contains the 3922// address in the original nmethod at which we should resume normal execution. 3923// Thus, this blob looks like a subroutine which must preserve lots of 3924// registers and return normally. Note that O7 is never register-allocated, 3925// so it is guaranteed to be free here. 3926// 3927 3928// The hardest part of what this blob must do is to save the 64-bit %o 3929// registers in the 32-bit build. 
// A simple 'save' turns the %o's into %i's and an interrupt will chop off
// their heads.  Making space in the caller's frame first will let us save
// the 64-bit %o's before save'ing, but we cannot hand the adjusted FP off
// to the GC stack-crawler: this will modify the caller's SP and mess up
// HIS OopMaps.  So we first adjust the caller's SP, then save the 64-bit
// %o's, then do a save, then fixup the caller's SP (our FP).
// Tricky, tricky, tricky...

SafepointBlob* SharedRuntime::generate_handler_blob(address call_ptr, bool cause_return) {
  assert (StubRoutines::forward_exception_entry() != NULL, "must be generated before");

  // allocate space for the code
  ResourceMark rm;
  // setup code generation tools
  // Measured 8/7/03 at 896 in 32bit debug build (no VerifyThread)
  // Measured 8/7/03 at 1080 in 32bit debug build (VerifyThread)
  // even larger with TraceJumps
  int pad = TraceJumps ? 512 : 0;
  CodeBuffer buffer("handler_blob", 1600 + pad, 512);
  MacroAssembler* masm                = new MacroAssembler(&buffer);
  int             frame_size_words;
  OopMapSet *oop_maps = new OopMapSet();
  OopMap* map = NULL;

  int start = __ offset();

  // If this causes a return before the processing, then do a "restore"
  if (cause_return) {
    __ restore();
  } else {
    // Make it look like we were called via the poll
    // so that frame constructor always sees a valid return address
    __ ld_ptr(G2_thread, in_bytes(JavaThread::saved_exception_pc_offset()), O7);
    __ sub(O7, frame::pc_return_offset, O7);
  }

  map = RegisterSaver::save_live_registers(masm, 0, &frame_size_words);

  // setup last_Java_sp (blows G4)
  __ set_last_Java_frame(SP, noreg);

  // call into the runtime to handle the illegal instruction exception
  // Do not use call_VM_leaf, because we need to make a GC map at this call site.
  __ mov(G2_thread, O0);
  __ save_thread(L7_thread_cache);
  __ call(call_ptr);
  __ delayed()->nop();

  // Set an oopmap for the call site.
  // We need this not only for callee-saved registers, but also for volatile
  // registers that the compiler might be keeping live across a safepoint.

  oop_maps->add_gc_map( __ offset() - start, map);

  __ restore_thread(L7_thread_cache);
  // clear last_Java_sp
  __ reset_last_Java_frame();

  // Check for exceptions
  Label pending;

  __ ld_ptr(G2_thread, in_bytes(Thread::pending_exception_offset()), O1);
  __ br_notnull_short(O1, Assembler::pn, pending);

  RegisterSaver::restore_live_registers(masm);

  // We are back to the original state on entry and ready to go.

  __ retl();
  __ delayed()->nop();

  // Pending exception after the safepoint

  __ bind(pending);

  RegisterSaver::restore_live_registers(masm);

  // We are back to the original state on entry.

  // Tail-call forward_exception_entry, with the issuing PC in O7,
  // so it looks like the original nmethod called forward_exception_entry.
  __ set((intptr_t)StubRoutines::forward_exception_entry(), O0);
  __ JMP(O0, 0);
  __ delayed()->nop();

  // -------------
  // make sure all code is generated
  masm->flush();

  // return exception blob
  return SafepointBlob::create(&buffer, oop_maps, frame_size_words);
}

//
// generate_resolve_blob - call resolution (static/virtual/opt-virtual/ic-miss)
//
// Generate a stub that calls into vm to find out the proper destination
// of a java call. All the argument registers are live at this point
// but since this is generic code we don't know what they are and the caller
// must do any gc of the args.
//
RuntimeStub* SharedRuntime::generate_resolve_blob(address destination, const char* name) {
  assert (StubRoutines::forward_exception_entry() != NULL, "must be generated before");

  // allocate space for the code
  ResourceMark rm;
  // setup code generation tools
  // Measured 8/7/03 at 896 in 32bit debug build (no VerifyThread)
  // Measured 8/7/03 at 1080 in 32bit debug build (VerifyThread)
  // even larger with TraceJumps
  int pad = TraceJumps ? 512 : 0;
  CodeBuffer buffer(name, 1600 + pad, 512);
  MacroAssembler* masm                = new MacroAssembler(&buffer);
  int             frame_size_words;
  OopMapSet *oop_maps = new OopMapSet();
  OopMap* map = NULL;

  int start = __ offset();

  map = RegisterSaver::save_live_registers(masm, 0, &frame_size_words);

  int frame_complete = __ offset();

  // setup last_Java_sp (blows G4)
  __ set_last_Java_frame(SP, noreg);

  // call into the runtime to find the proper destination of the call
  // Do not use call_VM_leaf, because we need to make a GC map at this call site.
  __ mov(G2_thread, O0);
  __ save_thread(L7_thread_cache);
  __ call(destination, relocInfo::runtime_call_type);
  __ delayed()->nop();

  // O0 contains the address we are going to jump to assuming no exception got installed

  // Set an oopmap for the call site.
  // We need this not only for callee-saved registers, but also for volatile
  // registers that the compiler might be keeping live across a safepoint.

  oop_maps->add_gc_map( __ offset() - start, map);

  __ restore_thread(L7_thread_cache);
  // clear last_Java_sp
  __ reset_last_Java_frame();

  // Check for exceptions
  Label pending;

  __ ld_ptr(G2_thread, in_bytes(Thread::pending_exception_offset()), O1);
  __ br_notnull_short(O1, Assembler::pn, pending);

  // get the returned Method*

  __ get_vm_result_2(G5_method);
  __ stx(G5_method, SP, RegisterSaver::G5_offset()+STACK_BIAS);

  // O0 is where we want to jump; overwrite G3, which is saved and scratch

  __ stx(O0, SP, RegisterSaver::G3_offset()+STACK_BIAS);

  RegisterSaver::restore_live_registers(masm);

  // We are back to the original state on entry and ready to go.

  __ JMP(G3, 0);
  __ delayed()->nop();

  // Pending exception after the safepoint

  __ bind(pending);

  RegisterSaver::restore_live_registers(masm);

  // We are back to the original state on entry.

  // Tail-call forward_exception_entry, with the issuing PC in O7,
  // so it looks like the original nmethod called forward_exception_entry.
4106 __ set((intptr_t)StubRoutines::forward_exception_entry(), O0); 4107 __ JMP(O0, 0); 4108 __ delayed()->nop(); 4109 4110 // ------------- 4111 // make sure all code is generated 4112 masm->flush(); 4113 4114 // return the blob 4115 // frame_size_words or bytes?? 4116 return RuntimeStub::new_runtime_stub(name, &buffer, frame_complete, frame_size_words, oop_maps, true); 4117} 4118
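
// Illustrative aside (not part of the original file): the STACK_BIAS term
// added to every SP/FP-relative offset above reflects SPARC V9's biased
// stack pointers -- in 64-bit mode %sp and %fp point 2047 bytes below the
// memory they address (the bias is 0 in 32-bit mode).  A minimal sketch of
// the addressing rule, with hypothetical names:
#if 0
#include <stdint.h>

#define MY_STACK_BIAS 2047   // stand-in for STACK_BIAS on 64-bit SPARC

static intptr_t real_slot_address(intptr_t biased_sp, int offset_in_frame) {
  // a load such as ld_ptr(FP, offset + STACK_BIAS, reg) reads from the
  // unbiased address: biased pointer + bias + logical frame offset
  return biased_sp + MY_STACK_BIAS + offset_in_frame;
}
#endif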