sharedRuntime_sparc.cpp revision 5976:2b8e28fdf503
/*
 * Copyright (c) 2003, 2012, Oracle and/or its affiliates. All rights reserved.
 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
 *
 * This code is free software; you can redistribute it and/or modify it
 * under the terms of the GNU General Public License version 2 only, as
 * published by the Free Software Foundation.
 *
 * This code is distributed in the hope that it will be useful, but WITHOUT
 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
 * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
 * version 2 for more details (a copy is included in the LICENSE file that
 * accompanied this code).
 *
 * You should have received a copy of the GNU General Public License version
 * 2 along with this work; if not, write to the Free Software Foundation,
 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
 *
 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
 * or visit www.oracle.com if you need additional information or have any
 * questions.
 *
 */

#include "precompiled.hpp"
#include "asm/macroAssembler.inline.hpp"
#include "code/debugInfoRec.hpp"
#include "code/icBuffer.hpp"
#include "code/vtableStubs.hpp"
#include "interpreter/interpreter.hpp"
#include "oops/compiledICHolder.hpp"
#include "prims/jvmtiRedefineClassesTrace.hpp"
#include "runtime/sharedRuntime.hpp"
#include "runtime/vframeArray.hpp"
#include "vmreg_sparc.inline.hpp"
#ifdef COMPILER1
#include "c1/c1_Runtime1.hpp"
#endif
#ifdef COMPILER2
#include "opto/runtime.hpp"
#endif
#ifdef SHARK
#include "compiler/compileBroker.hpp"
#include "shark/sharkCompiler.hpp"
#endif

#define __ masm->


class RegisterSaver {

  // Used for saving volatile registers. This is Gregs, Fregs, I/L/O.
  // The Oregs are problematic. In the 32bit build the compiler can
  // have O registers live with 64 bit quantities. A window save will
  // cut the heads off of the registers. We have to do a very extensive
  // stack dance to save and restore these properly.

  // Note that the Oregs problem only exists if we block at either a polling
  // page exception or a compiled code safepoint that was not originally a call,
  // or deoptimize following one of these kinds of safepoints.

  // Lots of registers to save.  For all builds, a window save will preserve
  // the %i and %l registers.  For the 32-bit longs-in-two entries and 64-bit
  // builds a window-save will preserve the %o registers.  In the LION build
  // we need to save the 64-bit %o registers which requires we save them
  // before the window-save (as then they become %i registers and get their
  // heads chopped off on interrupt).  We have to save some %g registers here
  // as well.
  enum {
    // This frame's save area.  Includes extra space for the native call:
    // vararg's layout space and the like.  Briefly holds the caller's
    // register save area.
    call_args_area = frame::register_save_words_sp_offset +
                     frame::memory_parameter_word_sp_offset*wordSize,
    // Make sure save locations are always 8 byte aligned.
    // can't use round_to because it doesn't produce compile time constant
    start_of_extra_save_area = ((call_args_area + 7) & ~7),
    g1_offset = start_of_extra_save_area, // g-regs needing saving
    g3_offset = g1_offset+8,
    g4_offset = g3_offset+8,
    g5_offset = g4_offset+8,
    o0_offset = g5_offset+8,
    o1_offset = o0_offset+8,
    o2_offset = o1_offset+8,
    o3_offset = o2_offset+8,
    o4_offset = o3_offset+8,
    o5_offset = o4_offset+8,
    start_of_flags_save_area = o5_offset+8,
    ccr_offset = start_of_flags_save_area,
    fsr_offset = ccr_offset + 8,
    d00_offset = fsr_offset+8,  // Start of float save area
    register_save_size = d00_offset+8*32
  };
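
  // A rough sketch of the save-area layout implied by the enum above
  // (offsets are from the stub frame's SP, before any STACK_BIAS adjustment):
  //
  //   call_args_area ..            window save words + memory parameter words
  //   g1/g3/g4/g5_offset           8-byte slots for the saved %g registers
  //   o0..o5_offset                8-byte slots for the saved %o registers
  //   ccr_offset, fsr_offset       condition codes and FP state register
  //   d00_offset ..                32 doubles (the first 16 also cover %f0-%f31)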


  public:

  static int Oexception_offset() { return o0_offset; };
  static int G3_offset() { return g3_offset; };
  static int G5_offset() { return g5_offset; };
  static OopMap* save_live_registers(MacroAssembler* masm, int additional_frame_words, int* total_frame_words);
  static void restore_live_registers(MacroAssembler* masm);

  // During deoptimization only the result register needs to be restored;
  // all the other values have already been extracted.

  static void restore_result_registers(MacroAssembler* masm);
};

OopMap* RegisterSaver::save_live_registers(MacroAssembler* masm, int additional_frame_words, int* total_frame_words) {
  // Record volatile registers as callee-save values in an OopMap so their save locations will be
  // propagated to the caller frame's RegisterMap during StackFrameStream construction (needed for
  // deoptimization; see compiledVFrame::create_stack_value).  The caller's I, L and O registers
  // are saved in register windows - I's and L's in the caller's frame and O's in the stub frame
  // (as the stub's I's) when the runtime routine called by the stub creates its frame.
  int i;
  // Always make the frame size 16 byte aligned.
  int frame_size = round_to(additional_frame_words + register_save_size, 16);
  // OopMap frame size is in c2 stack slots (sizeof(jint)) not bytes or words
  int frame_size_in_slots = frame_size / sizeof(jint);
  // CodeBlob frame size is in words.
  *total_frame_words = frame_size / wordSize;
  // OopMap* map = new OopMap(*total_frame_words, 0);
  OopMap* map = new OopMap(frame_size_in_slots, 0);

#if !defined(_LP64)

  // Save 64-bit O registers; they will get their heads chopped off on a 'save'.
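  // Concretely: with a 64-bit quantity live in %o0, the 'save' below renames
  // it to %i0 of the new window, and a 32-bit window spill/fill only preserves
  // the low 32 bits of each window register.  Staging the full 64-bit values
  // in the thread-local o_reg_temps area first, and reloading them after the
  // 'save', keeps the high halves alive across the window operation.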
  __ stx(O0, G2_thread, JavaThread::o_reg_temps_offset_in_bytes()+0*8);
  __ stx(O1, G2_thread, JavaThread::o_reg_temps_offset_in_bytes()+1*8);
  __ stx(O2, G2_thread, JavaThread::o_reg_temps_offset_in_bytes()+2*8);
  __ stx(O3, G2_thread, JavaThread::o_reg_temps_offset_in_bytes()+3*8);
  __ stx(O4, G2_thread, JavaThread::o_reg_temps_offset_in_bytes()+4*8);
  __ stx(O5, G2_thread, JavaThread::o_reg_temps_offset_in_bytes()+5*8);
#endif /* _LP64 */

  __ save(SP, -frame_size, SP);

#ifndef _LP64
  // Reload the 64 bit Oregs. Although they are now Iregs we load them
  // to Oregs here to avoid interrupts cutting off their heads

  __ ldx(G2_thread, JavaThread::o_reg_temps_offset_in_bytes()+0*8, O0);
  __ ldx(G2_thread, JavaThread::o_reg_temps_offset_in_bytes()+1*8, O1);
  __ ldx(G2_thread, JavaThread::o_reg_temps_offset_in_bytes()+2*8, O2);
  __ ldx(G2_thread, JavaThread::o_reg_temps_offset_in_bytes()+3*8, O3);
  __ ldx(G2_thread, JavaThread::o_reg_temps_offset_in_bytes()+4*8, O4);
  __ ldx(G2_thread, JavaThread::o_reg_temps_offset_in_bytes()+5*8, O5);

  __ stx(O0, SP, o0_offset+STACK_BIAS);
  map->set_callee_saved(VMRegImpl::stack2reg((o0_offset + 4)>>2), O0->as_VMReg());

  __ stx(O1, SP, o1_offset+STACK_BIAS);
  map->set_callee_saved(VMRegImpl::stack2reg((o1_offset + 4)>>2), O1->as_VMReg());

  __ stx(O2, SP, o2_offset+STACK_BIAS);
  map->set_callee_saved(VMRegImpl::stack2reg((o2_offset + 4)>>2), O2->as_VMReg());

  __ stx(O3, SP, o3_offset+STACK_BIAS);
  map->set_callee_saved(VMRegImpl::stack2reg((o3_offset + 4)>>2), O3->as_VMReg());

  __ stx(O4, SP, o4_offset+STACK_BIAS);
  map->set_callee_saved(VMRegImpl::stack2reg((o4_offset + 4)>>2), O4->as_VMReg());

  __ stx(O5, SP, o5_offset+STACK_BIAS);
  map->set_callee_saved(VMRegImpl::stack2reg((o5_offset + 4)>>2), O5->as_VMReg());
#endif /* _LP64 */


#ifdef _LP64
  int debug_offset = 0;
#else
  int debug_offset = 4;
#endif
  // Save the G's
  __ stx(G1, SP, g1_offset+STACK_BIAS);
  map->set_callee_saved(VMRegImpl::stack2reg((g1_offset + debug_offset)>>2), G1->as_VMReg());

  __ stx(G3, SP, g3_offset+STACK_BIAS);
  map->set_callee_saved(VMRegImpl::stack2reg((g3_offset + debug_offset)>>2), G3->as_VMReg());

  __ stx(G4, SP, g4_offset+STACK_BIAS);
  map->set_callee_saved(VMRegImpl::stack2reg((g4_offset + debug_offset)>>2), G4->as_VMReg());

  __ stx(G5, SP, g5_offset+STACK_BIAS);
  map->set_callee_saved(VMRegImpl::stack2reg((g5_offset + debug_offset)>>2), G5->as_VMReg());
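
  // Note on the '>>2' and 'debug_offset' arithmetic above: OopMap locations
  // are recorded in 4-byte stack slots.  SPARC is big-endian, so a 64-bit stx
  // puts the high word at the named offset and the low word 4 bytes above it.
  // On the 32-bit build (debug_offset == 4) the named register therefore maps
  // to offset+4, while the ->next() halves registered below map to the offset
  // itself.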

  // This is really a waste but we'll keep things as they were for now
  if (true) {
#ifndef _LP64
    map->set_callee_saved(VMRegImpl::stack2reg((o0_offset)>>2), O0->as_VMReg()->next());
    map->set_callee_saved(VMRegImpl::stack2reg((o1_offset)>>2), O1->as_VMReg()->next());
    map->set_callee_saved(VMRegImpl::stack2reg((o2_offset)>>2), O2->as_VMReg()->next());
    map->set_callee_saved(VMRegImpl::stack2reg((o3_offset)>>2), O3->as_VMReg()->next());
    map->set_callee_saved(VMRegImpl::stack2reg((o4_offset)>>2), O4->as_VMReg()->next());
    map->set_callee_saved(VMRegImpl::stack2reg((o5_offset)>>2), O5->as_VMReg()->next());
    map->set_callee_saved(VMRegImpl::stack2reg((g1_offset)>>2), G1->as_VMReg()->next());
    map->set_callee_saved(VMRegImpl::stack2reg((g3_offset)>>2), G3->as_VMReg()->next());
    map->set_callee_saved(VMRegImpl::stack2reg((g4_offset)>>2), G4->as_VMReg()->next());
    map->set_callee_saved(VMRegImpl::stack2reg((g5_offset)>>2), G5->as_VMReg()->next());
#endif /* _LP64 */
  }


  // Save the flags
  __ rdccr( G5 );
  __ stx(G5, SP, ccr_offset+STACK_BIAS);
  __ stxfsr(SP, fsr_offset+STACK_BIAS);

  // Save all the FP registers: 32 doubles (32 floats correspond to the 2 halves of the first 16 doubles)
  int offset = d00_offset;
  for( int i=0; i<FloatRegisterImpl::number_of_registers; i+=2 ) {
    FloatRegister f = as_FloatRegister(i);
    __ stf(FloatRegisterImpl::D, f, SP, offset+STACK_BIAS);
    // Record as callee saved both halves of double registers (2 float registers).
    map->set_callee_saved(VMRegImpl::stack2reg(offset>>2), f->as_VMReg());
    map->set_callee_saved(VMRegImpl::stack2reg((offset + sizeof(float))>>2), f->as_VMReg()->next());
    offset += sizeof(double);
  }

  // And we're done.

  return map;
}
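
// Typical use of this pair (a sketch of the pattern the safepoint and
// exception blobs elsewhere in this file follow): call save_live_registers()
// on entry to a runtime stub, call into the VM, then restore_live_registers()
// before returning, with the returned OopMap registered at the call site so
// the GC can find and update the saved values.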

// Pop the current frame and restore all the registers that we
// saved.
void RegisterSaver::restore_live_registers(MacroAssembler* masm) {

  // Restore all the FP registers
  for( int i=0; i<FloatRegisterImpl::number_of_registers; i+=2 ) {
    __ ldf(FloatRegisterImpl::D, SP, d00_offset+i*sizeof(float)+STACK_BIAS, as_FloatRegister(i));
  }

  __ ldx(SP, ccr_offset+STACK_BIAS, G1);
  __ wrccr (G1) ;

  // Restore the G's
  // Note that G2 (AKA GThread) must be saved and restored separately.
  // TODO-FIXME: save and restore some of the other ASRs, viz., %asi and %gsr.

  __ ldx(SP, g1_offset+STACK_BIAS, G1);
  __ ldx(SP, g3_offset+STACK_BIAS, G3);
  __ ldx(SP, g4_offset+STACK_BIAS, G4);
  __ ldx(SP, g5_offset+STACK_BIAS, G5);


#if !defined(_LP64)
  // Restore the 64-bit O's.
  __ ldx(SP, o0_offset+STACK_BIAS, O0);
  __ ldx(SP, o1_offset+STACK_BIAS, O1);
  __ ldx(SP, o2_offset+STACK_BIAS, O2);
  __ ldx(SP, o3_offset+STACK_BIAS, O3);
  __ ldx(SP, o4_offset+STACK_BIAS, O4);
  __ ldx(SP, o5_offset+STACK_BIAS, O5);

  // And temporarily place them in TLS

  __ stx(O0, G2_thread, JavaThread::o_reg_temps_offset_in_bytes()+0*8);
  __ stx(O1, G2_thread, JavaThread::o_reg_temps_offset_in_bytes()+1*8);
  __ stx(O2, G2_thread, JavaThread::o_reg_temps_offset_in_bytes()+2*8);
  __ stx(O3, G2_thread, JavaThread::o_reg_temps_offset_in_bytes()+3*8);
  __ stx(O4, G2_thread, JavaThread::o_reg_temps_offset_in_bytes()+4*8);
  __ stx(O5, G2_thread, JavaThread::o_reg_temps_offset_in_bytes()+5*8);
#endif /* _LP64 */

  // Restore flags

  __ ldxfsr(SP, fsr_offset+STACK_BIAS);

  __ restore();

#if !defined(_LP64)
  // Now reload the 64bit Oregs after we've restored the window.
  __ ldx(G2_thread, JavaThread::o_reg_temps_offset_in_bytes()+0*8, O0);
  __ ldx(G2_thread, JavaThread::o_reg_temps_offset_in_bytes()+1*8, O1);
  __ ldx(G2_thread, JavaThread::o_reg_temps_offset_in_bytes()+2*8, O2);
  __ ldx(G2_thread, JavaThread::o_reg_temps_offset_in_bytes()+3*8, O3);
  __ ldx(G2_thread, JavaThread::o_reg_temps_offset_in_bytes()+4*8, O4);
  __ ldx(G2_thread, JavaThread::o_reg_temps_offset_in_bytes()+5*8, O5);
#endif /* _LP64 */

}

// Pop the current frame and restore the registers that might be holding
// a result.
void RegisterSaver::restore_result_registers(MacroAssembler* masm) {

#if !defined(_LP64)
  // 32bit build returns longs in G1
  __ ldx(SP, g1_offset+STACK_BIAS, G1);

  // Retrieve the 64-bit O's.
  __ ldx(SP, o0_offset+STACK_BIAS, O0);
  __ ldx(SP, o1_offset+STACK_BIAS, O1);
  // and save to TLS
  __ stx(O0, G2_thread, JavaThread::o_reg_temps_offset_in_bytes()+0*8);
  __ stx(O1, G2_thread, JavaThread::o_reg_temps_offset_in_bytes()+1*8);
#endif /* _LP64 */

  __ ldf(FloatRegisterImpl::D, SP, d00_offset+STACK_BIAS, as_FloatRegister(0));

  __ restore();

#if !defined(_LP64)
  // Now reload the 64bit Oregs after we've restored the window.
  __ ldx(G2_thread, JavaThread::o_reg_temps_offset_in_bytes()+0*8, O0);
  __ ldx(G2_thread, JavaThread::o_reg_temps_offset_in_bytes()+1*8, O1);
#endif /* _LP64 */

}

// Is vector's size (in bytes) bigger than a size saved by default?
// 8 bytes FP registers are saved by default on SPARC.
bool SharedRuntime::is_wide_vector(int size) {
  // Note, MaxVectorSize == 8 on SPARC.
  assert(size <= 8, err_msg_res("%d bytes vectors are not supported", size));
  return size > 8;
}

// The java_calling_convention describes stack locations as ideal slots on
// a frame with no abi restrictions.  Since we must observe abi restrictions
// (like the placement of the register window) the slots must be biased by
// the following value.
static int reg2offset(VMReg r) {
  return (r->reg2stack() + SharedRuntime::out_preserve_stack_slots()) * VMRegImpl::stack_slot_size;
}

static VMRegPair reg64_to_VMRegPair(Register r) {
  VMRegPair ret;
  if (wordSize == 8) {
    ret.set2(r->as_VMReg());
  } else {
    ret.set_pair(r->successor()->as_VMReg(), r->as_VMReg());
  }
  return ret;
}

// ---------------------------------------------------------------------------
// Read the array of BasicTypes from a signature, and compute where the
// arguments should go.  Values in the VMRegPair regs array refer to 4-byte (VMRegImpl::stack_slot_size)
// quantities.  Values less than VMRegImpl::stack0 are registers, those above
// refer to 4-byte stack slots.  All stack slots are based off of the window
// top.  VMRegImpl::stack0 refers to the first slot past the 16-word window,
// and VMRegImpl::stack0+1 refers to the memory word 4 bytes higher.  Register
// values 0-63 (up to RegisterImpl::number_of_registers) are the 64-bit
// integer registers.  Values 64-95 are the (32-bit only) float registers.
// Each 32-bit quantity is given its own number, so the integer registers
// (in either 32- or 64-bit builds) use 2 numbers.  For example, there is
// an O0-low and an O0-high.  Essentially, all int register numbers are doubled.

// Register results are passed in O0-O5, for outgoing call arguments.  To
// convert to incoming arguments, convert all O's to I's.  The regs array
// refers to the low and hi 32-bit words of 64-bit registers or stack slots.
// If the regs[].second() field is set to VMRegImpl::Bad(), it means it's unused (a
// 32-bit value was passed).  If both are VMRegImpl::Bad(), it means no value was
// passed (used as a placeholder for the other half of longs and doubles in
// the 64-bit build).  regs[].second() is either VMRegImpl::Bad() or regs[].second() is
// regs[].first()+1 (regs[].first() may be misaligned in the C calling convention).
// Sparc never passes a value in regs[].second() but not regs[].first() (regs[].first()
// == VMRegImpl::Bad() && regs[].second() != VMRegImpl::Bad()) nor unrelated values in the
// same VMRegPair.

// Note: the INPUTS in sig_bt are in units of Java argument words, which are
// either 32-bit or 64-bit depending on the build.  The OUTPUTS are in 32-bit
// units regardless of build.
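
// For example: the 64-bit integer register O0 owns two consecutive slot
// numbers, so a 32-bit int in O0 is described by O0's first VMReg alone,
// while a long in O0 uses the pair (first, first->next()).  A float in F0
// uses a single number from the 64-95 float range.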
37614825Swollman// In the 32-bit build, longs are passed on the stack (cannot be 377171797Snjl// passed in I's, because longs in I's get their heads chopped off at 37814825Swollman// interrupt). 37969006Smarkmint SharedRuntime::java_calling_convention(const BasicType *sig_bt, 38014825Swollman VMRegPair *regs, 38114825Swollman int total_args_passed, 38299862Speter int is_outgoing) { 38399862Speter assert(F31->as_VMReg()->is_reg(), "overlapping stack/register numbers"); 38499862Speter 38599862Speter const int int_reg_max = SPARC_ARGS_IN_REGS_NUM; 38699862Speter const int flt_reg_max = 8; 38799862Speter 38899862Speter int int_reg = 0; 38946129Sluoqi int flt_reg = 0; 39099862Speter int slot = 0; 39199862Speter 39299862Speter for (int i = 0; i < total_args_passed; i++) { 39399862Speter switch (sig_bt[i]) { 39499862Speter case T_INT: 39599862Speter case T_SHORT: 39699862Speter case T_CHAR: 39799862Speter case T_BYTE: 39899862Speter case T_BOOLEAN: 39999862Speter#ifndef _LP64 40099862Speter case T_OBJECT: 40199862Speter case T_ARRAY: 40299862Speter case T_ADDRESS: // Used, e.g., in slow-path locking for the lock's stack address 403184040Skmacy#endif // _LP64 404184040Skmacy if (int_reg < int_reg_max) { 405184040Skmacy Register r = is_outgoing ? as_oRegister(int_reg++) : as_iRegister(int_reg++); 40699862Speter regs[i].set1(r->as_VMReg()); 40799862Speter } else { 40899862Speter regs[i].set1(VMRegImpl::stack2reg(slot++)); 40999862Speter } 41099862Speter break; 41199862Speter 41299862Speter#ifdef _LP64 413181775Skmacy case T_LONG: 414181775Skmacy assert(sig_bt[i+1] == T_VOID, "expecting VOID in other half"); 415181775Skmacy // fall-through 41699862Speter case T_OBJECT: 417181775Skmacy case T_ARRAY: 41899862Speter case T_ADDRESS: // Used, e.g., in slow-path locking for the lock's stack address 41999862Speter if (int_reg < int_reg_max) { 42099862Speter Register r = is_outgoing ? as_oRegister(int_reg++) : as_iRegister(int_reg++); 42199862Speter regs[i].set2(r->as_VMReg()); 42299862Speter } else { 42399862Speter slot = round_to(slot, 2); // align 42499862Speter regs[i].set2(VMRegImpl::stack2reg(slot)); 42599862Speter slot += 2; 42699862Speter } 42799862Speter break; 42899862Speter#else 42999862Speter case T_LONG: 43099862Speter assert(sig_bt[i+1] == T_VOID, "expecting VOID in other half"); 43199862Speter // On 32-bit SPARC put longs always on the stack to keep the pressure off 43299862Speter // integer argument registers. They should be used for oops. 

// Helper class mostly to avoid passing masm everywhere, and handle
// store displacement overflow logic.
class AdapterGenerator {
  MacroAssembler *masm;
  Register Rdisp;
  void set_Rdisp(Register r)  { Rdisp = r; }

  void patch_callers_callsite();

  // base+st_off points to top of argument
  int arg_offset(const int st_off) { return st_off; }
  int next_arg_offset(const int st_off) {
    return st_off - Interpreter::stackElementSize;
  }

  // Argument slot values may be loaded first into a register because
  // they might not fit into displacement.
  RegisterOrConstant arg_slot(const int st_off);
  RegisterOrConstant next_arg_slot(const int st_off);

  // Stores long into offset pointed to by base
  void store_c2i_long(Register r, Register base,
                      const int st_off, bool is_stack);
  void store_c2i_object(Register r, Register base,
                        const int st_off);
  void store_c2i_int(Register r, Register base,
                     const int st_off);
  void store_c2i_double(VMReg r_2,
                        VMReg r_1, Register base, const int st_off);
  void store_c2i_float(FloatRegister f, Register base,
                       const int st_off);

 public:
  void gen_c2i_adapter(int total_args_passed,
                       // VMReg max_arg,
                       int comp_args_on_stack, // VMRegStackSlots
                       const BasicType *sig_bt,
                       const VMRegPair *regs,
                       Label& skip_fixup);
  void gen_i2c_adapter(int total_args_passed,
                       // VMReg max_arg,
                       int comp_args_on_stack, // VMRegStackSlots
                       const BasicType *sig_bt,
                       const VMRegPair *regs);

  AdapterGenerator(MacroAssembler *_masm) : masm(_masm) {}
};
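
// Background for arg_slot()/next_arg_slot(): SPARC loads and stores only
// encode a 13-bit signed immediate displacement (roughly +/-4KB), so any
// offset outside the simm13 range has to be materialized into Rdisp first;
// that is what MacroAssembler::ensure_simm13_or_reg does below.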


// Patch the callers callsite with entry to compiled code if it exists.
void AdapterGenerator::patch_callers_callsite() {
  Label L;
  __ ld_ptr(G5_method, in_bytes(Method::code_offset()), G3_scratch);
  __ br_null(G3_scratch, false, Assembler::pt, L);
  __ delayed()->nop();
  // Call into the VM to patch the caller, then jump to compiled callee
  __ save_frame(4);     // Args in compiled layout; do not blow them

  // Must save all the live Gregs the list is:
  // G1: 1st Long arg (32bit build)
  // G2: global allocated to TLS
  // G3: used in inline cache check (scratch)
  // G4: 2nd Long arg (32bit build);
  // G5: used in inline cache check (Method*)

  // The longs must go to the stack by hand since in the 32 bit build they can be trashed by window ops.

#ifdef _LP64
  // mov(s,d)
  __ mov(G1, L1);
  __ mov(G4, L4);
  __ mov(G5_method, L5);
  __ mov(G5_method, O0);         // VM needs target method
  __ mov(I7, O1);                // VM needs caller's callsite
  // Must be a leaf call...
  // can be very far once the blob has been relocated
  AddressLiteral dest(CAST_FROM_FN_PTR(address, SharedRuntime::fixup_callers_callsite));
  __ relocate(relocInfo::runtime_call_type);
  __ jumpl_to(dest, O7, O7);
  __ delayed()->mov(G2_thread, L7_thread_cache);
  __ mov(L7_thread_cache, G2_thread);
  __ mov(L1, G1);
  __ mov(L4, G4);
  __ mov(L5, G5_method);
#else
  __ stx(G1, FP, -8 + STACK_BIAS);
  __ stx(G4, FP, -16 + STACK_BIAS);
  __ mov(G5_method, L5);
  __ mov(G5_method, O0);         // VM needs target method
  __ mov(I7, O1);                // VM needs caller's callsite
  // Must be a leaf call...
  __ call(CAST_FROM_FN_PTR(address, SharedRuntime::fixup_callers_callsite), relocInfo::runtime_call_type);
  __ delayed()->mov(G2_thread, L7_thread_cache);
  __ mov(L7_thread_cache, G2_thread);
  __ ldx(FP, -8 + STACK_BIAS, G1);
  __ ldx(FP, -16 + STACK_BIAS, G4);
  __ mov(L5, G5_method);
#endif /* _LP64 */

  __ restore();      // Restore args
  __ bind(L);
}
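
// In effect: fixup_callers_callsite rewrites the compiled caller's call site
// to target the callee's compiled entry when one exists, so subsequent calls
// should bypass this adapter entirely; the invocation that triggered the
// patch still completes through the interpreter.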


RegisterOrConstant AdapterGenerator::arg_slot(const int st_off) {
  RegisterOrConstant roc(arg_offset(st_off));
  return __ ensure_simm13_or_reg(roc, Rdisp);
}

RegisterOrConstant AdapterGenerator::next_arg_slot(const int st_off) {
  RegisterOrConstant roc(next_arg_offset(st_off));
  return __ ensure_simm13_or_reg(roc, Rdisp);
}


// Stores long into offset pointed to by base
void AdapterGenerator::store_c2i_long(Register r, Register base,
                                      const int st_off, bool is_stack) {
#ifdef _LP64
  // In V9, longs are given 2 64-bit slots in the interpreter, but the
  // data is passed in only 1 slot.
  __ stx(r, base, next_arg_slot(st_off));
#else
#ifdef COMPILER2
  // Misaligned store of 64-bit data
  __ stw(r, base, arg_slot(st_off));      // lo bits
  __ srlx(r, 32, r);
  __ stw(r, base, next_arg_slot(st_off)); // hi bits
#else
  if (is_stack) {
    // Misaligned store of 64-bit data
    __ stw(r, base, arg_slot(st_off));      // lo bits
    __ srlx(r, 32, r);
    __ stw(r, base, next_arg_slot(st_off)); // hi bits
  } else {
    __ stw(r->successor(), base, arg_slot(st_off)     ); // lo bits
    __ stw(r             , base, next_arg_slot(st_off)); // hi bits
  }
#endif // COMPILER2
#endif // _LP64
}

void AdapterGenerator::store_c2i_object(Register r, Register base,
                                        const int st_off) {
  __ st_ptr (r, base, arg_slot(st_off));
}

void AdapterGenerator::store_c2i_int(Register r, Register base,
                                     const int st_off) {
  __ st (r, base, arg_slot(st_off));
}

// Stores into offset pointed to by base
void AdapterGenerator::store_c2i_double(VMReg r_2,
                      VMReg r_1, Register base, const int st_off) {
#ifdef _LP64
  // In V9, doubles are given 2 64-bit slots in the interpreter, but the
  // data is passed in only 1 slot.
  __ stf(FloatRegisterImpl::D, r_1->as_FloatRegister(), base, next_arg_slot(st_off));
#else
  // Need to marshal 64-bit value from misaligned Lesp loads
  __ stf(FloatRegisterImpl::S, r_1->as_FloatRegister(), base, next_arg_slot(st_off));
  __ stf(FloatRegisterImpl::S, r_2->as_FloatRegister(), base, arg_slot(st_off) );
#endif
}

void AdapterGenerator::store_c2i_float(FloatRegister f, Register base,
                                       const int st_off) {
  __ stf(FloatRegisterImpl::S, f, base, arg_slot(st_off));
}
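
// Layout reminder for the store_c2i_* helpers above: each interpreter stack
// element is Interpreter::stackElementSize bytes, and base+st_off addresses
// the element nearest the top of the argument.  Longs and doubles occupy two
// elements; on V9 the 64-bit datum lands in the lower-addressed of the two
// slots (next_arg_slot), which is why a single stx or stf D suffices there.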

void AdapterGenerator::gen_c2i_adapter(
                            int total_args_passed,
                            // VMReg max_arg,
                            int comp_args_on_stack, // VMRegStackSlots
                            const BasicType *sig_bt,
                            const VMRegPair *regs,
                            Label& L_skip_fixup) {

  // Before we get into the guts of the C2I adapter, see if we should be here
  // at all.  We've come from compiled code and are attempting to jump to the
  // interpreter, which means the caller made a static call to get here
  // (vcalls always get a compiled target if there is one).  Check for a
  // compiled target.  If there is one, we need to patch the caller's call.
  // However we will run interpreted if we come thru here. The next pass
  // thru the call site will run compiled. If we ran compiled here then
  // we can (theoretically) do endless i2c->c2i->i2c transitions during
  // deopt/uncommon trap cycles. If we always go interpreted here then
  // we can have at most one and don't need to play any tricks to keep
  // from endlessly growing the stack.
  //
  // Actually if we detected that we had an i2c->c2i transition here we
  // ought to be able to reset the world back to the state of the interpreted
  // call and not bother building another interpreter arg area. We don't
  // do that at this point.

  patch_callers_callsite();

  __ bind(L_skip_fixup);

  // Since all args are passed on the stack, total_args_passed*wordSize is the
  // space we need.  Add in varargs area needed by the interpreter. Round up
  // to stack alignment.
  const int arg_size = total_args_passed * Interpreter::stackElementSize;
  const int varargs_area =
                 (frame::varargs_offset - frame::register_save_words)*wordSize;
  const int extraspace = round_to(arg_size + varargs_area, 2*wordSize);

  const int bias = STACK_BIAS;
  const int interp_arg_offset = frame::varargs_offset*wordSize +
                        (total_args_passed-1)*Interpreter::stackElementSize;

  const Register base = SP;

  // Make some extra space on the stack.
  __ sub(SP, __ ensure_simm13_or_reg(extraspace, G3_scratch), SP);
  set_Rdisp(G3_scratch);
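
  // Sizing sketch (assuming the 64-bit build, where Interpreter::
  // stackElementSize is the 8-byte word size): three Java arguments need
  // 24 bytes of argument space, plus the interpreter's varargs area, with
  // the sum rounded up to 16-byte alignment by the round_to above.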

  // Write the args into the outgoing interpreter space.
  for (int i = 0; i < total_args_passed; i++) {
    const int st_off = interp_arg_offset - (i*Interpreter::stackElementSize) + bias;
    VMReg r_1 = regs[i].first();
    VMReg r_2 = regs[i].second();
    if (!r_1->is_valid()) {
      assert(!r_2->is_valid(), "");
      continue;
    }
    if (r_1->is_stack()) {        // Pretend stack targets are loaded into G1
      RegisterOrConstant ld_off = reg2offset(r_1) + extraspace + bias;
      ld_off = __ ensure_simm13_or_reg(ld_off, Rdisp);
      r_1 = G1_scratch->as_VMReg();// as part of the load/store shuffle
      if (!r_2->is_valid()) __ ld (base, ld_off, G1_scratch);
      else                  __ ldx(base, ld_off, G1_scratch);
    }

    if (r_1->is_Register()) {
      Register r = r_1->as_Register()->after_restore();
      if (sig_bt[i] == T_OBJECT || sig_bt[i] == T_ARRAY) {
        store_c2i_object(r, base, st_off);
      } else if (sig_bt[i] == T_LONG || sig_bt[i] == T_DOUBLE) {
        store_c2i_long(r, base, st_off, r_2->is_stack());
      } else {
        store_c2i_int(r, base, st_off);
      }
    } else {
      assert(r_1->is_FloatRegister(), "");
      if (sig_bt[i] == T_FLOAT) {
        store_c2i_float(r_1->as_FloatRegister(), base, st_off);
      } else {
        assert(sig_bt[i] == T_DOUBLE, "wrong type");
        store_c2i_double(r_2, r_1, base, st_off);
      }
    }
  }

  // Load the interpreter entry point.
  __ ld_ptr(G5_method, in_bytes(Method::interpreter_entry_offset()), G3_scratch);

  // Pass O5_savedSP as an argument to the interpreter.
  // The interpreter will restore SP to this value before returning.
  __ add(SP, __ ensure_simm13_or_reg(extraspace, G1), O5_savedSP);

  __ mov((frame::varargs_offset)*wordSize -
         1*Interpreter::stackElementSize+bias+BytesPerWord, G1);
  // Jump to the interpreter just as if interpreter was doing it.
  __ jmpl(G3_scratch, 0, G0);
  // Setup Lesp for the call.  Cannot actually set Lesp as the current Lesp
  // (really L0) is in use by the compiled frame as a generic temp.  However,
  // the interpreter does not know where its args are without some kind of
  // arg pointer being passed in. Pass it in Gargs.
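  // (The add below executes in the jmpl's delay slot, so Gargs is set just
  // as control transfers to the interpreter entry.)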
  __ delayed()->add(SP, G1, Gargs);
}

static void range_check(MacroAssembler* masm, Register pc_reg, Register temp_reg, Register temp2_reg,
                        address code_start, address code_end,
                        Label& L_ok) {
  Label L_fail;
  __ set(ExternalAddress(code_start), temp_reg);
  __ set(pointer_delta(code_end, code_start, 1), temp2_reg);
  __ cmp(pc_reg, temp_reg);
  __ brx(Assembler::lessEqualUnsigned, false, Assembler::pn, L_fail);
  __ delayed()->add(temp_reg, temp2_reg, temp_reg);
  __ cmp(pc_reg, temp_reg);
  __ cmp_and_brx_short(pc_reg, temp_reg, Assembler::lessUnsigned, Assembler::pt, L_ok);
  __ bind(L_fail);
}

void AdapterGenerator::gen_i2c_adapter(
                            int total_args_passed,
                            // VMReg max_arg,
                            int comp_args_on_stack, // VMRegStackSlots
                            const BasicType *sig_bt,
                            const VMRegPair *regs) {

  // Generate an I2C adapter: adjust the I-frame to make space for the C-frame
  // layout.  Lesp was saved by the calling I-frame and will be restored on
  // return.  Meanwhile, outgoing arg space is all owned by the callee
  // C-frame, so we can mangle it at will.  After adjusting the frame size,
  // hoist register arguments and repack other args according to the compiled
  // code convention.  Finally, end in a jump to the compiled code.  The entry
  // point address is the start of the buffer.

  // We will only enter here from an interpreted frame and never from after
  // passing thru a c2i. Azul allowed this but we do not. If we lose the
  // race and use a c2i we will remain interpreted for the race loser(s).
  // This removes all sorts of headaches on the x86 side and also eliminates
  // the possibility of having c2i -> i2c -> c2i -> ... endless transitions.

  // More detail:
  // Adapters can be frameless because they do not require the caller
  // to perform additional cleanup work, such as correcting the stack pointer.
  // An i2c adapter is frameless because the *caller* frame, which is interpreted,
  // routinely repairs its own stack pointer (from interpreter_frame_last_sp),
  // even if a callee has modified the stack pointer.
  // A c2i adapter is frameless because the *callee* frame, which is interpreted,
  // routinely repairs its caller's stack pointer (from sender_sp, which is set
  // up via the senderSP register).
  // In other words, if *either* the caller or callee is interpreted, we can
  // get the stack pointer repaired after a call.
  // This is why c2i and i2c adapters cannot be indefinitely composed.
  // In particular, if a c2i adapter were to somehow call an i2c adapter,
  // both caller and callee would be compiled methods, and neither would
  // clean up the stack pointer changes performed by the two adapters.
  // If this happens, control eventually transfers back to the compiled
  // caller, but with an uncorrected stack, causing delayed havoc.

  if (VerifyAdapterCalls &&
      (Interpreter::code() != NULL || StubRoutines::code1() != NULL)) {
    // So, let's test for cascading c2i/i2c adapters right now.
    //  assert(Interpreter::contains($return_addr) ||
    //         StubRoutines::contains($return_addr),
    //         "i2c adapter must return to an interpreter frame");
    __ block_comment("verify_i2c { ");
    Label L_ok;
    if (Interpreter::code() != NULL)
      range_check(masm, O7, O0, O1,
                  Interpreter::code()->code_start(), Interpreter::code()->code_end(),
                  L_ok);
    if (StubRoutines::code1() != NULL)
      range_check(masm, O7, O0, O1,
                  StubRoutines::code1()->code_begin(), StubRoutines::code1()->code_end(),
                  L_ok);
    if (StubRoutines::code2() != NULL)
      range_check(masm, O7, O0, O1,
                  StubRoutines::code2()->code_begin(), StubRoutines::code2()->code_end(),
                  L_ok);
    const char* msg = "i2c adapter must return to an interpreter frame";
    __ block_comment(msg);
    __ stop(msg);
    __ bind(L_ok);
    __ block_comment("} verify_i2ce ");
  }

  // As you can see from the list of inputs & outputs there are not a lot
  // of temp registers to work with: mostly G1, G3 & G4.

  // Inputs:
  // G2_thread      - TLS
  // G5_method      - Method oop
  // G4 (Gargs)     - Pointer to interpreter's args
  // O0..O4         - free for scratch
  // O5_savedSP     - Caller's saved SP, to be restored if needed
  // O6             - Current SP!
  // O7             - Valid return address
  // L0-L7, I0-I7   - Caller's temps (no frame pushed yet)

  // Outputs:
  // G2_thread      - TLS
  // O0-O5          - Outgoing args in compiled layout
  // O6             - Adjusted or restored SP
  // O7             - Valid return address
  // L0-L7, I0-I7   - Caller's temps (no frame pushed yet)
  // F0-F7          - more outgoing args


  // Gargs is the incoming argument base, and also an outgoing argument.
  __ sub(Gargs, BytesPerWord, Gargs);

  // ON ENTRY TO THE CODE WE ARE MAKING, WE HAVE AN INTERPRETED FRAME
  // WITH O7 HOLDING A VALID RETURN PC
  //
  // |              |
  // :  java stack  :
  // |              |
  // +--------------+ <--- start of outgoing args
  // |   receiver   |   |
  // : rest of args :   |---size is java-arg-words
  // |              |   |
  // +--------------+ <--- O4_args (misaligned) and Lesp if prior is not C2I
  // |              |   |
  // :    unused    :   |---Space for max Java stack, plus stack alignment
  // |              |   |
  // +--------------+ <--- SP + 16*wordsize
  // |              |
  // :    window    :
  // |              |
  // +--------------+ <--- SP

  // WE REPACK THE STACK.  We use the common calling convention layout as
  // discovered by calling SharedRuntime::calling_convention.  We assume it
  // causes an arbitrary shuffle of memory, which may require some register
  // temps to do the shuffle.  We hope for (and optimize for) the case where
  // temps are not needed.  We may have to resize the stack slightly, in case
  // we need alignment padding (32-bit interpreter can pass longs & doubles
  // misaligned, but the compilers expect them aligned).
  //
  // |              |
  // :  java stack  :
  // |              |
  // +--------------+ <--- start of outgoing args
  // |  pad, align  |   |
  // +--------------+   |
  // | ints, longs, |   |
  // |    floats,   |   |---Outgoing stack args.
  // :    doubles   :   |   First few args in registers.
  // |              |   |
  // +--------------+ <--- SP' + 16*wordsize
  // |              |
  // :    window    :
  // |              |
  // +--------------+ <--- SP'

  // ON EXIT FROM THE CODE WE ARE MAKING, WE STILL HAVE AN INTERPRETED FRAME
  // WITH O7 HOLDING A VALID RETURN PC - IT'S JUST THAT THE ARGS ARE NOW SETUP
  // FOR COMPILED CODE AND THE FRAME SLIGHTLY GROWN.

  // Cut-out for having no stack args.  Since up to 6 args are passed
  // in registers, we will commonly have no stack args.
  if (comp_args_on_stack > 0) {
    // Convert VMReg stack slots to words.
    int comp_words_on_stack = round_to(comp_args_on_stack*VMRegImpl::stack_slot_size, wordSize)>>LogBytesPerWord;
    // Round up to minimum stack alignment, in wordSize
    comp_words_on_stack = round_to(comp_words_on_stack, 2);
    // Now compute the distance from Lesp to SP.  This calculation does not
    // include the space for total_args_passed because Lesp has not yet popped
    // the arguments.
    __ sub(SP, (comp_words_on_stack)*wordSize, SP);
  }

  // Now generate the shuffle code.  Pick up all register args and move the
  // rest through G1_scratch.
  for (int i = 0; i < total_args_passed; i++) {
    if (sig_bt[i] == T_VOID) {
      // Longs and doubles are passed in native word order, but misaligned
      // in the 32-bit build.
      assert(i > 0 && (sig_bt[i-1] == T_LONG || sig_bt[i-1] == T_DOUBLE), "missing half");
      continue;
    }

    // Pick up 0, 1 or 2 words from Lesp+offset.  Assume mis-aligned in the
    // 32-bit build and aligned in the 64-bit build.  Look for the obvious
    // ldx/lddf optimizations.

    // Load in argument order going down.
    const int ld_off = (total_args_passed-i)*Interpreter::stackElementSize;
    set_Rdisp(G1_scratch);

    VMReg r_1 = regs[i].first();
    VMReg r_2 = regs[i].second();
    if (!r_1->is_valid()) {
      assert(!r_2->is_valid(), "");
      continue;
    }
    if (r_1->is_stack()) {        // Pretend stack targets are loaded into F8/F9
      r_1 = F8->as_VMReg();        // as part of the load/store shuffle
      if (r_2->is_valid()) r_2 = r_1->next();
    }
    if (r_1->is_Register()) {  // Register argument
      Register r = r_1->as_Register()->after_restore();
      if (!r_2->is_valid()) {
        __ ld(Gargs, arg_slot(ld_off), r);
      } else {
#ifdef _LP64
        // In V9, longs are given 2 64-bit slots in the interpreter, but the
        // data is passed in only 1 slot.
        RegisterOrConstant slot = (sig_bt[i] == T_LONG) ?
              next_arg_slot(ld_off) : arg_slot(ld_off);
        __ ldx(Gargs, slot, r);
#else
        fatal("longs should be on stack");
#endif
      }
    } else {
      assert(r_1->is_FloatRegister(), "");
      if (!r_2->is_valid()) {
        __ ldf(FloatRegisterImpl::S, Gargs, arg_slot(ld_off), r_1->as_FloatRegister());
      } else {
#ifdef _LP64
        // In V9, doubles are given 2 64-bit slots in the interpreter, but the
        // data is passed in only 1 slot.  This code also handles longs that
        // are passed on the stack, but need a stack-to-stack move through a
        // spare float register.
        RegisterOrConstant slot = (sig_bt[i] == T_LONG || sig_bt[i] == T_DOUBLE) ?
              next_arg_slot(ld_off) : arg_slot(ld_off);
        __ ldf(FloatRegisterImpl::D, Gargs, slot, r_1->as_FloatRegister());
#else
        // Need to marshal 64-bit value from misaligned Lesp loads
        __ ldf(FloatRegisterImpl::S, Gargs, next_arg_slot(ld_off), r_1->as_FloatRegister());
        __ ldf(FloatRegisterImpl::S, Gargs, arg_slot(ld_off), r_2->as_FloatRegister());
#endif
      }
    }
    // Was the argument really intended to be on the stack, but was loaded
    // into F8/F9?
    if (regs[i].first()->is_stack()) {
      assert(r_1->as_FloatRegister() == F8, "fix this code");
      // Convert stack slot to an SP offset
      int st_off = reg2offset(regs[i].first()) + STACK_BIAS;
      // Store down the shuffled stack word.  Target address _is_ aligned.
      RegisterOrConstant slot = __ ensure_simm13_or_reg(st_off, Rdisp);
      if (!r_2->is_valid()) __ stf(FloatRegisterImpl::S, r_1->as_FloatRegister(), SP, slot);
      else                  __ stf(FloatRegisterImpl::D, r_1->as_FloatRegister(), SP, slot);
    }
  }
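
  // Design note on the shuffle above: routing stack-to-stack moves through
  // the spare F8/F9 pair lets a single ldf/stf handle either 32- or 64-bit
  // data without tying up the scarce integer temps (G1 is already in use for
  // out-of-range displacements via set_Rdisp).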

  // Jump to the compiled code just as if compiled code was doing it.
  __ ld_ptr(G5_method, in_bytes(Method::from_compiled_offset()), G3);

  // 6243940 We might end up in handle_wrong_method if
  // the callee is deoptimized as we race thru here. If that
  // happens we don't want to take a safepoint because the
  // caller frame will look interpreted and arguments are now
  // "compiled" so it is much better to make this transition
  // invisible to the stack walking code. Unfortunately if
  // we try and find the callee by normal means a safepoint
  // is possible. So we stash the desired callee in the thread
  // and the vm will find it there should this case occur.
  Address callee_target_addr(G2_thread, JavaThread::callee_target_offset());
  __ st_ptr(G5_method, callee_target_addr);

  if (StressNonEntrant) {
    // Open a big window for deopt failure
    __ save_frame(0);
    __ mov(G0, L0);
    Label loop;
    __ bind(loop);
    __ sub(L0, 1, L0);
    __ br_null_short(L0, Assembler::pt, loop);
    __ restore();
  }

  __ jmpl(G3, 0, G0);
  __ delayed()->nop();
}

// ---------------------------------------------------------------
AdapterHandlerEntry* SharedRuntime::generate_i2c2i_adapters(MacroAssembler *masm,
                                                            int total_args_passed,
                                                            // VMReg max_arg,
                                                            int comp_args_on_stack, // VMRegStackSlots
                                                            const BasicType *sig_bt,
                                                            const VMRegPair *regs,
                                                            AdapterFingerPrint* fingerprint) {
  address i2c_entry = __ pc();

  AdapterGenerator agen(masm);

  agen.gen_i2c_adapter(total_args_passed, comp_args_on_stack, sig_bt, regs);


  // -------------------------------------------------------------------------
  // Generate a C2I adapter.  On entry we know G5 holds the Method*.  The
  // args start out packed in the compiled layout.  They need to be unpacked
  // into the interpreter layout.  This will almost always require some stack
  // space.  We grow the current (compiled) stack, then repack the args.  We
  // finally end in a jump to the generic interpreter entry point.  On exit
  // from the interpreter, the interpreter will restore our SP (lest the
  // compiled code, which relies solely on SP and not FP, get sick).

  address c2i_unverified_entry = __ pc();
  Label L_skip_fixup;
  {
    Register R_temp = G1;  // another scratch register

    AddressLiteral ic_miss(SharedRuntime::get_ic_miss_stub());

    __ verify_oop(O0);
    __ load_klass(O0, G3_scratch);

    __ ld_ptr(G5_method, CompiledICHolder::holder_klass_offset(), R_temp);
    __ cmp(G3_scratch, R_temp);

    Label ok, ok2;
    __ brx(Assembler::equal, false, Assembler::pt, ok);
    __ delayed()->ld_ptr(G5_method, CompiledICHolder::holder_method_offset(), G5_method);
    __ jump_to(ic_miss, G3_scratch);
    __ delayed()->nop();

    __ bind(ok);
    // Method might have been compiled since the call site was patched to
    // interpreted; if that is the case, treat it as a miss so we can get
    // the call site corrected.
    __ ld_ptr(G5_method, in_bytes(Method::code_offset()), G3_scratch);
    __ bind(ok2);
    __ br_null(G3_scratch, false, Assembler::pt, L_skip_fixup);
    __ delayed()->nop();
    __ jump_to(ic_miss, G3_scratch);
    __ delayed()->nop();

  }

  address c2i_entry = __ pc();

  agen.gen_c2i_adapter(total_args_passed, comp_args_on_stack, sig_bt, regs, L_skip_fixup);

  __ flush();
  return AdapterHandlerLibrary::new_entry(fingerprint, i2c_entry, c2i_entry, c2i_unverified_entry);

}

// Helper function for native calling conventions
static VMReg int_stk_helper( int i ) {
  // Bias any stack based VMReg we get by ignoring the window area
  // but not the register parameter save area.
  //
  // This is strange for the following reasons. We'd normally expect
  // the calling convention to return a VMReg for a stack slot
  // completely ignoring any abi reserved area. C2 thinks of that
  // abi area as only out_preserve_stack_slots. This does not include
  // the area allocated by the C abi to store down integer arguments
  // because the java calling convention does not use it. So
  // since c2 assumes that there are only out_preserve_stack_slots
  // to bias the optoregs (which impacts VMRegs) when actually referencing any actual stack
  // location the c calling convention must add in this bias amount
  // to make up for the fact that the out_preserve_stack_slots is
  // insufficient for C calls. What a mess. I sure hope those 6
  // stack words were worth it on every java call!

  // Another way of cleaning this up would be for out_preserve_stack_slots
  // to take a parameter to say whether it was C or java calling conventions.
  // Then things might look a little better (but not much).

  int mem_parm_offset = i - SPARC_ARGS_IN_REGS_NUM;
  if( mem_parm_offset < 0 ) {
    return as_oRegister(i)->as_VMReg();
  } else {
    int actual_offset = (mem_parm_offset + frame::memory_parameter_word_sp_offset) * VMRegImpl::slots_per_word;
    // Now return a biased offset that will be correct when out_preserve_slots is added back in
    return VMRegImpl::stack2reg(actual_offset - SharedRuntime::out_preserve_stack_slots());
  }
}
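
// Sketch of the mapping this helper produces: argument indices 0..5 land in
// %o0..%o5; index 6 and up becomes a stack slot at
// frame::memory_parameter_word_sp_offset plus (i - 6) words, biased down by
// out_preserve_stack_slots so that adding the preserve area back in yields
// the true offset.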


int SharedRuntime::c_calling_convention(const BasicType *sig_bt,
                                        VMRegPair *regs,
                                        VMRegPair *regs2,
                                        int total_args_passed) {
  assert(regs2 == NULL, "not needed on sparc");

  // Return the number of VMReg stack_slots needed for the args.
  // This value does not include an abi space (like register window
  // save area).

  // The native convention is V8 if !LP64
  // The LP64 convention is the V9 convention which is slightly more sane.

  // We return the amount of VMReg stack slots we need to reserve for all
  // the arguments NOT counting out_preserve_stack_slots. Since we always
  // have space for storing at least 6 registers to memory we start with that.
  // See int_stk_helper for a further discussion.
  int max_stack_slots = (frame::varargs_offset * VMRegImpl::slots_per_word) - SharedRuntime::out_preserve_stack_slots();

#ifdef _LP64
  // V9 convention: All things "as-if" on double-wide stack slots.
  // Hoist any int/ptr/long's in the first 6 to int regs.
  // Hoist any flt/dbl's in the first 16 dbl regs.
  int j = 0;                  // Count of actual args, not HALVES
  for( int i=0; i<total_args_passed; i++, j++ ) {
    switch( sig_bt[i] ) {
    case T_BOOLEAN:
    case T_BYTE:
    case T_CHAR:
    case T_INT:
    case T_SHORT:
      regs[i].set1( int_stk_helper( j ) ); break;
    case T_LONG:
      assert( sig_bt[i+1] == T_VOID, "expecting half" );
    case T_ADDRESS: // raw pointers, like current thread, for VM calls
    case T_ARRAY:
    case T_OBJECT:
    case T_METADATA:
      regs[i].set2( int_stk_helper( j ) );
      break;
    case T_FLOAT:
      if ( j < 16 ) {
        // V9ism: floats go in ODD registers
        regs[i].set1(as_FloatRegister(1 + (j<<1))->as_VMReg());
      } else {
        // V9ism: floats go in ODD stack slot
        regs[i].set1(VMRegImpl::stack2reg(1 + (j<<1)));
      }
      break;
    case T_DOUBLE:
      assert( sig_bt[i+1] == T_VOID, "expecting half" );
      if ( j < 16 ) {
        // V9ism: doubles go in EVEN/ODD regs
        regs[i].set2(as_FloatRegister(j<<1)->as_VMReg());
      } else {
        // V9ism: doubles go in EVEN/ODD stack slots
        regs[i].set2(VMRegImpl::stack2reg(j<<1));
      }
      break;
    case T_VOID:  regs[i].set_bad(); j--; break; // Do not count HALVES
    default:
      ShouldNotReachHere();
    }
    if (regs[i].first()->is_stack()) {
      int off = regs[i].first()->reg2stack();
      if (off > max_stack_slots) max_stack_slots = off;
    }
    if (regs[i].second()->is_stack()) {
      int off = regs[i].second()->reg2stack();
      if (off > max_stack_slots) max_stack_slots = off;
    }
  }

#else // _LP64
  // V8 convention: first 6 things in O-regs, rest on stack.
  // Alignment is willy-nilly.
  for( int i=0; i<total_args_passed; i++ ) {
    switch( sig_bt[i] ) {
    case T_ADDRESS: // raw pointers, like current thread, for VM calls
    case T_ARRAY:
    case T_BOOLEAN:
    case T_BYTE:
    case T_CHAR:
    case T_FLOAT:
    case T_INT:
    case T_OBJECT:
    case T_METADATA:
    case T_SHORT:
      regs[i].set1( int_stk_helper( i ) );
      break;
    case T_DOUBLE:
    case T_LONG:
      assert( sig_bt[i+1] == T_VOID, "expecting half" );
      regs[i].set_pair( int_stk_helper( i+1 ), int_stk_helper( i ) );
      break;
    case T_VOID: regs[i].set_bad(); break;
    default:
      ShouldNotReachHere();
    }
    if (regs[i].first()->is_stack()) {
      int off = regs[i].first()->reg2stack();
      if (off > max_stack_slots) max_stack_slots = off;
    }
    if (regs[i].second()->is_stack()) {
      int off = regs[i].second()->reg2stack();
      if (off > max_stack_slots) max_stack_slots = off;
    }
  }
#endif // _LP64

  return round_to(max_stack_slots + 1, 2);

}
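
// Worked V9 example (a sketch, assuming the 64-bit build): for a native
// signature (int, long, double), j runs 0, 1, 2, giving %o0 for the int,
// %o1 for the long, and the F4:F5 pair for the double.  Had the double been
// the 17th actual argument (j == 16), it would instead land in stack slots
// 32/33.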
                                      frame_slots*VMRegImpl::stack_slot_size - 8+STACK_BIAS, F0);
1253    break;
1254  }
1255}
1256
1257// Check and forward any pending exception. The thread is stored in
1258// L7_thread_cache and possibly NOT in G2_thread. Since this is a native call, there
1259// is no exception handler. We merely pop this frame off and throw the
1260// exception in the caller's frame.
1261static void check_forward_pending_exception(MacroAssembler *masm, Register Rex_oop) {
1262  Label L;
1263  __ br_null(Rex_oop, false, Assembler::pt, L);
1264  __ delayed()->mov(L7_thread_cache, G2_thread); // restore in case we have exception
1265  // Since this is a native call, we *know* the proper exception handler
1266  // without calling into the VM: it's the empty function. Just pop this
1267  // frame and then jump to forward_exception_entry; O7 will contain the
1268  // native caller's return PC.
1269  AddressLiteral exception_entry(StubRoutines::forward_exception_entry());
1270  __ jump_to(exception_entry, G3_scratch);
1271  __ delayed()->restore();      // Pop this frame off.
1272  __ bind(L);
1273}
1274
1275// A simple move of an integer-like type
1276static void simple_move32(MacroAssembler* masm, VMRegPair src, VMRegPair dst) {
1277  if (src.first()->is_stack()) {
1278    if (dst.first()->is_stack()) {
1279      // stack to stack
1280      __ ld(FP, reg2offset(src.first()) + STACK_BIAS, L5);
1281      __ st(L5, SP, reg2offset(dst.first()) + STACK_BIAS);
1282    } else {
1283      // stack to reg
1284      __ ld(FP, reg2offset(src.first()) + STACK_BIAS, dst.first()->as_Register());
1285    }
1286  } else if (dst.first()->is_stack()) {
1287    // reg to stack
1288    __ st(src.first()->as_Register(), SP, reg2offset(dst.first()) + STACK_BIAS);
1289  } else {
1290    __ mov(src.first()->as_Register(), dst.first()->as_Register());
1291  }
1292}
1293
1294// On 64-bit we will store integer-like items to the stack as 64-bit
1295// items (sparc abi) even though java would only store 32 bits for a
1296// parameter. On 32-bit it will simply be 32 bits.
1297// So this routine does 32->32 on 32-bit and 32->64 on 64-bit.
1298static void move32_64(MacroAssembler* masm, VMRegPair src, VMRegPair dst) {
1299  if (src.first()->is_stack()) {
1300    if (dst.first()->is_stack()) {
1301      // stack to stack
1302      __ ld(FP, reg2offset(src.first()) + STACK_BIAS, L5);
1303      __ st_ptr(L5, SP, reg2offset(dst.first()) + STACK_BIAS);
1304    } else {
1305      // stack to reg
1306      __ ld(FP, reg2offset(src.first()) + STACK_BIAS, dst.first()->as_Register());
1307    }
1308  } else if (dst.first()->is_stack()) {
1309    // reg to stack
1310    __ st_ptr(src.first()->as_Register(), SP, reg2offset(dst.first()) + STACK_BIAS);
1311  } else {
1312    __ mov(src.first()->as_Register(), dst.first()->as_Register());
1313  }
1314}
1315
1316
1317static void move_ptr(MacroAssembler* masm, VMRegPair src, VMRegPair dst) {
1318  if (src.first()->is_stack()) {
1319    if (dst.first()->is_stack()) {
1320      // stack to stack
1321      __ ld_ptr(FP, reg2offset(src.first()) + STACK_BIAS, L5);
1322      __ st_ptr(L5, SP, reg2offset(dst.first()) + STACK_BIAS);
1323    } else {
1324      // stack to reg
1325      __ ld_ptr(FP, reg2offset(src.first()) + STACK_BIAS, dst.first()->as_Register());
1326    }
1327  } else if (dst.first()->is_stack()) {
1328    // reg to stack
1329    __ st_ptr(src.first()->as_Register(), SP, reg2offset(dst.first()) + STACK_BIAS);
1330  } else {
1331    __ mov(src.first()->as_Register(), dst.first()->as_Register());
1332  }
1333}
1334
1335
1336// An oop arg. Must pass a handle, not the oop itself.
1337static void object_move(MacroAssembler* masm,
1338                        OopMap* map,
1339                        int oop_handle_offset,
1340                        int framesize_in_slots,
1341                        VMRegPair src,
1342                        VMRegPair dst,
1343                        bool is_receiver,
1344                        int* receiver_offset) {
1345
1346  // Must pass a handle. First figure out the location we use as a handle.
1347
1348  if (src.first()->is_stack()) {
1349    // Oop is already on the stack
1350    Register rHandle = dst.first()->is_stack() ? L5 : dst.first()->as_Register();
1351    __ add(FP, reg2offset(src.first()) + STACK_BIAS, rHandle);
1352    __ ld_ptr(rHandle, 0, L4);
1353#ifdef _LP64
1354    __ movr( Assembler::rc_z, L4, G0, rHandle );
1355#else
1356    __ tst( L4 );
1357    __ movcc( Assembler::zero, false, Assembler::icc, G0, rHandle );
1358#endif
1359    if (dst.first()->is_stack()) {
1360      __ st_ptr(rHandle, SP, reg2offset(dst.first()) + STACK_BIAS);
1361    }
1362    int offset_in_older_frame = src.first()->reg2stack() + SharedRuntime::out_preserve_stack_slots();
1363    if (is_receiver) {
1364      *receiver_offset = (offset_in_older_frame + framesize_in_slots) * VMRegImpl::stack_slot_size;
1365    }
1366    map->set_oop(VMRegImpl::stack2reg(offset_in_older_frame + framesize_in_slots));
1367  } else {
1368    // Oop is in an input register; we must flush it to the stack.
1369    const Register rOop = src.first()->as_Register();
1370    const Register rHandle = L5;
1371    int oop_slot = rOop->input_number() * VMRegImpl::slots_per_word + oop_handle_offset;
1372    int offset = oop_slot*VMRegImpl::stack_slot_size;
1373    Label skip;
1374    __ st_ptr(rOop, SP, offset + STACK_BIAS);
1375    if (is_receiver) {
1376      *receiver_offset = oop_slot * VMRegImpl::stack_slot_size;
1377    }
1378    map->set_oop(VMRegImpl::stack2reg(oop_slot));
1379    __ add(SP, offset + STACK_BIAS, rHandle);
1380#ifdef _LP64
1381    __ movr( Assembler::rc_z, rOop, G0, rHandle );
1382#else
1383    __ tst( rOop );
1384    __ movcc( Assembler::zero, false, Assembler::icc, G0, rHandle );
1385#endif
1386
1387    if (dst.first()->is_stack()) {
1388      __ st_ptr(rHandle, SP, reg2offset(dst.first()) + STACK_BIAS);
1389    } else {
1390      __ mov(rHandle, dst.first()->as_Register());
1391    }
1392  }
1393}
1394
1395// A float arg may have to do a float reg <-> int reg conversion
1396static void float_move(MacroAssembler* masm, VMRegPair src, VMRegPair dst) {
1397  assert(!src.second()->is_valid() && !dst.second()->is_valid(), "bad float_move");
1398
1399  if (src.first()->is_stack()) {
1400    if (dst.first()->is_stack()) {
1401      // stack to stack the easiest of the bunch
1402      __ ld(FP, reg2offset(src.first()) + STACK_BIAS, L5);
1403      __ st(L5, SP, reg2offset(dst.first()) + STACK_BIAS);
1404    } else {
1405      // stack to reg
1406      if (dst.first()->is_Register()) {
1407        __ ld(FP, reg2offset(src.first()) + STACK_BIAS, dst.first()->as_Register());
1408      } else {
1409        __ ldf(FloatRegisterImpl::S, FP, reg2offset(src.first()) + STACK_BIAS, dst.first()->as_FloatRegister());
1410      }
1411    }
1412  } else if (dst.first()->is_stack()) {
1413    // reg to stack
1414    if (src.first()->is_Register()) {
1415      __ st(src.first()->as_Register(), SP, reg2offset(dst.first()) + STACK_BIAS);
1416    } else {
1417      __ stf(FloatRegisterImpl::S, src.first()->as_FloatRegister(), SP, reg2offset(dst.first()) + STACK_BIAS);
1418    }
1419  } else {
1420    // reg to reg
1421    if (src.first()->is_Register()) {
1422      if (dst.first()->is_Register()) {
1423        // gpr -> gpr
1424        __ mov(src.first()->as_Register(), dst.first()->as_Register());
1425      } else {
1426        // gpr -> fpr
1427        __ st(src.first()->as_Register(), FP, -4 + STACK_BIAS);
1428        __
ldf(FloatRegisterImpl::S, FP, -4 + STACK_BIAS, dst.first()->as_FloatRegister()); 1429 } 1430 } else if (dst.first()->is_Register()) { 1431 // fpr -> gpr 1432 __ stf(FloatRegisterImpl::S, src.first()->as_FloatRegister(), FP, -4 + STACK_BIAS); 1433 __ ld(FP, -4 + STACK_BIAS, dst.first()->as_Register()); 1434 } else { 1435 // fpr -> fpr 1436 // In theory these overlap but the ordering is such that this is likely a nop 1437 if ( src.first() != dst.first()) { 1438 __ fmov(FloatRegisterImpl::S, src.first()->as_FloatRegister(), dst.first()->as_FloatRegister()); 1439 } 1440 } 1441 } 1442} 1443 1444static void split_long_move(MacroAssembler* masm, VMRegPair src, VMRegPair dst) { 1445 VMRegPair src_lo(src.first()); 1446 VMRegPair src_hi(src.second()); 1447 VMRegPair dst_lo(dst.first()); 1448 VMRegPair dst_hi(dst.second()); 1449 simple_move32(masm, src_lo, dst_lo); 1450 simple_move32(masm, src_hi, dst_hi); 1451} 1452 1453// A long move 1454static void long_move(MacroAssembler* masm, VMRegPair src, VMRegPair dst) { 1455 1456 // Do the simple ones here else do two int moves 1457 if (src.is_single_phys_reg() ) { 1458 if (dst.is_single_phys_reg()) { 1459 __ mov(src.first()->as_Register(), dst.first()->as_Register()); 1460 } else { 1461 // split src into two separate registers 1462 // Remember hi means hi address or lsw on sparc 1463 // Move msw to lsw 1464 if (dst.second()->is_reg()) { 1465 // MSW -> MSW 1466 __ srax(src.first()->as_Register(), 32, dst.first()->as_Register()); 1467 // Now LSW -> LSW 1468 // this will only move lo -> lo and ignore hi 1469 VMRegPair split(dst.second()); 1470 simple_move32(masm, src, split); 1471 } else { 1472 VMRegPair split(src.first(), L4->as_VMReg()); 1473 // MSW -> MSW (lo ie. first word) 1474 __ srax(src.first()->as_Register(), 32, L4); 1475 split_long_move(masm, split, dst); 1476 } 1477 } 1478 } else if (dst.is_single_phys_reg()) { 1479 if (src.is_adjacent_aligned_on_stack(2)) { 1480 __ ldx(FP, reg2offset(src.first()) + STACK_BIAS, dst.first()->as_Register()); 1481 } else { 1482 // dst is a single reg. 1483 // Remember lo is low address not msb for stack slots 1484 // and lo is the "real" register for registers 1485 // src is 1486 1487 VMRegPair split; 1488 1489 if (src.first()->is_reg()) { 1490 // src.lo (msw) is a reg, src.hi is stk/reg 1491 // we will move: src.hi (LSW) -> dst.lo, src.lo (MSW) -> src.lo [the MSW is in the LSW of the reg] 1492 split.set_pair(dst.first(), src.first()); 1493 } else { 1494 // msw is stack move to L5 1495 // lsw is stack move to dst.lo (real reg) 1496 // we will move: src.hi (LSW) -> dst.lo, src.lo (MSW) -> L5 1497 split.set_pair(dst.first(), L5->as_VMReg()); 1498 } 1499 1500 // src.lo -> src.lo/L5, src.hi -> dst.lo (the real reg) 1501 // msw -> src.lo/L5, lsw -> dst.lo 1502 split_long_move(masm, src, split); 1503 1504 // So dst now has the low order correct position the 1505 // msw half 1506 __ sllx(split.first()->as_Register(), 32, L5); 1507 1508 const Register d = dst.first()->as_Register(); 1509 __ or3(L5, d, d); 1510 } 1511 } else { 1512 // For LP64 we can probably do better. 
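    // Neither side is a single 64-bit register here, so both halves are
    // moved with two independent 32-bit moves.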
1513 split_long_move(masm, src, dst); 1514 } 1515} 1516 1517// A double move 1518static void double_move(MacroAssembler* masm, VMRegPair src, VMRegPair dst) { 1519 1520 // The painful thing here is that like long_move a VMRegPair might be 1521 // 1: a single physical register 1522 // 2: two physical registers (v8) 1523 // 3: a physical reg [lo] and a stack slot [hi] (v8) 1524 // 4: two stack slots 1525 1526 // Since src is always a java calling convention we know that the src pair 1527 // is always either all registers or all stack (and aligned?) 1528 1529 // in a register [lo] and a stack slot [hi] 1530 if (src.first()->is_stack()) { 1531 if (dst.first()->is_stack()) { 1532 // stack to stack the easiest of the bunch 1533 // ought to be a way to do this where if alignment is ok we use ldd/std when possible 1534 __ ld(FP, reg2offset(src.first()) + STACK_BIAS, L5); 1535 __ ld(FP, reg2offset(src.second()) + STACK_BIAS, L4); 1536 __ st(L5, SP, reg2offset(dst.first()) + STACK_BIAS); 1537 __ st(L4, SP, reg2offset(dst.second()) + STACK_BIAS); 1538 } else { 1539 // stack to reg 1540 if (dst.second()->is_stack()) { 1541 // stack -> reg, stack -> stack 1542 __ ld(FP, reg2offset(src.second()) + STACK_BIAS, L4); 1543 if (dst.first()->is_Register()) { 1544 __ ld(FP, reg2offset(src.first()) + STACK_BIAS, dst.first()->as_Register()); 1545 } else { 1546 __ ldf(FloatRegisterImpl::S, FP, reg2offset(src.first()) + STACK_BIAS, dst.first()->as_FloatRegister()); 1547 } 1548 // This was missing. (very rare case) 1549 __ st(L4, SP, reg2offset(dst.second()) + STACK_BIAS); 1550 } else { 1551 // stack -> reg 1552 // Eventually optimize for alignment QQQ 1553 if (dst.first()->is_Register()) { 1554 __ ld(FP, reg2offset(src.first()) + STACK_BIAS, dst.first()->as_Register()); 1555 __ ld(FP, reg2offset(src.second()) + STACK_BIAS, dst.second()->as_Register()); 1556 } else { 1557 __ ldf(FloatRegisterImpl::S, FP, reg2offset(src.first()) + STACK_BIAS, dst.first()->as_FloatRegister()); 1558 __ ldf(FloatRegisterImpl::S, FP, reg2offset(src.second()) + STACK_BIAS, dst.second()->as_FloatRegister()); 1559 } 1560 } 1561 } 1562 } else if (dst.first()->is_stack()) { 1563 // reg to stack 1564 if (src.first()->is_Register()) { 1565 // Eventually optimize for alignment QQQ 1566 __ st(src.first()->as_Register(), SP, reg2offset(dst.first()) + STACK_BIAS); 1567 if (src.second()->is_stack()) { 1568 __ ld(FP, reg2offset(src.second()) + STACK_BIAS, L4); 1569 __ st(L4, SP, reg2offset(dst.second()) + STACK_BIAS); 1570 } else { 1571 __ st(src.second()->as_Register(), SP, reg2offset(dst.second()) + STACK_BIAS); 1572 } 1573 } else { 1574 // fpr to stack 1575 if (src.second()->is_stack()) { 1576 ShouldNotReachHere(); 1577 } else { 1578 // Is the stack aligned? 
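    // (An 8-byte double store needs an 8-byte-aligned slot; if the slot is
    // only 4-byte aligned, fall back to storing the two singles separately.)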
1579 if (reg2offset(dst.first()) & 0x7) { 1580 // No do as pairs 1581 __ stf(FloatRegisterImpl::S, src.first()->as_FloatRegister(), SP, reg2offset(dst.first()) + STACK_BIAS); 1582 __ stf(FloatRegisterImpl::S, src.second()->as_FloatRegister(), SP, reg2offset(dst.second()) + STACK_BIAS); 1583 } else { 1584 __ stf(FloatRegisterImpl::D, src.first()->as_FloatRegister(), SP, reg2offset(dst.first()) + STACK_BIAS); 1585 } 1586 } 1587 } 1588 } else { 1589 // reg to reg 1590 if (src.first()->is_Register()) { 1591 if (dst.first()->is_Register()) { 1592 // gpr -> gpr 1593 __ mov(src.first()->as_Register(), dst.first()->as_Register()); 1594 __ mov(src.second()->as_Register(), dst.second()->as_Register()); 1595 } else { 1596 // gpr -> fpr 1597 // ought to be able to do a single store 1598 __ stx(src.first()->as_Register(), FP, -8 + STACK_BIAS); 1599 __ stx(src.second()->as_Register(), FP, -4 + STACK_BIAS); 1600 // ought to be able to do a single load 1601 __ ldf(FloatRegisterImpl::S, FP, -8 + STACK_BIAS, dst.first()->as_FloatRegister()); 1602 __ ldf(FloatRegisterImpl::S, FP, -4 + STACK_BIAS, dst.second()->as_FloatRegister()); 1603 } 1604 } else if (dst.first()->is_Register()) { 1605 // fpr -> gpr 1606 // ought to be able to do a single store 1607 __ stf(FloatRegisterImpl::D, src.first()->as_FloatRegister(), FP, -8 + STACK_BIAS); 1608 // ought to be able to do a single load 1609 // REMEMBER first() is low address not LSB 1610 __ ld(FP, -8 + STACK_BIAS, dst.first()->as_Register()); 1611 if (dst.second()->is_Register()) { 1612 __ ld(FP, -4 + STACK_BIAS, dst.second()->as_Register()); 1613 } else { 1614 __ ld(FP, -4 + STACK_BIAS, L4); 1615 __ st(L4, SP, reg2offset(dst.second()) + STACK_BIAS); 1616 } 1617 } else { 1618 // fpr -> fpr 1619 // In theory these overlap but the ordering is such that this is likely a nop 1620 if ( src.first() != dst.first()) { 1621 __ fmov(FloatRegisterImpl::D, src.first()->as_FloatRegister(), dst.first()->as_FloatRegister()); 1622 } 1623 } 1624 } 1625} 1626 1627// Creates an inner frame if one hasn't already been created, and 1628// saves a copy of the thread in L7_thread_cache 1629static void create_inner_frame(MacroAssembler* masm, bool* already_created) { 1630 if (!*already_created) { 1631 __ save_frame(0); 1632 // Save thread in L7 (INNER FRAME); it crosses a bunch of VM calls below 1633 // Don't use save_thread because it smashes G2 and we merely want to save a 1634 // copy 1635 __ mov(G2_thread, L7_thread_cache); 1636 *already_created = true; 1637 } 1638} 1639 1640 1641static void save_or_restore_arguments(MacroAssembler* masm, 1642 const int stack_slots, 1643 const int total_in_args, 1644 const int arg_save_area, 1645 OopMap* map, 1646 VMRegPair* in_regs, 1647 BasicType* in_sig_bt) { 1648 // if map is non-NULL then the code should store the values, 1649 // otherwise it should load them. 
1650 if (map != NULL) { 1651 // Fill in the map 1652 for (int i = 0; i < total_in_args; i++) { 1653 if (in_sig_bt[i] == T_ARRAY) { 1654 if (in_regs[i].first()->is_stack()) { 1655 int offset_in_older_frame = in_regs[i].first()->reg2stack() + SharedRuntime::out_preserve_stack_slots(); 1656 map->set_oop(VMRegImpl::stack2reg(offset_in_older_frame + stack_slots)); 1657 } else if (in_regs[i].first()->is_Register()) { 1658 map->set_oop(in_regs[i].first()); 1659 } else { 1660 ShouldNotReachHere(); 1661 } 1662 } 1663 } 1664 } 1665 1666 // Save or restore double word values 1667 int handle_index = 0; 1668 for (int i = 0; i < total_in_args; i++) { 1669 int slot = handle_index + arg_save_area; 1670 int offset = slot * VMRegImpl::stack_slot_size; 1671 if (in_sig_bt[i] == T_LONG && in_regs[i].first()->is_Register()) { 1672 const Register reg = in_regs[i].first()->as_Register(); 1673 if (reg->is_global()) { 1674 handle_index += 2; 1675 assert(handle_index <= stack_slots, "overflow"); 1676 if (map != NULL) { 1677 __ stx(reg, SP, offset + STACK_BIAS); 1678 } else { 1679 __ ldx(SP, offset + STACK_BIAS, reg); 1680 } 1681 } 1682 } else if (in_sig_bt[i] == T_DOUBLE && in_regs[i].first()->is_FloatRegister()) { 1683 handle_index += 2; 1684 assert(handle_index <= stack_slots, "overflow"); 1685 if (map != NULL) { 1686 __ stf(FloatRegisterImpl::D, in_regs[i].first()->as_FloatRegister(), SP, offset + STACK_BIAS); 1687 } else { 1688 __ ldf(FloatRegisterImpl::D, SP, offset + STACK_BIAS, in_regs[i].first()->as_FloatRegister()); 1689 } 1690 } 1691 } 1692 // Save floats 1693 for (int i = 0; i < total_in_args; i++) { 1694 int slot = handle_index + arg_save_area; 1695 int offset = slot * VMRegImpl::stack_slot_size; 1696 if (in_sig_bt[i] == T_FLOAT && in_regs[i].first()->is_FloatRegister()) { 1697 handle_index++; 1698 assert(handle_index <= stack_slots, "overflow"); 1699 if (map != NULL) { 1700 __ stf(FloatRegisterImpl::S, in_regs[i].first()->as_FloatRegister(), SP, offset + STACK_BIAS); 1701 } else { 1702 __ ldf(FloatRegisterImpl::S, SP, offset + STACK_BIAS, in_regs[i].first()->as_FloatRegister()); 1703 } 1704 } 1705 } 1706 1707} 1708 1709 1710// Check GC_locker::needs_gc and enter the runtime if it's true. This 1711// keeps a new JNI critical region from starting until a GC has been 1712// forced. Save down any oops in registers and describe them in an 1713// OopMap. 
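// If GC_locker::needs_gc() is set, the generated code calls
// SharedRuntime::block_for_jni_critical(), which returns once the forced GC
// has completed; register arguments are saved before and reloaded after.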
1714static void check_needs_gc_for_critical_native(MacroAssembler* masm, 1715 const int stack_slots, 1716 const int total_in_args, 1717 const int arg_save_area, 1718 OopMapSet* oop_maps, 1719 VMRegPair* in_regs, 1720 BasicType* in_sig_bt) { 1721 __ block_comment("check GC_locker::needs_gc"); 1722 Label cont; 1723 AddressLiteral sync_state(GC_locker::needs_gc_address()); 1724 __ load_bool_contents(sync_state, G3_scratch); 1725 __ cmp_zero_and_br(Assembler::equal, G3_scratch, cont); 1726 __ delayed()->nop(); 1727 1728 // Save down any values that are live in registers and call into the 1729 // runtime to halt for a GC 1730 OopMap* map = new OopMap(stack_slots * 2, 0 /* arg_slots*/); 1731 save_or_restore_arguments(masm, stack_slots, total_in_args, 1732 arg_save_area, map, in_regs, in_sig_bt); 1733 1734 __ mov(G2_thread, L7_thread_cache); 1735 1736 __ set_last_Java_frame(SP, noreg); 1737 1738 __ block_comment("block_for_jni_critical"); 1739 __ call(CAST_FROM_FN_PTR(address, SharedRuntime::block_for_jni_critical), relocInfo::runtime_call_type); 1740 __ delayed()->mov(L7_thread_cache, O0); 1741 oop_maps->add_gc_map( __ offset(), map); 1742 1743 __ restore_thread(L7_thread_cache); // restore G2_thread 1744 __ reset_last_Java_frame(); 1745 1746 // Reload all the register arguments 1747 save_or_restore_arguments(masm, stack_slots, total_in_args, 1748 arg_save_area, NULL, in_regs, in_sig_bt); 1749 1750 __ bind(cont); 1751#ifdef ASSERT 1752 if (StressCriticalJNINatives) { 1753 // Stress register saving 1754 OopMap* map = new OopMap(stack_slots * 2, 0 /* arg_slots*/); 1755 save_or_restore_arguments(masm, stack_slots, total_in_args, 1756 arg_save_area, map, in_regs, in_sig_bt); 1757 // Destroy argument registers 1758 for (int i = 0; i < total_in_args; i++) { 1759 if (in_regs[i].first()->is_Register()) { 1760 const Register reg = in_regs[i].first()->as_Register(); 1761 if (reg->is_global()) { 1762 __ mov(G0, reg); 1763 } 1764 } else if (in_regs[i].first()->is_FloatRegister()) { 1765 __ fneg(FloatRegisterImpl::D, in_regs[i].first()->as_FloatRegister(), in_regs[i].first()->as_FloatRegister()); 1766 } 1767 } 1768 1769 save_or_restore_arguments(masm, stack_slots, total_in_args, 1770 arg_save_area, NULL, in_regs, in_sig_bt); 1771 } 1772#endif 1773} 1774 1775// Unpack an array argument into a pointer to the body and the length 1776// if the array is non-null, otherwise pass 0 for both. 
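// The body pointer is computed as array + arrayOopDesc::base_offset_in_bytes()
// for the element type; the length is loaded from the arrayOop header.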
1777static void unpack_array_argument(MacroAssembler* masm, VMRegPair reg, BasicType in_elem_type, VMRegPair body_arg, VMRegPair length_arg) { 1778 // Pass the length, ptr pair 1779 Label is_null, done; 1780 if (reg.first()->is_stack()) { 1781 VMRegPair tmp = reg64_to_VMRegPair(L2); 1782 // Load the arg up from the stack 1783 move_ptr(masm, reg, tmp); 1784 reg = tmp; 1785 } 1786 __ cmp(reg.first()->as_Register(), G0); 1787 __ brx(Assembler::equal, false, Assembler::pt, is_null); 1788 __ delayed()->add(reg.first()->as_Register(), arrayOopDesc::base_offset_in_bytes(in_elem_type), L4); 1789 move_ptr(masm, reg64_to_VMRegPair(L4), body_arg); 1790 __ ld(reg.first()->as_Register(), arrayOopDesc::length_offset_in_bytes(), L4); 1791 move32_64(masm, reg64_to_VMRegPair(L4), length_arg); 1792 __ ba_short(done); 1793 __ bind(is_null); 1794 // Pass zeros 1795 move_ptr(masm, reg64_to_VMRegPair(G0), body_arg); 1796 move32_64(masm, reg64_to_VMRegPair(G0), length_arg); 1797 __ bind(done); 1798} 1799 1800static void verify_oop_args(MacroAssembler* masm, 1801 methodHandle method, 1802 const BasicType* sig_bt, 1803 const VMRegPair* regs) { 1804 Register temp_reg = G5_method; // not part of any compiled calling seq 1805 if (VerifyOops) { 1806 for (int i = 0; i < method->size_of_parameters(); i++) { 1807 if (sig_bt[i] == T_OBJECT || 1808 sig_bt[i] == T_ARRAY) { 1809 VMReg r = regs[i].first(); 1810 assert(r->is_valid(), "bad oop arg"); 1811 if (r->is_stack()) { 1812 RegisterOrConstant ld_off = reg2offset(r) + STACK_BIAS; 1813 ld_off = __ ensure_simm13_or_reg(ld_off, temp_reg); 1814 __ ld_ptr(SP, ld_off, temp_reg); 1815 __ verify_oop(temp_reg); 1816 } else { 1817 __ verify_oop(r->as_Register()); 1818 } 1819 } 1820 } 1821 } 1822} 1823 1824static void gen_special_dispatch(MacroAssembler* masm, 1825 methodHandle method, 1826 const BasicType* sig_bt, 1827 const VMRegPair* regs) { 1828 verify_oop_args(masm, method, sig_bt, regs); 1829 vmIntrinsics::ID iid = method->intrinsic_id(); 1830 1831 // Now write the args into the outgoing interpreter space 1832 bool has_receiver = false; 1833 Register receiver_reg = noreg; 1834 int member_arg_pos = -1; 1835 Register member_reg = noreg; 1836 int ref_kind = MethodHandles::signature_polymorphic_intrinsic_ref_kind(iid); 1837 if (ref_kind != 0) { 1838 member_arg_pos = method->size_of_parameters() - 1; // trailing MemberName argument 1839 member_reg = G5_method; // known to be free at this point 1840 has_receiver = MethodHandles::ref_kind_has_receiver(ref_kind); 1841 } else if (iid == vmIntrinsics::_invokeBasic) { 1842 has_receiver = true; 1843 } else { 1844 fatal(err_msg_res("unexpected intrinsic id %d", iid)); 1845 } 1846 1847 if (member_reg != noreg) { 1848 // Load the member_arg into register, if necessary. 1849 SharedRuntime::check_member_name_argument_is_last_argument(method, sig_bt, regs); 1850 VMReg r = regs[member_arg_pos].first(); 1851 if (r->is_stack()) { 1852 RegisterOrConstant ld_off = reg2offset(r) + STACK_BIAS; 1853 ld_off = __ ensure_simm13_or_reg(ld_off, member_reg); 1854 __ ld_ptr(SP, ld_off, member_reg); 1855 } else { 1856 // no data motion is needed 1857 member_reg = r->as_Register(); 1858 } 1859 } 1860 1861 if (has_receiver) { 1862 // Make sure the receiver is loaded into a register. 
1863    assert(method->size_of_parameters() > 0, "oob");
1864    assert(sig_bt[0] == T_OBJECT, "receiver argument must be an object");
1865    VMReg r = regs[0].first();
1866    assert(r->is_valid(), "bad receiver arg");
1867    if (r->is_stack()) {
1868      // Porting note: This assumes that compiled calling conventions always
1869      // pass the receiver oop in a register. If this is not true on some
1870      // platform, pick a temp and load the receiver from stack.
1871      fatal("receiver always in a register");
1872      receiver_reg = G3_scratch;  // known to be free at this point
1873      RegisterOrConstant ld_off = reg2offset(r) + STACK_BIAS;
1874      ld_off = __ ensure_simm13_or_reg(ld_off, member_reg);
1875      __ ld_ptr(SP, ld_off, receiver_reg);
1876    } else {
1877      // no data motion is needed
1878      receiver_reg = r->as_Register();
1879    }
1880  }
1881
1882  // Figure out which address we are really jumping to:
1883  MethodHandles::generate_method_handle_dispatch(masm, iid,
1884                                                 receiver_reg, member_reg, /*for_compiler_entry:*/ true);
1885}
1886
1887// ---------------------------------------------------------------------------
1888// Generate a native wrapper for a given method. The method takes arguments
1889// in the Java compiled code convention, marshals them to the native
1890// convention (handlizes oops, etc), transitions to native, makes the call,
1891// returns to java state (possibly blocking), unhandlizes any result and
1892// returns.
1893//
1894// Critical native functions are a shorthand for the use of
1895// GetPrimitiveArrayCritical and disallow the use of any other JNI
1896// functions. The wrapper is expected to unpack the arguments before
1897// passing them to the callee and perform checks before and after the
1898// native call to ensure that the GC_locker
1899// lock_critical/unlock_critical semantics are followed. Some other
1900// parts of JNI setup are skipped, like the tear-down of the JNI handle
1901// block and the check for pending exceptions, since it's impossible for
1902// them to be thrown.
1903//
1904// They are roughly structured like this:
1905//    if (GC_locker::needs_gc())
1906//      SharedRuntime::block_for_jni_critical();
1907//    transition to thread_in_native
1908//    unpack array arguments and call native entry point
1909//    check for safepoint in progress
1910//    check if any thread suspend flags are set
1911//    call into the JVM and possibly unlock the JNI critical
1912//    if a GC was suppressed while in the critical native.
1913// transition back to thread_in_Java 1914// return to caller 1915// 1916nmethod* SharedRuntime::generate_native_wrapper(MacroAssembler* masm, 1917 methodHandle method, 1918 int compile_id, 1919 BasicType* in_sig_bt, 1920 VMRegPair* in_regs, 1921 BasicType ret_type) { 1922 if (method->is_method_handle_intrinsic()) { 1923 vmIntrinsics::ID iid = method->intrinsic_id(); 1924 intptr_t start = (intptr_t)__ pc(); 1925 int vep_offset = ((intptr_t)__ pc()) - start; 1926 gen_special_dispatch(masm, 1927 method, 1928 in_sig_bt, 1929 in_regs); 1930 int frame_complete = ((intptr_t)__ pc()) - start; // not complete, period 1931 __ flush(); 1932 int stack_slots = SharedRuntime::out_preserve_stack_slots(); // no out slots at all, actually 1933 return nmethod::new_native_nmethod(method, 1934 compile_id, 1935 masm->code(), 1936 vep_offset, 1937 frame_complete, 1938 stack_slots / VMRegImpl::slots_per_word, 1939 in_ByteSize(-1), 1940 in_ByteSize(-1), 1941 (OopMapSet*)NULL); 1942 } 1943 bool is_critical_native = true; 1944 address native_func = method->critical_native_function(); 1945 if (native_func == NULL) { 1946 native_func = method->native_function(); 1947 is_critical_native = false; 1948 } 1949 assert(native_func != NULL, "must have function"); 1950 1951 // Native nmethod wrappers never take possesion of the oop arguments. 1952 // So the caller will gc the arguments. The only thing we need an 1953 // oopMap for is if the call is static 1954 // 1955 // An OopMap for lock (and class if static), and one for the VM call itself 1956 OopMapSet *oop_maps = new OopMapSet(); 1957 intptr_t start = (intptr_t)__ pc(); 1958 1959 // First thing make an ic check to see if we should even be here 1960 { 1961 Label L; 1962 const Register temp_reg = G3_scratch; 1963 AddressLiteral ic_miss(SharedRuntime::get_ic_miss_stub()); 1964 __ verify_oop(O0); 1965 __ load_klass(O0, temp_reg); 1966 __ cmp_and_brx_short(temp_reg, G5_inline_cache_reg, Assembler::equal, Assembler::pt, L); 1967 1968 __ jump_to(ic_miss, temp_reg); 1969 __ delayed()->nop(); 1970 __ align(CodeEntryAlignment); 1971 __ bind(L); 1972 } 1973 1974 int vep_offset = ((intptr_t)__ pc()) - start; 1975 1976#ifdef COMPILER1 1977 if (InlineObjectHash && method->intrinsic_id() == vmIntrinsics::_hashCode) { 1978 // Object.hashCode can pull the hashCode from the header word 1979 // instead of doing a full VM transition once it's been computed. 1980 // Since hashCode is usually polymorphic at call sites we can't do 1981 // this optimization at the call site without a lot of work. 1982 Label slowCase; 1983 Register receiver = O0; 1984 Register result = O0; 1985 Register header = G3_scratch; 1986 Register hash = G3_scratch; // overwrite header value with hash value 1987 Register mask = G1; // to get hash field from header 1988 1989 // Read the header and build a mask to get its hash field. Give up if the object is not unlocked. 1990 // We depend on hash_mask being at most 32 bits and avoid the use of 1991 // hash_mask_in_place because it could be larger than 32 bits in a 64-bit 1992 // vm: see markOop.hpp. 
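// The fast path below extracts hash = (mark >> hash_shift) & hash_mask;
// a zero hash means none has been assigned yet, so we fall into the slow
// case (as does a locked or biased header).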
1993 __ ld_ptr(receiver, oopDesc::mark_offset_in_bytes(), header); 1994 __ sethi(markOopDesc::hash_mask, mask); 1995 __ btst(markOopDesc::unlocked_value, header); 1996 __ br(Assembler::zero, false, Assembler::pn, slowCase); 1997 if (UseBiasedLocking) { 1998 // Check if biased and fall through to runtime if so 1999 __ delayed()->nop(); 2000 __ btst(markOopDesc::biased_lock_bit_in_place, header); 2001 __ br(Assembler::notZero, false, Assembler::pn, slowCase); 2002 } 2003 __ delayed()->or3(mask, markOopDesc::hash_mask & 0x3ff, mask); 2004 2005 // Check for a valid (non-zero) hash code and get its value. 2006#ifdef _LP64 2007 __ srlx(header, markOopDesc::hash_shift, hash); 2008#else 2009 __ srl(header, markOopDesc::hash_shift, hash); 2010#endif 2011 __ andcc(hash, mask, hash); 2012 __ br(Assembler::equal, false, Assembler::pn, slowCase); 2013 __ delayed()->nop(); 2014 2015 // leaf return. 2016 __ retl(); 2017 __ delayed()->mov(hash, result); 2018 __ bind(slowCase); 2019 } 2020#endif // COMPILER1 2021 2022 2023 // We have received a description of where all the java arg are located 2024 // on entry to the wrapper. We need to convert these args to where 2025 // the jni function will expect them. To figure out where they go 2026 // we convert the java signature to a C signature by inserting 2027 // the hidden arguments as arg[0] and possibly arg[1] (static method) 2028 2029 const int total_in_args = method->size_of_parameters(); 2030 int total_c_args = total_in_args; 2031 int total_save_slots = 6 * VMRegImpl::slots_per_word; 2032 if (!is_critical_native) { 2033 total_c_args += 1; 2034 if (method->is_static()) { 2035 total_c_args++; 2036 } 2037 } else { 2038 for (int i = 0; i < total_in_args; i++) { 2039 if (in_sig_bt[i] == T_ARRAY) { 2040 // These have to be saved and restored across the safepoint 2041 total_c_args++; 2042 } 2043 } 2044 } 2045 2046 BasicType* out_sig_bt = NEW_RESOURCE_ARRAY(BasicType, total_c_args); 2047 VMRegPair* out_regs = NEW_RESOURCE_ARRAY(VMRegPair, total_c_args); 2048 BasicType* in_elem_bt = NULL; 2049 2050 int argc = 0; 2051 if (!is_critical_native) { 2052 out_sig_bt[argc++] = T_ADDRESS; 2053 if (method->is_static()) { 2054 out_sig_bt[argc++] = T_OBJECT; 2055 } 2056 2057 for (int i = 0; i < total_in_args ; i++ ) { 2058 out_sig_bt[argc++] = in_sig_bt[i]; 2059 } 2060 } else { 2061 Thread* THREAD = Thread::current(); 2062 in_elem_bt = NEW_RESOURCE_ARRAY(BasicType, total_in_args); 2063 SignatureStream ss(method->signature()); 2064 for (int i = 0; i < total_in_args ; i++ ) { 2065 if (in_sig_bt[i] == T_ARRAY) { 2066 // Arrays are passed as int, elem* pair 2067 out_sig_bt[argc++] = T_INT; 2068 out_sig_bt[argc++] = T_ADDRESS; 2069 Symbol* atype = ss.as_symbol(CHECK_NULL); 2070 const char* at = atype->as_C_string(); 2071 if (strlen(at) == 2) { 2072 assert(at[0] == '[', "must be"); 2073 switch (at[1]) { 2074 case 'B': in_elem_bt[i] = T_BYTE; break; 2075 case 'C': in_elem_bt[i] = T_CHAR; break; 2076 case 'D': in_elem_bt[i] = T_DOUBLE; break; 2077 case 'F': in_elem_bt[i] = T_FLOAT; break; 2078 case 'I': in_elem_bt[i] = T_INT; break; 2079 case 'J': in_elem_bt[i] = T_LONG; break; 2080 case 'S': in_elem_bt[i] = T_SHORT; break; 2081 case 'Z': in_elem_bt[i] = T_BOOLEAN; break; 2082 default: ShouldNotReachHere(); 2083 } 2084 } 2085 } else { 2086 out_sig_bt[argc++] = in_sig_bt[i]; 2087 in_elem_bt[i] = T_VOID; 2088 } 2089 if (in_sig_bt[i] != T_VOID) { 2090 assert(in_sig_bt[i] == ss.type(), "must match"); 2091 ss.next(); 2092 } 2093 } 2094 } 2095 2096 // Now figure out where the args must be 
stored and how much stack space
2097  // they require (neglecting out_preserve_stack_slots, but including space
2098  // for storing the first six register arguments). It's weird; see int_stk_helper.
2099  //
2100  int out_arg_slots;
2101  out_arg_slots = c_calling_convention(out_sig_bt, out_regs, NULL, total_c_args);
2102
2103  if (is_critical_native) {
2104    // Critical natives may have to call out so they need a save area
2105    // for register arguments.
2106    int double_slots = 0;
2107    int single_slots = 0;
2108    for ( int i = 0; i < total_in_args; i++) {
2109      if (in_regs[i].first()->is_Register()) {
2110        const Register reg = in_regs[i].first()->as_Register();
2111        switch (in_sig_bt[i]) {
2112        case T_ARRAY:
2113        case T_BOOLEAN:
2114        case T_BYTE:
2115        case T_SHORT:
2116        case T_CHAR:
2117        case T_INT:  assert(reg->is_in(), "don't need to save these"); break;
2118        case T_LONG: if (reg->is_global()) double_slots++; break;
2119        default:  ShouldNotReachHere();
2120        }
2121      } else if (in_regs[i].first()->is_FloatRegister()) {
2122        switch (in_sig_bt[i]) {
2123        case T_FLOAT:  single_slots++; break;
2124        case T_DOUBLE: double_slots++; break;
2125        default:  ShouldNotReachHere();
2126        }
2127      }
2128    }
2129    total_save_slots = double_slots * 2 + single_slots;
2130  }
2131
2132  // Compute framesize for the wrapper. We need to handlize all oops in
2133  // registers. We must create space for them here that is disjoint from
2134  // the windowed save area because we have no control over when we might
2135  // flush the window again and overwrite values that gc has since modified.
2136  // (The live window race)
2137  //
2138  // We always just allocate 6 words for storing down these objects. This allows
2139  // us to simply record the base and use the Ireg number to decide which
2140  // slot to use. (Note that the reg number is the inbound number, not the
2141  // outbound number).
2142  // We must shuffle args to match the native convention, and include var-args space.
2143
2144  // Calculate the total number of stack slots we will need.
2145 2146 // First count the abi requirement plus all of the outgoing args 2147 int stack_slots = SharedRuntime::out_preserve_stack_slots() + out_arg_slots; 2148 2149 // Now the space for the inbound oop handle area 2150 2151 int oop_handle_offset = round_to(stack_slots, 2); 2152 stack_slots += total_save_slots; 2153 2154 // Now any space we need for handlizing a klass if static method 2155 2156 int klass_slot_offset = 0; 2157 int klass_offset = -1; 2158 int lock_slot_offset = 0; 2159 bool is_static = false; 2160 2161 if (method->is_static()) { 2162 klass_slot_offset = stack_slots; 2163 stack_slots += VMRegImpl::slots_per_word; 2164 klass_offset = klass_slot_offset * VMRegImpl::stack_slot_size; 2165 is_static = true; 2166 } 2167 2168 // Plus a lock if needed 2169 2170 if (method->is_synchronized()) { 2171 lock_slot_offset = stack_slots; 2172 stack_slots += VMRegImpl::slots_per_word; 2173 } 2174 2175 // Now a place to save return value or as a temporary for any gpr -> fpr moves 2176 stack_slots += 2; 2177 2178 // Ok The space we have allocated will look like: 2179 // 2180 // 2181 // FP-> | | 2182 // |---------------------| 2183 // | 2 slots for moves | 2184 // |---------------------| 2185 // | lock box (if sync) | 2186 // |---------------------| <- lock_slot_offset 2187 // | klass (if static) | 2188 // |---------------------| <- klass_slot_offset 2189 // | oopHandle area | 2190 // |---------------------| <- oop_handle_offset 2191 // | outbound memory | 2192 // | based arguments | 2193 // | | 2194 // |---------------------| 2195 // | vararg area | 2196 // |---------------------| 2197 // | | 2198 // SP-> | out_preserved_slots | 2199 // 2200 // 2201 2202 2203 // Now compute actual number of stack words we need rounding to make 2204 // stack properly aligned. 2205 stack_slots = round_to(stack_slots, 2 * VMRegImpl::slots_per_word); 2206 2207 int stack_size = stack_slots * VMRegImpl::stack_slot_size; 2208 2209 // Generate stack overflow check before creating frame 2210 __ generate_stack_overflow_check(stack_size); 2211 2212 // Generate a new frame for the wrapper. 2213 __ save(SP, -stack_size, SP); 2214 2215 int frame_complete = ((intptr_t)__ pc()) - start; 2216 2217 __ verify_thread(); 2218 2219 if (is_critical_native) { 2220 check_needs_gc_for_critical_native(masm, stack_slots, total_in_args, 2221 oop_handle_offset, oop_maps, in_regs, in_sig_bt); 2222 } 2223 2224 // 2225 // We immediately shuffle the arguments so that any vm call we have to 2226 // make from here on out (sync slow path, jvmti, etc.) we will have 2227 // captured the oops from our caller and have a valid oopMap for 2228 // them. 2229 2230 // ----------------- 2231 // The Grand Shuffle 2232 // 2233 // Natives require 1 or 2 extra arguments over the normal ones: the JNIEnv* 2234 // (derived from JavaThread* which is in L7_thread_cache) and, if static, 2235 // the class mirror instead of a receiver. This pretty much guarantees that 2236 // register layout will not match. We ignore these extra arguments during 2237 // the shuffle. The shuffle is described by the two calling convention 2238 // vectors we have in our possession. We simply walk the java vector to 2239 // get the source locations and the c vector to get the destinations. 2240 // Because we have a new window and the argument registers are completely 2241 // disjoint ( I0 -> O1, I1 -> O2, ...) we have nothing to worry about 2242 // here. 2243 2244 // This is a trick. We double the stack slots so we can claim 2245 // the oops in the caller's frame. 
Since we are sure to have 2246 // more args than the caller doubling is enough to make 2247 // sure we can capture all the incoming oop args from the 2248 // caller. 2249 // 2250 OopMap* map = new OopMap(stack_slots * 2, 0 /* arg_slots*/); 2251 // Record sp-based slot for receiver on stack for non-static methods 2252 int receiver_offset = -1; 2253 2254 // We move the arguments backward because the floating point registers 2255 // destination will always be to a register with a greater or equal register 2256 // number or the stack. 2257 2258#ifdef ASSERT 2259 bool reg_destroyed[RegisterImpl::number_of_registers]; 2260 bool freg_destroyed[FloatRegisterImpl::number_of_registers]; 2261 for ( int r = 0 ; r < RegisterImpl::number_of_registers ; r++ ) { 2262 reg_destroyed[r] = false; 2263 } 2264 for ( int f = 0 ; f < FloatRegisterImpl::number_of_registers ; f++ ) { 2265 freg_destroyed[f] = false; 2266 } 2267 2268#endif /* ASSERT */ 2269 2270 for ( int i = total_in_args - 1, c_arg = total_c_args - 1; i >= 0 ; i--, c_arg-- ) { 2271 2272#ifdef ASSERT 2273 if (in_regs[i].first()->is_Register()) { 2274 assert(!reg_destroyed[in_regs[i].first()->as_Register()->encoding()], "ack!"); 2275 } else if (in_regs[i].first()->is_FloatRegister()) { 2276 assert(!freg_destroyed[in_regs[i].first()->as_FloatRegister()->encoding(FloatRegisterImpl::S)], "ack!"); 2277 } 2278 if (out_regs[c_arg].first()->is_Register()) { 2279 reg_destroyed[out_regs[c_arg].first()->as_Register()->encoding()] = true; 2280 } else if (out_regs[c_arg].first()->is_FloatRegister()) { 2281 freg_destroyed[out_regs[c_arg].first()->as_FloatRegister()->encoding(FloatRegisterImpl::S)] = true; 2282 } 2283#endif /* ASSERT */ 2284 2285 switch (in_sig_bt[i]) { 2286 case T_ARRAY: 2287 if (is_critical_native) { 2288 unpack_array_argument(masm, in_regs[i], in_elem_bt[i], out_regs[c_arg], out_regs[c_arg - 1]); 2289 c_arg--; 2290 break; 2291 } 2292 case T_OBJECT: 2293 assert(!is_critical_native, "no oop arguments"); 2294 object_move(masm, map, oop_handle_offset, stack_slots, in_regs[i], out_regs[c_arg], 2295 ((i == 0) && (!is_static)), 2296 &receiver_offset); 2297 break; 2298 case T_VOID: 2299 break; 2300 2301 case T_FLOAT: 2302 float_move(masm, in_regs[i], out_regs[c_arg]); 2303 break; 2304 2305 case T_DOUBLE: 2306 assert( i + 1 < total_in_args && 2307 in_sig_bt[i + 1] == T_VOID && 2308 out_sig_bt[c_arg+1] == T_VOID, "bad arg list"); 2309 double_move(masm, in_regs[i], out_regs[c_arg]); 2310 break; 2311 2312 case T_LONG : 2313 long_move(masm, in_regs[i], out_regs[c_arg]); 2314 break; 2315 2316 case T_ADDRESS: assert(false, "found T_ADDRESS in java args"); 2317 2318 default: 2319 move32_64(masm, in_regs[i], out_regs[c_arg]); 2320 } 2321 } 2322 2323 // Pre-load a static method's oop into O1. Used both by locking code and 2324 // the normal JNI call code. 2325 if (method->is_static() && !is_critical_native) { 2326 __ set_oop_constant(JNIHandles::make_local(method->method_holder()->java_mirror()), O1); 2327 2328 // Now handlize the static class mirror in O1. It's known not-null. 2329 __ st_ptr(O1, SP, klass_offset + STACK_BIAS); 2330 map->set_oop(VMRegImpl::stack2reg(klass_slot_offset)); 2331 __ add(SP, klass_offset + STACK_BIAS, O1); 2332 } 2333 2334 2335 const Register L6_handle = L6; 2336 2337 if (method->is_synchronized()) { 2338 assert(!is_critical_native, "unhandled"); 2339 __ mov(O1, L6_handle); 2340 } 2341 2342 // We have all of the arguments setup at this point. We MUST NOT touch any Oregs 2343 // except O6/O7. 
So if we must call out we must push a new frame. We immediately 2344 // push a new frame and flush the windows. 2345#ifdef _LP64 2346 intptr_t thepc = (intptr_t) __ pc(); 2347 { 2348 address here = __ pc(); 2349 // Call the next instruction 2350 __ call(here + 8, relocInfo::none); 2351 __ delayed()->nop(); 2352 } 2353#else 2354 intptr_t thepc = __ load_pc_address(O7, 0); 2355#endif /* _LP64 */ 2356 2357 // We use the same pc/oopMap repeatedly when we call out 2358 oop_maps->add_gc_map(thepc - start, map); 2359 2360 // O7 now has the pc loaded that we will use when we finally call to native. 2361 2362 // Save thread in L7; it crosses a bunch of VM calls below 2363 // Don't use save_thread because it smashes G2 and we merely 2364 // want to save a copy 2365 __ mov(G2_thread, L7_thread_cache); 2366 2367 2368 // If we create an inner frame once is plenty 2369 // when we create it we must also save G2_thread 2370 bool inner_frame_created = false; 2371 2372 // dtrace method entry support 2373 { 2374 SkipIfEqual skip_if( 2375 masm, G3_scratch, &DTraceMethodProbes, Assembler::zero); 2376 // create inner frame 2377 __ save_frame(0); 2378 __ mov(G2_thread, L7_thread_cache); 2379 __ set_metadata_constant(method(), O1); 2380 __ call_VM_leaf(L7_thread_cache, 2381 CAST_FROM_FN_PTR(address, SharedRuntime::dtrace_method_entry), 2382 G2_thread, O1); 2383 __ restore(); 2384 } 2385 2386 // RedefineClasses() tracing support for obsolete method entry 2387 if (RC_TRACE_IN_RANGE(0x00001000, 0x00002000)) { 2388 // create inner frame 2389 __ save_frame(0); 2390 __ mov(G2_thread, L7_thread_cache); 2391 __ set_metadata_constant(method(), O1); 2392 __ call_VM_leaf(L7_thread_cache, 2393 CAST_FROM_FN_PTR(address, SharedRuntime::rc_trace_method_entry), 2394 G2_thread, O1); 2395 __ restore(); 2396 } 2397 2398 // We are in the jni frame unless saved_frame is true in which case 2399 // we are in one frame deeper (the "inner" frame). If we are in the 2400 // "inner" frames the args are in the Iregs and if the jni frame then 2401 // they are in the Oregs. 2402 // If we ever need to go to the VM (for locking, jvmti) then 2403 // we will always be in the "inner" frame. 2404 2405 // Lock a synchronized method 2406 int lock_offset = -1; // Set if locked 2407 if (method->is_synchronized()) { 2408 Register Roop = O1; 2409 const Register L3_box = L3; 2410 2411 create_inner_frame(masm, &inner_frame_created); 2412 2413 __ ld_ptr(I1, 0, O1); 2414 Label done; 2415 2416 lock_offset = (lock_slot_offset * VMRegImpl::stack_slot_size); 2417 __ add(FP, lock_offset+STACK_BIAS, L3_box); 2418#ifdef ASSERT 2419 if (UseBiasedLocking) { 2420 // making the box point to itself will make it clear it went unused 2421 // but also be obviously invalid 2422 __ st_ptr(L3_box, L3_box, 0); 2423 } 2424#endif // ASSERT 2425 // 2426 // Compiler_lock_object (Roop, Rmark, Rbox, Rscratch) -- kills Rmark, Rbox, Rscratch 2427 // 2428 __ compiler_lock_object(Roop, L1, L3_box, L2); 2429 __ br(Assembler::equal, false, Assembler::pt, done); 2430 __ delayed() -> add(FP, lock_offset+STACK_BIAS, L3_box); 2431 2432 2433 // None of the above fast optimizations worked so we have to get into the 2434 // slow case of monitor enter. Inline a special case of call_VM that 2435 // disallows any pending_exception. 2436 __ mov(Roop, O0); // Need oop in O0 2437 __ mov(L3_box, O1); 2438 2439 // Record last_Java_sp, in case the VM code releases the JVM lock. 
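  // (From the inner frame, FP is the wrapper frame's SP and I7 holds the pc
  // we recorded earlier, so set_last_Java_frame(FP, I7) anchors the wrapper
  // frame and reuses the shared pc/oopMap for this VM call.)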
2440
2441    __ set_last_Java_frame(FP, I7);
2442
2443    // do the call
2444    __ call(CAST_FROM_FN_PTR(address, SharedRuntime::complete_monitor_locking_C), relocInfo::runtime_call_type);
2445    __ delayed()->mov(L7_thread_cache, O2);
2446
2447    __ restore_thread(L7_thread_cache); // restore G2_thread
2448    __ reset_last_Java_frame();
2449
2450#ifdef ASSERT
2451    { Label L;
2452    __ ld_ptr(G2_thread, in_bytes(Thread::pending_exception_offset()), O0);
2453    __ br_null_short(O0, Assembler::pt, L);
2454    __ stop("no pending exception allowed on exit from IR::monitorenter");
2455    __ bind(L);
2456    }
2457#endif
2458    __ bind(done);
2459  }
2460
2461
2462  // Finally just about ready to make the JNI call
2463
2464  __ flushw();
2465  if (inner_frame_created) {
2466    __ restore();
2467  } else {
2468    // Store only what we need from this frame
2469    // QQQ I think that on non-V9 (like we care) we don't need these saves
2470    // either as the flush traps and the current window goes too.
2471    __ st_ptr(FP, SP, FP->sp_offset_in_saved_window()*wordSize + STACK_BIAS);
2472    __ st_ptr(I7, SP, I7->sp_offset_in_saved_window()*wordSize + STACK_BIAS);
2473  }
2474
2475  // get JNIEnv* which is first argument to native
2476  if (!is_critical_native) {
2477    __ add(G2_thread, in_bytes(JavaThread::jni_environment_offset()), O0);
2478  }
2479
2480  // Use that pc we placed in O7 a while back as the current frame anchor
2481  __ set_last_Java_frame(SP, O7);
2482
2483  // We flushed the windows ages ago; now mark them as flushed before transitioning.
2484  __ set(JavaFrameAnchor::flushed, G3_scratch);
2485  __ st(G3_scratch, G2_thread, JavaThread::frame_anchor_offset() + JavaFrameAnchor::flags_offset());
2486
2487  // Transition from _thread_in_Java to _thread_in_native.
2488  __ set(_thread_in_native, G3_scratch);
2489
2490#ifdef _LP64
2491  AddressLiteral dest(native_func);
2492  __ relocate(relocInfo::runtime_call_type);
2493  __ jumpl_to(dest, O7, O7);
2494#else
2495  __ call(native_func, relocInfo::runtime_call_type);
2496#endif
2497  __ delayed()->st(G3_scratch, G2_thread, JavaThread::thread_state_offset());
2498
2499  __ restore_thread(L7_thread_cache); // restore G2_thread
2500
2501  // Unpack native results. For int-types, we do any needed sign-extension
2502  // and move things into I0. The return value there will survive any VM
2503  // calls for blocking or unlocking. An FP or OOP result (handle) is done
2504  // specially in the slow-path code.
2505  switch (ret_type) {
2506  case T_VOID:    break;        // Nothing to do!
2507  case T_FLOAT:   break;        // Got it where we want it (unless slow-path)
2508  case T_DOUBLE:  break;        // Got it where we want it (unless slow-path)
2509  // In a 64-bit build the result is in O0; in a 32-bit build it is in O0,O1.
2510  case T_LONG:
2511#ifndef _LP64
2512                  __ mov(O1, I1);
2513#endif
2514                  // Fall thru
2515  case T_OBJECT:                // Really a handle
2516  case T_ARRAY:
2517  case T_INT:
2518                  __ mov(O0, I0);
2519                  break;
2520  case T_BOOLEAN: __ subcc(G0, O0, G0); __ addc(G0, 0, I0); break; // !0 => true; 0 => false
2521  case T_BYTE   : __ sll(O0, 24, O0); __ sra(O0, 24, I0);   break;
2522  case T_CHAR   : __ sll(O0, 16, O0); __ srl(O0, 16, I0);   break; // cannot use and3, 0xFFFF too big as immediate value!
2523  case T_SHORT  : __ sll(O0, 16, O0); __ sra(O0, 16, I0);   break;
2524                  // (An oop/array result cannot be de-handlized until after reclaiming the jvm lock.)
2525  default:
2526    ShouldNotReachHere();
2527  }
2528
2529  Label after_transition;
2530  // must we block?
2531
2532  // Block, if necessary, before resuming in _thread_in_Java state.
2533  // In order for GC to work, don't clear the last_Java_sp until after blocking.
2534  { Label no_block;
2535    AddressLiteral sync_state(SafepointSynchronize::address_of_state());
2536
2537    // Switch thread to "native transition" state before reading the synchronization state.
2538    // This additional state is necessary because reading and testing the synchronization
2539    // state is not atomic w.r.t. GC, as this scenario demonstrates:
2540    //     Java thread A, in _thread_in_native state, loads _not_synchronized and is preempted.
2541    //     VM thread changes sync state to synchronizing and suspends threads for GC.
2542    //     Thread A is resumed to finish this native method, but doesn't block here since it
2543    //     didn't see any synchronization in progress, and escapes.
2544    __ set(_thread_in_native_trans, G3_scratch);
2545    __ st(G3_scratch, G2_thread, JavaThread::thread_state_offset());
2546    if (os::is_MP()) {
2547      if (UseMembar) {
2548        // Force this write out before the read below
2549        __ membar(Assembler::StoreLoad);
2550      } else {
2551        // Write serialization page so VM thread can do a pseudo remote membar.
2552        // We use the current thread pointer to calculate a thread specific
2553        // offset to write to within the page. This minimizes bus traffic
2554        // due to cache line collision.
2555        __ serialize_memory(G2_thread, G1_scratch, G3_scratch);
2556      }
2557    }
2558    __ load_contents(sync_state, G3_scratch);
2559    __ cmp(G3_scratch, SafepointSynchronize::_not_synchronized);
2560
2561    Label L;
2562    Address suspend_state(G2_thread, JavaThread::suspend_flags_offset());
2563    __ br(Assembler::notEqual, false, Assembler::pn, L);
2564    __ delayed()->ld(suspend_state, G3_scratch);
2565    __ cmp_and_br_short(G3_scratch, 0, Assembler::equal, Assembler::pt, no_block);
2566    __ bind(L);
2567
2568    // Block. Save any potential method result value before the operation and
2569    // use a leaf call to leave the last_Java_frame setup undisturbed. Doing this
2570    // lets us share the oopMap we used when we went native rather than create
2571    // a distinct one for this pc.
2572    //
2573    save_native_result(masm, ret_type, stack_slots);
2574    if (!is_critical_native) {
2575      __ call_VM_leaf(L7_thread_cache,
2576                      CAST_FROM_FN_PTR(address, JavaThread::check_special_condition_for_native_trans),
2577                      G2_thread);
2578    } else {
2579      __ call_VM_leaf(L7_thread_cache,
2580                      CAST_FROM_FN_PTR(address, JavaThread::check_special_condition_for_native_trans_and_transition),
2581                      G2_thread);
2582    }
2583
2584    // Restore any method result value
2585    restore_native_result(masm, ret_type, stack_slots);
2586
2587    if (is_critical_native) {
2588      // The call above performed the transition to thread_in_Java so
2589      // skip the transition logic below.
2590      __ ba(after_transition);
2591      __ delayed()->nop();
2592    }
2593
2594    __ bind(no_block);
2595  }
2596
2597  // thread state is thread_in_native_trans. Any safepoint blocking has already
2598  // happened so we can now change state to _thread_in_Java.
2599  __ set(_thread_in_Java, G3_scratch);
2600  __ st(G3_scratch, G2_thread, JavaThread::thread_state_offset());
2601  __ bind(after_transition);
2602
2603  Label no_reguard;
2604  __ ld(G2_thread, JavaThread::stack_guard_state_offset(), G3_scratch);
2605  __ cmp_and_br_short(G3_scratch, JavaThread::stack_guard_yellow_disabled, Assembler::notEqual, Assembler::pt, no_reguard);
2606
2607  save_native_result(masm, ret_type, stack_slots);
2608  __ call(CAST_FROM_FN_PTR(address, SharedRuntime::reguard_yellow_pages));
2609  __ delayed()->nop();
2610
2611  __ restore_thread(L7_thread_cache); // restore G2_thread
2612  restore_native_result(masm, ret_type, stack_slots);
2613
2614  __ bind(no_reguard);
2615
2616  // Handle possible exception (will unlock if necessary)
2617
2618  // The native result, if any, is live in an freg or I0 (and I1 if long on a 32-bit VM).
2619
2620  // Unlock
2621  if (method->is_synchronized()) {
2622    Label done;
2623    Register I2_ex_oop = I2;
2624    const Register L3_box = L3;
2625    // Get locked oop from the handle we passed to jni
2626    __ ld_ptr(L6_handle, 0, L4);
2627    __ add(SP, lock_offset+STACK_BIAS, L3_box);
2628    // Must save pending exception around the slow-path VM call. Since it's a
2629    // leaf call, the pending exception (if any) can be kept in a register.
2630    __ ld_ptr(G2_thread, in_bytes(Thread::pending_exception_offset()), I2_ex_oop);
2631    // Now unlock
2632    //                        (Roop, Rmark, Rbox,   Rscratch)
2633    __ compiler_unlock_object(L4,    L1,    L3_box, L2);
2634    __ br(Assembler::equal, false, Assembler::pt, done);
2635    __ delayed()-> add(SP, lock_offset+STACK_BIAS, L3_box);
2636
2637    // save and restore any potential method result value around the unlocking
2638    // operation. Will save in I0 (or stack for FP returns).
2639    save_native_result(masm, ret_type, stack_slots);
2640
2641    // Must clear pending-exception before re-entering the VM. Since this is
2642    // a leaf call, pending-exception-oop can be safely kept in a register.
2643    __ st_ptr(G0, G2_thread, in_bytes(Thread::pending_exception_offset()));
2644
2645    // slow case of monitor exit. Inline a special case of call_VM that
2646    // disallows any pending_exception.
2647    __ mov(L3_box, O1);
2648
2649    __ call(CAST_FROM_FN_PTR(address, SharedRuntime::complete_monitor_unlocking_C), relocInfo::runtime_call_type);
2650    __ delayed()->mov(L4, O0);              // Need oop in O0
2651
2652    __ restore_thread(L7_thread_cache); // restore G2_thread
2653
2654#ifdef ASSERT
2655    { Label L;
2656    __ ld_ptr(G2_thread, in_bytes(Thread::pending_exception_offset()), O0);
2657    __ br_null_short(O0, Assembler::pt, L);
2658    __ stop("no pending exception allowed on exit from IR::monitorexit");
2659    __ bind(L);
2660    }
2661#endif
2662    restore_native_result(masm, ret_type, stack_slots);
2663    // check_forward_pending_exception jumps to forward_exception if any pending
2664    // exception is set. The forward_exception routine expects to see the
2665    // exception in pending_exception and not in a register. Kind of clumsy,
2666    // since all folks who branch to forward_exception must have tested
2667    // pending_exception first and hence have it in a register already.
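    // Reinstall the pending exception saved in I2_ex_oop above, now that the
    // unlocking VM call is done.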
2668 __ st_ptr(I2_ex_oop, G2_thread, in_bytes(Thread::pending_exception_offset())); 2669 __ bind(done); 2670 } 2671 2672 // Tell dtrace about this method exit 2673 { 2674 SkipIfEqual skip_if( 2675 masm, G3_scratch, &DTraceMethodProbes, Assembler::zero); 2676 save_native_result(masm, ret_type, stack_slots); 2677 __ set_metadata_constant(method(), O1); 2678 __ call_VM_leaf(L7_thread_cache, 2679 CAST_FROM_FN_PTR(address, SharedRuntime::dtrace_method_exit), 2680 G2_thread, O1); 2681 restore_native_result(masm, ret_type, stack_slots); 2682 } 2683 2684 // Clear "last Java frame" SP and PC. 2685 __ verify_thread(); // G2_thread must be correct 2686 __ reset_last_Java_frame(); 2687 2688 // Unpack oop result 2689 if (ret_type == T_OBJECT || ret_type == T_ARRAY) { 2690 Label L; 2691 __ addcc(G0, I0, G0); 2692 __ brx(Assembler::notZero, true, Assembler::pt, L); 2693 __ delayed()->ld_ptr(I0, 0, I0); 2694 __ mov(G0, I0); 2695 __ bind(L); 2696 __ verify_oop(I0); 2697 } 2698 2699 if (!is_critical_native) { 2700 // reset handle block 2701 __ ld_ptr(G2_thread, in_bytes(JavaThread::active_handles_offset()), L5); 2702 __ st_ptr(G0, L5, JNIHandleBlock::top_offset_in_bytes()); 2703 2704 __ ld_ptr(G2_thread, in_bytes(Thread::pending_exception_offset()), G3_scratch); 2705 check_forward_pending_exception(masm, G3_scratch); 2706 } 2707 2708 2709 // Return 2710 2711#ifndef _LP64 2712 if (ret_type == T_LONG) { 2713 2714 // Must leave proper result in O0,O1 and G1 (c2/tiered only) 2715 __ sllx(I0, 32, G1); // Shift bits into high G1 2716 __ srl (I1, 0, I1); // Zero extend O1 (harmless?) 2717 __ or3 (I1, G1, G1); // OR 64 bits into G1 2718 } 2719#endif 2720 2721 __ ret(); 2722 __ delayed()->restore(); 2723 2724 __ flush(); 2725 2726 nmethod *nm = nmethod::new_native_nmethod(method, 2727 compile_id, 2728 masm->code(), 2729 vep_offset, 2730 frame_complete, 2731 stack_slots / VMRegImpl::slots_per_word, 2732 (is_static ? in_ByteSize(klass_offset) : in_ByteSize(receiver_offset)), 2733 in_ByteSize(lock_offset), 2734 oop_maps); 2735 2736 if (is_critical_native) { 2737 nm->set_lazy_critical_native(true); 2738 } 2739 return nm; 2740 2741} 2742 2743#ifdef HAVE_DTRACE_H 2744// --------------------------------------------------------------------------- 2745// Generate a dtrace nmethod for a given signature. The method takes arguments 2746// in the Java compiled code convention, marshals them to the native 2747// abi and then leaves nops at the position you would expect to call a native 2748// function. When the probe is enabled the nops are replaced with a trap 2749// instruction that dtrace inserts and the trace will cause a notification 2750// to dtrace. 2751// 2752// The probes are only able to take primitive types and java/lang/String as 2753// arguments. No other java types are allowed. Strings are converted to utf8 2754// strings so that from dtrace point of view java strings are converted to C 2755// strings. There is an arbitrary fixed limit on the total space that a method 2756// can use for converting the strings. (256 chars per string in the signature). 2757// So any java string larger then this is truncated. 2758 2759static int fp_offset[ConcreteRegisterImpl::number_of_registers] = { 0 }; 2760static bool offsets_initialized = false; 2761 2762nmethod *SharedRuntime::generate_dtrace_nmethod( 2763 MacroAssembler *masm, methodHandle method) { 2764 2765 2766 // generate_dtrace_nmethod is guarded by a mutex so we are sure to 2767 // be single threaded in this method. 
  assert(AdapterHandlerLibrary_lock->owned_by_self(), "must be");

  // Fill in the signature array, for the calling-convention call.
  int total_args_passed = method->size_of_parameters();

  BasicType* in_sig_bt  = NEW_RESOURCE_ARRAY(BasicType, total_args_passed);
  VMRegPair  *in_regs   = NEW_RESOURCE_ARRAY(VMRegPair, total_args_passed);

  // The signature we are going to use for the trap that dtrace will see:
  // java/lang/String is converted, "this" is dropped and any other object
  // is converted to NULL.  (A one-slot java/lang/Long object reference
  // is converted to a two-slot long, which is why we double the allocation).
  BasicType* out_sig_bt = NEW_RESOURCE_ARRAY(BasicType, total_args_passed * 2);
  VMRegPair* out_regs   = NEW_RESOURCE_ARRAY(VMRegPair, total_args_passed * 2);

  int i = 0;
  int total_strings = 0;
  int first_arg_to_pass = 0;
  int total_c_args = 0;

  // Skip the receiver as dtrace doesn't want to see it
  if (!method->is_static()) {
    in_sig_bt[i++] = T_OBJECT;
    first_arg_to_pass = 1;
  }

  SignatureStream ss(method->signature());
  for ( ; !ss.at_return_type(); ss.next()) {
    BasicType bt = ss.type();
    in_sig_bt[i++] = bt;  // Collect remaining bits of signature
    out_sig_bt[total_c_args++] = bt;
    if (bt == T_OBJECT) {
      Symbol* s = ss.as_symbol_or_null();
      if (s == vmSymbols::java_lang_String()) {
        total_strings++;
        out_sig_bt[total_c_args-1] = T_ADDRESS;
      } else if (s == vmSymbols::java_lang_Boolean() ||
                 s == vmSymbols::java_lang_Byte()) {
        out_sig_bt[total_c_args-1] = T_BYTE;
      } else if (s == vmSymbols::java_lang_Character() ||
                 s == vmSymbols::java_lang_Short()) {
        out_sig_bt[total_c_args-1] = T_SHORT;
      } else if (s == vmSymbols::java_lang_Integer() ||
                 s == vmSymbols::java_lang_Float()) {
        out_sig_bt[total_c_args-1] = T_INT;
      } else if (s == vmSymbols::java_lang_Long() ||
                 s == vmSymbols::java_lang_Double()) {
        out_sig_bt[total_c_args-1] = T_LONG;
        out_sig_bt[total_c_args++] = T_VOID;
      }
    } else if (bt == T_LONG || bt == T_DOUBLE) {
      in_sig_bt[i++] = T_VOID;   // Longs & doubles take 2 Java slots
      // We convert double to long
      out_sig_bt[total_c_args-1] = T_LONG;
      out_sig_bt[total_c_args++] = T_VOID;
    } else if (bt == T_FLOAT) {
      // We convert float to int
      out_sig_bt[total_c_args-1] = T_INT;
    }
  }

  assert(i == total_args_passed, "validly parsed signature");

  // Now get the compiled-Java layout as input arguments
  int comp_args_on_stack;
  comp_args_on_stack = SharedRuntime::java_calling_convention(
      in_sig_bt, in_regs, total_args_passed, false);

  // We have received a description of where all the java args are located
  // on entry to the wrapper.  We need to convert these args to where
  // a native (non-jni) function would expect them.  To figure out
  // where they go we convert the java signature to a C signature and remove
  // T_VOID for any long/double we might have received.

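  // As a concrete illustration (hypothetical example, not from the code):
  // for a static method with Java signature (Ljava/lang/String;D)V the
  // arrays built above would be
  //   in_sig_bt  = { T_OBJECT, T_DOUBLE, T_VOID }
  //   out_sig_bt = { T_ADDRESS, T_LONG, T_VOID }
  // since the String becomes a utf8 char* and the double is passed as a long.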
  // Now figure out where the args must be stored and how much stack space
  // they require (neglecting out_preserve_stack_slots but providing space for
  // storing the first six register arguments).  It's weird; see int_stk_helper.
  //
  int out_arg_slots;
  out_arg_slots = c_calling_convention(out_sig_bt, out_regs, NULL, total_c_args);

  // Calculate the total number of stack slots we will need.

  // First count the abi requirement plus all of the outgoing args
  int stack_slots = SharedRuntime::out_preserve_stack_slots() + out_arg_slots;

  // Plus a temp for possible conversion of float/double/long register args

  int conversion_temp = stack_slots;
  stack_slots += 2;


  // Now space for the string(s) we must convert

  int string_locs = stack_slots;
  stack_slots += total_strings *
                   (max_dtrace_string_size / VMRegImpl::stack_slot_size);

  // Ok.  The space we have allocated will look like:
  //
  //
  // FP-> |                     |
  //      |---------------------|
  //      | string[n]           |
  //      |---------------------| <- string_locs[n]
  //      | string[n-1]         |
  //      |---------------------| <- string_locs[n-1]
  //      |  ...                |
  //      |  ...                |
  //      |---------------------| <- string_locs[1]
  //      | string[0]           |
  //      |---------------------| <- string_locs[0]
  //      | temp                |
  //      |---------------------| <- conversion_temp
  //      | outbound memory     |
  //      | based arguments     |
  //      |                     |
  //      |---------------------|
  //      |                     |
  // SP-> | out_preserved_slots |
  //
  //

  // Now compute actual number of stack words we need, rounding to make
  // the stack properly aligned.
  stack_slots = round_to(stack_slots, 4 * VMRegImpl::slots_per_word);

  int stack_size = stack_slots * VMRegImpl::stack_slot_size;

  intptr_t start = (intptr_t)__ pc();

  // First thing make an ic check to see if we should even be here

  {
    Label L;
    const Register temp_reg = G3_scratch;
    AddressLiteral ic_miss(SharedRuntime::get_ic_miss_stub());
    __ verify_oop(O0);
    __ ld_ptr(O0, oopDesc::klass_offset_in_bytes(), temp_reg);
    __ cmp_and_brx_short(temp_reg, G5_inline_cache_reg, Assembler::equal, Assembler::pt, L);

    __ jump_to(ic_miss, temp_reg);
    __ delayed()->nop();
    __ align(CodeEntryAlignment);
    __ bind(L);
  }

  int vep_offset = ((intptr_t)__ pc()) - start;


  // The instruction at the verified entry point must be 5 bytes or longer
  // because it can be patched on the fly by make_non_entrant.  The stack bang
  // instruction fits that requirement.

  // Generate stack overflow check before creating frame
  __ generate_stack_overflow_check(stack_size);

  assert(((intptr_t)__ pc() - start - vep_offset) >= 5,
         "valid size for make_non_entrant");

  // Generate a new frame for the wrapper.
  __ save(SP, -stack_size, SP);

  // Frame is now completed as far as size and linkage.

  int frame_complete = ((intptr_t)__ pc()) - start;

#ifdef ASSERT
  bool reg_destroyed[RegisterImpl::number_of_registers];
  bool freg_destroyed[FloatRegisterImpl::number_of_registers];
  for (int r = 0 ; r < RegisterImpl::number_of_registers ; r++) {
    reg_destroyed[r] = false;
  }
  for (int f = 0 ; f < FloatRegisterImpl::number_of_registers ; f++) {
    freg_destroyed[f] = false;
  }

#endif /* ASSERT */

  VMRegPair zero;
  const Register g0 = G0; // without this we get a compiler warning (why??)
  zero.set2(g0->as_VMReg());

  int c_arg, j_arg;

  Register conversion_off = noreg;

  for (j_arg = first_arg_to_pass, c_arg = 0 ;
       j_arg < total_args_passed ; j_arg++, c_arg++ ) {

    VMRegPair src = in_regs[j_arg];
    VMRegPair dst = out_regs[c_arg];

#ifdef ASSERT
    if (src.first()->is_Register()) {
      assert(!reg_destroyed[src.first()->as_Register()->encoding()], "ack!");
    } else if (src.first()->is_FloatRegister()) {
      assert(!freg_destroyed[src.first()->as_FloatRegister()->encoding(
                                               FloatRegisterImpl::S)], "ack!");
    }
    if (dst.first()->is_Register()) {
      reg_destroyed[dst.first()->as_Register()->encoding()] = true;
    } else if (dst.first()->is_FloatRegister()) {
      freg_destroyed[dst.first()->as_FloatRegister()->encoding(
                                                 FloatRegisterImpl::S)] = true;
    }
#endif /* ASSERT */

    switch (in_sig_bt[j_arg]) {
      case T_ARRAY:
      case T_OBJECT:
        {
          if (out_sig_bt[c_arg] == T_BYTE  || out_sig_bt[c_arg] == T_SHORT ||
              out_sig_bt[c_arg] == T_INT   || out_sig_bt[c_arg] == T_LONG) {
            // need to unbox a one-slot value
            Register in_reg = L0;
            Register tmp = L2;
            if (src.first()->is_reg()) {
              in_reg = src.first()->as_Register();
            } else {
              assert(Assembler::is_simm13(reg2offset(src.first()) + STACK_BIAS),
                     "must be");
              __ ld_ptr(FP, reg2offset(src.first()) + STACK_BIAS, in_reg);
            }
            // If the final destination is an acceptable register
            if (dst.first()->is_reg()) {
              if (dst.is_single_phys_reg() || out_sig_bt[c_arg] != T_LONG) {
                tmp = dst.first()->as_Register();
              }
            }

            Label skipUnbox;
            if (wordSize == 4 && out_sig_bt[c_arg] == T_LONG) {
              __ mov(G0, tmp->successor());
            }
            __ br_null(in_reg, true, Assembler::pn, skipUnbox);
            __ delayed()->mov(G0, tmp);

            BasicType bt = out_sig_bt[c_arg];
            int box_offset = java_lang_boxing_object::value_offset_in_bytes(bt);
            switch (bt) {
                case T_BYTE:
                  __ ldub(in_reg, box_offset, tmp); break;
                case T_SHORT:
                  __ lduh(in_reg, box_offset, tmp); break;
                case T_INT:
                  __ ld(in_reg, box_offset, tmp); break;
                case T_LONG:
                  __ ld_long(in_reg, box_offset, tmp); break;
                default: ShouldNotReachHere();
            }

            __ bind(skipUnbox);
            // If tmp wasn't final destination copy to final destination
            if (tmp == L2) {
              VMRegPair tmp_as_VM = reg64_to_VMRegPair(L2);
              if (out_sig_bt[c_arg] == T_LONG) {
                long_move(masm, tmp_as_VM, dst);
              } else {
                move32_64(masm, tmp_as_VM, out_regs[c_arg]);
              }
            }
            if (out_sig_bt[c_arg] == T_LONG) {
              assert(out_sig_bt[c_arg+1] == T_VOID, "must be");
              ++c_arg; // move over the T_VOID to keep the loop indices in sync
            }
          } else if (out_sig_bt[c_arg] == T_ADDRESS) {
            Register s =
                src.first()->is_reg() ? src.first()->as_Register() : L2;
            Register d =
                dst.first()->is_reg() ? dst.first()->as_Register() : L2;

            // We store the oop now so that the conversion pass can reach it
            // while in the inner frame.  This will be the only store if
            // the oop is NULL.
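            // Four cases follow (src and dst can each be a register or a
            // stack slot); in every case the raw String oop ends up in the
            // out-arg position (register or memory) so the string-conversion
            // pass below can find it.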
            if (s != L2) {
              // src is register
              if (d != L2) {
                // dst is register
                __ mov(s, d);
              } else {
                assert(Assembler::is_simm13(reg2offset(dst.first()) +
                                            STACK_BIAS), "must be");
                __ st_ptr(s, SP, reg2offset(dst.first()) + STACK_BIAS);
              }
            } else {
                // src not a register
                assert(Assembler::is_simm13(reg2offset(src.first()) +
                                            STACK_BIAS), "must be");
                __ ld_ptr(FP, reg2offset(src.first()) + STACK_BIAS, d);
                if (d == L2) {
                  assert(Assembler::is_simm13(reg2offset(dst.first()) +
                                              STACK_BIAS), "must be");
                  __ st_ptr(d, SP, reg2offset(dst.first()) + STACK_BIAS);
                }
            }
          } else if (out_sig_bt[c_arg] != T_VOID) {
            // Convert the arg to NULL
            if (dst.first()->is_reg()) {
              __ mov(G0, dst.first()->as_Register());
            } else {
              assert(Assembler::is_simm13(reg2offset(dst.first()) +
                                          STACK_BIAS), "must be");
              __ st_ptr(G0, SP, reg2offset(dst.first()) + STACK_BIAS);
            }
          }
        }
        break;
      case T_VOID:
        break;

      case T_FLOAT:
        if (src.first()->is_stack()) {
          // Stack to stack/reg is simple
          move32_64(masm, src, dst);
        } else {
          if (dst.first()->is_reg()) {
            // freg -> reg
            int off =
              STACK_BIAS + conversion_temp * VMRegImpl::stack_slot_size;
            Register d = dst.first()->as_Register();
            if (Assembler::is_simm13(off)) {
              __ stf(FloatRegisterImpl::S, src.first()->as_FloatRegister(),
                     SP, off);
              __ ld(SP, off, d);
            } else {
              if (conversion_off == noreg) {
                __ set(off, L6);
                conversion_off = L6;
              }
              __ stf(FloatRegisterImpl::S, src.first()->as_FloatRegister(),
                     SP, conversion_off);
              __ ld(SP, conversion_off, d);
            }
          } else {
            // freg -> mem
            int off = STACK_BIAS + reg2offset(dst.first());
            if (Assembler::is_simm13(off)) {
              __ stf(FloatRegisterImpl::S, src.first()->as_FloatRegister(),
                     SP, off);
            } else {
              if (conversion_off == noreg) {
                __ set(off, L6);
                conversion_off = L6;
              }
              __ stf(FloatRegisterImpl::S, src.first()->as_FloatRegister(),
                     SP, conversion_off);
            }
          }
        }
        break;

      case T_DOUBLE:
        assert( j_arg + 1 < total_args_passed &&
                in_sig_bt[j_arg + 1] == T_VOID &&
                out_sig_bt[c_arg+1] == T_VOID, "bad arg list");
        if (src.first()->is_stack()) {
          // Stack to stack/reg is simple
          long_move(masm, src, dst);
        } else {
          Register d = dst.first()->is_reg() ? dst.first()->as_Register() : L2;

          // Destination could be an odd reg on 32bit in which case
          // we can't load directly to the destination.
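          // (Illustrative) ld_long on 32-bit needs an even/odd register pair
          // starting at an even register, so if dst.first() is, say, O1 we
          // bounce the value through L2 (an even register) and move it to
          // the real destination afterwards.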

          if (!d->is_even() && wordSize == 4) {
            d = L2;
          }
          int off = STACK_BIAS + conversion_temp * VMRegImpl::stack_slot_size;
          if (Assembler::is_simm13(off)) {
            __ stf(FloatRegisterImpl::D, src.first()->as_FloatRegister(),
                   SP, off);
            __ ld_long(SP, off, d);
          } else {
            if (conversion_off == noreg) {
              __ set(off, L6);
              conversion_off = L6;
            }
            __ stf(FloatRegisterImpl::D, src.first()->as_FloatRegister(),
                   SP, conversion_off);
            __ ld_long(SP, conversion_off, d);
          }
          if (d == L2) {
            long_move(masm, reg64_to_VMRegPair(L2), dst);
          }
        }
        break;

      case T_LONG :
        // 32bit can't do a split move of something like g1 -> O0, O1
        // so use a memory temp
        if (src.is_single_phys_reg() && wordSize == 4) {
          Register tmp = L2;
          if (dst.first()->is_reg() &&
              (wordSize == 8 || dst.first()->as_Register()->is_even())) {
            tmp = dst.first()->as_Register();
          }

          int off = STACK_BIAS + conversion_temp * VMRegImpl::stack_slot_size;
          if (Assembler::is_simm13(off)) {
            __ stx(src.first()->as_Register(), SP, off);
            __ ld_long(SP, off, tmp);
          } else {
            if (conversion_off == noreg) {
              __ set(off, L6);
              conversion_off = L6;
            }
            __ stx(src.first()->as_Register(), SP, conversion_off);
            __ ld_long(SP, conversion_off, tmp);
          }

          if (tmp == L2) {
            long_move(masm, reg64_to_VMRegPair(L2), dst);
          }
        } else {
          long_move(masm, src, dst);
        }
        break;

      case T_ADDRESS: assert(false, "found T_ADDRESS in java args");

      default:
        move32_64(masm, src, dst);
    }
  }


  // If we have any strings we must store any register-based arg to the stack.
  // This includes any still-live floating point registers too.

  if (total_strings > 0) {

    // protect all the arg registers
    __ save_frame(0);
    __ mov(G2_thread, L7_thread_cache);
    const Register L2_string_off = L2;

    // Get first string offset
    __ set(string_locs * VMRegImpl::stack_slot_size, L2_string_off);

    for (c_arg = 0 ; c_arg < total_c_args ; c_arg++) {
      if (out_sig_bt[c_arg] == T_ADDRESS) {

        VMRegPair dst = out_regs[c_arg];
        const Register d = dst.first()->is_reg() ?
            dst.first()->as_Register()->after_save() : noreg;

        // It's a string; the oop was already copied to the out arg
        // position
        if (d != noreg) {
          __ mov(d, O0);
        } else {
          assert(Assembler::is_simm13(reg2offset(dst.first()) + STACK_BIAS),
                 "must be");
          __ ld_ptr(FP, reg2offset(dst.first()) + STACK_BIAS, O0);
        }
        Label skip;

        __ br_null(O0, false, Assembler::pn, skip);
        __ delayed()->add(FP, L2_string_off, O1);

        if (d != noreg) {
          __ mov(O1, d);
        } else {
          assert(Assembler::is_simm13(reg2offset(dst.first()) + STACK_BIAS),
                 "must be");
          __ st_ptr(O1, FP, reg2offset(dst.first()) + STACK_BIAS);
        }

        __ call(CAST_FROM_FN_PTR(address, SharedRuntime::get_utf),
                relocInfo::runtime_call_type);
        __ delayed()->add(L2_string_off, max_dtrace_string_size, L2_string_off);

        __ bind(skip);

      }

    }
    __ mov(L7_thread_cache, G2_thread);
    __ restore();

  }

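  // Note on the conversion loop above (illustrative): get_utf is called with
  // the String oop in O0 and the destination buffer (FP + current string
  // offset) in O1; the delay slot advances L2_string_off by
  // max_dtrace_string_size so each string gets its own fixed-size buffer.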
  // Ok now we are done.  Need to place the nop that dtrace wants in order to
  // patch in the trap

  int patch_offset = ((intptr_t)__ pc()) - start;

  __ nop();


  // Return

  __ ret();
  __ delayed()->restore();

  __ flush();

  nmethod *nm = nmethod::new_dtrace_nmethod(
      method, masm->code(), vep_offset, patch_offset, frame_complete,
      stack_slots / VMRegImpl::slots_per_word);
  return nm;

}

#endif // HAVE_DTRACE_H

// this function returns the adjusted size (in number of words) of a c2i adapter
// activation for use during deoptimization
int Deoptimization::last_frame_adjust(int callee_parameters, int callee_locals) {
  assert(callee_locals >= callee_parameters,
         "test and remove; got more parms than locals");
  if (callee_locals < callee_parameters)
    return 0;                   // No adjustment for negative locals
  int diff = (callee_locals - callee_parameters) * Interpreter::stackElementWords;
  return round_to(diff, WordsPerLong);
}

// "Top of Stack" slots that may be unused by the calling convention but must
// otherwise be preserved.
// On Intel these are not necessary and the value can be zero.
// On Sparc this describes the words reserved for storing a register window
// when an interrupt occurs.
uint SharedRuntime::out_preserve_stack_slots() {
  return frame::register_save_words * VMRegImpl::slots_per_word;
}

static void gen_new_frame(MacroAssembler* masm, bool deopt) {
//
// Common out the new frame generation for deopt and uncommon trap
//
  Register        G3pcs              = G3_scratch; // Array of new pcs (input)
  Register        Oreturn0           = O0;
  Register        Oreturn1           = O1;
  Register        O2UnrollBlock      = O2;
  Register        O3array            = O3;         // Array of frame sizes (input)
  Register        O4array_size       = O4;         // number of frames (input)
  Register        O7frame_size       = O7;         // size of the current frame (scratch)

  __ ld_ptr(O3array, 0, O7frame_size);
  __ sub(G0, O7frame_size, O7frame_size);
  __ save(SP, O7frame_size, SP);
  __ ld_ptr(G3pcs, 0, I7);                      // load frame's new pc

  #ifdef ASSERT
  // make sure that the frames are aligned properly
#ifndef _LP64
  __ btst(wordSize*2-1, SP);
  __ breakpoint_trap(Assembler::notZero, Assembler::ptr_cc);
#endif
  #endif

  // Deopt needs to pass some extra live values from frame to frame

  if (deopt) {
    __ mov(Oreturn0->after_save(), Oreturn0);
    __ mov(Oreturn1->after_save(), Oreturn1);
  }

  __ mov(O4array_size->after_save(), O4array_size);
  __ sub(O4array_size, 1, O4array_size);
  __ mov(O3array->after_save(), O3array);
  __ mov(O2UnrollBlock->after_save(), O2UnrollBlock);
  __ add(G3pcs, wordSize, G3pcs);               // point to next pc value

  #ifdef ASSERT
  // trash registers to show a clear pattern in backtraces
  __ set(0xDEAD0000, I0);
  __ add(I0,  2, I1);
  __ add(I0,  4, I2);
  __ add(I0,  6, I3);
  __ add(I0,  8, I4);
  // Don't touch I5; it could hold a valuable savedSP
  __ set(0xDEADBEEF, L0);
  __ mov(L0, L1);
  __ mov(L0, L2);
  __ mov(L0, L3);
  __ mov(L0, L4);
  __ mov(L0, L5);

  // trash the return value as there is nothing to return yet
  __ set(0xDEAD0001, O7);
  #endif

  __ mov(SP, O5_savedSP);
}

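// In rough C terms, gen_new_frame does the following (illustrative only):
//   frame_size = *O3array;        // current entry of the frame size array
//   save %sp, -frame_size, %sp;   // push a new register window
//   %i7 = *G3pcs;                 // the new frame's pc
//   O4array_size--; G3pcs += wordSize;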
static void make_new_frames(MacroAssembler* masm, bool deopt) {
  //
  // loop through the UnrollBlock info and create new frames
  //
  Register        G3pcs              = G3_scratch;
  Register        Oreturn0           = O0;
  Register        Oreturn1           = O1;
  Register        O2UnrollBlock      = O2;
  Register        O3array            = O3;
  Register        O4array_size       = O4;
  Label           loop;

  // Before we make new frames, check to see if stack is available.
  // Do this after the caller's return address is on top of stack.
  if (UseStackBanging) {
    // Get total frame size for interpreted frames
    __ ld(O2UnrollBlock, Deoptimization::UnrollBlock::total_frame_sizes_offset_in_bytes(), O4);
    __ bang_stack_size(O4, O3, G3_scratch);
  }

  __ ld(O2UnrollBlock, Deoptimization::UnrollBlock::number_of_frames_offset_in_bytes(), O4array_size);
  __ ld_ptr(O2UnrollBlock, Deoptimization::UnrollBlock::frame_pcs_offset_in_bytes(), G3pcs);
  __ ld_ptr(O2UnrollBlock, Deoptimization::UnrollBlock::frame_sizes_offset_in_bytes(), O3array);

  // Adjust old interpreter frame to make space for new frame's extra java locals
  //
  // We capture the original sp for the transition frame only because it is needed in
  // order to properly calculate interpreter_sp_adjustment.  Even though in real life
  // every interpreter frame captures a savedSP it is only needed at the transition
  // (fortunately).  If we had to have it correct everywhere then we would need to
  // be told the sp_adjustment for each frame we create.  If the frame size array
  // were to have twice the frame count entries then we could have pairs
  // [sp_adjustment, frame_size] for each frame we create and keep up the
  // illusion everywhere.
  //

  __ ld(O2UnrollBlock, Deoptimization::UnrollBlock::caller_adjustment_offset_in_bytes(), O7);
  __ mov(SP, O5_savedSP);       // remember initial sender's original sp before adjustment
  __ sub(SP, O7, SP);

#ifdef ASSERT
  // make sure that there is at least one entry in the array
  __ tst(O4array_size);
  __ breakpoint_trap(Assembler::zero, Assembler::icc);
#endif

  // Now push the new interpreter frames
  __ bind(loop);

  // allocate a new frame, filling the registers

  gen_new_frame(masm, deopt);        // allocate an interpreter frame

  __ cmp_zero_and_br(Assembler::notZero, O4array_size, loop);
  __ delayed()->add(O3array, wordSize, O3array);
  __ ld_ptr(G3pcs, 0, O7);           // load final frame new pc
}

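// (Illustrative) the net effect of make_new_frames is roughly:
//   SP -= caller_adjustment;               // room for callee's extra locals
//   for (i = 0; i < number_of_frames; i++)
//     push a skeletal frame of size frame_sizes[i] with pc frame_pcs[i];
//   O7 = *G3pcs;                           // pc to return through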
//------------------------------generate_deopt_blob----------------------------
// Ought to generate an ideal graph & compile, but here's some SPARC ASM
// instead.
void SharedRuntime::generate_deopt_blob() {
  // allocate space for the code
  ResourceMark rm;
  // setup code generation tools
  int pad = VerifyThread ? 512 : 0; // Extra slop space for more verify code
  if (UseStackBanging) {
    pad += StackShadowPages*16 + 32;
  }
#ifdef _LP64
  CodeBuffer buffer("deopt_blob", 2100+pad, 512);
#else
  // Measured 8/7/03 at 1212 in 32bit debug build (no VerifyThread)
  // Measured 8/7/03 at 1396 in 32bit debug build (VerifyThread)
  CodeBuffer buffer("deopt_blob", 1600+pad, 512);
#endif /* _LP64 */
  MacroAssembler* masm               = new MacroAssembler(&buffer);
  FloatRegister   Freturn0           = F0;
  Register        Greturn1           = G1;
  Register        Oreturn0           = O0;
  Register        Oreturn1           = O1;
  Register        O2UnrollBlock      = O2;
  Register        L0deopt_mode       = L0;
  Register        G4deopt_mode       = G4_scratch;
  int             frame_size_words;
  Address         saved_Freturn0_addr(FP, -sizeof(double) + STACK_BIAS);
#if !defined(_LP64) && defined(COMPILER2)
  Address         saved_Greturn1_addr(FP, -sizeof(double) -sizeof(jlong) + STACK_BIAS);
#endif
  Label           cont;

  OopMapSet *oop_maps = new OopMapSet();

  //
  // This is the entry point for code which is returning to a de-optimized
  // frame.
  // The steps taken by this frame are as follows:
  //   - push a dummy "register_save" and save the return values (O0, O1, F0/F1, G1)
  //     and all potentially live registers (at a pollpoint many registers can be live).
  //
  //   - call the C routine: Deoptimization::fetch_unroll_info (this function
  //     returns information about the number and size of interpreter frames
  //     which are equivalent to the frame which is being deoptimized)
  //   - deallocate the unpack frame, restoring only result values.  Other
  //     volatile registers will now be captured in the vframeArray as needed.
  //   - deallocate the deoptimization frame
  //   - in a loop using the information returned in the previous step
  //     push new interpreter frames (take care to propagate the return
  //     values through each new frame pushed)
  //   - create a dummy "unpack_frame" and save the return values (O0, O1, F0)
  //   - call the C routine: Deoptimization::unpack_frames (this function
  //     lays out values on the interpreter frame which was just created)
  //   - deallocate the dummy unpack_frame
  //   - ensure that all the return values are correctly set and then do
  //     a return to the interpreter entry point
  //
  // Refer to the following methods for more information:
  //   - Deoptimization::fetch_unroll_info
  //   - Deoptimization::unpack_frames

  OopMap* map = NULL;

  int start = __ offset();

  // restore G2, the trampoline destroyed it
  __ get_thread();

  // On entry we have been called by the deoptimized nmethod with a call that
  // replaced the original call (or safepoint polling location) so the deoptimizing
  // pc is now in O7.  Return values are still in the expected places.

  map = RegisterSaver::save_live_registers(masm, 0, &frame_size_words);
  __ ba(cont);
  __ delayed()->mov(Deoptimization::Unpack_deopt, L0deopt_mode);

  int exception_offset = __ offset() - start;

  // restore G2, the trampoline destroyed it
  __ get_thread();

  // On entry we have been jumped to by the exception handler (or exception_blob
  // for server).  O0 contains the exception oop and O7 contains the original
  // exception pc.  So if we push a frame here it will look to the
  // stack walking code (fetch_unroll_info) just like a normal call so
  // state will be extracted normally.
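
  // (Overview, illustrative) this blob ends up with four entry points, all
  // meeting at 'cont' with the unpack mode in L0deopt_mode: offset 0 for a
  // plain deopt, exception_offset for an exception in O0/O7,
  // exception_in_tls_offset for an exception already stored in the thread,
  // and reexecute_offset for reexecution.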

  // save exception oop in JavaThread and fall through into the
  // exception_in_tls case since they are handled in same way except
  // for where the pending exception is kept.
  __ st_ptr(Oexception, G2_thread, JavaThread::exception_oop_offset());

  //
  // Vanilla deoptimization with an exception pending in exception_oop
  //
  int exception_in_tls_offset = __ offset() - start;

  // No need to update oop_map as each call to save_live_registers will produce identical oopmap
  (void) RegisterSaver::save_live_registers(masm, 0, &frame_size_words);

  // Restore G2_thread
  __ get_thread();

#ifdef ASSERT
  {
    // verify that there is really an exception oop in exception_oop
    Label has_exception;
    __ ld_ptr(G2_thread, JavaThread::exception_oop_offset(), Oexception);
    __ br_notnull_short(Oexception, Assembler::pt, has_exception);
    __ stop("no exception in thread");
    __ bind(has_exception);

    // verify that there is no pending exception
    Label no_pending_exception;
    Address exception_addr(G2_thread, Thread::pending_exception_offset());
    __ ld_ptr(exception_addr, Oexception);
    __ br_null_short(Oexception, Assembler::pt, no_pending_exception);
    __ stop("must not have pending exception here");
    __ bind(no_pending_exception);
  }
#endif

  __ ba(cont);
  __ delayed()->mov(Deoptimization::Unpack_exception, L0deopt_mode);

  //
  // Reexecute entry, similar to c2 uncommon trap
  //
  int reexecute_offset = __ offset() - start;

  // No need to update oop_map as each call to save_live_registers will produce identical oopmap
  (void) RegisterSaver::save_live_registers(masm, 0, &frame_size_words);

  __ mov(Deoptimization::Unpack_reexecute, L0deopt_mode);

  __ bind(cont);

  __ set_last_Java_frame(SP, noreg);

  // do the call by hand so we can get the oopmap

  __ mov(G2_thread, L7_thread_cache);
  __ call(CAST_FROM_FN_PTR(address, Deoptimization::fetch_unroll_info), relocInfo::runtime_call_type);
  __ delayed()->mov(G2_thread, O0);

  // Set an oopmap for the call site; this describes all our saved volatile registers

  oop_maps->add_gc_map( __ offset()-start, map);

  __ mov(L7_thread_cache, G2_thread);

  __ reset_last_Java_frame();

  // NOTE: we know that only O0/O1 will be reloaded by restore_result_registers
  // so this move will survive

  __ mov(L0deopt_mode, G4deopt_mode);

  __ mov(O0, O2UnrollBlock->after_save());

  RegisterSaver::restore_result_registers(masm);

  Label noException;
  __ cmp_and_br_short(G4deopt_mode, Deoptimization::Unpack_exception, Assembler::notEqual, Assembler::pt, noException);

  // Move the pending exception from exception_oop to Oexception so
  // the pending exception will be picked up by the interpreter.
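  // In rough C terms the three instructions below do:
  //   Oexception = thread->_exception_oop;
  //   thread->_exception_oop = NULL;
  //   thread->_exception_pc  = NULL;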
  __ ld_ptr(G2_thread, in_bytes(JavaThread::exception_oop_offset()), Oexception);
  __ st_ptr(G0, G2_thread, in_bytes(JavaThread::exception_oop_offset()));
  __ st_ptr(G0, G2_thread, in_bytes(JavaThread::exception_pc_offset()));
  __ bind(noException);

  // deallocate the deoptimization frame taking care to preserve the return values
  __ mov(Oreturn0,      Oreturn0->after_save());
  __ mov(Oreturn1,      Oreturn1->after_save());
  __ mov(O2UnrollBlock, O2UnrollBlock->after_save());
  __ restore();

  // Allocate new interpreter frame(s) and possible c2i adapter frame

  make_new_frames(masm, true);

  // push a dummy "unpack_frame" taking care of float return values and
  // call Deoptimization::unpack_frames to have the unpacker layout
  // information in the interpreter frames just created and then return
  // to the interpreter entry point
  __ save(SP, -frame_size_words*wordSize, SP);
  __ stf(FloatRegisterImpl::D, Freturn0, saved_Freturn0_addr);
#if !defined(_LP64)
#if defined(COMPILER2)
  // 32-bit 1-register longs return longs in G1
  __ stx(Greturn1, saved_Greturn1_addr);
#endif
  __ set_last_Java_frame(SP, noreg);
  __ call_VM_leaf(L7_thread_cache, CAST_FROM_FN_PTR(address, Deoptimization::unpack_frames), G2_thread, G4deopt_mode);
#else
  // LP64 uses g4 in set_last_Java_frame
  __ mov(G4deopt_mode, O1);
  __ set_last_Java_frame(SP, G0);
  __ call_VM_leaf(L7_thread_cache, CAST_FROM_FN_PTR(address, Deoptimization::unpack_frames), G2_thread, O1);
#endif
  __ reset_last_Java_frame();
  __ ldf(FloatRegisterImpl::D, saved_Freturn0_addr, Freturn0);

#if !defined(_LP64) && defined(COMPILER2)
  // In 32 bit, C2 returns longs in G1 so restore the saved G1 into
  // I0/I1 if the return value is long.
  Label not_long;
  __ cmp_and_br_short(O0, T_LONG, Assembler::notEqual, Assembler::pt, not_long);
  __ ldd(saved_Greturn1_addr, I0);
  __ bind(not_long);
#endif
  __ ret();
  __ delayed()->restore();

  masm->flush();
  _deopt_blob = DeoptimizationBlob::create(&buffer, oop_maps, 0, exception_offset, reexecute_offset, frame_size_words);
  _deopt_blob->set_unpack_with_exception_in_tls_offset(exception_in_tls_offset);
}

#ifdef COMPILER2

//------------------------------generate_uncommon_trap_blob--------------------
// Ought to generate an ideal graph & compile, but here's some SPARC ASM
// instead.
void SharedRuntime::generate_uncommon_trap_blob() {
  // allocate space for the code
  ResourceMark rm;
  // setup code generation tools
  int pad = VerifyThread ? 512 : 0;
  if (UseStackBanging) {
    pad += StackShadowPages*16 + 32;
  }
#ifdef _LP64
  CodeBuffer buffer("uncommon_trap_blob", 2700+pad, 512);
#else
  // Measured 8/7/03 at 660 in 32bit debug build (no VerifyThread)
  // Measured 8/7/03 at 1028 in 32bit debug build (VerifyThread)
  CodeBuffer buffer("uncommon_trap_blob", 2000+pad, 512);
#endif
  MacroAssembler* masm               = new MacroAssembler(&buffer);
  Register        O2UnrollBlock      = O2;
  Register        O2klass_index      = O2;

  //
  // This is the entry point for all traps the compiler takes when it thinks
  // it cannot handle further execution of compiled code.  The frame is
  // deoptimized in these cases and converted into interpreter frames for
  // execution.
  // The steps taken by this frame are as follows:
  //   - push a fake "unpack_frame"
  //   - call the C routine Deoptimization::uncommon_trap (this function
  //     packs the current compiled frame into vframe arrays and returns
  //     information about the number and size of interpreter frames which
  //     are equivalent to the frame which is being deoptimized)
  //   - deallocate the "unpack_frame"
  //   - deallocate the deoptimization frame
  //   - in a loop using the information returned in the previous step
  //     push interpreter frames;
  //   - create a dummy "unpack_frame"
  //   - call the C routine: Deoptimization::unpack_frames (this function
  //     lays out values on the interpreter frame which was just created)
  //   - deallocate the dummy unpack_frame
  //   - return to the interpreter entry point
  //
  // Refer to the following methods for more information:
  //   - Deoptimization::uncommon_trap
  //   - Deoptimization::unpack_frames

  // the unloaded class index is in O0 (first parameter to this blob)

  // push a dummy "unpack_frame"
  // and call Deoptimization::uncommon_trap to pack the compiled frame into
  // a vframe array and return the UnrollBlock information
  __ save_frame(0);
  __ set_last_Java_frame(SP, noreg);
  __ mov(I0, O2klass_index);
  __ call_VM_leaf(L7_thread_cache, CAST_FROM_FN_PTR(address, Deoptimization::uncommon_trap), G2_thread, O2klass_index);
  __ reset_last_Java_frame();
  __ mov(O0, O2UnrollBlock->after_save());
  __ restore();

  // deallocate the deoptimized frame taking care to preserve the return values
  __ mov(O2UnrollBlock, O2UnrollBlock->after_save());
  __ restore();

  // Allocate new interpreter frame(s) and possible c2i adapter frame

  make_new_frames(masm, false);

  // push a dummy "unpack_frame" taking care of float return values and
  // call Deoptimization::unpack_frames to have the unpacker layout
  // information in the interpreter frames just created and then return
  // to the interpreter entry point
  __ save_frame(0);
  __ set_last_Java_frame(SP, noreg);
  __ mov(Deoptimization::Unpack_uncommon_trap, O3); // indicate it is the uncommon trap case
  __ call_VM_leaf(L7_thread_cache, CAST_FROM_FN_PTR(address, Deoptimization::unpack_frames), G2_thread, O3);
  __ reset_last_Java_frame();
  __ ret();
  __ delayed()->restore();

  masm->flush();
  _uncommon_trap_blob = UncommonTrapBlob::create(&buffer, NULL, __ total_frame_size_in_bytes(0)/wordSize);
}

#endif // COMPILER2

//------------------------------generate_handler_blob-------------------
//
// Generate a special Compile2Runtime blob that saves all registers, and sets
// up an OopMap.
//
// This blob is jumped to (via a breakpoint and the signal handler) from a
// safepoint in compiled code.  On entry to this blob, O7 contains the
// address in the original nmethod at which we should resume normal execution.
// Thus, this blob looks like a subroutine which must preserve lots of
// registers and return normally.  Note that O7 is never register-allocated,
// so it is guaranteed to be free here.
//

// The hardest part of what this blob must do is to save the 64-bit %o
// registers in the 32-bit build.
// A simple 'save' turns the %o's to %i's and
// an interrupt will chop off their heads.  Making space in the caller's frame
// first will let us save the 64-bit %o's before save'ing, but we cannot hand
// the adjusted FP off to the GC stack-crawler: this will modify the caller's
// SP and mess up HIS OopMaps.  So we first adjust the caller's SP, then save
// the 64-bit %o's, then do a save, then fixup the caller's SP (our FP).
// Tricky, tricky, tricky...

SafepointBlob* SharedRuntime::generate_handler_blob(address call_ptr, int poll_type) {
  assert(StubRoutines::forward_exception_entry() != NULL, "must be generated before");

  // allocate space for the code
  ResourceMark rm;
  // setup code generation tools
  // Measured 8/7/03 at 896 in 32bit debug build (no VerifyThread)
  // Measured 8/7/03 at 1080 in 32bit debug build (VerifyThread)
  // even larger with TraceJumps
  int pad = TraceJumps ? 512 : 0;
  CodeBuffer buffer("handler_blob", 1600 + pad, 512);
  MacroAssembler* masm = new MacroAssembler(&buffer);
  int             frame_size_words;
  OopMapSet *oop_maps = new OopMapSet();
  OopMap* map = NULL;

  int start = __ offset();

  bool cause_return = (poll_type == POLL_AT_RETURN);
  // If this causes a return before the processing, then do a "restore"
  if (cause_return) {
    __ restore();
  } else {
    // Make it look like we were called via the poll
    // so that frame constructor always sees a valid return address
    __ ld_ptr(G2_thread, in_bytes(JavaThread::saved_exception_pc_offset()), O7);
    __ sub(O7, frame::pc_return_offset, O7);
  }

  map = RegisterSaver::save_live_registers(masm, 0, &frame_size_words);

  // setup last_Java_sp (blows G4)
  __ set_last_Java_frame(SP, noreg);

  // call into the runtime to handle the illegal instruction exception.
  // Do not use call_VM_leaf, because we need to make a GC map at this call site.
  __ mov(G2_thread, O0);
  __ save_thread(L7_thread_cache);
  __ call(call_ptr);
  __ delayed()->nop();

  // Set an oopmap for the call site.
  // We need this not only for callee-saved registers, but also for volatile
  // registers that the compiler might be keeping live across a safepoint.

  oop_maps->add_gc_map( __ offset() - start, map);

  __ restore_thread(L7_thread_cache);
  // clear last_Java_sp
  __ reset_last_Java_frame();

  // Check for exceptions
  Label pending;

  __ ld_ptr(G2_thread, in_bytes(Thread::pending_exception_offset()), O1);
  __ br_notnull_short(O1, Assembler::pn, pending);

  RegisterSaver::restore_live_registers(masm);

  // We are back to the original state on entry and ready to go.

  __ retl();
  __ delayed()->nop();

  // Pending exception after the safepoint

  __ bind(pending);

  RegisterSaver::restore_live_registers(masm);

  // We are back to the original state on entry.

  // Tail-call forward_exception_entry, with the issuing PC in O7,
  // so it looks like the original nmethod called forward_exception_entry.
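  // In rough C terms the tail-call below is: goto *forward_exception_entry;
  // O7 is deliberately left untouched so the stub sees the original pc.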
  __ set((intptr_t)StubRoutines::forward_exception_entry(), O0);
  __ JMP(O0, 0);
  __ delayed()->nop();

  // -------------
  // make sure all code is generated
  masm->flush();

  // return exception blob
  return SafepointBlob::create(&buffer, oop_maps, frame_size_words);
}

//
// generate_resolve_blob - call resolution (static/virtual/opt-virtual/ic-miss)
//
// Generate a stub that calls into vm to find out the proper destination
// of a java call. All the argument registers are live at this point
// but since this is generic code we don't know what they are and the caller
// must do any gc of the args.
//
RuntimeStub* SharedRuntime::generate_resolve_blob(address destination, const char* name) {
  assert(StubRoutines::forward_exception_entry() != NULL, "must be generated before");

  // allocate space for the code
  ResourceMark rm;
  // setup code generation tools
  // Measured 8/7/03 at 896 in 32bit debug build (no VerifyThread)
  // Measured 8/7/03 at 1080 in 32bit debug build (VerifyThread)
  // even larger with TraceJumps
  int pad = TraceJumps ? 512 : 0;
  CodeBuffer buffer(name, 1600 + pad, 512);
  MacroAssembler* masm = new MacroAssembler(&buffer);
  int             frame_size_words;
  OopMapSet *oop_maps = new OopMapSet();
  OopMap* map = NULL;

  int start = __ offset();

  map = RegisterSaver::save_live_registers(masm, 0, &frame_size_words);

  int frame_complete = __ offset();

  // setup last_Java_sp (blows G4)
  __ set_last_Java_frame(SP, noreg);

  // call into the runtime to resolve the proper call destination.
  // Do not use call_VM_leaf, because we need to make a GC map at this call site.
  __ mov(G2_thread, O0);
  __ save_thread(L7_thread_cache);
  __ call(destination, relocInfo::runtime_call_type);
  __ delayed()->nop();

  // O0 contains the address we are going to jump to assuming no exception got installed

  // Set an oopmap for the call site.
  // We need this not only for callee-saved registers, but also for volatile
  // registers that the compiler might be keeping live across a safepoint.

  oop_maps->add_gc_map( __ offset() - start, map);

  __ restore_thread(L7_thread_cache);
  // clear last_Java_sp
  __ reset_last_Java_frame();

  // Check for exceptions
  Label pending;

  __ ld_ptr(G2_thread, in_bytes(Thread::pending_exception_offset()), O1);
  __ br_notnull_short(O1, Assembler::pn, pending);

  // get the returned Method*

  __ get_vm_result_2(G5_method);
  __ stx(G5_method, SP, RegisterSaver::G5_offset()+STACK_BIAS);

  // O0 is where we want to jump; overwrite G3 which is saved and scratch

  __ stx(O0, SP, RegisterSaver::G3_offset()+STACK_BIAS);

  RegisterSaver::restore_live_registers(masm);

  // We are back to the original state on entry and ready to go.

  __ JMP(G3, 0);
  __ delayed()->nop();

  // Pending exception after the safepoint

  __ bind(pending);

  RegisterSaver::restore_live_registers(masm);

  // We are back to the original state on entry.

  // Tail-call forward_exception_entry, with the issuing PC in O7,
  // so it looks like the original nmethod called forward_exception_entry.
  __ set((intptr_t)StubRoutines::forward_exception_entry(), O0);
  __ JMP(O0, 0);
  __ delayed()->nop();

  // -------------
  // make sure all code is generated
  masm->flush();

  // return the blob
  // frame_size_words or bytes??
  return RuntimeStub::new_runtime_stub(name, &buffer, frame_complete, frame_size_words, oop_maps, true);
}