templateInterpreterGenerator_ppc.cpp revision 9898:2794bc7859f5
1/*
2 * Copyright (c) 2014, 2015, Oracle and/or its affiliates. All rights reserved.
3 * Copyright (c) 2015 SAP AG. All rights reserved.
4 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
5 *
6 * This code is free software; you can redistribute it and/or modify it
7 * under the terms of the GNU General Public License version 2 only, as
8 * published by the Free Software Foundation.
9 *
10 * This code is distributed in the hope that it will be useful, but WITHOUT
11 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
12 * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
13 * version 2 for more details (a copy is included in the LICENSE file that
14 * accompanied this code).
15 *
16 * You should have received a copy of the GNU General Public License version
17 * 2 along with this work; if not, write to the Free Software Foundation,
18 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
19 *
20 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
21 * or visit www.oracle.com if you need additional information or have any
22 * questions.
23 *
24 */
25
26#include "precompiled.hpp"
27#include "asm/macroAssembler.inline.hpp"
28#include "interpreter/bytecodeHistogram.hpp"
29#include "interpreter/interpreter.hpp"
30#include "interpreter/interpreterRuntime.hpp"
31#include "interpreter/interp_masm.hpp"
32#include "interpreter/templateInterpreterGenerator.hpp"
33#include "interpreter/templateTable.hpp"
34#include "oops/arrayOop.hpp"
35#include "oops/methodData.hpp"
36#include "oops/method.hpp"
37#include "oops/oop.inline.hpp"
38#include "prims/jvmtiExport.hpp"
39#include "prims/jvmtiThreadState.hpp"
40#include "runtime/arguments.hpp"
41#include "runtime/deoptimization.hpp"
42#include "runtime/frame.inline.hpp"
43#include "runtime/sharedRuntime.hpp"
44#include "runtime/stubRoutines.hpp"
45#include "runtime/synchronizer.hpp"
46#include "runtime/timer.hpp"
47#include "runtime/vframeArray.hpp"
48#include "utilities/debug.hpp"
49#include "utilities/macros.hpp"
50
51#undef __
52#define __ _masm->
53
54#ifdef PRODUCT
55#define BLOCK_COMMENT(str) /* nothing */
56#else
57#define BLOCK_COMMENT(str) __ block_comment(str)
58#endif
59
60#define BIND(label)        __ bind(label); BLOCK_COMMENT(#label ":")
61
62//-----------------------------------------------------------------------------
63
64// Actually we should never reach here since we do stack overflow checks before pushing any frame.
65address TemplateInterpreterGenerator::generate_StackOverflowError_handler() {
66  address entry = __ pc();
67  __ unimplemented("generate_StackOverflowError_handler");
68  return entry;
69}
70
71address TemplateInterpreterGenerator::generate_ArrayIndexOutOfBounds_handler(const char* name) {
72  address entry = __ pc();
73  __ empty_expression_stack();
74  __ load_const_optimized(R4_ARG2, (address) name);
75  // Index is in R17_tos.
76  __ mr(R5_ARG3, R17_tos);
77  __ call_VM(noreg, CAST_FROM_FN_PTR(address, InterpreterRuntime::throw_ArrayIndexOutOfBoundsException));
78  return entry;
79}
80
81#if 0
82// Call special ClassCastException constructor taking object to cast
83// and target class as arguments.
84address TemplateInterpreterGenerator::generate_ClassCastException_verbose_handler() {
85  address entry = __ pc();
86
87  // Expression stack must be empty before entering the VM if an
88  // exception happened.
89  __ empty_expression_stack();
90
91  // Thread will be loaded to R3_ARG1.
92  // Target class oop is in register R5_ARG3 by convention!
93  __ call_VM(noreg, CAST_FROM_FN_PTR(address, InterpreterRuntime::throw_ClassCastException_verbose), R17_tos, R5_ARG3);
94  // Above call must not return here since exception pending.
95  DEBUG_ONLY(__ should_not_reach_here();)
96  return entry;
97}
98#endif
99
100address TemplateInterpreterGenerator::generate_ClassCastException_handler() {
101  address entry = __ pc();
102  // Expression stack must be empty before entering the VM if an
103  // exception happened.
104  __ empty_expression_stack();
105
106  // Load exception object.
107  // Thread will be loaded to R3_ARG1.
108  __ call_VM(noreg, CAST_FROM_FN_PTR(address, InterpreterRuntime::throw_ClassCastException), R17_tos);
109#ifdef ASSERT
110  // Above call must not return here since exception pending.
111  __ should_not_reach_here();
112#endif
113  return entry;
114}
115
116address TemplateInterpreterGenerator::generate_exception_handler_common(const char* name, const char* message, bool pass_oop) {
117  address entry = __ pc();
118  //__ untested("generate_exception_handler_common");
119  Register Rexception = R17_tos;
120
121  // Expression stack must be empty before entering the VM if an exception happened.
122  __ empty_expression_stack();
123
124  __ load_const_optimized(R4_ARG2, (address) name, R11_scratch1);
125  if (pass_oop) {
126    __ mr(R5_ARG3, Rexception);
127    __ call_VM(Rexception, CAST_FROM_FN_PTR(address, InterpreterRuntime::create_klass_exception), false);
128  } else {
129    __ load_const_optimized(R5_ARG3, (address) message, R11_scratch1);
130    __ call_VM(Rexception, CAST_FROM_FN_PTR(address, InterpreterRuntime::create_exception), false);
131  }
132
133  // Throw exception.
134  __ mr(R3_ARG1, Rexception);
135  __ load_const_optimized(R11_scratch1, Interpreter::throw_exception_entry(), R12_scratch2);
136  __ mtctr(R11_scratch1);
137  __ bctr();
138
139  return entry;
140}
141
142address TemplateInterpreterGenerator::generate_continuation_for(TosState state) {
143  address entry = __ pc();
144  __ unimplemented("generate_continuation_for");
145  return entry;
146}
147
148// This entry is returned to when a call returns to the interpreter.
149// When we arrive here, we expect that the callee stack frame is already popped.
150address TemplateInterpreterGenerator::generate_return_entry_for(TosState state, int step, size_t index_size) {
151  address entry = __ pc();
152
153  // Move the value out of the return register back to the TOS cache of the current frame.
154  switch (state) {
155    case ltos:
156    case btos:
157    case ctos:
158    case stos:
159    case atos:
160    case itos: __ mr(R17_tos, R3_RET); break;   // RET -> TOS cache
161    case ftos:
162    case dtos: __ fmr(F15_ftos, F1_RET); break; // F1_RET -> TOS cache
163    case vtos: break;                           // Nothing to do, this was a void return.
164    default  : ShouldNotReachHere();
165  }
166
167  __ restore_interpreter_state(R11_scratch1); // Sets R11_scratch1 = fp.
168  __ ld(R12_scratch2, _ijava_state_neg(top_frame_sp), R11_scratch1);
169  __ resize_frame_absolute(R12_scratch2, R11_scratch1, R0);
170
171  // Compiled code destroys templateTableBase, reload.
172  __ load_const_optimized(R25_templateTableBase, (address)Interpreter::dispatch_table((TosState)0), R12_scratch2);
173
174  if (state == atos) {
175    __ profile_return_type(R3_RET, R11_scratch1, R12_scratch2);
176  }
177
178  const Register cache = R11_scratch1;
179  const Register size  = R12_scratch2;
180  __ get_cache_and_index_at_bcp(cache, 1, index_size);
181
182  // Get the parameter size (in slots) from the least significant byte of the 64-bit flags value:
183#if defined(VM_LITTLE_ENDIAN)
184  __ lbz(size, in_bytes(ConstantPoolCache::base_offset() + ConstantPoolCacheEntry::flags_offset()), cache);
185#else
186  __ lbz(size, in_bytes(ConstantPoolCache::base_offset() + ConstantPoolCacheEntry::flags_offset()) + 7, cache);
187#endif
188  __ sldi(size, size, Interpreter::logStackElementSize);
189  __ add(R15_esp, R15_esp, size);
190  __ dispatch_next(state, step);
191  return entry;
192}
193
194address TemplateInterpreterGenerator::generate_deopt_entry_for(TosState state, int step) {
195  address entry = __ pc();
196  // If state != vtos, we're returning from a native method, which put its result
197  // into the result register. So move the value out of the return register back
198  // to the TOS cache of the current frame.
199
200  switch (state) {
201    case ltos:
202    case btos:
203    case ctos:
204    case stos:
205    case atos:
206    case itos: __ mr(R17_tos, R3_RET); break;   // GR_RET -> TOS cache
207    case ftos:
208    case dtos: __ fmr(F15_ftos, F1_RET); break; // F1_RET -> TOS cache
209    case vtos: break;                           // Nothing to do, this was a void return.
210    default  : ShouldNotReachHere();
211  }
212
213  // Load LcpoolCache @@@ should be already set!
214  __ get_constant_pool_cache(R27_constPoolCache);
215
216  // Handle a pending exception, fall through if none.
217  __ check_and_forward_exception(R11_scratch1, R12_scratch2);
218
219  // Start executing bytecodes.
220  __ dispatch_next(state, step);
221
222  return entry;
223}
224
225// A result handler converts the native result into java format.
226// Use the shared code between c++ and template interpreter.
227address TemplateInterpreterGenerator::generate_result_handler_for(BasicType type) {
228  return AbstractInterpreterGenerator::generate_result_handler_for(type);
229}
230
231address TemplateInterpreterGenerator::generate_safept_entry_for(TosState state, address runtime_entry) {
232  address entry = __ pc();
233
234  __ push(state);
235  __ call_VM(noreg, runtime_entry);
236  __ dispatch_via(vtos, Interpreter::_normal_table.table_for(vtos));
237
238  return entry;
239}
240
241// Helpers for commoning out cases in the various type of method entries.
242
243// Increment invocation count & check for overflow.
244//
245// Note: checking for negative value instead of overflow
246//       so we have a 'sticky' overflow test.
247//
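// Illustrative sketch (not generated code; the increment and mask values are
// hypothetical): in the tiered path below, the counter is bumped by
// InvocationCounter::count_increment and then ANDed with the precomputed
// invoke_mask; the branch to *overflow is taken whenever the masked value
// becomes zero. E.g. with increment 8 and mask 0x3F8:
//   counter += 8;
//   if ((counter & 0x3F8) == 0) goto overflow;   // taken every 128th invocation
//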
248void TemplateInterpreterGenerator::generate_counter_incr(Label* overflow, Label* profile_method, Label* profile_method_continue) {
249  // Note: In tiered compilation we increment the counters either in the method or in the MDO, depending on whether we're profiling or not.
250  Register Rscratch1   = R11_scratch1;
251  Register Rscratch2   = R12_scratch2;
252  Register R3_counters = R3_ARG1;
253  Label done;
254
255  if (TieredCompilation) {
256    const int increment = InvocationCounter::count_increment;
257    Label no_mdo;
258    if (ProfileInterpreter) {
259      const Register Rmdo = R3_counters;
260      // If no method data exists, go to profile_continue.
261      __ ld(Rmdo, in_bytes(Method::method_data_offset()), R19_method);
262      __ cmpdi(CCR0, Rmdo, 0);
263      __ beq(CCR0, no_mdo);
264
265      // Increment invocation counter in the MDO.
266      const int mdo_ic_offs = in_bytes(MethodData::invocation_counter_offset()) + in_bytes(InvocationCounter::counter_offset());
267      __ lwz(Rscratch2, mdo_ic_offs, Rmdo);
268      __ lwz(Rscratch1, in_bytes(MethodData::invoke_mask_offset()), Rmdo);
269      __ addi(Rscratch2, Rscratch2, increment);
270      __ stw(Rscratch2, mdo_ic_offs, Rmdo);
271      __ and_(Rscratch1, Rscratch2, Rscratch1);
272      __ bne(CCR0, done);
273      __ b(*overflow);
274    }
275
276    // Increment counter in MethodCounters*.
277    const int mo_ic_offs = in_bytes(MethodCounters::invocation_counter_offset()) + in_bytes(InvocationCounter::counter_offset());
278    __ bind(no_mdo);
279    __ get_method_counters(R19_method, R3_counters, done);
280    __ lwz(Rscratch2, mo_ic_offs, R3_counters);
281    __ lwz(Rscratch1, in_bytes(MethodCounters::invoke_mask_offset()), R3_counters);
282    __ addi(Rscratch2, Rscratch2, increment);
283    __ stw(Rscratch2, mo_ic_offs, R3_counters);
284    __ and_(Rscratch1, Rscratch2, Rscratch1);
285    __ beq(CCR0, *overflow);
286
287    __ bind(done);
288
289  } else {
290
291    // Update standard invocation counters.
292    Register Rsum_ivc_bec = R4_ARG2;
293    __ get_method_counters(R19_method, R3_counters, done);
294    __ increment_invocation_counter(R3_counters, Rsum_ivc_bec, R12_scratch2);
295    // Increment interpreter invocation counter.
296    if (ProfileInterpreter) {  // %%% Merge this into methodDataOop.
297      __ lwz(R12_scratch2, in_bytes(MethodCounters::interpreter_invocation_counter_offset()), R3_counters);
298      __ addi(R12_scratch2, R12_scratch2, 1);
299      __ stw(R12_scratch2, in_bytes(MethodCounters::interpreter_invocation_counter_offset()), R3_counters);
300    }
301    // Check if we must create a method data obj.
302    if (ProfileInterpreter && profile_method != NULL) {
303      const Register profile_limit = Rscratch1;
304      __ lwz(profile_limit, in_bytes(MethodCounters::interpreter_profile_limit_offset()), R3_counters);
305      // Test to see if we should create a method data oop.
306      __ cmpw(CCR0, Rsum_ivc_bec, profile_limit);
307      __ blt(CCR0, *profile_method_continue);
308      // If no method data exists, go to profile_method.
309      __ test_method_data_pointer(*profile_method);
310    }
311    // Finally check for counter overflow.
312    if (overflow) {
313      const Register invocation_limit = Rscratch1;
314      __ lwz(invocation_limit, in_bytes(MethodCounters::interpreter_invocation_limit_offset()), R3_counters);
315      __ cmpw(CCR0, Rsum_ivc_bec, invocation_limit);
316      __ bge(CCR0, *overflow);
317    }
318
319    __ bind(done);
320  }
321}
322
323// Generate code to initiate compilation on invocation counter overflow.
324void TemplateInterpreterGenerator::generate_counter_overflow(Label& continue_entry) {
325  // Generate code to initiate compilation on the counter overflow.
326
327  // InterpreterRuntime::frequency_counter_overflow takes one argument,
328  // which indicates whether the counter overflow occurred at a backwards branch (NULL bcp).
329  // We pass zero in.
330  // The call returns the address of the verified entry point for the method or NULL
331  // if the compilation did not complete (either went background or bailed out).
332  //
333  // Unlike the C++ interpreter above: Check exceptions!
334  // Assumption: Caller must set the flag "do_not_unlock_if_synchronized" if the monitor of a sync'ed
335  // method has not yet been created. Thus, no unlocking of a non-existing monitor can occur.
336
337  __ li(R4_ARG2, 0);
338  __ call_VM(noreg, CAST_FROM_FN_PTR(address, InterpreterRuntime::frequency_counter_overflow), R4_ARG2, true);
339
340  // Returns verified_entry_point or NULL.
341  // We ignore it in any case.
342  __ b(continue_entry);
343}
344
345void TemplateInterpreterGenerator::generate_stack_overflow_check(Register Rmem_frame_size, Register Rscratch1) {
346  assert_different_registers(Rmem_frame_size, Rscratch1);
347  __ generate_stack_overflow_check_with_compare_and_throw(Rmem_frame_size, Rscratch1);
348}
349
350void TemplateInterpreterGenerator::unlock_method(bool check_exceptions) {
351  __ unlock_object(R26_monitor, check_exceptions);
352}
353
354// Lock the current method, interpreter register window must be set up!
355void TemplateInterpreterGenerator::lock_method(Register Rflags, Register Rscratch1, Register Rscratch2, bool flags_preloaded) {
356  const Register Robj_to_lock = Rscratch2;
357
358  {
359    if (!flags_preloaded) {
360      __ lwz(Rflags, method_(access_flags));
361    }
362
363#ifdef ASSERT
364    // Check if the method needs synchronization.
365    {
366      Label Lok;
367      __ testbitdi(CCR0, R0, Rflags, JVM_ACC_SYNCHRONIZED_BIT);
368      __ btrue(CCR0,Lok);
369      __ stop("method doesn't need synchronization");
370      __ bind(Lok);
371    }
372#endif // ASSERT
373  }
374
375  // Get synchronization object to Rscratch2.
376  {
377    const int mirror_offset = in_bytes(Klass::java_mirror_offset());
378    Label Lstatic;
379    Label Ldone;
380
381    __ testbitdi(CCR0, R0, Rflags, JVM_ACC_STATIC_BIT);
382    __ btrue(CCR0, Lstatic);
383
384    // Non-static case: load receiver obj from stack and we're done.
385    __ ld(Robj_to_lock, R18_locals);
386    __ b(Ldone);
387
388    __ bind(Lstatic); // Static case: Lock the java mirror
389    __ ld(Robj_to_lock, in_bytes(Method::const_offset()), R19_method);
390    __ ld(Robj_to_lock, in_bytes(ConstMethod::constants_offset()), Robj_to_lock);
391    __ ld(Robj_to_lock, ConstantPool::pool_holder_offset_in_bytes(), Robj_to_lock);
392    __ ld(Robj_to_lock, mirror_offset, Robj_to_lock);
393
394    __ bind(Ldone);
395    __ verify_oop(Robj_to_lock);
396  }
397
398  // Got the oop to lock => execute!
399  __ add_monitor_to_stack(true, Rscratch1, R0);
400
401  __ std(Robj_to_lock, BasicObjectLock::obj_offset_in_bytes(), R26_monitor);
402  __ lock_object(R26_monitor, Robj_to_lock);
403}
404
405// Generate a fixed interpreter frame for pure interpreter
406// and I2N native transition frames.
407//
408// Before (stack grows downwards):
409//
410//         |  ...         |
411//         |------------- |
412//         |  java arg0   |
413//         |  ...         |
414//         |  java argn   |
415//         |              |   <-   R15_esp
416//         |              |
417//         |--------------|
418//         | abi_112      |
419//         |              |   <-   R1_SP
420//         |==============|
421//
422//
423// After:
424//
425//         |  ...         |
426//         |  java arg0   |<-   R18_locals
427//         |  ...         |
428//         |  java argn   |
429//         |--------------|
430//         |              |
431//         |  java locals |
432//         |              |
433//         |--------------|
434//         |  abi_48      |
435//         |==============|
436//         |              |
437//         |   istate     |
438//         |              |
439//         |--------------|
440//         |   monitor    |<-   R26_monitor
441//         |--------------|
442//         |              |<-   R15_esp
443//         | expression   |
444//         | stack        |
445//         |              |
446//         |--------------|
447//         |              |
448//         | abi_112      |<-   R1_SP
449//         |==============|
450//
451// The topmost frame needs an ABI space of 112 bytes. This space is needed,
452// since we call into C. The C function may spill its arguments to the caller's
453// frame. When we call into Java, we don't need these spill slots. In order to save
454// space on the stack, we resize the caller's frame. However, the Java locals reside in
455// the caller's frame and the frame has to be enlarged. The frame_size for the
456// current frame was calculated based on max_stack as the size of the expression
457// stack. At the call, just a part of the expression stack might be used.
458// We don't want to waste this space and cut the frame back accordingly.
459// The resulting amount for resizing is calculated as follows:
460// resize =   (number_of_locals - number_of_arguments) * slot_size
461//          + (R1_SP - R15_esp) + 48
462//
463// The size for the callee frame is calculated:
464// framesize = 112 + max_stack + monitor + state_size
465//
466// maxstack:   Max number of slots on the expression stack, loaded from the method.
467// monitor:    We statically reserve room for one monitor object.
468// state_size: We save the current state of the interpreter to this area.
469//
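// Illustrative example (hypothetical numbers, not generated code): for a method
// with 4 parameters, 10 locals and max_stack = 6, using 8-byte stack slots, the
// formulas above give roughly
//   resize    = (10 - 4) * 8 + (R1_SP - R15_esp) + 48
//   framesize = 112 + 6 * 8 + monitor + state_size
// i.e. the caller's frame grows by the 6 non-parameter local slots (minus the
// currently unused part of its expression stack area), and the new top frame
// reserves the full expression stack, one monitor slot and the interpreter state.
//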
470void TemplateInterpreterGenerator::generate_fixed_frame(bool native_call, Register Rsize_of_parameters, Register Rsize_of_locals) {
471  Register parent_frame_resize = R6_ARG4, // Frame will grow by this number of bytes.
472           top_frame_size      = R7_ARG5,
473           Rconst_method       = R8_ARG6;
474
475  assert_different_registers(Rsize_of_parameters, Rsize_of_locals, parent_frame_resize, top_frame_size);
476
477  __ ld(Rconst_method, method_(const));
478  __ lhz(Rsize_of_parameters /* number of params */,
479         in_bytes(ConstMethod::size_of_parameters_offset()), Rconst_method);
480  if (native_call) {
481    // If we're calling a native method, we reserve space for the worst-case signature
482    // handler varargs vector, which is max(Argument::n_register_parameters, parameter_count+2).
483    // We add two slots to the parameter_count, one for the jni
484    // environment and one for a possible native mirror.
485    Label skip_native_calculate_max_stack;
486    __ addi(top_frame_size, Rsize_of_parameters, 2);
487    __ cmpwi(CCR0, top_frame_size, Argument::n_register_parameters);
488    __ bge(CCR0, skip_native_calculate_max_stack);
489    __ li(top_frame_size, Argument::n_register_parameters);
490    __ bind(skip_native_calculate_max_stack);
491    __ sldi(Rsize_of_parameters, Rsize_of_parameters, Interpreter::logStackElementSize);
492    __ sldi(top_frame_size, top_frame_size, Interpreter::logStackElementSize);
493    __ sub(parent_frame_resize, R1_SP, R15_esp); // <0, off by Interpreter::stackElementSize!
494    assert(Rsize_of_locals == noreg, "Rsize_of_locals not initialized"); // Only relevant value is Rsize_of_parameters.
495  } else {
496    __ lhz(Rsize_of_locals /* number of locals */, in_bytes(ConstMethod::size_of_locals_offset()), Rconst_method);
497    __ sldi(Rsize_of_parameters, Rsize_of_parameters, Interpreter::logStackElementSize);
498    __ sldi(Rsize_of_locals, Rsize_of_locals, Interpreter::logStackElementSize);
499    __ lhz(top_frame_size, in_bytes(ConstMethod::max_stack_offset()), Rconst_method);
500    __ sub(R11_scratch1, Rsize_of_locals, Rsize_of_parameters); // >=0
501    __ sub(parent_frame_resize, R1_SP, R15_esp); // <0, off by Interpreter::stackElementSize!
502    __ sldi(top_frame_size, top_frame_size, Interpreter::logStackElementSize);
503    __ add(parent_frame_resize, parent_frame_resize, R11_scratch1);
504  }
505
506  // Compute top frame size.
507  __ addi(top_frame_size, top_frame_size, frame::abi_reg_args_size + frame::ijava_state_size);
508
509  // Cut back area between esp and max_stack.
510  __ addi(parent_frame_resize, parent_frame_resize, frame::abi_minframe_size - Interpreter::stackElementSize);
511
512  __ round_to(top_frame_size, frame::alignment_in_bytes);
513  __ round_to(parent_frame_resize, frame::alignment_in_bytes);
514  // parent_frame_resize = (locals-parameters) - (ESP-SP-ABI48) Rounded to frame alignment size.
515  // Enlarge by locals-parameters (not in case of native_call), shrink by ESP-SP-ABI48.
516
517  {
518    // --------------------------------------------------------------------------
519    // Stack overflow check
520
521    Label cont;
522    __ add(R11_scratch1, parent_frame_resize, top_frame_size);
523    generate_stack_overflow_check(R11_scratch1, R12_scratch2);
524  }
525
526  // Set up interpreter state registers.
527
528  __ add(R18_locals, R15_esp, Rsize_of_parameters);
529  __ ld(R27_constPoolCache, in_bytes(ConstMethod::constants_offset()), Rconst_method);
530  __ ld(R27_constPoolCache, ConstantPool::cache_offset_in_bytes(), R27_constPoolCache);
531
532  // Set method data pointer.
533  if (ProfileInterpreter) {
534    Label zero_continue;
535    __ ld(R28_mdx, method_(method_data));
536    __ cmpdi(CCR0, R28_mdx, 0);
537    __ beq(CCR0, zero_continue);
538    __ addi(R28_mdx, R28_mdx, in_bytes(MethodData::data_offset()));
539    __ bind(zero_continue);
540  }
541
542  if (native_call) {
543    __ li(R14_bcp, 0); // Must initialize.
544  } else {
545    __ add(R14_bcp, in_bytes(ConstMethod::codes_offset()), Rconst_method);
546  }
547
548  // Resize parent frame.
549  __ mflr(R12_scratch2);
550  __ neg(parent_frame_resize, parent_frame_resize);
551  __ resize_frame(parent_frame_resize, R11_scratch1);
552  __ std(R12_scratch2, _abi(lr), R1_SP);
553
554  __ addi(R26_monitor, R1_SP, - frame::ijava_state_size);
555  __ addi(R15_esp, R26_monitor, - Interpreter::stackElementSize);
556
557  // Store values.
558  // R15_esp, R14_bcp, R26_monitor, R28_mdx are saved at java calls
559  // in InterpreterMacroAssembler::call_from_interpreter.
560  __ std(R19_method, _ijava_state_neg(method), R1_SP);
561  __ std(R21_sender_SP, _ijava_state_neg(sender_sp), R1_SP);
562  __ std(R27_constPoolCache, _ijava_state_neg(cpoolCache), R1_SP);
563  __ std(R18_locals, _ijava_state_neg(locals), R1_SP);
564
565  // Note: esp, bcp, monitor, mdx live in registers. Hence, the correct version can only
566  // be found in the frame after save_interpreter_state is done. This is always true
567  // for non-top frames. But when a signal occurs, dumping the top frame can go wrong,
568  // because e.g. frame::interpreter_frame_bcp() will not access the correct value
569  // (Enhanced Stack Trace).
570  // The signal handler does not save the interpreter state into the frame.
571  __ li(R0, 0);
572#ifdef ASSERT
573  // Fill remaining slots with constants.
574  __ load_const_optimized(R11_scratch1, 0x5afe);
575  __ load_const_optimized(R12_scratch2, 0xdead);
576#endif
577  // We have to initialize some frame slots for native calls (accessed by GC).
578  if (native_call) {
579    __ std(R26_monitor, _ijava_state_neg(monitors), R1_SP);
580    __ std(R14_bcp, _ijava_state_neg(bcp), R1_SP);
581    if (ProfileInterpreter) { __ std(R28_mdx, _ijava_state_neg(mdx), R1_SP); }
582  }
583#ifdef ASSERT
584  else {
585    __ std(R12_scratch2, _ijava_state_neg(monitors), R1_SP);
586    __ std(R12_scratch2, _ijava_state_neg(bcp), R1_SP);
587    __ std(R12_scratch2, _ijava_state_neg(mdx), R1_SP);
588  }
589  __ std(R11_scratch1, _ijava_state_neg(ijava_reserved), R1_SP);
590  __ std(R12_scratch2, _ijava_state_neg(esp), R1_SP);
591  __ std(R12_scratch2, _ijava_state_neg(lresult), R1_SP);
592  __ std(R12_scratch2, _ijava_state_neg(fresult), R1_SP);
593#endif
594  __ subf(R12_scratch2, top_frame_size, R1_SP);
595  __ std(R0, _ijava_state_neg(oop_tmp), R1_SP);
596  __ std(R12_scratch2, _ijava_state_neg(top_frame_sp), R1_SP);
597
598  // Push top frame.
599  __ push_frame(top_frame_size, R11_scratch1);
600}
601
602// End of helpers
603
604address TemplateInterpreterGenerator::generate_math_entry(AbstractInterpreter::MethodKind kind) {
605  if (!TemplateInterpreter::math_entry_available(kind)) {
606    NOT_PRODUCT(__ should_not_reach_here();)
607    return NULL;
608  }
609
610  address entry = __ pc();
611
612  __ lfd(F1_RET, Interpreter::stackElementSize, R15_esp);
613
614  // Pop c2i arguments (if any) off when we return.
615#ifdef ASSERT
616  __ ld(R9_ARG7, 0, R1_SP);
617  __ ld(R10_ARG8, 0, R21_sender_SP);
618  __ cmpd(CCR0, R9_ARG7, R10_ARG8);
619  __ asm_assert_eq("backlink", 0x545);
620#endif // ASSERT
621  __ mr(R1_SP, R21_sender_SP); // Cut the stack back to where the caller started.
622
623  if (kind == Interpreter::java_lang_math_sqrt) {
624    __ fsqrt(F1_RET, F1_RET);
625  } else if (kind == Interpreter::java_lang_math_abs) {
626    __ fabs(F1_RET, F1_RET);
627  } else {
628    ShouldNotReachHere();
629  }
630
631  // And we're done.
632  __ blr();
633
634  __ flush();
635
636  return entry;
637}
638
639// Interpreter stub for calling a native method. (asm interpreter)
640// This sets up a somewhat different looking stack for calling the
641// native method than the typical interpreter frame setup.
642//
643// On entry:
644//   R19_method    - method
645//   R16_thread    - JavaThread*
646//   R15_esp       - intptr_t* sender tos
647//
648//   abstract stack (grows up)
649//     [  IJava (caller of JNI callee)  ]  <-- ASP
650//        ...
651address TemplateInterpreterGenerator::generate_native_entry(bool synchronized) {
652
653  address entry = __ pc();
654
655  const bool inc_counter = UseCompiler || CountCompiledCalls || LogTouchedMethods;
656
657  // -----------------------------------------------------------------------------
658  // Allocate a new frame that represents the native callee (i2n frame).
659  // This is not a full-blown interpreter frame, but in particular, the
660  // following registers are valid after this:
661  // - R19_method
662  // - R18_locals (points to the start of arguments to the native function)
663  //
664  //   abstract stack (grows up)
665  //     [  IJava (caller of JNI callee)  ]  <-- ASP
666  //        ...
667
668  const Register signature_handler_fd = R11_scratch1;
669  const Register pending_exception    = R0;
670  const Register result_handler_addr  = R31;
671  const Register native_method_fd     = R11_scratch1;
672  const Register access_flags         = R22_tmp2;
673  const Register active_handles       = R11_scratch1; // R26_monitor saved to state.
674  const Register sync_state           = R12_scratch2;
675  const Register sync_state_addr      = sync_state;   // Address is dead after use.
676  const Register suspend_flags        = R11_scratch1;
677
678  //=============================================================================
679  // Allocate new frame and initialize interpreter state.
680
681  Label exception_return;
682  Label exception_return_sync_check;
683  Label stack_overflow_return;
684
685  // Generate new interpreter state and jump to stack_overflow_return in case of
686  // a stack overflow.
687  //generate_compute_interpreter_state(stack_overflow_return);
688
689  Register size_of_parameters = R22_tmp2;
690
691  generate_fixed_frame(true, size_of_parameters, noreg /* unused */);
692
693  //=============================================================================
694  // Increment invocation counter. On overflow, entry to JNI method
695  // will be compiled.
696  Label invocation_counter_overflow, continue_after_compile;
697  if (inc_counter) {
698    if (synchronized) {
699      // Since at this point in the method invocation the exception handler
700      // would try to exit the monitor of a synchronized method which hasn't
701      // been entered yet, we set the thread-local variable
702      // _do_not_unlock_if_synchronized to true. If any exception was thrown by
703      // the runtime, exception handling, i.e. unlock_if_synchronized_method, will
704      // check this thread-local flag.
705      // One effect of this flag is to force an unwind in the topmost
706      // interpreter frame without performing an unlock while doing so.
707      __ li(R0, 1);
708      __ stb(R0, in_bytes(JavaThread::do_not_unlock_if_synchronized_offset()), R16_thread);
709    }
710    generate_counter_incr(&invocation_counter_overflow, NULL, NULL);
711
712    BIND(continue_after_compile);
713    // Reset the _do_not_unlock_if_synchronized flag.
714    if (synchronized) {
715      __ li(R0, 0);
716      __ stb(R0, in_bytes(JavaThread::do_not_unlock_if_synchronized_offset()), R16_thread);
717    }
718  }
719
720  // access_flags = method->access_flags();
721  // Load access flags.
722  assert(access_flags->is_nonvolatile(),
723         "access_flags must be in a non-volatile register");
724  // Type check.
725  assert(4 == sizeof(AccessFlags), "unexpected field size");
726  __ lwz(access_flags, method_(access_flags));
727
728  // We don't want to reload R19_method and access_flags after calls
729  // to some helper functions.
730  assert(R19_method->is_nonvolatile(),
731         "R19_method must be a non-volatile register");
732
733  // Check for synchronized methods. Must happen AFTER invocation counter
734  // check, so the method is not locked if the counter overflows.
735
736  if (synchronized) {
737    lock_method(access_flags, R11_scratch1, R12_scratch2, true);
738
739    // Update monitor in state.
740    __ ld(R11_scratch1, 0, R1_SP);
741    __ std(R26_monitor, _ijava_state_neg(monitors), R11_scratch1);
742  }
743
744  // jvmti/jvmpi support
745  __ notify_method_entry();
746
747  //=============================================================================
748  // Get and call the signature handler.
749
750  __ ld(signature_handler_fd, method_(signature_handler));
751  Label call_signature_handler;
752
753  __ cmpdi(CCR0, signature_handler_fd, 0);
754  __ bne(CCR0, call_signature_handler);
755
756  // Method has never been called. Either generate a specialized
757  // handler or point to the slow one.
758  //
759  // Pass parameter 'false' to avoid exception check in call_VM.
760  __ call_VM(noreg, CAST_FROM_FN_PTR(address, InterpreterRuntime::prepare_native_call), R19_method, false);
761
762  // Check for an exception while looking up the target method. If we
763  // incurred one, bail.
764  __ ld(pending_exception, thread_(pending_exception));
765  __ cmpdi(CCR0, pending_exception, 0);
766  __ bne(CCR0, exception_return_sync_check); // Has pending exception.
767
768  // Reload signature handler, it may have been created/assigned in the meanwhile.
769  __ ld(signature_handler_fd, method_(signature_handler));
770  __ twi_0(signature_handler_fd); // Order wrt. load of klass mirror and entry point (isync is below).
771
772  BIND(call_signature_handler);
773
774  // Before we call the signature handler we push a new frame to
775  // protect the interpreter frame volatile registers when we return
776  // from jni but before we can get back to Java.
777
778  // First set the frame anchor while the SP/FP registers are
779  // convenient and the slow signature handler can use this same frame
780  // anchor.
781
782  // We have a TOP_IJAVA_FRAME here, which belongs to us.
783  __ set_top_ijava_frame_at_SP_as_last_Java_frame(R1_SP, R12_scratch2/*tmp*/);
784
785  // Now the interpreter frame (and its call chain) have been
786  // invalidated and flushed. We are now protected against eager
787  // being enabled in native code. Even if it goes eager the
788  // registers will be reloaded as clean and we will invalidate after
789  // the call so no spurious flush should be possible.
790
791  // Call signature handler and pass locals address.
792  //
793  // Our signature handlers copy required arguments to the C stack
794  // (outgoing C args), R3_ARG1 to R10_ARG8, and FARG1 to FARG13.
795  __ mr(R3_ARG1, R18_locals);
796#if !defined(ABI_ELFv2)
797  __ ld(signature_handler_fd, 0, signature_handler_fd);
798#endif
799
800  __ call_stub(signature_handler_fd);
801
802  // Remove the register parameter varargs slots we allocated in
803  // compute_interpreter_state. SP+16 ends up pointing to the ABI
804  // outgoing argument area.
805  //
806  // Not needed on PPC64.
807  //__ add(SP, SP, Argument::n_register_parameters*BytesPerWord);
808
809  assert(result_handler_addr->is_nonvolatile(), "result_handler_addr must be in a non-volatile register");
810  // Save across call to native method.
811  __ mr(result_handler_addr, R3_RET);
812
813  __ isync(); // Acquire signature handler before trying to fetch the native entry point and klass mirror.
814
815  // Set up fixed parameters and call the native method.
816  // If the method is static, get mirror into R4_ARG2.
817  {
818    Label method_is_not_static;
819    // access_flags is non-volatile and still valid, no need to reload it.
820
821    // Check whether the method is static.
822    __ testbitdi(CCR0, R0, access_flags, JVM_ACC_STATIC_BIT);
823    __ bfalse(CCR0, method_is_not_static);
824
825    // constants = method->constants();
826    __ ld(R11_scratch1, in_bytes(Method::const_offset()), R19_method);
827    __ ld(R11_scratch1, in_bytes(ConstMethod::constants_offset()), R11_scratch1);
828    // pool_holder = method->constants()->pool_holder();
829    __ ld(R11_scratch1/*pool_holder*/, ConstantPool::pool_holder_offset_in_bytes(),
830          R11_scratch1/*constants*/);
831
832    const int mirror_offset = in_bytes(Klass::java_mirror_offset());
833
834    // mirror = pool_holder->klass_part()->java_mirror();
835    __ ld(R0/*mirror*/, mirror_offset, R11_scratch1/*pool_holder*/);
836    // state->_native_mirror = mirror;
837
838    __ ld(R11_scratch1, 0, R1_SP);
839    __ std(R0/*mirror*/, _ijava_state_neg(oop_tmp), R11_scratch1);
840    // R4_ARG2 = &state->_oop_temp;
841    __ addi(R4_ARG2, R11_scratch1, _ijava_state_neg(oop_tmp));
842    BIND(method_is_not_static);
843  }
844
845  // At this point, arguments have been copied off the stack into
846  // their JNI positions. Oops are boxed in-place on the stack, with
847  // handles copied to arguments. The result handler address is in a
848  // register.
849
850  // Pass JNIEnv address as first parameter.
851  __ addir(R3_ARG1, thread_(jni_environment));
852
853  // Load the native_method entry before we change the thread state.
854  __ ld(native_method_fd, method_(native_function));
855
856  //=============================================================================
857  // Transition from _thread_in_Java to _thread_in_native. As soon as
858  // we make this change the safepoint code needs to be certain that
859  // the last Java frame we established is good. The pc in that frame
860  // just needs to be near here not an actual return address.
861
862  // We use release_store_fence to update values like the thread state, where
863  // we don't want the current thread to continue until all our prior memory
864  // accesses (including the new thread state) are visible to other threads.
865  __ li(R0, _thread_in_native);
866  __ release();
867
868  // TODO PPC port assert(4 == JavaThread::sz_thread_state(), "unexpected field size");
869  __ stw(R0, thread_(thread_state));
870
871  if (UseMembar) {
872    __ fence();
873  }
874
875  //=============================================================================
876  // Call the native method. Argument registers must not have been
877  // overwritten since "__ call_stub(signature_handler);" (except for
878  // ARG1 and ARG2 for static methods).
879  __ call_c(native_method_fd);
880
881  __ li(R0, 0);
882  __ ld(R11_scratch1, 0, R1_SP);
883  __ std(R3_RET, _ijava_state_neg(lresult), R11_scratch1);
884  __ stfd(F1_RET, _ijava_state_neg(fresult), R11_scratch1);
885  __ std(R0/*mirror*/, _ijava_state_neg(oop_tmp), R11_scratch1); // reset
886
887  // Note: C++ interpreter needs the following here:
888  // The frame_manager_lr field, which we use for setting the last
889  // java frame, gets overwritten by the signature handler. Restore
890  // it now.
891  //__ get_PC_trash_LR(R11_scratch1);
892  //__ std(R11_scratch1, _top_ijava_frame_abi(frame_manager_lr), R1_SP);
893
894  // Because of GC R19_method may no longer be valid.
895
896  // Block, if necessary, before resuming in _thread_in_Java state.
897  // In order for GC to work, don't clear the last_Java_sp until after
898  // blocking.
899
900  //=============================================================================
901  // Switch thread to "native transition" state before reading the
902  // synchronization state. This additional state is necessary
903  // because reading and testing the synchronization state is not
904  // atomic w.r.t. GC, as this scenario demonstrates: Java thread A,
905  // in _thread_in_native state, loads _not_synchronized and is
906  // preempted. VM thread changes sync state to synchronizing and
907  // suspends threads for GC. Thread A is resumed to finish this
908  // native method, but doesn't block here since it didn't see any
909  // synchronization in progress, and escapes.
910
911  // We use release_store_fence to update values like the thread state, where
912  // we don't want the current thread to continue until all our prior memory
913  // accesses (including the new thread state) are visible to other threads.
914  __ li(R0/*thread_state*/, _thread_in_native_trans);
915  __ release();
916  __ stw(R0/*thread_state*/, thread_(thread_state));
917  if (UseMembar) {
918    __ fence();
919  }
920  // Write serialization page so that the VM thread can do a pseudo remote
921  // membar. We use the current thread pointer to calculate a thread
922  // specific offset to write to within the page. This minimizes bus
923  // traffic due to cache line collision.
924  else {
925    __ serialize_memory(R16_thread, R11_scratch1, R12_scratch2);
926  }
927
928  // Now, before we return to Java, we must look for a current safepoint
929  // (a new safepoint cannot start since we entered native_trans).
930  // We must check here because a current safepoint could be modifying
931  // the caller's registers right this moment.
932
933  // Acquire isn't strictly necessary here because of the fence, but
934  // sync_state is declared to be volatile, so we do it anyway
935  // (cmp-br-isync on one path, release (same as acquire on PPC64) on the other path).
936  int sync_state_offs = __ load_const_optimized(sync_state_addr, SafepointSynchronize::address_of_state(), /*temp*/R0, true);
937
938  // TODO PPC port assert(4 == SafepointSynchronize::sz_state(), "unexpected field size");
939  __ lwz(sync_state, sync_state_offs, sync_state_addr);
940
941  // TODO PPC port assert(4 == Thread::sz_suspend_flags(), "unexpected field size");
942  __ lwz(suspend_flags, thread_(suspend_flags));
943
944  Label sync_check_done;
945  Label do_safepoint;
946  // No synchronization in progress nor yet synchronized.
947  __ cmpwi(CCR0, sync_state, SafepointSynchronize::_not_synchronized);
948  // Not suspended.
949  __ cmpwi(CCR1, suspend_flags, 0);
950
951  __ bne(CCR0, do_safepoint);
952  __ beq(CCR1, sync_check_done);
953  __ bind(do_safepoint);
954  __ isync();
955  // Block. We do the call directly and leave the current
956  // last_Java_frame setup undisturbed. We must save any possible
957  // native result across the call. No oop is present.
958
959  __ mr(R3_ARG1, R16_thread);
960#if defined(ABI_ELFv2)
961  __ call_c(CAST_FROM_FN_PTR(address, JavaThread::check_special_condition_for_native_trans),
962            relocInfo::none);
963#else
964  __ call_c(CAST_FROM_FN_PTR(FunctionDescriptor*, JavaThread::check_special_condition_for_native_trans),
965            relocInfo::none);
966#endif
967
968  __ bind(sync_check_done);
969
970  //=============================================================================
971  // <<<<<< Back in Interpreter Frame >>>>>
972
973  // We are in thread_in_native_trans here and back in the normal
974  // interpreter frame. We don't have to do anything special about
975  // safepoints and we can switch to Java mode anytime we are ready.
976
977  // Note: frame::interpreter_frame_result has a dependency on how the
978  // method result is saved across the call to post_method_exit. For
979  // native methods it assumes that the non-FPU/non-void result is
980  // saved in _native_lresult and a FPU result in _native_fresult. If
981  // this changes then the interpreter_frame_result implementation
982  // will need to be updated too.
983
984  // On PPC64, we have stored the result directly after the native call.
985
986  //=============================================================================
987  // Back in Java
988
989  // We use release_store_fence to update values like the thread state, where
990  // we don't want the current thread to continue until all our prior memory
991  // accesses (including the new thread state) are visible to other threads.
992  __ li(R0/*thread_state*/, _thread_in_Java);
993  __ release();
994  __ stw(R0/*thread_state*/, thread_(thread_state));
995  if (UseMembar) {
996    __ fence();
997  }
998
999  __ reset_last_Java_frame();
1000
1001  // Jvmdi/jvmpi support. Whether we've got an exception pending or
1002  // not, and whether unlocking throws an exception or not, we notify
1003  // on native method exit. If we do have an exception, we'll end up
1004  // in the caller's context to handle it, so if we don't do the
1005  // notify here, we'll drop it on the floor.
1006  __ notify_method_exit(true/*native method*/,
1007                        ilgl /*illegal state (not used for native methods)*/,
1008                        InterpreterMacroAssembler::NotifyJVMTI,
1009                        false /*check_exceptions*/);
1010
1011  //=============================================================================
1012  // Handle exceptions
1013
1014  if (synchronized) {
1015    // Don't check for exceptions since we're still in the i2n frame. Do that
1016    // manually afterwards.
1017    unlock_method(false);
1018  }
1019
1020  // Reset active handles after returning from native.
1021  // thread->active_handles()->clear();
1022  __ ld(active_handles, thread_(active_handles));
1023  // TODO PPC port assert(4 == JNIHandleBlock::top_size_in_bytes(), "unexpected field size");
1024  __ li(R0, 0);
1025  __ stw(R0, JNIHandleBlock::top_offset_in_bytes(), active_handles);
1026
1027  Label exception_return_sync_check_already_unlocked;
1028  __ ld(R0/*pending_exception*/, thread_(pending_exception));
1029  __ cmpdi(CCR0, R0/*pending_exception*/, 0);
1030  __ bne(CCR0, exception_return_sync_check_already_unlocked);
1031
1032  //-----------------------------------------------------------------------------
1033  // No exception pending.
1034
1035  // Move native method result back into proper registers and return.
1036  // Invoke result handler (may unbox/promote).
1037  __ ld(R11_scratch1, 0, R1_SP);
1038  __ ld(R3_RET, _ijava_state_neg(lresult), R11_scratch1);
1039  __ lfd(F1_RET, _ijava_state_neg(fresult), R11_scratch1);
1040  __ call_stub(result_handler_addr);
1041
1042  __ merge_frames(/*top_frame_sp*/ R21_sender_SP, /*return_pc*/ R0, R11_scratch1, R12_scratch2);
1043
1044  // Must use the return pc which was loaded from the caller's frame
1045  // as the VM uses return-pc-patching for deoptimization.
1046  __ mtlr(R0);
1047  __ blr();
1048
1049  //-----------------------------------------------------------------------------
1050  // An exception is pending. We call into the runtime only if the
1051  // caller was not interpreted. If it was interpreted the
1052  // interpreter will do the correct thing. If it isn't interpreted
1053  // (call stub/compiled code) we will change our return and continue.
1054
1055  BIND(exception_return_sync_check);
1056
1057  if (synchronized) {
1058    // Don't check for exceptions since we're still in the i2n frame. Do that
1059    // manually afterwards.
1060    unlock_method(false);
1061  }
1062  BIND(exception_return_sync_check_already_unlocked);
1063
1064  const Register return_pc = R31;
1065
1066  __ ld(return_pc, 0, R1_SP);
1067  __ ld(return_pc, _abi(lr), return_pc);
1068
1069  // Get the address of the exception handler.
1070  __ call_VM_leaf(CAST_FROM_FN_PTR(address, SharedRuntime::exception_handler_for_return_address),
1071                  R16_thread,
1072                  return_pc /* return pc */);
1073  __ merge_frames(/*top_frame_sp*/ R21_sender_SP, noreg, R11_scratch1, R12_scratch2);
1074
1075  // Load the PC of the exception handler into LR.
1076  __ mtlr(R3_RET);
1077
1078  // Load exception into R3_ARG1 and clear pending exception in thread.
1079  __ ld(R3_ARG1/*exception*/, thread_(pending_exception));
1080  __ li(R4_ARG2, 0);
1081  __ std(R4_ARG2, thread_(pending_exception));
1082
1083  // Load the original return pc into R4_ARG2.
1084  __ mr(R4_ARG2/*issuing_pc*/, return_pc);
1085
1086  // Return to exception handler.
1087  __ blr();
1088
1089  //=============================================================================
1090  // Counter overflow.
1091
1092  if (inc_counter) {
1093    // Handle invocation counter overflow.
1094    __ bind(invocation_counter_overflow);
1095
1096    generate_counter_overflow(continue_after_compile);
1097  }
1098
1099  return entry;
1100}
1101
1102// Generic interpreted method entry to (asm) interpreter.
1103//
1104address TemplateInterpreterGenerator::generate_normal_entry(bool synchronized) {
1105  bool inc_counter = UseCompiler || CountCompiledCalls || LogTouchedMethods;
1106  address entry = __ pc();
1107  // Generate the code to allocate the interpreter stack frame.
1108  Register Rsize_of_parameters = R4_ARG2, // Written by generate_fixed_frame.
1109           Rsize_of_locals     = R5_ARG3; // Written by generate_fixed_frame.
1110
1111  generate_fixed_frame(false, Rsize_of_parameters, Rsize_of_locals);
1112
1113  // --------------------------------------------------------------------------
1114  // Zero out non-parameter locals.
1115  // Note: *Always* zero out non-parameter locals as Sparc does. It's not
1116  // worth asking the flag, just do it.
1117  Register Rslot_addr = R6_ARG4,
1118           Rnum       = R7_ARG5;
1119  Label Lno_locals, Lzero_loop;
1120
1121  // Set up the zeroing loop.
1122  __ subf(Rnum, Rsize_of_parameters, Rsize_of_locals);
1123  __ subf(Rslot_addr, Rsize_of_parameters, R18_locals);
1124  __ srdi_(Rnum, Rnum, Interpreter::logStackElementSize);
1125  __ beq(CCR0, Lno_locals);
1126  __ li(R0, 0);
1127  __ mtctr(Rnum);
1128
1129  // The zero locals loop.
1130  __ bind(Lzero_loop);
1131  __ std(R0, 0, Rslot_addr);
1132  __ addi(Rslot_addr, Rslot_addr, -Interpreter::stackElementSize);
1133  __ bdnz(Lzero_loop);
1134
1135  __ bind(Lno_locals);
1136
1137  // --------------------------------------------------------------------------
1138  // Counter increment and overflow check.
1139  Label invocation_counter_overflow,
1140        profile_method,
1141        profile_method_continue;
1142  if (inc_counter || ProfileInterpreter) {
1143
1144    Register Rdo_not_unlock_if_synchronized_addr = R11_scratch1;
1145    if (synchronized) {
1146      // Since at this point in the method invocation the exception handler
1147      // would try to exit the monitor of a synchronized method which hasn't
1148      // been entered yet, we set the thread-local variable
1149      // _do_not_unlock_if_synchronized to true. If any exception was thrown by
1150      // the runtime, exception handling, i.e. unlock_if_synchronized_method, will
1151      // check this thread-local flag.
1152      // One effect of this flag is to force an unwind in the topmost
1153      // interpreter frame without performing an unlock while doing so.
1154      __ li(R0, 1);
1155      __ stb(R0, in_bytes(JavaThread::do_not_unlock_if_synchronized_offset()), R16_thread);
1156    }
1157
1158    // Argument and return type profiling.
1159    __ profile_parameters_type(R3_ARG1, R4_ARG2, R5_ARG3, R6_ARG4);
1160
1161    // Increment invocation counter and check for overflow.
1162    if (inc_counter) {
1163      generate_counter_incr(&invocation_counter_overflow, &profile_method, &profile_method_continue);
1164    }
1165
1166    __ bind(profile_method_continue);
1167
1168    // Reset the _do_not_unlock_if_synchronized flag.
1169    if (synchronized) {
1170      __ li(R0, 0);
1171      __ stb(R0, in_bytes(JavaThread::do_not_unlock_if_synchronized_offset()), R16_thread);
1172    }
1173  }
1174
1175  // --------------------------------------------------------------------------
1176  // Locking of synchronized methods. Must happen AFTER invocation_counter
1177  // check and stack overflow check, so the method is not locked if either overflows.
1178  if (synchronized) {
1179    lock_method(R3_ARG1, R4_ARG2, R5_ARG3);
1180  }
1181#ifdef ASSERT
1182  else {
1183    Label Lok;
1184    __ lwz(R0, in_bytes(Method::access_flags_offset()), R19_method);
1185    __ andi_(R0, R0, JVM_ACC_SYNCHRONIZED);
1186    __ asm_assert_eq("method needs synchronization", 0x8521);
1187    __ bind(Lok);
1188  }
1189#endif // ASSERT
1190
1191  __ verify_thread();
1192
1193  // --------------------------------------------------------------------------
1194  // JVMTI support
1195  __ notify_method_entry();
1196
1197  // --------------------------------------------------------------------------
1198  // Start executing instructions.
1199  __ dispatch_next(vtos);
1200
1201  // --------------------------------------------------------------------------
1202  // Out of line counter overflow and MDO creation code.
1203  if (ProfileInterpreter) {
1204    // We have decided to profile this method in the interpreter.
1205    __ bind(profile_method);
1206    __ call_VM(noreg, CAST_FROM_FN_PTR(address, InterpreterRuntime::profile_method));
1207    __ set_method_data_pointer_for_bcp();
1208    __ b(profile_method_continue);
1209  }
1210
1211  if (inc_counter) {
1212    // Handle invocation counter overflow.
1213    __ bind(invocation_counter_overflow);
1214    generate_counter_overflow(profile_method_continue);
1215  }
1216  return entry;
1217}
1218
1219// CRC32 Intrinsics.
1220//
1221// Contract on scratch and work registers.
1222// =======================================
1223//
1224// On ppc, the register set {R2..R12} is available in the interpreter as scratch/work registers.
1225// You should, however, keep in mind that {R3_ARG1..R10_ARG8} is the C-ABI argument register set.
1226// You can't rely on these registers across calls.
1227//
1228// The generators for CRC32_update and for CRC32_updateBytes use the
1229// scratch/work register set internally, passing the work registers
1230// as arguments to the MacroAssembler emitters as required.
1231//
1232// R3_ARG1..R6_ARG4 are preset to hold the incoming java arguments.
1233// Their contents are not constant but may change according to the requirements
1234// of the emitted code.
1235//
1236// All other registers from the scratch/work register set are used "internally"
1237// and contain garbage (i.e. unpredictable values) once blr() is reached.
1238// Basically, only R3_RET contains a defined value which is the function result.
1239//
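// Illustrative note (a sketch of the argument layout assumed by the emitters
// below, not generated code): Java arguments sit reversed on the expression
// stack, each int occupying one 8-byte slot, with the last argument closest to
// R15_esp. For update(int crc, int b) this means b is found at R15_esp + 1*wordSize
// and crc at R15_esp + 2*wordSize, which is where the entry below loads them from.
//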
1240/**
1241 * Method entry for static native methods:
1242 *   int java.util.zip.CRC32.update(int crc, int b)
1243 */
1244address TemplateInterpreterGenerator::generate_CRC32_update_entry() {
1245  if (UseCRC32Intrinsics) {
1246    address start = __ pc();  // Remember stub start address (is rtn value).
1247    Label slow_path;
1248
1249    // Safepoint check
1250    const Register sync_state = R11_scratch1;
1251    int sync_state_offs = __ load_const_optimized(sync_state, SafepointSynchronize::address_of_state(), /*temp*/R0, true);
1252    __ lwz(sync_state, sync_state_offs, sync_state);
1253    __ cmpwi(CCR0, sync_state, SafepointSynchronize::_not_synchronized);
1254    __ bne(CCR0, slow_path);
1255
1256    // We don't generate a local frame and don't align the stack, because
1257    // we do not even call stub code (we generate the code inline)
1258    // and there is no safepoint on this path.
1259
1260    // Load java parameters.
1261    // R15_esp is the caller's operand stack pointer, i.e. it points to the parameters.
1262    const Register argP    = R15_esp;
1263    const Register crc     = R3_ARG1;  // crc value
1264    const Register data    = R4_ARG2;  // address of java byte value (kernel_crc32 needs address)
1265    const Register dataLen = R5_ARG3;  // source data len (1 byte). Not used because calling the single-byte emitter.
1266    const Register table   = R6_ARG4;  // address of crc32 table
1267    const Register tmp     = dataLen;  // Reuse unused len register to show we don't actually need a separate tmp here.
1268
1269    BLOCK_COMMENT("CRC32_update {");
1270
1271    // Arguments are reversed on java expression stack
1272#ifdef VM_LITTLE_ENDIAN
1273    __ addi(data, argP, 0+1*wordSize); // (stack) address of byte value. Emitter expects address, not value.
1274                                       // Being passed as an int, the single byte is at offset +0.
1275#else
1276    __ addi(data, argP, 3+1*wordSize); // (stack) address of byte value. Emitter expects address, not value.
1277                                       // Being passed from java as an int, the single byte is at offset +3.
1278#endif
1279    __ lwz(crc,  2*wordSize, argP);    // Current crc state, zero extend to 64 bit to have a clean register.
1280
1281    StubRoutines::ppc64::generate_load_crc_table_addr(_masm, table);
1282    __ kernel_crc32_singleByte(crc, data, dataLen, table, tmp);
1283
1284    // Restore caller sp for c2i case and return.
1285    __ mr(R1_SP, R21_sender_SP); // Cut the stack back to where the caller started.
1286    __ blr();
1287
1288    // Generate a vanilla native entry as the slow path.
1289    BLOCK_COMMENT("} CRC32_update");
1290    BIND(slow_path);
1291    __ jump_to_entry(Interpreter::entry_for_kind(Interpreter::native), R11_scratch1);
1292    return start;
1293  }
1294
1295  return NULL;
1296}
1297
1298// CRC32 Intrinsics.
1299/**
1300 * Method entry for static native methods:
1301 *   int java.util.zip.CRC32.updateBytes(     int crc, byte[] b,  int off, int len)
1302 *   int java.util.zip.CRC32.updateByteBuffer(int crc, long* buf, int off, int len)
1303 */
1304address TemplateInterpreterGenerator::generate_CRC32_updateBytes_entry(AbstractInterpreter::MethodKind kind) {
1305  if (UseCRC32Intrinsics) {
1306    address start = __ pc();  // Remember stub start address (is rtn value).
1307    Label slow_path;
1308
1309    // Safepoint check
1310    const Register sync_state = R11_scratch1;
1311    int sync_state_offs = __ load_const_optimized(sync_state, SafepointSynchronize::address_of_state(), /*temp*/R0, true);
1312    __ lwz(sync_state, sync_state_offs, sync_state);
1313    __ cmpwi(CCR0, sync_state, SafepointSynchronize::_not_synchronized);
1314    __ bne(CCR0, slow_path);
1315
1316    // We don't generate a local frame and don't align the stack, because
1317    // we do not even call stub code (we generate the code inline)
1318    // and there is no safepoint on this path.
1319
1320    // Load parameters.
1321    // R15_esp is the caller's operand stack pointer, i.e. it points to the parameters.
1322    const Register argP    = R15_esp;
1323    const Register crc     = R3_ARG1;  // crc value
1324    const Register data    = R4_ARG2;  // address of java byte array
1325    const Register dataLen = R5_ARG3;  // source data len
1326    const Register table   = R6_ARG4;  // address of crc32 table
1327
1328    const Register t0      = R9;       // scratch registers for crc calculation
1329    const Register t1      = R10;
1330    const Register t2      = R11;
1331    const Register t3      = R12;
1332
1333    const Register tc0     = R2;       // registers to hold pre-calculated column addresses
1334    const Register tc1     = R7;
1335    const Register tc2     = R8;
1336    const Register tc3     = table;    // table address is reconstructed at the end of kernel_crc32_* emitters
1337
1338    const Register tmp     = t0;       // Only used very locally to calculate byte buffer address.
1339
1340    // Arguments are reversed on java expression stack.
1341    // Calculate address of start element.
1342    if (kind == Interpreter::java_util_zip_CRC32_updateByteBuffer) { // Used for "updateByteBuffer direct".
1343      BLOCK_COMMENT("CRC32_updateByteBuffer {");
1344      // crc     @ (SP + 5W) (32bit)
1345      // buf     @ (SP + 3W) (64bit ptr to long array)
1346      // off     @ (SP + 2W) (32bit)
1347      // dataLen @ (SP + 1W) (32bit)
1348      // data = buf + off
1349      __ ld(  data,    3*wordSize, argP);  // start of byte buffer
1350      __ lwa( tmp,     2*wordSize, argP);  // byte buffer offset
1351      __ lwa( dataLen, 1*wordSize, argP);  // #bytes to process
1352      __ lwz( crc,     5*wordSize, argP);  // current crc state
1353      __ add( data, data, tmp);            // Add byte buffer offset.
1354    } else {                                                         // Used for "updateBytes update".
1355      BLOCK_COMMENT("CRC32_updateBytes {");
1356      // crc     @ (SP + 4W) (32bit)
1357      // buf     @ (SP + 3W) (64bit ptr to byte array)
1358      // off     @ (SP + 2W) (32bit)
1359      // dataLen @ (SP + 1W) (32bit)
1360      // data = buf + off + base_offset
1361      __ ld(  data,    3*wordSize, argP);  // start of byte buffer
1362      __ lwa( tmp,     2*wordSize, argP);  // byte buffer offset
1363      __ lwa( dataLen, 1*wordSize, argP);  // #bytes to process
1364      __ add( data, data, tmp);            // add byte buffer offset
1365      __ lwz( crc,     4*wordSize, argP);  // current crc state
1366      __ addi(data, data, arrayOopDesc::base_offset_in_bytes(T_BYTE));
1367    }
1368
1369    StubRoutines::ppc64::generate_load_crc_table_addr(_masm, table);
1370
1371    // Performance measurements show the 1word and 2word variants to be almost equivalent,
1372    // with a slight advantage for the 1word variant. We chose the 1word variant for
1373    // code compactness.
1374    __ kernel_crc32_1word(crc, data, dataLen, table, t0, t1, t2, t3, tc0, tc1, tc2, tc3);
1375
1376    // Restore caller sp for c2i case and return.
1377    __ mr(R1_SP, R21_sender_SP); // Cut the stack back to where the caller started.
1378    __ blr();
1379
1380    // Generate a vanilla native entry as the slow path.
1381    BLOCK_COMMENT("} CRC32_updateBytes(Buffer)");
1382    BIND(slow_path);
1383    __ jump_to_entry(Interpreter::entry_for_kind(Interpreter::native), R11_scratch1);
1384    return start;
1385  }
1386
1387  return NULL;
1388}
1389
1390// Not supported
1391address TemplateInterpreterGenerator::generate_CRC32C_updateBytes_entry(AbstractInterpreter::MethodKind kind) {
1392  return NULL;
1393}
1394
1395// =============================================================================
1396// Exceptions
1397
1398void TemplateInterpreterGenerator::generate_throw_exception() {
1399  Register Rexception    = R17_tos,
1400           Rcontinuation = R3_RET;
1401
1402  // --------------------------------------------------------------------------
1403  // Entry point if a method returns with a pending exception (rethrow).
1404  Interpreter::_rethrow_exception_entry = __ pc();
1405  {
1406    __ restore_interpreter_state(R11_scratch1); // Sets R11_scratch1 = fp.
1407    __ ld(R12_scratch2, _ijava_state_neg(top_frame_sp), R11_scratch1);
1408    __ resize_frame_absolute(R12_scratch2, R11_scratch1, R0);
1409
1410    // Compiled code destroys templateTableBase, reload.
1411    __ load_const_optimized(R25_templateTableBase, (address)Interpreter::dispatch_table((TosState)0), R11_scratch1);
1412  }
1413
1414  // Entry point if an interpreted method throws an exception (throw).
1415  Interpreter::_throw_exception_entry = __ pc();
1416  {
1417    __ mr(Rexception, R3_RET);
1418
1419    __ verify_thread();
1420    __ verify_oop(Rexception);
1421
1422    // Expression stack must be empty before entering the VM in case of an exception.
1423    __ empty_expression_stack();
1424    // Find exception handler address and preserve exception oop.
1425    // Call C routine to find handler and jump to it.
1426    __ call_VM(Rexception, CAST_FROM_FN_PTR(address, InterpreterRuntime::exception_handler_for_exception), Rexception);
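    // On return, Rcontinuation (R3_RET) holds the handler entry point and
    // Rexception holds the exception oop reloaded from the thread by call_VM.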
1427    __ mtctr(Rcontinuation);
1428    // Push exception for exception handler bytecodes.
1429    __ push_ptr(Rexception);
1430
1431    // Jump to the exception handler (this may be the remove-activation entry!).
1432    __ bctr();
1433  }
1434
1435  // If the exception is not handled in the current frame, the frame is
1436  // removed and the exception is rethrown (i.e. the exception
1437  // continuation is _rethrow_exception).
1438  //
1439  // Note: At this point the bci still refers to the instruction
1440  // which caused the exception, and the expression stack is
1441  // empty. Thus, for any VM calls at this point, GC will find a legal
1442  // oop map (with empty expression stack).
1443
1444  // In current activation
1445  // tos: exception
1446  // bcp: exception bcp
1447
1448  // --------------------------------------------------------------------------
1449  // JVMTI PopFrame support
1450
1451  Interpreter::_remove_activation_preserving_args_entry = __ pc();
1452  {
1453    // Set the popframe_processing bit in popframe_condition indicating that we are
1454    // currently handling popframe, so that call_VMs that may happen later do not
1455    // trigger new popframe handling cycles.
1456    __ lwz(R11_scratch1, in_bytes(JavaThread::popframe_condition_offset()), R16_thread);
1457    __ ori(R11_scratch1, R11_scratch1, JavaThread::popframe_processing_bit);
1458    __ stw(R11_scratch1, in_bytes(JavaThread::popframe_condition_offset()), R16_thread);
1459
1460    // Empty the expression stack, as in normal exception handling.
1461    __ empty_expression_stack();
1462    __ unlock_if_synchronized_method(vtos, /* throw_monitor_exception */ false, /* install_monitor_exception */ false);
1463
1464    // Check to see whether we are returning to a deoptimized frame.
1465    // (The PopFrame call ensures that the caller of the popped frame is
1466    // either interpreted or compiled and deoptimizes it if compiled.)
1467    // Note that we don't compare the return PC against the
1468    // deoptimization blob's unpack entry because of the presence of
1469    // adapter frames in C2.
1470    Label Lcaller_not_deoptimized;
1471    Register return_pc = R3_ARG1;
1472    __ ld(return_pc, 0, R1_SP);
1473    __ ld(return_pc, _abi(lr), return_pc);
1474    __ call_VM_leaf(CAST_FROM_FN_PTR(address, InterpreterRuntime::interpreter_contains), return_pc);
1475    __ cmpdi(CCR0, R3_RET, 0);
1476    __ bne(CCR0, Lcaller_not_deoptimized);
1477
1478    // The deoptimized case.
1479    // In this case, we can't call dispatch_next() after the frame is
1480    // popped, but instead must save the incoming arguments and restore
1481    // them after deoptimization has occurred.
1482    __ ld(R4_ARG2, in_bytes(Method::const_offset()), R19_method);
1483    __ lhz(R4_ARG2 /* number of params */, in_bytes(ConstMethod::size_of_parameters_offset()), R4_ARG2);
1484    __ slwi(R4_ARG2, R4_ARG2, Interpreter::logStackElementSize);
1485    __ addi(R5_ARG3, R18_locals, Interpreter::stackElementSize);
1486    __ subf(R5_ARG3, R4_ARG2, R5_ARG3);
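    // R4_ARG2 now holds the size of the parameter area in bytes and R5_ARG3 its
    // lowest address (locals occupy decreasing addresses with increasing index).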
1487    // Save these arguments.
1488    __ call_VM_leaf(CAST_FROM_FN_PTR(address, Deoptimization::popframe_preserve_args), R16_thread, R4_ARG2, R5_ARG3);
1489
1490    // Inform deoptimization that it is responsible for restoring these arguments.
1491    __ load_const_optimized(R11_scratch1, JavaThread::popframe_force_deopt_reexecution_bit);
1492    __ stw(R11_scratch1, in_bytes(JavaThread::popframe_condition_offset()), R16_thread);
1493
1494    // Return from the current method into the deoptimization blob. We will eventually
1495    // end up in the deopt interpreter entry; deoptimization has prepared everything so
1496    // that we will re-execute the call that called us.
1497    __ merge_frames(/*top_frame_sp*/ R21_sender_SP, /*reload return_pc*/ return_pc, R11_scratch1, R12_scratch2);
1498    __ mtlr(return_pc);
1499    __ blr();
1500
1501    // The non-deoptimized case.
1502    __ bind(Lcaller_not_deoptimized);
1503
1504    // Clear the popframe condition flag.
1505    __ li(R0, 0);
1506    __ stw(R0, in_bytes(JavaThread::popframe_condition_offset()), R16_thread);
1507
1508    // Get out of the current method and re-execute the call that called us.
1509    __ merge_frames(/*top_frame_sp*/ R21_sender_SP, /*return_pc*/ noreg, R11_scratch1, R12_scratch2);
1510    __ restore_interpreter_state(R11_scratch1);
1511    __ ld(R12_scratch2, _ijava_state_neg(top_frame_sp), R11_scratch1);
1512    __ resize_frame_absolute(R12_scratch2, R11_scratch1, R0);
1513    if (ProfileInterpreter) {
1514      __ set_method_data_pointer_for_bcp();
1515      __ ld(R11_scratch1, 0, R1_SP);
1516      __ std(R28_mdx, _ijava_state_neg(mdx), R11_scratch1);
1517    }
1518#if INCLUDE_JVMTI
1519    Label L_done;
1520
1521    __ lbz(R11_scratch1, 0, R14_bcp);
1522    __ cmpwi(CCR0, R11_scratch1, Bytecodes::_invokestatic);
1523    __ bne(CCR0, L_done);
1524
1525    // The member name argument must be restored if _invokestatic is re-executed after a PopFrame call.
1526    // Detect such a case in the InterpreterRuntime function and return the member name argument, or NULL.
1527    __ ld(R4_ARG2, 0, R18_locals);
1528    __ MacroAssembler::call_VM(R4_ARG2, CAST_FROM_FN_PTR(address, InterpreterRuntime::member_name_arg_or_null), R4_ARG2, R19_method, R14_bcp, false);
1529    __ restore_interpreter_state(R11_scratch1, /*bcp_and_mdx_only*/ true);
1530    __ cmpdi(CCR0, R4_ARG2, 0);
1531    __ beq(CCR0, L_done);
1532    __ std(R4_ARG2, wordSize, R15_esp);
1533    __ bind(L_done);
1534#endif // INCLUDE_JVMTI
1535    __ dispatch_next(vtos);
1536  }
1537  // end of JVMTI PopFrame support
1538
1539  // --------------------------------------------------------------------------
1540  // Remove activation exception entry.
1541  // This is jumped to if an interpreted method can't handle an exception itself
1542  // (we come from the throw/rethrow exception entry above). We're going to call
1543  // into the VM to find the exception handler in the caller, pop the current
1544  // frame and return the handler we calculated.
1545  Interpreter::_remove_activation_entry = __ pc();
1546  {
1547    __ pop_ptr(Rexception);
1548    __ verify_thread();
1549    __ verify_oop(Rexception);
1550    __ std(Rexception, in_bytes(JavaThread::vm_result_offset()), R16_thread);
1551
1552    __ unlock_if_synchronized_method(vtos, /* throw_monitor_exception */ false, true);
1553    __ notify_method_exit(false, vtos, InterpreterMacroAssembler::SkipNotifyJVMTI, false);
1554
1555    __ get_vm_result(Rexception);
1556
1557    // We are done with this activation frame; find out where to go next.
1558    // The continuation point will be an exception handler, which expects
1559    // the following registers set up:
1560    //
1561    // RET:  exception oop
1562    // ARG2: Issuing PC (see generate_exception_blob()), only used if the caller is compiled.
1563
1564    Register return_pc = R31; // Needs to survive the runtime call.
1565    __ ld(return_pc, 0, R1_SP);
1566    __ ld(return_pc, _abi(lr), return_pc);
1567    __ call_VM_leaf(CAST_FROM_FN_PTR(address, SharedRuntime::exception_handler_for_return_address), R16_thread, return_pc);
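    // R3_RET now holds the exception continuation in the caller; it becomes the
    // branch target (via LR) below, while the issuing PC is passed in R4_ARG2.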
1568
1569    // Remove the current activation.
1570    __ merge_frames(/*top_frame_sp*/ R21_sender_SP, /*return_pc*/ noreg, R11_scratch1, R12_scratch2);
1571
1572    __ mr(R4_ARG2, return_pc);
1573    __ mtlr(R3_RET);
1574    __ mr(R3_RET, Rexception);
1575    __ blr();
1576  }
1577}
1578
1579// JVMTI ForceEarlyReturn support.
1580// Returns "in the middle" of a method with a "fake" return value.
1581address TemplateInterpreterGenerator::generate_earlyret_entry_for(TosState state) {
1582
1583  Register Rscratch1 = R11_scratch1,
1584           Rscratch2 = R12_scratch2;
1585
1586  address entry = __ pc();
1587  __ empty_expression_stack();
1588
1589  __ load_earlyret_value(state, Rscratch1);
1590
1591  __ ld(Rscratch1, in_bytes(JavaThread::jvmti_thread_state_offset()), R16_thread);
1592  // Clear the earlyret state.
1593  __ li(R0, 0);
1594  __ stw(R0, in_bytes(JvmtiThreadState::earlyret_state_offset()), Rscratch1);
1595
1596  __ remove_activation(state, false, false);
1597  // Copied from TemplateTable::_return.
1598  // Restoration of lr done by remove_activation.
1599  switch (state) {
1600    case ltos:
1601    case btos:
1602    case ctos:
1603    case stos:
1604    case atos:
1605    case itos: __ mr(R3_RET, R17_tos); break;
1606    case ftos:
1607    case dtos: __ fmr(F1_RET, F15_ftos); break;
1608    case vtos: // This might be a constructor. Final fields (and volatile fields on PPC64) need
1609               // to become visible before the reference to the object gets stored anywhere.
1610               __ membar(Assembler::StoreStore); break;
1611    default  : ShouldNotReachHere();
1612  }
1613  __ blr();
1614
1615  return entry;
1616} // end of ForceEarlyReturn support
1617
1618//-----------------------------------------------------------------------------
1619// Helper for vtos entry point generation
1620
1621void TemplateInterpreterGenerator::set_vtos_entry_points(Template* t,
1622                                                         address& bep,
1623                                                         address& cep,
1624                                                         address& sep,
1625                                                         address& aep,
1626                                                         address& iep,
1627                                                         address& lep,
1628                                                         address& fep,
1629                                                         address& dep,
1630                                                         address& vep) {
1631  assert(t->is_valid() && t->tos_in() == vtos, "illegal template");
1632  Label L;
1633
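  // Each non-vtos entry pushes the value held in the tos register(s) onto the
  // expression stack and joins the common vtos path at L (the itos entry just
  // falls through); the vtos entry itself pushes nothing.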
1634  aep = __ pc();  __ push_ptr();  __ b(L);
1635  fep = __ pc();  __ push_f();    __ b(L);
1636  dep = __ pc();  __ push_d();    __ b(L);
1637  lep = __ pc();  __ push_l();    __ b(L);
1638  __ align(32, 12, 24); // align L
1639  bep = cep = sep =
1640  iep = __ pc();  __ push_i();
1641  vep = __ pc();
1642  __ bind(L);
1643  generate_and_dispatch(t);
1644}
1645
1646//-----------------------------------------------------------------------------
1647
1648// Non-product code
1649#ifndef PRODUCT
1650address TemplateInterpreterGenerator::generate_trace_code(TosState state) {
1651  //__ flush_bundle();
1652  address entry = __ pc();
1653
1654  const char *bname = NULL;
1655  uint tsize = 0;
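  // tsize is the slot offset of the second-topmost expression stack value after
  // the tos value has been pushed: 2 for one-slot values, 3 for two-slot values
  // (ltos, dtos). See the loads of R6_ARG4 and R5_ARG3 below.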
1656  switch(state) {
1657  case ftos:
1658    bname = "trace_code_ftos {";
1659    tsize = 2;
1660    break;
1661  case btos:
1662    bname = "trace_code_btos {";
1663    tsize = 2;
1664    break;
1665  case ctos:
1666    bname = "trace_code_ctos {";
1667    tsize = 2;
1668    break;
1669  case stos:
1670    bname = "trace_code_stos {";
1671    tsize = 2;
1672    break;
1673  case itos:
1674    bname = "trace_code_itos {";
1675    tsize = 2;
1676    break;
1677  case ltos:
1678    bname = "trace_code_ltos {";
1679    tsize = 3;
1680    break;
1681  case atos:
1682    bname = "trace_code_atos {";
1683    tsize = 2;
1684    break;
1685  case vtos:
1686    // Note: In case of vtos, the topmost stack value could be an int or a double.
1687    // In case of a double (2 slots) we won't see the 2nd stack value.
1688    // Maybe we should simply print the topmost 3 stack slots to cope with the problem.
1689    bname = "trace_code_vtos {";
1690    tsize = 2;
1691
1692    break;
1693  case dtos:
1694    bname = "trace_code_dtos {";
1695    tsize = 3;
1696    break;
1697  default:
1698    ShouldNotReachHere();
1699  }
1700  BLOCK_COMMENT(bname);
1701
1702  // Support short-cut for TraceBytecodesAt.
1703  // Don't call into the VM if we don't want to trace, to speed things up.
1704  Label Lskip_vm_call;
1705  if (TraceBytecodesAt > 0 && TraceBytecodesAt < max_intx) {
1706    int offs1 = __ load_const_optimized(R11_scratch1, (address) &TraceBytecodesAt, R0, true);
1707    int offs2 = __ load_const_optimized(R12_scratch2, (address) &BytecodeCounter::_counter_value, R0, true);
1708    __ ld(R11_scratch1, offs1, R11_scratch1);
1709    __ lwa(R12_scratch2, offs2, R12_scratch2);
1710    __ cmpd(CCR0, R12_scratch2, R11_scratch1);
1711    __ blt(CCR0, Lskip_vm_call);
1712  }
1713
1714  __ push(state);
1715  // Load 2 topmost expression stack values.
1716  __ ld(R6_ARG4, tsize*Interpreter::stackElementSize, R15_esp);
1717  __ ld(R5_ARG3, Interpreter::stackElementSize, R15_esp);
1718  __ mflr(R31);
1719  __ call_VM(noreg, CAST_FROM_FN_PTR(address, SharedRuntime::trace_bytecode), /* unused */ R4_ARG2, R5_ARG3, R6_ARG4, false);
1720  __ mtlr(R31);
1721  __ pop(state);
1722
1723  if (TraceBytecodesAt > 0 && TraceBytecodesAt < max_intx) {
1724    __ bind(Lskip_vm_call);
1725  }
1726  __ blr();
1727  BLOCK_COMMENT("} trace_code");
1728  return entry;
1729}
1730
1731void TemplateInterpreterGenerator::count_bytecode() {
1732  int offs = __ load_const_optimized(R11_scratch1, (address) &BytecodeCounter::_counter_value, R12_scratch2, true);
1733  __ lwz(R12_scratch2, offs, R11_scratch1);
1734  __ addi(R12_scratch2, R12_scratch2, 1);
1735  __ stw(R12_scratch2, offs, R11_scratch1);
1736}
1737
1738void TemplateInterpreterGenerator::histogram_bytecode(Template* t) {
1739  int offs = __ load_const_optimized(R11_scratch1, (address) &BytecodeHistogram::_counters[t->bytecode()], R12_scratch2, true);
1740  __ lwz(R12_scratch2, offs, R11_scratch1);
1741  __ addi(R12_scratch2, R12_scratch2, 1);
1742  __ stw(R12_scratch2, offs, R11_scratch1);
1743}
1744
1745void TemplateInterpreterGenerator::histogram_bytecode_pair(Template* t) {
1746  const Register addr = R11_scratch1,
1747                 tmp  = R12_scratch2;
1748  // Get index, shift out old bytecode, bring in new bytecode, and store it.
1749  // _index = (_index >> log2_number_of_codes) |
1750  //          (bytecode << log2_number_of_codes);
1751  int offs1 = __ load_const_optimized(addr, (address)&BytecodePairHistogram::_index, tmp, true);
1752  __ lwz(tmp, offs1, addr);
1753  __ srwi(tmp, tmp, BytecodePairHistogram::log2_number_of_codes);
1754  __ ori(tmp, tmp, ((int) t->bytecode()) << BytecodePairHistogram::log2_number_of_codes);
1755  __ stw(tmp, offs1, addr);
1756
1757  // Bump bucket contents.
1758  // _counters[_index] ++;
1759  int offs2 = __ load_const_optimized(addr, (address)&BytecodePairHistogram::_counters, R0, true);
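  // Scale the pair index to a byte offset (each counter is an int) before
  // adding it to the counter array base.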
1760  __ sldi(tmp, tmp, LogBytesPerInt);
1761  __ add(addr, tmp, addr);
1762  __ lwz(tmp, offs2, addr);
1763  __ addi(tmp, tmp, 1);
1764  __ stw(tmp, offs2, addr);
1765}
1766
1767void TemplateInterpreterGenerator::trace_bytecode(Template* t) {
1768  // Call a little run-time stub to avoid blow-up for each bytecode.
1769  // The run-time stub saves the right registers, depending on
1770  // the tosca in-state for the given template.
1771
1772  assert(Interpreter::trace_code(t->tos_in()) != NULL,
1773         "entry must have been generated");
1774
1775  // Note: we destroy LR here.
1776  __ bl(Interpreter::trace_code(t->tos_in()));
1777}
1778
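// Trap (illegal instruction) once the dynamic bytecode count reaches
// StopInterpreterAt. Non-product debugging aid.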
1779void TemplateInterpreterGenerator::stop_interpreter_at() {
1780  Label L;
1781  int offs1 = __ load_const_optimized(R11_scratch1, (address) &StopInterpreterAt, R0, true);
1782  int offs2 = __ load_const_optimized(R12_scratch2, (address) &BytecodeCounter::_counter_value, R0, true);
1783  __ ld(R11_scratch1, offs1, R11_scratch1);
1784  __ lwa(R12_scratch2, offs2, R12_scratch2);
1785  __ cmpd(CCR0, R12_scratch2, R11_scratch1);
1786  __ bne(CCR0, L);
1787  __ illtrap();
1788  __ bind(L);
1789}
1790
1791#endif // !PRODUCT
1792