templateTable_x86.cpp revision 13254:c044f8d03932
1/*
2 * Copyright (c) 1997, 2017, Oracle and/or its affiliates. All rights reserved.
3 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
4 *
5 * This code is free software; you can redistribute it and/or modify it
6 * under the terms of the GNU General Public License version 2 only, as
7 * published by the Free Software Foundation.
8 *
9 * This code is distributed in the hope that it will be useful, but WITHOUT
10 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
11 * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
12 * version 2 for more details (a copy is included in the LICENSE file that
13 * accompanied this code).
14 *
15 * You should have received a copy of the GNU General Public License version
16 * 2 along with this work; if not, write to the Free Software Foundation,
17 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
18 *
19 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
20 * or visit www.oracle.com if you need additional information or have any
21 * questions.
22 *
23 */
24
25#include "precompiled.hpp"
26#include "asm/macroAssembler.hpp"
27#include "interpreter/interpreter.hpp"
28#include "interpreter/interpreterRuntime.hpp"
29#include "interpreter/interp_masm.hpp"
30#include "interpreter/templateTable.hpp"
31#include "memory/universe.inline.hpp"
32#include "oops/methodData.hpp"
33#include "oops/objArrayKlass.hpp"
34#include "oops/oop.inline.hpp"
35#include "prims/methodHandles.hpp"
36#include "runtime/sharedRuntime.hpp"
37#include "runtime/stubRoutines.hpp"
38#include "runtime/synchronizer.hpp"
39#include "utilities/macros.hpp"
40
41#define __ _masm->
42
43// Global Register Names
44static const Register rbcp     = LP64_ONLY(r13) NOT_LP64(rsi);
45static const Register rlocals  = LP64_ONLY(r14) NOT_LP64(rdi);
46
47// Platform-dependent initialization
48void TemplateTable::pd_initialize() {
49  // No x86 specific initialization
50}
51
52// Address Computation: local variables
53static inline Address iaddress(int n) {
54  return Address(rlocals, Interpreter::local_offset_in_bytes(n));
55}
56
57static inline Address laddress(int n) {
58  return iaddress(n + 1);
59}
60
61#ifndef _LP64
62static inline Address haddress(int n) {
63  return iaddress(n + 0);
64}
65#endif
66
67static inline Address faddress(int n) {
68  return iaddress(n);
69}
70
71static inline Address daddress(int n) {
72  return laddress(n);
73}
74
75static inline Address aaddress(int n) {
76  return iaddress(n);
77}
78
79static inline Address iaddress(Register r) {
80  return Address(rlocals, r, Address::times_ptr);
81}
82
83static inline Address laddress(Register r) {
84  return Address(rlocals, r, Address::times_ptr, Interpreter::local_offset_in_bytes(1));
85}
86
87#ifndef _LP64
88static inline Address haddress(Register r)       {
89  return Address(rlocals, r, Interpreter::stackElementScale(), Interpreter::local_offset_in_bytes(0));
90}
91#endif
92
93static inline Address faddress(Register r) {
94  return iaddress(r);
95}
96
97static inline Address daddress(Register r) {
98  return laddress(r);
99}
100
101static inline Address aaddress(Register r) {
102  return iaddress(r);
103}
104
105
106// expression stack
107// (Note: Must not use symmetric equivalents at_rsp_m1/2 since they store
108// data beyond the rsp which is potentially unsafe in an MT environment;
109// an interrupt may overwrite that data.)
110static inline Address at_rsp   () {
111  return Address(rsp, 0);
112}
113
// At the top of the Java expression stack, which may be different from esp();
// for category 1 values it is not.
116static inline Address at_tos   () {
117  return Address(rsp,  Interpreter::expr_offset_in_bytes(0));
118}
119
120static inline Address at_tos_p1() {
121  return Address(rsp,  Interpreter::expr_offset_in_bytes(1));
122}
123
124static inline Address at_tos_p2() {
125  return Address(rsp,  Interpreter::expr_offset_in_bytes(2));
126}
127
128// Condition conversion
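// j_not() returns the assembler condition that is the negation of the given
// bytecode condition (e.g. TemplateTable::equal maps to Assembler::notEqual),
// so callers can jump to the not-taken path when the tested condition fails.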
129static Assembler::Condition j_not(TemplateTable::Condition cc) {
130  switch (cc) {
131  case TemplateTable::equal        : return Assembler::notEqual;
132  case TemplateTable::not_equal    : return Assembler::equal;
133  case TemplateTable::less         : return Assembler::greaterEqual;
134  case TemplateTable::less_equal   : return Assembler::greater;
135  case TemplateTable::greater      : return Assembler::lessEqual;
136  case TemplateTable::greater_equal: return Assembler::less;
137  }
138  ShouldNotReachHere();
139  return Assembler::zero;
140}
141
142
143
// Miscellaneous helper routines
// Store an oop (or NULL) at the address described by obj.
// If val == noreg, a NULL is stored.
147
148
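// For G1, the store is bracketed by a SATB pre-barrier (recording the field's
// previous value) and a post-barrier (enqueueing a card for cross-region,
// non-NULL stores).  For the card-table collectors only a card mark
// (store_check) after the store is needed; 'precise' selects whether the card
// of the exact address or of the object base is marked.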
149static void do_oop_store(InterpreterMacroAssembler* _masm,
150                         Address obj,
151                         Register val,
152                         BarrierSet::Name barrier,
153                         bool precise) {
154  assert(val == noreg || val == rax, "parameter is just for looks");
155  switch (barrier) {
156#if INCLUDE_ALL_GCS
157    case BarrierSet::G1SATBCTLogging:
158      {
159        // flatten object address if needed
160        // We do it regardless of precise because we need the registers
161        if (obj.index() == noreg && obj.disp() == 0) {
162          if (obj.base() != rdx) {
163            __ movptr(rdx, obj.base());
164          }
165        } else {
166          __ lea(rdx, obj);
167        }
168
169        Register rtmp    = LP64_ONLY(r8)         NOT_LP64(rsi);
170        Register rthread = LP64_ONLY(r15_thread) NOT_LP64(rcx);
171
172        NOT_LP64(__ get_thread(rcx));
173        NOT_LP64(__ save_bcp());
174
175        __ g1_write_barrier_pre(rdx /* obj */,
176                                rbx /* pre_val */,
177                                rthread /* thread */,
178                                rtmp  /* tmp */,
179                                val != noreg /* tosca_live */,
180                                false /* expand_call */);
181        if (val == noreg) {
182          __ store_heap_oop_null(Address(rdx, 0));
183        } else {
184          // G1 barrier needs uncompressed oop for region cross check.
185          Register new_val = val;
186          if (UseCompressedOops) {
187            new_val = rbx;
188            __ movptr(new_val, val);
189          }
190          __ store_heap_oop(Address(rdx, 0), val);
191          __ g1_write_barrier_post(rdx /* store_adr */,
192                                   new_val /* new_val */,
193                                   rthread /* thread */,
194                                   rtmp /* tmp */,
195                                   rbx /* tmp2 */);
196        }
197        NOT_LP64( __ restore_bcp());
198      }
199      break;
200#endif // INCLUDE_ALL_GCS
201    case BarrierSet::CardTableForRS:
202    case BarrierSet::CardTableExtension:
203      {
204        if (val == noreg) {
205          __ store_heap_oop_null(obj);
206        } else {
207          __ store_heap_oop(obj, val);
208          // flatten object address if needed
209          if (!precise || (obj.index() == noreg && obj.disp() == 0)) {
210            __ store_check(obj.base());
211          } else {
212            __ lea(rdx, obj);
213            __ store_check(rdx);
214          }
215        }
216      }
217      break;
218    case BarrierSet::ModRef:
219      if (val == noreg) {
220        __ store_heap_oop_null(obj);
221      } else {
222        __ store_heap_oop(obj, val);
223      }
224      break;
225    default      :
226      ShouldNotReachHere();
227
228  }
229}
230
231Address TemplateTable::at_bcp(int offset) {
232  assert(_desc->uses_bcp(), "inconsistent uses_bcp information");
233  return Address(rbcp, offset);
234}
235
236
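// patch_bytecode() overwrites the current bytecode in the method's code
// stream with a faster variant (bc) once the associated constant pool cache
// entry has been resolved, so subsequent executions take the fast path.
// Breakpointed bytecodes are redirected through the breakpoint table instead.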
237void TemplateTable::patch_bytecode(Bytecodes::Code bc, Register bc_reg,
238                                   Register temp_reg, bool load_bc_into_bc_reg/*=true*/,
239                                   int byte_no) {
240  if (!RewriteBytecodes)  return;
241  Label L_patch_done;
242
243  switch (bc) {
244  case Bytecodes::_fast_aputfield:
245  case Bytecodes::_fast_bputfield:
246  case Bytecodes::_fast_zputfield:
247  case Bytecodes::_fast_cputfield:
248  case Bytecodes::_fast_dputfield:
249  case Bytecodes::_fast_fputfield:
250  case Bytecodes::_fast_iputfield:
251  case Bytecodes::_fast_lputfield:
252  case Bytecodes::_fast_sputfield:
253    {
254      // We skip bytecode quickening for putfield instructions when
255      // the put_code written to the constant pool cache is zero.
256      // This is required so that every execution of this instruction
257      // calls out to InterpreterRuntime::resolve_get_put to do
258      // additional, required work.
259      assert(byte_no == f1_byte || byte_no == f2_byte, "byte_no out of range");
260      assert(load_bc_into_bc_reg, "we use bc_reg as temp");
261      __ get_cache_and_index_and_bytecode_at_bcp(temp_reg, bc_reg, temp_reg, byte_no, 1);
262      __ movl(bc_reg, bc);
263      __ cmpl(temp_reg, (int) 0);
264      __ jcc(Assembler::zero, L_patch_done);  // don't patch
265    }
266    break;
267  default:
268    assert(byte_no == -1, "sanity");
269    // the pair bytecodes have already done the load.
270    if (load_bc_into_bc_reg) {
271      __ movl(bc_reg, bc);
272    }
273  }
274
275  if (JvmtiExport::can_post_breakpoint()) {
276    Label L_fast_patch;
277    // if a breakpoint is present we can't rewrite the stream directly
278    __ movzbl(temp_reg, at_bcp(0));
279    __ cmpl(temp_reg, Bytecodes::_breakpoint);
280    __ jcc(Assembler::notEqual, L_fast_patch);
281    __ get_method(temp_reg);
282    // Let breakpoint table handling rewrite to quicker bytecode
283    __ call_VM(noreg, CAST_FROM_FN_PTR(address, InterpreterRuntime::set_original_bytecode_at), temp_reg, rbcp, bc_reg);
284#ifndef ASSERT
285    __ jmpb(L_patch_done);
286#else
287    __ jmp(L_patch_done);
288#endif
289    __ bind(L_fast_patch);
290  }
291
292#ifdef ASSERT
293  Label L_okay;
294  __ load_unsigned_byte(temp_reg, at_bcp(0));
295  __ cmpl(temp_reg, (int) Bytecodes::java_code(bc));
296  __ jcc(Assembler::equal, L_okay);
297  __ cmpl(temp_reg, bc_reg);
298  __ jcc(Assembler::equal, L_okay);
299  __ stop("patching the wrong bytecode");
300  __ bind(L_okay);
301#endif
302
303  // patch bytecode
304  __ movb(at_bcp(0), bc_reg);
305  __ bind(L_patch_done);
306}
307// Individual instructions
308
309
310void TemplateTable::nop() {
311  transition(vtos, vtos);
312  // nothing to do
313}
314
315void TemplateTable::shouldnotreachhere() {
316  transition(vtos, vtos);
317  __ stop("shouldnotreachhere bytecode");
318}
319
320void TemplateTable::aconst_null() {
321  transition(vtos, atos);
322  __ xorl(rax, rax);
323}
324
325void TemplateTable::iconst(int value) {
326  transition(vtos, itos);
327  if (value == 0) {
328    __ xorl(rax, rax);
329  } else {
330    __ movl(rax, value);
331  }
332}
333
334void TemplateTable::lconst(int value) {
335  transition(vtos, ltos);
336  if (value == 0) {
337    __ xorl(rax, rax);
338  } else {
339    __ movl(rax, value);
340  }
341#ifndef _LP64
342  assert(value >= 0, "check this code");
343  __ xorptr(rdx, rdx);
344#endif
345}
346
347
348
349void TemplateTable::fconst(int value) {
350  transition(vtos, ftos);
351  if (UseSSE >= 1) {
352    static float one = 1.0f, two = 2.0f;
353    switch (value) {
354    case 0:
355      __ xorps(xmm0, xmm0);
356      break;
357    case 1:
358      __ movflt(xmm0, ExternalAddress((address) &one));
359      break;
360    case 2:
361      __ movflt(xmm0, ExternalAddress((address) &two));
362      break;
363    default:
364      ShouldNotReachHere();
365      break;
366    }
367  } else {
368#ifdef _LP64
369    ShouldNotReachHere();
370#else
371           if (value == 0) { __ fldz();
372    } else if (value == 1) { __ fld1();
    } else if (value == 2) { __ fld1(); __ fld1(); __ faddp(); // could be done more directly
374    } else                 { ShouldNotReachHere();
375    }
376#endif // _LP64
377  }
378}
379
380void TemplateTable::dconst(int value) {
381  transition(vtos, dtos);
382  if (UseSSE >= 2) {
383    static double one = 1.0;
384    switch (value) {
385    case 0:
386      __ xorpd(xmm0, xmm0);
387      break;
388    case 1:
389      __ movdbl(xmm0, ExternalAddress((address) &one));
390      break;
391    default:
392      ShouldNotReachHere();
393      break;
394    }
395  } else {
396#ifdef _LP64
397    ShouldNotReachHere();
398#else
399           if (value == 0) { __ fldz();
400    } else if (value == 1) { __ fld1();
401    } else                 { ShouldNotReachHere();
402    }
403#endif
404  }
405}
406
407void TemplateTable::bipush() {
408  transition(vtos, itos);
409  __ load_signed_byte(rax, at_bcp(1));
410}
411
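// The two operand bytes are stored big-endian in the bytecode stream: load
// them as a 16-bit little-endian value, byte-swap the 32-bit register (which
// moves them, correctly ordered, into the upper half) and arithmetically
// shift back down to sign-extend the short.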
412void TemplateTable::sipush() {
413  transition(vtos, itos);
414  __ load_unsigned_short(rax, at_bcp(1));
415  __ bswapl(rax);
416  __ sarl(rax, 16);
417}
418
419void TemplateTable::ldc(bool wide) {
420  transition(vtos, vtos);
421  Register rarg = NOT_LP64(rcx) LP64_ONLY(c_rarg1);
422  Label call_ldc, notFloat, notClass, Done;
423
424  if (wide) {
425    __ get_unsigned_2_byte_index_at_bcp(rbx, 1);
426  } else {
427    __ load_unsigned_byte(rbx, at_bcp(1));
428  }
429
430  __ get_cpool_and_tags(rcx, rax);
431  const int base_offset = ConstantPool::header_size() * wordSize;
432  const int tags_offset = Array<u1>::base_offset_in_bytes();
433
434  // get type
435  __ movzbl(rdx, Address(rax, rbx, Address::times_1, tags_offset));
436
437  // unresolved class - get the resolved class
438  __ cmpl(rdx, JVM_CONSTANT_UnresolvedClass);
439  __ jccb(Assembler::equal, call_ldc);
440
441  // unresolved class in error state - call into runtime to throw the error
442  // from the first resolution attempt
443  __ cmpl(rdx, JVM_CONSTANT_UnresolvedClassInError);
444  __ jccb(Assembler::equal, call_ldc);
445
446  // resolved class - need to call vm to get java mirror of the class
447  __ cmpl(rdx, JVM_CONSTANT_Class);
448  __ jcc(Assembler::notEqual, notClass);
449
450  __ bind(call_ldc);
451
452  __ movl(rarg, wide);
453  call_VM(rax, CAST_FROM_FN_PTR(address, InterpreterRuntime::ldc), rarg);
454
455  __ push(atos);
456  __ jmp(Done);
457
458  __ bind(notClass);
459  __ cmpl(rdx, JVM_CONSTANT_Float);
460  __ jccb(Assembler::notEqual, notFloat);
461
462  // ftos
463  __ load_float(Address(rcx, rbx, Address::times_ptr, base_offset));
464  __ push(ftos);
465  __ jmp(Done);
466
467  __ bind(notFloat);
468#ifdef ASSERT
469  {
470    Label L;
471    __ cmpl(rdx, JVM_CONSTANT_Integer);
472    __ jcc(Assembler::equal, L);
473    // String and Object are rewritten to fast_aldc
474    __ stop("unexpected tag type in ldc");
475    __ bind(L);
476  }
477#endif
478  // itos JVM_CONSTANT_Integer only
479  __ movl(rax, Address(rcx, rbx, Address::times_ptr, base_offset));
480  __ push(itos);
481  __ bind(Done);
482}
483
484// Fast path for caching oop constants.
485void TemplateTable::fast_aldc(bool wide) {
486  transition(vtos, atos);
487
488  Register result = rax;
489  Register tmp = rdx;
490  int index_size = wide ? sizeof(u2) : sizeof(u1);
491
492  Label resolved;
493
494  // We are resolved if the resolved reference cache entry contains a
495  // non-null object (String, MethodType, etc.)
496  assert_different_registers(result, tmp);
497  __ get_cache_index_at_bcp(tmp, 1, index_size);
498  __ load_resolved_reference_at_index(result, tmp);
499  __ testl(result, result);
500  __ jcc(Assembler::notZero, resolved);
501
502  address entry = CAST_FROM_FN_PTR(address, InterpreterRuntime::resolve_ldc);
503
504  // first time invocation - must resolve first
505  __ movl(tmp, (int)bytecode());
506  __ call_VM(result, entry, tmp);
507
508  __ bind(resolved);
509
510  if (VerifyOops) {
511    __ verify_oop(result);
512  }
513}
514
515void TemplateTable::ldc2_w() {
516  transition(vtos, vtos);
517  Label Long, Done;
518  __ get_unsigned_2_byte_index_at_bcp(rbx, 1);
519
520  __ get_cpool_and_tags(rcx, rax);
521  const int base_offset = ConstantPool::header_size() * wordSize;
522  const int tags_offset = Array<u1>::base_offset_in_bytes();
523
524  // get type
525  __ cmpb(Address(rax, rbx, Address::times_1, tags_offset),
526          JVM_CONSTANT_Double);
527  __ jccb(Assembler::notEqual, Long);
528
529  // dtos
530  __ load_double(Address(rcx, rbx, Address::times_ptr, base_offset));
531  __ push(dtos);
532
533  __ jmpb(Done);
534  __ bind(Long);
535
536  // ltos
537  __ movptr(rax, Address(rcx, rbx, Address::times_ptr, base_offset + 0 * wordSize));
538  NOT_LP64(__ movptr(rdx, Address(rcx, rbx, Address::times_ptr, base_offset + 1 * wordSize)));
539  __ push(ltos);
540
541  __ bind(Done);
542}
543
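// The local's index is negated because iaddress(Register) scales it off
// rlocals and locals live at decreasing addresses for increasing indices
// (local 0 is at the highest address).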
544void TemplateTable::locals_index(Register reg, int offset) {
545  __ load_unsigned_byte(reg, at_bcp(offset));
546  __ negptr(reg);
547}
548
549void TemplateTable::iload() {
550  iload_internal();
551}
552
553void TemplateTable::nofast_iload() {
554  iload_internal(may_not_rewrite);
555}
556
557void TemplateTable::iload_internal(RewriteControl rc) {
558  transition(vtos, itos);
559  if (RewriteFrequentPairs && rc == may_rewrite) {
560    Label rewrite, done;
561    const Register bc = LP64_ONLY(c_rarg3) NOT_LP64(rcx);
562    LP64_ONLY(assert(rbx != bc, "register damaged"));
563
564    // get next byte
565    __ load_unsigned_byte(rbx,
566                          at_bcp(Bytecodes::length_for(Bytecodes::_iload)));
    // If the next bytecode is _iload, don't rewrite yet: only the last two
    // iloads in a pair should be rewritten.  If it is _fast_iload (so neither
    // an _iload nor a _caload), the current and next bytecode form an iload
    // pair and the current one is rewritten to _fast_iload2.
571    __ cmpl(rbx, Bytecodes::_iload);
572    __ jcc(Assembler::equal, done);
573
574    __ cmpl(rbx, Bytecodes::_fast_iload);
575    __ movl(bc, Bytecodes::_fast_iload2);
576
577    __ jccb(Assembler::equal, rewrite);
578
579    // if _caload, rewrite to fast_icaload
580    __ cmpl(rbx, Bytecodes::_caload);
581    __ movl(bc, Bytecodes::_fast_icaload);
582    __ jccb(Assembler::equal, rewrite);
583
584    // rewrite so iload doesn't check again.
585    __ movl(bc, Bytecodes::_fast_iload);
586
587    // rewrite
588    // bc: fast bytecode
589    __ bind(rewrite);
590    patch_bytecode(Bytecodes::_iload, bc, rbx, false);
591    __ bind(done);
592  }
593
594  // Get the local value into tos
595  locals_index(rbx);
596  __ movl(rax, iaddress(rbx));
597}
598
599void TemplateTable::fast_iload2() {
600  transition(vtos, itos);
601  locals_index(rbx);
602  __ movl(rax, iaddress(rbx));
603  __ push(itos);
604  locals_index(rbx, 3);
605  __ movl(rax, iaddress(rbx));
606}
607
608void TemplateTable::fast_iload() {
609  transition(vtos, itos);
610  locals_index(rbx);
611  __ movl(rax, iaddress(rbx));
612}
613
614void TemplateTable::lload() {
615  transition(vtos, ltos);
616  locals_index(rbx);
617  __ movptr(rax, laddress(rbx));
618  NOT_LP64(__ movl(rdx, haddress(rbx)));
619}
620
621void TemplateTable::fload() {
622  transition(vtos, ftos);
623  locals_index(rbx);
624  __ load_float(faddress(rbx));
625}
626
627void TemplateTable::dload() {
628  transition(vtos, dtos);
629  locals_index(rbx);
630  __ load_double(daddress(rbx));
631}
632
633void TemplateTable::aload() {
634  transition(vtos, atos);
635  locals_index(rbx);
636  __ movptr(rax, aaddress(rbx));
637}
638
639void TemplateTable::locals_index_wide(Register reg) {
640  __ load_unsigned_short(reg, at_bcp(2));
641  __ bswapl(reg);
642  __ shrl(reg, 16);
643  __ negptr(reg);
644}
645
646void TemplateTable::wide_iload() {
647  transition(vtos, itos);
648  locals_index_wide(rbx);
649  __ movl(rax, iaddress(rbx));
650}
651
652void TemplateTable::wide_lload() {
653  transition(vtos, ltos);
654  locals_index_wide(rbx);
655  __ movptr(rax, laddress(rbx));
656  NOT_LP64(__ movl(rdx, haddress(rbx)));
657}
658
659void TemplateTable::wide_fload() {
660  transition(vtos, ftos);
661  locals_index_wide(rbx);
662  __ load_float(faddress(rbx));
663}
664
665void TemplateTable::wide_dload() {
666  transition(vtos, dtos);
667  locals_index_wide(rbx);
668  __ load_double(daddress(rbx));
669}
670
671void TemplateTable::wide_aload() {
672  transition(vtos, atos);
673  locals_index_wide(rbx);
674  __ movptr(rax, aaddress(rbx));
675}
676
677void TemplateTable::index_check(Register array, Register index) {
678  // Pop ptr into array
679  __ pop_ptr(array);
680  index_check_without_pop(array, index);
681}
682
683void TemplateTable::index_check_without_pop(Register array, Register index) {
684  // destroys rbx
685  // check array
686  __ null_check(array, arrayOopDesc::length_offset_in_bytes());
687  // sign extend index for use by indexed load
688  __ movl2ptr(index, index);
689  // check index
690  __ cmpl(index, Address(array, arrayOopDesc::length_offset_in_bytes()));
691  if (index != rbx) {
692    // ??? convention: move aberrant index into rbx for exception message
693    assert(rbx != array, "different registers");
694    __ movl(rbx, index);
695  }
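  // Unsigned comparison: a negative index shows up as a very large unsigned
  // value, so 'aboveEqual' catches both index < 0 and index >= length.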
696  __ jump_cc(Assembler::aboveEqual,
697             ExternalAddress(Interpreter::_throw_ArrayIndexOutOfBoundsException_entry));
698}
699
700
701void TemplateTable::iaload() {
702  transition(itos, itos);
703  // rax: index
704  // rdx: array
705  index_check(rdx, rax); // kills rbx
706  __ movl(rax, Address(rdx, rax,
707                       Address::times_4,
708                       arrayOopDesc::base_offset_in_bytes(T_INT)));
709}
710
711void TemplateTable::laload() {
712  transition(itos, ltos);
713  // rax: index
714  // rdx: array
715  index_check(rdx, rax); // kills rbx
716  NOT_LP64(__ mov(rbx, rax));
717  // rbx,: index
718  __ movptr(rax, Address(rdx, rbx, Address::times_8, arrayOopDesc::base_offset_in_bytes(T_LONG) + 0 * wordSize));
719  NOT_LP64(__ movl(rdx, Address(rdx, rbx, Address::times_8, arrayOopDesc::base_offset_in_bytes(T_LONG) + 1 * wordSize)));
720}
721
722
723
724void TemplateTable::faload() {
725  transition(itos, ftos);
726  // rax: index
727  // rdx: array
728  index_check(rdx, rax); // kills rbx
729  __ load_float(Address(rdx, rax,
730                        Address::times_4,
731                        arrayOopDesc::base_offset_in_bytes(T_FLOAT)));
732}
733
734void TemplateTable::daload() {
735  transition(itos, dtos);
736  // rax: index
737  // rdx: array
738  index_check(rdx, rax); // kills rbx
739  __ load_double(Address(rdx, rax,
740                         Address::times_8,
741                         arrayOopDesc::base_offset_in_bytes(T_DOUBLE)));
742}
743
744void TemplateTable::aaload() {
745  transition(itos, atos);
746  // rax: index
747  // rdx: array
748  index_check(rdx, rax); // kills rbx
749  __ load_heap_oop(rax, Address(rdx, rax,
750                                UseCompressedOops ? Address::times_4 : Address::times_ptr,
751                                arrayOopDesc::base_offset_in_bytes(T_OBJECT)));
752}
753
754void TemplateTable::baload() {
755  transition(itos, itos);
756  // rax: index
757  // rdx: array
758  index_check(rdx, rax); // kills rbx
759  __ load_signed_byte(rax, Address(rdx, rax, Address::times_1, arrayOopDesc::base_offset_in_bytes(T_BYTE)));
760}
761
762void TemplateTable::caload() {
763  transition(itos, itos);
764  // rax: index
765  // rdx: array
766  index_check(rdx, rax); // kills rbx
767  __ load_unsigned_short(rax, Address(rdx, rax, Address::times_2, arrayOopDesc::base_offset_in_bytes(T_CHAR)));
768}
769
770// iload followed by caload frequent pair
771void TemplateTable::fast_icaload() {
772  transition(vtos, itos);
773  // load index out of locals
774  locals_index(rbx);
775  __ movl(rax, iaddress(rbx));
776
777  // rax: index
778  // rdx: array
779  index_check(rdx, rax); // kills rbx
780  __ load_unsigned_short(rax,
781                         Address(rdx, rax,
782                                 Address::times_2,
783                                 arrayOopDesc::base_offset_in_bytes(T_CHAR)));
784}
785
786
787void TemplateTable::saload() {
788  transition(itos, itos);
789  // rax: index
790  // rdx: array
791  index_check(rdx, rax); // kills rbx
792  __ load_signed_short(rax, Address(rdx, rax, Address::times_2, arrayOopDesc::base_offset_in_bytes(T_SHORT)));
793}
794
795void TemplateTable::iload(int n) {
796  transition(vtos, itos);
797  __ movl(rax, iaddress(n));
798}
799
800void TemplateTable::lload(int n) {
801  transition(vtos, ltos);
802  __ movptr(rax, laddress(n));
803  NOT_LP64(__ movptr(rdx, haddress(n)));
804}
805
806void TemplateTable::fload(int n) {
807  transition(vtos, ftos);
808  __ load_float(faddress(n));
809}
810
811void TemplateTable::dload(int n) {
812  transition(vtos, dtos);
813  __ load_double(daddress(n));
814}
815
816void TemplateTable::aload(int n) {
817  transition(vtos, atos);
818  __ movptr(rax, aaddress(n));
819}
820
821void TemplateTable::aload_0() {
822  aload_0_internal();
823}
824
825void TemplateTable::nofast_aload_0() {
826  aload_0_internal(may_not_rewrite);
827}
828
829void TemplateTable::aload_0_internal(RewriteControl rc) {
830  transition(vtos, atos);
831  // According to bytecode histograms, the pairs:
832  //
833  // _aload_0, _fast_igetfield
834  // _aload_0, _fast_agetfield
835  // _aload_0, _fast_fgetfield
836  //
837  // occur frequently. If RewriteFrequentPairs is set, the (slow)
838  // _aload_0 bytecode checks if the next bytecode is either
839  // _fast_igetfield, _fast_agetfield or _fast_fgetfield and then
840  // rewrites the current bytecode into a pair bytecode; otherwise it
841  // rewrites the current bytecode into _fast_aload_0 that doesn't do
842  // the pair check anymore.
843  //
844  // Note: If the next bytecode is _getfield, the rewrite must be
845  //       delayed, otherwise we may miss an opportunity for a pair.
846  //
847  // Also rewrite frequent pairs
848  //   aload_0, aload_1
849  //   aload_0, iload_1
  // These pairs need only a small amount of code, so they are the most
  // profitable to rewrite.
852  if (RewriteFrequentPairs && rc == may_rewrite) {
853    Label rewrite, done;
854
855    const Register bc = LP64_ONLY(c_rarg3) NOT_LP64(rcx);
856    LP64_ONLY(assert(rbx != bc, "register damaged"));
857
858    // get next byte
859    __ load_unsigned_byte(rbx, at_bcp(Bytecodes::length_for(Bytecodes::_aload_0)));
860
861    // if _getfield then wait with rewrite
862    __ cmpl(rbx, Bytecodes::_getfield);
863    __ jcc(Assembler::equal, done);
864
865    // if _igetfield then rewrite to _fast_iaccess_0
866    assert(Bytecodes::java_code(Bytecodes::_fast_iaccess_0) == Bytecodes::_aload_0, "fix bytecode definition");
867    __ cmpl(rbx, Bytecodes::_fast_igetfield);
868    __ movl(bc, Bytecodes::_fast_iaccess_0);
869    __ jccb(Assembler::equal, rewrite);
870
871    // if _agetfield then rewrite to _fast_aaccess_0
872    assert(Bytecodes::java_code(Bytecodes::_fast_aaccess_0) == Bytecodes::_aload_0, "fix bytecode definition");
873    __ cmpl(rbx, Bytecodes::_fast_agetfield);
874    __ movl(bc, Bytecodes::_fast_aaccess_0);
875    __ jccb(Assembler::equal, rewrite);
876
877    // if _fgetfield then rewrite to _fast_faccess_0
878    assert(Bytecodes::java_code(Bytecodes::_fast_faccess_0) == Bytecodes::_aload_0, "fix bytecode definition");
879    __ cmpl(rbx, Bytecodes::_fast_fgetfield);
880    __ movl(bc, Bytecodes::_fast_faccess_0);
881    __ jccb(Assembler::equal, rewrite);
882
883    // else rewrite to _fast_aload0
884    assert(Bytecodes::java_code(Bytecodes::_fast_aload_0) == Bytecodes::_aload_0, "fix bytecode definition");
885    __ movl(bc, Bytecodes::_fast_aload_0);
886
887    // rewrite
888    // bc: fast bytecode
889    __ bind(rewrite);
890    patch_bytecode(Bytecodes::_aload_0, bc, rbx, false);
891
892    __ bind(done);
893  }
894
895  // Do actual aload_0 (must do this after patch_bytecode which might call VM and GC might change oop).
896  aload(0);
897}
898
899void TemplateTable::istore() {
900  transition(itos, vtos);
901  locals_index(rbx);
902  __ movl(iaddress(rbx), rax);
903}
904
905
906void TemplateTable::lstore() {
907  transition(ltos, vtos);
908  locals_index(rbx);
909  __ movptr(laddress(rbx), rax);
910  NOT_LP64(__ movptr(haddress(rbx), rdx));
911}
912
913void TemplateTable::fstore() {
914  transition(ftos, vtos);
915  locals_index(rbx);
916  __ store_float(faddress(rbx));
917}
918
919void TemplateTable::dstore() {
920  transition(dtos, vtos);
921  locals_index(rbx);
922  __ store_double(daddress(rbx));
923}
924
925void TemplateTable::astore() {
926  transition(vtos, vtos);
927  __ pop_ptr(rax);
928  locals_index(rbx);
929  __ movptr(aaddress(rbx), rax);
930}
931
932void TemplateTable::wide_istore() {
933  transition(vtos, vtos);
934  __ pop_i();
935  locals_index_wide(rbx);
936  __ movl(iaddress(rbx), rax);
937}
938
939void TemplateTable::wide_lstore() {
940  transition(vtos, vtos);
941  NOT_LP64(__ pop_l(rax, rdx));
942  LP64_ONLY(__ pop_l());
943  locals_index_wide(rbx);
944  __ movptr(laddress(rbx), rax);
945  NOT_LP64(__ movl(haddress(rbx), rdx));
946}
947
948void TemplateTable::wide_fstore() {
949#ifdef _LP64
950  transition(vtos, vtos);
951  __ pop_f(xmm0);
952  locals_index_wide(rbx);
953  __ movflt(faddress(rbx), xmm0);
954#else
955  wide_istore();
956#endif
957}
958
959void TemplateTable::wide_dstore() {
960#ifdef _LP64
961  transition(vtos, vtos);
962  __ pop_d(xmm0);
963  locals_index_wide(rbx);
964  __ movdbl(daddress(rbx), xmm0);
965#else
966  wide_lstore();
967#endif
968}
969
970void TemplateTable::wide_astore() {
971  transition(vtos, vtos);
972  __ pop_ptr(rax);
973  locals_index_wide(rbx);
974  __ movptr(aaddress(rbx), rax);
975}
976
977void TemplateTable::iastore() {
978  transition(itos, vtos);
979  __ pop_i(rbx);
980  // rax: value
981  // rbx: index
982  // rdx: array
983  index_check(rdx, rbx); // prefer index in rbx
984  __ movl(Address(rdx, rbx,
985                  Address::times_4,
986                  arrayOopDesc::base_offset_in_bytes(T_INT)),
987          rax);
988}
989
990void TemplateTable::lastore() {
991  transition(ltos, vtos);
992  __ pop_i(rbx);
993  // rax,: low(value)
994  // rcx: array
995  // rdx: high(value)
996  index_check(rcx, rbx);  // prefer index in rbx,
997  // rbx,: index
998  __ movptr(Address(rcx, rbx, Address::times_8, arrayOopDesc::base_offset_in_bytes(T_LONG) + 0 * wordSize), rax);
999  NOT_LP64(__ movl(Address(rcx, rbx, Address::times_8, arrayOopDesc::base_offset_in_bytes(T_LONG) + 1 * wordSize), rdx));
1000}
1001
1002
1003void TemplateTable::fastore() {
1004  transition(ftos, vtos);
1005  __ pop_i(rbx);
1006  // value is in UseSSE >= 1 ? xmm0 : ST(0)
1007  // rbx:  index
1008  // rdx:  array
1009  index_check(rdx, rbx); // prefer index in rbx
1010  __ store_float(Address(rdx, rbx, Address::times_4, arrayOopDesc::base_offset_in_bytes(T_FLOAT)));
1011}
1012
1013void TemplateTable::dastore() {
1014  transition(dtos, vtos);
1015  __ pop_i(rbx);
1016  // value is in UseSSE >= 2 ? xmm0 : ST(0)
1017  // rbx:  index
1018  // rdx:  array
1019  index_check(rdx, rbx); // prefer index in rbx
1020  __ store_double(Address(rdx, rbx, Address::times_8, arrayOopDesc::base_offset_in_bytes(T_DOUBLE)));
1021}
1022
1023void TemplateTable::aastore() {
1024  Label is_null, ok_is_subtype, done;
1025  transition(vtos, vtos);
1026  // stack: ..., array, index, value
1027  __ movptr(rax, at_tos());    // value
1028  __ movl(rcx, at_tos_p1()); // index
1029  __ movptr(rdx, at_tos_p2()); // array
1030
1031  Address element_address(rdx, rcx,
1032                          UseCompressedOops? Address::times_4 : Address::times_ptr,
1033                          arrayOopDesc::base_offset_in_bytes(T_OBJECT));
1034
1035  index_check_without_pop(rdx, rcx);     // kills rbx
1036  __ testptr(rax, rax);
1037  __ jcc(Assembler::zero, is_null);
1038
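  // Non-null value: verify that the value's klass is a subtype of the
  // array's element klass before storing; on failure an ArrayStoreException
  // is thrown.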
1039  // Move subklass into rbx
1040  __ load_klass(rbx, rax);
1041  // Move superklass into rax
1042  __ load_klass(rax, rdx);
1043  __ movptr(rax, Address(rax,
1044                         ObjArrayKlass::element_klass_offset()));
1045  // Compress array + index*oopSize + 12 into a single register.  Frees rcx.
1046  __ lea(rdx, element_address);
1047
1048  // Generate subtype check.  Blows rcx, rdi
1049  // Superklass in rax.  Subklass in rbx.
1050  __ gen_subtype_check(rbx, ok_is_subtype);
1051
1052  // Come here on failure
1053  // object is at TOS
1054  __ jump(ExternalAddress(Interpreter::_throw_ArrayStoreException_entry));
1055
1056  // Come here on success
1057  __ bind(ok_is_subtype);
1058
1059  // Get the value we will store
1060  __ movptr(rax, at_tos());
1061  // Now store using the appropriate barrier
1062  do_oop_store(_masm, Address(rdx, 0), rax, _bs->kind(), true);
1063  __ jmp(done);
1064
1065  // Have a NULL in rax, rdx=array, ecx=index.  Store NULL at ary[idx]
1066  __ bind(is_null);
1067  __ profile_null_seen(rbx);
1068
1069  // Store a NULL
1070  do_oop_store(_masm, element_address, noreg, _bs->kind(), true);
1071
1072  // Pop stack arguments
1073  __ bind(done);
1074  __ addptr(rsp, 3 * Interpreter::stackElementSize);
1075}
1076
1077void TemplateTable::bastore() {
1078  transition(itos, vtos);
1079  __ pop_i(rbx);
1080  // rax: value
1081  // rbx: index
1082  // rdx: array
1083  index_check(rdx, rbx); // prefer index in rbx
1084  // Need to check whether array is boolean or byte
1085  // since both types share the bastore bytecode.
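  // The array klass's layout helper encodes the element type; the diffbit
  // distinguishes T_BOOLEAN from T_BYTE arrays, and boolean stores are masked
  // to 0 or 1 as the JVM specification requires.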
1086  __ load_klass(rcx, rdx);
1087  __ movl(rcx, Address(rcx, Klass::layout_helper_offset()));
1088  int diffbit = Klass::layout_helper_boolean_diffbit();
1089  __ testl(rcx, diffbit);
1090  Label L_skip;
1091  __ jccb(Assembler::zero, L_skip);
1092  __ andl(rax, 1);  // if it is a T_BOOLEAN array, mask the stored value to 0/1
1093  __ bind(L_skip);
1094  __ movb(Address(rdx, rbx,
1095                  Address::times_1,
1096                  arrayOopDesc::base_offset_in_bytes(T_BYTE)),
1097          rax);
1098}
1099
1100void TemplateTable::castore() {
1101  transition(itos, vtos);
1102  __ pop_i(rbx);
1103  // rax: value
1104  // rbx: index
1105  // rdx: array
1106  index_check(rdx, rbx);  // prefer index in rbx
1107  __ movw(Address(rdx, rbx,
1108                  Address::times_2,
1109                  arrayOopDesc::base_offset_in_bytes(T_CHAR)),
1110          rax);
1111}
1112
1113
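// Short and char array elements are both 16 bits wide, so sastore can share
// castore's code.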
1114void TemplateTable::sastore() {
1115  castore();
1116}
1117
1118void TemplateTable::istore(int n) {
1119  transition(itos, vtos);
1120  __ movl(iaddress(n), rax);
1121}
1122
1123void TemplateTable::lstore(int n) {
1124  transition(ltos, vtos);
1125  __ movptr(laddress(n), rax);
1126  NOT_LP64(__ movptr(haddress(n), rdx));
1127}
1128
1129void TemplateTable::fstore(int n) {
1130  transition(ftos, vtos);
1131  __ store_float(faddress(n));
1132}
1133
1134void TemplateTable::dstore(int n) {
1135  transition(dtos, vtos);
1136  __ store_double(daddress(n));
1137}
1138
1139
1140void TemplateTable::astore(int n) {
1141  transition(vtos, vtos);
1142  __ pop_ptr(rax);
1143  __ movptr(aaddress(n), rax);
1144}
1145
1146void TemplateTable::pop() {
1147  transition(vtos, vtos);
1148  __ addptr(rsp, Interpreter::stackElementSize);
1149}
1150
1151void TemplateTable::pop2() {
1152  transition(vtos, vtos);
1153  __ addptr(rsp, 2 * Interpreter::stackElementSize);
1154}
1155
1156
1157void TemplateTable::dup() {
1158  transition(vtos, vtos);
1159  __ load_ptr(0, rax);
1160  __ push_ptr(rax);
1161  // stack: ..., a, a
1162}
1163
1164void TemplateTable::dup_x1() {
1165  transition(vtos, vtos);
1166  // stack: ..., a, b
1167  __ load_ptr( 0, rax);  // load b
1168  __ load_ptr( 1, rcx);  // load a
1169  __ store_ptr(1, rax);  // store b
1170  __ store_ptr(0, rcx);  // store a
1171  __ push_ptr(rax);      // push b
1172  // stack: ..., b, a, b
1173}
1174
1175void TemplateTable::dup_x2() {
1176  transition(vtos, vtos);
1177  // stack: ..., a, b, c
1178  __ load_ptr( 0, rax);  // load c
1179  __ load_ptr( 2, rcx);  // load a
1180  __ store_ptr(2, rax);  // store c in a
1181  __ push_ptr(rax);      // push c
1182  // stack: ..., c, b, c, c
1183  __ load_ptr( 2, rax);  // load b
1184  __ store_ptr(2, rcx);  // store a in b
1185  // stack: ..., c, a, c, c
1186  __ store_ptr(1, rax);  // store b in c
1187  // stack: ..., c, a, b, c
1188}
1189
1190void TemplateTable::dup2() {
1191  transition(vtos, vtos);
1192  // stack: ..., a, b
1193  __ load_ptr(1, rax);  // load a
1194  __ push_ptr(rax);     // push a
1195  __ load_ptr(1, rax);  // load b
1196  __ push_ptr(rax);     // push b
1197  // stack: ..., a, b, a, b
1198}
1199
1200
1201void TemplateTable::dup2_x1() {
1202  transition(vtos, vtos);
1203  // stack: ..., a, b, c
1204  __ load_ptr( 0, rcx);  // load c
1205  __ load_ptr( 1, rax);  // load b
1206  __ push_ptr(rax);      // push b
1207  __ push_ptr(rcx);      // push c
1208  // stack: ..., a, b, c, b, c
1209  __ store_ptr(3, rcx);  // store c in b
1210  // stack: ..., a, c, c, b, c
1211  __ load_ptr( 4, rcx);  // load a
1212  __ store_ptr(2, rcx);  // store a in 2nd c
1213  // stack: ..., a, c, a, b, c
1214  __ store_ptr(4, rax);  // store b in a
1215  // stack: ..., b, c, a, b, c
1216}
1217
1218void TemplateTable::dup2_x2() {
1219  transition(vtos, vtos);
1220  // stack: ..., a, b, c, d
1221  __ load_ptr( 0, rcx);  // load d
1222  __ load_ptr( 1, rax);  // load c
1223  __ push_ptr(rax);      // push c
1224  __ push_ptr(rcx);      // push d
1225  // stack: ..., a, b, c, d, c, d
1226  __ load_ptr( 4, rax);  // load b
1227  __ store_ptr(2, rax);  // store b in d
1228  __ store_ptr(4, rcx);  // store d in b
1229  // stack: ..., a, d, c, b, c, d
1230  __ load_ptr( 5, rcx);  // load a
1231  __ load_ptr( 3, rax);  // load c
1232  __ store_ptr(3, rcx);  // store a in c
1233  __ store_ptr(5, rax);  // store c in a
1234  // stack: ..., c, d, a, b, c, d
1235}
1236
1237void TemplateTable::swap() {
1238  transition(vtos, vtos);
1239  // stack: ..., a, b
1240  __ load_ptr( 1, rcx);  // load a
1241  __ load_ptr( 0, rax);  // load b
1242  __ store_ptr(0, rcx);  // store a in b
1243  __ store_ptr(1, rax);  // store b in a
1244  // stack: ..., b, a
1245}
1246
1247void TemplateTable::iop2(Operation op) {
1248  transition(itos, itos);
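  // The second operand (value2) is on tos in rax.  For the non-commutative
  // operations it is moved aside and value1 is popped into rax so the result
  // is value1 OP value2.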
1249  switch (op) {
1250  case add  :                    __ pop_i(rdx); __ addl (rax, rdx); break;
1251  case sub  : __ movl(rdx, rax); __ pop_i(rax); __ subl (rax, rdx); break;
1252  case mul  :                    __ pop_i(rdx); __ imull(rax, rdx); break;
1253  case _and :                    __ pop_i(rdx); __ andl (rax, rdx); break;
1254  case _or  :                    __ pop_i(rdx); __ orl  (rax, rdx); break;
1255  case _xor :                    __ pop_i(rdx); __ xorl (rax, rdx); break;
1256  case shl  : __ movl(rcx, rax); __ pop_i(rax); __ shll (rax);      break;
1257  case shr  : __ movl(rcx, rax); __ pop_i(rax); __ sarl (rax);      break;
1258  case ushr : __ movl(rcx, rax); __ pop_i(rax); __ shrl (rax);      break;
1259  default   : ShouldNotReachHere();
1260  }
1261}
1262
1263void TemplateTable::lop2(Operation op) {
1264  transition(ltos, ltos);
1265#ifdef _LP64
1266  switch (op) {
1267  case add  :                    __ pop_l(rdx); __ addptr(rax, rdx); break;
1268  case sub  : __ mov(rdx, rax);  __ pop_l(rax); __ subptr(rax, rdx); break;
1269  case _and :                    __ pop_l(rdx); __ andptr(rax, rdx); break;
1270  case _or  :                    __ pop_l(rdx); __ orptr (rax, rdx); break;
1271  case _xor :                    __ pop_l(rdx); __ xorptr(rax, rdx); break;
1272  default   : ShouldNotReachHere();
1273  }
1274#else
1275  __ pop_l(rbx, rcx);
1276  switch (op) {
1277    case add  : __ addl(rax, rbx); __ adcl(rdx, rcx); break;
1278    case sub  : __ subl(rbx, rax); __ sbbl(rcx, rdx);
1279                __ mov (rax, rbx); __ mov (rdx, rcx); break;
1280    case _and : __ andl(rax, rbx); __ andl(rdx, rcx); break;
1281    case _or  : __ orl (rax, rbx); __ orl (rdx, rcx); break;
1282    case _xor : __ xorl(rax, rbx); __ xorl(rdx, rcx); break;
1283    default   : ShouldNotReachHere();
1284  }
1285#endif
1286}
1287
1288void TemplateTable::idiv() {
1289  transition(itos, itos);
1290  __ movl(rcx, rax);
1291  __ pop_i(rax);
1292  // Note: could xor rax and ecx and compare with (-1 ^ min_int). If
1293  //       they are not equal, one could do a normal division (no correction
1294  //       needed), which may speed up this implementation for the common case.
1295  //       (see also JVM spec., p.243 & p.271)
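  // corrected_idivl() additionally handles min_jint / -1, which would raise a
  // hardware overflow fault; it leaves the quotient in rax and the remainder
  // in rdx.  Division by zero traps in idivl and is converted by the runtime
  // into the Java ArithmeticException.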
1296  __ corrected_idivl(rcx);
1297}
1298
1299void TemplateTable::irem() {
1300  transition(itos, itos);
1301  __ movl(rcx, rax);
1302  __ pop_i(rax);
1303  // Note: could xor rax and ecx and compare with (-1 ^ min_int). If
1304  //       they are not equal, one could do a normal division (no correction
1305  //       needed), which may speed up this implementation for the common case.
1306  //       (see also JVM spec., p.243 & p.271)
1307  __ corrected_idivl(rcx);
1308  __ movl(rax, rdx);
1309}
1310
1311void TemplateTable::lmul() {
1312  transition(ltos, ltos);
1313#ifdef _LP64
1314  __ pop_l(rdx);
1315  __ imulq(rax, rdx);
1316#else
1317  __ pop_l(rbx, rcx);
1318  __ push(rcx); __ push(rbx);
1319  __ push(rdx); __ push(rax);
1320  __ lmul(2 * wordSize, 0);
1321  __ addptr(rsp, 4 * wordSize);  // take off temporaries
1322#endif
1323}
1324
1325void TemplateTable::ldiv() {
1326  transition(ltos, ltos);
1327#ifdef _LP64
1328  __ mov(rcx, rax);
1329  __ pop_l(rax);
1330  // generate explicit div0 check
1331  __ testq(rcx, rcx);
1332  __ jump_cc(Assembler::zero,
1333             ExternalAddress(Interpreter::_throw_ArithmeticException_entry));
1334  // Note: could xor rax and rcx and compare with (-1 ^ min_int). If
1335  //       they are not equal, one could do a normal division (no correction
1336  //       needed), which may speed up this implementation for the common case.
1337  //       (see also JVM spec., p.243 & p.271)
1338  __ corrected_idivq(rcx); // kills rbx
1339#else
1340  __ pop_l(rbx, rcx);
1341  __ push(rcx); __ push(rbx);
1342  __ push(rdx); __ push(rax);
1343  // check if y = 0
1344  __ orl(rax, rdx);
1345  __ jump_cc(Assembler::zero,
1346             ExternalAddress(Interpreter::_throw_ArithmeticException_entry));
1347  __ call_VM_leaf(CAST_FROM_FN_PTR(address, SharedRuntime::ldiv));
1348  __ addptr(rsp, 4 * wordSize);  // take off temporaries
1349#endif
1350}
1351
1352void TemplateTable::lrem() {
1353  transition(ltos, ltos);
1354#ifdef _LP64
1355  __ mov(rcx, rax);
1356  __ pop_l(rax);
1357  __ testq(rcx, rcx);
1358  __ jump_cc(Assembler::zero,
1359             ExternalAddress(Interpreter::_throw_ArithmeticException_entry));
1360  // Note: could xor rax and rcx and compare with (-1 ^ min_int). If
1361  //       they are not equal, one could do a normal division (no correction
1362  //       needed), which may speed up this implementation for the common case.
1363  //       (see also JVM spec., p.243 & p.271)
1364  __ corrected_idivq(rcx); // kills rbx
1365  __ mov(rax, rdx);
1366#else
1367  __ pop_l(rbx, rcx);
1368  __ push(rcx); __ push(rbx);
1369  __ push(rdx); __ push(rax);
1370  // check if y = 0
1371  __ orl(rax, rdx);
1372  __ jump_cc(Assembler::zero,
1373             ExternalAddress(Interpreter::_throw_ArithmeticException_entry));
1374  __ call_VM_leaf(CAST_FROM_FN_PTR(address, SharedRuntime::lrem));
1375  __ addptr(rsp, 4 * wordSize);
1376#endif
1377}
1378
1379void TemplateTable::lshl() {
1380  transition(itos, ltos);
1381  __ movl(rcx, rax);                             // get shift count
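  // In 64-bit mode shlq uses the count in cl masked to 6 bits, which matches
  // the (count & 0x3f) semantics of lshl; the 32-bit path uses the lshl()
  // macro helper instead.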
#ifdef _LP64
1383  __ pop_l(rax);                                 // get shift value
1384  __ shlq(rax);
1385#else
1386  __ pop_l(rax, rdx);                            // get shift value
1387  __ lshl(rdx, rax);
1388#endif
1389}
1390
1391void TemplateTable::lshr() {
1392#ifdef _LP64
1393  transition(itos, ltos);
1394  __ movl(rcx, rax);                             // get shift count
1395  __ pop_l(rax);                                 // get shift value
1396  __ sarq(rax);
1397#else
1398  transition(itos, ltos);
1399  __ mov(rcx, rax);                              // get shift count
1400  __ pop_l(rax, rdx);                            // get shift value
1401  __ lshr(rdx, rax, true);
1402#endif
1403}
1404
1405void TemplateTable::lushr() {
1406  transition(itos, ltos);
1407#ifdef _LP64
1408  __ movl(rcx, rax);                             // get shift count
1409  __ pop_l(rax);                                 // get shift value
1410  __ shrq(rax);
1411#else
1412  __ mov(rcx, rax);                              // get shift count
1413  __ pop_l(rax, rdx);                            // get shift value
1414  __ lshr(rdx, rax);
1415#endif
1416}
1417
1418void TemplateTable::fop2(Operation op) {
1419  transition(ftos, ftos);
1420
1421  if (UseSSE >= 1) {
1422    switch (op) {
1423    case add:
1424      __ addss(xmm0, at_rsp());
1425      __ addptr(rsp, Interpreter::stackElementSize);
1426      break;
1427    case sub:
1428      __ movflt(xmm1, xmm0);
1429      __ pop_f(xmm0);
1430      __ subss(xmm0, xmm1);
1431      break;
1432    case mul:
1433      __ mulss(xmm0, at_rsp());
1434      __ addptr(rsp, Interpreter::stackElementSize);
1435      break;
1436    case div:
1437      __ movflt(xmm1, xmm0);
1438      __ pop_f(xmm0);
1439      __ divss(xmm0, xmm1);
1440      break;
1441    case rem:
1442      // On x86_64 platforms the SharedRuntime::frem method is called to perform the
1443      // modulo operation. The frem method calls the function
1444      // double fmod(double x, double y) in math.h. The documentation of fmod states:
1445      // "If x or y is a NaN, a NaN is returned." without specifying what type of NaN
1446      // (signalling or quiet) is returned.
1447      //
1448      // On x86_32 platforms the FPU is used to perform the modulo operation. The
1449      // reason is that on 32-bit Windows the sign of modulo operations diverges from
      // what is considered the standard (e.g., -0.0f % -3.14f is 0.0f and not -0.0f).
1451      // The fprem instruction used on x86_32 is functionally equivalent to
1452      // SharedRuntime::frem in that it returns a NaN.
1453#ifdef _LP64
1454      __ movflt(xmm1, xmm0);
1455      __ pop_f(xmm0);
1456      __ call_VM_leaf(CAST_FROM_FN_PTR(address, SharedRuntime::frem), 2);
1457#else
1458      __ push_f(xmm0);
1459      __ pop_f();
1460      __ fld_s(at_rsp());
1461      __ fremr(rax);
1462      __ f2ieee();
1463      __ pop(rax);  // pop second operand off the stack
1464      __ push_f();
1465      __ pop_f(xmm0);
1466#endif
1467      break;
1468    default:
1469      ShouldNotReachHere();
1470      break;
1471    }
1472  } else {
1473#ifdef _LP64
1474    ShouldNotReachHere();
1475#else
1476    switch (op) {
1477    case add: __ fadd_s (at_rsp());                break;
1478    case sub: __ fsubr_s(at_rsp());                break;
1479    case mul: __ fmul_s (at_rsp());                break;
1480    case div: __ fdivr_s(at_rsp());                break;
1481    case rem: __ fld_s  (at_rsp()); __ fremr(rax); break;
1482    default : ShouldNotReachHere();
1483    }
1484    __ f2ieee();
1485    __ pop(rax);  // pop second operand off the stack
1486#endif // _LP64
1487  }
1488}
1489
1490void TemplateTable::dop2(Operation op) {
1491  transition(dtos, dtos);
1492  if (UseSSE >= 2) {
1493    switch (op) {
1494    case add:
1495      __ addsd(xmm0, at_rsp());
1496      __ addptr(rsp, 2 * Interpreter::stackElementSize);
1497      break;
1498    case sub:
1499      __ movdbl(xmm1, xmm0);
1500      __ pop_d(xmm0);
1501      __ subsd(xmm0, xmm1);
1502      break;
1503    case mul:
1504      __ mulsd(xmm0, at_rsp());
1505      __ addptr(rsp, 2 * Interpreter::stackElementSize);
1506      break;
1507    case div:
1508      __ movdbl(xmm1, xmm0);
1509      __ pop_d(xmm0);
1510      __ divsd(xmm0, xmm1);
1511      break;
1512    case rem:
1513      // Similar to fop2(), the modulo operation is performed using the
1514      // SharedRuntime::drem method (on x86_64 platforms) or using the
1515      // FPU (on x86_32 platforms) for the same reasons as mentioned in fop2().
1516#ifdef _LP64
1517      __ movdbl(xmm1, xmm0);
1518      __ pop_d(xmm0);
1519      __ call_VM_leaf(CAST_FROM_FN_PTR(address, SharedRuntime::drem), 2);
1520#else
1521      __ push_d(xmm0);
1522      __ pop_d();
1523      __ fld_d(at_rsp());
1524      __ fremr(rax);
1525      __ d2ieee();
1526      __ pop(rax);
1527      __ pop(rdx);
1528      __ push_d();
1529      __ pop_d(xmm0);
1530#endif
1531      break;
1532    default:
1533      ShouldNotReachHere();
1534      break;
1535    }
1536  } else {
1537#ifdef _LP64
1538    ShouldNotReachHere();
1539#else
1540    switch (op) {
1541    case add: __ fadd_d (at_rsp());                break;
1542    case sub: __ fsubr_d(at_rsp());                break;
1543    case mul: {
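      // For strictfp methods (JVM_ACC_STRICT) the x87 multiply is bracketed
      // by multiplications with bias constants so that the extended-precision
      // intermediate result is constrained to the range of an IEEE double.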
1544      Label L_strict;
1545      Label L_join;
1546      const Address access_flags      (rcx, Method::access_flags_offset());
1547      __ get_method(rcx);
1548      __ movl(rcx, access_flags);
1549      __ testl(rcx, JVM_ACC_STRICT);
1550      __ jccb(Assembler::notZero, L_strict);
1551      __ fmul_d (at_rsp());
1552      __ jmpb(L_join);
1553      __ bind(L_strict);
1554      __ fld_x(ExternalAddress(StubRoutines::addr_fpu_subnormal_bias1()));
1555      __ fmulp();
1556      __ fmul_d (at_rsp());
1557      __ fld_x(ExternalAddress(StubRoutines::addr_fpu_subnormal_bias2()));
1558      __ fmulp();
1559      __ bind(L_join);
1560      break;
1561    }
1562    case div: {
1563      Label L_strict;
1564      Label L_join;
1565      const Address access_flags      (rcx, Method::access_flags_offset());
1566      __ get_method(rcx);
1567      __ movl(rcx, access_flags);
1568      __ testl(rcx, JVM_ACC_STRICT);
1569      __ jccb(Assembler::notZero, L_strict);
1570      __ fdivr_d(at_rsp());
1571      __ jmp(L_join);
1572      __ bind(L_strict);
1573      __ fld_x(ExternalAddress(StubRoutines::addr_fpu_subnormal_bias1()));
1574      __ fmul_d (at_rsp());
1575      __ fdivrp();
1576      __ fld_x(ExternalAddress(StubRoutines::addr_fpu_subnormal_bias2()));
1577      __ fmulp();
1578      __ bind(L_join);
1579      break;
1580    }
1581    case rem: __ fld_d  (at_rsp()); __ fremr(rax); break;
1582    default : ShouldNotReachHere();
1583    }
1584    __ d2ieee();
1585    // Pop double precision number from rsp.
1586    __ pop(rax);
1587    __ pop(rdx);
1588#endif
1589  }
1590}
1591
1592void TemplateTable::ineg() {
1593  transition(itos, itos);
1594  __ negl(rax);
1595}
1596
1597void TemplateTable::lneg() {
1598  transition(ltos, ltos);
1599  LP64_ONLY(__ negq(rax));
1600  NOT_LP64(__ lneg(rdx, rax));
1601}
1602
// Note: 'double' and 'long long' have 32-bit alignment on x86.
static jlong* double_quadword(jlong *adr, jlong lo, jlong hi) {
  // Use the expression (adr)&(~0xF) to provide a 128-bit aligned address
  // of 128-bit operands for SSE instructions.
  jlong *operand = (jlong*)(((intptr_t)adr)&((intptr_t)(~0xF)));
  // Store the value to a 128-bit operand.
  operand[0] = lo;
  operand[1] = hi;
  return operand;
}

// Buffer for 128-bit masks used by SSE instructions.
static jlong float_signflip_pool[2*2];
static jlong double_signflip_pool[2*2];
1617
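// fneg/dneg flip the IEEE sign bit by xoring with a sign mask; the mask is
// placed in a 16-byte aligned slot by double_quadword() so that it can be
// used directly as an SSE memory operand.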
1618void TemplateTable::fneg() {
1619  transition(ftos, ftos);
1620  if (UseSSE >= 1) {
1621    static jlong *float_signflip  = double_quadword(&float_signflip_pool[1],  CONST64(0x8000000080000000),  CONST64(0x8000000080000000));
1622    __ xorps(xmm0, ExternalAddress((address) float_signflip));
1623  } else {
1624    LP64_ONLY(ShouldNotReachHere());
1625    NOT_LP64(__ fchs());
1626  }
1627}
1628
1629void TemplateTable::dneg() {
1630  transition(dtos, dtos);
1631  if (UseSSE >= 2) {
1632    static jlong *double_signflip =
1633      double_quadword(&double_signflip_pool[1], CONST64(0x8000000000000000), CONST64(0x8000000000000000));
1634    __ xorpd(xmm0, ExternalAddress((address) double_signflip));
1635  } else {
1636#ifdef _LP64
1637    ShouldNotReachHere();
1638#else
1639    __ fchs();
1640#endif
1641  }
1642}
1643
1644void TemplateTable::iinc() {
1645  transition(vtos, vtos);
1646  __ load_signed_byte(rdx, at_bcp(2)); // get constant
1647  locals_index(rbx);
1648  __ addl(iaddress(rbx), rdx);
1649}
1650
1651void TemplateTable::wide_iinc() {
1652  transition(vtos, vtos);
1653  __ movl(rdx, at_bcp(4)); // get constant
1654  locals_index_wide(rbx);
1655  __ bswapl(rdx); // swap bytes & sign-extend constant
1656  __ sarl(rdx, 16);
1657  __ addl(iaddress(rbx), rdx);
1658  // Note: should probably use only one movl to get both
1659  //       the index and the constant -> fix this
1660}
1661
1662void TemplateTable::convert() {
1663#ifdef _LP64
1664  // Checking
1665#ifdef ASSERT
1666  {
1667    TosState tos_in  = ilgl;
1668    TosState tos_out = ilgl;
1669    switch (bytecode()) {
1670    case Bytecodes::_i2l: // fall through
1671    case Bytecodes::_i2f: // fall through
1672    case Bytecodes::_i2d: // fall through
1673    case Bytecodes::_i2b: // fall through
1674    case Bytecodes::_i2c: // fall through
1675    case Bytecodes::_i2s: tos_in = itos; break;
1676    case Bytecodes::_l2i: // fall through
1677    case Bytecodes::_l2f: // fall through
1678    case Bytecodes::_l2d: tos_in = ltos; break;
1679    case Bytecodes::_f2i: // fall through
1680    case Bytecodes::_f2l: // fall through
1681    case Bytecodes::_f2d: tos_in = ftos; break;
1682    case Bytecodes::_d2i: // fall through
1683    case Bytecodes::_d2l: // fall through
1684    case Bytecodes::_d2f: tos_in = dtos; break;
1685    default             : ShouldNotReachHere();
1686    }
1687    switch (bytecode()) {
1688    case Bytecodes::_l2i: // fall through
1689    case Bytecodes::_f2i: // fall through
1690    case Bytecodes::_d2i: // fall through
1691    case Bytecodes::_i2b: // fall through
1692    case Bytecodes::_i2c: // fall through
1693    case Bytecodes::_i2s: tos_out = itos; break;
1694    case Bytecodes::_i2l: // fall through
1695    case Bytecodes::_f2l: // fall through
1696    case Bytecodes::_d2l: tos_out = ltos; break;
1697    case Bytecodes::_i2f: // fall through
1698    case Bytecodes::_l2f: // fall through
1699    case Bytecodes::_d2f: tos_out = ftos; break;
1700    case Bytecodes::_i2d: // fall through
1701    case Bytecodes::_l2d: // fall through
1702    case Bytecodes::_f2d: tos_out = dtos; break;
1703    default             : ShouldNotReachHere();
1704    }
1705    transition(tos_in, tos_out);
1706  }
1707#endif // ASSERT
1708
1709  static const int64_t is_nan = 0x8000000000000000L;
1710
1711  // Conversion
1712  switch (bytecode()) {
1713  case Bytecodes::_i2l:
1714    __ movslq(rax, rax);
1715    break;
1716  case Bytecodes::_i2f:
1717    __ cvtsi2ssl(xmm0, rax);
1718    break;
1719  case Bytecodes::_i2d:
1720    __ cvtsi2sdl(xmm0, rax);
1721    break;
1722  case Bytecodes::_i2b:
1723    __ movsbl(rax, rax);
1724    break;
1725  case Bytecodes::_i2c:
1726    __ movzwl(rax, rax);
1727    break;
1728  case Bytecodes::_i2s:
1729    __ movswl(rax, rax);
1730    break;
1731  case Bytecodes::_l2i:
1732    __ movl(rax, rax);
1733    break;
1734  case Bytecodes::_l2f:
1735    __ cvtsi2ssq(xmm0, rax);
1736    break;
1737  case Bytecodes::_l2d:
1738    __ cvtsi2sdq(xmm0, rax);
1739    break;
1740  case Bytecodes::_f2i:
1741  {
1742    Label L;
1743    __ cvttss2sil(rax, xmm0);
1744    __ cmpl(rax, 0x80000000); // NaN or overflow/underflow?
1745    __ jcc(Assembler::notEqual, L);
1746    __ call_VM_leaf(CAST_FROM_FN_PTR(address, SharedRuntime::f2i), 1);
1747    __ bind(L);
1748  }
1749    break;
1750  case Bytecodes::_f2l:
1751  {
1752    Label L;
1753    __ cvttss2siq(rax, xmm0);
1754    // NaN or overflow/underflow?
1755    __ cmp64(rax, ExternalAddress((address) &is_nan));
1756    __ jcc(Assembler::notEqual, L);
1757    __ call_VM_leaf(CAST_FROM_FN_PTR(address, SharedRuntime::f2l), 1);
1758    __ bind(L);
1759  }
1760    break;
1761  case Bytecodes::_f2d:
1762    __ cvtss2sd(xmm0, xmm0);
1763    break;
1764  case Bytecodes::_d2i:
1765  {
1766    Label L;
1767    __ cvttsd2sil(rax, xmm0);
1768    __ cmpl(rax, 0x80000000); // NaN or overflow/underflow?
1769    __ jcc(Assembler::notEqual, L);
1770    __ call_VM_leaf(CAST_FROM_FN_PTR(address, SharedRuntime::d2i), 1);
1771    __ bind(L);
1772  }
1773    break;
1774  case Bytecodes::_d2l:
1775  {
1776    Label L;
1777    __ cvttsd2siq(rax, xmm0);
1778    // NaN or overflow/underflow?
1779    __ cmp64(rax, ExternalAddress((address) &is_nan));
1780    __ jcc(Assembler::notEqual, L);
1781    __ call_VM_leaf(CAST_FROM_FN_PTR(address, SharedRuntime::d2l), 1);
1782    __ bind(L);
1783  }
1784    break;
1785  case Bytecodes::_d2f:
1786    __ cvtsd2ss(xmm0, xmm0);
1787    break;
1788  default:
1789    ShouldNotReachHere();
1790  }
1791#else
1792  // Checking
1793#ifdef ASSERT
1794  { TosState tos_in  = ilgl;
1795    TosState tos_out = ilgl;
1796    switch (bytecode()) {
1797      case Bytecodes::_i2l: // fall through
1798      case Bytecodes::_i2f: // fall through
1799      case Bytecodes::_i2d: // fall through
1800      case Bytecodes::_i2b: // fall through
1801      case Bytecodes::_i2c: // fall through
1802      case Bytecodes::_i2s: tos_in = itos; break;
1803      case Bytecodes::_l2i: // fall through
1804      case Bytecodes::_l2f: // fall through
1805      case Bytecodes::_l2d: tos_in = ltos; break;
1806      case Bytecodes::_f2i: // fall through
1807      case Bytecodes::_f2l: // fall through
1808      case Bytecodes::_f2d: tos_in = ftos; break;
1809      case Bytecodes::_d2i: // fall through
1810      case Bytecodes::_d2l: // fall through
1811      case Bytecodes::_d2f: tos_in = dtos; break;
1812      default             : ShouldNotReachHere();
1813    }
1814    switch (bytecode()) {
1815      case Bytecodes::_l2i: // fall through
1816      case Bytecodes::_f2i: // fall through
1817      case Bytecodes::_d2i: // fall through
1818      case Bytecodes::_i2b: // fall through
1819      case Bytecodes::_i2c: // fall through
1820      case Bytecodes::_i2s: tos_out = itos; break;
1821      case Bytecodes::_i2l: // fall through
1822      case Bytecodes::_f2l: // fall through
1823      case Bytecodes::_d2l: tos_out = ltos; break;
1824      case Bytecodes::_i2f: // fall through
1825      case Bytecodes::_l2f: // fall through
1826      case Bytecodes::_d2f: tos_out = ftos; break;
1827      case Bytecodes::_i2d: // fall through
1828      case Bytecodes::_l2d: // fall through
1829      case Bytecodes::_f2d: tos_out = dtos; break;
1830      default             : ShouldNotReachHere();
1831    }
1832    transition(tos_in, tos_out);
1833  }
1834#endif // ASSERT
1835
1836  // Conversion
1837  // (Note: use push(rcx)/pop(rcx) for 1/2-word stack-ptr manipulation)
1838  switch (bytecode()) {
1839    case Bytecodes::_i2l:
1840      __ extend_sign(rdx, rax);
1841      break;
1842    case Bytecodes::_i2f:
1843      if (UseSSE >= 1) {
1844        __ cvtsi2ssl(xmm0, rax);
1845      } else {
1846        __ push(rax);          // store int on tos
1847        __ fild_s(at_rsp());   // load int to ST0
1848        __ f2ieee();           // truncate to float size
1849        __ pop(rcx);           // adjust rsp
1850      }
1851      break;
1852    case Bytecodes::_i2d:
1853      if (UseSSE >= 2) {
1854        __ cvtsi2sdl(xmm0, rax);
1855      } else {
1856        __ push(rax);          // add one slot for d2ieee()
1857        __ push(rax);          // store int on tos
1858        __ fild_s(at_rsp());   // load int to ST0
1859        __ d2ieee();           // truncate to double size
1860        __ pop(rcx);           // adjust rsp
1861        __ pop(rcx);
1862      }
1863      break;
1864    case Bytecodes::_i2b:
1865      __ shll(rax, 24);      // truncate upper 24 bits
1866      __ sarl(rax, 24);      // and sign-extend byte
1867      LP64_ONLY(__ movsbl(rax, rax));
1868      break;
1869    case Bytecodes::_i2c:
1870      __ andl(rax, 0xFFFF);  // truncate upper 16 bits
1871      LP64_ONLY(__ movzwl(rax, rax));
1872      break;
1873    case Bytecodes::_i2s:
1874      __ shll(rax, 16);      // truncate upper 16 bits
1875      __ sarl(rax, 16);      // and sign-extend short
1876      LP64_ONLY(__ movswl(rax, rax));
1877      break;
1878    case Bytecodes::_l2i:
1879      /* nothing to do */
1880      break;
1881    case Bytecodes::_l2f:
1882      // On 64-bit platforms, the cvtsi2ssq instruction is used to convert
1883      // 64-bit long values to floats. On 32-bit platforms it is not possible
1884      // to use that instruction with 64-bit operands, therefore the FPU is
1885      // used to perform the conversion.
1886      __ push(rdx);          // store long on tos
1887      __ push(rax);
1888      __ fild_d(at_rsp());   // load long to ST0
1889      __ f2ieee();           // truncate to float size
1890      __ pop(rcx);           // adjust rsp
1891      __ pop(rcx);
1892      if (UseSSE >= 1) {
1893        __ push_f();
1894        __ pop_f(xmm0);
1895      }
1896      break;
1897    case Bytecodes::_l2d:
1898      // On 32-bit platforms the FPU is used for the conversion because
1899      // the cvtsi2sdq instruction cannot be used with 64-bit operands
1900      // on those platforms.
1901      __ push(rdx);          // store long on tos
1902      __ push(rax);
1903      __ fild_d(at_rsp());   // load long to ST0
1904      __ d2ieee();           // truncate to double size
1905      __ pop(rcx);           // adjust rsp
1906      __ pop(rcx);
1907      if (UseSSE >= 2) {
1908        __ push_d();
1909        __ pop_d(xmm0);
1910      }
1911      break;
1912    case Bytecodes::_f2i:
1913      // SharedRuntime::f2i does not differentiate between sNaNs and qNaNs
1914      // as it returns 0 for any NaN.
1915      if (UseSSE >= 1) {
1916        __ push_f(xmm0);
1917      } else {
1918        __ push(rcx);          // reserve space for argument
1919        __ fstp_s(at_rsp());   // pass float argument on stack
1920      }
1921      __ call_VM_leaf(CAST_FROM_FN_PTR(address, SharedRuntime::f2i), 1);
1922      break;
1923    case Bytecodes::_f2l:
1924      // SharedRuntime::f2l does not differentiate between sNaNs and qNaNs
1925      // as it returns 0 for any NaN.
1926      if (UseSSE >= 1) {
1927        __ push_f(xmm0);
1928      } else {
1929        __ push(rcx);          // reserve space for argument
1930        __ fstp_s(at_rsp());   // pass float argument on stack
1931      }
1932      __ call_VM_leaf(CAST_FROM_FN_PTR(address, SharedRuntime::f2l), 1);
1933      break;
1934    case Bytecodes::_f2d:
1935      if (UseSSE < 1) {
1936        /* nothing to do */
1937      } else if (UseSSE == 1) {
1938        __ push_f(xmm0);
1939        __ pop_f();
1940      } else { // UseSSE >= 2
1941        __ cvtss2sd(xmm0, xmm0);
1942      }
1943      break;
1944    case Bytecodes::_d2i:
1945      if (UseSSE >= 2) {
1946        __ push_d(xmm0);
1947      } else {
1948        __ push(rcx);          // reserve space for argument
1949        __ push(rcx);
1950        __ fstp_d(at_rsp());   // pass double argument on stack
1951      }
1952      __ call_VM_leaf(CAST_FROM_FN_PTR(address, SharedRuntime::d2i), 2);
1953      break;
1954    case Bytecodes::_d2l:
1955      if (UseSSE >= 2) {
1956        __ push_d(xmm0);
1957      } else {
1958        __ push(rcx);          // reserve space for argument
1959        __ push(rcx);
1960        __ fstp_d(at_rsp());   // pass double argument on stack
1961      }
1962      __ call_VM_leaf(CAST_FROM_FN_PTR(address, SharedRuntime::d2l), 2);
1963      break;
1964    case Bytecodes::_d2f:
1965      if (UseSSE <= 1) {
1966        __ push(rcx);          // reserve space for f2ieee()
1967        __ f2ieee();           // truncate to float size
1968        __ pop(rcx);           // adjust rsp
1969        if (UseSSE == 1) {
1970          // The cvtsd2ss instruction is not available if UseSSE==1, therefore
1971          // the conversion is performed using the FPU in this case.
1972          __ push_f();
1973          __ pop_f(xmm0);
1974        }
1975      } else { // UseSSE >= 2
1976        __ cvtsd2ss(xmm0, xmm0);
1977      }
1978      break;
1979    default             :
1980      ShouldNotReachHere();
1981  }
1982#endif
1983}
1984
1985void TemplateTable::lcmp() {
1986  transition(ltos, itos);
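  // lcmp pushes -1, 0 or +1 depending on whether value1 is less than, equal
  // to, or greater than value2 (JVMS).  On entry rax (rdx:rax on 32-bit)
  // holds value2 (tos); value1 is popped below.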
1987#ifdef _LP64
1988  Label done;
1989  __ pop_l(rdx);
1990  __ cmpq(rdx, rax);
1991  __ movl(rax, -1);
1992  __ jccb(Assembler::less, done);
1993  __ setb(Assembler::notEqual, rax);
1994  __ movzbl(rax, rax);
1995  __ bind(done);
1996#else
1997
1998  // y = rdx:rax
1999  __ pop_l(rbx, rcx);             // get x = rcx:rbx
2000  __ lcmp2int(rcx, rbx, rdx, rax);// rcx := cmp(x, y)
2001  __ mov(rax, rcx);
2002#endif
2003}
2004
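// float_cmp implements fcmpl/fcmpg and dcmpl/dcmpg.  unordered_result is the
// value to produce when either operand is NaN (the comparison is unordered):
// +1 for the *cmpg forms, -1 for the *cmpl forms.  Otherwise the result
// pushed is -1, 0 or +1, as for lcmp.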
2005void TemplateTable::float_cmp(bool is_float, int unordered_result) {
2006  if ((is_float && UseSSE >= 1) ||
2007      (!is_float && UseSSE >= 2)) {
2008    Label done;
2009    if (is_float) {
2010      // XXX get rid of pop here, use ... reg, mem32
2011      __ pop_f(xmm1);
2012      __ ucomiss(xmm1, xmm0);
2013    } else {
2014      // XXX get rid of pop here, use ... reg, mem64
2015      __ pop_d(xmm1);
2016      __ ucomisd(xmm1, xmm0);
2017    }
2018    if (unordered_result < 0) {
2019      __ movl(rax, -1);
2020      __ jccb(Assembler::parity, done);
2021      __ jccb(Assembler::below, done);
2022      __ setb(Assembler::notEqual, rdx);
2023      __ movzbl(rax, rdx);
2024    } else {
2025      __ movl(rax, 1);
2026      __ jccb(Assembler::parity, done);
2027      __ jccb(Assembler::above, done);
2028      __ movl(rax, 0);
2029      __ jccb(Assembler::equal, done);
2030      __ decrementl(rax);
2031    }
2032    __ bind(done);
2033  } else {
2034#ifdef _LP64
2035    ShouldNotReachHere();
2036#else
2037    if (is_float) {
2038      __ fld_s(at_rsp());
2039    } else {
2040      __ fld_d(at_rsp());
2041      __ pop(rdx);
2042    }
2043    __ pop(rcx);
2044    __ fcmp2int(rax, unordered_result < 0);
2045#endif // _LP64
2046  }
2047}
2048
2049void TemplateTable::branch(bool is_jsr, bool is_wide) {
2050  __ get_method(rcx); // rcx holds method
2051  __ profile_taken_branch(rax, rbx); // rax holds updated MDP, rbx
2052                                     // holds bumped taken count
2053
2054  const ByteSize be_offset = MethodCounters::backedge_counter_offset() +
2055                             InvocationCounter::counter_offset();
2056  const ByteSize inv_offset = MethodCounters::invocation_counter_offset() +
2057                              InvocationCounter::counter_offset();
2058
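  // Branch bytecodes carry a signed, big-endian displacement relative to the
  // branch opcode: 16 bits at bcp+1 for the narrow forms, 32 bits for
  // goto_w/jsr_w.  A sketch of the 16-bit decode performed below, on
  // little-endian x86 (bswap32 stands for a 32-bit byte swap):
  //
  //   int16_t raw = *(int16_t*)(bcp + 1); // loads the two bytes reversed
  //   int32_t r   = raw;                  // sign-extended
  //   r = bswap32(r);                     // displacement now in the top half
  //   r >>= 16;                           // arithmetic shift: signed offset
  //
  // e.g. displacement bytes 0xFF 0xFE decode to -2.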
2059  // Load up edx with the branch displacement
2060  if (is_wide) {
2061    __ movl(rdx, at_bcp(1));
2062  } else {
2063    __ load_signed_short(rdx, at_bcp(1));
2064  }
2065  __ bswapl(rdx);
2066
2067  if (!is_wide) {
2068    __ sarl(rdx, 16);
2069  }
2070  LP64_ONLY(__ movl2ptr(rdx, rdx));
2071
2072  // Handle all the JSR stuff here, then exit.
2073  // It's much shorter and cleaner than intermingling with the non-JSR
2074  // normal-branch stuff occurring below.
2075  if (is_jsr) {
2076    // Pre-load the next target bytecode into rbx
2077    __ load_unsigned_byte(rbx, Address(rbcp, rdx, Address::times_1, 0));
2078
2079    // compute return address as bci in rax
2080    __ lea(rax, at_bcp((is_wide ? 5 : 3) -
2081                        in_bytes(ConstMethod::codes_offset())));
2082    __ subptr(rax, Address(rcx, Method::const_offset()));
2083    // Adjust the bcp in r13 by the displacement in rdx
2084    __ addptr(rbcp, rdx);
2085    // jsr returns atos that is not an oop
2086    __ push_i(rax);
2087    __ dispatch_only(vtos);
2088    return;
2089  }
2090
2091  // Normal (non-jsr) branch handling
2092
2093  // Adjust the bcp in r13 by the displacement in rdx
2094  __ addptr(rbcp, rdx);
2095
2096  assert(UseLoopCounter || !UseOnStackReplacement,
2097         "on-stack-replacement requires loop counters");
2098  Label backedge_counter_overflow;
2099  Label profile_method;
2100  Label dispatch;
2101  if (UseLoopCounter) {
2102    // increment backedge counter for backward branches
2103    // rax: MDO
2104    // rbx: MDO bumped taken-count
2105    // rcx: method
2106    // rdx: target offset
2107    // r13: target bcp
2108    // r14: locals pointer
2109    __ testl(rdx, rdx);             // check if forward or backward branch
2110    __ jcc(Assembler::positive, dispatch); // count only if backward branch
2111
2112    // check if MethodCounters exists
2113    Label has_counters;
2114    __ movptr(rax, Address(rcx, Method::method_counters_offset()));
2115    __ testptr(rax, rax);
2116    __ jcc(Assembler::notZero, has_counters);
2117    __ push(rdx);
2118    __ push(rcx);
2119    __ call_VM(noreg, CAST_FROM_FN_PTR(address, InterpreterRuntime::build_method_counters),
2120               rcx);
2121    __ pop(rcx);
2122    __ pop(rdx);
2123    __ movptr(rax, Address(rcx, Method::method_counters_offset()));
2124    __ testptr(rax, rax);
2125    __ jcc(Assembler::zero, dispatch);
2126    __ bind(has_counters);
2127
2128    if (TieredCompilation) {
2129      Label no_mdo;
2130      int increment = InvocationCounter::count_increment;
2131      if (ProfileInterpreter) {
2132        // Are we profiling?
2133        __ movptr(rbx, Address(rcx, in_bytes(Method::method_data_offset())));
2134        __ testptr(rbx, rbx);
2135        __ jccb(Assembler::zero, no_mdo);
2136        // Increment the MDO backedge counter
2137        const Address mdo_backedge_counter(rbx, in_bytes(MethodData::backedge_counter_offset()) +
2138                                           in_bytes(InvocationCounter::counter_offset()));
2139        const Address mask(rbx, in_bytes(MethodData::backedge_mask_offset()));
2140        __ increment_mask_and_jump(mdo_backedge_counter, increment, mask,
2141                                   rax, false, Assembler::zero, &backedge_counter_overflow);
2142        __ jmp(dispatch);
2143      }
2144      __ bind(no_mdo);
2145      // Increment backedge counter in MethodCounters*
2146      __ movptr(rcx, Address(rcx, Method::method_counters_offset()));
2147      const Address mask(rcx, in_bytes(MethodCounters::backedge_mask_offset()));
2148      __ increment_mask_and_jump(Address(rcx, be_offset), increment, mask,
2149                                 rax, false, Assembler::zero, &backedge_counter_overflow);
2150    } else { // not TieredCompilation
2151      // increment counter
2152      __ movptr(rcx, Address(rcx, Method::method_counters_offset()));
2153      __ movl(rax, Address(rcx, be_offset));        // load backedge counter
2154      __ incrementl(rax, InvocationCounter::count_increment); // increment counter
2155      __ movl(Address(rcx, be_offset), rax);        // store counter
2156
2157      __ movl(rax, Address(rcx, inv_offset));    // load invocation counter
2158
2159      __ andl(rax, InvocationCounter::count_mask_value); // and the status bits
2160      __ addl(rax, Address(rcx, be_offset));        // add both counters
2161
2162      if (ProfileInterpreter) {
2163        // Test to see if we should create a method data oop
2164        __ cmp32(rax, Address(rcx, in_bytes(MethodCounters::interpreter_profile_limit_offset())));
2165        __ jcc(Assembler::less, dispatch);
2166
2167        // if no method data exists, go to profile method
2168        __ test_method_data_pointer(rax, profile_method);
2169
2170        if (UseOnStackReplacement) {
2171          // check for overflow against rbx which is the MDO taken count
2172          __ cmp32(rbx, Address(rcx, in_bytes(MethodCounters::interpreter_backward_branch_limit_offset())));
2173          __ jcc(Assembler::below, dispatch);
2174
2175          // When ProfileInterpreter is on, the backedge_count comes
2176          // from the MethodData*, whose value does not get reset by
2177          // the call to frequency_counter_overflow().  To avoid
2178          // excessive calls to the overflow routine while the method is
2179          // being compiled, add a second test to make sure the overflow
2180          // function is called only once every overflow_frequency.
2181          const int overflow_frequency = 1024;
2182          __ andl(rbx, overflow_frequency - 1);
2183          __ jcc(Assembler::zero, backedge_counter_overflow);
2184
2185        }
2186      } else {
2187        if (UseOnStackReplacement) {
2188          // check for overflow against rax, which is the sum of the
2189          // counters
2190          __ cmp32(rax, Address(rcx, in_bytes(MethodCounters::interpreter_backward_branch_limit_offset())));
2191          __ jcc(Assembler::aboveEqual, backedge_counter_overflow);
2192
2193        }
2194      }
2195    }
2196    __ bind(dispatch);
2197  }
2198
2199  // Pre-load the next target bytecode into rbx
2200  __ load_unsigned_byte(rbx, Address(rbcp, 0));
2201
2202  // continue with the bytecode @ target
2203  // rax: return bci for jsr's, unused otherwise
2204  // rbx: target bytecode
2205  // r13: target bcp
2206  __ dispatch_only(vtos);
2207
2208  if (UseLoopCounter) {
2209    if (ProfileInterpreter) {
2210      // Out-of-line code to allocate method data oop.
2211      __ bind(profile_method);
2212      __ call_VM(noreg, CAST_FROM_FN_PTR(address, InterpreterRuntime::profile_method));
2213      __ set_method_data_pointer_for_bcp();
2214      __ jmp(dispatch);
2215    }
2216
2217    if (UseOnStackReplacement) {
2218      // invocation counter overflow
2219      __ bind(backedge_counter_overflow);
2220      __ negptr(rdx);
2221      __ addptr(rdx, rbcp); // branch bcp
2222      // IcoResult frequency_counter_overflow([JavaThread*], address branch_bcp)
2223      __ call_VM(noreg,
2224                 CAST_FROM_FN_PTR(address,
2225                                  InterpreterRuntime::frequency_counter_overflow),
2226                 rdx);
2227
2228      // rax: osr nmethod (osr ok) or NULL (osr not possible)
2229      // rdx: scratch
2230      // r14: locals pointer
2231      // r13: bcp
2232      __ testptr(rax, rax);                        // test result
2233      __ jcc(Assembler::zero, dispatch);         // no osr if null
2234      // nmethod may have been invalidated (VM may block upon call_VM return)
2235      __ cmpb(Address(rax, nmethod::state_offset()), nmethod::in_use);
2236      __ jcc(Assembler::notEqual, dispatch);
2237
2238      // We have the address of an on stack replacement routine in rax.
2239      // In preparation for invoking it, we must first migrate the locals
2240      // and monitors off of the interpreter frame on the stack.
2241      // Be sure to save the osr nmethod over the migration call;
2242      // it will be preserved in rbx.
2243      __ mov(rbx, rax);
2244
2245      NOT_LP64(__ get_thread(rcx));
2246
2247      call_VM(noreg, CAST_FROM_FN_PTR(address, SharedRuntime::OSR_migration_begin));
2248
2249      // rax is OSR buffer, move it to expected parameter location
2250      LP64_ONLY(__ mov(j_rarg0, rax));
2251      NOT_LP64(__ mov(rcx, rax));
2252      // We use the j_rarg definitions here only to avoid register conflicts: parameter
2253      // registers differ across platforms, and since we are in the midst of a calling
2254      // sequence to the OSR nmethod we must avoid collisions. These are NOT parameters.
2255
2256      const Register retaddr   = LP64_ONLY(j_rarg2) NOT_LP64(rdi);
2257      const Register sender_sp = LP64_ONLY(j_rarg1) NOT_LP64(rdx);
2258
2259      // pop the interpreter frame
2260      __ movptr(sender_sp, Address(rbp, frame::interpreter_frame_sender_sp_offset * wordSize)); // get sender sp
2261      __ leave();                                // remove frame anchor
2262      __ pop(retaddr);                           // get return address
2263      __ mov(rsp, sender_sp);                   // set sp to sender sp
2264      // Ensure compiled code always sees stack at proper alignment
2265      __ andptr(rsp, -(StackAlignmentInBytes));
2266
2267      // We need no specialized return from compiled code
2268      // to the interpreter or the call stub here.
2269
2270      // push the return address
2271      __ push(retaddr);
2272
2273      // and begin the OSR nmethod
2274      __ jmp(Address(rbx, nmethod::osr_entry_point_offset()));
2275    }
2276  }
2277}
2278
2279void TemplateTable::if_0cmp(Condition cc) {
2280  transition(itos, vtos);
2281  // assume branch is more often taken than not (loops use backward branches)
2282  Label not_taken;
2283  __ testl(rax, rax);
2284  __ jcc(j_not(cc), not_taken);
2285  branch(false, false);
2286  __ bind(not_taken);
2287  __ profile_not_taken_branch(rax);
2288}
2289
2290void TemplateTable::if_icmp(Condition cc) {
2291  transition(itos, vtos);
2292  // assume branch is more often taken than not (loops use backward branches)
2293  Label not_taken;
2294  __ pop_i(rdx);
2295  __ cmpl(rdx, rax);
2296  __ jcc(j_not(cc), not_taken);
2297  branch(false, false);
2298  __ bind(not_taken);
2299  __ profile_not_taken_branch(rax);
2300}
2301
2302void TemplateTable::if_nullcmp(Condition cc) {
2303  transition(atos, vtos);
2304  // assume branch is more often taken than not (loops use backward branches)
2305  Label not_taken;
2306  __ testptr(rax, rax);
2307  __ jcc(j_not(cc), not_taken);
2308  branch(false, false);
2309  __ bind(not_taken);
2310  __ profile_not_taken_branch(rax);
2311}
2312
2313void TemplateTable::if_acmp(Condition cc) {
2314  transition(atos, vtos);
2315  // assume branch is more often taken than not (loops use backward branches)
2316  Label not_taken;
2317  __ pop_ptr(rdx);
2318  __ cmpptr(rdx, rax);
2319  __ jcc(j_not(cc), not_taken);
2320  branch(false, false);
2321  __ bind(not_taken);
2322  __ profile_not_taken_branch(rax);
2323}
2324
2325void TemplateTable::ret() {
2326  transition(vtos, vtos);
2327  locals_index(rbx);
2328  LP64_ONLY(__ movslq(rbx, iaddress(rbx))); // get return bci, compute return bcp
2329  NOT_LP64(__ movptr(rbx, iaddress(rbx)));
2330  __ profile_ret(rbx, rcx);
2331  __ get_method(rax);
2332  __ movptr(rbcp, Address(rax, Method::const_offset()));
2333  __ lea(rbcp, Address(rbcp, rbx, Address::times_1,
2334                      ConstMethod::codes_offset()));
2335  __ dispatch_next(vtos);
2336}
2337
2338void TemplateTable::wide_ret() {
2339  transition(vtos, vtos);
2340  locals_index_wide(rbx);
2341  __ movptr(rbx, aaddress(rbx)); // get return bci, compute return bcp
2342  __ profile_ret(rbx, rcx);
2343  __ get_method(rax);
2344  __ movptr(rbcp, Address(rax, Method::const_offset()));
2345  __ lea(rbcp, Address(rbcp, rbx, Address::times_1, ConstMethod::codes_offset()));
2346  __ dispatch_next(vtos);
2347}
2348
2349void TemplateTable::tableswitch() {
2350  Label default_case, continue_execution;
2351  transition(itos, vtos);
2352
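  // tableswitch operand layout (all 32-bit values big-endian, jump offsets
  // relative to the opcode).  After 0-3 padding bytes that 4-byte align the
  // operands (assuming the bytecode base itself is 4-byte aligned) come:
  //
  //   default  (4 bytes)   <- rbx after the align below
  //   low      (4 bytes)
  //   high     (4 bytes)
  //   (high - low + 1) jump offsets, 4 bytes each
  //
  // hence the reads at offsets BytesPerInt, 2*BytesPerInt and 3*BytesPerInt.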
2353  // align r13/rsi
2354  __ lea(rbx, at_bcp(BytesPerInt));
2355  __ andptr(rbx, -BytesPerInt);
2356  // load lo & hi
2357  __ movl(rcx, Address(rbx, BytesPerInt));
2358  __ movl(rdx, Address(rbx, 2 * BytesPerInt));
2359  __ bswapl(rcx);
2360  __ bswapl(rdx);
2361  // check against lo & hi
2362  __ cmpl(rax, rcx);
2363  __ jcc(Assembler::less, default_case);
2364  __ cmpl(rax, rdx);
2365  __ jcc(Assembler::greater, default_case);
2366  // lookup dispatch offset
2367  __ subl(rax, rcx);
2368  __ movl(rdx, Address(rbx, rax, Address::times_4, 3 * BytesPerInt));
2369  __ profile_switch_case(rax, rbx, rcx);
2370  // continue execution
2371  __ bind(continue_execution);
2372  __ bswapl(rdx);
2373  LP64_ONLY(__ movl2ptr(rdx, rdx));
2374  __ load_unsigned_byte(rbx, Address(rbcp, rdx, Address::times_1));
2375  __ addptr(rbcp, rdx);
2376  __ dispatch_only(vtos);
2377  // handle default
2378  __ bind(default_case);
2379  __ profile_switch_default(rax);
2380  __ movl(rdx, Address(rbx, 0));
2381  __ jmp(continue_execution);
2382}
2383
2384void TemplateTable::lookupswitch() {
2385  transition(itos, itos);
2386  __ stop("lookupswitch bytecode should have been rewritten");
2387}
2388
2389void TemplateTable::fast_linearswitch() {
2390  transition(itos, vtos);
2391  Label loop_entry, loop, found, continue_execution;
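  // lookupswitch operand layout (all 32-bit values big-endian, jump offsets
  // relative to the opcode).  After the alignment padding come:
  //
  //   default  (4 bytes)   <- rbx after the align below
  //   npairs   (4 bytes)
  //   npairs (match, offset) pairs, 8 bytes each
  //
  // The loop below scans the pairs from the last one down to the first,
  // comparing the byte-swapped key against each match word.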
2392  // bswap rax so we can avoid bswapping the table entries
2393  __ bswapl(rax);
2394  // align r13
2395  __ lea(rbx, at_bcp(BytesPerInt)); // btw: should be able to get rid of
2396                                    // this instruction (change offsets
2397                                    // below)
2398  __ andptr(rbx, -BytesPerInt);
2399  // set counter
2400  __ movl(rcx, Address(rbx, BytesPerInt));
2401  __ bswapl(rcx);
2402  __ jmpb(loop_entry);
2403  // table search
2404  __ bind(loop);
2405  __ cmpl(rax, Address(rbx, rcx, Address::times_8, 2 * BytesPerInt));
2406  __ jcc(Assembler::equal, found);
2407  __ bind(loop_entry);
2408  __ decrementl(rcx);
2409  __ jcc(Assembler::greaterEqual, loop);
2410  // default case
2411  __ profile_switch_default(rax);
2412  __ movl(rdx, Address(rbx, 0));
2413  __ jmp(continue_execution);
2414  // entry found -> get offset
2415  __ bind(found);
2416  __ movl(rdx, Address(rbx, rcx, Address::times_8, 3 * BytesPerInt));
2417  __ profile_switch_case(rcx, rax, rbx);
2418  // continue execution
2419  __ bind(continue_execution);
2420  __ bswapl(rdx);
2421  __ movl2ptr(rdx, rdx);
2422  __ load_unsigned_byte(rbx, Address(rbcp, rdx, Address::times_1));
2423  __ addptr(rbcp, rdx);
2424  __ dispatch_only(vtos);
2425}
2426
2427void TemplateTable::fast_binaryswitch() {
2428  transition(itos, vtos);
2429  // Implementation using the following core algorithm:
2430  //
2431  // int binary_search(int key, LookupswitchPair* array, int n) {
2432  //   // Binary search according to "Methodik des Programmierens" by
2433  //   // Edsger W. Dijkstra and W.H.J. Feijen, Addison Wesley Germany 1985.
2434  //   int i = 0;
2435  //   int j = n;
2436  //   while (i+1 < j) {
2437  //     // invariant P: 0 <= i < j <= n and (a[i] <= key < a[j] or Q)
2438  //     // with      Q: for all i: 0 <= i < n: key < a[i]
2439  //     // where a stands for the array and assuming that the (non-existent)
2440  //     // element a[n] is infinitely big.
2441  //     int h = (i + j) >> 1;
2442  //     // i < h < j
2443  //     if (key < array[h].fast_match()) {
2444  //       j = h;
2445  //     } else {
2446  //       i = h;
2447  //     }
2448  //   }
2449  //   // R: a[i] <= key < a[i+1] or Q
2450  //   // (i.e., if key is within array, i is the correct index)
2451  //   return i;
2452  // }
2453
2454  // Register allocation
2455  const Register key   = rax; // already set (tosca)
2456  const Register array = rbx;
2457  const Register i     = rcx;
2458  const Register j     = rdx;
2459  const Register h     = rdi;
2460  const Register temp  = rsi;
2461
2462  // Find array start
2463  NOT_LP64(__ save_bcp());
2464
2465  __ lea(array, at_bcp(3 * BytesPerInt)); // btw: should be able to
2466                                          // get rid of this
2467                                          // instruction (change
2468                                          // offsets below)
2469  __ andptr(array, -BytesPerInt);
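  // array now points at the first (match, offset) pair; each pair is two
  // 32-bit big-endian words, hence the times_8 addressing below.  The npairs
  // count sits at array[-BytesPerInt] and the default offset at
  // array[-2 * BytesPerInt].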
2470
2471  // Initialize i & j
2472  __ xorl(i, i);                            // i = 0;
2473  __ movl(j, Address(array, -BytesPerInt)); // j = length(array);
2474
2475  // Convert j into native byteordering
2476  __ bswapl(j);
2477
2478  // And start
2479  Label entry;
2480  __ jmp(entry);
2481
2482  // binary search loop
2483  {
2484    Label loop;
2485    __ bind(loop);
2486    // int h = (i + j) >> 1;
2487    __ leal(h, Address(i, j, Address::times_1)); // h = i + j;
2488    __ sarl(h, 1);                               // h = (i + j) >> 1;
2489    // if (key < array[h].fast_match()) {
2490    //   j = h;
2491    // } else {
2492    //   i = h;
2493    // }
2494    // Convert array[h].match to native byte-ordering before compare
2495    __ movl(temp, Address(array, h, Address::times_8));
2496    __ bswapl(temp);
2497    __ cmpl(key, temp);
2498    // j = h if (key <  array[h].fast_match())
2499    __ cmov32(Assembler::less, j, h);
2500    // i = h if (key >= array[h].fast_match())
2501    __ cmov32(Assembler::greaterEqual, i, h);
2502    // while (i+1 < j)
2503    __ bind(entry);
2504    __ leal(h, Address(i, 1)); // i+1
2505    __ cmpl(h, j);             // i+1 < j
2506    __ jcc(Assembler::less, loop);
2507  }
2508
2509  // end of binary search, result index is i (must check again!)
2510  Label default_case;
2511  // Convert array[i].match to native byte-ordering before compare
2512  __ movl(temp, Address(array, i, Address::times_8));
2513  __ bswapl(temp);
2514  __ cmpl(key, temp);
2515  __ jcc(Assembler::notEqual, default_case);
2516
2517  // entry found -> j = offset
2518  __ movl(j , Address(array, i, Address::times_8, BytesPerInt));
2519  __ profile_switch_case(i, key, array);
2520  __ bswapl(j);
2521  LP64_ONLY(__ movslq(j, j));
2522
2523  NOT_LP64(__ restore_bcp());
2524  NOT_LP64(__ restore_locals());                           // restore rdi
2525
2526  __ load_unsigned_byte(rbx, Address(rbcp, j, Address::times_1));
2527  __ addptr(rbcp, j);
2528  __ dispatch_only(vtos);
2529
2530  // default case -> j = default offset
2531  __ bind(default_case);
2532  __ profile_switch_default(i);
2533  __ movl(j, Address(array, -2 * BytesPerInt));
2534  __ bswapl(j);
2535  LP64_ONLY(__ movslq(j, j));
2536
2537  NOT_LP64(__ restore_bcp());
2538  NOT_LP64(__ restore_locals());
2539
2540  __ load_unsigned_byte(rbx, Address(rbcp, j, Address::times_1));
2541  __ addptr(rbcp, j);
2542  __ dispatch_only(vtos);
2543}
2544
2545void TemplateTable::_return(TosState state) {
2546  transition(state, state);
2547
2548  assert(_desc->calls_vm(),
2549         "inconsistent calls_vm information"); // call in remove_activation
2550
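  // _return_register_finalizer is the rewritten form of _return used for
  // Object.<init> (when finalizer registration at <init> is enabled): if the
  // receiver's class has a finalizer (JVM_ACC_HAS_FINALIZER), register the
  // object for finalization before returning.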
2551  if (_desc->bytecode() == Bytecodes::_return_register_finalizer) {
2552    assert(state == vtos, "only valid state");
2553    Register robj = LP64_ONLY(c_rarg1) NOT_LP64(rax);
2554    __ movptr(robj, aaddress(0));
2555    __ load_klass(rdi, robj);
2556    __ movl(rdi, Address(rdi, Klass::access_flags_offset()));
2557    __ testl(rdi, JVM_ACC_HAS_FINALIZER);
2558    Label skip_register_finalizer;
2559    __ jcc(Assembler::zero, skip_register_finalizer);
2560
2561    __ call_VM(noreg, CAST_FROM_FN_PTR(address, InterpreterRuntime::register_finalizer), robj);
2562
2563    __ bind(skip_register_finalizer);
2564  }
2565
2566  // Narrow result if state is itos but result type is smaller.
2567  // Need to narrow in the return bytecode rather than in generate_return_entry
2568  // since compiled code callers expect the result to already be narrowed.
2569  if (state == itos) {
2570    __ narrow(rax);
2571  }
2572  __ remove_activation(state, rbcp);
2573
2574  __ jmp(rbcp);
2575}
2576
2577// ----------------------------------------------------------------------------
2578// Volatile variables demand their effects be made known to all CPUs
2579// in order.  Store buffers on most chips allow reads & writes to
2580// reorder; the JMM's ReadAfterWrite.java test fails in -Xint mode
2581// without some kind of memory barrier (i.e., it's not sufficient that
2582// the interpreter does not reorder volatile references, the hardware
2583// also must not reorder them).
2584//
2585// According to the new Java Memory Model (JMM):
2586// (1) All volatiles are serialized with respect to each other.  ALSO reads &
2587//     writes act as acquire & release, so:
2588// (2) A read cannot let unrelated NON-volatile memory refs that
2589//     happen after the read float up to before the read.  It's OK for
2590//     non-volatile memory refs that happen before the volatile read to
2591//     float down below it.
2592// (3) Similarly, a volatile write cannot let unrelated NON-volatile
2593//     memory refs that happen BEFORE the write float down to after the
2594//     write.  It's OK for non-volatile memory refs that happen after the
2595//     volatile write to float up before it.
2596//
2597// We only put in barriers around volatile refs (they are expensive),
2598// not _between_ memory refs (that would require us to track the
2599// flavor of the previous memory refs).  Requirements (2) and (3)
2600// require some barriers before volatile stores and after volatile
2601// loads.  These nearly cover requirement (1) but miss the
2602// volatile-store-volatile-load case.  This final case is placed after
2603// volatile-stores although it could just as well go before
2604// volatile-loads.
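//
// A sketch of the volatile-store-volatile-load case that still needs the
// StoreLoad barrier (classic Dekker-style example; x and y volatile,
// initially 0):
//
//   Thread 1:  x = 1;  r1 = y;
//   Thread 2:  y = 1;  r2 = x;
//
// Without a StoreLoad barrier between the volatile store and the following
// volatile load, the hardware may let each load pass its own store and both
// r1 and r2 could observe 0, which the JMM forbids for volatiles.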
2605
2606void TemplateTable::volatile_barrier(Assembler::Membar_mask_bits order_constraint ) {
2607  // Helper function to insert an is-volatile test and memory barrier
2608  if (!os::is_MP()) return;   // Not needed on a single-CPU machine
2609  __ membar(order_constraint);
2610}
2611
2612void TemplateTable::resolve_cache_and_index(int byte_no,
2613                                            Register Rcache,
2614                                            Register index,
2615                                            size_t index_size) {
2616  const Register temp = rbx;
2617  assert_different_registers(Rcache, index, temp);
2618
2619  Label resolved;
2620
2621  Bytecodes::Code code = bytecode();
2622  switch (code) {
2623  case Bytecodes::_nofast_getfield: code = Bytecodes::_getfield; break;
2624  case Bytecodes::_nofast_putfield: code = Bytecodes::_putfield; break;
2625  default: break;
2626  }
2627
2628  assert(byte_no == f1_byte || byte_no == f2_byte, "byte_no out of range");
2629  __ get_cache_and_index_and_bytecode_at_bcp(Rcache, index, temp, byte_no, 1, index_size);
2630  __ cmpl(temp, code);  // have we resolved this bytecode?
2631  __ jcc(Assembler::equal, resolved);
2632
2633  // resolve first time through
2634  address entry = CAST_FROM_FN_PTR(address, InterpreterRuntime::resolve_from_cache);
2635  __ movl(temp, code);
2636  __ call_VM(noreg, entry, temp);
2637  // Update registers with resolved info
2638  __ get_cache_and_index_at_bcp(Rcache, index, 1, index_size);
2639  __ bind(resolved);
2640}
2641
2642// The cache and index registers must be set before the call
2643void TemplateTable::load_field_cp_cache_entry(Register obj,
2644                                              Register cache,
2645                                              Register index,
2646                                              Register off,
2647                                              Register flags,
2648                                              bool is_static = false) {
2649  assert_different_registers(cache, index, flags, off);
2650
2651  ByteSize cp_base_offset = ConstantPoolCache::base_offset();
2652  // Field offset
2653  __ movptr(off, Address(cache, index, Address::times_ptr,
2654                         in_bytes(cp_base_offset +
2655                                  ConstantPoolCacheEntry::f2_offset())));
2656  // Flags
2657  __ movl(flags, Address(cache, index, Address::times_ptr,
2658                         in_bytes(cp_base_offset +
2659                                  ConstantPoolCacheEntry::flags_offset())));
2660
2661  // klass overwrite register
2662  if (is_static) {
2663    __ movptr(obj, Address(cache, index, Address::times_ptr,
2664                           in_bytes(cp_base_offset +
2665                                    ConstantPoolCacheEntry::f1_offset())));
2666    const int mirror_offset = in_bytes(Klass::java_mirror_offset());
2667    __ movptr(obj, Address(obj, mirror_offset));
2668  }
2669}
2670
2671void TemplateTable::load_invoke_cp_cache_entry(int byte_no,
2672                                               Register method,
2673                                               Register itable_index,
2674                                               Register flags,
2675                                               bool is_invokevirtual,
2676                                               bool is_invokevfinal, /*unused*/
2677                                               bool is_invokedynamic) {
2678  // setup registers
2679  const Register cache = rcx;
2680  const Register index = rdx;
2681  assert_different_registers(method, flags);
2682  assert_different_registers(method, cache, index);
2683  assert_different_registers(itable_index, flags);
2684  assert_different_registers(itable_index, cache, index);
2685  // determine constant pool cache field offsets
2686  assert(is_invokevirtual == (byte_no == f2_byte), "is_invokevirtual flag redundant");
2687  const int method_offset = in_bytes(
2688    ConstantPoolCache::base_offset() +
2689      ((byte_no == f2_byte)
2690       ? ConstantPoolCacheEntry::f2_offset()
2691       : ConstantPoolCacheEntry::f1_offset()));
2692  const int flags_offset = in_bytes(ConstantPoolCache::base_offset() +
2693                                    ConstantPoolCacheEntry::flags_offset());
2694  // access constant pool cache fields
2695  const int index_offset = in_bytes(ConstantPoolCache::base_offset() +
2696                                    ConstantPoolCacheEntry::f2_offset());
2697
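  // invokedynamic operands occupy four bytes in the bytecode stream and are
  // rewritten to a 4-byte constant pool cache index; the other invokes carry
  // a 2-byte (rewritten) index.  Hence the differing index_size below.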
2698  size_t index_size = (is_invokedynamic ? sizeof(u4) : sizeof(u2));
2699  resolve_cache_and_index(byte_no, cache, index, index_size);
2700  __ movptr(method, Address(cache, index, Address::times_ptr, method_offset));
2701
2702  if (itable_index != noreg) {
2703    // pick up itable or appendix index from f2 also:
2704    __ movptr(itable_index, Address(cache, index, Address::times_ptr, index_offset));
2705  }
2706  __ movl(flags, Address(cache, index, Address::times_ptr, flags_offset));
2707}
2708
2709// The cache and index registers are expected to be set before the call.
2710// The correct values of the cache and index registers are preserved.
2711void TemplateTable::jvmti_post_field_access(Register cache,
2712                                            Register index,
2713                                            bool is_static,
2714                                            bool has_tos) {
2715  if (JvmtiExport::can_post_field_access()) {
2716    // Check to see if a field access watch has been set before we take
2717    // the time to call into the VM.
2718    Label L1;
2719    assert_different_registers(cache, index, rax);
2720    __ mov32(rax, ExternalAddress((address) JvmtiExport::get_field_access_count_addr()));
2721    __ testl(rax,rax);
2722    __ jcc(Assembler::zero, L1);
2723
2724    // cache entry pointer
2725    __ addptr(cache, in_bytes(ConstantPoolCache::base_offset()));
2726    __ shll(index, LogBytesPerWord);
2727    __ addptr(cache, index);
2728    if (is_static) {
2729      __ xorptr(rax, rax);      // NULL object reference
2730    } else {
2731      __ pop(atos);         // Get the object
2732      __ verify_oop(rax);
2733      __ push(atos);        // Restore stack state
2734    }
2735    // rax:    object pointer or NULL
2736    // cache: cache entry pointer
2737    __ call_VM(noreg, CAST_FROM_FN_PTR(address, InterpreterRuntime::post_field_access),
2738               rax, cache);
2739    __ get_cache_and_index_at_bcp(cache, index, 1);
2740    __ bind(L1);
2741  }
2742}
2743
2744void TemplateTable::pop_and_check_object(Register r) {
2745  __ pop_ptr(r);
2746  __ null_check(r);  // for field access must check obj.
2747  __ verify_oop(r);
2748}
2749
2750void TemplateTable::getfield_or_static(int byte_no, bool is_static, RewriteControl rc) {
2751  transition(vtos, vtos);
2752
2753  const Register cache = rcx;
2754  const Register index = rdx;
2755  const Register obj   = LP64_ONLY(c_rarg3) NOT_LP64(rcx);
2756  const Register off   = rbx;
2757  const Register flags = rax;
2758  const Register bc    = LP64_ONLY(c_rarg3) NOT_LP64(rcx); // uses same reg as obj, so don't mix them
2759
2760  resolve_cache_and_index(byte_no, cache, index, sizeof(u2));
2761  jvmti_post_field_access(cache, index, is_static, false);
2762  load_field_cp_cache_entry(obj, cache, index, off, flags, is_static);
2763
2764  if (!is_static) pop_and_check_object(obj);
2765
2766  const Address field(obj, off, Address::times_1, 0*wordSize);
2767  NOT_LP64(const Address hi(obj, off, Address::times_1, 1*wordSize));
2768
2769  Label Done, notByte, notBool, notInt, notShort, notChar, notLong, notFloat, notObj, notDouble;
2770
2771  __ shrl(flags, ConstantPoolCacheEntry::tos_state_shift);
2772  // Make sure we don't need to mask flags (rax) after the above shift
2773  assert(btos == 0, "change code, btos != 0");
2774
2775  __ andl(flags, ConstantPoolCacheEntry::tos_state_mask);
2776
2777  __ jcc(Assembler::notZero, notByte);
2778  // btos
2779  __ load_signed_byte(rax, field);
2780  __ push(btos);
2781  // Rewrite bytecode to be faster
2782  if (!is_static && rc == may_rewrite) {
2783    patch_bytecode(Bytecodes::_fast_bgetfield, bc, rbx);
2784  }
2785  __ jmp(Done);
2786
2787  __ bind(notByte);
2788  __ cmpl(flags, ztos);
2789  __ jcc(Assembler::notEqual, notBool);
2790
2791  // ztos (same code as btos)
2792  __ load_signed_byte(rax, field);
2793  __ push(ztos);
2794  // Rewrite bytecode to be faster
2795  if (!is_static && rc == may_rewrite) {
2796    // use btos rewriting, no truncating to t/f bit is needed for getfield.
2797    patch_bytecode(Bytecodes::_fast_bgetfield, bc, rbx);
2798  }
2799  __ jmp(Done);
2800
2801  __ bind(notBool);
2802  __ cmpl(flags, atos);
2803  __ jcc(Assembler::notEqual, notObj);
2804  // atos
2805  __ load_heap_oop(rax, field);
2806  __ push(atos);
2807  if (!is_static && rc == may_rewrite) {
2808    patch_bytecode(Bytecodes::_fast_agetfield, bc, rbx);
2809  }
2810  __ jmp(Done);
2811
2812  __ bind(notObj);
2813  __ cmpl(flags, itos);
2814  __ jcc(Assembler::notEqual, notInt);
2815  // itos
2816  __ movl(rax, field);
2817  __ push(itos);
2818  // Rewrite bytecode to be faster
2819  if (!is_static && rc == may_rewrite) {
2820    patch_bytecode(Bytecodes::_fast_igetfield, bc, rbx);
2821  }
2822  __ jmp(Done);
2823
2824  __ bind(notInt);
2825  __ cmpl(flags, ctos);
2826  __ jcc(Assembler::notEqual, notChar);
2827  // ctos
2828  __ load_unsigned_short(rax, field);
2829  __ push(ctos);
2830  // Rewrite bytecode to be faster
2831  if (!is_static && rc == may_rewrite) {
2832    patch_bytecode(Bytecodes::_fast_cgetfield, bc, rbx);
2833  }
2834  __ jmp(Done);
2835
2836  __ bind(notChar);
2837  __ cmpl(flags, stos);
2838  __ jcc(Assembler::notEqual, notShort);
2839  // stos
2840  __ load_signed_short(rax, field);
2841  __ push(stos);
2842  // Rewrite bytecode to be faster
2843  if (!is_static && rc == may_rewrite) {
2844    patch_bytecode(Bytecodes::_fast_sgetfield, bc, rbx);
2845  }
2846  __ jmp(Done);
2847
2848  __ bind(notShort);
2849  __ cmpl(flags, ltos);
2850  __ jcc(Assembler::notEqual, notLong);
2851  // ltos
2852
2853#ifndef _LP64
2854  // Generate code as if volatile.  There just aren't enough registers to
2855  // save that information and this code is faster than the test.
2856  __ fild_d(field);                // Must load atomically
2857  __ subptr(rsp,2*wordSize);    // Make space for store
2858  __ fistp_d(Address(rsp,0));
2859  __ pop(rax);
2860  __ pop(rdx);
2861#else
2862  __ movq(rax, field);
2863#endif
2864
2865  __ push(ltos);
2866  // Rewrite bytecode to be faster
2867  LP64_ONLY(if (!is_static && rc == may_rewrite) patch_bytecode(Bytecodes::_fast_lgetfield, bc, rbx));
2868  __ jmp(Done);
2869
2870  __ bind(notLong);
2871  __ cmpl(flags, ftos);
2872  __ jcc(Assembler::notEqual, notFloat);
2873  // ftos
2874
2875  __ load_float(field);
2876  __ push(ftos);
2877  // Rewrite bytecode to be faster
2878  if (!is_static && rc == may_rewrite) {
2879    patch_bytecode(Bytecodes::_fast_fgetfield, bc, rbx);
2880  }
2881  __ jmp(Done);
2882
2883  __ bind(notFloat);
2884#ifdef ASSERT
2885  __ cmpl(flags, dtos);
2886  __ jcc(Assembler::notEqual, notDouble);
2887#endif
2888  // dtos
2889  __ load_double(field);
2890  __ push(dtos);
2891  // Rewrite bytecode to be faster
2892  if (!is_static && rc == may_rewrite) {
2893    patch_bytecode(Bytecodes::_fast_dgetfield, bc, rbx);
2894  }
2895#ifdef ASSERT
2896  __ jmp(Done);
2897
2898
2899  __ bind(notDouble);
2900  __ stop("Bad state");
2901#endif
2902
2903  __ bind(Done);
2904  // [jk] not needed currently
2905  // volatile_barrier(Assembler::Membar_mask_bits(Assembler::LoadLoad |
2906  //                                              Assembler::LoadStore));
2907}
2908
2909void TemplateTable::getfield(int byte_no) {
2910  getfield_or_static(byte_no, false);
2911}
2912
2913void TemplateTable::nofast_getfield(int byte_no) {
2914  getfield_or_static(byte_no, false, may_not_rewrite);
2915}
2916
2917void TemplateTable::getstatic(int byte_no) {
2918  getfield_or_static(byte_no, true);
2919}
2920
2921
2922// The cache and index registers are expected to be set before the call.
2923// The function may destroy various registers, but not the cache and index registers.
2924void TemplateTable::jvmti_post_field_mod(Register cache, Register index, bool is_static) {
2925
2926  const Register robj = LP64_ONLY(c_rarg2)   NOT_LP64(rax);
2927  const Register RBX  = LP64_ONLY(c_rarg1)   NOT_LP64(rbx);
2928  const Register RCX  = LP64_ONLY(c_rarg3)   NOT_LP64(rcx);
2929  const Register RDX  = LP64_ONLY(rscratch1) NOT_LP64(rdx);
2930
2931  ByteSize cp_base_offset = ConstantPoolCache::base_offset();
2932
2933  if (JvmtiExport::can_post_field_modification()) {
2934    // Check to see if a field modification watch has been set before
2935    // we take the time to call into the VM.
2936    Label L1;
2937    assert_different_registers(cache, index, rax);
2938    __ mov32(rax, ExternalAddress((address)JvmtiExport::get_field_modification_count_addr()));
2939    __ testl(rax, rax);
2940    __ jcc(Assembler::zero, L1);
2941
2942    __ get_cache_and_index_at_bcp(robj, RDX, 1);
2943
2944
2945    if (is_static) {
2946      // Life is simple.  Null out the object pointer.
2947      __ xorl(RBX, RBX);
2948
2949    } else {
2950      // Life is harder. The stack holds the value on top, followed by
2951      // the object.  We don't know the size of the value, though; it
2952      // could be one or two words depending on its type. As a result,
2953      // we must find the type to determine where the object is.
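      // Expression stack at this point:
      //
      //   rsp -> value      (one slot, or two slots for ltos/dtos)
      //          object ref <- what must be passed to the VM
      //
      // i.e. the object is at at_tos_p1() for one-word values and at
      // at_tos_p2() for two-word values (expr_offset_in_bytes(1)/(2) on
      // 32-bit).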
2954#ifndef _LP64
2955      Label two_word, valsize_known;
2956#endif
2957      __ movl(RCX, Address(robj, RDX,
2958                           Address::times_ptr,
2959                           in_bytes(cp_base_offset +
2960                                     ConstantPoolCacheEntry::flags_offset())));
2961      NOT_LP64(__ mov(rbx, rsp));
2962      __ shrl(RCX, ConstantPoolCacheEntry::tos_state_shift);
2963
2964      // Make sure we don't need to mask rcx after the above shift
2965      ConstantPoolCacheEntry::verify_tos_state_shift();
2966#ifdef _LP64
2967      __ movptr(c_rarg1, at_tos_p1());  // initially assume a one word jvalue
2968      __ cmpl(c_rarg3, ltos);
2969      __ cmovptr(Assembler::equal,
2970                 c_rarg1, at_tos_p2()); // ltos (two word jvalue)
2971      __ cmpl(c_rarg3, dtos);
2972      __ cmovptr(Assembler::equal,
2973                 c_rarg1, at_tos_p2()); // dtos (two word jvalue)
2974#else
2975      __ cmpl(rcx, ltos);
2976      __ jccb(Assembler::equal, two_word);
2977      __ cmpl(rcx, dtos);
2978      __ jccb(Assembler::equal, two_word);
2979      __ addptr(rbx, Interpreter::expr_offset_in_bytes(1)); // one word jvalue (not ltos, dtos)
2980      __ jmpb(valsize_known);
2981
2982      __ bind(two_word);
2983      __ addptr(rbx, Interpreter::expr_offset_in_bytes(2)); // two words jvalue
2984
2985      __ bind(valsize_known);
2986      // setup object pointer
2987      __ movptr(rbx, Address(rbx, 0));
2988#endif
2989    }
2990    // cache entry pointer
2991    __ addptr(robj, in_bytes(cp_base_offset));
2992    __ shll(RDX, LogBytesPerWord);
2993    __ addptr(robj, RDX);
2994    // object (tos)
2995    __ mov(RCX, rsp);
2996    // c_rarg1: object pointer set up above (NULL if static)
2997    // c_rarg2: cache entry pointer
2998    // c_rarg3: jvalue object on the stack
2999    __ call_VM(noreg,
3000               CAST_FROM_FN_PTR(address,
3001                                InterpreterRuntime::post_field_modification),
3002               RBX, robj, RCX);
3003    __ get_cache_and_index_at_bcp(cache, index, 1);
3004    __ bind(L1);
3005  }
3006}
3007
3008void TemplateTable::putfield_or_static(int byte_no, bool is_static, RewriteControl rc) {
3009  transition(vtos, vtos);
3010
3011  const Register cache = rcx;
3012  const Register index = rdx;
3013  const Register obj   = rcx;
3014  const Register off   = rbx;
3015  const Register flags = rax;
3016  const Register bc    = LP64_ONLY(c_rarg3) NOT_LP64(rcx);
3017
3018  resolve_cache_and_index(byte_no, cache, index, sizeof(u2));
3019  jvmti_post_field_mod(cache, index, is_static);
3020  load_field_cp_cache_entry(obj, cache, index, off, flags, is_static);
3021
3022  // [jk] not needed currently
3023  // volatile_barrier(Assembler::Membar_mask_bits(Assembler::LoadStore |
3024  //                                              Assembler::StoreStore));
3025
3026  Label notVolatile, Done;
3027  __ movl(rdx, flags);
3028  __ shrl(rdx, ConstantPoolCacheEntry::is_volatile_shift);
3029  __ andl(rdx, 0x1);
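  // rdx is now 1 if the field is volatile and 0 otherwise; it is tested after
  // the store to decide whether a StoreLoad|StoreStore barrier is required
  // (see the final bind(notVolatile) below).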
3030
3031  // field addresses
3032  const Address field(obj, off, Address::times_1, 0*wordSize);
3033  NOT_LP64( const Address hi(obj, off, Address::times_1, 1*wordSize);)
3034
3035  Label notByte, notBool, notInt, notShort, notChar,
3036        notLong, notFloat, notObj, notDouble;
3037
3038  __ shrl(flags, ConstantPoolCacheEntry::tos_state_shift);
3039
3040  assert(btos == 0, "change code, btos != 0");
3041  __ andl(flags, ConstantPoolCacheEntry::tos_state_mask);
3042  __ jcc(Assembler::notZero, notByte);
3043
3044  // btos
3045  {
3046    __ pop(btos);
3047    if (!is_static) pop_and_check_object(obj);
3048    __ movb(field, rax);
3049    if (!is_static && rc == may_rewrite) {
3050      patch_bytecode(Bytecodes::_fast_bputfield, bc, rbx, true, byte_no);
3051    }
3052    __ jmp(Done);
3053  }
3054
3055  __ bind(notByte);
3056  __ cmpl(flags, ztos);
3057  __ jcc(Assembler::notEqual, notBool);
3058
3059  // ztos
3060  {
3061    __ pop(ztos);
3062    if (!is_static) pop_and_check_object(obj);
3063    __ andl(rax, 0x1);
3064    __ movb(field, rax);
3065    if (!is_static && rc == may_rewrite) {
3066      patch_bytecode(Bytecodes::_fast_zputfield, bc, rbx, true, byte_no);
3067    }
3068    __ jmp(Done);
3069  }
3070
3071  __ bind(notBool);
3072  __ cmpl(flags, atos);
3073  __ jcc(Assembler::notEqual, notObj);
3074
3075  // atos
3076  {
3077    __ pop(atos);
3078    if (!is_static) pop_and_check_object(obj);
3079    // Store into the field
3080    do_oop_store(_masm, field, rax, _bs->kind(), false);
3081    if (!is_static && rc == may_rewrite) {
3082      patch_bytecode(Bytecodes::_fast_aputfield, bc, rbx, true, byte_no);
3083    }
3084    __ jmp(Done);
3085  }
3086
3087  __ bind(notObj);
3088  __ cmpl(flags, itos);
3089  __ jcc(Assembler::notEqual, notInt);
3090
3091  // itos
3092  {
3093    __ pop(itos);
3094    if (!is_static) pop_and_check_object(obj);
3095    __ movl(field, rax);
3096    if (!is_static && rc == may_rewrite) {
3097      patch_bytecode(Bytecodes::_fast_iputfield, bc, rbx, true, byte_no);
3098    }
3099    __ jmp(Done);
3100  }
3101
3102  __ bind(notInt);
3103  __ cmpl(flags, ctos);
3104  __ jcc(Assembler::notEqual, notChar);
3105
3106  // ctos
3107  {
3108    __ pop(ctos);
3109    if (!is_static) pop_and_check_object(obj);
3110    __ movw(field, rax);
3111    if (!is_static && rc == may_rewrite) {
3112      patch_bytecode(Bytecodes::_fast_cputfield, bc, rbx, true, byte_no);
3113    }
3114    __ jmp(Done);
3115  }
3116
3117  __ bind(notChar);
3118  __ cmpl(flags, stos);
3119  __ jcc(Assembler::notEqual, notShort);
3120
3121  // stos
3122  {
3123    __ pop(stos);
3124    if (!is_static) pop_and_check_object(obj);
3125    __ movw(field, rax);
3126    if (!is_static && rc == may_rewrite) {
3127      patch_bytecode(Bytecodes::_fast_sputfield, bc, rbx, true, byte_no);
3128    }
3129    __ jmp(Done);
3130  }
3131
3132  __ bind(notShort);
3133  __ cmpl(flags, ltos);
3134  __ jcc(Assembler::notEqual, notLong);
3135
3136  // ltos
3137#ifdef _LP64
3138  {
3139    __ pop(ltos);
3140    if (!is_static) pop_and_check_object(obj);
3141    __ movq(field, rax);
3142    if (!is_static && rc == may_rewrite) {
3143      patch_bytecode(Bytecodes::_fast_lputfield, bc, rbx, true, byte_no);
3144    }
3145    __ jmp(Done);
3146  }
3147#else
3148  {
3149    Label notVolatileLong;
3150    __ testl(rdx, rdx);
3151    __ jcc(Assembler::zero, notVolatileLong);
3152
3153    __ pop(ltos);  // overwrites rdx, do this after testing volatile.
3154    if (!is_static) pop_and_check_object(obj);
3155
3156    // Replace with real volatile test
3157    __ push(rdx);
3158    __ push(rax);                 // Must update atomically with FIST
3159    __ fild_d(Address(rsp,0));    // So load into FPU register
3160    __ fistp_d(field);            // and put into memory atomically
3161    __ addptr(rsp, 2*wordSize);
3162    // volatile_barrier();
3163    volatile_barrier(Assembler::Membar_mask_bits(Assembler::StoreLoad |
3164                                                 Assembler::StoreStore));
3165    // Don't rewrite volatile version
3166    __ jmp(notVolatile);
3167
3168    __ bind(notVolatileLong);
3169
3170    __ pop(ltos);  // overwrites rdx
3171    if (!is_static) pop_and_check_object(obj);
3172    __ movptr(hi, rdx);
3173    __ movptr(field, rax);
3174    // Don't rewrite to _fast_lputfield for potential volatile case.
3175    __ jmp(notVolatile);
3176  }
3177#endif // _LP64
3178
3179  __ bind(notLong);
3180  __ cmpl(flags, ftos);
3181  __ jcc(Assembler::notEqual, notFloat);
3182
3183  // ftos
3184  {
3185    __ pop(ftos);
3186    if (!is_static) pop_and_check_object(obj);
3187    __ store_float(field);
3188    if (!is_static && rc == may_rewrite) {
3189      patch_bytecode(Bytecodes::_fast_fputfield, bc, rbx, true, byte_no);
3190    }
3191    __ jmp(Done);
3192  }
3193
3194  __ bind(notFloat);
3195#ifdef ASSERT
3196  __ cmpl(flags, dtos);
3197  __ jcc(Assembler::notEqual, notDouble);
3198#endif
3199
3200  // dtos
3201  {
3202    __ pop(dtos);
3203    if (!is_static) pop_and_check_object(obj);
3204    __ store_double(field);
3205    if (!is_static && rc == may_rewrite) {
3206      patch_bytecode(Bytecodes::_fast_dputfield, bc, rbx, true, byte_no);
3207    }
3208  }
3209
3210#ifdef ASSERT
3211  __ jmp(Done);
3212
3213  __ bind(notDouble);
3214  __ stop("Bad state");
3215#endif
3216
3217  __ bind(Done);
3218
3219  // Check for volatile store
3220  __ testl(rdx, rdx);
3221  __ jcc(Assembler::zero, notVolatile);
3222  volatile_barrier(Assembler::Membar_mask_bits(Assembler::StoreLoad |
3223                                               Assembler::StoreStore));
3224  __ bind(notVolatile);
3225}
3226
3227void TemplateTable::putfield(int byte_no) {
3228  putfield_or_static(byte_no, false);
3229}
3230
3231void TemplateTable::nofast_putfield(int byte_no) {
3232  putfield_or_static(byte_no, false, may_not_rewrite);
3233}
3234
3235void TemplateTable::putstatic(int byte_no) {
3236  putfield_or_static(byte_no, true);
3237}
3238
3239void TemplateTable::jvmti_post_fast_field_mod() {
3240
3241  const Register scratch = LP64_ONLY(c_rarg3) NOT_LP64(rcx);
3242
3243  if (JvmtiExport::can_post_field_modification()) {
3244    // Check to see if a field modification watch has been set before
3245    // we take the time to call into the VM.
3246    Label L2;
3247    __ mov32(scratch, ExternalAddress((address)JvmtiExport::get_field_modification_count_addr()));
3248    __ testl(scratch, scratch);
3249    __ jcc(Assembler::zero, L2);
3250    __ pop_ptr(rbx);                  // copy the object pointer from tos
3251    __ verify_oop(rbx);
3252    __ push_ptr(rbx);                 // put the object pointer back on tos
3253    // Save tos values before call_VM() clobbers them. Since we have
3254    // to do it for every data type, we use the saved values as the
3255    // jvalue object.
3256    switch (bytecode()) {          // load values into the jvalue object
3257    case Bytecodes::_fast_aputfield: __ push_ptr(rax); break;
3258    case Bytecodes::_fast_bputfield: // fall through
3259    case Bytecodes::_fast_zputfield: // fall through
3260    case Bytecodes::_fast_sputfield: // fall through
3261    case Bytecodes::_fast_cputfield: // fall through
3262    case Bytecodes::_fast_iputfield: __ push_i(rax); break;
3263    case Bytecodes::_fast_dputfield: __ push(dtos); break;
3264    case Bytecodes::_fast_fputfield: __ push(ftos); break;
3265    case Bytecodes::_fast_lputfield: __ push_l(rax); break;
3266
3267    default:
3268      ShouldNotReachHere();
3269    }
3270    __ mov(scratch, rsp);             // points to jvalue on the stack
3271    // access constant pool cache entry
3272    LP64_ONLY(__ get_cache_entry_pointer_at_bcp(c_rarg2, rax, 1));
3273    NOT_LP64(__ get_cache_entry_pointer_at_bcp(rax, rdx, 1));
3274    __ verify_oop(rbx);
3275    // rbx: object pointer copied above
3276    // c_rarg2: cache entry pointer
3277    // c_rarg3: jvalue object on the stack
3278    LP64_ONLY(__ call_VM(noreg, CAST_FROM_FN_PTR(address, InterpreterRuntime::post_field_modification), rbx, c_rarg2, c_rarg3));
3279    NOT_LP64(__ call_VM(noreg, CAST_FROM_FN_PTR(address, InterpreterRuntime::post_field_modification), rbx, rax, rcx));
3280
3281    switch (bytecode()) {             // restore tos values
3282    case Bytecodes::_fast_aputfield: __ pop_ptr(rax); break;
3283    case Bytecodes::_fast_bputfield: // fall through
3284    case Bytecodes::_fast_zputfield: // fall through
3285    case Bytecodes::_fast_sputfield: // fall through
3286    case Bytecodes::_fast_cputfield: // fall through
3287    case Bytecodes::_fast_iputfield: __ pop_i(rax); break;
3288    case Bytecodes::_fast_dputfield: __ pop(dtos); break;
3289    case Bytecodes::_fast_fputfield: __ pop(ftos); break;
3290    case Bytecodes::_fast_lputfield: __ pop_l(rax); break;
3291    default: break;
3292    }
3293    __ bind(L2);
3294  }
3295}
3296
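// fast_storefield implements the rewritten _fast_<x>putfield bytecodes: the field is
// already resolved, so the value sits in the tos register(s), the object reference is
// on the stack, and the field offset and flags come straight from the constant pool
// cache entry (e.g. a resolved putfield of an int is rewritten to _fast_iputfield).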
3297void TemplateTable::fast_storefield(TosState state) {
3298  transition(state, vtos);
3299
3300  ByteSize base = ConstantPoolCache::base_offset();
3301
3302  jvmti_post_fast_field_mod();
3303
3304  // access constant pool cache
3305  __ get_cache_and_index_at_bcp(rcx, rbx, 1);
3306
3307  // test for volatile with rdx; note rdx also holds the high half of the tos value for lputfield on 32-bit (cf. the stop() below)
3308  __ movl(rdx, Address(rcx, rbx, Address::times_ptr,
3309                       in_bytes(base +
3310                                ConstantPoolCacheEntry::flags_offset())));
3311
3312  // replace index with field offset from cache entry
3313  __ movptr(rbx, Address(rcx, rbx, Address::times_ptr,
3314                         in_bytes(base + ConstantPoolCacheEntry::f2_offset())));
3315
3316  // [jk] not needed currently
3317  // volatile_barrier(Assembler::Membar_mask_bits(Assembler::LoadStore |
3318  //                                              Assembler::StoreStore));
3319
3320  Label notVolatile;
3321  __ shrl(rdx, ConstantPoolCacheEntry::is_volatile_shift);
3322  __ andl(rdx, 0x1);
3323
3324  // Get object from stack
3325  pop_and_check_object(rcx);
3326
3327  // field address
3328  const Address field(rcx, rbx, Address::times_1);
3329
3330  // access field
3331  switch (bytecode()) {
3332  case Bytecodes::_fast_aputfield:
3333    do_oop_store(_masm, field, rax, _bs->kind(), false);
3334    break;
3335  case Bytecodes::_fast_lputfield:
3336#ifdef _LP64
3337    __ movq(field, rax);
3338#else
3339    __ stop("should not be rewritten");
3340#endif
3341    break;
3342  case Bytecodes::_fast_iputfield:
3343    __ movl(field, rax);
3344    break;
3345  case Bytecodes::_fast_zputfield:
3346    __ andl(rax, 0x1);  // boolean is true if LSB is 1
3347    // fall through to bputfield
3348  case Bytecodes::_fast_bputfield:
3349    __ movb(field, rax);
3350    break;
3351  case Bytecodes::_fast_sputfield:
3352    // fall through
3353  case Bytecodes::_fast_cputfield:
3354    __ movw(field, rax);
3355    break;
3356  case Bytecodes::_fast_fputfield:
3357    __ store_float(field);
3358    break;
3359  case Bytecodes::_fast_dputfield:
3360    __ store_double(field);
3361    break;
3362  default:
3363    ShouldNotReachHere();
3364  }
3365
3366  // Check for volatile store
3367  __ testl(rdx, rdx);
3368  __ jcc(Assembler::zero, notVolatile);
3369  volatile_barrier(Assembler::Membar_mask_bits(Assembler::StoreLoad |
3370                                               Assembler::StoreStore));
3371  __ bind(notVolatile);
3372}
3373
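// fast_accessfield implements the rewritten _fast_<x>getfield bytecodes: the object
// reference arrives in rax (atos), the field offset is read from the cp cache f2 slot,
// and the loaded value is left in the tos register(s) for 'state'.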
3374void TemplateTable::fast_accessfield(TosState state) {
3375  transition(atos, state);
3376
3377  // Do the JVMTI work here to avoid disturbing the register state below
3378  if (JvmtiExport::can_post_field_access()) {
3379    // Check to see if a field access watch has been set before we
3380    // take the time to call into the VM.
3381    Label L1;
3382    __ mov32(rcx, ExternalAddress((address) JvmtiExport::get_field_access_count_addr()));
3383    __ testl(rcx, rcx);
3384    __ jcc(Assembler::zero, L1);
3385    // access constant pool cache entry
3386    LP64_ONLY(__ get_cache_entry_pointer_at_bcp(c_rarg2, rcx, 1));
3387    NOT_LP64(__ get_cache_entry_pointer_at_bcp(rcx, rdx, 1));
3388    __ verify_oop(rax);
3389    __ push_ptr(rax);  // save object pointer before call_VM() clobbers it
3390    LP64_ONLY(__ mov(c_rarg1, rax));
3391    // c_rarg1: object pointer copied above
3392    // c_rarg2: cache entry pointer
3393    LP64_ONLY(__ call_VM(noreg, CAST_FROM_FN_PTR(address, InterpreterRuntime::post_field_access), c_rarg1, c_rarg2));
3394    NOT_LP64(__ call_VM(noreg, CAST_FROM_FN_PTR(address, InterpreterRuntime::post_field_access), rax, rcx));
3395    __ pop_ptr(rax); // restore object pointer
3396    __ bind(L1);
3397  }
3398
3399  // access constant pool cache
3400  __ get_cache_and_index_at_bcp(rcx, rbx, 1);
3401  // replace index with field offset from cache entry
3402  // [jk] not needed currently
3403  // if (os::is_MP()) {
3404  //   __ movl(rdx, Address(rcx, rbx, Address::times_8,
3405  //                        in_bytes(ConstantPoolCache::base_offset() +
3406  //                                 ConstantPoolCacheEntry::flags_offset())));
3407  //   __ shrl(rdx, ConstantPoolCacheEntry::is_volatile_shift);
3408  //   __ andl(rdx, 0x1);
3409  // }
3410  __ movptr(rbx, Address(rcx, rbx, Address::times_ptr,
3411                         in_bytes(ConstantPoolCache::base_offset() +
3412                                  ConstantPoolCacheEntry::f2_offset())));
3413
3414  // rax: object
3415  __ verify_oop(rax);
3416  __ null_check(rax);
3417  Address field(rax, rbx, Address::times_1);
3418
3419  // access field
3420  switch (bytecode()) {
3421  case Bytecodes::_fast_agetfield:
3422    __ load_heap_oop(rax, field);
3423    __ verify_oop(rax);
3424    break;
3425  case Bytecodes::_fast_lgetfield:
3426#ifdef _LP64
3427    __ movq(rax, field);
3428#else
3429    __ stop("should not be rewritten");
3430#endif
3431    break;
3432  case Bytecodes::_fast_igetfield:
3433    __ movl(rax, field);
3434    break;
3435  case Bytecodes::_fast_bgetfield:
3436    __ movsbl(rax, field);
3437    break;
3438  case Bytecodes::_fast_sgetfield:
3439    __ load_signed_short(rax, field);
3440    break;
3441  case Bytecodes::_fast_cgetfield:
3442    __ load_unsigned_short(rax, field);
3443    break;
3444  case Bytecodes::_fast_fgetfield:
3445    __ load_float(field);
3446    break;
3447  case Bytecodes::_fast_dgetfield:
3448    __ load_double(field);
3449    break;
3450  default:
3451    ShouldNotReachHere();
3452  }
3453  // [jk] not needed currently
3454  // if (os::is_MP()) {
3455  //   Label notVolatile;
3456  //   __ testl(rdx, rdx);
3457  //   __ jcc(Assembler::zero, notVolatile);
3458  //   __ membar(Assembler::LoadLoad);
3459  //   __ bind(notVolatile);
3460  //};
3461}
3462
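// fast_xaccess implements the fused _fast_<x>access_0 bytecodes (aload_0 followed by a
// fast getfield of the receiver): the receiver is read from local 0 and the cache index
// from bcp+2; rbcp is bumped so a NullPointerException is reported against the getfield
// part, then restored before dispatch.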
3463void TemplateTable::fast_xaccess(TosState state) {
3464  transition(vtos, state);
3465
3466  // get receiver
3467  __ movptr(rax, aaddress(0));
3468  // access constant pool cache
3469  __ get_cache_and_index_at_bcp(rcx, rdx, 2);
3470  __ movptr(rbx,
3471            Address(rcx, rdx, Address::times_ptr,
3472                    in_bytes(ConstantPoolCache::base_offset() +
3473                             ConstantPoolCacheEntry::f2_offset())));
3474  // make sure exception is reported in correct bcp range (getfield is
3475  // next instruction)
3476  __ increment(rbcp);
3477  __ null_check(rax);
3478  const Address field = Address(rax, rbx, Address::times_1, 0*wordSize);
3479  switch (state) {
3480  case itos:
3481    __ movl(rax, field);
3482    break;
3483  case atos:
3484    __ load_heap_oop(rax, field);
3485    __ verify_oop(rax);
3486    break;
3487  case ftos:
3488    __ load_float(field);
3489    break;
3490  default:
3491    ShouldNotReachHere();
3492  }
3493
3494  // [jk] not needed currently
3495  // if (os::is_MP()) {
3496  //   Label notVolatile;
3497  //   __ movl(rdx, Address(rcx, rdx, Address::times_8,
3498  //                        in_bytes(ConstantPoolCache::base_offset() +
3499  //                                 ConstantPoolCacheEntry::flags_offset())));
3500  //   __ shrl(rdx, ConstantPoolCacheEntry::is_volatile_shift);
3501  //   __ testl(rdx, 0x1);
3502  //   __ jcc(Assembler::zero, notVolatile);
3503  //   __ membar(Assembler::LoadLoad);
3504  //   __ bind(notVolatile);
3505  // }
3506
3507  __ decrement(rbcp);
3508}
3509
3510//-----------------------------------------------------------------------------
3511// Calls
3512
3513void TemplateTable::count_calls(Register method, Register temp) {
3514  // implemented elsewhere
3515  ShouldNotReachHere();
3516}
3517
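// prepare_invoke loads the resolved method (or klass/itable index) and flags from the
// constant pool cache, pushes the appendix for invokedynamic/invokehandle call sites,
// loads the receiver when one is expected, and pushes the tos-state-specific return
// entry so the callee returns into the right interpreter continuation.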
3518void TemplateTable::prepare_invoke(int byte_no,
3519                                   Register method,  // linked method (or i-klass)
3520                                   Register index,   // itable index, MethodType, etc.
3521                                   Register recv,    // if caller wants to see it
3522                                   Register flags    // if caller wants to test it
3523                                   ) {
3524  // determine flags
3525  const Bytecodes::Code code = bytecode();
3526  const bool is_invokeinterface  = code == Bytecodes::_invokeinterface;
3527  const bool is_invokedynamic    = code == Bytecodes::_invokedynamic;
3528  const bool is_invokehandle     = code == Bytecodes::_invokehandle;
3529  const bool is_invokevirtual    = code == Bytecodes::_invokevirtual;
3530  const bool is_invokespecial    = code == Bytecodes::_invokespecial;
3531  const bool load_receiver       = (recv  != noreg);
3532  const bool save_flags          = (flags != noreg);
3533  assert(load_receiver == (code != Bytecodes::_invokestatic && code != Bytecodes::_invokedynamic), "");
3534  assert(save_flags    == (is_invokeinterface || is_invokevirtual), "need flags for vfinal");
3535  assert(flags == noreg || flags == rdx, "");
3536  assert(recv  == noreg || recv  == rcx, "");
3537
3538  // setup registers & access constant pool cache
3539  if (recv  == noreg)  recv  = rcx;
3540  if (flags == noreg)  flags = rdx;
3541  assert_different_registers(method, index, recv, flags);
3542
3543  // save bcp (the 'interpreter return address'); rbcp may be reused to hold flags below
3544  __ save_bcp();
3545
3546  load_invoke_cp_cache_entry(byte_no, method, index, flags, is_invokevirtual, false, is_invokedynamic);
3547
3548  // maybe push appendix to arguments (just before return address)
3549  if (is_invokedynamic || is_invokehandle) {
3550    Label L_no_push;
3551    __ testl(flags, (1 << ConstantPoolCacheEntry::has_appendix_shift));
3552    __ jcc(Assembler::zero, L_no_push);
3553    // Push the appendix as a trailing parameter.
3554    // This must be done before we get the receiver,
3555    // since the parameter_size includes it.
3556    __ push(rbx);
3557    __ mov(rbx, index);
3558    assert(ConstantPoolCacheEntry::_indy_resolved_references_appendix_offset == 0, "appendix expected at index+0");
3559    __ load_resolved_reference_at_index(index, rbx);
3560    __ pop(rbx);
3561    __ push(index);  // push appendix (MethodType, CallSite, etc.)
3562    __ bind(L_no_push);
3563  }
3564
3565  // load receiver if needed (after appendix is pushed so parameter size is correct)
3566  // Note: no return address pushed yet
3567  if (load_receiver) {
3568    __ movl(recv, flags);
3569    __ andl(recv, ConstantPoolCacheEntry::parameter_size_mask);
3570    const int no_return_pc_pushed_yet = -1;  // argument slot correction before we push return address
3571    const int receiver_is_at_end      = -1;  // back off one slot to get receiver
3572    Address recv_addr = __ argument_address(recv, no_return_pc_pushed_yet + receiver_is_at_end);
3573    __ movptr(recv, recv_addr);
3574    __ verify_oop(recv);
3575  }
3576
3577  if (save_flags) {
3578    __ movl(rbcp, flags);
3579  }
3580
3581  // compute return type
3582  __ shrl(flags, ConstantPoolCacheEntry::tos_state_shift);
3583  // Make sure we don't need to mask flags after the above shift
3584  ConstantPoolCacheEntry::verify_tos_state_shift();
3585  // load return address
3586  {
3587    const address table_addr = (address) Interpreter::invoke_return_entry_table_for(code);
3588    ExternalAddress table(table_addr);
3589    LP64_ONLY(__ lea(rscratch1, table));
3590    LP64_ONLY(__ movptr(flags, Address(rscratch1, flags, Address::times_ptr)));
3591    NOT_LP64(__ movptr(flags, ArrayAddress(table, Address(noreg, flags, Address::times_ptr))));
3592  }
3593
3594  // push return address
3595  __ push(flags);
3596
3597  // Restore flags value from the constant pool cache, and restore rbcp
3598  // for later null checks (rbcp is the bytecode pointer: rsi on x86_32, r13 on x86_64)
3599  if (save_flags) {
3600    __ movl(flags, rbcp);
3601    __ restore_bcp();
3602  }
3603}
3604
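// Common tail for invokevirtual and the forced-virtual invokeinterface case: if the
// cache entry is marked vfinal, f2 already holds the Method* and the call is direct
// (e.g. calls to final methods); otherwise f2 is a vtable index and the target is
// looked up in the receiver's vtable.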
3605void TemplateTable::invokevirtual_helper(Register index,
3606                                         Register recv,
3607                                         Register flags) {
3608  // Uses temporary registers rax, rdx
3609  assert_different_registers(index, recv, rax, rdx);
3610  assert(index == rbx, "");
3611  assert(recv  == rcx, "");
3612
3613  // Test for an invoke of a final method
3614  Label notFinal;
3615  __ movl(rax, flags);
3616  __ andl(rax, (1 << ConstantPoolCacheEntry::is_vfinal_shift));
3617  __ jcc(Assembler::zero, notFinal);
3618
3619  const Register method = index;  // method must be rbx
3620  assert(method == rbx,
3621         "Method* must be rbx for interpreter calling convention");
3622
3623  // do the call - the index is actually the method to call
3624  // that is, f2 is a vtable index if !is_vfinal, else f2 is a Method*
3625
3626  // It's final, need a null check here!
3627  __ null_check(recv);
3628
3629  // profile this call
3630  __ profile_final_call(rax);
3631  __ profile_arguments_type(rax, method, rbcp, true);
3632
3633  __ jump_from_interpreted(method, rax);
3634
3635  __ bind(notFinal);
3636
3637  // get receiver klass
3638  __ null_check(recv, oopDesc::klass_offset_in_bytes());
3639  __ load_klass(rax, recv);
3640
3641  // profile this call
3642  __ profile_virtual_call(rax, rlocals, rdx);
3643  // get target Method* & entry point
3644  __ lookup_virtual_method(rax, index, method);
3645  __ profile_called_method(method, rdx, rbcp);
3646
3647  __ profile_arguments_type(rdx, method, rbcp, true);
3648  __ jump_from_interpreted(method, rdx);
3649}
3650
3651void TemplateTable::invokevirtual(int byte_no) {
3652  transition(vtos, vtos);
3653  assert(byte_no == f2_byte, "use this argument");
3654  prepare_invoke(byte_no,
3655                 rbx,    // method or vtable index
3656                 noreg,  // unused itable index
3657                 rcx, rdx); // recv, flags
3658
3659  // rbx: index
3660  // rcx: receiver
3661  // rdx: flags
3662
3663  invokevirtual_helper(rbx, rcx, rdx);
3664}
3665
3666void TemplateTable::invokespecial(int byte_no) {
3667  transition(vtos, vtos);
3668  assert(byte_no == f1_byte, "use this argument");
3669  prepare_invoke(byte_no, rbx, noreg,  // get f1 Method*
3670                 rcx);  // get receiver also for null check
3671  __ verify_oop(rcx);
3672  __ null_check(rcx);
3673  // do the call
3674  __ profile_call(rax);
3675  __ profile_arguments_type(rax, rbx, rbcp, false);
3676  __ jump_from_interpreted(rbx, rax);
3677}
3678
3679void TemplateTable::invokestatic(int byte_no) {
3680  transition(vtos, vtos);
3681  assert(byte_no == f1_byte, "use this argument");
3682  prepare_invoke(byte_no, rbx);  // get f1 Method*
3683  // do the call
3684  __ profile_call(rax);
3685  __ profile_arguments_type(rax, rbx, rbcp, false);
3686  __ jump_from_interpreted(rbx, rax);
3687}
3688
3689
3690void TemplateTable::fast_invokevfinal(int byte_no) {
3691  transition(vtos, vtos);
3692  assert(byte_no == f2_byte, "use this argument");
3693  __ stop("fast_invokevfinal not used on x86");
3694}
3695
3696
3697void TemplateTable::invokeinterface(int byte_no) {
3698  transition(vtos, vtos);
3699  assert(byte_no == f1_byte, "use this argument");
3700  prepare_invoke(byte_no, rax, rbx,  // get f1 Klass*, f2 itable index
3701                 rcx, rdx); // recv, flags
3702
3703  // rax: interface klass (from f1)
3704  // rbx: itable index (from f2)
3705  // rcx: receiver
3706  // rdx: flags
3707
3708  // Special case of invokeinterface called for virtual method of
3709  // java.lang.Object.  See cpCacheOop.cpp for details.
3710  // This code isn't produced by javac, but could be produced by
3711  // another compliant Java compiler.
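  // For example (illustrative only): an invokeinterface whose resolved target is a
  // java.lang.Object method such as hashCode() is dispatched through the
  // forced-virtual path below rather than through the itable.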
3712  Label notMethod;
3713  __ movl(rlocals, rdx);
3714  __ andl(rlocals, (1 << ConstantPoolCacheEntry::is_forced_virtual_shift));
3715
3716  __ jcc(Assembler::zero, notMethod);
3717
3718  invokevirtual_helper(rbx, rcx, rdx);
3719  __ bind(notMethod);
3720
3721  // Get receiver klass into rdx - also a null check
3722  __ restore_locals();  // restore r14
3723  __ null_check(rcx, oopDesc::klass_offset_in_bytes());
3724  __ load_klass(rdx, rcx);
3725
3726  // profile this call
3727  __ profile_virtual_call(rdx, rbcp, rlocals);
3728
3729  Label no_such_interface, no_such_method;
3730
3731  __ lookup_interface_method(// inputs: rec. class, interface, itable index
3732                             rdx, rax, rbx,
3733                             // outputs: method, scan temp. reg
3734                             rbx, rbcp,
3735                             no_such_interface);
3736
3737  // rbx: Method* to call
3738  // rcx: receiver
3739  // Check for abstract method error
3740  // Note: This should be done more efficiently via a throw_abstract_method_error
3741  //       interpreter entry point and a conditional jump to it in case of a null
3742  //       method.
3743  __ testptr(rbx, rbx);
3744  __ jcc(Assembler::zero, no_such_method);
3745
3746  __ profile_called_method(rbx, rbcp, rdx);
3747  __ profile_arguments_type(rdx, rbx, rbcp, true);
3748
3749  // do the call
3750  // rcx: receiver
3751  // rbx: Method*
3752  __ jump_from_interpreted(rbx, rdx);
3753  __ should_not_reach_here();
3754
3755  // exception handling code follows...
3756  // note: must restore interpreter registers to canonical
3757  //       state for exception handling to work correctly!
3758
3759  __ bind(no_such_method);
3760  // throw exception
3761  __ pop(rbx);           // pop return address (pushed by prepare_invoke)
3762  __ restore_bcp();      // rbcp must be correct for exception handler   (was destroyed)
3763  __ restore_locals();   // make sure locals pointer is correct as well (was destroyed)
3764  __ call_VM(noreg, CAST_FROM_FN_PTR(address, InterpreterRuntime::throw_AbstractMethodError));
3765  // the call_VM checks for exception, so we should never return here.
3766  __ should_not_reach_here();
3767
3768  __ bind(no_such_interface);
3769  // throw exception
3770  __ pop(rbx);           // pop return address (pushed by prepare_invoke)
3771  __ restore_bcp();      // rbcp must be correct for exception handler   (was destroyed)
3772  __ restore_locals();   // make sure locals pointer is correct as well (was destroyed)
3773  __ call_VM(noreg, CAST_FROM_FN_PTR(address,
3774                   InterpreterRuntime::throw_IncompatibleClassChangeError));
3775  // the call_VM checks for exception, so we should never return here.
3776  __ should_not_reach_here();
3777}
3778
3779void TemplateTable::invokehandle(int byte_no) {
3780  transition(vtos, vtos);
3781  assert(byte_no == f1_byte, "use this argument");
3782  const Register rbx_method = rbx;
3783  const Register rax_mtype  = rax;
3784  const Register rcx_recv   = rcx;
3785  const Register rdx_flags  = rdx;
3786
3787  prepare_invoke(byte_no, rbx_method, rax_mtype, rcx_recv);
3788  __ verify_method_ptr(rbx_method);
3789  __ verify_oop(rcx_recv);
3790  __ null_check(rcx_recv);
3791
3792  // rax: MethodType object (from cpool->resolved_references[f1], if necessary)
3793  // rbx: MH.invokeExact_MT method (from f2)
3794
3795  // Note:  rax_mtype is already pushed (if necessary) by prepare_invoke
3796
3797  // FIXME: profile the LambdaForm also
3798  __ profile_final_call(rax);
3799  __ profile_arguments_type(rdx, rbx_method, rbcp, true);
3800
3801  __ jump_from_interpreted(rbx_method, rdx);
3802}
3803
3804void TemplateTable::invokedynamic(int byte_no) {
3805  transition(vtos, vtos);
3806  assert(byte_no == f1_byte, "use this argument");
3807
3808  const Register rbx_method   = rbx;
3809  const Register rax_callsite = rax;
3810
3811  prepare_invoke(byte_no, rbx_method, rax_callsite);
3812
3813  // rax: CallSite object (from cpool->resolved_references[f1])
3814  // rbx: MH.linkToCallSite method (from f2)
3815
3816  // Note:  rax_callsite is already pushed by prepare_invoke
3817
3818  // %%% should make a type profile for any invokedynamic that takes a ref argument
3819  // profile this call
3820  __ profile_call(rbcp);
3821  __ profile_arguments_type(rdx, rbx_method, rbcp, false);
3822
3823  __ verify_oop(rax_callsite);
3824
3825  __ jump_from_interpreted(rbx_method, rdx);
3826}
3827
3828//-----------------------------------------------------------------------------
3829// Allocation
3830
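// The 'new' bytecode only allocates and zero-initializes the instance; the constructor
// runs via a separate invokespecial emitted by the compiler, e.g. (illustrative)
//   Foo f = new Foo();   =>   new #Foo; dup; invokespecial Foo.<init>()V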
3831void TemplateTable::_new() {
3832  transition(vtos, atos);
3833  __ get_unsigned_2_byte_index_at_bcp(rdx, 1);
3834  Label slow_case;
3835  Label slow_case_no_pop;
3836  Label done;
3837  Label initialize_header;
3838  Label initialize_object;  // including clearing the fields
3839  Label allocate_shared;
3840
3841  __ get_cpool_and_tags(rcx, rax);
3842
3843  // Make sure the class we're about to instantiate has been resolved.
3844  // This is done before loading the InstanceKlass to be consistent with the order
3845  // in which the constant pool is updated (see ConstantPool::klass_at_put)
3846  const int tags_offset = Array<u1>::base_offset_in_bytes();
3847  __ cmpb(Address(rax, rdx, Address::times_1, tags_offset), JVM_CONSTANT_Class);
3848  __ jcc(Assembler::notEqual, slow_case_no_pop);
3849
3850  // get InstanceKlass
3851  __ load_resolved_klass_at_index(rcx, rdx, rcx);
3852  __ push(rcx);  // save the klass for initializing the header
3853
3854  // make sure klass is fully initialized (the finalizer / slow-path check
3855  // is done against the layout helper below)
3856  __ cmpb(Address(rcx, InstanceKlass::init_state_offset()), InstanceKlass::fully_initialized);
3857  __ jcc(Assembler::notEqual, slow_case);
3858
3859  // get instance_size in InstanceKlass (scaled to a count of bytes)
3860  __ movl(rdx, Address(rcx, Klass::layout_helper_offset()));
3861  // test to see if it has a finalizer or is malformed in some way
3862  __ testl(rdx, Klass::_lh_instance_slow_path_bit);
3863  __ jcc(Assembler::notZero, slow_case);
3864
3865  //
3866  // Allocate the instance
3867  // 1) Try to allocate in the TLAB
3868  // 2) if fail and the object is large allocate in the shared Eden
3869  // 3) if the above fails (or is not applicable), go to a slow case
3870  // (creates a new TLAB, etc.)
3871
3872  const bool allow_shared_alloc =
3873    Universe::heap()->supports_inline_contig_alloc();
3874
3875  const Register thread = LP64_ONLY(r15_thread) NOT_LP64(rcx);
3876#ifndef _LP64
3877  if (UseTLAB || allow_shared_alloc) {
3878    __ get_thread(thread);
3879  }
3880#endif // _LP64
3881
3882  if (UseTLAB) {
3883    __ movptr(rax, Address(thread, in_bytes(JavaThread::tlab_top_offset())));
3884    __ lea(rbx, Address(rax, rdx, Address::times_1));
3885    __ cmpptr(rbx, Address(thread, in_bytes(JavaThread::tlab_end_offset())));
3886    __ jcc(Assembler::above, allow_shared_alloc ? allocate_shared : slow_case);
3887    __ movptr(Address(thread, in_bytes(JavaThread::tlab_top_offset())), rbx);
3888    if (ZeroTLAB) {
3889      // the fields have been already cleared
3890      __ jmp(initialize_header);
3891    } else {
3892      // initialize both the header and fields
3893      __ jmp(initialize_object);
3894    }
3895  }
3896
3897  // Allocation in the shared Eden, if allowed.
3898  //
3899  // rdx: instance size in bytes
3900  if (allow_shared_alloc) {
3901    __ bind(allocate_shared);
3902
3903    ExternalAddress heap_top((address)Universe::heap()->top_addr());
3904    ExternalAddress heap_end((address)Universe::heap()->end_addr());
3905
3906    Label retry;
3907    __ bind(retry);
3908    __ movptr(rax, heap_top);
3909    __ lea(rbx, Address(rax, rdx, Address::times_1));
3910    __ cmpptr(rbx, heap_end);
3911    __ jcc(Assembler::above, slow_case);
3912
3913    // Compare rax with the top addr, and if still equal, store the new
3914    // top addr in rbx at the address of the top addr pointer. Sets ZF if it was
3915    // equal, and clears it otherwise. Use lock prefix for atomicity on MPs.
3916    //
3917    // rax: object begin
3918    // rbx: object end
3919    // rdx: instance size in bytes
3920    __ locked_cmpxchgptr(rbx, heap_top);
3921
3922    // if someone beat us on the allocation, try again, otherwise continue
3923    __ jcc(Assembler::notEqual, retry);
3924
3925    __ incr_allocated_bytes(thread, rdx, 0);
3926  }
3927
3928  if (UseTLAB || Universe::heap()->supports_inline_contig_alloc()) {
3929    // The object is initialized before the header.  If the object size is
3930    // zero, go directly to the header initialization.
3931    __ bind(initialize_object);
3932    __ decrement(rdx, sizeof(oopDesc));
3933    __ jcc(Assembler::zero, initialize_header);
3934
3935    // Zero rcx for use as the clearing value and convert rdx from a byte
3936    // count into a count of 8-byte chunks for the initialization loop below.
3937    __ xorl(rcx, rcx);    // use zero reg to clear memory (shorter code)
3938    __ shrl(rdx, LogBytesPerLong); // divide by 8; carry flag set if the size was not a multiple of 8 (checked under ASSERT below)
3939
3940    // rdx must have been multiple of 8
3941#ifdef ASSERT
3942    // make sure rdx was multiple of 8
3943    Label L;
3944    // Ignore partial flag stall after shrl() since it is debug VM
3945    __ jccb(Assembler::carryClear, L);
3946    __ stop("object size is not multiple of 2 - adjust this code");
3947    __ bind(L);
3948    // rdx must be > 0, no extra check needed here
3949#endif
3950
3951    // initialize remaining object fields: rdx was a multiple of 8
3952    { Label loop;
3953      __ bind(loop);
3954      __ movptr(Address(rax, rdx, Address::times_8, sizeof(oopDesc) - 1*oopSize), rcx);
3955      NOT_LP64(__ movptr(Address(rax, rdx, Address::times_8, sizeof(oopDesc) - 2*oopSize), rcx));
3956      __ decrement(rdx);
3957      __ jcc(Assembler::notZero, loop);
3958    }
3959
3960    // initialize object header only.
3961    __ bind(initialize_header);
3962    if (UseBiasedLocking) {
3963      __ pop(rcx);   // get saved klass back in the register.
3964      __ movptr(rbx, Address(rcx, Klass::prototype_header_offset()));
3965      __ movptr(Address(rax, oopDesc::mark_offset_in_bytes ()), rbx);
3966    } else {
3967      __ movptr(Address(rax, oopDesc::mark_offset_in_bytes ()),
3968                (intptr_t)markOopDesc::prototype()); // header
3969      __ pop(rcx);   // get saved klass back in the register.
3970    }
3971#ifdef _LP64
3972    __ xorl(rsi, rsi); // use zero reg to clear memory (shorter code)
3973    __ store_klass_gap(rax, rsi);  // zero klass gap for compressed oops
3974#endif
3975    __ store_klass(rax, rcx);  // klass
3976
3977    {
3978      SkipIfEqual skip_if(_masm, &DTraceAllocProbes, 0);
3979      // Trigger dtrace event for fastpath
3980      __ push(atos);
3981      __ call_VM_leaf(
3982           CAST_FROM_FN_PTR(address, SharedRuntime::dtrace_object_alloc), rax);
3983      __ pop(atos);
3984    }
3985
3986    __ jmp(done);
3987  }
3988
3989  // slow case
3990  __ bind(slow_case);
3991  __ pop(rcx);   // discard the saved klass, restoring the stack to its state on entry
3992  __ bind(slow_case_no_pop);
3993
3994  Register rarg1 = LP64_ONLY(c_rarg1) NOT_LP64(rax);
3995  Register rarg2 = LP64_ONLY(c_rarg2) NOT_LP64(rdx);
3996
3997  __ get_constant_pool(rarg1);
3998  __ get_unsigned_2_byte_index_at_bcp(rarg2, 1);
3999  call_VM(rax, CAST_FROM_FN_PTR(address, InterpreterRuntime::_new), rarg1, rarg2);
4000  __ verify_oop(rax);
4001
4002  // continue
4003  __ bind(done);
4004}
4005
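// newarray allocates a one-dimensional primitive array: the element type tag is the
// unsigned byte at bcp+1 and the length is the int on tos, e.g. (illustrative)
//   new int[n]   =>   iload n; newarray int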
4006void TemplateTable::newarray() {
4007  transition(itos, atos);
4008  Register rarg1 = LP64_ONLY(c_rarg1) NOT_LP64(rdx);
4009  __ load_unsigned_byte(rarg1, at_bcp(1));
4010  call_VM(rax, CAST_FROM_FN_PTR(address, InterpreterRuntime::newarray),
4011          rarg1, rax);
4012}
4013
4014void TemplateTable::anewarray() {
4015  transition(itos, atos);
4016
4017  Register rarg1 = LP64_ONLY(c_rarg1) NOT_LP64(rcx);
4018  Register rarg2 = LP64_ONLY(c_rarg2) NOT_LP64(rdx);
4019
4020  __ get_unsigned_2_byte_index_at_bcp(rarg2, 1);
4021  __ get_constant_pool(rarg1);
4022  call_VM(rax, CAST_FROM_FN_PTR(address, InterpreterRuntime::anewarray),
4023          rarg1, rarg2, rax);
4024}
4025
4026void TemplateTable::arraylength() {
4027  transition(atos, itos);
4028  __ null_check(rax, arrayOopDesc::length_offset_in_bytes());
4029  __ movl(rax, Address(rax, arrayOopDesc::length_offset_in_bytes()));
4030}
4031
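// checkcast: e.g. (illustrative)  (Foo) obj  leaves obj on tos when it is null or a
// subtype of Foo, and otherwise jumps to the shared ClassCastException entry below.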
4032void TemplateTable::checkcast() {
4033  transition(atos, atos);
4034  Label done, is_null, ok_is_subtype, quicked, resolved;
4035  __ testptr(rax, rax); // object is in rax
4036  __ jcc(Assembler::zero, is_null);
4037
4038  // Get cpool & tags index
4039  __ get_cpool_and_tags(rcx, rdx); // rcx=cpool, rdx=tags array
4040  __ get_unsigned_2_byte_index_at_bcp(rbx, 1); // rbx=index
4041  // See if bytecode has already been quicked
4042  __ cmpb(Address(rdx, rbx,
4043                  Address::times_1,
4044                  Array<u1>::base_offset_in_bytes()),
4045          JVM_CONSTANT_Class);
4046  __ jcc(Assembler::equal, quicked);
4047  __ push(atos); // save receiver for result, and for GC
4048  call_VM(noreg, CAST_FROM_FN_PTR(address, InterpreterRuntime::quicken_io_cc));
4049
4050  // vm_result_2 has metadata result
4051#ifndef _LP64
4052  // borrow rdi from locals
4053  __ get_thread(rdi);
4054  __ get_vm_result_2(rax, rdi);
4055  __ restore_locals();
4056#else
4057  __ get_vm_result_2(rax, r15_thread);
4058#endif
4059
4060  __ pop_ptr(rdx); // restore receiver
4061  __ jmpb(resolved);
4062
4063  // Get superklass in rax and subklass in rbx
4064  __ bind(quicked);
4065  __ mov(rdx, rax); // Save object in rdx; rax needed for subtype check
4066  __ load_resolved_klass_at_index(rcx, rbx, rax);
4067
4068  __ bind(resolved);
4069  __ load_klass(rbx, rdx);
4070
4071  // Generate subtype check.  Blows rcx, rdi.  Object in rdx.
4072  // Superklass in rax.  Subklass in rbx.
4073  __ gen_subtype_check(rbx, ok_is_subtype);
4074
4075  // Come here on failure
4076  __ push_ptr(rdx);
4077  // object is at TOS
4078  __ jump(ExternalAddress(Interpreter::_throw_ClassCastException_entry));
4079
4080  // Come here on success
4081  __ bind(ok_is_subtype);
4082  __ mov(rax, rdx); // Restore object from rdx into rax
4083
4084  // Collect counts on whether this check-cast sees NULLs a lot or not.
4085  if (ProfileInterpreter) {
4086    __ jmp(done);
4087    __ bind(is_null);
4088    __ profile_null_seen(rcx);
4089  } else {
4090    __ bind(is_null);   // same as 'done'
4091  }
4092  __ bind(done);
4093}
4094
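// instanceof: e.g. (illustrative)  obj instanceof Foo  pushes 1 only when obj is
// non-null and a subtype of Foo; see the result summary at the end of this method.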
4095void TemplateTable::instanceof() {
4096  transition(atos, itos);
4097  Label done, is_null, ok_is_subtype, quicked, resolved;
4098  __ testptr(rax, rax);
4099  __ jcc(Assembler::zero, is_null);
4100
4101  // Get cpool & tags index
4102  __ get_cpool_and_tags(rcx, rdx); // rcx=cpool, rdx=tags array
4103  __ get_unsigned_2_byte_index_at_bcp(rbx, 1); // rbx=index
4104  // See if bytecode has already been quicked
4105  __ cmpb(Address(rdx, rbx,
4106                  Address::times_1,
4107                  Array<u1>::base_offset_in_bytes()),
4108          JVM_CONSTANT_Class);
4109  __ jcc(Assembler::equal, quicked);
4110
4111  __ push(atos); // save receiver for result, and for GC
4112  call_VM(noreg, CAST_FROM_FN_PTR(address, InterpreterRuntime::quicken_io_cc));
4113  // vm_result_2 has metadata result
4114
4115#ifndef _LP64
4116  // borrow rdi from locals
4117  __ get_thread(rdi);
4118  __ get_vm_result_2(rax, rdi);
4119  __ restore_locals();
4120#else
4121  __ get_vm_result_2(rax, r15_thread);
4122#endif
4123
4124  __ pop_ptr(rdx); // restore receiver
4125  __ verify_oop(rdx);
4126  __ load_klass(rdx, rdx);
4127  __ jmpb(resolved);
4128
4129  // Get superklass in rax and subklass in rdx
4130  __ bind(quicked);
4131  __ load_klass(rdx, rax);
4132  __ load_resolved_klass_at_index(rcx, rbx, rax);
4133
4134  __ bind(resolved);
4135
4136  // Generate subtype check.  Blows rcx, rdi
4137  // Superklass in rax.  Subklass in rdx.
4138  __ gen_subtype_check(rdx, ok_is_subtype);
4139
4140  // Come here on failure
4141  __ xorl(rax, rax);
4142  __ jmpb(done);
4143  // Come here on success
4144  __ bind(ok_is_subtype);
4145  __ movl(rax, 1);
4146
4147  // Collect counts on whether this test sees NULLs a lot or not.
4148  if (ProfileInterpreter) {
4149    __ jmp(done);
4150    __ bind(is_null);
4151    __ profile_null_seen(rcx);
4152  } else {
4153    __ bind(is_null);   // same as 'done'
4154  }
4155  __ bind(done);
4156  // rax = 0: obj == NULL or  obj is not an instanceof the specified klass
4157  // rax = 1: obj != NULL and obj is     an instanceof the specified klass
4158}
4159
4160
4161//----------------------------------------------------------------------------------------------------
4162// Breakpoints
4163void TemplateTable::_breakpoint() {
4164  // Note: We get here even if we are single stepping.
4165  // jbug insists on setting breakpoints at every bytecode
4166  // even if we are in single step mode.
4167
4168  transition(vtos, vtos);
4169
4170  Register rarg = LP64_ONLY(c_rarg1) NOT_LP64(rcx);
4171
4172  // get the unpatched byte code
4173  __ get_method(rarg);
4174  __ call_VM(noreg,
4175             CAST_FROM_FN_PTR(address,
4176                              InterpreterRuntime::get_original_bytecode_at),
4177             rarg, rbcp);
4178  __ mov(rbx, rax);  // why?
4179
4180  // post the breakpoint event
4181  __ get_method(rarg);
4182  __ call_VM(noreg,
4183             CAST_FROM_FN_PTR(address, InterpreterRuntime::_breakpoint),
4184             rarg, rbcp);
4185
4186  // complete the execution of original bytecode
4187  __ dispatch_only_normal(vtos);
4188}
4189
4190//-----------------------------------------------------------------------------
4191// Exceptions
4192
4193void TemplateTable::athrow() {
4194  transition(atos, vtos);
4195  __ null_check(rax);
4196  __ jump(ExternalAddress(Interpreter::throw_exception_entry()));
4197}
4198
4199//-----------------------------------------------------------------------------
4200// Synchronization
4201//
4202// Note: monitorenter & exit are symmetric routines, which is reflected
4203//       in the assembly code structure as well
4204//
4205// Stack layout:
4206//
4207// [expressions  ] <--- rsp               = expression stack top
4208// ..
4209// [expressions  ]
4210// [monitor entry] <--- monitor block top = expression stack bot
4211// ..
4212// [monitor entry]
4213// [frame data   ] <--- monitor block bot
4214// ...
4215// [saved rbp    ] <--- rbp
4216void TemplateTable::monitorenter() {
4217  transition(atos, vtos);
4218
4219  // check for NULL object
4220  __ null_check(rax);
4221
4222  const Address monitor_block_top(
4223        rbp, frame::interpreter_frame_monitor_block_top_offset * wordSize);
4224  const Address monitor_block_bot(
4225        rbp, frame::interpreter_frame_initial_sp_offset * wordSize);
4226  const int entry_size = frame::interpreter_frame_monitor_size() * wordSize;
4227
4228  Label allocated;
4229
4230  Register rtop = LP64_ONLY(c_rarg3) NOT_LP64(rcx);
4231  Register rbot = LP64_ONLY(c_rarg2) NOT_LP64(rbx);
4232  Register rmon = LP64_ONLY(c_rarg1) NOT_LP64(rdx);
4233
4234  // initialize entry pointer
4235  __ xorl(rmon, rmon); // points to free slot or NULL
4236
4237  // find a free slot in the monitor block (result in rmon)
4238  {
4239    Label entry, loop, exit;
4240    __ movptr(rtop, monitor_block_top); // points to current entry,
4241                                        // starting with top-most entry
4242    __ lea(rbot, monitor_block_bot);    // points to word before bottom
4243                                        // of monitor block
4244    __ jmpb(entry);
4245
4246    __ bind(loop);
4247    // check if current entry is used
4248    __ cmpptr(Address(rtop, BasicObjectLock::obj_offset_in_bytes()), (int32_t) NULL_WORD);
4249    // if not used then remember entry in rmon
4250    __ cmovptr(Assembler::equal, rmon, rtop);   // cmov => cmovptr
4251    // check if current entry is for same object
4252    __ cmpptr(rax, Address(rtop, BasicObjectLock::obj_offset_in_bytes()));
4253    // if same object then stop searching
4254    __ jccb(Assembler::equal, exit);
4255    // otherwise advance to next entry
4256    __ addptr(rtop, entry_size);
4257    __ bind(entry);
4258    // check if bottom reached
4259    __ cmpptr(rtop, rbot);
4260    // if not at bottom then check this entry
4261    __ jcc(Assembler::notEqual, loop);
4262    __ bind(exit);
4263  }
4264
4265  __ testptr(rmon, rmon); // check if a slot has been found
4266  __ jcc(Assembler::notZero, allocated); // if found, continue with that one
4267
4268  // allocate one if there's no free slot
4269  {
4270    Label entry, loop;
4271    // 1. compute new pointers          // rsp: old expression stack top
4272    __ movptr(rmon, monitor_block_bot); // rmon: old expression stack bottom
4273    __ subptr(rsp, entry_size);         // move expression stack top
4274    __ subptr(rmon, entry_size);        // move expression stack bottom
4275    __ mov(rtop, rsp);                  // set start value for copy loop
4276    __ movptr(monitor_block_bot, rmon); // set new monitor block bottom
4277    __ jmp(entry);
4278    // 2. move expression stack contents
4279    __ bind(loop);
4280    __ movptr(rbot, Address(rtop, entry_size)); // load expression stack
4281                                                // word from old location
4282    __ movptr(Address(rtop, 0), rbot);          // and store it at new location
4283    __ addptr(rtop, wordSize);                  // advance to next word
4284    __ bind(entry);
4285    __ cmpptr(rtop, rmon);                      // check if bottom reached
4286    __ jcc(Assembler::notEqual, loop);          // if not at bottom then
4287                                                // copy next word
4288  }
4289
4290  // call run-time routine
4291  // rmon: points to monitor entry
4292  __ bind(allocated);
4293
4294  // Increment bcp to point to the next bytecode, so exception
4295  // handling for async. exceptions works correctly.
4296  // The object has already been popped from the stack, so the
4297  // expression stack looks correct.
4298  __ increment(rbcp);
4299
4300  // store object
4301  __ movptr(Address(rmon, BasicObjectLock::obj_offset_in_bytes()), rax);
4302  __ lock_object(rmon);
4303
4304  // check to make sure this monitor doesn't cause stack overflow after locking
4305  __ save_bcp();  // in case of exception
4306  __ generate_stack_overflow_check(0);
4307
4308  // The bcp has already been incremented. Just need to dispatch to
4309  // next instruction.
4310  __ dispatch_next(vtos);
4311}
4312
4313void TemplateTable::monitorexit() {
4314  transition(atos, vtos);
4315
4316  // check for NULL object
4317  __ null_check(rax);
4318
4319  const Address monitor_block_top(
4320        rbp, frame::interpreter_frame_monitor_block_top_offset * wordSize);
4321  const Address monitor_block_bot(
4322        rbp, frame::interpreter_frame_initial_sp_offset * wordSize);
4323  const int entry_size = frame::interpreter_frame_monitor_size() * wordSize;
4324
4325  Register rtop = LP64_ONLY(c_rarg1) NOT_LP64(rdx);
4326  Register rbot = LP64_ONLY(c_rarg2) NOT_LP64(rbx);
4327
4328  Label found;
4329
4330  // find matching slot
4331  {
4332    Label entry, loop;
4333    __ movptr(rtop, monitor_block_top); // points to current entry,
4334                                        // starting with top-most entry
4335    __ lea(rbot, monitor_block_bot);    // points to word before bottom
4336                                        // of monitor block
4337    __ jmpb(entry);
4338
4339    __ bind(loop);
4340    // check if current entry is for same object
4341    __ cmpptr(rax, Address(rtop, BasicObjectLock::obj_offset_in_bytes()));
4342    // if same object then stop searching
4343    __ jcc(Assembler::equal, found);
4344    // otherwise advance to next entry
4345    __ addptr(rtop, entry_size);
4346    __ bind(entry);
4347    // check if bottom reached
4348    __ cmpptr(rtop, rbot);
4349    // if not at bottom then check this entry
4350    __ jcc(Assembler::notEqual, loop);
4351  }
4352
4353  // error handling. Unlocking was not block-structured
4354  __ call_VM(noreg, CAST_FROM_FN_PTR(address,
4355                   InterpreterRuntime::throw_illegal_monitor_state_exception));
4356  __ should_not_reach_here();
4357
4358  // call run-time routine
4359  __ bind(found);
4360  __ push_ptr(rax); // make sure object is on stack (contract with oopMaps)
4361  __ unlock_object(rtop);
4362  __ pop_ptr(rax); // discard object
4363}
4364
4365// Wide instructions
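// The wide prefix re-dispatches the next bytecode through the wide entry table so that
// local-variable bytecodes (iload, astore, iinc, ret, ...) can use 16-bit operands,
// e.g. (illustrative)  wide iinc 300, 7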
4366void TemplateTable::wide() {
4367  transition(vtos, vtos);
4368  __ load_unsigned_byte(rbx, at_bcp(1));
4369  ExternalAddress wtable((address)Interpreter::_wentry_point);
4370  __ jump(ArrayAddress(wtable, Address(noreg, rbx, Address::times_ptr)));
4371  // Note: the rbcp increment step is part of the individual wide bytecode implementations
4372}
4373
4374// Multi arrays
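// multianewarray: the dimension count is the byte at bcp+3 and the dimension sizes are
// on the expression stack, e.g. (illustrative)
//   new int[2][3]   =>   iconst_2; iconst_3; multianewarray [[I, 2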
4375void TemplateTable::multianewarray() {
4376  transition(vtos, atos);
4377
4378  Register rarg = LP64_ONLY(c_rarg1) NOT_LP64(rax);
4379  __ load_unsigned_byte(rax, at_bcp(3)); // get number of dimensions
4380  // last dim is on top of stack; we want address of first one:
4381  // first_addr = last_addr + (ndims - 1) * stackElementSize - 1*wordSize;
4382  // the latter wordSize adjustment points to the beginning of the array.
4383  __ lea(rarg, Address(rsp, rax, Interpreter::stackElementScale(), -wordSize));
4384  call_VM(rax, CAST_FROM_FN_PTR(address, InterpreterRuntime::multianewarray), rarg);
4385  __ load_unsigned_byte(rbx, at_bcp(3));
4386  __ lea(rsp, Address(rsp, rbx, Interpreter::stackElementScale()));  // get rid of counts
4387}
4388