1/*
2 * Copyright (c) 1997, 2017, Oracle and/or its affiliates. All rights reserved.
3 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
4 *
5 * This code is free software; you can redistribute it and/or modify it
6 * under the terms of the GNU General Public License version 2 only, as
7 * published by the Free Software Foundation.
8 *
9 * This code is distributed in the hope that it will be useful, but WITHOUT
10 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
11 * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
12 * version 2 for more details (a copy is included in the LICENSE file that
13 * accompanied this code).
14 *
15 * You should have received a copy of the GNU General Public License version
16 * 2 along with this work; if not, write to the Free Software Foundation,
17 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
18 *
19 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
20 * or visit www.oracle.com if you need additional information or have any
21 * questions.
22 *
23 */
24
25#include "precompiled.hpp"
26#include "asm/macroAssembler.hpp"
27#include "interpreter/interpreter.hpp"
28#include "interpreter/interpreterRuntime.hpp"
29#include "interpreter/interp_masm.hpp"
30#include "interpreter/templateTable.hpp"
31#include "memory/universe.inline.hpp"
32#include "oops/methodData.hpp"
33#include "oops/objArrayKlass.hpp"
34#include "oops/oop.inline.hpp"
35#include "prims/methodHandles.hpp"
36#include "runtime/sharedRuntime.hpp"
37#include "runtime/stubRoutines.hpp"
38#include "runtime/synchronizer.hpp"
39#include "utilities/macros.hpp"
40
41#define __ _masm->
42
43// Global Register Names
44static const Register rbcp     = LP64_ONLY(r13) NOT_LP64(rsi);
45static const Register rlocals  = LP64_ONLY(r14) NOT_LP64(rdi);
46
47// Platform-dependent initialization
48void TemplateTable::pd_initialize() {
49  // No x86 specific initialization
50}
51
52// Address Computation: local variables
53static inline Address iaddress(int n) {
54  return Address(rlocals, Interpreter::local_offset_in_bytes(n));
55}
56
57static inline Address laddress(int n) {
58  return iaddress(n + 1);
59}
60
61#ifndef _LP64
62static inline Address haddress(int n) {
63  return iaddress(n + 0);
64}
65#endif
66
67static inline Address faddress(int n) {
68  return iaddress(n);
69}
70
71static inline Address daddress(int n) {
72  return laddress(n);
73}
74
75static inline Address aaddress(int n) {
76  return iaddress(n);
77}
78
79static inline Address iaddress(Register r) {
80  return Address(rlocals, r, Address::times_ptr);
81}
82
83static inline Address laddress(Register r) {
84  return Address(rlocals, r, Address::times_ptr, Interpreter::local_offset_in_bytes(1));
85}
86
87#ifndef _LP64
88static inline Address haddress(Register r)       {
89  return Address(rlocals, r, Interpreter::stackElementScale(), Interpreter::local_offset_in_bytes(0));
90}
91#endif
92
93static inline Address faddress(Register r) {
94  return iaddress(r);
95}
96
97static inline Address daddress(Register r) {
98  return laddress(r);
99}
100
101static inline Address aaddress(Register r) {
102  return iaddress(r);
103}
104
105
106// expression stack
107// (Note: Must not use symmetric equivalents at_rsp_m1/2 since they store
108// data beyond the rsp which is potentially unsafe in an MT environment;
109// an interrupt may overwrite that data.)
110static inline Address at_rsp   () {
111  return Address(rsp, 0);
112}
113
114// At top of Java expression stack which may be different than esp().  It
115// isn't for category 1 objects.
116static inline Address at_tos   () {
117  return Address(rsp,  Interpreter::expr_offset_in_bytes(0));
118}
119
120static inline Address at_tos_p1() {
121  return Address(rsp,  Interpreter::expr_offset_in_bytes(1));
122}
123
124static inline Address at_tos_p2() {
125  return Address(rsp,  Interpreter::expr_offset_in_bytes(2));
126}
127
128// Condition conversion
129static Assembler::Condition j_not(TemplateTable::Condition cc) {
130  switch (cc) {
131  case TemplateTable::equal        : return Assembler::notEqual;
132  case TemplateTable::not_equal    : return Assembler::equal;
133  case TemplateTable::less         : return Assembler::greaterEqual;
134  case TemplateTable::less_equal   : return Assembler::greater;
135  case TemplateTable::greater      : return Assembler::lessEqual;
136  case TemplateTable::greater_equal: return Assembler::less;
137  }
138  ShouldNotReachHere();
139  return Assembler::zero;
140}
141
142
143
144// Miscelaneous helper routines
145// Store an oop (or NULL) at the address described by obj.
146// If val == noreg this means store a NULL
147
148
149static void do_oop_store(InterpreterMacroAssembler* _masm,
150                         Address obj,
151                         Register val,
152                         BarrierSet::Name barrier,
153                         bool precise) {
154  assert(val == noreg || val == rax, "parameter is just for looks");
155  switch (barrier) {
156#if INCLUDE_ALL_GCS
157    case BarrierSet::G1SATBCTLogging:
158      {
159        // flatten object address if needed
160        // We do it regardless of precise because we need the registers
161        if (obj.index() == noreg && obj.disp() == 0) {
162          if (obj.base() != rdx) {
163            __ movptr(rdx, obj.base());
164          }
165        } else {
166          __ lea(rdx, obj);
167        }
168
169        Register rtmp    = LP64_ONLY(r8)         NOT_LP64(rsi);
170        Register rthread = LP64_ONLY(r15_thread) NOT_LP64(rcx);
171
172        NOT_LP64(__ get_thread(rcx));
173        NOT_LP64(__ save_bcp());
174
175        __ g1_write_barrier_pre(rdx /* obj */,
176                                rbx /* pre_val */,
177                                rthread /* thread */,
178                                rtmp  /* tmp */,
179                                val != noreg /* tosca_live */,
180                                false /* expand_call */);
181        if (val == noreg) {
182          __ store_heap_oop_null(Address(rdx, 0));
183        } else {
184          // G1 barrier needs uncompressed oop for region cross check.
185          Register new_val = val;
186          if (UseCompressedOops) {
187            new_val = rbx;
188            __ movptr(new_val, val);
189          }
190          __ store_heap_oop(Address(rdx, 0), val);
191          __ g1_write_barrier_post(rdx /* store_adr */,
192                                   new_val /* new_val */,
193                                   rthread /* thread */,
194                                   rtmp /* tmp */,
195                                   rbx /* tmp2 */);
196        }
197        NOT_LP64( __ restore_bcp());
198      }
199      break;
200#endif // INCLUDE_ALL_GCS
201    case BarrierSet::CardTableForRS:
202    case BarrierSet::CardTableExtension:
203      {
204        if (val == noreg) {
205          __ store_heap_oop_null(obj);
206        } else {
207          __ store_heap_oop(obj, val);
208          // flatten object address if needed
209          if (!precise || (obj.index() == noreg && obj.disp() == 0)) {
210            __ store_check(obj.base());
211          } else {
212            __ lea(rdx, obj);
213            __ store_check(rdx);
214          }
215        }
216      }
217      break;
218    case BarrierSet::ModRef:
219      if (val == noreg) {
220        __ store_heap_oop_null(obj);
221      } else {
222        __ store_heap_oop(obj, val);
223      }
224      break;
225    default      :
226      ShouldNotReachHere();
227
228  }
229}
230
231Address TemplateTable::at_bcp(int offset) {
232  assert(_desc->uses_bcp(), "inconsistent uses_bcp information");
233  return Address(rbcp, offset);
234}
235
236
237void TemplateTable::patch_bytecode(Bytecodes::Code bc, Register bc_reg,
238                                   Register temp_reg, bool load_bc_into_bc_reg/*=true*/,
239                                   int byte_no) {
240  if (!RewriteBytecodes)  return;
241  Label L_patch_done;
242
243  switch (bc) {
244  case Bytecodes::_fast_aputfield:
245  case Bytecodes::_fast_bputfield:
246  case Bytecodes::_fast_zputfield:
247  case Bytecodes::_fast_cputfield:
248  case Bytecodes::_fast_dputfield:
249  case Bytecodes::_fast_fputfield:
250  case Bytecodes::_fast_iputfield:
251  case Bytecodes::_fast_lputfield:
252  case Bytecodes::_fast_sputfield:
253    {
254      // We skip bytecode quickening for putfield instructions when
255      // the put_code written to the constant pool cache is zero.
256      // This is required so that every execution of this instruction
257      // calls out to InterpreterRuntime::resolve_get_put to do
258      // additional, required work.
259      assert(byte_no == f1_byte || byte_no == f2_byte, "byte_no out of range");
260      assert(load_bc_into_bc_reg, "we use bc_reg as temp");
261      __ get_cache_and_index_and_bytecode_at_bcp(temp_reg, bc_reg, temp_reg, byte_no, 1);
262      __ movl(bc_reg, bc);
263      __ cmpl(temp_reg, (int) 0);
264      __ jcc(Assembler::zero, L_patch_done);  // don't patch
265    }
266    break;
267  default:
268    assert(byte_no == -1, "sanity");
269    // the pair bytecodes have already done the load.
270    if (load_bc_into_bc_reg) {
271      __ movl(bc_reg, bc);
272    }
273  }
274
275  if (JvmtiExport::can_post_breakpoint()) {
276    Label L_fast_patch;
277    // if a breakpoint is present we can't rewrite the stream directly
278    __ movzbl(temp_reg, at_bcp(0));
279    __ cmpl(temp_reg, Bytecodes::_breakpoint);
280    __ jcc(Assembler::notEqual, L_fast_patch);
281    __ get_method(temp_reg);
282    // Let breakpoint table handling rewrite to quicker bytecode
283    __ call_VM(noreg, CAST_FROM_FN_PTR(address, InterpreterRuntime::set_original_bytecode_at), temp_reg, rbcp, bc_reg);
284#ifndef ASSERT
285    __ jmpb(L_patch_done);
286#else
287    __ jmp(L_patch_done);
288#endif
289    __ bind(L_fast_patch);
290  }
291
292#ifdef ASSERT
293  Label L_okay;
294  __ load_unsigned_byte(temp_reg, at_bcp(0));
295  __ cmpl(temp_reg, (int) Bytecodes::java_code(bc));
296  __ jcc(Assembler::equal, L_okay);
297  __ cmpl(temp_reg, bc_reg);
298  __ jcc(Assembler::equal, L_okay);
299  __ stop("patching the wrong bytecode");
300  __ bind(L_okay);
301#endif
302
303  // patch bytecode
304  __ movb(at_bcp(0), bc_reg);
305  __ bind(L_patch_done);
306}
307// Individual instructions
308
309
310void TemplateTable::nop() {
311  transition(vtos, vtos);
312  // nothing to do
313}
314
315void TemplateTable::shouldnotreachhere() {
316  transition(vtos, vtos);
317  __ stop("shouldnotreachhere bytecode");
318}
319
320void TemplateTable::aconst_null() {
321  transition(vtos, atos);
322  __ xorl(rax, rax);
323}
324
325void TemplateTable::iconst(int value) {
326  transition(vtos, itos);
327  if (value == 0) {
328    __ xorl(rax, rax);
329  } else {
330    __ movl(rax, value);
331  }
332}
333
334void TemplateTable::lconst(int value) {
335  transition(vtos, ltos);
336  if (value == 0) {
337    __ xorl(rax, rax);
338  } else {
339    __ movl(rax, value);
340  }
341#ifndef _LP64
342  assert(value >= 0, "check this code");
343  __ xorptr(rdx, rdx);
344#endif
345}
346
347
348
349void TemplateTable::fconst(int value) {
350  transition(vtos, ftos);
351  if (UseSSE >= 1) {
352    static float one = 1.0f, two = 2.0f;
353    switch (value) {
354    case 0:
355      __ xorps(xmm0, xmm0);
356      break;
357    case 1:
358      __ movflt(xmm0, ExternalAddress((address) &one));
359      break;
360    case 2:
361      __ movflt(xmm0, ExternalAddress((address) &two));
362      break;
363    default:
364      ShouldNotReachHere();
365      break;
366    }
367  } else {
368#ifdef _LP64
369    ShouldNotReachHere();
370#else
371           if (value == 0) { __ fldz();
372    } else if (value == 1) { __ fld1();
373    } else if (value == 2) { __ fld1(); __ fld1(); __ faddp(); // should do a better solution here
374    } else                 { ShouldNotReachHere();
375    }
376#endif // _LP64
377  }
378}
379
380void TemplateTable::dconst(int value) {
381  transition(vtos, dtos);
382  if (UseSSE >= 2) {
383    static double one = 1.0;
384    switch (value) {
385    case 0:
386      __ xorpd(xmm0, xmm0);
387      break;
388    case 1:
389      __ movdbl(xmm0, ExternalAddress((address) &one));
390      break;
391    default:
392      ShouldNotReachHere();
393      break;
394    }
395  } else {
396#ifdef _LP64
397    ShouldNotReachHere();
398#else
399           if (value == 0) { __ fldz();
400    } else if (value == 1) { __ fld1();
401    } else                 { ShouldNotReachHere();
402    }
403#endif
404  }
405}
406
407void TemplateTable::bipush() {
408  transition(vtos, itos);
409  __ load_signed_byte(rax, at_bcp(1));
410}
411
412void TemplateTable::sipush() {
413  transition(vtos, itos);
414  __ load_unsigned_short(rax, at_bcp(1));
415  __ bswapl(rax);
416  __ sarl(rax, 16);
417}
418
419void TemplateTable::ldc(bool wide) {
420  transition(vtos, vtos);
421  Register rarg = NOT_LP64(rcx) LP64_ONLY(c_rarg1);
422  Label call_ldc, notFloat, notClass, Done;
423
424  if (wide) {
425    __ get_unsigned_2_byte_index_at_bcp(rbx, 1);
426  } else {
427    __ load_unsigned_byte(rbx, at_bcp(1));
428  }
429
430  __ get_cpool_and_tags(rcx, rax);
431  const int base_offset = ConstantPool::header_size() * wordSize;
432  const int tags_offset = Array<u1>::base_offset_in_bytes();
433
434  // get type
435  __ movzbl(rdx, Address(rax, rbx, Address::times_1, tags_offset));
436
437  // unresolved class - get the resolved class
438  __ cmpl(rdx, JVM_CONSTANT_UnresolvedClass);
439  __ jccb(Assembler::equal, call_ldc);
440
441  // unresolved class in error state - call into runtime to throw the error
442  // from the first resolution attempt
443  __ cmpl(rdx, JVM_CONSTANT_UnresolvedClassInError);
444  __ jccb(Assembler::equal, call_ldc);
445
446  // resolved class - need to call vm to get java mirror of the class
447  __ cmpl(rdx, JVM_CONSTANT_Class);
448  __ jcc(Assembler::notEqual, notClass);
449
450  __ bind(call_ldc);
451
452  __ movl(rarg, wide);
453  call_VM(rax, CAST_FROM_FN_PTR(address, InterpreterRuntime::ldc), rarg);
454
455  __ push(atos);
456  __ jmp(Done);
457
458  __ bind(notClass);
459  __ cmpl(rdx, JVM_CONSTANT_Float);
460  __ jccb(Assembler::notEqual, notFloat);
461
462  // ftos
463  __ load_float(Address(rcx, rbx, Address::times_ptr, base_offset));
464  __ push(ftos);
465  __ jmp(Done);
466
467  __ bind(notFloat);
468#ifdef ASSERT
469  {
470    Label L;
471    __ cmpl(rdx, JVM_CONSTANT_Integer);
472    __ jcc(Assembler::equal, L);
473    // String and Object are rewritten to fast_aldc
474    __ stop("unexpected tag type in ldc");
475    __ bind(L);
476  }
477#endif
478  // itos JVM_CONSTANT_Integer only
479  __ movl(rax, Address(rcx, rbx, Address::times_ptr, base_offset));
480  __ push(itos);
481  __ bind(Done);
482}
483
484// Fast path for caching oop constants.
485void TemplateTable::fast_aldc(bool wide) {
486  transition(vtos, atos);
487
488  Register result = rax;
489  Register tmp = rdx;
490  int index_size = wide ? sizeof(u2) : sizeof(u1);
491
492  Label resolved;
493
494  // We are resolved if the resolved reference cache entry contains a
495  // non-null object (String, MethodType, etc.)
496  assert_different_registers(result, tmp);
497  __ get_cache_index_at_bcp(tmp, 1, index_size);
498  __ load_resolved_reference_at_index(result, tmp);
499  __ testl(result, result);
500  __ jcc(Assembler::notZero, resolved);
501
502  address entry = CAST_FROM_FN_PTR(address, InterpreterRuntime::resolve_ldc);
503
504  // first time invocation - must resolve first
505  __ movl(tmp, (int)bytecode());
506  __ call_VM(result, entry, tmp);
507
508  __ bind(resolved);
509
510  if (VerifyOops) {
511    __ verify_oop(result);
512  }
513}
514
515void TemplateTable::ldc2_w() {
516  transition(vtos, vtos);
517  Label Long, Done;
518  __ get_unsigned_2_byte_index_at_bcp(rbx, 1);
519
520  __ get_cpool_and_tags(rcx, rax);
521  const int base_offset = ConstantPool::header_size() * wordSize;
522  const int tags_offset = Array<u1>::base_offset_in_bytes();
523
524  // get type
525  __ cmpb(Address(rax, rbx, Address::times_1, tags_offset),
526          JVM_CONSTANT_Double);
527  __ jccb(Assembler::notEqual, Long);
528
529  // dtos
530  __ load_double(Address(rcx, rbx, Address::times_ptr, base_offset));
531  __ push(dtos);
532
533  __ jmpb(Done);
534  __ bind(Long);
535
536  // ltos
537  __ movptr(rax, Address(rcx, rbx, Address::times_ptr, base_offset + 0 * wordSize));
538  NOT_LP64(__ movptr(rdx, Address(rcx, rbx, Address::times_ptr, base_offset + 1 * wordSize)));
539  __ push(ltos);
540
541  __ bind(Done);
542}
543
544void TemplateTable::locals_index(Register reg, int offset) {
545  __ load_unsigned_byte(reg, at_bcp(offset));
546  __ negptr(reg);
547}
548
549void TemplateTable::iload() {
550  iload_internal();
551}
552
553void TemplateTable::nofast_iload() {
554  iload_internal(may_not_rewrite);
555}
556
557void TemplateTable::iload_internal(RewriteControl rc) {
558  transition(vtos, itos);
559  if (RewriteFrequentPairs && rc == may_rewrite) {
560    Label rewrite, done;
561    const Register bc = LP64_ONLY(c_rarg3) NOT_LP64(rcx);
562    LP64_ONLY(assert(rbx != bc, "register damaged"));
563
564    // get next byte
565    __ load_unsigned_byte(rbx,
566                          at_bcp(Bytecodes::length_for(Bytecodes::_iload)));
567    // if _iload, wait to rewrite to iload2.  We only want to rewrite the
568    // last two iloads in a pair.  Comparing against fast_iload means that
569    // the next bytecode is neither an iload or a caload, and therefore
570    // an iload pair.
571    __ cmpl(rbx, Bytecodes::_iload);
572    __ jcc(Assembler::equal, done);
573
574    __ cmpl(rbx, Bytecodes::_fast_iload);
575    __ movl(bc, Bytecodes::_fast_iload2);
576
577    __ jccb(Assembler::equal, rewrite);
578
579    // if _caload, rewrite to fast_icaload
580    __ cmpl(rbx, Bytecodes::_caload);
581    __ movl(bc, Bytecodes::_fast_icaload);
582    __ jccb(Assembler::equal, rewrite);
583
584    // rewrite so iload doesn't check again.
585    __ movl(bc, Bytecodes::_fast_iload);
586
587    // rewrite
588    // bc: fast bytecode
589    __ bind(rewrite);
590    patch_bytecode(Bytecodes::_iload, bc, rbx, false);
591    __ bind(done);
592  }
593
594  // Get the local value into tos
595  locals_index(rbx);
596  __ movl(rax, iaddress(rbx));
597}
598
599void TemplateTable::fast_iload2() {
600  transition(vtos, itos);
601  locals_index(rbx);
602  __ movl(rax, iaddress(rbx));
603  __ push(itos);
604  locals_index(rbx, 3);
605  __ movl(rax, iaddress(rbx));
606}
607
608void TemplateTable::fast_iload() {
609  transition(vtos, itos);
610  locals_index(rbx);
611  __ movl(rax, iaddress(rbx));
612}
613
614void TemplateTable::lload() {
615  transition(vtos, ltos);
616  locals_index(rbx);
617  __ movptr(rax, laddress(rbx));
618  NOT_LP64(__ movl(rdx, haddress(rbx)));
619}
620
621void TemplateTable::fload() {
622  transition(vtos, ftos);
623  locals_index(rbx);
624  __ load_float(faddress(rbx));
625}
626
627void TemplateTable::dload() {
628  transition(vtos, dtos);
629  locals_index(rbx);
630  __ load_double(daddress(rbx));
631}
632
633void TemplateTable::aload() {
634  transition(vtos, atos);
635  locals_index(rbx);
636  __ movptr(rax, aaddress(rbx));
637}
638
639void TemplateTable::locals_index_wide(Register reg) {
640  __ load_unsigned_short(reg, at_bcp(2));
641  __ bswapl(reg);
642  __ shrl(reg, 16);
643  __ negptr(reg);
644}
645
646void TemplateTable::wide_iload() {
647  transition(vtos, itos);
648  locals_index_wide(rbx);
649  __ movl(rax, iaddress(rbx));
650}
651
652void TemplateTable::wide_lload() {
653  transition(vtos, ltos);
654  locals_index_wide(rbx);
655  __ movptr(rax, laddress(rbx));
656  NOT_LP64(__ movl(rdx, haddress(rbx)));
657}
658
659void TemplateTable::wide_fload() {
660  transition(vtos, ftos);
661  locals_index_wide(rbx);
662  __ load_float(faddress(rbx));
663}
664
665void TemplateTable::wide_dload() {
666  transition(vtos, dtos);
667  locals_index_wide(rbx);
668  __ load_double(daddress(rbx));
669}
670
671void TemplateTable::wide_aload() {
672  transition(vtos, atos);
673  locals_index_wide(rbx);
674  __ movptr(rax, aaddress(rbx));
675}
676
677void TemplateTable::index_check(Register array, Register index) {
678  // Pop ptr into array
679  __ pop_ptr(array);
680  index_check_without_pop(array, index);
681}
682
683void TemplateTable::index_check_without_pop(Register array, Register index) {
684  // destroys rbx
685  // check array
686  __ null_check(array, arrayOopDesc::length_offset_in_bytes());
687  // sign extend index for use by indexed load
688  __ movl2ptr(index, index);
689  // check index
690  __ cmpl(index, Address(array, arrayOopDesc::length_offset_in_bytes()));
691  if (index != rbx) {
692    // ??? convention: move aberrant index into rbx for exception message
693    assert(rbx != array, "different registers");
694    __ movl(rbx, index);
695  }
696  __ jump_cc(Assembler::aboveEqual,
697             ExternalAddress(Interpreter::_throw_ArrayIndexOutOfBoundsException_entry));
698}
699
700
701void TemplateTable::iaload() {
702  transition(itos, itos);
703  // rax: index
704  // rdx: array
705  index_check(rdx, rax); // kills rbx
706  __ movl(rax, Address(rdx, rax,
707                       Address::times_4,
708                       arrayOopDesc::base_offset_in_bytes(T_INT)));
709}
710
711void TemplateTable::laload() {
712  transition(itos, ltos);
713  // rax: index
714  // rdx: array
715  index_check(rdx, rax); // kills rbx
716  NOT_LP64(__ mov(rbx, rax));
717  // rbx,: index
718  __ movptr(rax, Address(rdx, rbx, Address::times_8, arrayOopDesc::base_offset_in_bytes(T_LONG) + 0 * wordSize));
719  NOT_LP64(__ movl(rdx, Address(rdx, rbx, Address::times_8, arrayOopDesc::base_offset_in_bytes(T_LONG) + 1 * wordSize)));
720}
721
722
723
724void TemplateTable::faload() {
725  transition(itos, ftos);
726  // rax: index
727  // rdx: array
728  index_check(rdx, rax); // kills rbx
729  __ load_float(Address(rdx, rax,
730                        Address::times_4,
731                        arrayOopDesc::base_offset_in_bytes(T_FLOAT)));
732}
733
734void TemplateTable::daload() {
735  transition(itos, dtos);
736  // rax: index
737  // rdx: array
738  index_check(rdx, rax); // kills rbx
739  __ load_double(Address(rdx, rax,
740                         Address::times_8,
741                         arrayOopDesc::base_offset_in_bytes(T_DOUBLE)));
742}
743
744void TemplateTable::aaload() {
745  transition(itos, atos);
746  // rax: index
747  // rdx: array
748  index_check(rdx, rax); // kills rbx
749  __ load_heap_oop(rax, Address(rdx, rax,
750                                UseCompressedOops ? Address::times_4 : Address::times_ptr,
751                                arrayOopDesc::base_offset_in_bytes(T_OBJECT)));
752}
753
754void TemplateTable::baload() {
755  transition(itos, itos);
756  // rax: index
757  // rdx: array
758  index_check(rdx, rax); // kills rbx
759  __ load_signed_byte(rax, Address(rdx, rax, Address::times_1, arrayOopDesc::base_offset_in_bytes(T_BYTE)));
760}
761
762void TemplateTable::caload() {
763  transition(itos, itos);
764  // rax: index
765  // rdx: array
766  index_check(rdx, rax); // kills rbx
767  __ load_unsigned_short(rax, Address(rdx, rax, Address::times_2, arrayOopDesc::base_offset_in_bytes(T_CHAR)));
768}
769
770// iload followed by caload frequent pair
771void TemplateTable::fast_icaload() {
772  transition(vtos, itos);
773  // load index out of locals
774  locals_index(rbx);
775  __ movl(rax, iaddress(rbx));
776
777  // rax: index
778  // rdx: array
779  index_check(rdx, rax); // kills rbx
780  __ load_unsigned_short(rax,
781                         Address(rdx, rax,
782                                 Address::times_2,
783                                 arrayOopDesc::base_offset_in_bytes(T_CHAR)));
784}
785
786
787void TemplateTable::saload() {
788  transition(itos, itos);
789  // rax: index
790  // rdx: array
791  index_check(rdx, rax); // kills rbx
792  __ load_signed_short(rax, Address(rdx, rax, Address::times_2, arrayOopDesc::base_offset_in_bytes(T_SHORT)));
793}
794
795void TemplateTable::iload(int n) {
796  transition(vtos, itos);
797  __ movl(rax, iaddress(n));
798}
799
800void TemplateTable::lload(int n) {
801  transition(vtos, ltos);
802  __ movptr(rax, laddress(n));
803  NOT_LP64(__ movptr(rdx, haddress(n)));
804}
805
806void TemplateTable::fload(int n) {
807  transition(vtos, ftos);
808  __ load_float(faddress(n));
809}
810
811void TemplateTable::dload(int n) {
812  transition(vtos, dtos);
813  __ load_double(daddress(n));
814}
815
816void TemplateTable::aload(int n) {
817  transition(vtos, atos);
818  __ movptr(rax, aaddress(n));
819}
820
821void TemplateTable::aload_0() {
822  aload_0_internal();
823}
824
825void TemplateTable::nofast_aload_0() {
826  aload_0_internal(may_not_rewrite);
827}
828
829void TemplateTable::aload_0_internal(RewriteControl rc) {
830  transition(vtos, atos);
831  // According to bytecode histograms, the pairs:
832  //
833  // _aload_0, _fast_igetfield
834  // _aload_0, _fast_agetfield
835  // _aload_0, _fast_fgetfield
836  //
837  // occur frequently. If RewriteFrequentPairs is set, the (slow)
838  // _aload_0 bytecode checks if the next bytecode is either
839  // _fast_igetfield, _fast_agetfield or _fast_fgetfield and then
840  // rewrites the current bytecode into a pair bytecode; otherwise it
841  // rewrites the current bytecode into _fast_aload_0 that doesn't do
842  // the pair check anymore.
843  //
844  // Note: If the next bytecode is _getfield, the rewrite must be
845  //       delayed, otherwise we may miss an opportunity for a pair.
846  //
847  // Also rewrite frequent pairs
848  //   aload_0, aload_1
849  //   aload_0, iload_1
850  // These bytecodes with a small amount of code are most profitable
851  // to rewrite
852  if (RewriteFrequentPairs && rc == may_rewrite) {
853    Label rewrite, done;
854
855    const Register bc = LP64_ONLY(c_rarg3) NOT_LP64(rcx);
856    LP64_ONLY(assert(rbx != bc, "register damaged"));
857
858    // get next byte
859    __ load_unsigned_byte(rbx, at_bcp(Bytecodes::length_for(Bytecodes::_aload_0)));
860
861    // if _getfield then wait with rewrite
862    __ cmpl(rbx, Bytecodes::_getfield);
863    __ jcc(Assembler::equal, done);
864
865    // if _igetfield then rewrite to _fast_iaccess_0
866    assert(Bytecodes::java_code(Bytecodes::_fast_iaccess_0) == Bytecodes::_aload_0, "fix bytecode definition");
867    __ cmpl(rbx, Bytecodes::_fast_igetfield);
868    __ movl(bc, Bytecodes::_fast_iaccess_0);
869    __ jccb(Assembler::equal, rewrite);
870
871    // if _agetfield then rewrite to _fast_aaccess_0
872    assert(Bytecodes::java_code(Bytecodes::_fast_aaccess_0) == Bytecodes::_aload_0, "fix bytecode definition");
873    __ cmpl(rbx, Bytecodes::_fast_agetfield);
874    __ movl(bc, Bytecodes::_fast_aaccess_0);
875    __ jccb(Assembler::equal, rewrite);
876
877    // if _fgetfield then rewrite to _fast_faccess_0
878    assert(Bytecodes::java_code(Bytecodes::_fast_faccess_0) == Bytecodes::_aload_0, "fix bytecode definition");
879    __ cmpl(rbx, Bytecodes::_fast_fgetfield);
880    __ movl(bc, Bytecodes::_fast_faccess_0);
881    __ jccb(Assembler::equal, rewrite);
882
883    // else rewrite to _fast_aload0
884    assert(Bytecodes::java_code(Bytecodes::_fast_aload_0) == Bytecodes::_aload_0, "fix bytecode definition");
885    __ movl(bc, Bytecodes::_fast_aload_0);
886
887    // rewrite
888    // bc: fast bytecode
889    __ bind(rewrite);
890    patch_bytecode(Bytecodes::_aload_0, bc, rbx, false);
891
892    __ bind(done);
893  }
894
895  // Do actual aload_0 (must do this after patch_bytecode which might call VM and GC might change oop).
896  aload(0);
897}
898
899void TemplateTable::istore() {
900  transition(itos, vtos);
901  locals_index(rbx);
902  __ movl(iaddress(rbx), rax);
903}
904
905
906void TemplateTable::lstore() {
907  transition(ltos, vtos);
908  locals_index(rbx);
909  __ movptr(laddress(rbx), rax);
910  NOT_LP64(__ movptr(haddress(rbx), rdx));
911}
912
913void TemplateTable::fstore() {
914  transition(ftos, vtos);
915  locals_index(rbx);
916  __ store_float(faddress(rbx));
917}
918
919void TemplateTable::dstore() {
920  transition(dtos, vtos);
921  locals_index(rbx);
922  __ store_double(daddress(rbx));
923}
924
925void TemplateTable::astore() {
926  transition(vtos, vtos);
927  __ pop_ptr(rax);
928  locals_index(rbx);
929  __ movptr(aaddress(rbx), rax);
930}
931
932void TemplateTable::wide_istore() {
933  transition(vtos, vtos);
934  __ pop_i();
935  locals_index_wide(rbx);
936  __ movl(iaddress(rbx), rax);
937}
938
939void TemplateTable::wide_lstore() {
940  transition(vtos, vtos);
941  NOT_LP64(__ pop_l(rax, rdx));
942  LP64_ONLY(__ pop_l());
943  locals_index_wide(rbx);
944  __ movptr(laddress(rbx), rax);
945  NOT_LP64(__ movl(haddress(rbx), rdx));
946}
947
948void TemplateTable::wide_fstore() {
949#ifdef _LP64
950  transition(vtos, vtos);
951  __ pop_f(xmm0);
952  locals_index_wide(rbx);
953  __ movflt(faddress(rbx), xmm0);
954#else
955  wide_istore();
956#endif
957}
958
959void TemplateTable::wide_dstore() {
960#ifdef _LP64
961  transition(vtos, vtos);
962  __ pop_d(xmm0);
963  locals_index_wide(rbx);
964  __ movdbl(daddress(rbx), xmm0);
965#else
966  wide_lstore();
967#endif
968}
969
970void TemplateTable::wide_astore() {
971  transition(vtos, vtos);
972  __ pop_ptr(rax);
973  locals_index_wide(rbx);
974  __ movptr(aaddress(rbx), rax);
975}
976
977void TemplateTable::iastore() {
978  transition(itos, vtos);
979  __ pop_i(rbx);
980  // rax: value
981  // rbx: index
982  // rdx: array
983  index_check(rdx, rbx); // prefer index in rbx
984  __ movl(Address(rdx, rbx,
985                  Address::times_4,
986                  arrayOopDesc::base_offset_in_bytes(T_INT)),
987          rax);
988}
989
990void TemplateTable::lastore() {
991  transition(ltos, vtos);
992  __ pop_i(rbx);
993  // rax,: low(value)
994  // rcx: array
995  // rdx: high(value)
996  index_check(rcx, rbx);  // prefer index in rbx,
997  // rbx,: index
998  __ movptr(Address(rcx, rbx, Address::times_8, arrayOopDesc::base_offset_in_bytes(T_LONG) + 0 * wordSize), rax);
999  NOT_LP64(__ movl(Address(rcx, rbx, Address::times_8, arrayOopDesc::base_offset_in_bytes(T_LONG) + 1 * wordSize), rdx));
1000}
1001
1002
1003void TemplateTable::fastore() {
1004  transition(ftos, vtos);
1005  __ pop_i(rbx);
1006  // value is in UseSSE >= 1 ? xmm0 : ST(0)
1007  // rbx:  index
1008  // rdx:  array
1009  index_check(rdx, rbx); // prefer index in rbx
1010  __ store_float(Address(rdx, rbx, Address::times_4, arrayOopDesc::base_offset_in_bytes(T_FLOAT)));
1011}
1012
1013void TemplateTable::dastore() {
1014  transition(dtos, vtos);
1015  __ pop_i(rbx);
1016  // value is in UseSSE >= 2 ? xmm0 : ST(0)
1017  // rbx:  index
1018  // rdx:  array
1019  index_check(rdx, rbx); // prefer index in rbx
1020  __ store_double(Address(rdx, rbx, Address::times_8, arrayOopDesc::base_offset_in_bytes(T_DOUBLE)));
1021}
1022
1023void TemplateTable::aastore() {
1024  Label is_null, ok_is_subtype, done;
1025  transition(vtos, vtos);
1026  // stack: ..., array, index, value
1027  __ movptr(rax, at_tos());    // value
1028  __ movl(rcx, at_tos_p1()); // index
1029  __ movptr(rdx, at_tos_p2()); // array
1030
1031  Address element_address(rdx, rcx,
1032                          UseCompressedOops? Address::times_4 : Address::times_ptr,
1033                          arrayOopDesc::base_offset_in_bytes(T_OBJECT));
1034
1035  index_check_without_pop(rdx, rcx);     // kills rbx
1036  __ testptr(rax, rax);
1037  __ jcc(Assembler::zero, is_null);
1038
1039  // Move subklass into rbx
1040  __ load_klass(rbx, rax);
1041  // Move superklass into rax
1042  __ load_klass(rax, rdx);
1043  __ movptr(rax, Address(rax,
1044                         ObjArrayKlass::element_klass_offset()));
1045  // Compress array + index*oopSize + 12 into a single register.  Frees rcx.
1046  __ lea(rdx, element_address);
1047
1048  // Generate subtype check.  Blows rcx, rdi
1049  // Superklass in rax.  Subklass in rbx.
1050  __ gen_subtype_check(rbx, ok_is_subtype);
1051
1052  // Come here on failure
1053  // object is at TOS
1054  __ jump(ExternalAddress(Interpreter::_throw_ArrayStoreException_entry));
1055
1056  // Come here on success
1057  __ bind(ok_is_subtype);
1058
1059  // Get the value we will store
1060  __ movptr(rax, at_tos());
1061  // Now store using the appropriate barrier
1062  do_oop_store(_masm, Address(rdx, 0), rax, _bs->kind(), true);
1063  __ jmp(done);
1064
1065  // Have a NULL in rax, rdx=array, ecx=index.  Store NULL at ary[idx]
1066  __ bind(is_null);
1067  __ profile_null_seen(rbx);
1068
1069  // Store a NULL
1070  do_oop_store(_masm, element_address, noreg, _bs->kind(), true);
1071
1072  // Pop stack arguments
1073  __ bind(done);
1074  __ addptr(rsp, 3 * Interpreter::stackElementSize);
1075}
1076
1077void TemplateTable::bastore() {
1078  transition(itos, vtos);
1079  __ pop_i(rbx);
1080  // rax: value
1081  // rbx: index
1082  // rdx: array
1083  index_check(rdx, rbx); // prefer index in rbx
1084  // Need to check whether array is boolean or byte
1085  // since both types share the bastore bytecode.
1086  __ load_klass(rcx, rdx);
1087  __ movl(rcx, Address(rcx, Klass::layout_helper_offset()));
1088  int diffbit = Klass::layout_helper_boolean_diffbit();
1089  __ testl(rcx, diffbit);
1090  Label L_skip;
1091  __ jccb(Assembler::zero, L_skip);
1092  __ andl(rax, 1);  // if it is a T_BOOLEAN array, mask the stored value to 0/1
1093  __ bind(L_skip);
1094  __ movb(Address(rdx, rbx,
1095                  Address::times_1,
1096                  arrayOopDesc::base_offset_in_bytes(T_BYTE)),
1097          rax);
1098}
1099
1100void TemplateTable::castore() {
1101  transition(itos, vtos);
1102  __ pop_i(rbx);
1103  // rax: value
1104  // rbx: index
1105  // rdx: array
1106  index_check(rdx, rbx);  // prefer index in rbx
1107  __ movw(Address(rdx, rbx,
1108                  Address::times_2,
1109                  arrayOopDesc::base_offset_in_bytes(T_CHAR)),
1110          rax);
1111}
1112
1113
1114void TemplateTable::sastore() {
1115  castore();
1116}
1117
1118void TemplateTable::istore(int n) {
1119  transition(itos, vtos);
1120  __ movl(iaddress(n), rax);
1121}
1122
1123void TemplateTable::lstore(int n) {
1124  transition(ltos, vtos);
1125  __ movptr(laddress(n), rax);
1126  NOT_LP64(__ movptr(haddress(n), rdx));
1127}
1128
1129void TemplateTable::fstore(int n) {
1130  transition(ftos, vtos);
1131  __ store_float(faddress(n));
1132}
1133
1134void TemplateTable::dstore(int n) {
1135  transition(dtos, vtos);
1136  __ store_double(daddress(n));
1137}
1138
1139
1140void TemplateTable::astore(int n) {
1141  transition(vtos, vtos);
1142  __ pop_ptr(rax);
1143  __ movptr(aaddress(n), rax);
1144}
1145
1146void TemplateTable::pop() {
1147  transition(vtos, vtos);
1148  __ addptr(rsp, Interpreter::stackElementSize);
1149}
1150
1151void TemplateTable::pop2() {
1152  transition(vtos, vtos);
1153  __ addptr(rsp, 2 * Interpreter::stackElementSize);
1154}
1155
1156
1157void TemplateTable::dup() {
1158  transition(vtos, vtos);
1159  __ load_ptr(0, rax);
1160  __ push_ptr(rax);
1161  // stack: ..., a, a
1162}
1163
1164void TemplateTable::dup_x1() {
1165  transition(vtos, vtos);
1166  // stack: ..., a, b
1167  __ load_ptr( 0, rax);  // load b
1168  __ load_ptr( 1, rcx);  // load a
1169  __ store_ptr(1, rax);  // store b
1170  __ store_ptr(0, rcx);  // store a
1171  __ push_ptr(rax);      // push b
1172  // stack: ..., b, a, b
1173}
1174
1175void TemplateTable::dup_x2() {
1176  transition(vtos, vtos);
1177  // stack: ..., a, b, c
1178  __ load_ptr( 0, rax);  // load c
1179  __ load_ptr( 2, rcx);  // load a
1180  __ store_ptr(2, rax);  // store c in a
1181  __ push_ptr(rax);      // push c
1182  // stack: ..., c, b, c, c
1183  __ load_ptr( 2, rax);  // load b
1184  __ store_ptr(2, rcx);  // store a in b
1185  // stack: ..., c, a, c, c
1186  __ store_ptr(1, rax);  // store b in c
1187  // stack: ..., c, a, b, c
1188}
1189
1190void TemplateTable::dup2() {
1191  transition(vtos, vtos);
1192  // stack: ..., a, b
1193  __ load_ptr(1, rax);  // load a
1194  __ push_ptr(rax);     // push a
1195  __ load_ptr(1, rax);  // load b
1196  __ push_ptr(rax);     // push b
1197  // stack: ..., a, b, a, b
1198}
1199
1200
1201void TemplateTable::dup2_x1() {
1202  transition(vtos, vtos);
1203  // stack: ..., a, b, c
1204  __ load_ptr( 0, rcx);  // load c
1205  __ load_ptr( 1, rax);  // load b
1206  __ push_ptr(rax);      // push b
1207  __ push_ptr(rcx);      // push c
1208  // stack: ..., a, b, c, b, c
1209  __ store_ptr(3, rcx);  // store c in b
1210  // stack: ..., a, c, c, b, c
1211  __ load_ptr( 4, rcx);  // load a
1212  __ store_ptr(2, rcx);  // store a in 2nd c
1213  // stack: ..., a, c, a, b, c
1214  __ store_ptr(4, rax);  // store b in a
1215  // stack: ..., b, c, a, b, c
1216}
1217
1218void TemplateTable::dup2_x2() {
1219  transition(vtos, vtos);
1220  // stack: ..., a, b, c, d
1221  __ load_ptr( 0, rcx);  // load d
1222  __ load_ptr( 1, rax);  // load c
1223  __ push_ptr(rax);      // push c
1224  __ push_ptr(rcx);      // push d
1225  // stack: ..., a, b, c, d, c, d
1226  __ load_ptr( 4, rax);  // load b
1227  __ store_ptr(2, rax);  // store b in d
1228  __ store_ptr(4, rcx);  // store d in b
1229  // stack: ..., a, d, c, b, c, d
1230  __ load_ptr( 5, rcx);  // load a
1231  __ load_ptr( 3, rax);  // load c
1232  __ store_ptr(3, rcx);  // store a in c
1233  __ store_ptr(5, rax);  // store c in a
1234  // stack: ..., c, d, a, b, c, d
1235}
1236
1237void TemplateTable::swap() {
1238  transition(vtos, vtos);
1239  // stack: ..., a, b
1240  __ load_ptr( 1, rcx);  // load a
1241  __ load_ptr( 0, rax);  // load b
1242  __ store_ptr(0, rcx);  // store a in b
1243  __ store_ptr(1, rax);  // store b in a
1244  // stack: ..., b, a
1245}
1246
1247void TemplateTable::iop2(Operation op) {
1248  transition(itos, itos);
1249  switch (op) {
1250  case add  :                    __ pop_i(rdx); __ addl (rax, rdx); break;
1251  case sub  : __ movl(rdx, rax); __ pop_i(rax); __ subl (rax, rdx); break;
1252  case mul  :                    __ pop_i(rdx); __ imull(rax, rdx); break;
1253  case _and :                    __ pop_i(rdx); __ andl (rax, rdx); break;
1254  case _or  :                    __ pop_i(rdx); __ orl  (rax, rdx); break;
1255  case _xor :                    __ pop_i(rdx); __ xorl (rax, rdx); break;
1256  case shl  : __ movl(rcx, rax); __ pop_i(rax); __ shll (rax);      break;
1257  case shr  : __ movl(rcx, rax); __ pop_i(rax); __ sarl (rax);      break;
1258  case ushr : __ movl(rcx, rax); __ pop_i(rax); __ shrl (rax);      break;
1259  default   : ShouldNotReachHere();
1260  }
1261}
1262
1263void TemplateTable::lop2(Operation op) {
1264  transition(ltos, ltos);
1265#ifdef _LP64
1266  switch (op) {
1267  case add  :                    __ pop_l(rdx); __ addptr(rax, rdx); break;
1268  case sub  : __ mov(rdx, rax);  __ pop_l(rax); __ subptr(rax, rdx); break;
1269  case _and :                    __ pop_l(rdx); __ andptr(rax, rdx); break;
1270  case _or  :                    __ pop_l(rdx); __ orptr (rax, rdx); break;
1271  case _xor :                    __ pop_l(rdx); __ xorptr(rax, rdx); break;
1272  default   : ShouldNotReachHere();
1273  }
1274#else
1275  __ pop_l(rbx, rcx);
1276  switch (op) {
1277    case add  : __ addl(rax, rbx); __ adcl(rdx, rcx); break;
1278    case sub  : __ subl(rbx, rax); __ sbbl(rcx, rdx);
1279                __ mov (rax, rbx); __ mov (rdx, rcx); break;
1280    case _and : __ andl(rax, rbx); __ andl(rdx, rcx); break;
1281    case _or  : __ orl (rax, rbx); __ orl (rdx, rcx); break;
1282    case _xor : __ xorl(rax, rbx); __ xorl(rdx, rcx); break;
1283    default   : ShouldNotReachHere();
1284  }
1285#endif
1286}
1287
1288void TemplateTable::idiv() {
1289  transition(itos, itos);
1290  __ movl(rcx, rax);
1291  __ pop_i(rax);
1292  // Note: could xor rax and ecx and compare with (-1 ^ min_int). If
1293  //       they are not equal, one could do a normal division (no correction
1294  //       needed), which may speed up this implementation for the common case.
1295  //       (see also JVM spec., p.243 & p.271)
1296  __ corrected_idivl(rcx);
1297}
1298
1299void TemplateTable::irem() {
1300  transition(itos, itos);
1301  __ movl(rcx, rax);
1302  __ pop_i(rax);
1303  // Note: could xor rax and ecx and compare with (-1 ^ min_int). If
1304  //       they are not equal, one could do a normal division (no correction
1305  //       needed), which may speed up this implementation for the common case.
1306  //       (see also JVM spec., p.243 & p.271)
1307  __ corrected_idivl(rcx);
1308  __ movl(rax, rdx);
1309}
1310
1311void TemplateTable::lmul() {
1312  transition(ltos, ltos);
1313#ifdef _LP64
1314  __ pop_l(rdx);
1315  __ imulq(rax, rdx);
1316#else
1317  __ pop_l(rbx, rcx);
1318  __ push(rcx); __ push(rbx);
1319  __ push(rdx); __ push(rax);
1320  __ lmul(2 * wordSize, 0);
1321  __ addptr(rsp, 4 * wordSize);  // take off temporaries
1322#endif
1323}
1324
1325void TemplateTable::ldiv() {
1326  transition(ltos, ltos);
1327#ifdef _LP64
1328  __ mov(rcx, rax);
1329  __ pop_l(rax);
1330  // generate explicit div0 check
1331  __ testq(rcx, rcx);
1332  __ jump_cc(Assembler::zero,
1333             ExternalAddress(Interpreter::_throw_ArithmeticException_entry));
1334  // Note: could xor rax and rcx and compare with (-1 ^ min_int). If
1335  //       they are not equal, one could do a normal division (no correction
1336  //       needed), which may speed up this implementation for the common case.
1337  //       (see also JVM spec., p.243 & p.271)
1338  __ corrected_idivq(rcx); // kills rbx
1339#else
1340  __ pop_l(rbx, rcx);
1341  __ push(rcx); __ push(rbx);
1342  __ push(rdx); __ push(rax);
1343  // check if y = 0
1344  __ orl(rax, rdx);
1345  __ jump_cc(Assembler::zero,
1346             ExternalAddress(Interpreter::_throw_ArithmeticException_entry));
1347  __ call_VM_leaf(CAST_FROM_FN_PTR(address, SharedRuntime::ldiv));
1348  __ addptr(rsp, 4 * wordSize);  // take off temporaries
1349#endif
1350}
1351
1352void TemplateTable::lrem() {
1353  transition(ltos, ltos);
1354#ifdef _LP64
1355  __ mov(rcx, rax);
1356  __ pop_l(rax);
1357  __ testq(rcx, rcx);
1358  __ jump_cc(Assembler::zero,
1359             ExternalAddress(Interpreter::_throw_ArithmeticException_entry));
1360  // Note: could xor rax and rcx and compare with (-1 ^ min_int). If
1361  //       they are not equal, one could do a normal division (no correction
1362  //       needed), which may speed up this implementation for the common case.
1363  //       (see also JVM spec., p.243 & p.271)
1364  __ corrected_idivq(rcx); // kills rbx
1365  __ mov(rax, rdx);
1366#else
1367  __ pop_l(rbx, rcx);
1368  __ push(rcx); __ push(rbx);
1369  __ push(rdx); __ push(rax);
1370  // check if y = 0
1371  __ orl(rax, rdx);
1372  __ jump_cc(Assembler::zero,
1373             ExternalAddress(Interpreter::_throw_ArithmeticException_entry));
1374  __ call_VM_leaf(CAST_FROM_FN_PTR(address, SharedRuntime::lrem));
1375  __ addptr(rsp, 4 * wordSize);
1376#endif
1377}
1378
1379void TemplateTable::lshl() {
1380  transition(itos, ltos);
1381  __ movl(rcx, rax);                             // get shift count
1382  #ifdef _LP64
1383  __ pop_l(rax);                                 // get shift value
1384  __ shlq(rax);
1385#else
1386  __ pop_l(rax, rdx);                            // get shift value
1387  __ lshl(rdx, rax);
1388#endif
1389}
1390
1391void TemplateTable::lshr() {
1392#ifdef _LP64
1393  transition(itos, ltos);
1394  __ movl(rcx, rax);                             // get shift count
1395  __ pop_l(rax);                                 // get shift value
1396  __ sarq(rax);
1397#else
1398  transition(itos, ltos);
1399  __ mov(rcx, rax);                              // get shift count
1400  __ pop_l(rax, rdx);                            // get shift value
1401  __ lshr(rdx, rax, true);
1402#endif
1403}
1404
1405void TemplateTable::lushr() {
1406  transition(itos, ltos);
1407#ifdef _LP64
1408  __ movl(rcx, rax);                             // get shift count
1409  __ pop_l(rax);                                 // get shift value
1410  __ shrq(rax);
1411#else
1412  __ mov(rcx, rax);                              // get shift count
1413  __ pop_l(rax, rdx);                            // get shift value
1414  __ lshr(rdx, rax);
1415#endif
1416}
1417
1418void TemplateTable::fop2(Operation op) {
1419  transition(ftos, ftos);
1420
1421  if (UseSSE >= 1) {
1422    switch (op) {
1423    case add:
1424      __ addss(xmm0, at_rsp());
1425      __ addptr(rsp, Interpreter::stackElementSize);
1426      break;
1427    case sub:
1428      __ movflt(xmm1, xmm0);
1429      __ pop_f(xmm0);
1430      __ subss(xmm0, xmm1);
1431      break;
1432    case mul:
1433      __ mulss(xmm0, at_rsp());
1434      __ addptr(rsp, Interpreter::stackElementSize);
1435      break;
1436    case div:
1437      __ movflt(xmm1, xmm0);
1438      __ pop_f(xmm0);
1439      __ divss(xmm0, xmm1);
1440      break;
1441    case rem:
1442      // On x86_64 platforms the SharedRuntime::frem method is called to perform the
1443      // modulo operation. The frem method calls the function
1444      // double fmod(double x, double y) in math.h. The documentation of fmod states:
1445      // "If x or y is a NaN, a NaN is returned." without specifying what type of NaN
1446      // (signalling or quiet) is returned.
1447      //
1448      // On x86_32 platforms the FPU is used to perform the modulo operation. The
1449      // reason is that on 32-bit Windows the sign of modulo operations diverges from
1450      // what is considered the standard (e.g., -0.0f % -3.14f is 0.0f (and not -0.0f).
1451      // The fprem instruction used on x86_32 is functionally equivalent to
1452      // SharedRuntime::frem in that it returns a NaN.
1453#ifdef _LP64
1454      __ movflt(xmm1, xmm0);
1455      __ pop_f(xmm0);
1456      __ call_VM_leaf(CAST_FROM_FN_PTR(address, SharedRuntime::frem), 2);
1457#else
1458      __ push_f(xmm0);
1459      __ pop_f();
1460      __ fld_s(at_rsp());
1461      __ fremr(rax);
1462      __ f2ieee();
1463      __ pop(rax);  // pop second operand off the stack
1464      __ push_f();
1465      __ pop_f(xmm0);
1466#endif
1467      break;
1468    default:
1469      ShouldNotReachHere();
1470      break;
1471    }
1472  } else {
1473#ifdef _LP64
1474    ShouldNotReachHere();
1475#else
1476    switch (op) {
1477    case add: __ fadd_s (at_rsp());                break;
1478    case sub: __ fsubr_s(at_rsp());                break;
1479    case mul: __ fmul_s (at_rsp());                break;
1480    case div: __ fdivr_s(at_rsp());                break;
1481    case rem: __ fld_s  (at_rsp()); __ fremr(rax); break;
1482    default : ShouldNotReachHere();
1483    }
1484    __ f2ieee();
1485    __ pop(rax);  // pop second operand off the stack
1486#endif // _LP64
1487  }
1488}
1489
1490void TemplateTable::dop2(Operation op) {
1491  transition(dtos, dtos);
1492  if (UseSSE >= 2) {
1493    switch (op) {
1494    case add:
1495      __ addsd(xmm0, at_rsp());
1496      __ addptr(rsp, 2 * Interpreter::stackElementSize);
1497      break;
1498    case sub:
1499      __ movdbl(xmm1, xmm0);
1500      __ pop_d(xmm0);
1501      __ subsd(xmm0, xmm1);
1502      break;
1503    case mul:
1504      __ mulsd(xmm0, at_rsp());
1505      __ addptr(rsp, 2 * Interpreter::stackElementSize);
1506      break;
1507    case div:
1508      __ movdbl(xmm1, xmm0);
1509      __ pop_d(xmm0);
1510      __ divsd(xmm0, xmm1);
1511      break;
1512    case rem:
1513      // Similar to fop2(), the modulo operation is performed using the
1514      // SharedRuntime::drem method (on x86_64 platforms) or using the
1515      // FPU (on x86_32 platforms) for the same reasons as mentioned in fop2().
1516#ifdef _LP64
1517      __ movdbl(xmm1, xmm0);
1518      __ pop_d(xmm0);
1519      __ call_VM_leaf(CAST_FROM_FN_PTR(address, SharedRuntime::drem), 2);
1520#else
1521      __ push_d(xmm0);
1522      __ pop_d();
1523      __ fld_d(at_rsp());
1524      __ fremr(rax);
1525      __ d2ieee();
1526      __ pop(rax);
1527      __ pop(rdx);
1528      __ push_d();
1529      __ pop_d(xmm0);
1530#endif
1531      break;
1532    default:
1533      ShouldNotReachHere();
1534      break;
1535    }
1536  } else {
1537#ifdef _LP64
1538    ShouldNotReachHere();
1539#else
1540    switch (op) {
1541    case add: __ fadd_d (at_rsp());                break;
1542    case sub: __ fsubr_d(at_rsp());                break;
1543    case mul: {
1544      Label L_strict;
1545      Label L_join;
1546      const Address access_flags      (rcx, Method::access_flags_offset());
1547      __ get_method(rcx);
1548      __ movl(rcx, access_flags);
1549      __ testl(rcx, JVM_ACC_STRICT);
1550      __ jccb(Assembler::notZero, L_strict);
1551      __ fmul_d (at_rsp());
1552      __ jmpb(L_join);
1553      __ bind(L_strict);
1554      __ fld_x(ExternalAddress(StubRoutines::addr_fpu_subnormal_bias1()));
1555      __ fmulp();
1556      __ fmul_d (at_rsp());
1557      __ fld_x(ExternalAddress(StubRoutines::addr_fpu_subnormal_bias2()));
1558      __ fmulp();
1559      __ bind(L_join);
1560      break;
1561    }
1562    case div: {
1563      Label L_strict;
1564      Label L_join;
1565      const Address access_flags      (rcx, Method::access_flags_offset());
1566      __ get_method(rcx);
1567      __ movl(rcx, access_flags);
1568      __ testl(rcx, JVM_ACC_STRICT);
1569      __ jccb(Assembler::notZero, L_strict);
1570      __ fdivr_d(at_rsp());
1571      __ jmp(L_join);
1572      __ bind(L_strict);
1573      __ fld_x(ExternalAddress(StubRoutines::addr_fpu_subnormal_bias1()));
1574      __ fmul_d (at_rsp());
1575      __ fdivrp();
1576      __ fld_x(ExternalAddress(StubRoutines::addr_fpu_subnormal_bias2()));
1577      __ fmulp();
1578      __ bind(L_join);
1579      break;
1580    }
1581    case rem: __ fld_d  (at_rsp()); __ fremr(rax); break;
1582    default : ShouldNotReachHere();
1583    }
1584    __ d2ieee();
1585    // Pop double precision number from rsp.
1586    __ pop(rax);
1587    __ pop(rdx);
1588#endif
1589  }
1590}
1591
1592void TemplateTable::ineg() {
1593  transition(itos, itos);
1594  __ negl(rax);
1595}
1596
1597void TemplateTable::lneg() {
1598  transition(ltos, ltos);
1599  LP64_ONLY(__ negq(rax));
1600  NOT_LP64(__ lneg(rdx, rax));
1601}
1602
1603// Note: 'double' and 'long long' have 32-bits alignment on x86.
1604static jlong* double_quadword(jlong *adr, jlong lo, jlong hi) {
1605  // Use the expression (adr)&(~0xF) to provide 128-bits aligned address
1606  // of 128-bits operands for SSE instructions.
1607  jlong *operand = (jlong*)(((intptr_t)adr)&((intptr_t)(~0xF)));
1608  // Store the value to a 128-bits operand.
1609  operand[0] = lo;
1610  operand[1] = hi;
1611  return operand;
1612}
1613
1614// Buffer for 128-bits masks used by SSE instructions.
1615static jlong float_signflip_pool[2*2];
1616static jlong double_signflip_pool[2*2];
1617
1618void TemplateTable::fneg() {
1619  transition(ftos, ftos);
1620  if (UseSSE >= 1) {
1621    static jlong *float_signflip  = double_quadword(&float_signflip_pool[1],  CONST64(0x8000000080000000),  CONST64(0x8000000080000000));
1622    __ xorps(xmm0, ExternalAddress((address) float_signflip));
1623  } else {
1624    LP64_ONLY(ShouldNotReachHere());
1625    NOT_LP64(__ fchs());
1626  }
1627}
1628
1629void TemplateTable::dneg() {
1630  transition(dtos, dtos);
1631  if (UseSSE >= 2) {
1632    static jlong *double_signflip =
1633      double_quadword(&double_signflip_pool[1], CONST64(0x8000000000000000), CONST64(0x8000000000000000));
1634    __ xorpd(xmm0, ExternalAddress((address) double_signflip));
1635  } else {
1636#ifdef _LP64
1637    ShouldNotReachHere();
1638#else
1639    __ fchs();
1640#endif
1641  }
1642}
1643
1644void TemplateTable::iinc() {
1645  transition(vtos, vtos);
1646  __ load_signed_byte(rdx, at_bcp(2)); // get constant
1647  locals_index(rbx);
1648  __ addl(iaddress(rbx), rdx);
1649}
1650
1651void TemplateTable::wide_iinc() {
1652  transition(vtos, vtos);
1653  __ movl(rdx, at_bcp(4)); // get constant
1654  locals_index_wide(rbx);
1655  __ bswapl(rdx); // swap bytes & sign-extend constant
1656  __ sarl(rdx, 16);
1657  __ addl(iaddress(rbx), rdx);
1658  // Note: should probably use only one movl to get both
1659  //       the index and the constant -> fix this
1660}
1661
1662void TemplateTable::convert() {
1663#ifdef _LP64
1664  // Checking
1665#ifdef ASSERT
1666  {
1667    TosState tos_in  = ilgl;
1668    TosState tos_out = ilgl;
1669    switch (bytecode()) {
1670    case Bytecodes::_i2l: // fall through
1671    case Bytecodes::_i2f: // fall through
1672    case Bytecodes::_i2d: // fall through
1673    case Bytecodes::_i2b: // fall through
1674    case Bytecodes::_i2c: // fall through
1675    case Bytecodes::_i2s: tos_in = itos; break;
1676    case Bytecodes::_l2i: // fall through
1677    case Bytecodes::_l2f: // fall through
1678    case Bytecodes::_l2d: tos_in = ltos; break;
1679    case Bytecodes::_f2i: // fall through
1680    case Bytecodes::_f2l: // fall through
1681    case Bytecodes::_f2d: tos_in = ftos; break;
1682    case Bytecodes::_d2i: // fall through
1683    case Bytecodes::_d2l: // fall through
1684    case Bytecodes::_d2f: tos_in = dtos; break;
1685    default             : ShouldNotReachHere();
1686    }
1687    switch (bytecode()) {
1688    case Bytecodes::_l2i: // fall through
1689    case Bytecodes::_f2i: // fall through
1690    case Bytecodes::_d2i: // fall through
1691    case Bytecodes::_i2b: // fall through
1692    case Bytecodes::_i2c: // fall through
1693    case Bytecodes::_i2s: tos_out = itos; break;
1694    case Bytecodes::_i2l: // fall through
1695    case Bytecodes::_f2l: // fall through
1696    case Bytecodes::_d2l: tos_out = ltos; break;
1697    case Bytecodes::_i2f: // fall through
1698    case Bytecodes::_l2f: // fall through
1699    case Bytecodes::_d2f: tos_out = ftos; break;
1700    case Bytecodes::_i2d: // fall through
1701    case Bytecodes::_l2d: // fall through
1702    case Bytecodes::_f2d: tos_out = dtos; break;
1703    default             : ShouldNotReachHere();
1704    }
1705    transition(tos_in, tos_out);
1706  }
1707#endif // ASSERT
1708
1709  static const int64_t is_nan = 0x8000000000000000L;
1710
1711  // Conversion
1712  switch (bytecode()) {
1713  case Bytecodes::_i2l:
1714    __ movslq(rax, rax);
1715    break;
1716  case Bytecodes::_i2f:
1717    __ cvtsi2ssl(xmm0, rax);
1718    break;
1719  case Bytecodes::_i2d:
1720    __ cvtsi2sdl(xmm0, rax);
1721    break;
1722  case Bytecodes::_i2b:
1723    __ movsbl(rax, rax);
1724    break;
1725  case Bytecodes::_i2c:
1726    __ movzwl(rax, rax);
1727    break;
1728  case Bytecodes::_i2s:
1729    __ movswl(rax, rax);
1730    break;
1731  case Bytecodes::_l2i:
1732    __ movl(rax, rax);
1733    break;
1734  case Bytecodes::_l2f:
1735    __ cvtsi2ssq(xmm0, rax);
1736    break;
1737  case Bytecodes::_l2d:
1738    __ cvtsi2sdq(xmm0, rax);
1739    break;
1740  case Bytecodes::_f2i:
1741  {
1742    Label L;
1743    __ cvttss2sil(rax, xmm0);
1744    __ cmpl(rax, 0x80000000); // NaN or overflow/underflow?
1745    __ jcc(Assembler::notEqual, L);
1746    __ call_VM_leaf(CAST_FROM_FN_PTR(address, SharedRuntime::f2i), 1);
1747    __ bind(L);
1748  }
1749    break;
1750  case Bytecodes::_f2l:
1751  {
1752    Label L;
1753    __ cvttss2siq(rax, xmm0);
1754    // NaN or overflow/underflow?
1755    __ cmp64(rax, ExternalAddress((address) &is_nan));
1756    __ jcc(Assembler::notEqual, L);
1757    __ call_VM_leaf(CAST_FROM_FN_PTR(address, SharedRuntime::f2l), 1);
1758    __ bind(L);
1759  }
1760    break;
1761  case Bytecodes::_f2d:
1762    __ cvtss2sd(xmm0, xmm0);
1763    break;
1764  case Bytecodes::_d2i:
1765  {
1766    Label L;
1767    __ cvttsd2sil(rax, xmm0);
1768    __ cmpl(rax, 0x80000000); // NaN or overflow/underflow?
1769    __ jcc(Assembler::notEqual, L);
1770    __ call_VM_leaf(CAST_FROM_FN_PTR(address, SharedRuntime::d2i), 1);
1771    __ bind(L);
1772  }
1773    break;
1774  case Bytecodes::_d2l:
1775  {
1776    Label L;
1777    __ cvttsd2siq(rax, xmm0);
1778    // NaN or overflow/underflow?
1779    __ cmp64(rax, ExternalAddress((address) &is_nan));
1780    __ jcc(Assembler::notEqual, L);
1781    __ call_VM_leaf(CAST_FROM_FN_PTR(address, SharedRuntime::d2l), 1);
1782    __ bind(L);
1783  }
1784    break;
1785  case Bytecodes::_d2f:
1786    __ cvtsd2ss(xmm0, xmm0);
1787    break;
1788  default:
1789    ShouldNotReachHere();
1790  }
1791#else
1792  // Checking
1793#ifdef ASSERT
1794  { TosState tos_in  = ilgl;
1795    TosState tos_out = ilgl;
1796    switch (bytecode()) {
1797      case Bytecodes::_i2l: // fall through
1798      case Bytecodes::_i2f: // fall through
1799      case Bytecodes::_i2d: // fall through
1800      case Bytecodes::_i2b: // fall through
1801      case Bytecodes::_i2c: // fall through
1802      case Bytecodes::_i2s: tos_in = itos; break;
1803      case Bytecodes::_l2i: // fall through
1804      case Bytecodes::_l2f: // fall through
1805      case Bytecodes::_l2d: tos_in = ltos; break;
1806      case Bytecodes::_f2i: // fall through
1807      case Bytecodes::_f2l: // fall through
1808      case Bytecodes::_f2d: tos_in = ftos; break;
1809      case Bytecodes::_d2i: // fall through
1810      case Bytecodes::_d2l: // fall through
1811      case Bytecodes::_d2f: tos_in = dtos; break;
1812      default             : ShouldNotReachHere();
1813    }
1814    switch (bytecode()) {
1815      case Bytecodes::_l2i: // fall through
1816      case Bytecodes::_f2i: // fall through
1817      case Bytecodes::_d2i: // fall through
1818      case Bytecodes::_i2b: // fall through
1819      case Bytecodes::_i2c: // fall through
1820      case Bytecodes::_i2s: tos_out = itos; break;
1821      case Bytecodes::_i2l: // fall through
1822      case Bytecodes::_f2l: // fall through
1823      case Bytecodes::_d2l: tos_out = ltos; break;
1824      case Bytecodes::_i2f: // fall through
1825      case Bytecodes::_l2f: // fall through
1826      case Bytecodes::_d2f: tos_out = ftos; break;
1827      case Bytecodes::_i2d: // fall through
1828      case Bytecodes::_l2d: // fall through
1829      case Bytecodes::_f2d: tos_out = dtos; break;
1830      default             : ShouldNotReachHere();
1831    }
1832    transition(tos_in, tos_out);
1833  }
1834#endif // ASSERT
1835
1836  // Conversion
1837  // (Note: use push(rcx)/pop(rcx) for 1/2-word stack-ptr manipulation)
1838  switch (bytecode()) {
1839    case Bytecodes::_i2l:
1840      __ extend_sign(rdx, rax);
1841      break;
1842    case Bytecodes::_i2f:
1843      if (UseSSE >= 1) {
1844        __ cvtsi2ssl(xmm0, rax);
1845      } else {
1846        __ push(rax);          // store int on tos
1847        __ fild_s(at_rsp());   // load int to ST0
1848        __ f2ieee();           // truncate to float size
1849        __ pop(rcx);           // adjust rsp
1850      }
1851      break;
1852    case Bytecodes::_i2d:
1853      if (UseSSE >= 2) {
1854        __ cvtsi2sdl(xmm0, rax);
1855      } else {
        __ push(rax);          // add one slot for d2ieee()
        __ push(rax);          // store int on tos
        __ fild_s(at_rsp());   // load int to ST0
        __ d2ieee();           // truncate to double size
        __ pop(rcx);           // adjust rsp
        __ pop(rcx);
1862      }
1863      break;
1864    case Bytecodes::_i2b:
1865      __ shll(rax, 24);      // truncate upper 24 bits
1866      __ sarl(rax, 24);      // and sign-extend byte
1867      LP64_ONLY(__ movsbl(rax, rax));
1868      break;
1869    case Bytecodes::_i2c:
1870      __ andl(rax, 0xFFFF);  // truncate upper 16 bits
1871      LP64_ONLY(__ movzwl(rax, rax));
1872      break;
1873    case Bytecodes::_i2s:
1874      __ shll(rax, 16);      // truncate upper 16 bits
1875      __ sarl(rax, 16);      // and sign-extend short
1876      LP64_ONLY(__ movswl(rax, rax));
1877      break;
1878    case Bytecodes::_l2i:
1879      /* nothing to do */
1880      break;
1881    case Bytecodes::_l2f:
1882      // On 64-bit platforms, the cvtsi2ssq instruction is used to convert
1883      // 64-bit long values to floats. On 32-bit platforms it is not possible
1884      // to use that instruction with 64-bit operands, therefore the FPU is
1885      // used to perform the conversion.
1886      __ push(rdx);          // store long on tos
1887      __ push(rax);
1888      __ fild_d(at_rsp());   // load long to ST0
1889      __ f2ieee();           // truncate to float size
1890      __ pop(rcx);           // adjust rsp
1891      __ pop(rcx);
1892      if (UseSSE >= 1) {
1893        __ push_f();
1894        __ pop_f(xmm0);
1895      }
1896      break;
1897    case Bytecodes::_l2d:
      // On 32-bit platforms it is not possible to use the cvtsi2sdq
      // instruction with 64-bit operands, so the FPU is used to perform
      // the conversion.
1901      __ push(rdx);          // store long on tos
1902      __ push(rax);
1903      __ fild_d(at_rsp());   // load long to ST0
1904      __ d2ieee();           // truncate to double size
1905      __ pop(rcx);           // adjust rsp
1906      __ pop(rcx);
1907      if (UseSSE >= 2) {
1908        __ push_d();
1909        __ pop_d(xmm0);
1910      }
1911      break;
1912    case Bytecodes::_f2i:
1913      // SharedRuntime::f2i does not differentiate between sNaNs and qNaNs
1914      // as it returns 0 for any NaN.
1915      if (UseSSE >= 1) {
1916        __ push_f(xmm0);
1917      } else {
1918        __ push(rcx);          // reserve space for argument
1919        __ fstp_s(at_rsp());   // pass float argument on stack
1920      }
1921      __ call_VM_leaf(CAST_FROM_FN_PTR(address, SharedRuntime::f2i), 1);
1922      break;
1923    case Bytecodes::_f2l:
1924      // SharedRuntime::f2l does not differentiate between sNaNs and qNaNs
1925      // as it returns 0 for any NaN.
1926      if (UseSSE >= 1) {
        __ push_f(xmm0);
1928      } else {
1929        __ push(rcx);          // reserve space for argument
1930        __ fstp_s(at_rsp());   // pass float argument on stack
1931      }
1932      __ call_VM_leaf(CAST_FROM_FN_PTR(address, SharedRuntime::f2l), 1);
1933      break;
1934    case Bytecodes::_f2d:
1935      if (UseSSE < 1) {
1936        /* nothing to do */
1937      } else if (UseSSE == 1) {
1938        __ push_f(xmm0);
1939        __ pop_f();
1940      } else { // UseSSE >= 2
1941        __ cvtss2sd(xmm0, xmm0);
1942      }
1943      break;
1944    case Bytecodes::_d2i:
1945      if (UseSSE >= 2) {
1946        __ push_d(xmm0);
1947      } else {
1948        __ push(rcx);          // reserve space for argument
1949        __ push(rcx);
1950        __ fstp_d(at_rsp());   // pass double argument on stack
1951      }
1952      __ call_VM_leaf(CAST_FROM_FN_PTR(address, SharedRuntime::d2i), 2);
1953      break;
1954    case Bytecodes::_d2l:
1955      if (UseSSE >= 2) {
1956        __ push_d(xmm0);
1957      } else {
1958        __ push(rcx);          // reserve space for argument
1959        __ push(rcx);
1960        __ fstp_d(at_rsp());   // pass double argument on stack
1961      }
1962      __ call_VM_leaf(CAST_FROM_FN_PTR(address, SharedRuntime::d2l), 2);
1963      break;
1964    case Bytecodes::_d2f:
1965      if (UseSSE <= 1) {
1966        __ push(rcx);          // reserve space for f2ieee()
1967        __ f2ieee();           // truncate to float size
1968        __ pop(rcx);           // adjust rsp
1969        if (UseSSE == 1) {
1970          // The cvtsd2ss instruction is not available if UseSSE==1, therefore
1971          // the conversion is performed using the FPU in this case.
1972          __ push_f();
1973          __ pop_f(xmm0);
1974        }
1975      } else { // UseSSE >= 2
1976        __ cvtsd2ss(xmm0, xmm0);
1977      }
1978      break;
1979    default             :
1980      ShouldNotReachHere();
1981  }
1982#endif
1983}
1984
1985void TemplateTable::lcmp() {
1986  transition(ltos, itos);
1987#ifdef _LP64
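  // lcmp result: -1 if value1 < value2, 0 if equal, +1 if greater.
  // value1 (pushed first) is popped into rdx; value2 is already in rax.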
1988  Label done;
1989  __ pop_l(rdx);
1990  __ cmpq(rdx, rax);
1991  __ movl(rax, -1);
1992  __ jccb(Assembler::less, done);
1993  __ setb(Assembler::notEqual, rax);
1994  __ movzbl(rax, rax);
1995  __ bind(done);
1996#else
1997
1998  // y = rdx:rax
1999  __ pop_l(rbx, rcx);             // get x = rcx:rbx
2000  __ lcmp2int(rcx, rbx, rdx, rax);// rcx := cmp(x, y)
2001  __ mov(rax, rcx);
2002#endif
2003}
2004
2005void TemplateTable::float_cmp(bool is_float, int unordered_result) {
2006  if ((is_float && UseSSE >= 1) ||
2007      (!is_float && UseSSE >= 2)) {
2008    Label done;
2009    if (is_float) {
2010      // XXX get rid of pop here, use ... reg, mem32
2011      __ pop_f(xmm1);
2012      __ ucomiss(xmm1, xmm0);
2013    } else {
2014      // XXX get rid of pop here, use ... reg, mem64
2015      __ pop_d(xmm1);
2016      __ ucomisd(xmm1, xmm0);
2017    }
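    // ucomiss/ucomisd set the parity flag when either operand is NaN (unordered).
    // unordered_result < 0 selects fcmpl/dcmpl semantics (NaN compares as -1),
    // otherwise fcmpg/dcmpg semantics (NaN compares as +1).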
2018    if (unordered_result < 0) {
2019      __ movl(rax, -1);
2020      __ jccb(Assembler::parity, done);
2021      __ jccb(Assembler::below, done);
2022      __ setb(Assembler::notEqual, rdx);
2023      __ movzbl(rax, rdx);
2024    } else {
2025      __ movl(rax, 1);
2026      __ jccb(Assembler::parity, done);
2027      __ jccb(Assembler::above, done);
2028      __ movl(rax, 0);
2029      __ jccb(Assembler::equal, done);
2030      __ decrementl(rax);
2031    }
2032    __ bind(done);
2033  } else {
2034#ifdef _LP64
2035    ShouldNotReachHere();
2036#else
2037    if (is_float) {
2038      __ fld_s(at_rsp());
2039    } else {
2040      __ fld_d(at_rsp());
2041      __ pop(rdx);
2042    }
2043    __ pop(rcx);
2044    __ fcmp2int(rax, unordered_result < 0);
2045#endif // _LP64
2046  }
2047}
2048
2049void TemplateTable::branch(bool is_jsr, bool is_wide) {
2050  __ get_method(rcx); // rcx holds method
2051  __ profile_taken_branch(rax, rbx); // rax holds updated MDP, rbx
2052                                     // holds bumped taken count
2053
2054  const ByteSize be_offset = MethodCounters::backedge_counter_offset() +
2055                             InvocationCounter::counter_offset();
2056  const ByteSize inv_offset = MethodCounters::invocation_counter_offset() +
2057                              InvocationCounter::counter_offset();
2058
2059  // Load up edx with the branch displacement
2060  if (is_wide) {
2061    __ movl(rdx, at_bcp(1));
2062  } else {
2063    __ load_signed_short(rdx, at_bcp(1));
2064  }
2065  __ bswapl(rdx);
2066
2067  if (!is_wide) {
2068    __ sarl(rdx, 16);
2069  }
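  // rdx now holds the sign-extended branch displacement: for short branches the
  // two big-endian offset bytes end up in the upper half of rdx after the bswap,
  // and the arithmetic shift by 16 both repositions and sign-extends them.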
2070  LP64_ONLY(__ movl2ptr(rdx, rdx));
2071
2072  // Handle all the JSR stuff here, then exit.
2073  // It's much shorter and cleaner than intermingling with the non-JSR
2074  // normal-branch stuff occurring below.
2075  if (is_jsr) {
2076    // Pre-load the next target bytecode into rbx
2077    __ load_unsigned_byte(rbx, Address(rbcp, rdx, Address::times_1, 0));
2078
2079    // compute return address as bci in rax
2080    __ lea(rax, at_bcp((is_wide ? 5 : 3) -
2081                        in_bytes(ConstMethod::codes_offset())));
2082    __ subptr(rax, Address(rcx, Method::const_offset()));
2083    // Adjust the bcp in r13 by the displacement in rdx
2084    __ addptr(rbcp, rdx);
2085    // jsr returns atos that is not an oop
2086    __ push_i(rax);
2087    __ dispatch_only(vtos);
2088    return;
2089  }
2090
2091  // Normal (non-jsr) branch handling
2092
2093  // Adjust the bcp in r13 by the displacement in rdx
2094  __ addptr(rbcp, rdx);
2095
2096  assert(UseLoopCounter || !UseOnStackReplacement,
2097         "on-stack-replacement requires loop counters");
2098  Label backedge_counter_overflow;
2099  Label profile_method;
2100  Label dispatch;
2101  if (UseLoopCounter) {
2102    // increment backedge counter for backward branches
2103    // rax: MDO
2104    // rbx: MDO bumped taken-count
2105    // rcx: method
2106    // rdx: target offset
2107    // r13: target bcp
2108    // r14: locals pointer
2109    __ testl(rdx, rdx);             // check if forward or backward branch
2110    __ jcc(Assembler::positive, dispatch); // count only if backward branch
2111
2112    // check if MethodCounters exists
2113    Label has_counters;
2114    __ movptr(rax, Address(rcx, Method::method_counters_offset()));
2115    __ testptr(rax, rax);
2116    __ jcc(Assembler::notZero, has_counters);
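    // rdx (displacement) and rcx (method) are not preserved across call_VM,
    // so save and restore them around the MethodCounters allocation.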
2117    __ push(rdx);
2118    __ push(rcx);
2119    __ call_VM(noreg, CAST_FROM_FN_PTR(address, InterpreterRuntime::build_method_counters),
2120               rcx);
2121    __ pop(rcx);
2122    __ pop(rdx);
2123    __ movptr(rax, Address(rcx, Method::method_counters_offset()));
2124    __ testptr(rax, rax);
2125    __ jcc(Assembler::zero, dispatch);
2126    __ bind(has_counters);
2127
2128    if (TieredCompilation) {
2129      Label no_mdo;
2130      int increment = InvocationCounter::count_increment;
2131      if (ProfileInterpreter) {
2132        // Are we profiling?
2133        __ movptr(rbx, Address(rcx, in_bytes(Method::method_data_offset())));
2134        __ testptr(rbx, rbx);
2135        __ jccb(Assembler::zero, no_mdo);
2136        // Increment the MDO backedge counter
2137        const Address mdo_backedge_counter(rbx, in_bytes(MethodData::backedge_counter_offset()) +
2138                                           in_bytes(InvocationCounter::counter_offset()));
2139        const Address mask(rbx, in_bytes(MethodData::backedge_mask_offset()));
2140        __ increment_mask_and_jump(mdo_backedge_counter, increment, mask,
2141                                   rax, false, Assembler::zero, &backedge_counter_overflow);
2142        __ jmp(dispatch);
2143      }
2144      __ bind(no_mdo);
2145      // Increment backedge counter in MethodCounters*
2146      __ movptr(rcx, Address(rcx, Method::method_counters_offset()));
2147      const Address mask(rcx, in_bytes(MethodCounters::backedge_mask_offset()));
2148      __ increment_mask_and_jump(Address(rcx, be_offset), increment, mask,
2149                                 rax, false, Assembler::zero, &backedge_counter_overflow);
2150    } else { // not TieredCompilation
2151      // increment counter
2152      __ movptr(rcx, Address(rcx, Method::method_counters_offset()));
2153      __ movl(rax, Address(rcx, be_offset));        // load backedge counter
2154      __ incrementl(rax, InvocationCounter::count_increment); // increment counter
2155      __ movl(Address(rcx, be_offset), rax);        // store counter
2156
2157      __ movl(rax, Address(rcx, inv_offset));    // load invocation counter
2158
2159      __ andl(rax, InvocationCounter::count_mask_value); // and the status bits
2160      __ addl(rax, Address(rcx, be_offset));        // add both counters
2161
2162      if (ProfileInterpreter) {
2163        // Test to see if we should create a method data oop
2164        __ cmp32(rax, Address(rcx, in_bytes(MethodCounters::interpreter_profile_limit_offset())));
2165        __ jcc(Assembler::less, dispatch);
2166
2167        // if no method data exists, go to profile method
2168        __ test_method_data_pointer(rax, profile_method);
2169
2170        if (UseOnStackReplacement) {
2171          // check for overflow against rbx which is the MDO taken count
2172          __ cmp32(rbx, Address(rcx, in_bytes(MethodCounters::interpreter_backward_branch_limit_offset())));
2173          __ jcc(Assembler::below, dispatch);
2174
          // When ProfileInterpreter is on, the backedge_count comes
          // from the MethodData*, whose value does not get reset on
          // the call to frequency_counter_overflow().  To avoid
2178          // excessive calls to the overflow routine while the method is
2179          // being compiled, add a second test to make sure the overflow
2180          // function is called only once every overflow_frequency.
2181          const int overflow_frequency = 1024;
2182          __ andl(rbx, overflow_frequency - 1);
2183          __ jcc(Assembler::zero, backedge_counter_overflow);
2184
2185        }
2186      } else {
2187        if (UseOnStackReplacement) {
2188          // check for overflow against rax, which is the sum of the
2189          // counters
2190          __ cmp32(rax, Address(rcx, in_bytes(MethodCounters::interpreter_backward_branch_limit_offset())));
2191          __ jcc(Assembler::aboveEqual, backedge_counter_overflow);
2192
2193        }
2194      }
2195    }
2196    __ bind(dispatch);
2197  }
2198
2199  // Pre-load the next target bytecode into rbx
2200  __ load_unsigned_byte(rbx, Address(rbcp, 0));
2201
2202  // continue with the bytecode @ target
2203  // rax: return bci for jsr's, unused otherwise
2204  // rbx: target bytecode
2205  // r13: target bcp
2206  __ dispatch_only(vtos);
2207
2208  if (UseLoopCounter) {
2209    if (ProfileInterpreter) {
2210      // Out-of-line code to allocate method data oop.
2211      __ bind(profile_method);
2212      __ call_VM(noreg, CAST_FROM_FN_PTR(address, InterpreterRuntime::profile_method));
2213      __ set_method_data_pointer_for_bcp();
2214      __ jmp(dispatch);
2215    }
2216
2217    if (UseOnStackReplacement) {
2218      // invocation counter overflow
2219      __ bind(backedge_counter_overflow);
2220      __ negptr(rdx);
2221      __ addptr(rdx, rbcp); // branch bcp
2222      // IcoResult frequency_counter_overflow([JavaThread*], address branch_bcp)
2223      __ call_VM(noreg,
2224                 CAST_FROM_FN_PTR(address,
2225                                  InterpreterRuntime::frequency_counter_overflow),
2226                 rdx);
2227
2228      // rax: osr nmethod (osr ok) or NULL (osr not possible)
2229      // rdx: scratch
2230      // r14: locals pointer
2231      // r13: bcp
2232      __ testptr(rax, rax);                        // test result
2233      __ jcc(Assembler::zero, dispatch);         // no osr if null
2234      // nmethod may have been invalidated (VM may block upon call_VM return)
2235      __ cmpb(Address(rax, nmethod::state_offset()), nmethod::in_use);
2236      __ jcc(Assembler::notEqual, dispatch);
2237
2238      // We have the address of an on stack replacement routine in rax.
2239      // In preparation of invoking it, first we must migrate the locals
2240      // and monitors from off the interpreter frame on the stack.
      // Be sure to save the osr nmethod over the migration call;
      // it will be preserved in rbx.
2243      __ mov(rbx, rax);
2244
2245      NOT_LP64(__ get_thread(rcx));
2246
2247      call_VM(noreg, CAST_FROM_FN_PTR(address, SharedRuntime::OSR_migration_begin));
2248
2249      // rax is OSR buffer, move it to expected parameter location
2250      LP64_ONLY(__ mov(j_rarg0, rax));
2251      NOT_LP64(__ mov(rcx, rax));
      // We use the j_rarg definitions here because parameter registers differ
      // across platforms; we are in the midst of a calling sequence to the OSR
      // nmethod and we don't want register collisions. These are NOT parameters.
2255
2256      const Register retaddr   = LP64_ONLY(j_rarg2) NOT_LP64(rdi);
2257      const Register sender_sp = LP64_ONLY(j_rarg1) NOT_LP64(rdx);
2258
2259      // pop the interpreter frame
2260      __ movptr(sender_sp, Address(rbp, frame::interpreter_frame_sender_sp_offset * wordSize)); // get sender sp
2261      __ leave();                                // remove frame anchor
2262      __ pop(retaddr);                           // get return address
2263      __ mov(rsp, sender_sp);                   // set sp to sender sp
2264      // Ensure compiled code always sees stack at proper alignment
2265      __ andptr(rsp, -(StackAlignmentInBytes));
2266
2267      // unlike x86 we need no specialized return from compiled code
2268      // to the interpreter or the call stub.
2269
2270      // push the return address
2271      __ push(retaddr);
2272
2273      // and begin the OSR nmethod
2274      __ jmp(Address(rbx, nmethod::osr_entry_point_offset()));
2275    }
2276  }
2277}
2278
2279void TemplateTable::if_0cmp(Condition cc) {
2280  transition(itos, vtos);
2281  // assume branch is more often taken than not (loops use backward branches)
2282  Label not_taken;
2283  __ testl(rax, rax);
2284  __ jcc(j_not(cc), not_taken);
2285  branch(false, false);
2286  __ bind(not_taken);
2287  __ profile_not_taken_branch(rax);
2288}
2289
2290void TemplateTable::if_icmp(Condition cc) {
2291  transition(itos, vtos);
2292  // assume branch is more often taken than not (loops use backward branches)
2293  Label not_taken;
2294  __ pop_i(rdx);
2295  __ cmpl(rdx, rax);
2296  __ jcc(j_not(cc), not_taken);
2297  branch(false, false);
2298  __ bind(not_taken);
2299  __ profile_not_taken_branch(rax);
2300}
2301
2302void TemplateTable::if_nullcmp(Condition cc) {
2303  transition(atos, vtos);
2304  // assume branch is more often taken than not (loops use backward branches)
2305  Label not_taken;
2306  __ testptr(rax, rax);
2307  __ jcc(j_not(cc), not_taken);
2308  branch(false, false);
2309  __ bind(not_taken);
2310  __ profile_not_taken_branch(rax);
2311}
2312
2313void TemplateTable::if_acmp(Condition cc) {
2314  transition(atos, vtos);
2315  // assume branch is more often taken than not (loops use backward branches)
2316  Label not_taken;
2317  __ pop_ptr(rdx);
2318  __ cmpptr(rdx, rax);
2319  __ jcc(j_not(cc), not_taken);
2320  branch(false, false);
2321  __ bind(not_taken);
2322  __ profile_not_taken_branch(rax);
2323}
2324
2325void TemplateTable::ret() {
2326  transition(vtos, vtos);
2327  locals_index(rbx);
2328  LP64_ONLY(__ movslq(rbx, iaddress(rbx))); // get return bci, compute return bcp
2329  NOT_LP64(__ movptr(rbx, iaddress(rbx)));
2330  __ profile_ret(rbx, rcx);
2331  __ get_method(rax);
2332  __ movptr(rbcp, Address(rax, Method::const_offset()));
2333  __ lea(rbcp, Address(rbcp, rbx, Address::times_1,
2334                      ConstMethod::codes_offset()));
2335  __ dispatch_next(vtos);
2336}
2337
2338void TemplateTable::wide_ret() {
2339  transition(vtos, vtos);
2340  locals_index_wide(rbx);
2341  __ movptr(rbx, aaddress(rbx)); // get return bci, compute return bcp
2342  __ profile_ret(rbx, rcx);
2343  __ get_method(rax);
2344  __ movptr(rbcp, Address(rax, Method::const_offset()));
2345  __ lea(rbcp, Address(rbcp, rbx, Address::times_1, ConstMethod::codes_offset()));
2346  __ dispatch_next(vtos);
2347}
2348
2349void TemplateTable::tableswitch() {
2350  Label default_case, continue_execution;
2351  transition(itos, vtos);
2352
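  // tableswitch layout (after the opcode, padded to a 4-byte boundary):
  //   default offset, low, high, then (high - low + 1) 32-bit jump offsets,
  //   all stored big-endian.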
2353  // align r13/rsi
2354  __ lea(rbx, at_bcp(BytesPerInt));
2355  __ andptr(rbx, -BytesPerInt);
2356  // load lo & hi
2357  __ movl(rcx, Address(rbx, BytesPerInt));
2358  __ movl(rdx, Address(rbx, 2 * BytesPerInt));
2359  __ bswapl(rcx);
2360  __ bswapl(rdx);
2361  // check against lo & hi
2362  __ cmpl(rax, rcx);
2363  __ jcc(Assembler::less, default_case);
2364  __ cmpl(rax, rdx);
2365  __ jcc(Assembler::greater, default_case);
2366  // lookup dispatch offset
2367  __ subl(rax, rcx);
2368  __ movl(rdx, Address(rbx, rax, Address::times_4, 3 * BytesPerInt));
2369  __ profile_switch_case(rax, rbx, rcx);
2370  // continue execution
2371  __ bind(continue_execution);
2372  __ bswapl(rdx);
2373  LP64_ONLY(__ movl2ptr(rdx, rdx));
2374  __ load_unsigned_byte(rbx, Address(rbcp, rdx, Address::times_1));
2375  __ addptr(rbcp, rdx);
2376  __ dispatch_only(vtos);
2377  // handle default
2378  __ bind(default_case);
2379  __ profile_switch_default(rax);
2380  __ movl(rdx, Address(rbx, 0));
2381  __ jmp(continue_execution);
2382}
2383
2384void TemplateTable::lookupswitch() {
2385  transition(itos, itos);
2386  __ stop("lookupswitch bytecode should have been rewritten");
2387}
2388
2389void TemplateTable::fast_linearswitch() {
2390  transition(itos, vtos);
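  // lookupswitch layout (after the opcode, padded to a 4-byte boundary):
  //   default offset, npairs, then npairs (match, offset) pairs, all big-endian.
  // The linear search below runs from the last pair towards the first.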
2391  Label loop_entry, loop, found, continue_execution;
2392  // bswap rax so we can avoid bswapping the table entries
2393  __ bswapl(rax);
2394  // align r13
2395  __ lea(rbx, at_bcp(BytesPerInt)); // btw: should be able to get rid of
2396                                    // this instruction (change offsets
2397                                    // below)
2398  __ andptr(rbx, -BytesPerInt);
2399  // set counter
2400  __ movl(rcx, Address(rbx, BytesPerInt));
2401  __ bswapl(rcx);
2402  __ jmpb(loop_entry);
2403  // table search
2404  __ bind(loop);
2405  __ cmpl(rax, Address(rbx, rcx, Address::times_8, 2 * BytesPerInt));
2406  __ jcc(Assembler::equal, found);
2407  __ bind(loop_entry);
2408  __ decrementl(rcx);
2409  __ jcc(Assembler::greaterEqual, loop);
2410  // default case
2411  __ profile_switch_default(rax);
2412  __ movl(rdx, Address(rbx, 0));
2413  __ jmp(continue_execution);
2414  // entry found -> get offset
2415  __ bind(found);
2416  __ movl(rdx, Address(rbx, rcx, Address::times_8, 3 * BytesPerInt));
2417  __ profile_switch_case(rcx, rax, rbx);
2418  // continue execution
2419  __ bind(continue_execution);
2420  __ bswapl(rdx);
2421  __ movl2ptr(rdx, rdx);
2422  __ load_unsigned_byte(rbx, Address(rbcp, rdx, Address::times_1));
2423  __ addptr(rbcp, rdx);
2424  __ dispatch_only(vtos);
2425}
2426
2427void TemplateTable::fast_binaryswitch() {
2428  transition(itos, vtos);
2429  // Implementation using the following core algorithm:
2430  //
2431  // int binary_search(int key, LookupswitchPair* array, int n) {
2432  //   // Binary search according to "Methodik des Programmierens" by
2433  //   // Edsger W. Dijkstra and W.H.J. Feijen, Addison Wesley Germany 1985.
2434  //   int i = 0;
2435  //   int j = n;
2436  //   while (i+1 < j) {
2437  //     // invariant P: 0 <= i < j <= n and (a[i] <= key < a[j] or Q)
2438  //     // with      Q: for all i: 0 <= i < n: key < a[i]
  //     // where a stands for the array and assuming that the (nonexistent)
2440  //     // element a[n] is infinitely big.
2441  //     int h = (i + j) >> 1;
2442  //     // i < h < j
2443  //     if (key < array[h].fast_match()) {
2444  //       j = h;
2445  //     } else {
2446  //       i = h;
2447  //     }
2448  //   }
2449  //   // R: a[i] <= key < a[i+1] or Q
2450  //   // (i.e., if key is within array, i is the correct index)
2451  //   return i;
2452  // }
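  // Each LookupswitchPair is two BytesPerInt words (match, offset), stored
  // big-endian; hence the Address::times_8 scaling used below.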
2453
2454  // Register allocation
2455  const Register key   = rax; // already set (tosca)
2456  const Register array = rbx;
2457  const Register i     = rcx;
2458  const Register j     = rdx;
2459  const Register h     = rdi;
2460  const Register temp  = rsi;
2461
2462  // Find array start
2463  NOT_LP64(__ save_bcp());
2464
2465  __ lea(array, at_bcp(3 * BytesPerInt)); // btw: should be able to
2466                                          // get rid of this
2467                                          // instruction (change
2468                                          // offsets below)
2469  __ andptr(array, -BytesPerInt);
2470
2471  // Initialize i & j
2472  __ xorl(i, i);                            // i = 0;
2473  __ movl(j, Address(array, -BytesPerInt)); // j = length(array);
2474
2475  // Convert j into native byteordering
2476  __ bswapl(j);
2477
2478  // And start
2479  Label entry;
2480  __ jmp(entry);
2481
2482  // binary search loop
2483  {
2484    Label loop;
2485    __ bind(loop);
2486    // int h = (i + j) >> 1;
2487    __ leal(h, Address(i, j, Address::times_1)); // h = i + j;
2488    __ sarl(h, 1);                               // h = (i + j) >> 1;
2489    // if (key < array[h].fast_match()) {
2490    //   j = h;
2491    // } else {
2492    //   i = h;
2493    // }
2494    // Convert array[h].match to native byte-ordering before compare
2495    __ movl(temp, Address(array, h, Address::times_8));
2496    __ bswapl(temp);
2497    __ cmpl(key, temp);
2498    // j = h if (key <  array[h].fast_match())
2499    __ cmov32(Assembler::less, j, h);
2500    // i = h if (key >= array[h].fast_match())
2501    __ cmov32(Assembler::greaterEqual, i, h);
2502    // while (i+1 < j)
2503    __ bind(entry);
2504    __ leal(h, Address(i, 1)); // i+1
2505    __ cmpl(h, j);             // i+1 < j
2506    __ jcc(Assembler::less, loop);
2507  }
2508
2509  // end of binary search, result index is i (must check again!)
2510  Label default_case;
2511  // Convert array[i].match to native byte-ordering before compare
2512  __ movl(temp, Address(array, i, Address::times_8));
2513  __ bswapl(temp);
2514  __ cmpl(key, temp);
2515  __ jcc(Assembler::notEqual, default_case);
2516
2517  // entry found -> j = offset
2518  __ movl(j , Address(array, i, Address::times_8, BytesPerInt));
2519  __ profile_switch_case(i, key, array);
2520  __ bswapl(j);
2521  LP64_ONLY(__ movslq(j, j));
2522
2523  NOT_LP64(__ restore_bcp());
2524  NOT_LP64(__ restore_locals());                           // restore rdi
2525
2526  __ load_unsigned_byte(rbx, Address(rbcp, j, Address::times_1));
2527  __ addptr(rbcp, j);
2528  __ dispatch_only(vtos);
2529
2530  // default case -> j = default offset
2531  __ bind(default_case);
2532  __ profile_switch_default(i);
2533  __ movl(j, Address(array, -2 * BytesPerInt));
2534  __ bswapl(j);
2535  LP64_ONLY(__ movslq(j, j));
2536
2537  NOT_LP64(__ restore_bcp());
2538  NOT_LP64(__ restore_locals());
2539
2540  __ load_unsigned_byte(rbx, Address(rbcp, j, Address::times_1));
2541  __ addptr(rbcp, j);
2542  __ dispatch_only(vtos);
2543}
2544
2545void TemplateTable::_return(TosState state) {
2546  transition(state, state);
2547
2548  assert(_desc->calls_vm(),
2549         "inconsistent calls_vm information"); // call in remove_activation
2550
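  // _return_register_finalizer is the rewritten return of Object.<init>: if the
  // receiver's class has a finalizer, register the object with the VM before
  // the activation is removed.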
2551  if (_desc->bytecode() == Bytecodes::_return_register_finalizer) {
2552    assert(state == vtos, "only valid state");
2553    Register robj = LP64_ONLY(c_rarg1) NOT_LP64(rax);
2554    __ movptr(robj, aaddress(0));
2555    __ load_klass(rdi, robj);
2556    __ movl(rdi, Address(rdi, Klass::access_flags_offset()));
2557    __ testl(rdi, JVM_ACC_HAS_FINALIZER);
2558    Label skip_register_finalizer;
2559    __ jcc(Assembler::zero, skip_register_finalizer);
2560
2561    __ call_VM(noreg, CAST_FROM_FN_PTR(address, InterpreterRuntime::register_finalizer), robj);
2562
2563    __ bind(skip_register_finalizer);
2564  }
2565
2566  // Narrow result if state is itos but result type is smaller.
2567  // Need to narrow in the return bytecode rather than in generate_return_entry
2568  // since compiled code callers expect the result to already be narrowed.
2569  if (state == itos) {
2570    __ narrow(rax);
2571  }
2572  __ remove_activation(state, rbcp);
2573
2574  __ jmp(rbcp);
2575}
2576
2577// ----------------------------------------------------------------------------
// Volatile variables demand their effects be made known to all CPUs
2579// in order.  Store buffers on most chips allow reads & writes to
2580// reorder; the JMM's ReadAfterWrite.java test fails in -Xint mode
2581// without some kind of memory barrier (i.e., it's not sufficient that
2582// the interpreter does not reorder volatile references, the hardware
2583// also must not reorder them).
2584//
2585// According to the new Java Memory Model (JMM):
// (1) All volatiles are serialized with respect to each other.  ALSO reads &
//     writes act as acquire & release, so:
2588// (2) A read cannot let unrelated NON-volatile memory refs that
2589//     happen after the read float up to before the read.  It's OK for
2590//     non-volatile memory refs that happen before the volatile read to
2591//     float down below it.
// (3) Similarly, a volatile write cannot let unrelated NON-volatile
2593//     memory refs that happen BEFORE the write float down to after the
2594//     write.  It's OK for non-volatile memory refs that happen after the
2595//     volatile write to float up before it.
2596//
2597// We only put in barriers around volatile refs (they are expensive),
2598// not _between_ memory refs (that would require us to track the
2599// flavor of the previous memory refs).  Requirements (2) and (3)
2600// require some barriers before volatile stores and after volatile
2601// loads.  These nearly cover requirement (1) but miss the
2602// volatile-store-volatile-load case.  This final case is placed after
2603// volatile-stores although it could just as well go before
2604// volatile-loads.
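//
// On x86 this file only emits an explicit barrier after volatile stores
// (membar(StoreLoad | StoreStore), see putfield_or_static and fast_storefield);
// volatile loads rely on the hardware ordering guarantees.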
2605
2606void TemplateTable::volatile_barrier(Assembler::Membar_mask_bits order_constraint ) {
  // Helper function to emit the memory barrier for a volatile access;
  // callers have already performed the is-volatile test.
  if (!os::is_MP()) return;    // Not needed on single CPU
2609  __ membar(order_constraint);
2610}
2611
2612void TemplateTable::resolve_cache_and_index(int byte_no,
2613                                            Register Rcache,
2614                                            Register index,
2615                                            size_t index_size) {
2616  const Register temp = rbx;
2617  assert_different_registers(Rcache, index, temp);
2618
2619  Label resolved;
2620
2621  Bytecodes::Code code = bytecode();
2622  switch (code) {
2623  case Bytecodes::_nofast_getfield: code = Bytecodes::_getfield; break;
2624  case Bytecodes::_nofast_putfield: code = Bytecodes::_putfield; break;
  default: break;
  }
2626
2627  assert(byte_no == f1_byte || byte_no == f2_byte, "byte_no out of range");
2628  __ get_cache_and_index_and_bytecode_at_bcp(Rcache, index, temp, byte_no, 1, index_size);
2629  __ cmpl(temp, code);  // have we resolved this bytecode?
2630  __ jcc(Assembler::equal, resolved);
2631
2632  // resolve first time through
2633  address entry = CAST_FROM_FN_PTR(address, InterpreterRuntime::resolve_from_cache);
2634  __ movl(temp, code);
2635  __ call_VM(noreg, entry, temp);
2636  // Update registers with resolved info
2637  __ get_cache_and_index_at_bcp(Rcache, index, 1, index_size);
2638  __ bind(resolved);
2639}
2640
// The cache and index registers must be set before the call
2642void TemplateTable::load_field_cp_cache_entry(Register obj,
2643                                              Register cache,
2644                                              Register index,
2645                                              Register off,
2646                                              Register flags,
2647                                              bool is_static = false) {
2648  assert_different_registers(cache, index, flags, off);
2649
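  // In the resolved cache entry: f2 holds the field offset, flags holds the tos
  // state and attribute bits (e.g. volatility), and for static fields f1 holds
  // the field holder Klass*, from which the java mirror is loaded below.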
2650  ByteSize cp_base_offset = ConstantPoolCache::base_offset();
2651  // Field offset
2652  __ movptr(off, Address(cache, index, Address::times_ptr,
2653                         in_bytes(cp_base_offset +
2654                                  ConstantPoolCacheEntry::f2_offset())));
2655  // Flags
2656  __ movl(flags, Address(cache, index, Address::times_ptr,
2657                         in_bytes(cp_base_offset +
2658                                  ConstantPoolCacheEntry::flags_offset())));
2659
2660  // klass overwrite register
2661  if (is_static) {
2662    __ movptr(obj, Address(cache, index, Address::times_ptr,
2663                           in_bytes(cp_base_offset +
2664                                    ConstantPoolCacheEntry::f1_offset())));
2665    const int mirror_offset = in_bytes(Klass::java_mirror_offset());
2666    __ movptr(obj, Address(obj, mirror_offset));
2667  }
2668}
2669
2670void TemplateTable::load_invoke_cp_cache_entry(int byte_no,
2671                                               Register method,
2672                                               Register itable_index,
2673                                               Register flags,
2674                                               bool is_invokevirtual,
2675                                               bool is_invokevfinal, /*unused*/
2676                                               bool is_invokedynamic) {
2677  // setup registers
2678  const Register cache = rcx;
2679  const Register index = rdx;
2680  assert_different_registers(method, flags);
2681  assert_different_registers(method, cache, index);
2682  assert_different_registers(itable_index, flags);
2683  assert_different_registers(itable_index, cache, index);
2684  // determine constant pool cache field offsets
2685  assert(is_invokevirtual == (byte_no == f2_byte), "is_invokevirtual flag redundant");
2686  const int method_offset = in_bytes(
2687    ConstantPoolCache::base_offset() +
2688      ((byte_no == f2_byte)
2689       ? ConstantPoolCacheEntry::f2_offset()
2690       : ConstantPoolCacheEntry::f1_offset()));
2691  const int flags_offset = in_bytes(ConstantPoolCache::base_offset() +
2692                                    ConstantPoolCacheEntry::flags_offset());
2693  // access constant pool cache fields
2694  const int index_offset = in_bytes(ConstantPoolCache::base_offset() +
2695                                    ConstantPoolCacheEntry::f2_offset());
2696
2697  size_t index_size = (is_invokedynamic ? sizeof(u4) : sizeof(u2));
2698  resolve_cache_and_index(byte_no, cache, index, index_size);
  __ movptr(method, Address(cache, index, Address::times_ptr, method_offset));
2700
2701  if (itable_index != noreg) {
2702    // pick up itable or appendix index from f2 also:
2703    __ movptr(itable_index, Address(cache, index, Address::times_ptr, index_offset));
2704  }
2705  __ movl(flags, Address(cache, index, Address::times_ptr, flags_offset));
2706}
2707
// The cache and index registers are expected to be set before the call.
// The correct values of the cache and index registers are preserved.
2710void TemplateTable::jvmti_post_field_access(Register cache,
2711                                            Register index,
2712                                            bool is_static,
2713                                            bool has_tos) {
2714  if (JvmtiExport::can_post_field_access()) {
2715    // Check to see if a field access watch has been set before we take
2716    // the time to call into the VM.
2717    Label L1;
2718    assert_different_registers(cache, index, rax);
2719    __ mov32(rax, ExternalAddress((address) JvmtiExport::get_field_access_count_addr()));
2720    __ testl(rax,rax);
2721    __ jcc(Assembler::zero, L1);
2722
2723    // cache entry pointer
2724    __ addptr(cache, in_bytes(ConstantPoolCache::base_offset()));
2725    __ shll(index, LogBytesPerWord);
2726    __ addptr(cache, index);
2727    if (is_static) {
2728      __ xorptr(rax, rax);      // NULL object reference
2729    } else {
2730      __ pop(atos);         // Get the object
2731      __ verify_oop(rax);
2732      __ push(atos);        // Restore stack state
2733    }
    // rax:    object pointer or NULL
2735    // cache: cache entry pointer
2736    __ call_VM(noreg, CAST_FROM_FN_PTR(address, InterpreterRuntime::post_field_access),
2737               rax, cache);
2738    __ get_cache_and_index_at_bcp(cache, index, 1);
2739    __ bind(L1);
2740  }
2741}
2742
2743void TemplateTable::pop_and_check_object(Register r) {
2744  __ pop_ptr(r);
2745  __ null_check(r);  // for field access must check obj.
2746  __ verify_oop(r);
2747}
2748
2749void TemplateTable::getfield_or_static(int byte_no, bool is_static, RewriteControl rc) {
2750  transition(vtos, vtos);
2751
2752  const Register cache = rcx;
2753  const Register index = rdx;
2754  const Register obj   = LP64_ONLY(c_rarg3) NOT_LP64(rcx);
2755  const Register off   = rbx;
2756  const Register flags = rax;
2757  const Register bc    = LP64_ONLY(c_rarg3) NOT_LP64(rcx); // uses same reg as obj, so don't mix them
2758
2759  resolve_cache_and_index(byte_no, cache, index, sizeof(u2));
2760  jvmti_post_field_access(cache, index, is_static, false);
2761  load_field_cp_cache_entry(obj, cache, index, off, flags, is_static);
2762
2763  if (!is_static) pop_and_check_object(obj);
2764
2765  const Address field(obj, off, Address::times_1, 0*wordSize);
2766  NOT_LP64(const Address hi(obj, off, Address::times_1, 1*wordSize));
2767
2768  Label Done, notByte, notBool, notInt, notShort, notChar, notLong, notFloat, notObj, notDouble;
2769
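  // The field type (tos state) is encoded in the upper bits of flags; extract
  // it and dispatch on it below.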
2770  __ shrl(flags, ConstantPoolCacheEntry::tos_state_shift);
  // The byte-case dispatch below relies on btos being zero
2772  assert(btos == 0, "change code, btos != 0");
2773
2774  __ andl(flags, ConstantPoolCacheEntry::tos_state_mask);
2775
2776  __ jcc(Assembler::notZero, notByte);
2777  // btos
2778  __ load_signed_byte(rax, field);
2779  __ push(btos);
2780  // Rewrite bytecode to be faster
2781  if (!is_static && rc == may_rewrite) {
2782    patch_bytecode(Bytecodes::_fast_bgetfield, bc, rbx);
2783  }
2784  __ jmp(Done);
2785
2786  __ bind(notByte);
2787  __ cmpl(flags, ztos);
2788  __ jcc(Assembler::notEqual, notBool);
2789
2790  // ztos (same code as btos)
2791  __ load_signed_byte(rax, field);
2792  __ push(ztos);
2793  // Rewrite bytecode to be faster
2794  if (!is_static && rc == may_rewrite) {
2795    // use btos rewriting, no truncating to t/f bit is needed for getfield.
2796    patch_bytecode(Bytecodes::_fast_bgetfield, bc, rbx);
2797  }
2798  __ jmp(Done);
2799
2800  __ bind(notBool);
2801  __ cmpl(flags, atos);
2802  __ jcc(Assembler::notEqual, notObj);
2803  // atos
2804  __ load_heap_oop(rax, field);
2805  __ push(atos);
2806  if (!is_static && rc == may_rewrite) {
2807    patch_bytecode(Bytecodes::_fast_agetfield, bc, rbx);
2808  }
2809  __ jmp(Done);
2810
2811  __ bind(notObj);
2812  __ cmpl(flags, itos);
2813  __ jcc(Assembler::notEqual, notInt);
2814  // itos
2815  __ movl(rax, field);
2816  __ push(itos);
2817  // Rewrite bytecode to be faster
2818  if (!is_static && rc == may_rewrite) {
2819    patch_bytecode(Bytecodes::_fast_igetfield, bc, rbx);
2820  }
2821  __ jmp(Done);
2822
2823  __ bind(notInt);
2824  __ cmpl(flags, ctos);
2825  __ jcc(Assembler::notEqual, notChar);
2826  // ctos
2827  __ load_unsigned_short(rax, field);
2828  __ push(ctos);
2829  // Rewrite bytecode to be faster
2830  if (!is_static && rc == may_rewrite) {
2831    patch_bytecode(Bytecodes::_fast_cgetfield, bc, rbx);
2832  }
2833  __ jmp(Done);
2834
2835  __ bind(notChar);
2836  __ cmpl(flags, stos);
2837  __ jcc(Assembler::notEqual, notShort);
2838  // stos
2839  __ load_signed_short(rax, field);
2840  __ push(stos);
2841  // Rewrite bytecode to be faster
2842  if (!is_static && rc == may_rewrite) {
2843    patch_bytecode(Bytecodes::_fast_sgetfield, bc, rbx);
2844  }
2845  __ jmp(Done);
2846
2847  __ bind(notShort);
2848  __ cmpl(flags, ltos);
2849  __ jcc(Assembler::notEqual, notLong);
2850  // ltos
2851
2852#ifndef _LP64
  // Generate code as if the field were volatile.  There just aren't enough
  // registers to save that information, and this code is faster than the test.
2855  __ fild_d(field);                // Must load atomically
2856  __ subptr(rsp,2*wordSize);    // Make space for store
2857  __ fistp_d(Address(rsp,0));
2858  __ pop(rax);
2859  __ pop(rdx);
2860#else
2861  __ movq(rax, field);
2862#endif
2863
2864  __ push(ltos);
2865  // Rewrite bytecode to be faster
2866  LP64_ONLY(if (!is_static && rc == may_rewrite) patch_bytecode(Bytecodes::_fast_lgetfield, bc, rbx));
2867  __ jmp(Done);
2868
2869  __ bind(notLong);
2870  __ cmpl(flags, ftos);
2871  __ jcc(Assembler::notEqual, notFloat);
2872  // ftos
2873
2874  __ load_float(field);
2875  __ push(ftos);
2876  // Rewrite bytecode to be faster
2877  if (!is_static && rc == may_rewrite) {
2878    patch_bytecode(Bytecodes::_fast_fgetfield, bc, rbx);
2879  }
2880  __ jmp(Done);
2881
2882  __ bind(notFloat);
2883#ifdef ASSERT
2884  __ cmpl(flags, dtos);
2885  __ jcc(Assembler::notEqual, notDouble);
2886#endif
2887  // dtos
2888  __ load_double(field);
2889  __ push(dtos);
2890  // Rewrite bytecode to be faster
2891  if (!is_static && rc == may_rewrite) {
2892    patch_bytecode(Bytecodes::_fast_dgetfield, bc, rbx);
2893  }
2894#ifdef ASSERT
2895  __ jmp(Done);
2896
2897
2898  __ bind(notDouble);
2899  __ stop("Bad state");
2900#endif
2901
2902  __ bind(Done);
2903  // [jk] not needed currently
2904  // volatile_barrier(Assembler::Membar_mask_bits(Assembler::LoadLoad |
2905  //                                              Assembler::LoadStore));
2906}
2907
2908void TemplateTable::getfield(int byte_no) {
2909  getfield_or_static(byte_no, false);
2910}
2911
2912void TemplateTable::nofast_getfield(int byte_no) {
2913  getfield_or_static(byte_no, false, may_not_rewrite);
2914}
2915
2916void TemplateTable::getstatic(int byte_no) {
2917  getfield_or_static(byte_no, true);
2918}
2919
2920
// The cache and index registers are expected to be set before the call.
// The function may destroy various registers, but not the cache and index registers.
2923void TemplateTable::jvmti_post_field_mod(Register cache, Register index, bool is_static) {
2924
2925  const Register robj = LP64_ONLY(c_rarg2)   NOT_LP64(rax);
2926  const Register RBX  = LP64_ONLY(c_rarg1)   NOT_LP64(rbx);
2927  const Register RCX  = LP64_ONLY(c_rarg3)   NOT_LP64(rcx);
2928  const Register RDX  = LP64_ONLY(rscratch1) NOT_LP64(rdx);
2929
2930  ByteSize cp_base_offset = ConstantPoolCache::base_offset();
2931
2932  if (JvmtiExport::can_post_field_modification()) {
2933    // Check to see if a field modification watch has been set before
2934    // we take the time to call into the VM.
2935    Label L1;
2936    assert_different_registers(cache, index, rax);
2937    __ mov32(rax, ExternalAddress((address)JvmtiExport::get_field_modification_count_addr()));
2938    __ testl(rax, rax);
2939    __ jcc(Assembler::zero, L1);
2940
2941    __ get_cache_and_index_at_bcp(robj, RDX, 1);
2942
2943
2944    if (is_static) {
2945      // Life is simple.  Null out the object pointer.
2946      __ xorl(RBX, RBX);
2947
2948    } else {
2949      // Life is harder. The stack holds the value on top, followed by
2950      // the object.  We don't know the size of the value, though; it
2951      // could be one or two words depending on its type. As a result,
2952      // we must find the type to determine where the object is.
2953#ifndef _LP64
2954      Label two_word, valsize_known;
2955#endif
2956      __ movl(RCX, Address(robj, RDX,
2957                           Address::times_ptr,
2958                           in_bytes(cp_base_offset +
2959                                     ConstantPoolCacheEntry::flags_offset())));
2960      NOT_LP64(__ mov(rbx, rsp));
2961      __ shrl(RCX, ConstantPoolCacheEntry::tos_state_shift);
2962
2963      // Make sure we don't need to mask rcx after the above shift
2964      ConstantPoolCacheEntry::verify_tos_state_shift();
2965#ifdef _LP64
2966      __ movptr(c_rarg1, at_tos_p1());  // initially assume a one word jvalue
2967      __ cmpl(c_rarg3, ltos);
2968      __ cmovptr(Assembler::equal,
2969                 c_rarg1, at_tos_p2()); // ltos (two word jvalue)
2970      __ cmpl(c_rarg3, dtos);
2971      __ cmovptr(Assembler::equal,
2972                 c_rarg1, at_tos_p2()); // dtos (two word jvalue)
2973#else
2974      __ cmpl(rcx, ltos);
2975      __ jccb(Assembler::equal, two_word);
2976      __ cmpl(rcx, dtos);
2977      __ jccb(Assembler::equal, two_word);
2978      __ addptr(rbx, Interpreter::expr_offset_in_bytes(1)); // one word jvalue (not ltos, dtos)
2979      __ jmpb(valsize_known);
2980
2981      __ bind(two_word);
2982      __ addptr(rbx, Interpreter::expr_offset_in_bytes(2)); // two words jvalue
2983
2984      __ bind(valsize_known);
2985      // setup object pointer
2986      __ movptr(rbx, Address(rbx, 0));
2987#endif
2988    }
2989    // cache entry pointer
2990    __ addptr(robj, in_bytes(cp_base_offset));
2991    __ shll(RDX, LogBytesPerWord);
2992    __ addptr(robj, RDX);
2993    // object (tos)
2994    __ mov(RCX, rsp);
2995    // c_rarg1: object pointer set up above (NULL if static)
2996    // c_rarg2: cache entry pointer
2997    // c_rarg3: jvalue object on the stack
2998    __ call_VM(noreg,
2999               CAST_FROM_FN_PTR(address,
3000                                InterpreterRuntime::post_field_modification),
3001               RBX, robj, RCX);
3002    __ get_cache_and_index_at_bcp(cache, index, 1);
3003    __ bind(L1);
3004  }
3005}
3006
3007void TemplateTable::putfield_or_static(int byte_no, bool is_static, RewriteControl rc) {
3008  transition(vtos, vtos);
3009
3010  const Register cache = rcx;
3011  const Register index = rdx;
3012  const Register obj   = rcx;
3013  const Register off   = rbx;
3014  const Register flags = rax;
3015  const Register bc    = LP64_ONLY(c_rarg3) NOT_LP64(rcx);
3016
3017  resolve_cache_and_index(byte_no, cache, index, sizeof(u2));
3018  jvmti_post_field_mod(cache, index, is_static);
3019  load_field_cp_cache_entry(obj, cache, index, off, flags, is_static);
3020
3021  // [jk] not needed currently
3022  // volatile_barrier(Assembler::Membar_mask_bits(Assembler::LoadStore |
3023  //                                              Assembler::StoreStore));
3024
3025  Label notVolatile, Done;
3026  __ movl(rdx, flags);
3027  __ shrl(rdx, ConstantPoolCacheEntry::is_volatile_shift);
3028  __ andl(rdx, 0x1);
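  // rdx is now 1 for a volatile field and 0 otherwise; it is tested at Done to
  // decide whether a StoreLoad|StoreStore barrier is needed (the 32-bit ltos
  // case clobbers rdx and therefore bypasses Done).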
3029
3030  // field addresses
3031  const Address field(obj, off, Address::times_1, 0*wordSize);
3032  NOT_LP64( const Address hi(obj, off, Address::times_1, 1*wordSize);)
3033
3034  Label notByte, notBool, notInt, notShort, notChar,
3035        notLong, notFloat, notObj, notDouble;
3036
3037  __ shrl(flags, ConstantPoolCacheEntry::tos_state_shift);
3038
3039  assert(btos == 0, "change code, btos != 0");
3040  __ andl(flags, ConstantPoolCacheEntry::tos_state_mask);
3041  __ jcc(Assembler::notZero, notByte);
3042
3043  // btos
3044  {
3045    __ pop(btos);
3046    if (!is_static) pop_and_check_object(obj);
3047    __ movb(field, rax);
3048    if (!is_static && rc == may_rewrite) {
3049      patch_bytecode(Bytecodes::_fast_bputfield, bc, rbx, true, byte_no);
3050    }
3051    __ jmp(Done);
3052  }
3053
3054  __ bind(notByte);
3055  __ cmpl(flags, ztos);
3056  __ jcc(Assembler::notEqual, notBool);
3057
3058  // ztos
3059  {
3060    __ pop(ztos);
3061    if (!is_static) pop_and_check_object(obj);
3062    __ andl(rax, 0x1);
3063    __ movb(field, rax);
3064    if (!is_static && rc == may_rewrite) {
3065      patch_bytecode(Bytecodes::_fast_zputfield, bc, rbx, true, byte_no);
3066    }
3067    __ jmp(Done);
3068  }
3069
3070  __ bind(notBool);
3071  __ cmpl(flags, atos);
3072  __ jcc(Assembler::notEqual, notObj);
3073
3074  // atos
3075  {
3076    __ pop(atos);
3077    if (!is_static) pop_and_check_object(obj);
3078    // Store into the field
3079    do_oop_store(_masm, field, rax, _bs->kind(), false);
3080    if (!is_static && rc == may_rewrite) {
3081      patch_bytecode(Bytecodes::_fast_aputfield, bc, rbx, true, byte_no);
3082    }
3083    __ jmp(Done);
3084  }
3085
3086  __ bind(notObj);
3087  __ cmpl(flags, itos);
3088  __ jcc(Assembler::notEqual, notInt);
3089
3090  // itos
3091  {
3092    __ pop(itos);
3093    if (!is_static) pop_and_check_object(obj);
3094    __ movl(field, rax);
3095    if (!is_static && rc == may_rewrite) {
3096      patch_bytecode(Bytecodes::_fast_iputfield, bc, rbx, true, byte_no);
3097    }
3098    __ jmp(Done);
3099  }
3100
3101  __ bind(notInt);
3102  __ cmpl(flags, ctos);
3103  __ jcc(Assembler::notEqual, notChar);
3104
3105  // ctos
3106  {
3107    __ pop(ctos);
3108    if (!is_static) pop_and_check_object(obj);
3109    __ movw(field, rax);
3110    if (!is_static && rc == may_rewrite) {
3111      patch_bytecode(Bytecodes::_fast_cputfield, bc, rbx, true, byte_no);
3112    }
3113    __ jmp(Done);
3114  }
3115
3116  __ bind(notChar);
3117  __ cmpl(flags, stos);
3118  __ jcc(Assembler::notEqual, notShort);
3119
3120  // stos
3121  {
3122    __ pop(stos);
3123    if (!is_static) pop_and_check_object(obj);
3124    __ movw(field, rax);
3125    if (!is_static && rc == may_rewrite) {
3126      patch_bytecode(Bytecodes::_fast_sputfield, bc, rbx, true, byte_no);
3127    }
3128    __ jmp(Done);
3129  }
3130
3131  __ bind(notShort);
3132  __ cmpl(flags, ltos);
3133  __ jcc(Assembler::notEqual, notLong);
3134
3135  // ltos
3136#ifdef _LP64
3137  {
3138    __ pop(ltos);
3139    if (!is_static) pop_and_check_object(obj);
3140    __ movq(field, rax);
3141    if (!is_static && rc == may_rewrite) {
3142      patch_bytecode(Bytecodes::_fast_lputfield, bc, rbx, true, byte_no);
3143    }
3144    __ jmp(Done);
3145  }
3146#else
3147  {
3148    Label notVolatileLong;
3149    __ testl(rdx, rdx);
3150    __ jcc(Assembler::zero, notVolatileLong);
3151
3152    __ pop(ltos);  // overwrites rdx, do this after testing volatile.
3153    if (!is_static) pop_and_check_object(obj);
3154
3155    // Replace with real volatile test
3156    __ push(rdx);
3157    __ push(rax);                 // Must update atomically with FIST
3158    __ fild_d(Address(rsp,0));    // So load into FPU register
3159    __ fistp_d(field);            // and put into memory atomically
3160    __ addptr(rsp, 2*wordSize);
3161    // volatile_barrier();
3162    volatile_barrier(Assembler::Membar_mask_bits(Assembler::StoreLoad |
3163                                                 Assembler::StoreStore));
3164    // Don't rewrite volatile version
3165    __ jmp(notVolatile);
3166
3167    __ bind(notVolatileLong);
3168
3169    __ pop(ltos);  // overwrites rdx
3170    if (!is_static) pop_and_check_object(obj);
3171    __ movptr(hi, rdx);
3172    __ movptr(field, rax);
3173    // Don't rewrite to _fast_lputfield for potential volatile case.
3174    __ jmp(notVolatile);
3175  }
3176#endif // _LP64
3177
3178  __ bind(notLong);
3179  __ cmpl(flags, ftos);
3180  __ jcc(Assembler::notEqual, notFloat);
3181
3182  // ftos
3183  {
3184    __ pop(ftos);
3185    if (!is_static) pop_and_check_object(obj);
3186    __ store_float(field);
3187    if (!is_static && rc == may_rewrite) {
3188      patch_bytecode(Bytecodes::_fast_fputfield, bc, rbx, true, byte_no);
3189    }
3190    __ jmp(Done);
3191  }
3192
3193  __ bind(notFloat);
3194#ifdef ASSERT
3195  __ cmpl(flags, dtos);
3196  __ jcc(Assembler::notEqual, notDouble);
3197#endif
3198
3199  // dtos
3200  {
3201    __ pop(dtos);
3202    if (!is_static) pop_and_check_object(obj);
3203    __ store_double(field);
3204    if (!is_static && rc == may_rewrite) {
3205      patch_bytecode(Bytecodes::_fast_dputfield, bc, rbx, true, byte_no);
3206    }
3207  }
3208
3209#ifdef ASSERT
3210  __ jmp(Done);
3211
3212  __ bind(notDouble);
3213  __ stop("Bad state");
3214#endif
3215
3216  __ bind(Done);
3217
3218  // Check for volatile store
3219  __ testl(rdx, rdx);
3220  __ jcc(Assembler::zero, notVolatile);
3221  volatile_barrier(Assembler::Membar_mask_bits(Assembler::StoreLoad |
3222                                               Assembler::StoreStore));
3223  __ bind(notVolatile);
3224}
3225
3226void TemplateTable::putfield(int byte_no) {
3227  putfield_or_static(byte_no, false);
3228}
3229
3230void TemplateTable::nofast_putfield(int byte_no) {
3231  putfield_or_static(byte_no, false, may_not_rewrite);
3232}
3233
3234void TemplateTable::putstatic(int byte_no) {
3235  putfield_or_static(byte_no, true);
3236}
3237
3238void TemplateTable::jvmti_post_fast_field_mod() {
3239
3240  const Register scratch = LP64_ONLY(c_rarg3) NOT_LP64(rcx);
3241
3242  if (JvmtiExport::can_post_field_modification()) {
3243    // Check to see if a field modification watch has been set before
3244    // we take the time to call into the VM.
3245    Label L2;
3246    __ mov32(scratch, ExternalAddress((address)JvmtiExport::get_field_modification_count_addr()));
3247    __ testl(scratch, scratch);
3248    __ jcc(Assembler::zero, L2);
3249    __ pop_ptr(rbx);                  // copy the object pointer from tos
3250    __ verify_oop(rbx);
3251    __ push_ptr(rbx);                 // put the object pointer back on tos
3252    // Save tos values before call_VM() clobbers them. Since we have
3253    // to do it for every data type, we use the saved values as the
3254    // jvalue object.
3255    switch (bytecode()) {          // load values into the jvalue object
3256    case Bytecodes::_fast_aputfield: __ push_ptr(rax); break;
3257    case Bytecodes::_fast_bputfield: // fall through
3258    case Bytecodes::_fast_zputfield: // fall through
3259    case Bytecodes::_fast_sputfield: // fall through
3260    case Bytecodes::_fast_cputfield: // fall through
3261    case Bytecodes::_fast_iputfield: __ push_i(rax); break;
3262    case Bytecodes::_fast_dputfield: __ push(dtos); break;
3263    case Bytecodes::_fast_fputfield: __ push(ftos); break;
3264    case Bytecodes::_fast_lputfield: __ push_l(rax); break;
3265
3266    default:
3267      ShouldNotReachHere();
3268    }
3269    __ mov(scratch, rsp);             // points to jvalue on the stack
3270    // access constant pool cache entry
3271    LP64_ONLY(__ get_cache_entry_pointer_at_bcp(c_rarg2, rax, 1));
3272    NOT_LP64(__ get_cache_entry_pointer_at_bcp(rax, rdx, 1));
3273    __ verify_oop(rbx);
3274    // rbx: object pointer copied above
3275    // c_rarg2: cache entry pointer
3276    // c_rarg3: jvalue object on the stack
3277    LP64_ONLY(__ call_VM(noreg, CAST_FROM_FN_PTR(address, InterpreterRuntime::post_field_modification), rbx, c_rarg2, c_rarg3));
3278    NOT_LP64(__ call_VM(noreg, CAST_FROM_FN_PTR(address, InterpreterRuntime::post_field_modification), rbx, rax, rcx));
3279
3280    switch (bytecode()) {             // restore tos values
3281    case Bytecodes::_fast_aputfield: __ pop_ptr(rax); break;
3282    case Bytecodes::_fast_bputfield: // fall through
3283    case Bytecodes::_fast_zputfield: // fall through
3284    case Bytecodes::_fast_sputfield: // fall through
3285    case Bytecodes::_fast_cputfield: // fall through
3286    case Bytecodes::_fast_iputfield: __ pop_i(rax); break;
3287    case Bytecodes::_fast_dputfield: __ pop(dtos); break;
3288    case Bytecodes::_fast_fputfield: __ pop(ftos); break;
3289    case Bytecodes::_fast_lputfield: __ pop_l(rax); break;
3290    }
3291    __ bind(L2);
3292  }
3293}
3294
3295void TemplateTable::fast_storefield(TosState state) {
3296  transition(state, vtos);
3297
3298  ByteSize base = ConstantPoolCache::base_offset();
3299
3300  jvmti_post_fast_field_mod();
3301
3302  // access constant pool cache
3303  __ get_cache_and_index_at_bcp(rcx, rbx, 1);
3304
3305  // test for volatile with rdx, but rdx is the tos register for lputfield.
3306  __ movl(rdx, Address(rcx, rbx, Address::times_ptr,
3307                       in_bytes(base +
3308                                ConstantPoolCacheEntry::flags_offset())));
3309
3310  // replace index with field offset from cache entry
3311  __ movptr(rbx, Address(rcx, rbx, Address::times_ptr,
3312                         in_bytes(base + ConstantPoolCacheEntry::f2_offset())));
3313
3314  // [jk] not needed currently
3315  // volatile_barrier(Assembler::Membar_mask_bits(Assembler::LoadStore |
3316  //                                              Assembler::StoreStore));
3317
3318  Label notVolatile;
3319  __ shrl(rdx, ConstantPoolCacheEntry::is_volatile_shift);
3320  __ andl(rdx, 0x1);
3321
3322  // Get object from stack
3323  pop_and_check_object(rcx);
3324
3325  // field address
3326  const Address field(rcx, rbx, Address::times_1);
3327
3328  // access field
3329  switch (bytecode()) {
3330  case Bytecodes::_fast_aputfield:
3331    do_oop_store(_masm, field, rax, _bs->kind(), false);
3332    break;
3333  case Bytecodes::_fast_lputfield:
3334#ifdef _LP64
3335    __ movq(field, rax);
3336#else
3337    __ stop("should not be rewritten");
3338#endif
3339    break;
3340  case Bytecodes::_fast_iputfield:
3341    __ movl(field, rax);
3342    break;
3343  case Bytecodes::_fast_zputfield:
3344    __ andl(rax, 0x1);  // boolean is true if LSB is 1
3345    // fall through to bputfield
3346  case Bytecodes::_fast_bputfield:
3347    __ movb(field, rax);
3348    break;
3349  case Bytecodes::_fast_sputfield:
3350    // fall through
3351  case Bytecodes::_fast_cputfield:
3352    __ movw(field, rax);
3353    break;
3354  case Bytecodes::_fast_fputfield:
3355    __ store_float(field);
3356    break;
3357  case Bytecodes::_fast_dputfield:
3358    __ store_double(field);
3359    break;
3360  default:
3361    ShouldNotReachHere();
3362  }
3363
3364  // Check for volatile store
3365  __ testl(rdx, rdx);
3366  __ jcc(Assembler::zero, notVolatile);
3367  volatile_barrier(Assembler::Membar_mask_bits(Assembler::StoreLoad |
3368                                               Assembler::StoreStore));
3369  __ bind(notVolatile);
3370}
3371
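// fast_accessfield implements the rewritten _fast_xgetfield bytecodes: the
// receiver is on tos (atos) and the resolved field offset comes from the
// cache entry's f2 slot.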
3372void TemplateTable::fast_accessfield(TosState state) {
3373  transition(atos, state);
3374
3375  // Do the JVMTI work here to avoid disturbing the register state below
3376  if (JvmtiExport::can_post_field_access()) {
3377    // Check to see if a field access watch has been set before we
3378    // take the time to call into the VM.
3379    Label L1;
3380    __ mov32(rcx, ExternalAddress((address) JvmtiExport::get_field_access_count_addr()));
3381    __ testl(rcx, rcx);
3382    __ jcc(Assembler::zero, L1);
3383    // access constant pool cache entry
3384    LP64_ONLY(__ get_cache_entry_pointer_at_bcp(c_rarg2, rcx, 1));
3385    NOT_LP64(__ get_cache_entry_pointer_at_bcp(rcx, rdx, 1));
3386    __ verify_oop(rax);
3387    __ push_ptr(rax);  // save object pointer before call_VM() clobbers it
3388    LP64_ONLY(__ mov(c_rarg1, rax));
3389    // c_rarg1: object pointer copied above
3390    // c_rarg2: cache entry pointer
3391    LP64_ONLY(__ call_VM(noreg, CAST_FROM_FN_PTR(address, InterpreterRuntime::post_field_access), c_rarg1, c_rarg2));
3392    NOT_LP64(__ call_VM(noreg, CAST_FROM_FN_PTR(address, InterpreterRuntime::post_field_access), rax, rcx));
3393    __ pop_ptr(rax); // restore object pointer
3394    __ bind(L1);
3395  }
3396
3397  // access constant pool cache
3398  __ get_cache_and_index_at_bcp(rcx, rbx, 1);
3399  // replace index with field offset from cache entry
3400  // [jk] not needed currently
3401  // if (os::is_MP()) {
3402  //   __ movl(rdx, Address(rcx, rbx, Address::times_8,
3403  //                        in_bytes(ConstantPoolCache::base_offset() +
3404  //                                 ConstantPoolCacheEntry::flags_offset())));
3405  //   __ shrl(rdx, ConstantPoolCacheEntry::is_volatile_shift);
3406  //   __ andl(rdx, 0x1);
3407  // }
3408  __ movptr(rbx, Address(rcx, rbx, Address::times_ptr,
3409                         in_bytes(ConstantPoolCache::base_offset() +
3410                                  ConstantPoolCacheEntry::f2_offset())));
3411
3412  // rax: object
3413  __ verify_oop(rax);
3414  __ null_check(rax);
3415  Address field(rax, rbx, Address::times_1);
3416
3417  // access field
3418  switch (bytecode()) {
3419  case Bytecodes::_fast_agetfield:
3420    __ load_heap_oop(rax, field);
3421    __ verify_oop(rax);
3422    break;
3423  case Bytecodes::_fast_lgetfield:
3424#ifdef _LP64
3425    __ movq(rax, field);
3426#else
3427    __ stop("should not be rewritten");
3428#endif
3429    break;
3430  case Bytecodes::_fast_igetfield:
3431    __ movl(rax, field);
3432    break;
3433  case Bytecodes::_fast_bgetfield:
3434    __ movsbl(rax, field);
3435    break;
3436  case Bytecodes::_fast_sgetfield:
3437    __ load_signed_short(rax, field);
3438    break;
3439  case Bytecodes::_fast_cgetfield:
3440    __ load_unsigned_short(rax, field);
3441    break;
3442  case Bytecodes::_fast_fgetfield:
3443    __ load_float(field);
3444    break;
3445  case Bytecodes::_fast_dgetfield:
3446    __ load_double(field);
3447    break;
3448  default:
3449    ShouldNotReachHere();
3450  }
3451  // [jk] not needed currently
3452  // if (os::is_MP()) {
3453  //   Label notVolatile;
3454  //   __ testl(rdx, rdx);
3455  //   __ jcc(Assembler::zero, notVolatile);
3456  //   __ membar(Assembler::LoadLoad);
3457  //   __ bind(notVolatile);
3458  //};
3459}
3460
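// fast_xaccess implements the fused access bytecodes (aload_0 followed by a
// fast getfield): the receiver is loaded from local 0 and the field offset
// from the cache entry at bcp + 2.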
3461void TemplateTable::fast_xaccess(TosState state) {
3462  transition(vtos, state);
3463
3464  // get receiver
3465  __ movptr(rax, aaddress(0));
3466  // access constant pool cache
3467  __ get_cache_and_index_at_bcp(rcx, rdx, 2);
3468  __ movptr(rbx,
3469            Address(rcx, rdx, Address::times_ptr,
3470                    in_bytes(ConstantPoolCache::base_offset() +
3471                             ConstantPoolCacheEntry::f2_offset())));
3472  // make sure exception is reported in correct bcp range (getfield is
3473  // next instruction)
3474  __ increment(rbcp);
3475  __ null_check(rax);
3476  const Address field = Address(rax, rbx, Address::times_1, 0*wordSize);
3477  switch (state) {
3478  case itos:
3479    __ movl(rax, field);
3480    break;
3481  case atos:
3482    __ load_heap_oop(rax, field);
3483    __ verify_oop(rax);
3484    break;
3485  case ftos:
3486    __ load_float(field);
3487    break;
3488  default:
3489    ShouldNotReachHere();
3490  }
3491
3492  // [jk] not needed currently
3493  // if (os::is_MP()) {
3494  //   Label notVolatile;
3495  //   __ movl(rdx, Address(rcx, rdx, Address::times_8,
3496  //                        in_bytes(ConstantPoolCache::base_offset() +
3497  //                                 ConstantPoolCacheEntry::flags_offset())));
3498  //   __ shrl(rdx, ConstantPoolCacheEntry::is_volatile_shift);
3499  //   __ testl(rdx, 0x1);
3500  //   __ jcc(Assembler::zero, notVolatile);
3501  //   __ membar(Assembler::LoadLoad);
3502  //   __ bind(notVolatile);
3503  // }
3504
3505  __ decrement(rbcp);
3506}
3507
3508//-----------------------------------------------------------------------------
3509// Calls
3510
3511void TemplateTable::count_calls(Register method, Register temp) {
3512  // implemented elsewhere
3513  ShouldNotReachHere();
3514}
3515
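// prepare_invoke loads the resolved call information from the constant pool
// cache: the target method (or interface klass) into 'method', any secondary
// index into 'index', and the call site flags into 'flags'. For
// invokedynamic/invokehandle it also pushes the appendix argument, then it
// loads the receiver if requested and pushes the interpreter return address
// selected by the return type.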
3516void TemplateTable::prepare_invoke(int byte_no,
3517                                   Register method,  // linked method (or i-klass)
3518                                   Register index,   // itable index, MethodType, etc.
3519                                   Register recv,    // if caller wants to see it
3520                                   Register flags    // if caller wants to test it
3521                                   ) {
3522  // determine flags
3523  const Bytecodes::Code code = bytecode();
3524  const bool is_invokeinterface  = code == Bytecodes::_invokeinterface;
3525  const bool is_invokedynamic    = code == Bytecodes::_invokedynamic;
3526  const bool is_invokehandle     = code == Bytecodes::_invokehandle;
3527  const bool is_invokevirtual    = code == Bytecodes::_invokevirtual;
3528  const bool is_invokespecial    = code == Bytecodes::_invokespecial;
3529  const bool load_receiver       = (recv  != noreg);
3530  const bool save_flags          = (flags != noreg);
3531  assert(load_receiver == (code != Bytecodes::_invokestatic && code != Bytecodes::_invokedynamic), "");
3532  assert(save_flags    == (is_invokeinterface || is_invokevirtual), "need flags for vfinal");
3533  assert(flags == noreg || flags == rdx, "");
3534  assert(recv  == noreg || recv  == rcx, "");
3535
3536  // setup registers & access constant pool cache
3537  if (recv  == noreg)  recv  = rcx;
3538  if (flags == noreg)  flags = rdx;
3539  assert_different_registers(method, index, recv, flags);
3540
3541  // save 'interpreter return address'
3542  __ save_bcp();
3543
3544  load_invoke_cp_cache_entry(byte_no, method, index, flags, is_invokevirtual, false, is_invokedynamic);
3545
3546  // maybe push appendix to arguments (just before return address)
3547  if (is_invokedynamic || is_invokehandle) {
3548    Label L_no_push;
3549    __ testl(flags, (1 << ConstantPoolCacheEntry::has_appendix_shift));
3550    __ jcc(Assembler::zero, L_no_push);
3551    // Push the appendix as a trailing parameter.
3552    // This must be done before we get the receiver,
3553    // since the parameter_size includes it.
3554    __ push(rbx);
3555    __ mov(rbx, index);
3556    assert(ConstantPoolCacheEntry::_indy_resolved_references_appendix_offset == 0, "appendix expected at index+0");
3557    __ load_resolved_reference_at_index(index, rbx);
3558    __ pop(rbx);
3559    __ push(index);  // push appendix (MethodType, CallSite, etc.)
3560    __ bind(L_no_push);
3561  }
3562
3563  // load receiver if needed (after appendix is pushed so parameter size is correct)
3564  // Note: no return address pushed yet
3565  if (load_receiver) {
3566    __ movl(recv, flags);
3567    __ andl(recv, ConstantPoolCacheEntry::parameter_size_mask);
3568    const int no_return_pc_pushed_yet = -1;  // argument slot correction before we push return address
3569    const int receiver_is_at_end      = -1;  // back off one slot to get receiver
3570    Address recv_addr = __ argument_address(recv, no_return_pc_pushed_yet + receiver_is_at_end);
3571    __ movptr(recv, recv_addr);
3572    __ verify_oop(recv);
3573  }
3574
3575  if (save_flags) {
3576    __ movl(rbcp, flags);
3577  }
3578
3579  // compute return type
3580  __ shrl(flags, ConstantPoolCacheEntry::tos_state_shift);
3581  // Make sure we don't need to mask flags after the above shift
3582  ConstantPoolCacheEntry::verify_tos_state_shift();
3583  // load return address
3584  {
3585    const address table_addr = (address) Interpreter::invoke_return_entry_table_for(code);
3586    ExternalAddress table(table_addr);
3587    LP64_ONLY(__ lea(rscratch1, table));
3588    LP64_ONLY(__ movptr(flags, Address(rscratch1, flags, Address::times_ptr)));
3589    NOT_LP64(__ movptr(flags, ArrayAddress(table, Address(noreg, flags, Address::times_ptr))));
3590  }
3591
3592  // push return address
3593  __ push(flags);
3594
3595  // Restore the flags value that was stashed in rbcp above, and restore
3596  // rbcp (rsi/r13, the bytecode pointer) for later null checks.
3597  if (save_flags) {
3598    __ movl(flags, rbcp);
3599    __ restore_bcp();
3600  }
3601}
3602
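// Dispatch a virtual call: if the cache entry is marked vfinal, 'index'
// already holds the Method* and the call is made directly after a receiver
// null check; otherwise 'index' is a vtable index and the target method is
// looked up through the receiver's klass.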
3603void TemplateTable::invokevirtual_helper(Register index,
3604                                         Register recv,
3605                                         Register flags) {
3606  // Uses temporary registers rax, rdx
3607  assert_different_registers(index, recv, rax, rdx);
3608  assert(index == rbx, "");
3609  assert(recv  == rcx, "");
3610
3611  // Test for an invoke of a final method
3612  Label notFinal;
3613  __ movl(rax, flags);
3614  __ andl(rax, (1 << ConstantPoolCacheEntry::is_vfinal_shift));
3615  __ jcc(Assembler::zero, notFinal);
3616
3617  const Register method = index;  // method must be rbx
3618  assert(method == rbx,
3619         "Method* must be rbx for interpreter calling convention");
3620
3621  // do the call - the index is actually the method to call
3622  // that is, f2 is a vtable index if !is_vfinal, else f2 is a Method*
3623
3624  // It's final, need a null check here!
3625  __ null_check(recv);
3626
3627  // profile this call
3628  __ profile_final_call(rax);
3629  __ profile_arguments_type(rax, method, rbcp, true);
3630
3631  __ jump_from_interpreted(method, rax);
3632
3633  __ bind(notFinal);
3634
3635  // get receiver klass
3636  __ null_check(recv, oopDesc::klass_offset_in_bytes());
3637  __ load_klass(rax, recv);
3638
3639  // profile this call
3640  __ profile_virtual_call(rax, rlocals, rdx);
3641  // get target Method* & entry point
3642  __ lookup_virtual_method(rax, index, method);
3643  __ profile_called_method(method, rdx, rbcp);
3644
3645  __ profile_arguments_type(rdx, method, rbcp, true);
3646  __ jump_from_interpreted(method, rdx);
3647}
3648
3649void TemplateTable::invokevirtual(int byte_no) {
3650  transition(vtos, vtos);
3651  assert(byte_no == f2_byte, "use this argument");
3652  prepare_invoke(byte_no,
3653                 rbx,    // method or vtable index
3654                 noreg,  // unused itable index
3655                 rcx, rdx); // recv, flags
3656
3657  // rbx: index
3658  // rcx: receiver
3659  // rdx: flags
3660
3661  invokevirtual_helper(rbx, rcx, rdx);
3662}
3663
3664void TemplateTable::invokespecial(int byte_no) {
3665  transition(vtos, vtos);
3666  assert(byte_no == f1_byte, "use this argument");
3667  prepare_invoke(byte_no, rbx, noreg,  // get f1 Method*
3668                 rcx);  // get receiver also for null check
3669  __ verify_oop(rcx);
3670  __ null_check(rcx);
3671  // do the call
3672  __ profile_call(rax);
3673  __ profile_arguments_type(rax, rbx, rbcp, false);
3674  __ jump_from_interpreted(rbx, rax);
3675}
3676
3677void TemplateTable::invokestatic(int byte_no) {
3678  transition(vtos, vtos);
3679  assert(byte_no == f1_byte, "use this argument");
3680  prepare_invoke(byte_no, rbx);  // get f1 Method*
3681  // do the call
3682  __ profile_call(rax);
3683  __ profile_arguments_type(rax, rbx, rbcp, false);
3684  __ jump_from_interpreted(rbx, rax);
3685}
3686
3687
3688void TemplateTable::fast_invokevfinal(int byte_no) {
3689  transition(vtos, vtos);
3690  assert(byte_no == f2_byte, "use this argument");
3691  __ stop("fast_invokevfinal not used on x86");
3692}
3693
3694
3695void TemplateTable::invokeinterface(int byte_no) {
3696  transition(vtos, vtos);
3697  assert(byte_no == f1_byte, "use this argument");
3698  prepare_invoke(byte_no, rax, rbx,  // get f1 Klass*, f2 itable index
3699                 rcx, rdx); // recv, flags
3700
3701  // rax: interface klass (from f1)
3702  // rbx: itable index (from f2)
3703  // rcx: receiver
3704  // rdx: flags
3705
3706  // Special case of invokeinterface called for virtual method of
3707  // java.lang.Object.  See cpCacheOop.cpp for details.
3708  // This code isn't produced by javac, but could be produced by
3709  // another compliant java compiler.
3710  Label notMethod;
3711  __ movl(rlocals, rdx);
3712  __ andl(rlocals, (1 << ConstantPoolCacheEntry::is_forced_virtual_shift));
3713
3714  __ jcc(Assembler::zero, notMethod);
3715
3716  invokevirtual_helper(rbx, rcx, rdx);
3717  __ bind(notMethod);
3718
3719  // Get receiver klass into rdx - also a null check
3720  __ restore_locals();  // restore r14
3721  __ null_check(rcx, oopDesc::klass_offset_in_bytes());
3722  __ load_klass(rdx, rcx);
3723
3724  // profile this call
3725  __ profile_virtual_call(rdx, rbcp, rlocals);
3726
3727  Label no_such_interface, no_such_method;
3728
3729  __ lookup_interface_method(// inputs: rec. class, interface, itable index
3730                             rdx, rax, rbx,
3731                             // outputs: method, scan temp. reg
3732                             rbx, rbcp,
3733                             no_such_interface);
3734
3735  // rbx: Method* to call
3736  // rcx: receiver
3737  // Check for abstract method error
3738  // Note: This should be done more efficiently via a throw_abstract_method_error
3739  //       interpreter entry point and a conditional jump to it in case of a null
3740  //       method.
3741  __ testptr(rbx, rbx);
3742  __ jcc(Assembler::zero, no_such_method);
3743
3744  __ profile_called_method(rbx, rbcp, rdx);
3745  __ profile_arguments_type(rdx, rbx, rbcp, true);
3746
3747  // do the call
3748  // rcx: receiver
3749  // rbx: Method*
3750  __ jump_from_interpreted(rbx, rdx);
3751  __ should_not_reach_here();
3752
3753  // exception handling code follows...
3754  // note: must restore interpreter registers to canonical
3755  //       state for exception handling to work correctly!
3756
3757  __ bind(no_such_method);
3758  // throw exception
3759  __ pop(rbx);           // pop return address (pushed by prepare_invoke)
3760  __ restore_bcp();      // rbcp must be correct for exception handler   (was destroyed)
3761  __ restore_locals();   // make sure locals pointer is correct as well (was destroyed)
3762  __ call_VM(noreg, CAST_FROM_FN_PTR(address, InterpreterRuntime::throw_AbstractMethodError));
3763  // the call_VM checks for exception, so we should never return here.
3764  __ should_not_reach_here();
3765
3766  __ bind(no_such_interface);
3767  // throw exception
3768  __ pop(rbx);           // pop return address (pushed by prepare_invoke)
3769  __ restore_bcp();      // rbcp must be correct for exception handler   (was destroyed)
3770  __ restore_locals();   // make sure locals pointer is correct as well (was destroyed)
3771  __ call_VM(noreg, CAST_FROM_FN_PTR(address,
3772                   InterpreterRuntime::throw_IncompatibleClassChangeError));
3773  // the call_VM checks for exception, so we should never return here.
3774  __ should_not_reach_here();
3775}
3776
3777void TemplateTable::invokehandle(int byte_no) {
3778  transition(vtos, vtos);
3779  assert(byte_no == f1_byte, "use this argument");
3780  const Register rbx_method = rbx;
3781  const Register rax_mtype  = rax;
3782  const Register rcx_recv   = rcx;
3783  const Register rdx_flags  = rdx;
3784
3785  prepare_invoke(byte_no, rbx_method, rax_mtype, rcx_recv);
3786  __ verify_method_ptr(rbx_method);
3787  __ verify_oop(rcx_recv);
3788  __ null_check(rcx_recv);
3789
3790  // rax: MethodType object (from cpool->resolved_references[f1], if necessary)
3791  // rbx: MH.invokeExact_MT method (from f2)
3792
3793  // Note:  rax_mtype is already pushed (if necessary) by prepare_invoke
3794
3795  // FIXME: profile the LambdaForm also
3796  __ profile_final_call(rax);
3797  __ profile_arguments_type(rdx, rbx_method, rbcp, true);
3798
3799  __ jump_from_interpreted(rbx_method, rdx);
3800}
3801
3802void TemplateTable::invokedynamic(int byte_no) {
3803  transition(vtos, vtos);
3804  assert(byte_no == f1_byte, "use this argument");
3805
3806  const Register rbx_method   = rbx;
3807  const Register rax_callsite = rax;
3808
3809  prepare_invoke(byte_no, rbx_method, rax_callsite);
3810
3811  // rax: CallSite object (from cpool->resolved_references[f1])
3812  // rbx: MH.linkToCallSite method (from f2)
3813
3814  // Note:  rax_callsite is already pushed by prepare_invoke
3815
3816  // %%% should make a type profile for any invokedynamic that takes a ref argument
3817  // profile this call
3818  __ profile_call(rbcp);
3819  __ profile_arguments_type(rdx, rbx_method, rbcp, false);
3820
3821  __ verify_oop(rax_callsite);
3822
3823  __ jump_from_interpreted(rbx_method, rdx);
3824}
3825
3826//-----------------------------------------------------------------------------
3827// Allocation
3828
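// _new allocates an instance on a fast path (TLAB first, then the shared Eden
// when inline contiguous allocation is supported), initializes the fields and
// header inline, and calls into the VM for unresolved or not-fully-initialized
// classes, for classes taking the slow path (e.g. finalizable), and on
// allocation failure.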
3829void TemplateTable::_new() {
3830  transition(vtos, atos);
3831  __ get_unsigned_2_byte_index_at_bcp(rdx, 1);
3832  Label slow_case;
3833  Label slow_case_no_pop;
3834  Label done;
3835  Label initialize_header;
3836  Label initialize_object;  // including clearing the fields
3837  Label allocate_shared;
3838
3839  __ get_cpool_and_tags(rcx, rax);
3840
3841  // Make sure the class we're about to instantiate has been resolved.
3842  // This is done before loading the InstanceKlass to be consistent with the
3843  // order in which the ConstantPool is updated (see ConstantPool::klass_at_put).
3844  const int tags_offset = Array<u1>::base_offset_in_bytes();
3845  __ cmpb(Address(rax, rdx, Address::times_1, tags_offset), JVM_CONSTANT_Class);
3846  __ jcc(Assembler::notEqual, slow_case_no_pop);
3847
3848  // get InstanceKlass
3849  __ movptr(rcx, Address(rcx, rdx, Address::times_ptr, sizeof(ConstantPool)));
3850  __ push(rcx);  // save the contents of klass for initializing the header
3851
3852  // make sure klass is initialized & doesn't have finalizer
3853  // make sure klass is fully initialized
3854  __ cmpb(Address(rcx, InstanceKlass::init_state_offset()), InstanceKlass::fully_initialized);
3855  __ jcc(Assembler::notEqual, slow_case);
3856
3857  // get instance_size in InstanceKlass (scaled to a count of bytes)
3858  __ movl(rdx, Address(rcx, Klass::layout_helper_offset()));
3859  // test to see if it has a finalizer or is malformed in some way
3860  __ testl(rdx, Klass::_lh_instance_slow_path_bit);
3861  __ jcc(Assembler::notZero, slow_case);
3862
3863  //
3864  // Allocate the instance
3865  // 1) Try to allocate in the TLAB
3866  // 2) if that fails and the object is large, allocate in the shared Eden
3867  // 3) if the above fails (or is not applicable), go to a slow case
3868  // (creates a new TLAB, etc.)
3869
3870  const bool allow_shared_alloc =
3871    Universe::heap()->supports_inline_contig_alloc();
3872
3873  const Register thread = LP64_ONLY(r15_thread) NOT_LP64(rcx);
3874#ifndef _LP64
3875  if (UseTLAB || allow_shared_alloc) {
3876    __ get_thread(thread);
3877  }
3878#endif // _LP64
3879
3880  if (UseTLAB) {
3881    __ movptr(rax, Address(thread, in_bytes(JavaThread::tlab_top_offset())));
3882    __ lea(rbx, Address(rax, rdx, Address::times_1));
3883    __ cmpptr(rbx, Address(thread, in_bytes(JavaThread::tlab_end_offset())));
3884    __ jcc(Assembler::above, allow_shared_alloc ? allocate_shared : slow_case);
3885    __ movptr(Address(thread, in_bytes(JavaThread::tlab_top_offset())), rbx);
3886    if (ZeroTLAB) {
3887      // the fields have already been cleared
3888      __ jmp(initialize_header);
3889    } else {
3890      // initialize both the header and fields
3891      __ jmp(initialize_object);
3892    }
3893  }
3894
3895  // Allocation in the shared Eden, if allowed.
3896  //
3897  // rdx: instance size in bytes
3898  if (allow_shared_alloc) {
3899    __ bind(allocate_shared);
3900
3901    ExternalAddress heap_top((address)Universe::heap()->top_addr());
3902    ExternalAddress heap_end((address)Universe::heap()->end_addr());
3903
3904    Label retry;
3905    __ bind(retry);
3906    __ movptr(rax, heap_top);
3907    __ lea(rbx, Address(rax, rdx, Address::times_1));
3908    __ cmpptr(rbx, heap_end);
3909    __ jcc(Assembler::above, slow_case);
3910
3911    // Compare rax with the top addr, and if still equal, store the new
3912    // top addr in rbx at the address of the top addr pointer. Sets ZF if it
3913    // was equal, and clears it otherwise. Use lock prefix for atomicity on MPs.
3914    //
3915    // rax: object begin
3916    // rbx: object end
3917    // rdx: instance size in bytes
3918    __ locked_cmpxchgptr(rbx, heap_top);
3919
3920    // if someone beat us on the allocation, try again, otherwise continue
3921    __ jcc(Assembler::notEqual, retry);
3922
3923    __ incr_allocated_bytes(thread, rdx, 0);
3924  }
3925
3926  if (UseTLAB || Universe::heap()->supports_inline_contig_alloc()) {
3927    // The object is initialized before the header.  If the object size is
3928    // zero, go directly to the header initialization.
3929    __ bind(initialize_object);
3930    __ decrement(rdx, sizeof(oopDesc));
3931    __ jcc(Assembler::zero, initialize_header);
3932
3933    // Initialize topmost object field, divide rdx by 8, check if odd and
3934    // test if zero.
3935    __ xorl(rcx, rcx);    // use zero reg to clear memory (shorter code)
3936    __ shrl(rdx, LogBytesPerLong); // divide by 2*oopSize and set carry flag if odd
3937
3938    // rdx must have been a multiple of 8
3939#ifdef ASSERT
3940    // make sure rdx was a multiple of 8
3941    Label L;
3942    // Ignore partial flag stall after shrl() since it is debug VM
3943    __ jccb(Assembler::carryClear, L);
3944    __ stop("object size is not multiple of 2 - adjust this code");
3945    __ bind(L);
3946    // rdx must be > 0, no extra check needed here
3947#endif
3948
3949    // initialize remaining object fields: rdx was a multiple of 8
3950    { Label loop;
3951    __ bind(loop);
3952    __ movptr(Address(rax, rdx, Address::times_8, sizeof(oopDesc) - 1*oopSize), rcx);
3953    NOT_LP64(__ movptr(Address(rax, rdx, Address::times_8, sizeof(oopDesc) - 2*oopSize), rcx));
3954    __ decrement(rdx);
3955    __ jcc(Assembler::notZero, loop);
3956    }
3957
3958    // initialize object header only.
3959    __ bind(initialize_header);
3960    if (UseBiasedLocking) {
3961      __ pop(rcx);   // get saved klass back in the register.
3962      __ movptr(rbx, Address(rcx, Klass::prototype_header_offset()));
3963      __ movptr(Address(rax, oopDesc::mark_offset_in_bytes ()), rbx);
3964    } else {
3965      __ movptr(Address(rax, oopDesc::mark_offset_in_bytes ()),
3966                (intptr_t)markOopDesc::prototype()); // header
3967      __ pop(rcx);   // get saved klass back in the register.
3968    }
3969#ifdef _LP64
3970    __ xorl(rsi, rsi); // use zero reg to clear memory (shorter code)
3971    __ store_klass_gap(rax, rsi);  // zero klass gap for compressed oops
3972#endif
3973    __ store_klass(rax, rcx);  // klass
3974
3975    {
3976      SkipIfEqual skip_if(_masm, &DTraceAllocProbes, 0);
3977      // Trigger dtrace event for fastpath
3978      __ push(atos);
3979      __ call_VM_leaf(
3980           CAST_FROM_FN_PTR(address, SharedRuntime::dtrace_object_alloc), rax);
3981      __ pop(atos);
3982    }
3983
3984    __ jmp(done);
3985  }
3986
3987  // slow case
3988  __ bind(slow_case);
3989  __ pop(rcx);   // restore stack pointer to what it was when we came in.
3990  __ bind(slow_case_no_pop);
3991
3992  Register rarg1 = LP64_ONLY(c_rarg1) NOT_LP64(rax);
3993  Register rarg2 = LP64_ONLY(c_rarg2) NOT_LP64(rdx);
3994
3995  __ get_constant_pool(rarg1);
3996  __ get_unsigned_2_byte_index_at_bcp(rarg2, 1);
3997  call_VM(rax, CAST_FROM_FN_PTR(address, InterpreterRuntime::_new), rarg1, rarg2);
3998  __ verify_oop(rax);
3999
4000  // continue
4001  __ bind(done);
4002}
4003
4004void TemplateTable::newarray() {
4005  transition(itos, atos);
4006  Register rarg1 = LP64_ONLY(c_rarg1) NOT_LP64(rdx);
4007  __ load_unsigned_byte(rarg1, at_bcp(1));
4008  call_VM(rax, CAST_FROM_FN_PTR(address, InterpreterRuntime::newarray),
4009          rarg1, rax);
4010}
4011
4012void TemplateTable::anewarray() {
4013  transition(itos, atos);
4014
4015  Register rarg1 = LP64_ONLY(c_rarg1) NOT_LP64(rcx);
4016  Register rarg2 = LP64_ONLY(c_rarg2) NOT_LP64(rdx);
4017
4018  __ get_unsigned_2_byte_index_at_bcp(rarg2, 1);
4019  __ get_constant_pool(rarg1);
4020  call_VM(rax, CAST_FROM_FN_PTR(address, InterpreterRuntime::anewarray),
4021          rarg1, rarg2, rax);
4022}
4023
4024void TemplateTable::arraylength() {
4025  transition(atos, itos);
4026  __ null_check(rax, arrayOopDesc::length_offset_in_bytes());
4027  __ movl(rax, Address(rax, arrayOopDesc::length_offset_in_bytes()));
4028}
4029
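// checkcast: if the constant pool tag shows the class is already resolved
// ("quicked"), the Klass* is loaded straight from the pool; otherwise the VM
// resolves it. A null receiver always passes; a failed subtype check jumps to
// the ClassCastException throw entry.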
4030void TemplateTable::checkcast() {
4031  transition(atos, atos);
4032  Label done, is_null, ok_is_subtype, quicked, resolved;
4033  __ testptr(rax, rax); // object is in rax
4034  __ jcc(Assembler::zero, is_null);
4035
4036  // Get cpool & tags index
4037  __ get_cpool_and_tags(rcx, rdx); // rcx=cpool, rdx=tags array
4038  __ get_unsigned_2_byte_index_at_bcp(rbx, 1); // rbx=index
4039  // See if bytecode has already been quicked
4040  __ cmpb(Address(rdx, rbx,
4041                  Address::times_1,
4042                  Array<u1>::base_offset_in_bytes()),
4043          JVM_CONSTANT_Class);
4044  __ jcc(Assembler::equal, quicked);
4045  __ push(atos); // save receiver for result, and for GC
4046  call_VM(noreg, CAST_FROM_FN_PTR(address, InterpreterRuntime::quicken_io_cc));
4047
4048  // vm_result_2 has metadata result
4049#ifndef _LP64
4050  // borrow rdi from locals
4051  __ get_thread(rdi);
4052  __ get_vm_result_2(rax, rdi);
4053  __ restore_locals();
4054#else
4055  __ get_vm_result_2(rax, r15_thread);
4056#endif
4057
4058  __ pop_ptr(rdx); // restore receiver
4059  __ jmpb(resolved);
4060
4061  // Get superklass in rax and subklass in rbx
4062  __ bind(quicked);
4063  __ mov(rdx, rax); // Save object in rdx; rax needed for subtype check
4064  __ movptr(rax, Address(rcx, rbx,
4065                       Address::times_ptr, sizeof(ConstantPool)));
4066
4067  __ bind(resolved);
4068  __ load_klass(rbx, rdx);
4069
4070  // Generate subtype check.  Blows rcx, rdi.  Object in rdx.
4071  // Superklass in rax.  Subklass in rbx.
4072  __ gen_subtype_check(rbx, ok_is_subtype);
4073
4074  // Come here on failure
4075  __ push_ptr(rdx);
4076  // object is at TOS
4077  __ jump(ExternalAddress(Interpreter::_throw_ClassCastException_entry));
4078
4079  // Come here on success
4080  __ bind(ok_is_subtype);
4081  __ mov(rax, rdx); // Restore object in rdx
4082
4083  // Collect counts on whether this check-cast sees NULLs a lot or not.
4084  if (ProfileInterpreter) {
4085    __ jmp(done);
4086    __ bind(is_null);
4087    __ profile_null_seen(rcx);
4088  } else {
4089    __ bind(is_null);   // same as 'done'
4090  }
4091  __ bind(done);
4092}
4093
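// instanceof follows the same quickened/slow resolution scheme as checkcast
// above, but leaves 0 or 1 in rax instead of throwing on failure.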
4094void TemplateTable::instanceof() {
4095  transition(atos, itos);
4096  Label done, is_null, ok_is_subtype, quicked, resolved;
4097  __ testptr(rax, rax);
4098  __ jcc(Assembler::zero, is_null);
4099
4100  // Get cpool & tags index
4101  __ get_cpool_and_tags(rcx, rdx); // rcx=cpool, rdx=tags array
4102  __ get_unsigned_2_byte_index_at_bcp(rbx, 1); // rbx=index
4103  // See if bytecode has already been quicked
4104  __ cmpb(Address(rdx, rbx,
4105                  Address::times_1,
4106                  Array<u1>::base_offset_in_bytes()),
4107          JVM_CONSTANT_Class);
4108  __ jcc(Assembler::equal, quicked);
4109
4110  __ push(atos); // save receiver for result, and for GC
4111  call_VM(noreg, CAST_FROM_FN_PTR(address, InterpreterRuntime::quicken_io_cc));
4112  // vm_result_2 has metadata result
4113
4114#ifndef _LP64
4115  // borrow rdi from locals
4116  __ get_thread(rdi);
4117  __ get_vm_result_2(rax, rdi);
4118  __ restore_locals();
4119#else
4120  __ get_vm_result_2(rax, r15_thread);
4121#endif
4122
4123  __ pop_ptr(rdx); // restore receiver
4124  __ verify_oop(rdx);
4125  __ load_klass(rdx, rdx);
4126  __ jmpb(resolved);
4127
4128  // Get superklass in rax and subklass in rdx
4129  __ bind(quicked);
4130  __ load_klass(rdx, rax);
4131  __ movptr(rax, Address(rcx, rbx,
4132                         Address::times_ptr, sizeof(ConstantPool)));
4133
4134  __ bind(resolved);
4135
4136  // Generate subtype check.  Blows rcx, rdi
4137  // Superklass in rax.  Subklass in rdx.
4138  __ gen_subtype_check(rdx, ok_is_subtype);
4139
4140  // Come here on failure
4141  __ xorl(rax, rax);
4142  __ jmpb(done);
4143  // Come here on success
4144  __ bind(ok_is_subtype);
4145  __ movl(rax, 1);
4146
4147  // Collect counts on whether this test sees NULLs a lot or not.
4148  if (ProfileInterpreter) {
4149    __ jmp(done);
4150    __ bind(is_null);
4151    __ profile_null_seen(rcx);
4152  } else {
4153    __ bind(is_null);   // same as 'done'
4154  }
4155  __ bind(done);
4156  // rax = 0: obj == NULL or  obj is not an instanceof the specified klass
4157  // rax = 1: obj != NULL and obj is     an instanceof the specified klass
4158}
4159
4160
4161//----------------------------------------------------------------------------------------------------
4162// Breakpoints
4163void TemplateTable::_breakpoint() {
4164  // Note: We get here even if we are single stepping;
4165  // jbug insists on setting breakpoints at every bytecode
4166  // even if we are in single step mode.
4167
4168  transition(vtos, vtos);
4169
4170  Register rarg = LP64_ONLY(c_rarg1) NOT_LP64(rcx);
4171
4172  // get the unpatched byte code
4173  __ get_method(rarg);
4174  __ call_VM(noreg,
4175             CAST_FROM_FN_PTR(address,
4176                              InterpreterRuntime::get_original_bytecode_at),
4177             rarg, rbcp);
4178  __ mov(rbx, rax);  // why?
4179
4180  // post the breakpoint event
4181  __ get_method(rarg);
4182  __ call_VM(noreg,
4183             CAST_FROM_FN_PTR(address, InterpreterRuntime::_breakpoint),
4184             rarg, rbcp);
4185
4186  // complete the execution of original bytecode
4187  __ dispatch_only_normal(vtos);
4188}
4189
4190//-----------------------------------------------------------------------------
4191// Exceptions
4192
4193void TemplateTable::athrow() {
4194  transition(atos, vtos);
4195  __ null_check(rax);
4196  __ jump(ExternalAddress(Interpreter::throw_exception_entry()));
4197}
4198
4199//-----------------------------------------------------------------------------
4200// Synchronization
4201//
4202// Note: monitorenter & exit are symmetric routines; which is reflected
4203//       in the assembly code structure as well
4204//
4205// Stack layout:
4206//
4207// [expressions  ] <--- rsp               = expression stack top
4208// ..
4209// [expressions  ]
4210// [monitor entry] <--- monitor block top = expression stack bot
4211// ..
4212// [monitor entry]
4213// [frame data   ] <--- monitor block bot
4214// ...
4215// [saved rbp    ] <--- rbp
4216void TemplateTable::monitorenter() {
4217  transition(atos, vtos);
4218
4219  // check for NULL object
4220  __ null_check(rax);
4221
4222  const Address monitor_block_top(
4223        rbp, frame::interpreter_frame_monitor_block_top_offset * wordSize);
4224  const Address monitor_block_bot(
4225        rbp, frame::interpreter_frame_initial_sp_offset * wordSize);
4226  const int entry_size = frame::interpreter_frame_monitor_size() * wordSize;
4227
4228  Label allocated;
4229
4230  Register rtop = LP64_ONLY(c_rarg3) NOT_LP64(rcx);
4231  Register rbot = LP64_ONLY(c_rarg2) NOT_LP64(rbx);
4232  Register rmon = LP64_ONLY(c_rarg1) NOT_LP64(rdx);
4233
4234  // initialize entry pointer
4235  __ xorl(rmon, rmon); // points to free slot or NULL
4236
4237  // find a free slot in the monitor block (result in rmon)
4238  {
4239    Label entry, loop, exit;
4240    __ movptr(rtop, monitor_block_top); // points to current entry,
4241                                        // starting with top-most entry
4242    __ lea(rbot, monitor_block_bot);    // points to word before bottom
4243                                        // of monitor block
4244    __ jmpb(entry);
4245
4246    __ bind(loop);
4247    // check if current entry is used
4248    __ cmpptr(Address(rtop, BasicObjectLock::obj_offset_in_bytes()), (int32_t) NULL_WORD);
4249    // if not used then remember entry in rmon
4250    __ cmovptr(Assembler::equal, rmon, rtop);   // cmov => cmovptr
4251    // check if current entry is for same object
4252    __ cmpptr(rax, Address(rtop, BasicObjectLock::obj_offset_in_bytes()));
4253    // if same object then stop searching
4254    __ jccb(Assembler::equal, exit);
4255    // otherwise advance to next entry
4256    __ addptr(rtop, entry_size);
4257    __ bind(entry);
4258    // check if bottom reached
4259    __ cmpptr(rtop, rbot);
4260    // if not at bottom then check this entry
4261    __ jcc(Assembler::notEqual, loop);
4262    __ bind(exit);
4263  }
4264
4265  __ testptr(rmon, rmon); // check if a slot has been found
4266  __ jcc(Assembler::notZero, allocated); // if found, continue with that one
4267
4268  // allocate one if there's no free slot
4269  {
4270    Label entry, loop;
4271    // 1. compute new pointers          // rsp: old expression stack top
4272    __ movptr(rmon, monitor_block_bot); // rmon: old expression stack bottom
4273    __ subptr(rsp, entry_size);         // move expression stack top
4274    __ subptr(rmon, entry_size);        // move expression stack bottom
4275    __ mov(rtop, rsp);                  // set start value for copy loop
4276    __ movptr(monitor_block_bot, rmon); // set new monitor block bottom
4277    __ jmp(entry);
4278    // 2. move expression stack contents
4279    __ bind(loop);
4280    __ movptr(rbot, Address(rtop, entry_size)); // load expression stack
4281                                                // word from old location
4282    __ movptr(Address(rtop, 0), rbot);          // and store it at new location
4283    __ addptr(rtop, wordSize);                  // advance to next word
4284    __ bind(entry);
4285    __ cmpptr(rtop, rmon);                      // check if bottom reached
4286    __ jcc(Assembler::notEqual, loop);          // if not at bottom then
4287                                                // copy next word
4288  }
4289
4290  // call run-time routine
4291  // rmon: points to monitor entry
4292  __ bind(allocated);
4293
4294  // Increment bcp to point to the next bytecode, so exception
4295  // handling for async. exceptions works correctly.
4296  // The object has already been popped from the stack, so the
4297  // expression stack looks correct.
4298  __ increment(rbcp);
4299
4300  // store object
4301  __ movptr(Address(rmon, BasicObjectLock::obj_offset_in_bytes()), rax);
4302  __ lock_object(rmon);
4303
4304  // check to make sure this monitor doesn't cause stack overflow after locking
4305  __ save_bcp();  // in case of exception
4306  __ generate_stack_overflow_check(0);
4307
4308  // The bcp has already been incremented. Just need to dispatch to
4309  // next instruction.
4310  __ dispatch_next(vtos);
4311}
4312
4313void TemplateTable::monitorexit() {
4314  transition(atos, vtos);
4315
4316  // check for NULL object
4317  __ null_check(rax);
4318
4319  const Address monitor_block_top(
4320        rbp, frame::interpreter_frame_monitor_block_top_offset * wordSize);
4321  const Address monitor_block_bot(
4322        rbp, frame::interpreter_frame_initial_sp_offset * wordSize);
4323  const int entry_size = frame::interpreter_frame_monitor_size() * wordSize;
4324
4325  Register rtop = LP64_ONLY(c_rarg1) NOT_LP64(rdx);
4326  Register rbot = LP64_ONLY(c_rarg2) NOT_LP64(rbx);
4327
4328  Label found;
4329
4330  // find matching slot
4331  {
4332    Label entry, loop;
4333    __ movptr(rtop, monitor_block_top); // points to current entry,
4334                                        // starting with top-most entry
4335    __ lea(rbot, monitor_block_bot);    // points to word before bottom
4336                                        // of monitor block
4337    __ jmpb(entry);
4338
4339    __ bind(loop);
4340    // check if current entry is for same object
4341    __ cmpptr(rax, Address(rtop, BasicObjectLock::obj_offset_in_bytes()));
4342    // if same object then stop searching
4343    __ jcc(Assembler::equal, found);
4344    // otherwise advance to next entry
4345    __ addptr(rtop, entry_size);
4346    __ bind(entry);
4347    // check if bottom reached
4348    __ cmpptr(rtop, rbot);
4349    // if not at bottom then check this entry
4350    __ jcc(Assembler::notEqual, loop);
4351  }
4352
4353  // error handling. Unlocking was not block-structured
4354  __ call_VM(noreg, CAST_FROM_FN_PTR(address,
4355                   InterpreterRuntime::throw_illegal_monitor_state_exception));
4356  __ should_not_reach_here();
4357
4358  // call run-time routine
4359  __ bind(found);
4360  __ push_ptr(rax); // make sure object is on stack (contract with oopMaps)
4361  __ unlock_object(rtop);
4362  __ pop_ptr(rax); // discard object
4363}
4364
4365// Wide instructions
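// wide: fetch the bytecode that follows the wide prefix and dispatch through
// the wide-entry-point table; each wide template advances rbcp itself.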
4366void TemplateTable::wide() {
4367  transition(vtos, vtos);
4368  __ load_unsigned_byte(rbx, at_bcp(1));
4369  ExternalAddress wtable((address)Interpreter::_wentry_point);
4370  __ jump(ArrayAddress(wtable, Address(noreg, rbx, Address::times_ptr)));
4371  // Note: the rbcp increment step is part of the individual wide bytecode implementations
4372}
4373
4374// Multi arrays
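// multianewarray: the dimension counts are on the expression stack; pass the
// address of the first count to the VM, which builds the array, then pop all
// the counts off the stack.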
4375void TemplateTable::multianewarray() {
4376  transition(vtos, atos);
4377
4378  Register rarg = LP64_ONLY(c_rarg1) NOT_LP64(rax);
4379  __ load_unsigned_byte(rax, at_bcp(3)); // get number of dimensions
4380  // last dim is on top of stack; we want address of first one:
4381  // first_addr = last_addr + (ndims - 1) * stackElementSize - 1*wordSize,
4382  // the latter wordSize being subtracted to point to the beginning of the array.
4383  __ lea(rarg, Address(rsp, rax, Interpreter::stackElementScale(), -wordSize));
4384  call_VM(rax, CAST_FROM_FN_PTR(address, InterpreterRuntime::multianewarray), rarg);
4385  __ load_unsigned_byte(rbx, at_bcp(3));
4386  __ lea(rsp, Address(rsp, rbx, Interpreter::stackElementScale()));  // get rid of counts
4387}
4388