1/*
2 * Copyright (c) 2008, 2017, Oracle and/or its affiliates. All rights reserved.
3 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
4 *
5 * This code is free software; you can redistribute it and/or modify it
6 * under the terms of the GNU General Public License version 2 only, as
7 * published by the Free Software Foundation.
8 *
9 * This code is distributed in the hope that it will be useful, but WITHOUT
10 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
11 * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
12 * version 2 for more details (a copy is included in the LICENSE file that
13 * accompanied this code).
14 *
15 * You should have received a copy of the GNU General Public License version
16 * 2 along with this work; if not, write to the Free Software Foundation,
17 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
18 *
19 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
20 * or visit www.oracle.com if you need additional information or have any
21 * questions.
22 *
23 */
24
25#include "precompiled.hpp"
26#include "asm/assembler.hpp"
27#include "asm/assembler.inline.hpp"
28#include "asm/macroAssembler.hpp"
29#include "ci/ciEnv.hpp"
30#include "code/nativeInst.hpp"
31#include "compiler/disassembler.hpp"
32#include "gc/shared/cardTableModRefBS.hpp"
33#include "gc/shared/collectedHeap.inline.hpp"
34#include "interpreter/interpreter.hpp"
35#include "memory/resourceArea.hpp"
36#include "oops/klass.inline.hpp"
37#include "prims/methodHandles.hpp"
38#include "runtime/biasedLocking.hpp"
39#include "runtime/interfaceSupport.hpp"
40#include "runtime/objectMonitor.hpp"
41#include "runtime/os.hpp"
42#include "runtime/sharedRuntime.hpp"
43#include "runtime/stubRoutines.hpp"
44#include "utilities/macros.hpp"
45#if INCLUDE_ALL_GCS
46#include "gc/g1/g1CollectedHeap.inline.hpp"
47#include "gc/g1/g1SATBCardTableModRefBS.hpp"
48#include "gc/g1/heapRegion.hpp"
49#endif
50
51// Implementation of AddressLiteral
52
53void AddressLiteral::set_rspec(relocInfo::relocType rtype) {
54  switch (rtype) {
55  case relocInfo::oop_type:
56    // Oops are a special case. Normally they would be their own section
57    // but in cases like icBuffer they are literals in the code stream that
58    // we don't have a section for. We use none so that we get a literal address
59    // which is always patchable.
60    break;
61  case relocInfo::external_word_type:
62    _rspec = external_word_Relocation::spec(_target);
63    break;
64  case relocInfo::internal_word_type:
65    _rspec = internal_word_Relocation::spec(_target);
66    break;
67  case relocInfo::opt_virtual_call_type:
68    _rspec = opt_virtual_call_Relocation::spec();
69    break;
70  case relocInfo::static_call_type:
71    _rspec = static_call_Relocation::spec();
72    break;
73  case relocInfo::runtime_call_type:
74    _rspec = runtime_call_Relocation::spec();
75    break;
76  case relocInfo::poll_type:
77  case relocInfo::poll_return_type:
78    _rspec = Relocation::spec_simple(rtype);
79    break;
80  case relocInfo::none:
81    break;
82  default:
83    ShouldNotReachHere();
84    break;
85  }
86}
87
88// Initially added to the Assembler interface as a pure virtual:
89//   RegisterConstant delayed_value(..)
90// for:
91//   6812678 macro assembler needs delayed binding of a few constants (for 6655638)
92// this was subsequently modified to its present name and return type
93RegisterOrConstant MacroAssembler::delayed_value_impl(intptr_t* delayed_value_addr,
94                                                      Register tmp,
95                                                      int offset) {
96  ShouldNotReachHere();
97  return RegisterOrConstant(-1);
98}
99
100
101#ifdef AARCH64
102// Note: ARM32 version is OS dependent
103void MacroAssembler::breakpoint(AsmCondition cond) {
104  if (cond == al) {
105    brk();
106  } else {
107    Label L;
108    b(L, inverse(cond));
109    brk();
110    bind(L);
111  }
112}
113#endif // AARCH64
114
115
116// virtual method calling
117void MacroAssembler::lookup_virtual_method(Register recv_klass,
118                                           Register vtable_index,
119                                           Register method_result) {
120  const int base_offset = in_bytes(Klass::vtable_start_offset()) + vtableEntry::method_offset_in_bytes();
121  assert(vtableEntry::size() * wordSize == wordSize, "adjust the scaling in the code below");
122  add(recv_klass, recv_klass, AsmOperand(vtable_index, lsl, LogBytesPerWord));
123  ldr(method_result, Address(recv_klass, base_offset));
124}
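// Illustrative sketch (not generated code): the lookup above computes, in effect,
//   method_result = *(Method**)((address)recv_klass
//                               + in_bytes(Klass::vtable_start_offset())
//                               + vtable_index * wordSize
//                               + vtableEntry::method_offset_in_bytes());
// relying on vtableEntry::size() * wordSize == wordSize, as asserted.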
125
126
127// Simplified, combined version, good for typical uses.
128// Falls through on failure.
129void MacroAssembler::check_klass_subtype(Register sub_klass,
130                                         Register super_klass,
131                                         Register temp_reg,
132                                         Register temp_reg2,
133                                         Register temp_reg3,
134                                         Label& L_success) {
135  Label L_failure;
136  check_klass_subtype_fast_path(sub_klass, super_klass, temp_reg, temp_reg2, &L_success, &L_failure, NULL);
137  check_klass_subtype_slow_path(sub_klass, super_klass, temp_reg, temp_reg2, temp_reg3, &L_success, NULL);
138  bind(L_failure);
}
140
141void MacroAssembler::check_klass_subtype_fast_path(Register sub_klass,
142                                                   Register super_klass,
143                                                   Register temp_reg,
144                                                   Register temp_reg2,
145                                                   Label* L_success,
146                                                   Label* L_failure,
147                                                   Label* L_slow_path) {
148
149  assert_different_registers(sub_klass, super_klass, temp_reg, temp_reg2, noreg);
150  const Register super_check_offset = temp_reg2;
151
152  Label L_fallthrough;
153  int label_nulls = 0;
154  if (L_success == NULL)   { L_success   = &L_fallthrough; label_nulls++; }
155  if (L_failure == NULL)   { L_failure   = &L_fallthrough; label_nulls++; }
156  if (L_slow_path == NULL) { L_slow_path = &L_fallthrough; label_nulls++; }
157  assert(label_nulls <= 1, "at most one NULL in the batch");
158
159  int sc_offset = in_bytes(Klass::secondary_super_cache_offset());
160  int sco_offset = in_bytes(Klass::super_check_offset_offset());
161  Address super_check_offset_addr(super_klass, sco_offset);
162
163  // If the pointers are equal, we are done (e.g., String[] elements).
164  // This self-check enables sharing of secondary supertype arrays among
165  // non-primary types such as array-of-interface.  Otherwise, each such
166  // type would need its own customized SSA.
167  // We move this check to the front of the fast path because many
168  // type checks are in fact trivially successful in this manner,
169  // so we get a nicely predicted branch right at the start of the check.
170  cmp(sub_klass, super_klass);
171  b(*L_success, eq);
172
173  // Check the supertype display:
174  ldr_u32(super_check_offset, super_check_offset_addr);
175
176  Address super_check_addr(sub_klass, super_check_offset);
  ldr(temp_reg, super_check_addr);  // load displayed supertype
  cmp(super_klass, temp_reg);       // compare it against the super_klass we are checking for
179
180  // This check has worked decisively for primary supers.
181  // Secondary supers are sought in the super_cache ('super_cache_addr').
182  // (Secondary supers are interfaces and very deeply nested subtypes.)
183  // This works in the same check above because of a tricky aliasing
184  // between the super_cache and the primary super display elements.
185  // (The 'super_check_addr' can address either, as the case requires.)
186  // Note that the cache is updated below if it does not help us find
187  // what we need immediately.
188  // So if it was a primary super, we can just fail immediately.
189  // Otherwise, it's the slow path for us (no success at this point).
190
191  b(*L_success, eq);
192  cmp_32(super_check_offset, sc_offset);
193  if (L_failure == &L_fallthrough) {
194    b(*L_slow_path, eq);
195  } else {
196    b(*L_failure, ne);
197    if (L_slow_path != &L_fallthrough) {
198      b(*L_slow_path);
199    }
200  }
201
202  bind(L_fallthrough);
203}
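// Rough C-like equivalent of the fast path above (illustrative only, using pseudo-accessors):
//   if (sub_klass == super_klass)                              goto success;    // trivial self check
//   off = super_klass->super_check_offset();
//   if (*(Klass**)((address)sub_klass + off) == super_klass)   goto success;    // display/cache hit
//   if (off == Klass::secondary_super_cache_offset())          goto slow_path;  // need secondary scan
//   goto failure;                                                               // decisive miss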
204
205
206void MacroAssembler::check_klass_subtype_slow_path(Register sub_klass,
207                                                   Register super_klass,
208                                                   Register temp_reg,
209                                                   Register temp2_reg,
210                                                   Register temp3_reg,
211                                                   Label* L_success,
212                                                   Label* L_failure,
213                                                   bool set_cond_codes) {
214#ifdef AARCH64
215  NOT_IMPLEMENTED();
216#else
217  // Note: if used by code that expects a register to be 0 on success,
218  // this register must be temp_reg and set_cond_codes must be true
219
220  Register saved_reg = noreg;
221
222  // get additional tmp registers
223  if (temp3_reg == noreg) {
224    saved_reg = temp3_reg = LR;
225    push(saved_reg);
226  }
227
228  assert(temp2_reg != noreg, "need all the temporary registers");
229  assert_different_registers(sub_klass, super_klass, temp_reg, temp2_reg, temp3_reg);
230
231  Register cmp_temp = temp_reg;
232  Register scan_temp = temp3_reg;
233  Register count_temp = temp2_reg;
234
235  Label L_fallthrough;
236  int label_nulls = 0;
237  if (L_success == NULL)   { L_success   = &L_fallthrough; label_nulls++; }
238  if (L_failure == NULL)   { L_failure   = &L_fallthrough; label_nulls++; }
239  assert(label_nulls <= 1, "at most one NULL in the batch");
240
241  // a couple of useful fields in sub_klass:
242  int ss_offset = in_bytes(Klass::secondary_supers_offset());
243  int sc_offset = in_bytes(Klass::secondary_super_cache_offset());
244  Address secondary_supers_addr(sub_klass, ss_offset);
245  Address super_cache_addr(     sub_klass, sc_offset);
246
247#ifndef PRODUCT
248  inc_counter((address)&SharedRuntime::_partial_subtype_ctr, scan_temp, count_temp);
249#endif
250
251  // We will consult the secondary-super array.
252  ldr(scan_temp, Address(sub_klass, ss_offset));
253
  assert(! UseCompressedOops, "search_key must be the compressed super_klass");
  // With compressed oops disabled (asserted above), search_key is simply the super_klass.
  Register search_key = super_klass;
257
258  // Load the array length.
259  ldr(count_temp, Address(scan_temp, Array<Klass*>::length_offset_in_bytes()));
260  add(scan_temp, scan_temp, Array<Klass*>::base_offset_in_bytes());
261
262  add(count_temp, count_temp, 1);
263
264  Label L_loop, L_setnz_and_fail, L_fail;
265
266  // Top of search loop
267  bind(L_loop);
268  // Notes:
269  //  scan_temp starts at the array elements
270  //  count_temp is 1+size
271  subs(count_temp, count_temp, 1);
272  if ((L_failure != &L_fallthrough) && (! set_cond_codes) && (saved_reg == noreg)) {
273    // direct jump to L_failure if failed and no cleanup needed
274    b(*L_failure, eq); // not found and
275  } else {
276    b(L_fail, eq); // not found in the array
277  }
278
279  // Load next super to check
280  // In the array of super classes elements are pointer sized.
281  int element_size = wordSize;
282  ldr(cmp_temp, Address(scan_temp, element_size, post_indexed));
283
284  // Look for Rsuper_klass on Rsub_klass's secondary super-class-overflow list
285  subs(cmp_temp, cmp_temp, search_key);
286
287  // A miss means we are NOT a subtype and need to keep looping
288  b(L_loop, ne);
289
290  // Falling out the bottom means we found a hit; we ARE a subtype
291
292  // Note: temp_reg/cmp_temp is already 0 and flag Z is set
293
294  // Success.  Cache the super we found and proceed in triumph.
295  str(super_klass, Address(sub_klass, sc_offset));
296
297  if (saved_reg != noreg) {
298    // Return success
299    pop(saved_reg);
300  }
301
302  b(*L_success);
303
304  bind(L_fail);
305  // Note1: check "b(*L_failure, eq)" above if adding extra instructions here
306  if (set_cond_codes) {
307    movs(temp_reg, sub_klass); // clears Z and sets temp_reg to non-0 if needed
308  }
309  if (saved_reg != noreg) {
310    pop(saved_reg);
311  }
312  if (L_failure != &L_fallthrough) {
313    b(*L_failure);
314  }
315
316  bind(L_fallthrough);
317#endif
318}
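// Rough C-like equivalent of the slow path above (illustrative only, field access is pseudo-code):
//   Array<Klass*>* ss = sub_klass->secondary_supers();
//   for (int i = 0; i < ss->length(); i++) {
//     if (ss->at(i) == super_klass) {
//       sub_klass->_secondary_super_cache = super_klass;   // remember the hit for next time
//       goto success;
//     }
//   }
//   goto failure;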
319
320// Returns address of receiver parameter, using tmp as base register. tmp and params_count can be the same.
321Address MacroAssembler::receiver_argument_address(Register params_base, Register params_count, Register tmp) {
322  assert_different_registers(params_base, params_count);
323  add(tmp, params_base, AsmOperand(params_count, lsl, Interpreter::logStackElementSize));
324  return Address(tmp, -Interpreter::stackElementSize);
325}
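// For example (illustrative only): the returned address evaluates to
//   params_base + (params_count - 1) * Interpreter::stackElementSize
// i.e. the last stack element relative to params_base, which holds the receiver.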
326
327
328void MacroAssembler::align(int modulus) {
329  while (offset() % modulus != 0) {
330    nop();
331  }
332}
333
334int MacroAssembler::set_last_Java_frame(Register last_java_sp,
335                                        Register last_java_fp,
336                                        bool save_last_java_pc,
337                                        Register tmp) {
338  int pc_offset;
339  if (last_java_fp != noreg) {
340    // optional
341    str(last_java_fp, Address(Rthread, JavaThread::last_Java_fp_offset()));
342    _fp_saved = true;
343  } else {
344    _fp_saved = false;
345  }
346  if (AARCH64_ONLY(true) NOT_AARCH64(save_last_java_pc)) { // optional on 32-bit ARM
347#ifdef AARCH64
348    pc_offset = mov_pc_to(tmp);
349    str(tmp, Address(Rthread, JavaThread::last_Java_pc_offset()));
350#else
351    str(PC, Address(Rthread, JavaThread::last_Java_pc_offset()));
352    pc_offset = offset() + VM_Version::stored_pc_adjustment();
353#endif
354    _pc_saved = true;
355  } else {
356    _pc_saved = false;
357    pc_offset = -1;
358  }
  // According to the comment in javaFrameAnchor, SP must be saved last, so that the other
  // entries are valid when SP is set.

  // However, this is probably not a strong constraint since, for instance, PC is
  // sometimes read from the stack at SP... but is pushed later (by the call). Hence,
  // we now write the fields in the expected order but have not added a StoreStore
  // barrier.

  // XXX: if the ordering is really important, PC should always be saved (without forgetting
  // to update oop_map offsets) and a StoreStore barrier might be needed.
369
370  if (last_java_sp == noreg) {
371    last_java_sp = SP; // always saved
372  }
373#ifdef AARCH64
374  if (last_java_sp == SP) {
375    mov(tmp, SP);
376    str(tmp, Address(Rthread, JavaThread::last_Java_sp_offset()));
377  } else {
378    str(last_java_sp, Address(Rthread, JavaThread::last_Java_sp_offset()));
379  }
380#else
381  str(last_java_sp, Address(Rthread, JavaThread::last_Java_sp_offset()));
382#endif
383
384  return pc_offset; // for oopmaps
385}
386
387void MacroAssembler::reset_last_Java_frame(Register tmp) {
388  const Register Rzero = zero_register(tmp);
389  str(Rzero, Address(Rthread, JavaThread::last_Java_sp_offset()));
390  if (_fp_saved) {
391    str(Rzero, Address(Rthread, JavaThread::last_Java_fp_offset()));
392  }
393  if (_pc_saved) {
394    str(Rzero, Address(Rthread, JavaThread::last_Java_pc_offset()));
395  }
396}
397
398
399// Implementation of call_VM versions
400
401void MacroAssembler::call_VM_leaf_helper(address entry_point, int number_of_arguments) {
402  assert(number_of_arguments >= 0, "cannot have negative number of arguments");
403  assert(number_of_arguments <= 4, "cannot have more than 4 arguments");
404
405#ifndef AARCH64
406  // Safer to save R9 here since callers may have been written
407  // assuming R9 survives. This is suboptimal but is not worth
408  // optimizing for the few platforms where R9 is scratched.
409  push(RegisterSet(R4) | R9ifScratched);
410  mov(R4, SP);
411  bic(SP, SP, StackAlignmentInBytes - 1);
412#endif // AARCH64
413  call(entry_point, relocInfo::runtime_call_type);
414#ifndef AARCH64
415  mov(SP, R4);
416  pop(RegisterSet(R4) | R9ifScratched);
417#endif // AARCH64
418}
419
420
421void MacroAssembler::call_VM_helper(Register oop_result, address entry_point, int number_of_arguments, bool check_exceptions) {
422  assert(number_of_arguments >= 0, "cannot have negative number of arguments");
423  assert(number_of_arguments <= 3, "cannot have more than 3 arguments");
424
425  const Register tmp = Rtemp;
426  assert_different_registers(oop_result, tmp);
427
428  set_last_Java_frame(SP, FP, true, tmp);
429
430#ifdef ASSERT
431  AARCH64_ONLY(if (UseCompressedOops || UseCompressedClassPointers) { verify_heapbase("call_VM_helper: heap base corrupted?"); });
432#endif // ASSERT
433
434#ifndef AARCH64
435#if R9_IS_SCRATCHED
436  // Safer to save R9 here since callers may have been written
437  // assuming R9 survives. This is suboptimal but is not worth
438  // optimizing for the few platforms where R9 is scratched.
439
  // Note: R9 cannot be saved above the saved SP (some calls expect, for
  // instance, the Java stack top at the saved SP)
  // => once SP is saved (with set_last_Java_frame), decrease SP before rounding to
  // ensure the slot at SP will be free for R9.
444  sub(SP, SP, 4);
445  bic(SP, SP, StackAlignmentInBytes - 1);
446  str(R9, Address(SP, 0));
447#else
448  bic(SP, SP, StackAlignmentInBytes - 1);
449#endif // R9_IS_SCRATCHED
450#endif
451
452  mov(R0, Rthread);
453  call(entry_point, relocInfo::runtime_call_type);
454
455#ifndef AARCH64
456#if R9_IS_SCRATCHED
457  ldr(R9, Address(SP, 0));
458#endif
459  ldr(SP, Address(Rthread, JavaThread::last_Java_sp_offset()));
460#endif
461
462  reset_last_Java_frame(tmp);
463
464  // C++ interp handles this in the interpreter
465  check_and_handle_popframe();
466  check_and_handle_earlyret();
467
468  if (check_exceptions) {
469    // check for pending exceptions
470    ldr(tmp, Address(Rthread, Thread::pending_exception_offset()));
471#ifdef AARCH64
472    Label L;
473    cbz(tmp, L);
474    mov_pc_to(Rexception_pc);
475    b(StubRoutines::forward_exception_entry());
476    bind(L);
477#else
478    cmp(tmp, 0);
479    mov(Rexception_pc, PC, ne);
480    b(StubRoutines::forward_exception_entry(), ne);
481#endif // AARCH64
482  }
483
484  // get oop result if there is one and reset the value in the thread
485  if (oop_result->is_valid()) {
486    get_vm_result(oop_result, tmp);
487  }
488}
489
490void MacroAssembler::call_VM(Register oop_result, address entry_point, bool check_exceptions) {
491  call_VM_helper(oop_result, entry_point, 0, check_exceptions);
492}
493
494
495void MacroAssembler::call_VM(Register oop_result, address entry_point, Register arg_1, bool check_exceptions) {
496  assert (arg_1 == R1, "fixed register for arg_1");
497  call_VM_helper(oop_result, entry_point, 1, check_exceptions);
498}
499
500
501void MacroAssembler::call_VM(Register oop_result, address entry_point, Register arg_1, Register arg_2, bool check_exceptions) {
502  assert (arg_1 == R1, "fixed register for arg_1");
503  assert (arg_2 == R2, "fixed register for arg_2");
504  call_VM_helper(oop_result, entry_point, 2, check_exceptions);
505}
506
507
508void MacroAssembler::call_VM(Register oop_result, address entry_point, Register arg_1, Register arg_2, Register arg_3, bool check_exceptions) {
509  assert (arg_1 == R1, "fixed register for arg_1");
510  assert (arg_2 == R2, "fixed register for arg_2");
511  assert (arg_3 == R3, "fixed register for arg_3");
512  call_VM_helper(oop_result, entry_point, 3, check_exceptions);
513}
514
515
516void MacroAssembler::call_VM(Register oop_result, Register last_java_sp, address entry_point, int number_of_arguments, bool check_exceptions) {
517  // Not used on ARM
518  Unimplemented();
519}
520
521
522void MacroAssembler::call_VM(Register oop_result, Register last_java_sp, address entry_point, Register arg_1, bool check_exceptions) {
523  // Not used on ARM
524  Unimplemented();
525}
526
527
528void MacroAssembler::call_VM(Register oop_result, Register last_java_sp, address entry_point, Register arg_1, Register arg_2, bool check_exceptions) {
  // Not used on ARM
530  Unimplemented();
531}
532
533
534void MacroAssembler::call_VM(Register oop_result, Register last_java_sp, address entry_point, Register arg_1, Register arg_2, Register arg_3, bool check_exceptions) {
535  // Not used on ARM
536  Unimplemented();
537}
538
539// Raw call, without saving/restoring registers, exception handling, etc.
540// Mainly used from various stubs.
541void MacroAssembler::call_VM(address entry_point, bool save_R9_if_scratched) {
542  const Register tmp = Rtemp; // Rtemp free since scratched by call
543  set_last_Java_frame(SP, FP, true, tmp);
544#if R9_IS_SCRATCHED
545  if (save_R9_if_scratched) {
546    // Note: Saving also R10 for alignment.
547    push(RegisterSet(R9, R10));
548  }
549#endif
550  mov(R0, Rthread);
551  call(entry_point, relocInfo::runtime_call_type);
552#if R9_IS_SCRATCHED
553  if (save_R9_if_scratched) {
554    pop(RegisterSet(R9, R10));
555  }
556#endif
557  reset_last_Java_frame(tmp);
558}
559
560void MacroAssembler::call_VM_leaf(address entry_point) {
561  call_VM_leaf_helper(entry_point, 0);
562}
563
564void MacroAssembler::call_VM_leaf(address entry_point, Register arg_1) {
565  assert (arg_1 == R0, "fixed register for arg_1");
566  call_VM_leaf_helper(entry_point, 1);
567}
568
569void MacroAssembler::call_VM_leaf(address entry_point, Register arg_1, Register arg_2) {
570  assert (arg_1 == R0, "fixed register for arg_1");
571  assert (arg_2 == R1, "fixed register for arg_2");
572  call_VM_leaf_helper(entry_point, 2);
573}
574
575void MacroAssembler::call_VM_leaf(address entry_point, Register arg_1, Register arg_2, Register arg_3) {
576  assert (arg_1 == R0, "fixed register for arg_1");
577  assert (arg_2 == R1, "fixed register for arg_2");
578  assert (arg_3 == R2, "fixed register for arg_3");
579  call_VM_leaf_helper(entry_point, 3);
580}
581
582void MacroAssembler::call_VM_leaf(address entry_point, Register arg_1, Register arg_2, Register arg_3, Register arg_4) {
583  assert (arg_1 == R0, "fixed register for arg_1");
584  assert (arg_2 == R1, "fixed register for arg_2");
585  assert (arg_3 == R2, "fixed register for arg_3");
586  assert (arg_4 == R3, "fixed register for arg_4");
587  call_VM_leaf_helper(entry_point, 4);
588}
589
590void MacroAssembler::get_vm_result(Register oop_result, Register tmp) {
591  assert_different_registers(oop_result, tmp);
592  ldr(oop_result, Address(Rthread, JavaThread::vm_result_offset()));
593  str(zero_register(tmp), Address(Rthread, JavaThread::vm_result_offset()));
594  verify_oop(oop_result);
595}
596
597void MacroAssembler::get_vm_result_2(Register metadata_result, Register tmp) {
598  assert_different_registers(metadata_result, tmp);
599  ldr(metadata_result, Address(Rthread, JavaThread::vm_result_2_offset()));
600  str(zero_register(tmp), Address(Rthread, JavaThread::vm_result_2_offset()));
601}
602
603void MacroAssembler::add_rc(Register dst, Register arg1, RegisterOrConstant arg2) {
604  if (arg2.is_register()) {
605    add(dst, arg1, arg2.as_register());
606  } else {
607    add(dst, arg1, arg2.as_constant());
608  }
609}
610
611void MacroAssembler::add_slow(Register rd, Register rn, int c) {
612#ifdef AARCH64
613  if (c == 0) {
614    if (rd != rn) {
615      mov(rd, rn);
616    }
617    return;
618  }
619  if (c < 0) {
620    sub_slow(rd, rn, -c);
621    return;
622  }
623  if (c > right_n_bits(24)) {
624    guarantee(rd != rn, "no large add_slow with only one register");
625    mov_slow(rd, c);
626    add(rd, rn, rd);
627  } else {
628    int lo = c & right_n_bits(12);
629    int hi = (c >> 12) & right_n_bits(12);
630    if (lo != 0) {
631      add(rd, rn, lo, lsl0);
632    }
633    if (hi != 0) {
634      add(rd, (lo == 0) ? rn : rd, hi, lsl12);
635    }
636  }
637#else
638  // This function is used in compiler for handling large frame offsets
639  if ((c < 0) && (((-c) & ~0x3fc) == 0)) {
640    return sub(rd, rn, (-c));
641  }
642  int low = c & 0x3fc;
643  if (low != 0) {
644    add(rd, rn, low);
645    rn = rd;
646  }
647  if (c & ~0x3fc) {
648    assert(AsmOperand::is_rotated_imm(c & ~0x3fc), "unsupported add_slow offset %d", c);
649    add(rd, rn, c & ~0x3fc);
650  } else if (rd != rn) {
651    assert(c == 0, "");
652    mov(rd, rn); // need to generate at least one move!
653  }
654#endif // AARCH64
655}
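// Worked example (illustrative only):
//   AArch64: c = 0x12345 is split into lo = 0x345 and hi = 0x12, producing
//              add rd, rn, #0x345 ; add rd, rd, #0x12, lsl #12
//   ARM32:   c = 0x1234 is split into 0x234 (low bits) and 0x1000 (a rotated immediate), producing
//              add rd, rn, #0x234 ; add rd, rd, #0x1000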
656
657void MacroAssembler::sub_slow(Register rd, Register rn, int c) {
658#ifdef AARCH64
659  if (c <= 0) {
660    add_slow(rd, rn, -c);
661    return;
662  }
663  if (c > right_n_bits(24)) {
664    guarantee(rd != rn, "no large sub_slow with only one register");
665    mov_slow(rd, c);
666    sub(rd, rn, rd);
667  } else {
668    int lo = c & right_n_bits(12);
669    int hi = (c >> 12) & right_n_bits(12);
670    if (lo != 0) {
671      sub(rd, rn, lo, lsl0);
672    }
673    if (hi != 0) {
674      sub(rd, (lo == 0) ? rn : rd, hi, lsl12);
675    }
676  }
677#else
678  // This function is used in compiler for handling large frame offsets
679  if ((c < 0) && (((-c) & ~0x3fc) == 0)) {
680    return add(rd, rn, (-c));
681  }
682  int low = c & 0x3fc;
683  if (low != 0) {
684    sub(rd, rn, low);
685    rn = rd;
686  }
687  if (c & ~0x3fc) {
688    assert(AsmOperand::is_rotated_imm(c & ~0x3fc), "unsupported sub_slow offset %d", c);
689    sub(rd, rn, c & ~0x3fc);
690  } else if (rd != rn) {
691    assert(c == 0, "");
692    mov(rd, rn); // need to generate at least one move!
693  }
694#endif // AARCH64
695}
696
697void MacroAssembler::mov_slow(Register rd, address addr) {
698  // do *not* call the non relocated mov_related_address
699  mov_slow(rd, (intptr_t)addr);
700}
701
702void MacroAssembler::mov_slow(Register rd, const char *str) {
703  mov_slow(rd, (intptr_t)str);
704}
705
706#ifdef AARCH64
707
708// Common code for mov_slow and instr_count_for_mov_slow.
709// Returns number of instructions of mov_slow pattern,
710// generating it if non-null MacroAssembler is given.
711int MacroAssembler::mov_slow_helper(Register rd, intptr_t c, MacroAssembler* masm) {
  // This code pattern is matched in NativeInstruction::is_mov_slow.
  // Update it when this code is modified.
714
715  const intx mask = right_n_bits(16);
716  // 1 movz instruction
717  for (int base_shift = 0; base_shift < 64; base_shift += 16) {
718    if ((c & ~(mask << base_shift)) == 0) {
719      if (masm != NULL) {
720        masm->movz(rd, ((uintx)c) >> base_shift, base_shift);
721      }
722      return 1;
723    }
724  }
725  // 1 movn instruction
726  for (int base_shift = 0; base_shift < 64; base_shift += 16) {
727    if (((~c) & ~(mask << base_shift)) == 0) {
728      if (masm != NULL) {
729        masm->movn(rd, ((uintx)(~c)) >> base_shift, base_shift);
730      }
731      return 1;
732    }
733  }
734  // 1 orr instruction
735  {
736    LogicalImmediate imm(c, false);
737    if (imm.is_encoded()) {
738      if (masm != NULL) {
739        masm->orr(rd, ZR, imm);
740      }
741      return 1;
742    }
743  }
744  // 1 movz/movn + up to 3 movk instructions
745  int zeroes = 0;
746  int ones = 0;
747  for (int base_shift = 0; base_shift < 64; base_shift += 16) {
748    int part = (c >> base_shift) & mask;
749    if (part == 0) {
750      ++zeroes;
751    } else if (part == mask) {
752      ++ones;
753    }
754  }
755  int def_bits = 0;
756  if (ones > zeroes) {
757    def_bits = mask;
758  }
759  int inst_count = 0;
760  for (int base_shift = 0; base_shift < 64; base_shift += 16) {
761    int part = (c >> base_shift) & mask;
762    if (part != def_bits) {
763      if (masm != NULL) {
764        if (inst_count > 0) {
765          masm->movk(rd, part, base_shift);
766        } else {
767          if (def_bits == 0) {
768            masm->movz(rd, part, base_shift);
769          } else {
770            masm->movn(rd, ~part & mask, base_shift);
771          }
772        }
773      }
774      inst_count++;
775    }
776  }
777  assert((1 <= inst_count) && (inst_count <= 4), "incorrect number of instructions");
778  return inst_count;
779}
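// Worked example (illustrative only): for c = 0x0000123400005678 no single movz/movn
// matches and the value is not a valid logical immediate, so the generic path emits
//   movz rd, #0x5678, lsl #0
//   movk rd, #0x1234, lsl #32
// (2 instructions; the two all-zero 16-bit chunks equal def_bits and are skipped).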
780
781void MacroAssembler::mov_slow(Register rd, intptr_t c) {
782#ifdef ASSERT
783  int off = offset();
784#endif
785  (void) mov_slow_helper(rd, c, this);
786  assert(offset() - off == instr_count_for_mov_slow(c) * InstructionSize, "size mismatch");
787}
788
789// Counts instructions generated by mov_slow(rd, c).
790int MacroAssembler::instr_count_for_mov_slow(intptr_t c) {
791  return mov_slow_helper(noreg, c, NULL);
792}
793
794int MacroAssembler::instr_count_for_mov_slow(address c) {
795  return mov_slow_helper(noreg, (intptr_t)c, NULL);
796}
797
798#else
799
800void MacroAssembler::mov_slow(Register rd, intptr_t c, AsmCondition cond) {
801  if (AsmOperand::is_rotated_imm(c)) {
802    mov(rd, c, cond);
803  } else if (AsmOperand::is_rotated_imm(~c)) {
804    mvn(rd, ~c, cond);
805  } else if (VM_Version::supports_movw()) {
806    movw(rd, c & 0xffff, cond);
807    if ((unsigned int)c >> 16) {
808      movt(rd, (unsigned int)c >> 16, cond);
809    }
810  } else {
811    // Find first non-zero bit
812    int shift = 0;
813    while ((c & (3 << shift)) == 0) {
814      shift += 2;
815    }
816    // Put the least significant part of the constant
817    int mask = 0xff << shift;
818    mov(rd, c & mask, cond);
819    // Add up to 3 other parts of the constant;
820    // each of them can be represented as rotated_imm
821    if (c & (mask << 8)) {
822      orr(rd, rd, c & (mask << 8), cond);
823    }
824    if (c & (mask << 16)) {
825      orr(rd, rd, c & (mask << 16), cond);
826    }
827    if (c & (mask << 24)) {
828      orr(rd, rd, c & (mask << 24), cond);
829    }
830  }
831}
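// Worked example (illustrative only): c = 0x12345678
//   with movw/movt support:     movw rd, #0x5678 ; movt rd, #0x1234
//   without movw/movt support:  the constant is built from a mov of the lowest 8-bit chunk
//                               (shifted to the first set bit pair) followed by up to three
//                               orr instructions, each adding another rotated-immediate chunk.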
832
833#endif // AARCH64
834
835void MacroAssembler::mov_oop(Register rd, jobject o, int oop_index,
836#ifdef AARCH64
837                             bool patchable
838#else
839                             AsmCondition cond
840#endif
841                             ) {
842
843  if (o == NULL) {
844#ifdef AARCH64
845    if (patchable) {
846      nop();
847    }
848    mov(rd, ZR);
849#else
850    mov(rd, 0, cond);
851#endif
852    return;
853  }
854
855  if (oop_index == 0) {
856    oop_index = oop_recorder()->allocate_oop_index(o);
857  }
858  relocate(oop_Relocation::spec(oop_index));
859
860#ifdef AARCH64
861  if (patchable) {
862    nop();
863  }
864  ldr(rd, pc());
865#else
866  if (VM_Version::supports_movw()) {
867    movw(rd, 0, cond);
868    movt(rd, 0, cond);
869  } else {
870    ldr(rd, Address(PC), cond);
871    // Extra nop to handle case of large offset of oop placeholder (see NativeMovConstReg::set_data).
872    nop();
873  }
874#endif
875}
876
877void MacroAssembler::mov_metadata(Register rd, Metadata* o, int metadata_index AARCH64_ONLY_ARG(bool patchable)) {
878  if (o == NULL) {
879#ifdef AARCH64
880    if (patchable) {
881      nop();
882    }
883#endif
884    mov(rd, 0);
885    return;
886  }
887
888  if (metadata_index == 0) {
889    metadata_index = oop_recorder()->allocate_metadata_index(o);
890  }
891  relocate(metadata_Relocation::spec(metadata_index));
892
893#ifdef AARCH64
894  if (patchable) {
895    nop();
896  }
897#ifdef COMPILER2
898  if (!patchable && VM_Version::prefer_moves_over_load_literal()) {
899    mov_slow(rd, (address)o);
900    return;
901  }
902#endif
903  ldr(rd, pc());
904#else
905  if (VM_Version::supports_movw()) {
906    movw(rd, ((int)o) & 0xffff);
907    movt(rd, (unsigned int)o >> 16);
908  } else {
909    ldr(rd, Address(PC));
910    // Extra nop to handle case of large offset of metadata placeholder (see NativeMovConstReg::set_data).
911    nop();
912  }
913#endif // AARCH64
914}
915
916void MacroAssembler::mov_float(FloatRegister fd, jfloat c NOT_AARCH64_ARG(AsmCondition cond)) {
917  Label skip_constant;
918  union {
919    jfloat f;
920    jint i;
921  } accessor;
922  accessor.f = c;
923
924#ifdef AARCH64
925  // TODO-AARCH64 - try to optimize loading of float constants with fmov and/or mov_slow
926  Label L;
927  ldr_s(fd, target(L));
928  b(skip_constant);
929  bind(L);
930  emit_int32(accessor.i);
931  bind(skip_constant);
932#else
933  flds(fd, Address(PC), cond);
934  b(skip_constant);
935  emit_int32(accessor.i);
936  bind(skip_constant);
937#endif // AARCH64
938}
939
940void MacroAssembler::mov_double(FloatRegister fd, jdouble c NOT_AARCH64_ARG(AsmCondition cond)) {
941  Label skip_constant;
942  union {
943    jdouble d;
944    jint i[2];
945  } accessor;
946  accessor.d = c;
947
948#ifdef AARCH64
949  // TODO-AARCH64 - try to optimize loading of double constants with fmov
950  Label L;
951  ldr_d(fd, target(L));
952  b(skip_constant);
953  align(wordSize);
954  bind(L);
955  emit_int32(accessor.i[0]);
956  emit_int32(accessor.i[1]);
957  bind(skip_constant);
958#else
959  fldd(fd, Address(PC), cond);
960  b(skip_constant);
961  emit_int32(accessor.i[0]);
962  emit_int32(accessor.i[1]);
963  bind(skip_constant);
964#endif // AARCH64
965}
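// Both mov_float and mov_double above embed the constant in the instruction stream
// (illustrative ARM32 layout; on ARM the PC register reads as "current instruction + 8"):
//   flds/fldd fd, [PC]        ; loads the literal placed 8 bytes ahead
//   b skip_constant           ; jump over the data
//   .word <constant bits>     ; one word for a float, two for a double
//   skip_constant: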
966
967void MacroAssembler::ldr_global_s32(Register reg, address address_of_global) {
968  intptr_t addr = (intptr_t) address_of_global;
969#ifdef AARCH64
970  assert((addr & 0x3) == 0, "address should be aligned");
971
972  // FIXME: TODO
973  if (false && page_reachable_from_cache(address_of_global)) {
974    assert(false,"TODO: relocate");
975    //relocate();
976    adrp(reg, address_of_global);
977    ldrsw(reg, Address(reg, addr & 0xfff));
978  } else {
979    mov_slow(reg, addr & ~0x3fff);
980    ldrsw(reg, Address(reg, addr & 0x3fff));
981  }
982#else
983  mov_slow(reg, addr & ~0xfff);
984  ldr(reg, Address(reg, addr & 0xfff));
985#endif
986}
987
988void MacroAssembler::ldr_global_ptr(Register reg, address address_of_global) {
989#ifdef AARCH64
990  intptr_t addr = (intptr_t) address_of_global;
991  assert ((addr & 0x7) == 0, "address should be aligned");
992  mov_slow(reg, addr & ~0x7fff);
993  ldr(reg, Address(reg, addr & 0x7fff));
994#else
995  ldr_global_s32(reg, address_of_global);
996#endif
997}
998
999void MacroAssembler::ldrb_global(Register reg, address address_of_global) {
1000  intptr_t addr = (intptr_t) address_of_global;
1001  mov_slow(reg, addr & ~0xfff);
1002  ldrb(reg, Address(reg, addr & 0xfff));
1003}
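// The *_global loaders above share one pattern (illustrative only): the absolute address
// is split into an aligned base, materialized with mov_slow, plus a small offset that fits
// the load's immediate field. For example, with address_of_global == 0x00401234 (ARM32):
//   mov_slow(reg, 0x00401000);
//   ldr(reg, Address(reg, 0x234));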
1004
1005void MacroAssembler::zero_extend(Register rd, Register rn, int bits) {
1006#ifdef AARCH64
1007  switch (bits) {
1008    case  8: uxtb(rd, rn); break;
1009    case 16: uxth(rd, rn); break;
1010    case 32: mov_w(rd, rn); break;
1011    default: ShouldNotReachHere();
1012  }
1013#else
1014  if (bits <= 8) {
1015    andr(rd, rn, (1 << bits) - 1);
1016  } else if (bits >= 24) {
1017    bic(rd, rn, -1 << bits);
1018  } else {
1019    mov(rd, AsmOperand(rn, lsl, 32 - bits));
1020    mov(rd, AsmOperand(rd, lsr, 32 - bits));
1021  }
1022#endif
1023}
1024
1025void MacroAssembler::sign_extend(Register rd, Register rn, int bits) {
1026#ifdef AARCH64
1027  switch (bits) {
1028    case  8: sxtb(rd, rn); break;
1029    case 16: sxth(rd, rn); break;
1030    case 32: sxtw(rd, rn); break;
1031    default: ShouldNotReachHere();
1032  }
1033#else
1034  mov(rd, AsmOperand(rn, lsl, 32 - bits));
1035  mov(rd, AsmOperand(rd, asr, 32 - bits));
1036#endif
1037}
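// Example (ARM32 path, illustrative only): sign_extend(rd, rn, 16) expands to
//   mov rd, rn, lsl #16
//   mov rd, rd, asr #16
// while zero_extend(rd, rn, 16) uses the same trick with lsr; widths <= 8 or >= 24
// are handled with a single and/bic instead.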
1038
1039#ifndef AARCH64
1040
1041void MacroAssembler::long_move(Register rd_lo, Register rd_hi,
1042                               Register rn_lo, Register rn_hi,
1043                               AsmCondition cond) {
1044  if (rd_lo != rn_hi) {
1045    if (rd_lo != rn_lo) { mov(rd_lo, rn_lo, cond); }
1046    if (rd_hi != rn_hi) { mov(rd_hi, rn_hi, cond); }
1047  } else if (rd_hi != rn_lo) {
1048    if (rd_hi != rn_hi) { mov(rd_hi, rn_hi, cond); }
1049    if (rd_lo != rn_lo) { mov(rd_lo, rn_lo, cond); }
1050  } else {
1051    eor(rd_lo, rd_hi, rd_lo, cond);
1052    eor(rd_hi, rd_lo, rd_hi, cond);
1053    eor(rd_lo, rd_hi, rd_lo, cond);
1054  }
1055}
1056
1057void MacroAssembler::long_shift(Register rd_lo, Register rd_hi,
1058                                Register rn_lo, Register rn_hi,
1059                                AsmShift shift, Register count) {
1060  Register tmp;
1061  if (rd_lo != rn_lo && rd_lo != rn_hi && rd_lo != count) {
1062    tmp = rd_lo;
1063  } else {
1064    tmp = rd_hi;
1065  }
1066  assert_different_registers(tmp, count, rn_lo, rn_hi);
1067
1068  subs(tmp, count, 32);
1069  if (shift == lsl) {
1070    assert_different_registers(rd_hi, rn_lo);
1071    assert_different_registers(count, rd_hi);
1072    mov(rd_hi, AsmOperand(rn_lo, shift, tmp), pl);
1073    rsb(tmp, count, 32, mi);
1074    if (rd_hi == rn_hi) {
1075      mov(rd_hi, AsmOperand(rn_hi, lsl, count), mi);
1076      orr(rd_hi, rd_hi, AsmOperand(rn_lo, lsr, tmp), mi);
1077    } else {
1078      mov(rd_hi, AsmOperand(rn_lo, lsr, tmp), mi);
1079      orr(rd_hi, rd_hi, AsmOperand(rn_hi, lsl, count), mi);
1080    }
1081    mov(rd_lo, AsmOperand(rn_lo, shift, count));
1082  } else {
1083    assert_different_registers(rd_lo, rn_hi);
1084    assert_different_registers(rd_lo, count);
1085    mov(rd_lo, AsmOperand(rn_hi, shift, tmp), pl);
1086    rsb(tmp, count, 32, mi);
1087    if (rd_lo == rn_lo) {
1088      mov(rd_lo, AsmOperand(rn_lo, lsr, count), mi);
1089      orr(rd_lo, rd_lo, AsmOperand(rn_hi, lsl, tmp), mi);
1090    } else {
1091      mov(rd_lo, AsmOperand(rn_hi, lsl, tmp), mi);
1092      orr(rd_lo, rd_lo, AsmOperand(rn_lo, lsr, count), mi);
1093    }
1094    mov(rd_hi, AsmOperand(rn_hi, shift, count));
1095  }
1096}
1097
1098void MacroAssembler::long_shift(Register rd_lo, Register rd_hi,
1099                                Register rn_lo, Register rn_hi,
1100                                AsmShift shift, int count) {
1101  assert(count != 0 && (count & ~63) == 0, "must be");
1102
1103  if (shift == lsl) {
1104    assert_different_registers(rd_hi, rn_lo);
1105    if (count >= 32) {
1106      mov(rd_hi, AsmOperand(rn_lo, lsl, count - 32));
1107      mov(rd_lo, 0);
1108    } else {
1109      mov(rd_hi, AsmOperand(rn_hi, lsl, count));
1110      orr(rd_hi, rd_hi, AsmOperand(rn_lo, lsr, 32 - count));
1111      mov(rd_lo, AsmOperand(rn_lo, lsl, count));
1112    }
1113  } else {
1114    assert_different_registers(rd_lo, rn_hi);
1115    if (count >= 32) {
1116      if (count == 32) {
1117        mov(rd_lo, rn_hi);
1118      } else {
1119        mov(rd_lo, AsmOperand(rn_hi, shift, count - 32));
1120      }
1121      if (shift == asr) {
1122        mov(rd_hi, AsmOperand(rn_hi, asr, 0));
1123      } else {
1124        mov(rd_hi, 0);
1125      }
1126    } else {
1127      mov(rd_lo, AsmOperand(rn_lo, lsr, count));
1128      orr(rd_lo, rd_lo, AsmOperand(rn_hi, lsl, 32 - count));
1129      mov(rd_hi, AsmOperand(rn_hi, shift, count));
1130    }
1131  }
1132}
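// Worked example (illustrative only): a 64-bit logical right shift (lsr) of rn_hi:rn_lo
// by a constant 40 becomes
//   mov rd_lo, rn_hi, lsr #8      // count - 32
//   mov rd_hi, #0
// while a shift by 8 becomes
//   mov rd_lo, rn_lo, lsr #8
//   orr rd_lo, rd_lo, rn_hi, lsl #24
//   mov rd_hi, rn_hi, lsr #8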
1133#endif // !AARCH64
1134
1135void MacroAssembler::_verify_oop(Register reg, const char* s, const char* file, int line) {
  // This code pattern is matched in NativeInstruction::skip_verify_oop.
  // Update it when this code is modified.
1138  if (!VerifyOops) return;
1139
1140  char buffer[64];
1141#ifdef COMPILER1
1142  if (CommentedAssembly) {
1143    snprintf(buffer, sizeof(buffer), "verify_oop at %d", offset());
1144    block_comment(buffer);
1145  }
1146#endif
1147  const char* msg_buffer = NULL;
1148  {
1149    ResourceMark rm;
1150    stringStream ss;
1151    ss.print("%s at offset %d (%s:%d)", s, offset(), file, line);
1152    msg_buffer = code_string(ss.as_string());
1153  }
1154
1155  save_all_registers();
1156
1157  if (reg != R2) {
1158      mov(R2, reg);                              // oop to verify
1159  }
1160  mov(R1, SP);                                   // register save area
1161
1162  Label done;
1163  InlinedString Lmsg(msg_buffer);
1164  ldr_literal(R0, Lmsg);                         // message
1165
1166  // call indirectly to solve generation ordering problem
1167  ldr_global_ptr(Rtemp, StubRoutines::verify_oop_subroutine_entry_address());
1168  call(Rtemp);
1169
1170  restore_all_registers();
1171
1172  b(done);
1173#ifdef COMPILER2
1174  int off = offset();
1175#endif
1176  bind_literal(Lmsg);
1177#ifdef COMPILER2
1178  if (offset() - off == 1 * wordSize) {
1179    // no padding, so insert nop for worst-case sizing
1180    nop();
1181  }
1182#endif
1183  bind(done);
1184}
1185
1186void MacroAssembler::_verify_oop_addr(Address addr, const char* s, const char* file, int line) {
1187  if (!VerifyOops) return;
1188
1189  const char* msg_buffer = NULL;
1190  {
1191    ResourceMark rm;
1192    stringStream ss;
1193    if ((addr.base() == SP) && (addr.index()==noreg)) {
1194      ss.print("verify_oop_addr SP[%d]: %s", (int)addr.disp(), s);
1195    } else {
1196      ss.print("verify_oop_addr: %s", s);
1197    }
1198    ss.print(" (%s:%d)", file, line);
1199    msg_buffer = code_string(ss.as_string());
1200  }
1201
1202  int push_size = save_all_registers();
1203
1204  if (addr.base() == SP) {
1205    // computes an addr that takes into account the push
1206    if (addr.index() != noreg) {
1207      Register new_base = addr.index() == R2 ? R1 : R2; // avoid corrupting the index
1208      add(new_base, SP, push_size);
1209      addr = addr.rebase(new_base);
1210    } else {
1211      addr = addr.plus_disp(push_size);
1212    }
1213  }
1214
1215  ldr(R2, addr);                                 // oop to verify
1216  mov(R1, SP);                                   // register save area
1217
1218  Label done;
1219  InlinedString Lmsg(msg_buffer);
1220  ldr_literal(R0, Lmsg);                         // message
1221
1222  // call indirectly to solve generation ordering problem
1223  ldr_global_ptr(Rtemp, StubRoutines::verify_oop_subroutine_entry_address());
1224  call(Rtemp);
1225
1226  restore_all_registers();
1227
1228  b(done);
1229  bind_literal(Lmsg);
1230  bind(done);
1231}
1232
1233void MacroAssembler::null_check(Register reg, Register tmp, int offset) {
1234  if (needs_explicit_null_check(offset)) {
1235#ifdef AARCH64
1236    ldr(ZR, Address(reg));
1237#else
1238    assert_different_registers(reg, tmp);
1239    if (tmp == noreg) {
1240      tmp = Rtemp;
1241      assert((! Thread::current()->is_Compiler_thread()) ||
1242             (! (ciEnv::current()->task() == NULL)) ||
1243             (! (ciEnv::current()->comp_level() == CompLevel_full_optimization)),
1244             "Rtemp not available in C2"); // explicit tmp register required
1245      // XXX: could we mark the code buffer as not compatible with C2 ?
1246    }
1247    ldr(tmp, Address(reg));
1248#endif
1249  }
1250}
1251
1252// Puts address of allocated object into register `obj` and end of allocated object into register `obj_end`.
1253void MacroAssembler::eden_allocate(Register obj, Register obj_end, Register tmp1, Register tmp2,
1254                                 RegisterOrConstant size_expression, Label& slow_case) {
1255  if (!Universe::heap()->supports_inline_contig_alloc()) {
1256    b(slow_case);
1257    return;
1258  }
1259
1260  CollectedHeap* ch = Universe::heap();
1261
1262  const Register top_addr = tmp1;
1263  const Register heap_end = tmp2;
1264
1265  if (size_expression.is_register()) {
1266    assert_different_registers(obj, obj_end, top_addr, heap_end, size_expression.as_register());
1267  } else {
1268    assert_different_registers(obj, obj_end, top_addr, heap_end);
1269  }
1270
1271  bool load_const = AARCH64_ONLY(false) NOT_AARCH64(VM_Version::supports_movw() ); // TODO-AARCH64 check performance
1272  if (load_const) {
1273    mov_address(top_addr, (address)Universe::heap()->top_addr(), symbolic_Relocation::eden_top_reference);
1274  } else {
1275    ldr(top_addr, Address(Rthread, JavaThread::heap_top_addr_offset()));
1276  }
1277  // Calculate new heap_top by adding the size of the object
1278  Label retry;
1279  bind(retry);
1280
1281#ifdef AARCH64
1282  ldxr(obj, top_addr);
1283#else
1284  ldr(obj, Address(top_addr));
1285#endif // AARCH64
1286
1287  ldr(heap_end, Address(top_addr, (intptr_t)ch->end_addr() - (intptr_t)ch->top_addr()));
1288  add_rc(obj_end, obj, size_expression);
1289  // Check if obj_end wrapped around, i.e., obj_end < obj. If yes, jump to the slow case.
1290  cmp(obj_end, obj);
1291  b(slow_case, lo);
1292  // Update heap_top if allocation succeeded
1293  cmp(obj_end, heap_end);
1294  b(slow_case, hi);
1295
1296#ifdef AARCH64
1297  stxr(heap_end/*scratched*/, obj_end, top_addr);
1298  cbnz_w(heap_end, retry);
1299#else
1300  atomic_cas_bool(obj, obj_end, top_addr, 0, heap_end/*scratched*/);
1301  b(retry, ne);
1302#endif // AARCH64
1303}
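// Rough equivalent of the retry loop above (illustrative only):
//   do {
//     obj     = *heap_top;                                       // ldxr on AArch64
//     obj_end = obj + size;
//     if (obj_end < obj || obj_end > heap_end) goto slow_case;   // overflow or heap exhausted
//   } while (!CAS(heap_top, obj, obj_end));                      // retry if another thread raced us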
1304
1305// Puts address of allocated object into register `obj` and end of allocated object into register `obj_end`.
1306void MacroAssembler::tlab_allocate(Register obj, Register obj_end, Register tmp1,
1307                                 RegisterOrConstant size_expression, Label& slow_case) {
1308  const Register tlab_end = tmp1;
1309  assert_different_registers(obj, obj_end, tlab_end);
1310
1311  ldr(obj, Address(Rthread, JavaThread::tlab_top_offset()));
1312  ldr(tlab_end, Address(Rthread, JavaThread::tlab_end_offset()));
1313  add_rc(obj_end, obj, size_expression);
1314  cmp(obj_end, tlab_end);
1315  b(slow_case, hi);
1316  str(obj_end, Address(Rthread, JavaThread::tlab_top_offset()));
1317}
1318
1319void MacroAssembler::tlab_refill(Register top, Register tmp1, Register tmp2,
1320                                 Register tmp3, Register tmp4,
1321                               Label& try_eden, Label& slow_case) {
1322  if (!Universe::heap()->supports_inline_contig_alloc()) {
1323    b(slow_case);
1324    return;
1325  }
1326
1327  InlinedAddress intArrayKlass_addr((address)Universe::intArrayKlassObj_addr());
1328  Label discard_tlab, do_refill;
1329  ldr(top,  Address(Rthread, JavaThread::tlab_top_offset()));
1330  ldr(tmp1, Address(Rthread, JavaThread::tlab_end_offset()));
1331  ldr(tmp2, Address(Rthread, JavaThread::tlab_refill_waste_limit_offset()));
1332
1333  // Calculate amount of free space
1334  sub(tmp1, tmp1, top);
1335  // Retain tlab and allocate in shared space
1336  // if the amount of free space in tlab is too large to discard
1337  cmp(tmp2, AsmOperand(tmp1, lsr, LogHeapWordSize));
1338  b(discard_tlab, ge);
1339
1340  // Increment waste limit to prevent getting stuck on this slow path
1341  mov_slow(tmp3, ThreadLocalAllocBuffer::refill_waste_limit_increment());
1342  add(tmp2, tmp2, tmp3);
1343  str(tmp2, Address(Rthread, JavaThread::tlab_refill_waste_limit_offset()));
1344  if (TLABStats) {
1345    ldr_u32(tmp2, Address(Rthread, JavaThread::tlab_slow_allocations_offset()));
1346    add_32(tmp2, tmp2, 1);
1347    str_32(tmp2, Address(Rthread, JavaThread::tlab_slow_allocations_offset()));
1348  }
1349  b(try_eden);
1350  bind_literal(intArrayKlass_addr);
1351
1352  bind(discard_tlab);
1353  if (TLABStats) {
1354    ldr_u32(tmp2, Address(Rthread, JavaThread::tlab_number_of_refills_offset()));
1355    ldr_u32(tmp3, Address(Rthread, JavaThread::tlab_fast_refill_waste_offset()));
1356    add_32(tmp2, tmp2, 1);
1357    add_32(tmp3, tmp3, AsmOperand(tmp1, lsr, LogHeapWordSize));
1358    str_32(tmp2, Address(Rthread, JavaThread::tlab_number_of_refills_offset()));
1359    str_32(tmp3, Address(Rthread, JavaThread::tlab_fast_refill_waste_offset()));
1360  }
1361  // If tlab is currently allocated (top or end != null)
1362  // then fill [top, end + alignment_reserve) with array object
1363  cbz(top, do_refill);
1364
1365  // Set up the mark word
1366  mov_slow(tmp2, (intptr_t)markOopDesc::prototype()->copy_set_hash(0x2));
1367  str(tmp2, Address(top, oopDesc::mark_offset_in_bytes()));
1368  // Set klass to intArrayKlass and the length to the remaining space
1369  ldr_literal(tmp2, intArrayKlass_addr);
1370  add(tmp1, tmp1, ThreadLocalAllocBuffer::alignment_reserve_in_bytes() -
1371      typeArrayOopDesc::header_size(T_INT) * HeapWordSize);
1372  Register klass = tmp2;
1373  ldr(klass, Address(tmp2));
1374  logical_shift_right(tmp1, tmp1, LogBytesPerInt); // divide by sizeof(jint)
1375  str_32(tmp1, Address(top, arrayOopDesc::length_offset_in_bytes()));
1376  store_klass(klass, top); // blows klass:
1377  klass = noreg;
1378
1379  ldr(tmp1, Address(Rthread, JavaThread::tlab_start_offset()));
1380  sub(tmp1, top, tmp1); // size of tlab's allocated portion
1381  incr_allocated_bytes(tmp1, tmp2);
1382
1383  bind(do_refill);
1384  // Refill the tlab with an eden allocation
1385  ldr(tmp1, Address(Rthread, JavaThread::tlab_size_offset()));
1386  logical_shift_left(tmp4, tmp1, LogHeapWordSize);
1387  eden_allocate(top, tmp1, tmp2, tmp3, tmp4, slow_case);
1388  str(top, Address(Rthread, JavaThread::tlab_start_offset()));
1389  str(top, Address(Rthread, JavaThread::tlab_top_offset()));
1390
1391#ifdef ASSERT
1392  // Verify that tmp1 contains tlab_end
1393  ldr(tmp2, Address(Rthread, JavaThread::tlab_size_offset()));
1394  add(tmp2, top, AsmOperand(tmp2, lsl, LogHeapWordSize));
1395  cmp(tmp1, tmp2);
1396  breakpoint(ne);
1397#endif
1398
1399  sub(tmp1, tmp1, ThreadLocalAllocBuffer::alignment_reserve_in_bytes());
1400  str(tmp1, Address(Rthread, JavaThread::tlab_end_offset()));
1401
1402  if (ZeroTLAB) {
1403    // clobbers start and tmp
1404    // top must be preserved!
1405    add(tmp1, tmp1, ThreadLocalAllocBuffer::alignment_reserve_in_bytes());
1406    ldr(tmp2, Address(Rthread, JavaThread::tlab_start_offset()));
1407    zero_memory(tmp2, tmp1, tmp3);
1408  }
1409}
1410
1411// Fills memory regions [start..end] with zeroes. Clobbers `start` and `tmp` registers.
1412void MacroAssembler::zero_memory(Register start, Register end, Register tmp) {
1413  Label loop;
1414  const Register ptr = start;
1415
1416#ifdef AARCH64
1417  // TODO-AARCH64 - compare performance of 2x word zeroing with simple 1x
1418  const Register size = tmp;
1419  Label remaining, done;
1420
1421  sub(size, end, start);
1422
1423#ifdef ASSERT
1424  { Label L;
1425    tst(size, wordSize - 1);
1426    b(L, eq);
1427    stop("size is not a multiple of wordSize");
1428    bind(L);
1429  }
1430#endif // ASSERT
1431
1432  subs(size, size, wordSize);
1433  b(remaining, le);
1434
1435  // Zero by 2 words per iteration.
1436  bind(loop);
1437  subs(size, size, 2*wordSize);
1438  stp(ZR, ZR, Address(ptr, 2*wordSize, post_indexed));
1439  b(loop, gt);
1440
1441  bind(remaining);
1442  b(done, ne);
1443  str(ZR, Address(ptr));
1444  bind(done);
1445#else
1446  mov(tmp, 0);
1447  bind(loop);
1448  cmp(ptr, end);
1449  str(tmp, Address(ptr, wordSize, post_indexed), lo);
1450  b(loop, lo);
1451#endif // AARCH64
1452}
1453
1454void MacroAssembler::incr_allocated_bytes(RegisterOrConstant size_in_bytes, Register tmp) {
1455#ifdef AARCH64
1456  ldr(tmp, Address(Rthread, in_bytes(JavaThread::allocated_bytes_offset())));
1457  add_rc(tmp, tmp, size_in_bytes);
1458  str(tmp, Address(Rthread, in_bytes(JavaThread::allocated_bytes_offset())));
1459#else
1460  // Bump total bytes allocated by this thread
1461  Label done;
1462
1463  ldr(tmp, Address(Rthread, in_bytes(JavaThread::allocated_bytes_offset())));
1464  adds(tmp, tmp, size_in_bytes);
1465  str(tmp, Address(Rthread, in_bytes(JavaThread::allocated_bytes_offset())), cc);
1466  b(done, cc);
1467
  // Increment the high word and store the pair single-copy atomically. This is an unlikely
  // scenario on typical embedded systems, as it means more than 4GB has been allocated.
  // To do so, ldrd/strd instructions are used, which require an even-odd pair of registers.
  // Such a request could be difficult to satisfy when allocating registers at a higher level,
  // so the routine allocates a pair itself.
  Register low, high;
  // Select either R0/R1 or R2/R3
1473
1474  if (size_in_bytes.is_register() && (size_in_bytes.as_register() == R0 || size_in_bytes.as_register() == R1)) {
1475    low = R2;
1476    high  = R3;
1477  } else {
1478    low = R0;
1479    high  = R1;
1480  }
1481  push(RegisterSet(low, high));
1482
1483  ldrd(low, Address(Rthread, in_bytes(JavaThread::allocated_bytes_offset())));
1484  adds(low, low, size_in_bytes);
1485  adc(high, high, 0);
1486  strd(low, Address(Rthread, in_bytes(JavaThread::allocated_bytes_offset())));
1487
1488  pop(RegisterSet(low, high));
1489
1490  bind(done);
1491#endif // AARCH64
1492}
1493
1494void MacroAssembler::arm_stack_overflow_check(int frame_size_in_bytes, Register tmp) {
1495  // Version of AbstractAssembler::generate_stack_overflow_check optimized for ARM
1496  if (UseStackBanging) {
1497    const int page_size = os::vm_page_size();
1498
1499    sub_slow(tmp, SP, JavaThread::stack_shadow_zone_size());
1500    strb(R0, Address(tmp));
1501#ifdef AARCH64
1502    for (; frame_size_in_bytes >= page_size; frame_size_in_bytes -= page_size) {
1503      sub(tmp, tmp, page_size);
1504      strb(R0, Address(tmp));
1505    }
1506#else
1507    for (; frame_size_in_bytes >= page_size; frame_size_in_bytes -= 0xff0) {
1508      strb(R0, Address(tmp, -0xff0, pre_indexed));
1509    }
1510#endif // AARCH64
1511  }
1512}
1513
1514void MacroAssembler::arm_stack_overflow_check(Register Rsize, Register tmp) {
1515  if (UseStackBanging) {
1516    Label loop;
1517
1518    mov(tmp, SP);
1519    add_slow(Rsize, Rsize, JavaThread::stack_shadow_zone_size() - os::vm_page_size());
1520#ifdef AARCH64
1521    sub(tmp, tmp, Rsize);
1522    bind(loop);
1523    subs(Rsize, Rsize, os::vm_page_size());
1524    strb(ZR, Address(tmp, Rsize));
1525#else
1526    bind(loop);
1527    subs(Rsize, Rsize, 0xff0);
1528    strb(R0, Address(tmp, -0xff0, pre_indexed));
1529#endif // AARCH64
1530    b(loop, hi);
1531  }
1532}
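// Rough equivalent of the banging pattern used by the two routines above
// (ARM32 constant-size variant, illustrative only, assuming a 4K page size):
//   p = SP - JavaThread::stack_shadow_zone_size();
//   *p = 0;                                    // touch the shadow zone boundary
//   while (remaining >= page_size) {
//     p -= 0xff0;  *p = 0;                     // 0xff0 < page size, so every page is hit
//     remaining -= 0xff0;
//   }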
1533
1534void MacroAssembler::stop(const char* msg) {
  // This code pattern is matched in NativeInstruction::is_stop.
  // Update it when this code is modified.
1537#ifdef COMPILER1
1538  if (CommentedAssembly) {
1539    block_comment("stop");
1540  }
1541#endif
1542
1543  InlinedAddress Ldebug(CAST_FROM_FN_PTR(address, MacroAssembler::debug));
1544  InlinedString Lmsg(msg);
1545
1546  // save all registers for further inspection
1547  save_all_registers();
1548
1549  ldr_literal(R0, Lmsg);                     // message
1550  mov(R1, SP);                               // register save area
1551
1552#ifdef AARCH64
1553  ldr_literal(Rtemp, Ldebug);
1554  br(Rtemp);
1555#else
1556  ldr_literal(PC, Ldebug);                   // call MacroAssembler::debug
1557#endif // AARCH64
1558
1559#if defined(COMPILER2) && defined(AARCH64)
1560  int off = offset();
1561#endif
1562  bind_literal(Lmsg);
1563  bind_literal(Ldebug);
1564#if defined(COMPILER2) && defined(AARCH64)
1565  if (offset() - off == 2 * wordSize) {
1566    // no padding, so insert nop for worst-case sizing
1567    nop();
1568  }
1569#endif
1570}
1571
1572void MacroAssembler::warn(const char* msg) {
1573#ifdef COMPILER1
1574  if (CommentedAssembly) {
1575    block_comment("warn");
1576  }
1577#endif
1578
1579  InlinedAddress Lwarn(CAST_FROM_FN_PTR(address, warning));
1580  InlinedString Lmsg(msg);
1581  Label done;
1582
1583  int push_size = save_caller_save_registers();
1584
1585#ifdef AARCH64
1586  // TODO-AARCH64 - get rid of extra debug parameters
1587  mov(R1, LR);
1588  mov(R2, FP);
1589  add(R3, SP, push_size);
1590#endif
1591
1592  ldr_literal(R0, Lmsg);                    // message
1593  ldr_literal(LR, Lwarn);                   // call warning
1594
1595  call(LR);
1596
1597  restore_caller_save_registers();
1598
1599  b(done);
1600  bind_literal(Lmsg);
1601  bind_literal(Lwarn);
1602  bind(done);
1603}
1604
1605
1606int MacroAssembler::save_all_registers() {
  // This code pattern is matched in NativeInstruction::is_save_all_registers.
  // Update it when this code is modified.
1609#ifdef AARCH64
1610  const Register tmp = Rtemp;
1611  raw_push(R30, ZR);
1612  for (int i = 28; i >= 0; i -= 2) {
1613      raw_push(as_Register(i), as_Register(i+1));
1614  }
1615  mov_pc_to(tmp);
1616  str(tmp, Address(SP, 31*wordSize));
1617  ldr(tmp, Address(SP, tmp->encoding()*wordSize));
1618  return 32*wordSize;
1619#else
1620  push(RegisterSet(R0, R12) | RegisterSet(LR) | RegisterSet(PC));
1621  return 15*wordSize;
1622#endif // AARCH64
1623}
1624
1625void MacroAssembler::restore_all_registers() {
1626#ifdef AARCH64
1627  for (int i = 0; i <= 28; i += 2) {
1628    raw_pop(as_Register(i), as_Register(i+1));
1629  }
1630  raw_pop(R30, ZR);
1631#else
1632  pop(RegisterSet(R0, R12) | RegisterSet(LR));   // restore registers
1633  add(SP, SP, wordSize);                         // discard saved PC
1634#endif // AARCH64
1635}
1636
1637int MacroAssembler::save_caller_save_registers() {
1638#ifdef AARCH64
1639  for (int i = 0; i <= 16; i += 2) {
1640    raw_push(as_Register(i), as_Register(i+1));
1641  }
1642  raw_push(R18, LR);
1643  return 20*wordSize;
1644#else
1645#if R9_IS_SCRATCHED
1646  // Save also R10 to preserve alignment
1647  push(RegisterSet(R0, R3) | RegisterSet(R12) | RegisterSet(LR) | RegisterSet(R9,R10));
1648  return 8*wordSize;
1649#else
1650  push(RegisterSet(R0, R3) | RegisterSet(R12) | RegisterSet(LR));
1651  return 6*wordSize;
1652#endif
1653#endif // AARCH64
1654}
1655
1656void MacroAssembler::restore_caller_save_registers() {
1657#ifdef AARCH64
1658  raw_pop(R18, LR);
1659  for (int i = 16; i >= 0; i -= 2) {
1660    raw_pop(as_Register(i), as_Register(i+1));
1661  }
1662#else
1663#if R9_IS_SCRATCHED
1664  pop(RegisterSet(R0, R3) | RegisterSet(R12) | RegisterSet(LR) | RegisterSet(R9,R10));
1665#else
1666  pop(RegisterSet(R0, R3) | RegisterSet(R12) | RegisterSet(LR));
1667#endif
1668#endif // AARCH64
1669}
1670
1671void MacroAssembler::debug(const char* msg, const intx* registers) {
1672  // In order to get locks to work, we need to fake an in_VM state
1673  JavaThread* thread = JavaThread::current();
1674  thread->set_thread_state(_thread_in_vm);
1675
1676  if (ShowMessageBoxOnError) {
1677    ttyLocker ttyl;
1678    if (CountBytecodes || TraceBytecodes || StopInterpreterAt) {
1679      BytecodeCounter::print();
1680    }
1681    if (os::message_box(msg, "Execution stopped, print registers?")) {
1682#ifdef AARCH64
1683      // saved registers: R0-R30, PC
1684      const int nregs = 32;
1685#else
1686      // saved registers: R0-R12, LR, PC
1687      const int nregs = 15;
1688      const Register regs[nregs] = {R0, R1, R2, R3, R4, R5, R6, R7, R8, R9, R10, R11, R12, LR, PC};
1689#endif // AARCH64
1690
1691      for (int i = 0; i < nregs AARCH64_ONLY(-1); i++) {
1692        tty->print_cr("%s = " INTPTR_FORMAT, AARCH64_ONLY(as_Register(i)) NOT_AARCH64(regs[i])->name(), registers[i]);
1693      }
1694
1695#ifdef AARCH64
1696      tty->print_cr("pc = " INTPTR_FORMAT, registers[nregs-1]);
1697#endif // AARCH64
1698
1699      // derive original SP value from the address of register save area
1700      tty->print_cr("%s = " INTPTR_FORMAT, SP->name(), p2i(&registers[nregs]));
1701    }
1702    BREAKPOINT;
1703  } else {
1704    ::tty->print_cr("=============== DEBUG MESSAGE: %s ================\n", msg);
1705  }
1706  assert(false, "DEBUG MESSAGE: %s", msg);
1707  fatal("%s", msg); // returning from MacroAssembler::debug is not supported
1708}
1709
1710void MacroAssembler::unimplemented(const char* what) {
1711  const char* buf = NULL;
1712  {
1713    ResourceMark rm;
1714    stringStream ss;
1715    ss.print("unimplemented: %s", what);
1716    buf = code_string(ss.as_string());
1717  }
1718  stop(buf);
1719}
1720
1721
1722// Implementation of FixedSizeCodeBlock
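//
// When enabled, the destructor pads the emitted code with nops so that the block
// always spans exactly size_in_instrs instructions; this is useful for sequences
// whose total size must be known in advance.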
1723
1724FixedSizeCodeBlock::FixedSizeCodeBlock(MacroAssembler* masm, int size_in_instrs, bool enabled) :
1725_masm(masm), _start(masm->pc()), _size_in_instrs(size_in_instrs), _enabled(enabled) {
1726}
1727
1728FixedSizeCodeBlock::~FixedSizeCodeBlock() {
1729  if (_enabled) {
1730    address curr_pc = _masm->pc();
1731
1732    assert(_start < curr_pc, "invalid current pc");
1733    guarantee(curr_pc <= _start + _size_in_instrs * Assembler::InstructionSize, "code block is too long");
1734
1735    int nops_count = (_start - curr_pc) / Assembler::InstructionSize + _size_in_instrs;
1736    for (int i = 0; i < nops_count; i++) {
1737      _masm->nop();
1738    }
1739  }
1740}
1741
1742#ifdef AARCH64
1743
1744// Serializes memory.
1745// The tmp register is not used on AArch64; the parameter is provided solely for compatibility with 32-bit ARM
1746void MacroAssembler::membar(Membar_mask_bits order_constraint, Register tmp) {
1747  if (!os::is_MP()) return;
1748
1749  // TODO-AARCH64 investigate dsb vs dmb effects
1750  if (order_constraint == StoreStore) {
1751    dmb(DMB_st);
1752  } else if ((order_constraint & ~(LoadLoad | LoadStore)) == 0) {
1753    dmb(DMB_ld);
1754  } else {
1755    dmb(DMB_all);
1756  }
1757}
1758
1759#else
1760
1761// Serializes memory. Potentially blows flags and reg.
1762// tmp is a scratch register for the v6 co-processor write op (could be noreg for other architecture versions)
1763// preserve_flags takes a longer path in the LoadStore case (dmb rather than a control dependency) to preserve status flags. Optional.
1764// load_tgt is an ordered load target in the LoadStore case only, to create a dependency between the load operation and the conditional branch. Optional.
1765void MacroAssembler::membar(Membar_mask_bits order_constraint,
1766                            Register tmp,
1767                            bool preserve_flags,
1768                            Register load_tgt) {
1769  if (!os::is_MP()) return;
1770
1771  if (order_constraint == StoreStore) {
1772    dmb(DMB_st, tmp);
1773  } else if ((order_constraint & StoreLoad)  ||
1774             (order_constraint & LoadLoad)   ||
1775             (order_constraint & StoreStore) ||
1776             (load_tgt == noreg)             ||
1777             preserve_flags) {
1778    dmb(DMB_all, tmp);
1779  } else {
1780    // LoadStore: reordering of speculative stores is prohibited
1781
1782    // By providing an ordered load target register, we avoid an extra memory load reference
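    // A sketch of the intent (an assumption, not spelled out in the original): the
    // self-compare of load_tgt plus the never-taken conditional branch forms a control
    // dependency on the load, which on ARM prevents later stores from being observed
    // ahead of it, giving LoadStore ordering without a dmb.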
1783    Label not_taken;
1784    bind(not_taken);
1785    cmp(load_tgt, load_tgt);
1786    b(not_taken, ne);
1787  }
1788}
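
// Illustrative usage only (the register names Rvalue/Rbase and the offset are hypothetical),
// following the JSR-133 cookbook recipe cited elsewhere in this file for a volatile store:
//   membar(MacroAssembler::Membar_mask_bits(MacroAssembler::LoadStore | MacroAssembler::StoreStore), Rtemp);
//   str(Rvalue, Address(Rbase, offset));
//   membar(MacroAssembler::StoreLoad, Rtemp);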
1789
1790#endif // AARCH64
1791
1792// If "allow_fallthrough_on_failure" is false, we always branch to "slow_case"
1793// on failure, so fall-through can only mean success.
1794// "one_shot" controls whether we loop and retry to mitigate spurious failures.
1795// This is only needed for C2, which for some reason does not retry,
1796// while C1/interpreter does.
1797// TODO: measure if it makes a difference
1798
1799void MacroAssembler::cas_for_lock_acquire(Register oldval, Register newval,
1800  Register base, Register tmp, Label &slow_case,
1801  bool allow_fallthrough_on_failure, bool one_shot)
1802{
1803
1804  bool fallthrough_is_success = false;
1805
1806  // ARM Litmus Test example does prefetching here.
1807  // TODO: investigate if it helps performance
1808
1809  // The last store was to the displaced header, so to prevent
1810  // reordering we must issue a StoreStore or Release barrier before
1811  // the CAS store.
1812
1813#ifdef AARCH64
1814
1815  Register Rscratch = tmp;
1816  Register Roop = base;
1817  Register mark = oldval;
1818  Register Rbox = newval;
1819  Label loop;
1820
1821  assert(oopDesc::mark_offset_in_bytes() == 0, "must be");
1822
1823  // Instead of StoreStore here, we use store-release-exclusive below
1824
1825  bind(loop);
1826
1827  ldaxr(tmp, base);  // acquire
1828  cmp(tmp, oldval);
1829  b(slow_case, ne);
1830  stlxr(tmp, newval, base); // release
1831  if (one_shot) {
1832    cmp_w(tmp, 0);
1833  } else {
1834    cbnz_w(tmp, loop);
1835    fallthrough_is_success = true;
1836  }
1837
1838  // MemBarAcquireLock would normally go here, but
1839  // we already do ldaxr+stlxr above, which has
1840  // Sequential Consistency
1841
1842#else
1843  membar(MacroAssembler::StoreStore, noreg);
1844
1845  if (one_shot) {
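    // One-shot attempt: strex writes 0 to tmp on success and 1 if the exclusive
    // monitor was lost, so the final conditional cmp leaves 'eq' set only when both
    // the compare and the store succeeded.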
1846    ldrex(tmp, Address(base, oopDesc::mark_offset_in_bytes()));
1847    cmp(tmp, oldval);
1848    strex(tmp, newval, Address(base, oopDesc::mark_offset_in_bytes()), eq);
1849    cmp(tmp, 0, eq);
1850  } else {
1851    atomic_cas_bool(oldval, newval, base, oopDesc::mark_offset_in_bytes(), tmp);
1852  }
1853
1854  // MemBarAcquireLock barrier
1855  // According to JSR-133 Cookbook, this should be LoadLoad | LoadStore,
1856  // but that doesn't prevent a load or store from floating up between
1857  // the load and store in the CAS sequence, so play it safe and
1858  // do a full fence.
1859  membar(Membar_mask_bits(LoadLoad | LoadStore | StoreStore | StoreLoad), noreg);
1860#endif
1861  if (!fallthrough_is_success && !allow_fallthrough_on_failure) {
1862    b(slow_case, ne);
1863  }
1864}
1865
1866void MacroAssembler::cas_for_lock_release(Register oldval, Register newval,
1867  Register base, Register tmp, Label &slow_case,
1868  bool allow_fallthrough_on_failure, bool one_shot)
1869{
1870
1871  bool fallthrough_is_success = false;
1872
1873  assert_different_registers(oldval,newval,base,tmp);
1874
1875#ifdef AARCH64
1876  Label loop;
1877
1878  assert(oopDesc::mark_offset_in_bytes() == 0, "must be");
1879
1880  bind(loop);
1881  ldxr(tmp, base);
1882  cmp(tmp, oldval);
1883  b(slow_case, ne);
1884  // MemBarReleaseLock barrier
1885  stlxr(tmp, newval, base);
1886  if (one_shot) {
1887    cmp_w(tmp, 0);
1888  } else {
1889    cbnz_w(tmp, loop);
1890    fallthrough_is_success = true;
1891  }
1892#else
1893  // MemBarReleaseLock barrier
1894  // According to JSR-133 Cookbook, this should be StoreStore | LoadStore,
1895  // but that doesn't prevent a load or store from floating down between
1896  // the load and store in the CAS sequence, so play it safe and
1897  // do a full fence.
1898  membar(Membar_mask_bits(LoadLoad | LoadStore | StoreStore | StoreLoad), tmp);
1899
1900  if (one_shot) {
1901    ldrex(tmp, Address(base, oopDesc::mark_offset_in_bytes()));
1902    cmp(tmp, oldval);
1903    strex(tmp, newval, Address(base, oopDesc::mark_offset_in_bytes()), eq);
1904    cmp(tmp, 0, eq);
1905  } else {
1906    atomic_cas_bool(oldval, newval, base, oopDesc::mark_offset_in_bytes(), tmp);
1907  }
1908#endif
1909  if (!fallthrough_is_success && !allow_fallthrough_on_failure) {
1910    b(slow_case, ne);
1911  }
1912
1913  // ExitEnter
1914  // According to JSR-133 Cookbook, this should be StoreLoad, the same
1915  // barrier that follows volatile store.
1916  // TODO: Should be able to remove on armv8 if volatile loads
1917  // use the load-acquire instruction.
1918  membar(StoreLoad, noreg);
1919}
1920
1921#ifndef PRODUCT
1922
1923// Preserves flags and all registers.
1924// On SMP the updated value might not be visible to external observers without a synchronization barrier
1925void MacroAssembler::cond_atomic_inc32(AsmCondition cond, int* counter_addr) {
1926  if (counter_addr != NULL) {
1927    InlinedAddress counter_addr_literal((address)counter_addr);
1928    Label done, retry;
1929    if (cond != al) {
1930      b(done, inverse(cond));
1931    }
1932
1933#ifdef AARCH64
1934    raw_push(R0, R1);
1935    raw_push(R2, ZR);
1936
1937    ldr_literal(R0, counter_addr_literal);
1938
1939    bind(retry);
1940    ldxr_w(R1, R0);
1941    add_w(R1, R1, 1);
1942    stxr_w(R2, R1, R0);
1943    cbnz_w(R2, retry);
1944
1945    raw_pop(R2, ZR);
1946    raw_pop(R0, R1);
1947#else
1948    push(RegisterSet(R0, R3) | RegisterSet(Rtemp));
1949    ldr_literal(R0, counter_addr_literal);
1950
1951    mrs(CPSR, Rtemp);
1952
1953    bind(retry);
1954    ldr_s32(R1, Address(R0));
1955    add(R2, R1, 1);
1956    atomic_cas_bool(R1, R2, R0, 0, R3);
1957    b(retry, ne);
1958
1959    msr(CPSR_fsxc, Rtemp);
1960
1961    pop(RegisterSet(R0, R3) | RegisterSet(Rtemp));
1962#endif // AARCH64
1963
1964    b(done);
1965    bind_literal(counter_addr_literal);
1966
1967    bind(done);
1968  }
1969}
1970
1971#endif // !PRODUCT
1972
1973
1974// Building block for CAS cases of biased locking: makes CAS and records statistics.
1975// The slow_case label is used to transfer control if CAS fails. Otherwise leaves condition codes set.
1976void MacroAssembler::biased_locking_enter_with_cas(Register obj_reg, Register old_mark_reg, Register new_mark_reg,
1977                                                 Register tmp, Label& slow_case, int* counter_addr) {
1978
1979  cas_for_lock_acquire(old_mark_reg, new_mark_reg, obj_reg, tmp, slow_case);
1980#ifdef ASSERT
1981  breakpoint(ne); // Fallthrough only on success
1982#endif
1983#ifndef PRODUCT
1984  if (counter_addr != NULL) {
1985    cond_atomic_inc32(al, counter_addr);
1986  }
1987#endif // !PRODUCT
1988}
1989
1990int MacroAssembler::biased_locking_enter(Register obj_reg, Register swap_reg, Register tmp_reg,
1991                                         bool swap_reg_contains_mark,
1992                                         Register tmp2,
1993                                         Label& done, Label& slow_case,
1994                                         BiasedLockingCounters* counters) {
1995  // obj_reg must be preserved (at least) if the bias locking fails
1996  // tmp_reg is a temporary register
1997  // swap_reg was used as a temporary but contained a value
1998  //   that was used afterwards in some call paths. Callers
1999  //   have been fixed so that swap_reg no longer needs to be
2000  //   saved.
2001  // Rtemp is no longer scratched
2002
2003  assert(UseBiasedLocking, "why call this otherwise?");
2004  assert_different_registers(obj_reg, swap_reg, tmp_reg, tmp2);
2005  guarantee(swap_reg!=tmp_reg, "invariant");
2006  assert(tmp_reg != noreg, "must supply tmp_reg");
2007
2008#ifndef PRODUCT
2009  if (PrintBiasedLockingStatistics && (counters == NULL)) {
2010    counters = BiasedLocking::counters();
2011  }
2012#endif
2013
2014  assert(markOopDesc::age_shift == markOopDesc::lock_bits + markOopDesc::biased_lock_bits, "biased locking makes assumptions about bit layout");
2015  Address mark_addr(obj_reg, oopDesc::mark_offset_in_bytes());
2016
2017  // Biased locking
2018  // See whether the lock is currently biased toward our thread and
2019  // whether the epoch is still valid
2020  // Note that the runtime guarantees sufficient alignment of JavaThread
2021  // pointers to allow age to be placed into low bits
2022  // First check to see whether biasing is even enabled for this object
2023  Label cas_label;
2024
2025  // The null check applies to the mark load, if we need to load it.
2026  // If the mark has already been loaded into swap_reg then the null check has
2027  // already been performed and the recorded offset is irrelevant.
2028  int null_check_offset = offset();
2029  if (!swap_reg_contains_mark) {
2030    ldr(swap_reg, mark_addr);
2031  }
2032
2033  // On an MP platform, loads could return 'stale' values in some cases.
2034  // That is acceptable since either CAS or slow case path is taken in the worst case.
2035
2036  andr(tmp_reg, swap_reg, (uintx)markOopDesc::biased_lock_mask_in_place);
2037  cmp(tmp_reg, markOopDesc::biased_lock_pattern);
2038
2039  b(cas_label, ne);
2040
2041  // The bias pattern is present in the object's header. Need to check
2042  // whether the bias owner and the epoch are both still current.
2043  load_klass(tmp_reg, obj_reg);
2044  ldr(tmp_reg, Address(tmp_reg, Klass::prototype_header_offset()));
2045  orr(tmp_reg, tmp_reg, Rthread);
2046  eor(tmp_reg, tmp_reg, swap_reg);
2047
2048#ifdef AARCH64
2049  ands(tmp_reg, tmp_reg, ~((uintx) markOopDesc::age_mask_in_place));
2050#else
2051  bics(tmp_reg, tmp_reg, ((int) markOopDesc::age_mask_in_place));
2052#endif // AARCH64
2053
2054#ifndef PRODUCT
2055  if (counters != NULL) {
2056    cond_atomic_inc32(eq, counters->biased_lock_entry_count_addr());
2057  }
2058#endif // !PRODUCT
2059
2060  b(done, eq);
2061
2062  Label try_revoke_bias;
2063  Label try_rebias;
2064
2065  // At this point we know that the header has the bias pattern and
2066  // that we are not the bias owner in the current epoch. We need to
2067  // figure out more details about the state of the header in order to
2068  // know what operations can be legally performed on the object's
2069  // header.
2070
2071  // If the low three bits in the xor result aren't clear, that means
2072  // the prototype header is no longer biased and we have to revoke
2073  // the bias on this object.
2074  tst(tmp_reg, (uintx)markOopDesc::biased_lock_mask_in_place);
2075  b(try_revoke_bias, ne);
2076
2077  // Biasing is still enabled for this data type. See whether the
2078  // epoch of the current bias is still valid, meaning that the epoch
2079  // bits of the mark word are equal to the epoch bits of the
2080  // prototype header. (Note that the prototype header's epoch bits
2081  // only change at a safepoint.) If not, attempt to rebias the object
2082  // toward the current thread. Note that we must be absolutely sure
2083  // that the current epoch is invalid in order to do this because
2084  // otherwise the manipulations it performs on the mark word are
2085  // illegal.
2086  tst(tmp_reg, (uintx)markOopDesc::epoch_mask_in_place);
2087  b(try_rebias, ne);
2088
2089  // tmp_reg has the age, epoch and pattern bits cleared
2090  // The remaining (owner) bits are (Thread ^ current_owner)
2091
2092  // The epoch of the current bias is still valid but we know nothing
2093  // about the owner; it might be set or it might be clear. Try to
2094  // acquire the bias of the object using an atomic operation. If this
2095  // fails we will go in to the runtime to revoke the object's bias.
2096  // Note that we first construct the presumed unbiased header so we
2097  // don't accidentally blow away another thread's valid bias.
2098
2099  // Note that we know the owner is not ourselves. Hence, success can
2100  // only happen when the owner bits are 0
2101
2102#ifdef AARCH64
2103  // The bit mask biased_lock + age + epoch is not a valid AArch64 logical immediate, as it has
2104  // a cleared bit in the middle (the cms bit). So it is loaded with a separate instruction.
2105  mov(tmp2, (markOopDesc::biased_lock_mask_in_place | markOopDesc::age_mask_in_place | markOopDesc::epoch_mask_in_place));
2106  andr(swap_reg, swap_reg, tmp2);
2107#else
2108  // until the assembler can be made smarter, we need to make some assumptions about the values
2109  // so we can optimize this:
2110  assert((markOopDesc::biased_lock_mask_in_place | markOopDesc::age_mask_in_place | markOopDesc::epoch_mask_in_place) == 0x1ff, "biased bitmasks changed");
2111
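  // Shifting left then right by 23 keeps only the low 9 bits, i.e. it applies the
  // 0x1ff mask asserted above without needing a separate mask register.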
2112  mov(swap_reg, AsmOperand(swap_reg, lsl, 23));
2113  mov(swap_reg, AsmOperand(swap_reg, lsr, 23)); // markOop with thread bits cleared (for CAS)
2114#endif // AARCH64
2115
2116  orr(tmp_reg, swap_reg, Rthread); // new mark
2117
2118  biased_locking_enter_with_cas(obj_reg, swap_reg, tmp_reg, tmp2, slow_case,
2119        (counters != NULL) ? counters->anonymously_biased_lock_entry_count_addr() : NULL);
2120
2121  // If the biasing toward our thread failed, this means that
2122  // another thread succeeded in biasing it toward itself and we
2123  // need to revoke that bias. The revocation will occur in the
2124  // interpreter runtime in the slow case.
2125
2126  b(done);
2127
2128  bind(try_rebias);
2129
2130  // At this point we know the epoch has expired, meaning that the
2131  // current "bias owner", if any, is actually invalid. Under these
2132  // circumstances _only_, we are allowed to use the current header's
2133  // value as the comparison value when doing the cas to acquire the
2134  // bias in the current epoch. In other words, we allow transfer of
2135  // the bias from one thread to another directly in this situation.
2136
2137  // tmp_reg low (not owner) bits are (age: 0 | pattern&epoch: prototype^swap_reg)
2138
2139  eor(tmp_reg, tmp_reg, swap_reg); // OK except for owner bits (age preserved !)
2140
2141  // owner bits 'random'. Set them to Rthread.
2142#ifdef AARCH64
2143  mov(tmp2, (markOopDesc::biased_lock_mask_in_place | markOopDesc::age_mask_in_place | markOopDesc::epoch_mask_in_place));
2144  andr(tmp_reg, tmp_reg, tmp2);
2145#else
2146  mov(tmp_reg, AsmOperand(tmp_reg, lsl, 23));
2147  mov(tmp_reg, AsmOperand(tmp_reg, lsr, 23));
2148#endif // AARCH64
2149
2150  orr(tmp_reg, tmp_reg, Rthread); // new mark
2151
2152  biased_locking_enter_with_cas(obj_reg, swap_reg, tmp_reg, tmp2, slow_case,
2153        (counters != NULL) ? counters->rebiased_lock_entry_count_addr() : NULL);
2154
2155  // If the biasing toward our thread failed, then another thread
2156  // succeeded in biasing it toward itself and we need to revoke that
2157  // bias. The revocation will occur in the runtime in the slow case.
2158
2159  b(done);
2160
2161  bind(try_revoke_bias);
2162
2163  // The prototype mark in the klass doesn't have the bias bit set any
2164  // more, indicating that objects of this data type are not supposed
2165  // to be biased any more. We are going to try to reset the mark of
2166  // this object to the prototype value and fall through to the
2167  // CAS-based locking scheme. Note that if our CAS fails, it means
2168  // that another thread raced us for the privilege of revoking the
2169  // bias of this particular object, so it's okay to continue in the
2170  // normal locking code.
2171
2172  // tmp_reg low (not owner) bits are (age: 0 | pattern&epoch: prototype^swap_reg)
2173
2174  eor(tmp_reg, tmp_reg, swap_reg); // OK except for owner bits (age preserved !)
2175
2176  // owner bits 'random'. Clear them
2177#ifdef AARCH64
2178  mov(tmp2, (markOopDesc::biased_lock_mask_in_place | markOopDesc::age_mask_in_place | markOopDesc::epoch_mask_in_place));
2179  andr(tmp_reg, tmp_reg, tmp2);
2180#else
2181  mov(tmp_reg, AsmOperand(tmp_reg, lsl, 23));
2182  mov(tmp_reg, AsmOperand(tmp_reg, lsr, 23));
2183#endif // AARCH64
2184
2185  biased_locking_enter_with_cas(obj_reg, swap_reg, tmp_reg, tmp2, cas_label,
2186        (counters != NULL) ? counters->revoked_lock_entry_count_addr() : NULL);
2187
2188  // Fall through to the normal CAS-based lock, because no matter what
2189  // the result of the above CAS, some thread must have succeeded in
2190  // removing the bias bit from the object's header.
2191
2192  bind(cas_label);
2193
2194  return null_check_offset;
2195}
2196
2197
2198void MacroAssembler::biased_locking_exit(Register obj_reg, Register tmp_reg, Label& done) {
2199  assert(UseBiasedLocking, "why call this otherwise?");
2200
2201  // Check for biased locking unlock case, which is a no-op
2202  // Note: we do not have to check the thread ID for two reasons.
2203  // First, the interpreter checks for IllegalMonitorStateException at
2204  // a higher level. Second, if the bias was revoked while we held the
2205  // lock, the object could not be rebiased toward another thread, so
2206  // the bias bit would be clear.
2207  ldr(tmp_reg, Address(obj_reg, oopDesc::mark_offset_in_bytes()));
2208
2209  andr(tmp_reg, tmp_reg, (uintx)markOopDesc::biased_lock_mask_in_place);
2210  cmp(tmp_reg, markOopDesc::biased_lock_pattern);
2211  b(done, eq);
2212}
2213
2214
2215void MacroAssembler::resolve_jobject(Register value,
2216                                     Register tmp1,
2217                                     Register tmp2) {
2218  assert_different_registers(value, tmp1, tmp2);
2219  Label done, not_weak;
2220  cbz(value, done);             // Use NULL as-is.
2221  STATIC_ASSERT(JNIHandles::weak_tag_mask == 1u);
2222  tbz(value, 0, not_weak);      // Test for jweak tag.
2223  // Resolve jweak.
2224  ldr(value, Address(value, -JNIHandles::weak_tag_value));
2225  verify_oop(value);
2226#if INCLUDE_ALL_GCS
2227  if (UseG1GC) {
2228    g1_write_barrier_pre(noreg, // store_addr
2229                         noreg, // new_val
2230                         value, // pre_val
2231                         tmp1,  // tmp1
2232                         tmp2); // tmp2
2233    }
2234#endif // INCLUDE_ALL_GCS
2235  b(done);
2236  bind(not_weak);
2237  // Resolve (untagged) jobject.
2238  ldr(value, Address(value));
2239  verify_oop(value);
2240  bind(done);
2241}
2242
2243
2244//////////////////////////////////////////////////////////////////////////////////
2245
2246#if INCLUDE_ALL_GCS
2247
2248// G1 pre-barrier.
2249// Blows all volatile registers (R0-R3 on 32-bit ARM, R0-R18 on AArch64, Rtemp, LR).
2250// If store_addr != noreg, then previous value is loaded from [store_addr];
2251// in such case store_addr and new_val registers are preserved;
2252// otherwise pre_val register is preserved.
2253void MacroAssembler::g1_write_barrier_pre(Register store_addr,
2254                                          Register new_val,
2255                                          Register pre_val,
2256                                          Register tmp1,
2257                                          Register tmp2) {
2258  Label done;
2259  Label runtime;
2260
2261  if (store_addr != noreg) {
2262    assert_different_registers(store_addr, new_val, pre_val, tmp1, tmp2, noreg);
2263  } else {
2264    assert (new_val == noreg, "should be");
2265    assert_different_registers(pre_val, tmp1, tmp2, noreg);
2266  }
2267
2268  Address in_progress(Rthread, in_bytes(JavaThread::satb_mark_queue_offset() +
2269                                        SATBMarkQueue::byte_offset_of_active()));
2270  Address index(Rthread, in_bytes(JavaThread::satb_mark_queue_offset() +
2271                                  SATBMarkQueue::byte_offset_of_index()));
2272  Address buffer(Rthread, in_bytes(JavaThread::satb_mark_queue_offset() +
2273                                   SATBMarkQueue::byte_offset_of_buf()));
2274
2275  // Is marking active?
2276  assert(in_bytes(SATBMarkQueue::byte_width_of_active()) == 1, "adjust this code");
2277  ldrb(tmp1, in_progress);
2278  cbz(tmp1, done);
2279
2280  // Do we need to load the previous value?
2281  if (store_addr != noreg) {
2282    load_heap_oop(pre_val, Address(store_addr, 0));
2283  }
2284
2285  // Is the previous value null?
2286  cbz(pre_val, done);
2287
2288  // Can we store original value in the thread's buffer?
2289  // Is index == 0?
2290  // (The index field is typed as size_t.)
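  // The index is a byte offset into the buffer: it is pre-decremented by wordSize and
  // the entry is stored at buffer + index, so the buffer fills from the end toward zero.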
2291
2292  ldr(tmp1, index);           // tmp1 := *index_adr
2293  ldr(tmp2, buffer);
2294
2295  subs(tmp1, tmp1, wordSize); // tmp1 := tmp1 - wordSize
2296  b(runtime, lt);             // If negative, goto runtime
2297
2298  str(tmp1, index);           // *index_adr := tmp1
2299
2300  // Record the previous value
2301  str(pre_val, Address(tmp2, tmp1));
2302  b(done);
2303
2304  bind(runtime);
2305
2306  // save the live input values
2307#ifdef AARCH64
2308  if (store_addr != noreg) {
2309    raw_push(store_addr, new_val);
2310  } else {
2311    raw_push(pre_val, ZR);
2312  }
2313#else
2314  if (store_addr != noreg) {
2315    // avoid raw_push to support any ordering of store_addr and new_val
2316    push(RegisterSet(store_addr) | RegisterSet(new_val));
2317  } else {
2318    push(pre_val);
2319  }
2320#endif // AARCH64
2321
2322  if (pre_val != R0) {
2323    mov(R0, pre_val);
2324  }
2325  mov(R1, Rthread);
2326
2327  call_VM_leaf(CAST_FROM_FN_PTR(address, SharedRuntime::g1_wb_pre), R0, R1);
2328
2329#ifdef AARCH64
2330  if (store_addr != noreg) {
2331    raw_pop(store_addr, new_val);
2332  } else {
2333    raw_pop(pre_val, ZR);
2334  }
2335#else
2336  if (store_addr != noreg) {
2337    pop(RegisterSet(store_addr) | RegisterSet(new_val));
2338  } else {
2339    pop(pre_val);
2340  }
2341#endif // AARCH64
2342
2343  bind(done);
2344}
2345
2346// G1 post-barrier.
2347// Blows all volatile registers (R0-R3 on 32-bit ARM, R0-R18 on AArch64, Rtemp, LR).
2348void MacroAssembler::g1_write_barrier_post(Register store_addr,
2349                                           Register new_val,
2350                                           Register tmp1,
2351                                           Register tmp2,
2352                                           Register tmp3) {
2353
2354  Address queue_index(Rthread, in_bytes(JavaThread::dirty_card_queue_offset() +
2355                                        DirtyCardQueue::byte_offset_of_index()));
2356  Address buffer(Rthread, in_bytes(JavaThread::dirty_card_queue_offset() +
2357                                   DirtyCardQueue::byte_offset_of_buf()));
2358
2359  BarrierSet* bs = Universe::heap()->barrier_set();
2360  CardTableModRefBS* ct = (CardTableModRefBS*)bs;
2361  Label done;
2362  Label runtime;
2363
2364  // Does store cross heap regions?
2365
2366  eor(tmp1, store_addr, new_val);
2367#ifdef AARCH64
2368  logical_shift_right(tmp1, tmp1, HeapRegion::LogOfHRGrainBytes);
2369  cbz(tmp1, done);
2370#else
2371  movs(tmp1, AsmOperand(tmp1, lsr, HeapRegion::LogOfHRGrainBytes));
2372  b(done, eq);
2373#endif
2374
2375  // crosses regions, storing NULL?
2376
2377  cbz(new_val, done);
2378
2379  // storing region crossing non-NULL, is card already dirty?
2380  const Register card_addr = tmp1;
2381  assert(sizeof(*ct->byte_map_base) == sizeof(jbyte), "adjust this code");
2382
2383  mov_address(tmp2, (address)ct->byte_map_base, symbolic_Relocation::card_table_reference);
2384  add(card_addr, tmp2, AsmOperand(store_addr, lsr, CardTableModRefBS::card_shift));
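  // card_addr := byte_map_base + (store_addr >> card_shift), i.e. the card table byte
  // covering the 2^card_shift-byte region that contains store_addr.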
2385
2386  ldrb(tmp2, Address(card_addr));
2387  cmp(tmp2, (int)G1SATBCardTableModRefBS::g1_young_card_val());
2388  b(done, eq);
2389
2390  membar(MacroAssembler::Membar_mask_bits(MacroAssembler::StoreLoad), tmp2);
2391
2392  assert(CardTableModRefBS::dirty_card_val() == 0, "adjust this code");
2393  ldrb(tmp2, Address(card_addr));
2394  cbz(tmp2, done);
2395
2396  // storing a region crossing, non-NULL oop, card is clean.
2397  // dirty card and log.
2398
2399  strb(zero_register(tmp2), Address(card_addr));
2400
2401  ldr(tmp2, queue_index);
2402  ldr(tmp3, buffer);
2403
2404  subs(tmp2, tmp2, wordSize);
2405  b(runtime, lt); // go to runtime if now negative
2406
2407  str(tmp2, queue_index);
2408
2409  str(card_addr, Address(tmp3, tmp2));
2410  b(done);
2411
2412  bind(runtime);
2413
2414  if (card_addr != R0) {
2415    mov(R0, card_addr);
2416  }
2417  mov(R1, Rthread);
2418  call_VM_leaf(CAST_FROM_FN_PTR(address, SharedRuntime::g1_wb_post), R0, R1);
2419
2420  bind(done);
2421}
2422
2423#endif // INCLUDE_ALL_GCS
2424
2425//////////////////////////////////////////////////////////////////////////////////
2426
2427#ifdef AARCH64
2428
2429void MacroAssembler::load_sized_value(Register dst, Address src, size_t size_in_bytes, bool is_signed) {
2430  switch (size_in_bytes) {
2431    case  8: ldr(dst, src); break;
2432    case  4: is_signed ? ldr_s32(dst, src) : ldr_u32(dst, src); break;
2433    case  2: is_signed ? ldrsh(dst, src) : ldrh(dst, src); break;
2434    case  1: is_signed ? ldrsb(dst, src) : ldrb(dst, src); break;
2435    default: ShouldNotReachHere();
2436  }
2437}
2438
2439void MacroAssembler::store_sized_value(Register src, Address dst, size_t size_in_bytes) {
2440  switch (size_in_bytes) {
2441    case  8: str(src, dst);    break;
2442    case  4: str_32(src, dst); break;
2443    case  2: strh(src, dst);   break;
2444    case  1: strb(src, dst);   break;
2445    default: ShouldNotReachHere();
2446  }
2447}
2448
2449#else
2450
2451void MacroAssembler::load_sized_value(Register dst, Address src,
2452                                    size_t size_in_bytes, bool is_signed, AsmCondition cond) {
2453  switch (size_in_bytes) {
2454    case  4: ldr(dst, src, cond); break;
2455    case  2: is_signed ? ldrsh(dst, src, cond) : ldrh(dst, src, cond); break;
2456    case  1: is_signed ? ldrsb(dst, src, cond) : ldrb(dst, src, cond); break;
2457    default: ShouldNotReachHere();
2458  }
2459}
2460
2461
2462void MacroAssembler::store_sized_value(Register src, Address dst, size_t size_in_bytes, AsmCondition cond) {
2463  switch (size_in_bytes) {
2464    case  4: str(src, dst, cond); break;
2465    case  2: strh(src, dst, cond);   break;
2466    case  1: strb(src, dst, cond);   break;
2467    default: ShouldNotReachHere();
2468  }
2469}
2470#endif // AARCH64
2471
2472// Look up the method for a megamorphic invokeinterface call.
2473// The target method is determined by <Rinterf, Rindex>.
2474// The receiver klass is in Rklass.
2475// On success, the result will be in method_result, and execution falls through.
2476// On failure, execution transfers to the given label.
2477void MacroAssembler::lookup_interface_method(Register Rklass,
2478                                             Register Rinterf,
2479                                             Register Rindex,
2480                                             Register method_result,
2481                                             Register temp_reg1,
2482                                             Register temp_reg2,
2483                                             Label& L_no_such_interface) {
2484
2485  assert_different_registers(Rklass, Rinterf, temp_reg1, temp_reg2, Rindex);
2486
2487  Register Ritable = temp_reg1;
2488
2489  // Compute start of first itableOffsetEntry (which is at the end of the vtable)
2490  const int base = in_bytes(Klass::vtable_start_offset());
2491  const int scale = exact_log2(vtableEntry::size_in_bytes());
2492  ldr_s32(temp_reg2, Address(Rklass, Klass::vtable_length_offset())); // Get length of vtable
2493  add(Ritable, Rklass, base);
2494  add(Ritable, Ritable, AsmOperand(temp_reg2, lsl, scale));
2495
2496  Label entry, search;
2497
2498  b(entry);
2499
2500  bind(search);
2501  add(Ritable, Ritable, itableOffsetEntry::size() * HeapWordSize);
2502
2503  bind(entry);
2504
2505  // Check that the entry is non-null.  A null entry means that the receiver
2506  // class doesn't implement the interface, and wasn't the same as the
2507  // receiver class checked when the interface was resolved.
2508
2509  ldr(temp_reg2, Address(Ritable, itableOffsetEntry::interface_offset_in_bytes()));
2510  cbz(temp_reg2, L_no_such_interface);
2511
2512  cmp(Rinterf, temp_reg2);
2513  b(search, ne);
2514
2515  ldr_s32(temp_reg2, Address(Ritable, itableOffsetEntry::offset_offset_in_bytes()));
2516  add(temp_reg2, temp_reg2, Rklass); // Add offset to Klass*
2517  assert(itableMethodEntry::size() * HeapWordSize == wordSize, "adjust the scaling in the code below");
2518  assert(itableMethodEntry::method_offset_in_bytes() == 0, "adjust the offset in the code below");
2519
2520  ldr(method_result, Address::indexed_ptr(temp_reg2, Rindex));
2521}
2522
2523#ifdef COMPILER2
2524// TODO: 8 bytes at a time? pre-fetch?
2525// Compare char[] arrays aligned to 4 bytes.
2526void MacroAssembler::char_arrays_equals(Register ary1, Register ary2,
2527                                        Register limit, Register result,
2528                                      Register chr1, Register chr2, Label& Ldone) {
2529  Label Lvector, Lloop;
2530
2531  // Note: limit contains number of bytes (2*char_elements) != 0.
2532  tst(limit, 0x2); // trailing character ?
2533  b(Lvector, eq);
2534
2535  // compare the trailing char
2536  sub(limit, limit, sizeof(jchar));
2537  ldrh(chr1, Address(ary1, limit));
2538  ldrh(chr2, Address(ary2, limit));
2539  cmp(chr1, chr2);
2540  mov(result, 0, ne);     // not equal
2541  b(Ldone, ne);
2542
2543  // only one char ?
2544  tst(limit, limit);
2545  mov(result, 1, eq);
2546  b(Ldone, eq);
2547
2548  // word by word compare, don't need an alignment check
2549  bind(Lvector);
2550
2551  // Shift ary1 and ary2 to the end of the arrays, negate limit
2552  add(ary1, limit, ary1);
2553  add(ary2, limit, ary2);
2554  neg(limit, limit);
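  // ary1/ary2 now point just past the data to compare; limit counts up from
  // -(number of remaining bytes) to zero, indexing backwards from the end.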
2555
2556  bind(Lloop);
2557  ldr_u32(chr1, Address(ary1, limit));
2558  ldr_u32(chr2, Address(ary2, limit));
2559  cmp_32(chr1, chr2);
2560  mov(result, 0, ne);     // not equal
2561  b(Ldone, ne);
2562  adds(limit, limit, 2*sizeof(jchar));
2563  b(Lloop, ne);
2564
2565  // Caller should set it:
2566  // mov(result_reg, 1);  //equal
2567}
2568#endif
2569
2570void MacroAssembler::inc_counter(address counter_addr, Register tmpreg1, Register tmpreg2) {
2571  mov_slow(tmpreg1, counter_addr);
2572  ldr_s32(tmpreg2, tmpreg1);
2573  add_32(tmpreg2, tmpreg2, 1);
2574  str_32(tmpreg2, tmpreg1);
2575}
2576
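// Produces a branch-free three-way comparison result in dst (-1, 0, +1) from the
// condition flags of the preceding floating-point compare.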
2577void MacroAssembler::floating_cmp(Register dst) {
2578#ifdef AARCH64
2579  NOT_TESTED();
2580  cset(dst, gt);            // 1 if '>', else 0
2581  csinv(dst, dst, ZR, ge);  // previous value if '>=', else -1
2582#else
2583  vmrs(dst, FPSCR);
2584  orr(dst, dst, 0x08000000);
2585  eor(dst, dst, AsmOperand(dst, lsl, 3));
2586  mov(dst, AsmOperand(dst, asr, 30));
2587#endif
2588}
2589
2590void MacroAssembler::restore_default_fp_mode() {
2591#ifdef AARCH64
2592  msr(SysReg_FPCR, ZR);
2593#else
2594#ifndef __SOFTFP__
2595  // Round to Near mode, IEEE compatible, masked exceptions
2596  mov(Rtemp, 0);
2597  vmsr(FPSCR, Rtemp);
2598#endif // !__SOFTFP__
2599#endif // AARCH64
2600}
2601
2602#ifndef AARCH64
2603// 24-bit word range == 26-bit byte range
2604bool check26(int offset) {
2605  // this could be simplified, but it mimics encoding and decoding
2606  // an actual branch instruction
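  // The round trip succeeds exactly when the offset is a multiple of 4 and fits in
  // the signed 26-bit byte range (+/- 32 MB) of an ARM branch.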
2607  int off1 = offset << 6 >> 8;
2608  int encoded = off1 & ((1<<24)-1);
2609  int decoded = encoded << 8 >> 6;
2610  return offset == decoded;
2611}
2612#endif // !AARCH64
2613
2614// Perform some slight adjustments so the default 32MB code cache
2615// is fully reachable.
2616static inline address first_cache_address() {
2617  return CodeCache::low_bound() + sizeof(HeapBlock::Header);
2618}
2619static inline address last_cache_address() {
2620  return CodeCache::high_bound() - Assembler::InstructionSize;
2621}
2622
2623#ifdef AARCH64
2624// Can we reach target using ADRP?
2625bool MacroAssembler::page_reachable_from_cache(address target) {
2626  intptr_t cl = (intptr_t)first_cache_address() & ~0xfff;
2627  intptr_t ch = (intptr_t)last_cache_address() & ~0xfff;
2628  intptr_t addr = (intptr_t)target & ~0xfff;
2629
2630  intptr_t loffset = addr - cl;
2631  intptr_t hoffset = addr - ch;
2632  return is_imm_in_range(loffset >> 12, 21, 0) && is_imm_in_range(hoffset >> 12, 21, 0);
2633}
2634#endif
2635
2636// Can we reach target using unconditional branch or call from anywhere
2637// in the code cache (because code can be relocated)?
2638bool MacroAssembler::_reachable_from_cache(address target) {
2639#ifdef __thumb__
2640  if ((1 & (intptr_t)target) != 0) {
2641    // Return false to avoid 'b' if we need switching to THUMB mode.
2642    return false;
2643  }
2644#endif
2645
2646  address cl = first_cache_address();
2647  address ch = last_cache_address();
2648
2649  if (ForceUnreachable) {
2650    // Only addresses from CodeCache can be treated as reachable.
2651    if (target < CodeCache::low_bound() || CodeCache::high_bound() < target) {
2652      return false;
2653    }
2654  }
2655
2656  intptr_t loffset = (intptr_t)target - (intptr_t)cl;
2657  intptr_t hoffset = (intptr_t)target - (intptr_t)ch;
2658
2659#ifdef AARCH64
2660  return is_offset_in_range(loffset, 26) && is_offset_in_range(hoffset, 26);
2661#else
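  // The -8 accounts for the ARM PC bias: when a branch executes, PC reads as the
  // address of the branch instruction plus 8.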
2662  return check26(loffset - 8) && check26(hoffset - 8);
2663#endif
2664}
2665
2666bool MacroAssembler::reachable_from_cache(address target) {
2667  assert(CodeCache::contains(pc()), "not supported");
2668  return _reachable_from_cache(target);
2669}
2670
2671// Can we reach the entire code cache from anywhere else in the code cache?
2672bool MacroAssembler::_cache_fully_reachable() {
2673  address cl = first_cache_address();
2674  address ch = last_cache_address();
2675  return _reachable_from_cache(cl) && _reachable_from_cache(ch);
2676}
2677
2678bool MacroAssembler::cache_fully_reachable() {
2679  assert(CodeCache::contains(pc()), "not supported");
2680  return _cache_fully_reachable();
2681}
2682
2683void MacroAssembler::jump(address target, relocInfo::relocType rtype, Register scratch NOT_AARCH64_ARG(AsmCondition cond)) {
2684  assert((rtype == relocInfo::runtime_call_type) || (rtype == relocInfo::none), "not supported");
2685  if (reachable_from_cache(target)) {
2686    relocate(rtype);
2687    b(target NOT_AARCH64_ARG(cond));
2688    return;
2689  }
2690
2691  // Note: relocate is not needed for the code below,
2692  // encoding targets in absolute format.
2693  if (ignore_non_patchable_relocations()) {
2694    rtype = relocInfo::none;
2695  }
2696
2697#ifdef AARCH64
2698  assert (scratch != noreg, "should be specified");
2699  InlinedAddress address_literal(target, rtype);
2700  ldr_literal(scratch, address_literal);
2701  br(scratch);
2702  int off = offset();
2703  bind_literal(address_literal);
2704#ifdef COMPILER2
2705  if (offset() - off == wordSize) {
2706    // no padding, so insert nop for worst-case sizing
2707    nop();
2708  }
2709#endif
2710#else
2711  if (VM_Version::supports_movw() && (scratch != noreg) && (rtype == relocInfo::none)) {
2712    // Note: this version cannot be (atomically) patched
2713    mov_slow(scratch, (intptr_t)target, cond);
2714    bx(scratch, cond);
2715  } else {
2716    Label skip;
2717    InlinedAddress address_literal(target);
2718    if (cond != al) {
2719      b(skip, inverse(cond));
2720    }
2721    relocate(rtype);
2722    ldr_literal(PC, address_literal);
2723    bind_literal(address_literal);
2724    bind(skip);
2725  }
2726#endif // AARCH64
2727}
2728
2729// Similar to jump except that:
2730// - near calls are valid only if any destination in the cache is near
2731// - no movt/movw (not atomically patchable)
2732void MacroAssembler::patchable_jump(address target, relocInfo::relocType rtype, Register scratch NOT_AARCH64_ARG(AsmCondition cond)) {
2733  assert((rtype == relocInfo::runtime_call_type) || (rtype == relocInfo::none), "not supported");
2734  if (cache_fully_reachable()) {
2735    // Note: this assumes that all possible targets (the initial one
2736    // and the addresses patched to) are all in the code cache.
2737    assert(CodeCache::contains(target), "target might be too far");
2738    relocate(rtype);
2739    b(target NOT_AARCH64_ARG(cond));
2740    return;
2741  }
2742
2743  // Discard the relocation information if not needed for CacheCompiledCode
2744  // since the next encodings are all in absolute format.
2745  if (ignore_non_patchable_relocations()) {
2746    rtype = relocInfo::none;
2747  }
2748
2749#ifdef AARCH64
2750  assert (scratch != noreg, "should be specified");
2751  InlinedAddress address_literal(target);
2752  relocate(rtype);
2753  ldr_literal(scratch, address_literal);
2754  br(scratch);
2755  int off = offset();
2756  bind_literal(address_literal);
2757#ifdef COMPILER2
2758  if (offset() - off == wordSize) {
2759    // no padding, so insert nop for worst-case sizing
2760    nop();
2761  }
2762#endif
2763#else
2764  {
2765    Label skip;
2766    InlinedAddress address_literal(target);
2767    if (cond != al) {
2768      b(skip, inverse(cond));
2769    }
2770    relocate(rtype);
2771    ldr_literal(PC, address_literal);
2772    bind_literal(address_literal);
2773    bind(skip);
2774  }
2775#endif // AARCH64
2776}
2777
2778void MacroAssembler::call(address target, RelocationHolder rspec NOT_AARCH64_ARG(AsmCondition cond)) {
2779  Register scratch = LR;
2780  assert(rspec.type() == relocInfo::runtime_call_type || rspec.type() == relocInfo::none, "not supported");
2781  if (reachable_from_cache(target)) {
2782    relocate(rspec);
2783    bl(target NOT_AARCH64_ARG(cond));
2784    return;
2785  }
2786
2787  // Note: relocate is not needed for the code below,
2788  // encoding targets in absolute format.
2789  if (ignore_non_patchable_relocations()) {
2790    // This assumes the information was needed only for relocating the code.
2791    rspec = RelocationHolder::none;
2792  }
2793
2794#ifndef AARCH64
2795  if (VM_Version::supports_movw() && (rspec.type() == relocInfo::none)) {
2796    // Note: this version cannot be (atomically) patched
2797    mov_slow(scratch, (intptr_t)target, cond);
2798    blx(scratch, cond);
2799    return;
2800  }
2801#endif
2802
2803  {
2804    Label ret_addr;
2805#ifndef AARCH64
2806    if (cond != al) {
2807      b(ret_addr, inverse(cond));
2808    }
2809#endif
2810
2811
2812#ifdef AARCH64
2813    // TODO-AARCH64: make more optimal implementation
2814    // [ Keep in sync with MacroAssembler::call_size ]
2815    assert(rspec.type() == relocInfo::none, "call reloc not implemented");
2816    mov_slow(scratch, target);
2817    blr(scratch);
2818#else
2819    InlinedAddress address_literal(target);
2820    relocate(rspec);
2821    adr(LR, ret_addr);
2822    ldr_literal(PC, address_literal);
2823
2824    bind_literal(address_literal);
2825    bind(ret_addr);
2826#endif
2827  }
2828}
2829
2830#if defined(AARCH64) && defined(COMPILER2)
2831int MacroAssembler::call_size(address target, bool far, bool patchable) {
2832  // FIXME: mov_slow is variable-length
2833  if (!far) return 1; // bl
2834  if (patchable) return 2;  // ldr; blr
2835  return instr_count_for_mov_slow((intptr_t)target) + 1;
2836}
2837#endif
2838
2839int MacroAssembler::patchable_call(address target, RelocationHolder const& rspec, bool c2) {
2840  assert(rspec.type() == relocInfo::static_call_type ||
2841         rspec.type() == relocInfo::none ||
2842         rspec.type() == relocInfo::opt_virtual_call_type, "not supported");
2843
2844  // Always generate the relocation information, needed for patching
2845  relocate(rspec); // used by NativeCall::is_call_before()
2846  if (cache_fully_reachable()) {
2847    // Note: this assumes that all possible targets (the initial one
2848    // and the addresses patched to) are all in the code cache.
2849    assert(CodeCache::contains(target), "target might be too far");
2850    bl(target);
2851  } else {
2852#if defined(AARCH64) && defined(COMPILER2)
2853    if (c2) {
2854      // return address needs to match call_size().
2855      // no need to trash Rtemp
2856      int off = offset();
2857      Label skip_literal;
2858      InlinedAddress address_literal(target);
2859      ldr_literal(LR, address_literal);
2860      blr(LR);
2861      int ret_addr_offset = offset();
2862      assert(offset() - off == call_size(target, true, true) * InstructionSize, "need to fix call_size()");
2863      b(skip_literal);
2864      int off2 = offset();
2865      bind_literal(address_literal);
2866      if (offset() - off2 == wordSize) {
2867        // no padding, so insert nop for worst-case sizing
2868        nop();
2869      }
2870      bind(skip_literal);
2871      return ret_addr_offset;
2872    }
2873#endif
2874    Label ret_addr;
2875    InlinedAddress address_literal(target);
2876#ifdef AARCH64
2877    ldr_literal(Rtemp, address_literal);
2878    adr(LR, ret_addr);
2879    br(Rtemp);
2880#else
2881    adr(LR, ret_addr);
2882    ldr_literal(PC, address_literal);
2883#endif
2884    bind_literal(address_literal);
2885    bind(ret_addr);
2886  }
2887  return offset();
2888}
2889
2890// ((OopHandle)result).resolve();
2891void MacroAssembler::resolve_oop_handle(Register result) {
2892  // OopHandle::resolve is an indirection.
2893  ldr(result, Address(result, 0));
2894}
2895
2896void MacroAssembler::load_mirror(Register mirror, Register method, Register tmp) {
2897  const int mirror_offset = in_bytes(Klass::java_mirror_offset());
2898  ldr(tmp, Address(method, Method::const_offset()));
2899  ldr(tmp, Address(tmp,  ConstMethod::constants_offset()));
2900  ldr(tmp, Address(tmp, ConstantPool::pool_holder_offset_in_bytes()));
2901  ldr(mirror, Address(tmp, mirror_offset));
2902}
2903
2904
2905///////////////////////////////////////////////////////////////////////////////
2906
2907// Compressed pointers
2908
2909#ifdef AARCH64
2910
2911void MacroAssembler::load_klass(Register dst_klass, Register src_oop) {
2912  if (UseCompressedClassPointers) {
2913    ldr_w(dst_klass, Address(src_oop, oopDesc::klass_offset_in_bytes()));
2914    decode_klass_not_null(dst_klass);
2915  } else {
2916    ldr(dst_klass, Address(src_oop, oopDesc::klass_offset_in_bytes()));
2917  }
2918}
2919
2920#else
2921
2922void MacroAssembler::load_klass(Register dst_klass, Register src_oop, AsmCondition cond) {
2923  ldr(dst_klass, Address(src_oop, oopDesc::klass_offset_in_bytes()), cond);
2924}
2925
2926#endif // AARCH64
2927
2928// Blows src_klass.
2929void MacroAssembler::store_klass(Register src_klass, Register dst_oop) {
2930#ifdef AARCH64
2931  if (UseCompressedClassPointers) {
2932    assert(src_klass != dst_oop, "not enough registers");
2933    encode_klass_not_null(src_klass);
2934    str_w(src_klass, Address(dst_oop, oopDesc::klass_offset_in_bytes()));
2935    return;
2936  }
2937#endif // AARCH64
2938  str(src_klass, Address(dst_oop, oopDesc::klass_offset_in_bytes()));
2939}
2940
2941#ifdef AARCH64
2942
2943void MacroAssembler::store_klass_gap(Register dst) {
2944  if (UseCompressedClassPointers) {
2945    str_w(ZR, Address(dst, oopDesc::klass_gap_offset_in_bytes()));
2946  }
2947}
2948
2949#endif // AARCH64
2950
2951
2952void MacroAssembler::load_heap_oop(Register dst, Address src) {
2953#ifdef AARCH64
2954  if (UseCompressedOops) {
2955    ldr_w(dst, src);
2956    decode_heap_oop(dst);
2957    return;
2958  }
2959#endif // AARCH64
2960  ldr(dst, src);
2961}
2962
2963// Blows src and flags.
2964void MacroAssembler::store_heap_oop(Register src, Address dst) {
2965#ifdef AARCH64
2966  if (UseCompressedOops) {
2967    assert(!dst.uses(src), "not enough registers");
2968    encode_heap_oop(src);
2969    str_w(src, dst);
2970    return;
2971  }
2972#endif // AARCH64
2973  str(src, dst);
2974}
2975
2976void MacroAssembler::store_heap_oop_null(Register src, Address dst) {
2977#ifdef AARCH64
2978  if (UseCompressedOops) {
2979    str_w(src, dst);
2980    return;
2981  }
2982#endif // AARCH64
2983  str(src, dst);
2984}
2985
2986
2987#ifdef AARCH64
2988
2989// Algorithm must match oop.inline.hpp encode_heap_oop.
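// With a non-NULL narrow oop base: narrow = (oop - base) >> shift, with NULL mapped to 0;
// decode_heap_oop below is the exact inverse.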
2990void MacroAssembler::encode_heap_oop(Register dst, Register src) {
2991  // This code pattern is matched in NativeInstruction::skip_encode_heap_oop.
2992  // Update it whenever this code is modified.
2993  assert (UseCompressedOops, "must be compressed");
2994  assert (Universe::heap() != NULL, "java heap should be initialized");
2995#ifdef ASSERT
2996  verify_heapbase("MacroAssembler::encode_heap_oop: heap base corrupted?");
2997#endif
2998  verify_oop(src);
2999  if (Universe::narrow_oop_base() == NULL) {
3000    if (Universe::narrow_oop_shift() != 0) {
3001      assert (LogMinObjAlignmentInBytes == Universe::narrow_oop_shift(), "decode alg wrong");
3002      _lsr(dst, src, Universe::narrow_oop_shift());
3003    } else if (dst != src) {
3004      mov(dst, src);
3005    }
3006  } else {
3007    tst(src, src);
3008    csel(dst, Rheap_base, src, eq);
3009    sub(dst, dst, Rheap_base);
3010    if (Universe::narrow_oop_shift() != 0) {
3011      assert (LogMinObjAlignmentInBytes == Universe::narrow_oop_shift(), "decode alg wrong");
3012      _lsr(dst, dst, Universe::narrow_oop_shift());
3013    }
3014  }
3015}
3016
3017// Same algorithm as oop.inline.hpp decode_heap_oop.
3018void MacroAssembler::decode_heap_oop(Register dst, Register src) {
3019#ifdef ASSERT
3020  verify_heapbase("MacroAssembler::decode_heap_oop: heap base corrupted?");
3021#endif
3022  assert(Universe::narrow_oop_shift() == 0 || LogMinObjAlignmentInBytes == Universe::narrow_oop_shift(), "decode alg wrong");
3023  if (Universe::narrow_oop_base() != NULL) {
3024    tst(src, src);
3025    add(dst, Rheap_base, AsmOperand(src, lsl, Universe::narrow_oop_shift()));
3026    csel(dst, dst, ZR, ne);
3027  } else {
3028    _lsl(dst, src, Universe::narrow_oop_shift());
3029  }
3030  verify_oop(dst);
3031}
3032
3033#ifdef COMPILER2
3034// Algorithm must match oop.inline.hpp encode_heap_oop.
3035// Must preserve condition codes, or C2 encodeHeapOop_not_null rule
3036// must be changed.
3037void MacroAssembler::encode_heap_oop_not_null(Register dst, Register src) {
3038  assert (UseCompressedOops, "must be compressed");
3039  assert (Universe::heap() != NULL, "java heap should be initialized");
3040#ifdef ASSERT
3041  verify_heapbase("MacroAssembler::encode_heap_oop: heap base corrupted?");
3042#endif
3043  verify_oop(src);
3044  if (Universe::narrow_oop_base() == NULL) {
3045    if (Universe::narrow_oop_shift() != 0) {
3046      assert (LogMinObjAlignmentInBytes == Universe::narrow_oop_shift(), "decode alg wrong");
3047      _lsr(dst, src, Universe::narrow_oop_shift());
3048    } else if (dst != src) {
3049          mov(dst, src);
3050    }
3051  } else {
3052    sub(dst, src, Rheap_base);
3053    if (Universe::narrow_oop_shift() != 0) {
3054      assert (LogMinObjAlignmentInBytes == Universe::narrow_oop_shift(), "decode alg wrong");
3055      _lsr(dst, dst, Universe::narrow_oop_shift());
3056    }
3057  }
3058}
3059
3060// Same algorithm as oops.inline.hpp decode_heap_oop.
3061// Must preserve condition codes, or C2 decodeHeapOop_not_null rule
3062// must be changed.
3063void MacroAssembler::decode_heap_oop_not_null(Register dst, Register src) {
3064#ifdef ASSERT
3065  verify_heapbase("MacroAssembler::decode_heap_oop: heap base corrupted?");
3066#endif
3067  assert(Universe::narrow_oop_shift() == 0 || LogMinObjAlignmentInBytes == Universe::narrow_oop_shift(), "decode alg wrong");
3068  if (Universe::narrow_oop_base() != NULL) {
3069    add(dst, Rheap_base, AsmOperand(src, lsl, Universe::narrow_oop_shift()));
3070  } else {
3071    _lsl(dst, src, Universe::narrow_oop_shift());
3072  }
3073  verify_oop(dst);
3074}
3075
3076void MacroAssembler::set_narrow_klass(Register dst, Klass* k) {
3077  assert(UseCompressedClassPointers, "should only be used for compressed header");
3078  assert(oop_recorder() != NULL, "this assembler needs an OopRecorder");
3079  int klass_index = oop_recorder()->find_index(k);
3080  RelocationHolder rspec = metadata_Relocation::spec(klass_index);
3081
3082  // Relocation with special format (see relocInfo_arm.hpp).
3083  relocate(rspec);
3084  narrowKlass encoded_k = Klass::encode_klass(k);
3085  movz(dst, encoded_k & 0xffff, 0);
3086  movk(dst, (encoded_k >> 16) & 0xffff, 16);
3087}
3088
3089void MacroAssembler::set_narrow_oop(Register dst, jobject obj) {
3090  assert(UseCompressedOops, "should only be used for compressed header");
3091  assert(oop_recorder() != NULL, "this assembler needs an OopRecorder");
3092  int oop_index = oop_recorder()->find_index(obj);
3093  RelocationHolder rspec = oop_Relocation::spec(oop_index);
3094
3095  relocate(rspec);
3096  movz(dst, 0xffff, 0);
3097  movk(dst, 0xffff, 16);
3098}
3099
3100#endif // COMPILER2
3101
3102// Must preserve condition codes, or C2 encodeKlass_not_null rule
3103// must be changed.
3104void MacroAssembler::encode_klass_not_null(Register r) {
3105  if (Universe::narrow_klass_base() != NULL) {
3106    // Use Rheap_base as a scratch register in which to temporarily load the narrow_klass_base.
3107    assert(r != Rheap_base, "Encoding a klass in Rheap_base");
3108    mov_slow(Rheap_base, Universe::narrow_klass_base());
3109    sub(r, r, Rheap_base);
3110  }
3111  if (Universe::narrow_klass_shift() != 0) {
3112    assert (LogKlassAlignmentInBytes == Universe::narrow_klass_shift(), "decode alg wrong");
3113    _lsr(r, r, Universe::narrow_klass_shift());
3114  }
3115  if (Universe::narrow_klass_base() != NULL) {
3116    reinit_heapbase();
3117  }
3118}
3119
3120// Must preserve condition codes, or C2 encodeKlass_not_null rule
3121// must be changed.
3122void MacroAssembler::encode_klass_not_null(Register dst, Register src) {
3123  if (dst == src) {
3124    encode_klass_not_null(src);
3125    return;
3126  }
3127  if (Universe::narrow_klass_base() != NULL) {
3128    mov_slow(dst, (int64_t)Universe::narrow_klass_base());
3129    sub(dst, src, dst);
3130    if (Universe::narrow_klass_shift() != 0) {
3131      assert (LogKlassAlignmentInBytes == Universe::narrow_klass_shift(), "decode alg wrong");
3132      _lsr(dst, dst, Universe::narrow_klass_shift());
3133    }
3134  } else {
3135    if (Universe::narrow_klass_shift() != 0) {
3136      assert (LogKlassAlignmentInBytes == Universe::narrow_klass_shift(), "decode alg wrong");
3137      _lsr(dst, src, Universe::narrow_klass_shift());
3138    } else {
3139      mov(dst, src);
3140    }
3141  }
3142}
3143
3144// Function instr_count_for_decode_klass_not_null() counts the instructions
3145// generated by decode_klass_not_null(register r) and reinit_heapbase(),
3146// when (Universe::heap() != NULL).  Hence, if the instructions they
3147// generate change, then this method needs to be updated.
3148int MacroAssembler::instr_count_for_decode_klass_not_null() {
3149  assert(UseCompressedClassPointers, "only for compressed klass ptrs");
3150  assert(Universe::heap() != NULL, "java heap should be initialized");
3151  if (Universe::narrow_klass_base() != NULL) {
3152    return instr_count_for_mov_slow(Universe::narrow_klass_base()) + // mov_slow
3153      1 +                                                                 // add
3154      instr_count_for_mov_slow(Universe::narrow_ptrs_base());   // reinit_heapbase() = mov_slow
3155  } else {
3156    if (Universe::narrow_klass_shift() != 0) {
3157      return 1;
3158    }
3159  }
3160  return 0;
3161}
3162
3163// Must preserve condition codes, or C2 decodeKlass_not_null rule
3164// must be changed.
void MacroAssembler::decode_klass_not_null(Register r) {
  int off = offset();
  assert(UseCompressedClassPointers, "should only be used for compressed headers");
  assert(Universe::heap() != NULL, "java heap should be initialized");
  assert(r != Rheap_base, "Decoding a klass in Rheap_base");
  // Cannot assert, instr_count_for_decode_klass_not_null() counts instructions.
  // Also do not verify_oop as this is called by verify_oop.
  if (Universe::narrow_klass_base() != NULL) {
    // Use Rheap_base as a scratch register in which to temporarily load the narrow_klass_base.
    mov_slow(Rheap_base, Universe::narrow_klass_base());
    add(r, Rheap_base, AsmOperand(r, lsl, Universe::narrow_klass_shift()));
    reinit_heapbase();
  } else {
    if (Universe::narrow_klass_shift() != 0) {
      assert (LogKlassAlignmentInBytes == Universe::narrow_klass_shift(), "decode alg wrong");
      _lsl(r, r, Universe::narrow_klass_shift());
    }
  }
  assert((offset() - off) == (instr_count_for_decode_klass_not_null() * InstructionSize), "need to fix instr_count_for_decode_klass_not_null");
}

// Must preserve condition codes, or C2 decodeKlass_not_null rule
// must be changed.
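// Two-register variant: decodes the narrow Klass in src into dst, leaving src unchanged.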
void MacroAssembler::decode_klass_not_null(Register dst, Register src) {
  if (src == dst) {
    decode_klass_not_null(src);
    return;
  }

  assert(UseCompressedClassPointers, "should only be used for compressed headers");
  assert(Universe::heap() != NULL, "java heap should be initialized");
  assert(src != Rheap_base, "Decoding a klass in Rheap_base");
  assert(dst != Rheap_base, "Decoding a klass into Rheap_base");
  // Also do not verify_oop as this is called by verify_oop.
  if (Universe::narrow_klass_base() != NULL) {
    mov_slow(dst, Universe::narrow_klass_base());
    add(dst, dst, AsmOperand(src, lsl, Universe::narrow_klass_shift()));
  } else {
    _lsl(dst, src, Universe::narrow_klass_shift());
  }
}

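// Reloads Rheap_base with the narrow pointers base: directly once the Java heap
// is initialized, otherwise indirectly through Universe::narrow_ptrs_base_addr().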
void MacroAssembler::reinit_heapbase() {
  if (UseCompressedOops || UseCompressedClassPointers) {
    if (Universe::heap() != NULL) {
      mov_slow(Rheap_base, Universe::narrow_ptrs_base());
    } else {
      ldr_global_ptr(Rheap_base, (address)Universe::narrow_ptrs_base_addr());
    }
  }
}

#ifdef ASSERT
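// Checks that Rheap_base still holds the expected narrow pointers base and
// stops with 'msg' otherwise. Rtemp and the condition flags are preserved.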
void MacroAssembler::verify_heapbase(const char* msg) {
  // This code pattern is matched in NativeInstruction::skip_verify_heapbase.
  // Update it at modifications.
  assert (UseCompressedOops, "should be compressed");
  assert (Universe::heap() != NULL, "java heap should be initialized");
  if (CheckCompressedOops) {
    Label ok;
    str(Rthread, Address(Rthread, in_bytes(JavaThread::in_top_frame_unsafe_section_offset())));
    raw_push(Rtemp, ZR);
    mrs(Rtemp, Assembler::SysReg_NZCV);
    str(Rtemp, Address(SP, 1 * wordSize));
    mov_slow(Rtemp, Universe::narrow_ptrs_base());
    cmp(Rheap_base, Rtemp);
    b(ok, eq);
    stop(msg);
    bind(ok);
    ldr(Rtemp, Address(SP, 1 * wordSize));
    msr(Assembler::SysReg_NZCV, Rtemp);
    raw_pop(Rtemp, ZR);
    str(ZR, Address(Rthread, in_bytes(JavaThread::in_top_frame_unsafe_section_offset())));
  }
}
#endif // ASSERT

#endif // AARCH64

#ifdef COMPILER2
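// Emits the C2 inline fast path for monitor enter: biased locking (if enabled),
// a recursive stack-lock check, and a CAS-based stack lock attempt; control
// reaches 'done' in all cases, with the condition flags reporting the outcome.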
void MacroAssembler::fast_lock(Register Roop, Register Rbox, Register Rscratch, Register Rscratch2 AARCH64_ONLY_ARG(Register Rscratch3))
{
  assert(VM_Version::supports_ldrex(), "unsupported, yet?");

  Register Rmark      = Rscratch2;

  assert(Roop != Rscratch, "");
  assert(Roop != Rmark, "");
  assert(Rbox != Rscratch, "");
  assert(Rbox != Rmark, "");

  Label fast_lock, done;

  if (UseBiasedLocking && !UseOptoBiasInlining) {
    Label failed;
#ifdef AARCH64
    biased_locking_enter(Roop, Rmark, Rscratch, false, Rscratch3, done, failed);
#else
    biased_locking_enter(Roop, Rmark, Rscratch, false, noreg, done, failed);
#endif
    bind(failed);
  }

  ldr(Rmark, Address(Roop, oopDesc::mark_offset_in_bytes()));
  tst(Rmark, markOopDesc::unlocked_value);
  b(fast_lock, ne);

  // Check for recursive lock
  // See comments in InterpreterMacroAssembler::lock_object for
  // explanations on the fast recursive locking check.
#ifdef AARCH64
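  // The mask keeps the low two lock bits and all bits above the page offset, so
  // (Rmark - SP) & mask is zero only when the mark word is a stack address near
  // SP, i.e. a recursive stack lock held by the current thread.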
  intptr_t mask = ((intptr_t)3) - ((intptr_t)os::vm_page_size());
  Assembler::LogicalImmediate imm(mask, false);
  mov(Rscratch, SP);
  sub(Rscratch, Rmark, Rscratch);
  ands(Rscratch, Rscratch, imm);
  b(done, ne); // exit with failure
  str(Rscratch, Address(Rbox, BasicLock::displaced_header_offset_in_bytes())); // set to zero
  b(done);

#else
  // -1- test low 2 bits
  movs(Rscratch, AsmOperand(Rmark, lsl, 30));
  // -2- test (hdr - SP) if the low two bits are 0
  sub(Rscratch, Rmark, SP, eq);
  movs(Rscratch, AsmOperand(Rscratch, lsr, exact_log2(os::vm_page_size())), eq);
  // If still 'eq' then recursive locking OK
  str(Rscratch, Address(Rbox, BasicLock::displaced_header_offset_in_bytes()), eq); // set to zero
  b(done);
#endif

  bind(fast_lock);
  str(Rmark, Address(Rbox, BasicLock::displaced_header_offset_in_bytes()));

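  // Try to install Rbox as the new mark word with a CAS; the original mark has
  // already been saved in the BasicLock's displaced header slot. On success the
  // CAS branches to 'done'; on failure it falls through with the flags set for
  // the caller to test.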
  bool allow_fallthrough_on_failure = true;
  bool one_shot = true;
  cas_for_lock_acquire(Rmark, Rbox, Roop, Rscratch, done, allow_fallthrough_on_failure, one_shot);

  bind(done);

}

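// Emits the C2 fast path for monitor exit: restores the displaced mark word
// with a CAS, or does nothing if the displaced header is NULL (recursive lock).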
void MacroAssembler::fast_unlock(Register Roop, Register Rbox, Register Rscratch, Register Rscratch2 AARCH64_ONLY_ARG(Register Rscratch3))
{
  assert(VM_Version::supports_ldrex(), "unsupported, yet?");

  Register Rmark      = Rscratch2;

  assert(Roop != Rscratch, "");
  assert(Roop != Rmark, "");
  assert(Rbox != Rscratch, "");
  assert(Rbox != Rmark, "");

  Label done;

  if (UseBiasedLocking && !UseOptoBiasInlining) {
    biased_locking_exit(Roop, Rscratch, done);
  }

  ldr(Rmark, Address(Rbox, BasicLock::displaced_header_offset_in_bytes()));
  // If hdr is NULL, we've got recursive locking and there's nothing more to do
  cmp(Rmark, 0);
  b(done, eq);

  // Restore the object header
  bool allow_fallthrough_on_failure = true;
  bool one_shot = true;
  cas_for_lock_release(Rmark, Rbox, Roop, Rscratch, done, allow_fallthrough_on_failure, one_shot);

  bind(done);

}
#endif // COMPILER2
