1/*
2 * Copyright (c) 2008, 2017, Oracle and/or its affiliates. All rights reserved.
3 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
4 *
5 * This code is free software; you can redistribute it and/or modify it
6 * under the terms of the GNU General Public License version 2 only, as
7 * published by the Free Software Foundation.
8 *
9 * This code is distributed in the hope that it will be useful, but WITHOUT
10 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
11 * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
12 * version 2 for more details (a copy is included in the LICENSE file that
13 * accompanied this code).
14 *
15 * You should have received a copy of the GNU General Public License version
16 * 2 along with this work; if not, write to the Free Software Foundation,
17 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
18 *
19 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
20 * or visit www.oracle.com if you need additional information or have any
21 * questions.
22 *
23 */
24
25#include "precompiled.hpp"
26#include "asm/assembler.hpp"
27#include "assembler_arm.inline.hpp"
28#include "code/debugInfoRec.hpp"
29#include "code/icBuffer.hpp"
30#include "code/vtableStubs.hpp"
31#include "interpreter/interpreter.hpp"
32#include "logging/log.hpp"
33#include "memory/resourceArea.hpp"
34#include "oops/compiledICHolder.hpp"
35#include "runtime/sharedRuntime.hpp"
36#include "runtime/vframeArray.hpp"
37#include "utilities/align.hpp"
38#include "vmreg_arm.inline.hpp"
39#ifdef COMPILER1
40#include "c1/c1_Runtime1.hpp"
41#endif
42#ifdef COMPILER2
43#include "opto/runtime.hpp"
44#endif
45#ifdef SHARK
46#include "compiler/compileBroker.hpp"
47#include "shark/sharkCompiler.hpp"
48#endif
49
50#define __ masm->
51
52class RegisterSaver {
53public:
54
55  // Special registers:
56  //              32-bit ARM     64-bit ARM
57  //  Rthread:       R10            R28
58  //  LR:            R14            R30
59
60  // Rthread is callee saved in the C ABI and never changed by compiled code:
61  // no need to save it.
62
63  // 2 slots for LR: the one at LR_offset and an other one at R14/R30_offset.
64  // The one at LR_offset is a return address that is needed by stack walking.
65  // A c2 method uses LR as a standard register so it may be live when we
66  // branch to the runtime. The slot at R14/R30_offset is for the value of LR
67  // in case it's live in the method we are coming from.
68
69#ifdef AARCH64
70
71  //
72  // On AArch64 registers save area has the following layout:
73  //
74  // |---------------------|
75  // | return address (LR) |
76  // | FP                  |
77  // |---------------------|
78  // | V31                 |
79  // | ...                 |
80  // | V0                  |
81  // |---------------------|
82  // | padding             |
83  // | R30 (LR live value) |
84  // |---------------------|
85  // | R27                 |
86  // | ...                 |
87  // | R0                  |
88  // |---------------------| <-- SP
89  //
90
91  enum RegisterLayout {
92    number_of_saved_gprs = 28,
93    number_of_saved_fprs = FloatRegisterImpl::number_of_registers,
94    words_per_fpr = ConcreteRegisterImpl::words_per_fpr,
95
96    R0_offset  = 0,
97    R30_offset = R0_offset + number_of_saved_gprs,
98    D0_offset  = R30_offset + 2,
99    FP_offset  = D0_offset + number_of_saved_fprs * words_per_fpr,
100    LR_offset  = FP_offset + 1,
101
102    reg_save_size = LR_offset + 1,
103  };
104
105  static const int Rmethod_offset;
106  static const int Rtemp_offset;
107
108#else
109
110  enum RegisterLayout {
111    fpu_save_size = FloatRegisterImpl::number_of_registers,
112#ifndef __SOFTFP__
113    D0_offset = 0,
114#endif
115    R0_offset = fpu_save_size,
116    R1_offset,
117    R2_offset,
118    R3_offset,
119    R4_offset,
120    R5_offset,
121    R6_offset,
122#if (FP_REG_NUM != 7)
123    // if not saved as FP
124    R7_offset,
125#endif
126    R8_offset,
127    R9_offset,
128#if (FP_REG_NUM != 11)
129    // if not saved as FP
130    R11_offset,
131#endif
132    R12_offset,
133    R14_offset,
134    FP_offset,
135    LR_offset,
136    reg_save_size,
137
138    Rmethod_offset = R9_offset,
139    Rtemp_offset = R12_offset,
140  };
141
142  // all regs but Rthread (R10), FP (R7 or R11), SP and PC
143  // (altFP_7_11 is the one amoung R7 and R11 which is not FP)
144#define SAVED_BASE_REGS (RegisterSet(R0, R6) | RegisterSet(R8, R9) | RegisterSet(R12) | R14 | altFP_7_11)
145
146#endif // AARCH64
147
148  //  When LR may be live in the nmethod from which we are comming
149  //  then lr_saved is true, the return address is saved before the
150  //  call to save_live_register by the caller and LR contains the
151  //  live value.
152
153  static OopMap* save_live_registers(MacroAssembler* masm,
154                                     int* total_frame_words,
155                                     bool lr_saved = false);
156  static void restore_live_registers(MacroAssembler* masm, bool restore_lr = true);
157
158};
159
160
161#ifdef AARCH64
162const int RegisterSaver::Rmethod_offset = RegisterSaver::R0_offset + Rmethod->encoding();
163const int RegisterSaver::Rtemp_offset   = RegisterSaver::R0_offset + Rtemp->encoding();
164#endif // AARCH64
165
166
167OopMap* RegisterSaver::save_live_registers(MacroAssembler* masm,
168                                           int* total_frame_words,
169                                           bool lr_saved) {
170  *total_frame_words = reg_save_size;
171
172  OopMapSet *oop_maps = new OopMapSet();
173  OopMap* map = new OopMap(VMRegImpl::slots_per_word * (*total_frame_words), 0);
174
175#ifdef AARCH64
176  assert((reg_save_size * wordSize) % StackAlignmentInBytes == 0, "SP should be aligned");
177
178  if (lr_saved) {
179    // LR was stashed here, so that jump could use it as a scratch reg
180    __ ldr(LR, Address(SP, 0));
181    // There are two words on the stack top:
182    //  [SP + 0]: placeholder for FP
183    //  [SP + wordSize]: saved return address
184    __ str(FP, Address(SP, 0));
185  } else {
186    __ raw_push(FP, LR);
187  }
188
189  __ sub(SP, SP, (reg_save_size - 2) * wordSize);
190
191  for (int i = 0; i < number_of_saved_gprs; i += 2) {
192    int offset = R0_offset + i;
193    __ stp(as_Register(i), as_Register(i+1), Address(SP, offset * wordSize));
194    map->set_callee_saved(VMRegImpl::stack2reg((offset + 0) * VMRegImpl::slots_per_word), as_Register(i)->as_VMReg());
195    map->set_callee_saved(VMRegImpl::stack2reg((offset + 1) * VMRegImpl::slots_per_word), as_Register(i+1)->as_VMReg());
196  }
197
198  __ str(R30, Address(SP, R30_offset * wordSize));
199  map->set_callee_saved(VMRegImpl::stack2reg(R30_offset * VMRegImpl::slots_per_word), R30->as_VMReg());
200
201  for (int i = 0; i < number_of_saved_fprs; i += 2) {
202    int offset1 = D0_offset + i * words_per_fpr;
203    int offset2 = offset1 + words_per_fpr;
204    Address base(SP, offset1 * wordSize);
205    if (words_per_fpr == 2) {
206      // pair of "wide" quad vector registers
207      __ stp_q(as_FloatRegister(i), as_FloatRegister(i+1), base);
208    } else {
209      // pair of double vector registers
210      __ stp_d(as_FloatRegister(i), as_FloatRegister(i+1), base);
211    }
212    map->set_callee_saved(VMRegImpl::stack2reg(offset1 * VMRegImpl::slots_per_word), as_FloatRegister(i)->as_VMReg());
213    map->set_callee_saved(VMRegImpl::stack2reg(offset2 * VMRegImpl::slots_per_word), as_FloatRegister(i+1)->as_VMReg());
214  }
215#else
216  if (lr_saved) {
217    __ push(RegisterSet(FP));
218  } else {
219    __ push(RegisterSet(FP) | RegisterSet(LR));
220  }
221  __ push(SAVED_BASE_REGS);
222  if (HaveVFP) {
223    if (VM_Version::has_vfp3_32()) {
224      __ fstmdbd(SP, FloatRegisterSet(D16, 16), writeback);
225    } else {
226      if (FloatRegisterImpl::number_of_registers > 32) {
227        assert(FloatRegisterImpl::number_of_registers == 64, "nb fp registers should be 64");
228        __ sub(SP, SP, 32 * wordSize);
229      }
230    }
231    __ fstmdbd(SP, FloatRegisterSet(D0, 16), writeback);
232  } else {
233    __ sub(SP, SP, fpu_save_size * wordSize);
234  }
235
236  int i;
237  int j=0;
238  for (i = R0_offset; i <= R9_offset; i++) {
239    if (j == FP_REG_NUM) {
240      // skip the FP register, managed below.
241      j++;
242    }
243    map->set_callee_saved(VMRegImpl::stack2reg(i), as_Register(j)->as_VMReg());
244    j++;
245  }
246  assert(j == R10->encoding(), "must be");
247#if (FP_REG_NUM != 11)
248  // add R11, if not managed as FP
249  map->set_callee_saved(VMRegImpl::stack2reg(R11_offset), R11->as_VMReg());
250#endif
251  map->set_callee_saved(VMRegImpl::stack2reg(R12_offset), R12->as_VMReg());
252  map->set_callee_saved(VMRegImpl::stack2reg(R14_offset), R14->as_VMReg());
253  if (HaveVFP) {
254    for (i = 0; i < (VM_Version::has_vfp3_32() ? 64 : 32); i+=2) {
255      map->set_callee_saved(VMRegImpl::stack2reg(i), as_FloatRegister(i)->as_VMReg());
256      map->set_callee_saved(VMRegImpl::stack2reg(i + 1), as_FloatRegister(i)->as_VMReg()->next());
257    }
258  }
259#endif // AARCH64
260
261  return map;
262}
263
264void RegisterSaver::restore_live_registers(MacroAssembler* masm, bool restore_lr) {
265#ifdef AARCH64
266  for (int i = 0; i < number_of_saved_gprs; i += 2) {
267    __ ldp(as_Register(i), as_Register(i+1), Address(SP, (R0_offset + i) * wordSize));
268  }
269
270  __ ldr(R30, Address(SP, R30_offset * wordSize));
271
272  for (int i = 0; i < number_of_saved_fprs; i += 2) {
273    Address base(SP, (D0_offset + i * words_per_fpr) * wordSize);
274    if (words_per_fpr == 2) {
275      // pair of "wide" quad vector registers
276      __ ldp_q(as_FloatRegister(i), as_FloatRegister(i+1), base);
277    } else {
278      // pair of double vector registers
279      __ ldp_d(as_FloatRegister(i), as_FloatRegister(i+1), base);
280    }
281  }
282
283  __ add(SP, SP, (reg_save_size - 2) * wordSize);
284
285  if (restore_lr) {
286    __ raw_pop(FP, LR);
287  } else {
288    __ ldr(FP, Address(SP, 0));
289  }
290#else
291  if (HaveVFP) {
292    __ fldmiad(SP, FloatRegisterSet(D0, 16), writeback);
293    if (VM_Version::has_vfp3_32()) {
294      __ fldmiad(SP, FloatRegisterSet(D16, 16), writeback);
295    } else {
296      if (FloatRegisterImpl::number_of_registers > 32) {
297        assert(FloatRegisterImpl::number_of_registers == 64, "nb fp registers should be 64");
298        __ add(SP, SP, 32 * wordSize);
299      }
300    }
301  } else {
302    __ add(SP, SP, fpu_save_size * wordSize);
303  }
304  __ pop(SAVED_BASE_REGS);
305  if (restore_lr) {
306    __ pop(RegisterSet(FP) | RegisterSet(LR));
307  } else {
308    __ pop(RegisterSet(FP));
309  }
310#endif // AARCH64
311}
312
313#ifdef AARCH64
314
315static void push_result_registers(MacroAssembler* masm, BasicType ret_type) {
316  if (ret_type == T_DOUBLE || ret_type == T_FLOAT) {
317    __ str_d(D0, Address(SP, -2*wordSize, pre_indexed));
318  } else {
319    __ raw_push(R0, ZR);
320  }
321}
322
323static void pop_result_registers(MacroAssembler* masm, BasicType ret_type) {
324  if (ret_type == T_DOUBLE || ret_type == T_FLOAT) {
325    __ ldr_d(D0, Address(SP, 2*wordSize, post_indexed));
326  } else {
327    __ raw_pop(R0, ZR);
328  }
329}
330
331static void push_param_registers(MacroAssembler* masm, int fp_regs_in_arguments) {
332  __ raw_push(R0, R1);
333  __ raw_push(R2, R3);
334  __ raw_push(R4, R5);
335  __ raw_push(R6, R7);
336
337  assert(FPR_PARAMS == 8, "adjust this code");
338  assert((0 <= fp_regs_in_arguments) && (fp_regs_in_arguments <= FPR_PARAMS), "should be");
339
340  if (fp_regs_in_arguments > 6) __ stp_d(V6, V7, Address(SP, -2 * wordSize, pre_indexed));
341  if (fp_regs_in_arguments > 4) __ stp_d(V4, V5, Address(SP, -2 * wordSize, pre_indexed));
342  if (fp_regs_in_arguments > 2) __ stp_d(V2, V3, Address(SP, -2 * wordSize, pre_indexed));
343  if (fp_regs_in_arguments > 0) __ stp_d(V0, V1, Address(SP, -2 * wordSize, pre_indexed));
344}
345
346static void pop_param_registers(MacroAssembler* masm, int fp_regs_in_arguments) {
347  assert(FPR_PARAMS == 8, "adjust this code");
348  assert((0 <= fp_regs_in_arguments) && (fp_regs_in_arguments <= FPR_PARAMS), "should be");
349
350  if (fp_regs_in_arguments > 0) __ ldp_d(V0, V1, Address(SP, 2 * wordSize, post_indexed));
351  if (fp_regs_in_arguments > 2) __ ldp_d(V2, V3, Address(SP, 2 * wordSize, post_indexed));
352  if (fp_regs_in_arguments > 4) __ ldp_d(V4, V5, Address(SP, 2 * wordSize, post_indexed));
353  if (fp_regs_in_arguments > 6) __ ldp_d(V6, V7, Address(SP, 2 * wordSize, post_indexed));
354
355  __ raw_pop(R6, R7);
356  __ raw_pop(R4, R5);
357  __ raw_pop(R2, R3);
358  __ raw_pop(R0, R1);
359}
360
361#else // AARCH64
362
363static void push_result_registers(MacroAssembler* masm, BasicType ret_type) {
364#ifdef __ABI_HARD__
365  if (ret_type == T_DOUBLE || ret_type == T_FLOAT) {
366    __ sub(SP, SP, 8);
367    __ fstd(D0, Address(SP));
368    return;
369  }
370#endif // __ABI_HARD__
371  __ raw_push(R0, R1);
372}
373
374static void pop_result_registers(MacroAssembler* masm, BasicType ret_type) {
375#ifdef __ABI_HARD__
376  if (ret_type == T_DOUBLE || ret_type == T_FLOAT) {
377    __ fldd(D0, Address(SP));
378    __ add(SP, SP, 8);
379    return;
380  }
381#endif // __ABI_HARD__
382  __ raw_pop(R0, R1);
383}
384
385static void push_param_registers(MacroAssembler* masm, int fp_regs_in_arguments) {
386  // R1-R3 arguments need to be saved, but we push 4 registers for 8-byte alignment
387  __ push(RegisterSet(R0, R3));
388
389#ifdef __ABI_HARD__
390  // preserve arguments
391  // Likely not needed as the locking code won't probably modify volatile FP registers,
392  // but there is no way to guarantee that
393  if (fp_regs_in_arguments) {
394    // convert fp_regs_in_arguments to a number of double registers
395    int double_regs_num = (fp_regs_in_arguments + 1) >> 1;
396    __ fstmdbd(SP, FloatRegisterSet(D0, double_regs_num), writeback);
397  }
398#endif // __ ABI_HARD__
399}
400
401static void pop_param_registers(MacroAssembler* masm, int fp_regs_in_arguments) {
402#ifdef __ABI_HARD__
403  if (fp_regs_in_arguments) {
404    int double_regs_num = (fp_regs_in_arguments + 1) >> 1;
405    __ fldmiad(SP, FloatRegisterSet(D0, double_regs_num), writeback);
406  }
407#endif // __ABI_HARD__
408
409  __ pop(RegisterSet(R0, R3));
410}
411
412#endif // AARCH64
413
414
415// Is vector's size (in bytes) bigger than a size saved by default?
416// All vector registers are saved by default on ARM.
417bool SharedRuntime::is_wide_vector(int size) {
418  return false;
419}
420
421size_t SharedRuntime::trampoline_size() {
422  return 16;
423}
424
425void SharedRuntime::generate_trampoline(MacroAssembler *masm, address destination) {
426  InlinedAddress dest(destination);
427  __ indirect_jump(dest, Rtemp);
428  __ bind_literal(dest);
429}
430
431int SharedRuntime::c_calling_convention(const BasicType *sig_bt,
432                                        VMRegPair *regs,
433                                        VMRegPair *regs2,
434                                        int total_args_passed) {
435  assert(regs2 == NULL, "not needed on arm");
436#ifdef AARCH64
437  int slot = 0; // counted in 32-bit VMReg slots
438  int reg = 0;
439  int fp_reg = 0;
440  for (int i = 0; i < total_args_passed; i++) {
441    switch (sig_bt[i]) {
442    case T_SHORT:
443    case T_CHAR:
444    case T_BYTE:
445    case T_BOOLEAN:
446    case T_INT:
447      if (reg < GPR_PARAMS) {
448        Register r = as_Register(reg);
449        regs[i].set1(r->as_VMReg());
450        reg++;
451      } else {
452        regs[i].set1(VMRegImpl::stack2reg(slot));
453        slot+=2;
454      }
455      break;
456    case T_LONG:
457      assert((i + 1) < total_args_passed && sig_bt[i+1] == T_VOID, "missing Half" );
458      // fall through
459    case T_ARRAY:
460    case T_OBJECT:
461    case T_ADDRESS:
462      if (reg < GPR_PARAMS) {
463        Register r = as_Register(reg);
464        regs[i].set2(r->as_VMReg());
465        reg++;
466      } else {
467        regs[i].set2(VMRegImpl::stack2reg(slot));
468        slot+=2;
469      }
470      break;
471    case T_FLOAT:
472      if (fp_reg < FPR_PARAMS) {
473        FloatRegister r = as_FloatRegister(fp_reg);
474        regs[i].set1(r->as_VMReg());
475        fp_reg++;
476      } else {
477        regs[i].set1(VMRegImpl::stack2reg(slot));
478        slot+=2;
479      }
480      break;
481    case T_DOUBLE:
482      assert((i + 1) < total_args_passed && sig_bt[i+1] == T_VOID, "missing Half" );
483      if (fp_reg < FPR_PARAMS) {
484        FloatRegister r = as_FloatRegister(fp_reg);
485        regs[i].set2(r->as_VMReg());
486        fp_reg++;
487      } else {
488        regs[i].set2(VMRegImpl::stack2reg(slot));
489        slot+=2;
490      }
491      break;
492    case T_VOID:
493      assert(i != 0 && (sig_bt[i - 1] == T_LONG || sig_bt[i - 1] == T_DOUBLE), "expecting half");
494      regs[i].set_bad();
495      break;
496    default:
497      ShouldNotReachHere();
498    }
499  }
500  return slot;
501
502#else // AARCH64
503
504  int slot = 0;
505  int ireg = 0;
506#ifdef __ABI_HARD__
507  int fp_slot = 0;
508  int single_fpr_slot = 0;
509#endif // __ABI_HARD__
510  for (int i = 0; i < total_args_passed; i++) {
511    switch (sig_bt[i]) {
512    case T_SHORT:
513    case T_CHAR:
514    case T_BYTE:
515    case T_BOOLEAN:
516    case T_INT:
517    case T_ARRAY:
518    case T_OBJECT:
519    case T_ADDRESS:
520#ifndef __ABI_HARD__
521    case T_FLOAT:
522#endif // !__ABI_HARD__
523      if (ireg < 4) {
524        Register r = as_Register(ireg);
525        regs[i].set1(r->as_VMReg());
526        ireg++;
527      } else {
528        regs[i].set1(VMRegImpl::stack2reg(slot));
529        slot++;
530      }
531      break;
532    case T_LONG:
533#ifndef __ABI_HARD__
534    case T_DOUBLE:
535#endif // !__ABI_HARD__
536      assert((i + 1) < total_args_passed && sig_bt[i+1] == T_VOID, "missing Half" );
537      if (ireg <= 2) {
538#if (ALIGN_WIDE_ARGUMENTS == 1)
539        if(ireg & 1) ireg++;  // Aligned location required
540#endif
541        Register r1 = as_Register(ireg);
542        Register r2 = as_Register(ireg + 1);
543        regs[i].set_pair(r2->as_VMReg(), r1->as_VMReg());
544        ireg += 2;
545#if (ALIGN_WIDE_ARGUMENTS == 0)
546      } else if (ireg == 3) {
547        // uses R3 + one stack slot
548        Register r = as_Register(ireg);
549        regs[i].set_pair(VMRegImpl::stack2reg(slot), r->as_VMReg());
550        ireg += 1;
551        slot += 1;
552#endif
553      } else {
554        if (slot & 1) slot++; // Aligned location required
555        regs[i].set_pair(VMRegImpl::stack2reg(slot+1), VMRegImpl::stack2reg(slot));
556        slot += 2;
557        ireg = 4;
558      }
559      break;
560    case T_VOID:
561      regs[i].set_bad();
562      break;
563#ifdef __ABI_HARD__
564    case T_FLOAT:
565      if ((fp_slot < 16)||(single_fpr_slot & 1)) {
566        if ((single_fpr_slot & 1) == 0) {
567          single_fpr_slot = fp_slot;
568          fp_slot += 2;
569        }
570        FloatRegister r = as_FloatRegister(single_fpr_slot);
571        single_fpr_slot++;
572        regs[i].set1(r->as_VMReg());
573      } else {
574        regs[i].set1(VMRegImpl::stack2reg(slot));
575        slot++;
576      }
577      break;
578    case T_DOUBLE:
579      assert(ALIGN_WIDE_ARGUMENTS == 1, "ABI_HARD not supported with unaligned wide arguments");
580      if (fp_slot <= 14) {
581        FloatRegister r1 = as_FloatRegister(fp_slot);
582        FloatRegister r2 = as_FloatRegister(fp_slot+1);
583        regs[i].set_pair(r2->as_VMReg(), r1->as_VMReg());
584        fp_slot += 2;
585      } else {
586        if(slot & 1) slot++;
587        regs[i].set_pair(VMRegImpl::stack2reg(slot+1), VMRegImpl::stack2reg(slot));
588        slot += 2;
589        single_fpr_slot = 16;
590      }
591      break;
592#endif // __ABI_HARD__
593    default:
594      ShouldNotReachHere();
595    }
596  }
597  return slot;
598#endif // AARCH64
599}
600
601int SharedRuntime::java_calling_convention(const BasicType *sig_bt,
602                                           VMRegPair *regs,
603                                           int total_args_passed,
604                                           int is_outgoing) {
605#ifdef AARCH64
606  // C calling convention on AArch64 is good enough.
607  return c_calling_convention(sig_bt, regs, NULL, total_args_passed);
608#else
609#ifdef __SOFTFP__
610  // soft float is the same as the C calling convention.
611  return c_calling_convention(sig_bt, regs, NULL, total_args_passed);
612#endif // __SOFTFP__
613  (void) is_outgoing;
614  int slot = 0;
615  int ireg = 0;
616  int freg = 0;
617  int single_fpr = 0;
618
619  for (int i = 0; i < total_args_passed; i++) {
620    switch (sig_bt[i]) {
621    case T_SHORT:
622    case T_CHAR:
623    case T_BYTE:
624    case T_BOOLEAN:
625    case T_INT:
626    case T_ARRAY:
627    case T_OBJECT:
628    case T_ADDRESS:
629      if (ireg < 4) {
630        Register r = as_Register(ireg++);
631        regs[i].set1(r->as_VMReg());
632      } else {
633        regs[i].set1(VMRegImpl::stack2reg(slot++));
634      }
635      break;
636    case T_FLOAT:
637      // C2 utilizes S14/S15 for mem-mem moves
638      if ((freg < 16 COMPILER2_PRESENT(-2)) || (single_fpr & 1)) {
639        if ((single_fpr & 1) == 0) {
640          single_fpr = freg;
641          freg += 2;
642        }
643        FloatRegister r = as_FloatRegister(single_fpr++);
644        regs[i].set1(r->as_VMReg());
645      } else {
646        regs[i].set1(VMRegImpl::stack2reg(slot++));
647      }
648      break;
649    case T_DOUBLE:
650      // C2 utilizes S14/S15 for mem-mem moves
651      if (freg <= 14 COMPILER2_PRESENT(-2)) {
652        FloatRegister r1 = as_FloatRegister(freg);
653        FloatRegister r2 = as_FloatRegister(freg + 1);
654        regs[i].set_pair(r2->as_VMReg(), r1->as_VMReg());
655        freg += 2;
656      } else {
657        // Keep internally the aligned calling convention,
658        // ignoring ALIGN_WIDE_ARGUMENTS
659        if (slot & 1) slot++;
660        regs[i].set_pair(VMRegImpl::stack2reg(slot + 1), VMRegImpl::stack2reg(slot));
661        slot += 2;
662        single_fpr = 16;
663      }
664      break;
665    case T_LONG:
666      // Keep internally the aligned calling convention,
667      // ignoring ALIGN_WIDE_ARGUMENTS
668      if (ireg <= 2) {
669        if (ireg & 1) ireg++;
670        Register r1 = as_Register(ireg);
671        Register r2 = as_Register(ireg + 1);
672        regs[i].set_pair(r2->as_VMReg(), r1->as_VMReg());
673        ireg += 2;
674      } else {
675        if (slot & 1) slot++;
676        regs[i].set_pair(VMRegImpl::stack2reg(slot + 1), VMRegImpl::stack2reg(slot));
677        slot += 2;
678        ireg = 4;
679      }
680      break;
681    case T_VOID:
682      regs[i].set_bad();
683      break;
684    default:
685      ShouldNotReachHere();
686    }
687  }
688
689  if (slot & 1) slot++;
690  return slot;
691#endif // AARCH64
692}
693
694static void patch_callers_callsite(MacroAssembler *masm) {
695  Label skip;
696
697  __ ldr(Rtemp, Address(Rmethod, Method::code_offset()));
698  __ cbz(Rtemp, skip);
699
700#ifdef AARCH64
701  push_param_registers(masm, FPR_PARAMS);
702  __ raw_push(LR, ZR);
703#else
704  // Pushing an even number of registers for stack alignment.
705  // Selecting R9, which had to be saved anyway for some platforms.
706  __ push(RegisterSet(R0, R3) | R9 | LR);
707#endif // AARCH64
708
709  __ mov(R0, Rmethod);
710  __ mov(R1, LR);
711  __ call(CAST_FROM_FN_PTR(address, SharedRuntime::fixup_callers_callsite));
712
713#ifdef AARCH64
714  __ raw_pop(LR, ZR);
715  pop_param_registers(masm, FPR_PARAMS);
716#else
717  __ pop(RegisterSet(R0, R3) | R9 | LR);
718#endif // AARCH64
719
720  __ bind(skip);
721}
722
723void SharedRuntime::gen_i2c_adapter(MacroAssembler *masm,
724                                    int total_args_passed, int comp_args_on_stack,
725                                    const BasicType *sig_bt, const VMRegPair *regs) {
726  // TODO: ARM - May be can use ldm to load arguments
727  const Register tmp = Rtemp; // avoid erasing R5_mh
728
729  // Next assert may not be needed but safer. Extra analysis required
730  // if this there is not enough free registers and we need to use R5 here.
731  assert_different_registers(tmp, R5_mh);
732
733  // 6243940 We might end up in handle_wrong_method if
734  // the callee is deoptimized as we race thru here. If that
735  // happens we don't want to take a safepoint because the
736  // caller frame will look interpreted and arguments are now
737  // "compiled" so it is much better to make this transition
738  // invisible to the stack walking code. Unfortunately if
739  // we try and find the callee by normal means a safepoint
740  // is possible. So we stash the desired callee in the thread
741  // and the vm will find there should this case occur.
742  Address callee_target_addr(Rthread, JavaThread::callee_target_offset());
743  __ str(Rmethod, callee_target_addr);
744
745#ifdef AARCH64
746
747  assert_different_registers(tmp, R0, R1, R2, R3, R4, R5, R6, R7, Rsender_sp, Rmethod);
748  assert_different_registers(tmp, R0, R1, R2, R3, R4, R5, R6, R7, Rsender_sp, Rparams);
749
750  if (comp_args_on_stack) {
751    __ sub_slow(SP, SP, align_up(comp_args_on_stack * VMRegImpl::stack_slot_size, StackAlignmentInBytes));
752  }
753
754  for (int i = 0; i < total_args_passed; i++) {
755    if (sig_bt[i] == T_VOID) {
756      assert(i > 0 && (sig_bt[i-1] == T_LONG || sig_bt[i-1] == T_DOUBLE), "missing half");
757      continue;
758    }
759    assert(!regs[i].second()->is_valid() || regs[i].first()->next() == regs[i].second(), "must be ordered");
760
761    int expr_slots_count = (sig_bt[i] == T_LONG || sig_bt[i] == T_DOUBLE) ? 2 : 1;
762    Address source_addr(Rparams, Interpreter::expr_offset_in_bytes(total_args_passed - expr_slots_count - i));
763
764    VMReg r = regs[i].first();
765    bool full_word = regs[i].second()->is_valid();
766
767    if (r->is_stack()) {
768      if (full_word) {
769        __ ldr(tmp, source_addr);
770        __ str(tmp, Address(SP, r->reg2stack() * VMRegImpl::stack_slot_size));
771      } else {
772        __ ldr_w(tmp, source_addr);
773        __ str_w(tmp, Address(SP, r->reg2stack() * VMRegImpl::stack_slot_size));
774      }
775    } else if (r->is_Register()) {
776      if (full_word) {
777        __ ldr(r->as_Register(), source_addr);
778      } else {
779        __ ldr_w(r->as_Register(), source_addr);
780      }
781    } else if (r->is_FloatRegister()) {
782      if (sig_bt[i] == T_DOUBLE) {
783        __ ldr_d(r->as_FloatRegister(), source_addr);
784      } else {
785        __ ldr_s(r->as_FloatRegister(), source_addr);
786      }
787    } else {
788      assert(!r->is_valid() && !regs[i].second()->is_valid(), "must be");
789    }
790  }
791
792  __ ldr(tmp, Address(Rmethod, Method::from_compiled_offset()));
793  __ br(tmp);
794
795#else
796
797  assert_different_registers(tmp, R0, R1, R2, R3, Rsender_sp, Rmethod);
798
799  const Register initial_sp = Rmethod; // temporarily scratched
800
801  // Old code was modifying R4 but this looks unsafe (particularly with JSR292)
802  assert_different_registers(tmp, R0, R1, R2, R3, Rsender_sp, initial_sp);
803
804  __ mov(initial_sp, SP);
805
806  if (comp_args_on_stack) {
807    __ sub_slow(SP, SP, comp_args_on_stack * VMRegImpl::stack_slot_size);
808  }
809  __ bic(SP, SP, StackAlignmentInBytes - 1);
810
811  for (int i = 0; i < total_args_passed; i++) {
812    if (sig_bt[i] == T_VOID) {
813      assert(i > 0 && (sig_bt[i-1] == T_LONG || sig_bt[i-1] == T_DOUBLE), "missing half");
814      continue;
815    }
816    assert(!regs[i].second()->is_valid() || regs[i].first()->next() == regs[i].second(), "must be ordered");
817    int arg_offset = Interpreter::expr_offset_in_bytes(total_args_passed - 1 - i);
818
819    VMReg r_1 = regs[i].first();
820    VMReg r_2 = regs[i].second();
821    if (r_1->is_stack()) {
822      int stack_offset = r_1->reg2stack() * VMRegImpl::stack_slot_size;
823      if (!r_2->is_valid()) {
824        __ ldr(tmp, Address(initial_sp, arg_offset));
825        __ str(tmp, Address(SP, stack_offset));
826      } else {
827        __ ldr(tmp, Address(initial_sp, arg_offset - Interpreter::stackElementSize));
828        __ str(tmp, Address(SP, stack_offset));
829        __ ldr(tmp, Address(initial_sp, arg_offset));
830        __ str(tmp, Address(SP, stack_offset + wordSize));
831      }
832    } else if (r_1->is_Register()) {
833      if (!r_2->is_valid()) {
834        __ ldr(r_1->as_Register(), Address(initial_sp, arg_offset));
835      } else {
836        __ ldr(r_1->as_Register(), Address(initial_sp, arg_offset - Interpreter::stackElementSize));
837        __ ldr(r_2->as_Register(), Address(initial_sp, arg_offset));
838      }
839    } else if (r_1->is_FloatRegister()) {
840#ifdef __SOFTFP__
841      ShouldNotReachHere();
842#endif // __SOFTFP__
843      if (!r_2->is_valid()) {
844        __ flds(r_1->as_FloatRegister(), Address(initial_sp, arg_offset));
845      } else {
846        __ fldd(r_1->as_FloatRegister(), Address(initial_sp, arg_offset - Interpreter::stackElementSize));
847      }
848    } else {
849      assert(!r_1->is_valid() && !r_2->is_valid(), "must be");
850    }
851  }
852
853  // restore Rmethod (scratched for initial_sp)
854  __ ldr(Rmethod, callee_target_addr);
855  __ ldr(PC, Address(Rmethod, Method::from_compiled_offset()));
856
857#endif // AARCH64
858}
859
860static void gen_c2i_adapter(MacroAssembler *masm,
861                            int total_args_passed,  int comp_args_on_stack,
862                            const BasicType *sig_bt, const VMRegPair *regs,
863                            Label& skip_fixup) {
864  // TODO: ARM - May be can use stm to deoptimize arguments
865  const Register tmp = Rtemp;
866
867  patch_callers_callsite(masm);
868  __ bind(skip_fixup);
869
870  __ mov(Rsender_sp, SP); // not yet saved
871
872#ifdef AARCH64
873
874  int extraspace = align_up(total_args_passed * Interpreter::stackElementSize, StackAlignmentInBytes);
875  if (extraspace) {
876    __ sub(SP, SP, extraspace);
877  }
878
879  for (int i = 0; i < total_args_passed; i++) {
880    if (sig_bt[i] == T_VOID) {
881      assert(i > 0 && (sig_bt[i-1] == T_LONG || sig_bt[i-1] == T_DOUBLE), "missing half");
882      continue;
883    }
884
885    int expr_slots_count = (sig_bt[i] == T_LONG || sig_bt[i] == T_DOUBLE) ? 2 : 1;
886    Address dest_addr(SP, Interpreter::expr_offset_in_bytes(total_args_passed - expr_slots_count - i));
887
888    VMReg r = regs[i].first();
889    bool full_word = regs[i].second()->is_valid();
890
891    if (r->is_stack()) {
892      if (full_word) {
893        __ ldr(tmp, Address(SP, r->reg2stack() * VMRegImpl::stack_slot_size + extraspace));
894        __ str(tmp, dest_addr);
895      } else {
896        __ ldr_w(tmp, Address(SP, r->reg2stack() * VMRegImpl::stack_slot_size + extraspace));
897        __ str_w(tmp, dest_addr);
898      }
899    } else if (r->is_Register()) {
900      if (full_word) {
901        __ str(r->as_Register(), dest_addr);
902      } else {
903        __ str_w(r->as_Register(), dest_addr);
904      }
905    } else if (r->is_FloatRegister()) {
906      if (sig_bt[i] == T_DOUBLE) {
907        __ str_d(r->as_FloatRegister(), dest_addr);
908      } else {
909        __ str_s(r->as_FloatRegister(), dest_addr);
910      }
911    } else {
912      assert(!r->is_valid() && !regs[i].second()->is_valid(), "must be");
913    }
914  }
915
916  __ mov(Rparams, SP);
917
918  __ ldr(tmp, Address(Rmethod, Method::interpreter_entry_offset()));
919  __ br(tmp);
920
921#else
922
923  int extraspace = total_args_passed * Interpreter::stackElementSize;
924  if (extraspace) {
925    __ sub_slow(SP, SP, extraspace);
926  }
927
928  for (int i = 0; i < total_args_passed; i++) {
929    if (sig_bt[i] == T_VOID) {
930      assert(i > 0 && (sig_bt[i-1] == T_LONG || sig_bt[i-1] == T_DOUBLE), "missing half");
931      continue;
932    }
933    int stack_offset = (total_args_passed - 1 - i) * Interpreter::stackElementSize;
934
935    VMReg r_1 = regs[i].first();
936    VMReg r_2 = regs[i].second();
937    if (r_1->is_stack()) {
938      int arg_offset = r_1->reg2stack() * VMRegImpl::stack_slot_size + extraspace;
939      if (!r_2->is_valid()) {
940        __ ldr(tmp, Address(SP, arg_offset));
941        __ str(tmp, Address(SP, stack_offset));
942      } else {
943        __ ldr(tmp, Address(SP, arg_offset));
944        __ str(tmp, Address(SP, stack_offset - Interpreter::stackElementSize));
945        __ ldr(tmp, Address(SP, arg_offset + wordSize));
946        __ str(tmp, Address(SP, stack_offset));
947      }
948    } else if (r_1->is_Register()) {
949      if (!r_2->is_valid()) {
950        __ str(r_1->as_Register(), Address(SP, stack_offset));
951      } else {
952        __ str(r_1->as_Register(), Address(SP, stack_offset - Interpreter::stackElementSize));
953        __ str(r_2->as_Register(), Address(SP, stack_offset));
954      }
955    } else if (r_1->is_FloatRegister()) {
956#ifdef __SOFTFP__
957      ShouldNotReachHere();
958#endif // __SOFTFP__
959      if (!r_2->is_valid()) {
960        __ fsts(r_1->as_FloatRegister(), Address(SP, stack_offset));
961      } else {
962        __ fstd(r_1->as_FloatRegister(), Address(SP, stack_offset - Interpreter::stackElementSize));
963      }
964    } else {
965      assert(!r_1->is_valid() && !r_2->is_valid(), "must be");
966    }
967  }
968
969  __ ldr(PC, Address(Rmethod, Method::interpreter_entry_offset()));
970
971#endif // AARCH64
972}
973
974AdapterHandlerEntry* SharedRuntime::generate_i2c2i_adapters(MacroAssembler *masm,
975                                                            int total_args_passed,
976                                                            int comp_args_on_stack,
977                                                            const BasicType *sig_bt,
978                                                            const VMRegPair *regs,
979                                                            AdapterFingerPrint* fingerprint) {
980  address i2c_entry = __ pc();
981  gen_i2c_adapter(masm, total_args_passed, comp_args_on_stack, sig_bt, regs);
982
983  address c2i_unverified_entry = __ pc();
984  Label skip_fixup;
985  const Register receiver       = R0;
986  const Register holder_klass   = Rtemp; // XXX should be OK for C2 but not 100% sure
987  const Register receiver_klass = AARCH64_ONLY(R8) NOT_AARCH64(R4);
988
989  __ load_klass(receiver_klass, receiver);
990  __ ldr(holder_klass, Address(Ricklass, CompiledICHolder::holder_klass_offset()));
991  __ ldr(Rmethod, Address(Ricklass, CompiledICHolder::holder_method_offset()));
992  __ cmp(receiver_klass, holder_klass);
993
994#ifdef AARCH64
995  Label ic_miss;
996  __ b(ic_miss, ne);
997  __ ldr(Rtemp, Address(Rmethod, Method::code_offset()));
998  __ cbz(Rtemp, skip_fixup);
999  __ bind(ic_miss);
1000  __ jump(SharedRuntime::get_ic_miss_stub(), relocInfo::runtime_call_type, Rtemp);
1001#else
1002  __ ldr(Rtemp, Address(Rmethod, Method::code_offset()), eq);
1003  __ cmp(Rtemp, 0, eq);
1004  __ b(skip_fixup, eq);
1005  __ jump(SharedRuntime::get_ic_miss_stub(), relocInfo::runtime_call_type, noreg, ne);
1006#endif // AARCH64
1007
1008  address c2i_entry = __ pc();
1009  gen_c2i_adapter(masm, total_args_passed, comp_args_on_stack, sig_bt, regs, skip_fixup);
1010
1011  __ flush();
1012  return AdapterHandlerLibrary::new_entry(fingerprint, i2c_entry, c2i_entry, c2i_unverified_entry);
1013}
1014
1015
1016static int reg2offset_in(VMReg r) {
1017  // Account for saved FP and LR
1018  return r->reg2stack() * VMRegImpl::stack_slot_size + 2*wordSize;
1019}
1020
1021static int reg2offset_out(VMReg r) {
1022  return (r->reg2stack() + SharedRuntime::out_preserve_stack_slots()) * VMRegImpl::stack_slot_size;
1023}
1024
1025
1026static void verify_oop_args(MacroAssembler* masm,
1027                            const methodHandle& method,
1028                            const BasicType* sig_bt,
1029                            const VMRegPair* regs) {
1030  Register temp_reg = Rmethod;  // not part of any compiled calling seq
1031  if (VerifyOops) {
1032    for (int i = 0; i < method->size_of_parameters(); i++) {
1033      if (sig_bt[i] == T_OBJECT || sig_bt[i] == T_ARRAY) {
1034        VMReg r = regs[i].first();
1035        assert(r->is_valid(), "bad oop arg");
1036        if (r->is_stack()) {
1037          __ ldr(temp_reg, Address(SP, r->reg2stack() * VMRegImpl::stack_slot_size));
1038          __ verify_oop(temp_reg);
1039        } else {
1040          __ verify_oop(r->as_Register());
1041        }
1042      }
1043    }
1044  }
1045}
1046
1047static void gen_special_dispatch(MacroAssembler* masm,
1048                                 const methodHandle& method,
1049                                 const BasicType* sig_bt,
1050                                 const VMRegPair* regs) {
1051  verify_oop_args(masm, method, sig_bt, regs);
1052  vmIntrinsics::ID iid = method->intrinsic_id();
1053
1054  // Now write the args into the outgoing interpreter space
1055  bool     has_receiver   = false;
1056  Register receiver_reg   = noreg;
1057  int      member_arg_pos = -1;
1058  Register member_reg     = noreg;
1059  int      ref_kind       = MethodHandles::signature_polymorphic_intrinsic_ref_kind(iid);
1060  if (ref_kind != 0) {
1061    member_arg_pos = method->size_of_parameters() - 1;  // trailing MemberName argument
1062    member_reg = Rmethod;  // known to be free at this point
1063    has_receiver = MethodHandles::ref_kind_has_receiver(ref_kind);
1064  } else if (iid == vmIntrinsics::_invokeBasic) {
1065    has_receiver = true;
1066  } else {
1067    fatal("unexpected intrinsic id %d", iid);
1068  }
1069
1070  if (member_reg != noreg) {
1071    // Load the member_arg into register, if necessary.
1072    SharedRuntime::check_member_name_argument_is_last_argument(method, sig_bt, regs);
1073    VMReg r = regs[member_arg_pos].first();
1074    if (r->is_stack()) {
1075      __ ldr(member_reg, Address(SP, r->reg2stack() * VMRegImpl::stack_slot_size));
1076    } else {
1077      // no data motion is needed
1078      member_reg = r->as_Register();
1079    }
1080  }
1081
1082  if (has_receiver) {
1083    // Make sure the receiver is loaded into a register.
1084    assert(method->size_of_parameters() > 0, "oob");
1085    assert(sig_bt[0] == T_OBJECT, "receiver argument must be an object");
1086    VMReg r = regs[0].first();
1087    assert(r->is_valid(), "bad receiver arg");
1088    if (r->is_stack()) {
1089      // Porting note:  This assumes that compiled calling conventions always
1090      // pass the receiver oop in a register.  If this is not true on some
1091      // platform, pick a temp and load the receiver from stack.
1092      assert(false, "receiver always in a register");
1093      receiver_reg = j_rarg0;  // known to be free at this point
1094      __ ldr(receiver_reg, Address(SP, r->reg2stack() * VMRegImpl::stack_slot_size));
1095    } else {
1096      // no data motion is needed
1097      receiver_reg = r->as_Register();
1098    }
1099  }
1100
1101  // Figure out which address we are really jumping to:
1102  MethodHandles::generate_method_handle_dispatch(masm, iid,
1103                                                 receiver_reg, member_reg, /*for_compiler_entry:*/ true);
1104}
1105
1106// ---------------------------------------------------------------------------
1107// Generate a native wrapper for a given method.  The method takes arguments
1108// in the Java compiled code convention, marshals them to the native
1109// convention (handlizes oops, etc), transitions to native, makes the call,
1110// returns to java state (possibly blocking), unhandlizes any result and
1111// returns.
1112nmethod* SharedRuntime::generate_native_wrapper(MacroAssembler* masm,
1113                                                const methodHandle& method,
1114                                                int compile_id,
1115                                                BasicType* in_sig_bt,
1116                                                VMRegPair* in_regs,
1117                                                BasicType ret_type) {
1118  if (method->is_method_handle_intrinsic()) {
1119    vmIntrinsics::ID iid = method->intrinsic_id();
1120    intptr_t start = (intptr_t)__ pc();
1121    int vep_offset = ((intptr_t)__ pc()) - start;
1122    gen_special_dispatch(masm,
1123                         method,
1124                         in_sig_bt,
1125                         in_regs);
1126    int frame_complete = ((intptr_t)__ pc()) - start;  // not complete, period
1127    __ flush();
1128    int stack_slots = SharedRuntime::out_preserve_stack_slots();  // no out slots at all, actually
1129    return nmethod::new_native_nmethod(method,
1130                                       compile_id,
1131                                       masm->code(),
1132                                       vep_offset,
1133                                       frame_complete,
1134                                       stack_slots / VMRegImpl::slots_per_word,
1135                                       in_ByteSize(-1),
1136                                       in_ByteSize(-1),
1137                                       (OopMapSet*)NULL);
1138  }
1139  // Arguments for JNI method include JNIEnv and Class if static
1140
1141  // Usage of Rtemp should be OK since scratched by native call
1142
1143  bool is_static = method->is_static();
1144
1145  const int total_in_args = method->size_of_parameters();
1146  int total_c_args = total_in_args + 1;
1147  if (is_static) {
1148    total_c_args++;
1149  }
1150
1151  BasicType* out_sig_bt = NEW_RESOURCE_ARRAY(BasicType, total_c_args);
1152  VMRegPair* out_regs   = NEW_RESOURCE_ARRAY(VMRegPair, total_c_args);
1153
1154  int argc = 0;
1155  out_sig_bt[argc++] = T_ADDRESS;
1156  if (is_static) {
1157    out_sig_bt[argc++] = T_OBJECT;
1158  }
1159
1160  int i;
1161  for (i = 0; i < total_in_args; i++) {
1162    out_sig_bt[argc++] = in_sig_bt[i];
1163  }
1164
1165  int out_arg_slots = c_calling_convention(out_sig_bt, out_regs, NULL, total_c_args);
1166  int stack_slots = SharedRuntime::out_preserve_stack_slots() + out_arg_slots;
1167  // Since object arguments need to be wrapped, we must preserve space
1168  // for those object arguments which come in registers (GPR_PARAMS maximum)
1169  // plus one more slot for Klass handle (for static methods)
1170  int oop_handle_offset = stack_slots;
1171  stack_slots += (GPR_PARAMS + 1) * VMRegImpl::slots_per_word;
1172
1173  // Plus a lock if needed
1174  int lock_slot_offset = 0;
1175  if (method->is_synchronized()) {
1176    lock_slot_offset = stack_slots;
1177    assert(sizeof(BasicLock) == wordSize, "adjust this code");
1178    stack_slots += VMRegImpl::slots_per_word;
1179  }
1180
1181  // Space to save return address and FP
1182  stack_slots += 2 * VMRegImpl::slots_per_word;
1183
1184  // Calculate the final stack size taking account of alignment
1185  stack_slots = align_up(stack_slots, StackAlignmentInBytes / VMRegImpl::stack_slot_size);
1186  int stack_size = stack_slots * VMRegImpl::stack_slot_size;
1187  int lock_slot_fp_offset = stack_size - 2 * wordSize -
1188    lock_slot_offset * VMRegImpl::stack_slot_size;
1189
1190  // Unverified entry point
1191  address start = __ pc();
1192
1193  // Inline cache check, same as in C1_MacroAssembler::inline_cache_check()
1194  const Register receiver = R0; // see receiverOpr()
1195  __ load_klass(Rtemp, receiver);
1196  __ cmp(Rtemp, Ricklass);
1197  Label verified;
1198
1199  __ b(verified, eq); // jump over alignment no-ops too
1200  __ jump(SharedRuntime::get_ic_miss_stub(), relocInfo::runtime_call_type, Rtemp);
1201  __ align(CodeEntryAlignment);
1202
1203  // Verified entry point
1204  __ bind(verified);
1205  int vep_offset = __ pc() - start;
1206
1207#ifdef AARCH64
1208  // Extra nop for MT-safe patching in NativeJump::patch_verified_entry
1209  __ nop();
1210#endif // AARCH64
1211
1212  if ((InlineObjectHash && method->intrinsic_id() == vmIntrinsics::_hashCode) || (method->intrinsic_id() == vmIntrinsics::_identityHashCode)) {
1213    // Object.hashCode, System.identityHashCode can pull the hashCode from the header word
1214    // instead of doing a full VM transition once it's been computed.
1215    Label slow_case;
1216    const Register obj_reg = R0;
1217
1218    // Unlike for Object.hashCode, System.identityHashCode is static method and
1219    // gets object as argument instead of the receiver.
1220    if (method->intrinsic_id() == vmIntrinsics::_identityHashCode) {
1221      assert(method->is_static(), "method should be static");
1222      // return 0 for null reference input, return val = R0 = obj_reg = 0
1223#ifdef AARCH64
1224      Label Continue;
1225      __ cbnz(obj_reg, Continue);
1226      __ ret();
1227      __ bind(Continue);
1228#else
1229      __ cmp(obj_reg, 0);
1230      __ bx(LR, eq);
1231#endif
1232    }
1233
1234    __ ldr(Rtemp, Address(obj_reg, oopDesc::mark_offset_in_bytes()));
1235
1236    assert(markOopDesc::unlocked_value == 1, "adjust this code");
1237    __ tbz(Rtemp, exact_log2(markOopDesc::unlocked_value), slow_case);
1238
1239    if (UseBiasedLocking) {
1240      assert(is_power_of_2(markOopDesc::biased_lock_bit_in_place), "adjust this code");
1241      __ tbnz(Rtemp, exact_log2(markOopDesc::biased_lock_bit_in_place), slow_case);
1242    }
1243
1244#ifdef AARCH64
1245    __ ands(Rtemp, Rtemp, (uintx)markOopDesc::hash_mask_in_place);
1246    __ b(slow_case, eq);
1247    __ logical_shift_right(R0, Rtemp, markOopDesc::hash_shift);
1248    __ ret();
1249#else
1250    __ bics(Rtemp, Rtemp, ~markOopDesc::hash_mask_in_place);
1251    __ mov(R0, AsmOperand(Rtemp, lsr, markOopDesc::hash_shift), ne);
1252    __ bx(LR, ne);
1253#endif // AARCH64
1254
1255    __ bind(slow_case);
1256  }
1257
1258  // Bang stack pages
1259  __ arm_stack_overflow_check(stack_size, Rtemp);
1260
1261  // Setup frame linkage
1262  __ raw_push(FP, LR);
1263  __ mov(FP, SP);
1264  __ sub_slow(SP, SP, stack_size - 2*wordSize);
1265
1266  int frame_complete = __ pc() - start;
1267
1268  OopMapSet* oop_maps = new OopMapSet();
1269  OopMap* map = new OopMap(stack_slots * 2, 0 /* arg_slots*/);
1270  const int extra_args = is_static ? 2 : 1;
1271  int receiver_offset = -1;
1272  int fp_regs_in_arguments = 0;
1273
1274  for (i = total_in_args; --i >= 0; ) {
1275    switch (in_sig_bt[i]) {
1276    case T_ARRAY:
1277    case T_OBJECT: {
1278      VMReg src = in_regs[i].first();
1279      VMReg dst = out_regs[i + extra_args].first();
1280      if (src->is_stack()) {
1281        assert(dst->is_stack(), "must be");
1282        assert(i != 0, "Incoming receiver is always in a register");
1283        __ ldr(Rtemp, Address(FP, reg2offset_in(src)));
1284        __ cmp(Rtemp, 0);
1285#ifdef AARCH64
1286        __ add(Rtemp, FP, reg2offset_in(src));
1287        __ csel(Rtemp, ZR, Rtemp, eq);
1288#else
1289        __ add(Rtemp, FP, reg2offset_in(src), ne);
1290#endif // AARCH64
1291        __ str(Rtemp, Address(SP, reg2offset_out(dst)));
1292        int offset_in_older_frame = src->reg2stack() + SharedRuntime::out_preserve_stack_slots();
1293        map->set_oop(VMRegImpl::stack2reg(offset_in_older_frame + stack_slots));
1294      } else {
1295        int offset = oop_handle_offset * VMRegImpl::stack_slot_size;
1296        __ str(src->as_Register(), Address(SP, offset));
1297        map->set_oop(VMRegImpl::stack2reg(oop_handle_offset));
1298        if ((i == 0) && (!is_static)) {
1299          receiver_offset = offset;
1300        }
1301        oop_handle_offset += VMRegImpl::slots_per_word;
1302
1303#ifdef AARCH64
1304        __ cmp(src->as_Register(), 0);
1305        __ add(Rtemp, SP, offset);
1306        __ csel(dst->is_stack() ? Rtemp : dst->as_Register(), ZR, Rtemp, eq);
1307        if (dst->is_stack()) {
1308          __ str(Rtemp, Address(SP, reg2offset_out(dst)));
1309        }
1310#else
1311        if (dst->is_stack()) {
1312          __ movs(Rtemp, src->as_Register());
1313          __ add(Rtemp, SP, offset, ne);
1314          __ str(Rtemp, Address(SP, reg2offset_out(dst)));
1315        } else {
1316          __ movs(dst->as_Register(), src->as_Register());
1317          __ add(dst->as_Register(), SP, offset, ne);
1318        }
1319#endif // AARCH64
1320      }
1321    }
1322
1323    case T_VOID:
1324      break;
1325
1326#ifdef AARCH64
1327    case T_FLOAT:
1328    case T_DOUBLE: {
1329      VMReg src = in_regs[i].first();
1330      VMReg dst = out_regs[i + extra_args].first();
1331      if (src->is_stack()) {
1332        assert(dst->is_stack(), "must be");
1333        __ ldr(Rtemp, Address(FP, reg2offset_in(src)));
1334        __ str(Rtemp, Address(SP, reg2offset_out(dst)));
1335      } else {
1336        assert(src->is_FloatRegister() && dst->is_FloatRegister(), "must be");
1337        assert(src->as_FloatRegister() == dst->as_FloatRegister(), "must be");
1338        fp_regs_in_arguments++;
1339      }
1340      break;
1341    }
1342#else // AARCH64
1343
1344#ifdef __SOFTFP__
1345    case T_DOUBLE:
1346#endif
1347    case T_LONG: {
1348      VMReg src_1 = in_regs[i].first();
1349      VMReg src_2 = in_regs[i].second();
1350      VMReg dst_1 = out_regs[i + extra_args].first();
1351      VMReg dst_2 = out_regs[i + extra_args].second();
1352#if (ALIGN_WIDE_ARGUMENTS == 0)
1353      // C convention can mix a register and a stack slot for a
1354      // 64-bits native argument.
1355
1356      // Note: following code should work independently of whether
1357      // the Java calling convention follows C convention or whether
1358      // it aligns 64-bit values.
1359      if (dst_2->is_Register()) {
1360        if (src_1->as_Register() != dst_1->as_Register()) {
1361          assert(src_1->as_Register() != dst_2->as_Register() &&
1362                 src_2->as_Register() != dst_2->as_Register(), "must be");
1363          __ mov(dst_2->as_Register(), src_2->as_Register());
1364          __ mov(dst_1->as_Register(), src_1->as_Register());
1365        } else {
1366          assert(src_2->as_Register() == dst_2->as_Register(), "must be");
1367        }
1368      } else if (src_2->is_Register()) {
1369        if (dst_1->is_Register()) {
1370          // dst mixes a register and a stack slot
1371          assert(dst_2->is_stack() && src_1->is_Register() && src_2->is_Register(), "must be");
1372          assert(src_1->as_Register() != dst_1->as_Register(), "must be");
1373          __ str(src_2->as_Register(), Address(SP, reg2offset_out(dst_2)));
1374          __ mov(dst_1->as_Register(), src_1->as_Register());
1375        } else {
1376          // registers to stack slots
1377          assert(dst_2->is_stack() && src_1->is_Register() && src_2->is_Register(), "must be");
1378          __ str(src_1->as_Register(), Address(SP, reg2offset_out(dst_1)));
1379          __ str(src_2->as_Register(), Address(SP, reg2offset_out(dst_2)));
1380        }
1381      } else if (src_1->is_Register()) {
1382        if (dst_1->is_Register()) {
1383          // src and dst must be R3 + stack slot
1384          assert(dst_1->as_Register() == src_1->as_Register(), "must be");
1385          __ ldr(Rtemp,    Address(FP, reg2offset_in(src_2)));
1386          __ str(Rtemp,    Address(SP, reg2offset_out(dst_2)));
1387        } else {
1388          // <R3,stack> -> <stack,stack>
1389          assert(dst_2->is_stack() && src_2->is_stack(), "must be");
1390          __ ldr(LR, Address(FP, reg2offset_in(src_2)));
1391          __ str(src_1->as_Register(), Address(SP, reg2offset_out(dst_1)));
1392          __ str(LR, Address(SP, reg2offset_out(dst_2)));
1393        }
1394      } else {
1395        assert(src_2->is_stack() && dst_1->is_stack() && dst_2->is_stack(), "must be");
1396        __ ldr(Rtemp, Address(FP, reg2offset_in(src_1)));
1397        __ ldr(LR,    Address(FP, reg2offset_in(src_2)));
1398        __ str(Rtemp, Address(SP, reg2offset_out(dst_1)));
1399        __ str(LR,    Address(SP, reg2offset_out(dst_2)));
1400      }
1401#else // ALIGN_WIDE_ARGUMENTS
1402      if (src_1->is_stack()) {
1403        assert(src_2->is_stack() && dst_1->is_stack() && dst_2->is_stack(), "must be");
1404        __ ldr(Rtemp, Address(FP, reg2offset_in(src_1)));
1405        __ ldr(LR,    Address(FP, reg2offset_in(src_2)));
1406        __ str(Rtemp, Address(SP, reg2offset_out(dst_1)));
1407        __ str(LR,    Address(SP, reg2offset_out(dst_2)));
1408      } else if (dst_1->is_stack()) {
1409        assert(dst_2->is_stack() && src_1->is_Register() && src_2->is_Register(), "must be");
1410        __ str(src_1->as_Register(), Address(SP, reg2offset_out(dst_1)));
1411        __ str(src_2->as_Register(), Address(SP, reg2offset_out(dst_2)));
1412      } else if (src_1->as_Register() == dst_1->as_Register()) {
1413        assert(src_2->as_Register() == dst_2->as_Register(), "must be");
1414      } else {
1415        assert(src_1->as_Register() != dst_2->as_Register() &&
1416               src_2->as_Register() != dst_2->as_Register(), "must be");
1417        __ mov(dst_2->as_Register(), src_2->as_Register());
1418        __ mov(dst_1->as_Register(), src_1->as_Register());
1419      }
1420#endif // ALIGN_WIDE_ARGUMENTS
1421      break;
1422    }
1423
1424#if (!defined __SOFTFP__ && !defined __ABI_HARD__)
1425    case T_FLOAT: {
1426      VMReg src = in_regs[i].first();
1427      VMReg dst = out_regs[i + extra_args].first();
1428      if (src->is_stack()) {
1429        assert(dst->is_stack(), "must be");
1430        __ ldr(Rtemp, Address(FP, reg2offset_in(src)));
1431        __ str(Rtemp, Address(SP, reg2offset_out(dst)));
1432      } else if (dst->is_stack()) {
1433        __ fsts(src->as_FloatRegister(), Address(SP, reg2offset_out(dst)));
1434      } else {
1435        assert(src->is_FloatRegister() && dst->is_Register(), "must be");
1436        __ fmrs(dst->as_Register(), src->as_FloatRegister());
1437      }
1438      break;
1439    }
1440
1441    case T_DOUBLE: {
1442      VMReg src_1 = in_regs[i].first();
1443      VMReg src_2 = in_regs[i].second();
1444      VMReg dst_1 = out_regs[i + extra_args].first();
1445      VMReg dst_2 = out_regs[i + extra_args].second();
1446      if (src_1->is_stack()) {
1447        assert(src_2->is_stack() && dst_1->is_stack() && dst_2->is_stack(), "must be");
1448        __ ldr(Rtemp, Address(FP, reg2offset_in(src_1)));
1449        __ ldr(LR,    Address(FP, reg2offset_in(src_2)));
1450        __ str(Rtemp, Address(SP, reg2offset_out(dst_1)));
1451        __ str(LR,    Address(SP, reg2offset_out(dst_2)));
1452      } else if (dst_1->is_stack()) {
1453        assert(dst_2->is_stack() && src_1->is_FloatRegister(), "must be");
1454        __ fstd(src_1->as_FloatRegister(), Address(SP, reg2offset_out(dst_1)));
1455#if (ALIGN_WIDE_ARGUMENTS == 0)
1456      } else if (dst_2->is_stack()) {
1457        assert(! src_2->is_stack(), "must be"); // assuming internal java convention is aligned
1458        // double register must go into R3 + one stack slot
1459        __ fmrrd(dst_1->as_Register(), Rtemp, src_1->as_FloatRegister());
1460        __ str(Rtemp, Address(SP, reg2offset_out(dst_2)));
1461#endif
1462      } else {
1463        assert(src_1->is_FloatRegister() && dst_1->is_Register() && dst_2->is_Register(), "must be");
1464        __ fmrrd(dst_1->as_Register(), dst_2->as_Register(), src_1->as_FloatRegister());
1465      }
1466      break;
1467    }
1468#endif // __SOFTFP__
1469
1470#ifdef __ABI_HARD__
1471    case T_FLOAT: {
1472      VMReg src = in_regs[i].first();
1473      VMReg dst = out_regs[i + extra_args].first();
1474      if (src->is_stack()) {
1475        if (dst->is_stack()) {
1476          __ ldr(Rtemp, Address(FP, reg2offset_in(src)));
1477          __ str(Rtemp, Address(SP, reg2offset_out(dst)));
1478        } else {
1479          // C2 Java calling convention does not populate S14 and S15, therefore
1480          // those need to be loaded from stack here
1481          __ flds(dst->as_FloatRegister(), Address(FP, reg2offset_in(src)));
1482          fp_regs_in_arguments++;
1483        }
1484      } else {
1485        assert(src->is_FloatRegister(), "must be");
1486        fp_regs_in_arguments++;
1487      }
1488      break;
1489    }
1490    case T_DOUBLE: {
1491      VMReg src_1 = in_regs[i].first();
1492      VMReg src_2 = in_regs[i].second();
1493      VMReg dst_1 = out_regs[i + extra_args].first();
1494      VMReg dst_2 = out_regs[i + extra_args].second();
1495      if (src_1->is_stack()) {
1496        if (dst_1->is_stack()) {
1497          assert(dst_2->is_stack(), "must be");
1498          __ ldr(Rtemp, Address(FP, reg2offset_in(src_1)));
1499          __ ldr(LR,    Address(FP, reg2offset_in(src_2)));
1500          __ str(Rtemp, Address(SP, reg2offset_out(dst_1)));
1501          __ str(LR,    Address(SP, reg2offset_out(dst_2)));
1502        } else {
1503          // The C2 Java calling convention does not populate S14 and S15,
1504          // so those need to be loaded from the stack here
1505          __ fldd(dst_1->as_FloatRegister(), Address(FP, reg2offset_in(src_1)));
1506          fp_regs_in_arguments += 2;
1507        }
1508      } else {
1509        assert(src_1->is_FloatRegister() && src_2->is_FloatRegister(), "must be");
1510        fp_regs_in_arguments += 2;
1511      }
1512      break;
1513    }
1514#endif // __ABI_HARD__
1515#endif // AARCH64
1516
1517    default: {
1518      assert(in_sig_bt[i] != T_ADDRESS, "found T_ADDRESS in java args");
1519      VMReg src = in_regs[i].first();
1520      VMReg dst = out_regs[i + extra_args].first();
1521      if (src->is_stack()) {
1522        assert(dst->is_stack(), "must be");
1523        __ ldr(Rtemp, Address(FP, reg2offset_in(src)));
1524        __ str(Rtemp, Address(SP, reg2offset_out(dst)));
1525      } else if (dst->is_stack()) {
1526        __ str(src->as_Register(), Address(SP, reg2offset_out(dst)));
1527      } else {
1528        assert(src->is_Register() && dst->is_Register(), "must be");
1529        __ mov(dst->as_Register(), src->as_Register());
1530      }
1531    }
1532    }
1533  }
1534
1535  // Get Klass mirror
1536  int klass_offset = -1;
1537  if (is_static) {
1538    klass_offset = oop_handle_offset * VMRegImpl::stack_slot_size;
1539    __ mov_oop(Rtemp, JNIHandles::make_local(method->method_holder()->java_mirror()));
1540    __ add(c_rarg1, SP, klass_offset);
1541    __ str(Rtemp, Address(SP, klass_offset));
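    // c_rarg1 now points to the stack slot holding the mirror handle, i.e. the
    // jclass argument passed to the static native method (c_rarg0 gets JNIEnv* below).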
1542    map->set_oop(VMRegImpl::stack2reg(oop_handle_offset));
1543  }
1544
1545  // the PC offset given to add_gc_map must match the PC saved in set_last_Java_frame
1546  int pc_offset = __ set_last_Java_frame(SP, FP, true, Rtemp);
1547  assert(((__ pc()) - start) == __ offset(), "warning: start differs from code_begin");
1548  oop_maps->add_gc_map(pc_offset, map);
1549
1550#ifndef AARCH64
1551  // Order last_Java_pc store with the thread state transition (to _thread_in_native)
1552  __ membar(MacroAssembler::StoreStore, Rtemp);
1553#endif // !AARCH64
1554
1555  // RedefineClasses() tracing support for obsolete method entry
1556  if (log_is_enabled(Trace, redefine, class, obsolete)) {
1557#ifdef AARCH64
1558    __ NOT_TESTED();
1559#endif
1560    __ save_caller_save_registers();
1561    __ mov(R0, Rthread);
1562    __ mov_metadata(R1, method());
1563    __ call_VM_leaf(CAST_FROM_FN_PTR(address, SharedRuntime::rc_trace_method_entry), R0, R1);
1564    __ restore_caller_save_registers();
1565  }
1566
1567  const Register sync_handle = AARCH64_ONLY(R20) NOT_AARCH64(R5);
1568  const Register sync_obj    = AARCH64_ONLY(R21) NOT_AARCH64(R6);
1569  const Register disp_hdr    = AARCH64_ONLY(R22) NOT_AARCH64(altFP_7_11);
1570  const Register tmp         = AARCH64_ONLY(R23) NOT_AARCH64(R8);
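  // All of these registers are callee-saved in the C ABI, so the values they hold
  // survive the native call and the runtime calls on the slow paths below.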
1571
1572  Label slow_lock, slow_lock_biased, lock_done, fast_lock, leave;
1573  if (method->is_synchronized()) {
1574    // The first argument is a handle to the sync object (a class or an instance)
1575    __ ldr(sync_obj, Address(R1));
1576    // Remember the handle for the unlocking code
1577    __ mov(sync_handle, R1);
1578
1579    if (UseBiasedLocking) {
1580      __ biased_locking_enter(sync_obj, tmp, disp_hdr/*scratched*/, false, Rtemp, lock_done, slow_lock_biased);
1581    }
1582
1583    const Register mark = tmp;
1584#ifdef AARCH64
1585    __ sub(disp_hdr, FP, lock_slot_fp_offset);
1586    assert(oopDesc::mark_offset_in_bytes() == 0, "Required by atomic instructions");
1587
1588    __ ldr(mark, sync_obj);
1589
1590    // Test if object is already locked
1591    assert(markOopDesc::unlocked_value == 1, "adjust this code");
1592    __ tbnz(mark, exact_log2(markOopDesc::unlocked_value), fast_lock);
1593
1594    // Check for recursive lock
1595    // See comments in InterpreterMacroAssembler::lock_object for
1596    // explanations on the fast recursive locking check.
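    // The immediate mask (3 - page_size) keeps the low lock bits and all bits from
    // the page size upwards, so the 'ands' below is zero only when the mark has the
    // lock bits clear and points within one page above SP, i.e. it is a stack lock
    // already owned by this thread (recursive locking).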
1597    __ mov(Rtemp, SP);
1598    __ sub(Rtemp, mark, Rtemp);
1599    intptr_t mask = ((intptr_t)3) - ((intptr_t)os::vm_page_size());
1600    Assembler::LogicalImmediate imm(mask, false);
1601    __ ands(Rtemp, Rtemp, imm);
1602    __ b(slow_lock, ne);
1603
1604    // Recursive locking: store 0 into a lock record
1605    __ str(ZR, Address(disp_hdr, BasicLock::displaced_header_offset_in_bytes()));
1606    __ b(lock_done);
1607
1608    __ bind(fast_lock);
1609    __ str(mark, Address(disp_hdr, BasicLock::displaced_header_offset_in_bytes()));
1610
1611    __ cas_for_lock_acquire(mark, disp_hdr, sync_obj, Rtemp, slow_lock);
1612#else
1613    // On MP platforms the next load could return a 'stale' value if the memory location has been modified by another thread.
1614    // That is acceptable, as either the CAS or the slow-case path is taken in that case.
1615
1616    __ ldr(mark, Address(sync_obj, oopDesc::mark_offset_in_bytes()));
1617    __ sub(disp_hdr, FP, lock_slot_fp_offset);
1618    __ tst(mark, markOopDesc::unlocked_value);
1619    __ b(fast_lock, ne);
1620
1621    // Check for recursive lock
1622    // See comments in InterpreterMacroAssembler::lock_object for
1623    // explanations on the fast recursive locking check.
1624    // Check independently the low bits and the distance to SP
1625    // -1- test low 2 bits
1626    __ movs(Rtemp, AsmOperand(mark, lsl, 30));
1627    // -2- test (hdr - SP) if the low two bits are 0
1628    __ sub(Rtemp, mark, SP, eq);
1629    __ movs(Rtemp, AsmOperand(Rtemp, lsr, exact_log2(os::vm_page_size())), eq);
1630    // If still 'eq' then recursive locking OK: set displaced header to 0
1631    __ str(Rtemp, Address(disp_hdr, BasicLock::displaced_header_offset_in_bytes()), eq);
1632    __ b(lock_done, eq);
1633    __ b(slow_lock);
1634
1635    __ bind(fast_lock);
1636    __ str(mark, Address(disp_hdr, BasicLock::displaced_header_offset_in_bytes()));
1637
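    // Attempt to swing the mark word from the unlocked value saved above to the
    // address of the on-stack lock record; cas_for_lock_acquire branches to
    // slow_lock if the CAS fails.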
1638    __ cas_for_lock_acquire(mark, disp_hdr, sync_obj, Rtemp, slow_lock);
1639#endif // AARCH64
1640
1641    __ bind(lock_done);
1642  }
1643
1644  // Get JNIEnv*
1645  __ add(c_rarg0, Rthread, in_bytes(JavaThread::jni_environment_offset()));
1646
1647  // Perform thread state transition
1648  __ mov(Rtemp, _thread_in_native);
1649#ifdef AARCH64
1650  // the stlr instruction forces all preceding writes to be observed prior to the thread state change
1651  __ add(Rtemp2, Rthread, in_bytes(JavaThread::thread_state_offset()));
1652  __ stlr_w(Rtemp, Rtemp2);
1653#else
1654  __ str(Rtemp, Address(Rthread, JavaThread::thread_state_offset()));
1655#endif // AARCH64
1656
1657  // Finally, call the native method
1658  __ call(method->native_function());
1659
1660  // Set FPSCR/FPCR to a known state
1661  if (AlwaysRestoreFPU) {
1662    __ restore_default_fp_mode();
1663  }
1664
1665  // Do a safepoint check while the thread is in the transition state
1666  InlinedAddress safepoint_state(SafepointSynchronize::address_of_state());
1667  Label call_safepoint_runtime, return_to_java;
1668  __ mov(Rtemp, _thread_in_native_trans);
1669  __ ldr_literal(R2, safepoint_state);
1670  __ str_32(Rtemp, Address(Rthread, JavaThread::thread_state_offset()));
1671
1672  // make sure the store is observed before reading the SafepointSynchronize state and further memory references
1673  __ membar(MacroAssembler::Membar_mask_bits(MacroAssembler::StoreLoad | MacroAssembler::StoreStore), Rtemp);
1674
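  // Take the slow path if a safepoint is in progress or the thread has pending suspend flags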
1675  __ ldr_s32(R2, Address(R2));
1676  __ ldr_u32(R3, Address(Rthread, JavaThread::suspend_flags_offset()));
1677  __ cmp(R2, SafepointSynchronize::_not_synchronized);
1678  __ cond_cmp(R3, 0, eq);
1679  __ b(call_safepoint_runtime, ne);
1680  __ bind(return_to_java);
1681
1682  // Perform thread state transition and reguard stack yellow pages if needed
1683  Label reguard, reguard_done;
1684  __ mov(Rtemp, _thread_in_Java);
1685  __ ldr_s32(R2, Address(Rthread, JavaThread::stack_guard_state_offset()));
1686  __ str_32(Rtemp, Address(Rthread, JavaThread::thread_state_offset()));
1687
1688  __ cmp(R2, JavaThread::stack_guard_yellow_reserved_disabled);
1689  __ b(reguard, eq);
1690  __ bind(reguard_done);
1691
1692  Label slow_unlock, unlock_done, retry;
1693  if (method->is_synchronized()) {
1694    __ ldr(sync_obj, Address(sync_handle));
1695
1696    if (UseBiasedLocking) {
1697      __ biased_locking_exit(sync_obj, Rtemp, unlock_done);
1698      // disp_hdr may not have been saved on entry with biased locking
1699      __ sub(disp_hdr, FP, lock_slot_fp_offset);
1700    }
1701
1702    // See C1_MacroAssembler::unlock_object() for more comments
1703    __ ldr(R2, Address(disp_hdr, BasicLock::displaced_header_offset_in_bytes()));
1704    __ cbz(R2, unlock_done);
1705
1706    __ cas_for_lock_release(disp_hdr, R2, sync_obj, Rtemp, slow_unlock);
1707
1708    __ bind(unlock_done);
1709  }
1710
1711  // Reset the last Java frame and clear the top of the JNI handle block
1712  __ ldr(LR, Address(Rthread, JavaThread::active_handles_offset()));
1713  __ reset_last_Java_frame(Rtemp); // sets Rtemp to 0 on 32-bit ARM
1714
1715#ifdef AARCH64
1716  __ str_32(ZR, Address(LR, JNIHandleBlock::top_offset_in_bytes()));
1717  if (CheckJNICalls) {
1718    __ str(ZR, Address(Rthread, JavaThread::pending_jni_exception_check_fn_offset()));
1719  }
1720
1721
1722  switch (ret_type) {
1723  case T_BOOLEAN:
1724    __ tst(R0, 0xff);
1725    __ cset(R0, ne);
1726    break;
1727  case T_CHAR   : __ zero_extend(R0, R0, 16);  break;
1728  case T_BYTE   : __ sign_extend(R0, R0,  8);  break;
1729  case T_SHORT  : __ sign_extend(R0, R0, 16);  break;
1730  case T_INT    : // fall through
1731  case T_LONG   : // fall through
1732  case T_VOID   : // fall through
1733  case T_FLOAT  : // fall through
1734  case T_DOUBLE : /* nothing to do */          break;
1735  case T_OBJECT : // fall through
1736  case T_ARRAY  : break; // See JNIHandles::resolve below
1737  default:
1738    ShouldNotReachHere();
1739  }
1740#else
1741  __ str_32(Rtemp, Address(LR, JNIHandleBlock::top_offset_in_bytes()));
1742  if (CheckJNICalls) {
1743    __ str(__ zero_register(Rtemp), Address(Rthread, JavaThread::pending_jni_exception_check_fn_offset()));
1744  }
1745#endif // AARCH64
1746
1747  // Unbox the oop result, i.e. resolve the jobject in R0 via JNIHandles::resolve.
1748  if (ret_type == T_OBJECT || ret_type == T_ARRAY) {
1749    __ resolve_jobject(R0,      // value
1750                       Rtemp,   // tmp1
1751                       R1_tmp); // tmp2
1752  }
1753
1754  // Any exception pending?
1755  __ ldr(Rtemp, Address(Rthread, Thread::pending_exception_offset()));
1756  __ mov(SP, FP);
1757
1758#ifdef AARCH64
1759  Label except;
1760  __ cbnz(Rtemp, except);
1761  __ raw_pop(FP, LR);
1762  __ ret();
1763
1764  __ bind(except);
1765  // Pop the frame and forward the exception. Rexception_pc contains return address.
1766  __ raw_pop(FP, Rexception_pc);
1767#else
1768  __ cmp(Rtemp, 0);
1769  // Pop the frame and return if no exception is pending
1770  __ pop(RegisterSet(FP) | RegisterSet(PC), eq);
1771  // Pop the frame and forward the exception. Rexception_pc contains return address.
1772  __ ldr(FP, Address(SP, wordSize, post_indexed), ne);
1773  __ ldr(Rexception_pc, Address(SP, wordSize, post_indexed), ne);
1774#endif // AARCH64
1775  __ jump(StubRoutines::forward_exception_entry(), relocInfo::runtime_call_type, Rtemp);
1776
1777  // Safepoint operation and/or pending suspend request is in progress.
1778  // Save the return values and call the runtime function by hand.
1779  __ bind(call_safepoint_runtime);
1780  push_result_registers(masm, ret_type);
1781  __ mov(R0, Rthread);
1782  __ call(CAST_FROM_FN_PTR(address, JavaThread::check_special_condition_for_native_trans));
1783  pop_result_registers(masm, ret_type);
1784  __ b(return_to_java);
1785
1786  __ bind_literal(safepoint_state);
1787
1788  // Reguard stack pages. Save native results around a call to C runtime.
1789  __ bind(reguard);
1790  push_result_registers(masm, ret_type);
1791  __ call(CAST_FROM_FN_PTR(address, SharedRuntime::reguard_yellow_pages));
1792  pop_result_registers(masm, ret_type);
1793  __ b(reguard_done);
1794
1795  if (method->is_synchronized()) {
1796    // Locking slow case
1797    if (UseBiasedLocking) {
1798      __ bind(slow_lock_biased);
1799      __ sub(disp_hdr, FP, lock_slot_fp_offset);
1800    }
1801
1802    __ bind(slow_lock);
1803
1804    push_param_registers(masm, fp_regs_in_arguments);
1805
1806    // last_Java_frame is already set, so do call_VM manually; no exception can occur
1807    __ mov(R0, sync_obj);
1808    __ mov(R1, disp_hdr);
1809    __ mov(R2, Rthread);
1810    __ call(CAST_FROM_FN_PTR(address, SharedRuntime::complete_monitor_locking_C));
1811
1812    pop_param_registers(masm, fp_regs_in_arguments);
1813
1814    __ b(lock_done);
1815
1816    // Unlocking slow case
1817    __ bind(slow_unlock);
1818
1819    push_result_registers(masm, ret_type);
1820
1821    // Clear the pending exception before re-entering the VM.
1822    // The exception oop can be kept in a register since this is a leaf call.
1823    assert_different_registers(Rtmp_save1, sync_obj, disp_hdr);
1824    __ ldr(Rtmp_save1, Address(Rthread, Thread::pending_exception_offset()));
1825    Register zero = __ zero_register(Rtemp);
1826    __ str(zero, Address(Rthread, Thread::pending_exception_offset()));
1827    __ mov(R0, sync_obj);
1828    __ mov(R1, disp_hdr);
1829    __ mov(R2, Rthread);
1830    __ call(CAST_FROM_FN_PTR(address, SharedRuntime::complete_monitor_unlocking_C));
1831    __ str(Rtmp_save1, Address(Rthread, Thread::pending_exception_offset()));
1832
1833    pop_result_registers(masm, ret_type);
1834
1835    __ b(unlock_done);
1836  }
1837
1838  __ flush();
1839  return nmethod::new_native_nmethod(method,
1840                                     compile_id,
1841                                     masm->code(),
1842                                     vep_offset,
1843                                     frame_complete,
1844                                     stack_slots / VMRegImpl::slots_per_word,
1845                                     in_ByteSize(is_static ? klass_offset : receiver_offset),
1846                                     in_ByteSize(lock_slot_offset * VMRegImpl::stack_slot_size),
1847                                     oop_maps);
1848}
1849
1850// This function returns the adjustment size (in number of words) to a c2i adapter
1851// activation for use during deoptimization.
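// For example, a callee with 2 parameters and 5 locals yields
// 3 * Interpreter::stackElementWords extra words (rounded up to the stack
// alignment on AArch64).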
1852int Deoptimization::last_frame_adjust(int callee_parameters, int callee_locals) {
1853  int extra_locals_size = (callee_locals - callee_parameters) * Interpreter::stackElementWords;
1854#ifdef AARCH64
1855  extra_locals_size = align_up(extra_locals_size, StackAlignmentInBytes/BytesPerWord);
1856#endif // AARCH64
1857  return extra_locals_size;
1858}
1859
1860
1861uint SharedRuntime::out_preserve_stack_slots() {
1862  return 0;
1863}
1864
1865
1866//------------------------------generate_deopt_blob----------------------------
1867void SharedRuntime::generate_deopt_blob() {
1868  ResourceMark rm;
1869#ifdef AARCH64
1870  CodeBuffer buffer("deopt_blob", 1024+256, 1);
1871#else
1872  CodeBuffer buffer("deopt_blob", 1024, 1024);
1873#endif
1874  int frame_size_in_words;
1875  OopMapSet* oop_maps;
1876  int reexecute_offset;
1877  int exception_in_tls_offset;
1878  int exception_offset;
1879
1880  MacroAssembler* masm = new MacroAssembler(&buffer);
1881  Label cont;
1882  const Register Rkind   = AARCH64_ONLY(R21) NOT_AARCH64(R9); // caller-saved on 32bit
1883  const Register Rublock = AARCH64_ONLY(R22) NOT_AARCH64(R6);
1884  const Register Rsender = AARCH64_ONLY(R23) NOT_AARCH64(altFP_7_11);
1885  assert_different_registers(Rkind, Rublock, Rsender, Rexception_obj, Rexception_pc, R0, R1, R2, R3, R8, Rtemp);
1886
1887  address start = __ pc();
1888
1889  oop_maps = new OopMapSet();
1890  // LR saved by caller (can be live in c2 method)
1891
1892  // A deopt is a case where LR may be live in the c2 nmethod, so it is
1893  // not possible to call the deopt blob from the nmethod and pass the
1894  // address of the nmethod's deopt handler in LR. Instead, the caller
1895  // of the deopt blob pushes the current address, so the deopt blob
1896  // doesn't have to do it. This way LR is preserved: it contains the
1897  // live value from the nmethod and is saved at the R14/R30_offset
1898  // slot here.
1899  OopMap* map = RegisterSaver::save_live_registers(masm, &frame_size_in_words, true);
1900  __ mov(Rkind, Deoptimization::Unpack_deopt);
1901  __ b(cont);
1902
1903  exception_offset = __ pc() - start;
1904
1905  // Transfer Rexception_obj & Rexception_pc in TLS and fall thru to the
1906  // exception_in_tls_offset entry point.
1907  __ str(Rexception_obj, Address(Rthread, JavaThread::exception_oop_offset()));
1908  __ str(Rexception_pc, Address(Rthread, JavaThread::exception_pc_offset()));
1909  // Force return value to NULL to avoid confusing the escape analysis
1910  // logic. Everything is dead here anyway.
1911  __ mov(R0, 0);
1912
1913  exception_in_tls_offset = __ pc() - start;
1914
1915  // Exception data is in JavaThread structure
1916  // Patch the return address of the current frame
1917  __ ldr(LR, Address(Rthread, JavaThread::exception_pc_offset()));
1918  (void) RegisterSaver::save_live_registers(masm, &frame_size_in_words);
1919  {
1920    const Register Rzero = __ zero_register(Rtemp); // XXX should be OK for C2 but not 100% sure
1921    __ str(Rzero, Address(Rthread, JavaThread::exception_pc_offset()));
1922  }
1923  __ mov(Rkind, Deoptimization::Unpack_exception);
1924  __ b(cont);
1925
1926  reexecute_offset = __ pc() - start;
1927
1928  (void) RegisterSaver::save_live_registers(masm, &frame_size_in_words);
1929  __ mov(Rkind, Deoptimization::Unpack_reexecute);
1930
1931  // Calculate UnrollBlock and save the result in Rublock
1932  __ bind(cont);
1933  __ mov(R0, Rthread);
1934  __ mov(R1, Rkind);
1935
1936  int pc_offset = __ set_last_Java_frame(SP, FP, false, Rtemp); // note: FP may not need to be saved (not on x86)
1937  assert(((__ pc()) - start) == __ offset(), "warning: start differs from code_begin");
1938  __ call(CAST_FROM_FN_PTR(address, Deoptimization::fetch_unroll_info));
1939  if (pc_offset == -1) {
1940    pc_offset = __ offset();
1941  }
1942  oop_maps->add_gc_map(pc_offset, map);
1943  __ reset_last_Java_frame(Rtemp); // Rtemp free since scratched by far call
1944
1945  __ mov(Rublock, R0);
1946
1947  // Reload Rkind from the UnrollBlock (might have changed)
1948  __ ldr_s32(Rkind, Address(Rublock, Deoptimization::UnrollBlock::unpack_kind_offset_in_bytes()));
1949  Label noException;
1950  __ cmp_32(Rkind, Deoptimization::Unpack_exception);   // Was exception pending?
1951  __ b(noException, ne);
1952  // handle exception case
1953#ifdef ASSERT
1954  // assert that exception_pc is zero in tls
1955  { Label L;
1956    __ ldr(Rexception_pc, Address(Rthread, JavaThread::exception_pc_offset()));
1957    __ cbz(Rexception_pc, L);
1958    __ stop("exception pc should be null");
1959    __ bind(L);
1960  }
1961#endif
1962  __ ldr(Rexception_obj, Address(Rthread, JavaThread::exception_oop_offset()));
1963  __ verify_oop(Rexception_obj);
1964  {
1965    const Register Rzero = __ zero_register(Rtemp);
1966    __ str(Rzero, Address(Rthread, JavaThread::exception_oop_offset()));
1967  }
1968
1969  __ bind(noException);
1970
1971  // This frame is going away.  Fetch return value, so we can move it to
1972  // a new frame.
1973  __ ldr(R0, Address(SP, RegisterSaver::R0_offset * wordSize));
1974#ifndef AARCH64
1975  __ ldr(R1, Address(SP, RegisterSaver::R1_offset * wordSize));
1976#endif // !AARCH64
1977#ifndef __SOFTFP__
1978  __ ldr_double(D0, Address(SP, RegisterSaver::D0_offset * wordSize));
1979#endif
1980  // pop frame
1981  __ add(SP, SP, RegisterSaver::reg_save_size * wordSize);
1982
1983  // Set initial stack state before pushing interpreter frames
1984  __ ldr_s32(Rtemp, Address(Rublock, Deoptimization::UnrollBlock::size_of_deoptimized_frame_offset_in_bytes()));
1985  __ ldr(R2, Address(Rublock, Deoptimization::UnrollBlock::frame_pcs_offset_in_bytes()));
1986  __ ldr(R3, Address(Rublock, Deoptimization::UnrollBlock::frame_sizes_offset_in_bytes()));
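  // R2 now points to the array of frame pcs and R3 to the array of frame sizes;
  // both are consumed with post-indexed loads in the frame-pushing loop below.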
1987
1988#ifdef AARCH64
1989  // Pop deoptimized frame. Make sure to restore the initial saved FP/LR of the caller.
1990  // They are needed for correct stack walking during stack overflow handling.
1991  // Also, restored FP is saved in the bottom interpreter frame (LR is reloaded from unroll block).
1992  __ sub(Rtemp, Rtemp, 2*wordSize);
1993  __ add(SP, SP, Rtemp, ex_uxtx);
1994  __ raw_pop(FP, LR);
1995
1996#ifdef ASSERT
1997  { Label L;
1998    __ ldr(Rtemp, Address(Rublock, Deoptimization::UnrollBlock::initial_info_offset_in_bytes()));
1999    __ cmp(FP, Rtemp);
2000    __ b(L, eq);
2001    __ stop("FP restored from deoptimized frame does not match FP stored in unroll block");
2002    __ bind(L);
2003  }
2004  { Label L;
2005    __ ldr(Rtemp, Address(R2));
2006    __ cmp(LR, Rtemp);
2007    __ b(L, eq);
2008    __ stop("LR restored from deoptimized frame does not match the 1st PC in unroll block");
2009    __ bind(L);
2010  }
2011#endif // ASSERT
2012
2013#else
2014  __ add(SP, SP, Rtemp);
2015#endif // AARCH64
2016
2017#ifdef ASSERT
2018  // Compilers generate code that bangs the stack by as much as the
2019  // interpreter would need. So this stack banging should never
2020  // trigger a fault. Verify that it does not on non-product builds.
2021  // Check that there is enough stack to push the deoptimized frames.
2022  if (UseStackBanging) {
2023#ifndef AARCH64
2024    // The compiled method that we are deoptimizing was popped from the stack.
2025    // If the stack bang results in a stack overflow, we don't return to the
2026    // method that is being deoptimized. The stack overflow exception is
2027    // propagated to the caller of the deoptimized method. Need to get the pc
2028    // from the caller in LR and restore FP.
2029    __ ldr(LR, Address(R2, 0));
2030    __ ldr(FP, Address(Rublock, Deoptimization::UnrollBlock::initial_info_offset_in_bytes()));
2031#endif // !AARCH64
2032    __ ldr_s32(R8, Address(Rublock, Deoptimization::UnrollBlock::total_frame_sizes_offset_in_bytes()));
2033    __ arm_stack_overflow_check(R8, Rtemp);
2034  }
2035#endif
2036  __ ldr_s32(R8, Address(Rublock, Deoptimization::UnrollBlock::number_of_frames_offset_in_bytes()));
2037
2038#ifndef AARCH64
2039  // Pick up the initial fp we should save
2040  // XXX Note: was ldr(FP, Address(FP));
2041
2042  // The compiler no longer uses FP as a frame pointer for the
2043  // compiled code. It can be used by the register allocator in C2 or to
2044  // remember the original SP for JSR292 call sites.
2045
2046  // Hence, ldr(FP, Address(FP)) is probably not correct. For x86,
2047  // Deoptimization::fetch_unroll_info computes the right FP value and
2048  // stores it in Rublock.initial_info. This has been activated for ARM.
2049  __ ldr(FP, Address(Rublock, Deoptimization::UnrollBlock::initial_info_offset_in_bytes()));
2050#endif // !AARCH64
2051
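  // caller_adjustment is the adjustment (in bytes) applied below the caller's SP
  // to make room for the locals of the bottommost interpreter frame
  // (cf. Deoptimization::last_frame_adjust above).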
2052  __ ldr_s32(Rtemp, Address(Rublock, Deoptimization::UnrollBlock::caller_adjustment_offset_in_bytes()));
2053  __ mov(Rsender, SP);
2054#ifdef AARCH64
2055  __ sub(SP, SP, Rtemp, ex_uxtx);
2056#else
2057  __ sub(SP, SP, Rtemp);
2058#endif // AARCH64
2059
2060  // Push interpreter frames in a loop
2061  Label loop;
2062  __ bind(loop);
2063  __ ldr(LR, Address(R2, wordSize, post_indexed));         // load frame pc
2064  __ ldr(Rtemp, Address(R3, wordSize, post_indexed));      // load frame size
2065
2066  __ raw_push(FP, LR);                                     // create new frame
2067  __ mov(FP, SP);
2068  __ sub(Rtemp, Rtemp, 2*wordSize);
2069
2070#ifdef AARCH64
2071  __ sub(SP, SP, Rtemp, ex_uxtx);
2072#else
2073  __ sub(SP, SP, Rtemp);
2074#endif // AARCH64
2075
2076  __ str(Rsender, Address(FP, frame::interpreter_frame_sender_sp_offset * wordSize));
2077#ifdef AARCH64
2078  __ str(ZR, Address(FP, frame::interpreter_frame_stack_top_offset * wordSize));
2079#else
2080  __ mov(LR, 0);
2081  __ str(LR, Address(FP, frame::interpreter_frame_last_sp_offset * wordSize));
2082#endif // AARCH64
2083
2084  __ subs(R8, R8, 1);                               // decrement counter
2085  __ mov(Rsender, SP);
2086  __ b(loop, ne);
2087
2088  // Re-push self-frame
2089  __ ldr(LR, Address(R2));
2090  __ raw_push(FP, LR);
2091  __ mov(FP, SP);
2092  __ sub(SP, SP, (frame_size_in_words - 2) * wordSize);
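  // (frame_size_in_words minus the 2 words, FP and LR, just pushed by raw_push)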
2093
2094  // Restore frame locals after moving the frame
2095  __ str(R0, Address(SP, RegisterSaver::R0_offset * wordSize));
2096#ifndef AARCH64
2097  __ str(R1, Address(SP, RegisterSaver::R1_offset * wordSize));
2098#endif // !AARCH64
2099
2100#ifndef __SOFTFP__
2101  __ str_double(D0, Address(SP, RegisterSaver::D0_offset * wordSize));
2102#endif // !__SOFTFP__
2103
2104#ifndef AARCH64
2105#ifdef ASSERT
2106  // Reload Rkind from the UnrollBlock and check that it was not overwritten (Rkind is not callee-saved)
2107  { Label L;
2108    __ ldr_s32(Rtemp, Address(Rublock, Deoptimization::UnrollBlock::unpack_kind_offset_in_bytes()));
2109    __ cmp_32(Rkind, Rtemp);
2110    __ b(L, eq);
2111    __ stop("Rkind was overwritten");
2112    __ bind(L);
2113  }
2114#endif
2115#endif
2116
2117  // Call unpack_frames with proper arguments
2118  __ mov(R0, Rthread);
2119  __ mov(R1, Rkind);
2120
2121  pc_offset = __ set_last_Java_frame(SP, FP, false, Rtemp);
2122  assert(((__ pc()) - start) == __ offset(), "warning: start differs from code_begin");
2123  __ call(CAST_FROM_FN_PTR(address, Deoptimization::unpack_frames));
2124  if (pc_offset == -1) {
2125    pc_offset = __ offset();
2126  }
2127  oop_maps->add_gc_map(pc_offset, new OopMap(frame_size_in_words * VMRegImpl::slots_per_word, 0));
2128  __ reset_last_Java_frame(Rtemp); // Rtemp free since scratched by far call
2129
2130  // Collect return values, pop self-frame and jump to interpreter
2131  __ ldr(R0, Address(SP, RegisterSaver::R0_offset * wordSize));
2132#ifndef AARCH64
2133  __ ldr(R1, Address(SP, RegisterSaver::R1_offset * wordSize));
2134#endif // !AARCH64
2135  // Interpreter floats are controlled by __SOFTFP__, but the compiler's
2136  // float return value registers are controlled by __ABI_HARD__.
2137  // This matters for vfp-sflt builds.
2138#ifndef __SOFTFP__
2139  // Interpreter hard float
2140#ifdef __ABI_HARD__
2141  // Compiler float return value in FP registers
2142  __ ldr_double(D0, Address(SP, RegisterSaver::D0_offset * wordSize));
2143#else
2144  // Compiler float return value in integer registers,
2145  // copy to D0 for the interpreter (D0 <-- R1:R0)
2146  __ fmdrr(D0_tos, R0, R1);
2147#endif
2148#endif // !__SOFTFP__
2149  __ mov(SP, FP);
2150
2151#ifdef AARCH64
2152  __ raw_pop(FP, LR);
2153  __ ret();
2154#else
2155  __ pop(RegisterSet(FP) | RegisterSet(PC));
2156#endif // AARCH64
2157
2158  __ flush();
2159
2160  _deopt_blob = DeoptimizationBlob::create(&buffer, oop_maps, 0, exception_offset,
2161                                           reexecute_offset, frame_size_in_words);
2162  _deopt_blob->set_unpack_with_exception_in_tls_offset(exception_in_tls_offset);
2163}
2164
2165#ifdef COMPILER2
2166
2167//------------------------------generate_uncommon_trap_blob--------------------
2168// Ought to generate an ideal graph & compile, but here's some ARM assembly
2169// instead.
2170void SharedRuntime::generate_uncommon_trap_blob() {
2171  // allocate space for the code
2172  ResourceMark rm;
2173
2174  // setup code generation tools
2175  int pad = VerifyThread ? 512 : 0;
2176#ifdef _LP64
2177  CodeBuffer buffer("uncommon_trap_blob", 2700+pad, 512);
2178#else
2179  // Measured 8/7/03 at 660 in 32bit debug build (no VerifyThread)
2180  // Measured 8/7/03 at 1028 in 32bit debug build (VerifyThread)
2181  CodeBuffer buffer("uncommon_trap_blob", 2000+pad, 512);
2182#endif
2183  // bypassed when code generation is useless
2184  MacroAssembler* masm               = new MacroAssembler(&buffer);
2185  const Register Rublock = AARCH64_ONLY(R22) NOT_AARCH64(R6);
2186  const Register Rsender = AARCH64_ONLY(R23) NOT_AARCH64(altFP_7_11);
2187  assert_different_registers(Rublock, Rsender, Rexception_obj, R0, R1, R2, R3, R8, Rtemp);
2188
2189  //
2190  // This is the entry point for all traps the compiler takes when it thinks
2191  // it cannot handle further execution of compiled code. The frame is
2192  // deoptimized in these cases and converted into interpreter frames for
2193  // execution.
2194  // The steps taken by this frame are as follows:
2195  //   - push a fake "unpack_frame"
2196  //   - call the C routine Deoptimization::uncommon_trap (this function
2197  //     packs the current compiled frame into vframe arrays and returns
2198  //     information about the number and size of interpreter frames which
2199  //     are equivalent to the frame which is being deoptimized)
2200  //   - deallocate the "unpack_frame"
2201  //   - deallocate the deoptimization frame
2202  //   - in a loop using the information returned in the previous step
2203  //     push interpreter frames;
2204  //   - create a dummy "unpack_frame"
2205  //   - call the C routine: Deoptimization::unpack_frames (this function
2206  //     lays out values on the interpreter frame which was just created)
2207  //   - deallocate the dummy unpack_frame
2208  //   - return to the interpreter entry point
2209  //
2210  //  Refer to the following methods for more information:
2211  //   - Deoptimization::uncommon_trap
2212  //   - Deoptimization::unpack_frames
2213
2214  // the unloaded class index is in R0 (first parameter to this blob)
2215
2216  __ raw_push(FP, LR);
2217  __ set_last_Java_frame(SP, FP, false, Rtemp);
2218  __ mov(R2, Deoptimization::Unpack_uncommon_trap);
2219  __ mov(R1, R0);
2220  __ mov(R0, Rthread);
2221  __ call(CAST_FROM_FN_PTR(address, Deoptimization::uncommon_trap));
2222  __ mov(Rublock, R0);
2223  __ reset_last_Java_frame(Rtemp);
2224  __ raw_pop(FP, LR);
2225
2226#ifdef ASSERT
2227  { Label L;
2228    __ ldr_s32(Rtemp, Address(Rublock, Deoptimization::UnrollBlock::unpack_kind_offset_in_bytes()));
2229    __ cmp_32(Rtemp, Deoptimization::Unpack_uncommon_trap);
2230    __ b(L, eq);
2231    __ stop("SharedRuntime::generate_uncommon_trap_blob: expected Unpack_uncommon_trap");
2232    __ bind(L);
2233  }
2234#endif
2235
2236
2237  // Set initial stack state before pushing interpreter frames
2238  __ ldr_s32(Rtemp, Address(Rublock, Deoptimization::UnrollBlock::size_of_deoptimized_frame_offset_in_bytes()));
2239  __ ldr(R2, Address(Rublock, Deoptimization::UnrollBlock::frame_pcs_offset_in_bytes()));
2240  __ ldr(R3, Address(Rublock, Deoptimization::UnrollBlock::frame_sizes_offset_in_bytes()));
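  // As in generate_deopt_blob: R2 points to the array of frame pcs and R3 to the
  // array of frame sizes for the interpreter frames to be pushed below.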
2241
2242#ifdef AARCH64
2243  // Pop deoptimized frame. Make sure to restore the initial saved FP/LR of the caller.
2244  // They are needed for correct stack walking during stack overflow handling.
2245  // Also, restored FP is saved in the bottom interpreter frame (LR is reloaded from unroll block).
2246  __ sub(Rtemp, Rtemp, 2*wordSize);
2247  __ add(SP, SP, Rtemp, ex_uxtx);
2248  __ raw_pop(FP, LR);
2249
2250#ifdef ASSERT
2251  { Label L;
2252    __ ldr(Rtemp, Address(Rublock, Deoptimization::UnrollBlock::initial_info_offset_in_bytes()));
2253    __ cmp(FP, Rtemp);
2254    __ b(L, eq);
2255    __ stop("FP restored from deoptimized frame does not match FP stored in unroll block");
2256    __ bind(L);
2257  }
2258  { Label L;
2259    __ ldr(Rtemp, Address(R2));
2260    __ cmp(LR, Rtemp);
2261    __ b(L, eq);
2262    __ stop("LR restored from deoptimized frame does not match the 1st PC in unroll block");
2263    __ bind(L);
2264  }
2265#endif // ASSERT
2266
2267#else
2268  __ add(SP, SP, Rtemp);
2269#endif // AARCH64
2270
2271  // Check that there is enough stack to push the deoptimized frames.
2272#ifdef ASSERT
2273  // Compilers generate code that bangs the stack by as much as the
2274  // interpreter would need. So this stack banging should never
2275  // trigger a fault. Verify that it does not on non-product builds.
2276  if (UseStackBanging) {
2277#ifndef AARCH64
2278    // The compiled method that we are deoptimizing was popped from the stack.
2279    // If the stack bang results in a stack overflow, we don't return to the
2280    // method that is being deoptimized. The stack overflow exception is
2281    // propagated to the caller of the deoptimized method. Need to get the pc
2282    // from the caller in LR and restore FP.
2283    __ ldr(LR, Address(R2, 0));
2284    __ ldr(FP, Address(Rublock, Deoptimization::UnrollBlock::initial_info_offset_in_bytes()));
2285#endif // !AARCH64
2286    __ ldr_s32(R8, Address(Rublock, Deoptimization::UnrollBlock::total_frame_sizes_offset_in_bytes()));
2287    __ arm_stack_overflow_check(R8, Rtemp);
2288  }
2289#endif
2290  __ ldr_s32(R8, Address(Rublock, Deoptimization::UnrollBlock::number_of_frames_offset_in_bytes()));
2291  __ ldr_s32(Rtemp, Address(Rublock, Deoptimization::UnrollBlock::caller_adjustment_offset_in_bytes()));
2292  __ mov(Rsender, SP);
2293#ifdef AARCH64
2294  __ sub(SP, SP, Rtemp, ex_uxtx);
2295#else
2296  __ sub(SP, SP, Rtemp);
2297#endif
2298#ifndef AARCH64
2299  //  __ ldr(FP, Address(FP));
2300  __ ldr(FP, Address(Rublock, Deoptimization::UnrollBlock::initial_info_offset_in_bytes()));
2301#endif // !AARCH64
2302
2303  // Push interpreter frames in a loop
2304  Label loop;
2305  __ bind(loop);
2306  __ ldr(LR, Address(R2, wordSize, post_indexed));         // load frame pc
2307  __ ldr(Rtemp, Address(R3, wordSize, post_indexed));      // load frame size
2308
2309  __ raw_push(FP, LR);                                     // create new frame
2310  __ mov(FP, SP);
2311  __ sub(Rtemp, Rtemp, 2*wordSize);
2312
2313#ifdef AARCH64
2314  __ sub(SP, SP, Rtemp, ex_uxtx);
2315#else
2316  __ sub(SP, SP, Rtemp);
2317#endif // AARCH64
2318
2319  __ str(Rsender, Address(FP, frame::interpreter_frame_sender_sp_offset * wordSize));
2320#ifdef AARCH64
2321  __ str(ZR, Address(FP, frame::interpreter_frame_stack_top_offset * wordSize));
2322#else
2323  __ mov(LR, 0);
2324  __ str(LR, Address(FP, frame::interpreter_frame_last_sp_offset * wordSize));
2325#endif // AARCH64
2326  __ subs(R8, R8, 1);                               // decrement counter
2327  __ mov(Rsender, SP);
2328  __ b(loop, ne);
2329
2330  // Re-push self-frame
2331  __ ldr(LR, Address(R2));
2332  __ raw_push(FP, LR);
2333  __ mov(FP, SP);
2334
2335  // Call unpack_frames with proper arguments
2336  __ mov(R0, Rthread);
2337  __ mov(R1, Deoptimization::Unpack_uncommon_trap);
2338  __ set_last_Java_frame(SP, FP, false, Rtemp);
2339  __ call(CAST_FROM_FN_PTR(address, Deoptimization::unpack_frames));
2340  //  oop_maps->add_gc_map(__ pc() - start, new OopMap(frame_size_in_words, 0));
2341  __ reset_last_Java_frame(Rtemp);
2342
2343  __ mov(SP, FP);
2344#ifdef AARCH64
2345  __ raw_pop(FP, LR);
2346  __ ret();
2347#else
2348  __ pop(RegisterSet(FP) | RegisterSet(PC));
2349#endif
2350
2351  masm->flush();
2352  _uncommon_trap_blob = UncommonTrapBlob::create(&buffer, NULL, 2 /* LR+FP */);
2353}
2354
2355#endif // COMPILER2
2356
2357//------------------------------generate_handler_blob------
2358//
2359// Generate a special Compile2Runtime blob that saves all registers,
2360// sets up the oopmap, and calls safepoint code to stop the compiled code for
2361// a safepoint.
2362//
2363SafepointBlob* SharedRuntime::generate_handler_blob(address call_ptr, int poll_type) {
2364  assert(StubRoutines::forward_exception_entry() != NULL, "must be generated before");
2365
2366  ResourceMark rm;
2367  CodeBuffer buffer("handler_blob", 256, 256);
2368  int frame_size_words;
2369  OopMapSet* oop_maps;
2370
2371  bool cause_return = (poll_type == POLL_AT_RETURN);
2372
2373  MacroAssembler* masm = new MacroAssembler(&buffer);
2374  address start = __ pc();
2375  oop_maps = new OopMapSet();
2376
2377  if (!cause_return) {
2378#ifdef AARCH64
2379    __ raw_push(LR, LR);
2380#else
2381    __ sub(SP, SP, 4); // make room for LR which may still be live
2382                       // here if we are coming from a c2 method
2383#endif // AARCH64
2384  }
2385
2386  OopMap* map = RegisterSaver::save_live_registers(masm, &frame_size_words, !cause_return);
2387  if (!cause_return) {
2388    // update the saved PC with the correct value
2389    // (2 steps needed because LR can be live in a c2 method)
2390    __ ldr(LR, Address(Rthread, JavaThread::saved_exception_pc_offset()));
2391    __ str(LR, Address(SP, RegisterSaver::LR_offset * wordSize));
2392  }
2393
2394  __ mov(R0, Rthread);
2395  int pc_offset = __ set_last_Java_frame(SP, FP, false, Rtemp); // note: FP may not need to be saved (not on x86)
2396  assert(((__ pc()) - start) == __ offset(), "warning: start differs from code_begin");
2397  __ call(call_ptr);
2398  if (pc_offset == -1) {
2399    pc_offset = __ offset();
2400  }
2401  oop_maps->add_gc_map(pc_offset, map);
2402  __ reset_last_Java_frame(Rtemp); // Rtemp free since scratched by far call
2403
2404  // Check for pending exception
2405  __ ldr(Rtemp, Address(Rthread, Thread::pending_exception_offset()));
2406  __ cmp(Rtemp, 0);
2407
2408#ifdef AARCH64
2409  RegisterSaver::restore_live_registers(masm, cause_return);
2410  Register ret_addr = cause_return ? LR : Rtemp;
2411  if (!cause_return) {
2412    __ raw_pop(FP, ret_addr);
2413  }
2414
2415  Label throw_exception;
2416  __ b(throw_exception, ne);
2417  __ br(ret_addr);
2418
2419  __ bind(throw_exception);
2420  __ mov(Rexception_pc, ret_addr);
2421#else // AARCH64
2422  if (!cause_return) {
2423    RegisterSaver::restore_live_registers(masm, false);
2424    __ pop(PC, eq);
2425    __ pop(Rexception_pc);
2426  } else {
2427    RegisterSaver::restore_live_registers(masm);
2428    __ bx(LR, eq);
2429    __ mov(Rexception_pc, LR);
2430  }
2431#endif // AARCH64
2432
2433  __ jump(StubRoutines::forward_exception_entry(), relocInfo::runtime_call_type, Rtemp);
2434
2435  __ flush();
2436
2437  return SafepointBlob::create(&buffer, oop_maps, frame_size_words);
2438}
2439
2440RuntimeStub* SharedRuntime::generate_resolve_blob(address destination, const char* name) {
2441  assert(StubRoutines::forward_exception_entry() != NULL, "must be generated before");
2442
2443  ResourceMark rm;
2444  CodeBuffer buffer(name, 1000, 512);
2445  int frame_size_words;
2446  OopMapSet *oop_maps;
2447  int frame_complete;
2448
2449  MacroAssembler* masm = new MacroAssembler(&buffer);
2450  Label pending_exception;
2451
2452  int start = __ offset();
2453
2454  oop_maps = new OopMapSet();
2455  OopMap* map = RegisterSaver::save_live_registers(masm, &frame_size_words);
2456
2457  frame_complete = __ offset();
2458
2459  __ mov(R0, Rthread);
2460
2461  int pc_offset = __ set_last_Java_frame(SP, FP, false, Rtemp);
2462  assert(start == 0, "warning: start differs from code_begin");
2463  __ call(destination);
2464  if (pc_offset == -1) {
2465    pc_offset = __ offset();
2466  }
2467  oop_maps->add_gc_map(pc_offset, map);
2468  __ reset_last_Java_frame(Rtemp); // Rtemp free since scratched by far call
2469
2470  __ ldr(R1, Address(Rthread, Thread::pending_exception_offset()));
2471  __ cbnz(R1, pending_exception);
2472
2473  // Overwrite saved register values
2474
2475  // Place metadata result of VM call into Rmethod
2476  __ get_vm_result_2(R1, Rtemp);
2477  __ str(R1, Address(SP, RegisterSaver::Rmethod_offset * wordSize));
2478
2479  // Place target address (VM call result) into Rtemp
2480  __ str(R0, Address(SP, RegisterSaver::Rtemp_offset * wordSize));
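  // restore_live_registers below reloads these slots, so after the restore
  // Rmethod holds the Method* and Rtemp holds the resolved entry point
  // that we jump to.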
2481
2482  RegisterSaver::restore_live_registers(masm);
2483  __ jump(Rtemp);
2484
2485  __ bind(pending_exception);
2486
2487  RegisterSaver::restore_live_registers(masm);
2488  const Register Rzero = __ zero_register(Rtemp);
2489  __ str(Rzero, Address(Rthread, JavaThread::vm_result_2_offset()));
2490  __ mov(Rexception_pc, LR);
2491  __ jump(StubRoutines::forward_exception_entry(), relocInfo::runtime_call_type, Rtemp);
2492
2493  __ flush();
2494
2495  return RuntimeStub::new_runtime_stub(name, &buffer, frame_complete, frame_size_words, oop_maps, true);
2496}
2497