/*
 * Copyright (c) 2002, 2017, Oracle and/or its affiliates. All rights reserved.
 * Copyright (c) 2012, 2017 SAP SE. All rights reserved.
 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
 *
 * This code is free software; you can redistribute it and/or modify it
 * under the terms of the GNU General Public License version 2 only, as
 * published by the Free Software Foundation.
 *
 * This code is distributed in the hope that it will be useful, but WITHOUT
 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
 * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
 * version 2 for more details (a copy is included in the LICENSE file that
 * accompanied this code).
 *
 * You should have received a copy of the GNU General Public License version
 * 2 along with this work; if not, write to the Free Software Foundation,
 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
 *
 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
 * or visit www.oracle.com if you need additional information or have any
 * questions.
 *
 */

#ifndef CPU_PPC_VM_MACROASSEMBLER_PPC_HPP
#define CPU_PPC_VM_MACROASSEMBLER_PPC_HPP

#include "asm/assembler.hpp"
#include "runtime/rtmLocking.hpp"
#include "utilities/macros.hpp"

// MacroAssembler extends Assembler by a few frequently used macros.

class ciTypeArray;

class MacroAssembler: public Assembler {
 public:
  MacroAssembler(CodeBuffer* code) : Assembler(code) {}

  //
  // Optimized instruction emitters
  //

  inline static int largeoffset_si16_si16_hi(int si31) { return (si31 + (1<<15)) >> 16; }
  inline static int largeoffset_si16_si16_lo(int si31) { return si31 - (((si31 + (1<<15)) >> 16) << 16); }
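
  // Worked example (illustrative, not part of the API): the two helpers split
  // a 31-bit offset si31 into 16-bit halves such that (hi << 16) + lo == si31,
  // with lo in the sign-extendable range [-0x8000, 0x7fff]. The (1<<15) bias
  // compensates for the sign extension of lo in the subsequent addi/ld:
  //   si31 = 0x1234f678:  hi = (si31 + 0x8000) >> 16 = 0x1235
  //                       lo = si31 - (0x1235 << 16) = -0x0988
  //   check: (0x1235 << 16) - 0x0988 == 0x1234f678.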

  // load d = *[a+si31]
  // Emits several instructions if the offset is not encodable in one instruction.
  void ld_largeoffset_unchecked(Register d, int si31, Register a, int emit_filler_nop);
  void ld_largeoffset          (Register d, int si31, Register a, int emit_filler_nop);
  inline static bool is_ld_largeoffset(address a);
  inline static int get_ld_largeoffset_offset(address a);

  inline void round_to(Register r, int modulus);

  // Load/store with type given by parameter.
  void load_sized_value( Register dst, RegisterOrConstant offs, Register base, size_t size_in_bytes, bool is_signed);
  void store_sized_value(Register dst, RegisterOrConstant offs, Register base, size_t size_in_bytes);

  // Move register if destination register and target register are different
  inline void mr_if_needed(Register rd, Register rs);
  inline void fmr_if_needed(FloatRegister rd, FloatRegister rs);
  // This is dedicated for emitting scheduled mach nodes. For better
  // readability of the ad file I put it here.
  // Endgroups are not needed if
  //  - the scheduler is off
  //  - the scheduler found that there is a natural group end, in that
  //    case it reduced the size of the instruction used in the test
  //    yielding 'needed'.
  inline void endgroup_if_needed(bool needed);

  // Memory barriers.
  inline void membar(int bits);
  inline void release();
  inline void acquire();
  inline void fence();
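
  // Illustrative sketch (the authoritative encodings live in the .inline.hpp
  // implementation): on PPC64 these typically map to lwsync (release/acquire)
  // and sync (fence). A release-store publishing pattern, with placeholder
  // registers, would look like:
  //   std(Rval, value_offset, Robj); // initialize the object's field
  //   release();                     // order the init before the publish
  //   std(Robj, 0, Rqueue);          // publish the object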

  // nop padding
  void align(int modulus, int max = 252, int rem = 0);

  //
  // Constants, loading constants, TOC support
  //

  // Address of the global TOC.
  inline static address global_toc();
  // Offset of given address to the global TOC.
  inline static int offset_to_global_toc(const address addr);

  // Address of TOC of the current method.
  inline address method_toc();
  // Offset of given address to TOC of the current method.
  inline int offset_to_method_toc(const address addr);

  // Global TOC.
  void calculate_address_from_global_toc(Register dst, address addr,
                                         bool hi16 = true, bool lo16 = true,
                                         bool add_relocation = true, bool emit_dummy_addr = false);
  inline void calculate_address_from_global_toc_hi16only(Register dst, address addr) {
    calculate_address_from_global_toc(dst, addr, true, false);
  };
  inline void calculate_address_from_global_toc_lo16only(Register dst, address addr) {
    calculate_address_from_global_toc(dst, addr, false, true);
  };
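
  // Illustrative use (register and target are placeholders): materialize the
  // address of a code-cache-internal target as global_toc() plus a 32-bit
  // offset, split across an addis/addi pair so each half stays patchable:
  //   calculate_address_from_global_toc(Rtmp, my_target_pc); // addis + addi
  // The hi16only/lo16only flavors above emit just one half of that pair.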

  inline static bool is_calculate_address_from_global_toc_at(address a, address bound);
  static int patch_calculate_address_from_global_toc_at(address a, address addr, address bound);
  static address get_address_of_calculate_address_from_global_toc_at(address a, address addr);

#ifdef _LP64
  // Patch narrow oop constant.
  inline static bool is_set_narrow_oop(address a, address bound);
  static int patch_set_narrow_oop(address a, address bound, narrowOop data);
  static narrowOop get_narrow_oop(address a, address bound);
#endif

  inline static bool is_load_const_at(address a);

  // Emits an oop const to the constant pool, loads the constant, and
  // sets a relocation info with address current_pc.
  // Returns true if successful.
  bool load_const_from_method_toc(Register dst, AddressLiteral& a, Register toc, bool fixed_size = false);

  static bool is_load_const_from_method_toc_at(address a);
  static int get_offset_of_load_const_from_method_toc_at(address a);

  // Get the 64 bit constant from a `load_const' sequence.
  static long get_const(address load_const);

  // Patch the 64 bit constant of a `load_const' sequence. This is a
  // low level procedure. It neither flushes the instruction cache nor
  // is it atomic.
  static void patch_const(address load_const, long x);
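
  // Illustrative patching sequence (names are placeholders). Since
  // patch_const() neither flushes the icache nor is atomic, a safe caller
  // patches at a safepoint (or before the code is published) and flushes:
  //   patch_const(load_const_pc, (long)new_value);
  //   ICache::invalidate_range(load_const_pc, load_const_size);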

  // Metadata in code that we have to keep track of.
  AddressLiteral allocate_metadata_address(Metadata* obj); // allocate_index
  AddressLiteral constant_metadata_address(Metadata* obj); // find_index
  // Oops used directly in compiled code are stored in the constant pool,
  // and loaded from there.
  // Allocate new entry for oop in constant pool. Generate relocation.
  AddressLiteral allocate_oop_address(jobject obj);
  // Find oop obj in constant pool. Return relocation with its index.
  AddressLiteral constant_oop_address(jobject obj);

  // Find oop in constant pool and emit instructions to load it.
  // Uses constant_oop_address.
  inline void set_oop_constant(jobject obj, Register d);
  // Same as load_address.
  inline void set_oop         (AddressLiteral obj_addr, Register d);

  // Read runtime constant:  Issue load if constant not yet established,
  // else use real constant.
  virtual RegisterOrConstant delayed_value_impl(intptr_t* delayed_value_addr,
                                                Register tmp,
                                                int offset);

  //
  // branch, jump
  //

  inline void pd_patch_instruction(address branch, address target);
  NOT_PRODUCT(static void pd_print_patched_instruction(address branch);)

  // Conditional far branch for destinations encodable in 24+2 bits.
  // Same interface as bc, e.g. no inverse boint-field.
  enum {
    bc_far_optimize_not         = 0,
    bc_far_optimize_on_relocate = 1
  };
  // optimize: flag for telling the conditional far branch to optimize
  //           itself when relocated.
  void bc_far(int boint, int biint, Label& dest, int optimize);
  void bc_far_optimized(int boint, int biint, Label& dest); // 1 or 2 instructions
  // Relocation of conditional far branches.
  static bool    is_bc_far_at(address instruction_addr);
  static address get_dest_of_bc_far_at(address instruction_addr);
  static void    set_dest_of_bc_far_at(address instruction_addr, address dest);
 private:
  static bool inline is_bc_far_variant1_at(address instruction_addr);
  static bool inline is_bc_far_variant2_at(address instruction_addr);
  static bool inline is_bc_far_variant3_at(address instruction_addr);
 public:

  // Convenience bc_far versions.
  inline void blt_far(ConditionRegister crx, Label& L, int optimize);
  inline void bgt_far(ConditionRegister crx, Label& L, int optimize);
  inline void beq_far(ConditionRegister crx, Label& L, int optimize);
  inline void bso_far(ConditionRegister crx, Label& L, int optimize);
  inline void bge_far(ConditionRegister crx, Label& L, int optimize);
  inline void ble_far(ConditionRegister crx, Label& L, int optimize);
  inline void bne_far(ConditionRegister crx, Label& L, int optimize);
  inline void bns_far(ConditionRegister crx, Label& L, int optimize);

  // Emit, identify and patch a NOT mt-safe patchable 64 bit absolute call/jump.
 private:
  enum {
    bxx64_patchable_instruction_count = (2/*load_codecache_const*/ + 3/*load_const*/ + 1/*mtctr*/ + 1/*bctrl*/),
    bxx64_patchable_size              = bxx64_patchable_instruction_count * BytesPerInstWord,
    bxx64_patchable_ret_addr_offset   = bxx64_patchable_size
  };
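
  // Illustrative shape of the emitted sequence (7 instructions, 28 bytes):
  // one variant materializes the 64-bit target with a load_const-style
  // sequence; a TOC-relative variant is shorter and padded with nops to the
  // same fixed size. Either form ends with:
  //   mtctr Rtmp
  //   bctrl   // bctr instead when link == false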
  void bxx64_patchable(address target, relocInfo::relocType rt, bool link);
  static bool is_bxx64_patchable_at(            address instruction_addr, bool link);
  // Does the instruction use a pc-relative encoding of the destination?
  static bool is_bxx64_patchable_pcrelative_at( address instruction_addr, bool link);
  static bool is_bxx64_patchable_variant1_at(   address instruction_addr, bool link);
  // Load destination relative to global toc.
  static bool is_bxx64_patchable_variant1b_at(  address instruction_addr, bool link);
  static bool is_bxx64_patchable_variant2_at(   address instruction_addr, bool link);
  static void set_dest_of_bxx64_patchable_at(   address instruction_addr, address target, bool link);
  static address get_dest_of_bxx64_patchable_at(address instruction_addr, bool link);

 public:
  // call
  enum {
    bl64_patchable_instruction_count = bxx64_patchable_instruction_count,
    bl64_patchable_size              = bxx64_patchable_size,
    bl64_patchable_ret_addr_offset   = bxx64_patchable_ret_addr_offset
  };
  inline void bl64_patchable(address target, relocInfo::relocType rt) {
    bxx64_patchable(target, rt, /*link=*/true);
  }
  inline static bool is_bl64_patchable_at(address instruction_addr) {
    return is_bxx64_patchable_at(instruction_addr, /*link=*/true);
  }
  inline static bool is_bl64_patchable_pcrelative_at(address instruction_addr) {
    return is_bxx64_patchable_pcrelative_at(instruction_addr, /*link=*/true);
  }
  inline static void set_dest_of_bl64_patchable_at(address instruction_addr, address target) {
    set_dest_of_bxx64_patchable_at(instruction_addr, target, /*link=*/true);
  }
  inline static address get_dest_of_bl64_patchable_at(address instruction_addr) {
    return get_dest_of_bxx64_patchable_at(instruction_addr, /*link=*/true);
  }
  // jump
  enum {
    b64_patchable_instruction_count = bxx64_patchable_instruction_count,
    b64_patchable_size              = bxx64_patchable_size,
  };
  inline void b64_patchable(address target, relocInfo::relocType rt) {
    bxx64_patchable(target, rt, /*link=*/false);
  }
  inline static bool is_b64_patchable_at(address instruction_addr) {
    return is_bxx64_patchable_at(instruction_addr, /*link=*/false);
  }
  inline static bool is_b64_patchable_pcrelative_at(address instruction_addr) {
    return is_bxx64_patchable_pcrelative_at(instruction_addr, /*link=*/false);
  }
  inline static void set_dest_of_b64_patchable_at(address instruction_addr, address target) {
    set_dest_of_bxx64_patchable_at(instruction_addr, target, /*link=*/false);
  }
  inline static address get_dest_of_b64_patchable_at(address instruction_addr) {
    return get_dest_of_bxx64_patchable_at(instruction_addr, /*link=*/false);
  }

  //
  // Support for frame handling
  //

  // some ABI-related functions
  void save_nonvolatile_gprs(   Register dst_base, int offset);
  void restore_nonvolatile_gprs(Register src_base, int offset);
  enum { num_volatile_regs = 11 + 14 }; // GPR + FPR
  void save_volatile_gprs(   Register dst_base, int offset);
  void restore_volatile_gprs(Register src_base, int offset);
  void save_LR_CR(   Register tmp);     // tmp contains LR on return.
  void restore_LR_CR(Register tmp);

  // Get current PC using bl-next-instruction trick.
  address get_PC_trash_LR(Register result);

  // Resize current frame, either relative to the current SP or to an absolute new SP.
  void resize_frame(Register offset, Register tmp);
  void resize_frame(int      offset, Register tmp);
  void resize_frame_absolute(Register addr, Register tmp1, Register tmp2);

  // Push a frame of size bytes.
  void push_frame(Register bytes, Register tmp);

  // Push a frame of size `bytes'. No abi space provided.
  void push_frame(unsigned int bytes, Register tmp);

  // Push a frame of size `bytes' plus abi_reg_args on top.
  void push_frame_reg_args(unsigned int bytes, Register tmp);

  // Set up a new C frame with a spill area for non-volatile GPRs and additional
  // space for local variables.
  void push_frame_reg_args_nonvolatiles(unsigned int bytes, Register tmp);

  // pop current C frame
  void pop_frame();
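
  // Illustrative stub prologue/epilogue built from the helpers above
  // (register choices are placeholders, not a fixed convention):
  //   save_LR_CR(R0);               // LR/CR to the caller's frame
  //   push_frame_reg_args(0, R11);  // frame with ABI argument save area
  //   ...                           // body, may contain C calls
  //   pop_frame();
  //   restore_LR_CR(R0);
  //   blr();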

  //
  // Calls
  //

 private:
  address _last_calls_return_pc;

#if defined(ABI_ELFv2)
  // Generic version of a call to C function.
  // Updates and returns _last_calls_return_pc.
  address branch_to(Register function_entry, bool and_link);
#else
  // Generic version of a call to C function via a function descriptor
  // with variable support for C calling conventions (TOC, ENV, etc.).
  // Updates and returns _last_calls_return_pc.
  address branch_to(Register function_descriptor, bool and_link, bool save_toc_before_call,
                    bool restore_toc_after_call, bool load_toc_of_callee, bool load_env_of_callee);
#endif

 public:

  // Get the pc where the last call will return to. Returns _last_calls_return_pc.
  inline address last_calls_return_pc();

#if defined(ABI_ELFv2)
  // Call a C function via its entry point and use full C
  // calling conventions. Updates and returns _last_calls_return_pc.
  address call_c(Register function_entry);
  // For tail calls: only branch, don't link, so callee returns to caller of this function.
  address call_c_and_return_to_caller(Register function_entry);
  address call_c(address function_entry, relocInfo::relocType rt);
#else
  // Call a C function via a function descriptor and use full C
  // calling conventions. Updates and returns _last_calls_return_pc.
  address call_c(Register function_descriptor);
  // For tail calls: only branch, don't link, so callee returns to caller of this function.
  address call_c_and_return_to_caller(Register function_descriptor);
  address call_c(const FunctionDescriptor* function_descriptor, relocInfo::relocType rt);
  address call_c_using_toc(const FunctionDescriptor* function_descriptor, relocInfo::relocType rt,
                           Register toc);
#endif

 protected:

  // It is imperative that all calls into the VM are handled via the
  // call_VM macros. They make sure that the stack linkage is set up
  // correctly. call_VM's correspond to ENTRY/ENTRY_X entry points
  // while call_VM_leaf's correspond to LEAF entry points.
  //
  // This is the base routine called by the different versions of
  // call_VM. The interpreter may customize this version by overriding
  // it for its purposes (e.g., to save/restore additional registers
  // when doing a VM call).
  //
  // If no last_java_sp is specified (noreg) then SP will be used instead.
  virtual void call_VM_base(
     // where an oop-result ends up if any; use noreg otherwise
    Register        oop_result,
    // to set up last_Java_frame in stubs; use noreg otherwise
    Register        last_java_sp,
    // the entry point
    address         entry_point,
    // flag which indicates if exception should be checked
    bool            check_exception = true
  );

  // Support for VM calls. This is the base routine called by the
  // different versions of call_VM_leaf. The interpreter may customize
  // this version by overriding it for its purposes (e.g., to
  // save/restore additional registers when doing a VM call).
  void call_VM_leaf_base(address entry_point);

 public:
  // Call into the VM.
  // Passes the thread pointer (in R3_ARG1) as a prepended argument.
  // Makes sure oop return values are visible to the GC.
  void call_VM(Register oop_result, address entry_point, bool check_exceptions = true);
  void call_VM(Register oop_result, address entry_point, Register arg_1, bool check_exceptions = true);
  void call_VM(Register oop_result, address entry_point, Register arg_1, Register arg_2, bool check_exceptions = true);
  void call_VM(Register oop_result, address entry_point, Register arg_1, Register arg_2, Register arg_3, bool check_exceptions = true);
  void call_VM_leaf(address entry_point);
  void call_VM_leaf(address entry_point, Register arg_1);
  void call_VM_leaf(address entry_point, Register arg_1, Register arg_2);
  void call_VM_leaf(address entry_point, Register arg_1, Register arg_2, Register arg_3);
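
  // Illustrative call into the runtime (the entry point and argument
  // register are placeholders; the thread is prepended as R3_ARG1):
  //   call_VM(R3_RET, CAST_FROM_FN_PTR(address, SharedRuntime::some_entry), Rarg);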

  // Call a stub function via a function descriptor, but don't save
  // TOC before call, don't set up TOC and ENV for call, and don't
  // restore TOC after call. Updates and returns _last_calls_return_pc.
  inline address call_stub(Register function_entry);
  inline void call_stub_and_return_to(Register function_entry, Register return_pc);

  //
  // Java utilities
  //

  // Read from the polling page, whose address is already in a register.
  inline void load_from_polling_page(Register polling_page_address, int offset = 0);
  // Check whether instruction is a read access to the polling page
  // which was emitted by load_from_polling_page(..).
  static bool is_load_from_polling_page(int instruction, void* ucontext/*may be NULL*/,
                                        address* polling_address_ptr = NULL);

  // Check whether instruction is a write access to the memory
  // serialization page realized by one of the instructions stw, stwu,
  // stwx, or stwux.
  static bool is_memory_serialization(int instruction, JavaThread* thread, void* ucontext);

  // Support for NULL-checks
  //
  // Generates code that causes a NULL OS exception if the content of reg is NULL.
  // If the accessed location is M[reg + offset] and the offset is known, provide the
  // offset. No explicit code generation is needed if the offset is within a certain
  // range (0 <= offset <= page_size).

  // Stack overflow checking
  void bang_stack_with_offset(int offset);

  // If instruction is a stack bang of the form ld, stdu, or
  // stdux, return the banged address. Otherwise, return 0.
  static address get_stack_bang_address(int instruction, void* ucontext);

  // Check for reserved stack access in method being exited. If the reserved
  // stack area was accessed, protect it again and throw StackOverflowError.
  void reserved_stack_check(Register return_pc);

  // Atomics
  // CmpxchgX sets condition register to cmpX(current, compare).
  // (flag == ne) => (dest_current_value != compare_value), (!swapped)
  // (flag == eq) => (dest_current_value == compare_value), ( swapped)
  static inline bool cmpxchgx_hint_acquire_lock()  { return true; }
  // The stxcx will probably not be succeeded by a releasing store.
  static inline bool cmpxchgx_hint_release_lock()  { return false; }
  static inline bool cmpxchgx_hint_atomic_update() { return false; }

  // Cmpxchg semantics
  enum {
    MemBarNone = 0,
    MemBarRel  = 1,
    MemBarAcq  = 2,
    MemBarFenceAfter = 4 // use powers of 2
  };
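
  // Illustrative semantics values: the flags combine bitwise, e.g.
  //   MemBarRel | MemBarAcq  -- barrier before and after the CAS,
  //   MemBarNone             -- plain atomic update, no ordering.
  // See cmpxchg_generic() for the instructions actually emitted.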
 private:
  // Helper functions for word/sub-word atomics.
  void atomic_get_and_modify_generic(Register dest_current_value, Register exchange_value,
                                     Register addr_base, Register tmp1, Register tmp2, Register tmp3,
                                     bool cmpxchgx_hint, bool is_add, int size);
  void cmpxchg_loop_body(ConditionRegister flag, Register dest_current_value,
                         Register compare_value, Register exchange_value,
                         Register addr_base, Register tmp1, Register tmp2,
                         Label &retry, Label &failed, bool cmpxchgx_hint, int size);
  void cmpxchg_generic(ConditionRegister flag,
                       Register dest_current_value, Register compare_value, Register exchange_value, Register addr_base,
                       Register tmp1, Register tmp2,
                       int semantics, bool cmpxchgx_hint, Register int_flag_success, bool contention_hint, bool weak, int size);
 public:
  // Temps and addr_base are killed if processor does not support Power 8 instructions.
  // Result will be sign extended.
  void getandsetb(Register dest_current_value, Register exchange_value, Register addr_base,
                  Register tmp1, Register tmp2, Register tmp3, bool cmpxchgx_hint) {
    atomic_get_and_modify_generic(dest_current_value, exchange_value, addr_base, tmp1, tmp2, tmp3, cmpxchgx_hint, false, 1);
  }
  // Temps and addr_base are killed if processor does not support Power 8 instructions.
  // Result will be sign extended.
  void getandseth(Register dest_current_value, Register exchange_value, Register addr_base,
                  Register tmp1, Register tmp2, Register tmp3, bool cmpxchgx_hint) {
    atomic_get_and_modify_generic(dest_current_value, exchange_value, addr_base, tmp1, tmp2, tmp3, cmpxchgx_hint, false, 2);
  }
  void getandsetw(Register dest_current_value, Register exchange_value, Register addr_base,
                  bool cmpxchgx_hint) {
    atomic_get_and_modify_generic(dest_current_value, exchange_value, addr_base, noreg, noreg, noreg, cmpxchgx_hint, false, 4);
  }
  void getandsetd(Register dest_current_value, Register exchange_value, Register addr_base,
                  bool cmpxchgx_hint);
  // tmp2/3 and addr_base are killed if processor does not support Power 8 instructions (tmp1 is always needed).
  // Result will be sign extended.
  void getandaddb(Register dest_current_value, Register inc_value, Register addr_base,
                  Register tmp1, Register tmp2, Register tmp3, bool cmpxchgx_hint) {
    atomic_get_and_modify_generic(dest_current_value, inc_value, addr_base, tmp1, tmp2, tmp3, cmpxchgx_hint, true, 1);
  }
  // tmp2/3 and addr_base are killed if processor does not support Power 8 instructions (tmp1 is always needed).
  // Result will be sign extended.
  void getandaddh(Register dest_current_value, Register inc_value, Register addr_base,
                  Register tmp1, Register tmp2, Register tmp3, bool cmpxchgx_hint) {
    atomic_get_and_modify_generic(dest_current_value, inc_value, addr_base, tmp1, tmp2, tmp3, cmpxchgx_hint, true, 2);
  }
  void getandaddw(Register dest_current_value, Register inc_value, Register addr_base,
                  Register tmp1, bool cmpxchgx_hint) {
    atomic_get_and_modify_generic(dest_current_value, inc_value, addr_base, tmp1, noreg, noreg, cmpxchgx_hint, true, 4);
  }
  void getandaddd(Register dest_current_value, Register exchange_value, Register addr_base,
                  Register tmp, bool cmpxchgx_hint);
  // Temps, addr_base and exchange_value are killed if processor does not support Power 8 instructions.
  // compare_value must be at least 32 bit sign extended. Result will be sign extended.
  void cmpxchgb(ConditionRegister flag,
                Register dest_current_value, Register compare_value, Register exchange_value, Register addr_base,
                Register tmp1, Register tmp2, int semantics, bool cmpxchgx_hint = false,
                Register int_flag_success = noreg, bool contention_hint = false, bool weak = false) {
    cmpxchg_generic(flag, dest_current_value, compare_value, exchange_value, addr_base, tmp1, tmp2,
                    semantics, cmpxchgx_hint, int_flag_success, contention_hint, weak, 1);
  }
  // Temps, addr_base and exchange_value are killed if processor does not support Power 8 instructions.
  // compare_value must be at least 32 bit sign extended. Result will be sign extended.
  void cmpxchgh(ConditionRegister flag,
                Register dest_current_value, Register compare_value, Register exchange_value, Register addr_base,
                Register tmp1, Register tmp2, int semantics, bool cmpxchgx_hint = false,
                Register int_flag_success = noreg, bool contention_hint = false, bool weak = false) {
    cmpxchg_generic(flag, dest_current_value, compare_value, exchange_value, addr_base, tmp1, tmp2,
                    semantics, cmpxchgx_hint, int_flag_success, contention_hint, weak, 2);
  }
  void cmpxchgw(ConditionRegister flag,
                Register dest_current_value, Register compare_value, Register exchange_value, Register addr_base,
                int semantics, bool cmpxchgx_hint = false,
                Register int_flag_success = noreg, bool contention_hint = false, bool weak = false) {
    cmpxchg_generic(flag, dest_current_value, compare_value, exchange_value, addr_base, noreg, noreg,
                    semantics, cmpxchgx_hint, int_flag_success, contention_hint, weak, 4);
  }
  void cmpxchgd(ConditionRegister flag,
                Register dest_current_value, RegisterOrConstant compare_value, Register exchange_value,
                Register addr_base, int semantics, bool cmpxchgx_hint = false,
                Register int_flag_success = noreg, Label* failed = NULL, bool contention_hint = false, bool weak = false);
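
  // Illustrative word-sized CAS on a lock word (registers are placeholders):
  //   cmpxchgw(CCR0, Rcurrent, Rexpected, Rnew, Rlock_addr,
  //            MemBarRel | MemBarAcq, cmpxchgx_hint_acquire_lock());
  //   bne(CCR0, slow_path); // CCR0 holds the comparison result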

  // interface method calling
  void lookup_interface_method(Register recv_klass,
                               Register intf_klass,
                               RegisterOrConstant itable_index,
                               Register method_result,
                               Register temp_reg, Register temp2_reg,
                               Label& no_such_interface);

  // virtual method calling
  void lookup_virtual_method(Register recv_klass,
                             RegisterOrConstant vtable_index,
                             Register method_result);

  // Test sub_klass against super_klass, with fast and slow paths.

  // The fast path produces a tri-state answer: yes / no / maybe-slow.
  // One of the three labels can be NULL, meaning take the fall-through.
  // If super_check_offset is -1, the value is loaded up from super_klass.
  // No registers are killed, except temp1_reg and temp2_reg.
  // If super_check_offset is not -1, temp2_reg is not used and can be noreg.
  void check_klass_subtype_fast_path(Register sub_klass,
                                     Register super_klass,
                                     Register temp1_reg,
                                     Register temp2_reg,
                                     Label* L_success,
                                     Label* L_failure,
                                     Label* L_slow_path = NULL, // default fall through
                                     RegisterOrConstant super_check_offset = RegisterOrConstant(-1));

  // The rest of the type check; must be wired to a corresponding fast path.
  // It does not repeat the fast path logic, so don't use it standalone.
  // A temp register can be noreg, if no temps are available.
  // It can also be sub_klass or super_klass, meaning it's OK to kill that one.
  // Updates the sub's secondary super cache as necessary.
  void check_klass_subtype_slow_path(Register sub_klass,
                                     Register super_klass,
                                     Register temp1_reg,
                                     Register temp2_reg,
                                     Label* L_success = NULL,
                                     Register result_reg = noreg);

  // Simplified, combined version, good for typical uses.
  // Falls through on failure.
  void check_klass_subtype(Register sub_klass,
                           Register super_klass,
                           Register temp1_reg,
                           Register temp2_reg,
                           Label& L_success);
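
  // Illustrative full subtype check (labels/registers are placeholders):
  //   Label is_subtype;
  //   check_klass_subtype(Rsub_klass, Rsuper_klass, Rtmp1, Rtmp2, is_subtype);
  //   // fall through: not a subtype -> throw/deoptimize here
  //   bind(is_subtype);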

  // Method handle support (JSR 292).
  void check_method_handle_type(Register mtype_reg, Register mh_reg, Register temp_reg, Label& wrong_method_type);

  RegisterOrConstant argument_offset(RegisterOrConstant arg_slot, Register temp_reg, int extra_slot_offset = 0);

  // Biased locking support
  // Upon entry, obj_reg must contain the target object, and mark_reg
  // must contain the target object's header.
  // Destroys mark_reg if an attempt is made to bias an anonymously
  // biased lock. In this case a failure will go either to the slow
  // case or fall through with the notEqual condition code set with
  // the expectation that the slow case in the runtime will be called.
  // In the fall-through case where the CAS-based lock is done,
  // mark_reg is not destroyed.
  void biased_locking_enter(ConditionRegister cr_reg, Register obj_reg, Register mark_reg, Register temp_reg,
                            Register temp2_reg, Label& done, Label* slow_case = NULL);
  // Upon entry, the base register of mark_addr must contain the oop.
  // Destroys temp_reg.
  void biased_locking_exit(ConditionRegister cr_reg, Register mark_addr, Register temp_reg, Label& done);

  // allocation (for C1)
  void eden_allocate(
    Register obj,                      // result: pointer to object after successful allocation
    Register var_size_in_bytes,        // object size in bytes if unknown at compile time; invalid otherwise
    int      con_size_in_bytes,        // object size in bytes if   known at compile time
    Register t1,                       // temp register
    Register t2,                       // temp register
    Label&   slow_case                 // continuation point if fast allocation fails
  );
  void tlab_allocate(
    Register obj,                      // result: pointer to object after successful allocation
    Register var_size_in_bytes,        // object size in bytes if unknown at compile time; invalid otherwise
    int      con_size_in_bytes,        // object size in bytes if   known at compile time
    Register t1,                       // temp register
    Label&   slow_case                 // continuation point if fast allocation fails
  );
  void tlab_refill(Label& retry_tlab, Label& try_eden, Label& slow_case);
  void incr_allocated_bytes(RegisterOrConstant size_in_bytes, Register t1, Register t2);

  enum { trampoline_stub_size = 6 * 4 };
  address emit_trampoline_stub(int destination_toc_offset, int insts_call_instruction_offset, Register Rtoc = noreg);

  void atomic_inc_ptr(Register addr, Register result, int simm16 = 1);
  void atomic_ori_int(Register addr, Register result, int uimm16);

#if INCLUDE_RTM_OPT
  void rtm_counters_update(Register abort_status, Register rtm_counters);
  void branch_on_random_using_tb(Register tmp, int count, Label& brLabel);
  void rtm_abort_ratio_calculation(Register rtm_counters_reg, RTMLockingCounters* rtm_counters,
                                   Metadata* method_data);
  void rtm_profiling(Register abort_status_Reg, Register temp_Reg,
                     RTMLockingCounters* rtm_counters, Metadata* method_data, bool profile_rtm);
  void rtm_retry_lock_on_abort(Register retry_count, Register abort_status,
                               Label& retryLabel, Label* checkRetry = NULL);
  void rtm_retry_lock_on_busy(Register retry_count, Register owner_addr, Label& retryLabel);
  void rtm_stack_locking(ConditionRegister flag, Register obj, Register mark_word, Register tmp,
                         Register retry_on_abort_count,
                         RTMLockingCounters* stack_rtm_counters,
                         Metadata* method_data, bool profile_rtm,
                         Label& DONE_LABEL, Label& IsInflated);
  void rtm_inflated_locking(ConditionRegister flag, Register obj, Register mark_word, Register box,
                            Register retry_on_busy_count, Register retry_on_abort_count,
                            RTMLockingCounters* rtm_counters,
                            Metadata* method_data, bool profile_rtm,
                            Label& DONE_LABEL);
#endif

  void compiler_fast_lock_object(ConditionRegister flag, Register oop, Register box,
                                 Register tmp1, Register tmp2, Register tmp3,
                                 bool try_bias = UseBiasedLocking,
                                 RTMLockingCounters* rtm_counters = NULL,
                                 RTMLockingCounters* stack_rtm_counters = NULL,
                                 Metadata* method_data = NULL,
                                 bool use_rtm = false, bool profile_rtm = false);

  void compiler_fast_unlock_object(ConditionRegister flag, Register oop, Register box,
                                   Register tmp1, Register tmp2, Register tmp3,
                                   bool try_bias = UseBiasedLocking, bool use_rtm = false);

  // Support for serializing memory accesses between threads
  void serialize_memory(Register thread, Register tmp1, Register tmp2);

  // GC barrier support.
  void card_write_barrier_post(Register Rstore_addr, Register Rnew_val, Register Rtmp);
  void card_table_write(jbyte* byte_map_base, Register Rtmp, Register Robj);

  void resolve_jobject(Register value, Register tmp1, Register tmp2, bool needs_frame);

#if INCLUDE_ALL_GCS
  // General G1 pre-barrier generator.
  void g1_write_barrier_pre(Register Robj, RegisterOrConstant offset, Register Rpre_val,
                            Register Rtmp1, Register Rtmp2, bool needs_frame = false);
  // General G1 post-barrier generator
  void g1_write_barrier_post(Register Rstore_addr, Register Rnew_val, Register Rtmp1,
                             Register Rtmp2, Register Rtmp3, Label *filtered_ext = NULL);
#endif

  // Support for managing the JavaThread pointer (i.e., the reference to
  // thread-local information).

  // Support for last Java frame (but use call_VM instead where possible):
  // access R16_thread->last_Java_sp.
  void set_last_Java_frame(Register last_java_sp, Register last_Java_pc);
  void reset_last_Java_frame(void);
  void set_top_ijava_frame_at_SP_as_last_Java_frame(Register sp, Register tmp1);

  // Read vm result from thread: oop_result = R16_thread->result;
  void get_vm_result  (Register oop_result);
  void get_vm_result_2(Register metadata_result);

  static bool needs_explicit_null_check(intptr_t offset);

  // Trap-instruction-based checks.
  // Range checks can be distinguished from zero checks as they check 32 bit
  // quantities (tw, twi), while zero checks check all 64 bits (td, tdi).
  inline void trap_null_check(Register a, trap_to_bits cmp = traptoEqual);
  static bool is_trap_null_check(int x) {
    return is_tdi(x, traptoEqual,               -1/*any reg*/, 0) ||
           is_tdi(x, traptoGreaterThanUnsigned, -1/*any reg*/, 0);
  }

  inline void trap_zombie_not_entrant();
  static bool is_trap_zombie_not_entrant(int x) { return is_tdi(x, traptoUnconditional, 0/*reg 0*/, 1); }

  inline void trap_should_not_reach_here();
  static bool is_trap_should_not_reach_here(int x) { return is_tdi(x, traptoUnconditional, 0/*reg 0*/, 2); }

  inline void trap_ic_miss_check(Register a, Register b);
  static bool is_trap_ic_miss_check(int x) {
    return is_td(x, traptoGreaterThanUnsigned | traptoLessThanUnsigned, -1/*any reg*/, -1/*any reg*/);
  }

  // Implicit or explicit null check, jumps to static address exception_entry.
  inline void null_check_throw(Register a, int offset, Register temp_reg, address exception_entry);
  inline void null_check(Register a, int offset, Label *Lis_null); // implicit only if Lis_null not provided

  // Load heap oop and decompress. Loaded oop may not be null.
  // Specify tmp to save one cycle.
  inline void load_heap_oop_not_null(Register d, RegisterOrConstant offs, Register s1 = noreg,
                                     Register tmp = noreg);
  // Compress and store heap oop. The (uncompressed) oop may not be null.
  // Specify tmp register if d should not be changed.
  inline void store_heap_oop_not_null(Register d, RegisterOrConstant offs, Register s1,
                                      Register tmp = noreg);

  // Null allowed.
  inline void load_heap_oop(Register d, RegisterOrConstant offs, Register s1 = noreg, Label *is_null = NULL);

  // Encode/decode heap oop. Oop may not be null, else en/decoding goes wrong.
  // src == d allowed.
  inline Register encode_heap_oop_not_null(Register d, Register src = noreg);
  inline Register decode_heap_oop_not_null(Register d, Register src = noreg);

  // Null allowed.
  inline Register encode_heap_oop(Register d, Register src); // Prefer null check in GC barrier!
  inline void decode_heap_oop(Register d);

  // Load/Store klass oop from klass field. Compress.
  void load_klass(Register dst, Register src);
  void store_klass(Register dst_oop, Register klass, Register tmp = R0);
  void store_klass_gap(Register dst_oop, Register val = noreg); // Will store 0 if val not specified.

  void load_mirror_from_const_method(Register mirror, Register const_method);

  static int instr_size_for_decode_klass_not_null();
  void decode_klass_not_null(Register dst, Register src = noreg);
  Register encode_klass_not_null(Register dst, Register src = noreg);

  // SIGTRAP-based range checks for arrays.
  inline void trap_range_check_l(Register a, Register b);
  inline void trap_range_check_l(Register a, int si16);
  static bool is_trap_range_check_l(int x) {
    return (is_tw (x, traptoLessThanUnsigned, -1/*any reg*/, -1/*any reg*/) ||
            is_twi(x, traptoLessThanUnsigned, -1/*any reg*/)                  );
  }
  inline void trap_range_check_le(Register a, int si16);
  static bool is_trap_range_check_le(int x) {
    return is_twi(x, traptoEqual | traptoLessThanUnsigned, -1/*any reg*/);
  }
  inline void trap_range_check_g(Register a, int si16);
  static bool is_trap_range_check_g(int x) {
    return is_twi(x, traptoGreaterThanUnsigned, -1/*any reg*/);
  }
  inline void trap_range_check_ge(Register a, Register b);
  inline void trap_range_check_ge(Register a, int si16);
  static bool is_trap_range_check_ge(int x) {
    return (is_tw (x, traptoEqual | traptoGreaterThanUnsigned, -1/*any reg*/, -1/*any reg*/) ||
            is_twi(x, traptoEqual | traptoGreaterThanUnsigned, -1/*any reg*/)                  );
  }
  static bool is_trap_range_check(int x) {
    return is_trap_range_check_l(x) || is_trap_range_check_le(x) ||
           is_trap_range_check_g(x) || is_trap_range_check_ge(x);
  }
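
  // Illustrative bounds check via SIGTRAP (registers are placeholders):
  //   lwz(Rlen, arrayOopDesc::length_offset_in_bytes(), Rary); // array length
  //   trap_range_check_ge(Rindex, Rlen); // traps if index >= length
  // The signal handler identifies the tw/twi via is_trap_range_check()
  // and posts the range-check exception.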

  void clear_memory_unrolled(Register base_ptr, int cnt_dwords, Register tmp = R0, int offset = 0);
  void clear_memory_constlen(Register base_ptr, int cnt_dwords, Register tmp = R0);
  void clear_memory_doubleword(Register base_ptr, Register cnt_dwords, Register tmp = R0, long const_cnt = -1);

#ifdef COMPILER2
  // Intrinsics for CompactStrings
  // Compress char[] to byte[] by compressing 16 bytes at once.
  void string_compress_16(Register src, Register dst, Register cnt,
                          Register tmp1, Register tmp2, Register tmp3, Register tmp4, Register tmp5,
                          Label& Lfailure);

  // Compress char[] to byte[]. cnt must be positive int.
  void string_compress(Register src, Register dst, Register cnt, Register tmp, Label& Lfailure);

  // Inflate byte[] to char[] by inflating 16 bytes at once.
  void string_inflate_16(Register src, Register dst, Register cnt,
                         Register tmp1, Register tmp2, Register tmp3, Register tmp4, Register tmp5);

  // Inflate byte[] to char[]. cnt must be positive int.
  void string_inflate(Register src, Register dst, Register cnt, Register tmp);

  void string_compare(Register str1, Register str2, Register cnt1, Register cnt2,
                      Register tmp1, Register result, int ae);

  void array_equals(bool is_array_equ, Register ary1, Register ary2,
                    Register limit, Register tmp1, Register result, bool is_byte);

  void string_indexof(Register result, Register haystack, Register haycnt,
                      Register needle, ciTypeArray* needle_values, Register needlecnt, int needlecntval,
                      Register tmp1, Register tmp2, Register tmp3, Register tmp4, int ae);

  void string_indexof_char(Register result, Register haystack, Register haycnt,
                           Register needle, jchar needleChar, Register tmp1, Register tmp2, bool is_byte);

  void has_negatives(Register src, Register cnt, Register result, Register tmp1, Register tmp2);
#endif

  // Emitters for BigInteger.multiplyToLen intrinsic.
  inline void multiply64(Register dest_hi, Register dest_lo,
                         Register x, Register y);
  void add2_with_carry(Register dest_hi, Register dest_lo,
                       Register src1, Register src2);
  void multiply_64_x_64_loop(Register x, Register xstart, Register x_xstart,
                             Register y, Register y_idx, Register z,
                             Register carry, Register product_high, Register product,
                             Register idx, Register kdx, Register tmp);
  void multiply_add_128_x_128(Register x_xstart, Register y, Register z,
                              Register yz_idx, Register idx, Register carry,
                              Register product_high, Register product, Register tmp,
                              int offset);
  void multiply_128_x_128_loop(Register x_xstart,
                               Register y, Register z,
                               Register yz_idx, Register idx, Register carry,
                               Register product_high, Register product,
                               Register carry2, Register tmp);
  void multiply_to_len(Register x, Register xlen,
                       Register y, Register ylen,
                       Register z, Register zlen,
                       Register tmp1, Register tmp2, Register tmp3, Register tmp4, Register tmp5,
                       Register tmp6, Register tmp7, Register tmp8, Register tmp9, Register tmp10,
                       Register tmp11, Register tmp12, Register tmp13);

  // CRC32 Intrinsics.
  void load_reverse_32(Register dst, Register src);
  int  crc32_table_columns(Register table, Register tc0, Register tc1, Register tc2, Register tc3);
  void fold_byte_crc32(Register crc, Register val, Register table, Register tmp);
  void fold_8bit_crc32(Register crc, Register table, Register tmp);
  void update_byte_crc32(Register crc, Register val, Register table);
  void update_byteLoop_crc32(Register crc, Register buf, Register len, Register table,
                             Register data, bool loopAlignment, bool invertCRC);
  void update_1word_crc32(Register crc, Register buf, Register table, int bufDisp, int bufInc,
                          Register t0,  Register t1,  Register t2,  Register t3,
                          Register tc0, Register tc1, Register tc2, Register tc3);
  void kernel_crc32_2word(Register crc, Register buf, Register len, Register table,
                          Register t0,  Register t1,  Register t2,  Register t3,
                          Register tc0, Register tc1, Register tc2, Register tc3);
  void kernel_crc32_1word(Register crc, Register buf, Register len, Register table,
                          Register t0,  Register t1,  Register t2,  Register t3,
                          Register tc0, Register tc1, Register tc2, Register tc3);
  void kernel_crc32_1byte(Register crc, Register buf, Register len, Register table,
                          Register t0,  Register t1,  Register t2,  Register t3);
  void kernel_crc32_1word_vpmsumd(Register crc, Register buf, Register len, Register table,
                          Register constants, Register barretConstants,
                          Register t0,  Register t1, Register t2, Register t3, Register t4);
  void kernel_crc32_1word_aligned(Register crc, Register buf, Register len,
                          Register constants, Register barretConstants,
                          Register t0, Register t1, Register t2);

  void kernel_crc32_singleByte(Register crc, Register buf, Register len, Register table, Register tmp);

  //
  // Debugging
  //

  // assert on cr0
  void asm_assert(bool check_equal, const char* msg, int id);
  void asm_assert_eq(const char* msg, int id) { asm_assert(true, msg, id); }
  void asm_assert_ne(const char* msg, int id) { asm_assert(false, msg, id); }

 private:
  void asm_assert_mems_zero(bool check_equal, int size, int mem_offset, Register mem_base,
                            const char* msg, int id);

 public:

  void asm_assert_mem8_is_zero(int mem_offset, Register mem_base, const char* msg, int id) {
    asm_assert_mems_zero(true,  8, mem_offset, mem_base, msg, id);
  }
  void asm_assert_mem8_isnot_zero(int mem_offset, Register mem_base, const char* msg, int id) {
    asm_assert_mems_zero(false, 8, mem_offset, mem_base, msg, id);
  }
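
  // Illustrative debug-only assertion on CCR0 (the id is an arbitrary marker):
  //   cmpdi(CCR0, Rthread, 0);
  //   asm_assert_ne("thread must not be NULL", 0x123);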

  // Verify R16_thread contents.
  void verify_thread();

  // Emit code to verify that reg contains a valid oop if +VerifyOops is set.
  void verify_oop(Register reg, const char* s = "broken oop");
  void verify_oop_addr(RegisterOrConstant offs, Register base, const char* s = "contains broken oop");

  // TODO: verify method and klass metadata (compare against vptr?)
  void _verify_method_ptr(Register reg, const char * msg, const char * file, int line) {}
  void _verify_klass_ptr(Register reg, const char * msg, const char * file, int line) {}

  // Convenience method returning function entry. For the ELFv1 case
  // creates function descriptor at the current address and returns
  // the pointer to it. For the ELFv2 case returns the current address.
  inline address function_entry();

#define verify_method_ptr(reg) _verify_method_ptr(reg, "broken method " #reg, __FILE__, __LINE__)
#define verify_klass_ptr(reg) _verify_klass_ptr(reg, "broken klass " #reg, __FILE__, __LINE__)

 private:

  enum {
    stop_stop                = 0,
    stop_untested            = 1,
    stop_unimplemented       = 2,
    stop_shouldnotreachhere  = 3,
    stop_end                 = 4
  };
  void stop(int type, const char* msg, int id);

 public:
  // Prints msg, dumps registers and stops execution.
  void stop         (const char* msg = "", int id = 0) { stop(stop_stop,               msg, id); }
  void untested     (const char* msg = "", int id = 0) { stop(stop_untested,           msg, id); }
  void unimplemented(const char* msg = "", int id = 0) { stop(stop_unimplemented,      msg, id); }
  void should_not_reach_here()                         { stop(stop_shouldnotreachhere,  "", -1); }

  void zap_from_to(Register low, int before, Register high, int after, Register val, Register addr) PRODUCT_RETURN;
};

// class SkipIfEqualZero:
//
// Instantiating this class will result in assembly code being output that will
// jump around any code emitted between the creation of the instance and its
// automatic destruction at the end of a scope block, depending on the value of
// the flag passed to the constructor, which will be checked at run-time.
class SkipIfEqualZero : public StackObj {
 private:
  MacroAssembler* _masm;
  Label _label;

 public:
   // 'Temp' is a temp register that this object can use (and trash).
   explicit SkipIfEqualZero(MacroAssembler*, Register temp, const bool* flag_addr);
   ~SkipIfEqualZero();
};
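
// Illustrative use (flag name is a placeholder for any bool VM flag):
//   {
//     SkipIfEqualZero skip(masm, Rtmp, &SomeBoolFlag);
//     // code emitted here is executed only when SomeBoolFlag is true at run-time
//   } // destructor binds the skip-around label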

#endif // CPU_PPC_VM_MACROASSEMBLER_PPC_HPP