library_call.cpp revision 605:98cb887364d3
1/*
2 * Copyright 1999-2008 Sun Microsystems, Inc.  All Rights Reserved.
3 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
4 *
5 * This code is free software; you can redistribute it and/or modify it
6 * under the terms of the GNU General Public License version 2 only, as
7 * published by the Free Software Foundation.
8 *
9 * This code is distributed in the hope that it will be useful, but WITHOUT
10 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
11 * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
12 * version 2 for more details (a copy is included in the LICENSE file that
13 * accompanied this code).
14 *
15 * You should have received a copy of the GNU General Public License version
16 * 2 along with this work; if not, write to the Free Software Foundation,
17 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
18 *
19 * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
20 * CA 95054 USA or visit www.sun.com if you need additional information or
21 * have any questions.
22 *
23 */
24
25#include "incls/_precompiled.incl"
26#include "incls/_library_call.cpp.incl"
27
28class LibraryIntrinsic : public InlineCallGenerator {
29  // Extend the set of intrinsics known to the runtime:
30 public:
31 private:
32  bool             _is_virtual;
33  vmIntrinsics::ID _intrinsic_id;
34
35 public:
36  LibraryIntrinsic(ciMethod* m, bool is_virtual, vmIntrinsics::ID id)
37    : InlineCallGenerator(m),
38      _is_virtual(is_virtual),
39      _intrinsic_id(id)
40  {
41  }
42  virtual bool is_intrinsic() const { return true; }
43  virtual bool is_virtual()   const { return _is_virtual; }
44  virtual JVMState* generate(JVMState* jvms);
45  vmIntrinsics::ID intrinsic_id() const { return _intrinsic_id; }
46};
47
48
49// Local helper class for LibraryIntrinsic:
50class LibraryCallKit : public GraphKit {
51 private:
52  LibraryIntrinsic* _intrinsic;   // the library intrinsic being called
53
54 public:
55  LibraryCallKit(JVMState* caller, LibraryIntrinsic* intrinsic)
56    : GraphKit(caller),
57      _intrinsic(intrinsic)
58  {
59  }
60
61  ciMethod*         caller()    const    { return jvms()->method(); }
62  int               bci()       const    { return jvms()->bci(); }
63  LibraryIntrinsic* intrinsic() const    { return _intrinsic; }
64  vmIntrinsics::ID  intrinsic_id() const { return _intrinsic->intrinsic_id(); }
65  ciMethod*         callee()    const    { return _intrinsic->method(); }
66  ciSignature*      signature() const    { return callee()->signature(); }
67  int               arg_size()  const    { return callee()->arg_size(); }
68
69  bool try_to_inline();
70
71  // Helper functions to inline natives
72  void push_result(RegionNode* region, PhiNode* value);
73  Node* generate_guard(Node* test, RegionNode* region, float true_prob);
74  Node* generate_slow_guard(Node* test, RegionNode* region);
75  Node* generate_fair_guard(Node* test, RegionNode* region);
76  Node* generate_negative_guard(Node* index, RegionNode* region,
77                                // resulting CastII of index:
78                                Node* *pos_index = NULL);
79  Node* generate_nonpositive_guard(Node* index, bool never_negative,
80                                   // resulting CastII of index:
81                                   Node* *pos_index = NULL);
82  Node* generate_limit_guard(Node* offset, Node* subseq_length,
83                             Node* array_length,
84                             RegionNode* region);
85  Node* generate_current_thread(Node* &tls_output);
86  address basictype2arraycopy(BasicType t, Node *src_offset, Node *dest_offset,
87                              bool disjoint_bases, const char* &name);
88  Node* load_mirror_from_klass(Node* klass);
89  Node* load_klass_from_mirror_common(Node* mirror, bool never_see_null,
90                                      int nargs,
91                                      RegionNode* region, int null_path,
92                                      int offset);
93  Node* load_klass_from_mirror(Node* mirror, bool never_see_null, int nargs,
94                               RegionNode* region, int null_path) {
95    int offset = java_lang_Class::klass_offset_in_bytes();
96    return load_klass_from_mirror_common(mirror, never_see_null, nargs,
97                                         region, null_path,
98                                         offset);
99  }
100  Node* load_array_klass_from_mirror(Node* mirror, bool never_see_null,
101                                     int nargs,
102                                     RegionNode* region, int null_path) {
103    int offset = java_lang_Class::array_klass_offset_in_bytes();
104    return load_klass_from_mirror_common(mirror, never_see_null, nargs,
105                                         region, null_path,
106                                         offset);
107  }
108  Node* generate_access_flags_guard(Node* kls,
109                                    int modifier_mask, int modifier_bits,
110                                    RegionNode* region);
111  Node* generate_interface_guard(Node* kls, RegionNode* region);
112  Node* generate_array_guard(Node* kls, RegionNode* region) {
113    return generate_array_guard_common(kls, region, false, false);
114  }
115  Node* generate_non_array_guard(Node* kls, RegionNode* region) {
116    return generate_array_guard_common(kls, region, false, true);
117  }
118  Node* generate_objArray_guard(Node* kls, RegionNode* region) {
119    return generate_array_guard_common(kls, region, true, false);
120  }
121  Node* generate_non_objArray_guard(Node* kls, RegionNode* region) {
122    return generate_array_guard_common(kls, region, true, true);
123  }
124  Node* generate_array_guard_common(Node* kls, RegionNode* region,
125                                    bool obj_array, bool not_array);
126  Node* generate_virtual_guard(Node* obj_klass, RegionNode* slow_region);
127  CallJavaNode* generate_method_call(vmIntrinsics::ID method_id,
128                                     bool is_virtual = false, bool is_static = false);
129  CallJavaNode* generate_method_call_static(vmIntrinsics::ID method_id) {
130    return generate_method_call(method_id, false, true);
131  }
132  CallJavaNode* generate_method_call_virtual(vmIntrinsics::ID method_id) {
133    return generate_method_call(method_id, true, false);
134  }
135
136  bool inline_string_compareTo();
137  bool inline_string_indexOf();
138  Node* string_indexOf(Node* string_object, ciTypeArray* target_array, jint offset, jint cache_i, jint md2_i);
139  Node* pop_math_arg();
140  bool runtime_math(const TypeFunc* call_type, address funcAddr, const char* funcName);
141  bool inline_math_native(vmIntrinsics::ID id);
142  bool inline_trig(vmIntrinsics::ID id);
143  bool inline_trans(vmIntrinsics::ID id);
144  bool inline_abs(vmIntrinsics::ID id);
145  bool inline_sqrt(vmIntrinsics::ID id);
146  bool inline_pow(vmIntrinsics::ID id);
147  bool inline_exp(vmIntrinsics::ID id);
148  bool inline_min_max(vmIntrinsics::ID id);
149  Node* generate_min_max(vmIntrinsics::ID id, Node* x, Node* y);
150  // This returns Type::AnyPtr, RawPtr, or OopPtr.
151  int classify_unsafe_addr(Node* &base, Node* &offset);
152  Node* make_unsafe_address(Node* base, Node* offset);
153  bool inline_unsafe_access(bool is_native_ptr, bool is_store, BasicType type, bool is_volatile);
154  bool inline_unsafe_prefetch(bool is_native_ptr, bool is_store, bool is_static);
155  bool inline_unsafe_allocate();
156  bool inline_unsafe_copyMemory();
157  bool inline_native_currentThread();
158  bool inline_native_time_funcs(bool isNano);
159  bool inline_native_isInterrupted();
160  bool inline_native_Class_query(vmIntrinsics::ID id);
161  bool inline_native_subtype_check();
162
163  bool inline_native_newArray();
164  bool inline_native_getLength();
165  bool inline_array_copyOf(bool is_copyOfRange);
166  bool inline_array_equals();
167  bool inline_native_clone(bool is_virtual);
168  bool inline_native_Reflection_getCallerClass();
169  bool inline_native_AtomicLong_get();
170  bool inline_native_AtomicLong_attemptUpdate();
171  bool is_method_invoke_or_aux_frame(JVMState* jvms);
172  // Helper function for inlining native object hash method
173  bool inline_native_hashcode(bool is_virtual, bool is_static);
174  bool inline_native_getClass();
175
176  // Helper functions for inlining arraycopy
177  bool inline_arraycopy();
178  void generate_arraycopy(const TypePtr* adr_type,
179                          BasicType basic_elem_type,
180                          Node* src,  Node* src_offset,
181                          Node* dest, Node* dest_offset,
182                          Node* copy_length,
183                          int nargs,  // arguments on stack for debug info
184                          bool disjoint_bases = false,
185                          bool length_never_negative = false,
186                          RegionNode* slow_region = NULL);
187  AllocateArrayNode* tightly_coupled_allocation(Node* ptr,
188                                                RegionNode* slow_region);
189  void generate_clear_array(const TypePtr* adr_type,
190                            Node* dest,
191                            BasicType basic_elem_type,
192                            Node* slice_off,
193                            Node* slice_len,
194                            Node* slice_end);
195  bool generate_block_arraycopy(const TypePtr* adr_type,
196                                BasicType basic_elem_type,
197                                AllocateNode* alloc,
198                                Node* src,  Node* src_offset,
199                                Node* dest, Node* dest_offset,
200                                Node* dest_size);
201  void generate_slow_arraycopy(const TypePtr* adr_type,
202                               Node* src,  Node* src_offset,
203                               Node* dest, Node* dest_offset,
204                               Node* copy_length,
205                               int nargs);
206  Node* generate_checkcast_arraycopy(const TypePtr* adr_type,
207                                     Node* dest_elem_klass,
208                                     Node* src,  Node* src_offset,
209                                     Node* dest, Node* dest_offset,
210                                     Node* copy_length, int nargs);
211  Node* generate_generic_arraycopy(const TypePtr* adr_type,
212                                   Node* src,  Node* src_offset,
213                                   Node* dest, Node* dest_offset,
214                                   Node* copy_length, int nargs);
215  void generate_unchecked_arraycopy(const TypePtr* adr_type,
216                                    BasicType basic_elem_type,
217                                    bool disjoint_bases,
218                                    Node* src,  Node* src_offset,
219                                    Node* dest, Node* dest_offset,
220                                    Node* copy_length);
221  bool inline_unsafe_CAS(BasicType type);
222  bool inline_unsafe_ordered_store(BasicType type);
223  bool inline_fp_conversions(vmIntrinsics::ID id);
224  bool inline_reverseBytes(vmIntrinsics::ID id);
225};
226
227
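// Overview of the intrinsic machinery defined below (summary of this file's
// flow, for orientation):
//   1. Compile::make_vm_intrinsic(m, is_virtual) screens the candidate method
//      against the relevant -XX flags and either declines (returns NULL) or
//      wraps it in a LibraryIntrinsic call generator.
//   2. LibraryIntrinsic::generate(jvms) builds a LibraryCallKit over the
//      caller's JVMState and asks it to try_to_inline().
//   3. LibraryCallKit::try_to_inline() dispatches on intrinsic_id() to one of
//      the inline_* helpers, which emit the replacement IR and push the
//      result; returning false makes the compiler fall back to a normal call.
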
228//---------------------------make_vm_intrinsic----------------------------
229CallGenerator* Compile::make_vm_intrinsic(ciMethod* m, bool is_virtual) {
230  vmIntrinsics::ID id = m->intrinsic_id();
231  assert(id != vmIntrinsics::_none, "must be a VM intrinsic");
232
233  if (DisableIntrinsic[0] != '\0'
234      && strstr(DisableIntrinsic, vmIntrinsics::name_at(id)) != NULL) {
235    // disabled by a user request on the command line:
236    // example: -XX:DisableIntrinsic=_hashCode,_getClass
237    return NULL;
238  }
239
240  if (!m->is_loaded()) {
241    // do not attempt to inline unloaded methods
242    return NULL;
243  }
244
245  // Only a few intrinsics implement a virtual dispatch.
246  // They are expensive calls which are also frequently overridden.
247  if (is_virtual) {
248    switch (id) {
249    case vmIntrinsics::_hashCode:
250    case vmIntrinsics::_clone:
251      // OK, Object.hashCode and Object.clone intrinsics come in both flavors
252      break;
253    default:
254      return NULL;
255    }
256  }
257
258  // -XX:-InlineNatives disables nearly all intrinsics:
259  if (!InlineNatives) {
260    switch (id) {
261    case vmIntrinsics::_indexOf:
262    case vmIntrinsics::_compareTo:
263    case vmIntrinsics::_equalsC:
264      break;  // InlineNatives does not control String.compareTo, String.indexOf, or Arrays.equals
265    default:
266      return NULL;
267    }
268  }
269
270  switch (id) {
271  case vmIntrinsics::_compareTo:
272    if (!SpecialStringCompareTo)  return NULL;
273    break;
274  case vmIntrinsics::_indexOf:
275    if (!SpecialStringIndexOf)  return NULL;
276    break;
277  case vmIntrinsics::_equalsC:
278    if (!SpecialArraysEquals)  return NULL;
279    break;
280  case vmIntrinsics::_arraycopy:
281    if (!InlineArrayCopy)  return NULL;
282    break;
283  case vmIntrinsics::_copyMemory:
284    if (StubRoutines::unsafe_arraycopy() == NULL)  return NULL;
285    if (!InlineArrayCopy)  return NULL;
286    break;
287  case vmIntrinsics::_hashCode:
288    if (!InlineObjectHash)  return NULL;
289    break;
290  case vmIntrinsics::_clone:
291  case vmIntrinsics::_copyOf:
292  case vmIntrinsics::_copyOfRange:
293    if (!InlineObjectCopy)  return NULL;
294    // These also use the arraycopy intrinsic mechanism:
295    if (!InlineArrayCopy)  return NULL;
296    break;
297  case vmIntrinsics::_checkIndex:
298    // We do not intrinsify this.  The optimizer does fine with it.
299    return NULL;
300
301  case vmIntrinsics::_get_AtomicLong:
302  case vmIntrinsics::_attemptUpdate:
303    if (!InlineAtomicLong)  return NULL;
304    break;
305
306  case vmIntrinsics::_Object_init:
307  case vmIntrinsics::_invoke:
308    // We do not intrinsify these; they are marked for other purposes.
309    return NULL;
310
311  case vmIntrinsics::_getCallerClass:
312    if (!UseNewReflection)  return NULL;
313    if (!InlineReflectionGetCallerClass)  return NULL;
314    if (!JDK_Version::is_gte_jdk14x_version())  return NULL;
315    break;
316
317  default:
318    break;
319  }
320
321  // -XX:-InlineClassNatives disables natives from the Class class.
322  // The flag applies to all reflective calls, notably Array.newArray
323  // (visible to Java programmers as Array.newInstance).
324  if (m->holder()->name() == ciSymbol::java_lang_Class() ||
325      m->holder()->name() == ciSymbol::java_lang_reflect_Array()) {
326    if (!InlineClassNatives)  return NULL;
327  }
328
329  // -XX:-InlineThreadNatives disables natives from the Thread class.
330  if (m->holder()->name() == ciSymbol::java_lang_Thread()) {
331    if (!InlineThreadNatives)  return NULL;
332  }
333
334  // -XX:-InlineMathNatives disables natives from the Math,Float and Double classes.
335  if (m->holder()->name() == ciSymbol::java_lang_Math() ||
336      m->holder()->name() == ciSymbol::java_lang_Float() ||
337      m->holder()->name() == ciSymbol::java_lang_Double()) {
338    if (!InlineMathNatives)  return NULL;
339  }
340
341  // -XX:-InlineUnsafeOps disables natives from the Unsafe class.
342  if (m->holder()->name() == ciSymbol::sun_misc_Unsafe()) {
343    if (!InlineUnsafeOps)  return NULL;
344  }
345
346  return new LibraryIntrinsic(m, is_virtual, (vmIntrinsics::ID) id);
347}
348
349//----------------------register_library_intrinsics-----------------------
350// Initialize this file's data structures, for each Compile instance.
351void Compile::register_library_intrinsics() {
352  // Nothing to do here.
353}
354
355JVMState* LibraryIntrinsic::generate(JVMState* jvms) {
356  LibraryCallKit kit(jvms, this);
357  Compile* C = kit.C;
358  int nodes = C->unique();
359#ifndef PRODUCT
360  if ((PrintIntrinsics || PrintInlining NOT_PRODUCT( || PrintOptoInlining) ) && Verbose) {
361    char buf[1000];
362    const char* str = vmIntrinsics::short_name_as_C_string(intrinsic_id(), buf, sizeof(buf));
363    tty->print_cr("Intrinsic %s", str);
364  }
365#endif
366  if (kit.try_to_inline()) {
367    if (PrintIntrinsics || PrintInlining NOT_PRODUCT( || PrintOptoInlining) ) {
368      tty->print("Inlining intrinsic %s%s at bci:%d in",
369                 vmIntrinsics::name_at(intrinsic_id()),
370                 (is_virtual() ? " (virtual)" : ""), kit.bci());
371      kit.caller()->print_short_name(tty);
372      tty->print_cr(" (%d bytes)", kit.caller()->code_size());
373    }
374    C->gather_intrinsic_statistics(intrinsic_id(), is_virtual(), Compile::_intrinsic_worked);
375    if (C->log()) {
376      C->log()->elem("intrinsic id='%s'%s nodes='%d'",
377                     vmIntrinsics::name_at(intrinsic_id()),
378                     (is_virtual() ? " virtual='1'" : ""),
379                     C->unique() - nodes);
380    }
381    return kit.transfer_exceptions_into_jvms();
382  }
383
384  if (PrintIntrinsics) {
385    switch (intrinsic_id()) {
386    case vmIntrinsics::_invoke:
387    case vmIntrinsics::_Object_init:
388      // We do not expect to inline these, so do not produce any noise about them.
389      break;
390    default:
391      tty->print("Did not inline intrinsic %s%s at bci:%d in",
392                 vmIntrinsics::name_at(intrinsic_id()),
393                 (is_virtual() ? " (virtual)" : ""), kit.bci());
394      kit.caller()->print_short_name(tty);
395      tty->print_cr(" (%d bytes)", kit.caller()->code_size());
396    }
397  }
398  C->gather_intrinsic_statistics(intrinsic_id(), is_virtual(), Compile::_intrinsic_failed);
399  return NULL;
400}
401
402bool LibraryCallKit::try_to_inline() {
403  // Handle symbolic names for otherwise undistinguished boolean switches:
404  const bool is_store       = true;
405  const bool is_native_ptr  = true;
406  const bool is_static      = true;
407
408  switch (intrinsic_id()) {
409  case vmIntrinsics::_hashCode:
410    return inline_native_hashcode(intrinsic()->is_virtual(), !is_static);
411  case vmIntrinsics::_identityHashCode:
412    return inline_native_hashcode(/*!virtual*/ false, is_static);
413  case vmIntrinsics::_getClass:
414    return inline_native_getClass();
415
416  case vmIntrinsics::_dsin:
417  case vmIntrinsics::_dcos:
418  case vmIntrinsics::_dtan:
419  case vmIntrinsics::_dabs:
420  case vmIntrinsics::_datan2:
421  case vmIntrinsics::_dsqrt:
422  case vmIntrinsics::_dexp:
423  case vmIntrinsics::_dlog:
424  case vmIntrinsics::_dlog10:
425  case vmIntrinsics::_dpow:
426    return inline_math_native(intrinsic_id());
427
428  case vmIntrinsics::_min:
429  case vmIntrinsics::_max:
430    return inline_min_max(intrinsic_id());
431
432  case vmIntrinsics::_arraycopy:
433    return inline_arraycopy();
434
435  case vmIntrinsics::_compareTo:
436    return inline_string_compareTo();
437  case vmIntrinsics::_indexOf:
438    return inline_string_indexOf();
439
440  case vmIntrinsics::_getObject:
441    return inline_unsafe_access(!is_native_ptr, !is_store, T_OBJECT, false);
442  case vmIntrinsics::_getBoolean:
443    return inline_unsafe_access(!is_native_ptr, !is_store, T_BOOLEAN, false);
444  case vmIntrinsics::_getByte:
445    return inline_unsafe_access(!is_native_ptr, !is_store, T_BYTE, false);
446  case vmIntrinsics::_getShort:
447    return inline_unsafe_access(!is_native_ptr, !is_store, T_SHORT, false);
448  case vmIntrinsics::_getChar:
449    return inline_unsafe_access(!is_native_ptr, !is_store, T_CHAR, false);
450  case vmIntrinsics::_getInt:
451    return inline_unsafe_access(!is_native_ptr, !is_store, T_INT, false);
452  case vmIntrinsics::_getLong:
453    return inline_unsafe_access(!is_native_ptr, !is_store, T_LONG, false);
454  case vmIntrinsics::_getFloat:
455    return inline_unsafe_access(!is_native_ptr, !is_store, T_FLOAT, false);
456  case vmIntrinsics::_getDouble:
457    return inline_unsafe_access(!is_native_ptr, !is_store, T_DOUBLE, false);
458
459  case vmIntrinsics::_putObject:
460    return inline_unsafe_access(!is_native_ptr, is_store, T_OBJECT, false);
461  case vmIntrinsics::_putBoolean:
462    return inline_unsafe_access(!is_native_ptr, is_store, T_BOOLEAN, false);
463  case vmIntrinsics::_putByte:
464    return inline_unsafe_access(!is_native_ptr, is_store, T_BYTE, false);
465  case vmIntrinsics::_putShort:
466    return inline_unsafe_access(!is_native_ptr, is_store, T_SHORT, false);
467  case vmIntrinsics::_putChar:
468    return inline_unsafe_access(!is_native_ptr, is_store, T_CHAR, false);
469  case vmIntrinsics::_putInt:
470    return inline_unsafe_access(!is_native_ptr, is_store, T_INT, false);
471  case vmIntrinsics::_putLong:
472    return inline_unsafe_access(!is_native_ptr, is_store, T_LONG, false);
473  case vmIntrinsics::_putFloat:
474    return inline_unsafe_access(!is_native_ptr, is_store, T_FLOAT, false);
475  case vmIntrinsics::_putDouble:
476    return inline_unsafe_access(!is_native_ptr, is_store, T_DOUBLE, false);
477
478  case vmIntrinsics::_getByte_raw:
479    return inline_unsafe_access(is_native_ptr, !is_store, T_BYTE, false);
480  case vmIntrinsics::_getShort_raw:
481    return inline_unsafe_access(is_native_ptr, !is_store, T_SHORT, false);
482  case vmIntrinsics::_getChar_raw:
483    return inline_unsafe_access(is_native_ptr, !is_store, T_CHAR, false);
484  case vmIntrinsics::_getInt_raw:
485    return inline_unsafe_access(is_native_ptr, !is_store, T_INT, false);
486  case vmIntrinsics::_getLong_raw:
487    return inline_unsafe_access(is_native_ptr, !is_store, T_LONG, false);
488  case vmIntrinsics::_getFloat_raw:
489    return inline_unsafe_access(is_native_ptr, !is_store, T_FLOAT, false);
490  case vmIntrinsics::_getDouble_raw:
491    return inline_unsafe_access(is_native_ptr, !is_store, T_DOUBLE, false);
492  case vmIntrinsics::_getAddress_raw:
493    return inline_unsafe_access(is_native_ptr, !is_store, T_ADDRESS, false);
494
495  case vmIntrinsics::_putByte_raw:
496    return inline_unsafe_access(is_native_ptr, is_store, T_BYTE, false);
497  case vmIntrinsics::_putShort_raw:
498    return inline_unsafe_access(is_native_ptr, is_store, T_SHORT, false);
499  case vmIntrinsics::_putChar_raw:
500    return inline_unsafe_access(is_native_ptr, is_store, T_CHAR, false);
501  case vmIntrinsics::_putInt_raw:
502    return inline_unsafe_access(is_native_ptr, is_store, T_INT, false);
503  case vmIntrinsics::_putLong_raw:
504    return inline_unsafe_access(is_native_ptr, is_store, T_LONG, false);
505  case vmIntrinsics::_putFloat_raw:
506    return inline_unsafe_access(is_native_ptr, is_store, T_FLOAT, false);
507  case vmIntrinsics::_putDouble_raw:
508    return inline_unsafe_access(is_native_ptr, is_store, T_DOUBLE, false);
509  case vmIntrinsics::_putAddress_raw:
510    return inline_unsafe_access(is_native_ptr, is_store, T_ADDRESS, false);
511
512  case vmIntrinsics::_getObjectVolatile:
513    return inline_unsafe_access(!is_native_ptr, !is_store, T_OBJECT, true);
514  case vmIntrinsics::_getBooleanVolatile:
515    return inline_unsafe_access(!is_native_ptr, !is_store, T_BOOLEAN, true);
516  case vmIntrinsics::_getByteVolatile:
517    return inline_unsafe_access(!is_native_ptr, !is_store, T_BYTE, true);
518  case vmIntrinsics::_getShortVolatile:
519    return inline_unsafe_access(!is_native_ptr, !is_store, T_SHORT, true);
520  case vmIntrinsics::_getCharVolatile:
521    return inline_unsafe_access(!is_native_ptr, !is_store, T_CHAR, true);
522  case vmIntrinsics::_getIntVolatile:
523    return inline_unsafe_access(!is_native_ptr, !is_store, T_INT, true);
524  case vmIntrinsics::_getLongVolatile:
525    return inline_unsafe_access(!is_native_ptr, !is_store, T_LONG, true);
526  case vmIntrinsics::_getFloatVolatile:
527    return inline_unsafe_access(!is_native_ptr, !is_store, T_FLOAT, true);
528  case vmIntrinsics::_getDoubleVolatile:
529    return inline_unsafe_access(!is_native_ptr, !is_store, T_DOUBLE, true);
530
531  case vmIntrinsics::_putObjectVolatile:
532    return inline_unsafe_access(!is_native_ptr, is_store, T_OBJECT, true);
533  case vmIntrinsics::_putBooleanVolatile:
534    return inline_unsafe_access(!is_native_ptr, is_store, T_BOOLEAN, true);
535  case vmIntrinsics::_putByteVolatile:
536    return inline_unsafe_access(!is_native_ptr, is_store, T_BYTE, true);
537  case vmIntrinsics::_putShortVolatile:
538    return inline_unsafe_access(!is_native_ptr, is_store, T_SHORT, true);
539  case vmIntrinsics::_putCharVolatile:
540    return inline_unsafe_access(!is_native_ptr, is_store, T_CHAR, true);
541  case vmIntrinsics::_putIntVolatile:
542    return inline_unsafe_access(!is_native_ptr, is_store, T_INT, true);
543  case vmIntrinsics::_putLongVolatile:
544    return inline_unsafe_access(!is_native_ptr, is_store, T_LONG, true);
545  case vmIntrinsics::_putFloatVolatile:
546    return inline_unsafe_access(!is_native_ptr, is_store, T_FLOAT, true);
547  case vmIntrinsics::_putDoubleVolatile:
548    return inline_unsafe_access(!is_native_ptr, is_store, T_DOUBLE, true);
549
550  case vmIntrinsics::_prefetchRead:
551    return inline_unsafe_prefetch(!is_native_ptr, !is_store, !is_static);
552  case vmIntrinsics::_prefetchWrite:
553    return inline_unsafe_prefetch(!is_native_ptr, is_store, !is_static);
554  case vmIntrinsics::_prefetchReadStatic:
555    return inline_unsafe_prefetch(!is_native_ptr, !is_store, is_static);
556  case vmIntrinsics::_prefetchWriteStatic:
557    return inline_unsafe_prefetch(!is_native_ptr, is_store, is_static);
558
559  case vmIntrinsics::_compareAndSwapObject:
560    return inline_unsafe_CAS(T_OBJECT);
561  case vmIntrinsics::_compareAndSwapInt:
562    return inline_unsafe_CAS(T_INT);
563  case vmIntrinsics::_compareAndSwapLong:
564    return inline_unsafe_CAS(T_LONG);
565
566  case vmIntrinsics::_putOrderedObject:
567    return inline_unsafe_ordered_store(T_OBJECT);
568  case vmIntrinsics::_putOrderedInt:
569    return inline_unsafe_ordered_store(T_INT);
570  case vmIntrinsics::_putOrderedLong:
571    return inline_unsafe_ordered_store(T_LONG);
572
573  case vmIntrinsics::_currentThread:
574    return inline_native_currentThread();
575  case vmIntrinsics::_isInterrupted:
576    return inline_native_isInterrupted();
577
578  case vmIntrinsics::_currentTimeMillis:
579    return inline_native_time_funcs(false);
580  case vmIntrinsics::_nanoTime:
581    return inline_native_time_funcs(true);
582  case vmIntrinsics::_allocateInstance:
583    return inline_unsafe_allocate();
584  case vmIntrinsics::_copyMemory:
585    return inline_unsafe_copyMemory();
586  case vmIntrinsics::_newArray:
587    return inline_native_newArray();
588  case vmIntrinsics::_getLength:
589    return inline_native_getLength();
590  case vmIntrinsics::_copyOf:
591    return inline_array_copyOf(false);
592  case vmIntrinsics::_copyOfRange:
593    return inline_array_copyOf(true);
594  case vmIntrinsics::_equalsC:
595    return inline_array_equals();
596  case vmIntrinsics::_clone:
597    return inline_native_clone(intrinsic()->is_virtual());
598
599  case vmIntrinsics::_isAssignableFrom:
600    return inline_native_subtype_check();
601
602  case vmIntrinsics::_isInstance:
603  case vmIntrinsics::_getModifiers:
604  case vmIntrinsics::_isInterface:
605  case vmIntrinsics::_isArray:
606  case vmIntrinsics::_isPrimitive:
607  case vmIntrinsics::_getSuperclass:
608  case vmIntrinsics::_getComponentType:
609  case vmIntrinsics::_getClassAccessFlags:
610    return inline_native_Class_query(intrinsic_id());
611
612  case vmIntrinsics::_floatToRawIntBits:
613  case vmIntrinsics::_floatToIntBits:
614  case vmIntrinsics::_intBitsToFloat:
615  case vmIntrinsics::_doubleToRawLongBits:
616  case vmIntrinsics::_doubleToLongBits:
617  case vmIntrinsics::_longBitsToDouble:
618    return inline_fp_conversions(intrinsic_id());
619
620  case vmIntrinsics::_reverseBytes_i:
621  case vmIntrinsics::_reverseBytes_l:
622    return inline_reverseBytes((vmIntrinsics::ID) intrinsic_id());
623
624  case vmIntrinsics::_get_AtomicLong:
625    return inline_native_AtomicLong_get();
626  case vmIntrinsics::_attemptUpdate:
627    return inline_native_AtomicLong_attemptUpdate();
628
629  case vmIntrinsics::_getCallerClass:
630    return inline_native_Reflection_getCallerClass();
631
632  default:
633    // If you get here, it may be that someone has added a new intrinsic
634    // to the list in vmSymbols.hpp without implementing it here.
635#ifndef PRODUCT
636    if ((PrintMiscellaneous && (Verbose || WizardMode)) || PrintOpto) {
637      tty->print_cr("*** Warning: Unimplemented intrinsic %s(%d)",
638                    vmIntrinsics::name_at(intrinsic_id()), intrinsic_id());
639    }
640#endif
641    return false;
642  }
643}
644
645//------------------------------push_result------------------------------
646// Helper function for finishing intrinsics.
647void LibraryCallKit::push_result(RegionNode* region, PhiNode* value) {
648  record_for_igvn(region);
649  set_control(_gvn.transform(region));
650  BasicType value_type = value->type()->basic_type();
651  push_node(value_type, _gvn.transform(value));
652}
653
654//------------------------------generate_guard---------------------------
655// Helper function for generating guarded fast-slow graph structures.
656// The given 'test', if true, guards a slow path.  If the test fails
657// then a fast path can be taken.  (We generally hope it fails.)
658// In all cases, GraphKit::control() is updated to the fast path.
659// The returned value represents the control for the slow path.
660// The return value is never 'top'; it is either a valid control
661// or NULL if it is obvious that the slow path can never be taken.
662// Also, if region and the slow control are not NULL, the slow edge
663// is appended to the region.
664Node* LibraryCallKit::generate_guard(Node* test, RegionNode* region, float true_prob) {
665  if (stopped()) {
666    // Already short circuited.
667    return NULL;
668  }
669
670  // Build an if node and its projections.
671  // If test is true we take the slow path, which we assume is uncommon.
672  if (_gvn.type(test) == TypeInt::ZERO) {
673    // The slow branch is never taken.  No need to build this guard.
674    return NULL;
675  }
676
677  IfNode* iff = create_and_map_if(control(), test, true_prob, COUNT_UNKNOWN);
678
679  Node* if_slow = _gvn.transform( new (C, 1) IfTrueNode(iff) );
680  if (if_slow == top()) {
681    // The slow branch is never taken.  No need to build this guard.
682    return NULL;
683  }
684
685  if (region != NULL)
686    region->add_req(if_slow);
687
688  Node* if_fast = _gvn.transform( new (C, 1) IfFalseNode(iff) );
689  set_control(if_fast);
690
691  return if_slow;
692}
693
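// Typical usage pattern for these guard helpers (illustrative sketch only,
// not compiled here): a caller collects slow-path control edges in a region
// while control() keeps tracking the fast path, e.g.
//
//   RegionNode* slow_region = new (C, 1) RegionNode(1);
//   record_for_igvn(slow_region);
//   generate_negative_guard(index, slow_region);    // appends a slow edge
//   generate_limit_guard(offset, length, array_length, slow_region);
//   // ... emit the fast path under the (possibly narrowed) control() ...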
694inline Node* LibraryCallKit::generate_slow_guard(Node* test, RegionNode* region) {
695  return generate_guard(test, region, PROB_UNLIKELY_MAG(3));
696}
697inline Node* LibraryCallKit::generate_fair_guard(Node* test, RegionNode* region) {
698  return generate_guard(test, region, PROB_FAIR);
699}
700
701inline Node* LibraryCallKit::generate_negative_guard(Node* index, RegionNode* region,
702                                                     Node* *pos_index) {
703  if (stopped())
704    return NULL;                // already stopped
705  if (_gvn.type(index)->higher_equal(TypeInt::POS)) // [0,maxint]
706    return NULL;                // index is already adequately typed
707  Node* cmp_lt = _gvn.transform( new (C, 3) CmpINode(index, intcon(0)) );
708  Node* bol_lt = _gvn.transform( new (C, 2) BoolNode(cmp_lt, BoolTest::lt) );
709  Node* is_neg = generate_guard(bol_lt, region, PROB_MIN);
710  if (is_neg != NULL && pos_index != NULL) {
711    // Emulate effect of Parse::adjust_map_after_if.
712    Node* ccast = new (C, 2) CastIINode(index, TypeInt::POS);
713    ccast->set_req(0, control());
714    (*pos_index) = _gvn.transform(ccast);
715  }
716  return is_neg;
717}
718
719inline Node* LibraryCallKit::generate_nonpositive_guard(Node* index, bool never_negative,
720                                                        Node* *pos_index) {
721  if (stopped())
722    return NULL;                // already stopped
723  if (_gvn.type(index)->higher_equal(TypeInt::POS1)) // [1,maxint]
724    return NULL;                // index is already adequately typed
725  Node* cmp_le = _gvn.transform( new (C, 3) CmpINode(index, intcon(0)) );
726  BoolTest::mask le_or_eq = (never_negative ? BoolTest::eq : BoolTest::le);
727  Node* bol_le = _gvn.transform( new (C, 2) BoolNode(cmp_le, le_or_eq) );
728  Node* is_notp = generate_guard(bol_le, NULL, PROB_MIN);
729  if (is_notp != NULL && pos_index != NULL) {
730    // Emulate effect of Parse::adjust_map_after_if.
731    Node* ccast = new (C, 2) CastIINode(index, TypeInt::POS1);
732    ccast->set_req(0, control());
733    (*pos_index) = _gvn.transform(ccast);
734  }
735  return is_notp;
736}
737
738// Make sure that 'position' is a valid limit index, in [0..length].
739// There are two equivalent plans for checking this:
740//   A. (offset + copyLength)  unsigned<=  arrayLength
741//   B. offset  <=  (arrayLength - copyLength)
742// We require that all of the values above, except for the sum and
743// difference, are already known to be non-negative.
744// Plan A is robust in the face of overflow, even if offset and copyLength
745// are both hugely positive.
746//
747// Plan B is less direct and intuitive, but it does not overflow at
748// all, since the difference of two non-negatives is always
749// representable.  Whenever Java methods must perform the equivalent
750// check they generally use Plan B instead of Plan A.
751// For the moment we use Plan A.
752inline Node* LibraryCallKit::generate_limit_guard(Node* offset,
753                                                  Node* subseq_length,
754                                                  Node* array_length,
755                                                  RegionNode* region) {
756  if (stopped())
757    return NULL;                // already stopped
758  bool zero_offset = _gvn.type(offset) == TypeInt::ZERO;
759  if (zero_offset && _gvn.eqv_uncast(subseq_length, array_length))
760    return NULL;                // common case of whole-array copy
761  Node* last = subseq_length;
762  if (!zero_offset)             // last += offset
763    last = _gvn.transform( new (C, 3) AddINode(last, offset));
764  Node* cmp_lt = _gvn.transform( new (C, 3) CmpUNode(array_length, last) );
765  Node* bol_lt = _gvn.transform( new (C, 2) BoolNode(cmp_lt, BoolTest::lt) );
766  Node* is_over = generate_guard(bol_lt, region, PROB_MIN);
767  return is_over;
768}
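// Worked example of Plan A's overflow robustness (illustrative): with 32-bit
// ints, offset = 0x7fffffff and subseq_length = 1 give last = 0x80000000
// after wraparound; interpreted as unsigned by CmpU that value exceeds every
// possible arrayLength (at most 2^31-1), so the guard above correctly routes
// such an out-of-range request to the slow path.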
769
770
771//--------------------------generate_current_thread--------------------
772Node* LibraryCallKit::generate_current_thread(Node* &tls_output) {
773  ciKlass*    thread_klass = env()->Thread_klass();
774  const Type* thread_type  = TypeOopPtr::make_from_klass(thread_klass)->cast_to_ptr_type(TypePtr::NotNull);
775  Node* thread = _gvn.transform(new (C, 1) ThreadLocalNode());
776  Node* p = basic_plus_adr(top()/*!oop*/, thread, in_bytes(JavaThread::threadObj_offset()));
777  Node* threadObj = make_load(NULL, p, thread_type, T_OBJECT);
778  tls_output = thread;
779  return threadObj;
780}
781
782
783//------------------------------inline_string_compareTo------------------------
784bool LibraryCallKit::inline_string_compareTo() {
785
786  const int value_offset = java_lang_String::value_offset_in_bytes();
787  const int count_offset = java_lang_String::count_offset_in_bytes();
788  const int offset_offset = java_lang_String::offset_offset_in_bytes();
789
790  _sp += 2;
791  Node *argument = pop();  // pop non-receiver first:  it was pushed second
792  Node *receiver = pop();
793
794  // Null check on self without removing any arguments.  The argument
795  // null check technically happens in the wrong place, which can lead to
796  // invalid stack traces when string compare is inlined into a method
797  // which handles NullPointerExceptions.
798  _sp += 2;
799  receiver = do_null_check(receiver, T_OBJECT);
800  argument = do_null_check(argument, T_OBJECT);
801  _sp -= 2;
802  if (stopped()) {
803    return true;
804  }
805
806  ciInstanceKlass* klass = env()->String_klass();
807  const TypeInstPtr* string_type =
808    TypeInstPtr::make(TypePtr::BotPTR, klass, false, NULL, 0);
809
810  Node* compare =
811    _gvn.transform(new (C, 7) StrCompNode(
812                        control(),
813                        memory(TypeAryPtr::CHARS),
814                        memory(string_type->add_offset(value_offset)),
815                        memory(string_type->add_offset(count_offset)),
816                        memory(string_type->add_offset(offset_offset)),
817                        receiver,
818                        argument));
819  push(compare);
820  return true;
821}
822
823//------------------------------inline_array_equals----------------------------
824bool LibraryCallKit::inline_array_equals() {
825
826  if (!Matcher::has_match_rule(Op_AryEq)) return false;
827
828  _sp += 2;
829  Node *argument2 = pop();
830  Node *argument1 = pop();
831
832  Node* equals =
833    _gvn.transform(new (C, 3) AryEqNode(control(),
834                                        argument1,
835                                        argument2)
836                   );
837  push(equals);
838  return true;
839}
840
841// Java version of String.indexOf(constant string)
842// class StringDecl {
843//   StringDecl(char[] ca) {
844//     offset = 0;
845//     count = ca.length;
846//     value = ca;
847//   }
848//   int offset;
849//   int count;
850//   char[] value;
851// }
852//
853// static int string_indexOf_J(StringDecl string_object, char[] target_object,
854//                             int targetOffset, int cache_i, int md2) {
855//   int cache = cache_i;
856//   int sourceOffset = string_object.offset;
857//   int sourceCount = string_object.count;
858//   int targetCount = target_object.length;
859//
860//   int targetCountLess1 = targetCount - 1;
861//   int sourceEnd = sourceOffset + sourceCount - targetCountLess1;
862//
863//   char[] source = string_object.value;
864//   char[] target = target_object;
865//   int lastChar = target[targetCountLess1];
866//
867//  outer_loop:
868//   for (int i = sourceOffset; i < sourceEnd; ) {
869//     int src = source[i + targetCountLess1];
870//     if (src == lastChar) {
871//       // With random strings and a 4-character alphabet,
872//       // reverse matching at this point sets up 0.8% fewer
873//       // frames, but (paradoxically) makes 0.3% more probes.
874//       // Since those probes are nearer the lastChar probe,
875//       // there may be a net D$ win with reverse matching.
876//       // But reversing the loop inhibits unrolling of the inner loop
877//       // for an unknown reason, and so does running the outer loop from
878//       // (sourceOffset - targetCountLess1) to (sourceOffset + sourceCount).
879//       for (int j = 0; j < targetCountLess1; j++) {
880//         if (target[targetOffset + j] != source[i+j]) {
881//           if ((cache & (1 << source[i+j])) == 0) {
882//             if (md2 < j+1) {
883//               i += j+1;
884//               continue outer_loop;
885//             }
886//           }
887//           i += md2;
888//           continue outer_loop;
889//         }
890//       }
891//       return i - sourceOffset;
892//     }
893//     if ((cache & (1 << src)) == 0) {
894//       i += targetCountLess1;
895//     } // using "i += targetCount;" and an "else i++;" causes a jump to jump.
896//     i++;
897//   }
898//   return -1;
899// }
900
901//------------------------------string_indexOf------------------------
902Node* LibraryCallKit::string_indexOf(Node* string_object, ciTypeArray* target_array, jint targetOffset_i,
903                                     jint cache_i, jint md2_i) {
904
905  Node* no_ctrl  = NULL;
906  float likely   = PROB_LIKELY(0.9);
907  float unlikely = PROB_UNLIKELY(0.9);
908
909  const int value_offset  = java_lang_String::value_offset_in_bytes();
910  const int count_offset  = java_lang_String::count_offset_in_bytes();
911  const int offset_offset = java_lang_String::offset_offset_in_bytes();
912
913  ciInstanceKlass* klass = env()->String_klass();
914  const TypeInstPtr* string_type = TypeInstPtr::make(TypePtr::BotPTR, klass, false, NULL, 0);
915  const TypeAryPtr*  source_type = TypeAryPtr::make(TypePtr::NotNull, TypeAry::make(TypeInt::CHAR,TypeInt::POS), ciTypeArrayKlass::make(T_CHAR), true, 0);
916
917  Node* sourceOffseta = basic_plus_adr(string_object, string_object, offset_offset);
918  Node* sourceOffset  = make_load(no_ctrl, sourceOffseta, TypeInt::INT, T_INT, string_type->add_offset(offset_offset));
919  Node* sourceCounta  = basic_plus_adr(string_object, string_object, count_offset);
920  Node* sourceCount   = make_load(no_ctrl, sourceCounta, TypeInt::INT, T_INT, string_type->add_offset(count_offset));
921  Node* sourcea       = basic_plus_adr(string_object, string_object, value_offset);
922  Node* source        = make_load(no_ctrl, sourcea, source_type, T_OBJECT, string_type->add_offset(value_offset));
923
924  Node* target = _gvn.transform( makecon(TypeOopPtr::make_from_constant(target_array)) );
925  jint target_length = target_array->length();
926  const TypeAry* target_array_type = TypeAry::make(TypeInt::CHAR, TypeInt::make(0, target_length, Type::WidenMin));
927  const TypeAryPtr* target_type = TypeAryPtr::make(TypePtr::BotPTR, target_array_type, target_array->klass(), true, Type::OffsetBot);
928
929  IdealKit kit(gvn(), control(), merged_memory());
930#define __ kit.
931  Node* zero             = __ ConI(0);
932  Node* one              = __ ConI(1);
933  Node* cache            = __ ConI(cache_i);
934  Node* md2              = __ ConI(md2_i);
935  Node* lastChar         = __ ConI(target_array->char_at(target_length - 1));
936  Node* targetCount      = __ ConI(target_length);
937  Node* targetCountLess1 = __ ConI(target_length - 1);
938  Node* targetOffset     = __ ConI(targetOffset_i);
939  Node* sourceEnd        = __ SubI(__ AddI(sourceOffset, sourceCount), targetCountLess1);
940
941  IdealVariable rtn(kit), i(kit), j(kit); __ declares_done();
942  Node* outer_loop = __ make_label(2 /* goto */);
943  Node* return_    = __ make_label(1);
944
945  __ set(rtn,__ ConI(-1));
946  __ loop(i, sourceOffset, BoolTest::lt, sourceEnd); {
947       Node* i2  = __ AddI(__ value(i), targetCountLess1);
948       // pin to prohibit loading of "next iteration" value which may SEGV (rare)
949       Node* src = load_array_element(__ ctrl(), source, i2, TypeAryPtr::CHARS);
950       __ if_then(src, BoolTest::eq, lastChar, unlikely); {
951         __ loop(j, zero, BoolTest::lt, targetCountLess1); {
952              Node* tpj = __ AddI(targetOffset, __ value(j));
953              Node* targ = load_array_element(no_ctrl, target, tpj, target_type);
954              Node* ipj  = __ AddI(__ value(i), __ value(j));
955              Node* src2 = load_array_element(no_ctrl, source, ipj, TypeAryPtr::CHARS);
956              __ if_then(targ, BoolTest::ne, src2); {
957                __ if_then(__ AndI(cache, __ LShiftI(one, src2)), BoolTest::eq, zero); {
958                  __ if_then(md2, BoolTest::lt, __ AddI(__ value(j), one)); {
959                    __ increment(i, __ AddI(__ value(j), one));
960                    __ goto_(outer_loop);
961                  } __ end_if(); __ dead(j);
962                }__ end_if(); __ dead(j);
963                __ increment(i, md2);
964                __ goto_(outer_loop);
965              }__ end_if();
966              __ increment(j, one);
967         }__ end_loop(); __ dead(j);
968         __ set(rtn, __ SubI(__ value(i), sourceOffset)); __ dead(i);
969         __ goto_(return_);
970       }__ end_if();
971       __ if_then(__ AndI(cache, __ LShiftI(one, src)), BoolTest::eq, zero, likely); {
972         __ increment(i, targetCountLess1);
973       }__ end_if();
974       __ increment(i, one);
975       __ bind(outer_loop);
976  }__ end_loop(); __ dead(i);
977  __ bind(return_);
978  __ drain_delay_transform();
979
980  set_control(__ ctrl());
981  Node* result = __ value(rtn);
982#undef __
983  C->set_has_loops(true);
984  return result;
985}
986
987
988//------------------------------inline_string_indexOf------------------------
989bool LibraryCallKit::inline_string_indexOf() {
990
991  _sp += 2;
992  Node *argument = pop();  // pop non-receiver first:  it was pushed second
993  Node *receiver = pop();
994
995  // don't intrinsify if argument isn't a constant string.
996  if (!argument->is_Con()) {
997    return false;
998  }
999  const TypeOopPtr* str_type = _gvn.type(argument)->isa_oopptr();
1000  if (str_type == NULL) {
1001    return false;
1002  }
1003  ciInstanceKlass* klass = env()->String_klass();
1004  ciObject* str_const = str_type->const_oop();
1005  if (str_const == NULL || str_const->klass() != klass) {
1006    return false;
1007  }
1008  ciInstance* str = str_const->as_instance();
1009  assert(str != NULL, "must be instance");
1010
1011  const int value_offset  = java_lang_String::value_offset_in_bytes();
1012  const int count_offset  = java_lang_String::count_offset_in_bytes();
1013  const int offset_offset = java_lang_String::offset_offset_in_bytes();
1014
1015  ciObject* v = str->field_value_by_offset(value_offset).as_object();
1016  int       o = str->field_value_by_offset(offset_offset).as_int();
1017  int       c = str->field_value_by_offset(count_offset).as_int();
1018  ciTypeArray* pat = v->as_type_array(); // pattern (argument) character array
1019
1020  // Constant strings have no offset and count == length, which
1021  // simplifies the resulting code somewhat, so let's optimize for that case.
1022  if (o != 0 || c != pat->length()) {
1023    return false;
1024  }
1025
1026  // Null check on self without removing any arguments.  The receiver
1027  // null check technically happens in the wrong place, which can lead to
1028  // invalid stack traces when String.indexOf is inlined into a method
1029  // which handles NullPointerExceptions.
1030  _sp += 2;
1031  receiver = do_null_check(receiver, T_OBJECT);
1032  // No null check on the argument is needed since it's a constant String oop.
1033  _sp -= 2;
1034  if (stopped()) {
1035    return true;
1036  }
1037
1038  // An empty pattern string always matches at index 0 (the beginning of the string).
1039  if (c == 0) {
1040    push(intcon(0));
1041    return true;
1042  }
1043
1044  jchar lastChar = pat->char_at(o + (c - 1));
1045  int cache = 0;
1046  int i;
1047  for (i = 0; i < c - 1; i++) {
1048    assert(i < pat->length(), "out of range");
1049    cache |= (1 << (pat->char_at(o + i) & (sizeof(cache) * BitsPerByte - 1)));
1050  }
1051
1052  int md2 = c;
1053  for (i = 0; i < c - 1; i++) {
1054    assert(i < pat->length(), "out of range");
1055    if (pat->char_at(o + i) == lastChar) {
1056      md2 = (c - 1) - i;
1057    }
1058  }
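  // Worked example (illustrative): for the pattern "abab", lastChar is 'b',
  // cache has bits set for 'a' and 'b' (each char masked to its low 5 bits),
  // and md2 becomes 2: the shift distance from the last earlier occurrence
  // of lastChar (index 1) to the final pattern position (index 3).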
1059
1060  Node* result = string_indexOf(receiver, pat, o, cache, md2);
1061  push(result);
1062  return true;
1063}
1064
1065//--------------------------pop_math_arg--------------------------------
1066// Pop a double argument to a math function from the stack
1067// rounding it if necessary.
1068Node * LibraryCallKit::pop_math_arg() {
1069  Node *arg = pop_pair();
1070  if( Matcher::strict_fp_requires_explicit_rounding && UseSSE<=1 )
1071    arg = _gvn.transform( new (C, 2) RoundDoubleNode(0, arg) );
1072  return arg;
1073}
1074
1075//------------------------------inline_trig----------------------------------
1076// Inline sin/cos/tan instructions, if possible.  If rounding is required, do
1077// argument reduction which will turn into a fast/slow diamond.
1078bool LibraryCallKit::inline_trig(vmIntrinsics::ID id) {
1079  _sp += arg_size();            // restore stack pointer
1080  Node* arg = pop_math_arg();
1081  Node* trig = NULL;
1082
1083  switch (id) {
1084  case vmIntrinsics::_dsin:
1085    trig = _gvn.transform((Node*)new (C, 2) SinDNode(arg));
1086    break;
1087  case vmIntrinsics::_dcos:
1088    trig = _gvn.transform((Node*)new (C, 2) CosDNode(arg));
1089    break;
1090  case vmIntrinsics::_dtan:
1091    trig = _gvn.transform((Node*)new (C, 2) TanDNode(arg));
1092    break;
1093  default:
1094    assert(false, "bad intrinsic was passed in");
1095    return false;
1096  }
1097
1098  // Rounding required?  Check for argument reduction!
1099  if( Matcher::strict_fp_requires_explicit_rounding ) {
1100
1101    static const double     pi_4 =  0.7853981633974483;
1102    static const double neg_pi_4 = -0.7853981633974483;
1103    // pi/2 in 80-bit extended precision
1104    // static const unsigned char pi_2_bits_x[] = {0x35,0xc2,0x68,0x21,0xa2,0xda,0x0f,0xc9,0xff,0x3f,0x00,0x00,0x00,0x00,0x00,0x00};
1105    // -pi/2 in 80-bit extended precision
1106    // static const unsigned char neg_pi_2_bits_x[] = {0x35,0xc2,0x68,0x21,0xa2,0xda,0x0f,0xc9,0xff,0xbf,0x00,0x00,0x00,0x00,0x00,0x00};
1107    // Cutoff value for using this argument reduction technique
1108    //static const double    pi_2_minus_epsilon =  1.564660403643354;
1109    //static const double neg_pi_2_plus_epsilon = -1.564660403643354;
1110
1111    // Pseudocode for sin:
1112    // if (x <= Math.PI / 4.0) {
1113    //   if (x >= -Math.PI / 4.0) return  fsin(x);
1114    //   if (x >= -Math.PI / 2.0) return -fcos(x + Math.PI / 2.0);
1115    // } else {
1116    //   if (x <=  Math.PI / 2.0) return  fcos(x - Math.PI / 2.0);
1117    // }
1118    // return StrictMath.sin(x);
1119
1120    // Pseudocode for cos:
1121    // if (x <= Math.PI / 4.0) {
1122    //   if (x >= -Math.PI / 4.0) return  fcos(x);
1123    //   if (x >= -Math.PI / 2.0) return  fsin(x + Math.PI / 2.0);
1124    // } else {
1125    //   if (x <=  Math.PI / 2.0) return -fsin(x - Math.PI / 2.0);
1126    // }
1127    // return StrictMath.cos(x);
1128
1129    // Actually, sticking an 80-bit Intel value into C2 will be tough; it
1130    // requires a special machine instruction to load it.  Instead we'll try
1131    // the 'easy' case.  If we really need the extra range +/- PI/2 we'll
1132    // probably do the math inside the SIN encoding.
1133
1134    // Make the merge point
1135    RegionNode *r = new (C, 3) RegionNode(3);
1136    Node *phi = new (C, 3) PhiNode(r,Type::DOUBLE);
1137
1138    // Flatten arg so we need only 1 test
1139    Node *abs = _gvn.transform(new (C, 2) AbsDNode(arg));
1140    // Node for PI/4 constant
1141    Node *pi4 = makecon(TypeD::make(pi_4));
1142    // Check PI/4 : abs(arg)
1143    Node *cmp = _gvn.transform(new (C, 3) CmpDNode(pi4,abs));
1144    // Check: If PI/4 < abs(arg) then go slow
1145    Node *bol = _gvn.transform( new (C, 2) BoolNode( cmp, BoolTest::lt ) );
1146    // Branch either way
1147    IfNode *iff = create_and_xform_if(control(),bol, PROB_STATIC_FREQUENT, COUNT_UNKNOWN);
1148    set_control(opt_iff(r,iff));
1149
1150    // Set fast path result
1151    phi->init_req(2,trig);
1152
1153    // Slow path - non-blocking leaf call
1154    Node* call = NULL;
1155    switch (id) {
1156    case vmIntrinsics::_dsin:
1157      call = make_runtime_call(RC_LEAF, OptoRuntime::Math_D_D_Type(),
1158                               CAST_FROM_FN_PTR(address, SharedRuntime::dsin),
1159                               "Sin", NULL, arg, top());
1160      break;
1161    case vmIntrinsics::_dcos:
1162      call = make_runtime_call(RC_LEAF, OptoRuntime::Math_D_D_Type(),
1163                               CAST_FROM_FN_PTR(address, SharedRuntime::dcos),
1164                               "Cos", NULL, arg, top());
1165      break;
1166    case vmIntrinsics::_dtan:
1167      call = make_runtime_call(RC_LEAF, OptoRuntime::Math_D_D_Type(),
1168                               CAST_FROM_FN_PTR(address, SharedRuntime::dtan),
1169                               "Tan", NULL, arg, top());
1170      break;
1171    }
1172    assert(control()->in(0) == call, "");
1173    Node* slow_result = _gvn.transform(new (C, 1) ProjNode(call,TypeFunc::Parms));
1174    r->init_req(1,control());
1175    phi->init_req(1,slow_result);
1176
1177    // Post-merge
1178    set_control(_gvn.transform(r));
1179    record_for_igvn(r);
1180    trig = _gvn.transform(phi);
1181
1182    C->set_has_split_ifs(true); // Has chance for split-if optimization
1183  }
1184  // Push result back on JVM stack
1185  push_pair(trig);
1186  return true;
1187}
1188
1189//------------------------------inline_sqrt-------------------------------------
1190// Inline square root instruction, if possible.
1191bool LibraryCallKit::inline_sqrt(vmIntrinsics::ID id) {
1192  assert(id == vmIntrinsics::_dsqrt, "Not square root");
1193  _sp += arg_size();        // restore stack pointer
1194  push_pair(_gvn.transform(new (C, 2) SqrtDNode(0, pop_math_arg())));
1195  return true;
1196}
1197
1198//------------------------------inline_abs-------------------------------------
1199// Inline absolute value instruction, if possible.
1200bool LibraryCallKit::inline_abs(vmIntrinsics::ID id) {
1201  assert(id == vmIntrinsics::_dabs, "Not absolute value");
1202  _sp += arg_size();        // restore stack pointer
1203  push_pair(_gvn.transform(new (C, 2) AbsDNode(pop_math_arg())));
1204  return true;
1205}
1206
1207//------------------------------inline_exp-------------------------------------
1208// Inline exp instructions, if possible.  The Intel hardware only misses
1209// really odd corner cases (+/- Infinity).  Just uncommon-trap them.
1210bool LibraryCallKit::inline_exp(vmIntrinsics::ID id) {
1211  assert(id == vmIntrinsics::_dexp, "Not exp");
1212
1213  // If this inlining ever returned NaN in the past, we do not intrinsify it
1214  // ever again.  NaN results require StrictMath.exp handling.
1215  if (too_many_traps(Deoptimization::Reason_intrinsic))  return false;
1216
1217  // Do not intrinsify on older platforms which lack cmove.
1218  if (ConditionalMoveLimit == 0)  return false;
1219
1220  _sp += arg_size();        // restore stack pointer
1221  Node *x = pop_math_arg();
1222  Node *result = _gvn.transform(new (C, 2) ExpDNode(0,x));
1223
1224  //-------------------
1225  //result=(result.isNaN())? StrictMath::exp():result;
1226  // Check for NaN (via result != result); if so, fall back to StrictMath.exp.
1227  Node* cmpisnan = _gvn.transform(new (C, 3) CmpDNode(result,result));
1228  // Build the boolean node
1229  Node* bolisnum = _gvn.transform( new (C, 2) BoolNode(cmpisnan, BoolTest::eq) );
1230
1231  { BuildCutout unless(this, bolisnum, PROB_STATIC_FREQUENT);
1232    // End the current control-flow path
1233    push_pair(x);
1234    // Math.exp intrinsic returned a NaN, which requires StrictMath.exp
1235    // to handle.  Recompile without intrinsifying Math.exp
1236    uncommon_trap(Deoptimization::Reason_intrinsic,
1237                  Deoptimization::Action_make_not_entrant);
1238  }
1239
1240  C->set_has_split_ifs(true); // Has chance for split-if optimization
1241
1242  push_pair(result);
1243
1244  return true;
1245}
1246
1247//------------------------------inline_pow-------------------------------------
1248// Inline power instructions, if possible.
1249bool LibraryCallKit::inline_pow(vmIntrinsics::ID id) {
1250  assert(id == vmIntrinsics::_dpow, "Not pow");
1251
1252  // If this inlining ever returned NaN in the past, we do not intrinsify it
1253  // ever again.  NaN results require StrictMath.pow handling.
1254  if (too_many_traps(Deoptimization::Reason_intrinsic))  return false;
1255
1256  // Do not intrinsify on older platforms which lack cmove.
1257  if (ConditionalMoveLimit == 0)  return false;
1258
1259  // Pseudocode for pow
1260  // if (x <= 0.0) {
1261  //   if ((double)((int)y)==y) { // if y is int
1262  //     result = ((1&(int)y)==1)?-DPow(abs(x), y):DPow(abs(x), y)
1263  //   } else {
1264  //     result = NaN;
1265  //   }
1266  // } else {
1267  //   result = DPow(x,y);
1268  // }
1269  //   if (result != result) {
1270  //   uncommon_trap();
1271  // }
1272  // return result;
1273
1274  _sp += arg_size();        // restore stack pointer
1275  Node* y = pop_math_arg();
1276  Node* x = pop_math_arg();
1277
1278  Node *fast_result = _gvn.transform( new (C, 3) PowDNode(0, x, y) );
1279
1280  // Short form: if this is not a top-level call (i.e., we are inlining
1281  // Math.pow into some other method), skip the fancy tests and just
1282  // check for a NaN result.
1283  Node *result = NULL;
1284  if( jvms()->depth() >= 1 ) {
1285    result = fast_result;
1286  } else {
1287
1288    // Set the merge point for If node with condition of (x <= 0.0)
1289    // There are four possible paths to region node and phi node
1290    RegionNode *r = new (C, 4) RegionNode(4);
1291    Node *phi = new (C, 4) PhiNode(r, Type::DOUBLE);
1292
1293    // Build the first if node: if (x <= 0.0)
1294    // Node for 0 constant
1295    Node *zeronode = makecon(TypeD::ZERO);
1296    // Check x:0
1297    Node *cmp = _gvn.transform(new (C, 3) CmpDNode(x, zeronode));
1298    // If (x <= 0), take the complex path
1299    Node *bol1 = _gvn.transform( new (C, 2) BoolNode( cmp, BoolTest::le ) );
1300    // Branch either way
1301    IfNode *if1 = create_and_xform_if(control(),bol1, PROB_STATIC_INFREQUENT, COUNT_UNKNOWN);
1302    Node *opt_test = _gvn.transform(if1);
1303    //assert( opt_test->is_If(), "Expect an IfNode");
1304    IfNode *opt_if1 = (IfNode*)opt_test;
1305    // Fast path taken; set region slot 3
1306    Node *fast_taken = _gvn.transform( new (C, 1) IfFalseNode(opt_if1) );
1307    r->init_req(3,fast_taken); // Capture fast-control
1308
1309    // Fast path not-taken, i.e. slow path
1310    Node *complex_path = _gvn.transform( new (C, 1) IfTrueNode(opt_if1) );
1311
1312    // Set fast path result
1313    Node *fast_result = _gvn.transform( new (C, 3) PowDNode(0, y, x) );
1314    phi->init_req(3, fast_result);
1315
1316    // Complex path
1317    // Build the second if node (if y is int)
1318    // Node for (int)y
1319    Node *inty = _gvn.transform( new (C, 2) ConvD2INode(y));
1320    // Node for (double)((int) y)
1321    Node *doubleinty= _gvn.transform( new (C, 2) ConvI2DNode(inty));
1322    // Check (double)((int) y) : y
1323    Node *cmpinty= _gvn.transform(new (C, 3) CmpDNode(doubleinty, y));
1324    // If y is not an integer, go to the slow path
1325
1326    Node *bol2 = _gvn.transform( new (C, 2) BoolNode( cmpinty, BoolTest::ne ) );
1327    // Branch either way
1328    IfNode *if2 = create_and_xform_if(complex_path,bol2, PROB_STATIC_INFREQUENT, COUNT_UNKNOWN);
1329    Node *slow_path = opt_iff(r,if2); // Set region path 2
1330
1331    // Calculate DPow(abs(x), y)*(1 & (int)y)
1332    // Node for constant 1
1333    Node *conone = intcon(1);
1334    // 1& (int)y
1335    Node *signnode= _gvn.transform( new (C, 3) AndINode(conone, inty) );
1336    // zero node
1337    Node *conzero = intcon(0);
1338    // Check (1&(int)y)==0?
1339    Node *cmpeq1 = _gvn.transform(new (C, 3) CmpINode(signnode, conzero));
1340    // If (1&(int)y) != 0 (y is odd), the result is negative
1341    Node *bol3 = _gvn.transform( new (C, 2) BoolNode( cmpeq1, BoolTest::ne ) );
1342    // abs(x)
1343    Node *absx=_gvn.transform( new (C, 2) AbsDNode(x));
1344    // abs(x)^y
1345    Node *absxpowy = _gvn.transform( new (C, 3) PowDNode(0, y, absx) );
1346    // -abs(x)^y
1347    Node *negabsxpowy = _gvn.transform(new (C, 2) NegDNode (absxpowy));
1348    // (1&(int)y)==1?-DPow(abs(x), y):DPow(abs(x), y)
1349    Node *signresult = _gvn.transform( CMoveNode::make(C, NULL, bol3, absxpowy, negabsxpowy, Type::DOUBLE));
1350    // Set complex path fast result
1351    phi->init_req(2, signresult);
1352
1353    static const jlong nan_bits = CONST64(0x7ff8000000000000);
1354    Node *slow_result = makecon(TypeD::make(*(double*)&nan_bits)); // return NaN
1355    r->init_req(1,slow_path);
1356    phi->init_req(1,slow_result);
1357
1358    // Post merge
1359    set_control(_gvn.transform(r));
1360    record_for_igvn(r);
1361    result=_gvn.transform(phi);
1362  }
1363
1364  //-------------------
1365  //result=(result.isNaN())? uncommon_trap():result;
1366  // Check for NaN by testing result != result; if so, go to StrictMath
1367  Node* cmpisnan = _gvn.transform(new (C, 3) CmpDNode(result,result));
1368  // Build the boolean node
1369  Node* bolisnum = _gvn.transform( new (C, 2) BoolNode(cmpisnan, BoolTest::eq) );
1370
1371  { BuildCutout unless(this, bolisnum, PROB_STATIC_FREQUENT);
1372    // End the current control-flow path
1373    push_pair(x);
1374    push_pair(y);
1375    // Math.pow intrinsic returned a NaN, which requires StrictMath.pow
1376    // to handle.  Recompile without intrinsifying Math.pow.
1377    uncommon_trap(Deoptimization::Reason_intrinsic,
1378                  Deoptimization::Action_make_not_entrant);
1379  }
1380
1381  C->set_has_split_ifs(true); // Has chance for split-if optimization
1382
1383  push_pair(result);
1384
1385  return true;
1386}
1387
1388//------------------------------inline_trans-------------------------------------
1389// Inline transcendental instructions, if possible.  The Intel hardware gets
1390// these right, no funny corner cases missed.
1391bool LibraryCallKit::inline_trans(vmIntrinsics::ID id) {
1392  _sp += arg_size();        // restore stack pointer
1393  Node* arg = pop_math_arg();
1394  Node* trans = NULL;
1395
1396  switch (id) {
1397  case vmIntrinsics::_dlog:
1398    trans = _gvn.transform((Node*)new (C, 2) LogDNode(arg));
1399    break;
1400  case vmIntrinsics::_dlog10:
1401    trans = _gvn.transform((Node*)new (C, 2) Log10DNode(arg));
1402    break;
1403  default:
1404    assert(false, "bad intrinsic was passed in");
1405    return false;
1406  }
1407
1408  // Push result back on JVM stack
1409  push_pair(trans);
1410  return true;
1411}
1412
1413//------------------------------runtime_math-----------------------------
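// Fall back to a leaf runtime call for a math intrinsic.  For example (as
// used by inline_math_native below),
//   runtime_math(OptoRuntime::Math_D_D_Type(),
//                CAST_FROM_FN_PTR(address, SharedRuntime::dexp), "EXP")
// pops the double argument, emits a call with no memory effects, and pushes
// the double result.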
1414bool LibraryCallKit::runtime_math(const TypeFunc* call_type, address funcAddr, const char* funcName) {
1415  Node* a = NULL;
1416  Node* b = NULL;
1417
1418  assert(call_type == OptoRuntime::Math_DD_D_Type() || call_type == OptoRuntime::Math_D_D_Type(),
1419         "must be (DD)D or (D)D type");
1420
1421  // Inputs
1422  _sp += arg_size();        // restore stack pointer
1423  if (call_type == OptoRuntime::Math_DD_D_Type()) {
1424    b = pop_math_arg();
1425  }
1426  a = pop_math_arg();
1427
1428  const TypePtr* no_memory_effects = NULL;
1429  Node* trig = make_runtime_call(RC_LEAF, call_type, funcAddr, funcName,
1430                                 no_memory_effects,
1431                                 a, top(), b, b ? top() : NULL);
1432  Node* value = _gvn.transform(new (C, 1) ProjNode(trig, TypeFunc::Parms+0));
1433#ifdef ASSERT
1434  Node* value_top = _gvn.transform(new (C, 1) ProjNode(trig, TypeFunc::Parms+1));
1435  assert(value_top == top(), "second value must be top");
1436#endif
1437
1438  push_pair(value);
1439  return true;
1440}
1441
1442//------------------------------inline_math_native-----------------------------
1443bool LibraryCallKit::inline_math_native(vmIntrinsics::ID id) {
1444  switch (id) {
1445    // These intrinsics are not properly supported on all hardware
1446  case vmIntrinsics::_dcos: return Matcher::has_match_rule(Op_CosD) ? inline_trig(id) :
1447    runtime_math(OptoRuntime::Math_D_D_Type(), CAST_FROM_FN_PTR(address, SharedRuntime::dcos), "COS");
1448  case vmIntrinsics::_dsin: return Matcher::has_match_rule(Op_SinD) ? inline_trig(id) :
1449    runtime_math(OptoRuntime::Math_D_D_Type(), CAST_FROM_FN_PTR(address, SharedRuntime::dsin), "SIN");
1450  case vmIntrinsics::_dtan: return Matcher::has_match_rule(Op_TanD) ? inline_trig(id) :
1451    runtime_math(OptoRuntime::Math_D_D_Type(), CAST_FROM_FN_PTR(address, SharedRuntime::dtan), "TAN");
1452
1453  case vmIntrinsics::_dlog:   return Matcher::has_match_rule(Op_LogD) ? inline_trans(id) :
1454    runtime_math(OptoRuntime::Math_D_D_Type(), CAST_FROM_FN_PTR(address, SharedRuntime::dlog), "LOG");
1455  case vmIntrinsics::_dlog10: return Matcher::has_match_rule(Op_Log10D) ? inline_trans(id) :
1456    runtime_math(OptoRuntime::Math_D_D_Type(), CAST_FROM_FN_PTR(address, SharedRuntime::dlog10), "LOG10");
1457
1458    // These intrinsics are supported on all hardware
1459  case vmIntrinsics::_dsqrt: return Matcher::has_match_rule(Op_SqrtD) ? inline_sqrt(id) : false;
1460  case vmIntrinsics::_dabs:  return Matcher::has_match_rule(Op_AbsD)  ? inline_abs(id)  : false;
1461
1462    // These intrinsics don't work on X86.  The ad implementation doesn't
1463    // handle NaN's properly.  Instead of returning infinity, the ad
1464    // implementation returns a NaN on overflow. See bug: 6304089
1465    // Once the ad implementations are fixed, change the code below
1466    // to match the intrinsics above
1467
1468  case vmIntrinsics::_dexp:  return
1469    runtime_math(OptoRuntime::Math_D_D_Type(), CAST_FROM_FN_PTR(address, SharedRuntime::dexp), "EXP");
1470  case vmIntrinsics::_dpow:  return
1471    runtime_math(OptoRuntime::Math_DD_D_Type(), CAST_FROM_FN_PTR(address, SharedRuntime::dpow), "POW");
1472
1473   // These intrinsics are not yet correctly implemented
1474  case vmIntrinsics::_datan2:
1475    return false;
1476
1477  default:
1478    ShouldNotReachHere();
1479    return false;
1480  }
1481}
1482
1483static bool is_simple_name(Node* n) {
1484  return (n->req() == 1         // constant
1485          || (n->is_Type() && n->as_Type()->type()->singleton())
1486          || n->is_Proj()       // parameter or return value
1487          || n->is_Phi()        // local of some sort
1488          );
1489}
1490
1491//----------------------------inline_min_max-----------------------------------
1492bool LibraryCallKit::inline_min_max(vmIntrinsics::ID id) {
1493  push(generate_min_max(id, argument(0), argument(1)));
1494
1495  return true;
1496}
1497
1498Node*
1499LibraryCallKit::generate_min_max(vmIntrinsics::ID id, Node* x0, Node* y0) {
1500  // These are the candidate return values:
1501  Node* xvalue = x0;
1502  Node* yvalue = y0;
1503
1504  if (xvalue == yvalue) {
1505    return xvalue;
1506  }
1507
1508  bool want_max = (id == vmIntrinsics::_max);
1509
1510  const TypeInt* txvalue = _gvn.type(xvalue)->isa_int();
1511  const TypeInt* tyvalue = _gvn.type(yvalue)->isa_int();
1512  if (txvalue == NULL || tyvalue == NULL)  return top();
1513  // This is not really necessary, but it is consistent with a
1514  // hypothetical MaxINode::Value method:
1515  int widen = MAX2(txvalue->_widen, tyvalue->_widen);
1516
1517  // %%% This folding logic should (ideally) be in a different place.
1518  // Some should be inside IfNode, and there should be a more reliable
1519  // transformation of ?: style patterns into cmoves.  We also want
1520  // more powerful optimizations around cmove and min/max.
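  // Example: in code shaped like
  //   if (x < y) { ... Math.max(x, y) ... }
  // a dominating test already proves the answer is y, so the min/max
  // collapses to a plain value with no new control flow.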
1521
1522  // Try to find a dominating comparison of these guys.
1523  // It can simplify the index computation for Arrays.copyOf
1524  // and similar uses of System.arraycopy.
1525  // First, compute the normalized version of CmpI(x, y).
1526  int   cmp_op = Op_CmpI;
1527  Node* xkey = xvalue;
1528  Node* ykey = yvalue;
1529  Node* ideal_cmpxy = _gvn.transform( new(C, 3) CmpINode(xkey, ykey) );
1530  if (ideal_cmpxy->is_Cmp()) {
1531    // E.g., if we have CmpI(length - offset, count),
1532    // it might idealize to CmpI(length, count + offset)
1533    cmp_op = ideal_cmpxy->Opcode();
1534    xkey = ideal_cmpxy->in(1);
1535    ykey = ideal_cmpxy->in(2);
1536  }
1537
1538  // Start by locating any relevant comparisons.
1539  Node* start_from = (xkey->outcnt() < ykey->outcnt()) ? xkey : ykey;
1540  Node* cmpxy = NULL;
1541  Node* cmpyx = NULL;
1542  for (DUIterator_Fast kmax, k = start_from->fast_outs(kmax); k < kmax; k++) {
1543    Node* cmp = start_from->fast_out(k);
1544    if (cmp->outcnt() > 0 &&            // must have prior uses
1545        cmp->in(0) == NULL &&           // must be context-independent
1546        cmp->Opcode() == cmp_op) {      // right kind of compare
1547      if (cmp->in(1) == xkey && cmp->in(2) == ykey)  cmpxy = cmp;
1548      if (cmp->in(1) == ykey && cmp->in(2) == xkey)  cmpyx = cmp;
1549    }
1550  }
1551
1552  const int NCMPS = 2;
1553  Node* cmps[NCMPS] = { cmpxy, cmpyx };
1554  int cmpn;
1555  for (cmpn = 0; cmpn < NCMPS; cmpn++) {
1556    if (cmps[cmpn] != NULL)  break;     // find a result
1557  }
1558  if (cmpn < NCMPS) {
1559    // Look for a dominating test that tells us the min and max.
1560    int depth = 0;                // Limit search depth for speed
1561    Node* dom = control();
1562    for (; dom != NULL; dom = IfNode::up_one_dom(dom, true)) {
1563      if (++depth >= 100)  break;
1564      Node* ifproj = dom;
1565      if (!ifproj->is_Proj())  continue;
1566      Node* iff = ifproj->in(0);
1567      if (!iff->is_If())  continue;
1568      Node* bol = iff->in(1);
1569      if (!bol->is_Bool())  continue;
1570      Node* cmp = bol->in(1);
1571      if (cmp == NULL)  continue;
1572      for (cmpn = 0; cmpn < NCMPS; cmpn++)
1573        if (cmps[cmpn] == cmp)  break;
1574      if (cmpn == NCMPS)  continue;
1575      BoolTest::mask btest = bol->as_Bool()->_test._test;
1576      if (ifproj->is_IfFalse())  btest = BoolTest(btest).negate();
1577      if (cmp->in(1) == ykey)    btest = BoolTest(btest).commute();
1578      // At this point, we know that 'x btest y' is true.
1579      switch (btest) {
1580      case BoolTest::eq:
1581        // They are proven equal, so we can collapse the min/max.
1582        // Either value is the answer.  Choose the simpler.
1583        if (is_simple_name(yvalue) && !is_simple_name(xvalue))
1584          return yvalue;
1585        return xvalue;
1586      case BoolTest::lt:          // x < y
1587      case BoolTest::le:          // x <= y
1588        return (want_max ? yvalue : xvalue);
1589      case BoolTest::gt:          // x > y
1590      case BoolTest::ge:          // x >= y
1591        return (want_max ? xvalue : yvalue);
1592      }
1593    }
1594  }
1595
1596  // We failed to find a dominating test.
1597  // Let's pick a test that might GVN with prior tests.
1598  Node*          best_bol   = NULL;
1599  BoolTest::mask best_btest = BoolTest::illegal;
1600  for (cmpn = 0; cmpn < NCMPS; cmpn++) {
1601    Node* cmp = cmps[cmpn];
1602    if (cmp == NULL)  continue;
1603    for (DUIterator_Fast jmax, j = cmp->fast_outs(jmax); j < jmax; j++) {
1604      Node* bol = cmp->fast_out(j);
1605      if (!bol->is_Bool())  continue;
1606      BoolTest::mask btest = bol->as_Bool()->_test._test;
1607      if (btest == BoolTest::eq || btest == BoolTest::ne)  continue;
1608      if (cmp->in(1) == ykey)   btest = BoolTest(btest).commute();
1609      if (bol->outcnt() > (best_bol == NULL ? 0 : best_bol->outcnt())) {
1610        best_bol   = bol->as_Bool();
1611        best_btest = btest;
1612      }
1613    }
1614  }
1615
1616  Node* answer_if_true  = NULL;
1617  Node* answer_if_false = NULL;
1618  switch (best_btest) {
1619  default:
1620    if (cmpxy == NULL)
1621      cmpxy = ideal_cmpxy;
1622    best_bol = _gvn.transform( new(C, 2) BoolNode(cmpxy, BoolTest::lt) );
1623    // and fall through:
1624  case BoolTest::lt:          // x < y
1625  case BoolTest::le:          // x <= y
1626    answer_if_true  = (want_max ? yvalue : xvalue);
1627    answer_if_false = (want_max ? xvalue : yvalue);
1628    break;
1629  case BoolTest::gt:          // x > y
1630  case BoolTest::ge:          // x >= y
1631    answer_if_true  = (want_max ? xvalue : yvalue);
1632    answer_if_false = (want_max ? yvalue : xvalue);
1633    break;
1634  }
1635
1636  jint hi, lo;
1637  if (want_max) {
1638    // We can sharpen the minimum.
1639    hi = MAX2(txvalue->_hi, tyvalue->_hi);
1640    lo = MAX2(txvalue->_lo, tyvalue->_lo);
1641  } else {
1642    // We can sharpen the maximum.
1643    hi = MIN2(txvalue->_hi, tyvalue->_hi);
1644    lo = MIN2(txvalue->_lo, tyvalue->_lo);
1645  }
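  // e.g. for x in [0,10] and y in [5,20]:
  //   max(x,y) lies in [5,20]   (lo = MAX2(0,5),  hi = MAX2(10,20))
  //   min(x,y) lies in [0,10]   (lo = MIN2(0,5),  hi = MIN2(10,20))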
1646
1647  // Use a flow-free graph structure, to avoid creating excess control edges
1648  // which could hinder other optimizations.
1649  // Since Math.min/max is often used with arraycopy, we want
1650  // tightly_coupled_allocation to be able to see beyond min/max expressions.
1651  Node* cmov = CMoveNode::make(C, NULL, best_bol,
1652                               answer_if_false, answer_if_true,
1653                               TypeInt::make(lo, hi, widen));
1654
1655  return _gvn.transform(cmov);
1656
1657  /*
1658  // This is not as desirable as it may seem, since Min and Max
1659  // nodes do not have a full set of optimizations.
1660  // And they would interfere, anyway, with 'if' optimizations
1661  // and with CMoveI canonical forms.
1662  switch (id) {
1663  case vmIntrinsics::_min:
1664    result_val = _gvn.transform(new (C, 3) MinINode(x,y)); break;
1665  case vmIntrinsics::_max:
1666    result_val = _gvn.transform(new (C, 3) MaxINode(x,y)); break;
1667  default:
1668    ShouldNotReachHere();
1669  }
1670  */
1671}
1672
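// Classify an unsafe (base, offset) pair as a raw, heap, or unknown address.
// e.g. unsafe.getInt(obj, fieldOffset) typically classifies as OopPtr (heap),
// while unsafe.getInt(null, rawAddress) becomes RawPtr: the long address is
// converted to a pointer with CastX2P and the offset is reset to zero.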
1673inline int
1674LibraryCallKit::classify_unsafe_addr(Node* &base, Node* &offset) {
1675  const TypePtr* base_type = TypePtr::NULL_PTR;
1676  if (base != NULL)  base_type = _gvn.type(base)->isa_ptr();
1677  if (base_type == NULL) {
1678    // Unknown type.
1679    return Type::AnyPtr;
1680  } else if (base_type == TypePtr::NULL_PTR) {
1681    // Since this is a NULL+long form, we have to switch to a rawptr.
1682    base   = _gvn.transform( new (C, 2) CastX2PNode(offset) );
1683    offset = MakeConX(0);
1684    return Type::RawPtr;
1685  } else if (base_type->base() == Type::RawPtr) {
1686    return Type::RawPtr;
1687  } else if (base_type->isa_oopptr()) {
1688    // Base is never null => always a heap address.
1689    if (base_type->ptr() == TypePtr::NotNull) {
1690      return Type::OopPtr;
1691    }
1692    // Offset is small => always a heap address.
1693    const TypeX* offset_type = _gvn.type(offset)->isa_intptr_t();
1694    if (offset_type != NULL &&
1695        base_type->offset() == 0 &&     // (should always be?)
1696        offset_type->_lo >= 0 &&
1697        !MacroAssembler::needs_explicit_null_check(offset_type->_hi)) {
1698      return Type::OopPtr;
1699    }
1700    // Otherwise, it might either be oop+off or NULL+addr.
1701    return Type::AnyPtr;
1702  } else {
1703    // No information:
1704    return Type::AnyPtr;
1705  }
1706}
1707
1708inline Node* LibraryCallKit::make_unsafe_address(Node* base, Node* offset) {
1709  int kind = classify_unsafe_addr(base, offset);
1710  if (kind == Type::RawPtr) {
1711    return basic_plus_adr(top(), base, offset);
1712  } else {
1713    return basic_plus_adr(base, offset);
1714  }
1715}
1716
1717//----------------------------inline_reverseBytes_int/long-------------------
1718// inline Integer.reverseBytes(int)
1719// inline Long.reverseBytes(long)
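// e.g. Integer.reverseBytes(0x01020304) == 0x04030201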
1720bool LibraryCallKit::inline_reverseBytes(vmIntrinsics::ID id) {
1721  assert(id == vmIntrinsics::_reverseBytes_i || id == vmIntrinsics::_reverseBytes_l, "not reverse Bytes");
1722  if (id == vmIntrinsics::_reverseBytes_i && !Matcher::has_match_rule(Op_ReverseBytesI)) return false;
1723  if (id == vmIntrinsics::_reverseBytes_l && !Matcher::has_match_rule(Op_ReverseBytesL)) return false;
1724  _sp += arg_size();        // restore stack pointer
1725  switch (id) {
1726  case vmIntrinsics::_reverseBytes_i:
1727    push(_gvn.transform(new (C, 2) ReverseBytesINode(0, pop())));
1728    break;
1729  case vmIntrinsics::_reverseBytes_l:
1730    push_pair(_gvn.transform(new (C, 2) ReverseBytesLNode(0, pop_pair())));
1731    break;
1732  default:
1733    ;
1734  }
1735  return true;
1736}
1737
1738//----------------------------inline_unsafe_access----------------------------
1739
1740const static BasicType T_ADDRESS_HOLDER = T_LONG;
1741
1742// Interpret Unsafe.fieldOffset cookies correctly:
1743extern jlong Unsafe_field_offset_to_byte_offset(jlong field_offset);
1744
1745bool LibraryCallKit::inline_unsafe_access(bool is_native_ptr, bool is_store, BasicType type, bool is_volatile) {
1746  if (callee()->is_static())  return false;  // caller must have the capability!
1747
1748#ifndef PRODUCT
1749  {
1750    ResourceMark rm;
1751    // Check the signatures.
1752    ciSignature* sig = signature();
1753#ifdef ASSERT
1754    if (!is_store) {
1755      // Object getObject(Object base, int/long offset), etc.
1756      BasicType rtype = sig->return_type()->basic_type();
1757      if (rtype == T_ADDRESS_HOLDER && callee()->name() == ciSymbol::getAddress_name())
1758          rtype = T_ADDRESS;  // it is really a C void*
1759      assert(rtype == type, "getter must return the expected value");
1760      if (!is_native_ptr) {
1761        assert(sig->count() == 2, "oop getter has 2 arguments");
1762        assert(sig->type_at(0)->basic_type() == T_OBJECT, "getter base is object");
1763        assert(sig->type_at(1)->basic_type() == T_LONG, "getter offset is correct");
1764      } else {
1765        assert(sig->count() == 1, "native getter has 1 argument");
1766        assert(sig->type_at(0)->basic_type() == T_LONG, "getter base is long");
1767      }
1768    } else {
1769      // void putObject(Object base, int/long offset, Object x), etc.
1770      assert(sig->return_type()->basic_type() == T_VOID, "putter must not return a value");
1771      if (!is_native_ptr) {
1772        assert(sig->count() == 3, "oop putter has 3 arguments");
1773        assert(sig->type_at(0)->basic_type() == T_OBJECT, "putter base is object");
1774        assert(sig->type_at(1)->basic_type() == T_LONG, "putter offset is correct");
1775      } else {
1776        assert(sig->count() == 2, "native putter has 2 arguments");
1777        assert(sig->type_at(0)->basic_type() == T_LONG, "putter base is long");
1778      }
1779      BasicType vtype = sig->type_at(sig->count()-1)->basic_type();
1780      if (vtype == T_ADDRESS_HOLDER && callee()->name() == ciSymbol::putAddress_name())
1781        vtype = T_ADDRESS;  // it is really a C void*
1782      assert(vtype == type, "putter must accept the expected value");
1783    }
1784#endif // ASSERT
1785 }
1786#endif //PRODUCT
1787
1788  C->set_has_unsafe_access(true);  // Mark eventual nmethod as "unsafe".
1789
1790  int type_words = type2size[ (type == T_ADDRESS) ? T_LONG : type ];
1791
1792  // Argument words:  "this" plus (oop/offset) or (lo/hi) args plus maybe 1 or 2 value words
1793  int nargs = 1 + (is_native_ptr ? 2 : 3) + (is_store ? type_words : 0);
1794
1795  debug_only(int saved_sp = _sp);
1796  _sp += nargs;
1797
1798  Node* val;
1799  debug_only(val = (Node*)(uintptr_t)-1);
1800
1801
1802  if (is_store) {
1803    // Get the value being stored.  (Pop it first; it was pushed last.)
1804    switch (type) {
1805    case T_DOUBLE:
1806    case T_LONG:
1807    case T_ADDRESS:
1808      val = pop_pair();
1809      break;
1810    default:
1811      val = pop();
1812    }
1813  }
1814
1815  // Build address expression.  See the code in inline_unsafe_prefetch.
1816  Node *adr;
1817  Node *heap_base_oop = top();
1818  if (!is_native_ptr) {
1819    // The offset is a value produced by Unsafe.staticFieldOffset or Unsafe.objectFieldOffset
1820    Node* offset = pop_pair();
1821    // The base is either a Java object or a value produced by Unsafe.staticFieldBase
1822    Node* base   = pop();
1823    // We currently rely on the cookies produced by Unsafe.xxxFieldOffset
1824    // to be plain byte offsets, which are also the same as those accepted
1825    // by oopDesc::field_base.
1826    assert(Unsafe_field_offset_to_byte_offset(11) == 11,
1827           "fieldOffset must be byte-scaled");
1828    // 32-bit machines ignore the high half!
1829    offset = ConvL2X(offset);
1830    adr = make_unsafe_address(base, offset);
1831    heap_base_oop = base;
1832  } else {
1833    Node* ptr = pop_pair();
1834    // Adjust Java long to machine word:
1835    ptr = ConvL2X(ptr);
1836    adr = make_unsafe_address(NULL, ptr);
1837  }
1838
1839  // Pop receiver last:  it was pushed first.
1840  Node *receiver = pop();
1841
1842  assert(saved_sp == _sp, "must have correct argument count");
1843
1844  const TypePtr *adr_type = _gvn.type(adr)->isa_ptr();
1845
1846  // First guess at the value type.
1847  const Type *value_type = Type::get_const_basic_type(type);
1848
1849  // Try to categorize the address.  If it comes up as TypeJavaPtr::BOTTOM,
1850  // there was not enough information to nail it down.
1851  Compile::AliasType* alias_type = C->alias_type(adr_type);
1852  assert(alias_type->index() != Compile::AliasIdxBot, "no bare pointers here");
1853
1854  // We will need memory barriers unless we can determine a unique
1855  // alias category for this reference.  (Note:  If for some reason
1856  // the barriers get omitted and the unsafe reference begins to "pollute"
1857  // the alias analysis of the rest of the graph, either Compile::can_alias
1858  // or Compile::must_alias will throw a diagnostic assert.)
1859  bool need_mem_bar = (alias_type->adr_type() == TypeOopPtr::BOTTOM);
1860
1861  if (!is_store && type == T_OBJECT) {
1862    // Attempt to infer a sharper value type from the offset and base type.
1863    ciKlass* sharpened_klass = NULL;
1864
1865    // See if it is an instance field, with an object type.
1866    if (alias_type->field() != NULL) {
1867      assert(!is_native_ptr, "native pointer op cannot use a java address");
1868      if (alias_type->field()->type()->is_klass()) {
1869        sharpened_klass = alias_type->field()->type()->as_klass();
1870      }
1871    }
1872
1873    // See if it is a narrow oop array.
1874    if (adr_type->isa_aryptr()) {
1875      if (adr_type->offset() >= objArrayOopDesc::base_offset_in_bytes(type)) {
1876        const TypeOopPtr *elem_type = adr_type->is_aryptr()->elem()->isa_oopptr();
1877        if (elem_type != NULL) {
1878          sharpened_klass = elem_type->klass();
1879        }
1880      }
1881    }
1882
1883    if (sharpened_klass != NULL) {
1884      const TypeOopPtr* tjp = TypeOopPtr::make_from_klass(sharpened_klass);
1885
1886      // Sharpen the value type.
1887      value_type = tjp;
1888
1889#ifndef PRODUCT
1890      if (PrintIntrinsics || PrintInlining || PrintOptoInlining) {
1891        tty->print("  from base type:  ");   adr_type->dump();
1892        tty->print("  sharpened value: "); value_type->dump();
1893      }
1894#endif
1895    }
1896  }
1897
1898  // Null check on self without removing any arguments.  The argument
1899  // null check technically happens in the wrong place, which can lead to
1900  // invalid stack traces when the primitive is inlined into a method
1901  // which handles NullPointerExceptions.
1902  _sp += nargs;
1903  do_null_check(receiver, T_OBJECT);
1904  _sp -= nargs;
1905  if (stopped()) {
1906    return true;
1907  }
1908  // Heap pointers get a null-check from the interpreter,
1909  // as a courtesy.  However, this is not guaranteed by Unsafe,
1910  // and it is not possible to fully distinguish unintended nulls
1911  // from intended ones in this API.
1912
1913  if (is_volatile) {
1914    // We need to emit leading and trailing CPU membars (see below) in
1915    // addition to memory membars when is_volatile. This is a little
1916    // too strong, but avoids the need to insert per-alias-type
1917    // volatile membars (for stores; compare Parse::do_put_xxx), which
1918    // we cannot do effectively here because we probably only have a
1919    // rough approximation of type.
1920    need_mem_bar = true;
1921    // For Stores, place a memory ordering barrier now.
1922    if (is_store)
1923      insert_mem_bar(Op_MemBarRelease);
1924  }
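  // Net barrier layout for a volatile access (sketch):
  //   store:  MemBarRelease; MemBarCPUOrder; store; MemBarVolatile; MemBarCPUOrder
  //   load:   MemBarCPUOrder; load; MemBarAcquire; MemBarCPUOrder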
1925
1926  // Memory barrier to prevent normal and 'unsafe' accesses from
1927  // bypassing each other.  Happens after null checks, so the
1928  // exception paths do not take memory state from the memory barrier,
1929  // so there are no problems making a strong assert about mixing users
1930  // of safe & unsafe memory.  Otherwise fails in a CTW of rt.jar
1931  // around 5701, class sun/reflect/UnsafeBooleanFieldAccessorImpl.
1932  if (need_mem_bar) insert_mem_bar(Op_MemBarCPUOrder);
1933
1934  if (!is_store) {
1935    Node* p = make_load(control(), adr, value_type, type, adr_type, is_volatile);
1936    // load value and push onto stack
1937    switch (type) {
1938    case T_BOOLEAN:
1939    case T_CHAR:
1940    case T_BYTE:
1941    case T_SHORT:
1942    case T_INT:
1943    case T_FLOAT:
1944    case T_OBJECT:
1945      push( p );
1946      break;
1947    case T_ADDRESS:
1948      // Cast to an int type.
1949      p = _gvn.transform( new (C, 2) CastP2XNode(NULL,p) );
1950      p = ConvX2L(p);
1951      push_pair(p);
1952      break;
1953    case T_DOUBLE:
1954    case T_LONG:
1955      push_pair( p );
1956      break;
1957    default: ShouldNotReachHere();
1958    }
1959  } else {
1960    // place effect of store into memory
1961    switch (type) {
1962    case T_DOUBLE:
1963      val = dstore_rounding(val);
1964      break;
1965    case T_ADDRESS:
1966      // Repackage the long as a pointer.
1967      val = ConvL2X(val);
1968      val = _gvn.transform( new (C, 2) CastX2PNode(val) );
1969      break;
1970    }
1971
1972    if (type != T_OBJECT ) {
1973      (void) store_to_memory(control(), adr, val, type, adr_type, is_volatile);
1974    } else {
1975      // Possibly an oop being stored to Java heap or native memory
1976      if (!TypePtr::NULL_PTR->higher_equal(_gvn.type(heap_base_oop))) {
1977        // oop to Java heap.
1978        (void) store_oop_to_unknown(control(), heap_base_oop, adr, adr_type, val, val->bottom_type(), type);
1979      } else {
1980
1981        // We can't tell at compile time if we are storing in the Java heap or outside
1982        // of it. So we need to emit code to conditionally do the proper type of
1983        // store.
1984
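        // Roughly (sketch):
        //   if (heap_base_oop != NULL) store_oop_to_unknown(...); // heap store, GC barriers
        //   else                       store_to_memory(...);      // raw/native store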
1985        IdealKit kit(gvn(), control(),  merged_memory());
1986        kit.declares_done();
1987        // QQQ who knows what probability is here??
1988        kit.if_then(heap_base_oop, BoolTest::ne, null(), PROB_UNLIKELY(0.999)); {
1989          (void) store_oop_to_unknown(control(), heap_base_oop, adr, adr_type, val, val->bottom_type(), type);
1990        } kit.else_(); {
1991          (void) store_to_memory(control(), adr, val, type, adr_type, is_volatile);
1992        } kit.end_if();
1993      }
1994    }
1995  }
1996
1997  if (is_volatile) {
1998    if (!is_store)
1999      insert_mem_bar(Op_MemBarAcquire);
2000    else
2001      insert_mem_bar(Op_MemBarVolatile);
2002  }
2003
2004  if (need_mem_bar) insert_mem_bar(Op_MemBarCPUOrder);
2005
2006  return true;
2007}
2008
2009//----------------------------inline_unsafe_prefetch----------------------------
2010
2011bool LibraryCallKit::inline_unsafe_prefetch(bool is_native_ptr, bool is_store, bool is_static) {
2012#ifndef PRODUCT
2013  {
2014    ResourceMark rm;
2015    // Check the signatures.
2016    ciSignature* sig = signature();
2017#ifdef ASSERT
2018    // Object getObject(Object base, int/long offset), etc.
2019    BasicType rtype = sig->return_type()->basic_type();
2020    if (!is_native_ptr) {
2021      assert(sig->count() == 2, "oop prefetch has 2 arguments");
2022      assert(sig->type_at(0)->basic_type() == T_OBJECT, "prefetch base is object");
2023      assert(sig->type_at(1)->basic_type() == T_LONG, "prefetch offset is correct");
2024    } else {
2025      assert(sig->count() == 1, "native prefetch has 1 argument");
2026      assert(sig->type_at(0)->basic_type() == T_LONG, "prefetch base is long");
2027    }
2028#endif // ASSERT
2029  }
2030#endif // !PRODUCT
2031
2032  C->set_has_unsafe_access(true);  // Mark eventual nmethod as "unsafe".
2033
2034  // Argument words:  "this" if not static, plus (oop/offset) or (lo/hi) args
2035  int nargs = (is_static ? 0 : 1) + (is_native_ptr ? 2 : 3);
2036
2037  debug_only(int saved_sp = _sp);
2038  _sp += nargs;
2039
2040  // Build address expression.  See the code in inline_unsafe_access.
2041  Node *adr;
2042  if (!is_native_ptr) {
2043    // The offset is a value produced by Unsafe.staticFieldOffset or Unsafe.objectFieldOffset
2044    Node* offset = pop_pair();
2045    // The base is either a Java object or a value produced by Unsafe.staticFieldBase
2046    Node* base   = pop();
2047    // We currently rely on the cookies produced by Unsafe.xxxFieldOffset
2048    // to be plain byte offsets, which are also the same as those accepted
2049    // by oopDesc::field_base.
2050    assert(Unsafe_field_offset_to_byte_offset(11) == 11,
2051           "fieldOffset must be byte-scaled");
2052    // 32-bit machines ignore the high half!
2053    offset = ConvL2X(offset);
2054    adr = make_unsafe_address(base, offset);
2055  } else {
2056    Node* ptr = pop_pair();
2057    // Adjust Java long to machine word:
2058    ptr = ConvL2X(ptr);
2059    adr = make_unsafe_address(NULL, ptr);
2060  }
2061
2062  if (is_static) {
2063    assert(saved_sp == _sp, "must have correct argument count");
2064  } else {
2065    // Pop receiver last:  it was pushed first.
2066    Node *receiver = pop();
2067    assert(saved_sp == _sp, "must have correct argument count");
2068
2069    // Null check on self without removing any arguments.  The argument
2070    // null check technically happens in the wrong place, which can lead to
2071    // invalid stack traces when the primitive is inlined into a method
2072    // which handles NullPointerExceptions.
2073    _sp += nargs;
2074    do_null_check(receiver, T_OBJECT);
2075    _sp -= nargs;
2076    if (stopped()) {
2077      return true;
2078    }
2079  }
2080
2081  // Generate the read or write prefetch
2082  Node *prefetch;
2083  if (is_store) {
2084    prefetch = new (C, 3) PrefetchWriteNode(i_o(), adr);
2085  } else {
2086    prefetch = new (C, 3) PrefetchReadNode(i_o(), adr);
2087  }
2088  prefetch->init_req(0, control());
2089  set_i_o(_gvn.transform(prefetch));
2090
2091  return true;
2092}
2093
2094//----------------------------inline_unsafe_CAS----------------------------
2095
2096bool LibraryCallKit::inline_unsafe_CAS(BasicType type) {
2097  // This basic scheme here is the same as inline_unsafe_access, but
2098  // differs in enough details that combining them would make the code
2099  // overly confusing.  (This is a true fact! I originally combined
2100  // them, but even I was confused by it!) As much code/comments as
2101  // possible are retained from inline_unsafe_access though to make
2102  // the correspondences clearer. - dl
2103
2104  if (callee()->is_static())  return false;  // caller must have the capability!
2105
2106#ifndef PRODUCT
2107  {
2108    ResourceMark rm;
2109    // Check the signatures.
2110    ciSignature* sig = signature();
2111#ifdef ASSERT
2112    BasicType rtype = sig->return_type()->basic_type();
2113    assert(rtype == T_BOOLEAN, "CAS must return boolean");
2114    assert(sig->count() == 4, "CAS has 4 arguments");
2115    assert(sig->type_at(0)->basic_type() == T_OBJECT, "CAS base is object");
2116    assert(sig->type_at(1)->basic_type() == T_LONG, "CAS offset is long");
2117#endif // ASSERT
2118  }
2119#endif //PRODUCT
2120
2121  // number of stack slots per value argument (1 or 2)
2122  int type_words = type2size[type];
2123
2124  // Cannot inline wide CAS on machines that don't support it natively
2125  if (type2aelembytes(type) > BytesPerInt && !VM_Version::supports_cx8())
2126    return false;
2127
2128  C->set_has_unsafe_access(true);  // Mark eventual nmethod as "unsafe".
2129
2130  // Argument words:  "this" plus oop plus offset plus oldvalue plus newvalue;
2131  int nargs = 1 + 1 + 2  + type_words + type_words;
2132
2133  // pop arguments: newval, oldval, offset, base, and receiver
2134  debug_only(int saved_sp = _sp);
2135  _sp += nargs;
2136  Node* newval   = (type_words == 1) ? pop() : pop_pair();
2137  Node* oldval   = (type_words == 1) ? pop() : pop_pair();
2138  Node *offset   = pop_pair();
2139  Node *base     = pop();
2140  Node *receiver = pop();
2141  assert(saved_sp == _sp, "must have correct argument count");
2142
2143  //  Null check receiver.
2144  _sp += nargs;
2145  do_null_check(receiver, T_OBJECT);
2146  _sp -= nargs;
2147  if (stopped()) {
2148    return true;
2149  }
2150
2151  // Build field offset expression.
2152  // We currently rely on the cookies produced by Unsafe.xxxFieldOffset
2153  // to be plain byte offsets, which are also the same as those accepted
2154  // by oopDesc::field_base.
2155  assert(Unsafe_field_offset_to_byte_offset(11) == 11, "fieldOffset must be byte-scaled");
2156  // 32-bit machines ignore the high half of long offsets
2157  offset = ConvL2X(offset);
2158  Node* adr = make_unsafe_address(base, offset);
2159  const TypePtr *adr_type = _gvn.type(adr)->isa_ptr();
2160
2161  // (Unlike inline_unsafe_access, there seems to be no point in trying
2162  // to refine types.  Just use the coarse types here.)
2163  const Type *value_type = Type::get_const_basic_type(type);
2164  Compile::AliasType* alias_type = C->alias_type(adr_type);
2165  assert(alias_type->index() != Compile::AliasIdxBot, "no bare pointers here");
2166  int alias_idx = C->get_alias_index(adr_type);
2167
2168  // Memory-model-wise, a CAS acts like a little synchronized block,
2169  // so needs barriers on each side.  These don't translate into
2170  // actual barriers on most machines, but we still need rest of
2171  // compiler to respect ordering.
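  // Resulting node sequence (sketch):
  //   MemBarRelease; MemBarCPUOrder;
  //   CompareAndSwap{I,L,P,N}; SCMemProj;
  //   MemBarCPUOrder; MemBarAcquire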
2172
2173  insert_mem_bar(Op_MemBarRelease);
2174  insert_mem_bar(Op_MemBarCPUOrder);
2175
2176  // 4984716: MemBars must be inserted before this
2177  //          memory node in order to avoid a false
2178  //          dependency which will confuse the scheduler.
2179  Node *mem = memory(alias_idx);
2180
2181  // For now, we handle only those cases that actually exist: ints,
2182  // longs, and Object. Adding others should be straightforward.
2183  Node* cas;
2184  switch(type) {
2185  case T_INT:
2186    cas = _gvn.transform(new (C, 5) CompareAndSwapINode(control(), mem, adr, newval, oldval));
2187    break;
2188  case T_LONG:
2189    cas = _gvn.transform(new (C, 5) CompareAndSwapLNode(control(), mem, adr, newval, oldval));
2190    break;
2191  case T_OBJECT:
2192     // reference stores need a store barrier.
2193    // (They don't if CAS fails, but it isn't worth checking.)
2194    pre_barrier(control(), base, adr, alias_idx, newval, value_type, T_OBJECT);
2195#ifdef _LP64
2196    if (adr->bottom_type()->is_ptr_to_narrowoop()) {
2197      Node *newval_enc = _gvn.transform(new (C, 2) EncodePNode(newval, newval->bottom_type()->make_narrowoop()));
2198      Node *oldval_enc = _gvn.transform(new (C, 2) EncodePNode(oldval, oldval->bottom_type()->make_narrowoop()));
2199      cas = _gvn.transform(new (C, 5) CompareAndSwapNNode(control(), mem, adr,
2200                                                          newval_enc, oldval_enc));
2201    } else
2202#endif
2203    {
2204      cas = _gvn.transform(new (C, 5) CompareAndSwapPNode(control(), mem, adr, newval, oldval));
2205    }
2206    post_barrier(control(), cas, base, adr, alias_idx, newval, T_OBJECT, true);
2207    break;
2208  default:
2209    ShouldNotReachHere();
2210    break;
2211  }
2212
2213  // SCMemProjNodes represent the memory state of CAS. Their main
2214  // role is to prevent CAS nodes from being optimized away when their
2215  // results aren't used.
2216  Node* proj = _gvn.transform( new (C, 1) SCMemProjNode(cas));
2217  set_memory(proj, alias_idx);
2218
2219  // Add the trailing membar surrounding the access
2220  insert_mem_bar(Op_MemBarCPUOrder);
2221  insert_mem_bar(Op_MemBarAcquire);
2222
2223  push(cas);
2224  return true;
2225}
2226
2227bool LibraryCallKit::inline_unsafe_ordered_store(BasicType type) {
2228  // This is another variant of inline_unsafe_access, differing in
2229  // that it always issues store-store ("release") barrier and ensures
2230  // store-atomicity (which only matters for "long").
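  // Corresponds to Unsafe.putOrderedInt/Long/Object: roughly a release store,
  //   MemBarRelease; MemBarCPUOrder; (atomic) store; MemBarCPUOrder
  // with no trailing acquire or volatile barrier.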
2231
2232  if (callee()->is_static())  return false;  // caller must have the capability!
2233
2234#ifndef PRODUCT
2235  {
2236    ResourceMark rm;
2237    // Check the signatures.
2238    ciSignature* sig = signature();
2239#ifdef ASSERT
2240    BasicType rtype = sig->return_type()->basic_type();
2241    assert(rtype == T_VOID, "must return void");
2242    assert(sig->count() == 3, "has 3 arguments");
2243    assert(sig->type_at(0)->basic_type() == T_OBJECT, "base is object");
2244    assert(sig->type_at(1)->basic_type() == T_LONG, "offset is long");
2245#endif // ASSERT
2246  }
2247#endif //PRODUCT
2248
2249  // number of stack slots per value argument (1 or 2)
2250  int type_words = type2size[type];
2251
2252  C->set_has_unsafe_access(true);  // Mark eventual nmethod as "unsafe".
2253
2254  // Argument words:  "this" plus oop plus offset plus value;
2255  int nargs = 1 + 1 + 2 + type_words;
2256
2257  // pop arguments: val, offset, base, and receiver
2258  debug_only(int saved_sp = _sp);
2259  _sp += nargs;
2260  Node* val      = (type_words == 1) ? pop() : pop_pair();
2261  Node *offset   = pop_pair();
2262  Node *base     = pop();
2263  Node *receiver = pop();
2264  assert(saved_sp == _sp, "must have correct argument count");
2265
2266  //  Null check receiver.
2267  _sp += nargs;
2268  do_null_check(receiver, T_OBJECT);
2269  _sp -= nargs;
2270  if (stopped()) {
2271    return true;
2272  }
2273
2274  // Build field offset expression.
2275  assert(Unsafe_field_offset_to_byte_offset(11) == 11, "fieldOffset must be byte-scaled");
2276  // 32-bit machines ignore the high half of long offsets
2277  offset = ConvL2X(offset);
2278  Node* adr = make_unsafe_address(base, offset);
2279  const TypePtr *adr_type = _gvn.type(adr)->isa_ptr();
2280  const Type *value_type = Type::get_const_basic_type(type);
2281  Compile::AliasType* alias_type = C->alias_type(adr_type);
2282
2283  insert_mem_bar(Op_MemBarRelease);
2284  insert_mem_bar(Op_MemBarCPUOrder);
2285  // Ensure that the store is atomic for longs:
2286  bool require_atomic_access = true;
2287  Node* store;
2288  if (type == T_OBJECT) // reference stores need a store barrier.
2289    store = store_oop_to_unknown(control(), base, adr, adr_type, val, value_type, type);
2290  else {
2291    store = store_to_memory(control(), adr, val, type, adr_type, require_atomic_access);
2292  }
2293  insert_mem_bar(Op_MemBarCPUOrder);
2294  return true;
2295}
2296
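// Inline Unsafe.allocateInstance(Class): create an instance without running
// any constructor.  e.g. unsafe.allocateInstance(Foo.class) yields a Foo whose
// fields hold default values; the class must still be fully initialized,
// hence the explicit init-state check below.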
2297bool LibraryCallKit::inline_unsafe_allocate() {
2298  if (callee()->is_static())  return false;  // caller must have the capability!
2299  int nargs = 1 + 1;
2300  assert(signature()->size() == nargs-1, "alloc has 1 argument");
2301  null_check_receiver(callee());  // check then ignore argument(0)
2302  _sp += nargs;  // set original stack for use by uncommon_trap
2303  Node* cls = do_null_check(argument(1), T_OBJECT);
2304  _sp -= nargs;
2305  if (stopped())  return true;
2306
2307  Node* kls = load_klass_from_mirror(cls, false, nargs, NULL, 0);
2308  _sp += nargs;  // set original stack for use by uncommon_trap
2309  kls = do_null_check(kls, T_OBJECT);
2310  _sp -= nargs;
2311  if (stopped())  return true;  // argument was like int.class
2312
2313  // Note:  The argument might still be an illegal value like
2314  // Serializable.class or Object[].class.   The runtime will handle it.
2315  // But we must make an explicit check for initialization.
2316  Node* insp = basic_plus_adr(kls, instanceKlass::init_state_offset_in_bytes() + sizeof(oopDesc));
2317  Node* inst = make_load(NULL, insp, TypeInt::INT, T_INT);
2318  Node* bits = intcon(instanceKlass::fully_initialized);
2319  Node* test = _gvn.transform( new (C, 3) SubINode(inst, bits) );
2320  // The 'test' is non-zero if we need to take a slow path.
2321
2322  Node* obj = new_instance(kls, test);
2323  push(obj);
2324
2325  return true;
2326}
2327
2328//------------------------inline_native_time_funcs--------------
2329// inline code for System.currentTimeMillis() and System.nanoTime()
2330// these have the same type and signature
2331bool LibraryCallKit::inline_native_time_funcs(bool isNano) {
2332  address funcAddr = isNano ? CAST_FROM_FN_PTR(address, os::javaTimeNanos) :
2333                              CAST_FROM_FN_PTR(address, os::javaTimeMillis);
2334  const char * funcName = isNano ? "nanoTime" : "currentTimeMillis";
2335  const TypeFunc *tf = OptoRuntime::current_time_millis_Type();
2336  const TypePtr* no_memory_effects = NULL;
2337  Node* time = make_runtime_call(RC_LEAF, tf, funcAddr, funcName, no_memory_effects);
2338  Node* value = _gvn.transform(new (C, 1) ProjNode(time, TypeFunc::Parms+0));
2339#ifdef ASSERT
2340  Node* value_top = _gvn.transform(new (C, 1) ProjNode(time, TypeFunc::Parms + 1));
2341  assert(value_top == top(), "second value must be top");
2342#endif
2343  push_pair(value);
2344  return true;
2345}
2346
2347//------------------------inline_native_currentThread------------------
2348bool LibraryCallKit::inline_native_currentThread() {
2349  Node* junk = NULL;
2350  push(generate_current_thread(junk));
2351  return true;
2352}
2353
2354//------------------------inline_native_isInterrupted------------------
2355bool LibraryCallKit::inline_native_isInterrupted() {
2356  const int nargs = 1+1;  // receiver + boolean
2357  assert(nargs == arg_size(), "sanity");
2358  // Add a fast path to t.isInterrupted(clear_int):
2359  //   (t == Thread.current() && (!TLS._osthread._interrupted || !clear_int))
2360  //   ? TLS._osthread._interrupted : /*slow path:*/ t.isInterrupted(clear_int)
2361  // So, in the common case that the interrupt bit is false,
2362  // we avoid making a call into the VM.  Even if the interrupt bit
2363  // is true, if the clear_int argument is false, we avoid the VM call.
2364  // However, if the receiver is not currentThread, we must call the VM,
2365  // because there must be some locking done around the operation.
2366
2367  // We only go to the fast case code if we pass two guards.
2368  // Paths which do not pass are accumulated in the slow_region.
2369  RegionNode* slow_region = new (C, 1) RegionNode(1);
2370  record_for_igvn(slow_region);
2371  RegionNode* result_rgn = new (C, 4) RegionNode(1+3); // fast1, fast2, slow
2372  PhiNode*    result_val = new (C, 4) PhiNode(result_rgn, TypeInt::BOOL);
2373  enum { no_int_result_path   = 1,
2374         no_clear_result_path = 2,
2375         slow_result_path     = 3
2376  };
2377
2378  // (a) Receiving thread must be the current thread.
2379  Node* rec_thr = argument(0);
2380  Node* tls_ptr = NULL;
2381  Node* cur_thr = generate_current_thread(tls_ptr);
2382  Node* cmp_thr = _gvn.transform( new (C, 3) CmpPNode(cur_thr, rec_thr) );
2383  Node* bol_thr = _gvn.transform( new (C, 2) BoolNode(cmp_thr, BoolTest::ne) );
2384
2385  bool known_current_thread = (_gvn.type(bol_thr) == TypeInt::ZERO);
2386  if (!known_current_thread)
2387    generate_slow_guard(bol_thr, slow_region);
2388
2389  // (b) Interrupt bit on TLS must be false.
2390  Node* p = basic_plus_adr(top()/*!oop*/, tls_ptr, in_bytes(JavaThread::osthread_offset()));
2391  Node* osthread = make_load(NULL, p, TypeRawPtr::NOTNULL, T_ADDRESS);
2392  p = basic_plus_adr(top()/*!oop*/, osthread, in_bytes(OSThread::interrupted_offset()));
2393  Node* int_bit = make_load(NULL, p, TypeInt::BOOL, T_INT);
2394  Node* cmp_bit = _gvn.transform( new (C, 3) CmpINode(int_bit, intcon(0)) );
2395  Node* bol_bit = _gvn.transform( new (C, 2) BoolNode(cmp_bit, BoolTest::ne) );
2396
2397  IfNode* iff_bit = create_and_map_if(control(), bol_bit, PROB_UNLIKELY_MAG(3), COUNT_UNKNOWN);
2398
2399  // First fast path:  if (!TLS._interrupted) return false;
2400  Node* false_bit = _gvn.transform( new (C, 1) IfFalseNode(iff_bit) );
2401  result_rgn->init_req(no_int_result_path, false_bit);
2402  result_val->init_req(no_int_result_path, intcon(0));
2403
2404  // drop through to next case
2405  set_control( _gvn.transform(new (C, 1) IfTrueNode(iff_bit)) );
2406
2407  // (c) Or, if interrupt bit is set and clear_int is false, use 2nd fast path.
2408  Node* clr_arg = argument(1);
2409  Node* cmp_arg = _gvn.transform( new (C, 3) CmpINode(clr_arg, intcon(0)) );
2410  Node* bol_arg = _gvn.transform( new (C, 2) BoolNode(cmp_arg, BoolTest::ne) );
2411  IfNode* iff_arg = create_and_map_if(control(), bol_arg, PROB_FAIR, COUNT_UNKNOWN);
2412
2413  // Second fast path:  ... else if (!clear_int) return true;
2414  Node* false_arg = _gvn.transform( new (C, 1) IfFalseNode(iff_arg) );
2415  result_rgn->init_req(no_clear_result_path, false_arg);
2416  result_val->init_req(no_clear_result_path, intcon(1));
2417
2418  // drop through to next case
2419  set_control( _gvn.transform(new (C, 1) IfTrueNode(iff_arg)) );
2420
2421  // (d) Otherwise, go to the slow path.
2422  slow_region->add_req(control());
2423  set_control( _gvn.transform(slow_region) );
2424
2425  if (stopped()) {
2426    // There is no slow path.
2427    result_rgn->init_req(slow_result_path, top());
2428    result_val->init_req(slow_result_path, top());
2429  } else {
2430    // non-virtual because it is a private non-static
2431    CallJavaNode* slow_call = generate_method_call(vmIntrinsics::_isInterrupted);
2432
2433    Node* slow_val = set_results_for_java_call(slow_call);
2434    // this->control() comes from set_results_for_java_call
2435
2436    // If we know that the result of the slow call will be true, tell the optimizer!
2437    if (known_current_thread)  slow_val = intcon(1);
2438
2439    Node* fast_io  = slow_call->in(TypeFunc::I_O);
2440    Node* fast_mem = slow_call->in(TypeFunc::Memory);
2441    // These two phis are pre-filled with copies of the fast IO and Memory
2442    Node* io_phi   = PhiNode::make(result_rgn, fast_io,  Type::ABIO);
2443    Node* mem_phi  = PhiNode::make(result_rgn, fast_mem, Type::MEMORY, TypePtr::BOTTOM);
2444
2445    result_rgn->init_req(slow_result_path, control());
2446    io_phi    ->init_req(slow_result_path, i_o());
2447    mem_phi   ->init_req(slow_result_path, reset_memory());
2448    result_val->init_req(slow_result_path, slow_val);
2449
2450    set_all_memory( _gvn.transform(mem_phi) );
2451    set_i_o(        _gvn.transform(io_phi) );
2452  }
2453
2454  push_result(result_rgn, result_val);
2455  C->set_has_split_ifs(true); // Has chance for split-if optimization
2456
2457  return true;
2458}
2459
2460//---------------------------load_mirror_from_klass----------------------------
2461// Given a klass oop, load its java mirror (a java.lang.Class oop).
2462Node* LibraryCallKit::load_mirror_from_klass(Node* klass) {
2463  Node* p = basic_plus_adr(klass, Klass::java_mirror_offset_in_bytes() + sizeof(oopDesc));
2464  return make_load(NULL, p, TypeInstPtr::MIRROR, T_OBJECT);
2465}
2466
2467//-----------------------load_klass_from_mirror_common-------------------------
2468// Given a java mirror (a java.lang.Class oop), load its corresponding klass oop.
2469// Test the klass oop for null (signifying a primitive Class like Integer.TYPE),
2470// and branch to the given path on the region.
2471// If never_see_null, take an uncommon trap on null, so we can optimistically
2472// compile for the non-null case.
2473// If the region is NULL, force never_see_null = true.
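// Example: for a primitive mirror such as Integer.TYPE (int.class), the
// embedded klass oop is null, so control is routed to region->in(null_path).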
2474Node* LibraryCallKit::load_klass_from_mirror_common(Node* mirror,
2475                                                    bool never_see_null,
2476                                                    int nargs,
2477                                                    RegionNode* region,
2478                                                    int null_path,
2479                                                    int offset) {
2480  if (region == NULL)  never_see_null = true;
2481  Node* p = basic_plus_adr(mirror, offset);
2482  const TypeKlassPtr*  kls_type = TypeKlassPtr::OBJECT_OR_NULL;
2483  Node* kls = _gvn.transform( LoadKlassNode::make(_gvn, immutable_memory(), p, TypeRawPtr::BOTTOM, kls_type) );
2484  _sp += nargs; // any deopt will start just before call to enclosing method
2485  Node* null_ctl = top();
2486  kls = null_check_oop(kls, &null_ctl, never_see_null);
2487  if (region != NULL) {
2488    // Set region->in(null_path) if the mirror is a primitive (e.g., int.class).
2489    region->init_req(null_path, null_ctl);
2490  } else {
2491    assert(null_ctl == top(), "no loose ends");
2492  }
2493  _sp -= nargs;
2494  return kls;
2495}
2496
2497//--------------------(inline_native_Class_query helpers)---------------------
2498// Use this for JVM_ACC_INTERFACE, JVM_ACC_IS_CLONEABLE, JVM_ACC_HAS_FINALIZER.
2499// Fall through if (mods & mask) == bits, take the guard otherwise.
2500Node* LibraryCallKit::generate_access_flags_guard(Node* kls, int modifier_mask, int modifier_bits, RegionNode* region) {
2501  // Branch around if the given klass has the given modifier bit set.
2502  // Like generate_guard, adds a new path onto the region.
2503  Node* modp = basic_plus_adr(kls, Klass::access_flags_offset_in_bytes() + sizeof(oopDesc));
2504  Node* mods = make_load(NULL, modp, TypeInt::INT, T_INT);
2505  Node* mask = intcon(modifier_mask);
2506  Node* bits = intcon(modifier_bits);
2507  Node* mbit = _gvn.transform( new (C, 3) AndINode(mods, mask) );
2508  Node* cmp  = _gvn.transform( new (C, 3) CmpINode(mbit, bits) );
2509  Node* bol  = _gvn.transform( new (C, 2) BoolNode(cmp, BoolTest::ne) );
2510  return generate_fair_guard(bol, region);
2511}
2512Node* LibraryCallKit::generate_interface_guard(Node* kls, RegionNode* region) {
2513  return generate_access_flags_guard(kls, JVM_ACC_INTERFACE, 0, region);
2514}
2515
2516//-------------------------inline_native_Class_query-------------------
2517bool LibraryCallKit::inline_native_Class_query(vmIntrinsics::ID id) {
2518  int nargs = 1+0;  // just the Class mirror, in most cases
2519  const Type* return_type = TypeInt::BOOL;
2520  Node* prim_return_value = top();  // what happens if it's a primitive class?
2521  bool never_see_null = !too_many_traps(Deoptimization::Reason_null_check);
2522  bool expect_prim = false;     // most of these guys expect to work on refs
2523
2524  enum { _normal_path = 1, _prim_path = 2, PATH_LIMIT };
2525
2526  switch (id) {
2527  case vmIntrinsics::_isInstance:
2528    nargs = 1+1;  // the Class mirror, plus the object getting queried about
2529    // nothing is an instance of a primitive type
2530    prim_return_value = intcon(0);
2531    break;
2532  case vmIntrinsics::_getModifiers:
2533    prim_return_value = intcon(JVM_ACC_ABSTRACT | JVM_ACC_FINAL | JVM_ACC_PUBLIC);
2534    assert(is_power_of_2((int)JVM_ACC_WRITTEN_FLAGS+1), "change next line");
2535    return_type = TypeInt::make(0, JVM_ACC_WRITTEN_FLAGS, Type::WidenMin);
2536    break;
2537  case vmIntrinsics::_isInterface:
2538    prim_return_value = intcon(0);
2539    break;
2540  case vmIntrinsics::_isArray:
2541    prim_return_value = intcon(0);
2542    expect_prim = true;  // cf. ObjectStreamClass.getClassSignature
2543    break;
2544  case vmIntrinsics::_isPrimitive:
2545    prim_return_value = intcon(1);
2546    expect_prim = true;  // obviously
2547    break;
2548  case vmIntrinsics::_getSuperclass:
2549    prim_return_value = null();
2550    return_type = TypeInstPtr::MIRROR->cast_to_ptr_type(TypePtr::BotPTR);
2551    break;
2552  case vmIntrinsics::_getComponentType:
2553    prim_return_value = null();
2554    return_type = TypeInstPtr::MIRROR->cast_to_ptr_type(TypePtr::BotPTR);
2555    break;
2556  case vmIntrinsics::_getClassAccessFlags:
2557    prim_return_value = intcon(JVM_ACC_ABSTRACT | JVM_ACC_FINAL | JVM_ACC_PUBLIC);
2558    return_type = TypeInt::INT;  // not bool!  6297094
2559    break;
2560  default:
2561    ShouldNotReachHere();
2562  }
2563
2564  Node* mirror =                      argument(0);
2565  Node* obj    = (nargs <= 1)? top(): argument(1);
2566
2567  const TypeInstPtr* mirror_con = _gvn.type(mirror)->isa_instptr();
2568  if (mirror_con == NULL)  return false;  // cannot happen?
2569
2570#ifndef PRODUCT
2571  if (PrintIntrinsics || PrintInlining || PrintOptoInlining) {
2572    ciType* k = mirror_con->java_mirror_type();
2573    if (k) {
2574      tty->print("Inlining %s on constant Class ", vmIntrinsics::name_at(intrinsic_id()));
2575      k->print_name();
2576      tty->cr();
2577    }
2578  }
2579#endif
2580
2581  // Null-check the mirror, and the mirror's klass ptr (in case it is a primitive).
2582  RegionNode* region = new (C, PATH_LIMIT) RegionNode(PATH_LIMIT);
2583  record_for_igvn(region);
2584  PhiNode* phi = new (C, PATH_LIMIT) PhiNode(region, return_type);
2585
2586  // The mirror will never be null for Reflection.getClassAccessFlags; however,
2587  // it may be null for Class.isInstance or Class.getModifiers.  Throw an NPE
2588  // if it is.  See bug 4774291.
2589
2590  // For Reflection.getClassAccessFlags(), the null check occurs in
2591  // the wrong place; see inline_unsafe_access(), above, for a similar
2592  // situation.
2593  _sp += nargs;  // set original stack for use by uncommon_trap
2594  mirror = do_null_check(mirror, T_OBJECT);
2595  _sp -= nargs;
2596  // If mirror or obj is dead, only null-path is taken.
2597  if (stopped())  return true;
2598
2599  if (expect_prim)  never_see_null = false;  // expect nulls (meaning prims)
2600
2601  // Now load the mirror's klass metaobject, and null-check it.
2602  // Side-effects region with the control path if the klass is null.
2603  Node* kls = load_klass_from_mirror(mirror, never_see_null, nargs,
2604                                     region, _prim_path);
2605  // If kls is null, we have a primitive mirror.
2606  phi->init_req(_prim_path, prim_return_value);
2607  if (stopped()) { push_result(region, phi); return true; }
2608
2609  Node* p;  // handy temp
2610  Node* null_ctl;
2611
2612  // Now that we have the non-null klass, we can perform the real query.
2613  // For constant classes, the query will constant-fold in LoadNode::Value.
2614  Node* query_value = top();
2615  switch (id) {
2616  case vmIntrinsics::_isInstance:
2617    // nothing is an instance of a primitive type
2618    query_value = gen_instanceof(obj, kls);
2619    break;
2620
2621  case vmIntrinsics::_getModifiers:
2622    p = basic_plus_adr(kls, Klass::modifier_flags_offset_in_bytes() + sizeof(oopDesc));
2623    query_value = make_load(NULL, p, TypeInt::INT, T_INT);
2624    break;
2625
2626  case vmIntrinsics::_isInterface:
2627    // (To verify this code sequence, check the asserts in JVM_IsInterface.)
2628    if (generate_interface_guard(kls, region) != NULL)
2629      // A guard was added.  If the guard is taken, it was an interface.
2630      phi->add_req(intcon(1));
2631    // If we fall through, it's a plain class.
2632    query_value = intcon(0);
2633    break;
2634
2635  case vmIntrinsics::_isArray:
2636    // (To verify this code sequence, check the asserts in JVM_IsArrayClass.)
2637    if (generate_array_guard(kls, region) != NULL)
2638      // A guard was added.  If the guard is taken, it was an array.
2639      phi->add_req(intcon(1));
2640    // If we fall through, it's a plain class.
2641    query_value = intcon(0);
2642    break;
2643
2644  case vmIntrinsics::_isPrimitive:
2645    query_value = intcon(0); // "normal" path produces false
2646    break;
2647
2648  case vmIntrinsics::_getSuperclass:
2649    // The rules here are somewhat unfortunate, but we can still do better
2650    // with random logic than with a JNI call.
2651    // Interfaces store null or Object as _super, but must report null.
2652    // Arrays store an intermediate super as _super, but must report Object.
2653    // Other types can report the actual _super.
2654    // (To verify this code sequence, check the asserts in JVM_IsInterface.)
2655    if (generate_interface_guard(kls, region) != NULL)
2656      // A guard was added.  If the guard is taken, it was an interface.
2657      phi->add_req(null());
2658    if (generate_array_guard(kls, region) != NULL)
2659      // A guard was added.  If the guard is taken, it was an array.
2660      phi->add_req(makecon(TypeInstPtr::make(env()->Object_klass()->java_mirror())));
2661    // If we fall through, it's a plain class.  Get its _super.
2662    p = basic_plus_adr(kls, Klass::super_offset_in_bytes() + sizeof(oopDesc));
2663    kls = _gvn.transform( LoadKlassNode::make(_gvn, immutable_memory(), p, TypeRawPtr::BOTTOM, TypeKlassPtr::OBJECT_OR_NULL) );
2664    null_ctl = top();
2665    kls = null_check_oop(kls, &null_ctl);
2666    if (null_ctl != top()) {
2667      // If the guard is taken, Object.superClass is null (both klass and mirror).
2668      region->add_req(null_ctl);
2669      phi   ->add_req(null());
2670    }
2671    if (!stopped()) {
2672      query_value = load_mirror_from_klass(kls);
2673    }
2674    break;
2675
2676  case vmIntrinsics::_getComponentType:
2677    if (generate_array_guard(kls, region) != NULL) {
2678      // Be sure to pin the oop load to the guard edge just created:
2679      Node* is_array_ctrl = region->in(region->req()-1);
2680      Node* cma = basic_plus_adr(kls, in_bytes(arrayKlass::component_mirror_offset()) + sizeof(oopDesc));
2681      Node* cmo = make_load(is_array_ctrl, cma, TypeInstPtr::MIRROR, T_OBJECT);
2682      phi->add_req(cmo);
2683    }
2684    query_value = null();  // non-array case is null
2685    break;
2686
2687  case vmIntrinsics::_getClassAccessFlags:
2688    p = basic_plus_adr(kls, Klass::access_flags_offset_in_bytes() + sizeof(oopDesc));
2689    query_value = make_load(NULL, p, TypeInt::INT, T_INT);
2690    break;
2691
2692  default:
2693    ShouldNotReachHere();
2694  }
2695
2696  // Fall-through is the normal case of a query to a real class.
2697  phi->init_req(1, query_value);
2698  region->init_req(1, control());
2699
2700  push_result(region, phi);
2701  C->set_has_split_ifs(true); // Has chance for split-if optimization
2702
2703  return true;
2704}
2705
2706//--------------------------inline_native_subtype_check------------------------
2707// This intrinsic takes the JNI calls out of the heart of
2708// UnsafeFieldAccessorImpl.set, which improves Field.set, readObject, etc.
2709bool LibraryCallKit::inline_native_subtype_check() {
2710  int nargs = 1+1;  // the Class mirror, plus the other class getting examined
2711
2712  // Pull both arguments off the stack.
2713  Node* args[2];                // two java.lang.Class mirrors: superc, subc
2714  args[0] = argument(0);
2715  args[1] = argument(1);
2716  Node* klasses[2];             // corresponding Klasses: superk, subk
2717  klasses[0] = klasses[1] = top();
2718
2719  enum {
2720    // A full decision tree on {superc is prim, subc is prim}:
2721    _prim_0_path = 1,           // {P,N} => false
2722                                // {P,P} & superc!=subc => false
2723    _prim_same_path,            // {P,P} & superc==subc => true
2724    _prim_1_path,               // {N,P} => false
2725    _ref_subtype_path,          // {N,N} & subtype check wins => true
2726    _both_ref_path,             // {N,N} & subtype check loses => false
2727    PATH_LIMIT
2728  };
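  // For example (Java semantics of Class.isAssignableFrom with primitives):
  //   int.class.isAssignableFrom(int.class)      => true   (_prim_same_path)
  //   int.class.isAssignableFrom(Integer.class)  => false  (_prim_0_path)
  //   Integer.class.isAssignableFrom(int.class)  => false  (_prim_1_path)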
2729
2730  RegionNode* region = new (C, PATH_LIMIT) RegionNode(PATH_LIMIT);
2731  Node*       phi    = new (C, PATH_LIMIT) PhiNode(region, TypeInt::BOOL);
2732  record_for_igvn(region);
2733
2734  const TypePtr* adr_type = TypeRawPtr::BOTTOM;   // memory type of loads
2735  const TypeKlassPtr* kls_type = TypeKlassPtr::OBJECT_OR_NULL;
2736  int class_klass_offset = java_lang_Class::klass_offset_in_bytes();
2737
2738  // First null-check both mirrors and load each mirror's klass metaobject.
2739  int which_arg;
2740  for (which_arg = 0; which_arg <= 1; which_arg++) {
2741    Node* arg = args[which_arg];
2742    _sp += nargs;  // set original stack for use by uncommon_trap
2743    arg = do_null_check(arg, T_OBJECT);
2744    _sp -= nargs;
2745    if (stopped())  break;
2746    args[which_arg] = _gvn.transform(arg);
2747
2748    Node* p = basic_plus_adr(arg, class_klass_offset);
2749    Node* kls = LoadKlassNode::make(_gvn, immutable_memory(), p, adr_type, kls_type);
2750    klasses[which_arg] = _gvn.transform(kls);
2751  }
2752
2753  // Having loaded both klasses, test each for null.
2754  bool never_see_null = !too_many_traps(Deoptimization::Reason_null_check);
2755  for (which_arg = 0; which_arg <= 1; which_arg++) {
2756    Node* kls = klasses[which_arg];
2757    Node* null_ctl = top();
2758    _sp += nargs;  // set original stack for use by uncommon_trap
2759    kls = null_check_oop(kls, &null_ctl, never_see_null);
2760    _sp -= nargs;
2761    int prim_path = (which_arg == 0 ? _prim_0_path : _prim_1_path);
2762    region->init_req(prim_path, null_ctl);
2763    if (stopped())  break;
2764    klasses[which_arg] = kls;
2765  }
2766
2767  if (!stopped()) {
2768    // now we have two reference types, in klasses[0..1]
2769    Node* subk   = klasses[1];  // the argument to isAssignableFrom
2770    Node* superk = klasses[0];  // the receiver
2771    region->set_req(_both_ref_path, gen_subtype_check(subk, superk));
2772    // now we have a successful reference subtype check
2773    region->set_req(_ref_subtype_path, control());
2774  }
2775
2776  // If both operands are primitive (both klasses null), then
2777  // we must return true when they are identical primitives.
2778  // It is convenient to test this after the first null klass check.
2779  set_control(region->in(_prim_0_path)); // go back to first null check
2780  if (!stopped()) {
2781    // Since superc is primitive, make a guard for the superc==subc case.
2782    Node* cmp_eq = _gvn.transform( new (C, 3) CmpPNode(args[0], args[1]) );
2783    Node* bol_eq = _gvn.transform( new (C, 2) BoolNode(cmp_eq, BoolTest::eq) );
2784    generate_guard(bol_eq, region, PROB_FAIR);
2785    if (region->req() == PATH_LIMIT+1) {
2786      // A guard was added.  If the added guard is taken, superc==subc.
2787      region->swap_edges(PATH_LIMIT, _prim_same_path);
2788      region->del_req(PATH_LIMIT);
2789    }
2790    region->set_req(_prim_0_path, control()); // Not equal after all.
2791  }
2792
2793  // these are the only paths that produce 'true':
2794  phi->set_req(_prim_same_path,   intcon(1));
2795  phi->set_req(_ref_subtype_path, intcon(1));
2796
2797  // pull together the cases:
2798  assert(region->req() == PATH_LIMIT, "sane region");
2799  for (uint i = 1; i < region->req(); i++) {
2800    Node* ctl = region->in(i);
2801    if (ctl == NULL || ctl == top()) {
2802      region->set_req(i, top());
2803      phi   ->set_req(i, top());
2804    } else if (phi->in(i) == NULL) {
2805      phi->set_req(i, intcon(0)); // all other paths produce 'false'
2806    }
2807  }
2808
2809  set_control(_gvn.transform(region));
2810  push(_gvn.transform(phi));
2811
2812  return true;
2813}
2814
2815//---------------------generate_array_guard_common------------------------
2816Node* LibraryCallKit::generate_array_guard_common(Node* kls, RegionNode* region,
2817                                                  bool obj_array, bool not_array) {
2818  // If obj_array/not_array==false/false:
2819  // Branch around if the given klass is in fact an array (either obj or prim).
2820  // If obj_array/not_array==false/true:
2821  // Branch around if the given klass is not an array klass of any kind.
2822  // If obj_array/not_array==true/true:
2823  // Branch around if the kls is not an oop array (kls is int[], String, etc.)
2824  // If obj_array/not_array==true/false:
2825  // Branch around if the kls is an oop array (Object[] or subtype)
2826  //
2827  // Like generate_guard, adds a new path onto the region.
2828  jint  layout_con = 0;
2829  Node* layout_val = get_layout_helper(kls, layout_con);
2830  if (layout_val == NULL) {
2831    bool query = (obj_array
2832                  ? Klass::layout_helper_is_objArray(layout_con)
2833                  : Klass::layout_helper_is_javaArray(layout_con));
2834    if (query == not_array) {
2835      return NULL;                       // never a branch
2836    } else {                             // always a branch
2837      Node* always_branch = control();
2838      if (region != NULL)
2839        region->add_req(always_branch);
2840      set_control(top());
2841      return always_branch;
2842    }
2843  }
2844  // Now test the correct condition.
2845  jint  nval = (obj_array
2846                ? ((jint)Klass::_lh_array_tag_type_value
2847                   <<    Klass::_lh_array_tag_shift)
2848                : Klass::_lh_neutral_value);
2849  Node* cmp = _gvn.transform( new(C, 3) CmpINode(layout_val, intcon(nval)) );
2850  BoolTest::mask btest = BoolTest::lt;  // correct for testing is_[obj]array
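  // (This relies on the layout_helper encoding in Klass: instance klasses
  // have a non-negative layout helper, while array klasses are negative with
  // the array tag in the top bits, and objArray tags sort below typeArray
  // tags.  A signed "<" against the right constant therefore distinguishes
  // "is an array" from "is an objArray".)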
2851  // invert the test if we are looking for a non-array
2852  if (not_array)  btest = BoolTest(btest).negate();
2853  Node* bol = _gvn.transform( new(C, 2) BoolNode(cmp, btest) );
2854  return generate_fair_guard(bol, region);
2855}
2856
2857
2858//-----------------------inline_native_newArray--------------------------
2859bool LibraryCallKit::inline_native_newArray() {
2860  int nargs = 2;
2861  Node* mirror    = argument(0);
2862  Node* count_val = argument(1);
2863
2864  _sp += nargs;  // set original stack for use by uncommon_trap
2865  mirror = do_null_check(mirror, T_OBJECT);
2866  _sp -= nargs;
2867  // If mirror is dead, only null-path is taken.
2868  if (stopped())  return true;
2869
2870  enum { _normal_path = 1, _slow_path = 2, PATH_LIMIT };
2871  RegionNode* result_reg = new(C, PATH_LIMIT) RegionNode(PATH_LIMIT);
2872  PhiNode*    result_val = new(C, PATH_LIMIT) PhiNode(result_reg,
2873                                                      TypeInstPtr::NOTNULL);
2874  PhiNode*    result_io  = new(C, PATH_LIMIT) PhiNode(result_reg, Type::ABIO);
2875  PhiNode*    result_mem = new(C, PATH_LIMIT) PhiNode(result_reg, Type::MEMORY,
2876                                                      TypePtr::BOTTOM);
2877
2878  bool never_see_null = !too_many_traps(Deoptimization::Reason_null_check);
2879  Node* klass_node = load_array_klass_from_mirror(mirror, never_see_null,
2880                                                  nargs,
2881                                                  result_reg, _slow_path);
2882  Node* normal_ctl   = control();
2883  Node* no_array_ctl = result_reg->in(_slow_path);
2884
2885  // Generate code for the slow case.  We make a call to newArray().
2886  set_control(no_array_ctl);
2887  if (!stopped()) {
2888    // Either the input type is void.class, or else the
2889    // array klass has not yet been cached.  Either the
2890    // ensuing call will throw an exception, or else it
2891    // will cache the array klass for next time.
2892    PreserveJVMState pjvms(this);
2893    CallJavaNode* slow_call = generate_method_call_static(vmIntrinsics::_newArray);
2894    Node* slow_result = set_results_for_java_call(slow_call);
2895    // this->control() comes from set_results_for_java_call
2896    result_reg->set_req(_slow_path, control());
2897    result_val->set_req(_slow_path, slow_result);
2898    result_io ->set_req(_slow_path, i_o());
2899    result_mem->set_req(_slow_path, reset_memory());
2900  }
2901
2902  set_control(normal_ctl);
2903  if (!stopped()) {
2904    // Normal case:  The array type has been cached in the java.lang.Class.
2905    // The following call works fine even if the array type is polymorphic.
2906    // It could be a dynamic mix of int[], boolean[], Object[], etc.
2907    _sp += nargs;  // set original stack for use by uncommon_trap
2908    Node* obj = new_array(klass_node, count_val);
2909    _sp -= nargs;
2910    result_reg->init_req(_normal_path, control());
2911    result_val->init_req(_normal_path, obj);
2912    result_io ->init_req(_normal_path, i_o());
2913    result_mem->init_req(_normal_path, reset_memory());
2914  }
2915
2916  // Return the combined state.
2917  set_i_o(        _gvn.transform(result_io)  );
2918  set_all_memory( _gvn.transform(result_mem) );
2919  push_result(result_reg, result_val);
2920  C->set_has_split_ifs(true); // Has chance for split-if optimization
2921
2922  return true;
2923}
2924
2925//----------------------inline_native_getLength--------------------------
2926bool LibraryCallKit::inline_native_getLength() {
2927  if (too_many_traps(Deoptimization::Reason_intrinsic))  return false;
2928
2929  int nargs = 1;
2930  Node* array = argument(0);
2931
2932  _sp += nargs;  // set original stack for use by uncommon_trap
2933  array = do_null_check(array, T_OBJECT);
2934  _sp -= nargs;
2935
2936  // If array is dead, only null-path is taken.
2937  if (stopped())  return true;
2938
2939  // Deoptimize if it is a non-array.
2940  Node* non_array = generate_non_array_guard(load_object_klass(array), NULL);
2941
2942  if (non_array != NULL) {
2943    PreserveJVMState pjvms(this);
2944    set_control(non_array);
2945    _sp += nargs;  // push the arguments back on the stack
2946    uncommon_trap(Deoptimization::Reason_intrinsic,
2947                  Deoptimization::Action_maybe_recompile);
2948  }
2949
2950  // If control is dead, only non-array-path is taken.
2951  if (stopped())  return true;
2952
2953  // This works fine even if the array type is polymorphic.
2954  // It could be a dynamic mix of int[], boolean[], Object[], etc.
2955  push( load_array_length(array) );
2956
2957  C->set_has_split_ifs(true); // Has chance for split-if optimization
2958
2959  return true;
2960}
2961
2962//------------------------inline_array_copyOf----------------------------
2963bool LibraryCallKit::inline_array_copyOf(bool is_copyOfRange) {
2964  if (too_many_traps(Deoptimization::Reason_intrinsic))  return false;
2965
2966  // Restore the stack and pop off the arguments.
2967  int nargs = 3 + (is_copyOfRange? 1: 0);
2968  Node* original          = argument(0);
2969  Node* start             = is_copyOfRange? argument(1): intcon(0);
2970  Node* end               = is_copyOfRange? argument(2): argument(1);
2971  Node* array_type_mirror = is_copyOfRange? argument(3): argument(2);
2972
2973  _sp += nargs;  // set original stack for use by uncommon_trap
2974  array_type_mirror = do_null_check(array_type_mirror, T_OBJECT);
2975  original          = do_null_check(original, T_OBJECT);
2976  _sp -= nargs;
2977
2978  // Check if a null path was taken unconditionally.
2979  if (stopped())  return true;
2980
2981  Node* orig_length = load_array_length(original);
2982
2983  Node* klass_node = load_klass_from_mirror(array_type_mirror, false, nargs,
2984                                            NULL, 0);
2985  _sp += nargs;  // set original stack for use by uncommon_trap
2986  klass_node = do_null_check(klass_node, T_OBJECT);
2987  _sp -= nargs;
2988
2989  RegionNode* bailout = new (C, 1) RegionNode(1);
2990  record_for_igvn(bailout);
2991
2992  // Despite the generic type of Arrays.copyOf, the mirror might be int, int[], etc.
2993  // Bail out if that is so.
2994  Node* not_objArray = generate_non_objArray_guard(klass_node, bailout);
2995  if (not_objArray != NULL) {
2996    // Improve the klass node's type from the new optimistic assumption:
2997    ciKlass* ak = ciArrayKlass::make(env()->Object_klass());
2998    const Type* akls = TypeKlassPtr::make(TypePtr::NotNull, ak, 0/*offset*/);
2999    Node* cast = new (C, 2) CastPPNode(klass_node, akls);
3000    cast->init_req(0, control());
3001    klass_node = _gvn.transform(cast);
3002  }
3003
3004  // Bail out if either start or end is negative.
3005  generate_negative_guard(start, bailout, &start);
3006  generate_negative_guard(end,   bailout, &end);
3007
3008  Node* length = end;
3009  if (_gvn.type(start) != TypeInt::ZERO) {
3010    length = _gvn.transform( new (C, 3) SubINode(end, start) );
3011  }
3012
3013  // Bail out if length is negative.
3014  // ...Not needed, since the new_array will throw the right exception.
3015  //generate_negative_guard(length, bailout, &length);
3016
3017  if (bailout->req() > 1) {
3018    PreserveJVMState pjvms(this);
3019    set_control( _gvn.transform(bailout) );
3020    _sp += nargs;  // push the arguments back on the stack
3021    uncommon_trap(Deoptimization::Reason_intrinsic,
3022                  Deoptimization::Action_maybe_recompile);
3023  }
3024
3025  if (!stopped()) {
3026    // How many elements will we copy from the original?
3027    // The answer is MinI(orig_length - start, length).
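    // (For example, Arrays.copyOf(a, 10) with a.length == 4 allocates a
    // 10-element array but copies only min(4 - 0, 10) == 4 elements; the
    // rest of the new array keeps its default zero/null values.)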
3028    Node* orig_tail = _gvn.transform( new(C, 3) SubINode(orig_length, start) );
3029    Node* moved = generate_min_max(vmIntrinsics::_min, orig_tail, length);
3030
3031    _sp += nargs;  // set original stack for use by uncommon_trap
3032    Node* newcopy = new_array(klass_node, length);
3033    _sp -= nargs;
3034
3035    // Generate a direct call to the right arraycopy function(s).
3036    // We know the copy is disjoint but we might not know if the
3037    // oop stores need checking.
3038    // Extreme case:  Arrays.copyOf((Integer[])x, 10, String[].class).
3039    // This will fail a store-check if x contains any non-nulls.
3040    bool disjoint_bases = true;
3041    bool length_never_negative = true;
3042    generate_arraycopy(TypeAryPtr::OOPS, T_OBJECT,
3043                       original, start, newcopy, intcon(0), moved,
3044                       nargs, disjoint_bases, length_never_negative);
3045
3046    push(newcopy);
3047  }
3048
3049  C->set_has_split_ifs(true); // Has chance for split-if optimization
3050
3051  return true;
3052}
3053
3054
3055//----------------------generate_virtual_guard---------------------------
3056// Helper for hashCode and clone.  Peeks inside the vtable to avoid a call.
3057Node* LibraryCallKit::generate_virtual_guard(Node* obj_klass,
3058                                             RegionNode* slow_region) {
3059  ciMethod* method = callee();
3060  int vtable_index = method->vtable_index();
3061  // Get the methodOop out of the appropriate vtable entry.
3062  int entry_offset  = (instanceKlass::vtable_start_offset() +
3063                     vtable_index*vtableEntry::size()) * wordSize +
3064                     vtableEntry::method_offset_in_bytes();
3065  Node* entry_addr  = basic_plus_adr(obj_klass, entry_offset);
3066  Node* target_call = make_load(NULL, entry_addr, TypeInstPtr::NOTNULL, T_OBJECT);
3067
3068  // Compare the target method with the expected method (e.g., Object.hashCode).
3069  const TypeInstPtr* native_call_addr = TypeInstPtr::make(method);
3070
3071  Node* native_call = makecon(native_call_addr);
3072  Node* chk_native  = _gvn.transform( new(C, 3) CmpPNode(target_call, native_call) );
3073  Node* test_native = _gvn.transform( new(C, 2) BoolNode(chk_native, BoolTest::ne) );
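  // If the loaded vtable entry differs from the expected native method
  // (e.g., Object.hashCode or Object.clone), the method has been overridden
  // in the receiver's class, and the "ne" test routes us to the slow path.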
3074
3075  return generate_slow_guard(test_native, slow_region);
3076}
3077
3078//-----------------------generate_method_call----------------------------
3079// Use generate_method_call to make a slow-call to the real
3080// method if the fast path fails.  An alternative would be to
3081// use a stub like OptoRuntime::slow_arraycopy_Java.
3082// This only works for expanding the current library call,
3083// not another intrinsic.  (E.g., don't use this for making an
3084// arraycopy call inside of the copyOf intrinsic.)
3085CallJavaNode*
3086LibraryCallKit::generate_method_call(vmIntrinsics::ID method_id, bool is_virtual, bool is_static) {
3087  // When compiling the intrinsic method itself, do not use this technique.
3088  guarantee(callee() != C->method(), "cannot make slow-call to self");
3089
3090  ciMethod* method = callee();
3091  // ensure the JVMS we have will be correct for this call
3092  guarantee(method_id == method->intrinsic_id(), "must match");
3093
3094  const TypeFunc* tf = TypeFunc::make(method);
3095  int tfdc = tf->domain()->cnt();
3096  CallJavaNode* slow_call;
3097  if (is_static) {
3098    assert(!is_virtual, "");
3099    slow_call = new(C, tfdc) CallStaticJavaNode(tf,
3100                                SharedRuntime::get_resolve_static_call_stub(),
3101                                method, bci());
3102  } else if (is_virtual) {
3103    null_check_receiver(method);
3104    int vtable_index = methodOopDesc::invalid_vtable_index;
3105    if (UseInlineCaches) {
3106      // Suppress the vtable call
3107    } else {
3108      // hashCode and clone are not miranda methods,
3109      // so the vtable index is fixed.
3110      // No need to use the linkResolver to get it.
3111      vtable_index = method->vtable_index();
3112    }
3113    slow_call = new(C, tfdc) CallDynamicJavaNode(tf,
3114                                SharedRuntime::get_resolve_virtual_call_stub(),
3115                                method, vtable_index, bci());
3116  } else {  // neither virtual nor static:  opt_virtual
3117    null_check_receiver(method);
3118    slow_call = new(C, tfdc) CallStaticJavaNode(tf,
3119                                SharedRuntime::get_resolve_opt_virtual_call_stub(),
3120                                method, bci());
3121    slow_call->set_optimized_virtual(true);
3122  }
3123  set_arguments_for_java_call(slow_call);
3124  set_edges_for_java_call(slow_call);
3125  return slow_call;
3126}
3127
3128
3129//------------------------------inline_native_hashcode--------------------
3130// Build special case code for calls to hashCode on an object.
3131bool LibraryCallKit::inline_native_hashcode(bool is_virtual, bool is_static) {
3132  assert(is_static == callee()->is_static(), "correct intrinsic selection");
3133  assert(!(is_virtual && is_static), "either virtual, special, or static");
3134
3135  enum { _slow_path = 1, _fast_path, _null_path, PATH_LIMIT };
3136
3137  RegionNode* result_reg = new(C, PATH_LIMIT) RegionNode(PATH_LIMIT);
3138  PhiNode*    result_val = new(C, PATH_LIMIT) PhiNode(result_reg,
3139                                                      TypeInt::INT);
3140  PhiNode*    result_io  = new(C, PATH_LIMIT) PhiNode(result_reg, Type::ABIO);
3141  PhiNode*    result_mem = new(C, PATH_LIMIT) PhiNode(result_reg, Type::MEMORY,
3142                                                      TypePtr::BOTTOM);
3143  Node* obj = NULL;
3144  if (!is_static) {
3145    // Check for hashing a null object
3146    obj = null_check_receiver(callee());
3147    if (stopped())  return true;        // unconditionally null
3148    result_reg->init_req(_null_path, top());
3149    result_val->init_req(_null_path, top());
3150  } else {
3151    // Do a null check, and return zero if null.
3152    // System.identityHashCode(null) == 0
3153    obj = argument(0);
3154    Node* null_ctl = top();
3155    obj = null_check_oop(obj, &null_ctl);
3156    result_reg->init_req(_null_path, null_ctl);
3157    result_val->init_req(_null_path, _gvn.intcon(0));
3158  }
3159
3160  // Unconditionally null?  Then return right away.
3161  if (stopped()) {
3162    set_control( result_reg->in(_null_path) );
3163    if (!stopped())
3164      push(      result_val ->in(_null_path) );
3165    return true;
3166  }
3167
3168  // After null check, get the object's klass.
3169  Node* obj_klass = load_object_klass(obj);
3170
3171  // This call may be virtual (invokevirtual) or bound (invokespecial).
3172  // For each case we generate slightly different code.
3173
3174  // We only go to the fast case code if we pass a number of guards.  The
3175  // paths which do not pass are accumulated in the slow_region.
3176  RegionNode* slow_region = new (C, 1) RegionNode(1);
3177  record_for_igvn(slow_region);
3178
3179  // If this is a virtual call, we generate a funny guard.  We pull out
3180  // the vtable entry corresponding to hashCode() from the target object.
3181  // If the target method which we are calling happens to be the native
3182  // Object hashCode() method, we pass the guard.  We do not need this
3183  // guard for non-virtual calls -- the caller is known to be the native
3184  // Object hashCode().
3185  if (is_virtual) {
3186    generate_virtual_guard(obj_klass, slow_region);
3187  }
3188
3189  // Get the header out of the object, use LoadMarkNode when available
3190  Node* header_addr = basic_plus_adr(obj, oopDesc::mark_offset_in_bytes());
3191  Node* header = make_load(NULL, header_addr, TypeRawPtr::BOTTOM, T_ADDRESS);
3192  header = _gvn.transform( new (C, 2) CastP2XNode(NULL, header) );
3193
3194  // Test the header to see if it is unlocked.
3195  Node *lock_mask      = _gvn.MakeConX(markOopDesc::biased_lock_mask_in_place);
3196  Node *lmasked_header = _gvn.transform( new (C, 3) AndXNode(header, lock_mask) );
3197  Node *unlocked_val   = _gvn.MakeConX(markOopDesc::unlocked_value);
3198  Node *chk_unlocked   = _gvn.transform( new (C, 3) CmpXNode( lmasked_header, unlocked_val));
3199  Node *test_unlocked  = _gvn.transform( new (C, 2) BoolNode( chk_unlocked, BoolTest::ne) );
3200
3201  generate_slow_guard(test_unlocked, slow_region);
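  // (If the object is biased or locked, the identity hash cannot be read
  // directly from the mark word here, so those cases fall through to the
  // slow path, which calls the real hashCode()/identityHashCode() instead.)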
3202
3203  // Get the hash value and check to see that it has been properly assigned.
3204  // We depend on hash_mask being at most 32 bits and avoid the use of
3205  // hash_mask_in_place because it could be larger than 32 bits in a 64-bit
3206  // vm: see markOop.hpp.
3207  Node *hash_mask      = _gvn.intcon(markOopDesc::hash_mask);
3208  Node *hash_shift     = _gvn.intcon(markOopDesc::hash_shift);
3209  Node *hshifted_header= _gvn.transform( new (C, 3) URShiftXNode(header, hash_shift) );
3210  // This hack lets the hash bits live anywhere in the mark object now, as long
3211  // as the shift drops the relevant bits into the low 32 bits.  Note that
3212  // Java spec says that HashCode is an int so there's no point in capturing
3213  // an 'X'-sized hashcode (32 in 32-bit build or 64 in 64-bit build).
3214  hshifted_header      = ConvX2I(hshifted_header);
3215  Node *hash_val       = _gvn.transform( new (C, 3) AndINode(hshifted_header, hash_mask) );
3216
3217  Node *no_hash_val    = _gvn.intcon(markOopDesc::no_hash);
3218  Node *chk_assigned   = _gvn.transform( new (C, 3) CmpINode( hash_val, no_hash_val));
3219  Node *test_assigned  = _gvn.transform( new (C, 2) BoolNode( chk_assigned, BoolTest::eq) );
3220
3221  generate_slow_guard(test_assigned, slow_region);
3222
3223  Node* init_mem = reset_memory();
3224  // fill in the rest of the null path:
3225  result_io ->init_req(_null_path, i_o());
3226  result_mem->init_req(_null_path, init_mem);
3227
3228  result_val->init_req(_fast_path, hash_val);
3229  result_reg->init_req(_fast_path, control());
3230  result_io ->init_req(_fast_path, i_o());
3231  result_mem->init_req(_fast_path, init_mem);
3232
3233  // Generate code for the slow case.  We make a call to hashCode().
3234  set_control(_gvn.transform(slow_region));
3235  if (!stopped()) {
3236    // No need for PreserveJVMState, because we're using up the present state.
3237    set_all_memory(init_mem);
3238    vmIntrinsics::ID hashCode_id = vmIntrinsics::_hashCode;
3239    if (is_static)   hashCode_id = vmIntrinsics::_identityHashCode;
3240    CallJavaNode* slow_call = generate_method_call(hashCode_id, is_virtual, is_static);
3241    Node* slow_result = set_results_for_java_call(slow_call);
3242    // this->control() comes from set_results_for_java_call
3243    result_reg->init_req(_slow_path, control());
3244    result_val->init_req(_slow_path, slow_result);
3245    result_io  ->set_req(_slow_path, i_o());
3246    result_mem ->set_req(_slow_path, reset_memory());
3247  }
3248
3249  // Return the combined state.
3250  set_i_o(        _gvn.transform(result_io)  );
3251  set_all_memory( _gvn.transform(result_mem) );
3252  push_result(result_reg, result_val);
3253
3254  return true;
3255}
3256
3257//---------------------------inline_native_getClass----------------------------
3258// Build special case code for calls to getClass on an object.
3259bool LibraryCallKit::inline_native_getClass() {
3260  Node* obj = null_check_receiver(callee());
3261  if (stopped())  return true;
3262  push( load_mirror_from_klass(load_object_klass(obj)) );
3263  return true;
3264}
3265
3266//-----------------inline_native_Reflection_getCallerClass---------------------
3267// In the presence of deep enough inlining, getCallerClass() becomes a no-op.
3268//
3269// NOTE that this code must perform the same logic as
3270// vframeStream::security_get_caller_frame in that it must skip
3271// Method.invoke() and auxiliary frames.
3272
3273
3274
3275
3276bool LibraryCallKit::inline_native_Reflection_getCallerClass() {
3277  ciMethod*       method = callee();
3278
3279#ifndef PRODUCT
3280  if ((PrintIntrinsics || PrintInlining || PrintOptoInlining) && Verbose) {
3281    tty->print_cr("Attempting to inline sun.reflect.Reflection.getCallerClass");
3282  }
3283#endif
3284
3285  debug_only(int saved_sp = _sp);
3286
3287  // Argument words:  (int depth)
3288  int nargs = 1;
3289
3290  _sp += nargs;
3291  Node* caller_depth_node = pop();
3292
3293  assert(saved_sp == _sp, "must have correct argument count");
3294
3295  // The depth value must be a constant in order for the runtime call
3296  // to be eliminated.
3297  const TypeInt* caller_depth_type = _gvn.type(caller_depth_node)->isa_int();
3298  if (caller_depth_type == NULL || !caller_depth_type->is_con()) {
3299#ifndef PRODUCT
3300    if ((PrintIntrinsics || PrintInlining || PrintOptoInlining) && Verbose) {
3301      tty->print_cr("  Bailing out because caller depth was not a constant");
3302    }
3303#endif
3304    return false;
3305  }
3306  // Note that the JVM state at this point does not include the
3307  // getCallerClass() frame which we are trying to inline. The
3308  // semantics of getCallerClass(), however, are that the "first"
3309  // frame is the getCallerClass() frame, so we subtract one from the
3310  // requested depth before continuing. We don't inline requests of
3311  // getCallerClass(0).
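  // (Under these semantics a request such as getCallerClass(2) resolves to
  // the caller of the method that invoked getCallerClass(), and
  // getCallerClass(1) to that invoking method itself, modulo the
  // Method.invoke() and auxiliary frames that are skipped below.)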
3312  int caller_depth = caller_depth_type->get_con() - 1;
3313  if (caller_depth < 0) {
3314#ifndef PRODUCT
3315    if ((PrintIntrinsics || PrintInlining || PrintOptoInlining) && Verbose) {
3316      tty->print_cr("  Bailing out because caller depth was %d", caller_depth);
3317    }
3318#endif
3319    return false;
3320  }
3321
3322  if (!jvms()->has_method()) {
3323#ifndef PRODUCT
3324    if ((PrintIntrinsics || PrintInlining || PrintOptoInlining) && Verbose) {
3325      tty->print_cr("  Bailing out because intrinsic was inlined at top level");
3326    }
3327#endif
3328    return false;
3329  }
3330  int _depth = jvms()->depth();  // cache call chain depth
3331
3332  // Walk back up the JVM state to find the caller at the required
3333  // depth. NOTE that this code must perform the same logic as
3334  // vframeStream::security_get_caller_frame in that it must skip
3335  // Method.invoke() and auxiliary frames. Note also that depth is
3336  // 1-based (1 is the bottom of the inlining).
3337  int inlining_depth = _depth;
3338  JVMState* caller_jvms = NULL;
3339
3340  if (inlining_depth > 0) {
3341    caller_jvms = jvms();
3342    assert(caller_jvms == jvms()->of_depth(inlining_depth), "inlining_depth == our depth");
3343    do {
3344      // The following if-tests should be performed in this order
3345      if (is_method_invoke_or_aux_frame(caller_jvms)) {
3346        // Skip a Method.invoke() or auxiliary frame
3347      } else if (caller_depth > 0) {
3348        // Skip real frame
3349        --caller_depth;
3350      } else {
3351        // We're done: reached desired caller after skipping.
3352        break;
3353      }
3354      caller_jvms = caller_jvms->caller();
3355      --inlining_depth;
3356    } while (inlining_depth > 0);
3357  }
3358
3359  if (inlining_depth == 0) {
3360#ifndef PRODUCT
3361    if ((PrintIntrinsics || PrintInlining || PrintOptoInlining) && Verbose) {
3362      tty->print_cr("  Bailing out because caller depth (%d) exceeded inlining depth (%d)", caller_depth_type->get_con(), _depth);
3363      tty->print_cr("  JVM state at this point:");
3364      for (int i = _depth; i >= 1; i--) {
3365        tty->print_cr("   %d) %s", i, jvms()->of_depth(i)->method()->name()->as_utf8());
3366      }
3367    }
3368#endif
3369    return false; // Reached end of inlining
3370  }
3371
3372  // Acquire method holder as java.lang.Class
3373  ciInstanceKlass* caller_klass  = caller_jvms->method()->holder();
3374  ciInstance*      caller_mirror = caller_klass->java_mirror();
3375  // Push this as a constant
3376  push(makecon(TypeInstPtr::make(caller_mirror)));
3377#ifndef PRODUCT
3378  if ((PrintIntrinsics || PrintInlining || PrintOptoInlining) && Verbose) {
3379    tty->print_cr("  Succeeded: caller = %s.%s, caller depth = %d, depth = %d", caller_klass->name()->as_utf8(), caller_jvms->method()->name()->as_utf8(), caller_depth_type->get_con(), _depth);
3380    tty->print_cr("  JVM state at this point:");
3381    for (int i = _depth; i >= 1; i--) {
3382      tty->print_cr("   %d) %s", i, jvms()->of_depth(i)->method()->name()->as_utf8());
3383    }
3384  }
3385#endif
3386  return true;
3387}
3388
3389// Helper routine for above
3390bool LibraryCallKit::is_method_invoke_or_aux_frame(JVMState* jvms) {
3391  // Is this the Method.invoke method itself?
3392  if (jvms->method()->intrinsic_id() == vmIntrinsics::_invoke)
3393    return true;
3394
3395  // Is this a helper, defined somewhere underneath MethodAccessorImpl?
3396  ciKlass* k = jvms->method()->holder();
3397  if (k->is_instance_klass()) {
3398    ciInstanceKlass* ik = k->as_instance_klass();
3399    for (; ik != NULL; ik = ik->super()) {
3400      if (ik->name() == ciSymbol::sun_reflect_MethodAccessorImpl() &&
3401          ik == env()->find_system_klass(ik->name())) {
3402        return true;
3403      }
3404    }
3405  }
3406
3407  return false;
3408}
3409
3410static int value_field_offset = -1;  // offset of the "value" field of AtomicLongCSImpl.  This is needed by
3411                                     // inline_native_AtomicLong_attemptUpdate() but it has no way of
3412                                     // computing it since there is no lookup field by name function in the
3413                                     // CI interface.  This is computed and set by inline_native_AtomicLong_get().
3414                                     // Using a static variable here is safe even if we have multiple compilation
3415                                     // threads because the offset is constant.  At worst the same offset will be
3416                                     // computed and stored multiple times.
3417
3418bool LibraryCallKit::inline_native_AtomicLong_get() {
3419  // Restore the stack and pop off the argument
3420  _sp+=1;
3421  Node *obj = pop();
3422
3423  // Get the offset of the "value" field. Since the CI interface
3424  // does not provide a way to look up a field by name, we scan the bytecodes
3425  // to get the field index.  We expect the first 2 instructions of the method
3426  // to be:
3427  //    0 aload_0
3428  //    1 getfield "value"
3429  ciMethod* method = callee();
3430  if (value_field_offset == -1)
3431  {
3432    ciField* value_field;
3433    ciBytecodeStream iter(method);
3434    Bytecodes::Code bc = iter.next();
3435
3436    if ((bc != Bytecodes::_aload_0) &&
3437              ((bc != Bytecodes::_aload) || (iter.get_index() != 0)))
3438      return false;
3439    bc = iter.next();
3440    if (bc != Bytecodes::_getfield)
3441      return false;
3442    bool ignore;
3443    value_field = iter.get_field(ignore);
3444    value_field_offset = value_field->offset_in_bytes();
3445  }
3446
3447  // Null check without removing any arguments.
3448  _sp++;
3449  obj = do_null_check(obj, T_OBJECT);
3450  _sp--;
3451  // Check for accessing a null object
3452  if (stopped()) return true;
3453
3454  Node *adr = basic_plus_adr(obj, obj, value_field_offset);
3455  const TypePtr *adr_type = _gvn.type(adr)->is_ptr();
3456  int alias_idx = C->get_alias_index(adr_type);
3457
3458  Node *result = _gvn.transform(new (C, 3) LoadLLockedNode(control(), memory(alias_idx), adr));
3459
3460  push_pair(result);
3461
3462  return true;
3463}
3464
3465bool LibraryCallKit::inline_native_AtomicLong_attemptUpdate() {
3466  // Restore the stack and pop off the arguments
3467  _sp+=5;
3468  Node *newVal = pop_pair();
3469  Node *oldVal = pop_pair();
3470  Node *obj = pop();
3471
3472  // we need the offset of the "value" field which was computed when
3473  // inlining the get() method.  Give up if we don't have it.
3474  if (value_field_offset == -1)
3475    return false;
3476
3477  // Null check without removing any arguments.
3478  _sp+=5;
3479  obj = do_null_check(obj, T_OBJECT);
3480  _sp-=5;
3481  // Check for accessing a null object
3482  if (stopped()) return true;
3483
3484  Node *adr = basic_plus_adr(obj, obj, value_field_offset);
3485  const TypePtr *adr_type = _gvn.type(adr)->is_ptr();
3486  int alias_idx = C->get_alias_index(adr_type);
3487
3488  Node *cas = _gvn.transform(new (C, 5) StoreLConditionalNode(control(), memory(alias_idx), adr, newVal, oldVal));
3489  Node *store_proj = _gvn.transform( new (C, 1) SCMemProjNode(cas));
3490  set_memory(store_proj, alias_idx);
3491  Node *bol = _gvn.transform( new (C, 2) BoolNode( cas, BoolTest::eq ) );
3492
3493  Node *result;
3494  // A CMove node is not used here so that a possible check of the result
3495  // after the attemptUpdate() call can still be folded.  Loop optimizations
3496  // may transform this code into a CMove node later on.
3497  {
3498    RegionNode *r = new (C, 3) RegionNode(3);
3499    result = new (C, 3) PhiNode(r, TypeInt::BOOL);
3500
3501    Node *iff = create_and_xform_if(control(), bol, PROB_FAIR, COUNT_UNKNOWN);
3502    Node *iftrue = opt_iff(r, iff);
3503    r->init_req(1, iftrue);
3504    result->init_req(1, intcon(1));
3505    result->init_req(2, intcon(0));
3506
3507    set_control(_gvn.transform(r));
3508    record_for_igvn(r);
3509
3510    C->set_has_split_ifs(true); // Has chance for split-if optimization
3511  }
3512
3513  push(_gvn.transform(result));
3514  return true;
3515}
3516
3517bool LibraryCallKit::inline_fp_conversions(vmIntrinsics::ID id) {
3518  // restore the arguments
3519  _sp += arg_size();
3520
3521  switch (id) {
3522  case vmIntrinsics::_floatToRawIntBits:
3523    push(_gvn.transform( new (C, 2) MoveF2INode(pop())));
3524    break;
3525
3526  case vmIntrinsics::_intBitsToFloat:
3527    push(_gvn.transform( new (C, 2) MoveI2FNode(pop())));
3528    break;
3529
3530  case vmIntrinsics::_doubleToRawLongBits:
3531    push_pair(_gvn.transform( new (C, 2) MoveD2LNode(pop_pair())));
3532    break;
3533
3534  case vmIntrinsics::_longBitsToDouble:
3535    push_pair(_gvn.transform( new (C, 2) MoveL2DNode(pop_pair())));
3536    break;
3537
3538  case vmIntrinsics::_doubleToLongBits: {
3539    Node* value = pop_pair();
3540
3541    // two paths (plus control) merge in a wood
3542    RegionNode *r = new (C, 3) RegionNode(3);
3543    Node *phi = new (C, 3) PhiNode(r, TypeLong::LONG);
3544
3545    Node *cmpisnan = _gvn.transform( new (C, 3) CmpDNode(value, value));
3546    // Build the boolean node
3547    Node *bolisnan = _gvn.transform( new (C, 2) BoolNode( cmpisnan, BoolTest::ne ) );
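    // (A CmpD of a value against itself is "ne" exactly when the value is
    // NaN.  Double.doubleToLongBits collapses every NaN to the single
    // canonical bit pattern 0x7ff8000000000000L, which the NaN branch below
    // returns; the float case further down does the same with 0x7fc00000.)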
3548
3549    // Branch either way.
3550    // NaN case is less traveled, which makes all the difference.
3551    IfNode *ifisnan = create_and_xform_if(control(), bolisnan, PROB_STATIC_FREQUENT, COUNT_UNKNOWN);
3552    Node *opt_isnan = _gvn.transform(ifisnan);
3553    assert( opt_isnan->is_If(), "Expect an IfNode");
3554    IfNode *opt_ifisnan = (IfNode*)opt_isnan;
3555    Node *iftrue = _gvn.transform( new (C, 1) IfTrueNode(opt_ifisnan) );
3556
3557    set_control(iftrue);
3558
3559    static const jlong nan_bits = CONST64(0x7ff8000000000000);
3560    Node *slow_result = longcon(nan_bits); // return NaN
3561    phi->init_req(1, _gvn.transform( slow_result ));
3562    r->init_req(1, iftrue);
3563
3564    // Else fall through
3565    Node *iffalse = _gvn.transform( new (C, 1) IfFalseNode(opt_ifisnan) );
3566    set_control(iffalse);
3567
3568    phi->init_req(2, _gvn.transform( new (C, 2) MoveD2LNode(value)));
3569    r->init_req(2, iffalse);
3570
3571    // Post merge
3572    set_control(_gvn.transform(r));
3573    record_for_igvn(r);
3574
3575    Node* result = _gvn.transform(phi);
3576    assert(result->bottom_type()->isa_long(), "must be");
3577    push_pair(result);
3578
3579    C->set_has_split_ifs(true); // Has chance for split-if optimization
3580
3581    break;
3582  }
3583
3584  case vmIntrinsics::_floatToIntBits: {
3585    Node* value = pop();
3586
3587    // two paths (plus control) merge in a wood
3588    RegionNode *r = new (C, 3) RegionNode(3);
3589    Node *phi = new (C, 3) PhiNode(r, TypeInt::INT);
3590
3591    Node *cmpisnan = _gvn.transform( new (C, 3) CmpFNode(value, value));
3592    // Build the boolean node
3593    Node *bolisnan = _gvn.transform( new (C, 2) BoolNode( cmpisnan, BoolTest::ne ) );
3594
3595    // Branch either way.
3596    // NaN case is less traveled, which makes all the difference.
3597    IfNode *ifisnan = create_and_xform_if(control(), bolisnan, PROB_STATIC_FREQUENT, COUNT_UNKNOWN);
3598    Node *opt_isnan = _gvn.transform(ifisnan);
3599    assert( opt_isnan->is_If(), "Expect an IfNode");
3600    IfNode *opt_ifisnan = (IfNode*)opt_isnan;
3601    Node *iftrue = _gvn.transform( new (C, 1) IfTrueNode(opt_ifisnan) );
3602
3603    set_control(iftrue);
3604
3605    static const jint nan_bits = 0x7fc00000;
3606    Node *slow_result = makecon(TypeInt::make(nan_bits)); // return NaN
3607    phi->init_req(1, _gvn.transform( slow_result ));
3608    r->init_req(1, iftrue);
3609
3610    // Else fall through
3611    Node *iffalse = _gvn.transform( new (C, 1) IfFalseNode(opt_ifisnan) );
3612    set_control(iffalse);
3613
3614    phi->init_req(2, _gvn.transform( new (C, 2) MoveF2INode(value)));
3615    r->init_req(2, iffalse);
3616
3617    // Post merge
3618    set_control(_gvn.transform(r));
3619    record_for_igvn(r);
3620
3621    Node* result = _gvn.transform(phi);
3622    assert(result->bottom_type()->isa_int(), "must be");
3623    push(result);
3624
3625    C->set_has_split_ifs(true); // Has chance for split-if optimization
3626
3627    break;
3628  }
3629
3630  default:
3631    ShouldNotReachHere();
3632  }
3633
3634  return true;
3635}
3636
3637#ifdef _LP64
3638#define XTOP ,top() /*additional argument*/
3639#else  //_LP64
3640#define XTOP        /*no additional argument*/
3641#endif //_LP64
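// On 64-bit builds the raw size passed to the runtime calls below is an
// X-sized (long) value, and long arguments occupy two ideal argument slots
// with top() as the unused second half; XTOP supplies that extra top()
// operand so the call signature lines up on both 32-bit and 64-bit builds.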
3642
3643//----------------------inline_unsafe_copyMemory-------------------------
3644bool LibraryCallKit::inline_unsafe_copyMemory() {
3645  if (callee()->is_static())  return false;  // caller must have the capability!
3646  int nargs = 1 + 5 + 3;  // 5 args:  (src: ptr,off, dst: ptr,off, size)
3647  assert(signature()->size() == nargs-1, "copy has 5 arguments");
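  // (Slot accounting: the receiver takes one slot, each of the five declared
  // arguments (Object, long, Object, long, long) takes one slot, and the
  // three long arguments each take a second slot, hence 1 + 5 + 3 above.)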
3648  null_check_receiver(callee());  // check then ignore argument(0)
3649  if (stopped())  return true;
3650
3651  C->set_has_unsafe_access(true);  // Mark eventual nmethod as "unsafe".
3652
3653  Node* src_ptr = argument(1);
3654  Node* src_off = ConvL2X(argument(2));
3655  assert(argument(3)->is_top(), "2nd half of long");
3656  Node* dst_ptr = argument(4);
3657  Node* dst_off = ConvL2X(argument(5));
3658  assert(argument(6)->is_top(), "2nd half of long");
3659  Node* size    = ConvL2X(argument(7));
3660  assert(argument(8)->is_top(), "2nd half of long");
3661
3662  assert(Unsafe_field_offset_to_byte_offset(11) == 11,
3663         "fieldOffset must be byte-scaled");
3664
3665  Node* src = make_unsafe_address(src_ptr, src_off);
3666  Node* dst = make_unsafe_address(dst_ptr, dst_off);
3667
3668  // Conservatively insert a memory barrier on all memory slices.
3669  // Do not let writes of the copy source or destination float below the copy.
3670  insert_mem_bar(Op_MemBarCPUOrder);
3671
3672  // Call it.  Note that the length argument is not scaled.
3673  make_runtime_call(RC_LEAF|RC_NO_FP,
3674                    OptoRuntime::fast_arraycopy_Type(),
3675                    StubRoutines::unsafe_arraycopy(),
3676                    "unsafe_arraycopy",
3677                    TypeRawPtr::BOTTOM,
3678                    src, dst, size XTOP);
3679
3680  // Do not let reads of the copy destination float above the copy.
3681  insert_mem_bar(Op_MemBarCPUOrder);
3682
3683  return true;
3684}
3685
3686
3687//------------------------inline_native_clone----------------------------
3688// Here are the simple edge cases:
3689//  null receiver => normal trap
3690//  virtual and clone was overridden => slow path to out-of-line clone
3691//  not cloneable or finalizer => slow path to out-of-line Object.clone
3692//
3693// The general case has two steps, allocation and copying.
3694// Allocation has two cases, and uses GraphKit::new_instance or new_array.
3695//
3696// Copying also has two cases, oop arrays and everything else.
3697// Oop arrays use arrayof_oop_arraycopy (same as System.arraycopy).
3698// Everything else uses the tight inline loop supplied by CopyArrayNode.
3699//
3700// These steps fold up nicely if and when the cloned object's klass
3701// can be sharply typed as an object array, a type array, or an instance.
3702//
3703bool LibraryCallKit::inline_native_clone(bool is_virtual) {
3704  int nargs = 1;
3705  Node* obj = null_check_receiver(callee());
3706  if (stopped())  return true;
3707  Node* obj_klass = load_object_klass(obj);
3708  const TypeKlassPtr* tklass = _gvn.type(obj_klass)->isa_klassptr();
3709  const TypeOopPtr*   toop   = ((tklass != NULL)
3710                                ? tklass->as_instance_type()
3711                                : TypeInstPtr::NOTNULL);
3712
3713  // Conservatively insert a memory barrier on all memory slices.
3714  // Do not let writes into the original float below the clone.
3715  insert_mem_bar(Op_MemBarCPUOrder);
3716
3717  // paths into result_reg:
3718  enum {
3719    _slow_path = 1,     // out-of-line call to clone method (virtual or not)
3720    _objArray_path,     // plain allocation, plus arrayof_oop_arraycopy
3721    _fast_path,         // plain allocation, plus a CopyArray operation
3722    PATH_LIMIT
3723  };
3724  RegionNode* result_reg = new(C, PATH_LIMIT) RegionNode(PATH_LIMIT);
3725  PhiNode*    result_val = new(C, PATH_LIMIT) PhiNode(result_reg,
3726                                                      TypeInstPtr::NOTNULL);
3727  PhiNode*    result_i_o = new(C, PATH_LIMIT) PhiNode(result_reg, Type::ABIO);
3728  PhiNode*    result_mem = new(C, PATH_LIMIT) PhiNode(result_reg, Type::MEMORY,
3729                                                      TypePtr::BOTTOM);
3730  record_for_igvn(result_reg);
3731
3732  const TypePtr* raw_adr_type = TypeRawPtr::BOTTOM;
3733  int raw_adr_idx = Compile::AliasIdxRaw;
3734  const bool raw_mem_only = true;
3735
3736  // paths into alloc_reg (on the fast path, just before the CopyArray):
3737  enum { _typeArray_alloc = 1, _instance_alloc, ALLOC_LIMIT };
3738  RegionNode* alloc_reg = new(C, ALLOC_LIMIT) RegionNode(ALLOC_LIMIT);
3739  PhiNode*    alloc_val = new(C, ALLOC_LIMIT) PhiNode(alloc_reg, raw_adr_type);
3740  PhiNode*    alloc_siz = new(C, ALLOC_LIMIT) PhiNode(alloc_reg, TypeX_X);
3741  PhiNode*    alloc_i_o = new(C, ALLOC_LIMIT) PhiNode(alloc_reg, Type::ABIO);
3742  PhiNode*    alloc_mem = new(C, ALLOC_LIMIT) PhiNode(alloc_reg, Type::MEMORY,
3743                                                      raw_adr_type);
3744  record_for_igvn(alloc_reg);
3745
3746  bool card_mark = false;  // (see below)
3747
3748  Node* array_ctl = generate_array_guard(obj_klass, (RegionNode*)NULL);
3749  if (array_ctl != NULL) {
3750    // It's an array.
3751    PreserveJVMState pjvms(this);
3752    set_control(array_ctl);
3753    Node* obj_length = load_array_length(obj);
3754    Node* obj_size = NULL;
3755    _sp += nargs;  // set original stack for use by uncommon_trap
3756    Node* alloc_obj = new_array(obj_klass, obj_length,
3757                                raw_mem_only, &obj_size);
3758    _sp -= nargs;
3759    assert(obj_size != NULL, "");
3760    Node* raw_obj = alloc_obj->in(1);
3761    assert(raw_obj->is_Proj() && raw_obj->in(0)->is_Allocate(), "");
3762    if (ReduceBulkZeroing) {
3763      AllocateNode* alloc = AllocateNode::Ideal_allocation(alloc_obj, &_gvn);
3764      if (alloc != NULL) {
3765        // We will be completely responsible for initializing this object.
3766        alloc->maybe_set_complete(&_gvn);
3767      }
3768    }
3769
3770    if (!use_ReduceInitialCardMarks()) {
3771      // If it is an oop array, it requires very special treatment,
3772      // because card marking is required on each card of the array.
3773      Node* is_obja = generate_objArray_guard(obj_klass, (RegionNode*)NULL);
3774      if (is_obja != NULL) {
3775        PreserveJVMState pjvms2(this);
3776        set_control(is_obja);
3777        // Generate a direct call to the right arraycopy function(s).
3778        bool disjoint_bases = true;
3779        bool length_never_negative = true;
3780        generate_arraycopy(TypeAryPtr::OOPS, T_OBJECT,
3781                           obj, intcon(0), alloc_obj, intcon(0),
3782                           obj_length, nargs,
3783                           disjoint_bases, length_never_negative);
3784        result_reg->init_req(_objArray_path, control());
3785        result_val->init_req(_objArray_path, alloc_obj);
3786        result_i_o ->set_req(_objArray_path, i_o());
3787        result_mem ->set_req(_objArray_path, reset_memory());
3788      }
3789    }
3790    // We can dispense with card marks if we know the allocation
3791    // comes out of eden (TLAB)...  In fact, ReduceInitialCardMarks
3792    // causes the non-eden paths to simulate a fresh allocation,
3793    // insofar as no further card marks are required to initialize
3794    // the object.
3795
3796    // Otherwise, there are no card marks to worry about.
3797    alloc_val->init_req(_typeArray_alloc, raw_obj);
3798    alloc_siz->init_req(_typeArray_alloc, obj_size);
3799    alloc_reg->init_req(_typeArray_alloc, control());
3800    alloc_i_o->init_req(_typeArray_alloc, i_o());
3801    alloc_mem->init_req(_typeArray_alloc, memory(raw_adr_type));
3802  }
3803
3804  // We only go to the fast case code if we pass a number of guards.
3805  // The paths which do not pass are accumulated in the slow_region.
3806  RegionNode* slow_region = new (C, 1) RegionNode(1);
3807  record_for_igvn(slow_region);
3808  if (!stopped()) {
3809    // It's an instance.  Make the slow-path tests.
3810    // If this is a virtual call, we generate a funny guard.  We grab
3811    // the vtable entry corresponding to clone() from the target object.
3812    // If the target method which we are calling happens to be the
3813    // Object clone() method, we pass the guard.  We do not need this
3814    // guard for non-virtual calls; the caller is known to be the native
3815    // Object clone().
3816    if (is_virtual) {
3817      generate_virtual_guard(obj_klass, slow_region);
3818    }
3819
3820    // The object must be cloneable and must not have a finalizer.
3821    // Both of these conditions may be checked in a single test.
3822    // We could optimize the cloneable test further, but we don't care.
3823    generate_access_flags_guard(obj_klass,
3824                                // Test both conditions:
3825                                JVM_ACC_IS_CLONEABLE | JVM_ACC_HAS_FINALIZER,
3826                                // Must be cloneable but not finalizer:
3827                                JVM_ACC_IS_CLONEABLE,
3828                                slow_region);
3829  }
3830
3831  if (!stopped()) {
3832    // It's an instance, and it passed the slow-path tests.
3833    PreserveJVMState pjvms(this);
3834    Node* obj_size = NULL;
3835    Node* alloc_obj = new_instance(obj_klass, NULL, raw_mem_only, &obj_size);
3836    assert(obj_size != NULL, "");
3837    Node* raw_obj = alloc_obj->in(1);
3838    assert(raw_obj->is_Proj() && raw_obj->in(0)->is_Allocate(), "");
3839    if (ReduceBulkZeroing) {
3840      AllocateNode* alloc = AllocateNode::Ideal_allocation(alloc_obj, &_gvn);
3841      if (alloc != NULL && !alloc->maybe_set_complete(&_gvn))
3842        alloc = NULL;
3843    }
3844    if (!use_ReduceInitialCardMarks()) {
3845      // Put in store barrier for any and all oops we are sticking
3846      // into this object.  (We could avoid this if we could prove
3847      // that the object type contains no oop fields at all.)
3848      card_mark = true;
3849    }
3850    alloc_val->init_req(_instance_alloc, raw_obj);
3851    alloc_siz->init_req(_instance_alloc, obj_size);
3852    alloc_reg->init_req(_instance_alloc, control());
3853    alloc_i_o->init_req(_instance_alloc, i_o());
3854    alloc_mem->init_req(_instance_alloc, memory(raw_adr_type));
3855  }
3856
3857  // Generate code for the slow case.  We make a call to clone().
3858  set_control(_gvn.transform(slow_region));
3859  if (!stopped()) {
3860    PreserveJVMState pjvms(this);
3861    CallJavaNode* slow_call = generate_method_call(vmIntrinsics::_clone, is_virtual);
3862    Node* slow_result = set_results_for_java_call(slow_call);
3863    // this->control() comes from set_results_for_java_call
3864    result_reg->init_req(_slow_path, control());
3865    result_val->init_req(_slow_path, slow_result);
3866    result_i_o ->set_req(_slow_path, i_o());
3867    result_mem ->set_req(_slow_path, reset_memory());
3868  }
3869
3870  // The object is allocated, as an array and/or an instance.  Now copy it.
3871  set_control( _gvn.transform(alloc_reg) );
3872  set_i_o(     _gvn.transform(alloc_i_o) );
3873  set_memory(  _gvn.transform(alloc_mem), raw_adr_type );
3874  Node* raw_obj  = _gvn.transform(alloc_val);
3875
3876  if (!stopped()) {
3877    // Copy the fastest available way.
3878    // (No need for PreserveJVMState, since we're using it all up now.)
3879    // TODO: generate fields/elements copies for small objects instead.
3880    Node* src  = obj;
3881    Node* dest = raw_obj;
3882    Node* size = _gvn.transform(alloc_siz);
3883
3884    // Exclude the header.
3885    int base_off = instanceOopDesc::base_offset_in_bytes();
3886    if (UseCompressedOops) {
3887      assert(base_off % BytesPerLong != 0, "base with compressed oops");
3888      // With compressed oops base_offset_in_bytes is 12, which creates
3889      // a gap, since countx below is rounded down to whole 8-byte words.
3890      // Copy klass and the gap.
3891      base_off = instanceOopDesc::klass_offset_in_bytes();
3892    }
3893    src  = basic_plus_adr(src,  base_off);
3894    dest = basic_plus_adr(dest, base_off);
3895
3896    // Compute the length also, if needed:
3897    Node* countx = size;
3898    countx = _gvn.transform( new (C, 3) SubXNode(countx, MakeConX(base_off)) );
3899    countx = _gvn.transform( new (C, 3) URShiftXNode(countx, intcon(LogBytesPerLong) ));
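    // (countx is the number of 8-byte words to copy: (object size - base_off)
    // shifted right by LogBytesPerLong.  This is why base_off must start on
    // an 8-byte boundary, or be lowered to the klass offset above so that the
    // copy covers the gap as well.)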
3900
3901    // Select an appropriate instruction to initialize the range.
3902    // The CopyArray instruction (if supported) can be optimized
3903    // into a discrete set of scalar loads and stores.
3904    bool disjoint_bases = true;
3905    generate_unchecked_arraycopy(raw_adr_type, T_LONG, disjoint_bases,
3906                                 src, NULL, dest, NULL, countx);
3907
3908    // Now that the object is properly initialized, type it as an oop.
3909    // Use a secondary InitializeNode memory barrier.
3910    InitializeNode* init = insert_mem_bar_volatile(Op_Initialize, raw_adr_idx,
3911                                                   raw_obj)->as_Initialize();
3912    init->set_complete(&_gvn);  // (there is no corresponding AllocateNode)
3913    Node* new_obj = new(C, 2) CheckCastPPNode(control(), raw_obj,
3914                                              TypeInstPtr::NOTNULL);
3915    new_obj = _gvn.transform(new_obj);
3916
3917    // If necessary, emit some card marks afterwards.  (Non-arrays only.)
3918    if (card_mark) {
3919      Node* no_particular_value = NULL;
3920      Node* no_particular_field = NULL;
3921      post_barrier(control(),
3922                   memory(raw_adr_type),
3923                   new_obj,
3924                   no_particular_field,
3925                   raw_adr_idx,
3926                   no_particular_value,
3927                   T_OBJECT,
3928                   false);
3929    }
3930    // Present the results of the fast path.
3931    result_reg->init_req(_fast_path, control());
3932    result_val->init_req(_fast_path, new_obj);
3933    result_i_o ->set_req(_fast_path, i_o());
3934    result_mem ->set_req(_fast_path, reset_memory());
3935  }
3936
3937  // Return the combined state.
3938  set_control(    _gvn.transform(result_reg) );
3939  set_i_o(        _gvn.transform(result_i_o) );
3940  set_all_memory( _gvn.transform(result_mem) );
3941
3942  // Cast the result to a sharper type, since we know what clone does.
3943  Node* new_obj = _gvn.transform(result_val);
3944  Node* cast    = new (C, 2) CheckCastPPNode(control(), new_obj, toop);
3945  push(_gvn.transform(cast));
3946
3947  return true;
3948}
3949
3950
3951// constants for computing the copy function
3952enum {
3953  COPYFUNC_UNALIGNED = 0,
3954  COPYFUNC_ALIGNED = 1,                 // src, dest aligned to HeapWordSize
3955  COPYFUNC_CONJOINT = 0,
3956  COPYFUNC_DISJOINT = 2                 // src != dest, or transfer can descend
3957};
3958
3959// Note:  The condition "disjoint" applies also for overlapping copies
3960// where a descending copy is permitted (i.e., dest_offset <= src_offset).
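// For example (illustrative): an aligned, disjoint jint copy yields selector
// COPYFUNC_ALIGNED + COPYFUNC_DISJOINT == 3 and returns
// StubRoutines::arrayof_jint_disjoint_arraycopy(), while an unaligned,
// conjoint jint copy yields selector 0 and returns StubRoutines::jint_arraycopy().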
3961static address
3962select_arraycopy_function(BasicType t, bool aligned, bool disjoint, const char* &name) {
3963  int selector =
3964    (aligned  ? COPYFUNC_ALIGNED  : COPYFUNC_UNALIGNED) +
3965    (disjoint ? COPYFUNC_DISJOINT : COPYFUNC_CONJOINT);
3966
3967#define RETURN_STUB(xxx_arraycopy) { \
3968  name = #xxx_arraycopy; \
3969  return StubRoutines::xxx_arraycopy(); }
3970
3971  switch (t) {
3972  case T_BYTE:
3973  case T_BOOLEAN:
3974    switch (selector) {
3975    case COPYFUNC_CONJOINT | COPYFUNC_UNALIGNED:  RETURN_STUB(jbyte_arraycopy);
3976    case COPYFUNC_CONJOINT | COPYFUNC_ALIGNED:    RETURN_STUB(arrayof_jbyte_arraycopy);
3977    case COPYFUNC_DISJOINT | COPYFUNC_UNALIGNED:  RETURN_STUB(jbyte_disjoint_arraycopy);
3978    case COPYFUNC_DISJOINT | COPYFUNC_ALIGNED:    RETURN_STUB(arrayof_jbyte_disjoint_arraycopy);
3979    }
3980  case T_CHAR:
3981  case T_SHORT:
3982    switch (selector) {
3983    case COPYFUNC_CONJOINT | COPYFUNC_UNALIGNED:  RETURN_STUB(jshort_arraycopy);
3984    case COPYFUNC_CONJOINT | COPYFUNC_ALIGNED:    RETURN_STUB(arrayof_jshort_arraycopy);
3985    case COPYFUNC_DISJOINT | COPYFUNC_UNALIGNED:  RETURN_STUB(jshort_disjoint_arraycopy);
3986    case COPYFUNC_DISJOINT | COPYFUNC_ALIGNED:    RETURN_STUB(arrayof_jshort_disjoint_arraycopy);
3987    }
3988  case T_INT:
3989  case T_FLOAT:
3990    switch (selector) {
3991    case COPYFUNC_CONJOINT | COPYFUNC_UNALIGNED:  RETURN_STUB(jint_arraycopy);
3992    case COPYFUNC_CONJOINT | COPYFUNC_ALIGNED:    RETURN_STUB(arrayof_jint_arraycopy);
3993    case COPYFUNC_DISJOINT | COPYFUNC_UNALIGNED:  RETURN_STUB(jint_disjoint_arraycopy);
3994    case COPYFUNC_DISJOINT | COPYFUNC_ALIGNED:    RETURN_STUB(arrayof_jint_disjoint_arraycopy);
3995    }
3996  case T_DOUBLE:
3997  case T_LONG:
3998    switch (selector) {
3999    case COPYFUNC_CONJOINT | COPYFUNC_UNALIGNED:  RETURN_STUB(jlong_arraycopy);
4000    case COPYFUNC_CONJOINT | COPYFUNC_ALIGNED:    RETURN_STUB(arrayof_jlong_arraycopy);
4001    case COPYFUNC_DISJOINT | COPYFUNC_UNALIGNED:  RETURN_STUB(jlong_disjoint_arraycopy);
4002    case COPYFUNC_DISJOINT | COPYFUNC_ALIGNED:    RETURN_STUB(arrayof_jlong_disjoint_arraycopy);
4003    }
4004  case T_ARRAY:
4005  case T_OBJECT:
4006    switch (selector) {
4007    case COPYFUNC_CONJOINT | COPYFUNC_UNALIGNED:  RETURN_STUB(oop_arraycopy);
4008    case COPYFUNC_CONJOINT | COPYFUNC_ALIGNED:    RETURN_STUB(arrayof_oop_arraycopy);
4009    case COPYFUNC_DISJOINT | COPYFUNC_UNALIGNED:  RETURN_STUB(oop_disjoint_arraycopy);
4010    case COPYFUNC_DISJOINT | COPYFUNC_ALIGNED:    RETURN_STUB(arrayof_oop_disjoint_arraycopy);
4011    }
4012  default:
4013    ShouldNotReachHere();
4014    return NULL;
4015  }
4016
4017#undef RETURN_STUB
4018}
4019
4020//------------------------------basictype2arraycopy----------------------------
4021address LibraryCallKit::basictype2arraycopy(BasicType t,
4022                                            Node* src_offset,
4023                                            Node* dest_offset,
4024                                            bool disjoint_bases,
4025                                            const char* &name) {
4026  const TypeInt* src_offset_inttype  = gvn().find_int_type(src_offset);
4027  const TypeInt* dest_offset_inttype = gvn().find_int_type(dest_offset);
4028
4029  bool aligned = false;
4030  bool disjoint = disjoint_bases;
4031
4032  // If the offsets are the same, we can treat the memory regions as
4033  // disjoint, because either the memory regions are in different arrays,
4034  // or they are identical (which we can treat as disjoint).  We can also
4035  // treat a copy with a destination index less than the source index
4036  // as disjoint, since a low->high copy will work correctly in this case.
4037  if (src_offset_inttype != NULL && src_offset_inttype->is_con() &&
4038      dest_offset_inttype != NULL && dest_offset_inttype->is_con()) {
4039    // both indices are constants
4040    int s_offs = src_offset_inttype->get_con();
4041    int d_offs = dest_offset_inttype->get_con();
4042    int element_size = type2aelembytes(t);
4043    aligned = ((arrayOopDesc::base_offset_in_bytes(t) + s_offs * element_size) % HeapWordSize == 0) &&
4044              ((arrayOopDesc::base_offset_in_bytes(t) + d_offs * element_size) % HeapWordSize == 0);
4045    if (s_offs >= d_offs)  disjoint = true;
4046  } else if (src_offset == dest_offset && src_offset != NULL) {
4047    // This can occur if the offsets are identical non-constants.
4048    disjoint = true;
4049  }
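  // For example (illustrative numbers): with 4-byte elements, a base offset of
  // 16, and HeapWordSize == 8, s_offs == 2 gives 16 + 2*4 == 24 (a multiple of
  // 8, hence aligned), whereas s_offs == 1 gives 20 (not aligned).  If both
  // sides are aligned and s_offs >= d_offs, the arrayof_*_disjoint stub is chosen.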
4050
4051  return select_arraycopy_function(t, aligned, disjoint, name);
4052}
4053
4054
4055//------------------------------inline_arraycopy-----------------------
4056bool LibraryCallKit::inline_arraycopy() {
4057  // Restore the stack and pop off the arguments.
4058  int nargs = 5;  // 2 oops, 3 ints, no size_t or long
4059  assert(callee()->signature()->size() == nargs, "copy has 5 arguments");
4060
4061  Node *src         = argument(0);
4062  Node *src_offset  = argument(1);
4063  Node *dest        = argument(2);
4064  Node *dest_offset = argument(3);
4065  Node *length      = argument(4);
4066
4067  // Compile time checks.  If any of these checks cannot be verified at compile time,
4068  // we do not make a fast path for this call.  Instead, we let the call remain as it
4069  // is.  The checks we choose to mandate at compile time are:
4070  //
4071  // (1) src and dest are arrays.
4072  const Type* src_type = src->Value(&_gvn);
4073  const Type* dest_type = dest->Value(&_gvn);
4074  const TypeAryPtr* top_src = src_type->isa_aryptr();
4075  const TypeAryPtr* top_dest = dest_type->isa_aryptr();
4076  if (top_src  == NULL || top_src->klass()  == NULL ||
4077      top_dest == NULL || top_dest->klass() == NULL) {
4078    // Conservatively insert a memory barrier on all memory slices.
4079    // Do not let writes into the source float below the arraycopy.
4080    insert_mem_bar(Op_MemBarCPUOrder);
4081
4082    // Call StubRoutines::generic_arraycopy stub.
4083    generate_arraycopy(TypeRawPtr::BOTTOM, T_CONFLICT,
4084                       src, src_offset, dest, dest_offset, length,
4085                       nargs);
4086
4087    // Do not let reads from the destination float above the arraycopy.
4088    // Since we cannot type the arrays, we don't know which slices
4089    // might be affected.  We could restrict this barrier only to those
4090    // memory slices which pertain to array elements--but don't bother.
4091    if (!InsertMemBarAfterArraycopy)
4092      // (If InsertMemBarAfterArraycopy, there is already one in place.)
4093      insert_mem_bar(Op_MemBarCPUOrder);
4094    return true;
4095  }
4096
4097  // (2) src and dest arrays must have elements of the same BasicType
4098  // Figure out the size and type of the elements we will be copying.
4099  BasicType src_elem  =  top_src->klass()->as_array_klass()->element_type()->basic_type();
4100  BasicType dest_elem = top_dest->klass()->as_array_klass()->element_type()->basic_type();
4101  if (src_elem  == T_ARRAY)  src_elem  = T_OBJECT;
4102  if (dest_elem == T_ARRAY)  dest_elem = T_OBJECT;
4103
4104  if (src_elem != dest_elem || dest_elem == T_VOID) {
4105    // The component types are not the same or are not recognized.  Punt.
4106    // (But, avoid the native method wrapper to JVM_ArrayCopy.)
4107    generate_slow_arraycopy(TypePtr::BOTTOM,
4108                            src, src_offset, dest, dest_offset, length,
4109                            nargs);
4110    return true;
4111  }
4112
4113  //---------------------------------------------------------------------------
4114  // We will make a fast path for this call to arraycopy.
4115
4116  // We have the following tests left to perform:
4117  //
4118  // (3) src and dest must not be null.
4119  // (4) src_offset must not be negative.
4120  // (5) dest_offset must not be negative.
4121  // (6) length must not be negative.
4122  // (7) src_offset + length must not exceed length of src.
4123  // (8) dest_offset + length must not exceed length of dest.
4124  // (9) each element of an oop array must be assignable
4125
4126  RegionNode* slow_region = new (C, 1) RegionNode(1);
4127  record_for_igvn(slow_region);
4128
4129  // (3) operands must not be null
4130  // We currently perform our null checks with the do_null_check routine.
4131  // This means that the null exceptions will be reported in the caller
4132  // rather than (correctly) reported inside the native arraycopy call.
4133  // This should be corrected, given time.  We do our null check with the
4134  // stack pointer restored.
4135  _sp += nargs;
4136  src  = do_null_check(src,  T_ARRAY);
4137  dest = do_null_check(dest, T_ARRAY);
4138  _sp -= nargs;
4139
4140  // (4) src_offset must not be negative.
4141  generate_negative_guard(src_offset, slow_region);
4142
4143  // (5) dest_offset must not be negative.
4144  generate_negative_guard(dest_offset, slow_region);
4145
4146  // (6) length must not be negative (moved to generate_arraycopy()).
4147  // generate_negative_guard(length, slow_region);
4148
4149  // (7) src_offset + length must not exceed length of src.
4150  generate_limit_guard(src_offset, length,
4151                       load_array_length(src),
4152                       slow_region);
4153
4154  // (8) dest_offset + length must not exceed length of dest.
4155  generate_limit_guard(dest_offset, length,
4156                       load_array_length(dest),
4157                       slow_region);
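  // For example (illustrative): if src.length == 10, src_offset == 7, and
  // length == 5, then 7 + 5 > 10 and the limit guard above routes the call
  // to slow_region instead of the fast copy stubs.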
4158
4159  // (9) each element of an oop array must be assignable
4160  // The generate_arraycopy subroutine checks this.
4161
4162  // This is where the memory effects are placed:
4163  const TypePtr* adr_type = TypeAryPtr::get_array_body_type(dest_elem);
4164  generate_arraycopy(adr_type, dest_elem,
4165                     src, src_offset, dest, dest_offset, length,
4166                     nargs, false, false, slow_region);
4167
4168  return true;
4169}
4170
4171//-----------------------------generate_arraycopy----------------------
4172// Generate an optimized call to arraycopy.
4173// Caller must guard against non-arrays.
4174// Caller must determine a common array basic-type for both arrays.
4175// Caller must validate offsets against array bounds.
4176// The slow_region has already collected guard failure paths
4177// (such as out of bounds length or non-conformable array types).
4178// The generated code has this shape, in general:
4179//
4180//     if (length == 0)  return   // via zero_path
4181//     slowval = -1
4182//     if (types unknown) {
4183//       slowval = call generic copy loop
4184//       if (slowval == 0)  return  // via checked_path
4185//     } else if (indexes in bounds) {
4186//       if ((is object array) && !(array type check)) {
4187//         slowval = call checked copy loop
4188//         if (slowval == 0)  return  // via checked_path
4189//       } else {
4190//         call bulk copy loop
4191//         return  // via fast_path
4192//       }
4193//     }
4194//     // adjust params for remaining work:
4195//     if (slowval != -1) {
4196//       n = -1^slowval; src_offset += n; dest_offset += n; length -= n
4197//     }
4198//   slow_region:
4199//     call slow arraycopy(src, src_offset, dest, dest_offset, length)
4200//     return  // via slow_call_path
4201//
4202// This routine is used from several intrinsics:  System.arraycopy,
4203// Object.clone (the array subcase), and Arrays.copyOf[Range].
4204//
4205void
4206LibraryCallKit::generate_arraycopy(const TypePtr* adr_type,
4207                                   BasicType basic_elem_type,
4208                                   Node* src,  Node* src_offset,
4209                                   Node* dest, Node* dest_offset,
4210                                   Node* copy_length,
4211                                   int nargs,
4212                                   bool disjoint_bases,
4213                                   bool length_never_negative,
4214                                   RegionNode* slow_region) {
4215
4216  if (slow_region == NULL) {
4217    slow_region = new(C,1) RegionNode(1);
4218    record_for_igvn(slow_region);
4219  }
4220
4221  Node* original_dest      = dest;
4222  AllocateArrayNode* alloc = NULL;  // used for zeroing, if needed
4223  Node* raw_dest           = NULL;  // used before zeroing, if needed
4224  bool  must_clear_dest    = false;
4225
4226  // See if this is the initialization of a newly-allocated array.
4227  // If so, we will take responsibility here for initializing it to zero.
4228  // (Note:  Because tightly_coupled_allocation performs checks on the
4229  // out-edges of the dest, we need to avoid making derived pointers
4230  // from it until we have checked its uses.)
4231  if (ReduceBulkZeroing
4232      && !ZeroTLAB              // pointless if already zeroed
4233      && basic_elem_type != T_CONFLICT // avoid corner case
4234      && !_gvn.eqv_uncast(src, dest)
4235      && ((alloc = tightly_coupled_allocation(dest, slow_region))
4236          != NULL)
4237      && _gvn.find_int_con(alloc->in(AllocateNode::ALength), 1) > 0
4238      && alloc->maybe_set_complete(&_gvn)) {
4239    // "You break it, you buy it."
4240    InitializeNode* init = alloc->initialization();
4241    assert(init->is_complete(), "we just did this");
4242    assert(dest->Opcode() == Op_CheckCastPP, "sanity");
4243    assert(dest->in(0)->in(0) == init, "dest pinned");
4244    raw_dest = dest->in(1);  // grab the raw pointer!
4245    original_dest = dest;
4246    dest = raw_dest;
4247    adr_type = TypeRawPtr::BOTTOM;  // all initializations are into raw memory
4248    // Decouple the original InitializeNode, turning it into a simple membar.
4249    // We will build a new one at the end of this routine.
4250    init->set_req(InitializeNode::RawAddress, top());
4251    // From this point on, every exit path is responsible for
4252    // initializing any non-copied parts of the object to zero.
4253    must_clear_dest = true;
4254  } else {
4255    // No zeroing elimination here.
4256    alloc             = NULL;
4257    //original_dest   = dest;
4258    //must_clear_dest = false;
4259  }
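  // For example (illustrative): when the destination is a freshly allocated
  // 100-element array and the copy fills only elements [0, 10), the bulk
  // zeroing of the allocation is elided here, and the code below remains
  // responsible for clearing the uncopied tail [10, 100) (and any head
  // preceding the copied slice).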
4260
4261  // Results are placed here:
4262  enum { fast_path        = 1,  // normal void-returning assembly stub
4263         checked_path     = 2,  // special assembly stub with cleanup
4264         slow_call_path   = 3,  // something went wrong; call the VM
4265         zero_path        = 4,  // bypass when length of copy is zero
4266         bcopy_path       = 5,  // copy primitive array by 64-bit blocks
4267         PATH_LIMIT       = 6
4268  };
4269  RegionNode* result_region = new(C, PATH_LIMIT) RegionNode(PATH_LIMIT);
4270  PhiNode*    result_i_o    = new(C, PATH_LIMIT) PhiNode(result_region, Type::ABIO);
4271  PhiNode*    result_memory = new(C, PATH_LIMIT) PhiNode(result_region, Type::MEMORY, adr_type);
4272  record_for_igvn(result_region);
4273  _gvn.set_type_bottom(result_i_o);
4274  _gvn.set_type_bottom(result_memory);
4275  assert(adr_type != TypePtr::BOTTOM, "must be RawMem or a T[] slice");
4276
4277  // The slow_control path:
4278  Node* slow_control;
4279  Node* slow_i_o = i_o();
4280  Node* slow_mem = memory(adr_type);
4281  debug_only(slow_control = (Node*) badAddress);
4282
4283  // Checked control path:
4284  Node* checked_control = top();
4285  Node* checked_mem     = NULL;
4286  Node* checked_i_o     = NULL;
4287  Node* checked_value   = NULL;
4288
4289  if (basic_elem_type == T_CONFLICT) {
4290    assert(!must_clear_dest, "");
4291    Node* cv = generate_generic_arraycopy(adr_type,
4292                                          src, src_offset, dest, dest_offset,
4293                                          copy_length, nargs);
4294    if (cv == NULL)  cv = intcon(-1);  // failure (no stub available)
4295    checked_control = control();
4296    checked_i_o     = i_o();
4297    checked_mem     = memory(adr_type);
4298    checked_value   = cv;
4299    set_control(top());         // no fast path
4300  }
4301
4302  Node* not_pos = generate_nonpositive_guard(copy_length, length_never_negative);
4303  if (not_pos != NULL) {
4304    PreserveJVMState pjvms(this);
4305    set_control(not_pos);
4306
4307    // (6) length must not be negative.
4308    if (!length_never_negative) {
4309      generate_negative_guard(copy_length, slow_region);
4310    }
4311
4312    if (!stopped() && must_clear_dest) {
4313      Node* dest_length = alloc->in(AllocateNode::ALength);
4314      if (_gvn.eqv_uncast(copy_length, dest_length)
4315          || _gvn.find_int_con(dest_length, 1) <= 0) {
4316        // There is no zeroing to do.
4317      } else {
4318        // Clear the whole thing since there are no source elements to copy.
4319        generate_clear_array(adr_type, dest, basic_elem_type,
4320                             intcon(0), NULL,
4321                             alloc->in(AllocateNode::AllocSize));
4322      }
4323    }
4324
4325    // Present the results of the zero-length path.
4326    result_region->init_req(zero_path, control());
4327    result_i_o   ->init_req(zero_path, i_o());
4328    result_memory->init_req(zero_path, memory(adr_type));
4329  }
4330
4331  if (!stopped() && must_clear_dest) {
4332    // We have to initialize the *uncopied* part of the array to zero.
4333    // The copy destination is the slice dest[off..off+len].  The other slices
4334    // are dest_head = dest[0..off] and dest_tail = dest[off+len..dest.length].
4335    Node* dest_size   = alloc->in(AllocateNode::AllocSize);
4336    Node* dest_length = alloc->in(AllocateNode::ALength);
4337    Node* dest_tail   = _gvn.transform( new(C,3) AddINode(dest_offset,
4338                                                          copy_length) );
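    // For example (illustrative numbers): if dest.length == 100,
    // dest_offset == 10, and copy_length == 20, then dest_tail == 30; the head
    // dest[0..10) is cleared just below, the copy will fill dest[10..30), and
    // the tail dest[30..100) is cleared by the tail-handling code further down.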
4339
4340    // If there is a head section that needs zeroing, do it now.
4341    if (find_int_con(dest_offset, -1) != 0) {
4342      generate_clear_array(adr_type, dest, basic_elem_type,
4343                           intcon(0), dest_offset,
4344                           NULL);
4345    }
4346
4347    // Next, perform a dynamic check on the tail length.
4348    // It is often zero, and we can win big if we prove this.
4349    // There are two wins:  Avoid generating the ClearArray
4350    // with its attendant messy index arithmetic, and upgrade
4351    // the copy to a more hardware-friendly word size of 64 bits.
4352    Node* tail_ctl = NULL;
4353    if (!stopped() && !_gvn.eqv_uncast(dest_tail, dest_length)) {
4354      Node* cmp_lt   = _gvn.transform( new(C,3) CmpINode(dest_tail, dest_length) );
4355      Node* bol_lt   = _gvn.transform( new(C,2) BoolNode(cmp_lt, BoolTest::lt) );
4356      tail_ctl = generate_slow_guard(bol_lt, NULL);
4357      assert(tail_ctl != NULL || !stopped(), "must be an outcome");
4358    }
4359
4360    // At this point, let's assume there is no tail.
4361    if (!stopped() && alloc != NULL && basic_elem_type != T_OBJECT) {
4362      // There is no tail.  Try an upgrade to a 64-bit copy.
4363      bool didit = false;
4364      { PreserveJVMState pjvms(this);
4365        didit = generate_block_arraycopy(adr_type, basic_elem_type, alloc,
4366                                         src, src_offset, dest, dest_offset,
4367                                         dest_size);
4368        if (didit) {
4369          // Present the results of the block-copying fast call.
4370          result_region->init_req(bcopy_path, control());
4371          result_i_o   ->init_req(bcopy_path, i_o());
4372          result_memory->init_req(bcopy_path, memory(adr_type));
4373        }
4374      }
4375      if (didit)
4376        set_control(top());     // no regular fast path
4377    }
4378
4379    // Clear the tail, if any.
4380    if (tail_ctl != NULL) {
4381      Node* notail_ctl = stopped() ? NULL : control();
4382      set_control(tail_ctl);
4383      if (notail_ctl == NULL) {
4384        generate_clear_array(adr_type, dest, basic_elem_type,
4385                             dest_tail, NULL,
4386                             dest_size);
4387      } else {
4388        // Make a local merge.
4389        Node* done_ctl = new(C,3) RegionNode(3);
4390        Node* done_mem = new(C,3) PhiNode(done_ctl, Type::MEMORY, adr_type);
4391        done_ctl->init_req(1, notail_ctl);
4392        done_mem->init_req(1, memory(adr_type));
4393        generate_clear_array(adr_type, dest, basic_elem_type,
4394                             dest_tail, NULL,
4395                             dest_size);
4396        done_ctl->init_req(2, control());
4397        done_mem->init_req(2, memory(adr_type));
4398        set_control( _gvn.transform(done_ctl) );
4399        set_memory(  _gvn.transform(done_mem), adr_type );
4400      }
4401    }
4402  }
4403
4404  BasicType copy_type = basic_elem_type;
4405  assert(basic_elem_type != T_ARRAY, "caller must fix this");
4406  if (!stopped() && copy_type == T_OBJECT) {
4407    // If src and dest have compatible element types, we can copy bits.
4408    // Types S[] and D[] are compatible if D is a supertype of S.
4409    //
4410    // If they are not, we will use checked_oop_disjoint_arraycopy,
4411    // which performs a fast optimistic per-oop check, and backs off
4412    // further to JVM_ArrayCopy on the first per-oop check that fails.
4413    // (Actually, we don't move raw bits only; the GC requires card marks.)
4414
4415    // Get the klassOop for both src and dest
4416    Node* src_klass  = load_object_klass(src);
4417    Node* dest_klass = load_object_klass(dest);
4418
4419    // Generate the subtype check.
4420    // This might fold up statically, or then again it might not.
4421    //
4422    // Non-static example:  Copying List<String>.elements to a new String[].
4423    // The backing store for a List<String> is always an Object[],
4424    // but its elements are always type String, if the generic types
4425    // are correct at the source level.
4426    //
4427    // Test S[] against D[], not S against D, because (probably)
4428    // the secondary supertype cache is less busy for S[] than S.
4429    // This usually only matters when D is an interface.
4430    Node* not_subtype_ctrl = gen_subtype_check(src_klass, dest_klass);
4431    // Plug failing path into checked_oop_disjoint_arraycopy
4432    if (not_subtype_ctrl != top()) {
4433      PreserveJVMState pjvms(this);
4434      set_control(not_subtype_ctrl);
4435      // (At this point we can assume disjoint_bases, since types differ.)
4436      int ek_offset = objArrayKlass::element_klass_offset_in_bytes() + sizeof(oopDesc);
4437      Node* p1 = basic_plus_adr(dest_klass, ek_offset);
4438      Node* n1 = LoadKlassNode::make(_gvn, immutable_memory(), p1, TypeRawPtr::BOTTOM);
4439      Node* dest_elem_klass = _gvn.transform(n1);
4440      Node* cv = generate_checkcast_arraycopy(adr_type,
4441                                              dest_elem_klass,
4442                                              src, src_offset, dest, dest_offset,
4443                                              copy_length,
4444                                              nargs);
4445      if (cv == NULL)  cv = intcon(-1);  // failure (no stub available)
4446      checked_control = control();
4447      checked_i_o     = i_o();
4448      checked_mem     = memory(adr_type);
4449      checked_value   = cv;
4450    }
4451    // At this point we know we do not need type checks on oop stores.
4452
4453    // Let's see if we need card marks:
4454    if (alloc != NULL && use_ReduceInitialCardMarks()) {
4455      // If we do not need card marks, copy using the jint or jlong stub.
4456      copy_type = LP64_ONLY(UseCompressedOops ? T_INT : T_LONG) NOT_LP64(T_INT);
4457      assert(type2aelembytes(basic_elem_type) == type2aelembytes(copy_type),
4458             "sizes agree");
4459    }
4460  }
4461
4462  if (!stopped()) {
4463    // Generate the fast path, if possible.
4464    PreserveJVMState pjvms(this);
4465    generate_unchecked_arraycopy(adr_type, copy_type, disjoint_bases,
4466                                 src, src_offset, dest, dest_offset,
4467                                 ConvI2X(copy_length));
4468
4469    // Present the results of the fast call.
4470    result_region->init_req(fast_path, control());
4471    result_i_o   ->init_req(fast_path, i_o());
4472    result_memory->init_req(fast_path, memory(adr_type));
4473  }
4474
4475  // Here are all the slow paths up to this point, in one bundle:
4476  slow_control = top();
4477  if (slow_region != NULL)
4478    slow_control = _gvn.transform(slow_region);
4479  debug_only(slow_region = (RegionNode*)badAddress);
4480
4481  set_control(checked_control);
4482  if (!stopped()) {
4483    // Clean up after the checked call.
4484    // The returned value is either 0 or -1^K,
4485    // where K = number of partially transferred array elements.
4486    Node* cmp = _gvn.transform( new(C, 3) CmpINode(checked_value, intcon(0)) );
4487    Node* bol = _gvn.transform( new(C, 2) BoolNode(cmp, BoolTest::eq) );
4488    IfNode* iff = create_and_map_if(control(), bol, PROB_MAX, COUNT_UNKNOWN);
4489
4490    // If it is 0, we are done, so transfer to the end.
4491    Node* checks_done = _gvn.transform( new(C, 1) IfTrueNode(iff) );
4492    result_region->init_req(checked_path, checks_done);
4493    result_i_o   ->init_req(checked_path, checked_i_o);
4494    result_memory->init_req(checked_path, checked_mem);
4495
4496    // If it is not zero, merge into the slow call.
4497    set_control( _gvn.transform( new(C, 1) IfFalseNode(iff) ));
4498    RegionNode* slow_reg2 = new(C, 3) RegionNode(3);
4499    PhiNode*    slow_i_o2 = new(C, 3) PhiNode(slow_reg2, Type::ABIO);
4500    PhiNode*    slow_mem2 = new(C, 3) PhiNode(slow_reg2, Type::MEMORY, adr_type);
4501    record_for_igvn(slow_reg2);
4502    slow_reg2  ->init_req(1, slow_control);
4503    slow_i_o2  ->init_req(1, slow_i_o);
4504    slow_mem2  ->init_req(1, slow_mem);
4505    slow_reg2  ->init_req(2, control());
4506    slow_i_o2  ->init_req(2, i_o());
4507    slow_mem2  ->init_req(2, memory(adr_type));
4508
4509    slow_control = _gvn.transform(slow_reg2);
4510    slow_i_o     = _gvn.transform(slow_i_o2);
4511    slow_mem     = _gvn.transform(slow_mem2);
4512
4513    if (alloc != NULL) {
4514      // We'll restart from the very beginning, after zeroing the whole thing.
4515      // This can cause double writes, but that's OK since dest is brand new.
4516      // So we ignore the low 31 bits of the value returned from the stub.
4517    } else {
4518      // We must continue the copy exactly where it failed, or else
4519      // another thread might see the wrong number of writes to dest.
4520      Node* checked_offset = _gvn.transform( new(C, 3) XorINode(checked_value, intcon(-1)) );
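      // For example (illustrative): if the stub stored 3 elements before a
      // failing element check, checked_value == -1^3 == -4, so checked_offset
      // == (-4)^(-1) == 3 and the slow call resumes at src_offset+3 and
      // dest_offset+3 with copy_length-3 elements left to transfer.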
4521      Node* slow_offset    = new(C, 3) PhiNode(slow_reg2, TypeInt::INT);
4522      slow_offset->init_req(1, intcon(0));
4523      slow_offset->init_req(2, checked_offset);
4524      slow_offset  = _gvn.transform(slow_offset);
4525
4526      // Adjust the arguments by the conditionally incoming offset.
4527      Node* src_off_plus  = _gvn.transform( new(C, 3) AddINode(src_offset,  slow_offset) );
4528      Node* dest_off_plus = _gvn.transform( new(C, 3) AddINode(dest_offset, slow_offset) );
4529      Node* length_minus  = _gvn.transform( new(C, 3) SubINode(copy_length, slow_offset) );
4530
4531      // Tweak the node variables to adjust the code produced below:
4532      src_offset  = src_off_plus;
4533      dest_offset = dest_off_plus;
4534      copy_length = length_minus;
4535    }
4536  }
4537
4538  set_control(slow_control);
4539  if (!stopped()) {
4540    // Generate the slow path, if needed.
4541    PreserveJVMState pjvms(this);   // replace_in_map may trash the map
4542
4543    set_memory(slow_mem, adr_type);
4544    set_i_o(slow_i_o);
4545
4546    if (must_clear_dest) {
4547      generate_clear_array(adr_type, dest, basic_elem_type,
4548                           intcon(0), NULL,
4549                           alloc->in(AllocateNode::AllocSize));
4550    }
4551
4552    if (dest != original_dest) {
4553      // Promote from rawptr to oop, so it looks right in the call's GC map.
4554      dest = _gvn.transform( new(C,2) CheckCastPPNode(control(), dest,
4555                                                      TypeInstPtr::NOTNULL) );
4556
4557      // Edit the call's debug-info to avoid referring to original_dest.
4558      // (The problem with original_dest is that it isn't ready until
4559      // after the InitializeNode completes, but this stuff is before.)
4560      // Substitute in the locally valid dest_oop.
4561      replace_in_map(original_dest, dest);
4562    }
4563
4564    generate_slow_arraycopy(adr_type,
4565                            src, src_offset, dest, dest_offset,
4566                            copy_length, nargs);
4567
4568    result_region->init_req(slow_call_path, control());
4569    result_i_o   ->init_req(slow_call_path, i_o());
4570    result_memory->init_req(slow_call_path, memory(adr_type));
4571  }
4572
4573  // Remove unused edges.
4574  for (uint i = 1; i < result_region->req(); i++) {
4575    if (result_region->in(i) == NULL)
4576      result_region->init_req(i, top());
4577  }
4578
4579  // Finished; return the combined state.
4580  set_control( _gvn.transform(result_region) );
4581  set_i_o(     _gvn.transform(result_i_o)    );
4582  set_memory(  _gvn.transform(result_memory), adr_type );
4583
4584  if (dest != original_dest) {
4585    // Pin the "finished" array node after the arraycopy/zeroing operations.
4586    // Use a secondary InitializeNode memory barrier.
4587    InitializeNode* init = insert_mem_bar_volatile(Op_Initialize,
4588                                                   Compile::AliasIdxRaw,
4589                                                   raw_dest)->as_Initialize();
4590    init->set_complete(&_gvn);  // (there is no corresponding AllocateNode)
4591    _gvn.hash_delete(original_dest);
4592    original_dest->set_req(0, control());
4593    _gvn.hash_find_insert(original_dest);  // put back into GVN table
4594  }
4595
4596  // The memory edges above are precise in order to model effects around
4597  // array copies accurately to allow value numbering of field loads around
4598  // arraycopy.  Such field loads, both before and after, are common in Java
4599  // collections and similar classes involving header/array data structures.
4600  //
4601  // But with a low number of registers, or when some registers are used or
4602  // killed by arraycopy calls, this causes register spilling onto the stack. See 6544710.
4603  // The next memory barrier is added to avoid it. If the arraycopy can be
4604  // optimized away (which it can, sometimes) then we can manually remove
4605  // the membar also.
4606  if (InsertMemBarAfterArraycopy)
4607    insert_mem_bar(Op_MemBarCPUOrder);
4608}
4609
4610
4611// Helper function which determines if an arraycopy immediately follows
4612// an allocation, with no intervening tests or other escapes for the object.
4613AllocateArrayNode*
4614LibraryCallKit::tightly_coupled_allocation(Node* ptr,
4615                                           RegionNode* slow_region) {
4616  if (stopped())             return NULL;  // no fast path
4617  if (C->AliasLevel() == 0)  return NULL;  // no MergeMems around
4618
4619  AllocateArrayNode* alloc = AllocateArrayNode::Ideal_array_allocation(ptr, &_gvn);
4620  if (alloc == NULL)  return NULL;
4621
4622  Node* rawmem = memory(Compile::AliasIdxRaw);
4623  // Is the allocation's memory state untouched?
4624  if (!(rawmem->is_Proj() && rawmem->in(0)->is_Initialize())) {
4625    // Bail out if there have been raw-memory effects since the allocation.
4626    // (Example:  There might have been a call or safepoint.)
4627    return NULL;
4628  }
4629  rawmem = rawmem->in(0)->as_Initialize()->memory(Compile::AliasIdxRaw);
4630  if (!(rawmem->is_Proj() && rawmem->in(0) == alloc)) {
4631    return NULL;
4632  }
4633
4634  // There must be no unexpected observers of this allocation.
4635  for (DUIterator_Fast imax, i = ptr->fast_outs(imax); i < imax; i++) {
4636    Node* obs = ptr->fast_out(i);
4637    if (obs != this->map()) {
4638      return NULL;
4639    }
4640  }
4641
4642  // This arraycopy must unconditionally follow the allocation of the ptr.
4643  Node* alloc_ctl = ptr->in(0);
4644  assert(just_allocated_object(alloc_ctl) == ptr, "most recent alloc");
4645
4646  Node* ctl = control();
4647  while (ctl != alloc_ctl) {
4648    // There may be guards which feed into the slow_region.
4649    // Any other control flow means that we might not get a chance
4650    // to finish initializing the allocated object.
4651    if ((ctl->is_IfFalse() || ctl->is_IfTrue()) && ctl->in(0)->is_If()) {
4652      IfNode* iff = ctl->in(0)->as_If();
4653      Node* not_ctl = iff->proj_out(1 - ctl->as_Proj()->_con);
4654      assert(not_ctl != NULL && not_ctl != ctl, "found alternate");
4655      if (slow_region != NULL && slow_region->find_edge(not_ctl) >= 1) {
4656        ctl = iff->in(0);       // This test feeds the known slow_region.
4657        continue;
4658      }
4659      // One more try:  Various low-level checks bottom out in
4660      // uncommon traps.  If the debug-info of the trap omits
4661      // any reference to the allocation, as we've already
4662      // observed, then there can be no objection to the trap.
4663      bool found_trap = false;
4664      for (DUIterator_Fast jmax, j = not_ctl->fast_outs(jmax); j < jmax; j++) {
4665        Node* obs = not_ctl->fast_out(j);
4666        if (obs->in(0) == not_ctl && obs->is_Call() &&
4667            (obs->as_Call()->entry_point() ==
4668             SharedRuntime::uncommon_trap_blob()->instructions_begin())) {
4669          found_trap = true; break;
4670        }
4671      }
4672      if (found_trap) {
4673        ctl = iff->in(0);       // This test feeds a harmless uncommon trap.
4674        continue;
4675      }
4676    }
4677    return NULL;
4678  }
4679
4680  // If we get this far, we have an allocation which immediately
4681  // precedes the arraycopy, and we can take over zeroing the new object.
4682  // The arraycopy will finish the initialization, and provide
4683  // a new control state to which we will anchor the destination pointer.
4684
4685  return alloc;
4686}
4687
4688// Helper for initialization of arrays, creating a ClearArray.
4689// It writes zero bits in [start..end), within the body of an array object.
4690// The memory effects are all chained onto the 'adr_type' alias category.
4691//
4692// Since the object is otherwise uninitialized, we are free
4693// to put a little "slop" around the edges of the cleared area,
4694// as long as it does not go back into the array's header,
4695// or beyond the array end within the heap.
4696//
4697// The lower edge can be rounded down to the nearest jint and the
4698// upper edge can be rounded up to the nearest MinObjAlignmentInBytes.
4699//
4700// Arguments:
4701//   adr_type           memory slice where writes are generated
4702//   dest               oop of the destination array
4703//   basic_elem_type    element type of the destination
4704//   slice_idx          array index of first element to store
4705//   slice_len          number of elements to store (or NULL)
4706//   dest_size          total size in bytes of the array object
4707//
4708// Exactly one of slice_len or dest_size must be non-NULL.
4709// If dest_size is non-NULL, zeroing extends to the end of the object.
4710// If slice_len is non-NULL, the slice_idx value must be a constant.
4711void
4712LibraryCallKit::generate_clear_array(const TypePtr* adr_type,
4713                                     Node* dest,
4714                                     BasicType basic_elem_type,
4715                                     Node* slice_idx,
4716                                     Node* slice_len,
4717                                     Node* dest_size) {
4718  // one or the other but not both of slice_len and dest_size:
4719  assert((slice_len != NULL? 1: 0) + (dest_size != NULL? 1: 0) == 1, "");
4720  if (slice_len == NULL)  slice_len = top();
4721  if (dest_size == NULL)  dest_size = top();
4722
4723  // operate on this memory slice:
4724  Node* mem = memory(adr_type); // memory slice to operate on
4725
4726  // scaling and rounding of indexes:
4727  int scale = exact_log2(type2aelembytes(basic_elem_type));
4728  int abase = arrayOopDesc::base_offset_in_bytes(basic_elem_type);
4729  int clear_low = (-1 << scale) & (BytesPerInt  - 1);
4730  int bump_bit  = (-1 << scale) & BytesPerInt;
4731
4732  // determine constant starts and ends
4733  const intptr_t BIG_NEG = -128;
4734  assert(BIG_NEG + 2*abase < 0, "neg enough");
4735  intptr_t slice_idx_con = (intptr_t) find_int_con(slice_idx, BIG_NEG);
4736  intptr_t slice_len_con = (intptr_t) find_int_con(slice_len, BIG_NEG);
4737  if (slice_len_con == 0) {
4738    return;                     // nothing to do here
4739  }
4740  intptr_t start_con = (abase + (slice_idx_con << scale)) & ~clear_low;
4741  intptr_t end_con   = find_intptr_t_con(dest_size, -1);
4742  if (slice_idx_con >= 0 && slice_len_con >= 0) {
4743    assert(end_con < 0, "not two cons");
4744    end_con = round_to(abase + ((slice_idx_con + slice_len_con) << scale),
4745                       BytesPerLong);
4746  }
4747
4748  if (start_con >= 0 && end_con >= 0) {
4749    // Constant start and end.  Simple.
4750    mem = ClearArrayNode::clear_memory(control(), mem, dest,
4751                                       start_con, end_con, &_gvn);
4752  } else if (start_con >= 0 && dest_size != top()) {
4753    // Constant start, pre-rounded end after the tail of the array.
4754    Node* end = dest_size;
4755    mem = ClearArrayNode::clear_memory(control(), mem, dest,
4756                                       start_con, end, &_gvn);
4757  } else if (start_con >= 0 && slice_len != top()) {
4758    // Constant start, non-constant end.  End needs rounding up.
4759    // End offset = round_up(abase + ((slice_idx_con + slice_len) << scale), 8)
4760    intptr_t end_base  = abase + (slice_idx_con << scale);
4761    int      end_round = (-1 << scale) & (BytesPerLong  - 1);
4762    Node*    end       = ConvI2X(slice_len);
4763    if (scale != 0)
4764      end = _gvn.transform( new(C,3) LShiftXNode(end, intcon(scale) ));
4765    end_base += end_round;
4766    end = _gvn.transform( new(C,3) AddXNode(end, MakeConX(end_base)) );
4767    end = _gvn.transform( new(C,3) AndXNode(end, MakeConX(~end_round)) );
4768    mem = ClearArrayNode::clear_memory(control(), mem, dest,
4769                                       start_con, end, &_gvn);
4770  } else if (start_con < 0 && dest_size != top()) {
4771    // Non-constant start, pre-rounded end after the tail of the array.
4772    // This is almost certainly a "round-to-end" operation.
4773    Node* start = slice_idx;
4774    start = ConvI2X(start);
4775    if (scale != 0)
4776      start = _gvn.transform( new(C,3) LShiftXNode( start, intcon(scale) ));
4777    start = _gvn.transform( new(C,3) AddXNode(start, MakeConX(abase)) );
4778    if ((bump_bit | clear_low) != 0) {
4779      int to_clear = (bump_bit | clear_low);
4780      // Align up mod 8, then store a jint zero unconditionally
4781      // just before the mod-8 boundary.
4782      if (((abase + bump_bit) & ~to_clear) - bump_bit
4783          < arrayOopDesc::length_offset_in_bytes() + BytesPerInt) {
4784        bump_bit = 0;
4785        assert((abase & to_clear) == 0, "array base must be long-aligned");
4786      } else {
4787        // Bump 'start' up to (or past) the next jint boundary:
4788        start = _gvn.transform( new(C,3) AddXNode(start, MakeConX(bump_bit)) );
4789        assert((abase & clear_low) == 0, "array base must be int-aligned");
4790      }
4791      // Round bumped 'start' down to jlong boundary in body of array.
4792      start = _gvn.transform( new(C,3) AndXNode(start, MakeConX(~to_clear)) );
4793      if (bump_bit != 0) {
4794        // Store a zero to the immediately preceding jint:
4795        Node* x1 = _gvn.transform( new(C,3) AddXNode(start, MakeConX(-bump_bit)) );
4796        Node* p1 = basic_plus_adr(dest, x1);
4797        mem = StoreNode::make(_gvn, control(), mem, p1, adr_type, intcon(0), T_INT);
4798        mem = _gvn.transform(mem);
4799      }
4800    }
4801    Node* end = dest_size; // pre-rounded
4802    mem = ClearArrayNode::clear_memory(control(), mem, dest,
4803                                       start, end, &_gvn);
4804  } else {
4805    // Non-constant start, unrounded non-constant end.
4806    // (Nobody zeroes a random midsection of an array using this routine.)
4807    ShouldNotReachHere();       // fix caller
4808  }
4809
4810  // Done.
4811  set_memory(mem, adr_type);
4812}
4813
4814
4815bool
4816LibraryCallKit::generate_block_arraycopy(const TypePtr* adr_type,
4817                                         BasicType basic_elem_type,
4818                                         AllocateNode* alloc,
4819                                         Node* src,  Node* src_offset,
4820                                         Node* dest, Node* dest_offset,
4821                                         Node* dest_size) {
4822  // See if there is an advantage from block transfer.
4823  int scale = exact_log2(type2aelembytes(basic_elem_type));
4824  if (scale >= LogBytesPerLong)
4825    return false;               // it is already a block transfer
4826
4827  // Look at the alignment of the starting offsets.
4828  int abase = arrayOopDesc::base_offset_in_bytes(basic_elem_type);
4829  const intptr_t BIG_NEG = -128;
4830  assert(BIG_NEG + 2*abase < 0, "neg enough");
4831
4832  intptr_t src_off  = abase + ((intptr_t) find_int_con(src_offset, -1)  << scale);
4833  intptr_t dest_off = abase + ((intptr_t) find_int_con(dest_offset, -1) << scale);
4834  if (src_off < 0 || dest_off < 0)
4835    // At present, we can only understand constants.
4836    return false;
4837
4838  if (((src_off | dest_off) & (BytesPerLong-1)) != 0) {
4839    // Non-aligned; too bad.
4840    // One more chance:  Pick off an initial 32-bit word.
4841    // This is a common case, since abase need not be a multiple of 8.
4842    if (((src_off | dest_off) & (BytesPerLong-1)) == BytesPerInt &&
4843        ((src_off ^ dest_off) & (BytesPerLong-1)) == 0) {
4844      Node* sptr = basic_plus_adr(src,  src_off);
4845      Node* dptr = basic_plus_adr(dest, dest_off);
4846      Node* sval = make_load(control(), sptr, TypeInt::INT, T_INT, adr_type);
4847      store_to_memory(control(), dptr, sval, T_INT, adr_type);
4848      src_off += BytesPerInt;
4849      dest_off += BytesPerInt;
4850    } else {
4851      return false;
4852    }
4853  }
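  // For example (illustrative offsets): src_off == 12 and dest_off == 20 give
  // (12|20) & 7 == 4 and (12^20) & 7 == 0, so one jint is copied first; the
  // offsets then become 16 and 24, both 8-byte aligned, and the remainder is
  // copied in 64-bit blocks below.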
4854  assert(src_off % BytesPerLong == 0, "");
4855  assert(dest_off % BytesPerLong == 0, "");
4856
4857  // Do this copy by giant steps.
4858  Node* sptr  = basic_plus_adr(src,  src_off);
4859  Node* dptr  = basic_plus_adr(dest, dest_off);
4860  Node* countx = dest_size;
4861  countx = _gvn.transform( new (C, 3) SubXNode(countx, MakeConX(dest_off)) );
4862  countx = _gvn.transform( new (C, 3) URShiftXNode(countx, intcon(LogBytesPerLong)) );
4863
4864  bool disjoint_bases = true;   // since alloc != NULL
4865  generate_unchecked_arraycopy(adr_type, T_LONG, disjoint_bases,
4866                               sptr, NULL, dptr, NULL, countx);
4867
4868  return true;
4869}
4870
4871
4872// Helper function; generates code for the slow case.
4873// We make a call to a runtime method which emulates the native method,
4874// but without the native wrapper overhead.
4875void
4876LibraryCallKit::generate_slow_arraycopy(const TypePtr* adr_type,
4877                                        Node* src,  Node* src_offset,
4878                                        Node* dest, Node* dest_offset,
4879                                        Node* copy_length,
4880                                        int nargs) {
4881  _sp += nargs; // any deopt will start just before call to enclosing method
4882  Node* call = make_runtime_call(RC_NO_LEAF | RC_UNCOMMON,
4883                                 OptoRuntime::slow_arraycopy_Type(),
4884                                 OptoRuntime::slow_arraycopy_Java(),
4885                                 "slow_arraycopy", adr_type,
4886                                 src, src_offset, dest, dest_offset,
4887                                 copy_length);
4888  _sp -= nargs;
4889
4890  // Handle exceptions thrown by this fellow:
4891  make_slow_call_ex(call, env()->Throwable_klass(), false);
4892}
4893
4894// Helper function; generates code for cases requiring runtime checks.
4895Node*
4896LibraryCallKit::generate_checkcast_arraycopy(const TypePtr* adr_type,
4897                                             Node* dest_elem_klass,
4898                                             Node* src,  Node* src_offset,
4899                                             Node* dest, Node* dest_offset,
4900                                             Node* copy_length,
4901                                             int nargs) {
4902  if (stopped())  return NULL;
4903
4904  address copyfunc_addr = StubRoutines::checkcast_arraycopy();
4905  if (copyfunc_addr == NULL) { // Stub was not generated, go slow path.
4906    return NULL;
4907  }
4908
4909  // Pick out the parameters required to perform a store-check
4910  // for the target array.  This is an optimistic check.  It will
4911  // look in each non-null element's class, at the desired klass's
4912  // super_check_offset, for the desired klass.
4913  int sco_offset = Klass::super_check_offset_offset_in_bytes() + sizeof(oopDesc);
4914  Node* p3 = basic_plus_adr(dest_elem_klass, sco_offset);
4915  Node* n3 = new(C, 3) LoadINode(NULL, immutable_memory(), p3, TypeRawPtr::BOTTOM);
4916  Node* check_offset = _gvn.transform(n3);
4917  Node* check_value  = dest_elem_klass;
4918
4919  Node* src_start  = array_element_address(src,  src_offset,  T_OBJECT);
4920  Node* dest_start = array_element_address(dest, dest_offset, T_OBJECT);
4921
4922  // (We know the arrays are never conjoint, because their types differ.)
4923  Node* call = make_runtime_call(RC_LEAF|RC_NO_FP,
4924                                 OptoRuntime::checkcast_arraycopy_Type(),
4925                                 copyfunc_addr, "checkcast_arraycopy", adr_type,
4926                                 // five arguments, of which two are
4927                                 // intptr_t (jlong in LP64)
4928                                 src_start, dest_start,
4929                                 copy_length XTOP,
4930                                 check_offset XTOP,
4931                                 check_value);
4932
4933  return _gvn.transform(new (C, 1) ProjNode(call, TypeFunc::Parms));
4934}
4935
4936
4937// Helper function; generates code for cases requiring runtime checks.
4938Node*
4939LibraryCallKit::generate_generic_arraycopy(const TypePtr* adr_type,
4940                                           Node* src,  Node* src_offset,
4941                                           Node* dest, Node* dest_offset,
4942                                           Node* copy_length,
4943                                           int nargs) {
4944  if (stopped())  return NULL;
4945
4946  address copyfunc_addr = StubRoutines::generic_arraycopy();
4947  if (copyfunc_addr == NULL) { // Stub was not generated, go slow path.
4948    return NULL;
4949  }
4950
4951  Node* call = make_runtime_call(RC_LEAF|RC_NO_FP,
4952                    OptoRuntime::generic_arraycopy_Type(),
4953                    copyfunc_addr, "generic_arraycopy", adr_type,
4954                    src, src_offset, dest, dest_offset, copy_length);
4955
4956  return _gvn.transform(new (C, 1) ProjNode(call, TypeFunc::Parms));
4957}
4958
4959// Helper function; generates the fast out-of-line call to an arraycopy stub.
4960void
4961LibraryCallKit::generate_unchecked_arraycopy(const TypePtr* adr_type,
4962                                             BasicType basic_elem_type,
4963                                             bool disjoint_bases,
4964                                             Node* src,  Node* src_offset,
4965                                             Node* dest, Node* dest_offset,
4966                                             Node* copy_length) {
4967  if (stopped())  return;               // nothing to do
4968
4969  Node* src_start  = src;
4970  Node* dest_start = dest;
4971  if (src_offset != NULL || dest_offset != NULL) {
4972    assert(src_offset != NULL && dest_offset != NULL, "");
4973    src_start  = array_element_address(src,  src_offset,  basic_elem_type);
4974    dest_start = array_element_address(dest, dest_offset, basic_elem_type);
4975  }
4976
4977  // Figure out which arraycopy runtime method to call.
4978  const char* copyfunc_name = "arraycopy";
4979  address     copyfunc_addr =
4980      basictype2arraycopy(basic_elem_type, src_offset, dest_offset,
4981                          disjoint_bases, copyfunc_name);
4982
4983  // Call it.  Note that the count_ix value is not scaled to a byte-size.
4984  make_runtime_call(RC_LEAF|RC_NO_FP,
4985                    OptoRuntime::fast_arraycopy_Type(),
4986                    copyfunc_addr, copyfunc_name, adr_type,
4987                    src_start, dest_start, copy_length XTOP);
4988}
4989