memnode.cpp revision 0:a61af66fc99e
1/*
2 * Copyright 1997-2007 Sun Microsystems, Inc.  All Rights Reserved.
3 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
4 *
5 * This code is free software; you can redistribute it and/or modify it
6 * under the terms of the GNU General Public License version 2 only, as
7 * published by the Free Software Foundation.
8 *
9 * This code is distributed in the hope that it will be useful, but WITHOUT
10 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
11 * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
12 * version 2 for more details (a copy is included in the LICENSE file that
13 * accompanied this code).
14 *
15 * You should have received a copy of the GNU General Public License version
16 * 2 along with this work; if not, write to the Free Software Foundation,
17 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
18 *
19 * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
20 * CA 95054 USA or visit www.sun.com if you need additional information or
21 * have any questions.
22 *
23 */
24
25// Portions of code courtesy of Clifford Click
26
27// Optimization - Graph Style
28
29#include "incls/_precompiled.incl"
30#include "incls/_memnode.cpp.incl"
31
32//=============================================================================
33uint MemNode::size_of() const { return sizeof(*this); }
34
35const TypePtr *MemNode::adr_type() const {
36  Node* adr = in(Address);
37  const TypePtr* cross_check = NULL;
38  DEBUG_ONLY(cross_check = _adr_type);
39  return calculate_adr_type(adr->bottom_type(), cross_check);
40}
41
42#ifndef PRODUCT
43void MemNode::dump_spec(outputStream *st) const {
44  if (in(Address) == NULL)  return; // node is dead
45#ifndef ASSERT
46  // fake the missing field
47  const TypePtr* _adr_type = NULL;
48  if (in(Address) != NULL)
49    _adr_type = in(Address)->bottom_type()->isa_ptr();
50#endif
51  dump_adr_type(this, _adr_type, st);
52
53  Compile* C = Compile::current();
54  if( C->alias_type(_adr_type)->is_volatile() )
55    st->print(" Volatile!");
56}
57
58void MemNode::dump_adr_type(const Node* mem, const TypePtr* adr_type, outputStream *st) {
59  st->print(" @");
60  if (adr_type == NULL) {
61    st->print("NULL");
62  } else {
63    adr_type->dump_on(st);
64    Compile* C = Compile::current();
65    Compile::AliasType* atp = NULL;
66    if (C->have_alias_type(adr_type))  atp = C->alias_type(adr_type);
67    if (atp == NULL)
68      st->print(", idx=?\?;");
69    else if (atp->index() == Compile::AliasIdxBot)
70      st->print(", idx=Bot;");
71    else if (atp->index() == Compile::AliasIdxTop)
72      st->print(", idx=Top;");
73    else if (atp->index() == Compile::AliasIdxRaw)
74      st->print(", idx=Raw;");
75    else {
76      ciField* field = atp->field();
77      if (field) {
78        st->print(", name=");
79        field->print_name_on(st);
80      }
81      st->print(", idx=%d;", atp->index());
82    }
83  }
84}
85
86extern void print_alias_types();
87
88#endif
89
90//--------------------------Ideal_common---------------------------------------
91// Look for degenerate control and memory inputs.  Bypass MergeMem inputs.
92// Unhook non-raw memories from complete (macro-expanded) initializations.
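// Returns one of three values, which callers must distinguish:
//   NULL         - no progress was made here; the subclass continues.
//   NodeSentinel - an input (memory or address) is TOP, so the caller
//                  should itself return NULL (the node is dying).
//   this         - an input was improved in place (e.g. a MergeMem or a
//                  complete Initialize was bypassed).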
93Node *MemNode::Ideal_common(PhaseGVN *phase, bool can_reshape) {
94  // If our control input is a dead region, kill all below the region
95  Node *ctl = in(MemNode::Control);
96  if (ctl && remove_dead_region(phase, can_reshape))
97    return this;
98
99  // Ignore if memory is dead, or self-loop
100  Node *mem = in(MemNode::Memory);
101  if( phase->type( mem ) == Type::TOP ) return NodeSentinel; // caller will return NULL
102  assert( mem != this, "dead loop in MemNode::Ideal" );
103
104  Node *address = in(MemNode::Address);
105  const Type *t_adr = phase->type( address );
106  if( t_adr == Type::TOP )              return NodeSentinel; // caller will return NULL
107
108  // Avoid independent memory operations
109  Node* old_mem = mem;
110
111  if (mem->is_Proj() && mem->in(0)->is_Initialize()) {
112    InitializeNode* init = mem->in(0)->as_Initialize();
113    if (init->is_complete()) {  // i.e., after macro expansion
114      const TypePtr* tp = t_adr->is_ptr();
115      uint alias_idx = phase->C->get_alias_index(tp);
116      // Free this slice from the init.  It was hooked, temporarily,
117      // by GraphKit::set_output_for_allocation.
118      if (alias_idx > Compile::AliasIdxRaw) {
119        mem = init->memory(alias_idx);
120        // ...but not with the raw-pointer slice.
121      }
122    }
123  }
124
125  if (mem->is_MergeMem()) {
126    MergeMemNode* mmem = mem->as_MergeMem();
127    const TypePtr *tp = t_adr->is_ptr();
128    uint alias_idx = phase->C->get_alias_index(tp);
129#ifdef ASSERT
130    {
131      // Check that current type is consistent with the alias index used during graph construction
132      assert(alias_idx >= Compile::AliasIdxRaw, "must not be a bad alias_idx");
133      const TypePtr *adr_t =  adr_type();
134      bool consistent =  adr_t == NULL || adr_t->empty() || phase->C->must_alias(adr_t, alias_idx );
135      // Sometimes dead array references collapse to a[-1], a[-2], or a[-3]
136      if( !consistent && adr_t != NULL && !adr_t->empty() &&
137             tp->isa_aryptr() &&    tp->offset() == Type::OffsetBot &&
138          adr_t->isa_aryptr() && adr_t->offset() != Type::OffsetBot &&
139          ( adr_t->offset() == arrayOopDesc::length_offset_in_bytes() ||
140            adr_t->offset() == oopDesc::klass_offset_in_bytes() ||
141            adr_t->offset() == oopDesc::mark_offset_in_bytes() ) ) {
142        // don't assert if it is dead code.
143        consistent = true;
144      }
145      if( !consistent ) {
146        tty->print("alias_idx==%d, adr_type()==", alias_idx); if( adr_t == NULL ) { tty->print("NULL"); } else { adr_t->dump(); }
147        tty->cr();
148        print_alias_types();
149        assert(consistent, "adr_type must match alias idx");
150      }
151    }
152#endif
153    // TypeInstPtr::NOTNULL+any is an OOP with unknown offset - generally
154    // means an array I have not precisely typed yet.  Do not do any
155    // alias stuff with it any time soon.
156    const TypeInstPtr *tinst = tp->isa_instptr();
157    if( tp->base() != Type::AnyPtr &&
158        !(tinst &&
159          tinst->klass()->is_java_lang_Object() &&
160          tinst->offset() == Type::OffsetBot) ) {
161      // compress paths and change unreachable cycles to TOP
162      // If not, we can update the input infinitely along a MergeMem cycle
163      // Equivalent code in PhiNode::Ideal
164      Node* m  = phase->transform(mmem);
165      // If transformed to a MergeMem, get the desired slice
166      // Otherwise the returned node represents memory for every slice
167      mem = (m->is_MergeMem())? m->as_MergeMem()->memory_at(alias_idx) : m;
168      // Update input if it is progress over what we have now
169    }
170  }
171
172  if (mem != old_mem) {
173    set_req(MemNode::Memory, mem);
174    return this;
175  }
176
177  // let the subclass continue analyzing...
178  return NULL;
179}
180
181// Helper function for proving some simple control dominations.
182// Attempt to prove that control input 'dom' dominates (or equals) 'sub'.
183// Already assumes that 'dom' is available at 'sub', and that 'sub'
184// is not a constant (dominated by the method's StartNode).
185// Used by MemNode::find_previous_store to prove that the
186// control input of a memory operation predates (dominates)
187// an allocation it wants to look past.
188bool MemNode::detect_dominating_control(Node* dom, Node* sub) {
189  if (dom == NULL)      return false;
190  if (dom->is_Proj())   dom = dom->in(0);
191  if (dom->is_Start())  return true; // anything inside the method
192  if (dom->is_Root())   return true; // dom 'controls' a constant
193  int cnt = 20;                      // detect cycle or too much effort
194  while (sub != NULL) {              // walk 'sub' up the chain to 'dom'
195    if (--cnt < 0)   return false;   // in a cycle or too complex
196    if (sub == dom)  return true;
197    if (sub->is_Start())  return false;
198    if (sub->is_Root())   return false;
199    Node* up = sub->in(0);
200    if (sub == up && sub->is_Region()) {
201      for (uint i = 1; i < sub->req(); i++) {
202        Node* in = sub->in(i);
203        if (in != NULL && !in->is_top() && in != sub) {
204          up = in; break;            // take any path on the way up to 'dom'
205        }
206      }
207    }
208    if (sub == up)  return false;    // some kind of tight cycle
209    sub = up;
210  }
211  return false;
212}
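// A worked example (illustrative): if 'dom' is the Proj of an If and 'sub'
// is the control of an AllocateNode deeper in the same method, the walk
// follows sub->in(0) upward (taking any non-top path at a Region) and
// succeeds as soon as it reaches 'dom'; it gives up after 20 steps or on
// reaching Start or Root.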
213
214//---------------------detect_ptr_independence---------------------------------
215// Used by MemNode::find_previous_store to prove that two base
216// pointers are never equal.
217// The pointers are accompanied by their associated allocations,
218// if any, which have been previously discovered by the caller.
219bool MemNode::detect_ptr_independence(Node* p1, AllocateNode* a1,
220                                      Node* p2, AllocateNode* a2,
221                                      PhaseTransform* phase) {
222  // Attempt to prove that these two pointers cannot be aliased.
223  // They may both manifestly be allocations, and they should differ.
224  // Or, if they are not both allocations, they can be distinct constants.
225  // Otherwise, one is an allocation and the other a pre-existing value.
226  if (a1 == NULL && a2 == NULL) {           // neither an allocation
227    return (p1 != p2) && p1->is_Con() && p2->is_Con();
228  } else if (a1 != NULL && a2 != NULL) {    // both allocations
229    return (a1 != a2);
230  } else if (a1 != NULL) {                  // one allocation a1
231    // (Note:  p2->is_Con implies p2->in(0)->is_Root, which dominates.)
232    return detect_dominating_control(p2->in(0), a1->in(0));
233  } else { //(a2 != NULL)                   // one allocation a2
234    return detect_dominating_control(p1->in(0), a2->in(0));
235  }
236  return false;
237}
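// Examples (illustrative): two distinct ConP constants are independent
// (first case); two different AllocateNodes are independent (second case);
// a fresh allocation is independent of a pre-existing pointer whenever the
// pointer's control provably dominates the allocation (last two cases).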
238
239
240// The logic for reordering loads and stores uses four steps:
241// (a) Walk carefully past stores and initializations which we
242//     can prove are independent of this load.
243// (b) Observe that the next memory state makes an exact match
244//     with self (load or store), and locate the relevant store.
245// (c) Ensure that, if we were to wire self directly to the store,
246//     the optimizer would fold it up somehow.
247// (d) Do the rewiring, and return, depending on some other part of
248//     the optimizer to fold up the load.
249// This routine handles steps (a) and (b).  Steps (c) and (d) are
250// specific to loads and stores, so they are handled by the callers.
251// (Currently, only LoadNode::Ideal has steps (c), (d).  More later.)
252//
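// Worked example of steps (a) and (b) (illustrative): given
//   this: LoadI (ctl, mem, AddP(base, #16))
//   mem:  StoreI(ctl, mem0, AddP(base, #12), v)
// a 4-byte store at offset 12 cannot overlap a 4-byte load at offset 16,
// so step (a) advances the search to mem0.  A store whose base and offset
// match exactly is instead returned to the caller for steps (c) and (d).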
253Node* MemNode::find_previous_store(PhaseTransform* phase) {
254  Node*         ctrl   = in(MemNode::Control);
255  Node*         adr    = in(MemNode::Address);
256  intptr_t      offset = 0;
257  Node*         base   = AddPNode::Ideal_base_and_offset(adr, phase, offset);
258  AllocateNode* alloc  = AllocateNode::Ideal_allocation(base, phase);
259
260  if (offset == Type::OffsetBot)
261    return NULL;            // cannot unalias unless there are precise offsets
262
263  intptr_t size_in_bytes = memory_size();
264
265  Node* mem = in(MemNode::Memory);   // start searching here...
266
267  int cnt = 50;             // Cycle limiter
268  for (;;) {                // While we can dance past unrelated stores...
269    if (--cnt < 0)  break;  // Caught in cycle or a complicated dance?
270
271    if (mem->is_Store()) {
272      Node* st_adr = mem->in(MemNode::Address);
273      intptr_t st_offset = 0;
274      Node* st_base = AddPNode::Ideal_base_and_offset(st_adr, phase, st_offset);
275      if (st_base == NULL)
276        break;              // inscrutable pointer
277      if (st_offset != offset && st_offset != Type::OffsetBot) {
278        const int MAX_STORE = BytesPerLong;
279        if (st_offset >= offset + size_in_bytes ||
280            st_offset <= offset - MAX_STORE ||
281            st_offset <= offset - mem->as_Store()->memory_size()) {
282          // Success:  The offsets are provably independent.
283          // (You may ask, why not just test st_offset != offset and be done?
284          // The answer is that stores of different sizes can co-exist
285          // in the same sequence of RawMem effects.  We sometimes initialize
286          // a whole 'tile' of array elements with a single jint or jlong.)
287          mem = mem->in(MemNode::Memory);
288          continue;           // (a) advance through independent store memory
289        }
290      }
291      if (st_base != base &&
292          detect_ptr_independence(base, alloc,
293                                  st_base,
294                                  AllocateNode::Ideal_allocation(st_base, phase),
295                                  phase)) {
296        // Success:  The bases are provably independent.
297        mem = mem->in(MemNode::Memory);
298        continue;           // (a) advance through independent store memory
299      }
300
301      // (b) At this point, if the bases or offsets do not agree, we lose,
302      // since we have not managed to prove 'this' and 'mem' independent.
303      if (st_base == base && st_offset == offset) {
304        return mem;         // let caller handle steps (c), (d)
305      }
306
307    } else if (mem->is_Proj() && mem->in(0)->is_Initialize()) {
308      InitializeNode* st_init = mem->in(0)->as_Initialize();
309      AllocateNode*  st_alloc = st_init->allocation();
310      if (st_alloc == NULL)
311        break;              // something degenerated
312      bool known_identical = false;
313      bool known_independent = false;
314      if (alloc == st_alloc)
315        known_identical = true;
316      else if (alloc != NULL)
317        known_independent = true;
318      else if (ctrl != NULL &&
319               detect_dominating_control(ctrl, st_alloc->in(0)))
320        known_independent = true;
321
322      if (known_independent) {
323        // The bases are provably independent: Either they are
324        // manifestly distinct allocations, or else the control
325        // of this load dominates the store's allocation.
326        int alias_idx = phase->C->get_alias_index(adr_type());
327        if (alias_idx == Compile::AliasIdxRaw) {
328          mem = st_alloc->in(TypeFunc::Memory);
329        } else {
330          mem = st_init->memory(alias_idx);
331        }
332        continue;           // (a) advance through independent store memory
333      }
334
335      // (b) at this point, if we are not looking at a store initializing
336      // the same allocation we are loading from, we lose.
337      if (known_identical) {
338        // From caller, can_see_stored_value will consult find_captured_store.
339        return mem;         // let caller handle steps (c), (d)
340      }
341
342    }
343
344    // Unless there is an explicit 'continue', we must bail out here,
345    // because 'mem' is an inscrutable memory state (e.g., a call).
346    break;
347  }
348
349  return NULL;              // bail out
350}
351
352//----------------------calculate_adr_type-------------------------------------
353// Helper function.  Notices when the given type of address hits top or bottom.
354// Also, asserts a cross-check of the type against the expected address type.
355const TypePtr* MemNode::calculate_adr_type(const Type* t, const TypePtr* cross_check) {
356  if (t == Type::TOP)  return NULL; // does not touch memory any more?
357  #ifdef PRODUCT
358  cross_check = NULL;
359  #else
360  if (!VerifyAliases || is_error_reported() || Node::in_dump())  cross_check = NULL;
361  #endif
362  const TypePtr* tp = t->isa_ptr();
363  if (tp == NULL) {
364    assert(cross_check == NULL || cross_check == TypePtr::BOTTOM, "expected memory type must be wide");
365    return TypePtr::BOTTOM;           // touches lots of memory
366  } else {
367    #ifdef ASSERT
368    // %%%% [phh] We don't check the alias index if cross_check is
369    //            TypeRawPtr::BOTTOM.  Needs to be investigated.
370    if (cross_check != NULL &&
371        cross_check != TypePtr::BOTTOM &&
372        cross_check != TypeRawPtr::BOTTOM) {
373      // Recheck the alias index, to see if it has changed (due to a bug).
374      Compile* C = Compile::current();
375      assert(C->get_alias_index(cross_check) == C->get_alias_index(tp),
376             "must stay in the original alias category");
377      // The type of the address must be contained in the adr_type,
378      // disregarding "null"-ness.
379      // (We make an exception for TypeRawPtr::BOTTOM, which is a bit bucket.)
380      const TypePtr* tp_notnull = tp->join(TypePtr::NOTNULL)->is_ptr();
381      assert(cross_check->meet(tp_notnull) == cross_check,
382             "real address must not escape from expected memory type");
383    }
384    #endif
385    return tp;
386  }
387}
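// For example, a raw-memory address (TypeRawPtr::BOTTOM) is returned
// unchanged, while a non-pointer type widens the result to TypePtr::BOTTOM,
// i.e. "touches lots of memory".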
388
389//------------------------adr_phi_is_loop_invariant----------------------------
390// A helper function for Ideal_DU_postCCP to check if a Phi in a counted
391// loop is loop invariant. Make a quick traversal of Phi and associated
392// CastPP nodes, looking to see if they are a closed group within the loop.
393bool MemNode::adr_phi_is_loop_invariant(Node* adr_phi, Node* cast) {
394  // The idea is that the phi-nest must boil down to only CastPP nodes
395  // with the same data. This implies that any path into the loop already
396  // includes such a CastPP, and so the original cast, whatever its input,
397  // must be covered by an equivalent cast, with an earlier control input.
398  ResourceMark rm;
399
400  // The loop entry input of the phi should be the unique dominating
401  // node for every Phi/CastPP in the loop.
402  Unique_Node_List closure;
403  closure.push(adr_phi->in(LoopNode::EntryControl));
404
405  // Add the phi node and the cast to the worklist.
406  Unique_Node_List worklist;
407  worklist.push(adr_phi);
408  if( cast != NULL ){
409    if( !cast->is_ConstraintCast() ) return false;
410    worklist.push(cast);
411  }
412
413  // Begin recursive walk of phi nodes.
414  while( worklist.size() ){
415    // Take a node off the worklist
416    Node *n = worklist.pop();
417    if( !closure.member(n) ){
418      // Add it to the closure.
419      closure.push(n);
420      // Make a sanity check to ensure we don't waste too much time here.
421      if( closure.size() > 20) return false;
422      // This node is OK if:
423      //  - it is a cast of an identical value
424      //  - or it is a phi node (then we add its inputs to the worklist)
425      // Otherwise, the node is not OK, and we presume the cast is not invariant
426      if( n->is_ConstraintCast() ){
427        worklist.push(n->in(1));
428      } else if( n->is_Phi() ) {
429        for( uint i = 1; i < n->req(); i++ ) {
430          worklist.push(n->in(i));
431        }
432      } else {
433        return false;
434      }
435    }
436  }
437
438  // Quit when the worklist is empty, and we've found no offending nodes.
439  return true;
440}
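// A shape that passes the check (illustrative): a Phi whose loop-entry input
// is E and whose back-edge input is a CastPP of that same Phi; the closed
// group {E, Phi, CastPP} is accumulated in 'closure' and nothing outside the
// group (or a group larger than 20 nodes) is ever encountered.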
441
442//------------------------------Ideal_DU_postCCP-------------------------------
443// Find any cast-away of null-ness and keep its control.  Null cast-aways are
444// going away in this pass and we need to make this memory op depend on the
445// gating null check.
446
447// I tried to leave the CastPP's in.  This makes the graph more accurate in
448// some sense; we get to keep around the knowledge that an oop is not-null
449// after some test.  Alas, the CastPP's interfere with GVN (some values are
450// the regular oop, some are the CastPP of the oop, all merge at Phi's which
451// cannot collapse, etc).  This cost us 10% on SpecJVM, even when I removed
452// some of the more trivial cases in the optimizer.  Removing more useless
453// Phi's started allowing Loads to illegally float above null checks.  I gave
454// up on this approach.  CNC 10/20/2000
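// Sketch of the effect (illustrative): a field load whose address is a
// CastPP that CCP is about to remove gets the CastPP's control input as its
// own control, so the load stays pinned below the null check that guarded
// the cast.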
455Node *MemNode::Ideal_DU_postCCP( PhaseCCP *ccp ) {
456  Node *ctr = in(MemNode::Control);
457  Node *mem = in(MemNode::Memory);
458  Node *adr = in(MemNode::Address);
459  Node *skipped_cast = NULL;
460  // Need a null check?  Regular static accesses do not because they are
461  // from constant addresses.  Array ops are gated by the range check (which
462  // always includes a NULL check).  Just check field ops.
463  if( !ctr ) {
464    // Scan upwards for the highest location we can place this memory op.
465    while( true ) {
466      switch( adr->Opcode() ) {
467
468      case Op_AddP:             // No change to NULL-ness, so peek thru AddP's
469        adr = adr->in(AddPNode::Base);
470        continue;
471
472      case Op_CastPP:
473        // If the CastPP is useless, just peek on through it.
474        if( ccp->type(adr) == ccp->type(adr->in(1)) ) {
475          // Remember the cast that we've peeked through. If we peek
476          // through more than one, then we end up remembering the highest
477          // one, that is, if in a loop, the one closest to the top.
478          skipped_cast = adr;
479          adr = adr->in(1);
480          continue;
481        }
482        // CastPP is going away in this pass!  We need this memory op to be
483        // control-dependent on the test that is guarding the CastPP.
484        ccp->hash_delete(this);
485        set_req(MemNode::Control, adr->in(0));
486        ccp->hash_insert(this);
487        return this;
488
489      case Op_Phi:
490        // Attempt to float above a Phi to some dominating point.
491        if (adr->in(0) != NULL && adr->in(0)->is_CountedLoop()) {
492          // If we've already peeked through a Cast (which could have set the
493          // control), we can't float above a Phi, because the skipped Cast
494          // may not be loop invariant.
495          if (adr_phi_is_loop_invariant(adr, skipped_cast)) {
496            adr = adr->in(1);
497            continue;
498          }
499        }
500
501        // Intentional fallthrough!
502
503        // No obvious dominating point.  The mem op is pinned below the Phi
504        // by the Phi itself.  If the Phi goes away (no true value is merged)
505        // then the mem op can float, but not indefinitely.  It must be pinned
506        // behind the controls leading to the Phi.
507      case Op_CheckCastPP:
508        // These usually stick around to change address type, however a
509        // useless one can be elided and we still need to pick up a control edge
510        if (adr->in(0) == NULL) {
511          // This CheckCastPP node has NO control and is likely useless. But we
512          // need to check further up the ancestor chain for a control input to keep
513          // the node in place. 4959717.
514          skipped_cast = adr;
515          adr = adr->in(1);
516          continue;
517        }
518        ccp->hash_delete(this);
519        set_req(MemNode::Control, adr->in(0));
520        ccp->hash_insert(this);
521        return this;
522
523        // List of "safe" opcodes; those that implicitly block the memory
524        // op below any null check.
525      case Op_CastX2P:          // no null checks on native pointers
526      case Op_Parm:             // 'this' pointer is not null
527      case Op_LoadP:            // Loading from within a klass
528      case Op_LoadKlass:        // Loading from within a klass
529      case Op_ConP:             // Loading from a klass
530      case Op_CreateEx:         // Sucking up the guts of an exception oop
531      case Op_Con:              // Reading from TLS
532      case Op_CMoveP:           // CMoveP is pinned
533        break;                  // No progress
534
535      case Op_Proj:             // Direct call to an allocation routine
536      case Op_SCMemProj:        // Memory state from store conditional ops
537#ifdef ASSERT
538        {
539          assert(adr->as_Proj()->_con == TypeFunc::Parms, "must be return value");
540          const Node* call = adr->in(0);
541          if (call->is_CallStaticJava()) {
542            const CallStaticJavaNode* call_java = call->as_CallStaticJava();
543            assert(call_java && call_java->method() == NULL, "must be runtime call");
544            // We further presume that this is one of
545            // new_instance_Java, new_array_Java, or
546            // the like, but do not assert for this.
547          } else if (call->is_Allocate()) {
548            // similar case to new_instance_Java, etc.
549          } else if (!call->is_CallLeaf()) {
550            // Projections from fetch_oop (OSR) are allowed as well.
551            ShouldNotReachHere();
552          }
553        }
554#endif
555        break;
556      default:
557        ShouldNotReachHere();
558      }
559      break;
560    }
561  }
562
563  return  NULL;               // No progress
564}
565
566
567//=============================================================================
568uint LoadNode::size_of() const { return sizeof(*this); }
569uint LoadNode::cmp( const Node &n ) const
570{ return !Type::cmp( _type, ((LoadNode&)n)._type ); }
571const Type *LoadNode::bottom_type() const { return _type; }
572uint LoadNode::ideal_reg() const {
573  return Matcher::base2reg[_type->base()];
574}
575
576#ifndef PRODUCT
577void LoadNode::dump_spec(outputStream *st) const {
578  MemNode::dump_spec(st);
579  if( !Verbose && !WizardMode ) {
580    // standard dump does this in Verbose and WizardMode
581    st->print(" #"); _type->dump_on(st);
582  }
583}
584#endif
585
586
587//----------------------------LoadNode::make-----------------------------------
588// Polymorphic factory method:
589LoadNode *LoadNode::make( Compile *C, Node *ctl, Node *mem, Node *adr, const TypePtr* adr_type, const Type *rt, BasicType bt ) {
590  // sanity check the alias category against the created node type
591  assert(!(adr_type->isa_oopptr() &&
592           adr_type->offset() == oopDesc::klass_offset_in_bytes()),
593         "use LoadKlassNode instead");
594  assert(!(adr_type->isa_aryptr() &&
595           adr_type->offset() == arrayOopDesc::length_offset_in_bytes()),
596         "use LoadRangeNode instead");
597  switch (bt) {
598  case T_BOOLEAN:
599  case T_BYTE:    return new (C, 3) LoadBNode(ctl, mem, adr, adr_type, rt->is_int()    );
600  case T_INT:     return new (C, 3) LoadINode(ctl, mem, adr, adr_type, rt->is_int()    );
601  case T_CHAR:    return new (C, 3) LoadCNode(ctl, mem, adr, adr_type, rt->is_int()    );
602  case T_SHORT:   return new (C, 3) LoadSNode(ctl, mem, adr, adr_type, rt->is_int()    );
603  case T_LONG:    return new (C, 3) LoadLNode(ctl, mem, adr, adr_type, rt->is_long()   );
604  case T_FLOAT:   return new (C, 3) LoadFNode(ctl, mem, adr, adr_type, rt              );
605  case T_DOUBLE:  return new (C, 3) LoadDNode(ctl, mem, adr, adr_type, rt              );
606  case T_ADDRESS: return new (C, 3) LoadPNode(ctl, mem, adr, adr_type, rt->is_ptr()    );
607  case T_OBJECT:  return new (C, 3) LoadPNode(ctl, mem, adr, adr_type, rt->is_oopptr());
608  }
609  ShouldNotReachHere();
610  return (LoadNode*)NULL;
611}
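// Usage sketch (illustrative): an int field load is created as
//   LoadNode::make(C, ctl, mem, adr, adr_type, TypeInt::INT, T_INT)
// which yields a LoadINode; T_OBJECT and T_ADDRESS both map to LoadPNode.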
612
613LoadLNode* LoadLNode::make_atomic(Compile *C, Node* ctl, Node* mem, Node* adr, const TypePtr* adr_type, const Type* rt) {
614  bool require_atomic = true;
615  return new (C, 3) LoadLNode(ctl, mem, adr, adr_type, rt->is_long(), require_atomic);
616}
617
618
619
620
621//------------------------------hash-------------------------------------------
622uint LoadNode::hash() const {
623  // unroll addition of interesting fields
624  return (uintptr_t)in(Control) + (uintptr_t)in(Memory) + (uintptr_t)in(Address);
625}
626
627//---------------------------can_see_stored_value------------------------------
628// This routine exists to make sure this set of tests is done the same
629// everywhere.  We need to make a coordinated change: first LoadNode::Ideal
630// will change the graph shape in a way which makes memory alive twice at the
631// same time (uses the Oracle model of aliasing), then some
632// LoadXNode::Identity will fold things back to the equivalence-class model
633// of aliasing.
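// Example of the two-trip case (illustrative): when 'st' is the memory Proj
// of an InitializeNode for the same allocation the load reads from,
// find_captured_store may return the captured StoreX, and the second trip
// through the loop extracts that store's ValueIn.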
634Node* MemNode::can_see_stored_value(Node* st, PhaseTransform* phase) const {
635  Node* ld_adr = in(MemNode::Address);
636
637  // Loop around twice in the case Load -> Initialize -> Store.
638  // (See PhaseIterGVN::add_users_to_worklist, which knows about this case.)
639  for (int trip = 0; trip <= 1; trip++) {
640
641    if (st->is_Store()) {
642      Node* st_adr = st->in(MemNode::Address);
643      if (!phase->eqv(st_adr, ld_adr)) {
644        // Try harder before giving up...  Match raw and non-raw pointers.
645        intptr_t st_off = 0;
646        AllocateNode* alloc = AllocateNode::Ideal_allocation(st_adr, phase, st_off);
647        if (alloc == NULL)       return NULL;
648        intptr_t ld_off = 0;
649        AllocateNode* allo2 = AllocateNode::Ideal_allocation(ld_adr, phase, ld_off);
650        if (alloc != allo2)      return NULL;
651        if (ld_off != st_off)    return NULL;
652        // At this point we have proven something like this setup:
653        //  A = Allocate(...)
654        //  L = LoadQ(,  AddP(CastPP(, A.Parm),, #Off))
655        //  S = StoreQ(, AddP(,        A.Parm  , #Off), V)
656        // (Actually, we haven't yet proven the Q's are the same.)
657        // In other words, we are loading from a casted version of
658        // the same pointer-and-offset that we stored to.
659        // Thus, we are able to replace L by V.
660      }
661      // Now prove that we have a LoadQ matched to a StoreQ, for some Q.
662      if (store_Opcode() != st->Opcode())
663        return NULL;
664      return st->in(MemNode::ValueIn);
665    }
666
667    intptr_t offset = 0;  // scratch
668
669    // A load from a freshly-created object always returns zero.
670    // (This can happen after LoadNode::Ideal resets the load's memory input
671    // to find_captured_store, which returned InitializeNode::zero_memory.)
672    if (st->is_Proj() && st->in(0)->is_Allocate() &&
673        st->in(0) == AllocateNode::Ideal_allocation(ld_adr, phase, offset) &&
674        offset >= st->in(0)->as_Allocate()->minimum_header_size()) {
675      // return a zero value for the load's basic type
676      // (This is one of the few places where a generic PhaseTransform
677      // can create new nodes.  Think of it as lazily manifesting
678      // virtually pre-existing constants.)
679      return phase->zerocon(memory_type());
680    }
681
682    // A load from an initialization barrier can match a captured store.
683    if (st->is_Proj() && st->in(0)->is_Initialize()) {
684      InitializeNode* init = st->in(0)->as_Initialize();
685      AllocateNode* alloc = init->allocation();
686      if (alloc != NULL &&
687          alloc == AllocateNode::Ideal_allocation(ld_adr, phase, offset)) {
688        // examine a captured store value
689        st = init->find_captured_store(offset, memory_size(), phase);
690        if (st != NULL)
691          continue;             // take one more trip around
692      }
693    }
694
695    break;
696  }
697
698  return NULL;
699}
700
701//------------------------------Identity---------------------------------------
702// Loads are identity if previous store is to same address
703Node *LoadNode::Identity( PhaseTransform *phase ) {
704  // If the previous store-maker is the right kind of Store, and the store is
705  // to the same address, then we are equal to the value stored.
706  Node* mem = in(MemNode::Memory);
707  Node* value = can_see_stored_value(mem, phase);
708  if( value ) {
709    // byte, short & char stores truncate naturally.
710    // A load has to load the truncated value which requires
711    // some sort of masking operation and that requires an
712    // Ideal call instead of an Identity call.
713    if (memory_size() < BytesPerInt) {
714      // If the input to the store does not fit with the load's result type,
715      // it must be truncated via an Ideal call.
716      if (!phase->type(value)->higher_equal(phase->type(this)))
717        return this;
718    }
719    // (This works even when value is a Con, but LoadNode::Value
720    // usually runs first, producing the singleton type of the Con.)
721    return value;
722  }
723  return this;
724}
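// Example (illustrative): StoreB(v) followed by a LoadB at the same address
// folds to v here only if v's type already fits in a byte; otherwise
// LoadBNode::Ideal must first insert the sign-extending shift pair.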
725
726//------------------------------Ideal------------------------------------------
727// If the load is from Field memory and the pointer is non-null, we can
728// zero out the control input.
729// If the offset is constant and the base is an object allocation,
730// try to hook me up to the exact initializing store.
731Node *LoadNode::Ideal(PhaseGVN *phase, bool can_reshape) {
732  Node* p = MemNode::Ideal_common(phase, can_reshape);
733  if (p)  return (p == NodeSentinel) ? NULL : p;
734
735  Node* ctrl    = in(MemNode::Control);
736  Node* address = in(MemNode::Address);
737
738  // Skip up past a SafePoint control.  Cannot do this for Stores because
739  // pointer stores & cardmarks must stay on the same side of a SafePoint.
740  if( ctrl != NULL && ctrl->Opcode() == Op_SafePoint &&
741      phase->C->get_alias_index(phase->type(address)->is_ptr()) != Compile::AliasIdxRaw ) {
742    ctrl = ctrl->in(0);
743    set_req(MemNode::Control,ctrl);
744  }
745
746  // Check for useless control edge in some common special cases
747  if (in(MemNode::Control) != NULL) {
748    intptr_t ignore = 0;
749    Node*    base   = AddPNode::Ideal_base_and_offset(address, phase, ignore);
750    if (base != NULL
751        && phase->type(base)->higher_equal(TypePtr::NOTNULL)
752        && detect_dominating_control(base->in(0), phase->C->start())) {
753      // A method-invariant, non-null address (constant or 'this' argument).
754      set_req(MemNode::Control, NULL);
755    }
756  }
757
758  // Check for prior store with a different base or offset; make Load
759  // independent.  Skip through any number of them.  Bail out if the stores
760  // are in an endless dead cycle and report no progress.  This is a key
761  // transform for Reflection.  However, if after skipping through the Stores
762  // we can't then fold up against a prior store do NOT do the transform as
763  // this amounts to using the 'Oracle' model of aliasing.  It leaves the same
764  // array memory alive twice: once for the hoisted Load and again after the
765  // bypassed Store.  This situation only works if EVERYBODY who does
766  // anti-dependence work knows how to bypass.  I.e. we need all
767  // anti-dependence checks to ask the same Oracle.  Right now, that Oracle is
768  // the alias index stuff.  So instead, peek through Stores and IFF we can
769  // fold up, do so.
770  Node* prev_mem = find_previous_store(phase);
771  // Steps (a), (b):  Walk past independent stores to find an exact match.
772  if (prev_mem != NULL && prev_mem != in(MemNode::Memory)) {
773    // (c) See if we can fold up on the spot, but don't fold up here.
774    // Fold-up might require truncation (for LoadB/LoadS/LoadC) or
775    // just return a prior value, which is done by Identity calls.
776    if (can_see_stored_value(prev_mem, phase)) {
777      // Make ready for step (d):
778      set_req(MemNode::Memory, prev_mem);
779      return this;
780    }
781  }
782
783  return NULL;                  // No further progress
784}
785
786// Helper to recognize certain Klass fields which are invariant across
787// some group of array types (e.g., int[] or all T[] where T < Object).
788const Type*
789LoadNode::load_array_final_field(const TypeKlassPtr *tkls,
790                                 ciKlass* klass) const {
791  if (tkls->offset() == Klass::modifier_flags_offset_in_bytes() + (int)sizeof(oopDesc)) {
792    // The field is Klass::_modifier_flags.  Return its (constant) value.
793    // (Folds up the 2nd indirection in aClassConstant.getModifiers().)
794    assert(this->Opcode() == Op_LoadI, "must load an int from _modifier_flags");
795    return TypeInt::make(klass->modifier_flags());
796  }
797  if (tkls->offset() == Klass::access_flags_offset_in_bytes() + (int)sizeof(oopDesc)) {
798    // The field is Klass::_access_flags.  Return its (constant) value.
799    // (Folds up the 2nd indirection in Reflection.getClassAccessFlags(aClassConstant).)
800    assert(this->Opcode() == Op_LoadI, "must load an int from _access_flags");
801    return TypeInt::make(klass->access_flags());
802  }
803  if (tkls->offset() == Klass::layout_helper_offset_in_bytes() + (int)sizeof(oopDesc)) {
804    // The field is Klass::_layout_helper.  Return its constant value if known.
805    assert(this->Opcode() == Op_LoadI, "must load an int from _layout_helper");
806    return TypeInt::make(klass->layout_helper());
807  }
808
809  // No match.
810  return NULL;
811}
812
813//------------------------------Value-----------------------------------------
814const Type *LoadNode::Value( PhaseTransform *phase ) const {
815  // Either input is TOP ==> the result is TOP
816  Node* mem = in(MemNode::Memory);
817  const Type *t1 = phase->type(mem);
818  if (t1 == Type::TOP)  return Type::TOP;
819  Node* adr = in(MemNode::Address);
820  const TypePtr* tp = phase->type(adr)->isa_ptr();
821  if (tp == NULL || tp->empty())  return Type::TOP;
822  int off = tp->offset();
823  assert(off != Type::OffsetTop, "case covered by TypePtr::empty");
824
825  // Try to guess loaded type from pointer type
826  if (tp->base() == Type::AryPtr) {
827    const Type *t = tp->is_aryptr()->elem();
828    // Don't do this for integer types. There is only potential profit if
829    // the element type t is lower than _type; that is, for int types, if _type is
830    // more restrictive than t.  This only happens here if one is short and the other
831    // char (both 16 bits), and in those cases we've made an intentional decision
832    // to use one kind of load over the other. See AndINode::Ideal and 4965907.
833    // Also, do not try to narrow the type for a LoadKlass, regardless of offset.
834    //
835    // Yes, it is possible to encounter an expression like (LoadKlass p1:(AddP x x 8))
836    // where the _gvn.type of the AddP is wider than 8.  This occurs when an earlier
837    // copy p0 of (AddP x x 8) has been proven equal to p1, and the p0 has been
838    // subsumed by p1.  If p1 is on the worklist but has not yet been re-transformed,
839    // it is possible that p1 will have a type like Foo*[int+]:NotNull*+any.
840    // In fact, that could have been the original type of p1, and p1 could have
841    // had an original form like p1:(AddP x x (LShiftL quux 3)), where the
842    // expression (LShiftL quux 3) independently optimized to the constant 8.
843    if ((t->isa_int() == NULL) && (t->isa_long() == NULL)
844        && Opcode() != Op_LoadKlass) {
845      // t might actually be lower than _type, if _type is a unique
846      // concrete subclass of abstract class t.
847      // Make sure the reference is not into the header, by comparing
848      // the offset against the offset of the start of the array's data.
849      // Different array types begin at slightly different offsets (12 vs. 16).
850      // We choose T_BYTE as an example base type that is least restrictive
851      // as to alignment, which will therefore produce the smallest
852      // possible base offset.
853      const int min_base_off = arrayOopDesc::base_offset_in_bytes(T_BYTE);
854      if ((uint)off >= (uint)min_base_off) {  // is the offset beyond the header?
855        const Type* jt = t->join(_type);
856        // In any case, do not allow the join, per se, to empty out the type.
857        if (jt->empty() && !t->empty()) {
858          // This can happen if an interface-typed array narrows to a class type.
859          jt = _type;
860        }
861        return jt;
862      }
863    }
864  } else if (tp->base() == Type::InstPtr) {
865    assert( off != Type::OffsetBot ||
866            // arrays can be cast to Objects
867            tp->is_oopptr()->klass()->is_java_lang_Object() ||
868            // unsafe field access may not have a constant offset
869            phase->C->has_unsafe_access(),
870            "Field accesses must be precise" );
871    // For oop loads, we expect the _type to be precise
872  } else if (tp->base() == Type::KlassPtr) {
873    assert( off != Type::OffsetBot ||
874            // arrays can be cast to Objects
875            tp->is_klassptr()->klass()->is_java_lang_Object() ||
876            // also allow array-loading from the primary supertype
877            // array during subtype checks
878            Opcode() == Op_LoadKlass,
879            "Field accesses must be precise" );
880    // For klass/static loads, we expect the _type to be precise
881  }
882
883  const TypeKlassPtr *tkls = tp->isa_klassptr();
884  if (tkls != NULL && !StressReflectiveCode) {
885    ciKlass* klass = tkls->klass();
886    if (klass->is_loaded() && tkls->klass_is_exact()) {
887      // We are loading a field from a Klass metaobject whose identity
888      // is known at compile time (the type is "exact" or "precise").
889      // Check for fields we know are maintained as constants by the VM.
890      if (tkls->offset() == Klass::super_check_offset_offset_in_bytes() + (int)sizeof(oopDesc)) {
891        // The field is Klass::_super_check_offset.  Return its (constant) value.
892        // (Folds up type checking code.)
893        assert(Opcode() == Op_LoadI, "must load an int from _super_check_offset");
894        return TypeInt::make(klass->super_check_offset());
895      }
896      // Compute index into primary_supers array
897      juint depth = (tkls->offset() - (Klass::primary_supers_offset_in_bytes() + (int)sizeof(oopDesc))) / sizeof(klassOop);
898      // Check for overflowing; use unsigned compare to handle the negative case.
899      if( depth < ciKlass::primary_super_limit() ) {
900        // The field is an element of Klass::_primary_supers.  Return its (constant) value.
901        // (Folds up type checking code.)
902        assert(Opcode() == Op_LoadKlass, "must load a klass from _primary_supers");
903        ciKlass *ss = klass->super_of_depth(depth);
904        return ss ? TypeKlassPtr::make(ss) : TypePtr::NULL_PTR;
905      }
906      const Type* aift = load_array_final_field(tkls, klass);
907      if (aift != NULL)  return aift;
908      if (tkls->offset() == in_bytes(arrayKlass::component_mirror_offset()) + (int)sizeof(oopDesc)
909          && klass->is_array_klass()) {
910        // The field is arrayKlass::_component_mirror.  Return its (constant) value.
911        // (Folds up aClassConstant.getComponentType, common in Arrays.copyOf.)
912        assert(Opcode() == Op_LoadP, "must load an oop from _component_mirror");
913        return TypeInstPtr::make(klass->as_array_klass()->component_mirror());
914      }
915      if (tkls->offset() == Klass::java_mirror_offset_in_bytes() + (int)sizeof(oopDesc)) {
916        // The field is Klass::_java_mirror.  Return its (constant) value.
917        // (Folds up the 2nd indirection in anObjConstant.getClass().)
918        assert(Opcode() == Op_LoadP, "must load an oop from _java_mirror");
919        return TypeInstPtr::make(klass->java_mirror());
920      }
921    }
922
923    // We can still check if we are loading from the primary_supers array at a
924    // shallow enough depth.  Even though the klass is not exact, entries less
925    // than or equal to its super depth are correct.
926    if (klass->is_loaded() ) {
927      ciType *inner = klass->klass();
928      while( inner->is_obj_array_klass() )
929        inner = inner->as_obj_array_klass()->base_element_type();
930      if( inner->is_instance_klass() &&
931          !inner->as_instance_klass()->flags().is_interface() ) {
932        // Compute index into primary_supers array
933        juint depth = (tkls->offset() - (Klass::primary_supers_offset_in_bytes() + (int)sizeof(oopDesc))) / sizeof(klassOop);
934        // Check for overflowing; use unsigned compare to handle the negative case.
935        if( depth < ciKlass::primary_super_limit() &&
936            depth <= klass->super_depth() ) { // allow self-depth checks to handle self-check case
937          // The field is an element of Klass::_primary_supers.  Return its (constant) value.
938          // (Folds up type checking code.)
939          assert(Opcode() == Op_LoadKlass, "must load a klass from _primary_supers");
940          ciKlass *ss = klass->super_of_depth(depth);
941          return ss ? TypeKlassPtr::make(ss) : TypePtr::NULL_PTR;
942        }
943      }
944    }
945
946    // If the type is enough to determine that the thing is not an array,
947    // we can give the layout_helper a positive interval type.
948    // This will help short-circuit some reflective code.
949    if (tkls->offset() == Klass::layout_helper_offset_in_bytes() + (int)sizeof(oopDesc)
950        && !klass->is_array_klass() // not directly typed as an array
951        && !klass->is_interface()  // specifically not Serializable & Cloneable
952        && !klass->is_java_lang_Object()   // not the supertype of all T[]
953        ) {
954      // Note:  When interfaces are reliable, we can narrow the interface
955      // test to (klass != Serializable && klass != Cloneable).
956      assert(Opcode() == Op_LoadI, "must load an int from _layout_helper");
957      jint min_size = Klass::instance_layout_helper(oopDesc::header_size(), false);
958      // The key property of this type is that it folds up tests
959      // for array-ness, since it proves that the layout_helper is positive.
960      // Thus, a generic value like the basic object layout helper works fine.
961      return TypeInt::make(min_size, max_jint, Type::WidenMin);
962    }
963  }
964
965  // If we are loading from a freshly-allocated object, produce a zero,
966  // if the load is provably beyond the header of the object.
967  // (Also allow a variable load from a fresh array to produce zero.)
968  if (ReduceFieldZeroing) {
969    Node* value = can_see_stored_value(mem,phase);
970    if (value != NULL && value->is_Con())
971      return value->bottom_type();
972  }
973
974  return _type;
975}
976
977//------------------------------match_edge-------------------------------------
978// Do we Match on this edge index or not?  Match only the address.
979uint LoadNode::match_edge(uint idx) const {
980  return idx == MemNode::Address;
981}
982
983//--------------------------LoadBNode::Ideal--------------------------------------
984//
985//  If the previous store is to the same address as this load,
986//  and the value stored was larger than a byte, replace this load
987//  with the value stored truncated to a byte.  If no truncation is
988//  needed, the replacement is done in LoadNode::Identity().
989//
990Node *LoadBNode::Ideal(PhaseGVN *phase, bool can_reshape) {
991  Node* mem = in(MemNode::Memory);
992  Node* value = can_see_stored_value(mem,phase);
993  if( value && !phase->type(value)->higher_equal( _type ) ) {
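    // (value << 24) >> 24 sign-extends the low 8 bits, reproducing what a
    // byte load from memory would have returned.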
994    Node *result = phase->transform( new (phase->C, 3) LShiftINode(value, phase->intcon(24)) );
995    return new (phase->C, 3) RShiftINode(result, phase->intcon(24));
996  }
997  // Identity call will handle the case where truncation is not needed.
998  return LoadNode::Ideal(phase, can_reshape);
999}
1000
1001//--------------------------LoadCNode::Ideal--------------------------------------
1002//
1003//  If the previous store is to the same address as this load,
1004//  and the value stored was larger than a char, replace this load
1005//  with the value stored truncated to a char.  If no truncation is
1006//  needed, the replacement is done in LoadNode::Identity().
1007//
1008Node *LoadCNode::Ideal(PhaseGVN *phase, bool can_reshape) {
1009  Node* mem = in(MemNode::Memory);
1010  Node* value = can_see_stored_value(mem,phase);
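  // Masking with 0xFFFF zero-extends the low 16 bits, reproducing what a
  // char load from memory would have returned.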
1011  if( value && !phase->type(value)->higher_equal( _type ) )
1012    return new (phase->C, 3) AndINode(value,phase->intcon(0xFFFF));
1013  // Identity call will handle the case where truncation is not needed.
1014  return LoadNode::Ideal(phase, can_reshape);
1015}
1016
1017//--------------------------LoadSNode::Ideal--------------------------------------
1018//
1019//  If the previous store is to the same address as this load,
1020//  and the value stored was larger than a short, replace this load
1021//  with the value stored truncated to a short.  If no truncation is
1022//  needed, the replacement is done in LoadNode::Identity().
1023//
1024Node *LoadSNode::Ideal(PhaseGVN *phase, bool can_reshape) {
1025  Node* mem = in(MemNode::Memory);
1026  Node* value = can_see_stored_value(mem,phase);
1027  if( value && !phase->type(value)->higher_equal( _type ) ) {
1028    Node *result = phase->transform( new (phase->C, 3) LShiftINode(value, phase->intcon(16)) );
1029    return new (phase->C, 3) RShiftINode(result, phase->intcon(16));
1030  }
1031  // Identity call will handle the case where truncation is not needed.
1032  return LoadNode::Ideal(phase, can_reshape);
1033}
1034
1035//=============================================================================
1036//------------------------------Value------------------------------------------
1037const Type *LoadKlassNode::Value( PhaseTransform *phase ) const {
1038  // Either input is TOP ==> the result is TOP
1039  const Type *t1 = phase->type( in(MemNode::Memory) );
1040  if (t1 == Type::TOP)  return Type::TOP;
1041  Node *adr = in(MemNode::Address);
1042  const Type *t2 = phase->type( adr );
1043  if (t2 == Type::TOP)  return Type::TOP;
1044  const TypePtr *tp = t2->is_ptr();
1045  if (TypePtr::above_centerline(tp->ptr()) ||
1046      tp->ptr() == TypePtr::Null)  return Type::TOP;
1047
1048  // Return a more precise klass, if possible
1049  const TypeInstPtr *tinst = tp->isa_instptr();
1050  if (tinst != NULL) {
1051    ciInstanceKlass* ik = tinst->klass()->as_instance_klass();
1052    int offset = tinst->offset();
1053    if (ik == phase->C->env()->Class_klass()
1054        && (offset == java_lang_Class::klass_offset_in_bytes() ||
1055            offset == java_lang_Class::array_klass_offset_in_bytes())) {
1056      // We are loading a special hidden field from a Class mirror object,
1057      // the field which points to the VM's Klass metaobject.
1058      ciType* t = tinst->java_mirror_type();
1059      // java_mirror_type returns non-null for compile-time Class constants.
1060      if (t != NULL) {
1061        // constant oop => constant klass
1062        if (offset == java_lang_Class::array_klass_offset_in_bytes()) {
1063          return TypeKlassPtr::make(ciArrayKlass::make(t));
1064        }
1065        if (!t->is_klass()) {
1066          // a primitive Class (e.g., int.class) has NULL for a klass field
1067          return TypePtr::NULL_PTR;
1068        }
1069        // (Folds up the 1st indirection in aClassConstant.getModifiers().)
1070        return TypeKlassPtr::make(t->as_klass());
1071      }
1072      // non-constant mirror, so we can't tell what's going on
1073    }
1074    if( !ik->is_loaded() )
1075      return _type;             // Bail out if not loaded
1076    if (offset == oopDesc::klass_offset_in_bytes()) {
1077      if (tinst->klass_is_exact()) {
1078        return TypeKlassPtr::make(ik);
1079      }
1080      // See if we can become precise: no subklasses and no interface
1081      // (Note:  We need to support verified interfaces.)
1082      if (!ik->is_interface() && !ik->has_subklass()) {
1083        //assert(!UseExactTypes, "this code should be useless with exact types");
1084        // Add a dependence; if any subclass added we need to recompile
1085        if (!ik->is_final()) {
1086          // %%% should use stronger assert_unique_concrete_subtype instead
1087          phase->C->dependencies()->assert_leaf_type(ik);
1088        }
1089        // Return precise klass
1090        return TypeKlassPtr::make(ik);
1091      }
1092
1093      // Return root of possible klass
1094      return TypeKlassPtr::make(TypePtr::NotNull, ik, 0/*offset*/);
1095    }
1096  }
1097
1098  // Check for loading klass from an array
1099  const TypeAryPtr *tary = tp->isa_aryptr();
1100  if( tary != NULL ) {
1101    ciKlass *tary_klass = tary->klass();
1102    if (tary_klass != NULL   // can be NULL when at BOTTOM or TOP
1103        && tary->offset() == oopDesc::klass_offset_in_bytes()) {
1104      if (tary->klass_is_exact()) {
1105        return TypeKlassPtr::make(tary_klass);
1106      }
1107      ciArrayKlass *ak = tary->klass()->as_array_klass();
1108      // If the klass is an object array, we defer the question to the
1109      // array component klass.
1110      if( ak->is_obj_array_klass() ) {
1111        assert( ak->is_loaded(), "" );
1112        ciKlass *base_k = ak->as_obj_array_klass()->base_element_klass();
1113        if( base_k->is_loaded() && base_k->is_instance_klass() ) {
1114          ciInstanceKlass* ik = base_k->as_instance_klass();
1115          // See if we can become precise: no subklasses and no interface
1116          if (!ik->is_interface() && !ik->has_subklass()) {
1117            //assert(!UseExactTypes, "this code should be useless with exact types");
1118            // Add a dependence; if any subclass added we need to recompile
1119            if (!ik->is_final()) {
1120              phase->C->dependencies()->assert_leaf_type(ik);
1121            }
1122            // Return precise array klass
1123            return TypeKlassPtr::make(ak);
1124          }
1125        }
1126        return TypeKlassPtr::make(TypePtr::NotNull, ak, 0/*offset*/);
1127      } else {                  // Found a type-array?
1128        //assert(!UseExactTypes, "this code should be useless with exact types");
1129        assert( ak->is_type_array_klass(), "" );
1130        return TypeKlassPtr::make(ak); // These are always precise
1131      }
1132    }
1133  }
1134
1135  // Check for loading klass from an array klass
1136  const TypeKlassPtr *tkls = tp->isa_klassptr();
1137  if (tkls != NULL && !StressReflectiveCode) {
1138    ciKlass* klass = tkls->klass();
1139    if( !klass->is_loaded() )
1140      return _type;             // Bail out if not loaded
1141    if( klass->is_obj_array_klass() &&
1142        (uint)tkls->offset() == objArrayKlass::element_klass_offset_in_bytes() + sizeof(oopDesc)) {
1143      ciKlass* elem = klass->as_obj_array_klass()->element_klass();
1144      // // Always returning precise element type is incorrect,
1145      // // e.g., element type could be object and array may contain strings
1146      // return TypeKlassPtr::make(TypePtr::Constant, elem, 0);
1147
1148      // The array's TypeKlassPtr was declared 'precise' or 'not precise'
1149      // according to the element type's subclassing.
1150      return TypeKlassPtr::make(tkls->ptr(), elem, 0/*offset*/);
1151    }
1152    if( klass->is_instance_klass() && tkls->klass_is_exact() &&
1153        (uint)tkls->offset() == Klass::super_offset_in_bytes() + sizeof(oopDesc)) {
1154      ciKlass* sup = klass->as_instance_klass()->super();
1155      // The field is Klass::_super.  Return its (constant) value.
1156      // (Folds up the 2nd indirection in aClassConstant.getSuperClass().)
1157      return sup ? TypeKlassPtr::make(sup) : TypePtr::NULL_PTR;
1158    }
1159  }
1160
1161  // Bailout case
1162  return LoadNode::Value(phase);
1163}
1164
1165//------------------------------Identity---------------------------------------
1166// To clean up reflective code, simplify k.java_mirror.as_klass to plain k.
1167// Also feed through the klass in Allocate(...klass...)._klass.
1168Node* LoadKlassNode::Identity( PhaseTransform *phase ) {
1169  Node* x = LoadNode::Identity(phase);
1170  if (x != this)  return x;
1171
1172  // Take apart the address into an oop and an offset.
1173  // Return 'this' if we cannot.
1174  Node*    adr    = in(MemNode::Address);
1175  intptr_t offset = 0;
1176  Node*    base   = AddPNode::Ideal_base_and_offset(adr, phase, offset);
1177  if (base == NULL)     return this;
1178  const TypeOopPtr* toop = phase->type(adr)->isa_oopptr();
1179  if (toop == NULL)     return this;
1180
1181  // We can fetch the klass directly through an AllocateNode.
1182  // This works even if the klass is not constant (clone or newArray).
1183  if (offset == oopDesc::klass_offset_in_bytes()) {
1184    Node* allocated_klass = AllocateNode::Ideal_klass(base, phase);
1185    if (allocated_klass != NULL) {
1186      return allocated_klass;
1187    }
1188  }
1189
1190  // Simplify k.java_mirror.as_klass to plain k, where k is a klassOop.
1191  // Simplify ak.component_mirror.array_klass to plain ak, ak an arrayKlass.
1192  // See inline_native_Class_query for occurrences of these patterns.
1193  // Java Example:  x.getClass().isAssignableFrom(y)
1194  // Java Example:  Array.newInstance(x.getClass().getComponentType(), n)
1195  //
1196  // This improves reflective code, often making the Class
1197  // mirror go completely dead.  (Current exception:  Class
1198  // mirrors may appear in debug info, but we could clean them out by
1199  // introducing a new debug info operator for klassOop.java_mirror).
1200  if (toop->isa_instptr() && toop->klass() == phase->C->env()->Class_klass()
1201      && (offset == java_lang_Class::klass_offset_in_bytes() ||
1202          offset == java_lang_Class::array_klass_offset_in_bytes())) {
1203    // We are loading a special hidden field from a Class mirror,
1204    // the field which points to its Klass or arrayKlass metaobject.
1205    if (base->is_Load()) {
1206      Node* adr2 = base->in(MemNode::Address);
1207      const TypeKlassPtr* tkls = phase->type(adr2)->isa_klassptr();
1208      if (tkls != NULL && !tkls->empty()
1209          && (tkls->klass()->is_instance_klass() ||
1210              tkls->klass()->is_array_klass())
1211          && adr2->is_AddP()
1212          ) {
1213        int mirror_field = Klass::java_mirror_offset_in_bytes();
1214        if (offset == java_lang_Class::array_klass_offset_in_bytes()) {
1215          mirror_field = in_bytes(arrayKlass::component_mirror_offset());
1216        }
1217        if (tkls->offset() == mirror_field + (int)sizeof(oopDesc)) {
1218          return adr2->in(AddPNode::Base);
1219        }
1220      }
1221    }
1222  }
1223
1224  return this;
1225}
1226
1227//------------------------------Value-----------------------------------------
1228const Type *LoadRangeNode::Value( PhaseTransform *phase ) const {
1229  // Either input is TOP ==> the result is TOP
1230  const Type *t1 = phase->type( in(MemNode::Memory) );
1231  if( t1 == Type::TOP ) return Type::TOP;
1232  Node *adr = in(MemNode::Address);
1233  const Type *t2 = phase->type( adr );
1234  if( t2 == Type::TOP ) return Type::TOP;
1235  const TypePtr *tp = t2->is_ptr();
1236  if (TypePtr::above_centerline(tp->ptr()))  return Type::TOP;
1237  const TypeAryPtr *tap = tp->isa_aryptr();
1238  if( !tap ) return _type;
1239  return tap->size();
1240}
1241
1242//------------------------------Identity---------------------------------------
1243// Feed through the length in AllocateArray(...length...)._length.
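// Illustrative Java shape (not from the original comment):
//   int[] a = new int[n];  ...  a.length
// Here the LoadRange sees the fresh AllocateArray, so it folds to the
// allocation's length input (n), possibly through a narrowing cast.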
1244Node* LoadRangeNode::Identity( PhaseTransform *phase ) {
1245  Node* x = LoadINode::Identity(phase);
1246  if (x != this)  return x;
1247
1248  // Take apart the address into an oop and an offset.
1249  // Return 'this' if we cannot.
1250  Node*    adr    = in(MemNode::Address);
1251  intptr_t offset = 0;
1252  Node*    base   = AddPNode::Ideal_base_and_offset(adr, phase, offset);
1253  if (base == NULL)     return this;
1254  const TypeAryPtr* tary = phase->type(adr)->isa_aryptr();
1255  if (tary == NULL)     return this;
1256
1257  // We can fetch the length directly through an AllocateArrayNode.
1258  // This works even if the length is not constant (clone or newArray).
1259  if (offset == arrayOopDesc::length_offset_in_bytes()) {
1260    Node* allocated_length = AllocateArrayNode::Ideal_length(base, phase);
1261    if (allocated_length != NULL) {
1262      return allocated_length;
1263    }
1264  }
1265
1266  return this;
1267
1268}
1269//=============================================================================
1270//---------------------------StoreNode::make-----------------------------------
1271// Polymorphic factory method:
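// Example usage (as in ClearArrayNode::clear_memory later in this file):
//   StoreNode::make(C, ctl, mem, adr, TypeRawPtr::BOTTOM,
//                   phase->zerocon(T_INT), T_INT)
// selects and returns a new StoreINode for the 4-byte zero store.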
1272StoreNode* StoreNode::make( Compile *C, Node* ctl, Node* mem, Node* adr, const TypePtr* adr_type, Node* val, BasicType bt ) {
1273  switch (bt) {
1274  case T_BOOLEAN:
1275  case T_BYTE:    return new (C, 4) StoreBNode(ctl, mem, adr, adr_type, val);
1276  case T_INT:     return new (C, 4) StoreINode(ctl, mem, adr, adr_type, val);
1277  case T_CHAR:
1278  case T_SHORT:   return new (C, 4) StoreCNode(ctl, mem, adr, adr_type, val);
1279  case T_LONG:    return new (C, 4) StoreLNode(ctl, mem, adr, adr_type, val);
1280  case T_FLOAT:   return new (C, 4) StoreFNode(ctl, mem, adr, adr_type, val);
1281  case T_DOUBLE:  return new (C, 4) StoreDNode(ctl, mem, adr, adr_type, val);
1282  case T_ADDRESS:
1283  case T_OBJECT:  return new (C, 4) StorePNode(ctl, mem, adr, adr_type, val);
1284  }
1285  ShouldNotReachHere();
1286  return (StoreNode*)NULL;
1287}
1288
1289StoreLNode* StoreLNode::make_atomic(Compile *C, Node* ctl, Node* mem, Node* adr, const TypePtr* adr_type, Node* val) {
1290  bool require_atomic = true;
1291  return new (C, 4) StoreLNode(ctl, mem, adr, adr_type, val, require_atomic);
1292}
1293
1294
1295//--------------------------bottom_type----------------------------------------
1296const Type *StoreNode::bottom_type() const {
1297  return Type::MEMORY;
1298}
1299
1300//------------------------------hash-------------------------------------------
1301uint StoreNode::hash() const {
1302  // unroll addition of interesting fields
1303  //return (uintptr_t)in(Control) + (uintptr_t)in(Memory) + (uintptr_t)in(Address) + (uintptr_t)in(ValueIn);
1304
1305  // Since they are not commoned, do not hash them:
1306  return NO_HASH;
1307}
1308
1309//------------------------------Ideal------------------------------------------
1310// Fold back-to-back stores to the same address:  Store(Store(m, p, y), p, x) becomes Store(m, p, x).
1311// When a store immediately follows a relevant allocation/initialization,
1312// try to capture it into the initialization, or hoist it above.
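// Sketch of the first transformation (illustrative IR shapes):
//   mem  = (StoreI m p y)        // has no other users
//   this = (StoreI mem p x)  ==>  (StoreI m p x)
// i.e. the younger, same-size-or-larger store makes the older one dead.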
1313Node *StoreNode::Ideal(PhaseGVN *phase, bool can_reshape) {
1314  Node* p = MemNode::Ideal_common(phase, can_reshape);
1315  if (p)  return (p == NodeSentinel) ? NULL : p;
1316
1317  Node* mem     = in(MemNode::Memory);
1318  Node* address = in(MemNode::Address);
1319
1320  // Back-to-back stores to same address?  Fold em up.
1321  // Generally unsafe if I have intervening uses...
1322  if (mem->is_Store() && phase->eqv_uncast(mem->in(MemNode::Address), address)) {
1323    // Looking at a dead closed cycle of memory?
1324    assert(mem != mem->in(MemNode::Memory), "dead loop in StoreNode::Ideal");
1325
1326    assert(Opcode() == mem->Opcode() ||
1327           phase->C->get_alias_index(adr_type()) == Compile::AliasIdxRaw,
1328           "no mismatched stores, except on raw memory");
1329
1330    if (mem->outcnt() == 1 &&           // check for intervening uses
1331        mem->as_Store()->memory_size() <= this->memory_size()) {
1332      // If anybody other than 'this' uses 'mem', we cannot fold 'mem' away.
1333      // For example, 'mem' might be the final state at a conditional return.
1334      // Or, 'mem' might be used by some node which is live at the same time
1335      // 'this' is live, which might be unschedulable.  So, require exactly
1336      // ONE user, the 'this' store, until such time as we clone 'mem' for
1337      // each of 'mem's uses (thus making the exactly-1-user-rule hold true).
1338      if (can_reshape) {  // (%%% is this an anachronism?)
1339        set_req_X(MemNode::Memory, mem->in(MemNode::Memory),
1340                  phase->is_IterGVN());
1341      } else {
1342        // It's OK to do this in the parser, since DU info is always accurate,
1343        // and the parser always refers to nodes via SafePointNode maps.
1344        set_req(MemNode::Memory, mem->in(MemNode::Memory));
1345      }
1346      return this;
1347    }
1348  }
1349
1350  // Capture an unaliased, unconditional, simple store into an initializer.
1351  // Or, if it is independent of the allocation, hoist it above the allocation.
1352  if (ReduceFieldZeroing && /*can_reshape &&*/
1353      mem->is_Proj() && mem->in(0)->is_Initialize()) {
1354    InitializeNode* init = mem->in(0)->as_Initialize();
1355    intptr_t offset = init->can_capture_store(this, phase);
1356    if (offset > 0) {
1357      Node* moved = init->capture_store(this, offset, phase);
1358      // If the InitializeNode captured me, it made a raw copy of me,
1359      // and I need to disappear.
1360      if (moved != NULL) {
1361        // %%% hack to ensure that Ideal returns a new node:
1362        mem = MergeMemNode::make(phase->C, mem);
1363        return mem;             // fold me away
1364      }
1365    }
1366  }
1367
1368  return NULL;                  // No further progress
1369}
1370
1371//------------------------------Value-----------------------------------------
1372const Type *StoreNode::Value( PhaseTransform *phase ) const {
1373  // Either input is TOP ==> the result is TOP
1374  const Type *t1 = phase->type( in(MemNode::Memory) );
1375  if( t1 == Type::TOP ) return Type::TOP;
1376  const Type *t2 = phase->type( in(MemNode::Address) );
1377  if( t2 == Type::TOP ) return Type::TOP;
1378  const Type *t3 = phase->type( in(MemNode::ValueIn) );
1379  if( t3 == Type::TOP ) return Type::TOP;
1380  return Type::MEMORY;
1381}
1382
1383//------------------------------Identity---------------------------------------
1384// Remove redundant stores:
1385//   Store(m, p, Load(m, p)) changes to m.
1386//   Store(Store(m, p, x), p, x) changes to Store(m, p, x).
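// Java shape for the first rule (illustrative, assuming no intervening
// memory effects):  p.f = p.f;  the stored value is a Load of the same
// memory state and address, so the store reduces to the prior memory m.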
1387Node *StoreNode::Identity( PhaseTransform *phase ) {
1388  Node* mem = in(MemNode::Memory);
1389  Node* adr = in(MemNode::Address);
1390  Node* val = in(MemNode::ValueIn);
1391
1392  // Load then Store?  Then the Store is useless
1393  if (val->is_Load() &&
1394      phase->eqv_uncast( val->in(MemNode::Address), adr ) &&
1395      phase->eqv_uncast( val->in(MemNode::Memory ), mem ) &&
1396      val->as_Load()->store_Opcode() == Opcode()) {
1397    return mem;
1398  }
1399
1400  // Two stores in a row of the same value?
1401  if (mem->is_Store() &&
1402      phase->eqv_uncast( mem->in(MemNode::Address), adr ) &&
1403      phase->eqv_uncast( mem->in(MemNode::ValueIn), val ) &&
1404      mem->Opcode() == Opcode()) {
1405    return mem;
1406  }
1407
1408  // Store of zero anywhere into a freshly-allocated object?
1409  // Then the store is useless.
1410  // (It must already have been captured by the InitializeNode.)
1411  if (ReduceFieldZeroing && phase->type(val)->is_zero_type()) {
1412    // a newly allocated object is already all-zeroes everywhere
1413    if (mem->is_Proj() && mem->in(0)->is_Allocate()) {
1414      return mem;
1415    }
1416
1417    // the store may also apply to zero-bits in an earlier object
1418    Node* prev_mem = find_previous_store(phase);
1419    // Steps (a), (b):  Walk past independent stores to find an exact match.
1420    if (prev_mem != NULL) {
1421      Node* prev_val = can_see_stored_value(prev_mem, phase);
1422      if (prev_val != NULL && phase->eqv(prev_val, val)) {
1423        // prev_val and val might differ by a cast; it would be good
1424        // to keep the more informative of the two.
1425        return mem;
1426      }
1427    }
1428  }
1429
1430  return this;
1431}
1432
1433//------------------------------match_edge-------------------------------------
1434// Do we Match on this edge index or not?  Match only address & value
1435uint StoreNode::match_edge(uint idx) const {
1436  return idx == MemNode::Address || idx == MemNode::ValueIn;
1437}
1438
1439//------------------------------cmp--------------------------------------------
1440// Do not common stores up together.  They generally have to be split
1441// back up anyways, so do not bother.
1442uint StoreNode::cmp( const Node &n ) const {
1443  return (&n == this);          // Always fail except on self
1444}
1445
1446//------------------------------Ideal_masked_input-----------------------------
1447// Check for a useless mask before a partial-word store
1448// (StoreB ... (AndI valIn conIa) )
1449// If (conIa & mask == mask) this simplifies to
1450// (StoreB ... (valIn) )
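// Illustrative source-level shape (typical, not guaranteed):
//   charArray[i] = (char) v;   // i2c shows up as (AndI v 0xFFFF)
// The resulting (StoreC ... (AndI v 0xFFFF)) keeps only the low 16 bits
// anyway, so with mask == 0xFFFF the AndI can be bypassed.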
1451Node *StoreNode::Ideal_masked_input(PhaseGVN *phase, uint mask) {
1452  Node *val = in(MemNode::ValueIn);
1453  if( val->Opcode() == Op_AndI ) {
1454    const TypeInt *t = phase->type( val->in(2) )->isa_int();
1455    if( t && t->is_con() && (t->get_con() & mask) == mask ) {
1456      set_req(MemNode::ValueIn, val->in(1));
1457      return this;
1458    }
1459  }
1460  return NULL;
1461}
1462
1463
1464//------------------------------Ideal_sign_extended_input----------------------
1465// Check for useless sign-extension before a partial-word store
1466// (StoreB ... (RShiftI _ (LShiftI _ valIn conIL ) conIR) )
1467// If (conIL == conIR && conIR <= num_bits)  this simplifies to
1468// (StoreB ... (valIn) )
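// Illustrative source-level shape (typical, not guaranteed):
//   byteArray[i] = (byte) v;   // i2b shows up as (RShiftI (LShiftI v 24) 24)
// A StoreB keeps only the low 8 bits, so with num_bits == 24 both shifts
// are useless and valIn can feed the store directly.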
1469Node *StoreNode::Ideal_sign_extended_input(PhaseGVN *phase, int num_bits) {
1470  Node *val = in(MemNode::ValueIn);
1471  if( val->Opcode() == Op_RShiftI ) {
1472    const TypeInt *t = phase->type( val->in(2) )->isa_int();
1473    if( t && t->is_con() && (t->get_con() <= num_bits) ) {
1474      Node *shl = val->in(1);
1475      if( shl->Opcode() == Op_LShiftI ) {
1476        const TypeInt *t2 = phase->type( shl->in(2) )->isa_int();
1477        if( t2 && t2->is_con() && (t2->get_con() == t->get_con()) ) {
1478          set_req(MemNode::ValueIn, shl->in(1));
1479          return this;
1480        }
1481      }
1482    }
1483  }
1484  return NULL;
1485}
1486
1487//------------------------------value_never_loaded-----------------------------------
1488// Determine whether there are any possible loads of the value stored.
1489// For simplicity, we actually check if there are any loads from the
1490// address stored to, not just for loads of the value stored by this node.
1491//
1492bool StoreNode::value_never_loaded( PhaseTransform *phase) const {
1493  Node *adr = in(Address);
1494  const TypeOopPtr *adr_oop = phase->type(adr)->isa_oopptr();
1495  if (adr_oop == NULL)
1496    return false;
1497  if (!adr_oop->is_instance())
1498    return false; // if not a distinct instance, there may be aliases of the address
1499  for (DUIterator_Fast imax, i = adr->fast_outs(imax); i < imax; i++) {
1500    Node *use = adr->fast_out(i);
1501    int opc = use->Opcode();
1502    if (use->is_Load() || use->is_LoadStore()) {
1503      return false;
1504    }
1505  }
1506  return true;
1507}
1508
1509//=============================================================================
1510//------------------------------Ideal------------------------------------------
1511// If the store is from an AND mask that leaves the low bits untouched, then
1512// we can skip the AND operation.  If the store is from a sign-extension
1513// (a left shift, then right shift) we can skip both.
1514Node *StoreBNode::Ideal(PhaseGVN *phase, bool can_reshape){
1515  Node *progress = StoreNode::Ideal_masked_input(phase, 0xFF);
1516  if( progress != NULL ) return progress;
1517
1518  progress = StoreNode::Ideal_sign_extended_input(phase, 24);
1519  if( progress != NULL ) return progress;
1520
1521  // Finally check the default case
1522  return StoreNode::Ideal(phase, can_reshape);
1523}
1524
1525//=============================================================================
1526//------------------------------Ideal------------------------------------------
1527// If the store is from an AND mask that leaves the low bits untouched, then
1528// we can skip the AND operation
1529Node *StoreCNode::Ideal(PhaseGVN *phase, bool can_reshape){
1530  Node *progress = StoreNode::Ideal_masked_input(phase, 0xFFFF);
1531  if( progress != NULL ) return progress;
1532
1533  progress = StoreNode::Ideal_sign_extended_input(phase, 16);
1534  if( progress != NULL ) return progress;
1535
1536  // Finally check the default case
1537  return StoreNode::Ideal(phase, can_reshape);
1538}
1539
1540//=============================================================================
1541//------------------------------Identity---------------------------------------
1542Node *StoreCMNode::Identity( PhaseTransform *phase ) {
1543  // No need to card mark when storing a null ptr
1544  Node* my_store = in(MemNode::OopStore);
1545  if (my_store->is_Store()) {
1546    const Type *t1 = phase->type( my_store->in(MemNode::ValueIn) );
1547    if( t1 == TypePtr::NULL_PTR ) {
1548      return in(MemNode::Memory);
1549    }
1550  }
1551  return this;
1552}
1553
1554//------------------------------Value-----------------------------------------
1555const Type *StoreCMNode::Value( PhaseTransform *phase ) const {
1556  // If extra input is TOP ==> the result is TOP
1557  const Type *t1 = phase->type( in(MemNode::OopStore) );
1558  if( t1 == Type::TOP ) return Type::TOP;
1559
1560  return StoreNode::Value( phase );
1561}
1562
1563
1564//=============================================================================
1565//----------------------------------SCMemProjNode------------------------------
1566const Type * SCMemProjNode::Value( PhaseTransform *phase ) const
1567{
1568  return bottom_type();
1569}
1570
1571//=============================================================================
1572LoadStoreNode::LoadStoreNode( Node *c, Node *mem, Node *adr, Node *val, Node *ex ) : Node(5) {
1573  init_req(MemNode::Control, c  );
1574  init_req(MemNode::Memory , mem);
1575  init_req(MemNode::Address, adr);
1576  init_req(MemNode::ValueIn, val);
1577  init_req(         ExpectedIn, ex );
1578  init_class_id(Class_LoadStore);
1579
1580}
1581
1582//=============================================================================
1583//-------------------------------adr_type--------------------------------------
1584// The address type of a ClearArray is computed from its destination address, in(3)
1585const TypePtr* ClearArrayNode::adr_type() const {
1586  Node *adr = in(3);
1587  return MemNode::calculate_adr_type(adr->bottom_type());
1588}
1589
1590//------------------------------match_edge-------------------------------------
1591// Do we Match on this edge index or not?  Do not match memory
1592uint ClearArrayNode::match_edge(uint idx) const {
1593  return idx > 1;
1594}
1595
1596//------------------------------Identity---------------------------------------
1597// Clearing a zero length array does nothing
1598Node *ClearArrayNode::Identity( PhaseTransform *phase ) {
1599  return phase->type(in(2))->higher_equal(TypeInt::ZERO)  ? in(1) : this;
1600}
1601
1602//------------------------------Idealize---------------------------------------
1603// Clearing a short array is faster with stores
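// For example (rough sketch of the expansion below): a constant clear of
// 16 bytes within init_array_short_size becomes two chained zero stores:
//   mem1 = (StoreL ctl mem  adr            0)
//   mem2 = (StoreL ctl mem1 (AddP _ adr 8) 0)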
1604Node *ClearArrayNode::Ideal(PhaseGVN *phase, bool can_reshape){
1605  const int unit = BytesPerLong;
1606  const TypeX* t = phase->type(in(2))->isa_intptr_t();
1607  if (!t)  return NULL;
1608  if (!t->is_con())  return NULL;
1609  intptr_t raw_count = t->get_con();
1610  intptr_t size = raw_count;
1611  if (!Matcher::init_array_count_is_in_bytes) size *= unit;
1612  // Clearing nothing uses the Identity call.
1613  // Negative clears are possible on dead ClearArrays
1614  // (see jck test stmt114.stmt11402.val).
1615  if (size <= 0 || size % unit != 0)  return NULL;
1616  intptr_t count = size / unit;
1617  // Length too long; use fast hardware clear
1618  if (size > Matcher::init_array_short_size)  return NULL;
1619  Node *mem = in(1);
1620  if( phase->type(mem)==Type::TOP ) return NULL;
1621  Node *adr = in(3);
1622  const Type* at = phase->type(adr);
1623  if( at==Type::TOP ) return NULL;
1624  const TypePtr* atp = at->isa_ptr();
1625  // adjust atp to be the correct array element address type
1626  if (atp == NULL)  atp = TypePtr::BOTTOM;
1627  else              atp = atp->add_offset(Type::OffsetBot);
1628  // Get base for derived pointer purposes
1629  if( adr->Opcode() != Op_AddP ) Unimplemented();
1630  Node *base = adr->in(1);
1631
1632  Node *zero = phase->makecon(TypeLong::ZERO);
1633  Node *off  = phase->MakeConX(BytesPerLong);
1634  mem = new (phase->C, 4) StoreLNode(in(0),mem,adr,atp,zero);
1635  count--;
1636  while( count-- ) {
1637    mem = phase->transform(mem);
1638    adr = phase->transform(new (phase->C, 4) AddPNode(base,adr,off));
1639    mem = new (phase->C, 4) StoreLNode(in(0),mem,adr,atp,zero);
1640  }
1641  return mem;
1642}
1643
1644//----------------------------clear_memory-------------------------------------
1645// Generate code to initialize object storage to zero.
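// Note on alignment (sketch of the logic below): if start_offset is not
// 8-byte aligned, e.g. 12, a single 32-bit zero store is emitted at offset
// 12 first; the now-aligned remainder [16, end_offset) is then handed to
// the ClearArray-based variant.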
1646Node* ClearArrayNode::clear_memory(Node* ctl, Node* mem, Node* dest,
1647                                   intptr_t start_offset,
1648                                   Node* end_offset,
1649                                   PhaseGVN* phase) {
1650  Compile* C = phase->C;
1651  intptr_t offset = start_offset;
1652
1653  int unit = BytesPerLong;
1654  if ((offset % unit) != 0) {
1655    Node* adr = new (C, 4) AddPNode(dest, dest, phase->MakeConX(offset));
1656    adr = phase->transform(adr);
1657    const TypePtr* atp = TypeRawPtr::BOTTOM;
1658    mem = StoreNode::make(C, ctl, mem, adr, atp, phase->zerocon(T_INT), T_INT);
1659    mem = phase->transform(mem);
1660    offset += BytesPerInt;
1661  }
1662  assert((offset % unit) == 0, "");
1663
1664  // Initialize the remaining stuff, if any, with a ClearArray.
1665  return clear_memory(ctl, mem, dest, phase->MakeConX(offset), end_offset, phase);
1666}
1667
1668Node* ClearArrayNode::clear_memory(Node* ctl, Node* mem, Node* dest,
1669                                   Node* start_offset,
1670                                   Node* end_offset,
1671                                   PhaseGVN* phase) {
1672  Compile* C = phase->C;
1673  int unit = BytesPerLong;
1674  Node* zbase = start_offset;
1675  Node* zend  = end_offset;
1676
1677  // Scale to the unit required by the CPU:
1678  if (!Matcher::init_array_count_is_in_bytes) {
1679    Node* shift = phase->intcon(exact_log2(unit));
1680    zbase = phase->transform( new(C,3) URShiftXNode(zbase, shift) );
1681    zend  = phase->transform( new(C,3) URShiftXNode(zend,  shift) );
1682  }
1683
1684  Node* zsize = phase->transform( new(C,3) SubXNode(zend, zbase) );
1685  Node* zinit = phase->zerocon((unit == BytesPerLong) ? T_LONG : T_INT);
1686
1687  // Bulk clear double-words
1688  Node* adr = phase->transform( new(C,4) AddPNode(dest, dest, start_offset) );
1689  mem = new (C, 4) ClearArrayNode(ctl, mem, zsize, adr);
1690  return phase->transform(mem);
1691}
1692
1693Node* ClearArrayNode::clear_memory(Node* ctl, Node* mem, Node* dest,
1694                                   intptr_t start_offset,
1695                                   intptr_t end_offset,
1696                                   PhaseGVN* phase) {
1697  Compile* C = phase->C;
1698  assert((end_offset % BytesPerInt) == 0, "odd end offset");
1699  intptr_t done_offset = end_offset;
1700  if ((done_offset % BytesPerLong) != 0) {
1701    done_offset -= BytesPerInt;
1702  }
1703  if (done_offset > start_offset) {
1704    mem = clear_memory(ctl, mem, dest,
1705                       start_offset, phase->MakeConX(done_offset), phase);
1706  }
1707  if (done_offset < end_offset) { // emit the final 32-bit store
1708    Node* adr = new (C, 4) AddPNode(dest, dest, phase->MakeConX(done_offset));
1709    adr = phase->transform(adr);
1710    const TypePtr* atp = TypeRawPtr::BOTTOM;
1711    mem = StoreNode::make(C, ctl, mem, adr, atp, phase->zerocon(T_INT), T_INT);
1712    mem = phase->transform(mem);
1713    done_offset += BytesPerInt;
1714  }
1715  assert(done_offset == end_offset, "");
1716  return mem;
1717}
1718
1719//=============================================================================
1720// Do we match on this edge? No memory edges
1721uint StrCompNode::match_edge(uint idx) const {
1722  return idx == 5 || idx == 6;
1723}
1724
1725//------------------------------Ideal------------------------------------------
1726// Return a node which is more "ideal" than the current node.  Strip out
1727// control copies
1728Node *StrCompNode::Ideal(PhaseGVN *phase, bool can_reshape){
1729  return remove_dead_region(phase, can_reshape) ? this : NULL;
1730}
1731
1732
1733//=============================================================================
1734MemBarNode::MemBarNode(Compile* C, int alias_idx, Node* precedent)
1735  : MultiNode(TypeFunc::Parms + (precedent == NULL? 0: 1)),
1736    _adr_type(C->get_adr_type(alias_idx))
1737{
1738  init_class_id(Class_MemBar);
1739  Node* top = C->top();
1740  init_req(TypeFunc::I_O,top);
1741  init_req(TypeFunc::FramePtr,top);
1742  init_req(TypeFunc::ReturnAdr,top);
1743  if (precedent != NULL)
1744    init_req(TypeFunc::Parms, precedent);
1745}
1746
1747//------------------------------cmp--------------------------------------------
1748uint MemBarNode::hash() const { return NO_HASH; }
1749uint MemBarNode::cmp( const Node &n ) const {
1750  return (&n == this);          // Always fail except on self
1751}
1752
1753//------------------------------make-------------------------------------------
1754MemBarNode* MemBarNode::make(Compile* C, int opcode, int atp, Node* pn) {
1755  int len = Precedent + (pn == NULL? 0: 1);
1756  switch (opcode) {
1757  case Op_MemBarAcquire:   return new(C, len) MemBarAcquireNode(C,  atp, pn);
1758  case Op_MemBarRelease:   return new(C, len) MemBarReleaseNode(C,  atp, pn);
1759  case Op_MemBarVolatile:  return new(C, len) MemBarVolatileNode(C, atp, pn);
1760  case Op_MemBarCPUOrder:  return new(C, len) MemBarCPUOrderNode(C, atp, pn);
1761  case Op_Initialize:      return new(C, len) InitializeNode(C,     atp, pn);
1762  default:                 ShouldNotReachHere(); return NULL;
1763  }
1764}
1765
1766//------------------------------Ideal------------------------------------------
1767// Return a node which is more "ideal" than the current node.  Strip out
1768// control copies
1769Node *MemBarNode::Ideal(PhaseGVN *phase, bool can_reshape) {
1770  if (remove_dead_region(phase, can_reshape))  return this;
1771  return NULL;
1772}
1773
1774//------------------------------Value------------------------------------------
1775const Type *MemBarNode::Value( PhaseTransform *phase ) const {
1776  if( !in(0) ) return Type::TOP;
1777  if( phase->type(in(0)) == Type::TOP )
1778    return Type::TOP;
1779  return TypeTuple::MEMBAR;
1780}
1781
1782//------------------------------match------------------------------------------
1783// Construct projections for memory.
1784Node *MemBarNode::match( const ProjNode *proj, const Matcher *m ) {
1785  switch (proj->_con) {
1786  case TypeFunc::Control:
1787  case TypeFunc::Memory:
1788    return new (m->C, 1) MachProjNode(this,proj->_con,RegMask::Empty,MachProjNode::unmatched_proj);
1789  }
1790  ShouldNotReachHere();
1791  return NULL;
1792}
1793
1794//===========================InitializeNode====================================
1795// SUMMARY:
1796// This node acts as a memory barrier on raw memory, after some raw stores.
1797// The 'cooked' oop value feeds from the Initialize, not the Allocation.
1798// The Initialize can 'capture' suitably constrained stores as raw inits.
1799// It can coalesce related raw stores into larger units (called 'tiles').
1800// It can avoid zeroing new storage for memory units which have raw inits.
1801// At macro-expansion, it is marked 'complete', and does not optimize further.
1802//
1803// EXAMPLE:
1804// The object 'new short[2]' occupies 16 bytes in a 32-bit machine.
1805//   ctl = incoming control; mem* = incoming memory
1806// (Note:  A star * on a memory edge denotes I/O and other standard edges.)
1807// First allocate uninitialized memory and fill in the header:
1808//   alloc = (Allocate ctl mem* 16 #short[].klass ...)
1809//   ctl := alloc.Control; mem* := alloc.Memory*
1810//   rawmem = alloc.Memory; rawoop = alloc.RawAddress
1811// Then initialize to zero the non-header parts of the raw memory block:
1812//   init = (Initialize alloc.Control alloc.Memory* alloc.RawAddress)
1813//   ctl := init.Control; mem.SLICE(#short[*]) := init.Memory
1814// After the initialize node executes, the object is ready for service:
1815//   oop := (CheckCastPP init.Control alloc.RawAddress #short[])
1816// Suppose its body is immediately initialized as {1,2}:
1817//   store1 = (StoreC init.Control init.Memory (+ oop 12) 1)
1818//   store2 = (StoreC init.Control store1      (+ oop 14) 2)
1819//   mem.SLICE(#short[*]) := store2
1820//
1821// DETAILS:
1822// An InitializeNode collects and isolates object initialization after
1823// an AllocateNode and before the next possible safepoint.  As a
1824// memory barrier (MemBarNode), it keeps critical stores from drifting
1825// down past any safepoint or any publication of the allocation.
1826// Before this barrier, a newly-allocated object may have uninitialized bits.
1827// After this barrier, it may be treated as a real oop, and GC is allowed.
1828//
1829// The semantics of the InitializeNode include an implicit zeroing of
1830// the new object from object header to the end of the object.
1831// (The object header and end are determined by the AllocateNode.)
1832//
1833// Certain stores may be added as direct inputs to the InitializeNode.
1834// These stores must update raw memory, and they must be to addresses
1835// derived from the raw address produced by AllocateNode, and with
1836// a constant offset.  They must be ordered by increasing offset.
1837// The first one is at in(RawStores), the last at in(req()-1).
1838// Unlike most memory operations, they are not linked in a chain,
1839// but are displayed in parallel as users of the rawmem output of
1840// the allocation.
1841//
1842// (See comments in InitializeNode::capture_store, which continue
1843// the example given above.)
1844//
1845// When the associated Allocate is macro-expanded, the InitializeNode
1846// may be rewritten to optimize collected stores.  A ClearArrayNode
1847// may also be created at that point to represent any required zeroing.
1848// The InitializeNode is then marked 'complete', prohibiting further
1849// capturing of nearby memory operations.
1850//
1851// During macro-expansion, all captured initializations which store
1852// constant values of 32 bits or smaller are coalesced (if advantageous)
1853// into larger 'tiles' of 32 or 64 bits.  This allows an object to be
1854// initialized in fewer memory operations.  Memory words which are
1855// covered by neither tiles nor non-constant stores are pre-zeroed
1856// by explicit stores of zero.  (The code shape happens to do all
1857// zeroing first, then all other stores, with both sequences occurring
1858// in order of ascending offsets.)
1859//
1860// Alternatively, code may be inserted between an AllocateNode and its
1861// InitializeNode, to perform arbitrary initialization of the new object.
1862// E.g., the object copying intrinsics insert complex data transfers here.
1863// The initialization must then be marked as 'complete' to disable the
1864// built-in zeroing semantics and the collection of initializing stores.
1865//
1866// While an InitializeNode is incomplete, reads from the memory state
1867// produced by it are optimizable if they match the control edge and
1868// new oop address associated with the allocation/initialization.
1869// They return a stored value (if the offset matches) or else zero.
1870// A write to the memory state, if it matches control and address,
1871// and if it is to a constant offset, may be 'captured' by the
1872// InitializeNode.  It is cloned as a raw memory operation and rewired
1873// inside the initialization, to the raw oop produced by the allocation.
1874// Operations on addresses which are provably distinct (e.g., to
1875// other AllocateNodes) are allowed to bypass the initialization.
1876//
1877// The effect of all this is to consolidate object initialization
1878// (both arrays and non-arrays, both piecewise and bulk) into a
1879// single location, where it can be optimized as a unit.
1880//
1881// Only stores with an offset less than TrackedInitializationLimit words
1882// will be considered for capture by an InitializeNode.  This puts a
1883// reasonable limit on the complexity of optimized initializations.
1884
1885//---------------------------InitializeNode------------------------------------
1886InitializeNode::InitializeNode(Compile* C, int adr_type, Node* rawoop)
1887  : MemBarNode(C, adr_type, rawoop),
1888    _is_complete(false)
1889{
1890  init_class_id(Class_Initialize);
1891
1892  assert(adr_type == Compile::AliasIdxRaw, "only valid atp");
1893  assert(in(RawAddress) == rawoop, "proper init");
1894  // Note:  allocation() can be NULL, for secondary initialization barriers
1895}
1896
1897// Since this node is not matched, it will be processed by the
1898// register allocator.  Declare that there are no constraints
1899// on the allocation of the RawAddress edge.
1900const RegMask &InitializeNode::in_RegMask(uint idx) const {
1901  // This edge should be set to top, by the set_complete.  But be conservative.
1902  if (idx == InitializeNode::RawAddress)
1903    return *(Compile::current()->matcher()->idealreg2spillmask[in(idx)->ideal_reg()]);
1904  return RegMask::Empty;
1905}
1906
1907Node* InitializeNode::memory(uint alias_idx) {
1908  Node* mem = in(Memory);
1909  if (mem->is_MergeMem()) {
1910    return mem->as_MergeMem()->memory_at(alias_idx);
1911  } else {
1912    // incoming raw memory is not split
1913    return mem;
1914  }
1915}
1916
1917bool InitializeNode::is_non_zero() {
1918  if (is_complete())  return false;
1919  remove_extra_zeroes();
1920  return (req() > RawStores);
1921}
1922
1923void InitializeNode::set_complete(PhaseGVN* phase) {
1924  assert(!is_complete(), "caller responsibility");
1925  _is_complete = true;
1926
1927  // After this node is complete, it contains a bunch of
1928  // raw-memory initializations.  There is no need for
1929  // it to have anything to do with non-raw memory effects.
1930  // Therefore, tell all non-raw users to re-optimize themselves,
1931  // after skipping the memory effects of this initialization.
1932  PhaseIterGVN* igvn = phase->is_IterGVN();
1933  if (igvn)  igvn->add_users_to_worklist(this);
1934}
1935
1936// convenience function
1937// return false if the init contains any stores already
1938bool AllocateNode::maybe_set_complete(PhaseGVN* phase) {
1939  InitializeNode* init = initialization();
1940  if (init == NULL || init->is_complete())  return false;
1941  init->remove_extra_zeroes();
1942  // for now, if this allocation has already collected any inits, bail:
1943  if (init->is_non_zero())  return false;
1944  init->set_complete(phase);
1945  return true;
1946}
1947
1948void InitializeNode::remove_extra_zeroes() {
1949  if (req() == RawStores)  return;
1950  Node* zmem = zero_memory();
1951  uint fill = RawStores;
1952  for (uint i = fill; i < req(); i++) {
1953    Node* n = in(i);
1954    if (n->is_top() || n == zmem)  continue;  // skip
1955    if (fill < i)  set_req(fill, n);          // compact
1956    ++fill;
1957  }
1958  // delete any empty spaces created:
1959  while (fill < req()) {
1960    del_req(fill);
1961  }
1962}
1963
1964// Helper for remembering which stores go with which offsets.
1965intptr_t InitializeNode::get_store_offset(Node* st, PhaseTransform* phase) {
1966  if (!st->is_Store())  return -1;  // can happen to dead code via subsume_node
1967  intptr_t offset = -1;
1968  Node* base = AddPNode::Ideal_base_and_offset(st->in(MemNode::Address),
1969                                               phase, offset);
1970  if (base == NULL)     return -1;  // something is dead,
1971  if (offset < 0)       return -1;  //        dead, dead
1972  return offset;
1973}
1974
1975// Helper for proving that an initialization expression is
1976// "simple enough" to be folded into an object initialization.
1977// Attempts to prove that a store's initial value 'n' can be captured
1978// within the initialization without creating a vicious cycle, such as:
1979//     { Foo p = new Foo(); p.next = p; }
1980// True for constants and parameters and small combinations thereof.
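// By contrast (illustrative), constants and incoming parameters pass, e.g.
//     { Foo p = new Foo(); p.x = 42; p.next = q; }   // q a parameter
// since neither 42 nor q can depend on the new object.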
1981bool InitializeNode::detect_init_independence(Node* n,
1982                                              bool st_is_pinned,
1983                                              int& count) {
1984  if (n == NULL)      return true;   // (can this really happen?)
1985  if (n->is_Proj())   n = n->in(0);
1986  if (n == this)      return false;  // found a cycle
1987  if (n->is_Con())    return true;
1988  if (n->is_Start())  return true;   // params, etc., are OK
1989  if (n->is_Root())   return true;   // even better
1990
1991  Node* ctl = n->in(0);
1992  if (ctl != NULL && !ctl->is_top()) {
1993    if (ctl->is_Proj())  ctl = ctl->in(0);
1994    if (ctl == this)  return false;
1995
1996    // If we already know that the enclosing memory op is pinned right after
1997    // the init, then any control flow that the store has picked up
1998    // must have preceded the init, or else be equal to the init.
1999    // Even after loop optimizations (which might change control edges)
2000    // a store is never pinned *before* the availability of its inputs.
2001    if (!MemNode::detect_dominating_control(ctl, this->in(0)))
2002      return false;                  // failed to prove a good control
2003
2004  }
2005
2006  // Check data edges for possible dependencies on 'this'.
2007  if ((count += 1) > 20)  return false;  // complexity limit
2008  for (uint i = 1; i < n->req(); i++) {
2009    Node* m = n->in(i);
2010    if (m == NULL || m == n || m->is_top())  continue;
2011    uint first_i = n->find_edge(m);
2012    if (i != first_i)  continue;  // process duplicate edge just once
2013    if (!detect_init_independence(m, st_is_pinned, count)) {
2014      return false;
2015    }
2016  }
2017
2018  return true;
2019}
2020
2021// Here are all the checks a Store must pass before it can be moved into
2022// an initialization.  Returns zero if a check fails.
2023// On success, returns the (constant) offset to which the store applies,
2024// within the initialized memory.
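// Illustrative case:  in  { Foo p = new Foo(); p.x = 5; }  the store to
// p.x is unconditional, directly follows this initialization's memory and
// control, and stores a constant, so its (constant) field offset is returned.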
2025intptr_t InitializeNode::can_capture_store(StoreNode* st, PhaseTransform* phase) {
2026  const int FAIL = 0;
2027  if (st->req() != MemNode::ValueIn + 1)
2028    return FAIL;                // an inscrutable StoreNode (card mark?)
2029  Node* ctl = st->in(MemNode::Control);
2030  if (!(ctl != NULL && ctl->is_Proj() && ctl->in(0) == this))
2031    return FAIL;                // must be unconditional after the initialization
2032  Node* mem = st->in(MemNode::Memory);
2033  if (!(mem->is_Proj() && mem->in(0) == this))
2034    return FAIL;                // must not be preceded by other stores
2035  Node* adr = st->in(MemNode::Address);
2036  intptr_t offset;
2037  AllocateNode* alloc = AllocateNode::Ideal_allocation(adr, phase, offset);
2038  if (alloc == NULL)
2039    return FAIL;                // inscrutable address
2040  if (alloc != allocation())
2041    return FAIL;                // wrong allocation!  (store needs to float up)
2042  Node* val = st->in(MemNode::ValueIn);
2043  int complexity_count = 0;
2044  if (!detect_init_independence(val, true, complexity_count))
2045    return FAIL;                // stored value must be 'simple enough'
2046
2047  return offset;                // success
2048}
2049
2050// Find the captured store in(i) which corresponds to the range
2051// [start..start+size) in the initialized object.
2052// If there is one, return its index i.  If there isn't, return the
2053// negative of the index where it should be inserted.
2054// Return 0 if the queried range overlaps an initialization boundary
2055// or if dead code is encountered.
2056// If size_in_bytes is zero, do not bother with overlap checks.
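// Illustrative (assuming 4-byte stores captured at offsets 12 and 20):
//   a query for [16..20) finds no store and overlaps neither neighbor, so
//   the negative of the offset-20 store's edge index is returned;
//   a query for [14..18) overlaps the offset-12 store and returns 0.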
2057int InitializeNode::captured_store_insertion_point(intptr_t start,
2058                                                   int size_in_bytes,
2059                                                   PhaseTransform* phase) {
2060  const int FAIL = 0, MAX_STORE = BytesPerLong;
2061
2062  if (is_complete())
2063    return FAIL;                // arraycopy got here first; punt
2064
2065  assert(allocation() != NULL, "must be present");
2066
2067  // no negatives, no header fields:
2068  if (start < (intptr_t) sizeof(oopDesc))  return FAIL;
2069  if (start < (intptr_t) sizeof(arrayOopDesc) &&
2070      start < (intptr_t) allocation()->minimum_header_size())  return FAIL;
2071
2072  // after a certain size, we bail out on tracking all the stores:
2073  intptr_t ti_limit = (TrackedInitializationLimit * HeapWordSize);
2074  if (start >= ti_limit)  return FAIL;
2075
2076  for (uint i = InitializeNode::RawStores, limit = req(); ; ) {
2077    if (i >= limit)  return -(int)i; // not found; here is where to put it
2078
2079    Node*    st     = in(i);
2080    intptr_t st_off = get_store_offset(st, phase);
2081    if (st_off < 0) {
2082      if (st != zero_memory()) {
2083        return FAIL;            // bail out if there is dead garbage
2084      }
2085    } else if (st_off > start) {
2086      // ...we are done, since stores are ordered
2087      if (st_off < start + size_in_bytes) {
2088        return FAIL;            // the next store overlaps
2089      }
2090      return -(int)i;           // not found; here is where to put it
2091    } else if (st_off < start) {
2092      if (size_in_bytes != 0 &&
2093          start < st_off + MAX_STORE &&
2094          start < st_off + st->as_Store()->memory_size()) {
2095        return FAIL;            // the previous store overlaps
2096      }
2097    } else {
2098      if (size_in_bytes != 0 &&
2099          st->as_Store()->memory_size() != size_in_bytes) {
2100        return FAIL;            // mismatched store size
2101      }
2102      return i;
2103    }
2104
2105    ++i;
2106  }
2107}
2108
2109// Look for a captured store which initializes at the offset 'start'
2110// with the given size.  If there is no such store, and no other
2111// initialization interferes, then return zero_memory (the memory
2112// projection of the AllocateNode).
2113Node* InitializeNode::find_captured_store(intptr_t start, int size_in_bytes,
2114                                          PhaseTransform* phase) {
2115  assert(stores_are_sane(phase), "");
2116  int i = captured_store_insertion_point(start, size_in_bytes, phase);
2117  if (i == 0) {
2118    return NULL;                // something is dead
2119  } else if (i < 0) {
2120    return zero_memory();       // just primordial zero bits here
2121  } else {
2122    Node* st = in(i);           // here is the store at this position
2123    assert(get_store_offset(st->as_Store(), phase) == start, "sanity");
2124    return st;
2125  }
2126}
2127
2128// Create, as a raw pointer, an address within my new object at 'offset'.
2129Node* InitializeNode::make_raw_address(intptr_t offset,
2130                                       PhaseTransform* phase) {
2131  Node* addr = in(RawAddress);
2132  if (offset != 0) {
2133    Compile* C = phase->C;
2134    addr = phase->transform( new (C, 4) AddPNode(C->top(), addr,
2135                                                 phase->MakeConX(offset)) );
2136  }
2137  return addr;
2138}
2139
2140// Clone the given store, converting it into a raw store
2141// initializing a field or element of my new object.
2142// Caller is responsible for retiring the original store,
2143// with subsume_node or the like.
2144//
2145// From the example above InitializeNode::InitializeNode,
2146// here are the old stores to be captured:
2147//   store1 = (StoreC init.Control init.Memory (+ oop 12) 1)
2148//   store2 = (StoreC init.Control store1      (+ oop 14) 2)
2149//
2150// Here is the changed code; note the extra edges on init:
2151//   alloc = (Allocate ...)
2152//   rawoop = alloc.RawAddress
2153//   rawstore1 = (StoreC alloc.Control alloc.Memory (+ rawoop 12) 1)
2154//   rawstore2 = (StoreC alloc.Control alloc.Memory (+ rawoop 14) 2)
2155//   init = (Initialize alloc.Control alloc.Memory rawoop
2156//                      rawstore1 rawstore2)
2157//
2158Node* InitializeNode::capture_store(StoreNode* st, intptr_t start,
2159                                    PhaseTransform* phase) {
2160  assert(stores_are_sane(phase), "");
2161
2162  if (start < 0)  return NULL;
2163  assert(can_capture_store(st, phase) == start, "sanity");
2164
2165  Compile* C = phase->C;
2166  int size_in_bytes = st->memory_size();
2167  int i = captured_store_insertion_point(start, size_in_bytes, phase);
2168  if (i == 0)  return NULL;     // bail out
2169  Node* prev_mem = NULL;        // raw memory for the captured store
2170  if (i > 0) {
2171    prev_mem = in(i);           // there is a pre-existing store under this one
2172    set_req(i, C->top());       // temporarily disconnect it
2173    // See StoreNode::Ideal 'st->outcnt() == 1' for the reason to disconnect.
2174  } else {
2175    i = -i;                     // no pre-existing store
2176    prev_mem = zero_memory();   // a slice of the newly allocated object
2177    if (i > InitializeNode::RawStores && in(i-1) == prev_mem)
2178      set_req(--i, C->top());   // reuse this edge; it has been folded away
2179    else
2180      ins_req(i, C->top());     // build a new edge
2181  }
2182  Node* new_st = st->clone();
2183  new_st->set_req(MemNode::Control, in(Control));
2184  new_st->set_req(MemNode::Memory,  prev_mem);
2185  new_st->set_req(MemNode::Address, make_raw_address(start, phase));
2186  new_st = phase->transform(new_st);
2187
2188  // At this point, new_st might have swallowed a pre-existing store
2189  // at the same offset, or perhaps new_st might have disappeared,
2190  // if it redundantly stored the same value (or zero to fresh memory).
2191
2192  // In any case, wire it in:
2193  set_req(i, new_st);
2194
2195  // The caller may now kill the old guy.
2196  DEBUG_ONLY(Node* check_st = find_captured_store(start, size_in_bytes, phase));
2197  assert(check_st == new_st || check_st == NULL, "must be findable");
2198  assert(!is_complete(), "");
2199  return new_st;
2200}
2201
2202static bool store_constant(jlong* tiles, int num_tiles,
2203                           intptr_t st_off, int st_size,
2204                           jlong con) {
2205  if ((st_off & (st_size-1)) != 0)
2206    return false;               // strange store offset (assume size==2**N)
2207  address addr = (address)tiles + st_off;
2208  assert(st_off >= 0 && addr+st_size <= (address)&tiles[num_tiles], "oob");
2209  switch (st_size) {
2210  case sizeof(jbyte):  *(jbyte*) addr = (jbyte) con; break;
2211  case sizeof(jchar):  *(jchar*) addr = (jchar) con; break;
2212  case sizeof(jint):   *(jint*)  addr = (jint)  con; break;
2213  case sizeof(jlong):  *(jlong*) addr = (jlong) con; break;
2214  default: return false;        // strange store size (detect size!=2**N here)
2215  }
2216  return true;                  // return success to caller
2217}
2218
2219// Coalesce subword constants into int constants and possibly
2220// into long constants.  The goal, if the CPU permits,
2221// is to initialize the object with a small number of 64-bit tiles.
2222// Also, convert floating-point constants to bit patterns.
2223// Non-constants are not relevant to this pass.
2224//
2225// In terms of the running example on InitializeNode::InitializeNode
2226// and InitializeNode::capture_store, here is the transformation
2227// of rawstore1 and rawstore2 into rawstore12:
2228//   alloc = (Allocate ...)
2229//   rawoop = alloc.RawAddress
2230//   tile12 = 0x00010002
2231//   rawstore12 = (StoreI alloc.Control alloc.Memory (+ rawoop 12) tile12)
2232//   init = (Initialize alloc.Control alloc.Memory rawoop rawstore12)
2233//
2234void
2235InitializeNode::coalesce_subword_stores(intptr_t header_size,
2236                                        Node* size_in_bytes,
2237                                        PhaseGVN* phase) {
2238  Compile* C = phase->C;
2239
2240  assert(stores_are_sane(phase), "");
2241  // Note:  After this pass, they are not completely sane,
2242  // since there may be some overlaps.
2243
2244  int old_subword = 0, old_long = 0, new_int = 0, new_long = 0;
2245
2246  intptr_t ti_limit = (TrackedInitializationLimit * HeapWordSize);
2247  intptr_t size_limit = phase->find_intptr_t_con(size_in_bytes, ti_limit);
2248  size_limit = MIN2(size_limit, ti_limit);
2249  size_limit = align_size_up(size_limit, BytesPerLong);
2250  int num_tiles = size_limit / BytesPerLong;
2251
2252  // allocate space for the tile map:
2253  const int small_len = DEBUG_ONLY(true ? 3 :) 30; // keep stack frames small
2254  jlong  tiles_buf[small_len];
2255  Node*  nodes_buf[small_len];
2256  jlong  inits_buf[small_len];
2257  jlong* tiles = ((num_tiles <= small_len) ? &tiles_buf[0]
2258                  : NEW_RESOURCE_ARRAY(jlong, num_tiles));
2259  Node** nodes = ((num_tiles <= small_len) ? &nodes_buf[0]
2260                  : NEW_RESOURCE_ARRAY(Node*, num_tiles));
2261  jlong* inits = ((num_tiles <= small_len) ? &inits_buf[0]
2262                  : NEW_RESOURCE_ARRAY(jlong, num_tiles));
2263  // tiles: exact bitwise model of all primitive constants
2264  // nodes: last constant-storing node subsumed into the tiles model
2265  // inits: which bytes (in each tile) are touched by any initializations
2266
2267  //// Pass A: Fill in the tile model with any relevant stores.
2268
2269  Copy::zero_to_bytes(tiles, sizeof(tiles[0]) * num_tiles);
2270  Copy::zero_to_bytes(nodes, sizeof(nodes[0]) * num_tiles);
2271  Copy::zero_to_bytes(inits, sizeof(inits[0]) * num_tiles);
2272  Node* zmem = zero_memory(); // initially zero memory state
2273  for (uint i = InitializeNode::RawStores, limit = req(); i < limit; i++) {
2274    Node* st = in(i);
2275    intptr_t st_off = get_store_offset(st, phase);
2276
2277    // Figure out the store's offset and constant value:
2278    if (st_off < header_size)             continue; //skip (ignore header)
2279    if (st->in(MemNode::Memory) != zmem)  continue; //skip (odd store chain)
2280    int st_size = st->as_Store()->memory_size();
2281    if (st_off + st_size > size_limit)    break;
2282
2283    // Record which bytes are touched, whether by constant or not.
2284    if (!store_constant(inits, num_tiles, st_off, st_size, (jlong) -1))
2285      continue;                 // skip (strange store size)
2286
2287    const Type* val = phase->type(st->in(MemNode::ValueIn));
2288    if (!val->singleton())                continue; //skip (non-con store)
2289    BasicType type = val->basic_type();
2290
2291    jlong con = 0;
2292    switch (type) {
2293    case T_INT:    con = val->is_int()->get_con();  break;
2294    case T_LONG:   con = val->is_long()->get_con(); break;
2295    case T_FLOAT:  con = jint_cast(val->getf());    break;
2296    case T_DOUBLE: con = jlong_cast(val->getd());   break;
2297    default:                              continue; //skip (odd store type)
2298    }
2299
2300    if (type == T_LONG && Matcher::isSimpleConstant64(con) &&
2301        st->Opcode() == Op_StoreL) {
2302      continue;                 // This StoreL is already optimal.
2303    }
2304
2305    // Store down the constant.
2306    store_constant(tiles, num_tiles, st_off, st_size, con);
2307
2308    intptr_t j = st_off >> LogBytesPerLong;
2309
2310    if (type == T_INT && st_size == BytesPerInt
2311        && (st_off & BytesPerInt) == BytesPerInt) {
2312      jlong lcon = tiles[j];
2313      if (!Matcher::isSimpleConstant64(lcon) &&
2314          st->Opcode() == Op_StoreI) {
2315        // This StoreI is already optimal by itself.
2316        jint* intcon = (jint*) &tiles[j];
2317        intcon[1] = 0;  // undo the store_constant()
2318
2319        // If the previous store is also optimal by itself, back up and
2320        // undo the action of the previous loop iteration... if we can.
2321        // But if we can't, just let the previous half take care of itself.
2322        st = nodes[j];
2323        st_off -= BytesPerInt;
2324        con = intcon[0];
2325        if (con != 0 && st != NULL && st->Opcode() == Op_StoreI) {
2326          assert(st_off >= header_size, "still ignoring header");
2327          assert(get_store_offset(st, phase) == st_off, "must be");
2328          assert(in(i-1) == zmem, "must be");
2329          DEBUG_ONLY(const Type* tcon = phase->type(st->in(MemNode::ValueIn)));
2330          assert(con == tcon->is_int()->get_con(), "must be");
2331          // Undo the effects of the previous loop trip, which swallowed st:
2332          intcon[0] = 0;        // undo store_constant()
2333          set_req(i-1, st);     // undo set_req(i, zmem)
2334          nodes[j] = NULL;      // undo nodes[j] = st
2335          --old_subword;        // undo ++old_subword
2336        }
2337        continue;               // This StoreI is already optimal.
2338      }
2339    }
2340
2341    // This store is not needed.
2342    set_req(i, zmem);
2343    nodes[j] = st;              // record for the moment
2344    if (st_size < BytesPerLong) // something has changed
2345          ++old_subword;        // includes int/float, but who's counting...
2346    else  ++old_long;
2347  }
2348
2349  if ((old_subword + old_long) == 0)
2350    return;                     // nothing more to do
2351
2352  //// Pass B: Convert any non-zero tiles into optimal constant stores.
2353  // Be sure to insert them before overlapping non-constant stores.
2354  // (E.g., byte[] x = { 1,2,y,4 }  =>  x[int 0] = 0x01020004, x[2]=y.)
2355  for (int j = 0; j < num_tiles; j++) {
2356    jlong con  = tiles[j];
2357    jlong init = inits[j];
2358    if (con == 0)  continue;
2359    jint con0,  con1;           // split the constant, address-wise
2360    jint init0, init1;          // split the init map, address-wise
2361    { union { jlong con; jint intcon[2]; } u;
2362      u.con = con;
2363      con0  = u.intcon[0];
2364      con1  = u.intcon[1];
2365      u.con = init;
2366      init0 = u.intcon[0];
2367      init1 = u.intcon[1];
2368    }
2369
2370    Node* old = nodes[j];
2371    assert(old != NULL, "need the prior store");
2372    intptr_t offset = (j * BytesPerLong);
2373
2374    bool split = !Matcher::isSimpleConstant64(con);
2375
2376    if (offset < header_size) {
2377      assert(offset + BytesPerInt >= header_size, "second int counts");
2378      assert(*(jint*)&tiles[j] == 0, "junk in header");
2379      split = true;             // only the second word counts
2380      // Example:  int a[] = { 42 ... }
2381    } else if (con0 == 0 && init0 == -1) {
2382      split = true;             // first word is covered by full inits
2383      // Example:  int a[] = { ... foo(), 42 ... }
2384    } else if (con1 == 0 && init1 == -1) {
2385      split = true;             // second word is covered by full inits
2386      // Example:  int a[] = { ... 42, foo() ... }
2387    }
2388
2389    // Here's a case where init0 is neither 0 nor -1:
2390    //   byte a[] = { ... 0,0,foo(),0,  0,0,0,42 ... }
2391    // Assuming big-endian memory, init0, init1 are 0x0000FF00, 0x000000FF.
2392    // In this case the tile is not split; it is (jlong)42.
2393    // The big tile is stored down, and then the foo() value is inserted.
2394    // (If there were foo(),foo() instead of foo(),0, init0 would be -1.)
2395
2396    Node* ctl = old->in(MemNode::Control);
2397    Node* adr = make_raw_address(offset, phase);
2398    const TypePtr* atp = TypeRawPtr::BOTTOM;
2399
2400    // One or two coalesced stores to plop down.
2401    Node*    st[2];
2402    intptr_t off[2];
2403    int  nst = 0;
2404    if (!split) {
2405      ++new_long;
2406      off[nst] = offset;
2407      st[nst++] = StoreNode::make(C, ctl, zmem, adr, atp,
2408                                  phase->longcon(con), T_LONG);
2409    } else {
2410      // Omit either if it is a zero.
2411      if (con0 != 0) {
2412        ++new_int;
2413        off[nst]  = offset;
2414        st[nst++] = StoreNode::make(C, ctl, zmem, adr, atp,
2415                                    phase->intcon(con0), T_INT);
2416      }
2417      if (con1 != 0) {
2418        ++new_int;
2419        offset += BytesPerInt;
2420        adr = make_raw_address(offset, phase);
2421        off[nst]  = offset;
2422        st[nst++] = StoreNode::make(C, ctl, zmem, adr, atp,
2423                                    phase->intcon(con1), T_INT);
2424      }
2425    }
2426
2427    // Insert second store first, then the first before the second.
2428    // Insert each one just before any overlapping non-constant stores.
2429    while (nst > 0) {
2430      Node* st1 = st[--nst];
2431      C->copy_node_notes_to(st1, old);
2432      st1 = phase->transform(st1);
2433      offset = off[nst];
2434      assert(offset >= header_size, "do not smash header");
2435      int ins_idx = captured_store_insertion_point(offset, /*size:*/0, phase);
2436      guarantee(ins_idx != 0, "must re-insert constant store");
2437      if (ins_idx < 0)  ins_idx = -ins_idx;  // never overlap
2438      if (ins_idx > InitializeNode::RawStores && in(ins_idx-1) == zmem)
2439        set_req(--ins_idx, st1);
2440      else
2441        ins_req(ins_idx, st1);
2442    }
2443  }
2444
2445  if (PrintCompilation && WizardMode)
2446    tty->print_cr("Changed %d/%d subword/long constants into %d/%d int/long",
2447                  old_subword, old_long, new_int, new_long);
2448  if (C->log() != NULL)
2449    C->log()->elem("comment that='%d/%d subword/long to %d/%d int/long'",
2450                   old_subword, old_long, new_int, new_long);
2451
2452  // Clean up any remaining occurrences of zmem:
2453  remove_extra_zeroes();
2454}
2455
2456// Explore forward from in(start) to find the first fully initialized
2457// word, and return its offset.  Skip groups of subword stores which
2458// together initialize full words.  If in(start) is itself part of a
2459// fully initialized word, return the offset of in(start).  If there
2460// are no following full-word stores, or if something is fishy, return
2461// a negative value.
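//
// Illustrative examples (a 12-byte header is assumed, for the sake of the
// offsets only):
//   - byte stores at offsets 12,13,14,15 beginning at in(start):
//       together they cover the word at offset 12, so 12 is returned;
//   - a byte store at offset 12 followed by an int store at offset 16:
//       the word at 12 is never completed, so 16 is returned.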
2462intptr_t InitializeNode::find_next_fullword_store(uint start, PhaseGVN* phase) {
2463  int       int_map = 0;
2464  intptr_t  int_map_off = 0;
2465  const int FULL_MAP = right_n_bits(BytesPerInt);  // the int_map we hope for
2466
2467  for (uint i = start, limit = req(); i < limit; i++) {
2468    Node* st = in(i);
2469
2470    intptr_t st_off = get_store_offset(st, phase);
2471    if (st_off < 0)  break;  // return conservative answer
2472
2473    int st_size = st->as_Store()->memory_size();
2474    if (st_size >= BytesPerInt && (st_off % BytesPerInt) == 0) {
2475      return st_off;            // we found a complete word init
2476    }
2477
2478    // update the map:
2479
2480    intptr_t this_int_off = align_size_down(st_off, BytesPerInt);
2481    if (this_int_off != int_map_off) {
2482      // reset the map:
2483      int_map = 0;
2484      int_map_off = this_int_off;
2485    }
2486
2487    int subword_off = st_off - this_int_off;
2488    int_map |= right_n_bits(st_size) << subword_off;
2489    if ((int_map & FULL_MAP) == FULL_MAP) {
2490      return this_int_off;      // we found a complete word init
2491    }
2492
2493    // Did this store hit or cross the word boundary?
2494    intptr_t next_int_off = align_size_down(st_off + st_size, BytesPerInt);
2495    if (next_int_off == this_int_off + BytesPerInt) {
2496      // We passed the current int, without fully initializing it.
2497      int_map_off = next_int_off;
2498      int_map >>= BytesPerInt;
2499    } else if (next_int_off > this_int_off + BytesPerInt) {
2500      // We passed the current and next int.
2501      return this_int_off + BytesPerInt;
2502    }
2503  }
2504
2505  return -1;
2506}
2507
2508
2509// Called when the associated AllocateNode is expanded into CFG.
2510// At this point, we may perform additional optimizations.
2511// Linearize the stores by ascending offset, to make memory
2512// activity as coherent as possible.
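//
// Returns the (possibly updated) raw memory state: ClearArray nodes are
// added to 'rawmem' for any ranges the captured stores do not cover, and the
// captured stores themselves are re-threaded onto a single linearized chain
// which is appended as an extra input of this InitializeNode.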
2513Node* InitializeNode::complete_stores(Node* rawctl, Node* rawmem, Node* rawptr,
2514                                      intptr_t header_size,
2515                                      Node* size_in_bytes,
2516                                      PhaseGVN* phase) {
2517  assert(!is_complete(), "not already complete");
2518  assert(stores_are_sane(phase), "");
2519  assert(allocation() != NULL, "must be present");
2520
2521  remove_extra_zeroes();
2522
2523  if (ReduceFieldZeroing || ReduceBulkZeroing)
2524    // reduce instruction count for common initialization patterns
2525    coalesce_subword_stores(header_size, size_in_bytes, phase);
2526
2527  Node* zmem = zero_memory();   // initially zero memory state
2528  Node* inits = zmem;           // accumulating a linearized chain of inits
2529  #ifdef ASSERT
2530  intptr_t last_init_off = sizeof(oopDesc);  // previous init offset
2531  intptr_t last_init_end = sizeof(oopDesc);  // previous init offset+size
2532  intptr_t last_tile_end = sizeof(oopDesc);  // previous tile offset+size
2533  #endif
2534  intptr_t zeroes_done = header_size;
2535
2536  bool do_zeroing = true;       // we might give up if inits are very sparse
2537  int  big_init_gaps = 0;       // how many large gaps have we seen?
2538
2539  if (ZeroTLAB)  do_zeroing = false;
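  // Note: incremental zeroing is pointless if the TLAB is already pre-zeroed
  // (ZeroTLAB), and it is skipped when zeroing elimination is disabled; in
  // the latter case the bulk clear at the end of this method does the work.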
2540  if (!ReduceFieldZeroing && !ReduceBulkZeroing)  do_zeroing = false;
2541
2542  for (uint i = InitializeNode::RawStores, limit = req(); i < limit; i++) {
2543    Node* st = in(i);
2544    intptr_t st_off = get_store_offset(st, phase);
2545    if (st_off < 0)
2546      break;                    // unknown junk in the inits
2547    if (st->in(MemNode::Memory) != zmem)
2548      break;                    // complicated store chains somehow in list
2549
2550    int st_size = st->as_Store()->memory_size();
2551    intptr_t next_init_off = st_off + st_size;
2552
2553    if (do_zeroing && zeroes_done < next_init_off) {
2554      // See if this store needs a zero before it or under it.
2555      intptr_t zeroes_needed = st_off;
2556
2557      if (st_size < BytesPerInt) {
2558        // Look for subword stores which only partially initialize words.
2559        // If we find some, we must lay down some word-level zeroes first,
2560        // underneath the subword stores.
2561        //
2562        // Examples:
2563        //   byte[] a = { p,q,r,s }  =>  a[0]=p,a[1]=q,a[2]=r,a[3]=s
2564        //   byte[] a = { x,y,0,0 }  =>  a[0..3] = 0, a[0]=x,a[1]=y
2565        //   byte[] a = { 0,0,z,0 }  =>  a[0..3] = 0, a[2]=z
2566        //
2567        // Note:  coalesce_subword_stores may have already done this,
2568        // if it was prompted by constant non-zero subword initializers.
2569        // But this case can still arise with non-constant stores.
2570
2571        intptr_t next_full_store = find_next_fullword_store(i, phase);
2572
2573        // In the examples above:
2574        //   in(i)          p   q   r   s     x   y     z
2575        //   st_off        12  13  14  15    12  13    14
2576        //   st_size        1   1   1   1     1   1     1
2577        //   next_full_s.  12  16  16  16    16  16    16
2578        //   z's_done      12  16  16  16    12  16    12
2579        //   z's_needed    12  16  16  16    16  16    16
2580        //   zsize          0   0   0   0     4   0     4
2581        if (next_full_store < 0) {
2582          // Conservative tack:  Zero to end of current word.
2583          zeroes_needed = align_size_up(zeroes_needed, BytesPerInt);
2584        } else {
2585          // Zero to beginning of next fully initialized word.
2586          // Or, don't zero at all, if we are already in that word.
2587          assert(next_full_store >= zeroes_needed, "must go forward");
2588          assert((next_full_store & (BytesPerInt-1)) == 0, "even boundary");
2589          zeroes_needed = next_full_store;
2590        }
2591      }
2592
2593      if (zeroes_needed > zeroes_done) {
2594        intptr_t zsize = zeroes_needed - zeroes_done;
2595        // Do some incremental zeroing on rawmem, in parallel with inits.
2596        zeroes_done = align_size_down(zeroes_done, BytesPerInt);
2597        rawmem = ClearArrayNode::clear_memory(rawctl, rawmem, rawptr,
2598                                              zeroes_done, zeroes_needed,
2599                                              phase);
2600        zeroes_done = zeroes_needed;
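        // Heuristic: after more than two gaps wider than the short-clear
        // threshold (Matcher::init_array_short_size), give up on further
        // incremental zeroing; whatever is still unzeroed afterwards is
        // covered by the bulk clear after this loop.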
2601        if (zsize > Matcher::init_array_short_size && ++big_init_gaps > 2)
2602          do_zeroing = false;   // leave the hole, next time
2603      }
2604    }
2605
2606    // Collect the store and move on:
2607    st->set_req(MemNode::Memory, inits);
2608    inits = st;                 // put it on the linearized chain
2609    set_req(i, zmem);           // unhook from previous position
2610
2611    if (zeroes_done == st_off)
2612      zeroes_done = next_init_off;
2613
2614    assert(!do_zeroing || zeroes_done >= next_init_off, "don't miss any");
2615
2616    #ifdef ASSERT
2617    // Various order invariants.  Weaker than stores_are_sane because
2618    // a large constant tile can be filled in by smaller non-constant stores.
2619    assert(st_off >= last_init_off, "inits do not reverse");
2620    last_init_off = st_off;
2621    const Type* val = NULL;
2622    if (st_size >= BytesPerInt &&
2623        (val = phase->type(st->in(MemNode::ValueIn)))->singleton() &&
2624        (int)val->basic_type() < (int)T_OBJECT) {
2625      assert(st_off >= last_tile_end, "tiles do not overlap");
2626      assert(st_off >= last_init_end, "tiles do not overwrite inits");
2627      last_tile_end = MAX2(last_tile_end, next_init_off);
2628    } else {
2629      intptr_t st_tile_end = align_size_up(next_init_off, BytesPerLong);
2630      assert(st_tile_end >= last_tile_end, "inits stay with tiles");
2631      assert(st_off      >= last_init_end, "inits do not overlap");
2632      last_init_end = next_init_off;  // it's a non-tile
2633    }
2634    #endif //ASSERT
2635  }
2636
2637  remove_extra_zeroes();        // clear out all the zmems left over
2638  add_req(inits);
2639
2640  if (!ZeroTLAB) {
2641    // If anything remains to be zeroed, zero it all now.
2642    zeroes_done = align_size_down(zeroes_done, BytesPerInt);
2643    // if it is the last unused 4 bytes of an instance, forget about it
2644    intptr_t size_limit = phase->find_intptr_t_con(size_in_bytes, max_jint);
2645    if (zeroes_done + BytesPerLong >= size_limit) {
2646      assert(allocation() != NULL, "");
2647      Node* klass_node = allocation()->in(AllocateNode::KlassNode);
2648      ciKlass* k = phase->type(klass_node)->is_klassptr()->klass();
2649      if (zeroes_done == k->layout_helper())
2650        zeroes_done = size_limit;
2651    }
2652    if (zeroes_done < size_limit) {
2653      rawmem = ClearArrayNode::clear_memory(rawctl, rawmem, rawptr,
2654                                            zeroes_done, size_in_bytes, phase);
2655    }
2656  }
2657
2658  set_complete(phase);
2659  return rawmem;
2660}
2661
2662
2663#ifdef ASSERT
2664bool InitializeNode::stores_are_sane(PhaseTransform* phase) {
2665  if (is_complete())
2666    return true;                // stores could be anything at this point
2667  intptr_t last_off = sizeof(oopDesc);
2668  for (uint i = InitializeNode::RawStores; i < req(); i++) {
2669    Node* st = in(i);
2670    intptr_t st_off = get_store_offset(st, phase);
2671    if (st_off < 0)  continue;  // ignore dead garbage
2672    if (last_off > st_off) {
2673      tty->print_cr("*** bad store offset at %d: %d > %d", i, (int)last_off, (int)st_off);
2674      this->dump(2);
2675      assert(false, "ascending store offsets");
2676      return false;
2677    }
2678    last_off = st_off + st->as_Store()->memory_size();
2679  }
2680  return true;
2681}
2682#endif //ASSERT
2683
2684
2685
2686
2687//============================MergeMemNode=====================================
2688//
2689// SEMANTICS OF MEMORY MERGES:  A MergeMem is a memory state assembled from several
2690// contributing store or call operations.  Each contributor provides the memory
2691// state for a particular "alias type" (see Compile::alias_type).  For example,
2692// if a MergeMem has an input X for alias category #6, then any memory reference
2693// to alias category #6 may use X as its memory state input, as an exact equivalent
2694// to using the MergeMem as a whole.
2695//   Load<6>( MergeMem(<6>: X, ...), p ) <==> Load<6>(X,p)
2696//
2697// (Here, the <N> notation gives the index of the relevant adr_type.)
2698//
2699// In one special case (and more cases in the future), alias categories overlap.
2700// The special alias category "Bot" (Compile::AliasIdxBot) includes all memory
2701// states.  Therefore, if a MergeMem has only one contributing input W for Bot,
2702// it is exactly equivalent to that state W:
2703//   MergeMem(<Bot>: W) <==> W
2704//
2705// Usually, the merge has more than one input.  In that case, where inputs
2706// overlap (i.e., one is Bot), the narrower alias type determines the memory
2707// state for that type, and the wider alias type (Bot) fills in everywhere else:
2708//   Load<5>( MergeMem(<Bot>: W, <6>: X), p ) <==> Load<5>(W,p)
2709//   Load<6>( MergeMem(<Bot>: W, <6>: X), p ) <==> Load<6>(X,p)
2710//
2711// A merge can take a "wide" memory state as one of its narrow inputs.
2712// This simply means that the merge observes only the relevant parts of
2713// the wide input.  That is, wide memory states arriving at narrow merge inputs
2714// are implicitly "filtered" or "sliced" as necessary.  (This is rare.)
2715//
2716// These rules imply that MergeMem nodes may cascade (via their <Bot> links),
2717// and that memory slices "leak through":
2718//   MergeMem(<Bot>: MergeMem(<Bot>: W, <7>: Y)) <==> MergeMem(<Bot>: W, <7>: Y)
2719//
2720// But, in such a cascade, repeated memory slices can "block the leak":
2721//   MergeMem(<Bot>: MergeMem(<Bot>: W, <7>: Y), <7>: Y') <==> MergeMem(<Bot>: W, <7>: Y')
2722//
2723// In the last example, Y is not part of the combined memory state of the
2724// outermost MergeMem.  The system must, of course, prevent unschedulable
2725// memory states from arising, so you can be sure that the state Y is somehow
2726// a precursor to state Y'.
2727//
2728//
2729// REPRESENTATION OF MEMORY MERGES: The indexes used to address the Node::in array
2730// of each MergeMemNode array are exactly the numerical alias indexes, including
2731// but not limited to AliasIdxTop, AliasIdxBot, and AliasIdxRaw.  The functions
2732// Compile::alias_type (and kin) produce and manage these indexes.
2733//
2734// By convention, the value of in(AliasIdxTop) (i.e., in(1)) is always the top node.
2735// (Note that this provides quick access to the top node inside MergeMem methods,
2736// without the need to reach out via TLS to Compile::current.)
2737//
2738// As a consequence of what was just described, a MergeMem that represents a full
2739// memory state has an edge in(AliasIdxBot) which is a "wide" memory state,
2740// containing all alias categories.
2741//
2742// MergeMem nodes never (?) have control inputs, so in(0) is NULL.
2743//
2744// All other edges in(N) (including in(AliasIdxRaw), which is in(3)) are either
2745// a memory state for the alias type <N>, or else the top node, meaning that
2746// there is no particular input for that alias type.  Note that the length of
2747// a MergeMem is variable, and may be extended at any time to accommodate new
2748// memory states at larger alias indexes.  When merges grow, they are of course
2749// filled with "top" in the unused in() positions.
2750//
2751// This use of top is named "empty_memory()", or "empty_mem" (no-memory) as a variable.
2752// (Top was chosen because it works smoothly with passes like GCM.)
2753//
2754// For convenience, we hardwire the alias index for TypeRawPtr::BOTTOM.  (It is
2755// the type of random VM bits like TLS references.)  Since it is always the
2756// first non-Bot memory slice, some low-level loops use it to initialize an
2757// index variable:  for (i = AliasIdxRaw; i < req(); i++).
2758//
2759//
2760// ACCESSORS:  There is a special accessor MergeMemNode::base_memory which returns
2761// the distinguished "wide" state.  The accessor MergeMemNode::memory_at(N) returns
2762// the memory state for alias type <N>, or (if there is no particular slice at <N>,
2763// it returns the base memory.  To prevent bugs, memory_at does not accept <Top>
2764// or <Bot> indexes.  The iterator MergeMemStream provides robust iteration over
2765// MergeMem nodes or pairs of such nodes, ensuring that the non-top edges are visited.
2766//
2767// %%%% We may get rid of base_memory as a separate accessor at some point; it isn't
2768// really that different from the other memory inputs.  An abbreviation called
2769// "bot_memory()" for "memory_at(AliasIdxBot)" would keep code tidy.
2770//
2771//
2772// PARTIAL MEMORY STATES:  During optimization, MergeMem nodes may arise that represent
2773// partial memory states.  When a Phi splits through a MergeMem, the copy of the Phi
2774// that "emerges though" the base memory will be marked as excluding the alias types
2775// of the other (narrow-memory) copies which "emerged through" the narrow edges:
2776//
2777//   Phi<Bot>(U, MergeMem(<Bot>: W, <8>: Y))
2778//     ==Ideal=>  MergeMem(<Bot>: Phi<Bot-8>(U, W), Phi<8>(U, Y))
2779//
2780// This strange "subtraction" effect is necessary to ensure IGVN convergence.
2781// (It is currently unimplemented.)  As you can see, the resulting merge is
2782// actually a disjoint union of memory states, rather than an overlay.
2783//
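//
// USAGE SKETCH (illustrative only; 'C' is the current Compile, and 'mem',
// 'st', 'alias_idx', 'other_idx' are assumed to exist in the caller; a real
// caller would normally pass the new node through GVN afterwards):
//
//   MergeMemNode* mm = MergeMemNode::make(C, mem); // 'mem' becomes in(Bot)
//   mm->set_memory_at(alias_idx, st);              // override one narrow slice
//   Node* s0 = mm->memory_at(alias_idx);           // == st
//   Node* s1 = mm->memory_at(other_idx);           // falls back to base_memory()
//
//   // Iterate over the non-empty memory slices:
//   for (MergeMemStream mms(mm); mms.next_non_empty(); ) {
//     Node* slice = mms.memory();
//   }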
2784
2785//------------------------------MergeMemNode-----------------------------------
2786Node* MergeMemNode::make_empty_memory() {
2787  Node* empty_memory = (Node*) Compile::current()->top();
2788  assert(empty_memory->is_top(), "correct sentinel identity");
2789  return empty_memory;
2790}
2791
2792MergeMemNode::MergeMemNode(Node *new_base) : Node(1+Compile::AliasIdxRaw) {
2793  init_class_id(Class_MergeMem);
2794  // all inputs are nullified in Node::Node(int)
2795  // set_input(0, NULL);  // no control input
2796
2797  // Initialize the edges uniformly to top, for starters.
2798  Node* empty_mem = make_empty_memory();
2799  for (uint i = Compile::AliasIdxTop; i < req(); i++) {
2800    init_req(i,empty_mem);
2801  }
2802  assert(empty_memory() == empty_mem, "");
2803
2804  if( new_base != NULL && new_base->is_MergeMem() ) {
2805    MergeMemNode* mdef = new_base->as_MergeMem();
2806    assert(mdef->empty_memory() == empty_mem, "consistent sentinels");
2807    for (MergeMemStream mms(this, mdef); mms.next_non_empty2(); ) {
2808      mms.set_memory(mms.memory2());
2809    }
2810    assert(base_memory() == mdef->base_memory(), "");
2811  } else {
2812    set_base_memory(new_base);
2813  }
2814}
2815
2816// Make a new, untransformed MergeMem with the same base as 'mem'.
2817// If mem is itself a MergeMem, populate the result with the same edges.
2818MergeMemNode* MergeMemNode::make(Compile* C, Node* mem) {
2819  return new(C, 1+Compile::AliasIdxRaw) MergeMemNode(mem);
2820}
2821
2822//------------------------------cmp--------------------------------------------
2823uint MergeMemNode::hash() const { return NO_HASH; }
2824uint MergeMemNode::cmp( const Node &n ) const {
2825  return (&n == this);          // Always fail except on self
2826}
2827
2828//------------------------------Identity---------------------------------------
2829Node* MergeMemNode::Identity(PhaseTransform *phase) {
2830  // Identity if this merge point does not record any interesting memory
2831  // disambiguations.
2832  Node* base_mem = base_memory();
2833  Node* empty_mem = empty_memory();
2834  if (base_mem != empty_mem) {  // Memory path is not dead?
2835    for (uint i = Compile::AliasIdxRaw; i < req(); i++) {
2836      Node* mem = in(i);
2837      if (mem != empty_mem && mem != base_mem) {
2838        return this;            // Many memory splits; no change
2839      }
2840    }
2841  }
2842  return base_mem;              // No memory splits; ID on the one true input
2843}
2844
2845//------------------------------Ideal------------------------------------------
2846// This method is invoked recursively on chains of MergeMem nodes
2847Node *MergeMemNode::Ideal(PhaseGVN *phase, bool can_reshape) {
2848  // Remove chain'd MergeMems
2849  //
2850  // This is delicate, because each "in(i)" (i >= Raw) is interpreted
2851  // relative to the "in(Bot)".  Since we are patching both at the same time,
2852  // we have to be careful to read each "in(i)" relative to the old "in(Bot)",
2853  // but rewrite each "in(i)" relative to the new "in(Bot)".
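  //
  // For example (illustrative only), a stacked merge reslices as:
  //   MergeMem(<Bot>: MergeMem(<Bot>: W, <7>: Y), <6>: X)
  //     ==Ideal=>  MergeMem(<Bot>: W, <6>: X, <7>: Y)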
2854  Node *progress = NULL;
2855
2856
2857  Node* old_base = base_memory();
2858  Node* empty_mem = empty_memory();
2859  if (old_base == empty_mem)
2860    return NULL; // Dead memory path.
2861
2862  MergeMemNode* old_mbase;
2863  if (old_base != NULL && old_base->is_MergeMem())
2864    old_mbase = old_base->as_MergeMem();
2865  else
2866    old_mbase = NULL;
2867  Node* new_base = old_base;
2868
2869  // simplify stacked MergeMems in base memory
2870  if (old_mbase)  new_base = old_mbase->base_memory();
2871
2872  // the base memory might contribute new slices beyond my req()
2873  if (old_mbase)  grow_to_match(old_mbase);
2874
2875  // Look carefully at the base node if it is a phi.
2876  PhiNode* phi_base;
2877  if (new_base != NULL && new_base->is_Phi())
2878    phi_base = new_base->as_Phi();
2879  else
2880    phi_base = NULL;
2881
2882  Node*    phi_reg = NULL;
2883  uint     phi_len = (uint)-1;
2884  if (phi_base != NULL && !phi_base->is_copy()) {
2885    // do not examine phi if degraded to a copy
2886    phi_reg = phi_base->region();
2887    phi_len = phi_base->req();
2888    // see if the phi is unfinished
2889    for (uint i = 1; i < phi_len; i++) {
2890      if (phi_base->in(i) == NULL) {
2891        // incomplete phi; do not look at it yet!
2892        phi_reg = NULL;
2893        phi_len = (uint)-1;
2894        break;
2895      }
2896    }
2897  }
2898
2899  // Note:  We do not call verify_sparse on entry, because inputs
2900  // can normalize to the base_memory via subsume_node or similar
2901  // mechanisms.  This method repairs that damage.
2902
2903  assert(!old_mbase || old_mbase->is_empty_memory(empty_mem), "consistent sentinels");
2904
2905  // Look at each slice.
2906  for (uint i = Compile::AliasIdxRaw; i < req(); i++) {
2907    Node* old_in = in(i);
2908    // calculate the old memory value
2909    Node* old_mem = old_in;
2910    if (old_mem == empty_mem)  old_mem = old_base;
2911    assert(old_mem == memory_at(i), "");
2912
2913    // maybe update (reslice) the old memory value
2914
2915    // simplify stacked MergeMems
2916    Node* new_mem = old_mem;
2917    MergeMemNode* old_mmem;
2918    if (old_mem != NULL && old_mem->is_MergeMem())
2919      old_mmem = old_mem->as_MergeMem();
2920    else
2921      old_mmem = NULL;
2922    if (old_mmem == this) {
2923      // This can happen if loops break up and safepoints disappear.
2924      // A merge of BotPtr (default) with a RawPtr memory derived from a
2925      // safepoint can be rewritten to a merge of the same BotPtr with
2926      // the BotPtr phi coming into the loop.  If that phi disappears
2927      // also, we can end up with a self-loop of the mergemem.
2928      // In general, if loops degenerate and memory effects disappear,
2929      // a mergemem can be left looking at itself.  This simply means
2930      // that the mergemem's default should be used, since there is
2931      // no longer any apparent effect on this slice.
2932      // Note: If a memory slice is a MergeMem cycle, it is unreachable
2933      //       from start.  Update the input to TOP.
2934      new_mem = (new_base == this || new_base == empty_mem)? empty_mem : new_base;
2935    }
2936    else if (old_mmem != NULL) {
2937      new_mem = old_mmem->memory_at(i);
2938    }
2939    // else preceding memory was not a MergeMem
2940
2941    // replace equivalent phis (unfortunately, they do not GVN together)
2942    if (new_mem != NULL && new_mem != new_base &&
2943        new_mem->req() == phi_len && new_mem->in(0) == phi_reg) {
2944      if (new_mem->is_Phi()) {
2945        PhiNode* phi_mem = new_mem->as_Phi();
2946        for (uint i = 1; i < phi_len; i++) {
2947          if (phi_base->in(i) != phi_mem->in(i)) {
2948            phi_mem = NULL;
2949            break;
2950          }
2951        }
2952        if (phi_mem != NULL) {
2953          // equivalent phi nodes; revert to the def
2954          new_mem = new_base;
2955        }
2956      }
2957    }
2958
2959    // maybe store down a new value
2960    Node* new_in = new_mem;
2961    if (new_in == new_base)  new_in = empty_mem;
2962
2963    if (new_in != old_in) {
2964      // Warning:  Do not combine this "if" with the previous "if"
2965      // A memory slice might have to be rewritten even if it is semantically
2966      // unchanged, if the base_memory value has changed.
2967      set_req(i, new_in);
2968      progress = this;          // Report progress
2969    }
2970  }
2971
2972  if (new_base != old_base) {
2973    set_req(Compile::AliasIdxBot, new_base);
2974    // Don't use set_base_memory(new_base), because we need to update du.
2975    assert(base_memory() == new_base, "");
2976    progress = this;
2977  }
2978
2979  if( base_memory() == this ) {
2980    // a self cycle indicates this memory path is dead
2981    set_req(Compile::AliasIdxBot, empty_mem);
2982  }
2983
2984  // Resolve external cycles by calling Ideal on a MergeMem base_memory
2985  // Recursion must occur after the self cycle check above
2986  if( base_memory()->is_MergeMem() ) {
2987    MergeMemNode *new_mbase = base_memory()->as_MergeMem();
2988    Node *m = phase->transform(new_mbase);  // Rollup any cycles
2989    if( m != NULL && (m->is_top() ||
2990        (m->is_MergeMem() && m->as_MergeMem()->base_memory() == empty_mem)) ) {
2991      // propagate rollup of dead cycle to self
2992      set_req(Compile::AliasIdxBot, empty_mem);
2993    }
2994  }
2995
2996  if( base_memory() == empty_mem ) {
2997    progress = this;
2998    // Cut inputs during Parse phase only.
2999    // During Optimize phase a dead MergeMem node will be subsumed by Top.
3000    if( !can_reshape ) {
3001      for (uint i = Compile::AliasIdxRaw; i < req(); i++) {
3002        if( in(i) != empty_mem ) { set_req(i, empty_mem); }
3003      }
3004    }
3005  }
3006
3007  if( !progress && base_memory()->is_Phi() && can_reshape ) {
3008    // Check if PhiNode::Ideal's "Split phis through memory merges"
3009    // transform should be attempted. Look for this->phi->this cycle.
3010    uint merge_width = req();
3011    if (merge_width > Compile::AliasIdxRaw) {
3012      PhiNode* phi = base_memory()->as_Phi();
3013      for( uint i = 1; i < phi->req(); ++i ) {// For all paths in
3014        if (phi->in(i) == this) {
3015          phase->is_IterGVN()->_worklist.push(phi);
3016          break;
3017        }
3018      }
3019    }
3020  }
3021
3022  assert(verify_sparse(), "please, no dups of base");
3023  return progress;
3024}
3025
3026//-------------------------set_base_memory-------------------------------------
3027void MergeMemNode::set_base_memory(Node *new_base) {
3028  Node* empty_mem = empty_memory();
3029  set_req(Compile::AliasIdxBot, new_base);
3030  assert(memory_at(req()) == new_base, "must set default memory");
3031  // Clear out other occurrences of new_base:
3032  if (new_base != empty_mem) {
3033    for (uint i = Compile::AliasIdxRaw; i < req(); i++) {
3034      if (in(i) == new_base)  set_req(i, empty_mem);
3035    }
3036  }
3037}
3038
3039//------------------------------out_RegMask------------------------------------
3040const RegMask &MergeMemNode::out_RegMask() const {
3041  return RegMask::Empty;
3042}
3043
3044//------------------------------dump_spec--------------------------------------
3045#ifndef PRODUCT
3046void MergeMemNode::dump_spec(outputStream *st) const {
3047  st->print(" {");
3048  Node* base_mem = base_memory();
3049  for( uint i = Compile::AliasIdxRaw; i < req(); i++ ) {
3050    Node* mem = memory_at(i);
3051    if (mem == base_mem) { st->print(" -"); continue; }
3052    st->print( " N%d:", mem->_idx );
3053    Compile::current()->get_adr_type(i)->dump_on(st);
3054  }
3055  st->print(" }");
3056}
3057#endif // !PRODUCT
3058
3059
3060#ifdef ASSERT
3061static bool might_be_same(Node* a, Node* b) {
3062  if (a == b)  return true;
3063  if (!(a->is_Phi() || b->is_Phi()))  return false;
3064  // phis shift around during optimization
3065  return true;  // conservatively assume they might be the same
3066}
3067
3068// verify a narrow slice (either incoming or outgoing)
3069static void verify_memory_slice(const MergeMemNode* m, int alias_idx, Node* n) {
3070  if (!VerifyAliases)       return;  // don't bother to verify unless requested
3071  if (is_error_reported())  return;  // muzzle asserts when debugging an error
3072  if (Node::in_dump())      return;  // muzzle asserts when printing
3073  assert(alias_idx >= Compile::AliasIdxRaw, "must not disturb base_memory or sentinel");
3074  assert(n != NULL, "");
3075  // Elide intervening MergeMem's
3076  while (n->is_MergeMem()) {
3077    n = n->as_MergeMem()->memory_at(alias_idx);
3078  }
3079  Compile* C = Compile::current();
3080  const TypePtr* n_adr_type = n->adr_type();
3081  if (n == m->empty_memory()) {
3082    // Implicit copy of base_memory()
3083  } else if (n_adr_type != TypePtr::BOTTOM) {
3084    assert(n_adr_type != NULL, "new memory must have a well-defined adr_type");
3085    assert(C->must_alias(n_adr_type, alias_idx), "new memory must match selected slice");
3086  } else {
3087    // A few places like make_runtime_call "know" that VM calls are narrow,
3088    // and can be used to update only the VM bits stored as TypeRawPtr::BOTTOM.
3089    bool expected_wide_mem = false;
3090    if (n == m->base_memory()) {
3091      expected_wide_mem = true;
3092    } else if (alias_idx == Compile::AliasIdxRaw ||
3093               n == m->memory_at(Compile::AliasIdxRaw)) {
3094      expected_wide_mem = true;
3095    } else if (!C->alias_type(alias_idx)->is_rewritable()) {
3096      // memory can "leak through" calls on channels that
3097      // are write-once.  Allow this also.
3098      expected_wide_mem = true;
3099    }
3100    assert(expected_wide_mem, "expected narrow slice replacement");
3101  }
3102}
3103#else // !ASSERT
3104#define verify_memory_slice(m,i,n) (0)  // no-op when asserts are disabled
3105#endif
3106
3107
3108//-----------------------------memory_at---------------------------------------
3109Node* MergeMemNode::memory_at(uint alias_idx) const {
3110  assert(alias_idx >= Compile::AliasIdxRaw ||
3111         (alias_idx == Compile::AliasIdxBot && Compile::current()->AliasLevel() == 0),
3112         "must avoid base_memory and AliasIdxTop");
3113
3114  // Otherwise, it is a narrow slice.
3115  Node* n = alias_idx < req() ? in(alias_idx) : empty_memory();
3116  Compile *C = Compile::current();
3117  if (is_empty_memory(n)) {
3118    // the array is sparse; empty slots are the "top" node
3119    n = base_memory();
3120    assert(Node::in_dump()
3121           || n == NULL || n->bottom_type() == Type::TOP
3122           || n->adr_type() == TypePtr::BOTTOM
3123           || n->adr_type() == TypeRawPtr::BOTTOM
3124           || Compile::current()->AliasLevel() == 0,
3125           "must be a wide memory");
3126    // AliasLevel == 0 if we are organizing the memory states manually.
3127    // See verify_memory_slice for comments on TypeRawPtr::BOTTOM.
3128  } else {
3129    // make sure the stored slice is sane
3130    #ifdef ASSERT
3131    if (is_error_reported() || Node::in_dump()) {
3132    } else if (might_be_same(n, base_memory())) {
3133      // Give it a pass:  It is a mostly harmless repetition of the base.
3134      // This can arise normally from node subsumption during optimization.
3135    } else {
3136      verify_memory_slice(this, alias_idx, n);
3137    }
3138    #endif
3139  }
3140  return n;
3141}
3142
3143//---------------------------set_memory_at-------------------------------------
3144void MergeMemNode::set_memory_at(uint alias_idx, Node *n) {
3145  verify_memory_slice(this, alias_idx, n);
3146  Node* empty_mem = empty_memory();
3147  if (n == base_memory())  n = empty_mem;  // collapse default
3148  uint need_req = alias_idx+1;
3149  if (req() < need_req) {
3150    if (n == empty_mem)  return;  // already the default, so do not grow me
3151    // grow the sparse array
3152    do {
3153      add_req(empty_mem);
3154    } while (req() < need_req);
3155  }
3156  set_req( alias_idx, n );
3157}
3158
3159
3160
3161//--------------------------iteration_setup------------------------------------
3162void MergeMemNode::iteration_setup(const MergeMemNode* other) {
3163  if (other != NULL) {
3164    grow_to_match(other);
3165    // invariant:  the finite support of mm2 is within mm->req()
3166    #ifdef ASSERT
3167    for (uint i = req(); i < other->req(); i++) {
3168      assert(other->is_empty_memory(other->in(i)), "slice left uncovered");
3169    }
3170    #endif
3171  }
3172  // Replace spurious copies of base_memory by top.
3173  Node* base_mem = base_memory();
3174  if (base_mem != NULL && !base_mem->is_top()) {
3175    for (uint i = Compile::AliasIdxBot+1, imax = req(); i < imax; i++) {
3176      if (in(i) == base_mem)
3177        set_req(i, empty_memory());
3178    }
3179  }
3180}
3181
3182//---------------------------grow_to_match-------------------------------------
3183void MergeMemNode::grow_to_match(const MergeMemNode* other) {
3184  Node* empty_mem = empty_memory();
3185  assert(other->is_empty_memory(empty_mem), "consistent sentinels");
3186  // look for the finite support of the other memory
3187  for (uint i = other->req(); --i >= req(); ) {
3188    if (other->in(i) != empty_mem) {
3189      uint new_len = i+1;
3190      while (req() < new_len)  add_req(empty_mem);
3191      break;
3192    }
3193  }
3194}
3195
3196//---------------------------verify_sparse-------------------------------------
3197#ifndef PRODUCT
3198bool MergeMemNode::verify_sparse() const {
3199  assert(is_empty_memory(make_empty_memory()), "sane sentinel");
3200  Node* base_mem = base_memory();
3201  // The following can happen in degenerate cases, since empty==top.
3202  if (is_empty_memory(base_mem))  return true;
3203  for (uint i = Compile::AliasIdxRaw; i < req(); i++) {
3204    assert(in(i) != NULL, "sane slice");
3205    if (in(i) == base_mem)  return false;  // should have been the sentinel value!
3206  }
3207  return true;
3208}
3209
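// Non-product helper: returns true if 'mem' is a plausible memory value for
// slice 'idx' of 'mm' -- the raw edge itself, the resolved memory at that
// slice, or a node reached from it by unwrapping degenerate (copy) Phis.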
3210bool MergeMemStream::match_memory(Node* mem, const MergeMemNode* mm, int idx) {
3211  Node* n;
3212  n = mm->in(idx);
3213  if (mem == n)  return true;  // might be empty_memory()
3214  n = (idx == Compile::AliasIdxBot)? mm->base_memory(): mm->memory_at(idx);
3215  if (mem == n)  return true;
3216  while (n->is_Phi() && (n = n->as_Phi()->is_copy()) != NULL) {
3217    if (mem == n)  return true;
3218    if (n == NULL)  break;
3219  }
3220  return false;
3221}
3222#endif // !PRODUCT
3223