1/*
2 * Copyright (c) 1999, 2017, Oracle and/or its affiliates. All rights reserved.
3 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
4 *
5 * This code is free software; you can redistribute it and/or modify it
6 * under the terms of the GNU General Public License version 2 only, as
7 * published by the Free Software Foundation.
8 *
9 * This code is distributed in the hope that it will be useful, but WITHOUT
10 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
11 * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
12 * version 2 for more details (a copy is included in the LICENSE file that
13 * accompanied this code).
14 *
15 * You should have received a copy of the GNU General Public License version
16 * 2 along with this work; if not, write to the Free Software Foundation,
17 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
18 *
19 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
20 * or visit www.oracle.com if you need additional information or have any
21 * questions.
22 *
23 */
24
25#include "precompiled.hpp"
26#include "memory/allocation.inline.hpp"
27#include "memory/resourceArea.hpp"
28#include "opto/addnode.hpp"
29#include "opto/castnode.hpp"
30#include "opto/connode.hpp"
31#include "opto/castnode.hpp"
32#include "opto/divnode.hpp"
33#include "opto/loopnode.hpp"
34#include "opto/matcher.hpp"
35#include "opto/mulnode.hpp"
36#include "opto/movenode.hpp"
37#include "opto/opaquenode.hpp"
38#include "opto/rootnode.hpp"
39#include "opto/subnode.hpp"
40
41//=============================================================================
42//------------------------------split_thru_phi---------------------------------
43// Split Node 'n' through merge point if there is enough win.
44Node *PhaseIdealLoop::split_thru_phi( Node *n, Node *region, int policy ) {
45  if (n->Opcode() == Op_ConvI2L && n->bottom_type() != TypeLong::LONG) {
46    // ConvI2L may have type information on it which is unsafe to push up
47    // so disable this for now
48    return NULL;
49  }
50
51  // Splitting range check CastIIs through a loop induction Phi can
52  // cause new Phis to be created that are left unrelated to the loop
53  // induction Phi and prevent optimizations (vectorization)
54  if (n->Opcode() == Op_CastII && n->as_CastII()->has_range_check() &&
55      region->is_CountedLoop() && n->in(1) == region->as_CountedLoop()->phi()) {
56    return NULL;
57  }
58
59  int wins = 0;
60  assert(!n->is_CFG(), "");
61  assert(region->is_Region(), "");
62
63  const Type* type = n->bottom_type();
64  const TypeOopPtr *t_oop = _igvn.type(n)->isa_oopptr();
65  Node *phi;
66  if (t_oop != NULL && t_oop->is_known_instance_field()) {
67    int iid    = t_oop->instance_id();
68    int index  = C->get_alias_index(t_oop);
69    int offset = t_oop->offset();
70    phi = new PhiNode(region, type, NULL, iid, index, offset);
71  } else {
72    phi = PhiNode::make_blank(region, n);
73  }
74  uint old_unique = C->unique();
75  for (uint i = 1; i < region->req(); i++) {
76    Node *x;
77    Node* the_clone = NULL;
78    if (region->in(i) == C->top()) {
79      x = C->top();             // Dead path?  Use a dead data op
80    } else {
81      x = n->clone();           // Else clone up the data op
82      the_clone = x;            // Remember for possible deletion.
83      // Alter data node to use pre-phi inputs
84      if (n->in(0) == region)
85        x->set_req( 0, region->in(i) );
86      for (uint j = 1; j < n->req(); j++) {
87        Node *in = n->in(j);
88        if (in->is_Phi() && in->in(0) == region)
89          x->set_req( j, in->in(i) ); // Use pre-Phi input for the clone
90      }
91    }
92    // Check for a 'win' on some paths
93    const Type *t = x->Value(&_igvn);
94
95    bool singleton = t->singleton();
96
97    // A TOP singleton indicates that there are no possible values incoming
98    // along a particular edge. In most cases, this is OK, and the Phi will
99    // be eliminated later in an Ideal call. However, we can't allow this to
100    // happen if the singleton occurs on loop entry, as the elimination of
101    // the PhiNode may cause the resulting node to migrate back to a previous
102    // loop iteration.
103    if (singleton && t == Type::TOP) {
104      // Is_Loop() == false does not confirm the absence of a loop (e.g., an
105      // irreducible loop may not be indicated by an affirmative is_Loop());
106      // therefore, the only top we can split thru a phi is on a backedge of
107      // a loop.
108      singleton &= region->is_Loop() && (i != LoopNode::EntryControl);
109    }
110
111    if (singleton) {
112      wins++;
113      x = ((PhaseGVN&)_igvn).makecon(t);
114    } else {
115      // We now call Identity to try to simplify the cloned node.
116      // Note that some Identity methods call phase->type(this).
117      // Make sure that the type array is big enough for
118      // our new node, even though we may throw the node away.
119      // (Note: This tweaking with igvn only works because x is a new node.)
120      _igvn.set_type(x, t);
121      // If x is a TypeNode, capture any more-precise type permanently into Node
122      // otherwise it will be not updated during igvn->transform since
123      // igvn->type(x) is set to x->Value() already.
124      x->raise_bottom_type(t);
125      Node *y = x->Identity(&_igvn);
126      if (y != x) {
127        wins++;
128        x = y;
129      } else {
130        y = _igvn.hash_find(x);
131        if (y) {
132          wins++;
133          x = y;
134        } else {
135          // Else x is a new node we are keeping
136          // We do not need register_new_node_with_optimizer
137          // because set_type has already been called.
138          _igvn._worklist.push(x);
139        }
140      }
141    }
142    if (x != the_clone && the_clone != NULL)
143      _igvn.remove_dead_node(the_clone);
144    phi->set_req( i, x );
145  }
146  // Too few wins?
147  if (wins <= policy) {
148    _igvn.remove_dead_node(phi);
149    return NULL;
150  }
151
152  // Record Phi
153  register_new_node( phi, region );
154
155  for (uint i2 = 1; i2 < phi->req(); i2++) {
156    Node *x = phi->in(i2);
157    // If we commoned up the cloned 'x' with another existing Node,
158    // the existing Node picks up a new use.  We need to make the
159    // existing Node occur higher up so it dominates its uses.
160    Node *old_ctrl;
161    IdealLoopTree *old_loop;
162
163    if (x->is_Con()) {
164      // Constant's control is always root.
165      set_ctrl(x, C->root());
166      continue;
167    }
168    // The occasional new node
169    if (x->_idx >= old_unique) {     // Found a new, unplaced node?
170      old_ctrl = NULL;
171      old_loop = NULL;               // Not in any prior loop
172    } else {
173      old_ctrl = get_ctrl(x);
174      old_loop = get_loop(old_ctrl); // Get prior loop
175    }
176    // New late point must dominate new use
177    Node *new_ctrl = dom_lca(old_ctrl, region->in(i2));
178    if (new_ctrl == old_ctrl) // Nothing is changed
179      continue;
180
181    IdealLoopTree *new_loop = get_loop(new_ctrl);
182
183    // Don't move x into a loop if its uses are
184    // outside of loop. Otherwise x will be cloned
185    // for each use outside of this loop.
186    IdealLoopTree *use_loop = get_loop(region);
187    if (!new_loop->is_member(use_loop) &&
188        (old_loop == NULL || !new_loop->is_member(old_loop))) {
189      // Take early control, later control will be recalculated
190      // during next iteration of loop optimizations.
191      new_ctrl = get_early_ctrl(x);
192      new_loop = get_loop(new_ctrl);
193    }
194    // Set new location
195    set_ctrl(x, new_ctrl);
196    // If changing loop bodies, see if we need to collect into new body
197    if (old_loop != new_loop) {
198      if (old_loop && !old_loop->_child)
199        old_loop->_body.yank(x);
200      if (!new_loop->_child)
201        new_loop->_body.push(x);  // Collect body info
202    }
203  }
204
205  return phi;
206}
207
208//------------------------------dominated_by------------------------------------
209// Replace the dominated test with an obvious true or false.  Place it on the
210// IGVN worklist for later cleanup.  Move control-dependent data Nodes on the
211// live path up to the dominating control.
212void PhaseIdealLoop::dominated_by( Node *prevdom, Node *iff, bool flip, bool exclude_loop_predicate ) {
213  if (VerifyLoopOptimizations && PrintOpto) { tty->print_cr("dominating test"); }
214
215  // prevdom is the dominating projection of the dominating test.
216  assert( iff->is_If(), "" );
217  assert(iff->Opcode() == Op_If || iff->Opcode() == Op_CountedLoopEnd || iff->Opcode() == Op_RangeCheck, "Check this code when new subtype is added");
218  int pop = prevdom->Opcode();
219  assert( pop == Op_IfFalse || pop == Op_IfTrue, "" );
220  if (flip) {
221    if (pop == Op_IfTrue)
222      pop = Op_IfFalse;
223    else
224      pop = Op_IfTrue;
225  }
226  // 'con' is set to true or false to kill the dominated test.
227  Node *con = _igvn.makecon(pop == Op_IfTrue ? TypeInt::ONE : TypeInt::ZERO);
228  set_ctrl(con, C->root()); // Constant gets a new use
229  // Hack the dominated test
230  _igvn.replace_input_of(iff, 1, con);
231
232  // If I dont have a reachable TRUE and FALSE path following the IfNode then
233  // I can assume this path reaches an infinite loop.  In this case it's not
234  // important to optimize the data Nodes - either the whole compilation will
235  // be tossed or this path (and all data Nodes) will go dead.
236  if (iff->outcnt() != 2) return;
237
238  // Make control-dependent data Nodes on the live path (path that will remain
239  // once the dominated IF is removed) become control-dependent on the
240  // dominating projection.
241  Node* dp = iff->as_If()->proj_out(pop == Op_IfTrue);
242
243  // Loop predicates may have depending checks which should not
244  // be skipped. For example, range check predicate has two checks
245  // for lower and upper bounds.
246  if (dp == NULL)
247    return;
248
249  ProjNode* dp_proj  = dp->as_Proj();
250  ProjNode* unc_proj = iff->as_If()->proj_out(1 - dp_proj->_con)->as_Proj();
251  if (exclude_loop_predicate &&
252      (unc_proj->is_uncommon_trap_proj(Deoptimization::Reason_predicate) != NULL ||
253       unc_proj->is_uncommon_trap_proj(Deoptimization::Reason_range_check) != NULL)) {
254    // If this is a range check (IfNode::is_range_check), do not
255    // reorder because Compile::allow_range_check_smearing might have
256    // changed the check.
257    return; // Let IGVN transformation change control dependence.
258  }
259
260  IdealLoopTree *old_loop = get_loop(dp);
261
262  for (DUIterator_Fast imax, i = dp->fast_outs(imax); i < imax; i++) {
263    Node* cd = dp->fast_out(i); // Control-dependent node
264    if (cd->depends_only_on_test()) {
265      assert(cd->in(0) == dp, "");
266      _igvn.replace_input_of(cd, 0, prevdom);
267      set_early_ctrl(cd);
268      IdealLoopTree *new_loop = get_loop(get_ctrl(cd));
269      if (old_loop != new_loop) {
270        if (!old_loop->_child) old_loop->_body.yank(cd);
271        if (!new_loop->_child) new_loop->_body.push(cd);
272      }
273      --i;
274      --imax;
275    }
276  }
277}
278
279//------------------------------has_local_phi_input----------------------------
280// Return TRUE if 'n' has Phi inputs from its local block and no other
281// block-local inputs (all non-local-phi inputs come from earlier blocks)
282Node *PhaseIdealLoop::has_local_phi_input( Node *n ) {
283  Node *n_ctrl = get_ctrl(n);
284  // See if some inputs come from a Phi in this block, or from before
285  // this block.
286  uint i;
287  for( i = 1; i < n->req(); i++ ) {
288    Node *phi = n->in(i);
289    if( phi->is_Phi() && phi->in(0) == n_ctrl )
290      break;
291  }
292  if( i >= n->req() )
293    return NULL;                // No Phi inputs; nowhere to clone thru
294
295  // Check for inputs created between 'n' and the Phi input.  These
296  // must split as well; they have already been given the chance
297  // (courtesy of a post-order visit) and since they did not we must
298  // recover the 'cost' of splitting them by being very profitable
299  // when splitting 'n'.  Since this is unlikely we simply give up.
300  for( i = 1; i < n->req(); i++ ) {
301    Node *m = n->in(i);
302    if( get_ctrl(m) == n_ctrl && !m->is_Phi() ) {
303      // We allow the special case of AddP's with no local inputs.
304      // This allows us to split-up address expressions.
305      if (m->is_AddP() &&
306          get_ctrl(m->in(2)) != n_ctrl &&
307          get_ctrl(m->in(3)) != n_ctrl) {
308        // Move the AddP up to dominating point
309        set_ctrl_and_loop(m, find_non_split_ctrl(idom(n_ctrl)));
310        continue;
311      }
312      return NULL;
313    }
314  }
315
316  return n_ctrl;
317}
318
319//------------------------------remix_address_expressions----------------------
320// Rework addressing expressions to get the most loop-invariant stuff
321// moved out.  We'd like to do all associative operators, but it's especially
322// important (common) to do address expressions.
323Node *PhaseIdealLoop::remix_address_expressions( Node *n ) {
324  if (!has_ctrl(n))  return NULL;
325  Node *n_ctrl = get_ctrl(n);
326  IdealLoopTree *n_loop = get_loop(n_ctrl);
327
328  // See if 'n' mixes loop-varying and loop-invariant inputs and
329  // itself is loop-varying.
330
331  // Only interested in binary ops (and AddP)
332  if( n->req() < 3 || n->req() > 4 ) return NULL;
333
334  Node *n1_ctrl = get_ctrl(n->in(                    1));
335  Node *n2_ctrl = get_ctrl(n->in(                    2));
336  Node *n3_ctrl = get_ctrl(n->in(n->req() == 3 ? 2 : 3));
337  IdealLoopTree *n1_loop = get_loop( n1_ctrl );
338  IdealLoopTree *n2_loop = get_loop( n2_ctrl );
339  IdealLoopTree *n3_loop = get_loop( n3_ctrl );
340
341  // Does one of my inputs spin in a tighter loop than self?
342  if( (n_loop->is_member( n1_loop ) && n_loop != n1_loop) ||
343      (n_loop->is_member( n2_loop ) && n_loop != n2_loop) ||
344      (n_loop->is_member( n3_loop ) && n_loop != n3_loop) )
345    return NULL;                // Leave well enough alone
346
347  // Is at least one of my inputs loop-invariant?
348  if( n1_loop == n_loop &&
349      n2_loop == n_loop &&
350      n3_loop == n_loop )
351    return NULL;                // No loop-invariant inputs
352
353
354  int n_op = n->Opcode();
355
356  // Replace expressions like ((V+I) << 2) with (V<<2 + I<<2).
357  if( n_op == Op_LShiftI ) {
358    // Scale is loop invariant
359    Node *scale = n->in(2);
360    Node *scale_ctrl = get_ctrl(scale);
361    IdealLoopTree *scale_loop = get_loop(scale_ctrl );
362    if( n_loop == scale_loop || !scale_loop->is_member( n_loop ) )
363      return NULL;
364    const TypeInt *scale_t = scale->bottom_type()->isa_int();
365    if( scale_t && scale_t->is_con() && scale_t->get_con() >= 16 )
366      return NULL;              // Dont bother with byte/short masking
367    // Add must vary with loop (else shift would be loop-invariant)
368    Node *add = n->in(1);
369    Node *add_ctrl = get_ctrl(add);
370    IdealLoopTree *add_loop = get_loop(add_ctrl);
371    //assert( n_loop == add_loop, "" );
372    if( n_loop != add_loop ) return NULL;  // happens w/ evil ZKM loops
373
374    // Convert I-V into I+ (0-V); same for V-I
375    if( add->Opcode() == Op_SubI &&
376        _igvn.type( add->in(1) ) != TypeInt::ZERO ) {
377      Node *zero = _igvn.intcon(0);
378      set_ctrl(zero, C->root());
379      Node *neg = new SubINode( _igvn.intcon(0), add->in(2) );
380      register_new_node( neg, get_ctrl(add->in(2) ) );
381      add = new AddINode( add->in(1), neg );
382      register_new_node( add, add_ctrl );
383    }
384    if( add->Opcode() != Op_AddI ) return NULL;
385    // See if one add input is loop invariant
386    Node *add_var = add->in(1);
387    Node *add_var_ctrl = get_ctrl(add_var);
388    IdealLoopTree *add_var_loop = get_loop(add_var_ctrl );
389    Node *add_invar = add->in(2);
390    Node *add_invar_ctrl = get_ctrl(add_invar);
391    IdealLoopTree *add_invar_loop = get_loop(add_invar_ctrl );
392    if( add_var_loop == n_loop ) {
393    } else if( add_invar_loop == n_loop ) {
394      // Swap to find the invariant part
395      add_invar = add_var;
396      add_invar_ctrl = add_var_ctrl;
397      add_invar_loop = add_var_loop;
398      add_var = add->in(2);
399      Node *add_var_ctrl = get_ctrl(add_var);
400      IdealLoopTree *add_var_loop = get_loop(add_var_ctrl );
401    } else                      // Else neither input is loop invariant
402      return NULL;
403    if( n_loop == add_invar_loop || !add_invar_loop->is_member( n_loop ) )
404      return NULL;              // No invariant part of the add?
405
406    // Yes!  Reshape address expression!
407    Node *inv_scale = new LShiftINode( add_invar, scale );
408    Node *inv_scale_ctrl =
409      dom_depth(add_invar_ctrl) > dom_depth(scale_ctrl) ?
410      add_invar_ctrl : scale_ctrl;
411    register_new_node( inv_scale, inv_scale_ctrl );
412    Node *var_scale = new LShiftINode( add_var, scale );
413    register_new_node( var_scale, n_ctrl );
414    Node *var_add = new AddINode( var_scale, inv_scale );
415    register_new_node( var_add, n_ctrl );
416    _igvn.replace_node( n, var_add );
417    return var_add;
418  }
419
420  // Replace (I+V) with (V+I)
421  if( n_op == Op_AddI ||
422      n_op == Op_AddL ||
423      n_op == Op_AddF ||
424      n_op == Op_AddD ||
425      n_op == Op_MulI ||
426      n_op == Op_MulL ||
427      n_op == Op_MulF ||
428      n_op == Op_MulD ) {
429    if( n2_loop == n_loop ) {
430      assert( n1_loop != n_loop, "" );
431      n->swap_edges(1, 2);
432    }
433  }
434
435  // Replace ((I1 +p V) +p I2) with ((I1 +p I2) +p V),
436  // but not if I2 is a constant.
437  if( n_op == Op_AddP ) {
438    if( n2_loop == n_loop && n3_loop != n_loop ) {
439      if( n->in(2)->Opcode() == Op_AddP && !n->in(3)->is_Con() ) {
440        Node *n22_ctrl = get_ctrl(n->in(2)->in(2));
441        Node *n23_ctrl = get_ctrl(n->in(2)->in(3));
442        IdealLoopTree *n22loop = get_loop( n22_ctrl );
443        IdealLoopTree *n23_loop = get_loop( n23_ctrl );
444        if( n22loop != n_loop && n22loop->is_member(n_loop) &&
445            n23_loop == n_loop ) {
446          Node *add1 = new AddPNode( n->in(1), n->in(2)->in(2), n->in(3) );
447          // Stuff new AddP in the loop preheader
448          register_new_node( add1, n_loop->_head->in(LoopNode::EntryControl) );
449          Node *add2 = new AddPNode( n->in(1), add1, n->in(2)->in(3) );
450          register_new_node( add2, n_ctrl );
451          _igvn.replace_node( n, add2 );
452          return add2;
453        }
454      }
455    }
456
457    // Replace (I1 +p (I2 + V)) with ((I1 +p I2) +p V)
458    if (n2_loop != n_loop && n3_loop == n_loop) {
459      if (n->in(3)->Opcode() == Op_AddX) {
460        Node *V = n->in(3)->in(1);
461        Node *I = n->in(3)->in(2);
462        if (is_member(n_loop,get_ctrl(V))) {
463        } else {
464          Node *tmp = V; V = I; I = tmp;
465        }
466        if (!is_member(n_loop,get_ctrl(I))) {
467          Node *add1 = new AddPNode(n->in(1), n->in(2), I);
468          // Stuff new AddP in the loop preheader
469          register_new_node(add1, n_loop->_head->in(LoopNode::EntryControl));
470          Node *add2 = new AddPNode(n->in(1), add1, V);
471          register_new_node(add2, n_ctrl);
472          _igvn.replace_node(n, add2);
473          return add2;
474        }
475      }
476    }
477  }
478
479  return NULL;
480}
481
482//------------------------------conditional_move-------------------------------
483// Attempt to replace a Phi with a conditional move.  We have some pretty
484// strict profitability requirements.  All Phis at the merge point must
485// be converted, so we can remove the control flow.  We need to limit the
486// number of c-moves to a small handful.  All code that was in the side-arms
487// of the CFG diamond is now speculatively executed.  This code has to be
488// "cheap enough".  We are pretty much limited to CFG diamonds that merge
489// 1 or 2 items with a total of 1 or 2 ops executed speculatively.
490Node *PhaseIdealLoop::conditional_move( Node *region ) {
491
492  assert(region->is_Region(), "sanity check");
493  if (region->req() != 3) return NULL;
494
495  // Check for CFG diamond
496  Node *lp = region->in(1);
497  Node *rp = region->in(2);
498  if (!lp || !rp) return NULL;
499  Node *lp_c = lp->in(0);
500  if (lp_c == NULL || lp_c != rp->in(0) || !lp_c->is_If()) return NULL;
501  IfNode *iff = lp_c->as_If();
502
503  // Check for ops pinned in an arm of the diamond.
504  // Can't remove the control flow in this case
505  if (lp->outcnt() > 1) return NULL;
506  if (rp->outcnt() > 1) return NULL;
507
508  IdealLoopTree* r_loop = get_loop(region);
509  assert(r_loop == get_loop(iff), "sanity");
510  // Always convert to CMOVE if all results are used only outside this loop.
511  bool used_inside_loop = (r_loop == _ltree_root);
512
513  // Check profitability
514  int cost = 0;
515  int phis = 0;
516  for (DUIterator_Fast imax, i = region->fast_outs(imax); i < imax; i++) {
517    Node *out = region->fast_out(i);
518    if (!out->is_Phi()) continue; // Ignore other control edges, etc
519    phis++;
520    PhiNode* phi = out->as_Phi();
521    BasicType bt = phi->type()->basic_type();
522    switch (bt) {
523    case T_DOUBLE:
524      if (C->use_cmove()) {
525        continue; //TODO: maybe we want to add some cost
526      }
527    case T_FLOAT: {
528      cost += Matcher::float_cmove_cost(); // Could be very expensive
529      break;
530    }
531    case T_LONG: {
532      cost += Matcher::long_cmove_cost(); // May encodes as 2 CMOV's
533    }
534    case T_INT:                 // These all CMOV fine
535    case T_ADDRESS: {           // (RawPtr)
536      cost++;
537      break;
538    }
539    case T_NARROWOOP: // Fall through
540    case T_OBJECT: {            // Base oops are OK, but not derived oops
541      const TypeOopPtr *tp = phi->type()->make_ptr()->isa_oopptr();
542      // Derived pointers are Bad (tm): what's the Base (for GC purposes) of a
543      // CMOVE'd derived pointer?  It's a CMOVE'd derived base.  Thus
544      // CMOVE'ing a derived pointer requires we also CMOVE the base.  If we
545      // have a Phi for the base here that we convert to a CMOVE all is well
546      // and good.  But if the base is dead, we'll not make a CMOVE.  Later
547      // the allocator will have to produce a base by creating a CMOVE of the
548      // relevant bases.  This puts the allocator in the business of
549      // manufacturing expensive instructions, generally a bad plan.
550      // Just Say No to Conditionally-Moved Derived Pointers.
551      if (tp && tp->offset() != 0)
552        return NULL;
553      cost++;
554      break;
555    }
556    default:
557      return NULL;              // In particular, can't do memory or I/O
558    }
559    // Add in cost any speculative ops
560    for (uint j = 1; j < region->req(); j++) {
561      Node *proj = region->in(j);
562      Node *inp = phi->in(j);
563      if (get_ctrl(inp) == proj) { // Found local op
564        cost++;
565        // Check for a chain of dependent ops; these will all become
566        // speculative in a CMOV.
567        for (uint k = 1; k < inp->req(); k++)
568          if (get_ctrl(inp->in(k)) == proj)
569            cost += ConditionalMoveLimit; // Too much speculative goo
570      }
571    }
572    // See if the Phi is used by a Cmp or Narrow oop Decode/Encode.
573    // This will likely Split-If, a higher-payoff operation.
574    for (DUIterator_Fast kmax, k = phi->fast_outs(kmax); k < kmax; k++) {
575      Node* use = phi->fast_out(k);
576      if (use->is_Cmp() || use->is_DecodeNarrowPtr() || use->is_EncodeNarrowPtr())
577        cost += ConditionalMoveLimit;
578      // Is there a use inside the loop?
579      // Note: check only basic types since CMoveP is pinned.
580      if (!used_inside_loop && is_java_primitive(bt)) {
581        IdealLoopTree* u_loop = get_loop(has_ctrl(use) ? get_ctrl(use) : use);
582        if (r_loop == u_loop || r_loop->is_member(u_loop)) {
583          used_inside_loop = true;
584        }
585      }
586    }
587  }//for
588  Node* bol = iff->in(1);
589  assert(bol->Opcode() == Op_Bool, "");
590  int cmp_op = bol->in(1)->Opcode();
591  // It is expensive to generate flags from a float compare.
592  // Avoid duplicated float compare.
593  if (phis > 1 && (cmp_op == Op_CmpF || cmp_op == Op_CmpD)) return NULL;
594
595  float infrequent_prob = PROB_UNLIKELY_MAG(3);
596  // Ignore cost and blocks frequency if CMOVE can be moved outside the loop.
597  if (used_inside_loop) {
598    if (cost >= ConditionalMoveLimit) return NULL; // Too much goo
599
600    // BlockLayoutByFrequency optimization moves infrequent branch
601    // from hot path. No point in CMOV'ing in such case (110 is used
602    // instead of 100 to take into account not exactness of float value).
603    if (BlockLayoutByFrequency) {
604      infrequent_prob = MAX2(infrequent_prob, (float)BlockLayoutMinDiamondPercentage/110.0f);
605    }
606  }
607  // Check for highly predictable branch.  No point in CMOV'ing if
608  // we are going to predict accurately all the time.
609  if (C->use_cmove() && cmp_op == Op_CmpD) ;//keep going
610  else if (iff->_prob < infrequent_prob ||
611      iff->_prob > (1.0f - infrequent_prob))
612    return NULL;
613
614  // --------------
615  // Now replace all Phis with CMOV's
616  Node *cmov_ctrl = iff->in(0);
617  uint flip = (lp->Opcode() == Op_IfTrue);
618  while (1) {
619    PhiNode* phi = NULL;
620    for (DUIterator_Fast imax, i = region->fast_outs(imax); i < imax; i++) {
621      Node *out = region->fast_out(i);
622      if (out->is_Phi()) {
623        phi = out->as_Phi();
624        break;
625      }
626    }
627    if (phi == NULL)  break;
628    if (PrintOpto && VerifyLoopOptimizations) { tty->print_cr("CMOV"); }
629    // Move speculative ops
630    for (uint j = 1; j < region->req(); j++) {
631      Node *proj = region->in(j);
632      Node *inp = phi->in(j);
633      if (get_ctrl(inp) == proj) { // Found local op
634#ifndef PRODUCT
635        if (PrintOpto && VerifyLoopOptimizations) {
636          tty->print("  speculate: ");
637          inp->dump();
638        }
639#endif
640        set_ctrl(inp, cmov_ctrl);
641      }
642    }
643    Node *cmov = CMoveNode::make(cmov_ctrl, iff->in(1), phi->in(1+flip), phi->in(2-flip), _igvn.type(phi));
644    register_new_node( cmov, cmov_ctrl );
645    _igvn.replace_node( phi, cmov );
646#ifndef PRODUCT
647    if (TraceLoopOpts) {
648      tty->print("CMOV  ");
649      r_loop->dump_head();
650      if (Verbose) {
651        bol->in(1)->dump(1);
652        cmov->dump(1);
653      }
654    }
655    if (VerifyLoopOptimizations) verify();
656#endif
657  }
658
659  // The useless CFG diamond will fold up later; see the optimization in
660  // RegionNode::Ideal.
661  _igvn._worklist.push(region);
662
663  return iff->in(1);
664}
665
666static void enqueue_cfg_uses(Node* m, Unique_Node_List& wq) {
667  for (DUIterator_Fast imax, i = m->fast_outs(imax); i < imax; i++) {
668    Node* u = m->fast_out(i);
669    if (u->is_CFG()) {
670      if (u->Opcode() == Op_NeverBranch) {
671        u = ((NeverBranchNode*)u)->proj_out(0);
672        enqueue_cfg_uses(u, wq);
673      } else {
674        wq.push(u);
675      }
676    }
677  }
678}
679
680// Try moving a store out of a loop, right before the loop
681Node* PhaseIdealLoop::try_move_store_before_loop(Node* n, Node *n_ctrl) {
682  // Store has to be first in the loop body
683  IdealLoopTree *n_loop = get_loop(n_ctrl);
684  if (n->is_Store() && n_loop != _ltree_root && n_loop->is_loop() && n->in(0) != NULL) {
685    Node* address = n->in(MemNode::Address);
686    Node* value = n->in(MemNode::ValueIn);
687    Node* mem = n->in(MemNode::Memory);
688    IdealLoopTree* address_loop = get_loop(get_ctrl(address));
689    IdealLoopTree* value_loop = get_loop(get_ctrl(value));
690
691    // - address and value must be loop invariant
692    // - memory must be a memory Phi for the loop
693    // - Store must be the only store on this memory slice in the
694    // loop: if there's another store following this one then value
695    // written at iteration i by the second store could be overwritten
696    // at iteration i+n by the first store: it's not safe to move the
697    // first store out of the loop
698    // - nothing must observe the memory Phi: it guarantees no read
699    // before the store, we are also guaranteed the store post
700    // dominates the loop head (ignoring a possible early
701    // exit). Otherwise there would be extra Phi involved between the
702    // loop's Phi and the store.
703    // - there must be no early exit from the loop before the Store
704    // (such an exit most of the time would be an extra use of the
705    // memory Phi but sometimes is a bottom memory Phi that takes the
706    // store as input).
707
708    if (!n_loop->is_member(address_loop) &&
709        !n_loop->is_member(value_loop) &&
710        mem->is_Phi() && mem->in(0) == n_loop->_head &&
711        mem->outcnt() == 1 &&
712        mem->in(LoopNode::LoopBackControl) == n) {
713
714      assert(n_loop->_tail != NULL, "need a tail");
715      assert(is_dominator(n_ctrl, n_loop->_tail), "store control must not be in a branch in the loop");
716
717      // Verify that there's no early exit of the loop before the store.
718      bool ctrl_ok = false;
719      {
720        // Follow control from loop head until n, we exit the loop or
721        // we reach the tail
722        ResourceMark rm;
723        Unique_Node_List wq;
724        wq.push(n_loop->_head);
725
726        for (uint next = 0; next < wq.size(); ++next) {
727          Node *m = wq.at(next);
728          if (m == n->in(0)) {
729            ctrl_ok = true;
730            continue;
731          }
732          assert(!has_ctrl(m), "should be CFG");
733          if (!n_loop->is_member(get_loop(m)) || m == n_loop->_tail) {
734            ctrl_ok = false;
735            break;
736          }
737          enqueue_cfg_uses(m, wq);
738          if (wq.size() > 10) {
739            ctrl_ok = false;
740            break;
741          }
742        }
743      }
744      if (ctrl_ok) {
745        // move the Store
746        _igvn.replace_input_of(mem, LoopNode::LoopBackControl, mem);
747        _igvn.replace_input_of(n, 0, n_loop->_head->in(LoopNode::EntryControl));
748        _igvn.replace_input_of(n, MemNode::Memory, mem->in(LoopNode::EntryControl));
749        // Disconnect the phi now. An empty phi can confuse other
750        // optimizations in this pass of loop opts.
751        _igvn.replace_node(mem, mem->in(LoopNode::EntryControl));
752        n_loop->_body.yank(mem);
753
754        IdealLoopTree* new_loop = get_loop(n->in(0));
755        set_ctrl_and_loop(n, n->in(0));
756
757        return n;
758      }
759    }
760  }
761  return NULL;
762}
763
764// Try moving a store out of a loop, right after the loop
765void PhaseIdealLoop::try_move_store_after_loop(Node* n) {
766  if (n->is_Store() && n->in(0) != NULL) {
767    Node *n_ctrl = get_ctrl(n);
768    IdealLoopTree *n_loop = get_loop(n_ctrl);
769    // Store must be in a loop
770    if (n_loop != _ltree_root && !n_loop->_irreducible) {
771      Node* address = n->in(MemNode::Address);
772      Node* value = n->in(MemNode::ValueIn);
773      IdealLoopTree* address_loop = get_loop(get_ctrl(address));
774      // address must be loop invariant
775      if (!n_loop->is_member(address_loop)) {
776        // Store must be last on this memory slice in the loop and
777        // nothing in the loop must observe it
778        Node* phi = NULL;
779        for (DUIterator_Fast imax, i = n->fast_outs(imax); i < imax; i++) {
780          Node* u = n->fast_out(i);
781          if (has_ctrl(u)) { // control use?
782            IdealLoopTree *u_loop = get_loop(get_ctrl(u));
783            if (!n_loop->is_member(u_loop)) {
784              continue;
785            }
786            if (u->is_Phi() && u->in(0) == n_loop->_head) {
787              assert(_igvn.type(u) == Type::MEMORY, "bad phi");
788              // multiple phis on the same slice are possible
789              if (phi != NULL) {
790                return;
791              }
792              phi = u;
793              continue;
794            }
795          }
796          return;
797        }
798        if (phi != NULL) {
799          // Nothing in the loop before the store (next iteration)
800          // must observe the stored value
801          bool mem_ok = true;
802          {
803            ResourceMark rm;
804            Unique_Node_List wq;
805            wq.push(phi);
806            for (uint next = 0; next < wq.size() && mem_ok; ++next) {
807              Node *m = wq.at(next);
808              for (DUIterator_Fast imax, i = m->fast_outs(imax); i < imax && mem_ok; i++) {
809                Node* u = m->fast_out(i);
810                if (u->is_Store() || u->is_Phi()) {
811                  if (u != n) {
812                    wq.push(u);
813                    mem_ok = (wq.size() <= 10);
814                  }
815                } else {
816                  mem_ok = false;
817                  break;
818                }
819              }
820            }
821          }
822          if (mem_ok) {
823            // Move the Store out of the loop creating clones along
824            // all paths out of the loop that observe the stored value
825            _igvn.rehash_node_delayed(phi);
826            int count = phi->replace_edge(n, n->in(MemNode::Memory));
827            assert(count > 0, "inconsistent phi");
828            for (DUIterator_Fast imax, i = n->fast_outs(imax); i < imax; i++) {
829              Node* u = n->fast_out(i);
830              Node* c = get_ctrl(u);
831
832              if (u->is_Phi()) {
833                c = u->in(0)->in(u->find_edge(n));
834              }
835              IdealLoopTree *u_loop = get_loop(c);
836              assert (!n_loop->is_member(u_loop), "only the phi should have been a use in the loop");
837              while(true) {
838                Node* next_c = find_non_split_ctrl(idom(c));
839                if (n_loop->is_member(get_loop(next_c))) {
840                  break;
841                }
842                c = next_c;
843              }
844
845              Node* st = n->clone();
846              st->set_req(0, c);
847              _igvn.register_new_node_with_optimizer(st);
848
849              set_ctrl(st, c);
850              IdealLoopTree* new_loop = get_loop(c);
851              assert(new_loop != n_loop, "should be moved out of loop");
852              if (new_loop->_child == NULL) new_loop->_body.push(st);
853
854              _igvn.replace_input_of(u, u->find_edge(n), st);
855              --imax;
856              --i;
857            }
858
859
860            assert(n->outcnt() == 0, "all uses should be gone");
861            _igvn.replace_input_of(n, MemNode::Memory, C->top());
862            // Disconnect the phi now. An empty phi can confuse other
863            // optimizations in this pass of loop opts..
864            if (phi->in(LoopNode::LoopBackControl) == phi) {
865              _igvn.replace_node(phi, phi->in(LoopNode::EntryControl));
866              n_loop->_body.yank(phi);
867            }
868          }
869        }
870      }
871    }
872  }
873}
874
875//------------------------------split_if_with_blocks_pre-----------------------
876// Do the real work in a non-recursive function.  Data nodes want to be
877// cloned in the pre-order so they can feed each other nicely.
878Node *PhaseIdealLoop::split_if_with_blocks_pre( Node *n ) {
879  // Cloning these guys is unlikely to win
880  int n_op = n->Opcode();
881  if( n_op == Op_MergeMem ) return n;
882  if( n->is_Proj() ) return n;
883  // Do not clone-up CmpFXXX variations, as these are always
884  // followed by a CmpI
885  if( n->is_Cmp() ) return n;
886  // Attempt to use a conditional move instead of a phi/branch
887  if( ConditionalMoveLimit > 0 && n_op == Op_Region ) {
888    Node *cmov = conditional_move( n );
889    if( cmov ) return cmov;
890  }
891  if( n->is_CFG() || n->is_LoadStore() )
892    return n;
893  if( n_op == Op_Opaque1 ||     // Opaque nodes cannot be mod'd
894      n_op == Op_Opaque2 ) {
895    if( !C->major_progress() )   // If chance of no more loop opts...
896      _igvn._worklist.push(n);  // maybe we'll remove them
897    return n;
898  }
899
900  if( n->is_Con() ) return n;   // No cloning for Con nodes
901
902  Node *n_ctrl = get_ctrl(n);
903  if( !n_ctrl ) return n;       // Dead node
904
905  Node* res = try_move_store_before_loop(n, n_ctrl);
906  if (res != NULL) {
907    return n;
908  }
909
910  // Attempt to remix address expressions for loop invariants
911  Node *m = remix_address_expressions( n );
912  if( m ) return m;
913
914  if (n->is_ConstraintCast()) {
915    Node* dom_cast = n->as_ConstraintCast()->dominating_cast(&_igvn, this);
916    // ConstraintCastNode::dominating_cast() uses node control input to determine domination.
917    // Node control inputs don't necessarily agree with loop control info (due to
918    // transformations happened in between), thus additional dominance check is needed
919    // to keep loop info valid.
920    if (dom_cast != NULL && is_dominator(get_ctrl(dom_cast), get_ctrl(n))) {
921      _igvn.replace_node(n, dom_cast);
922      return dom_cast;
923    }
924  }
925
926  // Determine if the Node has inputs from some local Phi.
927  // Returns the block to clone thru.
928  Node *n_blk = has_local_phi_input( n );
929  if( !n_blk ) return n;
930
931  // Do not clone the trip counter through on a CountedLoop
932  // (messes up the canonical shape).
933  if( n_blk->is_CountedLoop() && n->Opcode() == Op_AddI ) return n;
934
935  // Check for having no control input; not pinned.  Allow
936  // dominating control.
937  if (n->in(0)) {
938    Node *dom = idom(n_blk);
939    if (dom_lca(n->in(0), dom) != n->in(0)) {
940      return n;
941    }
942  }
943  // Policy: when is it profitable.  You must get more wins than
944  // policy before it is considered profitable.  Policy is usually 0,
945  // so 1 win is considered profitable.  Big merges will require big
946  // cloning, so get a larger policy.
947  int policy = n_blk->req() >> 2;
948
949  // If the loop is a candidate for range check elimination,
950  // delay splitting through it's phi until a later loop optimization
951  if (n_blk->is_CountedLoop()) {
952    IdealLoopTree *lp = get_loop(n_blk);
953    if (lp && lp->_rce_candidate) {
954      return n;
955    }
956  }
957
958  // Use same limit as split_if_with_blocks_post
959  if( C->live_nodes() > 35000 ) return n; // Method too big
960
961  // Split 'n' through the merge point if it is profitable
962  Node *phi = split_thru_phi( n, n_blk, policy );
963  if (!phi) return n;
964
965  // Found a Phi to split thru!
966  // Replace 'n' with the new phi
967  _igvn.replace_node( n, phi );
968  // Moved a load around the loop, 'en-registering' something.
969  if (n_blk->is_Loop() && n->is_Load() &&
970      !phi->in(LoopNode::LoopBackControl)->is_Load())
971    C->set_major_progress();
972
973  return phi;
974}
975
976static bool merge_point_too_heavy(Compile* C, Node* region) {
977  // Bail out if the region and its phis have too many users.
978  int weight = 0;
979  for (DUIterator_Fast imax, i = region->fast_outs(imax); i < imax; i++) {
980    weight += region->fast_out(i)->outcnt();
981  }
982  int nodes_left = C->max_node_limit() - C->live_nodes();
983  if (weight * 8 > nodes_left) {
984    if (PrintOpto) {
985      tty->print_cr("*** Split-if bails out:  %d nodes, region weight %d", C->unique(), weight);
986    }
987    return true;
988  } else {
989    return false;
990  }
991}
992
993static bool merge_point_safe(Node* region) {
994  // 4799512: Stop split_if_with_blocks from splitting a block with a ConvI2LNode
995  // having a PhiNode input. This sidesteps the dangerous case where the split
996  // ConvI2LNode may become TOP if the input Value() does not
997  // overlap the ConvI2L range, leaving a node which may not dominate its
998  // uses.
999  // A better fix for this problem can be found in the BugTraq entry, but
1000  // expediency for Mantis demands this hack.
1001  // 6855164: If the merge point has a FastLockNode with a PhiNode input, we stop
1002  // split_if_with_blocks from splitting a block because we could not move around
1003  // the FastLockNode.
1004  for (DUIterator_Fast imax, i = region->fast_outs(imax); i < imax; i++) {
1005    Node* n = region->fast_out(i);
1006    if (n->is_Phi()) {
1007      for (DUIterator_Fast jmax, j = n->fast_outs(jmax); j < jmax; j++) {
1008        Node* m = n->fast_out(j);
1009        if (m->is_FastLock())
1010          return false;
1011#ifdef _LP64
1012        if (m->Opcode() == Op_ConvI2L)
1013          return false;
1014        if (m->is_CastII() && m->isa_CastII()->has_range_check()) {
1015          return false;
1016        }
1017#endif
1018      }
1019    }
1020  }
1021  return true;
1022}
1023
1024
1025//------------------------------place_near_use---------------------------------
1026// Place some computation next to use but not inside inner loops.
1027// For inner loop uses move it to the preheader area.
1028Node *PhaseIdealLoop::place_near_use( Node *useblock ) const {
1029  IdealLoopTree *u_loop = get_loop( useblock );
1030  return (u_loop->_irreducible || u_loop->_child)
1031    ? useblock
1032    : u_loop->_head->in(LoopNode::EntryControl);
1033}
1034
1035
1036bool PhaseIdealLoop::identical_backtoback_ifs(Node *n) {
1037  if (!n->is_If()) {
1038    return false;
1039  }
1040  if (!n->in(0)->is_Region()) {
1041    return false;
1042  }
1043  Node* region = n->in(0);
1044  Node* dom = idom(region);
1045  if (!dom->is_If() || dom->in(1) != n->in(1)) {
1046    return false;
1047  }
1048  IfNode* dom_if = dom->as_If();
1049  Node* proj_true = dom_if->proj_out(1);
1050  Node* proj_false = dom_if->proj_out(0);
1051
1052  for (uint i = 1; i < region->req(); i++) {
1053    if (is_dominator(proj_true, region->in(i))) {
1054      continue;
1055    }
1056    if (is_dominator(proj_false, region->in(i))) {
1057      continue;
1058    }
1059    return false;
1060  }
1061
1062  return true;
1063}
1064
1065bool PhaseIdealLoop::can_split_if(Node *n_ctrl) {
1066  if (C->live_nodes() > 35000) {
1067    return false; // Method too big
1068  }
1069
1070  // Do not do 'split-if' if irreducible loops are present.
1071  if (_has_irreducible_loops) {
1072    return false;
1073  }
1074
1075  if (merge_point_too_heavy(C, n_ctrl)) {
1076    return false;
1077  }
1078
1079  // Do not do 'split-if' if some paths are dead.  First do dead code
1080  // elimination and then see if its still profitable.
1081  for (uint i = 1; i < n_ctrl->req(); i++) {
1082    if (n_ctrl->in(i) == C->top()) {
1083      return false;
1084    }
1085  }
1086
1087  // If trying to do a 'Split-If' at the loop head, it is only
1088  // profitable if the cmp folds up on BOTH paths.  Otherwise we
1089  // risk peeling a loop forever.
1090
1091  // CNC - Disabled for now.  Requires careful handling of loop
1092  // body selection for the cloned code.  Also, make sure we check
1093  // for any input path not being in the same loop as n_ctrl.  For
1094  // irreducible loops we cannot check for 'n_ctrl->is_Loop()'
1095  // because the alternative loop entry points won't be converted
1096  // into LoopNodes.
1097  IdealLoopTree *n_loop = get_loop(n_ctrl);
1098  for (uint j = 1; j < n_ctrl->req(); j++) {
1099    if (get_loop(n_ctrl->in(j)) != n_loop) {
1100      return false;
1101    }
1102  }
1103
1104  // Check for safety of the merge point.
1105  if (!merge_point_safe(n_ctrl)) {
1106    return false;
1107  }
1108
1109  return true;
1110}
1111
1112//------------------------------split_if_with_blocks_post----------------------
1113// Do the real work in a non-recursive function.  CFG hackery wants to be
1114// in the post-order, so it can dirty the I-DOM info and not use the dirtied
1115// info.
1116void PhaseIdealLoop::split_if_with_blocks_post(Node *n) {
1117
1118  // Cloning Cmp through Phi's involves the split-if transform.
1119  // FastLock is not used by an If
1120  if (n->is_Cmp() && !n->is_FastLock()) {
1121    Node *n_ctrl = get_ctrl(n);
1122    // Determine if the Node has inputs from some local Phi.
1123    // Returns the block to clone thru.
1124    Node *n_blk = has_local_phi_input(n);
1125    if (n_blk != n_ctrl) {
1126      return;
1127    }
1128
1129    if (!can_split_if(n_ctrl)) {
1130      return;
1131    }
1132
1133    if (n->outcnt() != 1) {
1134      return; // Multiple bool's from 1 compare?
1135    }
1136    Node *bol = n->unique_out();
1137    assert(bol->is_Bool(), "expect a bool here");
1138    if (bol->outcnt() != 1) {
1139      return;// Multiple branches from 1 compare?
1140    }
1141    Node *iff = bol->unique_out();
1142
1143    // Check some safety conditions
1144    if (iff->is_If()) {        // Classic split-if?
1145      if (iff->in(0) != n_ctrl) {
1146        return; // Compare must be in same blk as if
1147      }
1148    } else if (iff->is_CMove()) { // Trying to split-up a CMOVE
1149      // Can't split CMove with different control edge.
1150      if (iff->in(0) != NULL && iff->in(0) != n_ctrl ) {
1151        return;
1152      }
1153      if (get_ctrl(iff->in(2)) == n_ctrl ||
1154          get_ctrl(iff->in(3)) == n_ctrl) {
1155        return;                 // Inputs not yet split-up
1156      }
1157      if (get_loop(n_ctrl) != get_loop(get_ctrl(iff))) {
1158        return;                 // Loop-invar test gates loop-varying CMOVE
1159      }
1160    } else {
1161      return;  // some other kind of node, such as an Allocate
1162    }
1163
1164    // When is split-if profitable?  Every 'win' on means some control flow
1165    // goes dead, so it's almost always a win.
1166    int policy = 0;
1167    // Split compare 'n' through the merge point if it is profitable
1168    Node *phi = split_thru_phi( n, n_ctrl, policy);
1169    if (!phi) {
1170      return;
1171    }
1172
1173    // Found a Phi to split thru!
1174    // Replace 'n' with the new phi
1175    _igvn.replace_node(n, phi);
1176
1177    // Now split the bool up thru the phi
1178    Node *bolphi = split_thru_phi(bol, n_ctrl, -1);
1179    guarantee(bolphi != NULL, "null boolean phi node");
1180
1181    _igvn.replace_node(bol, bolphi);
1182    assert(iff->in(1) == bolphi, "");
1183
1184    if (bolphi->Value(&_igvn)->singleton()) {
1185      return;
1186    }
1187
1188    // Conditional-move?  Must split up now
1189    if (!iff->is_If()) {
1190      Node *cmovphi = split_thru_phi(iff, n_ctrl, -1);
1191      _igvn.replace_node(iff, cmovphi);
1192      return;
1193    }
1194
1195    // Now split the IF
1196    do_split_if(iff);
1197    return;
1198  }
1199
1200  // Two identical ifs back to back can be merged
1201  if (identical_backtoback_ifs(n) && can_split_if(n->in(0))) {
1202    Node *n_ctrl = n->in(0);
1203    PhiNode* bolphi = PhiNode::make_blank(n_ctrl, n->in(1));
1204    IfNode* dom_if = idom(n_ctrl)->as_If();
1205    Node* proj_true = dom_if->proj_out(1);
1206    Node* proj_false = dom_if->proj_out(0);
1207    Node* con_true = _igvn.makecon(TypeInt::ONE);
1208    Node* con_false = _igvn.makecon(TypeInt::ZERO);
1209
1210    for (uint i = 1; i < n_ctrl->req(); i++) {
1211      if (is_dominator(proj_true, n_ctrl->in(i))) {
1212        bolphi->init_req(i, con_true);
1213      } else {
1214        assert(is_dominator(proj_false, n_ctrl->in(i)), "bad if");
1215        bolphi->init_req(i, con_false);
1216      }
1217    }
1218    register_new_node(bolphi, n_ctrl);
1219    _igvn.replace_input_of(n, 1, bolphi);
1220
1221    // Now split the IF
1222    do_split_if(n);
1223    return;
1224  }
1225
1226  // Check for an IF ready to split; one that has its
1227  // condition codes input coming from a Phi at the block start.
1228  int n_op = n->Opcode();
1229
1230  // Check for an IF being dominated by another IF same test
1231  if (n_op == Op_If ||
1232      n_op == Op_RangeCheck) {
1233    Node *bol = n->in(1);
1234    uint max = bol->outcnt();
1235    // Check for same test used more than once?
1236    if (max > 1 && bol->is_Bool()) {
1237      // Search up IDOMs to see if this IF is dominated.
1238      Node *cutoff = get_ctrl(bol);
1239
1240      // Now search up IDOMs till cutoff, looking for a dominating test
1241      Node *prevdom = n;
1242      Node *dom = idom(prevdom);
1243      while (dom != cutoff) {
1244        if (dom->req() > 1 && dom->in(1) == bol && prevdom->in(0) == dom) {
1245          // Replace the dominated test with an obvious true or false.
1246          // Place it on the IGVN worklist for later cleanup.
1247          C->set_major_progress();
1248          dominated_by(prevdom, n, false, true);
1249#ifndef PRODUCT
1250          if( VerifyLoopOptimizations ) verify();
1251#endif
1252          return;
1253        }
1254        prevdom = dom;
1255        dom = idom(prevdom);
1256      }
1257    }
1258  }
1259
1260  // See if a shared loop-varying computation has no loop-varying uses.
1261  // Happens if something is only used for JVM state in uncommon trap exits,
1262  // like various versions of induction variable+offset.  Clone the
1263  // computation per usage to allow it to sink out of the loop.
1264  if (has_ctrl(n) && !n->in(0)) {// n not dead and has no control edge (can float about)
1265    Node *n_ctrl = get_ctrl(n);
1266    IdealLoopTree *n_loop = get_loop(n_ctrl);
1267    if( n_loop != _ltree_root ) {
1268      DUIterator_Fast imax, i = n->fast_outs(imax);
1269      for (; i < imax; i++) {
1270        Node* u = n->fast_out(i);
1271        if( !has_ctrl(u) )     break; // Found control user
1272        IdealLoopTree *u_loop = get_loop(get_ctrl(u));
1273        if( u_loop == n_loop ) break; // Found loop-varying use
1274        if( n_loop->is_member( u_loop ) ) break; // Found use in inner loop
1275        if( u->Opcode() == Op_Opaque1 ) break; // Found loop limit, bugfix for 4677003
1276      }
1277      bool did_break = (i < imax);  // Did we break out of the previous loop?
1278      if (!did_break && n->outcnt() > 1) { // All uses in outer loops!
1279        Node *late_load_ctrl = NULL;
1280        if (n->is_Load()) {
1281          // If n is a load, get and save the result from get_late_ctrl(),
1282          // to be later used in calculating the control for n's clones.
1283          clear_dom_lca_tags();
1284          late_load_ctrl = get_late_ctrl(n, n_ctrl);
1285        }
1286        // If n is a load, and the late control is the same as the current
1287        // control, then the cloning of n is a pointless exercise, because
1288        // GVN will ensure that we end up where we started.
1289        if (!n->is_Load() || late_load_ctrl != n_ctrl) {
1290          for (DUIterator_Last jmin, j = n->last_outs(jmin); j >= jmin; ) {
1291            Node *u = n->last_out(j); // Clone private computation per use
1292            _igvn.rehash_node_delayed(u);
1293            Node *x = n->clone(); // Clone computation
1294            Node *x_ctrl = NULL;
1295            if( u->is_Phi() ) {
1296              // Replace all uses of normal nodes.  Replace Phi uses
1297              // individually, so the separate Nodes can sink down
1298              // different paths.
1299              uint k = 1;
1300              while( u->in(k) != n ) k++;
1301              u->set_req( k, x );
1302              // x goes next to Phi input path
1303              x_ctrl = u->in(0)->in(k);
1304              --j;
1305            } else {              // Normal use
1306              // Replace all uses
1307              for( uint k = 0; k < u->req(); k++ ) {
1308                if( u->in(k) == n ) {
1309                  u->set_req( k, x );
1310                  --j;
1311                }
1312              }
1313              x_ctrl = get_ctrl(u);
1314            }
1315
1316            // Find control for 'x' next to use but not inside inner loops.
1317            // For inner loop uses get the preheader area.
1318            x_ctrl = place_near_use(x_ctrl);
1319
1320            if (n->is_Load()) {
1321              // For loads, add a control edge to a CFG node outside of the loop
1322              // to force them to not combine and return back inside the loop
1323              // during GVN optimization (4641526).
1324              //
1325              // Because we are setting the actual control input, factor in
1326              // the result from get_late_ctrl() so we respect any
1327              // anti-dependences. (6233005).
1328              x_ctrl = dom_lca(late_load_ctrl, x_ctrl);
1329
1330              // Don't allow the control input to be a CFG splitting node.
1331              // Such nodes should only have ProjNodes as outs, e.g. IfNode
1332              // should only have IfTrueNode and IfFalseNode (4985384).
1333              x_ctrl = find_non_split_ctrl(x_ctrl);
1334              assert(dom_depth(n_ctrl) <= dom_depth(x_ctrl), "n is later than its clone");
1335
1336              x->set_req(0, x_ctrl);
1337            }
1338            register_new_node(x, x_ctrl);
1339
1340            // Some institutional knowledge is needed here: 'x' is
1341            // yanked because if the optimizer runs GVN on it all the
1342            // cloned x's will common up and undo this optimization and
1343            // be forced back in the loop.  This is annoying because it
1344            // makes +VerifyOpto report false-positives on progress.  I
1345            // tried setting control edges on the x's to force them to
1346            // not combine, but the matching gets worried when it tries
1347            // to fold a StoreP and an AddP together (as part of an
1348            // address expression) and the AddP and StoreP have
1349            // different controls.
1350            if (!x->is_Load() && !x->is_DecodeNarrowPtr()) _igvn._worklist.yank(x);
1351          }
1352          _igvn.remove_dead_node(n);
1353        }
1354      }
1355    }
1356  }
1357
1358  try_move_store_after_loop(n);
1359
1360  // Check for Opaque2's who's loop has disappeared - who's input is in the
1361  // same loop nest as their output.  Remove 'em, they are no longer useful.
1362  if( n_op == Op_Opaque2 &&
1363      n->in(1) != NULL &&
1364      get_loop(get_ctrl(n)) == get_loop(get_ctrl(n->in(1))) ) {
1365    _igvn.replace_node( n, n->in(1) );
1366  }
1367}
1368
1369//------------------------------split_if_with_blocks---------------------------
1370// Check for aggressive application of 'split-if' optimization,
1371// using basic block level info.
1372void PhaseIdealLoop::split_if_with_blocks( VectorSet &visited, Node_Stack &nstack ) {
1373  Node *n = C->root();
1374  visited.set(n->_idx); // first, mark node as visited
1375  // Do pre-visit work for root
1376  n = split_if_with_blocks_pre( n );
1377  uint cnt = n->outcnt();
1378  uint i   = 0;
1379  while (true) {
1380    // Visit all children
1381    if (i < cnt) {
1382      Node* use = n->raw_out(i);
1383      ++i;
1384      if (use->outcnt() != 0 && !visited.test_set(use->_idx)) {
1385        // Now do pre-visit work for this use
1386        use = split_if_with_blocks_pre( use );
1387        nstack.push(n, i); // Save parent and next use's index.
1388        n   = use;         // Process all children of current use.
1389        cnt = use->outcnt();
1390        i   = 0;
1391      }
1392    }
1393    else {
1394      // All of n's children have been processed, complete post-processing.
1395      if (cnt != 0 && !n->is_Con()) {
1396        assert(has_node(n), "no dead nodes");
1397        split_if_with_blocks_post( n );
1398      }
1399      if (nstack.is_empty()) {
1400        // Finished all nodes on stack.
1401        break;
1402      }
1403      // Get saved parent node and next use's index. Visit the rest of uses.
1404      n   = nstack.node();
1405      cnt = n->outcnt();
1406      i   = nstack.index();
1407      nstack.pop();
1408    }
1409  }
1410}
1411
1412
1413//=============================================================================
1414//
1415//                   C L O N E   A   L O O P   B O D Y
1416//
1417
1418//------------------------------clone_iff--------------------------------------
1419// Passed in a Phi merging (recursively) some nearly equivalent Bool/Cmps.
1420// "Nearly" because all Nodes have been cloned from the original in the loop,
1421// but the fall-in edges to the Cmp are different.  Clone bool/Cmp pairs
1422// through the Phi recursively, and return a Bool.
1423BoolNode *PhaseIdealLoop::clone_iff( PhiNode *phi, IdealLoopTree *loop ) {
1424
1425  // Convert this Phi into a Phi merging Bools
1426  uint i;
1427  for( i = 1; i < phi->req(); i++ ) {
1428    Node *b = phi->in(i);
1429    if( b->is_Phi() ) {
1430      _igvn.replace_input_of(phi, i, clone_iff( b->as_Phi(), loop ));
1431    } else {
1432      assert( b->is_Bool(), "" );
1433    }
1434  }
1435
1436  Node *sample_bool = phi->in(1);
1437  Node *sample_cmp  = sample_bool->in(1);
1438
1439  // Make Phis to merge the Cmp's inputs.
1440  PhiNode *phi1 = new PhiNode( phi->in(0), Type::TOP );
1441  PhiNode *phi2 = new PhiNode( phi->in(0), Type::TOP );
1442  for( i = 1; i < phi->req(); i++ ) {
1443    Node *n1 = phi->in(i)->in(1)->in(1);
1444    Node *n2 = phi->in(i)->in(1)->in(2);
1445    phi1->set_req( i, n1 );
1446    phi2->set_req( i, n2 );
1447    phi1->set_type( phi1->type()->meet_speculative(n1->bottom_type()));
1448    phi2->set_type( phi2->type()->meet_speculative(n2->bottom_type()));
1449  }
1450  // See if these Phis have been made before.
1451  // Register with optimizer
1452  Node *hit1 = _igvn.hash_find_insert(phi1);
1453  if( hit1 ) {                  // Hit, toss just made Phi
1454    _igvn.remove_dead_node(phi1); // Remove new phi
1455    assert( hit1->is_Phi(), "" );
1456    phi1 = (PhiNode*)hit1;      // Use existing phi
1457  } else {                      // Miss
1458    _igvn.register_new_node_with_optimizer(phi1);
1459  }
1460  Node *hit2 = _igvn.hash_find_insert(phi2);
1461  if( hit2 ) {                  // Hit, toss just made Phi
1462    _igvn.remove_dead_node(phi2); // Remove new phi
1463    assert( hit2->is_Phi(), "" );
1464    phi2 = (PhiNode*)hit2;      // Use existing phi
1465  } else {                      // Miss
1466    _igvn.register_new_node_with_optimizer(phi2);
1467  }
1468  // Register Phis with loop/block info
1469  set_ctrl(phi1, phi->in(0));
1470  set_ctrl(phi2, phi->in(0));
1471  // Make a new Cmp
1472  Node *cmp = sample_cmp->clone();
1473  cmp->set_req( 1, phi1 );
1474  cmp->set_req( 2, phi2 );
1475  _igvn.register_new_node_with_optimizer(cmp);
1476  set_ctrl(cmp, phi->in(0));
1477
1478  // Make a new Bool
1479  Node *b = sample_bool->clone();
1480  b->set_req(1,cmp);
1481  _igvn.register_new_node_with_optimizer(b);
1482  set_ctrl(b, phi->in(0));
1483
1484  assert( b->is_Bool(), "" );
1485  return (BoolNode*)b;
1486}
1487
1488//------------------------------clone_bool-------------------------------------
1489// Passed in a Phi merging (recursively) some nearly equivalent Bool/Cmps.
1490// "Nearly" because all Nodes have been cloned from the original in the loop,
1491// but the fall-in edges to the Cmp are different.  Clone bool/Cmp pairs
1492// through the Phi recursively, and return a Bool.
1493CmpNode *PhaseIdealLoop::clone_bool( PhiNode *phi, IdealLoopTree *loop ) {
1494  uint i;
1495  // Convert this Phi into a Phi merging Bools
1496  for( i = 1; i < phi->req(); i++ ) {
1497    Node *b = phi->in(i);
1498    if( b->is_Phi() ) {
1499      _igvn.replace_input_of(phi, i, clone_bool( b->as_Phi(), loop ));
1500    } else {
1501      assert( b->is_Cmp() || b->is_top(), "inputs are all Cmp or TOP" );
1502    }
1503  }
1504
1505  Node *sample_cmp = phi->in(1);
1506
1507  // Make Phis to merge the Cmp's inputs.
1508  PhiNode *phi1 = new PhiNode( phi->in(0), Type::TOP );
1509  PhiNode *phi2 = new PhiNode( phi->in(0), Type::TOP );
1510  for( uint j = 1; j < phi->req(); j++ ) {
1511    Node *cmp_top = phi->in(j); // Inputs are all Cmp or TOP
1512    Node *n1, *n2;
1513    if( cmp_top->is_Cmp() ) {
1514      n1 = cmp_top->in(1);
1515      n2 = cmp_top->in(2);
1516    } else {
1517      n1 = n2 = cmp_top;
1518    }
1519    phi1->set_req( j, n1 );
1520    phi2->set_req( j, n2 );
1521    phi1->set_type(phi1->type()->meet_speculative(n1->bottom_type()));
1522    phi2->set_type(phi2->type()->meet_speculative(n2->bottom_type()));
1523  }
1524
1525  // See if these Phis have been made before.
1526  // Register with optimizer
1527  Node *hit1 = _igvn.hash_find_insert(phi1);
1528  if( hit1 ) {                  // Hit, toss just made Phi
1529    _igvn.remove_dead_node(phi1); // Remove new phi
1530    assert( hit1->is_Phi(), "" );
1531    phi1 = (PhiNode*)hit1;      // Use existing phi
1532  } else {                      // Miss
1533    _igvn.register_new_node_with_optimizer(phi1);
1534  }
1535  Node *hit2 = _igvn.hash_find_insert(phi2);
1536  if( hit2 ) {                  // Hit, toss just made Phi
1537    _igvn.remove_dead_node(phi2); // Remove new phi
1538    assert( hit2->is_Phi(), "" );
1539    phi2 = (PhiNode*)hit2;      // Use existing phi
1540  } else {                      // Miss
1541    _igvn.register_new_node_with_optimizer(phi2);
1542  }
1543  // Register Phis with loop/block info
1544  set_ctrl(phi1, phi->in(0));
1545  set_ctrl(phi2, phi->in(0));
1546  // Make a new Cmp
1547  Node *cmp = sample_cmp->clone();
1548  cmp->set_req( 1, phi1 );
1549  cmp->set_req( 2, phi2 );
1550  _igvn.register_new_node_with_optimizer(cmp);
1551  set_ctrl(cmp, phi->in(0));
1552
1553  assert( cmp->is_Cmp(), "" );
1554  return (CmpNode*)cmp;
1555}
1556
1557//------------------------------sink_use---------------------------------------
1558// If 'use' was in the loop-exit block, it now needs to be sunk
1559// below the post-loop merge point.
1560void PhaseIdealLoop::sink_use( Node *use, Node *post_loop ) {
1561  if (!use->is_CFG() && get_ctrl(use) == post_loop->in(2)) {
1562    set_ctrl(use, post_loop);
1563    for (DUIterator j = use->outs(); use->has_out(j); j++)
1564      sink_use(use->out(j), post_loop);
1565  }
1566}
1567
1568//------------------------------clone_loop-------------------------------------
1569//
1570//                   C L O N E   A   L O O P   B O D Y
1571//
1572// This is the basic building block of the loop optimizations.  It clones an
1573// entire loop body.  It makes an old_new loop body mapping; with this mapping
1574// you can find the new-loop equivalent to an old-loop node.  All new-loop
1575// nodes are exactly equal to their old-loop counterparts, all edges are the
1576// same.  All exits from the old-loop now have a RegionNode that merges the
1577// equivalent new-loop path.  This is true even for the normal "loop-exit"
1578// condition.  All uses of loop-invariant old-loop values now come from (one
1579// or more) Phis that merge their new-loop equivalents.
1580//
1581// This operation leaves the graph in an illegal state: there are two valid
1582// control edges coming from the loop pre-header to both loop bodies.  I'll
1583// definitely have to hack the graph after running this transform.
1584//
1585// From this building block I will further edit edges to perform loop peeling
1586// or loop unrolling or iteration splitting (Range-Check-Elimination), etc.
1587//
1588// Parameter side_by_size_idom:
1589//   When side_by_size_idom is NULL, the dominator tree is constructed for
1590//      the clone loop to dominate the original.  Used in construction of
1591//      pre-main-post loop sequence.
1592//   When nonnull, the clone and original are side-by-side, both are
1593//      dominated by the side_by_side_idom node.  Used in construction of
1594//      unswitched loops.
1595void PhaseIdealLoop::clone_loop( IdealLoopTree *loop, Node_List &old_new, int dd,
1596                                 Node* side_by_side_idom) {
1597
1598  if (C->do_vector_loop() && PrintOpto) {
1599    const char* mname = C->method()->name()->as_quoted_ascii();
1600    if (mname != NULL) {
1601      tty->print("PhaseIdealLoop::clone_loop: for vectorize method %s\n", mname);
1602    }
1603  }
1604
1605  CloneMap& cm = C->clone_map();
1606  Dict* dict = cm.dict();
1607  if (C->do_vector_loop()) {
1608    cm.set_clone_idx(cm.max_gen()+1);
1609#ifndef PRODUCT
1610    if (PrintOpto) {
1611      tty->print_cr("PhaseIdealLoop::clone_loop: _clone_idx %d", cm.clone_idx());
1612      loop->dump_head();
1613    }
1614#endif
1615  }
1616
1617  // Step 1: Clone the loop body.  Make the old->new mapping.
1618  uint i;
1619  for( i = 0; i < loop->_body.size(); i++ ) {
1620    Node *old = loop->_body.at(i);
1621    Node *nnn = old->clone();
1622    old_new.map( old->_idx, nnn );
1623    if (C->do_vector_loop()) {
1624      cm.verify_insert_and_clone(old, nnn, cm.clone_idx());
1625    }
1626    _igvn.register_new_node_with_optimizer(nnn);
1627  }
1628
1629
1630  // Step 2: Fix the edges in the new body.  If the old input is outside the
1631  // loop use it.  If the old input is INside the loop, use the corresponding
1632  // new node instead.
1633  for( i = 0; i < loop->_body.size(); i++ ) {
1634    Node *old = loop->_body.at(i);
1635    Node *nnn = old_new[old->_idx];
1636    // Fix CFG/Loop controlling the new node
1637    if (has_ctrl(old)) {
1638      set_ctrl(nnn, old_new[get_ctrl(old)->_idx]);
1639    } else {
1640      set_loop(nnn, loop->_parent);
1641      if (old->outcnt() > 0) {
1642        set_idom( nnn, old_new[idom(old)->_idx], dd );
1643      }
1644    }
1645    // Correct edges to the new node
1646    for( uint j = 0; j < nnn->req(); j++ ) {
1647        Node *n = nnn->in(j);
1648        if( n ) {
1649          IdealLoopTree *old_in_loop = get_loop( has_ctrl(n) ? get_ctrl(n) : n );
1650          if( loop->is_member( old_in_loop ) )
1651            nnn->set_req(j, old_new[n->_idx]);
1652        }
1653    }
1654    _igvn.hash_find_insert(nnn);
1655  }
1656  Node *newhead = old_new[loop->_head->_idx];
1657  set_idom(newhead, newhead->in(LoopNode::EntryControl), dd);
1658
1659
1660  // Step 3: Now fix control uses.  Loop varying control uses have already
1661  // been fixed up (as part of all input edges in Step 2).  Loop invariant
1662  // control uses must be either an IfFalse or an IfTrue.  Make a merge
1663  // point to merge the old and new IfFalse/IfTrue nodes; make the use
1664  // refer to this.
1665  ResourceArea *area = Thread::current()->resource_area();
1666  Node_List worklist(area);
1667  uint new_counter = C->unique();
1668  for( i = 0; i < loop->_body.size(); i++ ) {
1669    Node* old = loop->_body.at(i);
1670    if( !old->is_CFG() ) continue;
1671    Node* nnn = old_new[old->_idx];
1672
1673    // Copy uses to a worklist, so I can munge the def-use info
1674    // with impunity.
1675    for (DUIterator_Fast jmax, j = old->fast_outs(jmax); j < jmax; j++)
1676      worklist.push(old->fast_out(j));
1677
1678    while( worklist.size() ) {  // Visit all uses
1679      Node *use = worklist.pop();
1680      if (!has_node(use))  continue; // Ignore dead nodes
1681      IdealLoopTree *use_loop = get_loop( has_ctrl(use) ? get_ctrl(use) : use );
1682      if( !loop->is_member( use_loop ) && use->is_CFG() ) {
1683        // Both OLD and USE are CFG nodes here.
1684        assert( use->is_Proj(), "" );
1685
1686        // Clone the loop exit control projection
1687        Node *newuse = use->clone();
1688        if (C->do_vector_loop()) {
1689          cm.verify_insert_and_clone(use, newuse, cm.clone_idx());
1690        }
1691        newuse->set_req(0,nnn);
1692        _igvn.register_new_node_with_optimizer(newuse);
1693        set_loop(newuse, use_loop);
1694        set_idom(newuse, nnn, dom_depth(nnn) + 1 );
1695
1696        // We need a Region to merge the exit from the peeled body and the
1697        // exit from the old loop body.
1698        RegionNode *r = new RegionNode(3);
1699        // Map the old use to the new merge point
1700        old_new.map( use->_idx, r );
1701        uint dd_r = MIN2(dom_depth(newuse),dom_depth(use));
1702        assert( dd_r >= dom_depth(dom_lca(newuse,use)), "" );
1703
1704        // The original user of 'use' uses 'r' instead.
1705        for (DUIterator_Last lmin, l = use->last_outs(lmin); l >= lmin;) {
1706          Node* useuse = use->last_out(l);
1707          _igvn.rehash_node_delayed(useuse);
1708          uint uses_found = 0;
1709          if( useuse->in(0) == use ) {
1710            useuse->set_req(0, r);
1711            uses_found++;
1712            if( useuse->is_CFG() ) {
1713              assert( dom_depth(useuse) > dd_r, "" );
1714              set_idom(useuse, r, dom_depth(useuse));
1715            }
1716          }
1717          for( uint k = 1; k < useuse->req(); k++ ) {
1718            if( useuse->in(k) == use ) {
1719              useuse->set_req(k, r);
1720              uses_found++;
1721            }
1722          }
1723          l -= uses_found;    // we deleted 1 or more copies of this edge
1724        }
1725
1726        // Now finish up 'r'
1727        r->set_req( 1, newuse );
1728        r->set_req( 2,    use );
1729        _igvn.register_new_node_with_optimizer(r);
1730        set_loop(r, use_loop);
1731        set_idom(r, !side_by_side_idom ? newuse->in(0) : side_by_side_idom, dd_r);
1732      } // End of if a loop-exit test
1733    }
1734  }
1735
1736  // Step 4: If loop-invariant use is not control, it must be dominated by a
1737  // loop exit IfFalse/IfTrue.  Find "proper" loop exit.  Make a Region
1738  // there if needed.  Make a Phi there merging old and new used values.
1739  Node_List *split_if_set = NULL;
1740  Node_List *split_bool_set = NULL;
1741  Node_List *split_cex_set = NULL;
1742  for( i = 0; i < loop->_body.size(); i++ ) {
1743    Node* old = loop->_body.at(i);
1744    Node* nnn = old_new[old->_idx];
1745    // Copy uses to a worklist, so I can munge the def-use info
1746    // with impunity.
1747    for (DUIterator_Fast jmax, j = old->fast_outs(jmax); j < jmax; j++)
1748      worklist.push(old->fast_out(j));
1749
1750    while( worklist.size() ) {
1751      Node *use = worklist.pop();
1752      if (!has_node(use))  continue; // Ignore dead nodes
1753      if (use->in(0) == C->top())  continue;
1754      IdealLoopTree *use_loop = get_loop( has_ctrl(use) ? get_ctrl(use) : use );
1755      // Check for data-use outside of loop - at least one of OLD or USE
1756      // must not be a CFG node.
1757      if( !loop->is_member( use_loop ) && (!old->is_CFG() || !use->is_CFG())) {
1758
1759        // If the Data use is an IF, that means we have an IF outside of the
1760        // loop that is switching on a condition that is set inside of the
1761        // loop.  Happens if people set a loop-exit flag; then test the flag
1762        // in the loop to break the loop, then test is again outside of the
1763        // loop to determine which way the loop exited.
1764        // Loop predicate If node connects to Bool node through Opaque1 node.
1765        if (use->is_If() || use->is_CMove() || C->is_predicate_opaq(use)) {
1766          // Since this code is highly unlikely, we lazily build the worklist
1767          // of such Nodes to go split.
1768          if( !split_if_set )
1769            split_if_set = new Node_List(area);
1770          split_if_set->push(use);
1771        }
1772        if( use->is_Bool() ) {
1773          if( !split_bool_set )
1774            split_bool_set = new Node_List(area);
1775          split_bool_set->push(use);
1776        }
1777        if( use->Opcode() == Op_CreateEx ) {
1778          if( !split_cex_set )
1779            split_cex_set = new Node_List(area);
1780          split_cex_set->push(use);
1781        }
1782
1783
1784        // Get "block" use is in
1785        uint idx = 0;
1786        while( use->in(idx) != old ) idx++;
1787        Node *prev = use->is_CFG() ? use : get_ctrl(use);
1788        assert( !loop->is_member( get_loop( prev ) ), "" );
1789        Node *cfg = prev->_idx >= new_counter
1790          ? prev->in(2)
1791          : idom(prev);
1792        if( use->is_Phi() )     // Phi use is in prior block
1793          cfg = prev->in(idx);  // NOT in block of Phi itself
1794        if (cfg->is_top()) {    // Use is dead?
1795          _igvn.replace_input_of(use, idx, C->top());
1796          continue;
1797        }
1798
1799        while( !loop->is_member( get_loop( cfg ) ) ) {
1800          prev = cfg;
1801          cfg = cfg->_idx >= new_counter ? cfg->in(2) : idom(cfg);
1802        }
1803        // If the use occurs after merging several exits from the loop, then
1804        // old value must have dominated all those exits.  Since the same old
1805        // value was used on all those exits we did not need a Phi at this
1806        // merge point.  NOW we do need a Phi here.  Each loop exit value
1807        // is now merged with the peeled body exit; each exit gets its own
1808        // private Phi and those Phis need to be merged here.
1809        Node *phi;
1810        if( prev->is_Region() ) {
1811          if( idx == 0 ) {      // Updating control edge?
1812            phi = prev;         // Just use existing control
1813          } else {              // Else need a new Phi
1814            phi = PhiNode::make( prev, old );
1815            // Now recursively fix up the new uses of old!
1816            for( uint i = 1; i < prev->req(); i++ ) {
1817              worklist.push(phi); // Onto worklist once for each 'old' input
1818            }
1819          }
1820        } else {
1821          // Get new RegionNode merging old and new loop exits
1822          prev = old_new[prev->_idx];
1823          assert( prev, "just made this in step 7" );
1824          if( idx == 0 ) {      // Updating control edge?
1825            phi = prev;         // Just use existing control
1826          } else {              // Else need a new Phi
1827            // Make a new Phi merging data values properly
1828            phi = PhiNode::make( prev, old );
1829            phi->set_req( 1, nnn );
1830          }
1831        }
1832        // If inserting a new Phi, check for prior hits
1833        if( idx != 0 ) {
1834          Node *hit = _igvn.hash_find_insert(phi);
1835          if( hit == NULL ) {
1836           _igvn.register_new_node_with_optimizer(phi); // Register new phi
1837          } else {                                      // or
1838            // Remove the new phi from the graph and use the hit
1839            _igvn.remove_dead_node(phi);
1840            phi = hit;                                  // Use existing phi
1841          }
1842          set_ctrl(phi, prev);
1843        }
1844        // Make 'use' use the Phi instead of the old loop body exit value
1845        _igvn.replace_input_of(use, idx, phi);
1846        if( use->_idx >= new_counter ) { // If updating new phis
1847          // Not needed for correctness, but prevents a weak assert
1848          // in AddPNode from tripping (when we end up with different
1849          // base & derived Phis that will become the same after
1850          // IGVN does CSE).
1851          Node *hit = _igvn.hash_find_insert(use);
1852          if( hit )             // Go ahead and re-hash for hits.
1853            _igvn.replace_node( use, hit );
1854        }
1855
1856        // If 'use' was in the loop-exit block, it now needs to be sunk
1857        // below the post-loop merge point.
1858        sink_use( use, prev );
1859      }
1860    }
1861  }
1862
1863  // Check for IFs that need splitting/cloning.  Happens if an IF outside of
1864  // the loop uses a condition set in the loop.  The original IF probably
1865  // takes control from one or more OLD Regions (which in turn get from NEW
1866  // Regions).  In any case, there will be a set of Phis for each merge point
1867  // from the IF up to where the original BOOL def exists the loop.
1868  if( split_if_set ) {
1869    while( split_if_set->size() ) {
1870      Node *iff = split_if_set->pop();
1871      if( iff->in(1)->is_Phi() ) {
1872        BoolNode *b = clone_iff( iff->in(1)->as_Phi(), loop );
1873        _igvn.replace_input_of(iff, 1, b);
1874      }
1875    }
1876  }
1877  if( split_bool_set ) {
1878    while( split_bool_set->size() ) {
1879      Node *b = split_bool_set->pop();
1880      Node *phi = b->in(1);
1881      assert( phi->is_Phi(), "" );
1882      CmpNode *cmp = clone_bool( (PhiNode*)phi, loop );
1883      _igvn.replace_input_of(b, 1, cmp);
1884    }
1885  }
1886  if( split_cex_set ) {
1887    while( split_cex_set->size() ) {
1888      Node *b = split_cex_set->pop();
1889      assert( b->in(0)->is_Region(), "" );
1890      assert( b->in(1)->is_Phi(), "" );
1891      assert( b->in(0)->in(0) == b->in(1)->in(0), "" );
1892      split_up( b, b->in(0), NULL );
1893    }
1894  }
1895
1896}
1897
1898
1899//---------------------- stride_of_possible_iv -------------------------------------
1900// Looks for an iff/bool/comp with one operand of the compare
1901// being a cycle involving an add and a phi,
1902// with an optional truncation (left-shift followed by a right-shift)
1903// of the add. Returns zero if not an iv.
1904int PhaseIdealLoop::stride_of_possible_iv(Node* iff) {
1905  Node* trunc1 = NULL;
1906  Node* trunc2 = NULL;
1907  const TypeInt* ttype = NULL;
1908  if (!iff->is_If() || iff->in(1) == NULL || !iff->in(1)->is_Bool()) {
1909    return 0;
1910  }
1911  BoolNode* bl = iff->in(1)->as_Bool();
1912  Node* cmp = bl->in(1);
1913  if (!cmp || (cmp->Opcode() != Op_CmpI && cmp->Opcode() != Op_CmpU)) {
1914    return 0;
1915  }
1916  // Must have an invariant operand
1917  if (is_member(get_loop(iff), get_ctrl(cmp->in(2)))) {
1918    return 0;
1919  }
1920  Node* add2 = NULL;
1921  Node* cmp1 = cmp->in(1);
1922  if (cmp1->is_Phi()) {
1923    // (If (Bool (CmpX phi:(Phi ...(Optional-trunc(AddI phi add2))) )))
1924    Node* phi = cmp1;
1925    for (uint i = 1; i < phi->req(); i++) {
1926      Node* in = phi->in(i);
1927      Node* add = CountedLoopNode::match_incr_with_optional_truncation(in,
1928                                &trunc1, &trunc2, &ttype);
1929      if (add && add->in(1) == phi) {
1930        add2 = add->in(2);
1931        break;
1932      }
1933    }
1934  } else {
1935    // (If (Bool (CmpX addtrunc:(Optional-trunc((AddI (Phi ...addtrunc...) add2)) )))
1936    Node* addtrunc = cmp1;
1937    Node* add = CountedLoopNode::match_incr_with_optional_truncation(addtrunc,
1938                                &trunc1, &trunc2, &ttype);
1939    if (add && add->in(1)->is_Phi()) {
1940      Node* phi = add->in(1);
1941      for (uint i = 1; i < phi->req(); i++) {
1942        if (phi->in(i) == addtrunc) {
1943          add2 = add->in(2);
1944          break;
1945        }
1946      }
1947    }
1948  }
1949  if (add2 != NULL) {
1950    const TypeInt* add2t = _igvn.type(add2)->is_int();
1951    if (add2t->is_con()) {
1952      return add2t->get_con();
1953    }
1954  }
1955  return 0;
1956}
1957
1958
1959//---------------------- stay_in_loop -------------------------------------
1960// Return the (unique) control output node that's in the loop (if it exists.)
1961Node* PhaseIdealLoop::stay_in_loop( Node* n, IdealLoopTree *loop) {
1962  Node* unique = NULL;
1963  if (!n) return NULL;
1964  for (DUIterator_Fast imax, i = n->fast_outs(imax); i < imax; i++) {
1965    Node* use = n->fast_out(i);
1966    if (!has_ctrl(use) && loop->is_member(get_loop(use))) {
1967      if (unique != NULL) {
1968        return NULL;
1969      }
1970      unique = use;
1971    }
1972  }
1973  return unique;
1974}
1975
1976//------------------------------ register_node -------------------------------------
1977// Utility to register node "n" with PhaseIdealLoop
1978void PhaseIdealLoop::register_node(Node* n, IdealLoopTree *loop, Node* pred, int ddepth) {
1979  _igvn.register_new_node_with_optimizer(n);
1980  loop->_body.push(n);
1981  if (n->is_CFG()) {
1982    set_loop(n, loop);
1983    set_idom(n, pred, ddepth);
1984  } else {
1985    set_ctrl(n, pred);
1986  }
1987}
1988
1989//------------------------------ proj_clone -------------------------------------
1990// Utility to create an if-projection
1991ProjNode* PhaseIdealLoop::proj_clone(ProjNode* p, IfNode* iff) {
1992  ProjNode* c = p->clone()->as_Proj();
1993  c->set_req(0, iff);
1994  return c;
1995}
1996
1997//------------------------------ short_circuit_if -------------------------------------
1998// Force the iff control output to be the live_proj
1999Node* PhaseIdealLoop::short_circuit_if(IfNode* iff, ProjNode* live_proj) {
2000  guarantee(live_proj != NULL, "null projection");
2001  int proj_con = live_proj->_con;
2002  assert(proj_con == 0 || proj_con == 1, "false or true projection");
2003  Node *con = _igvn.intcon(proj_con);
2004  set_ctrl(con, C->root());
2005  if (iff) {
2006    iff->set_req(1, con);
2007  }
2008  return con;
2009}
2010
2011//------------------------------ insert_if_before_proj -------------------------------------
2012// Insert a new if before an if projection (* - new node)
2013//
2014// before
2015//           if(test)
2016//           /     \
2017//          v       v
2018//    other-proj   proj (arg)
2019//
2020// after
2021//           if(test)
2022//           /     \
2023//          /       v
2024//         |      * proj-clone
2025//         v          |
2026//    other-proj      v
2027//                * new_if(relop(cmp[IU](left,right)))
2028//                  /  \
2029//                 v    v
2030//         * new-proj  proj
2031//         (returned)
2032//
2033ProjNode* PhaseIdealLoop::insert_if_before_proj(Node* left, bool Signed, BoolTest::mask relop, Node* right, ProjNode* proj) {
2034  IfNode* iff = proj->in(0)->as_If();
2035  IdealLoopTree *loop = get_loop(proj);
2036  ProjNode *other_proj = iff->proj_out(!proj->is_IfTrue())->as_Proj();
2037  int ddepth = dom_depth(proj);
2038
2039  _igvn.rehash_node_delayed(iff);
2040  _igvn.rehash_node_delayed(proj);
2041
2042  proj->set_req(0, NULL);  // temporary disconnect
2043  ProjNode* proj2 = proj_clone(proj, iff);
2044  register_node(proj2, loop, iff, ddepth);
2045
2046  Node* cmp = Signed ? (Node*) new CmpINode(left, right) : (Node*) new CmpUNode(left, right);
2047  register_node(cmp, loop, proj2, ddepth);
2048
2049  BoolNode* bol = new BoolNode(cmp, relop);
2050  register_node(bol, loop, proj2, ddepth);
2051
2052  int opcode = iff->Opcode();
2053  assert(opcode == Op_If || opcode == Op_RangeCheck, "unexpected opcode");
2054  IfNode* new_if = (opcode == Op_If) ? new IfNode(proj2, bol, iff->_prob, iff->_fcnt):
2055    new RangeCheckNode(proj2, bol, iff->_prob, iff->_fcnt);
2056  register_node(new_if, loop, proj2, ddepth);
2057
2058  proj->set_req(0, new_if); // reattach
2059  set_idom(proj, new_if, ddepth);
2060
2061  ProjNode* new_exit = proj_clone(other_proj, new_if)->as_Proj();
2062  guarantee(new_exit != NULL, "null exit node");
2063  register_node(new_exit, get_loop(other_proj), new_if, ddepth);
2064
2065  return new_exit;
2066}
2067
2068//------------------------------ insert_region_before_proj -------------------------------------
2069// Insert a region before an if projection (* - new node)
2070//
2071// before
2072//           if(test)
2073//          /      |
2074//         v       |
2075//       proj      v
2076//               other-proj
2077//
2078// after
2079//           if(test)
2080//          /      |
2081//         v       |
2082// * proj-clone    v
2083//         |     other-proj
2084//         v
2085// * new-region
2086//         |
2087//         v
2088// *      dum_if
2089//       /     \
2090//      v       \
2091// * dum-proj    v
2092//              proj
2093//
2094RegionNode* PhaseIdealLoop::insert_region_before_proj(ProjNode* proj) {
2095  IfNode* iff = proj->in(0)->as_If();
2096  IdealLoopTree *loop = get_loop(proj);
2097  ProjNode *other_proj = iff->proj_out(!proj->is_IfTrue())->as_Proj();
2098  int ddepth = dom_depth(proj);
2099
2100  _igvn.rehash_node_delayed(iff);
2101  _igvn.rehash_node_delayed(proj);
2102
2103  proj->set_req(0, NULL);  // temporary disconnect
2104  ProjNode* proj2 = proj_clone(proj, iff);
2105  register_node(proj2, loop, iff, ddepth);
2106
2107  RegionNode* reg = new RegionNode(2);
2108  reg->set_req(1, proj2);
2109  register_node(reg, loop, iff, ddepth);
2110
2111  IfNode* dum_if = new IfNode(reg, short_circuit_if(NULL, proj), iff->_prob, iff->_fcnt);
2112  register_node(dum_if, loop, reg, ddepth);
2113
2114  proj->set_req(0, dum_if); // reattach
2115  set_idom(proj, dum_if, ddepth);
2116
2117  ProjNode* dum_proj = proj_clone(other_proj, dum_if);
2118  register_node(dum_proj, loop, dum_if, ddepth);
2119
2120  return reg;
2121}
2122
2123//------------------------------ insert_cmpi_loop_exit -------------------------------------
2124// Clone a signed compare loop exit from an unsigned compare and
2125// insert it before the unsigned cmp on the stay-in-loop path.
2126// All new nodes inserted in the dominator tree between the original
2127// if and it's projections.  The original if test is replaced with
2128// a constant to force the stay-in-loop path.
2129//
2130// This is done to make sure that the original if and it's projections
2131// still dominate the same set of control nodes, that the ctrl() relation
2132// from data nodes to them is preserved, and that their loop nesting is
2133// preserved.
2134//
2135// before
2136//          if(i <u limit)    unsigned compare loop exit
2137//         /       |
2138//        v        v
2139//   exit-proj   stay-in-loop-proj
2140//
2141// after
2142//          if(stay-in-loop-const)  original if
2143//         /       |
2144//        /        v
2145//       /  if(i <  limit)    new signed test
2146//      /  /       |
2147//     /  /        v
2148//    /  /  if(i <u limit)    new cloned unsigned test
2149//   /  /   /      |
2150//   v  v  v       |
2151//    region       |
2152//        |        |
2153//      dum-if     |
2154//     /  |        |
2155// ether  |        |
2156//        v        v
2157//   exit-proj   stay-in-loop-proj
2158//
2159IfNode* PhaseIdealLoop::insert_cmpi_loop_exit(IfNode* if_cmpu, IdealLoopTree *loop) {
2160  const bool Signed   = true;
2161  const bool Unsigned = false;
2162
2163  BoolNode* bol = if_cmpu->in(1)->as_Bool();
2164  if (bol->_test._test != BoolTest::lt) return NULL;
2165  CmpNode* cmpu = bol->in(1)->as_Cmp();
2166  if (cmpu->Opcode() != Op_CmpU) return NULL;
2167  int stride = stride_of_possible_iv(if_cmpu);
2168  if (stride == 0) return NULL;
2169
2170  Node* lp_proj = stay_in_loop(if_cmpu, loop);
2171  guarantee(lp_proj != NULL, "null loop node");
2172
2173  ProjNode* lp_continue = lp_proj->as_Proj();
2174  ProjNode* lp_exit     = if_cmpu->proj_out(!lp_continue->is_IfTrue())->as_Proj();
2175
2176  Node* limit = NULL;
2177  if (stride > 0) {
2178    limit = cmpu->in(2);
2179  } else {
2180    limit = _igvn.makecon(TypeInt::ZERO);
2181    set_ctrl(limit, C->root());
2182  }
2183  // Create a new region on the exit path
2184  RegionNode* reg = insert_region_before_proj(lp_exit);
2185  guarantee(reg != NULL, "null region node");
2186
2187  // Clone the if-cmpu-true-false using a signed compare
2188  BoolTest::mask rel_i = stride > 0 ? bol->_test._test : BoolTest::ge;
2189  ProjNode* cmpi_exit = insert_if_before_proj(cmpu->in(1), Signed, rel_i, limit, lp_continue);
2190  reg->add_req(cmpi_exit);
2191
2192  // Clone the if-cmpu-true-false
2193  BoolTest::mask rel_u = bol->_test._test;
2194  ProjNode* cmpu_exit = insert_if_before_proj(cmpu->in(1), Unsigned, rel_u, cmpu->in(2), lp_continue);
2195  reg->add_req(cmpu_exit);
2196
2197  // Force original if to stay in loop.
2198  short_circuit_if(if_cmpu, lp_continue);
2199
2200  return cmpi_exit->in(0)->as_If();
2201}
2202
2203//------------------------------ remove_cmpi_loop_exit -------------------------------------
2204// Remove a previously inserted signed compare loop exit.
2205void PhaseIdealLoop::remove_cmpi_loop_exit(IfNode* if_cmp, IdealLoopTree *loop) {
2206  Node* lp_proj = stay_in_loop(if_cmp, loop);
2207  assert(if_cmp->in(1)->in(1)->Opcode() == Op_CmpI &&
2208         stay_in_loop(lp_proj, loop)->is_If() &&
2209         stay_in_loop(lp_proj, loop)->in(1)->in(1)->Opcode() == Op_CmpU, "inserted cmpi before cmpu");
2210  Node *con = _igvn.makecon(lp_proj->is_IfTrue() ? TypeInt::ONE : TypeInt::ZERO);
2211  set_ctrl(con, C->root());
2212  if_cmp->set_req(1, con);
2213}
2214
2215//------------------------------ scheduled_nodelist -------------------------------------
2216// Create a post order schedule of nodes that are in the
2217// "member" set.  The list is returned in "sched".
2218// The first node in "sched" is the loop head, followed by
2219// nodes which have no inputs in the "member" set, and then
2220// followed by the nodes that have an immediate input dependence
2221// on a node in "sched".
2222void PhaseIdealLoop::scheduled_nodelist( IdealLoopTree *loop, VectorSet& member, Node_List &sched ) {
2223
2224  assert(member.test(loop->_head->_idx), "loop head must be in member set");
2225  Arena *a = Thread::current()->resource_area();
2226  VectorSet visited(a);
2227  Node_Stack nstack(a, loop->_body.size());
2228
2229  Node* n  = loop->_head;  // top of stack is cached in "n"
2230  uint idx = 0;
2231  visited.set(n->_idx);
2232
2233  // Initially push all with no inputs from within member set
2234  for(uint i = 0; i < loop->_body.size(); i++ ) {
2235    Node *elt = loop->_body.at(i);
2236    if (member.test(elt->_idx)) {
2237      bool found = false;
2238      for (uint j = 0; j < elt->req(); j++) {
2239        Node* def = elt->in(j);
2240        if (def && member.test(def->_idx) && def != elt) {
2241          found = true;
2242          break;
2243        }
2244      }
2245      if (!found && elt != loop->_head) {
2246        nstack.push(n, idx);
2247        n = elt;
2248        assert(!visited.test(n->_idx), "not seen yet");
2249        visited.set(n->_idx);
2250      }
2251    }
2252  }
2253
2254  // traverse out's that are in the member set
2255  while (true) {
2256    if (idx < n->outcnt()) {
2257      Node* use = n->raw_out(idx);
2258      idx++;
2259      if (!visited.test_set(use->_idx)) {
2260        if (member.test(use->_idx)) {
2261          nstack.push(n, idx);
2262          n = use;
2263          idx = 0;
2264        }
2265      }
2266    } else {
2267      // All outputs processed
2268      sched.push(n);
2269      if (nstack.is_empty()) break;
2270      n   = nstack.node();
2271      idx = nstack.index();
2272      nstack.pop();
2273    }
2274  }
2275}
2276
2277
2278//------------------------------ has_use_in_set -------------------------------------
2279// Has a use in the vector set
2280bool PhaseIdealLoop::has_use_in_set( Node* n, VectorSet& vset ) {
2281  for (DUIterator_Fast jmax, j = n->fast_outs(jmax); j < jmax; j++) {
2282    Node* use = n->fast_out(j);
2283    if (vset.test(use->_idx)) {
2284      return true;
2285    }
2286  }
2287  return false;
2288}
2289
2290
2291//------------------------------ has_use_internal_to_set -------------------------------------
2292// Has use internal to the vector set (ie. not in a phi at the loop head)
2293bool PhaseIdealLoop::has_use_internal_to_set( Node* n, VectorSet& vset, IdealLoopTree *loop ) {
2294  Node* head  = loop->_head;
2295  for (DUIterator_Fast jmax, j = n->fast_outs(jmax); j < jmax; j++) {
2296    Node* use = n->fast_out(j);
2297    if (vset.test(use->_idx) && !(use->is_Phi() && use->in(0) == head)) {
2298      return true;
2299    }
2300  }
2301  return false;
2302}
2303
2304
2305//------------------------------ clone_for_use_outside_loop -------------------------------------
2306// clone "n" for uses that are outside of loop
2307int PhaseIdealLoop::clone_for_use_outside_loop( IdealLoopTree *loop, Node* n, Node_List& worklist ) {
2308  int cloned = 0;
2309  assert(worklist.size() == 0, "should be empty");
2310  for (DUIterator_Fast jmax, j = n->fast_outs(jmax); j < jmax; j++) {
2311    Node* use = n->fast_out(j);
2312    if( !loop->is_member(get_loop(has_ctrl(use) ? get_ctrl(use) : use)) ) {
2313      worklist.push(use);
2314    }
2315  }
2316  while( worklist.size() ) {
2317    Node *use = worklist.pop();
2318    if (!has_node(use) || use->in(0) == C->top()) continue;
2319    uint j;
2320    for (j = 0; j < use->req(); j++) {
2321      if (use->in(j) == n) break;
2322    }
2323    assert(j < use->req(), "must be there");
2324
2325    // clone "n" and insert it between the inputs of "n" and the use outside the loop
2326    Node* n_clone = n->clone();
2327    _igvn.replace_input_of(use, j, n_clone);
2328    cloned++;
2329    Node* use_c;
2330    if (!use->is_Phi()) {
2331      use_c = has_ctrl(use) ? get_ctrl(use) : use->in(0);
2332    } else {
2333      // Use in a phi is considered a use in the associated predecessor block
2334      use_c = use->in(0)->in(j);
2335    }
2336    set_ctrl(n_clone, use_c);
2337    assert(!loop->is_member(get_loop(use_c)), "should be outside loop");
2338    get_loop(use_c)->_body.push(n_clone);
2339    _igvn.register_new_node_with_optimizer(n_clone);
2340#if !defined(PRODUCT)
2341    if (TracePartialPeeling) {
2342      tty->print_cr("loop exit cloning old: %d new: %d newbb: %d", n->_idx, n_clone->_idx, get_ctrl(n_clone)->_idx);
2343    }
2344#endif
2345  }
2346  return cloned;
2347}
2348
2349
2350//------------------------------ clone_for_special_use_inside_loop -------------------------------------
2351// clone "n" for special uses that are in the not_peeled region.
2352// If these def-uses occur in separate blocks, the code generator
2353// marks the method as not compilable.  For example, if a "BoolNode"
2354// is in a different basic block than the "IfNode" that uses it, then
2355// the compilation is aborted in the code generator.
2356void PhaseIdealLoop::clone_for_special_use_inside_loop( IdealLoopTree *loop, Node* n,
2357                                                        VectorSet& not_peel, Node_List& sink_list, Node_List& worklist ) {
2358  if (n->is_Phi() || n->is_Load()) {
2359    return;
2360  }
2361  assert(worklist.size() == 0, "should be empty");
2362  for (DUIterator_Fast jmax, j = n->fast_outs(jmax); j < jmax; j++) {
2363    Node* use = n->fast_out(j);
2364    if ( not_peel.test(use->_idx) &&
2365         (use->is_If() || use->is_CMove() || use->is_Bool()) &&
2366         use->in(1) == n)  {
2367      worklist.push(use);
2368    }
2369  }
2370  if (worklist.size() > 0) {
2371    // clone "n" and insert it between inputs of "n" and the use
2372    Node* n_clone = n->clone();
2373    loop->_body.push(n_clone);
2374    _igvn.register_new_node_with_optimizer(n_clone);
2375    set_ctrl(n_clone, get_ctrl(n));
2376    sink_list.push(n_clone);
2377    not_peel <<= n_clone->_idx;  // add n_clone to not_peel set.
2378#if !defined(PRODUCT)
2379    if (TracePartialPeeling) {
2380      tty->print_cr("special not_peeled cloning old: %d new: %d", n->_idx, n_clone->_idx);
2381    }
2382#endif
2383    while( worklist.size() ) {
2384      Node *use = worklist.pop();
2385      _igvn.rehash_node_delayed(use);
2386      for (uint j = 1; j < use->req(); j++) {
2387        if (use->in(j) == n) {
2388          use->set_req(j, n_clone);
2389        }
2390      }
2391    }
2392  }
2393}
2394
2395
2396//------------------------------ insert_phi_for_loop -------------------------------------
2397// Insert phi(lp_entry_val, back_edge_val) at use->in(idx) for loop lp if phi does not already exist
2398void PhaseIdealLoop::insert_phi_for_loop( Node* use, uint idx, Node* lp_entry_val, Node* back_edge_val, LoopNode* lp ) {
2399  Node *phi = PhiNode::make(lp, back_edge_val);
2400  phi->set_req(LoopNode::EntryControl, lp_entry_val);
2401  // Use existing phi if it already exists
2402  Node *hit = _igvn.hash_find_insert(phi);
2403  if( hit == NULL ) {
2404    _igvn.register_new_node_with_optimizer(phi);
2405    set_ctrl(phi, lp);
2406  } else {
2407    // Remove the new phi from the graph and use the hit
2408    _igvn.remove_dead_node(phi);
2409    phi = hit;
2410  }
2411  _igvn.replace_input_of(use, idx, phi);
2412}
2413
2414#ifdef ASSERT
2415//------------------------------ is_valid_loop_partition -------------------------------------
2416// Validate the loop partition sets: peel and not_peel
2417bool PhaseIdealLoop::is_valid_loop_partition( IdealLoopTree *loop, VectorSet& peel, Node_List& peel_list,
2418                                              VectorSet& not_peel ) {
2419  uint i;
2420  // Check that peel_list entries are in the peel set
2421  for (i = 0; i < peel_list.size(); i++) {
2422    if (!peel.test(peel_list.at(i)->_idx)) {
2423      return false;
2424    }
2425  }
2426  // Check at loop members are in one of peel set or not_peel set
2427  for (i = 0; i < loop->_body.size(); i++ ) {
2428    Node *def  = loop->_body.at(i);
2429    uint di = def->_idx;
2430    // Check that peel set elements are in peel_list
2431    if (peel.test(di)) {
2432      if (not_peel.test(di)) {
2433        return false;
2434      }
2435      // Must be in peel_list also
2436      bool found = false;
2437      for (uint j = 0; j < peel_list.size(); j++) {
2438        if (peel_list.at(j)->_idx == di) {
2439          found = true;
2440          break;
2441        }
2442      }
2443      if (!found) {
2444        return false;
2445      }
2446    } else if (not_peel.test(di)) {
2447      if (peel.test(di)) {
2448        return false;
2449      }
2450    } else {
2451      return false;
2452    }
2453  }
2454  return true;
2455}
2456
2457//------------------------------ is_valid_clone_loop_exit_use -------------------------------------
2458// Ensure a use outside of loop is of the right form
2459bool PhaseIdealLoop::is_valid_clone_loop_exit_use( IdealLoopTree *loop, Node* use, uint exit_idx) {
2460  Node *use_c = has_ctrl(use) ? get_ctrl(use) : use;
2461  return (use->is_Phi() &&
2462          use_c->is_Region() && use_c->req() == 3 &&
2463          (use_c->in(exit_idx)->Opcode() == Op_IfTrue ||
2464           use_c->in(exit_idx)->Opcode() == Op_IfFalse ||
2465           use_c->in(exit_idx)->Opcode() == Op_JumpProj) &&
2466          loop->is_member( get_loop( use_c->in(exit_idx)->in(0) ) ) );
2467}
2468
2469//------------------------------ is_valid_clone_loop_form -------------------------------------
2470// Ensure that all uses outside of loop are of the right form
2471bool PhaseIdealLoop::is_valid_clone_loop_form( IdealLoopTree *loop, Node_List& peel_list,
2472                                               uint orig_exit_idx, uint clone_exit_idx) {
2473  uint len = peel_list.size();
2474  for (uint i = 0; i < len; i++) {
2475    Node *def = peel_list.at(i);
2476
2477    for (DUIterator_Fast jmax, j = def->fast_outs(jmax); j < jmax; j++) {
2478      Node *use = def->fast_out(j);
2479      Node *use_c = has_ctrl(use) ? get_ctrl(use) : use;
2480      if (!loop->is_member(get_loop(use_c))) {
2481        // use is not in the loop, check for correct structure
2482        if (use->in(0) == def) {
2483          // Okay
2484        } else if (!is_valid_clone_loop_exit_use(loop, use, orig_exit_idx)) {
2485          return false;
2486        }
2487      }
2488    }
2489  }
2490  return true;
2491}
2492#endif
2493
2494//------------------------------ partial_peel -------------------------------------
2495// Partially peel (aka loop rotation) the top portion of a loop (called
2496// the peel section below) by cloning it and placing one copy just before
2497// the new loop head and the other copy at the bottom of the new loop.
2498//
2499//    before                       after                where it came from
2500//
2501//    stmt1                        stmt1
2502//  loop:                          stmt2                     clone
2503//    stmt2                        if condA goto exitA       clone
2504//    if condA goto exitA        new_loop:                   new
2505//    stmt3                        stmt3                     clone
2506//    if !condB goto loop          if condB goto exitB       clone
2507//  exitB:                         stmt2                     orig
2508//    stmt4                        if !condA goto new_loop   orig
2509//  exitA:                         goto exitA
2510//                               exitB:
2511//                                 stmt4
2512//                               exitA:
2513//
2514// Step 1: find the cut point: an exit test on probable
2515//         induction variable.
2516// Step 2: schedule (with cloning) operations in the peel
2517//         section that can be executed after the cut into
2518//         the section that is not peeled.  This may need
2519//         to clone operations into exit blocks.  For
2520//         instance, a reference to A[i] in the not-peel
2521//         section and a reference to B[i] in an exit block
2522//         may cause a left-shift of i by 2 to be placed
2523//         in the peel block.  This step will clone the left
2524//         shift into the exit block and sink the left shift
2525//         from the peel to the not-peel section.
2526// Step 3: clone the loop, retarget the control, and insert
2527//         phis for values that are live across the new loop
2528//         head.  This is very dependent on the graph structure
2529//         from clone_loop.  It creates region nodes for
2530//         exit control and associated phi nodes for values
2531//         flow out of the loop through that exit.  The region
2532//         node is dominated by the clone's control projection.
2533//         So the clone's peel section is placed before the
2534//         new loop head, and the clone's not-peel section is
2535//         forms the top part of the new loop.  The original
2536//         peel section forms the tail of the new loop.
2537// Step 4: update the dominator tree and recompute the
2538//         dominator depth.
2539//
2540//                   orig
2541//
2542//                   stmt1
2543//                     |
2544//                     v
2545//               loop predicate
2546//                     |
2547//                     v
2548//                   loop<----+
2549//                     |      |
2550//                   stmt2    |
2551//                     |      |
2552//                     v      |
2553//                    ifA     |
2554//                   / |      |
2555//                  v  v      |
2556//               false true   ^  <-- last_peel
2557//               /     |      |
2558//              /   ===|==cut |
2559//             /     stmt3    |  <-- first_not_peel
2560//            /        |      |
2561//            |        v      |
2562//            v       ifB     |
2563//          exitA:   / \      |
2564//                  /   \     |
2565//                 v     v    |
2566//               false true   |
2567//               /       \    |
2568//              /         ----+
2569//             |
2570//             v
2571//           exitB:
2572//           stmt4
2573//
2574//
2575//            after clone loop
2576//
2577//                   stmt1
2578//                     |
2579//                     v
2580//               loop predicate
2581//                 /       \
2582//        clone   /         \   orig
2583//               /           \
2584//              /             \
2585//             v               v
2586//   +---->loop                loop<----+
2587//   |      |                    |      |
2588//   |    stmt2                stmt2    |
2589//   |      |                    |      |
2590//   |      v                    v      |
2591//   |      ifA                 ifA     |
2592//   |      | \                / |      |
2593//   |      v  v              v  v      |
2594//   ^    true  false      false true   ^  <-- last_peel
2595//   |      |   ^   \       /    |      |
2596//   | cut==|==  \   \     /  ===|==cut |
2597//   |    stmt3   \   \   /    stmt3    |  <-- first_not_peel
2598//   |      |    dom   | |       |      |
2599//   |      v      \  1v v2      v      |
2600//   |      ifB     regionA     ifB     |
2601//   |      / \        |       / \      |
2602//   |     /   \       v      /   \     |
2603//   |    v     v    exitA:  v     v    |
2604//   |    true  false      false true   |
2605//   |    /     ^   \      /       \    |
2606//   +----       \   \    /         ----+
2607//               dom  \  /
2608//                 \  1v v2
2609//                  regionB
2610//                     |
2611//                     v
2612//                   exitB:
2613//                   stmt4
2614//
2615//
2616//           after partial peel
2617//
2618//                  stmt1
2619//                     |
2620//                     v
2621//               loop predicate
2622//                 /
2623//        clone   /             orig
2624//               /          TOP
2625//              /             \
2626//             v               v
2627//    TOP->loop                loop----+
2628//          |                    |      |
2629//        stmt2                stmt2    |
2630//          |                    |      |
2631//          v                    v      |
2632//          ifA                 ifA     |
2633//          | \                / |      |
2634//          v  v              v  v      |
2635//        true  false      false true   |     <-- last_peel
2636//          |   ^   \       /    +------|---+
2637//  +->newloop   \   \     /  === ==cut |   |
2638//  |     stmt3   \   \   /     TOP     |   |
2639//  |       |    dom   | |      stmt3   |   | <-- first_not_peel
2640//  |       v      \  1v v2      v      |   |
2641//  |       ifB     regionA     ifB     ^   v
2642//  |       / \        |       / \      |   |
2643//  |      /   \       v      /   \     |   |
2644//  |     v     v    exitA:  v     v    |   |
2645//  |     true  false      false true   |   |
2646//  |     /     ^   \      /       \    |   |
2647//  |    |       \   \    /         v   |   |
2648//  |    |       dom  \  /         TOP  |   |
2649//  |    |         \  1v v2             |   |
2650//  ^    v          regionB             |   |
2651//  |    |             |                |   |
2652//  |    |             v                ^   v
2653//  |    |           exitB:             |   |
2654//  |    |           stmt4              |   |
2655//  |    +------------>-----------------+   |
2656//  |                                       |
2657//  +-----------------<---------------------+
2658//
2659//
2660//              final graph
2661//
2662//                  stmt1
2663//                    |
2664//                    v
2665//               loop predicate
2666//                    |
2667//                    v
2668//                  stmt2 clone
2669//                    |
2670//                    v
2671//         ........> ifA clone
2672//         :        / |
2673//        dom      /  |
2674//         :      v   v
2675//         :  false   true
2676//         :  |       |
2677//         :  |       v
2678//         :  |    newloop<-----+
2679//         :  |        |        |
2680//         :  |     stmt3 clone |
2681//         :  |        |        |
2682//         :  |        v        |
2683//         :  |       ifB       |
2684//         :  |      / \        |
2685//         :  |     v   v       |
2686//         :  |  false true     |
2687//         :  |   |     |       |
2688//         :  |   v    stmt2    |
2689//         :  | exitB:  |       |
2690//         :  | stmt4   v       |
2691//         :  |       ifA orig  |
2692//         :  |      /  \       |
2693//         :  |     /    \      |
2694//         :  |    v     v      |
2695//         :  |  false  true    |
2696//         :  |  /        \     |
2697//         :  v  v         -----+
2698//          RegionA
2699//             |
2700//             v
2701//           exitA
2702//
2703bool PhaseIdealLoop::partial_peel( IdealLoopTree *loop, Node_List &old_new ) {
2704
2705  assert(!loop->_head->is_CountedLoop(), "Non-counted loop only");
2706  if (!loop->_head->is_Loop()) {
2707    return false;  }
2708
2709  LoopNode *head  = loop->_head->as_Loop();
2710
2711  if (head->is_partial_peel_loop() || head->partial_peel_has_failed()) {
2712    return false;
2713  }
2714
2715  // Check for complex exit control
2716  for(uint ii = 0; ii < loop->_body.size(); ii++ ) {
2717    Node *n = loop->_body.at(ii);
2718    int opc = n->Opcode();
2719    if (n->is_Call()        ||
2720        opc == Op_Catch     ||
2721        opc == Op_CatchProj ||
2722        opc == Op_Jump      ||
2723        opc == Op_JumpProj) {
2724#if !defined(PRODUCT)
2725      if (TracePartialPeeling) {
2726        tty->print_cr("\nExit control too complex: lp: %d", head->_idx);
2727      }
2728#endif
2729      return false;
2730    }
2731  }
2732
2733  int dd = dom_depth(head);
2734
2735  // Step 1: find cut point
2736
2737  // Walk up dominators to loop head looking for first loop exit
2738  // which is executed on every path thru loop.
2739  IfNode *peel_if = NULL;
2740  IfNode *peel_if_cmpu = NULL;
2741
2742  Node *iff = loop->tail();
2743  while( iff != head ) {
2744    if( iff->is_If() ) {
2745      Node *ctrl = get_ctrl(iff->in(1));
2746      if (ctrl->is_top()) return false; // Dead test on live IF.
2747      // If loop-varying exit-test, check for induction variable
2748      if( loop->is_member(get_loop(ctrl)) &&
2749          loop->is_loop_exit(iff) &&
2750          is_possible_iv_test(iff)) {
2751        Node* cmp = iff->in(1)->in(1);
2752        if (cmp->Opcode() == Op_CmpI) {
2753          peel_if = iff->as_If();
2754        } else {
2755          assert(cmp->Opcode() == Op_CmpU, "must be CmpI or CmpU");
2756          peel_if_cmpu = iff->as_If();
2757        }
2758      }
2759    }
2760    iff = idom(iff);
2761  }
2762  // Prefer signed compare over unsigned compare.
2763  IfNode* new_peel_if = NULL;
2764  if (peel_if == NULL) {
2765    if (!PartialPeelAtUnsignedTests || peel_if_cmpu == NULL) {
2766      return false;   // No peel point found
2767    }
2768    new_peel_if = insert_cmpi_loop_exit(peel_if_cmpu, loop);
2769    if (new_peel_if == NULL) {
2770      return false;   // No peel point found
2771    }
2772    peel_if = new_peel_if;
2773  }
2774  Node* last_peel        = stay_in_loop(peel_if, loop);
2775  Node* first_not_peeled = stay_in_loop(last_peel, loop);
2776  if (first_not_peeled == NULL || first_not_peeled == head) {
2777    return false;
2778  }
2779
2780#if !defined(PRODUCT)
2781  if (TraceLoopOpts) {
2782    tty->print("PartialPeel  ");
2783    loop->dump_head();
2784  }
2785
2786  if (TracePartialPeeling) {
2787    tty->print_cr("before partial peel one iteration");
2788    Node_List wl;
2789    Node* t = head->in(2);
2790    while (true) {
2791      wl.push(t);
2792      if (t == head) break;
2793      t = idom(t);
2794    }
2795    while (wl.size() > 0) {
2796      Node* tt = wl.pop();
2797      tt->dump();
2798      if (tt == last_peel) tty->print_cr("-- cut --");
2799    }
2800  }
2801#endif
2802  ResourceArea *area = Thread::current()->resource_area();
2803  VectorSet peel(area);
2804  VectorSet not_peel(area);
2805  Node_List peel_list(area);
2806  Node_List worklist(area);
2807  Node_List sink_list(area);
2808
2809  // Set of cfg nodes to peel are those that are executable from
2810  // the head through last_peel.
2811  assert(worklist.size() == 0, "should be empty");
2812  worklist.push(head);
2813  peel.set(head->_idx);
2814  while (worklist.size() > 0) {
2815    Node *n = worklist.pop();
2816    if (n != last_peel) {
2817      for (DUIterator_Fast jmax, j = n->fast_outs(jmax); j < jmax; j++) {
2818        Node* use = n->fast_out(j);
2819        if (use->is_CFG() &&
2820            loop->is_member(get_loop(use)) &&
2821            !peel.test_set(use->_idx)) {
2822          worklist.push(use);
2823        }
2824      }
2825    }
2826  }
2827
2828  // Set of non-cfg nodes to peel are those that are control
2829  // dependent on the cfg nodes.
2830  uint i;
2831  for(i = 0; i < loop->_body.size(); i++ ) {
2832    Node *n = loop->_body.at(i);
2833    Node *n_c = has_ctrl(n) ? get_ctrl(n) : n;
2834    if (peel.test(n_c->_idx)) {
2835      peel.set(n->_idx);
2836    } else {
2837      not_peel.set(n->_idx);
2838    }
2839  }
2840
2841  // Step 2: move operations from the peeled section down into the
2842  //         not-peeled section
2843
2844  // Get a post order schedule of nodes in the peel region
2845  // Result in right-most operand.
2846  scheduled_nodelist(loop, peel, peel_list );
2847
2848  assert(is_valid_loop_partition(loop, peel, peel_list, not_peel), "bad partition");
2849
2850  // For future check for too many new phis
2851  uint old_phi_cnt = 0;
2852  for (DUIterator_Fast jmax, j = head->fast_outs(jmax); j < jmax; j++) {
2853    Node* use = head->fast_out(j);
2854    if (use->is_Phi()) old_phi_cnt++;
2855  }
2856
2857#if !defined(PRODUCT)
2858  if (TracePartialPeeling) {
2859    tty->print_cr("\npeeled list");
2860  }
2861#endif
2862
2863  // Evacuate nodes in peel region into the not_peeled region if possible
2864  uint new_phi_cnt = 0;
2865  uint cloned_for_outside_use = 0;
2866  for (i = 0; i < peel_list.size();) {
2867    Node* n = peel_list.at(i);
2868#if !defined(PRODUCT)
2869  if (TracePartialPeeling) n->dump();
2870#endif
2871    bool incr = true;
2872    if ( !n->is_CFG() ) {
2873
2874      if ( has_use_in_set(n, not_peel) ) {
2875
2876        // If not used internal to the peeled region,
2877        // move "n" from peeled to not_peeled region.
2878
2879        if ( !has_use_internal_to_set(n, peel, loop) ) {
2880
2881          // if not pinned and not a load (which maybe anti-dependent on a store)
2882          // and not a CMove (Matcher expects only bool->cmove).
2883          if ( n->in(0) == NULL && !n->is_Load() && !n->is_CMove() ) {
2884            cloned_for_outside_use += clone_for_use_outside_loop( loop, n, worklist );
2885            sink_list.push(n);
2886            peel     >>= n->_idx; // delete n from peel set.
2887            not_peel <<= n->_idx; // add n to not_peel set.
2888            peel_list.remove(i);
2889            incr = false;
2890#if !defined(PRODUCT)
2891            if (TracePartialPeeling) {
2892              tty->print_cr("sink to not_peeled region: %d newbb: %d",
2893                            n->_idx, get_ctrl(n)->_idx);
2894            }
2895#endif
2896          }
2897        } else {
2898          // Otherwise check for special def-use cases that span
2899          // the peel/not_peel boundary such as bool->if
2900          clone_for_special_use_inside_loop( loop, n, not_peel, sink_list, worklist );
2901          new_phi_cnt++;
2902        }
2903      }
2904    }
2905    if (incr) i++;
2906  }
2907
2908  if (new_phi_cnt > old_phi_cnt + PartialPeelNewPhiDelta) {
2909#if !defined(PRODUCT)
2910    if (TracePartialPeeling) {
2911      tty->print_cr("\nToo many new phis: %d  old %d new cmpi: %c",
2912                    new_phi_cnt, old_phi_cnt, new_peel_if != NULL?'T':'F');
2913    }
2914#endif
2915    if (new_peel_if != NULL) {
2916      remove_cmpi_loop_exit(new_peel_if, loop);
2917    }
2918    // Inhibit more partial peeling on this loop
2919    assert(!head->is_partial_peel_loop(), "not partial peeled");
2920    head->mark_partial_peel_failed();
2921    if (cloned_for_outside_use > 0) {
2922      // Terminate this round of loop opts because
2923      // the graph outside this loop was changed.
2924      C->set_major_progress();
2925      return true;
2926    }
2927    return false;
2928  }
2929
2930  // Step 3: clone loop, retarget control, and insert new phis
2931
2932  // Create new loop head for new phis and to hang
2933  // the nodes being moved (sinked) from the peel region.
2934  LoopNode* new_head = new LoopNode(last_peel, last_peel);
2935  new_head->set_unswitch_count(head->unswitch_count()); // Preserve
2936  _igvn.register_new_node_with_optimizer(new_head);
2937  assert(first_not_peeled->in(0) == last_peel, "last_peel <- first_not_peeled");
2938  _igvn.replace_input_of(first_not_peeled, 0, new_head);
2939  set_loop(new_head, loop);
2940  loop->_body.push(new_head);
2941  not_peel.set(new_head->_idx);
2942  set_idom(new_head, last_peel, dom_depth(first_not_peeled));
2943  set_idom(first_not_peeled, new_head, dom_depth(first_not_peeled));
2944
2945  while (sink_list.size() > 0) {
2946    Node* n = sink_list.pop();
2947    set_ctrl(n, new_head);
2948  }
2949
2950  assert(is_valid_loop_partition(loop, peel, peel_list, not_peel), "bad partition");
2951
2952  clone_loop( loop, old_new, dd );
2953
2954  const uint clone_exit_idx = 1;
2955  const uint orig_exit_idx  = 2;
2956  assert(is_valid_clone_loop_form( loop, peel_list, orig_exit_idx, clone_exit_idx ), "bad clone loop");
2957
2958  Node* head_clone             = old_new[head->_idx];
2959  LoopNode* new_head_clone     = old_new[new_head->_idx]->as_Loop();
2960  Node* orig_tail_clone        = head_clone->in(2);
2961
2962  // Add phi if "def" node is in peel set and "use" is not
2963
2964  for(i = 0; i < peel_list.size(); i++ ) {
2965    Node *def  = peel_list.at(i);
2966    if (!def->is_CFG()) {
2967      for (DUIterator_Fast jmax, j = def->fast_outs(jmax); j < jmax; j++) {
2968        Node *use = def->fast_out(j);
2969        if (has_node(use) && use->in(0) != C->top() &&
2970            (!peel.test(use->_idx) ||
2971             (use->is_Phi() && use->in(0) == head)) ) {
2972          worklist.push(use);
2973        }
2974      }
2975      while( worklist.size() ) {
2976        Node *use = worklist.pop();
2977        for (uint j = 1; j < use->req(); j++) {
2978          Node* n = use->in(j);
2979          if (n == def) {
2980
2981            // "def" is in peel set, "use" is not in peel set
2982            // or "use" is in the entry boundary (a phi) of the peel set
2983
2984            Node* use_c = has_ctrl(use) ? get_ctrl(use) : use;
2985
2986            if ( loop->is_member(get_loop( use_c )) ) {
2987              // use is in loop
2988              if (old_new[use->_idx] != NULL) { // null for dead code
2989                Node* use_clone = old_new[use->_idx];
2990                _igvn.replace_input_of(use, j, C->top());
2991                insert_phi_for_loop( use_clone, j, old_new[def->_idx], def, new_head_clone );
2992              }
2993            } else {
2994              assert(is_valid_clone_loop_exit_use(loop, use, orig_exit_idx), "clone loop format");
2995              // use is not in the loop, check if the live range includes the cut
2996              Node* lp_if = use_c->in(orig_exit_idx)->in(0);
2997              if (not_peel.test(lp_if->_idx)) {
2998                assert(j == orig_exit_idx, "use from original loop");
2999                insert_phi_for_loop( use, clone_exit_idx, old_new[def->_idx], def, new_head_clone );
3000              }
3001            }
3002          }
3003        }
3004      }
3005    }
3006  }
3007
3008  // Step 3b: retarget control
3009
3010  // Redirect control to the new loop head if a cloned node in
3011  // the not_peeled region has control that points into the peeled region.
3012  // This necessary because the cloned peeled region will be outside
3013  // the loop.
3014  //                            from    to
3015  //          cloned-peeled    <---+
3016  //    new_head_clone:            |    <--+
3017  //          cloned-not_peeled  in(0)    in(0)
3018  //          orig-peeled
3019
3020  for(i = 0; i < loop->_body.size(); i++ ) {
3021    Node *n = loop->_body.at(i);
3022    if (!n->is_CFG()           && n->in(0) != NULL        &&
3023        not_peel.test(n->_idx) && peel.test(n->in(0)->_idx)) {
3024      Node* n_clone = old_new[n->_idx];
3025      _igvn.replace_input_of(n_clone, 0, new_head_clone);
3026    }
3027  }
3028
3029  // Backedge of the surviving new_head (the clone) is original last_peel
3030  _igvn.replace_input_of(new_head_clone, LoopNode::LoopBackControl, last_peel);
3031
3032  // Cut first node in original not_peel set
3033  _igvn.rehash_node_delayed(new_head);                     // Multiple edge updates:
3034  new_head->set_req(LoopNode::EntryControl,    C->top());  //   use rehash_node_delayed / set_req instead of
3035  new_head->set_req(LoopNode::LoopBackControl, C->top());  //   multiple replace_input_of calls
3036
3037  // Copy head_clone back-branch info to original head
3038  // and remove original head's loop entry and
3039  // clone head's back-branch
3040  _igvn.rehash_node_delayed(head); // Multiple edge updates
3041  head->set_req(LoopNode::EntryControl,    head_clone->in(LoopNode::LoopBackControl));
3042  head->set_req(LoopNode::LoopBackControl, C->top());
3043  _igvn.replace_input_of(head_clone, LoopNode::LoopBackControl, C->top());
3044
3045  // Similarly modify the phis
3046  for (DUIterator_Fast kmax, k = head->fast_outs(kmax); k < kmax; k++) {
3047    Node* use = head->fast_out(k);
3048    if (use->is_Phi() && use->outcnt() > 0) {
3049      Node* use_clone = old_new[use->_idx];
3050      _igvn.rehash_node_delayed(use); // Multiple edge updates
3051      use->set_req(LoopNode::EntryControl,    use_clone->in(LoopNode::LoopBackControl));
3052      use->set_req(LoopNode::LoopBackControl, C->top());
3053      _igvn.replace_input_of(use_clone, LoopNode::LoopBackControl, C->top());
3054    }
3055  }
3056
3057  // Step 4: update dominator tree and dominator depth
3058
3059  set_idom(head, orig_tail_clone, dd);
3060  recompute_dom_depth();
3061
3062  // Inhibit more partial peeling on this loop
3063  new_head_clone->set_partial_peel_loop();
3064  C->set_major_progress();
3065  loop->record_for_igvn();
3066
3067#if !defined(PRODUCT)
3068  if (TracePartialPeeling) {
3069    tty->print_cr("\nafter partial peel one iteration");
3070    Node_List wl(area);
3071    Node* t = last_peel;
3072    while (true) {
3073      wl.push(t);
3074      if (t == head_clone) break;
3075      t = idom(t);
3076    }
3077    while (wl.size() > 0) {
3078      Node* tt = wl.pop();
3079      if (tt == head) tty->print_cr("orig head");
3080      else if (tt == new_head_clone) tty->print_cr("new head");
3081      else if (tt == head_clone) tty->print_cr("clone head");
3082      tt->dump();
3083    }
3084  }
3085#endif
3086  return true;
3087}
3088
3089//------------------------------reorg_offsets----------------------------------
3090// Reorganize offset computations to lower register pressure.  Mostly
3091// prevent loop-fallout uses of the pre-incremented trip counter (which are
3092// then alive with the post-incremented trip counter forcing an extra
3093// register move)
3094void PhaseIdealLoop::reorg_offsets(IdealLoopTree *loop) {
3095  // Perform it only for canonical counted loops.
3096  // Loop's shape could be messed up by iteration_split_impl.
3097  if (!loop->_head->is_CountedLoop())
3098    return;
3099  if (!loop->_head->as_Loop()->is_valid_counted_loop())
3100    return;
3101
3102  CountedLoopNode *cl = loop->_head->as_CountedLoop();
3103  CountedLoopEndNode *cle = cl->loopexit();
3104  Node *exit = cle->proj_out(false);
3105  Node *phi = cl->phi();
3106
3107  // Check for the special case of folks using the pre-incremented
3108  // trip-counter on the fall-out path (forces the pre-incremented
3109  // and post-incremented trip counter to be live at the same time).
3110  // Fix this by adjusting to use the post-increment trip counter.
3111
3112  bool progress = true;
3113  while (progress) {
3114    progress = false;
3115    for (DUIterator_Fast imax, i = phi->fast_outs(imax); i < imax; i++) {
3116      Node* use = phi->fast_out(i);   // User of trip-counter
3117      if (!has_ctrl(use))  continue;
3118      Node *u_ctrl = get_ctrl(use);
3119      if (use->is_Phi()) {
3120        u_ctrl = NULL;
3121        for (uint j = 1; j < use->req(); j++)
3122          if (use->in(j) == phi)
3123            u_ctrl = dom_lca(u_ctrl, use->in(0)->in(j));
3124      }
3125      IdealLoopTree *u_loop = get_loop(u_ctrl);
3126      // Look for loop-invariant use
3127      if (u_loop == loop) continue;
3128      if (loop->is_member(u_loop)) continue;
3129      // Check that use is live out the bottom.  Assuming the trip-counter
3130      // update is right at the bottom, uses of of the loop middle are ok.
3131      if (dom_lca(exit, u_ctrl) != exit) continue;
3132      // Hit!  Refactor use to use the post-incremented tripcounter.
3133      // Compute a post-increment tripcounter.
3134      Node *opaq = new Opaque2Node( C, cle->incr() );
3135      register_new_node(opaq, exit);
3136      Node *neg_stride = _igvn.intcon(-cle->stride_con());
3137      set_ctrl(neg_stride, C->root());
3138      Node *post = new AddINode( opaq, neg_stride);
3139      register_new_node(post, exit);
3140      _igvn.rehash_node_delayed(use);
3141      for (uint j = 1; j < use->req(); j++) {
3142        if (use->in(j) == phi)
3143          use->set_req(j, post);
3144      }
3145      // Since DU info changed, rerun loop
3146      progress = true;
3147      break;
3148    }
3149  }
3150
3151}
3152