loopopts.cpp revision 3724:8e47bac5643a
1/*
2 * Copyright (c) 1999, 2011, Oracle and/or its affiliates. All rights reserved.
3 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
4 *
5 * This code is free software; you can redistribute it and/or modify it
6 * under the terms of the GNU General Public License version 2 only, as
7 * published by the Free Software Foundation.
8 *
9 * This code is distributed in the hope that it will be useful, but WITHOUT
10 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
11 * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
12 * version 2 for more details (a copy is included in the LICENSE file that
13 * accompanied this code).
14 *
15 * You should have received a copy of the GNU General Public License version
16 * 2 along with this work; if not, write to the Free Software Foundation,
17 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
18 *
19 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
20 * or visit www.oracle.com if you need additional information or have any
21 * questions.
22 *
23 */
24
25#include "precompiled.hpp"
26#include "memory/allocation.inline.hpp"
27#include "opto/addnode.hpp"
28#include "opto/connode.hpp"
29#include "opto/divnode.hpp"
30#include "opto/loopnode.hpp"
31#include "opto/matcher.hpp"
32#include "opto/mulnode.hpp"
33#include "opto/rootnode.hpp"
34#include "opto/subnode.hpp"
35
36//=============================================================================
37//------------------------------split_thru_phi---------------------------------
38// Split Node 'n' through merge point if there is enough win.
39Node *PhaseIdealLoop::split_thru_phi( Node *n, Node *region, int policy ) {
40  if (n->Opcode() == Op_ConvI2L && n->bottom_type() != TypeLong::LONG) {
41    // ConvI2L may have type information on it which is unsafe to push up
42    // so disable this for now
43    return NULL;
44  }
45  int wins = 0;
46  assert(!n->is_CFG(), "");
47  assert(region->is_Region(), "");
48
49  const Type* type = n->bottom_type();
50  const TypeOopPtr *t_oop = _igvn.type(n)->isa_oopptr();
51  Node *phi;
52  if (t_oop != NULL && t_oop->is_known_instance_field()) {
53    int iid    = t_oop->instance_id();
54    int index  = C->get_alias_index(t_oop);
55    int offset = t_oop->offset();
56    phi = new (C) PhiNode(region, type, NULL, iid, index, offset);
57  } else {
58    phi = PhiNode::make_blank(region, n);
59  }
60  uint old_unique = C->unique();
61  for (uint i = 1; i < region->req(); i++) {
62    Node *x;
63    Node* the_clone = NULL;
64    if (region->in(i) == C->top()) {
65      x = C->top();             // Dead path?  Use a dead data op
66    } else {
67      x = n->clone();           // Else clone up the data op
68      the_clone = x;            // Remember for possible deletion.
69      // Alter data node to use pre-phi inputs
70      if (n->in(0) == region)
71        x->set_req( 0, region->in(i) );
72      for (uint j = 1; j < n->req(); j++) {
73        Node *in = n->in(j);
74        if (in->is_Phi() && in->in(0) == region)
75          x->set_req( j, in->in(i) ); // Use pre-Phi input for the clone
76      }
77    }
78    // Check for a 'win' on some paths
79    const Type *t = x->Value(&_igvn);
80
81    bool singleton = t->singleton();
82
83    // A TOP singleton indicates that there are no possible values incoming
84    // along a particular edge. In most cases, this is OK, and the Phi will
85    // be eliminated later in an Ideal call. However, we can't allow this to
86    // happen if the singleton occurs on loop entry, as the elimination of
87    // the PhiNode may cause the resulting node to migrate back to a previous
88    // loop iteration.
89    if (singleton && t == Type::TOP) {
90      // Is_Loop() == false does not confirm the absence of a loop (e.g., an
91      // irreducible loop may not be indicated by an affirmative is_Loop());
92      // therefore, the only top we can split thru a phi is on a backedge of
93      // a loop.
94      singleton &= region->is_Loop() && (i != LoopNode::EntryControl);
95    }
96
97    if (singleton) {
98      wins++;
99      x = ((PhaseGVN&)_igvn).makecon(t);
100    } else {
101      // We now call Identity to try to simplify the cloned node.
102      // Note that some Identity methods call phase->type(this).
103      // Make sure that the type array is big enough for
104      // our new node, even though we may throw the node away.
105      // (Note: This tweaking with igvn only works because x is a new node.)
106      _igvn.set_type(x, t);
107      // If x is a TypeNode, capture any more-precise type permanently into Node
108      // otherwise it will be not updated during igvn->transform since
109      // igvn->type(x) is set to x->Value() already.
110      x->raise_bottom_type(t);
111      Node *y = x->Identity(&_igvn);
112      if (y != x) {
113        wins++;
114        x = y;
115      } else {
116        y = _igvn.hash_find(x);
117        if (y) {
118          wins++;
119          x = y;
120        } else {
121          // Else x is a new node we are keeping
122          // We do not need register_new_node_with_optimizer
123          // because set_type has already been called.
124          _igvn._worklist.push(x);
125        }
126      }
127    }
128    if (x != the_clone && the_clone != NULL)
129      _igvn.remove_dead_node(the_clone);
130    phi->set_req( i, x );
131  }
132  // Too few wins?
133  if (wins <= policy) {
134    _igvn.remove_dead_node(phi);
135    return NULL;
136  }
137
138  // Record Phi
139  register_new_node( phi, region );
140
141  for (uint i2 = 1; i2 < phi->req(); i2++) {
142    Node *x = phi->in(i2);
143    // If we commoned up the cloned 'x' with another existing Node,
144    // the existing Node picks up a new use.  We need to make the
145    // existing Node occur higher up so it dominates its uses.
146    Node *old_ctrl;
147    IdealLoopTree *old_loop;
148
149    if (x->is_Con()) {
150      // Constant's control is always root.
151      set_ctrl(x, C->root());
152      continue;
153    }
154    // The occasional new node
155    if (x->_idx >= old_unique) {     // Found a new, unplaced node?
156      old_ctrl = NULL;
157      old_loop = NULL;               // Not in any prior loop
158    } else {
159      old_ctrl = get_ctrl(x);
160      old_loop = get_loop(old_ctrl); // Get prior loop
161    }
162    // New late point must dominate new use
163    Node *new_ctrl = dom_lca(old_ctrl, region->in(i2));
164    if (new_ctrl == old_ctrl) // Nothing is changed
165      continue;
166
167    IdealLoopTree *new_loop = get_loop(new_ctrl);
168
169    // Don't move x into a loop if its uses are
170    // outside of loop. Otherwise x will be cloned
171    // for each use outside of this loop.
172    IdealLoopTree *use_loop = get_loop(region);
173    if (!new_loop->is_member(use_loop) &&
174        (old_loop == NULL || !new_loop->is_member(old_loop))) {
175      // Take early control, later control will be recalculated
176      // during next iteration of loop optimizations.
177      new_ctrl = get_early_ctrl(x);
178      new_loop = get_loop(new_ctrl);
179    }
180    // Set new location
181    set_ctrl(x, new_ctrl);
182    // If changing loop bodies, see if we need to collect into new body
183    if (old_loop != new_loop) {
184      if (old_loop && !old_loop->_child)
185        old_loop->_body.yank(x);
186      if (!new_loop->_child)
187        new_loop->_body.push(x);  // Collect body info
188    }
189  }
190
191  return phi;
192}
193
194//------------------------------dominated_by------------------------------------
195// Replace the dominated test with an obvious true or false.  Place it on the
196// IGVN worklist for later cleanup.  Move control-dependent data Nodes on the
197// live path up to the dominating control.
198void PhaseIdealLoop::dominated_by( Node *prevdom, Node *iff, bool flip, bool exclude_loop_predicate ) {
199#ifndef PRODUCT
200  if (VerifyLoopOptimizations && PrintOpto) tty->print_cr("dominating test");
201#endif
202
203
204  // prevdom is the dominating projection of the dominating test.
205  assert( iff->is_If(), "" );
206  assert( iff->Opcode() == Op_If || iff->Opcode() == Op_CountedLoopEnd, "Check this code when new subtype is added");
207  int pop = prevdom->Opcode();
208  assert( pop == Op_IfFalse || pop == Op_IfTrue, "" );
209  if (flip) {
210    if (pop == Op_IfTrue)
211      pop = Op_IfFalse;
212    else
213      pop = Op_IfTrue;
214  }
215  // 'con' is set to true or false to kill the dominated test.
216  Node *con = _igvn.makecon(pop == Op_IfTrue ? TypeInt::ONE : TypeInt::ZERO);
217  set_ctrl(con, C->root()); // Constant gets a new use
218  // Hack the dominated test
219  _igvn.replace_input_of(iff, 1, con);
220
221  // If I dont have a reachable TRUE and FALSE path following the IfNode then
222  // I can assume this path reaches an infinite loop.  In this case it's not
223  // important to optimize the data Nodes - either the whole compilation will
224  // be tossed or this path (and all data Nodes) will go dead.
225  if (iff->outcnt() != 2) return;
226
227  // Make control-dependent data Nodes on the live path (path that will remain
228  // once the dominated IF is removed) become control-dependent on the
229  // dominating projection.
230  Node* dp = iff->as_If()->proj_out(pop == Op_IfTrue);
231
232  // Loop predicates may have depending checks which should not
233  // be skipped. For example, range check predicate has two checks
234  // for lower and upper bounds.
235  ProjNode* unc_proj = iff->as_If()->proj_out(1 - dp->as_Proj()->_con)->as_Proj();
236  if (exclude_loop_predicate &&
237      is_uncommon_trap_proj(unc_proj, Deoptimization::Reason_predicate))
238    return; // Let IGVN transformation change control dependence.
239
240  IdealLoopTree *old_loop = get_loop(dp);
241
242  for (DUIterator_Fast imax, i = dp->fast_outs(imax); i < imax; i++) {
243    Node* cd = dp->fast_out(i); // Control-dependent node
244    if (cd->depends_only_on_test()) {
245      assert(cd->in(0) == dp, "");
246      _igvn.replace_input_of(cd, 0, prevdom);
247      set_early_ctrl(cd);
248      IdealLoopTree *new_loop = get_loop(get_ctrl(cd));
249      if (old_loop != new_loop) {
250        if (!old_loop->_child) old_loop->_body.yank(cd);
251        if (!new_loop->_child) new_loop->_body.push(cd);
252      }
253      --i;
254      --imax;
255    }
256  }
257}
258
259//------------------------------has_local_phi_input----------------------------
260// Return TRUE if 'n' has Phi inputs from its local block and no other
261// block-local inputs (all non-local-phi inputs come from earlier blocks)
262Node *PhaseIdealLoop::has_local_phi_input( Node *n ) {
263  Node *n_ctrl = get_ctrl(n);
264  // See if some inputs come from a Phi in this block, or from before
265  // this block.
266  uint i;
267  for( i = 1; i < n->req(); i++ ) {
268    Node *phi = n->in(i);
269    if( phi->is_Phi() && phi->in(0) == n_ctrl )
270      break;
271  }
272  if( i >= n->req() )
273    return NULL;                // No Phi inputs; nowhere to clone thru
274
275  // Check for inputs created between 'n' and the Phi input.  These
276  // must split as well; they have already been given the chance
277  // (courtesy of a post-order visit) and since they did not we must
278  // recover the 'cost' of splitting them by being very profitable
279  // when splitting 'n'.  Since this is unlikely we simply give up.
280  for( i = 1; i < n->req(); i++ ) {
281    Node *m = n->in(i);
282    if( get_ctrl(m) == n_ctrl && !m->is_Phi() ) {
283      // We allow the special case of AddP's with no local inputs.
284      // This allows us to split-up address expressions.
285      if (m->is_AddP() &&
286          get_ctrl(m->in(2)) != n_ctrl &&
287          get_ctrl(m->in(3)) != n_ctrl) {
288        // Move the AddP up to dominating point
289        set_ctrl_and_loop(m, find_non_split_ctrl(idom(n_ctrl)));
290        continue;
291      }
292      return NULL;
293    }
294  }
295
296  return n_ctrl;
297}
298
299//------------------------------remix_address_expressions----------------------
300// Rework addressing expressions to get the most loop-invariant stuff
301// moved out.  We'd like to do all associative operators, but it's especially
302// important (common) to do address expressions.
303Node *PhaseIdealLoop::remix_address_expressions( Node *n ) {
304  if (!has_ctrl(n))  return NULL;
305  Node *n_ctrl = get_ctrl(n);
306  IdealLoopTree *n_loop = get_loop(n_ctrl);
307
308  // See if 'n' mixes loop-varying and loop-invariant inputs and
309  // itself is loop-varying.
310
311  // Only interested in binary ops (and AddP)
312  if( n->req() < 3 || n->req() > 4 ) return NULL;
313
314  Node *n1_ctrl = get_ctrl(n->in(                    1));
315  Node *n2_ctrl = get_ctrl(n->in(                    2));
316  Node *n3_ctrl = get_ctrl(n->in(n->req() == 3 ? 2 : 3));
317  IdealLoopTree *n1_loop = get_loop( n1_ctrl );
318  IdealLoopTree *n2_loop = get_loop( n2_ctrl );
319  IdealLoopTree *n3_loop = get_loop( n3_ctrl );
320
321  // Does one of my inputs spin in a tighter loop than self?
322  if( (n_loop->is_member( n1_loop ) && n_loop != n1_loop) ||
323      (n_loop->is_member( n2_loop ) && n_loop != n2_loop) ||
324      (n_loop->is_member( n3_loop ) && n_loop != n3_loop) )
325    return NULL;                // Leave well enough alone
326
327  // Is at least one of my inputs loop-invariant?
328  if( n1_loop == n_loop &&
329      n2_loop == n_loop &&
330      n3_loop == n_loop )
331    return NULL;                // No loop-invariant inputs
332
333
334  int n_op = n->Opcode();
335
336  // Replace expressions like ((V+I) << 2) with (V<<2 + I<<2).
337  if( n_op == Op_LShiftI ) {
338    // Scale is loop invariant
339    Node *scale = n->in(2);
340    Node *scale_ctrl = get_ctrl(scale);
341    IdealLoopTree *scale_loop = get_loop(scale_ctrl );
342    if( n_loop == scale_loop || !scale_loop->is_member( n_loop ) )
343      return NULL;
344    const TypeInt *scale_t = scale->bottom_type()->isa_int();
345    if( scale_t && scale_t->is_con() && scale_t->get_con() >= 16 )
346      return NULL;              // Dont bother with byte/short masking
347    // Add must vary with loop (else shift would be loop-invariant)
348    Node *add = n->in(1);
349    Node *add_ctrl = get_ctrl(add);
350    IdealLoopTree *add_loop = get_loop(add_ctrl);
351    //assert( n_loop == add_loop, "" );
352    if( n_loop != add_loop ) return NULL;  // happens w/ evil ZKM loops
353
354    // Convert I-V into I+ (0-V); same for V-I
355    if( add->Opcode() == Op_SubI &&
356        _igvn.type( add->in(1) ) != TypeInt::ZERO ) {
357      Node *zero = _igvn.intcon(0);
358      set_ctrl(zero, C->root());
359      Node *neg = new (C) SubINode( _igvn.intcon(0), add->in(2) );
360      register_new_node( neg, get_ctrl(add->in(2) ) );
361      add = new (C) AddINode( add->in(1), neg );
362      register_new_node( add, add_ctrl );
363    }
364    if( add->Opcode() != Op_AddI ) return NULL;
365    // See if one add input is loop invariant
366    Node *add_var = add->in(1);
367    Node *add_var_ctrl = get_ctrl(add_var);
368    IdealLoopTree *add_var_loop = get_loop(add_var_ctrl );
369    Node *add_invar = add->in(2);
370    Node *add_invar_ctrl = get_ctrl(add_invar);
371    IdealLoopTree *add_invar_loop = get_loop(add_invar_ctrl );
372    if( add_var_loop == n_loop ) {
373    } else if( add_invar_loop == n_loop ) {
374      // Swap to find the invariant part
375      add_invar = add_var;
376      add_invar_ctrl = add_var_ctrl;
377      add_invar_loop = add_var_loop;
378      add_var = add->in(2);
379      Node *add_var_ctrl = get_ctrl(add_var);
380      IdealLoopTree *add_var_loop = get_loop(add_var_ctrl );
381    } else                      // Else neither input is loop invariant
382      return NULL;
383    if( n_loop == add_invar_loop || !add_invar_loop->is_member( n_loop ) )
384      return NULL;              // No invariant part of the add?
385
386    // Yes!  Reshape address expression!
387    Node *inv_scale = new (C) LShiftINode( add_invar, scale );
388    Node *inv_scale_ctrl =
389      dom_depth(add_invar_ctrl) > dom_depth(scale_ctrl) ?
390      add_invar_ctrl : scale_ctrl;
391    register_new_node( inv_scale, inv_scale_ctrl );
392    Node *var_scale = new (C) LShiftINode( add_var, scale );
393    register_new_node( var_scale, n_ctrl );
394    Node *var_add = new (C) AddINode( var_scale, inv_scale );
395    register_new_node( var_add, n_ctrl );
396    _igvn.replace_node( n, var_add );
397    return var_add;
398  }
399
400  // Replace (I+V) with (V+I)
401  if( n_op == Op_AddI ||
402      n_op == Op_AddL ||
403      n_op == Op_AddF ||
404      n_op == Op_AddD ||
405      n_op == Op_MulI ||
406      n_op == Op_MulL ||
407      n_op == Op_MulF ||
408      n_op == Op_MulD ) {
409    if( n2_loop == n_loop ) {
410      assert( n1_loop != n_loop, "" );
411      n->swap_edges(1, 2);
412    }
413  }
414
415  // Replace ((I1 +p V) +p I2) with ((I1 +p I2) +p V),
416  // but not if I2 is a constant.
417  if( n_op == Op_AddP ) {
418    if( n2_loop == n_loop && n3_loop != n_loop ) {
419      if( n->in(2)->Opcode() == Op_AddP && !n->in(3)->is_Con() ) {
420        Node *n22_ctrl = get_ctrl(n->in(2)->in(2));
421        Node *n23_ctrl = get_ctrl(n->in(2)->in(3));
422        IdealLoopTree *n22loop = get_loop( n22_ctrl );
423        IdealLoopTree *n23_loop = get_loop( n23_ctrl );
424        if( n22loop != n_loop && n22loop->is_member(n_loop) &&
425            n23_loop == n_loop ) {
426          Node *add1 = new (C) AddPNode( n->in(1), n->in(2)->in(2), n->in(3) );
427          // Stuff new AddP in the loop preheader
428          register_new_node( add1, n_loop->_head->in(LoopNode::EntryControl) );
429          Node *add2 = new (C) AddPNode( n->in(1), add1, n->in(2)->in(3) );
430          register_new_node( add2, n_ctrl );
431          _igvn.replace_node( n, add2 );
432          return add2;
433        }
434      }
435    }
436
437    // Replace (I1 +p (I2 + V)) with ((I1 +p I2) +p V)
438    if( n2_loop != n_loop && n3_loop == n_loop ) {
439      if( n->in(3)->Opcode() == Op_AddI ) {
440        Node *V = n->in(3)->in(1);
441        Node *I = n->in(3)->in(2);
442        if( is_member(n_loop,get_ctrl(V)) ) {
443        } else {
444          Node *tmp = V; V = I; I = tmp;
445        }
446        if( !is_member(n_loop,get_ctrl(I)) ) {
447          Node *add1 = new (C) AddPNode( n->in(1), n->in(2), I );
448          // Stuff new AddP in the loop preheader
449          register_new_node( add1, n_loop->_head->in(LoopNode::EntryControl) );
450          Node *add2 = new (C) AddPNode( n->in(1), add1, V );
451          register_new_node( add2, n_ctrl );
452          _igvn.replace_node( n, add2 );
453          return add2;
454        }
455      }
456    }
457  }
458
459  return NULL;
460}
461
462//------------------------------conditional_move-------------------------------
463// Attempt to replace a Phi with a conditional move.  We have some pretty
464// strict profitability requirements.  All Phis at the merge point must
465// be converted, so we can remove the control flow.  We need to limit the
466// number of c-moves to a small handful.  All code that was in the side-arms
467// of the CFG diamond is now speculatively executed.  This code has to be
468// "cheap enough".  We are pretty much limited to CFG diamonds that merge
469// 1 or 2 items with a total of 1 or 2 ops executed speculatively.
470Node *PhaseIdealLoop::conditional_move( Node *region ) {
471
472  assert(region->is_Region(), "sanity check");
473  if (region->req() != 3) return NULL;
474
475  // Check for CFG diamond
476  Node *lp = region->in(1);
477  Node *rp = region->in(2);
478  if (!lp || !rp) return NULL;
479  Node *lp_c = lp->in(0);
480  if (lp_c == NULL || lp_c != rp->in(0) || !lp_c->is_If()) return NULL;
481  IfNode *iff = lp_c->as_If();
482
483  // Check for ops pinned in an arm of the diamond.
484  // Can't remove the control flow in this case
485  if (lp->outcnt() > 1) return NULL;
486  if (rp->outcnt() > 1) return NULL;
487
488  IdealLoopTree* r_loop = get_loop(region);
489  assert(r_loop == get_loop(iff), "sanity");
490  // Always convert to CMOVE if all results are used only outside this loop.
491  bool used_inside_loop = (r_loop == _ltree_root);
492
493  // Check profitability
494  int cost = 0;
495  int phis = 0;
496  for (DUIterator_Fast imax, i = region->fast_outs(imax); i < imax; i++) {
497    Node *out = region->fast_out(i);
498    if (!out->is_Phi()) continue; // Ignore other control edges, etc
499    phis++;
500    PhiNode* phi = out->as_Phi();
501    BasicType bt = phi->type()->basic_type();
502    switch (bt) {
503    case T_FLOAT:
504    case T_DOUBLE: {
505      cost += Matcher::float_cmove_cost(); // Could be very expensive
506      break;
507    }
508    case T_LONG: {
509      cost += Matcher::long_cmove_cost(); // May encodes as 2 CMOV's
510    }
511    case T_INT:                 // These all CMOV fine
512    case T_ADDRESS: {           // (RawPtr)
513      cost++;
514      break;
515    }
516    case T_NARROWOOP: // Fall through
517    case T_OBJECT: {            // Base oops are OK, but not derived oops
518      const TypeOopPtr *tp = phi->type()->make_ptr()->isa_oopptr();
519      // Derived pointers are Bad (tm): what's the Base (for GC purposes) of a
520      // CMOVE'd derived pointer?  It's a CMOVE'd derived base.  Thus
521      // CMOVE'ing a derived pointer requires we also CMOVE the base.  If we
522      // have a Phi for the base here that we convert to a CMOVE all is well
523      // and good.  But if the base is dead, we'll not make a CMOVE.  Later
524      // the allocator will have to produce a base by creating a CMOVE of the
525      // relevant bases.  This puts the allocator in the business of
526      // manufacturing expensive instructions, generally a bad plan.
527      // Just Say No to Conditionally-Moved Derived Pointers.
528      if (tp && tp->offset() != 0)
529        return NULL;
530      cost++;
531      break;
532    }
533    default:
534      return NULL;              // In particular, can't do memory or I/O
535    }
536    // Add in cost any speculative ops
537    for (uint j = 1; j < region->req(); j++) {
538      Node *proj = region->in(j);
539      Node *inp = phi->in(j);
540      if (get_ctrl(inp) == proj) { // Found local op
541        cost++;
542        // Check for a chain of dependent ops; these will all become
543        // speculative in a CMOV.
544        for (uint k = 1; k < inp->req(); k++)
545          if (get_ctrl(inp->in(k)) == proj)
546            cost += ConditionalMoveLimit; // Too much speculative goo
547      }
548    }
549    // See if the Phi is used by a Cmp or Narrow oop Decode/Encode.
550    // This will likely Split-If, a higher-payoff operation.
551    for (DUIterator_Fast kmax, k = phi->fast_outs(kmax); k < kmax; k++) {
552      Node* use = phi->fast_out(k);
553      if (use->is_Cmp() || use->is_DecodeNarrowPtr() || use->is_EncodeNarrowPtr())
554        cost += ConditionalMoveLimit;
555      // Is there a use inside the loop?
556      // Note: check only basic types since CMoveP is pinned.
557      if (!used_inside_loop && is_java_primitive(bt)) {
558        IdealLoopTree* u_loop = get_loop(has_ctrl(use) ? get_ctrl(use) : use);
559        if (r_loop == u_loop || r_loop->is_member(u_loop)) {
560          used_inside_loop = true;
561        }
562      }
563    }
564  }
565  Node* bol = iff->in(1);
566  assert(bol->Opcode() == Op_Bool, "");
567  int cmp_op = bol->in(1)->Opcode();
568  // It is expensive to generate flags from a float compare.
569  // Avoid duplicated float compare.
570  if (phis > 1 && (cmp_op == Op_CmpF || cmp_op == Op_CmpD)) return NULL;
571
572  float infrequent_prob = PROB_UNLIKELY_MAG(3);
573  // Ignore cost and blocks frequency if CMOVE can be moved outside the loop.
574  if (used_inside_loop) {
575    if (cost >= ConditionalMoveLimit) return NULL; // Too much goo
576
577    // BlockLayoutByFrequency optimization moves infrequent branch
578    // from hot path. No point in CMOV'ing in such case (110 is used
579    // instead of 100 to take into account not exactness of float value).
580    if (BlockLayoutByFrequency) {
581      infrequent_prob = MAX2(infrequent_prob, (float)BlockLayoutMinDiamondPercentage/110.0f);
582    }
583  }
584  // Check for highly predictable branch.  No point in CMOV'ing if
585  // we are going to predict accurately all the time.
586  if (iff->_prob < infrequent_prob ||
587      iff->_prob > (1.0f - infrequent_prob))
588    return NULL;
589
590  // --------------
591  // Now replace all Phis with CMOV's
592  Node *cmov_ctrl = iff->in(0);
593  uint flip = (lp->Opcode() == Op_IfTrue);
594  while (1) {
595    PhiNode* phi = NULL;
596    for (DUIterator_Fast imax, i = region->fast_outs(imax); i < imax; i++) {
597      Node *out = region->fast_out(i);
598      if (out->is_Phi()) {
599        phi = out->as_Phi();
600        break;
601      }
602    }
603    if (phi == NULL)  break;
604#ifndef PRODUCT
605    if (PrintOpto && VerifyLoopOptimizations) tty->print_cr("CMOV");
606#endif
607    // Move speculative ops
608    for (uint j = 1; j < region->req(); j++) {
609      Node *proj = region->in(j);
610      Node *inp = phi->in(j);
611      if (get_ctrl(inp) == proj) { // Found local op
612#ifndef PRODUCT
613        if (PrintOpto && VerifyLoopOptimizations) {
614          tty->print("  speculate: ");
615          inp->dump();
616        }
617#endif
618        set_ctrl(inp, cmov_ctrl);
619      }
620    }
621    Node *cmov = CMoveNode::make( C, cmov_ctrl, iff->in(1), phi->in(1+flip), phi->in(2-flip), _igvn.type(phi) );
622    register_new_node( cmov, cmov_ctrl );
623    _igvn.replace_node( phi, cmov );
624#ifndef PRODUCT
625    if (TraceLoopOpts) {
626      tty->print("CMOV  ");
627      r_loop->dump_head();
628      if (Verbose) {
629        bol->in(1)->dump(1);
630        cmov->dump(1);
631      }
632    }
633    if (VerifyLoopOptimizations) verify();
634#endif
635  }
636
637  // The useless CFG diamond will fold up later; see the optimization in
638  // RegionNode::Ideal.
639  _igvn._worklist.push(region);
640
641  return iff->in(1);
642}
643
644//------------------------------split_if_with_blocks_pre-----------------------
645// Do the real work in a non-recursive function.  Data nodes want to be
646// cloned in the pre-order so they can feed each other nicely.
647Node *PhaseIdealLoop::split_if_with_blocks_pre( Node *n ) {
648  // Cloning these guys is unlikely to win
649  int n_op = n->Opcode();
650  if( n_op == Op_MergeMem ) return n;
651  if( n->is_Proj() ) return n;
652  // Do not clone-up CmpFXXX variations, as these are always
653  // followed by a CmpI
654  if( n->is_Cmp() ) return n;
655  // Attempt to use a conditional move instead of a phi/branch
656  if( ConditionalMoveLimit > 0 && n_op == Op_Region ) {
657    Node *cmov = conditional_move( n );
658    if( cmov ) return cmov;
659  }
660  if( n->is_CFG() || n->is_LoadStore() )
661    return n;
662  if( n_op == Op_Opaque1 ||     // Opaque nodes cannot be mod'd
663      n_op == Op_Opaque2 ) {
664    if( !C->major_progress() )   // If chance of no more loop opts...
665      _igvn._worklist.push(n);  // maybe we'll remove them
666    return n;
667  }
668
669  if( n->is_Con() ) return n;   // No cloning for Con nodes
670
671  Node *n_ctrl = get_ctrl(n);
672  if( !n_ctrl ) return n;       // Dead node
673
674  // Attempt to remix address expressions for loop invariants
675  Node *m = remix_address_expressions( n );
676  if( m ) return m;
677
678  // Determine if the Node has inputs from some local Phi.
679  // Returns the block to clone thru.
680  Node *n_blk = has_local_phi_input( n );
681  if( !n_blk ) return n;
682  // Do not clone the trip counter through on a CountedLoop
683  // (messes up the canonical shape).
684  if( n_blk->is_CountedLoop() && n->Opcode() == Op_AddI ) return n;
685
686  // Check for having no control input; not pinned.  Allow
687  // dominating control.
688  if( n->in(0) ) {
689    Node *dom = idom(n_blk);
690    if( dom_lca( n->in(0), dom ) != n->in(0) )
691      return n;
692  }
693  // Policy: when is it profitable.  You must get more wins than
694  // policy before it is considered profitable.  Policy is usually 0,
695  // so 1 win is considered profitable.  Big merges will require big
696  // cloning, so get a larger policy.
697  int policy = n_blk->req() >> 2;
698
699  // If the loop is a candidate for range check elimination,
700  // delay splitting through it's phi until a later loop optimization
701  if (n_blk->is_CountedLoop()) {
702    IdealLoopTree *lp = get_loop(n_blk);
703    if (lp && lp->_rce_candidate) {
704      return n;
705    }
706  }
707
708  // Use same limit as split_if_with_blocks_post
709  if( C->unique() > 35000 ) return n; // Method too big
710
711  // Split 'n' through the merge point if it is profitable
712  Node *phi = split_thru_phi( n, n_blk, policy );
713  if (!phi) return n;
714
715  // Found a Phi to split thru!
716  // Replace 'n' with the new phi
717  _igvn.replace_node( n, phi );
718  // Moved a load around the loop, 'en-registering' something.
719  if (n_blk->is_Loop() && n->is_Load() &&
720      !phi->in(LoopNode::LoopBackControl)->is_Load())
721    C->set_major_progress();
722
723  return phi;
724}
725
726static bool merge_point_too_heavy(Compile* C, Node* region) {
727  // Bail out if the region and its phis have too many users.
728  int weight = 0;
729  for (DUIterator_Fast imax, i = region->fast_outs(imax); i < imax; i++) {
730    weight += region->fast_out(i)->outcnt();
731  }
732  int nodes_left = MaxNodeLimit - C->unique();
733  if (weight * 8 > nodes_left) {
734#ifndef PRODUCT
735    if (PrintOpto)
736      tty->print_cr("*** Split-if bails out:  %d nodes, region weight %d", C->unique(), weight);
737#endif
738    return true;
739  } else {
740    return false;
741  }
742}
743
744static bool merge_point_safe(Node* region) {
745  // 4799512: Stop split_if_with_blocks from splitting a block with a ConvI2LNode
746  // having a PhiNode input. This sidesteps the dangerous case where the split
747  // ConvI2LNode may become TOP if the input Value() does not
748  // overlap the ConvI2L range, leaving a node which may not dominate its
749  // uses.
750  // A better fix for this problem can be found in the BugTraq entry, but
751  // expediency for Mantis demands this hack.
752  // 6855164: If the merge point has a FastLockNode with a PhiNode input, we stop
753  // split_if_with_blocks from splitting a block because we could not move around
754  // the FastLockNode.
755  for (DUIterator_Fast imax, i = region->fast_outs(imax); i < imax; i++) {
756    Node* n = region->fast_out(i);
757    if (n->is_Phi()) {
758      for (DUIterator_Fast jmax, j = n->fast_outs(jmax); j < jmax; j++) {
759        Node* m = n->fast_out(j);
760        if (m->is_FastLock())
761          return false;
762#ifdef _LP64
763        if (m->Opcode() == Op_ConvI2L)
764          return false;
765#endif
766      }
767    }
768  }
769  return true;
770}
771
772
773//------------------------------place_near_use---------------------------------
774// Place some computation next to use but not inside inner loops.
775// For inner loop uses move it to the preheader area.
776Node *PhaseIdealLoop::place_near_use( Node *useblock ) const {
777  IdealLoopTree *u_loop = get_loop( useblock );
778  return (u_loop->_irreducible || u_loop->_child)
779    ? useblock
780    : u_loop->_head->in(LoopNode::EntryControl);
781}
782
783
784//------------------------------split_if_with_blocks_post----------------------
785// Do the real work in a non-recursive function.  CFG hackery wants to be
786// in the post-order, so it can dirty the I-DOM info and not use the dirtied
787// info.
788void PhaseIdealLoop::split_if_with_blocks_post( Node *n ) {
789
790  // Cloning Cmp through Phi's involves the split-if transform.
791  // FastLock is not used by an If
792  if( n->is_Cmp() && !n->is_FastLock() ) {
793    if( C->unique() > 35000 ) return; // Method too big
794
795    // Do not do 'split-if' if irreducible loops are present.
796    if( _has_irreducible_loops )
797      return;
798
799    Node *n_ctrl = get_ctrl(n);
800    // Determine if the Node has inputs from some local Phi.
801    // Returns the block to clone thru.
802    Node *n_blk = has_local_phi_input( n );
803    if( n_blk != n_ctrl ) return;
804
805    if( merge_point_too_heavy(C, n_ctrl) )
806      return;
807
808    if( n->outcnt() != 1 ) return; // Multiple bool's from 1 compare?
809    Node *bol = n->unique_out();
810    assert( bol->is_Bool(), "expect a bool here" );
811    if( bol->outcnt() != 1 ) return;// Multiple branches from 1 compare?
812    Node *iff = bol->unique_out();
813
814    // Check some safety conditions
815    if( iff->is_If() ) {        // Classic split-if?
816      if( iff->in(0) != n_ctrl ) return; // Compare must be in same blk as if
817    } else if (iff->is_CMove()) { // Trying to split-up a CMOVE
818      // Can't split CMove with different control edge.
819      if (iff->in(0) != NULL && iff->in(0) != n_ctrl ) return;
820      if( get_ctrl(iff->in(2)) == n_ctrl ||
821          get_ctrl(iff->in(3)) == n_ctrl )
822        return;                 // Inputs not yet split-up
823      if ( get_loop(n_ctrl) != get_loop(get_ctrl(iff)) ) {
824        return;                 // Loop-invar test gates loop-varying CMOVE
825      }
826    } else {
827      return;  // some other kind of node, such as an Allocate
828    }
829
830    // Do not do 'split-if' if some paths are dead.  First do dead code
831    // elimination and then see if its still profitable.
832    for( uint i = 1; i < n_ctrl->req(); i++ )
833      if( n_ctrl->in(i) == C->top() )
834        return;
835
836    // When is split-if profitable?  Every 'win' on means some control flow
837    // goes dead, so it's almost always a win.
838    int policy = 0;
839    // If trying to do a 'Split-If' at the loop head, it is only
840    // profitable if the cmp folds up on BOTH paths.  Otherwise we
841    // risk peeling a loop forever.
842
843    // CNC - Disabled for now.  Requires careful handling of loop
844    // body selection for the cloned code.  Also, make sure we check
845    // for any input path not being in the same loop as n_ctrl.  For
846    // irreducible loops we cannot check for 'n_ctrl->is_Loop()'
847    // because the alternative loop entry points won't be converted
848    // into LoopNodes.
849    IdealLoopTree *n_loop = get_loop(n_ctrl);
850    for( uint j = 1; j < n_ctrl->req(); j++ )
851      if( get_loop(n_ctrl->in(j)) != n_loop )
852        return;
853
854    // Check for safety of the merge point.
855    if( !merge_point_safe(n_ctrl) ) {
856      return;
857    }
858
859    // Split compare 'n' through the merge point if it is profitable
860    Node *phi = split_thru_phi( n, n_ctrl, policy );
861    if( !phi ) return;
862
863    // Found a Phi to split thru!
864    // Replace 'n' with the new phi
865    _igvn.replace_node( n, phi );
866
867    // Now split the bool up thru the phi
868    Node *bolphi = split_thru_phi( bol, n_ctrl, -1 );
869    _igvn.replace_node( bol, bolphi );
870    assert( iff->in(1) == bolphi, "" );
871    if( bolphi->Value(&_igvn)->singleton() )
872      return;
873
874    // Conditional-move?  Must split up now
875    if( !iff->is_If() ) {
876      Node *cmovphi = split_thru_phi( iff, n_ctrl, -1 );
877      _igvn.replace_node( iff, cmovphi );
878      return;
879    }
880
881    // Now split the IF
882    do_split_if( iff );
883    return;
884  }
885
886  // Check for an IF ready to split; one that has its
887  // condition codes input coming from a Phi at the block start.
888  int n_op = n->Opcode();
889
890  // Check for an IF being dominated by another IF same test
891  if( n_op == Op_If ) {
892    Node *bol = n->in(1);
893    uint max = bol->outcnt();
894    // Check for same test used more than once?
895    if( n_op == Op_If && max > 1 && bol->is_Bool() ) {
896      // Search up IDOMs to see if this IF is dominated.
897      Node *cutoff = get_ctrl(bol);
898
899      // Now search up IDOMs till cutoff, looking for a dominating test
900      Node *prevdom = n;
901      Node *dom = idom(prevdom);
902      while( dom != cutoff ) {
903        if( dom->req() > 1 && dom->in(1) == bol && prevdom->in(0) == dom ) {
904          // Replace the dominated test with an obvious true or false.
905          // Place it on the IGVN worklist for later cleanup.
906          C->set_major_progress();
907          dominated_by( prevdom, n, false, true );
908#ifndef PRODUCT
909          if( VerifyLoopOptimizations ) verify();
910#endif
911          return;
912        }
913        prevdom = dom;
914        dom = idom(prevdom);
915      }
916    }
917  }
918
919  // See if a shared loop-varying computation has no loop-varying uses.
920  // Happens if something is only used for JVM state in uncommon trap exits,
921  // like various versions of induction variable+offset.  Clone the
922  // computation per usage to allow it to sink out of the loop.
923  if (has_ctrl(n) && !n->in(0)) {// n not dead and has no control edge (can float about)
924    Node *n_ctrl = get_ctrl(n);
925    IdealLoopTree *n_loop = get_loop(n_ctrl);
926    if( n_loop != _ltree_root ) {
927      DUIterator_Fast imax, i = n->fast_outs(imax);
928      for (; i < imax; i++) {
929        Node* u = n->fast_out(i);
930        if( !has_ctrl(u) )     break; // Found control user
931        IdealLoopTree *u_loop = get_loop(get_ctrl(u));
932        if( u_loop == n_loop ) break; // Found loop-varying use
933        if( n_loop->is_member( u_loop ) ) break; // Found use in inner loop
934        if( u->Opcode() == Op_Opaque1 ) break; // Found loop limit, bugfix for 4677003
935      }
936      bool did_break = (i < imax);  // Did we break out of the previous loop?
937      if (!did_break && n->outcnt() > 1) { // All uses in outer loops!
938        Node *late_load_ctrl = NULL;
939        if (n->is_Load()) {
940          // If n is a load, get and save the result from get_late_ctrl(),
941          // to be later used in calculating the control for n's clones.
942          clear_dom_lca_tags();
943          late_load_ctrl = get_late_ctrl(n, n_ctrl);
944        }
945        // If n is a load, and the late control is the same as the current
946        // control, then the cloning of n is a pointless exercise, because
947        // GVN will ensure that we end up where we started.
948        if (!n->is_Load() || late_load_ctrl != n_ctrl) {
949          for (DUIterator_Last jmin, j = n->last_outs(jmin); j >= jmin; ) {
950            Node *u = n->last_out(j); // Clone private computation per use
951            _igvn.rehash_node_delayed(u);
952            Node *x = n->clone(); // Clone computation
953            Node *x_ctrl = NULL;
954            if( u->is_Phi() ) {
955              // Replace all uses of normal nodes.  Replace Phi uses
956              // individually, so the separate Nodes can sink down
957              // different paths.
958              uint k = 1;
959              while( u->in(k) != n ) k++;
960              u->set_req( k, x );
961              // x goes next to Phi input path
962              x_ctrl = u->in(0)->in(k);
963              --j;
964            } else {              // Normal use
965              // Replace all uses
966              for( uint k = 0; k < u->req(); k++ ) {
967                if( u->in(k) == n ) {
968                  u->set_req( k, x );
969                  --j;
970                }
971              }
972              x_ctrl = get_ctrl(u);
973            }
974
975            // Find control for 'x' next to use but not inside inner loops.
976            // For inner loop uses get the preheader area.
977            x_ctrl = place_near_use(x_ctrl);
978
979            if (n->is_Load()) {
980              // For loads, add a control edge to a CFG node outside of the loop
981              // to force them to not combine and return back inside the loop
982              // during GVN optimization (4641526).
983              //
984              // Because we are setting the actual control input, factor in
985              // the result from get_late_ctrl() so we respect any
986              // anti-dependences. (6233005).
987              x_ctrl = dom_lca(late_load_ctrl, x_ctrl);
988
989              // Don't allow the control input to be a CFG splitting node.
990              // Such nodes should only have ProjNodes as outs, e.g. IfNode
991              // should only have IfTrueNode and IfFalseNode (4985384).
992              x_ctrl = find_non_split_ctrl(x_ctrl);
993              assert(dom_depth(n_ctrl) <= dom_depth(x_ctrl), "n is later than its clone");
994
995              x->set_req(0, x_ctrl);
996            }
997            register_new_node(x, x_ctrl);
998
999            // Some institutional knowledge is needed here: 'x' is
1000            // yanked because if the optimizer runs GVN on it all the
1001            // cloned x's will common up and undo this optimization and
1002            // be forced back in the loop.  This is annoying because it
1003            // makes +VerifyOpto report false-positives on progress.  I
1004            // tried setting control edges on the x's to force them to
1005            // not combine, but the matching gets worried when it tries
1006            // to fold a StoreP and an AddP together (as part of an
1007            // address expression) and the AddP and StoreP have
1008            // different controls.
1009            if (!x->is_Load() && !x->is_DecodeNarrowPtr()) _igvn._worklist.yank(x);
1010          }
1011          _igvn.remove_dead_node(n);
1012        }
1013      }
1014    }
1015  }
1016
1017  // Check for Opaque2's who's loop has disappeared - who's input is in the
1018  // same loop nest as their output.  Remove 'em, they are no longer useful.
1019  if( n_op == Op_Opaque2 &&
1020      n->in(1) != NULL &&
1021      get_loop(get_ctrl(n)) == get_loop(get_ctrl(n->in(1))) ) {
1022    _igvn.replace_node( n, n->in(1) );
1023  }
1024}
1025
1026//------------------------------split_if_with_blocks---------------------------
1027// Check for aggressive application of 'split-if' optimization,
1028// using basic block level info.
1029void PhaseIdealLoop::split_if_with_blocks( VectorSet &visited, Node_Stack &nstack ) {
1030  Node *n = C->root();
1031  visited.set(n->_idx); // first, mark node as visited
1032  // Do pre-visit work for root
1033  n = split_if_with_blocks_pre( n );
1034  uint cnt = n->outcnt();
1035  uint i   = 0;
1036  while (true) {
1037    // Visit all children
1038    if (i < cnt) {
1039      Node* use = n->raw_out(i);
1040      ++i;
1041      if (use->outcnt() != 0 && !visited.test_set(use->_idx)) {
1042        // Now do pre-visit work for this use
1043        use = split_if_with_blocks_pre( use );
1044        nstack.push(n, i); // Save parent and next use's index.
1045        n   = use;         // Process all children of current use.
1046        cnt = use->outcnt();
1047        i   = 0;
1048      }
1049    }
1050    else {
1051      // All of n's children have been processed, complete post-processing.
1052      if (cnt != 0 && !n->is_Con()) {
1053        assert(has_node(n), "no dead nodes");
1054        split_if_with_blocks_post( n );
1055      }
1056      if (nstack.is_empty()) {
1057        // Finished all nodes on stack.
1058        break;
1059      }
1060      // Get saved parent node and next use's index. Visit the rest of uses.
1061      n   = nstack.node();
1062      cnt = n->outcnt();
1063      i   = nstack.index();
1064      nstack.pop();
1065    }
1066  }
1067}
1068
1069
1070//=============================================================================
1071//
1072//                   C L O N E   A   L O O P   B O D Y
1073//
1074
1075//------------------------------clone_iff--------------------------------------
1076// Passed in a Phi merging (recursively) some nearly equivalent Bool/Cmps.
1077// "Nearly" because all Nodes have been cloned from the original in the loop,
1078// but the fall-in edges to the Cmp are different.  Clone bool/Cmp pairs
1079// through the Phi recursively, and return a Bool.
1080BoolNode *PhaseIdealLoop::clone_iff( PhiNode *phi, IdealLoopTree *loop ) {
1081
1082  // Convert this Phi into a Phi merging Bools
1083  uint i;
1084  for( i = 1; i < phi->req(); i++ ) {
1085    Node *b = phi->in(i);
1086    if( b->is_Phi() ) {
1087      _igvn.replace_input_of(phi, i, clone_iff( b->as_Phi(), loop ));
1088    } else {
1089      assert( b->is_Bool(), "" );
1090    }
1091  }
1092
1093  Node *sample_bool = phi->in(1);
1094  Node *sample_cmp  = sample_bool->in(1);
1095
1096  // Make Phis to merge the Cmp's inputs.
1097  PhiNode *phi1 = new (C) PhiNode( phi->in(0), Type::TOP );
1098  PhiNode *phi2 = new (C) PhiNode( phi->in(0), Type::TOP );
1099  for( i = 1; i < phi->req(); i++ ) {
1100    Node *n1 = phi->in(i)->in(1)->in(1);
1101    Node *n2 = phi->in(i)->in(1)->in(2);
1102    phi1->set_req( i, n1 );
1103    phi2->set_req( i, n2 );
1104    phi1->set_type( phi1->type()->meet(n1->bottom_type()) );
1105    phi2->set_type( phi2->type()->meet(n2->bottom_type()) );
1106  }
1107  // See if these Phis have been made before.
1108  // Register with optimizer
1109  Node *hit1 = _igvn.hash_find_insert(phi1);
1110  if( hit1 ) {                  // Hit, toss just made Phi
1111    _igvn.remove_dead_node(phi1); // Remove new phi
1112    assert( hit1->is_Phi(), "" );
1113    phi1 = (PhiNode*)hit1;      // Use existing phi
1114  } else {                      // Miss
1115    _igvn.register_new_node_with_optimizer(phi1);
1116  }
1117  Node *hit2 = _igvn.hash_find_insert(phi2);
1118  if( hit2 ) {                  // Hit, toss just made Phi
1119    _igvn.remove_dead_node(phi2); // Remove new phi
1120    assert( hit2->is_Phi(), "" );
1121    phi2 = (PhiNode*)hit2;      // Use existing phi
1122  } else {                      // Miss
1123    _igvn.register_new_node_with_optimizer(phi2);
1124  }
1125  // Register Phis with loop/block info
1126  set_ctrl(phi1, phi->in(0));
1127  set_ctrl(phi2, phi->in(0));
1128  // Make a new Cmp
1129  Node *cmp = sample_cmp->clone();
1130  cmp->set_req( 1, phi1 );
1131  cmp->set_req( 2, phi2 );
1132  _igvn.register_new_node_with_optimizer(cmp);
1133  set_ctrl(cmp, phi->in(0));
1134
1135  // Make a new Bool
1136  Node *b = sample_bool->clone();
1137  b->set_req(1,cmp);
1138  _igvn.register_new_node_with_optimizer(b);
1139  set_ctrl(b, phi->in(0));
1140
1141  assert( b->is_Bool(), "" );
1142  return (BoolNode*)b;
1143}
1144
1145//------------------------------clone_bool-------------------------------------
1146// Passed in a Phi merging (recursively) some nearly equivalent Bool/Cmps.
1147// "Nearly" because all Nodes have been cloned from the original in the loop,
1148// but the fall-in edges to the Cmp are different.  Clone bool/Cmp pairs
1149// through the Phi recursively, and return a Bool.
1150CmpNode *PhaseIdealLoop::clone_bool( PhiNode *phi, IdealLoopTree *loop ) {
1151  uint i;
1152  // Convert this Phi into a Phi merging Bools
1153  for( i = 1; i < phi->req(); i++ ) {
1154    Node *b = phi->in(i);
1155    if( b->is_Phi() ) {
1156      _igvn.replace_input_of(phi, i, clone_bool( b->as_Phi(), loop ));
1157    } else {
1158      assert( b->is_Cmp() || b->is_top(), "inputs are all Cmp or TOP" );
1159    }
1160  }
1161
1162  Node *sample_cmp = phi->in(1);
1163
1164  // Make Phis to merge the Cmp's inputs.
1165  PhiNode *phi1 = new (C) PhiNode( phi->in(0), Type::TOP );
1166  PhiNode *phi2 = new (C) PhiNode( phi->in(0), Type::TOP );
1167  for( uint j = 1; j < phi->req(); j++ ) {
1168    Node *cmp_top = phi->in(j); // Inputs are all Cmp or TOP
1169    Node *n1, *n2;
1170    if( cmp_top->is_Cmp() ) {
1171      n1 = cmp_top->in(1);
1172      n2 = cmp_top->in(2);
1173    } else {
1174      n1 = n2 = cmp_top;
1175    }
1176    phi1->set_req( j, n1 );
1177    phi2->set_req( j, n2 );
1178    phi1->set_type( phi1->type()->meet(n1->bottom_type()) );
1179    phi2->set_type( phi2->type()->meet(n2->bottom_type()) );
1180  }
1181
1182  // See if these Phis have been made before.
1183  // Register with optimizer
1184  Node *hit1 = _igvn.hash_find_insert(phi1);
1185  if( hit1 ) {                  // Hit, toss just made Phi
1186    _igvn.remove_dead_node(phi1); // Remove new phi
1187    assert( hit1->is_Phi(), "" );
1188    phi1 = (PhiNode*)hit1;      // Use existing phi
1189  } else {                      // Miss
1190    _igvn.register_new_node_with_optimizer(phi1);
1191  }
1192  Node *hit2 = _igvn.hash_find_insert(phi2);
1193  if( hit2 ) {                  // Hit, toss just made Phi
1194    _igvn.remove_dead_node(phi2); // Remove new phi
1195    assert( hit2->is_Phi(), "" );
1196    phi2 = (PhiNode*)hit2;      // Use existing phi
1197  } else {                      // Miss
1198    _igvn.register_new_node_with_optimizer(phi2);
1199  }
1200  // Register Phis with loop/block info
1201  set_ctrl(phi1, phi->in(0));
1202  set_ctrl(phi2, phi->in(0));
1203  // Make a new Cmp
1204  Node *cmp = sample_cmp->clone();
1205  cmp->set_req( 1, phi1 );
1206  cmp->set_req( 2, phi2 );
1207  _igvn.register_new_node_with_optimizer(cmp);
1208  set_ctrl(cmp, phi->in(0));
1209
1210  assert( cmp->is_Cmp(), "" );
1211  return (CmpNode*)cmp;
1212}
1213
1214//------------------------------sink_use---------------------------------------
1215// If 'use' was in the loop-exit block, it now needs to be sunk
1216// below the post-loop merge point.
1217void PhaseIdealLoop::sink_use( Node *use, Node *post_loop ) {
1218  if (!use->is_CFG() && get_ctrl(use) == post_loop->in(2)) {
1219    set_ctrl(use, post_loop);
1220    for (DUIterator j = use->outs(); use->has_out(j); j++)
1221      sink_use(use->out(j), post_loop);
1222  }
1223}
1224
1225//------------------------------clone_loop-------------------------------------
1226//
1227//                   C L O N E   A   L O O P   B O D Y
1228//
1229// This is the basic building block of the loop optimizations.  It clones an
1230// entire loop body.  It makes an old_new loop body mapping; with this mapping
1231// you can find the new-loop equivalent to an old-loop node.  All new-loop
1232// nodes are exactly equal to their old-loop counterparts, all edges are the
1233// same.  All exits from the old-loop now have a RegionNode that merges the
1234// equivalent new-loop path.  This is true even for the normal "loop-exit"
1235// condition.  All uses of loop-invariant old-loop values now come from (one
1236// or more) Phis that merge their new-loop equivalents.
1237//
1238// This operation leaves the graph in an illegal state: there are two valid
1239// control edges coming from the loop pre-header to both loop bodies.  I'll
1240// definitely have to hack the graph after running this transform.
1241//
1242// From this building block I will further edit edges to perform loop peeling
1243// or loop unrolling or iteration splitting (Range-Check-Elimination), etc.
1244//
1245// Parameter side_by_size_idom:
1246//   When side_by_size_idom is NULL, the dominator tree is constructed for
1247//      the clone loop to dominate the original.  Used in construction of
1248//      pre-main-post loop sequence.
1249//   When nonnull, the clone and original are side-by-side, both are
1250//      dominated by the side_by_side_idom node.  Used in construction of
1251//      unswitched loops.
1252void PhaseIdealLoop::clone_loop( IdealLoopTree *loop, Node_List &old_new, int dd,
1253                                 Node* side_by_side_idom) {
1254
1255  // Step 1: Clone the loop body.  Make the old->new mapping.
1256  uint i;
1257  for( i = 0; i < loop->_body.size(); i++ ) {
1258    Node *old = loop->_body.at(i);
1259    Node *nnn = old->clone();
1260    old_new.map( old->_idx, nnn );
1261    _igvn.register_new_node_with_optimizer(nnn);
1262  }
1263
1264
1265  // Step 2: Fix the edges in the new body.  If the old input is outside the
1266  // loop use it.  If the old input is INside the loop, use the corresponding
1267  // new node instead.
1268  for( i = 0; i < loop->_body.size(); i++ ) {
1269    Node *old = loop->_body.at(i);
1270    Node *nnn = old_new[old->_idx];
1271    // Fix CFG/Loop controlling the new node
1272    if (has_ctrl(old)) {
1273      set_ctrl(nnn, old_new[get_ctrl(old)->_idx]);
1274    } else {
1275      set_loop(nnn, loop->_parent);
1276      if (old->outcnt() > 0) {
1277        set_idom( nnn, old_new[idom(old)->_idx], dd );
1278      }
1279    }
1280    // Correct edges to the new node
1281    for( uint j = 0; j < nnn->req(); j++ ) {
1282        Node *n = nnn->in(j);
1283        if( n ) {
1284          IdealLoopTree *old_in_loop = get_loop( has_ctrl(n) ? get_ctrl(n) : n );
1285          if( loop->is_member( old_in_loop ) )
1286            nnn->set_req(j, old_new[n->_idx]);
1287        }
1288    }
1289    _igvn.hash_find_insert(nnn);
1290  }
1291  Node *newhead = old_new[loop->_head->_idx];
1292  set_idom(newhead, newhead->in(LoopNode::EntryControl), dd);
1293
1294
1295  // Step 3: Now fix control uses.  Loop varying control uses have already
1296  // been fixed up (as part of all input edges in Step 2).  Loop invariant
1297  // control uses must be either an IfFalse or an IfTrue.  Make a merge
1298  // point to merge the old and new IfFalse/IfTrue nodes; make the use
1299  // refer to this.
1300  ResourceArea *area = Thread::current()->resource_area();
1301  Node_List worklist(area);
1302  uint new_counter = C->unique();
1303  for( i = 0; i < loop->_body.size(); i++ ) {
1304    Node* old = loop->_body.at(i);
1305    if( !old->is_CFG() ) continue;
1306    Node* nnn = old_new[old->_idx];
1307
1308    // Copy uses to a worklist, so I can munge the def-use info
1309    // with impunity.
1310    for (DUIterator_Fast jmax, j = old->fast_outs(jmax); j < jmax; j++)
1311      worklist.push(old->fast_out(j));
1312
1313    while( worklist.size() ) {  // Visit all uses
1314      Node *use = worklist.pop();
1315      if (!has_node(use))  continue; // Ignore dead nodes
1316      IdealLoopTree *use_loop = get_loop( has_ctrl(use) ? get_ctrl(use) : use );
1317      if( !loop->is_member( use_loop ) && use->is_CFG() ) {
1318        // Both OLD and USE are CFG nodes here.
1319        assert( use->is_Proj(), "" );
1320
1321        // Clone the loop exit control projection
1322        Node *newuse = use->clone();
1323        newuse->set_req(0,nnn);
1324        _igvn.register_new_node_with_optimizer(newuse);
1325        set_loop(newuse, use_loop);
1326        set_idom(newuse, nnn, dom_depth(nnn) + 1 );
1327
1328        // We need a Region to merge the exit from the peeled body and the
1329        // exit from the old loop body.
1330        RegionNode *r = new (C) RegionNode(3);
1331        // Map the old use to the new merge point
1332        old_new.map( use->_idx, r );
1333        uint dd_r = MIN2(dom_depth(newuse),dom_depth(use));
1334        assert( dd_r >= dom_depth(dom_lca(newuse,use)), "" );
1335
1336        // The original user of 'use' uses 'r' instead.
1337        for (DUIterator_Last lmin, l = use->last_outs(lmin); l >= lmin;) {
1338          Node* useuse = use->last_out(l);
1339          _igvn.rehash_node_delayed(useuse);
1340          uint uses_found = 0;
1341          if( useuse->in(0) == use ) {
1342            useuse->set_req(0, r);
1343            uses_found++;
1344            if( useuse->is_CFG() ) {
1345              assert( dom_depth(useuse) > dd_r, "" );
1346              set_idom(useuse, r, dom_depth(useuse));
1347            }
1348          }
1349          for( uint k = 1; k < useuse->req(); k++ ) {
1350            if( useuse->in(k) == use ) {
1351              useuse->set_req(k, r);
1352              uses_found++;
1353            }
1354          }
1355          l -= uses_found;    // we deleted 1 or more copies of this edge
1356        }
1357
1358        // Now finish up 'r'
1359        r->set_req( 1, newuse );
1360        r->set_req( 2,    use );
1361        _igvn.register_new_node_with_optimizer(r);
1362        set_loop(r, use_loop);
1363        set_idom(r, !side_by_side_idom ? newuse->in(0) : side_by_side_idom, dd_r);
1364      } // End of if a loop-exit test
1365    }
1366  }
1367
1368  // Step 4: If loop-invariant use is not control, it must be dominated by a
1369  // loop exit IfFalse/IfTrue.  Find "proper" loop exit.  Make a Region
1370  // there if needed.  Make a Phi there merging old and new used values.
1371  Node_List *split_if_set = NULL;
1372  Node_List *split_bool_set = NULL;
1373  Node_List *split_cex_set = NULL;
1374  for( i = 0; i < loop->_body.size(); i++ ) {
1375    Node* old = loop->_body.at(i);
1376    Node* nnn = old_new[old->_idx];
1377    // Copy uses to a worklist, so I can munge the def-use info
1378    // with impunity.
1379    for (DUIterator_Fast jmax, j = old->fast_outs(jmax); j < jmax; j++)
1380      worklist.push(old->fast_out(j));
1381
1382    while( worklist.size() ) {
1383      Node *use = worklist.pop();
1384      if (!has_node(use))  continue; // Ignore dead nodes
1385      if (use->in(0) == C->top())  continue;
1386      IdealLoopTree *use_loop = get_loop( has_ctrl(use) ? get_ctrl(use) : use );
1387      // Check for data-use outside of loop - at least one of OLD or USE
1388      // must not be a CFG node.
1389      if( !loop->is_member( use_loop ) && (!old->is_CFG() || !use->is_CFG())) {
1390
1391        // If the Data use is an IF, that means we have an IF outside of the
1392        // loop that is switching on a condition that is set inside of the
1393        // loop.  Happens if people set a loop-exit flag; then test the flag
1394        // in the loop to break the loop, then test is again outside of the
1395        // loop to determine which way the loop exited.
1396        if( use->is_If() || use->is_CMove() ) {
1397          // Since this code is highly unlikely, we lazily build the worklist
1398          // of such Nodes to go split.
1399          if( !split_if_set )
1400            split_if_set = new Node_List(area);
1401          split_if_set->push(use);
1402        }
1403        if( use->is_Bool() ) {
1404          if( !split_bool_set )
1405            split_bool_set = new Node_List(area);
1406          split_bool_set->push(use);
1407        }
1408        if( use->Opcode() == Op_CreateEx ) {
1409          if( !split_cex_set )
1410            split_cex_set = new Node_List(area);
1411          split_cex_set->push(use);
1412        }
1413
1414
1415        // Get "block" use is in
1416        uint idx = 0;
1417        while( use->in(idx) != old ) idx++;
1418        Node *prev = use->is_CFG() ? use : get_ctrl(use);
1419        assert( !loop->is_member( get_loop( prev ) ), "" );
1420        Node *cfg = prev->_idx >= new_counter
1421          ? prev->in(2)
1422          : idom(prev);
1423        if( use->is_Phi() )     // Phi use is in prior block
1424          cfg = prev->in(idx);  // NOT in block of Phi itself
1425        if (cfg->is_top()) {    // Use is dead?
1426          _igvn.replace_input_of(use, idx, C->top());
1427          continue;
1428        }
1429
1430        while( !loop->is_member( get_loop( cfg ) ) ) {
1431          prev = cfg;
1432          cfg = cfg->_idx >= new_counter ? cfg->in(2) : idom(cfg);
1433        }
1434        // If the use occurs after merging several exits from the loop, then
1435        // old value must have dominated all those exits.  Since the same old
1436        // value was used on all those exits we did not need a Phi at this
1437        // merge point.  NOW we do need a Phi here.  Each loop exit value
1438        // is now merged with the peeled body exit; each exit gets its own
1439        // private Phi and those Phis need to be merged here.
1440        Node *phi;
1441        if( prev->is_Region() ) {
1442          if( idx == 0 ) {      // Updating control edge?
1443            phi = prev;         // Just use existing control
1444          } else {              // Else need a new Phi
1445            phi = PhiNode::make( prev, old );
1446            // Now recursively fix up the new uses of old!
1447            for( uint i = 1; i < prev->req(); i++ ) {
1448              worklist.push(phi); // Onto worklist once for each 'old' input
1449            }
1450          }
1451        } else {
1452          // Get new RegionNode merging old and new loop exits
1453          prev = old_new[prev->_idx];
1454          assert( prev, "just made this in step 7" );
1455          if( idx == 0 ) {      // Updating control edge?
1456            phi = prev;         // Just use existing control
1457          } else {              // Else need a new Phi
1458            // Make a new Phi merging data values properly
1459            phi = PhiNode::make( prev, old );
1460            phi->set_req( 1, nnn );
1461          }
1462        }
1463        // If inserting a new Phi, check for prior hits
1464        if( idx != 0 ) {
1465          Node *hit = _igvn.hash_find_insert(phi);
1466          if( hit == NULL ) {
1467           _igvn.register_new_node_with_optimizer(phi); // Register new phi
1468          } else {                                      // or
1469            // Remove the new phi from the graph and use the hit
1470            _igvn.remove_dead_node(phi);
1471            phi = hit;                                  // Use existing phi
1472          }
1473          set_ctrl(phi, prev);
1474        }
1475        // Make 'use' use the Phi instead of the old loop body exit value
1476        _igvn.replace_input_of(use, idx, phi);
1477        if( use->_idx >= new_counter ) { // If updating new phis
1478          // Not needed for correctness, but prevents a weak assert
1479          // in AddPNode from tripping (when we end up with different
1480          // base & derived Phis that will become the same after
1481          // IGVN does CSE).
1482          Node *hit = _igvn.hash_find_insert(use);
1483          if( hit )             // Go ahead and re-hash for hits.
1484            _igvn.replace_node( use, hit );
1485        }
1486
1487        // If 'use' was in the loop-exit block, it now needs to be sunk
1488        // below the post-loop merge point.
1489        sink_use( use, prev );
1490      }
1491    }
1492  }
1493
1494  // Check for IFs that need splitting/cloning.  Happens if an IF outside of
1495  // the loop uses a condition set in the loop.  The original IF probably
1496  // takes control from one or more OLD Regions (which in turn get from NEW
1497  // Regions).  In any case, there will be a set of Phis for each merge point
1498  // from the IF up to where the original BOOL def exists the loop.
1499  if( split_if_set ) {
1500    while( split_if_set->size() ) {
1501      Node *iff = split_if_set->pop();
1502      if( iff->in(1)->is_Phi() ) {
1503        BoolNode *b = clone_iff( iff->in(1)->as_Phi(), loop );
1504        _igvn.replace_input_of(iff, 1, b);
1505      }
1506    }
1507  }
1508  if( split_bool_set ) {
1509    while( split_bool_set->size() ) {
1510      Node *b = split_bool_set->pop();
1511      Node *phi = b->in(1);
1512      assert( phi->is_Phi(), "" );
1513      CmpNode *cmp = clone_bool( (PhiNode*)phi, loop );
1514      _igvn.replace_input_of(b, 1, cmp);
1515    }
1516  }
1517  if( split_cex_set ) {
1518    while( split_cex_set->size() ) {
1519      Node *b = split_cex_set->pop();
1520      assert( b->in(0)->is_Region(), "" );
1521      assert( b->in(1)->is_Phi(), "" );
1522      assert( b->in(0)->in(0) == b->in(1)->in(0), "" );
1523      split_up( b, b->in(0), NULL );
1524    }
1525  }
1526
1527}
1528
1529
1530//---------------------- stride_of_possible_iv -------------------------------------
1531// Looks for an iff/bool/comp with one operand of the compare
1532// being a cycle involving an add and a phi,
1533// with an optional truncation (left-shift followed by a right-shift)
1534// of the add. Returns zero if not an iv.
1535int PhaseIdealLoop::stride_of_possible_iv(Node* iff) {
1536  Node* trunc1 = NULL;
1537  Node* trunc2 = NULL;
1538  const TypeInt* ttype = NULL;
1539  if (!iff->is_If() || iff->in(1) == NULL || !iff->in(1)->is_Bool()) {
1540    return 0;
1541  }
1542  BoolNode* bl = iff->in(1)->as_Bool();
1543  Node* cmp = bl->in(1);
1544  if (!cmp || cmp->Opcode() != Op_CmpI && cmp->Opcode() != Op_CmpU) {
1545    return 0;
1546  }
1547  // Must have an invariant operand
1548  if (is_member(get_loop(iff), get_ctrl(cmp->in(2)))) {
1549    return 0;
1550  }
1551  Node* add2 = NULL;
1552  Node* cmp1 = cmp->in(1);
1553  if (cmp1->is_Phi()) {
1554    // (If (Bool (CmpX phi:(Phi ...(Optional-trunc(AddI phi add2))) )))
1555    Node* phi = cmp1;
1556    for (uint i = 1; i < phi->req(); i++) {
1557      Node* in = phi->in(i);
1558      Node* add = CountedLoopNode::match_incr_with_optional_truncation(in,
1559                                &trunc1, &trunc2, &ttype);
1560      if (add && add->in(1) == phi) {
1561        add2 = add->in(2);
1562        break;
1563      }
1564    }
1565  } else {
1566    // (If (Bool (CmpX addtrunc:(Optional-trunc((AddI (Phi ...addtrunc...) add2)) )))
1567    Node* addtrunc = cmp1;
1568    Node* add = CountedLoopNode::match_incr_with_optional_truncation(addtrunc,
1569                                &trunc1, &trunc2, &ttype);
1570    if (add && add->in(1)->is_Phi()) {
1571      Node* phi = add->in(1);
1572      for (uint i = 1; i < phi->req(); i++) {
1573        if (phi->in(i) == addtrunc) {
1574          add2 = add->in(2);
1575          break;
1576        }
1577      }
1578    }
1579  }
1580  if (add2 != NULL) {
1581    const TypeInt* add2t = _igvn.type(add2)->is_int();
1582    if (add2t->is_con()) {
1583      return add2t->get_con();
1584    }
1585  }
1586  return 0;
1587}
1588
1589
1590//---------------------- stay_in_loop -------------------------------------
1591// Return the (unique) control output node that's in the loop (if it exists.)
1592Node* PhaseIdealLoop::stay_in_loop( Node* n, IdealLoopTree *loop) {
1593  Node* unique = NULL;
1594  if (!n) return NULL;
1595  for (DUIterator_Fast imax, i = n->fast_outs(imax); i < imax; i++) {
1596    Node* use = n->fast_out(i);
1597    if (!has_ctrl(use) && loop->is_member(get_loop(use))) {
1598      if (unique != NULL) {
1599        return NULL;
1600      }
1601      unique = use;
1602    }
1603  }
1604  return unique;
1605}
1606
1607//------------------------------ register_node -------------------------------------
1608// Utility to register node "n" with PhaseIdealLoop
1609void PhaseIdealLoop::register_node(Node* n, IdealLoopTree *loop, Node* pred, int ddepth) {
1610  _igvn.register_new_node_with_optimizer(n);
1611  loop->_body.push(n);
1612  if (n->is_CFG()) {
1613    set_loop(n, loop);
1614    set_idom(n, pred, ddepth);
1615  } else {
1616    set_ctrl(n, pred);
1617  }
1618}
1619
1620//------------------------------ proj_clone -------------------------------------
1621// Utility to create an if-projection
1622ProjNode* PhaseIdealLoop::proj_clone(ProjNode* p, IfNode* iff) {
1623  ProjNode* c = p->clone()->as_Proj();
1624  c->set_req(0, iff);
1625  return c;
1626}
1627
1628//------------------------------ short_circuit_if -------------------------------------
1629// Force the iff control output to be the live_proj
1630Node* PhaseIdealLoop::short_circuit_if(IfNode* iff, ProjNode* live_proj) {
1631  int proj_con = live_proj->_con;
1632  assert(proj_con == 0 || proj_con == 1, "false or true projection");
1633  Node *con = _igvn.intcon(proj_con);
1634  set_ctrl(con, C->root());
1635  if (iff) {
1636    iff->set_req(1, con);
1637  }
1638  return con;
1639}
1640
1641//------------------------------ insert_if_before_proj -------------------------------------
1642// Insert a new if before an if projection (* - new node)
1643//
1644// before
1645//           if(test)
1646//           /     \
1647//          v       v
1648//    other-proj   proj (arg)
1649//
1650// after
1651//           if(test)
1652//           /     \
1653//          /       v
1654//         |      * proj-clone
1655//         v          |
1656//    other-proj      v
1657//                * new_if(relop(cmp[IU](left,right)))
1658//                  /  \
1659//                 v    v
1660//         * new-proj  proj
1661//         (returned)
1662//
1663ProjNode* PhaseIdealLoop::insert_if_before_proj(Node* left, bool Signed, BoolTest::mask relop, Node* right, ProjNode* proj) {
1664  IfNode* iff = proj->in(0)->as_If();
1665  IdealLoopTree *loop = get_loop(proj);
1666  ProjNode *other_proj = iff->proj_out(!proj->is_IfTrue())->as_Proj();
1667  int ddepth = dom_depth(proj);
1668
1669  _igvn.rehash_node_delayed(iff);
1670  _igvn.rehash_node_delayed(proj);
1671
1672  proj->set_req(0, NULL);  // temporary disconnect
1673  ProjNode* proj2 = proj_clone(proj, iff);
1674  register_node(proj2, loop, iff, ddepth);
1675
1676  Node* cmp = Signed ? (Node*) new (C)CmpINode(left, right) : (Node*) new (C)CmpUNode(left, right);
1677  register_node(cmp, loop, proj2, ddepth);
1678
1679  BoolNode* bol = new (C)BoolNode(cmp, relop);
1680  register_node(bol, loop, proj2, ddepth);
1681
1682  IfNode* new_if = new (C)IfNode(proj2, bol, iff->_prob, iff->_fcnt);
1683  register_node(new_if, loop, proj2, ddepth);
1684
1685  proj->set_req(0, new_if); // reattach
1686  set_idom(proj, new_if, ddepth);
1687
1688  ProjNode* new_exit = proj_clone(other_proj, new_if)->as_Proj();
1689  register_node(new_exit, get_loop(other_proj), new_if, ddepth);
1690
1691  return new_exit;
1692}
1693
1694//------------------------------ insert_region_before_proj -------------------------------------
1695// Insert a region before an if projection (* - new node)
1696//
1697// before
1698//           if(test)
1699//          /      |
1700//         v       |
1701//       proj      v
1702//               other-proj
1703//
1704// after
1705//           if(test)
1706//          /      |
1707//         v       |
1708// * proj-clone    v
1709//         |     other-proj
1710//         v
1711// * new-region
1712//         |
1713//         v
1714// *      dum_if
1715//       /     \
1716//      v       \
1717// * dum-proj    v
1718//              proj
1719//
1720RegionNode* PhaseIdealLoop::insert_region_before_proj(ProjNode* proj) {
1721  IfNode* iff = proj->in(0)->as_If();
1722  IdealLoopTree *loop = get_loop(proj);
1723  ProjNode *other_proj = iff->proj_out(!proj->is_IfTrue())->as_Proj();
1724  int ddepth = dom_depth(proj);
1725
1726  _igvn.rehash_node_delayed(iff);
1727  _igvn.rehash_node_delayed(proj);
1728
1729  proj->set_req(0, NULL);  // temporary disconnect
1730  ProjNode* proj2 = proj_clone(proj, iff);
1731  register_node(proj2, loop, iff, ddepth);
1732
1733  RegionNode* reg = new (C)RegionNode(2);
1734  reg->set_req(1, proj2);
1735  register_node(reg, loop, iff, ddepth);
1736
1737  IfNode* dum_if = new (C)IfNode(reg, short_circuit_if(NULL, proj), iff->_prob, iff->_fcnt);
1738  register_node(dum_if, loop, reg, ddepth);
1739
1740  proj->set_req(0, dum_if); // reattach
1741  set_idom(proj, dum_if, ddepth);
1742
1743  ProjNode* dum_proj = proj_clone(other_proj, dum_if);
1744  register_node(dum_proj, loop, dum_if, ddepth);
1745
1746  return reg;
1747}
1748
1749//------------------------------ insert_cmpi_loop_exit -------------------------------------
1750// Clone a signed compare loop exit from an unsigned compare and
1751// insert it before the unsigned cmp on the stay-in-loop path.
1752// All new nodes inserted in the dominator tree between the original
1753// if and it's projections.  The original if test is replaced with
1754// a constant to force the stay-in-loop path.
1755//
1756// This is done to make sure that the original if and it's projections
1757// still dominate the same set of control nodes, that the ctrl() relation
1758// from data nodes to them is preserved, and that their loop nesting is
1759// preserved.
1760//
1761// before
1762//          if(i <u limit)    unsigned compare loop exit
1763//         /       |
1764//        v        v
1765//   exit-proj   stay-in-loop-proj
1766//
1767// after
1768//          if(stay-in-loop-const)  original if
1769//         /       |
1770//        /        v
1771//       /  if(i <  limit)    new signed test
1772//      /  /       |
1773//     /  /        v
1774//    /  /  if(i <u limit)    new cloned unsigned test
1775//   /  /   /      |
1776//   v  v  v       |
1777//    region       |
1778//        |        |
1779//      dum-if     |
1780//     /  |        |
1781// ether  |        |
1782//        v        v
1783//   exit-proj   stay-in-loop-proj
1784//
1785IfNode* PhaseIdealLoop::insert_cmpi_loop_exit(IfNode* if_cmpu, IdealLoopTree *loop) {
1786  const bool Signed   = true;
1787  const bool Unsigned = false;
1788
1789  BoolNode* bol = if_cmpu->in(1)->as_Bool();
1790  if (bol->_test._test != BoolTest::lt) return NULL;
1791  CmpNode* cmpu = bol->in(1)->as_Cmp();
1792  if (cmpu->Opcode() != Op_CmpU) return NULL;
1793  int stride = stride_of_possible_iv(if_cmpu);
1794  if (stride == 0) return NULL;
1795
1796  ProjNode* lp_continue = stay_in_loop(if_cmpu, loop)->as_Proj();
1797  ProjNode* lp_exit     = if_cmpu->proj_out(!lp_continue->is_IfTrue())->as_Proj();
1798
1799  Node* limit = NULL;
1800  if (stride > 0) {
1801    limit = cmpu->in(2);
1802  } else {
1803    limit = _igvn.makecon(TypeInt::ZERO);
1804    set_ctrl(limit, C->root());
1805  }
1806  // Create a new region on the exit path
1807  RegionNode* reg = insert_region_before_proj(lp_exit);
1808
1809  // Clone the if-cmpu-true-false using a signed compare
1810  BoolTest::mask rel_i = stride > 0 ? bol->_test._test : BoolTest::ge;
1811  ProjNode* cmpi_exit = insert_if_before_proj(cmpu->in(1), Signed, rel_i, limit, lp_continue);
1812  reg->add_req(cmpi_exit);
1813
1814  // Clone the if-cmpu-true-false
1815  BoolTest::mask rel_u = bol->_test._test;
1816  ProjNode* cmpu_exit = insert_if_before_proj(cmpu->in(1), Unsigned, rel_u, cmpu->in(2), lp_continue);
1817  reg->add_req(cmpu_exit);
1818
1819  // Force original if to stay in loop.
1820  short_circuit_if(if_cmpu, lp_continue);
1821
1822  return cmpi_exit->in(0)->as_If();
1823}
1824
1825//------------------------------ remove_cmpi_loop_exit -------------------------------------
1826// Remove a previously inserted signed compare loop exit.
1827void PhaseIdealLoop::remove_cmpi_loop_exit(IfNode* if_cmp, IdealLoopTree *loop) {
1828  Node* lp_proj = stay_in_loop(if_cmp, loop);
1829  assert(if_cmp->in(1)->in(1)->Opcode() == Op_CmpI &&
1830         stay_in_loop(lp_proj, loop)->is_If() &&
1831         stay_in_loop(lp_proj, loop)->in(1)->in(1)->Opcode() == Op_CmpU, "inserted cmpi before cmpu");
1832  Node *con = _igvn.makecon(lp_proj->is_IfTrue() ? TypeInt::ONE : TypeInt::ZERO);
1833  set_ctrl(con, C->root());
1834  if_cmp->set_req(1, con);
1835}
1836
1837//------------------------------ scheduled_nodelist -------------------------------------
1838// Create a post order schedule of nodes that are in the
1839// "member" set.  The list is returned in "sched".
1840// The first node in "sched" is the loop head, followed by
1841// nodes which have no inputs in the "member" set, and then
1842// followed by the nodes that have an immediate input dependence
1843// on a node in "sched".
1844void PhaseIdealLoop::scheduled_nodelist( IdealLoopTree *loop, VectorSet& member, Node_List &sched ) {
1845
1846  assert(member.test(loop->_head->_idx), "loop head must be in member set");
1847  Arena *a = Thread::current()->resource_area();
1848  VectorSet visited(a);
1849  Node_Stack nstack(a, loop->_body.size());
1850
1851  Node* n  = loop->_head;  // top of stack is cached in "n"
1852  uint idx = 0;
1853  visited.set(n->_idx);
1854
1855  // Initially push all with no inputs from within member set
1856  for(uint i = 0; i < loop->_body.size(); i++ ) {
1857    Node *elt = loop->_body.at(i);
1858    if (member.test(elt->_idx)) {
1859      bool found = false;
1860      for (uint j = 0; j < elt->req(); j++) {
1861        Node* def = elt->in(j);
1862        if (def && member.test(def->_idx) && def != elt) {
1863          found = true;
1864          break;
1865        }
1866      }
1867      if (!found && elt != loop->_head) {
1868        nstack.push(n, idx);
1869        n = elt;
1870        assert(!visited.test(n->_idx), "not seen yet");
1871        visited.set(n->_idx);
1872      }
1873    }
1874  }
1875
1876  // traverse out's that are in the member set
1877  while (true) {
1878    if (idx < n->outcnt()) {
1879      Node* use = n->raw_out(idx);
1880      idx++;
1881      if (!visited.test_set(use->_idx)) {
1882        if (member.test(use->_idx)) {
1883          nstack.push(n, idx);
1884          n = use;
1885          idx = 0;
1886        }
1887      }
1888    } else {
1889      // All outputs processed
1890      sched.push(n);
1891      if (nstack.is_empty()) break;
1892      n   = nstack.node();
1893      idx = nstack.index();
1894      nstack.pop();
1895    }
1896  }
1897}
1898
1899
1900//------------------------------ has_use_in_set -------------------------------------
1901// Has a use in the vector set
1902bool PhaseIdealLoop::has_use_in_set( Node* n, VectorSet& vset ) {
1903  for (DUIterator_Fast jmax, j = n->fast_outs(jmax); j < jmax; j++) {
1904    Node* use = n->fast_out(j);
1905    if (vset.test(use->_idx)) {
1906      return true;
1907    }
1908  }
1909  return false;
1910}
1911
1912
1913//------------------------------ has_use_internal_to_set -------------------------------------
1914// Has use internal to the vector set (ie. not in a phi at the loop head)
1915bool PhaseIdealLoop::has_use_internal_to_set( Node* n, VectorSet& vset, IdealLoopTree *loop ) {
1916  Node* head  = loop->_head;
1917  for (DUIterator_Fast jmax, j = n->fast_outs(jmax); j < jmax; j++) {
1918    Node* use = n->fast_out(j);
1919    if (vset.test(use->_idx) && !(use->is_Phi() && use->in(0) == head)) {
1920      return true;
1921    }
1922  }
1923  return false;
1924}
1925
1926
1927//------------------------------ clone_for_use_outside_loop -------------------------------------
1928// clone "n" for uses that are outside of loop
1929void PhaseIdealLoop::clone_for_use_outside_loop( IdealLoopTree *loop, Node* n, Node_List& worklist ) {
1930
1931  assert(worklist.size() == 0, "should be empty");
1932  for (DUIterator_Fast jmax, j = n->fast_outs(jmax); j < jmax; j++) {
1933    Node* use = n->fast_out(j);
1934    if( !loop->is_member(get_loop(has_ctrl(use) ? get_ctrl(use) : use)) ) {
1935      worklist.push(use);
1936    }
1937  }
1938  while( worklist.size() ) {
1939    Node *use = worklist.pop();
1940    if (!has_node(use) || use->in(0) == C->top()) continue;
1941    uint j;
1942    for (j = 0; j < use->req(); j++) {
1943      if (use->in(j) == n) break;
1944    }
1945    assert(j < use->req(), "must be there");
1946
1947    // clone "n" and insert it between the inputs of "n" and the use outside the loop
1948    Node* n_clone = n->clone();
1949    _igvn.replace_input_of(use, j, n_clone);
1950    Node* use_c;
1951    if (!use->is_Phi()) {
1952      use_c = has_ctrl(use) ? get_ctrl(use) : use->in(0);
1953    } else {
1954      // Use in a phi is considered a use in the associated predecessor block
1955      use_c = use->in(0)->in(j);
1956    }
1957    set_ctrl(n_clone, use_c);
1958    assert(!loop->is_member(get_loop(use_c)), "should be outside loop");
1959    get_loop(use_c)->_body.push(n_clone);
1960    _igvn.register_new_node_with_optimizer(n_clone);
1961#if !defined(PRODUCT)
1962    if (TracePartialPeeling) {
1963      tty->print_cr("loop exit cloning old: %d new: %d newbb: %d", n->_idx, n_clone->_idx, get_ctrl(n_clone)->_idx);
1964    }
1965#endif
1966  }
1967}
1968
1969
1970//------------------------------ clone_for_special_use_inside_loop -------------------------------------
1971// clone "n" for special uses that are in the not_peeled region.
1972// If these def-uses occur in separate blocks, the code generator
1973// marks the method as not compilable.  For example, if a "BoolNode"
1974// is in a different basic block than the "IfNode" that uses it, then
1975// the compilation is aborted in the code generator.
1976void PhaseIdealLoop::clone_for_special_use_inside_loop( IdealLoopTree *loop, Node* n,
1977                                                        VectorSet& not_peel, Node_List& sink_list, Node_List& worklist ) {
1978  if (n->is_Phi() || n->is_Load()) {
1979    return;
1980  }
1981  assert(worklist.size() == 0, "should be empty");
1982  for (DUIterator_Fast jmax, j = n->fast_outs(jmax); j < jmax; j++) {
1983    Node* use = n->fast_out(j);
1984    if ( not_peel.test(use->_idx) &&
1985         (use->is_If() || use->is_CMove() || use->is_Bool()) &&
1986         use->in(1) == n)  {
1987      worklist.push(use);
1988    }
1989  }
1990  if (worklist.size() > 0) {
1991    // clone "n" and insert it between inputs of "n" and the use
1992    Node* n_clone = n->clone();
1993    loop->_body.push(n_clone);
1994    _igvn.register_new_node_with_optimizer(n_clone);
1995    set_ctrl(n_clone, get_ctrl(n));
1996    sink_list.push(n_clone);
1997    not_peel <<= n_clone->_idx;  // add n_clone to not_peel set.
1998#if !defined(PRODUCT)
1999    if (TracePartialPeeling) {
2000      tty->print_cr("special not_peeled cloning old: %d new: %d", n->_idx, n_clone->_idx);
2001    }
2002#endif
2003    while( worklist.size() ) {
2004      Node *use = worklist.pop();
2005      _igvn.rehash_node_delayed(use);
2006      for (uint j = 1; j < use->req(); j++) {
2007        if (use->in(j) == n) {
2008          use->set_req(j, n_clone);
2009        }
2010      }
2011    }
2012  }
2013}
2014
2015
2016//------------------------------ insert_phi_for_loop -------------------------------------
2017// Insert phi(lp_entry_val, back_edge_val) at use->in(idx) for loop lp if phi does not already exist
2018void PhaseIdealLoop::insert_phi_for_loop( Node* use, uint idx, Node* lp_entry_val, Node* back_edge_val, LoopNode* lp ) {
2019  Node *phi = PhiNode::make(lp, back_edge_val);
2020  phi->set_req(LoopNode::EntryControl, lp_entry_val);
2021  // Use existing phi if it already exists
2022  Node *hit = _igvn.hash_find_insert(phi);
2023  if( hit == NULL ) {
2024    _igvn.register_new_node_with_optimizer(phi);
2025    set_ctrl(phi, lp);
2026  } else {
2027    // Remove the new phi from the graph and use the hit
2028    _igvn.remove_dead_node(phi);
2029    phi = hit;
2030  }
2031  _igvn.replace_input_of(use, idx, phi);
2032}
2033
2034#ifdef ASSERT
2035//------------------------------ is_valid_loop_partition -------------------------------------
2036// Validate the loop partition sets: peel and not_peel
2037bool PhaseIdealLoop::is_valid_loop_partition( IdealLoopTree *loop, VectorSet& peel, Node_List& peel_list,
2038                                              VectorSet& not_peel ) {
2039  uint i;
2040  // Check that peel_list entries are in the peel set
2041  for (i = 0; i < peel_list.size(); i++) {
2042    if (!peel.test(peel_list.at(i)->_idx)) {
2043      return false;
2044    }
2045  }
2046  // Check at loop members are in one of peel set or not_peel set
2047  for (i = 0; i < loop->_body.size(); i++ ) {
2048    Node *def  = loop->_body.at(i);
2049    uint di = def->_idx;
2050    // Check that peel set elements are in peel_list
2051    if (peel.test(di)) {
2052      if (not_peel.test(di)) {
2053        return false;
2054      }
2055      // Must be in peel_list also
2056      bool found = false;
2057      for (uint j = 0; j < peel_list.size(); j++) {
2058        if (peel_list.at(j)->_idx == di) {
2059          found = true;
2060          break;
2061        }
2062      }
2063      if (!found) {
2064        return false;
2065      }
2066    } else if (not_peel.test(di)) {
2067      if (peel.test(di)) {
2068        return false;
2069      }
2070    } else {
2071      return false;
2072    }
2073  }
2074  return true;
2075}
2076
2077//------------------------------ is_valid_clone_loop_exit_use -------------------------------------
2078// Ensure a use outside of loop is of the right form
2079bool PhaseIdealLoop::is_valid_clone_loop_exit_use( IdealLoopTree *loop, Node* use, uint exit_idx) {
2080  Node *use_c = has_ctrl(use) ? get_ctrl(use) : use;
2081  return (use->is_Phi() &&
2082          use_c->is_Region() && use_c->req() == 3 &&
2083          (use_c->in(exit_idx)->Opcode() == Op_IfTrue ||
2084           use_c->in(exit_idx)->Opcode() == Op_IfFalse ||
2085           use_c->in(exit_idx)->Opcode() == Op_JumpProj) &&
2086          loop->is_member( get_loop( use_c->in(exit_idx)->in(0) ) ) );
2087}
2088
2089//------------------------------ is_valid_clone_loop_form -------------------------------------
2090// Ensure that all uses outside of loop are of the right form
2091bool PhaseIdealLoop::is_valid_clone_loop_form( IdealLoopTree *loop, Node_List& peel_list,
2092                                               uint orig_exit_idx, uint clone_exit_idx) {
2093  uint len = peel_list.size();
2094  for (uint i = 0; i < len; i++) {
2095    Node *def = peel_list.at(i);
2096
2097    for (DUIterator_Fast jmax, j = def->fast_outs(jmax); j < jmax; j++) {
2098      Node *use = def->fast_out(j);
2099      Node *use_c = has_ctrl(use) ? get_ctrl(use) : use;
2100      if (!loop->is_member(get_loop(use_c))) {
2101        // use is not in the loop, check for correct structure
2102        if (use->in(0) == def) {
2103          // Okay
2104        } else if (!is_valid_clone_loop_exit_use(loop, use, orig_exit_idx)) {
2105          return false;
2106        }
2107      }
2108    }
2109  }
2110  return true;
2111}
2112#endif
2113
2114//------------------------------ partial_peel -------------------------------------
2115// Partially peel (aka loop rotation) the top portion of a loop (called
2116// the peel section below) by cloning it and placing one copy just before
2117// the new loop head and the other copy at the bottom of the new loop.
2118//
2119//    before                       after                where it came from
2120//
2121//    stmt1                        stmt1
2122//  loop:                          stmt2                     clone
2123//    stmt2                        if condA goto exitA       clone
2124//    if condA goto exitA        new_loop:                   new
2125//    stmt3                        stmt3                     clone
2126//    if !condB goto loop          if condB goto exitB       clone
2127//  exitB:                         stmt2                     orig
2128//    stmt4                        if !condA goto new_loop   orig
2129//  exitA:                         goto exitA
2130//                               exitB:
2131//                                 stmt4
2132//                               exitA:
2133//
2134// Step 1: find the cut point: an exit test on probable
2135//         induction variable.
2136// Step 2: schedule (with cloning) operations in the peel
2137//         section that can be executed after the cut into
2138//         the section that is not peeled.  This may need
2139//         to clone operations into exit blocks.  For
2140//         instance, a reference to A[i] in the not-peel
2141//         section and a reference to B[i] in an exit block
2142//         may cause a left-shift of i by 2 to be placed
2143//         in the peel block.  This step will clone the left
2144//         shift into the exit block and sink the left shift
2145//         from the peel to the not-peel section.
2146// Step 3: clone the loop, retarget the control, and insert
2147//         phis for values that are live across the new loop
2148//         head.  This is very dependent on the graph structure
2149//         from clone_loop.  It creates region nodes for
2150//         exit control and associated phi nodes for values
2151//         flow out of the loop through that exit.  The region
2152//         node is dominated by the clone's control projection.
2153//         So the clone's peel section is placed before the
2154//         new loop head, and the clone's not-peel section is
2155//         forms the top part of the new loop.  The original
2156//         peel section forms the tail of the new loop.
2157// Step 4: update the dominator tree and recompute the
2158//         dominator depth.
2159//
2160//                   orig
2161//
2162//                   stmt1
2163//                     |
2164//                     v
2165//               loop predicate
2166//                     |
2167//                     v
2168//                   loop<----+
2169//                     |      |
2170//                   stmt2    |
2171//                     |      |
2172//                     v      |
2173//                    ifA     |
2174//                   / |      |
2175//                  v  v      |
2176//               false true   ^  <-- last_peel
2177//               /     |      |
2178//              /   ===|==cut |
2179//             /     stmt3    |  <-- first_not_peel
2180//            /        |      |
2181//            |        v      |
2182//            v       ifB     |
2183//          exitA:   / \      |
2184//                  /   \     |
2185//                 v     v    |
2186//               false true   |
2187//               /       \    |
2188//              /         ----+
2189//             |
2190//             v
2191//           exitB:
2192//           stmt4
2193//
2194//
2195//            after clone loop
2196//
2197//                   stmt1
2198//                     |
2199//                     v
2200//               loop predicate
2201//                 /       \
2202//        clone   /         \   orig
2203//               /           \
2204//              /             \
2205//             v               v
2206//   +---->loop                loop<----+
2207//   |      |                    |      |
2208//   |    stmt2                stmt2    |
2209//   |      |                    |      |
2210//   |      v                    v      |
2211//   |      ifA                 ifA     |
2212//   |      | \                / |      |
2213//   |      v  v              v  v      |
2214//   ^    true  false      false true   ^  <-- last_peel
2215//   |      |   ^   \       /    |      |
2216//   | cut==|==  \   \     /  ===|==cut |
2217//   |    stmt3   \   \   /    stmt3    |  <-- first_not_peel
2218//   |      |    dom   | |       |      |
2219//   |      v      \  1v v2      v      |
2220//   |      ifB     regionA     ifB     |
2221//   |      / \        |       / \      |
2222//   |     /   \       v      /   \     |
2223//   |    v     v    exitA:  v     v    |
2224//   |    true  false      false true   |
2225//   |    /     ^   \      /       \    |
2226//   +----       \   \    /         ----+
2227//               dom  \  /
2228//                 \  1v v2
2229//                  regionB
2230//                     |
2231//                     v
2232//                   exitB:
2233//                   stmt4
2234//
2235//
2236//           after partial peel
2237//
2238//                  stmt1
2239//                     |
2240//                     v
2241//               loop predicate
2242//                 /
2243//        clone   /             orig
2244//               /          TOP
2245//              /             \
2246//             v               v
2247//    TOP->loop                loop----+
2248//          |                    |      |
2249//        stmt2                stmt2    |
2250//          |                    |      |
2251//          v                    v      |
2252//          ifA                 ifA     |
2253//          | \                / |      |
2254//          v  v              v  v      |
2255//        true  false      false true   |     <-- last_peel
2256//          |   ^   \       /    +------|---+
2257//  +->newloop   \   \     /  === ==cut |   |
2258//  |     stmt3   \   \   /     TOP     |   |
2259//  |       |    dom   | |      stmt3   |   | <-- first_not_peel
2260//  |       v      \  1v v2      v      |   |
2261//  |       ifB     regionA     ifB     ^   v
2262//  |       / \        |       / \      |   |
2263//  |      /   \       v      /   \     |   |
2264//  |     v     v    exitA:  v     v    |   |
2265//  |     true  false      false true   |   |
2266//  |     /     ^   \      /       \    |   |
2267//  |    |       \   \    /         v   |   |
2268//  |    |       dom  \  /         TOP  |   |
2269//  |    |         \  1v v2             |   |
2270//  ^    v          regionB             |   |
2271//  |    |             |                |   |
2272//  |    |             v                ^   v
2273//  |    |           exitB:             |   |
2274//  |    |           stmt4              |   |
2275//  |    +------------>-----------------+   |
2276//  |                                       |
2277//  +-----------------<---------------------+
2278//
2279//
2280//              final graph
2281//
2282//                  stmt1
2283//                    |
2284//                    v
2285//               loop predicate
2286//                    |
2287//                    v
2288//                  stmt2 clone
2289//                    |
2290//                    v
2291//         ........> ifA clone
2292//         :        / |
2293//        dom      /  |
2294//         :      v   v
2295//         :  false   true
2296//         :  |       |
2297//         :  |       v
2298//         :  |    newloop<-----+
2299//         :  |        |        |
2300//         :  |     stmt3 clone |
2301//         :  |        |        |
2302//         :  |        v        |
2303//         :  |       ifB       |
2304//         :  |      / \        |
2305//         :  |     v   v       |
2306//         :  |  false true     |
2307//         :  |   |     |       |
2308//         :  |   v    stmt2    |
2309//         :  | exitB:  |       |
2310//         :  | stmt4   v       |
2311//         :  |       ifA orig  |
2312//         :  |      /  \       |
2313//         :  |     /    \      |
2314//         :  |    v     v      |
2315//         :  |  false  true    |
2316//         :  |  /        \     |
2317//         :  v  v         -----+
2318//          RegionA
2319//             |
2320//             v
2321//           exitA
2322//
2323bool PhaseIdealLoop::partial_peel( IdealLoopTree *loop, Node_List &old_new ) {
2324
2325  assert(!loop->_head->is_CountedLoop(), "Non-counted loop only");
2326  if (!loop->_head->is_Loop()) {
2327    return false;  }
2328
2329  LoopNode *head  = loop->_head->as_Loop();
2330
2331  if (head->is_partial_peel_loop() || head->partial_peel_has_failed()) {
2332    return false;
2333  }
2334
2335  // Check for complex exit control
2336  for(uint ii = 0; ii < loop->_body.size(); ii++ ) {
2337    Node *n = loop->_body.at(ii);
2338    int opc = n->Opcode();
2339    if (n->is_Call()        ||
2340        opc == Op_Catch     ||
2341        opc == Op_CatchProj ||
2342        opc == Op_Jump      ||
2343        opc == Op_JumpProj) {
2344#if !defined(PRODUCT)
2345      if (TracePartialPeeling) {
2346        tty->print_cr("\nExit control too complex: lp: %d", head->_idx);
2347      }
2348#endif
2349      return false;
2350    }
2351  }
2352
2353  int dd = dom_depth(head);
2354
2355  // Step 1: find cut point
2356
2357  // Walk up dominators to loop head looking for first loop exit
2358  // which is executed on every path thru loop.
2359  IfNode *peel_if = NULL;
2360  IfNode *peel_if_cmpu = NULL;
2361
2362  Node *iff = loop->tail();
2363  while( iff != head ) {
2364    if( iff->is_If() ) {
2365      Node *ctrl = get_ctrl(iff->in(1));
2366      if (ctrl->is_top()) return false; // Dead test on live IF.
2367      // If loop-varying exit-test, check for induction variable
2368      if( loop->is_member(get_loop(ctrl)) &&
2369          loop->is_loop_exit(iff) &&
2370          is_possible_iv_test(iff)) {
2371        Node* cmp = iff->in(1)->in(1);
2372        if (cmp->Opcode() == Op_CmpI) {
2373          peel_if = iff->as_If();
2374        } else {
2375          assert(cmp->Opcode() == Op_CmpU, "must be CmpI or CmpU");
2376          peel_if_cmpu = iff->as_If();
2377        }
2378      }
2379    }
2380    iff = idom(iff);
2381  }
2382  // Prefer signed compare over unsigned compare.
2383  IfNode* new_peel_if = NULL;
2384  if (peel_if == NULL) {
2385    if (!PartialPeelAtUnsignedTests || peel_if_cmpu == NULL) {
2386      return false;   // No peel point found
2387    }
2388    new_peel_if = insert_cmpi_loop_exit(peel_if_cmpu, loop);
2389    if (new_peel_if == NULL) {
2390      return false;   // No peel point found
2391    }
2392    peel_if = new_peel_if;
2393  }
2394  Node* last_peel        = stay_in_loop(peel_if, loop);
2395  Node* first_not_peeled = stay_in_loop(last_peel, loop);
2396  if (first_not_peeled == NULL || first_not_peeled == head) {
2397    return false;
2398  }
2399
2400#if !defined(PRODUCT)
2401  if (TraceLoopOpts) {
2402    tty->print("PartialPeel  ");
2403    loop->dump_head();
2404  }
2405
2406  if (TracePartialPeeling) {
2407    tty->print_cr("before partial peel one iteration");
2408    Node_List wl;
2409    Node* t = head->in(2);
2410    while (true) {
2411      wl.push(t);
2412      if (t == head) break;
2413      t = idom(t);
2414    }
2415    while (wl.size() > 0) {
2416      Node* tt = wl.pop();
2417      tt->dump();
2418      if (tt == last_peel) tty->print_cr("-- cut --");
2419    }
2420  }
2421#endif
2422  ResourceArea *area = Thread::current()->resource_area();
2423  VectorSet peel(area);
2424  VectorSet not_peel(area);
2425  Node_List peel_list(area);
2426  Node_List worklist(area);
2427  Node_List sink_list(area);
2428
2429  // Set of cfg nodes to peel are those that are executable from
2430  // the head through last_peel.
2431  assert(worklist.size() == 0, "should be empty");
2432  worklist.push(head);
2433  peel.set(head->_idx);
2434  while (worklist.size() > 0) {
2435    Node *n = worklist.pop();
2436    if (n != last_peel) {
2437      for (DUIterator_Fast jmax, j = n->fast_outs(jmax); j < jmax; j++) {
2438        Node* use = n->fast_out(j);
2439        if (use->is_CFG() &&
2440            loop->is_member(get_loop(use)) &&
2441            !peel.test_set(use->_idx)) {
2442          worklist.push(use);
2443        }
2444      }
2445    }
2446  }
2447
2448  // Set of non-cfg nodes to peel are those that are control
2449  // dependent on the cfg nodes.
2450  uint i;
2451  for(i = 0; i < loop->_body.size(); i++ ) {
2452    Node *n = loop->_body.at(i);
2453    Node *n_c = has_ctrl(n) ? get_ctrl(n) : n;
2454    if (peel.test(n_c->_idx)) {
2455      peel.set(n->_idx);
2456    } else {
2457      not_peel.set(n->_idx);
2458    }
2459  }
2460
2461  // Step 2: move operations from the peeled section down into the
2462  //         not-peeled section
2463
2464  // Get a post order schedule of nodes in the peel region
2465  // Result in right-most operand.
2466  scheduled_nodelist(loop, peel, peel_list );
2467
2468  assert(is_valid_loop_partition(loop, peel, peel_list, not_peel), "bad partition");
2469
2470  // For future check for too many new phis
2471  uint old_phi_cnt = 0;
2472  for (DUIterator_Fast jmax, j = head->fast_outs(jmax); j < jmax; j++) {
2473    Node* use = head->fast_out(j);
2474    if (use->is_Phi()) old_phi_cnt++;
2475  }
2476
2477#if !defined(PRODUCT)
2478  if (TracePartialPeeling) {
2479    tty->print_cr("\npeeled list");
2480  }
2481#endif
2482
2483  // Evacuate nodes in peel region into the not_peeled region if possible
2484  uint new_phi_cnt = 0;
2485  for (i = 0; i < peel_list.size();) {
2486    Node* n = peel_list.at(i);
2487#if !defined(PRODUCT)
2488  if (TracePartialPeeling) n->dump();
2489#endif
2490    bool incr = true;
2491    if ( !n->is_CFG() ) {
2492
2493      if ( has_use_in_set(n, not_peel) ) {
2494
2495        // If not used internal to the peeled region,
2496        // move "n" from peeled to not_peeled region.
2497
2498        if ( !has_use_internal_to_set(n, peel, loop) ) {
2499
2500          // if not pinned and not a load (which maybe anti-dependent on a store)
2501          // and not a CMove (Matcher expects only bool->cmove).
2502          if ( n->in(0) == NULL && !n->is_Load() && !n->is_CMove() ) {
2503            clone_for_use_outside_loop( loop, n, worklist );
2504
2505            sink_list.push(n);
2506            peel     >>= n->_idx; // delete n from peel set.
2507            not_peel <<= n->_idx; // add n to not_peel set.
2508            peel_list.remove(i);
2509            incr = false;
2510#if !defined(PRODUCT)
2511            if (TracePartialPeeling) {
2512              tty->print_cr("sink to not_peeled region: %d newbb: %d",
2513                            n->_idx, get_ctrl(n)->_idx);
2514            }
2515#endif
2516          }
2517        } else {
2518          // Otherwise check for special def-use cases that span
2519          // the peel/not_peel boundary such as bool->if
2520          clone_for_special_use_inside_loop( loop, n, not_peel, sink_list, worklist );
2521          new_phi_cnt++;
2522        }
2523      }
2524    }
2525    if (incr) i++;
2526  }
2527
2528  if (new_phi_cnt > old_phi_cnt + PartialPeelNewPhiDelta) {
2529#if !defined(PRODUCT)
2530    if (TracePartialPeeling) {
2531      tty->print_cr("\nToo many new phis: %d  old %d new cmpi: %c",
2532                    new_phi_cnt, old_phi_cnt, new_peel_if != NULL?'T':'F');
2533    }
2534#endif
2535    if (new_peel_if != NULL) {
2536      remove_cmpi_loop_exit(new_peel_if, loop);
2537    }
2538    // Inhibit more partial peeling on this loop
2539    assert(!head->is_partial_peel_loop(), "not partial peeled");
2540    head->mark_partial_peel_failed();
2541    return false;
2542  }
2543
2544  // Step 3: clone loop, retarget control, and insert new phis
2545
2546  // Create new loop head for new phis and to hang
2547  // the nodes being moved (sinked) from the peel region.
2548  LoopNode* new_head = new (C) LoopNode(last_peel, last_peel);
2549  new_head->set_unswitch_count(head->unswitch_count()); // Preserve
2550  _igvn.register_new_node_with_optimizer(new_head);
2551  assert(first_not_peeled->in(0) == last_peel, "last_peel <- first_not_peeled");
2552  first_not_peeled->set_req(0, new_head);
2553  set_loop(new_head, loop);
2554  loop->_body.push(new_head);
2555  not_peel.set(new_head->_idx);
2556  set_idom(new_head, last_peel, dom_depth(first_not_peeled));
2557  set_idom(first_not_peeled, new_head, dom_depth(first_not_peeled));
2558
2559  while (sink_list.size() > 0) {
2560    Node* n = sink_list.pop();
2561    set_ctrl(n, new_head);
2562  }
2563
2564  assert(is_valid_loop_partition(loop, peel, peel_list, not_peel), "bad partition");
2565
2566  clone_loop( loop, old_new, dd );
2567
2568  const uint clone_exit_idx = 1;
2569  const uint orig_exit_idx  = 2;
2570  assert(is_valid_clone_loop_form( loop, peel_list, orig_exit_idx, clone_exit_idx ), "bad clone loop");
2571
2572  Node* head_clone             = old_new[head->_idx];
2573  LoopNode* new_head_clone     = old_new[new_head->_idx]->as_Loop();
2574  Node* orig_tail_clone        = head_clone->in(2);
2575
2576  // Add phi if "def" node is in peel set and "use" is not
2577
2578  for(i = 0; i < peel_list.size(); i++ ) {
2579    Node *def  = peel_list.at(i);
2580    if (!def->is_CFG()) {
2581      for (DUIterator_Fast jmax, j = def->fast_outs(jmax); j < jmax; j++) {
2582        Node *use = def->fast_out(j);
2583        if (has_node(use) && use->in(0) != C->top() &&
2584            (!peel.test(use->_idx) ||
2585             (use->is_Phi() && use->in(0) == head)) ) {
2586          worklist.push(use);
2587        }
2588      }
2589      while( worklist.size() ) {
2590        Node *use = worklist.pop();
2591        for (uint j = 1; j < use->req(); j++) {
2592          Node* n = use->in(j);
2593          if (n == def) {
2594
2595            // "def" is in peel set, "use" is not in peel set
2596            // or "use" is in the entry boundary (a phi) of the peel set
2597
2598            Node* use_c = has_ctrl(use) ? get_ctrl(use) : use;
2599
2600            if ( loop->is_member(get_loop( use_c )) ) {
2601              // use is in loop
2602              if (old_new[use->_idx] != NULL) { // null for dead code
2603                Node* use_clone = old_new[use->_idx];
2604                _igvn.replace_input_of(use, j, C->top());
2605                insert_phi_for_loop( use_clone, j, old_new[def->_idx], def, new_head_clone );
2606              }
2607            } else {
2608              assert(is_valid_clone_loop_exit_use(loop, use, orig_exit_idx), "clone loop format");
2609              // use is not in the loop, check if the live range includes the cut
2610              Node* lp_if = use_c->in(orig_exit_idx)->in(0);
2611              if (not_peel.test(lp_if->_idx)) {
2612                assert(j == orig_exit_idx, "use from original loop");
2613                insert_phi_for_loop( use, clone_exit_idx, old_new[def->_idx], def, new_head_clone );
2614              }
2615            }
2616          }
2617        }
2618      }
2619    }
2620  }
2621
2622  // Step 3b: retarget control
2623
2624  // Redirect control to the new loop head if a cloned node in
2625  // the not_peeled region has control that points into the peeled region.
2626  // This necessary because the cloned peeled region will be outside
2627  // the loop.
2628  //                            from    to
2629  //          cloned-peeled    <---+
2630  //    new_head_clone:            |    <--+
2631  //          cloned-not_peeled  in(0)    in(0)
2632  //          orig-peeled
2633
2634  for(i = 0; i < loop->_body.size(); i++ ) {
2635    Node *n = loop->_body.at(i);
2636    if (!n->is_CFG()           && n->in(0) != NULL        &&
2637        not_peel.test(n->_idx) && peel.test(n->in(0)->_idx)) {
2638      Node* n_clone = old_new[n->_idx];
2639      _igvn.replace_input_of(n_clone, 0, new_head_clone);
2640    }
2641  }
2642
2643  // Backedge of the surviving new_head (the clone) is original last_peel
2644  _igvn.replace_input_of(new_head_clone, LoopNode::LoopBackControl, last_peel);
2645
2646  // Cut first node in original not_peel set
2647  _igvn.rehash_node_delayed(new_head);                     // Multiple edge updates:
2648  new_head->set_req(LoopNode::EntryControl,    C->top());  //   use rehash_node_delayed / set_req instead of
2649  new_head->set_req(LoopNode::LoopBackControl, C->top());  //   multiple replace_input_of calls
2650
2651  // Copy head_clone back-branch info to original head
2652  // and remove original head's loop entry and
2653  // clone head's back-branch
2654  _igvn.rehash_node_delayed(head); // Multiple edge updates
2655  head->set_req(LoopNode::EntryControl,    head_clone->in(LoopNode::LoopBackControl));
2656  head->set_req(LoopNode::LoopBackControl, C->top());
2657  _igvn.replace_input_of(head_clone, LoopNode::LoopBackControl, C->top());
2658
2659  // Similarly modify the phis
2660  for (DUIterator_Fast kmax, k = head->fast_outs(kmax); k < kmax; k++) {
2661    Node* use = head->fast_out(k);
2662    if (use->is_Phi() && use->outcnt() > 0) {
2663      Node* use_clone = old_new[use->_idx];
2664      _igvn.rehash_node_delayed(use); // Multiple edge updates
2665      use->set_req(LoopNode::EntryControl,    use_clone->in(LoopNode::LoopBackControl));
2666      use->set_req(LoopNode::LoopBackControl, C->top());
2667      _igvn.replace_input_of(use_clone, LoopNode::LoopBackControl, C->top());
2668    }
2669  }
2670
2671  // Step 4: update dominator tree and dominator depth
2672
2673  set_idom(head, orig_tail_clone, dd);
2674  recompute_dom_depth();
2675
2676  // Inhibit more partial peeling on this loop
2677  new_head_clone->set_partial_peel_loop();
2678  C->set_major_progress();
2679
2680#if !defined(PRODUCT)
2681  if (TracePartialPeeling) {
2682    tty->print_cr("\nafter partial peel one iteration");
2683    Node_List wl(area);
2684    Node* t = last_peel;
2685    while (true) {
2686      wl.push(t);
2687      if (t == head_clone) break;
2688      t = idom(t);
2689    }
2690    while (wl.size() > 0) {
2691      Node* tt = wl.pop();
2692      if (tt == head) tty->print_cr("orig head");
2693      else if (tt == new_head_clone) tty->print_cr("new head");
2694      else if (tt == head_clone) tty->print_cr("clone head");
2695      tt->dump();
2696    }
2697  }
2698#endif
2699  return true;
2700}
2701
2702//------------------------------reorg_offsets----------------------------------
2703// Reorganize offset computations to lower register pressure.  Mostly
2704// prevent loop-fallout uses of the pre-incremented trip counter (which are
2705// then alive with the post-incremented trip counter forcing an extra
2706// register move)
2707void PhaseIdealLoop::reorg_offsets(IdealLoopTree *loop) {
2708  // Perform it only for canonical counted loops.
2709  // Loop's shape could be messed up by iteration_split_impl.
2710  if (!loop->_head->is_CountedLoop())
2711    return;
2712  if (!loop->_head->as_Loop()->is_valid_counted_loop())
2713    return;
2714
2715  CountedLoopNode *cl = loop->_head->as_CountedLoop();
2716  CountedLoopEndNode *cle = cl->loopexit();
2717  Node *exit = cle->proj_out(false);
2718  Node *phi = cl->phi();
2719
2720  // Check for the special case of folks using the pre-incremented
2721  // trip-counter on the fall-out path (forces the pre-incremented
2722  // and post-incremented trip counter to be live at the same time).
2723  // Fix this by adjusting to use the post-increment trip counter.
2724
2725  bool progress = true;
2726  while (progress) {
2727    progress = false;
2728    for (DUIterator_Fast imax, i = phi->fast_outs(imax); i < imax; i++) {
2729      Node* use = phi->fast_out(i);   // User of trip-counter
2730      if (!has_ctrl(use))  continue;
2731      Node *u_ctrl = get_ctrl(use);
2732      if (use->is_Phi()) {
2733        u_ctrl = NULL;
2734        for (uint j = 1; j < use->req(); j++)
2735          if (use->in(j) == phi)
2736            u_ctrl = dom_lca(u_ctrl, use->in(0)->in(j));
2737      }
2738      IdealLoopTree *u_loop = get_loop(u_ctrl);
2739      // Look for loop-invariant use
2740      if (u_loop == loop) continue;
2741      if (loop->is_member(u_loop)) continue;
2742      // Check that use is live out the bottom.  Assuming the trip-counter
2743      // update is right at the bottom, uses of of the loop middle are ok.
2744      if (dom_lca(exit, u_ctrl) != exit) continue;
2745      // Hit!  Refactor use to use the post-incremented tripcounter.
2746      // Compute a post-increment tripcounter.
2747      Node *opaq = new (C) Opaque2Node( C, cle->incr() );
2748      register_new_node( opaq, u_ctrl );
2749      Node *neg_stride = _igvn.intcon(-cle->stride_con());
2750      set_ctrl(neg_stride, C->root());
2751      Node *post = new (C) AddINode( opaq, neg_stride);
2752      register_new_node( post, u_ctrl );
2753      _igvn.rehash_node_delayed(use);
2754      for (uint j = 1; j < use->req(); j++) {
2755        if (use->in(j) == phi)
2756          use->set_req(j, post);
2757      }
2758      // Since DU info changed, rerun loop
2759      progress = true;
2760      break;
2761    }
2762  }
2763
2764}
2765