loopopts.cpp revision 6562:3cb509208318
1/*
2 * Copyright (c) 1999, 2013, Oracle and/or its affiliates. All rights reserved.
3 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
4 *
5 * This code is free software; you can redistribute it and/or modify it
6 * under the terms of the GNU General Public License version 2 only, as
7 * published by the Free Software Foundation.
8 *
9 * This code is distributed in the hope that it will be useful, but WITHOUT
10 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
11 * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
12 * version 2 for more details (a copy is included in the LICENSE file that
13 * accompanied this code).
14 *
15 * You should have received a copy of the GNU General Public License version
16 * 2 along with this work; if not, write to the Free Software Foundation,
17 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
18 *
19 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
20 * or visit www.oracle.com if you need additional information or have any
21 * questions.
22 *
23 */
24
25#include "precompiled.hpp"
26#include "memory/allocation.inline.hpp"
27#include "opto/addnode.hpp"
28#include "opto/connode.hpp"
29#include "opto/divnode.hpp"
30#include "opto/loopnode.hpp"
31#include "opto/matcher.hpp"
32#include "opto/mulnode.hpp"
33#include "opto/movenode.hpp"
34#include "opto/opaquenode.hpp"
35#include "opto/rootnode.hpp"
36#include "opto/subnode.hpp"
37
38//=============================================================================
39//------------------------------split_thru_phi---------------------------------
40// Split Node 'n' through merge point if there is enough win.
41Node *PhaseIdealLoop::split_thru_phi( Node *n, Node *region, int policy ) {
42  if (n->Opcode() == Op_ConvI2L && n->bottom_type() != TypeLong::LONG) {
43    // ConvI2L may have type information on it which is unsafe to push up
44    // so disable this for now
45    return NULL;
46  }
47
48  int wins = 0;
49  assert(!n->is_CFG(), "");
50  assert(region->is_Region(), "");
51
52  const Type* type = n->bottom_type();
53  const TypeOopPtr *t_oop = _igvn.type(n)->isa_oopptr();
54  Node *phi;
55  if (t_oop != NULL && t_oop->is_known_instance_field()) {
56    int iid    = t_oop->instance_id();
57    int index  = C->get_alias_index(t_oop);
58    int offset = t_oop->offset();
59    phi = new PhiNode(region, type, NULL, iid, index, offset);
60  } else {
61    phi = PhiNode::make_blank(region, n);
62  }
63  uint old_unique = C->unique();
64  for (uint i = 1; i < region->req(); i++) {
65    Node *x;
66    Node* the_clone = NULL;
67    if (region->in(i) == C->top()) {
68      x = C->top();             // Dead path?  Use a dead data op
69    } else {
70      x = n->clone();           // Else clone up the data op
71      the_clone = x;            // Remember for possible deletion.
72      // Alter data node to use pre-phi inputs
73      if (n->in(0) == region)
74        x->set_req( 0, region->in(i) );
75      for (uint j = 1; j < n->req(); j++) {
76        Node *in = n->in(j);
77        if (in->is_Phi() && in->in(0) == region)
78          x->set_req( j, in->in(i) ); // Use pre-Phi input for the clone
79      }
80    }
81    // Check for a 'win' on some paths
82    const Type *t = x->Value(&_igvn);
83
84    bool singleton = t->singleton();
85
86    // A TOP singleton indicates that there are no possible values incoming
87    // along a particular edge. In most cases, this is OK, and the Phi will
88    // be eliminated later in an Ideal call. However, we can't allow this to
89    // happen if the singleton occurs on loop entry, as the elimination of
90    // the PhiNode may cause the resulting node to migrate back to a previous
91    // loop iteration.
92    if (singleton && t == Type::TOP) {
93      // Is_Loop() == false does not confirm the absence of a loop (e.g., an
94      // irreducible loop may not be indicated by an affirmative is_Loop());
95      // therefore, the only top we can split thru a phi is on a backedge of
96      // a loop.
97      singleton &= region->is_Loop() && (i != LoopNode::EntryControl);
98    }
99
100    if (singleton) {
101      wins++;
102      x = ((PhaseGVN&)_igvn).makecon(t);
103    } else {
104      // We now call Identity to try to simplify the cloned node.
105      // Note that some Identity methods call phase->type(this).
106      // Make sure that the type array is big enough for
107      // our new node, even though we may throw the node away.
108      // (Note: This tweaking with igvn only works because x is a new node.)
109      _igvn.set_type(x, t);
110      // If x is a TypeNode, capture any more-precise type permanently into Node
111      // otherwise it will be not updated during igvn->transform since
112      // igvn->type(x) is set to x->Value() already.
113      x->raise_bottom_type(t);
114      Node *y = x->Identity(&_igvn);
115      if (y != x) {
116        wins++;
117        x = y;
118      } else {
119        y = _igvn.hash_find(x);
120        if (y) {
121          wins++;
122          x = y;
123        } else {
124          // Else x is a new node we are keeping
125          // We do not need register_new_node_with_optimizer
126          // because set_type has already been called.
127          _igvn._worklist.push(x);
128        }
129      }
130    }
131    if (x != the_clone && the_clone != NULL)
132      _igvn.remove_dead_node(the_clone);
133    phi->set_req( i, x );
134  }
135  // Too few wins?
136  if (wins <= policy) {
137    _igvn.remove_dead_node(phi);
138    return NULL;
139  }
140
141  // Record Phi
142  register_new_node( phi, region );
143
144  for (uint i2 = 1; i2 < phi->req(); i2++) {
145    Node *x = phi->in(i2);
146    // If we commoned up the cloned 'x' with another existing Node,
147    // the existing Node picks up a new use.  We need to make the
148    // existing Node occur higher up so it dominates its uses.
149    Node *old_ctrl;
150    IdealLoopTree *old_loop;
151
152    if (x->is_Con()) {
153      // Constant's control is always root.
154      set_ctrl(x, C->root());
155      continue;
156    }
157    // The occasional new node
158    if (x->_idx >= old_unique) {     // Found a new, unplaced node?
159      old_ctrl = NULL;
160      old_loop = NULL;               // Not in any prior loop
161    } else {
162      old_ctrl = get_ctrl(x);
163      old_loop = get_loop(old_ctrl); // Get prior loop
164    }
165    // New late point must dominate new use
166    Node *new_ctrl = dom_lca(old_ctrl, region->in(i2));
167    if (new_ctrl == old_ctrl) // Nothing is changed
168      continue;
169
170    IdealLoopTree *new_loop = get_loop(new_ctrl);
171
172    // Don't move x into a loop if its uses are
173    // outside of loop. Otherwise x will be cloned
174    // for each use outside of this loop.
175    IdealLoopTree *use_loop = get_loop(region);
176    if (!new_loop->is_member(use_loop) &&
177        (old_loop == NULL || !new_loop->is_member(old_loop))) {
178      // Take early control, later control will be recalculated
179      // during next iteration of loop optimizations.
180      new_ctrl = get_early_ctrl(x);
181      new_loop = get_loop(new_ctrl);
182    }
183    // Set new location
184    set_ctrl(x, new_ctrl);
185    // If changing loop bodies, see if we need to collect into new body
186    if (old_loop != new_loop) {
187      if (old_loop && !old_loop->_child)
188        old_loop->_body.yank(x);
189      if (!new_loop->_child)
190        new_loop->_body.push(x);  // Collect body info
191    }
192  }
193
194  return phi;
195}
196
197//------------------------------dominated_by------------------------------------
198// Replace the dominated test with an obvious true or false.  Place it on the
199// IGVN worklist for later cleanup.  Move control-dependent data Nodes on the
200// live path up to the dominating control.
201void PhaseIdealLoop::dominated_by( Node *prevdom, Node *iff, bool flip, bool exclude_loop_predicate ) {
202#ifndef PRODUCT
203  if (VerifyLoopOptimizations && PrintOpto) tty->print_cr("dominating test");
204#endif
205
206
207  // prevdom is the dominating projection of the dominating test.
208  assert( iff->is_If(), "" );
209  assert( iff->Opcode() == Op_If || iff->Opcode() == Op_CountedLoopEnd, "Check this code when new subtype is added");
210  int pop = prevdom->Opcode();
211  assert( pop == Op_IfFalse || pop == Op_IfTrue, "" );
212  if (flip) {
213    if (pop == Op_IfTrue)
214      pop = Op_IfFalse;
215    else
216      pop = Op_IfTrue;
217  }
218  // 'con' is set to true or false to kill the dominated test.
219  Node *con = _igvn.makecon(pop == Op_IfTrue ? TypeInt::ONE : TypeInt::ZERO);
220  set_ctrl(con, C->root()); // Constant gets a new use
221  // Hack the dominated test
222  _igvn.replace_input_of(iff, 1, con);
223
224  // If I dont have a reachable TRUE and FALSE path following the IfNode then
225  // I can assume this path reaches an infinite loop.  In this case it's not
226  // important to optimize the data Nodes - either the whole compilation will
227  // be tossed or this path (and all data Nodes) will go dead.
228  if (iff->outcnt() != 2) return;
229
230  // Make control-dependent data Nodes on the live path (path that will remain
231  // once the dominated IF is removed) become control-dependent on the
232  // dominating projection.
233  Node* dp = iff->as_If()->proj_out(pop == Op_IfTrue);
234
235  // Loop predicates may have depending checks which should not
236  // be skipped. For example, range check predicate has two checks
237  // for lower and upper bounds.
238  if (dp == NULL)
239    return;
240
241  ProjNode* dp_proj  = dp->as_Proj();
242  ProjNode* unc_proj = iff->as_If()->proj_out(1 - dp_proj->_con)->as_Proj();
243  if (exclude_loop_predicate &&
244      unc_proj->is_uncommon_trap_proj(Deoptimization::Reason_predicate))
245    return; // Let IGVN transformation change control dependence.
246
247  IdealLoopTree *old_loop = get_loop(dp);
248
249  for (DUIterator_Fast imax, i = dp->fast_outs(imax); i < imax; i++) {
250    Node* cd = dp->fast_out(i); // Control-dependent node
251    if (cd->depends_only_on_test()) {
252      assert(cd->in(0) == dp, "");
253      _igvn.replace_input_of(cd, 0, prevdom);
254      set_early_ctrl(cd);
255      IdealLoopTree *new_loop = get_loop(get_ctrl(cd));
256      if (old_loop != new_loop) {
257        if (!old_loop->_child) old_loop->_body.yank(cd);
258        if (!new_loop->_child) new_loop->_body.push(cd);
259      }
260      --i;
261      --imax;
262    }
263  }
264}
265
266//------------------------------has_local_phi_input----------------------------
267// Return TRUE if 'n' has Phi inputs from its local block and no other
268// block-local inputs (all non-local-phi inputs come from earlier blocks)
269Node *PhaseIdealLoop::has_local_phi_input( Node *n ) {
270  Node *n_ctrl = get_ctrl(n);
271  // See if some inputs come from a Phi in this block, or from before
272  // this block.
273  uint i;
274  for( i = 1; i < n->req(); i++ ) {
275    Node *phi = n->in(i);
276    if( phi->is_Phi() && phi->in(0) == n_ctrl )
277      break;
278  }
279  if( i >= n->req() )
280    return NULL;                // No Phi inputs; nowhere to clone thru
281
282  // Check for inputs created between 'n' and the Phi input.  These
283  // must split as well; they have already been given the chance
284  // (courtesy of a post-order visit) and since they did not we must
285  // recover the 'cost' of splitting them by being very profitable
286  // when splitting 'n'.  Since this is unlikely we simply give up.
287  for( i = 1; i < n->req(); i++ ) {
288    Node *m = n->in(i);
289    if( get_ctrl(m) == n_ctrl && !m->is_Phi() ) {
290      // We allow the special case of AddP's with no local inputs.
291      // This allows us to split-up address expressions.
292      if (m->is_AddP() &&
293          get_ctrl(m->in(2)) != n_ctrl &&
294          get_ctrl(m->in(3)) != n_ctrl) {
295        // Move the AddP up to dominating point
296        set_ctrl_and_loop(m, find_non_split_ctrl(idom(n_ctrl)));
297        continue;
298      }
299      return NULL;
300    }
301  }
302
303  return n_ctrl;
304}
305
306//------------------------------remix_address_expressions----------------------
307// Rework addressing expressions to get the most loop-invariant stuff
308// moved out.  We'd like to do all associative operators, but it's especially
309// important (common) to do address expressions.
310Node *PhaseIdealLoop::remix_address_expressions( Node *n ) {
311  if (!has_ctrl(n))  return NULL;
312  Node *n_ctrl = get_ctrl(n);
313  IdealLoopTree *n_loop = get_loop(n_ctrl);
314
315  // See if 'n' mixes loop-varying and loop-invariant inputs and
316  // itself is loop-varying.
317
318  // Only interested in binary ops (and AddP)
319  if( n->req() < 3 || n->req() > 4 ) return NULL;
320
321  Node *n1_ctrl = get_ctrl(n->in(                    1));
322  Node *n2_ctrl = get_ctrl(n->in(                    2));
323  Node *n3_ctrl = get_ctrl(n->in(n->req() == 3 ? 2 : 3));
324  IdealLoopTree *n1_loop = get_loop( n1_ctrl );
325  IdealLoopTree *n2_loop = get_loop( n2_ctrl );
326  IdealLoopTree *n3_loop = get_loop( n3_ctrl );
327
328  // Does one of my inputs spin in a tighter loop than self?
329  if( (n_loop->is_member( n1_loop ) && n_loop != n1_loop) ||
330      (n_loop->is_member( n2_loop ) && n_loop != n2_loop) ||
331      (n_loop->is_member( n3_loop ) && n_loop != n3_loop) )
332    return NULL;                // Leave well enough alone
333
334  // Is at least one of my inputs loop-invariant?
335  if( n1_loop == n_loop &&
336      n2_loop == n_loop &&
337      n3_loop == n_loop )
338    return NULL;                // No loop-invariant inputs
339
340
341  int n_op = n->Opcode();
342
343  // Replace expressions like ((V+I) << 2) with (V<<2 + I<<2).
344  if( n_op == Op_LShiftI ) {
345    // Scale is loop invariant
346    Node *scale = n->in(2);
347    Node *scale_ctrl = get_ctrl(scale);
348    IdealLoopTree *scale_loop = get_loop(scale_ctrl );
349    if( n_loop == scale_loop || !scale_loop->is_member( n_loop ) )
350      return NULL;
351    const TypeInt *scale_t = scale->bottom_type()->isa_int();
352    if( scale_t && scale_t->is_con() && scale_t->get_con() >= 16 )
353      return NULL;              // Dont bother with byte/short masking
354    // Add must vary with loop (else shift would be loop-invariant)
355    Node *add = n->in(1);
356    Node *add_ctrl = get_ctrl(add);
357    IdealLoopTree *add_loop = get_loop(add_ctrl);
358    //assert( n_loop == add_loop, "" );
359    if( n_loop != add_loop ) return NULL;  // happens w/ evil ZKM loops
360
361    // Convert I-V into I+ (0-V); same for V-I
362    if( add->Opcode() == Op_SubI &&
363        _igvn.type( add->in(1) ) != TypeInt::ZERO ) {
364      Node *zero = _igvn.intcon(0);
365      set_ctrl(zero, C->root());
366      Node *neg = new SubINode( _igvn.intcon(0), add->in(2) );
367      register_new_node( neg, get_ctrl(add->in(2) ) );
368      add = new AddINode( add->in(1), neg );
369      register_new_node( add, add_ctrl );
370    }
371    if( add->Opcode() != Op_AddI ) return NULL;
372    // See if one add input is loop invariant
373    Node *add_var = add->in(1);
374    Node *add_var_ctrl = get_ctrl(add_var);
375    IdealLoopTree *add_var_loop = get_loop(add_var_ctrl );
376    Node *add_invar = add->in(2);
377    Node *add_invar_ctrl = get_ctrl(add_invar);
378    IdealLoopTree *add_invar_loop = get_loop(add_invar_ctrl );
379    if( add_var_loop == n_loop ) {
380    } else if( add_invar_loop == n_loop ) {
381      // Swap to find the invariant part
382      add_invar = add_var;
383      add_invar_ctrl = add_var_ctrl;
384      add_invar_loop = add_var_loop;
385      add_var = add->in(2);
386      Node *add_var_ctrl = get_ctrl(add_var);
387      IdealLoopTree *add_var_loop = get_loop(add_var_ctrl );
388    } else                      // Else neither input is loop invariant
389      return NULL;
390    if( n_loop == add_invar_loop || !add_invar_loop->is_member( n_loop ) )
391      return NULL;              // No invariant part of the add?
392
393    // Yes!  Reshape address expression!
394    Node *inv_scale = new LShiftINode( add_invar, scale );
395    Node *inv_scale_ctrl =
396      dom_depth(add_invar_ctrl) > dom_depth(scale_ctrl) ?
397      add_invar_ctrl : scale_ctrl;
398    register_new_node( inv_scale, inv_scale_ctrl );
399    Node *var_scale = new LShiftINode( add_var, scale );
400    register_new_node( var_scale, n_ctrl );
401    Node *var_add = new AddINode( var_scale, inv_scale );
402    register_new_node( var_add, n_ctrl );
403    _igvn.replace_node( n, var_add );
404    return var_add;
405  }
406
407  // Replace (I+V) with (V+I)
408  if( n_op == Op_AddI ||
409      n_op == Op_AddL ||
410      n_op == Op_AddF ||
411      n_op == Op_AddD ||
412      n_op == Op_MulI ||
413      n_op == Op_MulL ||
414      n_op == Op_MulF ||
415      n_op == Op_MulD ) {
416    if( n2_loop == n_loop ) {
417      assert( n1_loop != n_loop, "" );
418      n->swap_edges(1, 2);
419    }
420  }
421
422  // Replace ((I1 +p V) +p I2) with ((I1 +p I2) +p V),
423  // but not if I2 is a constant.
424  if( n_op == Op_AddP ) {
425    if( n2_loop == n_loop && n3_loop != n_loop ) {
426      if( n->in(2)->Opcode() == Op_AddP && !n->in(3)->is_Con() ) {
427        Node *n22_ctrl = get_ctrl(n->in(2)->in(2));
428        Node *n23_ctrl = get_ctrl(n->in(2)->in(3));
429        IdealLoopTree *n22loop = get_loop( n22_ctrl );
430        IdealLoopTree *n23_loop = get_loop( n23_ctrl );
431        if( n22loop != n_loop && n22loop->is_member(n_loop) &&
432            n23_loop == n_loop ) {
433          Node *add1 = new AddPNode( n->in(1), n->in(2)->in(2), n->in(3) );
434          // Stuff new AddP in the loop preheader
435          register_new_node( add1, n_loop->_head->in(LoopNode::EntryControl) );
436          Node *add2 = new AddPNode( n->in(1), add1, n->in(2)->in(3) );
437          register_new_node( add2, n_ctrl );
438          _igvn.replace_node( n, add2 );
439          return add2;
440        }
441      }
442    }
443
444    // Replace (I1 +p (I2 + V)) with ((I1 +p I2) +p V)
445    if( n2_loop != n_loop && n3_loop == n_loop ) {
446      if( n->in(3)->Opcode() == Op_AddI ) {
447        Node *V = n->in(3)->in(1);
448        Node *I = n->in(3)->in(2);
449        if( is_member(n_loop,get_ctrl(V)) ) {
450        } else {
451          Node *tmp = V; V = I; I = tmp;
452        }
453        if( !is_member(n_loop,get_ctrl(I)) ) {
454          Node *add1 = new AddPNode( n->in(1), n->in(2), I );
455          // Stuff new AddP in the loop preheader
456          register_new_node( add1, n_loop->_head->in(LoopNode::EntryControl) );
457          Node *add2 = new AddPNode( n->in(1), add1, V );
458          register_new_node( add2, n_ctrl );
459          _igvn.replace_node( n, add2 );
460          return add2;
461        }
462      }
463    }
464  }
465
466  return NULL;
467}
468
469//------------------------------conditional_move-------------------------------
470// Attempt to replace a Phi with a conditional move.  We have some pretty
471// strict profitability requirements.  All Phis at the merge point must
472// be converted, so we can remove the control flow.  We need to limit the
473// number of c-moves to a small handful.  All code that was in the side-arms
474// of the CFG diamond is now speculatively executed.  This code has to be
475// "cheap enough".  We are pretty much limited to CFG diamonds that merge
476// 1 or 2 items with a total of 1 or 2 ops executed speculatively.
477Node *PhaseIdealLoop::conditional_move( Node *region ) {
478
479  assert(region->is_Region(), "sanity check");
480  if (region->req() != 3) return NULL;
481
482  // Check for CFG diamond
483  Node *lp = region->in(1);
484  Node *rp = region->in(2);
485  if (!lp || !rp) return NULL;
486  Node *lp_c = lp->in(0);
487  if (lp_c == NULL || lp_c != rp->in(0) || !lp_c->is_If()) return NULL;
488  IfNode *iff = lp_c->as_If();
489
490  // Check for ops pinned in an arm of the diamond.
491  // Can't remove the control flow in this case
492  if (lp->outcnt() > 1) return NULL;
493  if (rp->outcnt() > 1) return NULL;
494
495  IdealLoopTree* r_loop = get_loop(region);
496  assert(r_loop == get_loop(iff), "sanity");
497  // Always convert to CMOVE if all results are used only outside this loop.
498  bool used_inside_loop = (r_loop == _ltree_root);
499
500  // Check profitability
501  int cost = 0;
502  int phis = 0;
503  for (DUIterator_Fast imax, i = region->fast_outs(imax); i < imax; i++) {
504    Node *out = region->fast_out(i);
505    if (!out->is_Phi()) continue; // Ignore other control edges, etc
506    phis++;
507    PhiNode* phi = out->as_Phi();
508    BasicType bt = phi->type()->basic_type();
509    switch (bt) {
510    case T_FLOAT:
511    case T_DOUBLE: {
512      cost += Matcher::float_cmove_cost(); // Could be very expensive
513      break;
514    }
515    case T_LONG: {
516      cost += Matcher::long_cmove_cost(); // May encodes as 2 CMOV's
517    }
518    case T_INT:                 // These all CMOV fine
519    case T_ADDRESS: {           // (RawPtr)
520      cost++;
521      break;
522    }
523    case T_NARROWOOP: // Fall through
524    case T_OBJECT: {            // Base oops are OK, but not derived oops
525      const TypeOopPtr *tp = phi->type()->make_ptr()->isa_oopptr();
526      // Derived pointers are Bad (tm): what's the Base (for GC purposes) of a
527      // CMOVE'd derived pointer?  It's a CMOVE'd derived base.  Thus
528      // CMOVE'ing a derived pointer requires we also CMOVE the base.  If we
529      // have a Phi for the base here that we convert to a CMOVE all is well
530      // and good.  But if the base is dead, we'll not make a CMOVE.  Later
531      // the allocator will have to produce a base by creating a CMOVE of the
532      // relevant bases.  This puts the allocator in the business of
533      // manufacturing expensive instructions, generally a bad plan.
534      // Just Say No to Conditionally-Moved Derived Pointers.
535      if (tp && tp->offset() != 0)
536        return NULL;
537      cost++;
538      break;
539    }
540    default:
541      return NULL;              // In particular, can't do memory or I/O
542    }
543    // Add in cost any speculative ops
544    for (uint j = 1; j < region->req(); j++) {
545      Node *proj = region->in(j);
546      Node *inp = phi->in(j);
547      if (get_ctrl(inp) == proj) { // Found local op
548        cost++;
549        // Check for a chain of dependent ops; these will all become
550        // speculative in a CMOV.
551        for (uint k = 1; k < inp->req(); k++)
552          if (get_ctrl(inp->in(k)) == proj)
553            cost += ConditionalMoveLimit; // Too much speculative goo
554      }
555    }
556    // See if the Phi is used by a Cmp or Narrow oop Decode/Encode.
557    // This will likely Split-If, a higher-payoff operation.
558    for (DUIterator_Fast kmax, k = phi->fast_outs(kmax); k < kmax; k++) {
559      Node* use = phi->fast_out(k);
560      if (use->is_Cmp() || use->is_DecodeNarrowPtr() || use->is_EncodeNarrowPtr())
561        cost += ConditionalMoveLimit;
562      // Is there a use inside the loop?
563      // Note: check only basic types since CMoveP is pinned.
564      if (!used_inside_loop && is_java_primitive(bt)) {
565        IdealLoopTree* u_loop = get_loop(has_ctrl(use) ? get_ctrl(use) : use);
566        if (r_loop == u_loop || r_loop->is_member(u_loop)) {
567          used_inside_loop = true;
568        }
569      }
570    }
571  }
572  Node* bol = iff->in(1);
573  assert(bol->Opcode() == Op_Bool, "");
574  int cmp_op = bol->in(1)->Opcode();
575  // It is expensive to generate flags from a float compare.
576  // Avoid duplicated float compare.
577  if (phis > 1 && (cmp_op == Op_CmpF || cmp_op == Op_CmpD)) return NULL;
578
579  float infrequent_prob = PROB_UNLIKELY_MAG(3);
580  // Ignore cost and blocks frequency if CMOVE can be moved outside the loop.
581  if (used_inside_loop) {
582    if (cost >= ConditionalMoveLimit) return NULL; // Too much goo
583
584    // BlockLayoutByFrequency optimization moves infrequent branch
585    // from hot path. No point in CMOV'ing in such case (110 is used
586    // instead of 100 to take into account not exactness of float value).
587    if (BlockLayoutByFrequency) {
588      infrequent_prob = MAX2(infrequent_prob, (float)BlockLayoutMinDiamondPercentage/110.0f);
589    }
590  }
591  // Check for highly predictable branch.  No point in CMOV'ing if
592  // we are going to predict accurately all the time.
593  if (iff->_prob < infrequent_prob ||
594      iff->_prob > (1.0f - infrequent_prob))
595    return NULL;
596
597  // --------------
598  // Now replace all Phis with CMOV's
599  Node *cmov_ctrl = iff->in(0);
600  uint flip = (lp->Opcode() == Op_IfTrue);
601  while (1) {
602    PhiNode* phi = NULL;
603    for (DUIterator_Fast imax, i = region->fast_outs(imax); i < imax; i++) {
604      Node *out = region->fast_out(i);
605      if (out->is_Phi()) {
606        phi = out->as_Phi();
607        break;
608      }
609    }
610    if (phi == NULL)  break;
611#ifndef PRODUCT
612    if (PrintOpto && VerifyLoopOptimizations) tty->print_cr("CMOV");
613#endif
614    // Move speculative ops
615    for (uint j = 1; j < region->req(); j++) {
616      Node *proj = region->in(j);
617      Node *inp = phi->in(j);
618      if (get_ctrl(inp) == proj) { // Found local op
619#ifndef PRODUCT
620        if (PrintOpto && VerifyLoopOptimizations) {
621          tty->print("  speculate: ");
622          inp->dump();
623        }
624#endif
625        set_ctrl(inp, cmov_ctrl);
626      }
627    }
628    Node *cmov = CMoveNode::make( C, cmov_ctrl, iff->in(1), phi->in(1+flip), phi->in(2-flip), _igvn.type(phi) );
629    register_new_node( cmov, cmov_ctrl );
630    _igvn.replace_node( phi, cmov );
631#ifndef PRODUCT
632    if (TraceLoopOpts) {
633      tty->print("CMOV  ");
634      r_loop->dump_head();
635      if (Verbose) {
636        bol->in(1)->dump(1);
637        cmov->dump(1);
638      }
639    }
640    if (VerifyLoopOptimizations) verify();
641#endif
642  }
643
644  // The useless CFG diamond will fold up later; see the optimization in
645  // RegionNode::Ideal.
646  _igvn._worklist.push(region);
647
648  return iff->in(1);
649}
650
651//------------------------------split_if_with_blocks_pre-----------------------
652// Do the real work in a non-recursive function.  Data nodes want to be
653// cloned in the pre-order so they can feed each other nicely.
654Node *PhaseIdealLoop::split_if_with_blocks_pre( Node *n ) {
655  // Cloning these guys is unlikely to win
656  int n_op = n->Opcode();
657  if( n_op == Op_MergeMem ) return n;
658  if( n->is_Proj() ) return n;
659  // Do not clone-up CmpFXXX variations, as these are always
660  // followed by a CmpI
661  if( n->is_Cmp() ) return n;
662  // Attempt to use a conditional move instead of a phi/branch
663  if( ConditionalMoveLimit > 0 && n_op == Op_Region ) {
664    Node *cmov = conditional_move( n );
665    if( cmov ) return cmov;
666  }
667  if( n->is_CFG() || n->is_LoadStore() )
668    return n;
669  if( n_op == Op_Opaque1 ||     // Opaque nodes cannot be mod'd
670      n_op == Op_Opaque2 ) {
671    if( !C->major_progress() )   // If chance of no more loop opts...
672      _igvn._worklist.push(n);  // maybe we'll remove them
673    return n;
674  }
675
676  if( n->is_Con() ) return n;   // No cloning for Con nodes
677
678  Node *n_ctrl = get_ctrl(n);
679  if( !n_ctrl ) return n;       // Dead node
680
681  // Attempt to remix address expressions for loop invariants
682  Node *m = remix_address_expressions( n );
683  if( m ) return m;
684
685  // Determine if the Node has inputs from some local Phi.
686  // Returns the block to clone thru.
687  Node *n_blk = has_local_phi_input( n );
688  if( !n_blk ) return n;
689  // Do not clone the trip counter through on a CountedLoop
690  // (messes up the canonical shape).
691  if( n_blk->is_CountedLoop() && n->Opcode() == Op_AddI ) return n;
692
693  // Check for having no control input; not pinned.  Allow
694  // dominating control.
695  if( n->in(0) ) {
696    Node *dom = idom(n_blk);
697    if( dom_lca( n->in(0), dom ) != n->in(0) )
698      return n;
699  }
700  // Policy: when is it profitable.  You must get more wins than
701  // policy before it is considered profitable.  Policy is usually 0,
702  // so 1 win is considered profitable.  Big merges will require big
703  // cloning, so get a larger policy.
704  int policy = n_blk->req() >> 2;
705
706  // If the loop is a candidate for range check elimination,
707  // delay splitting through it's phi until a later loop optimization
708  if (n_blk->is_CountedLoop()) {
709    IdealLoopTree *lp = get_loop(n_blk);
710    if (lp && lp->_rce_candidate) {
711      return n;
712    }
713  }
714
715  // Use same limit as split_if_with_blocks_post
716  if( C->unique() > 35000 ) return n; // Method too big
717
718  // Split 'n' through the merge point if it is profitable
719  Node *phi = split_thru_phi( n, n_blk, policy );
720  if (!phi) return n;
721
722  // Found a Phi to split thru!
723  // Replace 'n' with the new phi
724  _igvn.replace_node( n, phi );
725  // Moved a load around the loop, 'en-registering' something.
726  if (n_blk->is_Loop() && n->is_Load() &&
727      !phi->in(LoopNode::LoopBackControl)->is_Load())
728    C->set_major_progress();
729
730  return phi;
731}
732
733static bool merge_point_too_heavy(Compile* C, Node* region) {
734  // Bail out if the region and its phis have too many users.
735  int weight = 0;
736  for (DUIterator_Fast imax, i = region->fast_outs(imax); i < imax; i++) {
737    weight += region->fast_out(i)->outcnt();
738  }
739  int nodes_left = MaxNodeLimit - C->live_nodes();
740  if (weight * 8 > nodes_left) {
741#ifndef PRODUCT
742    if (PrintOpto)
743      tty->print_cr("*** Split-if bails out:  %d nodes, region weight %d", C->unique(), weight);
744#endif
745    return true;
746  } else {
747    return false;
748  }
749}
750
751static bool merge_point_safe(Node* region) {
752  // 4799512: Stop split_if_with_blocks from splitting a block with a ConvI2LNode
753  // having a PhiNode input. This sidesteps the dangerous case where the split
754  // ConvI2LNode may become TOP if the input Value() does not
755  // overlap the ConvI2L range, leaving a node which may not dominate its
756  // uses.
757  // A better fix for this problem can be found in the BugTraq entry, but
758  // expediency for Mantis demands this hack.
759  // 6855164: If the merge point has a FastLockNode with a PhiNode input, we stop
760  // split_if_with_blocks from splitting a block because we could not move around
761  // the FastLockNode.
762  for (DUIterator_Fast imax, i = region->fast_outs(imax); i < imax; i++) {
763    Node* n = region->fast_out(i);
764    if (n->is_Phi()) {
765      for (DUIterator_Fast jmax, j = n->fast_outs(jmax); j < jmax; j++) {
766        Node* m = n->fast_out(j);
767        if (m->is_FastLock())
768          return false;
769#ifdef _LP64
770        if (m->Opcode() == Op_ConvI2L)
771          return false;
772#endif
773      }
774    }
775  }
776  return true;
777}
778
779
780//------------------------------place_near_use---------------------------------
781// Place some computation next to use but not inside inner loops.
782// For inner loop uses move it to the preheader area.
783Node *PhaseIdealLoop::place_near_use( Node *useblock ) const {
784  IdealLoopTree *u_loop = get_loop( useblock );
785  return (u_loop->_irreducible || u_loop->_child)
786    ? useblock
787    : u_loop->_head->in(LoopNode::EntryControl);
788}
789
790
791//------------------------------split_if_with_blocks_post----------------------
792// Do the real work in a non-recursive function.  CFG hackery wants to be
793// in the post-order, so it can dirty the I-DOM info and not use the dirtied
794// info.
795void PhaseIdealLoop::split_if_with_blocks_post( Node *n ) {
796
797  // Cloning Cmp through Phi's involves the split-if transform.
798  // FastLock is not used by an If
799  if( n->is_Cmp() && !n->is_FastLock() ) {
800    if( C->unique() > 35000 ) return; // Method too big
801
802    // Do not do 'split-if' if irreducible loops are present.
803    if( _has_irreducible_loops )
804      return;
805
806    Node *n_ctrl = get_ctrl(n);
807    // Determine if the Node has inputs from some local Phi.
808    // Returns the block to clone thru.
809    Node *n_blk = has_local_phi_input( n );
810    if( n_blk != n_ctrl ) return;
811
812    if( merge_point_too_heavy(C, n_ctrl) )
813      return;
814
815    if( n->outcnt() != 1 ) return; // Multiple bool's from 1 compare?
816    Node *bol = n->unique_out();
817    assert( bol->is_Bool(), "expect a bool here" );
818    if( bol->outcnt() != 1 ) return;// Multiple branches from 1 compare?
819    Node *iff = bol->unique_out();
820
821    // Check some safety conditions
822    if( iff->is_If() ) {        // Classic split-if?
823      if( iff->in(0) != n_ctrl ) return; // Compare must be in same blk as if
824    } else if (iff->is_CMove()) { // Trying to split-up a CMOVE
825      // Can't split CMove with different control edge.
826      if (iff->in(0) != NULL && iff->in(0) != n_ctrl ) return;
827      if( get_ctrl(iff->in(2)) == n_ctrl ||
828          get_ctrl(iff->in(3)) == n_ctrl )
829        return;                 // Inputs not yet split-up
830      if ( get_loop(n_ctrl) != get_loop(get_ctrl(iff)) ) {
831        return;                 // Loop-invar test gates loop-varying CMOVE
832      }
833    } else {
834      return;  // some other kind of node, such as an Allocate
835    }
836
837    // Do not do 'split-if' if some paths are dead.  First do dead code
838    // elimination and then see if its still profitable.
839    for( uint i = 1; i < n_ctrl->req(); i++ )
840      if( n_ctrl->in(i) == C->top() )
841        return;
842
843    // When is split-if profitable?  Every 'win' on means some control flow
844    // goes dead, so it's almost always a win.
845    int policy = 0;
846    // If trying to do a 'Split-If' at the loop head, it is only
847    // profitable if the cmp folds up on BOTH paths.  Otherwise we
848    // risk peeling a loop forever.
849
850    // CNC - Disabled for now.  Requires careful handling of loop
851    // body selection for the cloned code.  Also, make sure we check
852    // for any input path not being in the same loop as n_ctrl.  For
853    // irreducible loops we cannot check for 'n_ctrl->is_Loop()'
854    // because the alternative loop entry points won't be converted
855    // into LoopNodes.
856    IdealLoopTree *n_loop = get_loop(n_ctrl);
857    for( uint j = 1; j < n_ctrl->req(); j++ )
858      if( get_loop(n_ctrl->in(j)) != n_loop )
859        return;
860
861    // Check for safety of the merge point.
862    if( !merge_point_safe(n_ctrl) ) {
863      return;
864    }
865
866    // Split compare 'n' through the merge point if it is profitable
867    Node *phi = split_thru_phi( n, n_ctrl, policy );
868    if( !phi ) return;
869
870    // Found a Phi to split thru!
871    // Replace 'n' with the new phi
872    _igvn.replace_node( n, phi );
873
874    // Now split the bool up thru the phi
875    Node *bolphi = split_thru_phi( bol, n_ctrl, -1 );
876    guarantee(bolphi != NULL, "null boolean phi node");
877
878    _igvn.replace_node( bol, bolphi );
879    assert( iff->in(1) == bolphi, "" );
880
881    if( bolphi->Value(&_igvn)->singleton() )
882      return;
883
884    // Conditional-move?  Must split up now
885    if( !iff->is_If() ) {
886      Node *cmovphi = split_thru_phi( iff, n_ctrl, -1 );
887      _igvn.replace_node( iff, cmovphi );
888      return;
889    }
890
891    // Now split the IF
892    do_split_if( iff );
893    return;
894  }
895
896  // Check for an IF ready to split; one that has its
897  // condition codes input coming from a Phi at the block start.
898  int n_op = n->Opcode();
899
900  // Check for an IF being dominated by another IF same test
901  if( n_op == Op_If ) {
902    Node *bol = n->in(1);
903    uint max = bol->outcnt();
904    // Check for same test used more than once?
905    if( n_op == Op_If && max > 1 && bol->is_Bool() ) {
906      // Search up IDOMs to see if this IF is dominated.
907      Node *cutoff = get_ctrl(bol);
908
909      // Now search up IDOMs till cutoff, looking for a dominating test
910      Node *prevdom = n;
911      Node *dom = idom(prevdom);
912      while( dom != cutoff ) {
913        if( dom->req() > 1 && dom->in(1) == bol && prevdom->in(0) == dom ) {
914          // Replace the dominated test with an obvious true or false.
915          // Place it on the IGVN worklist for later cleanup.
916          C->set_major_progress();
917          dominated_by( prevdom, n, false, true );
918#ifndef PRODUCT
919          if( VerifyLoopOptimizations ) verify();
920#endif
921          return;
922        }
923        prevdom = dom;
924        dom = idom(prevdom);
925      }
926    }
927  }
928
929  // See if a shared loop-varying computation has no loop-varying uses.
930  // Happens if something is only used for JVM state in uncommon trap exits,
931  // like various versions of induction variable+offset.  Clone the
932  // computation per usage to allow it to sink out of the loop.
933  if (has_ctrl(n) && !n->in(0)) {// n not dead and has no control edge (can float about)
934    Node *n_ctrl = get_ctrl(n);
935    IdealLoopTree *n_loop = get_loop(n_ctrl);
936    if( n_loop != _ltree_root ) {
937      DUIterator_Fast imax, i = n->fast_outs(imax);
938      for (; i < imax; i++) {
939        Node* u = n->fast_out(i);
940        if( !has_ctrl(u) )     break; // Found control user
941        IdealLoopTree *u_loop = get_loop(get_ctrl(u));
942        if( u_loop == n_loop ) break; // Found loop-varying use
943        if( n_loop->is_member( u_loop ) ) break; // Found use in inner loop
944        if( u->Opcode() == Op_Opaque1 ) break; // Found loop limit, bugfix for 4677003
945      }
946      bool did_break = (i < imax);  // Did we break out of the previous loop?
947      if (!did_break && n->outcnt() > 1) { // All uses in outer loops!
948        Node *late_load_ctrl = NULL;
949        if (n->is_Load()) {
950          // If n is a load, get and save the result from get_late_ctrl(),
951          // to be later used in calculating the control for n's clones.
952          clear_dom_lca_tags();
953          late_load_ctrl = get_late_ctrl(n, n_ctrl);
954        }
955        // If n is a load, and the late control is the same as the current
956        // control, then the cloning of n is a pointless exercise, because
957        // GVN will ensure that we end up where we started.
958        if (!n->is_Load() || late_load_ctrl != n_ctrl) {
959          for (DUIterator_Last jmin, j = n->last_outs(jmin); j >= jmin; ) {
960            Node *u = n->last_out(j); // Clone private computation per use
961            _igvn.rehash_node_delayed(u);
962            Node *x = n->clone(); // Clone computation
963            Node *x_ctrl = NULL;
964            if( u->is_Phi() ) {
965              // Replace all uses of normal nodes.  Replace Phi uses
966              // individually, so the separate Nodes can sink down
967              // different paths.
968              uint k = 1;
969              while( u->in(k) != n ) k++;
970              u->set_req( k, x );
971              // x goes next to Phi input path
972              x_ctrl = u->in(0)->in(k);
973              --j;
974            } else {              // Normal use
975              // Replace all uses
976              for( uint k = 0; k < u->req(); k++ ) {
977                if( u->in(k) == n ) {
978                  u->set_req( k, x );
979                  --j;
980                }
981              }
982              x_ctrl = get_ctrl(u);
983            }
984
985            // Find control for 'x' next to use but not inside inner loops.
986            // For inner loop uses get the preheader area.
987            x_ctrl = place_near_use(x_ctrl);
988
989            if (n->is_Load()) {
990              // For loads, add a control edge to a CFG node outside of the loop
991              // to force them to not combine and return back inside the loop
992              // during GVN optimization (4641526).
993              //
994              // Because we are setting the actual control input, factor in
995              // the result from get_late_ctrl() so we respect any
996              // anti-dependences. (6233005).
997              x_ctrl = dom_lca(late_load_ctrl, x_ctrl);
998
999              // Don't allow the control input to be a CFG splitting node.
1000              // Such nodes should only have ProjNodes as outs, e.g. IfNode
1001              // should only have IfTrueNode and IfFalseNode (4985384).
1002              x_ctrl = find_non_split_ctrl(x_ctrl);
1003              assert(dom_depth(n_ctrl) <= dom_depth(x_ctrl), "n is later than its clone");
1004
1005              x->set_req(0, x_ctrl);
1006            }
1007            register_new_node(x, x_ctrl);
1008
1009            // Some institutional knowledge is needed here: 'x' is
1010            // yanked because if the optimizer runs GVN on it all the
1011            // cloned x's will common up and undo this optimization and
1012            // be forced back in the loop.  This is annoying because it
1013            // makes +VerifyOpto report false-positives on progress.  I
1014            // tried setting control edges on the x's to force them to
1015            // not combine, but the matching gets worried when it tries
1016            // to fold a StoreP and an AddP together (as part of an
1017            // address expression) and the AddP and StoreP have
1018            // different controls.
1019            if (!x->is_Load() && !x->is_DecodeNarrowPtr()) _igvn._worklist.yank(x);
1020          }
1021          _igvn.remove_dead_node(n);
1022        }
1023      }
1024    }
1025  }
1026
1027  // Check for Opaque2's who's loop has disappeared - who's input is in the
1028  // same loop nest as their output.  Remove 'em, they are no longer useful.
1029  if( n_op == Op_Opaque2 &&
1030      n->in(1) != NULL &&
1031      get_loop(get_ctrl(n)) == get_loop(get_ctrl(n->in(1))) ) {
1032    _igvn.replace_node( n, n->in(1) );
1033  }
1034}
1035
1036//------------------------------split_if_with_blocks---------------------------
1037// Check for aggressive application of 'split-if' optimization,
1038// using basic block level info.
1039void PhaseIdealLoop::split_if_with_blocks( VectorSet &visited, Node_Stack &nstack ) {
1040  Node *n = C->root();
1041  visited.set(n->_idx); // first, mark node as visited
1042  // Do pre-visit work for root
1043  n = split_if_with_blocks_pre( n );
1044  uint cnt = n->outcnt();
1045  uint i   = 0;
1046  while (true) {
1047    // Visit all children
1048    if (i < cnt) {
1049      Node* use = n->raw_out(i);
1050      ++i;
1051      if (use->outcnt() != 0 && !visited.test_set(use->_idx)) {
1052        // Now do pre-visit work for this use
1053        use = split_if_with_blocks_pre( use );
1054        nstack.push(n, i); // Save parent and next use's index.
1055        n   = use;         // Process all children of current use.
1056        cnt = use->outcnt();
1057        i   = 0;
1058      }
1059    }
1060    else {
1061      // All of n's children have been processed, complete post-processing.
1062      if (cnt != 0 && !n->is_Con()) {
1063        assert(has_node(n), "no dead nodes");
1064        split_if_with_blocks_post( n );
1065      }
1066      if (nstack.is_empty()) {
1067        // Finished all nodes on stack.
1068        break;
1069      }
1070      // Get saved parent node and next use's index. Visit the rest of uses.
1071      n   = nstack.node();
1072      cnt = n->outcnt();
1073      i   = nstack.index();
1074      nstack.pop();
1075    }
1076  }
1077}
1078
1079
1080//=============================================================================
1081//
1082//                   C L O N E   A   L O O P   B O D Y
1083//
1084
1085//------------------------------clone_iff--------------------------------------
1086// Passed in a Phi merging (recursively) some nearly equivalent Bool/Cmps.
1087// "Nearly" because all Nodes have been cloned from the original in the loop,
1088// but the fall-in edges to the Cmp are different.  Clone bool/Cmp pairs
1089// through the Phi recursively, and return a Bool.
1090BoolNode *PhaseIdealLoop::clone_iff( PhiNode *phi, IdealLoopTree *loop ) {
1091
1092  // Convert this Phi into a Phi merging Bools
1093  uint i;
1094  for( i = 1; i < phi->req(); i++ ) {
1095    Node *b = phi->in(i);
1096    if( b->is_Phi() ) {
1097      _igvn.replace_input_of(phi, i, clone_iff( b->as_Phi(), loop ));
1098    } else {
1099      assert( b->is_Bool(), "" );
1100    }
1101  }
1102
1103  Node *sample_bool = phi->in(1);
1104  Node *sample_cmp  = sample_bool->in(1);
1105
1106  // Make Phis to merge the Cmp's inputs.
1107  PhiNode *phi1 = new PhiNode( phi->in(0), Type::TOP );
1108  PhiNode *phi2 = new PhiNode( phi->in(0), Type::TOP );
1109  for( i = 1; i < phi->req(); i++ ) {
1110    Node *n1 = phi->in(i)->in(1)->in(1);
1111    Node *n2 = phi->in(i)->in(1)->in(2);
1112    phi1->set_req( i, n1 );
1113    phi2->set_req( i, n2 );
1114    phi1->set_type( phi1->type()->meet_speculative(n1->bottom_type()));
1115    phi2->set_type( phi2->type()->meet_speculative(n2->bottom_type()));
1116  }
1117  // See if these Phis have been made before.
1118  // Register with optimizer
1119  Node *hit1 = _igvn.hash_find_insert(phi1);
1120  if( hit1 ) {                  // Hit, toss just made Phi
1121    _igvn.remove_dead_node(phi1); // Remove new phi
1122    assert( hit1->is_Phi(), "" );
1123    phi1 = (PhiNode*)hit1;      // Use existing phi
1124  } else {                      // Miss
1125    _igvn.register_new_node_with_optimizer(phi1);
1126  }
1127  Node *hit2 = _igvn.hash_find_insert(phi2);
1128  if( hit2 ) {                  // Hit, toss just made Phi
1129    _igvn.remove_dead_node(phi2); // Remove new phi
1130    assert( hit2->is_Phi(), "" );
1131    phi2 = (PhiNode*)hit2;      // Use existing phi
1132  } else {                      // Miss
1133    _igvn.register_new_node_with_optimizer(phi2);
1134  }
1135  // Register Phis with loop/block info
1136  set_ctrl(phi1, phi->in(0));
1137  set_ctrl(phi2, phi->in(0));
1138  // Make a new Cmp
1139  Node *cmp = sample_cmp->clone();
1140  cmp->set_req( 1, phi1 );
1141  cmp->set_req( 2, phi2 );
1142  _igvn.register_new_node_with_optimizer(cmp);
1143  set_ctrl(cmp, phi->in(0));
1144
1145  // Make a new Bool
1146  Node *b = sample_bool->clone();
1147  b->set_req(1,cmp);
1148  _igvn.register_new_node_with_optimizer(b);
1149  set_ctrl(b, phi->in(0));
1150
1151  assert( b->is_Bool(), "" );
1152  return (BoolNode*)b;
1153}
1154
1155//------------------------------clone_bool-------------------------------------
1156// Passed in a Phi merging (recursively) some nearly equivalent Bool/Cmps.
1157// "Nearly" because all Nodes have been cloned from the original in the loop,
1158// but the fall-in edges to the Cmp are different.  Clone bool/Cmp pairs
1159// through the Phi recursively, and return a Bool.
1160CmpNode *PhaseIdealLoop::clone_bool( PhiNode *phi, IdealLoopTree *loop ) {
1161  uint i;
1162  // Convert this Phi into a Phi merging Bools
1163  for( i = 1; i < phi->req(); i++ ) {
1164    Node *b = phi->in(i);
1165    if( b->is_Phi() ) {
1166      _igvn.replace_input_of(phi, i, clone_bool( b->as_Phi(), loop ));
1167    } else {
1168      assert( b->is_Cmp() || b->is_top(), "inputs are all Cmp or TOP" );
1169    }
1170  }
1171
1172  Node *sample_cmp = phi->in(1);
1173
1174  // Make Phis to merge the Cmp's inputs.
1175  PhiNode *phi1 = new PhiNode( phi->in(0), Type::TOP );
1176  PhiNode *phi2 = new PhiNode( phi->in(0), Type::TOP );
1177  for( uint j = 1; j < phi->req(); j++ ) {
1178    Node *cmp_top = phi->in(j); // Inputs are all Cmp or TOP
1179    Node *n1, *n2;
1180    if( cmp_top->is_Cmp() ) {
1181      n1 = cmp_top->in(1);
1182      n2 = cmp_top->in(2);
1183    } else {
1184      n1 = n2 = cmp_top;
1185    }
1186    phi1->set_req( j, n1 );
1187    phi2->set_req( j, n2 );
1188    phi1->set_type(phi1->type()->meet_speculative(n1->bottom_type()));
1189    phi2->set_type(phi2->type()->meet_speculative(n2->bottom_type()));
1190  }
1191
1192  // See if these Phis have been made before.
1193  // Register with optimizer
1194  Node *hit1 = _igvn.hash_find_insert(phi1);
1195  if( hit1 ) {                  // Hit, toss just made Phi
1196    _igvn.remove_dead_node(phi1); // Remove new phi
1197    assert( hit1->is_Phi(), "" );
1198    phi1 = (PhiNode*)hit1;      // Use existing phi
1199  } else {                      // Miss
1200    _igvn.register_new_node_with_optimizer(phi1);
1201  }
1202  Node *hit2 = _igvn.hash_find_insert(phi2);
1203  if( hit2 ) {                  // Hit, toss just made Phi
1204    _igvn.remove_dead_node(phi2); // Remove new phi
1205    assert( hit2->is_Phi(), "" );
1206    phi2 = (PhiNode*)hit2;      // Use existing phi
1207  } else {                      // Miss
1208    _igvn.register_new_node_with_optimizer(phi2);
1209  }
1210  // Register Phis with loop/block info
1211  set_ctrl(phi1, phi->in(0));
1212  set_ctrl(phi2, phi->in(0));
1213  // Make a new Cmp
1214  Node *cmp = sample_cmp->clone();
1215  cmp->set_req( 1, phi1 );
1216  cmp->set_req( 2, phi2 );
1217  _igvn.register_new_node_with_optimizer(cmp);
1218  set_ctrl(cmp, phi->in(0));
1219
1220  assert( cmp->is_Cmp(), "" );
1221  return (CmpNode*)cmp;
1222}
1223
1224//------------------------------sink_use---------------------------------------
1225// If 'use' was in the loop-exit block, it now needs to be sunk
1226// below the post-loop merge point.
1227void PhaseIdealLoop::sink_use( Node *use, Node *post_loop ) {
1228  if (!use->is_CFG() && get_ctrl(use) == post_loop->in(2)) {
1229    set_ctrl(use, post_loop);
1230    for (DUIterator j = use->outs(); use->has_out(j); j++)
1231      sink_use(use->out(j), post_loop);
1232  }
1233}
1234
1235//------------------------------clone_loop-------------------------------------
1236//
1237//                   C L O N E   A   L O O P   B O D Y
1238//
1239// This is the basic building block of the loop optimizations.  It clones an
1240// entire loop body.  It makes an old_new loop body mapping; with this mapping
1241// you can find the new-loop equivalent to an old-loop node.  All new-loop
1242// nodes are exactly equal to their old-loop counterparts, all edges are the
1243// same.  All exits from the old-loop now have a RegionNode that merges the
1244// equivalent new-loop path.  This is true even for the normal "loop-exit"
1245// condition.  All uses of loop-invariant old-loop values now come from (one
1246// or more) Phis that merge their new-loop equivalents.
1247//
1248// This operation leaves the graph in an illegal state: there are two valid
1249// control edges coming from the loop pre-header to both loop bodies.  I'll
1250// definitely have to hack the graph after running this transform.
1251//
1252// From this building block I will further edit edges to perform loop peeling
1253// or loop unrolling or iteration splitting (Range-Check-Elimination), etc.
1254//
1255// Parameter side_by_size_idom:
1256//   When side_by_size_idom is NULL, the dominator tree is constructed for
1257//      the clone loop to dominate the original.  Used in construction of
1258//      pre-main-post loop sequence.
1259//   When nonnull, the clone and original are side-by-side, both are
1260//      dominated by the side_by_side_idom node.  Used in construction of
1261//      unswitched loops.
1262void PhaseIdealLoop::clone_loop( IdealLoopTree *loop, Node_List &old_new, int dd,
1263                                 Node* side_by_side_idom) {
1264
1265  // Step 1: Clone the loop body.  Make the old->new mapping.
1266  uint i;
1267  for( i = 0; i < loop->_body.size(); i++ ) {
1268    Node *old = loop->_body.at(i);
1269    Node *nnn = old->clone();
1270    old_new.map( old->_idx, nnn );
1271    _igvn.register_new_node_with_optimizer(nnn);
1272  }
1273
1274
1275  // Step 2: Fix the edges in the new body.  If the old input is outside the
1276  // loop use it.  If the old input is INside the loop, use the corresponding
1277  // new node instead.
1278  for( i = 0; i < loop->_body.size(); i++ ) {
1279    Node *old = loop->_body.at(i);
1280    Node *nnn = old_new[old->_idx];
1281    // Fix CFG/Loop controlling the new node
1282    if (has_ctrl(old)) {
1283      set_ctrl(nnn, old_new[get_ctrl(old)->_idx]);
1284    } else {
1285      set_loop(nnn, loop->_parent);
1286      if (old->outcnt() > 0) {
1287        set_idom( nnn, old_new[idom(old)->_idx], dd );
1288      }
1289    }
1290    // Correct edges to the new node
1291    for( uint j = 0; j < nnn->req(); j++ ) {
1292        Node *n = nnn->in(j);
1293        if( n ) {
1294          IdealLoopTree *old_in_loop = get_loop( has_ctrl(n) ? get_ctrl(n) : n );
1295          if( loop->is_member( old_in_loop ) )
1296            nnn->set_req(j, old_new[n->_idx]);
1297        }
1298    }
1299    _igvn.hash_find_insert(nnn);
1300  }
1301  Node *newhead = old_new[loop->_head->_idx];
1302  set_idom(newhead, newhead->in(LoopNode::EntryControl), dd);
1303
1304
1305  // Step 3: Now fix control uses.  Loop varying control uses have already
1306  // been fixed up (as part of all input edges in Step 2).  Loop invariant
1307  // control uses must be either an IfFalse or an IfTrue.  Make a merge
1308  // point to merge the old and new IfFalse/IfTrue nodes; make the use
1309  // refer to this.
1310  ResourceArea *area = Thread::current()->resource_area();
1311  Node_List worklist(area);
1312  uint new_counter = C->unique();
1313  for( i = 0; i < loop->_body.size(); i++ ) {
1314    Node* old = loop->_body.at(i);
1315    if( !old->is_CFG() ) continue;
1316    Node* nnn = old_new[old->_idx];
1317
1318    // Copy uses to a worklist, so I can munge the def-use info
1319    // with impunity.
1320    for (DUIterator_Fast jmax, j = old->fast_outs(jmax); j < jmax; j++)
1321      worklist.push(old->fast_out(j));
1322
1323    while( worklist.size() ) {  // Visit all uses
1324      Node *use = worklist.pop();
1325      if (!has_node(use))  continue; // Ignore dead nodes
1326      IdealLoopTree *use_loop = get_loop( has_ctrl(use) ? get_ctrl(use) : use );
1327      if( !loop->is_member( use_loop ) && use->is_CFG() ) {
1328        // Both OLD and USE are CFG nodes here.
1329        assert( use->is_Proj(), "" );
1330
1331        // Clone the loop exit control projection
1332        Node *newuse = use->clone();
1333        newuse->set_req(0,nnn);
1334        _igvn.register_new_node_with_optimizer(newuse);
1335        set_loop(newuse, use_loop);
1336        set_idom(newuse, nnn, dom_depth(nnn) + 1 );
1337
1338        // We need a Region to merge the exit from the peeled body and the
1339        // exit from the old loop body.
1340        RegionNode *r = new RegionNode(3);
1341        // Map the old use to the new merge point
1342        old_new.map( use->_idx, r );
1343        uint dd_r = MIN2(dom_depth(newuse),dom_depth(use));
1344        assert( dd_r >= dom_depth(dom_lca(newuse,use)), "" );
1345
1346        // The original user of 'use' uses 'r' instead.
1347        for (DUIterator_Last lmin, l = use->last_outs(lmin); l >= lmin;) {
1348          Node* useuse = use->last_out(l);
1349          _igvn.rehash_node_delayed(useuse);
1350          uint uses_found = 0;
1351          if( useuse->in(0) == use ) {
1352            useuse->set_req(0, r);
1353            uses_found++;
1354            if( useuse->is_CFG() ) {
1355              assert( dom_depth(useuse) > dd_r, "" );
1356              set_idom(useuse, r, dom_depth(useuse));
1357            }
1358          }
1359          for( uint k = 1; k < useuse->req(); k++ ) {
1360            if( useuse->in(k) == use ) {
1361              useuse->set_req(k, r);
1362              uses_found++;
1363            }
1364          }
1365          l -= uses_found;    // we deleted 1 or more copies of this edge
1366        }
1367
1368        // Now finish up 'r'
1369        r->set_req( 1, newuse );
1370        r->set_req( 2,    use );
1371        _igvn.register_new_node_with_optimizer(r);
1372        set_loop(r, use_loop);
1373        set_idom(r, !side_by_side_idom ? newuse->in(0) : side_by_side_idom, dd_r);
1374      } // End of if a loop-exit test
1375    }
1376  }
1377
1378  // Step 4: If loop-invariant use is not control, it must be dominated by a
1379  // loop exit IfFalse/IfTrue.  Find "proper" loop exit.  Make a Region
1380  // there if needed.  Make a Phi there merging old and new used values.
1381  Node_List *split_if_set = NULL;
1382  Node_List *split_bool_set = NULL;
1383  Node_List *split_cex_set = NULL;
1384  for( i = 0; i < loop->_body.size(); i++ ) {
1385    Node* old = loop->_body.at(i);
1386    Node* nnn = old_new[old->_idx];
1387    // Copy uses to a worklist, so I can munge the def-use info
1388    // with impunity.
1389    for (DUIterator_Fast jmax, j = old->fast_outs(jmax); j < jmax; j++)
1390      worklist.push(old->fast_out(j));
1391
1392    while( worklist.size() ) {
1393      Node *use = worklist.pop();
1394      if (!has_node(use))  continue; // Ignore dead nodes
1395      if (use->in(0) == C->top())  continue;
1396      IdealLoopTree *use_loop = get_loop( has_ctrl(use) ? get_ctrl(use) : use );
1397      // Check for data-use outside of loop - at least one of OLD or USE
1398      // must not be a CFG node.
1399      if( !loop->is_member( use_loop ) && (!old->is_CFG() || !use->is_CFG())) {
1400
1401        // If the Data use is an IF, that means we have an IF outside of the
1402        // loop that is switching on a condition that is set inside of the
1403        // loop.  Happens if people set a loop-exit flag; then test the flag
1404        // in the loop to break the loop, then test is again outside of the
1405        // loop to determine which way the loop exited.
1406        // Loop predicate If node connects to Bool node through Opaque1 node.
1407        if (use->is_If() || use->is_CMove() || C->is_predicate_opaq(use)) {
1408          // Since this code is highly unlikely, we lazily build the worklist
1409          // of such Nodes to go split.
1410          if( !split_if_set )
1411            split_if_set = new Node_List(area);
1412          split_if_set->push(use);
1413        }
1414        if( use->is_Bool() ) {
1415          if( !split_bool_set )
1416            split_bool_set = new Node_List(area);
1417          split_bool_set->push(use);
1418        }
1419        if( use->Opcode() == Op_CreateEx ) {
1420          if( !split_cex_set )
1421            split_cex_set = new Node_List(area);
1422          split_cex_set->push(use);
1423        }
1424
1425
1426        // Get "block" use is in
1427        uint idx = 0;
1428        while( use->in(idx) != old ) idx++;
1429        Node *prev = use->is_CFG() ? use : get_ctrl(use);
1430        assert( !loop->is_member( get_loop( prev ) ), "" );
1431        Node *cfg = prev->_idx >= new_counter
1432          ? prev->in(2)
1433          : idom(prev);
1434        if( use->is_Phi() )     // Phi use is in prior block
1435          cfg = prev->in(idx);  // NOT in block of Phi itself
1436        if (cfg->is_top()) {    // Use is dead?
1437          _igvn.replace_input_of(use, idx, C->top());
1438          continue;
1439        }
1440
1441        while( !loop->is_member( get_loop( cfg ) ) ) {
1442          prev = cfg;
1443          cfg = cfg->_idx >= new_counter ? cfg->in(2) : idom(cfg);
1444        }
1445        // If the use occurs after merging several exits from the loop, then
1446        // old value must have dominated all those exits.  Since the same old
1447        // value was used on all those exits we did not need a Phi at this
1448        // merge point.  NOW we do need a Phi here.  Each loop exit value
1449        // is now merged with the peeled body exit; each exit gets its own
1450        // private Phi and those Phis need to be merged here.
1451        Node *phi;
1452        if( prev->is_Region() ) {
1453          if( idx == 0 ) {      // Updating control edge?
1454            phi = prev;         // Just use existing control
1455          } else {              // Else need a new Phi
1456            phi = PhiNode::make( prev, old );
1457            // Now recursively fix up the new uses of old!
1458            for( uint i = 1; i < prev->req(); i++ ) {
1459              worklist.push(phi); // Onto worklist once for each 'old' input
1460            }
1461          }
1462        } else {
1463          // Get new RegionNode merging old and new loop exits
1464          prev = old_new[prev->_idx];
1465          assert( prev, "just made this in step 7" );
1466          if( idx == 0 ) {      // Updating control edge?
1467            phi = prev;         // Just use existing control
1468          } else {              // Else need a new Phi
1469            // Make a new Phi merging data values properly
1470            phi = PhiNode::make( prev, old );
1471            phi->set_req( 1, nnn );
1472          }
1473        }
1474        // If inserting a new Phi, check for prior hits
1475        if( idx != 0 ) {
1476          Node *hit = _igvn.hash_find_insert(phi);
1477          if( hit == NULL ) {
1478           _igvn.register_new_node_with_optimizer(phi); // Register new phi
1479          } else {                                      // or
1480            // Remove the new phi from the graph and use the hit
1481            _igvn.remove_dead_node(phi);
1482            phi = hit;                                  // Use existing phi
1483          }
1484          set_ctrl(phi, prev);
1485        }
1486        // Make 'use' use the Phi instead of the old loop body exit value
1487        _igvn.replace_input_of(use, idx, phi);
1488        if( use->_idx >= new_counter ) { // If updating new phis
1489          // Not needed for correctness, but prevents a weak assert
1490          // in AddPNode from tripping (when we end up with different
1491          // base & derived Phis that will become the same after
1492          // IGVN does CSE).
1493          Node *hit = _igvn.hash_find_insert(use);
1494          if( hit )             // Go ahead and re-hash for hits.
1495            _igvn.replace_node( use, hit );
1496        }
1497
1498        // If 'use' was in the loop-exit block, it now needs to be sunk
1499        // below the post-loop merge point.
1500        sink_use( use, prev );
1501      }
1502    }
1503  }
1504
1505  // Check for IFs that need splitting/cloning.  Happens if an IF outside of
1506  // the loop uses a condition set in the loop.  The original IF probably
1507  // takes control from one or more OLD Regions (which in turn get from NEW
1508  // Regions).  In any case, there will be a set of Phis for each merge point
1509  // from the IF up to where the original BOOL def exists the loop.
1510  if( split_if_set ) {
1511    while( split_if_set->size() ) {
1512      Node *iff = split_if_set->pop();
1513      if( iff->in(1)->is_Phi() ) {
1514        BoolNode *b = clone_iff( iff->in(1)->as_Phi(), loop );
1515        _igvn.replace_input_of(iff, 1, b);
1516      }
1517    }
1518  }
1519  if( split_bool_set ) {
1520    while( split_bool_set->size() ) {
1521      Node *b = split_bool_set->pop();
1522      Node *phi = b->in(1);
1523      assert( phi->is_Phi(), "" );
1524      CmpNode *cmp = clone_bool( (PhiNode*)phi, loop );
1525      _igvn.replace_input_of(b, 1, cmp);
1526    }
1527  }
1528  if( split_cex_set ) {
1529    while( split_cex_set->size() ) {
1530      Node *b = split_cex_set->pop();
1531      assert( b->in(0)->is_Region(), "" );
1532      assert( b->in(1)->is_Phi(), "" );
1533      assert( b->in(0)->in(0) == b->in(1)->in(0), "" );
1534      split_up( b, b->in(0), NULL );
1535    }
1536  }
1537
1538}
1539
1540
1541//---------------------- stride_of_possible_iv -------------------------------------
1542// Looks for an iff/bool/comp with one operand of the compare
1543// being a cycle involving an add and a phi,
1544// with an optional truncation (left-shift followed by a right-shift)
1545// of the add. Returns zero if not an iv.
1546int PhaseIdealLoop::stride_of_possible_iv(Node* iff) {
1547  Node* trunc1 = NULL;
1548  Node* trunc2 = NULL;
1549  const TypeInt* ttype = NULL;
1550  if (!iff->is_If() || iff->in(1) == NULL || !iff->in(1)->is_Bool()) {
1551    return 0;
1552  }
1553  BoolNode* bl = iff->in(1)->as_Bool();
1554  Node* cmp = bl->in(1);
1555  if (!cmp || cmp->Opcode() != Op_CmpI && cmp->Opcode() != Op_CmpU) {
1556    return 0;
1557  }
1558  // Must have an invariant operand
1559  if (is_member(get_loop(iff), get_ctrl(cmp->in(2)))) {
1560    return 0;
1561  }
1562  Node* add2 = NULL;
1563  Node* cmp1 = cmp->in(1);
1564  if (cmp1->is_Phi()) {
1565    // (If (Bool (CmpX phi:(Phi ...(Optional-trunc(AddI phi add2))) )))
1566    Node* phi = cmp1;
1567    for (uint i = 1; i < phi->req(); i++) {
1568      Node* in = phi->in(i);
1569      Node* add = CountedLoopNode::match_incr_with_optional_truncation(in,
1570                                &trunc1, &trunc2, &ttype);
1571      if (add && add->in(1) == phi) {
1572        add2 = add->in(2);
1573        break;
1574      }
1575    }
1576  } else {
1577    // (If (Bool (CmpX addtrunc:(Optional-trunc((AddI (Phi ...addtrunc...) add2)) )))
1578    Node* addtrunc = cmp1;
1579    Node* add = CountedLoopNode::match_incr_with_optional_truncation(addtrunc,
1580                                &trunc1, &trunc2, &ttype);
1581    if (add && add->in(1)->is_Phi()) {
1582      Node* phi = add->in(1);
1583      for (uint i = 1; i < phi->req(); i++) {
1584        if (phi->in(i) == addtrunc) {
1585          add2 = add->in(2);
1586          break;
1587        }
1588      }
1589    }
1590  }
1591  if (add2 != NULL) {
1592    const TypeInt* add2t = _igvn.type(add2)->is_int();
1593    if (add2t->is_con()) {
1594      return add2t->get_con();
1595    }
1596  }
1597  return 0;
1598}
1599
1600
1601//---------------------- stay_in_loop -------------------------------------
1602// Return the (unique) control output node that's in the loop (if it exists.)
1603Node* PhaseIdealLoop::stay_in_loop( Node* n, IdealLoopTree *loop) {
1604  Node* unique = NULL;
1605  if (!n) return NULL;
1606  for (DUIterator_Fast imax, i = n->fast_outs(imax); i < imax; i++) {
1607    Node* use = n->fast_out(i);
1608    if (!has_ctrl(use) && loop->is_member(get_loop(use))) {
1609      if (unique != NULL) {
1610        return NULL;
1611      }
1612      unique = use;
1613    }
1614  }
1615  return unique;
1616}
1617
1618//------------------------------ register_node -------------------------------------
1619// Utility to register node "n" with PhaseIdealLoop
1620void PhaseIdealLoop::register_node(Node* n, IdealLoopTree *loop, Node* pred, int ddepth) {
1621  _igvn.register_new_node_with_optimizer(n);
1622  loop->_body.push(n);
1623  if (n->is_CFG()) {
1624    set_loop(n, loop);
1625    set_idom(n, pred, ddepth);
1626  } else {
1627    set_ctrl(n, pred);
1628  }
1629}
1630
1631//------------------------------ proj_clone -------------------------------------
1632// Utility to create an if-projection
1633ProjNode* PhaseIdealLoop::proj_clone(ProjNode* p, IfNode* iff) {
1634  ProjNode* c = p->clone()->as_Proj();
1635  c->set_req(0, iff);
1636  return c;
1637}
1638
1639//------------------------------ short_circuit_if -------------------------------------
1640// Force the iff control output to be the live_proj
1641Node* PhaseIdealLoop::short_circuit_if(IfNode* iff, ProjNode* live_proj) {
1642  guarantee(live_proj != NULL, "null projection");
1643  int proj_con = live_proj->_con;
1644  assert(proj_con == 0 || proj_con == 1, "false or true projection");
1645  Node *con = _igvn.intcon(proj_con);
1646  set_ctrl(con, C->root());
1647  if (iff) {
1648    iff->set_req(1, con);
1649  }
1650  return con;
1651}
1652
1653//------------------------------ insert_if_before_proj -------------------------------------
1654// Insert a new if before an if projection (* - new node)
1655//
1656// before
1657//           if(test)
1658//           /     \
1659//          v       v
1660//    other-proj   proj (arg)
1661//
1662// after
1663//           if(test)
1664//           /     \
1665//          /       v
1666//         |      * proj-clone
1667//         v          |
1668//    other-proj      v
1669//                * new_if(relop(cmp[IU](left,right)))
1670//                  /  \
1671//                 v    v
1672//         * new-proj  proj
1673//         (returned)
1674//
1675ProjNode* PhaseIdealLoop::insert_if_before_proj(Node* left, bool Signed, BoolTest::mask relop, Node* right, ProjNode* proj) {
1676  IfNode* iff = proj->in(0)->as_If();
1677  IdealLoopTree *loop = get_loop(proj);
1678  ProjNode *other_proj = iff->proj_out(!proj->is_IfTrue())->as_Proj();
1679  int ddepth = dom_depth(proj);
1680
1681  _igvn.rehash_node_delayed(iff);
1682  _igvn.rehash_node_delayed(proj);
1683
1684  proj->set_req(0, NULL);  // temporary disconnect
1685  ProjNode* proj2 = proj_clone(proj, iff);
1686  register_node(proj2, loop, iff, ddepth);
1687
1688  Node* cmp = Signed ? (Node*) new CmpINode(left, right) : (Node*) new CmpUNode(left, right);
1689  register_node(cmp, loop, proj2, ddepth);
1690
1691  BoolNode* bol = new BoolNode(cmp, relop);
1692  register_node(bol, loop, proj2, ddepth);
1693
1694  IfNode* new_if = new IfNode(proj2, bol, iff->_prob, iff->_fcnt);
1695  register_node(new_if, loop, proj2, ddepth);
1696
1697  proj->set_req(0, new_if); // reattach
1698  set_idom(proj, new_if, ddepth);
1699
1700  ProjNode* new_exit = proj_clone(other_proj, new_if)->as_Proj();
1701  guarantee(new_exit != NULL, "null exit node");
1702  register_node(new_exit, get_loop(other_proj), new_if, ddepth);
1703
1704  return new_exit;
1705}
1706
1707//------------------------------ insert_region_before_proj -------------------------------------
1708// Insert a region before an if projection (* - new node)
1709//
1710// before
1711//           if(test)
1712//          /      |
1713//         v       |
1714//       proj      v
1715//               other-proj
1716//
1717// after
1718//           if(test)
1719//          /      |
1720//         v       |
1721// * proj-clone    v
1722//         |     other-proj
1723//         v
1724// * new-region
1725//         |
1726//         v
1727// *      dum_if
1728//       /     \
1729//      v       \
1730// * dum-proj    v
1731//              proj
1732//
1733RegionNode* PhaseIdealLoop::insert_region_before_proj(ProjNode* proj) {
1734  IfNode* iff = proj->in(0)->as_If();
1735  IdealLoopTree *loop = get_loop(proj);
1736  ProjNode *other_proj = iff->proj_out(!proj->is_IfTrue())->as_Proj();
1737  int ddepth = dom_depth(proj);
1738
1739  _igvn.rehash_node_delayed(iff);
1740  _igvn.rehash_node_delayed(proj);
1741
1742  proj->set_req(0, NULL);  // temporary disconnect
1743  ProjNode* proj2 = proj_clone(proj, iff);
1744  register_node(proj2, loop, iff, ddepth);
1745
1746  RegionNode* reg = new RegionNode(2);
1747  reg->set_req(1, proj2);
1748  register_node(reg, loop, iff, ddepth);
1749
1750  IfNode* dum_if = new IfNode(reg, short_circuit_if(NULL, proj), iff->_prob, iff->_fcnt);
1751  register_node(dum_if, loop, reg, ddepth);
1752
1753  proj->set_req(0, dum_if); // reattach
1754  set_idom(proj, dum_if, ddepth);
1755
1756  ProjNode* dum_proj = proj_clone(other_proj, dum_if);
1757  register_node(dum_proj, loop, dum_if, ddepth);
1758
1759  return reg;
1760}
1761
1762//------------------------------ insert_cmpi_loop_exit -------------------------------------
1763// Clone a signed compare loop exit from an unsigned compare and
1764// insert it before the unsigned cmp on the stay-in-loop path.
1765// All new nodes inserted in the dominator tree between the original
1766// if and it's projections.  The original if test is replaced with
1767// a constant to force the stay-in-loop path.
1768//
1769// This is done to make sure that the original if and it's projections
1770// still dominate the same set of control nodes, that the ctrl() relation
1771// from data nodes to them is preserved, and that their loop nesting is
1772// preserved.
1773//
1774// before
1775//          if(i <u limit)    unsigned compare loop exit
1776//         /       |
1777//        v        v
1778//   exit-proj   stay-in-loop-proj
1779//
1780// after
1781//          if(stay-in-loop-const)  original if
1782//         /       |
1783//        /        v
1784//       /  if(i <  limit)    new signed test
1785//      /  /       |
1786//     /  /        v
1787//    /  /  if(i <u limit)    new cloned unsigned test
1788//   /  /   /      |
1789//   v  v  v       |
1790//    region       |
1791//        |        |
1792//      dum-if     |
1793//     /  |        |
1794// ether  |        |
1795//        v        v
1796//   exit-proj   stay-in-loop-proj
1797//
1798IfNode* PhaseIdealLoop::insert_cmpi_loop_exit(IfNode* if_cmpu, IdealLoopTree *loop) {
1799  const bool Signed   = true;
1800  const bool Unsigned = false;
1801
1802  BoolNode* bol = if_cmpu->in(1)->as_Bool();
1803  if (bol->_test._test != BoolTest::lt) return NULL;
1804  CmpNode* cmpu = bol->in(1)->as_Cmp();
1805  if (cmpu->Opcode() != Op_CmpU) return NULL;
1806  int stride = stride_of_possible_iv(if_cmpu);
1807  if (stride == 0) return NULL;
1808
1809  Node* lp_proj = stay_in_loop(if_cmpu, loop);
1810  guarantee(lp_proj != NULL, "null loop node");
1811
1812  ProjNode* lp_continue = lp_proj->as_Proj();
1813  ProjNode* lp_exit     = if_cmpu->proj_out(!lp_continue->is_IfTrue())->as_Proj();
1814
1815  Node* limit = NULL;
1816  if (stride > 0) {
1817    limit = cmpu->in(2);
1818  } else {
1819    limit = _igvn.makecon(TypeInt::ZERO);
1820    set_ctrl(limit, C->root());
1821  }
1822  // Create a new region on the exit path
1823  RegionNode* reg = insert_region_before_proj(lp_exit);
1824  guarantee(reg != NULL, "null region node");
1825
1826  // Clone the if-cmpu-true-false using a signed compare
1827  BoolTest::mask rel_i = stride > 0 ? bol->_test._test : BoolTest::ge;
1828  ProjNode* cmpi_exit = insert_if_before_proj(cmpu->in(1), Signed, rel_i, limit, lp_continue);
1829  reg->add_req(cmpi_exit);
1830
1831  // Clone the if-cmpu-true-false
1832  BoolTest::mask rel_u = bol->_test._test;
1833  ProjNode* cmpu_exit = insert_if_before_proj(cmpu->in(1), Unsigned, rel_u, cmpu->in(2), lp_continue);
1834  reg->add_req(cmpu_exit);
1835
1836  // Force original if to stay in loop.
1837  short_circuit_if(if_cmpu, lp_continue);
1838
1839  return cmpi_exit->in(0)->as_If();
1840}
1841
1842//------------------------------ remove_cmpi_loop_exit -------------------------------------
1843// Remove a previously inserted signed compare loop exit.
1844void PhaseIdealLoop::remove_cmpi_loop_exit(IfNode* if_cmp, IdealLoopTree *loop) {
1845  Node* lp_proj = stay_in_loop(if_cmp, loop);
1846  assert(if_cmp->in(1)->in(1)->Opcode() == Op_CmpI &&
1847         stay_in_loop(lp_proj, loop)->is_If() &&
1848         stay_in_loop(lp_proj, loop)->in(1)->in(1)->Opcode() == Op_CmpU, "inserted cmpi before cmpu");
1849  Node *con = _igvn.makecon(lp_proj->is_IfTrue() ? TypeInt::ONE : TypeInt::ZERO);
1850  set_ctrl(con, C->root());
1851  if_cmp->set_req(1, con);
1852}
1853
1854//------------------------------ scheduled_nodelist -------------------------------------
1855// Create a post order schedule of nodes that are in the
1856// "member" set.  The list is returned in "sched".
1857// The first node in "sched" is the loop head, followed by
1858// nodes which have no inputs in the "member" set, and then
1859// followed by the nodes that have an immediate input dependence
1860// on a node in "sched".
1861void PhaseIdealLoop::scheduled_nodelist( IdealLoopTree *loop, VectorSet& member, Node_List &sched ) {
1862
1863  assert(member.test(loop->_head->_idx), "loop head must be in member set");
1864  Arena *a = Thread::current()->resource_area();
1865  VectorSet visited(a);
1866  Node_Stack nstack(a, loop->_body.size());
1867
1868  Node* n  = loop->_head;  // top of stack is cached in "n"
1869  uint idx = 0;
1870  visited.set(n->_idx);
1871
1872  // Initially push all with no inputs from within member set
1873  for(uint i = 0; i < loop->_body.size(); i++ ) {
1874    Node *elt = loop->_body.at(i);
1875    if (member.test(elt->_idx)) {
1876      bool found = false;
1877      for (uint j = 0; j < elt->req(); j++) {
1878        Node* def = elt->in(j);
1879        if (def && member.test(def->_idx) && def != elt) {
1880          found = true;
1881          break;
1882        }
1883      }
1884      if (!found && elt != loop->_head) {
1885        nstack.push(n, idx);
1886        n = elt;
1887        assert(!visited.test(n->_idx), "not seen yet");
1888        visited.set(n->_idx);
1889      }
1890    }
1891  }
1892
1893  // traverse out's that are in the member set
1894  while (true) {
1895    if (idx < n->outcnt()) {
1896      Node* use = n->raw_out(idx);
1897      idx++;
1898      if (!visited.test_set(use->_idx)) {
1899        if (member.test(use->_idx)) {
1900          nstack.push(n, idx);
1901          n = use;
1902          idx = 0;
1903        }
1904      }
1905    } else {
1906      // All outputs processed
1907      sched.push(n);
1908      if (nstack.is_empty()) break;
1909      n   = nstack.node();
1910      idx = nstack.index();
1911      nstack.pop();
1912    }
1913  }
1914}
1915
1916
1917//------------------------------ has_use_in_set -------------------------------------
1918// Has a use in the vector set
1919bool PhaseIdealLoop::has_use_in_set( Node* n, VectorSet& vset ) {
1920  for (DUIterator_Fast jmax, j = n->fast_outs(jmax); j < jmax; j++) {
1921    Node* use = n->fast_out(j);
1922    if (vset.test(use->_idx)) {
1923      return true;
1924    }
1925  }
1926  return false;
1927}
1928
1929
1930//------------------------------ has_use_internal_to_set -------------------------------------
1931// Has use internal to the vector set (ie. not in a phi at the loop head)
1932bool PhaseIdealLoop::has_use_internal_to_set( Node* n, VectorSet& vset, IdealLoopTree *loop ) {
1933  Node* head  = loop->_head;
1934  for (DUIterator_Fast jmax, j = n->fast_outs(jmax); j < jmax; j++) {
1935    Node* use = n->fast_out(j);
1936    if (vset.test(use->_idx) && !(use->is_Phi() && use->in(0) == head)) {
1937      return true;
1938    }
1939  }
1940  return false;
1941}
1942
1943
1944//------------------------------ clone_for_use_outside_loop -------------------------------------
1945// clone "n" for uses that are outside of loop
1946int PhaseIdealLoop::clone_for_use_outside_loop( IdealLoopTree *loop, Node* n, Node_List& worklist ) {
1947  int cloned = 0;
1948  assert(worklist.size() == 0, "should be empty");
1949  for (DUIterator_Fast jmax, j = n->fast_outs(jmax); j < jmax; j++) {
1950    Node* use = n->fast_out(j);
1951    if( !loop->is_member(get_loop(has_ctrl(use) ? get_ctrl(use) : use)) ) {
1952      worklist.push(use);
1953    }
1954  }
1955  while( worklist.size() ) {
1956    Node *use = worklist.pop();
1957    if (!has_node(use) || use->in(0) == C->top()) continue;
1958    uint j;
1959    for (j = 0; j < use->req(); j++) {
1960      if (use->in(j) == n) break;
1961    }
1962    assert(j < use->req(), "must be there");
1963
1964    // clone "n" and insert it between the inputs of "n" and the use outside the loop
1965    Node* n_clone = n->clone();
1966    _igvn.replace_input_of(use, j, n_clone);
1967    cloned++;
1968    Node* use_c;
1969    if (!use->is_Phi()) {
1970      use_c = has_ctrl(use) ? get_ctrl(use) : use->in(0);
1971    } else {
1972      // Use in a phi is considered a use in the associated predecessor block
1973      use_c = use->in(0)->in(j);
1974    }
1975    set_ctrl(n_clone, use_c);
1976    assert(!loop->is_member(get_loop(use_c)), "should be outside loop");
1977    get_loop(use_c)->_body.push(n_clone);
1978    _igvn.register_new_node_with_optimizer(n_clone);
1979#if !defined(PRODUCT)
1980    if (TracePartialPeeling) {
1981      tty->print_cr("loop exit cloning old: %d new: %d newbb: %d", n->_idx, n_clone->_idx, get_ctrl(n_clone)->_idx);
1982    }
1983#endif
1984  }
1985  return cloned;
1986}
1987
1988
1989//------------------------------ clone_for_special_use_inside_loop -------------------------------------
1990// clone "n" for special uses that are in the not_peeled region.
1991// If these def-uses occur in separate blocks, the code generator
1992// marks the method as not compilable.  For example, if a "BoolNode"
1993// is in a different basic block than the "IfNode" that uses it, then
1994// the compilation is aborted in the code generator.
1995void PhaseIdealLoop::clone_for_special_use_inside_loop( IdealLoopTree *loop, Node* n,
1996                                                        VectorSet& not_peel, Node_List& sink_list, Node_List& worklist ) {
1997  if (n->is_Phi() || n->is_Load()) {
1998    return;
1999  }
2000  assert(worklist.size() == 0, "should be empty");
2001  for (DUIterator_Fast jmax, j = n->fast_outs(jmax); j < jmax; j++) {
2002    Node* use = n->fast_out(j);
2003    if ( not_peel.test(use->_idx) &&
2004         (use->is_If() || use->is_CMove() || use->is_Bool()) &&
2005         use->in(1) == n)  {
2006      worklist.push(use);
2007    }
2008  }
2009  if (worklist.size() > 0) {
2010    // clone "n" and insert it between inputs of "n" and the use
2011    Node* n_clone = n->clone();
2012    loop->_body.push(n_clone);
2013    _igvn.register_new_node_with_optimizer(n_clone);
2014    set_ctrl(n_clone, get_ctrl(n));
2015    sink_list.push(n_clone);
2016    not_peel <<= n_clone->_idx;  // add n_clone to not_peel set.
2017#if !defined(PRODUCT)
2018    if (TracePartialPeeling) {
2019      tty->print_cr("special not_peeled cloning old: %d new: %d", n->_idx, n_clone->_idx);
2020    }
2021#endif
2022    while( worklist.size() ) {
2023      Node *use = worklist.pop();
2024      _igvn.rehash_node_delayed(use);
2025      for (uint j = 1; j < use->req(); j++) {
2026        if (use->in(j) == n) {
2027          use->set_req(j, n_clone);
2028        }
2029      }
2030    }
2031  }
2032}
2033
2034
2035//------------------------------ insert_phi_for_loop -------------------------------------
2036// Insert phi(lp_entry_val, back_edge_val) at use->in(idx) for loop lp if phi does not already exist
2037void PhaseIdealLoop::insert_phi_for_loop( Node* use, uint idx, Node* lp_entry_val, Node* back_edge_val, LoopNode* lp ) {
2038  Node *phi = PhiNode::make(lp, back_edge_val);
2039  phi->set_req(LoopNode::EntryControl, lp_entry_val);
2040  // Use existing phi if it already exists
2041  Node *hit = _igvn.hash_find_insert(phi);
2042  if( hit == NULL ) {
2043    _igvn.register_new_node_with_optimizer(phi);
2044    set_ctrl(phi, lp);
2045  } else {
2046    // Remove the new phi from the graph and use the hit
2047    _igvn.remove_dead_node(phi);
2048    phi = hit;
2049  }
2050  _igvn.replace_input_of(use, idx, phi);
2051}
2052
2053#ifdef ASSERT
2054//------------------------------ is_valid_loop_partition -------------------------------------
2055// Validate the loop partition sets: peel and not_peel
2056bool PhaseIdealLoop::is_valid_loop_partition( IdealLoopTree *loop, VectorSet& peel, Node_List& peel_list,
2057                                              VectorSet& not_peel ) {
2058  uint i;
2059  // Check that peel_list entries are in the peel set
2060  for (i = 0; i < peel_list.size(); i++) {
2061    if (!peel.test(peel_list.at(i)->_idx)) {
2062      return false;
2063    }
2064  }
2065  // Check at loop members are in one of peel set or not_peel set
2066  for (i = 0; i < loop->_body.size(); i++ ) {
2067    Node *def  = loop->_body.at(i);
2068    uint di = def->_idx;
2069    // Check that peel set elements are in peel_list
2070    if (peel.test(di)) {
2071      if (not_peel.test(di)) {
2072        return false;
2073      }
2074      // Must be in peel_list also
2075      bool found = false;
2076      for (uint j = 0; j < peel_list.size(); j++) {
2077        if (peel_list.at(j)->_idx == di) {
2078          found = true;
2079          break;
2080        }
2081      }
2082      if (!found) {
2083        return false;
2084      }
2085    } else if (not_peel.test(di)) {
2086      if (peel.test(di)) {
2087        return false;
2088      }
2089    } else {
2090      return false;
2091    }
2092  }
2093  return true;
2094}
2095
2096//------------------------------ is_valid_clone_loop_exit_use -------------------------------------
2097// Ensure a use outside of loop is of the right form
2098bool PhaseIdealLoop::is_valid_clone_loop_exit_use( IdealLoopTree *loop, Node* use, uint exit_idx) {
2099  Node *use_c = has_ctrl(use) ? get_ctrl(use) : use;
2100  return (use->is_Phi() &&
2101          use_c->is_Region() && use_c->req() == 3 &&
2102          (use_c->in(exit_idx)->Opcode() == Op_IfTrue ||
2103           use_c->in(exit_idx)->Opcode() == Op_IfFalse ||
2104           use_c->in(exit_idx)->Opcode() == Op_JumpProj) &&
2105          loop->is_member( get_loop( use_c->in(exit_idx)->in(0) ) ) );
2106}
2107
2108//------------------------------ is_valid_clone_loop_form -------------------------------------
2109// Ensure that all uses outside of loop are of the right form
2110bool PhaseIdealLoop::is_valid_clone_loop_form( IdealLoopTree *loop, Node_List& peel_list,
2111                                               uint orig_exit_idx, uint clone_exit_idx) {
2112  uint len = peel_list.size();
2113  for (uint i = 0; i < len; i++) {
2114    Node *def = peel_list.at(i);
2115
2116    for (DUIterator_Fast jmax, j = def->fast_outs(jmax); j < jmax; j++) {
2117      Node *use = def->fast_out(j);
2118      Node *use_c = has_ctrl(use) ? get_ctrl(use) : use;
2119      if (!loop->is_member(get_loop(use_c))) {
2120        // use is not in the loop, check for correct structure
2121        if (use->in(0) == def) {
2122          // Okay
2123        } else if (!is_valid_clone_loop_exit_use(loop, use, orig_exit_idx)) {
2124          return false;
2125        }
2126      }
2127    }
2128  }
2129  return true;
2130}
2131#endif
2132
2133//------------------------------ partial_peel -------------------------------------
2134// Partially peel (aka loop rotation) the top portion of a loop (called
2135// the peel section below) by cloning it and placing one copy just before
2136// the new loop head and the other copy at the bottom of the new loop.
2137//
2138//    before                       after                where it came from
2139//
2140//    stmt1                        stmt1
2141//  loop:                          stmt2                     clone
2142//    stmt2                        if condA goto exitA       clone
2143//    if condA goto exitA        new_loop:                   new
2144//    stmt3                        stmt3                     clone
2145//    if !condB goto loop          if condB goto exitB       clone
2146//  exitB:                         stmt2                     orig
2147//    stmt4                        if !condA goto new_loop   orig
2148//  exitA:                         goto exitA
2149//                               exitB:
2150//                                 stmt4
2151//                               exitA:
2152//
2153// Step 1: find the cut point: an exit test on probable
2154//         induction variable.
2155// Step 2: schedule (with cloning) operations in the peel
2156//         section that can be executed after the cut into
2157//         the section that is not peeled.  This may need
2158//         to clone operations into exit blocks.  For
2159//         instance, a reference to A[i] in the not-peel
2160//         section and a reference to B[i] in an exit block
2161//         may cause a left-shift of i by 2 to be placed
2162//         in the peel block.  This step will clone the left
2163//         shift into the exit block and sink the left shift
2164//         from the peel to the not-peel section.
2165// Step 3: clone the loop, retarget the control, and insert
2166//         phis for values that are live across the new loop
2167//         head.  This is very dependent on the graph structure
2168//         from clone_loop.  It creates region nodes for
2169//         exit control and associated phi nodes for values
2170//         flow out of the loop through that exit.  The region
2171//         node is dominated by the clone's control projection.
2172//         So the clone's peel section is placed before the
2173//         new loop head, and the clone's not-peel section is
2174//         forms the top part of the new loop.  The original
2175//         peel section forms the tail of the new loop.
2176// Step 4: update the dominator tree and recompute the
2177//         dominator depth.
2178//
2179//                   orig
2180//
2181//                   stmt1
2182//                     |
2183//                     v
2184//               loop predicate
2185//                     |
2186//                     v
2187//                   loop<----+
2188//                     |      |
2189//                   stmt2    |
2190//                     |      |
2191//                     v      |
2192//                    ifA     |
2193//                   / |      |
2194//                  v  v      |
2195//               false true   ^  <-- last_peel
2196//               /     |      |
2197//              /   ===|==cut |
2198//             /     stmt3    |  <-- first_not_peel
2199//            /        |      |
2200//            |        v      |
2201//            v       ifB     |
2202//          exitA:   / \      |
2203//                  /   \     |
2204//                 v     v    |
2205//               false true   |
2206//               /       \    |
2207//              /         ----+
2208//             |
2209//             v
2210//           exitB:
2211//           stmt4
2212//
2213//
2214//            after clone loop
2215//
2216//                   stmt1
2217//                     |
2218//                     v
2219//               loop predicate
2220//                 /       \
2221//        clone   /         \   orig
2222//               /           \
2223//              /             \
2224//             v               v
2225//   +---->loop                loop<----+
2226//   |      |                    |      |
2227//   |    stmt2                stmt2    |
2228//   |      |                    |      |
2229//   |      v                    v      |
2230//   |      ifA                 ifA     |
2231//   |      | \                / |      |
2232//   |      v  v              v  v      |
2233//   ^    true  false      false true   ^  <-- last_peel
2234//   |      |   ^   \       /    |      |
2235//   | cut==|==  \   \     /  ===|==cut |
2236//   |    stmt3   \   \   /    stmt3    |  <-- first_not_peel
2237//   |      |    dom   | |       |      |
2238//   |      v      \  1v v2      v      |
2239//   |      ifB     regionA     ifB     |
2240//   |      / \        |       / \      |
2241//   |     /   \       v      /   \     |
2242//   |    v     v    exitA:  v     v    |
2243//   |    true  false      false true   |
2244//   |    /     ^   \      /       \    |
2245//   +----       \   \    /         ----+
2246//               dom  \  /
2247//                 \  1v v2
2248//                  regionB
2249//                     |
2250//                     v
2251//                   exitB:
2252//                   stmt4
2253//
2254//
2255//           after partial peel
2256//
2257//                  stmt1
2258//                     |
2259//                     v
2260//               loop predicate
2261//                 /
2262//        clone   /             orig
2263//               /          TOP
2264//              /             \
2265//             v               v
2266//    TOP->loop                loop----+
2267//          |                    |      |
2268//        stmt2                stmt2    |
2269//          |                    |      |
2270//          v                    v      |
2271//          ifA                 ifA     |
2272//          | \                / |      |
2273//          v  v              v  v      |
2274//        true  false      false true   |     <-- last_peel
2275//          |   ^   \       /    +------|---+
2276//  +->newloop   \   \     /  === ==cut |   |
2277//  |     stmt3   \   \   /     TOP     |   |
2278//  |       |    dom   | |      stmt3   |   | <-- first_not_peel
2279//  |       v      \  1v v2      v      |   |
2280//  |       ifB     regionA     ifB     ^   v
2281//  |       / \        |       / \      |   |
2282//  |      /   \       v      /   \     |   |
2283//  |     v     v    exitA:  v     v    |   |
2284//  |     true  false      false true   |   |
2285//  |     /     ^   \      /       \    |   |
2286//  |    |       \   \    /         v   |   |
2287//  |    |       dom  \  /         TOP  |   |
2288//  |    |         \  1v v2             |   |
2289//  ^    v          regionB             |   |
2290//  |    |             |                |   |
2291//  |    |             v                ^   v
2292//  |    |           exitB:             |   |
2293//  |    |           stmt4              |   |
2294//  |    +------------>-----------------+   |
2295//  |                                       |
2296//  +-----------------<---------------------+
2297//
2298//
2299//              final graph
2300//
2301//                  stmt1
2302//                    |
2303//                    v
2304//               loop predicate
2305//                    |
2306//                    v
2307//                  stmt2 clone
2308//                    |
2309//                    v
2310//         ........> ifA clone
2311//         :        / |
2312//        dom      /  |
2313//         :      v   v
2314//         :  false   true
2315//         :  |       |
2316//         :  |       v
2317//         :  |    newloop<-----+
2318//         :  |        |        |
2319//         :  |     stmt3 clone |
2320//         :  |        |        |
2321//         :  |        v        |
2322//         :  |       ifB       |
2323//         :  |      / \        |
2324//         :  |     v   v       |
2325//         :  |  false true     |
2326//         :  |   |     |       |
2327//         :  |   v    stmt2    |
2328//         :  | exitB:  |       |
2329//         :  | stmt4   v       |
2330//         :  |       ifA orig  |
2331//         :  |      /  \       |
2332//         :  |     /    \      |
2333//         :  |    v     v      |
2334//         :  |  false  true    |
2335//         :  |  /        \     |
2336//         :  v  v         -----+
2337//          RegionA
2338//             |
2339//             v
2340//           exitA
2341//
2342bool PhaseIdealLoop::partial_peel( IdealLoopTree *loop, Node_List &old_new ) {
2343
2344  assert(!loop->_head->is_CountedLoop(), "Non-counted loop only");
2345  if (!loop->_head->is_Loop()) {
2346    return false;  }
2347
2348  LoopNode *head  = loop->_head->as_Loop();
2349
2350  if (head->is_partial_peel_loop() || head->partial_peel_has_failed()) {
2351    return false;
2352  }
2353
2354  // Check for complex exit control
2355  for(uint ii = 0; ii < loop->_body.size(); ii++ ) {
2356    Node *n = loop->_body.at(ii);
2357    int opc = n->Opcode();
2358    if (n->is_Call()        ||
2359        opc == Op_Catch     ||
2360        opc == Op_CatchProj ||
2361        opc == Op_Jump      ||
2362        opc == Op_JumpProj) {
2363#if !defined(PRODUCT)
2364      if (TracePartialPeeling) {
2365        tty->print_cr("\nExit control too complex: lp: %d", head->_idx);
2366      }
2367#endif
2368      return false;
2369    }
2370  }
2371
2372  int dd = dom_depth(head);
2373
2374  // Step 1: find cut point
2375
2376  // Walk up dominators to loop head looking for first loop exit
2377  // which is executed on every path thru loop.
2378  IfNode *peel_if = NULL;
2379  IfNode *peel_if_cmpu = NULL;
2380
2381  Node *iff = loop->tail();
2382  while( iff != head ) {
2383    if( iff->is_If() ) {
2384      Node *ctrl = get_ctrl(iff->in(1));
2385      if (ctrl->is_top()) return false; // Dead test on live IF.
2386      // If loop-varying exit-test, check for induction variable
2387      if( loop->is_member(get_loop(ctrl)) &&
2388          loop->is_loop_exit(iff) &&
2389          is_possible_iv_test(iff)) {
2390        Node* cmp = iff->in(1)->in(1);
2391        if (cmp->Opcode() == Op_CmpI) {
2392          peel_if = iff->as_If();
2393        } else {
2394          assert(cmp->Opcode() == Op_CmpU, "must be CmpI or CmpU");
2395          peel_if_cmpu = iff->as_If();
2396        }
2397      }
2398    }
2399    iff = idom(iff);
2400  }
2401  // Prefer signed compare over unsigned compare.
2402  IfNode* new_peel_if = NULL;
2403  if (peel_if == NULL) {
2404    if (!PartialPeelAtUnsignedTests || peel_if_cmpu == NULL) {
2405      return false;   // No peel point found
2406    }
2407    new_peel_if = insert_cmpi_loop_exit(peel_if_cmpu, loop);
2408    if (new_peel_if == NULL) {
2409      return false;   // No peel point found
2410    }
2411    peel_if = new_peel_if;
2412  }
2413  Node* last_peel        = stay_in_loop(peel_if, loop);
2414  Node* first_not_peeled = stay_in_loop(last_peel, loop);
2415  if (first_not_peeled == NULL || first_not_peeled == head) {
2416    return false;
2417  }
2418
2419#if !defined(PRODUCT)
2420  if (TraceLoopOpts) {
2421    tty->print("PartialPeel  ");
2422    loop->dump_head();
2423  }
2424
2425  if (TracePartialPeeling) {
2426    tty->print_cr("before partial peel one iteration");
2427    Node_List wl;
2428    Node* t = head->in(2);
2429    while (true) {
2430      wl.push(t);
2431      if (t == head) break;
2432      t = idom(t);
2433    }
2434    while (wl.size() > 0) {
2435      Node* tt = wl.pop();
2436      tt->dump();
2437      if (tt == last_peel) tty->print_cr("-- cut --");
2438    }
2439  }
2440#endif
2441  ResourceArea *area = Thread::current()->resource_area();
2442  VectorSet peel(area);
2443  VectorSet not_peel(area);
2444  Node_List peel_list(area);
2445  Node_List worklist(area);
2446  Node_List sink_list(area);
2447
2448  // Set of cfg nodes to peel are those that are executable from
2449  // the head through last_peel.
2450  assert(worklist.size() == 0, "should be empty");
2451  worklist.push(head);
2452  peel.set(head->_idx);
2453  while (worklist.size() > 0) {
2454    Node *n = worklist.pop();
2455    if (n != last_peel) {
2456      for (DUIterator_Fast jmax, j = n->fast_outs(jmax); j < jmax; j++) {
2457        Node* use = n->fast_out(j);
2458        if (use->is_CFG() &&
2459            loop->is_member(get_loop(use)) &&
2460            !peel.test_set(use->_idx)) {
2461          worklist.push(use);
2462        }
2463      }
2464    }
2465  }
2466
2467  // Set of non-cfg nodes to peel are those that are control
2468  // dependent on the cfg nodes.
2469  uint i;
2470  for(i = 0; i < loop->_body.size(); i++ ) {
2471    Node *n = loop->_body.at(i);
2472    Node *n_c = has_ctrl(n) ? get_ctrl(n) : n;
2473    if (peel.test(n_c->_idx)) {
2474      peel.set(n->_idx);
2475    } else {
2476      not_peel.set(n->_idx);
2477    }
2478  }
2479
2480  // Step 2: move operations from the peeled section down into the
2481  //         not-peeled section
2482
2483  // Get a post order schedule of nodes in the peel region
2484  // Result in right-most operand.
2485  scheduled_nodelist(loop, peel, peel_list );
2486
2487  assert(is_valid_loop_partition(loop, peel, peel_list, not_peel), "bad partition");
2488
2489  // For future check for too many new phis
2490  uint old_phi_cnt = 0;
2491  for (DUIterator_Fast jmax, j = head->fast_outs(jmax); j < jmax; j++) {
2492    Node* use = head->fast_out(j);
2493    if (use->is_Phi()) old_phi_cnt++;
2494  }
2495
2496#if !defined(PRODUCT)
2497  if (TracePartialPeeling) {
2498    tty->print_cr("\npeeled list");
2499  }
2500#endif
2501
2502  // Evacuate nodes in peel region into the not_peeled region if possible
2503  uint new_phi_cnt = 0;
2504  uint cloned_for_outside_use = 0;
2505  for (i = 0; i < peel_list.size();) {
2506    Node* n = peel_list.at(i);
2507#if !defined(PRODUCT)
2508  if (TracePartialPeeling) n->dump();
2509#endif
2510    bool incr = true;
2511    if ( !n->is_CFG() ) {
2512
2513      if ( has_use_in_set(n, not_peel) ) {
2514
2515        // If not used internal to the peeled region,
2516        // move "n" from peeled to not_peeled region.
2517
2518        if ( !has_use_internal_to_set(n, peel, loop) ) {
2519
2520          // if not pinned and not a load (which maybe anti-dependent on a store)
2521          // and not a CMove (Matcher expects only bool->cmove).
2522          if ( n->in(0) == NULL && !n->is_Load() && !n->is_CMove() ) {
2523            cloned_for_outside_use += clone_for_use_outside_loop( loop, n, worklist );
2524            sink_list.push(n);
2525            peel     >>= n->_idx; // delete n from peel set.
2526            not_peel <<= n->_idx; // add n to not_peel set.
2527            peel_list.remove(i);
2528            incr = false;
2529#if !defined(PRODUCT)
2530            if (TracePartialPeeling) {
2531              tty->print_cr("sink to not_peeled region: %d newbb: %d",
2532                            n->_idx, get_ctrl(n)->_idx);
2533            }
2534#endif
2535          }
2536        } else {
2537          // Otherwise check for special def-use cases that span
2538          // the peel/not_peel boundary such as bool->if
2539          clone_for_special_use_inside_loop( loop, n, not_peel, sink_list, worklist );
2540          new_phi_cnt++;
2541        }
2542      }
2543    }
2544    if (incr) i++;
2545  }
2546
2547  if (new_phi_cnt > old_phi_cnt + PartialPeelNewPhiDelta) {
2548#if !defined(PRODUCT)
2549    if (TracePartialPeeling) {
2550      tty->print_cr("\nToo many new phis: %d  old %d new cmpi: %c",
2551                    new_phi_cnt, old_phi_cnt, new_peel_if != NULL?'T':'F');
2552    }
2553#endif
2554    if (new_peel_if != NULL) {
2555      remove_cmpi_loop_exit(new_peel_if, loop);
2556    }
2557    // Inhibit more partial peeling on this loop
2558    assert(!head->is_partial_peel_loop(), "not partial peeled");
2559    head->mark_partial_peel_failed();
2560    if (cloned_for_outside_use > 0) {
2561      // Terminate this round of loop opts because
2562      // the graph outside this loop was changed.
2563      C->set_major_progress();
2564      return true;
2565    }
2566    return false;
2567  }
2568
2569  // Step 3: clone loop, retarget control, and insert new phis
2570
2571  // Create new loop head for new phis and to hang
2572  // the nodes being moved (sinked) from the peel region.
2573  LoopNode* new_head = new LoopNode(last_peel, last_peel);
2574  new_head->set_unswitch_count(head->unswitch_count()); // Preserve
2575  _igvn.register_new_node_with_optimizer(new_head);
2576  assert(first_not_peeled->in(0) == last_peel, "last_peel <- first_not_peeled");
2577  first_not_peeled->set_req(0, new_head);
2578  set_loop(new_head, loop);
2579  loop->_body.push(new_head);
2580  not_peel.set(new_head->_idx);
2581  set_idom(new_head, last_peel, dom_depth(first_not_peeled));
2582  set_idom(first_not_peeled, new_head, dom_depth(first_not_peeled));
2583
2584  while (sink_list.size() > 0) {
2585    Node* n = sink_list.pop();
2586    set_ctrl(n, new_head);
2587  }
2588
2589  assert(is_valid_loop_partition(loop, peel, peel_list, not_peel), "bad partition");
2590
2591  clone_loop( loop, old_new, dd );
2592
2593  const uint clone_exit_idx = 1;
2594  const uint orig_exit_idx  = 2;
2595  assert(is_valid_clone_loop_form( loop, peel_list, orig_exit_idx, clone_exit_idx ), "bad clone loop");
2596
2597  Node* head_clone             = old_new[head->_idx];
2598  LoopNode* new_head_clone     = old_new[new_head->_idx]->as_Loop();
2599  Node* orig_tail_clone        = head_clone->in(2);
2600
2601  // Add phi if "def" node is in peel set and "use" is not
2602
2603  for(i = 0; i < peel_list.size(); i++ ) {
2604    Node *def  = peel_list.at(i);
2605    if (!def->is_CFG()) {
2606      for (DUIterator_Fast jmax, j = def->fast_outs(jmax); j < jmax; j++) {
2607        Node *use = def->fast_out(j);
2608        if (has_node(use) && use->in(0) != C->top() &&
2609            (!peel.test(use->_idx) ||
2610             (use->is_Phi() && use->in(0) == head)) ) {
2611          worklist.push(use);
2612        }
2613      }
2614      while( worklist.size() ) {
2615        Node *use = worklist.pop();
2616        for (uint j = 1; j < use->req(); j++) {
2617          Node* n = use->in(j);
2618          if (n == def) {
2619
2620            // "def" is in peel set, "use" is not in peel set
2621            // or "use" is in the entry boundary (a phi) of the peel set
2622
2623            Node* use_c = has_ctrl(use) ? get_ctrl(use) : use;
2624
2625            if ( loop->is_member(get_loop( use_c )) ) {
2626              // use is in loop
2627              if (old_new[use->_idx] != NULL) { // null for dead code
2628                Node* use_clone = old_new[use->_idx];
2629                _igvn.replace_input_of(use, j, C->top());
2630                insert_phi_for_loop( use_clone, j, old_new[def->_idx], def, new_head_clone );
2631              }
2632            } else {
2633              assert(is_valid_clone_loop_exit_use(loop, use, orig_exit_idx), "clone loop format");
2634              // use is not in the loop, check if the live range includes the cut
2635              Node* lp_if = use_c->in(orig_exit_idx)->in(0);
2636              if (not_peel.test(lp_if->_idx)) {
2637                assert(j == orig_exit_idx, "use from original loop");
2638                insert_phi_for_loop( use, clone_exit_idx, old_new[def->_idx], def, new_head_clone );
2639              }
2640            }
2641          }
2642        }
2643      }
2644    }
2645  }
2646
2647  // Step 3b: retarget control
2648
2649  // Redirect control to the new loop head if a cloned node in
2650  // the not_peeled region has control that points into the peeled region.
2651  // This necessary because the cloned peeled region will be outside
2652  // the loop.
2653  //                            from    to
2654  //          cloned-peeled    <---+
2655  //    new_head_clone:            |    <--+
2656  //          cloned-not_peeled  in(0)    in(0)
2657  //          orig-peeled
2658
2659  for(i = 0; i < loop->_body.size(); i++ ) {
2660    Node *n = loop->_body.at(i);
2661    if (!n->is_CFG()           && n->in(0) != NULL        &&
2662        not_peel.test(n->_idx) && peel.test(n->in(0)->_idx)) {
2663      Node* n_clone = old_new[n->_idx];
2664      _igvn.replace_input_of(n_clone, 0, new_head_clone);
2665    }
2666  }
2667
2668  // Backedge of the surviving new_head (the clone) is original last_peel
2669  _igvn.replace_input_of(new_head_clone, LoopNode::LoopBackControl, last_peel);
2670
2671  // Cut first node in original not_peel set
2672  _igvn.rehash_node_delayed(new_head);                     // Multiple edge updates:
2673  new_head->set_req(LoopNode::EntryControl,    C->top());  //   use rehash_node_delayed / set_req instead of
2674  new_head->set_req(LoopNode::LoopBackControl, C->top());  //   multiple replace_input_of calls
2675
2676  // Copy head_clone back-branch info to original head
2677  // and remove original head's loop entry and
2678  // clone head's back-branch
2679  _igvn.rehash_node_delayed(head); // Multiple edge updates
2680  head->set_req(LoopNode::EntryControl,    head_clone->in(LoopNode::LoopBackControl));
2681  head->set_req(LoopNode::LoopBackControl, C->top());
2682  _igvn.replace_input_of(head_clone, LoopNode::LoopBackControl, C->top());
2683
2684  // Similarly modify the phis
2685  for (DUIterator_Fast kmax, k = head->fast_outs(kmax); k < kmax; k++) {
2686    Node* use = head->fast_out(k);
2687    if (use->is_Phi() && use->outcnt() > 0) {
2688      Node* use_clone = old_new[use->_idx];
2689      _igvn.rehash_node_delayed(use); // Multiple edge updates
2690      use->set_req(LoopNode::EntryControl,    use_clone->in(LoopNode::LoopBackControl));
2691      use->set_req(LoopNode::LoopBackControl, C->top());
2692      _igvn.replace_input_of(use_clone, LoopNode::LoopBackControl, C->top());
2693    }
2694  }
2695
2696  // Step 4: update dominator tree and dominator depth
2697
2698  set_idom(head, orig_tail_clone, dd);
2699  recompute_dom_depth();
2700
2701  // Inhibit more partial peeling on this loop
2702  new_head_clone->set_partial_peel_loop();
2703  C->set_major_progress();
2704  loop->record_for_igvn();
2705
2706#if !defined(PRODUCT)
2707  if (TracePartialPeeling) {
2708    tty->print_cr("\nafter partial peel one iteration");
2709    Node_List wl(area);
2710    Node* t = last_peel;
2711    while (true) {
2712      wl.push(t);
2713      if (t == head_clone) break;
2714      t = idom(t);
2715    }
2716    while (wl.size() > 0) {
2717      Node* tt = wl.pop();
2718      if (tt == head) tty->print_cr("orig head");
2719      else if (tt == new_head_clone) tty->print_cr("new head");
2720      else if (tt == head_clone) tty->print_cr("clone head");
2721      tt->dump();
2722    }
2723  }
2724#endif
2725  return true;
2726}
2727
2728//------------------------------reorg_offsets----------------------------------
2729// Reorganize offset computations to lower register pressure.  Mostly
2730// prevent loop-fallout uses of the pre-incremented trip counter (which are
2731// then alive with the post-incremented trip counter forcing an extra
2732// register move)
2733void PhaseIdealLoop::reorg_offsets(IdealLoopTree *loop) {
2734  // Perform it only for canonical counted loops.
2735  // Loop's shape could be messed up by iteration_split_impl.
2736  if (!loop->_head->is_CountedLoop())
2737    return;
2738  if (!loop->_head->as_Loop()->is_valid_counted_loop())
2739    return;
2740
2741  CountedLoopNode *cl = loop->_head->as_CountedLoop();
2742  CountedLoopEndNode *cle = cl->loopexit();
2743  Node *exit = cle->proj_out(false);
2744  Node *phi = cl->phi();
2745
2746  // Check for the special case of folks using the pre-incremented
2747  // trip-counter on the fall-out path (forces the pre-incremented
2748  // and post-incremented trip counter to be live at the same time).
2749  // Fix this by adjusting to use the post-increment trip counter.
2750
2751  bool progress = true;
2752  while (progress) {
2753    progress = false;
2754    for (DUIterator_Fast imax, i = phi->fast_outs(imax); i < imax; i++) {
2755      Node* use = phi->fast_out(i);   // User of trip-counter
2756      if (!has_ctrl(use))  continue;
2757      Node *u_ctrl = get_ctrl(use);
2758      if (use->is_Phi()) {
2759        u_ctrl = NULL;
2760        for (uint j = 1; j < use->req(); j++)
2761          if (use->in(j) == phi)
2762            u_ctrl = dom_lca(u_ctrl, use->in(0)->in(j));
2763      }
2764      IdealLoopTree *u_loop = get_loop(u_ctrl);
2765      // Look for loop-invariant use
2766      if (u_loop == loop) continue;
2767      if (loop->is_member(u_loop)) continue;
2768      // Check that use is live out the bottom.  Assuming the trip-counter
2769      // update is right at the bottom, uses of of the loop middle are ok.
2770      if (dom_lca(exit, u_ctrl) != exit) continue;
2771      // Hit!  Refactor use to use the post-incremented tripcounter.
2772      // Compute a post-increment tripcounter.
2773      Node *opaq = new Opaque2Node( C, cle->incr() );
2774      register_new_node( opaq, u_ctrl );
2775      Node *neg_stride = _igvn.intcon(-cle->stride_con());
2776      set_ctrl(neg_stride, C->root());
2777      Node *post = new AddINode( opaq, neg_stride);
2778      register_new_node( post, u_ctrl );
2779      _igvn.rehash_node_delayed(use);
2780      for (uint j = 1; j < use->req(); j++) {
2781        if (use->in(j) == phi)
2782          use->set_req(j, post);
2783      }
2784      // Since DU info changed, rerun loop
2785      progress = true;
2786      break;
2787    }
2788  }
2789
2790}
2791