loopopts.cpp revision 10000:9e17d9e4b59f
1/*
2 * Copyright (c) 1999, 2015, Oracle and/or its affiliates. All rights reserved.
3 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
4 *
5 * This code is free software; you can redistribute it and/or modify it
6 * under the terms of the GNU General Public License version 2 only, as
7 * published by the Free Software Foundation.
8 *
9 * This code is distributed in the hope that it will be useful, but WITHOUT
10 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
11 * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
12 * version 2 for more details (a copy is included in the LICENSE file that
13 * accompanied this code).
14 *
15 * You should have received a copy of the GNU General Public License version
16 * 2 along with this work; if not, write to the Free Software Foundation,
17 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
18 *
19 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
20 * or visit www.oracle.com if you need additional information or have any
21 * questions.
22 *
23 */
24
25#include "precompiled.hpp"
26#include "memory/allocation.inline.hpp"
27#include "opto/addnode.hpp"
28#include "opto/connode.hpp"
29#include "opto/castnode.hpp"
30#include "opto/divnode.hpp"
31#include "opto/loopnode.hpp"
32#include "opto/matcher.hpp"
33#include "opto/mulnode.hpp"
34#include "opto/movenode.hpp"
35#include "opto/opaquenode.hpp"
36#include "opto/rootnode.hpp"
37#include "opto/subnode.hpp"
38
39//=============================================================================
40//------------------------------split_thru_phi---------------------------------
41// Split Node 'n' through merge point if there is enough win.
42Node *PhaseIdealLoop::split_thru_phi( Node *n, Node *region, int policy ) {
43  if (n->Opcode() == Op_ConvI2L && n->bottom_type() != TypeLong::LONG) {
44    // ConvI2L may have type information on it which is unsafe to push up
45    // so disable this for now
46    return NULL;
47  }
48
49  int wins = 0;
50  assert(!n->is_CFG(), "");
51  assert(region->is_Region(), "");
52
53  const Type* type = n->bottom_type();
54  const TypeOopPtr *t_oop = _igvn.type(n)->isa_oopptr();
55  Node *phi;
56  if (t_oop != NULL && t_oop->is_known_instance_field()) {
57    int iid    = t_oop->instance_id();
58    int index  = C->get_alias_index(t_oop);
59    int offset = t_oop->offset();
60    phi = new PhiNode(region, type, NULL, iid, index, offset);
61  } else {
62    phi = PhiNode::make_blank(region, n);
63  }
64  uint old_unique = C->unique();
65  for (uint i = 1; i < region->req(); i++) {
66    Node *x;
67    Node* the_clone = NULL;
68    if (region->in(i) == C->top()) {
69      x = C->top();             // Dead path?  Use a dead data op
70    } else {
71      x = n->clone();           // Else clone up the data op
72      the_clone = x;            // Remember for possible deletion.
73      // Alter data node to use pre-phi inputs
74      if (n->in(0) == region)
75        x->set_req( 0, region->in(i) );
76      for (uint j = 1; j < n->req(); j++) {
77        Node *in = n->in(j);
78        if (in->is_Phi() && in->in(0) == region)
79          x->set_req( j, in->in(i) ); // Use pre-Phi input for the clone
80      }
81    }
82    // Check for a 'win' on some paths
83    const Type *t = x->Value(&_igvn);
84
85    bool singleton = t->singleton();
86
87    // A TOP singleton indicates that there are no possible values incoming
88    // along a particular edge. In most cases, this is OK, and the Phi will
89    // be eliminated later in an Ideal call. However, we can't allow this to
90    // happen if the singleton occurs on loop entry, as the elimination of
91    // the PhiNode may cause the resulting node to migrate back to a previous
92    // loop iteration.
93    if (singleton && t == Type::TOP) {
94      // Is_Loop() == false does not confirm the absence of a loop (e.g., an
95      // irreducible loop may not be indicated by an affirmative is_Loop());
96      // therefore, the only top we can split thru a phi is on a backedge of
97      // a loop.
98      singleton &= region->is_Loop() && (i != LoopNode::EntryControl);
99    }
100
101    if (singleton) {
102      wins++;
103      x = ((PhaseGVN&)_igvn).makecon(t);
104    } else {
105      // We now call Identity to try to simplify the cloned node.
106      // Note that some Identity methods call phase->type(this).
107      // Make sure that the type array is big enough for
108      // our new node, even though we may throw the node away.
109      // (Note: This tweaking with igvn only works because x is a new node.)
110      _igvn.set_type(x, t);
111      // If x is a TypeNode, capture any more-precise type permanently into Node
112      // otherwise it will be not updated during igvn->transform since
113      // igvn->type(x) is set to x->Value() already.
114      x->raise_bottom_type(t);
115      Node *y = x->Identity(&_igvn);
116      if (y != x) {
117        wins++;
118        x = y;
119      } else {
120        y = _igvn.hash_find(x);
121        if (y) {
122          wins++;
123          x = y;
124        } else {
125          // Else x is a new node we are keeping
126          // We do not need register_new_node_with_optimizer
127          // because set_type has already been called.
128          _igvn._worklist.push(x);
129        }
130      }
131    }
132    if (x != the_clone && the_clone != NULL)
133      _igvn.remove_dead_node(the_clone);
134    phi->set_req( i, x );
135  }
136  // Too few wins?
137  if (wins <= policy) {
138    _igvn.remove_dead_node(phi);
139    return NULL;
140  }
141
142  // Record Phi
143  register_new_node( phi, region );
144
145  for (uint i2 = 1; i2 < phi->req(); i2++) {
146    Node *x = phi->in(i2);
147    // If we commoned up the cloned 'x' with another existing Node,
148    // the existing Node picks up a new use.  We need to make the
149    // existing Node occur higher up so it dominates its uses.
150    Node *old_ctrl;
151    IdealLoopTree *old_loop;
152
153    if (x->is_Con()) {
154      // Constant's control is always root.
155      set_ctrl(x, C->root());
156      continue;
157    }
158    // The occasional new node
159    if (x->_idx >= old_unique) {     // Found a new, unplaced node?
160      old_ctrl = NULL;
161      old_loop = NULL;               // Not in any prior loop
162    } else {
163      old_ctrl = get_ctrl(x);
164      old_loop = get_loop(old_ctrl); // Get prior loop
165    }
166    // New late point must dominate new use
167    Node *new_ctrl = dom_lca(old_ctrl, region->in(i2));
168    if (new_ctrl == old_ctrl) // Nothing is changed
169      continue;
170
171    IdealLoopTree *new_loop = get_loop(new_ctrl);
172
173    // Don't move x into a loop if its uses are
174    // outside of loop. Otherwise x will be cloned
175    // for each use outside of this loop.
176    IdealLoopTree *use_loop = get_loop(region);
177    if (!new_loop->is_member(use_loop) &&
178        (old_loop == NULL || !new_loop->is_member(old_loop))) {
179      // Take early control, later control will be recalculated
180      // during next iteration of loop optimizations.
181      new_ctrl = get_early_ctrl(x);
182      new_loop = get_loop(new_ctrl);
183    }
184    // Set new location
185    set_ctrl(x, new_ctrl);
186    // If changing loop bodies, see if we need to collect into new body
187    if (old_loop != new_loop) {
188      if (old_loop && !old_loop->_child)
189        old_loop->_body.yank(x);
190      if (!new_loop->_child)
191        new_loop->_body.push(x);  // Collect body info
192    }
193  }
194
195  return phi;
196}
197
198//------------------------------dominated_by------------------------------------
199// Replace the dominated test with an obvious true or false.  Place it on the
200// IGVN worklist for later cleanup.  Move control-dependent data Nodes on the
201// live path up to the dominating control.
202void PhaseIdealLoop::dominated_by( Node *prevdom, Node *iff, bool flip, bool exclude_loop_predicate ) {
203  if (VerifyLoopOptimizations && PrintOpto) { tty->print_cr("dominating test"); }
204
205  // prevdom is the dominating projection of the dominating test.
206  assert( iff->is_If(), "" );
207  assert(iff->Opcode() == Op_If || iff->Opcode() == Op_CountedLoopEnd || iff->Opcode() == Op_RangeCheck, "Check this code when new subtype is added");
208  int pop = prevdom->Opcode();
209  assert( pop == Op_IfFalse || pop == Op_IfTrue, "" );
210  if (flip) {
211    if (pop == Op_IfTrue)
212      pop = Op_IfFalse;
213    else
214      pop = Op_IfTrue;
215  }
216  // 'con' is set to true or false to kill the dominated test.
217  Node *con = _igvn.makecon(pop == Op_IfTrue ? TypeInt::ONE : TypeInt::ZERO);
218  set_ctrl(con, C->root()); // Constant gets a new use
219  // Hack the dominated test
220  _igvn.replace_input_of(iff, 1, con);
221
222  // If I dont have a reachable TRUE and FALSE path following the IfNode then
223  // I can assume this path reaches an infinite loop.  In this case it's not
224  // important to optimize the data Nodes - either the whole compilation will
225  // be tossed or this path (and all data Nodes) will go dead.
226  if (iff->outcnt() != 2) return;
227
228  // Make control-dependent data Nodes on the live path (path that will remain
229  // once the dominated IF is removed) become control-dependent on the
230  // dominating projection.
231  Node* dp = iff->as_If()->proj_out(pop == Op_IfTrue);
232
233  // Loop predicates may have depending checks which should not
234  // be skipped. For example, range check predicate has two checks
235  // for lower and upper bounds.
236  if (dp == NULL)
237    return;
238
239  ProjNode* dp_proj  = dp->as_Proj();
240  ProjNode* unc_proj = iff->as_If()->proj_out(1 - dp_proj->_con)->as_Proj();
241  if (exclude_loop_predicate &&
242      (unc_proj->is_uncommon_trap_proj(Deoptimization::Reason_predicate) != NULL ||
243       unc_proj->is_uncommon_trap_proj(Deoptimization::Reason_range_check) != NULL)) {
244    // If this is a range check (IfNode::is_range_check), do not
245    // reorder because Compile::allow_range_check_smearing might have
246    // changed the check.
247    return; // Let IGVN transformation change control dependence.
248  }
249
250  IdealLoopTree *old_loop = get_loop(dp);
251
252  for (DUIterator_Fast imax, i = dp->fast_outs(imax); i < imax; i++) {
253    Node* cd = dp->fast_out(i); // Control-dependent node
254    if (cd->depends_only_on_test()) {
255      assert(cd->in(0) == dp, "");
256      _igvn.replace_input_of(cd, 0, prevdom);
257      set_early_ctrl(cd);
258      IdealLoopTree *new_loop = get_loop(get_ctrl(cd));
259      if (old_loop != new_loop) {
260        if (!old_loop->_child) old_loop->_body.yank(cd);
261        if (!new_loop->_child) new_loop->_body.push(cd);
262      }
263      --i;
264      --imax;
265    }
266  }
267}
268
269//------------------------------has_local_phi_input----------------------------
270// Return TRUE if 'n' has Phi inputs from its local block and no other
271// block-local inputs (all non-local-phi inputs come from earlier blocks)
272Node *PhaseIdealLoop::has_local_phi_input( Node *n ) {
273  Node *n_ctrl = get_ctrl(n);
274  // See if some inputs come from a Phi in this block, or from before
275  // this block.
276  uint i;
277  for( i = 1; i < n->req(); i++ ) {
278    Node *phi = n->in(i);
279    if( phi->is_Phi() && phi->in(0) == n_ctrl )
280      break;
281  }
282  if( i >= n->req() )
283    return NULL;                // No Phi inputs; nowhere to clone thru
284
285  // Check for inputs created between 'n' and the Phi input.  These
286  // must split as well; they have already been given the chance
287  // (courtesy of a post-order visit) and since they did not we must
288  // recover the 'cost' of splitting them by being very profitable
289  // when splitting 'n'.  Since this is unlikely we simply give up.
290  for( i = 1; i < n->req(); i++ ) {
291    Node *m = n->in(i);
292    if( get_ctrl(m) == n_ctrl && !m->is_Phi() ) {
293      // We allow the special case of AddP's with no local inputs.
294      // This allows us to split-up address expressions.
295      if (m->is_AddP() &&
296          get_ctrl(m->in(2)) != n_ctrl &&
297          get_ctrl(m->in(3)) != n_ctrl) {
298        // Move the AddP up to dominating point
299        set_ctrl_and_loop(m, find_non_split_ctrl(idom(n_ctrl)));
300        continue;
301      }
302      return NULL;
303    }
304  }
305
306  return n_ctrl;
307}
308
309//------------------------------remix_address_expressions----------------------
310// Rework addressing expressions to get the most loop-invariant stuff
311// moved out.  We'd like to do all associative operators, but it's especially
312// important (common) to do address expressions.
313Node *PhaseIdealLoop::remix_address_expressions( Node *n ) {
314  if (!has_ctrl(n))  return NULL;
315  Node *n_ctrl = get_ctrl(n);
316  IdealLoopTree *n_loop = get_loop(n_ctrl);
317
318  // See if 'n' mixes loop-varying and loop-invariant inputs and
319  // itself is loop-varying.
320
321  // Only interested in binary ops (and AddP)
322  if( n->req() < 3 || n->req() > 4 ) return NULL;
323
324  Node *n1_ctrl = get_ctrl(n->in(                    1));
325  Node *n2_ctrl = get_ctrl(n->in(                    2));
326  Node *n3_ctrl = get_ctrl(n->in(n->req() == 3 ? 2 : 3));
327  IdealLoopTree *n1_loop = get_loop( n1_ctrl );
328  IdealLoopTree *n2_loop = get_loop( n2_ctrl );
329  IdealLoopTree *n3_loop = get_loop( n3_ctrl );
330
331  // Does one of my inputs spin in a tighter loop than self?
332  if( (n_loop->is_member( n1_loop ) && n_loop != n1_loop) ||
333      (n_loop->is_member( n2_loop ) && n_loop != n2_loop) ||
334      (n_loop->is_member( n3_loop ) && n_loop != n3_loop) )
335    return NULL;                // Leave well enough alone
336
337  // Is at least one of my inputs loop-invariant?
338  if( n1_loop == n_loop &&
339      n2_loop == n_loop &&
340      n3_loop == n_loop )
341    return NULL;                // No loop-invariant inputs
342
343
344  int n_op = n->Opcode();
345
346  // Replace expressions like ((V+I) << 2) with (V<<2 + I<<2).
347  if( n_op == Op_LShiftI ) {
348    // Scale is loop invariant
349    Node *scale = n->in(2);
350    Node *scale_ctrl = get_ctrl(scale);
351    IdealLoopTree *scale_loop = get_loop(scale_ctrl );
352    if( n_loop == scale_loop || !scale_loop->is_member( n_loop ) )
353      return NULL;
354    const TypeInt *scale_t = scale->bottom_type()->isa_int();
355    if( scale_t && scale_t->is_con() && scale_t->get_con() >= 16 )
356      return NULL;              // Dont bother with byte/short masking
357    // Add must vary with loop (else shift would be loop-invariant)
358    Node *add = n->in(1);
359    Node *add_ctrl = get_ctrl(add);
360    IdealLoopTree *add_loop = get_loop(add_ctrl);
361    //assert( n_loop == add_loop, "" );
362    if( n_loop != add_loop ) return NULL;  // happens w/ evil ZKM loops
363
364    // Convert I-V into I+ (0-V); same for V-I
365    if( add->Opcode() == Op_SubI &&
366        _igvn.type( add->in(1) ) != TypeInt::ZERO ) {
367      Node *zero = _igvn.intcon(0);
368      set_ctrl(zero, C->root());
369      Node *neg = new SubINode( _igvn.intcon(0), add->in(2) );
370      register_new_node( neg, get_ctrl(add->in(2) ) );
371      add = new AddINode( add->in(1), neg );
372      register_new_node( add, add_ctrl );
373    }
374    if( add->Opcode() != Op_AddI ) return NULL;
375    // See if one add input is loop invariant
376    Node *add_var = add->in(1);
377    Node *add_var_ctrl = get_ctrl(add_var);
378    IdealLoopTree *add_var_loop = get_loop(add_var_ctrl );
379    Node *add_invar = add->in(2);
380    Node *add_invar_ctrl = get_ctrl(add_invar);
381    IdealLoopTree *add_invar_loop = get_loop(add_invar_ctrl );
382    if( add_var_loop == n_loop ) {
383    } else if( add_invar_loop == n_loop ) {
384      // Swap to find the invariant part
385      add_invar = add_var;
386      add_invar_ctrl = add_var_ctrl;
387      add_invar_loop = add_var_loop;
388      add_var = add->in(2);
389      Node *add_var_ctrl = get_ctrl(add_var);
390      IdealLoopTree *add_var_loop = get_loop(add_var_ctrl );
391    } else                      // Else neither input is loop invariant
392      return NULL;
393    if( n_loop == add_invar_loop || !add_invar_loop->is_member( n_loop ) )
394      return NULL;              // No invariant part of the add?
395
396    // Yes!  Reshape address expression!
397    Node *inv_scale = new LShiftINode( add_invar, scale );
398    Node *inv_scale_ctrl =
399      dom_depth(add_invar_ctrl) > dom_depth(scale_ctrl) ?
400      add_invar_ctrl : scale_ctrl;
401    register_new_node( inv_scale, inv_scale_ctrl );
402    Node *var_scale = new LShiftINode( add_var, scale );
403    register_new_node( var_scale, n_ctrl );
404    Node *var_add = new AddINode( var_scale, inv_scale );
405    register_new_node( var_add, n_ctrl );
406    _igvn.replace_node( n, var_add );
407    return var_add;
408  }
409
410  // Replace (I+V) with (V+I)
411  if( n_op == Op_AddI ||
412      n_op == Op_AddL ||
413      n_op == Op_AddF ||
414      n_op == Op_AddD ||
415      n_op == Op_MulI ||
416      n_op == Op_MulL ||
417      n_op == Op_MulF ||
418      n_op == Op_MulD ) {
419    if( n2_loop == n_loop ) {
420      assert( n1_loop != n_loop, "" );
421      n->swap_edges(1, 2);
422    }
423  }
424
425  // Replace ((I1 +p V) +p I2) with ((I1 +p I2) +p V),
426  // but not if I2 is a constant.
427  if( n_op == Op_AddP ) {
428    if( n2_loop == n_loop && n3_loop != n_loop ) {
429      if( n->in(2)->Opcode() == Op_AddP && !n->in(3)->is_Con() ) {
430        Node *n22_ctrl = get_ctrl(n->in(2)->in(2));
431        Node *n23_ctrl = get_ctrl(n->in(2)->in(3));
432        IdealLoopTree *n22loop = get_loop( n22_ctrl );
433        IdealLoopTree *n23_loop = get_loop( n23_ctrl );
434        if( n22loop != n_loop && n22loop->is_member(n_loop) &&
435            n23_loop == n_loop ) {
436          Node *add1 = new AddPNode( n->in(1), n->in(2)->in(2), n->in(3) );
437          // Stuff new AddP in the loop preheader
438          register_new_node( add1, n_loop->_head->in(LoopNode::EntryControl) );
439          Node *add2 = new AddPNode( n->in(1), add1, n->in(2)->in(3) );
440          register_new_node( add2, n_ctrl );
441          _igvn.replace_node( n, add2 );
442          return add2;
443        }
444      }
445    }
446
447    // Replace (I1 +p (I2 + V)) with ((I1 +p I2) +p V)
448    if (n2_loop != n_loop && n3_loop == n_loop) {
449      if (n->in(3)->Opcode() == Op_AddX) {
450        Node *V = n->in(3)->in(1);
451        Node *I = n->in(3)->in(2);
452        if (is_member(n_loop,get_ctrl(V))) {
453        } else {
454          Node *tmp = V; V = I; I = tmp;
455        }
456        if (!is_member(n_loop,get_ctrl(I))) {
457          Node *add1 = new AddPNode(n->in(1), n->in(2), I);
458          // Stuff new AddP in the loop preheader
459          register_new_node(add1, n_loop->_head->in(LoopNode::EntryControl));
460          Node *add2 = new AddPNode(n->in(1), add1, V);
461          register_new_node(add2, n_ctrl);
462          _igvn.replace_node(n, add2);
463          return add2;
464        }
465      }
466    }
467  }
468
469  return NULL;
470}
471
472//------------------------------conditional_move-------------------------------
473// Attempt to replace a Phi with a conditional move.  We have some pretty
474// strict profitability requirements.  All Phis at the merge point must
475// be converted, so we can remove the control flow.  We need to limit the
476// number of c-moves to a small handful.  All code that was in the side-arms
477// of the CFG diamond is now speculatively executed.  This code has to be
478// "cheap enough".  We are pretty much limited to CFG diamonds that merge
479// 1 or 2 items with a total of 1 or 2 ops executed speculatively.
480Node *PhaseIdealLoop::conditional_move( Node *region ) {
481
482  assert(region->is_Region(), "sanity check");
483  if (region->req() != 3) return NULL;
484
485  // Check for CFG diamond
486  Node *lp = region->in(1);
487  Node *rp = region->in(2);
488  if (!lp || !rp) return NULL;
489  Node *lp_c = lp->in(0);
490  if (lp_c == NULL || lp_c != rp->in(0) || !lp_c->is_If()) return NULL;
491  IfNode *iff = lp_c->as_If();
492
493  // Check for ops pinned in an arm of the diamond.
494  // Can't remove the control flow in this case
495  if (lp->outcnt() > 1) return NULL;
496  if (rp->outcnt() > 1) return NULL;
497
498  IdealLoopTree* r_loop = get_loop(region);
499  assert(r_loop == get_loop(iff), "sanity");
500  // Always convert to CMOVE if all results are used only outside this loop.
501  bool used_inside_loop = (r_loop == _ltree_root);
502
503  // Check profitability
504  int cost = 0;
505  int phis = 0;
506  for (DUIterator_Fast imax, i = region->fast_outs(imax); i < imax; i++) {
507    Node *out = region->fast_out(i);
508    if (!out->is_Phi()) continue; // Ignore other control edges, etc
509    phis++;
510    PhiNode* phi = out->as_Phi();
511    BasicType bt = phi->type()->basic_type();
512    switch (bt) {
513    case T_DOUBLE:
514      if (C->use_cmove()) {
515        continue; //TODO: maybe we want to add some cost
516      }
517    case T_FLOAT: {
518      cost += Matcher::float_cmove_cost(); // Could be very expensive
519      break;
520    }
521    case T_LONG: {
522      cost += Matcher::long_cmove_cost(); // May encodes as 2 CMOV's
523    }
524    case T_INT:                 // These all CMOV fine
525    case T_ADDRESS: {           // (RawPtr)
526      cost++;
527      break;
528    }
529    case T_NARROWOOP: // Fall through
530    case T_OBJECT: {            // Base oops are OK, but not derived oops
531      const TypeOopPtr *tp = phi->type()->make_ptr()->isa_oopptr();
532      // Derived pointers are Bad (tm): what's the Base (for GC purposes) of a
533      // CMOVE'd derived pointer?  It's a CMOVE'd derived base.  Thus
534      // CMOVE'ing a derived pointer requires we also CMOVE the base.  If we
535      // have a Phi for the base here that we convert to a CMOVE all is well
536      // and good.  But if the base is dead, we'll not make a CMOVE.  Later
537      // the allocator will have to produce a base by creating a CMOVE of the
538      // relevant bases.  This puts the allocator in the business of
539      // manufacturing expensive instructions, generally a bad plan.
540      // Just Say No to Conditionally-Moved Derived Pointers.
541      if (tp && tp->offset() != 0)
542        return NULL;
543      cost++;
544      break;
545    }
546    default:
547      return NULL;              // In particular, can't do memory or I/O
548    }
549    // Add in cost any speculative ops
550    for (uint j = 1; j < region->req(); j++) {
551      Node *proj = region->in(j);
552      Node *inp = phi->in(j);
553      if (get_ctrl(inp) == proj) { // Found local op
554        cost++;
555        // Check for a chain of dependent ops; these will all become
556        // speculative in a CMOV.
557        for (uint k = 1; k < inp->req(); k++)
558          if (get_ctrl(inp->in(k)) == proj)
559            cost += ConditionalMoveLimit; // Too much speculative goo
560      }
561    }
562    // See if the Phi is used by a Cmp or Narrow oop Decode/Encode.
563    // This will likely Split-If, a higher-payoff operation.
564    for (DUIterator_Fast kmax, k = phi->fast_outs(kmax); k < kmax; k++) {
565      Node* use = phi->fast_out(k);
566      if (use->is_Cmp() || use->is_DecodeNarrowPtr() || use->is_EncodeNarrowPtr())
567        cost += ConditionalMoveLimit;
568      // Is there a use inside the loop?
569      // Note: check only basic types since CMoveP is pinned.
570      if (!used_inside_loop && is_java_primitive(bt)) {
571        IdealLoopTree* u_loop = get_loop(has_ctrl(use) ? get_ctrl(use) : use);
572        if (r_loop == u_loop || r_loop->is_member(u_loop)) {
573          used_inside_loop = true;
574        }
575      }
576    }
577  }//for
578  Node* bol = iff->in(1);
579  assert(bol->Opcode() == Op_Bool, "");
580  int cmp_op = bol->in(1)->Opcode();
581  // It is expensive to generate flags from a float compare.
582  // Avoid duplicated float compare.
583  if (phis > 1 && (cmp_op == Op_CmpF || cmp_op == Op_CmpD)) return NULL;
584
585  float infrequent_prob = PROB_UNLIKELY_MAG(3);
586  // Ignore cost and blocks frequency if CMOVE can be moved outside the loop.
587  if (used_inside_loop) {
588    if (cost >= ConditionalMoveLimit) return NULL; // Too much goo
589
590    // BlockLayoutByFrequency optimization moves infrequent branch
591    // from hot path. No point in CMOV'ing in such case (110 is used
592    // instead of 100 to take into account not exactness of float value).
593    if (BlockLayoutByFrequency) {
594      infrequent_prob = MAX2(infrequent_prob, (float)BlockLayoutMinDiamondPercentage/110.0f);
595    }
596  }
597  // Check for highly predictable branch.  No point in CMOV'ing if
598  // we are going to predict accurately all the time.
599  if (C->use_cmove() && cmp_op == Op_CmpD) ;//keep going
600  else if (iff->_prob < infrequent_prob ||
601      iff->_prob > (1.0f - infrequent_prob))
602    return NULL;
603
604  // --------------
605  // Now replace all Phis with CMOV's
606  Node *cmov_ctrl = iff->in(0);
607  uint flip = (lp->Opcode() == Op_IfTrue);
608  while (1) {
609    PhiNode* phi = NULL;
610    for (DUIterator_Fast imax, i = region->fast_outs(imax); i < imax; i++) {
611      Node *out = region->fast_out(i);
612      if (out->is_Phi()) {
613        phi = out->as_Phi();
614        break;
615      }
616    }
617    if (phi == NULL)  break;
618    if (PrintOpto && VerifyLoopOptimizations) { tty->print_cr("CMOV"); }
619    // Move speculative ops
620    for (uint j = 1; j < region->req(); j++) {
621      Node *proj = region->in(j);
622      Node *inp = phi->in(j);
623      if (get_ctrl(inp) == proj) { // Found local op
624#ifndef PRODUCT
625        if (PrintOpto && VerifyLoopOptimizations) {
626          tty->print("  speculate: ");
627          inp->dump();
628        }
629#endif
630        set_ctrl(inp, cmov_ctrl);
631      }
632    }
633    Node *cmov = CMoveNode::make(cmov_ctrl, iff->in(1), phi->in(1+flip), phi->in(2-flip), _igvn.type(phi));
634    register_new_node( cmov, cmov_ctrl );
635    _igvn.replace_node( phi, cmov );
636#ifndef PRODUCT
637    if (TraceLoopOpts) {
638      tty->print("CMOV  ");
639      r_loop->dump_head();
640      if (Verbose) {
641        bol->in(1)->dump(1);
642        cmov->dump(1);
643      }
644    }
645    if (VerifyLoopOptimizations) verify();
646#endif
647  }
648
649  // The useless CFG diamond will fold up later; see the optimization in
650  // RegionNode::Ideal.
651  _igvn._worklist.push(region);
652
653  return iff->in(1);
654}
655
656static void enqueue_cfg_uses(Node* m, Unique_Node_List& wq) {
657  for (DUIterator_Fast imax, i = m->fast_outs(imax); i < imax; i++) {
658    Node* u = m->fast_out(i);
659    if (u->is_CFG()) {
660      if (u->Opcode() == Op_NeverBranch) {
661        u = ((NeverBranchNode*)u)->proj_out(0);
662        enqueue_cfg_uses(u, wq);
663      } else {
664        wq.push(u);
665      }
666    }
667  }
668}
669
670// Try moving a store out of a loop, right before the loop
671Node* PhaseIdealLoop::try_move_store_before_loop(Node* n, Node *n_ctrl) {
672  // Store has to be first in the loop body
673  IdealLoopTree *n_loop = get_loop(n_ctrl);
674  if (n->is_Store() && n_loop != _ltree_root && n_loop->is_loop() && n->in(0) != NULL) {
675    Node* address = n->in(MemNode::Address);
676    Node* value = n->in(MemNode::ValueIn);
677    Node* mem = n->in(MemNode::Memory);
678    IdealLoopTree* address_loop = get_loop(get_ctrl(address));
679    IdealLoopTree* value_loop = get_loop(get_ctrl(value));
680
681    // - address and value must be loop invariant
682    // - memory must be a memory Phi for the loop
683    // - Store must be the only store on this memory slice in the
684    // loop: if there's another store following this one then value
685    // written at iteration i by the second store could be overwritten
686    // at iteration i+n by the first store: it's not safe to move the
687    // first store out of the loop
688    // - nothing must observe the memory Phi: it guarantees no read
689    // before the store, we are also guaranteed the store post
690    // dominates the loop head (ignoring a possible early
691    // exit). Otherwise there would be extra Phi involved between the
692    // loop's Phi and the store.
693    // - there must be no early exit from the loop before the Store
694    // (such an exit most of the time would be an extra use of the
695    // memory Phi but sometimes is a bottom memory Phi that takes the
696    // store as input).
697
698    if (!n_loop->is_member(address_loop) &&
699        !n_loop->is_member(value_loop) &&
700        mem->is_Phi() && mem->in(0) == n_loop->_head &&
701        mem->outcnt() == 1 &&
702        mem->in(LoopNode::LoopBackControl) == n) {
703
704      assert(n_loop->_tail != NULL, "need a tail");
705      assert(is_dominator(n_ctrl, n_loop->_tail), "store control must not be in a branch in the loop");
706
707      // Verify that there's no early exit of the loop before the store.
708      bool ctrl_ok = false;
709      {
710        // Follow control from loop head until n, we exit the loop or
711        // we reach the tail
712        ResourceMark rm;
713        Unique_Node_List wq;
714        wq.push(n_loop->_head);
715
716        for (uint next = 0; next < wq.size(); ++next) {
717          Node *m = wq.at(next);
718          if (m == n->in(0)) {
719            ctrl_ok = true;
720            continue;
721          }
722          assert(!has_ctrl(m), "should be CFG");
723          if (!n_loop->is_member(get_loop(m)) || m == n_loop->_tail) {
724            ctrl_ok = false;
725            break;
726          }
727          enqueue_cfg_uses(m, wq);
728          if (wq.size() > 10) {
729            ctrl_ok = false;
730            break;
731          }
732        }
733      }
734      if (ctrl_ok) {
735        // move the Store
736        _igvn.replace_input_of(mem, LoopNode::LoopBackControl, mem);
737        _igvn.replace_input_of(n, 0, n_loop->_head->in(LoopNode::EntryControl));
738        _igvn.replace_input_of(n, MemNode::Memory, mem->in(LoopNode::EntryControl));
739        // Disconnect the phi now. An empty phi can confuse other
740        // optimizations in this pass of loop opts.
741        _igvn.replace_node(mem, mem->in(LoopNode::EntryControl));
742        n_loop->_body.yank(mem);
743
744        IdealLoopTree* new_loop = get_loop(n->in(0));
745        set_ctrl_and_loop(n, n->in(0));
746
747        return n;
748      }
749    }
750  }
751  return NULL;
752}
753
754// Try moving a store out of a loop, right after the loop
755void PhaseIdealLoop::try_move_store_after_loop(Node* n) {
756  if (n->is_Store() && n->in(0) != NULL) {
757    Node *n_ctrl = get_ctrl(n);
758    IdealLoopTree *n_loop = get_loop(n_ctrl);
759    // Store must be in a loop
760    if (n_loop != _ltree_root && !n_loop->_irreducible) {
761      Node* address = n->in(MemNode::Address);
762      Node* value = n->in(MemNode::ValueIn);
763      IdealLoopTree* address_loop = get_loop(get_ctrl(address));
764      // address must be loop invariant
765      if (!n_loop->is_member(address_loop)) {
766        // Store must be last on this memory slice in the loop and
767        // nothing in the loop must observe it
768        Node* phi = NULL;
769        for (DUIterator_Fast imax, i = n->fast_outs(imax); i < imax; i++) {
770          Node* u = n->fast_out(i);
771          if (has_ctrl(u)) { // control use?
772            IdealLoopTree *u_loop = get_loop(get_ctrl(u));
773            if (!n_loop->is_member(u_loop)) {
774              continue;
775            }
776            if (u->is_Phi() && u->in(0) == n_loop->_head) {
777              assert(_igvn.type(u) == Type::MEMORY, "bad phi");
778              // multiple phis on the same slice are possible
779              if (phi != NULL) {
780                return;
781              }
782              phi = u;
783              continue;
784            }
785          }
786          return;
787        }
788        if (phi != NULL) {
789          // Nothing in the loop before the store (next iteration)
790          // must observe the stored value
791          bool mem_ok = true;
792          {
793            ResourceMark rm;
794            Unique_Node_List wq;
795            wq.push(phi);
796            for (uint next = 0; next < wq.size() && mem_ok; ++next) {
797              Node *m = wq.at(next);
798              for (DUIterator_Fast imax, i = m->fast_outs(imax); i < imax && mem_ok; i++) {
799                Node* u = m->fast_out(i);
800                if (u->is_Store() || u->is_Phi()) {
801                  if (u != n) {
802                    wq.push(u);
803                    mem_ok = (wq.size() <= 10);
804                  }
805                } else {
806                  mem_ok = false;
807                  break;
808                }
809              }
810            }
811          }
812          if (mem_ok) {
813            // Move the Store out of the loop creating clones along
814            // all paths out of the loop that observe the stored value
815            _igvn.rehash_node_delayed(phi);
816            int count = phi->replace_edge(n, n->in(MemNode::Memory));
817            assert(count > 0, "inconsistent phi");
818            for (DUIterator_Fast imax, i = n->fast_outs(imax); i < imax; i++) {
819              Node* u = n->fast_out(i);
820              Node* c = get_ctrl(u);
821
822              if (u->is_Phi()) {
823                c = u->in(0)->in(u->find_edge(n));
824              }
825              IdealLoopTree *u_loop = get_loop(c);
826              assert (!n_loop->is_member(u_loop), "only the phi should have been a use in the loop");
827              while(true) {
828                Node* next_c = find_non_split_ctrl(idom(c));
829                if (n_loop->is_member(get_loop(next_c))) {
830                  break;
831                }
832                c = next_c;
833              }
834
835              Node* st = n->clone();
836              st->set_req(0, c);
837              _igvn.register_new_node_with_optimizer(st);
838
839              set_ctrl(st, c);
840              IdealLoopTree* new_loop = get_loop(c);
841              assert(new_loop != n_loop, "should be moved out of loop");
842              if (new_loop->_child == NULL) new_loop->_body.push(st);
843
844              _igvn.replace_input_of(u, u->find_edge(n), st);
845              --imax;
846              --i;
847            }
848
849
850            assert(n->outcnt() == 0, "all uses should be gone");
851            _igvn.replace_input_of(n, MemNode::Memory, C->top());
852            // Disconnect the phi now. An empty phi can confuse other
853            // optimizations in this pass of loop opts..
854            if (phi->in(LoopNode::LoopBackControl) == phi) {
855              _igvn.replace_node(phi, phi->in(LoopNode::EntryControl));
856              n_loop->_body.yank(phi);
857            }
858          }
859        }
860      }
861    }
862  }
863}
864
865//------------------------------split_if_with_blocks_pre-----------------------
866// Do the real work in a non-recursive function.  Data nodes want to be
867// cloned in the pre-order so they can feed each other nicely.
868Node *PhaseIdealLoop::split_if_with_blocks_pre( Node *n ) {
869  // Cloning these guys is unlikely to win
870  int n_op = n->Opcode();
871  if( n_op == Op_MergeMem ) return n;
872  if( n->is_Proj() ) return n;
873  // Do not clone-up CmpFXXX variations, as these are always
874  // followed by a CmpI
875  if( n->is_Cmp() ) return n;
876  // Attempt to use a conditional move instead of a phi/branch
877  if( ConditionalMoveLimit > 0 && n_op == Op_Region ) {
878    Node *cmov = conditional_move( n );
879    if( cmov ) return cmov;
880  }
881  if( n->is_CFG() || n->is_LoadStore() )
882    return n;
883  if( n_op == Op_Opaque1 ||     // Opaque nodes cannot be mod'd
884      n_op == Op_Opaque2 ) {
885    if( !C->major_progress() )   // If chance of no more loop opts...
886      _igvn._worklist.push(n);  // maybe we'll remove them
887    return n;
888  }
889
890  if( n->is_Con() ) return n;   // No cloning for Con nodes
891
892  Node *n_ctrl = get_ctrl(n);
893  if( !n_ctrl ) return n;       // Dead node
894
895  Node* res = try_move_store_before_loop(n, n_ctrl);
896  if (res != NULL) {
897    return n;
898  }
899
900  // Attempt to remix address expressions for loop invariants
901  Node *m = remix_address_expressions( n );
902  if( m ) return m;
903
904  if (n->is_ConstraintCast()) {
905    Node* dom_cast = n->as_ConstraintCast()->dominating_cast(this);
906    if (dom_cast != NULL) {
907      _igvn.replace_node(n, dom_cast);
908      return dom_cast;
909    }
910  }
911
912  // Determine if the Node has inputs from some local Phi.
913  // Returns the block to clone thru.
914  Node *n_blk = has_local_phi_input( n );
915  if( !n_blk ) return n;
916
917  // Do not clone the trip counter through on a CountedLoop
918  // (messes up the canonical shape).
919  if( n_blk->is_CountedLoop() && n->Opcode() == Op_AddI ) return n;
920
921  // Check for having no control input; not pinned.  Allow
922  // dominating control.
923  if (n->in(0)) {
924    Node *dom = idom(n_blk);
925    if (dom_lca(n->in(0), dom) != n->in(0)) {
926      return n;
927    }
928  }
929  // Policy: when is it profitable.  You must get more wins than
930  // policy before it is considered profitable.  Policy is usually 0,
931  // so 1 win is considered profitable.  Big merges will require big
932  // cloning, so get a larger policy.
933  int policy = n_blk->req() >> 2;
934
935  // If the loop is a candidate for range check elimination,
936  // delay splitting through it's phi until a later loop optimization
937  if (n_blk->is_CountedLoop()) {
938    IdealLoopTree *lp = get_loop(n_blk);
939    if (lp && lp->_rce_candidate) {
940      return n;
941    }
942  }
943
944  // Use same limit as split_if_with_blocks_post
945  if( C->live_nodes() > 35000 ) return n; // Method too big
946
947  // Split 'n' through the merge point if it is profitable
948  Node *phi = split_thru_phi( n, n_blk, policy );
949  if (!phi) return n;
950
951  // Found a Phi to split thru!
952  // Replace 'n' with the new phi
953  _igvn.replace_node( n, phi );
954  // Moved a load around the loop, 'en-registering' something.
955  if (n_blk->is_Loop() && n->is_Load() &&
956      !phi->in(LoopNode::LoopBackControl)->is_Load())
957    C->set_major_progress();
958
959  return phi;
960}
961
962static bool merge_point_too_heavy(Compile* C, Node* region) {
963  // Bail out if the region and its phis have too many users.
964  int weight = 0;
965  for (DUIterator_Fast imax, i = region->fast_outs(imax); i < imax; i++) {
966    weight += region->fast_out(i)->outcnt();
967  }
968  int nodes_left = C->max_node_limit() - C->live_nodes();
969  if (weight * 8 > nodes_left) {
970    if (PrintOpto) {
971      tty->print_cr("*** Split-if bails out:  %d nodes, region weight %d", C->unique(), weight);
972    }
973    return true;
974  } else {
975    return false;
976  }
977}
978
979static bool merge_point_safe(Node* region) {
980  // 4799512: Stop split_if_with_blocks from splitting a block with a ConvI2LNode
981  // having a PhiNode input. This sidesteps the dangerous case where the split
982  // ConvI2LNode may become TOP if the input Value() does not
983  // overlap the ConvI2L range, leaving a node which may not dominate its
984  // uses.
985  // A better fix for this problem can be found in the BugTraq entry, but
986  // expediency for Mantis demands this hack.
987  // 6855164: If the merge point has a FastLockNode with a PhiNode input, we stop
988  // split_if_with_blocks from splitting a block because we could not move around
989  // the FastLockNode.
990  for (DUIterator_Fast imax, i = region->fast_outs(imax); i < imax; i++) {
991    Node* n = region->fast_out(i);
992    if (n->is_Phi()) {
993      for (DUIterator_Fast jmax, j = n->fast_outs(jmax); j < jmax; j++) {
994        Node* m = n->fast_out(j);
995        if (m->is_FastLock())
996          return false;
997#ifdef _LP64
998        if (m->Opcode() == Op_ConvI2L)
999          return false;
1000#endif
1001      }
1002    }
1003  }
1004  return true;
1005}
1006
1007
1008//------------------------------place_near_use---------------------------------
1009// Place some computation next to use but not inside inner loops.
1010// For inner loop uses move it to the preheader area.
1011Node *PhaseIdealLoop::place_near_use( Node *useblock ) const {
1012  IdealLoopTree *u_loop = get_loop( useblock );
1013  return (u_loop->_irreducible || u_loop->_child)
1014    ? useblock
1015    : u_loop->_head->in(LoopNode::EntryControl);
1016}
1017
1018
1019//------------------------------split_if_with_blocks_post----------------------
1020// Do the real work in a non-recursive function.  CFG hackery wants to be
1021// in the post-order, so it can dirty the I-DOM info and not use the dirtied
1022// info.
1023void PhaseIdealLoop::split_if_with_blocks_post( Node *n ) {
1024
1025  // Cloning Cmp through Phi's involves the split-if transform.
1026  // FastLock is not used by an If
1027  if( n->is_Cmp() && !n->is_FastLock() ) {
1028    if( C->live_nodes() > 35000 ) return; // Method too big
1029
1030    // Do not do 'split-if' if irreducible loops are present.
1031    if( _has_irreducible_loops )
1032      return;
1033
1034    Node *n_ctrl = get_ctrl(n);
1035    // Determine if the Node has inputs from some local Phi.
1036    // Returns the block to clone thru.
1037    Node *n_blk = has_local_phi_input( n );
1038    if( n_blk != n_ctrl ) return;
1039
1040    if( merge_point_too_heavy(C, n_ctrl) )
1041      return;
1042
1043    if( n->outcnt() != 1 ) return; // Multiple bool's from 1 compare?
1044    Node *bol = n->unique_out();
1045    assert( bol->is_Bool(), "expect a bool here" );
1046    if( bol->outcnt() != 1 ) return;// Multiple branches from 1 compare?
1047    Node *iff = bol->unique_out();
1048
1049    // Check some safety conditions
1050    if( iff->is_If() ) {        // Classic split-if?
1051      if( iff->in(0) != n_ctrl ) return; // Compare must be in same blk as if
1052    } else if (iff->is_CMove()) { // Trying to split-up a CMOVE
1053      // Can't split CMove with different control edge.
1054      if (iff->in(0) != NULL && iff->in(0) != n_ctrl ) return;
1055      if( get_ctrl(iff->in(2)) == n_ctrl ||
1056          get_ctrl(iff->in(3)) == n_ctrl )
1057        return;                 // Inputs not yet split-up
1058      if ( get_loop(n_ctrl) != get_loop(get_ctrl(iff)) ) {
1059        return;                 // Loop-invar test gates loop-varying CMOVE
1060      }
1061    } else {
1062      return;  // some other kind of node, such as an Allocate
1063    }
1064
1065    // Do not do 'split-if' if some paths are dead.  First do dead code
1066    // elimination and then see if its still profitable.
1067    for( uint i = 1; i < n_ctrl->req(); i++ )
1068      if( n_ctrl->in(i) == C->top() )
1069        return;
1070
1071    // When is split-if profitable?  Every 'win' on means some control flow
1072    // goes dead, so it's almost always a win.
1073    int policy = 0;
1074    // If trying to do a 'Split-If' at the loop head, it is only
1075    // profitable if the cmp folds up on BOTH paths.  Otherwise we
1076    // risk peeling a loop forever.
1077
1078    // CNC - Disabled for now.  Requires careful handling of loop
1079    // body selection for the cloned code.  Also, make sure we check
1080    // for any input path not being in the same loop as n_ctrl.  For
1081    // irreducible loops we cannot check for 'n_ctrl->is_Loop()'
1082    // because the alternative loop entry points won't be converted
1083    // into LoopNodes.
1084    IdealLoopTree *n_loop = get_loop(n_ctrl);
1085    for( uint j = 1; j < n_ctrl->req(); j++ )
1086      if( get_loop(n_ctrl->in(j)) != n_loop )
1087        return;
1088
1089    // Check for safety of the merge point.
1090    if( !merge_point_safe(n_ctrl) ) {
1091      return;
1092    }
1093
1094    // Split compare 'n' through the merge point if it is profitable
1095    Node *phi = split_thru_phi( n, n_ctrl, policy );
1096    if( !phi ) return;
1097
1098    // Found a Phi to split thru!
1099    // Replace 'n' with the new phi
1100    _igvn.replace_node( n, phi );
1101
1102    // Now split the bool up thru the phi
1103    Node *bolphi = split_thru_phi( bol, n_ctrl, -1 );
1104    guarantee(bolphi != NULL, "null boolean phi node");
1105
1106    _igvn.replace_node( bol, bolphi );
1107    assert( iff->in(1) == bolphi, "" );
1108
1109    if( bolphi->Value(&_igvn)->singleton() )
1110      return;
1111
1112    // Conditional-move?  Must split up now
1113    if( !iff->is_If() ) {
1114      Node *cmovphi = split_thru_phi( iff, n_ctrl, -1 );
1115      _igvn.replace_node( iff, cmovphi );
1116      return;
1117    }
1118
1119    // Now split the IF
1120    do_split_if( iff );
1121    return;
1122  }
1123
1124  // Check for an IF ready to split; one that has its
1125  // condition codes input coming from a Phi at the block start.
1126  int n_op = n->Opcode();
1127
1128  // Check for an IF being dominated by another IF same test
1129  if (n_op == Op_If ||
1130      n_op == Op_RangeCheck) {
1131    Node *bol = n->in(1);
1132    uint max = bol->outcnt();
1133    // Check for same test used more than once?
1134    if (max > 1 && bol->is_Bool()) {
1135      // Search up IDOMs to see if this IF is dominated.
1136      Node *cutoff = get_ctrl(bol);
1137
1138      // Now search up IDOMs till cutoff, looking for a dominating test
1139      Node *prevdom = n;
1140      Node *dom = idom(prevdom);
1141      while (dom != cutoff) {
1142        if (dom->req() > 1 && dom->in(1) == bol && prevdom->in(0) == dom) {
1143          // Replace the dominated test with an obvious true or false.
1144          // Place it on the IGVN worklist for later cleanup.
1145          C->set_major_progress();
1146          dominated_by(prevdom, n, false, true);
1147#ifndef PRODUCT
1148          if( VerifyLoopOptimizations ) verify();
1149#endif
1150          return;
1151        }
1152        prevdom = dom;
1153        dom = idom(prevdom);
1154      }
1155    }
1156  }
1157
1158  // See if a shared loop-varying computation has no loop-varying uses.
1159  // Happens if something is only used for JVM state in uncommon trap exits,
1160  // like various versions of induction variable+offset.  Clone the
1161  // computation per usage to allow it to sink out of the loop.
1162  if (has_ctrl(n) && !n->in(0)) {// n not dead and has no control edge (can float about)
1163    Node *n_ctrl = get_ctrl(n);
1164    IdealLoopTree *n_loop = get_loop(n_ctrl);
1165    if( n_loop != _ltree_root ) {
1166      DUIterator_Fast imax, i = n->fast_outs(imax);
1167      for (; i < imax; i++) {
1168        Node* u = n->fast_out(i);
1169        if( !has_ctrl(u) )     break; // Found control user
1170        IdealLoopTree *u_loop = get_loop(get_ctrl(u));
1171        if( u_loop == n_loop ) break; // Found loop-varying use
1172        if( n_loop->is_member( u_loop ) ) break; // Found use in inner loop
1173        if( u->Opcode() == Op_Opaque1 ) break; // Found loop limit, bugfix for 4677003
1174      }
1175      bool did_break = (i < imax);  // Did we break out of the previous loop?
1176      if (!did_break && n->outcnt() > 1) { // All uses in outer loops!
1177        Node *late_load_ctrl = NULL;
1178        if (n->is_Load()) {
1179          // If n is a load, get and save the result from get_late_ctrl(),
1180          // to be later used in calculating the control for n's clones.
1181          clear_dom_lca_tags();
1182          late_load_ctrl = get_late_ctrl(n, n_ctrl);
1183        }
1184        // If n is a load, and the late control is the same as the current
1185        // control, then the cloning of n is a pointless exercise, because
1186        // GVN will ensure that we end up where we started.
1187        if (!n->is_Load() || late_load_ctrl != n_ctrl) {
1188          for (DUIterator_Last jmin, j = n->last_outs(jmin); j >= jmin; ) {
1189            Node *u = n->last_out(j); // Clone private computation per use
1190            _igvn.rehash_node_delayed(u);
1191            Node *x = n->clone(); // Clone computation
1192            Node *x_ctrl = NULL;
1193            if( u->is_Phi() ) {
1194              // Replace all uses of normal nodes.  Replace Phi uses
1195              // individually, so the separate Nodes can sink down
1196              // different paths.
1197              uint k = 1;
1198              while( u->in(k) != n ) k++;
1199              u->set_req( k, x );
1200              // x goes next to Phi input path
1201              x_ctrl = u->in(0)->in(k);
1202              --j;
1203            } else {              // Normal use
1204              // Replace all uses
1205              for( uint k = 0; k < u->req(); k++ ) {
1206                if( u->in(k) == n ) {
1207                  u->set_req( k, x );
1208                  --j;
1209                }
1210              }
1211              x_ctrl = get_ctrl(u);
1212            }
1213
1214            // Find control for 'x' next to use but not inside inner loops.
1215            // For inner loop uses get the preheader area.
1216            x_ctrl = place_near_use(x_ctrl);
1217
1218            if (n->is_Load()) {
1219              // For loads, add a control edge to a CFG node outside of the loop
1220              // to force them to not combine and return back inside the loop
1221              // during GVN optimization (4641526).
1222              //
1223              // Because we are setting the actual control input, factor in
1224              // the result from get_late_ctrl() so we respect any
1225              // anti-dependences. (6233005).
1226              x_ctrl = dom_lca(late_load_ctrl, x_ctrl);
1227
1228              // Don't allow the control input to be a CFG splitting node.
1229              // Such nodes should only have ProjNodes as outs, e.g. IfNode
1230              // should only have IfTrueNode and IfFalseNode (4985384).
1231              x_ctrl = find_non_split_ctrl(x_ctrl);
1232              assert(dom_depth(n_ctrl) <= dom_depth(x_ctrl), "n is later than its clone");
1233
1234              x->set_req(0, x_ctrl);
1235            }
1236            register_new_node(x, x_ctrl);
1237
1238            // Some institutional knowledge is needed here: 'x' is
1239            // yanked because if the optimizer runs GVN on it all the
1240            // cloned x's will common up and undo this optimization and
1241            // be forced back in the loop.  This is annoying because it
1242            // makes +VerifyOpto report false-positives on progress.  I
1243            // tried setting control edges on the x's to force them to
1244            // not combine, but the matching gets worried when it tries
1245            // to fold a StoreP and an AddP together (as part of an
1246            // address expression) and the AddP and StoreP have
1247            // different controls.
1248            if (!x->is_Load() && !x->is_DecodeNarrowPtr()) _igvn._worklist.yank(x);
1249          }
1250          _igvn.remove_dead_node(n);
1251        }
1252      }
1253    }
1254  }
1255
1256  try_move_store_after_loop(n);
1257
1258  // Check for Opaque2's who's loop has disappeared - who's input is in the
1259  // same loop nest as their output.  Remove 'em, they are no longer useful.
1260  if( n_op == Op_Opaque2 &&
1261      n->in(1) != NULL &&
1262      get_loop(get_ctrl(n)) == get_loop(get_ctrl(n->in(1))) ) {
1263    _igvn.replace_node( n, n->in(1) );
1264  }
1265}
1266
1267//------------------------------split_if_with_blocks---------------------------
1268// Check for aggressive application of 'split-if' optimization,
1269// using basic block level info.
1270void PhaseIdealLoop::split_if_with_blocks( VectorSet &visited, Node_Stack &nstack ) {
1271  Node *n = C->root();
1272  visited.set(n->_idx); // first, mark node as visited
1273  // Do pre-visit work for root
1274  n = split_if_with_blocks_pre( n );
1275  uint cnt = n->outcnt();
1276  uint i   = 0;
1277  while (true) {
1278    // Visit all children
1279    if (i < cnt) {
1280      Node* use = n->raw_out(i);
1281      ++i;
1282      if (use->outcnt() != 0 && !visited.test_set(use->_idx)) {
1283        // Now do pre-visit work for this use
1284        use = split_if_with_blocks_pre( use );
1285        nstack.push(n, i); // Save parent and next use's index.
1286        n   = use;         // Process all children of current use.
1287        cnt = use->outcnt();
1288        i   = 0;
1289      }
1290    }
1291    else {
1292      // All of n's children have been processed, complete post-processing.
1293      if (cnt != 0 && !n->is_Con()) {
1294        assert(has_node(n), "no dead nodes");
1295        split_if_with_blocks_post( n );
1296      }
1297      if (nstack.is_empty()) {
1298        // Finished all nodes on stack.
1299        break;
1300      }
1301      // Get saved parent node and next use's index. Visit the rest of uses.
1302      n   = nstack.node();
1303      cnt = n->outcnt();
1304      i   = nstack.index();
1305      nstack.pop();
1306    }
1307  }
1308}
1309
1310
1311//=============================================================================
1312//
1313//                   C L O N E   A   L O O P   B O D Y
1314//
1315
1316//------------------------------clone_iff--------------------------------------
1317// Passed in a Phi merging (recursively) some nearly equivalent Bool/Cmps.
1318// "Nearly" because all Nodes have been cloned from the original in the loop,
1319// but the fall-in edges to the Cmp are different.  Clone bool/Cmp pairs
1320// through the Phi recursively, and return a Bool.
1321BoolNode *PhaseIdealLoop::clone_iff( PhiNode *phi, IdealLoopTree *loop ) {
1322
1323  // Convert this Phi into a Phi merging Bools
1324  uint i;
1325  for( i = 1; i < phi->req(); i++ ) {
1326    Node *b = phi->in(i);
1327    if( b->is_Phi() ) {
1328      _igvn.replace_input_of(phi, i, clone_iff( b->as_Phi(), loop ));
1329    } else {
1330      assert( b->is_Bool(), "" );
1331    }
1332  }
1333
1334  Node *sample_bool = phi->in(1);
1335  Node *sample_cmp  = sample_bool->in(1);
1336
1337  // Make Phis to merge the Cmp's inputs.
1338  PhiNode *phi1 = new PhiNode( phi->in(0), Type::TOP );
1339  PhiNode *phi2 = new PhiNode( phi->in(0), Type::TOP );
1340  for( i = 1; i < phi->req(); i++ ) {
1341    Node *n1 = phi->in(i)->in(1)->in(1);
1342    Node *n2 = phi->in(i)->in(1)->in(2);
1343    phi1->set_req( i, n1 );
1344    phi2->set_req( i, n2 );
1345    phi1->set_type( phi1->type()->meet_speculative(n1->bottom_type()));
1346    phi2->set_type( phi2->type()->meet_speculative(n2->bottom_type()));
1347  }
1348  // See if these Phis have been made before.
1349  // Register with optimizer
1350  Node *hit1 = _igvn.hash_find_insert(phi1);
1351  if( hit1 ) {                  // Hit, toss just made Phi
1352    _igvn.remove_dead_node(phi1); // Remove new phi
1353    assert( hit1->is_Phi(), "" );
1354    phi1 = (PhiNode*)hit1;      // Use existing phi
1355  } else {                      // Miss
1356    _igvn.register_new_node_with_optimizer(phi1);
1357  }
1358  Node *hit2 = _igvn.hash_find_insert(phi2);
1359  if( hit2 ) {                  // Hit, toss just made Phi
1360    _igvn.remove_dead_node(phi2); // Remove new phi
1361    assert( hit2->is_Phi(), "" );
1362    phi2 = (PhiNode*)hit2;      // Use existing phi
1363  } else {                      // Miss
1364    _igvn.register_new_node_with_optimizer(phi2);
1365  }
1366  // Register Phis with loop/block info
1367  set_ctrl(phi1, phi->in(0));
1368  set_ctrl(phi2, phi->in(0));
1369  // Make a new Cmp
1370  Node *cmp = sample_cmp->clone();
1371  cmp->set_req( 1, phi1 );
1372  cmp->set_req( 2, phi2 );
1373  _igvn.register_new_node_with_optimizer(cmp);
1374  set_ctrl(cmp, phi->in(0));
1375
1376  // Make a new Bool
1377  Node *b = sample_bool->clone();
1378  b->set_req(1,cmp);
1379  _igvn.register_new_node_with_optimizer(b);
1380  set_ctrl(b, phi->in(0));
1381
1382  assert( b->is_Bool(), "" );
1383  return (BoolNode*)b;
1384}
1385
1386//------------------------------clone_bool-------------------------------------
1387// Passed in a Phi merging (recursively) some nearly equivalent Bool/Cmps.
1388// "Nearly" because all Nodes have been cloned from the original in the loop,
1389// but the fall-in edges to the Cmp are different.  Clone bool/Cmp pairs
1390// through the Phi recursively, and return a Bool.
1391CmpNode *PhaseIdealLoop::clone_bool( PhiNode *phi, IdealLoopTree *loop ) {
1392  uint i;
1393  // Convert this Phi into a Phi merging Bools
1394  for( i = 1; i < phi->req(); i++ ) {
1395    Node *b = phi->in(i);
1396    if( b->is_Phi() ) {
1397      _igvn.replace_input_of(phi, i, clone_bool( b->as_Phi(), loop ));
1398    } else {
1399      assert( b->is_Cmp() || b->is_top(), "inputs are all Cmp or TOP" );
1400    }
1401  }
1402
1403  Node *sample_cmp = phi->in(1);
1404
1405  // Make Phis to merge the Cmp's inputs.
1406  PhiNode *phi1 = new PhiNode( phi->in(0), Type::TOP );
1407  PhiNode *phi2 = new PhiNode( phi->in(0), Type::TOP );
1408  for( uint j = 1; j < phi->req(); j++ ) {
1409    Node *cmp_top = phi->in(j); // Inputs are all Cmp or TOP
1410    Node *n1, *n2;
1411    if( cmp_top->is_Cmp() ) {
1412      n1 = cmp_top->in(1);
1413      n2 = cmp_top->in(2);
1414    } else {
1415      n1 = n2 = cmp_top;
1416    }
1417    phi1->set_req( j, n1 );
1418    phi2->set_req( j, n2 );
1419    phi1->set_type(phi1->type()->meet_speculative(n1->bottom_type()));
1420    phi2->set_type(phi2->type()->meet_speculative(n2->bottom_type()));
1421  }
1422
1423  // See if these Phis have been made before.
1424  // Register with optimizer
1425  Node *hit1 = _igvn.hash_find_insert(phi1);
1426  if( hit1 ) {                  // Hit, toss just made Phi
1427    _igvn.remove_dead_node(phi1); // Remove new phi
1428    assert( hit1->is_Phi(), "" );
1429    phi1 = (PhiNode*)hit1;      // Use existing phi
1430  } else {                      // Miss
1431    _igvn.register_new_node_with_optimizer(phi1);
1432  }
1433  Node *hit2 = _igvn.hash_find_insert(phi2);
1434  if( hit2 ) {                  // Hit, toss just made Phi
1435    _igvn.remove_dead_node(phi2); // Remove new phi
1436    assert( hit2->is_Phi(), "" );
1437    phi2 = (PhiNode*)hit2;      // Use existing phi
1438  } else {                      // Miss
1439    _igvn.register_new_node_with_optimizer(phi2);
1440  }
1441  // Register Phis with loop/block info
1442  set_ctrl(phi1, phi->in(0));
1443  set_ctrl(phi2, phi->in(0));
1444  // Make a new Cmp
1445  Node *cmp = sample_cmp->clone();
1446  cmp->set_req( 1, phi1 );
1447  cmp->set_req( 2, phi2 );
1448  _igvn.register_new_node_with_optimizer(cmp);
1449  set_ctrl(cmp, phi->in(0));
1450
1451  assert( cmp->is_Cmp(), "" );
1452  return (CmpNode*)cmp;
1453}
1454
1455//------------------------------sink_use---------------------------------------
1456// If 'use' was in the loop-exit block, it now needs to be sunk
1457// below the post-loop merge point.
1458void PhaseIdealLoop::sink_use( Node *use, Node *post_loop ) {
1459  if (!use->is_CFG() && get_ctrl(use) == post_loop->in(2)) {
1460    set_ctrl(use, post_loop);
1461    for (DUIterator j = use->outs(); use->has_out(j); j++)
1462      sink_use(use->out(j), post_loop);
1463  }
1464}
1465
1466//------------------------------clone_loop-------------------------------------
1467//
1468//                   C L O N E   A   L O O P   B O D Y
1469//
1470// This is the basic building block of the loop optimizations.  It clones an
1471// entire loop body.  It makes an old_new loop body mapping; with this mapping
1472// you can find the new-loop equivalent to an old-loop node.  All new-loop
1473// nodes are exactly equal to their old-loop counterparts, all edges are the
1474// same.  All exits from the old-loop now have a RegionNode that merges the
1475// equivalent new-loop path.  This is true even for the normal "loop-exit"
1476// condition.  All uses of loop-invariant old-loop values now come from (one
1477// or more) Phis that merge their new-loop equivalents.
1478//
1479// This operation leaves the graph in an illegal state: there are two valid
1480// control edges coming from the loop pre-header to both loop bodies.  I'll
1481// definitely have to hack the graph after running this transform.
1482//
1483// From this building block I will further edit edges to perform loop peeling
1484// or loop unrolling or iteration splitting (Range-Check-Elimination), etc.
1485//
1486// Parameter side_by_size_idom:
1487//   When side_by_size_idom is NULL, the dominator tree is constructed for
1488//      the clone loop to dominate the original.  Used in construction of
1489//      pre-main-post loop sequence.
1490//   When nonnull, the clone and original are side-by-side, both are
1491//      dominated by the side_by_side_idom node.  Used in construction of
1492//      unswitched loops.
1493void PhaseIdealLoop::clone_loop( IdealLoopTree *loop, Node_List &old_new, int dd,
1494                                 Node* side_by_side_idom) {
1495
1496  if (C->do_vector_loop() && PrintOpto) {
1497    const char* mname = C->method()->name()->as_quoted_ascii();
1498    if (mname != NULL) {
1499      tty->print("PhaseIdealLoop::clone_loop: for vectorize method %s\n", mname);
1500    }
1501  }
1502
1503  CloneMap& cm = C->clone_map();
1504  Dict* dict = cm.dict();
1505  if (C->do_vector_loop()) {
1506    cm.set_clone_idx(cm.max_gen()+1);
1507#ifndef PRODUCT
1508    if (PrintOpto) {
1509      tty->print_cr("PhaseIdealLoop::clone_loop: _clone_idx %d", cm.clone_idx());
1510      loop->dump_head();
1511    }
1512#endif
1513  }
1514
1515  // Step 1: Clone the loop body.  Make the old->new mapping.
1516  uint i;
1517  for( i = 0; i < loop->_body.size(); i++ ) {
1518    Node *old = loop->_body.at(i);
1519    Node *nnn = old->clone();
1520    old_new.map( old->_idx, nnn );
1521    if (C->do_vector_loop()) {
1522      cm.verify_insert_and_clone(old, nnn, cm.clone_idx());
1523    }
1524    _igvn.register_new_node_with_optimizer(nnn);
1525  }
1526
1527
1528  // Step 2: Fix the edges in the new body.  If the old input is outside the
1529  // loop use it.  If the old input is INside the loop, use the corresponding
1530  // new node instead.
1531  for( i = 0; i < loop->_body.size(); i++ ) {
1532    Node *old = loop->_body.at(i);
1533    Node *nnn = old_new[old->_idx];
1534    // Fix CFG/Loop controlling the new node
1535    if (has_ctrl(old)) {
1536      set_ctrl(nnn, old_new[get_ctrl(old)->_idx]);
1537    } else {
1538      set_loop(nnn, loop->_parent);
1539      if (old->outcnt() > 0) {
1540        set_idom( nnn, old_new[idom(old)->_idx], dd );
1541      }
1542    }
1543    // Correct edges to the new node
1544    for( uint j = 0; j < nnn->req(); j++ ) {
1545        Node *n = nnn->in(j);
1546        if( n ) {
1547          IdealLoopTree *old_in_loop = get_loop( has_ctrl(n) ? get_ctrl(n) : n );
1548          if( loop->is_member( old_in_loop ) )
1549            nnn->set_req(j, old_new[n->_idx]);
1550        }
1551    }
1552    _igvn.hash_find_insert(nnn);
1553  }
1554  Node *newhead = old_new[loop->_head->_idx];
1555  set_idom(newhead, newhead->in(LoopNode::EntryControl), dd);
1556
1557
1558  // Step 3: Now fix control uses.  Loop varying control uses have already
1559  // been fixed up (as part of all input edges in Step 2).  Loop invariant
1560  // control uses must be either an IfFalse or an IfTrue.  Make a merge
1561  // point to merge the old and new IfFalse/IfTrue nodes; make the use
1562  // refer to this.
1563  ResourceArea *area = Thread::current()->resource_area();
1564  Node_List worklist(area);
1565  uint new_counter = C->unique();
1566  for( i = 0; i < loop->_body.size(); i++ ) {
1567    Node* old = loop->_body.at(i);
1568    if( !old->is_CFG() ) continue;
1569    Node* nnn = old_new[old->_idx];
1570
1571    // Copy uses to a worklist, so I can munge the def-use info
1572    // with impunity.
1573    for (DUIterator_Fast jmax, j = old->fast_outs(jmax); j < jmax; j++)
1574      worklist.push(old->fast_out(j));
1575
1576    while( worklist.size() ) {  // Visit all uses
1577      Node *use = worklist.pop();
1578      if (!has_node(use))  continue; // Ignore dead nodes
1579      IdealLoopTree *use_loop = get_loop( has_ctrl(use) ? get_ctrl(use) : use );
1580      if( !loop->is_member( use_loop ) && use->is_CFG() ) {
1581        // Both OLD and USE are CFG nodes here.
1582        assert( use->is_Proj(), "" );
1583
1584        // Clone the loop exit control projection
1585        Node *newuse = use->clone();
1586        if (C->do_vector_loop()) {
1587          cm.verify_insert_and_clone(use, newuse, cm.clone_idx());
1588        }
1589        newuse->set_req(0,nnn);
1590        _igvn.register_new_node_with_optimizer(newuse);
1591        set_loop(newuse, use_loop);
1592        set_idom(newuse, nnn, dom_depth(nnn) + 1 );
1593
1594        // We need a Region to merge the exit from the peeled body and the
1595        // exit from the old loop body.
1596        RegionNode *r = new RegionNode(3);
1597        // Map the old use to the new merge point
1598        old_new.map( use->_idx, r );
1599        uint dd_r = MIN2(dom_depth(newuse),dom_depth(use));
1600        assert( dd_r >= dom_depth(dom_lca(newuse,use)), "" );
1601
1602        // The original user of 'use' uses 'r' instead.
1603        for (DUIterator_Last lmin, l = use->last_outs(lmin); l >= lmin;) {
1604          Node* useuse = use->last_out(l);
1605          _igvn.rehash_node_delayed(useuse);
1606          uint uses_found = 0;
1607          if( useuse->in(0) == use ) {
1608            useuse->set_req(0, r);
1609            uses_found++;
1610            if( useuse->is_CFG() ) {
1611              assert( dom_depth(useuse) > dd_r, "" );
1612              set_idom(useuse, r, dom_depth(useuse));
1613            }
1614          }
1615          for( uint k = 1; k < useuse->req(); k++ ) {
1616            if( useuse->in(k) == use ) {
1617              useuse->set_req(k, r);
1618              uses_found++;
1619            }
1620          }
1621          l -= uses_found;    // we deleted 1 or more copies of this edge
1622        }
1623
1624        // Now finish up 'r'
1625        r->set_req( 1, newuse );
1626        r->set_req( 2,    use );
1627        _igvn.register_new_node_with_optimizer(r);
1628        set_loop(r, use_loop);
1629        set_idom(r, !side_by_side_idom ? newuse->in(0) : side_by_side_idom, dd_r);
1630      } // End of if a loop-exit test
1631    }
1632  }
1633
1634  // Step 4: If loop-invariant use is not control, it must be dominated by a
1635  // loop exit IfFalse/IfTrue.  Find "proper" loop exit.  Make a Region
1636  // there if needed.  Make a Phi there merging old and new used values.
1637  Node_List *split_if_set = NULL;
1638  Node_List *split_bool_set = NULL;
1639  Node_List *split_cex_set = NULL;
1640  for( i = 0; i < loop->_body.size(); i++ ) {
1641    Node* old = loop->_body.at(i);
1642    Node* nnn = old_new[old->_idx];
1643    // Copy uses to a worklist, so I can munge the def-use info
1644    // with impunity.
1645    for (DUIterator_Fast jmax, j = old->fast_outs(jmax); j < jmax; j++)
1646      worklist.push(old->fast_out(j));
1647
1648    while( worklist.size() ) {
1649      Node *use = worklist.pop();
1650      if (!has_node(use))  continue; // Ignore dead nodes
1651      if (use->in(0) == C->top())  continue;
1652      IdealLoopTree *use_loop = get_loop( has_ctrl(use) ? get_ctrl(use) : use );
1653      // Check for data-use outside of loop - at least one of OLD or USE
1654      // must not be a CFG node.
1655      if( !loop->is_member( use_loop ) && (!old->is_CFG() || !use->is_CFG())) {
1656
1657        // If the Data use is an IF, that means we have an IF outside of the
1658        // loop that is switching on a condition that is set inside of the
1659        // loop.  Happens if people set a loop-exit flag; then test the flag
1660        // in the loop to break the loop, then test is again outside of the
1661        // loop to determine which way the loop exited.
1662        // Loop predicate If node connects to Bool node through Opaque1 node.
1663        if (use->is_If() || use->is_CMove() || C->is_predicate_opaq(use)) {
1664          // Since this code is highly unlikely, we lazily build the worklist
1665          // of such Nodes to go split.
1666          if( !split_if_set )
1667            split_if_set = new Node_List(area);
1668          split_if_set->push(use);
1669        }
1670        if( use->is_Bool() ) {
1671          if( !split_bool_set )
1672            split_bool_set = new Node_List(area);
1673          split_bool_set->push(use);
1674        }
1675        if( use->Opcode() == Op_CreateEx ) {
1676          if( !split_cex_set )
1677            split_cex_set = new Node_List(area);
1678          split_cex_set->push(use);
1679        }
1680
1681
1682        // Get "block" use is in
1683        uint idx = 0;
1684        while( use->in(idx) != old ) idx++;
1685        Node *prev = use->is_CFG() ? use : get_ctrl(use);
1686        assert( !loop->is_member( get_loop( prev ) ), "" );
1687        Node *cfg = prev->_idx >= new_counter
1688          ? prev->in(2)
1689          : idom(prev);
1690        if( use->is_Phi() )     // Phi use is in prior block
1691          cfg = prev->in(idx);  // NOT in block of Phi itself
1692        if (cfg->is_top()) {    // Use is dead?
1693          _igvn.replace_input_of(use, idx, C->top());
1694          continue;
1695        }
1696
1697        while( !loop->is_member( get_loop( cfg ) ) ) {
1698          prev = cfg;
1699          cfg = cfg->_idx >= new_counter ? cfg->in(2) : idom(cfg);
1700        }
1701        // If the use occurs after merging several exits from the loop, then
1702        // old value must have dominated all those exits.  Since the same old
1703        // value was used on all those exits we did not need a Phi at this
1704        // merge point.  NOW we do need a Phi here.  Each loop exit value
1705        // is now merged with the peeled body exit; each exit gets its own
1706        // private Phi and those Phis need to be merged here.
1707        Node *phi;
1708        if( prev->is_Region() ) {
1709          if( idx == 0 ) {      // Updating control edge?
1710            phi = prev;         // Just use existing control
1711          } else {              // Else need a new Phi
1712            phi = PhiNode::make( prev, old );
1713            // Now recursively fix up the new uses of old!
1714            for( uint i = 1; i < prev->req(); i++ ) {
1715              worklist.push(phi); // Onto worklist once for each 'old' input
1716            }
1717          }
1718        } else {
1719          // Get new RegionNode merging old and new loop exits
1720          prev = old_new[prev->_idx];
1721          assert( prev, "just made this in step 7" );
1722          if( idx == 0 ) {      // Updating control edge?
1723            phi = prev;         // Just use existing control
1724          } else {              // Else need a new Phi
1725            // Make a new Phi merging data values properly
1726            phi = PhiNode::make( prev, old );
1727            phi->set_req( 1, nnn );
1728          }
1729        }
1730        // If inserting a new Phi, check for prior hits
1731        if( idx != 0 ) {
1732          Node *hit = _igvn.hash_find_insert(phi);
1733          if( hit == NULL ) {
1734           _igvn.register_new_node_with_optimizer(phi); // Register new phi
1735          } else {                                      // or
1736            // Remove the new phi from the graph and use the hit
1737            _igvn.remove_dead_node(phi);
1738            phi = hit;                                  // Use existing phi
1739          }
1740          set_ctrl(phi, prev);
1741        }
1742        // Make 'use' use the Phi instead of the old loop body exit value
1743        _igvn.replace_input_of(use, idx, phi);
1744        if( use->_idx >= new_counter ) { // If updating new phis
1745          // Not needed for correctness, but prevents a weak assert
1746          // in AddPNode from tripping (when we end up with different
1747          // base & derived Phis that will become the same after
1748          // IGVN does CSE).
1749          Node *hit = _igvn.hash_find_insert(use);
1750          if( hit )             // Go ahead and re-hash for hits.
1751            _igvn.replace_node( use, hit );
1752        }
1753
1754        // If 'use' was in the loop-exit block, it now needs to be sunk
1755        // below the post-loop merge point.
1756        sink_use( use, prev );
1757      }
1758    }
1759  }
1760
1761  // Check for IFs that need splitting/cloning.  Happens if an IF outside of
1762  // the loop uses a condition set in the loop.  The original IF probably
1763  // takes control from one or more OLD Regions (which in turn get from NEW
1764  // Regions).  In any case, there will be a set of Phis for each merge point
1765  // from the IF up to where the original BOOL def exists the loop.
1766  if( split_if_set ) {
1767    while( split_if_set->size() ) {
1768      Node *iff = split_if_set->pop();
1769      if( iff->in(1)->is_Phi() ) {
1770        BoolNode *b = clone_iff( iff->in(1)->as_Phi(), loop );
1771        _igvn.replace_input_of(iff, 1, b);
1772      }
1773    }
1774  }
1775  if( split_bool_set ) {
1776    while( split_bool_set->size() ) {
1777      Node *b = split_bool_set->pop();
1778      Node *phi = b->in(1);
1779      assert( phi->is_Phi(), "" );
1780      CmpNode *cmp = clone_bool( (PhiNode*)phi, loop );
1781      _igvn.replace_input_of(b, 1, cmp);
1782    }
1783  }
1784  if( split_cex_set ) {
1785    while( split_cex_set->size() ) {
1786      Node *b = split_cex_set->pop();
1787      assert( b->in(0)->is_Region(), "" );
1788      assert( b->in(1)->is_Phi(), "" );
1789      assert( b->in(0)->in(0) == b->in(1)->in(0), "" );
1790      split_up( b, b->in(0), NULL );
1791    }
1792  }
1793
1794}
1795
1796
1797//---------------------- stride_of_possible_iv -------------------------------------
1798// Looks for an iff/bool/comp with one operand of the compare
1799// being a cycle involving an add and a phi,
1800// with an optional truncation (left-shift followed by a right-shift)
1801// of the add. Returns zero if not an iv.
1802int PhaseIdealLoop::stride_of_possible_iv(Node* iff) {
1803  Node* trunc1 = NULL;
1804  Node* trunc2 = NULL;
1805  const TypeInt* ttype = NULL;
1806  if (!iff->is_If() || iff->in(1) == NULL || !iff->in(1)->is_Bool()) {
1807    return 0;
1808  }
1809  BoolNode* bl = iff->in(1)->as_Bool();
1810  Node* cmp = bl->in(1);
1811  if (!cmp || cmp->Opcode() != Op_CmpI && cmp->Opcode() != Op_CmpU) {
1812    return 0;
1813  }
1814  // Must have an invariant operand
1815  if (is_member(get_loop(iff), get_ctrl(cmp->in(2)))) {
1816    return 0;
1817  }
1818  Node* add2 = NULL;
1819  Node* cmp1 = cmp->in(1);
1820  if (cmp1->is_Phi()) {
1821    // (If (Bool (CmpX phi:(Phi ...(Optional-trunc(AddI phi add2))) )))
1822    Node* phi = cmp1;
1823    for (uint i = 1; i < phi->req(); i++) {
1824      Node* in = phi->in(i);
1825      Node* add = CountedLoopNode::match_incr_with_optional_truncation(in,
1826                                &trunc1, &trunc2, &ttype);
1827      if (add && add->in(1) == phi) {
1828        add2 = add->in(2);
1829        break;
1830      }
1831    }
1832  } else {
1833    // (If (Bool (CmpX addtrunc:(Optional-trunc((AddI (Phi ...addtrunc...) add2)) )))
1834    Node* addtrunc = cmp1;
1835    Node* add = CountedLoopNode::match_incr_with_optional_truncation(addtrunc,
1836                                &trunc1, &trunc2, &ttype);
1837    if (add && add->in(1)->is_Phi()) {
1838      Node* phi = add->in(1);
1839      for (uint i = 1; i < phi->req(); i++) {
1840        if (phi->in(i) == addtrunc) {
1841          add2 = add->in(2);
1842          break;
1843        }
1844      }
1845    }
1846  }
1847  if (add2 != NULL) {
1848    const TypeInt* add2t = _igvn.type(add2)->is_int();
1849    if (add2t->is_con()) {
1850      return add2t->get_con();
1851    }
1852  }
1853  return 0;
1854}
1855
1856
1857//---------------------- stay_in_loop -------------------------------------
1858// Return the (unique) control output node that's in the loop (if it exists.)
1859Node* PhaseIdealLoop::stay_in_loop( Node* n, IdealLoopTree *loop) {
1860  Node* unique = NULL;
1861  if (!n) return NULL;
1862  for (DUIterator_Fast imax, i = n->fast_outs(imax); i < imax; i++) {
1863    Node* use = n->fast_out(i);
1864    if (!has_ctrl(use) && loop->is_member(get_loop(use))) {
1865      if (unique != NULL) {
1866        return NULL;
1867      }
1868      unique = use;
1869    }
1870  }
1871  return unique;
1872}
1873
1874//------------------------------ register_node -------------------------------------
1875// Utility to register node "n" with PhaseIdealLoop
1876void PhaseIdealLoop::register_node(Node* n, IdealLoopTree *loop, Node* pred, int ddepth) {
1877  _igvn.register_new_node_with_optimizer(n);
1878  loop->_body.push(n);
1879  if (n->is_CFG()) {
1880    set_loop(n, loop);
1881    set_idom(n, pred, ddepth);
1882  } else {
1883    set_ctrl(n, pred);
1884  }
1885}
1886
1887//------------------------------ proj_clone -------------------------------------
1888// Utility to create an if-projection
1889ProjNode* PhaseIdealLoop::proj_clone(ProjNode* p, IfNode* iff) {
1890  ProjNode* c = p->clone()->as_Proj();
1891  c->set_req(0, iff);
1892  return c;
1893}
1894
1895//------------------------------ short_circuit_if -------------------------------------
1896// Force the iff control output to be the live_proj
1897Node* PhaseIdealLoop::short_circuit_if(IfNode* iff, ProjNode* live_proj) {
1898  guarantee(live_proj != NULL, "null projection");
1899  int proj_con = live_proj->_con;
1900  assert(proj_con == 0 || proj_con == 1, "false or true projection");
1901  Node *con = _igvn.intcon(proj_con);
1902  set_ctrl(con, C->root());
1903  if (iff) {
1904    iff->set_req(1, con);
1905  }
1906  return con;
1907}
1908
1909//------------------------------ insert_if_before_proj -------------------------------------
1910// Insert a new if before an if projection (* - new node)
1911//
1912// before
1913//           if(test)
1914//           /     \
1915//          v       v
1916//    other-proj   proj (arg)
1917//
1918// after
1919//           if(test)
1920//           /     \
1921//          /       v
1922//         |      * proj-clone
1923//         v          |
1924//    other-proj      v
1925//                * new_if(relop(cmp[IU](left,right)))
1926//                  /  \
1927//                 v    v
1928//         * new-proj  proj
1929//         (returned)
1930//
1931ProjNode* PhaseIdealLoop::insert_if_before_proj(Node* left, bool Signed, BoolTest::mask relop, Node* right, ProjNode* proj) {
1932  IfNode* iff = proj->in(0)->as_If();
1933  IdealLoopTree *loop = get_loop(proj);
1934  ProjNode *other_proj = iff->proj_out(!proj->is_IfTrue())->as_Proj();
1935  int ddepth = dom_depth(proj);
1936
1937  _igvn.rehash_node_delayed(iff);
1938  _igvn.rehash_node_delayed(proj);
1939
1940  proj->set_req(0, NULL);  // temporary disconnect
1941  ProjNode* proj2 = proj_clone(proj, iff);
1942  register_node(proj2, loop, iff, ddepth);
1943
1944  Node* cmp = Signed ? (Node*) new CmpINode(left, right) : (Node*) new CmpUNode(left, right);
1945  register_node(cmp, loop, proj2, ddepth);
1946
1947  BoolNode* bol = new BoolNode(cmp, relop);
1948  register_node(bol, loop, proj2, ddepth);
1949
1950  int opcode = iff->Opcode();
1951  assert(opcode == Op_If || opcode == Op_RangeCheck, "unexpected opcode");
1952  IfNode* new_if = (opcode == Op_If) ? new IfNode(proj2, bol, iff->_prob, iff->_fcnt):
1953    new RangeCheckNode(proj2, bol, iff->_prob, iff->_fcnt);
1954  register_node(new_if, loop, proj2, ddepth);
1955
1956  proj->set_req(0, new_if); // reattach
1957  set_idom(proj, new_if, ddepth);
1958
1959  ProjNode* new_exit = proj_clone(other_proj, new_if)->as_Proj();
1960  guarantee(new_exit != NULL, "null exit node");
1961  register_node(new_exit, get_loop(other_proj), new_if, ddepth);
1962
1963  return new_exit;
1964}
1965
1966//------------------------------ insert_region_before_proj -------------------------------------
1967// Insert a region before an if projection (* - new node)
1968//
1969// before
1970//           if(test)
1971//          /      |
1972//         v       |
1973//       proj      v
1974//               other-proj
1975//
1976// after
1977//           if(test)
1978//          /      |
1979//         v       |
1980// * proj-clone    v
1981//         |     other-proj
1982//         v
1983// * new-region
1984//         |
1985//         v
1986// *      dum_if
1987//       /     \
1988//      v       \
1989// * dum-proj    v
1990//              proj
1991//
1992RegionNode* PhaseIdealLoop::insert_region_before_proj(ProjNode* proj) {
1993  IfNode* iff = proj->in(0)->as_If();
1994  IdealLoopTree *loop = get_loop(proj);
1995  ProjNode *other_proj = iff->proj_out(!proj->is_IfTrue())->as_Proj();
1996  int ddepth = dom_depth(proj);
1997
1998  _igvn.rehash_node_delayed(iff);
1999  _igvn.rehash_node_delayed(proj);
2000
2001  proj->set_req(0, NULL);  // temporary disconnect
2002  ProjNode* proj2 = proj_clone(proj, iff);
2003  register_node(proj2, loop, iff, ddepth);
2004
2005  RegionNode* reg = new RegionNode(2);
2006  reg->set_req(1, proj2);
2007  register_node(reg, loop, iff, ddepth);
2008
2009  IfNode* dum_if = new IfNode(reg, short_circuit_if(NULL, proj), iff->_prob, iff->_fcnt);
2010  register_node(dum_if, loop, reg, ddepth);
2011
2012  proj->set_req(0, dum_if); // reattach
2013  set_idom(proj, dum_if, ddepth);
2014
2015  ProjNode* dum_proj = proj_clone(other_proj, dum_if);
2016  register_node(dum_proj, loop, dum_if, ddepth);
2017
2018  return reg;
2019}
2020
2021//------------------------------ insert_cmpi_loop_exit -------------------------------------
2022// Clone a signed compare loop exit from an unsigned compare and
2023// insert it before the unsigned cmp on the stay-in-loop path.
2024// All new nodes inserted in the dominator tree between the original
2025// if and it's projections.  The original if test is replaced with
2026// a constant to force the stay-in-loop path.
2027//
2028// This is done to make sure that the original if and it's projections
2029// still dominate the same set of control nodes, that the ctrl() relation
2030// from data nodes to them is preserved, and that their loop nesting is
2031// preserved.
2032//
2033// before
2034//          if(i <u limit)    unsigned compare loop exit
2035//         /       |
2036//        v        v
2037//   exit-proj   stay-in-loop-proj
2038//
2039// after
2040//          if(stay-in-loop-const)  original if
2041//         /       |
2042//        /        v
2043//       /  if(i <  limit)    new signed test
2044//      /  /       |
2045//     /  /        v
2046//    /  /  if(i <u limit)    new cloned unsigned test
2047//   /  /   /      |
2048//   v  v  v       |
2049//    region       |
2050//        |        |
2051//      dum-if     |
2052//     /  |        |
2053// ether  |        |
2054//        v        v
2055//   exit-proj   stay-in-loop-proj
2056//
2057IfNode* PhaseIdealLoop::insert_cmpi_loop_exit(IfNode* if_cmpu, IdealLoopTree *loop) {
2058  const bool Signed   = true;
2059  const bool Unsigned = false;
2060
2061  BoolNode* bol = if_cmpu->in(1)->as_Bool();
2062  if (bol->_test._test != BoolTest::lt) return NULL;
2063  CmpNode* cmpu = bol->in(1)->as_Cmp();
2064  if (cmpu->Opcode() != Op_CmpU) return NULL;
2065  int stride = stride_of_possible_iv(if_cmpu);
2066  if (stride == 0) return NULL;
2067
2068  Node* lp_proj = stay_in_loop(if_cmpu, loop);
2069  guarantee(lp_proj != NULL, "null loop node");
2070
2071  ProjNode* lp_continue = lp_proj->as_Proj();
2072  ProjNode* lp_exit     = if_cmpu->proj_out(!lp_continue->is_IfTrue())->as_Proj();
2073
2074  Node* limit = NULL;
2075  if (stride > 0) {
2076    limit = cmpu->in(2);
2077  } else {
2078    limit = _igvn.makecon(TypeInt::ZERO);
2079    set_ctrl(limit, C->root());
2080  }
2081  // Create a new region on the exit path
2082  RegionNode* reg = insert_region_before_proj(lp_exit);
2083  guarantee(reg != NULL, "null region node");
2084
2085  // Clone the if-cmpu-true-false using a signed compare
2086  BoolTest::mask rel_i = stride > 0 ? bol->_test._test : BoolTest::ge;
2087  ProjNode* cmpi_exit = insert_if_before_proj(cmpu->in(1), Signed, rel_i, limit, lp_continue);
2088  reg->add_req(cmpi_exit);
2089
2090  // Clone the if-cmpu-true-false
2091  BoolTest::mask rel_u = bol->_test._test;
2092  ProjNode* cmpu_exit = insert_if_before_proj(cmpu->in(1), Unsigned, rel_u, cmpu->in(2), lp_continue);
2093  reg->add_req(cmpu_exit);
2094
2095  // Force original if to stay in loop.
2096  short_circuit_if(if_cmpu, lp_continue);
2097
2098  return cmpi_exit->in(0)->as_If();
2099}
2100
2101//------------------------------ remove_cmpi_loop_exit -------------------------------------
2102// Remove a previously inserted signed compare loop exit.
2103void PhaseIdealLoop::remove_cmpi_loop_exit(IfNode* if_cmp, IdealLoopTree *loop) {
2104  Node* lp_proj = stay_in_loop(if_cmp, loop);
2105  assert(if_cmp->in(1)->in(1)->Opcode() == Op_CmpI &&
2106         stay_in_loop(lp_proj, loop)->is_If() &&
2107         stay_in_loop(lp_proj, loop)->in(1)->in(1)->Opcode() == Op_CmpU, "inserted cmpi before cmpu");
2108  Node *con = _igvn.makecon(lp_proj->is_IfTrue() ? TypeInt::ONE : TypeInt::ZERO);
2109  set_ctrl(con, C->root());
2110  if_cmp->set_req(1, con);
2111}
2112
2113//------------------------------ scheduled_nodelist -------------------------------------
2114// Create a post order schedule of nodes that are in the
2115// "member" set.  The list is returned in "sched".
2116// The first node in "sched" is the loop head, followed by
2117// nodes which have no inputs in the "member" set, and then
2118// followed by the nodes that have an immediate input dependence
2119// on a node in "sched".
2120void PhaseIdealLoop::scheduled_nodelist( IdealLoopTree *loop, VectorSet& member, Node_List &sched ) {
2121
2122  assert(member.test(loop->_head->_idx), "loop head must be in member set");
2123  Arena *a = Thread::current()->resource_area();
2124  VectorSet visited(a);
2125  Node_Stack nstack(a, loop->_body.size());
2126
2127  Node* n  = loop->_head;  // top of stack is cached in "n"
2128  uint idx = 0;
2129  visited.set(n->_idx);
2130
2131  // Initially push all with no inputs from within member set
2132  for(uint i = 0; i < loop->_body.size(); i++ ) {
2133    Node *elt = loop->_body.at(i);
2134    if (member.test(elt->_idx)) {
2135      bool found = false;
2136      for (uint j = 0; j < elt->req(); j++) {
2137        Node* def = elt->in(j);
2138        if (def && member.test(def->_idx) && def != elt) {
2139          found = true;
2140          break;
2141        }
2142      }
2143      if (!found && elt != loop->_head) {
2144        nstack.push(n, idx);
2145        n = elt;
2146        assert(!visited.test(n->_idx), "not seen yet");
2147        visited.set(n->_idx);
2148      }
2149    }
2150  }
2151
2152  // traverse out's that are in the member set
2153  while (true) {
2154    if (idx < n->outcnt()) {
2155      Node* use = n->raw_out(idx);
2156      idx++;
2157      if (!visited.test_set(use->_idx)) {
2158        if (member.test(use->_idx)) {
2159          nstack.push(n, idx);
2160          n = use;
2161          idx = 0;
2162        }
2163      }
2164    } else {
2165      // All outputs processed
2166      sched.push(n);
2167      if (nstack.is_empty()) break;
2168      n   = nstack.node();
2169      idx = nstack.index();
2170      nstack.pop();
2171    }
2172  }
2173}
2174
2175
2176//------------------------------ has_use_in_set -------------------------------------
2177// Has a use in the vector set
2178bool PhaseIdealLoop::has_use_in_set( Node* n, VectorSet& vset ) {
2179  for (DUIterator_Fast jmax, j = n->fast_outs(jmax); j < jmax; j++) {
2180    Node* use = n->fast_out(j);
2181    if (vset.test(use->_idx)) {
2182      return true;
2183    }
2184  }
2185  return false;
2186}
2187
2188
2189//------------------------------ has_use_internal_to_set -------------------------------------
2190// Has use internal to the vector set (ie. not in a phi at the loop head)
2191bool PhaseIdealLoop::has_use_internal_to_set( Node* n, VectorSet& vset, IdealLoopTree *loop ) {
2192  Node* head  = loop->_head;
2193  for (DUIterator_Fast jmax, j = n->fast_outs(jmax); j < jmax; j++) {
2194    Node* use = n->fast_out(j);
2195    if (vset.test(use->_idx) && !(use->is_Phi() && use->in(0) == head)) {
2196      return true;
2197    }
2198  }
2199  return false;
2200}
2201
2202
2203//------------------------------ clone_for_use_outside_loop -------------------------------------
2204// clone "n" for uses that are outside of loop
2205int PhaseIdealLoop::clone_for_use_outside_loop( IdealLoopTree *loop, Node* n, Node_List& worklist ) {
2206  int cloned = 0;
2207  assert(worklist.size() == 0, "should be empty");
2208  for (DUIterator_Fast jmax, j = n->fast_outs(jmax); j < jmax; j++) {
2209    Node* use = n->fast_out(j);
2210    if( !loop->is_member(get_loop(has_ctrl(use) ? get_ctrl(use) : use)) ) {
2211      worklist.push(use);
2212    }
2213  }
2214  while( worklist.size() ) {
2215    Node *use = worklist.pop();
2216    if (!has_node(use) || use->in(0) == C->top()) continue;
2217    uint j;
2218    for (j = 0; j < use->req(); j++) {
2219      if (use->in(j) == n) break;
2220    }
2221    assert(j < use->req(), "must be there");
2222
2223    // clone "n" and insert it between the inputs of "n" and the use outside the loop
2224    Node* n_clone = n->clone();
2225    _igvn.replace_input_of(use, j, n_clone);
2226    cloned++;
2227    Node* use_c;
2228    if (!use->is_Phi()) {
2229      use_c = has_ctrl(use) ? get_ctrl(use) : use->in(0);
2230    } else {
2231      // Use in a phi is considered a use in the associated predecessor block
2232      use_c = use->in(0)->in(j);
2233    }
2234    set_ctrl(n_clone, use_c);
2235    assert(!loop->is_member(get_loop(use_c)), "should be outside loop");
2236    get_loop(use_c)->_body.push(n_clone);
2237    _igvn.register_new_node_with_optimizer(n_clone);
2238#if !defined(PRODUCT)
2239    if (TracePartialPeeling) {
2240      tty->print_cr("loop exit cloning old: %d new: %d newbb: %d", n->_idx, n_clone->_idx, get_ctrl(n_clone)->_idx);
2241    }
2242#endif
2243  }
2244  return cloned;
2245}
2246
2247
2248//------------------------------ clone_for_special_use_inside_loop -------------------------------------
2249// clone "n" for special uses that are in the not_peeled region.
2250// If these def-uses occur in separate blocks, the code generator
2251// marks the method as not compilable.  For example, if a "BoolNode"
2252// is in a different basic block than the "IfNode" that uses it, then
2253// the compilation is aborted in the code generator.
2254void PhaseIdealLoop::clone_for_special_use_inside_loop( IdealLoopTree *loop, Node* n,
2255                                                        VectorSet& not_peel, Node_List& sink_list, Node_List& worklist ) {
2256  if (n->is_Phi() || n->is_Load()) {
2257    return;
2258  }
2259  assert(worklist.size() == 0, "should be empty");
2260  for (DUIterator_Fast jmax, j = n->fast_outs(jmax); j < jmax; j++) {
2261    Node* use = n->fast_out(j);
2262    if ( not_peel.test(use->_idx) &&
2263         (use->is_If() || use->is_CMove() || use->is_Bool()) &&
2264         use->in(1) == n)  {
2265      worklist.push(use);
2266    }
2267  }
2268  if (worklist.size() > 0) {
2269    // clone "n" and insert it between inputs of "n" and the use
2270    Node* n_clone = n->clone();
2271    loop->_body.push(n_clone);
2272    _igvn.register_new_node_with_optimizer(n_clone);
2273    set_ctrl(n_clone, get_ctrl(n));
2274    sink_list.push(n_clone);
2275    not_peel <<= n_clone->_idx;  // add n_clone to not_peel set.
2276#if !defined(PRODUCT)
2277    if (TracePartialPeeling) {
2278      tty->print_cr("special not_peeled cloning old: %d new: %d", n->_idx, n_clone->_idx);
2279    }
2280#endif
2281    while( worklist.size() ) {
2282      Node *use = worklist.pop();
2283      _igvn.rehash_node_delayed(use);
2284      for (uint j = 1; j < use->req(); j++) {
2285        if (use->in(j) == n) {
2286          use->set_req(j, n_clone);
2287        }
2288      }
2289    }
2290  }
2291}
2292
2293
2294//------------------------------ insert_phi_for_loop -------------------------------------
2295// Insert phi(lp_entry_val, back_edge_val) at use->in(idx) for loop lp if phi does not already exist
2296void PhaseIdealLoop::insert_phi_for_loop( Node* use, uint idx, Node* lp_entry_val, Node* back_edge_val, LoopNode* lp ) {
2297  Node *phi = PhiNode::make(lp, back_edge_val);
2298  phi->set_req(LoopNode::EntryControl, lp_entry_val);
2299  // Use existing phi if it already exists
2300  Node *hit = _igvn.hash_find_insert(phi);
2301  if( hit == NULL ) {
2302    _igvn.register_new_node_with_optimizer(phi);
2303    set_ctrl(phi, lp);
2304  } else {
2305    // Remove the new phi from the graph and use the hit
2306    _igvn.remove_dead_node(phi);
2307    phi = hit;
2308  }
2309  _igvn.replace_input_of(use, idx, phi);
2310}
2311
2312#ifdef ASSERT
2313//------------------------------ is_valid_loop_partition -------------------------------------
2314// Validate the loop partition sets: peel and not_peel
2315bool PhaseIdealLoop::is_valid_loop_partition( IdealLoopTree *loop, VectorSet& peel, Node_List& peel_list,
2316                                              VectorSet& not_peel ) {
2317  uint i;
2318  // Check that peel_list entries are in the peel set
2319  for (i = 0; i < peel_list.size(); i++) {
2320    if (!peel.test(peel_list.at(i)->_idx)) {
2321      return false;
2322    }
2323  }
2324  // Check at loop members are in one of peel set or not_peel set
2325  for (i = 0; i < loop->_body.size(); i++ ) {
2326    Node *def  = loop->_body.at(i);
2327    uint di = def->_idx;
2328    // Check that peel set elements are in peel_list
2329    if (peel.test(di)) {
2330      if (not_peel.test(di)) {
2331        return false;
2332      }
2333      // Must be in peel_list also
2334      bool found = false;
2335      for (uint j = 0; j < peel_list.size(); j++) {
2336        if (peel_list.at(j)->_idx == di) {
2337          found = true;
2338          break;
2339        }
2340      }
2341      if (!found) {
2342        return false;
2343      }
2344    } else if (not_peel.test(di)) {
2345      if (peel.test(di)) {
2346        return false;
2347      }
2348    } else {
2349      return false;
2350    }
2351  }
2352  return true;
2353}
2354
2355//------------------------------ is_valid_clone_loop_exit_use -------------------------------------
2356// Ensure a use outside of loop is of the right form
2357bool PhaseIdealLoop::is_valid_clone_loop_exit_use( IdealLoopTree *loop, Node* use, uint exit_idx) {
2358  Node *use_c = has_ctrl(use) ? get_ctrl(use) : use;
2359  return (use->is_Phi() &&
2360          use_c->is_Region() && use_c->req() == 3 &&
2361          (use_c->in(exit_idx)->Opcode() == Op_IfTrue ||
2362           use_c->in(exit_idx)->Opcode() == Op_IfFalse ||
2363           use_c->in(exit_idx)->Opcode() == Op_JumpProj) &&
2364          loop->is_member( get_loop( use_c->in(exit_idx)->in(0) ) ) );
2365}
2366
2367//------------------------------ is_valid_clone_loop_form -------------------------------------
2368// Ensure that all uses outside of loop are of the right form
2369bool PhaseIdealLoop::is_valid_clone_loop_form( IdealLoopTree *loop, Node_List& peel_list,
2370                                               uint orig_exit_idx, uint clone_exit_idx) {
2371  uint len = peel_list.size();
2372  for (uint i = 0; i < len; i++) {
2373    Node *def = peel_list.at(i);
2374
2375    for (DUIterator_Fast jmax, j = def->fast_outs(jmax); j < jmax; j++) {
2376      Node *use = def->fast_out(j);
2377      Node *use_c = has_ctrl(use) ? get_ctrl(use) : use;
2378      if (!loop->is_member(get_loop(use_c))) {
2379        // use is not in the loop, check for correct structure
2380        if (use->in(0) == def) {
2381          // Okay
2382        } else if (!is_valid_clone_loop_exit_use(loop, use, orig_exit_idx)) {
2383          return false;
2384        }
2385      }
2386    }
2387  }
2388  return true;
2389}
2390#endif
2391
2392//------------------------------ partial_peel -------------------------------------
2393// Partially peel (aka loop rotation) the top portion of a loop (called
2394// the peel section below) by cloning it and placing one copy just before
2395// the new loop head and the other copy at the bottom of the new loop.
2396//
2397//    before                       after                where it came from
2398//
2399//    stmt1                        stmt1
2400//  loop:                          stmt2                     clone
2401//    stmt2                        if condA goto exitA       clone
2402//    if condA goto exitA        new_loop:                   new
2403//    stmt3                        stmt3                     clone
2404//    if !condB goto loop          if condB goto exitB       clone
2405//  exitB:                         stmt2                     orig
2406//    stmt4                        if !condA goto new_loop   orig
2407//  exitA:                         goto exitA
2408//                               exitB:
2409//                                 stmt4
2410//                               exitA:
2411//
2412// Step 1: find the cut point: an exit test on probable
2413//         induction variable.
2414// Step 2: schedule (with cloning) operations in the peel
2415//         section that can be executed after the cut into
2416//         the section that is not peeled.  This may need
2417//         to clone operations into exit blocks.  For
2418//         instance, a reference to A[i] in the not-peel
2419//         section and a reference to B[i] in an exit block
2420//         may cause a left-shift of i by 2 to be placed
2421//         in the peel block.  This step will clone the left
2422//         shift into the exit block and sink the left shift
2423//         from the peel to the not-peel section.
2424// Step 3: clone the loop, retarget the control, and insert
2425//         phis for values that are live across the new loop
2426//         head.  This is very dependent on the graph structure
2427//         from clone_loop.  It creates region nodes for
2428//         exit control and associated phi nodes for values
2429//         flow out of the loop through that exit.  The region
2430//         node is dominated by the clone's control projection.
2431//         So the clone's peel section is placed before the
2432//         new loop head, and the clone's not-peel section is
2433//         forms the top part of the new loop.  The original
2434//         peel section forms the tail of the new loop.
2435// Step 4: update the dominator tree and recompute the
2436//         dominator depth.
2437//
2438//                   orig
2439//
2440//                   stmt1
2441//                     |
2442//                     v
2443//               loop predicate
2444//                     |
2445//                     v
2446//                   loop<----+
2447//                     |      |
2448//                   stmt2    |
2449//                     |      |
2450//                     v      |
2451//                    ifA     |
2452//                   / |      |
2453//                  v  v      |
2454//               false true   ^  <-- last_peel
2455//               /     |      |
2456//              /   ===|==cut |
2457//             /     stmt3    |  <-- first_not_peel
2458//            /        |      |
2459//            |        v      |
2460//            v       ifB     |
2461//          exitA:   / \      |
2462//                  /   \     |
2463//                 v     v    |
2464//               false true   |
2465//               /       \    |
2466//              /         ----+
2467//             |
2468//             v
2469//           exitB:
2470//           stmt4
2471//
2472//
2473//            after clone loop
2474//
2475//                   stmt1
2476//                     |
2477//                     v
2478//               loop predicate
2479//                 /       \
2480//        clone   /         \   orig
2481//               /           \
2482//              /             \
2483//             v               v
2484//   +---->loop                loop<----+
2485//   |      |                    |      |
2486//   |    stmt2                stmt2    |
2487//   |      |                    |      |
2488//   |      v                    v      |
2489//   |      ifA                 ifA     |
2490//   |      | \                / |      |
2491//   |      v  v              v  v      |
2492//   ^    true  false      false true   ^  <-- last_peel
2493//   |      |   ^   \       /    |      |
2494//   | cut==|==  \   \     /  ===|==cut |
2495//   |    stmt3   \   \   /    stmt3    |  <-- first_not_peel
2496//   |      |    dom   | |       |      |
2497//   |      v      \  1v v2      v      |
2498//   |      ifB     regionA     ifB     |
2499//   |      / \        |       / \      |
2500//   |     /   \       v      /   \     |
2501//   |    v     v    exitA:  v     v    |
2502//   |    true  false      false true   |
2503//   |    /     ^   \      /       \    |
2504//   +----       \   \    /         ----+
2505//               dom  \  /
2506//                 \  1v v2
2507//                  regionB
2508//                     |
2509//                     v
2510//                   exitB:
2511//                   stmt4
2512//
2513//
2514//           after partial peel
2515//
2516//                  stmt1
2517//                     |
2518//                     v
2519//               loop predicate
2520//                 /
2521//        clone   /             orig
2522//               /          TOP
2523//              /             \
2524//             v               v
2525//    TOP->loop                loop----+
2526//          |                    |      |
2527//        stmt2                stmt2    |
2528//          |                    |      |
2529//          v                    v      |
2530//          ifA                 ifA     |
2531//          | \                / |      |
2532//          v  v              v  v      |
2533//        true  false      false true   |     <-- last_peel
2534//          |   ^   \       /    +------|---+
2535//  +->newloop   \   \     /  === ==cut |   |
2536//  |     stmt3   \   \   /     TOP     |   |
2537//  |       |    dom   | |      stmt3   |   | <-- first_not_peel
2538//  |       v      \  1v v2      v      |   |
2539//  |       ifB     regionA     ifB     ^   v
2540//  |       / \        |       / \      |   |
2541//  |      /   \       v      /   \     |   |
2542//  |     v     v    exitA:  v     v    |   |
2543//  |     true  false      false true   |   |
2544//  |     /     ^   \      /       \    |   |
2545//  |    |       \   \    /         v   |   |
2546//  |    |       dom  \  /         TOP  |   |
2547//  |    |         \  1v v2             |   |
2548//  ^    v          regionB             |   |
2549//  |    |             |                |   |
2550//  |    |             v                ^   v
2551//  |    |           exitB:             |   |
2552//  |    |           stmt4              |   |
2553//  |    +------------>-----------------+   |
2554//  |                                       |
2555//  +-----------------<---------------------+
2556//
2557//
2558//              final graph
2559//
2560//                  stmt1
2561//                    |
2562//                    v
2563//               loop predicate
2564//                    |
2565//                    v
2566//                  stmt2 clone
2567//                    |
2568//                    v
2569//         ........> ifA clone
2570//         :        / |
2571//        dom      /  |
2572//         :      v   v
2573//         :  false   true
2574//         :  |       |
2575//         :  |       v
2576//         :  |    newloop<-----+
2577//         :  |        |        |
2578//         :  |     stmt3 clone |
2579//         :  |        |        |
2580//         :  |        v        |
2581//         :  |       ifB       |
2582//         :  |      / \        |
2583//         :  |     v   v       |
2584//         :  |  false true     |
2585//         :  |   |     |       |
2586//         :  |   v    stmt2    |
2587//         :  | exitB:  |       |
2588//         :  | stmt4   v       |
2589//         :  |       ifA orig  |
2590//         :  |      /  \       |
2591//         :  |     /    \      |
2592//         :  |    v     v      |
2593//         :  |  false  true    |
2594//         :  |  /        \     |
2595//         :  v  v         -----+
2596//          RegionA
2597//             |
2598//             v
2599//           exitA
2600//
2601bool PhaseIdealLoop::partial_peel( IdealLoopTree *loop, Node_List &old_new ) {
2602
2603  assert(!loop->_head->is_CountedLoop(), "Non-counted loop only");
2604  if (!loop->_head->is_Loop()) {
2605    return false;  }
2606
2607  LoopNode *head  = loop->_head->as_Loop();
2608
2609  if (head->is_partial_peel_loop() || head->partial_peel_has_failed()) {
2610    return false;
2611  }
2612
2613  // Check for complex exit control
2614  for(uint ii = 0; ii < loop->_body.size(); ii++ ) {
2615    Node *n = loop->_body.at(ii);
2616    int opc = n->Opcode();
2617    if (n->is_Call()        ||
2618        opc == Op_Catch     ||
2619        opc == Op_CatchProj ||
2620        opc == Op_Jump      ||
2621        opc == Op_JumpProj) {
2622#if !defined(PRODUCT)
2623      if (TracePartialPeeling) {
2624        tty->print_cr("\nExit control too complex: lp: %d", head->_idx);
2625      }
2626#endif
2627      return false;
2628    }
2629  }
2630
2631  int dd = dom_depth(head);
2632
2633  // Step 1: find cut point
2634
2635  // Walk up dominators to loop head looking for first loop exit
2636  // which is executed on every path thru loop.
2637  IfNode *peel_if = NULL;
2638  IfNode *peel_if_cmpu = NULL;
2639
2640  Node *iff = loop->tail();
2641  while( iff != head ) {
2642    if( iff->is_If() ) {
2643      Node *ctrl = get_ctrl(iff->in(1));
2644      if (ctrl->is_top()) return false; // Dead test on live IF.
2645      // If loop-varying exit-test, check for induction variable
2646      if( loop->is_member(get_loop(ctrl)) &&
2647          loop->is_loop_exit(iff) &&
2648          is_possible_iv_test(iff)) {
2649        Node* cmp = iff->in(1)->in(1);
2650        if (cmp->Opcode() == Op_CmpI) {
2651          peel_if = iff->as_If();
2652        } else {
2653          assert(cmp->Opcode() == Op_CmpU, "must be CmpI or CmpU");
2654          peel_if_cmpu = iff->as_If();
2655        }
2656      }
2657    }
2658    iff = idom(iff);
2659  }
2660  // Prefer signed compare over unsigned compare.
2661  IfNode* new_peel_if = NULL;
2662  if (peel_if == NULL) {
2663    if (!PartialPeelAtUnsignedTests || peel_if_cmpu == NULL) {
2664      return false;   // No peel point found
2665    }
2666    new_peel_if = insert_cmpi_loop_exit(peel_if_cmpu, loop);
2667    if (new_peel_if == NULL) {
2668      return false;   // No peel point found
2669    }
2670    peel_if = new_peel_if;
2671  }
2672  Node* last_peel        = stay_in_loop(peel_if, loop);
2673  Node* first_not_peeled = stay_in_loop(last_peel, loop);
2674  if (first_not_peeled == NULL || first_not_peeled == head) {
2675    return false;
2676  }
2677
2678#if !defined(PRODUCT)
2679  if (TraceLoopOpts) {
2680    tty->print("PartialPeel  ");
2681    loop->dump_head();
2682  }
2683
2684  if (TracePartialPeeling) {
2685    tty->print_cr("before partial peel one iteration");
2686    Node_List wl;
2687    Node* t = head->in(2);
2688    while (true) {
2689      wl.push(t);
2690      if (t == head) break;
2691      t = idom(t);
2692    }
2693    while (wl.size() > 0) {
2694      Node* tt = wl.pop();
2695      tt->dump();
2696      if (tt == last_peel) tty->print_cr("-- cut --");
2697    }
2698  }
2699#endif
2700  ResourceArea *area = Thread::current()->resource_area();
2701  VectorSet peel(area);
2702  VectorSet not_peel(area);
2703  Node_List peel_list(area);
2704  Node_List worklist(area);
2705  Node_List sink_list(area);
2706
2707  // Set of cfg nodes to peel are those that are executable from
2708  // the head through last_peel.
2709  assert(worklist.size() == 0, "should be empty");
2710  worklist.push(head);
2711  peel.set(head->_idx);
2712  while (worklist.size() > 0) {
2713    Node *n = worklist.pop();
2714    if (n != last_peel) {
2715      for (DUIterator_Fast jmax, j = n->fast_outs(jmax); j < jmax; j++) {
2716        Node* use = n->fast_out(j);
2717        if (use->is_CFG() &&
2718            loop->is_member(get_loop(use)) &&
2719            !peel.test_set(use->_idx)) {
2720          worklist.push(use);
2721        }
2722      }
2723    }
2724  }
2725
2726  // Set of non-cfg nodes to peel are those that are control
2727  // dependent on the cfg nodes.
2728  uint i;
2729  for(i = 0; i < loop->_body.size(); i++ ) {
2730    Node *n = loop->_body.at(i);
2731    Node *n_c = has_ctrl(n) ? get_ctrl(n) : n;
2732    if (peel.test(n_c->_idx)) {
2733      peel.set(n->_idx);
2734    } else {
2735      not_peel.set(n->_idx);
2736    }
2737  }
2738
2739  // Step 2: move operations from the peeled section down into the
2740  //         not-peeled section
2741
2742  // Get a post order schedule of nodes in the peel region
2743  // Result in right-most operand.
2744  scheduled_nodelist(loop, peel, peel_list );
2745
2746  assert(is_valid_loop_partition(loop, peel, peel_list, not_peel), "bad partition");
2747
2748  // For future check for too many new phis
2749  uint old_phi_cnt = 0;
2750  for (DUIterator_Fast jmax, j = head->fast_outs(jmax); j < jmax; j++) {
2751    Node* use = head->fast_out(j);
2752    if (use->is_Phi()) old_phi_cnt++;
2753  }
2754
2755#if !defined(PRODUCT)
2756  if (TracePartialPeeling) {
2757    tty->print_cr("\npeeled list");
2758  }
2759#endif
2760
2761  // Evacuate nodes in peel region into the not_peeled region if possible
2762  uint new_phi_cnt = 0;
2763  uint cloned_for_outside_use = 0;
2764  for (i = 0; i < peel_list.size();) {
2765    Node* n = peel_list.at(i);
2766#if !defined(PRODUCT)
2767  if (TracePartialPeeling) n->dump();
2768#endif
2769    bool incr = true;
2770    if ( !n->is_CFG() ) {
2771
2772      if ( has_use_in_set(n, not_peel) ) {
2773
2774        // If not used internal to the peeled region,
2775        // move "n" from peeled to not_peeled region.
2776
2777        if ( !has_use_internal_to_set(n, peel, loop) ) {
2778
2779          // if not pinned and not a load (which maybe anti-dependent on a store)
2780          // and not a CMove (Matcher expects only bool->cmove).
2781          if ( n->in(0) == NULL && !n->is_Load() && !n->is_CMove() ) {
2782            cloned_for_outside_use += clone_for_use_outside_loop( loop, n, worklist );
2783            sink_list.push(n);
2784            peel     >>= n->_idx; // delete n from peel set.
2785            not_peel <<= n->_idx; // add n to not_peel set.
2786            peel_list.remove(i);
2787            incr = false;
2788#if !defined(PRODUCT)
2789            if (TracePartialPeeling) {
2790              tty->print_cr("sink to not_peeled region: %d newbb: %d",
2791                            n->_idx, get_ctrl(n)->_idx);
2792            }
2793#endif
2794          }
2795        } else {
2796          // Otherwise check for special def-use cases that span
2797          // the peel/not_peel boundary such as bool->if
2798          clone_for_special_use_inside_loop( loop, n, not_peel, sink_list, worklist );
2799          new_phi_cnt++;
2800        }
2801      }
2802    }
2803    if (incr) i++;
2804  }
2805
2806  if (new_phi_cnt > old_phi_cnt + PartialPeelNewPhiDelta) {
2807#if !defined(PRODUCT)
2808    if (TracePartialPeeling) {
2809      tty->print_cr("\nToo many new phis: %d  old %d new cmpi: %c",
2810                    new_phi_cnt, old_phi_cnt, new_peel_if != NULL?'T':'F');
2811    }
2812#endif
2813    if (new_peel_if != NULL) {
2814      remove_cmpi_loop_exit(new_peel_if, loop);
2815    }
2816    // Inhibit more partial peeling on this loop
2817    assert(!head->is_partial_peel_loop(), "not partial peeled");
2818    head->mark_partial_peel_failed();
2819    if (cloned_for_outside_use > 0) {
2820      // Terminate this round of loop opts because
2821      // the graph outside this loop was changed.
2822      C->set_major_progress();
2823      return true;
2824    }
2825    return false;
2826  }
2827
2828  // Step 3: clone loop, retarget control, and insert new phis
2829
2830  // Create new loop head for new phis and to hang
2831  // the nodes being moved (sinked) from the peel region.
2832  LoopNode* new_head = new LoopNode(last_peel, last_peel);
2833  new_head->set_unswitch_count(head->unswitch_count()); // Preserve
2834  _igvn.register_new_node_with_optimizer(new_head);
2835  assert(first_not_peeled->in(0) == last_peel, "last_peel <- first_not_peeled");
2836  _igvn.replace_input_of(first_not_peeled, 0, new_head);
2837  set_loop(new_head, loop);
2838  loop->_body.push(new_head);
2839  not_peel.set(new_head->_idx);
2840  set_idom(new_head, last_peel, dom_depth(first_not_peeled));
2841  set_idom(first_not_peeled, new_head, dom_depth(first_not_peeled));
2842
2843  while (sink_list.size() > 0) {
2844    Node* n = sink_list.pop();
2845    set_ctrl(n, new_head);
2846  }
2847
2848  assert(is_valid_loop_partition(loop, peel, peel_list, not_peel), "bad partition");
2849
2850  clone_loop( loop, old_new, dd );
2851
2852  const uint clone_exit_idx = 1;
2853  const uint orig_exit_idx  = 2;
2854  assert(is_valid_clone_loop_form( loop, peel_list, orig_exit_idx, clone_exit_idx ), "bad clone loop");
2855
2856  Node* head_clone             = old_new[head->_idx];
2857  LoopNode* new_head_clone     = old_new[new_head->_idx]->as_Loop();
2858  Node* orig_tail_clone        = head_clone->in(2);
2859
2860  // Add phi if "def" node is in peel set and "use" is not
2861
2862  for(i = 0; i < peel_list.size(); i++ ) {
2863    Node *def  = peel_list.at(i);
2864    if (!def->is_CFG()) {
2865      for (DUIterator_Fast jmax, j = def->fast_outs(jmax); j < jmax; j++) {
2866        Node *use = def->fast_out(j);
2867        if (has_node(use) && use->in(0) != C->top() &&
2868            (!peel.test(use->_idx) ||
2869             (use->is_Phi() && use->in(0) == head)) ) {
2870          worklist.push(use);
2871        }
2872      }
2873      while( worklist.size() ) {
2874        Node *use = worklist.pop();
2875        for (uint j = 1; j < use->req(); j++) {
2876          Node* n = use->in(j);
2877          if (n == def) {
2878
2879            // "def" is in peel set, "use" is not in peel set
2880            // or "use" is in the entry boundary (a phi) of the peel set
2881
2882            Node* use_c = has_ctrl(use) ? get_ctrl(use) : use;
2883
2884            if ( loop->is_member(get_loop( use_c )) ) {
2885              // use is in loop
2886              if (old_new[use->_idx] != NULL) { // null for dead code
2887                Node* use_clone = old_new[use->_idx];
2888                _igvn.replace_input_of(use, j, C->top());
2889                insert_phi_for_loop( use_clone, j, old_new[def->_idx], def, new_head_clone );
2890              }
2891            } else {
2892              assert(is_valid_clone_loop_exit_use(loop, use, orig_exit_idx), "clone loop format");
2893              // use is not in the loop, check if the live range includes the cut
2894              Node* lp_if = use_c->in(orig_exit_idx)->in(0);
2895              if (not_peel.test(lp_if->_idx)) {
2896                assert(j == orig_exit_idx, "use from original loop");
2897                insert_phi_for_loop( use, clone_exit_idx, old_new[def->_idx], def, new_head_clone );
2898              }
2899            }
2900          }
2901        }
2902      }
2903    }
2904  }
2905
2906  // Step 3b: retarget control
2907
2908  // Redirect control to the new loop head if a cloned node in
2909  // the not_peeled region has control that points into the peeled region.
2910  // This necessary because the cloned peeled region will be outside
2911  // the loop.
2912  //                            from    to
2913  //          cloned-peeled    <---+
2914  //    new_head_clone:            |    <--+
2915  //          cloned-not_peeled  in(0)    in(0)
2916  //          orig-peeled
2917
2918  for(i = 0; i < loop->_body.size(); i++ ) {
2919    Node *n = loop->_body.at(i);
2920    if (!n->is_CFG()           && n->in(0) != NULL        &&
2921        not_peel.test(n->_idx) && peel.test(n->in(0)->_idx)) {
2922      Node* n_clone = old_new[n->_idx];
2923      _igvn.replace_input_of(n_clone, 0, new_head_clone);
2924    }
2925  }
2926
2927  // Backedge of the surviving new_head (the clone) is original last_peel
2928  _igvn.replace_input_of(new_head_clone, LoopNode::LoopBackControl, last_peel);
2929
2930  // Cut first node in original not_peel set
2931  _igvn.rehash_node_delayed(new_head);                     // Multiple edge updates:
2932  new_head->set_req(LoopNode::EntryControl,    C->top());  //   use rehash_node_delayed / set_req instead of
2933  new_head->set_req(LoopNode::LoopBackControl, C->top());  //   multiple replace_input_of calls
2934
2935  // Copy head_clone back-branch info to original head
2936  // and remove original head's loop entry and
2937  // clone head's back-branch
2938  _igvn.rehash_node_delayed(head); // Multiple edge updates
2939  head->set_req(LoopNode::EntryControl,    head_clone->in(LoopNode::LoopBackControl));
2940  head->set_req(LoopNode::LoopBackControl, C->top());
2941  _igvn.replace_input_of(head_clone, LoopNode::LoopBackControl, C->top());
2942
2943  // Similarly modify the phis
2944  for (DUIterator_Fast kmax, k = head->fast_outs(kmax); k < kmax; k++) {
2945    Node* use = head->fast_out(k);
2946    if (use->is_Phi() && use->outcnt() > 0) {
2947      Node* use_clone = old_new[use->_idx];
2948      _igvn.rehash_node_delayed(use); // Multiple edge updates
2949      use->set_req(LoopNode::EntryControl,    use_clone->in(LoopNode::LoopBackControl));
2950      use->set_req(LoopNode::LoopBackControl, C->top());
2951      _igvn.replace_input_of(use_clone, LoopNode::LoopBackControl, C->top());
2952    }
2953  }
2954
2955  // Step 4: update dominator tree and dominator depth
2956
2957  set_idom(head, orig_tail_clone, dd);
2958  recompute_dom_depth();
2959
2960  // Inhibit more partial peeling on this loop
2961  new_head_clone->set_partial_peel_loop();
2962  C->set_major_progress();
2963  loop->record_for_igvn();
2964
2965#if !defined(PRODUCT)
2966  if (TracePartialPeeling) {
2967    tty->print_cr("\nafter partial peel one iteration");
2968    Node_List wl(area);
2969    Node* t = last_peel;
2970    while (true) {
2971      wl.push(t);
2972      if (t == head_clone) break;
2973      t = idom(t);
2974    }
2975    while (wl.size() > 0) {
2976      Node* tt = wl.pop();
2977      if (tt == head) tty->print_cr("orig head");
2978      else if (tt == new_head_clone) tty->print_cr("new head");
2979      else if (tt == head_clone) tty->print_cr("clone head");
2980      tt->dump();
2981    }
2982  }
2983#endif
2984  return true;
2985}
2986
2987//------------------------------reorg_offsets----------------------------------
2988// Reorganize offset computations to lower register pressure.  Mostly
2989// prevent loop-fallout uses of the pre-incremented trip counter (which are
2990// then alive with the post-incremented trip counter forcing an extra
2991// register move)
2992void PhaseIdealLoop::reorg_offsets(IdealLoopTree *loop) {
2993  // Perform it only for canonical counted loops.
2994  // Loop's shape could be messed up by iteration_split_impl.
2995  if (!loop->_head->is_CountedLoop())
2996    return;
2997  if (!loop->_head->as_Loop()->is_valid_counted_loop())
2998    return;
2999
3000  CountedLoopNode *cl = loop->_head->as_CountedLoop();
3001  CountedLoopEndNode *cle = cl->loopexit();
3002  Node *exit = cle->proj_out(false);
3003  Node *phi = cl->phi();
3004
3005  // Check for the special case of folks using the pre-incremented
3006  // trip-counter on the fall-out path (forces the pre-incremented
3007  // and post-incremented trip counter to be live at the same time).
3008  // Fix this by adjusting to use the post-increment trip counter.
3009
3010  bool progress = true;
3011  while (progress) {
3012    progress = false;
3013    for (DUIterator_Fast imax, i = phi->fast_outs(imax); i < imax; i++) {
3014      Node* use = phi->fast_out(i);   // User of trip-counter
3015      if (!has_ctrl(use))  continue;
3016      Node *u_ctrl = get_ctrl(use);
3017      if (use->is_Phi()) {
3018        u_ctrl = NULL;
3019        for (uint j = 1; j < use->req(); j++)
3020          if (use->in(j) == phi)
3021            u_ctrl = dom_lca(u_ctrl, use->in(0)->in(j));
3022      }
3023      IdealLoopTree *u_loop = get_loop(u_ctrl);
3024      // Look for loop-invariant use
3025      if (u_loop == loop) continue;
3026      if (loop->is_member(u_loop)) continue;
3027      // Check that use is live out the bottom.  Assuming the trip-counter
3028      // update is right at the bottom, uses of of the loop middle are ok.
3029      if (dom_lca(exit, u_ctrl) != exit) continue;
3030      // Hit!  Refactor use to use the post-incremented tripcounter.
3031      // Compute a post-increment tripcounter.
3032      Node *opaq = new Opaque2Node( C, cle->incr() );
3033      register_new_node(opaq, exit);
3034      Node *neg_stride = _igvn.intcon(-cle->stride_con());
3035      set_ctrl(neg_stride, C->root());
3036      Node *post = new AddINode( opaq, neg_stride);
3037      register_new_node(post, exit);
3038      _igvn.rehash_node_delayed(use);
3039      for (uint j = 1; j < use->req(); j++) {
3040        if (use->in(j) == phi)
3041          use->set_req(j, post);
3042      }
3043      // Since DU info changed, rerun loop
3044      progress = true;
3045      break;
3046    }
3047  }
3048
3049}
3050