loopopts.cpp revision 5776:de6a9e811145
145135Sobrien/*
295811Sobrien * Copyright (c) 1999, 2013, Oracle and/or its affiliates. All rights reserved.
345135Sobrien * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
4132738Skan *
545135Sobrien * This code is free software; you can redistribute it and/or modify it
6132738Skan * under the terms of the GNU General Public License version 2 only, as
745135Sobrien * published by the Free Software Foundation.
845135Sobrien *
945135Sobrien * This code is distributed in the hope that it will be useful, but WITHOUT
1045135Sobrien * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
11132738Skan * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
1245135Sobrien * version 2 for more details (a copy is included in the LICENSE file that
1345135Sobrien * accompanied this code).
1445135Sobrien *
1545135Sobrien * You should have received a copy of the GNU General Public License version
1645135Sobrien * 2 along with this work; if not, write to the Free Software Foundation,
17132738Skan * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
18169706Skan *
19169706Skan * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
2045135Sobrien * or visit www.oracle.com if you need additional information or have any
2145135Sobrien * questions.
2245135Sobrien *
2345135Sobrien */
2495811Sobrien
2595811Sobrien#include "precompiled.hpp"
2695811Sobrien#include "memory/allocation.inline.hpp"
2795811Sobrien#include "opto/addnode.hpp"
2845135Sobrien#include "opto/connode.hpp"
2952113Sobrien#include "opto/divnode.hpp"
3045135Sobrien#include "opto/loopnode.hpp"
3195811Sobrien#include "opto/matcher.hpp"
3295811Sobrien#include "opto/mulnode.hpp"
3352113Sobrien#include "opto/rootnode.hpp"
3446122Sobrien#include "opto/subnode.hpp"
3545135Sobrien
3673243Sobrien//=============================================================================
3748474Sobrien//------------------------------split_thru_phi---------------------------------
3845135Sobrien// Split Node 'n' through merge point if there is enough win.
3958478SobrienNode *PhaseIdealLoop::split_thru_phi( Node *n, Node *region, int policy ) {
4073304Sobrien  if (n->Opcode() == Op_ConvI2L && n->bottom_type() != TypeLong::LONG) {
4149854Sobrien    // ConvI2L may have type information on it which is unsafe to push up
4258478Sobrien    // so disable this for now
4373304Sobrien    return NULL;
4449854Sobrien  }
45117420Skan
46117420Skan  if (n->is_MathExact()) {
47117420Skan    // MathExact has projections that are not correctly handled in the code
4858478Sobrien    // below.
4954807Sobrien    return NULL;
5054807Sobrien  }
5195811Sobrien
5295811Sobrien  int wins = 0;
5373304Sobrien  assert(!n->is_CFG(), "");
5495811Sobrien  assert(region->is_Region(), "");
5595811Sobrien
5695811Sobrien  const Type* type = n->bottom_type();
5758478Sobrien  const TypeOopPtr *t_oop = _igvn.type(n)->isa_oopptr();
5895811Sobrien  Node *phi;
5945135Sobrien  if (t_oop != NULL && t_oop->is_known_instance_field()) {
6045458Sobrien    int iid    = t_oop->instance_id();
6158478Sobrien    int index  = C->get_alias_index(t_oop);
6258478Sobrien    int offset = t_oop->offset();
6358478Sobrien    phi = new (C) PhiNode(region, type, NULL, iid, index, offset);
6458478Sobrien  } else {
6595811Sobrien    phi = PhiNode::make_blank(region, n);
66229715Sandreast  }
67182627Sobrien  uint old_unique = C->unique();
6858478Sobrien  for (uint i = 1; i < region->req(); i++) {
6958478Sobrien    Node *x;
7058478Sobrien    Node* the_clone = NULL;
7195811Sobrien    if (region->in(i) == C->top()) {
7258478Sobrien      x = C->top();             // Dead path?  Use a dead data op
7395811Sobrien    } else {
74182627Sobrien      x = n->clone();           // Else clone up the data op
75182627Sobrien      the_clone = x;            // Remember for possible deletion.
76182627Sobrien      // Alter data node to use pre-phi inputs
77182627Sobrien      if (n->in(0) == region)
7858478Sobrien        x->set_req( 0, region->in(i) );
7995811Sobrien      for (uint j = 1; j < n->req(); j++) {
80117420Skan        Node *in = n->in(j);
81182627Sobrien        if (in->is_Phi() && in->in(0) == region)
82182627Sobrien          x->set_req( j, in->in(i) ); // Use pre-Phi input for the clone
83182627Sobrien      }
84182627Sobrien    }
85182627Sobrien    // Check for a 'win' on some paths
86182627Sobrien    const Type *t = x->Value(&_igvn);
8795811Sobrien
8895811Sobrien    bool singleton = t->singleton();
8995811Sobrien
9095811Sobrien    // A TOP singleton indicates that there are no possible values incoming
9195811Sobrien    // along a particular edge. In most cases, this is OK, and the Phi will
9295811Sobrien    // be eliminated later in an Ideal call. However, we can't allow this to
9345135Sobrien    // happen if the singleton occurs on loop entry, as the elimination of
9445135Sobrien    // the PhiNode may cause the resulting node to migrate back to a previous
9558478Sobrien    // loop iteration.
9658478Sobrien    if (singleton && t == Type::TOP) {
9758478Sobrien      // Is_Loop() == false does not confirm the absence of a loop (e.g., an
9858478Sobrien      // irreducible loop may not be indicated by an affirmative is_Loop());
9958478Sobrien      // therefore, the only top we can split thru a phi is on a backedge of
10058478Sobrien      // a loop.
10158478Sobrien      singleton &= region->is_Loop() && (i != LoopNode::EntryControl);
10258478Sobrien    }
10345135Sobrien
10445135Sobrien    if (singleton) {
105146939Skan      wins++;
106182627Sobrien      x = ((PhaseGVN&)_igvn).makecon(t);
107146939Skan    } else {
108169706Skan      // We now call Identity to try to simplify the cloned node.
109      // Note that some Identity methods call phase->type(this).
110      // Make sure that the type array is big enough for
111      // our new node, even though we may throw the node away.
112      // (Note: This tweaking with igvn only works because x is a new node.)
113      _igvn.set_type(x, t);
114      // If x is a TypeNode, capture any more-precise type permanently into Node
115      // otherwise it will be not updated during igvn->transform since
116      // igvn->type(x) is set to x->Value() already.
117      x->raise_bottom_type(t);
118      Node *y = x->Identity(&_igvn);
119      if (y != x) {
120        wins++;
121        x = y;
122      } else {
123        y = _igvn.hash_find(x);
124        if (y) {
125          wins++;
126          x = y;
127        } else {
128          // Else x is a new node we are keeping
129          // We do not need register_new_node_with_optimizer
130          // because set_type has already been called.
131          _igvn._worklist.push(x);
132        }
133      }
134    }
135    if (x != the_clone && the_clone != NULL)
136      _igvn.remove_dead_node(the_clone);
137    phi->set_req( i, x );
138  }
139  // Too few wins?
140  if (wins <= policy) {
141    _igvn.remove_dead_node(phi);
142    return NULL;
143  }
144
145  // Record Phi
146  register_new_node( phi, region );
147
148  for (uint i2 = 1; i2 < phi->req(); i2++) {
149    Node *x = phi->in(i2);
150    // If we commoned up the cloned 'x' with another existing Node,
151    // the existing Node picks up a new use.  We need to make the
152    // existing Node occur higher up so it dominates its uses.
153    Node *old_ctrl;
154    IdealLoopTree *old_loop;
155
156    if (x->is_Con()) {
157      // Constant's control is always root.
158      set_ctrl(x, C->root());
159      continue;
160    }
161    // The occasional new node
162    if (x->_idx >= old_unique) {     // Found a new, unplaced node?
163      old_ctrl = NULL;
164      old_loop = NULL;               // Not in any prior loop
165    } else {
166      old_ctrl = get_ctrl(x);
167      old_loop = get_loop(old_ctrl); // Get prior loop
168    }
169    // New late point must dominate new use
170    Node *new_ctrl = dom_lca(old_ctrl, region->in(i2));
171    if (new_ctrl == old_ctrl) // Nothing is changed
172      continue;
173
174    IdealLoopTree *new_loop = get_loop(new_ctrl);
175
176    // Don't move x into a loop if its uses are
177    // outside of loop. Otherwise x will be cloned
178    // for each use outside of this loop.
179    IdealLoopTree *use_loop = get_loop(region);
180    if (!new_loop->is_member(use_loop) &&
181        (old_loop == NULL || !new_loop->is_member(old_loop))) {
182      // Take early control, later control will be recalculated
183      // during next iteration of loop optimizations.
184      new_ctrl = get_early_ctrl(x);
185      new_loop = get_loop(new_ctrl);
186    }
187    // Set new location
188    set_ctrl(x, new_ctrl);
189    // If changing loop bodies, see if we need to collect into new body
190    if (old_loop != new_loop) {
191      if (old_loop && !old_loop->_child)
192        old_loop->_body.yank(x);
193      if (!new_loop->_child)
194        new_loop->_body.push(x);  // Collect body info
195    }
196  }
197
198  return phi;
199}
200
201//------------------------------dominated_by------------------------------------
202// Replace the dominated test with an obvious true or false.  Place it on the
203// IGVN worklist for later cleanup.  Move control-dependent data Nodes on the
204// live path up to the dominating control.
205void PhaseIdealLoop::dominated_by( Node *prevdom, Node *iff, bool flip, bool exclude_loop_predicate ) {
206#ifndef PRODUCT
207  if (VerifyLoopOptimizations && PrintOpto) tty->print_cr("dominating test");
208#endif
209
210
211  // prevdom is the dominating projection of the dominating test.
212  assert( iff->is_If(), "" );
213  assert( iff->Opcode() == Op_If || iff->Opcode() == Op_CountedLoopEnd, "Check this code when new subtype is added");
214  int pop = prevdom->Opcode();
215  assert( pop == Op_IfFalse || pop == Op_IfTrue, "" );
216  if (flip) {
217    if (pop == Op_IfTrue)
218      pop = Op_IfFalse;
219    else
220      pop = Op_IfTrue;
221  }
222  // 'con' is set to true or false to kill the dominated test.
223  Node *con = _igvn.makecon(pop == Op_IfTrue ? TypeInt::ONE : TypeInt::ZERO);
224  set_ctrl(con, C->root()); // Constant gets a new use
225  // Hack the dominated test
226  _igvn.replace_input_of(iff, 1, con);
227
228  // If I dont have a reachable TRUE and FALSE path following the IfNode then
229  // I can assume this path reaches an infinite loop.  In this case it's not
230  // important to optimize the data Nodes - either the whole compilation will
231  // be tossed or this path (and all data Nodes) will go dead.
232  if (iff->outcnt() != 2) return;
233
234  // Make control-dependent data Nodes on the live path (path that will remain
235  // once the dominated IF is removed) become control-dependent on the
236  // dominating projection.
237  Node* dp = iff->as_If()->proj_out(pop == Op_IfTrue);
238
239  // Loop predicates may have depending checks which should not
240  // be skipped. For example, range check predicate has two checks
241  // for lower and upper bounds.
242  if (dp == NULL)
243    return;
244
245  ProjNode* dp_proj  = dp->as_Proj();
246  ProjNode* unc_proj = iff->as_If()->proj_out(1 - dp_proj->_con)->as_Proj();
247  if (exclude_loop_predicate &&
248      unc_proj->is_uncommon_trap_proj(Deoptimization::Reason_predicate))
249    return; // Let IGVN transformation change control dependence.
250
251  IdealLoopTree *old_loop = get_loop(dp);
252
253  for (DUIterator_Fast imax, i = dp->fast_outs(imax); i < imax; i++) {
254    Node* cd = dp->fast_out(i); // Control-dependent node
255    if (cd->depends_only_on_test()) {
256      assert(cd->in(0) == dp, "");
257      _igvn.replace_input_of(cd, 0, prevdom);
258      set_early_ctrl(cd);
259      IdealLoopTree *new_loop = get_loop(get_ctrl(cd));
260      if (old_loop != new_loop) {
261        if (!old_loop->_child) old_loop->_body.yank(cd);
262        if (!new_loop->_child) new_loop->_body.push(cd);
263      }
264      --i;
265      --imax;
266    }
267  }
268}
269
270//------------------------------has_local_phi_input----------------------------
271// Return TRUE if 'n' has Phi inputs from its local block and no other
272// block-local inputs (all non-local-phi inputs come from earlier blocks)
273Node *PhaseIdealLoop::has_local_phi_input( Node *n ) {
274  Node *n_ctrl = get_ctrl(n);
275  // See if some inputs come from a Phi in this block, or from before
276  // this block.
277  uint i;
278  for( i = 1; i < n->req(); i++ ) {
279    Node *phi = n->in(i);
280    if( phi->is_Phi() && phi->in(0) == n_ctrl )
281      break;
282  }
283  if( i >= n->req() )
284    return NULL;                // No Phi inputs; nowhere to clone thru
285
286  // Check for inputs created between 'n' and the Phi input.  These
287  // must split as well; they have already been given the chance
288  // (courtesy of a post-order visit) and since they did not we must
289  // recover the 'cost' of splitting them by being very profitable
290  // when splitting 'n'.  Since this is unlikely we simply give up.
291  for( i = 1; i < n->req(); i++ ) {
292    Node *m = n->in(i);
293    if( get_ctrl(m) == n_ctrl && !m->is_Phi() ) {
294      // We allow the special case of AddP's with no local inputs.
295      // This allows us to split-up address expressions.
296      if (m->is_AddP() &&
297          get_ctrl(m->in(2)) != n_ctrl &&
298          get_ctrl(m->in(3)) != n_ctrl) {
299        // Move the AddP up to dominating point
300        set_ctrl_and_loop(m, find_non_split_ctrl(idom(n_ctrl)));
301        continue;
302      }
303      return NULL;
304    }
305  }
306
307  return n_ctrl;
308}
309
310//------------------------------remix_address_expressions----------------------
311// Rework addressing expressions to get the most loop-invariant stuff
312// moved out.  We'd like to do all associative operators, but it's especially
313// important (common) to do address expressions.
314Node *PhaseIdealLoop::remix_address_expressions( Node *n ) {
315  if (!has_ctrl(n))  return NULL;
316  Node *n_ctrl = get_ctrl(n);
317  IdealLoopTree *n_loop = get_loop(n_ctrl);
318
319  // See if 'n' mixes loop-varying and loop-invariant inputs and
320  // itself is loop-varying.
321
322  // Only interested in binary ops (and AddP)
323  if( n->req() < 3 || n->req() > 4 ) return NULL;
324
325  Node *n1_ctrl = get_ctrl(n->in(                    1));
326  Node *n2_ctrl = get_ctrl(n->in(                    2));
327  Node *n3_ctrl = get_ctrl(n->in(n->req() == 3 ? 2 : 3));
328  IdealLoopTree *n1_loop = get_loop( n1_ctrl );
329  IdealLoopTree *n2_loop = get_loop( n2_ctrl );
330  IdealLoopTree *n3_loop = get_loop( n3_ctrl );
331
332  // Does one of my inputs spin in a tighter loop than self?
333  if( (n_loop->is_member( n1_loop ) && n_loop != n1_loop) ||
334      (n_loop->is_member( n2_loop ) && n_loop != n2_loop) ||
335      (n_loop->is_member( n3_loop ) && n_loop != n3_loop) )
336    return NULL;                // Leave well enough alone
337
338  // Is at least one of my inputs loop-invariant?
339  if( n1_loop == n_loop &&
340      n2_loop == n_loop &&
341      n3_loop == n_loop )
342    return NULL;                // No loop-invariant inputs
343
344
345  int n_op = n->Opcode();
346
347  // Replace expressions like ((V+I) << 2) with (V<<2 + I<<2).
348  if( n_op == Op_LShiftI ) {
349    // Scale is loop invariant
350    Node *scale = n->in(2);
351    Node *scale_ctrl = get_ctrl(scale);
352    IdealLoopTree *scale_loop = get_loop(scale_ctrl );
353    if( n_loop == scale_loop || !scale_loop->is_member( n_loop ) )
354      return NULL;
355    const TypeInt *scale_t = scale->bottom_type()->isa_int();
356    if( scale_t && scale_t->is_con() && scale_t->get_con() >= 16 )
357      return NULL;              // Dont bother with byte/short masking
358    // Add must vary with loop (else shift would be loop-invariant)
359    Node *add = n->in(1);
360    Node *add_ctrl = get_ctrl(add);
361    IdealLoopTree *add_loop = get_loop(add_ctrl);
362    //assert( n_loop == add_loop, "" );
363    if( n_loop != add_loop ) return NULL;  // happens w/ evil ZKM loops
364
365    // Convert I-V into I+ (0-V); same for V-I
366    if( add->Opcode() == Op_SubI &&
367        _igvn.type( add->in(1) ) != TypeInt::ZERO ) {
368      Node *zero = _igvn.intcon(0);
369      set_ctrl(zero, C->root());
370      Node *neg = new (C) SubINode( _igvn.intcon(0), add->in(2) );
371      register_new_node( neg, get_ctrl(add->in(2) ) );
372      add = new (C) AddINode( add->in(1), neg );
373      register_new_node( add, add_ctrl );
374    }
375    if( add->Opcode() != Op_AddI ) return NULL;
376    // See if one add input is loop invariant
377    Node *add_var = add->in(1);
378    Node *add_var_ctrl = get_ctrl(add_var);
379    IdealLoopTree *add_var_loop = get_loop(add_var_ctrl );
380    Node *add_invar = add->in(2);
381    Node *add_invar_ctrl = get_ctrl(add_invar);
382    IdealLoopTree *add_invar_loop = get_loop(add_invar_ctrl );
383    if( add_var_loop == n_loop ) {
384    } else if( add_invar_loop == n_loop ) {
385      // Swap to find the invariant part
386      add_invar = add_var;
387      add_invar_ctrl = add_var_ctrl;
388      add_invar_loop = add_var_loop;
389      add_var = add->in(2);
390      Node *add_var_ctrl = get_ctrl(add_var);
391      IdealLoopTree *add_var_loop = get_loop(add_var_ctrl );
392    } else                      // Else neither input is loop invariant
393      return NULL;
394    if( n_loop == add_invar_loop || !add_invar_loop->is_member( n_loop ) )
395      return NULL;              // No invariant part of the add?
396
397    // Yes!  Reshape address expression!
398    Node *inv_scale = new (C) LShiftINode( add_invar, scale );
399    Node *inv_scale_ctrl =
400      dom_depth(add_invar_ctrl) > dom_depth(scale_ctrl) ?
401      add_invar_ctrl : scale_ctrl;
402    register_new_node( inv_scale, inv_scale_ctrl );
403    Node *var_scale = new (C) LShiftINode( add_var, scale );
404    register_new_node( var_scale, n_ctrl );
405    Node *var_add = new (C) AddINode( var_scale, inv_scale );
406    register_new_node( var_add, n_ctrl );
407    _igvn.replace_node( n, var_add );
408    return var_add;
409  }
410
411  // Replace (I+V) with (V+I)
412  if( n_op == Op_AddI ||
413      n_op == Op_AddL ||
414      n_op == Op_AddF ||
415      n_op == Op_AddD ||
416      n_op == Op_MulI ||
417      n_op == Op_MulL ||
418      n_op == Op_MulF ||
419      n_op == Op_MulD ) {
420    if( n2_loop == n_loop ) {
421      assert( n1_loop != n_loop, "" );
422      n->swap_edges(1, 2);
423    }
424  }
425
426  // Replace ((I1 +p V) +p I2) with ((I1 +p I2) +p V),
427  // but not if I2 is a constant.
428  if( n_op == Op_AddP ) {
429    if( n2_loop == n_loop && n3_loop != n_loop ) {
430      if( n->in(2)->Opcode() == Op_AddP && !n->in(3)->is_Con() ) {
431        Node *n22_ctrl = get_ctrl(n->in(2)->in(2));
432        Node *n23_ctrl = get_ctrl(n->in(2)->in(3));
433        IdealLoopTree *n22loop = get_loop( n22_ctrl );
434        IdealLoopTree *n23_loop = get_loop( n23_ctrl );
435        if( n22loop != n_loop && n22loop->is_member(n_loop) &&
436            n23_loop == n_loop ) {
437          Node *add1 = new (C) AddPNode( n->in(1), n->in(2)->in(2), n->in(3) );
438          // Stuff new AddP in the loop preheader
439          register_new_node( add1, n_loop->_head->in(LoopNode::EntryControl) );
440          Node *add2 = new (C) AddPNode( n->in(1), add1, n->in(2)->in(3) );
441          register_new_node( add2, n_ctrl );
442          _igvn.replace_node( n, add2 );
443          return add2;
444        }
445      }
446    }
447
448    // Replace (I1 +p (I2 + V)) with ((I1 +p I2) +p V)
449    if( n2_loop != n_loop && n3_loop == n_loop ) {
450      if( n->in(3)->Opcode() == Op_AddI ) {
451        Node *V = n->in(3)->in(1);
452        Node *I = n->in(3)->in(2);
453        if( is_member(n_loop,get_ctrl(V)) ) {
454        } else {
455          Node *tmp = V; V = I; I = tmp;
456        }
457        if( !is_member(n_loop,get_ctrl(I)) ) {
458          Node *add1 = new (C) AddPNode( n->in(1), n->in(2), I );
459          // Stuff new AddP in the loop preheader
460          register_new_node( add1, n_loop->_head->in(LoopNode::EntryControl) );
461          Node *add2 = new (C) AddPNode( n->in(1), add1, V );
462          register_new_node( add2, n_ctrl );
463          _igvn.replace_node( n, add2 );
464          return add2;
465        }
466      }
467    }
468  }
469
470  return NULL;
471}
472
473//------------------------------conditional_move-------------------------------
474// Attempt to replace a Phi with a conditional move.  We have some pretty
475// strict profitability requirements.  All Phis at the merge point must
476// be converted, so we can remove the control flow.  We need to limit the
477// number of c-moves to a small handful.  All code that was in the side-arms
478// of the CFG diamond is now speculatively executed.  This code has to be
479// "cheap enough".  We are pretty much limited to CFG diamonds that merge
480// 1 or 2 items with a total of 1 or 2 ops executed speculatively.
481Node *PhaseIdealLoop::conditional_move( Node *region ) {
482
483  assert(region->is_Region(), "sanity check");
484  if (region->req() != 3) return NULL;
485
486  // Check for CFG diamond
487  Node *lp = region->in(1);
488  Node *rp = region->in(2);
489  if (!lp || !rp) return NULL;
490  Node *lp_c = lp->in(0);
491  if (lp_c == NULL || lp_c != rp->in(0) || !lp_c->is_If()) return NULL;
492  IfNode *iff = lp_c->as_If();
493
494  // Check for ops pinned in an arm of the diamond.
495  // Can't remove the control flow in this case
496  if (lp->outcnt() > 1) return NULL;
497  if (rp->outcnt() > 1) return NULL;
498
499  IdealLoopTree* r_loop = get_loop(region);
500  assert(r_loop == get_loop(iff), "sanity");
501  // Always convert to CMOVE if all results are used only outside this loop.
502  bool used_inside_loop = (r_loop == _ltree_root);
503
504  // Check profitability
505  int cost = 0;
506  int phis = 0;
507  for (DUIterator_Fast imax, i = region->fast_outs(imax); i < imax; i++) {
508    Node *out = region->fast_out(i);
509    if (!out->is_Phi()) continue; // Ignore other control edges, etc
510    phis++;
511    PhiNode* phi = out->as_Phi();
512    BasicType bt = phi->type()->basic_type();
513    switch (bt) {
514    case T_FLOAT:
515    case T_DOUBLE: {
516      cost += Matcher::float_cmove_cost(); // Could be very expensive
517      break;
518    }
519    case T_LONG: {
520      cost += Matcher::long_cmove_cost(); // May encodes as 2 CMOV's
521    }
522    case T_INT:                 // These all CMOV fine
523    case T_ADDRESS: {           // (RawPtr)
524      cost++;
525      break;
526    }
527    case T_NARROWOOP: // Fall through
528    case T_OBJECT: {            // Base oops are OK, but not derived oops
529      const TypeOopPtr *tp = phi->type()->make_ptr()->isa_oopptr();
530      // Derived pointers are Bad (tm): what's the Base (for GC purposes) of a
531      // CMOVE'd derived pointer?  It's a CMOVE'd derived base.  Thus
532      // CMOVE'ing a derived pointer requires we also CMOVE the base.  If we
533      // have a Phi for the base here that we convert to a CMOVE all is well
534      // and good.  But if the base is dead, we'll not make a CMOVE.  Later
535      // the allocator will have to produce a base by creating a CMOVE of the
536      // relevant bases.  This puts the allocator in the business of
537      // manufacturing expensive instructions, generally a bad plan.
538      // Just Say No to Conditionally-Moved Derived Pointers.
539      if (tp && tp->offset() != 0)
540        return NULL;
541      cost++;
542      break;
543    }
544    default:
545      return NULL;              // In particular, can't do memory or I/O
546    }
547    // Add in cost any speculative ops
548    for (uint j = 1; j < region->req(); j++) {
549      Node *proj = region->in(j);
550      Node *inp = phi->in(j);
551      if (get_ctrl(inp) == proj) { // Found local op
552        cost++;
553        // Check for a chain of dependent ops; these will all become
554        // speculative in a CMOV.
555        for (uint k = 1; k < inp->req(); k++)
556          if (get_ctrl(inp->in(k)) == proj)
557            cost += ConditionalMoveLimit; // Too much speculative goo
558      }
559    }
560    // See if the Phi is used by a Cmp or Narrow oop Decode/Encode.
561    // This will likely Split-If, a higher-payoff operation.
562    for (DUIterator_Fast kmax, k = phi->fast_outs(kmax); k < kmax; k++) {
563      Node* use = phi->fast_out(k);
564      if (use->is_Cmp() || use->is_DecodeNarrowPtr() || use->is_EncodeNarrowPtr())
565        cost += ConditionalMoveLimit;
566      // Is there a use inside the loop?
567      // Note: check only basic types since CMoveP is pinned.
568      if (!used_inside_loop && is_java_primitive(bt)) {
569        IdealLoopTree* u_loop = get_loop(has_ctrl(use) ? get_ctrl(use) : use);
570        if (r_loop == u_loop || r_loop->is_member(u_loop)) {
571          used_inside_loop = true;
572        }
573      }
574    }
575  }
576  Node* bol = iff->in(1);
577  assert(bol->Opcode() == Op_Bool, "");
578  int cmp_op = bol->in(1)->Opcode();
579  // It is expensive to generate flags from a float compare.
580  // Avoid duplicated float compare.
581  if (phis > 1 && (cmp_op == Op_CmpF || cmp_op == Op_CmpD)) return NULL;
582
583  float infrequent_prob = PROB_UNLIKELY_MAG(3);
584  // Ignore cost and blocks frequency if CMOVE can be moved outside the loop.
585  if (used_inside_loop) {
586    if (cost >= ConditionalMoveLimit) return NULL; // Too much goo
587
588    // BlockLayoutByFrequency optimization moves infrequent branch
589    // from hot path. No point in CMOV'ing in such case (110 is used
590    // instead of 100 to take into account not exactness of float value).
591    if (BlockLayoutByFrequency) {
592      infrequent_prob = MAX2(infrequent_prob, (float)BlockLayoutMinDiamondPercentage/110.0f);
593    }
594  }
595  // Check for highly predictable branch.  No point in CMOV'ing if
596  // we are going to predict accurately all the time.
597  if (iff->_prob < infrequent_prob ||
598      iff->_prob > (1.0f - infrequent_prob))
599    return NULL;
600
601  // --------------
602  // Now replace all Phis with CMOV's
603  Node *cmov_ctrl = iff->in(0);
604  uint flip = (lp->Opcode() == Op_IfTrue);
605  while (1) {
606    PhiNode* phi = NULL;
607    for (DUIterator_Fast imax, i = region->fast_outs(imax); i < imax; i++) {
608      Node *out = region->fast_out(i);
609      if (out->is_Phi()) {
610        phi = out->as_Phi();
611        break;
612      }
613    }
614    if (phi == NULL)  break;
615#ifndef PRODUCT
616    if (PrintOpto && VerifyLoopOptimizations) tty->print_cr("CMOV");
617#endif
618    // Move speculative ops
619    for (uint j = 1; j < region->req(); j++) {
620      Node *proj = region->in(j);
621      Node *inp = phi->in(j);
622      if (get_ctrl(inp) == proj) { // Found local op
623#ifndef PRODUCT
624        if (PrintOpto && VerifyLoopOptimizations) {
625          tty->print("  speculate: ");
626          inp->dump();
627        }
628#endif
629        set_ctrl(inp, cmov_ctrl);
630      }
631    }
632    Node *cmov = CMoveNode::make( C, cmov_ctrl, iff->in(1), phi->in(1+flip), phi->in(2-flip), _igvn.type(phi) );
633    register_new_node( cmov, cmov_ctrl );
634    _igvn.replace_node( phi, cmov );
635#ifndef PRODUCT
636    if (TraceLoopOpts) {
637      tty->print("CMOV  ");
638      r_loop->dump_head();
639      if (Verbose) {
640        bol->in(1)->dump(1);
641        cmov->dump(1);
642      }
643    }
644    if (VerifyLoopOptimizations) verify();
645#endif
646  }
647
648  // The useless CFG diamond will fold up later; see the optimization in
649  // RegionNode::Ideal.
650  _igvn._worklist.push(region);
651
652  return iff->in(1);
653}
654
655//------------------------------split_if_with_blocks_pre-----------------------
656// Do the real work in a non-recursive function.  Data nodes want to be
657// cloned in the pre-order so they can feed each other nicely.
658Node *PhaseIdealLoop::split_if_with_blocks_pre( Node *n ) {
659  // Cloning these guys is unlikely to win
660  int n_op = n->Opcode();
661  if( n_op == Op_MergeMem ) return n;
662  if( n->is_Proj() ) return n;
663  // Do not clone-up CmpFXXX variations, as these are always
664  // followed by a CmpI
665  if( n->is_Cmp() ) return n;
666  // Attempt to use a conditional move instead of a phi/branch
667  if( ConditionalMoveLimit > 0 && n_op == Op_Region ) {
668    Node *cmov = conditional_move( n );
669    if( cmov ) return cmov;
670  }
671  if( n->is_CFG() || n->is_LoadStore() )
672    return n;
673  if( n_op == Op_Opaque1 ||     // Opaque nodes cannot be mod'd
674      n_op == Op_Opaque2 ) {
675    if( !C->major_progress() )   // If chance of no more loop opts...
676      _igvn._worklist.push(n);  // maybe we'll remove them
677    return n;
678  }
679
680  if( n->is_Con() ) return n;   // No cloning for Con nodes
681
682  Node *n_ctrl = get_ctrl(n);
683  if( !n_ctrl ) return n;       // Dead node
684
685  // Attempt to remix address expressions for loop invariants
686  Node *m = remix_address_expressions( n );
687  if( m ) return m;
688
689  // Determine if the Node has inputs from some local Phi.
690  // Returns the block to clone thru.
691  Node *n_blk = has_local_phi_input( n );
692  if( !n_blk ) return n;
693  // Do not clone the trip counter through on a CountedLoop
694  // (messes up the canonical shape).
695  if( n_blk->is_CountedLoop() && n->Opcode() == Op_AddI ) return n;
696
697  // Check for having no control input; not pinned.  Allow
698  // dominating control.
699  if( n->in(0) ) {
700    Node *dom = idom(n_blk);
701    if( dom_lca( n->in(0), dom ) != n->in(0) )
702      return n;
703  }
704  // Policy: when is it profitable.  You must get more wins than
705  // policy before it is considered profitable.  Policy is usually 0,
706  // so 1 win is considered profitable.  Big merges will require big
707  // cloning, so get a larger policy.
708  int policy = n_blk->req() >> 2;
709
710  // If the loop is a candidate for range check elimination,
711  // delay splitting through it's phi until a later loop optimization
712  if (n_blk->is_CountedLoop()) {
713    IdealLoopTree *lp = get_loop(n_blk);
714    if (lp && lp->_rce_candidate) {
715      return n;
716    }
717  }
718
719  // Use same limit as split_if_with_blocks_post
720  if( C->unique() > 35000 ) return n; // Method too big
721
722  // Split 'n' through the merge point if it is profitable
723  Node *phi = split_thru_phi( n, n_blk, policy );
724  if (!phi) return n;
725
726  // Found a Phi to split thru!
727  // Replace 'n' with the new phi
728  _igvn.replace_node( n, phi );
729  // Moved a load around the loop, 'en-registering' something.
730  if (n_blk->is_Loop() && n->is_Load() &&
731      !phi->in(LoopNode::LoopBackControl)->is_Load())
732    C->set_major_progress();
733
734  return phi;
735}
736
737static bool merge_point_too_heavy(Compile* C, Node* region) {
738  // Bail out if the region and its phis have too many users.
739  int weight = 0;
740  for (DUIterator_Fast imax, i = region->fast_outs(imax); i < imax; i++) {
741    weight += region->fast_out(i)->outcnt();
742  }
743  int nodes_left = MaxNodeLimit - C->live_nodes();
744  if (weight * 8 > nodes_left) {
745#ifndef PRODUCT
746    if (PrintOpto)
747      tty->print_cr("*** Split-if bails out:  %d nodes, region weight %d", C->unique(), weight);
748#endif
749    return true;
750  } else {
751    return false;
752  }
753}
754
755static bool merge_point_safe(Node* region) {
756  // 4799512: Stop split_if_with_blocks from splitting a block with a ConvI2LNode
757  // having a PhiNode input. This sidesteps the dangerous case where the split
758  // ConvI2LNode may become TOP if the input Value() does not
759  // overlap the ConvI2L range, leaving a node which may not dominate its
760  // uses.
761  // A better fix for this problem can be found in the BugTraq entry, but
762  // expediency for Mantis demands this hack.
763  // 6855164: If the merge point has a FastLockNode with a PhiNode input, we stop
764  // split_if_with_blocks from splitting a block because we could not move around
765  // the FastLockNode.
766  for (DUIterator_Fast imax, i = region->fast_outs(imax); i < imax; i++) {
767    Node* n = region->fast_out(i);
768    if (n->is_Phi()) {
769      for (DUIterator_Fast jmax, j = n->fast_outs(jmax); j < jmax; j++) {
770        Node* m = n->fast_out(j);
771        if (m->is_FastLock())
772          return false;
773#ifdef _LP64
774        if (m->Opcode() == Op_ConvI2L)
775          return false;
776#endif
777      }
778    }
779  }
780  return true;
781}
782
783
784//------------------------------place_near_use---------------------------------
785// Place some computation next to use but not inside inner loops.
786// For inner loop uses move it to the preheader area.
787Node *PhaseIdealLoop::place_near_use( Node *useblock ) const {
788  IdealLoopTree *u_loop = get_loop( useblock );
789  return (u_loop->_irreducible || u_loop->_child)
790    ? useblock
791    : u_loop->_head->in(LoopNode::EntryControl);
792}
793
794
795//------------------------------split_if_with_blocks_post----------------------
796// Do the real work in a non-recursive function.  CFG hackery wants to be
797// in the post-order, so it can dirty the I-DOM info and not use the dirtied
798// info.
799void PhaseIdealLoop::split_if_with_blocks_post( Node *n ) {
800
801  // Cloning Cmp through Phi's involves the split-if transform.
802  // FastLock is not used by an If
803  if( n->is_Cmp() && !n->is_FastLock() ) {
804    if( C->unique() > 35000 ) return; // Method too big
805
806    // Do not do 'split-if' if irreducible loops are present.
807    if( _has_irreducible_loops )
808      return;
809
810    Node *n_ctrl = get_ctrl(n);
811    // Determine if the Node has inputs from some local Phi.
812    // Returns the block to clone thru.
813    Node *n_blk = has_local_phi_input( n );
814    if( n_blk != n_ctrl ) return;
815
816    if( merge_point_too_heavy(C, n_ctrl) )
817      return;
818
819    if( n->outcnt() != 1 ) return; // Multiple bool's from 1 compare?
820    Node *bol = n->unique_out();
821    assert( bol->is_Bool(), "expect a bool here" );
822    if( bol->outcnt() != 1 ) return;// Multiple branches from 1 compare?
823    Node *iff = bol->unique_out();
824
825    // Check some safety conditions
826    if( iff->is_If() ) {        // Classic split-if?
827      if( iff->in(0) != n_ctrl ) return; // Compare must be in same blk as if
828    } else if (iff->is_CMove()) { // Trying to split-up a CMOVE
829      // Can't split CMove with different control edge.
830      if (iff->in(0) != NULL && iff->in(0) != n_ctrl ) return;
831      if( get_ctrl(iff->in(2)) == n_ctrl ||
832          get_ctrl(iff->in(3)) == n_ctrl )
833        return;                 // Inputs not yet split-up
834      if ( get_loop(n_ctrl) != get_loop(get_ctrl(iff)) ) {
835        return;                 // Loop-invar test gates loop-varying CMOVE
836      }
837    } else {
838      return;  // some other kind of node, such as an Allocate
839    }
840
841    // Do not do 'split-if' if some paths are dead.  First do dead code
842    // elimination and then see if its still profitable.
843    for( uint i = 1; i < n_ctrl->req(); i++ )
844      if( n_ctrl->in(i) == C->top() )
845        return;
846
847    // When is split-if profitable?  Every 'win' on means some control flow
848    // goes dead, so it's almost always a win.
849    int policy = 0;
850    // If trying to do a 'Split-If' at the loop head, it is only
851    // profitable if the cmp folds up on BOTH paths.  Otherwise we
852    // risk peeling a loop forever.
853
854    // CNC - Disabled for now.  Requires careful handling of loop
855    // body selection for the cloned code.  Also, make sure we check
856    // for any input path not being in the same loop as n_ctrl.  For
857    // irreducible loops we cannot check for 'n_ctrl->is_Loop()'
858    // because the alternative loop entry points won't be converted
859    // into LoopNodes.
860    IdealLoopTree *n_loop = get_loop(n_ctrl);
861    for( uint j = 1; j < n_ctrl->req(); j++ )
862      if( get_loop(n_ctrl->in(j)) != n_loop )
863        return;
864
865    // Check for safety of the merge point.
866    if( !merge_point_safe(n_ctrl) ) {
867      return;
868    }
869
870    // Split compare 'n' through the merge point if it is profitable
871    Node *phi = split_thru_phi( n, n_ctrl, policy );
872    if( !phi ) return;
873
874    // Found a Phi to split thru!
875    // Replace 'n' with the new phi
876    _igvn.replace_node( n, phi );
877
878    // Now split the bool up thru the phi
879    Node *bolphi = split_thru_phi( bol, n_ctrl, -1 );
880    guarantee(bolphi != NULL, "null boolean phi node");
881
882    _igvn.replace_node( bol, bolphi );
883    assert( iff->in(1) == bolphi, "" );
884
885    if( bolphi->Value(&_igvn)->singleton() )
886      return;
887
888    // Conditional-move?  Must split up now
889    if( !iff->is_If() ) {
890      Node *cmovphi = split_thru_phi( iff, n_ctrl, -1 );
891      _igvn.replace_node( iff, cmovphi );
892      return;
893    }
894
895    // Now split the IF
896    do_split_if( iff );
897    return;
898  }
899
900  // Check for an IF ready to split; one that has its
901  // condition codes input coming from a Phi at the block start.
902  int n_op = n->Opcode();
903
904  // Check for an IF being dominated by another IF same test
905  if( n_op == Op_If ) {
906    Node *bol = n->in(1);
907    uint max = bol->outcnt();
908    // Check for same test used more than once?
909    if( n_op == Op_If && max > 1 && bol->is_Bool() ) {
910      // Search up IDOMs to see if this IF is dominated.
911      Node *cutoff = get_ctrl(bol);
912
913      // Now search up IDOMs till cutoff, looking for a dominating test
914      Node *prevdom = n;
915      Node *dom = idom(prevdom);
916      while( dom != cutoff ) {
917        if( dom->req() > 1 && dom->in(1) == bol && prevdom->in(0) == dom ) {
918          // Replace the dominated test with an obvious true or false.
919          // Place it on the IGVN worklist for later cleanup.
920          C->set_major_progress();
921          dominated_by( prevdom, n, false, true );
922#ifndef PRODUCT
923          if( VerifyLoopOptimizations ) verify();
924#endif
925          return;
926        }
927        prevdom = dom;
928        dom = idom(prevdom);
929      }
930    }
931  }
932
933  // See if a shared loop-varying computation has no loop-varying uses.
934  // Happens if something is only used for JVM state in uncommon trap exits,
935  // like various versions of induction variable+offset.  Clone the
936  // computation per usage to allow it to sink out of the loop.
937  if (has_ctrl(n) && !n->in(0)) {// n not dead and has no control edge (can float about)
938    Node *n_ctrl = get_ctrl(n);
939    IdealLoopTree *n_loop = get_loop(n_ctrl);
940    if( n_loop != _ltree_root ) {
941      DUIterator_Fast imax, i = n->fast_outs(imax);
942      for (; i < imax; i++) {
943        Node* u = n->fast_out(i);
944        if( !has_ctrl(u) )     break; // Found control user
945        IdealLoopTree *u_loop = get_loop(get_ctrl(u));
946        if( u_loop == n_loop ) break; // Found loop-varying use
947        if( n_loop->is_member( u_loop ) ) break; // Found use in inner loop
948        if( u->Opcode() == Op_Opaque1 ) break; // Found loop limit, bugfix for 4677003
949      }
950      bool did_break = (i < imax);  // Did we break out of the previous loop?
951      if (!did_break && n->outcnt() > 1) { // All uses in outer loops!
952        Node *late_load_ctrl = NULL;
953        if (n->is_Load()) {
954          // If n is a load, get and save the result from get_late_ctrl(),
955          // to be later used in calculating the control for n's clones.
956          clear_dom_lca_tags();
957          late_load_ctrl = get_late_ctrl(n, n_ctrl);
958        }
959        // If n is a load, and the late control is the same as the current
960        // control, then the cloning of n is a pointless exercise, because
961        // GVN will ensure that we end up where we started.
962        if (!n->is_Load() || late_load_ctrl != n_ctrl) {
963          for (DUIterator_Last jmin, j = n->last_outs(jmin); j >= jmin; ) {
964            Node *u = n->last_out(j); // Clone private computation per use
965            _igvn.rehash_node_delayed(u);
966            Node *x = n->clone(); // Clone computation
967            Node *x_ctrl = NULL;
968            if( u->is_Phi() ) {
969              // Replace all uses of normal nodes.  Replace Phi uses
970              // individually, so the separate Nodes can sink down
971              // different paths.
972              uint k = 1;
973              while( u->in(k) != n ) k++;
974              u->set_req( k, x );
975              // x goes next to Phi input path
976              x_ctrl = u->in(0)->in(k);
977              --j;
978            } else {              // Normal use
979              // Replace all uses
980              for( uint k = 0; k < u->req(); k++ ) {
981                if( u->in(k) == n ) {
982                  u->set_req( k, x );
983                  --j;
984                }
985              }
986              x_ctrl = get_ctrl(u);
987            }
988
989            // Find control for 'x' next to use but not inside inner loops.
990            // For inner loop uses get the preheader area.
991            x_ctrl = place_near_use(x_ctrl);
992
993            if (n->is_Load()) {
994              // For loads, add a control edge to a CFG node outside of the loop
995              // to force them to not combine and return back inside the loop
996              // during GVN optimization (4641526).
997              //
998              // Because we are setting the actual control input, factor in
999              // the result from get_late_ctrl() so we respect any
1000              // anti-dependences. (6233005).
1001              x_ctrl = dom_lca(late_load_ctrl, x_ctrl);
1002
1003              // Don't allow the control input to be a CFG splitting node.
1004              // Such nodes should only have ProjNodes as outs, e.g. IfNode
1005              // should only have IfTrueNode and IfFalseNode (4985384).
1006              x_ctrl = find_non_split_ctrl(x_ctrl);
1007              assert(dom_depth(n_ctrl) <= dom_depth(x_ctrl), "n is later than its clone");
1008
1009              x->set_req(0, x_ctrl);
1010            }
1011            register_new_node(x, x_ctrl);
1012
1013            // Some institutional knowledge is needed here: 'x' is
1014            // yanked because if the optimizer runs GVN on it all the
1015            // cloned x's will common up and undo this optimization and
1016            // be forced back in the loop.  This is annoying because it
1017            // makes +VerifyOpto report false-positives on progress.  I
1018            // tried setting control edges on the x's to force them to
1019            // not combine, but the matching gets worried when it tries
1020            // to fold a StoreP and an AddP together (as part of an
1021            // address expression) and the AddP and StoreP have
1022            // different controls.
1023            if (!x->is_Load() && !x->is_DecodeNarrowPtr()) _igvn._worklist.yank(x);
1024          }
1025          _igvn.remove_dead_node(n);
1026        }
1027      }
1028    }
1029  }
1030
1031  // Check for Opaque2's who's loop has disappeared - who's input is in the
1032  // same loop nest as their output.  Remove 'em, they are no longer useful.
1033  if( n_op == Op_Opaque2 &&
1034      n->in(1) != NULL &&
1035      get_loop(get_ctrl(n)) == get_loop(get_ctrl(n->in(1))) ) {
1036    _igvn.replace_node( n, n->in(1) );
1037  }
1038}
1039
1040//------------------------------split_if_with_blocks---------------------------
1041// Check for aggressive application of 'split-if' optimization,
1042// using basic block level info.
1043void PhaseIdealLoop::split_if_with_blocks( VectorSet &visited, Node_Stack &nstack ) {
1044  Node *n = C->root();
1045  visited.set(n->_idx); // first, mark node as visited
1046  // Do pre-visit work for root
1047  n = split_if_with_blocks_pre( n );
1048  uint cnt = n->outcnt();
1049  uint i   = 0;
1050  while (true) {
1051    // Visit all children
1052    if (i < cnt) {
1053      Node* use = n->raw_out(i);
1054      ++i;
1055      if (use->outcnt() != 0 && !visited.test_set(use->_idx)) {
1056        // Now do pre-visit work for this use
1057        use = split_if_with_blocks_pre( use );
1058        nstack.push(n, i); // Save parent and next use's index.
1059        n   = use;         // Process all children of current use.
1060        cnt = use->outcnt();
1061        i   = 0;
1062      }
1063    }
1064    else {
1065      // All of n's children have been processed, complete post-processing.
1066      if (cnt != 0 && !n->is_Con()) {
1067        assert(has_node(n), "no dead nodes");
1068        split_if_with_blocks_post( n );
1069      }
1070      if (nstack.is_empty()) {
1071        // Finished all nodes on stack.
1072        break;
1073      }
1074      // Get saved parent node and next use's index. Visit the rest of uses.
1075      n   = nstack.node();
1076      cnt = n->outcnt();
1077      i   = nstack.index();
1078      nstack.pop();
1079    }
1080  }
1081}
1082
1083
1084//=============================================================================
1085//
1086//                   C L O N E   A   L O O P   B O D Y
1087//
1088
1089//------------------------------clone_iff--------------------------------------
1090// Passed in a Phi merging (recursively) some nearly equivalent Bool/Cmps.
1091// "Nearly" because all Nodes have been cloned from the original in the loop,
1092// but the fall-in edges to the Cmp are different.  Clone bool/Cmp pairs
1093// through the Phi recursively, and return a Bool.
1094BoolNode *PhaseIdealLoop::clone_iff( PhiNode *phi, IdealLoopTree *loop ) {
1095
1096  // Convert this Phi into a Phi merging Bools
1097  uint i;
1098  for( i = 1; i < phi->req(); i++ ) {
1099    Node *b = phi->in(i);
1100    if( b->is_Phi() ) {
1101      _igvn.replace_input_of(phi, i, clone_iff( b->as_Phi(), loop ));
1102    } else {
1103      assert( b->is_Bool(), "" );
1104    }
1105  }
1106
1107  Node *sample_bool = phi->in(1);
1108  Node *sample_cmp  = sample_bool->in(1);
1109
1110  // Make Phis to merge the Cmp's inputs.
1111  PhiNode *phi1 = new (C) PhiNode( phi->in(0), Type::TOP );
1112  PhiNode *phi2 = new (C) PhiNode( phi->in(0), Type::TOP );
1113  for( i = 1; i < phi->req(); i++ ) {
1114    Node *n1 = phi->in(i)->in(1)->in(1);
1115    Node *n2 = phi->in(i)->in(1)->in(2);
1116    phi1->set_req( i, n1 );
1117    phi2->set_req( i, n2 );
1118    phi1->set_type( phi1->type()->meet(n1->bottom_type()) );
1119    phi2->set_type( phi2->type()->meet(n2->bottom_type()) );
1120  }
1121  // See if these Phis have been made before.
1122  // Register with optimizer
1123  Node *hit1 = _igvn.hash_find_insert(phi1);
1124  if( hit1 ) {                  // Hit, toss just made Phi
1125    _igvn.remove_dead_node(phi1); // Remove new phi
1126    assert( hit1->is_Phi(), "" );
1127    phi1 = (PhiNode*)hit1;      // Use existing phi
1128  } else {                      // Miss
1129    _igvn.register_new_node_with_optimizer(phi1);
1130  }
1131  Node *hit2 = _igvn.hash_find_insert(phi2);
1132  if( hit2 ) {                  // Hit, toss just made Phi
1133    _igvn.remove_dead_node(phi2); // Remove new phi
1134    assert( hit2->is_Phi(), "" );
1135    phi2 = (PhiNode*)hit2;      // Use existing phi
1136  } else {                      // Miss
1137    _igvn.register_new_node_with_optimizer(phi2);
1138  }
1139  // Register Phis with loop/block info
1140  set_ctrl(phi1, phi->in(0));
1141  set_ctrl(phi2, phi->in(0));
1142  // Make a new Cmp
1143  Node *cmp = sample_cmp->clone();
1144  cmp->set_req( 1, phi1 );
1145  cmp->set_req( 2, phi2 );
1146  _igvn.register_new_node_with_optimizer(cmp);
1147  set_ctrl(cmp, phi->in(0));
1148
1149  // Make a new Bool
1150  Node *b = sample_bool->clone();
1151  b->set_req(1,cmp);
1152  _igvn.register_new_node_with_optimizer(b);
1153  set_ctrl(b, phi->in(0));
1154
1155  assert( b->is_Bool(), "" );
1156  return (BoolNode*)b;
1157}
1158
1159//------------------------------clone_bool-------------------------------------
1160// Passed in a Phi merging (recursively) some nearly equivalent Bool/Cmps.
1161// "Nearly" because all Nodes have been cloned from the original in the loop,
1162// but the fall-in edges to the Cmp are different.  Clone bool/Cmp pairs
1163// through the Phi recursively, and return a Bool.
1164CmpNode *PhaseIdealLoop::clone_bool( PhiNode *phi, IdealLoopTree *loop ) {
1165  uint i;
1166  // Convert this Phi into a Phi merging Bools
1167  for( i = 1; i < phi->req(); i++ ) {
1168    Node *b = phi->in(i);
1169    if( b->is_Phi() ) {
1170      _igvn.replace_input_of(phi, i, clone_bool( b->as_Phi(), loop ));
1171    } else {
1172      assert( b->is_Cmp() || b->is_top(), "inputs are all Cmp or TOP" );
1173    }
1174  }
1175
1176  Node *sample_cmp = phi->in(1);
1177
1178  // Make Phis to merge the Cmp's inputs.
1179  PhiNode *phi1 = new (C) PhiNode( phi->in(0), Type::TOP );
1180  PhiNode *phi2 = new (C) PhiNode( phi->in(0), Type::TOP );
1181  for( uint j = 1; j < phi->req(); j++ ) {
1182    Node *cmp_top = phi->in(j); // Inputs are all Cmp or TOP
1183    Node *n1, *n2;
1184    if( cmp_top->is_Cmp() ) {
1185      n1 = cmp_top->in(1);
1186      n2 = cmp_top->in(2);
1187    } else {
1188      n1 = n2 = cmp_top;
1189    }
1190    phi1->set_req( j, n1 );
1191    phi2->set_req( j, n2 );
1192    phi1->set_type( phi1->type()->meet(n1->bottom_type()) );
1193    phi2->set_type( phi2->type()->meet(n2->bottom_type()) );
1194  }
1195
1196  // See if these Phis have been made before.
1197  // Register with optimizer
1198  Node *hit1 = _igvn.hash_find_insert(phi1);
1199  if( hit1 ) {                  // Hit, toss just made Phi
1200    _igvn.remove_dead_node(phi1); // Remove new phi
1201    assert( hit1->is_Phi(), "" );
1202    phi1 = (PhiNode*)hit1;      // Use existing phi
1203  } else {                      // Miss
1204    _igvn.register_new_node_with_optimizer(phi1);
1205  }
1206  Node *hit2 = _igvn.hash_find_insert(phi2);
1207  if( hit2 ) {                  // Hit, toss just made Phi
1208    _igvn.remove_dead_node(phi2); // Remove new phi
1209    assert( hit2->is_Phi(), "" );
1210    phi2 = (PhiNode*)hit2;      // Use existing phi
1211  } else {                      // Miss
1212    _igvn.register_new_node_with_optimizer(phi2);
1213  }
1214  // Register Phis with loop/block info
1215  set_ctrl(phi1, phi->in(0));
1216  set_ctrl(phi2, phi->in(0));
1217  // Make a new Cmp
1218  Node *cmp = sample_cmp->clone();
1219  cmp->set_req( 1, phi1 );
1220  cmp->set_req( 2, phi2 );
1221  _igvn.register_new_node_with_optimizer(cmp);
1222  set_ctrl(cmp, phi->in(0));
1223
1224  assert( cmp->is_Cmp(), "" );
1225  return (CmpNode*)cmp;
1226}
1227
1228//------------------------------sink_use---------------------------------------
1229// If 'use' was in the loop-exit block, it now needs to be sunk
1230// below the post-loop merge point.
1231void PhaseIdealLoop::sink_use( Node *use, Node *post_loop ) {
1232  if (!use->is_CFG() && get_ctrl(use) == post_loop->in(2)) {
1233    set_ctrl(use, post_loop);
1234    for (DUIterator j = use->outs(); use->has_out(j); j++)
1235      sink_use(use->out(j), post_loop);
1236  }
1237}
1238
1239//------------------------------clone_loop-------------------------------------
1240//
1241//                   C L O N E   A   L O O P   B O D Y
1242//
1243// This is the basic building block of the loop optimizations.  It clones an
1244// entire loop body.  It makes an old_new loop body mapping; with this mapping
1245// you can find the new-loop equivalent to an old-loop node.  All new-loop
1246// nodes are exactly equal to their old-loop counterparts, all edges are the
1247// same.  All exits from the old-loop now have a RegionNode that merges the
1248// equivalent new-loop path.  This is true even for the normal "loop-exit"
1249// condition.  All uses of loop-invariant old-loop values now come from (one
1250// or more) Phis that merge their new-loop equivalents.
1251//
1252// This operation leaves the graph in an illegal state: there are two valid
1253// control edges coming from the loop pre-header to both loop bodies.  I'll
1254// definitely have to hack the graph after running this transform.
1255//
1256// From this building block I will further edit edges to perform loop peeling
1257// or loop unrolling or iteration splitting (Range-Check-Elimination), etc.
1258//
1259// Parameter side_by_size_idom:
1260//   When side_by_size_idom is NULL, the dominator tree is constructed for
1261//      the clone loop to dominate the original.  Used in construction of
1262//      pre-main-post loop sequence.
1263//   When nonnull, the clone and original are side-by-side, both are
1264//      dominated by the side_by_side_idom node.  Used in construction of
1265//      unswitched loops.
1266void PhaseIdealLoop::clone_loop( IdealLoopTree *loop, Node_List &old_new, int dd,
1267                                 Node* side_by_side_idom) {
1268
1269  // Step 1: Clone the loop body.  Make the old->new mapping.
1270  uint i;
1271  for( i = 0; i < loop->_body.size(); i++ ) {
1272    Node *old = loop->_body.at(i);
1273    Node *nnn = old->clone();
1274    old_new.map( old->_idx, nnn );
1275    _igvn.register_new_node_with_optimizer(nnn);
1276  }
1277
1278
1279  // Step 2: Fix the edges in the new body.  If the old input is outside the
1280  // loop use it.  If the old input is INside the loop, use the corresponding
1281  // new node instead.
1282  for( i = 0; i < loop->_body.size(); i++ ) {
1283    Node *old = loop->_body.at(i);
1284    Node *nnn = old_new[old->_idx];
1285    // Fix CFG/Loop controlling the new node
1286    if (has_ctrl(old)) {
1287      set_ctrl(nnn, old_new[get_ctrl(old)->_idx]);
1288    } else {
1289      set_loop(nnn, loop->_parent);
1290      if (old->outcnt() > 0) {
1291        set_idom( nnn, old_new[idom(old)->_idx], dd );
1292      }
1293    }
1294    // Correct edges to the new node
1295    for( uint j = 0; j < nnn->req(); j++ ) {
1296        Node *n = nnn->in(j);
1297        if( n ) {
1298          IdealLoopTree *old_in_loop = get_loop( has_ctrl(n) ? get_ctrl(n) : n );
1299          if( loop->is_member( old_in_loop ) )
1300            nnn->set_req(j, old_new[n->_idx]);
1301        }
1302    }
1303    _igvn.hash_find_insert(nnn);
1304  }
1305  Node *newhead = old_new[loop->_head->_idx];
1306  set_idom(newhead, newhead->in(LoopNode::EntryControl), dd);
1307
1308
1309  // Step 3: Now fix control uses.  Loop varying control uses have already
1310  // been fixed up (as part of all input edges in Step 2).  Loop invariant
1311  // control uses must be either an IfFalse or an IfTrue.  Make a merge
1312  // point to merge the old and new IfFalse/IfTrue nodes; make the use
1313  // refer to this.
1314  ResourceArea *area = Thread::current()->resource_area();
1315  Node_List worklist(area);
1316  uint new_counter = C->unique();
1317  for( i = 0; i < loop->_body.size(); i++ ) {
1318    Node* old = loop->_body.at(i);
1319    if( !old->is_CFG() ) continue;
1320    Node* nnn = old_new[old->_idx];
1321
1322    // Copy uses to a worklist, so I can munge the def-use info
1323    // with impunity.
1324    for (DUIterator_Fast jmax, j = old->fast_outs(jmax); j < jmax; j++)
1325      worklist.push(old->fast_out(j));
1326
1327    while( worklist.size() ) {  // Visit all uses
1328      Node *use = worklist.pop();
1329      if (!has_node(use))  continue; // Ignore dead nodes
1330      IdealLoopTree *use_loop = get_loop( has_ctrl(use) ? get_ctrl(use) : use );
1331      if( !loop->is_member( use_loop ) && use->is_CFG() ) {
1332        // Both OLD and USE are CFG nodes here.
1333        assert( use->is_Proj(), "" );
1334
1335        // Clone the loop exit control projection
1336        Node *newuse = use->clone();
1337        newuse->set_req(0,nnn);
1338        _igvn.register_new_node_with_optimizer(newuse);
1339        set_loop(newuse, use_loop);
1340        set_idom(newuse, nnn, dom_depth(nnn) + 1 );
1341
1342        // We need a Region to merge the exit from the peeled body and the
1343        // exit from the old loop body.
1344        RegionNode *r = new (C) RegionNode(3);
1345        // Map the old use to the new merge point
1346        old_new.map( use->_idx, r );
1347        uint dd_r = MIN2(dom_depth(newuse),dom_depth(use));
1348        assert( dd_r >= dom_depth(dom_lca(newuse,use)), "" );
1349
1350        // The original user of 'use' uses 'r' instead.
1351        for (DUIterator_Last lmin, l = use->last_outs(lmin); l >= lmin;) {
1352          Node* useuse = use->last_out(l);
1353          _igvn.rehash_node_delayed(useuse);
1354          uint uses_found = 0;
1355          if( useuse->in(0) == use ) {
1356            useuse->set_req(0, r);
1357            uses_found++;
1358            if( useuse->is_CFG() ) {
1359              assert( dom_depth(useuse) > dd_r, "" );
1360              set_idom(useuse, r, dom_depth(useuse));
1361            }
1362          }
1363          for( uint k = 1; k < useuse->req(); k++ ) {
1364            if( useuse->in(k) == use ) {
1365              useuse->set_req(k, r);
1366              uses_found++;
1367            }
1368          }
1369          l -= uses_found;    // we deleted 1 or more copies of this edge
1370        }
1371
1372        // Now finish up 'r'
1373        r->set_req( 1, newuse );
1374        r->set_req( 2,    use );
1375        _igvn.register_new_node_with_optimizer(r);
1376        set_loop(r, use_loop);
1377        set_idom(r, !side_by_side_idom ? newuse->in(0) : side_by_side_idom, dd_r);
1378      } // End of if a loop-exit test
1379    }
1380  }
1381
1382  // Step 4: If loop-invariant use is not control, it must be dominated by a
1383  // loop exit IfFalse/IfTrue.  Find "proper" loop exit.  Make a Region
1384  // there if needed.  Make a Phi there merging old and new used values.
1385  Node_List *split_if_set = NULL;
1386  Node_List *split_bool_set = NULL;
1387  Node_List *split_cex_set = NULL;
1388  for( i = 0; i < loop->_body.size(); i++ ) {
1389    Node* old = loop->_body.at(i);
1390    Node* nnn = old_new[old->_idx];
1391    // Copy uses to a worklist, so I can munge the def-use info
1392    // with impunity.
1393    for (DUIterator_Fast jmax, j = old->fast_outs(jmax); j < jmax; j++)
1394      worklist.push(old->fast_out(j));
1395
1396    while( worklist.size() ) {
1397      Node *use = worklist.pop();
1398      if (!has_node(use))  continue; // Ignore dead nodes
1399      if (use->in(0) == C->top())  continue;
1400      IdealLoopTree *use_loop = get_loop( has_ctrl(use) ? get_ctrl(use) : use );
1401      // Check for data-use outside of loop - at least one of OLD or USE
1402      // must not be a CFG node.
1403      if( !loop->is_member( use_loop ) && (!old->is_CFG() || !use->is_CFG())) {
1404
1405        // If the Data use is an IF, that means we have an IF outside of the
1406        // loop that is switching on a condition that is set inside of the
1407        // loop.  Happens if people set a loop-exit flag; then test the flag
1408        // in the loop to break the loop, then test is again outside of the
1409        // loop to determine which way the loop exited.
1410        if( use->is_If() || use->is_CMove() ) {
1411          // Since this code is highly unlikely, we lazily build the worklist
1412          // of such Nodes to go split.
1413          if( !split_if_set )
1414            split_if_set = new Node_List(area);
1415          split_if_set->push(use);
1416        }
1417        if( use->is_Bool() ) {
1418          if( !split_bool_set )
1419            split_bool_set = new Node_List(area);
1420          split_bool_set->push(use);
1421        }
1422        if( use->Opcode() == Op_CreateEx ) {
1423          if( !split_cex_set )
1424            split_cex_set = new Node_List(area);
1425          split_cex_set->push(use);
1426        }
1427
1428
1429        // Get "block" use is in
1430        uint idx = 0;
1431        while( use->in(idx) != old ) idx++;
1432        Node *prev = use->is_CFG() ? use : get_ctrl(use);
1433        assert( !loop->is_member( get_loop( prev ) ), "" );
1434        Node *cfg = prev->_idx >= new_counter
1435          ? prev->in(2)
1436          : idom(prev);
1437        if( use->is_Phi() )     // Phi use is in prior block
1438          cfg = prev->in(idx);  // NOT in block of Phi itself
1439        if (cfg->is_top()) {    // Use is dead?
1440          _igvn.replace_input_of(use, idx, C->top());
1441          continue;
1442        }
1443
1444        while( !loop->is_member( get_loop( cfg ) ) ) {
1445          prev = cfg;
1446          cfg = cfg->_idx >= new_counter ? cfg->in(2) : idom(cfg);
1447        }
1448        // If the use occurs after merging several exits from the loop, then
1449        // old value must have dominated all those exits.  Since the same old
1450        // value was used on all those exits we did not need a Phi at this
1451        // merge point.  NOW we do need a Phi here.  Each loop exit value
1452        // is now merged with the peeled body exit; each exit gets its own
1453        // private Phi and those Phis need to be merged here.
1454        Node *phi;
1455        if( prev->is_Region() ) {
1456          if( idx == 0 ) {      // Updating control edge?
1457            phi = prev;         // Just use existing control
1458          } else {              // Else need a new Phi
1459            phi = PhiNode::make( prev, old );
1460            // Now recursively fix up the new uses of old!
1461            for( uint i = 1; i < prev->req(); i++ ) {
1462              worklist.push(phi); // Onto worklist once for each 'old' input
1463            }
1464          }
1465        } else {
1466          // Get new RegionNode merging old and new loop exits
1467          prev = old_new[prev->_idx];
1468          assert( prev, "just made this in step 7" );
1469          if( idx == 0 ) {      // Updating control edge?
1470            phi = prev;         // Just use existing control
1471          } else {              // Else need a new Phi
1472            // Make a new Phi merging data values properly
1473            phi = PhiNode::make( prev, old );
1474            phi->set_req( 1, nnn );
1475          }
1476        }
1477        // If inserting a new Phi, check for prior hits
1478        if( idx != 0 ) {
1479          Node *hit = _igvn.hash_find_insert(phi);
1480          if( hit == NULL ) {
1481           _igvn.register_new_node_with_optimizer(phi); // Register new phi
1482          } else {                                      // or
1483            // Remove the new phi from the graph and use the hit
1484            _igvn.remove_dead_node(phi);
1485            phi = hit;                                  // Use existing phi
1486          }
1487          set_ctrl(phi, prev);
1488        }
1489        // Make 'use' use the Phi instead of the old loop body exit value
1490        _igvn.replace_input_of(use, idx, phi);
1491        if( use->_idx >= new_counter ) { // If updating new phis
1492          // Not needed for correctness, but prevents a weak assert
1493          // in AddPNode from tripping (when we end up with different
1494          // base & derived Phis that will become the same after
1495          // IGVN does CSE).
1496          Node *hit = _igvn.hash_find_insert(use);
1497          if( hit )             // Go ahead and re-hash for hits.
1498            _igvn.replace_node( use, hit );
1499        }
1500
1501        // If 'use' was in the loop-exit block, it now needs to be sunk
1502        // below the post-loop merge point.
1503        sink_use( use, prev );
1504      }
1505    }
1506  }
1507
1508  // Check for IFs that need splitting/cloning.  Happens if an IF outside of
1509  // the loop uses a condition set in the loop.  The original IF probably
1510  // takes control from one or more OLD Regions (which in turn get from NEW
1511  // Regions).  In any case, there will be a set of Phis for each merge point
1512  // from the IF up to where the original BOOL def exists the loop.
1513  if( split_if_set ) {
1514    while( split_if_set->size() ) {
1515      Node *iff = split_if_set->pop();
1516      if( iff->in(1)->is_Phi() ) {
1517        BoolNode *b = clone_iff( iff->in(1)->as_Phi(), loop );
1518        _igvn.replace_input_of(iff, 1, b);
1519      }
1520    }
1521  }
1522  if( split_bool_set ) {
1523    while( split_bool_set->size() ) {
1524      Node *b = split_bool_set->pop();
1525      Node *phi = b->in(1);
1526      assert( phi->is_Phi(), "" );
1527      CmpNode *cmp = clone_bool( (PhiNode*)phi, loop );
1528      _igvn.replace_input_of(b, 1, cmp);
1529    }
1530  }
1531  if( split_cex_set ) {
1532    while( split_cex_set->size() ) {
1533      Node *b = split_cex_set->pop();
1534      assert( b->in(0)->is_Region(), "" );
1535      assert( b->in(1)->is_Phi(), "" );
1536      assert( b->in(0)->in(0) == b->in(1)->in(0), "" );
1537      split_up( b, b->in(0), NULL );
1538    }
1539  }
1540
1541}
1542
1543
1544//---------------------- stride_of_possible_iv -------------------------------------
1545// Looks for an iff/bool/comp with one operand of the compare
1546// being a cycle involving an add and a phi,
1547// with an optional truncation (left-shift followed by a right-shift)
1548// of the add. Returns zero if not an iv.
1549int PhaseIdealLoop::stride_of_possible_iv(Node* iff) {
1550  Node* trunc1 = NULL;
1551  Node* trunc2 = NULL;
1552  const TypeInt* ttype = NULL;
1553  if (!iff->is_If() || iff->in(1) == NULL || !iff->in(1)->is_Bool()) {
1554    return 0;
1555  }
1556  BoolNode* bl = iff->in(1)->as_Bool();
1557  Node* cmp = bl->in(1);
1558  if (!cmp || cmp->Opcode() != Op_CmpI && cmp->Opcode() != Op_CmpU) {
1559    return 0;
1560  }
1561  // Must have an invariant operand
1562  if (is_member(get_loop(iff), get_ctrl(cmp->in(2)))) {
1563    return 0;
1564  }
1565  Node* add2 = NULL;
1566  Node* cmp1 = cmp->in(1);
1567  if (cmp1->is_Phi()) {
1568    // (If (Bool (CmpX phi:(Phi ...(Optional-trunc(AddI phi add2))) )))
1569    Node* phi = cmp1;
1570    for (uint i = 1; i < phi->req(); i++) {
1571      Node* in = phi->in(i);
1572      Node* add = CountedLoopNode::match_incr_with_optional_truncation(in,
1573                                &trunc1, &trunc2, &ttype);
1574      if (add && add->in(1) == phi) {
1575        add2 = add->in(2);
1576        break;
1577      }
1578    }
1579  } else {
1580    // (If (Bool (CmpX addtrunc:(Optional-trunc((AddI (Phi ...addtrunc...) add2)) )))
1581    Node* addtrunc = cmp1;
1582    Node* add = CountedLoopNode::match_incr_with_optional_truncation(addtrunc,
1583                                &trunc1, &trunc2, &ttype);
1584    if (add && add->in(1)->is_Phi()) {
1585      Node* phi = add->in(1);
1586      for (uint i = 1; i < phi->req(); i++) {
1587        if (phi->in(i) == addtrunc) {
1588          add2 = add->in(2);
1589          break;
1590        }
1591      }
1592    }
1593  }
1594  if (add2 != NULL) {
1595    const TypeInt* add2t = _igvn.type(add2)->is_int();
1596    if (add2t->is_con()) {
1597      return add2t->get_con();
1598    }
1599  }
1600  return 0;
1601}
1602
1603
1604//---------------------- stay_in_loop -------------------------------------
1605// Return the (unique) control output node that's in the loop (if it exists.)
1606Node* PhaseIdealLoop::stay_in_loop( Node* n, IdealLoopTree *loop) {
1607  Node* unique = NULL;
1608  if (!n) return NULL;
1609  for (DUIterator_Fast imax, i = n->fast_outs(imax); i < imax; i++) {
1610    Node* use = n->fast_out(i);
1611    if (!has_ctrl(use) && loop->is_member(get_loop(use))) {
1612      if (unique != NULL) {
1613        return NULL;
1614      }
1615      unique = use;
1616    }
1617  }
1618  return unique;
1619}
1620
1621//------------------------------ register_node -------------------------------------
1622// Utility to register node "n" with PhaseIdealLoop
1623void PhaseIdealLoop::register_node(Node* n, IdealLoopTree *loop, Node* pred, int ddepth) {
1624  _igvn.register_new_node_with_optimizer(n);
1625  loop->_body.push(n);
1626  if (n->is_CFG()) {
1627    set_loop(n, loop);
1628    set_idom(n, pred, ddepth);
1629  } else {
1630    set_ctrl(n, pred);
1631  }
1632}
1633
1634//------------------------------ proj_clone -------------------------------------
1635// Utility to create an if-projection
1636ProjNode* PhaseIdealLoop::proj_clone(ProjNode* p, IfNode* iff) {
1637  ProjNode* c = p->clone()->as_Proj();
1638  c->set_req(0, iff);
1639  return c;
1640}
1641
1642//------------------------------ short_circuit_if -------------------------------------
1643// Force the iff control output to be the live_proj
1644Node* PhaseIdealLoop::short_circuit_if(IfNode* iff, ProjNode* live_proj) {
1645  guarantee(live_proj != NULL, "null projection");
1646  int proj_con = live_proj->_con;
1647  assert(proj_con == 0 || proj_con == 1, "false or true projection");
1648  Node *con = _igvn.intcon(proj_con);
1649  set_ctrl(con, C->root());
1650  if (iff) {
1651    iff->set_req(1, con);
1652  }
1653  return con;
1654}
1655
1656//------------------------------ insert_if_before_proj -------------------------------------
1657// Insert a new if before an if projection (* - new node)
1658//
1659// before
1660//           if(test)
1661//           /     \
1662//          v       v
1663//    other-proj   proj (arg)
1664//
1665// after
1666//           if(test)
1667//           /     \
1668//          /       v
1669//         |      * proj-clone
1670//         v          |
1671//    other-proj      v
1672//                * new_if(relop(cmp[IU](left,right)))
1673//                  /  \
1674//                 v    v
1675//         * new-proj  proj
1676//         (returned)
1677//
1678ProjNode* PhaseIdealLoop::insert_if_before_proj(Node* left, bool Signed, BoolTest::mask relop, Node* right, ProjNode* proj) {
1679  IfNode* iff = proj->in(0)->as_If();
1680  IdealLoopTree *loop = get_loop(proj);
1681  ProjNode *other_proj = iff->proj_out(!proj->is_IfTrue())->as_Proj();
1682  int ddepth = dom_depth(proj);
1683
1684  _igvn.rehash_node_delayed(iff);
1685  _igvn.rehash_node_delayed(proj);
1686
1687  proj->set_req(0, NULL);  // temporary disconnect
1688  ProjNode* proj2 = proj_clone(proj, iff);
1689  register_node(proj2, loop, iff, ddepth);
1690
1691  Node* cmp = Signed ? (Node*) new (C)CmpINode(left, right) : (Node*) new (C)CmpUNode(left, right);
1692  register_node(cmp, loop, proj2, ddepth);
1693
1694  BoolNode* bol = new (C)BoolNode(cmp, relop);
1695  register_node(bol, loop, proj2, ddepth);
1696
1697  IfNode* new_if = new (C)IfNode(proj2, bol, iff->_prob, iff->_fcnt);
1698  register_node(new_if, loop, proj2, ddepth);
1699
1700  proj->set_req(0, new_if); // reattach
1701  set_idom(proj, new_if, ddepth);
1702
1703  ProjNode* new_exit = proj_clone(other_proj, new_if)->as_Proj();
1704  guarantee(new_exit != NULL, "null exit node");
1705  register_node(new_exit, get_loop(other_proj), new_if, ddepth);
1706
1707  return new_exit;
1708}
1709
1710//------------------------------ insert_region_before_proj -------------------------------------
1711// Insert a region before an if projection (* - new node)
1712//
1713// before
1714//           if(test)
1715//          /      |
1716//         v       |
1717//       proj      v
1718//               other-proj
1719//
1720// after
1721//           if(test)
1722//          /      |
1723//         v       |
1724// * proj-clone    v
1725//         |     other-proj
1726//         v
1727// * new-region
1728//         |
1729//         v
1730// *      dum_if
1731//       /     \
1732//      v       \
1733// * dum-proj    v
1734//              proj
1735//
1736RegionNode* PhaseIdealLoop::insert_region_before_proj(ProjNode* proj) {
1737  IfNode* iff = proj->in(0)->as_If();
1738  IdealLoopTree *loop = get_loop(proj);
1739  ProjNode *other_proj = iff->proj_out(!proj->is_IfTrue())->as_Proj();
1740  int ddepth = dom_depth(proj);
1741
1742  _igvn.rehash_node_delayed(iff);
1743  _igvn.rehash_node_delayed(proj);
1744
1745  proj->set_req(0, NULL);  // temporary disconnect
1746  ProjNode* proj2 = proj_clone(proj, iff);
1747  register_node(proj2, loop, iff, ddepth);
1748
1749  RegionNode* reg = new (C)RegionNode(2);
1750  reg->set_req(1, proj2);
1751  register_node(reg, loop, iff, ddepth);
1752
1753  IfNode* dum_if = new (C)IfNode(reg, short_circuit_if(NULL, proj), iff->_prob, iff->_fcnt);
1754  register_node(dum_if, loop, reg, ddepth);
1755
1756  proj->set_req(0, dum_if); // reattach
1757  set_idom(proj, dum_if, ddepth);
1758
1759  ProjNode* dum_proj = proj_clone(other_proj, dum_if);
1760  register_node(dum_proj, loop, dum_if, ddepth);
1761
1762  return reg;
1763}
1764
1765//------------------------------ insert_cmpi_loop_exit -------------------------------------
1766// Clone a signed compare loop exit from an unsigned compare and
1767// insert it before the unsigned cmp on the stay-in-loop path.
1768// All new nodes inserted in the dominator tree between the original
1769// if and it's projections.  The original if test is replaced with
1770// a constant to force the stay-in-loop path.
1771//
1772// This is done to make sure that the original if and it's projections
1773// still dominate the same set of control nodes, that the ctrl() relation
1774// from data nodes to them is preserved, and that their loop nesting is
1775// preserved.
1776//
1777// before
1778//          if(i <u limit)    unsigned compare loop exit
1779//         /       |
1780//        v        v
1781//   exit-proj   stay-in-loop-proj
1782//
1783// after
1784//          if(stay-in-loop-const)  original if
1785//         /       |
1786//        /        v
1787//       /  if(i <  limit)    new signed test
1788//      /  /       |
1789//     /  /        v
1790//    /  /  if(i <u limit)    new cloned unsigned test
1791//   /  /   /      |
1792//   v  v  v       |
1793//    region       |
1794//        |        |
1795//      dum-if     |
1796//     /  |        |
1797// ether  |        |
1798//        v        v
1799//   exit-proj   stay-in-loop-proj
1800//
1801IfNode* PhaseIdealLoop::insert_cmpi_loop_exit(IfNode* if_cmpu, IdealLoopTree *loop) {
1802  const bool Signed   = true;
1803  const bool Unsigned = false;
1804
1805  BoolNode* bol = if_cmpu->in(1)->as_Bool();
1806  if (bol->_test._test != BoolTest::lt) return NULL;
1807  CmpNode* cmpu = bol->in(1)->as_Cmp();
1808  if (cmpu->Opcode() != Op_CmpU) return NULL;
1809  int stride = stride_of_possible_iv(if_cmpu);
1810  if (stride == 0) return NULL;
1811
1812  Node* lp_proj = stay_in_loop(if_cmpu, loop);
1813  guarantee(lp_proj != NULL, "null loop node");
1814
1815  ProjNode* lp_continue = lp_proj->as_Proj();
1816  ProjNode* lp_exit     = if_cmpu->proj_out(!lp_continue->is_IfTrue())->as_Proj();
1817
1818  Node* limit = NULL;
1819  if (stride > 0) {
1820    limit = cmpu->in(2);
1821  } else {
1822    limit = _igvn.makecon(TypeInt::ZERO);
1823    set_ctrl(limit, C->root());
1824  }
1825  // Create a new region on the exit path
1826  RegionNode* reg = insert_region_before_proj(lp_exit);
1827  guarantee(reg != NULL, "null region node");
1828
1829  // Clone the if-cmpu-true-false using a signed compare
1830  BoolTest::mask rel_i = stride > 0 ? bol->_test._test : BoolTest::ge;
1831  ProjNode* cmpi_exit = insert_if_before_proj(cmpu->in(1), Signed, rel_i, limit, lp_continue);
1832  reg->add_req(cmpi_exit);
1833
1834  // Clone the if-cmpu-true-false
1835  BoolTest::mask rel_u = bol->_test._test;
1836  ProjNode* cmpu_exit = insert_if_before_proj(cmpu->in(1), Unsigned, rel_u, cmpu->in(2), lp_continue);
1837  reg->add_req(cmpu_exit);
1838
1839  // Force original if to stay in loop.
1840  short_circuit_if(if_cmpu, lp_continue);
1841
1842  return cmpi_exit->in(0)->as_If();
1843}
1844
1845//------------------------------ remove_cmpi_loop_exit -------------------------------------
1846// Remove a previously inserted signed compare loop exit.
1847void PhaseIdealLoop::remove_cmpi_loop_exit(IfNode* if_cmp, IdealLoopTree *loop) {
1848  Node* lp_proj = stay_in_loop(if_cmp, loop);
1849  assert(if_cmp->in(1)->in(1)->Opcode() == Op_CmpI &&
1850         stay_in_loop(lp_proj, loop)->is_If() &&
1851         stay_in_loop(lp_proj, loop)->in(1)->in(1)->Opcode() == Op_CmpU, "inserted cmpi before cmpu");
1852  Node *con = _igvn.makecon(lp_proj->is_IfTrue() ? TypeInt::ONE : TypeInt::ZERO);
1853  set_ctrl(con, C->root());
1854  if_cmp->set_req(1, con);
1855}
1856
1857//------------------------------ scheduled_nodelist -------------------------------------
1858// Create a post order schedule of nodes that are in the
1859// "member" set.  The list is returned in "sched".
1860// The first node in "sched" is the loop head, followed by
1861// nodes which have no inputs in the "member" set, and then
1862// followed by the nodes that have an immediate input dependence
1863// on a node in "sched".
1864void PhaseIdealLoop::scheduled_nodelist( IdealLoopTree *loop, VectorSet& member, Node_List &sched ) {
1865
1866  assert(member.test(loop->_head->_idx), "loop head must be in member set");
1867  Arena *a = Thread::current()->resource_area();
1868  VectorSet visited(a);
1869  Node_Stack nstack(a, loop->_body.size());
1870
1871  Node* n  = loop->_head;  // top of stack is cached in "n"
1872  uint idx = 0;
1873  visited.set(n->_idx);
1874
1875  // Initially push all with no inputs from within member set
1876  for(uint i = 0; i < loop->_body.size(); i++ ) {
1877    Node *elt = loop->_body.at(i);
1878    if (member.test(elt->_idx)) {
1879      bool found = false;
1880      for (uint j = 0; j < elt->req(); j++) {
1881        Node* def = elt->in(j);
1882        if (def && member.test(def->_idx) && def != elt) {
1883          found = true;
1884          break;
1885        }
1886      }
1887      if (!found && elt != loop->_head) {
1888        nstack.push(n, idx);
1889        n = elt;
1890        assert(!visited.test(n->_idx), "not seen yet");
1891        visited.set(n->_idx);
1892      }
1893    }
1894  }
1895
1896  // traverse out's that are in the member set
1897  while (true) {
1898    if (idx < n->outcnt()) {
1899      Node* use = n->raw_out(idx);
1900      idx++;
1901      if (!visited.test_set(use->_idx)) {
1902        if (member.test(use->_idx)) {
1903          nstack.push(n, idx);
1904          n = use;
1905          idx = 0;
1906        }
1907      }
1908    } else {
1909      // All outputs processed
1910      sched.push(n);
1911      if (nstack.is_empty()) break;
1912      n   = nstack.node();
1913      idx = nstack.index();
1914      nstack.pop();
1915    }
1916  }
1917}
1918
1919
1920//------------------------------ has_use_in_set -------------------------------------
1921// Has a use in the vector set
1922bool PhaseIdealLoop::has_use_in_set( Node* n, VectorSet& vset ) {
1923  for (DUIterator_Fast jmax, j = n->fast_outs(jmax); j < jmax; j++) {
1924    Node* use = n->fast_out(j);
1925    if (vset.test(use->_idx)) {
1926      return true;
1927    }
1928  }
1929  return false;
1930}
1931
1932
1933//------------------------------ has_use_internal_to_set -------------------------------------
1934// Has use internal to the vector set (ie. not in a phi at the loop head)
1935bool PhaseIdealLoop::has_use_internal_to_set( Node* n, VectorSet& vset, IdealLoopTree *loop ) {
1936  Node* head  = loop->_head;
1937  for (DUIterator_Fast jmax, j = n->fast_outs(jmax); j < jmax; j++) {
1938    Node* use = n->fast_out(j);
1939    if (vset.test(use->_idx) && !(use->is_Phi() && use->in(0) == head)) {
1940      return true;
1941    }
1942  }
1943  return false;
1944}
1945
1946
1947//------------------------------ clone_for_use_outside_loop -------------------------------------
1948// clone "n" for uses that are outside of loop
1949int PhaseIdealLoop::clone_for_use_outside_loop( IdealLoopTree *loop, Node* n, Node_List& worklist ) {
1950  int cloned = 0;
1951  assert(worklist.size() == 0, "should be empty");
1952  for (DUIterator_Fast jmax, j = n->fast_outs(jmax); j < jmax; j++) {
1953    Node* use = n->fast_out(j);
1954    if( !loop->is_member(get_loop(has_ctrl(use) ? get_ctrl(use) : use)) ) {
1955      worklist.push(use);
1956    }
1957  }
1958  while( worklist.size() ) {
1959    Node *use = worklist.pop();
1960    if (!has_node(use) || use->in(0) == C->top()) continue;
1961    uint j;
1962    for (j = 0; j < use->req(); j++) {
1963      if (use->in(j) == n) break;
1964    }
1965    assert(j < use->req(), "must be there");
1966
1967    // clone "n" and insert it between the inputs of "n" and the use outside the loop
1968    Node* n_clone = n->clone();
1969    _igvn.replace_input_of(use, j, n_clone);
1970    cloned++;
1971    Node* use_c;
1972    if (!use->is_Phi()) {
1973      use_c = has_ctrl(use) ? get_ctrl(use) : use->in(0);
1974    } else {
1975      // Use in a phi is considered a use in the associated predecessor block
1976      use_c = use->in(0)->in(j);
1977    }
1978    set_ctrl(n_clone, use_c);
1979    assert(!loop->is_member(get_loop(use_c)), "should be outside loop");
1980    get_loop(use_c)->_body.push(n_clone);
1981    _igvn.register_new_node_with_optimizer(n_clone);
1982#if !defined(PRODUCT)
1983    if (TracePartialPeeling) {
1984      tty->print_cr("loop exit cloning old: %d new: %d newbb: %d", n->_idx, n_clone->_idx, get_ctrl(n_clone)->_idx);
1985    }
1986#endif
1987  }
1988  return cloned;
1989}
1990
1991
1992//------------------------------ clone_for_special_use_inside_loop -------------------------------------
1993// clone "n" for special uses that are in the not_peeled region.
1994// If these def-uses occur in separate blocks, the code generator
1995// marks the method as not compilable.  For example, if a "BoolNode"
1996// is in a different basic block than the "IfNode" that uses it, then
1997// the compilation is aborted in the code generator.
1998void PhaseIdealLoop::clone_for_special_use_inside_loop( IdealLoopTree *loop, Node* n,
1999                                                        VectorSet& not_peel, Node_List& sink_list, Node_List& worklist ) {
2000  if (n->is_Phi() || n->is_Load()) {
2001    return;
2002  }
2003  assert(worklist.size() == 0, "should be empty");
2004  for (DUIterator_Fast jmax, j = n->fast_outs(jmax); j < jmax; j++) {
2005    Node* use = n->fast_out(j);
2006    if ( not_peel.test(use->_idx) &&
2007         (use->is_If() || use->is_CMove() || use->is_Bool()) &&
2008         use->in(1) == n)  {
2009      worklist.push(use);
2010    }
2011  }
2012  if (worklist.size() > 0) {
2013    // clone "n" and insert it between inputs of "n" and the use
2014    Node* n_clone = n->clone();
2015    loop->_body.push(n_clone);
2016    _igvn.register_new_node_with_optimizer(n_clone);
2017    set_ctrl(n_clone, get_ctrl(n));
2018    sink_list.push(n_clone);
2019    not_peel <<= n_clone->_idx;  // add n_clone to not_peel set.
2020#if !defined(PRODUCT)
2021    if (TracePartialPeeling) {
2022      tty->print_cr("special not_peeled cloning old: %d new: %d", n->_idx, n_clone->_idx);
2023    }
2024#endif
2025    while( worklist.size() ) {
2026      Node *use = worklist.pop();
2027      _igvn.rehash_node_delayed(use);
2028      for (uint j = 1; j < use->req(); j++) {
2029        if (use->in(j) == n) {
2030          use->set_req(j, n_clone);
2031        }
2032      }
2033    }
2034  }
2035}
2036
2037
2038//------------------------------ insert_phi_for_loop -------------------------------------
2039// Insert phi(lp_entry_val, back_edge_val) at use->in(idx) for loop lp if phi does not already exist
2040void PhaseIdealLoop::insert_phi_for_loop( Node* use, uint idx, Node* lp_entry_val, Node* back_edge_val, LoopNode* lp ) {
2041  Node *phi = PhiNode::make(lp, back_edge_val);
2042  phi->set_req(LoopNode::EntryControl, lp_entry_val);
2043  // Use existing phi if it already exists
2044  Node *hit = _igvn.hash_find_insert(phi);
2045  if( hit == NULL ) {
2046    _igvn.register_new_node_with_optimizer(phi);
2047    set_ctrl(phi, lp);
2048  } else {
2049    // Remove the new phi from the graph and use the hit
2050    _igvn.remove_dead_node(phi);
2051    phi = hit;
2052  }
2053  _igvn.replace_input_of(use, idx, phi);
2054}
2055
2056#ifdef ASSERT
2057//------------------------------ is_valid_loop_partition -------------------------------------
2058// Validate the loop partition sets: peel and not_peel
2059bool PhaseIdealLoop::is_valid_loop_partition( IdealLoopTree *loop, VectorSet& peel, Node_List& peel_list,
2060                                              VectorSet& not_peel ) {
2061  uint i;
2062  // Check that peel_list entries are in the peel set
2063  for (i = 0; i < peel_list.size(); i++) {
2064    if (!peel.test(peel_list.at(i)->_idx)) {
2065      return false;
2066    }
2067  }
2068  // Check at loop members are in one of peel set or not_peel set
2069  for (i = 0; i < loop->_body.size(); i++ ) {
2070    Node *def  = loop->_body.at(i);
2071    uint di = def->_idx;
2072    // Check that peel set elements are in peel_list
2073    if (peel.test(di)) {
2074      if (not_peel.test(di)) {
2075        return false;
2076      }
2077      // Must be in peel_list also
2078      bool found = false;
2079      for (uint j = 0; j < peel_list.size(); j++) {
2080        if (peel_list.at(j)->_idx == di) {
2081          found = true;
2082          break;
2083        }
2084      }
2085      if (!found) {
2086        return false;
2087      }
2088    } else if (not_peel.test(di)) {
2089      if (peel.test(di)) {
2090        return false;
2091      }
2092    } else {
2093      return false;
2094    }
2095  }
2096  return true;
2097}
2098
2099//------------------------------ is_valid_clone_loop_exit_use -------------------------------------
2100// Ensure a use outside of loop is of the right form
2101bool PhaseIdealLoop::is_valid_clone_loop_exit_use( IdealLoopTree *loop, Node* use, uint exit_idx) {
2102  Node *use_c = has_ctrl(use) ? get_ctrl(use) : use;
2103  return (use->is_Phi() &&
2104          use_c->is_Region() && use_c->req() == 3 &&
2105          (use_c->in(exit_idx)->Opcode() == Op_IfTrue ||
2106           use_c->in(exit_idx)->Opcode() == Op_IfFalse ||
2107           use_c->in(exit_idx)->Opcode() == Op_JumpProj) &&
2108          loop->is_member( get_loop( use_c->in(exit_idx)->in(0) ) ) );
2109}
2110
2111//------------------------------ is_valid_clone_loop_form -------------------------------------
2112// Ensure that all uses outside of loop are of the right form
2113bool PhaseIdealLoop::is_valid_clone_loop_form( IdealLoopTree *loop, Node_List& peel_list,
2114                                               uint orig_exit_idx, uint clone_exit_idx) {
2115  uint len = peel_list.size();
2116  for (uint i = 0; i < len; i++) {
2117    Node *def = peel_list.at(i);
2118
2119    for (DUIterator_Fast jmax, j = def->fast_outs(jmax); j < jmax; j++) {
2120      Node *use = def->fast_out(j);
2121      Node *use_c = has_ctrl(use) ? get_ctrl(use) : use;
2122      if (!loop->is_member(get_loop(use_c))) {
2123        // use is not in the loop, check for correct structure
2124        if (use->in(0) == def) {
2125          // Okay
2126        } else if (!is_valid_clone_loop_exit_use(loop, use, orig_exit_idx)) {
2127          return false;
2128        }
2129      }
2130    }
2131  }
2132  return true;
2133}
2134#endif
2135
2136//------------------------------ partial_peel -------------------------------------
2137// Partially peel (aka loop rotation) the top portion of a loop (called
2138// the peel section below) by cloning it and placing one copy just before
2139// the new loop head and the other copy at the bottom of the new loop.
2140//
2141//    before                       after                where it came from
2142//
2143//    stmt1                        stmt1
2144//  loop:                          stmt2                     clone
2145//    stmt2                        if condA goto exitA       clone
2146//    if condA goto exitA        new_loop:                   new
2147//    stmt3                        stmt3                     clone
2148//    if !condB goto loop          if condB goto exitB       clone
2149//  exitB:                         stmt2                     orig
2150//    stmt4                        if !condA goto new_loop   orig
2151//  exitA:                         goto exitA
2152//                               exitB:
2153//                                 stmt4
2154//                               exitA:
2155//
2156// Step 1: find the cut point: an exit test on probable
2157//         induction variable.
2158// Step 2: schedule (with cloning) operations in the peel
2159//         section that can be executed after the cut into
2160//         the section that is not peeled.  This may need
2161//         to clone operations into exit blocks.  For
2162//         instance, a reference to A[i] in the not-peel
2163//         section and a reference to B[i] in an exit block
2164//         may cause a left-shift of i by 2 to be placed
2165//         in the peel block.  This step will clone the left
2166//         shift into the exit block and sink the left shift
2167//         from the peel to the not-peel section.
2168// Step 3: clone the loop, retarget the control, and insert
2169//         phis for values that are live across the new loop
2170//         head.  This is very dependent on the graph structure
2171//         from clone_loop.  It creates region nodes for
2172//         exit control and associated phi nodes for values
2173//         flow out of the loop through that exit.  The region
2174//         node is dominated by the clone's control projection.
2175//         So the clone's peel section is placed before the
2176//         new loop head, and the clone's not-peel section is
2177//         forms the top part of the new loop.  The original
2178//         peel section forms the tail of the new loop.
2179// Step 4: update the dominator tree and recompute the
2180//         dominator depth.
2181//
2182//                   orig
2183//
2184//                   stmt1
2185//                     |
2186//                     v
2187//               loop predicate
2188//                     |
2189//                     v
2190//                   loop<----+
2191//                     |      |
2192//                   stmt2    |
2193//                     |      |
2194//                     v      |
2195//                    ifA     |
2196//                   / |      |
2197//                  v  v      |
2198//               false true   ^  <-- last_peel
2199//               /     |      |
2200//              /   ===|==cut |
2201//             /     stmt3    |  <-- first_not_peel
2202//            /        |      |
2203//            |        v      |
2204//            v       ifB     |
2205//          exitA:   / \      |
2206//                  /   \     |
2207//                 v     v    |
2208//               false true   |
2209//               /       \    |
2210//              /         ----+
2211//             |
2212//             v
2213//           exitB:
2214//           stmt4
2215//
2216//
2217//            after clone loop
2218//
2219//                   stmt1
2220//                     |
2221//                     v
2222//               loop predicate
2223//                 /       \
2224//        clone   /         \   orig
2225//               /           \
2226//              /             \
2227//             v               v
2228//   +---->loop                loop<----+
2229//   |      |                    |      |
2230//   |    stmt2                stmt2    |
2231//   |      |                    |      |
2232//   |      v                    v      |
2233//   |      ifA                 ifA     |
2234//   |      | \                / |      |
2235//   |      v  v              v  v      |
2236//   ^    true  false      false true   ^  <-- last_peel
2237//   |      |   ^   \       /    |      |
2238//   | cut==|==  \   \     /  ===|==cut |
2239//   |    stmt3   \   \   /    stmt3    |  <-- first_not_peel
2240//   |      |    dom   | |       |      |
2241//   |      v      \  1v v2      v      |
2242//   |      ifB     regionA     ifB     |
2243//   |      / \        |       / \      |
2244//   |     /   \       v      /   \     |
2245//   |    v     v    exitA:  v     v    |
2246//   |    true  false      false true   |
2247//   |    /     ^   \      /       \    |
2248//   +----       \   \    /         ----+
2249//               dom  \  /
2250//                 \  1v v2
2251//                  regionB
2252//                     |
2253//                     v
2254//                   exitB:
2255//                   stmt4
2256//
2257//
2258//           after partial peel
2259//
2260//                  stmt1
2261//                     |
2262//                     v
2263//               loop predicate
2264//                 /
2265//        clone   /             orig
2266//               /          TOP
2267//              /             \
2268//             v               v
2269//    TOP->loop                loop----+
2270//          |                    |      |
2271//        stmt2                stmt2    |
2272//          |                    |      |
2273//          v                    v      |
2274//          ifA                 ifA     |
2275//          | \                / |      |
2276//          v  v              v  v      |
2277//        true  false      false true   |     <-- last_peel
2278//          |   ^   \       /    +------|---+
2279//  +->newloop   \   \     /  === ==cut |   |
2280//  |     stmt3   \   \   /     TOP     |   |
2281//  |       |    dom   | |      stmt3   |   | <-- first_not_peel
2282//  |       v      \  1v v2      v      |   |
2283//  |       ifB     regionA     ifB     ^   v
2284//  |       / \        |       / \      |   |
2285//  |      /   \       v      /   \     |   |
2286//  |     v     v    exitA:  v     v    |   |
2287//  |     true  false      false true   |   |
2288//  |     /     ^   \      /       \    |   |
2289//  |    |       \   \    /         v   |   |
2290//  |    |       dom  \  /         TOP  |   |
2291//  |    |         \  1v v2             |   |
2292//  ^    v          regionB             |   |
2293//  |    |             |                |   |
2294//  |    |             v                ^   v
2295//  |    |           exitB:             |   |
2296//  |    |           stmt4              |   |
2297//  |    +------------>-----------------+   |
2298//  |                                       |
2299//  +-----------------<---------------------+
2300//
2301//
2302//              final graph
2303//
2304//                  stmt1
2305//                    |
2306//                    v
2307//               loop predicate
2308//                    |
2309//                    v
2310//                  stmt2 clone
2311//                    |
2312//                    v
2313//         ........> ifA clone
2314//         :        / |
2315//        dom      /  |
2316//         :      v   v
2317//         :  false   true
2318//         :  |       |
2319//         :  |       v
2320//         :  |    newloop<-----+
2321//         :  |        |        |
2322//         :  |     stmt3 clone |
2323//         :  |        |        |
2324//         :  |        v        |
2325//         :  |       ifB       |
2326//         :  |      / \        |
2327//         :  |     v   v       |
2328//         :  |  false true     |
2329//         :  |   |     |       |
2330//         :  |   v    stmt2    |
2331//         :  | exitB:  |       |
2332//         :  | stmt4   v       |
2333//         :  |       ifA orig  |
2334//         :  |      /  \       |
2335//         :  |     /    \      |
2336//         :  |    v     v      |
2337//         :  |  false  true    |
2338//         :  |  /        \     |
2339//         :  v  v         -----+
2340//          RegionA
2341//             |
2342//             v
2343//           exitA
2344//
2345bool PhaseIdealLoop::partial_peel( IdealLoopTree *loop, Node_List &old_new ) {
2346
2347  assert(!loop->_head->is_CountedLoop(), "Non-counted loop only");
2348  if (!loop->_head->is_Loop()) {
2349    return false;  }
2350
2351  LoopNode *head  = loop->_head->as_Loop();
2352
2353  if (head->is_partial_peel_loop() || head->partial_peel_has_failed()) {
2354    return false;
2355  }
2356
2357  // Check for complex exit control
2358  for(uint ii = 0; ii < loop->_body.size(); ii++ ) {
2359    Node *n = loop->_body.at(ii);
2360    int opc = n->Opcode();
2361    if (n->is_Call()        ||
2362        opc == Op_Catch     ||
2363        opc == Op_CatchProj ||
2364        opc == Op_Jump      ||
2365        opc == Op_JumpProj  ||
2366        opc == Op_FlagsProj) {
2367#if !defined(PRODUCT)
2368      if (TracePartialPeeling) {
2369        tty->print_cr("\nExit control too complex: lp: %d", head->_idx);
2370      }
2371#endif
2372      return false;
2373    }
2374  }
2375
2376  int dd = dom_depth(head);
2377
2378  // Step 1: find cut point
2379
2380  // Walk up dominators to loop head looking for first loop exit
2381  // which is executed on every path thru loop.
2382  IfNode *peel_if = NULL;
2383  IfNode *peel_if_cmpu = NULL;
2384
2385  Node *iff = loop->tail();
2386  while( iff != head ) {
2387    if( iff->is_If() ) {
2388      Node *ctrl = get_ctrl(iff->in(1));
2389      if (ctrl->is_top()) return false; // Dead test on live IF.
2390      // If loop-varying exit-test, check for induction variable
2391      if( loop->is_member(get_loop(ctrl)) &&
2392          loop->is_loop_exit(iff) &&
2393          is_possible_iv_test(iff)) {
2394        Node* cmp = iff->in(1)->in(1);
2395        if (cmp->Opcode() == Op_CmpI) {
2396          peel_if = iff->as_If();
2397        } else {
2398          assert(cmp->Opcode() == Op_CmpU, "must be CmpI or CmpU");
2399          peel_if_cmpu = iff->as_If();
2400        }
2401      }
2402    }
2403    iff = idom(iff);
2404  }
2405  // Prefer signed compare over unsigned compare.
2406  IfNode* new_peel_if = NULL;
2407  if (peel_if == NULL) {
2408    if (!PartialPeelAtUnsignedTests || peel_if_cmpu == NULL) {
2409      return false;   // No peel point found
2410    }
2411    new_peel_if = insert_cmpi_loop_exit(peel_if_cmpu, loop);
2412    if (new_peel_if == NULL) {
2413      return false;   // No peel point found
2414    }
2415    peel_if = new_peel_if;
2416  }
2417  Node* last_peel        = stay_in_loop(peel_if, loop);
2418  Node* first_not_peeled = stay_in_loop(last_peel, loop);
2419  if (first_not_peeled == NULL || first_not_peeled == head) {
2420    return false;
2421  }
2422
2423#if !defined(PRODUCT)
2424  if (TraceLoopOpts) {
2425    tty->print("PartialPeel  ");
2426    loop->dump_head();
2427  }
2428
2429  if (TracePartialPeeling) {
2430    tty->print_cr("before partial peel one iteration");
2431    Node_List wl;
2432    Node* t = head->in(2);
2433    while (true) {
2434      wl.push(t);
2435      if (t == head) break;
2436      t = idom(t);
2437    }
2438    while (wl.size() > 0) {
2439      Node* tt = wl.pop();
2440      tt->dump();
2441      if (tt == last_peel) tty->print_cr("-- cut --");
2442    }
2443  }
2444#endif
2445  ResourceArea *area = Thread::current()->resource_area();
2446  VectorSet peel(area);
2447  VectorSet not_peel(area);
2448  Node_List peel_list(area);
2449  Node_List worklist(area);
2450  Node_List sink_list(area);
2451
2452  // Set of cfg nodes to peel are those that are executable from
2453  // the head through last_peel.
2454  assert(worklist.size() == 0, "should be empty");
2455  worklist.push(head);
2456  peel.set(head->_idx);
2457  while (worklist.size() > 0) {
2458    Node *n = worklist.pop();
2459    if (n != last_peel) {
2460      for (DUIterator_Fast jmax, j = n->fast_outs(jmax); j < jmax; j++) {
2461        Node* use = n->fast_out(j);
2462        if (use->is_CFG() &&
2463            loop->is_member(get_loop(use)) &&
2464            !peel.test_set(use->_idx)) {
2465          worklist.push(use);
2466        }
2467      }
2468    }
2469  }
2470
2471  // Set of non-cfg nodes to peel are those that are control
2472  // dependent on the cfg nodes.
2473  uint i;
2474  for(i = 0; i < loop->_body.size(); i++ ) {
2475    Node *n = loop->_body.at(i);
2476    Node *n_c = has_ctrl(n) ? get_ctrl(n) : n;
2477    if (peel.test(n_c->_idx)) {
2478      peel.set(n->_idx);
2479    } else {
2480      not_peel.set(n->_idx);
2481    }
2482  }
2483
2484  // Step 2: move operations from the peeled section down into the
2485  //         not-peeled section
2486
2487  // Get a post order schedule of nodes in the peel region
2488  // Result in right-most operand.
2489  scheduled_nodelist(loop, peel, peel_list );
2490
2491  assert(is_valid_loop_partition(loop, peel, peel_list, not_peel), "bad partition");
2492
2493  // For future check for too many new phis
2494  uint old_phi_cnt = 0;
2495  for (DUIterator_Fast jmax, j = head->fast_outs(jmax); j < jmax; j++) {
2496    Node* use = head->fast_out(j);
2497    if (use->is_Phi()) old_phi_cnt++;
2498  }
2499
2500#if !defined(PRODUCT)
2501  if (TracePartialPeeling) {
2502    tty->print_cr("\npeeled list");
2503  }
2504#endif
2505
2506  // Evacuate nodes in peel region into the not_peeled region if possible
2507  uint new_phi_cnt = 0;
2508  uint cloned_for_outside_use = 0;
2509  for (i = 0; i < peel_list.size();) {
2510    Node* n = peel_list.at(i);
2511#if !defined(PRODUCT)
2512  if (TracePartialPeeling) n->dump();
2513#endif
2514    bool incr = true;
2515    if ( !n->is_CFG() ) {
2516
2517      if ( has_use_in_set(n, not_peel) ) {
2518
2519        // If not used internal to the peeled region,
2520        // move "n" from peeled to not_peeled region.
2521
2522        if ( !has_use_internal_to_set(n, peel, loop) ) {
2523
2524          // if not pinned and not a load (which maybe anti-dependent on a store)
2525          // and not a CMove (Matcher expects only bool->cmove).
2526          if ( n->in(0) == NULL && !n->is_Load() && !n->is_CMove() ) {
2527            cloned_for_outside_use += clone_for_use_outside_loop( loop, n, worklist );
2528            sink_list.push(n);
2529            peel     >>= n->_idx; // delete n from peel set.
2530            not_peel <<= n->_idx; // add n to not_peel set.
2531            peel_list.remove(i);
2532            incr = false;
2533#if !defined(PRODUCT)
2534            if (TracePartialPeeling) {
2535              tty->print_cr("sink to not_peeled region: %d newbb: %d",
2536                            n->_idx, get_ctrl(n)->_idx);
2537            }
2538#endif
2539          }
2540        } else {
2541          // Otherwise check for special def-use cases that span
2542          // the peel/not_peel boundary such as bool->if
2543          clone_for_special_use_inside_loop( loop, n, not_peel, sink_list, worklist );
2544          new_phi_cnt++;
2545        }
2546      }
2547    }
2548    if (incr) i++;
2549  }
2550
2551  if (new_phi_cnt > old_phi_cnt + PartialPeelNewPhiDelta) {
2552#if !defined(PRODUCT)
2553    if (TracePartialPeeling) {
2554      tty->print_cr("\nToo many new phis: %d  old %d new cmpi: %c",
2555                    new_phi_cnt, old_phi_cnt, new_peel_if != NULL?'T':'F');
2556    }
2557#endif
2558    if (new_peel_if != NULL) {
2559      remove_cmpi_loop_exit(new_peel_if, loop);
2560    }
2561    // Inhibit more partial peeling on this loop
2562    assert(!head->is_partial_peel_loop(), "not partial peeled");
2563    head->mark_partial_peel_failed();
2564    if (cloned_for_outside_use > 0) {
2565      // Terminate this round of loop opts because
2566      // the graph outside this loop was changed.
2567      C->set_major_progress();
2568      return true;
2569    }
2570    return false;
2571  }
2572
2573  // Step 3: clone loop, retarget control, and insert new phis
2574
2575  // Create new loop head for new phis and to hang
2576  // the nodes being moved (sinked) from the peel region.
2577  LoopNode* new_head = new (C) LoopNode(last_peel, last_peel);
2578  new_head->set_unswitch_count(head->unswitch_count()); // Preserve
2579  _igvn.register_new_node_with_optimizer(new_head);
2580  assert(first_not_peeled->in(0) == last_peel, "last_peel <- first_not_peeled");
2581  first_not_peeled->set_req(0, new_head);
2582  set_loop(new_head, loop);
2583  loop->_body.push(new_head);
2584  not_peel.set(new_head->_idx);
2585  set_idom(new_head, last_peel, dom_depth(first_not_peeled));
2586  set_idom(first_not_peeled, new_head, dom_depth(first_not_peeled));
2587
2588  while (sink_list.size() > 0) {
2589    Node* n = sink_list.pop();
2590    set_ctrl(n, new_head);
2591  }
2592
2593  assert(is_valid_loop_partition(loop, peel, peel_list, not_peel), "bad partition");
2594
2595  clone_loop( loop, old_new, dd );
2596
2597  const uint clone_exit_idx = 1;
2598  const uint orig_exit_idx  = 2;
2599  assert(is_valid_clone_loop_form( loop, peel_list, orig_exit_idx, clone_exit_idx ), "bad clone loop");
2600
2601  Node* head_clone             = old_new[head->_idx];
2602  LoopNode* new_head_clone     = old_new[new_head->_idx]->as_Loop();
2603  Node* orig_tail_clone        = head_clone->in(2);
2604
2605  // Add phi if "def" node is in peel set and "use" is not
2606
2607  for(i = 0; i < peel_list.size(); i++ ) {
2608    Node *def  = peel_list.at(i);
2609    if (!def->is_CFG()) {
2610      for (DUIterator_Fast jmax, j = def->fast_outs(jmax); j < jmax; j++) {
2611        Node *use = def->fast_out(j);
2612        if (has_node(use) && use->in(0) != C->top() &&
2613            (!peel.test(use->_idx) ||
2614             (use->is_Phi() && use->in(0) == head)) ) {
2615          worklist.push(use);
2616        }
2617      }
2618      while( worklist.size() ) {
2619        Node *use = worklist.pop();
2620        for (uint j = 1; j < use->req(); j++) {
2621          Node* n = use->in(j);
2622          if (n == def) {
2623
2624            // "def" is in peel set, "use" is not in peel set
2625            // or "use" is in the entry boundary (a phi) of the peel set
2626
2627            Node* use_c = has_ctrl(use) ? get_ctrl(use) : use;
2628
2629            if ( loop->is_member(get_loop( use_c )) ) {
2630              // use is in loop
2631              if (old_new[use->_idx] != NULL) { // null for dead code
2632                Node* use_clone = old_new[use->_idx];
2633                _igvn.replace_input_of(use, j, C->top());
2634                insert_phi_for_loop( use_clone, j, old_new[def->_idx], def, new_head_clone );
2635              }
2636            } else {
2637              assert(is_valid_clone_loop_exit_use(loop, use, orig_exit_idx), "clone loop format");
2638              // use is not in the loop, check if the live range includes the cut
2639              Node* lp_if = use_c->in(orig_exit_idx)->in(0);
2640              if (not_peel.test(lp_if->_idx)) {
2641                assert(j == orig_exit_idx, "use from original loop");
2642                insert_phi_for_loop( use, clone_exit_idx, old_new[def->_idx], def, new_head_clone );
2643              }
2644            }
2645          }
2646        }
2647      }
2648    }
2649  }
2650
2651  // Step 3b: retarget control
2652
2653  // Redirect control to the new loop head if a cloned node in
2654  // the not_peeled region has control that points into the peeled region.
2655  // This necessary because the cloned peeled region will be outside
2656  // the loop.
2657  //                            from    to
2658  //          cloned-peeled    <---+
2659  //    new_head_clone:            |    <--+
2660  //          cloned-not_peeled  in(0)    in(0)
2661  //          orig-peeled
2662
2663  for(i = 0; i < loop->_body.size(); i++ ) {
2664    Node *n = loop->_body.at(i);
2665    if (!n->is_CFG()           && n->in(0) != NULL        &&
2666        not_peel.test(n->_idx) && peel.test(n->in(0)->_idx)) {
2667      Node* n_clone = old_new[n->_idx];
2668      _igvn.replace_input_of(n_clone, 0, new_head_clone);
2669    }
2670  }
2671
2672  // Backedge of the surviving new_head (the clone) is original last_peel
2673  _igvn.replace_input_of(new_head_clone, LoopNode::LoopBackControl, last_peel);
2674
2675  // Cut first node in original not_peel set
2676  _igvn.rehash_node_delayed(new_head);                     // Multiple edge updates:
2677  new_head->set_req(LoopNode::EntryControl,    C->top());  //   use rehash_node_delayed / set_req instead of
2678  new_head->set_req(LoopNode::LoopBackControl, C->top());  //   multiple replace_input_of calls
2679
2680  // Copy head_clone back-branch info to original head
2681  // and remove original head's loop entry and
2682  // clone head's back-branch
2683  _igvn.rehash_node_delayed(head); // Multiple edge updates
2684  head->set_req(LoopNode::EntryControl,    head_clone->in(LoopNode::LoopBackControl));
2685  head->set_req(LoopNode::LoopBackControl, C->top());
2686  _igvn.replace_input_of(head_clone, LoopNode::LoopBackControl, C->top());
2687
2688  // Similarly modify the phis
2689  for (DUIterator_Fast kmax, k = head->fast_outs(kmax); k < kmax; k++) {
2690    Node* use = head->fast_out(k);
2691    if (use->is_Phi() && use->outcnt() > 0) {
2692      Node* use_clone = old_new[use->_idx];
2693      _igvn.rehash_node_delayed(use); // Multiple edge updates
2694      use->set_req(LoopNode::EntryControl,    use_clone->in(LoopNode::LoopBackControl));
2695      use->set_req(LoopNode::LoopBackControl, C->top());
2696      _igvn.replace_input_of(use_clone, LoopNode::LoopBackControl, C->top());
2697    }
2698  }
2699
2700  // Step 4: update dominator tree and dominator depth
2701
2702  set_idom(head, orig_tail_clone, dd);
2703  recompute_dom_depth();
2704
2705  // Inhibit more partial peeling on this loop
2706  new_head_clone->set_partial_peel_loop();
2707  C->set_major_progress();
2708
2709#if !defined(PRODUCT)
2710  if (TracePartialPeeling) {
2711    tty->print_cr("\nafter partial peel one iteration");
2712    Node_List wl(area);
2713    Node* t = last_peel;
2714    while (true) {
2715      wl.push(t);
2716      if (t == head_clone) break;
2717      t = idom(t);
2718    }
2719    while (wl.size() > 0) {
2720      Node* tt = wl.pop();
2721      if (tt == head) tty->print_cr("orig head");
2722      else if (tt == new_head_clone) tty->print_cr("new head");
2723      else if (tt == head_clone) tty->print_cr("clone head");
2724      tt->dump();
2725    }
2726  }
2727#endif
2728  return true;
2729}
2730
2731//------------------------------reorg_offsets----------------------------------
2732// Reorganize offset computations to lower register pressure.  Mostly
2733// prevent loop-fallout uses of the pre-incremented trip counter (which are
2734// then alive with the post-incremented trip counter forcing an extra
2735// register move)
2736void PhaseIdealLoop::reorg_offsets(IdealLoopTree *loop) {
2737  // Perform it only for canonical counted loops.
2738  // Loop's shape could be messed up by iteration_split_impl.
2739  if (!loop->_head->is_CountedLoop())
2740    return;
2741  if (!loop->_head->as_Loop()->is_valid_counted_loop())
2742    return;
2743
2744  CountedLoopNode *cl = loop->_head->as_CountedLoop();
2745  CountedLoopEndNode *cle = cl->loopexit();
2746  Node *exit = cle->proj_out(false);
2747  Node *phi = cl->phi();
2748
2749  // Check for the special case of folks using the pre-incremented
2750  // trip-counter on the fall-out path (forces the pre-incremented
2751  // and post-incremented trip counter to be live at the same time).
2752  // Fix this by adjusting to use the post-increment trip counter.
2753
2754  bool progress = true;
2755  while (progress) {
2756    progress = false;
2757    for (DUIterator_Fast imax, i = phi->fast_outs(imax); i < imax; i++) {
2758      Node* use = phi->fast_out(i);   // User of trip-counter
2759      if (!has_ctrl(use))  continue;
2760      Node *u_ctrl = get_ctrl(use);
2761      if (use->is_Phi()) {
2762        u_ctrl = NULL;
2763        for (uint j = 1; j < use->req(); j++)
2764          if (use->in(j) == phi)
2765            u_ctrl = dom_lca(u_ctrl, use->in(0)->in(j));
2766      }
2767      IdealLoopTree *u_loop = get_loop(u_ctrl);
2768      // Look for loop-invariant use
2769      if (u_loop == loop) continue;
2770      if (loop->is_member(u_loop)) continue;
2771      // Check that use is live out the bottom.  Assuming the trip-counter
2772      // update is right at the bottom, uses of of the loop middle are ok.
2773      if (dom_lca(exit, u_ctrl) != exit) continue;
2774      // Hit!  Refactor use to use the post-incremented tripcounter.
2775      // Compute a post-increment tripcounter.
2776      Node *opaq = new (C) Opaque2Node( C, cle->incr() );
2777      register_new_node( opaq, u_ctrl );
2778      Node *neg_stride = _igvn.intcon(-cle->stride_con());
2779      set_ctrl(neg_stride, C->root());
2780      Node *post = new (C) AddINode( opaq, neg_stride);
2781      register_new_node( post, u_ctrl );
2782      _igvn.rehash_node_delayed(use);
2783      for (uint j = 1; j < use->req(); j++) {
2784        if (use->in(j) == phi)
2785          use->set_req(j, post);
2786      }
2787      // Since DU info changed, rerun loop
2788      progress = true;
2789      break;
2790    }
2791  }
2792
2793}
2794