ifg.cpp revision 1472:c18cbe5936b8
152419Sjulian/*
252419Sjulian * Copyright (c) 1998, 2010, Oracle and/or its affiliates. All rights reserved.
3139823Simp * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
4139823Simp *
5139823Simp * This code is free software; you can redistribute it and/or modify it
652419Sjulian * under the terms of the GNU General Public License version 2 only, as
752419Sjulian * published by the Free Software Foundation.
870784Sjulian *
952419Sjulian * This code is distributed in the hope that it will be useful, but WITHOUT
1052419Sjulian * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
1152419Sjulian * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
1252419Sjulian * version 2 for more details (a copy is included in the LICENSE file that
1352419Sjulian * accompanied this code).
1452419Sjulian *
1552419Sjulian * You should have received a copy of the GNU General Public License version
1652419Sjulian * 2 along with this work; if not, write to the Free Software Foundation,
1752419Sjulian * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
1852419Sjulian *
1970784Sjulian * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
2052419Sjulian * or visit www.oracle.com if you need additional information or have any
2152419Sjulian * questions.
2252419Sjulian *
2352419Sjulian */
2452419Sjulian
2552419Sjulian#include "incls/_precompiled.incl"
2652419Sjulian#include "incls/_ifg.cpp.incl"
2752419Sjulian
2852419Sjulian#define EXACT_PRESSURE 1
2952419Sjulian
3052419Sjulian//=============================================================================
3152419Sjulian//------------------------------IFG--------------------------------------------
3252419SjulianPhaseIFG::PhaseIFG( Arena *arena ) : Phase(Interference_Graph), _arena(arena) {
3352419Sjulian}
3452419Sjulian
3552419Sjulian//------------------------------init-------------------------------------------
3652419Sjulianvoid PhaseIFG::init( uint maxlrg ) {
3752419Sjulian  _maxlrg = maxlrg;
3867506Sjulian  _yanked = new (_arena) VectorSet(_arena);
3952419Sjulian  _is_square = false;
4052419Sjulian  // Make uninitialized adjacency lists
4152752Sjulian  _adjs = (IndexSet*)_arena->Amalloc(sizeof(IndexSet)*maxlrg);
4252419Sjulian  // Also make empty live range structures
4352419Sjulian  _lrgs = (LRG *)_arena->Amalloc( maxlrg * sizeof(LRG) );
4452419Sjulian  memset(_lrgs,0,sizeof(LRG)*maxlrg);
4552419Sjulian  // Init all to empty
4652419Sjulian  for( uint i = 0; i < maxlrg; i++ ) {
4752419Sjulian    _adjs[i].initialize(maxlrg);
4852419Sjulian    _lrgs[i].Set_All();
4952419Sjulian  }
5052419Sjulian}
5152419Sjulian
5252419Sjulian//------------------------------add--------------------------------------------
5352419Sjulian// Add edge between vertices a & b.  These are sorted (triangular matrix),
54230487Sglebius// then the smaller number is inserted in the larger numbered array.
5552419Sjulianint PhaseIFG::add_edge( uint a, uint b ) {
56132705Sglebius  lrgs(a).invalid_degree();
5795759Stanimura  lrgs(b).invalid_degree();
5852419Sjulian  // Sort a and b, so that a is bigger
5952419Sjulian  assert( !_is_square, "only on triangular" );
60132013Srwatson  if( a < b ) { uint tmp = a; a = b; b = tmp; }
61164033Srwatson  return _adjs[a].insert( b );
6252419Sjulian}
6395759Stanimura
6452419Sjulian//------------------------------add_vector-------------------------------------
6552419Sjulian// Add an edge between 'a' and everything in the vector.
66159590Sjhbvoid PhaseIFG::add_vector( uint a, IndexSet *vec ) {
6752419Sjulian  // IFG is triangular, so do the inserts where 'a' < 'b'.
68195837Srwatson  assert( !_is_square, "only on triangular" );
69195837Srwatson  IndexSet *adjs_a = &_adjs[a];
70195837Srwatson  if( !vec->count() ) return;
7152419Sjulian
7252419Sjulian  IndexSetIterator elements(vec);
7352919Sjulian  uint neighbor;
7452419Sjulian  while ((neighbor = elements.next()) != 0) {
7552419Sjulian    add_edge( a, neighbor );
7670870Sjulian  }
77227293Sed}
78227293Sed
7970870Sjulian//------------------------------test-------------------------------------------
8070870Sjulian// Is there an edge between a and b?
8170870Sjulianint PhaseIFG::test_edge( uint a, uint b ) const {
8270870Sjulian  // Sort a and b, so that a is larger
8370870Sjulian  assert( !_is_square, "only on triangular" );
8452419Sjulian  if( a < b ) { uint tmp = a; a = b; b = tmp; }
8552419Sjulian  return _adjs[a].member(b);
8652419Sjulian}
8752419Sjulian
8852419Sjulian//------------------------------SquareUp---------------------------------------
8952419Sjulian// Convert triangular matrix to square matrix
9052419Sjulianvoid PhaseIFG::SquareUp() {
9152419Sjulian  assert( !_is_square, "only on triangular" );
9252419Sjulian
9352419Sjulian  // Simple transpose
9452419Sjulian  for( uint i = 0; i < _maxlrg; i++ ) {
9552419Sjulian    IndexSetIterator elements(&_adjs[i]);
9652419Sjulian    uint datum;
9752419Sjulian    while ((datum = elements.next()) != 0) {
9852419Sjulian      _adjs[datum].insert( i );
9952419Sjulian    }
10052419Sjulian  }
10152419Sjulian  _is_square = true;
10252419Sjulian}
10352419Sjulian
10452419Sjulian//------------------------------Compute_Effective_Degree-----------------------
10552419Sjulian// Compute effective degree in bulk
10652419Sjulianvoid PhaseIFG::Compute_Effective_Degree() {
10752419Sjulian  assert( _is_square, "only on square" );
10852419Sjulian
10952419Sjulian  for( uint i = 0; i < _maxlrg; i++ )
11052419Sjulian    lrgs(i).set_degree(effective_degree(i));
11152752Sjulian}
11252752Sjulian
11370700Sjulian//------------------------------test_edge_sq-----------------------------------
11452752Sjulianint PhaseIFG::test_edge_sq( uint a, uint b ) const {
11572053Sjulian  assert( _is_square, "only on square" );
116230487Sglebius  // Swap, so that 'a' has the lesser count.  Then binary search is on
11752752Sjulian  // the smaller of a's list and b's list.
11852885Sjulian  if( neighbor_cnt(a) > neighbor_cnt(b) ) { uint tmp = a; a = b; b = tmp; }
11952419Sjulian  //return _adjs[a].unordered_member(b);
12052419Sjulian  return _adjs[a].member(b);
12152419Sjulian}
12252419Sjulian
12352419Sjulian//------------------------------Union------------------------------------------
124151975Sglebius// Union edges of B into A
125151975Sglebiusvoid PhaseIFG::Union( uint a, uint b ) {
12652419Sjulian  assert( _is_square, "only on square" );
12752419Sjulian  IndexSet *A = &_adjs[a];
12852419Sjulian  IndexSetIterator b_elements(&_adjs[b]);
12952419Sjulian  uint datum;
130147774Sglebius  while ((datum = b_elements.next()) != 0) {
13152419Sjulian    if(A->insert(datum)) {
13252419Sjulian      _adjs[datum].insert(a);
13352419Sjulian      lrgs(a).invalid_degree();
134129823Sjulian      lrgs(datum).invalid_degree();
135129823Sjulian    }
136129823Sjulian  }
137129823Sjulian}
138129823Sjulian
139129823Sjulian//------------------------------remove_node------------------------------------
140129823Sjulian// Yank a Node and all connected edges from the IFG.  Return a
141129823Sjulian// list of neighbors (edges) yanked.
142230487SglebiusIndexSet *PhaseIFG::remove_node( uint a ) {
143129823Sjulian  assert( _is_square, "only on square" );
144129823Sjulian  assert( !_yanked->test(a), "" );
14552419Sjulian  _yanked->set(a);
146138238Smlaier
14752419Sjulian  // I remove the LRG from all neighbors.
14852419Sjulian  IndexSetIterator elements(&_adjs[a]);
14964512Sarchie  LRG &lrg_a = lrgs(a);
150217320Smdf  uint datum;
151124871Sru  while ((datum = elements.next()) != 0) {
15252419Sjulian    _adjs[datum].remove(a);
153217320Smdf    lrgs(datum).inc_degree( -lrg_a.compute_degree(lrgs(datum)) );
154125116Sru  }
15552419Sjulian  return neighbors(a);
156205082Sglebius}
157205082Sglebius
158205082Sglebius//------------------------------re_insert--------------------------------------
159205082Sglebius// Re-insert a yanked Node.
160205082Sglebiusvoid PhaseIFG::re_insert( uint a ) {
16153526Sjulian  assert( _is_square, "only on square" );
16252419Sjulian  assert( _yanked->test(a), "" );
163131933Smarcel  (*_yanked) >>= a;
16452419Sjulian
16552419Sjulian  IndexSetIterator elements(&_adjs[a]);
16652419Sjulian  uint datum;
16752419Sjulian  while ((datum = elements.next()) != 0) {
168230487Sglebius    _adjs[datum].insert(a);
169230487Sglebius    lrgs(datum).invalid_degree();
170230487Sglebius  }
171230487Sglebius}
172230487Sglebius
173230487Sglebius//------------------------------compute_degree---------------------------------
174230481Sglebius// Compute the degree between 2 live ranges.  If both live ranges are
175230481Sglebius// aligned-adjacent powers-of-2 then we use the MAX size.  If either is
176230481Sglebius// mis-aligned (or for Fat-Projections, not-adjacent) then we have to
177230481Sglebius// MULTIPLY the sizes.  Inspect Brigg's thesis on register pairs to see why
178230481Sglebius// this is so.
179230487Sglebiusint LRG::compute_degree( LRG &l ) const {
180230487Sglebius  int tmp;
181230481Sglebius  int num_regs = _num_regs;
182230481Sglebius  int nregs = l.num_regs();
183230481Sglebius  tmp =  (_fat_proj || l._fat_proj)     // either is a fat-proj?
184230481Sglebius    ? (num_regs * nregs)                // then use product
185230481Sglebius    : MAX2(num_regs,nregs);             // else use max
186230481Sglebius  return tmp;
187230481Sglebius}
188230481Sglebius
18952419Sjulian//------------------------------effective_degree-------------------------------
19052419Sjulian// Compute effective degree for this live range.  If both live ranges are
19152419Sjulian// aligned-adjacent powers-of-2 then we use the MAX size.  If either is
19252419Sjulian// mis-aligned (or for Fat-Projections, not-adjacent) then we have to
19352419Sjulian// MULTIPLY the sizes.  Inspect Brigg's thesis on register pairs to see why
19483366Sjulian// this is so.
19552419Sjulianint PhaseIFG::effective_degree( uint lidx ) const {
19652419Sjulian  int eff = 0;
197164033Srwatson  int num_regs = lrgs(lidx).num_regs();
19852419Sjulian  int fat_proj = lrgs(lidx)._fat_proj;
199164033Srwatson  IndexSet *s = neighbors(lidx);
200164033Srwatson  IndexSetIterator elements(s);
201164033Srwatson  uint nidx;
20252419Sjulian  while((nidx = elements.next()) != 0) {
20352419Sjulian    LRG &lrgn = lrgs(nidx);
20452419Sjulian    int nregs = lrgn.num_regs();
20552419Sjulian    eff += (fat_proj || lrgn._fat_proj) // either is a fat-proj?
20652419Sjulian      ? (num_regs * nregs)              // then use product
207157370Srwatson      : MAX2(num_regs,nregs);           // else use max
20852419Sjulian  }
20952419Sjulian  return eff;
21052419Sjulian}
21152419Sjulian
212157370Srwatson
213151975Sglebius#ifndef PRODUCT
21452419Sjulian//------------------------------dump-------------------------------------------
21552419Sjulianvoid PhaseIFG::dump() const {
21652419Sjulian  tty->print_cr("-- Interference Graph --%s--",
21752419Sjulian                _is_square ? "square" : "triangular" );
21883366Sjulian  if( _is_square ) {
21952419Sjulian    for( uint i = 0; i < _maxlrg; i++ ) {
22052419Sjulian      tty->print( (*_yanked)[i] ? "XX " : "  ");
221147774Sglebius      tty->print("L%d: { ",i);
22252419Sjulian      IndexSetIterator elements(&_adjs[i]);
22370700Sjulian      uint datum;
22452419Sjulian      while ((datum = elements.next()) != 0) {
225146317Sglebius        tty->print("L%d ", datum);
22670700Sjulian      }
22752419Sjulian      tty->print_cr("}");
228172806Smav
22952419Sjulian    }
23052419Sjulian    return;
23152419Sjulian  }
23252419Sjulian
23352419Sjulian  // Triangular
23452419Sjulian  for( uint i = 0; i < _maxlrg; i++ ) {
235163463Sglebius    uint j;
23652419Sjulian    tty->print( (*_yanked)[i] ? "XX " : "  ");
23752419Sjulian    tty->print("L%d: { ",i);
23852419Sjulian    for( j = _maxlrg; j > i; j-- )
23952419Sjulian      if( test_edge(j - 1,i) ) {
24052419Sjulian        tty->print("L%d ",j - 1);
241163463Sglebius      }
242163463Sglebius    tty->print("| ");
243163463Sglebius    IndexSetIterator elements(&_adjs[i]);
244163463Sglebius    uint datum;
24552419Sjulian    while ((datum = elements.next()) != 0) {
246163463Sglebius      tty->print("L%d ", datum);
24752419Sjulian    }
24852419Sjulian    tty->print("}\n");
24952419Sjulian  }
250163463Sglebius  tty->print("\n");
251163463Sglebius}
252163463Sglebius
253163463Sglebius//------------------------------stats------------------------------------------
25452419Sjulianvoid PhaseIFG::stats() const {
25552419Sjulian  ResourceMark rm;
25652419Sjulian  int *h_cnt = NEW_RESOURCE_ARRAY(int,_maxlrg*2);
257163463Sglebius  memset( h_cnt, 0, sizeof(int)*_maxlrg*2 );
258163463Sglebius  uint i;
259163463Sglebius  for( i = 0; i < _maxlrg; i++ ) {
260163463Sglebius    h_cnt[neighbor_cnt(i)]++;
261163463Sglebius  }
26270700Sjulian  tty->print_cr("--Histogram of counts--");
26352419Sjulian  for( i = 0; i < _maxlrg*2; i++ )
264141308Sglebius    if( h_cnt[i] )
265163463Sglebius      tty->print("%d/%d ",i,h_cnt[i]);
266141308Sglebius  tty->print_cr("");
267141308Sglebius}
268141308Sglebius
269141308Sglebius//------------------------------verify-----------------------------------------
270132705Sglebiusvoid PhaseIFG::verify( const PhaseChaitin *pc ) const {
271132705Sglebius  // IFG is square, sorted and no need for Find
272132705Sglebius  for( uint i = 0; i < _maxlrg; i++ ) {
273132705Sglebius    assert(!((*_yanked)[i]) || !neighbor_cnt(i), "Is removed completely" );
274132705Sglebius    IndexSet *set = &_adjs[i];
275132705Sglebius    IndexSetIterator elements(set);
276132939Sglebius    uint idx;
277132939Sglebius    uint last = 0;
278132705Sglebius    while ((idx = elements.next()) != 0) {
279132705Sglebius      assert( idx != i, "Must have empty diagonal");
280185183Smav      assert( pc->Find_const(idx) == idx, "Must not need Find" );
281132705Sglebius      assert( _adjs[idx].member(i), "IFG not square" );
282159590Sjhb      assert( !(*_yanked)[idx], "No yanked neighbors" );
283132705Sglebius      assert( last < idx, "not sorted increasing");
284146296Sglebius      last = idx;
285146296Sglebius    }
286146296Sglebius    assert( !lrgs(i)._degree_valid ||
287159590Sjhb            effective_degree(i) == lrgs(i).degree(), "degree is valid but wrong" );
288132705Sglebius  }
289163463Sglebius}
290132705Sglebius#endif
291132705Sglebius
292132705Sglebius//------------------------------interfere_with_live----------------------------
293146296Sglebius// Interfere this register with everything currently live.  Use the RegMasks
294185183Smav// to trim the set of possible interferences. Return a count of register-only
295163463Sglebius// interferences as an estimate of register pressure.
296159590Sjhbvoid PhaseChaitin::interfere_with_live( uint r, IndexSet *liveout ) {
297159590Sjhb  uint retval = 0;
298132705Sglebius  // Interfere with everything live.
299132705Sglebius  const RegMask &rm = lrgs(r).mask();
300132705Sglebius  // Check for interference by checking overlap of regmasks.
301132705Sglebius  // Only interfere if acceptable register masks overlap.
302132705Sglebius  IndexSetIterator elements(liveout);
303132705Sglebius  uint l;
304163463Sglebius  while( (l = elements.next()) != 0 )
305163463Sglebius    if( rm.overlap( lrgs(l).mask() ) )
306163463Sglebius      _ifg->add_edge( r, l );
307146296Sglebius}
308146317Sglebius
309146317Sglebius//------------------------------build_ifg_virtual------------------------------
310146317Sglebius// Actually build the interference graph.  Uses virtual registers only, no
311146317Sglebius// physical register masks.  This allows me to be very aggressive when
312146296Sglebius// coalescing copies.  Some of this aggressiveness will have to be undone
313146317Sglebius// later, but I'd rather get all the copies I can now (since unremoved copies
314146317Sglebius// at this point can end up in bad places).  Copies I re-insert later I have
315146317Sglebius// more opportunity to insert them in low-frequency locations.
316146317Sglebiusvoid PhaseChaitin::build_ifg_virtual( ) {
317146317Sglebius
318146317Sglebius  // For all blocks (in any order) do...
319146317Sglebius  for( uint i=0; i<_cfg._num_blocks; i++ ) {
320146317Sglebius    Block *b = _cfg._blocks[i];
321146317Sglebius    IndexSet *liveout = _live->live(b);
32270784Sjulian
323146317Sglebius    // The IFG is built by a single reverse pass over each basic block.
324147774Sglebius    // Starting with the known live-out set, we remove things that get
325147774Sglebius    // defined and add things that become live (essentially executing one
326147774Sglebius    // pass of a standard LIVE analysis). Just before a Node defines a value
327147774Sglebius    // (and removes it from the live-ness set) that value is certainly live.
328147774Sglebius    // The defined value interferes with everything currently live.  The
329147774Sglebius    // value is then removed from the live-ness set and it's inputs are
330147774Sglebius    // added to the live-ness set.
331147774Sglebius    for( uint j = b->end_idx() + 1; j > 1; j-- ) {
332172806Smav      Node *n = b->_nodes[j-1];
333172806Smav
334172806Smav      // Get value being defined
335172806Smav      uint r = n2lidx(n);
336147774Sglebius
337146296Sglebius      // Some special values do not allocate
338172806Smav      if( r ) {
339147774Sglebius
340172806Smav        // Remove from live-out set
341172806Smav        liveout->remove(r);
342172806Smav
343172806Smav        // Copies do not define a new value and so do not interfere.
344172806Smav        // Remove the copies source from the liveout set before interfering.
345172806Smav        uint idx = n->is_Copy();
346172806Smav        if( idx ) liveout->remove( n2lidx(n->in(idx)) );
347147774Sglebius
34852419Sjulian        // Interfere with everything live
34952419Sjulian        interfere_with_live( r, liveout );
350163463Sglebius      }
35152419Sjulian
35252419Sjulian      // Make all inputs live
35352419Sjulian      if( !n->is_Phi() ) {      // Phi function uses come from prior block
35452419Sjulian        for( uint k = 1; k < n->req(); k++ )
35552419Sjulian          liveout->insert( n2lidx(n->in(k)) );
35652419Sjulian      }
35752419Sjulian
35852419Sjulian      // 2-address instructions always have the defined value live
35983366Sjulian      // on entry to the instruction, even though it is being defined
36052419Sjulian      // by the instruction.  We pretend a virtual copy sits just prior
36152419Sjulian      // to the instruction and kills the src-def'd register.
36252419Sjulian      // In other words, for 2-address instructions the defined value
36352419Sjulian      // interferes with all inputs.
36452419Sjulian      uint idx;
36552419Sjulian      if( n->is_Mach() && (idx = n->as_Mach()->two_adr()) ) {
36652419Sjulian        const MachNode *mach = n->as_Mach();
36752419Sjulian        // Sometimes my 2-address ADDs are commuted in a bad way.
36852419Sjulian        // We generally want the USE-DEF register to refer to the
36983366Sjulian        // loop-varying quantity, to avoid a copy.
37052419Sjulian        uint op = mach->ideal_Opcode();
37170700Sjulian        // Check that mach->num_opnds() == 3 to ensure instruction is
37270784Sjulian        // not subsuming constants, effectively excludes addI_cin_imm
37370700Sjulian        // Can NOT swap for instructions like addI_cin_imm since it
37470700Sjulian        // is adding zero to yhi + carry and the second ideal-input
375163463Sglebius        // points to the result of adding low-halves.
37670700Sjulian        // Checking req() and num_opnds() does NOT distinguish addI_cout from addI_cout_imm
37752419Sjulian        if( (op == Op_AddI && mach->req() == 3 && mach->num_opnds() == 3) &&
37852419Sjulian            n->in(1)->bottom_type()->base() == Type::Int &&
37952419Sjulian            // See if the ADD is involved in a tight data loop the wrong way
38052419Sjulian            n->in(2)->is_Phi() &&
38152419Sjulian            n->in(2)->in(2) == n ) {
38252419Sjulian          Node *tmp = n->in(1);
38352419Sjulian          n->set_req( 1, n->in(2) );
38483366Sjulian          n->set_req( 2, tmp );
38552419Sjulian        }
38652419Sjulian        // Defined value interferes with all inputs
38752419Sjulian        uint lidx = n2lidx(n->in(idx));
38852419Sjulian        for( uint k = 1; k < n->req(); k++ ) {
38952419Sjulian          uint kidx = n2lidx(n->in(k));
39052419Sjulian          if( kidx != lidx )
39152419Sjulian            _ifg->add_edge( r, kidx );
39252419Sjulian        }
393157370Srwatson      }
39452419Sjulian    } // End of forall instructions in block
39552419Sjulian  } // End of forall blocks
39652419Sjulian}
39752419Sjulian
398157558Srwatson//------------------------------count_int_pressure-----------------------------
399151975Sglebiusuint PhaseChaitin::count_int_pressure( IndexSet *liveout ) {
40052419Sjulian  IndexSetIterator elements(liveout);
40152419Sjulian  uint lidx;
40252419Sjulian  uint cnt = 0;
40352419Sjulian  while ((lidx = elements.next()) != 0) {
40483366Sjulian    if( lrgs(lidx).mask().is_UP() &&
40552419Sjulian        lrgs(lidx).mask_size() &&
40652419Sjulian        !lrgs(lidx)._is_float &&
40752419Sjulian        lrgs(lidx).mask().overlap(*Matcher::idealreg2regmask[Op_RegI]) )
408163463Sglebius      cnt += lrgs(lidx).reg_pressure();
40953498Sjulian  }
410125028Sharti  return cnt;
41152419Sjulian}
41252419Sjulian
41352419Sjulian//------------------------------count_float_pressure---------------------------
41452419Sjulianuint PhaseChaitin::count_float_pressure( IndexSet *liveout ) {
41552419Sjulian  IndexSetIterator elements(liveout);
41652419Sjulian  uint lidx;
41752419Sjulian  uint cnt = 0;
41852419Sjulian  while ((lidx = elements.next()) != 0) {
41952419Sjulian    if( lrgs(lidx).mask().is_UP() &&
420146718Sbz        lrgs(lidx).mask_size() &&
421146718Sbz        lrgs(lidx)._is_float )
422146718Sbz      cnt += lrgs(lidx).reg_pressure();
423146718Sbz  }
424146718Sbz  return cnt;
425146718Sbz}
42653498Sjulian
42753498Sjulian//------------------------------lower_pressure---------------------------------
42853498Sjulian// Adjust register pressure down by 1.  Capture last hi-to-low transition,
42953498Sjulianstatic void lower_pressure( LRG *lrg, uint where, Block *b, uint *pressure, uint *hrp_index ) {
430146718Sbz  if( lrg->mask().is_UP() && lrg->mask_size() ) {
43170784Sjulian    if( lrg->_is_float ) {
43253498Sjulian      pressure[1] -= lrg->reg_pressure();
43353498Sjulian      if( pressure[1] == (uint)FLOATPRESSURE ) {
43453498Sjulian        hrp_index[1] = where;
43553498Sjulian#ifdef EXACT_PRESSURE
436163463Sglebius      if( pressure[1] > b->_freg_pressure )
43753498Sjulian        b->_freg_pressure = pressure[1]+1;
43853498Sjulian#else
43970784Sjulian        b->_freg_pressure = (uint)FLOATPRESSURE+1;
44053498Sjulian#endif
441125028Sharti      }
44253498Sjulian    } else if( lrg->mask().overlap(*Matcher::idealreg2regmask[Op_RegI]) ) {
44353498Sjulian      pressure[0] -= lrg->reg_pressure();
44453498Sjulian      if( pressure[0] == (uint)INTPRESSURE   ) {
44552419Sjulian        hrp_index[0] = where;
44653498Sjulian#ifdef EXACT_PRESSURE
44753498Sjulian      if( pressure[0] > b->_reg_pressure )
44853498Sjulian        b->_reg_pressure = pressure[0]+1;
44953498Sjulian#else
45053498Sjulian        b->_reg_pressure = (uint)INTPRESSURE+1;
45153498Sjulian#endif
45252419Sjulian      }
45353498Sjulian    }
454163463Sglebius  }
45572053Sjulian}
45653498Sjulian
457163463Sglebius//------------------------------build_ifg_physical-----------------------------
45872053Sjulian// Build the interference graph using physical registers when available.
45952419Sjulian// That is, if 2 live ranges are simultaneously alive but in their acceptable
46052419Sjulian// register sets do not overlap, then they do not interfere.
461163463Sglebiusuint PhaseChaitin::build_ifg_physical( ResourceArea *a ) {
462163463Sglebius  NOT_PRODUCT( Compile::TracePhase t3("buildIFG", &_t_buildIFGphysical, TimeCompiler); )
46352419Sjulian
46452419Sjulian  uint spill_reg = LRG::SPILL_REG;
46552419Sjulian  uint must_spill = 0;
46652419Sjulian
46752419Sjulian  // For all blocks (in any order) do...
46852419Sjulian  for( uint i = 0; i < _cfg._num_blocks; i++ ) {
46952419Sjulian    Block *b = _cfg._blocks[i];
47052419Sjulian    // Clone (rather than smash in place) the liveout info, so it is alive
47152419Sjulian    // for the "collect_gc_info" phase later.
47252419Sjulian    IndexSet liveout(_live->live(b));
47383366Sjulian    uint last_inst = b->end_idx();
47452419Sjulian    // Compute first nonphi node index
47552419Sjulian    uint first_inst;
47652419Sjulian    for( first_inst = 1; first_inst < last_inst; first_inst++ )
47752419Sjulian      if( !b->_nodes[first_inst]->is_Phi() )
47852419Sjulian        break;
47952419Sjulian
48052419Sjulian    // Spills could be inserted before CreateEx node which should be
48152419Sjulian    // first instruction in block after Phis. Move CreateEx up.
48252419Sjulian    for( uint insidx = first_inst; insidx < last_inst; insidx++ ) {
48352419Sjulian      Node *ex = b->_nodes[insidx];
48452419Sjulian      if( ex->is_SpillCopy() ) continue;
48552419Sjulian      if( insidx > first_inst && ex->is_Mach() &&
486169462Srwatson          ex->as_Mach()->ideal_Opcode() == Op_CreateEx ) {
48752419Sjulian        // If the CreateEx isn't above all the MachSpillCopies
48853098Sbrian        // then move it to the top.
48953098Sbrian        b->_nodes.remove(insidx);
490151975Sglebius        b->_nodes.insert(first_inst, ex);
491151975Sglebius      }
49252419Sjulian      // Stop once a CreateEx or any other node is found
49353098Sbrian      break;
494151975Sglebius    }
495151975Sglebius
49652419Sjulian    // Reset block's register pressure values for each ifg construction
49753098Sbrian    uint pressure[2], hrp_index[2];
498231823Sglebius    pressure[0] = pressure[1] = 0;
499231823Sglebius    hrp_index[0] = hrp_index[1] = last_inst+1;
500231823Sglebius    b->_reg_pressure = b->_freg_pressure = 0;
501231823Sglebius    // Liveout things are presumed live for the whole block.  We accumulate
502151975Sglebius    // 'area' accordingly.  If they get killed in the block, we'll subtract
503151975Sglebius    // the unused part of the block from the area.
504151975Sglebius    int inst_count = last_inst - first_inst;
50553098Sbrian    double cost = (inst_count <= 0) ? 0.0 : b->_freq * double(inst_count);
506151975Sglebius    assert(!(cost < 0.0), "negative spill cost" );
507231823Sglebius    IndexSetIterator elements(&liveout);
508231823Sglebius    uint lidx;
509231823Sglebius    while ((lidx = elements.next()) != 0) {
51053098Sbrian      LRG &lrg = lrgs(lidx);
511151975Sglebius      lrg._area += cost;
512151975Sglebius      // Compute initial register pressure
513151975Sglebius      if( lrg.mask().is_UP() && lrg.mask_size() ) {
514151975Sglebius        if( lrg._is_float ) {   // Count float pressure
515151975Sglebius          pressure[1] += lrg.reg_pressure();
516231823Sglebius#ifdef EXACT_PRESSURE
517151975Sglebius          if( pressure[1] > b->_freg_pressure )
518151975Sglebius            b->_freg_pressure = pressure[1];
519151975Sglebius#endif
520151975Sglebius          // Count int pressure, but do not count the SP, flags
52152419Sjulian        } else if( lrgs(lidx).mask().overlap(*Matcher::idealreg2regmask[Op_RegI]) ) {
52252419Sjulian          pressure[0] += lrg.reg_pressure();
52352419Sjulian#ifdef EXACT_PRESSURE
52452419Sjulian          if( pressure[0] > b->_reg_pressure )
52552419Sjulian            b->_reg_pressure = pressure[0];
52652419Sjulian#endif
52752419Sjulian        }
52852419Sjulian      }
52952419Sjulian    }
53052419Sjulian    assert( pressure[0] == count_int_pressure  (&liveout), "" );
53152419Sjulian    assert( pressure[1] == count_float_pressure(&liveout), "" );
532151975Sglebius
53352419Sjulian    // The IFG is built by a single reverse pass over each basic block.
534224031Sglebius    // Starting with the known live-out set, we remove things that get
53552419Sjulian    // defined and add things that become live (essentially executing one
53652419Sjulian    // pass of a standard LIVE analysis).  Just before a Node defines a value
53752419Sjulian    // (and removes it from the live-ness set) that value is certainly live.
538224031Sglebius    // The defined value interferes with everything currently live.  The
53952419Sjulian    // value is then removed from the live-ness set and it's inputs are added
54053526Sjulian    // to the live-ness set.
54152419Sjulian    uint j;
542224031Sglebius    for( j = last_inst + 1; j > 1; j-- ) {
543224031Sglebius      Node *n = b->_nodes[j - 1];
544224031Sglebius
545224031Sglebius      // Get value being defined
546224031Sglebius      uint r = n2lidx(n);
547224031Sglebius
548230487Sglebius      // Some special values do not allocate
549230487Sglebius      if( r ) {
550230487Sglebius        // A DEF normally costs block frequency; rematerialized values are
551230487Sglebius        // removed from the DEF sight, so LOWER costs here.
552230487Sglebius        lrgs(r)._cost += n->rematerialize() ? 0 : b->_freq;
553230487Sglebius
554224031Sglebius        // If it is not live, then this instruction is dead.  Probably caused
555230487Sglebius        // by spilling and rematerialization.  Who cares why, yank this baby.
556224031Sglebius        if( !liveout.member(r) && n->Opcode() != Op_SafePoint ) {
557224031Sglebius          Node *def = n->in(0);
558224031Sglebius          if( !n->is_Proj() ||
559224031Sglebius              // Could also be a flags-projection of a dead ADD or such.
560151975Sglebius              (n2lidx(def) && !liveout.member(n2lidx(def)) ) ) {
561151975Sglebius            b->_nodes.remove(j - 1);
562151975Sglebius            if( lrgs(r)._def == n ) lrgs(r)._def = 0;
563151975Sglebius            n->disconnect_inputs(NULL);
564224031Sglebius            _cfg._bbs.map(n->_idx,NULL);
565230481Sglebius            n->replace_by(C->top());
56652419Sjulian            // Since yanking a Node from block, high pressure moves up one
567151975Sglebius            hrp_index[0]--;
568151975Sglebius            hrp_index[1]--;
569151975Sglebius            continue;
570151975Sglebius          }
571147774Sglebius
57252419Sjulian          // Fat-projections kill many registers which cannot be used to
57352419Sjulian          // hold live ranges.
57452419Sjulian          if( lrgs(r)._fat_proj ) {
57552419Sjulian            // Count the int-only registers
57652419Sjulian            RegMask itmp = lrgs(r).mask();
57752419Sjulian            itmp.AND(*Matcher::idealreg2regmask[Op_RegI]);
578163463Sglebius            int iregs = itmp.Size();
57952419Sjulian#ifdef EXACT_PRESSURE
58052419Sjulian            if( pressure[0]+iregs > b->_reg_pressure )
58152419Sjulian              b->_reg_pressure = pressure[0]+iregs;
58252419Sjulian#endif
58352419Sjulian            if( pressure[0]       <= (uint)INTPRESSURE &&
58452419Sjulian                pressure[0]+iregs >  (uint)INTPRESSURE ) {
58552419Sjulian#ifndef EXACT_PRESSURE
58652419Sjulian              b->_reg_pressure = (uint)INTPRESSURE+1;
58752419Sjulian#endif
58852419Sjulian              hrp_index[0] = j-1;
58952419Sjulian            }
59052419Sjulian            // Count the float-only registers
591163463Sglebius            RegMask ftmp = lrgs(r).mask();
59252419Sjulian            ftmp.AND(*Matcher::idealreg2regmask[Op_RegD]);
59352419Sjulian            int fregs = ftmp.Size();
59452419Sjulian#ifdef EXACT_PRESSURE
59552419Sjulian            if( pressure[1]+fregs > b->_freg_pressure )
596163463Sglebius              b->_freg_pressure = pressure[1]+fregs;
597163463Sglebius#endif
59852419Sjulian            if( pressure[1]       <= (uint)FLOATPRESSURE &&
59952419Sjulian                pressure[1]+fregs >  (uint)FLOATPRESSURE ) {
600163463Sglebius#ifndef EXACT_PRESSURE
601163463Sglebius              b->_freg_pressure = (uint)FLOATPRESSURE+1;
60252419Sjulian#endif
60352419Sjulian              hrp_index[1] = j-1;
604205082Sglebius            }
605205082Sglebius          }
606205082Sglebius
607205082Sglebius        } else {                // Else it is live
60852419Sjulian          // A DEF also ends 'area' partway through the block.
60952419Sjulian          lrgs(r)._area -= cost;
61052419Sjulian          assert(!(lrgs(r)._area < 0.0), "negative spill area" );
61152419Sjulian
61252419Sjulian          // Insure high score for immediate-use spill copies so they get a color
61352419Sjulian          if( n->is_SpillCopy()
61452419Sjulian              && lrgs(r).is_singledef()        // MultiDef live range can still split
61552419Sjulian              && n->outcnt() == 1              // and use must be in this block
61652419Sjulian              && _cfg._bbs[n->unique_out()->_idx] == b ) {
61752419Sjulian            // All single-use MachSpillCopy(s) that immediately precede their
618151975Sglebius            // use must color early.  If a longer live range steals their
61952419Sjulian            // color, the spill copy will split and may push another spill copy
620151975Sglebius            // further away resulting in an infinite spill-split-retry cycle.
62152419Sjulian            // Assigning a zero area results in a high score() and a good
622151975Sglebius            // location in the simplify list.
623151975Sglebius            //
624146290Sglebius
62552419Sjulian            Node *single_use = n->unique_out();
62652419Sjulian            assert( b->find_node(single_use) >= j, "Use must be later in block");
62772053Sjulian            // Use can be earlier in block if it is a Phi, but then I should be a MultiDef
62852419Sjulian
62952419Sjulian            // Find first non SpillCopy 'm' that follows the current instruction
63072053Sjulian            // (j - 1) is index for current instruction 'n'
63152419Sjulian            Node *m = n;
63252419Sjulian            for( uint i = j; i <= last_inst && m->is_SpillCopy(); ++i ) { m = b->_nodes[i]; }
633213794Srpaulo            if( m == single_use ) {
63452419Sjulian              lrgs(r)._area = 0.0;
635151975Sglebius            }
636230481Sglebius          }
637151975Sglebius
638151975Sglebius          // Remove from live-out set
63952419Sjulian          if( liveout.remove(r) ) {
640151975Sglebius            // Adjust register pressure.
64152419Sjulian            // Capture last hi-to-lo pressure transition
642205082Sglebius            lower_pressure( &lrgs(r), j-1, b, pressure, hrp_index );
643205082Sglebius            assert( pressure[0] == count_int_pressure  (&liveout), "" );
644205082Sglebius            assert( pressure[1] == count_float_pressure(&liveout), "" );
645163463Sglebius          }
64652419Sjulian
64752419Sjulian          // Copies do not define a new value and so do not interfere.
648151975Sglebius          // Remove the copies source from the liveout set before interfering.
649151975Sglebius          uint idx = n->is_Copy();
650151975Sglebius          if( idx ) {
651151975Sglebius            uint x = n2lidx(n->in(idx));
652151975Sglebius            if( liveout.remove( x ) ) {
653151975Sglebius              lrgs(x)._area -= cost;
654151975Sglebius              // Adjust register pressure.
655151975Sglebius              lower_pressure( &lrgs(x), j-1, b, pressure, hrp_index );
656151975Sglebius              assert( pressure[0] == count_int_pressure  (&liveout), "" );
657151975Sglebius              assert( pressure[1] == count_float_pressure(&liveout), "" );
658151975Sglebius            }
659151975Sglebius          }
660230487Sglebius        } // End of if live or not
661163463Sglebius
662151975Sglebius        // Interfere with everything live.  If the defined value must
663151975Sglebius        // go in a particular register, just remove that register from
664151975Sglebius        // all conflicting parties and avoid the interference.
665151975Sglebius
666151975Sglebius        // Make exclusions for rematerializable defs.  Since rematerializable
667151975Sglebius        // DEFs are not bound but the live range is, some uses must be bound.
668151975Sglebius        // If we spill live range 'r', it can rematerialize at each use site
669151975Sglebius        // according to its bindings.
670151975Sglebius        const RegMask &rmask = lrgs(r).mask();
671151975Sglebius        if( lrgs(r).is_bound() && !(n->rematerialize()) && rmask.is_NotEmpty() ) {
672151975Sglebius          // Smear odd bits; leave only aligned pairs of bits.
673151975Sglebius          RegMask r2mask = rmask;
674151975Sglebius          r2mask.SmearToPairs();
675151975Sglebius          // Check for common case
67652419Sjulian          int r_size = lrgs(r).num_regs();
67752419Sjulian          OptoReg::Name r_reg = (r_size == 1) ? rmask.find_first_elem() : OptoReg::Physical;
67852419Sjulian
67952419Sjulian          IndexSetIterator elements(&liveout);
68052419Sjulian          uint l;
68152419Sjulian          while ((l = elements.next()) != 0) {
68252419Sjulian            LRG &lrg = lrgs(l);
68352419Sjulian            // If 'l' must spill already, do not further hack his bits.
68472053Sjulian            // He'll get some interferences and be forced to spill later.
68552419Sjulian            if( lrg._must_spill ) continue;
68670700Sjulian            // Remove bound register(s) from 'l's choices
68752419Sjulian            RegMask old = lrg.mask();
688163463Sglebius            uint old_size = lrg.mask_size();
68952419Sjulian            // Remove the bits from LRG 'r' from LRG 'l' so 'l' no
69052419Sjulian            // longer interferes with 'r'.  If 'l' requires aligned
69152419Sjulian            // adjacent pairs, subtract out bit pairs.
692163463Sglebius            if( lrg.num_regs() == 2 && !lrg._fat_proj ) {
693163463Sglebius              lrg.SUBTRACT( r2mask );
694163463Sglebius              lrg.compute_set_mask_size();
695163463Sglebius            } else if( r_size != 1 ) {
69670700Sjulian              lrg.SUBTRACT( rmask );
69770700Sjulian              lrg.compute_set_mask_size();
69852419Sjulian            } else {            // Common case: size 1 bound removal
699163463Sglebius              if( lrg.mask().Member(r_reg) ) {
700146284Sglebius                lrg.Remove(r_reg);
701163463Sglebius                lrg.set_mask_size(lrg.mask().is_AllStack() ? 65535:old_size-1);
702102244Sarchie              }
70370700Sjulian            }
70452419Sjulian            // If 'l' goes completely dry, it must spill.
70570700Sjulian            if( lrg.not_free() ) {
70670784Sjulian              // Give 'l' some kind of reasonable mask, so he picks up
70770700Sjulian              // interferences (and will spill later).
70870700Sjulian              lrg.set_mask( old );
70970700Sjulian              lrg.set_mask_size(old_size);
71070700Sjulian              must_spill++;
71170784Sjulian              lrg._must_spill = 1;
71270700Sjulian              lrg.set_reg(OptoReg::Name(LRG::SPILL_REG));
71352419Sjulian            }
71470700Sjulian          }
71572053Sjulian        } // End of if bound
71672053Sjulian
71770700Sjulian        // Now interference with everything that is live and has
71852419Sjulian        // compatible register sets.
71970700Sjulian        interfere_with_live(r,&liveout);
72052419Sjulian
72170700Sjulian      } // End of if normal register-allocated value
72270700Sjulian
72370700Sjulian      // Area remaining in the block
72470700Sjulian      inst_count--;
725151975Sglebius      cost = (inst_count <= 0) ? 0.0 : b->_freq * double(inst_count);
72672053Sjulian
72772053Sjulian      // Make all inputs live
728230481Sglebius      if( !n->is_Phi() ) {      // Phi function uses come from prior block
729151975Sglebius        JVMState* jvms = n->jvms();
730151975Sglebius        uint debug_start = jvms ? jvms->debug_start() : 999999;
73170700Sjulian        // Start loop at 1 (skip control edge) for most Nodes.
73252419Sjulian        // SCMemProj's might be the sole use of a StoreLConditional.
73352419Sjulian        // While StoreLConditionals set memory (the SCMemProj use)
73452419Sjulian        // they also def flags; if that flag def is unused the
73552419Sjulian        // allocator sees a flag-setting instruction with no use of
73652419Sjulian        // the flags and assumes it's dead.  This keeps the (useless)
73752419Sjulian        // flag-setting behavior alive while also keeping the (useful)
73852419Sjulian        // memory update effect.
73952419Sjulian        for( uint k = ((n->Opcode() == Op_SCMemProj) ? 0:1); k < n->req(); k++ ) {
74052419Sjulian          Node *def = n->in(k);
74172053Sjulian          uint x = n2lidx(def);
74252419Sjulian          if( !x ) continue;
74352419Sjulian          LRG &lrg = lrgs(x);
74472053Sjulian          // No use-side cost for spilling debug info
74552419Sjulian          if( k < debug_start )
74652419Sjulian            // A USE costs twice block frequency (once for the Load, once
74752419Sjulian            // for a Load-delay).  Rematerialized uses only cost once.
748163463Sglebius            lrg._cost += (def->rematerialize() ? b->_freq : (b->_freq + b->_freq));
749163463Sglebius          // It is live now
750163463Sglebius          if( liveout.insert( x ) ) {
75152419Sjulian            // Newly live things assumed live from here to top of block
75252419Sjulian            lrg._area += cost;
75352419Sjulian            // Adjust register pressure
75472053Sjulian            if( lrg.mask().is_UP() && lrg.mask_size() ) {
75552419Sjulian              if( lrg._is_float ) {
75652419Sjulian                pressure[1] += lrg.reg_pressure();
757147774Sglebius#ifdef EXACT_PRESSURE
758147774Sglebius                if( pressure[1] > b->_freg_pressure )
759147774Sglebius                  b->_freg_pressure = pressure[1];
760147774Sglebius#endif
76152419Sjulian              } else if( lrg.mask().overlap(*Matcher::idealreg2regmask[Op_RegI]) ) {
76252419Sjulian                pressure[0] += lrg.reg_pressure();
76352419Sjulian#ifdef EXACT_PRESSURE
76452419Sjulian                if( pressure[0] > b->_reg_pressure )
76570700Sjulian                  b->_reg_pressure = pressure[0];
76652419Sjulian#endif
76752419Sjulian              }
76852419Sjulian            }
76952419Sjulian            assert( pressure[0] == count_int_pressure  (&liveout), "" );
770230487Sglebius            assert( pressure[1] == count_float_pressure(&liveout), "" );
771230487Sglebius          }
772230487Sglebius          assert(!(lrg._area < 0.0), "negative spill area" );
773230487Sglebius        }
774230487Sglebius      }
775230487Sglebius    } // End of reverse pass over all instructions in block
776230487Sglebius
777230487Sglebius    // If we run off the top of the block with high pressure and
778230487Sglebius    // never see a hi-to-low pressure transition, just record that
779230487Sglebius    // the whole block is high pressure.
780230487Sglebius    if( pressure[0] > (uint)INTPRESSURE   ) {
781230487Sglebius      hrp_index[0] = 0;
782230487Sglebius#ifdef EXACT_PRESSURE
783230487Sglebius      if( pressure[0] > b->_reg_pressure )
784230487Sglebius        b->_reg_pressure = pressure[0];
785230487Sglebius#else
786230487Sglebius      b->_reg_pressure = (uint)INTPRESSURE+1;
787230487Sglebius#endif
788230487Sglebius    }
789230487Sglebius    if( pressure[1] > (uint)FLOATPRESSURE ) {
790230487Sglebius      hrp_index[1] = 0;
791230487Sglebius#ifdef EXACT_PRESSURE
792230487Sglebius      if( pressure[1] > b->_freg_pressure )
793230487Sglebius        b->_freg_pressure = pressure[1];
794230487Sglebius#else
795230487Sglebius      b->_freg_pressure = (uint)FLOATPRESSURE+1;
796230487Sglebius#endif
797230487Sglebius    }
798230487Sglebius
79952419Sjulian    // Compute high pressure indice; avoid landing in the middle of projnodes
80052419Sjulian    j = hrp_index[0];
80152419Sjulian    if( j < b->_nodes.size() && j < b->end_idx()+1 ) {
80252419Sjulian      Node *cur = b->_nodes[j];
80352419Sjulian      while( cur->is_Proj() || (cur->is_MachNullCheck()) || cur->is_Catch() ) {
80452419Sjulian        j--;
80552419Sjulian        cur = b->_nodes[j];
806230487Sglebius      }
807230487Sglebius    }
808230487Sglebius    b->_ihrp_index = j;
809230487Sglebius    j = hrp_index[1];
810230487Sglebius    if( j < b->_nodes.size() && j < b->end_idx()+1 ) {
811230487Sglebius      Node *cur = b->_nodes[j];
812230487Sglebius      while( cur->is_Proj() || (cur->is_MachNullCheck()) || cur->is_Catch() ) {
813230487Sglebius        j--;
814230487Sglebius        cur = b->_nodes[j];
815230487Sglebius      }
816230487Sglebius    }
817230487Sglebius    b->_fhrp_index = j;
818230487Sglebius
819230487Sglebius#ifndef PRODUCT
82052419Sjulian    // Gather Register Pressure Statistics
82152419Sjulian    if( PrintOptoStatistics ) {
82252419Sjulian      if( b->_reg_pressure > (uint)INTPRESSURE || b->_freg_pressure > (uint)FLOATPRESSURE )
823163463Sglebius        _high_pressure++;
824163463Sglebius      else
82572053Sjulian        _low_pressure++;
82672053Sjulian    }
82772053Sjulian#endif
82872053Sjulian  } // End of for all blocks
82972053Sjulian
83072053Sjulian  return must_spill;
83172053Sjulian}
832163463Sglebius