1/*
2 * Copyright (c) 1998, 2017, Oracle and/or its affiliates. All rights reserved.
3 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
4 *
5 * This code is free software; you can redistribute it and/or modify it
6 * under the terms of the GNU General Public License version 2 only, as
7 * published by the Free Software Foundation.
8 *
9 * This code is distributed in the hope that it will be useful, but WITHOUT
10 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
11 * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
12 * version 2 for more details (a copy is included in the LICENSE file that
13 * accompanied this code).
14 *
15 * You should have received a copy of the GNU General Public License version
16 * 2 along with this work; if not, write to the Free Software Foundation,
17 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
18 *
19 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
20 * or visit www.oracle.com if you need additional information or have any
21 * questions.
22 *
23 */
24
25#include "precompiled.hpp"
26#include "compiler/oopMap.hpp"
27#include "memory/allocation.inline.hpp"
28#include "memory/resourceArea.hpp"
29#include "opto/addnode.hpp"
30#include "opto/block.hpp"
31#include "opto/callnode.hpp"
32#include "opto/cfgnode.hpp"
33#include "opto/chaitin.hpp"
34#include "opto/coalesce.hpp"
35#include "opto/indexSet.hpp"
36#include "opto/machnode.hpp"
37#include "opto/memnode.hpp"
38#include "opto/opcodes.hpp"
39
40PhaseIFG::PhaseIFG( Arena *arena ) : Phase(Interference_Graph), _arena(arena) {
41}
42
43void PhaseIFG::init( uint maxlrg ) {
44  _maxlrg = maxlrg;
45  _yanked = new (_arena) VectorSet(_arena);
46  _is_square = false;
47  // Make uninitialized adjacency lists
48  _adjs = (IndexSet*)_arena->Amalloc(sizeof(IndexSet)*maxlrg);
49  // Also make empty live range structures
50  _lrgs = (LRG *)_arena->Amalloc( maxlrg * sizeof(LRG) );
51  memset((void*)_lrgs,0,sizeof(LRG)*maxlrg);
52  // Init all to empty
53  for( uint i = 0; i < maxlrg; i++ ) {
54    _adjs[i].initialize(maxlrg);
55    _lrgs[i].Set_All();
56  }
57}
58
59// Add edge between vertices a & b.  These are sorted (triangular matrix),
60// then the smaller number is inserted in the larger numbered array.
61int PhaseIFG::add_edge( uint a, uint b ) {
62  lrgs(a).invalid_degree();
63  lrgs(b).invalid_degree();
64  // Sort a and b, so that a is bigger
65  assert( !_is_square, "only on triangular" );
66  if( a < b ) { uint tmp = a; a = b; b = tmp; }
67  return _adjs[a].insert( b );
68}
69
70// Add an edge between 'a' and everything in the vector.
71void PhaseIFG::add_vector( uint a, IndexSet *vec ) {
72  // IFG is triangular, so do the inserts where 'a' < 'b'.
73  assert( !_is_square, "only on triangular" );
74  IndexSet *adjs_a = &_adjs[a];
75  if( !vec->count() ) return;
76
77  IndexSetIterator elements(vec);
78  uint neighbor;
79  while ((neighbor = elements.next()) != 0) {
80    add_edge( a, neighbor );
81  }
82}
83
84// Is there an edge between a and b?
85int PhaseIFG::test_edge( uint a, uint b ) const {
86  // Sort a and b, so that a is larger
87  assert( !_is_square, "only on triangular" );
88  if( a < b ) { uint tmp = a; a = b; b = tmp; }
89  return _adjs[a].member(b);
90}
91
92// Convert triangular matrix to square matrix
93void PhaseIFG::SquareUp() {
94  assert( !_is_square, "only on triangular" );
95
96  // Simple transpose
97  for( uint i = 0; i < _maxlrg; i++ ) {
98    IndexSetIterator elements(&_adjs[i]);
99    uint datum;
100    while ((datum = elements.next()) != 0) {
101      _adjs[datum].insert( i );
102    }
103  }
104  _is_square = true;
105}
106
107// Compute effective degree in bulk
108void PhaseIFG::Compute_Effective_Degree() {
109  assert( _is_square, "only on square" );
110
111  for( uint i = 0; i < _maxlrg; i++ )
112    lrgs(i).set_degree(effective_degree(i));
113}
114
115int PhaseIFG::test_edge_sq( uint a, uint b ) const {
116  assert( _is_square, "only on square" );
117  // Swap, so that 'a' has the lesser count.  Then binary search is on
118  // the smaller of a's list and b's list.
119  if( neighbor_cnt(a) > neighbor_cnt(b) ) { uint tmp = a; a = b; b = tmp; }
120  //return _adjs[a].unordered_member(b);
121  return _adjs[a].member(b);
122}
123
124// Union edges of B into A
125void PhaseIFG::Union( uint a, uint b ) {
126  assert( _is_square, "only on square" );
127  IndexSet *A = &_adjs[a];
128  IndexSetIterator b_elements(&_adjs[b]);
129  uint datum;
130  while ((datum = b_elements.next()) != 0) {
131    if(A->insert(datum)) {
132      _adjs[datum].insert(a);
133      lrgs(a).invalid_degree();
134      lrgs(datum).invalid_degree();
135    }
136  }
137}
138
139// Yank a Node and all connected edges from the IFG.  Return a
140// list of neighbors (edges) yanked.
141IndexSet *PhaseIFG::remove_node( uint a ) {
142  assert( _is_square, "only on square" );
143  assert( !_yanked->test(a), "" );
144  _yanked->set(a);
145
146  // I remove the LRG from all neighbors.
147  IndexSetIterator elements(&_adjs[a]);
148  LRG &lrg_a = lrgs(a);
149  uint datum;
150  while ((datum = elements.next()) != 0) {
151    _adjs[datum].remove(a);
152    lrgs(datum).inc_degree( -lrg_a.compute_degree(lrgs(datum)) );
153  }
154  return neighbors(a);
155}
156
157// Re-insert a yanked Node.
158void PhaseIFG::re_insert( uint a ) {
159  assert( _is_square, "only on square" );
160  assert( _yanked->test(a), "" );
161  (*_yanked) >>= a;
162
163  IndexSetIterator elements(&_adjs[a]);
164  uint datum;
165  while ((datum = elements.next()) != 0) {
166    _adjs[datum].insert(a);
167    lrgs(datum).invalid_degree();
168  }
169}
170
171// Compute the degree between 2 live ranges.  If both live ranges are
172// aligned-adjacent powers-of-2 then we use the MAX size.  If either is
173// mis-aligned (or for Fat-Projections, not-adjacent) then we have to
174// MULTIPLY the sizes.  Inspect Brigg's thesis on register pairs to see why
175// this is so.
176int LRG::compute_degree( LRG &l ) const {
177  int tmp;
178  int num_regs = _num_regs;
179  int nregs = l.num_regs();
180  tmp =  (_fat_proj || l._fat_proj)     // either is a fat-proj?
181    ? (num_regs * nregs)                // then use product
182    : MAX2(num_regs,nregs);             // else use max
183  return tmp;
184}
185
186// Compute effective degree for this live range.  If both live ranges are
187// aligned-adjacent powers-of-2 then we use the MAX size.  If either is
188// mis-aligned (or for Fat-Projections, not-adjacent) then we have to
189// MULTIPLY the sizes.  Inspect Brigg's thesis on register pairs to see why
190// this is so.
191int PhaseIFG::effective_degree( uint lidx ) const {
192  int eff = 0;
193  int num_regs = lrgs(lidx).num_regs();
194  int fat_proj = lrgs(lidx)._fat_proj;
195  IndexSet *s = neighbors(lidx);
196  IndexSetIterator elements(s);
197  uint nidx;
198  while((nidx = elements.next()) != 0) {
199    LRG &lrgn = lrgs(nidx);
200    int nregs = lrgn.num_regs();
201    eff += (fat_proj || lrgn._fat_proj) // either is a fat-proj?
202      ? (num_regs * nregs)              // then use product
203      : MAX2(num_regs,nregs);           // else use max
204  }
205  return eff;
206}
207
208
209#ifndef PRODUCT
210void PhaseIFG::dump() const {
211  tty->print_cr("-- Interference Graph --%s--",
212                _is_square ? "square" : "triangular" );
213  if( _is_square ) {
214    for( uint i = 0; i < _maxlrg; i++ ) {
215      tty->print( (*_yanked)[i] ? "XX " : "  ");
216      tty->print("L%d: { ",i);
217      IndexSetIterator elements(&_adjs[i]);
218      uint datum;
219      while ((datum = elements.next()) != 0) {
220        tty->print("L%d ", datum);
221      }
222      tty->print_cr("}");
223
224    }
225    return;
226  }
227
228  // Triangular
229  for( uint i = 0; i < _maxlrg; i++ ) {
230    uint j;
231    tty->print( (*_yanked)[i] ? "XX " : "  ");
232    tty->print("L%d: { ",i);
233    for( j = _maxlrg; j > i; j-- )
234      if( test_edge(j - 1,i) ) {
235        tty->print("L%d ",j - 1);
236      }
237    tty->print("| ");
238    IndexSetIterator elements(&_adjs[i]);
239    uint datum;
240    while ((datum = elements.next()) != 0) {
241      tty->print("L%d ", datum);
242    }
243    tty->print("}\n");
244  }
245  tty->print("\n");
246}
247
248void PhaseIFG::stats() const {
249  ResourceMark rm;
250  int *h_cnt = NEW_RESOURCE_ARRAY(int,_maxlrg*2);
251  memset( h_cnt, 0, sizeof(int)*_maxlrg*2 );
252  uint i;
253  for( i = 0; i < _maxlrg; i++ ) {
254    h_cnt[neighbor_cnt(i)]++;
255  }
256  tty->print_cr("--Histogram of counts--");
257  for( i = 0; i < _maxlrg*2; i++ )
258    if( h_cnt[i] )
259      tty->print("%d/%d ",i,h_cnt[i]);
260  tty->cr();
261}
262
263void PhaseIFG::verify( const PhaseChaitin *pc ) const {
264  // IFG is square, sorted and no need for Find
265  for( uint i = 0; i < _maxlrg; i++ ) {
266    assert(!((*_yanked)[i]) || !neighbor_cnt(i), "Is removed completely" );
267    IndexSet *set = &_adjs[i];
268    IndexSetIterator elements(set);
269    uint idx;
270    uint last = 0;
271    while ((idx = elements.next()) != 0) {
272      assert(idx != i, "Must have empty diagonal");
273      assert(pc->_lrg_map.find_const(idx) == idx, "Must not need Find");
274      assert(_adjs[idx].member(i), "IFG not square");
275      assert(!(*_yanked)[idx], "No yanked neighbors");
276      assert(last < idx, "not sorted increasing");
277      last = idx;
278    }
279    assert(!lrgs(i)._degree_valid || effective_degree(i) == lrgs(i).degree(), "degree is valid but wrong");
280  }
281}
282#endif
283
284/*
285 * Interfere this register with everything currently live.
286 * Check for interference by checking overlap of regmasks.
287 * Only interfere if acceptable register masks overlap.
288 */
289void PhaseChaitin::interfere_with_live(uint lid, IndexSet* liveout) {
290  LRG& lrg = lrgs(lid);
291  const RegMask& rm = lrg.mask();
292  IndexSetIterator elements(liveout);
293  uint interfering_lid = elements.next();
294  while (interfering_lid != 0) {
295    LRG& interfering_lrg = lrgs(interfering_lid);
296    if (rm.overlap(interfering_lrg.mask())) {
297      _ifg->add_edge(lid, interfering_lid);
298    }
299    interfering_lid = elements.next();
300  }
301}
302
303// Actually build the interference graph.  Uses virtual registers only, no
304// physical register masks.  This allows me to be very aggressive when
305// coalescing copies.  Some of this aggressiveness will have to be undone
306// later, but I'd rather get all the copies I can now (since unremoved copies
307// at this point can end up in bad places).  Copies I re-insert later I have
308// more opportunity to insert them in low-frequency locations.
309void PhaseChaitin::build_ifg_virtual( ) {
310  Compile::TracePhase tp("buildIFG_virt", &timers[_t_buildIFGvirtual]);
311
312  // For all blocks (in any order) do...
313  for (uint i = 0; i < _cfg.number_of_blocks(); i++) {
314    Block* block = _cfg.get_block(i);
315    IndexSet* liveout = _live->live(block);
316
317    // The IFG is built by a single reverse pass over each basic block.
318    // Starting with the known live-out set, we remove things that get
319    // defined and add things that become live (essentially executing one
320    // pass of a standard LIVE analysis). Just before a Node defines a value
321    // (and removes it from the live-ness set) that value is certainly live.
322    // The defined value interferes with everything currently live.  The
323    // value is then removed from the live-ness set and it's inputs are
324    // added to the live-ness set.
325    for (uint j = block->end_idx() + 1; j > 1; j--) {
326      Node* n = block->get_node(j - 1);
327
328      // Get value being defined
329      uint r = _lrg_map.live_range_id(n);
330
331      // Some special values do not allocate
332      if (r) {
333
334        // Remove from live-out set
335        liveout->remove(r);
336
337        // Copies do not define a new value and so do not interfere.
338        // Remove the copies source from the liveout set before interfering.
339        uint idx = n->is_Copy();
340        if (idx != 0) {
341          liveout->remove(_lrg_map.live_range_id(n->in(idx)));
342        }
343
344        // Interfere with everything live
345        interfere_with_live(r, liveout);
346      }
347
348      // Make all inputs live
349      if (!n->is_Phi()) {      // Phi function uses come from prior block
350        for(uint k = 1; k < n->req(); k++) {
351          liveout->insert(_lrg_map.live_range_id(n->in(k)));
352        }
353      }
354
355      // 2-address instructions always have the defined value live
356      // on entry to the instruction, even though it is being defined
357      // by the instruction.  We pretend a virtual copy sits just prior
358      // to the instruction and kills the src-def'd register.
359      // In other words, for 2-address instructions the defined value
360      // interferes with all inputs.
361      uint idx;
362      if( n->is_Mach() && (idx = n->as_Mach()->two_adr()) ) {
363        const MachNode *mach = n->as_Mach();
364        // Sometimes my 2-address ADDs are commuted in a bad way.
365        // We generally want the USE-DEF register to refer to the
366        // loop-varying quantity, to avoid a copy.
367        uint op = mach->ideal_Opcode();
368        // Check that mach->num_opnds() == 3 to ensure instruction is
369        // not subsuming constants, effectively excludes addI_cin_imm
370        // Can NOT swap for instructions like addI_cin_imm since it
371        // is adding zero to yhi + carry and the second ideal-input
372        // points to the result of adding low-halves.
373        // Checking req() and num_opnds() does NOT distinguish addI_cout from addI_cout_imm
374        if( (op == Op_AddI && mach->req() == 3 && mach->num_opnds() == 3) &&
375            n->in(1)->bottom_type()->base() == Type::Int &&
376            // See if the ADD is involved in a tight data loop the wrong way
377            n->in(2)->is_Phi() &&
378            n->in(2)->in(2) == n ) {
379          Node *tmp = n->in(1);
380          n->set_req( 1, n->in(2) );
381          n->set_req( 2, tmp );
382        }
383        // Defined value interferes with all inputs
384        uint lidx = _lrg_map.live_range_id(n->in(idx));
385        for (uint k = 1; k < n->req(); k++) {
386          uint kidx = _lrg_map.live_range_id(n->in(k));
387          if (kidx != lidx) {
388            _ifg->add_edge(r, kidx);
389          }
390        }
391      }
392    } // End of forall instructions in block
393  } // End of forall blocks
394}
395
396#ifdef ASSERT
397uint PhaseChaitin::count_int_pressure(IndexSet* liveout) {
398  IndexSetIterator elements(liveout);
399  uint lidx = elements.next();
400  uint cnt = 0;
401  while (lidx != 0) {
402    LRG& lrg = lrgs(lidx);
403    if (lrg.mask_is_nonempty_and_up() &&
404        !lrg.is_float_or_vector() &&
405        lrg.mask().overlap(*Matcher::idealreg2regmask[Op_RegI])) {
406      cnt += lrg.reg_pressure();
407    }
408    lidx = elements.next();
409  }
410  return cnt;
411}
412
413uint PhaseChaitin::count_float_pressure(IndexSet* liveout) {
414  IndexSetIterator elements(liveout);
415  uint lidx = elements.next();
416  uint cnt = 0;
417  while (lidx != 0) {
418    LRG& lrg = lrgs(lidx);
419    if (lrg.mask_is_nonempty_and_up() && lrg.is_float_or_vector()) {
420      cnt += lrg.reg_pressure();
421    }
422    lidx = elements.next();
423  }
424  return cnt;
425}
426#endif
427
428/*
429 * Adjust register pressure down by 1.  Capture last hi-to-low transition,
430 */
431void PhaseChaitin::lower_pressure(Block* b, uint location, LRG& lrg, IndexSet* liveout, Pressure& int_pressure, Pressure& float_pressure) {
432  if (lrg.mask_is_nonempty_and_up()) {
433    if (lrg.is_float_or_vector()) {
434      float_pressure.lower(lrg, location);
435    } else {
436      // Do not count the SP and flag registers
437      const RegMask& r = lrg.mask();
438      if (r.overlap(*Matcher::idealreg2regmask[Op_RegI])) {
439        int_pressure.lower(lrg, location);
440      }
441    }
442  }
443  if (_scheduling_info_generated == false) {
444    assert(int_pressure.current_pressure() == count_int_pressure(liveout), "the int pressure is incorrect");
445    assert(float_pressure.current_pressure() == count_float_pressure(liveout), "the float pressure is incorrect");
446  }
447}
448
449/* Go to the first non-phi index in a block */
450static uint first_nonphi_index(Block* b) {
451  uint i;
452  uint end_idx = b->end_idx();
453  for (i = 1; i < end_idx; i++) {
454    Node* n = b->get_node(i);
455    if (!n->is_Phi()) {
456      break;
457    }
458  }
459  return i;
460}
461
462/*
463 * Spills could be inserted before a CreateEx node which should be the first
464 * instruction in a block after Phi nodes. If so, move the CreateEx node up.
465 */
466static void move_exception_node_up(Block* b, uint first_inst, uint last_inst) {
467  for (uint i = first_inst; i < last_inst; i++) {
468    Node* ex = b->get_node(i);
469    if (ex->is_SpillCopy()) {
470      continue;
471    }
472
473    if (i > first_inst &&
474        ex->is_Mach() && ex->as_Mach()->ideal_Opcode() == Op_CreateEx) {
475      b->remove_node(i);
476      b->insert_node(ex, first_inst);
477    }
478    // Stop once a CreateEx or any other node is found
479    break;
480  }
481}
482
483/*
484 * When new live ranges are live, we raise the register pressure
485 */
486void PhaseChaitin::raise_pressure(Block* b, LRG& lrg, Pressure& int_pressure, Pressure& float_pressure) {
487  if (lrg.mask_is_nonempty_and_up()) {
488    if (lrg.is_float_or_vector()) {
489      float_pressure.raise(lrg);
490    } else {
491      // Do not count the SP and flag registers
492      const RegMask& rm = lrg.mask();
493      if (rm.overlap(*Matcher::idealreg2regmask[Op_RegI])) {
494        int_pressure.raise(lrg);
495      }
496    }
497  }
498}
499
500
501/*
502 * Computes the initial register pressure of a block, looking at all live
503 * ranges in the liveout. The register pressure is computed for both float
504 * and int/pointer registers.
505 * Live ranges in the liveout are presumed live for the whole block.
506 * We add the cost for the whole block to the area of the live ranges initially.
507 * If a live range gets killed in the block, we'll subtract the unused part of
508 * the block from the area.
509 */
510void PhaseChaitin::compute_initial_block_pressure(Block* b, IndexSet* liveout, Pressure& int_pressure, Pressure& float_pressure, double cost) {
511  IndexSetIterator elements(liveout);
512  uint lid = elements.next();
513  while (lid != 0) {
514    LRG& lrg = lrgs(lid);
515    lrg._area += cost;
516    raise_pressure(b, lrg, int_pressure, float_pressure);
517    lid = elements.next();
518  }
519  assert(int_pressure.current_pressure() == count_int_pressure(liveout), "the int pressure is incorrect");
520  assert(float_pressure.current_pressure() == count_float_pressure(liveout), "the float pressure is incorrect");
521}
522
523/*
524* Computes the entry register pressure of a block, looking at all live
525* ranges in the livein. The register pressure is computed for both float
526* and int/pointer registers.
527*/
528void PhaseChaitin::compute_entry_block_pressure(Block* b) {
529  IndexSet* livein = _live->livein(b);
530  IndexSetIterator elements(livein);
531  uint lid = elements.next();
532  while (lid != 0) {
533    LRG& lrg = lrgs(lid);
534    raise_pressure(b, lrg, _sched_int_pressure, _sched_float_pressure);
535    lid = elements.next();
536  }
537  // Now check phis for locally defined inputs
538  for (uint j = 0; j < b->number_of_nodes(); j++) {
539    Node* n = b->get_node(j);
540    if (n->is_Phi()) {
541      for (uint k = 1; k < n->req(); k++) {
542        Node* phi_in = n->in(k);
543        // Because we are talking about phis, raise register pressure once for each
544        // instance of a phi to account for a single value
545        if (_cfg.get_block_for_node(phi_in) == b) {
546          LRG& lrg = lrgs(phi_in->_idx);
547          raise_pressure(b, lrg, _sched_int_pressure, _sched_float_pressure);
548          break;
549        }
550      }
551    }
552  }
553  _sched_int_pressure.set_start_pressure(_sched_int_pressure.current_pressure());
554  _sched_float_pressure.set_start_pressure(_sched_float_pressure.current_pressure());
555}
556
557/*
558* Computes the exit register pressure of a block, looking at all live
559* ranges in the liveout. The register pressure is computed for both float
560* and int/pointer registers.
561*/
562void PhaseChaitin::compute_exit_block_pressure(Block* b) {
563  IndexSet* livein = _live->live(b);
564  IndexSetIterator elements(livein);
565  _sched_int_pressure.set_current_pressure(0);
566  _sched_float_pressure.set_current_pressure(0);
567  uint lid = elements.next();
568  while (lid != 0) {
569    LRG& lrg = lrgs(lid);
570    raise_pressure(b, lrg, _sched_int_pressure, _sched_float_pressure);
571    lid = elements.next();
572  }
573}
574
575/*
576 * Remove dead node if it's not used.
577 * We only remove projection nodes if the node "defining" the projection is
578 * dead, for example on x86, if we have a dead Add node we remove its
579 * RFLAGS node.
580 */
581bool PhaseChaitin::remove_node_if_not_used(Block* b, uint location, Node* n, uint lid, IndexSet* liveout) {
582  Node* def = n->in(0);
583  if (!n->is_Proj() ||
584      (_lrg_map.live_range_id(def) && !liveout->member(_lrg_map.live_range_id(def)))) {
585    if (n->is_MachProj()) {
586      // Don't remove KILL projections if their "defining" nodes have
587      // memory effects (have SCMemProj projection node) -
588      // they are not dead even when their result is not used.
589      // For example, compareAndSwapL (and other CAS) and EncodeISOArray nodes.
590      // The method add_input_to_liveout() keeps such nodes alive (put them on liveout list)
591      // when it sees SCMemProj node in a block. Unfortunately SCMemProj node could be placed
592      // in block in such order that KILL MachProj nodes are processed first.
593      if (def->has_out_with(Op_SCMemProj)) {
594        return false;
595      }
596    }
597    b->remove_node(location);
598    LRG& lrg = lrgs(lid);
599    if (lrg._def == n) {
600      lrg._def = 0;
601    }
602    n->disconnect_inputs(NULL, C);
603    _cfg.unmap_node_from_block(n);
604    n->replace_by(C->top());
605    return true;
606  }
607  return false;
608}
609
610/*
611 * When encountering a fat projection, we might go from a low to high to low
612 * (since the fat proj only lives at this instruction) going backwards in the
613 * block. If we find a low to high transition, we record it.
614 */
615void PhaseChaitin::check_for_high_pressure_transition_at_fatproj(uint& block_reg_pressure, uint location, LRG& lrg, Pressure& pressure, const int op_regtype) {
616  RegMask mask_tmp = lrg.mask();
617  mask_tmp.AND(*Matcher::idealreg2regmask[op_regtype]);
618  pressure.check_pressure_at_fatproj(location, mask_tmp);
619}
620
621/*
622 * Insure high score for immediate-use spill copies so they get a color.
623 * All single-use MachSpillCopy(s) that immediately precede their
624 * use must color early.  If a longer live range steals their
625 * color, the spill copy will split and may push another spill copy
626 * further away resulting in an infinite spill-split-retry cycle.
627 * Assigning a zero area results in a high score() and a good
628 * location in the simplify list.
629 */
630void PhaseChaitin::assign_high_score_to_immediate_copies(Block* b, Node* n, LRG& lrg, uint next_inst, uint last_inst) {
631  if (n->is_SpillCopy() &&
632      lrg.is_singledef() && // A multi defined live range can still split
633      n->outcnt() == 1 &&   // and use must be in this block
634      _cfg.get_block_for_node(n->unique_out()) == b) {
635
636    Node* single_use = n->unique_out();
637    assert(b->find_node(single_use) >= next_inst, "Use must be later in block");
638    // Use can be earlier in block if it is a Phi, but then I should be a MultiDef
639
640    // Find first non SpillCopy 'm' that follows the current instruction
641    // (current_inst - 1) is index for current instruction 'n'
642    Node* m = n;
643    for (uint i = next_inst; i <= last_inst && m->is_SpillCopy(); ++i) {
644      m = b->get_node(i);
645    }
646    if (m == single_use) {
647      lrg._area = 0.0;
648    }
649  }
650}
651
652/*
653 * Copies do not define a new value and so do not interfere.
654 * Remove the copies source from the liveout set before interfering.
655 */
656void PhaseChaitin::remove_interference_from_copy(Block* b, uint location, uint lid_copy, IndexSet* liveout, double cost, Pressure& int_pressure, Pressure& float_pressure) {
657  if (liveout->remove(lid_copy)) {
658    LRG& lrg_copy = lrgs(lid_copy);
659    lrg_copy._area -= cost;
660
661    // Lower register pressure since copy and definition can share the same register
662    lower_pressure(b, location, lrg_copy, liveout, int_pressure, float_pressure);
663  }
664}
665
666/*
667 * The defined value must go in a particular register. Remove that register from
668 * all conflicting parties and avoid the interference.
669 */
670void PhaseChaitin::remove_bound_register_from_interfering_live_ranges(LRG& lrg, IndexSet* liveout, uint& must_spill) {
671  // Check for common case
672  const RegMask& rm = lrg.mask();
673  int r_size = lrg.num_regs();
674  // Smear odd bits
675  IndexSetIterator elements(liveout);
676  uint l = elements.next();
677  while (l != 0) {
678    LRG& interfering_lrg = lrgs(l);
679    // If 'l' must spill already, do not further hack his bits.
680    // He'll get some interferences and be forced to spill later.
681    if (interfering_lrg._must_spill) {
682      l = elements.next();
683      continue;
684    }
685
686    // Remove bound register(s) from 'l's choices
687    RegMask old = interfering_lrg.mask();
688    uint old_size = interfering_lrg.mask_size();
689
690    // Remove the bits from LRG 'rm' from LRG 'l' so 'l' no
691    // longer interferes with 'rm'.  If 'l' requires aligned
692    // adjacent pairs, subtract out bit pairs.
693    assert(!interfering_lrg._is_vector || !interfering_lrg._fat_proj, "sanity");
694
695    if (interfering_lrg.num_regs() > 1 && !interfering_lrg._fat_proj) {
696      RegMask r2mask = rm;
697      // Leave only aligned set of bits.
698      r2mask.smear_to_sets(interfering_lrg.num_regs());
699      // It includes vector case.
700      interfering_lrg.SUBTRACT(r2mask);
701      interfering_lrg.compute_set_mask_size();
702    } else if (r_size != 1) {
703      // fat proj
704      interfering_lrg.SUBTRACT(rm);
705      interfering_lrg.compute_set_mask_size();
706    } else {
707      // Common case: size 1 bound removal
708      OptoReg::Name r_reg = rm.find_first_elem();
709      if (interfering_lrg.mask().Member(r_reg)) {
710        interfering_lrg.Remove(r_reg);
711        interfering_lrg.set_mask_size(interfering_lrg.mask().is_AllStack() ? LRG::AllStack_size : old_size - 1);
712      }
713    }
714
715    // If 'l' goes completely dry, it must spill.
716    if (interfering_lrg.not_free()) {
717      // Give 'l' some kind of reasonable mask, so it picks up
718      // interferences (and will spill later).
719      interfering_lrg.set_mask(old);
720      interfering_lrg.set_mask_size(old_size);
721      must_spill++;
722      interfering_lrg._must_spill = 1;
723      interfering_lrg.set_reg(OptoReg::Name(LRG::SPILL_REG));
724    }
725    l = elements.next();
726  }
727}
728
729/*
730 * Start loop at 1 (skip control edge) for most Nodes. SCMemProj's might be the
731 * sole use of a StoreLConditional. While StoreLConditionals set memory (the
732 * SCMemProj use) they also def flags; if that flag def is unused the allocator
733 * sees a flag-setting instruction with no use of the flags and assumes it's
734 * dead.  This keeps the (useless) flag-setting behavior alive while also
735 * keeping the (useful) memory update effect.
736 */
737void PhaseChaitin::add_input_to_liveout(Block* b, Node* n, IndexSet* liveout, double cost, Pressure& int_pressure, Pressure& float_pressure) {
738  JVMState* jvms = n->jvms();
739  uint debug_start = jvms ? jvms->debug_start() : 999999;
740
741  for (uint k = ((n->Opcode() == Op_SCMemProj) ? 0:1); k < n->req(); k++) {
742    Node* def = n->in(k);
743    uint lid = _lrg_map.live_range_id(def);
744    if (!lid) {
745      continue;
746    }
747    LRG& lrg = lrgs(lid);
748
749    // No use-side cost for spilling debug info
750    if (k < debug_start) {
751      // A USE costs twice block frequency (once for the Load, once
752      // for a Load-delay).  Rematerialized uses only cost once.
753      lrg._cost += (def->rematerialize() ? b->_freq : (b->_freq * 2));
754    }
755
756    if (liveout->insert(lid)) {
757      // Newly live things assumed live from here to top of block
758      lrg._area += cost;
759      raise_pressure(b, lrg, int_pressure, float_pressure);
760      assert(int_pressure.current_pressure() == count_int_pressure(liveout), "the int pressure is incorrect");
761      assert(float_pressure.current_pressure() == count_float_pressure(liveout), "the float pressure is incorrect");
762    }
763    assert(lrg._area >= 0.0, "negative spill area" );
764  }
765}
766
767/*
768 * If we run off the top of the block with high pressure just record that the
769 * whole block is high pressure. (Even though we might have a transition
770 * later down in the block)
771 */
772void PhaseChaitin::check_for_high_pressure_block(Pressure& pressure) {
773  // current pressure now means the pressure before the first instruction in the block
774  // (since we have stepped through all instructions backwards)
775  if (pressure.current_pressure() > pressure.high_pressure_limit()) {
776    pressure.set_high_pressure_index_to_block_start();
777  }
778}
779
780/*
781 * Compute high pressure indice; avoid landing in the middle of projnodes
782 * and set the high pressure index for the block
783 */
784void PhaseChaitin::adjust_high_pressure_index(Block* b, uint& block_hrp_index, Pressure& pressure) {
785  uint i = pressure.high_pressure_index();
786  if (i < b->number_of_nodes() && i < b->end_idx() + 1) {
787    Node* cur = b->get_node(i);
788    while (cur->is_Proj() || (cur->is_MachNullCheck()) || cur->is_Catch()) {
789      cur = b->get_node(--i);
790    }
791  }
792  block_hrp_index = i;
793}
794
795void PhaseChaitin::print_pressure_info(Pressure& pressure, const char *str) {
796  if (str != NULL) {
797    tty->print_cr("#  *** %s ***", str);
798  }
799  tty->print_cr("#     start pressure is = %d", pressure.start_pressure());
800  tty->print_cr("#     max pressure is = %d", pressure.final_pressure());
801  tty->print_cr("#     end pressure is = %d", pressure.current_pressure());
802  tty->print_cr("#");
803}
804
805/* Build an interference graph:
806 *   That is, if 2 live ranges are simultaneously alive but in their acceptable
807 *   register sets do not overlap, then they do not interfere. The IFG is built
808 *   by a single reverse pass over each basic block. Starting with the known
809 *   live-out set, we remove things that get defined and add things that become
810 *   live (essentially executing one pass of a standard LIVE analysis). Just
811 *   before a Node defines a value (and removes it from the live-ness set) that
812 *   value is certainly live. The defined value interferes with everything
813 *   currently live. The value is then removed from the live-ness set and it's
814 *   inputs are added to the live-ness set.
815 * Compute register pressure for each block:
816 *   We store the biggest register pressure for each block and also the first
817 *   low to high register pressure transition within the block (if any).
818 */
819uint PhaseChaitin::build_ifg_physical( ResourceArea *a ) {
820  Compile::TracePhase tp("buildIFG", &timers[_t_buildIFGphysical]);
821
822  uint must_spill = 0;
823  for (uint i = 0; i < _cfg.number_of_blocks(); i++) {
824    Block* block = _cfg.get_block(i);
825
826    // Clone (rather than smash in place) the liveout info, so it is alive
827    // for the "collect_gc_info" phase later.
828    IndexSet liveout(_live->live(block));
829
830    uint first_inst = first_nonphi_index(block);
831    uint last_inst = block->end_idx();
832
833    move_exception_node_up(block, first_inst, last_inst);
834
835    Pressure int_pressure(last_inst + 1, INTPRESSURE);
836    Pressure float_pressure(last_inst + 1, FLOATPRESSURE);
837    block->_reg_pressure = 0;
838    block->_freg_pressure = 0;
839
840    int inst_count = last_inst - first_inst;
841    double cost = (inst_count <= 0) ? 0.0 : block->_freq * double(inst_count);
842    assert(cost >= 0.0, "negative spill cost" );
843
844    compute_initial_block_pressure(block, &liveout, int_pressure, float_pressure, cost);
845
846    for (uint location = last_inst; location > 0; location--) {
847      Node* n = block->get_node(location);
848      uint lid = _lrg_map.live_range_id(n);
849
850      if(lid) {
851        LRG& lrg = lrgs(lid);
852
853        // A DEF normally costs block frequency; rematerialized values are
854        // removed from the DEF sight, so LOWER costs here.
855        lrg._cost += n->rematerialize() ? 0 : block->_freq;
856
857        if (!liveout.member(lid) && n->Opcode() != Op_SafePoint) {
858          if (remove_node_if_not_used(block, location, n, lid, &liveout)) {
859            float_pressure.lower_high_pressure_index();
860            int_pressure.lower_high_pressure_index();
861            continue;
862          }
863          if (lrg._fat_proj) {
864            check_for_high_pressure_transition_at_fatproj(block->_reg_pressure, location, lrg, int_pressure, Op_RegI);
865            check_for_high_pressure_transition_at_fatproj(block->_freg_pressure, location, lrg, float_pressure, Op_RegD);
866          }
867        } else {
868          // A live range ends at its definition, remove the remaining area.
869          // If the cost is +Inf (which might happen in extreme cases), the lrg area will also be +Inf,
870          // and +Inf - +Inf = NaN. So let's not do that subtraction.
871          if (g_isfinite(cost)) {
872            lrg._area -= cost;
873          }
874          assert(lrg._area >= 0.0, "negative spill area" );
875
876          assign_high_score_to_immediate_copies(block, n, lrg, location + 1, last_inst);
877
878          if (liveout.remove(lid)) {
879            lower_pressure(block, location, lrg, &liveout, int_pressure, float_pressure);
880          }
881          uint copy_idx = n->is_Copy();
882          if (copy_idx) {
883            uint lid_copy = _lrg_map.live_range_id(n->in(copy_idx));
884            remove_interference_from_copy(block, location, lid_copy, &liveout, cost, int_pressure, float_pressure);
885          }
886        }
887
888        // Since rematerializable DEFs are not bound but the live range is,
889        // some uses must be bound. If we spill live range 'r', it can
890        // rematerialize at each use site according to its bindings.
891        if (lrg.is_bound() && !n->rematerialize() && lrg.mask().is_NotEmpty()) {
892          remove_bound_register_from_interfering_live_ranges(lrg, &liveout, must_spill);
893        }
894        interfere_with_live(lid, &liveout);
895      }
896
897      // Area remaining in the block
898      inst_count--;
899      cost = (inst_count <= 0) ? 0.0 : block->_freq * double(inst_count);
900
901      if (!n->is_Phi()) {
902        add_input_to_liveout(block, n, &liveout, cost, int_pressure, float_pressure);
903      }
904    }
905
906    check_for_high_pressure_block(int_pressure);
907    check_for_high_pressure_block(float_pressure);
908    adjust_high_pressure_index(block, block->_ihrp_index, int_pressure);
909    adjust_high_pressure_index(block, block->_fhrp_index, float_pressure);
910    // set the final_pressure as the register pressure for the block
911    block->_reg_pressure = int_pressure.final_pressure();
912    block->_freg_pressure = float_pressure.final_pressure();
913
914#ifndef PRODUCT
915    // Gather Register Pressure Statistics
916    if (PrintOptoStatistics) {
917      if (block->_reg_pressure > int_pressure.high_pressure_limit() || block->_freg_pressure > float_pressure.high_pressure_limit()) {
918        _high_pressure++;
919      } else {
920        _low_pressure++;
921      }
922    }
923#endif
924  }
925
926  return must_spill;
927}
928