bb-reorder.c revision 132718
1161709Sdanger/* Basic block reordering routines for the GNU compiler.
2161709Sdanger   Copyright (C) 2000, 2002, 2003 Free Software Foundation, Inc.
3161709Sdanger
4161709Sdanger   This file is part of GCC.
5161709Sdanger
6161709Sdanger   GCC is free software; you can redistribute it and/or modify it
7161709Sdanger   under the terms of the GNU General Public License as published by
8161709Sdanger   the Free Software Foundation; either version 2, or (at your option)
9161709Sdanger   any later version.
10161709Sdanger
11161709Sdanger   GCC is distributed in the hope that it will be useful, but WITHOUT
12161709Sdanger   ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
13161709Sdanger   or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public
14161709Sdanger   License for more details.
15161709Sdanger
16161709Sdanger   You should have received a copy of the GNU General Public License
17161709Sdanger   along with GCC; see the file COPYING.  If not, write to the Free
18161709Sdanger   Software Foundation, 59 Temple Place - Suite 330, Boston, MA
19161709Sdanger   02111-1307, USA.  */
20161709Sdanger
21161709Sdanger/* This (greedy) algorithm constructs traces in several rounds.
22161709Sdanger   The construction starts from "seeds".  The seed for the first round
23161709Sdanger   is the entry point of function.  When there are more than one seed
24161709Sdanger   that one is selected first that has the lowest key in the heap
25161709Sdanger   (see function bb_to_key).  Then the algorithm repeatedly adds the most
26161709Sdanger   probable successor to the end of a trace.  Finally it connects the traces.
27300199Smaxim
28161709Sdanger   There are two parameters: Branch Threshold and Exec Threshold.
29161709Sdanger   If the edge to a successor of the actual basic block is lower than
30161709Sdanger   Branch Threshold or the frequency of the successor is lower than
31161709Sdanger   Exec Threshold the successor will be the seed in one of the next rounds.
32161709Sdanger   Each round has these parameters lower than the previous one.
33161709Sdanger   The last round has to have these parameters set to zero
34161709Sdanger   so that the remaining blocks are picked up.
35161709Sdanger
36300199Smaxim   The algorithm selects the most probable successor from all unvisited
37161709Sdanger   successors and successors that have been added to this trace.
38161709Sdanger   The other successors (that has not been "sent" to the next round) will be
39161709Sdanger   other seeds for this round and the secondary traces will start in them.
40161709Sdanger   If the successor has not been visited in this trace it is added to the trace
41161709Sdanger   (however, there is some heuristic for simple branches).
42161709Sdanger   If the successor has been visited in this trace the loop has been found.
43161709Sdanger   If the loop has many iterations the loop is rotated so that the
44161709Sdanger   source block of the most probable edge going out from the loop
45161709Sdanger   is the last block of the trace.
46161709Sdanger   If the loop has few iterations and there is no edge from the last block of
47161709Sdanger   the loop going out from loop the loop header is duplicated.
48161709Sdanger   Finally, the construction of the trace is terminated.
49161709Sdanger
50161709Sdanger   When connecting traces it first checks whether there is an edge from the
51161709Sdanger   last block of one trace to the first block of another trace.
52161709Sdanger   When there are still some unconnected traces it checks whether there exists
53161709Sdanger   a basic block BB such that BB is a successor of the last bb of one trace
54161709Sdanger   and BB is a predecessor of the first block of another trace. In this case,
55161709Sdanger   BB is duplicated and the traces are connected through this duplicate.
56300199Smaxim   The rest of traces are simply connected so there will be a jump to the
57300199Smaxim   beginning of the rest of trace.
58161709Sdanger
59161709Sdanger
60300199Smaxim   References:
61300199Smaxim
62300199Smaxim   "Software Trace Cache"
63300199Smaxim   A. Ramirez, J. Larriba-Pey, C. Navarro, J. Torrellas and M. Valero; 1999
64300199Smaxim   http://citeseer.nj.nec.com/15361.html
65300199Smaxim
66161709Sdanger*/
67161709Sdanger
68161709Sdanger#include "config.h"
69161709Sdanger#include "system.h"
70189880Ssam#include "coretypes.h"
71189880Ssam#include "tm.h"
72189880Ssam#include "rtl.h"
73189880Ssam#include "basic-block.h"
74189880Ssam#include "flags.h"
75189880Ssam#include "timevar.h"
76300199Smaxim#include "output.h"
77300199Smaxim#include "cfglayout.h"
78300199Smaxim#include "fibheap.h"
79300199Smaxim#include "target.h"
80161709Sdanger
81161709Sdanger/* The number of rounds.  */
82161709Sdanger#define N_ROUNDS 4
83161709Sdanger
84300199Smaxim/* Branch thresholds in thousandths (per mille) of the REG_BR_PROB_BASE.  */
85300199Smaximstatic int branch_threshold[N_ROUNDS] = {400, 200, 100, 0};
86300199Smaxim
87161709Sdanger/* Exec thresholds in thousandths (per mille) of the frequency of bb 0.  */
88161709Sdangerstatic int exec_threshold[N_ROUNDS] = {500, 200, 50, 0};
89161709Sdanger
90161709Sdanger/* If edge frequency is lower than DUPLICATION_THRESHOLD per mille of entry
91161709Sdanger   block the edge destination is not duplicated while connecting traces.  */
92161709Sdanger#define DUPLICATION_THRESHOLD 100
93161709Sdanger
94161709Sdanger/* Length of unconditional jump instruction.  */
95161709Sdangerstatic int uncond_jump_length;
96161709Sdanger
97161709Sdanger/* Structure to hold needed information for each basic block.  */
98161709Sdangertypedef struct bbro_basic_block_data_def
99161709Sdanger{
100161709Sdanger  /* Which trace is the bb start of (-1 means it is not a start of a trace).  */
101161709Sdanger  int start_of_trace;
102161709Sdanger
103161709Sdanger  /* Which trace is the bb end of (-1 means it is not an end of a trace).  */
104161709Sdanger  int end_of_trace;
105161709Sdanger
106161709Sdanger  /* Which heap is BB in (if any)?  */
107161709Sdanger  fibheap_t heap;
108161709Sdanger
109161709Sdanger  /* Which heap node is BB in (if any)?  */
110161709Sdanger  fibnode_t node;
111161709Sdanger} bbro_basic_block_data;
112161709Sdanger
113161709Sdanger/* The current size of the following dynamic array.  */
114161709Sdangerstatic int array_size;
115161709Sdanger
116161709Sdanger/* The array which holds needed information for basic blocks.  */
117161709Sdangerstatic bbro_basic_block_data *bbd;
118161709Sdanger
119161709Sdanger/* To avoid frequent reallocation the size of arrays is greater than needed,
120161709Sdanger   the number of elements is (not less than) 1.25 * size_wanted.  */
121161709Sdanger#define GET_ARRAY_SIZE(X) ((((X) / 4) + 1) * 5)
122286663Sbrueffer
123161709Sdanger/* Free the memory and set the pointer to NULL.  */
124161709Sdanger#define FREE(P) \
125161709Sdanger  do { if (P) { free (P); P = 0; } else { abort (); } } while (0)
126161709Sdanger
127161709Sdanger/* Structure for holding information about a trace.  */
128161709Sdangerstruct trace
129161709Sdanger{
130161709Sdanger  /* First and last basic block of the trace.  */
131161709Sdanger  basic_block first, last;
132161709Sdanger
133161709Sdanger  /* The round of the STC creation which this trace was found in.  */
134161709Sdanger  int round;
135161709Sdanger
136161709Sdanger  /* The length (i.e. the number of basic blocks) of the trace.  */
137161709Sdanger  int length;
138161709Sdanger};
139161709Sdanger
140161709Sdanger/* Maximum frequency and count of one of the entry blocks.  */
141161709Sdangerint max_entry_frequency;
142161709Sdangergcov_type max_entry_count;
143161709Sdanger
144161709Sdanger/* Local function prototypes.  */
145161709Sdangerstatic void find_traces (int *, struct trace *);
146161709Sdangerstatic basic_block rotate_loop (edge, struct trace *, int);
147161709Sdangerstatic void mark_bb_visited (basic_block, int);
148161709Sdangerstatic void find_traces_1_round (int, int, gcov_type, struct trace *, int *,
149161709Sdanger				 int, fibheap_t *);
150161709Sdangerstatic basic_block copy_bb (basic_block, edge, basic_block, int);
151161709Sdangerstatic fibheapkey_t bb_to_key (basic_block);
152161709Sdangerstatic bool better_edge_p (basic_block, edge, int, int, int, int);
153161709Sdangerstatic void connect_traces (int, struct trace *);
154161709Sdangerstatic bool copy_bb_p (basic_block, int);
155161709Sdangerstatic int get_uncond_jump_length (void);
156161709Sdanger
157161709Sdanger/* Find the traces for Software Trace Cache.  Chain each trace through
158161709Sdanger   RBI()->next.  Store the number of traces to N_TRACES and description of
159161709Sdanger   traces to TRACES.  */
160161709Sdanger
161161709Sdangerstatic void
162161709Sdangerfind_traces (int *n_traces, struct trace *traces)
163161709Sdanger{
164161709Sdanger  int i;
165161709Sdanger  edge e;
166161709Sdanger  fibheap_t heap;
167161709Sdanger
168161709Sdanger  /* Insert entry points of function into heap.  */
169161709Sdanger  heap = fibheap_new ();
170161709Sdanger  max_entry_frequency = 0;
171161709Sdanger  max_entry_count = 0;
172161709Sdanger  for (e = ENTRY_BLOCK_PTR->succ; e; e = e->succ_next)
173161709Sdanger    {
174161709Sdanger      bbd[e->dest->index].heap = heap;
175161709Sdanger      bbd[e->dest->index].node = fibheap_insert (heap, bb_to_key (e->dest),
176161709Sdanger						    e->dest);
177161709Sdanger      if (e->dest->frequency > max_entry_frequency)
178161709Sdanger	max_entry_frequency = e->dest->frequency;
179161709Sdanger      if (e->dest->count > max_entry_count)
180161709Sdanger	max_entry_count = e->dest->count;
181161709Sdanger    }
182161709Sdanger
183161709Sdanger  /* Find the traces.  */
184161709Sdanger  for (i = 0; i < N_ROUNDS; i++)
185161709Sdanger    {
186161709Sdanger      gcov_type count_threshold;
187161709Sdanger
188161709Sdanger      if (rtl_dump_file)
189161709Sdanger	fprintf (rtl_dump_file, "STC - round %d\n", i + 1);
190161709Sdanger
191161709Sdanger      if (max_entry_count < INT_MAX / 1000)
192161709Sdanger	count_threshold = max_entry_count * exec_threshold[i] / 1000;
193161709Sdanger      else
194161709Sdanger	count_threshold = max_entry_count / 1000 * exec_threshold[i];
195161709Sdanger
196161709Sdanger      find_traces_1_round (REG_BR_PROB_BASE * branch_threshold[i] / 1000,
197161709Sdanger			   max_entry_frequency * exec_threshold[i] / 1000,
198161709Sdanger			   count_threshold, traces, n_traces, i, &heap);
199161709Sdanger    }
200161709Sdanger  fibheap_delete (heap);
201161709Sdanger
202161709Sdanger  if (rtl_dump_file)
203161709Sdanger    {
204161709Sdanger      for (i = 0; i < *n_traces; i++)
205161709Sdanger	{
206161709Sdanger	  basic_block bb;
207161709Sdanger	  fprintf (rtl_dump_file, "Trace %d (round %d):  ", i + 1,
208161709Sdanger		   traces[i].round + 1);
209161709Sdanger	  for (bb = traces[i].first; bb != traces[i].last; bb = bb->rbi->next)
210161709Sdanger	    fprintf (rtl_dump_file, "%d [%d] ", bb->index, bb->frequency);
211161709Sdanger	  fprintf (rtl_dump_file, "%d [%d]\n", bb->index, bb->frequency);
212161709Sdanger	}
213161709Sdanger      fflush (rtl_dump_file);
214161709Sdanger    }
215161709Sdanger}
216161709Sdanger
217161709Sdanger/* Rotate loop whose back edge is BACK_EDGE in the tail of trace TRACE
218161709Sdanger   (with sequential number TRACE_N).  */
219161709Sdanger
220161709Sdangerstatic basic_block
221208357Swxsrotate_loop (edge back_edge, struct trace *trace, int trace_n)
222301591Strasz{
223301591Strasz  basic_block bb;
224208357Swxs
225168894Sadrian  /* Information about the best end (end after rotation) of the loop.  */
226168894Sadrian  basic_block best_bb = NULL;
227169834Sru  edge best_edge = NULL;
228169834Sru  int best_freq = -1;
229169834Sru  gcov_type best_count = -1;
230169834Sru  /* The best edge is preferred when its destination is not visited yet
231168894Sadrian     or is a start block of some trace.  */
232169834Sru  bool is_preferred = false;
233169834Sru
234169834Sru  /* Find the most frequent edge that goes out from current trace.  */
235169834Sru  bb = back_edge->dest;
236169834Sru  do
237168894Sadrian    {
238168894Sadrian      edge e;
239168894Sadrian      for (e = bb->succ; e; e = e->succ_next)
240168894Sadrian	if (e->dest != EXIT_BLOCK_PTR
241168894Sadrian	    && e->dest->rbi->visited != trace_n
242168894Sadrian	    && (e->flags & EDGE_CAN_FALLTHRU)
243161709Sdanger	    && !(e->flags & EDGE_COMPLEX))
244161709Sdanger	{
245161709Sdanger	  if (is_preferred)
246161709Sdanger	    {
247161709Sdanger	      /* The best edge is preferred.  */
248161709Sdanger	      if (!e->dest->rbi->visited
249161709Sdanger		  || bbd[e->dest->index].start_of_trace >= 0)
250161709Sdanger		{
251161709Sdanger		  /* The current edge E is also preferred.  */
252161709Sdanger		  int freq = EDGE_FREQUENCY (e);
253161709Sdanger		  if (freq > best_freq || e->count > best_count)
254161709Sdanger		    {
255161709Sdanger		      best_freq = freq;
256161709Sdanger		      best_count = e->count;
257161709Sdanger		      best_edge = e;
258161709Sdanger		      best_bb = bb;
259161709Sdanger		    }
260161709Sdanger		}
261161709Sdanger	    }
262161709Sdanger	  else
263161709Sdanger	    {
264161709Sdanger	      if (!e->dest->rbi->visited
265161709Sdanger		  || bbd[e->dest->index].start_of_trace >= 0)
266161709Sdanger		{
267161709Sdanger		  /* The current edge E is preferred.  */
268161709Sdanger		  is_preferred = true;
269161709Sdanger		  best_freq = EDGE_FREQUENCY (e);
270161709Sdanger		  best_count = e->count;
271161709Sdanger		  best_edge = e;
272161709Sdanger		  best_bb = bb;
273161709Sdanger		}
274161709Sdanger	      else
275161709Sdanger		{
276161709Sdanger		  int freq = EDGE_FREQUENCY (e);
277161709Sdanger		  if (!best_edge || freq > best_freq || e->count > best_count)
278161709Sdanger		    {
279161709Sdanger		      best_freq = freq;
280161709Sdanger		      best_count = e->count;
281161709Sdanger		      best_edge = e;
282161709Sdanger		      best_bb = bb;
283161709Sdanger		    }
284161709Sdanger		}
285161709Sdanger	    }
286161709Sdanger	}
287161709Sdanger      bb = bb->rbi->next;
288161709Sdanger    }
289161709Sdanger  while (bb != back_edge->dest);
290161709Sdanger
291161709Sdanger  if (best_bb)
292161709Sdanger    {
293161709Sdanger      /* Rotate the loop so that the BEST_EDGE goes out from the last block of
294161709Sdanger	 the trace.  */
295161709Sdanger      if (back_edge->dest == trace->first)
296270647Sse	{
297270647Sse	  trace->first = best_bb->rbi->next;
298161709Sdanger	}
299161709Sdanger      else
300161709Sdanger	{
301161709Sdanger	  basic_block prev_bb;
302161709Sdanger
303161709Sdanger	  for (prev_bb = trace->first;
304161709Sdanger	       prev_bb->rbi->next != back_edge->dest;
305161709Sdanger	       prev_bb = prev_bb->rbi->next)
306161709Sdanger	    ;
307161709Sdanger	  prev_bb->rbi->next = best_bb->rbi->next;
308161709Sdanger
309161709Sdanger	  /* Try to get rid of uncond jump to cond jump.  */
310161709Sdanger	  if (prev_bb->succ && !prev_bb->succ->succ_next)
311161709Sdanger	    {
312161709Sdanger	      basic_block header = prev_bb->succ->dest;
313164007Sdanger
314161709Sdanger	      /* Duplicate HEADER if it is a small block containing cond jump
315161709Sdanger		 in the end.  */
316161709Sdanger	      if (any_condjump_p (BB_END (header)) && copy_bb_p (header, 0))
317161709Sdanger		{
318161709Sdanger		  copy_bb (header, prev_bb->succ, prev_bb, trace_n);
319161709Sdanger		}
320161709Sdanger	    }
321161709Sdanger	}
322161709Sdanger    }
323161709Sdanger  else
324161709Sdanger    {
325161709Sdanger      /* We have not found suitable loop tail so do no rotation.  */
326161709Sdanger      best_bb = back_edge->src;
327161709Sdanger    }
328161709Sdanger  best_bb->rbi->next = NULL;
329161709Sdanger  return best_bb;
330161709Sdanger}
331161709Sdanger
332161709Sdanger/* This function marks BB that it was visited in trace number TRACE.  */
333161709Sdanger
334161709Sdangerstatic void
335161709Sdangermark_bb_visited (basic_block bb, int trace)
336161709Sdanger{
337161709Sdanger  bb->rbi->visited = trace;
338161709Sdanger  if (bbd[bb->index].heap)
339161709Sdanger    {
340161709Sdanger      fibheap_delete_node (bbd[bb->index].heap, bbd[bb->index].node);
341168894Sadrian      bbd[bb->index].heap = NULL;
342168894Sadrian      bbd[bb->index].node = NULL;
343161709Sdanger    }
344161709Sdanger}
345161709Sdanger
346161709Sdanger/* One round of finding traces. Find traces for BRANCH_TH and EXEC_TH i.e. do
347161709Sdanger   not include basic blocks their probability is lower than BRANCH_TH or their
348161709Sdanger   frequency is lower than EXEC_TH into traces (or count is lower than
349161709Sdanger   COUNT_TH).  It stores the new traces into TRACES and modifies the number of
350161709Sdanger   traces *N_TRACES. Sets the round (which the trace belongs to) to ROUND. It
351161709Sdanger   expects that starting basic blocks are in *HEAP and at the end it deletes
352161709Sdanger   *HEAP and stores starting points for the next round into new *HEAP.  */
353267776Sbapt
354161709Sdangerstatic void
355267776Sbaptfind_traces_1_round (int branch_th, int exec_th, gcov_type count_th,
356		     struct trace *traces, int *n_traces, int round,
357		     fibheap_t *heap)
358{
359  /* Heap for discarded basic blocks which are possible starting points for
360     the next round.  */
361  fibheap_t new_heap = fibheap_new ();
362
363  while (!fibheap_empty (*heap))
364    {
365      basic_block bb;
366      struct trace *trace;
367      edge best_edge, e;
368      fibheapkey_t key;
369
370      bb = fibheap_extract_min (*heap);
371      bbd[bb->index].heap = NULL;
372      bbd[bb->index].node = NULL;
373
374      if (rtl_dump_file)
375	fprintf (rtl_dump_file, "Getting bb %d\n", bb->index);
376
377      /* If the BB's frequency is too low send BB to the next round.  */
378      if (round < N_ROUNDS - 1
379	  && (bb->frequency < exec_th || bb->count < count_th
380	      || probably_never_executed_bb_p (bb)))
381	{
382	  int key = bb_to_key (bb);
383	  bbd[bb->index].heap = new_heap;
384	  bbd[bb->index].node = fibheap_insert (new_heap, key, bb);
385
386	  if (rtl_dump_file)
387	    fprintf (rtl_dump_file,
388		     "  Possible start point of next round: %d (key: %d)\n",
389		     bb->index, key);
390	  continue;
391	}
392
393      trace = traces + *n_traces;
394      trace->first = bb;
395      trace->round = round;
396      trace->length = 0;
397      (*n_traces)++;
398
399      do
400	{
401	  int prob, freq;
402
403	  /* The probability and frequency of the best edge.  */
404	  int best_prob = INT_MIN / 2;
405	  int best_freq = INT_MIN / 2;
406
407	  best_edge = NULL;
408	  mark_bb_visited (bb, *n_traces);
409	  trace->length++;
410
411	  if (rtl_dump_file)
412	    fprintf (rtl_dump_file, "Basic block %d was visited in trace %d\n",
413		     bb->index, *n_traces - 1);
414
415	  /* Select the successor that will be placed after BB.  */
416	  for (e = bb->succ; e; e = e->succ_next)
417	    {
418#ifdef ENABLE_CHECKING
419	      if (e->flags & EDGE_FAKE)
420		abort ();
421#endif
422
423	      if (e->dest == EXIT_BLOCK_PTR)
424		continue;
425
426	      if (e->dest->rbi->visited
427		  && e->dest->rbi->visited != *n_traces)
428		continue;
429
430	      prob = e->probability;
431	      freq = EDGE_FREQUENCY (e);
432
433	      /* Edge that cannot be fallthru or improbable or infrequent
434		 successor (ie. it is unsuitable successor).  */
435	      if (!(e->flags & EDGE_CAN_FALLTHRU) || (e->flags & EDGE_COMPLEX)
436		  || prob < branch_th || freq < exec_th || e->count < count_th)
437		continue;
438
439	      if (better_edge_p (bb, e, prob, freq, best_prob, best_freq))
440		{
441		  best_edge = e;
442		  best_prob = prob;
443		  best_freq = freq;
444		}
445	    }
446
447	  /* If the best destination has multiple predecessors, and can be
448	     duplicated cheaper than a jump, don't allow it to be added
449	     to a trace.  We'll duplicate it when connecting traces.  */
450	  if (best_edge && best_edge->dest->pred->pred_next
451	      && copy_bb_p (best_edge->dest, 0))
452	    best_edge = NULL;
453
454	  /* Add all non-selected successors to the heaps.  */
455	  for (e = bb->succ; e; e = e->succ_next)
456	    {
457	      if (e == best_edge
458		  || e->dest == EXIT_BLOCK_PTR
459		  || e->dest->rbi->visited)
460		continue;
461
462	      key = bb_to_key (e->dest);
463
464	      if (bbd[e->dest->index].heap)
465		{
466		  /* E->DEST is already in some heap.  */
467		  if (key != bbd[e->dest->index].node->key)
468		    {
469		      if (rtl_dump_file)
470			{
471			  fprintf (rtl_dump_file,
472				   "Changing key for bb %d from %ld to %ld.\n",
473				   e->dest->index,
474				   (long) bbd[e->dest->index].node->key,
475				   key);
476			}
477		      fibheap_replace_key (bbd[e->dest->index].heap,
478					   bbd[e->dest->index].node, key);
479		    }
480		}
481	      else
482		{
483		  fibheap_t which_heap = *heap;
484
485		  prob = e->probability;
486		  freq = EDGE_FREQUENCY (e);
487
488		  if (!(e->flags & EDGE_CAN_FALLTHRU)
489		      || (e->flags & EDGE_COMPLEX)
490		      || prob < branch_th || freq < exec_th
491		      || e->count < count_th)
492		    {
493		      if (round < N_ROUNDS - 1)
494			which_heap = new_heap;
495		    }
496
497		  bbd[e->dest->index].heap = which_heap;
498		  bbd[e->dest->index].node = fibheap_insert (which_heap,
499								key, e->dest);
500
501		  if (rtl_dump_file)
502		    {
503		      fprintf (rtl_dump_file,
504			       "  Possible start of %s round: %d (key: %ld)\n",
505			       (which_heap == new_heap) ? "next" : "this",
506			       e->dest->index, (long) key);
507		    }
508
509		}
510	    }
511
512	  if (best_edge) /* Suitable successor was found.  */
513	    {
514	      if (best_edge->dest->rbi->visited == *n_traces)
515		{
516		  /* We do nothing with one basic block loops.  */
517		  if (best_edge->dest != bb)
518		    {
519		      if (EDGE_FREQUENCY (best_edge)
520			  > 4 * best_edge->dest->frequency / 5)
521			{
522			  /* The loop has at least 4 iterations.  If the loop
523			     header is not the first block of the function
524			     we can rotate the loop.  */
525
526			  if (best_edge->dest != ENTRY_BLOCK_PTR->next_bb)
527			    {
528			      if (rtl_dump_file)
529				{
530				  fprintf (rtl_dump_file,
531					   "Rotating loop %d - %d\n",
532					   best_edge->dest->index, bb->index);
533				}
534			      bb->rbi->next = best_edge->dest;
535			      bb = rotate_loop (best_edge, trace, *n_traces);
536			    }
537			}
538		      else
539			{
540			  /* The loop has less than 4 iterations.  */
541
542			  /* Check whether there is another edge from BB.  */
543			  edge another_edge;
544			  for (another_edge = bb->succ;
545			       another_edge;
546			       another_edge = another_edge->succ_next)
547			    if (another_edge != best_edge)
548			      break;
549
550			  if (!another_edge && copy_bb_p (best_edge->dest,
551							  !optimize_size))
552			    {
553			      bb = copy_bb (best_edge->dest, best_edge, bb,
554					    *n_traces);
555			    }
556			}
557		    }
558
559		  /* Terminate the trace.  */
560		  break;
561		}
562	      else
563		{
564		  /* Check for a situation
565
566		    A
567		   /|
568		  B |
569		   \|
570		    C
571
572		  where
573		  EDGE_FREQUENCY (AB) + EDGE_FREQUENCY (BC)
574		    >= EDGE_FREQUENCY (AC).
575		  (i.e. 2 * B->frequency >= EDGE_FREQUENCY (AC) )
576		  Best ordering is then A B C.
577
578		  This situation is created for example by:
579
580		  if (A) B;
581		  C;
582
583		  */
584
585		  for (e = bb->succ; e; e = e->succ_next)
586		    if (e != best_edge
587			&& (e->flags & EDGE_CAN_FALLTHRU)
588			&& !(e->flags & EDGE_COMPLEX)
589			&& !e->dest->rbi->visited
590			&& !e->dest->pred->pred_next
591			&& e->dest->succ
592			&& (e->dest->succ->flags & EDGE_CAN_FALLTHRU)
593			&& !(e->dest->succ->flags & EDGE_COMPLEX)
594			&& !e->dest->succ->succ_next
595			&& e->dest->succ->dest == best_edge->dest
596			&& 2 * e->dest->frequency >= EDGE_FREQUENCY (best_edge))
597		      {
598			best_edge = e;
599			if (rtl_dump_file)
600			  fprintf (rtl_dump_file, "Selecting BB %d\n",
601				   best_edge->dest->index);
602			break;
603		      }
604
605		  bb->rbi->next = best_edge->dest;
606		  bb = best_edge->dest;
607		}
608	    }
609	}
610      while (best_edge);
611      trace->last = bb;
612      bbd[trace->first->index].start_of_trace = *n_traces - 1;
613      bbd[trace->last->index].end_of_trace = *n_traces - 1;
614
615      /* The trace is terminated so we have to recount the keys in heap
616	 (some block can have a lower key because now one of its predecessors
617	 is an end of the trace).  */
618      for (e = bb->succ; e; e = e->succ_next)
619	{
620	  if (e->dest == EXIT_BLOCK_PTR
621	      || e->dest->rbi->visited)
622	    continue;
623
624	  if (bbd[e->dest->index].heap)
625	    {
626	      key = bb_to_key (e->dest);
627	      if (key != bbd[e->dest->index].node->key)
628		{
629		  if (rtl_dump_file)
630		    {
631		      fprintf (rtl_dump_file,
632			       "Changing key for bb %d from %ld to %ld.\n",
633			       e->dest->index,
634			       (long) bbd[e->dest->index].node->key, key);
635		    }
636		  fibheap_replace_key (bbd[e->dest->index].heap,
637				       bbd[e->dest->index].node,
638				       key);
639		}
640	    }
641	}
642    }
643
644  fibheap_delete (*heap);
645
646  /* "Return" the new heap.  */
647  *heap = new_heap;
648}
649
650/* Create a duplicate of the basic block OLD_BB and redirect edge E to it, add
651   it to trace after BB, mark OLD_BB visited and update pass' data structures
652   (TRACE is a number of trace which OLD_BB is duplicated to).  */
653
654static basic_block
655copy_bb (basic_block old_bb, edge e, basic_block bb, int trace)
656{
657  basic_block new_bb;
658
659  new_bb = cfg_layout_duplicate_bb (old_bb, e);
660  if (e->dest != new_bb)
661    abort ();
662  if (e->dest->rbi->visited)
663    abort ();
664  if (rtl_dump_file)
665    fprintf (rtl_dump_file,
666	     "Duplicated bb %d (created bb %d)\n",
667	     old_bb->index, new_bb->index);
668  new_bb->rbi->visited = trace;
669  new_bb->rbi->next = bb->rbi->next;
670  bb->rbi->next = new_bb;
671
672  if (new_bb->index >= array_size || last_basic_block > array_size)
673    {
674      int i;
675      int new_size;
676
677      new_size = MAX (last_basic_block, new_bb->index + 1);
678      new_size = GET_ARRAY_SIZE (new_size);
679      bbd = xrealloc (bbd, new_size * sizeof (bbro_basic_block_data));
680      for (i = array_size; i < new_size; i++)
681	{
682	  bbd[i].start_of_trace = -1;
683	  bbd[i].end_of_trace = -1;
684	  bbd[i].heap = NULL;
685	  bbd[i].node = NULL;
686	}
687      array_size = new_size;
688
689      if (rtl_dump_file)
690	{
691	  fprintf (rtl_dump_file,
692		   "Growing the dynamic array to %d elements.\n",
693		   array_size);
694	}
695    }
696
697  return new_bb;
698}
699
700/* Compute and return the key (for the heap) of the basic block BB.  */
701
702static fibheapkey_t
703bb_to_key (basic_block bb)
704{
705  edge e;
706
707  int priority = 0;
708
709  /* Do not start in probably never executed blocks.  */
710  if (probably_never_executed_bb_p (bb))
711    return BB_FREQ_MAX;
712
713  /* Prefer blocks whose predecessor is an end of some trace
714     or whose predecessor edge is EDGE_DFS_BACK.  */
715  for (e = bb->pred; e; e = e->pred_next)
716    {
717      if ((e->src != ENTRY_BLOCK_PTR && bbd[e->src->index].end_of_trace >= 0)
718	  || (e->flags & EDGE_DFS_BACK))
719	{
720	  int edge_freq = EDGE_FREQUENCY (e);
721
722	  if (edge_freq > priority)
723	    priority = edge_freq;
724	}
725    }
726
727  if (priority)
728    /* The block with priority should have significantly lower key.  */
729    return -(100 * BB_FREQ_MAX + 100 * priority + bb->frequency);
730  return -bb->frequency;
731}
732
733/* Return true when the edge E from basic block BB is better than the temporary
734   best edge (details are in function).  The probability of edge E is PROB. The
735   frequency of the successor is FREQ.  The current best probability is
736   BEST_PROB, the best frequency is BEST_FREQ.
737   The edge is considered to be equivalent when PROB does not differ much from
738   BEST_PROB; similarly for frequency.  */
739
740static bool
741better_edge_p (basic_block bb, edge e, int prob, int freq, int best_prob,
742	       int best_freq)
743{
744  bool is_better_edge;
745
746  /* The BEST_* values do not have to be best, but can be a bit smaller than
747     maximum values.  */
748  int diff_prob = best_prob / 10;
749  int diff_freq = best_freq / 10;
750
751  if (prob > best_prob + diff_prob)
752    /* The edge has higher probability than the temporary best edge.  */
753    is_better_edge = true;
754  else if (prob < best_prob - diff_prob)
755    /* The edge has lower probability than the temporary best edge.  */
756    is_better_edge = false;
757  else if (freq < best_freq - diff_freq)
758    /* The edge and the temporary best edge  have almost equivalent
759       probabilities.  The higher frequency of a successor now means
760       that there is another edge going into that successor.
761       This successor has lower frequency so it is better.  */
762    is_better_edge = true;
763  else if (freq > best_freq + diff_freq)
764    /* This successor has higher frequency so it is worse.  */
765    is_better_edge = false;
766  else if (e->dest->prev_bb == bb)
767    /* The edges have equivalent probabilities and the successors
768       have equivalent frequencies.  Select the previous successor.  */
769    is_better_edge = true;
770  else
771    is_better_edge = false;
772
773  return is_better_edge;
774}
775
776/* Connect traces in array TRACES, N_TRACES is the count of traces.  */
777
778static void
779connect_traces (int n_traces, struct trace *traces)
780{
781  int i;
782  bool *connected;
783  int last_trace;
784  int freq_threshold;
785  gcov_type count_threshold;
786
787  freq_threshold = max_entry_frequency * DUPLICATION_THRESHOLD / 1000;
788  if (max_entry_count < INT_MAX / 1000)
789    count_threshold = max_entry_count * DUPLICATION_THRESHOLD / 1000;
790  else
791    count_threshold = max_entry_count / 1000 * DUPLICATION_THRESHOLD;
792
793  connected = xcalloc (n_traces, sizeof (bool));
794  last_trace = -1;
795  for (i = 0; i < n_traces; i++)
796    {
797      int t = i;
798      int t2;
799      edge e, best;
800      int best_len;
801
802      if (connected[t])
803	continue;
804
805      connected[t] = true;
806
807      /* Find the predecessor traces.  */
808      for (t2 = t; t2 > 0;)
809	{
810	  best = NULL;
811	  best_len = 0;
812	  for (e = traces[t2].first->pred; e; e = e->pred_next)
813	    {
814	      int si = e->src->index;
815
816	      if (e->src != ENTRY_BLOCK_PTR
817		  && (e->flags & EDGE_CAN_FALLTHRU)
818		  && !(e->flags & EDGE_COMPLEX)
819		  && bbd[si].end_of_trace >= 0
820		  && !connected[bbd[si].end_of_trace]
821		  && (!best
822		      || e->probability > best->probability
823		      || (e->probability == best->probability
824			  && traces[bbd[si].end_of_trace].length > best_len)))
825		{
826		  best = e;
827		  best_len = traces[bbd[si].end_of_trace].length;
828		}
829	    }
830	  if (best)
831	    {
832	      best->src->rbi->next = best->dest;
833	      t2 = bbd[best->src->index].end_of_trace;
834	      connected[t2] = true;
835	      if (rtl_dump_file)
836		{
837		  fprintf (rtl_dump_file, "Connection: %d %d\n",
838			   best->src->index, best->dest->index);
839		}
840	    }
841	  else
842	    break;
843	}
844
845      if (last_trace >= 0)
846	traces[last_trace].last->rbi->next = traces[t2].first;
847      last_trace = t;
848
849      /* Find the successor traces.  */
850      while (1)
851	{
852	  /* Find the continuation of the chain.  */
853	  best = NULL;
854	  best_len = 0;
855	  for (e = traces[t].last->succ; e; e = e->succ_next)
856	    {
857	      int di = e->dest->index;
858
859	      if (e->dest != EXIT_BLOCK_PTR
860		  && (e->flags & EDGE_CAN_FALLTHRU)
861		  && !(e->flags & EDGE_COMPLEX)
862		  && bbd[di].start_of_trace >= 0
863		  && !connected[bbd[di].start_of_trace]
864		  && (!best
865		      || e->probability > best->probability
866		      || (e->probability == best->probability
867			  && traces[bbd[di].start_of_trace].length > best_len)))
868		{
869		  best = e;
870		  best_len = traces[bbd[di].start_of_trace].length;
871		}
872	    }
873
874	  if (best)
875	    {
876	      if (rtl_dump_file)
877		{
878		  fprintf (rtl_dump_file, "Connection: %d %d\n",
879			   best->src->index, best->dest->index);
880		}
881	      t = bbd[best->dest->index].start_of_trace;
882	      traces[last_trace].last->rbi->next = traces[t].first;
883	      connected[t] = true;
884	      last_trace = t;
885	    }
886	  else
887	    {
888	      /* Try to connect the traces by duplication of 1 block.  */
889	      edge e2;
890	      basic_block next_bb = NULL;
891	      bool try_copy = false;
892
893	      for (e = traces[t].last->succ; e; e = e->succ_next)
894		if (e->dest != EXIT_BLOCK_PTR
895		    && (e->flags & EDGE_CAN_FALLTHRU)
896		    && !(e->flags & EDGE_COMPLEX)
897		    && (!best || e->probability > best->probability))
898		  {
899		    edge best2 = NULL;
900		    int best2_len = 0;
901
902		    /* If the destination is a start of a trace which is only
903		       one block long, then no need to search the successor
904		       blocks of the trace.  Accept it.  */
905		    if (bbd[e->dest->index].start_of_trace >= 0
906			&& traces[bbd[e->dest->index].start_of_trace].length
907			   == 1)
908		      {
909			best = e;
910			try_copy = true;
911			continue;
912		      }
913
914		    for (e2 = e->dest->succ; e2; e2 = e2->succ_next)
915		      {
916			int di = e2->dest->index;
917
918			if (e2->dest == EXIT_BLOCK_PTR
919			    || ((e2->flags & EDGE_CAN_FALLTHRU)
920				&& !(e2->flags & EDGE_COMPLEX)
921				&& bbd[di].start_of_trace >= 0
922				&& !connected[bbd[di].start_of_trace]
923				&& (EDGE_FREQUENCY (e2) >= freq_threshold)
924				&& (e2->count >= count_threshold)
925				&& (!best2
926				    || e2->probability > best2->probability
927				    || (e2->probability == best2->probability
928					&& traces[bbd[di].start_of_trace].length
929					   > best2_len))))
930			  {
931			    best = e;
932			    best2 = e2;
933			    if (e2->dest != EXIT_BLOCK_PTR)
934			      best2_len = traces[bbd[di].start_of_trace].length;
935			    else
936			      best2_len = INT_MAX;
937			    next_bb = e2->dest;
938			    try_copy = true;
939			  }
940		      }
941		  }
942
943	      /* Copy tiny blocks always; copy larger blocks only when the
944		 edge is traversed frequently enough.  */
945	      if (try_copy
946		  && copy_bb_p (best->dest,
947				!optimize_size
948				&& EDGE_FREQUENCY (best) >= freq_threshold
949				&& best->count >= count_threshold))
950		{
951		  basic_block new_bb;
952
953		  if (rtl_dump_file)
954		    {
955		      fprintf (rtl_dump_file, "Connection: %d %d ",
956			       traces[t].last->index, best->dest->index);
957		      if (!next_bb)
958			fputc ('\n', rtl_dump_file);
959		      else if (next_bb == EXIT_BLOCK_PTR)
960			fprintf (rtl_dump_file, "exit\n");
961		      else
962			fprintf (rtl_dump_file, "%d\n", next_bb->index);
963		    }
964
965		  new_bb = copy_bb (best->dest, best, traces[t].last, t);
966		  traces[t].last = new_bb;
967		  if (next_bb && next_bb != EXIT_BLOCK_PTR)
968		    {
969		      t = bbd[next_bb->index].start_of_trace;
970		      traces[last_trace].last->rbi->next = traces[t].first;
971		      connected[t] = true;
972		      last_trace = t;
973		    }
974		  else
975		    break;	/* Stop finding the successor traces.  */
976		}
977	      else
978		break;	/* Stop finding the successor traces.  */
979	    }
980	}
981    }
982
983  if (rtl_dump_file)
984    {
985      basic_block bb;
986
987      fprintf (rtl_dump_file, "Final order:\n");
988      for (bb = traces[0].first; bb; bb = bb->rbi->next)
989	fprintf (rtl_dump_file, "%d ", bb->index);
990      fprintf (rtl_dump_file, "\n");
991      fflush (rtl_dump_file);
992    }
993
994  FREE (connected);
995}
996
997/* Return true when BB can and should be copied. CODE_MAY_GROW is true
998   when code size is allowed to grow by duplication.  */
999
1000static bool
1001copy_bb_p (basic_block bb, int code_may_grow)
1002{
1003  int size = 0;
1004  int max_size = uncond_jump_length;
1005  rtx insn;
1006  int n_succ;
1007  edge e;
1008
1009  if (!bb->frequency)
1010    return false;
1011  if (!bb->pred || !bb->pred->pred_next)
1012    return false;
1013  if (!cfg_layout_can_duplicate_bb_p (bb))
1014    return false;
1015
1016  /* Avoid duplicating blocks which have many successors (PR/13430).  */
1017  n_succ = 0;
1018  for (e = bb->succ; e; e = e->succ_next)
1019    {
1020      n_succ++;
1021      if (n_succ > 8)
1022	return false;
1023    }
1024
1025  if (code_may_grow && maybe_hot_bb_p (bb))
1026    max_size *= 8;
1027
1028  for (insn = BB_HEAD (bb); insn != NEXT_INSN (BB_END (bb));
1029       insn = NEXT_INSN (insn))
1030    {
1031      if (INSN_P (insn))
1032	size += get_attr_length (insn);
1033    }
1034
1035  if (size <= max_size)
1036    return true;
1037
1038  if (rtl_dump_file)
1039    {
1040      fprintf (rtl_dump_file,
1041	       "Block %d can't be copied because its size = %d.\n",
1042	       bb->index, size);
1043    }
1044
1045  return false;
1046}
1047
1048/* Return the length of unconditional jump instruction.  */
1049
1050static int
1051get_uncond_jump_length (void)
1052{
1053  rtx label, jump;
1054  int length;
1055
1056  label = emit_label_before (gen_label_rtx (), get_insns ());
1057  jump = emit_jump_insn (gen_jump (label));
1058
1059  length = get_attr_length (jump);
1060
1061  delete_insn (jump);
1062  delete_insn (label);
1063  return length;
1064}
1065
1066/* Reorder basic blocks.  The main entry point to this file.  FLAGS is
1067   the set of flags to pass to cfg_layout_initialize().  */
1068
1069void
1070reorder_basic_blocks (unsigned int flags)
1071{
1072  int n_traces;
1073  int i;
1074  struct trace *traces;
1075
1076  if (n_basic_blocks <= 1)
1077    return;
1078
1079  if ((* targetm.cannot_modify_jumps_p) ())
1080    return;
1081
1082  timevar_push (TV_REORDER_BLOCKS);
1083
1084  cfg_layout_initialize (flags);
1085
1086  set_edge_can_fallthru_flag ();
1087  mark_dfs_back_edges ();
1088
1089  /* We are estimating the length of uncond jump insn only once since the code
1090     for getting the insn length always returns the minimal length now.  */
1091  if (uncond_jump_length == 0)
1092    uncond_jump_length = get_uncond_jump_length ();
1093
1094  /* We need to know some information for each basic block.  */
1095  array_size = GET_ARRAY_SIZE (last_basic_block);
1096  bbd = xmalloc (array_size * sizeof (bbro_basic_block_data));
1097  for (i = 0; i < array_size; i++)
1098    {
1099      bbd[i].start_of_trace = -1;
1100      bbd[i].end_of_trace = -1;
1101      bbd[i].heap = NULL;
1102      bbd[i].node = NULL;
1103    }
1104
1105  traces = xmalloc (n_basic_blocks * sizeof (struct trace));
1106  n_traces = 0;
1107  find_traces (&n_traces, traces);
1108  connect_traces (n_traces, traces);
1109  FREE (traces);
1110  FREE (bbd);
1111
1112  if (rtl_dump_file)
1113    dump_flow_info (rtl_dump_file);
1114
1115  cfg_layout_finalize ();
1116
1117  timevar_pop (TV_REORDER_BLOCKS);
1118}
1119