tracer.c revision 132718
1/* The tracer pass for the GNU compiler.
2   Contributed by Jan Hubicka, SuSE Labs.
3   Copyright (C) 2001, 2002, 2003 Free Software Foundation, Inc.
4
5   This file is part of GCC.
6
7   GCC is free software; you can redistribute it and/or modify it
8   under the terms of the GNU General Public License as published by
9   the Free Software Foundation; either version 2, or (at your option)
10   any later version.
11
12   GCC is distributed in the hope that it will be useful, but WITHOUT
13   ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
14   or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public
15   License for more details.
16
17   You should have received a copy of the GNU General Public License
18   along with GCC; see the file COPYING.  If not, write to the Free
19   Software Foundation, 59 Temple Place - Suite 330, Boston, MA
20   02111-1307, USA.  */
21
22/* This pass performs the tail duplication needed for superblock formation.
23   For more information see:
24
25     Design and Analysis of Profile-Based Optimization in Compaq's
26     Compilation Tools for Alpha; Journal of Instruction-Level
27     Parallelism 3 (2000) 1-25
28
29   Unlike Compaq's implementation we don't do the loop peeling as most
30   probably a better job can be done by a special pass and we don't
31   need to worry too much about the code size implications as the tail
32   duplicates are crossjumped again if optimizations are not
33   performed.  */
34
35
36#include "config.h"
37#include "system.h"
38#include "coretypes.h"
39#include "tm.h"
40#include "tree.h"
41#include "rtl.h"
42#include "hard-reg-set.h"
43#include "basic-block.h"
44#include "output.h"
45#include "cfglayout.h"
46#include "fibheap.h"
47#include "flags.h"
48#include "timevar.h"
49#include "params.h"
50#include "coverage.h"
51
52static int count_insns (basic_block);
53static bool ignore_bb_p (basic_block);
54static bool better_p (edge, edge);
55static edge find_best_successor (basic_block);
56static edge find_best_predecessor (basic_block);
57static int find_trace (basic_block, basic_block *);
58static void tail_duplicate (void);
59static void layout_superblocks (void);
60
61/* Minimal outgoing edge probability considered for superblock formation.  */
62static int probability_cutoff;
63static int branch_ratio_cutoff;
64
65/* Return true if BB has been seen - it is connected to some trace
66   already.  */
67
68#define seen(bb) (bb->rbi->visited || bb->rbi->next)
69
70/* Return true if we should ignore the basic block for purposes of tracing.  */
71static bool
72ignore_bb_p (basic_block bb)
73{
74  if (bb->index < 0)
75    return true;
76  if (!maybe_hot_bb_p (bb))
77    return true;
78  return false;
79}
80
81/* Return number of instructions in the block.  */
82
83static int
84count_insns (basic_block bb)
85{
86  rtx insn;
87  int n = 0;
88
89  for (insn = BB_HEAD (bb);
90       insn != NEXT_INSN (BB_END (bb));
91       insn = NEXT_INSN (insn))
92    if (active_insn_p (insn))
93      n++;
94  return n;
95}
96
97/* Return true if E1 is more frequent than E2.  */
98static bool
99better_p (edge e1, edge e2)
100{
101  if (e1->count != e2->count)
102    return e1->count > e2->count;
103  if (e1->src->frequency * e1->probability !=
104      e2->src->frequency * e2->probability)
105    return (e1->src->frequency * e1->probability
106	    > e2->src->frequency * e2->probability);
107  /* This is needed to avoid changes in the decision after
108     CFG is modified.  */
109  if (e1->src != e2->src)
110    return e1->src->index > e2->src->index;
111  return e1->dest->index > e2->dest->index;
112}
113
114/* Return most frequent successor of basic block BB.  */
115
116static edge
117find_best_successor (basic_block bb)
118{
119  edge e;
120  edge best = NULL;
121
122  for (e = bb->succ; e; e = e->succ_next)
123    if (!best || better_p (e, best))
124      best = e;
125  if (!best || ignore_bb_p (best->dest))
126    return NULL;
127  if (best->probability <= probability_cutoff)
128    return NULL;
129  return best;
130}
131
132/* Return most frequent predecessor of basic block BB.  */
133
134static edge
135find_best_predecessor (basic_block bb)
136{
137  edge e;
138  edge best = NULL;
139
140  for (e = bb->pred; e; e = e->pred_next)
141    if (!best || better_p (e, best))
142      best = e;
143  if (!best || ignore_bb_p (best->src))
144    return NULL;
145  if (EDGE_FREQUENCY (best) * REG_BR_PROB_BASE
146      < bb->frequency * branch_ratio_cutoff)
147    return NULL;
148  return best;
149}
150
151/* Find the trace using bb and record it in the TRACE array.
152   Return number of basic blocks recorded.  */
153
154static int
155find_trace (basic_block bb, basic_block *trace)
156{
157  int i = 0;
158  edge e;
159
160  if (rtl_dump_file)
161    fprintf (rtl_dump_file, "Trace seed %i [%i]", bb->index, bb->frequency);
162
163  while ((e = find_best_predecessor (bb)) != NULL)
164    {
165      basic_block bb2 = e->src;
166      if (seen (bb2) || (e->flags & (EDGE_DFS_BACK | EDGE_COMPLEX))
167	  || find_best_successor (bb2) != e)
168	break;
169      if (rtl_dump_file)
170	fprintf (rtl_dump_file, ",%i [%i]", bb->index, bb->frequency);
171      bb = bb2;
172    }
173  if (rtl_dump_file)
174    fprintf (rtl_dump_file, " forward %i [%i]", bb->index, bb->frequency);
175  trace[i++] = bb;
176
177  /* Follow the trace in forward direction.  */
178  while ((e = find_best_successor (bb)) != NULL)
179    {
180      bb = e->dest;
181      if (seen (bb) || (e->flags & (EDGE_DFS_BACK | EDGE_COMPLEX))
182	  || find_best_predecessor (bb) != e)
183	break;
184      if (rtl_dump_file)
185	fprintf (rtl_dump_file, ",%i [%i]", bb->index, bb->frequency);
186      trace[i++] = bb;
187    }
188  if (rtl_dump_file)
189    fprintf (rtl_dump_file, "\n");
190  return i;
191}
192
193/* Look for basic blocks in frequency order, construct traces and tail duplicate
194   if profitable.  */
195
196static void
197tail_duplicate (void)
198{
199  fibnode_t *blocks = xcalloc (last_basic_block, sizeof (fibnode_t));
200  basic_block *trace = xmalloc (sizeof (basic_block) * n_basic_blocks);
201  int *counts = xmalloc (sizeof (int) * last_basic_block);
202  int ninsns = 0, nduplicated = 0;
203  gcov_type weighted_insns = 0, traced_insns = 0;
204  fibheap_t heap = fibheap_new ();
205  gcov_type cover_insns;
206  int max_dup_insns;
207  basic_block bb;
208
209  if (profile_info && flag_branch_probabilities)
210    probability_cutoff = PARAM_VALUE (TRACER_MIN_BRANCH_PROBABILITY_FEEDBACK);
211  else
212    probability_cutoff = PARAM_VALUE (TRACER_MIN_BRANCH_PROBABILITY);
213  probability_cutoff = REG_BR_PROB_BASE / 100 * probability_cutoff;
214
215  branch_ratio_cutoff =
216    (REG_BR_PROB_BASE / 100 * PARAM_VALUE (TRACER_MIN_BRANCH_RATIO));
217
218  FOR_EACH_BB (bb)
219    {
220      int n = count_insns (bb);
221      if (!ignore_bb_p (bb))
222	blocks[bb->index] = fibheap_insert (heap, -bb->frequency,
223					    bb);
224
225      counts [bb->index] = n;
226      ninsns += n;
227      weighted_insns += n * bb->frequency;
228    }
229
230  if (profile_info && flag_branch_probabilities)
231    cover_insns = PARAM_VALUE (TRACER_DYNAMIC_COVERAGE_FEEDBACK);
232  else
233    cover_insns = PARAM_VALUE (TRACER_DYNAMIC_COVERAGE);
234  cover_insns = (weighted_insns * cover_insns + 50) / 100;
235  max_dup_insns = (ninsns * PARAM_VALUE (TRACER_MAX_CODE_GROWTH) + 50) / 100;
236
237  while (traced_insns < cover_insns && nduplicated < max_dup_insns
238         && !fibheap_empty (heap))
239    {
240      basic_block bb = fibheap_extract_min (heap);
241      int n, pos;
242
243      if (!bb)
244	break;
245
246      blocks[bb->index] = NULL;
247
248      if (ignore_bb_p (bb))
249	continue;
250      if (seen (bb))
251	abort ();
252
253      n = find_trace (bb, trace);
254
255      bb = trace[0];
256      traced_insns += bb->frequency * counts [bb->index];
257      if (blocks[bb->index])
258	{
259	  fibheap_delete_node (heap, blocks[bb->index]);
260	  blocks[bb->index] = NULL;
261	}
262
263      for (pos = 1; pos < n; pos++)
264	{
265	  basic_block bb2 = trace[pos];
266
267	  if (blocks[bb2->index])
268	    {
269	      fibheap_delete_node (heap, blocks[bb2->index]);
270	      blocks[bb2->index] = NULL;
271	    }
272	  traced_insns += bb2->frequency * counts [bb2->index];
273	  if (bb2->pred && bb2->pred->pred_next
274	      && cfg_layout_can_duplicate_bb_p (bb2))
275	    {
276	      edge e = bb2->pred;
277	      basic_block old = bb2;
278
279	      while (e->src != bb)
280		e = e->pred_next;
281	      nduplicated += counts [bb2->index];
282	      bb2 = cfg_layout_duplicate_bb (bb2, e);
283
284	      /* Reconsider the original copy of block we've duplicated.
285	         Removing the most common predecessor may make it to be
286	         head.  */
287	      blocks[old->index] =
288		fibheap_insert (heap, -old->frequency, old);
289
290	      if (rtl_dump_file)
291		fprintf (rtl_dump_file, "Duplicated %i as %i [%i]\n",
292			 old->index, bb2->index, bb2->frequency);
293	    }
294	  bb->rbi->next = bb2;
295	  bb2->rbi->visited = 1;
296	  bb = bb2;
297	  /* In case the trace became infrequent, stop duplicating.  */
298	  if (ignore_bb_p (bb))
299	    break;
300	}
301      if (rtl_dump_file)
302	fprintf (rtl_dump_file, " covered now %.1f\n\n",
303		 traced_insns * 100.0 / weighted_insns);
304    }
305  if (rtl_dump_file)
306    fprintf (rtl_dump_file, "Duplicated %i insns (%i%%)\n", nduplicated,
307	     nduplicated * 100 / ninsns);
308
309  free (blocks);
310  free (trace);
311  free (counts);
312  fibheap_delete (heap);
313}
314
315/* Connect the superblocks into linear sequence.  At the moment we attempt to keep
316   the original order as much as possible, but the algorithm may be made smarter
317   later if needed.  BB reordering pass should void most of the benefits of such
318   change though.  */
319
320static void
321layout_superblocks (void)
322{
323  basic_block end = ENTRY_BLOCK_PTR->succ->dest;
324  basic_block bb = ENTRY_BLOCK_PTR->succ->dest->next_bb;
325
326  while (bb != EXIT_BLOCK_PTR)
327    {
328      edge e, best = NULL;
329      while (end->rbi->next)
330	end = end->rbi->next;
331
332      for (e = end->succ; e; e = e->succ_next)
333	if (e->dest != EXIT_BLOCK_PTR
334	    && e->dest != ENTRY_BLOCK_PTR->succ->dest
335	    && !e->dest->rbi->visited
336	    && (!best || EDGE_FREQUENCY (e) > EDGE_FREQUENCY (best)))
337	  best = e;
338
339      if (best)
340	{
341	  end->rbi->next = best->dest;
342	  best->dest->rbi->visited = 1;
343	}
344      else
345	for (; bb != EXIT_BLOCK_PTR; bb = bb->next_bb)
346	  {
347	    if (!bb->rbi->visited)
348	      {
349		end->rbi->next = bb;
350		bb->rbi->visited = 1;
351		break;
352	      }
353	  }
354    }
355}
356
357/* Main entry point to this file.  FLAGS is the set of flags to pass
358   to cfg_layout_initialize().  */
359
360void
361tracer (unsigned int flags)
362{
363  if (n_basic_blocks <= 1)
364    return;
365
366  timevar_push (TV_TRACER);
367
368  cfg_layout_initialize (flags);
369  mark_dfs_back_edges ();
370  if (rtl_dump_file)
371    dump_flow_info (rtl_dump_file);
372  tail_duplicate ();
373  layout_superblocks ();
374  if (rtl_dump_file)
375    dump_flow_info (rtl_dump_file);
376  cfg_layout_finalize ();
377
378  /* Merge basic blocks in duplicated traces.  */
379  cleanup_cfg (CLEANUP_EXPENSIVE);
380
381  timevar_pop (TV_TRACER);
382}
383