tracer.c revision 117395
1/* The tracer pass for the GNU compiler.
2   Contributed by Jan Hubicka, SuSE Labs.
3   Copyright (C) 2001, 2002 Free Software Foundation, Inc.
4
5   This file is part of GCC.
6
7   GCC is free software; you can redistribute it and/or modify it
8   under the terms of the GNU General Public License as published by
9   the Free Software Foundation; either version 2, or (at your option)
10   any later version.
11
12   GCC is distributed in the hope that it will be useful, but WITHOUT
13   ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
14   or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public
15   License for more details.
16
17   You should have received a copy of the GNU General Public License
18   along with GCC; see the file COPYING.  If not, write to the Free
19   Software Foundation, 59 Temple Place - Suite 330, Boston, MA
20   02111-1307, USA.  */
21
22/* This pass performs the tail duplication needed for superblock formation.
23   For more information see:
24
25     Design and Analysis of Profile-Based Optimization in Compaq's
26     Compilation Tools for Alpha; Journal of Instruction-Level
27     Parallelism 3 (2000) 1-25
28
29   Unlike Compaq's implementation we don't do the loop peeling as most
30   probably a better job can be done by a special pass and we don't
31   need to worry too much about the code size implications as the tail
32   duplicates are crossjumped again if optimizations are not
33   performed.  */
34
35
36#include "config.h"
37#include "system.h"
38#include "tree.h"
39#include "rtl.h"
40#include "hard-reg-set.h"
41#include "basic-block.h"
42#include "output.h"
43#include "cfglayout.h"
44#include "fibheap.h"
45#include "flags.h"
46#include "params.h"
47#include "profile.h"
48
49static int count_insns		PARAMS ((basic_block));
50static bool ignore_bb_p		PARAMS ((basic_block));
51static bool better_p		PARAMS ((edge, edge));
52static edge find_best_successor PARAMS ((basic_block));
53static edge find_best_predecessor PARAMS ((basic_block));
54static int find_trace		PARAMS ((basic_block, basic_block *));
55static void tail_duplicate	PARAMS ((void));
56static void layout_superblocks	PARAMS ((void));
57static bool ignore_bb_p		PARAMS ((basic_block));
58
59/* Minimal outgoing edge probability considered for superblock formation.  */
60static int probability_cutoff;
61static int branch_ratio_cutoff;
62
63/* Return true if BB has been seen - it is connected to some trace
64   already.  */
65
66#define seen(bb) (RBI (bb)->visited || RBI (bb)->next)
67
68/* Return true if we should ignore the basic block for purposes of tracing.  */
69static bool
70ignore_bb_p (bb)
71     basic_block bb;
72{
73  if (bb->index < 0)
74    return true;
75  if (!maybe_hot_bb_p (bb))
76    return true;
77  return false;
78}
79
80/* Return number of instructions in the block.  */
81
82static int
83count_insns (bb)
84     basic_block bb;
85{
86  rtx insn;
87  int n = 0;
88
89  for (insn = bb->head; insn != NEXT_INSN (bb->end); insn = NEXT_INSN (insn))
90    if (active_insn_p (insn))
91      n++;
92  return n;
93}
94
95/* Return true if E1 is more frequent than E2.  */
96static bool
97better_p (e1, e2)
98     edge e1, e2;
99{
100  if (e1->count != e2->count)
101    return e1->count > e2->count;
102  if (e1->src->frequency * e1->probability !=
103      e2->src->frequency * e2->probability)
104    return (e1->src->frequency * e1->probability
105	    > e2->src->frequency * e2->probability);
106  /* This is needed to avoid changes in the decision after
107     CFG is modified.  */
108  if (e1->src != e2->src)
109    return e1->src->index > e2->src->index;
110  return e1->dest->index > e2->dest->index;
111}
112
113/* Return most frequent successor of basic block BB.  */
114
115static edge
116find_best_successor (bb)
117     basic_block bb;
118{
119  edge e;
120  edge best = NULL;
121
122  for (e = bb->succ; e; e = e->succ_next)
123    if (!best || better_p (e, best))
124      best = e;
125  if (!best || ignore_bb_p (best->dest))
126    return NULL;
127  if (best->probability <= probability_cutoff)
128    return NULL;
129  return best;
130}
131
132/* Return most frequent predecessor of basic block BB.  */
133
134static edge
135find_best_predecessor (bb)
136     basic_block bb;
137{
138  edge e;
139  edge best = NULL;
140
141  for (e = bb->pred; e; e = e->pred_next)
142    if (!best || better_p (e, best))
143      best = e;
144  if (!best || ignore_bb_p (best->src))
145    return NULL;
146  if (EDGE_FREQUENCY (best) * REG_BR_PROB_BASE
147      < bb->frequency * branch_ratio_cutoff)
148    return NULL;
149  return best;
150}
151
152/* Find the trace using bb and record it in the TRACE array.
153   Return number of basic blocks recorded.  */
154
155static int
156find_trace (bb, trace)
157     basic_block bb;
158     basic_block *trace;
159{
160  int i = 0;
161  edge e;
162
163  if (rtl_dump_file)
164    fprintf (rtl_dump_file, "Trace seed %i [%i]", bb->index, bb->frequency);
165
166  while ((e = find_best_predecessor (bb)) != NULL)
167    {
168      basic_block bb2 = e->src;
169      if (seen (bb2) || (e->flags & (EDGE_DFS_BACK | EDGE_COMPLEX))
170	  || find_best_successor (bb2) != e)
171	break;
172      if (rtl_dump_file)
173	fprintf (rtl_dump_file, ",%i [%i]", bb->index, bb->frequency);
174      bb = bb2;
175    }
176  if (rtl_dump_file)
177    fprintf (rtl_dump_file, " forward %i [%i]", bb->index, bb->frequency);
178  trace[i++] = bb;
179
180  /* Follow the trace in forward direction.  */
181  while ((e = find_best_successor (bb)) != NULL)
182    {
183      bb = e->dest;
184      if (seen (bb) || (e->flags & (EDGE_DFS_BACK | EDGE_COMPLEX))
185	  || find_best_predecessor (bb) != e)
186	break;
187      if (rtl_dump_file)
188	fprintf (rtl_dump_file, ",%i [%i]", bb->index, bb->frequency);
189      trace[i++] = bb;
190    }
191  if (rtl_dump_file)
192    fprintf (rtl_dump_file, "\n");
193  return i;
194}
195
196/* Look for basic blocks in frequency order, construct traces and tail duplicate
197   if profitable.  */
198
199static void
200tail_duplicate ()
201{
202  fibnode_t *blocks = xcalloc (last_basic_block, sizeof (fibnode_t));
203  basic_block *trace = xmalloc (sizeof (basic_block) * n_basic_blocks);
204  int *counts = xmalloc (sizeof (int) * last_basic_block);
205  int ninsns = 0, nduplicated = 0;
206  gcov_type weighted_insns = 0, traced_insns = 0;
207  fibheap_t heap = fibheap_new ();
208  gcov_type cover_insns;
209  int max_dup_insns;
210  basic_block bb;
211
212  if (profile_info.count_profiles_merged && flag_branch_probabilities)
213    probability_cutoff = PARAM_VALUE (TRACER_MIN_BRANCH_PROBABILITY_FEEDBACK);
214  else
215    probability_cutoff = PARAM_VALUE (TRACER_MIN_BRANCH_PROBABILITY);
216  probability_cutoff = REG_BR_PROB_BASE / 100 * probability_cutoff;
217
218  branch_ratio_cutoff =
219    (REG_BR_PROB_BASE / 100 * PARAM_VALUE (TRACER_MIN_BRANCH_RATIO));
220
221  FOR_EACH_BB (bb)
222    {
223      int n = count_insns (bb);
224      if (!ignore_bb_p (bb))
225	blocks[bb->index] = fibheap_insert (heap, -bb->frequency,
226					    bb);
227
228      counts [bb->index] = n;
229      ninsns += n;
230      weighted_insns += n * bb->frequency;
231    }
232
233  if (profile_info.count_profiles_merged && flag_branch_probabilities)
234    cover_insns = PARAM_VALUE (TRACER_DYNAMIC_COVERAGE_FEEDBACK);
235  else
236    cover_insns = PARAM_VALUE (TRACER_DYNAMIC_COVERAGE);
237  cover_insns = (weighted_insns * cover_insns + 50) / 100;
238  max_dup_insns = (ninsns * PARAM_VALUE (TRACER_MAX_CODE_GROWTH) + 50) / 100;
239
240  while (traced_insns < cover_insns && nduplicated < max_dup_insns
241         && !fibheap_empty (heap))
242    {
243      basic_block bb = fibheap_extract_min (heap);
244      int n, pos;
245
246      if (!bb)
247	break;
248
249      blocks[bb->index] = NULL;
250
251      if (ignore_bb_p (bb))
252	continue;
253      if (seen (bb))
254	abort ();
255
256      n = find_trace (bb, trace);
257
258      bb = trace[0];
259      traced_insns += bb->frequency * counts [bb->index];
260      if (blocks[bb->index])
261	{
262	  fibheap_delete_node (heap, blocks[bb->index]);
263	  blocks[bb->index] = NULL;
264	}
265
266      for (pos = 1; pos < n; pos++)
267	{
268	  basic_block bb2 = trace[pos];
269
270	  if (blocks[bb2->index])
271	    {
272	      fibheap_delete_node (heap, blocks[bb2->index]);
273	      blocks[bb2->index] = NULL;
274	    }
275	  traced_insns += bb2->frequency * counts [bb2->index];
276	  if (bb2->pred && bb2->pred->pred_next
277	      && cfg_layout_can_duplicate_bb_p (bb2))
278	    {
279	      edge e = bb2->pred;
280	      basic_block old = bb2;
281
282	      while (e->src != bb)
283		e = e->pred_next;
284	      nduplicated += counts [bb2->index];
285	      bb2 = cfg_layout_duplicate_bb (bb2, e);
286
287	      /* Reconsider the original copy of block we've duplicated.
288	         Removing the most common predecesor may make it to be
289	         head.  */
290	      blocks[old->index] =
291		fibheap_insert (heap, -old->frequency, old);
292
293	      if (rtl_dump_file)
294		fprintf (rtl_dump_file, "Duplicated %i as %i [%i]\n",
295			 old->index, bb2->index, bb2->frequency);
296	    }
297	  RBI (bb)->next = bb2;
298	  RBI (bb2)->visited = 1;
299	  bb = bb2;
300	  /* In case the trace became infrequent, stop duplicating.  */
301	  if (ignore_bb_p (bb))
302	    break;
303	}
304      if (rtl_dump_file)
305	fprintf (rtl_dump_file, " covered now %.1f\n\n",
306		 traced_insns * 100.0 / weighted_insns);
307    }
308  if (rtl_dump_file)
309    fprintf (rtl_dump_file, "Duplicated %i insns (%i%%)\n", nduplicated,
310	     nduplicated * 100 / ninsns);
311
312  free (blocks);
313  free (trace);
314  free (counts);
315  fibheap_delete (heap);
316}
317
318/* Connect the superblocks into linear seuqence.  At the moment we attempt to keep
319   the original order as much as possible, but the algorithm may be made smarter
320   later if needed.  BB reordering pass should void most of the benefits of such
321   change though.  */
322
323static void
324layout_superblocks ()
325{
326  basic_block end = ENTRY_BLOCK_PTR->succ->dest;
327  basic_block bb = ENTRY_BLOCK_PTR->succ->dest->next_bb;
328
329  while (bb != EXIT_BLOCK_PTR)
330    {
331      edge e, best = NULL;
332      while (RBI (end)->next)
333	end = RBI (end)->next;
334
335      for (e = end->succ; e; e = e->succ_next)
336	if (e->dest != EXIT_BLOCK_PTR
337	    && e->dest != ENTRY_BLOCK_PTR->succ->dest
338	    && !RBI (e->dest)->visited
339	    && (!best || EDGE_FREQUENCY (e) > EDGE_FREQUENCY (best)))
340	  best = e;
341
342      if (best)
343	{
344	  RBI (end)->next = best->dest;
345	  RBI (best->dest)->visited = 1;
346	}
347      else
348	for (; bb != EXIT_BLOCK_PTR; bb=bb->next_bb)
349	  {
350	    if (!RBI (bb)->visited)
351	      {
352		RBI (end)->next = bb;
353		RBI (bb)->visited = 1;
354		break;
355	      }
356	  }
357    }
358}
359
360/* Main entry point to this file.  */
361
362void
363tracer ()
364{
365  if (n_basic_blocks <= 1)
366    return;
367  cfg_layout_initialize ();
368  mark_dfs_back_edges ();
369  if (rtl_dump_file)
370    dump_flow_info (rtl_dump_file);
371  tail_duplicate ();
372  layout_superblocks ();
373  if (rtl_dump_file)
374    dump_flow_info (rtl_dump_file);
375  cfg_layout_finalize ();
376  /* Merge basic blocks in duplicated traces.  */
377  cleanup_cfg (CLEANUP_EXPENSIVE);
378}
379