tracer.c revision 267654
1114702Smurray/* The tracer pass for the GNU compiler.
2114702Smurray   Contributed by Jan Hubicka, SuSE Labs.
3114702Smurray   Copyright (C) 2001, 2002, 2003, 2004, 2005 Free Software Foundation, Inc.
4114702Smurray
5114702Smurray   This file is part of GCC.
6114702Smurray
7114702Smurray   GCC is free software; you can redistribute it and/or modify it
8114702Smurray   under the terms of the GNU General Public License as published by
9114702Smurray   the Free Software Foundation; either version 2, or (at your option)
10114702Smurray   any later version.
11114702Smurray
12114702Smurray   GCC is distributed in the hope that it will be useful, but WITHOUT
13114702Smurray   ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
14115049Smurray   or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public
15115049Smurray   License for more details.
16115049Smurray
17115049Smurray   You should have received a copy of the GNU General Public License
18114702Smurray   along with GCC; see the file COPYING.  If not, write to the Free
19114702Smurray   Software Foundation, 51 Franklin Street, Fifth Floor, Boston, MA
20114702Smurray   02110-1301, USA.  */
21114702Smurray
22114702Smurray/* This pass performs the tail duplication needed for superblock formation.
23114702Smurray   For more information see:
24114702Smurray
25114702Smurray     Design and Analysis of Profile-Based Optimization in Compaq's
26114702Smurray     Compilation Tools for Alpha; Journal of Instruction-Level
27114702Smurray     Parallelism 3 (2000) 1-25
28114702Smurray
29114702Smurray   Unlike Compaq's implementation we don't do the loop peeling as most
30114702Smurray   probably a better job can be done by a special pass and we don't
31114702Smurray   need to worry too much about the code size implications as the tail
32114702Smurray   duplicates are crossjumped again if optimizations are not
33114702Smurray   performed.  */
34114702Smurray
35114702Smurray
36114702Smurray#include "config.h"
37114702Smurray#include "system.h"
38114702Smurray#include "coretypes.h"
39114702Smurray#include "tm.h"
40114702Smurray#include "tree.h"
41114702Smurray#include "rtl.h"
42114702Smurray#include "hard-reg-set.h"
43114702Smurray#include "basic-block.h"
44114702Smurray#include "output.h"
45114702Smurray#include "cfglayout.h"
46114702Smurray#include "fibheap.h"
47114702Smurray#include "flags.h"
48114702Smurray#include "timevar.h"
49115049Smurray#include "params.h"
50114702Smurray#include "coverage.h"
51114702Smurray#include "tree-pass.h"
52114702Smurray
53114702Smurraystatic int count_insns (basic_block);
54114702Smurraystatic bool ignore_bb_p (basic_block);
55114702Smurraystatic bool better_p (edge, edge);
56114702Smurraystatic edge find_best_successor (basic_block);
57114702Smurraystatic edge find_best_predecessor (basic_block);
58114702Smurraystatic int find_trace (basic_block, basic_block *);
59114702Smurraystatic void tail_duplicate (void);
60114702Smurraystatic void layout_superblocks (void);
61114702Smurray
62114702Smurray/* Minimal outgoing edge probability considered for superblock formation.  */
63114702Smurraystatic int probability_cutoff;
64114702Smurraystatic int branch_ratio_cutoff;
65114702Smurray
66114702Smurray/* Return true if BB has been seen - it is connected to some trace
67114702Smurray   already.  */
68114702Smurray
69114702Smurray#define seen(bb) (bb->il.rtl->visited || bb->aux)
70114702Smurray
71114702Smurray/* Return true if we should ignore the basic block for purposes of tracing.  */
72114702Smurraystatic bool
73114702Smurrayignore_bb_p (basic_block bb)
74114702Smurray{
75114702Smurray  if (bb->index < NUM_FIXED_BLOCKS)
76114702Smurray    return true;
77114702Smurray  if (!maybe_hot_bb_p (bb))
78114702Smurray    return true;
79114702Smurray  return false;
80114702Smurray}
81114702Smurray
82114702Smurray/* Return number of instructions in the block.  */
83114702Smurray
84114702Smurraystatic int
85114702Smurraycount_insns (basic_block bb)
86114702Smurray{
87114702Smurray  rtx insn;
88114702Smurray  int n = 0;
89114702Smurray
90114702Smurray  for (insn = BB_HEAD (bb);
91114702Smurray       insn != NEXT_INSN (BB_END (bb));
92114702Smurray       insn = NEXT_INSN (insn))
93114702Smurray    if (active_insn_p (insn))
94114702Smurray      n++;
95114702Smurray  return n;
96114702Smurray}
97114702Smurray
98114702Smurray/* Return true if E1 is more frequent than E2.  */
99114702Smurraystatic bool
100114702Smurraybetter_p (edge e1, edge e2)
101114702Smurray{
102114702Smurray  if (e1->count != e2->count)
103114702Smurray    return e1->count > e2->count;
104114702Smurray  if (e1->src->frequency * e1->probability !=
105114702Smurray      e2->src->frequency * e2->probability)
106114702Smurray    return (e1->src->frequency * e1->probability
107114702Smurray	    > e2->src->frequency * e2->probability);
108114702Smurray  /* This is needed to avoid changes in the decision after
109114702Smurray     CFG is modified.  */
110114702Smurray  if (e1->src != e2->src)
111114702Smurray    return e1->src->index > e2->src->index;
112114702Smurray  return e1->dest->index > e2->dest->index;
113114702Smurray}
114114702Smurray
115/* Return most frequent successor of basic block BB.  */
116
117static edge
118find_best_successor (basic_block bb)
119{
120  edge e;
121  edge best = NULL;
122  edge_iterator ei;
123
124  FOR_EACH_EDGE (e, ei, bb->succs)
125    if (!best || better_p (e, best))
126      best = e;
127  if (!best || ignore_bb_p (best->dest))
128    return NULL;
129  if (best->probability <= probability_cutoff)
130    return NULL;
131  return best;
132}
133
134/* Return most frequent predecessor of basic block BB.  */
135
136static edge
137find_best_predecessor (basic_block bb)
138{
139  edge e;
140  edge best = NULL;
141  edge_iterator ei;
142
143  FOR_EACH_EDGE (e, ei, bb->preds)
144    if (!best || better_p (e, best))
145      best = e;
146  if (!best || ignore_bb_p (best->src))
147    return NULL;
148  if (EDGE_FREQUENCY (best) * REG_BR_PROB_BASE
149      < bb->frequency * branch_ratio_cutoff)
150    return NULL;
151  return best;
152}
153
154/* Find the trace using bb and record it in the TRACE array.
155   Return number of basic blocks recorded.  */
156
157static int
158find_trace (basic_block bb, basic_block *trace)
159{
160  int i = 0;
161  edge e;
162
163  if (dump_file)
164    fprintf (dump_file, "Trace seed %i [%i]", bb->index, bb->frequency);
165
166  while ((e = find_best_predecessor (bb)) != NULL)
167    {
168      basic_block bb2 = e->src;
169      if (seen (bb2) || (e->flags & (EDGE_DFS_BACK | EDGE_COMPLEX))
170	  || find_best_successor (bb2) != e)
171	break;
172      if (dump_file)
173	fprintf (dump_file, ",%i [%i]", bb->index, bb->frequency);
174      bb = bb2;
175    }
176  if (dump_file)
177    fprintf (dump_file, " forward %i [%i]", bb->index, bb->frequency);
178  trace[i++] = bb;
179
180  /* Follow the trace in forward direction.  */
181  while ((e = find_best_successor (bb)) != NULL)
182    {
183      bb = e->dest;
184      if (seen (bb) || (e->flags & (EDGE_DFS_BACK | EDGE_COMPLEX))
185	  || find_best_predecessor (bb) != e)
186	break;
187      if (dump_file)
188	fprintf (dump_file, ",%i [%i]", bb->index, bb->frequency);
189      trace[i++] = bb;
190    }
191  if (dump_file)
192    fprintf (dump_file, "\n");
193  return i;
194}
195
196/* Look for basic blocks in frequency order, construct traces and tail duplicate
197   if profitable.  */
198
199static void
200tail_duplicate (void)
201{
202  fibnode_t *blocks = XCNEWVEC (fibnode_t, last_basic_block);
203  basic_block *trace = XNEWVEC (basic_block, n_basic_blocks);
204  int *counts = XNEWVEC (int, last_basic_block);
205  int ninsns = 0, nduplicated = 0;
206  gcov_type weighted_insns = 0, traced_insns = 0;
207  fibheap_t heap = fibheap_new ();
208  gcov_type cover_insns;
209  int max_dup_insns;
210  basic_block bb;
211
212  if (profile_info && flag_branch_probabilities)
213    probability_cutoff = PARAM_VALUE (TRACER_MIN_BRANCH_PROBABILITY_FEEDBACK);
214  else
215    probability_cutoff = PARAM_VALUE (TRACER_MIN_BRANCH_PROBABILITY);
216  probability_cutoff = REG_BR_PROB_BASE / 100 * probability_cutoff;
217
218  branch_ratio_cutoff =
219    (REG_BR_PROB_BASE / 100 * PARAM_VALUE (TRACER_MIN_BRANCH_RATIO));
220
221  FOR_EACH_BB (bb)
222    {
223      int n = count_insns (bb);
224      if (!ignore_bb_p (bb))
225	blocks[bb->index] = fibheap_insert (heap, -bb->frequency,
226					    bb);
227
228      counts [bb->index] = n;
229      ninsns += n;
230      weighted_insns += n * bb->frequency;
231    }
232
233  if (profile_info && flag_branch_probabilities)
234    cover_insns = PARAM_VALUE (TRACER_DYNAMIC_COVERAGE_FEEDBACK);
235  else
236    cover_insns = PARAM_VALUE (TRACER_DYNAMIC_COVERAGE);
237  cover_insns = (weighted_insns * cover_insns + 50) / 100;
238  max_dup_insns = (ninsns * PARAM_VALUE (TRACER_MAX_CODE_GROWTH) + 50) / 100;
239
240  while (traced_insns < cover_insns && nduplicated < max_dup_insns
241         && !fibheap_empty (heap))
242    {
243      basic_block bb = fibheap_extract_min (heap);
244      int n, pos;
245
246      if (!bb)
247	break;
248
249      blocks[bb->index] = NULL;
250
251      if (ignore_bb_p (bb))
252	continue;
253      gcc_assert (!seen (bb));
254
255      n = find_trace (bb, trace);
256
257      bb = trace[0];
258      traced_insns += bb->frequency * counts [bb->index];
259      if (blocks[bb->index])
260	{
261	  fibheap_delete_node (heap, blocks[bb->index]);
262	  blocks[bb->index] = NULL;
263	}
264
265      for (pos = 1; pos < n; pos++)
266	{
267	  basic_block bb2 = trace[pos];
268
269	  if (blocks[bb2->index])
270	    {
271	      fibheap_delete_node (heap, blocks[bb2->index]);
272	      blocks[bb2->index] = NULL;
273	    }
274	  traced_insns += bb2->frequency * counts [bb2->index];
275	  if (EDGE_COUNT (bb2->preds) > 1
276	      && can_duplicate_block_p (bb2))
277	    {
278	      edge e;
279	      basic_block old = bb2;
280
281	      e = find_edge (bb, bb2);
282
283	      nduplicated += counts [bb2->index];
284	      bb2 = duplicate_block (bb2, e, bb);
285
286	      /* Reconsider the original copy of block we've duplicated.
287	         Removing the most common predecessor may make it to be
288	         head.  */
289	      blocks[old->index] =
290		fibheap_insert (heap, -old->frequency, old);
291
292	      if (dump_file)
293		fprintf (dump_file, "Duplicated %i as %i [%i]\n",
294			 old->index, bb2->index, bb2->frequency);
295	    }
296	  bb->aux = bb2;
297	  bb2->il.rtl->visited = 1;
298	  bb = bb2;
299	  /* In case the trace became infrequent, stop duplicating.  */
300	  if (ignore_bb_p (bb))
301	    break;
302	}
303      if (dump_file)
304	fprintf (dump_file, " covered now %.1f\n\n",
305		 traced_insns * 100.0 / weighted_insns);
306    }
307  if (dump_file)
308    fprintf (dump_file, "Duplicated %i insns (%i%%)\n", nduplicated,
309	     nduplicated * 100 / ninsns);
310
311  free (blocks);
312  free (trace);
313  free (counts);
314  fibheap_delete (heap);
315}
316
317/* Connect the superblocks into linear sequence.  At the moment we attempt to keep
318   the original order as much as possible, but the algorithm may be made smarter
319   later if needed.  BB reordering pass should void most of the benefits of such
320   change though.  */
321
322static void
323layout_superblocks (void)
324{
325  basic_block end = single_succ (ENTRY_BLOCK_PTR);
326  basic_block bb = end->next_bb;
327
328  while (bb != EXIT_BLOCK_PTR)
329    {
330      edge_iterator ei;
331      edge e, best = NULL;
332      while (end->aux)
333	end = end->aux;
334
335      FOR_EACH_EDGE (e, ei, end->succs)
336	if (e->dest != EXIT_BLOCK_PTR
337	    && e->dest != single_succ (ENTRY_BLOCK_PTR)
338	    && !e->dest->il.rtl->visited
339	    && (!best || EDGE_FREQUENCY (e) > EDGE_FREQUENCY (best)))
340	  best = e;
341
342      if (best)
343	{
344	  end->aux = best->dest;
345	  best->dest->il.rtl->visited = 1;
346	}
347      else
348	for (; bb != EXIT_BLOCK_PTR; bb = bb->next_bb)
349	  {
350	    if (!bb->il.rtl->visited)
351	      {
352		end->aux = bb;
353		bb->il.rtl->visited = 1;
354		break;
355	      }
356	  }
357    }
358}
359
360/* Main entry point to this file.  FLAGS is the set of flags to pass
361   to cfg_layout_initialize().  */
362
363void
364tracer (unsigned int flags)
365{
366  if (n_basic_blocks <= NUM_FIXED_BLOCKS + 1)
367    return;
368
369  cfg_layout_initialize (flags);
370  mark_dfs_back_edges ();
371  if (dump_file)
372    dump_flow_info (dump_file, dump_flags);
373  tail_duplicate ();
374  layout_superblocks ();
375  if (dump_file)
376    dump_flow_info (dump_file, dump_flags);
377  cfg_layout_finalize ();
378
379  /* Merge basic blocks in duplicated traces.  */
380  cleanup_cfg (CLEANUP_EXPENSIVE);
381}
382
383static bool
384gate_handle_tracer (void)
385{
386  return (optimize > 0 && flag_tracer);
387}
388
389/* Run tracer.  */
390static unsigned int
391rest_of_handle_tracer (void)
392{
393  if (dump_file)
394    dump_flow_info (dump_file, dump_flags);
395  tracer (0);
396  cleanup_cfg (CLEANUP_EXPENSIVE);
397  reg_scan (get_insns (), max_reg_num ());
398  return 0;
399}
400
401struct tree_opt_pass pass_tracer =
402{
403  "tracer",                             /* name */
404  gate_handle_tracer,                   /* gate */
405  rest_of_handle_tracer,                /* execute */
406  NULL,                                 /* sub */
407  NULL,                                 /* next */
408  0,                                    /* static_pass_number */
409  TV_TRACER,                            /* tv_id */
410  0,                                    /* properties_required */
411  0,                                    /* properties_provided */
412  0,                                    /* properties_destroyed */
413  0,                                    /* todo_flags_start */
414  TODO_dump_func,                       /* todo_flags_finish */
415  'T'                                   /* letter */
416};
417
418