1117395Skan/* The tracer pass for the GNU compiler.
2117395Skan   Contributed by Jan Hubicka, SuSE Labs.
3169689Skan   Copyright (C) 2001, 2002, 2003, 2004, 2005 Free Software Foundation, Inc.
4117395Skan
5117395Skan   This file is part of GCC.
6117395Skan
7117395Skan   GCC is free software; you can redistribute it and/or modify it
8117395Skan   under the terms of the GNU General Public License as published by
9117395Skan   the Free Software Foundation; either version 2, or (at your option)
10117395Skan   any later version.
11117395Skan
12117395Skan   GCC is distributed in the hope that it will be useful, but WITHOUT
13117395Skan   ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
14117395Skan   or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public
15117395Skan   License for more details.
16117395Skan
17117395Skan   You should have received a copy of the GNU General Public License
18117395Skan   along with GCC; see the file COPYING.  If not, write to the Free
19169689Skan   Software Foundation, 51 Franklin Street, Fifth Floor, Boston, MA
20169689Skan   02110-1301, USA.  */
21117395Skan
22117395Skan/* This pass performs the tail duplication needed for superblock formation.
23117395Skan   For more information see:
24117395Skan
25117395Skan     Design and Analysis of Profile-Based Optimization in Compaq's
26117395Skan     Compilation Tools for Alpha; Journal of Instruction-Level
27117395Skan     Parallelism 3 (2000) 1-25
28117395Skan
29117395Skan   Unlike Compaq's implementation we don't do the loop peeling as most
30117395Skan   probably a better job can be done by a special pass and we don't
31117395Skan   need to worry too much about the code size implications as the tail
32117395Skan   duplicates are crossjumped again if optimizations are not
33117395Skan   performed.  */
34117395Skan
35117395Skan
36117395Skan#include "config.h"
37117395Skan#include "system.h"
38132718Skan#include "coretypes.h"
39132718Skan#include "tm.h"
40117395Skan#include "tree.h"
41117395Skan#include "rtl.h"
42117395Skan#include "hard-reg-set.h"
43117395Skan#include "basic-block.h"
44117395Skan#include "output.h"
45117395Skan#include "cfglayout.h"
46117395Skan#include "fibheap.h"
47117395Skan#include "flags.h"
48132718Skan#include "timevar.h"
49117395Skan#include "params.h"
50132718Skan#include "coverage.h"
51169689Skan#include "tree-pass.h"
52117395Skan
53132718Skanstatic int count_insns (basic_block);
54132718Skanstatic bool ignore_bb_p (basic_block);
55132718Skanstatic bool better_p (edge, edge);
56132718Skanstatic edge find_best_successor (basic_block);
57132718Skanstatic edge find_best_predecessor (basic_block);
58132718Skanstatic int find_trace (basic_block, basic_block *);
59132718Skanstatic void tail_duplicate (void);
60132718Skanstatic void layout_superblocks (void);
61117395Skan
62117395Skan/* Minimal outgoing edge probability considered for superblock formation.  */
63117395Skanstatic int probability_cutoff;
64117395Skanstatic int branch_ratio_cutoff;
65117395Skan
66117395Skan/* Return true if BB has been seen - it is connected to some trace
67117395Skan   already.  */
68117395Skan
69169689Skan#define seen(bb) (bb->il.rtl->visited || bb->aux)
70117395Skan
71117395Skan/* Return true if we should ignore the basic block for purposes of tracing.  */
72117395Skanstatic bool
73132718Skanignore_bb_p (basic_block bb)
74117395Skan{
75169689Skan  if (bb->index < NUM_FIXED_BLOCKS)
76117395Skan    return true;
77117395Skan  if (!maybe_hot_bb_p (bb))
78117395Skan    return true;
79117395Skan  return false;
80117395Skan}
81117395Skan
82117395Skan/* Return number of instructions in the block.  */
83117395Skan
84117395Skanstatic int
85132718Skancount_insns (basic_block bb)
86117395Skan{
87117395Skan  rtx insn;
88117395Skan  int n = 0;
89117395Skan
90132718Skan  for (insn = BB_HEAD (bb);
91132718Skan       insn != NEXT_INSN (BB_END (bb));
92132718Skan       insn = NEXT_INSN (insn))
93117395Skan    if (active_insn_p (insn))
94117395Skan      n++;
95117395Skan  return n;
96117395Skan}
97117395Skan
98117395Skan/* Return true if E1 is more frequent than E2.  */
99117395Skanstatic bool
100132718Skanbetter_p (edge e1, edge e2)
101117395Skan{
102117395Skan  if (e1->count != e2->count)
103117395Skan    return e1->count > e2->count;
104117395Skan  if (e1->src->frequency * e1->probability !=
105117395Skan      e2->src->frequency * e2->probability)
106117395Skan    return (e1->src->frequency * e1->probability
107117395Skan	    > e2->src->frequency * e2->probability);
108117395Skan  /* This is needed to avoid changes in the decision after
109117395Skan     CFG is modified.  */
110117395Skan  if (e1->src != e2->src)
111117395Skan    return e1->src->index > e2->src->index;
112117395Skan  return e1->dest->index > e2->dest->index;
113117395Skan}
114117395Skan
115117395Skan/* Return most frequent successor of basic block BB.  */
116117395Skan
117117395Skanstatic edge
118132718Skanfind_best_successor (basic_block bb)
119117395Skan{
120117395Skan  edge e;
121117395Skan  edge best = NULL;
122169689Skan  edge_iterator ei;
123117395Skan
124169689Skan  FOR_EACH_EDGE (e, ei, bb->succs)
125117395Skan    if (!best || better_p (e, best))
126117395Skan      best = e;
127117395Skan  if (!best || ignore_bb_p (best->dest))
128117395Skan    return NULL;
129117395Skan  if (best->probability <= probability_cutoff)
130117395Skan    return NULL;
131117395Skan  return best;
132117395Skan}
133117395Skan
134117395Skan/* Return most frequent predecessor of basic block BB.  */
135117395Skan
136117395Skanstatic edge
137132718Skanfind_best_predecessor (basic_block bb)
138117395Skan{
139117395Skan  edge e;
140117395Skan  edge best = NULL;
141169689Skan  edge_iterator ei;
142117395Skan
143169689Skan  FOR_EACH_EDGE (e, ei, bb->preds)
144117395Skan    if (!best || better_p (e, best))
145117395Skan      best = e;
146117395Skan  if (!best || ignore_bb_p (best->src))
147117395Skan    return NULL;
148117395Skan  if (EDGE_FREQUENCY (best) * REG_BR_PROB_BASE
149117395Skan      < bb->frequency * branch_ratio_cutoff)
150117395Skan    return NULL;
151117395Skan  return best;
152117395Skan}
153117395Skan
154117395Skan/* Find the trace using bb and record it in the TRACE array.
155117395Skan   Return number of basic blocks recorded.  */
156117395Skan
157117395Skanstatic int
158132718Skanfind_trace (basic_block bb, basic_block *trace)
159117395Skan{
160117395Skan  int i = 0;
161117395Skan  edge e;
162117395Skan
163169689Skan  if (dump_file)
164169689Skan    fprintf (dump_file, "Trace seed %i [%i]", bb->index, bb->frequency);
165117395Skan
166117395Skan  while ((e = find_best_predecessor (bb)) != NULL)
167117395Skan    {
168117395Skan      basic_block bb2 = e->src;
169117395Skan      if (seen (bb2) || (e->flags & (EDGE_DFS_BACK | EDGE_COMPLEX))
170117395Skan	  || find_best_successor (bb2) != e)
171117395Skan	break;
172169689Skan      if (dump_file)
173169689Skan	fprintf (dump_file, ",%i [%i]", bb->index, bb->frequency);
174117395Skan      bb = bb2;
175117395Skan    }
176169689Skan  if (dump_file)
177169689Skan    fprintf (dump_file, " forward %i [%i]", bb->index, bb->frequency);
178117395Skan  trace[i++] = bb;
179117395Skan
180117395Skan  /* Follow the trace in forward direction.  */
181117395Skan  while ((e = find_best_successor (bb)) != NULL)
182117395Skan    {
183117395Skan      bb = e->dest;
184117395Skan      if (seen (bb) || (e->flags & (EDGE_DFS_BACK | EDGE_COMPLEX))
185117395Skan	  || find_best_predecessor (bb) != e)
186117395Skan	break;
187169689Skan      if (dump_file)
188169689Skan	fprintf (dump_file, ",%i [%i]", bb->index, bb->frequency);
189117395Skan      trace[i++] = bb;
190117395Skan    }
191169689Skan  if (dump_file)
192169689Skan    fprintf (dump_file, "\n");
193117395Skan  return i;
194117395Skan}
195117395Skan
196117395Skan/* Look for basic blocks in frequency order, construct traces and tail duplicate
197117395Skan   if profitable.  */
198117395Skan
199117395Skanstatic void
200132718Skantail_duplicate (void)
201117395Skan{
202169689Skan  fibnode_t *blocks = XCNEWVEC (fibnode_t, last_basic_block);
203169689Skan  basic_block *trace = XNEWVEC (basic_block, n_basic_blocks);
204169689Skan  int *counts = XNEWVEC (int, last_basic_block);
205117395Skan  int ninsns = 0, nduplicated = 0;
206117395Skan  gcov_type weighted_insns = 0, traced_insns = 0;
207117395Skan  fibheap_t heap = fibheap_new ();
208117395Skan  gcov_type cover_insns;
209117395Skan  int max_dup_insns;
210117395Skan  basic_block bb;
211117395Skan
212132718Skan  if (profile_info && flag_branch_probabilities)
213117395Skan    probability_cutoff = PARAM_VALUE (TRACER_MIN_BRANCH_PROBABILITY_FEEDBACK);
214117395Skan  else
215117395Skan    probability_cutoff = PARAM_VALUE (TRACER_MIN_BRANCH_PROBABILITY);
216117395Skan  probability_cutoff = REG_BR_PROB_BASE / 100 * probability_cutoff;
217117395Skan
218117395Skan  branch_ratio_cutoff =
219117395Skan    (REG_BR_PROB_BASE / 100 * PARAM_VALUE (TRACER_MIN_BRANCH_RATIO));
220117395Skan
221117395Skan  FOR_EACH_BB (bb)
222117395Skan    {
223117395Skan      int n = count_insns (bb);
224117395Skan      if (!ignore_bb_p (bb))
225117395Skan	blocks[bb->index] = fibheap_insert (heap, -bb->frequency,
226117395Skan					    bb);
227117395Skan
228117395Skan      counts [bb->index] = n;
229117395Skan      ninsns += n;
230117395Skan      weighted_insns += n * bb->frequency;
231117395Skan    }
232117395Skan
233132718Skan  if (profile_info && flag_branch_probabilities)
234117395Skan    cover_insns = PARAM_VALUE (TRACER_DYNAMIC_COVERAGE_FEEDBACK);
235117395Skan  else
236117395Skan    cover_insns = PARAM_VALUE (TRACER_DYNAMIC_COVERAGE);
237117395Skan  cover_insns = (weighted_insns * cover_insns + 50) / 100;
238117395Skan  max_dup_insns = (ninsns * PARAM_VALUE (TRACER_MAX_CODE_GROWTH) + 50) / 100;
239117395Skan
240117395Skan  while (traced_insns < cover_insns && nduplicated < max_dup_insns
241117395Skan         && !fibheap_empty (heap))
242117395Skan    {
243117395Skan      basic_block bb = fibheap_extract_min (heap);
244117395Skan      int n, pos;
245117395Skan
246117395Skan      if (!bb)
247117395Skan	break;
248117395Skan
249117395Skan      blocks[bb->index] = NULL;
250117395Skan
251117395Skan      if (ignore_bb_p (bb))
252117395Skan	continue;
253169689Skan      gcc_assert (!seen (bb));
254117395Skan
255117395Skan      n = find_trace (bb, trace);
256117395Skan
257117395Skan      bb = trace[0];
258117395Skan      traced_insns += bb->frequency * counts [bb->index];
259117395Skan      if (blocks[bb->index])
260117395Skan	{
261117395Skan	  fibheap_delete_node (heap, blocks[bb->index]);
262117395Skan	  blocks[bb->index] = NULL;
263117395Skan	}
264117395Skan
265117395Skan      for (pos = 1; pos < n; pos++)
266117395Skan	{
267117395Skan	  basic_block bb2 = trace[pos];
268117395Skan
269117395Skan	  if (blocks[bb2->index])
270117395Skan	    {
271117395Skan	      fibheap_delete_node (heap, blocks[bb2->index]);
272117395Skan	      blocks[bb2->index] = NULL;
273117395Skan	    }
274117395Skan	  traced_insns += bb2->frequency * counts [bb2->index];
275169689Skan	  if (EDGE_COUNT (bb2->preds) > 1
276169689Skan	      && can_duplicate_block_p (bb2))
277117395Skan	    {
278169689Skan	      edge e;
279117395Skan	      basic_block old = bb2;
280117395Skan
281169689Skan	      e = find_edge (bb, bb2);
282169689Skan
283117395Skan	      nduplicated += counts [bb2->index];
284169689Skan	      bb2 = duplicate_block (bb2, e, bb);
285117395Skan
286117395Skan	      /* Reconsider the original copy of block we've duplicated.
287132718Skan	         Removing the most common predecessor may make it to be
288117395Skan	         head.  */
289117395Skan	      blocks[old->index] =
290117395Skan		fibheap_insert (heap, -old->frequency, old);
291117395Skan
292169689Skan	      if (dump_file)
293169689Skan		fprintf (dump_file, "Duplicated %i as %i [%i]\n",
294117395Skan			 old->index, bb2->index, bb2->frequency);
295117395Skan	    }
296169689Skan	  bb->aux = bb2;
297169689Skan	  bb2->il.rtl->visited = 1;
298117395Skan	  bb = bb2;
299117395Skan	  /* In case the trace became infrequent, stop duplicating.  */
300117395Skan	  if (ignore_bb_p (bb))
301117395Skan	    break;
302117395Skan	}
303169689Skan      if (dump_file)
304169689Skan	fprintf (dump_file, " covered now %.1f\n\n",
305117395Skan		 traced_insns * 100.0 / weighted_insns);
306117395Skan    }
307169689Skan  if (dump_file)
308169689Skan    fprintf (dump_file, "Duplicated %i insns (%i%%)\n", nduplicated,
309117395Skan	     nduplicated * 100 / ninsns);
310117395Skan
311117395Skan  free (blocks);
312117395Skan  free (trace);
313117395Skan  free (counts);
314117395Skan  fibheap_delete (heap);
315117395Skan}
316117395Skan
317132718Skan/* Connect the superblocks into linear sequence.  At the moment we attempt to keep
318117395Skan   the original order as much as possible, but the algorithm may be made smarter
319117395Skan   later if needed.  BB reordering pass should void most of the benefits of such
320117395Skan   change though.  */
321117395Skan
322117395Skanstatic void
323132718Skanlayout_superblocks (void)
324117395Skan{
325169689Skan  basic_block end = single_succ (ENTRY_BLOCK_PTR);
326169689Skan  basic_block bb = end->next_bb;
327117395Skan
328117395Skan  while (bb != EXIT_BLOCK_PTR)
329117395Skan    {
330169689Skan      edge_iterator ei;
331117395Skan      edge e, best = NULL;
332169689Skan      while (end->aux)
333169689Skan	end = end->aux;
334117395Skan
335169689Skan      FOR_EACH_EDGE (e, ei, end->succs)
336117395Skan	if (e->dest != EXIT_BLOCK_PTR
337169689Skan	    && e->dest != single_succ (ENTRY_BLOCK_PTR)
338169689Skan	    && !e->dest->il.rtl->visited
339117395Skan	    && (!best || EDGE_FREQUENCY (e) > EDGE_FREQUENCY (best)))
340117395Skan	  best = e;
341117395Skan
342117395Skan      if (best)
343117395Skan	{
344169689Skan	  end->aux = best->dest;
345169689Skan	  best->dest->il.rtl->visited = 1;
346117395Skan	}
347117395Skan      else
348132718Skan	for (; bb != EXIT_BLOCK_PTR; bb = bb->next_bb)
349117395Skan	  {
350169689Skan	    if (!bb->il.rtl->visited)
351117395Skan	      {
352169689Skan		end->aux = bb;
353169689Skan		bb->il.rtl->visited = 1;
354117395Skan		break;
355117395Skan	      }
356117395Skan	  }
357117395Skan    }
358117395Skan}
359117395Skan
360132718Skan/* Main entry point to this file.  FLAGS is the set of flags to pass
361132718Skan   to cfg_layout_initialize().  */
362117395Skan
363117395Skanvoid
364132718Skantracer (unsigned int flags)
365117395Skan{
366169689Skan  if (n_basic_blocks <= NUM_FIXED_BLOCKS + 1)
367117395Skan    return;
368132718Skan
369132718Skan  cfg_layout_initialize (flags);
370117395Skan  mark_dfs_back_edges ();
371169689Skan  if (dump_file)
372169689Skan    dump_flow_info (dump_file, dump_flags);
373117395Skan  tail_duplicate ();
374117395Skan  layout_superblocks ();
375169689Skan  if (dump_file)
376169689Skan    dump_flow_info (dump_file, dump_flags);
377117395Skan  cfg_layout_finalize ();
378132718Skan
379117395Skan  /* Merge basic blocks in duplicated traces.  */
380117395Skan  cleanup_cfg (CLEANUP_EXPENSIVE);
381169689Skan}
382169689Skan
383169689Skanstatic bool
384169689Skangate_handle_tracer (void)
385169689Skan{
386169689Skan  return (optimize > 0 && flag_tracer);
387169689Skan}
388132718Skan
389169689Skan/* Run tracer.  */
390169689Skanstatic unsigned int
391169689Skanrest_of_handle_tracer (void)
392169689Skan{
393169689Skan  if (dump_file)
394169689Skan    dump_flow_info (dump_file, dump_flags);
395169689Skan  tracer (0);
396169689Skan  cleanup_cfg (CLEANUP_EXPENSIVE);
397169689Skan  reg_scan (get_insns (), max_reg_num ());
398169689Skan  return 0;
399117395Skan}
400169689Skan
401169689Skanstruct tree_opt_pass pass_tracer =
402169689Skan{
403169689Skan  "tracer",                             /* name */
404169689Skan  gate_handle_tracer,                   /* gate */
405169689Skan  rest_of_handle_tracer,                /* execute */
406169689Skan  NULL,                                 /* sub */
407169689Skan  NULL,                                 /* next */
408169689Skan  0,                                    /* static_pass_number */
409169689Skan  TV_TRACER,                            /* tv_id */
410169689Skan  0,                                    /* properties_required */
411169689Skan  0,                                    /* properties_provided */
412169689Skan  0,                                    /* properties_destroyed */
413169689Skan  0,                                    /* todo_flags_start */
414169689Skan  TODO_dump_func,                       /* todo_flags_finish */
415169689Skan  'T'                                   /* letter */
416169689Skan};
417169689Skan
418