1/* Calculate branch probabilities, and basic block execution counts.
2   Copyright (C) 1990, 1991, 1992, 1993, 1994, 1996, 1997, 1998, 1999,
3   2000, 2001, 2002, 2003, 2004, 2005, 2007, 2008, 2009
4   Free Software Foundation, Inc.
5   Contributed by James E. Wilson, UC Berkeley/Cygnus Support;
6   based on some ideas from Dain Samples of UC Berkeley.
7   Further mangling by Bob Manson, Cygnus Support.
8
9This file is part of GCC.
10
11GCC is free software; you can redistribute it and/or modify it under
12the terms of the GNU General Public License as published by the Free
13Software Foundation; either version 3, or (at your option) any later
14version.
15
16GCC is distributed in the hope that it will be useful, but WITHOUT ANY
17WARRANTY; without even the implied warranty of MERCHANTABILITY or
18FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
19for more details.
20
21You should have received a copy of the GNU General Public License
22along with GCC; see the file COPYING3.  If not see
23<http://www.gnu.org/licenses/>.  */
24
25/* Generate basic block profile instrumentation and auxiliary files.
26   Profile generation is optimized, so that not all arcs in the basic
27   block graph need instrumenting. First, the BB graph is closed with
28   one entry (function start), and one exit (function exit).  Any
29   ABNORMAL_EDGE cannot be instrumented (because there is no control
30   path to place the code). We close the graph by inserting fake
31   EDGE_FAKE edges to the EXIT_BLOCK, from the sources of abnormal
32   edges that do not go to the exit_block. We ignore such abnormal
33   edges.  Naturally these fake edges are never directly traversed,
34   and so *cannot* be directly instrumented.  Some other graph
35   massaging is done. To optimize the instrumentation we generate the
36   BB minimal span tree, only edges that are not on the span tree
37   (plus the entry point) need instrumenting. From that information
38   all other edge counts can be deduced.  By construction all fake
39   edges must be on the spanning tree. We also attempt to place
40   EDGE_CRITICAL edges on the spanning tree.
41
42   The auxiliary files generated are <dumpbase>.gcno (at compile time)
43   and <dumpbase>.gcda (at run time).  The format is
44   described in full in gcov-io.h.  */
45
46/* ??? Register allocation should use basic block execution counts to
47   give preference to the most commonly executed blocks.  */
48
49/* ??? Should calculate branch probabilities before instrumenting code, since
50   then we can use arc counts to help decide which arcs to instrument.  */
51
52#include "config.h"
53#include "system.h"
54#include "coretypes.h"
55#include "tm.h"
56#include "rtl.h"
57#include "flags.h"
58#include "output.h"
59#include "regs.h"
60#include "expr.h"
61#include "function.h"
62#include "toplev.h"
63#include "coverage.h"
64#include "value-prof.h"
65#include "tree.h"
66#include "cfghooks.h"
67#include "tree-flow.h"
68#include "timevar.h"
69#include "cfgloop.h"
70#include "tree-pass.h"
71
72#include "profile.h"
73
74/* Hooks for profiling.  */
75static struct profile_hooks* profile_hooks;
76
77struct bb_info {
78  unsigned int count_valid : 1;
79
80  /* Number of successor and predecessor edges.  */
81  gcov_type succ_count;
82  gcov_type pred_count;
83};
84
85#define BB_INFO(b)  ((struct bb_info *) (b)->aux)
86
87
88/* Counter summary from the last set of coverage counts read.  */
89
90const struct gcov_ctr_summary *profile_info;
91
92/* Collect statistics on the performance of this pass for the entire source
93   file.  */
94
95static int total_num_blocks;
96static int total_num_edges;
97static int total_num_edges_ignored;
98static int total_num_edges_instrumented;
99static int total_num_blocks_created;
100static int total_num_passes;
101static int total_num_times_called;
102static int total_hist_br_prob[20];
103static int total_num_branches;
104
105/* Forward declarations.  */
106static void find_spanning_tree (struct edge_list *);
107static unsigned instrument_edges (struct edge_list *);
108static void instrument_values (histogram_values);
109static void compute_branch_probabilities (void);
110static void compute_value_histograms (histogram_values);
111static gcov_type * get_exec_counts (void);
112static basic_block find_group (basic_block);
113static void union_groups (basic_block, basic_block);
114
115/* Add edge instrumentation code to the entire insn chain.
116
117   F is the first insn of the chain.
118   NUM_BLOCKS is the number of basic blocks found in F.  */
119
120static unsigned
121instrument_edges (struct edge_list *el)
122{
123  unsigned num_instr_edges = 0;
124  int num_edges = NUM_EDGES (el);
125  basic_block bb;
126
127  FOR_BB_BETWEEN (bb, ENTRY_BLOCK_PTR, NULL, next_bb)
128    {
129      edge e;
130      edge_iterator ei;
131
132      FOR_EACH_EDGE (e, ei, bb->succs)
133	{
134	  struct edge_info *inf = EDGE_INFO (e);
135
136	  if (!inf->ignore && !inf->on_tree)
137	    {
138	      gcc_assert (!(e->flags & EDGE_ABNORMAL));
139	      if (dump_file)
140		fprintf (dump_file, "Edge %d to %d instrumented%s\n",
141			 e->src->index, e->dest->index,
142			 EDGE_CRITICAL_P (e) ? " (and split)" : "");
143	      (profile_hooks->gen_edge_profiler) (num_instr_edges++, e);
144	    }
145	}
146    }
147
148  total_num_blocks_created += num_edges;
149  if (dump_file)
150    fprintf (dump_file, "%d edges instrumented\n", num_instr_edges);
151  return num_instr_edges;
152}
153
154/* Add code to measure histograms for values in list VALUES.  */
155static void
156instrument_values (histogram_values values)
157{
158  unsigned i, t;
159
160  /* Emit code to generate the histograms before the insns.  */
161
162  for (i = 0; i < VEC_length (histogram_value, values); i++)
163    {
164      histogram_value hist = VEC_index (histogram_value, values, i);
165      switch (hist->type)
166	{
167	case HIST_TYPE_INTERVAL:
168	  t = GCOV_COUNTER_V_INTERVAL;
169	  break;
170
171	case HIST_TYPE_POW2:
172	  t = GCOV_COUNTER_V_POW2;
173	  break;
174
175	case HIST_TYPE_SINGLE_VALUE:
176	  t = GCOV_COUNTER_V_SINGLE;
177	  break;
178
179	case HIST_TYPE_CONST_DELTA:
180	  t = GCOV_COUNTER_V_DELTA;
181	  break;
182
183 	case HIST_TYPE_INDIR_CALL:
184 	  t = GCOV_COUNTER_V_INDIR;
185 	  break;
186
187 	case HIST_TYPE_AVERAGE:
188 	  t = GCOV_COUNTER_AVERAGE;
189 	  break;
190
191 	case HIST_TYPE_IOR:
192 	  t = GCOV_COUNTER_IOR;
193 	  break;
194
195	default:
196	  gcc_unreachable ();
197	}
198      if (!coverage_counter_alloc (t, hist->n_counters))
199	continue;
200
201      switch (hist->type)
202	{
203	case HIST_TYPE_INTERVAL:
204	  (profile_hooks->gen_interval_profiler) (hist, t, 0);
205	  break;
206
207	case HIST_TYPE_POW2:
208	  (profile_hooks->gen_pow2_profiler) (hist, t, 0);
209	  break;
210
211	case HIST_TYPE_SINGLE_VALUE:
212	  (profile_hooks->gen_one_value_profiler) (hist, t, 0);
213	  break;
214
215	case HIST_TYPE_CONST_DELTA:
216	  (profile_hooks->gen_const_delta_profiler) (hist, t, 0);
217	  break;
218
219 	case HIST_TYPE_INDIR_CALL:
220 	  (profile_hooks->gen_ic_profiler) (hist, t, 0);
221  	  break;
222
223	case HIST_TYPE_AVERAGE:
224	  (profile_hooks->gen_average_profiler) (hist, t, 0);
225	  break;
226
227	case HIST_TYPE_IOR:
228	  (profile_hooks->gen_ior_profiler) (hist, t, 0);
229	  break;
230
231	default:
232	  gcc_unreachable ();
233	}
234    }
235}
236
237
238/* Computes hybrid profile for all matching entries in da_file.  */
239
240static gcov_type *
241get_exec_counts (void)
242{
243  unsigned num_edges = 0;
244  basic_block bb;
245  gcov_type *counts;
246
247  /* Count the edges to be (possibly) instrumented.  */
248  FOR_BB_BETWEEN (bb, ENTRY_BLOCK_PTR, NULL, next_bb)
249    {
250      edge e;
251      edge_iterator ei;
252
253      FOR_EACH_EDGE (e, ei, bb->succs)
254	if (!EDGE_INFO (e)->ignore && !EDGE_INFO (e)->on_tree)
255	  num_edges++;
256    }
257
258  counts = get_coverage_counts (GCOV_COUNTER_ARCS, num_edges, &profile_info);
259  if (!counts)
260    return NULL;
261
262  if (dump_file && profile_info)
263    fprintf(dump_file, "Merged %u profiles with maximal count %u.\n",
264	    profile_info->runs, (unsigned) profile_info->sum_max);
265
266  return counts;
267}
268
269
270static bool
271is_edge_inconsistent (VEC(edge,gc) *edges)
272{
273  edge e;
274  edge_iterator ei;
275  FOR_EACH_EDGE (e, ei, edges)
276    {
277      if (!EDGE_INFO (e)->ignore)
278        {
279          if (e->count < 0
280	      && (!(e->flags & EDGE_FAKE)
281	          || !block_ends_with_call_p (e->src)))
282	    {
283	      if (dump_file)
284		{
285		  fprintf (dump_file,
286		  	   "Edge %i->%i is inconsistent, count"HOST_WIDEST_INT_PRINT_DEC,
287			   e->src->index, e->dest->index, e->count);
288		  dump_bb (e->src, dump_file, 0);
289		  dump_bb (e->dest, dump_file, 0);
290		}
291              return true;
292	    }
293        }
294    }
295  return false;
296}
297
298static void
299correct_negative_edge_counts (void)
300{
301  basic_block bb;
302  edge e;
303  edge_iterator ei;
304
305  FOR_BB_BETWEEN (bb, ENTRY_BLOCK_PTR, NULL, next_bb)
306    {
307      FOR_EACH_EDGE (e, ei, bb->succs)
308        {
309           if (e->count < 0)
310             e->count = 0;
311        }
312    }
313}
314
315/* Check consistency.
316   Return true if inconsistency is found.  */
317static bool
318is_inconsistent (void)
319{
320  basic_block bb;
321  bool inconsistent = false;
322  FOR_EACH_BB (bb)
323    {
324      inconsistent |= is_edge_inconsistent (bb->preds);
325      if (!dump_file && inconsistent)
326	return true;
327      inconsistent |= is_edge_inconsistent (bb->succs);
328      if (!dump_file && inconsistent)
329	return true;
330      if (bb->count < 0)
331        {
332	  if (dump_file)
333	    {
334	      fprintf (dump_file, "BB %i count is negative "
335		       HOST_WIDEST_INT_PRINT_DEC,
336		       bb->index,
337		       bb->count);
338	      dump_bb (bb, dump_file, 0);
339	    }
340	  inconsistent = true;
341	}
342      if (bb->count != sum_edge_counts (bb->preds))
343        {
344	  if (dump_file)
345	    {
346	      fprintf (dump_file, "BB %i count does not match sum of incoming edges "
347		       HOST_WIDEST_INT_PRINT_DEC" should be " HOST_WIDEST_INT_PRINT_DEC,
348		       bb->index,
349		       bb->count,
350		       sum_edge_counts (bb->preds));
351	      dump_bb (bb, dump_file, 0);
352	    }
353	  inconsistent = true;
354	}
355      if (bb->count != sum_edge_counts (bb->succs) &&
356          ! (find_edge (bb, EXIT_BLOCK_PTR) != NULL && block_ends_with_call_p (bb)))
357	{
358	  if (dump_file)
359	    {
360	      fprintf (dump_file, "BB %i count does not match sum of outgoing edges "
361		       HOST_WIDEST_INT_PRINT_DEC" should be " HOST_WIDEST_INT_PRINT_DEC,
362		       bb->index,
363		       bb->count,
364		       sum_edge_counts (bb->succs));
365	      dump_bb (bb, dump_file, 0);
366	    }
367	  inconsistent = true;
368	}
369      if (!dump_file && inconsistent)
370	return true;
371    }
372
373  return inconsistent;
374}
375
376/* Set each basic block count to the sum of its outgoing edge counts */
377static void
378set_bb_counts (void)
379{
380  basic_block bb;
381  FOR_BB_BETWEEN (bb, ENTRY_BLOCK_PTR, NULL, next_bb)
382    {
383      bb->count = sum_edge_counts (bb->succs);
384      gcc_assert (bb->count >= 0);
385    }
386}
387
388/* Reads profile data and returns total number of edge counts read */
389static int
390read_profile_edge_counts (gcov_type *exec_counts)
391{
392  basic_block bb;
393  int num_edges = 0;
394  int exec_counts_pos = 0;
395  /* For each edge not on the spanning tree, set its execution count from
396     the .da file.  */
397  /* The first count in the .da file is the number of times that the function
398     was entered.  This is the exec_count for block zero.  */
399
400  FOR_BB_BETWEEN (bb, ENTRY_BLOCK_PTR, NULL, next_bb)
401    {
402      edge e;
403      edge_iterator ei;
404
405      FOR_EACH_EDGE (e, ei, bb->succs)
406	if (!EDGE_INFO (e)->ignore && !EDGE_INFO (e)->on_tree)
407	  {
408	    num_edges++;
409	    if (exec_counts)
410	      {
411		e->count = exec_counts[exec_counts_pos++];
412		if (e->count > profile_info->sum_max)
413		  {
414		    error ("corrupted profile info: edge from %i to %i exceeds maximal count",
415			   bb->index, e->dest->index);
416		  }
417	      }
418	    else
419	      e->count = 0;
420
421	    EDGE_INFO (e)->count_valid = 1;
422	    BB_INFO (bb)->succ_count--;
423	    BB_INFO (e->dest)->pred_count--;
424	    if (dump_file)
425	      {
426		fprintf (dump_file, "\nRead edge from %i to %i, count:",
427			 bb->index, e->dest->index);
428		fprintf (dump_file, HOST_WIDEST_INT_PRINT_DEC,
429			 (HOST_WIDEST_INT) e->count);
430	      }
431	  }
432    }
433
434    return num_edges;
435}
436
437/* Compute the branch probabilities for the various branches.
438   Annotate them accordingly.  */
439
440static void
441compute_branch_probabilities (void)
442{
443  basic_block bb;
444  int i;
445  int num_edges = 0;
446  int changes;
447  int passes;
448  int hist_br_prob[20];
449  int num_branches;
450  gcov_type *exec_counts = get_exec_counts ();
451  int inconsistent = 0;
452
453  /* Very simple sanity checks so we catch bugs in our profiling code.  */
454  if (!profile_info)
455    return;
456  if (profile_info->run_max * profile_info->runs < profile_info->sum_max)
457    {
458      error ("corrupted profile info: run_max * runs < sum_max");
459      exec_counts = NULL;
460    }
461
462  if (profile_info->sum_all < profile_info->sum_max)
463    {
464      error ("corrupted profile info: sum_all is smaller than sum_max");
465      exec_counts = NULL;
466    }
467
468  /* Attach extra info block to each bb.  */
469  alloc_aux_for_blocks (sizeof (struct bb_info));
470  FOR_BB_BETWEEN (bb, ENTRY_BLOCK_PTR, NULL, next_bb)
471    {
472      edge e;
473      edge_iterator ei;
474
475      FOR_EACH_EDGE (e, ei, bb->succs)
476	if (!EDGE_INFO (e)->ignore)
477	  BB_INFO (bb)->succ_count++;
478      FOR_EACH_EDGE (e, ei, bb->preds)
479	if (!EDGE_INFO (e)->ignore)
480	  BB_INFO (bb)->pred_count++;
481    }
482
483  /* Avoid predicting entry on exit nodes.  */
484  BB_INFO (EXIT_BLOCK_PTR)->succ_count = 2;
485  BB_INFO (ENTRY_BLOCK_PTR)->pred_count = 2;
486
487  num_edges = read_profile_edge_counts (exec_counts);
488
489  if (dump_file)
490    fprintf (dump_file, "\n%d edge counts read\n", num_edges);
491
492  /* For every block in the file,
493     - if every exit/entrance edge has a known count, then set the block count
494     - if the block count is known, and every exit/entrance edge but one has
495     a known execution count, then set the count of the remaining edge
496
497     As edge counts are set, decrement the succ/pred count, but don't delete
498     the edge, that way we can easily tell when all edges are known, or only
499     one edge is unknown.  */
500
501  /* The order that the basic blocks are iterated through is important.
502     Since the code that finds spanning trees starts with block 0, low numbered
503     edges are put on the spanning tree in preference to high numbered edges.
504     Hence, most instrumented edges are at the end.  Graph solving works much
505     faster if we propagate numbers from the end to the start.
506
507     This takes an average of slightly more than 3 passes.  */
508
509  changes = 1;
510  passes = 0;
511  while (changes)
512    {
513      passes++;
514      changes = 0;
515      FOR_BB_BETWEEN (bb, EXIT_BLOCK_PTR, NULL, prev_bb)
516	{
517	  struct bb_info *bi = BB_INFO (bb);
518	  if (! bi->count_valid)
519	    {
520	      if (bi->succ_count == 0)
521		{
522		  edge e;
523		  edge_iterator ei;
524		  gcov_type total = 0;
525
526		  FOR_EACH_EDGE (e, ei, bb->succs)
527		    total += e->count;
528		  bb->count = total;
529		  bi->count_valid = 1;
530		  changes = 1;
531		}
532	      else if (bi->pred_count == 0)
533		{
534		  edge e;
535		  edge_iterator ei;
536		  gcov_type total = 0;
537
538		  FOR_EACH_EDGE (e, ei, bb->preds)
539		    total += e->count;
540		  bb->count = total;
541		  bi->count_valid = 1;
542		  changes = 1;
543		}
544	    }
545	  if (bi->count_valid)
546	    {
547	      if (bi->succ_count == 1)
548		{
549		  edge e;
550		  edge_iterator ei;
551		  gcov_type total = 0;
552
553		  /* One of the counts will be invalid, but it is zero,
554		     so adding it in also doesn't hurt.  */
555		  FOR_EACH_EDGE (e, ei, bb->succs)
556		    total += e->count;
557
558		  /* Search for the invalid edge, and set its count.  */
559		  FOR_EACH_EDGE (e, ei, bb->succs)
560		    if (! EDGE_INFO (e)->count_valid && ! EDGE_INFO (e)->ignore)
561		      break;
562
563		  /* Calculate count for remaining edge by conservation.  */
564		  total = bb->count - total;
565
566		  gcc_assert (e);
567		  EDGE_INFO (e)->count_valid = 1;
568		  e->count = total;
569		  bi->succ_count--;
570
571		  BB_INFO (e->dest)->pred_count--;
572		  changes = 1;
573		}
574	      if (bi->pred_count == 1)
575		{
576		  edge e;
577		  edge_iterator ei;
578		  gcov_type total = 0;
579
580		  /* One of the counts will be invalid, but it is zero,
581		     so adding it in also doesn't hurt.  */
582		  FOR_EACH_EDGE (e, ei, bb->preds)
583		    total += e->count;
584
585		  /* Search for the invalid edge, and set its count.  */
586		  FOR_EACH_EDGE (e, ei, bb->preds)
587		    if (!EDGE_INFO (e)->count_valid && !EDGE_INFO (e)->ignore)
588		      break;
589
590		  /* Calculate count for remaining edge by conservation.  */
591		  total = bb->count - total + e->count;
592
593		  gcc_assert (e);
594		  EDGE_INFO (e)->count_valid = 1;
595		  e->count = total;
596		  bi->pred_count--;
597
598		  BB_INFO (e->src)->succ_count--;
599		  changes = 1;
600		}
601	    }
602	}
603    }
604  if (dump_file)
605    dump_flow_info (dump_file, dump_flags);
606
607  total_num_passes += passes;
608  if (dump_file)
609    fprintf (dump_file, "Graph solving took %d passes.\n\n", passes);
610
611  /* If the graph has been correctly solved, every block will have a
612     succ and pred count of zero.  */
613  FOR_EACH_BB (bb)
614    {
615      gcc_assert (!BB_INFO (bb)->succ_count && !BB_INFO (bb)->pred_count);
616    }
617
618  /* Check for inconsistent basic block counts */
619  inconsistent = is_inconsistent ();
620
621  if (inconsistent)
622   {
623     if (flag_profile_correction)
624       {
625         /* Inconsistency detected. Make it flow-consistent. */
626         static int informed = 0;
627         if (informed == 0)
628           {
629             informed = 1;
630             inform (input_location, "correcting inconsistent profile data");
631           }
632         correct_negative_edge_counts ();
633         /* Set bb counts to the sum of the outgoing edge counts */
634         set_bb_counts ();
635         if (dump_file)
636           fprintf (dump_file, "\nCalling mcf_smooth_cfg\n");
637         mcf_smooth_cfg ();
638       }
639     else
640       error ("corrupted profile info: profile data is not flow-consistent");
641   }
642
643  /* For every edge, calculate its branch probability and add a reg_note
644     to the branch insn to indicate this.  */
645
646  for (i = 0; i < 20; i++)
647    hist_br_prob[i] = 0;
648  num_branches = 0;
649
650  FOR_BB_BETWEEN (bb, ENTRY_BLOCK_PTR, NULL, next_bb)
651    {
652      edge e;
653      edge_iterator ei;
654
655      if (bb->count < 0)
656	{
657	  error ("corrupted profile info: number of iterations for basic block %d thought to be %i",
658		 bb->index, (int)bb->count);
659	  bb->count = 0;
660	}
661      FOR_EACH_EDGE (e, ei, bb->succs)
662	{
663	  /* Function may return twice in the cased the called function is
664	     setjmp or calls fork, but we can't represent this by extra
665	     edge from the entry, since extra edge from the exit is
666	     already present.  We get negative frequency from the entry
667	     point.  */
668	  if ((e->count < 0
669	       && e->dest == EXIT_BLOCK_PTR)
670	      || (e->count > bb->count
671		  && e->dest != EXIT_BLOCK_PTR))
672	    {
673	      if (block_ends_with_call_p (bb))
674		e->count = e->count < 0 ? 0 : bb->count;
675	    }
676	  if (e->count < 0 || e->count > bb->count)
677	    {
678	      error ("corrupted profile info: number of executions for edge %d-%d thought to be %i",
679		     e->src->index, e->dest->index,
680		     (int)e->count);
681	      e->count = bb->count / 2;
682	    }
683	}
684      if (bb->count)
685	{
686	  FOR_EACH_EDGE (e, ei, bb->succs)
687	    e->probability = (e->count * REG_BR_PROB_BASE + bb->count / 2) / bb->count;
688	  if (bb->index >= NUM_FIXED_BLOCKS
689	      && block_ends_with_condjump_p (bb)
690	      && EDGE_COUNT (bb->succs) >= 2)
691	    {
692	      int prob;
693	      edge e;
694	      int index;
695
696	      /* Find the branch edge.  It is possible that we do have fake
697		 edges here.  */
698	      FOR_EACH_EDGE (e, ei, bb->succs)
699		if (!(e->flags & (EDGE_FAKE | EDGE_FALLTHRU)))
700		  break;
701
702	      prob = e->probability;
703	      index = prob * 20 / REG_BR_PROB_BASE;
704
705	      if (index == 20)
706		index = 19;
707	      hist_br_prob[index]++;
708
709	      num_branches++;
710	    }
711	}
712      /* As a last resort, distribute the probabilities evenly.
713	 Use simple heuristics that if there are normal edges,
714	 give all abnormals frequency of 0, otherwise distribute the
715	 frequency over abnormals (this is the case of noreturn
716	 calls).  */
717      else if (profile_status == PROFILE_ABSENT)
718	{
719	  int total = 0;
720
721	  FOR_EACH_EDGE (e, ei, bb->succs)
722	    if (!(e->flags & (EDGE_COMPLEX | EDGE_FAKE)))
723	      total ++;
724	  if (total)
725	    {
726	      FOR_EACH_EDGE (e, ei, bb->succs)
727		if (!(e->flags & (EDGE_COMPLEX | EDGE_FAKE)))
728		  e->probability = REG_BR_PROB_BASE / total;
729		else
730		  e->probability = 0;
731	    }
732	  else
733	    {
734	      total += EDGE_COUNT (bb->succs);
735	      FOR_EACH_EDGE (e, ei, bb->succs)
736		e->probability = REG_BR_PROB_BASE / total;
737	    }
738	  if (bb->index >= NUM_FIXED_BLOCKS
739	      && block_ends_with_condjump_p (bb)
740	      && EDGE_COUNT (bb->succs) >= 2)
741	    num_branches++;
742	}
743    }
744  counts_to_freqs ();
745  profile_status = PROFILE_READ;
746
747  if (dump_file)
748    {
749      fprintf (dump_file, "%d branches\n", num_branches);
750      if (num_branches)
751	for (i = 0; i < 10; i++)
752	  fprintf (dump_file, "%d%% branches in range %d-%d%%\n",
753		   (hist_br_prob[i] + hist_br_prob[19-i]) * 100 / num_branches,
754		   5 * i, 5 * i + 5);
755
756      total_num_branches += num_branches;
757      for (i = 0; i < 20; i++)
758	total_hist_br_prob[i] += hist_br_prob[i];
759
760      fputc ('\n', dump_file);
761      fputc ('\n', dump_file);
762    }
763
764  free_aux_for_blocks ();
765}
766
767/* Load value histograms values whose description is stored in VALUES array
768   from .gcda file.  */
769
770static void
771compute_value_histograms (histogram_values values)
772{
773  unsigned i, j, t, any;
774  unsigned n_histogram_counters[GCOV_N_VALUE_COUNTERS];
775  gcov_type *histogram_counts[GCOV_N_VALUE_COUNTERS];
776  gcov_type *act_count[GCOV_N_VALUE_COUNTERS];
777  gcov_type *aact_count;
778
779  for (t = 0; t < GCOV_N_VALUE_COUNTERS; t++)
780    n_histogram_counters[t] = 0;
781
782  for (i = 0; i < VEC_length (histogram_value, values); i++)
783    {
784      histogram_value hist = VEC_index (histogram_value, values, i);
785      n_histogram_counters[(int) hist->type] += hist->n_counters;
786    }
787
788  any = 0;
789  for (t = 0; t < GCOV_N_VALUE_COUNTERS; t++)
790    {
791      if (!n_histogram_counters[t])
792	{
793	  histogram_counts[t] = NULL;
794	  continue;
795	}
796
797      histogram_counts[t] =
798	get_coverage_counts (COUNTER_FOR_HIST_TYPE (t),
799			     n_histogram_counters[t], NULL);
800      if (histogram_counts[t])
801	any = 1;
802      act_count[t] = histogram_counts[t];
803    }
804  if (!any)
805    return;
806
807  for (i = 0; i < VEC_length (histogram_value, values); i++)
808    {
809      histogram_value hist = VEC_index (histogram_value, values, i);
810      gimple stmt = hist->hvalue.stmt;
811
812      t = (int) hist->type;
813
814      aact_count = act_count[t];
815      act_count[t] += hist->n_counters;
816
817      gimple_add_histogram_value (cfun, stmt, hist);
818      hist->hvalue.counters =  XNEWVEC (gcov_type, hist->n_counters);
819      for (j = 0; j < hist->n_counters; j++)
820	hist->hvalue.counters[j] = aact_count[j];
821    }
822
823  for (t = 0; t < GCOV_N_VALUE_COUNTERS; t++)
824    if (histogram_counts[t])
825      free (histogram_counts[t]);
826}
827
828/* The entry basic block will be moved around so that it has index=1,
829   there is nothing at index 0 and the exit is at n_basic_block.  */
830#define BB_TO_GCOV_INDEX(bb)  ((bb)->index - 1)
831/* When passed NULL as file_name, initialize.
832   When passed something else, output the necessary commands to change
833   line to LINE and offset to FILE_NAME.  */
834static void
835output_location (char const *file_name, int line,
836		 gcov_position_t *offset, basic_block bb)
837{
838  static char const *prev_file_name;
839  static int prev_line;
840  bool name_differs, line_differs;
841
842  if (!file_name)
843    {
844      prev_file_name = NULL;
845      prev_line = -1;
846      return;
847    }
848
849  name_differs = !prev_file_name || strcmp (file_name, prev_file_name);
850  line_differs = prev_line != line;
851
852  if (name_differs || line_differs)
853    {
854      if (!*offset)
855	{
856	  *offset = gcov_write_tag (GCOV_TAG_LINES);
857	  gcov_write_unsigned (BB_TO_GCOV_INDEX (bb));
858	  name_differs = line_differs=true;
859	}
860
861      /* If this is a new source file, then output the
862	 file's name to the .bb file.  */
863      if (name_differs)
864	{
865	  prev_file_name = file_name;
866	  gcov_write_unsigned (0);
867	  gcov_write_string (prev_file_name);
868	}
869      if (line_differs)
870	{
871	  gcov_write_unsigned (line);
872	  prev_line = line;
873	}
874     }
875}
876
877/* Instrument and/or analyze program behavior based on program flow graph.
878   In either case, this function builds a flow graph for the function being
879   compiled.  The flow graph is stored in BB_GRAPH.
880
881   When FLAG_PROFILE_ARCS is nonzero, this function instruments the edges in
882   the flow graph that are needed to reconstruct the dynamic behavior of the
883   flow graph.
884
885   When FLAG_BRANCH_PROBABILITIES is nonzero, this function reads auxiliary
886   information from a data file containing edge count information from previous
887   executions of the function being compiled.  In this case, the flow graph is
888   annotated with actual execution counts, which are later propagated into the
889   rtl for optimization purposes.
890
891   Main entry point of this file.  */
892
893void
894branch_prob (void)
895{
896  basic_block bb;
897  unsigned i;
898  unsigned num_edges, ignored_edges;
899  unsigned num_instrumented;
900  struct edge_list *el;
901  histogram_values values = NULL;
902
903  total_num_times_called++;
904
905  flow_call_edges_add (NULL);
906  add_noreturn_fake_exit_edges ();
907
908  /* We can't handle cyclic regions constructed using abnormal edges.
909     To avoid these we replace every source of abnormal edge by a fake
910     edge from entry node and every destination by fake edge to exit.
911     This keeps graph acyclic and our calculation exact for all normal
912     edges except for exit and entrance ones.
913
914     We also add fake exit edges for each call and asm statement in the
915     basic, since it may not return.  */
916
917  FOR_EACH_BB (bb)
918    {
919      int need_exit_edge = 0, need_entry_edge = 0;
920      int have_exit_edge = 0, have_entry_edge = 0;
921      edge e;
922      edge_iterator ei;
923
924      /* Functions returning multiple times are not handled by extra edges.
925         Instead we simply allow negative counts on edges from exit to the
926         block past call and corresponding probabilities.  We can't go
927         with the extra edges because that would result in flowgraph that
928	 needs to have fake edges outside the spanning tree.  */
929
930      FOR_EACH_EDGE (e, ei, bb->succs)
931	{
932	  gimple_stmt_iterator gsi;
933	  gimple last = NULL;
934
935	  /* It may happen that there are compiler generated statements
936	     without a locus at all.  Go through the basic block from the
937	     last to the first statement looking for a locus.  */
938	  for (gsi = gsi_last_bb (bb); !gsi_end_p (gsi); gsi_prev (&gsi))
939	    {
940	      last = gsi_stmt (gsi);
941	      if (gimple_has_location (last))
942		break;
943	    }
944
945	  /* Edge with goto locus might get wrong coverage info unless
946	     it is the only edge out of BB.
947	     Don't do that when the locuses match, so
948	     if (blah) goto something;
949	     is not computed twice.  */
950	  if (last
951	      && gimple_has_location (last)
952	      && e->goto_locus != UNKNOWN_LOCATION
953	      && !single_succ_p (bb)
954	      && (LOCATION_FILE (e->goto_locus)
955	          != LOCATION_FILE (gimple_location (last))
956		  || (LOCATION_LINE (e->goto_locus)
957		      != LOCATION_LINE (gimple_location (last)))))
958	    {
959	      basic_block new_bb = split_edge (e);
960	      edge ne = single_succ_edge (new_bb);
961	      ne->goto_locus = e->goto_locus;
962	      ne->goto_block = e->goto_block;
963	    }
964	  if ((e->flags & (EDGE_ABNORMAL | EDGE_ABNORMAL_CALL))
965	       && e->dest != EXIT_BLOCK_PTR)
966	    need_exit_edge = 1;
967	  if (e->dest == EXIT_BLOCK_PTR)
968	    have_exit_edge = 1;
969	}
970      FOR_EACH_EDGE (e, ei, bb->preds)
971	{
972	  if ((e->flags & (EDGE_ABNORMAL | EDGE_ABNORMAL_CALL))
973	       && e->src != ENTRY_BLOCK_PTR)
974	    need_entry_edge = 1;
975	  if (e->src == ENTRY_BLOCK_PTR)
976	    have_entry_edge = 1;
977	}
978
979      if (need_exit_edge && !have_exit_edge)
980	{
981	  if (dump_file)
982	    fprintf (dump_file, "Adding fake exit edge to bb %i\n",
983		     bb->index);
984	  make_edge (bb, EXIT_BLOCK_PTR, EDGE_FAKE);
985	}
986      if (need_entry_edge && !have_entry_edge)
987	{
988	  if (dump_file)
989	    fprintf (dump_file, "Adding fake entry edge to bb %i\n",
990		     bb->index);
991	  make_edge (ENTRY_BLOCK_PTR, bb, EDGE_FAKE);
992	}
993    }
994
995  el = create_edge_list ();
996  num_edges = NUM_EDGES (el);
997  alloc_aux_for_edges (sizeof (struct edge_info));
998
999  /* The basic blocks are expected to be numbered sequentially.  */
1000  compact_blocks ();
1001
1002  ignored_edges = 0;
1003  for (i = 0 ; i < num_edges ; i++)
1004    {
1005      edge e = INDEX_EDGE (el, i);
1006      e->count = 0;
1007
1008      /* Mark edges we've replaced by fake edges above as ignored.  */
1009      if ((e->flags & (EDGE_ABNORMAL | EDGE_ABNORMAL_CALL))
1010	  && e->src != ENTRY_BLOCK_PTR && e->dest != EXIT_BLOCK_PTR)
1011	{
1012	  EDGE_INFO (e)->ignore = 1;
1013	  ignored_edges++;
1014	}
1015    }
1016
1017  /* Create spanning tree from basic block graph, mark each edge that is
1018     on the spanning tree.  We insert as many abnormal and critical edges
1019     as possible to minimize number of edge splits necessary.  */
1020
1021  find_spanning_tree (el);
1022
1023  /* Fake edges that are not on the tree will not be instrumented, so
1024     mark them ignored.  */
1025  for (num_instrumented = i = 0; i < num_edges; i++)
1026    {
1027      edge e = INDEX_EDGE (el, i);
1028      struct edge_info *inf = EDGE_INFO (e);
1029
1030      if (inf->ignore || inf->on_tree)
1031	/*NOP*/;
1032      else if (e->flags & EDGE_FAKE)
1033	{
1034	  inf->ignore = 1;
1035	  ignored_edges++;
1036	}
1037      else
1038	num_instrumented++;
1039    }
1040
1041  total_num_blocks += n_basic_blocks;
1042  if (dump_file)
1043    fprintf (dump_file, "%d basic blocks\n", n_basic_blocks);
1044
1045  total_num_edges += num_edges;
1046  if (dump_file)
1047    fprintf (dump_file, "%d edges\n", num_edges);
1048
1049  total_num_edges_ignored += ignored_edges;
1050  if (dump_file)
1051    fprintf (dump_file, "%d ignored edges\n", ignored_edges);
1052
1053  /* Write the data from which gcov can reconstruct the basic block
1054     graph.  */
1055
1056  /* Basic block flags */
1057  if (coverage_begin_output ())
1058    {
1059      gcov_position_t offset;
1060
1061      offset = gcov_write_tag (GCOV_TAG_BLOCKS);
1062      for (i = 0; i != (unsigned) (n_basic_blocks); i++)
1063	gcov_write_unsigned (0);
1064      gcov_write_length (offset);
1065    }
1066
1067   /* Keep all basic block indexes nonnegative in the gcov output.
1068      Index 0 is used for entry block, last index is for exit block.
1069      */
1070  ENTRY_BLOCK_PTR->index = 1;
1071  EXIT_BLOCK_PTR->index = last_basic_block;
1072
1073  /* Arcs */
1074  if (coverage_begin_output ())
1075    {
1076      gcov_position_t offset;
1077
1078      FOR_BB_BETWEEN (bb, ENTRY_BLOCK_PTR, EXIT_BLOCK_PTR, next_bb)
1079	{
1080	  edge e;
1081	  edge_iterator ei;
1082
1083	  offset = gcov_write_tag (GCOV_TAG_ARCS);
1084	  gcov_write_unsigned (BB_TO_GCOV_INDEX (bb));
1085
1086	  FOR_EACH_EDGE (e, ei, bb->succs)
1087	    {
1088	      struct edge_info *i = EDGE_INFO (e);
1089	      if (!i->ignore)
1090		{
1091		  unsigned flag_bits = 0;
1092
1093		  if (i->on_tree)
1094		    flag_bits |= GCOV_ARC_ON_TREE;
1095		  if (e->flags & EDGE_FAKE)
1096		    flag_bits |= GCOV_ARC_FAKE;
1097		  if (e->flags & EDGE_FALLTHRU)
1098		    flag_bits |= GCOV_ARC_FALLTHROUGH;
1099		  /* On trees we don't have fallthru flags, but we can
1100		     recompute them from CFG shape.  */
1101		  if (e->flags & (EDGE_TRUE_VALUE | EDGE_FALSE_VALUE)
1102		      && e->src->next_bb == e->dest)
1103		    flag_bits |= GCOV_ARC_FALLTHROUGH;
1104
1105		  gcov_write_unsigned (BB_TO_GCOV_INDEX (e->dest));
1106		  gcov_write_unsigned (flag_bits);
1107	        }
1108	    }
1109
1110	  gcov_write_length (offset);
1111	}
1112    }
1113
1114  /* Line numbers.  */
1115  if (coverage_begin_output ())
1116    {
1117      gcov_position_t offset;
1118
1119      /* Initialize the output.  */
1120      output_location (NULL, 0, NULL, NULL);
1121
1122      FOR_EACH_BB (bb)
1123	{
1124	  gimple_stmt_iterator gsi;
1125
1126	  offset = 0;
1127
1128	  if (bb == ENTRY_BLOCK_PTR->next_bb)
1129	    {
1130	      expanded_location curr_location =
1131		expand_location (DECL_SOURCE_LOCATION (current_function_decl));
1132	      output_location (curr_location.file, curr_location.line,
1133			       &offset, bb);
1134	    }
1135
1136	  for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
1137	    {
1138	      gimple stmt = gsi_stmt (gsi);
1139	      if (gimple_has_location (stmt))
1140		output_location (gimple_filename (stmt), gimple_lineno (stmt),
1141				 &offset, bb);
1142	    }
1143
1144	  /* Notice GOTO expressions we eliminated while constructing the
1145	     CFG.  */
1146	  if (single_succ_p (bb)
1147	      && single_succ_edge (bb)->goto_locus != UNKNOWN_LOCATION)
1148	    {
1149	      location_t curr_location = single_succ_edge (bb)->goto_locus;
1150	      /* ??? The FILE/LINE API is inconsistent for these cases.  */
1151	      output_location (LOCATION_FILE (curr_location),
1152			       LOCATION_LINE (curr_location), &offset, bb);
1153	    }
1154
1155	  if (offset)
1156	    {
1157	      /* A file of NULL indicates the end of run.  */
1158	      gcov_write_unsigned (0);
1159	      gcov_write_string (NULL);
1160	      gcov_write_length (offset);
1161	    }
1162	}
1163    }
1164
1165  ENTRY_BLOCK_PTR->index = ENTRY_BLOCK;
1166  EXIT_BLOCK_PTR->index = EXIT_BLOCK;
1167#undef BB_TO_GCOV_INDEX
1168
1169  if (flag_profile_values)
1170    find_values_to_profile (&values);
1171
1172  if (flag_branch_probabilities)
1173    {
1174      compute_branch_probabilities ();
1175      if (flag_profile_values)
1176	compute_value_histograms (values);
1177    }
1178
1179  remove_fake_edges ();
1180
1181  /* For each edge not on the spanning tree, add counting code.  */
1182  if (profile_arc_flag
1183      && coverage_counter_alloc (GCOV_COUNTER_ARCS, num_instrumented))
1184    {
1185      unsigned n_instrumented;
1186
1187      profile_hooks->init_edge_profiler ();
1188
1189      n_instrumented = instrument_edges (el);
1190
1191      gcc_assert (n_instrumented == num_instrumented);
1192
1193      if (flag_profile_values)
1194	instrument_values (values);
1195
1196      /* Commit changes done by instrumentation.  */
1197      gsi_commit_edge_inserts ();
1198    }
1199
1200  free_aux_for_edges ();
1201
1202  VEC_free (histogram_value, heap, values);
1203  free_edge_list (el);
1204  coverage_end_function ();
1205}
1206
1207/* Union find algorithm implementation for the basic blocks using
1208   aux fields.  */
1209
1210static basic_block
1211find_group (basic_block bb)
1212{
1213  basic_block group = bb, bb1;
1214
1215  while ((basic_block) group->aux != group)
1216    group = (basic_block) group->aux;
1217
1218  /* Compress path.  */
1219  while ((basic_block) bb->aux != group)
1220    {
1221      bb1 = (basic_block) bb->aux;
1222      bb->aux = (void *) group;
1223      bb = bb1;
1224    }
1225  return group;
1226}
1227
1228static void
1229union_groups (basic_block bb1, basic_block bb2)
1230{
1231  basic_block bb1g = find_group (bb1);
1232  basic_block bb2g = find_group (bb2);
1233
1234  /* ??? I don't have a place for the rank field.  OK.  Lets go w/o it,
1235     this code is unlikely going to be performance problem anyway.  */
1236  gcc_assert (bb1g != bb2g);
1237
1238  bb1g->aux = bb2g;
1239}
1240
1241/* This function searches all of the edges in the program flow graph, and puts
1242   as many bad edges as possible onto the spanning tree.  Bad edges include
1243   abnormals edges, which can't be instrumented at the moment.  Since it is
1244   possible for fake edges to form a cycle, we will have to develop some
1245   better way in the future.  Also put critical edges to the tree, since they
1246   are more expensive to instrument.  */
1247
1248static void
1249find_spanning_tree (struct edge_list *el)
1250{
1251  int i;
1252  int num_edges = NUM_EDGES (el);
1253  basic_block bb;
1254
1255  /* We use aux field for standard union-find algorithm.  */
1256  FOR_BB_BETWEEN (bb, ENTRY_BLOCK_PTR, NULL, next_bb)
1257    bb->aux = bb;
1258
1259  /* Add fake edge exit to entry we can't instrument.  */
1260  union_groups (EXIT_BLOCK_PTR, ENTRY_BLOCK_PTR);
1261
1262  /* First add all abnormal edges to the tree unless they form a cycle. Also
1263     add all edges to EXIT_BLOCK_PTR to avoid inserting profiling code behind
1264     setting return value from function.  */
1265  for (i = 0; i < num_edges; i++)
1266    {
1267      edge e = INDEX_EDGE (el, i);
1268      if (((e->flags & (EDGE_ABNORMAL | EDGE_ABNORMAL_CALL | EDGE_FAKE))
1269	   || e->dest == EXIT_BLOCK_PTR)
1270	  && !EDGE_INFO (e)->ignore
1271	  && (find_group (e->src) != find_group (e->dest)))
1272	{
1273	  if (dump_file)
1274	    fprintf (dump_file, "Abnormal edge %d to %d put to tree\n",
1275		     e->src->index, e->dest->index);
1276	  EDGE_INFO (e)->on_tree = 1;
1277	  union_groups (e->src, e->dest);
1278	}
1279    }
1280
1281  /* Now insert all critical edges to the tree unless they form a cycle.  */
1282  for (i = 0; i < num_edges; i++)
1283    {
1284      edge e = INDEX_EDGE (el, i);
1285      if (EDGE_CRITICAL_P (e) && !EDGE_INFO (e)->ignore
1286	  && find_group (e->src) != find_group (e->dest))
1287	{
1288	  if (dump_file)
1289	    fprintf (dump_file, "Critical edge %d to %d put to tree\n",
1290		     e->src->index, e->dest->index);
1291	  EDGE_INFO (e)->on_tree = 1;
1292	  union_groups (e->src, e->dest);
1293	}
1294    }
1295
1296  /* And now the rest.  */
1297  for (i = 0; i < num_edges; i++)
1298    {
1299      edge e = INDEX_EDGE (el, i);
1300      if (!EDGE_INFO (e)->ignore
1301	  && find_group (e->src) != find_group (e->dest))
1302	{
1303	  if (dump_file)
1304	    fprintf (dump_file, "Normal edge %d to %d put to tree\n",
1305		     e->src->index, e->dest->index);
1306	  EDGE_INFO (e)->on_tree = 1;
1307	  union_groups (e->src, e->dest);
1308	}
1309    }
1310
1311  FOR_BB_BETWEEN (bb, ENTRY_BLOCK_PTR, NULL, next_bb)
1312    bb->aux = NULL;
1313}
1314
1315/* Perform file-level initialization for branch-prob processing.  */
1316
1317void
1318init_branch_prob (void)
1319{
1320  int i;
1321
1322  total_num_blocks = 0;
1323  total_num_edges = 0;
1324  total_num_edges_ignored = 0;
1325  total_num_edges_instrumented = 0;
1326  total_num_blocks_created = 0;
1327  total_num_passes = 0;
1328  total_num_times_called = 0;
1329  total_num_branches = 0;
1330  for (i = 0; i < 20; i++)
1331    total_hist_br_prob[i] = 0;
1332}
1333
1334/* Performs file-level cleanup after branch-prob processing
1335   is completed.  */
1336
1337void
1338end_branch_prob (void)
1339{
1340  if (dump_file)
1341    {
1342      fprintf (dump_file, "\n");
1343      fprintf (dump_file, "Total number of blocks: %d\n",
1344	       total_num_blocks);
1345      fprintf (dump_file, "Total number of edges: %d\n", total_num_edges);
1346      fprintf (dump_file, "Total number of ignored edges: %d\n",
1347	       total_num_edges_ignored);
1348      fprintf (dump_file, "Total number of instrumented edges: %d\n",
1349	       total_num_edges_instrumented);
1350      fprintf (dump_file, "Total number of blocks created: %d\n",
1351	       total_num_blocks_created);
1352      fprintf (dump_file, "Total number of graph solution passes: %d\n",
1353	       total_num_passes);
1354      if (total_num_times_called != 0)
1355	fprintf (dump_file, "Average number of graph solution passes: %d\n",
1356		 (total_num_passes + (total_num_times_called  >> 1))
1357		 / total_num_times_called);
1358      fprintf (dump_file, "Total number of branches: %d\n",
1359	       total_num_branches);
1360      if (total_num_branches)
1361	{
1362	  int i;
1363
1364	  for (i = 0; i < 10; i++)
1365	    fprintf (dump_file, "%d%% branches in range %d-%d%%\n",
1366		     (total_hist_br_prob[i] + total_hist_br_prob[19-i]) * 100
1367		     / total_num_branches, 5*i, 5*i+5);
1368	}
1369    }
1370}
1371
1372/* Set up hooks to enable tree-based profiling.  */
1373
1374void
1375tree_register_profile_hooks (void)
1376{
1377  gcc_assert (current_ir_type () == IR_GIMPLE);
1378  profile_hooks = &tree_profile_hooks;
1379}
1380