/* profile.c revision 132718 */
/* Calculate branch probabilities, and basic block execution counts.
   Copyright (C) 1990, 1991, 1992, 1993, 1994, 1996, 1997, 1998, 1999,
   2000, 2001, 2002, 2003  Free Software Foundation, Inc.
   Contributed by James E. Wilson, UC Berkeley/Cygnus Support;
   based on some ideas from Dain Samples of UC Berkeley.
   Further mangling by Bob Manson, Cygnus Support.

This file is part of GCC.

GCC is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free
Software Foundation; either version 2, or (at your option) any later
version.

GCC is distributed in the hope that it will be useful, but WITHOUT ANY
WARRANTY; without even the implied warranty of MERCHANTABILITY or
FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
for more details.

You should have received a copy of the GNU General Public License
along with GCC; see the file COPYING.  If not, write to the Free
Software Foundation, 59 Temple Place - Suite 330, Boston, MA
02111-1307, USA.  */

/* Generate basic block profile instrumentation and auxiliary files.
   Profile generation is optimized, so that not all arcs in the basic
   block graph need instrumenting. First, the BB graph is closed with
   one entry (function start), and one exit (function exit).  Any
   ABNORMAL_EDGE cannot be instrumented (because there is no control
   path to place the code). We close the graph by inserting fake
   EDGE_FAKE edges to the EXIT_BLOCK, from the sources of abnormal
   edges that do not go to the exit_block. We ignore such abnormal
   edges.  Naturally these fake edges are never directly traversed,
   and so *cannot* be directly instrumented.  Some other graph
   massaging is done. To optimize the instrumentation we generate the
   BB minimal span tree, only edges that are not on the span tree
   (plus the entry point) need instrumenting. From that information
   all other edge counts can be deduced.  By construction all fake
   edges must be on the spanning tree. We also attempt to place
   EDGE_CRITICAL edges on the spanning tree.

   The auxiliary file generated is <dumpbase>.bbg. The format is
   described in full in gcov-io.h.  */

/* ??? Register allocation should use basic block execution counts to
   give preference to the most commonly executed blocks.  */

/* ??? Should calculate branch probabilities before instrumenting code, since
   then we can use arc counts to help decide which arcs to instrument.  */
51#include "config.h"
52#include "system.h"
53#include "coretypes.h"
54#include "tm.h"
55#include "rtl.h"
56#include "flags.h"
57#include "output.h"
58#include "regs.h"
59#include "expr.h"
60#include "function.h"
61#include "toplev.h"
62#include "coverage.h"
63#include "value-prof.h"
64#include "tree.h"
65
/* Additional information about the edges we need.  */
struct edge_info {
  /* The execution count of this edge is known — either read from the
     profile data or deduced by the graph solver.  */
  unsigned int count_valid : 1;

  /* Is on the spanning tree.  */
  unsigned int on_tree : 1;

  /* Pretend this edge does not exist (it is abnormal and we've
     inserted a fake to compensate).  */
  unsigned int ignore : 1;
};
77
/* Additional information about the basic blocks we need.  */
struct bb_info {
  /* The execution count of this block is known.  */
  unsigned int count_valid : 1;

  /* Number of successor and predecessor edges whose counts are still
     unknown; decremented as edge counts become valid, so reaching zero
     (or one) tells the solver when counts can be deduced.  */
  gcov_type succ_count;
  gcov_type pred_count;
};
85
/* Access the pass-local info hung off the AUX field of an edge/block.
   Valid only between alloc_aux_for_edges/blocks and the matching free.  */
#define EDGE_INFO(e)  ((struct edge_info *) (e)->aux)
#define BB_INFO(b)  ((struct bb_info *) (b)->aux)

/* Counter summary from the last set of coverage counts read.  */

const struct gcov_ctr_summary *profile_info;

/* Collect statistics on the performance of this pass for the entire source
   file.  */

static int total_num_blocks;
static int total_num_edges;
static int total_num_edges_ignored;
static int total_num_edges_instrumented;
static int total_num_blocks_created;
static int total_num_passes;
static int total_num_times_called;
/* Histogram of branch probabilities in twenty 5% buckets.  */
static int total_hist_br_prob[20];
static int total_num_never_executed;
static int total_num_branches;

/* Forward declarations.  */
static void find_spanning_tree (struct edge_list *);
static rtx gen_edge_profiler (int);
static rtx gen_interval_profiler (struct histogram_value *, unsigned,
				  unsigned);
static rtx gen_pow2_profiler (struct histogram_value *, unsigned, unsigned);
static rtx gen_one_value_profiler (struct histogram_value *, unsigned,
				   unsigned);
static rtx gen_const_delta_profiler (struct histogram_value *, unsigned,
				     unsigned);
static unsigned instrument_edges (struct edge_list *);
static void instrument_values (unsigned, struct histogram_value *);
static void compute_branch_probabilities (void);
static void compute_value_histograms (unsigned, struct histogram_value *);
static gcov_type * get_exec_counts (void);
static basic_block find_group (basic_block);
static void union_groups (basic_block, basic_block);
124
125
/* Add edge instrumentation code to the insn chain.

   EL is the edge list for the current flow graph.  Every edge that is
   neither ignored nor on the spanning tree gets a counter-bumping
   sequence inserted on it.  Returns the number of edges instrumented,
   which the caller checks against its own expected count.  */

static unsigned
instrument_edges (struct edge_list *el)
{
  unsigned num_instr_edges = 0;
  int num_edges = NUM_EDGES (el);
  basic_block bb;

  /* The fake edges used to close the graph cannot carry code; drop
     them before walking the successor lists.  */
  remove_fake_edges ();

  FOR_BB_BETWEEN (bb, ENTRY_BLOCK_PTR, NULL, next_bb)
    {
      edge e;

      for (e = bb->succ; e; e = e->succ_next)
	{
	  struct edge_info *inf = EDGE_INFO (e);

	  if (!inf->ignore && !inf->on_tree)
	    {
	      rtx edge_profile;

	      /* Abnormal edges cannot be instrumented; by construction
		 they are all ignored or on the spanning tree.  */
	      if (e->flags & EDGE_ABNORMAL)
		abort ();
	      if (rtl_dump_file)
		fprintf (rtl_dump_file, "Edge %d to %d instrumented%s\n",
			 e->src->index, e->dest->index,
			 EDGE_CRITICAL_P (e) ? " (and split)" : "");
	      /* The edge index doubles as the counter index.  */
	      edge_profile = gen_edge_profiler (num_instr_edges++);
	      insert_insn_on_edge (edge_profile, e);
	      rebuild_jump_labels (e->insns);
	    }
	}
    }

  total_num_blocks_created += num_edges;
  if (rtl_dump_file)
    fprintf (rtl_dump_file, "%d edges instrumented\n", num_instr_edges);
  return num_instr_edges;
}
170
171/* Add code to measure histograms list of VALUES of length N_VALUES.  */
172static void
173instrument_values (unsigned n_values, struct histogram_value *values)
174{
175  rtx sequence;
176  unsigned i, t;
177  edge e;
178
179  /* Emit code to generate the histograms before the insns.  */
180
181  for (i = 0; i < n_values; i++)
182    {
183      e = split_block (BLOCK_FOR_INSN (values[i].insn),
184		       PREV_INSN (values[i].insn));
185      switch (values[i].type)
186	{
187	case HIST_TYPE_INTERVAL:
188	  t = GCOV_COUNTER_V_INTERVAL;
189	  break;
190
191	case HIST_TYPE_POW2:
192	  t = GCOV_COUNTER_V_POW2;
193	  break;
194
195	case HIST_TYPE_SINGLE_VALUE:
196	  t = GCOV_COUNTER_V_SINGLE;
197	  break;
198
199	case HIST_TYPE_CONST_DELTA:
200	  t = GCOV_COUNTER_V_DELTA;
201	  break;
202
203	default:
204	  abort ();
205	}
206      if (!coverage_counter_alloc (t, values[i].n_counters))
207	continue;
208
209      switch (values[i].type)
210	{
211	case HIST_TYPE_INTERVAL:
212	  sequence = gen_interval_profiler (values + i, t, 0);
213	  break;
214
215	case HIST_TYPE_POW2:
216	  sequence = gen_pow2_profiler (values + i, t, 0);
217	  break;
218
219	case HIST_TYPE_SINGLE_VALUE:
220	  sequence = gen_one_value_profiler (values + i, t, 0);
221	  break;
222
223	case HIST_TYPE_CONST_DELTA:
224	  sequence = gen_const_delta_profiler (values + i, t, 0);
225	  break;
226
227	default:
228	  abort ();
229	}
230
231      safe_insert_insn_on_edge (sequence, e);
232    }
233}
234
235
236/* Computes hybrid profile for all matching entries in da_file.  */
237
238static gcov_type *
239get_exec_counts (void)
240{
241  unsigned num_edges = 0;
242  basic_block bb;
243  gcov_type *counts;
244
245  /* Count the edges to be (possibly) instrumented.  */
246  FOR_BB_BETWEEN (bb, ENTRY_BLOCK_PTR, NULL, next_bb)
247    {
248      edge e;
249      for (e = bb->succ; e; e = e->succ_next)
250	if (!EDGE_INFO (e)->ignore && !EDGE_INFO (e)->on_tree)
251	  num_edges++;
252    }
253
254  counts = get_coverage_counts (GCOV_COUNTER_ARCS, num_edges, &profile_info);
255  if (!counts)
256    return NULL;
257
258  if (rtl_dump_file && profile_info)
259    fprintf(rtl_dump_file, "Merged %u profiles with maximal count %u.\n",
260	    profile_info->runs, (unsigned) profile_info->sum_max);
261
262  return counts;
263}
264
265
266/* Compute the branch probabilities for the various branches.
267   Annotate them accordingly.  */
268
269static void
270compute_branch_probabilities (void)
271{
272  basic_block bb;
273  int i;
274  int num_edges = 0;
275  int changes;
276  int passes;
277  int hist_br_prob[20];
278  int num_never_executed;
279  int num_branches;
280  gcov_type *exec_counts = get_exec_counts ();
281  int exec_counts_pos = 0;
282
283  /* Very simple sanity checks so we catch bugs in our profiling code.  */
284  if (profile_info)
285    {
286      if (profile_info->run_max * profile_info->runs < profile_info->sum_max)
287	{
288	  error ("corrupted profile info: run_max * runs < sum_max");
289	  exec_counts = NULL;
290	}
291
292      if (profile_info->sum_all < profile_info->sum_max)
293	{
294	  error ("corrupted profile info: sum_all is smaller than sum_max");
295	  exec_counts = NULL;
296	}
297    }
298
299  /* Attach extra info block to each bb.  */
300
301  alloc_aux_for_blocks (sizeof (struct bb_info));
302  FOR_BB_BETWEEN (bb, ENTRY_BLOCK_PTR, NULL, next_bb)
303    {
304      edge e;
305
306      for (e = bb->succ; e; e = e->succ_next)
307	if (!EDGE_INFO (e)->ignore)
308	  BB_INFO (bb)->succ_count++;
309      for (e = bb->pred; e; e = e->pred_next)
310	if (!EDGE_INFO (e)->ignore)
311	  BB_INFO (bb)->pred_count++;
312    }
313
314  /* Avoid predicting entry on exit nodes.  */
315  BB_INFO (EXIT_BLOCK_PTR)->succ_count = 2;
316  BB_INFO (ENTRY_BLOCK_PTR)->pred_count = 2;
317
318  /* For each edge not on the spanning tree, set its execution count from
319     the .da file.  */
320
321  /* The first count in the .da file is the number of times that the function
322     was entered.  This is the exec_count for block zero.  */
323
324  FOR_BB_BETWEEN (bb, ENTRY_BLOCK_PTR, NULL, next_bb)
325    {
326      edge e;
327      for (e = bb->succ; e; e = e->succ_next)
328	if (!EDGE_INFO (e)->ignore && !EDGE_INFO (e)->on_tree)
329	  {
330	    num_edges++;
331	    if (exec_counts)
332	      {
333		e->count = exec_counts[exec_counts_pos++];
334		if (e->count > profile_info->sum_max)
335		  {
336		    error ("corrupted profile info: edge from %i to %i exceeds maximal count",
337			   bb->index, e->dest->index);
338		  }
339	      }
340	    else
341	      e->count = 0;
342
343	    EDGE_INFO (e)->count_valid = 1;
344	    BB_INFO (bb)->succ_count--;
345	    BB_INFO (e->dest)->pred_count--;
346	    if (rtl_dump_file)
347	      {
348		fprintf (rtl_dump_file, "\nRead edge from %i to %i, count:",
349			 bb->index, e->dest->index);
350		fprintf (rtl_dump_file, HOST_WIDEST_INT_PRINT_DEC,
351			 (HOST_WIDEST_INT) e->count);
352	      }
353	  }
354    }
355
356  if (rtl_dump_file)
357    fprintf (rtl_dump_file, "\n%d edge counts read\n", num_edges);
358
359  /* For every block in the file,
360     - if every exit/entrance edge has a known count, then set the block count
361     - if the block count is known, and every exit/entrance edge but one has
362     a known execution count, then set the count of the remaining edge
363
364     As edge counts are set, decrement the succ/pred count, but don't delete
365     the edge, that way we can easily tell when all edges are known, or only
366     one edge is unknown.  */
367
368  /* The order that the basic blocks are iterated through is important.
369     Since the code that finds spanning trees starts with block 0, low numbered
370     edges are put on the spanning tree in preference to high numbered edges.
371     Hence, most instrumented edges are at the end.  Graph solving works much
372     faster if we propagate numbers from the end to the start.
373
374     This takes an average of slightly more than 3 passes.  */
375
376  changes = 1;
377  passes = 0;
378  while (changes)
379    {
380      passes++;
381      changes = 0;
382      FOR_BB_BETWEEN (bb, EXIT_BLOCK_PTR, NULL, prev_bb)
383	{
384	  struct bb_info *bi = BB_INFO (bb);
385	  if (! bi->count_valid)
386	    {
387	      if (bi->succ_count == 0)
388		{
389		  edge e;
390		  gcov_type total = 0;
391
392		  for (e = bb->succ; e; e = e->succ_next)
393		    total += e->count;
394		  bb->count = total;
395		  bi->count_valid = 1;
396		  changes = 1;
397		}
398	      else if (bi->pred_count == 0)
399		{
400		  edge e;
401		  gcov_type total = 0;
402
403		  for (e = bb->pred; e; e = e->pred_next)
404		    total += e->count;
405		  bb->count = total;
406		  bi->count_valid = 1;
407		  changes = 1;
408		}
409	    }
410	  if (bi->count_valid)
411	    {
412	      if (bi->succ_count == 1)
413		{
414		  edge e;
415		  gcov_type total = 0;
416
417		  /* One of the counts will be invalid, but it is zero,
418		     so adding it in also doesn't hurt.  */
419		  for (e = bb->succ; e; e = e->succ_next)
420		    total += e->count;
421
422		  /* Seedgeh for the invalid edge, and set its count.  */
423		  for (e = bb->succ; e; e = e->succ_next)
424		    if (! EDGE_INFO (e)->count_valid && ! EDGE_INFO (e)->ignore)
425		      break;
426
427		  /* Calculate count for remaining edge by conservation.  */
428		  total = bb->count - total;
429
430		  if (! e)
431		    abort ();
432		  EDGE_INFO (e)->count_valid = 1;
433		  e->count = total;
434		  bi->succ_count--;
435
436		  BB_INFO (e->dest)->pred_count--;
437		  changes = 1;
438		}
439	      if (bi->pred_count == 1)
440		{
441		  edge e;
442		  gcov_type total = 0;
443
444		  /* One of the counts will be invalid, but it is zero,
445		     so adding it in also doesn't hurt.  */
446		  for (e = bb->pred; e; e = e->pred_next)
447		    total += e->count;
448
449		  /* Search for the invalid edge, and set its count.  */
450		  for (e = bb->pred; e; e = e->pred_next)
451		    if (!EDGE_INFO (e)->count_valid && !EDGE_INFO (e)->ignore)
452		      break;
453
454		  /* Calculate count for remaining edge by conservation.  */
455		  total = bb->count - total + e->count;
456
457		  if (! e)
458		    abort ();
459		  EDGE_INFO (e)->count_valid = 1;
460		  e->count = total;
461		  bi->pred_count--;
462
463		  BB_INFO (e->src)->succ_count--;
464		  changes = 1;
465		}
466	    }
467	}
468    }
469  if (rtl_dump_file)
470    dump_flow_info (rtl_dump_file);
471
472  total_num_passes += passes;
473  if (rtl_dump_file)
474    fprintf (rtl_dump_file, "Graph solving took %d passes.\n\n", passes);
475
476  /* If the graph has been correctly solved, every block will have a
477     succ and pred count of zero.  */
478  FOR_EACH_BB (bb)
479    {
480      if (BB_INFO (bb)->succ_count || BB_INFO (bb)->pred_count)
481	abort ();
482    }
483
484  /* For every edge, calculate its branch probability and add a reg_note
485     to the branch insn to indicate this.  */
486
487  for (i = 0; i < 20; i++)
488    hist_br_prob[i] = 0;
489  num_never_executed = 0;
490  num_branches = 0;
491
492  FOR_BB_BETWEEN (bb, ENTRY_BLOCK_PTR, NULL, next_bb)
493    {
494      edge e;
495      rtx note;
496
497      if (bb->count < 0)
498	{
499	  error ("corrupted profile info: number of iterations for basic block %d thought to be %i",
500		 bb->index, (int)bb->count);
501	  bb->count = 0;
502	}
503      for (e = bb->succ; e; e = e->succ_next)
504	{
505	  /* Function may return twice in the cased the called function is
506	     setjmp or calls fork, but we can't represent this by extra
507	     edge from the entry, since extra edge from the exit is
508	     already present.  We get negative frequency from the entry
509	     point.  */
510	  if ((e->count < 0
511	       && e->dest == EXIT_BLOCK_PTR)
512	      || (e->count > bb->count
513		  && e->dest != EXIT_BLOCK_PTR))
514	    {
515	      rtx insn = BB_END (bb);
516
517	      while (GET_CODE (insn) != CALL_INSN
518		     && insn != BB_HEAD (bb)
519		     && keep_with_call_p (insn))
520		insn = PREV_INSN (insn);
521	      if (GET_CODE (insn) == CALL_INSN)
522		e->count = e->count < 0 ? 0 : bb->count;
523	    }
524	  if (e->count < 0 || e->count > bb->count)
525	    {
526	      error ("corrupted profile info: number of executions for edge %d-%d thought to be %i",
527		     e->src->index, e->dest->index,
528		     (int)e->count);
529	      e->count = bb->count / 2;
530	    }
531	}
532      if (bb->count)
533	{
534	  for (e = bb->succ; e; e = e->succ_next)
535	    e->probability = (e->count * REG_BR_PROB_BASE + bb->count / 2) / bb->count;
536	  if (bb->index >= 0
537	      && any_condjump_p (BB_END (bb))
538	      && bb->succ->succ_next)
539	    {
540	      int prob;
541	      edge e;
542	      int index;
543
544	      /* Find the branch edge.  It is possible that we do have fake
545		 edges here.  */
546	      for (e = bb->succ; e->flags & (EDGE_FAKE | EDGE_FALLTHRU);
547		   e = e->succ_next)
548		continue; /* Loop body has been intentionally left blank.  */
549
550	      prob = e->probability;
551	      index = prob * 20 / REG_BR_PROB_BASE;
552
553	      if (index == 20)
554		index = 19;
555	      hist_br_prob[index]++;
556
557	      note = find_reg_note (BB_END (bb), REG_BR_PROB, 0);
558	      /* There may be already note put by some other pass, such
559		 as builtin_expect expander.  */
560	      if (note)
561		XEXP (note, 0) = GEN_INT (prob);
562	      else
563		REG_NOTES (BB_END (bb))
564		  = gen_rtx_EXPR_LIST (REG_BR_PROB, GEN_INT (prob),
565				       REG_NOTES (BB_END (bb)));
566	      num_branches++;
567	    }
568	}
569      /* Otherwise distribute the probabilities evenly so we get sane
570	 sum.  Use simple heuristics that if there are normal edges,
571	 give all abnormals frequency of 0, otherwise distribute the
572	 frequency over abnormals (this is the case of noreturn
573	 calls).  */
574      else
575	{
576	  int total = 0;
577
578	  for (e = bb->succ; e; e = e->succ_next)
579	    if (!(e->flags & (EDGE_COMPLEX | EDGE_FAKE)))
580	      total ++;
581	  if (total)
582	    {
583	      for (e = bb->succ; e; e = e->succ_next)
584		if (!(e->flags & (EDGE_COMPLEX | EDGE_FAKE)))
585		  e->probability = REG_BR_PROB_BASE / total;
586		else
587		  e->probability = 0;
588	    }
589	  else
590	    {
591	      for (e = bb->succ; e; e = e->succ_next)
592		total ++;
593	      for (e = bb->succ; e; e = e->succ_next)
594		e->probability = REG_BR_PROB_BASE / total;
595	    }
596	  if (bb->index >= 0
597	      && any_condjump_p (BB_END (bb))
598	      && bb->succ->succ_next)
599	    num_branches++, num_never_executed;
600	}
601    }
602
603  if (rtl_dump_file)
604    {
605      fprintf (rtl_dump_file, "%d branches\n", num_branches);
606      fprintf (rtl_dump_file, "%d branches never executed\n",
607	       num_never_executed);
608      if (num_branches)
609	for (i = 0; i < 10; i++)
610	  fprintf (rtl_dump_file, "%d%% branches in range %d-%d%%\n",
611		   (hist_br_prob[i] + hist_br_prob[19-i]) * 100 / num_branches,
612		   5 * i, 5 * i + 5);
613
614      total_num_branches += num_branches;
615      total_num_never_executed += num_never_executed;
616      for (i = 0; i < 20; i++)
617	total_hist_br_prob[i] += hist_br_prob[i];
618
619      fputc ('\n', rtl_dump_file);
620      fputc ('\n', rtl_dump_file);
621    }
622
623  free_aux_for_blocks ();
624}
625
/* Load value histograms for N_VALUES values whose description is stored
   in VALUES array from .da file.

   The counters read for each value are attached to its insn as a
   REG_VALUE_PROFILE note so later value-profiling transformations can
   consume them.  */
static void
compute_value_histograms (unsigned n_values, struct histogram_value *values)
{
  unsigned i, j, t, any;
  unsigned n_histogram_counters[GCOV_N_VALUE_COUNTERS];
  gcov_type *histogram_counts[GCOV_N_VALUE_COUNTERS];
  gcov_type *act_count[GCOV_N_VALUE_COUNTERS];
  gcov_type *aact_count;

  /* Total the number of counters needed per counter kind...  */
  for (t = 0; t < GCOV_N_VALUE_COUNTERS; t++)
    n_histogram_counters[t] = 0;

  for (i = 0; i < n_values; i++)
    n_histogram_counters[(int) (values[i].type)] += values[i].n_counters;

  /* ...and read each kind's counters from the .da file.  */
  any = 0;
  for (t = 0; t < GCOV_N_VALUE_COUNTERS; t++)
    {
      if (!n_histogram_counters[t])
	{
	  histogram_counts[t] = NULL;
	  continue;
	}

      histogram_counts[t] =
	get_coverage_counts (COUNTER_FOR_HIST_TYPE (t),
			     n_histogram_counters[t], NULL);
      if (histogram_counts[t])
	any = 1;
      act_count[t] = histogram_counts[t];
    }
  /* Nothing was read; leave the insns unannotated.  */
  if (!any)
    return;

  for (i = 0; i < n_values; i++)
    {
      rtx hist_list = NULL_RTX;
      t = (int) (values[i].type);

      /* ACT_COUNT[T] advances through the counters of kind T;
	 AACT_COUNT marks where this value's counters begin.  */
      aact_count = act_count[t];
      act_count[t] += values[i].n_counters;
      /* Build the note payload back to front, so the final list reads:
	 type, measured value, counter 0, counter 1, ...  */
      for (j = values[i].n_counters; j > 0; j--)
	hist_list = alloc_EXPR_LIST (0, GEN_INT (aact_count[j - 1]), hist_list);
      hist_list = alloc_EXPR_LIST (0, copy_rtx (values[i].value), hist_list);
      hist_list = alloc_EXPR_LIST (0, GEN_INT (values[i].type), hist_list);
      REG_NOTES (values[i].insn) =
	      alloc_EXPR_LIST (REG_VALUE_PROFILE, hist_list,
			       REG_NOTES (values[i].insn));
    }

  for (t = 0; t < GCOV_N_VALUE_COUNTERS; t++)
    if (histogram_counts[t])
      free (histogram_counts[t]);
}
682
/* Instrument and/or analyze program behavior based on program flow graph.
   In either case, this function builds a flow graph for the function being
   compiled.  The flow graph is stored in BB_GRAPH.

   When FLAG_PROFILE_ARCS is nonzero, this function instruments the edges in
   the flow graph that are needed to reconstruct the dynamic behavior of the
   flow graph.

   When FLAG_BRANCH_PROBABILITIES is nonzero, this function reads auxiliary
   information from a data file containing edge count information from previous
   executions of the function being compiled.  In this case, the flow graph is
   annotated with actual execution counts, which are later propagated into the
   rtl for optimization purposes.

   Main entry point of this file.  */

void
branch_prob (void)
{
  basic_block bb;
  unsigned i;
  unsigned num_edges, ignored_edges;
  unsigned num_instrumented;
  struct edge_list *el;
  unsigned n_values = 0;
  struct histogram_value *values = NULL;

  total_num_times_called++;

  flow_call_edges_add (NULL);
  add_noreturn_fake_exit_edges ();

  /* We can't handle cyclic regions constructed using abnormal edges.
     To avoid these we replace every source of abnormal edge by a fake
     edge from entry node and every destination by fake edge to exit.
     This keeps graph acyclic and our calculation exact for all normal
     edges except for exit and entrance ones.

     We also add fake exit edges for each call and asm statement in the
     basic block, since it may not return.  */

  FOR_EACH_BB (bb)
    {
      int need_exit_edge = 0, need_entry_edge = 0;
      int have_exit_edge = 0, have_entry_edge = 0;
      edge e;

      /* Functions returning multiple times are not handled by extra edges.
         Instead we simply allow negative counts on edges from exit to the
         block past call and corresponding probabilities.  We can't go
         with the extra edges because that would result in flowgraph that
	 needs to have fake edges outside the spanning tree.  */

      for (e = bb->succ; e; e = e->succ_next)
	{
	  if ((e->flags & (EDGE_ABNORMAL | EDGE_ABNORMAL_CALL))
	       && e->dest != EXIT_BLOCK_PTR)
	    need_exit_edge = 1;
	  if (e->dest == EXIT_BLOCK_PTR)
	    have_exit_edge = 1;
	}
      for (e = bb->pred; e; e = e->pred_next)
	{
	  if ((e->flags & (EDGE_ABNORMAL | EDGE_ABNORMAL_CALL))
	       && e->src != ENTRY_BLOCK_PTR)
	    need_entry_edge = 1;
	  if (e->src == ENTRY_BLOCK_PTR)
	    have_entry_edge = 1;
	}

      if (need_exit_edge && !have_exit_edge)
	{
	  if (rtl_dump_file)
	    fprintf (rtl_dump_file, "Adding fake exit edge to bb %i\n",
		     bb->index);
	  make_edge (bb, EXIT_BLOCK_PTR, EDGE_FAKE);
	}
      if (need_entry_edge && !have_entry_edge)
	{
	  if (rtl_dump_file)
	    fprintf (rtl_dump_file, "Adding fake entry edge to bb %i\n",
		     bb->index);
	  make_edge (ENTRY_BLOCK_PTR, bb, EDGE_FAKE);
	}
    }

  el = create_edge_list ();
  num_edges = NUM_EDGES (el);
  alloc_aux_for_edges (sizeof (struct edge_info));

  /* The basic blocks are expected to be numbered sequentially.  */
  compact_blocks ();

  ignored_edges = 0;
  for (i = 0 ; i < num_edges ; i++)
    {
      edge e = INDEX_EDGE (el, i);
      e->count = 0;

      /* Mark edges we've replaced by fake edges above as ignored.  */
      if ((e->flags & (EDGE_ABNORMAL | EDGE_ABNORMAL_CALL))
	  && e->src != ENTRY_BLOCK_PTR && e->dest != EXIT_BLOCK_PTR)
	{
	  EDGE_INFO (e)->ignore = 1;
	  ignored_edges++;
	}
    }

#ifdef ENABLE_CHECKING
  verify_flow_info ();
#endif

  /* Create spanning tree from basic block graph, mark each edge that is
     on the spanning tree.  We insert as many abnormal and critical edges
     as possible to minimize number of edge splits necessary.  */

  find_spanning_tree (el);

  /* Fake edges that are not on the tree will not be instrumented, so
     mark them ignored.  */
  for (num_instrumented = i = 0; i < num_edges; i++)
    {
      edge e = INDEX_EDGE (el, i);
      struct edge_info *inf = EDGE_INFO (e);

      if (inf->ignore || inf->on_tree)
	/*NOP*/;
      else if (e->flags & EDGE_FAKE)
	{
	  inf->ignore = 1;
	  ignored_edges++;
	}
      else
	num_instrumented++;
    }

  /* The "+ 2" accounts for the entry and exit blocks, which are not
     counted in n_basic_blocks.  */
  total_num_blocks += n_basic_blocks + 2;
  if (rtl_dump_file)
    fprintf (rtl_dump_file, "%d basic blocks\n", n_basic_blocks);

  total_num_edges += num_edges;
  if (rtl_dump_file)
    fprintf (rtl_dump_file, "%d edges\n", num_edges);

  total_num_edges_ignored += ignored_edges;
  if (rtl_dump_file)
    fprintf (rtl_dump_file, "%d ignored edges\n", ignored_edges);

  /* Write the data from which gcov can reconstruct the basic block
     graph.  */

  /* Basic block flags */
  if (coverage_begin_output ())
    {
      gcov_position_t offset;

      offset = gcov_write_tag (GCOV_TAG_BLOCKS);
      for (i = 0; i != (unsigned) (n_basic_blocks + 2); i++)
	gcov_write_unsigned (0);
      gcov_write_length (offset);
    }

   /* Keep all basic block indexes nonnegative in the gcov output.
      Index 0 is used for entry block, last index is for exit block.
      The indexes are restored before returning.  */
  ENTRY_BLOCK_PTR->index = -1;
  EXIT_BLOCK_PTR->index = last_basic_block;
#define BB_TO_GCOV_INDEX(bb)  ((bb)->index + 1)

  /* Arcs */
  if (coverage_begin_output ())
    {
      gcov_position_t offset;

      FOR_BB_BETWEEN (bb, ENTRY_BLOCK_PTR, EXIT_BLOCK_PTR, next_bb)
	{
	  edge e;

	  offset = gcov_write_tag (GCOV_TAG_ARCS);
	  gcov_write_unsigned (BB_TO_GCOV_INDEX (bb));

	  for (e = bb->succ; e; e = e->succ_next)
	    {
	      struct edge_info *i = EDGE_INFO (e);
	      if (!i->ignore)
		{
		  unsigned flag_bits = 0;

		  if (i->on_tree)
		    flag_bits |= GCOV_ARC_ON_TREE;
		  if (e->flags & EDGE_FAKE)
		    flag_bits |= GCOV_ARC_FAKE;
		  if (e->flags & EDGE_FALLTHRU)
		    flag_bits |= GCOV_ARC_FALLTHROUGH;

		  gcov_write_unsigned (BB_TO_GCOV_INDEX (e->dest));
		  gcov_write_unsigned (flag_bits);
	        }
	    }

	  gcov_write_length (offset);
	}
    }

  /* Line numbers.  */
  if (coverage_begin_output ())
    {
      char const *prev_file_name = NULL;
      gcov_position_t offset;

      FOR_EACH_BB (bb)
	{
	  rtx insn = BB_HEAD (bb);
	  int ignore_next_note = 0;

	  /* OFFSET doubles as a flag: zero until the first line record
	     for this block forces the tag to be written.  */
	  offset = 0;

	  /* We are looking for line number notes.  Search backward
	     before basic block to find correct ones.  */
	  insn = prev_nonnote_insn (insn);
	  if (!insn)
	    insn = get_insns ();
	  else
	    insn = NEXT_INSN (insn);

	  while (insn != BB_END (bb))
	    {
	      if (GET_CODE (insn) == NOTE)
		{
		  /* Must ignore the line number notes that
		     immediately follow the end of an inline function
		     to avoid counting it twice.  There is a note
		     before the call, and one after the call.  */
		  if (NOTE_LINE_NUMBER (insn)
		      == NOTE_INSN_REPEATED_LINE_NUMBER)
		    ignore_next_note = 1;
		  else if (NOTE_LINE_NUMBER (insn) <= 0)
		    /*NOP*/;
		  else if (ignore_next_note)
		    ignore_next_note = 0;
		  else
		    {
		      if (!offset)
			{
			  offset = gcov_write_tag (GCOV_TAG_LINES);
			  gcov_write_unsigned (BB_TO_GCOV_INDEX (bb));
			}

		      /* If this is a new source file, then output the
			 file's name to the .bb file.  */
		      if (!prev_file_name
			  || strcmp (NOTE_SOURCE_FILE (insn),
				     prev_file_name))
			{
			  prev_file_name = NOTE_SOURCE_FILE (insn);
			  gcov_write_unsigned (0);
			  gcov_write_string (prev_file_name);
			}
		      gcov_write_unsigned (NOTE_LINE_NUMBER (insn));
		    }
		}
	      insn = NEXT_INSN (insn);
	    }

	  if (offset)
	    {
	      /* A file of NULL indicates the end of run.  */
	      gcov_write_unsigned (0);
	      gcov_write_string (NULL);
	      gcov_write_length (offset);
	    }
	}
    }
  /* Restore the real entry/exit block indexes.  */
  ENTRY_BLOCK_PTR->index = ENTRY_BLOCK;
  EXIT_BLOCK_PTR->index = EXIT_BLOCK;
#undef BB_TO_GCOV_INDEX

  if (flag_profile_values)
    {
      life_analysis (get_insns (), NULL, PROP_DEATH_NOTES);
      find_values_to_profile (&n_values, &values);
      allocate_reg_info (max_reg_num (), FALSE, FALSE);
    }

  if (flag_branch_probabilities)
    {
      compute_branch_probabilities ();
      if (flag_profile_values)
	compute_value_histograms (n_values, values);
    }

  /* For each edge not on the spanning tree, add counting code as rtl.  */
  if (profile_arc_flag
      && coverage_counter_alloc (GCOV_COUNTER_ARCS, num_instrumented))
    {
      unsigned n_instrumented = instrument_edges (el);

      /* instrument_edges must agree with our own count of
	 instrumentable edges, or the counter array is mis-sized.  */
      if (n_instrumented != num_instrumented)
	abort ();

      if (flag_profile_values)
	instrument_values (n_values, values);

      /* Commit changes done by instrumentation.  */
      commit_edge_insertions_watch_calls ();
      allocate_reg_info (max_reg_num (), FALSE, FALSE);
    }

  remove_fake_edges ();
  free_aux_for_edges ();
  /* Re-merge split basic blocks and the mess introduced by
     insert_insn_on_edge.  */
  cleanup_cfg (profile_arc_flag ? CLEANUP_EXPENSIVE : 0);
  if (rtl_dump_file)
    dump_flow_info (rtl_dump_file);

  free_edge_list (el);
}
1001
1002/* Union find algorithm implementation for the basic blocks using
1003   aux fields.  */
1004
1005static basic_block
1006find_group (basic_block bb)
1007{
1008  basic_block group = bb, bb1;
1009
1010  while ((basic_block) group->aux != group)
1011    group = (basic_block) group->aux;
1012
1013  /* Compress path.  */
1014  while ((basic_block) bb->aux != group)
1015    {
1016      bb1 = (basic_block) bb->aux;
1017      bb->aux = (void *) group;
1018      bb = bb1;
1019    }
1020  return group;
1021}
1022
1023static void
1024union_groups (basic_block bb1, basic_block bb2)
1025{
1026  basic_block bb1g = find_group (bb1);
1027  basic_block bb2g = find_group (bb2);
1028
1029  /* ??? I don't have a place for the rank field.  OK.  Lets go w/o it,
1030     this code is unlikely going to be performance problem anyway.  */
1031  if (bb1g == bb2g)
1032    abort ();
1033
1034  bb1g->aux = bb2g;
1035}
1036
/* This function searches all of the edges in the program flow graph, and puts
   as many bad edges as possible onto the spanning tree.  Bad edges include
   abnormal edges, which can't be instrumented at the moment.  Since it is
   possible for fake edges to form a cycle, we will have to develop some
   better way in the future.  Also put critical edges onto the tree, since
   they are more expensive to instrument.  */
1043
1044static void
1045find_spanning_tree (struct edge_list *el)
1046{
1047  int i;
1048  int num_edges = NUM_EDGES (el);
1049  basic_block bb;
1050
1051  /* We use aux field for standard union-find algorithm.  */
1052  FOR_BB_BETWEEN (bb, ENTRY_BLOCK_PTR, NULL, next_bb)
1053    bb->aux = bb;
1054
1055  /* Add fake edge exit to entry we can't instrument.  */
1056  union_groups (EXIT_BLOCK_PTR, ENTRY_BLOCK_PTR);
1057
1058  /* First add all abnormal edges to the tree unless they form a cycle. Also
1059     add all edges to EXIT_BLOCK_PTR to avoid inserting profiling code behind
1060     setting return value from function.  */
1061  for (i = 0; i < num_edges; i++)
1062    {
1063      edge e = INDEX_EDGE (el, i);
1064      if (((e->flags & (EDGE_ABNORMAL | EDGE_ABNORMAL_CALL | EDGE_FAKE))
1065	   || e->dest == EXIT_BLOCK_PTR)
1066	  && !EDGE_INFO (e)->ignore
1067	  && (find_group (e->src) != find_group (e->dest)))
1068	{
1069	  if (rtl_dump_file)
1070	    fprintf (rtl_dump_file, "Abnormal edge %d to %d put to tree\n",
1071		     e->src->index, e->dest->index);
1072	  EDGE_INFO (e)->on_tree = 1;
1073	  union_groups (e->src, e->dest);
1074	}
1075    }
1076
1077  /* Now insert all critical edges to the tree unless they form a cycle.  */
1078  for (i = 0; i < num_edges; i++)
1079    {
1080      edge e = INDEX_EDGE (el, i);
1081      if (EDGE_CRITICAL_P (e) && !EDGE_INFO (e)->ignore
1082	  && find_group (e->src) != find_group (e->dest))
1083	{
1084	  if (rtl_dump_file)
1085	    fprintf (rtl_dump_file, "Critical edge %d to %d put to tree\n",
1086		     e->src->index, e->dest->index);
1087	  EDGE_INFO (e)->on_tree = 1;
1088	  union_groups (e->src, e->dest);
1089	}
1090    }
1091
1092  /* And now the rest.  */
1093  for (i = 0; i < num_edges; i++)
1094    {
1095      edge e = INDEX_EDGE (el, i);
1096      if (!EDGE_INFO (e)->ignore
1097	  && find_group (e->src) != find_group (e->dest))
1098	{
1099	  if (rtl_dump_file)
1100	    fprintf (rtl_dump_file, "Normal edge %d to %d put to tree\n",
1101		     e->src->index, e->dest->index);
1102	  EDGE_INFO (e)->on_tree = 1;
1103	  union_groups (e->src, e->dest);
1104	}
1105    }
1106
1107  FOR_BB_BETWEEN (bb, ENTRY_BLOCK_PTR, NULL, next_bb)
1108    bb->aux = NULL;
1109}
1110
1111/* Perform file-level initialization for branch-prob processing.  */
1112
1113void
1114init_branch_prob (void)
1115{
1116  int i;
1117
1118  total_num_blocks = 0;
1119  total_num_edges = 0;
1120  total_num_edges_ignored = 0;
1121  total_num_edges_instrumented = 0;
1122  total_num_blocks_created = 0;
1123  total_num_passes = 0;
1124  total_num_times_called = 0;
1125  total_num_branches = 0;
1126  total_num_never_executed = 0;
1127  for (i = 0; i < 20; i++)
1128    total_hist_br_prob[i] = 0;
1129}
1130
1131/* Performs file-level cleanup after branch-prob processing
1132   is completed.  */
1133
void
end_branch_prob (void)
{
  /* Everything here is dump-file reporting only; with no dump file the
     function is a no-op.  */
  if (rtl_dump_file)
    {
      fprintf (rtl_dump_file, "\n");
      fprintf (rtl_dump_file, "Total number of blocks: %d\n",
	       total_num_blocks);
      fprintf (rtl_dump_file, "Total number of edges: %d\n", total_num_edges);
      fprintf (rtl_dump_file, "Total number of ignored edges: %d\n",
	       total_num_edges_ignored);
      fprintf (rtl_dump_file, "Total number of instrumented edges: %d\n",
	       total_num_edges_instrumented);
      fprintf (rtl_dump_file, "Total number of blocks created: %d\n",
	       total_num_blocks_created);
      fprintf (rtl_dump_file, "Total number of graph solution passes: %d\n",
	       total_num_passes);
      /* Adding half the divisor rounds the average to nearest instead
	 of truncating.  */
      if (total_num_times_called != 0)
	fprintf (rtl_dump_file, "Average number of graph solution passes: %d\n",
		 (total_num_passes + (total_num_times_called  >> 1))
		 / total_num_times_called);
      fprintf (rtl_dump_file, "Total number of branches: %d\n",
	       total_num_branches);
      fprintf (rtl_dump_file, "Total number of branches never executed: %d\n",
	       total_num_never_executed);
      if (total_num_branches)
	{
	  int i;

	  /* The 20 histogram buckets are folded pairwise (bucket i with
	     its mirror 19-i), so each printed line covers one 5%% band
	     of branch probability counted from either end.  */
	  for (i = 0; i < 10; i++)
	    fprintf (rtl_dump_file, "%d%% branches in range %d-%d%%\n",
		     (total_hist_br_prob[i] + total_hist_br_prob[19-i]) * 100
		     / total_num_branches, 5*i, 5*i+5);
	}
    }
}
1170
1171
1172/* Output instructions as RTL to increment the edge execution count.  */
1173
1174static rtx
1175gen_edge_profiler (int edgeno)
1176{
1177  rtx ref = coverage_counter_ref (GCOV_COUNTER_ARCS, edgeno);
1178  rtx tmp;
1179  enum machine_mode mode = GET_MODE (ref);
1180  rtx sequence;
1181
1182  start_sequence ();
1183  ref = validize_mem (ref);
1184
1185  tmp = expand_simple_binop (mode, PLUS, ref, const1_rtx,
1186			     ref, 0, OPTAB_WIDEN);
1187
1188  if (tmp != ref)
1189    emit_move_insn (copy_rtx (ref), tmp);
1190
1191  sequence = get_insns ();
1192  end_sequence ();
1193  return sequence;
1194}
1195
1196/* Output instructions as RTL to increment the interval histogram counter.
1197   VALUE is the expression whose value is profiled.  TAG is the tag of the
1198   section for counters, BASE is offset of the counter position.  */
1199
static rtx
gen_interval_profiler (struct histogram_value *value, unsigned tag,
		       unsigned base)
{
  /* Width of one gcov counter in bits, and the integer mode of that
     width.  */
  unsigned gcov_size = tree_low_cst (TYPE_SIZE (GCOV_TYPE_NODE), 1);
  enum machine_mode mode = mode_for_size (gcov_size, MODE_INT, 0);
  rtx mem_ref, tmp, tmp1, mr, val;
  rtx sequence;
  rtx more_label = gen_label_rtx ();
  rtx less_label = gen_label_rtx ();
  rtx end_of_code_label = gen_label_rtx ();
  /* Byte stride between adjacent counters in the counter array.  */
  int per_counter = gcov_size / BITS_PER_UNIT;

  start_sequence ();

  /* First emit any code needed to compute the profiled value.  */
  if (value->seq)
    emit_insn (value->seq);

  /* MR will hold the address of the counter selected below.  */
  mr = gen_reg_rtx (Pmode);

  /* TMP = address of the first counter of this histogram.  */
  tmp = coverage_counter_ref (tag, base);
  tmp = force_reg (Pmode, XEXP (tmp, 0));

  /* VAL = value - int_start: the zero-based position inside the
     interval.  */
  val = expand_simple_binop (value->mode, MINUS,
			     copy_rtx (value->value),
			     GEN_INT (value->hdata.intvl.int_start),
			     NULL_RTX, 0, OPTAB_WIDEN);

  /* Branch out for values above (VAL >= steps) or below (VAL < 0) the
     interval, when those cases are possible.  */
  if (value->hdata.intvl.may_be_more)
    do_compare_rtx_and_jump (copy_rtx (val), GEN_INT (value->hdata.intvl.steps),
			     GE, 0, value->mode, NULL_RTX, NULL_RTX, more_label);
  if (value->hdata.intvl.may_be_less)
    do_compare_rtx_and_jump (copy_rtx (val), const0_rtx, LT, 0, value->mode,
			     NULL_RTX, NULL_RTX, less_label);

  /* We are in range: select counter number VAL, i.e. address
     base + VAL * per_counter.  */
  tmp1 = expand_simple_binop (value->mode, MULT,
			      copy_rtx (val), GEN_INT (per_counter),
			      NULL_RTX, 0, OPTAB_WIDEN);
  tmp1 = expand_simple_binop (Pmode, PLUS, copy_rtx (tmp), tmp1, mr,
			      0, OPTAB_WIDEN);
  if (tmp1 != mr)
    emit_move_insn (copy_rtx (mr), tmp1);

  /* Skip over the out-of-range handlers, if any were emitted.  */
  if (value->hdata.intvl.may_be_more
      || value->hdata.intvl.may_be_less)
    {
      emit_jump_insn (gen_jump (end_of_code_label));
      emit_barrier ();
    }

  /* Above the interval: use the dedicated overflow counter at index
     STEPS.  */
  if (value->hdata.intvl.may_be_more)
    {
      emit_label (more_label);
      tmp1 = expand_simple_binop (Pmode, PLUS, copy_rtx (tmp),
				  GEN_INT (per_counter * value->hdata.intvl.steps),
				  mr, 0, OPTAB_WIDEN);
      if (tmp1 != mr)
	emit_move_insn (copy_rtx (mr), tmp1);
      if (value->hdata.intvl.may_be_less)
	{
	  emit_jump_insn (gen_jump (end_of_code_label));
	  emit_barrier ();
	}
    }

  /* Below the interval: use the underflow counter, which sits after
     the overflow counter if that one exists.  */
  if (value->hdata.intvl.may_be_less)
    {
      emit_label (less_label);
      tmp1 = expand_simple_binop (Pmode, PLUS, copy_rtx (tmp),
		GEN_INT (per_counter * (value->hdata.intvl.steps
					+ (value->hdata.intvl.may_be_more ? 1 : 0))),
		mr, 0, OPTAB_WIDEN);
      if (tmp1 != mr)
	emit_move_insn (copy_rtx (mr), tmp1);
    }

  if (value->hdata.intvl.may_be_more
      || value->hdata.intvl.may_be_less)
    emit_label (end_of_code_label);

  /* Increment the counter whose address is now in MR.  */
  mem_ref = validize_mem (gen_rtx_MEM (mode, mr));

  tmp = expand_simple_binop (mode, PLUS, copy_rtx (mem_ref), const1_rtx,
			     mem_ref, 0, OPTAB_WIDEN);

  if (tmp != mem_ref)
    emit_move_insn (copy_rtx (mem_ref), tmp);

  sequence = get_insns ();
  end_sequence ();
  /* The sequence contains jumps to fresh labels; fix up JUMP_LABELs.  */
  rebuild_jump_labels (sequence);
  return sequence;
}
1296
1297/* Output instructions as RTL to increment the power of two histogram counter.
1298   VALUE is the expression whose value is profiled.  TAG is the tag of the
1299   section for counters, BASE is offset of the counter position.  */
1300
static rtx
gen_pow2_profiler (struct histogram_value *value, unsigned tag, unsigned base)
{
  /* Width of one gcov counter in bits, and the integer mode of that
     width.  */
  unsigned gcov_size = tree_low_cst (TYPE_SIZE (GCOV_TYPE_NODE), 1);
  enum machine_mode mode = mode_for_size (gcov_size, MODE_INT, 0);
  rtx mem_ref, tmp, mr, uval;
  rtx sequence;
  rtx end_of_code_label = gen_label_rtx ();
  rtx loop_label = gen_label_rtx ();
  /* Byte stride between adjacent counters in the counter array.  */
  int per_counter = gcov_size / BITS_PER_UNIT;

  start_sequence ();

  /* First emit any code needed to compute the profiled value.  */
  if (value->seq)
    emit_insn (value->seq);

  /* MR starts at the histogram base and is advanced once per loop
     iteration below, so a value that skips the loop bumps counter 0.  */
  mr = gen_reg_rtx (Pmode);
  tmp = coverage_counter_ref (tag, base);
  tmp = force_reg (Pmode, XEXP (tmp, 0));
  emit_move_insn (mr, tmp);

  uval = gen_reg_rtx (value->mode);
  emit_move_insn (uval, copy_rtx (value->value));

  /* Check for non-power of 2.  Non-positive values and values with
     more than one bit set (uval & (uval - 1) != 0) go straight to the
     final increment with MR still at counter 0, the "other" bucket.  */
  if (value->hdata.pow2.may_be_other)
    {
      do_compare_rtx_and_jump (copy_rtx (uval), const0_rtx, LE, 0, value->mode,
			       NULL_RTX, NULL_RTX, end_of_code_label);
      tmp = expand_simple_binop (value->mode, PLUS, copy_rtx (uval),
				 constm1_rtx, NULL_RTX, 0, OPTAB_WIDEN);
      tmp = expand_simple_binop (value->mode, AND, copy_rtx (uval), tmp,
				 NULL_RTX, 0, OPTAB_WIDEN);
      do_compare_rtx_and_jump (tmp, const0_rtx, NE, 0, value->mode, NULL_RTX,
			       NULL_RTX, end_of_code_label);
    }

  /* Count log_2(value): shift UVAL right until it is zero, advancing
     MR one counter slot per shift.  NOTE(review): for uval == 2^k the
     loop runs k+1 times, so the bucket used is k+1 — confirm this
     matches the gcov runtime's expected layout.  */
  emit_label (loop_label);

  tmp = expand_simple_binop (Pmode, PLUS, copy_rtx (mr), GEN_INT (per_counter), mr, 0, OPTAB_WIDEN);
  if (tmp != mr)
    emit_move_insn (copy_rtx (mr), tmp);

  tmp = expand_simple_binop (value->mode, ASHIFTRT, copy_rtx (uval), const1_rtx,
			     uval, 0, OPTAB_WIDEN);
  if (tmp != uval)
    emit_move_insn (copy_rtx (uval), tmp);

  do_compare_rtx_and_jump (copy_rtx (uval), const0_rtx, NE, 0, value->mode,
			   NULL_RTX, NULL_RTX, loop_label);

  /* Increase the counter whose address ended up in MR.  */
  emit_label (end_of_code_label);

  mem_ref = validize_mem (gen_rtx_MEM (mode, mr));

  tmp = expand_simple_binop (mode, PLUS, copy_rtx (mem_ref), const1_rtx,
			     mem_ref, 0, OPTAB_WIDEN);

  if (tmp != mem_ref)
    emit_move_insn (copy_rtx (mem_ref), tmp);

  sequence = get_insns ();
  end_sequence ();
  /* The sequence contains jumps to fresh labels; fix up JUMP_LABELs.  */
  rebuild_jump_labels (sequence);
  return sequence;
}
1369
1370/* Output instructions as RTL for code to find the most common value.
1371   VALUE is the expression whose value is profiled.  TAG is the tag of the
1372   section for counters, BASE is offset of the counter position.  */
1373
static rtx
gen_one_value_profiler (struct histogram_value *value, unsigned tag,
			unsigned base)
{
  /* Width of one gcov counter in bits, and the integer mode of that
     width.  */
  unsigned gcov_size = tree_low_cst (TYPE_SIZE (GCOV_TYPE_NODE), 1);
  enum machine_mode mode = mode_for_size (gcov_size, MODE_INT, 0);
  rtx stored_value_ref, counter_ref, all_ref, stored_value, counter, all;
  rtx tmp, uval;
  rtx sequence;
  rtx same_label = gen_label_rtx ();
  rtx zero_label = gen_label_rtx ();
  rtx end_of_code_label = gen_label_rtx ();

  start_sequence ();

  /* First emit any code needed to compute the profiled value.  */
  if (value->seq)
    emit_insn (value->seq);

  /* Three consecutive counters: [base] the current candidate value,
     [base+1] its vote count, [base+2] the total execution count.
     Together they implement a majority-vote most-common-value
     estimator.  */
  stored_value_ref = coverage_counter_ref (tag, base);
  counter_ref = coverage_counter_ref (tag, base + 1);
  all_ref = coverage_counter_ref (tag, base + 2);
  stored_value = validize_mem (stored_value_ref);
  counter = validize_mem (counter_ref);
  all = validize_mem (all_ref);

  /* Widen the profiled value to the counter mode.  */
  uval = gen_reg_rtx (mode);
  convert_move (uval, copy_rtx (value->value), 0);

  /* Check if the stored value matches.  If so, just increment the
     vote count.  */
  do_compare_rtx_and_jump (copy_rtx (uval), copy_rtx (stored_value), EQ,
			   0, mode, NULL_RTX, NULL_RTX, same_label);

  /* Does not match; check whether the counter is zero.  If it is, the
     candidate slot is free and we can claim it.  */
  do_compare_rtx_and_jump (copy_rtx (counter), const0_rtx, EQ, 0, mode,
			   NULL_RTX, NULL_RTX, zero_label);

  /* The counter is not zero yet: a different value still holds the
     slot, so take one vote away from it.  */
  tmp = expand_simple_binop (mode, PLUS, copy_rtx (counter), constm1_rtx,
			     counter, 0, OPTAB_WIDEN);

  if (tmp != counter)
    emit_move_insn (copy_rtx (counter), tmp);

  emit_jump_insn (gen_jump (end_of_code_label));
  emit_barrier ();

  emit_label (zero_label);
  /* Set new value; control then falls through to the increment at
     SAME_LABEL, giving the new candidate its first vote.  */
  emit_move_insn (copy_rtx (stored_value), copy_rtx (uval));

  emit_label (same_label);
  /* Increase the counter.  */
  tmp = expand_simple_binop (mode, PLUS, copy_rtx (counter), const1_rtx,
			     counter, 0, OPTAB_WIDEN);

  if (tmp != counter)
    emit_move_insn (copy_rtx (counter), tmp);

  emit_label (end_of_code_label);

  /* Increase the counter of all executions; this seems redundant given
     that we have counts for edges in cfg, but it may happen that some
     optimization will change the counts for the block (either because
     it is unable to update them correctly, or because it will duplicate
     the block or its part).  */
  tmp = expand_simple_binop (mode, PLUS, copy_rtx (all), const1_rtx,
			     all, 0, OPTAB_WIDEN);

  if (tmp != all)
    emit_move_insn (copy_rtx (all), tmp);
  sequence = get_insns ();
  end_sequence ();
  /* The sequence contains jumps to fresh labels; fix up JUMP_LABELs.  */
  rebuild_jump_labels (sequence);
  return sequence;
}
1449
1450/* Output instructions as RTL for code to find the most common value of
1451   a difference between two evaluations of an expression.
1452   VALUE is the expression whose value is profiled.  TAG is the tag of the
1453   section for counters, BASE is offset of the counter position.  */
1454
1455static rtx
1456gen_const_delta_profiler (struct histogram_value *value, unsigned tag,
1457			  unsigned base)
1458{
1459  struct histogram_value one_value_delta;
1460  unsigned gcov_size = tree_low_cst (TYPE_SIZE (GCOV_TYPE_NODE), 1);
1461  enum machine_mode mode = mode_for_size (gcov_size, MODE_INT, 0);
1462  rtx stored_value_ref, stored_value, tmp, uval;
1463  rtx sequence;
1464
1465  start_sequence ();
1466
1467  if (value->seq)
1468    emit_insn (value->seq);
1469
1470  stored_value_ref = coverage_counter_ref (tag, base);
1471  stored_value = validize_mem (stored_value_ref);
1472
1473  uval = gen_reg_rtx (mode);
1474  convert_move (uval, copy_rtx (value->value), 0);
1475  tmp = expand_simple_binop (mode, MINUS,
1476			     copy_rtx (uval), copy_rtx (stored_value),
1477			     NULL_RTX, 0, OPTAB_WIDEN);
1478
1479  one_value_delta.value = tmp;
1480  one_value_delta.mode = mode;
1481  one_value_delta.seq = NULL_RTX;
1482  one_value_delta.insn = value->insn;
1483  one_value_delta.type = HIST_TYPE_SINGLE_VALUE;
1484  emit_insn (gen_one_value_profiler (&one_value_delta, tag, base + 1));
1485
1486  emit_move_insn (copy_rtx (stored_value), uval);
1487  sequence = get_insns ();
1488  end_sequence ();
1489  rebuild_jump_labels (sequence);
1490  return sequence;
1491}
1492