1/* Branch prediction routines for the GNU compiler.
2   Copyright (C) 2000, 2001, 2002, 2003, 2004, 2005
3   Free Software Foundation, Inc.
4
5This file is part of GCC.
6
7GCC is free software; you can redistribute it and/or modify it under
8the terms of the GNU General Public License as published by the Free
9Software Foundation; either version 2, or (at your option) any later
10version.
11
12GCC is distributed in the hope that it will be useful, but WITHOUT ANY
13WARRANTY; without even the implied warranty of MERCHANTABILITY or
14FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
15for more details.
16
17You should have received a copy of the GNU General Public License
18along with GCC; see the file COPYING.  If not, write to the Free
19Software Foundation, 51 Franklin Street, Fifth Floor, Boston, MA
2002110-1301, USA.  */
21
22/* References:
23
24   [1] "Branch Prediction for Free"
25       Ball and Larus; PLDI '93.
26   [2] "Static Branch Frequency and Program Profile Analysis"
27       Wu and Larus; MICRO-27.
28   [3] "Corpus-based Static Branch Prediction"
29       Calder, Grunwald, Lindsay, Martin, Mozer, and Zorn; PLDI '95.  */
30
31
32#include "config.h"
33#include "system.h"
34#include "coretypes.h"
35#include "tm.h"
36#include "tree.h"
37#include "rtl.h"
38#include "tm_p.h"
39#include "hard-reg-set.h"
40#include "basic-block.h"
41#include "insn-config.h"
42#include "regs.h"
43#include "flags.h"
44#include "output.h"
45#include "function.h"
46#include "except.h"
47#include "toplev.h"
48#include "recog.h"
49#include "expr.h"
50#include "predict.h"
51#include "coverage.h"
52#include "sreal.h"
53#include "params.h"
54#include "target.h"
55#include "cfgloop.h"
56#include "tree-flow.h"
57#include "ggc.h"
58#include "tree-dump.h"
59#include "tree-pass.h"
60#include "timevar.h"
61#include "tree-scalar-evolution.h"
62#include "cfgloop.h"
63
64/* real constants: 0, 1, 1-1/REG_BR_PROB_BASE, REG_BR_PROB_BASE,
65		   1/REG_BR_PROB_BASE, 0.5, BB_FREQ_MAX.  */
66static sreal real_zero, real_one, real_almost_one, real_br_prob_base,
67	     real_inv_br_prob_base, real_one_half, real_bb_freq_max;
68
69/* Random guesstimation given names.  */
70#define PROB_VERY_UNLIKELY	(REG_BR_PROB_BASE / 100 - 1)
71#define PROB_EVEN		(REG_BR_PROB_BASE / 2)
72#define PROB_VERY_LIKELY	(REG_BR_PROB_BASE - PROB_VERY_UNLIKELY)
73#define PROB_ALWAYS		(REG_BR_PROB_BASE)
74
75static void combine_predictions_for_insn (rtx, basic_block);
76static void dump_prediction (FILE *, enum br_predictor, int, basic_block, int);
77static void estimate_loops_at_level (struct loop *, bitmap);
78static void propagate_freq (struct loop *, bitmap);
79static void estimate_bb_frequencies (struct loops *);
80static void predict_paths_leading_to (basic_block, int *, enum br_predictor, enum prediction);
81static bool last_basic_block_p (basic_block);
82static void compute_function_frequency (void);
83static void choose_function_section (void);
84static bool can_predict_insn_p (rtx);
85
86/* Information we hold about each branch predictor.
87   Filled using information from predict.def.  */
88
89struct predictor_info
90{
91  const char *const name;	/* Name used in the debugging dumps.  */
92  const int hitrate;		/* Expected hitrate used by
93				   predict_insn_def call.  */
94  const int flags;
95};
96
97/* Use given predictor without Dempster-Shaffer theory if it matches
98   using first_match heuristics.  */
99#define PRED_FLAG_FIRST_MATCH 1
100
101/* Recompute hitrate in percent to our representation.  */
102
103#define HITRATE(VAL) ((int) ((VAL) * REG_BR_PROB_BASE + 50) / 100)
104
105#define DEF_PREDICTOR(ENUM, NAME, HITRATE, FLAGS) {NAME, HITRATE, FLAGS},
106static const struct predictor_info predictor_info[]= {
107#include "predict.def"
108
109  /* Upper bound on predictors.  */
110  {NULL, 0, 0}
111};
112#undef DEF_PREDICTOR
113
114/* Return true in case BB can be CPU intensive and should be optimized
115   for maximal performance.  */
116
117bool
118maybe_hot_bb_p (basic_block bb)
119{
120  if (profile_info && flag_branch_probabilities
121      && (bb->count
122	  < profile_info->sum_max / PARAM_VALUE (HOT_BB_COUNT_FRACTION)))
123    return false;
124  if (bb->frequency < BB_FREQ_MAX / PARAM_VALUE (HOT_BB_FREQUENCY_FRACTION))
125    return false;
126  return true;
127}
128
129/* Return true in case BB is cold and should be optimized for size.  */
130
131bool
132probably_cold_bb_p (basic_block bb)
133{
134  if (profile_info && flag_branch_probabilities
135      && (bb->count
136	  < profile_info->sum_max / PARAM_VALUE (HOT_BB_COUNT_FRACTION)))
137    return true;
138  if (bb->frequency < BB_FREQ_MAX / PARAM_VALUE (HOT_BB_FREQUENCY_FRACTION))
139    return true;
140  return false;
141}
142
143/* Return true in case BB is probably never executed.  */
144bool
145probably_never_executed_bb_p (basic_block bb)
146{
147  if (profile_info && flag_branch_probabilities)
148    return ((bb->count + profile_info->runs / 2) / profile_info->runs) == 0;
149  return false;
150}
151
152/* Return true if the one of outgoing edges is already predicted by
153   PREDICTOR.  */
154
155bool
156rtl_predicted_by_p (basic_block bb, enum br_predictor predictor)
157{
158  rtx note;
159  if (!INSN_P (BB_END (bb)))
160    return false;
161  for (note = REG_NOTES (BB_END (bb)); note; note = XEXP (note, 1))
162    if (REG_NOTE_KIND (note) == REG_BR_PRED
163	&& INTVAL (XEXP (XEXP (note, 0), 0)) == (int)predictor)
164      return true;
165  return false;
166}
167
168/* Return true if the one of outgoing edges is already predicted by
169   PREDICTOR.  */
170
171bool
172tree_predicted_by_p (basic_block bb, enum br_predictor predictor)
173{
174  struct edge_prediction *i;
175  for (i = bb->predictions; i; i = i->next)
176    if (i->predictor == predictor)
177      return true;
178  return false;
179}
180
181static void
182predict_insn (rtx insn, enum br_predictor predictor, int probability)
183{
184  gcc_assert (any_condjump_p (insn));
185  if (!flag_guess_branch_prob)
186    return;
187
188  REG_NOTES (insn)
189    = gen_rtx_EXPR_LIST (REG_BR_PRED,
190			 gen_rtx_CONCAT (VOIDmode,
191					 GEN_INT ((int) predictor),
192					 GEN_INT ((int) probability)),
193			 REG_NOTES (insn));
194}
195
196/* Predict insn by given predictor.  */
197
198void
199predict_insn_def (rtx insn, enum br_predictor predictor,
200		  enum prediction taken)
201{
202   int probability = predictor_info[(int) predictor].hitrate;
203
204   if (taken != TAKEN)
205     probability = REG_BR_PROB_BASE - probability;
206
207   predict_insn (insn, predictor, probability);
208}
209
210/* Predict edge E with given probability if possible.  */
211
212void
213rtl_predict_edge (edge e, enum br_predictor predictor, int probability)
214{
215  rtx last_insn;
216  last_insn = BB_END (e->src);
217
218  /* We can store the branch prediction information only about
219     conditional jumps.  */
220  if (!any_condjump_p (last_insn))
221    return;
222
223  /* We always store probability of branching.  */
224  if (e->flags & EDGE_FALLTHRU)
225    probability = REG_BR_PROB_BASE - probability;
226
227  predict_insn (last_insn, predictor, probability);
228}
229
230/* Predict edge E with the given PROBABILITY.  */
231void
232tree_predict_edge (edge e, enum br_predictor predictor, int probability)
233{
234  gcc_assert (profile_status != PROFILE_GUESSED);
235  if ((e->src != ENTRY_BLOCK_PTR && EDGE_COUNT (e->src->succs) > 1)
236      && flag_guess_branch_prob && optimize)
237    {
238      struct edge_prediction *i = ggc_alloc (sizeof (struct edge_prediction));
239
240      i->next = e->src->predictions;
241      e->src->predictions = i;
242      i->probability = probability;
243      i->predictor = predictor;
244      i->edge = e;
245    }
246}
247
248/* Remove all predictions on given basic block that are attached
249   to edge E.  */
250void
251remove_predictions_associated_with_edge (edge e)
252{
253  if (e->src->predictions)
254    {
255      struct edge_prediction **prediction = &e->src->predictions;
256      while (*prediction)
257	{
258	  if ((*prediction)->edge == e)
259	    *prediction = (*prediction)->next;
260	  else
261	    prediction = &((*prediction)->next);
262	}
263    }
264}
265
266/* Return true when we can store prediction on insn INSN.
267   At the moment we represent predictions only on conditional
268   jumps, not at computed jump or other complicated cases.  */
269static bool
270can_predict_insn_p (rtx insn)
271{
272  return (JUMP_P (insn)
273	  && any_condjump_p (insn)
274	  && EDGE_COUNT (BLOCK_FOR_INSN (insn)->succs) >= 2);
275}
276
277/* Predict edge E by given predictor if possible.  */
278
279void
280predict_edge_def (edge e, enum br_predictor predictor,
281		  enum prediction taken)
282{
283   int probability = predictor_info[(int) predictor].hitrate;
284
285   if (taken != TAKEN)
286     probability = REG_BR_PROB_BASE - probability;
287
288   predict_edge (e, predictor, probability);
289}
290
291/* Invert all branch predictions or probability notes in the INSN.  This needs
292   to be done each time we invert the condition used by the jump.  */
293
294void
295invert_br_probabilities (rtx insn)
296{
297  rtx note;
298
299  for (note = REG_NOTES (insn); note; note = XEXP (note, 1))
300    if (REG_NOTE_KIND (note) == REG_BR_PROB)
301      XEXP (note, 0) = GEN_INT (REG_BR_PROB_BASE - INTVAL (XEXP (note, 0)));
302    else if (REG_NOTE_KIND (note) == REG_BR_PRED)
303      XEXP (XEXP (note, 0), 1)
304	= GEN_INT (REG_BR_PROB_BASE - INTVAL (XEXP (XEXP (note, 0), 1)));
305}
306
307/* Dump information about the branch prediction to the output file.  */
308
309static void
310dump_prediction (FILE *file, enum br_predictor predictor, int probability,
311		 basic_block bb, int used)
312{
313  edge e;
314  edge_iterator ei;
315
316  if (!file)
317    return;
318
319  FOR_EACH_EDGE (e, ei, bb->succs)
320    if (! (e->flags & EDGE_FALLTHRU))
321      break;
322
323  fprintf (file, "  %s heuristics%s: %.1f%%",
324	   predictor_info[predictor].name,
325	   used ? "" : " (ignored)", probability * 100.0 / REG_BR_PROB_BASE);
326
327  if (bb->count)
328    {
329      fprintf (file, "  exec ");
330      fprintf (file, HOST_WIDEST_INT_PRINT_DEC, bb->count);
331      if (e)
332	{
333	  fprintf (file, " hit ");
334	  fprintf (file, HOST_WIDEST_INT_PRINT_DEC, e->count);
335	  fprintf (file, " (%.1f%%)", e->count * 100.0 / bb->count);
336	}
337    }
338
339  fprintf (file, "\n");
340}
341
342/* We can not predict the probabilities of outgoing edges of bb.  Set them
343   evenly and hope for the best.  */
344static void
345set_even_probabilities (basic_block bb)
346{
347  int nedges = 0;
348  edge e;
349  edge_iterator ei;
350
351  FOR_EACH_EDGE (e, ei, bb->succs)
352    if (!(e->flags & (EDGE_EH | EDGE_FAKE)))
353      nedges ++;
354  FOR_EACH_EDGE (e, ei, bb->succs)
355    if (!(e->flags & (EDGE_EH | EDGE_FAKE)))
356      e->probability = (REG_BR_PROB_BASE + nedges / 2) / nedges;
357    else
358      e->probability = 0;
359}
360
361/* Combine all REG_BR_PRED notes into single probability and attach REG_BR_PROB
362   note if not already present.  Remove now useless REG_BR_PRED notes.  */
363
364static void
365combine_predictions_for_insn (rtx insn, basic_block bb)
366{
367  rtx prob_note;
368  rtx *pnote;
369  rtx note;
370  int best_probability = PROB_EVEN;
371  int best_predictor = END_PREDICTORS;
372  int combined_probability = REG_BR_PROB_BASE / 2;
373  int d;
374  bool first_match = false;
375  bool found = false;
376
377  if (!can_predict_insn_p (insn))
378    {
379      set_even_probabilities (bb);
380      return;
381    }
382
383  prob_note = find_reg_note (insn, REG_BR_PROB, 0);
384  pnote = &REG_NOTES (insn);
385  if (dump_file)
386    fprintf (dump_file, "Predictions for insn %i bb %i\n", INSN_UID (insn),
387	     bb->index);
388
389  /* We implement "first match" heuristics and use probability guessed
390     by predictor with smallest index.  */
391  for (note = REG_NOTES (insn); note; note = XEXP (note, 1))
392    if (REG_NOTE_KIND (note) == REG_BR_PRED)
393      {
394	int predictor = INTVAL (XEXP (XEXP (note, 0), 0));
395	int probability = INTVAL (XEXP (XEXP (note, 0), 1));
396
397	found = true;
398	if (best_predictor > predictor)
399	  best_probability = probability, best_predictor = predictor;
400
401	d = (combined_probability * probability
402	     + (REG_BR_PROB_BASE - combined_probability)
403	     * (REG_BR_PROB_BASE - probability));
404
405	/* Use FP math to avoid overflows of 32bit integers.  */
406	if (d == 0)
407	  /* If one probability is 0% and one 100%, avoid division by zero.  */
408	  combined_probability = REG_BR_PROB_BASE / 2;
409	else
410	  combined_probability = (((double) combined_probability) * probability
411				  * REG_BR_PROB_BASE / d + 0.5);
412      }
413
414  /* Decide which heuristic to use.  In case we didn't match anything,
415     use no_prediction heuristic, in case we did match, use either
416     first match or Dempster-Shaffer theory depending on the flags.  */
417
418  if (predictor_info [best_predictor].flags & PRED_FLAG_FIRST_MATCH)
419    first_match = true;
420
421  if (!found)
422    dump_prediction (dump_file, PRED_NO_PREDICTION,
423		     combined_probability, bb, true);
424  else
425    {
426      dump_prediction (dump_file, PRED_DS_THEORY, combined_probability,
427		       bb, !first_match);
428      dump_prediction (dump_file, PRED_FIRST_MATCH, best_probability,
429		       bb, first_match);
430    }
431
432  if (first_match)
433    combined_probability = best_probability;
434  dump_prediction (dump_file, PRED_COMBINED, combined_probability, bb, true);
435
436  while (*pnote)
437    {
438      if (REG_NOTE_KIND (*pnote) == REG_BR_PRED)
439	{
440	  int predictor = INTVAL (XEXP (XEXP (*pnote, 0), 0));
441	  int probability = INTVAL (XEXP (XEXP (*pnote, 0), 1));
442
443	  dump_prediction (dump_file, predictor, probability, bb,
444			   !first_match || best_predictor == predictor);
445	  *pnote = XEXP (*pnote, 1);
446	}
447      else
448	pnote = &XEXP (*pnote, 1);
449    }
450
451  if (!prob_note)
452    {
453      REG_NOTES (insn)
454	= gen_rtx_EXPR_LIST (REG_BR_PROB,
455			     GEN_INT (combined_probability), REG_NOTES (insn));
456
457      /* Save the prediction into CFG in case we are seeing non-degenerated
458	 conditional jump.  */
459      if (!single_succ_p (bb))
460	{
461	  BRANCH_EDGE (bb)->probability = combined_probability;
462	  FALLTHRU_EDGE (bb)->probability
463	    = REG_BR_PROB_BASE - combined_probability;
464	}
465    }
466  else if (!single_succ_p (bb))
467    {
468      int prob = INTVAL (XEXP (prob_note, 0));
469
470      BRANCH_EDGE (bb)->probability = prob;
471      FALLTHRU_EDGE (bb)->probability = REG_BR_PROB_BASE - prob;
472    }
473  else
474    single_succ_edge (bb)->probability = REG_BR_PROB_BASE;
475}
476
477/* Combine predictions into single probability and store them into CFG.
478   Remove now useless prediction entries.  */
479
480static void
481combine_predictions_for_bb (FILE *file, basic_block bb)
482{
483  int best_probability = PROB_EVEN;
484  int best_predictor = END_PREDICTORS;
485  int combined_probability = REG_BR_PROB_BASE / 2;
486  int d;
487  bool first_match = false;
488  bool found = false;
489  struct edge_prediction *pred;
490  int nedges = 0;
491  edge e, first = NULL, second = NULL;
492  edge_iterator ei;
493
494  FOR_EACH_EDGE (e, ei, bb->succs)
495    if (!(e->flags & (EDGE_EH | EDGE_FAKE)))
496      {
497	nedges ++;
498	if (first && !second)
499	  second = e;
500	if (!first)
501	  first = e;
502      }
503
504  /* When there is no successor or only one choice, prediction is easy.
505
506     We are lazy for now and predict only basic blocks with two outgoing
507     edges.  It is possible to predict generic case too, but we have to
508     ignore first match heuristics and do more involved combining.  Implement
509     this later.  */
510  if (nedges != 2)
511    {
512      if (!bb->count)
513	set_even_probabilities (bb);
514      bb->predictions = NULL;
515      if (file)
516	fprintf (file, "%i edges in bb %i predicted to even probabilities\n",
517		 nedges, bb->index);
518      return;
519    }
520
521  if (file)
522    fprintf (file, "Predictions for bb %i\n", bb->index);
523
524  /* We implement "first match" heuristics and use probability guessed
525     by predictor with smallest index.  */
526  for (pred = bb->predictions; pred; pred = pred->next)
527    {
528      int predictor = pred->predictor;
529      int probability = pred->probability;
530
531      if (pred->edge != first)
532	probability = REG_BR_PROB_BASE - probability;
533
534      found = true;
535      if (best_predictor > predictor)
536	best_probability = probability, best_predictor = predictor;
537
538      d = (combined_probability * probability
539	   + (REG_BR_PROB_BASE - combined_probability)
540	   * (REG_BR_PROB_BASE - probability));
541
542      /* Use FP math to avoid overflows of 32bit integers.  */
543      if (d == 0)
544	/* If one probability is 0% and one 100%, avoid division by zero.  */
545	combined_probability = REG_BR_PROB_BASE / 2;
546      else
547	combined_probability = (((double) combined_probability) * probability
548				* REG_BR_PROB_BASE / d + 0.5);
549    }
550
551  /* Decide which heuristic to use.  In case we didn't match anything,
552     use no_prediction heuristic, in case we did match, use either
553     first match or Dempster-Shaffer theory depending on the flags.  */
554
555  if (predictor_info [best_predictor].flags & PRED_FLAG_FIRST_MATCH)
556    first_match = true;
557
558  if (!found)
559    dump_prediction (file, PRED_NO_PREDICTION, combined_probability, bb, true);
560  else
561    {
562      dump_prediction (file, PRED_DS_THEORY, combined_probability, bb,
563		       !first_match);
564      dump_prediction (file, PRED_FIRST_MATCH, best_probability, bb,
565		       first_match);
566    }
567
568  if (first_match)
569    combined_probability = best_probability;
570  dump_prediction (file, PRED_COMBINED, combined_probability, bb, true);
571
572  for (pred = bb->predictions; pred; pred = pred->next)
573    {
574      int predictor = pred->predictor;
575      int probability = pred->probability;
576
577      if (pred->edge != EDGE_SUCC (bb, 0))
578	probability = REG_BR_PROB_BASE - probability;
579      dump_prediction (file, predictor, probability, bb,
580		       !first_match || best_predictor == predictor);
581    }
582  bb->predictions = NULL;
583
584  if (!bb->count)
585    {
586      first->probability = combined_probability;
587      second->probability = REG_BR_PROB_BASE - combined_probability;
588    }
589}
590
591/* Predict edge probabilities by exploiting loop structure.
592   When RTLSIMPLELOOPS is set, attempt to count number of iterations by analyzing
593   RTL otherwise use tree based approach.  */
594static void
595predict_loops (struct loops *loops_info, bool rtlsimpleloops)
596{
597  unsigned i;
598
599  if (!rtlsimpleloops)
600    scev_initialize (loops_info);
601
602  /* Try to predict out blocks in a loop that are not part of a
603     natural loop.  */
604  for (i = 1; i < loops_info->num; i++)
605    {
606      basic_block bb, *bbs;
607      unsigned j;
608      unsigned n_exits;
609      struct loop *loop = loops_info->parray[i];
610      struct niter_desc desc;
611      unsigned HOST_WIDE_INT niter;
612      edge *exits;
613
614      exits = get_loop_exit_edges (loop, &n_exits);
615
616      if (rtlsimpleloops)
617	{
618	  iv_analysis_loop_init (loop);
619	  find_simple_exit (loop, &desc);
620
621	  if (desc.simple_p && desc.const_iter)
622	    {
623	      int prob;
624	      niter = desc.niter + 1;
625	      if (niter == 0)        /* We might overflow here.  */
626		niter = desc.niter;
627	      if (niter
628		  > (unsigned int) PARAM_VALUE (PARAM_MAX_PREDICTED_ITERATIONS))
629		niter = PARAM_VALUE (PARAM_MAX_PREDICTED_ITERATIONS);
630
631	      prob = (REG_BR_PROB_BASE
632		      - (REG_BR_PROB_BASE + niter /2) / niter);
633	      /* Branch prediction algorithm gives 0 frequency for everything
634		 after the end of loop for loop having 0 probability to finish.  */
635	      if (prob == REG_BR_PROB_BASE)
636		prob = REG_BR_PROB_BASE - 1;
637	      predict_edge (desc.in_edge, PRED_LOOP_ITERATIONS,
638			    prob);
639	    }
640	}
641      else
642	{
643	  struct tree_niter_desc niter_desc;
644
645	  for (j = 0; j < n_exits; j++)
646	    {
647	      tree niter = NULL;
648
649	      if (number_of_iterations_exit (loop, exits[j], &niter_desc, false))
650		niter = niter_desc.niter;
651	      if (!niter || TREE_CODE (niter_desc.niter) != INTEGER_CST)
652	        niter = loop_niter_by_eval (loop, exits[j]);
653
654	      if (TREE_CODE (niter) == INTEGER_CST)
655		{
656		  int probability;
657		  int max = PARAM_VALUE (PARAM_MAX_PREDICTED_ITERATIONS);
658		  if (host_integerp (niter, 1)
659		      && tree_int_cst_lt (niter,
660				          build_int_cstu (NULL_TREE, max - 1)))
661		    {
662		      HOST_WIDE_INT nitercst = tree_low_cst (niter, 1) + 1;
663		      probability = ((REG_BR_PROB_BASE + nitercst / 2)
664				     / nitercst);
665		    }
666		  else
667		    probability = ((REG_BR_PROB_BASE + max / 2) / max);
668
669		  predict_edge (exits[j], PRED_LOOP_ITERATIONS, probability);
670		}
671	    }
672
673	}
674      free (exits);
675
676      bbs = get_loop_body (loop);
677
678      for (j = 0; j < loop->num_nodes; j++)
679	{
680	  int header_found = 0;
681	  edge e;
682	  edge_iterator ei;
683
684	  bb = bbs[j];
685
686	  /* Bypass loop heuristics on continue statement.  These
687	     statements construct loops via "non-loop" constructs
688	     in the source language and are better to be handled
689	     separately.  */
690	  if ((rtlsimpleloops && !can_predict_insn_p (BB_END (bb)))
691	      || predicted_by_p (bb, PRED_CONTINUE))
692	    continue;
693
694	  /* Loop branch heuristics - predict an edge back to a
695	     loop's head as taken.  */
696	  if (bb == loop->latch)
697	    {
698	      e = find_edge (loop->latch, loop->header);
699	      if (e)
700		{
701		  header_found = 1;
702		  predict_edge_def (e, PRED_LOOP_BRANCH, TAKEN);
703		}
704	    }
705
706	  /* Loop exit heuristics - predict an edge exiting the loop if the
707	     conditional has no loop header successors as not taken.  */
708	  if (!header_found)
709	    FOR_EACH_EDGE (e, ei, bb->succs)
710	      if (e->dest->index < 0
711		  || !flow_bb_inside_loop_p (loop, e->dest))
712		predict_edge
713		  (e, PRED_LOOP_EXIT,
714		   (REG_BR_PROB_BASE
715		    - predictor_info [(int) PRED_LOOP_EXIT].hitrate)
716		   / n_exits);
717	}
718
719      /* Free basic blocks from get_loop_body.  */
720      free (bbs);
721    }
722
723  if (!rtlsimpleloops)
724    {
725      scev_finalize ();
726      current_loops = NULL;
727    }
728}
729
730/* Attempt to predict probabilities of BB outgoing edges using local
731   properties.  */
732static void
733bb_estimate_probability_locally (basic_block bb)
734{
735  rtx last_insn = BB_END (bb);
736  rtx cond;
737
738  if (! can_predict_insn_p (last_insn))
739    return;
740  cond = get_condition (last_insn, NULL, false, false);
741  if (! cond)
742    return;
743
744  /* Try "pointer heuristic."
745     A comparison ptr == 0 is predicted as false.
746     Similarly, a comparison ptr1 == ptr2 is predicted as false.  */
747  if (COMPARISON_P (cond)
748      && ((REG_P (XEXP (cond, 0)) && REG_POINTER (XEXP (cond, 0)))
749	  || (REG_P (XEXP (cond, 1)) && REG_POINTER (XEXP (cond, 1)))))
750    {
751      if (GET_CODE (cond) == EQ)
752	predict_insn_def (last_insn, PRED_POINTER, NOT_TAKEN);
753      else if (GET_CODE (cond) == NE)
754	predict_insn_def (last_insn, PRED_POINTER, TAKEN);
755    }
756  else
757
758  /* Try "opcode heuristic."
759     EQ tests are usually false and NE tests are usually true. Also,
760     most quantities are positive, so we can make the appropriate guesses
761     about signed comparisons against zero.  */
762    switch (GET_CODE (cond))
763      {
764      case CONST_INT:
765	/* Unconditional branch.  */
766	predict_insn_def (last_insn, PRED_UNCONDITIONAL,
767			  cond == const0_rtx ? NOT_TAKEN : TAKEN);
768	break;
769
770      case EQ:
771      case UNEQ:
772	/* Floating point comparisons appears to behave in a very
773	   unpredictable way because of special role of = tests in
774	   FP code.  */
775	if (FLOAT_MODE_P (GET_MODE (XEXP (cond, 0))))
776	  ;
777	/* Comparisons with 0 are often used for booleans and there is
778	   nothing useful to predict about them.  */
779	else if (XEXP (cond, 1) == const0_rtx
780		 || XEXP (cond, 0) == const0_rtx)
781	  ;
782	else
783	  predict_insn_def (last_insn, PRED_OPCODE_NONEQUAL, NOT_TAKEN);
784	break;
785
786      case NE:
787      case LTGT:
788	/* Floating point comparisons appears to behave in a very
789	   unpredictable way because of special role of = tests in
790	   FP code.  */
791	if (FLOAT_MODE_P (GET_MODE (XEXP (cond, 0))))
792	  ;
793	/* Comparisons with 0 are often used for booleans and there is
794	   nothing useful to predict about them.  */
795	else if (XEXP (cond, 1) == const0_rtx
796		 || XEXP (cond, 0) == const0_rtx)
797	  ;
798	else
799	  predict_insn_def (last_insn, PRED_OPCODE_NONEQUAL, TAKEN);
800	break;
801
802      case ORDERED:
803	predict_insn_def (last_insn, PRED_FPOPCODE, TAKEN);
804	break;
805
806      case UNORDERED:
807	predict_insn_def (last_insn, PRED_FPOPCODE, NOT_TAKEN);
808	break;
809
810      case LE:
811      case LT:
812	if (XEXP (cond, 1) == const0_rtx || XEXP (cond, 1) == const1_rtx
813	    || XEXP (cond, 1) == constm1_rtx)
814	  predict_insn_def (last_insn, PRED_OPCODE_POSITIVE, NOT_TAKEN);
815	break;
816
817      case GE:
818      case GT:
819	if (XEXP (cond, 1) == const0_rtx || XEXP (cond, 1) == const1_rtx
820	    || XEXP (cond, 1) == constm1_rtx)
821	  predict_insn_def (last_insn, PRED_OPCODE_POSITIVE, TAKEN);
822	break;
823
824      default:
825	break;
826      }
827}
828
829/* Statically estimate the probability that a branch will be taken and produce
830   estimated profile.  When profile feedback is present never executed portions
831   of function gets estimated.  */
832
833void
834estimate_probability (struct loops *loops_info)
835{
836  basic_block bb;
837
838  connect_infinite_loops_to_exit ();
839  calculate_dominance_info (CDI_DOMINATORS);
840  calculate_dominance_info (CDI_POST_DOMINATORS);
841
842  predict_loops (loops_info, true);
843
844  iv_analysis_done ();
845
846  /* Attempt to predict conditional jumps using a number of heuristics.  */
847  FOR_EACH_BB (bb)
848    {
849      rtx last_insn = BB_END (bb);
850      edge e;
851      edge_iterator ei;
852
853      if (! can_predict_insn_p (last_insn))
854	continue;
855
856      FOR_EACH_EDGE (e, ei, bb->succs)
857	{
858	  /* Predict early returns to be probable, as we've already taken
859	     care for error returns and other are often used for fast paths
860	     trought function.  */
861	  if ((e->dest == EXIT_BLOCK_PTR
862	       || (single_succ_p (e->dest)
863		   && single_succ (e->dest) == EXIT_BLOCK_PTR))
864	       && !predicted_by_p (bb, PRED_NULL_RETURN)
865	       && !predicted_by_p (bb, PRED_CONST_RETURN)
866	       && !predicted_by_p (bb, PRED_NEGATIVE_RETURN)
867	       && !last_basic_block_p (e->dest))
868	    predict_edge_def (e, PRED_EARLY_RETURN, TAKEN);
869
870	  /* Look for block we are guarding (i.e. we dominate it,
871	     but it doesn't postdominate us).  */
872	  if (e->dest != EXIT_BLOCK_PTR && e->dest != bb
873	      && dominated_by_p (CDI_DOMINATORS, e->dest, e->src)
874	      && !dominated_by_p (CDI_POST_DOMINATORS, e->src, e->dest))
875	    {
876	      rtx insn;
877
878	      /* The call heuristic claims that a guarded function call
879		 is improbable.  This is because such calls are often used
880		 to signal exceptional situations such as printing error
881		 messages.  */
882	      for (insn = BB_HEAD (e->dest); insn != NEXT_INSN (BB_END (e->dest));
883		   insn = NEXT_INSN (insn))
884		if (CALL_P (insn)
885		    /* Constant and pure calls are hardly used to signalize
886		       something exceptional.  */
887		    && ! CONST_OR_PURE_CALL_P (insn))
888		  {
889		    predict_edge_def (e, PRED_CALL, NOT_TAKEN);
890		    break;
891		  }
892	    }
893	}
894      bb_estimate_probability_locally (bb);
895    }
896
897  /* Attach the combined probability to each conditional jump.  */
898  FOR_EACH_BB (bb)
899    combine_predictions_for_insn (BB_END (bb), bb);
900
901  remove_fake_edges ();
902  estimate_bb_frequencies (loops_info);
903  free_dominance_info (CDI_POST_DOMINATORS);
904  if (profile_status == PROFILE_ABSENT)
905    profile_status = PROFILE_GUESSED;
906}
907
908/* Set edge->probability for each successor edge of BB.  */
909void
910guess_outgoing_edge_probabilities (basic_block bb)
911{
912  bb_estimate_probability_locally (bb);
913  combine_predictions_for_insn (BB_END (bb), bb);
914}
915
916/* Return constant EXPR will likely have at execution time, NULL if unknown.
917   The function is used by builtin_expect branch predictor so the evidence
918   must come from this construct and additional possible constant folding.
919
920   We may want to implement more involved value guess (such as value range
921   propagation based prediction), but such tricks shall go to new
922   implementation.  */
923
924static tree
925expr_expected_value (tree expr, bitmap visited)
926{
927  if (TREE_CONSTANT (expr))
928    return expr;
929  else if (TREE_CODE (expr) == SSA_NAME)
930    {
931      tree def = SSA_NAME_DEF_STMT (expr);
932
933      /* If we were already here, break the infinite cycle.  */
934      if (bitmap_bit_p (visited, SSA_NAME_VERSION (expr)))
935	return NULL;
936      bitmap_set_bit (visited, SSA_NAME_VERSION (expr));
937
938      if (TREE_CODE (def) == PHI_NODE)
939	{
940	  /* All the arguments of the PHI node must have the same constant
941	     length.  */
942	  int i;
943	  tree val = NULL, new_val;
944
945	  for (i = 0; i < PHI_NUM_ARGS (def); i++)
946	    {
947	      tree arg = PHI_ARG_DEF (def, i);
948
949	      /* If this PHI has itself as an argument, we cannot
950		 determine the string length of this argument.  However,
951		 if we can find an expected constant value for the other
952		 PHI args then we can still be sure that this is
953		 likely a constant.  So be optimistic and just
954		 continue with the next argument.  */
955	      if (arg == PHI_RESULT (def))
956		continue;
957
958	      new_val = expr_expected_value (arg, visited);
959	      if (!new_val)
960		return NULL;
961	      if (!val)
962		val = new_val;
963	      else if (!operand_equal_p (val, new_val, false))
964		return NULL;
965	    }
966	  return val;
967	}
968      if (TREE_CODE (def) != MODIFY_EXPR || TREE_OPERAND (def, 0) != expr)
969	return NULL;
970      return expr_expected_value (TREE_OPERAND (def, 1), visited);
971    }
972  else if (TREE_CODE (expr) == CALL_EXPR)
973    {
974      tree decl = get_callee_fndecl (expr);
975      if (!decl)
976	return NULL;
977      if (DECL_BUILT_IN_CLASS (decl) == BUILT_IN_NORMAL
978	  && DECL_FUNCTION_CODE (decl) == BUILT_IN_EXPECT)
979	{
980	  tree arglist = TREE_OPERAND (expr, 1);
981	  tree val;
982
983	  if (arglist == NULL_TREE
984	      || TREE_CHAIN (arglist) == NULL_TREE)
985	    return NULL;
986	  val = TREE_VALUE (TREE_CHAIN (TREE_OPERAND (expr, 1)));
987	  if (TREE_CONSTANT (val))
988	    return val;
989	  return TREE_VALUE (TREE_CHAIN (TREE_OPERAND (expr, 1)));
990	}
991    }
992  if (BINARY_CLASS_P (expr) || COMPARISON_CLASS_P (expr))
993    {
994      tree op0, op1, res;
995      op0 = expr_expected_value (TREE_OPERAND (expr, 0), visited);
996      if (!op0)
997	return NULL;
998      op1 = expr_expected_value (TREE_OPERAND (expr, 1), visited);
999      if (!op1)
1000	return NULL;
1001      res = fold_build2 (TREE_CODE (expr), TREE_TYPE (expr), op0, op1);
1002      if (TREE_CONSTANT (res))
1003	return res;
1004      return NULL;
1005    }
1006  if (UNARY_CLASS_P (expr))
1007    {
1008      tree op0, res;
1009      op0 = expr_expected_value (TREE_OPERAND (expr, 0), visited);
1010      if (!op0)
1011	return NULL;
1012      res = fold_build1 (TREE_CODE (expr), TREE_TYPE (expr), op0);
1013      if (TREE_CONSTANT (res))
1014	return res;
1015      return NULL;
1016    }
1017  return NULL;
1018}
1019
1020/* Get rid of all builtin_expect calls we no longer need.  */
1021static void
1022strip_builtin_expect (void)
1023{
1024  basic_block bb;
1025  FOR_EACH_BB (bb)
1026    {
1027      block_stmt_iterator bi;
1028      for (bi = bsi_start (bb); !bsi_end_p (bi); bsi_next (&bi))
1029	{
1030	  tree stmt = bsi_stmt (bi);
1031	  tree fndecl;
1032	  tree arglist;
1033
1034	  if (TREE_CODE (stmt) == MODIFY_EXPR
1035	      && TREE_CODE (TREE_OPERAND (stmt, 1)) == CALL_EXPR
1036	      && (fndecl = get_callee_fndecl (TREE_OPERAND (stmt, 1)))
1037	      && DECL_BUILT_IN_CLASS (fndecl) == BUILT_IN_NORMAL
1038	      && DECL_FUNCTION_CODE (fndecl) == BUILT_IN_EXPECT
1039	      && (arglist = TREE_OPERAND (TREE_OPERAND (stmt, 1), 1))
1040	      && TREE_CHAIN (arglist))
1041	    {
1042	      TREE_OPERAND (stmt, 1) = TREE_VALUE (arglist);
1043	      update_stmt (stmt);
1044	    }
1045	}
1046    }
1047}
1048
1049/* Predict using opcode of the last statement in basic block.  */
1050static void
1051tree_predict_by_opcode (basic_block bb)
1052{
1053  tree stmt = last_stmt (bb);
1054  edge then_edge;
1055  tree cond;
1056  tree op0;
1057  tree type;
1058  tree val;
1059  bitmap visited;
1060  edge_iterator ei;
1061
1062  if (!stmt || TREE_CODE (stmt) != COND_EXPR)
1063    return;
1064  FOR_EACH_EDGE (then_edge, ei, bb->succs)
1065    if (then_edge->flags & EDGE_TRUE_VALUE)
1066      break;
1067  cond = TREE_OPERAND (stmt, 0);
1068  if (!COMPARISON_CLASS_P (cond))
1069    return;
1070  op0 = TREE_OPERAND (cond, 0);
1071  type = TREE_TYPE (op0);
1072  visited = BITMAP_ALLOC (NULL);
1073  val = expr_expected_value (cond, visited);
1074  BITMAP_FREE (visited);
1075  if (val)
1076    {
1077      if (integer_zerop (val))
1078	predict_edge_def (then_edge, PRED_BUILTIN_EXPECT, NOT_TAKEN);
1079      else
1080	predict_edge_def (then_edge, PRED_BUILTIN_EXPECT, TAKEN);
1081      return;
1082    }
1083  /* Try "pointer heuristic."
1084     A comparison ptr == 0 is predicted as false.
1085     Similarly, a comparison ptr1 == ptr2 is predicted as false.  */
1086  if (POINTER_TYPE_P (type))
1087    {
1088      if (TREE_CODE (cond) == EQ_EXPR)
1089	predict_edge_def (then_edge, PRED_TREE_POINTER, NOT_TAKEN);
1090      else if (TREE_CODE (cond) == NE_EXPR)
1091	predict_edge_def (then_edge, PRED_TREE_POINTER, TAKEN);
1092    }
1093  else
1094
1095  /* Try "opcode heuristic."
1096     EQ tests are usually false and NE tests are usually true. Also,
1097     most quantities are positive, so we can make the appropriate guesses
1098     about signed comparisons against zero.  */
1099    switch (TREE_CODE (cond))
1100      {
1101      case EQ_EXPR:
1102      case UNEQ_EXPR:
1103	/* Floating point comparisons appears to behave in a very
1104	   unpredictable way because of special role of = tests in
1105	   FP code.  */
1106	if (FLOAT_TYPE_P (type))
1107	  ;
1108	/* Comparisons with 0 are often used for booleans and there is
1109	   nothing useful to predict about them.  */
1110	else if (integer_zerop (op0)
1111		 || integer_zerop (TREE_OPERAND (cond, 1)))
1112	  ;
1113	else
1114	  predict_edge_def (then_edge, PRED_TREE_OPCODE_NONEQUAL, NOT_TAKEN);
1115	break;
1116
1117      case NE_EXPR:
1118      case LTGT_EXPR:
1119	/* Floating point comparisons appears to behave in a very
1120	   unpredictable way because of special role of = tests in
1121	   FP code.  */
1122	if (FLOAT_TYPE_P (type))
1123	  ;
1124	/* Comparisons with 0 are often used for booleans and there is
1125	   nothing useful to predict about them.  */
1126	else if (integer_zerop (op0)
1127		 || integer_zerop (TREE_OPERAND (cond, 1)))
1128	  ;
1129	else
1130	  predict_edge_def (then_edge, PRED_TREE_OPCODE_NONEQUAL, TAKEN);
1131	break;
1132
1133      case ORDERED_EXPR:
1134	predict_edge_def (then_edge, PRED_TREE_FPOPCODE, TAKEN);
1135	break;
1136
1137      case UNORDERED_EXPR:
1138	predict_edge_def (then_edge, PRED_TREE_FPOPCODE, NOT_TAKEN);
1139	break;
1140
1141      case LE_EXPR:
1142      case LT_EXPR:
1143	if (integer_zerop (TREE_OPERAND (cond, 1))
1144	    || integer_onep (TREE_OPERAND (cond, 1))
1145	    || integer_all_onesp (TREE_OPERAND (cond, 1))
1146	    || real_zerop (TREE_OPERAND (cond, 1))
1147	    || real_onep (TREE_OPERAND (cond, 1))
1148	    || real_minus_onep (TREE_OPERAND (cond, 1)))
1149	  predict_edge_def (then_edge, PRED_TREE_OPCODE_POSITIVE, NOT_TAKEN);
1150	break;
1151
1152      case GE_EXPR:
1153      case GT_EXPR:
1154	if (integer_zerop (TREE_OPERAND (cond, 1))
1155	    || integer_onep (TREE_OPERAND (cond, 1))
1156	    || integer_all_onesp (TREE_OPERAND (cond, 1))
1157	    || real_zerop (TREE_OPERAND (cond, 1))
1158	    || real_onep (TREE_OPERAND (cond, 1))
1159	    || real_minus_onep (TREE_OPERAND (cond, 1)))
1160	  predict_edge_def (then_edge, PRED_TREE_OPCODE_POSITIVE, TAKEN);
1161	break;
1162
1163      default:
1164	break;
1165      }
1166}
1167
1168/* Try to guess whether the value of return means error code.  */
1169static enum br_predictor
1170return_prediction (tree val, enum prediction *prediction)
1171{
1172  /* VOID.  */
1173  if (!val)
1174    return PRED_NO_PREDICTION;
1175  /* Different heuristics for pointers and scalars.  */
1176  if (POINTER_TYPE_P (TREE_TYPE (val)))
1177    {
1178      /* NULL is usually not returned.  */
1179      if (integer_zerop (val))
1180	{
1181	  *prediction = NOT_TAKEN;
1182	  return PRED_NULL_RETURN;
1183	}
1184    }
1185  else if (INTEGRAL_TYPE_P (TREE_TYPE (val)))
1186    {
1187      /* Negative return values are often used to indicate
1188         errors.  */
1189      if (TREE_CODE (val) == INTEGER_CST
1190	  && tree_int_cst_sgn (val) < 0)
1191	{
1192	  *prediction = NOT_TAKEN;
1193	  return PRED_NEGATIVE_RETURN;
1194	}
1195      /* Constant return values seems to be commonly taken.
1196         Zero/one often represent booleans so exclude them from the
1197	 heuristics.  */
1198      if (TREE_CONSTANT (val)
1199	  && (!integer_zerop (val) && !integer_onep (val)))
1200	{
1201	  *prediction = TAKEN;
1202	  return PRED_NEGATIVE_RETURN;
1203	}
1204    }
1205  return PRED_NO_PREDICTION;
1206}
1207
1208/* Find the basic block with return expression and look up for possible
1209   return value trying to apply RETURN_PREDICTION heuristics.  */
1210static void
1211apply_return_prediction (int *heads)
1212{
1213  tree return_stmt = NULL;
1214  tree return_val;
1215  edge e;
1216  tree phi;
1217  int phi_num_args, i;
1218  enum br_predictor pred;
1219  enum prediction direction;
1220  edge_iterator ei;
1221
1222  FOR_EACH_EDGE (e, ei, EXIT_BLOCK_PTR->preds)
1223    {
1224      return_stmt = last_stmt (e->src);
1225      if (TREE_CODE (return_stmt) == RETURN_EXPR)
1226	break;
1227    }
1228  if (!e)
1229    return;
1230  return_val = TREE_OPERAND (return_stmt, 0);
1231  if (!return_val)
1232    return;
1233  if (TREE_CODE (return_val) == MODIFY_EXPR)
1234    return_val = TREE_OPERAND (return_val, 1);
1235  if (TREE_CODE (return_val) != SSA_NAME
1236      || !SSA_NAME_DEF_STMT (return_val)
1237      || TREE_CODE (SSA_NAME_DEF_STMT (return_val)) != PHI_NODE)
1238    return;
1239  for (phi = SSA_NAME_DEF_STMT (return_val); phi; phi = PHI_CHAIN (phi))
1240    if (PHI_RESULT (phi) == return_val)
1241      break;
1242  if (!phi)
1243    return;
1244  phi_num_args = PHI_NUM_ARGS (phi);
1245  pred = return_prediction (PHI_ARG_DEF (phi, 0), &direction);
1246
1247  /* Avoid the degenerate case where all return values form the function
1248     belongs to same category (ie they are all positive constants)
1249     so we can hardly say something about them.  */
1250  for (i = 1; i < phi_num_args; i++)
1251    if (pred != return_prediction (PHI_ARG_DEF (phi, i), &direction))
1252      break;
1253  if (i != phi_num_args)
1254    for (i = 0; i < phi_num_args; i++)
1255      {
1256	pred = return_prediction (PHI_ARG_DEF (phi, i), &direction);
1257	if (pred != PRED_NO_PREDICTION)
1258	  predict_paths_leading_to (PHI_ARG_EDGE (phi, i)->src, heads, pred,
1259				    direction);
1260      }
1261}
1262
1263/* Look for basic block that contains unlikely to happen events
1264   (such as noreturn calls) and mark all paths leading to execution
1265   of this basic blocks as unlikely.  */
1266
1267static void
1268tree_bb_level_predictions (void)
1269{
1270  basic_block bb;
1271  int *heads;
1272
1273  heads = xmalloc (sizeof (int) * last_basic_block);
1274  memset (heads, -1, sizeof (int) * last_basic_block);
1275  heads[ENTRY_BLOCK_PTR->next_bb->index] = last_basic_block;
1276
1277  apply_return_prediction (heads);
1278
1279  FOR_EACH_BB (bb)
1280    {
1281      block_stmt_iterator bsi = bsi_last (bb);
1282
1283      for (bsi = bsi_start (bb); !bsi_end_p (bsi); bsi_next (&bsi))
1284	{
1285	  tree stmt = bsi_stmt (bsi);
1286	  switch (TREE_CODE (stmt))
1287	    {
1288	      case MODIFY_EXPR:
1289		if (TREE_CODE (TREE_OPERAND (stmt, 1)) == CALL_EXPR)
1290		  {
1291		    stmt = TREE_OPERAND (stmt, 1);
1292		    goto call_expr;
1293		  }
1294		break;
1295	      case CALL_EXPR:
1296call_expr:;
1297		if (call_expr_flags (stmt) & ECF_NORETURN)
1298		  predict_paths_leading_to (bb, heads, PRED_NORETURN,
1299		      			    NOT_TAKEN);
1300		break;
1301	      default:
1302		break;
1303	    }
1304	}
1305    }
1306
1307  free (heads);
1308}
1309
1310/* Predict branch probabilities and estimate profile of the tree CFG.  */
1311static void
1312tree_estimate_probability (void)
1313{
1314  basic_block bb;
1315  struct loops loops_info;
1316
1317  flow_loops_find (&loops_info);
1318  if (dump_file && (dump_flags & TDF_DETAILS))
1319    flow_loops_dump (&loops_info, dump_file, NULL, 0);
1320
1321  add_noreturn_fake_exit_edges ();
1322  connect_infinite_loops_to_exit ();
1323  calculate_dominance_info (CDI_DOMINATORS);
1324  calculate_dominance_info (CDI_POST_DOMINATORS);
1325
1326  tree_bb_level_predictions ();
1327
1328  mark_irreducible_loops (&loops_info);
1329  predict_loops (&loops_info, false);
1330
1331  FOR_EACH_BB (bb)
1332    {
1333      edge e;
1334      edge_iterator ei;
1335
1336      FOR_EACH_EDGE (e, ei, bb->succs)
1337	{
1338	  /* Predict early returns to be probable, as we've already taken
1339	     care for error returns and other cases are often used for
1340	     fast paths trought function.  */
1341	  if (e->dest == EXIT_BLOCK_PTR
1342	      && TREE_CODE (last_stmt (bb)) == RETURN_EXPR
1343	      && !single_pred_p (bb))
1344	    {
1345	      edge e1;
1346	      edge_iterator ei1;
1347
1348	      FOR_EACH_EDGE (e1, ei1, bb->preds)
1349	      	if (!predicted_by_p (e1->src, PRED_NULL_RETURN)
1350		    && !predicted_by_p (e1->src, PRED_CONST_RETURN)
1351		    && !predicted_by_p (e1->src, PRED_NEGATIVE_RETURN)
1352		    && !last_basic_block_p (e1->src))
1353		  predict_edge_def (e1, PRED_TREE_EARLY_RETURN, NOT_TAKEN);
1354	    }
1355
1356	  /* Look for block we are guarding (ie we dominate it,
1357	     but it doesn't postdominate us).  */
1358	  if (e->dest != EXIT_BLOCK_PTR && e->dest != bb
1359	      && dominated_by_p (CDI_DOMINATORS, e->dest, e->src)
1360	      && !dominated_by_p (CDI_POST_DOMINATORS, e->src, e->dest))
1361	    {
1362	      block_stmt_iterator bi;
1363
1364	      /* The call heuristic claims that a guarded function call
1365		 is improbable.  This is because such calls are often used
1366		 to signal exceptional situations such as printing error
1367		 messages.  */
1368	      for (bi = bsi_start (e->dest); !bsi_end_p (bi);
1369		   bsi_next (&bi))
1370		{
1371		  tree stmt = bsi_stmt (bi);
1372		  if ((TREE_CODE (stmt) == CALL_EXPR
1373		       || (TREE_CODE (stmt) == MODIFY_EXPR
1374			   && TREE_CODE (TREE_OPERAND (stmt, 1)) == CALL_EXPR))
1375		      /* Constant and pure calls are hardly used to signalize
1376			 something exceptional.  */
1377		      && TREE_SIDE_EFFECTS (stmt))
1378		    {
1379		      predict_edge_def (e, PRED_CALL, NOT_TAKEN);
1380		      break;
1381		    }
1382		}
1383	    }
1384	}
1385      tree_predict_by_opcode (bb);
1386    }
1387  FOR_EACH_BB (bb)
1388    combine_predictions_for_bb (dump_file, bb);
1389
1390  if (!flag_loop_optimize)
1391    strip_builtin_expect ();
1392  estimate_bb_frequencies (&loops_info);
1393  free_dominance_info (CDI_POST_DOMINATORS);
1394  remove_fake_exit_edges ();
1395  flow_loops_free (&loops_info);
1396  if (dump_file && (dump_flags & TDF_DETAILS))
1397    dump_tree_cfg (dump_file, dump_flags);
1398  if (profile_status == PROFILE_ABSENT)
1399    profile_status = PROFILE_GUESSED;
1400}
1401
1402/* __builtin_expect dropped tokens into the insn stream describing expected
1403   values of registers.  Generate branch probabilities based off these
1404   values.  */
1405
1406void
1407expected_value_to_br_prob (void)
1408{
1409  rtx insn, cond, ev = NULL_RTX, ev_reg = NULL_RTX;
1410
1411  for (insn = get_insns (); insn ; insn = NEXT_INSN (insn))
1412    {
1413      switch (GET_CODE (insn))
1414	{
1415	case NOTE:
1416	  /* Look for expected value notes.  */
1417	  if (NOTE_LINE_NUMBER (insn) == NOTE_INSN_EXPECTED_VALUE)
1418	    {
1419	      ev = NOTE_EXPECTED_VALUE (insn);
1420	      ev_reg = XEXP (ev, 0);
1421	      delete_insn (insn);
1422	    }
1423	  continue;
1424
1425	case CODE_LABEL:
1426	  /* Never propagate across labels.  */
1427	  ev = NULL_RTX;
1428	  continue;
1429
1430	case JUMP_INSN:
1431	  /* Look for simple conditional branches.  If we haven't got an
1432	     expected value yet, no point going further.  */
1433	  if (!JUMP_P (insn) || ev == NULL_RTX
1434	      || ! any_condjump_p (insn))
1435	    continue;
1436	  break;
1437
1438	default:
1439	  /* Look for insns that clobber the EV register.  */
1440	  if (ev && reg_set_p (ev_reg, insn))
1441	    ev = NULL_RTX;
1442	  continue;
1443	}
1444
1445      /* Collect the branch condition, hopefully relative to EV_REG.  */
1446      /* ???  At present we'll miss things like
1447		(expected_value (eq r70 0))
1448		(set r71 -1)
1449		(set r80 (lt r70 r71))
1450		(set pc (if_then_else (ne r80 0) ...))
1451	 as canonicalize_condition will render this to us as
1452		(lt r70, r71)
1453	 Could use cselib to try and reduce this further.  */
1454      cond = XEXP (SET_SRC (pc_set (insn)), 0);
1455      cond = canonicalize_condition (insn, cond, 0, NULL, ev_reg,
1456				     false, false);
1457      if (! cond || XEXP (cond, 0) != ev_reg
1458	  || GET_CODE (XEXP (cond, 1)) != CONST_INT)
1459	continue;
1460
1461      /* Substitute and simplify.  Given that the expression we're
1462	 building involves two constants, we should wind up with either
1463	 true or false.  */
1464      cond = gen_rtx_fmt_ee (GET_CODE (cond), VOIDmode,
1465			     XEXP (ev, 1), XEXP (cond, 1));
1466      cond = simplify_rtx (cond);
1467
1468      /* Turn the condition into a scaled branch probability.  */
1469      gcc_assert (cond == const_true_rtx || cond == const0_rtx);
1470      predict_insn_def (insn, PRED_BUILTIN_EXPECT,
1471		        cond == const_true_rtx ? TAKEN : NOT_TAKEN);
1472    }
1473}
1474
1475/* Check whether this is the last basic block of function.  Commonly
1476   there is one extra common cleanup block.  */
1477static bool
1478last_basic_block_p (basic_block bb)
1479{
1480  if (bb == EXIT_BLOCK_PTR)
1481    return false;
1482
1483  return (bb->next_bb == EXIT_BLOCK_PTR
1484	  || (bb->next_bb->next_bb == EXIT_BLOCK_PTR
1485	      && single_succ_p (bb)
1486	      && single_succ (bb)->next_bb == EXIT_BLOCK_PTR));
1487}
1488
1489/* Sets branch probabilities according to PREDiction and
1490   FLAGS. HEADS[bb->index] should be index of basic block in that we
1491   need to alter branch predictions (i.e. the first of our dominators
1492   such that we do not post-dominate it) (but we fill this information
1493   on demand, so -1 may be there in case this was not needed yet).  */
1494
1495static void
1496predict_paths_leading_to (basic_block bb, int *heads, enum br_predictor pred,
1497			  enum prediction taken)
1498{
1499  edge e;
1500  edge_iterator ei;
1501  int y;
1502
1503  if (heads[bb->index] < 0)
1504    {
1505      /* This is first time we need this field in heads array; so
1506         find first dominator that we do not post-dominate (we are
1507         using already known members of heads array).  */
1508      basic_block ai = bb;
1509      basic_block next_ai = get_immediate_dominator (CDI_DOMINATORS, bb);
1510      int head;
1511
1512      while (heads[next_ai->index] < 0)
1513	{
1514	  if (!dominated_by_p (CDI_POST_DOMINATORS, next_ai, bb))
1515	    break;
1516	  heads[next_ai->index] = ai->index;
1517	  ai = next_ai;
1518	  next_ai = get_immediate_dominator (CDI_DOMINATORS, next_ai);
1519	}
1520      if (!dominated_by_p (CDI_POST_DOMINATORS, next_ai, bb))
1521	head = next_ai->index;
1522      else
1523	head = heads[next_ai->index];
1524      while (next_ai != bb)
1525	{
1526	  next_ai = ai;
1527	  if (heads[ai->index] == ENTRY_BLOCK)
1528	    ai = ENTRY_BLOCK_PTR;
1529	  else
1530	    ai = BASIC_BLOCK (heads[ai->index]);
1531	  heads[next_ai->index] = head;
1532	}
1533    }
1534  y = heads[bb->index];
1535
1536  /* Now find the edge that leads to our branch and aply the prediction.  */
1537
1538  if (y == last_basic_block)
1539    return;
1540  FOR_EACH_EDGE (e, ei, BASIC_BLOCK (y)->succs)
1541    if (e->dest->index >= 0
1542	&& dominated_by_p (CDI_POST_DOMINATORS, e->dest, bb))
1543      predict_edge_def (e, pred, taken);
1544}
1545
1546/* This is used to carry information about basic blocks.  It is
1547   attached to the AUX field of the standard CFG block.  */
1548
1549typedef struct block_info_def
1550{
1551  /* Estimated frequency of execution of basic_block.  */
1552  sreal frequency;
1553
1554  /* To keep queue of basic blocks to process.  */
1555  basic_block next;
1556
1557  /* Number of predecessors we need to visit first.  */
1558  int npredecessors;
1559} *block_info;
1560
1561/* Similar information for edges.  */
1562typedef struct edge_info_def
1563{
1564  /* In case edge is a loopback edge, the probability edge will be reached
1565     in case header is.  Estimated number of iterations of the loop can be
1566     then computed as 1 / (1 - back_edge_prob).  */
1567  sreal back_edge_prob;
1568  /* True if the edge is a loopback edge in the natural loop.  */
1569  unsigned int back_edge:1;
1570} *edge_info;
1571
1572#define BLOCK_INFO(B)	((block_info) (B)->aux)
1573#define EDGE_INFO(E)	((edge_info) (E)->aux)
1574
1575/* Helper function for estimate_bb_frequencies.
1576   Propagate the frequencies for LOOP.  */
1577
1578static void
1579propagate_freq (struct loop *loop, bitmap tovisit)
1580{
1581  basic_block head = loop->header;
1582  basic_block bb;
1583  basic_block last;
1584  unsigned i;
1585  edge e;
1586  basic_block nextbb;
1587  bitmap_iterator bi;
1588
1589  /* For each basic block we need to visit count number of his predecessors
1590     we need to visit first.  */
1591  EXECUTE_IF_SET_IN_BITMAP (tovisit, 0, i, bi)
1592    {
1593      edge_iterator ei;
1594      int count = 0;
1595
1596       /* The outermost "loop" includes the exit block, which we can not
1597	  look up via BASIC_BLOCK.  Detect this and use EXIT_BLOCK_PTR
1598	  directly.  Do the same for the entry block.  */
1599     if (i == (unsigned)ENTRY_BLOCK)
1600       bb = ENTRY_BLOCK_PTR;
1601     else if (i == (unsigned)EXIT_BLOCK)
1602       bb = EXIT_BLOCK_PTR;
1603     else
1604       bb = BASIC_BLOCK (i);
1605
1606      FOR_EACH_EDGE (e, ei, bb->preds)
1607	{
1608	  bool visit = bitmap_bit_p (tovisit, e->src->index);
1609
1610	  if (visit && !(e->flags & EDGE_DFS_BACK))
1611	    count++;
1612	  else if (visit && dump_file && !EDGE_INFO (e)->back_edge)
1613	    fprintf (dump_file,
1614		     "Irreducible region hit, ignoring edge to %i->%i\n",
1615		     e->src->index, bb->index);
1616	}
1617      BLOCK_INFO (bb)->npredecessors = count;
1618    }
1619
1620  memcpy (&BLOCK_INFO (head)->frequency, &real_one, sizeof (real_one));
1621  last = head;
1622  for (bb = head; bb; bb = nextbb)
1623    {
1624      edge_iterator ei;
1625      sreal cyclic_probability, frequency;
1626
1627      memcpy (&cyclic_probability, &real_zero, sizeof (real_zero));
1628      memcpy (&frequency, &real_zero, sizeof (real_zero));
1629
1630      nextbb = BLOCK_INFO (bb)->next;
1631      BLOCK_INFO (bb)->next = NULL;
1632
1633      /* Compute frequency of basic block.  */
1634      if (bb != head)
1635	{
1636#ifdef ENABLE_CHECKING
1637	  FOR_EACH_EDGE (e, ei, bb->preds)
1638	    gcc_assert (!bitmap_bit_p (tovisit, e->src->index)
1639			|| (e->flags & EDGE_DFS_BACK));
1640#endif
1641
1642	  FOR_EACH_EDGE (e, ei, bb->preds)
1643	    if (EDGE_INFO (e)->back_edge)
1644	      {
1645		sreal_add (&cyclic_probability, &cyclic_probability,
1646			   &EDGE_INFO (e)->back_edge_prob);
1647	      }
1648	    else if (!(e->flags & EDGE_DFS_BACK))
1649	      {
1650		sreal tmp;
1651
1652		/*  frequency += (e->probability
1653				  * BLOCK_INFO (e->src)->frequency /
1654				  REG_BR_PROB_BASE);  */
1655
1656		sreal_init (&tmp, e->probability, 0);
1657		sreal_mul (&tmp, &tmp, &BLOCK_INFO (e->src)->frequency);
1658		sreal_mul (&tmp, &tmp, &real_inv_br_prob_base);
1659		sreal_add (&frequency, &frequency, &tmp);
1660	      }
1661
1662	  if (sreal_compare (&cyclic_probability, &real_zero) == 0)
1663	    {
1664	      memcpy (&BLOCK_INFO (bb)->frequency, &frequency,
1665		      sizeof (frequency));
1666	    }
1667	  else
1668	    {
1669	      if (sreal_compare (&cyclic_probability, &real_almost_one) > 0)
1670		{
1671		  memcpy (&cyclic_probability, &real_almost_one,
1672			  sizeof (real_almost_one));
1673		}
1674
1675	      /* BLOCK_INFO (bb)->frequency = frequency
1676					      / (1 - cyclic_probability) */
1677
1678	      sreal_sub (&cyclic_probability, &real_one, &cyclic_probability);
1679	      sreal_div (&BLOCK_INFO (bb)->frequency,
1680			 &frequency, &cyclic_probability);
1681	    }
1682	}
1683
1684      bitmap_clear_bit (tovisit, bb->index);
1685
1686      e = find_edge (bb, head);
1687      if (e)
1688	{
1689	  sreal tmp;
1690
1691	  /* EDGE_INFO (e)->back_edge_prob
1692	     = ((e->probability * BLOCK_INFO (bb)->frequency)
1693	     / REG_BR_PROB_BASE); */
1694
1695	  sreal_init (&tmp, e->probability, 0);
1696	  sreal_mul (&tmp, &tmp, &BLOCK_INFO (bb)->frequency);
1697	  sreal_mul (&EDGE_INFO (e)->back_edge_prob,
1698		     &tmp, &real_inv_br_prob_base);
1699	}
1700
1701      /* Propagate to successor blocks.  */
1702      FOR_EACH_EDGE (e, ei, bb->succs)
1703	if (!(e->flags & EDGE_DFS_BACK)
1704	    && BLOCK_INFO (e->dest)->npredecessors)
1705	  {
1706	    BLOCK_INFO (e->dest)->npredecessors--;
1707	    if (!BLOCK_INFO (e->dest)->npredecessors)
1708	      {
1709		if (!nextbb)
1710		  nextbb = e->dest;
1711		else
1712		  BLOCK_INFO (last)->next = e->dest;
1713
1714		last = e->dest;
1715	      }
1716	  }
1717    }
1718}
1719
1720/* Estimate probabilities of loopback edges in loops at same nest level.  */
1721
1722static void
1723estimate_loops_at_level (struct loop *first_loop, bitmap tovisit)
1724{
1725  struct loop *loop;
1726
1727  for (loop = first_loop; loop; loop = loop->next)
1728    {
1729      edge e;
1730      basic_block *bbs;
1731      unsigned i;
1732
1733      estimate_loops_at_level (loop->inner, tovisit);
1734
1735      /* Do not do this for dummy function loop.  */
1736      if (EDGE_COUNT (loop->latch->succs) > 0)
1737	{
1738	  /* Find current loop back edge and mark it.  */
1739	  e = loop_latch_edge (loop);
1740	  EDGE_INFO (e)->back_edge = 1;
1741       }
1742
1743      bbs = get_loop_body (loop);
1744      for (i = 0; i < loop->num_nodes; i++)
1745	bitmap_set_bit (tovisit, bbs[i]->index);
1746      free (bbs);
1747      propagate_freq (loop, tovisit);
1748    }
1749}
1750
1751/* Convert counts measured by profile driven feedback to frequencies.
1752   Return nonzero iff there was any nonzero execution count.  */
1753
1754int
1755counts_to_freqs (void)
1756{
1757  gcov_type count_max, true_count_max = 0;
1758  basic_block bb;
1759
1760  FOR_EACH_BB (bb)
1761    true_count_max = MAX (bb->count, true_count_max);
1762
1763  count_max = MAX (true_count_max, 1);
1764  FOR_BB_BETWEEN (bb, ENTRY_BLOCK_PTR, NULL, next_bb)
1765    bb->frequency = (bb->count * BB_FREQ_MAX + count_max / 2) / count_max;
1766  return true_count_max;
1767}
1768
1769/* Return true if function is likely to be expensive, so there is no point to
1770   optimize performance of prologue, epilogue or do inlining at the expense
1771   of code size growth.  THRESHOLD is the limit of number of instructions
1772   function can execute at average to be still considered not expensive.  */
1773
1774bool
1775expensive_function_p (int threshold)
1776{
1777  unsigned int sum = 0;
1778  basic_block bb;
1779  unsigned int limit;
1780
1781  /* We can not compute accurately for large thresholds due to scaled
1782     frequencies.  */
1783  gcc_assert (threshold <= BB_FREQ_MAX);
1784
1785  /* Frequencies are out of range.  This either means that function contains
1786     internal loop executing more than BB_FREQ_MAX times or profile feedback
1787     is available and function has not been executed at all.  */
1788  if (ENTRY_BLOCK_PTR->frequency == 0)
1789    return true;
1790
1791  /* Maximally BB_FREQ_MAX^2 so overflow won't happen.  */
1792  limit = ENTRY_BLOCK_PTR->frequency * threshold;
1793  FOR_EACH_BB (bb)
1794    {
1795      rtx insn;
1796
1797      for (insn = BB_HEAD (bb); insn != NEXT_INSN (BB_END (bb));
1798	   insn = NEXT_INSN (insn))
1799	if (active_insn_p (insn))
1800	  {
1801	    sum += bb->frequency;
1802	    if (sum > limit)
1803	      return true;
1804	}
1805    }
1806
1807  return false;
1808}
1809
1810/* Estimate basic blocks frequency by given branch probabilities.  */
1811
1812static void
1813estimate_bb_frequencies (struct loops *loops)
1814{
1815  basic_block bb;
1816  sreal freq_max;
1817
1818  if (!flag_branch_probabilities || !counts_to_freqs ())
1819    {
1820      static int real_values_initialized = 0;
1821      bitmap tovisit;
1822
1823      if (!real_values_initialized)
1824        {
1825	  real_values_initialized = 1;
1826	  sreal_init (&real_zero, 0, 0);
1827	  sreal_init (&real_one, 1, 0);
1828	  sreal_init (&real_br_prob_base, REG_BR_PROB_BASE, 0);
1829	  sreal_init (&real_bb_freq_max, BB_FREQ_MAX, 0);
1830	  sreal_init (&real_one_half, 1, -1);
1831	  sreal_div (&real_inv_br_prob_base, &real_one, &real_br_prob_base);
1832	  sreal_sub (&real_almost_one, &real_one, &real_inv_br_prob_base);
1833	}
1834
1835      mark_dfs_back_edges ();
1836
1837      single_succ_edge (ENTRY_BLOCK_PTR)->probability = REG_BR_PROB_BASE;
1838
1839      /* Set up block info for each basic block.  */
1840      tovisit = BITMAP_ALLOC (NULL);
1841      alloc_aux_for_blocks (sizeof (struct block_info_def));
1842      alloc_aux_for_edges (sizeof (struct edge_info_def));
1843      FOR_BB_BETWEEN (bb, ENTRY_BLOCK_PTR, NULL, next_bb)
1844	{
1845	  edge e;
1846	  edge_iterator ei;
1847
1848	  FOR_EACH_EDGE (e, ei, bb->succs)
1849	    {
1850	      sreal_init (&EDGE_INFO (e)->back_edge_prob, e->probability, 0);
1851	      sreal_mul (&EDGE_INFO (e)->back_edge_prob,
1852			 &EDGE_INFO (e)->back_edge_prob,
1853			 &real_inv_br_prob_base);
1854	    }
1855	}
1856
1857      /* First compute probabilities locally for each loop from innermost
1858         to outermost to examine probabilities for back edges.  */
1859      estimate_loops_at_level (loops->tree_root, tovisit);
1860
1861      memcpy (&freq_max, &real_zero, sizeof (real_zero));
1862      FOR_EACH_BB (bb)
1863	if (sreal_compare (&freq_max, &BLOCK_INFO (bb)->frequency) < 0)
1864	  memcpy (&freq_max, &BLOCK_INFO (bb)->frequency, sizeof (freq_max));
1865
1866      sreal_div (&freq_max, &real_bb_freq_max, &freq_max);
1867      FOR_BB_BETWEEN (bb, ENTRY_BLOCK_PTR, NULL, next_bb)
1868	{
1869	  sreal tmp;
1870
1871	  sreal_mul (&tmp, &BLOCK_INFO (bb)->frequency, &freq_max);
1872	  sreal_add (&tmp, &tmp, &real_one_half);
1873	  bb->frequency = sreal_to_int (&tmp);
1874	}
1875
1876      free_aux_for_blocks ();
1877      free_aux_for_edges ();
1878      BITMAP_FREE (tovisit);
1879    }
1880  compute_function_frequency ();
1881  if (flag_reorder_functions)
1882    choose_function_section ();
1883}
1884
1885/* Decide whether function is hot, cold or unlikely executed.  */
1886static void
1887compute_function_frequency (void)
1888{
1889  basic_block bb;
1890
1891  if (!profile_info || !flag_branch_probabilities)
1892    return;
1893  cfun->function_frequency = FUNCTION_FREQUENCY_UNLIKELY_EXECUTED;
1894  FOR_EACH_BB (bb)
1895    {
1896      if (maybe_hot_bb_p (bb))
1897	{
1898	  cfun->function_frequency = FUNCTION_FREQUENCY_HOT;
1899	  return;
1900	}
1901      if (!probably_never_executed_bb_p (bb))
1902	cfun->function_frequency = FUNCTION_FREQUENCY_NORMAL;
1903    }
1904}
1905
1906/* Choose appropriate section for the function.  */
1907static void
1908choose_function_section (void)
1909{
1910  if (DECL_SECTION_NAME (current_function_decl)
1911      || !targetm.have_named_sections
1912      /* Theoretically we can split the gnu.linkonce text section too,
1913	 but this requires more work as the frequency needs to match
1914	 for all generated objects so we need to merge the frequency
1915	 of all instances.  For now just never set frequency for these.  */
1916      || DECL_ONE_ONLY (current_function_decl))
1917    return;
1918
1919  /* If we are doing the partitioning optimization, let the optimization
1920     choose the correct section into which to put things.  */
1921
1922  if (flag_reorder_blocks_and_partition)
1923    return;
1924
1925  if (cfun->function_frequency == FUNCTION_FREQUENCY_HOT)
1926    DECL_SECTION_NAME (current_function_decl) =
1927      build_string (strlen (HOT_TEXT_SECTION_NAME), HOT_TEXT_SECTION_NAME);
1928  if (cfun->function_frequency == FUNCTION_FREQUENCY_UNLIKELY_EXECUTED)
1929    DECL_SECTION_NAME (current_function_decl) =
1930      build_string (strlen (UNLIKELY_EXECUTED_TEXT_SECTION_NAME),
1931		    UNLIKELY_EXECUTED_TEXT_SECTION_NAME);
1932}
1933
1934static bool
1935gate_estimate_probability (void)
1936{
1937  return flag_guess_branch_prob;
1938}
1939
1940struct tree_opt_pass pass_profile =
1941{
1942  "profile",				/* name */
1943  gate_estimate_probability,		/* gate */
1944  tree_estimate_probability,		/* execute */
1945  NULL,					/* sub */
1946  NULL,					/* next */
1947  0,					/* static_pass_number */
1948  TV_BRANCH_PROB,			/* tv_id */
1949  PROP_cfg,				/* properties_required */
1950  0,					/* properties_provided */
1951  0,					/* properties_destroyed */
1952  0,					/* todo_flags_start */
1953  TODO_ggc_collect | TODO_verify_ssa,			/* todo_flags_finish */
1954  0					/* letter */
1955};
1956