1/* Basic block reordering routines for the GNU compiler.
2   Copyright (C) 2000, 2002, 2003, 2004, 2005 Free Software Foundation, Inc.
3
4   This file is part of GCC.
5
6   GCC is free software; you can redistribute it and/or modify it
7   under the terms of the GNU General Public License as published by
8   the Free Software Foundation; either version 2, or (at your option)
9   any later version.
10
11   GCC is distributed in the hope that it will be useful, but WITHOUT
12   ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
13   or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public
14   License for more details.
15
16   You should have received a copy of the GNU General Public License
17   along with GCC; see the file COPYING.  If not, write to the Free
18   Software Foundation, 51 Franklin Street, Fifth Floor, Boston, MA
19   02110-1301, USA.  */
20
/* This (greedy) algorithm constructs traces in several rounds.
   The construction starts from "seeds".  The seed for the first round
   is the entry point of the function.  When there is more than one seed,
   the one with the lowest key in the heap is selected first
   (see function bb_to_key).  Then the algorithm repeatedly adds the most
   probable successor to the end of a trace.  Finally it connects the traces.

   There are two parameters: Branch Threshold and Exec Threshold.
   If the probability of an edge to a successor of the current basic block is
   lower than Branch Threshold or the frequency of the successor is lower than
   Exec Threshold, the successor will be a seed in one of the next rounds.
   Each round uses lower thresholds than the previous one.
   The last round has to have these parameters set to zero
   so that the remaining blocks are picked up.

   The algorithm selects the most probable successor from all unvisited
   successors and successors that have been added to this trace.
   The other successors (that have not been "sent" to the next round) will be
   other seeds for this round and the secondary traces will start from them.
   If the successor has not been visited in this trace, it is added to the
   trace (however, there is some heuristic for simple branches).
   If the successor has been visited in this trace, a loop has been found.
   If the loop has many iterations, the loop is rotated so that the
   source block of the most probable edge going out of the loop
   is the last block of the trace.
   If the loop has few iterations and there is no edge from the last block of
   the loop going out of the loop, the loop header is duplicated.
   Finally, the construction of the trace is terminated.

   When connecting traces, the algorithm first checks whether there is an edge
   from the last block of one trace to the first block of another trace.
   When there are still some unconnected traces, it checks whether there exists
   a basic block BB such that BB is a successor of the last bb of one trace
   and BB is a predecessor of the first block of another trace.  In this case,
   BB is duplicated and the traces are connected through this duplicate.
   The remaining traces are simply connected, so there will be a jump to the
   beginning of the next trace.
58
59
60   References:
61
62   "Software Trace Cache"
63   A. Ramirez, J. Larriba-Pey, C. Navarro, J. Torrellas and M. Valero; 1999
64   http://citeseer.nj.nec.com/15361.html
65
66*/
67
68#include "config.h"
69#include "system.h"
70#include "coretypes.h"
71#include "tm.h"
72#include "rtl.h"
73#include "regs.h"
74#include "flags.h"
75#include "timevar.h"
76#include "output.h"
77#include "cfglayout.h"
78#include "fibheap.h"
79#include "target.h"
80#include "function.h"
81#include "tm_p.h"
82#include "obstack.h"
83#include "expr.h"
84#include "params.h"
85#include "toplev.h"
86#include "tree-pass.h"
87
88#ifndef HAVE_conditional_execution
89#define HAVE_conditional_execution 0
90#endif
91
92/* The number of rounds.  In most cases there will only be 4 rounds, but
93   when partitioning hot and cold basic blocks into separate sections of
   the .o file there will be an extra round.  */
95#define N_ROUNDS 5
96
97/* Stubs in case we don't have a return insn.
   We have to check at runtime too, not only at compile time.  */
99
100#ifndef HAVE_return
101#define HAVE_return 0
102#define gen_return() NULL_RTX
103#endif
104
105
106/* Branch thresholds in thousandths (per mille) of the REG_BR_PROB_BASE.  */
107static int branch_threshold[N_ROUNDS] = {400, 200, 100, 0, 0};
108
109/* Exec thresholds in thousandths (per mille) of the frequency of bb 0.  */
110static int exec_threshold[N_ROUNDS] = {500, 200, 50, 0, 0};
111
/* If the edge frequency is lower than DUPLICATION_THRESHOLD per mille of the
   entry block's frequency, the edge destination is not duplicated while
   connecting traces.  */
114#define DUPLICATION_THRESHOLD 100
115
116/* Length of unconditional jump instruction.  */
117static int uncond_jump_length;
118
119/* Structure to hold needed information for each basic block.  */
120typedef struct bbro_basic_block_data_def
121{
122  /* Which trace is the bb start of (-1 means it is not a start of a trace).  */
123  int start_of_trace;
124
125  /* Which trace is the bb end of (-1 means it is not an end of a trace).  */
126  int end_of_trace;
127
128  /* Which trace is the bb in?  */
129  int in_trace;
130
131  /* Which heap is BB in (if any)?  */
132  fibheap_t heap;
133
134  /* Which heap node is BB in (if any)?  */
135  fibnode_t node;
136} bbro_basic_block_data;
137
138/* The current size of the following dynamic array.  */
139static int array_size;
140
141/* The array which holds needed information for basic blocks.  */
142static bbro_basic_block_data *bbd;
143
/* To avoid frequent reallocation the size of the arrays is greater than
   needed; the number of elements is (not less than) 1.25 * size_wanted.  */
146#define GET_ARRAY_SIZE(X) ((((X) / 4) + 1) * 5)
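/* For example, GET_ARRAY_SIZE (16) is 25 and GET_ARRAY_SIZE (100) is 130.  */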
147
148/* Free the memory and set the pointer to NULL.  */
149#define FREE(P) (gcc_assert (P), free (P), P = 0)
150
151/* Structure for holding information about a trace.  */
152struct trace
153{
154  /* First and last basic block of the trace.  */
155  basic_block first, last;
156
157  /* The round of the STC creation which this trace was found in.  */
158  int round;
159
160  /* The length (i.e. the number of basic blocks) of the trace.  */
161  int length;
162};
163
164/* Maximum frequency and count of one of the entry blocks.  */
165static int max_entry_frequency;
166static gcov_type max_entry_count;
167
168/* Local function prototypes.  */
169static void find_traces (int *, struct trace *);
170static basic_block rotate_loop (edge, struct trace *, int);
171static void mark_bb_visited (basic_block, int);
172static void find_traces_1_round (int, int, gcov_type, struct trace *, int *,
173				 int, fibheap_t *, int);
174static basic_block copy_bb (basic_block, edge, basic_block, int);
175static fibheapkey_t bb_to_key (basic_block);
176static bool better_edge_p (basic_block, edge, int, int, int, int, edge);
177static void connect_traces (int, struct trace *);
178static bool copy_bb_p (basic_block, int);
179static int get_uncond_jump_length (void);
180static bool push_to_next_round_p (basic_block, int, int, int, gcov_type);
181static void find_rarely_executed_basic_blocks_and_crossing_edges (edge *,
182								  int *,
183								  int *);
184static void add_labels_and_missing_jumps (edge *, int);
185static void add_reg_crossing_jump_notes (void);
186static void fix_up_fall_thru_edges (void);
187static void fix_edges_for_rarely_executed_code (edge *, int);
188static void fix_crossing_conditional_branches (void);
189static void fix_crossing_unconditional_branches (void);
190
/* Check to see if bb should be pushed into the next round of trace
   collections or not.  Reasons for pushing the block forward are:
   1) the block is cold, we are doing partitioning, and there will be
   another round (cold partition blocks are not supposed to be
   collected into traces until the very last round); or 2) there will
   be another round, and the basic block is not "hot enough" for the
   current round of trace collection.  */
198
199static bool
200push_to_next_round_p (basic_block bb, int round, int number_of_rounds,
201		      int exec_th, gcov_type count_th)
202{
203  bool there_exists_another_round;
204  bool block_not_hot_enough;
205
206  there_exists_another_round = round < number_of_rounds - 1;
207
208  block_not_hot_enough = (bb->frequency < exec_th
209			  || bb->count < count_th
210			  || probably_never_executed_bb_p (bb));
211
212  if (there_exists_another_round
213      && block_not_hot_enough)
214    return true;
215  else
216    return false;
217}
218
/* Find the traces for Software Trace Cache.  Chain each trace through
   the basic blocks' aux fields.  Store the number of traces in *N_TRACES and
   the descriptions of the traces in TRACES.  */
222
223static void
224find_traces (int *n_traces, struct trace *traces)
225{
226  int i;
227  int number_of_rounds;
228  edge e;
229  edge_iterator ei;
230  fibheap_t heap;
231
232  /* Add one extra round of trace collection when partitioning hot/cold
233     basic blocks into separate sections.  The last round is for all the
234     cold blocks (and ONLY the cold blocks).  */
235
236  number_of_rounds = N_ROUNDS - 1;
237
238  /* Insert entry points of function into heap.  */
239  heap = fibheap_new ();
240  max_entry_frequency = 0;
241  max_entry_count = 0;
242  FOR_EACH_EDGE (e, ei, ENTRY_BLOCK_PTR->succs)
243    {
244      bbd[e->dest->index].heap = heap;
245      bbd[e->dest->index].node = fibheap_insert (heap, bb_to_key (e->dest),
246						    e->dest);
247      if (e->dest->frequency > max_entry_frequency)
248	max_entry_frequency = e->dest->frequency;
249      if (e->dest->count > max_entry_count)
250	max_entry_count = e->dest->count;
251    }
252
253  /* Find the traces.  */
254  for (i = 0; i < number_of_rounds; i++)
255    {
256      gcov_type count_threshold;
257
258      if (dump_file)
259	fprintf (dump_file, "STC - round %d\n", i + 1);
260
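      /* Compute this round's execution count threshold.  The second form of
	 the computation avoids overflow when MAX_ENTRY_COUNT is large.  */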
261      if (max_entry_count < INT_MAX / 1000)
262	count_threshold = max_entry_count * exec_threshold[i] / 1000;
263      else
264	count_threshold = max_entry_count / 1000 * exec_threshold[i];
265
266      find_traces_1_round (REG_BR_PROB_BASE * branch_threshold[i] / 1000,
267			   max_entry_frequency * exec_threshold[i] / 1000,
268			   count_threshold, traces, n_traces, i, &heap,
269			   number_of_rounds);
270    }
271  fibheap_delete (heap);
272
273  if (dump_file)
274    {
275      for (i = 0; i < *n_traces; i++)
276	{
277	  basic_block bb;
278	  fprintf (dump_file, "Trace %d (round %d):  ", i + 1,
279		   traces[i].round + 1);
280	  for (bb = traces[i].first; bb != traces[i].last; bb = bb->aux)
281	    fprintf (dump_file, "%d [%d] ", bb->index, bb->frequency);
282	  fprintf (dump_file, "%d [%d]\n", bb->index, bb->frequency);
283	}
284      fflush (dump_file);
285    }
286}
287
/* Rotate the loop whose back edge is BACK_EDGE so that it forms the tail of
   trace TRACE (with sequential number TRACE_N).  Return the basic block that
   becomes the new end of the trace.  */
290
291static basic_block
292rotate_loop (edge back_edge, struct trace *trace, int trace_n)
293{
294  basic_block bb;
295
296  /* Information about the best end (end after rotation) of the loop.  */
297  basic_block best_bb = NULL;
298  edge best_edge = NULL;
299  int best_freq = -1;
300  gcov_type best_count = -1;
301  /* The best edge is preferred when its destination is not visited yet
302     or is a start block of some trace.  */
303  bool is_preferred = false;
304
305  /* Find the most frequent edge that goes out from current trace.  */
306  bb = back_edge->dest;
307  do
308    {
309      edge e;
310      edge_iterator ei;
311
312      FOR_EACH_EDGE (e, ei, bb->succs)
313	if (e->dest != EXIT_BLOCK_PTR
314	    && e->dest->il.rtl->visited != trace_n
315	    && (e->flags & EDGE_CAN_FALLTHRU)
316	    && !(e->flags & EDGE_COMPLEX))
317	{
318	  if (is_preferred)
319	    {
320	      /* The best edge is preferred.  */
321	      if (!e->dest->il.rtl->visited
322		  || bbd[e->dest->index].start_of_trace >= 0)
323		{
324		  /* The current edge E is also preferred.  */
325		  int freq = EDGE_FREQUENCY (e);
326		  if (freq > best_freq || e->count > best_count)
327		    {
328		      best_freq = freq;
329		      best_count = e->count;
330		      best_edge = e;
331		      best_bb = bb;
332		    }
333		}
334	    }
335	  else
336	    {
337	      if (!e->dest->il.rtl->visited
338		  || bbd[e->dest->index].start_of_trace >= 0)
339		{
340		  /* The current edge E is preferred.  */
341		  is_preferred = true;
342		  best_freq = EDGE_FREQUENCY (e);
343		  best_count = e->count;
344		  best_edge = e;
345		  best_bb = bb;
346		}
347	      else
348		{
349		  int freq = EDGE_FREQUENCY (e);
350		  if (!best_edge || freq > best_freq || e->count > best_count)
351		    {
352		      best_freq = freq;
353		      best_count = e->count;
354		      best_edge = e;
355		      best_bb = bb;
356		    }
357		}
358	    }
359	}
360      bb = bb->aux;
361    }
362  while (bb != back_edge->dest);
363
364  if (best_bb)
365    {
366      /* Rotate the loop so that the BEST_EDGE goes out from the last block of
367	 the trace.  */
368      if (back_edge->dest == trace->first)
369	{
370	  trace->first = best_bb->aux;
371	}
372      else
373	{
374	  basic_block prev_bb;
375
376	  for (prev_bb = trace->first;
377	       prev_bb->aux != back_edge->dest;
378	       prev_bb = prev_bb->aux)
379	    ;
380	  prev_bb->aux = best_bb->aux;
381
382	  /* Try to get rid of uncond jump to cond jump.  */
383	  if (single_succ_p (prev_bb))
384	    {
385	      basic_block header = single_succ (prev_bb);
386
387	      /* Duplicate HEADER if it is a small block containing cond jump
388		 in the end.  */
389	      if (any_condjump_p (BB_END (header)) && copy_bb_p (header, 0)
390		  && !find_reg_note (BB_END (header), REG_CROSSING_JUMP,
391				     NULL_RTX))
392		copy_bb (header, single_succ_edge (prev_bb), prev_bb, trace_n);
393	    }
394	}
395    }
396  else
397    {
398      /* We have not found suitable loop tail so do no rotation.  */
399      best_bb = back_edge->src;
400    }
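  /* BEST_BB is now the last basic block of the trace; terminate the chain
     there.  */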
401  best_bb->aux = NULL;
402  return best_bb;
403}
404
/* Mark BB as visited in trace number TRACE, and remove it from the heap it is
   in, if any.  */
406
407static void
408mark_bb_visited (basic_block bb, int trace)
409{
410  bb->il.rtl->visited = trace;
411  if (bbd[bb->index].heap)
412    {
413      fibheap_delete_node (bbd[bb->index].heap, bbd[bb->index].node);
414      bbd[bb->index].heap = NULL;
415      bbd[bb->index].node = NULL;
416    }
417}
418
/* One round of finding traces.  Find traces for BRANCH_TH and EXEC_TH, i.e. do
   not include into traces basic blocks whose probability is lower than
   BRANCH_TH, whose frequency is lower than EXEC_TH, or whose count is lower
   than COUNT_TH.  Store the new traces into TRACES and modify the number of
   traces *N_TRACES.  Set the round (which the traces belong to) to ROUND.
   The function expects the starting basic blocks to be in *HEAP; at the end it
   deletes *HEAP and stores the starting points for the next round into a new
   *HEAP.  */
426
427static void
428find_traces_1_round (int branch_th, int exec_th, gcov_type count_th,
429		     struct trace *traces, int *n_traces, int round,
430		     fibheap_t *heap, int number_of_rounds)
431{
432  /* Heap for discarded basic blocks which are possible starting points for
433     the next round.  */
434  fibheap_t new_heap = fibheap_new ();
435
436  while (!fibheap_empty (*heap))
437    {
438      basic_block bb;
439      struct trace *trace;
440      edge best_edge, e;
441      fibheapkey_t key;
442      edge_iterator ei;
443
444      bb = fibheap_extract_min (*heap);
445      bbd[bb->index].heap = NULL;
446      bbd[bb->index].node = NULL;
447
448      if (dump_file)
449	fprintf (dump_file, "Getting bb %d\n", bb->index);
450
451      /* If the BB's frequency is too low send BB to the next round.  When
452	 partitioning hot/cold blocks into separate sections, make sure all
453	 the cold blocks (and ONLY the cold blocks) go into the (extra) final
454	 round.  */
455
456      if (push_to_next_round_p (bb, round, number_of_rounds, exec_th,
457				count_th))
458	{
459	  int key = bb_to_key (bb);
460	  bbd[bb->index].heap = new_heap;
461	  bbd[bb->index].node = fibheap_insert (new_heap, key, bb);
462
463	  if (dump_file)
464	    fprintf (dump_file,
465		     "  Possible start point of next round: %d (key: %d)\n",
466		     bb->index, key);
467	  continue;
468	}
469
470      trace = traces + *n_traces;
471      trace->first = bb;
472      trace->round = round;
473      trace->length = 0;
474      bbd[bb->index].in_trace = *n_traces;
475      (*n_traces)++;
476
477      do
478	{
479	  int prob, freq;
480	  bool ends_in_call;
481
482	  /* The probability and frequency of the best edge.  */
483	  int best_prob = INT_MIN / 2;
484	  int best_freq = INT_MIN / 2;
485
486	  best_edge = NULL;
487	  mark_bb_visited (bb, *n_traces);
488	  trace->length++;
489
490	  if (dump_file)
491	    fprintf (dump_file, "Basic block %d was visited in trace %d\n",
492		     bb->index, *n_traces - 1);
493
494	  ends_in_call = block_ends_with_call_p (bb);
495
496	  /* Select the successor that will be placed after BB.  */
497	  FOR_EACH_EDGE (e, ei, bb->succs)
498	    {
499	      gcc_assert (!(e->flags & EDGE_FAKE));
500
501	      if (e->dest == EXIT_BLOCK_PTR)
502		continue;
503
504	      if (e->dest->il.rtl->visited
505		  && e->dest->il.rtl->visited != *n_traces)
506		continue;
507
508	      if (BB_PARTITION (e->dest) != BB_PARTITION (bb))
509		continue;
510
511	      prob = e->probability;
512	      freq = e->dest->frequency;
513
514	      /* The only sensible preference for a call instruction is the
515		 fallthru edge.  Don't bother selecting anything else.  */
516	      if (ends_in_call)
517		{
518		  if (e->flags & EDGE_CAN_FALLTHRU)
519		    {
520		      best_edge = e;
521		      best_prob = prob;
522		      best_freq = freq;
523		    }
524		  continue;
525		}
526
	      /* Skip an edge that cannot be fallthru, or an improbable or
		 infrequent successor (i.e. an unsuitable successor).  */
529	      if (!(e->flags & EDGE_CAN_FALLTHRU) || (e->flags & EDGE_COMPLEX)
530		  || prob < branch_th || EDGE_FREQUENCY (e) < exec_th
531		  || e->count < count_th)
532		continue;
533
534	      /* If partitioning hot/cold basic blocks, don't consider edges
535		 that cross section boundaries.  */
536
537	      if (better_edge_p (bb, e, prob, freq, best_prob, best_freq,
538				 best_edge))
539		{
540		  best_edge = e;
541		  best_prob = prob;
542		  best_freq = freq;
543		}
544	    }
545
546	  /* If the best destination has multiple predecessors, and can be
547	     duplicated cheaper than a jump, don't allow it to be added
548	     to a trace.  We'll duplicate it when connecting traces.  */
549	  if (best_edge && EDGE_COUNT (best_edge->dest->preds) >= 2
550	      && copy_bb_p (best_edge->dest, 0))
551	    best_edge = NULL;
552
553	  /* Add all non-selected successors to the heaps.  */
554	  FOR_EACH_EDGE (e, ei, bb->succs)
555	    {
556	      if (e == best_edge
557		  || e->dest == EXIT_BLOCK_PTR
558		  || e->dest->il.rtl->visited)
559		continue;
560
561	      key = bb_to_key (e->dest);
562
563	      if (bbd[e->dest->index].heap)
564		{
565		  /* E->DEST is already in some heap.  */
566		  if (key != bbd[e->dest->index].node->key)
567		    {
568		      if (dump_file)
569			{
570			  fprintf (dump_file,
571				   "Changing key for bb %d from %ld to %ld.\n",
572				   e->dest->index,
573				   (long) bbd[e->dest->index].node->key,
574				   key);
575			}
576		      fibheap_replace_key (bbd[e->dest->index].heap,
577					   bbd[e->dest->index].node, key);
578		    }
579		}
580	      else
581		{
582		  fibheap_t which_heap = *heap;
583
584		  prob = e->probability;
585		  freq = EDGE_FREQUENCY (e);
586
587		  if (!(e->flags & EDGE_CAN_FALLTHRU)
588		      || (e->flags & EDGE_COMPLEX)
589		      || prob < branch_th || freq < exec_th
590		      || e->count < count_th)
591		    {
592		      /* When partitioning hot/cold basic blocks, make sure
593			 the cold blocks (and only the cold blocks) all get
594			 pushed to the last round of trace collection.  */
595
596		      if (push_to_next_round_p (e->dest, round,
597						number_of_rounds,
598						exec_th, count_th))
599			which_heap = new_heap;
600		    }
601
602		  bbd[e->dest->index].heap = which_heap;
603		  bbd[e->dest->index].node = fibheap_insert (which_heap,
604								key, e->dest);
605
606		  if (dump_file)
607		    {
608		      fprintf (dump_file,
609			       "  Possible start of %s round: %d (key: %ld)\n",
610			       (which_heap == new_heap) ? "next" : "this",
611			       e->dest->index, (long) key);
612		    }
613
614		}
615	    }
616
617	  if (best_edge) /* Suitable successor was found.  */
618	    {
619	      if (best_edge->dest->il.rtl->visited == *n_traces)
620		{
621		  /* We do nothing with one basic block loops.  */
622		  if (best_edge->dest != bb)
623		    {
624		      if (EDGE_FREQUENCY (best_edge)
625			  > 4 * best_edge->dest->frequency / 5)
626			{
627			  /* The loop has at least 4 iterations.  If the loop
628			     header is not the first block of the function
629			     we can rotate the loop.  */
630
631			  if (best_edge->dest != ENTRY_BLOCK_PTR->next_bb)
632			    {
633			      if (dump_file)
634				{
635				  fprintf (dump_file,
636					   "Rotating loop %d - %d\n",
637					   best_edge->dest->index, bb->index);
638				}
639			      bb->aux = best_edge->dest;
640			      bbd[best_edge->dest->index].in_trace =
641							     (*n_traces) - 1;
642			      bb = rotate_loop (best_edge, trace, *n_traces);
643			    }
644			}
645		      else
646			{
647			  /* The loop has less than 4 iterations.  */
648
649			  if (single_succ_p (bb)
650			      && copy_bb_p (best_edge->dest, !optimize_size))
651			    {
652			      bb = copy_bb (best_edge->dest, best_edge, bb,
653					    *n_traces);
654			      trace->length++;
655			    }
656			}
657		    }
658
659		  /* Terminate the trace.  */
660		  break;
661		}
662	      else
663		{
664		  /* Check for a situation
665
666		    A
667		   /|
668		  B |
669		   \|
670		    C
671
672		  where
673		  EDGE_FREQUENCY (AB) + EDGE_FREQUENCY (BC)
674		    >= EDGE_FREQUENCY (AC).
675		  (i.e. 2 * B->frequency >= EDGE_FREQUENCY (AC) )
676		  Best ordering is then A B C.
677
678		  This situation is created for example by:
679
680		  if (A) B;
681		  C;
682
683		  */
684
685		  FOR_EACH_EDGE (e, ei, bb->succs)
686		    if (e != best_edge
687			&& (e->flags & EDGE_CAN_FALLTHRU)
688			&& !(e->flags & EDGE_COMPLEX)
689			&& !e->dest->il.rtl->visited
690			&& single_pred_p (e->dest)
691			&& !(e->flags & EDGE_CROSSING)
692			&& single_succ_p (e->dest)
693			&& (single_succ_edge (e->dest)->flags
694			    & EDGE_CAN_FALLTHRU)
695			&& !(single_succ_edge (e->dest)->flags & EDGE_COMPLEX)
696			&& single_succ (e->dest) == best_edge->dest
697			&& 2 * e->dest->frequency >= EDGE_FREQUENCY (best_edge))
698		      {
699			best_edge = e;
700			if (dump_file)
701			  fprintf (dump_file, "Selecting BB %d\n",
702				   best_edge->dest->index);
703			break;
704		      }
705
706		  bb->aux = best_edge->dest;
707		  bbd[best_edge->dest->index].in_trace = (*n_traces) - 1;
708		  bb = best_edge->dest;
709		}
710	    }
711	}
712      while (best_edge);
713      trace->last = bb;
714      bbd[trace->first->index].start_of_trace = *n_traces - 1;
715      bbd[trace->last->index].end_of_trace = *n_traces - 1;
716
      /* The trace is terminated so we have to recount the keys in the heap
	 (some blocks can have a lower key now because one of their
	 predecessors is an end of a trace).  */
720      FOR_EACH_EDGE (e, ei, bb->succs)
721	{
722	  if (e->dest == EXIT_BLOCK_PTR
723	      || e->dest->il.rtl->visited)
724	    continue;
725
726	  if (bbd[e->dest->index].heap)
727	    {
728	      key = bb_to_key (e->dest);
729	      if (key != bbd[e->dest->index].node->key)
730		{
731		  if (dump_file)
732		    {
733		      fprintf (dump_file,
734			       "Changing key for bb %d from %ld to %ld.\n",
735			       e->dest->index,
736			       (long) bbd[e->dest->index].node->key, key);
737		    }
738		  fibheap_replace_key (bbd[e->dest->index].heap,
739				       bbd[e->dest->index].node,
740				       key);
741		}
742	    }
743	}
744    }
745
746  fibheap_delete (*heap);
747
748  /* "Return" the new heap.  */
749  *heap = new_heap;
750}
751
/* Create a duplicate of the basic block OLD_BB and redirect edge E to it, add
   it to the trace after BB, mark the duplicate as visited and update the
   pass's data structures (TRACE is the number of the trace which OLD_BB is
   duplicated into).  */
755
756static basic_block
757copy_bb (basic_block old_bb, edge e, basic_block bb, int trace)
758{
759  basic_block new_bb;
760
761  new_bb = duplicate_block (old_bb, e, bb);
762  BB_COPY_PARTITION (new_bb, old_bb);
763
764  gcc_assert (e->dest == new_bb);
765  gcc_assert (!e->dest->il.rtl->visited);
766
767  if (dump_file)
768    fprintf (dump_file,
769	     "Duplicated bb %d (created bb %d)\n",
770	     old_bb->index, new_bb->index);
771  new_bb->il.rtl->visited = trace;
772  new_bb->aux = bb->aux;
773  bb->aux = new_bb;
774
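  /* The duplicated block may have an index beyond the current size of the BBD
     array; grow the array if necessary.  */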
775  if (new_bb->index >= array_size || last_basic_block > array_size)
776    {
777      int i;
778      int new_size;
779
780      new_size = MAX (last_basic_block, new_bb->index + 1);
781      new_size = GET_ARRAY_SIZE (new_size);
782      bbd = xrealloc (bbd, new_size * sizeof (bbro_basic_block_data));
783      for (i = array_size; i < new_size; i++)
784	{
785	  bbd[i].start_of_trace = -1;
786	  bbd[i].in_trace = -1;
787	  bbd[i].end_of_trace = -1;
788	  bbd[i].heap = NULL;
789	  bbd[i].node = NULL;
790	}
791      array_size = new_size;
792
793      if (dump_file)
794	{
795	  fprintf (dump_file,
796		   "Growing the dynamic array to %d elements.\n",
797		   array_size);
798	}
799    }
800
801  bbd[new_bb->index].in_trace = trace;
802
803  return new_bb;
804}
805
806/* Compute and return the key (for the heap) of the basic block BB.  */
807
808static fibheapkey_t
809bb_to_key (basic_block bb)
810{
811  edge e;
812  edge_iterator ei;
813  int priority = 0;
814
815  /* Do not start in probably never executed blocks.  */
816
817  if (BB_PARTITION (bb) == BB_COLD_PARTITION
818      || probably_never_executed_bb_p (bb))
819    return BB_FREQ_MAX;
820
821  /* Prefer blocks whose predecessor is an end of some trace
822     or whose predecessor edge is EDGE_DFS_BACK.  */
823  FOR_EACH_EDGE (e, ei, bb->preds)
824    {
825      if ((e->src != ENTRY_BLOCK_PTR && bbd[e->src->index].end_of_trace >= 0)
826	  || (e->flags & EDGE_DFS_BACK))
827	{
828	  int edge_freq = EDGE_FREQUENCY (e);
829
830	  if (edge_freq > priority)
831	    priority = edge_freq;
832	}
833    }
834
835  if (priority)
    /* The block with priority should have a significantly lower key.  */
837    return -(100 * BB_FREQ_MAX + 100 * priority + bb->frequency);
838  return -bb->frequency;
839}
840
/* Return true when the edge E from basic block BB is better than the temporary
   best edge (details are in the function).  The probability of edge E is PROB.
   The frequency of the successor is FREQ.  The current best probability is
   BEST_PROB, the best frequency is BEST_FREQ.
   An edge is considered to be equivalent to the current best edge when PROB
   does not differ much from BEST_PROB; similarly for frequency.  */
847
848static bool
849better_edge_p (basic_block bb, edge e, int prob, int freq, int best_prob,
850	       int best_freq, edge cur_best_edge)
851{
852  bool is_better_edge;
853
854  /* The BEST_* values do not have to be best, but can be a bit smaller than
855     maximum values.  */
856  int diff_prob = best_prob / 10;
857  int diff_freq = best_freq / 10;
858
859  if (prob > best_prob + diff_prob)
860    /* The edge has higher probability than the temporary best edge.  */
861    is_better_edge = true;
862  else if (prob < best_prob - diff_prob)
863    /* The edge has lower probability than the temporary best edge.  */
864    is_better_edge = false;
865  else if (freq < best_freq - diff_freq)
    /* The edge and the temporary best edge have almost equivalent
867       probabilities.  The higher frequency of a successor now means
868       that there is another edge going into that successor.
869       This successor has lower frequency so it is better.  */
870    is_better_edge = true;
871  else if (freq > best_freq + diff_freq)
872    /* This successor has higher frequency so it is worse.  */
873    is_better_edge = false;
874  else if (e->dest->prev_bb == bb)
    /* The edges have equivalent probabilities and the successors
       have equivalent frequencies.  Prefer the successor that immediately
       follows BB in the current block order.  */
877    is_better_edge = true;
878  else
879    is_better_edge = false;
880
881  /* If we are doing hot/cold partitioning, make sure that we always favor
882     non-crossing edges over crossing edges.  */
883
884  if (!is_better_edge
885      && flag_reorder_blocks_and_partition
886      && cur_best_edge
887      && (cur_best_edge->flags & EDGE_CROSSING)
888      && !(e->flags & EDGE_CROSSING))
889    is_better_edge = true;
890
891  return is_better_edge;
892}
893
894/* Connect traces in array TRACES, N_TRACES is the count of traces.  */
895
896static void
897connect_traces (int n_traces, struct trace *traces)
898{
899  int i;
900  bool *connected;
901  bool two_passes;
902  int last_trace;
903  int current_pass;
904  int current_partition;
905  int freq_threshold;
906  gcov_type count_threshold;
907
908  freq_threshold = max_entry_frequency * DUPLICATION_THRESHOLD / 1000;
909  if (max_entry_count < INT_MAX / 1000)
910    count_threshold = max_entry_count * DUPLICATION_THRESHOLD / 1000;
911  else
912    count_threshold = max_entry_count / 1000 * DUPLICATION_THRESHOLD;
913
914  connected = XCNEWVEC (bool, n_traces);
915  last_trace = -1;
916  current_pass = 1;
917  current_partition = BB_PARTITION (traces[0].first);
918  two_passes = false;
919
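  /* If both hot and cold traces are present, connect them in two passes:
     first the traces of one partition, then the traces of the other.  */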
920  if (flag_reorder_blocks_and_partition)
921    for (i = 0; i < n_traces && !two_passes; i++)
922      if (BB_PARTITION (traces[0].first)
923	  != BB_PARTITION (traces[i].first))
924	two_passes = true;
925
  for (i = 0; i < n_traces || (two_passes && current_pass == 1); i++)
927    {
928      int t = i;
929      int t2;
930      edge e, best;
931      int best_len;
932
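      /* The first pass over the traces is complete; wrap around and connect
	 the traces of the other partition in a second pass.  */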
933      if (i >= n_traces)
934	{
935	  gcc_assert (two_passes && current_pass == 1);
936	  i = 0;
937	  t = i;
938	  current_pass = 2;
939	  if (current_partition == BB_HOT_PARTITION)
940	    current_partition = BB_COLD_PARTITION;
941	  else
942	    current_partition = BB_HOT_PARTITION;
943	}
944
945      if (connected[t])
946	continue;
947
948      if (two_passes
949	  && BB_PARTITION (traces[t].first) != current_partition)
950	continue;
951
952      connected[t] = true;
953
954      /* Find the predecessor traces.  */
955      for (t2 = t; t2 > 0;)
956	{
957	  edge_iterator ei;
958	  best = NULL;
959	  best_len = 0;
960	  FOR_EACH_EDGE (e, ei, traces[t2].first->preds)
961	    {
962	      int si = e->src->index;
963
964	      if (e->src != ENTRY_BLOCK_PTR
965		  && (e->flags & EDGE_CAN_FALLTHRU)
966		  && !(e->flags & EDGE_COMPLEX)
967		  && bbd[si].end_of_trace >= 0
968		  && !connected[bbd[si].end_of_trace]
969		  && (BB_PARTITION (e->src) == current_partition)
970		  && (!best
971		      || e->probability > best->probability
972		      || (e->probability == best->probability
973			  && traces[bbd[si].end_of_trace].length > best_len)))
974		{
975		  best = e;
976		  best_len = traces[bbd[si].end_of_trace].length;
977		}
978	    }
979	  if (best)
980	    {
981	      best->src->aux = best->dest;
982	      t2 = bbd[best->src->index].end_of_trace;
983	      connected[t2] = true;
984
985	      if (dump_file)
986		{
987		  fprintf (dump_file, "Connection: %d %d\n",
988			   best->src->index, best->dest->index);
989		}
990	    }
991	  else
992	    break;
993	}
994
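      /* Append the chain of traces found above (starting with trace T2 and
	 ending with trace T) after the last trace placed so far.  */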
995      if (last_trace >= 0)
996	traces[last_trace].last->aux = traces[t2].first;
997      last_trace = t;
998
999      /* Find the successor traces.  */
1000      while (1)
1001	{
1002	  /* Find the continuation of the chain.  */
1003	  edge_iterator ei;
1004	  best = NULL;
1005	  best_len = 0;
1006	  FOR_EACH_EDGE (e, ei, traces[t].last->succs)
1007	    {
1008	      int di = e->dest->index;
1009
1010	      if (e->dest != EXIT_BLOCK_PTR
1011		  && (e->flags & EDGE_CAN_FALLTHRU)
1012		  && !(e->flags & EDGE_COMPLEX)
1013		  && bbd[di].start_of_trace >= 0
1014		  && !connected[bbd[di].start_of_trace]
1015		  && (BB_PARTITION (e->dest) == current_partition)
1016		  && (!best
1017		      || e->probability > best->probability
1018		      || (e->probability == best->probability
1019			  && traces[bbd[di].start_of_trace].length > best_len)))
1020		{
1021		  best = e;
1022		  best_len = traces[bbd[di].start_of_trace].length;
1023		}
1024	    }
1025
1026	  if (best)
1027	    {
1028	      if (dump_file)
1029		{
1030		  fprintf (dump_file, "Connection: %d %d\n",
1031			   best->src->index, best->dest->index);
1032		}
1033	      t = bbd[best->dest->index].start_of_trace;
1034	      traces[last_trace].last->aux = traces[t].first;
1035	      connected[t] = true;
1036	      last_trace = t;
1037	    }
1038	  else
1039	    {
1040	      /* Try to connect the traces by duplication of 1 block.  */
1041	      edge e2;
1042	      basic_block next_bb = NULL;
1043	      bool try_copy = false;
1044
1045	      FOR_EACH_EDGE (e, ei, traces[t].last->succs)
1046		if (e->dest != EXIT_BLOCK_PTR
1047		    && (e->flags & EDGE_CAN_FALLTHRU)
1048		    && !(e->flags & EDGE_COMPLEX)
1049		    && (!best || e->probability > best->probability))
1050		  {
1051		    edge_iterator ei;
1052		    edge best2 = NULL;
1053		    int best2_len = 0;
1054
1055		    /* If the destination is a start of a trace which is only
1056		       one block long, then no need to search the successor
1057		       blocks of the trace.  Accept it.  */
1058		    if (bbd[e->dest->index].start_of_trace >= 0
1059			&& traces[bbd[e->dest->index].start_of_trace].length
1060			   == 1)
1061		      {
1062			best = e;
1063			try_copy = true;
1064			continue;
1065		      }
1066
1067		    FOR_EACH_EDGE (e2, ei, e->dest->succs)
1068		      {
1069			int di = e2->dest->index;
1070
1071			if (e2->dest == EXIT_BLOCK_PTR
1072			    || ((e2->flags & EDGE_CAN_FALLTHRU)
1073				&& !(e2->flags & EDGE_COMPLEX)
1074				&& bbd[di].start_of_trace >= 0
1075				&& !connected[bbd[di].start_of_trace]
1076				&& (BB_PARTITION (e2->dest) == current_partition)
1077				&& (EDGE_FREQUENCY (e2) >= freq_threshold)
1078				&& (e2->count >= count_threshold)
1079				&& (!best2
1080				    || e2->probability > best2->probability
1081				    || (e2->probability == best2->probability
1082					&& traces[bbd[di].start_of_trace].length
1083					   > best2_len))))
1084			  {
1085			    best = e;
1086			    best2 = e2;
1087			    if (e2->dest != EXIT_BLOCK_PTR)
1088			      best2_len = traces[bbd[di].start_of_trace].length;
1089			    else
1090			      best2_len = INT_MAX;
1091			    next_bb = e2->dest;
1092			    try_copy = true;
1093			  }
1094		      }
1095		  }
1096
1097	      if (flag_reorder_blocks_and_partition)
1098		try_copy = false;
1099
1100	      /* Copy tiny blocks always; copy larger blocks only when the
1101		 edge is traversed frequently enough.  */
1102	      if (try_copy
1103		  && copy_bb_p (best->dest,
1104				!optimize_size
1105				&& EDGE_FREQUENCY (best) >= freq_threshold
1106				&& best->count >= count_threshold))
1107		{
1108		  basic_block new_bb;
1109
1110		  if (dump_file)
1111		    {
1112		      fprintf (dump_file, "Connection: %d %d ",
1113			       traces[t].last->index, best->dest->index);
1114		      if (!next_bb)
1115			fputc ('\n', dump_file);
1116		      else if (next_bb == EXIT_BLOCK_PTR)
1117			fprintf (dump_file, "exit\n");
1118		      else
1119			fprintf (dump_file, "%d\n", next_bb->index);
1120		    }
1121
1122		  new_bb = copy_bb (best->dest, best, traces[t].last, t);
1123		  traces[t].last = new_bb;
1124		  if (next_bb && next_bb != EXIT_BLOCK_PTR)
1125		    {
1126		      t = bbd[next_bb->index].start_of_trace;
1127		      traces[last_trace].last->aux = traces[t].first;
1128		      connected[t] = true;
1129		      last_trace = t;
1130		    }
1131		  else
1132		    break;	/* Stop finding the successor traces.  */
1133		}
1134	      else
1135		break;	/* Stop finding the successor traces.  */
1136	    }
1137	}
1138    }
1139
1140  if (dump_file)
1141    {
1142      basic_block bb;
1143
1144      fprintf (dump_file, "Final order:\n");
1145      for (bb = traces[0].first; bb; bb = bb->aux)
1146	fprintf (dump_file, "%d ", bb->index);
1147      fprintf (dump_file, "\n");
1148      fflush (dump_file);
1149    }
1150
1151  FREE (connected);
1152}
1153
1154/* Return true when BB can and should be copied. CODE_MAY_GROW is true
1155   when code size is allowed to grow by duplication.  */
1156
1157static bool
1158copy_bb_p (basic_block bb, int code_may_grow)
1159{
1160  int size = 0;
1161  int max_size = uncond_jump_length;
1162  rtx insn;
1163
1164  if (!bb->frequency)
1165    return false;
1166  if (EDGE_COUNT (bb->preds) < 2)
1167    return false;
1168  if (!can_duplicate_block_p (bb))
1169    return false;
1170
1171  /* Avoid duplicating blocks which have many successors (PR/13430).  */
1172  if (EDGE_COUNT (bb->succs) > 8)
1173    return false;
1174
1175  if (code_may_grow && maybe_hot_bb_p (bb))
1176    max_size *= PARAM_VALUE (PARAM_MAX_GROW_COPY_BB_INSNS);
1177
1178  FOR_BB_INSNS (bb, insn)
1179    {
1180      if (INSN_P (insn))
1181	size += get_attr_min_length (insn);
1182    }
1183
1184  if (size <= max_size)
1185    return true;
1186
1187  if (dump_file)
1188    {
1189      fprintf (dump_file,
1190	       "Block %d can't be copied because its size = %d.\n",
1191	       bb->index, size);
1192    }
1193
1194  return false;
1195}
1196
1197/* Return the length of unconditional jump instruction.  */
1198
1199static int
1200get_uncond_jump_length (void)
1201{
1202  rtx label, jump;
1203  int length;
1204
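  /* Emit a dummy label and a jump to it, ask for the jump's minimum length,
     and then delete both insns again.  */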
1205  label = emit_label_before (gen_label_rtx (), get_insns ());
1206  jump = emit_jump_insn (gen_jump (label));
1207
1208  length = get_attr_min_length (jump);
1209
1210  delete_insn (jump);
1211  delete_insn (label);
1212  return length;
1213}
1214
1215/* Find the basic blocks that are rarely executed and need to be moved to
1216   a separate section of the .o file (to cut down on paging and improve
1217   cache locality).  */
1218
1219static void
1220find_rarely_executed_basic_blocks_and_crossing_edges (edge *crossing_edges,
1221						      int *n_crossing_edges,
1222						      int *max_idx)
1223{
1224  basic_block bb;
1225  bool has_hot_blocks = false;
1226  edge e;
1227  int i;
1228  edge_iterator ei;
1229
1230  /* Mark which partition (hot/cold) each basic block belongs in.  */
1231
1232  FOR_EACH_BB (bb)
1233    {
1234      if (probably_never_executed_bb_p (bb))
1235	BB_SET_PARTITION (bb, BB_COLD_PARTITION);
1236      else
1237	{
1238	  BB_SET_PARTITION (bb, BB_HOT_PARTITION);
1239	  has_hot_blocks = true;
1240	}
1241    }
1242
1243  /* Mark every edge that crosses between sections.  */
1244
1245  i = 0;
1246  FOR_EACH_BB (bb)
1247    FOR_EACH_EDGE (e, ei, bb->succs)
1248    {
1249      if (e->src != ENTRY_BLOCK_PTR
1250	  && e->dest != EXIT_BLOCK_PTR
1251	  && BB_PARTITION (e->src) != BB_PARTITION (e->dest))
1252	{
1253	  e->flags |= EDGE_CROSSING;
1254	  if (i == *max_idx)
1255	    {
1256	      *max_idx *= 2;
1257	      crossing_edges = xrealloc (crossing_edges,
1258					 (*max_idx) * sizeof (edge));
1259	    }
1260	  crossing_edges[i++] = e;
1261	}
1262      else
1263	e->flags &= ~EDGE_CROSSING;
1264    }
1265  *n_crossing_edges = i;
1266}
1267
/* If any destination of a crossing edge does not have a label, add a label;
   convert any fall-through crossing edges (for blocks that do not contain
1270   a jump) to unconditional jumps.  */
1271
1272static void
1273add_labels_and_missing_jumps (edge *crossing_edges, int n_crossing_edges)
1274{
1275  int i;
1276  basic_block src;
1277  basic_block dest;
1278  rtx label;
1279  rtx barrier;
1280  rtx new_jump;
1281
  for (i = 0; i < n_crossing_edges; i++)
1283    {
1284      if (crossing_edges[i])
1285	{
1286	  src = crossing_edges[i]->src;
1287	  dest = crossing_edges[i]->dest;
1288
1289	  /* Make sure dest has a label.  */
1290
1291	  if (dest && (dest != EXIT_BLOCK_PTR))
1292	    {
1293	      label = block_label (dest);
1294
1295	      /* Make sure source block ends with a jump.  */
1296
1297	      if (src && (src != ENTRY_BLOCK_PTR))
1298		{
1299		  if (!JUMP_P (BB_END (src)))
1300		    /* bb just falls through.  */
1301		    {
1302		      /* make sure there's only one successor */
1303		      gcc_assert (single_succ_p (src));
1304
1305		      /* Find label in dest block.  */
1306		      label = block_label (dest);
1307
1308		      new_jump = emit_jump_insn_after (gen_jump (label),
1309						       BB_END (src));
1310		      barrier = emit_barrier_after (new_jump);
1311		      JUMP_LABEL (new_jump) = label;
1312		      LABEL_NUSES (label) += 1;
1313		      src->il.rtl->footer = unlink_insn_chain (barrier, barrier);
1314		      /* Mark edge as non-fallthru.  */
1315		      crossing_edges[i]->flags &= ~EDGE_FALLTHRU;
1316		    } /* end: 'if (GET_CODE ... '  */
1317		} /* end: 'if (src && src->index...'  */
1318	    } /* end: 'if (dest && dest->index...'  */
1319	} /* end: 'if (crossing_edges[i]...'  */
1320    } /* end for loop  */
1321}
1322
1323/* Find any bb's where the fall-through edge is a crossing edge (note that
1324   these bb's must also contain a conditional jump; we've already
1325   dealt with fall-through edges for blocks that didn't have a
1326   conditional jump in the call to add_labels_and_missing_jumps).
1327   Convert the fall-through edge to non-crossing edge by inserting a
1328   new bb to fall-through into.  The new bb will contain an
1329   unconditional jump (crossing edge) to the original fall through
1330   destination.  */
1331
1332static void
1333fix_up_fall_thru_edges (void)
1334{
1335  basic_block cur_bb;
1336  basic_block new_bb;
1337  edge succ1;
1338  edge succ2;
1339  edge fall_thru;
1340  edge cond_jump = NULL;
1341  edge e;
1342  bool cond_jump_crosses;
1343  int invert_worked;
1344  rtx old_jump;
1345  rtx fall_thru_label;
1346  rtx barrier;
1347
1348  FOR_EACH_BB (cur_bb)
1349    {
1350      fall_thru = NULL;
1351      if (EDGE_COUNT (cur_bb->succs) > 0)
1352	succ1 = EDGE_SUCC (cur_bb, 0);
1353      else
1354	succ1 = NULL;
1355
1356      if (EDGE_COUNT (cur_bb->succs) > 1)
1357	succ2 = EDGE_SUCC (cur_bb, 1);
1358      else
1359	succ2 = NULL;
1360
1361      /* Find the fall-through edge.  */
1362
1363      if (succ1
1364	  && (succ1->flags & EDGE_FALLTHRU))
1365	{
1366	  fall_thru = succ1;
1367	  cond_jump = succ2;
1368	}
1369      else if (succ2
1370	       && (succ2->flags & EDGE_FALLTHRU))
1371	{
1372	  fall_thru = succ2;
1373	  cond_jump = succ1;
1374	}
1375
1376      if (fall_thru && (fall_thru->dest != EXIT_BLOCK_PTR))
1377	{
1378	  /* Check to see if the fall-thru edge is a crossing edge.  */
1379
1380	  if (fall_thru->flags & EDGE_CROSSING)
1381	    {
1382	      /* The fall_thru edge crosses; now check the cond jump edge, if
1383		 it exists.  */
1384
1385	      cond_jump_crosses = true;
1386	      invert_worked  = 0;
1387	      old_jump = BB_END (cur_bb);
1388
1389	      /* Find the jump instruction, if there is one.  */
1390
1391	      if (cond_jump)
1392		{
1393		  if (!(cond_jump->flags & EDGE_CROSSING))
1394		    cond_jump_crosses = false;
1395
1396		  /* We know the fall-thru edge crosses; if the cond
1397		     jump edge does NOT cross, and its destination is the
1398		     next block in the bb order, invert the jump
1399		     (i.e. fix it so the fall thru does not cross and
1400		     the cond jump does).  */
1401
1402		  if (!cond_jump_crosses
1403		      && cur_bb->aux == cond_jump->dest)
1404		    {
1405		      /* Find label in fall_thru block. We've already added
1406			 any missing labels, so there must be one.  */
1407
1408		      fall_thru_label = block_label (fall_thru->dest);
1409
1410		      if (old_jump && fall_thru_label)
1411			invert_worked = invert_jump (old_jump,
1412						     fall_thru_label,0);
1413		      if (invert_worked)
1414			{
1415			  fall_thru->flags &= ~EDGE_FALLTHRU;
1416			  cond_jump->flags |= EDGE_FALLTHRU;
1417			  update_br_prob_note (cur_bb);
1418			  e = fall_thru;
1419			  fall_thru = cond_jump;
1420			  cond_jump = e;
1421			  cond_jump->flags |= EDGE_CROSSING;
1422			  fall_thru->flags &= ~EDGE_CROSSING;
1423			}
1424		    }
1425		}
1426
1427	      if (cond_jump_crosses || !invert_worked)
1428		{
1429		  /* This is the case where both edges out of the basic
1430		     block are crossing edges. Here we will fix up the
1431		     fall through edge. The jump edge will be taken care
1432		     of later.  */
1433
1434		  new_bb = force_nonfallthru (fall_thru);
1435
1436		  if (new_bb)
1437		    {
1438		      new_bb->aux = cur_bb->aux;
1439		      cur_bb->aux = new_bb;
1440
1441		      /* Make sure new fall-through bb is in same
1442			 partition as bb it's falling through from.  */
1443
1444		      BB_COPY_PARTITION (new_bb, cur_bb);
1445		      single_succ_edge (new_bb)->flags |= EDGE_CROSSING;
1446		    }
1447
1448		  /* Add barrier after new jump */
1449
1450		  if (new_bb)
1451		    {
1452		      barrier = emit_barrier_after (BB_END (new_bb));
1453		      new_bb->il.rtl->footer = unlink_insn_chain (barrier,
1454							       barrier);
1455		    }
1456		  else
1457		    {
1458		      barrier = emit_barrier_after (BB_END (cur_bb));
1459		      cur_bb->il.rtl->footer = unlink_insn_chain (barrier,
1460							       barrier);
1461		    }
1462		}
1463	    }
1464	}
1465    }
1466}
1467
/* This function checks the destination block of a "crossing jump" to
1469   see if it has any crossing predecessors that begin with a code label
1470   and end with an unconditional jump.  If so, it returns that predecessor
1471   block.  (This is to avoid creating lots of new basic blocks that all
1472   contain unconditional jumps to the same destination).  */
1473
1474static basic_block
1475find_jump_block (basic_block jump_dest)
1476{
1477  basic_block source_bb = NULL;
1478  edge e;
1479  rtx insn;
1480  edge_iterator ei;
1481
1482  FOR_EACH_EDGE (e, ei, jump_dest->preds)
1483    if (e->flags & EDGE_CROSSING)
1484      {
1485	basic_block src = e->src;
1486
1487	/* Check each predecessor to see if it has a label, and contains
1488	   only one executable instruction, which is an unconditional jump.
1489	   If so, we can use it.  */
1490
1491	if (LABEL_P (BB_HEAD (src)))
1492	  for (insn = BB_HEAD (src);
1493	       !INSN_P (insn) && insn != NEXT_INSN (BB_END (src));
1494	       insn = NEXT_INSN (insn))
1495	    {
1496	      if (INSN_P (insn)
1497		  && insn == BB_END (src)
1498		  && JUMP_P (insn)
1499		  && !any_condjump_p (insn))
1500		{
1501		  source_bb = src;
1502		  break;
1503		}
1504	    }
1505
1506	if (source_bb)
1507	  break;
1508      }
1509
1510  return source_bb;
1511}
1512
1513/* Find all BB's with conditional jumps that are crossing edges;
1514   insert a new bb and make the conditional jump branch to the new
   bb instead (make the new bb the same color so the conditional branch won't
1516   be a 'crossing' edge).  Insert an unconditional jump from the
1517   new bb to the original destination of the conditional jump.  */
1518
1519static void
1520fix_crossing_conditional_branches (void)
1521{
1522  basic_block cur_bb;
1523  basic_block new_bb;
1524  basic_block last_bb;
1525  basic_block dest;
1526  basic_block prev_bb;
1527  edge succ1;
1528  edge succ2;
1529  edge crossing_edge;
1530  edge new_edge;
1531  rtx old_jump;
1532  rtx set_src;
1533  rtx old_label = NULL_RTX;
1534  rtx new_label;
1535  rtx new_jump;
1536  rtx barrier;
1537
  last_bb = EXIT_BLOCK_PTR->prev_bb;
1539
1540  FOR_EACH_BB (cur_bb)
1541    {
1542      crossing_edge = NULL;
1543      if (EDGE_COUNT (cur_bb->succs) > 0)
1544	succ1 = EDGE_SUCC (cur_bb, 0);
1545      else
1546	succ1 = NULL;
1547
1548      if (EDGE_COUNT (cur_bb->succs) > 1)
1549	succ2 = EDGE_SUCC (cur_bb, 1);
1550      else
1551	succ2 = NULL;
1552
1553      /* We already took care of fall-through edges, so only one successor
1554	 can be a crossing edge.  */
1555
1556      if (succ1 && (succ1->flags & EDGE_CROSSING))
1557	crossing_edge = succ1;
1558      else if (succ2 && (succ2->flags & EDGE_CROSSING))
1559	crossing_edge = succ2;
1560
1561      if (crossing_edge)
1562	{
1563	  old_jump = BB_END (cur_bb);
1564
1565	  /* Check to make sure the jump instruction is a
1566	     conditional jump.  */
1567
1568	  set_src = NULL_RTX;
1569
1570	  if (any_condjump_p (old_jump))
1571	    {
1572	      if (GET_CODE (PATTERN (old_jump)) == SET)
1573		set_src = SET_SRC (PATTERN (old_jump));
1574	      else if (GET_CODE (PATTERN (old_jump)) == PARALLEL)
1575		{
		  set_src = XVECEXP (PATTERN (old_jump), 0, 0);
1577		  if (GET_CODE (set_src) == SET)
1578		    set_src = SET_SRC (set_src);
1579		  else
1580		    set_src = NULL_RTX;
1581		}
1582	    }
1583
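	  /* For a conditional jump the source of the SET is an IF_THEN_ELSE;
	     the arm that is not PC is the label (or RETURN) that the jump
	     targets.  */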
1584	  if (set_src && (GET_CODE (set_src) == IF_THEN_ELSE))
1585	    {
1586	      if (GET_CODE (XEXP (set_src, 1)) == PC)
1587		old_label = XEXP (set_src, 2);
1588	      else if (GET_CODE (XEXP (set_src, 2)) == PC)
1589		old_label = XEXP (set_src, 1);
1590
1591	      /* Check to see if new bb for jumping to that dest has
1592		 already been created; if so, use it; if not, create
1593		 a new one.  */
1594
1595	      new_bb = find_jump_block (crossing_edge->dest);
1596
1597	      if (new_bb)
1598		new_label = block_label (new_bb);
1599	      else
1600		{
1601		  /* Create new basic block to be dest for
1602		     conditional jump.  */
1603
1604		  new_bb = create_basic_block (NULL, NULL, last_bb);
1605		  new_bb->aux = last_bb->aux;
1606		  last_bb->aux = new_bb;
1607		  prev_bb = last_bb;
1608		  last_bb = new_bb;
1609
1610		  /* Update register liveness information.  */
1611
1612		  new_bb->il.rtl->global_live_at_start = ALLOC_REG_SET (&reg_obstack);
1613		  new_bb->il.rtl->global_live_at_end = ALLOC_REG_SET (&reg_obstack);
1614		  COPY_REG_SET (new_bb->il.rtl->global_live_at_end,
1615				prev_bb->il.rtl->global_live_at_end);
1616		  COPY_REG_SET (new_bb->il.rtl->global_live_at_start,
1617				prev_bb->il.rtl->global_live_at_end);
1618
1619		  /* Put appropriate instructions in new bb.  */
1620
1621		  new_label = gen_label_rtx ();
1622		  emit_label_before (new_label, BB_HEAD (new_bb));
1623		  BB_HEAD (new_bb) = new_label;
1624
1625		  if (GET_CODE (old_label) == LABEL_REF)
1626		    {
1627		      old_label = JUMP_LABEL (old_jump);
1628		      new_jump = emit_jump_insn_after (gen_jump
1629						       (old_label),
1630						       BB_END (new_bb));
1631		    }
1632		  else
1633		    {
1634		      gcc_assert (HAVE_return
1635				  && GET_CODE (old_label) == RETURN);
1636		      new_jump = emit_jump_insn_after (gen_return (),
1637						       BB_END (new_bb));
1638		    }
1639
1640		  barrier = emit_barrier_after (new_jump);
1641		  JUMP_LABEL (new_jump) = old_label;
1642		  new_bb->il.rtl->footer = unlink_insn_chain (barrier,
1643							   barrier);
1644
1645		  /* Make sure new bb is in same partition as source
1646		     of conditional branch.  */
1647		  BB_COPY_PARTITION (new_bb, cur_bb);
1648		}
1649
1650	      /* Make old jump branch to new bb.  */
1651
1652	      redirect_jump (old_jump, new_label, 0);
1653
1654	      /* Remove crossing_edge as predecessor of 'dest'.  */
1655
1656	      dest = crossing_edge->dest;
1657
1658	      redirect_edge_succ (crossing_edge, new_bb);
1659
1660	      /* Make a new edge from new_bb to old dest; new edge
1661		 will be a successor for new_bb and a predecessor
1662		 for 'dest'.  */
1663
1664	      if (EDGE_COUNT (new_bb->succs) == 0)
1665		new_edge = make_edge (new_bb, dest, 0);
1666	      else
1667		new_edge = EDGE_SUCC (new_bb, 0);
1668
1669	      crossing_edge->flags &= ~EDGE_CROSSING;
1670	      new_edge->flags |= EDGE_CROSSING;
1671	    }
1672	}
1673    }
1674}
1675
1676/* Find any unconditional branches that cross between hot and cold
1677   sections.  Convert them into indirect jumps instead.  */
1678
1679static void
1680fix_crossing_unconditional_branches (void)
1681{
1682  basic_block cur_bb;
1683  rtx last_insn;
1684  rtx label;
1685  rtx label_addr;
1686  rtx indirect_jump_sequence;
1687  rtx jump_insn = NULL_RTX;
1688  rtx new_reg;
1689  rtx cur_insn;
1690  edge succ;
1691
1692  FOR_EACH_BB (cur_bb)
1693    {
1694      last_insn = BB_END (cur_bb);
1695
1696      if (EDGE_COUNT (cur_bb->succs) < 1)
1697	continue;
1698
1699      succ = EDGE_SUCC (cur_bb, 0);
1700
1701      /* Check to see if bb ends in a crossing (unconditional) jump.  At
1702	 this point, no crossing jumps should be conditional.  */
1703
1704      if (JUMP_P (last_insn)
1705	  && (succ->flags & EDGE_CROSSING))
1706	{
1707	  rtx label2, table;
1708
1709	  gcc_assert (!any_condjump_p (last_insn));
1710
1711	  /* Make sure the jump is not already an indirect or table jump.  */
1712
1713	  if (!computed_jump_p (last_insn)
1714	      && !tablejump_p (last_insn, &label2, &table))
1715	    {
1716	      /* We have found a "crossing" unconditional branch.  Now
1717		 we must convert it to an indirect jump.  First create
1718		 reference of label, as target for jump.  */
1719
1720	      label = JUMP_LABEL (last_insn);
1721	      label_addr = gen_rtx_LABEL_REF (Pmode, label);
1722	      LABEL_NUSES (label) += 1;
1723
1724	      /* Get a register to use for the indirect jump.  */
1725
1726	      new_reg = gen_reg_rtx (Pmode);
1727
	      /* Generate the indirect jump sequence.  */
1729
1730	      start_sequence ();
1731	      emit_move_insn (new_reg, label_addr);
1732	      emit_indirect_jump (new_reg);
1733	      indirect_jump_sequence = get_insns ();
1734	      end_sequence ();
1735
1736	      /* Make sure every instruction in the new jump sequence has
1737		 its basic block set to be cur_bb.  */
1738
1739	      for (cur_insn = indirect_jump_sequence; cur_insn;
1740		   cur_insn = NEXT_INSN (cur_insn))
1741		{
1742		  if (!BARRIER_P (cur_insn))
1743		    BLOCK_FOR_INSN (cur_insn) = cur_bb;
1744		  if (JUMP_P (cur_insn))
1745		    jump_insn = cur_insn;
1746		}
1747
1748	      /* Insert the new (indirect) jump sequence immediately before
1749		 the unconditional jump, then delete the unconditional jump.  */
1750
1751	      emit_insn_before (indirect_jump_sequence, last_insn);
1752	      delete_insn (last_insn);
1753
1754	      /* Make BB_END for cur_bb be the jump instruction (NOT the
1755		 barrier instruction at the end of the sequence...).  */
1756
1757	      BB_END (cur_bb) = jump_insn;
1758	    }
1759	}
1760    }
1761}
1762
1763/* Add REG_CROSSING_JUMP note to all crossing jump insns.  */
1764
1765static void
1766add_reg_crossing_jump_notes (void)
1767{
1768  basic_block bb;
1769  edge e;
1770  edge_iterator ei;
1771
1772  FOR_EACH_BB (bb)
1773    FOR_EACH_EDGE (e, ei, bb->succs)
1774      if ((e->flags & EDGE_CROSSING)
1775	  && JUMP_P (BB_END (e->src)))
1776	REG_NOTES (BB_END (e->src)) = gen_rtx_EXPR_LIST (REG_CROSSING_JUMP,
1777							 NULL_RTX,
1778							 REG_NOTES (BB_END
1779								  (e->src)));
1780}
1781
1782/* Hot and cold basic blocks are partitioned and put in separate
1783   sections of the .o file, to reduce paging and improve cache
1784   performance (hopefully).  This can result in bits of code from the
   same function being widely separated in the .o file.  However, this
   is not obvious from the current bb structure.  Therefore we must take
1787   care to ensure that: 1). There are no fall_thru edges that cross
1788   between sections; 2). For those architectures which have "short"
1789   conditional branches, all conditional branches that attempt to
1790   cross between sections are converted to unconditional branches;
1791   and, 3). For those architectures which have "short" unconditional
1792   branches, all unconditional branches that attempt to cross between
1793   sections are converted to indirect jumps.
1794
1795   The code for fixing up fall_thru edges that cross between hot and
1796   cold basic blocks does so by creating new basic blocks containing
1797   unconditional branches to the appropriate label in the "other"
1798   section.  The new basic block is then put in the same (hot or cold)
1799   section as the original conditional branch, and the fall_thru edge
1800   is modified to fall into the new basic block instead.  By adding
   this level of indirection we end up with only unconditional branches
   crossing between hot and cold sections.

   Conditional branches are dealt with by adding a level of indirection.
   A new basic block is added in the same (hot/cold) section as the
   conditional branch, and the conditional branch is retargeted to the
   new basic block.  The new basic block contains an unconditional branch
   to the original target of the conditional branch (in the other section).

   Unconditional branches are dealt with by converting them into
   indirect jumps.  */

static void
fix_edges_for_rarely_executed_code (edge *crossing_edges,
				    int n_crossing_edges)
{
  /* Make sure the source of any crossing edge ends in a jump and the
     destination of any crossing edge has a label.  */

  add_labels_and_missing_jumps (crossing_edges, n_crossing_edges);

  /* Make each crossing fall_thru edge non-crossing by redirecting it
     into a new basic block that jumps unconditionally to the original
     fall thru destination.  */

  fix_up_fall_thru_edges ();

  /* If the architecture does not have conditional branches that can
     span all of memory, convert crossing conditional branches into
     crossing unconditional branches.  */

  if (!HAS_LONG_COND_BRANCH)
    fix_crossing_conditional_branches ();

  /* If the architecture does not have unconditional branches that
     can span all of memory, convert crossing unconditional branches
     into indirect jumps.  Since adding an indirect jump also adds
     a new register usage, update the register usage information as
     well.  */

  if (!HAS_LONG_UNCOND_BRANCH)
    {
      fix_crossing_unconditional_branches ();
      reg_scan (get_insns (), max_reg_num ());
    }

  add_reg_crossing_jump_notes ();
}
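
/* Illustrative sketch only (not used by the pass): a toy model of the
   fall_thru fixup performed by fix_up_fall_thru_edges above.  The
   struct and function below are invented for illustration.  Each toy
   block records its partition, a fall-through successor and an
   unconditional jump target; when the fall-through would cross
   partitions, a trampoline block is interposed in the source partition
   and given an unconditional jump to the original destination, so that
   only jumps, never fall-throughs, cross between the sections.  */

struct toy_block
{
  int partition;		/* e.g. BB_HOT_PARTITION or BB_COLD_PARTITION */
  struct toy_block *fallthru;	/* fall-through successor, or NULL */
  struct toy_block *jump_dest;	/* unconditional jump target, or NULL */
};

static void
toy_fix_crossing_fallthru (struct toy_block *bb, struct toy_block *trampoline)
{
  if (bb->fallthru && bb->fallthru->partition != bb->partition)
    {
      /* Keep the trampoline in the same partition as the source...  */
      trampoline->partition = bb->partition;
      /* ... give it an unconditional jump into the other partition...  */
      trampoline->jump_dest = bb->fallthru;
      trampoline->fallthru = NULL;
      /* ... and let the source fall through into the trampoline.  */
      bb->fallthru = trampoline;
    }
}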

/* Verify, in the basic block chain, that there is at most one switch
   between hot/cold partitions.  This is modelled on
   rtl_verify_flow_info_1, but it cannot go inside that function
   because this condition will not be true until after
   reorder_basic_blocks is called.  */

static void
verify_hot_cold_block_grouping (void)
{
  basic_block bb;
  int err = 0;
  bool switched_sections = false;
  int current_partition = 0;

  FOR_EACH_BB (bb)
    {
      if (!current_partition)
	current_partition = BB_PARTITION (bb);
      if (BB_PARTITION (bb) != current_partition)
	{
	  if (switched_sections)
	    {
	      error ("multiple hot/cold transitions found (bb %i)",
		     bb->index);
	      err = 1;
	    }
	  else
	    {
	      switched_sections = true;
	      current_partition = BB_PARTITION (bb);
	    }
	}
    }

  gcc_assert (!err);
}

/* Reorder basic blocks.  The main entry point to this file.  FLAGS is
   the set of flags to pass to cfg_layout_initialize ().  */

void
reorder_basic_blocks (unsigned int flags)
{
  int n_traces;
  int i;
  struct trace *traces;

  if (n_basic_blocks <= NUM_FIXED_BLOCKS + 1)
    return;

  if (targetm.cannot_modify_jumps_p ())
    return;

  cfg_layout_initialize (flags);

  set_edge_can_fallthru_flag ();
  mark_dfs_back_edges ();

  /* We estimate the length of an uncond jump insn only once, since the code
     for getting the insn length always returns the minimal length now.  */
  if (uncond_jump_length == 0)
    uncond_jump_length = get_uncond_jump_length ();

  /* We need to know some information for each basic block.  */
  array_size = GET_ARRAY_SIZE (last_basic_block);
  bbd = XNEWVEC (bbro_basic_block_data, array_size);
  for (i = 0; i < array_size; i++)
    {
      bbd[i].start_of_trace = -1;
      bbd[i].in_trace = -1;
      bbd[i].end_of_trace = -1;
      bbd[i].heap = NULL;
      bbd[i].node = NULL;
    }

  traces = XNEWVEC (struct trace, n_basic_blocks);
  n_traces = 0;
  find_traces (&n_traces, traces);
  connect_traces (n_traces, traces);
  FREE (traces);
  FREE (bbd);

  if (dump_file)
    dump_flow_info (dump_file, dump_flags);

  cfg_layout_finalize ();
  if (flag_reorder_blocks_and_partition)
    verify_hot_cold_block_grouping ();
}

/* Determine which partition the first basic block in the function
   belongs to, then find the first basic block in the current function
   that belongs to a different section, and insert a
   NOTE_INSN_SWITCH_TEXT_SECTIONS note immediately before it in the
   instruction stream.  When writing out the assembly code,
   encountering this note will make the compiler switch between the
   hot and cold text sections.  */

static void
insert_section_boundary_note (void)
{
  basic_block bb;
  rtx new_note;
  int first_partition = 0;

  if (flag_reorder_blocks_and_partition)
    FOR_EACH_BB (bb)
    {
      if (!first_partition)
	first_partition = BB_PARTITION (bb);
      if (BB_PARTITION (bb) != first_partition)
	{
	  new_note = emit_note_before (NOTE_INSN_SWITCH_TEXT_SECTIONS,
				       BB_HEAD (bb));
	  break;
	}
    }
}

/* Duplicate the blocks containing computed gotos.  This basically unfactors
   computed gotos that were factored early on in the compilation process to
   speed up edge based data flow.  We used to not unfactor them again, which
   can seriously pessimize code with many computed jumps in the source code,
   such as interpreters; see e.g. PR15242, and the illustrative sketch
   following duplicate_computed_gotos below.  */

static bool
gate_duplicate_computed_gotos (void)
{
  return (optimize > 0 && flag_expensive_optimizations && !optimize_size);
}


static unsigned int
duplicate_computed_gotos (void)
{
  basic_block bb, new_bb;
  bitmap candidates;
  int max_size;

  if (n_basic_blocks <= NUM_FIXED_BLOCKS + 1)
    return 0;

  if (targetm.cannot_modify_jumps_p ())
    return 0;

  cfg_layout_initialize (0);

  /* We estimate the length of an uncond jump insn only once,
     since the code for getting the insn length always returns
     the minimal length now.  */
  if (uncond_jump_length == 0)
    uncond_jump_length = get_uncond_jump_length ();

  max_size
    = uncond_jump_length * PARAM_VALUE (PARAM_MAX_GOTO_DUPLICATION_INSNS);
  candidates = BITMAP_ALLOC (NULL);

  /* Look for blocks that end in a computed jump, and see if such blocks
     are suitable for unfactoring.  If a block is a candidate for unfactoring,
     mark it in the candidates.  */
  FOR_EACH_BB (bb)
    {
      rtx insn;
      edge e;
      edge_iterator ei;
      int size, all_flags;

      /* Build the reorder chain for the original order of blocks.  */
      if (bb->next_bb != EXIT_BLOCK_PTR)
	bb->aux = bb->next_bb;

      /* Obviously the block has to end in a computed jump.  */
      if (!computed_jump_p (BB_END (bb)))
	continue;

      /* Only consider blocks that can be duplicated.  */
      if (find_reg_note (BB_END (bb), REG_CROSSING_JUMP, NULL_RTX)
	  || !can_duplicate_block_p (bb))
	continue;

      /* Make sure that the block is small enough.  */
      size = 0;
      FOR_BB_INSNS (bb, insn)
	if (INSN_P (insn))
	  {
	    size += get_attr_min_length (insn);
	    if (size > max_size)
	      break;
	  }
      if (size > max_size)
	continue;

      /* Final check: there must not be any incoming abnormal edges.  */
      all_flags = 0;
      FOR_EACH_EDGE (e, ei, bb->preds)
	all_flags |= e->flags;
      if (all_flags & EDGE_COMPLEX)
	continue;

      bitmap_set_bit (candidates, bb->index);
    }

  /* Nothing to do if there is no computed jump here.  */
  if (bitmap_empty_p (candidates))
    goto done;

  /* Duplicate computed gotos.  */
  FOR_EACH_BB (bb)
    {
      if (bb->il.rtl->visited)
	continue;

      bb->il.rtl->visited = 1;

      /* BB must have one outgoing edge.  That edge must not lead to
	 the exit block or the next block.
	 The destination must have more than one predecessor.  */
      if (!single_succ_p (bb)
	  || single_succ (bb) == EXIT_BLOCK_PTR
	  || single_succ (bb) == bb->next_bb
	  || single_pred_p (single_succ (bb)))
	continue;

      /* The successor block has to be a duplication candidate.  */
      if (!bitmap_bit_p (candidates, single_succ (bb)->index))
	continue;

      new_bb = duplicate_block (single_succ (bb), single_succ_edge (bb), bb);
      new_bb->aux = bb->aux;
      bb->aux = new_bb;
      new_bb->il.rtl->visited = 1;
    }

done:
  cfg_layout_finalize ();

  BITMAP_FREE (candidates);
  return 0;
}
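
/* Illustrative sketch only (not used by the pass): the kind of source
   code the unfactoring above is aimed at.  The toy interpreter below is
   invented for illustration and uses the GNU C labels-as-values
   extension.  With a single factored dispatch block ("next_insn"), all
   handlers funnel into one indirect branch; duplicate_computed_gotos
   performs, at the RTL level, the equivalent of copying that dispatch
   block into each handler so that every handler ends in its own
   computed jump.  */

static int
toy_threaded_interpreter (const unsigned char *pc)
{
  static void *dispatch[] = { &&op_nop, &&op_inc, &&op_halt };
  int acc = 0;

  /* The factored dispatch block; every handler jumps back here.  */
 next_insn:
  goto *dispatch[*pc++];

 op_nop:
  goto next_insn;

 op_inc:
  acc++;
  /* After unfactoring, the dispatch would be duplicated here,
     i.e. "goto *dispatch[*pc++];" instead of a jump back to
     next_insn.  */
  goto next_insn;

 op_halt:
  return acc;
}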

struct tree_opt_pass pass_duplicate_computed_gotos =
{
  "compgotos",                          /* name */
  gate_duplicate_computed_gotos,        /* gate */
  duplicate_computed_gotos,             /* execute */
  NULL,                                 /* sub */
  NULL,                                 /* next */
  0,                                    /* static_pass_number */
  TV_REORDER_BLOCKS,                    /* tv_id */
  0,                                    /* properties_required */
  0,                                    /* properties_provided */
  0,                                    /* properties_destroyed */
  0,                                    /* todo_flags_start */
  TODO_dump_func,                       /* todo_flags_finish */
  0                                     /* letter */
};


/* This function is the main 'entrance' for the optimization that
   partitions hot and cold basic blocks into separate sections of the
   .o file (to improve performance and cache locality).  Ideally it
   would be called after all optimizations that rearrange the CFG have
   been called.  However part of this optimization may introduce new
   register usage, so it must be called before register allocation has
   occurred.  This means that this optimization is actually called
   well before the optimization that reorders basic blocks (see
   function above).

   This optimization checks the feedback information to determine
   which basic blocks are hot/cold, and updates flags on the basic
   blocks to indicate which section they belong in.  This information is
   later used for writing out sections in the .o file.  Because hot
   and cold sections can be arbitrarily large (within the bounds of
   memory), far beyond the size of a single function, it is necessary
   to fix up all edges that cross section boundaries, to make sure the
   instructions used can actually span the required distance.  The
   fixes are described below.

   Fall-through edges must be changed into jumps; it is not safe or
   legal to fall through across a section boundary.  Whenever a
   fall-through edge crossing a section boundary is encountered, a new
   basic block is inserted (in the same section as the fall-through
   source), and the fall through edge is redirected to the new basic
   block.  The new basic block contains an unconditional jump to the
   original fall-through target.  (If the unconditional jump is
   insufficient to cross section boundaries, that is dealt with a
   little later, see below).

   In order to deal with architectures that have short conditional
   branches (which cannot span all of memory) we take any conditional
   jump that attempts to cross a section boundary and add a level of
   indirection: it becomes a conditional jump to a new basic block, in
   the same section.  The new basic block contains an unconditional
   jump to the original target, in the other section.

   For those architectures whose unconditional branch is also
   incapable of reaching all of memory, those unconditional jumps are
   converted into indirect jumps, through a register.

   IMPORTANT NOTE: This optimization causes some messy interactions
   with the cfg cleanup optimizations; those optimizations want to
   merge blocks wherever possible, and to collapse indirect jump
   sequences (change "A jumps to B jumps to C" directly into "A jumps
   to C").  Those optimizations can undo the jump fixes that
   partitioning is required to make (see above), in order to ensure
   that jumps attempting to cross section boundaries are really able
   to cover whatever distance the jump requires (on many architectures
   conditional or unconditional jumps are not able to reach all of
   memory).  Therefore tests have to be inserted into each such
   optimization to make sure that it does not undo the fixes needed to
   cross partition boundaries.  This would be much less of a problem
   if we could perform this optimization later in the compilation, but
   unfortunately the fact that we may need to create indirect jumps
   (through registers) requires that this optimization be performed
   before register allocation.  */

static void
partition_hot_cold_basic_blocks (void)
{
  basic_block cur_bb;
  edge *crossing_edges;
  int n_crossing_edges;
  int max_edges = 2 * last_basic_block;

  if (n_basic_blocks <= NUM_FIXED_BLOCKS + 1)
    return;

  crossing_edges = XCNEWVEC (edge, max_edges);

  cfg_layout_initialize (0);

  FOR_EACH_BB (cur_bb)
    if (cur_bb->index >= NUM_FIXED_BLOCKS
	&& cur_bb->next_bb->index >= NUM_FIXED_BLOCKS)
      cur_bb->aux = cur_bb->next_bb;

  find_rarely_executed_basic_blocks_and_crossing_edges (crossing_edges,
							&n_crossing_edges,
							&max_edges);

  if (n_crossing_edges > 0)
    fix_edges_for_rarely_executed_code (crossing_edges, n_crossing_edges);

  free (crossing_edges);

  cfg_layout_finalize ();
}
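
/* Illustrative sketch only (not used by the pass): the source-level
   shape produced by the conditional-branch fixup described above.  The
   function and labels are invented for illustration.  The conditional
   jump targets a nearby trampoline in the same section, and only the
   trampoline's unconditional jump has to span the distance to the
   other section (and may in turn become an indirect jump when even
   unconditional branches are "short").  */

static int
toy_conditional_indirection_sketch (int error_p)
{
  if (error_p)
    goto nearby_trampoline;	/* Short conditional branch stays local.  */
  return 0;			/* Hot path.  */

 nearby_trampoline:
  goto cold_handler;		/* Unconditional jump crosses the sections.  */

 cold_handler:
  return -1;			/* Rarely executed path.  */
}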

static bool
gate_handle_reorder_blocks (void)
{
  return (optimize > 0);
}


/* Reorder basic blocks.  */
static unsigned int
rest_of_handle_reorder_blocks (void)
{
  bool changed;
  unsigned int liveness_flags;

  /* Last attempt to optimize CFG, as scheduling, peepholing and insn
     splitting possibly introduced more crossjumping opportunities.  */
  liveness_flags = (!HAVE_conditional_execution ? CLEANUP_UPDATE_LIFE : 0);
  changed = cleanup_cfg (CLEANUP_EXPENSIVE | liveness_flags);

  if (flag_sched2_use_traces && flag_schedule_insns_after_reload)
    {
      timevar_push (TV_TRACER);
      tracer (liveness_flags);
      timevar_pop (TV_TRACER);
    }

  if (flag_reorder_blocks || flag_reorder_blocks_and_partition)
    reorder_basic_blocks (liveness_flags);
  if (flag_reorder_blocks || flag_reorder_blocks_and_partition
      || (flag_sched2_use_traces && flag_schedule_insns_after_reload))
    changed |= cleanup_cfg (CLEANUP_EXPENSIVE | liveness_flags);

  /* On conditional execution targets we cannot update the life information
     cheaply, so we defer the updating to after both cleanups.  This may lose
     some cases but should not be terribly bad.  */
  if (changed && HAVE_conditional_execution)
    update_life_info (NULL, UPDATE_LIFE_GLOBAL_RM_NOTES,
		      PROP_DEATH_NOTES);

  /* Add NOTE_INSN_SWITCH_TEXT_SECTIONS notes.  */
  insert_section_boundary_note ();
  return 0;
}

struct tree_opt_pass pass_reorder_blocks =
{
  "bbro",                               /* name */
  gate_handle_reorder_blocks,           /* gate */
  rest_of_handle_reorder_blocks,        /* execute */
  NULL,                                 /* sub */
  NULL,                                 /* next */
  0,                                    /* static_pass_number */
  TV_REORDER_BLOCKS,                    /* tv_id */
  0,                                    /* properties_required */
  0,                                    /* properties_provided */
  0,                                    /* properties_destroyed */
  0,                                    /* todo_flags_start */
  TODO_dump_func,                       /* todo_flags_finish */
  'B'                                   /* letter */
};

static bool
gate_handle_partition_blocks (void)
{
  /* The optimization to partition hot/cold basic blocks into separate
     sections of the .o file does not work well with linkonce or with
     user defined section attributes.  Don't call it if either case
     arises.  */

  return (flag_reorder_blocks_and_partition
	  && !DECL_ONE_ONLY (current_function_decl)
	  && !user_defined_section_attribute);
}

/* Partition hot and cold basic blocks.  */
static unsigned int
rest_of_handle_partition_blocks (void)
{
  no_new_pseudos = 0;
  partition_hot_cold_basic_blocks ();
  allocate_reg_life_data ();
  update_life_info (NULL, UPDATE_LIFE_GLOBAL_RM_NOTES,
		    PROP_LOG_LINKS | PROP_REG_INFO | PROP_DEATH_NOTES);
  no_new_pseudos = 1;
  return 0;
}

struct tree_opt_pass pass_partition_blocks =
{
  "bbpart",                             /* name */
  gate_handle_partition_blocks,         /* gate */
  rest_of_handle_partition_blocks,      /* execute */
  NULL,                                 /* sub */
  NULL,                                 /* next */
  0,                                    /* static_pass_number */
  TV_REORDER_BLOCKS,                    /* tv_id */
  0,                                    /* properties_required */
  0,                                    /* properties_provided */
  0,                                    /* properties_destroyed */
  0,                                    /* todo_flags_start */
  TODO_dump_func,                       /* todo_flags_finish */
  0                                     /* letter */
};