/* Basic block reordering routines for the GNU compiler.
   Copyright (C) 2000, 2002, 2003, 2004, 2005, 2006, 2007, 2008, 2010
   Free Software Foundation, Inc.

   This file is part of GCC.

   GCC is free software; you can redistribute it and/or modify it
   under the terms of the GNU General Public License as published by
   the Free Software Foundation; either version 3, or (at your option)
   any later version.

   GCC is distributed in the hope that it will be useful, but WITHOUT
   ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
   or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public
   License for more details.

   You should have received a copy of the GNU General Public License
   along with GCC; see the file COPYING3.  If not see
   <http://www.gnu.org/licenses/>.  */

/* This (greedy) algorithm constructs traces in several rounds.
   The construction starts from "seeds".  The seed for the first round
   is the entry point of the function.  When there is more than one seed,
   the one with the lowest key in the heap is selected first (see the
   function bb_to_key).  Then the algorithm repeatedly adds the most
   probable successor to the end of a trace.  Finally it connects the traces.

   There are two parameters: Branch Threshold and Exec Threshold.
   If the probability of an edge to a successor of the current basic block is
   lower than Branch Threshold, or the frequency of the successor is lower
   than Exec Threshold, the successor will be a seed in one of the next
   rounds.  Each round has these parameters lower than the previous one.
   The last round has to have these parameters set to zero so that the
   remaining blocks are picked up.

   The algorithm selects the most probable successor from all unvisited
   successors and successors that have been added to this trace.
   The other successors (that have not been "sent" to the next round) will be
   other seeds for this round, and the secondary traces will start from them.
   If the successor has not been visited in this trace, it is added to the
   trace (however, there is some heuristic for simple branches).
   If the successor has been visited in this trace, a loop has been found.
   If the loop has many iterations, the loop is rotated so that the
   source block of the most probable edge going out of the loop
   is the last block of the trace.
   If the loop has few iterations and there is no edge from the last block of
   the loop going out of the loop, the loop header is duplicated.
   Finally, the construction of the trace is terminated.

   When connecting traces, it first checks whether there is an edge from the
   last block of one trace to the first block of another trace.
   When there are still some unconnected traces, it checks whether there
   exists a basic block BB such that BB is a successor of the last bb of one
   trace and BB is a predecessor of the first block of another trace.  In this
   case, BB is duplicated and the traces are connected through this duplicate.
   The remaining traces are simply connected, so there will be a jump to the
   beginning of each such trace.
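
   For example (an illustrative CFG, not taken from the sources above):
   in a diamond A -> B (90%), A -> C (10%), B -> D, C -> D, the first
   round typically grows the trace A B D from the seed A, while the cold
   successor C fails the thresholds, becomes a seed for a later round,
   and its one-block trace is connected afterwards.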


   References:

   "Software Trace Cache"
   A. Ramirez, J. Larriba-Pey, C. Navarro, J. Torrellas and M. Valero; 1999
   http://citeseer.nj.nec.com/15361.html

*/

#include "config.h"
#include "system.h"
#include "coretypes.h"
#include "tm.h"
#include "rtl.h"
#include "regs.h"
#include "flags.h"
#include "timevar.h"
#include "output.h"
#include "cfglayout.h"
#include "fibheap.h"
#include "target.h"
#include "function.h"
#include "tm_p.h"
#include "obstack.h"
#include "expr.h"
#include "params.h"
#include "toplev.h"
#include "tree-pass.h"
#include "df.h"

/* The number of rounds.  In most cases there will only be 4 rounds, but
   when partitioning hot and cold basic blocks into separate sections of
   the .o file there will be an extra round.  */
#define N_ROUNDS 5

/* Stubs in case we don't have a return insn.
   We have to check at runtime too, not only at compile time.  */

#ifndef HAVE_return
#define HAVE_return 0
#define gen_return() NULL_RTX
#endif


/* Branch thresholds in thousandths (per mille) of the REG_BR_PROB_BASE.  */
static int branch_threshold[N_ROUNDS] = {400, 200, 100, 0, 0};

/* Exec thresholds in thousandths (per mille) of the frequency of bb 0.  */
static int exec_threshold[N_ROUNDS] = {500, 200, 50, 0, 0};
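
/* For example (just restating the two tables above): in round 0 a trace is
   only extended along edges whose probability is at least 40% of
   REG_BR_PROB_BASE and into successors with at least 50% of the maximal
   entry frequency, while the final rounds accept any remaining block.  */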

/* If the edge frequency is lower than DUPLICATION_THRESHOLD per mille of the
   entry block's frequency, the edge destination is not duplicated while
   connecting traces.  */
#define DUPLICATION_THRESHOLD 100

/* Length of unconditional jump instruction.  */
static int uncond_jump_length;

/* Structure to hold needed information for each basic block.  */
typedef struct bbro_basic_block_data_def
{
  /* Which trace is the bb start of (-1 means it is not a start of a trace).  */
  int start_of_trace;

  /* Which trace is the bb end of (-1 means it is not an end of a trace).  */
  int end_of_trace;

  /* Which trace is the bb in?  */
  int in_trace;

  /* Which heap is BB in (if any)?  */
  fibheap_t heap;

  /* Which heap node is BB in (if any)?  */
  fibnode_t node;
} bbro_basic_block_data;

/* The current size of the following dynamic array.  */
static int array_size;

/* The array which holds needed information for basic blocks.  */
static bbro_basic_block_data *bbd;

/* To avoid frequent reallocation the size of arrays is greater than needed,
   the number of elements is (not less than) 1.25 * size_wanted.  */
#define GET_ARRAY_SIZE(X) ((((X) / 4) + 1) * 5)
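
/* For example, GET_ARRAY_SIZE (100) is ((100 / 4) + 1) * 5 == 130, i.e.
   1.3 times the requested number of elements.  */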

/* Free the memory and set the pointer to NULL.  */
#define FREE(P) (gcc_assert (P), free (P), P = 0)

/* Structure for holding information about a trace.  */
struct trace
{
  /* First and last basic block of the trace.  */
  basic_block first, last;

  /* The round of the STC creation which this trace was found in.  */
  int round;

  /* The length (i.e. the number of basic blocks) of the trace.  */
  int length;
};

/* Maximum frequency and count of one of the entry blocks.  */
static int max_entry_frequency;
static gcov_type max_entry_count;

/* Local function prototypes.  */
static void find_traces (int *, struct trace *);
static basic_block rotate_loop (edge, struct trace *, int);
static void mark_bb_visited (basic_block, int);
static void find_traces_1_round (int, int, gcov_type, struct trace *, int *,
				 int, fibheap_t *, int);
static basic_block copy_bb (basic_block, edge, basic_block, int);
static fibheapkey_t bb_to_key (basic_block);
static bool better_edge_p (const_basic_block, const_edge, int, int, int, int,
			   const_edge);
static void connect_traces (int, struct trace *);
static bool copy_bb_p (const_basic_block, int);
static int get_uncond_jump_length (void);
static bool push_to_next_round_p (const_basic_block, int, int, int, gcov_type);
static void find_rarely_executed_basic_blocks_and_crossing_edges (edge **,
								  int *,
								  int *);
static void add_labels_and_missing_jumps (edge *, int);
static void add_reg_crossing_jump_notes (void);
static void fix_up_fall_thru_edges (void);
static void fix_edges_for_rarely_executed_code (edge *, int);
static void fix_crossing_conditional_branches (void);
static void fix_crossing_unconditional_branches (void);

/* Check to see if bb should be pushed into the next round of trace
   collections or not.  Reasons for pushing the block forward are:
   1) the block is cold, we are doing partitioning, and there will be
   another round (cold partition blocks are not supposed to be
   collected into traces until the very last round); or 2) there will
   be another round, and the basic block is not "hot enough" for the
   current round of trace collection.  */

static bool
push_to_next_round_p (const_basic_block bb, int round, int number_of_rounds,
		      int exec_th, gcov_type count_th)
{
  bool there_exists_another_round;
  bool block_not_hot_enough;

  there_exists_another_round = round < number_of_rounds - 1;

  block_not_hot_enough = (bb->frequency < exec_th
			  || bb->count < count_th
			  || probably_never_executed_bb_p (bb));

  return there_exists_another_round && block_not_hot_enough;
}

/* Find the traces for Software Trace Cache.  Chain the blocks of each trace
   through their aux fields.  Store the number of traces in *N_TRACES and the
   descriptions of the traces in TRACES.  */

static void
find_traces (int *n_traces, struct trace *traces)
{
  int i;
  int number_of_rounds;
  edge e;
  edge_iterator ei;
  fibheap_t heap;

  /* Add one extra round of trace collection when partitioning hot/cold
     basic blocks into separate sections.  The last round is for all the
     cold blocks (and ONLY the cold blocks).  */

  number_of_rounds = N_ROUNDS - 1;

  /* Insert the entry points of the function into the heap.  */
  heap = fibheap_new ();
  max_entry_frequency = 0;
  max_entry_count = 0;
  FOR_EACH_EDGE (e, ei, ENTRY_BLOCK_PTR->succs)
    {
      bbd[e->dest->index].heap = heap;
      bbd[e->dest->index].node = fibheap_insert (heap, bb_to_key (e->dest),
						    e->dest);
      if (e->dest->frequency > max_entry_frequency)
	max_entry_frequency = e->dest->frequency;
      if (e->dest->count > max_entry_count)
	max_entry_count = e->dest->count;
    }

  /* Find the traces.  */
  for (i = 0; i < number_of_rounds; i++)
    {
      gcov_type count_threshold;

      if (dump_file)
	fprintf (dump_file, "STC - round %d\n", i + 1);

      if (max_entry_count < INT_MAX / 1000)
	count_threshold = max_entry_count * exec_threshold[i] / 1000;
      else
	count_threshold = max_entry_count / 1000 * exec_threshold[i];

      find_traces_1_round (REG_BR_PROB_BASE * branch_threshold[i] / 1000,
			   max_entry_frequency * exec_threshold[i] / 1000,
			   count_threshold, traces, n_traces, i, &heap,
			   number_of_rounds);
    }
  fibheap_delete (heap);

  if (dump_file)
    {
      for (i = 0; i < *n_traces; i++)
	{
	  basic_block bb;
	  fprintf (dump_file, "Trace %d (round %d):  ", i + 1,
		   traces[i].round + 1);
	  for (bb = traces[i].first; bb != traces[i].last; bb = (basic_block) bb->aux)
	    fprintf (dump_file, "%d [%d] ", bb->index, bb->frequency);
	  fprintf (dump_file, "%d [%d]\n", bb->index, bb->frequency);
	}
      fflush (dump_file);
    }
}

/* Rotate the loop whose back edge is BACK_EDGE in the tail of trace TRACE
   (with sequential number TRACE_N).  Return the basic block that should be
   the last block of the trace after the rotation.  */

static basic_block
rotate_loop (edge back_edge, struct trace *trace, int trace_n)
{
  basic_block bb;

  /* Information about the best end (end after rotation) of the loop.  */
  basic_block best_bb = NULL;
  edge best_edge = NULL;
  int best_freq = -1;
  gcov_type best_count = -1;
  /* The best edge is preferred when its destination is not visited yet
     or is a start block of some trace.  */
  bool is_preferred = false;

  /* Find the most frequent edge that goes out of the current trace.  */
  bb = back_edge->dest;
  do
    {
      edge e;
      edge_iterator ei;

      FOR_EACH_EDGE (e, ei, bb->succs)
	if (e->dest != EXIT_BLOCK_PTR
	    && e->dest->il.rtl->visited != trace_n
	    && (e->flags & EDGE_CAN_FALLTHRU)
	    && !(e->flags & EDGE_COMPLEX))
	{
	  if (is_preferred)
	    {
	      /* The best edge is preferred.  */
	      if (!e->dest->il.rtl->visited
		  || bbd[e->dest->index].start_of_trace >= 0)
		{
		  /* The current edge E is also preferred.  */
		  int freq = EDGE_FREQUENCY (e);
		  if (freq > best_freq || e->count > best_count)
		    {
		      best_freq = freq;
		      best_count = e->count;
		      best_edge = e;
		      best_bb = bb;
		    }
		}
	    }
	  else
	    {
	      if (!e->dest->il.rtl->visited
		  || bbd[e->dest->index].start_of_trace >= 0)
		{
		  /* The current edge E is preferred.  */
		  is_preferred = true;
		  best_freq = EDGE_FREQUENCY (e);
		  best_count = e->count;
		  best_edge = e;
		  best_bb = bb;
		}
	      else
		{
		  int freq = EDGE_FREQUENCY (e);
		  if (!best_edge || freq > best_freq || e->count > best_count)
		    {
		      best_freq = freq;
		      best_count = e->count;
		      best_edge = e;
		      best_bb = bb;
		    }
		}
	    }
	}
      bb = (basic_block) bb->aux;
    }
  while (bb != back_edge->dest);

  if (best_bb)
    {
      /* Rotate the loop so that the BEST_EDGE goes out from the last block of
	 the trace.  */
      if (back_edge->dest == trace->first)
	{
	  trace->first = (basic_block) best_bb->aux;
	}
      else
	{
	  basic_block prev_bb;

	  for (prev_bb = trace->first;
	       prev_bb->aux != back_edge->dest;
	       prev_bb = (basic_block) prev_bb->aux)
	    ;
	  prev_bb->aux = best_bb->aux;

	  /* Try to get rid of uncond jump to cond jump.  */
	  if (single_succ_p (prev_bb))
	    {
	      basic_block header = single_succ (prev_bb);

	      /* Duplicate HEADER if it is a small block containing cond jump
		 in the end.  */
	      if (any_condjump_p (BB_END (header)) && copy_bb_p (header, 0)
		  && !find_reg_note (BB_END (header), REG_CROSSING_JUMP,
				     NULL_RTX))
		copy_bb (header, single_succ_edge (prev_bb), prev_bb, trace_n);
	    }
	}
    }
  else
    {
      /* We have not found a suitable loop tail, so do no rotation.  */
      best_bb = back_edge->src;
    }
  best_bb->aux = NULL;
  return best_bb;
}

/* Mark BB as visited in trace number TRACE, and remove it from any heap
   it is currently in.  */

static void
mark_bb_visited (basic_block bb, int trace)
{
  bb->il.rtl->visited = trace;
  if (bbd[bb->index].heap)
    {
      fibheap_delete_node (bbd[bb->index].heap, bbd[bb->index].node);
      bbd[bb->index].heap = NULL;
      bbd[bb->index].node = NULL;
    }
}

/* One round of finding traces.  Find traces for BRANCH_TH and EXEC_TH, i.e.
   do not include into traces basic blocks whose probability is lower than
   BRANCH_TH, whose frequency is lower than EXEC_TH, or whose count is lower
   than COUNT_TH.  Store the new traces into TRACES and modify the number of
   traces *N_TRACES.  Set the round (which the trace belongs to) to ROUND.
   The function expects starting basic blocks to be in *HEAP; at the end it
   deletes *HEAP and stores the starting points for the next round into a
   new *HEAP.  */

static void
find_traces_1_round (int branch_th, int exec_th, gcov_type count_th,
		     struct trace *traces, int *n_traces, int round,
		     fibheap_t *heap, int number_of_rounds)
{
  /* Heap for discarded basic blocks which are possible starting points for
     the next round.  */
  fibheap_t new_heap = fibheap_new ();

  while (!fibheap_empty (*heap))
    {
      basic_block bb;
      struct trace *trace;
      edge best_edge, e;
      fibheapkey_t key;
      edge_iterator ei;

      bb = (basic_block) fibheap_extract_min (*heap);
      bbd[bb->index].heap = NULL;
      bbd[bb->index].node = NULL;

      if (dump_file)
	fprintf (dump_file, "Getting bb %d\n", bb->index);

      /* If the BB's frequency is too low, send BB to the next round.  When
	 partitioning hot/cold blocks into separate sections, make sure all
	 the cold blocks (and ONLY the cold blocks) go into the (extra) final
	 round.  */

      if (push_to_next_round_p (bb, round, number_of_rounds, exec_th,
				count_th))
	{
	  int key = bb_to_key (bb);
	  bbd[bb->index].heap = new_heap;
	  bbd[bb->index].node = fibheap_insert (new_heap, key, bb);

	  if (dump_file)
	    fprintf (dump_file,
		     "  Possible start point of next round: %d (key: %d)\n",
		     bb->index, key);
	  continue;
	}

      trace = traces + *n_traces;
      trace->first = bb;
      trace->round = round;
      trace->length = 0;
      bbd[bb->index].in_trace = *n_traces;
      (*n_traces)++;

      do
	{
	  int prob, freq;
	  bool ends_in_call;

	  /* The probability and frequency of the best edge.  */
	  int best_prob = INT_MIN / 2;
	  int best_freq = INT_MIN / 2;

	  best_edge = NULL;
	  mark_bb_visited (bb, *n_traces);
	  trace->length++;

	  if (dump_file)
	    fprintf (dump_file, "Basic block %d was visited in trace %d\n",
		     bb->index, *n_traces - 1);

	  ends_in_call = block_ends_with_call_p (bb);

	  /* Select the successor that will be placed after BB.  */
	  FOR_EACH_EDGE (e, ei, bb->succs)
	    {
	      gcc_assert (!(e->flags & EDGE_FAKE));

	      if (e->dest == EXIT_BLOCK_PTR)
		continue;

	      if (e->dest->il.rtl->visited
		  && e->dest->il.rtl->visited != *n_traces)
		continue;

	      if (BB_PARTITION (e->dest) != BB_PARTITION (bb))
		continue;

	      prob = e->probability;
	      freq = e->dest->frequency;

	      /* The only sensible preference for a call instruction is the
		 fallthru edge.  Don't bother selecting anything else.  */
	      if (ends_in_call)
		{
		  if (e->flags & EDGE_CAN_FALLTHRU)
		    {
		      best_edge = e;
		      best_prob = prob;
		      best_freq = freq;
		    }
		  continue;
		}

	      /* An edge that cannot be a fallthru edge, or whose successor
		 is improbable or infrequent, is an unsuitable successor.  */
	      if (!(e->flags & EDGE_CAN_FALLTHRU) || (e->flags & EDGE_COMPLEX)
		  || prob < branch_th || EDGE_FREQUENCY (e) < exec_th
		  || e->count < count_th)
		continue;

	      /* If partitioning hot/cold basic blocks, don't consider edges
		 that cross section boundaries.  */

	      if (better_edge_p (bb, e, prob, freq, best_prob, best_freq,
				 best_edge))
		{
		  best_edge = e;
		  best_prob = prob;
		  best_freq = freq;
		}
	    }

	  /* If the best destination has multiple predecessors, and can be
	     duplicated cheaper than a jump, don't allow it to be added
	     to a trace.  We'll duplicate it when connecting traces.  */
	  if (best_edge && EDGE_COUNT (best_edge->dest->preds) >= 2
	      && copy_bb_p (best_edge->dest, 0))
	    best_edge = NULL;

	  /* Add all non-selected successors to the heaps.  */
	  FOR_EACH_EDGE (e, ei, bb->succs)
	    {
	      if (e == best_edge
		  || e->dest == EXIT_BLOCK_PTR
		  || e->dest->il.rtl->visited)
		continue;

	      key = bb_to_key (e->dest);

	      if (bbd[e->dest->index].heap)
		{
		  /* E->DEST is already in some heap.  */
		  if (key != bbd[e->dest->index].node->key)
		    {
		      if (dump_file)
			{
			  fprintf (dump_file,
				   "Changing key for bb %d from %ld to %ld.\n",
				   e->dest->index,
				   (long) bbd[e->dest->index].node->key,
				   key);
			}
		      fibheap_replace_key (bbd[e->dest->index].heap,
					   bbd[e->dest->index].node, key);
		    }
		}
	      else
		{
		  fibheap_t which_heap = *heap;

		  prob = e->probability;
		  freq = EDGE_FREQUENCY (e);

		  if (!(e->flags & EDGE_CAN_FALLTHRU)
		      || (e->flags & EDGE_COMPLEX)
		      || prob < branch_th || freq < exec_th
		      || e->count < count_th)
		    {
		      /* When partitioning hot/cold basic blocks, make sure
			 the cold blocks (and only the cold blocks) all get
			 pushed to the last round of trace collection.  */

		      if (push_to_next_round_p (e->dest, round,
						number_of_rounds,
						exec_th, count_th))
			which_heap = new_heap;
		    }

		  bbd[e->dest->index].heap = which_heap;
		  bbd[e->dest->index].node = fibheap_insert (which_heap,
								key, e->dest);

		  if (dump_file)
		    {
		      fprintf (dump_file,
			       "  Possible start of %s round: %d (key: %ld)\n",
			       (which_heap == new_heap) ? "next" : "this",
			       e->dest->index, (long) key);
		    }
		}
	    }

	  if (best_edge) /* Suitable successor was found.  */
	    {
	      if (best_edge->dest->il.rtl->visited == *n_traces)
		{
		  /* We do nothing with one-basic-block loops.  */
		  if (best_edge->dest != bb)
		    {
		      if (EDGE_FREQUENCY (best_edge)
			  > 4 * best_edge->dest->frequency / 5)
			{
			  /* The loop has at least 4 iterations.  If the loop
			     header is not the first block of the function
			     we can rotate the loop.  */

			  if (best_edge->dest != ENTRY_BLOCK_PTR->next_bb)
			    {
			      if (dump_file)
				{
				  fprintf (dump_file,
					   "Rotating loop %d - %d\n",
					   best_edge->dest->index, bb->index);
				}
			      bb->aux = best_edge->dest;
			      bbd[best_edge->dest->index].in_trace =
							     (*n_traces) - 1;
			      bb = rotate_loop (best_edge, trace, *n_traces);
			    }
			}
		      else
			{
			  /* The loop has fewer than 4 iterations.  */

			  if (single_succ_p (bb)
			      && copy_bb_p (best_edge->dest,
			      		    optimize_edge_for_speed_p (best_edge)))
			    {
			      bb = copy_bb (best_edge->dest, best_edge, bb,
					    *n_traces);
			      trace->length++;
			    }
			}
		    }

		  /* Terminate the trace.  */
		  break;
		}
	      else
		{
		  /* Check for a situation

		    A
		   /|
		  B |
		   \|
		    C

		  where
		  EDGE_FREQUENCY (AB) + EDGE_FREQUENCY (BC)
		    >= EDGE_FREQUENCY (AC).
		  (i.e. 2 * B->frequency >= EDGE_FREQUENCY (AC) )
		  Best ordering is then A B C.

		  This situation is created for example by:

		  if (A) B;
		  C;

		  */

		  FOR_EACH_EDGE (e, ei, bb->succs)
		    if (e != best_edge
			&& (e->flags & EDGE_CAN_FALLTHRU)
			&& !(e->flags & EDGE_COMPLEX)
			&& !e->dest->il.rtl->visited
			&& single_pred_p (e->dest)
			&& !(e->flags & EDGE_CROSSING)
			&& single_succ_p (e->dest)
			&& (single_succ_edge (e->dest)->flags
			    & EDGE_CAN_FALLTHRU)
			&& !(single_succ_edge (e->dest)->flags & EDGE_COMPLEX)
			&& single_succ (e->dest) == best_edge->dest
			&& 2 * e->dest->frequency >= EDGE_FREQUENCY (best_edge))
		      {
			best_edge = e;
			if (dump_file)
			  fprintf (dump_file, "Selecting BB %d\n",
				   best_edge->dest->index);
			break;
		      }

		  bb->aux = best_edge->dest;
		  bbd[best_edge->dest->index].in_trace = (*n_traces) - 1;
		  bb = best_edge->dest;
		}
	    }
	}
      while (best_edge);
      trace->last = bb;
      bbd[trace->first->index].start_of_trace = *n_traces - 1;
      bbd[trace->last->index].end_of_trace = *n_traces - 1;

      /* The trace is terminated so we have to recount the keys in heap
	 (some block can have a lower key because now one of its predecessors
	 is an end of the trace).  */
      FOR_EACH_EDGE (e, ei, bb->succs)
	{
	  if (e->dest == EXIT_BLOCK_PTR
	      || e->dest->il.rtl->visited)
	    continue;

	  if (bbd[e->dest->index].heap)
	    {
	      key = bb_to_key (e->dest);
	      if (key != bbd[e->dest->index].node->key)
		{
		  if (dump_file)
		    {
		      fprintf (dump_file,
			       "Changing key for bb %d from %ld to %ld.\n",
			       e->dest->index,
			       (long) bbd[e->dest->index].node->key, key);
		    }
		  fibheap_replace_key (bbd[e->dest->index].heap,
				       bbd[e->dest->index].node,
				       key);
		}
	    }
	}
    }

  fibheap_delete (*heap);

  /* "Return" the new heap.  */
  *heap = new_heap;
}

/* Create a duplicate of the basic block OLD_BB and redirect edge E to it, add
   it to the trace after BB, mark OLD_BB visited and update the pass' data
   structures (TRACE is the number of the trace which OLD_BB is duplicated
   into).  */

static basic_block
copy_bb (basic_block old_bb, edge e, basic_block bb, int trace)
{
  basic_block new_bb;

  new_bb = duplicate_block (old_bb, e, bb);
  BB_COPY_PARTITION (new_bb, old_bb);

  gcc_assert (e->dest == new_bb);
  gcc_assert (!e->dest->il.rtl->visited);

  if (dump_file)
    fprintf (dump_file,
	     "Duplicated bb %d (created bb %d)\n",
	     old_bb->index, new_bb->index);
  new_bb->il.rtl->visited = trace;
  new_bb->aux = bb->aux;
  bb->aux = new_bb;

  if (new_bb->index >= array_size || last_basic_block > array_size)
    {
      int i;
      int new_size;

      new_size = MAX (last_basic_block, new_bb->index + 1);
      new_size = GET_ARRAY_SIZE (new_size);
      bbd = XRESIZEVEC (bbro_basic_block_data, bbd, new_size);
      for (i = array_size; i < new_size; i++)
	{
	  bbd[i].start_of_trace = -1;
	  bbd[i].in_trace = -1;
	  bbd[i].end_of_trace = -1;
	  bbd[i].heap = NULL;
	  bbd[i].node = NULL;
	}
      array_size = new_size;

      if (dump_file)
	{
	  fprintf (dump_file,
		   "Growing the dynamic array to %d elements.\n",
		   array_size);
	}
    }

  bbd[new_bb->index].in_trace = trace;

  return new_bb;
}

/* Compute and return the key (for the heap) of the basic block BB.  */

static fibheapkey_t
bb_to_key (basic_block bb)
{
  edge e;
  edge_iterator ei;
  int priority = 0;

  /* Do not start in probably never executed blocks.  */

  if (BB_PARTITION (bb) == BB_COLD_PARTITION
      || probably_never_executed_bb_p (bb))
    return BB_FREQ_MAX;

  /* Prefer blocks whose predecessor is an end of some trace
     or whose predecessor edge is EDGE_DFS_BACK.  */
  FOR_EACH_EDGE (e, ei, bb->preds)
    {
      if ((e->src != ENTRY_BLOCK_PTR && bbd[e->src->index].end_of_trace >= 0)
	  || (e->flags & EDGE_DFS_BACK))
	{
	  int edge_freq = EDGE_FREQUENCY (e);

	  if (edge_freq > priority)
	    priority = edge_freq;
	}
    }

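  /* For example (illustrative numbers): a block with frequency 1000 and no
     such predecessor gets key -1000, while the same block with a qualifying
     predecessor edge of frequency 500 gets
     -(100 * BB_FREQ_MAX + 100 * 500 + 1000), which sorts before every key of
     the first form, since smaller keys are extracted from the heap first.  */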
  if (priority)
    /* The block with priority should have significantly lower key.  */
    return -(100 * BB_FREQ_MAX + 100 * priority + bb->frequency);
  return -bb->frequency;
}

/* Return true when the edge E from basic block BB is better than the
   temporary best edge (details are in the function body).  The probability
   of edge E is PROB.  The frequency of the successor is FREQ.  The current
   best probability is BEST_PROB, the best frequency is BEST_FREQ.
   The edge is considered to be equivalent when PROB does not differ much from
   BEST_PROB; similarly for frequency.  */

static bool
better_edge_p (const_basic_block bb, const_edge e, int prob, int freq,
	       int best_prob, int best_freq, const_edge cur_best_edge)
{
  bool is_better_edge;

  /* The BEST_* values do not have to be best, but can be a bit smaller than
     maximum values.  */
  int diff_prob = best_prob / 10;
  int diff_freq = best_freq / 10;
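
  /* E.g. (illustrative numbers): with BEST_PROB 5000 the tolerance band is
     +-500, so an edge with PROB 5400 is treated as equally probable and the
     decision falls through to the frequency comparison below.  */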

  if (prob > best_prob + diff_prob)
    /* The edge has higher probability than the temporary best edge.  */
    is_better_edge = true;
  else if (prob < best_prob - diff_prob)
    /* The edge has lower probability than the temporary best edge.  */
    is_better_edge = false;
  else if (freq < best_freq - diff_freq)
    /* The edge and the temporary best edge have almost equivalent
       probabilities.  The higher frequency of a successor now means
       that there is another edge going into that successor.
       This successor has lower frequency so it is better.  */
    is_better_edge = true;
  else if (freq > best_freq + diff_freq)
    /* This successor has higher frequency so it is worse.  */
    is_better_edge = false;
  else if (e->dest->prev_bb == bb)
    /* The edges have equivalent probabilities and the successors
       have equivalent frequencies.  Select the previous successor.  */
    is_better_edge = true;
  else
    is_better_edge = false;

  /* If we are doing hot/cold partitioning, make sure that we always favor
     non-crossing edges over crossing edges.  */

  if (!is_better_edge
      && flag_reorder_blocks_and_partition
      && cur_best_edge
      && (cur_best_edge->flags & EDGE_CROSSING)
      && !(e->flags & EDGE_CROSSING))
    is_better_edge = true;

  return is_better_edge;
}

/* Connect traces in array TRACES, N_TRACES is the count of traces.  */

static void
connect_traces (int n_traces, struct trace *traces)
{
  int i;
  bool *connected;
  bool two_passes;
  int last_trace;
  int current_pass;
  int current_partition;
  int freq_threshold;
  gcov_type count_threshold;

  freq_threshold = max_entry_frequency * DUPLICATION_THRESHOLD / 1000;
  if (max_entry_count < INT_MAX / 1000)
    count_threshold = max_entry_count * DUPLICATION_THRESHOLD / 1000;
  else
    count_threshold = max_entry_count / 1000 * DUPLICATION_THRESHOLD;

  connected = XCNEWVEC (bool, n_traces);
  last_trace = -1;
  current_pass = 1;
  current_partition = BB_PARTITION (traces[0].first);
  two_passes = false;

  if (flag_reorder_blocks_and_partition)
    for (i = 0; i < n_traces && !two_passes; i++)
      if (BB_PARTITION (traces[0].first)
	  != BB_PARTITION (traces[i].first))
	two_passes = true;

  for (i = 0; i < n_traces || (two_passes && current_pass == 1); i++)
    {
      int t = i;
      int t2;
      edge e, best;
      int best_len;

      if (i >= n_traces)
	{
	  gcc_assert (two_passes && current_pass == 1);
	  i = 0;
	  t = i;
	  current_pass = 2;
	  if (current_partition == BB_HOT_PARTITION)
	    current_partition = BB_COLD_PARTITION;
	  else
	    current_partition = BB_HOT_PARTITION;
	}

      if (connected[t])
	continue;

      if (two_passes
	  && BB_PARTITION (traces[t].first) != current_partition)
	continue;

      connected[t] = true;

      /* Find the predecessor traces.  */
      for (t2 = t; t2 > 0;)
	{
	  edge_iterator ei;
	  best = NULL;
	  best_len = 0;
	  FOR_EACH_EDGE (e, ei, traces[t2].first->preds)
	    {
	      int si = e->src->index;

	      if (e->src != ENTRY_BLOCK_PTR
		  && (e->flags & EDGE_CAN_FALLTHRU)
		  && !(e->flags & EDGE_COMPLEX)
		  && bbd[si].end_of_trace >= 0
		  && !connected[bbd[si].end_of_trace]
		  && (BB_PARTITION (e->src) == current_partition)
		  && (!best
		      || e->probability > best->probability
		      || (e->probability == best->probability
			  && traces[bbd[si].end_of_trace].length > best_len)))
		{
		  best = e;
		  best_len = traces[bbd[si].end_of_trace].length;
		}
	    }
	  if (best)
	    {
	      best->src->aux = best->dest;
	      t2 = bbd[best->src->index].end_of_trace;
	      connected[t2] = true;

	      if (dump_file)
		{
		  fprintf (dump_file, "Connection: %d %d\n",
			   best->src->index, best->dest->index);
		}
	    }
	  else
	    break;
	}

      if (last_trace >= 0)
	traces[last_trace].last->aux = traces[t2].first;
      last_trace = t;

      /* Find the successor traces.  */
      while (1)
	{
	  /* Find the continuation of the chain.  */
	  edge_iterator ei;
	  best = NULL;
	  best_len = 0;
	  FOR_EACH_EDGE (e, ei, traces[t].last->succs)
	    {
	      int di = e->dest->index;

	      if (e->dest != EXIT_BLOCK_PTR
		  && (e->flags & EDGE_CAN_FALLTHRU)
		  && !(e->flags & EDGE_COMPLEX)
		  && bbd[di].start_of_trace >= 0
		  && !connected[bbd[di].start_of_trace]
		  && (BB_PARTITION (e->dest) == current_partition)
		  && (!best
		      || e->probability > best->probability
		      || (e->probability == best->probability
			  && traces[bbd[di].start_of_trace].length > best_len)))
		{
		  best = e;
		  best_len = traces[bbd[di].start_of_trace].length;
		}
	    }

	  if (best)
	    {
	      if (dump_file)
		{
		  fprintf (dump_file, "Connection: %d %d\n",
			   best->src->index, best->dest->index);
		}
	      t = bbd[best->dest->index].start_of_trace;
	      traces[last_trace].last->aux = traces[t].first;
	      connected[t] = true;
	      last_trace = t;
	    }
	  else
	    {
	      /* Try to connect the traces by duplication of 1 block.  */
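	      /* Schematically: if trace T ends in block X, another trace
		 starts in block S, and there is a block BB with edges
		 X -> BB and BB -> S, then BB may be copied so that the
		 layout becomes ... X BB' S ... with no jump in between
		 (see also the description in the file header).  */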
	      edge e2;
	      basic_block next_bb = NULL;
	      bool try_copy = false;

	      FOR_EACH_EDGE (e, ei, traces[t].last->succs)
		if (e->dest != EXIT_BLOCK_PTR
		    && (e->flags & EDGE_CAN_FALLTHRU)
		    && !(e->flags & EDGE_COMPLEX)
		    && (!best || e->probability > best->probability))
		  {
		    edge_iterator ei;
		    edge best2 = NULL;
		    int best2_len = 0;

		    /* If the destination is a start of a trace which is only
		       one block long, then no need to search the successor
		       blocks of the trace.  Accept it.  */
		    if (bbd[e->dest->index].start_of_trace >= 0
			&& traces[bbd[e->dest->index].start_of_trace].length
			   == 1)
		      {
			best = e;
			try_copy = true;
			continue;
		      }

		    FOR_EACH_EDGE (e2, ei, e->dest->succs)
		      {
			int di = e2->dest->index;

			if (e2->dest == EXIT_BLOCK_PTR
			    || ((e2->flags & EDGE_CAN_FALLTHRU)
				&& !(e2->flags & EDGE_COMPLEX)
				&& bbd[di].start_of_trace >= 0
				&& !connected[bbd[di].start_of_trace]
				&& (BB_PARTITION (e2->dest) == current_partition)
				&& (EDGE_FREQUENCY (e2) >= freq_threshold)
				&& (e2->count >= count_threshold)
				&& (!best2
				    || e2->probability > best2->probability
				    || (e2->probability == best2->probability
					&& traces[bbd[di].start_of_trace].length
					   > best2_len))))
			  {
			    best = e;
			    best2 = e2;
			    if (e2->dest != EXIT_BLOCK_PTR)
			      best2_len = traces[bbd[di].start_of_trace].length;
			    else
			      best2_len = INT_MAX;
			    next_bb = e2->dest;
			    try_copy = true;
			  }
		      }
		  }

	      if (flag_reorder_blocks_and_partition)
		try_copy = false;

	      /* Copy tiny blocks always; copy larger blocks only when the
		 edge is traversed frequently enough.  */
	      if (try_copy
		  && copy_bb_p (best->dest,
				optimize_edge_for_speed_p (best)
				&& EDGE_FREQUENCY (best) >= freq_threshold
				&& best->count >= count_threshold))
		{
		  basic_block new_bb;

		  if (dump_file)
		    {
		      fprintf (dump_file, "Connection: %d %d ",
			       traces[t].last->index, best->dest->index);
		      if (!next_bb)
			fputc ('\n', dump_file);
		      else if (next_bb == EXIT_BLOCK_PTR)
			fprintf (dump_file, "exit\n");
		      else
			fprintf (dump_file, "%d\n", next_bb->index);
		    }

		  new_bb = copy_bb (best->dest, best, traces[t].last, t);
		  traces[t].last = new_bb;
		  if (next_bb && next_bb != EXIT_BLOCK_PTR)
		    {
		      t = bbd[next_bb->index].start_of_trace;
		      traces[last_trace].last->aux = traces[t].first;
		      connected[t] = true;
		      last_trace = t;
		    }
		  else
		    break;	/* Stop finding the successor traces.  */
		}
	      else
		break;	/* Stop finding the successor traces.  */
	    }
	}
    }

  if (dump_file)
    {
      basic_block bb;

      fprintf (dump_file, "Final order:\n");
      for (bb = traces[0].first; bb; bb = (basic_block) bb->aux)
	fprintf (dump_file, "%d ", bb->index);
      fprintf (dump_file, "\n");
      fflush (dump_file);
    }

  FREE (connected);
}

/* Return true when BB can and should be copied.  CODE_MAY_GROW is true
   when code size is allowed to grow by duplication.  */

static bool
copy_bb_p (const_basic_block bb, int code_may_grow)
{
  int size = 0;
  int max_size = uncond_jump_length;
  rtx insn;

  if (!bb->frequency)
    return false;
  if (EDGE_COUNT (bb->preds) < 2)
    return false;
  if (!can_duplicate_block_p (bb))
    return false;

  /* Avoid duplicating blocks which have many successors (PR/13430).  */
  if (EDGE_COUNT (bb->succs) > 8)
    return false;

  if (code_may_grow && optimize_bb_for_speed_p (bb))
    max_size *= PARAM_VALUE (PARAM_MAX_GROW_COPY_BB_INSNS);

  FOR_BB_INSNS (bb, insn)
    {
      if (INSN_P (insn))
	size += get_attr_min_length (insn);
    }

  if (size <= max_size)
    return true;

  if (dump_file)
    {
      fprintf (dump_file,
	       "Block %d can't be copied because its size = %d.\n",
	       bb->index, size);
    }

  return false;
}

/* Return the length of an unconditional jump instruction.  */

static int
get_uncond_jump_length (void)
{
  rtx label, jump;
  int length;

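  /* Measure the length indirectly: emit a temporary label and an
     unconditional jump to it, query the jump's minimal length, and
     delete both again.  */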
  label = emit_label_before (gen_label_rtx (), get_insns ());
  jump = emit_jump_insn (gen_jump (label));

  length = get_attr_min_length (jump);

  delete_insn (jump);
  delete_insn (label);
  return length;
}

/* Find the basic blocks that are rarely executed and need to be moved to
   a separate section of the .o file (to cut down on paging and improve
   cache locality).  */

static void
find_rarely_executed_basic_blocks_and_crossing_edges (edge **crossing_edges,
						      int *n_crossing_edges,
						      int *max_idx)
{
  basic_block bb;
  edge e;
  int i;
  edge_iterator ei;

  /* Mark which partition (hot/cold) each basic block belongs in.  */

  FOR_EACH_BB (bb)
    {
      if (probably_never_executed_bb_p (bb))
	BB_SET_PARTITION (bb, BB_COLD_PARTITION);
      else
	BB_SET_PARTITION (bb, BB_HOT_PARTITION);
    }

  /* Mark every edge that crosses between sections.  */

  i = 0;
  FOR_EACH_BB (bb)
    FOR_EACH_EDGE (e, ei, bb->succs)
    {
      if (e->src != ENTRY_BLOCK_PTR
	  && e->dest != EXIT_BLOCK_PTR
	  && BB_PARTITION (e->src) != BB_PARTITION (e->dest))
	{
	  e->flags |= EDGE_CROSSING;
	  if (i == *max_idx)
	    {
	      *max_idx *= 2;
	      *crossing_edges = XRESIZEVEC (edge, *crossing_edges, *max_idx);
	    }
	  (*crossing_edges)[i++] = e;
	}
      else
	e->flags &= ~EDGE_CROSSING;
    }
  *n_crossing_edges = i;
}

/* If any destination of a crossing edge does not have a label, add a label;
   convert any fall-through crossing edges (for blocks that do not contain
   a jump) to unconditional jumps.  */

static void
add_labels_and_missing_jumps (edge *crossing_edges, int n_crossing_edges)
{
  int i;
  basic_block src;
  basic_block dest;
  rtx label;
  rtx barrier;
  rtx new_jump;

  for (i = 0; i < n_crossing_edges; i++)
    {
      if (crossing_edges[i])
	{
	  src = crossing_edges[i]->src;
	  dest = crossing_edges[i]->dest;

	  /* Make sure dest has a label.  */

	  if (dest && (dest != EXIT_BLOCK_PTR))
	    {
	      label = block_label (dest);

	      /* Make sure the source block ends with a jump.  If the
	         source block does not end with a jump it might end
	         with a call_insn; this case will be handled in the
	         fix_up_fall_thru_edges function.  */

	      if (src && (src != ENTRY_BLOCK_PTR))
		{
		  if (!JUMP_P (BB_END (src))
		      && !block_ends_with_call_p (src)
		      && !can_throw_internal (BB_END (src)))
		    /* bb just falls through.  */
		    {
		      /* Make sure there's only one successor.  */
		      gcc_assert (single_succ_p (src));

		      /* Find label in dest block.  */
		      label = block_label (dest);

		      new_jump = emit_jump_insn_after (gen_jump (label),
						       BB_END (src));
		      barrier = emit_barrier_after (new_jump);
		      JUMP_LABEL (new_jump) = label;
		      LABEL_NUSES (label) += 1;
		      src->il.rtl->footer = unlink_insn_chain (barrier, barrier);
		      /* Mark edge as non-fallthru.  */
		      crossing_edges[i]->flags &= ~EDGE_FALLTHRU;
		    } /* end: 'if (!JUMP_P ... '  */
		} /* end: 'if (src && src !=...'  */
	    } /* end: 'if (dest && dest !=...'  */
	} /* end: 'if (crossing_edges[i]...'  */
    } /* end for loop  */
}

/* Find any bb's where the fall-through edge is a crossing edge (note that
   these bb's must also contain a conditional jump or end with a call
   instruction; we've already dealt with fall-through edges for blocks
   that didn't have a conditional jump or didn't end with call instruction
   in the call to add_labels_and_missing_jumps).  Convert the fall-through
   edge to non-crossing edge by inserting a new bb to fall-through into.
   The new bb will contain an unconditional jump (crossing edge) to the
   original fall through destination.  */

static void
fix_up_fall_thru_edges (void)
{
  basic_block cur_bb;
  basic_block new_bb;
  edge succ1;
  edge succ2;
  edge fall_thru;
  edge cond_jump = NULL;
  edge e;
  bool cond_jump_crosses;
  int invert_worked;
  rtx old_jump;
  rtx fall_thru_label;
  rtx barrier;

  FOR_EACH_BB (cur_bb)
    {
      fall_thru = NULL;
      if (EDGE_COUNT (cur_bb->succs) > 0)
	succ1 = EDGE_SUCC (cur_bb, 0);
      else
	succ1 = NULL;

      if (EDGE_COUNT (cur_bb->succs) > 1)
	succ2 = EDGE_SUCC (cur_bb, 1);
      else
	succ2 = NULL;

      /* Find the fall-through edge.  */

      if (succ1
	  && (succ1->flags & EDGE_FALLTHRU))
	{
	  fall_thru = succ1;
	  cond_jump = succ2;
	}
      else if (succ2
	       && (succ2->flags & EDGE_FALLTHRU))
	{
	  fall_thru = succ2;
	  cond_jump = succ1;
	}
      else if (succ1
	       && (block_ends_with_call_p (cur_bb)
		   || can_throw_internal (BB_END (cur_bb))))
	{
	  edge e;
	  edge_iterator ei;

	  /* Find EDGE_CAN_FALLTHRU edge.  */
	  FOR_EACH_EDGE (e, ei, cur_bb->succs)
	    if (e->flags & EDGE_CAN_FALLTHRU)
	      {
		fall_thru = e;
		break;
	      }
	}

      if (fall_thru && (fall_thru->dest != EXIT_BLOCK_PTR))
	{
	  /* Check to see if the fall-thru edge is a crossing edge.  */

	  if (fall_thru->flags & EDGE_CROSSING)
	    {
	      /* The fall_thru edge crosses; now check the cond jump edge, if
		 it exists.  */

	      cond_jump_crosses = true;
	      invert_worked = 0;
	      old_jump = BB_END (cur_bb);

	      /* Find the jump instruction, if there is one.  */

	      if (cond_jump)
		{
		  if (!(cond_jump->flags & EDGE_CROSSING))
		    cond_jump_crosses = false;

		  /* We know the fall-thru edge crosses; if the cond
		     jump edge does NOT cross, and its destination is the
		     next block in the bb order, invert the jump
		     (i.e. fix it so the fall thru does not cross and
		     the cond jump does).  */

		  if (!cond_jump_crosses
		      && cur_bb->aux == cond_jump->dest)
		    {
		      /* Find label in fall_thru block.  We've already added
			 any missing labels, so there must be one.  */

		      fall_thru_label = block_label (fall_thru->dest);

		      if (old_jump && JUMP_P (old_jump) && fall_thru_label)
			invert_worked = invert_jump (old_jump,
						     fall_thru_label, 0);
		      if (invert_worked)
			{
			  fall_thru->flags &= ~EDGE_FALLTHRU;
			  cond_jump->flags |= EDGE_FALLTHRU;
			  update_br_prob_note (cur_bb);
			  e = fall_thru;
			  fall_thru = cond_jump;
			  cond_jump = e;
			  cond_jump->flags |= EDGE_CROSSING;
			  fall_thru->flags &= ~EDGE_CROSSING;
			}
		    }
		}

	      if (cond_jump_crosses || !invert_worked)
		{
		  /* This is the case where both edges out of the basic
		     block are crossing edges.  Here we will fix up the
		     fall through edge.  The jump edge will be taken care
		     of later.  The EDGE_CROSSING flag of the fall_thru edge
		     is unset before the call to the force_nonfallthru
		     function because if a new basic-block is created
		     this edge remains in the current section boundary
		     while the edge between new_bb and the fall_thru->dest
		     becomes EDGE_CROSSING.  */

		  fall_thru->flags &= ~EDGE_CROSSING;
		  new_bb = force_nonfallthru (fall_thru);

		  if (new_bb)
		    {
		      new_bb->aux = cur_bb->aux;
		      cur_bb->aux = new_bb;

		      /* Make sure new fall-through bb is in same
			 partition as bb it's falling through from.  */

		      BB_COPY_PARTITION (new_bb, cur_bb);
		      single_succ_edge (new_bb)->flags |= EDGE_CROSSING;
		    }
		  else
		    {
		      /* If a new basic-block was not created, restore
			 the EDGE_CROSSING flag.  */
		      fall_thru->flags |= EDGE_CROSSING;
		    }

		  /* Add barrier after new jump.  */

		  if (new_bb)
		    {
		      barrier = emit_barrier_after (BB_END (new_bb));
		      new_bb->il.rtl->footer = unlink_insn_chain (barrier,
								  barrier);
		    }
		  else
		    {
		      barrier = emit_barrier_after (BB_END (cur_bb));
		      cur_bb->il.rtl->footer = unlink_insn_chain (barrier,
								  barrier);
		    }
		}
	    }
	}
    }
}

/* This function checks the destination block of a "crossing jump" to
   see if it has any crossing predecessors that begin with a code label
   and end with an unconditional jump.  If so, it returns that predecessor
   block.  (This is to avoid creating lots of new basic blocks that all
   contain unconditional jumps to the same destination).  */

static basic_block
find_jump_block (basic_block jump_dest)
{
  basic_block source_bb = NULL;
  edge e;
  rtx insn;
  edge_iterator ei;

  FOR_EACH_EDGE (e, ei, jump_dest->preds)
    if (e->flags & EDGE_CROSSING)
      {
	basic_block src = e->src;

	/* Check each predecessor to see if it has a label, and contains
	   only one executable instruction, which is an unconditional jump.
	   If so, we can use it.  */

	if (LABEL_P (BB_HEAD (src)))
	  for (insn = BB_HEAD (src);
	       !INSN_P (insn) && insn != NEXT_INSN (BB_END (src));
	       insn = NEXT_INSN (insn))
	    {
	      if (INSN_P (insn)
		  && insn == BB_END (src)
		  && JUMP_P (insn)
		  && !any_condjump_p (insn))
		{
		  source_bb = src;
		  break;
		}
	    }

	if (source_bb)
	  break;
      }

  return source_bb;
}

/* Find all BB's with conditional jumps that are crossing edges;
   insert a new bb and make the conditional jump branch to the new
   bb instead (make the new bb same color so conditional branch won't
   be a 'crossing' edge).  Insert an unconditional jump from the
   new bb to the original destination of the conditional jump.  */

static void
fix_crossing_conditional_branches (void)
{
  basic_block cur_bb;
  basic_block new_bb;
  basic_block last_bb;
  basic_block dest;
  edge succ1;
  edge succ2;
  edge crossing_edge;
  edge new_edge;
  rtx old_jump;
  rtx set_src;
  rtx old_label = NULL_RTX;
  rtx new_label;
  rtx new_jump;
  rtx barrier;

  last_bb = EXIT_BLOCK_PTR->prev_bb;

  FOR_EACH_BB (cur_bb)
    {
      crossing_edge = NULL;
      if (EDGE_COUNT (cur_bb->succs) > 0)
	succ1 = EDGE_SUCC (cur_bb, 0);
      else
	succ1 = NULL;

      if (EDGE_COUNT (cur_bb->succs) > 1)
	succ2 = EDGE_SUCC (cur_bb, 1);
      else
	succ2 = NULL;

      /* We already took care of fall-through edges, so only one successor
	 can be a crossing edge.  */

      if (succ1 && (succ1->flags & EDGE_CROSSING))
	crossing_edge = succ1;
      else if (succ2 && (succ2->flags & EDGE_CROSSING))
	crossing_edge = succ2;

      if (crossing_edge)
	{
	  old_jump = BB_END (cur_bb);

	  /* Check to make sure the jump instruction is a
	     conditional jump.  */

	  set_src = NULL_RTX;

	  if (any_condjump_p (old_jump))
	    {
	      if (GET_CODE (PATTERN (old_jump)) == SET)
		set_src = SET_SRC (PATTERN (old_jump));
	      else if (GET_CODE (PATTERN (old_jump)) == PARALLEL)
		{
		  set_src = XVECEXP (PATTERN (old_jump), 0, 0);
		  if (GET_CODE (set_src) == SET)
		    set_src = SET_SRC (set_src);
		  else
		    set_src = NULL_RTX;
		}
	    }

	  if (set_src && (GET_CODE (set_src) == IF_THEN_ELSE))
	    {
	      if (GET_CODE (XEXP (set_src, 1)) == PC)
		old_label = XEXP (set_src, 2);
	      else if (GET_CODE (XEXP (set_src, 2)) == PC)
		old_label = XEXP (set_src, 1);

	      /* Check to see if new bb for jumping to that dest has
		 already been created; if so, use it; if not, create
		 a new one.  */

	      new_bb = find_jump_block (crossing_edge->dest);

	      if (new_bb)
		new_label = block_label (new_bb);
	      else
		{
		  /* Create new basic block to be dest for
		     conditional jump.  */

		  new_bb = create_basic_block (NULL, NULL, last_bb);
		  new_bb->aux = last_bb->aux;
		  last_bb->aux = new_bb;
		  last_bb = new_bb;

		  /* Put appropriate instructions in new bb.  */

		  new_label = gen_label_rtx ();
		  emit_label_before (new_label, BB_HEAD (new_bb));
		  BB_HEAD (new_bb) = new_label;

		  if (GET_CODE (old_label) == LABEL_REF)
		    {
		      old_label = JUMP_LABEL (old_jump);
		      new_jump = emit_jump_insn_after (gen_jump
						       (old_label),
						       BB_END (new_bb));
		    }
		  else
		    {
		      gcc_assert (HAVE_return
				  && GET_CODE (old_label) == RETURN);
		      new_jump = emit_jump_insn_after (gen_return (),
						       BB_END (new_bb));
		    }

		  barrier = emit_barrier_after (new_jump);
		  JUMP_LABEL (new_jump) = old_label;
		  new_bb->il.rtl->footer = unlink_insn_chain (barrier,
							      barrier);

		  /* Make sure new bb is in same partition as source
		     of conditional branch.  */
		  BB_COPY_PARTITION (new_bb, cur_bb);
		}

	      /* Make old jump branch to new bb.  */

	      redirect_jump (old_jump, new_label, 0);

	      /* Remove crossing_edge as predecessor of 'dest'.  */

	      dest = crossing_edge->dest;

	      redirect_edge_succ (crossing_edge, new_bb);

	      /* Make a new edge from new_bb to old dest; new edge
		 will be a successor for new_bb and a predecessor
		 for 'dest'.  */

	      if (EDGE_COUNT (new_bb->succs) == 0)
		new_edge = make_edge (new_bb, dest, 0);
	      else
		new_edge = EDGE_SUCC (new_bb, 0);

	      crossing_edge->flags &= ~EDGE_CROSSING;
	      new_edge->flags |= EDGE_CROSSING;
	    }
	}
    }
}

/* Find any unconditional branches that cross between hot and cold
   sections.  Convert them into indirect jumps instead.  */

static void
fix_crossing_unconditional_branches (void)
{
  basic_block cur_bb;
  rtx last_insn;
  rtx label;
  rtx label_addr;
  rtx indirect_jump_sequence;
  rtx jump_insn = NULL_RTX;
  rtx new_reg;
  rtx cur_insn;
  edge succ;

  FOR_EACH_BB (cur_bb)
    {
      last_insn = BB_END (cur_bb);

      if (EDGE_COUNT (cur_bb->succs) < 1)
	continue;

      succ = EDGE_SUCC (cur_bb, 0);

      /* Check to see if bb ends in a crossing (unconditional) jump.  At
	 this point, no crossing jumps should be conditional.  */

      if (JUMP_P (last_insn)
	  && (succ->flags & EDGE_CROSSING))
	{
	  rtx label2, table;

	  gcc_assert (!any_condjump_p (last_insn));

	  /* Make sure the jump is not already an indirect or table jump.  */

	  if (!computed_jump_p (last_insn)
	      && !tablejump_p (last_insn, &label2, &table))
	    {
	      /* We have found a "crossing" unconditional branch.  Now
		 we must convert it to an indirect jump.  First create
		 reference of label, as target for jump.  */

	      label = JUMP_LABEL (last_insn);
	      label_addr = gen_rtx_LABEL_REF (Pmode, label);
	      LABEL_NUSES (label) += 1;

	      /* Get a register to use for the indirect jump.  */

	      new_reg = gen_reg_rtx (Pmode);

	      /* Generate the indirect jump sequence.  */

	      start_sequence ();
	      emit_move_insn (new_reg, label_addr);
	      emit_indirect_jump (new_reg);
	      indirect_jump_sequence = get_insns ();
	      end_sequence ();

	      /* Make sure every instruction in the new jump sequence has
		 its basic block set to be cur_bb.  */

	      for (cur_insn = indirect_jump_sequence; cur_insn;
		   cur_insn = NEXT_INSN (cur_insn))
		{
		  if (!BARRIER_P (cur_insn))
		    BLOCK_FOR_INSN (cur_insn) = cur_bb;
		  if (JUMP_P (cur_insn))
		    jump_insn = cur_insn;
		}

	      /* Insert the new (indirect) jump sequence immediately before
		 the unconditional jump, then delete the unconditional jump.  */

	      emit_insn_before (indirect_jump_sequence, last_insn);
	      delete_insn (last_insn);

	      /* Make BB_END for cur_bb be the jump instruction (NOT the
		 barrier instruction at the end of the sequence...).  */

	      BB_END (cur_bb) = jump_insn;
	    }
	}
    }
}

/* Add a REG_CROSSING_JUMP note to all crossing jump insns.  */

static void
add_reg_crossing_jump_notes (void)
{
  basic_block bb;
  edge e;
  edge_iterator ei;

  FOR_EACH_BB (bb)
    FOR_EACH_EDGE (e, ei, bb->succs)
      if ((e->flags & EDGE_CROSSING)
	  && JUMP_P (BB_END (e->src)))
	add_reg_note (BB_END (e->src), REG_CROSSING_JUMP, NULL_RTX);
}

/* Hot and cold basic blocks are partitioned and put in separate
   sections of the .o file, to reduce paging and improve cache
   performance (hopefully).  This can result in bits of code from the
   same function being widely separated in the .o file.  However,
   this separation is not reflected in the basic block structure, so
   we must take care to ensure that: 1) there are no fall_thru edges
   that cross between sections; 2) for those architectures which have
   "short" conditional branches, all conditional branches that
   attempt to cross between sections are converted to unconditional
   branches; and 3) for those architectures which have "short"
   unconditional branches, all unconditional branches that attempt to
   cross between sections are converted to indirect jumps.

   The code for fixing up fall_thru edges that cross between hot and
   cold basic blocks does so by creating new basic blocks containing
   unconditional branches to the appropriate label in the "other"
   section.  The new basic block is then put in the same (hot or cold)
   section as the original conditional branch, and the fall_thru edge
   is modified to fall into the new basic block instead.  By adding
   this level of indirection we end up with only unconditional branches
   crossing between hot and cold sections.

   Conditional branches are dealt with by adding a level of indirection.
   A new basic block is added in the same (hot/cold) section as the
   conditional branch, and the conditional branch is retargeted to the
   new basic block.  The new basic block contains an unconditional branch
   to the original target of the conditional branch (in the other section).

   Unconditional branches are dealt with by converting them into
   indirect jumps.  */
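
/* For example (block and label names illustrative), a crossing
   fall_thru edge

       hot:   bb A  --(fall_thru)-->   cold:  bb B

   becomes

       hot:   bb A  --(fall_thru)-->  bb A'  [jump L_B]
       cold:  L_B:  bb B

   and a crossing conditional branch in A is retargeted at a similar
   stub block in A's own section, whose only job is to jump
   unconditionally to the original target in the other section.  */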

static void
fix_edges_for_rarely_executed_code (edge *crossing_edges,
                                    int n_crossing_edges)
{
  /* Make sure the source of any crossing edge ends in a jump and the
     destination of any crossing edge has a label.  */

  add_labels_and_missing_jumps (crossing_edges, n_crossing_edges);

  /* Redirect all crossing fall_thru edges to new blocks in the same
     section that end in unconditional jumps to the original fall
     thru destinations.  */

  fix_up_fall_thru_edges ();

  /* If the architecture does not have conditional branches that can
     span all of memory, convert crossing conditional branches into
     crossing unconditional branches.  */

  if (!HAS_LONG_COND_BRANCH)
    fix_crossing_conditional_branches ();

  /* If the architecture does not have unconditional branches that
     can span all of memory, convert crossing unconditional branches
     into indirect jumps.  Since adding an indirect jump also adds
     a new register usage, update the register usage information as
     well.  */

  if (!HAS_LONG_UNCOND_BRANCH)
    fix_crossing_unconditional_branches ();

  add_reg_crossing_jump_notes ();
}

/* Verify, in the basic block chain, that there is at most one switch
   between hot/cold partitions.  This is modelled on
   rtl_verify_flow_info_1, but it cannot go inside that function
   because this condition will not be true until after
   reorder_basic_blocks is called.  */
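
/* For example, the layouts  hot hot cold cold  and  cold hot hot
   each contain a single transition and pass the check, while
   hot cold hot  trips the error at the second transition.  */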

static void
verify_hot_cold_block_grouping (void)
{
  basic_block bb;
  int err = 0;
  bool switched_sections = false;
  int current_partition = 0;

  FOR_EACH_BB (bb)
    {
      if (!current_partition)
        current_partition = BB_PARTITION (bb);
      if (BB_PARTITION (bb) != current_partition)
        {
          if (switched_sections)
            {
              error ("multiple hot/cold transitions found (bb %i)",
                     bb->index);
              err = 1;
            }
          else
            {
              switched_sections = true;
              current_partition = BB_PARTITION (bb);
            }
        }
    }

  gcc_assert (!err);
}

/* Reorder basic blocks.  The main entry point to this file.  */

void
reorder_basic_blocks (void)
{
  int n_traces;
  int i;
  struct trace *traces;

  gcc_assert (current_ir_type () == IR_RTL_CFGLAYOUT);

  if (n_basic_blocks <= NUM_FIXED_BLOCKS + 1)
    return;

  set_edge_can_fallthru_flag ();
  mark_dfs_back_edges ();

  /* We estimate the length of an uncond jump insn only once, since the
     code for getting the insn length always returns the minimal length
     now.  */
  if (uncond_jump_length == 0)
    uncond_jump_length = get_uncond_jump_length ();

  /* We need to know some information for each basic block.  */
  array_size = GET_ARRAY_SIZE (last_basic_block);
  bbd = XNEWVEC (bbro_basic_block_data, array_size);
  for (i = 0; i < array_size; i++)
    {
      bbd[i].start_of_trace = -1;
      bbd[i].in_trace = -1;
      bbd[i].end_of_trace = -1;
      bbd[i].heap = NULL;
      bbd[i].node = NULL;
    }

  traces = XNEWVEC (struct trace, n_basic_blocks);
  n_traces = 0;
  find_traces (&n_traces, traces);
  connect_traces (n_traces, traces);
  FREE (traces);
  FREE (bbd);

  relink_block_chain (/*stay_in_cfglayout_mode=*/true);

  if (dump_file)
    dump_flow_info (dump_file, dump_flags);

  if (flag_reorder_blocks_and_partition)
    verify_hot_cold_block_grouping ();
}

/* Determine which partition the first basic block in the function
   belongs to, then find the first basic block in the current function
   that belongs to a different section, and insert a
   NOTE_INSN_SWITCH_TEXT_SECTIONS note immediately before it in the
   instruction stream.  When writing out the assembly code,
   encountering this note will make the compiler switch between the
   hot and cold text sections.  */
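
/* For example (block numbers illustrative), given the final layout
   bb0 bb1 bb4 bb5, where bb0 and bb1 are hot and bb4 and bb5 are
   cold, the note is emitted immediately before BB_HEAD (bb4), and
   the assembly output switches from the hot to the cold text section
   at exactly that point.  */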

static void
insert_section_boundary_note (void)
{
  basic_block bb;
  rtx new_note;
  int first_partition = 0;

  if (flag_reorder_blocks_and_partition)
    FOR_EACH_BB (bb)
    {
      if (!first_partition)
        first_partition = BB_PARTITION (bb);
      if (BB_PARTITION (bb) != first_partition)
        {
          new_note = emit_note_before (NOTE_INSN_SWITCH_TEXT_SECTIONS,
                                       BB_HEAD (bb));
          /* ??? This kind of note always lives between basic blocks,
             but add_insn_before will set BLOCK_FOR_INSN anyway.  */
          BLOCK_FOR_INSN (new_note) = NULL;
          break;
        }
    }
}

/* Duplicate the blocks containing computed gotos.  This basically unfactors
   computed gotos that were factored early on in the compilation process to
   speed up edge based data flow.  We used not to unfactor them again, which
   can seriously pessimize code with many computed jumps in the source code,
   such as interpreters.  See e.g. PR15242.  */
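
/* A minimal sketch (using the GNU C labels-as-values extension) of
   the kind of code that benefits; the function below is illustrative
   only:

       static int
       interp (const unsigned char *pc, int acc)
       {
         static const void *dispatch[] = { &&op_add, &&op_sub, &&op_ret };
         goto *dispatch[*pc++];
        op_add: acc += *pc++; goto *dispatch[*pc++];
        op_sub: acc -= *pc++; goto *dispatch[*pc++];
        op_ret: return acc;
       }

   Early in compilation the three computed gotos are factored into a
   single shared dispatch block; this pass duplicates that block back
   into each handler, so every handler keeps its own indirect jump.  */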

static bool
gate_duplicate_computed_gotos (void)
{
  if (targetm.cannot_modify_jumps_p ())
    return false;
  return (optimize > 0
          && flag_expensive_optimizations
          && ! optimize_function_for_size_p (cfun));
}


static unsigned int
duplicate_computed_gotos (void)
{
  basic_block bb, new_bb;
  bitmap candidates;
  int max_size;

  if (n_basic_blocks <= NUM_FIXED_BLOCKS + 1)
    return 0;

  cfg_layout_initialize (0);

  /* We estimate the length of an uncond jump insn only once, since
     the code for getting the insn length always returns the minimal
     length now.  */
  if (uncond_jump_length == 0)
    uncond_jump_length = get_uncond_jump_length ();

  max_size
    = uncond_jump_length * PARAM_VALUE (PARAM_MAX_GOTO_DUPLICATION_INSNS);
  candidates = BITMAP_ALLOC (NULL);

  /* Look for blocks that end in a computed jump, and see if such blocks
     are suitable for unfactoring.  If a block is a candidate for
     unfactoring, mark it in the candidates bitmap.  */
  FOR_EACH_BB (bb)
    {
      rtx insn;
      edge e;
      edge_iterator ei;
      int size, all_flags;

      /* Build the reorder chain for the original order of blocks.  */
      if (bb->next_bb != EXIT_BLOCK_PTR)
        bb->aux = bb->next_bb;

      /* Obviously the block has to end in a computed jump.  */
      if (!computed_jump_p (BB_END (bb)))
        continue;

      /* Only consider blocks that can be duplicated.  */
      if (find_reg_note (BB_END (bb), REG_CROSSING_JUMP, NULL_RTX)
          || !can_duplicate_block_p (bb))
        continue;

      /* Make sure that the block is small enough.  */
      size = 0;
      FOR_BB_INSNS (bb, insn)
        if (INSN_P (insn))
          {
            size += get_attr_min_length (insn);
            if (size > max_size)
              break;
          }
      if (size > max_size)
        continue;

      /* Final check: there must not be any incoming abnormal edges.  */
      all_flags = 0;
      FOR_EACH_EDGE (e, ei, bb->preds)
        all_flags |= e->flags;
      if (all_flags & EDGE_COMPLEX)
        continue;

      bitmap_set_bit (candidates, bb->index);
    }

  /* Nothing to do if no suitable computed jump was found.  */
  if (bitmap_empty_p (candidates))
    goto done;

  /* Duplicate computed gotos.  */
  FOR_EACH_BB (bb)
    {
      if (bb->il.rtl->visited)
        continue;

      bb->il.rtl->visited = 1;

      /* BB must have one outgoing edge.  That edge must not lead to
         the exit block or the next block.
         The destination must have more than one predecessor.  */
      if (!single_succ_p (bb)
          || single_succ (bb) == EXIT_BLOCK_PTR
          || single_succ (bb) == bb->next_bb
          || single_pred_p (single_succ (bb)))
        continue;

      /* The successor block has to be a duplication candidate.  */
      if (!bitmap_bit_p (candidates, single_succ (bb)->index))
        continue;

      new_bb = duplicate_block (single_succ (bb), single_succ_edge (bb), bb);
      new_bb->aux = bb->aux;
      bb->aux = new_bb;
      new_bb->il.rtl->visited = 1;
    }

done:
  cfg_layout_finalize ();

  BITMAP_FREE (candidates);
  return 0;
}

struct rtl_opt_pass pass_duplicate_computed_gotos =
{
 {
  RTL_PASS,
  "compgotos",                          /* name */
  gate_duplicate_computed_gotos,        /* gate */
  duplicate_computed_gotos,             /* execute */
  NULL,                                 /* sub */
  NULL,                                 /* next */
  0,                                    /* static_pass_number */
  TV_REORDER_BLOCKS,                    /* tv_id */
  0,                                    /* properties_required */
  0,                                    /* properties_provided */
  0,                                    /* properties_destroyed */
  0,                                    /* todo_flags_start */
  TODO_dump_func | TODO_verify_rtl_sharing, /* todo_flags_finish */
 }
};


/* This function is the main 'entrance' for the optimization that
   partitions hot and cold basic blocks into separate sections of the
   .o file (to improve performance and cache locality).  Ideally it
   would be called after all optimizations that rearrange the CFG have
   been called.  However, part of this optimization may introduce new
   register usage, so it must be called before register allocation has
   occurred.  This means that this optimization is actually called
   well before the optimization that reorders basic blocks (see
   function above).

   This optimization checks the feedback information to determine
   which basic blocks are hot/cold, and updates flags on the basic
   blocks to indicate which section they belong in.  This information
   is later used for writing out sections in the .o file.  Because hot
   and cold sections can be arbitrarily large (within the bounds of
   memory), far beyond the size of a single function, it is necessary
   to fix up all edges that cross section boundaries, to make sure the
   instructions used can actually span the required distance.  The
   fixes are described below.

   Fall-through edges must be changed into jumps; it is not safe or
   legal to fall through across a section boundary.  Whenever a
   fall-through edge crossing a section boundary is encountered, a new
   basic block is inserted (in the same section as the fall-through
   source), and the fall-through edge is redirected to the new basic
   block.  The new basic block contains an unconditional jump to the
   original fall-through target.  (If the unconditional jump is
   insufficient to cross section boundaries, that is dealt with a
   little later, see below).

   In order to deal with architectures that have short conditional
   branches (which cannot span all of memory) we take any conditional
   jump that attempts to cross a section boundary and add a level of
   indirection: it becomes a conditional jump to a new basic block, in
   the same section.  The new basic block contains an unconditional
   jump to the original target, in the other section.

   For those architectures whose unconditional branch is also
   incapable of reaching all of memory, those unconditional jumps are
   converted into indirect jumps, through a register.

   IMPORTANT NOTE: This optimization causes some messy interactions
   with the cfg cleanup optimizations; those optimizations want to
   merge blocks wherever possible, and to collapse indirect jump
   sequences (change "A jumps to B jumps to C" directly into "A jumps
   to C").  Those optimizations can undo the jump fixes that
   partitioning is required to make (see above) to ensure that jumps
   attempting to cross section boundaries really can cover whatever
   distance the jump requires (on many architectures conditional or
   unconditional jumps are not able to reach all of memory).
   Therefore tests have to be inserted into each such optimization to
   make sure that it does not undo stuff necessary to cross partition
   boundaries.  This would be much less of a problem if we could
   perform this optimization later in the compilation, but
   unfortunately the fact that we may need to create indirect jumps
   (through registers) requires that this optimization be performed
   before register allocation.  */
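
/* For instance, suppose hot block A ends in a conditional jump to a
   stub block A' (also hot), and A' does nothing but jump to cold
   block C; crossjumping would happily retarget A straight at C,
   recreating exactly the crossing conditional branch that the stub
   was introduced to avoid.  (The block names are illustrative.)  */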

static void
partition_hot_cold_basic_blocks (void)
{
  edge *crossing_edges;
  int n_crossing_edges;
  int max_edges = 2 * last_basic_block;

  if (n_basic_blocks <= NUM_FIXED_BLOCKS + 1)
    return;

  crossing_edges = XCNEWVEC (edge, max_edges);

  find_rarely_executed_basic_blocks_and_crossing_edges (&crossing_edges,
                                                        &n_crossing_edges,
                                                        &max_edges);

  if (n_crossing_edges > 0)
    fix_edges_for_rarely_executed_code (crossing_edges, n_crossing_edges);

  free (crossing_edges);
}

static bool
gate_handle_reorder_blocks (void)
{
  if (targetm.cannot_modify_jumps_p ())
    return false;
  return (optimize > 0);
}


/* Reorder basic blocks.  */
static unsigned int
rest_of_handle_reorder_blocks (void)
{
  basic_block bb;

  /* Last attempt to optimize CFG, since scheduling, peepholing and insn
     splitting may have introduced more crossjumping opportunities.  */
  cfg_layout_initialize (CLEANUP_EXPENSIVE);

  if ((flag_reorder_blocks || flag_reorder_blocks_and_partition)
      /* Don't reorder blocks when optimizing for size because extra jump
         insns may be created; also a barrier may create extra padding.

         More correctly we should have a block reordering mode that tries to
         minimize the combined size of all the jumps.  This would more or less
         automatically remove extra jumps, but would also try to use more short
         jumps instead of long jumps.  */
      && optimize_function_for_speed_p (cfun))
    {
      reorder_basic_blocks ();
      cleanup_cfg (CLEANUP_EXPENSIVE);
    }

  FOR_EACH_BB (bb)
    if (bb->next_bb != EXIT_BLOCK_PTR)
      bb->aux = bb->next_bb;
  cfg_layout_finalize ();

  /* Add NOTE_INSN_SWITCH_TEXT_SECTIONS notes.  */
  insert_section_boundary_note ();
  return 0;
}

struct rtl_opt_pass pass_reorder_blocks =
{
 {
  RTL_PASS,
  "bbro",                               /* name */
  gate_handle_reorder_blocks,           /* gate */
  rest_of_handle_reorder_blocks,        /* execute */
  NULL,                                 /* sub */
  NULL,                                 /* next */
  0,                                    /* static_pass_number */
  TV_REORDER_BLOCKS,                    /* tv_id */
  0,                                    /* properties_required */
  0,                                    /* properties_provided */
  0,                                    /* properties_destroyed */
  0,                                    /* todo_flags_start */
  TODO_dump_func | TODO_verify_rtl_sharing, /* todo_flags_finish */
 }
};

static bool
gate_handle_partition_blocks (void)
{
  /* The optimization to partition hot/cold basic blocks into separate
     sections of the .o file does not work well with linkonce or with
     user defined section attributes.  Don't call it if either case
     arises.  */

  return (flag_reorder_blocks_and_partition
          && !DECL_ONE_ONLY (current_function_decl)
          && !user_defined_section_attribute);
}

/* Partition hot and cold basic blocks.  */
static unsigned int
rest_of_handle_partition_blocks (void)
{
  partition_hot_cold_basic_blocks ();
  return 0;
}

struct rtl_opt_pass pass_partition_blocks =
{
 {
  RTL_PASS,
  "bbpart",                             /* name */
  gate_handle_partition_blocks,         /* gate */
  rest_of_handle_partition_blocks,      /* execute */
  NULL,                                 /* sub */
  NULL,                                 /* next */
  0,                                    /* static_pass_number */
  TV_REORDER_BLOCKS,                    /* tv_id */
  PROP_cfglayout,                       /* properties_required */
  0,                                    /* properties_provided */
  0,                                    /* properties_destroyed */
  0,                                    /* todo_flags_start */
  TODO_dump_func | TODO_verify_rtl_sharing /* todo_flags_finish */
 }
};