1/* Instruction scheduling pass.
2   Copyright (C) 1992-2022 Free Software Foundation, Inc.
   Contributed by Michael Tiemann (tiemann@cygnus.com).  Enhanced by,
   and currently maintained by, Jim Wilson (wilson@cygnus.com)
5
6This file is part of GCC.
7
8GCC is free software; you can redistribute it and/or modify it under
9the terms of the GNU General Public License as published by the Free
10Software Foundation; either version 3, or (at your option) any later
11version.
12
13GCC is distributed in the hope that it will be useful, but WITHOUT ANY
14WARRANTY; without even the implied warranty of MERCHANTABILITY or
15FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
16for more details.
17
18You should have received a copy of the GNU General Public License
19along with GCC; see the file COPYING3.  If not see
20<http://www.gnu.org/licenses/>.  */
21
22/* This pass implements list scheduling within basic blocks.  It is
23   run twice: (1) after flow analysis, but before register allocation,
24   and (2) after register allocation.
25
26   The first run performs interblock scheduling, moving insns between
27   different blocks in the same "region", and the second runs only
28   basic block scheduling.
29
30   Interblock motions performed are useful motions and speculative
31   motions, including speculative loads.  Motions requiring code
32   duplication are not supported.  The identification of motion type
33   and the check for validity of speculative motions requires
34   construction and analysis of the function's control flow graph.
35
36   The main entry point for this pass is schedule_insns(), called for
37   each function.  The work of the scheduler is organized in three
38   levels: (1) function level: insns are subject to splitting,
39   control-flow-graph is constructed, regions are computed (after
40   reload, each region is of one block), (2) region level: control
41   flow graph attributes required for interblock scheduling are
42   computed (dominators, reachability, etc.), data dependences and
43   priorities are computed, and (3) block level: insns in the block
44   are actually scheduled.  */
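
/* A rough sketch of how the pass is driven (for orientation only; the exact
   call chain is spread over this file and haifa-sched.cc, so treat the names
   below other than schedule_insns / schedule_region as an informal summary
   rather than authoritative documentation):

     schedule_insns ()                    function-level entry point
       sched_rgn_init ()                  build CFG info, compute regions
       for each region R:
         schedule_region (R)              deps, priorities, interblock info
           for each bb B in R:
             schedule_block (B)           list scheduling proper
       sched_rgn_finish ()                free region data  */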
45
46#include "config.h"
47#include "system.h"
48#include "coretypes.h"
49#include "backend.h"
50#include "target.h"
51#include "rtl.h"
52#include "df.h"
53#include "memmodel.h"
54#include "tm_p.h"
55#include "insn-config.h"
56#include "emit-rtl.h"
57#include "recog.h"
58#include "profile.h"
59#include "insn-attr.h"
60#include "except.h"
61#include "cfganal.h"
62#include "sched-int.h"
63#include "sel-sched.h"
64#include "tree-pass.h"
65#include "dbgcnt.h"
66#include "pretty-print.h"
67#include "print-rtl.h"
68
69/* Disable warnings about quoting issues in the pp_xxx calls below
70   that (intentionally) don't follow GCC diagnostic conventions.  */
71#if __GNUC__ >= 10
72#  pragma GCC diagnostic push
73#  pragma GCC diagnostic ignored "-Wformat-diag"
74#endif
75
76#ifdef INSN_SCHEDULING
77
78/* Some accessor macros for h_i_d members only used within this file.  */
79#define FED_BY_SPEC_LOAD(INSN) (HID (INSN)->fed_by_spec_load)
#define IS_LOAD_INSN(INSN) (HID (INSN)->is_load_insn)
81
82/* nr_inter/spec counts interblock/speculative motion for the function.  */
83static int nr_inter, nr_spec;
84
85static int is_cfg_nonregular (void);
86
87/* Number of regions in the procedure.  */
88int nr_regions = 0;
89
90/* Same as above before adding any new regions.  */
91static int nr_regions_initial = 0;
92
93/* Table of region descriptions.  */
94region *rgn_table = NULL;
95
96/* Array of lists of regions' blocks.  */
97int *rgn_bb_table = NULL;
98
99/* Topological order of blocks in the region (if b2 is reachable from
100   b1, block_to_bb[b2] > block_to_bb[b1]).  Note: A basic block is
101   always referred to by either block or b, while its topological
102   order name (in the region) is referred to by bb.  */
103int *block_to_bb = NULL;
104
105/* The number of the region containing a block.  */
106int *containing_rgn = NULL;
107
/* ebb_head [i] is the index in rgn_bb_table of the head basic block of the
   i'th ebb.  Currently we can get an ebb only through splitting of the block
   currently being scheduled, therefore we don't need an ebb_head array for
   every region; it's sufficient to hold it for the current one only.  */
112int *ebb_head = NULL;
113
114/* The minimum probability of reaching a source block so that it will be
115   considered for speculative scheduling.  */
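/* It is initialized elsewhere in this file (during scheduler setup) from the
   min-spec-prob parameter, scaled to REG_BR_PROB_BASE; e.g. with the
   illustrative setting --param min-spec-prob=40, a candidate source block
   must be reached with probability of at least 0.4 relative to the block
   being scheduled before insns are speculated out of it.  */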
116static int min_spec_prob;
117
118static void find_single_block_region (bool);
119static void find_rgns (void);
120static bool too_large (int, int *, int *);
121
122/* Blocks of the current region being scheduled.  */
123int current_nr_blocks;
124int current_blocks;
125
126/* A speculative motion requires checking live information on the path
127   from 'source' to 'target'.  The split blocks are those to be checked.
128   After a speculative motion, live information should be modified in
129   the 'update' blocks.
130
131   Lists of split and update blocks for each candidate of the current
132   target are in array bblst_table.  */
133static basic_block *bblst_table;
134static int bblst_size, bblst_last;
135
136/* Arrays that hold the DFA state at the end of a basic block, to re-use
137   as the initial state at the start of successor blocks.  The BB_STATE
138   array holds the actual DFA state, and BB_STATE_ARRAY[I] is a pointer
139   into BB_STATE for basic block I.  FIXME: This should be a vec.  */
140static char *bb_state_array = NULL;
141static state_t *bb_state = NULL;
142
143/* Target info declarations.
144
145   The block currently being scheduled is referred to as the "target" block,
146   while other blocks in the region from which insns can be moved to the
147   target are called "source" blocks.  The candidate structure holds info
148   about such sources: are they valid?  Speculative?  Etc.  */
149struct bblst
150{
151  basic_block *first_member;
152  int nr_members;
153};
154
155struct candidate
156{
157  char is_valid;
158  char is_speculative;
159  int src_prob;
160  bblst split_bbs;
161  bblst update_bbs;
162};
163
164static candidate *candidate_table;
165#define IS_VALID(src) (candidate_table[src].is_valid)
166#define IS_SPECULATIVE(src) (candidate_table[src].is_speculative)
167#define IS_SPECULATIVE_INSN(INSN)			\
168  (IS_SPECULATIVE (BLOCK_TO_BB (BLOCK_NUM (INSN))))
169#define SRC_PROB(src) ( candidate_table[src].src_prob )
170
171/* The bb being currently scheduled.  */
172int target_bb;
173
174/* List of edges.  */
175struct edgelst
176{
177  edge *first_member;
178  int nr_members;
179};
180
181static edge *edgelst_table;
182static int edgelst_last;
183
184static void extract_edgelst (sbitmap, edgelst *);
185
186/* Target info functions.  */
187static void split_edges (int, int, edgelst *);
188static void compute_trg_info (int);
189void debug_candidate (int);
190void debug_candidates (int);
191
192/* Dominators array: dom[i] contains the sbitmap of dominators of
193   bb i in the region.  */
194static sbitmap *dom;
195
196/* bb 0 is the only region entry.  */
197#define IS_RGN_ENTRY(bb) (!bb)
198
/* Is bb_src dominated by bb_trg?  */
200#define IS_DOMINATED(bb_src, bb_trg)                                 \
201( bitmap_bit_p (dom[bb_src], bb_trg) )
202
203/* Probability: Prob[i] is an int in [0, REG_BR_PROB_BASE] which is
204   the probability of bb i relative to the region entry.  */
205static int *prob;
206
207/* Bit-set of edges, where bit i stands for edge i.  */
208typedef sbitmap edgeset;
209
210/* Number of edges in the region.  */
211static int rgn_nr_edges;
212
213/* Array of size rgn_nr_edges.  */
214static edge *rgn_edges;
215
216/* Mapping from each edge in the graph to its number in the rgn.  */
217#define EDGE_TO_BIT(edge) ((int)(size_t)(edge)->aux)
218#define SET_EDGE_TO_BIT(edge,nr) ((edge)->aux = (void *)(size_t)(nr))
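
/* For illustration (hypothetical numbering): if edge E was recorded as
   rgn_edges[5], then SET_EDGE_TO_BIT (E, 5) stashes 5 in E->aux, so that
   rgn_edges[EDGE_TO_BIT (E)] == E and bit 5 stands for E in any edgeset,
   e.g.

     bitmap_set_bit (pot_split[bb], EDGE_TO_BIT (e));

   marks E as a potential split edge of BB.  */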
219
/* The split edges of a source bb are different for each target
   bb.  In order to compute this efficiently, the 'potential-split edges'
   are computed for each bb prior to scheduling a region.  These are actually
   the split edges of each bb relative to the region entry.
224
225   pot_split[bb] is the set of potential split edges of bb.  */
226static edgeset *pot_split;
227
228/* For every bb, a set of its ancestor edges.  */
229static edgeset *ancestor_edges;
230
231#define INSN_PROBABILITY(INSN) (SRC_PROB (BLOCK_TO_BB (BLOCK_NUM (INSN))))
232
233/* Speculative scheduling functions.  */
234static int check_live_1 (int, rtx);
235static void update_live_1 (int, rtx);
236static int is_pfree (rtx, int, int);
237static int find_conditional_protection (rtx_insn *, int);
238static int is_conditionally_protected (rtx, int, int);
239static int is_prisky (rtx, int, int);
240static int is_exception_free (rtx_insn *, int, int);
241
242static bool sets_likely_spilled (rtx);
243static void sets_likely_spilled_1 (rtx, const_rtx, void *);
244static void add_branch_dependences (rtx_insn *, rtx_insn *);
245static void compute_block_dependences (int);
246
247static void schedule_region (int);
248static void concat_insn_mem_list (rtx_insn_list *, rtx_expr_list *,
249				  rtx_insn_list **, rtx_expr_list **);
250static void propagate_deps (int, class deps_desc *);
251static void free_pending_lists (void);
252
253/* Functions for construction of the control flow graph.  */
254
255/* Return 1 if control flow graph should not be constructed, 0 otherwise.
256
257   We decide not to build the control flow graph if there is possibly more
258   than one entry to the function, if computed branches exist, if we
259   have nonlocal gotos, or if we have an unreachable loop.  */
260
261static int
262is_cfg_nonregular (void)
263{
264  basic_block b;
265  rtx_insn *insn;
266
267  /* If we have a label that could be the target of a nonlocal goto, then
268     the cfg is not well structured.  */
269  if (nonlocal_goto_handler_labels)
270    return 1;
271
272  /* If we have any forced labels, then the cfg is not well structured.  */
273  if (forced_labels)
274    return 1;
275
276  /* If we have exception handlers, then we consider the cfg not well
277     structured.  ?!?  We should be able to handle this now that we
278     compute an accurate cfg for EH.  */
279  if (current_function_has_exception_handlers ())
280    return 1;
281
282  /* If we have insns which refer to labels as non-jumped-to operands,
283     then we consider the cfg not well structured.  */
284  FOR_EACH_BB_FN (b, cfun)
285    FOR_BB_INSNS (b, insn)
286      {
287	rtx note, set, dest;
288	rtx_insn *next;
289
290	/* If this function has a computed jump, then we consider the cfg
291	   not well structured.  */
292	if (JUMP_P (insn) && computed_jump_p (insn))
293	  return 1;
294
295	if (!INSN_P (insn))
296	  continue;
297
298	note = find_reg_note (insn, REG_LABEL_OPERAND, NULL_RTX);
299	if (note == NULL_RTX)
300	  continue;
301
302	/* For that label not to be seen as a referred-to label, this
303	   must be a single-set which is feeding a jump *only*.  This
304	   could be a conditional jump with the label split off for
305	   machine-specific reasons or a casesi/tablejump.  */
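	/* Illustrative shape (not exact RTL) of what is accepted below:
	   INSN is (set (reg R) (label_ref L)) carrying a REG_LABEL_OPERAND
	   note for L, and the next insn is a jump in the same block whose
	   target is L (or which has a REG_LABEL_TARGET note for L) and in
	   which R dies.  Anything else makes L look like a genuinely
	   referenced label and the cfg is declared irregular.  */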
306	next = next_nonnote_insn (insn);
307	if (next == NULL_RTX
308	    || !JUMP_P (next)
309	    || (JUMP_LABEL (next) != XEXP (note, 0)
310		&& find_reg_note (next, REG_LABEL_TARGET,
311				  XEXP (note, 0)) == NULL_RTX)
312	    || BLOCK_FOR_INSN (insn) != BLOCK_FOR_INSN (next))
313	  return 1;
314
315	set = single_set (insn);
316	if (set == NULL_RTX)
317	  return 1;
318
319	dest = SET_DEST (set);
320	if (!REG_P (dest) || !dead_or_set_p (next, dest))
321	  return 1;
322      }
323
324  /* Unreachable loops with more than one basic block are detected
325     during the DFS traversal in find_rgns.
326
327     Unreachable loops with a single block are detected here.  This
328     test is redundant with the one in find_rgns, but it's much
329     cheaper to go ahead and catch the trivial case here.  */
330  FOR_EACH_BB_FN (b, cfun)
331    {
332      if (EDGE_COUNT (b->preds) == 0
333	  || (single_pred_p (b)
334	      && single_pred (b) == b))
335	return 1;
336    }
337
338  /* All the tests passed.  Consider the cfg well structured.  */
339  return 0;
340}
341
342/* Extract list of edges from a bitmap containing EDGE_TO_BIT bits.  */
343
344static void
345extract_edgelst (sbitmap set, edgelst *el)
346{
347  unsigned int i = 0;
348  sbitmap_iterator sbi;
349
350  /* edgelst table space is reused in each call to extract_edgelst.  */
351  edgelst_last = 0;
352
353  el->first_member = &edgelst_table[edgelst_last];
354  el->nr_members = 0;
355
  /* Iterate over each set bit in the bitmap.  */
357  EXECUTE_IF_SET_IN_BITMAP (set, 0, i, sbi)
358    {
359      edgelst_table[edgelst_last++] = rgn_edges[i];
360      el->nr_members++;
361    }
362}
363
364/* Functions for the construction of regions.  */
365
366/* Print the regions, for debugging purposes.  Callable from debugger.  */
367
368DEBUG_FUNCTION void
369debug_regions (void)
370{
371  int rgn, bb;
372
373  fprintf (sched_dump, "\n;;   ------------ REGIONS ----------\n\n");
374  for (rgn = 0; rgn < nr_regions; rgn++)
375    {
376      fprintf (sched_dump, ";;\trgn %d nr_blocks %d:\n", rgn,
377	       rgn_table[rgn].rgn_nr_blocks);
378      fprintf (sched_dump, ";;\tbb/block: ");
379
380      /* We don't have ebb_head initialized yet, so we can't use
381	 BB_TO_BLOCK ().  */
382      current_blocks = RGN_BLOCKS (rgn);
383
384      for (bb = 0; bb < rgn_table[rgn].rgn_nr_blocks; bb++)
385	fprintf (sched_dump, " %d/%d ", bb, rgn_bb_table[current_blocks + bb]);
386
387      fprintf (sched_dump, "\n\n");
388    }
389}
390
391/* Print the region's basic blocks.  */
392
393DEBUG_FUNCTION void
394debug_region (int rgn)
395{
396  int bb;
397
398  fprintf (stderr, "\n;;   ------------ REGION %d ----------\n\n", rgn);
399  fprintf (stderr, ";;\trgn %d nr_blocks %d:\n", rgn,
400	   rgn_table[rgn].rgn_nr_blocks);
401  fprintf (stderr, ";;\tbb/block: ");
402
403  /* We don't have ebb_head initialized yet, so we can't use
404     BB_TO_BLOCK ().  */
405  current_blocks = RGN_BLOCKS (rgn);
406
407  for (bb = 0; bb < rgn_table[rgn].rgn_nr_blocks; bb++)
408    fprintf (stderr, " %d/%d ", bb, rgn_bb_table[current_blocks + bb]);
409
410  fprintf (stderr, "\n\n");
411
412  for (bb = 0; bb < rgn_table[rgn].rgn_nr_blocks; bb++)
413    {
414      dump_bb (stderr,
415	       BASIC_BLOCK_FOR_FN (cfun, rgn_bb_table[current_blocks + bb]),
416	       0, TDF_SLIM | TDF_BLOCKS);
417      fprintf (stderr, "\n");
418    }
419
420  fprintf (stderr, "\n");
421
422}
423
/* True when the bb with index BB_INDEX is contained in region RGN.  */
425static bool
426bb_in_region_p (int bb_index, int rgn)
427{
428  int i;
429
430  for (i = 0; i < rgn_table[rgn].rgn_nr_blocks; i++)
431    if (rgn_bb_table[current_blocks + i] == bb_index)
432      return true;
433
434  return false;
435}
436
437/* Dump region RGN to file F using dot syntax.  */
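/* For a hypothetical region 2 made of blocks 4, 5 and 6, where block 4
   branches to 5 and 6 and both re-join outside the region, the output
   would look like

     digraph Region_2 {
	4 -> 5
	4 -> 6
     }

   i.e. only edges whose destination is also inside the region are listed.  */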
438void
439dump_region_dot (FILE *f, int rgn)
440{
441  int i;
442
443  fprintf (f, "digraph Region_%d {\n", rgn);
444
445  /* We don't have ebb_head initialized yet, so we can't use
446     BB_TO_BLOCK ().  */
447  current_blocks = RGN_BLOCKS (rgn);
448
449  for (i = 0; i < rgn_table[rgn].rgn_nr_blocks; i++)
450    {
451      edge e;
452      edge_iterator ei;
453      int src_bb_num = rgn_bb_table[current_blocks + i];
454      basic_block bb = BASIC_BLOCK_FOR_FN (cfun, src_bb_num);
455
456      FOR_EACH_EDGE (e, ei, bb->succs)
457        if (bb_in_region_p (e->dest->index, rgn))
458	  fprintf (f, "\t%d -> %d\n", src_bb_num, e->dest->index);
459    }
460  fprintf (f, "}\n");
461}
462
463/* The same, but first open a file specified by FNAME.  */
464void
465dump_region_dot_file (const char *fname, int rgn)
466{
467  FILE *f = fopen (fname, "wt");
468  dump_region_dot (f, rgn);
469  fclose (f);
470}
471
472/* Build a single block region for each basic block in the function.
473   This allows for using the same code for interblock and basic block
474   scheduling.  */
475
476static void
477find_single_block_region (bool ebbs_p)
478{
479  basic_block bb, ebb_start;
480  int i = 0;
481
482  nr_regions = 0;
483
484  if (ebbs_p) {
485    int probability_cutoff;
486    if (profile_info && profile_status_for_fn (cfun) == PROFILE_READ)
487      probability_cutoff = param_tracer_min_branch_probability_feedback;
488    else
489      probability_cutoff = param_tracer_min_branch_probability;
490    probability_cutoff = REG_BR_PROB_BASE / 100 * probability_cutoff;
491
492    FOR_EACH_BB_FN (ebb_start, cfun)
493      {
494        RGN_NR_BLOCKS (nr_regions) = 0;
495        RGN_BLOCKS (nr_regions) = i;
496        RGN_DONT_CALC_DEPS (nr_regions) = 0;
497        RGN_HAS_REAL_EBB (nr_regions) = 0;
498
499        for (bb = ebb_start; ; bb = bb->next_bb)
500          {
501            edge e;
502
503            rgn_bb_table[i] = bb->index;
504            RGN_NR_BLOCKS (nr_regions)++;
505            CONTAINING_RGN (bb->index) = nr_regions;
506            BLOCK_TO_BB (bb->index) = i - RGN_BLOCKS (nr_regions);
507            i++;
508
509	    if (bb->next_bb == EXIT_BLOCK_PTR_FOR_FN (cfun)
510                || LABEL_P (BB_HEAD (bb->next_bb)))
511              break;
512
513	    e = find_fallthru_edge (bb->succs);
514            if (! e)
515              break;
516            if (e->probability.initialized_p ()
517		&& e->probability.to_reg_br_prob_base () <= probability_cutoff)
518              break;
519          }
520
521        ebb_start = bb;
522        nr_regions++;
523      }
524  }
525  else
526    FOR_EACH_BB_FN (bb, cfun)
527      {
528        rgn_bb_table[nr_regions] = bb->index;
529        RGN_NR_BLOCKS (nr_regions) = 1;
530        RGN_BLOCKS (nr_regions) = nr_regions;
531        RGN_DONT_CALC_DEPS (nr_regions) = 0;
532        RGN_HAS_REAL_EBB (nr_regions) = 0;
533
534        CONTAINING_RGN (bb->index) = nr_regions;
535        BLOCK_TO_BB (bb->index) = 0;
536        nr_regions++;
537      }
538}
539
/* Estimate the number of insns in BB.  */
541static int
542rgn_estimate_number_of_insns (basic_block bb)
543{
544  int count;
545
546  count = INSN_LUID (BB_END (bb)) - INSN_LUID (BB_HEAD (bb));
547
548  if (MAY_HAVE_DEBUG_INSNS)
549    {
550      rtx_insn *insn;
551
552      FOR_BB_INSNS (bb, insn)
553	if (DEBUG_INSN_P (insn))
554	  count--;
555    }
556
557  return count;
558}
559
560/* Update number of blocks and the estimate for number of insns
561   in the region.  Return true if the region is "too large" for interblock
562   scheduling (compile time considerations).  */
563
564static bool
565too_large (int block, int *num_bbs, int *num_insns)
566{
567  (*num_bbs)++;
568  (*num_insns) += (common_sched_info->estimate_number_of_insns
569                   (BASIC_BLOCK_FOR_FN (cfun, block)));
570
571  return ((*num_bbs > param_max_sched_region_blocks)
572	  || (*num_insns > param_max_sched_region_insns));
573}
574
/* UPDATE_LOOP_RELATIONS (blk, hdr): Check if the loop headed by max_hdr[blk]
   is still an inner loop.  Put in max_hdr[blk] the header of the innermost
   loop containing blk.  */
578#define UPDATE_LOOP_RELATIONS(blk, hdr)		\
579{						\
580  if (max_hdr[blk] == -1)			\
581    max_hdr[blk] = hdr;				\
582  else if (dfs_nr[max_hdr[blk]] > dfs_nr[hdr])	\
583    bitmap_clear_bit (inner, hdr);			\
584  else if (dfs_nr[max_hdr[blk]] < dfs_nr[hdr])	\
585    {						\
586      bitmap_clear_bit (inner,max_hdr[blk]);		\
587      max_hdr[blk] = hdr;			\
588    }						\
589}
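
/* A worked example (hypothetical blocks): suppose BLK was first recorded as
   belonging to the loop headed by H1, so max_hdr[blk] == H1, and BLK is now
   found to also sit in a loop headed by H2.  If dfs_nr[H1] > dfs_nr[H2],
   H1 was discovered later in the DFS and is therefore the more deeply nested
   header, so H2 cannot head an inner loop and its bit in INNER is cleared.
   If dfs_nr[H1] < dfs_nr[H2], H2 is the more deeply nested header: H1 loses
   its INNER bit and max_hdr[blk] is updated to H2.  */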
590
591/* Find regions for interblock scheduling.
592
593   A region for scheduling can be:
594
595     * A loop-free procedure, or
596
597     * A reducible inner loop, or
598
599     * A basic block not contained in any other region.
600
601   ?!? In theory we could build other regions based on extended basic
602   blocks or reverse extended basic blocks.  Is it worth the trouble?
603
604   Loop blocks that form a region are put into the region's block list
605   in topological order.
606
   This procedure stores its results into the following global (ick) variables:

     * nr_regions
     * rgn_table
     * rgn_bb_table
     * block_to_bb
     * containing_rgn
614
615   We use dominator relationships to avoid making regions out of non-reducible
616   loops.
617
618   This procedure needs to be converted to work on pred/succ lists instead
619   of edge tables.  That would simplify it somewhat.  */
620
621static void
622haifa_find_rgns (void)
623{
624  int *max_hdr, *dfs_nr, *degree;
625  char no_loops = 1;
626  int node, child, loop_head, i, head, tail;
627  int count = 0, sp, idx = 0;
628  edge_iterator current_edge;
629  edge_iterator *stack;
630  int num_bbs, num_insns, unreachable;
631  int too_large_failure;
632  basic_block bb;
633
634  /* Perform a DFS traversal of the cfg.  Identify loop headers, inner loops
635     and a mapping from block to its loop header (if the block is contained
636     in a loop, else -1).
637
     Store results in HEADER, INNER, and MAX_HDR respectively; these will
639     be used as inputs to the second traversal.
640
641     STACK, SP and DFS_NR are only used during the first traversal.  */
642
643  /* Allocate and initialize variables for the first traversal.  */
644  max_hdr = XNEWVEC (int, last_basic_block_for_fn (cfun));
645  dfs_nr = XCNEWVEC (int, last_basic_block_for_fn (cfun));
646  stack = XNEWVEC (edge_iterator, n_edges_for_fn (cfun));
647
648  /* Note if a block is a natural inner loop header.  */
649  auto_sbitmap inner (last_basic_block_for_fn (cfun));
650  bitmap_ones (inner);
651
652  /* Note if a block is a natural loop header.  */
653  auto_sbitmap header (last_basic_block_for_fn (cfun));
654  bitmap_clear (header);
655
656  /* Note if a block is in the block queue.  */
657  auto_sbitmap in_queue (last_basic_block_for_fn (cfun));
658  bitmap_clear (in_queue);
659
  /* Note if a block is on the DFS stack.  */
661  auto_sbitmap in_stack (last_basic_block_for_fn (cfun));
662  bitmap_clear (in_stack);
663
664  for (i = 0; i < last_basic_block_for_fn (cfun); i++)
665    max_hdr[i] = -1;
666
667  #define EDGE_PASSED(E) (ei_end_p ((E)) || ei_edge ((E))->aux)
668  #define SET_EDGE_PASSED(E) (ei_edge ((E))->aux = ei_edge ((E)))
669
670  /* DFS traversal to find inner loops in the cfg.  */
671
672  current_edge = ei_start (single_succ (ENTRY_BLOCK_PTR_FOR_FN (cfun))->succs);
673  sp = -1;
674
675  while (1)
676    {
677      if (EDGE_PASSED (current_edge))
678	{
679	  /* We have reached a leaf node or a node that was already
680	     processed.  Pop edges off the stack until we find
681	     an edge that has not yet been processed.  */
682	  while (sp >= 0 && EDGE_PASSED (current_edge))
683	    {
684	      /* Pop entry off the stack.  */
685	      current_edge = stack[sp--];
686	      node = ei_edge (current_edge)->src->index;
687	      gcc_assert (node != ENTRY_BLOCK);
688	      child = ei_edge (current_edge)->dest->index;
689	      gcc_assert (child != EXIT_BLOCK);
690	      bitmap_clear_bit (in_stack, child);
691	      if (max_hdr[child] >= 0 && bitmap_bit_p (in_stack, max_hdr[child]))
692		UPDATE_LOOP_RELATIONS (node, max_hdr[child]);
693	      ei_next (&current_edge);
694	    }
695
	  /* See if we have finished the DFS tree traversal.  */
697	  if (sp < 0 && EDGE_PASSED (current_edge))
698	    break;
699
700	  /* Nope, continue the traversal with the popped node.  */
701	  continue;
702	}
703
704      /* Process a node.  */
705      node = ei_edge (current_edge)->src->index;
706      gcc_assert (node != ENTRY_BLOCK);
707      bitmap_set_bit (in_stack, node);
708      dfs_nr[node] = ++count;
709
710      /* We don't traverse to the exit block.  */
711      child = ei_edge (current_edge)->dest->index;
712      if (child == EXIT_BLOCK)
713	{
714	  SET_EDGE_PASSED (current_edge);
715	  ei_next (&current_edge);
716	  continue;
717	}
718
719      /* If the successor is in the stack, then we've found a loop.
	 Mark the loop; if it is not a natural loop, it will
721	 be rejected during the second traversal.  */
722      if (bitmap_bit_p (in_stack, child))
723	{
724	  no_loops = 0;
725	  bitmap_set_bit (header, child);
726	  UPDATE_LOOP_RELATIONS (node, child);
727	  SET_EDGE_PASSED (current_edge);
728	  ei_next (&current_edge);
729	  continue;
730	}
731
732      /* If the child was already visited, then there is no need to visit
733	 it again.  Just update the loop relationships and restart
734	 with a new edge.  */
735      if (dfs_nr[child])
736	{
737	  if (max_hdr[child] >= 0 && bitmap_bit_p (in_stack, max_hdr[child]))
738	    UPDATE_LOOP_RELATIONS (node, max_hdr[child]);
739	  SET_EDGE_PASSED (current_edge);
740	  ei_next (&current_edge);
741	  continue;
742	}
743
744      /* Push an entry on the stack and continue DFS traversal.  */
745      stack[++sp] = current_edge;
746      SET_EDGE_PASSED (current_edge);
747      current_edge = ei_start (ei_edge (current_edge)->dest->succs);
748    }
749
750  /* Reset ->aux field used by EDGE_PASSED.  */
751  FOR_ALL_BB_FN (bb, cfun)
752    {
753      edge_iterator ei;
754      edge e;
755      FOR_EACH_EDGE (e, ei, bb->succs)
756	e->aux = NULL;
757    }
758
759
760  /* Another check for unreachable blocks.  The earlier test in
761     is_cfg_nonregular only finds unreachable blocks that do not
762     form a loop.
763
764     The DFS traversal will mark every block that is reachable from
765     the entry node by placing a nonzero value in dfs_nr.  Thus if
766     dfs_nr is zero for any block, then it must be unreachable.  */
767  unreachable = 0;
768  FOR_EACH_BB_FN (bb, cfun)
769    if (dfs_nr[bb->index] == 0)
770      {
771	unreachable = 1;
772	break;
773      }
774
775  /* Gross.  To avoid wasting memory, the second pass uses the dfs_nr array
776     to hold degree counts.  */
777  degree = dfs_nr;
778
779  FOR_EACH_BB_FN (bb, cfun)
780    degree[bb->index] = EDGE_COUNT (bb->preds);
781
782  /* Do not perform region scheduling if there are any unreachable
783     blocks.  */
784  if (!unreachable)
785    {
786      int *queue, *degree1 = NULL;
787      /* We use EXTENDED_RGN_HEADER as an addition to HEADER and put
	 there the basic blocks which are forced to be region heads.
	 This is done to try to assemble a few smaller regions
790	 from a too_large region.  */
791      sbitmap extended_rgn_header = NULL;
792      bool extend_regions_p;
793
794      if (no_loops)
795	bitmap_set_bit (header, 0);
796
      /* Second traversal: find reducible inner loops and topologically sort
	 the blocks of each region.  */
799
800      queue = XNEWVEC (int, n_basic_blocks_for_fn (cfun));
801
802      extend_regions_p = param_max_sched_extend_regions_iters > 0;
803      if (extend_regions_p)
804        {
805          degree1 = XNEWVEC (int, last_basic_block_for_fn (cfun));
806          extended_rgn_header =
807	    sbitmap_alloc (last_basic_block_for_fn (cfun));
808          bitmap_clear (extended_rgn_header);
809	}
810
811      /* Find blocks which are inner loop headers.  We still have non-reducible
812	 loops to consider at this point.  */
813      FOR_EACH_BB_FN (bb, cfun)
814	{
815	  if (bitmap_bit_p (header, bb->index) && bitmap_bit_p (inner, bb->index))
816	    {
817	      edge e;
818	      edge_iterator ei;
819	      basic_block jbb;
820
	      /* Now check that the loop is reducible.  We do this separately
822		 from finding inner loops so that we do not find a reducible
823		 loop which contains an inner non-reducible loop.
824
825		 A simple way to find reducible/natural loops is to verify
826		 that each block in the loop is dominated by the loop
827		 header.
828
829		 If there exists a block that is not dominated by the loop
830		 header, then the block is reachable from outside the loop
831		 and thus the loop is not a natural loop.  */
832	      FOR_EACH_BB_FN (jbb, cfun)
833		{
834		  /* First identify blocks in the loop, except for the loop
835		     entry block.  */
836		  if (bb->index == max_hdr[jbb->index] && bb != jbb)
837		    {
838		      /* Now verify that the block is dominated by the loop
839			 header.  */
840		      if (!dominated_by_p (CDI_DOMINATORS, jbb, bb))
841			break;
842		    }
843		}
844
	      /* If we exited the loop early, then BB is the header of
846		 a non-reducible loop and we should quit processing it
847		 now.  */
848	      if (jbb != EXIT_BLOCK_PTR_FOR_FN (cfun))
849		continue;
850
	      /* BB is the header of an inner loop, or block 0 in a subroutine
852		 with no loops at all.  */
853	      head = tail = -1;
854	      too_large_failure = 0;
855	      loop_head = max_hdr[bb->index];
856
857              if (extend_regions_p)
                /* We save degree in case we meet a too_large region
859		   and cancel it.  We need a correct degree later when
860                   calling extend_rgns.  */
861                memcpy (degree1, degree,
862			last_basic_block_for_fn (cfun) * sizeof (int));
863
	      /* Decrease the degree of all BB's successors for topological
865		 ordering.  */
866	      FOR_EACH_EDGE (e, ei, bb->succs)
867		if (e->dest != EXIT_BLOCK_PTR_FOR_FN (cfun))
868		  --degree[e->dest->index];
869
870	      /* Estimate # insns, and count # blocks in the region.  */
871	      num_bbs = 1;
872	      num_insns = common_sched_info->estimate_number_of_insns (bb);
873
874	      /* Find all loop latches (blocks with back edges to the loop
		 header), or all the leaf blocks if the cfg has no loops.
876
877		 Place those blocks into the queue.  */
878	      if (no_loops)
879		{
880		  FOR_EACH_BB_FN (jbb, cfun)
881		    /* Leaf nodes have only a single successor which must
882		       be EXIT_BLOCK.  */
883		    if (single_succ_p (jbb)
884			&& single_succ (jbb) == EXIT_BLOCK_PTR_FOR_FN (cfun))
885		      {
886			queue[++tail] = jbb->index;
887			bitmap_set_bit (in_queue, jbb->index);
888
889			if (too_large (jbb->index, &num_bbs, &num_insns))
890			  {
891			    too_large_failure = 1;
892			    break;
893			  }
894		      }
895		}
896	      else
897		{
898		  edge e;
899
900		  FOR_EACH_EDGE (e, ei, bb->preds)
901		    {
902		      if (e->src == ENTRY_BLOCK_PTR_FOR_FN (cfun))
903			continue;
904
905		      node = e->src->index;
906
907		      if (max_hdr[node] == loop_head && node != bb->index)
908			{
909			  /* This is a loop latch.  */
910			  queue[++tail] = node;
911			  bitmap_set_bit (in_queue, node);
912
913			  if (too_large (node, &num_bbs, &num_insns))
914			    {
915			      too_large_failure = 1;
916			      break;
917			    }
918			}
919		    }
920		}
921
922	      /* Now add all the blocks in the loop to the queue.
923
924	     We know the loop is a natural loop; however the algorithm
925	     above will not always mark certain blocks as being in the
926	     loop.  Consider:
927		node   children
928		 a	  b,c
929		 b	  c
930		 c	  a,d
931		 d	  b
932
933	     The algorithm in the DFS traversal may not mark B & D as part
934	     of the loop (i.e. they will not have max_hdr set to A).
935
936	     We know they cannot be loop latches (else they would have
937	     had max_hdr set since they'd have a backedge to a dominator
938	     block).  So we don't need them on the initial queue.
939
940	     We know they are part of the loop because they are dominated
941	     by the loop header and can be reached by a backwards walk of
942	     the edges starting with nodes on the initial queue.
943
944	     It is safe and desirable to include those nodes in the
945	     loop/scheduling region.  To do so we would need to decrease
946	     the degree of a node if it is the target of a backedge
947	     within the loop itself as the node is placed in the queue.
948
949	     We do not do this because I'm not sure that the actual
950	     scheduling code will properly handle this case. ?!? */
951
952	      while (head < tail && !too_large_failure)
953		{
954		  edge e;
955		  child = queue[++head];
956
957		  FOR_EACH_EDGE (e, ei,
958				 BASIC_BLOCK_FOR_FN (cfun, child)->preds)
959		    {
960		      node = e->src->index;
961
962		      /* See discussion above about nodes not marked as in
963			 this loop during the initial DFS traversal.  */
964		      if (e->src == ENTRY_BLOCK_PTR_FOR_FN (cfun)
965			  || max_hdr[node] != loop_head)
966			{
967			  tail = -1;
968			  break;
969			}
970		      else if (!bitmap_bit_p (in_queue, node) && node != bb->index)
971			{
972			  queue[++tail] = node;
973			  bitmap_set_bit (in_queue, node);
974
975			  if (too_large (node, &num_bbs, &num_insns))
976			    {
977			      too_large_failure = 1;
978			      break;
979			    }
980			}
981		    }
982		}
983
984	      if (tail >= 0 && !too_large_failure)
985		{
986		  /* Place the loop header into list of region blocks.  */
987		  degree[bb->index] = -1;
988		  rgn_bb_table[idx] = bb->index;
989		  RGN_NR_BLOCKS (nr_regions) = num_bbs;
990		  RGN_BLOCKS (nr_regions) = idx++;
991                  RGN_DONT_CALC_DEPS (nr_regions) = 0;
992		  RGN_HAS_REAL_EBB (nr_regions) = 0;
993		  CONTAINING_RGN (bb->index) = nr_regions;
994		  BLOCK_TO_BB (bb->index) = count = 0;
995
996		  /* Remove blocks from queue[] when their in degree
997		     becomes zero.  Repeat until no blocks are left on the
998		     list.  This produces a topological list of blocks in
999		     the region.  */
1000		  while (tail >= 0)
1001		    {
1002		      if (head < 0)
1003			head = tail;
1004		      child = queue[head];
1005		      if (degree[child] == 0)
1006			{
1007			  edge e;
1008
1009			  degree[child] = -1;
1010			  rgn_bb_table[idx++] = child;
1011			  BLOCK_TO_BB (child) = ++count;
1012			  CONTAINING_RGN (child) = nr_regions;
1013			  queue[head] = queue[tail--];
1014
1015			  FOR_EACH_EDGE (e, ei,
1016					 BASIC_BLOCK_FOR_FN (cfun,
1017							     child)->succs)
1018			    if (e->dest != EXIT_BLOCK_PTR_FOR_FN (cfun))
1019			      --degree[e->dest->index];
1020			}
1021		      else
1022			--head;
1023		    }
1024		  ++nr_regions;
1025		}
1026              else if (extend_regions_p)
1027                {
1028                  /* Restore DEGREE.  */
1029                  int *t = degree;
1030
1031                  degree = degree1;
1032                  degree1 = t;
1033
1034                  /* And force successors of BB to be region heads.
1035		     This may provide several smaller regions instead
1036		     of one too_large region.  */
1037                  FOR_EACH_EDGE (e, ei, bb->succs)
1038		    if (e->dest != EXIT_BLOCK_PTR_FOR_FN (cfun))
1039                      bitmap_set_bit (extended_rgn_header, e->dest->index);
1040                }
1041	    }
1042	}
1043      free (queue);
1044
1045      if (extend_regions_p)
1046        {
1047          free (degree1);
1048
1049          bitmap_ior (header, header, extended_rgn_header);
1050          sbitmap_free (extended_rgn_header);
1051
1052          extend_rgns (degree, &idx, header, max_hdr);
1053        }
1054    }
1055
1056  /* Any block that did not end up in a region is placed into a region
1057     by itself.  */
1058  FOR_EACH_BB_FN (bb, cfun)
1059    if (degree[bb->index] >= 0)
1060      {
1061	rgn_bb_table[idx] = bb->index;
1062	RGN_NR_BLOCKS (nr_regions) = 1;
1063	RGN_BLOCKS (nr_regions) = idx++;
1064        RGN_DONT_CALC_DEPS (nr_regions) = 0;
1065	RGN_HAS_REAL_EBB (nr_regions) = 0;
1066	CONTAINING_RGN (bb->index) = nr_regions++;
1067	BLOCK_TO_BB (bb->index) = 0;
1068      }
1069
1070  free (max_hdr);
1071  free (degree);
1072  free (stack);
1073}
1074
1075
/* Wrapper function.
   If we are doing selective scheduling with pipelining, use the custom
   function to form regions.  Otherwise just call haifa_find_rgns.  */
1079static void
1080find_rgns (void)
1081{
1082  if (sel_sched_p () && flag_sel_sched_pipelining)
1083    sel_find_rgns ();
1084  else
1085    haifa_find_rgns ();
1086}
1087
1088static int gather_region_statistics (int **);
1089static void print_region_statistics (int *, int, int *, int);
1090
1091/* Calculate the histogram that shows the number of regions having the
1092   given number of basic blocks, and store it in the RSP array.  Return
1093   the size of this array.  */
1094static int
1095gather_region_statistics (int **rsp)
1096{
1097  int i, *a = 0, a_sz = 0;
1098
1099  /* a[i] is the number of regions that have (i + 1) basic blocks.  */
1100  for (i = 0; i < nr_regions; i++)
1101    {
1102      int nr_blocks = RGN_NR_BLOCKS (i);
1103
1104      gcc_assert (nr_blocks >= 1);
1105
1106      if (nr_blocks > a_sz)
1107	{
1108	  a = XRESIZEVEC (int, a, nr_blocks);
1109	  do
1110	    a[a_sz++] = 0;
1111	  while (a_sz != nr_blocks);
1112	}
1113
1114      a[nr_blocks - 1]++;
1115    }
1116
1117  *rsp = a;
1118  return a_sz;
1119}
1120
1121/* Print regions statistics.  S1 and S2 denote the data before and after
1122   calling extend_rgns, respectively.  */
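/* For example, with the (made-up) histograms s1 = { 10, 5 } and
   s2 = { 10, 7 } the only line printed would be

     ;; Region extension statistics: size 2: was 5 + 2 more

   meaning two more two-block regions exist after extend_rgns.  */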
1123static void
1124print_region_statistics (int *s1, int s1_sz, int *s2, int s2_sz)
1125{
1126  int i;
1127
1128  /* We iterate until s2_sz because extend_rgns does not decrease
1129     the maximal region size.  */
1130  for (i = 1; i < s2_sz; i++)
1131    {
1132      int n1, n2;
1133
1134      n2 = s2[i];
1135
1136      if (n2 == 0)
1137	continue;
1138
1139      if (i >= s1_sz)
1140	n1 = 0;
1141      else
1142	n1 = s1[i];
1143
1144      fprintf (sched_dump, ";; Region extension statistics: size %d: " \
1145	       "was %d + %d more\n", i + 1, n1, n2 - n1);
1146    }
1147}
1148
1149/* Extend regions.
   DEGREE - Array of incoming edge counts, considering only
   the edges that don't have their sources in already formed regions.
1152   IDXP - pointer to the next available index in rgn_bb_table.
1153   HEADER - set of all region heads.
1154   LOOP_HDR - mapping from block to the containing loop
1155   (two blocks can reside within one region if they have
1156   the same loop header).  */
1157void
1158extend_rgns (int *degree, int *idxp, sbitmap header, int *loop_hdr)
1159{
1160  int *order, i, rescan = 0, idx = *idxp, iter = 0, max_iter, *max_hdr;
1161  int nblocks = n_basic_blocks_for_fn (cfun) - NUM_FIXED_BLOCKS;
1162
1163  max_iter = param_max_sched_extend_regions_iters;
1164
1165  max_hdr = XNEWVEC (int, last_basic_block_for_fn (cfun));
1166
1167  order = XNEWVEC (int, last_basic_block_for_fn (cfun));
1168  post_order_compute (order, false, false);
1169
1170  for (i = nblocks - 1; i >= 0; i--)
1171    {
1172      int bbn = order[i];
1173      if (degree[bbn] >= 0)
1174	{
1175	  max_hdr[bbn] = bbn;
1176	  rescan = 1;
1177	}
1178      else
        /* This block was already processed in find_rgns.  */
1180        max_hdr[bbn] = -1;
1181    }
1182
  /* The idea is to topologically walk through the CFG in top-down order.
     During the traversal, if all the predecessors of a node are
     marked to be in the same region (they all have the same max_hdr),
     then the current node is also marked to be a part of that region.
     Otherwise the node starts its own region.
     The CFG should be traversed until no further changes are made.  On each
     iteration the set of the region heads is extended (the set of those
     blocks that have max_hdr[bbi] == bbi).  This set is upper-bounded by the
     set of all basic blocks, thus the algorithm is guaranteed to
     terminate.  */
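
  /* A small hypothetical example: for a diamond A -> {B, C} -> D in which A
     is already a region head (max_hdr[A] == A) and all four blocks share the
     same loop header, the first top-down sweep sets max_hdr[B] and max_hdr[C]
     to A (the header of their only predecessor), and then max_hdr[D] to A as
     well, because both of D's predecessors agree on A.  Had B and C ended up
     with different headers, D would have become a region head itself.  */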
1193
1194  while (rescan && iter < max_iter)
1195    {
1196      rescan = 0;
1197
1198      for (i = nblocks - 1; i >= 0; i--)
1199	{
1200	  edge e;
1201	  edge_iterator ei;
1202	  int bbn = order[i];
1203
1204	  if (max_hdr[bbn] != -1 && !bitmap_bit_p (header, bbn))
1205	    {
1206	      int hdr = -1;
1207
1208	      FOR_EACH_EDGE (e, ei, BASIC_BLOCK_FOR_FN (cfun, bbn)->preds)
1209		{
1210		  int predn = e->src->index;
1211
1212		  if (predn != ENTRY_BLOCK
1213		      /* If pred wasn't processed in find_rgns.  */
1214		      && max_hdr[predn] != -1
1215		      /* And pred and bb reside in the same loop.
1216			 (Or out of any loop).  */
1217		      && loop_hdr[bbn] == loop_hdr[predn])
1218		    {
1219		      if (hdr == -1)
1220			/* Then bb extends the containing region of pred.  */
1221			hdr = max_hdr[predn];
1222		      else if (hdr != max_hdr[predn])
1223			/* Too bad, there are at least two predecessors
1224			   that reside in different regions.  Thus, BB should
1225			   begin its own region.  */
1226			{
1227			  hdr = bbn;
1228			  break;
1229			}
1230		    }
1231		  else
1232		    /* BB starts its own region.  */
1233		    {
1234		      hdr = bbn;
1235		      break;
1236		    }
1237		}
1238
1239	      if (hdr == bbn)
1240		{
		  /* If BB starts its own region,
		     add BB to the set of region headers.  */
1243		  bitmap_set_bit (header, bbn);
1244		  rescan = 1;
1245		}
1246	      else
1247		gcc_assert (hdr != -1);
1248
1249	      max_hdr[bbn] = hdr;
1250	    }
1251	}
1252
1253      iter++;
1254    }
1255
1256  /* Statistics were gathered on the SPEC2000 package of tests with
1257     mainline weekly snapshot gcc-4.1-20051015 on ia64.
1258
1259     Statistics for SPECint:
1260     1 iteration : 1751 cases (38.7%)
1261     2 iterations: 2770 cases (61.3%)
1262     Blocks wrapped in regions by find_rgns without extension: 18295 blocks
1263     Blocks wrapped in regions by 2 iterations in extend_rgns: 23821 blocks
1264     (We don't count single block regions here).
1265
1266     Statistics for SPECfp:
1267     1 iteration : 621 cases (35.9%)
1268     2 iterations: 1110 cases (64.1%)
1269     Blocks wrapped in regions by find_rgns without extension: 6476 blocks
1270     Blocks wrapped in regions by 2 iterations in extend_rgns: 11155 blocks
1271     (We don't count single block regions here).
1272
1273     By default we do at most 2 iterations.
1274     This can be overridden with max-sched-extend-regions-iters parameter:
1275     0 - disable region extension,
1276     N > 0 - do at most N iterations.  */
1277
1278  if (sched_verbose && iter != 0)
1279    fprintf (sched_dump, ";; Region extension iterations: %d%s\n", iter,
1280	     rescan ? "... failed" : "");
1281
1282  if (!rescan && iter != 0)
1283    {
1284      int *s1 = NULL, s1_sz = 0;
1285
1286      /* Save the old statistics for later printout.  */
1287      if (sched_verbose >= 6)
1288	s1_sz = gather_region_statistics (&s1);
1289
1290      /* We have succeeded.  Now assemble the regions.  */
1291      for (i = nblocks - 1; i >= 0; i--)
1292	{
1293	  int bbn = order[i];
1294
1295	  if (max_hdr[bbn] == bbn)
1296	    /* BBN is a region head.  */
1297	    {
1298	      edge e;
1299	      edge_iterator ei;
1300	      int num_bbs = 0, j, num_insns = 0, large;
1301
1302	      large = too_large (bbn, &num_bbs, &num_insns);
1303
1304	      degree[bbn] = -1;
1305	      rgn_bb_table[idx] = bbn;
1306	      RGN_BLOCKS (nr_regions) = idx++;
1307	      RGN_DONT_CALC_DEPS (nr_regions) = 0;
1308	      RGN_HAS_REAL_EBB (nr_regions) = 0;
1309	      CONTAINING_RGN (bbn) = nr_regions;
1310	      BLOCK_TO_BB (bbn) = 0;
1311
1312	      FOR_EACH_EDGE (e, ei, BASIC_BLOCK_FOR_FN (cfun, bbn)->succs)
1313		if (e->dest != EXIT_BLOCK_PTR_FOR_FN (cfun))
1314		  degree[e->dest->index]--;
1315
1316	      if (!large)
1317		/* Here we check whether the region is too_large.  */
1318		for (j = i - 1; j >= 0; j--)
1319		  {
1320		    int succn = order[j];
1321		    if (max_hdr[succn] == bbn)
1322		      {
1323			if ((large = too_large (succn, &num_bbs, &num_insns)))
1324			  break;
1325		      }
1326		  }
1327
1328	      if (large)
1329		/* If the region is too_large, then wrap every block of
		   the region into a single block region.
		   Here we wrap the region head only.  Other blocks are
		   processed in the loop below.  */
1333		{
1334		  RGN_NR_BLOCKS (nr_regions) = 1;
1335		  nr_regions++;
1336		}
1337
1338	      num_bbs = 1;
1339
1340	      for (j = i - 1; j >= 0; j--)
1341		{
1342		  int succn = order[j];
1343
1344		  if (max_hdr[succn] == bbn)
		    /* This loop iterates over all basic blocks that
		       are supposed to be in the region with head BBN,
		       and wraps them into that region (or into single
		       block regions).  */
1349		    {
1350		      gcc_assert (degree[succn] == 0);
1351
1352		      degree[succn] = -1;
1353		      rgn_bb_table[idx] = succn;
1354		      BLOCK_TO_BB (succn) = large ? 0 : num_bbs++;
1355		      CONTAINING_RGN (succn) = nr_regions;
1356
1357		      if (large)
1358			/* Wrap SUCCN into single block region.  */
1359			{
1360			  RGN_BLOCKS (nr_regions) = idx;
1361			  RGN_NR_BLOCKS (nr_regions) = 1;
1362			  RGN_DONT_CALC_DEPS (nr_regions) = 0;
1363			  RGN_HAS_REAL_EBB (nr_regions) = 0;
1364			  nr_regions++;
1365			}
1366
1367		      idx++;
1368
1369		      FOR_EACH_EDGE (e, ei,
1370				     BASIC_BLOCK_FOR_FN (cfun, succn)->succs)
1371			if (e->dest != EXIT_BLOCK_PTR_FOR_FN (cfun))
1372			  degree[e->dest->index]--;
1373		    }
1374		}
1375
1376	      if (!large)
1377		{
1378		  RGN_NR_BLOCKS (nr_regions) = num_bbs;
1379		  nr_regions++;
1380		}
1381	    }
1382	}
1383
1384      if (sched_verbose >= 6)
1385	{
1386	  int *s2, s2_sz;
1387
1388          /* Get the new statistics and print the comparison with the
1389             one before calling this function.  */
1390	  s2_sz = gather_region_statistics (&s2);
1391	  print_region_statistics (s1, s1_sz, s2, s2_sz);
1392	  free (s1);
1393	  free (s2);
1394	}
1395    }
1396
1397  free (order);
1398  free (max_hdr);
1399
1400  *idxp = idx;
1401}
1402
1403/* Functions for regions scheduling information.  */
1404
1405/* Compute dominators, probability, and potential-split-edges of bb.
1406   Assume that these values were already computed for bb's predecessors.  */
1407
1408static void
1409compute_dom_prob_ps (int bb)
1410{
1411  edge_iterator in_ei;
1412  edge in_edge;
1413
1414  /* We shouldn't have any real ebbs yet.  */
1415  gcc_assert (ebb_head [bb] == bb + current_blocks);
1416
1417  if (IS_RGN_ENTRY (bb))
1418    {
1419      bitmap_set_bit (dom[bb], 0);
1420      prob[bb] = REG_BR_PROB_BASE;
1421      return;
1422    }
1423
1424  prob[bb] = 0;
1425
1426  /* Initialize dom[bb] to '111..1'.  */
1427  bitmap_ones (dom[bb]);
1428
1429  FOR_EACH_EDGE (in_edge, in_ei,
1430		 BASIC_BLOCK_FOR_FN (cfun, BB_TO_BLOCK (bb))->preds)
1431    {
1432      int pred_bb;
1433      edge out_edge;
1434      edge_iterator out_ei;
1435
1436      if (in_edge->src == ENTRY_BLOCK_PTR_FOR_FN (cfun))
1437	continue;
1438
1439      pred_bb = BLOCK_TO_BB (in_edge->src->index);
1440      bitmap_and (dom[bb], dom[bb], dom[pred_bb]);
1441      bitmap_ior (ancestor_edges[bb],
1442		      ancestor_edges[bb], ancestor_edges[pred_bb]);
1443
1444      bitmap_set_bit (ancestor_edges[bb], EDGE_TO_BIT (in_edge));
1445
1446      bitmap_ior (pot_split[bb], pot_split[bb], pot_split[pred_bb]);
1447
1448      FOR_EACH_EDGE (out_edge, out_ei, in_edge->src->succs)
1449	bitmap_set_bit (pot_split[bb], EDGE_TO_BIT (out_edge));
1450
1451      prob[bb] += combine_probabilities
1452		 (prob[pred_bb],
1453		  in_edge->probability.initialized_p ()
1454		  ? in_edge->probability.to_reg_br_prob_base ()
1455		  : 0);
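      /* As a made-up numeric illustration: if prob[pred_bb] is half of
	 REG_BR_PROB_BASE and the incoming edge is taken 60% of the time,
	 combine_probabilities returns 0.5 * 0.6 = 0.3 of REG_BR_PROB_BASE,
	 and summing such terms over all incoming edges yields the
	 probability of reaching this block from the region entry.  */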
      /* The rounding divide in combine_probabilities can result in an extra
	 probability increment propagating along 50-50 edges.  Eventually when
	 the edges re-merge, the accumulated probability can go slightly above
	 REG_BR_PROB_BASE.  */
1460      if (prob[bb] > REG_BR_PROB_BASE)
1461        prob[bb] = REG_BR_PROB_BASE;
1462    }
1463
1464  bitmap_set_bit (dom[bb], bb);
1465  bitmap_and_compl (pot_split[bb], pot_split[bb], ancestor_edges[bb]);
1466
1467  if (sched_verbose >= 2)
1468    fprintf (sched_dump, ";;  bb_prob(%d, %d) = %3d\n", bb, BB_TO_BLOCK (bb),
1469	     (100 * prob[bb]) / REG_BR_PROB_BASE);
1470}
1471
1472/* Functions for target info.  */
1473
/* Compute in BL the list of split edges of bb_src relative to bb_trg.
   Note that bb_trg dominates bb_src.  */
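/* In set terms (an illustration, not a definition): BL receives
   pot_split[bb_src] minus pot_split[bb_trg], i.e. the edges along which
   control can escape between bb_trg and bb_src; their destination blocks
   are the split blocks examined by check_live.  */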
1476
1477static void
1478split_edges (int bb_src, int bb_trg, edgelst *bl)
1479{
1480  auto_sbitmap src (SBITMAP_SIZE (pot_split[bb_src]));
1481  bitmap_copy (src, pot_split[bb_src]);
1482
1483  bitmap_and_compl (src, src, pot_split[bb_trg]);
1484  extract_edgelst (src, bl);
1485}
1486
1487/* Find the valid candidate-source-blocks for the target block TRG, compute
1488   their probability, and check if they are speculative or not.
1489   For speculative sources, compute their update-blocks and split-blocks.  */
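/* A small hypothetical illustration: suppose the region contains
   TRG -> A -> SRC together with an extra edge A -> X, and SRC is dominated
   by TRG.  Moving an insn from SRC up to TRG is speculative because control
   can escape through A -> X before SRC is reached; A -> X is then the single
   split edge, X the split block whose live-in sets check_live must examine,
   and SRC (the destination of A's other out edge) an update block whose
   live-in sets update_live extends once the motion is made.  */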
1490
1491static void
1492compute_trg_info (int trg)
1493{
1494  candidate *sp;
1495  edgelst el = { NULL, 0 };
1496  int i, j, k, update_idx;
1497  basic_block block;
1498  edge_iterator ei;
1499  edge e;
1500
1501  candidate_table = XNEWVEC (candidate, current_nr_blocks);
1502
1503  bblst_last = 0;
1504  /* bblst_table holds split blocks and update blocks for each block after
     the current one in the region.  Split blocks and update blocks are
1506     the TO blocks of region edges, so there can be at most rgn_nr_edges
1507     of them.  */
1508  bblst_size = (current_nr_blocks - target_bb) * rgn_nr_edges;
1509  bblst_table = XNEWVEC (basic_block, bblst_size);
1510
1511  edgelst_last = 0;
1512  edgelst_table = XNEWVEC (edge, rgn_nr_edges);
1513
1514  /* Define some of the fields for the target bb as well.  */
1515  sp = candidate_table + trg;
1516  sp->is_valid = 1;
1517  sp->is_speculative = 0;
1518  sp->src_prob = REG_BR_PROB_BASE;
1519
1520  auto_sbitmap visited (last_basic_block_for_fn (cfun));
1521
1522  for (i = trg + 1; i < current_nr_blocks; i++)
1523    {
1524      sp = candidate_table + i;
1525
1526      sp->is_valid = IS_DOMINATED (i, trg);
1527      if (sp->is_valid)
1528	{
1529	  int tf = prob[trg], cf = prob[i];
1530
1531	  /* In CFGs with low probability edges TF can possibly be zero.  */
1532	  sp->src_prob = (tf ? GCOV_COMPUTE_SCALE (cf, tf) : 0);
1533	  sp->is_valid = (sp->src_prob >= min_spec_prob);
1534	}
1535
1536      if (sp->is_valid)
1537	{
1538	  split_edges (i, trg, &el);
1539	  sp->is_speculative = (el.nr_members) ? 1 : 0;
1540	  if (sp->is_speculative && !flag_schedule_speculative)
1541	    sp->is_valid = 0;
1542	}
1543
1544      if (sp->is_valid)
1545	{
1546	  /* Compute split blocks and store them in bblst_table.
1547	     The TO block of every split edge is a split block.  */
1548	  sp->split_bbs.first_member = &bblst_table[bblst_last];
1549	  sp->split_bbs.nr_members = el.nr_members;
1550	  for (j = 0; j < el.nr_members; bblst_last++, j++)
1551	    bblst_table[bblst_last] = el.first_member[j]->dest;
1552	  sp->update_bbs.first_member = &bblst_table[bblst_last];
1553
1554	  /* Compute update blocks and store them in bblst_table.
1555	     For every split edge, look at the FROM block, and check
1556	     all out edges.  For each out edge that is not a split edge,
1557	     add the TO block to the update block list.  This list can end
1558	     up with a lot of duplicates.  We need to weed them out to avoid
1559	     overrunning the end of the bblst_table.  */
1560
1561	  update_idx = 0;
1562	  bitmap_clear (visited);
1563	  for (j = 0; j < el.nr_members; j++)
1564	    {
1565	      block = el.first_member[j]->src;
1566	      FOR_EACH_EDGE (e, ei, block->succs)
1567		{
1568		  if (!bitmap_bit_p (visited, e->dest->index))
1569		    {
1570		      for (k = 0; k < el.nr_members; k++)
1571			if (e == el.first_member[k])
1572			  break;
1573
1574		      if (k >= el.nr_members)
1575			{
1576			  bblst_table[bblst_last++] = e->dest;
1577			  bitmap_set_bit (visited, e->dest->index);
1578			  update_idx++;
1579			}
1580		    }
1581		}
1582	    }
1583	  sp->update_bbs.nr_members = update_idx;
1584
1585	  /* Make sure we didn't overrun the end of bblst_table.  */
1586	  gcc_assert (bblst_last <= bblst_size);
1587	}
1588      else
1589	{
1590	  sp->split_bbs.nr_members = sp->update_bbs.nr_members = 0;
1591
1592	  sp->is_speculative = 0;
1593	  sp->src_prob = 0;
1594	}
1595    }
1596}
1597
1598/* Free the computed target info.  */
1599static void
1600free_trg_info (void)
1601{
1602  free (candidate_table);
1603  free (bblst_table);
1604  free (edgelst_table);
1605}
1606
1607/* Print candidates info, for debugging purposes.  Callable from debugger.  */
1608
1609DEBUG_FUNCTION void
1610debug_candidate (int i)
1611{
1612  if (!candidate_table[i].is_valid)
1613    return;
1614
1615  if (candidate_table[i].is_speculative)
1616    {
1617      int j;
1618      fprintf (sched_dump, "src b %d bb %d speculative \n", BB_TO_BLOCK (i), i);
1619
1620      fprintf (sched_dump, "split path: ");
1621      for (j = 0; j < candidate_table[i].split_bbs.nr_members; j++)
1622	{
1623	  int b = candidate_table[i].split_bbs.first_member[j]->index;
1624
1625	  fprintf (sched_dump, " %d ", b);
1626	}
1627      fprintf (sched_dump, "\n");
1628
1629      fprintf (sched_dump, "update path: ");
1630      for (j = 0; j < candidate_table[i].update_bbs.nr_members; j++)
1631	{
1632	  int b = candidate_table[i].update_bbs.first_member[j]->index;
1633
1634	  fprintf (sched_dump, " %d ", b);
1635	}
1636      fprintf (sched_dump, "\n");
1637    }
1638  else
1639    {
1640      fprintf (sched_dump, " src %d equivalent\n", BB_TO_BLOCK (i));
1641    }
1642}
1643
1644/* Print candidates info, for debugging purposes.  Callable from debugger.  */
1645
1646DEBUG_FUNCTION void
1647debug_candidates (int trg)
1648{
1649  int i;
1650
1651  fprintf (sched_dump, "----------- candidate table: target: b=%d bb=%d ---\n",
1652	   BB_TO_BLOCK (trg), trg);
1653  for (i = trg + 1; i < current_nr_blocks; i++)
1654    debug_candidate (i);
1655}
1656
1657/* Functions for speculative scheduling.  */
1658
1659static bitmap_head not_in_df;
1660
/* Return 0 if x is a set of a register alive at the beginning of one
1662   of the split-blocks of src, otherwise return 1.  */
1663
1664static int
1665check_live_1 (int src, rtx x)
1666{
1667  int i;
1668  int regno;
1669  rtx reg = SET_DEST (x);
1670
1671  if (reg == 0)
1672    return 1;
1673
1674  while (GET_CODE (reg) == SUBREG
1675	 || GET_CODE (reg) == ZERO_EXTRACT
1676	 || GET_CODE (reg) == STRICT_LOW_PART)
1677    reg = XEXP (reg, 0);
1678
1679  if (GET_CODE (reg) == PARALLEL)
1680    {
1681      int i;
1682
1683      for (i = XVECLEN (reg, 0) - 1; i >= 0; i--)
1684	if (XEXP (XVECEXP (reg, 0, i), 0) != 0)
1685	  if (check_live_1 (src, XEXP (XVECEXP (reg, 0, i), 0)))
1686	    return 1;
1687
1688      return 0;
1689    }
1690
1691  if (!REG_P (reg))
1692    return 1;
1693
1694  regno = REGNO (reg);
1695
1696  if (regno < FIRST_PSEUDO_REGISTER && global_regs[regno])
1697    {
1698      /* Global registers are assumed live.  */
1699      return 0;
1700    }
1701  else
1702    {
1703      if (regno < FIRST_PSEUDO_REGISTER)
1704	{
1705	  /* Check for hard registers.  */
1706	  int j = REG_NREGS (reg);
1707	  while (--j >= 0)
1708	    {
1709	      for (i = 0; i < candidate_table[src].split_bbs.nr_members; i++)
1710		{
1711		  basic_block b = candidate_table[src].split_bbs.first_member[i];
1712		  int t = bitmap_bit_p (&not_in_df, b->index);
1713
		  /* We can have split blocks that were recently generated.
		     Such blocks are always outside the current region.  */
1716		  gcc_assert (!t || (CONTAINING_RGN (b->index)
1717				     != CONTAINING_RGN (BB_TO_BLOCK (src))));
1718
1719		  if (t || REGNO_REG_SET_P (df_get_live_in (b), regno + j))
1720		    return 0;
1721		}
1722	    }
1723	}
1724      else
1725	{
1726	  /* Check for pseudo registers.  */
1727	  for (i = 0; i < candidate_table[src].split_bbs.nr_members; i++)
1728	    {
1729	      basic_block b = candidate_table[src].split_bbs.first_member[i];
1730	      int t = bitmap_bit_p (&not_in_df, b->index);
1731
1732	      gcc_assert (!t || (CONTAINING_RGN (b->index)
1733				 != CONTAINING_RGN (BB_TO_BLOCK (src))));
1734
1735	      if (t || REGNO_REG_SET_P (df_get_live_in (b), regno))
1736		return 0;
1737	    }
1738	}
1739    }
1740
1741  return 1;
1742}
1743
/* If x is a set of a register R, mark that R is alive at the beginning
1745   of every update-block of src.  */
1746
1747static void
1748update_live_1 (int src, rtx x)
1749{
1750  int i;
1751  int regno;
1752  rtx reg = SET_DEST (x);
1753
1754  if (reg == 0)
1755    return;
1756
1757  while (GET_CODE (reg) == SUBREG
1758	 || GET_CODE (reg) == ZERO_EXTRACT
1759	 || GET_CODE (reg) == STRICT_LOW_PART)
1760    reg = XEXP (reg, 0);
1761
1762  if (GET_CODE (reg) == PARALLEL)
1763    {
1764      int i;
1765
1766      for (i = XVECLEN (reg, 0) - 1; i >= 0; i--)
1767	if (XEXP (XVECEXP (reg, 0, i), 0) != 0)
1768	  update_live_1 (src, XEXP (XVECEXP (reg, 0, i), 0));
1769
1770      return;
1771    }
1772
1773  if (!REG_P (reg))
1774    return;
1775
1776  /* Global registers are always live, so the code below does not apply
1777     to them.  */
1778
1779  regno = REGNO (reg);
1780
1781  if (! HARD_REGISTER_NUM_P (regno)
1782      || !global_regs[regno])
1783    {
1784      for (i = 0; i < candidate_table[src].update_bbs.nr_members; i++)
1785	{
1786	  basic_block b = candidate_table[src].update_bbs.first_member[i];
1787	  bitmap_set_range (df_get_live_in (b), regno, REG_NREGS (reg));
1788	}
1789    }
1790}
1791
1792/* Return 1 if insn can be speculatively moved from block src to trg,
1793   otherwise return 0.  Called before the first insertion of insn into
1794   the ready list, or before scheduling.  */
1795
1796static int
1797check_live (rtx_insn *insn, int src)
1798{
1799  /* Find the registers set by the instruction.  */
1800  if (GET_CODE (PATTERN (insn)) == SET
1801      || GET_CODE (PATTERN (insn)) == CLOBBER)
1802    return check_live_1 (src, PATTERN (insn));
1803  else if (GET_CODE (PATTERN (insn)) == PARALLEL)
1804    {
1805      int j;
1806      for (j = XVECLEN (PATTERN (insn), 0) - 1; j >= 0; j--)
1807	if ((GET_CODE (XVECEXP (PATTERN (insn), 0, j)) == SET
1808	     || GET_CODE (XVECEXP (PATTERN (insn), 0, j)) == CLOBBER)
1809	    && !check_live_1 (src, XVECEXP (PATTERN (insn), 0, j)))
1810	  return 0;
1811
1812      return 1;
1813    }
1814
1815  return 1;
1816}
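
/* For illustration, a hypothetical multi-part pattern such as
	(parallel [(set (reg:SI 100) (mem:SI (reg:SI 101)))
		   (clobber (reg:CC 17))])
   makes check_live call check_live_1 once for the SET and once for the
   CLOBBER, and the insn is rejected as soon as one of them returns 0.  */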
1817
1818/* Update the live registers info after insn was moved speculatively from
1819   block src to trg.  */
1820
1821static void
1822update_live (rtx_insn *insn, int src)
1823{
1824  /* Find the registers set by the instruction.  */
1825  if (GET_CODE (PATTERN (insn)) == SET
1826      || GET_CODE (PATTERN (insn)) == CLOBBER)
1827    update_live_1 (src, PATTERN (insn));
1828  else if (GET_CODE (PATTERN (insn)) == PARALLEL)
1829    {
1830      int j;
1831      for (j = XVECLEN (PATTERN (insn), 0) - 1; j >= 0; j--)
1832	if (GET_CODE (XVECEXP (PATTERN (insn), 0, j)) == SET
1833	    || GET_CODE (XVECEXP (PATTERN (insn), 0, j)) == CLOBBER)
1834	  update_live_1 (src, XVECEXP (PATTERN (insn), 0, j));
1835    }
1836}
1837
1838/* Nonzero if block bb_to is equal to, or reachable from block bb_from.  */
1839#define IS_REACHABLE(bb_from, bb_to)					\
1840  (bb_from == bb_to							\
1841   || IS_RGN_ENTRY (bb_from)						\
1842   || (bitmap_bit_p (ancestor_edges[bb_to],					\
1843	 EDGE_TO_BIT (single_pred_edge (BASIC_BLOCK_FOR_FN (cfun, \
1844							    BB_TO_BLOCK (bb_from)))))))
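
/* In other words, bb_to is considered reachable from bb_from when they are
   the same block, when bb_from is the region entry, or when the single
   incoming edge of bb_from is recorded in ancestor_edges[bb_to], i.e. it
   lies on some path from the region entry to bb_to (ancestor_edges is
   filled in when the region is set up, see compute_dom_prob_ps).  */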
1845
1846/* Turns on the fed_by_spec_load flag for insns fed by load_insn.  */
1847
1848static void
1849set_spec_fed (rtx load_insn)
1850{
1851  sd_iterator_def sd_it;
1852  dep_t dep;
1853
1854  FOR_EACH_DEP (load_insn, SD_LIST_FORW, sd_it, dep)
1855    if (DEP_TYPE (dep) == REG_DEP_TRUE)
1856      FED_BY_SPEC_LOAD (DEP_CON (dep)) = 1;
1857}
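
/* The FED_BY_SPEC_LOAD bits set here are later consulted by is_prisky, so
   that a load fed by a speculatively moved load is itself treated as
   risky.  */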
1858
1859/* On the path from insn to load_insn_bb, find a conditional branch
1860   that depends on insn and guards the speculative load.  */
1861
1862static int
1863find_conditional_protection (rtx_insn *insn, int load_insn_bb)
1864{
1865  sd_iterator_def sd_it;
1866  dep_t dep;
1867
1868  /* Iterate through DEF-USE forward dependences.  */
1869  FOR_EACH_DEP (insn, SD_LIST_FORW, sd_it, dep)
1870    {
1871      rtx_insn *next = DEP_CON (dep);
1872
1873      if ((CONTAINING_RGN (BLOCK_NUM (next)) ==
1874	   CONTAINING_RGN (BB_TO_BLOCK (load_insn_bb)))
1875	  && IS_REACHABLE (INSN_BB (next), load_insn_bb)
1876	  && load_insn_bb != INSN_BB (next)
1877	  && DEP_TYPE (dep) == REG_DEP_TRUE
1878	  && (JUMP_P (next)
1879	      || find_conditional_protection (next, load_insn_bb)))
1880	return 1;
1881    }
1882  return 0;
1883}				/* find_conditional_protection */
1884
1885/* Return 1 if an insn1 that participates in the computation
1886   of load_insn's address also feeds a conditional branch that
1887   guards load_insn.  This is true if we find two DEF-USE
1888   chains:
1889   insn1 -> ... -> conditional-branch
1890   insn1 -> ... -> load_insn,
1891   and if a flow path exists:
1892   insn1 -> ... -> conditional-branch -> ... -> load_insn,
1893   and if insn1 is on the path
1894   region-entry -> ... -> bb_trg -> ... load_insn.
1895
1896   Locate insn1 by climbing on INSN_BACK_DEPS from load_insn.
1897   Locate the branch by following INSN_FORW_DEPS from insn1.  */
1898
1899static int
1900is_conditionally_protected (rtx load_insn, int bb_src, int bb_trg)
1901{
1902  sd_iterator_def sd_it;
1903  dep_t dep;
1904
1905  FOR_EACH_DEP (load_insn, SD_LIST_BACK, sd_it, dep)
1906    {
1907      rtx_insn *insn1 = DEP_PRO (dep);
1908
1909      /* Must be a DEF-USE dependence upon non-branch.  */
1910      if (DEP_TYPE (dep) != REG_DEP_TRUE
1911	  || JUMP_P (insn1))
1912	continue;
1913
1914      /* A path must exist: region-entry -> ... -> bb_trg -> ... load_insn.  */
1915      if (INSN_BB (insn1) == bb_src
1916	  || (CONTAINING_RGN (BLOCK_NUM (insn1))
1917	      != CONTAINING_RGN (BB_TO_BLOCK (bb_src)))
1918	  || (!IS_REACHABLE (bb_trg, INSN_BB (insn1))
1919	      && !IS_REACHABLE (INSN_BB (insn1), bb_trg)))
1920	continue;
1921
1922      /* Now search for the conditional-branch.  */
1923      if (find_conditional_protection (insn1, bb_src))
1924	return 1;
1925
1926      /* Recursive step: search another insn1, "above" current insn1.  */
1927      return is_conditionally_protected (insn1, bb_src, bb_trg);
1928    }
1929
1930  /* The chain does not exist.  */
1931  return 0;
1932}				/* is_conditionally_protected */
1933
1934/* Returns 1 if a clue for "similar load" 'insn2' is found, and hence
1935   load_insn can move speculatively from bb_src to bb_trg.  All the
1936   following must hold:
1937
1938   (1) both loads have 1 base register (PFREE_CANDIDATEs).
1939   (2) load_insn and load2 have a def-use dependence upon
1940   the same insn 'insn1'.
1941   (3) either load2 is in bb_trg, or:
1942   - there's only one split-block, and
1943   - load2 is on the escape path.
1944
1945   From all these we can conclude that the two loads access memory
1946   addresses that differ at most by a constant, and hence if moving
1947   load_insn would cause an exception, it would have been caused by
1948   load2 anyhow.  */
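
/* A hypothetical example of the situation above (register numbers made
   up): insn1 computes an address base, say r2 = r1 + 8; load2 in bb_trg
   reads [r2]; load_insn in bb_src reads [r2 + 4].  Both addresses derive
   from the same base r2, so they can only differ by a constant.  */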
1949
1950static int
1951is_pfree (rtx load_insn, int bb_src, int bb_trg)
1952{
1953  sd_iterator_def back_sd_it;
1954  dep_t back_dep;
1955  candidate *candp = candidate_table + bb_src;
1956
1957  if (candp->split_bbs.nr_members != 1)
1958    /* Must have exactly one escape block.  */
1959    return 0;
1960
1961  FOR_EACH_DEP (load_insn, SD_LIST_BACK, back_sd_it, back_dep)
1962    {
1963      rtx_insn *insn1 = DEP_PRO (back_dep);
1964
1965      if (DEP_TYPE (back_dep) == REG_DEP_TRUE)
1966	/* Found a DEF-USE dependence (insn1, load_insn).  */
1967	{
1968	  sd_iterator_def fore_sd_it;
1969	  dep_t fore_dep;
1970
1971	  FOR_EACH_DEP (insn1, SD_LIST_FORW, fore_sd_it, fore_dep)
1972	    {
1973	      rtx_insn *insn2 = DEP_CON (fore_dep);
1974
1975	      if (DEP_TYPE (fore_dep) == REG_DEP_TRUE)
1976		{
1977		  /* Found a DEF-USE dependence (insn1, insn2).  */
1978		  if (haifa_classify_insn (insn2) != PFREE_CANDIDATE)
1979		    /* insn2 not guaranteed to be a 1 base reg load.  */
1980		    continue;
1981
1982		  if (INSN_BB (insn2) == bb_trg)
1983		    /* insn2 is the similar load, in the target block.  */
1984		    return 1;
1985
1986		  if (*(candp->split_bbs.first_member) == BLOCK_FOR_INSN (insn2))
1987		    /* insn2 is a similar load, in a split-block.  */
1988		    return 1;
1989		}
1990	    }
1991	}
1992    }
1993
1994  /* Couldn't find a similar load.  */
1995  return 0;
1996}				/* is_pfree */
1997
1998/* Return 1 if load_insn is prisky (i.e. if load_insn is fed by
1999   a load moved speculatively, or if load_insn is protected by
2000   a compare on load_insn's address).  */
2001
2002static int
2003is_prisky (rtx load_insn, int bb_src, int bb_trg)
2004{
2005  if (FED_BY_SPEC_LOAD (load_insn))
2006    return 1;
2007
2008  if (sd_lists_empty_p (load_insn, SD_LIST_BACK))
2009    /* Dependence may 'hide' out of the region.  */
2010    return 1;
2011
2012  if (is_conditionally_protected (load_insn, bb_src, bb_trg))
2013    return 1;
2014
2015  return 0;
2016}
2017
2018/* Insn is a candidate to be moved speculatively from bb_src to bb_trg.
2019   Return 1 if insn is exception-free (and the motion is valid)
2020   and 0 otherwise.  */
2021
2022static int
2023is_exception_free (rtx_insn *insn, int bb_src, int bb_trg)
2024{
2025  int insn_class = haifa_classify_insn (insn);
2026
2027  /* Handle non-load insns.  */
2028  switch (insn_class)
2029    {
2030    case TRAP_FREE:
2031      return 1;
2032    case TRAP_RISKY:
2033      return 0;
2034    default:;
2035    }
2036
2037  /* Handle loads.  */
2038  if (!flag_schedule_speculative_load)
2039    return 0;
2040  IS_LOAD_INSN (insn) = 1;
2041  switch (insn_class)
2042    {
2043    case IFREE:
2044      return 1;
2045    case IRISKY:
2046      return 0;
2047    case PFREE_CANDIDATE:
2048      if (is_pfree (insn, bb_src, bb_trg))
2049	return 1;
2050      /* Don't 'break' here: PFREE-candidate is also PRISKY-candidate.  */
2051      /* FALLTHRU */
2052    case PRISKY_CANDIDATE:
2053      if (!flag_schedule_speculative_load_dangerous
2054	  || is_prisky (insn, bb_src, bb_trg))
2055	return 0;
2056      break;
2057    default:;
2058    }
2059
2060  return flag_schedule_speculative_load_dangerous;
2061}
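
/* An informal summary of the switches above: TRAP_FREE insns may always
   move, TRAP_RISKY and IRISKY insns never move, IFREE loads may move once
   speculative-load scheduling is enabled, and PFREE/PRISKY candidates are
   decided by is_pfree / is_prisky together with
   flag_schedule_speculative_load_dangerous.  */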
2062
2063/* The number of insns from the current block scheduled so far.  */
2064static int sched_target_n_insns;
2065/* The number of insns from the current block to be scheduled in total.  */
2066static int target_n_insns;
2067/* The number of insns from the entire region scheduled so far.  */
2068static int sched_n_insns;
2069
2070/* Implementations of the sched_info functions for region scheduling.  */
2071static void init_ready_list (void);
2072static int can_schedule_ready_p (rtx_insn *);
2073static void begin_schedule_ready (rtx_insn *);
2074static ds_t new_ready (rtx_insn *, ds_t);
2075static int schedule_more_p (void);
2076static const char *rgn_print_insn (const rtx_insn *, int);
2077static int rgn_rank (rtx_insn *, rtx_insn *);
2078static void compute_jump_reg_dependencies (rtx, regset);
2079
2080/* Functions for speculative scheduling.  */
2081static void rgn_add_remove_insn (rtx_insn *, int);
2082static void rgn_add_block (basic_block, basic_block);
2083static void rgn_fix_recovery_cfg (int, int, int);
2084static basic_block advance_target_bb (basic_block, rtx_insn *);
2085
2086/* Return nonzero if there are more insns that should be scheduled.  */
2087
2088static int
2089schedule_more_p (void)
2090{
2091  return sched_target_n_insns < target_n_insns;
2092}
2093
2094/* Add all insns that are initially ready to the ready list READY.  Called
2095   once before scheduling a set of insns.  */
2096
2097static void
2098init_ready_list (void)
2099{
2100  rtx_insn *prev_head = current_sched_info->prev_head;
2101  rtx_insn *next_tail = current_sched_info->next_tail;
2102  int bb_src;
2103  rtx_insn *insn;
2104
2105  target_n_insns = 0;
2106  sched_target_n_insns = 0;
2107  sched_n_insns = 0;
2108
2109  /* Print debugging information.  */
2110  if (sched_verbose >= 5)
2111    debug_rgn_dependencies (target_bb);
2112
2113  /* Prepare current target block info.  */
2114  if (current_nr_blocks > 1)
2115    compute_trg_info (target_bb);
2116
2117  /* Initialize ready list with all 'ready' insns in target block.
2118     Count number of insns in the target block being scheduled.  */
2119  for (insn = NEXT_INSN (prev_head); insn != next_tail; insn = NEXT_INSN (insn))
2120    {
2121      gcc_assert (TODO_SPEC (insn) == HARD_DEP || TODO_SPEC (insn) == DEP_POSTPONED);
2122      TODO_SPEC (insn) = HARD_DEP;
2123      try_ready (insn);
2124      target_n_insns++;
2125
2126      gcc_assert (!(TODO_SPEC (insn) & BEGIN_CONTROL));
2127    }
2128
2129  /* Add to the ready list all 'ready' insns in valid source blocks.
2130     For speculative insns, apply the check-live, exception-free and
2131     issue-delay tests.  */
2132  for (bb_src = target_bb + 1; bb_src < current_nr_blocks; bb_src++)
2133    if (IS_VALID (bb_src))
2134      {
2135	rtx_insn *src_head;
2136	rtx_insn *src_next_tail;
2137	rtx_insn *tail, *head;
2138
2139	get_ebb_head_tail (EBB_FIRST_BB (bb_src), EBB_LAST_BB (bb_src),
2140			   &head, &tail);
2141	src_next_tail = NEXT_INSN (tail);
2142	src_head = head;
2143
2144	for (insn = src_head; insn != src_next_tail; insn = NEXT_INSN (insn))
2145	  if (INSN_P (insn))
2146	    {
2147	      gcc_assert (TODO_SPEC (insn) == HARD_DEP || TODO_SPEC (insn) == DEP_POSTPONED);
2148	      TODO_SPEC (insn) = HARD_DEP;
2149	      try_ready (insn);
2150	    }
2151      }
2152}
2153
2154/* Called after taking INSN from the ready list.  Return nonzero if this
2155   insn can be scheduled, or zero if we should silently discard it.  */
2156
2157static int
2158can_schedule_ready_p (rtx_insn *insn)
2159{
2160  /* An interblock motion?  */
2161  if (INSN_BB (insn) != target_bb && IS_SPECULATIVE_INSN (insn))
2162    {
2163      /* Cannot schedule this insn unless all operands are live.  */
2164      if (!check_live (insn, INSN_BB (insn)))
2165	return 0;
2166
2167      /* Should not move expensive instructions speculatively.  */
2168      if (GET_CODE (PATTERN (insn)) != CLOBBER
2169	  && !targetm.sched.can_speculate_insn (insn))
2170	return 0;
2171    }
2172
2173  return 1;
2174}
2175
2176/* Update counters and other information.  Split from can_schedule_ready_p ()
2177   because when an insn is scheduled speculatively, the insn passed to
2178   can_schedule_ready_p () differs from the one passed to
2179   begin_schedule_ready ().  */
2180static void
2181begin_schedule_ready (rtx_insn *insn)
2182{
2183  /* An interblock motion?  */
2184  if (INSN_BB (insn) != target_bb)
2185    {
2186      if (IS_SPECULATIVE_INSN (insn))
2187	{
2188	  gcc_assert (check_live (insn, INSN_BB (insn)));
2189
2190	  update_live (insn, INSN_BB (insn));
2191
2192	  /* For speculative load, mark insns fed by it.  */
2193	  if (IS_LOAD_INSN (insn) || FED_BY_SPEC_LOAD (insn))
2194	    set_spec_fed (insn);
2195
2196	  nr_spec++;
2197	}
2198      nr_inter++;
2199    }
2200  else
2201    {
2202      /* In block motion.  */
2203      sched_target_n_insns++;
2204    }
2205  sched_n_insns++;
2206}
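
/* The nr_inter and nr_spec counters accumulated here are what
   sched_rgn_finish reports in the dump for the pre-reload interblock pass
   when sched_verbose is set.  */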
2207
2208/* Called after NEXT has all its hard dependencies resolved and speculation
2209   of type TS is enough to overcome them all.
2210   Return the speculation status with which NEXT should be moved to the ready
2211   list or the queue, or DEP_POSTPONED if it is not ready yet.  */
2212static ds_t
2213new_ready (rtx_insn *next, ds_t ts)
2214{
2215  if (INSN_BB (next) != target_bb)
2216    {
2217      int not_ex_free = 0;
2218
2219      /* For speculative insns, before inserting to ready/queue,
2220	 check live, exception-free, and issue-delay.  */
2221      if (!IS_VALID (INSN_BB (next))
2222	  || CANT_MOVE (next)
2223	  || (IS_SPECULATIVE_INSN (next)
2224	      && ((recog_memoized (next) >= 0
2225		   && min_insn_conflict_delay (curr_state, next, next)
2226		   > param_max_sched_insn_conflict_delay)
2227                  || IS_SPECULATION_CHECK_P (next)
2228		  || !check_live (next, INSN_BB (next))
2229		  || (not_ex_free = !is_exception_free (next, INSN_BB (next),
2230							target_bb)))))
2231	{
2232	  if (not_ex_free
2233	      /* We are here because is_exception_free () == false.
2234		 But we possibly can handle that with control speculation.  */
2235	      && sched_deps_info->generate_spec_deps
2236	      && spec_info->mask & BEGIN_CONTROL)
2237	    {
2238	      ds_t new_ds;
2239
2240	      /* Add control speculation to NEXT's dependency type.  */
2241	      new_ds = set_dep_weak (ts, BEGIN_CONTROL, MAX_DEP_WEAK);
2242
2243	      /* Check if NEXT can be speculated with new dependency type.  */
2244	      if (sched_insn_is_legitimate_for_speculation_p (next, new_ds))
2245		/* Here we got new control-speculative instruction.  */
2246		ts = new_ds;
2247	      else
2248		/* NEXT isn't ready yet.  */
2249		ts = DEP_POSTPONED;
2250	    }
2251	  else
2252	    /* NEXT isn't ready yet.  */
2253            ts = DEP_POSTPONED;
2254	}
2255    }
2256
2257  return ts;
2258}
2259
2260/* Return a string that contains the insn uid and optionally anything else
2261   necessary to identify this insn in an output.  It's valid to use a
2262   static buffer for this.  The ALIGNED parameter should cause the string
2263   to be formatted so that multiple output lines will line up nicely.  */
2264
2265static const char *
2266rgn_print_insn (const rtx_insn *insn, int aligned)
2267{
2268  static char tmp[80];
2269
2270  if (aligned)
2271    sprintf (tmp, "b%3d: i%4d", INSN_BB (insn), INSN_UID (insn));
2272  else
2273    {
2274      if (current_nr_blocks > 1 && INSN_BB (insn) != target_bb)
2275	sprintf (tmp, "%d/b%d", INSN_UID (insn), INSN_BB (insn));
2276      else
2277	sprintf (tmp, "%d", INSN_UID (insn));
2278    }
2279  return tmp;
2280}
2281
2282/* Compare priority of two insns.  Return a positive number if the second
2283   insn is to be preferred for scheduling, and a negative one if the first
2284   is to be preferred.  Zero if they are equally good.  */
2285
2286static int
2287rgn_rank (rtx_insn *insn1, rtx_insn *insn2)
2288{
2289  /* Some comparisons make sense only in interblock scheduling.  */
2290  if (INSN_BB (insn1) != INSN_BB (insn2))
2291    {
2292      int spec_val, prob_val;
2293
2294      /* Prefer an inblock motion over an interblock motion.  */
2295      if ((INSN_BB (insn2) == target_bb) && (INSN_BB (insn1) != target_bb))
2296	return 1;
2297      if ((INSN_BB (insn1) == target_bb) && (INSN_BB (insn2) != target_bb))
2298	return -1;
2299
2300      /* Prefer a useful motion over a speculative one.  */
2301      spec_val = IS_SPECULATIVE_INSN (insn1) - IS_SPECULATIVE_INSN (insn2);
2302      if (spec_val)
2303	return spec_val;
2304
2305      /* Prefer a more probable (speculative) insn.  */
2306      prob_val = INSN_PROBABILITY (insn2) - INSN_PROBABILITY (insn1);
2307      if (prob_val)
2308	return prob_val;
2309    }
2310  return 0;
2311}
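
/* For illustration (made-up situation): if insn1 is in the target block
   and insn2 comes from a source block, -1 is returned and insn1 is
   preferred; if both come from source blocks and only insn2 is
   speculative, insn1 wins again; otherwise the insn whose block has the
   higher INSN_PROBABILITY is preferred.  */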
2312
2313/* NEXT is an instruction that depends on INSN (a backward dependence);
2314   return nonzero if we should include this dependence in priority
2315   calculations.  */
2316
2317int
2318contributes_to_priority (rtx_insn *next, rtx_insn *insn)
2319{
2320  /* NEXT and INSN reside in one ebb.  */
2321  return BLOCK_TO_BB (BLOCK_NUM (next)) == BLOCK_TO_BB (BLOCK_NUM (insn));
2322}
2323
2324/* INSN is a JUMP_INSN.  Store the set of registers that must be
2325   considered as used by this jump in USED.  */
2326
2327static void
2328compute_jump_reg_dependencies (rtx insn ATTRIBUTE_UNUSED,
2329			       regset used ATTRIBUTE_UNUSED)
2330{
2331  /* Nothing to do here, since we postprocess jumps in
2332     add_branch_dependences.  */
2333}
2334
2335/* This variable holds common_sched_info hooks and data relevant to
2336   the interblock scheduler.  */
2337static struct common_sched_info_def rgn_common_sched_info;
2338
2339
2340/* This holds data for the dependence analysis relevant to
2341   the interblock scheduler.  */
2342static struct sched_deps_info_def rgn_sched_deps_info;
2343
2344/* This holds constant data used for initializing the above structure
2345   for the Haifa scheduler.  */
2346static const struct sched_deps_info_def rgn_const_sched_deps_info =
2347  {
2348    compute_jump_reg_dependencies,
2349    NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
2350    0, 0, 0
2351  };
2352
2353/* Same as above, but for the selective scheduler.  */
2354static const struct sched_deps_info_def rgn_const_sel_sched_deps_info =
2355  {
2356    compute_jump_reg_dependencies,
2357    NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
2358    0, 0, 0
2359  };
2360
2361/* Return true if scheduling INSN will trigger finish of scheduling
2362   current block.  */
2363static bool
2364rgn_insn_finishes_block_p (rtx_insn *insn)
2365{
2366  if (INSN_BB (insn) == target_bb
2367      && sched_target_n_insns + 1 == target_n_insns)
2368    /* INSN is the last not-scheduled instruction in the current block.  */
2369    return true;
2370
2371  return false;
2372}
2373
2374/* Used in schedule_insns to initialize current_sched_info for scheduling
2375   regions (or single basic blocks).  */
2376
2377static const struct haifa_sched_info rgn_const_sched_info =
2378{
2379  init_ready_list,
2380  can_schedule_ready_p,
2381  schedule_more_p,
2382  new_ready,
2383  rgn_rank,
2384  rgn_print_insn,
2385  contributes_to_priority,
2386  rgn_insn_finishes_block_p,
2387
2388  NULL, NULL,
2389  NULL, NULL,
2390  0, 0,
2391
2392  rgn_add_remove_insn,
2393  begin_schedule_ready,
2394  NULL,
2395  advance_target_bb,
2396  NULL, NULL,
2397  SCHED_RGN
2398};
2399
2400/* This variable holds the data and hooks needed by the Haifa scheduler backend
2401   for the interblock scheduler frontend.  */
2402static struct haifa_sched_info rgn_sched_info;
2403
2404/* Return the maximum priority that any insn was assigned.  */
2405
2406int
2407get_rgn_sched_max_insns_priority (void)
2408{
2409  return rgn_sched_info.sched_max_insns_priority;
2410}
2411
2412/* Determine if PAT sets a TARGET_CLASS_LIKELY_SPILLED_P register.  */
2413
/* Forward declaration; the helper is defined just below.  */
static void sets_likely_spilled_1 (rtx x, const_rtx pat, void *data);

2414static bool
2415sets_likely_spilled (rtx pat)
2416{
2417  bool ret = false;
2418  note_pattern_stores (pat, sets_likely_spilled_1, &ret);
2419  return ret;
2420}
2421
2422static void
2423sets_likely_spilled_1 (rtx x, const_rtx pat, void *data)
2424{
2425  bool *ret = (bool *) data;
2426
2427  if (GET_CODE (pat) == SET
2428      && REG_P (x)
2429      && HARD_REGISTER_P (x)
2430      && targetm.class_likely_spilled_p (REGNO_REG_CLASS (REGNO (x))))
2431    *ret = true;
2432}
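
/* For example, on a hypothetical target where the class of hard register
   0 is likely spilled, a pattern containing (set (reg:SI 0) ...) makes
   sets_likely_spilled return true, and add_branch_dependences below will
   then keep such an insn at the end of its block before reload.  */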
2433
2434/* A bitmap to note insns that participate in any dependency.  Used in
2435   add_branch_dependences.  */
2436static sbitmap insn_referenced;
2437
2438/* Add dependences so that branches are scheduled to run last in their
2439   block.  */
2440static void
2441add_branch_dependences (rtx_insn *head, rtx_insn *tail)
2442{
2443  rtx_insn *insn, *last;
2444
2445  /* For all branches, calls, uses, clobbers, and instructions
2446     that can throw exceptions, force them to remain in order at the end of
2447     the block by adding dependencies and giving the last a high priority.
2448     There may be notes present, and prev_head may also be a note.
2449
2450     Branches must obviously remain at the end.  Calls should remain at the
2451     end since moving them results in worse register allocation.  Uses remain
2452     at the end to ensure proper register allocation.
2453
2454     Predecessors of SCHED_GROUP_P instructions at the end remain at the end.
2455
2456     COND_EXEC insns cannot be moved past a branch (see e.g. PR17808).
2457
2458     Insns setting TARGET_CLASS_LIKELY_SPILLED_P registers (usually return
2459     values) are not moved before reload because we can wind up with register
2460     allocation failures.  */
2461
2462  while (tail != head && DEBUG_INSN_P (tail))
2463    tail = PREV_INSN (tail);
2464
2465  insn = tail;
2466  last = 0;
2467  while (CALL_P (insn)
2468	 || JUMP_P (insn) || JUMP_TABLE_DATA_P (insn)
2469	 || (NONJUMP_INSN_P (insn)
2470	     && (GET_CODE (PATTERN (insn)) == USE
2471		 || GET_CODE (PATTERN (insn)) == CLOBBER
2472		 || can_throw_internal (insn)
2473		 || (!reload_completed
2474		     && sets_likely_spilled (PATTERN (insn)))))
2475	 || NOTE_P (insn)
2476	 || (last != 0 && SCHED_GROUP_P (last)))
2477    {
2478      if (!NOTE_P (insn))
2479	{
2480	  if (last != 0
2481	      && sd_find_dep_between (insn, last, false) == NULL)
2482	    {
2483	      if (! sched_insns_conditions_mutex_p (last, insn))
2484		add_dependence (last, insn, REG_DEP_ANTI);
2485	      bitmap_set_bit (insn_referenced, INSN_LUID (insn));
2486	    }
2487
2488	  CANT_MOVE (insn) = 1;
2489
2490	  last = insn;
2491	}
2492
2493      /* Don't overrun the bounds of the basic block.  */
2494      if (insn == head)
2495	break;
2496
2497      do
2498	insn = PREV_INSN (insn);
2499      while (insn != head && DEBUG_INSN_P (insn));
2500    }
2501
2502  /* Selective scheduling handles control dependencies by itself, and
2503     CANT_MOVE flags ensure that other insns will be kept in place.  */
2504  if (sel_sched_p ())
2505    return;
2506
2507  /* Make sure these insns are scheduled last in their block.  */
2508  insn = last;
2509  if (insn != 0)
2510    while (insn != head)
2511      {
2512	insn = prev_nonnote_insn (insn);
2513
2514	if (bitmap_bit_p (insn_referenced, INSN_LUID (insn))
2515	    || DEBUG_INSN_P (insn))
2516	  continue;
2517
2518	if (! sched_insns_conditions_mutex_p (last, insn))
2519	  add_dependence (last, insn, REG_DEP_ANTI);
2520      }
2521
2522  if (!targetm.have_conditional_execution ())
2523    return;
2524
2525  /* Finally, if the block ends in a jump, and we are doing intra-block
2526     scheduling, make sure that the branch depends on any COND_EXEC insns
2527     inside the block to avoid moving the COND_EXECs past the branch insn.
2528
2529     We only have to do this after reload, because (1) before reload there
2530     are no COND_EXEC insns, and (2) the region scheduler is an intra-block
2531     scheduler after reload.
2532
2533     FIXME: We could in some cases move COND_EXEC insns past the branch if
2534     this scheduler would be a little smarter.  Consider this code:
2535
2536		T = [addr]
2537	C  ?	addr += 4
2538	!C ?	X += 12
2539	C  ?	T += 1
2540	C  ?	jump foo
2541
2542     On a target with a one cycle stall on a memory access the optimal
2543     sequence would be:
2544
2545		T = [addr]
2546	C  ?	addr += 4
2547	C  ?	T += 1
2548	C  ?	jump foo
2549	!C ?	X += 12
2550
2551     We don't want to put the 'X += 12' before the branch because it just
2552     wastes a cycle of execution time when the branch is taken.
2553
2554     Note that in the example "!C" will always be true.  That is another
2555     possible improvement for handling COND_EXECs in this scheduler: it
2556     could remove always-true predicates.  */
2557
2558  if (!reload_completed || ! (JUMP_P (tail) || JUMP_TABLE_DATA_P (tail)))
2559    return;
2560
2561  insn = tail;
2562  while (insn != head)
2563    {
2564      insn = PREV_INSN (insn);
2565
2566      /* Note that we want to add this dependency even when
2567	 sched_insns_conditions_mutex_p returns true.  The whole point
2568	 is that we _want_ this dependency, even if these insns really
2569	 are independent.  */
2570      if (INSN_P (insn) && GET_CODE (PATTERN (insn)) == COND_EXEC)
2571	add_dependence (tail, insn, REG_DEP_ANTI);
2572    }
2573}
2574
2575/* Data structures for the computation of data dependences in a region.  We
2576   keep one `deps' structure for every basic block.  Before analyzing the
2577   data dependences for a bb, its variables are initialized as a function of
2578   the variables of its predecessors.  When the analysis for a bb completes,
2579   we save the contents to the corresponding bb_deps[bb] variable.  */
2580
2581static class deps_desc *bb_deps;
2582
2583static void
2584concat_insn_mem_list (rtx_insn_list *copy_insns,
2585		      rtx_expr_list *copy_mems,
2586		      rtx_insn_list **old_insns_p,
2587		      rtx_expr_list **old_mems_p)
2588{
2589  rtx_insn_list *new_insns = *old_insns_p;
2590  rtx_expr_list *new_mems = *old_mems_p;
2591
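  /* The insn list and the mem list are parallel lists kept in lockstep by
     the dependence analysis, so they are walked together here; each
     iteration copies one (insn, mem) pair.  */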
2592  while (copy_insns)
2593    {
2594      new_insns = alloc_INSN_LIST (copy_insns->insn (), new_insns);
2595      new_mems = alloc_EXPR_LIST (VOIDmode, copy_mems->element (), new_mems);
2596      copy_insns = copy_insns->next ();
2597      copy_mems = copy_mems->next ();
2598    }
2599
2600  *old_insns_p = new_insns;
2601  *old_mems_p = new_mems;
2602}
2603
2604/* Join PRED_DEPS to the SUCC_DEPS.  */
2605void
2606deps_join (class deps_desc *succ_deps, class deps_desc *pred_deps)
2607{
2608  unsigned reg;
2609  reg_set_iterator rsi;
2610
2611  /* The reg_last lists are inherited by successor.  */
2612  EXECUTE_IF_SET_IN_REG_SET (&pred_deps->reg_last_in_use, 0, reg, rsi)
2613    {
2614      struct deps_reg *pred_rl = &pred_deps->reg_last[reg];
2615      struct deps_reg *succ_rl = &succ_deps->reg_last[reg];
2616
2617      succ_rl->uses = concat_INSN_LIST (pred_rl->uses, succ_rl->uses);
2618      succ_rl->sets = concat_INSN_LIST (pred_rl->sets, succ_rl->sets);
2619      succ_rl->implicit_sets
2620	= concat_INSN_LIST (pred_rl->implicit_sets, succ_rl->implicit_sets);
2621      succ_rl->clobbers = concat_INSN_LIST (pred_rl->clobbers,
2622                                            succ_rl->clobbers);
2623      succ_rl->uses_length += pred_rl->uses_length;
2624      succ_rl->clobbers_length += pred_rl->clobbers_length;
2625    }
2626  IOR_REG_SET (&succ_deps->reg_last_in_use, &pred_deps->reg_last_in_use);
2627
2628  /* Mem read/write lists are inherited by successor.  */
2629  concat_insn_mem_list (pred_deps->pending_read_insns,
2630                        pred_deps->pending_read_mems,
2631                        &succ_deps->pending_read_insns,
2632                        &succ_deps->pending_read_mems);
2633  concat_insn_mem_list (pred_deps->pending_write_insns,
2634                        pred_deps->pending_write_mems,
2635                        &succ_deps->pending_write_insns,
2636                        &succ_deps->pending_write_mems);
2637
2638  succ_deps->pending_jump_insns
2639    = concat_INSN_LIST (pred_deps->pending_jump_insns,
2640                        succ_deps->pending_jump_insns);
2641  succ_deps->last_pending_memory_flush
2642    = concat_INSN_LIST (pred_deps->last_pending_memory_flush,
2643                        succ_deps->last_pending_memory_flush);
2644
2645  succ_deps->pending_read_list_length += pred_deps->pending_read_list_length;
2646  succ_deps->pending_write_list_length += pred_deps->pending_write_list_length;
2647  succ_deps->pending_flush_length += pred_deps->pending_flush_length;
2648
2649  /* last_function_call is inherited by successor.  */
2650  succ_deps->last_function_call
2651    = concat_INSN_LIST (pred_deps->last_function_call,
2652                        succ_deps->last_function_call);
2653
2654  /* last_function_call_may_noreturn is inherited by successor.  */
2655  succ_deps->last_function_call_may_noreturn
2656    = concat_INSN_LIST (pred_deps->last_function_call_may_noreturn,
2657                        succ_deps->last_function_call_may_noreturn);
2658
2659  /* sched_before_next_call is inherited by successor.  */
2660  succ_deps->sched_before_next_call
2661    = concat_INSN_LIST (pred_deps->sched_before_next_call,
2662                        succ_deps->sched_before_next_call);
2663}
2664
2665/* After computing the dependencies for block BB, propagate the dependencies
2666   found in PRED_DEPS to the successors of the block.  */
2667static void
2668propagate_deps (int bb, class deps_desc *pred_deps)
2669{
2670  basic_block block = BASIC_BLOCK_FOR_FN (cfun, BB_TO_BLOCK (bb));
2671  edge_iterator ei;
2672  edge e;
2673
2674  /* bb's structures are inherited by its successors.  */
2675  FOR_EACH_EDGE (e, ei, block->succs)
2676    {
2677      /* Only bbs "below" bb, in the same region, are interesting.  */
2678      if (e->dest == EXIT_BLOCK_PTR_FOR_FN (cfun)
2679	  || CONTAINING_RGN (block->index) != CONTAINING_RGN (e->dest->index)
2680	  || BLOCK_TO_BB (e->dest->index) <= bb)
2681	continue;
2682
2683      deps_join (bb_deps + BLOCK_TO_BB (e->dest->index), pred_deps);
2684    }
2685
2686  /* These lists should point to the right place, for correct
2687     freeing later.  */
2688  bb_deps[bb].pending_read_insns = pred_deps->pending_read_insns;
2689  bb_deps[bb].pending_read_mems = pred_deps->pending_read_mems;
2690  bb_deps[bb].pending_write_insns = pred_deps->pending_write_insns;
2691  bb_deps[bb].pending_write_mems = pred_deps->pending_write_mems;
2692  bb_deps[bb].pending_jump_insns = pred_deps->pending_jump_insns;
2693
2694  /* Can't allow these to be freed twice.  */
2695  pred_deps->pending_read_insns = 0;
2696  pred_deps->pending_read_mems = 0;
2697  pred_deps->pending_write_insns = 0;
2698  pred_deps->pending_write_mems = 0;
2699  pred_deps->pending_jump_insns = 0;
2700}
2701
2702/* Compute dependences inside bb.  In a multiple blocks region:
2703   (1) a bb is analyzed after its predecessors, and (2) the lists in
2704   effect at the end of bb (after analyzing for bb) are inherited by
2705   bb's successors.
2706
2707   Specifically for reg-reg data dependences, the block insns are
2708   scanned by sched_analyze () top-to-bottom.  Three lists are
2709   maintained by sched_analyze (): reg_last[].sets for register DEFs,
2710   reg_last[].implicit_sets for implicit hard register DEFs, and
2711   reg_last[].uses for register USEs.
2712
2713   When analysis is completed for bb, we update for its successors:
2714   ;  - DEFS[succ] = Union (DEFS [succ], DEFS [bb])
2715   ;  - IMPLICIT_DEFS[succ] = Union (IMPLICIT_DEFS [succ], IMPLICIT_DEFS [bb])
2716   ;  - USES[succ] = Union (USES [succ], DEFS [bb])
2717
2718   The mechanism for computing mem-mem data dependence is very
2719   similar, and the result is interblock dependences in the region.  */
2720
2721static void
2722compute_block_dependences (int bb)
2723{
2724  rtx_insn *head, *tail;
2725  class deps_desc tmp_deps;
2726
2727  tmp_deps = bb_deps[bb];
2728
2729  /* Do the analysis for this block.  */
2730  gcc_assert (EBB_FIRST_BB (bb) == EBB_LAST_BB (bb));
2731  get_ebb_head_tail (EBB_FIRST_BB (bb), EBB_LAST_BB (bb), &head, &tail);
2732
2733  sched_analyze (&tmp_deps, head, tail);
2734
2735  add_branch_dependences (head, tail);
2736
2737  if (current_nr_blocks > 1)
2738    propagate_deps (bb, &tmp_deps);
2739
2740  /* Free up the INSN_LISTs.  */
2741  free_deps (&tmp_deps);
2742
2743  if (targetm.sched.dependencies_evaluation_hook)
2744    targetm.sched.dependencies_evaluation_hook (head, tail);
2745}
2746
2747/* Free dependencies of instructions inside BB.  */
2748static void
2749free_block_dependencies (int bb)
2750{
2751  rtx_insn *head;
2752  rtx_insn *tail;
2753
2754  get_ebb_head_tail (EBB_FIRST_BB (bb), EBB_LAST_BB (bb), &head, &tail);
2755
2756  if (no_real_insns_p (head, tail))
2757    return;
2758
2759  sched_free_deps (head, tail, true);
2760}
2761
2762/* Remove all INSN_LISTs and EXPR_LISTs from the pending lists and add
2763   them to the unused_*_list variables, so that they can be reused.  */
2764
2765static void
2766free_pending_lists (void)
2767{
2768  int bb;
2769
2770  for (bb = 0; bb < current_nr_blocks; bb++)
2771    {
2772      free_INSN_LIST_list (&bb_deps[bb].pending_read_insns);
2773      free_INSN_LIST_list (&bb_deps[bb].pending_write_insns);
2774      free_EXPR_LIST_list (&bb_deps[bb].pending_read_mems);
2775      free_EXPR_LIST_list (&bb_deps[bb].pending_write_mems);
2776      free_INSN_LIST_list (&bb_deps[bb].pending_jump_insns);
2777    }
2778}
2779
2780/* Print dependences for debugging starting from FROM_BB.
2781   Callable from debugger.  */
2784DEBUG_FUNCTION void
2785debug_rgn_dependencies (int from_bb)
2786{
2787  int bb;
2788
2789  fprintf (sched_dump,
2790	   ";;   --------------- forward dependences: ------------ \n");
2791
2792  for (bb = from_bb; bb < current_nr_blocks; bb++)
2793    {
2794      rtx_insn *head, *tail;
2795
2796      get_ebb_head_tail (EBB_FIRST_BB (bb), EBB_LAST_BB (bb), &head, &tail);
2797      fprintf (sched_dump, "\n;;   --- Region Dependences --- b %d bb %d \n",
2798	       BB_TO_BLOCK (bb), bb);
2799
2800      debug_dependencies (head, tail);
2801    }
2802}
2803
2804/* Print dependencies information for instructions between HEAD and TAIL.
2805   ??? This function would probably fit best in haifa-sched.cc.  */
2806void
debug_dependencies (rtx_insn *head, rtx_insn *tail)
2807{
2808  rtx_insn *insn;
2809  rtx_insn *next_tail = NEXT_INSN (tail);
2810
2811  fprintf (sched_dump, ";;   %7s%6s%6s%6s%6s%6s%14s\n",
2812	   "insn", "code", "bb", "dep", "prio", "cost",
2813	   "reservation");
2814  fprintf (sched_dump, ";;   %7s%6s%6s%6s%6s%6s%14s\n",
2815	   "----", "----", "--", "---", "----", "----",
2816	   "-----------");
2817
2818  for (insn = head; insn != next_tail; insn = NEXT_INSN (insn))
2819    {
2820      if (! INSN_P (insn))
2821	{
2822	  int n;
2823	  fprintf (sched_dump, ";;   %6d ", INSN_UID (insn));
2824	  if (NOTE_P (insn))
2825	    {
2826	      n = NOTE_KIND (insn);
2827	      fprintf (sched_dump, "%s\n", GET_NOTE_INSN_NAME (n));
2828	    }
2829	  else
2830	    fprintf (sched_dump, " {%s}\n", GET_RTX_NAME (GET_CODE (insn)));
2831	  continue;
2832	}
2833
2834      fprintf (sched_dump,
2835	       ";;   %s%5d%6d%6d%6d%6d%6d   ",
2836	       (SCHED_GROUP_P (insn) ? "+" : " "),
2837	       INSN_UID (insn),
2838	       INSN_CODE (insn),
2839	       BLOCK_NUM (insn),
2840	       sched_emulate_haifa_p ? -1 : sd_lists_size (insn, SD_LIST_BACK),
2841	       (sel_sched_p () ? (sched_emulate_haifa_p ? -1
2842			       : INSN_PRIORITY (insn))
2843		: INSN_PRIORITY (insn)),
2844	       (sel_sched_p () ? (sched_emulate_haifa_p ? -1
2845			       : insn_sched_cost (insn))
2846		: insn_sched_cost (insn)));
2847
2848      if (recog_memoized (insn) < 0)
2849	fprintf (sched_dump, "nothing");
2850      else
2851	print_reservation (sched_dump, insn);
2852
2853      fprintf (sched_dump, "\t: ");
2854      {
2855	sd_iterator_def sd_it;
2856	dep_t dep;
2857
2858	FOR_EACH_DEP (insn, SD_LIST_FORW, sd_it, dep)
2859	  fprintf (sched_dump, "%d%s%s ", INSN_UID (DEP_CON (dep)),
2860		   DEP_NONREG (dep) ? "n" : "",
2861		   DEP_MULTIPLE (dep) ? "m" : "");
2862      }
2863      fprintf (sched_dump, "\n");
2864    }
2865
2866  fprintf (sched_dump, "\n");
2867}
2868
2869/* Dump dependency graph for the current region to a file using dot syntax.  */
2870
2871void
2872dump_rgn_dependencies_dot (FILE *file)
2873{
2874  rtx_insn *head, *tail, *con, *pro;
2875  sd_iterator_def sd_it;
2876  dep_t dep;
2877  int bb;
2878  pretty_printer pp;
2879
2880  pp.buffer->stream = file;
2881  pp_printf (&pp, "digraph SchedDG {\n");
2882
2883  for (bb = 0; bb < current_nr_blocks; ++bb)
2884    {
2885      /* Begin subgraph (basic block).  */
2886      pp_printf (&pp, "subgraph cluster_block_%d {\n", bb);
2887      pp_printf (&pp, "\t" "color=blue;" "\n");
2888      pp_printf (&pp, "\t" "style=bold;" "\n");
2889      pp_printf (&pp, "\t" "label=\"BB #%d\";\n", BB_TO_BLOCK (bb));
2890
2891      /* Setup head and tail (no support for EBBs).  */
2892      gcc_assert (EBB_FIRST_BB (bb) == EBB_LAST_BB (bb));
2893      get_ebb_head_tail (EBB_FIRST_BB (bb), EBB_LAST_BB (bb), &head, &tail);
2894      tail = NEXT_INSN (tail);
2895
2896      /* Dump all insns.  */
2897      for (con = head; con != tail; con = NEXT_INSN (con))
2898	{
2899	  if (!INSN_P (con))
2900	    continue;
2901
2902	  /* Pretty print the insn.  */
2903	  pp_printf (&pp, "\t%d [label=\"{", INSN_UID (con));
2904	  pp_write_text_to_stream (&pp);
2905	  print_insn (&pp, con, /*verbose=*/false);
2906	  pp_write_text_as_dot_label_to_stream (&pp, /*for_record=*/true);
2907	  pp_write_text_to_stream (&pp);
2908
2909	  /* Dump instruction attributes.  */
2910	  pp_printf (&pp, "|{ uid:%d | luid:%d | prio:%d }}\",shape=record]\n",
2911		     INSN_UID (con), INSN_LUID (con), INSN_PRIORITY (con));
2912
2913	  /* Dump all deps.  */
2914	  FOR_EACH_DEP (con, SD_LIST_BACK, sd_it, dep)
2915	    {
2916	      int weight = 0;
2917	      const char *color;
2918	      pro = DEP_PRO (dep);
2919
2920	      switch (DEP_TYPE (dep))
2921		{
2922		case REG_DEP_TRUE:
2923		  color = "black";
2924		  weight = 1;
2925		  break;
2926		case REG_DEP_OUTPUT:
2927		case REG_DEP_ANTI:
2928		  color = "orange";
2929		  break;
2930		case REG_DEP_CONTROL:
2931		  color = "blue";
2932		  break;
2933		default:
2934		  gcc_unreachable ();
2935		}
2936
2937	      pp_printf (&pp, "\t%d -> %d [color=%s",
2938			 INSN_UID (pro), INSN_UID (con), color);
2939	      if (int cost = dep_cost (dep))
2940		pp_printf (&pp, ",label=%d", cost);
2941	      pp_printf (&pp, ",weight=%d", weight);
2942	      pp_printf (&pp, "];\n");
2943	    }
2944	}
2945      pp_printf (&pp, "}\n");
2946    }
2947
2948  pp_printf (&pp, "}\n");
2949  pp_flush (&pp);
2950}
2951
2952/* Dump dependency graph for the current region to a file using dot syntax.  */
2953
2954DEBUG_FUNCTION void
2955dump_rgn_dependencies_dot (const char *fname)
2956{
2957  FILE *fp;
2958
2959  fp = fopen (fname, "w");
2960  if (!fp)
2961    {
2962      perror ("fopen");
2963      return;
2964    }
2965
2966  dump_rgn_dependencies_dot (fp);
2967  fclose (fp);
2968}
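
/* Typical use is from a debugger, e.g.
     (gdb) call dump_rgn_dependencies_dot ("/tmp/rgn.dot")
   and then rendering the result with Graphviz, for instance
     dot -Tpng /tmp/rgn.dot -o /tmp/rgn.png
   (the file names here are only examples).  */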
2969
2970
2971/* Returns true if all the basic blocks of the current region have the
2972   BB_DISABLE_SCHEDULE flag set, i.e. the region should not be scheduled.  */
2973bool
2974sched_is_disabled_for_current_region_p (void)
2975{
2976  int bb;
2977
2978  for (bb = 0; bb < current_nr_blocks; bb++)
2979    if (!(BASIC_BLOCK_FOR_FN (cfun,
2980			      BB_TO_BLOCK (bb))->flags & BB_DISABLE_SCHEDULE))
2981      return false;
2982
2983  return true;
2984}
2985
2986/* Free all region dependencies saved in INSN_BACK_DEPS and
2987   INSN_RESOLVED_BACK_DEPS.  The Haifa scheduler does this on the fly
2988   when scheduling, so this function is supposed to be called from
2989   the selective scheduling only.  */
2990void
2991free_rgn_deps (void)
2992{
2993  int bb;
2994
2995  for (bb = 0; bb < current_nr_blocks; bb++)
2996    {
2997      rtx_insn *head, *tail;
2998
2999      gcc_assert (EBB_FIRST_BB (bb) == EBB_LAST_BB (bb));
3000      get_ebb_head_tail (EBB_FIRST_BB (bb), EBB_LAST_BB (bb), &head, &tail);
3001
3002      sched_free_deps (head, tail, false);
3003    }
3004}
3005
3006static int rgn_n_insns;
3007
3008/* Compute insn priority for a current region.  */
3009void
3010compute_priorities (void)
3011{
3012  int bb;
3013
3014  current_sched_info->sched_max_insns_priority = 0;
3015  for (bb = 0; bb < current_nr_blocks; bb++)
3016    {
3017      rtx_insn *head, *tail;
3018
3019      gcc_assert (EBB_FIRST_BB (bb) == EBB_LAST_BB (bb));
3020      get_ebb_head_tail (EBB_FIRST_BB (bb), EBB_LAST_BB (bb), &head, &tail);
3021
3022      if (no_real_insns_p (head, tail))
3023	continue;
3024
3025      rgn_n_insns += set_priorities (head, tail);
3026    }
3027  current_sched_info->sched_max_insns_priority++;
3028}
3029
3030/* (Re-)initialize the arrays of DFA states at the end of each basic block.
3031
3032   SAVED_LAST_BASIC_BLOCK is the previous length of the arrays.  It must be
3033   zero for the first call to this function, to allocate the arrays for the
3034   first time.
3035
3036   This function is called once during initialization of the scheduler, and
3037   called again to resize the arrays if new basic blocks have been created,
3038   for example for speculation recovery code.  */
3039
3040static void
3041realloc_bb_state_array (int saved_last_basic_block)
3042{
3043  char *old_bb_state_array = bb_state_array;
3044  size_t lbb = (size_t) last_basic_block_for_fn (cfun);
3045  size_t slbb = (size_t) saved_last_basic_block;
3046
3047  /* Nothing to do if nothing changed since the last time this was called.  */
3048  if (saved_last_basic_block == last_basic_block_for_fn (cfun))
3049    return;
3050
3051  /* The selective scheduler doesn't use the state arrays.  */
3052  if (sel_sched_p ())
3053    {
3054      gcc_assert (bb_state_array == NULL && bb_state == NULL);
3055      return;
3056    }
3057
3058  gcc_checking_assert (saved_last_basic_block == 0
3059		       || (bb_state_array != NULL && bb_state != NULL));
3060
3061  bb_state_array = XRESIZEVEC (char, bb_state_array, lbb * dfa_state_size);
3062  bb_state = XRESIZEVEC (state_t, bb_state, lbb);
3063
3064  /* If BB_STATE_ARRAY has moved, fix up all the state pointers in the
3065     array.  Otherwise only fix up the newly allocated ones.  For the state
3066     array itself, only initialize the new entries.  */
3067  bool bb_state_array_moved = (bb_state_array != old_bb_state_array);
3068  for (size_t i = bb_state_array_moved ? 0 : slbb; i < lbb; i++)
3069    bb_state[i] = (state_t) (bb_state_array + i * dfa_state_size);
3070  for (size_t i = slbb; i < lbb; i++)
3071    state_reset (bb_state[i]);
3072}
3073
3074/* Free the arrays of DFA states at the end of each basic block.  */
3075
3076static void
3077free_bb_state_array (void)
3078{
3079  free (bb_state_array);
3080  free (bb_state);
3081  bb_state_array = NULL;
3082  bb_state = NULL;
3083}
3084
3085/* Schedule a region.  A region is either an inner loop, a loop-free
3086   subroutine, or a single basic block.  Each bb in the region is
3087   scheduled after its flow predecessors.  */
3088
3089static void
3090schedule_region (int rgn)
3091{
3092  int bb;
3093  int sched_rgn_n_insns = 0;
3094
3095  rgn_n_insns = 0;
3096
3097  /* Do not support register pressure sensitive scheduling for the new regions
3098     as we don't update the liveness info for them.  */
3099  if (sched_pressure != SCHED_PRESSURE_NONE
3100      && rgn >= nr_regions_initial)
3101    {
3102      free_global_sched_pressure_data ();
3103      sched_pressure = SCHED_PRESSURE_NONE;
3104    }
3105
3106  rgn_setup_region (rgn);
3107
3108  /* Don't schedule a region that is marked by
3109     NOTE_DISABLE_SCHED_OF_BLOCK.  */
3110  if (sched_is_disabled_for_current_region_p ())
3111    return;
3112
3113  sched_rgn_compute_dependencies (rgn);
3114
3115  sched_rgn_local_init (rgn);
3116
3117  /* Set priorities.  */
3118  compute_priorities ();
3119
3120  sched_extend_ready_list (rgn_n_insns);
3121
3122  if (sched_pressure == SCHED_PRESSURE_WEIGHTED)
3123    {
3124      sched_init_region_reg_pressure_info ();
3125      for (bb = 0; bb < current_nr_blocks; bb++)
3126	{
3127	  basic_block first_bb, last_bb;
3128	  rtx_insn *head, *tail;
3129
3130	  first_bb = EBB_FIRST_BB (bb);
3131	  last_bb = EBB_LAST_BB (bb);
3132
3133	  get_ebb_head_tail (first_bb, last_bb, &head, &tail);
3134
3135	  if (no_real_insns_p (head, tail))
3136	    {
3137	      gcc_assert (first_bb == last_bb);
3138	      continue;
3139	    }
3140	  sched_setup_bb_reg_pressure_info (first_bb, PREV_INSN (head));
3141	}
3142    }
3143
3144  /* Now we can schedule all blocks.  */
3145  for (bb = 0; bb < current_nr_blocks; bb++)
3146    {
3147      basic_block first_bb, last_bb, curr_bb;
3148      rtx_insn *head, *tail;
3149
3150      first_bb = EBB_FIRST_BB (bb);
3151      last_bb = EBB_LAST_BB (bb);
3152
3153      get_ebb_head_tail (first_bb, last_bb, &head, &tail);
3154
3155      if (no_real_insns_p (head, tail))
3156	{
3157	  gcc_assert (first_bb == last_bb);
3158	  continue;
3159	}
3160
3161      current_sched_info->prev_head = PREV_INSN (head);
3162      current_sched_info->next_tail = NEXT_INSN (tail);
3163
3164      remove_notes (head, tail);
3165
3166      unlink_bb_notes (first_bb, last_bb);
3167
3168      target_bb = bb;
3169
3170      gcc_assert (flag_schedule_interblock || current_nr_blocks == 1);
3171      current_sched_info->queue_must_finish_empty = current_nr_blocks == 1;
3172
3173      curr_bb = first_bb;
3174      if (dbg_cnt (sched_block))
3175        {
3176	  edge f;
3177	  int saved_last_basic_block = last_basic_block_for_fn (cfun);
3178
3179	  schedule_block (&curr_bb, bb_state[first_bb->index]);
3180	  gcc_assert (EBB_FIRST_BB (bb) == first_bb);
3181	  sched_rgn_n_insns += sched_n_insns;
3182	  realloc_bb_state_array (saved_last_basic_block);
3183	  f = find_fallthru_edge (last_bb->succs);
3184	  if (f
3185	      && (!f->probability.initialized_p ()
3186		  || (f->probability.to_reg_br_prob_base () * 100
3187		      / REG_BR_PROB_BASE
3188		      >= param_sched_state_edge_prob_cutoff)))
3189	    {
3190	      memcpy (bb_state[f->dest->index], curr_state,
3191		      dfa_state_size);
3192	      if (sched_verbose >= 5)
3193		fprintf (sched_dump, "saving state for edge %d->%d\n",
3194			 f->src->index, f->dest->index);
3195	    }
3196        }
3197      else
3198        {
3199          sched_rgn_n_insns += rgn_n_insns;
3200        }
3201
3202      /* Clean up.  */
3203      if (current_nr_blocks > 1)
3204	free_trg_info ();
3205    }
3206
3207  /* Sanity check: verify that all region insns were scheduled.  */
3208  gcc_assert (sched_rgn_n_insns == rgn_n_insns);
3209
3210  sched_finish_ready_list ();
3211
3212  /* Done with this region.  */
3213  sched_rgn_local_finish ();
3214
3215  /* Free dependencies.  */
3216  for (bb = 0; bb < current_nr_blocks; ++bb)
3217    free_block_dependencies (bb);
3218
3219  gcc_assert (haifa_recovery_bb_ever_added_p
3220	      || deps_pools_are_empty_p ());
3221}
3222
3223/* Initialize data structures for region scheduling.  */
3224
3225void
3226sched_rgn_init (bool single_blocks_p)
3227{
3228  min_spec_prob = ((param_min_spec_prob * REG_BR_PROB_BASE)
3229		    / 100);
3230
3231  nr_inter = 0;
3232  nr_spec = 0;
3233
3234  extend_regions ();
3235
3236  CONTAINING_RGN (ENTRY_BLOCK) = -1;
3237  CONTAINING_RGN (EXIT_BLOCK) = -1;
3238
3239  realloc_bb_state_array (0);
3240
3241  /* Compute regions for scheduling.  */
3242  if (single_blocks_p
3243      || n_basic_blocks_for_fn (cfun) == NUM_FIXED_BLOCKS + 1
3244      || !flag_schedule_interblock
3245      || is_cfg_nonregular ())
3246    {
3247      find_single_block_region (sel_sched_p ());
3248    }
3249  else
3250    {
3251      /* Compute the dominators and post dominators.  */
3252      if (!sel_sched_p ())
3253	calculate_dominance_info (CDI_DOMINATORS);
3254
3255      /* Find regions.  */
3256      find_rgns ();
3257
3258      if (sched_verbose >= 3)
3259	debug_regions ();
3260
3261      /* For now.  This will move as more and more of haifa is converted
3262	 to using the cfg code.  */
3263      if (!sel_sched_p ())
3264	free_dominance_info (CDI_DOMINATORS);
3265    }
3266
3267  gcc_assert (nr_regions > 0 && nr_regions <= n_basic_blocks_for_fn (cfun));
3268
3269  RGN_BLOCKS (nr_regions) = (RGN_BLOCKS (nr_regions - 1)
3270			     + RGN_NR_BLOCKS (nr_regions - 1));
3271  nr_regions_initial = nr_regions;
3272}
3273
3274/* Free data structures for region scheduling.  */
3275void
3276sched_rgn_finish (void)
3277{
3278  free_bb_state_array ();
3279
3280  /* Reposition the prologue and epilogue notes in case we moved the
3281     prologue/epilogue insns.  */
3282  if (reload_completed)
3283    reposition_prologue_and_epilogue_notes ();
3284
3285  if (sched_verbose)
3286    {
3287      if (reload_completed == 0
3288	  && flag_schedule_interblock)
3289	{
3290	  fprintf (sched_dump,
3291		   "\n;; Procedure interblock/speculative motions == %d/%d \n",
3292		   nr_inter, nr_spec);
3293	}
3294      else
3295	gcc_assert (nr_inter <= 0);
3296      fprintf (sched_dump, "\n\n");
3297    }
3298
3299  nr_regions = 0;
3300
3301  free (rgn_table);
3302  rgn_table = NULL;
3303
3304  free (rgn_bb_table);
3305  rgn_bb_table = NULL;
3306
3307  free (block_to_bb);
3308  block_to_bb = NULL;
3309
3310  free (containing_rgn);
3311  containing_rgn = NULL;
3312
3313  free (ebb_head);
3314  ebb_head = NULL;
3315}
3316
3317/* Set up global variables like CURRENT_BLOCKS and CURRENT_NR_BLOCKS to
3318   point to the region RGN.  */
3319void
3320rgn_setup_region (int rgn)
3321{
3322  int bb;
3323
3324  /* Set variables for the current region.  */
3325  current_nr_blocks = RGN_NR_BLOCKS (rgn);
3326  current_blocks = RGN_BLOCKS (rgn);
3327
3328  /* EBB_HEAD is a region-scope structure.  But we realloc it for
3329     each region to save time/memory/something else.
3330     See comments in rgn_add_block for why we allocate +1 element.  */
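
  /* As an illustration (made-up numbers): for a region with
     CURRENT_BLOCKS == 10 and three blocks, the loop below yields
     ebb_head[] = { 10, 11, 12, 13 }.  */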
3331  ebb_head = XRESIZEVEC (int, ebb_head, current_nr_blocks + 1);
3332  for (bb = 0; bb <= current_nr_blocks; bb++)
3333    ebb_head[bb] = current_blocks + bb;
3334}
3335
3336/* Compute instruction dependencies in region RGN.  */
3337void
3338sched_rgn_compute_dependencies (int rgn)
3339{
3340  if (!RGN_DONT_CALC_DEPS (rgn))
3341    {
3342      int bb;
3343
3344      if (sel_sched_p ())
3345	sched_emulate_haifa_p = 1;
3346
3347      init_deps_global ();
3348
3349      /* Initializations for region data dependence analysis.  */
3350      bb_deps = XNEWVEC (class deps_desc, current_nr_blocks);
3351      for (bb = 0; bb < current_nr_blocks; bb++)
3352	init_deps (bb_deps + bb, false);
3353
3354      /* Initialize bitmap used in add_branch_dependences.  */
3355      insn_referenced = sbitmap_alloc (sched_max_luid);
3356      bitmap_clear (insn_referenced);
3357
3358      /* Compute backward dependencies.  */
3359      for (bb = 0; bb < current_nr_blocks; bb++)
3360	compute_block_dependences (bb);
3361
3362      sbitmap_free (insn_referenced);
3363      free_pending_lists ();
3364      finish_deps_global ();
3365      free (bb_deps);
3366
3367      /* We don't want to recalculate this twice.  */
3368      RGN_DONT_CALC_DEPS (rgn) = 1;
3369
3370      if (sel_sched_p ())
3371	sched_emulate_haifa_p = 0;
3372    }
3373  else
3374    /* Either this is a recovery block, which is always a single-block
3375       region, or we use selective scheduling.  */
3376    gcc_assert (current_nr_blocks == 1 || sel_sched_p ());
3377}
3378
3379/* Initialize region-local data structures (probabilities, dominators,
3380   split and ancestor edges) needed for scheduling region RGN.  */
3381void
3382sched_rgn_local_init (int rgn)
3383{
3384  int bb;
3385
3386  /* Compute interblock info: probabilities, split-edges, dominators, etc.  */
3387  if (current_nr_blocks > 1)
3388    {
3389      basic_block block;
3390      edge e;
3391      edge_iterator ei;
3392
3393      prob = XNEWVEC (int, current_nr_blocks);
3394
3395      dom = sbitmap_vector_alloc (current_nr_blocks, current_nr_blocks);
3396      bitmap_vector_clear (dom, current_nr_blocks);
3397
3398      /* Use ->aux to implement EDGE_TO_BIT mapping.  */
3399      rgn_nr_edges = 0;
3400      FOR_EACH_BB_FN (block, cfun)
3401	{
3402	  if (CONTAINING_RGN (block->index) != rgn)
3403	    continue;
3404	  FOR_EACH_EDGE (e, ei, block->succs)
3405	    SET_EDGE_TO_BIT (e, rgn_nr_edges++);
3406	}
3407
3408      rgn_edges = XNEWVEC (edge, rgn_nr_edges);
3409      rgn_nr_edges = 0;
3410      FOR_EACH_BB_FN (block, cfun)
3411	{
3412	  if (CONTAINING_RGN (block->index) != rgn)
3413	    continue;
3414	  FOR_EACH_EDGE (e, ei, block->succs)
3415	    rgn_edges[rgn_nr_edges++] = e;
3416	}
3417
3418      /* Split edges.  */
3419      pot_split = sbitmap_vector_alloc (current_nr_blocks, rgn_nr_edges);
3420      bitmap_vector_clear (pot_split, current_nr_blocks);
3421      ancestor_edges = sbitmap_vector_alloc (current_nr_blocks, rgn_nr_edges);
3422      bitmap_vector_clear (ancestor_edges, current_nr_blocks);
3423
3424      /* Compute probabilities, dominators, split_edges.  */
3425      for (bb = 0; bb < current_nr_blocks; bb++)
3426	compute_dom_prob_ps (bb);
3427
3428      /* Cleanup ->aux used for EDGE_TO_BIT mapping.  */
3429      /* We don't need them anymore.  But we want to avoid duplication of
3430	 aux fields in the newly created edges.  */
3431      FOR_EACH_BB_FN (block, cfun)
3432	{
3433	  if (CONTAINING_RGN (block->index) != rgn)
3434	    continue;
3435	  FOR_EACH_EDGE (e, ei, block->succs)
3436	    e->aux = NULL;
3437        }
3438    }
3439}
3440
3441/* Free data computed for the finished region.  */
3442void
3443sched_rgn_local_free (void)
3444{
3445  free (prob);
3446  sbitmap_vector_free (dom);
3447  sbitmap_vector_free (pot_split);
3448  sbitmap_vector_free (ancestor_edges);
3449  free (rgn_edges);
3450}
3451
3452/* Free data computed for the finished region, unless selective
   scheduling is in use.  */
3453void
3454sched_rgn_local_finish (void)
3455{
3456  if (current_nr_blocks > 1 && !sel_sched_p ())
3457    {
3458      sched_rgn_local_free ();
3459    }
3460}
3461
3462/* Setup scheduler infos.  */
3463void
3464rgn_setup_common_sched_info (void)
3465{
3466  memcpy (&rgn_common_sched_info, &haifa_common_sched_info,
3467	  sizeof (rgn_common_sched_info));
3468
3469  rgn_common_sched_info.fix_recovery_cfg = rgn_fix_recovery_cfg;
3470  rgn_common_sched_info.add_block = rgn_add_block;
3471  rgn_common_sched_info.estimate_number_of_insns
3472    = rgn_estimate_number_of_insns;
3473  rgn_common_sched_info.sched_pass_id = SCHED_RGN_PASS;
3474
3475  common_sched_info = &rgn_common_sched_info;
3476}
3477
3478/* Setup all *_sched_info structures (for the Haifa frontend
3479   and for the dependence analysis) in the interblock scheduler.  */
3480void
3481rgn_setup_sched_infos (void)
3482{
3483  if (!sel_sched_p ())
3484    memcpy (&rgn_sched_deps_info, &rgn_const_sched_deps_info,
3485	    sizeof (rgn_sched_deps_info));
3486  else
3487    memcpy (&rgn_sched_deps_info, &rgn_const_sel_sched_deps_info,
3488	    sizeof (rgn_sched_deps_info));
3489
3490  sched_deps_info = &rgn_sched_deps_info;
3491
3492  memcpy (&rgn_sched_info, &rgn_const_sched_info, sizeof (rgn_sched_info));
3493  current_sched_info = &rgn_sched_info;
3494}
3495
3496/* The one entry point in this file.  */
3497void
3498schedule_insns (void)
3499{
3500  int rgn;
3501
3502  /* Taking care of this degenerate case makes the rest of
3503     this code simpler.  */
3504  if (n_basic_blocks_for_fn (cfun) == NUM_FIXED_BLOCKS)
3505    return;
3506
3507  rgn_setup_common_sched_info ();
3508  rgn_setup_sched_infos ();
3509
3510  haifa_sched_init ();
3511  sched_rgn_init (reload_completed);
3512
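  /* NOT_IN_DF collects the indices of blocks created during scheduling for
     which no dataflow information has been computed yet (see
     rgn_add_block).  */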
3513  bitmap_initialize (&not_in_df, &bitmap_default_obstack);
3514
3515  /* Schedule every region in the subroutine.  */
3516  for (rgn = 0; rgn < nr_regions; rgn++)
3517    if (dbg_cnt (sched_region))
3518      schedule_region (rgn);
3519
3520  /* Clean up.  */
3521  sched_rgn_finish ();
3522  bitmap_release (&not_in_df);
3523
3524  haifa_sched_finish ();
3525}
3526
/* INSN has been added to or removed from the current region.  */
3528static void
3529rgn_add_remove_insn (rtx_insn *insn, int remove_p)
3530{
3531  if (!remove_p)
3532    rgn_n_insns++;
3533  else
3534    rgn_n_insns--;
3535
3536  if (INSN_BB (insn) == target_bb)
3537    {
3538      if (!remove_p)
3539	target_n_insns++;
3540      else
3541	target_n_insns--;
3542    }
3543}
3544
/* Extend the region-related data structures to cover all basic blocks
   in the function.  */
3546void
3547extend_regions (void)
3548{
3549  rgn_table = XRESIZEVEC (region, rgn_table, n_basic_blocks_for_fn (cfun));
3550  rgn_bb_table = XRESIZEVEC (int, rgn_bb_table,
3551			     n_basic_blocks_for_fn (cfun));
3552  block_to_bb = XRESIZEVEC (int, block_to_bb,
3553			    last_basic_block_for_fn (cfun));
3554  containing_rgn = XRESIZEVEC (int, containing_rgn,
3555			       last_basic_block_for_fn (cfun));
3556}
3557
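/* Make a new single-block region out of the new block BB and append it
   after the existing regions.  */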
3558void
3559rgn_make_new_region_out_of_new_block (basic_block bb)
3560{
3561  int i;
3562
3563  i = RGN_BLOCKS (nr_regions);
  /* I is the first free position in rgn_bb_table.  */
3565
3566  rgn_bb_table[i] = bb->index;
3567  RGN_NR_BLOCKS (nr_regions) = 1;
3568  RGN_HAS_REAL_EBB (nr_regions) = 0;
3569  RGN_DONT_CALC_DEPS (nr_regions) = 0;
3570  CONTAINING_RGN (bb->index) = nr_regions;
3571  BLOCK_TO_BB (bb->index) = 0;
3572
3573  nr_regions++;
3574
3575  RGN_BLOCKS (nr_regions) = i + 1;
3576}
3577
/* BB was added to the ebb after AFTER.  If AFTER is NULL or the exit block,
   BB becomes a new single-block region instead.  */
3579static void
3580rgn_add_block (basic_block bb, basic_block after)
3581{
3582  extend_regions ();
3583  bitmap_set_bit (&not_in_df, bb->index);
3584
3585  if (after == 0 || after == EXIT_BLOCK_PTR_FOR_FN (cfun))
3586    {
3587      rgn_make_new_region_out_of_new_block (bb);
3588      RGN_DONT_CALC_DEPS (nr_regions - 1) = (after
3589					     == EXIT_BLOCK_PTR_FOR_FN (cfun));
3590    }
3591  else
3592    {
3593      int i, pos;
3594
3595      /* We need to fix rgn_table, block_to_bb, containing_rgn
3596	 and ebb_head.  */
3597
3598      BLOCK_TO_BB (bb->index) = BLOCK_TO_BB (after->index);
3599
3600      /* We extend ebb_head to one more position to
3601	 easily find the last position of the last ebb in
3602	 the current region.  Thus, ebb_head[BLOCK_TO_BB (after) + 1]
3603	 is _always_ valid for access.  */
3604
3605      i = BLOCK_TO_BB (after->index) + 1;
3606      pos = ebb_head[i] - 1;
      /* Now POS is the index of the last block in the ebb containing AFTER.  */
3608
3609      /* Find index of basic block AFTER.  */
3610      for (; rgn_bb_table[pos] != after->index; pos--)
3611	;
3612
3613      pos++;
3614      gcc_assert (pos > ebb_head[i - 1]);
3615
      /* I is the index of the ebb right after AFTER's ebb, so ebb_head[i]
	 is valid.  */
3618
      /* Shift every entry from POS up to the last used slot of rgn_bb_table
	 one position to the right to make room for BB:
	   Source position: POS
	   Destination position: POS + 1
	   Last position: RGN_BLOCKS (nr_regions) - 1
	   Number of elements to copy:
	     (last position) - (source position) + 1.  */
3625
3626      memmove (rgn_bb_table + pos + 1,
3627	       rgn_bb_table + pos,
3628	       ((RGN_BLOCKS (nr_regions) - 1) - (pos) + 1)
3629	       * sizeof (*rgn_bb_table));
3630
3631      rgn_bb_table[pos] = bb->index;
3632
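      /* Every ebb from I onwards (including the end-of-region sentinel) now
	 starts one slot later in rgn_bb_table; update its head position.  */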
3633      for (; i <= current_nr_blocks; i++)
3634	ebb_head [i]++;
3635
3636      i = CONTAINING_RGN (after->index);
3637      CONTAINING_RGN (bb->index) = i;
3638
3639      RGN_HAS_REAL_EBB (i) = 1;
3640
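      /* Likewise, every region after the one containing BB now starts one
	 slot later in rgn_bb_table.  */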
3641      for (++i; i <= nr_regions; i++)
3642	RGN_BLOCKS (i)++;
3643    }
3644}
3645
/* Fix internal data after interblock movement of a jump instruction.
   For the meaning of the parameters, see
   sched-int.h: struct sched_info: fix_recovery_cfg.  */
3649static void
3650rgn_fix_recovery_cfg (int bbi, int check_bbi, int check_bb_nexti)
3651{
3652  int old_pos, new_pos, i;
3653
3654  BLOCK_TO_BB (check_bb_nexti) = BLOCK_TO_BB (bbi);
3655
3656  for (old_pos = ebb_head[BLOCK_TO_BB (check_bbi) + 1] - 1;
3657       rgn_bb_table[old_pos] != check_bb_nexti;
3658       old_pos--)
3659    ;
3660  gcc_assert (old_pos > ebb_head[BLOCK_TO_BB (check_bbi)]);
3661
3662  for (new_pos = ebb_head[BLOCK_TO_BB (bbi) + 1] - 1;
3663       rgn_bb_table[new_pos] != bbi;
3664       new_pos--)
3665    ;
3666  new_pos++;
3667  gcc_assert (new_pos > ebb_head[BLOCK_TO_BB (bbi)]);
3668
3669  gcc_assert (new_pos < old_pos);
3670
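  /* Shift the blocks between NEW_POS and OLD_POS one slot to the right and
     place CHECK_BB_NEXTI right after BBI.  */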
3671  memmove (rgn_bb_table + new_pos + 1,
3672	   rgn_bb_table + new_pos,
3673	   (old_pos - new_pos) * sizeof (*rgn_bb_table));
3674
3675  rgn_bb_table[new_pos] = check_bb_nexti;
3676
3677  for (i = BLOCK_TO_BB (bbi) + 1; i <= BLOCK_TO_BB (check_bbi); i++)
3678    ebb_head[i]++;
3679}
3680
/* Return the next block in the ebb chain.  For the meaning of the parameters,
   see sched-int.h: struct sched_info: advance_target_bb.  */
3683static basic_block
3684advance_target_bb (basic_block bb, rtx_insn *insn)
3685{
3686  if (insn)
    return NULL;
3688
3689  gcc_assert (BLOCK_TO_BB (bb->index) == target_bb
3690	      && BLOCK_TO_BB (bb->next_bb->index) == target_bb);
3691  return bb->next_bb;
3692}
3693
3694#endif
3695
/* Run the instruction scheduler in live-range shrinkage mode; interblock
   scheduling is temporarily disabled.  */
3697static unsigned int
3698rest_of_handle_live_range_shrinkage (void)
3699{
3700#ifdef INSN_SCHEDULING
3701  int saved;
3702
3703  initialize_live_range_shrinkage ();
3704  saved = flag_schedule_interblock;
3705  flag_schedule_interblock = false;
3706  schedule_insns ();
3707  flag_schedule_interblock = saved;
3708  finish_live_range_shrinkage ();
3709#endif
3710  return 0;
3711}
3712
/* Run the instruction scheduler for the first scheduling pass.  */
3714static unsigned int
3715rest_of_handle_sched (void)
3716{
3717#ifdef INSN_SCHEDULING
3718  if (flag_selective_scheduling
3719      && ! maybe_skip_selective_scheduling ())
3720    run_selective_scheduling ();
3721  else
3722    schedule_insns ();
3723#endif
3724  return 0;
3725}
3726
3727/* Run second scheduling pass after reload.  */
3728static unsigned int
3729rest_of_handle_sched2 (void)
3730{
3731#ifdef INSN_SCHEDULING
3732  if (flag_selective_scheduling2
3733      && ! maybe_skip_selective_scheduling ())
3734    run_selective_scheduling ();
3735  else
3736    {
      /* Do control and data sched analysis again,
	 and write some more of the results to the dump file.  */
3739      if (flag_sched2_use_superblocks)
3740	schedule_ebbs ();
3741      else
3742	schedule_insns ();
3743    }
3744#endif
3745  return 0;
3746}
3747
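/* Run the instruction scheduler with scheduling fusion enabled.  */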
3748static unsigned int
3749rest_of_handle_sched_fusion (void)
3750{
3751#ifdef INSN_SCHEDULING
3752  sched_fusion = true;
3753  schedule_insns ();
3754  sched_fusion = false;
3755#endif
3756  return 0;
3757}
3758
3759namespace {
3760
3761const pass_data pass_data_live_range_shrinkage =
3762{
3763  RTL_PASS, /* type */
3764  "lr_shrinkage", /* name */
3765  OPTGROUP_NONE, /* optinfo_flags */
3766  TV_LIVE_RANGE_SHRINKAGE, /* tv_id */
3767  0, /* properties_required */
3768  0, /* properties_provided */
3769  0, /* properties_destroyed */
3770  0, /* todo_flags_start */
3771  TODO_df_finish, /* todo_flags_finish */
3772};
3773
3774class pass_live_range_shrinkage : public rtl_opt_pass
3775{
3776public:
3777  pass_live_range_shrinkage(gcc::context *ctxt)
3778    : rtl_opt_pass(pass_data_live_range_shrinkage, ctxt)
3779  {}
3780
3781  /* opt_pass methods: */
3782  virtual bool gate (function *)
3783    {
3784#ifdef INSN_SCHEDULING
3785      return flag_live_range_shrinkage;
3786#else
      return false;
3788#endif
3789    }
3790
3791  virtual unsigned int execute (function *)
3792    {
3793      return rest_of_handle_live_range_shrinkage ();
3794    }
3795
3796}; // class pass_live_range_shrinkage
3797
3798} // anon namespace
3799
3800rtl_opt_pass *
3801make_pass_live_range_shrinkage (gcc::context *ctxt)
3802{
3803  return new pass_live_range_shrinkage (ctxt);
3804}
3805
3806namespace {
3807
3808const pass_data pass_data_sched =
3809{
3810  RTL_PASS, /* type */
3811  "sched1", /* name */
3812  OPTGROUP_NONE, /* optinfo_flags */
3813  TV_SCHED, /* tv_id */
3814  0, /* properties_required */
3815  0, /* properties_provided */
3816  0, /* properties_destroyed */
3817  0, /* todo_flags_start */
3818  TODO_df_finish, /* todo_flags_finish */
3819};
3820
3821class pass_sched : public rtl_opt_pass
3822{
3823public:
3824  pass_sched (gcc::context *ctxt)
3825    : rtl_opt_pass (pass_data_sched, ctxt)
3826  {}
3827
3828  /* opt_pass methods: */
3829  virtual bool gate (function *);
3830  virtual unsigned int execute (function *) { return rest_of_handle_sched (); }
3831
3832}; // class pass_sched
3833
3834bool
3835pass_sched::gate (function *)
3836{
3837#ifdef INSN_SCHEDULING
3838  return optimize > 0 && flag_schedule_insns && dbg_cnt (sched_func);
3839#else
  return false;
3841#endif
3842}
3843
3844} // anon namespace
3845
3846rtl_opt_pass *
3847make_pass_sched (gcc::context *ctxt)
3848{
3849  return new pass_sched (ctxt);
3850}
3851
3852namespace {
3853
3854const pass_data pass_data_sched2 =
3855{
3856  RTL_PASS, /* type */
3857  "sched2", /* name */
3858  OPTGROUP_NONE, /* optinfo_flags */
3859  TV_SCHED2, /* tv_id */
3860  0, /* properties_required */
3861  0, /* properties_provided */
3862  0, /* properties_destroyed */
3863  0, /* todo_flags_start */
3864  TODO_df_finish, /* todo_flags_finish */
3865};
3866
3867class pass_sched2 : public rtl_opt_pass
3868{
3869public:
3870  pass_sched2 (gcc::context *ctxt)
3871    : rtl_opt_pass (pass_data_sched2, ctxt)
3872  {}
3873
3874  /* opt_pass methods: */
3875  virtual bool gate (function *);
3876  virtual unsigned int execute (function *)
3877    {
3878      return rest_of_handle_sched2 ();
3879    }
3880
3881}; // class pass_sched2
3882
3883bool
3884pass_sched2::gate (function *)
3885{
3886#ifdef INSN_SCHEDULING
3887  return optimize > 0 && flag_schedule_insns_after_reload
3888    && !targetm.delay_sched2 && dbg_cnt (sched2_func);
3889#else
  return false;
3891#endif
3892}
3893
3894} // anon namespace
3895
3896rtl_opt_pass *
3897make_pass_sched2 (gcc::context *ctxt)
3898{
3899  return new pass_sched2 (ctxt);
3900}
3901
3902namespace {
3903
3904const pass_data pass_data_sched_fusion =
3905{
3906  RTL_PASS, /* type */
3907  "sched_fusion", /* name */
3908  OPTGROUP_NONE, /* optinfo_flags */
3909  TV_SCHED_FUSION, /* tv_id */
3910  0, /* properties_required */
3911  0, /* properties_provided */
3912  0, /* properties_destroyed */
3913  0, /* todo_flags_start */
3914  TODO_df_finish, /* todo_flags_finish */
3915};
3916
3917class pass_sched_fusion : public rtl_opt_pass
3918{
3919public:
3920  pass_sched_fusion (gcc::context *ctxt)
3921    : rtl_opt_pass (pass_data_sched_fusion, ctxt)
3922  {}
3923
3924  /* opt_pass methods: */
3925  virtual bool gate (function *);
3926  virtual unsigned int execute (function *)
3927    {
3928      return rest_of_handle_sched_fusion ();
3929    }
3930
}; // class pass_sched_fusion
3932
3933bool
3934pass_sched_fusion::gate (function *)
3935{
3936#ifdef INSN_SCHEDULING
3937  /* Scheduling fusion relies on peephole2 to do real fusion work,
3938     so only enable it if peephole2 is in effect.  */
  return (optimize > 0 && flag_peephole2
	  && flag_schedule_fusion && targetm.sched.fusion_priority != NULL);
3941#else
  return false;
3943#endif
3944}
3945
3946} // anon namespace
3947
3948rtl_opt_pass *
3949make_pass_sched_fusion (gcc::context *ctxt)
3950{
3951  return new pass_sched_fusion (ctxt);
3952}
3953
3954#if __GNUC__ >= 10
3955#  pragma GCC diagnostic pop
3956#endif
3957