1/* Swing Modulo Scheduling implementation.
2   Copyright (C) 2004-2015 Free Software Foundation, Inc.
3   Contributed by Ayal Zaks and Mustafa Hagog <zaks,mustafa@il.ibm.com>
4
5This file is part of GCC.
6
7GCC is free software; you can redistribute it and/or modify it under
8the terms of the GNU General Public License as published by the Free
9Software Foundation; either version 3, or (at your option) any later
10version.
11
12GCC is distributed in the hope that it will be useful, but WITHOUT ANY
13WARRANTY; without even the implied warranty of MERCHANTABILITY or
14FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
15for more details.
16
17You should have received a copy of the GNU General Public License
18along with GCC; see the file COPYING3.  If not see
19<http://www.gnu.org/licenses/>.  */
20
21
22#include "config.h"
23#include "system.h"
24#include "coretypes.h"
25#include "tm.h"
26#include "diagnostic-core.h"
27#include "rtl.h"
28#include "tm_p.h"
29#include "hard-reg-set.h"
30#include "regs.h"
31#include "hashtab.h"
32#include "hash-set.h"
33#include "vec.h"
34#include "machmode.h"
35#include "input.h"
36#include "function.h"
37#include "profile.h"
38#include "flags.h"
39#include "insn-config.h"
40#include "insn-attr.h"
41#include "except.h"
42#include "recog.h"
43#include "dominance.h"
44#include "cfg.h"
45#include "cfgrtl.h"
46#include "predict.h"
47#include "basic-block.h"
48#include "sched-int.h"
49#include "target.h"
50#include "cfgloop.h"
51#include "double-int.h"
52#include "alias.h"
53#include "symtab.h"
54#include "wide-int.h"
55#include "inchash.h"
56#include "tree.h"
57#include "insn-codes.h"
58#include "optabs.h"
59#include "statistics.h"
60#include "real.h"
61#include "fixed-value.h"
62#include "expmed.h"
63#include "dojump.h"
64#include "explow.h"
65#include "calls.h"
66#include "emit-rtl.h"
67#include "varasm.h"
68#include "stmt.h"
69#include "expr.h"
70#include "params.h"
71#include "gcov-io.h"
72#include "sbitmap.h"
73#include "df.h"
74#include "ddg.h"
75#include "tree-pass.h"
76#include "dbgcnt.h"
77#include "loop-unroll.h"
78
79#ifdef INSN_SCHEDULING
80
81/* This file contains the implementation of the Swing Modulo Scheduler,
82   described in the following references:
83   [1] J. Llosa, A. Gonzalez, E. Ayguade, M. Valero., and J. Eckhardt.
84       Lifetime--sensitive modulo scheduling in a production environment.
85       IEEE Trans. on Comps., 50(3), March 2001
86   [2] J. Llosa, A. Gonzalez, E. Ayguade, and M. Valero.
87       Swing Modulo Scheduling: A Lifetime Sensitive Approach.
88       PACT '96 , pages 80-87, October 1996 (Boston - Massachusetts - USA).
89
90   The basic structure is:
91   1. Build a data-dependence graph (DDG) for each loop.
92   2. Use the DDG to order the insns of a loop (not in topological order
93      necessarily, but rather) trying to place each insn after all its
94      predecessors _or_ after all its successors.
95   3. Compute MII: a lower bound on the number of cycles to schedule the loop.
96   4. Use the ordering to perform list-scheduling of the loop:
97      1. Set II = MII.  We will try to schedule the loop within II cycles.
98      2. Try to schedule the insns one by one according to the ordering.
99	 For each insn compute an interval of cycles by considering already-
100	 scheduled preds and succs (and associated latencies); try to place
101	 the insn in the cycles of this window checking for potential
102	 resource conflicts (using the DFA interface).
103	 Note: this is different from the cycle-scheduling of schedule_insns;
104	 here the insns are not scheduled monotonically top-down (nor bottom-
105	 up).
106      3. If failed in scheduling all insns - bump II++ and try again, unless
107	 II reaches an upper bound MaxII, in which case report failure.
108   5. If we succeeded in scheduling the loop within II cycles, we now
109      generate prolog and epilog, decrease the counter of the loop, and
110      perform modulo variable expansion for live ranges that span more than
111      II cycles (i.e. use register copies to prevent a def from overwriting
112      itself before reaching the use).
113
114    SMS works with countable loops (1) whose control part can be easily
115    decoupled from the rest of the loop and (2) whose loop count can
116    be easily adjusted.  This is because we peel a constant number of
117    iterations into a prologue and epilogue for which we want to avoid
118    emitting the control part, and a kernel which is to iterate that
119    constant number of iterations less than the original loop.  So the
120    control part should be a set of insns clearly identified and having
121    its own iv, not otherwise used in the loop (at-least for now), which
122    initializes a register before the loop to the number of iterations.
123    Currently SMS relies on the do-loop pattern to recognize such loops,
124    where (1) the control part comprises of all insns defining and/or
125    using a certain 'count' register and (2) the loop count can be
126    adjusted by modifying this register prior to the loop.
127    TODO: Rely on cfgloop analysis instead.  */
128
129/* This page defines partial-schedule structures and functions for
130   modulo scheduling.  */
131
132typedef struct partial_schedule *partial_schedule_ptr;
133typedef struct ps_insn *ps_insn_ptr;
134
135/* The minimum (absolute) cycle that a node of ps was scheduled in.  */
136#define PS_MIN_CYCLE(ps) (((partial_schedule_ptr)(ps))->min_cycle)
137
138/* The maximum (absolute) cycle that a node of ps was scheduled in.  */
139#define PS_MAX_CYCLE(ps) (((partial_schedule_ptr)(ps))->max_cycle)
140
141/* Perform signed modulo, always returning a non-negative value.  */
142#define SMODULO(x,y) ((x) % (y) < 0 ? ((x) % (y) + (y)) : (x) % (y))
143
144/* The number of different iterations the nodes in ps span, assuming
145   the stage boundaries are placed efficiently.  */
146#define CALC_STAGE_COUNT(max_cycle,min_cycle,ii) ((max_cycle - min_cycle \
147                         + 1 + ii - 1) / ii)
148/* The stage count of ps.  */
149#define PS_STAGE_COUNT(ps) (((partial_schedule_ptr)(ps))->stage_count)
150
151/* A single instruction in the partial schedule.  */
152struct ps_insn
153{
154  /* Identifies the instruction to be scheduled.  Values smaller than
155     the ddg's num_nodes refer directly to ddg nodes.  A value of
156     X - num_nodes refers to register move X.  */
157  int id;
158
159  /* The (absolute) cycle in which the PS instruction is scheduled.
160     Same as SCHED_TIME (node).  */
161  int cycle;
162
163  /* The next/prev PS_INSN in the same row.  */
164  ps_insn_ptr next_in_row,
165	      prev_in_row;
166
167};
168
169/* Information about a register move that has been added to a partial
170   schedule.  */
171struct ps_reg_move_info
172{
173  /* The source of the move is defined by the ps_insn with id DEF.
174     The destination is used by the ps_insns with the ids in USES.  */
175  int def;
176  sbitmap uses;
177
178  /* The original form of USES' instructions used OLD_REG, but they
179     should now use NEW_REG.  */
180  rtx old_reg;
181  rtx new_reg;
182
183  /* The number of consecutive stages that the move occupies.  */
184  int num_consecutive_stages;
185
186  /* An instruction that sets NEW_REG to the correct value.  The first
187     move associated with DEF will have an rhs of OLD_REG; later moves
188     use the result of the previous move.  */
189  rtx_insn *insn;
190};
191
192typedef struct ps_reg_move_info ps_reg_move_info;
193
194/* Holds the partial schedule as an array of II rows.  Each entry of the
195   array points to a linked list of PS_INSNs, which represents the
196   instructions that are scheduled for that row.  */
197struct partial_schedule
198{
199  int ii;	/* Number of rows in the partial schedule.  */
200  int history;  /* Threshold for conflict checking using DFA.  */
201
202  /* rows[i] points to linked list of insns scheduled in row i (0<=i<ii).  */
203  ps_insn_ptr *rows;
204
205  /* All the moves added for this partial schedule.  Index X has
206     a ps_insn id of X + g->num_nodes.  */
207  vec<ps_reg_move_info> reg_moves;
208
209  /*  rows_length[i] holds the number of instructions in the row.
210      It is used only (as an optimization) to back off quickly from
211      trying to schedule a node in a full row; that is, to avoid running
212      through futile DFA state transitions.  */
213  int *rows_length;
214
215  /* The earliest absolute cycle of an insn in the partial schedule.  */
216  int min_cycle;
217
218  /* The latest absolute cycle of an insn in the partial schedule.  */
219  int max_cycle;
220
221  ddg_ptr g;	/* The DDG of the insns in the partial schedule.  */
222
223  int stage_count;  /* The stage count of the partial schedule.  */
224};
225
226
227static partial_schedule_ptr create_partial_schedule (int ii, ddg_ptr, int history);
228static void free_partial_schedule (partial_schedule_ptr);
229static void reset_partial_schedule (partial_schedule_ptr, int new_ii);
230void print_partial_schedule (partial_schedule_ptr, FILE *);
231static void verify_partial_schedule (partial_schedule_ptr, sbitmap);
232static ps_insn_ptr ps_add_node_check_conflicts (partial_schedule_ptr,
233						int, int, sbitmap, sbitmap);
234static void rotate_partial_schedule (partial_schedule_ptr, int);
235void set_row_column_for_ps (partial_schedule_ptr);
236static void ps_insert_empty_row (partial_schedule_ptr, int, sbitmap);
237static int compute_split_row (sbitmap, int, int, int, ddg_node_ptr);
238
239
240/* This page defines constants and structures for the modulo scheduling
241   driver.  */
242
243static int sms_order_nodes (ddg_ptr, int, int *, int *);
244static void set_node_sched_params (ddg_ptr);
245static partial_schedule_ptr sms_schedule_by_order (ddg_ptr, int, int, int *);
246static void permute_partial_schedule (partial_schedule_ptr, rtx_insn *);
247static void generate_prolog_epilog (partial_schedule_ptr, struct loop *,
248                                    rtx, rtx);
249static int calculate_stage_count (partial_schedule_ptr, int);
250static void calculate_must_precede_follow (ddg_node_ptr, int, int,
251					   int, int, sbitmap, sbitmap, sbitmap);
252static int get_sched_window (partial_schedule_ptr, ddg_node_ptr,
253			     sbitmap, int, int *, int *, int *);
254static bool try_scheduling_node_in_cycle (partial_schedule_ptr, int, int,
255					  sbitmap, int *, sbitmap, sbitmap);
256static void remove_node_from_ps (partial_schedule_ptr, ps_insn_ptr);
257
258#define NODE_ASAP(node) ((node)->aux.count)
259
260#define SCHED_PARAMS(x) (&node_sched_param_vec[x])
261#define SCHED_TIME(x) (SCHED_PARAMS (x)->time)
262#define SCHED_ROW(x) (SCHED_PARAMS (x)->row)
263#define SCHED_STAGE(x) (SCHED_PARAMS (x)->stage)
264#define SCHED_COLUMN(x) (SCHED_PARAMS (x)->column)
265
266/* The scheduling parameters held for each node.  */
267typedef struct node_sched_params
268{
269  int time;	/* The absolute scheduling cycle.  */
270
271  int row;    /* Holds time % ii.  */
272  int stage;  /* Holds time / ii.  */
273
274  /* The column of a node inside the ps.  If nodes u, v are on the same row,
275     u will precede v if column (u) < column (v).  */
276  int column;
277} *node_sched_params_ptr;
278
279typedef struct node_sched_params node_sched_params;
280
281/* The following three functions are copied from the current scheduler
282   code in order to use sched_analyze() for computing the dependencies.
283   They are used when initializing the sched_info structure.  */
284static const char *
285sms_print_insn (const rtx_insn *insn, int aligned ATTRIBUTE_UNUSED)
286{
287  static char tmp[80];
288
289  sprintf (tmp, "i%4d", INSN_UID (insn));
290  return tmp;
291}
292
293static void
294compute_jump_reg_dependencies (rtx insn ATTRIBUTE_UNUSED,
295			       regset used ATTRIBUTE_UNUSED)
296{
297}
298
299static struct common_sched_info_def sms_common_sched_info;
300
301static struct sched_deps_info_def sms_sched_deps_info =
302  {
303    compute_jump_reg_dependencies,
304    NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
305    NULL,
306    0, 0, 0
307  };
308
309static struct haifa_sched_info sms_sched_info =
310{
311  NULL,
312  NULL,
313  NULL,
314  NULL,
315  NULL,
316  sms_print_insn,
317  NULL,
318  NULL, /* insn_finishes_block_p */
319  NULL, NULL,
320  NULL, NULL,
321  0, 0,
322
323  NULL, NULL, NULL, NULL,
324  NULL, NULL,
325  0
326};
327
328/* Partial schedule instruction ID in PS is a register move.  Return
329   information about it.  */
330static struct ps_reg_move_info *
331ps_reg_move (partial_schedule_ptr ps, int id)
332{
333  gcc_checking_assert (id >= ps->g->num_nodes);
334  return &ps->reg_moves[id - ps->g->num_nodes];
335}
336
337/* Return the rtl instruction that is being scheduled by partial schedule
338   instruction ID, which belongs to schedule PS.  */
339static rtx_insn *
340ps_rtl_insn (partial_schedule_ptr ps, int id)
341{
342  if (id < ps->g->num_nodes)
343    return ps->g->nodes[id].insn;
344  else
345    return ps_reg_move (ps, id)->insn;
346}
347
348/* Partial schedule instruction ID, which belongs to PS, occurred in
349   the original (unscheduled) loop.  Return the first instruction
350   in the loop that was associated with ps_rtl_insn (PS, ID).
351   If the instruction had some notes before it, this is the first
352   of those notes.  */
353static rtx_insn *
354ps_first_note (partial_schedule_ptr ps, int id)
355{
356  gcc_assert (id < ps->g->num_nodes);
357  return ps->g->nodes[id].first_note;
358}
359
360/* Return the number of consecutive stages that are occupied by
361   partial schedule instruction ID in PS.  */
362static int
363ps_num_consecutive_stages (partial_schedule_ptr ps, int id)
364{
365  if (id < ps->g->num_nodes)
366    return 1;
367  else
368    return ps_reg_move (ps, id)->num_consecutive_stages;
369}
370
371/* Given HEAD and TAIL which are the first and last insns in a loop;
372   return the register which controls the loop.  Return zero if it has
373   more than one occurrence in the loop besides the control part or the
374   do-loop pattern is not of the form we expect.  */
375static rtx
376doloop_register_get (rtx_insn *head ATTRIBUTE_UNUSED, rtx_insn *tail ATTRIBUTE_UNUSED)
377{
378#ifdef HAVE_doloop_end
379  rtx reg, condition;
380  rtx_insn *insn, *first_insn_not_to_check;
381
382  if (!JUMP_P (tail))
383    return NULL_RTX;
384
385  /* TODO: Free SMS's dependence on doloop_condition_get.  */
386  condition = doloop_condition_get (tail);
387  if (! condition)
388    return NULL_RTX;
389
390  if (REG_P (XEXP (condition, 0)))
391    reg = XEXP (condition, 0);
392  else if (GET_CODE (XEXP (condition, 0)) == PLUS
393	   && REG_P (XEXP (XEXP (condition, 0), 0)))
394    reg = XEXP (XEXP (condition, 0), 0);
395  else
396    gcc_unreachable ();
397
398  /* Check that the COUNT_REG has no other occurrences in the loop
399     until the decrement.  We assume the control part consists of
400     either a single (parallel) branch-on-count or a (non-parallel)
401     branch immediately preceded by a single (decrement) insn.  */
402  first_insn_not_to_check = (GET_CODE (PATTERN (tail)) == PARALLEL ? tail
403                             : prev_nondebug_insn (tail));
404
405  for (insn = head; insn != first_insn_not_to_check; insn = NEXT_INSN (insn))
406    if (!DEBUG_INSN_P (insn) && reg_mentioned_p (reg, insn))
407      {
408        if (dump_file)
409        {
410          fprintf (dump_file, "SMS count_reg found ");
411          print_rtl_single (dump_file, reg);
412          fprintf (dump_file, " outside control in insn:\n");
413          print_rtl_single (dump_file, insn);
414        }
415
416        return NULL_RTX;
417      }
418
419  return reg;
420#else
421  return NULL_RTX;
422#endif
423}
424
425/* Check if COUNT_REG is set to a constant in the PRE_HEADER block, so
426   that the number of iterations is a compile-time constant.  If so,
427   return the rtx_insn that sets COUNT_REG to a constant, and set COUNT to
428   this constant.  Otherwise return 0.  */
429static rtx_insn *
430const_iteration_count (rtx count_reg, basic_block pre_header,
431		       int64_t * count)
432{
433  rtx_insn *insn;
434  rtx_insn *head, *tail;
435
436  if (! pre_header)
437    return NULL;
438
439  get_ebb_head_tail (pre_header, pre_header, &head, &tail);
440
441  for (insn = tail; insn != PREV_INSN (head); insn = PREV_INSN (insn))
442    if (NONDEBUG_INSN_P (insn) && single_set (insn) &&
443	rtx_equal_p (count_reg, SET_DEST (single_set (insn))))
444      {
445	rtx pat = single_set (insn);
446
447	if (CONST_INT_P (SET_SRC (pat)))
448	  {
449	    *count = INTVAL (SET_SRC (pat));
450	    return insn;
451	  }
452
453	return NULL;
454      }
455
456  return NULL;
457}
458
459/* A very simple resource-based lower bound on the initiation interval.
460   ??? Improve the accuracy of this bound by considering the
461   utilization of various units.  */
462static int
463res_MII (ddg_ptr g)
464{
465  if (targetm.sched.sms_res_mii)
466    return targetm.sched.sms_res_mii (g);
467
468  return ((g->num_nodes - g->num_debug) / issue_rate);
469}
470
471
472/* A vector that contains the sched data for each ps_insn.  */
473static vec<node_sched_params> node_sched_param_vec;
474
475/* Allocate sched_params for each node and initialize it.  */
476static void
477set_node_sched_params (ddg_ptr g)
478{
479  node_sched_param_vec.truncate (0);
480  node_sched_param_vec.safe_grow_cleared (g->num_nodes);
481}
482
483/* Make sure that node_sched_param_vec has an entry for every move in PS.  */
484static void
485extend_node_sched_params (partial_schedule_ptr ps)
486{
487  node_sched_param_vec.safe_grow_cleared (ps->g->num_nodes
488					  + ps->reg_moves.length ());
489}
490
491/* Update the sched_params (time, row and stage) for node U using the II,
492   the CYCLE of U and MIN_CYCLE.
493   We're not simply taking the following
494   SCHED_STAGE (u) = CALC_STAGE_COUNT (SCHED_TIME (u), min_cycle, ii);
495   because the stages may not be aligned on cycle 0.  */
496static void
497update_node_sched_params (int u, int ii, int cycle, int min_cycle)
498{
499  int sc_until_cycle_zero;
500  int stage;
501
502  SCHED_TIME (u) = cycle;
503  SCHED_ROW (u) = SMODULO (cycle, ii);
504
505  /* The calculation of stage count is done adding the number
506     of stages before cycle zero and after cycle zero.  */
507  sc_until_cycle_zero = CALC_STAGE_COUNT (-1, min_cycle, ii);
508
509  if (SCHED_TIME (u) < 0)
510    {
511      stage = CALC_STAGE_COUNT (-1, SCHED_TIME (u), ii);
512      SCHED_STAGE (u) = sc_until_cycle_zero - stage;
513    }
514  else
515    {
516      stage = CALC_STAGE_COUNT (SCHED_TIME (u), 0, ii);
517      SCHED_STAGE (u) = sc_until_cycle_zero + stage - 1;
518    }
519}
520
521static void
522print_node_sched_params (FILE *file, int num_nodes, partial_schedule_ptr ps)
523{
524  int i;
525
526  if (! file)
527    return;
528  for (i = 0; i < num_nodes; i++)
529    {
530      node_sched_params_ptr nsp = SCHED_PARAMS (i);
531
532      fprintf (file, "Node = %d; INSN = %d\n", i,
533	       INSN_UID (ps_rtl_insn (ps, i)));
534      fprintf (file, " asap = %d:\n", NODE_ASAP (&ps->g->nodes[i]));
535      fprintf (file, " time = %d:\n", nsp->time);
536      fprintf (file, " stage = %d:\n", nsp->stage);
537    }
538}
539
540/* Set SCHED_COLUMN for each instruction in row ROW of PS.  */
541static void
542set_columns_for_row (partial_schedule_ptr ps, int row)
543{
544  ps_insn_ptr cur_insn;
545  int column;
546
547  column = 0;
548  for (cur_insn = ps->rows[row]; cur_insn; cur_insn = cur_insn->next_in_row)
549    SCHED_COLUMN (cur_insn->id) = column++;
550}
551
552/* Set SCHED_COLUMN for each instruction in PS.  */
553static void
554set_columns_for_ps (partial_schedule_ptr ps)
555{
556  int row;
557
558  for (row = 0; row < ps->ii; row++)
559    set_columns_for_row (ps, row);
560}
561
562/* Try to schedule the move with ps_insn identifier I_REG_MOVE in PS.
563   Its single predecessor has already been scheduled, as has its
564   ddg node successors.  (The move may have also another move as its
565   successor, in which case that successor will be scheduled later.)
566
567   The move is part of a chain that satisfies register dependencies
568   between a producing ddg node and various consuming ddg nodes.
569   If some of these dependencies have a distance of 1 (meaning that
570   the use is upward-exposed) then DISTANCE1_USES is nonnull and
571   contains the set of uses with distance-1 dependencies.
572   DISTANCE1_USES is null otherwise.
573
574   MUST_FOLLOW is a scratch bitmap that is big enough to hold
575   all current ps_insn ids.
576
577   Return true on success.  */
578static bool
579schedule_reg_move (partial_schedule_ptr ps, int i_reg_move,
580		   sbitmap distance1_uses, sbitmap must_follow)
581{
582  unsigned int u;
583  int this_time, this_distance, this_start, this_end, this_latency;
584  int start, end, c, ii;
585  sbitmap_iterator sbi;
586  ps_reg_move_info *move;
587  rtx_insn *this_insn;
588  ps_insn_ptr psi;
589
590  move = ps_reg_move (ps, i_reg_move);
591  ii = ps->ii;
592  if (dump_file)
593    {
594      fprintf (dump_file, "Scheduling register move INSN %d; ii = %d"
595	       ", min cycle = %d\n\n", INSN_UID (move->insn), ii,
596	       PS_MIN_CYCLE (ps));
597      print_rtl_single (dump_file, move->insn);
598      fprintf (dump_file, "\n%11s %11s %5s\n", "start", "end", "time");
599      fprintf (dump_file, "=========== =========== =====\n");
600    }
601
602  start = INT_MIN;
603  end = INT_MAX;
604
605  /* For dependencies of distance 1 between a producer ddg node A
606     and consumer ddg node B, we have a chain of dependencies:
607
608        A --(T,L1,1)--> M1 --(T,L2,0)--> M2 ... --(T,Ln,0)--> B
609
610     where Mi is the ith move.  For dependencies of distance 0 between
611     a producer ddg node A and consumer ddg node C, we have a chain of
612     dependencies:
613
614        A --(T,L1',0)--> M1' --(T,L2',0)--> M2' ... --(T,Ln',0)--> C
615
616     where Mi' occupies the same position as Mi but occurs a stage later.
617     We can only schedule each move once, so if we have both types of
618     chain, we model the second as:
619
620        A --(T,L1',1)--> M1 --(T,L2',0)--> M2 ... --(T,Ln',-1)--> C
621
622     First handle the dependencies between the previously-scheduled
623     predecessor and the move.  */
624  this_insn = ps_rtl_insn (ps, move->def);
625  this_latency = insn_latency (this_insn, move->insn);
626  this_distance = distance1_uses && move->def < ps->g->num_nodes ? 1 : 0;
627  this_time = SCHED_TIME (move->def) - this_distance * ii;
628  this_start = this_time + this_latency;
629  this_end = this_time + ii;
630  if (dump_file)
631    fprintf (dump_file, "%11d %11d %5d %d --(T,%d,%d)--> %d\n",
632	     this_start, this_end, SCHED_TIME (move->def),
633	     INSN_UID (this_insn), this_latency, this_distance,
634	     INSN_UID (move->insn));
635
636  if (start < this_start)
637    start = this_start;
638  if (end > this_end)
639    end = this_end;
640
641  /* Handle the dependencies between the move and previously-scheduled
642     successors.  */
643  EXECUTE_IF_SET_IN_BITMAP (move->uses, 0, u, sbi)
644    {
645      this_insn = ps_rtl_insn (ps, u);
646      this_latency = insn_latency (move->insn, this_insn);
647      if (distance1_uses && !bitmap_bit_p (distance1_uses, u))
648	this_distance = -1;
649      else
650	this_distance = 0;
651      this_time = SCHED_TIME (u) + this_distance * ii;
652      this_start = this_time - ii;
653      this_end = this_time - this_latency;
654      if (dump_file)
655	fprintf (dump_file, "%11d %11d %5d %d --(T,%d,%d)--> %d\n",
656		 this_start, this_end, SCHED_TIME (u), INSN_UID (move->insn),
657		 this_latency, this_distance, INSN_UID (this_insn));
658
659      if (start < this_start)
660	start = this_start;
661      if (end > this_end)
662	end = this_end;
663    }
664
665  if (dump_file)
666    {
667      fprintf (dump_file, "----------- ----------- -----\n");
668      fprintf (dump_file, "%11d %11d %5s %s\n", start, end, "", "(max, min)");
669    }
670
671  bitmap_clear (must_follow);
672  bitmap_set_bit (must_follow, move->def);
673
674  start = MAX (start, end - (ii - 1));
675  for (c = end; c >= start; c--)
676    {
677      psi = ps_add_node_check_conflicts (ps, i_reg_move, c,
678					 move->uses, must_follow);
679      if (psi)
680	{
681	  update_node_sched_params (i_reg_move, ii, c, PS_MIN_CYCLE (ps));
682	  if (dump_file)
683	    fprintf (dump_file, "\nScheduled register move INSN %d at"
684		     " time %d, row %d\n\n", INSN_UID (move->insn), c,
685		     SCHED_ROW (i_reg_move));
686	  return true;
687	}
688    }
689
690  if (dump_file)
691    fprintf (dump_file, "\nNo available slot\n\n");
692
693  return false;
694}
695
696/*
697   Breaking intra-loop register anti-dependences:
698   Each intra-loop register anti-dependence implies a cross-iteration true
699   dependence of distance 1. Therefore, we can remove such false dependencies
700   and figure out if the partial schedule broke them by checking if (for a
701   true-dependence of distance 1): SCHED_TIME (def) < SCHED_TIME (use) and
702   if so generate a register move.   The number of such moves is equal to:
703              SCHED_TIME (use) - SCHED_TIME (def)       { 0 broken
704   nreg_moves = ----------------------------------- + 1 - {   dependence.
705                            ii                          { 1 if not.
706*/
707static bool
708schedule_reg_moves (partial_schedule_ptr ps)
709{
710  ddg_ptr g = ps->g;
711  int ii = ps->ii;
712  int i;
713
714  for (i = 0; i < g->num_nodes; i++)
715    {
716      ddg_node_ptr u = &g->nodes[i];
717      ddg_edge_ptr e;
718      int nreg_moves = 0, i_reg_move;
719      rtx prev_reg, old_reg;
720      int first_move;
721      int distances[2];
722      sbitmap must_follow;
723      sbitmap distance1_uses;
724      rtx set = single_set (u->insn);
725
726      /* Skip instructions that do not set a register.  */
727      if ((set && !REG_P (SET_DEST (set))))
728        continue;
729
730      /* Compute the number of reg_moves needed for u, by looking at life
731	 ranges started at u (excluding self-loops).  */
732      distances[0] = distances[1] = false;
733      for (e = u->out; e; e = e->next_out)
734	if (e->type == TRUE_DEP && e->dest != e->src)
735	  {
736	    int nreg_moves4e = (SCHED_TIME (e->dest->cuid)
737				- SCHED_TIME (e->src->cuid)) / ii;
738
739            if (e->distance == 1)
740              nreg_moves4e = (SCHED_TIME (e->dest->cuid)
741			      - SCHED_TIME (e->src->cuid) + ii) / ii;
742
743	    /* If dest precedes src in the schedule of the kernel, then dest
744	       will read before src writes and we can save one reg_copy.  */
745	    if (SCHED_ROW (e->dest->cuid) == SCHED_ROW (e->src->cuid)
746		&& SCHED_COLUMN (e->dest->cuid) < SCHED_COLUMN (e->src->cuid))
747	      nreg_moves4e--;
748
749            if (nreg_moves4e >= 1)
750	      {
751		/* !single_set instructions are not supported yet and
752		   thus we do not except to encounter them in the loop
753		   except from the doloop part.  For the latter case
754		   we assume no regmoves are generated as the doloop
755		   instructions are tied to the branch with an edge.  */
756		gcc_assert (set);
757		/* If the instruction contains auto-inc register then
758		   validate that the regmov is being generated for the
759		   target regsiter rather then the inc'ed register.	*/
760		gcc_assert (!autoinc_var_is_used_p (u->insn, e->dest->insn));
761	      }
762
763	    if (nreg_moves4e)
764	      {
765		gcc_assert (e->distance < 2);
766		distances[e->distance] = true;
767	      }
768	    nreg_moves = MAX (nreg_moves, nreg_moves4e);
769	  }
770
771      if (nreg_moves == 0)
772	continue;
773
774      /* Create NREG_MOVES register moves.  */
775      first_move = ps->reg_moves.length ();
776      ps->reg_moves.safe_grow_cleared (first_move + nreg_moves);
777      extend_node_sched_params (ps);
778
779      /* Record the moves associated with this node.  */
780      first_move += ps->g->num_nodes;
781
782      /* Generate each move.  */
783      old_reg = prev_reg = SET_DEST (single_set (u->insn));
784      for (i_reg_move = 0; i_reg_move < nreg_moves; i_reg_move++)
785	{
786	  ps_reg_move_info *move = ps_reg_move (ps, first_move + i_reg_move);
787
788	  move->def = i_reg_move > 0 ? first_move + i_reg_move - 1 : i;
789	  move->uses = sbitmap_alloc (first_move + nreg_moves);
790	  move->old_reg = old_reg;
791	  move->new_reg = gen_reg_rtx (GET_MODE (prev_reg));
792	  move->num_consecutive_stages = distances[0] && distances[1] ? 2 : 1;
793	  move->insn = as_a <rtx_insn *> (gen_move_insn (move->new_reg,
794							 copy_rtx (prev_reg)));
795	  bitmap_clear (move->uses);
796
797	  prev_reg = move->new_reg;
798	}
799
800      distance1_uses = distances[1] ? sbitmap_alloc (g->num_nodes) : NULL;
801
802      if (distance1_uses)
803	bitmap_clear (distance1_uses);
804
805      /* Every use of the register defined by node may require a different
806	 copy of this register, depending on the time the use is scheduled.
807	 Record which uses require which move results.  */
808      for (e = u->out; e; e = e->next_out)
809	if (e->type == TRUE_DEP && e->dest != e->src)
810	  {
811	    int dest_copy = (SCHED_TIME (e->dest->cuid)
812			     - SCHED_TIME (e->src->cuid)) / ii;
813
814	    if (e->distance == 1)
815	      dest_copy = (SCHED_TIME (e->dest->cuid)
816			   - SCHED_TIME (e->src->cuid) + ii) / ii;
817
818	    if (SCHED_ROW (e->dest->cuid) == SCHED_ROW (e->src->cuid)
819		&& SCHED_COLUMN (e->dest->cuid) < SCHED_COLUMN (e->src->cuid))
820	      dest_copy--;
821
822	    if (dest_copy)
823	      {
824		ps_reg_move_info *move;
825
826		move = ps_reg_move (ps, first_move + dest_copy - 1);
827		bitmap_set_bit (move->uses, e->dest->cuid);
828		if (e->distance == 1)
829		  bitmap_set_bit (distance1_uses, e->dest->cuid);
830	      }
831	  }
832
833      must_follow = sbitmap_alloc (first_move + nreg_moves);
834      for (i_reg_move = 0; i_reg_move < nreg_moves; i_reg_move++)
835	if (!schedule_reg_move (ps, first_move + i_reg_move,
836				distance1_uses, must_follow))
837	  break;
838      sbitmap_free (must_follow);
839      if (distance1_uses)
840	sbitmap_free (distance1_uses);
841      if (i_reg_move < nreg_moves)
842	return false;
843    }
844  return true;
845}
846
847/* Emit the moves associatied with PS.  Apply the substitutions
848   associated with them.  */
849static void
850apply_reg_moves (partial_schedule_ptr ps)
851{
852  ps_reg_move_info *move;
853  int i;
854
855  FOR_EACH_VEC_ELT (ps->reg_moves, i, move)
856    {
857      unsigned int i_use;
858      sbitmap_iterator sbi;
859
860      EXECUTE_IF_SET_IN_BITMAP (move->uses, 0, i_use, sbi)
861	{
862	  replace_rtx (ps->g->nodes[i_use].insn, move->old_reg, move->new_reg);
863	  df_insn_rescan (ps->g->nodes[i_use].insn);
864	}
865    }
866}
867
868/* Bump the SCHED_TIMEs of all nodes by AMOUNT.  Set the values of
869   SCHED_ROW and SCHED_STAGE.  Instruction scheduled on cycle AMOUNT
870   will move to cycle zero.  */
871static void
872reset_sched_times (partial_schedule_ptr ps, int amount)
873{
874  int row;
875  int ii = ps->ii;
876  ps_insn_ptr crr_insn;
877
878  for (row = 0; row < ii; row++)
879    for (crr_insn = ps->rows[row]; crr_insn; crr_insn = crr_insn->next_in_row)
880      {
881	int u = crr_insn->id;
882	int normalized_time = SCHED_TIME (u) - amount;
883	int new_min_cycle = PS_MIN_CYCLE (ps) - amount;
884
885        if (dump_file)
886          {
887            /* Print the scheduling times after the rotation.  */
888	    rtx_insn *insn = ps_rtl_insn (ps, u);
889
890            fprintf (dump_file, "crr_insn->node=%d (insn id %d), "
891                     "crr_insn->cycle=%d, min_cycle=%d", u,
892                     INSN_UID (insn), normalized_time, new_min_cycle);
893            if (JUMP_P (insn))
894              fprintf (dump_file, " (branch)");
895            fprintf (dump_file, "\n");
896          }
897
898	gcc_assert (SCHED_TIME (u) >= ps->min_cycle);
899	gcc_assert (SCHED_TIME (u) <= ps->max_cycle);
900
901	crr_insn->cycle = normalized_time;
902	update_node_sched_params (u, ii, normalized_time, new_min_cycle);
903      }
904}
905
906/* Permute the insns according to their order in PS, from row 0 to
907   row ii-1, and position them right before LAST.  This schedules
908   the insns of the loop kernel.  */
909static void
910permute_partial_schedule (partial_schedule_ptr ps, rtx_insn *last)
911{
912  int ii = ps->ii;
913  int row;
914  ps_insn_ptr ps_ij;
915
916  for (row = 0; row < ii ; row++)
917    for (ps_ij = ps->rows[row]; ps_ij; ps_ij = ps_ij->next_in_row)
918      {
919	rtx_insn *insn = ps_rtl_insn (ps, ps_ij->id);
920
921	if (PREV_INSN (last) != insn)
922	  {
923	    if (ps_ij->id < ps->g->num_nodes)
924	      reorder_insns_nobb (ps_first_note (ps, ps_ij->id), insn,
925				  PREV_INSN (last));
926	    else
927	      add_insn_before (insn, last, NULL);
928	  }
929      }
930}
931
932/* Set bitmaps TMP_FOLLOW and TMP_PRECEDE to MUST_FOLLOW and MUST_PRECEDE
933   respectively only if cycle C falls on the border of the scheduling
934   window boundaries marked by START and END cycles.  STEP is the
935   direction of the window.  */
936static inline void
937set_must_precede_follow (sbitmap *tmp_follow, sbitmap must_follow,
938			 sbitmap *tmp_precede, sbitmap must_precede, int c,
939			 int start, int end, int step)
940{
941  *tmp_precede = NULL;
942  *tmp_follow = NULL;
943
944  if (c == start)
945    {
946      if (step == 1)
947	*tmp_precede = must_precede;
948      else			/* step == -1.  */
949	*tmp_follow = must_follow;
950    }
951  if (c == end - step)
952    {
953      if (step == 1)
954	*tmp_follow = must_follow;
955      else			/* step == -1.  */
956	*tmp_precede = must_precede;
957    }
958
959}
960
961/* Return True if the branch can be moved to row ii-1 while
962   normalizing the partial schedule PS to start from cycle zero and thus
963   optimize the SC.  Otherwise return False.  */
964static bool
965optimize_sc (partial_schedule_ptr ps, ddg_ptr g)
966{
967  int amount = PS_MIN_CYCLE (ps);
968  sbitmap sched_nodes = sbitmap_alloc (g->num_nodes);
969  int start, end, step;
970  int ii = ps->ii;
971  bool ok = false;
972  int stage_count, stage_count_curr;
973
974  /* Compare the SC after normalization and SC after bringing the branch
975     to row ii-1.  If they are equal just bail out.  */
976  stage_count = calculate_stage_count (ps, amount);
977  stage_count_curr =
978    calculate_stage_count (ps, SCHED_TIME (g->closing_branch->cuid) - (ii - 1));
979
980  if (stage_count == stage_count_curr)
981    {
982      if (dump_file)
983	fprintf (dump_file, "SMS SC already optimized.\n");
984
985      ok = false;
986      goto clear;
987    }
988
989  if (dump_file)
990    {
991      fprintf (dump_file, "SMS Trying to optimize branch location\n");
992      fprintf (dump_file, "SMS partial schedule before trial:\n");
993      print_partial_schedule (ps, dump_file);
994    }
995
996  /* First, normalize the partial scheduling.  */
997  reset_sched_times (ps, amount);
998  rotate_partial_schedule (ps, amount);
999  if (dump_file)
1000    {
1001      fprintf (dump_file,
1002	       "SMS partial schedule after normalization (ii, %d, SC %d):\n",
1003	       ii, stage_count);
1004      print_partial_schedule (ps, dump_file);
1005    }
1006
1007  if (SMODULO (SCHED_TIME (g->closing_branch->cuid), ii) == ii - 1)
1008    {
1009      ok = true;
1010      goto clear;
1011    }
1012
1013  bitmap_ones (sched_nodes);
1014
1015  /* Calculate the new placement of the branch.  It should be in row
1016     ii-1 and fall into it's scheduling window.  */
1017  if (get_sched_window (ps, g->closing_branch, sched_nodes, ii, &start,
1018			&step, &end) == 0)
1019    {
1020      bool success;
1021      ps_insn_ptr next_ps_i;
1022      int branch_cycle = SCHED_TIME (g->closing_branch->cuid);
1023      int row = SMODULO (branch_cycle, ps->ii);
1024      int num_splits = 0;
1025      sbitmap must_precede, must_follow, tmp_precede, tmp_follow;
1026      int min_cycle, c;
1027
1028      if (dump_file)
1029	fprintf (dump_file, "\nTrying to schedule node %d "
1030		 "INSN = %d  in (%d .. %d) step %d\n",
1031		 g->closing_branch->cuid,
1032		 (INSN_UID (g->closing_branch->insn)), start, end, step);
1033
1034      gcc_assert ((step > 0 && start < end) || (step < 0 && start > end));
1035      if (step == 1)
1036	{
1037	  c = start + ii - SMODULO (start, ii) - 1;
1038	  gcc_assert (c >= start);
1039	  if (c >= end)
1040	    {
1041	      ok = false;
1042	      if (dump_file)
1043		fprintf (dump_file,
1044			 "SMS failed to schedule branch at cycle: %d\n", c);
1045	      goto clear;
1046	    }
1047	}
1048      else
1049	{
1050	  c = start - SMODULO (start, ii) - 1;
1051	  gcc_assert (c <= start);
1052
1053	  if (c <= end)
1054	    {
1055	      if (dump_file)
1056		fprintf (dump_file,
1057			 "SMS failed to schedule branch at cycle: %d\n", c);
1058	      ok = false;
1059	      goto clear;
1060	    }
1061	}
1062
1063      must_precede = sbitmap_alloc (g->num_nodes);
1064      must_follow = sbitmap_alloc (g->num_nodes);
1065
1066      /* Try to schedule the branch is it's new cycle.  */
1067      calculate_must_precede_follow (g->closing_branch, start, end,
1068				     step, ii, sched_nodes,
1069				     must_precede, must_follow);
1070
1071      set_must_precede_follow (&tmp_follow, must_follow, &tmp_precede,
1072			       must_precede, c, start, end, step);
1073
1074      /* Find the element in the partial schedule related to the closing
1075         branch so we can remove it from it's current cycle.  */
1076      for (next_ps_i = ps->rows[row];
1077	   next_ps_i; next_ps_i = next_ps_i->next_in_row)
1078	if (next_ps_i->id == g->closing_branch->cuid)
1079	  break;
1080
1081      min_cycle = PS_MIN_CYCLE (ps) - SMODULO (PS_MIN_CYCLE (ps), ps->ii);
1082      remove_node_from_ps (ps, next_ps_i);
1083      success =
1084	try_scheduling_node_in_cycle (ps, g->closing_branch->cuid, c,
1085				      sched_nodes, &num_splits,
1086				      tmp_precede, tmp_follow);
1087      gcc_assert (num_splits == 0);
1088      if (!success)
1089	{
1090	  if (dump_file)
1091	    fprintf (dump_file,
1092		     "SMS failed to schedule branch at cycle: %d, "
1093		     "bringing it back to cycle %d\n", c, branch_cycle);
1094
1095	  /* The branch was failed to be placed in row ii - 1.
1096	     Put it back in it's original place in the partial
1097	     schedualing.  */
1098	  set_must_precede_follow (&tmp_follow, must_follow, &tmp_precede,
1099				   must_precede, branch_cycle, start, end,
1100				   step);
1101	  success =
1102	    try_scheduling_node_in_cycle (ps, g->closing_branch->cuid,
1103					  branch_cycle, sched_nodes,
1104					  &num_splits, tmp_precede,
1105					  tmp_follow);
1106	  gcc_assert (success && (num_splits == 0));
1107	  ok = false;
1108	}
1109      else
1110	{
1111	  /* The branch is placed in row ii - 1.  */
1112	  if (dump_file)
1113	    fprintf (dump_file,
1114		     "SMS success in moving branch to cycle %d\n", c);
1115
1116	  update_node_sched_params (g->closing_branch->cuid, ii, c,
1117				    PS_MIN_CYCLE (ps));
1118	  ok = true;
1119	}
1120
1121      /* This might have been added to a new first stage.  */
1122      if (PS_MIN_CYCLE (ps) < min_cycle)
1123	reset_sched_times (ps, 0);
1124
1125      free (must_precede);
1126      free (must_follow);
1127    }
1128
1129clear:
1130  free (sched_nodes);
1131  return ok;
1132}
1133
1134static void
1135duplicate_insns_of_cycles (partial_schedule_ptr ps, int from_stage,
1136			   int to_stage, rtx count_reg)
1137{
1138  int row;
1139  ps_insn_ptr ps_ij;
1140
1141  for (row = 0; row < ps->ii; row++)
1142    for (ps_ij = ps->rows[row]; ps_ij; ps_ij = ps_ij->next_in_row)
1143      {
1144	int u = ps_ij->id;
1145	int first_u, last_u;
1146	rtx_insn *u_insn;
1147
1148        /* Do not duplicate any insn which refers to count_reg as it
1149           belongs to the control part.
1150           The closing branch is scheduled as well and thus should
1151           be ignored.
1152           TODO: This should be done by analyzing the control part of
1153           the loop.  */
1154	u_insn = ps_rtl_insn (ps, u);
1155        if (reg_mentioned_p (count_reg, u_insn)
1156            || JUMP_P (u_insn))
1157          continue;
1158
1159	first_u = SCHED_STAGE (u);
1160	last_u = first_u + ps_num_consecutive_stages (ps, u) - 1;
1161	if (from_stage <= last_u && to_stage >= first_u)
1162	  {
1163	    if (u < ps->g->num_nodes)
1164	      duplicate_insn_chain (ps_first_note (ps, u), u_insn);
1165	    else
1166	      emit_insn (copy_rtx (PATTERN (u_insn)));
1167	  }
1168      }
1169}
1170
1171
1172/* Generate the instructions (including reg_moves) for prolog & epilog.  */
1173static void
1174generate_prolog_epilog (partial_schedule_ptr ps, struct loop *loop,
1175                        rtx count_reg, rtx count_init)
1176{
1177  int i;
1178  int last_stage = PS_STAGE_COUNT (ps) - 1;
1179  edge e;
1180
1181  /* Generate the prolog, inserting its insns on the loop-entry edge.  */
1182  start_sequence ();
1183
1184  if (!count_init)
1185    {
1186      /* Generate instructions at the beginning of the prolog to
1187         adjust the loop count by STAGE_COUNT.  If loop count is constant
1188         (count_init), this constant is adjusted by STAGE_COUNT in
1189         generate_prolog_epilog function.  */
1190      rtx sub_reg = NULL_RTX;
1191
1192      sub_reg = expand_simple_binop (GET_MODE (count_reg), MINUS, count_reg,
1193				     gen_int_mode (last_stage,
1194						   GET_MODE (count_reg)),
1195                                     count_reg, 1, OPTAB_DIRECT);
1196      gcc_assert (REG_P (sub_reg));
1197      if (REGNO (sub_reg) != REGNO (count_reg))
1198        emit_move_insn (count_reg, sub_reg);
1199    }
1200
1201  for (i = 0; i < last_stage; i++)
1202    duplicate_insns_of_cycles (ps, 0, i, count_reg);
1203
1204  /* Put the prolog on the entry edge.  */
1205  e = loop_preheader_edge (loop);
1206  split_edge_and_insert (e, get_insns ());
1207  if (!flag_resched_modulo_sched)
1208    e->dest->flags |= BB_DISABLE_SCHEDULE;
1209
1210  end_sequence ();
1211
1212  /* Generate the epilog, inserting its insns on the loop-exit edge.  */
1213  start_sequence ();
1214
1215  for (i = 0; i < last_stage; i++)
1216    duplicate_insns_of_cycles (ps, i + 1, last_stage, count_reg);
1217
1218  /* Put the epilogue on the exit edge.  */
1219  gcc_assert (single_exit (loop));
1220  e = single_exit (loop);
1221  split_edge_and_insert (e, get_insns ());
1222  if (!flag_resched_modulo_sched)
1223    e->dest->flags |= BB_DISABLE_SCHEDULE;
1224
1225  end_sequence ();
1226}
1227
1228/* Mark LOOP as software pipelined so the later
1229   scheduling passes don't touch it.  */
1230static void
1231mark_loop_unsched (struct loop *loop)
1232{
1233  unsigned i;
1234  basic_block *bbs = get_loop_body (loop);
1235
1236  for (i = 0; i < loop->num_nodes; i++)
1237    bbs[i]->flags |= BB_DISABLE_SCHEDULE;
1238
1239  free (bbs);
1240}
1241
1242/* Return true if all the BBs of the loop are empty except the
1243   loop header.  */
1244static bool
1245loop_single_full_bb_p (struct loop *loop)
1246{
1247  unsigned i;
1248  basic_block *bbs = get_loop_body (loop);
1249
1250  for (i = 0; i < loop->num_nodes ; i++)
1251    {
1252      rtx_insn *head, *tail;
1253      bool empty_bb = true;
1254
1255      if (bbs[i] == loop->header)
1256        continue;
1257
1258      /* Make sure that basic blocks other than the header
1259         have only notes labels or jumps.  */
1260      get_ebb_head_tail (bbs[i], bbs[i], &head, &tail);
1261      for (; head != NEXT_INSN (tail); head = NEXT_INSN (head))
1262        {
1263          if (NOTE_P (head) || LABEL_P (head)
1264 	      || (INSN_P (head) && (DEBUG_INSN_P (head) || JUMP_P (head))))
1265 	    continue;
1266 	  empty_bb = false;
1267 	  break;
1268        }
1269
1270      if (! empty_bb)
1271        {
1272          free (bbs);
1273          return false;
1274        }
1275    }
1276  free (bbs);
1277  return true;
1278}
1279
1280/* Dump file:line from INSN's location info to dump_file.  */
1281
1282static void
1283dump_insn_location (rtx_insn *insn)
1284{
1285  if (dump_file && INSN_HAS_LOCATION (insn))
1286    {
1287      expanded_location xloc = insn_location (insn);
1288      fprintf (dump_file, " %s:%i", xloc.file, xloc.line);
1289    }
1290}
1291
1292/* A simple loop from SMS point of view; it is a loop that is composed of
1293   either a single basic block or two BBs - a header and a latch.  */
1294#define SIMPLE_SMS_LOOP_P(loop) ((loop->num_nodes < 3 ) 		    \
1295				  && (EDGE_COUNT (loop->latch->preds) == 1) \
1296                                  && (EDGE_COUNT (loop->latch->succs) == 1))
1297
1298/* Return true if the loop is in its canonical form and false if not.
1299   i.e. SIMPLE_SMS_LOOP_P and have one preheader block, and single exit.  */
1300static bool
1301loop_canon_p (struct loop *loop)
1302{
1303
1304  if (loop->inner || !loop_outer (loop))
1305  {
1306    if (dump_file)
1307      fprintf (dump_file, "SMS loop inner or !loop_outer\n");
1308    return false;
1309  }
1310
1311  if (!single_exit (loop))
1312    {
1313      if (dump_file)
1314	{
1315	  rtx_insn *insn = BB_END (loop->header);
1316
1317	  fprintf (dump_file, "SMS loop many exits");
1318	  dump_insn_location (insn);
1319	  fprintf (dump_file, "\n");
1320	}
1321      return false;
1322    }
1323
1324  if (! SIMPLE_SMS_LOOP_P (loop) && ! loop_single_full_bb_p (loop))
1325    {
1326      if (dump_file)
1327	{
1328	  rtx_insn *insn = BB_END (loop->header);
1329
1330	  fprintf (dump_file, "SMS loop many BBs.");
1331	  dump_insn_location (insn);
1332	  fprintf (dump_file, "\n");
1333	}
1334      return false;
1335    }
1336
1337    return true;
1338}
1339
1340/* If there are more than one entry for the loop,
1341   make it one by splitting the first entry edge and
1342   redirecting the others to the new BB.  */
1343static void
1344canon_loop (struct loop *loop)
1345{
1346  edge e;
1347  edge_iterator i;
1348
1349  /* Avoid annoying special cases of edges going to exit
1350     block.  */
1351  FOR_EACH_EDGE (e, i, EXIT_BLOCK_PTR_FOR_FN (cfun)->preds)
1352    if ((e->flags & EDGE_FALLTHRU) && (EDGE_COUNT (e->src->succs) > 1))
1353      split_edge (e);
1354
1355  if (loop->latch == loop->header
1356      || EDGE_COUNT (loop->latch->succs) > 1)
1357    {
1358      FOR_EACH_EDGE (e, i, loop->header->preds)
1359        if (e->src == loop->latch)
1360          break;
1361      split_edge (e);
1362    }
1363}
1364
1365/* Setup infos.  */
1366static void
1367setup_sched_infos (void)
1368{
1369  memcpy (&sms_common_sched_info, &haifa_common_sched_info,
1370	  sizeof (sms_common_sched_info));
1371  sms_common_sched_info.sched_pass_id = SCHED_SMS_PASS;
1372  common_sched_info = &sms_common_sched_info;
1373
1374  sched_deps_info = &sms_sched_deps_info;
1375  current_sched_info = &sms_sched_info;
1376}
1377
1378/* Probability in % that the sms-ed loop rolls enough so that optimized
1379   version may be entered.  Just a guess.  */
1380#define PROB_SMS_ENOUGH_ITERATIONS 80
1381
1382/* Used to calculate the upper bound of ii.  */
1383#define MAXII_FACTOR 2
1384
1385/* Main entry point, perform SMS scheduling on the loops of the function
1386   that consist of single basic blocks.  */
1387static void
1388sms_schedule (void)
1389{
1390  rtx_insn *insn;
1391  ddg_ptr *g_arr, g;
1392  int * node_order;
1393  int maxii, max_asap;
1394  partial_schedule_ptr ps;
1395  basic_block bb = NULL;
1396  struct loop *loop;
1397  basic_block condition_bb = NULL;
1398  edge latch_edge;
1399  gcov_type trip_count = 0;
1400
1401  loop_optimizer_init (LOOPS_HAVE_PREHEADERS
1402		       | LOOPS_HAVE_RECORDED_EXITS);
1403  if (number_of_loops (cfun) <= 1)
1404    {
1405      loop_optimizer_finalize ();
1406      return;  /* There are no loops to schedule.  */
1407    }
1408
1409  /* Initialize issue_rate.  */
1410  if (targetm.sched.issue_rate)
1411    {
1412      int temp = reload_completed;
1413
1414      reload_completed = 1;
1415      issue_rate = targetm.sched.issue_rate ();
1416      reload_completed = temp;
1417    }
1418  else
1419    issue_rate = 1;
1420
1421  /* Initialize the scheduler.  */
1422  setup_sched_infos ();
1423  haifa_sched_init ();
1424
1425  /* Allocate memory to hold the DDG array one entry for each loop.
1426     We use loop->num as index into this array.  */
1427  g_arr = XCNEWVEC (ddg_ptr, number_of_loops (cfun));
1428
1429  if (dump_file)
1430  {
1431    fprintf (dump_file, "\n\nSMS analysis phase\n");
1432    fprintf (dump_file, "===================\n\n");
1433  }
1434
1435  /* Build DDGs for all the relevant loops and hold them in G_ARR
1436     indexed by the loop index.  */
1437  FOR_EACH_LOOP (loop, 0)
1438    {
1439      rtx_insn *head, *tail;
1440      rtx count_reg;
1441
1442      /* For debugging.  */
1443      if (dbg_cnt (sms_sched_loop) == false)
1444        {
1445          if (dump_file)
1446            fprintf (dump_file, "SMS reached max limit... \n");
1447
1448	  break;
1449        }
1450
1451      if (dump_file)
1452	{
1453	  rtx_insn *insn = BB_END (loop->header);
1454
1455	  fprintf (dump_file, "SMS loop num: %d", loop->num);
1456	  dump_insn_location (insn);
1457	  fprintf (dump_file, "\n");
1458	}
1459
1460      if (! loop_canon_p (loop))
1461        continue;
1462
1463      if (! loop_single_full_bb_p (loop))
1464      {
1465        if (dump_file)
1466          fprintf (dump_file, "SMS not loop_single_full_bb_p\n");
1467	continue;
1468      }
1469
1470      bb = loop->header;
1471
1472      get_ebb_head_tail (bb, bb, &head, &tail);
1473      latch_edge = loop_latch_edge (loop);
1474      gcc_assert (single_exit (loop));
1475      if (single_exit (loop)->count)
1476	trip_count = latch_edge->count / single_exit (loop)->count;
1477
1478      /* Perform SMS only on loops that their average count is above threshold.  */
1479
1480      if ( latch_edge->count
1481          && (latch_edge->count < single_exit (loop)->count * SMS_LOOP_AVERAGE_COUNT_THRESHOLD))
1482	{
1483	  if (dump_file)
1484	    {
1485	      dump_insn_location (tail);
1486	      fprintf (dump_file, "\nSMS single-bb-loop\n");
1487	      if (profile_info && flag_branch_probabilities)
1488	    	{
1489	      	  fprintf (dump_file, "SMS loop-count ");
1490	      	  fprintf (dump_file, "%"PRId64,
1491	             	   (int64_t) bb->count);
1492	      	  fprintf (dump_file, "\n");
1493                  fprintf (dump_file, "SMS trip-count ");
1494                  fprintf (dump_file, "%"PRId64,
1495                           (int64_t) trip_count);
1496                  fprintf (dump_file, "\n");
1497	      	  fprintf (dump_file, "SMS profile-sum-max ");
1498	      	  fprintf (dump_file, "%"PRId64,
1499	          	   (int64_t) profile_info->sum_max);
1500	      	  fprintf (dump_file, "\n");
1501	    	}
1502	    }
1503          continue;
1504        }
1505
1506      /* Make sure this is a doloop.  */
1507      if ( !(count_reg = doloop_register_get (head, tail)))
1508      {
1509        if (dump_file)
1510          fprintf (dump_file, "SMS doloop_register_get failed\n");
1511	continue;
1512      }
1513
1514      /* Don't handle BBs with calls or barriers
1515	 or !single_set with the exception of instructions that include
1516	 count_reg---these instructions are part of the control part
1517	 that do-loop recognizes.
1518         ??? Should handle insns defining subregs.  */
1519     for (insn = head; insn != NEXT_INSN (tail); insn = NEXT_INSN (insn))
1520      {
1521         rtx set;
1522
1523        if (CALL_P (insn)
1524            || BARRIER_P (insn)
1525            || (NONDEBUG_INSN_P (insn) && !JUMP_P (insn)
1526                && !single_set (insn) && GET_CODE (PATTERN (insn)) != USE
1527                && !reg_mentioned_p (count_reg, insn))
1528            || (INSN_P (insn) && (set = single_set (insn))
1529                && GET_CODE (SET_DEST (set)) == SUBREG))
1530        break;
1531      }
1532
1533      if (insn != NEXT_INSN (tail))
1534	{
1535	  if (dump_file)
1536	    {
1537	      if (CALL_P (insn))
1538		fprintf (dump_file, "SMS loop-with-call\n");
1539	      else if (BARRIER_P (insn))
1540		fprintf (dump_file, "SMS loop-with-barrier\n");
1541              else if ((NONDEBUG_INSN_P (insn) && !JUMP_P (insn)
1542                && !single_set (insn) && GET_CODE (PATTERN (insn)) != USE))
1543                fprintf (dump_file, "SMS loop-with-not-single-set\n");
1544              else
1545               fprintf (dump_file, "SMS loop with subreg in lhs\n");
1546	      print_rtl_single (dump_file, insn);
1547	    }
1548
1549	  continue;
1550	}
1551
1552      /* Always schedule the closing branch with the rest of the
1553         instructions. The branch is rotated to be in row ii-1 at the
1554         end of the scheduling procedure to make sure it's the last
1555         instruction in the iteration.  */
1556      if (! (g = create_ddg (bb, 1)))
1557        {
1558          if (dump_file)
1559	    fprintf (dump_file, "SMS create_ddg failed\n");
1560	  continue;
1561        }
1562
1563      g_arr[loop->num] = g;
1564      if (dump_file)
1565        fprintf (dump_file, "...OK\n");
1566
1567    }
1568  if (dump_file)
1569  {
1570    fprintf (dump_file, "\nSMS transformation phase\n");
1571    fprintf (dump_file, "=========================\n\n");
1572  }
1573
1574  /* We don't want to perform SMS on new loops - created by versioning.  */
1575  FOR_EACH_LOOP (loop, 0)
1576    {
1577      rtx_insn *head, *tail;
1578      rtx count_reg;
1579      rtx_insn *count_init;
1580      int mii, rec_mii, stage_count, min_cycle;
1581      int64_t loop_count = 0;
1582      bool opt_sc_p;
1583
1584      if (! (g = g_arr[loop->num]))
1585        continue;
1586
1587      if (dump_file)
1588	{
1589	  rtx_insn *insn = BB_END (loop->header);
1590
1591	  fprintf (dump_file, "SMS loop num: %d", loop->num);
1592	  dump_insn_location (insn);
1593	  fprintf (dump_file, "\n");
1594
1595	  print_ddg (dump_file, g);
1596	}
1597
1598      get_ebb_head_tail (loop->header, loop->header, &head, &tail);
1599
1600      latch_edge = loop_latch_edge (loop);
1601      gcc_assert (single_exit (loop));
1602      if (single_exit (loop)->count)
1603	trip_count = latch_edge->count / single_exit (loop)->count;
1604
1605      if (dump_file)
1606	{
1607	  dump_insn_location (tail);
1608	  fprintf (dump_file, "\nSMS single-bb-loop\n");
1609	  if (profile_info && flag_branch_probabilities)
1610	    {
1611	      fprintf (dump_file, "SMS loop-count ");
1612	      fprintf (dump_file, "%"PRId64,
1613	               (int64_t) bb->count);
1614	      fprintf (dump_file, "\n");
1615	      fprintf (dump_file, "SMS profile-sum-max ");
1616	      fprintf (dump_file, "%"PRId64,
1617	               (int64_t) profile_info->sum_max);
1618	      fprintf (dump_file, "\n");
1619	    }
1620	  fprintf (dump_file, "SMS doloop\n");
1621	  fprintf (dump_file, "SMS built-ddg %d\n", g->num_nodes);
1622          fprintf (dump_file, "SMS num-loads %d\n", g->num_loads);
1623          fprintf (dump_file, "SMS num-stores %d\n", g->num_stores);
1624	}
1625
1626
1627      /* In case of th loop have doloop register it gets special
1628	 handling.  */
1629      count_init = NULL;
1630      if ((count_reg = doloop_register_get (head, tail)))
1631	{
1632	  basic_block pre_header;
1633
1634	  pre_header = loop_preheader_edge (loop)->src;
1635	  count_init = const_iteration_count (count_reg, pre_header,
1636					      &loop_count);
1637	}
1638      gcc_assert (count_reg);
1639
1640      if (dump_file && count_init)
1641        {
1642          fprintf (dump_file, "SMS const-doloop ");
1643          fprintf (dump_file, "%"PRId64,
1644		     loop_count);
1645          fprintf (dump_file, "\n");
1646        }
1647
1648      node_order = XNEWVEC (int, g->num_nodes);
1649
1650      mii = 1; /* Need to pass some estimate of mii.  */
1651      rec_mii = sms_order_nodes (g, mii, node_order, &max_asap);
1652      mii = MAX (res_MII (g), rec_mii);
1653      maxii = MAX (max_asap, MAXII_FACTOR * mii);
1654
1655      if (dump_file)
1656	fprintf (dump_file, "SMS iis %d %d %d (rec_mii, mii, maxii)\n",
1657		 rec_mii, mii, maxii);
1658
1659      for (;;)
1660	{
1661	  set_node_sched_params (g);
1662
1663	  stage_count = 0;
1664	  opt_sc_p = false;
1665	  ps = sms_schedule_by_order (g, mii, maxii, node_order);
1666
1667	  if (ps)
1668	    {
1669	      /* Try to achieve optimized SC by normalizing the partial
1670		 schedule (having the cycles start from cycle zero).
1671		 The branch location must be placed in row ii-1 in the
1672		 final scheduling.	If failed, shift all instructions to
1673		 position the branch in row ii-1.  */
1674	      opt_sc_p = optimize_sc (ps, g);
1675	      if (opt_sc_p)
1676		stage_count = calculate_stage_count (ps, 0);
1677	      else
1678		{
1679		  /* Bring the branch to cycle ii-1.  */
1680		  int amount = (SCHED_TIME (g->closing_branch->cuid)
1681				- (ps->ii - 1));
1682
1683		  if (dump_file)
1684		    fprintf (dump_file, "SMS schedule branch at cycle ii-1\n");
1685
1686		  stage_count = calculate_stage_count (ps, amount);
1687		}
1688
1689	      gcc_assert (stage_count >= 1);
1690	    }
1691
1692	  /* The default value of PARAM_SMS_MIN_SC is 2 as stage count of
1693	     1 means that there is no interleaving between iterations thus
1694	     we let the scheduling passes do the job in this case.  */
1695	  if (stage_count < PARAM_VALUE (PARAM_SMS_MIN_SC)
1696	      || (count_init && (loop_count <= stage_count))
1697	      || (flag_branch_probabilities && (trip_count <= stage_count)))
1698	    {
1699	      if (dump_file)
1700		{
1701		  fprintf (dump_file, "SMS failed... \n");
1702		  fprintf (dump_file, "SMS sched-failed (stage-count=%d,"
1703			   " loop-count=", stage_count);
1704		  fprintf (dump_file, "%"PRId64, loop_count);
1705		  fprintf (dump_file, ", trip-count=");
1706		  fprintf (dump_file, "%"PRId64, trip_count);
1707		  fprintf (dump_file, ")\n");
1708		}
1709	      break;
1710	    }
1711
1712          if (!opt_sc_p)
1713            {
1714	      /* Rotate the partial schedule to have the branch in row ii-1.  */
1715              int amount = SCHED_TIME (g->closing_branch->cuid) - (ps->ii - 1);
1716
1717              reset_sched_times (ps, amount);
1718              rotate_partial_schedule (ps, amount);
1719            }
1720
1721	  set_columns_for_ps (ps);
1722
1723	  min_cycle = PS_MIN_CYCLE (ps) - SMODULO (PS_MIN_CYCLE (ps), ps->ii);
1724	  if (!schedule_reg_moves (ps))
1725	    {
1726	      mii = ps->ii + 1;
1727	      free_partial_schedule (ps);
1728	      continue;
1729	    }
1730
1731	  /* Moves that handle incoming values might have been added
1732	     to a new first stage.  Bump the stage count if so.
1733
1734	     ??? Perhaps we could consider rotating the schedule here
1735	     instead?  */
1736	  if (PS_MIN_CYCLE (ps) < min_cycle)
1737	    {
1738	      reset_sched_times (ps, 0);
1739	      stage_count++;
1740	    }
1741
1742	  /* The stage count should now be correct without rotation.  */
1743	  gcc_checking_assert (stage_count == calculate_stage_count (ps, 0));
1744	  PS_STAGE_COUNT (ps) = stage_count;
1745
1746	  canon_loop (loop);
1747
1748          if (dump_file)
1749            {
1750	      dump_insn_location (tail);
1751	      fprintf (dump_file, " SMS succeeded %d %d (with ii, sc)\n",
1752		       ps->ii, stage_count);
1753	      print_partial_schedule (ps, dump_file);
1754	    }
1755
1756          /* case the BCT count is not known , Do loop-versioning */
1757	  if (count_reg && ! count_init)
1758            {
1759	      rtx comp_rtx = gen_rtx_GT (VOIDmode, count_reg,
1760					 gen_int_mode (stage_count,
1761						       GET_MODE (count_reg)));
1762	      unsigned prob = (PROB_SMS_ENOUGH_ITERATIONS
1763			       * REG_BR_PROB_BASE) / 100;
1764
1765	      loop_version (loop, comp_rtx, &condition_bb,
1766	  		    prob, prob, REG_BR_PROB_BASE - prob,
1767			    true);
1768	     }
1769
1770	  /* Set new iteration count of loop kernel.  */
1771          if (count_reg && count_init)
1772	    SET_SRC (single_set (count_init)) = GEN_INT (loop_count
1773						     - stage_count + 1);
1774
1775	  /* Now apply the scheduled kernel to the RTL of the loop.  */
1776	  permute_partial_schedule (ps, g->closing_branch->first_note);
1777
1778          /* Mark this loop as software pipelined so the later
1779	     scheduling passes don't touch it.  */
1780	  if (! flag_resched_modulo_sched)
1781	    mark_loop_unsched (loop);
1782
1783	  /* The life-info is not valid any more.  */
1784	  df_set_bb_dirty (g->bb);
1785
1786	  apply_reg_moves (ps);
1787	  if (dump_file)
1788	    print_node_sched_params (dump_file, g->num_nodes, ps);
1789	  /* Generate prolog and epilog.  */
1790          generate_prolog_epilog (ps, loop, count_reg, count_init);
1791	  break;
1792	}
1793
1794      free_partial_schedule (ps);
1795      node_sched_param_vec.release ();
1796      free (node_order);
1797      free_ddg (g);
1798    }
1799
1800  free (g_arr);
1801
1802  /* Release scheduler data, needed until now because of DFA.  */
1803  haifa_sched_finish ();
1804  loop_optimizer_finalize ();
1805}
1806
1807/* The SMS scheduling algorithm itself
1808   -----------------------------------
1809   Input: 'O' an ordered list of insns of a loop.
1810   Output: A scheduling of the loop - kernel, prolog, and epilogue.
1811
1812   'Q' is the empty Set
1813   'PS' is the partial schedule; it holds the currently scheduled nodes with
1814	their cycle/slot.
1815   'PSP' previously scheduled predecessors.
1816   'PSS' previously scheduled successors.
1817   't(u)' the cycle where u is scheduled.
1818   'l(u)' is the latency of u.
1819   'd(v,u)' is the dependence distance from v to u.
1820   'ASAP(u)' the earliest time at which u could be scheduled as computed in
1821	     the node ordering phase.
1822   'check_hardware_resources_conflicts(u, PS, c)'
1823			     run a trace around cycle/slot through DFA model
1824			     to check resource conflicts involving instruction u
1825			     at cycle c given the partial schedule PS.
1826   'add_to_partial_schedule_at_time(u, PS, c)'
1827			     Add the node/instruction u to the partial schedule
1828			     PS at time c.
1829   'calculate_register_pressure(PS)'
1830			     Given a schedule of instructions, calculate the register
1831			     pressure it implies.  One implementation could be the
1832			     maximum number of overlapping live ranges.
1833   'maxRP' The maximum allowed register pressure, it is usually derived from the number
1834	   registers available in the hardware.
1835
1836   1. II = MII.
1837   2. PS = empty list
1838   3. for each node u in O in pre-computed order
1839   4.   if (PSP(u) != Q && PSS(u) == Q) then
1840   5.     Early_start(u) = max ( t(v) + l(v) - d(v,u)*II ) over all every v in PSP(u).
1841   6.     start = Early_start; end = Early_start + II - 1; step = 1
1842   11.  else if (PSP(u) == Q && PSS(u) != Q) then
1843   12.      Late_start(u) = min ( t(v) - l(v) + d(v,u)*II ) over all every v in PSS(u).
1844   13.     start = Late_start; end = Late_start - II + 1; step = -1
1845   14.  else if (PSP(u) != Q && PSS(u) != Q) then
1846   15.     Early_start(u) = max ( t(v) + l(v) - d(v,u)*II ) over all every v in PSP(u).
1847   16.     Late_start(u) = min ( t(v) - l(v) + d(v,u)*II ) over all every v in PSS(u).
1848   17.     start = Early_start;
1849   18.     end = min(Early_start + II - 1 , Late_start);
1850   19.     step = 1
1851   20.     else "if (PSP(u) == Q && PSS(u) == Q)"
1852   21.	  start = ASAP(u); end = start + II - 1; step = 1
1853   22.  endif
1854
1855   23.  success = false
1856   24.  for (c = start ; c != end ; c += step)
1857   25.     if check_hardware_resources_conflicts(u, PS, c) then
1858   26.       add_to_partial_schedule_at_time(u, PS, c)
1859   27.       success = true
1860   28.       break
1861   29.     endif
1862   30.  endfor
1863   31.  if (success == false) then
1864   32.    II = II + 1
1865   33.    if (II > maxII) then
1866   34.       finish - failed to schedule
1867   35.	 endif
1868   36.    goto 2.
1869   37.  endif
1870   38. endfor
1871   39. if (calculate_register_pressure(PS) > maxRP) then
1872   40.    goto 32.
1873   41. endif
1874   42. compute epilogue & prologue
1875   43. finish - succeeded to schedule
1876
1877   ??? The algorithm restricts the scheduling window to II cycles.
1878   In rare cases, it may be better to allow windows of II+1 cycles.
1879   The window would then start and end on the same row, but with
1880   different "must precede" and "must follow" requirements.  */
1881
1882/* A limit on the number of cycles that resource conflicts can span.  ??? Should
1883   be provided by DFA, and be dependent on the type of insn scheduled.  Currently
1884   set to 0 to save compile time.  */
1885#define DFA_HISTORY SMS_DFA_HISTORY
1886
1887/* A threshold for the number of repeated unsuccessful attempts to insert
1888   an empty row, before we flush the partial schedule and start over.  */
1889#define MAX_SPLIT_NUM 10
1890/* Given the partial schedule PS, this function calculates and returns the
1891   cycles in which we can schedule the node with the given index I.
1892   NOTE: Here we do the backtracking in SMS, in some special cases. We have
1893   noticed that there are several cases in which we fail    to SMS the loop
1894   because the sched window of a node is empty    due to tight data-deps. In
1895   such cases we want to unschedule    some of the predecessors/successors
1896   until we get non-empty    scheduling window.  It returns -1 if the
1897   scheduling window is empty and zero otherwise.  */
1898
1899static int
1900get_sched_window (partial_schedule_ptr ps, ddg_node_ptr u_node,
1901		  sbitmap sched_nodes, int ii, int *start_p, int *step_p,
1902		  int *end_p)
1903{
1904  int start, step, end;
1905  int early_start, late_start;
1906  ddg_edge_ptr e;
1907  sbitmap psp = sbitmap_alloc (ps->g->num_nodes);
1908  sbitmap pss = sbitmap_alloc (ps->g->num_nodes);
1909  sbitmap u_node_preds = NODE_PREDECESSORS (u_node);
1910  sbitmap u_node_succs = NODE_SUCCESSORS (u_node);
1911  int psp_not_empty;
1912  int pss_not_empty;
1913  int count_preds;
1914  int count_succs;
1915
1916  /* 1. compute sched window for u (start, end, step).  */
1917  bitmap_clear (psp);
1918  bitmap_clear (pss);
1919  psp_not_empty = bitmap_and (psp, u_node_preds, sched_nodes);
1920  pss_not_empty = bitmap_and (pss, u_node_succs, sched_nodes);
1921
1922  /* We first compute a forward range (start <= end), then decide whether
1923     to reverse it.  */
1924  early_start = INT_MIN;
1925  late_start = INT_MAX;
1926  start = INT_MIN;
1927  end = INT_MAX;
1928  step = 1;
1929
1930  count_preds = 0;
1931  count_succs = 0;
1932
1933  if (dump_file && (psp_not_empty || pss_not_empty))
1934    {
1935      fprintf (dump_file, "\nAnalyzing dependencies for node %d (INSN %d)"
1936	       "; ii = %d\n\n", u_node->cuid, INSN_UID (u_node->insn), ii);
1937      fprintf (dump_file, "%11s %11s %11s %11s %5s\n",
1938	       "start", "early start", "late start", "end", "time");
1939      fprintf (dump_file, "=========== =========== =========== ==========="
1940	       " =====\n");
1941    }
1942  /* Calculate early_start and limit end.  Both bounds are inclusive.  */
1943  if (psp_not_empty)
1944    for (e = u_node->in; e != 0; e = e->next_in)
1945      {
1946	int v = e->src->cuid;
1947
1948	if (bitmap_bit_p (sched_nodes, v))
1949	  {
1950	    int p_st = SCHED_TIME (v);
1951	    int earliest = p_st + e->latency - (e->distance * ii);
1952	    int latest = (e->data_type == MEM_DEP ? p_st + ii - 1 : INT_MAX);
1953
1954	    if (dump_file)
1955	      {
1956		fprintf (dump_file, "%11s %11d %11s %11d %5d",
1957			 "", earliest, "", latest, p_st);
1958		print_ddg_edge (dump_file, e);
1959		fprintf (dump_file, "\n");
1960	      }
1961
1962	    early_start = MAX (early_start, earliest);
1963	    end = MIN (end, latest);
1964
1965	    if (e->type == TRUE_DEP && e->data_type == REG_DEP)
1966	      count_preds++;
1967	  }
1968      }
1969
1970  /* Calculate late_start and limit start.  Both bounds are inclusive.  */
1971  if (pss_not_empty)
1972    for (e = u_node->out; e != 0; e = e->next_out)
1973      {
1974	int v = e->dest->cuid;
1975
1976	if (bitmap_bit_p (sched_nodes, v))
1977	  {
1978	    int s_st = SCHED_TIME (v);
1979	    int earliest = (e->data_type == MEM_DEP ? s_st - ii + 1 : INT_MIN);
1980	    int latest = s_st - e->latency + (e->distance * ii);
1981
1982	    if (dump_file)
1983	      {
1984		fprintf (dump_file, "%11d %11s %11d %11s %5d",
1985			 earliest, "", latest, "", s_st);
1986		print_ddg_edge (dump_file, e);
1987		fprintf (dump_file, "\n");
1988	      }
1989
1990	    start = MAX (start, earliest);
1991	    late_start = MIN (late_start, latest);
1992
1993	    if (e->type == TRUE_DEP && e->data_type == REG_DEP)
1994	      count_succs++;
1995	  }
1996      }
1997
1998  if (dump_file && (psp_not_empty || pss_not_empty))
1999    {
2000      fprintf (dump_file, "----------- ----------- ----------- -----------"
2001	       " -----\n");
2002      fprintf (dump_file, "%11d %11d %11d %11d %5s %s\n",
2003	       start, early_start, late_start, end, "",
2004	       "(max, max, min, min)");
2005    }
2006
2007  /* Get a target scheduling window no bigger than ii.  */
2008  if (early_start == INT_MIN && late_start == INT_MAX)
2009    early_start = NODE_ASAP (u_node);
2010  else if (early_start == INT_MIN)
2011    early_start = late_start - (ii - 1);
2012  late_start = MIN (late_start, early_start + (ii - 1));
2013
2014  /* Apply memory dependence limits.  */
2015  start = MAX (start, early_start);
2016  end = MIN (end, late_start);
2017
2018  if (dump_file && (psp_not_empty || pss_not_empty))
2019    fprintf (dump_file, "%11s %11d %11d %11s %5s final window\n",
2020	     "", start, end, "", "");
2021
2022  /* If there are at least as many successors as predecessors, schedule the
2023     node close to its successors.  */
2024  if (pss_not_empty && count_succs >= count_preds)
2025    {
2026      int tmp = end;
2027      end = start;
2028      start = tmp;
2029      step = -1;
2030    }
2031
2032  /* Now that we've finalized the window, make END an exclusive rather
2033     than an inclusive bound.  */
2034  end += step;
2035
2036  *start_p = start;
2037  *step_p = step;
2038  *end_p = end;
2039  sbitmap_free (psp);
2040  sbitmap_free (pss);
2041
2042  if ((start >= end && step == 1) || (start <= end && step == -1))
2043    {
2044      if (dump_file)
2045	fprintf (dump_file, "\nEmpty window: start=%d, end=%d, step=%d\n",
2046		 start, end, step);
2047      return -1;
2048    }
2049
2050  return 0;
2051}
2052
2053/* Calculate MUST_PRECEDE/MUST_FOLLOW bitmaps of U_NODE; which is the
2054   node currently been scheduled.  At the end of the calculation
2055   MUST_PRECEDE/MUST_FOLLOW contains all predecessors/successors of
2056   U_NODE which are (1) already scheduled in the first/last row of
2057   U_NODE's scheduling window, (2) whose dependence inequality with U
2058   becomes an equality when U is scheduled in this same row, and (3)
2059   whose dependence latency is zero.
2060
2061   The first and last rows are calculated using the following parameters:
2062   START/END rows - The cycles that begins/ends the traversal on the window;
2063   searching for an empty cycle to schedule U_NODE.
2064   STEP - The direction in which we traverse the window.
2065   II - The initiation interval.  */
2066
2067static void
2068calculate_must_precede_follow (ddg_node_ptr u_node, int start, int end,
2069			       int step, int ii, sbitmap sched_nodes,
2070			       sbitmap must_precede, sbitmap must_follow)
2071{
2072  ddg_edge_ptr e;
2073  int first_cycle_in_window, last_cycle_in_window;
2074
2075  gcc_assert (must_precede && must_follow);
2076
2077  /* Consider the following scheduling window:
2078     {first_cycle_in_window, first_cycle_in_window+1, ...,
2079     last_cycle_in_window}.  If step is 1 then the following will be
2080     the order we traverse the window: {start=first_cycle_in_window,
2081     first_cycle_in_window+1, ..., end=last_cycle_in_window+1},
2082     or {start=last_cycle_in_window, last_cycle_in_window-1, ...,
2083     end=first_cycle_in_window-1} if step is -1.  */
2084  first_cycle_in_window = (step == 1) ? start : end - step;
2085  last_cycle_in_window = (step == 1) ? end - step : start;
2086
2087  bitmap_clear (must_precede);
2088  bitmap_clear (must_follow);
2089
2090  if (dump_file)
2091    fprintf (dump_file, "\nmust_precede: ");
2092
2093  /* Instead of checking if:
2094      (SMODULO (SCHED_TIME (e->src), ii) == first_row_in_window)
2095      && ((SCHED_TIME (e->src) + e->latency - (e->distance * ii)) ==
2096             first_cycle_in_window)
2097      && e->latency == 0
2098     we use the fact that latency is non-negative:
2099      SCHED_TIME (e->src) - (e->distance * ii) <=
2100      SCHED_TIME (e->src) + e->latency - (e->distance * ii)) <=
2101      first_cycle_in_window
2102     and check only if
2103      SCHED_TIME (e->src) - (e->distance * ii) == first_cycle_in_window  */
2104  for (e = u_node->in; e != 0; e = e->next_in)
2105    if (bitmap_bit_p (sched_nodes, e->src->cuid)
2106	&& ((SCHED_TIME (e->src->cuid) - (e->distance * ii)) ==
2107             first_cycle_in_window))
2108      {
2109	if (dump_file)
2110	  fprintf (dump_file, "%d ", e->src->cuid);
2111
2112	bitmap_set_bit (must_precede, e->src->cuid);
2113      }
2114
2115  if (dump_file)
2116    fprintf (dump_file, "\nmust_follow: ");
2117
2118  /* Instead of checking if:
2119      (SMODULO (SCHED_TIME (e->dest), ii) == last_row_in_window)
2120      && ((SCHED_TIME (e->dest) - e->latency + (e->distance * ii)) ==
2121             last_cycle_in_window)
2122      && e->latency == 0
2123     we use the fact that latency is non-negative:
2124      SCHED_TIME (e->dest) + (e->distance * ii) >=
2125      SCHED_TIME (e->dest) - e->latency + (e->distance * ii)) >=
2126      last_cycle_in_window
2127     and check only if
2128      SCHED_TIME (e->dest) + (e->distance * ii) == last_cycle_in_window  */
2129  for (e = u_node->out; e != 0; e = e->next_out)
2130    if (bitmap_bit_p (sched_nodes, e->dest->cuid)
2131	&& ((SCHED_TIME (e->dest->cuid) + (e->distance * ii)) ==
2132             last_cycle_in_window))
2133      {
2134	if (dump_file)
2135	  fprintf (dump_file, "%d ", e->dest->cuid);
2136
2137	bitmap_set_bit (must_follow, e->dest->cuid);
2138      }
2139
2140  if (dump_file)
2141    fprintf (dump_file, "\n");
2142}
2143
2144/* Return 1 if U_NODE can be scheduled in CYCLE.  Use the following
2145   parameters to decide if that's possible:
2146   PS - The partial schedule.
2147   U - The serial number of U_NODE.
2148   NUM_SPLITS - The number of row splits made so far.
2149   MUST_PRECEDE - The nodes that must precede U_NODE. (only valid at
2150   the first row of the scheduling window)
2151   MUST_FOLLOW - The nodes that must follow U_NODE. (only valid at the
2152   last row of the scheduling window)  */
2153
2154static bool
2155try_scheduling_node_in_cycle (partial_schedule_ptr ps,
2156			      int u, int cycle, sbitmap sched_nodes,
2157			      int *num_splits, sbitmap must_precede,
2158			      sbitmap must_follow)
2159{
2160  ps_insn_ptr psi;
2161  bool success = 0;
2162
2163  verify_partial_schedule (ps, sched_nodes);
2164  psi = ps_add_node_check_conflicts (ps, u, cycle, must_precede, must_follow);
2165  if (psi)
2166    {
2167      SCHED_TIME (u) = cycle;
2168      bitmap_set_bit (sched_nodes, u);
2169      success = 1;
2170      *num_splits = 0;
2171      if (dump_file)
2172	fprintf (dump_file, "Scheduled w/o split in %d\n", cycle);
2173
2174    }
2175
2176  return success;
2177}
2178
2179/* This function implements the scheduling algorithm for SMS according to the
2180   above algorithm.  */
2181static partial_schedule_ptr
2182sms_schedule_by_order (ddg_ptr g, int mii, int maxii, int *nodes_order)
2183{
2184  int ii = mii;
2185  int i, c, success, num_splits = 0;
2186  int flush_and_start_over = true;
2187  int num_nodes = g->num_nodes;
2188  int start, end, step; /* Place together into one struct?  */
2189  sbitmap sched_nodes = sbitmap_alloc (num_nodes);
2190  sbitmap must_precede = sbitmap_alloc (num_nodes);
2191  sbitmap must_follow = sbitmap_alloc (num_nodes);
2192  sbitmap tobe_scheduled = sbitmap_alloc (num_nodes);
2193
2194  partial_schedule_ptr ps = create_partial_schedule (ii, g, DFA_HISTORY);
2195
2196  bitmap_ones (tobe_scheduled);
2197  bitmap_clear (sched_nodes);
2198
2199  while (flush_and_start_over && (ii < maxii))
2200    {
2201
2202      if (dump_file)
2203	fprintf (dump_file, "Starting with ii=%d\n", ii);
2204      flush_and_start_over = false;
2205      bitmap_clear (sched_nodes);
2206
2207      for (i = 0; i < num_nodes; i++)
2208	{
2209	  int u = nodes_order[i];
2210  	  ddg_node_ptr u_node = &ps->g->nodes[u];
2211	  rtx insn = u_node->insn;
2212
2213	  if (!NONDEBUG_INSN_P (insn))
2214	    {
2215	      bitmap_clear_bit (tobe_scheduled, u);
2216	      continue;
2217	    }
2218
2219	  if (bitmap_bit_p (sched_nodes, u))
2220	    continue;
2221
2222	  /* Try to get non-empty scheduling window.  */
2223	 success = 0;
2224         if (get_sched_window (ps, u_node, sched_nodes, ii, &start,
2225                                &step, &end) == 0)
2226            {
2227              if (dump_file)
2228                fprintf (dump_file, "\nTrying to schedule node %d "
2229			 "INSN = %d  in (%d .. %d) step %d\n", u, (INSN_UID
2230                        (g->nodes[u].insn)), start, end, step);
2231
2232              gcc_assert ((step > 0 && start < end)
2233                          || (step < 0 && start > end));
2234
2235              calculate_must_precede_follow (u_node, start, end, step, ii,
2236                                             sched_nodes, must_precede,
2237                                             must_follow);
2238
2239              for (c = start; c != end; c += step)
2240                {
2241		  sbitmap tmp_precede, tmp_follow;
2242
2243                  set_must_precede_follow (&tmp_follow, must_follow,
2244		                           &tmp_precede, must_precede,
2245                                           c, start, end, step);
2246                  success =
2247                    try_scheduling_node_in_cycle (ps, u, c,
2248                                                  sched_nodes,
2249                                                  &num_splits, tmp_precede,
2250                                                  tmp_follow);
2251                  if (success)
2252                    break;
2253                }
2254
2255              verify_partial_schedule (ps, sched_nodes);
2256            }
2257            if (!success)
2258            {
2259              int split_row;
2260
2261              if (ii++ == maxii)
2262                break;
2263
2264              if (num_splits >= MAX_SPLIT_NUM)
2265                {
2266                  num_splits = 0;
2267                  flush_and_start_over = true;
2268                  verify_partial_schedule (ps, sched_nodes);
2269                  reset_partial_schedule (ps, ii);
2270                  verify_partial_schedule (ps, sched_nodes);
2271                  break;
2272                }
2273
2274              num_splits++;
2275              /* The scheduling window is exclusive of 'end'
2276                 whereas compute_split_window() expects an inclusive,
2277                 ordered range.  */
2278              if (step == 1)
2279                split_row = compute_split_row (sched_nodes, start, end - 1,
2280                                               ps->ii, u_node);
2281              else
2282                split_row = compute_split_row (sched_nodes, end + 1, start,
2283                                               ps->ii, u_node);
2284
2285              ps_insert_empty_row (ps, split_row, sched_nodes);
2286              i--;              /* Go back and retry node i.  */
2287
2288              if (dump_file)
2289                fprintf (dump_file, "num_splits=%d\n", num_splits);
2290            }
2291
2292          /* ??? If (success), check register pressure estimates.  */
2293        }                       /* Continue with next node.  */
2294    }                           /* While flush_and_start_over.  */
2295  if (ii >= maxii)
2296    {
2297      free_partial_schedule (ps);
2298      ps = NULL;
2299    }
2300  else
2301    gcc_assert (bitmap_equal_p (tobe_scheduled, sched_nodes));
2302
2303  sbitmap_free (sched_nodes);
2304  sbitmap_free (must_precede);
2305  sbitmap_free (must_follow);
2306  sbitmap_free (tobe_scheduled);
2307
2308  return ps;
2309}
2310
2311/* This function inserts a new empty row into PS at the position
2312   according to SPLITROW, keeping all already scheduled instructions
2313   intact and updating their SCHED_TIME and cycle accordingly.  */
2314static void
2315ps_insert_empty_row (partial_schedule_ptr ps, int split_row,
2316		     sbitmap sched_nodes)
2317{
2318  ps_insn_ptr crr_insn;
2319  ps_insn_ptr *rows_new;
2320  int ii = ps->ii;
2321  int new_ii = ii + 1;
2322  int row;
2323  int *rows_length_new;
2324
2325  verify_partial_schedule (ps, sched_nodes);
2326
2327  /* We normalize sched_time and rotate ps to have only non-negative sched
2328     times, for simplicity of updating cycles after inserting new row.  */
2329  split_row -= ps->min_cycle;
2330  split_row = SMODULO (split_row, ii);
2331  if (dump_file)
2332    fprintf (dump_file, "split_row=%d\n", split_row);
2333
2334  reset_sched_times (ps, PS_MIN_CYCLE (ps));
2335  rotate_partial_schedule (ps, PS_MIN_CYCLE (ps));
2336
2337  rows_new = (ps_insn_ptr *) xcalloc (new_ii, sizeof (ps_insn_ptr));
2338  rows_length_new = (int *) xcalloc (new_ii, sizeof (int));
2339  for (row = 0; row < split_row; row++)
2340    {
2341      rows_new[row] = ps->rows[row];
2342      rows_length_new[row] = ps->rows_length[row];
2343      ps->rows[row] = NULL;
2344      for (crr_insn = rows_new[row];
2345	   crr_insn; crr_insn = crr_insn->next_in_row)
2346	{
2347	  int u = crr_insn->id;
2348	  int new_time = SCHED_TIME (u) + (SCHED_TIME (u) / ii);
2349
2350	  SCHED_TIME (u) = new_time;
2351	  crr_insn->cycle = new_time;
2352	  SCHED_ROW (u) = new_time % new_ii;
2353	  SCHED_STAGE (u) = new_time / new_ii;
2354	}
2355
2356    }
2357
2358  rows_new[split_row] = NULL;
2359
2360  for (row = split_row; row < ii; row++)
2361    {
2362      rows_new[row + 1] = ps->rows[row];
2363      rows_length_new[row + 1] = ps->rows_length[row];
2364      ps->rows[row] = NULL;
2365      for (crr_insn = rows_new[row + 1];
2366	   crr_insn; crr_insn = crr_insn->next_in_row)
2367	{
2368	  int u = crr_insn->id;
2369	  int new_time = SCHED_TIME (u) + (SCHED_TIME (u) / ii) + 1;
2370
2371	  SCHED_TIME (u) = new_time;
2372	  crr_insn->cycle = new_time;
2373	  SCHED_ROW (u) = new_time % new_ii;
2374	  SCHED_STAGE (u) = new_time / new_ii;
2375	}
2376    }
2377
2378  /* Updating ps.  */
2379  ps->min_cycle = ps->min_cycle + ps->min_cycle / ii
2380    + (SMODULO (ps->min_cycle, ii) >= split_row ? 1 : 0);
2381  ps->max_cycle = ps->max_cycle + ps->max_cycle / ii
2382    + (SMODULO (ps->max_cycle, ii) >= split_row ? 1 : 0);
2383  free (ps->rows);
2384  ps->rows = rows_new;
2385  free (ps->rows_length);
2386  ps->rows_length = rows_length_new;
2387  ps->ii = new_ii;
2388  gcc_assert (ps->min_cycle >= 0);
2389
2390  verify_partial_schedule (ps, sched_nodes);
2391
2392  if (dump_file)
2393    fprintf (dump_file, "min_cycle=%d, max_cycle=%d\n", ps->min_cycle,
2394	     ps->max_cycle);
2395}
2396
2397/* Given U_NODE which is the node that failed to be scheduled; LOW and
2398   UP which are the boundaries of it's scheduling window; compute using
2399   SCHED_NODES and II a row in the partial schedule that can be split
2400   which will separate a critical predecessor from a critical successor
2401   thereby expanding the window, and return it.  */
2402static int
2403compute_split_row (sbitmap sched_nodes, int low, int up, int ii,
2404		   ddg_node_ptr u_node)
2405{
2406  ddg_edge_ptr e;
2407  int lower = INT_MIN, upper = INT_MAX;
2408  int crit_pred = -1;
2409  int crit_succ = -1;
2410  int crit_cycle;
2411
2412  for (e = u_node->in; e != 0; e = e->next_in)
2413    {
2414      int v = e->src->cuid;
2415
2416      if (bitmap_bit_p (sched_nodes, v)
2417	  && (low == SCHED_TIME (v) + e->latency - (e->distance * ii)))
2418	if (SCHED_TIME (v) > lower)
2419	  {
2420	    crit_pred = v;
2421	    lower = SCHED_TIME (v);
2422	  }
2423    }
2424
2425  if (crit_pred >= 0)
2426    {
2427      crit_cycle = SCHED_TIME (crit_pred) + 1;
2428      return SMODULO (crit_cycle, ii);
2429    }
2430
2431  for (e = u_node->out; e != 0; e = e->next_out)
2432    {
2433      int v = e->dest->cuid;
2434
2435      if (bitmap_bit_p (sched_nodes, v)
2436	  && (up == SCHED_TIME (v) - e->latency + (e->distance * ii)))
2437	if (SCHED_TIME (v) < upper)
2438	  {
2439	    crit_succ = v;
2440	    upper = SCHED_TIME (v);
2441	  }
2442    }
2443
2444  if (crit_succ >= 0)
2445    {
2446      crit_cycle = SCHED_TIME (crit_succ);
2447      return SMODULO (crit_cycle, ii);
2448    }
2449
2450  if (dump_file)
2451    fprintf (dump_file, "Both crit_pred and crit_succ are NULL\n");
2452
2453  return SMODULO ((low + up + 1) / 2, ii);
2454}
2455
2456static void
2457verify_partial_schedule (partial_schedule_ptr ps, sbitmap sched_nodes)
2458{
2459  int row;
2460  ps_insn_ptr crr_insn;
2461
2462  for (row = 0; row < ps->ii; row++)
2463    {
2464      int length = 0;
2465
2466      for (crr_insn = ps->rows[row]; crr_insn; crr_insn = crr_insn->next_in_row)
2467	{
2468	  int u = crr_insn->id;
2469
2470	  length++;
2471	  gcc_assert (bitmap_bit_p (sched_nodes, u));
2472	  /* ??? Test also that all nodes of sched_nodes are in ps, perhaps by
2473	     popcount (sched_nodes) == number of insns in ps.  */
2474	  gcc_assert (SCHED_TIME (u) >= ps->min_cycle);
2475	  gcc_assert (SCHED_TIME (u) <= ps->max_cycle);
2476	}
2477
2478      gcc_assert (ps->rows_length[row] == length);
2479    }
2480}
2481
2482
2483/* This page implements the algorithm for ordering the nodes of a DDG
2484   for modulo scheduling, activated through the
2485   "int sms_order_nodes (ddg_ptr, int mii, int * result)" API.  */
2486
2487#define ORDER_PARAMS(x) ((struct node_order_params *) (x)->aux.info)
2488#define ASAP(x) (ORDER_PARAMS ((x))->asap)
2489#define ALAP(x) (ORDER_PARAMS ((x))->alap)
2490#define HEIGHT(x) (ORDER_PARAMS ((x))->height)
2491#define MOB(x) (ALAP ((x)) - ASAP ((x)))
2492#define DEPTH(x) (ASAP ((x)))
2493
2494typedef struct node_order_params * nopa;
2495
2496static void order_nodes_of_sccs (ddg_all_sccs_ptr, int * result);
2497static int order_nodes_in_scc (ddg_ptr, sbitmap, sbitmap, int*, int);
2498static nopa  calculate_order_params (ddg_ptr, int, int *);
2499static int find_max_asap (ddg_ptr, sbitmap);
2500static int find_max_hv_min_mob (ddg_ptr, sbitmap);
2501static int find_max_dv_min_mob (ddg_ptr, sbitmap);
2502
2503enum sms_direction {BOTTOMUP, TOPDOWN};
2504
2505struct node_order_params
2506{
2507  int asap;
2508  int alap;
2509  int height;
2510};
2511
2512/* Check if NODE_ORDER contains a permutation of 0 .. NUM_NODES-1.  */
2513static void
2514check_nodes_order (int *node_order, int num_nodes)
2515{
2516  int i;
2517  sbitmap tmp = sbitmap_alloc (num_nodes);
2518
2519  bitmap_clear (tmp);
2520
2521  if (dump_file)
2522    fprintf (dump_file, "SMS final nodes order: \n");
2523
2524  for (i = 0; i < num_nodes; i++)
2525    {
2526      int u = node_order[i];
2527
2528      if (dump_file)
2529        fprintf (dump_file, "%d ", u);
2530      gcc_assert (u < num_nodes && u >= 0 && !bitmap_bit_p (tmp, u));
2531
2532      bitmap_set_bit (tmp, u);
2533    }
2534
2535  if (dump_file)
2536    fprintf (dump_file, "\n");
2537
2538  sbitmap_free (tmp);
2539}
2540
2541/* Order the nodes of G for scheduling and pass the result in
2542   NODE_ORDER.  Also set aux.count of each node to ASAP.
2543   Put maximal ASAP to PMAX_ASAP.  Return the recMII for the given DDG.  */
2544static int
2545sms_order_nodes (ddg_ptr g, int mii, int * node_order, int *pmax_asap)
2546{
2547  int i;
2548  int rec_mii = 0;
2549  ddg_all_sccs_ptr sccs = create_ddg_all_sccs (g);
2550
2551  nopa nops = calculate_order_params (g, mii, pmax_asap);
2552
2553  if (dump_file)
2554    print_sccs (dump_file, sccs, g);
2555
2556  order_nodes_of_sccs (sccs, node_order);
2557
2558  if (sccs->num_sccs > 0)
2559    /* First SCC has the largest recurrence_length.  */
2560    rec_mii = sccs->sccs[0]->recurrence_length;
2561
2562  /* Save ASAP before destroying node_order_params.  */
2563  for (i = 0; i < g->num_nodes; i++)
2564    {
2565      ddg_node_ptr v = &g->nodes[i];
2566      v->aux.count = ASAP (v);
2567    }
2568
2569  free (nops);
2570  free_ddg_all_sccs (sccs);
2571  check_nodes_order (node_order, g->num_nodes);
2572
2573  return rec_mii;
2574}
2575
2576static void
2577order_nodes_of_sccs (ddg_all_sccs_ptr all_sccs, int * node_order)
2578{
2579  int i, pos = 0;
2580  ddg_ptr g = all_sccs->ddg;
2581  int num_nodes = g->num_nodes;
2582  sbitmap prev_sccs = sbitmap_alloc (num_nodes);
2583  sbitmap on_path = sbitmap_alloc (num_nodes);
2584  sbitmap tmp = sbitmap_alloc (num_nodes);
2585  sbitmap ones = sbitmap_alloc (num_nodes);
2586
2587  bitmap_clear (prev_sccs);
2588  bitmap_ones (ones);
2589
2590  /* Perform the node ordering starting from the SCC with the highest recMII.
2591     For each SCC order the nodes according to their ASAP/ALAP/HEIGHT etc.  */
2592  for (i = 0; i < all_sccs->num_sccs; i++)
2593    {
2594      ddg_scc_ptr scc = all_sccs->sccs[i];
2595
2596      /* Add nodes on paths from previous SCCs to the current SCC.  */
2597      find_nodes_on_paths (on_path, g, prev_sccs, scc->nodes);
2598      bitmap_ior (tmp, scc->nodes, on_path);
2599
2600      /* Add nodes on paths from the current SCC to previous SCCs.  */
2601      find_nodes_on_paths (on_path, g, scc->nodes, prev_sccs);
2602      bitmap_ior (tmp, tmp, on_path);
2603
2604      /* Remove nodes of previous SCCs from current extended SCC.  */
2605      bitmap_and_compl (tmp, tmp, prev_sccs);
2606
2607      pos = order_nodes_in_scc (g, prev_sccs, tmp, node_order, pos);
2608      /* Above call to order_nodes_in_scc updated prev_sccs |= tmp.  */
2609    }
2610
2611  /* Handle the remaining nodes that do not belong to any scc.  Each call
2612     to order_nodes_in_scc handles a single connected component.  */
2613  while (pos < g->num_nodes)
2614    {
2615      bitmap_and_compl (tmp, ones, prev_sccs);
2616      pos = order_nodes_in_scc (g, prev_sccs, tmp, node_order, pos);
2617    }
2618  sbitmap_free (prev_sccs);
2619  sbitmap_free (on_path);
2620  sbitmap_free (tmp);
2621  sbitmap_free (ones);
2622}
2623
2624/* MII is needed if we consider backarcs (that do not close recursive cycles).  */
2625static struct node_order_params *
2626calculate_order_params (ddg_ptr g, int mii ATTRIBUTE_UNUSED, int *pmax_asap)
2627{
2628  int u;
2629  int max_asap;
2630  int num_nodes = g->num_nodes;
2631  ddg_edge_ptr e;
2632  /* Allocate a place to hold ordering params for each node in the DDG.  */
2633  nopa node_order_params_arr;
2634
2635  /* Initialize of ASAP/ALAP/HEIGHT to zero.  */
2636  node_order_params_arr = (nopa) xcalloc (num_nodes,
2637					  sizeof (struct node_order_params));
2638
2639  /* Set the aux pointer of each node to point to its order_params structure.  */
2640  for (u = 0; u < num_nodes; u++)
2641    g->nodes[u].aux.info = &node_order_params_arr[u];
2642
2643  /* Disregarding a backarc from each recursive cycle to obtain a DAG,
2644     calculate ASAP, ALAP, mobility, distance, and height for each node
2645     in the dependence (direct acyclic) graph.  */
2646
2647  /* We assume that the nodes in the array are in topological order.  */
2648
2649  max_asap = 0;
2650  for (u = 0; u < num_nodes; u++)
2651    {
2652      ddg_node_ptr u_node = &g->nodes[u];
2653
2654      ASAP (u_node) = 0;
2655      for (e = u_node->in; e; e = e->next_in)
2656	if (e->distance == 0)
2657	  ASAP (u_node) = MAX (ASAP (u_node),
2658			       ASAP (e->src) + e->latency);
2659      max_asap = MAX (max_asap, ASAP (u_node));
2660    }
2661
2662  for (u = num_nodes - 1; u > -1; u--)
2663    {
2664      ddg_node_ptr u_node = &g->nodes[u];
2665
2666      ALAP (u_node) = max_asap;
2667      HEIGHT (u_node) = 0;
2668      for (e = u_node->out; e; e = e->next_out)
2669	if (e->distance == 0)
2670	  {
2671	    ALAP (u_node) = MIN (ALAP (u_node),
2672				 ALAP (e->dest) - e->latency);
2673	    HEIGHT (u_node) = MAX (HEIGHT (u_node),
2674				   HEIGHT (e->dest) + e->latency);
2675	  }
2676    }
2677  if (dump_file)
2678  {
2679    fprintf (dump_file, "\nOrder params\n");
2680    for (u = 0; u < num_nodes; u++)
2681      {
2682        ddg_node_ptr u_node = &g->nodes[u];
2683
2684        fprintf (dump_file, "node %d, ASAP: %d, ALAP: %d, HEIGHT: %d\n", u,
2685                 ASAP (u_node), ALAP (u_node), HEIGHT (u_node));
2686      }
2687  }
2688
2689  *pmax_asap = max_asap;
2690  return node_order_params_arr;
2691}
2692
2693static int
2694find_max_asap (ddg_ptr g, sbitmap nodes)
2695{
2696  unsigned int u = 0;
2697  int max_asap = -1;
2698  int result = -1;
2699  sbitmap_iterator sbi;
2700
2701  EXECUTE_IF_SET_IN_BITMAP (nodes, 0, u, sbi)
2702    {
2703      ddg_node_ptr u_node = &g->nodes[u];
2704
2705      if (max_asap < ASAP (u_node))
2706	{
2707	  max_asap = ASAP (u_node);
2708	  result = u;
2709	}
2710    }
2711  return result;
2712}
2713
2714static int
2715find_max_hv_min_mob (ddg_ptr g, sbitmap nodes)
2716{
2717  unsigned int u = 0;
2718  int max_hv = -1;
2719  int min_mob = INT_MAX;
2720  int result = -1;
2721  sbitmap_iterator sbi;
2722
2723  EXECUTE_IF_SET_IN_BITMAP (nodes, 0, u, sbi)
2724    {
2725      ddg_node_ptr u_node = &g->nodes[u];
2726
2727      if (max_hv < HEIGHT (u_node))
2728	{
2729	  max_hv = HEIGHT (u_node);
2730	  min_mob = MOB (u_node);
2731	  result = u;
2732	}
2733      else if ((max_hv == HEIGHT (u_node))
2734	       && (min_mob > MOB (u_node)))
2735	{
2736	  min_mob = MOB (u_node);
2737	  result = u;
2738	}
2739    }
2740  return result;
2741}
2742
2743static int
2744find_max_dv_min_mob (ddg_ptr g, sbitmap nodes)
2745{
2746  unsigned int u = 0;
2747  int max_dv = -1;
2748  int min_mob = INT_MAX;
2749  int result = -1;
2750  sbitmap_iterator sbi;
2751
2752  EXECUTE_IF_SET_IN_BITMAP (nodes, 0, u, sbi)
2753    {
2754      ddg_node_ptr u_node = &g->nodes[u];
2755
2756      if (max_dv < DEPTH (u_node))
2757	{
2758	  max_dv = DEPTH (u_node);
2759	  min_mob = MOB (u_node);
2760	  result = u;
2761	}
2762      else if ((max_dv == DEPTH (u_node))
2763	       && (min_mob > MOB (u_node)))
2764	{
2765	  min_mob = MOB (u_node);
2766	  result = u;
2767	}
2768    }
2769  return result;
2770}
2771
2772/* Places the nodes of SCC into the NODE_ORDER array starting
2773   at position POS, according to the SMS ordering algorithm.
2774   NODES_ORDERED (in&out parameter) holds the bitset of all nodes in
2775   the NODE_ORDER array, starting from position zero.  */
2776static int
2777order_nodes_in_scc (ddg_ptr g, sbitmap nodes_ordered, sbitmap scc,
2778		    int * node_order, int pos)
2779{
2780  enum sms_direction dir;
2781  int num_nodes = g->num_nodes;
2782  sbitmap workset = sbitmap_alloc (num_nodes);
2783  sbitmap tmp = sbitmap_alloc (num_nodes);
2784  sbitmap zero_bitmap = sbitmap_alloc (num_nodes);
2785  sbitmap predecessors = sbitmap_alloc (num_nodes);
2786  sbitmap successors = sbitmap_alloc (num_nodes);
2787
2788  bitmap_clear (predecessors);
2789  find_predecessors (predecessors, g, nodes_ordered);
2790
2791  bitmap_clear (successors);
2792  find_successors (successors, g, nodes_ordered);
2793
2794  bitmap_clear (tmp);
2795  if (bitmap_and (tmp, predecessors, scc))
2796    {
2797      bitmap_copy (workset, tmp);
2798      dir = BOTTOMUP;
2799    }
2800  else if (bitmap_and (tmp, successors, scc))
2801    {
2802      bitmap_copy (workset, tmp);
2803      dir = TOPDOWN;
2804    }
2805  else
2806    {
2807      int u;
2808
2809      bitmap_clear (workset);
2810      if ((u = find_max_asap (g, scc)) >= 0)
2811	bitmap_set_bit (workset, u);
2812      dir = BOTTOMUP;
2813    }
2814
2815  bitmap_clear (zero_bitmap);
2816  while (!bitmap_equal_p (workset, zero_bitmap))
2817    {
2818      int v;
2819      ddg_node_ptr v_node;
2820      sbitmap v_node_preds;
2821      sbitmap v_node_succs;
2822
2823      if (dir == TOPDOWN)
2824	{
2825	  while (!bitmap_equal_p (workset, zero_bitmap))
2826	    {
2827	      v = find_max_hv_min_mob (g, workset);
2828	      v_node = &g->nodes[v];
2829	      node_order[pos++] = v;
2830	      v_node_succs = NODE_SUCCESSORS (v_node);
2831	      bitmap_and (tmp, v_node_succs, scc);
2832
2833	      /* Don't consider the already ordered successors again.  */
2834	      bitmap_and_compl (tmp, tmp, nodes_ordered);
2835	      bitmap_ior (workset, workset, tmp);
2836	      bitmap_clear_bit (workset, v);
2837	      bitmap_set_bit (nodes_ordered, v);
2838	    }
2839	  dir = BOTTOMUP;
2840	  bitmap_clear (predecessors);
2841	  find_predecessors (predecessors, g, nodes_ordered);
2842	  bitmap_and (workset, predecessors, scc);
2843	}
2844      else
2845	{
2846	  while (!bitmap_equal_p (workset, zero_bitmap))
2847	    {
2848	      v = find_max_dv_min_mob (g, workset);
2849	      v_node = &g->nodes[v];
2850	      node_order[pos++] = v;
2851	      v_node_preds = NODE_PREDECESSORS (v_node);
2852	      bitmap_and (tmp, v_node_preds, scc);
2853
2854	      /* Don't consider the already ordered predecessors again.  */
2855	      bitmap_and_compl (tmp, tmp, nodes_ordered);
2856	      bitmap_ior (workset, workset, tmp);
2857	      bitmap_clear_bit (workset, v);
2858	      bitmap_set_bit (nodes_ordered, v);
2859	    }
2860	  dir = TOPDOWN;
2861	  bitmap_clear (successors);
2862	  find_successors (successors, g, nodes_ordered);
2863	  bitmap_and (workset, successors, scc);
2864	}
2865    }
2866  sbitmap_free (tmp);
2867  sbitmap_free (workset);
2868  sbitmap_free (zero_bitmap);
2869  sbitmap_free (predecessors);
2870  sbitmap_free (successors);
2871  return pos;
2872}
2873
2874
2875/* This page contains functions for manipulating partial-schedules during
2876   modulo scheduling.  */
2877
2878/* Create a partial schedule and allocate a memory to hold II rows.  */
2879
2880static partial_schedule_ptr
2881create_partial_schedule (int ii, ddg_ptr g, int history)
2882{
2883  partial_schedule_ptr ps = XNEW (struct partial_schedule);
2884  ps->rows = (ps_insn_ptr *) xcalloc (ii, sizeof (ps_insn_ptr));
2885  ps->rows_length = (int *) xcalloc (ii, sizeof (int));
2886  ps->reg_moves.create (0);
2887  ps->ii = ii;
2888  ps->history = history;
2889  ps->min_cycle = INT_MAX;
2890  ps->max_cycle = INT_MIN;
2891  ps->g = g;
2892
2893  return ps;
2894}
2895
2896/* Free the PS_INSNs in rows array of the given partial schedule.
2897   ??? Consider caching the PS_INSN's.  */
2898static void
2899free_ps_insns (partial_schedule_ptr ps)
2900{
2901  int i;
2902
2903  for (i = 0; i < ps->ii; i++)
2904    {
2905      while (ps->rows[i])
2906	{
2907	  ps_insn_ptr ps_insn = ps->rows[i]->next_in_row;
2908
2909	  free (ps->rows[i]);
2910	  ps->rows[i] = ps_insn;
2911	}
2912      ps->rows[i] = NULL;
2913    }
2914}
2915
2916/* Free all the memory allocated to the partial schedule.  */
2917
2918static void
2919free_partial_schedule (partial_schedule_ptr ps)
2920{
2921  ps_reg_move_info *move;
2922  unsigned int i;
2923
2924  if (!ps)
2925    return;
2926
2927  FOR_EACH_VEC_ELT (ps->reg_moves, i, move)
2928    sbitmap_free (move->uses);
2929  ps->reg_moves.release ();
2930
2931  free_ps_insns (ps);
2932  free (ps->rows);
2933  free (ps->rows_length);
2934  free (ps);
2935}
2936
2937/* Clear the rows array with its PS_INSNs, and create a new one with
2938   NEW_II rows.  */
2939
2940static void
2941reset_partial_schedule (partial_schedule_ptr ps, int new_ii)
2942{
2943  if (!ps)
2944    return;
2945  free_ps_insns (ps);
2946  if (new_ii == ps->ii)
2947    return;
2948  ps->rows = (ps_insn_ptr *) xrealloc (ps->rows, new_ii
2949						 * sizeof (ps_insn_ptr));
2950  memset (ps->rows, 0, new_ii * sizeof (ps_insn_ptr));
2951  ps->rows_length = (int *) xrealloc (ps->rows_length, new_ii * sizeof (int));
2952  memset (ps->rows_length, 0, new_ii * sizeof (int));
2953  ps->ii = new_ii;
2954  ps->min_cycle = INT_MAX;
2955  ps->max_cycle = INT_MIN;
2956}
2957
2958/* Prints the partial schedule as an ii rows array, for each rows
2959   print the ids of the insns in it.  */
2960void
2961print_partial_schedule (partial_schedule_ptr ps, FILE *dump)
2962{
2963  int i;
2964
2965  for (i = 0; i < ps->ii; i++)
2966    {
2967      ps_insn_ptr ps_i = ps->rows[i];
2968
2969      fprintf (dump, "\n[ROW %d ]: ", i);
2970      while (ps_i)
2971	{
2972	  rtx_insn *insn = ps_rtl_insn (ps, ps_i->id);
2973
2974	  if (JUMP_P (insn))
2975	    fprintf (dump, "%d (branch), ", INSN_UID (insn));
2976	  else
2977	    fprintf (dump, "%d, ", INSN_UID (insn));
2978
2979	  ps_i = ps_i->next_in_row;
2980	}
2981    }
2982}
2983
2984/* Creates an object of PS_INSN and initializes it to the given parameters.  */
2985static ps_insn_ptr
2986create_ps_insn (int id, int cycle)
2987{
2988  ps_insn_ptr ps_i = XNEW (struct ps_insn);
2989
2990  ps_i->id = id;
2991  ps_i->next_in_row = NULL;
2992  ps_i->prev_in_row = NULL;
2993  ps_i->cycle = cycle;
2994
2995  return ps_i;
2996}
2997
2998
2999/* Removes the given PS_INSN from the partial schedule.  */
3000static void
3001remove_node_from_ps (partial_schedule_ptr ps, ps_insn_ptr ps_i)
3002{
3003  int row;
3004
3005  gcc_assert (ps && ps_i);
3006
3007  row = SMODULO (ps_i->cycle, ps->ii);
3008  if (! ps_i->prev_in_row)
3009    {
3010      gcc_assert (ps_i == ps->rows[row]);
3011      ps->rows[row] = ps_i->next_in_row;
3012      if (ps->rows[row])
3013	ps->rows[row]->prev_in_row = NULL;
3014    }
3015  else
3016    {
3017      ps_i->prev_in_row->next_in_row = ps_i->next_in_row;
3018      if (ps_i->next_in_row)
3019	ps_i->next_in_row->prev_in_row = ps_i->prev_in_row;
3020    }
3021
3022  ps->rows_length[row] -= 1;
3023  free (ps_i);
3024  return;
3025}
3026
3027/* Unlike what literature describes for modulo scheduling (which focuses
3028   on VLIW machines) the order of the instructions inside a cycle is
3029   important.  Given the bitmaps MUST_FOLLOW and MUST_PRECEDE we know
3030   where the current instruction should go relative to the already
3031   scheduled instructions in the given cycle.  Go over these
3032   instructions and find the first possible column to put it in.  */
3033static bool
3034ps_insn_find_column (partial_schedule_ptr ps, ps_insn_ptr ps_i,
3035		     sbitmap must_precede, sbitmap must_follow)
3036{
3037  ps_insn_ptr next_ps_i;
3038  ps_insn_ptr first_must_follow = NULL;
3039  ps_insn_ptr last_must_precede = NULL;
3040  ps_insn_ptr last_in_row = NULL;
3041  int row;
3042
3043  if (! ps_i)
3044    return false;
3045
3046  row = SMODULO (ps_i->cycle, ps->ii);
3047
3048  /* Find the first must follow and the last must precede
3049     and insert the node immediately after the must precede
3050     but make sure that it there is no must follow after it.  */
3051  for (next_ps_i = ps->rows[row];
3052       next_ps_i;
3053       next_ps_i = next_ps_i->next_in_row)
3054    {
3055      if (must_follow
3056	  && bitmap_bit_p (must_follow, next_ps_i->id)
3057	  && ! first_must_follow)
3058        first_must_follow = next_ps_i;
3059      if (must_precede && bitmap_bit_p (must_precede, next_ps_i->id))
3060        {
3061          /* If we have already met a node that must follow, then
3062	     there is no possible column.  */
3063  	  if (first_must_follow)
3064            return false;
3065	  else
3066            last_must_precede = next_ps_i;
3067        }
3068      /* The closing branch must be the last in the row.  */
3069      if (must_precede
3070	  && bitmap_bit_p (must_precede, next_ps_i->id)
3071	  && JUMP_P (ps_rtl_insn (ps, next_ps_i->id)))
3072	return false;
3073
3074       last_in_row = next_ps_i;
3075    }
3076
3077  /* The closing branch is scheduled as well.  Make sure there is no
3078     dependent instruction after it as the branch should be the last
3079     instruction in the row.  */
3080  if (JUMP_P (ps_rtl_insn (ps, ps_i->id)))
3081    {
3082      if (first_must_follow)
3083	return false;
3084      if (last_in_row)
3085	{
3086	  /* Make the branch the last in the row.  New instructions
3087	     will be inserted at the beginning of the row or after the
3088	     last must_precede instruction thus the branch is guaranteed
3089	     to remain the last instruction in the row.  */
3090	  last_in_row->next_in_row = ps_i;
3091	  ps_i->prev_in_row = last_in_row;
3092	  ps_i->next_in_row = NULL;
3093	}
3094      else
3095	ps->rows[row] = ps_i;
3096      return true;
3097    }
3098
3099  /* Now insert the node after INSERT_AFTER_PSI.  */
3100
3101  if (! last_must_precede)
3102    {
3103      ps_i->next_in_row = ps->rows[row];
3104      ps_i->prev_in_row = NULL;
3105      if (ps_i->next_in_row)
3106    	ps_i->next_in_row->prev_in_row = ps_i;
3107      ps->rows[row] = ps_i;
3108    }
3109  else
3110    {
3111      ps_i->next_in_row = last_must_precede->next_in_row;
3112      last_must_precede->next_in_row = ps_i;
3113      ps_i->prev_in_row = last_must_precede;
3114      if (ps_i->next_in_row)
3115        ps_i->next_in_row->prev_in_row = ps_i;
3116    }
3117
3118  return true;
3119}
3120
3121/* Advances the PS_INSN one column in its current row; returns false
3122   in failure and true in success.  Bit N is set in MUST_FOLLOW if
3123   the node with cuid N must be come after the node pointed to by
3124   PS_I when scheduled in the same cycle.  */
3125static int
3126ps_insn_advance_column (partial_schedule_ptr ps, ps_insn_ptr ps_i,
3127			sbitmap must_follow)
3128{
3129  ps_insn_ptr prev, next;
3130  int row;
3131
3132  if (!ps || !ps_i)
3133    return false;
3134
3135  row = SMODULO (ps_i->cycle, ps->ii);
3136
3137  if (! ps_i->next_in_row)
3138    return false;
3139
3140  /* Check if next_in_row is dependent on ps_i, both having same sched
3141     times (typically ANTI_DEP).  If so, ps_i cannot skip over it.  */
3142  if (must_follow && bitmap_bit_p (must_follow, ps_i->next_in_row->id))
3143    return false;
3144
3145  /* Advance PS_I over its next_in_row in the doubly linked list.  */
3146  prev = ps_i->prev_in_row;
3147  next = ps_i->next_in_row;
3148
3149  if (ps_i == ps->rows[row])
3150    ps->rows[row] = next;
3151
3152  ps_i->next_in_row = next->next_in_row;
3153
3154  if (next->next_in_row)
3155    next->next_in_row->prev_in_row = ps_i;
3156
3157  next->next_in_row = ps_i;
3158  ps_i->prev_in_row = next;
3159
3160  next->prev_in_row = prev;
3161  if (prev)
3162    prev->next_in_row = next;
3163
3164  return true;
3165}
3166
3167/* Inserts a DDG_NODE to the given partial schedule at the given cycle.
3168   Returns 0 if this is not possible and a PS_INSN otherwise.  Bit N is
3169   set in MUST_PRECEDE/MUST_FOLLOW if the node with cuid N must be come
3170   before/after (respectively) the node pointed to by PS_I when scheduled
3171   in the same cycle.  */
3172static ps_insn_ptr
3173add_node_to_ps (partial_schedule_ptr ps, int id, int cycle,
3174		sbitmap must_precede, sbitmap must_follow)
3175{
3176  ps_insn_ptr ps_i;
3177  int row = SMODULO (cycle, ps->ii);
3178
3179  if (ps->rows_length[row] >= issue_rate)
3180    return NULL;
3181
3182  ps_i = create_ps_insn (id, cycle);
3183
3184  /* Finds and inserts PS_I according to MUST_FOLLOW and
3185     MUST_PRECEDE.  */
3186  if (! ps_insn_find_column (ps, ps_i, must_precede, must_follow))
3187    {
3188      free (ps_i);
3189      return NULL;
3190    }
3191
3192  ps->rows_length[row] += 1;
3193  return ps_i;
3194}
3195
3196/* Advance time one cycle.  Assumes DFA is being used.  */
3197static void
3198advance_one_cycle (void)
3199{
3200  if (targetm.sched.dfa_pre_cycle_insn)
3201    state_transition (curr_state,
3202		      targetm.sched.dfa_pre_cycle_insn ());
3203
3204  state_transition (curr_state, NULL);
3205
3206  if (targetm.sched.dfa_post_cycle_insn)
3207    state_transition (curr_state,
3208		      targetm.sched.dfa_post_cycle_insn ());
3209}
3210
3211
3212
3213/* Checks if PS has resource conflicts according to DFA, starting from
3214   FROM cycle to TO cycle; returns true if there are conflicts and false
3215   if there are no conflicts.  Assumes DFA is being used.  */
3216static int
3217ps_has_conflicts (partial_schedule_ptr ps, int from, int to)
3218{
3219  int cycle;
3220
3221  state_reset (curr_state);
3222
3223  for (cycle = from; cycle <= to; cycle++)
3224    {
3225      ps_insn_ptr crr_insn;
3226      /* Holds the remaining issue slots in the current row.  */
3227      int can_issue_more = issue_rate;
3228
3229      /* Walk through the DFA for the current row.  */
3230      for (crr_insn = ps->rows[SMODULO (cycle, ps->ii)];
3231	   crr_insn;
3232	   crr_insn = crr_insn->next_in_row)
3233	{
3234	  rtx_insn *insn = ps_rtl_insn (ps, crr_insn->id);
3235
3236	  if (!NONDEBUG_INSN_P (insn))
3237	    continue;
3238
3239	  /* Check if there is room for the current insn.  */
3240	  if (!can_issue_more || state_dead_lock_p (curr_state))
3241	    return true;
3242
3243	  /* Update the DFA state and return with failure if the DFA found
3244	     resource conflicts.  */
3245	  if (state_transition (curr_state, insn) >= 0)
3246	    return true;
3247
3248	  if (targetm.sched.variable_issue)
3249	    can_issue_more =
3250	      targetm.sched.variable_issue (sched_dump, sched_verbose,
3251					    insn, can_issue_more);
3252	  /* A naked CLOBBER or USE generates no instruction, so don't
3253	     let them consume issue slots.  */
3254	  else if (GET_CODE (PATTERN (insn)) != USE
3255		   && GET_CODE (PATTERN (insn)) != CLOBBER)
3256	    can_issue_more--;
3257	}
3258
3259      /* Advance the DFA to the next cycle.  */
3260      advance_one_cycle ();
3261    }
3262  return false;
3263}
3264
3265/* Checks if the given node causes resource conflicts when added to PS at
3266   cycle C.  If not the node is added to PS and returned; otherwise zero
3267   is returned.  Bit N is set in MUST_PRECEDE/MUST_FOLLOW if the node with
3268   cuid N must be come before/after (respectively) the node pointed to by
3269   PS_I when scheduled in the same cycle.  */
3270ps_insn_ptr
3271ps_add_node_check_conflicts (partial_schedule_ptr ps, int n,
3272   			     int c, sbitmap must_precede,
3273			     sbitmap must_follow)
3274{
3275  int has_conflicts = 0;
3276  ps_insn_ptr ps_i;
3277
3278  /* First add the node to the PS, if this succeeds check for
3279     conflicts, trying different issue slots in the same row.  */
3280  if (! (ps_i = add_node_to_ps (ps, n, c, must_precede, must_follow)))
3281    return NULL; /* Failed to insert the node at the given cycle.  */
3282
3283  has_conflicts = ps_has_conflicts (ps, c, c)
3284		  || (ps->history > 0
3285		      && ps_has_conflicts (ps,
3286					   c - ps->history,
3287					   c + ps->history));
3288
3289  /* Try different issue slots to find one that the given node can be
3290     scheduled in without conflicts.  */
3291  while (has_conflicts)
3292    {
3293      if (! ps_insn_advance_column (ps, ps_i, must_follow))
3294	break;
3295      has_conflicts = ps_has_conflicts (ps, c, c)
3296		      || (ps->history > 0
3297			  && ps_has_conflicts (ps,
3298					       c - ps->history,
3299					       c + ps->history));
3300    }
3301
3302  if (has_conflicts)
3303    {
3304      remove_node_from_ps (ps, ps_i);
3305      return NULL;
3306    }
3307
3308  ps->min_cycle = MIN (ps->min_cycle, c);
3309  ps->max_cycle = MAX (ps->max_cycle, c);
3310  return ps_i;
3311}
3312
3313/* Calculate the stage count of the partial schedule PS.  The calculation
3314   takes into account the rotation amount passed in ROTATION_AMOUNT.  */
3315int
3316calculate_stage_count (partial_schedule_ptr ps, int rotation_amount)
3317{
3318  int new_min_cycle = PS_MIN_CYCLE (ps) - rotation_amount;
3319  int new_max_cycle = PS_MAX_CYCLE (ps) - rotation_amount;
3320  int stage_count = CALC_STAGE_COUNT (-1, new_min_cycle, ps->ii);
3321
3322  /* The calculation of stage count is done adding the number of stages
3323     before cycle zero and after cycle zero.  */
3324  stage_count += CALC_STAGE_COUNT (new_max_cycle, 0, ps->ii);
3325
3326  return stage_count;
3327}
3328
3329/* Rotate the rows of PS such that insns scheduled at time
3330   START_CYCLE will appear in row 0.  Updates max/min_cycles.  */
3331void
3332rotate_partial_schedule (partial_schedule_ptr ps, int start_cycle)
3333{
3334  int i, row, backward_rotates;
3335  int last_row = ps->ii - 1;
3336
3337  if (start_cycle == 0)
3338    return;
3339
3340  backward_rotates = SMODULO (start_cycle, ps->ii);
3341
3342  /* Revisit later and optimize this into a single loop.  */
3343  for (i = 0; i < backward_rotates; i++)
3344    {
3345      ps_insn_ptr first_row = ps->rows[0];
3346      int first_row_length = ps->rows_length[0];
3347
3348      for (row = 0; row < last_row; row++)
3349	{
3350	  ps->rows[row] = ps->rows[row + 1];
3351	  ps->rows_length[row] = ps->rows_length[row + 1];
3352	}
3353
3354      ps->rows[last_row] = first_row;
3355      ps->rows_length[last_row] = first_row_length;
3356    }
3357
3358  ps->max_cycle -= start_cycle;
3359  ps->min_cycle -= start_cycle;
3360}
3361
3362#endif /* INSN_SCHEDULING */
3363
3364/* Run instruction scheduler.  */
3365/* Perform SMS module scheduling.  */
3366
3367namespace {
3368
3369const pass_data pass_data_sms =
3370{
3371  RTL_PASS, /* type */
3372  "sms", /* name */
3373  OPTGROUP_NONE, /* optinfo_flags */
3374  TV_SMS, /* tv_id */
3375  0, /* properties_required */
3376  0, /* properties_provided */
3377  0, /* properties_destroyed */
3378  0, /* todo_flags_start */
3379  TODO_df_finish, /* todo_flags_finish */
3380};
3381
3382class pass_sms : public rtl_opt_pass
3383{
3384public:
3385  pass_sms (gcc::context *ctxt)
3386    : rtl_opt_pass (pass_data_sms, ctxt)
3387  {}
3388
3389  /* opt_pass methods: */
3390  virtual bool gate (function *)
3391{
3392  return (optimize > 0 && flag_modulo_sched);
3393}
3394
3395  virtual unsigned int execute (function *);
3396
3397}; // class pass_sms
3398
3399unsigned int
3400pass_sms::execute (function *fun ATTRIBUTE_UNUSED)
3401{
3402#ifdef INSN_SCHEDULING
3403  basic_block bb;
3404
3405  /* Collect loop information to be used in SMS.  */
3406  cfg_layout_initialize (0);
3407  sms_schedule ();
3408
3409  /* Update the life information, because we add pseudos.  */
3410  max_regno = max_reg_num ();
3411
3412  /* Finalize layout changes.  */
3413  FOR_EACH_BB_FN (bb, fun)
3414    if (bb->next_bb != EXIT_BLOCK_PTR_FOR_FN (fun))
3415      bb->aux = bb->next_bb;
3416  free_dominance_info (CDI_DOMINATORS);
3417  cfg_layout_finalize ();
3418#endif /* INSN_SCHEDULING */
3419  return 0;
3420}
3421
3422} // anon namespace
3423
3424rtl_opt_pass *
3425make_pass_sms (gcc::context *ctxt)
3426{
3427  return new pass_sms (ctxt);
3428}
3429