/* Natural loop analysis code for GNU compiler.
   Copyright (C) 2002, 2003, 2004, 2005, 2006 Free Software Foundation, Inc.

This file is part of GCC.

GCC is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free
Software Foundation; either version 2, or (at your option) any later
version.

GCC is distributed in the hope that it will be useful, but WITHOUT ANY
WARRANTY; without even the implied warranty of MERCHANTABILITY or
FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
for more details.

You should have received a copy of the GNU General Public License
along with GCC; see the file COPYING.  If not, write to the Free
Software Foundation, 51 Franklin Street, Fifth Floor, Boston, MA
02110-1301, USA.  */
216104Sse#include "config.h"
226104Sse#include "system.h"
236104Sse#include "coretypes.h"
246104Sse#include "tm.h"
256104Sse#include "rtl.h"
266104Sse#include "hard-reg-set.h"
276104Sse#include "obstack.h"
286104Sse#include "basic-block.h"
296104Sse#include "cfgloop.h"
306104Sse#include "expr.h"
316104Sse#include "output.h"
326104Sse
336104Sse/* Checks whether BB is executed exactly once in each LOOP iteration.  */
346104Sse
356104Ssebool
366104Ssejust_once_each_iteration_p (const struct loop *loop, basic_block bb)
376104Sse{
387234Sse  /* It must be executed at least once each iteration.  */
396104Sse  if (!dominated_by_p (CDI_DOMINATORS, loop->latch, bb))
406734Sbde    return false;
416734Sbde
426734Sbde  /* And just once.  */
436734Sbde  if (bb->loop_father != loop)
446734Sbde    return false;
456104Sse
466104Sse  /* But this was not enough.  We might have some irreducible loop here.  */
476104Sse  if (bb->flags & BB_IRREDUCIBLE_LOOP)
486104Sse    return false;
496104Sse
506104Sse  return true;
516104Sse}
527234Sse
/* Structure representing edge of a graph.  Edges live on two intrusive
   singly-linked lists: the successor list of their source vertex and the
   predecessor list of their destination vertex.  */

struct edge
{
  int src, dest;	/* Source and destination.  */
  struct edge *pred_next, *succ_next;
			/* Next edge in predecessor and successor lists.  */
  void *data;		/* Data attached to the edge (here: the CFG edge
			   this graph edge represents).  */
};
627234Sse
/* Structure representing vertex of a graph.  */

struct vertex
{
  struct edge *pred, *succ;
			/* Lists of predecessors and successors.  */
  int component;	/* Number of dfs restarts before reaching the
			   vertex; -1 while unvisited.  */
  int post;		/* Postorder number; -1 while unassigned.  */
};
737234Sse
/* Structure representing a graph.  */

struct graph
{
  int n_vertices;	/* Number of vertices.  */
  struct vertex *vertices;
			/* The vertices, indexed 0 .. n_vertices - 1.  */
};
827234Sse
837234Sse/* Dumps graph G into F.  */
847234Sse
857234Sseextern void dump_graph (FILE *, struct graph *);
867234Sse
877234Ssevoid
887234Ssedump_graph (FILE *f, struct graph *g)
897234Sse{
907234Sse  int i;
917234Sse  struct edge *e;
927234Sse
937234Sse  for (i = 0; i < g->n_vertices; i++)
947234Sse    {
957234Sse      if (!g->vertices[i].pred
967234Sse	  && !g->vertices[i].succ)
977234Sse	continue;
987234Sse
997234Sse      fprintf (f, "%d (%d)\t<-", i, g->vertices[i].component);
1007234Sse      for (e = g->vertices[i].pred; e; e = e->pred_next)
1017234Sse	fprintf (f, " %d", e->src);
1027234Sse      fprintf (f, "\n");
1037234Sse
1047234Sse      fprintf (f, "\t->");
1057234Sse      for (e = g->vertices[i].succ; e; e = e->succ_next)
1067234Sse	fprintf (f, " %d", e->dest);
1077234Sse      fprintf (f, "\n");
1087234Sse    }
1097234Sse}
1107234Sse
1117234Sse/* Creates a new graph with N_VERTICES vertices.  */
1127234Sse
1137234Ssestatic struct graph *
1147234Ssenew_graph (int n_vertices)
1157234Sse{
1167234Sse  struct graph *g = XNEW (struct graph);
1177234Sse
1187234Sse  g->n_vertices = n_vertices;
1197234Sse  g->vertices = XCNEWVEC (struct vertex, n_vertices);
1206104Sse
1216104Sse  return g;
1226104Sse}
1236104Sse
1246104Sse/* Adds an edge from F to T to graph G, with DATA attached.  */
1257234Sse
1266104Ssestatic void
1277234Sseadd_edge (struct graph *g, int f, int t, void *data)
1286104Sse{
1296104Sse  struct edge *e = xmalloc (sizeof (struct edge));
1307234Sse
1317234Sse  e->src = f;
1326104Sse  e->dest = t;
1337234Sse  e->data = data;
1347234Sse
1357234Sse  e->pred_next = g->vertices[t].pred;
1366104Sse  g->vertices[t].pred = e;
1376104Sse
1386104Sse  e->succ_next = g->vertices[f].succ;
1396104Sse  g->vertices[f].succ = e;
1406104Sse}
1416104Sse
/* Runs dfs search over vertices of G, from NQ vertices in queue QS.
   The vertices in postorder are stored into QT.  If FORWARD is false,
   backward dfs is run.  Each vertex gets the number of the dfs restart
   that reached it stored into its COMPONENT field, and its postorder
   number into POST.  */

static void
dfs (struct graph *g, int *qs, int nq, int *qt, bool forward)
{
  int i, tick = 0, v, comp = 0, top;
  struct edge *e;
  /* Explicit stack of edges replacing recursion; at most one stacked edge
     per vertex, hence n_vertices entries suffice.  */
  struct edge **stack = xmalloc (sizeof (struct edge *) * g->n_vertices);

  /* Mark every vertex unvisited.  */
  for (i = 0; i < g->n_vertices; i++)
    {
      g->vertices[i].component = -1;
      g->vertices[i].post = -1;
    }

/* Direction-agnostic accessors: when FORWARD is false, the roles of the
   succ/pred lists and of src/dest are swapped, so the same loop body walks
   the reversed graph.  */
#define FST_EDGE(V) (forward ? g->vertices[(V)].succ : g->vertices[(V)].pred)
#define NEXT_EDGE(E) (forward ? (E)->succ_next : (E)->pred_next)
#define EDGE_SRC(E) (forward ? (E)->src : (E)->dest)
#define EDGE_DEST(E) (forward ? (E)->dest : (E)->src)

  for (i = 0; i < nq; i++)
    {
      v = qs[i];
      /* Already finished in an earlier restart.  */
      if (g->vertices[v].post != -1)
	continue;

      /* Each restart opens a new component.  */
      g->vertices[v].component = comp++;
      e = FST_EDGE (v);
      top = 0;

      while (1)
	{
	  /* Skip neighbors already reached (in this or a previous
	     restart).  */
	  while (e && g->vertices[EDGE_DEST (e)].component != -1)
	    e = NEXT_EDGE (e);

	  if (!e)
	    {
	      /* All neighbors of V done: assign its postorder number,
		 then backtrack to its parent edge (or stop if V was the
		 restart root).  */
	      if (qt)
		qt[tick] = v;
	      g->vertices[v].post = tick++;

	      if (!top)
		break;

	      e = stack[--top];
	      v = EDGE_SRC (e);
	      e = NEXT_EDGE (e);
	      continue;
	    }

	  /* Descend into an unvisited neighbor, remembering the edge we
	     came through so we can backtrack later.  */
	  stack[top++] = e;
	  v = EDGE_DEST (e);
	  e = FST_EDGE (v);
	  g->vertices[v].component = comp - 1;
	}
    }

  free (stack);
}
2036104Sse
2046104Sse/* Marks the edge E in graph G irreducible if it connects two vertices in the
2056104Sse   same scc.  */
2066104Sse
2076104Ssestatic void
2086104Ssecheck_irred (struct graph *g, struct edge *e)
2096104Sse{
2106104Sse  edge real = e->data;
2116104Sse
2126104Sse  /* All edges should lead from a component with higher number to the
2136104Sse     one with lower one.  */
2146104Sse  gcc_assert (g->vertices[e->src].component >= g->vertices[e->dest].component);
2156104Sse
2166104Sse  if (g->vertices[e->src].component != g->vertices[e->dest].component)
2176104Sse    return;
2187234Sse
2197234Sse  real->flags |= EDGE_IRREDUCIBLE_LOOP;
2206104Sse  if (flow_bb_inside_loop_p (real->src->loop_father, real->dest))
2216104Sse    real->src->flags |= BB_IRREDUCIBLE_LOOP;
2226104Sse}
2236104Sse
2246104Sse/* Runs CALLBACK for all edges in G.  */
2256104Sse
2266104Ssestatic void
2276104Ssefor_each_edge (struct graph *g,
2286104Sse	       void (callback) (struct graph *, struct edge *))
2296104Sse{
2306104Sse  struct edge *e;
2317234Sse  int i;
2327234Sse
2336104Sse  for (i = 0; i < g->n_vertices; i++)
2346104Sse    for (e = g->vertices[i].succ; e; e = e->succ_next)
2356104Sse      callback (g, e);
2366104Sse}
2376104Sse
2386104Sse/* Releases the memory occupied by G.  */
2396104Sse
2406104Ssestatic void
2416104Ssefree_graph (struct graph *g)
2426104Sse{
2436104Sse  struct edge *e, *n;
2446104Sse  int i;
2456104Sse
2467234Sse  for (i = 0; i < g->n_vertices; i++)
2477234Sse    for (e = g->vertices[i].succ; e; e = n)
2486104Sse      {
2496104Sse	n = e->succ_next;
2506104Sse	free (e);
2516104Sse      }
2526104Sse  free (g->vertices);
2536104Sse  free (g);
2546104Sse}
2557234Sse
/* Marks blocks and edges that are part of non-recognized loops; i.e. we
   throw away all latch edges and mark blocks inside any remaining cycle.
   Everything is a bit complicated due to fact we do not want to do this
   for parts of cycles that only "pass" through some loop -- i.e. for
   each cycle, we want to mark blocks that belong directly to innermost
   loop containing the whole cycle.

   LOOPS is the loop tree.  */

/* Vertex number representing LOOP; loop vertices are placed after the
   basic-block vertices in the graph.  */
#define LOOP_REPR(LOOP) ((LOOP)->num + last_basic_block)
/* Vertex number representing basic block BB.  */
#define BB_REPR(BB) ((BB)->index + 1)

void
mark_irreducible_loops (struct loops *loops)
{
  basic_block act;
  edge e;
  edge_iterator ei;
  int i, src, dest;
  struct graph *g;
  /* One vertex per basic block plus one per loop.  */
  int *queue1 = XNEWVEC (int, last_basic_block + loops->num);
  int *queue2 = XNEWVEC (int, last_basic_block + loops->num);
  int nq, depth;
  struct loop *cloop;

  /* Reset the flags.  */
  FOR_BB_BETWEEN (act, ENTRY_BLOCK_PTR, EXIT_BLOCK_PTR, next_bb)
    {
      act->flags &= ~BB_IRREDUCIBLE_LOOP;
      FOR_EACH_EDGE (e, ei, act->succs)
	e->flags &= ~EDGE_IRREDUCIBLE_LOOP;
    }

  /* Create the edge lists.  */
  g = new_graph (last_basic_block + loops->num);

  FOR_BB_BETWEEN (act, ENTRY_BLOCK_PTR, EXIT_BLOCK_PTR, next_bb)
    FOR_EACH_EDGE (e, ei, act->succs)
      {
	/* Ignore edges to exit.  */
	if (e->dest == EXIT_BLOCK_PTR)
	  continue;

	/* And latch edges.  */
	if (e->dest->loop_father->header == e->dest
	    && e->dest->loop_father->latch == act)
	  continue;

	/* Edges inside a single loop should be left where they are.  Edges
	   to subloop headers should lead to representative of the subloop,
	   but from the same place.

	   Edges exiting loops should lead from representative
	   of the son of nearest common ancestor of the loops in that
	   act lays.  */

	src = BB_REPR (act);
	dest = BB_REPR (e->dest);

	/* Redirect edges entering a loop to that loop's representative.  */
	if (e->dest->loop_father->header == e->dest)
	  dest = LOOP_REPR (e->dest->loop_father);

	if (!flow_bb_inside_loop_p (act->loop_father, e->dest))
	  {
	    /* NOTE(review): this assumes loop->pred[] holds the chain of
	       ancestor loops indexed by depth -- confirm against the
	       struct loop definition in cfgloop.h.  */
	    depth = find_common_loop (act->loop_father,
				      e->dest->loop_father)->depth + 1;
	    if (depth == act->loop_father->depth)
	      cloop = act->loop_father;
	    else
	      cloop = act->loop_father->pred[depth];

	    src = LOOP_REPR (cloop);
	  }

	add_edge (g, src, dest, e);
      }

  /* Find the strongly connected components.  Use the algorithm of Tarjan --
     first determine the postorder dfs numbering in reversed graph, then
     run the dfs on the original graph in the order given by decreasing
     numbers assigned by the previous pass.  */
  nq = 0;
  FOR_BB_BETWEEN (act, ENTRY_BLOCK_PTR, EXIT_BLOCK_PTR, next_bb)
    {
      queue1[nq++] = BB_REPR (act);
    }
  /* Loop 0 is the dummy "whole function" loop; skip it.  */
  for (i = 1; i < (int) loops->num; i++)
    if (loops->parray[i])
      queue1[nq++] = LOOP_REPR (loops->parray[i]);
  dfs (g, queue1, nq, queue2, false);
  for (i = 0; i < nq; i++)
    queue1[i] = queue2[nq - i - 1];
  dfs (g, queue1, nq, NULL, true);

  /* Mark the irreducible loops.  */
  for_each_edge (g, check_irred);

  free_graph (g);
  free (queue1);
  free (queue2);

  loops->state |= LOOPS_HAVE_MARKED_IRREDUCIBLE_REGIONS;
}
3596104Sse
3606104Sse/* Counts number of insns inside LOOP.  */
3616104Sseint
3626104Ssenum_loop_insns (struct loop *loop)
3636104Sse{
3647234Sse  basic_block *bbs, bb;
3656104Sse  unsigned i, ninsns = 0;
3666104Sse  rtx insn;
3676104Sse
3686104Sse  bbs = get_loop_body (loop);
3696104Sse  for (i = 0; i < loop->num_nodes; i++)
3706104Sse    {
3716104Sse      bb = bbs[i];
3726104Sse      ninsns++;
3736104Sse      for (insn = BB_HEAD (bb); insn != BB_END (bb); insn = NEXT_INSN (insn))
3746104Sse	if (INSN_P (insn))
3756104Sse	  ninsns++;
3766104Sse    }
3777234Sse  free(bbs);
3787234Sse
3797234Sse  return ninsns;
3807234Sse}
3817234Sse
3827234Sse/* Counts number of insns executed on average per iteration LOOP.  */
3837234Sseint
3847234Sseaverage_num_loop_insns (struct loop *loop)
3857234Sse{
3867234Sse  basic_block *bbs, bb;
3877234Sse  unsigned i, binsns, ninsns, ratio;
3887234Sse  rtx insn;
3896104Sse
3906104Sse  ninsns = 0;
3917234Sse  bbs = get_loop_body (loop);
3926104Sse  for (i = 0; i < loop->num_nodes; i++)
3936104Sse    {
3946104Sse      bb = bbs[i];
3956104Sse
3966104Sse      binsns = 1;
3976104Sse      for (insn = BB_HEAD (bb); insn != BB_END (bb); insn = NEXT_INSN (insn))
3986104Sse	if (INSN_P (insn))
3996104Sse	  binsns++;
4006104Sse
4016104Sse      ratio = loop->header->frequency == 0
4026104Sse	      ? BB_FREQ_MAX
4036104Sse	      : (bb->frequency * BB_FREQ_MAX) / loop->header->frequency;
4046104Sse      ninsns += binsns * ratio;
4056104Sse    }
4066104Sse  free(bbs);
4076104Sse
4086104Sse  ninsns /= BB_FREQ_MAX;
4096104Sse  if (!ninsns)
4106104Sse    ninsns = 1; /* To avoid division by zero.  */
4116104Sse
4126104Sse  return ninsns;
4136104Sse}
4146104Sse
4156104Sse/* Returns expected number of LOOP iterations.
4166104Sse   Compute upper bound on number of iterations in case they do not fit integer
4176104Sse   to help loop peeling heuristics.  Use exact counts if at all possible.  */
4186104Sseunsigned
4196104Sseexpected_loop_iterations (const struct loop *loop)
4207234Sse{
4216104Sse  edge e;
4226104Sse  edge_iterator ei;
4236104Sse
4246104Sse  if (loop->latch->count || loop->header->count)
4256104Sse    {
4266104Sse      gcov_type count_in, count_latch, expected;
4276104Sse
4287234Sse      count_in = 0;
4297234Sse      count_latch = 0;
4307234Sse
4317234Sse      FOR_EACH_EDGE (e, ei, loop->header->preds)
4327234Sse	if (e->src == loop->latch)
4337234Sse	  count_latch = e->count;
4347234Sse	else
4357234Sse	  count_in += e->count;
4367234Sse
4377234Sse      if (count_in == 0)
4387234Sse	expected = count_latch * 2;
4396104Sse      else
4407234Sse	expected = (count_latch + count_in - 1) / count_in;
4417234Sse
4427234Sse      /* Avoid overflows.  */
4437234Sse      return (expected > REG_BR_PROB_BASE ? REG_BR_PROB_BASE : expected);
4447234Sse    }
4456104Sse  else
4467234Sse    {
4477234Sse      int freq_in, freq_latch;
4487234Sse
4496104Sse      freq_in = 0;
4506104Sse      freq_latch = 0;
4517234Sse
4527234Sse      FOR_EACH_EDGE (e, ei, loop->header->preds)
4537234Sse	if (e->src == loop->latch)
4546104Sse	  freq_latch = EDGE_FREQUENCY (e);
4557234Sse	else
4566104Sse	  freq_in += EDGE_FREQUENCY (e);
4577234Sse
4587234Sse      if (freq_in == 0)
4597234Sse	return freq_latch * 2;
4607234Sse
4617234Sse      return (freq_latch + freq_in - 1) / freq_in;
4627234Sse    }
4637234Sse}
4647234Sse
4657234Sse/* Returns the maximum level of nesting of subloops of LOOP.  */
4667234Sse
4677234Sseunsigned
4687234Sseget_loop_level (const struct loop *loop)
4696104Sse{
4707234Sse  const struct loop *ploop;
4716104Sse  unsigned mx = 0, l;
4727234Sse
4736104Sse  for (ploop = loop->inner; ploop; ploop = ploop->next)
4746104Sse    {
4756104Sse      l = get_loop_level (ploop);
4767234Sse      if (l >= mx)
4777234Sse	mx = l + 1;
4786104Sse    }
4796104Sse  return mx;
4807234Sse}
4816104Sse
4827234Sse/* Returns estimate on cost of computing SEQ.  */
4837234Sse
4846104Ssestatic unsigned
4857234Sseseq_cost (rtx seq)
4867234Sse{
4877234Sse  unsigned cost = 0;
4887234Sse  rtx set;
4896104Sse
4907234Sse  for (; seq; seq = NEXT_INSN (seq))
4916104Sse    {
4927234Sse      set = single_set (seq);
4937234Sse      if (set)
4947234Sse	cost += rtx_cost (set, SET);
4957234Sse      else
4967234Sse	cost++;
4977234Sse    }
4987234Sse
4997234Sse  return cost;
5007234Sse}
5016104Sse
/* The properties of the target, filled in by init_set_costs and consumed
   by global_cost_for_size.  */

unsigned target_avail_regs;	/* Number of available registers.  */
unsigned target_res_regs;	/* Number of reserved registers.  */
unsigned target_small_cost;	/* The cost for register when there is a free one.  */
unsigned target_pres_cost;	/* The cost for register when there are not too many
				   free ones.  */
unsigned target_spill_cost;	/* The cost for register when we need to spill.  */
510
/* Initialize the constants for computing set costs, by emitting and
   costing scratch move sequences for the current target.  */

void
init_set_costs (void)
{
  rtx seq;
  /* Scratch pseudo registers used only to build the costed sequences.  */
  rtx reg1 = gen_raw_REG (SImode, FIRST_PSEUDO_REGISTER);
  rtx reg2 = gen_raw_REG (SImode, FIRST_PSEUDO_REGISTER + 1);
  rtx addr = gen_raw_REG (Pmode, FIRST_PSEUDO_REGISTER + 2);
  rtx mem = validize_mem (gen_rtx_MEM (SImode, addr));
  unsigned i;

  /* Count the general-purpose registers not fixed by the target.  */
  for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
    if (TEST_HARD_REG_BIT (reg_class_contents[GENERAL_REGS], i)
	&& !fixed_regs[i])
      target_avail_regs++;

  target_res_regs = 3;

  /* These are really just heuristic values.  */

  /* Cost of a register-to-register move.  */
  start_sequence ();
  emit_move_insn (reg1, reg2);
  seq = get_insns ();
  end_sequence ();
  target_small_cost = seq_cost (seq);
  target_pres_cost = 2 * target_small_cost;

  /* Cost of a spill: a store to memory plus a reload.  */
  start_sequence ();
  emit_move_insn (mem, reg1);
  emit_move_insn (reg2, mem);
  seq = get_insns ();
  end_sequence ();
  target_spill_cost = seq_cost (seq);
}
546
547/* Calculates cost for having SIZE new loop global variables.  REGS_USED is the
548   number of global registers used in loop.  N_USES is the number of relevant
549   variable uses.  */
550
551unsigned
552global_cost_for_size (unsigned size, unsigned regs_used, unsigned n_uses)
553{
554  unsigned regs_needed = regs_used + size;
555  unsigned cost = 0;
556
557  if (regs_needed + target_res_regs <= target_avail_regs)
558    cost += target_small_cost * size;
559  else if (regs_needed <= target_avail_regs)
560    cost += target_pres_cost * size;
561  else
562    {
563      cost += target_pres_cost * size;
564      cost += target_spill_cost * n_uses * (regs_needed - target_avail_regs) / regs_needed;
565    }
566
567  return cost;
568}
569
570/* Sets EDGE_LOOP_EXIT flag for all exits of LOOPS.  */
571
572void
573mark_loop_exit_edges (struct loops *loops)
574{
575  basic_block bb;
576  edge e;
577
578  if (loops->num <= 1)
579    return;
580
581  FOR_EACH_BB (bb)
582    {
583      edge_iterator ei;
584
585      FOR_EACH_EDGE (e, ei, bb->succs)
586	{
587	  if (bb->loop_father->outer
588	      && loop_exit_edge_p (bb->loop_father, e))
589	    e->flags |= EDGE_LOOP_EXIT;
590	  else
591	    e->flags &= ~EDGE_LOOP_EXIT;
592	}
593    }
594}
595
596