/* Natural loop analysis code for GNU compiler.
   Copyright (C) 2002, 2003, 2004, 2005 Free Software Foundation, Inc.

This file is part of GCC.

GCC is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free
Software Foundation; either version 2, or (at your option) any later
version.

GCC is distributed in the hope that it will be useful, but WITHOUT ANY
WARRANTY; without even the implied warranty of MERCHANTABILITY or
FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
for more details.

You should have received a copy of the GNU General Public License
along with GCC; see the file COPYING.  If not, write to the Free
Software Foundation, 51 Franklin Street, Fifth Floor, Boston, MA
02110-1301, USA.  */

#include "config.h"
#include "system.h"
#include "coretypes.h"
#include "tm.h"
#include "rtl.h"
#include "hard-reg-set.h"
#include "obstack.h"
#include "basic-block.h"
#include "cfgloop.h"
#include "expr.h"
#include "output.h"

/* Checks whether BB is executed exactly once in each LOOP iteration.  */

bool
just_once_each_iteration_p (const struct loop *loop, basic_block bb)
{
  /* It must be executed at least once each iteration.  */
  if (!dominated_by_p (CDI_DOMINATORS, loop->latch, bb))
    return false;

  /* And just once.  */
  if (bb->loop_father != loop)
    return false;

  /* But this is not enough -- inside an irreducible region, BB may lie on
     a cycle that is not a recognized loop, and so be entered several times
     per iteration.  */
  if (bb->flags & BB_IRREDUCIBLE_LOOP)
    return false;

  return true;
}

/* Structure representing an edge of a graph.  */

struct edge
{
  int src, dest;	/* Source and destination.  */
  struct edge *pred_next, *succ_next;
			/* Next edge in predecessor and successor lists.  */
  void *data;		/* Data attached to the edge.  */
};

/* Structure representing a vertex of a graph.  */

struct vertex
{
  struct edge *pred, *succ;
			/* Lists of predecessors and successors.  */
  int component;	/* Number of dfs restarts before reaching the
			   vertex.  */
  int post;		/* Postorder number.  */
};

/* Structure representing a graph.  */

struct graph
{
  int n_vertices;	/* Number of vertices.  */
  struct vertex *vertices;
			/* The vertices.  */
};
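
/* Note that each edge is linked into both its source's successor list and
   its destination's predecessor list, so a single struct edge is shared by
   the two lists and must be freed only once (free_graph below walks the
   successor lists only).  */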

/* Dumps graph G into F.  */

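/* The output contains one entry for each vertex with at least one incident
   edge, e.g.

     5 (0)	<- 2 3
	-> 7

   meaning that vertex 5 lies in component 0, has predecessors 2 and 3, and
   the single successor 7.  */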
extern void dump_graph (FILE *, struct graph *);
void
dump_graph (FILE *f, struct graph *g)
{
  int i;
  struct edge *e;

  for (i = 0; i < g->n_vertices; i++)
    {
      if (!g->vertices[i].pred
	  && !g->vertices[i].succ)
	continue;

      fprintf (f, "%d (%d)\t<-", i, g->vertices[i].component);
      for (e = g->vertices[i].pred; e; e = e->pred_next)
	fprintf (f, " %d", e->src);
      fprintf (f, "\n");

      fprintf (f, "\t->");
      for (e = g->vertices[i].succ; e; e = e->succ_next)
	fprintf (f, " %d", e->dest);
      fprintf (f, "\n");
    }
}

/* Creates a new graph with N_VERTICES vertices.  */

static struct graph *
new_graph (int n_vertices)
{
  struct graph *g = xmalloc (sizeof (struct graph));

  g->n_vertices = n_vertices;
  g->vertices = xcalloc (n_vertices, sizeof (struct vertex));

  return g;
}

/* Adds an edge from F to T to graph G, with DATA attached.  */

static void
add_edge (struct graph *g, int f, int t, void *data)
{
  struct edge *e = xmalloc (sizeof (struct edge));

  e->src = f;
  e->dest = t;
  e->data = data;

  e->pred_next = g->vertices[t].pred;
  g->vertices[t].pred = e;

  e->succ_next = g->vertices[f].succ;
  g->vertices[f].succ = e;
}

/* Runs a dfs over the vertices of the graph G, starting from the NQ
   vertices in the queue QS.  The vertices are stored into QT in postorder.
   If FORWARD is false, the dfs is run on the reversed graph (edges are
   followed from destination to source).  */

static void
dfs (struct graph *g, int *qs, int nq, int *qt, bool forward)
{
  int i, tick = 0, v, comp = 0, top;
  struct edge *e;
  struct edge **stack = xmalloc (sizeof (struct edge *) * g->n_vertices);

  for (i = 0; i < g->n_vertices; i++)
    {
      g->vertices[i].component = -1;
      g->vertices[i].post = -1;
    }

#define FST_EDGE(V) (forward ? g->vertices[(V)].succ : g->vertices[(V)].pred)
#define NEXT_EDGE(E) (forward ? (E)->succ_next : (E)->pred_next)
#define EDGE_SRC(E) (forward ? (E)->src : (E)->dest)
#define EDGE_DEST(E) (forward ? (E)->dest : (E)->src)
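
/* With FORWARD false, the four macros above swap the roles of source and
   destination, so the walk below is an ordinary dfs of the transposed
   graph -- exactly what the first pass of the scc computation in
   mark_irreducible_loops needs.  */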

  for (i = 0; i < nq; i++)
    {
      v = qs[i];
      if (g->vertices[v].post != -1)
	continue;

      g->vertices[v].component = comp++;
      e = FST_EDGE (v);
      top = 0;

      while (1)
	{
	  while (e && g->vertices[EDGE_DEST (e)].component != -1)
	    e = NEXT_EDGE (e);

	  if (!e)
	    {
	      if (qt)
		qt[tick] = v;
	      g->vertices[v].post = tick++;

	      if (!top)
		break;

	      e = stack[--top];
	      v = EDGE_SRC (e);
	      e = NEXT_EDGE (e);
	      continue;
	    }

	  stack[top++] = e;
	  v = EDGE_DEST (e);
	  e = FST_EDGE (v);
	  g->vertices[v].component = comp - 1;
	}
    }

  free (stack);
}
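
/* For illustration (a sketch, not used elsewhere in this file): numbering
   all the vertices of a graph G in forward postorder amounts to seeding
   the queue with every vertex:

     int i;
     int *qs = xmalloc (g->n_vertices * sizeof (int));
     int *qt = xmalloc (g->n_vertices * sizeof (int));

     for (i = 0; i < g->n_vertices; i++)
       qs[i] = i;
     dfs (g, qs, g->n_vertices, qt, true);

   Vertices already visited from an earlier queue entry are skipped, so
   duplicate or redundant seeds are harmless.  mark_irreducible_loops below
   seeds the queue with basic block and loop representatives instead.  */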

/* Marks the edge E in graph G irreducible if it connects two vertices in
   the same scc.  */

static void
check_irred (struct graph *g, struct edge *e)
{
  edge real = e->data;

  /* All edges should lead from a component with a higher number to one
     with a lower number.  */
  gcc_assert (g->vertices[e->src].component >= g->vertices[e->dest].component);

  if (g->vertices[e->src].component != g->vertices[e->dest].component)
    return;

  real->flags |= EDGE_IRREDUCIBLE_LOOP;
  if (flow_bb_inside_loop_p (real->src->loop_father, real->dest))
    real->src->flags |= BB_IRREDUCIBLE_LOOP;
}

/* Runs CALLBACK for all edges in G.  */

static void
for_each_edge (struct graph *g,
	       void (callback) (struct graph *, struct edge *))
{
  struct edge *e;
  int i;

  for (i = 0; i < g->n_vertices; i++)
    for (e = g->vertices[i].succ; e; e = e->succ_next)
      callback (g, e);
}

/* Releases the memory occupied by G.  */

static void
free_graph (struct graph *g)
{
  struct edge *e, *n;
  int i;

  for (i = 0; i < g->n_vertices; i++)
    for (e = g->vertices[i].succ; e; e = n)
      {
	n = e->succ_next;
	free (e);
      }
  free (g->vertices);
  free (g);
}

/* Marks blocks and edges that are part of non-recognized loops; i.e. we
   throw away all latch edges and mark blocks inside any remaining cycle.
   Everything is a bit complicated due to the fact that we do not want to
   do this for parts of cycles that only "pass" through some loop -- i.e.
   for each cycle, we want to mark the blocks that belong directly to the
   innermost loop containing the whole cycle.

   LOOPS is the loop tree.  */

#define LOOP_REPR(LOOP) ((LOOP)->num + last_basic_block)
#define BB_REPR(BB) ((BB)->index + 1)
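
/* The two macros above pack blocks and loops into one vertex numbering:
   BB_REPR maps blocks into 0 .. last_basic_block (the +1 makes room for
   the entry block's negative index), and LOOP_REPR maps loops past them,
   so last_basic_block + loops->num vertices suffice for the graph built
   below.  */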

void
mark_irreducible_loops (struct loops *loops)
{
  basic_block act;
  edge e;
  edge_iterator ei;
  int i, src, dest;
  struct graph *g;
  int *queue1 = xmalloc ((last_basic_block + loops->num) * sizeof (int));
  int *queue2 = xmalloc ((last_basic_block + loops->num) * sizeof (int));
  int nq, depth;
  struct loop *cloop;

  /* Reset the flags.  */
  FOR_BB_BETWEEN (act, ENTRY_BLOCK_PTR, EXIT_BLOCK_PTR, next_bb)
    {
      act->flags &= ~BB_IRREDUCIBLE_LOOP;
      FOR_EACH_EDGE (e, ei, act->succs)
	e->flags &= ~EDGE_IRREDUCIBLE_LOOP;
    }

  /* Create the edge lists.  */
  g = new_graph (last_basic_block + loops->num);

  FOR_BB_BETWEEN (act, ENTRY_BLOCK_PTR, EXIT_BLOCK_PTR, next_bb)
    FOR_EACH_EDGE (e, ei, act->succs)
      {
	/* Ignore edges to exit.  */
	if (e->dest == EXIT_BLOCK_PTR)
	  continue;

	/* And latch edges.  */
	if (e->dest->loop_father->header == e->dest
	    && e->dest->loop_father->latch == act)
	  continue;

	/* Edges inside a single loop should be left where they are.  Edges
	   to subloop headers should lead to the representative of the
	   subloop, but from the same place.

	   Edges exiting loops should lead from the representative of the
	   child of the nearest common ancestor of the two loops, on the
	   side in which ACT lies.  */

	src = BB_REPR (act);
	dest = BB_REPR (e->dest);

	if (e->dest->loop_father->header == e->dest)
	  dest = LOOP_REPR (e->dest->loop_father);

	if (!flow_bb_inside_loop_p (act->loop_father, e->dest))
	  {
	    depth = find_common_loop (act->loop_father,
				      e->dest->loop_father)->depth + 1;
	    if (depth == act->loop_father->depth)
	      cloop = act->loop_father;
	    else
	      cloop = act->loop_father->pred[depth];

	    src = LOOP_REPR (cloop);
	  }

	add_edge (g, src, dest, e);
      }

  /* Find the strongly connected components.  Use Kosaraju's algorithm --
     first determine the postorder dfs numbering in the reversed graph,
     then run the dfs on the original graph in the order given by
     decreasing numbers assigned by the previous pass.  */
  nq = 0;
  FOR_BB_BETWEEN (act, ENTRY_BLOCK_PTR, EXIT_BLOCK_PTR, next_bb)
    {
      queue1[nq++] = BB_REPR (act);
    }
  for (i = 1; i < (int) loops->num; i++)
    if (loops->parray[i])
      queue1[nq++] = LOOP_REPR (loops->parray[i]);
  dfs (g, queue1, nq, queue2, false);
  for (i = 0; i < nq; i++)
    queue1[i] = queue2[nq - i - 1];
  dfs (g, queue1, nq, NULL, true);
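
  /* After the second pass, each vertex's component field identifies its
     scc, and the sccs are numbered so that every remaining edge leads from
     a higher-numbered component to a lower-numbered (or the same) one --
     the invariant that the assertion in check_irred relies on.  */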

  /* Mark the irreducible loops.  */
  for_each_edge (g, check_irred);

  free_graph (g);
  free (queue1);
  free (queue2);

  loops->state |= LOOPS_HAVE_MARKED_IRREDUCIBLE_REGIONS;
}

/* Counts the number of insns inside LOOP.  */
int
num_loop_insns (struct loop *loop)
{
  basic_block *bbs, bb;
  unsigned i, ninsns = 0;
  rtx insn;

  bbs = get_loop_body (loop);
  for (i = 0; i < loop->num_nodes; i++)
    {
      bb = bbs[i];
      ninsns++;
      for (insn = BB_HEAD (bb); insn != BB_END (bb); insn = NEXT_INSN (insn))
	if (INSN_P (insn))
	  ninsns++;
    }
  free (bbs);

  return ninsns;
}

/* Counts the number of insns executed on average per iteration of LOOP.  */
int
average_num_loop_insns (struct loop *loop)
{
  basic_block *bbs, bb;
  unsigned i, binsns, ninsns, ratio;
  rtx insn;

  ninsns = 0;
  bbs = get_loop_body (loop);
  for (i = 0; i < loop->num_nodes; i++)
    {
      bb = bbs[i];

      binsns = 1;
      for (insn = BB_HEAD (bb); insn != BB_END (bb); insn = NEXT_INSN (insn))
	if (INSN_P (insn))
	  binsns++;

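      /* Weight the block's size by its execution frequency relative to the
	 header; the ratio is a fixed-point fraction with BB_FREQ_MAX
	 standing for 1.0.  */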
      ratio = loop->header->frequency == 0
	      ? BB_FREQ_MAX
	      : (bb->frequency * BB_FREQ_MAX) / loop->header->frequency;
      ninsns += binsns * ratio;
    }
  free (bbs);

  ninsns /= BB_FREQ_MAX;
  if (!ninsns)
    ninsns = 1; /* To avoid division by zero.  */

  return ninsns;
}

/* Returns the expected number of LOOP iterations.  Computes an upper bound
   on the number of iterations in case they do not fit in an integer, to
   help the loop peeling heuristics.  Uses exact profile counts whenever
   they are available.  */
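
/* For instance (a sketch): with profile data, count_in == 10 entries into
   the loop and count_latch == 39 traversals of the back edge give
   (39 + 10 - 1) / 10 == 4 expected iterations.  */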
unsigned
expected_loop_iterations (const struct loop *loop)
{
  edge e;
  edge_iterator ei;

  if (loop->latch->count || loop->header->count)
    {
      gcov_type count_in, count_latch, expected;

      count_in = 0;
      count_latch = 0;

      FOR_EACH_EDGE (e, ei, loop->header->preds)
	if (e->src == loop->latch)
	  count_latch = e->count;
	else
	  count_in += e->count;

      if (count_in == 0)
	expected = count_latch * 2;
      else
	expected = (count_latch + count_in - 1) / count_in;

      /* Avoid overflows.  */
      return (expected > REG_BR_PROB_BASE ? REG_BR_PROB_BASE : expected);
    }
  else
    {
      int freq_in, freq_latch;

      freq_in = 0;
      freq_latch = 0;

      FOR_EACH_EDGE (e, ei, loop->header->preds)
	if (e->src == loop->latch)
	  freq_latch = EDGE_FREQUENCY (e);
	else
	  freq_in += EDGE_FREQUENCY (e);

      if (freq_in == 0)
	return freq_latch * 2;

      return (freq_latch + freq_in - 1) / freq_in;
    }
}

/* Returns the maximum level of nesting of subloops of LOOP.  */
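
/* For example, a loop with no subloops has level 0, and a loop whose
   deepest subloop nest is two levels deep has level 2.  */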

unsigned
get_loop_level (const struct loop *loop)
{
  const struct loop *ploop;
  unsigned mx = 0, l;

  for (ploop = loop->inner; ploop; ploop = ploop->next)
    {
      l = get_loop_level (ploop);
      if (l >= mx)
	mx = l + 1;
    }
  return mx;
}

/* Returns an estimate of the cost of computing SEQ.  */

static unsigned
seq_cost (rtx seq)
{
  unsigned cost = 0;
  rtx set;

  for (; seq; seq = NEXT_INSN (seq))
    {
      set = single_set (seq);
      if (set)
	cost += rtx_cost (set, SET);
      else
	cost++;
    }

  return cost;
}

/* The properties of the target.  */

unsigned target_avail_regs;	/* Number of available registers.  */
unsigned target_res_regs;	/* Number of reserved registers.  */
unsigned target_small_cost;	/* The cost for a register when there is
				   a free one.  */
unsigned target_pres_cost;	/* The cost for a register when there are
				   not too many free ones.  */
unsigned target_spill_cost;	/* The cost for a register when we need
				   to spill.  */

/* Initialize the constants for computing set costs.  */

void
init_set_costs (void)
{
  rtx seq;
  rtx reg1 = gen_raw_REG (SImode, FIRST_PSEUDO_REGISTER);
  rtx reg2 = gen_raw_REG (SImode, FIRST_PSEUDO_REGISTER + 1);
  rtx addr = gen_raw_REG (Pmode, FIRST_PSEUDO_REGISTER + 2);
  rtx mem = validize_mem (gen_rtx_MEM (SImode, addr));
  unsigned i;

  for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
    if (TEST_HARD_REG_BIT (reg_class_contents[GENERAL_REGS], i)
	&& !fixed_regs[i])
      target_avail_regs++;

  target_res_regs = 3;

  /* These are really just heuristic values.  */

  start_sequence ();
  emit_move_insn (reg1, reg2);
  seq = get_insns ();
  end_sequence ();
  target_small_cost = seq_cost (seq);
  target_pres_cost = 2 * target_small_cost;

  start_sequence ();
  emit_move_insn (mem, reg1);
  emit_move_insn (reg2, mem);
  seq = get_insns ();
  end_sequence ();
  target_spill_cost = seq_cost (seq);
}

/* Calculates the cost of having SIZE new loop global variables.  REGS_USED
   is the number of global registers used in the loop.  N_USES is the
   number of relevant variable uses.  */

unsigned
global_cost_for_size (unsigned size, unsigned regs_used, unsigned n_uses)
{
  unsigned regs_needed = regs_used + size;
  unsigned cost = 0;

  if (regs_needed + target_res_regs <= target_avail_regs)
    cost += target_small_cost * size;
  else if (regs_needed <= target_avail_regs)
    cost += target_pres_cost * size;
  else
    {
      cost += target_pres_cost * size;
      cost += target_spill_cost * n_uses
	      * (regs_needed - target_avail_regs) / regs_needed;
    }

  return cost;
}
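
/* For example, with target_avail_regs == 6 and target_res_regs == 3,
   asking for size == 2 new registers when regs_used == 2 gives
   regs_needed == 4: the reserve test fails (4 + 3 > 6) but the registers
   still fit (4 <= 6), so the result is 2 * target_pres_cost.  */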

/* Sets the EDGE_LOOP_EXIT flag for all exits of loops in LOOPS.  */

void
mark_loop_exit_edges (struct loops *loops)
{
  basic_block bb;
  edge e;

  if (loops->num <= 1)
    return;

  FOR_EACH_BB (bb)
    {
      edge_iterator ei;

      FOR_EACH_EDGE (e, ei, bb->succs)
	{
	  if (bb->loop_father->outer
	      && loop_exit_edge_p (bb->loop_father, e))
	    e->flags |= EDGE_LOOP_EXIT;
	  else
	    e->flags &= ~EDGE_LOOP_EXIT;
	}
    }
}