1/* Natural loop analysis code for GNU compiler.
2   Copyright (C) 2002, 2003, 2004, 2005, 2006 Free Software Foundation, Inc.
3
4This file is part of GCC.
5
6GCC is free software; you can redistribute it and/or modify it under
7the terms of the GNU General Public License as published by the Free
8Software Foundation; either version 2, or (at your option) any later
9version.
10
11GCC is distributed in the hope that it will be useful, but WITHOUT ANY
12WARRANTY; without even the implied warranty of MERCHANTABILITY or
13FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
14for more details.
15
16You should have received a copy of the GNU General Public License
17along with GCC; see the file COPYING.  If not, write to the Free
18Software Foundation, 51 Franklin Street, Fifth Floor, Boston, MA
1902110-1301, USA.  */
20
21#include "config.h"
22#include "system.h"
23#include "coretypes.h"
24#include "tm.h"
25#include "rtl.h"
26#include "hard-reg-set.h"
27#include "obstack.h"
28#include "basic-block.h"
29#include "cfgloop.h"
30#include "expr.h"
31#include "output.h"
32
33/* Checks whether BB is executed exactly once in each LOOP iteration.  */
34
35bool
36just_once_each_iteration_p (const struct loop *loop, basic_block bb)
37{
38  /* It must be executed at least once each iteration.  */
39  if (!dominated_by_p (CDI_DOMINATORS, loop->latch, bb))
40    return false;
41
42  /* And just once.  */
43  if (bb->loop_father != loop)
44    return false;
45
46  /* But this was not enough.  We might have some irreducible loop here.  */
47  if (bb->flags & BB_IRREDUCIBLE_LOOP)
48    return false;
49
50  return true;
51}
52
53/* Structure representing edge of a graph.  */
54
55struct edge
56{
57  int src, dest;	/* Source and destination.  */
58  struct edge *pred_next, *succ_next;
59			/* Next edge in predecessor and successor lists.  */
60  void *data;		/* Data attached to the edge.  */
61};
62
63/* Structure representing vertex of a graph.  */
64
65struct vertex
66{
67  struct edge *pred, *succ;
68			/* Lists of predecessors and successors.  */
69  int component;	/* Number of dfs restarts before reaching the
70			   vertex.  */
71  int post;		/* Postorder number.  */
72};
73
74/* Structure representing a graph.  */
75
76struct graph
77{
78  int n_vertices;	/* Number of vertices.  */
79  struct vertex *vertices;
80			/* The vertices.  */
81};
82
83/* Dumps graph G into F.  */
84
85extern void dump_graph (FILE *, struct graph *);
86
87void
88dump_graph (FILE *f, struct graph *g)
89{
90  int i;
91  struct edge *e;
92
93  for (i = 0; i < g->n_vertices; i++)
94    {
95      if (!g->vertices[i].pred
96	  && !g->vertices[i].succ)
97	continue;
98
99      fprintf (f, "%d (%d)\t<-", i, g->vertices[i].component);
100      for (e = g->vertices[i].pred; e; e = e->pred_next)
101	fprintf (f, " %d", e->src);
102      fprintf (f, "\n");
103
104      fprintf (f, "\t->");
105      for (e = g->vertices[i].succ; e; e = e->succ_next)
106	fprintf (f, " %d", e->dest);
107      fprintf (f, "\n");
108    }
109}
110
111/* Creates a new graph with N_VERTICES vertices.  */
112
113static struct graph *
114new_graph (int n_vertices)
115{
116  struct graph *g = XNEW (struct graph);
117
118  g->n_vertices = n_vertices;
119  g->vertices = XCNEWVEC (struct vertex, n_vertices);
120
121  return g;
122}
123
124/* Adds an edge from F to T to graph G, with DATA attached.  */
125
126static void
127add_edge (struct graph *g, int f, int t, void *data)
128{
129  struct edge *e = xmalloc (sizeof (struct edge));
130
131  e->src = f;
132  e->dest = t;
133  e->data = data;
134
135  e->pred_next = g->vertices[t].pred;
136  g->vertices[t].pred = e;
137
138  e->succ_next = g->vertices[f].succ;
139  g->vertices[f].succ = e;
140}
141
142/* Runs dfs search over vertices of G, from NQ vertices in queue QS.
143   The vertices in postorder are stored into QT.  If FORWARD is false,
144   backward dfs is run.  */
145
146static void
147dfs (struct graph *g, int *qs, int nq, int *qt, bool forward)
148{
149  int i, tick = 0, v, comp = 0, top;
150  struct edge *e;
151  struct edge **stack = xmalloc (sizeof (struct edge *) * g->n_vertices);
152
153  for (i = 0; i < g->n_vertices; i++)
154    {
155      g->vertices[i].component = -1;
156      g->vertices[i].post = -1;
157    }
158
159#define FST_EDGE(V) (forward ? g->vertices[(V)].succ : g->vertices[(V)].pred)
160#define NEXT_EDGE(E) (forward ? (E)->succ_next : (E)->pred_next)
161#define EDGE_SRC(E) (forward ? (E)->src : (E)->dest)
162#define EDGE_DEST(E) (forward ? (E)->dest : (E)->src)
163
164  for (i = 0; i < nq; i++)
165    {
166      v = qs[i];
167      if (g->vertices[v].post != -1)
168	continue;
169
170      g->vertices[v].component = comp++;
171      e = FST_EDGE (v);
172      top = 0;
173
174      while (1)
175	{
176	  while (e && g->vertices[EDGE_DEST (e)].component != -1)
177	    e = NEXT_EDGE (e);
178
179	  if (!e)
180	    {
181	      if (qt)
182		qt[tick] = v;
183	      g->vertices[v].post = tick++;
184
185	      if (!top)
186		break;
187
188	      e = stack[--top];
189	      v = EDGE_SRC (e);
190	      e = NEXT_EDGE (e);
191	      continue;
192	    }
193
194	  stack[top++] = e;
195	  v = EDGE_DEST (e);
196	  e = FST_EDGE (v);
197	  g->vertices[v].component = comp - 1;
198	}
199    }
200
201  free (stack);
202}
203
204/* Marks the edge E in graph G irreducible if it connects two vertices in the
205   same scc.  */
206
207static void
208check_irred (struct graph *g, struct edge *e)
209{
210  edge real = e->data;
211
212  /* All edges should lead from a component with higher number to the
213     one with lower one.  */
214  gcc_assert (g->vertices[e->src].component >= g->vertices[e->dest].component);
215
216  if (g->vertices[e->src].component != g->vertices[e->dest].component)
217    return;
218
219  real->flags |= EDGE_IRREDUCIBLE_LOOP;
220  if (flow_bb_inside_loop_p (real->src->loop_father, real->dest))
221    real->src->flags |= BB_IRREDUCIBLE_LOOP;
222}
223
224/* Runs CALLBACK for all edges in G.  */
225
226static void
227for_each_edge (struct graph *g,
228	       void (callback) (struct graph *, struct edge *))
229{
230  struct edge *e;
231  int i;
232
233  for (i = 0; i < g->n_vertices; i++)
234    for (e = g->vertices[i].succ; e; e = e->succ_next)
235      callback (g, e);
236}
237
238/* Releases the memory occupied by G.  */
239
240static void
241free_graph (struct graph *g)
242{
243  struct edge *e, *n;
244  int i;
245
246  for (i = 0; i < g->n_vertices; i++)
247    for (e = g->vertices[i].succ; e; e = n)
248      {
249	n = e->succ_next;
250	free (e);
251      }
252  free (g->vertices);
253  free (g);
254}
255
256/* Marks blocks and edges that are part of non-recognized loops; i.e. we
257   throw away all latch edges and mark blocks inside any remaining cycle.
258   Everything is a bit complicated due to fact we do not want to do this
259   for parts of cycles that only "pass" through some loop -- i.e. for
260   each cycle, we want to mark blocks that belong directly to innermost
261   loop containing the whole cycle.
262
263   LOOPS is the loop tree.  */
264
265#define LOOP_REPR(LOOP) ((LOOP)->num + last_basic_block)
266#define BB_REPR(BB) ((BB)->index + 1)
267
268void
269mark_irreducible_loops (struct loops *loops)
270{
271  basic_block act;
272  edge e;
273  edge_iterator ei;
274  int i, src, dest;
275  struct graph *g;
276  int *queue1 = XNEWVEC (int, last_basic_block + loops->num);
277  int *queue2 = XNEWVEC (int, last_basic_block + loops->num);
278  int nq, depth;
279  struct loop *cloop;
280
281  /* Reset the flags.  */
282  FOR_BB_BETWEEN (act, ENTRY_BLOCK_PTR, EXIT_BLOCK_PTR, next_bb)
283    {
284      act->flags &= ~BB_IRREDUCIBLE_LOOP;
285      FOR_EACH_EDGE (e, ei, act->succs)
286	e->flags &= ~EDGE_IRREDUCIBLE_LOOP;
287    }
288
289  /* Create the edge lists.  */
290  g = new_graph (last_basic_block + loops->num);
291
292  FOR_BB_BETWEEN (act, ENTRY_BLOCK_PTR, EXIT_BLOCK_PTR, next_bb)
293    FOR_EACH_EDGE (e, ei, act->succs)
294      {
295	/* Ignore edges to exit.  */
296	if (e->dest == EXIT_BLOCK_PTR)
297	  continue;
298
299	/* And latch edges.  */
300	if (e->dest->loop_father->header == e->dest
301	    && e->dest->loop_father->latch == act)
302	  continue;
303
304	/* Edges inside a single loop should be left where they are.  Edges
305	   to subloop headers should lead to representative of the subloop,
306	   but from the same place.
307
308	   Edges exiting loops should lead from representative
309	   of the son of nearest common ancestor of the loops in that
310	   act lays.  */
311
312	src = BB_REPR (act);
313	dest = BB_REPR (e->dest);
314
315	if (e->dest->loop_father->header == e->dest)
316	  dest = LOOP_REPR (e->dest->loop_father);
317
318	if (!flow_bb_inside_loop_p (act->loop_father, e->dest))
319	  {
320	    depth = find_common_loop (act->loop_father,
321				      e->dest->loop_father)->depth + 1;
322	    if (depth == act->loop_father->depth)
323	      cloop = act->loop_father;
324	    else
325	      cloop = act->loop_father->pred[depth];
326
327	    src = LOOP_REPR (cloop);
328	  }
329
330	add_edge (g, src, dest, e);
331      }
332
333  /* Find the strongly connected components.  Use the algorithm of Tarjan --
334     first determine the postorder dfs numbering in reversed graph, then
335     run the dfs on the original graph in the order given by decreasing
336     numbers assigned by the previous pass.  */
337  nq = 0;
338  FOR_BB_BETWEEN (act, ENTRY_BLOCK_PTR, EXIT_BLOCK_PTR, next_bb)
339    {
340      queue1[nq++] = BB_REPR (act);
341    }
342  for (i = 1; i < (int) loops->num; i++)
343    if (loops->parray[i])
344      queue1[nq++] = LOOP_REPR (loops->parray[i]);
345  dfs (g, queue1, nq, queue2, false);
346  for (i = 0; i < nq; i++)
347    queue1[i] = queue2[nq - i - 1];
348  dfs (g, queue1, nq, NULL, true);
349
350  /* Mark the irreducible loops.  */
351  for_each_edge (g, check_irred);
352
353  free_graph (g);
354  free (queue1);
355  free (queue2);
356
357  loops->state |= LOOPS_HAVE_MARKED_IRREDUCIBLE_REGIONS;
358}
359
360/* Counts number of insns inside LOOP.  */
361int
362num_loop_insns (struct loop *loop)
363{
364  basic_block *bbs, bb;
365  unsigned i, ninsns = 0;
366  rtx insn;
367
368  bbs = get_loop_body (loop);
369  for (i = 0; i < loop->num_nodes; i++)
370    {
371      bb = bbs[i];
372      ninsns++;
373      for (insn = BB_HEAD (bb); insn != BB_END (bb); insn = NEXT_INSN (insn))
374	if (INSN_P (insn))
375	  ninsns++;
376    }
377  free(bbs);
378
379  return ninsns;
380}
381
382/* Counts number of insns executed on average per iteration LOOP.  */
383int
384average_num_loop_insns (struct loop *loop)
385{
386  basic_block *bbs, bb;
387  unsigned i, binsns, ninsns, ratio;
388  rtx insn;
389
390  ninsns = 0;
391  bbs = get_loop_body (loop);
392  for (i = 0; i < loop->num_nodes; i++)
393    {
394      bb = bbs[i];
395
396      binsns = 1;
397      for (insn = BB_HEAD (bb); insn != BB_END (bb); insn = NEXT_INSN (insn))
398	if (INSN_P (insn))
399	  binsns++;
400
401      ratio = loop->header->frequency == 0
402	      ? BB_FREQ_MAX
403	      : (bb->frequency * BB_FREQ_MAX) / loop->header->frequency;
404      ninsns += binsns * ratio;
405    }
406  free(bbs);
407
408  ninsns /= BB_FREQ_MAX;
409  if (!ninsns)
410    ninsns = 1; /* To avoid division by zero.  */
411
412  return ninsns;
413}
414
415/* Returns expected number of LOOP iterations.
416   Compute upper bound on number of iterations in case they do not fit integer
417   to help loop peeling heuristics.  Use exact counts if at all possible.  */
418unsigned
419expected_loop_iterations (const struct loop *loop)
420{
421  edge e;
422  edge_iterator ei;
423
424  if (loop->latch->count || loop->header->count)
425    {
426      gcov_type count_in, count_latch, expected;
427
428      count_in = 0;
429      count_latch = 0;
430
431      FOR_EACH_EDGE (e, ei, loop->header->preds)
432	if (e->src == loop->latch)
433	  count_latch = e->count;
434	else
435	  count_in += e->count;
436
437      if (count_in == 0)
438	expected = count_latch * 2;
439      else
440	expected = (count_latch + count_in - 1) / count_in;
441
442      /* Avoid overflows.  */
443      return (expected > REG_BR_PROB_BASE ? REG_BR_PROB_BASE : expected);
444    }
445  else
446    {
447      int freq_in, freq_latch;
448
449      freq_in = 0;
450      freq_latch = 0;
451
452      FOR_EACH_EDGE (e, ei, loop->header->preds)
453	if (e->src == loop->latch)
454	  freq_latch = EDGE_FREQUENCY (e);
455	else
456	  freq_in += EDGE_FREQUENCY (e);
457
458      if (freq_in == 0)
459	return freq_latch * 2;
460
461      return (freq_latch + freq_in - 1) / freq_in;
462    }
463}
464
465/* Returns the maximum level of nesting of subloops of LOOP.  */
466
467unsigned
468get_loop_level (const struct loop *loop)
469{
470  const struct loop *ploop;
471  unsigned mx = 0, l;
472
473  for (ploop = loop->inner; ploop; ploop = ploop->next)
474    {
475      l = get_loop_level (ploop);
476      if (l >= mx)
477	mx = l + 1;
478    }
479  return mx;
480}
481
482/* Returns estimate on cost of computing SEQ.  */
483
484static unsigned
485seq_cost (rtx seq)
486{
487  unsigned cost = 0;
488  rtx set;
489
490  for (; seq; seq = NEXT_INSN (seq))
491    {
492      set = single_set (seq);
493      if (set)
494	cost += rtx_cost (set, SET);
495      else
496	cost++;
497    }
498
499  return cost;
500}
501
502/* The properties of the target.  */
503
504unsigned target_avail_regs;	/* Number of available registers.  */
505unsigned target_res_regs;	/* Number of reserved registers.  */
506unsigned target_small_cost;	/* The cost for register when there is a free one.  */
507unsigned target_pres_cost;	/* The cost for register when there are not too many
508				   free ones.  */
509unsigned target_spill_cost;	/* The cost for register when we need to spill.  */
510
511/* Initialize the constants for computing set costs.  */
512
513void
514init_set_costs (void)
515{
516  rtx seq;
517  rtx reg1 = gen_raw_REG (SImode, FIRST_PSEUDO_REGISTER);
518  rtx reg2 = gen_raw_REG (SImode, FIRST_PSEUDO_REGISTER + 1);
519  rtx addr = gen_raw_REG (Pmode, FIRST_PSEUDO_REGISTER + 2);
520  rtx mem = validize_mem (gen_rtx_MEM (SImode, addr));
521  unsigned i;
522
523  for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
524    if (TEST_HARD_REG_BIT (reg_class_contents[GENERAL_REGS], i)
525	&& !fixed_regs[i])
526      target_avail_regs++;
527
528  target_res_regs = 3;
529
530  /* These are really just heuristic values.  */
531
532  start_sequence ();
533  emit_move_insn (reg1, reg2);
534  seq = get_insns ();
535  end_sequence ();
536  target_small_cost = seq_cost (seq);
537  target_pres_cost = 2 * target_small_cost;
538
539  start_sequence ();
540  emit_move_insn (mem, reg1);
541  emit_move_insn (reg2, mem);
542  seq = get_insns ();
543  end_sequence ();
544  target_spill_cost = seq_cost (seq);
545}
546
547/* Calculates cost for having SIZE new loop global variables.  REGS_USED is the
548   number of global registers used in loop.  N_USES is the number of relevant
549   variable uses.  */
550
551unsigned
552global_cost_for_size (unsigned size, unsigned regs_used, unsigned n_uses)
553{
554  unsigned regs_needed = regs_used + size;
555  unsigned cost = 0;
556
557  if (regs_needed + target_res_regs <= target_avail_regs)
558    cost += target_small_cost * size;
559  else if (regs_needed <= target_avail_regs)
560    cost += target_pres_cost * size;
561  else
562    {
563      cost += target_pres_cost * size;
564      cost += target_spill_cost * n_uses * (regs_needed - target_avail_regs) / regs_needed;
565    }
566
567  return cost;
568}
569
570/* Sets EDGE_LOOP_EXIT flag for all exits of LOOPS.  */
571
572void
573mark_loop_exit_edges (struct loops *loops)
574{
575  basic_block bb;
576  edge e;
577
578  if (loops->num <= 1)
579    return;
580
581  FOR_EACH_BB (bb)
582    {
583      edge_iterator ei;
584
585      FOR_EACH_EDGE (e, ei, bb->succs)
586	{
587	  if (bb->loop_father->outer
588	      && loop_exit_edge_p (bb->loop_father, e))
589	    e->flags |= EDGE_LOOP_EXIT;
590	  else
591	    e->flags &= ~EDGE_LOOP_EXIT;
592	}
593    }
594}
595
596