1/* Natural loop analysis code for GNU compiler.
2   Copyright (C) 2002, 2003, 2004, 2005, 2006, 2007, 2008, 2009
3   Free Software Foundation, Inc.
4
5This file is part of GCC.
6
7GCC is free software; you can redistribute it and/or modify it under
8the terms of the GNU General Public License as published by the Free
9Software Foundation; either version 3, or (at your option) any later
10version.
11
12GCC is distributed in the hope that it will be useful, but WITHOUT ANY
13WARRANTY; without even the implied warranty of MERCHANTABILITY or
14FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
15for more details.
16
17You should have received a copy of the GNU General Public License
18along with GCC; see the file COPYING3.  If not see
19<http://www.gnu.org/licenses/>.  */
20
21#include "config.h"
22#include "system.h"
23#include "coretypes.h"
24#include "tm.h"
25#include "rtl.h"
26#include "hard-reg-set.h"
27#include "obstack.h"
28#include "basic-block.h"
29#include "cfgloop.h"
30#include "expr.h"
31#include "output.h"
32#include "graphds.h"
33#include "params.h"
34
35/* Checks whether BB is executed exactly once in each LOOP iteration.  */
36
37bool
38just_once_each_iteration_p (const struct loop *loop, const_basic_block bb)
39{
40  /* It must be executed at least once each iteration.  */
41  if (!dominated_by_p (CDI_DOMINATORS, loop->latch, bb))
42    return false;
43
44  /* And just once.  */
45  if (bb->loop_father != loop)
46    return false;
47
48  /* But this was not enough.  We might have some irreducible loop here.  */
49  if (bb->flags & BB_IRREDUCIBLE_LOOP)
50    return false;
51
52  return true;
53}
54
/* Marks blocks and edges that are part of non-recognized loops; i.e. we
   throw away all latch edges and mark blocks inside any remaining cycle.
   Everything is a bit complicated due to fact we do not want to do this
   for parts of cycles that only "pass" through some loop -- i.e. for
   each cycle, we want to mark blocks that belong directly to innermost
   loop containing the whole cycle.

   LOOPS is the loop tree.  */

/* Vertex of the auxiliary graph representing LOOP; loop vertices are
   placed after all basic-block vertices.  */
#define LOOP_REPR(LOOP) ((LOOP)->num + last_basic_block)
/* Vertex of the auxiliary graph representing BB (index shifted by one;
   presumably to keep vertex 0 unused -- TODO confirm).  */
#define BB_REPR(BB) ((BB)->index + 1)

/* Detects irreducible regions: builds an auxiliary digraph from the CFG
   with latch edges removed, runs SCC decomposition on it, and marks every
   edge (and the source block, when appropriate) that connects two vertices
   of the same nontrivial SCC with EDGE_IRREDUCIBLE_LOOP/BB_IRREDUCIBLE_LOOP.
   Returns true iff some irreducible region was found.  */

bool
mark_irreducible_loops (void)
{
  basic_block act;
  struct graph_edge *ge;
  edge e;
  edge_iterator ei;
  int src, dest;
  unsigned depth;
  struct graph *g;
  int num = number_of_loops ();
  struct loop *cloop;
  bool irred_loop_found = false;
  int i;

  gcc_assert (current_loops != NULL);

  /* Reset the flags.  */
  FOR_BB_BETWEEN (act, ENTRY_BLOCK_PTR, EXIT_BLOCK_PTR, next_bb)
    {
      act->flags &= ~BB_IRREDUCIBLE_LOOP;
      FOR_EACH_EDGE (e, ei, act->succs)
	e->flags &= ~EDGE_IRREDUCIBLE_LOOP;
    }

  /* Create the edge lists.  One vertex per basic block plus one per loop
     (the loop representatives let a cycle be attributed to the innermost
     loop that contains it whole).  */
  g = new_graph (last_basic_block + num);

  FOR_BB_BETWEEN (act, ENTRY_BLOCK_PTR, EXIT_BLOCK_PTR, next_bb)
    FOR_EACH_EDGE (e, ei, act->succs)
      {
	/* Ignore edges to exit.  */
	if (e->dest == EXIT_BLOCK_PTR)
	  continue;

	src = BB_REPR (act);
	dest = BB_REPR (e->dest);

	/* Ignore latch edges.  Removing the back edges of the recognized
	   natural loops is what leaves only the irreducible cycles.  */
	if (e->dest->loop_father->header == e->dest
	    && e->dest->loop_father->latch == act)
	  continue;

	/* Edges inside a single loop should be left where they are.  Edges
	   to subloop headers should lead to representative of the subloop,
	   but from the same place.

	   Edges exiting loops should lead from representative
	   of the son of nearest common ancestor of the loops in that
	   act lays.  */

	if (e->dest->loop_father->header == e->dest)
	  dest = LOOP_REPR (e->dest->loop_father);

	if (!flow_bb_inside_loop_p (act->loop_father, e->dest))
	  {
	    /* Depth of the child (of the common ancestor) that contains
	       ACT; its representative becomes the edge source.  */
	    depth = 1 + loop_depth (find_common_loop (act->loop_father,
						      e->dest->loop_father));
	    if (depth == loop_depth (act->loop_father))
	      cloop = act->loop_father;
	    else
	      cloop = VEC_index (loop_p, act->loop_father->superloops, depth);

	    src = LOOP_REPR (cloop);
	  }

	/* Remember the CFG edge so it can be flagged after the SCC pass.  */
	add_edge (g, src, dest)->data = e;
      }

  /* Find the strongly connected components.  */
  graphds_scc (g, NULL);

  /* Mark the irreducible loops.  */
  for (i = 0; i < g->n_vertices; i++)
    for (ge = g->vertices[i].succ; ge; ge = ge->succ_next)
      {
	edge real = (edge) ge->data;
	/* edge E in graph G is irreducible if it connects two vertices in the
	   same scc.  */

	/* All edges should lead from a component with higher number to the
	   one with lower one.  */
	gcc_assert (g->vertices[ge->src].component >= g->vertices[ge->dest].component);

	if (g->vertices[ge->src].component != g->vertices[ge->dest].component)
	  continue;

	real->flags |= EDGE_IRREDUCIBLE_LOOP;
	irred_loop_found = true;
	/* Mark the source block too, but only when the edge stays within
	   the source's loop; otherwise the block itself is not part of
	   the irreducible region at its own loop level.  */
	if (flow_bb_inside_loop_p (real->src->loop_father, real->dest))
	  real->src->flags |= BB_IRREDUCIBLE_LOOP;
      }

  free_graph (g);

  loops_state_set (LOOPS_HAVE_MARKED_IRREDUCIBLE_REGIONS);
  return irred_loop_found;
}
165
166/* Counts number of insns inside LOOP.  */
167int
168num_loop_insns (const struct loop *loop)
169{
170  basic_block *bbs, bb;
171  unsigned i, ninsns = 0;
172  rtx insn;
173
174  bbs = get_loop_body (loop);
175  for (i = 0; i < loop->num_nodes; i++)
176    {
177      bb = bbs[i];
178      FOR_BB_INSNS (bb, insn)
179	if (NONDEBUG_INSN_P (insn))
180	  ninsns++;
181    }
182  free (bbs);
183
184  if (!ninsns)
185    ninsns = 1;	/* To avoid division by zero.  */
186
187  return ninsns;
188}
189
190/* Counts number of insns executed on average per iteration LOOP.  */
191int
192average_num_loop_insns (const struct loop *loop)
193{
194  basic_block *bbs, bb;
195  unsigned i, binsns, ninsns, ratio;
196  rtx insn;
197
198  ninsns = 0;
199  bbs = get_loop_body (loop);
200  for (i = 0; i < loop->num_nodes; i++)
201    {
202      bb = bbs[i];
203
204      binsns = 0;
205      FOR_BB_INSNS (bb, insn)
206	if (NONDEBUG_INSN_P (insn))
207	  binsns++;
208
209      ratio = loop->header->frequency == 0
210	      ? BB_FREQ_MAX
211	      : (bb->frequency * BB_FREQ_MAX) / loop->header->frequency;
212      ninsns += binsns * ratio;
213    }
214  free (bbs);
215
216  ninsns /= BB_FREQ_MAX;
217  if (!ninsns)
218    ninsns = 1; /* To avoid division by zero.  */
219
220  return ninsns;
221}
222
223/* Returns expected number of iterations of LOOP, according to
224   measured or guessed profile.  No bounding is done on the
225   value.  */
226
227gcov_type
228expected_loop_iterations_unbounded (const struct loop *loop)
229{
230  edge e;
231  edge_iterator ei;
232
233  if (loop->latch->count || loop->header->count)
234    {
235      gcov_type count_in, count_latch, expected;
236
237      count_in = 0;
238      count_latch = 0;
239
240      FOR_EACH_EDGE (e, ei, loop->header->preds)
241	if (e->src == loop->latch)
242	  count_latch = e->count;
243	else
244	  count_in += e->count;
245
246      if (count_in == 0)
247	expected = count_latch * 2;
248      else
249	expected = (count_latch + count_in - 1) / count_in;
250
251      return expected;
252    }
253  else
254    {
255      int freq_in, freq_latch;
256
257      freq_in = 0;
258      freq_latch = 0;
259
260      FOR_EACH_EDGE (e, ei, loop->header->preds)
261	if (e->src == loop->latch)
262	  freq_latch = EDGE_FREQUENCY (e);
263	else
264	  freq_in += EDGE_FREQUENCY (e);
265
266      if (freq_in == 0)
267	return freq_latch * 2;
268
269      return (freq_latch + freq_in - 1) / freq_in;
270    }
271}
272
273/* Returns expected number of LOOP iterations.  The returned value is bounded
274   by REG_BR_PROB_BASE.  */
275
276unsigned
277expected_loop_iterations (const struct loop *loop)
278{
279  gcov_type expected = expected_loop_iterations_unbounded (loop);
280  return (expected > REG_BR_PROB_BASE ? REG_BR_PROB_BASE : expected);
281}
282
283/* Returns the maximum level of nesting of subloops of LOOP.  */
284
285unsigned
286get_loop_level (const struct loop *loop)
287{
288  const struct loop *ploop;
289  unsigned mx = 0, l;
290
291  for (ploop = loop->inner; ploop; ploop = ploop->next)
292    {
293      l = get_loop_level (ploop);
294      if (l >= mx)
295	mx = l + 1;
296    }
297  return mx;
298}
299
300/* Returns estimate on cost of computing SEQ.  */
301
302static unsigned
303seq_cost (const_rtx seq, bool speed)
304{
305  unsigned cost = 0;
306  rtx set;
307
308  for (; seq; seq = NEXT_INSN (seq))
309    {
310      set = single_set (seq);
311      if (set)
312	cost += rtx_cost (set, SET, speed);
313      else
314	cost++;
315    }
316
317  return cost;
318}
319
/* The properties of the target, computed by init_set_costs.  */

unsigned target_avail_regs;	/* Number of available registers.  */
unsigned target_res_regs;	/* Number of registers reserved for temporary
				   expressions.  */
unsigned target_reg_cost[2];	/* The cost for register when there still
				   is some reserve, but we are approaching
				   the number of available registers;
				   indexed by !optimize_for_size.  */
unsigned target_spill_cost[2];	/* The cost for register when we need
				   to spill; indexed by !optimize_for_size.  */
330
/* Initialize the constants for computing set costs
   (target_avail_regs, target_res_regs, target_reg_cost and
   target_spill_cost), by emitting scratch move sequences and
   measuring their rtx costs for both the size- and the
   speed-optimized variants.  */

void
init_set_costs (void)
{
  int speed;
  rtx seq;
  /* Scratch pseudos (never emitted into the function body) used to build
     representative move insns.  */
  rtx reg1 = gen_raw_REG (SImode, FIRST_PSEUDO_REGISTER);
  rtx reg2 = gen_raw_REG (SImode, FIRST_PSEUDO_REGISTER + 1);
  rtx addr = gen_raw_REG (Pmode, FIRST_PSEUDO_REGISTER + 2);
  rtx mem = validize_mem (gen_rtx_MEM (SImode, addr));
  unsigned i;

  /* Count the general registers usable for allocation.  */
  target_avail_regs = 0;
  for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
    if (TEST_HARD_REG_BIT (reg_class_contents[GENERAL_REGS], i)
	&& !fixed_regs[i])
      target_avail_regs++;

  target_res_regs = 3;

  /* Compute the costs twice: once optimizing for size (speed == 0) and
     once for speed (speed == 1).  */
  for (speed = 0; speed < 2; speed++)
     {
      /* Make rtx_cost report size or speed costs accordingly; restored
	 by default_rtl_profile below.  */
      crtl->maybe_hot_insn_p = speed;
      /* Set up the costs for using extra registers:

	 1) If not many free registers remain, we should prefer having an
	    additional move to decreasing the number of available registers.
	    (TARGET_REG_COST).
	 2) If no registers are available, we need to spill, which may require
	    storing the old value to memory and loading it back
	    (TARGET_SPILL_COST).  */

      /* Cost of a register-to-register move.  */
      start_sequence ();
      emit_move_insn (reg1, reg2);
      seq = get_insns ();
      end_sequence ();
      target_reg_cost [speed] = seq_cost (seq, speed);

      /* Cost of a store followed by a reload -- i.e. of a spill.  */
      start_sequence ();
      emit_move_insn (mem, reg1);
      emit_move_insn (reg2, mem);
      seq = get_insns ();
      end_sequence ();
      target_spill_cost [speed] = seq_cost (seq, speed);
    }
  /* Restore the default RTL cost profile.  */
  default_rtl_profile ();
}
379
380/* Estimates cost of increased register pressure caused by making N_NEW new
381   registers live around the loop.  N_OLD is the number of registers live
382   around the loop.  */
383
384unsigned
385estimate_reg_pressure_cost (unsigned n_new, unsigned n_old, bool speed)
386{
387  unsigned cost;
388  unsigned regs_needed = n_new + n_old;
389
390  /* If we have enough registers, we should use them and not restrict
391     the transformations unnecessarily.  */
392  if (regs_needed + target_res_regs <= target_avail_regs)
393    return 0;
394
395  if (regs_needed <= target_avail_regs)
396    /* If we are close to running out of registers, try to preserve
397       them.  */
398    cost = target_reg_cost [speed] * n_new;
399  else
400    /* If we run out of registers, it is very expensive to add another
401       one.  */
402    cost = target_spill_cost [speed] * n_new;
403
404  if (optimize && (flag_ira_region == IRA_REGION_ALL
405		   || flag_ira_region == IRA_REGION_MIXED)
406      && number_of_loops () <= (unsigned) IRA_MAX_LOOPS_NUM)
407    /* IRA regional allocation deals with high register pressure
408       better.  So decrease the cost (to do more accurate the cost
409       calculation for IRA, we need to know how many registers lives
410       through the loop transparently).  */
411    cost /= 2;
412
413  return cost;
414}
415
416/* Sets EDGE_LOOP_EXIT flag for all loop exits.  */
417
418void
419mark_loop_exit_edges (void)
420{
421  basic_block bb;
422  edge e;
423
424  if (number_of_loops () <= 1)
425    return;
426
427  FOR_EACH_BB (bb)
428    {
429      edge_iterator ei;
430
431      FOR_EACH_EDGE (e, ei, bb->succs)
432	{
433	  if (loop_outer (bb->loop_father)
434	      && loop_exit_edge_p (bb->loop_father, e))
435	    e->flags |= EDGE_LOOP_EXIT;
436	  else
437	    e->flags &= ~EDGE_LOOP_EXIT;
438	}
439    }
440}
441
442