1/* Induction variable canonicalization.
2   Copyright (C) 2004, 2005, 2007, 2008, 2010
3   Free Software Foundation, Inc.
4
5This file is part of GCC.
6
7GCC is free software; you can redistribute it and/or modify it
8under the terms of the GNU General Public License as published by the
9Free Software Foundation; either version 3, or (at your option) any
10later version.
11
12GCC is distributed in the hope that it will be useful, but WITHOUT
13ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
14FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
15for more details.
16
17You should have received a copy of the GNU General Public License
18along with GCC; see the file COPYING3.  If not see
19<http://www.gnu.org/licenses/>.  */
20
21/* This pass detects the loops that iterate a constant number of times,
22   adds a canonical induction variable (step -1, tested against 0)
23   and replaces the exit test.  This enables the less powerful rtl
24   level analysis to use this information.
25
26   This might spoil the code in some cases (by increasing register pressure).
27   Note that in the case the new variable is not needed, ivopts will get rid
28   of it, so it might only be a problem when there are no other linear induction
29   variables.  In that case the created optimization possibilities are likely
30   to pay up.
31
32   Additionally in case we detect that it is beneficial to unroll the
33   loop completely, we do it right here to expose the optimization
34   possibilities to the following passes.  */
35
36#include "config.h"
37#include "system.h"
38#include "coretypes.h"
39#include "tm.h"
40#include "tree.h"
41#include "rtl.h"
42#include "tm_p.h"
43#include "hard-reg-set.h"
44#include "basic-block.h"
45#include "output.h"
46#include "diagnostic.h"
47#include "tree-flow.h"
48#include "tree-dump.h"
49#include "cfgloop.h"
50#include "tree-pass.h"
51#include "ggc.h"
52#include "tree-chrec.h"
53#include "tree-scalar-evolution.h"
54#include "params.h"
55#include "flags.h"
56#include "tree-inline.h"
57#include "target.h"
58
/* Specifies types of loops that may be unrolled.  The levels are
   progressively more permissive.  */

enum unroll_level
{
  UL_SINGLE_ITER,	/* Only loops that exit immediately in the first
			   iteration.  */
  UL_NO_GROWTH,		/* Only loops whose unrolling will not cause increase
			   of code size.  */
  UL_ALL		/* All suitable loops.  */
};
69
70/* Adds a canonical induction variable to LOOP iterating NITER times.  EXIT
71   is the exit edge whose condition is replaced.  */
72
73static void
74create_canonical_iv (struct loop *loop, edge exit, tree niter)
75{
76  edge in;
77  tree type, var;
78  gimple cond;
79  gimple_stmt_iterator incr_at;
80  enum tree_code cmp;
81
82  if (dump_file && (dump_flags & TDF_DETAILS))
83    {
84      fprintf (dump_file, "Added canonical iv to loop %d, ", loop->num);
85      print_generic_expr (dump_file, niter, TDF_SLIM);
86      fprintf (dump_file, " iterations.\n");
87    }
88
89  cond = last_stmt (exit->src);
90  in = EDGE_SUCC (exit->src, 0);
91  if (in == exit)
92    in = EDGE_SUCC (exit->src, 1);
93
94  /* Note that we do not need to worry about overflows, since
95     type of niter is always unsigned and all comparisons are
96     just for equality/nonequality -- i.e. everything works
97     with a modulo arithmetics.  */
98
99  type = TREE_TYPE (niter);
100  niter = fold_build2 (PLUS_EXPR, type,
101		       niter,
102		       build_int_cst (type, 1));
103  incr_at = gsi_last_bb (in->src);
104  create_iv (niter,
105	     build_int_cst (type, -1),
106	     NULL_TREE, loop,
107	     &incr_at, false, NULL, &var);
108
109  cmp = (exit->flags & EDGE_TRUE_VALUE) ? EQ_EXPR : NE_EXPR;
110  gimple_cond_set_code (cond, cmp);
111  gimple_cond_set_lhs (cond, var);
112  gimple_cond_set_rhs (cond, build_int_cst (type, 0));
113  update_stmt (cond);
114}
115
116/* Computes an estimated number of insns in LOOP, weighted by WEIGHTS.  */
117
118unsigned
119tree_num_loop_insns (struct loop *loop, eni_weights *weights)
120{
121  basic_block *body = get_loop_body (loop);
122  gimple_stmt_iterator gsi;
123  unsigned size = 0, i;
124
125  for (i = 0; i < loop->num_nodes; i++)
126    for (gsi = gsi_start_bb (body[i]); !gsi_end_p (gsi); gsi_next (&gsi))
127      size += estimate_num_insns (gsi_stmt (gsi), weights);
128  free (body);
129
130  return size;
131}
132
/* Describe size of loop as detected by tree_estimate_loop_size.  */
struct loop_size
{
  /* Number of instructions in the loop.  */
  int overall;

  /* Number of instructions that will be likely optimized out in
     peeled iterations of loop  (i.e. computation based on induction
     variable where induction variable starts at known constant.)  */
  int eliminated_by_peeling;

  /* Same statistics for last iteration of loop: it is smaller because
     instructions after exit are not executed.  */
  int last_iteration;
  int last_iteration_eliminated_by_peeling;
};
149
/* Return true if OP in STMT will be constant after peeling LOOP.
   This holds for compile-time invariants, for loads from constant
   arrays whose indices fold to constants, and for simple induction
   variables (whose value in each peeled copy is a known constant).  */

static bool
constant_after_peeling (tree op, gimple stmt, struct loop *loop)
{
  affine_iv iv;

  /* Invariants are trivially constant in every peeled copy.  */
  if (is_gimple_min_invariant (op))
    return true;

  /* We can still fold accesses to constant arrays when index is known.  */
  if (TREE_CODE (op) != SSA_NAME)
    {
      tree base = op;

      /* First make fast look if we see constant array inside.  */
      while (handled_component_p (base))
	base = TREE_OPERAND (base, 0);
      if ((DECL_P (base)
      	   && TREE_STATIC (base)
	   && TREE_READONLY (base)
           && (DECL_INITIAL (base)
	       || (!DECL_EXTERNAL (base)
		   && targetm.binds_local_p (base))))
	  || CONSTANT_CLASS_P (base))
	{
	  /* If so, see if we understand all the indices.  */
	  base = op;
	  while (handled_component_p (base))
	    {
	      /* Each array index must itself fold to a constant after
		 peeling (checked recursively) for the load to fold.  */
	      if (TREE_CODE (base) == ARRAY_REF
		  && !constant_after_peeling (TREE_OPERAND (base, 1), stmt, loop))
		return false;
	      base = TREE_OPERAND (base, 0);
	    }
	  return true;
	}
      return false;
    }

  /* Induction variables are constants.  */
  if (!simple_iv (loop, loop_containing_stmt (stmt), op, &iv, false))
    return false;
  if (!is_gimple_min_invariant (iv.base))
    return false;
  if (!is_gimple_min_invariant (iv.step))
    return false;
  return true;
}
199
200/* Computes an estimated number of insns in LOOP, weighted by WEIGHTS.
201   Return results in SIZE, estimate benefits for complete unrolling exiting by EXIT.  */
202
203static void
204tree_estimate_loop_size (struct loop *loop, edge exit, struct loop_size *size)
205{
206  basic_block *body = get_loop_body (loop);
207  gimple_stmt_iterator gsi;
208  unsigned int i;
209  bool after_exit;
210
211  size->overall = 0;
212  size->eliminated_by_peeling = 0;
213  size->last_iteration = 0;
214  size->last_iteration_eliminated_by_peeling = 0;
215
216  if (dump_file && (dump_flags & TDF_DETAILS))
217    fprintf (dump_file, "Estimating sizes for loop %i\n", loop->num);
218  for (i = 0; i < loop->num_nodes; i++)
219    {
220      if (exit && body[i] != exit->src
221	  && dominated_by_p (CDI_DOMINATORS, body[i], exit->src))
222	after_exit = true;
223      else
224	after_exit = false;
225      if (dump_file && (dump_flags & TDF_DETAILS))
226	fprintf (dump_file, " BB: %i, after_exit: %i\n", body[i]->index, after_exit);
227
228      for (gsi = gsi_start_bb (body[i]); !gsi_end_p (gsi); gsi_next (&gsi))
229	{
230	  gimple stmt = gsi_stmt (gsi);
231	  int num = estimate_num_insns (stmt, &eni_size_weights);
232	  bool likely_eliminated = false;
233
234	  if (dump_file && (dump_flags & TDF_DETAILS))
235	    {
236	      fprintf (dump_file, "  size: %3i ", num);
237	      print_gimple_stmt (dump_file, gsi_stmt (gsi), 0, 0);
238	    }
239
240	  /* Look for reasons why we might optimize this stmt away. */
241
242	  /* Exit conditional.  */
243	  if (body[i] == exit->src && stmt == last_stmt (exit->src))
244	    {
245	      if (dump_file && (dump_flags & TDF_DETAILS))
246	        fprintf (dump_file, "   Exit condition will be eliminated.\n");
247	      likely_eliminated = true;
248	    }
249	  /* Sets of IV variables  */
250	  else if (gimple_code (stmt) == GIMPLE_ASSIGN
251	      && constant_after_peeling (gimple_assign_lhs (stmt), stmt, loop))
252	    {
253	      if (dump_file && (dump_flags & TDF_DETAILS))
254	        fprintf (dump_file, "   Induction variable computation will"
255			 " be folded away.\n");
256	      likely_eliminated = true;
257	    }
258	  /* Assignments of IV variables.  */
259	  else if (gimple_code (stmt) == GIMPLE_ASSIGN
260		   && TREE_CODE (gimple_assign_lhs (stmt)) == SSA_NAME
261		   && constant_after_peeling (gimple_assign_rhs1 (stmt), stmt,loop)
262		   && (gimple_assign_rhs_class (stmt) != GIMPLE_BINARY_RHS
263		       || constant_after_peeling (gimple_assign_rhs2 (stmt),
264		       				  stmt, loop)))
265	    {
266	      if (dump_file && (dump_flags & TDF_DETAILS))
267	        fprintf (dump_file, "   Constant expression will be folded away.\n");
268	      likely_eliminated = true;
269	    }
270	  /* Conditionals.  */
271	  else if (gimple_code (stmt) == GIMPLE_COND
272		   && constant_after_peeling (gimple_cond_lhs (stmt), stmt, loop)
273		   && constant_after_peeling (gimple_cond_rhs (stmt), stmt, loop))
274	    {
275	      if (dump_file && (dump_flags & TDF_DETAILS))
276	        fprintf (dump_file, "   Constant conditional.\n");
277	      likely_eliminated = true;
278	    }
279
280	  size->overall += num;
281	  if (likely_eliminated)
282	    size->eliminated_by_peeling += num;
283	  if (!after_exit)
284	    {
285	      size->last_iteration += num;
286	      if (likely_eliminated)
287		size->last_iteration_eliminated_by_peeling += num;
288	    }
289	}
290    }
291  if (dump_file && (dump_flags & TDF_DETAILS))
292    fprintf (dump_file, "size: %i-%i, last_iteration: %i-%i\n", size->overall,
293    	     size->eliminated_by_peeling, size->last_iteration,
294	     size->last_iteration_eliminated_by_peeling);
295
296  free (body);
297}
298
299/* Estimate number of insns of completely unrolled loop.
300   It is (NUNROLL + 1) * size of loop body with taking into account
301   the fact that in last copy everything after exit conditional
302   is dead and that some instructions will be eliminated after
303   peeling.
304
305   Loop body is likely going to simplify futher, this is difficult
306   to guess, we just decrease the result by 1/3.  */
307
308static unsigned HOST_WIDE_INT
309estimated_unrolled_size (struct loop_size *size,
310			 unsigned HOST_WIDE_INT nunroll)
311{
312  HOST_WIDE_INT unr_insns = ((nunroll)
313  			     * (HOST_WIDE_INT) (size->overall
314			     			- size->eliminated_by_peeling));
315  if (!nunroll)
316    unr_insns = 0;
317  unr_insns += size->last_iteration - size->last_iteration_eliminated_by_peeling;
318
319  unr_insns = unr_insns * 2 / 3;
320  if (unr_insns <= 0)
321    unr_insns = 1;
322
323  return unr_insns;
324}
325
/* Tries to unroll LOOP completely, i.e. NITER times.
   UL determines which loops we are allowed to unroll.
   EXIT is the exit of the loop that should be eliminated.
   Returns true if the loop was unrolled.  */

static bool
try_unroll_loop_completely (struct loop *loop,
			    edge exit, tree niter,
			    enum unroll_level ul)
{
  unsigned HOST_WIDE_INT n_unroll, ninsns, max_unroll, unr_insns;
  gimple cond;
  struct loop_size size;

  /* Only innermost loops are handled.  */
  if (loop->inner)
    return false;

  /* The iteration count must be a constant fitting an unsigned
     HOST_WIDE_INT.  */
  if (!host_integerp (niter, 1))
    return false;
  n_unroll = tree_low_cst (niter, 1);

  max_unroll = PARAM_VALUE (PARAM_MAX_COMPLETELY_PEEL_TIMES);
  if (n_unroll > max_unroll)
    return false;

  if (n_unroll)
    {
      /* A nonzero N_UNROLL means the body runs more than once, which
	 UL_SINGLE_ITER forbids.  */
      if (ul == UL_SINGLE_ITER)
	return false;

      tree_estimate_loop_size (loop, exit, &size);
      ninsns = size.overall;

      unr_insns = estimated_unrolled_size (&size, n_unroll);
      if (dump_file && (dump_flags & TDF_DETAILS))
	{
	  fprintf (dump_file, "  Loop size: %d\n", (int) ninsns);
	  fprintf (dump_file, "  Estimated size after unrolling: %d\n",
		   (int) unr_insns);
	}

      /* Refuse if unrolling grows the code beyond the parameter limit.  */
      if (unr_insns > ninsns
	  && (unr_insns
	      > (unsigned) PARAM_VALUE (PARAM_MAX_COMPLETELY_PEELED_INSNS)))
	{
	  if (dump_file && (dump_flags & TDF_DETAILS))
	    fprintf (dump_file, "Not unrolling loop %d "
		     "(--param max-completely-peeled-insns limit reached).\n",
		     loop->num);
	  return false;
	}

      /* UL_NO_GROWTH permits no size increase at all.  */
      if (ul == UL_NO_GROWTH
	  && unr_insns > ninsns)
	{
	  if (dump_file && (dump_flags & TDF_DETAILS))
	    fprintf (dump_file, "Not unrolling loop %d.\n", loop->num);
	  return false;
	}
    }

  if (n_unroll)
    {
      sbitmap wont_exit;
      edge e;
      unsigned i;
      VEC (edge, heap) *to_remove = NULL;

      initialize_original_copy_tables ();
      /* Mark every duplicated copy except copy 0 as unable to exit.  */
      wont_exit = sbitmap_alloc (n_unroll + 1);
      sbitmap_ones (wont_exit);
      RESET_BIT (wont_exit, 0);

      if (!gimple_duplicate_loop_to_header_edge (loop, loop_preheader_edge (loop),
						 n_unroll, wont_exit,
						 exit, &to_remove,
						 DLTHE_FLAG_UPDATE_FREQ
						 | DLTHE_FLAG_COMPLETTE_PEEL))
	{
	  /* Duplication failed; release resources and bail out.  */
          free_original_copy_tables ();
	  free (wont_exit);
	  return false;
	}

      /* Remove the exit paths that became dead in the duplicated copies.  */
      for (i = 0; VEC_iterate (edge, to_remove, i, e); i++)
	{
	  bool ok = remove_path (e);
	  gcc_assert (ok);
	}

      VEC_free (edge, heap, to_remove);
      free (wont_exit);
      free_original_copy_tables ();
    }

  /* Fold the remaining exit condition so control always leaves the loop,
     preserving the sense of the original true/false exit edge.  */
  cond = last_stmt (exit->src);
  if (exit->flags & EDGE_TRUE_VALUE)
    gimple_cond_make_true (cond);
  else
    gimple_cond_make_false (cond);
  update_stmt (cond);
  update_ssa (TODO_update_ssa);

  if (dump_file && (dump_flags & TDF_DETAILS))
    fprintf (dump_file, "Unrolled loop %d completely.\n", loop->num);

  return true;
}
433
/* Adds a canonical induction variable to LOOP if suitable.
   CREATE_IV is true if we may create a new iv.  UL determines
   which loops we are allowed to completely unroll.  If TRY_EVAL is true, we try
   to determine the number of iterations of a loop by direct evaluation.
   Returns true if cfg is changed.  */

static bool
canonicalize_loop_induction_variables (struct loop *loop,
				       bool create_iv, enum unroll_level ul,
				       bool try_eval)
{
  edge exit = NULL;
  tree niter;

  /* First ask scalar evolutions for the latch execution count.  */
  niter = number_of_latch_executions (loop);
  if (TREE_CODE (niter) == INTEGER_CST)
    {
      exit = single_exit (loop);
      /* NOTE(review): EXIT is dereferenced without a NULL check here;
	 presumably a constant latch count implies the loop has a single
	 exit -- verify against number_of_latch_executions.  */
      if (!just_once_each_iteration_p (loop, exit->src))
	return false;
    }
  else
    {
      /* If the loop has more than one exit, try checking all of them
	 for # of iterations determinable through scev.  */
      if (!single_exit (loop))
	niter = find_loop_niter (loop, &exit);

      /* Finally if everything else fails, try brute force evaluation.  */
      if (try_eval
	  && (chrec_contains_undetermined (niter)
	      || TREE_CODE (niter) != INTEGER_CST))
	niter = find_loop_niter_by_eval (loop, &exit);

      /* Give up unless a constant iteration count was determined.  */
      if (chrec_contains_undetermined (niter)
	  || TREE_CODE (niter) != INTEGER_CST)
	return false;
    }

  if (dump_file && (dump_flags & TDF_DETAILS))
    {
      fprintf (dump_file, "Loop %d iterates ", loop->num);
      print_generic_expr (dump_file, niter, TDF_SLIM);
      fprintf (dump_file, " times.\n");
    }

  /* Prefer removing the loop entirely over adding an iv to it.  */
  if (try_unroll_loop_completely (loop, exit, niter, ul))
    return true;

  if (create_iv)
    create_canonical_iv (loop, exit, niter);

  return false;
}
488
489/* The main entry point of the pass.  Adds canonical induction variables
490   to the suitable loops.  */
491
492unsigned int
493canonicalize_induction_variables (void)
494{
495  loop_iterator li;
496  struct loop *loop;
497  bool changed = false;
498
499  FOR_EACH_LOOP (li, loop, 0)
500    {
501      changed |= canonicalize_loop_induction_variables (loop,
502							true, UL_SINGLE_ITER,
503							true);
504    }
505
506  /* Clean up the information about numbers of iterations, since brute force
507     evaluation could reveal new information.  */
508  scev_reset ();
509
510  if (changed)
511    return TODO_cleanup_cfg;
512  return 0;
513}
514
/* Unroll LOOPS completely if they iterate just few times.  Unless
   MAY_INCREASE_SIZE is true, perform the unrolling only if the
   size of the code does not increase.  UNROLL_OUTER additionally
   allows outermost loops to be unrolled even when that grows code.  */

unsigned int
tree_unroll_loops_completely (bool may_increase_size, bool unroll_outer)
{
  loop_iterator li;
  struct loop *loop;
  bool changed;
  enum unroll_level ul;
  int iteration = 0;

  /* Iterate to a fixpoint (bounded by a param): removing a completely
     unrolled loop can expose new innermost loops to unroll.  */
  do
    {
      changed = false;

      FOR_EACH_LOOP (li, loop, LI_ONLY_INNERMOST)
	{
	  if (may_increase_size && optimize_loop_for_speed_p (loop)
	      /* Unroll outermost loops only if asked to do so or they do
		 not cause code growth.  */
	      && (unroll_outer
		  || loop_outer (loop_outer (loop))))
	    ul = UL_ALL;
	  else
	    ul = UL_NO_GROWTH;
	  /* Brute-force niter evaluation is requested only when the
	     ivcanon pass is disabled -- presumably because that pass
	     already performs it; confirm against pass ordering.  */
	  changed |= canonicalize_loop_induction_variables
		       (loop, false, ul, !flag_tree_loop_ivcanon);
	}

      if (changed)
	{
	  /* This will take care of removing completely unrolled loops
	     from the loop structures so we can continue unrolling now
	     innermost loops.  */
	  if (cleanup_tree_cfg ())
	    update_ssa (TODO_update_ssa_only_virtuals);

	  /* Clean up the information about numbers of iterations, since
	     complete unrolling might have invalidated it.  */
	  scev_reset ();
	}
    }
  while (changed
	 && ++iteration <= PARAM_VALUE (PARAM_MAX_UNROLL_ITERATIONS));

  return 0;
}
564