loop-unroll.c revision 132718
1132718Skan/* Loop unrolling and peeling.
2132718Skan   Copyright (C) 2002, 2003, 2004 Free Software Foundation, Inc.
3132718Skan
4132718SkanThis file is part of GCC.
5132718Skan
6132718SkanGCC is free software; you can redistribute it and/or modify it under
7132718Skanthe terms of the GNU General Public License as published by the Free
8132718SkanSoftware Foundation; either version 2, or (at your option) any later
9132718Skanversion.
10132718Skan
11132718SkanGCC is distributed in the hope that it will be useful, but WITHOUT ANY
12132718SkanWARRANTY; without even the implied warranty of MERCHANTABILITY or
13132718SkanFITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
14132718Skanfor more details.
15132718Skan
16132718SkanYou should have received a copy of the GNU General Public License
17132718Skanalong with GCC; see the file COPYING.  If not, write to the Free
18132718SkanSoftware Foundation, 59 Temple Place - Suite 330, Boston, MA
19132718Skan02111-1307, USA.  */
20132718Skan
21132718Skan#include "config.h"
22132718Skan#include "system.h"
23132718Skan#include "coretypes.h"
24132718Skan#include "tm.h"
25132718Skan#include "rtl.h"
26132718Skan#include "hard-reg-set.h"
27132718Skan#include "basic-block.h"
28132718Skan#include "cfgloop.h"
29132718Skan#include "cfglayout.h"
30132718Skan#include "params.h"
31132718Skan#include "output.h"
32132718Skan#include "expr.h"
33132718Skan/* We need to use the macro exact_log2. */
34132718Skan#include "toplev.h"
35132718Skan
36132718Skan/* This pass performs loop unrolling and peeling.  We only perform these
37132718Skan   optimizations on innermost loops (with single exception) because
38132718Skan   the impact on performance is greatest here, and we want to avoid
39132718Skan   unnecessary code size growth.  The gain is caused by greater sequentiality
40132718Skan   of code, better code to optimize for further passes and in some cases
41132718Skan   by fewer testings of exit conditions.  The main problem is code growth,
42132718Skan   that impacts performance negatively due to effect of caches.
43132718Skan
44132718Skan   What we do:
45132718Skan
46132718Skan   -- complete peeling of once-rolling loops; this is the above mentioned
47132718Skan      exception, as this causes loop to be cancelled completely and
48132718Skan      does not cause code growth
49132718Skan   -- complete peeling of loops that roll (small) constant times.
50132718Skan   -- simple peeling of first iterations of loops that do not roll much
51132718Skan      (according to profile feedback)
52132718Skan   -- unrolling of loops that roll constant times; this is almost always
53132718Skan      win, as we get rid of exit condition tests.
54132718Skan   -- unrolling of loops that roll number of times that we can compute
55132718Skan      in runtime; we also get rid of exit condition tests here, but there
56132718Skan      is the extra expense for calculating the number of iterations
57132718Skan   -- simple unrolling of remaining loops; this is performed only if we
58132718Skan      are asked to, as the gain is questionable in this case and often
59132718Skan      it may even slow down the code
60132718Skan   For more detailed descriptions of each of those, see comments at
61132718Skan   appropriate function below.
62132718Skan
63132718Skan   There is a lot of parameters (defined and described in params.def) that
64132718Skan   control how much we unroll/peel.
65132718Skan
66132718Skan   ??? A great problem is that we don't have a good way how to determine
67132718Skan   how many times we should unroll the loop; the experiments I have made
68132718Skan   showed that this choice may affect performance in order of several %.
69132718Skan   */
70132718Skan
71132718Skanstatic void decide_unrolling_and_peeling (struct loops *, int);
72132718Skanstatic void peel_loops_completely (struct loops *, int);
73132718Skanstatic void decide_peel_simple (struct loop *, int);
74132718Skanstatic void decide_peel_once_rolling (struct loop *, int);
75132718Skanstatic void decide_peel_completely (struct loop *, int);
76132718Skanstatic void decide_unroll_stupid (struct loop *, int);
77132718Skanstatic void decide_unroll_constant_iterations (struct loop *, int);
78132718Skanstatic void decide_unroll_runtime_iterations (struct loop *, int);
79132718Skanstatic void peel_loop_simple (struct loops *, struct loop *);
80132718Skanstatic void peel_loop_completely (struct loops *, struct loop *);
81132718Skanstatic void unroll_loop_stupid (struct loops *, struct loop *);
82132718Skanstatic void unroll_loop_constant_iterations (struct loops *, struct loop *);
83132718Skanstatic void unroll_loop_runtime_iterations (struct loops *, struct loop *);
84132718Skanstatic void expand_bct (edge, int);
85132718Skanstatic bool discard_increment (struct loop *);
86132718Skan
87132718Skan/* Unroll and/or peel (depending on FLAGS) LOOPS.  */
88132718Skanvoid
89132718Skanunroll_and_peel_loops (struct loops *loops, int flags)
90132718Skan{
91132718Skan  struct loop *loop, *next;
92132718Skan  int check;
93132718Skan
94132718Skan  /* First perform complete loop peeling (it is almost surely a win,
95132718Skan     and affects parameters for further decision a lot).  */
96132718Skan  peel_loops_completely (loops, flags);
97132718Skan
98132718Skan  /* Now decide rest of unrolling and peeling.  */
99132718Skan  decide_unrolling_and_peeling (loops, flags);
100132718Skan
101132718Skan  loop = loops->tree_root;
102132718Skan  while (loop->inner)
103132718Skan    loop = loop->inner;
104132718Skan
105132718Skan  /* Scan the loops, inner ones first.  */
106132718Skan  while (loop != loops->tree_root)
107132718Skan    {
108132718Skan      if (loop->next)
109132718Skan	{
110132718Skan	  next = loop->next;
111132718Skan	  while (next->inner)
112132718Skan	    next = next->inner;
113132718Skan	}
114132718Skan      else
115132718Skan	next = loop->outer;
116132718Skan
117132718Skan      check = 1;
118132718Skan      /* And perform the appropriate transformations.  */
119132718Skan      switch (loop->lpt_decision.decision)
120132718Skan	{
121132718Skan	case LPT_PEEL_COMPLETELY:
122132718Skan	  /* Already done.  */
123132718Skan	  abort ();
124132718Skan	case LPT_PEEL_SIMPLE:
125132718Skan	  peel_loop_simple (loops, loop);
126132718Skan	  break;
127132718Skan	case LPT_UNROLL_CONSTANT:
128132718Skan	  unroll_loop_constant_iterations (loops, loop);
129132718Skan	  break;
130132718Skan	case LPT_UNROLL_RUNTIME:
131132718Skan	  unroll_loop_runtime_iterations (loops, loop);
132132718Skan	  break;
133132718Skan	case LPT_UNROLL_STUPID:
134132718Skan	  unroll_loop_stupid (loops, loop);
135132718Skan	  break;
136132718Skan	case LPT_NONE:
137132718Skan	  check = 0;
138132718Skan	  break;
139132718Skan	default:
140132718Skan	  abort ();
141132718Skan	}
142132718Skan      if (check)
143132718Skan	{
144132718Skan#ifdef ENABLE_CHECKING
145132718Skan	  verify_dominators (CDI_DOMINATORS);
146132718Skan	  verify_loop_structure (loops);
147132718Skan#endif
148132718Skan	}
149132718Skan      loop = next;
150132718Skan    }
151132718Skan}
152132718Skan
153132718Skan/* Check whether to peel LOOPS (depending on FLAGS) completely and do so.  */
154132718Skanstatic void
155132718Skanpeel_loops_completely (struct loops *loops, int flags)
156132718Skan{
157132718Skan  struct loop *loop, *next;
158132718Skan
159132718Skan  loop = loops->tree_root;
160132718Skan  while (loop->inner)
161132718Skan    loop = loop->inner;
162132718Skan
163132718Skan  while (loop != loops->tree_root)
164132718Skan    {
165132718Skan      if (loop->next)
166132718Skan	{
167132718Skan	  next = loop->next;
168132718Skan	  while (next->inner)
169132718Skan	    next = next->inner;
170132718Skan	}
171132718Skan      else
172132718Skan	next = loop->outer;
173132718Skan
174132718Skan      loop->lpt_decision.decision = LPT_NONE;
175132718Skan      loop->has_desc = 0;
176132718Skan
177132718Skan      if (rtl_dump_file)
178132718Skan	fprintf (rtl_dump_file, ";; Considering loop %d for complete peeling\n",
179132718Skan		 loop->num);
180132718Skan
181132718Skan      loop->ninsns = num_loop_insns (loop);
182132718Skan
183132718Skan      decide_peel_once_rolling (loop, flags);
184132718Skan      if (loop->lpt_decision.decision == LPT_NONE)
185132718Skan	decide_peel_completely (loop, flags);
186132718Skan
187132718Skan      if (loop->lpt_decision.decision == LPT_PEEL_COMPLETELY)
188132718Skan	{
189132718Skan	  peel_loop_completely (loops, loop);
190132718Skan#ifdef ENABLE_CHECKING
191132718Skan	  verify_dominators (CDI_DOMINATORS);
192132718Skan	  verify_loop_structure (loops);
193132718Skan#endif
194132718Skan	}
195132718Skan      loop = next;
196132718Skan    }
197132718Skan}
198132718Skan
199132718Skan/* Decide whether unroll or peel LOOPS (depending on FLAGS) and how much.  */
200132718Skanstatic void
201132718Skandecide_unrolling_and_peeling (struct loops *loops, int flags)
202132718Skan{
203132718Skan  struct loop *loop = loops->tree_root, *next;
204132718Skan
205132718Skan  while (loop->inner)
206132718Skan    loop = loop->inner;
207132718Skan
208132718Skan  /* Scan the loops, inner ones first.  */
209132718Skan  while (loop != loops->tree_root)
210132718Skan    {
211132718Skan      if (loop->next)
212132718Skan	{
213132718Skan	  next = loop->next;
214132718Skan	  while (next->inner)
215132718Skan	    next = next->inner;
216132718Skan	}
217132718Skan      else
218132718Skan	next = loop->outer;
219132718Skan
220132718Skan      loop->lpt_decision.decision = LPT_NONE;
221132718Skan
222132718Skan      if (rtl_dump_file)
223132718Skan	fprintf (rtl_dump_file, ";; Considering loop %d\n", loop->num);
224132718Skan
225132718Skan      /* Do not peel cold areas.  */
226132718Skan      if (!maybe_hot_bb_p (loop->header))
227132718Skan	{
228132718Skan	  if (rtl_dump_file)
229132718Skan	    fprintf (rtl_dump_file, ";; Not considering loop, cold area\n");
230132718Skan	  loop = next;
231132718Skan	  continue;
232132718Skan	}
233132718Skan
234132718Skan      /* Can the loop be manipulated?  */
235132718Skan      if (!can_duplicate_loop_p (loop))
236132718Skan	{
237132718Skan	  if (rtl_dump_file)
238132718Skan	    fprintf (rtl_dump_file,
239132718Skan		     ";; Not considering loop, cannot duplicate\n");
240132718Skan	  loop = next;
241132718Skan	  continue;
242132718Skan	}
243132718Skan
244132718Skan      /* Skip non-innermost loops.  */
245132718Skan      if (loop->inner)
246132718Skan	{
247132718Skan	  if (rtl_dump_file)
248132718Skan	    fprintf (rtl_dump_file, ";; Not considering loop, is not innermost\n");
249132718Skan	  loop = next;
250132718Skan	  continue;
251132718Skan	}
252132718Skan
253132718Skan      loop->ninsns = num_loop_insns (loop);
254132718Skan      loop->av_ninsns = average_num_loop_insns (loop);
255132718Skan
256132718Skan      /* Try transformations one by one in decreasing order of
257132718Skan	 priority.  */
258132718Skan
259132718Skan      decide_unroll_constant_iterations (loop, flags);
260132718Skan      if (loop->lpt_decision.decision == LPT_NONE)
261132718Skan	decide_unroll_runtime_iterations (loop, flags);
262132718Skan      if (loop->lpt_decision.decision == LPT_NONE)
263132718Skan	decide_unroll_stupid (loop, flags);
264132718Skan      if (loop->lpt_decision.decision == LPT_NONE)
265132718Skan	decide_peel_simple (loop, flags);
266132718Skan
267132718Skan      loop = next;
268132718Skan    }
269132718Skan}
270132718Skan
271132718Skan/* Decide whether the LOOP is once rolling and suitable for complete
272132718Skan   peeling.  */
273132718Skanstatic void
274132718Skandecide_peel_once_rolling (struct loop *loop, int flags ATTRIBUTE_UNUSED)
275132718Skan{
276132718Skan  if (rtl_dump_file)
277132718Skan    fprintf (rtl_dump_file, ";; Considering peeling once rolling loop\n");
278132718Skan
279132718Skan  /* Is the loop small enough?  */
280132718Skan  if ((unsigned) PARAM_VALUE (PARAM_MAX_ONCE_PEELED_INSNS) < loop->ninsns)
281132718Skan    {
282132718Skan      if (rtl_dump_file)
283132718Skan	fprintf (rtl_dump_file, ";; Not considering loop, is too big\n");
284132718Skan      return;
285132718Skan    }
286132718Skan
287132718Skan  /* Check for simple loops.  */
288132718Skan  loop->simple = simple_loop_p (loop, &loop->desc);
289132718Skan  loop->has_desc = 1;
290132718Skan
291132718Skan  /* Check number of iterations.  */
292132718Skan  if (!loop->simple || !loop->desc.const_iter || loop->desc.niter != 0)
293132718Skan    {
294132718Skan      if (rtl_dump_file)
295132718Skan	fprintf (rtl_dump_file, ";; Unable to prove that the loop rolls exactly once\n");
296132718Skan      return;
297132718Skan    }
298132718Skan
299132718Skan  /* Success.  */
300132718Skan  if (rtl_dump_file)
301132718Skan    fprintf (rtl_dump_file, ";; Decided to peel exactly once rolling loop\n");
302132718Skan  loop->lpt_decision.decision = LPT_PEEL_COMPLETELY;
303132718Skan}
304132718Skan
305132718Skan/* Decide whether the LOOP is suitable for complete peeling.  */
306132718Skanstatic void
307132718Skandecide_peel_completely (struct loop *loop, int flags ATTRIBUTE_UNUSED)
308132718Skan{
309132718Skan  unsigned npeel;
310132718Skan
311132718Skan  if (rtl_dump_file)
312132718Skan    fprintf (rtl_dump_file, ";; Considering peeling completely\n");
313132718Skan
314132718Skan  /* Skip non-innermost loops.  */
315132718Skan  if (loop->inner)
316132718Skan    {
317132718Skan      if (rtl_dump_file)
318132718Skan	fprintf (rtl_dump_file, ";; Not considering loop, is not innermost\n");
319132718Skan      return;
320132718Skan    }
321132718Skan
322132718Skan  /* Do not peel cold areas.  */
323132718Skan  if (!maybe_hot_bb_p (loop->header))
324132718Skan    {
325132718Skan      if (rtl_dump_file)
326132718Skan	fprintf (rtl_dump_file, ";; Not considering loop, cold area\n");
327132718Skan      return;
328132718Skan    }
329132718Skan
330132718Skan  /* Can the loop be manipulated?  */
331132718Skan  if (!can_duplicate_loop_p (loop))
332132718Skan    {
333132718Skan      if (rtl_dump_file)
334132718Skan	fprintf (rtl_dump_file,
335132718Skan		 ";; Not considering loop, cannot duplicate\n");
336132718Skan      return;
337132718Skan    }
338132718Skan
339132718Skan  /* npeel = number of iterations to peel.  */
340132718Skan  npeel = PARAM_VALUE (PARAM_MAX_COMPLETELY_PEELED_INSNS) / loop->ninsns;
341132718Skan  if (npeel > (unsigned) PARAM_VALUE (PARAM_MAX_COMPLETELY_PEEL_TIMES))
342132718Skan    npeel = PARAM_VALUE (PARAM_MAX_COMPLETELY_PEEL_TIMES);
343132718Skan
344132718Skan  /* Is the loop small enough?  */
345132718Skan  if (!npeel)
346132718Skan    {
347132718Skan      if (rtl_dump_file)
348132718Skan	fprintf (rtl_dump_file, ";; Not considering loop, is too big\n");
349132718Skan      return;
350132718Skan    }
351132718Skan
352132718Skan  /* Check for simple loops.  */
353132718Skan  if (!loop->has_desc)
354132718Skan    {
355132718Skan      loop->simple = simple_loop_p (loop, &loop->desc);
356132718Skan      loop->has_desc = 1;
357132718Skan    }
358132718Skan
359132718Skan  /* Check number of iterations.  */
360132718Skan  if (!loop->simple || !loop->desc.const_iter)
361132718Skan    {
362132718Skan      if (rtl_dump_file)
363132718Skan	fprintf (rtl_dump_file, ";; Unable to prove that the loop iterates constant times\n");
364132718Skan      return;
365132718Skan    }
366132718Skan
367132718Skan  if (loop->desc.niter > npeel - 1)
368132718Skan    {
369132718Skan      if (rtl_dump_file)
370132718Skan	{
371132718Skan	  fprintf (rtl_dump_file, ";; Not peeling loop completely, rolls too much (");
372132718Skan	  fprintf (rtl_dump_file, HOST_WIDEST_INT_PRINT_DEC,(HOST_WIDEST_INT) loop->desc.niter);
373132718Skan	  fprintf (rtl_dump_file, " iterations > %d [maximum peelings])\n", npeel);
374132718Skan	}
375132718Skan      return;
376132718Skan    }
377132718Skan
378132718Skan  /* Success.  */
379132718Skan  if (rtl_dump_file)
380132718Skan    fprintf (rtl_dump_file, ";; Decided to peel loop completely\n");
381132718Skan  loop->lpt_decision.decision = LPT_PEEL_COMPLETELY;
382132718Skan}
383132718Skan
384132718Skan/* Peel all iterations of LOOP, remove exit edges and cancel the loop
385132718Skan   completely.  The transformation done:
386132718Skan
387132718Skan   for (i = 0; i < 4; i++)
388132718Skan     body;
389132718Skan
390132718Skan   ==>
391132718Skan
392132718Skan   i = 0;
393132718Skan   body; i++;
394132718Skan   body; i++;
395132718Skan   body; i++;
396132718Skan   body; i++;
397132718Skan   */
398132718Skanstatic void
399132718Skanpeel_loop_completely (struct loops *loops, struct loop *loop)
400132718Skan{
401132718Skan  sbitmap wont_exit;
402132718Skan  unsigned HOST_WIDE_INT npeel;
403132718Skan  unsigned n_remove_edges, i;
404132718Skan  edge *remove_edges;
405132718Skan  struct loop_desc *desc = &loop->desc;
406132718Skan  bool discard_inc = false;
407132718Skan  bool is_bct;
408132718Skan
409132718Skan  if ((is_bct = is_bct_cond (BB_END (loop->desc.out_edge->src))))
410132718Skan    discard_inc = discard_increment (loop);
411132718Skan
412132718Skan  npeel = desc->niter;
413132718Skan
414132718Skan  if (npeel)
415132718Skan    {
416132718Skan      wont_exit = sbitmap_alloc (npeel + 1);
417132718Skan      sbitmap_ones (wont_exit);
418132718Skan      RESET_BIT (wont_exit, 0);
419132718Skan      if (desc->may_be_zero)
420132718Skan	RESET_BIT (wont_exit, 1);
421132718Skan
422132718Skan      remove_edges = xcalloc (npeel, sizeof (edge));
423132718Skan      n_remove_edges = 0;
424132718Skan
425132718Skan      if (!duplicate_loop_to_header_edge (loop, loop_preheader_edge (loop),
426132718Skan		loops, npeel,
427132718Skan		wont_exit, desc->out_edge, remove_edges, &n_remove_edges,
428132718Skan		DLTHE_FLAG_UPDATE_FREQ))
429132718Skan	abort ();
430132718Skan
431132718Skan      free (wont_exit);
432132718Skan
433132718Skan      /* Expand the branch and count.  */
434132718Skan      if (is_bct)
435132718Skan	for (i = 0; i < n_remove_edges; i++)
436132718Skan	  expand_bct (remove_edges[i], discard_inc);
437132718Skan
438132718Skan      /* Remove the exit edges.  */
439132718Skan      for (i = 0; i < n_remove_edges; i++)
440132718Skan	remove_path (loops, remove_edges[i]);
441132718Skan      free (remove_edges);
442132718Skan    }
443132718Skan
444132718Skan  /* Expand the branch and count.  */
445132718Skan  if (is_bct)
446132718Skan    expand_bct (desc->in_edge, discard_inc);
447132718Skan
448132718Skan  /* Now remove the unreachable part of the last iteration and cancel
449132718Skan     the loop.  */
450132718Skan  remove_path (loops, desc->in_edge);
451132718Skan
452132718Skan  if (rtl_dump_file)
453132718Skan    fprintf (rtl_dump_file, ";; Peeled loop completely, %d times\n", (int) npeel);
454132718Skan}
455132718Skan
456132718Skan/* Decide whether to unroll LOOP iterating constant number of times and how much.  */
457132718Skanstatic void
458132718Skandecide_unroll_constant_iterations (struct loop *loop, int flags)
459132718Skan{
460132718Skan  unsigned nunroll, nunroll_by_av, best_copies, best_unroll = -1, n_copies, i;
461132718Skan
462132718Skan  if (!(flags & UAP_UNROLL))
463132718Skan    {
464132718Skan      /* We were not asked to, just return back silently.  */
465132718Skan      return;
466132718Skan    }
467132718Skan
468132718Skan  if (rtl_dump_file)
469132718Skan    fprintf (rtl_dump_file, ";; Considering unrolling loop with constant number of iterations\n");
470132718Skan
471132718Skan  /* nunroll = total number of copies of the original loop body in
472132718Skan     unrolled loop (i.e. if it is 2, we have to duplicate loop body once.  */
473132718Skan  nunroll = PARAM_VALUE (PARAM_MAX_UNROLLED_INSNS) / loop->ninsns;
474132718Skan  nunroll_by_av = PARAM_VALUE (PARAM_MAX_AVERAGE_UNROLLED_INSNS) / loop->av_ninsns;
475132718Skan  if (nunroll > nunroll_by_av)
476132718Skan    nunroll = nunroll_by_av;
477132718Skan  if (nunroll > (unsigned) PARAM_VALUE (PARAM_MAX_UNROLL_TIMES))
478132718Skan    nunroll = PARAM_VALUE (PARAM_MAX_UNROLL_TIMES);
479132718Skan
480132718Skan  /* Skip big loops.  */
481132718Skan  if (nunroll <= 1)
482132718Skan    {
483132718Skan      if (rtl_dump_file)
484132718Skan	fprintf (rtl_dump_file, ";; Not considering loop, is too big\n");
485132718Skan      return;
486132718Skan    }
487132718Skan
488132718Skan  /* Check for simple loops.  */
489132718Skan  if (!loop->has_desc)
490132718Skan    {
491132718Skan      loop->simple = simple_loop_p (loop, &loop->desc);
492132718Skan      loop->has_desc = 1;
493132718Skan    }
494132718Skan
495132718Skan  /* Check number of iterations.  */
496132718Skan  if (!loop->simple || !loop->desc.const_iter)
497132718Skan    {
498132718Skan      if (rtl_dump_file)
499132718Skan	fprintf (rtl_dump_file, ";; Unable to prove that the loop iterates constant times\n");
500132718Skan      return;
501132718Skan    }
502132718Skan
503132718Skan  /* Check whether the loop rolls enough to consider.  */
504132718Skan  if (loop->desc.niter < 2 * nunroll)
505132718Skan    {
506132718Skan      if (rtl_dump_file)
507132718Skan	fprintf (rtl_dump_file, ";; Not unrolling loop, doesn't roll\n");
508132718Skan      return;
509132718Skan    }
510132718Skan
511132718Skan  /* Success; now compute number of iterations to unroll.  We alter
512132718Skan     nunroll so that as few as possible copies of loop body are
513132718Skan     necessary, while still not decreasing the number of unrollings
514132718Skan     too much (at most by 1).  */
515132718Skan  best_copies = 2 * nunroll + 10;
516132718Skan
517132718Skan  i = 2 * nunroll + 2;
518132718Skan  if ((unsigned) i - 1 >= loop->desc.niter)
519132718Skan    i = loop->desc.niter - 2;
520132718Skan
521132718Skan  for (; i >= nunroll - 1; i--)
522132718Skan    {
523132718Skan      unsigned exit_mod = loop->desc.niter % (i + 1);
524132718Skan
525132718Skan      if (loop->desc.postincr)
526132718Skan	n_copies = exit_mod + i + 1;
527132718Skan      else if (exit_mod != (unsigned) i || loop->desc.may_be_zero)
528132718Skan	n_copies = exit_mod + i + 2;
529132718Skan      else
530132718Skan	n_copies = i + 1;
531132718Skan
532132718Skan      if (n_copies < best_copies)
533132718Skan	{
534132718Skan	  best_copies = n_copies;
535132718Skan	  best_unroll = i;
536132718Skan	}
537132718Skan    }
538132718Skan
539132718Skan  if (rtl_dump_file)
540132718Skan    fprintf (rtl_dump_file, ";; max_unroll %d (%d copies, initial %d).\n",
541132718Skan	     best_unroll + 1, best_copies, nunroll);
542132718Skan
543132718Skan  loop->lpt_decision.decision = LPT_UNROLL_CONSTANT;
544132718Skan  loop->lpt_decision.times = best_unroll;
545132718Skan}
546132718Skan
547132718Skan/* Unroll LOOP with constant number of iterations LOOP->LPT_DECISION.TIMES + 1
548132718Skan   times.  The transformation does this:
549132718Skan
550132718Skan   for (i = 0; i < 102; i++)
551132718Skan     body;
552132718Skan
553132718Skan   ==>
554132718Skan
555132718Skan   i = 0;
556132718Skan   body; i++;
557132718Skan   body; i++;
558132718Skan   while (i < 102)
559132718Skan     {
560132718Skan       body; i++;
561132718Skan       body; i++;
562132718Skan       body; i++;
563132718Skan       body; i++;
564132718Skan     }
565132718Skan  */
566132718Skanstatic void
567132718Skanunroll_loop_constant_iterations (struct loops *loops, struct loop *loop)
568132718Skan{
569132718Skan  unsigned HOST_WIDE_INT niter;
570132718Skan  unsigned exit_mod;
571132718Skan  sbitmap wont_exit;
572132718Skan  unsigned n_remove_edges, i;
573132718Skan  edge *remove_edges;
574132718Skan  unsigned max_unroll = loop->lpt_decision.times;
575132718Skan  struct loop_desc *desc = &loop->desc;
576132718Skan  bool discard_inc = false;
577132718Skan  bool is_bct;
578132718Skan
579132718Skan  niter = desc->niter;
580132718Skan
581132718Skan  if (niter <= (unsigned) max_unroll + 1)
582132718Skan    abort ();  /* Should not get here (such loop should be peeled instead).  */
583132718Skan
584132718Skan  exit_mod = niter % (max_unroll + 1);
585132718Skan
586132718Skan  wont_exit = sbitmap_alloc (max_unroll + 1);
587132718Skan  sbitmap_ones (wont_exit);
588132718Skan
589132718Skan  remove_edges = xcalloc (max_unroll + exit_mod + 1, sizeof (edge));
590132718Skan  n_remove_edges = 0;
591132718Skan
592132718Skan  /* For a loop ending with a branch and count for which the increment
593132718Skan     of the count register will be discarded, adjust the initialization of
594132718Skan     the count register.  */
595132718Skan  if ((is_bct = is_bct_cond (BB_END (desc->out_edge->src)))
596132718Skan      && (discard_inc = discard_increment (loop)))
597132718Skan    {
598132718Skan      rtx ini_var;
599132718Skan
600132718Skan      rtx init_code;
601132718Skan      int n_peel, new_bct_value;
602132718Skan
603132718Skan      /* Get expression for number of iterations.  */
604132718Skan      start_sequence ();
605132718Skan
606132718Skan      n_peel = (niter+1) % (max_unroll+1);
607132718Skan      new_bct_value = (niter+1 - n_peel) / (max_unroll+1) ;
608132718Skan      ini_var = GEN_INT (new_bct_value);
609132718Skan
610132718Skan      emit_move_insn (desc->var, ini_var);
611132718Skan      init_code = get_insns ();
612132718Skan      end_sequence ();
613132718Skan
614132718Skan      loop_split_edge_with (loop_preheader_edge (loop), init_code);
615132718Skan    }
616132718Skan
617132718Skan  if (desc->postincr)
618132718Skan    {
619132718Skan      /* Counter is incremented after the exit test; leave exit test
620132718Skan	 in the first copy, so that the loops that start with test
621132718Skan	 of exit condition have continuous body after unrolling.  */
622132718Skan
623132718Skan      if (rtl_dump_file)
624132718Skan	fprintf (rtl_dump_file, ";; Condition on beginning of loop.\n");
625132718Skan
626132718Skan      /* Peel exit_mod iterations.  */
627132718Skan      RESET_BIT (wont_exit, 0);
628132718Skan      if (desc->may_be_zero)
629132718Skan	RESET_BIT (wont_exit, 1);
630132718Skan
631132718Skan      if (exit_mod
632132718Skan	  && !duplicate_loop_to_header_edge (loop, loop_preheader_edge (loop),
633132718Skan		loops, exit_mod,
634132718Skan		wont_exit, desc->out_edge, remove_edges, &n_remove_edges,
635132718Skan		DLTHE_FLAG_UPDATE_FREQ))
636132718Skan	abort ();
637132718Skan
638132718Skan      SET_BIT (wont_exit, 1);
639132718Skan    }
640132718Skan  else
641132718Skan    {
642132718Skan      /* Leave exit test in last copy, for the same reason as above if
643132718Skan	 the loop tests the condition at the end of loop body.  */
644132718Skan
645132718Skan      if (rtl_dump_file)
646132718Skan	fprintf (rtl_dump_file, ";; Condition on end of loop.\n");
647132718Skan
648132718Skan      /* We know that niter >= max_unroll + 2; so we do not need to care of
649132718Skan	 case when we would exit before reaching the loop.  So just peel
650132718Skan	 exit_mod + 1 iterations.
651132718Skan	 */
652132718Skan      if (exit_mod != (unsigned) max_unroll || desc->may_be_zero)
653132718Skan	{
654132718Skan	  RESET_BIT (wont_exit, 0);
655132718Skan	  if (desc->may_be_zero)
656132718Skan	    RESET_BIT (wont_exit, 1);
657132718Skan
658132718Skan	  if (!duplicate_loop_to_header_edge (loop, loop_preheader_edge (loop),
659132718Skan		loops, exit_mod + 1,
660132718Skan		wont_exit, desc->out_edge, remove_edges, &n_remove_edges,
661132718Skan		DLTHE_FLAG_UPDATE_FREQ))
662132718Skan	    abort ();
663132718Skan
664132718Skan	  SET_BIT (wont_exit, 0);
665132718Skan	  SET_BIT (wont_exit, 1);
666132718Skan	}
667132718Skan
668132718Skan      RESET_BIT (wont_exit, max_unroll);
669132718Skan    }
670132718Skan
671132718Skan  /* Now unroll the loop.  */
672132718Skan  if (!duplicate_loop_to_header_edge (loop, loop_latch_edge (loop),
673132718Skan		loops, max_unroll,
674132718Skan		wont_exit, desc->out_edge, remove_edges, &n_remove_edges,
675132718Skan		DLTHE_FLAG_UPDATE_FREQ))
676132718Skan    abort ();
677132718Skan
678132718Skan  free (wont_exit);
679132718Skan
680132718Skan  /* Expand the branch and count.  */
681132718Skan  if (is_bct)
682132718Skan    for (i = 0; i < n_remove_edges; i++)
683132718Skan      expand_bct (remove_edges[i], discard_inc);
684132718Skan
685132718Skan  /* Remove the edges.  */
686132718Skan  for (i = 0; i < n_remove_edges; i++)
687132718Skan    remove_path (loops, remove_edges[i]);
688132718Skan  free (remove_edges);
689132718Skan
690132718Skan  if (rtl_dump_file)
691132718Skan    fprintf (rtl_dump_file, ";; Unrolled loop %d times, constant # of iterations %i insns\n",max_unroll, num_loop_insns (loop));
692132718Skan}
693132718Skan
694132718Skan/* Decide whether to unroll LOOP iterating runtime computable number of times
695132718Skan   and how much.  */
696132718Skanstatic void
697132718Skandecide_unroll_runtime_iterations (struct loop *loop, int flags)
698132718Skan{
699132718Skan  unsigned nunroll, nunroll_by_av, i;
700132718Skan
701132718Skan  if (!(flags & UAP_UNROLL))
702132718Skan    {
703132718Skan      /* We were not asked to, just return back silently.  */
704132718Skan      return;
705132718Skan    }
706132718Skan
707132718Skan  if (rtl_dump_file)
708132718Skan    fprintf (rtl_dump_file, ";; Considering unrolling loop with runtime computable number of iterations\n");
709132718Skan
710132718Skan  /* nunroll = total number of copies of the original loop body in
711132718Skan     unrolled loop (i.e. if it is 2, we have to duplicate loop body once.  */
712132718Skan  nunroll = PARAM_VALUE (PARAM_MAX_UNROLLED_INSNS) / loop->ninsns;
713132718Skan  nunroll_by_av = PARAM_VALUE (PARAM_MAX_AVERAGE_UNROLLED_INSNS) / loop->av_ninsns;
714132718Skan  if (nunroll > nunroll_by_av)
715132718Skan    nunroll = nunroll_by_av;
716132718Skan  if (nunroll > (unsigned) PARAM_VALUE (PARAM_MAX_UNROLL_TIMES))
717132718Skan    nunroll = PARAM_VALUE (PARAM_MAX_UNROLL_TIMES);
718132718Skan
719132718Skan  /* Skip big loops.  */
720132718Skan  if (nunroll <= 1)
721132718Skan    {
722132718Skan      if (rtl_dump_file)
723132718Skan	fprintf (rtl_dump_file, ";; Not considering loop, is too big\n");
724132718Skan      return;
725132718Skan    }
726132718Skan
727132718Skan  /* Check for simple loops.  */
728132718Skan  if (!loop->has_desc)
729132718Skan    {
730132718Skan      loop->simple = simple_loop_p (loop, &loop->desc);
731132718Skan      loop->has_desc = 1;
732132718Skan    }
733132718Skan
734132718Skan  /* Check simpleness.  */
735132718Skan  if (!loop->simple)
736132718Skan    {
737132718Skan      if (rtl_dump_file)
738132718Skan	fprintf (rtl_dump_file, ";; Unable to prove that the number of iterations can be counted in runtime\n");
739132718Skan      return;
740132718Skan    }
741132718Skan
742132718Skan  if (loop->desc.const_iter)
743132718Skan    {
744132718Skan      if (rtl_dump_file)
745132718Skan	fprintf (rtl_dump_file, ";; Loop iterates constant times\n");
746132718Skan      return;
747132718Skan    }
748132718Skan
749132718Skan  /* If we have profile feedback, check whether the loop rolls.  */
750132718Skan  if (loop->header->count && expected_loop_iterations (loop) < 2 * nunroll)
751132718Skan    {
752132718Skan      if (rtl_dump_file)
753132718Skan	fprintf (rtl_dump_file, ";; Not unrolling loop, doesn't roll\n");
754132718Skan      return;
755132718Skan    }
756132718Skan
757132718Skan  /* Success; now force nunroll to be power of 2, as we are unable to
758132718Skan     cope with overflows in computation of number of iterations.  */
759132718Skan  for (i = 1; 2 * i <= nunroll; i *= 2);
760132718Skan
761132718Skan  loop->lpt_decision.decision = LPT_UNROLL_RUNTIME;
762132718Skan  loop->lpt_decision.times = i - 1;
763132718Skan}
764132718Skan
765132718Skan/* Unroll LOOP for that we are able to count number of iterations in runtime
766132718Skan   LOOP->LPT_DECISION.TIMES + 1 times.  The transformation does this (with some
767132718Skan   extra care for case n < 0):
768132718Skan
769132718Skan   for (i = 0; i < n; i++)
770132718Skan     body;
771132718Skan
772132718Skan   ==>
773132718Skan
774132718Skan   i = 0;
775132718Skan   mod = n % 4;
776132718Skan
777132718Skan   switch (mod)
778132718Skan     {
779132718Skan       case 3:
780132718Skan         body; i++;
781132718Skan       case 2:
782132718Skan         body; i++;
783132718Skan       case 1:
784132718Skan         body; i++;
785132718Skan       case 0: ;
786132718Skan     }
787132718Skan
788132718Skan   while (i < n)
789132718Skan     {
790132718Skan       body; i++;
791132718Skan       body; i++;
792132718Skan       body; i++;
793132718Skan       body; i++;
794132718Skan     }
795132718Skan   */
796132718Skanstatic void
797132718Skanunroll_loop_runtime_iterations (struct loops *loops, struct loop *loop)
798132718Skan{
799132718Skan  rtx niter, init_code, branch_code, jump, label;
800132718Skan  unsigned i, j, p;
801132718Skan  basic_block preheader, *body, *dom_bbs, swtch, ezc_swtch;
802132718Skan  unsigned n_dom_bbs;
803132718Skan  sbitmap wont_exit;
804132718Skan  int may_exit_copy;
805132718Skan  unsigned n_peel, n_remove_edges;
806132718Skan  edge *remove_edges, e;
807132718Skan  bool extra_zero_check, last_may_exit;
808132718Skan  unsigned max_unroll = loop->lpt_decision.times;
809132718Skan  struct loop_desc *desc = &loop->desc;
810132718Skan  bool discard_inc = false;
811132718Skan  bool is_bct;
812132718Skan
813132718Skan  /* Remember blocks whose dominators will have to be updated.  */
814132718Skan  dom_bbs = xcalloc (n_basic_blocks, sizeof (basic_block));
815132718Skan  n_dom_bbs = 0;
816132718Skan
817132718Skan  body = get_loop_body (loop);
818132718Skan  for (i = 0; i < loop->num_nodes; i++)
819132718Skan    {
820132718Skan      unsigned nldom;
821132718Skan      basic_block *ldom;
822132718Skan
823132718Skan      nldom = get_dominated_by (CDI_DOMINATORS, body[i], &ldom);
824132718Skan      for (j = 0; j < nldom; j++)
825132718Skan	if (!flow_bb_inside_loop_p (loop, ldom[j]))
826132718Skan	  dom_bbs[n_dom_bbs++] = ldom[j];
827132718Skan
828132718Skan      free (ldom);
829132718Skan    }
830132718Skan  free (body);
831132718Skan
832132718Skan  if (desc->postincr)
833132718Skan    {
834132718Skan      /* Leave exit in first copy (for explanation why see comment in
835132718Skan	 unroll_loop_constant_iterations).  */
836132718Skan      may_exit_copy = 0;
837132718Skan      n_peel = max_unroll - 1;
838132718Skan      extra_zero_check = true;
839132718Skan      last_may_exit = false;
840132718Skan    }
841132718Skan  else
842132718Skan    {
843132718Skan      /* Leave exit in last copy (for explanation why see comment in
844132718Skan	 unroll_loop_constant_iterations).  */
845132718Skan      may_exit_copy = max_unroll;
846132718Skan      n_peel = max_unroll;
847132718Skan      extra_zero_check = false;
848132718Skan      last_may_exit = true;
849132718Skan    }
850132718Skan
851132718Skan  /* Get expression for number of iterations.  */
852132718Skan  start_sequence ();
853132718Skan  niter = count_loop_iterations (desc, NULL, NULL);
854132718Skan  if (!niter)
855132718Skan    abort ();
856132718Skan  niter = force_operand (niter, NULL);
857132718Skan
858132718Skan  /* Count modulo by ANDing it with max_unroll; we use the fact that
859132718Skan     the number of unrollings is a power of two, and thus this is correct
860132718Skan     even if there is overflow in the computation.  */
861132718Skan  niter = expand_simple_binop (GET_MODE (desc->var), AND,
862132718Skan			       niter,
863132718Skan			       GEN_INT (max_unroll),
864132718Skan			       NULL_RTX, 0, OPTAB_LIB_WIDEN);
865132718Skan
866132718Skan  /* For a loop ending with a branch and count for which the increment
867132718Skan     of the count register will be discarded, adjust the initialization of
868132718Skan     the count register.  */
869132718Skan  if ((is_bct = is_bct_cond (BB_END (desc->out_edge->src)))
870132718Skan      && (discard_inc = discard_increment (loop)))
871132718Skan    {
872132718Skan      rtx count, count2, count_unroll_mod;
873132718Skan      int count_unroll;
874132718Skan
875132718Skan      /* start_sequence (); */
876132718Skan
877132718Skan      count = count_loop_iterations (desc, NULL, NULL);
878132718Skan
879132718Skan      count_unroll = loop->lpt_decision.times+1;
880132718Skan
881132718Skan
882132718Skan
883132718Skan      count_unroll_mod =  GEN_INT (exact_log2 (count_unroll));
884132718Skan      count = expand_simple_binop (GET_MODE (desc->var), LSHIFTRT,
885132718Skan				  count, count_unroll_mod,
886132718Skan				  0, 0, OPTAB_LIB_WIDEN);
887132718Skan
888132718Skan      count2 = expand_simple_binop (GET_MODE (desc->var), PLUS,
889132718Skan				     count, GEN_INT (2),
890132718Skan				     0, 0, OPTAB_LIB_WIDEN);
891132718Skan
892132718Skan      emit_move_insn (desc->var, count2);
893132718Skan    }
894132718Skan
895132718Skan  init_code = get_insns ();
896132718Skan  end_sequence ();
897132718Skan
898132718Skan  /* Precondition the loop.  */
899132718Skan  loop_split_edge_with (loop_preheader_edge (loop), init_code);
900132718Skan
901132718Skan  remove_edges = xcalloc (max_unroll + n_peel + 1, sizeof (edge));
902132718Skan  n_remove_edges = 0;
903132718Skan
904132718Skan  wont_exit = sbitmap_alloc (max_unroll + 2);
905132718Skan
906132718Skan  /* Peel the first copy of loop body (almost always we must leave exit test
907132718Skan     here; the only exception is when we have extra zero check and the number
908132718Skan     of iterations is reliable (i.e. comes out of NE condition).  Also record
909132718Skan     the place of (possible) extra zero check.  */
910132718Skan  sbitmap_zero (wont_exit);
911132718Skan  if (extra_zero_check && desc->cond == NE)
912132718Skan    SET_BIT (wont_exit, 1);
913132718Skan  ezc_swtch = loop_preheader_edge (loop)->src;
914132718Skan  if (!duplicate_loop_to_header_edge (loop, loop_preheader_edge (loop),
915132718Skan		loops, 1,
916132718Skan		wont_exit, desc->out_edge, remove_edges, &n_remove_edges,
917132718Skan		DLTHE_FLAG_UPDATE_FREQ))
918132718Skan    abort ();
919132718Skan
920132718Skan  /* Record the place where switch will be built for preconditioning.  */
921132718Skan  swtch = loop_split_edge_with (loop_preheader_edge (loop),
922132718Skan				NULL_RTX);
923132718Skan
924132718Skan  for (i = 0; i < n_peel; i++)
925132718Skan    {
926132718Skan      /* Peel the copy.  */
927132718Skan      sbitmap_zero (wont_exit);
928132718Skan      if (i != n_peel - 1 || !last_may_exit)
929132718Skan	SET_BIT (wont_exit, 1);
930132718Skan      if (!duplicate_loop_to_header_edge (loop, loop_preheader_edge (loop),
931132718Skan		loops, 1,
932132718Skan		wont_exit, desc->out_edge, remove_edges, &n_remove_edges,
933132718Skan		DLTHE_FLAG_UPDATE_FREQ))
934132718Skan	abort ();
935132718Skan
936132718Skan      /* Create item for switch.  */
937132718Skan      j = n_peel - i - (extra_zero_check ? 0 : 1);
938132718Skan      p = REG_BR_PROB_BASE / (i + 2);
939132718Skan
940132718Skan      /* If modulo is zero do not jumo to the header of the unrolled loops.
941132718Skan         Jump instead to the last branch and count that precedes it.  */
942132718Skan      if (is_bct && discard_inc && (j == 0))
943132718Skan	{
944132718Skan	  basic_block lastbb = loop_preheader_edge(loop)->src;
945132718Skan	  rtx split_after;
946132718Skan
947132718Skan          /* Skip dummy basic blocks generated during the unrolling.  */
948132718Skan	  while (!is_bct_cond (BB_END (lastbb)))
949132718Skan	    lastbb = lastbb->pred->src;
950132718Skan
951132718Skan	  split_after = PREV_INSN (BB_END (lastbb));
952132718Skan
953132718Skan	  preheader = split_loop_bb (lastbb , split_after)->dest;
954132718Skan	}
955132718Skan      else
956132718Skan	preheader = loop_split_edge_with (loop_preheader_edge (loop),
957132718Skan					  NULL_RTX);
958132718Skan      label = block_label (preheader);
959132718Skan      start_sequence ();
960132718Skan      do_compare_rtx_and_jump (copy_rtx (niter), GEN_INT (j), EQ, 0,
961132718Skan			       GET_MODE (desc->var), NULL_RTX, NULL_RTX,
962132718Skan			       label);
963132718Skan      jump = get_last_insn ();
964132718Skan      JUMP_LABEL (jump) = label;
965132718Skan      REG_NOTES (jump)
966132718Skan	      = gen_rtx_EXPR_LIST (REG_BR_PROB,
967132718Skan				   GEN_INT (p), REG_NOTES (jump));
968132718Skan
969132718Skan      LABEL_NUSES (label)++;
970132718Skan      branch_code = get_insns ();
971132718Skan      end_sequence ();
972132718Skan
973132718Skan      swtch = loop_split_edge_with (swtch->pred, branch_code);
974132718Skan      set_immediate_dominator (CDI_DOMINATORS, preheader, swtch);
975132718Skan      swtch->succ->probability = REG_BR_PROB_BASE - p;
976132718Skan      e = make_edge (swtch, preheader,
977132718Skan		     swtch->succ->flags & EDGE_IRREDUCIBLE_LOOP);
978132718Skan      e->probability = p;
979132718Skan    }
980132718Skan
981132718Skan  if (extra_zero_check)
982132718Skan    {
983132718Skan      /* Add branch for zero iterations.  */
984132718Skan      p = REG_BR_PROB_BASE / (max_unroll + 1);
985132718Skan      swtch = ezc_swtch;
986132718Skan      preheader = loop_split_edge_with (loop_preheader_edge (loop), NULL_RTX);
987132718Skan      label = block_label (preheader);
988132718Skan      start_sequence ();
989132718Skan      do_compare_rtx_and_jump (copy_rtx (niter), const0_rtx, EQ, 0,
990132718Skan			       GET_MODE (desc->var), NULL_RTX, NULL_RTX,
991132718Skan			       label);
992132718Skan      jump = get_last_insn ();
993132718Skan      JUMP_LABEL (jump) = label;
994132718Skan      REG_NOTES (jump)
995132718Skan	      = gen_rtx_EXPR_LIST (REG_BR_PROB,
996132718Skan				   GEN_INT (p), REG_NOTES (jump));
997132718Skan
998132718Skan      LABEL_NUSES (label)++;
999132718Skan      branch_code = get_insns ();
1000132718Skan      end_sequence ();
1001132718Skan
1002132718Skan      swtch = loop_split_edge_with (swtch->succ, branch_code);
1003132718Skan      set_immediate_dominator (CDI_DOMINATORS, preheader, swtch);
1004132718Skan      swtch->succ->probability = REG_BR_PROB_BASE - p;
1005132718Skan      e = make_edge (swtch, preheader,
1006132718Skan		     swtch->succ->flags & EDGE_IRREDUCIBLE_LOOP);
1007132718Skan      e->probability = p;
1008132718Skan    }
1009132718Skan
1010132718Skan  /* Recount dominators for outer blocks.  */
1011132718Skan  iterate_fix_dominators (CDI_DOMINATORS, dom_bbs, n_dom_bbs);
1012132718Skan
1013132718Skan  /* And unroll loop.  */
1014132718Skan
1015132718Skan  sbitmap_ones (wont_exit);
1016132718Skan  RESET_BIT (wont_exit, may_exit_copy);
1017132718Skan
1018132718Skan  if (!duplicate_loop_to_header_edge (loop, loop_latch_edge (loop),
1019132718Skan		loops, max_unroll,
1020132718Skan		wont_exit, desc->out_edge, remove_edges, &n_remove_edges,
1021132718Skan		DLTHE_FLAG_UPDATE_FREQ))
1022132718Skan    abort ();
1023132718Skan
1024132718Skan  free (wont_exit);
1025132718Skan
1026132718Skan  /* Expand the branch and count.  */
1027132718Skan  if (is_bct)
1028132718Skan    for (i = 0; i < n_remove_edges; i++)
1029132718Skan      expand_bct (remove_edges[i], discard_inc);
1030132718Skan
1031132718Skan  /* Remove the edges.  */
1032132718Skan  for (i = 0; i < n_remove_edges; i++)
1033132718Skan    remove_path (loops, remove_edges[i]);
1034132718Skan  free (remove_edges);
1035132718Skan
1036132718Skan  if (rtl_dump_file)
1037132718Skan    fprintf (rtl_dump_file,
1038132718Skan	     ";; Unrolled loop %d times, counting # of iterations in runtime, %i insns\n",
1039132718Skan	     max_unroll, num_loop_insns (loop));
1040132718Skan}
1041132718Skan
1042132718Skan/* Decide whether to simply peel LOOP and how much.  */
1043132718Skanstatic void
1044132718Skandecide_peel_simple (struct loop *loop, int flags)
1045132718Skan{
1046132718Skan  unsigned npeel;
1047132718Skan
1048132718Skan  if (!(flags & UAP_PEEL))
1049132718Skan    {
1050132718Skan      /* We were not asked to, just return back silently.  */
1051132718Skan      return;
1052132718Skan    }
1053132718Skan
1054132718Skan  if (rtl_dump_file)
1055132718Skan    fprintf (rtl_dump_file, ";; Considering simply peeling loop\n");
1056132718Skan
1057132718Skan  /* npeel = number of iterations to peel.  */
1058132718Skan  npeel = PARAM_VALUE (PARAM_MAX_PEELED_INSNS) / loop->ninsns;
1059132718Skan  if (npeel > (unsigned) PARAM_VALUE (PARAM_MAX_PEEL_TIMES))
1060132718Skan    npeel = PARAM_VALUE (PARAM_MAX_PEEL_TIMES);
1061132718Skan
1062132718Skan  /* Skip big loops.  */
1063132718Skan  if (!npeel)
1064132718Skan    {
1065132718Skan      if (rtl_dump_file)
1066132718Skan	fprintf (rtl_dump_file, ";; Not considering loop, is too big\n");
1067132718Skan      return;
1068132718Skan    }
1069132718Skan
1070132718Skan  /* Check for simple loops.  */
1071132718Skan  if (!loop->has_desc)
1072132718Skan    {
1073132718Skan      loop->simple = simple_loop_p (loop, &loop->desc);
1074132718Skan      loop->has_desc = 1;
1075132718Skan    }
1076132718Skan
1077132718Skan  /* Check number of iterations.  */
1078132718Skan  if (loop->simple && loop->desc.const_iter)
1079132718Skan    {
1080132718Skan      if (rtl_dump_file)
1081132718Skan	fprintf (rtl_dump_file, ";; Loop iterates constant times\n");
1082132718Skan      return;
1083132718Skan    }
1084132718Skan
1085132718Skan  /* Do not simply peel loops with branches inside -- it increases number
1086132718Skan     of mispredicts.  */
1087132718Skan  if (loop->desc.n_branches > 1)
1088132718Skan    {
1089132718Skan      if (rtl_dump_file)
1090132718Skan	fprintf (rtl_dump_file, ";; Not peeling, contains branches\n");
1091132718Skan      return;
1092132718Skan    }
1093132718Skan
1094132718Skan  if (loop->header->count)
1095132718Skan    {
1096132718Skan      unsigned niter = expected_loop_iterations (loop);
1097132718Skan      if (niter + 1 > npeel)
1098132718Skan	{
1099132718Skan	  if (rtl_dump_file)
1100132718Skan	    {
1101132718Skan	      fprintf (rtl_dump_file, ";; Not peeling loop, rolls too much (");
1102132718Skan	      fprintf (rtl_dump_file, HOST_WIDEST_INT_PRINT_DEC, (HOST_WIDEST_INT) (niter + 1));
1103132718Skan	      fprintf (rtl_dump_file, " iterations > %d [maximum peelings])\n", npeel);
1104132718Skan	    }
1105132718Skan	  return;
1106132718Skan	}
1107132718Skan      npeel = niter + 1;
1108132718Skan    }
1109132718Skan  else
1110132718Skan    {
1111132718Skan      /* For now we have no good heuristics to decide whether loop peeling
1112132718Skan         will be effective, so disable it.  */
1113132718Skan      if (rtl_dump_file)
1114132718Skan	fprintf (rtl_dump_file,
1115132718Skan		 ";; Not peeling loop, no evidence it will be profitable\n");
1116132718Skan      return;
1117132718Skan    }
1118132718Skan
1119132718Skan  /* Success.  */
1120132718Skan  loop->lpt_decision.decision = LPT_PEEL_SIMPLE;
1121132718Skan  loop->lpt_decision.times = npeel;
1122132718Skan}
1123132718Skan
1124132718Skan/* Peel a LOOP LOOP->LPT_DECISION.TIMES times.  The transformation:
1125132718Skan   while (cond)
1126132718Skan     body;
1127132718Skan
1128132718Skan   ==>
1129132718Skan
1130132718Skan   if (!cond) goto end;
1131132718Skan   body;
1132132718Skan   if (!cond) goto end;
1133132718Skan   body;
1134132718Skan   while (cond)
1135132718Skan     body;
1136132718Skan   end: ;
1137132718Skan   */
1138132718Skanstatic void
1139132718Skanpeel_loop_simple (struct loops *loops, struct loop *loop)
1140132718Skan{
1141132718Skan  sbitmap wont_exit;
1142132718Skan  unsigned npeel = loop->lpt_decision.times;
1143132718Skan
1144132718Skan  wont_exit = sbitmap_alloc (npeel + 1);
1145132718Skan  sbitmap_zero (wont_exit);
1146132718Skan
1147132718Skan  if (!duplicate_loop_to_header_edge (loop, loop_preheader_edge (loop),
1148132718Skan		loops, npeel, wont_exit, NULL, NULL, NULL,
1149132718Skan		DLTHE_FLAG_UPDATE_FREQ))
1150132718Skan    abort ();
1151132718Skan
1152132718Skan  free (wont_exit);
1153132718Skan
1154132718Skan  if (rtl_dump_file)
1155132718Skan    fprintf (rtl_dump_file, ";; Peeling loop %d times\n", npeel);
1156132718Skan}
1157132718Skan
1158132718Skan/* Decide whether to unroll LOOP stupidly and how much.  */
1159132718Skanstatic void
1160132718Skandecide_unroll_stupid (struct loop *loop, int flags)
1161132718Skan{
1162132718Skan  unsigned nunroll, nunroll_by_av, i;
1163132718Skan
1164132718Skan  if (!(flags & UAP_UNROLL_ALL))
1165132718Skan    {
1166132718Skan      /* We were not asked to, just return back silently.  */
1167132718Skan      return;
1168132718Skan    }
1169132718Skan
1170132718Skan  if (rtl_dump_file)
1171132718Skan    fprintf (rtl_dump_file, ";; Considering unrolling loop stupidly\n");
1172132718Skan
1173132718Skan  /* nunroll = total number of copies of the original loop body in
1174132718Skan     unrolled loop (i.e. if it is 2, we have to duplicate loop body once.  */
1175132718Skan  nunroll = PARAM_VALUE (PARAM_MAX_UNROLLED_INSNS) / loop->ninsns;
1176132718Skan  nunroll_by_av = PARAM_VALUE (PARAM_MAX_AVERAGE_UNROLLED_INSNS) / loop->av_ninsns;
1177132718Skan  if (nunroll > nunroll_by_av)
1178132718Skan    nunroll = nunroll_by_av;
1179132718Skan  if (nunroll > (unsigned) PARAM_VALUE (PARAM_MAX_UNROLL_TIMES))
1180132718Skan    nunroll = PARAM_VALUE (PARAM_MAX_UNROLL_TIMES);
1181132718Skan
1182132718Skan  /* Skip big loops.  */
1183132718Skan  if (nunroll <= 1)
1184132718Skan    {
1185132718Skan      if (rtl_dump_file)
1186132718Skan	fprintf (rtl_dump_file, ";; Not considering loop, is too big\n");
1187132718Skan      return;
1188132718Skan    }
1189132718Skan
1190132718Skan  /* Check for simple loops.  */
1191132718Skan  if (!loop->has_desc)
1192132718Skan    {
1193132718Skan      loop->simple = simple_loop_p (loop, &loop->desc);
1194132718Skan      loop->has_desc = 1;
1195132718Skan    }
1196132718Skan
1197132718Skan  /* Check simpleness.  */
1198132718Skan  if (loop->simple)
1199132718Skan    {
1200132718Skan      if (rtl_dump_file)
1201132718Skan	fprintf (rtl_dump_file, ";; The loop is simple\n");
1202132718Skan      return;
1203132718Skan    }
1204132718Skan
1205132718Skan  /* Do not unroll loops with branches inside -- it increases number
1206132718Skan     of mispredicts.  */
1207132718Skan  if (loop->desc.n_branches > 1)
1208132718Skan    {
1209132718Skan      if (rtl_dump_file)
1210132718Skan	fprintf (rtl_dump_file, ";; Not unrolling, contains branches\n");
1211132718Skan      return;
1212132718Skan    }
1213132718Skan
1214132718Skan  /* If we have profile feedback, check whether the loop rolls.  */
1215132718Skan  if (loop->header->count && expected_loop_iterations (loop) < 2 * nunroll)
1216132718Skan    {
1217132718Skan      if (rtl_dump_file)
1218132718Skan	fprintf (rtl_dump_file, ";; Not unrolling loop, doesn't roll\n");
1219132718Skan      return;
1220132718Skan    }
1221132718Skan
1222132718Skan  /* Success.  Now force nunroll to be power of 2, as it seems that this
1223132718Skan     improves results (partially because of better alignments, partially
1224132718Skan     because of some dark magic).  */
1225132718Skan  for (i = 1; 2 * i <= nunroll; i *= 2);
1226132718Skan
1227132718Skan  loop->lpt_decision.decision = LPT_UNROLL_STUPID;
1228132718Skan  loop->lpt_decision.times = i - 1;
1229132718Skan}
1230132718Skan
1231132718Skan/* Unroll a LOOP LOOP->LPT_DECISION.TIMES times.  The transformation:
1232132718Skan   while (cond)
1233132718Skan     body;
1234132718Skan
1235132718Skan   ==>
1236132718Skan
1237132718Skan   while (cond)
1238132718Skan     {
1239132718Skan       body;
1240132718Skan       if (!cond) break;
1241132718Skan       body;
1242132718Skan       if (!cond) break;
1243132718Skan       body;
1244132718Skan       if (!cond) break;
1245132718Skan       body;
1246132718Skan     }
1247132718Skan   */
1248132718Skanstatic void
1249132718Skanunroll_loop_stupid (struct loops *loops, struct loop *loop)
1250132718Skan{
1251132718Skan  sbitmap wont_exit;
1252132718Skan  unsigned nunroll = loop->lpt_decision.times;
1253132718Skan
1254132718Skan  wont_exit = sbitmap_alloc (nunroll + 1);
1255132718Skan  sbitmap_zero (wont_exit);
1256132718Skan
1257132718Skan  if (!duplicate_loop_to_header_edge (loop, loop_latch_edge (loop),
1258132718Skan		loops, nunroll, wont_exit, NULL, NULL, NULL,
1259132718Skan		DLTHE_FLAG_UPDATE_FREQ))
1260132718Skan    abort ();
1261132718Skan
1262132718Skan  free (wont_exit);
1263132718Skan
1264132718Skan  if (rtl_dump_file)
1265132718Skan    fprintf (rtl_dump_file, ";; Unrolled loop %d times, %i insns\n",
1266132718Skan	     nunroll, num_loop_insns (loop));
1267132718Skan}
1268132718Skan
1269132718Skan/* Expand a bct instruction in a branch and an increment.
1270132718Skan   If flag_inc is set, the induction variable does not need to be
1271132718Skan   incremented.  */
1272132718Skanvoid expand_bct (edge e, int flag_inc)
1273132718Skan{
1274132718Skan  rtx bct_insn = BB_END (e->src);
1275132718Skan  rtx cmp;
1276132718Skan  rtx inc;
1277132718Skan  rtx seq;
1278132718Skan
1279132718Skan  rtx tgt;
1280132718Skan  rtx condition;
1281132718Skan  rtx label;
1282132718Skan  rtx reg;
1283132718Skan  rtx jump;
1284132718Skan  rtx pattern = PATTERN(bct_insn);
1285132718Skan
1286132718Skan  if (!(is_bct_cond(bct_insn)))
1287132718Skan    return;
1288132718Skan
1289132718Skan  inc = get_var_set_from_bct (bct_insn);
1290132718Skan  cmp = XVECEXP (pattern, 0, 0);
1291132718Skan  reg = SET_DEST (inc);
1292132718Skan
1293132718Skan  start_sequence ();
1294132718Skan  if (!flag_inc)
1295132718Skan    {
1296132718Skan      tgt = force_operand (XEXP (inc, 1), XEXP (inc, 0));
1297132718Skan      if (tgt != XEXP (inc, 0))
1298132718Skan	emit_move_insn (XEXP (inc, 0), tgt);
1299132718Skan    }
1300132718Skan
1301132718Skan  condition = XEXP (SET_SRC (cmp), 0);
1302132718Skan  label = XEXP (SET_SRC (cmp), 1);
1303132718Skan
1304132718Skan  do_compare_rtx_and_jump (copy_rtx (reg), XEXP (condition, 1),
1305132718Skan			   GET_CODE (condition), 0,
1306132718Skan			   GET_MODE (reg), NULL_RTX, NULL_RTX,
1307132718Skan			   label);
1308132718Skan  jump = get_last_insn ();
1309132718Skan  JUMP_LABEL (jump) = label;
1310132718Skan  seq = get_insns ();
1311132718Skan  end_sequence ();
1312132718Skan  emit_insn_after (seq, bct_insn);
1313132718Skan
1314132718Skan  delete_insn (bct_insn);
1315132718Skan
1316132718Skan  return;
1317132718Skan}
1318132718Skan
1319132718Skan/* Check that the increment of the count register can be discarded.  */
1320132718Skanbool
1321132718Skandiscard_increment (struct loop *loop)
1322132718Skan{
1323132718Skan  struct loop_desc *desc = &loop->desc;
1324132718Skan  rtx inc, set_src, reg;
1325132718Skan  rtx bct_insn;
1326132718Skan  unsigned int i;
1327132718Skan  basic_block *body;
1328132718Skan
1329132718Skan  bct_insn = BB_END (desc->out_edge->src);
1330132718Skan  if (!is_bct_cond (bct_insn))
1331132718Skan    abort();
1332132718Skan
1333132718Skan  inc = get_var_set_from_bct (bct_insn);
1334132718Skan
1335132718Skan  /* Check that inc is of the form reg = reg - 1.  */
1336132718Skan  reg = SET_DEST (inc);
1337132718Skan  set_src = SET_SRC (inc);
1338132718Skan
1339132718Skan  if (GET_CODE (set_src) != PLUS)
1340132718Skan    return false;
1341132718Skan
1342132718Skan  if (!rtx_equal_p (XEXP (set_src, 0), reg))
1343132718Skan    return false;
1344132718Skan
1345132718Skan  if (!CONSTANT_P (XEXP (set_src, 1)))
1346132718Skan     return false;
1347132718Skan
1348132718Skan  if (INTVAL (XEXP (set_src, 1)) != -1)
1349132718Skan     return false;
1350132718Skan
1351132718Skan  /* We need to check that the register has no other uses beside the branch and
1352132718Skan     count.  */
1353132718Skan  body = get_loop_body (loop);
1354132718Skan  for(i=0; i < loop->num_nodes; i++)
1355132718Skan    {
1356132718Skan      if (reg_mentioned_p (desc->var, BB_HEAD (body[i])))
1357132718Skan	  return false;
1358132718Skan
1359132718Skan      if (body[i] != desc->out_edge->src)
1360132718Skan	if (reg_mentioned_p (desc->var, BB_END (body[i])))
1361132718Skan	  return false;
1362132718Skan
1363132718Skan      if (reg_used_between_p (desc->var, BB_HEAD (body[i]), BB_END (body[i])))
1364132718Skan	  return false;
1365132718Skan    }
1366132718Skan
1367132718Skan  /* Check that the branch and count ends the latch.  */
1368132718Skan  if (desc->out_edge->src != loop->latch)
1369132718Skan    {
1370132718Skan      rtx insn;
1371132718Skan
1372132718Skan      /* Latch is a dummy block generated by loop-init.  */
1373132718Skan      if (BRANCH_EDGE(desc->out_edge->src)->dest != loop->latch)
1374132718Skan	  return false;
1375132718Skan
1376132718Skan      for (insn = BB_HEAD (loop->latch); insn != NEXT_INSN (BB_END (loop->latch));
1377132718Skan	   insn = NEXT_INSN (insn))
1378132718Skan        if (INSN_P (insn)) return false;
1379132718Skan    }
1380132718Skan
1381132718Skan  return true;
1382132718Skan}
1383132718Skan
1384