1/* Loop unrolling and peeling.
2   Copyright (C) 2002, 2003, 2004, 2005 Free Software Foundation, Inc.
3
4This file is part of GCC.
5
6GCC is free software; you can redistribute it and/or modify it under
7the terms of the GNU General Public License as published by the Free
8Software Foundation; either version 2, or (at your option) any later
9version.
10
11GCC is distributed in the hope that it will be useful, but WITHOUT ANY
12WARRANTY; without even the implied warranty of MERCHANTABILITY or
13FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
14for more details.
15
16You should have received a copy of the GNU General Public License
17along with GCC; see the file COPYING.  If not, write to the Free
18Software Foundation, 51 Franklin Street, Fifth Floor, Boston, MA
1902110-1301, USA.  */
20
21#include "config.h"
22#include "system.h"
23#include "coretypes.h"
24#include "tm.h"
25#include "rtl.h"
26#include "hard-reg-set.h"
27#include "obstack.h"
28#include "basic-block.h"
29#include "cfgloop.h"
30#include "cfglayout.h"
31#include "params.h"
32#include "output.h"
33#include "expr.h"
34#include "hashtab.h"
35#include "recog.h"
36#include "varray.h"
37
38/* This pass performs loop unrolling and peeling.  We only perform these
39   optimizations on innermost loops (with single exception) because
40   the impact on performance is greatest here, and we want to avoid
41   unnecessary code size growth.  The gain is caused by greater sequentiality
42   of code, better code to optimize for further passes and in some cases
43   by fewer testings of exit conditions.  The main problem is code growth,
44   that impacts performance negatively due to effect of caches.
45
46   What we do:
47
48   -- complete peeling of once-rolling loops; this is the above mentioned
49      exception, as this causes loop to be cancelled completely and
50      does not cause code growth
51   -- complete peeling of loops that roll (small) constant times.
52   -- simple peeling of first iterations of loops that do not roll much
53      (according to profile feedback)
54   -- unrolling of loops that roll constant times; this is almost always
55      win, as we get rid of exit condition tests.
56   -- unrolling of loops that roll number of times that we can compute
57      in runtime; we also get rid of exit condition tests here, but there
58      is the extra expense for calculating the number of iterations
59   -- simple unrolling of remaining loops; this is performed only if we
60      are asked to, as the gain is questionable in this case and often
61      it may even slow down the code
62   For more detailed descriptions of each of those, see comments at
63   appropriate function below.
64
65   There is a lot of parameters (defined and described in params.def) that
66   control how much we unroll/peel.
67
68   ??? A great problem is that we don't have a good way how to determine
69   how many times we should unroll the loop; the experiments I have made
70   showed that this choice may affect performance in order of several %.
71   */
72
73/* Information about induction variables to split.  */
74
75struct iv_to_split
76{
77  rtx insn;		/* The insn in that the induction variable occurs.  */
78  rtx base_var;		/* The variable on that the values in the further
79			   iterations are based.  */
80  rtx step;		/* Step of the induction variable.  */
81  unsigned n_loc;
82  unsigned loc[3];	/* Location where the definition of the induction
83			   variable occurs in the insn.  For example if
84			   N_LOC is 2, the expression is located at
85			   XEXP (XEXP (single_set, loc[0]), loc[1]).  */
86};
87
88DEF_VEC_P(rtx);
89DEF_VEC_ALLOC_P(rtx,heap);
90
91/* Information about accumulators to expand.  */
92
93struct var_to_expand
94{
95  rtx insn;		           /* The insn in that the variable expansion occurs.  */
96  rtx reg;                         /* The accumulator which is expanded.  */
97  VEC(rtx,heap) *var_expansions;   /* The copies of the accumulator which is expanded.  */
98  enum rtx_code op;                /* The type of the accumulation - addition, subtraction
99                                      or multiplication.  */
100  int expansion_count;             /* Count the number of expansions generated so far.  */
101  int reuse_expansion;             /* The expansion we intend to reuse to expand
102                                      the accumulator.  If REUSE_EXPANSION is 0 reuse
103                                      the original accumulator.  Else use
104                                      var_expansions[REUSE_EXPANSION - 1].  */
105};
106
107/* Information about optimization applied in
108   the unrolled loop.  */
109
110struct opt_info
111{
112  htab_t insns_to_split;           /* A hashtable of insns to split.  */
113  htab_t insns_with_var_to_expand; /* A hashtable of insns with accumulators
114                                      to expand.  */
115  unsigned first_new_block;        /* The first basic block that was
116                                      duplicated.  */
117  basic_block loop_exit;           /* The loop exit basic block.  */
118  basic_block loop_preheader;      /* The loop preheader basic block.  */
119};
120
121static void decide_unrolling_and_peeling (struct loops *, int);
122static void peel_loops_completely (struct loops *, int);
123static void decide_peel_simple (struct loop *, int);
124static void decide_peel_once_rolling (struct loop *, int);
125static void decide_peel_completely (struct loop *, int);
126static void decide_unroll_stupid (struct loop *, int);
127static void decide_unroll_constant_iterations (struct loop *, int);
128static void decide_unroll_runtime_iterations (struct loop *, int);
129static void peel_loop_simple (struct loops *, struct loop *);
130static void peel_loop_completely (struct loops *, struct loop *);
131static void unroll_loop_stupid (struct loops *, struct loop *);
132static void unroll_loop_constant_iterations (struct loops *, struct loop *);
133static void unroll_loop_runtime_iterations (struct loops *, struct loop *);
134static struct opt_info *analyze_insns_in_loop (struct loop *);
135static void opt_info_start_duplication (struct opt_info *);
136static void apply_opt_in_copies (struct opt_info *, unsigned, bool, bool);
137static void free_opt_info (struct opt_info *);
138static struct var_to_expand *analyze_insn_to_expand_var (struct loop*, rtx);
139static bool referenced_in_one_insn_in_loop_p (struct loop *, rtx);
140static struct iv_to_split *analyze_iv_to_split_insn (rtx);
141static void expand_var_during_unrolling (struct var_to_expand *, rtx);
142static int insert_var_expansion_initialization (void **, void *);
143static int combine_var_copies_in_loop_exit (void **, void *);
144static int release_var_copies (void **, void *);
145static rtx get_expansion (struct var_to_expand *);
146
147/* Unroll and/or peel (depending on FLAGS) LOOPS.  */
148void
149unroll_and_peel_loops (struct loops *loops, int flags)
150{
151  struct loop *loop, *next;
152  bool check;
153
154  /* First perform complete loop peeling (it is almost surely a win,
155     and affects parameters for further decision a lot).  */
156  peel_loops_completely (loops, flags);
157
158  /* Now decide rest of unrolling and peeling.  */
159  decide_unrolling_and_peeling (loops, flags);
160
161  loop = loops->tree_root;
162  while (loop->inner)
163    loop = loop->inner;
164
165  /* Scan the loops, inner ones first.  */
166  while (loop != loops->tree_root)
167    {
168      if (loop->next)
169	{
170	  next = loop->next;
171	  while (next->inner)
172	    next = next->inner;
173	}
174      else
175	next = loop->outer;
176
177      check = true;
178      /* And perform the appropriate transformations.  */
179      switch (loop->lpt_decision.decision)
180	{
181	case LPT_PEEL_COMPLETELY:
182	  /* Already done.  */
183	  gcc_unreachable ();
184	case LPT_PEEL_SIMPLE:
185	  peel_loop_simple (loops, loop);
186	  break;
187	case LPT_UNROLL_CONSTANT:
188	  unroll_loop_constant_iterations (loops, loop);
189	  break;
190	case LPT_UNROLL_RUNTIME:
191	  unroll_loop_runtime_iterations (loops, loop);
192	  break;
193	case LPT_UNROLL_STUPID:
194	  unroll_loop_stupid (loops, loop);
195	  break;
196	case LPT_NONE:
197	  check = false;
198	  break;
199	default:
200	  gcc_unreachable ();
201	}
202      if (check)
203	{
204#ifdef ENABLE_CHECKING
205	  verify_dominators (CDI_DOMINATORS);
206	  verify_loop_structure (loops);
207#endif
208	}
209      loop = next;
210    }
211
212  iv_analysis_done ();
213}
214
215/* Check whether exit of the LOOP is at the end of loop body.  */
216
217static bool
218loop_exit_at_end_p (struct loop *loop)
219{
220  struct niter_desc *desc = get_simple_loop_desc (loop);
221  rtx insn;
222
223  if (desc->in_edge->dest != loop->latch)
224    return false;
225
226  /* Check that the latch is empty.  */
227  FOR_BB_INSNS (loop->latch, insn)
228    {
229      if (INSN_P (insn))
230	return false;
231    }
232
233  return true;
234}
235
236/* Check whether to peel LOOPS (depending on FLAGS) completely and do so.  */
237static void
238peel_loops_completely (struct loops *loops, int flags)
239{
240  struct loop *loop;
241  unsigned i;
242
243  /* Scan the loops, the inner ones first.  */
244  for (i = loops->num - 1; i > 0; i--)
245    {
246      loop = loops->parray[i];
247      if (!loop)
248	continue;
249
250      loop->lpt_decision.decision = LPT_NONE;
251
252      if (dump_file)
253	fprintf (dump_file,
254		 "\n;; *** Considering loop %d for complete peeling ***\n",
255		 loop->num);
256
257      loop->ninsns = num_loop_insns (loop);
258
259      decide_peel_once_rolling (loop, flags);
260      if (loop->lpt_decision.decision == LPT_NONE)
261	decide_peel_completely (loop, flags);
262
263      if (loop->lpt_decision.decision == LPT_PEEL_COMPLETELY)
264	{
265	  peel_loop_completely (loops, loop);
266#ifdef ENABLE_CHECKING
267	  verify_dominators (CDI_DOMINATORS);
268	  verify_loop_structure (loops);
269#endif
270	}
271    }
272}
273
274/* Decide whether unroll or peel LOOPS (depending on FLAGS) and how much.  */
275static void
276decide_unrolling_and_peeling (struct loops *loops, int flags)
277{
278  struct loop *loop = loops->tree_root, *next;
279
280  while (loop->inner)
281    loop = loop->inner;
282
283  /* Scan the loops, inner ones first.  */
284  while (loop != loops->tree_root)
285    {
286      if (loop->next)
287	{
288	  next = loop->next;
289	  while (next->inner)
290	    next = next->inner;
291	}
292      else
293	next = loop->outer;
294
295      loop->lpt_decision.decision = LPT_NONE;
296
297      if (dump_file)
298	fprintf (dump_file, "\n;; *** Considering loop %d ***\n", loop->num);
299
300      /* Do not peel cold areas.  */
301      if (!maybe_hot_bb_p (loop->header))
302	{
303	  if (dump_file)
304	    fprintf (dump_file, ";; Not considering loop, cold area\n");
305	  loop = next;
306	  continue;
307	}
308
309      /* Can the loop be manipulated?  */
310      if (!can_duplicate_loop_p (loop))
311	{
312	  if (dump_file)
313	    fprintf (dump_file,
314		     ";; Not considering loop, cannot duplicate\n");
315	  loop = next;
316	  continue;
317	}
318
319      /* Skip non-innermost loops.  */
320      if (loop->inner)
321	{
322	  if (dump_file)
323	    fprintf (dump_file, ";; Not considering loop, is not innermost\n");
324	  loop = next;
325	  continue;
326	}
327
328      loop->ninsns = num_loop_insns (loop);
329      loop->av_ninsns = average_num_loop_insns (loop);
330
331      /* Try transformations one by one in decreasing order of
332	 priority.  */
333
334      decide_unroll_constant_iterations (loop, flags);
335      if (loop->lpt_decision.decision == LPT_NONE)
336	decide_unroll_runtime_iterations (loop, flags);
337      if (loop->lpt_decision.decision == LPT_NONE)
338	decide_unroll_stupid (loop, flags);
339      if (loop->lpt_decision.decision == LPT_NONE)
340	decide_peel_simple (loop, flags);
341
342      loop = next;
343    }
344}
345
346/* Decide whether the LOOP is once rolling and suitable for complete
347   peeling.  */
348static void
349decide_peel_once_rolling (struct loop *loop, int flags ATTRIBUTE_UNUSED)
350{
351  struct niter_desc *desc;
352
353  if (dump_file)
354    fprintf (dump_file, "\n;; Considering peeling once rolling loop\n");
355
356  /* Is the loop small enough?  */
357  if ((unsigned) PARAM_VALUE (PARAM_MAX_ONCE_PEELED_INSNS) < loop->ninsns)
358    {
359      if (dump_file)
360	fprintf (dump_file, ";; Not considering loop, is too big\n");
361      return;
362    }
363
364  /* Check for simple loops.  */
365  desc = get_simple_loop_desc (loop);
366
367  /* Check number of iterations.  */
368  if (!desc->simple_p
369      || desc->assumptions
370      || desc->infinite
371      || !desc->const_iter
372      || desc->niter != 0)
373    {
374      if (dump_file)
375	fprintf (dump_file,
376		 ";; Unable to prove that the loop rolls exactly once\n");
377      return;
378    }
379
380  /* Success.  */
381  if (dump_file)
382    fprintf (dump_file, ";; Decided to peel exactly once rolling loop\n");
383  loop->lpt_decision.decision = LPT_PEEL_COMPLETELY;
384}
385
386/* Decide whether the LOOP is suitable for complete peeling.  */
387static void
388decide_peel_completely (struct loop *loop, int flags ATTRIBUTE_UNUSED)
389{
390  unsigned npeel;
391  struct niter_desc *desc;
392
393  if (dump_file)
394    fprintf (dump_file, "\n;; Considering peeling completely\n");
395
396  /* Skip non-innermost loops.  */
397  if (loop->inner)
398    {
399      if (dump_file)
400	fprintf (dump_file, ";; Not considering loop, is not innermost\n");
401      return;
402    }
403
404  /* Do not peel cold areas.  */
405  if (!maybe_hot_bb_p (loop->header))
406    {
407      if (dump_file)
408	fprintf (dump_file, ";; Not considering loop, cold area\n");
409      return;
410    }
411
412  /* Can the loop be manipulated?  */
413  if (!can_duplicate_loop_p (loop))
414    {
415      if (dump_file)
416	fprintf (dump_file,
417		 ";; Not considering loop, cannot duplicate\n");
418      return;
419    }
420
421  /* npeel = number of iterations to peel.  */
422  npeel = PARAM_VALUE (PARAM_MAX_COMPLETELY_PEELED_INSNS) / loop->ninsns;
423  if (npeel > (unsigned) PARAM_VALUE (PARAM_MAX_COMPLETELY_PEEL_TIMES))
424    npeel = PARAM_VALUE (PARAM_MAX_COMPLETELY_PEEL_TIMES);
425
426  /* Is the loop small enough?  */
427  if (!npeel)
428    {
429      if (dump_file)
430	fprintf (dump_file, ";; Not considering loop, is too big\n");
431      return;
432    }
433
434  /* Check for simple loops.  */
435  desc = get_simple_loop_desc (loop);
436
437  /* Check number of iterations.  */
438  if (!desc->simple_p
439      || desc->assumptions
440      || !desc->const_iter
441      || desc->infinite)
442    {
443      if (dump_file)
444	fprintf (dump_file,
445		 ";; Unable to prove that the loop iterates constant times\n");
446      return;
447    }
448
449  if (desc->niter > npeel - 1)
450    {
451      if (dump_file)
452	{
453	  fprintf (dump_file,
454		   ";; Not peeling loop completely, rolls too much (");
455	  fprintf (dump_file, HOST_WIDEST_INT_PRINT_DEC, desc->niter);
456	  fprintf (dump_file, " iterations > %d [maximum peelings])\n", npeel);
457	}
458      return;
459    }
460
461  /* Success.  */
462  if (dump_file)
463    fprintf (dump_file, ";; Decided to peel loop completely\n");
464  loop->lpt_decision.decision = LPT_PEEL_COMPLETELY;
465}
466
467/* Peel all iterations of LOOP, remove exit edges and cancel the loop
468   completely.  The transformation done:
469
470   for (i = 0; i < 4; i++)
471     body;
472
473   ==>
474
475   i = 0;
476   body; i++;
477   body; i++;
478   body; i++;
479   body; i++;
480   */
481static void
482peel_loop_completely (struct loops *loops, struct loop *loop)
483{
484  sbitmap wont_exit;
485  unsigned HOST_WIDE_INT npeel;
486  unsigned n_remove_edges, i;
487  edge *remove_edges, ein;
488  struct niter_desc *desc = get_simple_loop_desc (loop);
489  struct opt_info *opt_info = NULL;
490
491  npeel = desc->niter;
492
493  if (npeel)
494    {
495      bool ok;
496
497      wont_exit = sbitmap_alloc (npeel + 1);
498      sbitmap_ones (wont_exit);
499      RESET_BIT (wont_exit, 0);
500      if (desc->noloop_assumptions)
501	RESET_BIT (wont_exit, 1);
502
503      remove_edges = xcalloc (npeel, sizeof (edge));
504      n_remove_edges = 0;
505
506      if (flag_split_ivs_in_unroller)
507        opt_info = analyze_insns_in_loop (loop);
508
509      opt_info_start_duplication (opt_info);
510      ok = duplicate_loop_to_header_edge (loop, loop_preheader_edge (loop),
511					  loops, npeel,
512					  wont_exit, desc->out_edge,
513					  remove_edges, &n_remove_edges,
514					  DLTHE_FLAG_UPDATE_FREQ
515					  | DLTHE_FLAG_COMPLETTE_PEEL
516					  | (opt_info
517					     ? DLTHE_RECORD_COPY_NUMBER : 0));
518      gcc_assert (ok);
519
520      free (wont_exit);
521
522      if (opt_info)
523 	{
524 	  apply_opt_in_copies (opt_info, npeel, false, true);
525 	  free_opt_info (opt_info);
526 	}
527
528      /* Remove the exit edges.  */
529      for (i = 0; i < n_remove_edges; i++)
530	remove_path (loops, remove_edges[i]);
531      free (remove_edges);
532    }
533
534  ein = desc->in_edge;
535  free_simple_loop_desc (loop);
536
537  /* Now remove the unreachable part of the last iteration and cancel
538     the loop.  */
539  remove_path (loops, ein);
540
541  if (dump_file)
542    fprintf (dump_file, ";; Peeled loop completely, %d times\n", (int) npeel);
543}
544
545/* Decide whether to unroll LOOP iterating constant number of times
546   and how much.  */
547
548static void
549decide_unroll_constant_iterations (struct loop *loop, int flags)
550{
551  unsigned nunroll, nunroll_by_av, best_copies, best_unroll = 0, n_copies, i;
552  struct niter_desc *desc;
553
554  if (!(flags & UAP_UNROLL))
555    {
556      /* We were not asked to, just return back silently.  */
557      return;
558    }
559
560  if (dump_file)
561    fprintf (dump_file,
562	     "\n;; Considering unrolling loop with constant "
563	     "number of iterations\n");
564
565  /* nunroll = total number of copies of the original loop body in
566     unrolled loop (i.e. if it is 2, we have to duplicate loop body once.  */
567  nunroll = PARAM_VALUE (PARAM_MAX_UNROLLED_INSNS) / loop->ninsns;
568  nunroll_by_av
569    = PARAM_VALUE (PARAM_MAX_AVERAGE_UNROLLED_INSNS) / loop->av_ninsns;
570  if (nunroll > nunroll_by_av)
571    nunroll = nunroll_by_av;
572  if (nunroll > (unsigned) PARAM_VALUE (PARAM_MAX_UNROLL_TIMES))
573    nunroll = PARAM_VALUE (PARAM_MAX_UNROLL_TIMES);
574
575  /* Skip big loops.  */
576  if (nunroll <= 1)
577    {
578      if (dump_file)
579	fprintf (dump_file, ";; Not considering loop, is too big\n");
580      return;
581    }
582
583  /* Check for simple loops.  */
584  desc = get_simple_loop_desc (loop);
585
586  /* Check number of iterations.  */
587  if (!desc->simple_p || !desc->const_iter || desc->assumptions)
588    {
589      if (dump_file)
590	fprintf (dump_file,
591		 ";; Unable to prove that the loop iterates constant times\n");
592      return;
593    }
594
595  /* Check whether the loop rolls enough to consider.  */
596  if (desc->niter < 2 * nunroll)
597    {
598      if (dump_file)
599	fprintf (dump_file, ";; Not unrolling loop, doesn't roll\n");
600      return;
601    }
602
603  /* Success; now compute number of iterations to unroll.  We alter
604     nunroll so that as few as possible copies of loop body are
605     necessary, while still not decreasing the number of unrollings
606     too much (at most by 1).  */
607  best_copies = 2 * nunroll + 10;
608
609  i = 2 * nunroll + 2;
610  if (i - 1 >= desc->niter)
611    i = desc->niter - 2;
612
613  for (; i >= nunroll - 1; i--)
614    {
615      unsigned exit_mod = desc->niter % (i + 1);
616
617      if (!loop_exit_at_end_p (loop))
618	n_copies = exit_mod + i + 1;
619      else if (exit_mod != (unsigned) i
620	       || desc->noloop_assumptions != NULL_RTX)
621	n_copies = exit_mod + i + 2;
622      else
623	n_copies = i + 1;
624
625      if (n_copies < best_copies)
626	{
627	  best_copies = n_copies;
628	  best_unroll = i;
629	}
630    }
631
632  if (dump_file)
633    fprintf (dump_file, ";; max_unroll %d (%d copies, initial %d).\n",
634	     best_unroll + 1, best_copies, nunroll);
635
636  loop->lpt_decision.decision = LPT_UNROLL_CONSTANT;
637  loop->lpt_decision.times = best_unroll;
638
639  if (dump_file)
640    fprintf (dump_file,
641	     ";; Decided to unroll the constant times rolling loop, %d times.\n",
642	     loop->lpt_decision.times);
643}
644
645/* Unroll LOOP with constant number of iterations LOOP->LPT_DECISION.TIMES + 1
646   times.  The transformation does this:
647
648   for (i = 0; i < 102; i++)
649     body;
650
651   ==>
652
653   i = 0;
654   body; i++;
655   body; i++;
656   while (i < 102)
657     {
658       body; i++;
659       body; i++;
660       body; i++;
661       body; i++;
662     }
663  */
664static void
665unroll_loop_constant_iterations (struct loops *loops, struct loop *loop)
666{
667  unsigned HOST_WIDE_INT niter;
668  unsigned exit_mod;
669  sbitmap wont_exit;
670  unsigned n_remove_edges, i;
671  edge *remove_edges;
672  unsigned max_unroll = loop->lpt_decision.times;
673  struct niter_desc *desc = get_simple_loop_desc (loop);
674  bool exit_at_end = loop_exit_at_end_p (loop);
675  struct opt_info *opt_info = NULL;
676  bool ok;
677
678  niter = desc->niter;
679
680  /* Should not get here (such loop should be peeled instead).  */
681  gcc_assert (niter > max_unroll + 1);
682
683  exit_mod = niter % (max_unroll + 1);
684
685  wont_exit = sbitmap_alloc (max_unroll + 1);
686  sbitmap_ones (wont_exit);
687
688  remove_edges = xcalloc (max_unroll + exit_mod + 1, sizeof (edge));
689  n_remove_edges = 0;
690  if (flag_split_ivs_in_unroller
691      || flag_variable_expansion_in_unroller)
692    opt_info = analyze_insns_in_loop (loop);
693
694  if (!exit_at_end)
695    {
696      /* The exit is not at the end of the loop; leave exit test
697	 in the first copy, so that the loops that start with test
698	 of exit condition have continuous body after unrolling.  */
699
700      if (dump_file)
701	fprintf (dump_file, ";; Condition on beginning of loop.\n");
702
703      /* Peel exit_mod iterations.  */
704      RESET_BIT (wont_exit, 0);
705      if (desc->noloop_assumptions)
706	RESET_BIT (wont_exit, 1);
707
708      if (exit_mod)
709	{
710	  opt_info_start_duplication (opt_info);
711          ok = duplicate_loop_to_header_edge (loop, loop_preheader_edge (loop),
712					      loops, exit_mod,
713					      wont_exit, desc->out_edge,
714					      remove_edges, &n_remove_edges,
715					      DLTHE_FLAG_UPDATE_FREQ
716					      | (opt_info && exit_mod > 1
717						 ? DLTHE_RECORD_COPY_NUMBER
718						   : 0));
719	  gcc_assert (ok);
720
721          if (opt_info && exit_mod > 1)
722 	    apply_opt_in_copies (opt_info, exit_mod, false, false);
723
724	  desc->noloop_assumptions = NULL_RTX;
725	  desc->niter -= exit_mod;
726	  desc->niter_max -= exit_mod;
727	}
728
729      SET_BIT (wont_exit, 1);
730    }
731  else
732    {
733      /* Leave exit test in last copy, for the same reason as above if
734	 the loop tests the condition at the end of loop body.  */
735
736      if (dump_file)
737	fprintf (dump_file, ";; Condition on end of loop.\n");
738
739      /* We know that niter >= max_unroll + 2; so we do not need to care of
740	 case when we would exit before reaching the loop.  So just peel
741	 exit_mod + 1 iterations.  */
742      if (exit_mod != max_unroll
743	  || desc->noloop_assumptions)
744	{
745	  RESET_BIT (wont_exit, 0);
746	  if (desc->noloop_assumptions)
747	    RESET_BIT (wont_exit, 1);
748
749          opt_info_start_duplication (opt_info);
750	  ok = duplicate_loop_to_header_edge (loop, loop_preheader_edge (loop),
751					      loops, exit_mod + 1,
752					      wont_exit, desc->out_edge,
753					      remove_edges, &n_remove_edges,
754					      DLTHE_FLAG_UPDATE_FREQ
755					      | (opt_info && exit_mod > 0
756						 ? DLTHE_RECORD_COPY_NUMBER
757						   : 0));
758	  gcc_assert (ok);
759
760          if (opt_info && exit_mod > 0)
761  	    apply_opt_in_copies (opt_info, exit_mod + 1, false, false);
762
763	  desc->niter -= exit_mod + 1;
764	  desc->niter_max -= exit_mod + 1;
765	  desc->noloop_assumptions = NULL_RTX;
766
767	  SET_BIT (wont_exit, 0);
768	  SET_BIT (wont_exit, 1);
769	}
770
771      RESET_BIT (wont_exit, max_unroll);
772    }
773
774  /* Now unroll the loop.  */
775
776  opt_info_start_duplication (opt_info);
777  ok = duplicate_loop_to_header_edge (loop, loop_latch_edge (loop),
778				      loops, max_unroll,
779				      wont_exit, desc->out_edge,
780				      remove_edges, &n_remove_edges,
781				      DLTHE_FLAG_UPDATE_FREQ
782				      | (opt_info
783					 ? DLTHE_RECORD_COPY_NUMBER
784					   : 0));
785  gcc_assert (ok);
786
787  if (opt_info)
788    {
789      apply_opt_in_copies (opt_info, max_unroll, true, true);
790      free_opt_info (opt_info);
791    }
792
793  free (wont_exit);
794
795  if (exit_at_end)
796    {
797      basic_block exit_block = get_bb_copy (desc->in_edge->src);
798      /* Find a new in and out edge; they are in the last copy we have made.  */
799
800      if (EDGE_SUCC (exit_block, 0)->dest == desc->out_edge->dest)
801	{
802	  desc->out_edge = EDGE_SUCC (exit_block, 0);
803	  desc->in_edge = EDGE_SUCC (exit_block, 1);
804	}
805      else
806	{
807	  desc->out_edge = EDGE_SUCC (exit_block, 1);
808	  desc->in_edge = EDGE_SUCC (exit_block, 0);
809	}
810    }
811
812  desc->niter /= max_unroll + 1;
813  desc->niter_max /= max_unroll + 1;
814  desc->niter_expr = GEN_INT (desc->niter);
815
816  /* Remove the edges.  */
817  for (i = 0; i < n_remove_edges; i++)
818    remove_path (loops, remove_edges[i]);
819  free (remove_edges);
820
821  if (dump_file)
822    fprintf (dump_file,
823	     ";; Unrolled loop %d times, constant # of iterations %i insns\n",
824	     max_unroll, num_loop_insns (loop));
825}
826
827/* Decide whether to unroll LOOP iterating runtime computable number of times
828   and how much.  */
829static void
830decide_unroll_runtime_iterations (struct loop *loop, int flags)
831{
832  unsigned nunroll, nunroll_by_av, i;
833  struct niter_desc *desc;
834
835  if (!(flags & UAP_UNROLL))
836    {
837      /* We were not asked to, just return back silently.  */
838      return;
839    }
840
841  if (dump_file)
842    fprintf (dump_file,
843	     "\n;; Considering unrolling loop with runtime "
844	     "computable number of iterations\n");
845
846  /* nunroll = total number of copies of the original loop body in
847     unrolled loop (i.e. if it is 2, we have to duplicate loop body once.  */
848  nunroll = PARAM_VALUE (PARAM_MAX_UNROLLED_INSNS) / loop->ninsns;
849  nunroll_by_av = PARAM_VALUE (PARAM_MAX_AVERAGE_UNROLLED_INSNS) / loop->av_ninsns;
850  if (nunroll > nunroll_by_av)
851    nunroll = nunroll_by_av;
852  if (nunroll > (unsigned) PARAM_VALUE (PARAM_MAX_UNROLL_TIMES))
853    nunroll = PARAM_VALUE (PARAM_MAX_UNROLL_TIMES);
854
855  /* Skip big loops.  */
856  if (nunroll <= 1)
857    {
858      if (dump_file)
859	fprintf (dump_file, ";; Not considering loop, is too big\n");
860      return;
861    }
862
863  /* Check for simple loops.  */
864  desc = get_simple_loop_desc (loop);
865
866  /* Check simpleness.  */
867  if (!desc->simple_p || desc->assumptions)
868    {
869      if (dump_file)
870	fprintf (dump_file,
871		 ";; Unable to prove that the number of iterations "
872		 "can be counted in runtime\n");
873      return;
874    }
875
876  if (desc->const_iter)
877    {
878      if (dump_file)
879	fprintf (dump_file, ";; Loop iterates constant times\n");
880      return;
881    }
882
883  /* If we have profile feedback, check whether the loop rolls.  */
884  if (loop->header->count && expected_loop_iterations (loop) < 2 * nunroll)
885    {
886      if (dump_file)
887	fprintf (dump_file, ";; Not unrolling loop, doesn't roll\n");
888      return;
889    }
890
891  /* Success; now force nunroll to be power of 2, as we are unable to
892     cope with overflows in computation of number of iterations.  */
893  for (i = 1; 2 * i <= nunroll; i *= 2)
894    continue;
895
896  loop->lpt_decision.decision = LPT_UNROLL_RUNTIME;
897  loop->lpt_decision.times = i - 1;
898
899  if (dump_file)
900    fprintf (dump_file,
901	     ";; Decided to unroll the runtime computable "
902	     "times rolling loop, %d times.\n",
903	     loop->lpt_decision.times);
904}
905
906/* Unroll LOOP for that we are able to count number of iterations in runtime
907   LOOP->LPT_DECISION.TIMES + 1 times.  The transformation does this (with some
908   extra care for case n < 0):
909
910   for (i = 0; i < n; i++)
911     body;
912
913   ==>
914
915   i = 0;
916   mod = n % 4;
917
918   switch (mod)
919     {
920       case 3:
921         body; i++;
922       case 2:
923         body; i++;
924       case 1:
925         body; i++;
926       case 0: ;
927     }
928
929   while (i < n)
930     {
931       body; i++;
932       body; i++;
933       body; i++;
934       body; i++;
935     }
936   */
937static void
938unroll_loop_runtime_iterations (struct loops *loops, struct loop *loop)
939{
940  rtx old_niter, niter, init_code, branch_code, tmp;
941  unsigned i, j, p;
942  basic_block preheader, *body, *dom_bbs, swtch, ezc_swtch;
943  unsigned n_dom_bbs;
944  sbitmap wont_exit;
945  int may_exit_copy;
946  unsigned n_peel, n_remove_edges;
947  edge *remove_edges, e;
948  bool extra_zero_check, last_may_exit;
949  unsigned max_unroll = loop->lpt_decision.times;
950  struct niter_desc *desc = get_simple_loop_desc (loop);
951  bool exit_at_end = loop_exit_at_end_p (loop);
952  struct opt_info *opt_info = NULL;
953  bool ok;
954
955  if (flag_split_ivs_in_unroller
956      || flag_variable_expansion_in_unroller)
957    opt_info = analyze_insns_in_loop (loop);
958
959  /* Remember blocks whose dominators will have to be updated.  */
960  dom_bbs = xcalloc (n_basic_blocks, sizeof (basic_block));
961  n_dom_bbs = 0;
962
963  body = get_loop_body (loop);
964  for (i = 0; i < loop->num_nodes; i++)
965    {
966      unsigned nldom;
967      basic_block *ldom;
968
969      nldom = get_dominated_by (CDI_DOMINATORS, body[i], &ldom);
970      for (j = 0; j < nldom; j++)
971	if (!flow_bb_inside_loop_p (loop, ldom[j]))
972	  dom_bbs[n_dom_bbs++] = ldom[j];
973
974      free (ldom);
975    }
976  free (body);
977
978  if (!exit_at_end)
979    {
980      /* Leave exit in first copy (for explanation why see comment in
981	 unroll_loop_constant_iterations).  */
982      may_exit_copy = 0;
983      n_peel = max_unroll - 1;
984      extra_zero_check = true;
985      last_may_exit = false;
986    }
987  else
988    {
989      /* Leave exit in last copy (for explanation why see comment in
990	 unroll_loop_constant_iterations).  */
991      may_exit_copy = max_unroll;
992      n_peel = max_unroll;
993      extra_zero_check = false;
994      last_may_exit = true;
995    }
996
997  /* Get expression for number of iterations.  */
998  start_sequence ();
999  old_niter = niter = gen_reg_rtx (desc->mode);
1000  tmp = force_operand (copy_rtx (desc->niter_expr), niter);
1001  if (tmp != niter)
1002    emit_move_insn (niter, tmp);
1003
1004  /* Count modulo by ANDing it with max_unroll; we use the fact that
1005     the number of unrollings is a power of two, and thus this is correct
1006     even if there is overflow in the computation.  */
1007  niter = expand_simple_binop (desc->mode, AND,
1008			       niter,
1009			       GEN_INT (max_unroll),
1010			       NULL_RTX, 0, OPTAB_LIB_WIDEN);
1011
1012  init_code = get_insns ();
1013  end_sequence ();
1014
1015  /* Precondition the loop.  */
1016  loop_split_edge_with (loop_preheader_edge (loop), init_code);
1017
1018  remove_edges = xcalloc (max_unroll + n_peel + 1, sizeof (edge));
1019  n_remove_edges = 0;
1020
1021  wont_exit = sbitmap_alloc (max_unroll + 2);
1022
1023  /* Peel the first copy of loop body (almost always we must leave exit test
1024     here; the only exception is when we have extra zero check and the number
1025     of iterations is reliable.  Also record the place of (possible) extra
1026     zero check.  */
1027  sbitmap_zero (wont_exit);
1028  if (extra_zero_check
1029      && !desc->noloop_assumptions)
1030    SET_BIT (wont_exit, 1);
1031  ezc_swtch = loop_preheader_edge (loop)->src;
1032  ok = duplicate_loop_to_header_edge (loop, loop_preheader_edge (loop),
1033				      loops, 1,
1034				      wont_exit, desc->out_edge,
1035				      remove_edges, &n_remove_edges,
1036				      DLTHE_FLAG_UPDATE_FREQ);
1037  gcc_assert (ok);
1038
1039  /* Record the place where switch will be built for preconditioning.  */
1040  swtch = loop_split_edge_with (loop_preheader_edge (loop),
1041				NULL_RTX);
1042
1043  for (i = 0; i < n_peel; i++)
1044    {
1045      /* Peel the copy.  */
1046      sbitmap_zero (wont_exit);
1047      if (i != n_peel - 1 || !last_may_exit)
1048	SET_BIT (wont_exit, 1);
1049      ok = duplicate_loop_to_header_edge (loop, loop_preheader_edge (loop),
1050					  loops, 1,
1051					  wont_exit, desc->out_edge,
1052					  remove_edges, &n_remove_edges,
1053					  DLTHE_FLAG_UPDATE_FREQ);
1054      gcc_assert (ok);
1055
1056      /* Create item for switch.  */
1057      j = n_peel - i - (extra_zero_check ? 0 : 1);
1058      p = REG_BR_PROB_BASE / (i + 2);
1059
1060      preheader = loop_split_edge_with (loop_preheader_edge (loop), NULL_RTX);
1061      branch_code = compare_and_jump_seq (copy_rtx (niter), GEN_INT (j), EQ,
1062					  block_label (preheader), p,
1063					  NULL_RTX);
1064
1065      swtch = loop_split_edge_with (single_pred_edge (swtch), branch_code);
1066      set_immediate_dominator (CDI_DOMINATORS, preheader, swtch);
1067      single_pred_edge (swtch)->probability = REG_BR_PROB_BASE - p;
1068      e = make_edge (swtch, preheader,
1069		     single_succ_edge (swtch)->flags & EDGE_IRREDUCIBLE_LOOP);
1070      e->probability = p;
1071    }
1072
1073  if (extra_zero_check)
1074    {
1075      /* Add branch for zero iterations.  */
1076      p = REG_BR_PROB_BASE / (max_unroll + 1);
1077      swtch = ezc_swtch;
1078      preheader = loop_split_edge_with (loop_preheader_edge (loop), NULL_RTX);
1079      branch_code = compare_and_jump_seq (copy_rtx (niter), const0_rtx, EQ,
1080					  block_label (preheader), p,
1081					  NULL_RTX);
1082
1083      swtch = loop_split_edge_with (single_succ_edge (swtch), branch_code);
1084      set_immediate_dominator (CDI_DOMINATORS, preheader, swtch);
1085      single_succ_edge (swtch)->probability = REG_BR_PROB_BASE - p;
1086      e = make_edge (swtch, preheader,
1087		     single_succ_edge (swtch)->flags & EDGE_IRREDUCIBLE_LOOP);
1088      e->probability = p;
1089    }
1090
1091  /* Recount dominators for outer blocks.  */
1092  iterate_fix_dominators (CDI_DOMINATORS, dom_bbs, n_dom_bbs);
1093
1094  /* And unroll loop.  */
1095
1096  sbitmap_ones (wont_exit);
1097  RESET_BIT (wont_exit, may_exit_copy);
1098  opt_info_start_duplication (opt_info);
1099
1100  ok = duplicate_loop_to_header_edge (loop, loop_latch_edge (loop),
1101				      loops, max_unroll,
1102				      wont_exit, desc->out_edge,
1103				      remove_edges, &n_remove_edges,
1104				      DLTHE_FLAG_UPDATE_FREQ
1105				      | (opt_info
1106					 ? DLTHE_RECORD_COPY_NUMBER
1107					   : 0));
1108  gcc_assert (ok);
1109
1110  if (opt_info)
1111    {
1112      apply_opt_in_copies (opt_info, max_unroll, true, true);
1113      free_opt_info (opt_info);
1114    }
1115
1116  free (wont_exit);
1117
1118  if (exit_at_end)
1119    {
1120      basic_block exit_block = get_bb_copy (desc->in_edge->src);
1121      /* Find a new in and out edge; they are in the last copy we have
1122	 made.  */
1123
1124      if (EDGE_SUCC (exit_block, 0)->dest == desc->out_edge->dest)
1125	{
1126	  desc->out_edge = EDGE_SUCC (exit_block, 0);
1127	  desc->in_edge = EDGE_SUCC (exit_block, 1);
1128	}
1129      else
1130	{
1131	  desc->out_edge = EDGE_SUCC (exit_block, 1);
1132	  desc->in_edge = EDGE_SUCC (exit_block, 0);
1133	}
1134    }
1135
1136  /* Remove the edges.  */
1137  for (i = 0; i < n_remove_edges; i++)
1138    remove_path (loops, remove_edges[i]);
1139  free (remove_edges);
1140
1141  /* We must be careful when updating the number of iterations due to
1142     preconditioning and the fact that the value must be valid at entry
1143     of the loop.  After passing through the above code, we see that
1144     the correct new number of iterations is this:  */
1145  gcc_assert (!desc->const_iter);
1146  desc->niter_expr =
1147    simplify_gen_binary (UDIV, desc->mode, old_niter,
1148			 GEN_INT (max_unroll + 1));
1149  desc->niter_max /= max_unroll + 1;
1150  if (exit_at_end)
1151    {
1152      desc->niter_expr =
1153	simplify_gen_binary (MINUS, desc->mode, desc->niter_expr, const1_rtx);
1154      desc->noloop_assumptions = NULL_RTX;
1155      desc->niter_max--;
1156    }
1157
1158  if (dump_file)
1159    fprintf (dump_file,
1160	     ";; Unrolled loop %d times, counting # of iterations "
1161	     "in runtime, %i insns\n",
1162	     max_unroll, num_loop_insns (loop));
1163}
1164
1165/* Decide whether to simply peel LOOP and how much.  */
1166static void
1167decide_peel_simple (struct loop *loop, int flags)
1168{
1169  unsigned npeel;
1170  struct niter_desc *desc;
1171
1172  if (!(flags & UAP_PEEL))
1173    {
1174      /* We were not asked to, just return back silently.  */
1175      return;
1176    }
1177
1178  if (dump_file)
1179    fprintf (dump_file, "\n;; Considering simply peeling loop\n");
1180
1181  /* npeel = number of iterations to peel.  */
1182  npeel = PARAM_VALUE (PARAM_MAX_PEELED_INSNS) / loop->ninsns;
1183  if (npeel > (unsigned) PARAM_VALUE (PARAM_MAX_PEEL_TIMES))
1184    npeel = PARAM_VALUE (PARAM_MAX_PEEL_TIMES);
1185
1186  /* Skip big loops.  */
1187  if (!npeel)
1188    {
1189      if (dump_file)
1190	fprintf (dump_file, ";; Not considering loop, is too big\n");
1191      return;
1192    }
1193
1194  /* Check for simple loops.  */
1195  desc = get_simple_loop_desc (loop);
1196
1197  /* Check number of iterations.  */
1198  if (desc->simple_p && !desc->assumptions && desc->const_iter)
1199    {
1200      if (dump_file)
1201	fprintf (dump_file, ";; Loop iterates constant times\n");
1202      return;
1203    }
1204
1205  /* Do not simply peel loops with branches inside -- it increases number
1206     of mispredicts.  */
1207  if (num_loop_branches (loop) > 1)
1208    {
1209      if (dump_file)
1210	fprintf (dump_file, ";; Not peeling, contains branches\n");
1211      return;
1212    }
1213
1214  if (loop->header->count)
1215    {
1216      unsigned niter = expected_loop_iterations (loop);
1217      if (niter + 1 > npeel)
1218	{
1219	  if (dump_file)
1220	    {
1221	      fprintf (dump_file, ";; Not peeling loop, rolls too much (");
1222	      fprintf (dump_file, HOST_WIDEST_INT_PRINT_DEC,
1223		       (HOST_WIDEST_INT) (niter + 1));
1224	      fprintf (dump_file, " iterations > %d [maximum peelings])\n",
1225		       npeel);
1226	    }
1227	  return;
1228	}
1229      npeel = niter + 1;
1230    }
1231  else
1232    {
1233      /* For now we have no good heuristics to decide whether loop peeling
1234         will be effective, so disable it.  */
1235      if (dump_file)
1236	fprintf (dump_file,
1237		 ";; Not peeling loop, no evidence it will be profitable\n");
1238      return;
1239    }
1240
1241  /* Success.  */
1242  loop->lpt_decision.decision = LPT_PEEL_SIMPLE;
1243  loop->lpt_decision.times = npeel;
1244
1245  if (dump_file)
1246    fprintf (dump_file, ";; Decided to simply peel the loop, %d times.\n",
1247	     loop->lpt_decision.times);
1248}
1249
1250/* Peel a LOOP LOOP->LPT_DECISION.TIMES times.  The transformation:
1251   while (cond)
1252     body;
1253
1254   ==>
1255
1256   if (!cond) goto end;
1257   body;
1258   if (!cond) goto end;
1259   body;
1260   while (cond)
1261     body;
1262   end: ;
1263   */
1264static void
1265peel_loop_simple (struct loops *loops, struct loop *loop)
1266{
1267  sbitmap wont_exit;
1268  unsigned npeel = loop->lpt_decision.times;
1269  struct niter_desc *desc = get_simple_loop_desc (loop);
1270  struct opt_info *opt_info = NULL;
1271  bool ok;
1272
1273  if (flag_split_ivs_in_unroller && npeel > 1)
1274    opt_info = analyze_insns_in_loop (loop);
1275
1276  wont_exit = sbitmap_alloc (npeel + 1);
1277  sbitmap_zero (wont_exit);
1278
1279  opt_info_start_duplication (opt_info);
1280
1281  ok = duplicate_loop_to_header_edge (loop, loop_preheader_edge (loop),
1282				      loops, npeel, wont_exit,
1283				      NULL, NULL,
1284				      NULL, DLTHE_FLAG_UPDATE_FREQ
1285				      | (opt_info
1286					 ? DLTHE_RECORD_COPY_NUMBER
1287					   : 0));
1288  gcc_assert (ok);
1289
1290  free (wont_exit);
1291
1292  if (opt_info)
1293    {
1294      apply_opt_in_copies (opt_info, npeel, false, false);
1295      free_opt_info (opt_info);
1296    }
1297
1298  if (desc->simple_p)
1299    {
1300      if (desc->const_iter)
1301	{
1302	  desc->niter -= npeel;
1303	  desc->niter_expr = GEN_INT (desc->niter);
1304	  desc->noloop_assumptions = NULL_RTX;
1305	}
1306      else
1307	{
1308	  /* We cannot just update niter_expr, as its value might be clobbered
1309	     inside loop.  We could handle this by counting the number into
1310	     temporary just like we do in runtime unrolling, but it does not
1311	     seem worthwhile.  */
1312	  free_simple_loop_desc (loop);
1313	}
1314    }
1315  if (dump_file)
1316    fprintf (dump_file, ";; Peeling loop %d times\n", npeel);
1317}
1318
1319/* Decide whether to unroll LOOP stupidly and how much.  */
1320static void
1321decide_unroll_stupid (struct loop *loop, int flags)
1322{
1323  unsigned nunroll, nunroll_by_av, i;
1324  struct niter_desc *desc;
1325
1326  if (!(flags & UAP_UNROLL_ALL))
1327    {
1328      /* We were not asked to, just return back silently.  */
1329      return;
1330    }
1331
1332  if (dump_file)
1333    fprintf (dump_file, "\n;; Considering unrolling loop stupidly\n");
1334
1335  /* nunroll = total number of copies of the original loop body in
1336     unrolled loop (i.e. if it is 2, we have to duplicate loop body once.  */
1337  nunroll = PARAM_VALUE (PARAM_MAX_UNROLLED_INSNS) / loop->ninsns;
1338  nunroll_by_av
1339    = PARAM_VALUE (PARAM_MAX_AVERAGE_UNROLLED_INSNS) / loop->av_ninsns;
1340  if (nunroll > nunroll_by_av)
1341    nunroll = nunroll_by_av;
1342  if (nunroll > (unsigned) PARAM_VALUE (PARAM_MAX_UNROLL_TIMES))
1343    nunroll = PARAM_VALUE (PARAM_MAX_UNROLL_TIMES);
1344
1345  /* Skip big loops.  */
1346  if (nunroll <= 1)
1347    {
1348      if (dump_file)
1349	fprintf (dump_file, ";; Not considering loop, is too big\n");
1350      return;
1351    }
1352
1353  /* Check for simple loops.  */
1354  desc = get_simple_loop_desc (loop);
1355
1356  /* Check simpleness.  */
1357  if (desc->simple_p && !desc->assumptions)
1358    {
1359      if (dump_file)
1360	fprintf (dump_file, ";; The loop is simple\n");
1361      return;
1362    }
1363
1364  /* Do not unroll loops with branches inside -- it increases number
1365     of mispredicts.  */
1366  if (num_loop_branches (loop) > 1)
1367    {
1368      if (dump_file)
1369	fprintf (dump_file, ";; Not unrolling, contains branches\n");
1370      return;
1371    }
1372
1373  /* If we have profile feedback, check whether the loop rolls.  */
1374  if (loop->header->count
1375      && expected_loop_iterations (loop) < 2 * nunroll)
1376    {
1377      if (dump_file)
1378	fprintf (dump_file, ";; Not unrolling loop, doesn't roll\n");
1379      return;
1380    }
1381
1382  /* Success.  Now force nunroll to be power of 2, as it seems that this
1383     improves results (partially because of better alignments, partially
1384     because of some dark magic).  */
1385  for (i = 1; 2 * i <= nunroll; i *= 2)
1386    continue;
1387
1388  loop->lpt_decision.decision = LPT_UNROLL_STUPID;
1389  loop->lpt_decision.times = i - 1;
1390
1391  if (dump_file)
1392    fprintf (dump_file,
1393	     ";; Decided to unroll the loop stupidly, %d times.\n",
1394	     loop->lpt_decision.times);
1395}
1396
1397/* Unroll a LOOP LOOP->LPT_DECISION.TIMES times.  The transformation:
1398   while (cond)
1399     body;
1400
1401   ==>
1402
1403   while (cond)
1404     {
1405       body;
1406       if (!cond) break;
1407       body;
1408       if (!cond) break;
1409       body;
1410       if (!cond) break;
1411       body;
1412     }
1413   */
1414static void
1415unroll_loop_stupid (struct loops *loops, struct loop *loop)
1416{
1417  sbitmap wont_exit;
1418  unsigned nunroll = loop->lpt_decision.times;
1419  struct niter_desc *desc = get_simple_loop_desc (loop);
1420  struct opt_info *opt_info = NULL;
1421  bool ok;
1422
1423  if (flag_split_ivs_in_unroller
1424      || flag_variable_expansion_in_unroller)
1425    opt_info = analyze_insns_in_loop (loop);
1426
1427
1428  wont_exit = sbitmap_alloc (nunroll + 1);
1429  sbitmap_zero (wont_exit);
1430  opt_info_start_duplication (opt_info);
1431
1432  ok = duplicate_loop_to_header_edge (loop, loop_latch_edge (loop),
1433				      loops, nunroll, wont_exit,
1434				      NULL, NULL, NULL,
1435				      DLTHE_FLAG_UPDATE_FREQ
1436				      | (opt_info
1437					 ? DLTHE_RECORD_COPY_NUMBER
1438					   : 0));
1439  gcc_assert (ok);
1440
1441  if (opt_info)
1442    {
1443      apply_opt_in_copies (opt_info, nunroll, true, true);
1444      free_opt_info (opt_info);
1445    }
1446
1447  free (wont_exit);
1448
1449  if (desc->simple_p)
1450    {
1451      /* We indeed may get here provided that there are nontrivial assumptions
1452	 for a loop to be really simple.  We could update the counts, but the
1453	 problem is that we are unable to decide which exit will be taken
1454	 (not really true in case the number of iterations is constant,
1455	 but noone will do anything with this information, so we do not
1456	 worry about it).  */
1457      desc->simple_p = false;
1458    }
1459
1460  if (dump_file)
1461    fprintf (dump_file, ";; Unrolled loop %d times, %i insns\n",
1462	     nunroll, num_loop_insns (loop));
1463}
1464
1465/* A hash function for information about insns to split.  */
1466
1467static hashval_t
1468si_info_hash (const void *ivts)
1469{
1470  return htab_hash_pointer (((struct iv_to_split *) ivts)->insn);
1471}
1472
1473/* An equality functions for information about insns to split.  */
1474
1475static int
1476si_info_eq (const void *ivts1, const void *ivts2)
1477{
1478  const struct iv_to_split *i1 = ivts1;
1479  const struct iv_to_split *i2 = ivts2;
1480
1481  return i1->insn == i2->insn;
1482}
1483
1484/* Return a hash for VES, which is really a "var_to_expand *".  */
1485
1486static hashval_t
1487ve_info_hash (const void *ves)
1488{
1489  return htab_hash_pointer (((struct var_to_expand *) ves)->insn);
1490}
1491
1492/* Return true if IVTS1 and IVTS2 (which are really both of type
1493   "var_to_expand *") refer to the same instruction.  */
1494
1495static int
1496ve_info_eq (const void *ivts1, const void *ivts2)
1497{
1498  const struct var_to_expand *i1 = ivts1;
1499  const struct var_to_expand *i2 = ivts2;
1500
1501  return i1->insn == i2->insn;
1502}
1503
1504/* Returns true if REG is referenced in one insn in LOOP.  */
1505
1506bool
1507referenced_in_one_insn_in_loop_p (struct loop *loop, rtx reg)
1508{
1509  basic_block *body, bb;
1510  unsigned i;
1511  int count_ref = 0;
1512  rtx insn;
1513
1514  body = get_loop_body (loop);
1515  for (i = 0; i < loop->num_nodes; i++)
1516    {
1517      bb = body[i];
1518
1519      FOR_BB_INSNS (bb, insn)
1520      {
1521        if (rtx_referenced_p (reg, insn))
1522          count_ref++;
1523      }
1524    }
1525  return (count_ref  == 1);
1526}
1527
1528/* Determine whether INSN contains an accumulator
1529   which can be expanded into separate copies,
1530   one for each copy of the LOOP body.
1531
1532   for (i = 0 ; i < n; i++)
1533     sum += a[i];
1534
1535   ==>
1536
1537   sum += a[i]
1538   ....
1539   i = i+1;
1540   sum1 += a[i]
1541   ....
1542   i = i+1
1543   sum2 += a[i];
1544   ....
1545
1546   Return NULL if INSN contains no opportunity for expansion of accumulator.
1547   Otherwise, allocate a VAR_TO_EXPAND structure, fill it with the relevant
1548   information and return a pointer to it.
1549*/
1550
1551static struct var_to_expand *
1552analyze_insn_to_expand_var (struct loop *loop, rtx insn)
1553{
1554  rtx set, dest, src, op1;
1555  struct var_to_expand *ves;
1556  enum machine_mode mode1, mode2;
1557
1558  set = single_set (insn);
1559  if (!set)
1560    return NULL;
1561
1562  dest = SET_DEST (set);
1563  src = SET_SRC (set);
1564
1565  if (GET_CODE (src) != PLUS
1566      && GET_CODE (src) != MINUS
1567      && GET_CODE (src) != MULT)
1568    return NULL;
1569
1570  /* Hmm, this is a bit paradoxical.  We know that INSN is a valid insn
1571     in MD.  But if there is no optab to generate the insn, we can not
1572     perform the variable expansion.  This can happen if an MD provides
1573     an insn but not a named pattern to generate it, for example to avoid
1574     producing code that needs additional mode switches like for x87/mmx.
1575
1576     So we check have_insn_for which looks for an optab for the operation
1577     in SRC.  If it doesn't exist, we can't perform the expansion even
1578     though INSN is valid.  */
1579  if (!have_insn_for (GET_CODE (src), GET_MODE (src)))
1580    return NULL;
1581
1582  if (!XEXP (src, 0))
1583    return NULL;
1584
1585  op1 = XEXP (src, 0);
1586
1587  if (!REG_P (dest)
1588      && !(GET_CODE (dest) == SUBREG
1589           && REG_P (SUBREG_REG (dest))))
1590    return NULL;
1591
1592  if (!rtx_equal_p (dest, op1))
1593    return NULL;
1594
1595  if (!referenced_in_one_insn_in_loop_p (loop, dest))
1596    return NULL;
1597
1598  if (rtx_referenced_p (dest, XEXP (src, 1)))
1599    return NULL;
1600
1601  mode1 = GET_MODE (dest);
1602  mode2 = GET_MODE (XEXP (src, 1));
1603  if ((FLOAT_MODE_P (mode1)
1604       || FLOAT_MODE_P (mode2))
1605      && !flag_unsafe_math_optimizations)
1606    return NULL;
1607
1608  /* Record the accumulator to expand.  */
1609  ves = xmalloc (sizeof (struct var_to_expand));
1610  ves->insn = insn;
1611  ves->var_expansions = VEC_alloc (rtx, heap, 1);
1612  ves->reg = copy_rtx (dest);
1613  ves->op = GET_CODE (src);
1614  ves->expansion_count = 0;
1615  ves->reuse_expansion = 0;
1616  return ves;
1617}
1618
1619/* Determine whether there is an induction variable in INSN that
1620   we would like to split during unrolling.
1621
1622   I.e. replace
1623
1624   i = i + 1;
1625   ...
1626   i = i + 1;
1627   ...
1628   i = i + 1;
1629   ...
1630
1631   type chains by
1632
1633   i0 = i + 1
1634   ...
1635   i = i0 + 1
1636   ...
1637   i = i0 + 2
1638   ...
1639
1640   Return NULL if INSN contains no interesting IVs.  Otherwise, allocate
1641   an IV_TO_SPLIT structure, fill it with the relevant information and return a
1642   pointer to it.  */
1643
1644static struct iv_to_split *
1645analyze_iv_to_split_insn (rtx insn)
1646{
1647  rtx set, dest;
1648  struct rtx_iv iv;
1649  struct iv_to_split *ivts;
1650  bool ok;
1651
1652  /* For now we just split the basic induction variables.  Later this may be
1653     extended for example by selecting also addresses of memory references.  */
1654  set = single_set (insn);
1655  if (!set)
1656    return NULL;
1657
1658  dest = SET_DEST (set);
1659  if (!REG_P (dest))
1660    return NULL;
1661
1662  if (!biv_p (insn, dest))
1663    return NULL;
1664
1665  ok = iv_analyze (insn, dest, &iv);
1666  gcc_assert (ok);
1667
1668  if (iv.step == const0_rtx
1669      || iv.mode != iv.extend_mode)
1670    return NULL;
1671
1672  /* Record the insn to split.  */
1673  ivts = xmalloc (sizeof (struct iv_to_split));
1674  ivts->insn = insn;
1675  ivts->base_var = NULL_RTX;
1676  ivts->step = iv.step;
1677  ivts->n_loc = 1;
1678  ivts->loc[0] = 1;
1679
1680  return ivts;
1681}
1682
1683/* Determines which of insns in LOOP can be optimized.
1684   Return a OPT_INFO struct with the relevant hash tables filled
1685   with all insns to be optimized.  The FIRST_NEW_BLOCK field
1686   is undefined for the return value.  */
1687
1688static struct opt_info *
1689analyze_insns_in_loop (struct loop *loop)
1690{
1691  basic_block *body, bb;
1692  unsigned i, num_edges = 0;
1693  struct opt_info *opt_info = xcalloc (1, sizeof (struct opt_info));
1694  rtx insn;
1695  struct iv_to_split *ivts = NULL;
1696  struct var_to_expand *ves = NULL;
1697  PTR *slot1;
1698  PTR *slot2;
1699  edge *edges = get_loop_exit_edges (loop, &num_edges);
1700  bool can_apply = false;
1701
1702  iv_analysis_loop_init (loop);
1703
1704  body = get_loop_body (loop);
1705
1706  if (flag_split_ivs_in_unroller)
1707    opt_info->insns_to_split = htab_create (5 * loop->num_nodes,
1708                                            si_info_hash, si_info_eq, free);
1709
1710  /* Record the loop exit bb and loop preheader before the unrolling.  */
1711  if (!loop_preheader_edge (loop)->src)
1712    {
1713      loop_split_edge_with (loop_preheader_edge (loop), NULL_RTX);
1714      opt_info->loop_preheader = loop_split_edge_with (loop_preheader_edge (loop), NULL_RTX);
1715    }
1716  else
1717    opt_info->loop_preheader = loop_preheader_edge (loop)->src;
1718
1719  if (num_edges == 1
1720      && !(edges[0]->flags & EDGE_COMPLEX))
1721    {
1722      opt_info->loop_exit = loop_split_edge_with (edges[0], NULL_RTX);
1723      can_apply = true;
1724    }
1725
1726  if (flag_variable_expansion_in_unroller
1727      && can_apply)
1728    opt_info->insns_with_var_to_expand = htab_create (5 * loop->num_nodes,
1729						      ve_info_hash, ve_info_eq, free);
1730
1731  for (i = 0; i < loop->num_nodes; i++)
1732    {
1733      bb = body[i];
1734      if (!dominated_by_p (CDI_DOMINATORS, loop->latch, bb))
1735	continue;
1736
1737      FOR_BB_INSNS (bb, insn)
1738      {
1739        if (!INSN_P (insn))
1740          continue;
1741
1742        if (opt_info->insns_to_split)
1743          ivts = analyze_iv_to_split_insn (insn);
1744
1745        if (ivts)
1746          {
1747            slot1 = htab_find_slot (opt_info->insns_to_split, ivts, INSERT);
1748            *slot1 = ivts;
1749            continue;
1750          }
1751
1752        if (opt_info->insns_with_var_to_expand)
1753          ves = analyze_insn_to_expand_var (loop, insn);
1754
1755        if (ves)
1756          {
1757            slot2 = htab_find_slot (opt_info->insns_with_var_to_expand, ves, INSERT);
1758            *slot2 = ves;
1759          }
1760      }
1761    }
1762
1763  free (edges);
1764  free (body);
1765  return opt_info;
1766}
1767
1768/* Called just before loop duplication.  Records start of duplicated area
1769   to OPT_INFO.  */
1770
1771static void
1772opt_info_start_duplication (struct opt_info *opt_info)
1773{
1774  if (opt_info)
1775    opt_info->first_new_block = last_basic_block;
1776}
1777
1778/* Determine the number of iterations between initialization of the base
1779   variable and the current copy (N_COPY).  N_COPIES is the total number
1780   of newly created copies.  UNROLLING is true if we are unrolling
1781   (not peeling) the loop.  */
1782
1783static unsigned
1784determine_split_iv_delta (unsigned n_copy, unsigned n_copies, bool unrolling)
1785{
1786  if (unrolling)
1787    {
1788      /* If we are unrolling, initialization is done in the original loop
1789	 body (number 0).  */
1790      return n_copy;
1791    }
1792  else
1793    {
1794      /* If we are peeling, the copy in that the initialization occurs has
1795	 number 1.  The original loop (number 0) is the last.  */
1796      if (n_copy)
1797	return n_copy - 1;
1798      else
1799	return n_copies;
1800    }
1801}
1802
1803/* Locate in EXPR the expression corresponding to the location recorded
1804   in IVTS, and return a pointer to the RTX for this location.  */
1805
1806static rtx *
1807get_ivts_expr (rtx expr, struct iv_to_split *ivts)
1808{
1809  unsigned i;
1810  rtx *ret = &expr;
1811
1812  for (i = 0; i < ivts->n_loc; i++)
1813    ret = &XEXP (*ret, ivts->loc[i]);
1814
1815  return ret;
1816}
1817
1818/* Allocate basic variable for the induction variable chain.  Callback for
1819   htab_traverse.  */
1820
1821static int
1822allocate_basic_variable (void **slot, void *data ATTRIBUTE_UNUSED)
1823{
1824  struct iv_to_split *ivts = *slot;
1825  rtx expr = *get_ivts_expr (single_set (ivts->insn), ivts);
1826
1827  ivts->base_var = gen_reg_rtx (GET_MODE (expr));
1828
1829  return 1;
1830}
1831
1832/* Insert initialization of basic variable of IVTS before INSN, taking
1833   the initial value from INSN.  */
1834
1835static void
1836insert_base_initialization (struct iv_to_split *ivts, rtx insn)
1837{
1838  rtx expr = copy_rtx (*get_ivts_expr (single_set (insn), ivts));
1839  rtx seq;
1840
1841  start_sequence ();
1842  expr = force_operand (expr, ivts->base_var);
1843  if (expr != ivts->base_var)
1844    emit_move_insn (ivts->base_var, expr);
1845  seq = get_insns ();
1846  end_sequence ();
1847
1848  emit_insn_before (seq, insn);
1849}
1850
1851/* Replace the use of induction variable described in IVTS in INSN
1852   by base variable + DELTA * step.  */
1853
1854static void
1855split_iv (struct iv_to_split *ivts, rtx insn, unsigned delta)
1856{
1857  rtx expr, *loc, seq, incr, var;
1858  enum machine_mode mode = GET_MODE (ivts->base_var);
1859  rtx src, dest, set;
1860
1861  /* Construct base + DELTA * step.  */
1862  if (!delta)
1863    expr = ivts->base_var;
1864  else
1865    {
1866      incr = simplify_gen_binary (MULT, mode,
1867				  ivts->step, gen_int_mode (delta, mode));
1868      expr = simplify_gen_binary (PLUS, GET_MODE (ivts->base_var),
1869				  ivts->base_var, incr);
1870    }
1871
1872  /* Figure out where to do the replacement.  */
1873  loc = get_ivts_expr (single_set (insn), ivts);
1874
1875  /* If we can make the replacement right away, we're done.  */
1876  if (validate_change (insn, loc, expr, 0))
1877    return;
1878
1879  /* Otherwise, force EXPR into a register and try again.  */
1880  start_sequence ();
1881  var = gen_reg_rtx (mode);
1882  expr = force_operand (expr, var);
1883  if (expr != var)
1884    emit_move_insn (var, expr);
1885  seq = get_insns ();
1886  end_sequence ();
1887  emit_insn_before (seq, insn);
1888
1889  if (validate_change (insn, loc, var, 0))
1890    return;
1891
1892  /* The last chance.  Try recreating the assignment in insn
1893     completely from scratch.  */
1894  set = single_set (insn);
1895  gcc_assert (set);
1896
1897  start_sequence ();
1898  *loc = var;
1899  src = copy_rtx (SET_SRC (set));
1900  dest = copy_rtx (SET_DEST (set));
1901  src = force_operand (src, dest);
1902  if (src != dest)
1903    emit_move_insn (dest, src);
1904  seq = get_insns ();
1905  end_sequence ();
1906
1907  emit_insn_before (seq, insn);
1908  delete_insn (insn);
1909}
1910
1911
1912/* Return one expansion of the accumulator recorded in struct VE.  */
1913
1914static rtx
1915get_expansion (struct var_to_expand *ve)
1916{
1917  rtx reg;
1918
1919  if (ve->reuse_expansion == 0)
1920    reg = ve->reg;
1921  else
1922    reg = VEC_index (rtx, ve->var_expansions, ve->reuse_expansion - 1);
1923
1924  if (VEC_length (rtx, ve->var_expansions) == (unsigned) ve->reuse_expansion)
1925    ve->reuse_expansion = 0;
1926  else
1927    ve->reuse_expansion++;
1928
1929  return reg;
1930}
1931
1932
1933/* Given INSN replace the uses of the accumulator recorded in VE
1934   with a new register.  */
1935
1936static void
1937expand_var_during_unrolling (struct var_to_expand *ve, rtx insn)
1938{
1939  rtx new_reg, set;
1940  bool really_new_expansion = false;
1941
1942  set = single_set (insn);
1943  gcc_assert (set);
1944
1945  /* Generate a new register only if the expansion limit has not been
1946     reached.  Else reuse an already existing expansion.  */
1947  if (PARAM_VALUE (PARAM_MAX_VARIABLE_EXPANSIONS) > ve->expansion_count)
1948    {
1949      really_new_expansion = true;
1950      new_reg = gen_reg_rtx (GET_MODE (ve->reg));
1951    }
1952  else
1953    new_reg = get_expansion (ve);
1954
1955  validate_change (insn, &SET_DEST (set), new_reg, 1);
1956  validate_change (insn, &XEXP (SET_SRC (set), 0), new_reg, 1);
1957
1958  if (apply_change_group ())
1959    if (really_new_expansion)
1960      {
1961        VEC_safe_push (rtx, heap, ve->var_expansions, new_reg);
1962        ve->expansion_count++;
1963      }
1964}
1965
1966/* Initialize the variable expansions in loop preheader.
1967   Callbacks for htab_traverse.  PLACE_P is the loop-preheader
1968   basic block where the initialization of the expansions
1969   should take place.  */
1970
1971static int
1972insert_var_expansion_initialization (void **slot, void *place_p)
1973{
1974  struct var_to_expand *ve = *slot;
1975  basic_block place = (basic_block)place_p;
1976  rtx seq, var, zero_init, insn;
1977  unsigned i;
1978
1979  if (VEC_length (rtx, ve->var_expansions) == 0)
1980    return 1;
1981
1982  start_sequence ();
1983  if (ve->op == PLUS || ve->op == MINUS)
1984    for (i = 0; VEC_iterate (rtx, ve->var_expansions, i, var); i++)
1985      {
1986        zero_init =  CONST0_RTX (GET_MODE (var));
1987        emit_move_insn (var, zero_init);
1988      }
1989  else if (ve->op == MULT)
1990    for (i = 0; VEC_iterate (rtx, ve->var_expansions, i, var); i++)
1991      {
1992        zero_init =  CONST1_RTX (GET_MODE (var));
1993        emit_move_insn (var, zero_init);
1994      }
1995
1996  seq = get_insns ();
1997  end_sequence ();
1998
1999  insn = BB_HEAD (place);
2000  while (!NOTE_INSN_BASIC_BLOCK_P (insn))
2001    insn = NEXT_INSN (insn);
2002
2003  emit_insn_after (seq, insn);
2004  /* Continue traversing the hash table.  */
2005  return 1;
2006}
2007
2008/*  Combine the variable expansions at the loop exit.
2009    Callbacks for htab_traverse.  PLACE_P is the loop exit
2010    basic block where the summation of the expansions should
2011    take place.  */
2012
2013static int
2014combine_var_copies_in_loop_exit (void **slot, void *place_p)
2015{
2016  struct var_to_expand *ve = *slot;
2017  basic_block place = (basic_block)place_p;
2018  rtx sum = ve->reg;
2019  rtx expr, seq, var, insn;
2020  unsigned i;
2021
2022  if (VEC_length (rtx, ve->var_expansions) == 0)
2023    return 1;
2024
2025  start_sequence ();
2026  if (ve->op == PLUS || ve->op == MINUS)
2027    for (i = 0; VEC_iterate (rtx, ve->var_expansions, i, var); i++)
2028      {
2029        sum = simplify_gen_binary (PLUS, GET_MODE (ve->reg),
2030                                   var, sum);
2031      }
2032  else if (ve->op == MULT)
2033    for (i = 0; VEC_iterate (rtx, ve->var_expansions, i, var); i++)
2034      {
2035        sum = simplify_gen_binary (MULT, GET_MODE (ve->reg),
2036                                   var, sum);
2037      }
2038
2039  expr = force_operand (sum, ve->reg);
2040  if (expr != ve->reg)
2041    emit_move_insn (ve->reg, expr);
2042  seq = get_insns ();
2043  end_sequence ();
2044
2045  insn = BB_HEAD (place);
2046  while (!NOTE_INSN_BASIC_BLOCK_P (insn))
2047    insn = NEXT_INSN (insn);
2048
2049  emit_insn_after (seq, insn);
2050
2051  /* Continue traversing the hash table.  */
2052  return 1;
2053}
2054
2055/* Apply loop optimizations in loop copies using the
2056   data which gathered during the unrolling.  Structure
2057   OPT_INFO record that data.
2058
2059   UNROLLING is true if we unrolled (not peeled) the loop.
2060   REWRITE_ORIGINAL_BODY is true if we should also rewrite the original body of
2061   the loop (as it should happen in complete unrolling, but not in ordinary
2062   peeling of the loop).  */
2063
2064static void
2065apply_opt_in_copies (struct opt_info *opt_info,
2066                     unsigned n_copies, bool unrolling,
2067                     bool rewrite_original_loop)
2068{
2069  unsigned i, delta;
2070  basic_block bb, orig_bb;
2071  rtx insn, orig_insn, next;
2072  struct iv_to_split ivts_templ, *ivts;
2073  struct var_to_expand ve_templ, *ves;
2074
2075  /* Sanity check -- we need to put initialization in the original loop
2076     body.  */
2077  gcc_assert (!unrolling || rewrite_original_loop);
2078
2079  /* Allocate the basic variables (i0).  */
2080  if (opt_info->insns_to_split)
2081    htab_traverse (opt_info->insns_to_split, allocate_basic_variable, NULL);
2082
2083  for (i = opt_info->first_new_block; i < (unsigned) last_basic_block; i++)
2084    {
2085      bb = BASIC_BLOCK (i);
2086      orig_bb = get_bb_original (bb);
2087
2088      /* bb->aux holds position in copy sequence initialized by
2089	 duplicate_loop_to_header_edge.  */
2090      delta = determine_split_iv_delta ((size_t)bb->aux, n_copies,
2091					unrolling);
2092      bb->aux = 0;
2093      orig_insn = BB_HEAD (orig_bb);
2094      for (insn = BB_HEAD (bb); insn != NEXT_INSN (BB_END (bb)); insn = next)
2095        {
2096          next = NEXT_INSN (insn);
2097          if (!INSN_P (insn))
2098            continue;
2099
2100          while (!INSN_P (orig_insn))
2101            orig_insn = NEXT_INSN (orig_insn);
2102
2103          ivts_templ.insn = orig_insn;
2104          ve_templ.insn = orig_insn;
2105
2106          /* Apply splitting iv optimization.  */
2107          if (opt_info->insns_to_split)
2108            {
2109              ivts = htab_find (opt_info->insns_to_split, &ivts_templ);
2110
2111              if (ivts)
2112                {
2113		  gcc_assert (GET_CODE (PATTERN (insn))
2114			      == GET_CODE (PATTERN (orig_insn)));
2115
2116                  if (!delta)
2117                    insert_base_initialization (ivts, insn);
2118                  split_iv (ivts, insn, delta);
2119                }
2120            }
2121          /* Apply variable expansion optimization.  */
2122          if (unrolling && opt_info->insns_with_var_to_expand)
2123            {
2124              ves = htab_find (opt_info->insns_with_var_to_expand, &ve_templ);
2125              if (ves)
2126                {
2127		  gcc_assert (GET_CODE (PATTERN (insn))
2128			      == GET_CODE (PATTERN (orig_insn)));
2129                  expand_var_during_unrolling (ves, insn);
2130                }
2131            }
2132          orig_insn = NEXT_INSN (orig_insn);
2133        }
2134    }
2135
2136  if (!rewrite_original_loop)
2137    return;
2138
2139  /* Initialize the variable expansions in the loop preheader
2140     and take care of combining them at the loop exit.  */
2141  if (opt_info->insns_with_var_to_expand)
2142    {
2143      htab_traverse (opt_info->insns_with_var_to_expand,
2144                     insert_var_expansion_initialization,
2145                     opt_info->loop_preheader);
2146      htab_traverse (opt_info->insns_with_var_to_expand,
2147                     combine_var_copies_in_loop_exit,
2148                     opt_info->loop_exit);
2149    }
2150
2151  /* Rewrite also the original loop body.  Find them as originals of the blocks
2152     in the last copied iteration, i.e. those that have
2153     get_bb_copy (get_bb_original (bb)) == bb.  */
2154  for (i = opt_info->first_new_block; i < (unsigned) last_basic_block; i++)
2155    {
2156      bb = BASIC_BLOCK (i);
2157      orig_bb = get_bb_original (bb);
2158      if (get_bb_copy (orig_bb) != bb)
2159	continue;
2160
2161      delta = determine_split_iv_delta (0, n_copies, unrolling);
2162      for (orig_insn = BB_HEAD (orig_bb);
2163           orig_insn != NEXT_INSN (BB_END (bb));
2164           orig_insn = next)
2165        {
2166          next = NEXT_INSN (orig_insn);
2167
2168          if (!INSN_P (orig_insn))
2169 	    continue;
2170
2171          ivts_templ.insn = orig_insn;
2172          if (opt_info->insns_to_split)
2173            {
2174              ivts = htab_find (opt_info->insns_to_split, &ivts_templ);
2175              if (ivts)
2176                {
2177                  if (!delta)
2178                    insert_base_initialization (ivts, orig_insn);
2179                  split_iv (ivts, orig_insn, delta);
2180                  continue;
2181                }
2182            }
2183
2184        }
2185    }
2186}
2187
2188/*  Release the data structures used for the variable expansion
2189    optimization.  Callbacks for htab_traverse.  */
2190
2191static int
2192release_var_copies (void **slot, void *data ATTRIBUTE_UNUSED)
2193{
2194  struct var_to_expand *ve = *slot;
2195
2196  VEC_free (rtx, heap, ve->var_expansions);
2197
2198  /* Continue traversing the hash table.  */
2199  return 1;
2200}
2201
2202/* Release OPT_INFO.  */
2203
2204static void
2205free_opt_info (struct opt_info *opt_info)
2206{
2207  if (opt_info->insns_to_split)
2208    htab_delete (opt_info->insns_to_split);
2209  if (opt_info->insns_with_var_to_expand)
2210    {
2211      htab_traverse (opt_info->insns_with_var_to_expand,
2212                     release_var_copies, NULL);
2213      htab_delete (opt_info->insns_with_var_to_expand);
2214    }
2215  free (opt_info);
2216}
2217