1/* Loop unrolling and peeling.
2   Copyright (C) 2002, 2003, 2004, 2005 Free Software Foundation, Inc.
3
4This file is part of GCC.
5
6GCC is free software; you can redistribute it and/or modify it under
7the terms of the GNU General Public License as published by the Free
8Software Foundation; either version 2, or (at your option) any later
9version.
10
11GCC is distributed in the hope that it will be useful, but WITHOUT ANY
12WARRANTY; without even the implied warranty of MERCHANTABILITY or
13FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
14for more details.
15
16You should have received a copy of the GNU General Public License
17along with GCC; see the file COPYING.  If not, write to the Free
18Software Foundation, 51 Franklin Street, Fifth Floor, Boston, MA
1902110-1301, USA.  */
20
21#include "config.h"
22#include "system.h"
23#include "coretypes.h"
24#include "tm.h"
25#include "rtl.h"
26#include "hard-reg-set.h"
27#include "obstack.h"
28#include "basic-block.h"
29#include "cfgloop.h"
30#include "cfglayout.h"
31#include "params.h"
32#include "output.h"
33#include "expr.h"
34#include "hashtab.h"
35#include "recog.h"
36
37/* This pass performs loop unrolling and peeling.  We only perform these
38   optimizations on innermost loops (with single exception) because
39   the impact on performance is greatest here, and we want to avoid
40   unnecessary code size growth.  The gain is caused by greater sequentiality
41   of code, better code to optimize for further passes and in some cases
42   by fewer testings of exit conditions.  The main problem is code growth,
43   that impacts performance negatively due to effect of caches.
44
45   What we do:
46
47   -- complete peeling of once-rolling loops; this is the above mentioned
48      exception, as this causes loop to be cancelled completely and
49      does not cause code growth
50   -- complete peeling of loops that roll (small) constant times.
51   -- simple peeling of first iterations of loops that do not roll much
52      (according to profile feedback)
53   -- unrolling of loops that roll constant times; this is almost always
54      win, as we get rid of exit condition tests.
55   -- unrolling of loops that roll number of times that we can compute
56      in runtime; we also get rid of exit condition tests here, but there
57      is the extra expense for calculating the number of iterations
58   -- simple unrolling of remaining loops; this is performed only if we
59      are asked to, as the gain is questionable in this case and often
60      it may even slow down the code
61   For more detailed descriptions of each of those, see comments at
62   appropriate function below.
63
   There are a lot of parameters (defined and described in params.def) that
   control how much we unroll/peel.
66
   ??? A great problem is that we don't have a good way to determine
   how many times we should unroll the loop; the experiments I have made
69   showed that this choice may affect performance in order of several %.
70   */
71
/* Information about induction variables to split.  Splitting an
   induction variable means rewriting its uses in the copied loop
   bodies to be based on a fresh base variable, removing the
   inter-copy dependence.  */

struct iv_to_split
{
  rtx insn;		/* The insn in that the induction variable occurs.  */
  rtx base_var;		/* The variable on that the values in the further
			   iterations are based.  */
  rtx step;		/* Step of the induction variable.  */
  unsigned n_loc;	/* Number of valid entries in LOC below.  */
  unsigned loc[3];	/* Location where the definition of the induction
			   variable occurs in the insn.  For example if
			   N_LOC is 2, the expression is located at
			   XEXP (XEXP (single_set, loc[0]), loc[1]).  */
};
86
/* Information about accumulators to expand.  An accumulator is
   replicated into several copies, one per unrolled body, which are
   combined again at the loop exit (see
   combine_var_copies_in_loop_exit); this breaks the dependence chain
   between iterations.  */

struct var_to_expand
{
  rtx insn;		           /* The insn in that the variable expansion occurs.  */
  rtx reg;                         /* The accumulator which is expanded.  */
  VEC(rtx,heap) *var_expansions;   /* The copies of the accumulator which is expanded.  */
  enum rtx_code op;                /* The type of the accumulation - addition, subtraction
                                      or multiplication.  */
  int expansion_count;             /* Count the number of expansions generated so far.  */
  int reuse_expansion;             /* The expansion we intend to reuse to expand
                                      the accumulator.  If REUSE_EXPANSION is 0 reuse
                                      the original accumulator.  Else use
                                      var_expansions[REUSE_EXPANSION - 1].  */
};
102
/* Information about optimization applied in
   the unrolled loop.  Gathered by analyze_insns_in_loop and consumed
   by apply_opt_in_copies.  */

struct opt_info
{
  htab_t insns_to_split;           /* A hashtable of insns to split
                                      (entries are struct iv_to_split).  */
  htab_t insns_with_var_to_expand; /* A hashtable of insns with accumulators
                                      to expand (entries are struct
                                      var_to_expand).  */
  unsigned first_new_block;        /* The first basic block that was
                                      duplicated.  */
  basic_block loop_exit;           /* The loop exit basic block.  */
  basic_block loop_preheader;      /* The loop preheader basic block.  */
};
116
117static void decide_unrolling_and_peeling (struct loops *, int);
118static void peel_loops_completely (struct loops *, int);
119static void decide_peel_simple (struct loop *, int);
120static void decide_peel_once_rolling (struct loop *, int);
121static void decide_peel_completely (struct loop *, int);
122static void decide_unroll_stupid (struct loop *, int);
123static void decide_unroll_constant_iterations (struct loop *, int);
124static void decide_unroll_runtime_iterations (struct loop *, int);
125static void peel_loop_simple (struct loops *, struct loop *);
126static void peel_loop_completely (struct loops *, struct loop *);
127static void unroll_loop_stupid (struct loops *, struct loop *);
128static void unroll_loop_constant_iterations (struct loops *, struct loop *);
129static void unroll_loop_runtime_iterations (struct loops *, struct loop *);
130static struct opt_info *analyze_insns_in_loop (struct loop *);
131static void opt_info_start_duplication (struct opt_info *);
132static void apply_opt_in_copies (struct opt_info *, unsigned, bool, bool);
133static void free_opt_info (struct opt_info *);
134static struct var_to_expand *analyze_insn_to_expand_var (struct loop*, rtx);
135static bool referenced_in_one_insn_in_loop_p (struct loop *, rtx);
136static struct iv_to_split *analyze_iv_to_split_insn (rtx);
137static void expand_var_during_unrolling (struct var_to_expand *, rtx);
138static int insert_var_expansion_initialization (void **, void *);
139static int combine_var_copies_in_loop_exit (void **, void *);
140static int release_var_copies (void **, void *);
141static rtx get_expansion (struct var_to_expand *);
142
/* Unroll and/or peel (depending on FLAGS) LOOPS.  FLAGS is a mask of
   UAP_* values selecting which transformations may be applied.  The
   per-loop choice is stored in loop->lpt_decision by
   decide_unrolling_and_peeling and acted upon here; complete peeling
   is both decided and performed up front by peel_loops_completely.  */
void
unroll_and_peel_loops (struct loops *loops, int flags)
{
  struct loop *loop, *next;
  bool check;

  /* First perform complete loop peeling (it is almost surely a win,
     and affects parameters for further decision a lot).  */
  peel_loops_completely (loops, flags);

  /* Now decide rest of unrolling and peeling.  */
  decide_unrolling_and_peeling (loops, flags);

  /* Descend to an innermost loop to start the inner-first walk.  */
  loop = loops->tree_root;
  while (loop->inner)
    loop = loop->inner;

  /* Scan the loops, inner ones first.  */
  while (loop != loops->tree_root)
    {
      /* Compute the successor in inner-first order before LOOP is
	 transformed: the innermost descendant of the next sibling,
	 or the parent when no sibling is left.  */
      if (loop->next)
	{
	  next = loop->next;
	  while (next->inner)
	    next = next->inner;
	}
      else
	next = loop->outer;

      check = true;
      /* And perform the appropriate transformations.  */
      switch (loop->lpt_decision.decision)
	{
	case LPT_PEEL_COMPLETELY:
	  /* Already done.  */
	  gcc_unreachable ();
	case LPT_PEEL_SIMPLE:
	  peel_loop_simple (loops, loop);
	  break;
	case LPT_UNROLL_CONSTANT:
	  unroll_loop_constant_iterations (loops, loop);
	  break;
	case LPT_UNROLL_RUNTIME:
	  unroll_loop_runtime_iterations (loops, loop);
	  break;
	case LPT_UNROLL_STUPID:
	  unroll_loop_stupid (loops, loop);
	  break;
	case LPT_NONE:
	  check = false;
	  break;
	default:
	  gcc_unreachable ();
	}
      if (check)
	{
#ifdef ENABLE_CHECKING
	  /* Verify CFG/loop consistency after each transformation.  */
	  verify_dominators (CDI_DOMINATORS);
	  verify_loop_structure (loops);
#endif
	}
      loop = next;
    }

  iv_analysis_done ();
}
210
211/* Check whether exit of the LOOP is at the end of loop body.  */
212
213static bool
214loop_exit_at_end_p (struct loop *loop)
215{
216  struct niter_desc *desc = get_simple_loop_desc (loop);
217  rtx insn;
218
219  if (desc->in_edge->dest != loop->latch)
220    return false;
221
222  /* Check that the latch is empty.  */
223  FOR_BB_INSNS (loop->latch, insn)
224    {
225      if (INSN_P (insn))
226	return false;
227    }
228
229  return true;
230}
231
/* Check whether to peel LOOPS (depending on FLAGS) completely and do so.
   Walks the loop array from the highest index down (inner loops have
   higher indices than their parents) and completely peels every loop
   for which decide_peel_once_rolling or decide_peel_completely votes
   LPT_PEEL_COMPLETELY.  */
static void
peel_loops_completely (struct loops *loops, int flags)
{
  struct loop *loop;
  unsigned i;

  /* Scan the loops, the inner ones first.  Index 0 is intentionally
     skipped (it is not considered for peeling).  */
  for (i = loops->num - 1; i > 0; i--)
    {
      loop = loops->parray[i];
      /* Slots of removed loops may be NULL.  */
      if (!loop)
	continue;

      loop->lpt_decision.decision = LPT_NONE;

      if (dump_file)
	fprintf (dump_file,
		 "\n;; *** Considering loop %d for complete peeling ***\n",
		 loop->num);

      loop->ninsns = num_loop_insns (loop);

      /* First try the cheap "rolls exactly once" check, then the
	 general constant-iterations check.  */
      decide_peel_once_rolling (loop, flags);
      if (loop->lpt_decision.decision == LPT_NONE)
	decide_peel_completely (loop, flags);

      if (loop->lpt_decision.decision == LPT_PEEL_COMPLETELY)
	{
	  peel_loop_completely (loops, loop);
#ifdef ENABLE_CHECKING
	  /* Verify CFG/loop consistency after the transformation.  */
	  verify_dominators (CDI_DOMINATORS);
	  verify_loop_structure (loops);
#endif
	}
    }
}
269
/* Decide whether unroll or peel LOOPS (depending on FLAGS) and how much.
   Walks all loops inner-first, filters out cold, non-duplicable and
   non-innermost loops, and records the chosen transformation (if any)
   in each loop's lpt_decision; the actual transformations are done
   later by unroll_and_peel_loops.  */
static void
decide_unrolling_and_peeling (struct loops *loops, int flags)
{
  struct loop *loop = loops->tree_root, *next;

  /* Descend to an innermost loop to start the inner-first walk.  */
  while (loop->inner)
    loop = loop->inner;

  /* Scan the loops, inner ones first.  */
  while (loop != loops->tree_root)
    {
      /* Compute the successor in inner-first order: the innermost
	 descendant of the next sibling, or else the parent.  */
      if (loop->next)
	{
	  next = loop->next;
	  while (next->inner)
	    next = next->inner;
	}
      else
	next = loop->outer;

      loop->lpt_decision.decision = LPT_NONE;

      if (dump_file)
	fprintf (dump_file, "\n;; *** Considering loop %d ***\n", loop->num);

      /* Do not peel cold areas.  */
      if (!maybe_hot_bb_p (loop->header))
	{
	  if (dump_file)
	    fprintf (dump_file, ";; Not considering loop, cold area\n");
	  loop = next;
	  continue;
	}

      /* Can the loop be manipulated?  */
      if (!can_duplicate_loop_p (loop))
	{
	  if (dump_file)
	    fprintf (dump_file,
		     ";; Not considering loop, cannot duplicate\n");
	  loop = next;
	  continue;
	}

      /* Skip non-innermost loops.  */
      if (loop->inner)
	{
	  if (dump_file)
	    fprintf (dump_file, ";; Not considering loop, is not innermost\n");
	  loop = next;
	  continue;
	}

      /* Size estimates used by the decide_* heuristics below.  */
      loop->ninsns = num_loop_insns (loop);
      loop->av_ninsns = average_num_loop_insns (loop);

      /* Try transformations one by one in decreasing order of
	 priority.  */

      decide_unroll_constant_iterations (loop, flags);
      if (loop->lpt_decision.decision == LPT_NONE)
	decide_unroll_runtime_iterations (loop, flags);
      if (loop->lpt_decision.decision == LPT_NONE)
	decide_unroll_stupid (loop, flags);
      if (loop->lpt_decision.decision == LPT_NONE)
	decide_peel_simple (loop, flags);

      loop = next;
    }
}
341
342/* Decide whether the LOOP is once rolling and suitable for complete
343   peeling.  */
344static void
345decide_peel_once_rolling (struct loop *loop, int flags ATTRIBUTE_UNUSED)
346{
347  struct niter_desc *desc;
348
349  if (dump_file)
350    fprintf (dump_file, "\n;; Considering peeling once rolling loop\n");
351
352  /* Is the loop small enough?  */
353  if ((unsigned) PARAM_VALUE (PARAM_MAX_ONCE_PEELED_INSNS) < loop->ninsns)
354    {
355      if (dump_file)
356	fprintf (dump_file, ";; Not considering loop, is too big\n");
357      return;
358    }
359
360  /* Check for simple loops.  */
361  desc = get_simple_loop_desc (loop);
362
363  /* Check number of iterations.  */
364  if (!desc->simple_p
365      || desc->assumptions
366      || desc->infinite
367      || !desc->const_iter
368      || desc->niter != 0)
369    {
370      if (dump_file)
371	fprintf (dump_file,
372		 ";; Unable to prove that the loop rolls exactly once\n");
373      return;
374    }
375
376  /* Success.  */
377  if (dump_file)
378    fprintf (dump_file, ";; Decided to peel exactly once rolling loop\n");
379  loop->lpt_decision.decision = LPT_PEEL_COMPLETELY;
380}
381
382/* Decide whether the LOOP is suitable for complete peeling.  */
383static void
384decide_peel_completely (struct loop *loop, int flags ATTRIBUTE_UNUSED)
385{
386  unsigned npeel;
387  struct niter_desc *desc;
388
389  if (dump_file)
390    fprintf (dump_file, "\n;; Considering peeling completely\n");
391
392  /* Skip non-innermost loops.  */
393  if (loop->inner)
394    {
395      if (dump_file)
396	fprintf (dump_file, ";; Not considering loop, is not innermost\n");
397      return;
398    }
399
400  /* Do not peel cold areas.  */
401  if (!maybe_hot_bb_p (loop->header))
402    {
403      if (dump_file)
404	fprintf (dump_file, ";; Not considering loop, cold area\n");
405      return;
406    }
407
408  /* Can the loop be manipulated?  */
409  if (!can_duplicate_loop_p (loop))
410    {
411      if (dump_file)
412	fprintf (dump_file,
413		 ";; Not considering loop, cannot duplicate\n");
414      return;
415    }
416
417  /* npeel = number of iterations to peel.  */
418  npeel = PARAM_VALUE (PARAM_MAX_COMPLETELY_PEELED_INSNS) / loop->ninsns;
419  if (npeel > (unsigned) PARAM_VALUE (PARAM_MAX_COMPLETELY_PEEL_TIMES))
420    npeel = PARAM_VALUE (PARAM_MAX_COMPLETELY_PEEL_TIMES);
421
422  /* Is the loop small enough?  */
423  if (!npeel)
424    {
425      if (dump_file)
426	fprintf (dump_file, ";; Not considering loop, is too big\n");
427      return;
428    }
429
430  /* Check for simple loops.  */
431  desc = get_simple_loop_desc (loop);
432
433  /* Check number of iterations.  */
434  if (!desc->simple_p
435      || desc->assumptions
436      || !desc->const_iter
437      || desc->infinite)
438    {
439      if (dump_file)
440	fprintf (dump_file,
441		 ";; Unable to prove that the loop iterates constant times\n");
442      return;
443    }
444
445  if (desc->niter > npeel - 1)
446    {
447      if (dump_file)
448	{
449	  fprintf (dump_file,
450		   ";; Not peeling loop completely, rolls too much (");
451	  fprintf (dump_file, HOST_WIDEST_INT_PRINT_DEC, desc->niter);
452	  fprintf (dump_file, " iterations > %d [maximum peelings])\n", npeel);
453	}
454      return;
455    }
456
457  /* Success.  */
458  if (dump_file)
459    fprintf (dump_file, ";; Decided to peel loop completely\n");
460  loop->lpt_decision.decision = LPT_PEEL_COMPLETELY;
461}
462
/* Peel all iterations of LOOP, remove exit edges and cancel the loop
   completely.  The transformation done:

   for (i = 0; i < 4; i++)
     body;

   ==>

   i = 0;
   body; i++;
   body; i++;
   body; i++;
   body; i++;
   */
static void
peel_loop_completely (struct loops *loops, struct loop *loop)
{
  sbitmap wont_exit;
  unsigned HOST_WIDE_INT npeel;
  unsigned n_remove_edges, i;
  edge *remove_edges, ein;
  struct niter_desc *desc = get_simple_loop_desc (loop);
  struct opt_info *opt_info = NULL;

  npeel = desc->niter;

  if (npeel)
    {
      bool ok;

      /* WONT_EXIT marks the copies known not to exit the loop: all of
	 them except copy 0, and except copy 1 when the iteration count
	 is not fully reliable (noloop_assumptions).  */
      wont_exit = sbitmap_alloc (npeel + 1);
      sbitmap_ones (wont_exit);
      RESET_BIT (wont_exit, 0);
      if (desc->noloop_assumptions)
	RESET_BIT (wont_exit, 1);

      remove_edges = XCNEWVEC (edge, npeel);
      n_remove_edges = 0;

      /* Gather induction variables to split in the copies, if asked.  */
      if (flag_split_ivs_in_unroller)
        opt_info = analyze_insns_in_loop (loop);

      opt_info_start_duplication (opt_info);
      ok = duplicate_loop_to_header_edge (loop, loop_preheader_edge (loop),
					  loops, npeel,
					  wont_exit, desc->out_edge,
					  remove_edges, &n_remove_edges,
					  DLTHE_FLAG_UPDATE_FREQ
					  | DLTHE_FLAG_COMPLETTE_PEEL
					  | (opt_info
					     ? DLTHE_RECORD_COPY_NUMBER : 0));
      gcc_assert (ok);

      free (wont_exit);

      if (opt_info)
 	{
 	  apply_opt_in_copies (opt_info, npeel, false, true);
 	  free_opt_info (opt_info);
 	}

      /* Remove the exit edges.  */
      for (i = 0; i < n_remove_edges; i++)
	remove_path (loops, remove_edges[i]);
      free (remove_edges);
    }

  /* Grab the edge before releasing the loop description, which
     invalidates DESC.  */
  ein = desc->in_edge;
  free_simple_loop_desc (loop);

  /* Now remove the unreachable part of the last iteration and cancel
     the loop.  */
  remove_path (loops, ein);

  if (dump_file)
    fprintf (dump_file, ";; Peeled loop completely, %d times\n", (int) npeel);
}
540
/* Decide whether to unroll LOOP iterating constant number of times
   and how much.  Requires UAP_UNROLL in FLAGS.  On success stores
   LPT_UNROLL_CONSTANT and the chosen factor minus one into
   LOOP->lpt_decision.  */

static void
decide_unroll_constant_iterations (struct loop *loop, int flags)
{
  unsigned nunroll, nunroll_by_av, best_copies, best_unroll = 0, n_copies, i;
  struct niter_desc *desc;

  if (!(flags & UAP_UNROLL))
    {
      /* We were not asked to, just return back silently.  */
      return;
    }

  if (dump_file)
    fprintf (dump_file,
	     "\n;; Considering unrolling loop with constant "
	     "number of iterations\n");

  /* nunroll = total number of copies of the original loop body in
     unrolled loop (i.e. if it is 2, we have to duplicate loop body
     once).  Bounded by both the total and the average size limits.  */
  nunroll = PARAM_VALUE (PARAM_MAX_UNROLLED_INSNS) / loop->ninsns;
  nunroll_by_av
    = PARAM_VALUE (PARAM_MAX_AVERAGE_UNROLLED_INSNS) / loop->av_ninsns;
  if (nunroll > nunroll_by_av)
    nunroll = nunroll_by_av;
  if (nunroll > (unsigned) PARAM_VALUE (PARAM_MAX_UNROLL_TIMES))
    nunroll = PARAM_VALUE (PARAM_MAX_UNROLL_TIMES);

  /* Skip big loops.  */
  if (nunroll <= 1)
    {
      if (dump_file)
	fprintf (dump_file, ";; Not considering loop, is too big\n");
      return;
    }

  /* Check for simple loops.  */
  desc = get_simple_loop_desc (loop);

  /* Check number of iterations.  */
  if (!desc->simple_p || !desc->const_iter || desc->assumptions)
    {
      if (dump_file)
	fprintf (dump_file,
		 ";; Unable to prove that the loop iterates constant times\n");
      return;
    }

  /* Check whether the loop rolls enough to consider.  */
  if (desc->niter < 2 * nunroll)
    {
      if (dump_file)
	fprintf (dump_file, ";; Not unrolling loop, doesn't roll\n");
      return;
    }

  /* Success; now compute number of iterations to unroll.  We alter
     nunroll so that as few as possible copies of loop body are
     necessary, while still not decreasing the number of unrollings
     too much (at most by 1).  */
  best_copies = 2 * nunroll + 10;

  i = 2 * nunroll + 2;
  if (i - 1 >= desc->niter)
    i = desc->niter - 2;

  /* For each candidate unroll factor I + 1, compute the total number
     of body copies needed (including the peeled prologue/epilogue
     copies) and remember the cheapest.  */
  for (; i >= nunroll - 1; i--)
    {
      unsigned exit_mod = desc->niter % (i + 1);

      if (!loop_exit_at_end_p (loop))
	n_copies = exit_mod + i + 1;
      else if (exit_mod != (unsigned) i
	       || desc->noloop_assumptions != NULL_RTX)
	n_copies = exit_mod + i + 2;
      else
	n_copies = i + 1;

      if (n_copies < best_copies)
	{
	  best_copies = n_copies;
	  best_unroll = i;
	}
    }

  if (dump_file)
    fprintf (dump_file, ";; max_unroll %d (%d copies, initial %d).\n",
	     best_unroll + 1, best_copies, nunroll);

  loop->lpt_decision.decision = LPT_UNROLL_CONSTANT;
  loop->lpt_decision.times = best_unroll;

  if (dump_file)
    fprintf (dump_file,
	     ";; Decided to unroll the constant times rolling loop, %d times.\n",
	     loop->lpt_decision.times);
}
640
/* Unroll LOOP with constant number of iterations LOOP->LPT_DECISION.TIMES + 1
   times.  The transformation does this:

   for (i = 0; i < 102; i++)
     body;

   ==>

   i = 0;
   body; i++;
   body; i++;
   while (i < 102)
     {
       body; i++;
       body; i++;
       body; i++;
       body; i++;
     }
  */
static void
unroll_loop_constant_iterations (struct loops *loops, struct loop *loop)
{
  unsigned HOST_WIDE_INT niter;
  unsigned exit_mod;
  sbitmap wont_exit;
  unsigned n_remove_edges, i;
  edge *remove_edges;
  unsigned max_unroll = loop->lpt_decision.times;
  struct niter_desc *desc = get_simple_loop_desc (loop);
  bool exit_at_end = loop_exit_at_end_p (loop);
  struct opt_info *opt_info = NULL;
  bool ok;

  niter = desc->niter;

  /* Should not get here (such loop should be peeled instead).  */
  gcc_assert (niter > max_unroll + 1);

  /* Number of iterations left over after unrolling by MAX_UNROLL + 1;
     these are peeled off before (or around) the unrolled body.  */
  exit_mod = niter % (max_unroll + 1);

  wont_exit = sbitmap_alloc (max_unroll + 1);
  sbitmap_ones (wont_exit);

  remove_edges = XCNEWVEC (edge, max_unroll + exit_mod + 1);
  n_remove_edges = 0;
  /* Gather IVs to split and accumulators to expand, if requested.  */
  if (flag_split_ivs_in_unroller
      || flag_variable_expansion_in_unroller)
    opt_info = analyze_insns_in_loop (loop);

  if (!exit_at_end)
    {
      /* The exit is not at the end of the loop; leave exit test
	 in the first copy, so that the loops that start with test
	 of exit condition have continuous body after unrolling.  */

      if (dump_file)
	fprintf (dump_file, ";; Condition on beginning of loop.\n");

      /* Peel exit_mod iterations.  */
      RESET_BIT (wont_exit, 0);
      if (desc->noloop_assumptions)
	RESET_BIT (wont_exit, 1);

      if (exit_mod)
	{
	  opt_info_start_duplication (opt_info);
          ok = duplicate_loop_to_header_edge (loop, loop_preheader_edge (loop),
					      loops, exit_mod,
					      wont_exit, desc->out_edge,
					      remove_edges, &n_remove_edges,
					      DLTHE_FLAG_UPDATE_FREQ
					      | (opt_info && exit_mod > 1
						 ? DLTHE_RECORD_COPY_NUMBER
						   : 0));
	  gcc_assert (ok);

          if (opt_info && exit_mod > 1)
 	    apply_opt_in_copies (opt_info, exit_mod, false, false);

	  /* The peeled copies consumed EXIT_MOD iterations, and the
	     remaining count is now known reliable.  */
	  desc->noloop_assumptions = NULL_RTX;
	  desc->niter -= exit_mod;
	  desc->niter_max -= exit_mod;
	}

      SET_BIT (wont_exit, 1);
    }
  else
    {
      /* Leave exit test in last copy, for the same reason as above if
	 the loop tests the condition at the end of loop body.  */

      if (dump_file)
	fprintf (dump_file, ";; Condition on end of loop.\n");

      /* We know that niter >= max_unroll + 2; so we do not need to care of
	 case when we would exit before reaching the loop.  So just peel
	 exit_mod + 1 iterations.  */
      if (exit_mod != max_unroll
	  || desc->noloop_assumptions)
	{
	  RESET_BIT (wont_exit, 0);
	  if (desc->noloop_assumptions)
	    RESET_BIT (wont_exit, 1);

          opt_info_start_duplication (opt_info);
	  ok = duplicate_loop_to_header_edge (loop, loop_preheader_edge (loop),
					      loops, exit_mod + 1,
					      wont_exit, desc->out_edge,
					      remove_edges, &n_remove_edges,
					      DLTHE_FLAG_UPDATE_FREQ
					      | (opt_info && exit_mod > 0
						 ? DLTHE_RECORD_COPY_NUMBER
						   : 0));
	  gcc_assert (ok);

          if (opt_info && exit_mod > 0)
  	    apply_opt_in_copies (opt_info, exit_mod + 1, false, false);

	  desc->niter -= exit_mod + 1;
	  desc->niter_max -= exit_mod + 1;
	  desc->noloop_assumptions = NULL_RTX;

	  SET_BIT (wont_exit, 0);
	  SET_BIT (wont_exit, 1);
	}

      RESET_BIT (wont_exit, max_unroll);
    }

  /* Now unroll the loop.  */

  opt_info_start_duplication (opt_info);
  ok = duplicate_loop_to_header_edge (loop, loop_latch_edge (loop),
				      loops, max_unroll,
				      wont_exit, desc->out_edge,
				      remove_edges, &n_remove_edges,
				      DLTHE_FLAG_UPDATE_FREQ
				      | (opt_info
					 ? DLTHE_RECORD_COPY_NUMBER
					   : 0));
  gcc_assert (ok);

  if (opt_info)
    {
      apply_opt_in_copies (opt_info, max_unroll, true, true);
      free_opt_info (opt_info);
    }

  free (wont_exit);

  if (exit_at_end)
    {
      basic_block exit_block = get_bb_copy (desc->in_edge->src);
      /* Find a new in and out edge; they are in the last copy we have made.  */

      if (EDGE_SUCC (exit_block, 0)->dest == desc->out_edge->dest)
	{
	  desc->out_edge = EDGE_SUCC (exit_block, 0);
	  desc->in_edge = EDGE_SUCC (exit_block, 1);
	}
      else
	{
	  desc->out_edge = EDGE_SUCC (exit_block, 1);
	  desc->in_edge = EDGE_SUCC (exit_block, 0);
	}
    }

  /* Update the iteration count of the now-unrolled loop.  The division
     is exact because the peeling above removed NITER mod
     (MAX_UNROLL + 1) iterations.  */
  desc->niter /= max_unroll + 1;
  desc->niter_max /= max_unroll + 1;
  desc->niter_expr = GEN_INT (desc->niter);

  /* Remove the edges.  */
  for (i = 0; i < n_remove_edges; i++)
    remove_path (loops, remove_edges[i]);
  free (remove_edges);

  if (dump_file)
    fprintf (dump_file,
	     ";; Unrolled loop %d times, constant # of iterations %i insns\n",
	     max_unroll, num_loop_insns (loop));
}
822
823/* Decide whether to unroll LOOP iterating runtime computable number of times
824   and how much.  */
825static void
826decide_unroll_runtime_iterations (struct loop *loop, int flags)
827{
828  unsigned nunroll, nunroll_by_av, i;
829  struct niter_desc *desc;
830
831  if (!(flags & UAP_UNROLL))
832    {
833      /* We were not asked to, just return back silently.  */
834      return;
835    }
836
837  if (dump_file)
838    fprintf (dump_file,
839	     "\n;; Considering unrolling loop with runtime "
840	     "computable number of iterations\n");
841
842  /* nunroll = total number of copies of the original loop body in
843     unrolled loop (i.e. if it is 2, we have to duplicate loop body once.  */
844  nunroll = PARAM_VALUE (PARAM_MAX_UNROLLED_INSNS) / loop->ninsns;
845  nunroll_by_av = PARAM_VALUE (PARAM_MAX_AVERAGE_UNROLLED_INSNS) / loop->av_ninsns;
846  if (nunroll > nunroll_by_av)
847    nunroll = nunroll_by_av;
848  if (nunroll > (unsigned) PARAM_VALUE (PARAM_MAX_UNROLL_TIMES))
849    nunroll = PARAM_VALUE (PARAM_MAX_UNROLL_TIMES);
850
851  /* Skip big loops.  */
852  if (nunroll <= 1)
853    {
854      if (dump_file)
855	fprintf (dump_file, ";; Not considering loop, is too big\n");
856      return;
857    }
858
859  /* Check for simple loops.  */
860  desc = get_simple_loop_desc (loop);
861
862  /* Check simpleness.  */
863  if (!desc->simple_p || desc->assumptions)
864    {
865      if (dump_file)
866	fprintf (dump_file,
867		 ";; Unable to prove that the number of iterations "
868		 "can be counted in runtime\n");
869      return;
870    }
871
872  if (desc->const_iter)
873    {
874      if (dump_file)
875	fprintf (dump_file, ";; Loop iterates constant times\n");
876      return;
877    }
878
879  /* If we have profile feedback, check whether the loop rolls.  */
880  if (loop->header->count && expected_loop_iterations (loop) < 2 * nunroll)
881    {
882      if (dump_file)
883	fprintf (dump_file, ";; Not unrolling loop, doesn't roll\n");
884      return;
885    }
886
887  /* Success; now force nunroll to be power of 2, as we are unable to
888     cope with overflows in computation of number of iterations.  */
889  for (i = 1; 2 * i <= nunroll; i *= 2)
890    continue;
891
892  loop->lpt_decision.decision = LPT_UNROLL_RUNTIME;
893  loop->lpt_decision.times = i - 1;
894
895  if (dump_file)
896    fprintf (dump_file,
897	     ";; Decided to unroll the runtime computable "
898	     "times rolling loop, %d times.\n",
899	     loop->lpt_decision.times);
900}
901
902/* Unroll LOOP for that we are able to count number of iterations in runtime
903   LOOP->LPT_DECISION.TIMES + 1 times.  The transformation does this (with some
904   extra care for case n < 0):
905
906   for (i = 0; i < n; i++)
907     body;
908
909   ==>
910
911   i = 0;
912   mod = n % 4;
913
914   switch (mod)
915     {
916       case 3:
917         body; i++;
918       case 2:
919         body; i++;
920       case 1:
921         body; i++;
922       case 0: ;
923     }
924
925   while (i < n)
926     {
927       body; i++;
928       body; i++;
929       body; i++;
930       body; i++;
931     }
932   */
933static void
934unroll_loop_runtime_iterations (struct loops *loops, struct loop *loop)
935{
936  rtx old_niter, niter, init_code, branch_code, tmp;
937  unsigned i, j, p;
938  basic_block preheader, *body, *dom_bbs, swtch, ezc_swtch;
939  unsigned n_dom_bbs;
940  sbitmap wont_exit;
941  int may_exit_copy;
942  unsigned n_peel, n_remove_edges;
943  edge *remove_edges, e;
944  bool extra_zero_check, last_may_exit;
945  unsigned max_unroll = loop->lpt_decision.times;
946  struct niter_desc *desc = get_simple_loop_desc (loop);
947  bool exit_at_end = loop_exit_at_end_p (loop);
948  struct opt_info *opt_info = NULL;
949  bool ok;
950
951  if (flag_split_ivs_in_unroller
952      || flag_variable_expansion_in_unroller)
953    opt_info = analyze_insns_in_loop (loop);
954
955  /* Remember blocks whose dominators will have to be updated.  */
956  dom_bbs = XCNEWVEC (basic_block, n_basic_blocks);
957  n_dom_bbs = 0;
958
959  body = get_loop_body (loop);
960  for (i = 0; i < loop->num_nodes; i++)
961    {
962      unsigned nldom;
963      basic_block *ldom;
964
965      nldom = get_dominated_by (CDI_DOMINATORS, body[i], &ldom);
966      for (j = 0; j < nldom; j++)
967	if (!flow_bb_inside_loop_p (loop, ldom[j]))
968	  dom_bbs[n_dom_bbs++] = ldom[j];
969
970      free (ldom);
971    }
972  free (body);
973
974  if (!exit_at_end)
975    {
976      /* Leave exit in first copy (for explanation why see comment in
977	 unroll_loop_constant_iterations).  */
978      may_exit_copy = 0;
979      n_peel = max_unroll - 1;
980      extra_zero_check = true;
981      last_may_exit = false;
982    }
983  else
984    {
985      /* Leave exit in last copy (for explanation why see comment in
986	 unroll_loop_constant_iterations).  */
987      may_exit_copy = max_unroll;
988      n_peel = max_unroll;
989      extra_zero_check = false;
990      last_may_exit = true;
991    }
992
993  /* Get expression for number of iterations.  */
994  start_sequence ();
995  old_niter = niter = gen_reg_rtx (desc->mode);
996  tmp = force_operand (copy_rtx (desc->niter_expr), niter);
997  if (tmp != niter)
998    emit_move_insn (niter, tmp);
999
1000  /* Count modulo by ANDing it with max_unroll; we use the fact that
1001     the number of unrollings is a power of two, and thus this is correct
1002     even if there is overflow in the computation.  */
1003  niter = expand_simple_binop (desc->mode, AND,
1004			       niter,
1005			       GEN_INT (max_unroll),
1006			       NULL_RTX, 0, OPTAB_LIB_WIDEN);
1007
1008  init_code = get_insns ();
1009  end_sequence ();
1010
1011  /* Precondition the loop.  */
1012  loop_split_edge_with (loop_preheader_edge (loop), init_code);
1013
1014  remove_edges = XCNEWVEC (edge, max_unroll + n_peel + 1);
1015  n_remove_edges = 0;
1016
1017  wont_exit = sbitmap_alloc (max_unroll + 2);
1018
1019  /* Peel the first copy of loop body (almost always we must leave exit test
1020     here; the only exception is when we have extra zero check and the number
1021     of iterations is reliable.  Also record the place of (possible) extra
1022     zero check.  */
1023  sbitmap_zero (wont_exit);
1024  if (extra_zero_check
1025      && !desc->noloop_assumptions)
1026    SET_BIT (wont_exit, 1);
1027  ezc_swtch = loop_preheader_edge (loop)->src;
1028  ok = duplicate_loop_to_header_edge (loop, loop_preheader_edge (loop),
1029				      loops, 1,
1030				      wont_exit, desc->out_edge,
1031				      remove_edges, &n_remove_edges,
1032				      DLTHE_FLAG_UPDATE_FREQ);
1033  gcc_assert (ok);
1034
1035  /* Record the place where switch will be built for preconditioning.  */
1036  swtch = loop_split_edge_with (loop_preheader_edge (loop),
1037				NULL_RTX);
1038
1039  for (i = 0; i < n_peel; i++)
1040    {
1041      /* Peel the copy.  */
1042      sbitmap_zero (wont_exit);
1043      if (i != n_peel - 1 || !last_may_exit)
1044	SET_BIT (wont_exit, 1);
1045      ok = duplicate_loop_to_header_edge (loop, loop_preheader_edge (loop),
1046					  loops, 1,
1047					  wont_exit, desc->out_edge,
1048					  remove_edges, &n_remove_edges,
1049					  DLTHE_FLAG_UPDATE_FREQ);
1050      gcc_assert (ok);
1051
1052      /* Create item for switch.  */
1053      j = n_peel - i - (extra_zero_check ? 0 : 1);
1054      p = REG_BR_PROB_BASE / (i + 2);
1055
1056      preheader = loop_split_edge_with (loop_preheader_edge (loop), NULL_RTX);
1057      branch_code = compare_and_jump_seq (copy_rtx (niter), GEN_INT (j), EQ,
1058					  block_label (preheader), p,
1059					  NULL_RTX);
1060
1061      swtch = loop_split_edge_with (single_pred_edge (swtch), branch_code);
1062      set_immediate_dominator (CDI_DOMINATORS, preheader, swtch);
1063      single_pred_edge (swtch)->probability = REG_BR_PROB_BASE - p;
1064      e = make_edge (swtch, preheader,
1065		     single_succ_edge (swtch)->flags & EDGE_IRREDUCIBLE_LOOP);
1066      e->probability = p;
1067    }
1068
1069  if (extra_zero_check)
1070    {
1071      /* Add branch for zero iterations.  */
1072      p = REG_BR_PROB_BASE / (max_unroll + 1);
1073      swtch = ezc_swtch;
1074      preheader = loop_split_edge_with (loop_preheader_edge (loop), NULL_RTX);
1075      branch_code = compare_and_jump_seq (copy_rtx (niter), const0_rtx, EQ,
1076					  block_label (preheader), p,
1077					  NULL_RTX);
1078
1079      swtch = loop_split_edge_with (single_succ_edge (swtch), branch_code);
1080      set_immediate_dominator (CDI_DOMINATORS, preheader, swtch);
1081      single_succ_edge (swtch)->probability = REG_BR_PROB_BASE - p;
1082      e = make_edge (swtch, preheader,
1083		     single_succ_edge (swtch)->flags & EDGE_IRREDUCIBLE_LOOP);
1084      e->probability = p;
1085    }
1086
1087  /* Recount dominators for outer blocks.  */
1088  iterate_fix_dominators (CDI_DOMINATORS, dom_bbs, n_dom_bbs);
1089
1090  /* And unroll loop.  */
1091
1092  sbitmap_ones (wont_exit);
1093  RESET_BIT (wont_exit, may_exit_copy);
1094  opt_info_start_duplication (opt_info);
1095
1096  ok = duplicate_loop_to_header_edge (loop, loop_latch_edge (loop),
1097				      loops, max_unroll,
1098				      wont_exit, desc->out_edge,
1099				      remove_edges, &n_remove_edges,
1100				      DLTHE_FLAG_UPDATE_FREQ
1101				      | (opt_info
1102					 ? DLTHE_RECORD_COPY_NUMBER
1103					   : 0));
1104  gcc_assert (ok);
1105
1106  if (opt_info)
1107    {
1108      apply_opt_in_copies (opt_info, max_unroll, true, true);
1109      free_opt_info (opt_info);
1110    }
1111
1112  free (wont_exit);
1113
1114  if (exit_at_end)
1115    {
1116      basic_block exit_block = get_bb_copy (desc->in_edge->src);
1117      /* Find a new in and out edge; they are in the last copy we have
1118	 made.  */
1119
1120      if (EDGE_SUCC (exit_block, 0)->dest == desc->out_edge->dest)
1121	{
1122	  desc->out_edge = EDGE_SUCC (exit_block, 0);
1123	  desc->in_edge = EDGE_SUCC (exit_block, 1);
1124	}
1125      else
1126	{
1127	  desc->out_edge = EDGE_SUCC (exit_block, 1);
1128	  desc->in_edge = EDGE_SUCC (exit_block, 0);
1129	}
1130    }
1131
1132  /* Remove the edges.  */
1133  for (i = 0; i < n_remove_edges; i++)
1134    remove_path (loops, remove_edges[i]);
1135  free (remove_edges);
1136
1137  /* We must be careful when updating the number of iterations due to
1138     preconditioning and the fact that the value must be valid at entry
1139     of the loop.  After passing through the above code, we see that
1140     the correct new number of iterations is this:  */
1141  gcc_assert (!desc->const_iter);
1142  desc->niter_expr =
1143    simplify_gen_binary (UDIV, desc->mode, old_niter,
1144			 GEN_INT (max_unroll + 1));
1145  desc->niter_max /= max_unroll + 1;
1146  if (exit_at_end)
1147    {
1148      desc->niter_expr =
1149	simplify_gen_binary (MINUS, desc->mode, desc->niter_expr, const1_rtx);
1150      desc->noloop_assumptions = NULL_RTX;
1151      desc->niter_max--;
1152    }
1153
1154  if (dump_file)
1155    fprintf (dump_file,
1156	     ";; Unrolled loop %d times, counting # of iterations "
1157	     "in runtime, %i insns\n",
1158	     max_unroll, num_loop_insns (loop));
1159
1160  if (dom_bbs)
1161    free (dom_bbs);
1162}
1163
1164/* Decide whether to simply peel LOOP and how much.  */
1165static void
1166decide_peel_simple (struct loop *loop, int flags)
1167{
1168  unsigned npeel;
1169  struct niter_desc *desc;
1170
1171  if (!(flags & UAP_PEEL))
1172    {
1173      /* We were not asked to, just return back silently.  */
1174      return;
1175    }
1176
1177  if (dump_file)
1178    fprintf (dump_file, "\n;; Considering simply peeling loop\n");
1179
1180  /* npeel = number of iterations to peel.  */
1181  npeel = PARAM_VALUE (PARAM_MAX_PEELED_INSNS) / loop->ninsns;
1182  if (npeel > (unsigned) PARAM_VALUE (PARAM_MAX_PEEL_TIMES))
1183    npeel = PARAM_VALUE (PARAM_MAX_PEEL_TIMES);
1184
1185  /* Skip big loops.  */
1186  if (!npeel)
1187    {
1188      if (dump_file)
1189	fprintf (dump_file, ";; Not considering loop, is too big\n");
1190      return;
1191    }
1192
1193  /* Check for simple loops.  */
1194  desc = get_simple_loop_desc (loop);
1195
1196  /* Check number of iterations.  */
1197  if (desc->simple_p && !desc->assumptions && desc->const_iter)
1198    {
1199      if (dump_file)
1200	fprintf (dump_file, ";; Loop iterates constant times\n");
1201      return;
1202    }
1203
1204  /* Do not simply peel loops with branches inside -- it increases number
1205     of mispredicts.  */
1206  if (num_loop_branches (loop) > 1)
1207    {
1208      if (dump_file)
1209	fprintf (dump_file, ";; Not peeling, contains branches\n");
1210      return;
1211    }
1212
1213  if (loop->header->count)
1214    {
1215      unsigned niter = expected_loop_iterations (loop);
1216      if (niter + 1 > npeel)
1217	{
1218	  if (dump_file)
1219	    {
1220	      fprintf (dump_file, ";; Not peeling loop, rolls too much (");
1221	      fprintf (dump_file, HOST_WIDEST_INT_PRINT_DEC,
1222		       (HOST_WIDEST_INT) (niter + 1));
1223	      fprintf (dump_file, " iterations > %d [maximum peelings])\n",
1224		       npeel);
1225	    }
1226	  return;
1227	}
1228      npeel = niter + 1;
1229    }
1230  else
1231    {
1232      /* For now we have no good heuristics to decide whether loop peeling
1233         will be effective, so disable it.  */
1234      if (dump_file)
1235	fprintf (dump_file,
1236		 ";; Not peeling loop, no evidence it will be profitable\n");
1237      return;
1238    }
1239
1240  /* Success.  */
1241  loop->lpt_decision.decision = LPT_PEEL_SIMPLE;
1242  loop->lpt_decision.times = npeel;
1243
1244  if (dump_file)
1245    fprintf (dump_file, ";; Decided to simply peel the loop, %d times.\n",
1246	     loop->lpt_decision.times);
1247}
1248
1249/* Peel a LOOP LOOP->LPT_DECISION.TIMES times.  The transformation:
1250   while (cond)
1251     body;
1252
1253   ==>
1254
1255   if (!cond) goto end;
1256   body;
1257   if (!cond) goto end;
1258   body;
1259   while (cond)
1260     body;
1261   end: ;
1262   */
1263static void
1264peel_loop_simple (struct loops *loops, struct loop *loop)
1265{
1266  sbitmap wont_exit;
1267  unsigned npeel = loop->lpt_decision.times;
1268  struct niter_desc *desc = get_simple_loop_desc (loop);
1269  struct opt_info *opt_info = NULL;
1270  bool ok;
1271
1272  if (flag_split_ivs_in_unroller && npeel > 1)
1273    opt_info = analyze_insns_in_loop (loop);
1274
1275  wont_exit = sbitmap_alloc (npeel + 1);
1276  sbitmap_zero (wont_exit);
1277
1278  opt_info_start_duplication (opt_info);
1279
1280  ok = duplicate_loop_to_header_edge (loop, loop_preheader_edge (loop),
1281				      loops, npeel, wont_exit,
1282				      NULL, NULL,
1283				      NULL, DLTHE_FLAG_UPDATE_FREQ
1284				      | (opt_info
1285					 ? DLTHE_RECORD_COPY_NUMBER
1286					   : 0));
1287  gcc_assert (ok);
1288
1289  free (wont_exit);
1290
1291  if (opt_info)
1292    {
1293      apply_opt_in_copies (opt_info, npeel, false, false);
1294      free_opt_info (opt_info);
1295    }
1296
1297  if (desc->simple_p)
1298    {
1299      if (desc->const_iter)
1300	{
1301	  desc->niter -= npeel;
1302	  desc->niter_expr = GEN_INT (desc->niter);
1303	  desc->noloop_assumptions = NULL_RTX;
1304	}
1305      else
1306	{
1307	  /* We cannot just update niter_expr, as its value might be clobbered
1308	     inside loop.  We could handle this by counting the number into
1309	     temporary just like we do in runtime unrolling, but it does not
1310	     seem worthwhile.  */
1311	  free_simple_loop_desc (loop);
1312	}
1313    }
1314  if (dump_file)
1315    fprintf (dump_file, ";; Peeling loop %d times\n", npeel);
1316}
1317
1318/* Decide whether to unroll LOOP stupidly and how much.  */
1319static void
1320decide_unroll_stupid (struct loop *loop, int flags)
1321{
1322  unsigned nunroll, nunroll_by_av, i;
1323  struct niter_desc *desc;
1324
1325  if (!(flags & UAP_UNROLL_ALL))
1326    {
1327      /* We were not asked to, just return back silently.  */
1328      return;
1329    }
1330
1331  if (dump_file)
1332    fprintf (dump_file, "\n;; Considering unrolling loop stupidly\n");
1333
1334  /* nunroll = total number of copies of the original loop body in
1335     unrolled loop (i.e. if it is 2, we have to duplicate loop body once.  */
1336  nunroll = PARAM_VALUE (PARAM_MAX_UNROLLED_INSNS) / loop->ninsns;
1337  nunroll_by_av
1338    = PARAM_VALUE (PARAM_MAX_AVERAGE_UNROLLED_INSNS) / loop->av_ninsns;
1339  if (nunroll > nunroll_by_av)
1340    nunroll = nunroll_by_av;
1341  if (nunroll > (unsigned) PARAM_VALUE (PARAM_MAX_UNROLL_TIMES))
1342    nunroll = PARAM_VALUE (PARAM_MAX_UNROLL_TIMES);
1343
1344  /* Skip big loops.  */
1345  if (nunroll <= 1)
1346    {
1347      if (dump_file)
1348	fprintf (dump_file, ";; Not considering loop, is too big\n");
1349      return;
1350    }
1351
1352  /* Check for simple loops.  */
1353  desc = get_simple_loop_desc (loop);
1354
1355  /* Check simpleness.  */
1356  if (desc->simple_p && !desc->assumptions)
1357    {
1358      if (dump_file)
1359	fprintf (dump_file, ";; The loop is simple\n");
1360      return;
1361    }
1362
1363  /* Do not unroll loops with branches inside -- it increases number
1364     of mispredicts.  */
1365  if (num_loop_branches (loop) > 1)
1366    {
1367      if (dump_file)
1368	fprintf (dump_file, ";; Not unrolling, contains branches\n");
1369      return;
1370    }
1371
1372  /* If we have profile feedback, check whether the loop rolls.  */
1373  if (loop->header->count
1374      && expected_loop_iterations (loop) < 2 * nunroll)
1375    {
1376      if (dump_file)
1377	fprintf (dump_file, ";; Not unrolling loop, doesn't roll\n");
1378      return;
1379    }
1380
1381  /* Success.  Now force nunroll to be power of 2, as it seems that this
1382     improves results (partially because of better alignments, partially
1383     because of some dark magic).  */
1384  for (i = 1; 2 * i <= nunroll; i *= 2)
1385    continue;
1386
1387  loop->lpt_decision.decision = LPT_UNROLL_STUPID;
1388  loop->lpt_decision.times = i - 1;
1389
1390  if (dump_file)
1391    fprintf (dump_file,
1392	     ";; Decided to unroll the loop stupidly, %d times.\n",
1393	     loop->lpt_decision.times);
1394}
1395
1396/* Unroll a LOOP LOOP->LPT_DECISION.TIMES times.  The transformation:
1397   while (cond)
1398     body;
1399
1400   ==>
1401
1402   while (cond)
1403     {
1404       body;
1405       if (!cond) break;
1406       body;
1407       if (!cond) break;
1408       body;
1409       if (!cond) break;
1410       body;
1411     }
1412   */
1413static void
1414unroll_loop_stupid (struct loops *loops, struct loop *loop)
1415{
1416  sbitmap wont_exit;
1417  unsigned nunroll = loop->lpt_decision.times;
1418  struct niter_desc *desc = get_simple_loop_desc (loop);
1419  struct opt_info *opt_info = NULL;
1420  bool ok;
1421
1422  if (flag_split_ivs_in_unroller
1423      || flag_variable_expansion_in_unroller)
1424    opt_info = analyze_insns_in_loop (loop);
1425
1426
1427  wont_exit = sbitmap_alloc (nunroll + 1);
1428  sbitmap_zero (wont_exit);
1429  opt_info_start_duplication (opt_info);
1430
1431  ok = duplicate_loop_to_header_edge (loop, loop_latch_edge (loop),
1432				      loops, nunroll, wont_exit,
1433				      NULL, NULL, NULL,
1434				      DLTHE_FLAG_UPDATE_FREQ
1435				      | (opt_info
1436					 ? DLTHE_RECORD_COPY_NUMBER
1437					   : 0));
1438  gcc_assert (ok);
1439
1440  if (opt_info)
1441    {
1442      apply_opt_in_copies (opt_info, nunroll, true, true);
1443      free_opt_info (opt_info);
1444    }
1445
1446  free (wont_exit);
1447
1448  if (desc->simple_p)
1449    {
1450      /* We indeed may get here provided that there are nontrivial assumptions
1451	 for a loop to be really simple.  We could update the counts, but the
1452	 problem is that we are unable to decide which exit will be taken
1453	 (not really true in case the number of iterations is constant,
1454	 but noone will do anything with this information, so we do not
1455	 worry about it).  */
1456      desc->simple_p = false;
1457    }
1458
1459  if (dump_file)
1460    fprintf (dump_file, ";; Unrolled loop %d times, %i insns\n",
1461	     nunroll, num_loop_insns (loop));
1462}
1463
1464/* A hash function for information about insns to split.  */
1465
1466static hashval_t
1467si_info_hash (const void *ivts)
1468{
1469  return (hashval_t) INSN_UID (((struct iv_to_split *) ivts)->insn);
1470}
1471
1472/* An equality functions for information about insns to split.  */
1473
1474static int
1475si_info_eq (const void *ivts1, const void *ivts2)
1476{
1477  const struct iv_to_split *i1 = ivts1;
1478  const struct iv_to_split *i2 = ivts2;
1479
1480  return i1->insn == i2->insn;
1481}
1482
1483/* Return a hash for VES, which is really a "var_to_expand *".  */
1484
1485static hashval_t
1486ve_info_hash (const void *ves)
1487{
1488  return (hashval_t) INSN_UID (((struct var_to_expand *) ves)->insn);
1489}
1490
1491/* Return true if IVTS1 and IVTS2 (which are really both of type
1492   "var_to_expand *") refer to the same instruction.  */
1493
1494static int
1495ve_info_eq (const void *ivts1, const void *ivts2)
1496{
1497  const struct var_to_expand *i1 = ivts1;
1498  const struct var_to_expand *i2 = ivts2;
1499
1500  return i1->insn == i2->insn;
1501}
1502
1503/* Returns true if REG is referenced in one insn in LOOP.  */
1504
1505bool
1506referenced_in_one_insn_in_loop_p (struct loop *loop, rtx reg)
1507{
1508  basic_block *body, bb;
1509  unsigned i;
1510  int count_ref = 0;
1511  rtx insn;
1512
1513  body = get_loop_body (loop);
1514  for (i = 0; i < loop->num_nodes; i++)
1515    {
1516      bb = body[i];
1517
1518      FOR_BB_INSNS (bb, insn)
1519      {
1520        if (rtx_referenced_p (reg, insn))
1521          count_ref++;
1522      }
1523    }
1524  return (count_ref  == 1);
1525}
1526
/* Determine whether INSN contains an accumulator
   which can be expanded into separate copies,
   one for each copy of the LOOP body.

   for (i = 0 ; i < n; i++)
     sum += a[i];

   ==>

   sum += a[i]
   ....
   i = i+1;
   sum1 += a[i]
   ....
   i = i+1
   sum2 += a[i];
   ....

   Return NULL if INSN contains no opportunity for expansion of accumulator.
   Otherwise, allocate a VAR_TO_EXPAND structure, fill it with the relevant
   information and return a pointer to it.
*/

static struct var_to_expand *
analyze_insn_to_expand_var (struct loop *loop, rtx insn)
{
  rtx set, dest, src, op1;
  struct var_to_expand *ves;
  enum machine_mode mode1, mode2;

  /* Only a single-SET insn can be an accumulation.  */
  set = single_set (insn);
  if (!set)
    return NULL;

  dest = SET_DEST (set);
  src = SET_SRC (set);

  /* Only additive and multiplicative accumulations are handled.  */
  if (GET_CODE (src) != PLUS
      && GET_CODE (src) != MINUS
      && GET_CODE (src) != MULT)
    return NULL;

  /* Hmm, this is a bit paradoxical.  We know that INSN is a valid insn
     in MD.  But if there is no optab to generate the insn, we can not
     perform the variable expansion.  This can happen if an MD provides
     an insn but not a named pattern to generate it, for example to avoid
     producing code that needs additional mode switches like for x87/mmx.

     So we check have_insn_for which looks for an optab for the operation
     in SRC.  If it doesn't exist, we can't perform the expansion even
     though INSN is valid.  */
  if (!have_insn_for (GET_CODE (src), GET_MODE (src)))
    return NULL;

  /* Defensive check; a PLUS/MINUS/MULT should always carry operand 0.  */
  if (!XEXP (src, 0))
    return NULL;

  op1 = XEXP (src, 0);

  /* The accumulator must be a (subreg of a) register.  */
  if (!REG_P (dest)
      && !(GET_CODE (dest) == SUBREG
           && REG_P (SUBREG_REG (dest))))
    return NULL;

  /* Require the shape "dest = dest OP x", i.e. the destination must also
     be the first source operand.  */
  if (!rtx_equal_p (dest, op1))
    return NULL;

  /* The accumulator may only live in this single insn; otherwise other
     uses would observe the wrong (partial) value after expansion.  */
  if (!referenced_in_one_insn_in_loop_p (loop, dest))
    return NULL;

  /* The accumulator must not feed into the other operand either.  */
  if (rtx_referenced_p (dest, XEXP (src, 1)))
    return NULL;

  /* Expansion reassociates the accumulation; for floating point that is
     only allowed under -funsafe-math-optimizations.  */
  mode1 = GET_MODE (dest);
  mode2 = GET_MODE (XEXP (src, 1));
  if ((FLOAT_MODE_P (mode1)
       || FLOAT_MODE_P (mode2))
      && !flag_unsafe_math_optimizations)
    return NULL;

  /* Record the accumulator to expand.  */
  ves = XNEW (struct var_to_expand);
  ves->insn = insn;
  ves->var_expansions = VEC_alloc (rtx, heap, 1);
  ves->reg = copy_rtx (dest);
  ves->op = GET_CODE (src);
  ves->expansion_count = 0;
  ves->reuse_expansion = 0;
  return ves;
}
1617
/* Determine whether there is an induction variable in INSN that
   we would like to split during unrolling.

   I.e. replace

   i = i + 1;
   ...
   i = i + 1;
   ...
   i = i + 1;
   ...

   type chains by

   i0 = i + 1
   ...
   i = i0 + 1
   ...
   i = i0 + 2
   ...

   Return NULL if INSN contains no interesting IVs.  Otherwise, allocate
   an IV_TO_SPLIT structure, fill it with the relevant information and return a
   pointer to it.  */

static struct iv_to_split *
analyze_iv_to_split_insn (rtx insn)
{
  rtx set, dest;
  struct rtx_iv iv;
  struct iv_to_split *ivts;
  bool ok;

  /* For now we just split the basic induction variables.  Later this may be
     extended for example by selecting also addresses of memory references.  */
  set = single_set (insn);
  if (!set)
    return NULL;

  /* Only register-valued IVs are handled.  */
  dest = SET_DEST (set);
  if (!REG_P (dest))
    return NULL;

  if (!biv_p (insn, dest))
    return NULL;

  ok = iv_analyze_result (insn, dest, &iv);

  /* This used to be an assert under the assumption that if biv_p returns
     true that iv_analyze_result must also return true.  However, that
     assumption is not strictly correct as evidenced by pr25569.

     Returning NULL when iv_analyze_result returns false is safe and
     avoids the problems in pr25569 until the iv_analyze_* routines
     can be fixed, which is apparently hard and time consuming
     according to their author.  */
  if (! ok)
    return NULL;

  /* A zero-step IV is not really inducting, and an extended IV would need
     the extension to be reproduced at every split point; skip both.  */
  if (iv.step == const0_rtx
      || iv.mode != iv.extend_mode)
    return NULL;

  /* Record the insn to split.  BASE_VAR is allocated lazily later (see
     allocate_basic_variable); LOC records the path to the IV use inside
     the SET -- a single step to operand 1, i.e. SET_SRC.  */
  ivts = XNEW (struct iv_to_split);
  ivts->insn = insn;
  ivts->base_var = NULL_RTX;
  ivts->step = iv.step;
  ivts->n_loc = 1;
  ivts->loc[0] = 1;

  return ivts;
}
1691
/* Determines which of insns in LOOP can be optimized.
   Return a OPT_INFO struct with the relevant hash tables filled
   with all insns to be optimized.  The FIRST_NEW_BLOCK field
   is undefined for the return value.  */

static struct opt_info *
analyze_insns_in_loop (struct loop *loop)
{
  basic_block *body, bb;
  unsigned i, num_edges = 0;
  struct opt_info *opt_info = XCNEW (struct opt_info);
  rtx insn;
  struct iv_to_split *ivts = NULL;
  struct var_to_expand *ves = NULL;
  PTR *slot1;
  PTR *slot2;
  edge *edges = get_loop_exit_edges (loop, &num_edges);
  bool can_apply = false;

  iv_analysis_loop_init (loop);

  body = get_loop_body (loop);

  /* The IV-splitting table is only needed when the option is enabled;
     a NULL table below doubles as the "disabled" flag.  */
  if (flag_split_ivs_in_unroller)
    opt_info->insns_to_split = htab_create (5 * loop->num_nodes,
                                            si_info_hash, si_info_eq, free);

  /* Record the loop exit bb and loop preheader before the unrolling.  */
  /* NOTE(review): the ->src NULL test presumably detects a missing
     preheader block that must be created first -- confirm against
     loop_preheader_edge's contract.  */
  if (!loop_preheader_edge (loop)->src)
    {
      loop_split_edge_with (loop_preheader_edge (loop), NULL_RTX);
      opt_info->loop_preheader = loop_split_edge_with (loop_preheader_edge (loop), NULL_RTX);
    }
  else
    opt_info->loop_preheader = loop_preheader_edge (loop)->src;

  /* Variable expansion requires a unique, simple exit where the partial
     accumulators can be summed back together.  */
  if (num_edges == 1
      && !(edges[0]->flags & EDGE_COMPLEX))
    {
      opt_info->loop_exit = loop_split_edge_with (edges[0], NULL_RTX);
      can_apply = true;
    }

  if (flag_variable_expansion_in_unroller
      && can_apply)
    opt_info->insns_with_var_to_expand = htab_create (5 * loop->num_nodes,
						      ve_info_hash, ve_info_eq, free);

  for (i = 0; i < loop->num_nodes; i++)
    {
      bb = body[i];
      /* Only blocks that execute on every iteration (i.e. dominate the
	 latch) are candidates; conditional blocks are skipped.  */
      if (!dominated_by_p (CDI_DOMINATORS, loop->latch, bb))
	continue;

      FOR_BB_INSNS (bb, insn)
      {
        if (!INSN_P (insn))
          continue;

        if (opt_info->insns_to_split)
          ivts = analyze_iv_to_split_insn (insn);

        /* An insn is either split or expanded, never both.  */
        if (ivts)
          {
            slot1 = htab_find_slot (opt_info->insns_to_split, ivts, INSERT);
            *slot1 = ivts;
            continue;
          }

        if (opt_info->insns_with_var_to_expand)
          ves = analyze_insn_to_expand_var (loop, insn);

        if (ves)
          {
            slot2 = htab_find_slot (opt_info->insns_with_var_to_expand, ves, INSERT);
            *slot2 = ves;
          }
      }
    }

  free (edges);
  free (body);
  return opt_info;
}
1776
1777/* Called just before loop duplication.  Records start of duplicated area
1778   to OPT_INFO.  */
1779
1780static void
1781opt_info_start_duplication (struct opt_info *opt_info)
1782{
1783  if (opt_info)
1784    opt_info->first_new_block = last_basic_block;
1785}
1786
/* Determine the number of iterations between initialization of the base
   variable and the current copy (N_COPY).  N_COPIES is the total number
   of newly created copies.  UNROLLING is true if we are unrolling
   (not peeling) the loop.  */

static unsigned
determine_split_iv_delta (unsigned n_copy, unsigned n_copies, bool unrolling)
{
  /* When unrolling, the initialization lives in the original body, which
     is copy number 0, so the copy number is itself the distance.  */
  if (unrolling)
    return n_copy;

  /* When peeling, copy number 1 carries the initialization and the
     original loop (copy number 0) executes last.  */
  return n_copy ? n_copy - 1 : n_copies;
}
1811
1812/* Locate in EXPR the expression corresponding to the location recorded
1813   in IVTS, and return a pointer to the RTX for this location.  */
1814
1815static rtx *
1816get_ivts_expr (rtx expr, struct iv_to_split *ivts)
1817{
1818  unsigned i;
1819  rtx *ret = &expr;
1820
1821  for (i = 0; i < ivts->n_loc; i++)
1822    ret = &XEXP (*ret, ivts->loc[i]);
1823
1824  return ret;
1825}
1826
1827/* Allocate basic variable for the induction variable chain.  Callback for
1828   htab_traverse.  */
1829
1830static int
1831allocate_basic_variable (void **slot, void *data ATTRIBUTE_UNUSED)
1832{
1833  struct iv_to_split *ivts = *slot;
1834  rtx expr = *get_ivts_expr (single_set (ivts->insn), ivts);
1835
1836  ivts->base_var = gen_reg_rtx (GET_MODE (expr));
1837
1838  return 1;
1839}
1840
1841/* Insert initialization of basic variable of IVTS before INSN, taking
1842   the initial value from INSN.  */
1843
1844static void
1845insert_base_initialization (struct iv_to_split *ivts, rtx insn)
1846{
1847  rtx expr = copy_rtx (*get_ivts_expr (single_set (insn), ivts));
1848  rtx seq;
1849
1850  start_sequence ();
1851  expr = force_operand (expr, ivts->base_var);
1852  if (expr != ivts->base_var)
1853    emit_move_insn (ivts->base_var, expr);
1854  seq = get_insns ();
1855  end_sequence ();
1856
1857  emit_insn_before (seq, insn);
1858}
1859
/* Replace the use of induction variable described in IVTS in INSN
   by base variable + DELTA * step.  Tries progressively more invasive
   strategies until the replacement is accepted by the insn recognizer.  */

static void
split_iv (struct iv_to_split *ivts, rtx insn, unsigned delta)
{
  rtx expr, *loc, seq, incr, var;
  enum machine_mode mode = GET_MODE (ivts->base_var);
  rtx src, dest, set;

  /* Construct base + DELTA * step.  */
  if (!delta)
    expr = ivts->base_var;
  else
    {
      incr = simplify_gen_binary (MULT, mode,
				  ivts->step, gen_int_mode (delta, mode));
      expr = simplify_gen_binary (PLUS, GET_MODE (ivts->base_var),
				  ivts->base_var, incr);
    }

  /* Figure out where to do the replacement.  */
  loc = get_ivts_expr (single_set (insn), ivts);

  /* If we can make the replacement right away, we're done.  */
  if (validate_change (insn, loc, expr, 0))
    return;

  /* Otherwise, force EXPR into a register and try again.  */
  start_sequence ();
  var = gen_reg_rtx (mode);
  expr = force_operand (expr, var);
  if (expr != var)
    emit_move_insn (var, expr);
  seq = get_insns ();
  end_sequence ();
  emit_insn_before (seq, insn);

  if (validate_change (insn, loc, var, 0))
    return;

  /* The last chance.  Try recreating the assignment in insn
     completely from scratch.  */
  set = single_set (insn);
  gcc_assert (set);

  start_sequence ();
  /* Patch the location directly, then let force_operand regenerate a
     recognizable instruction sequence for the whole assignment.  */
  *loc = var;
  src = copy_rtx (SET_SRC (set));
  dest = copy_rtx (SET_DEST (set));
  src = force_operand (src, dest);
  if (src != dest)
    emit_move_insn (dest, src);
  seq = get_insns ();
  end_sequence ();

  /* The regenerated sequence fully replaces the original insn.  */
  emit_insn_before (seq, insn);
  delete_insn (insn);
}
1919
1920
1921/* Return one expansion of the accumulator recorded in struct VE.  */
1922
1923static rtx
1924get_expansion (struct var_to_expand *ve)
1925{
1926  rtx reg;
1927
1928  if (ve->reuse_expansion == 0)
1929    reg = ve->reg;
1930  else
1931    reg = VEC_index (rtx, ve->var_expansions, ve->reuse_expansion - 1);
1932
1933  if (VEC_length (rtx, ve->var_expansions) == (unsigned) ve->reuse_expansion)
1934    ve->reuse_expansion = 0;
1935  else
1936    ve->reuse_expansion++;
1937
1938  return reg;
1939}
1940
1941
1942/* Given INSN replace the uses of the accumulator recorded in VE
1943   with a new register.  */
1944
1945static void
1946expand_var_during_unrolling (struct var_to_expand *ve, rtx insn)
1947{
1948  rtx new_reg, set;
1949  bool really_new_expansion = false;
1950
1951  set = single_set (insn);
1952  gcc_assert (set);
1953
1954  /* Generate a new register only if the expansion limit has not been
1955     reached.  Else reuse an already existing expansion.  */
1956  if (PARAM_VALUE (PARAM_MAX_VARIABLE_EXPANSIONS) > ve->expansion_count)
1957    {
1958      really_new_expansion = true;
1959      new_reg = gen_reg_rtx (GET_MODE (ve->reg));
1960    }
1961  else
1962    new_reg = get_expansion (ve);
1963
1964  validate_change (insn, &SET_DEST (set), new_reg, 1);
1965  validate_change (insn, &XEXP (SET_SRC (set), 0), new_reg, 1);
1966
1967  if (apply_change_group ())
1968    if (really_new_expansion)
1969      {
1970        VEC_safe_push (rtx, heap, ve->var_expansions, new_reg);
1971        ve->expansion_count++;
1972      }
1973}
1974
/* Initialize the variable expansions in loop preheader.
   Callbacks for htab_traverse.  PLACE_P is the loop-preheader
   basic block where the initialization of the expansions
   should take place.  */

static int
insert_var_expansion_initialization (void **slot, void *place_p)
{
  struct var_to_expand *ve = *slot;
  basic_block place = (basic_block)place_p;
  rtx seq, var, zero_init, insn;
  unsigned i;

  /* Nothing to initialize if no expansion registers were created.  */
  if (VEC_length (rtx, ve->var_expansions) == 0)
    return 1;

  start_sequence ();
  /* Additive accumulators start from the identity of addition (zero) ...  */
  if (ve->op == PLUS || ve->op == MINUS)
    for (i = 0; VEC_iterate (rtx, ve->var_expansions, i, var); i++)
      {
        zero_init =  CONST0_RTX (GET_MODE (var));
        emit_move_insn (var, zero_init);
      }
  /* ... multiplicative ones from the identity of multiplication (one).  */
  else if (ve->op == MULT)
    for (i = 0; VEC_iterate (rtx, ve->var_expansions, i, var); i++)
      {
        zero_init =  CONST1_RTX (GET_MODE (var));
        emit_move_insn (var, zero_init);
      }

  seq = get_insns ();
  end_sequence ();

  /* Skip to the NOTE_INSN_BASIC_BLOCK marker so the initializations are
     emitted inside PLACE, after its block note.  */
  insn = BB_HEAD (place);
  while (!NOTE_INSN_BASIC_BLOCK_P (insn))
    insn = NEXT_INSN (insn);

  emit_insn_after (seq, insn);
  /* Continue traversing the hash table.  */
  return 1;
}
2016
/*  Combine the variable expansions at the loop exit.
    Callbacks for htab_traverse.  PLACE_P is the loop exit
    basic block where the summation of the expansions should
    take place.  */

static int
combine_var_copies_in_loop_exit (void **slot, void *place_p)
{
  struct var_to_expand *ve = *slot;
  basic_block place = (basic_block)place_p;
  rtx sum = ve->reg;
  rtx expr, seq, var, insn;
  unsigned i;

  /* Nothing to combine if no expansion registers were created.  */
  if (VEC_length (rtx, ve->var_expansions) == 0)
    return 1;

  start_sequence ();
  /* PLUS and MINUS expansions are folded into the result by addition,
     MULT expansions by multiplication.  */
  if (ve->op == PLUS || ve->op == MINUS)
    for (i = 0; VEC_iterate (rtx, ve->var_expansions, i, var); i++)
      {
        sum = simplify_gen_binary (PLUS, GET_MODE (ve->reg),
                                   var, sum);
      }
  else if (ve->op == MULT)
    for (i = 0; VEC_iterate (rtx, ve->var_expansions, i, var); i++)
      {
        sum = simplify_gen_binary (MULT, GET_MODE (ve->reg),
                                   var, sum);
      }

  /* Store the combined value back into the original accumulator.  */
  expr = force_operand (sum, ve->reg);
  if (expr != ve->reg)
    emit_move_insn (ve->reg, expr);
  seq = get_insns ();
  end_sequence ();

  /* Skip to the NOTE_INSN_BASIC_BLOCK marker so the combination is
     emitted inside PLACE, after its block note.  */
  insn = BB_HEAD (place);
  while (!NOTE_INSN_BASIC_BLOCK_P (insn))
    insn = NEXT_INSN (insn);

  emit_insn_after (seq, insn);

  /* Continue traversing the hash table.  */
  return 1;
}
2063
/* Apply loop optimizations in loop copies using the
   data which was gathered during the unrolling.  Structure
   OPT_INFO records that data.

   N_COPIES is the number of copies of the loop body created.
   UNROLLING is true if we unrolled (not peeled) the loop.
   REWRITE_ORIGINAL_LOOP is true if we should also rewrite the original body of
   the loop (as it should happen in complete unrolling, but not in ordinary
   peeling of the loop).  */

static void
apply_opt_in_copies (struct opt_info *opt_info,
                     unsigned n_copies, bool unrolling,
                     bool rewrite_original_loop)
{
  unsigned i, delta;
  basic_block bb, orig_bb;
  rtx insn, orig_insn, next;
  struct iv_to_split ivts_templ, *ivts;
  struct var_to_expand ve_templ, *ves;

  /* Sanity check -- we need to put initialization in the original loop
     body.  */
  gcc_assert (!unrolling || rewrite_original_loop);

  /* Allocate the basic variables (i0).  */
  if (opt_info->insns_to_split)
    htab_traverse (opt_info->insns_to_split, allocate_basic_variable, NULL);

  /* Walk all basic blocks created by the duplication and transform the
     recorded insns in each copy.  */
  for (i = opt_info->first_new_block; i < (unsigned) last_basic_block; i++)
    {
      bb = BASIC_BLOCK (i);
      orig_bb = get_bb_original (bb);

      /* bb->aux holds position in copy sequence initialized by
	 duplicate_loop_to_header_edge.  */
      delta = determine_split_iv_delta ((size_t)bb->aux, n_copies,
					unrolling);
      bb->aux = 0;
      orig_insn = BB_HEAD (orig_bb);
      /* Walk the copy and the original block in lockstep: each real insn
	 of the copy is matched with the next real insn of the original,
	 which is the hash-table key for the recorded optimizations.  */
      for (insn = BB_HEAD (bb); insn != NEXT_INSN (BB_END (bb)); insn = next)
        {
          next = NEXT_INSN (insn);
          if (!INSN_P (insn))
            continue;

          /* Skip notes and other non-insns so ORIG_INSN stays in sync
	     with INSN.  */
          while (!INSN_P (orig_insn))
            orig_insn = NEXT_INSN (orig_insn);

          ivts_templ.insn = orig_insn;
          ve_templ.insn = orig_insn;

          /* Apply splitting iv optimization.  */
          if (opt_info->insns_to_split)
            {
              ivts = htab_find (opt_info->insns_to_split, &ivts_templ);

              if (ivts)
                {
		  gcc_assert (GET_CODE (PATTERN (insn))
			      == GET_CODE (PATTERN (orig_insn)));

		  /* Only the copy with delta == 0 gets the base-value
		     initialization of the split induction variable.  */
                  if (!delta)
                    insert_base_initialization (ivts, insn);
                  split_iv (ivts, insn, delta);
                }
            }
          /* Apply variable expansion optimization.  */
          if (unrolling && opt_info->insns_with_var_to_expand)
            {
              ves = htab_find (opt_info->insns_with_var_to_expand, &ve_templ);
              if (ves)
                {
		  gcc_assert (GET_CODE (PATTERN (insn))
			      == GET_CODE (PATTERN (orig_insn)));
                  expand_var_during_unrolling (ves, insn);
                }
            }
          orig_insn = NEXT_INSN (orig_insn);
        }
    }

  if (!rewrite_original_loop)
    return;

  /* Initialize the variable expansions in the loop preheader
     and take care of combining them at the loop exit.  */
  if (opt_info->insns_with_var_to_expand)
    {
      htab_traverse (opt_info->insns_with_var_to_expand,
                     insert_var_expansion_initialization,
                     opt_info->loop_preheader);
      htab_traverse (opt_info->insns_with_var_to_expand,
                     combine_var_copies_in_loop_exit,
                     opt_info->loop_exit);
    }

  /* Rewrite also the original loop body.  Find them as originals of the blocks
     in the last copied iteration, i.e. those that have
     get_bb_copy (get_bb_original (bb)) == bb.  */
  for (i = opt_info->first_new_block; i < (unsigned) last_basic_block; i++)
    {
      bb = BASIC_BLOCK (i);
      orig_bb = get_bb_original (bb);
      if (get_bb_copy (orig_bb) != bb)
	continue;

      delta = determine_split_iv_delta (0, n_copies, unrolling);
      /* NOTE(review): this loop iterates the insns of ORIG_BB but its
	 termination bound is NEXT_INSN (BB_END (bb)), i.e. the end of the
	 COPY -- confirm this reliance on the global insn chain ordering is
	 intentional and cannot run past ORIG_BB.  */
      for (orig_insn = BB_HEAD (orig_bb);
           orig_insn != NEXT_INSN (BB_END (bb));
           orig_insn = next)
        {
          next = NEXT_INSN (orig_insn);

          if (!INSN_P (orig_insn))
 	    continue;

          ivts_templ.insn = orig_insn;
          if (opt_info->insns_to_split)
            {
              ivts = htab_find (opt_info->insns_to_split, &ivts_templ);
              if (ivts)
                {
                  if (!delta)
                    insert_base_initialization (ivts, orig_insn);
                  split_iv (ivts, orig_insn, delta);
                  continue;
                }
            }

        }
    }
}
2196
2197/*  Release the data structures used for the variable expansion
2198    optimization.  Callbacks for htab_traverse.  */
2199
2200static int
2201release_var_copies (void **slot, void *data ATTRIBUTE_UNUSED)
2202{
2203  struct var_to_expand *ve = *slot;
2204
2205  VEC_free (rtx, heap, ve->var_expansions);
2206
2207  /* Continue traversing the hash table.  */
2208  return 1;
2209}
2210
2211/* Release OPT_INFO.  */
2212
2213static void
2214free_opt_info (struct opt_info *opt_info)
2215{
2216  if (opt_info->insns_to_split)
2217    htab_delete (opt_info->insns_to_split);
2218  if (opt_info->insns_with_var_to_expand)
2219    {
2220      htab_traverse (opt_info->insns_with_var_to_expand,
2221                     release_var_copies, NULL);
2222      htab_delete (opt_info->insns_with_var_to_expand);
2223    }
2224  free (opt_info);
2225}
2226