1132718Skan/* Loop unrolling and peeling.
2169689Skan   Copyright (C) 2002, 2003, 2004, 2005 Free Software Foundation, Inc.
3132718Skan
4132718SkanThis file is part of GCC.
5132718Skan
6132718SkanGCC is free software; you can redistribute it and/or modify it under
7132718Skanthe terms of the GNU General Public License as published by the Free
8132718SkanSoftware Foundation; either version 2, or (at your option) any later
9132718Skanversion.
10132718Skan
11132718SkanGCC is distributed in the hope that it will be useful, but WITHOUT ANY
12132718SkanWARRANTY; without even the implied warranty of MERCHANTABILITY or
13132718SkanFITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
14132718Skanfor more details.
15132718Skan
16132718SkanYou should have received a copy of the GNU General Public License
17132718Skanalong with GCC; see the file COPYING.  If not, write to the Free
18169689SkanSoftware Foundation, 51 Franklin Street, Fifth Floor, Boston, MA
19169689Skan02110-1301, USA.  */
20132718Skan
21132718Skan#include "config.h"
22132718Skan#include "system.h"
23132718Skan#include "coretypes.h"
24132718Skan#include "tm.h"
25132718Skan#include "rtl.h"
26132718Skan#include "hard-reg-set.h"
27169689Skan#include "obstack.h"
28132718Skan#include "basic-block.h"
29132718Skan#include "cfgloop.h"
30132718Skan#include "cfglayout.h"
31132718Skan#include "params.h"
32132718Skan#include "output.h"
33132718Skan#include "expr.h"
34169689Skan#include "hashtab.h"
35169689Skan#include "recog.h"
36132718Skan
37132718Skan/* This pass performs loop unrolling and peeling.  We only perform these
38132718Skan   optimizations on innermost loops (with single exception) because
39132718Skan   the impact on performance is greatest here, and we want to avoid
40132718Skan   unnecessary code size growth.  The gain is caused by greater sequentiality
41132718Skan   of code, better code to optimize for further passes and in some cases
42132718Skan   by fewer testings of exit conditions.  The main problem is code growth,
43132718Skan   that impacts performance negatively due to effect of caches.
44132718Skan
45132718Skan   What we do:
46132718Skan
47132718Skan   -- complete peeling of once-rolling loops; this is the above mentioned
48132718Skan      exception, as this causes loop to be cancelled completely and
49132718Skan      does not cause code growth
50132718Skan   -- complete peeling of loops that roll (small) constant times.
51132718Skan   -- simple peeling of first iterations of loops that do not roll much
52132718Skan      (according to profile feedback)
53132718Skan   -- unrolling of loops that roll constant times; this is almost always
54132718Skan      win, as we get rid of exit condition tests.
55132718Skan   -- unrolling of loops that roll number of times that we can compute
56132718Skan      in runtime; we also get rid of exit condition tests here, but there
57132718Skan      is the extra expense for calculating the number of iterations
58132718Skan   -- simple unrolling of remaining loops; this is performed only if we
59132718Skan      are asked to, as the gain is questionable in this case and often
60132718Skan      it may even slow down the code
61132718Skan   For more detailed descriptions of each of those, see comments at
62132718Skan   appropriate function below.
63132718Skan
64132718Skan   There is a lot of parameters (defined and described in params.def) that
65132718Skan   control how much we unroll/peel.
66132718Skan
67132718Skan   ??? A great problem is that we don't have a good way how to determine
68132718Skan   how many times we should unroll the loop; the experiments I have made
69132718Skan   showed that this choice may affect performance in order of several %.
70132718Skan   */
71132718Skan
72169689Skan/* Information about induction variables to split.  */
73169689Skan
74169689Skanstruct iv_to_split
75169689Skan{
76169689Skan  rtx insn;		/* The insn in that the induction variable occurs.  */
77169689Skan  rtx base_var;		/* The variable on that the values in the further
78169689Skan			   iterations are based.  */
79169689Skan  rtx step;		/* Step of the induction variable.  */
80169689Skan  unsigned n_loc;
81169689Skan  unsigned loc[3];	/* Location where the definition of the induction
82169689Skan			   variable occurs in the insn.  For example if
83169689Skan			   N_LOC is 2, the expression is located at
84169689Skan			   XEXP (XEXP (single_set, loc[0]), loc[1]).  */
85169689Skan};
86169689Skan
87169689Skan/* Information about accumulators to expand.  */
88169689Skan
89169689Skanstruct var_to_expand
90169689Skan{
91169689Skan  rtx insn;		           /* The insn in that the variable expansion occurs.  */
92169689Skan  rtx reg;                         /* The accumulator which is expanded.  */
93169689Skan  VEC(rtx,heap) *var_expansions;   /* The copies of the accumulator which is expanded.  */
94169689Skan  enum rtx_code op;                /* The type of the accumulation - addition, subtraction
95169689Skan                                      or multiplication.  */
96169689Skan  int expansion_count;             /* Count the number of expansions generated so far.  */
97169689Skan  int reuse_expansion;             /* The expansion we intend to reuse to expand
98169689Skan                                      the accumulator.  If REUSE_EXPANSION is 0 reuse
99169689Skan                                      the original accumulator.  Else use
100169689Skan                                      var_expansions[REUSE_EXPANSION - 1].  */
101169689Skan};
102169689Skan
103169689Skan/* Information about optimization applied in
104169689Skan   the unrolled loop.  */
105169689Skan
106169689Skanstruct opt_info
107169689Skan{
108169689Skan  htab_t insns_to_split;           /* A hashtable of insns to split.  */
109169689Skan  htab_t insns_with_var_to_expand; /* A hashtable of insns with accumulators
110169689Skan                                      to expand.  */
111169689Skan  unsigned first_new_block;        /* The first basic block that was
112169689Skan                                      duplicated.  */
113169689Skan  basic_block loop_exit;           /* The loop exit basic block.  */
114169689Skan  basic_block loop_preheader;      /* The loop preheader basic block.  */
115169689Skan};
116169689Skan
117132718Skanstatic void decide_unrolling_and_peeling (struct loops *, int);
118132718Skanstatic void peel_loops_completely (struct loops *, int);
119132718Skanstatic void decide_peel_simple (struct loop *, int);
120132718Skanstatic void decide_peel_once_rolling (struct loop *, int);
121132718Skanstatic void decide_peel_completely (struct loop *, int);
122132718Skanstatic void decide_unroll_stupid (struct loop *, int);
123132718Skanstatic void decide_unroll_constant_iterations (struct loop *, int);
124132718Skanstatic void decide_unroll_runtime_iterations (struct loop *, int);
125132718Skanstatic void peel_loop_simple (struct loops *, struct loop *);
126132718Skanstatic void peel_loop_completely (struct loops *, struct loop *);
127132718Skanstatic void unroll_loop_stupid (struct loops *, struct loop *);
128132718Skanstatic void unroll_loop_constant_iterations (struct loops *, struct loop *);
129132718Skanstatic void unroll_loop_runtime_iterations (struct loops *, struct loop *);
130169689Skanstatic struct opt_info *analyze_insns_in_loop (struct loop *);
131169689Skanstatic void opt_info_start_duplication (struct opt_info *);
132169689Skanstatic void apply_opt_in_copies (struct opt_info *, unsigned, bool, bool);
133169689Skanstatic void free_opt_info (struct opt_info *);
134169689Skanstatic struct var_to_expand *analyze_insn_to_expand_var (struct loop*, rtx);
135169689Skanstatic bool referenced_in_one_insn_in_loop_p (struct loop *, rtx);
136169689Skanstatic struct iv_to_split *analyze_iv_to_split_insn (rtx);
137169689Skanstatic void expand_var_during_unrolling (struct var_to_expand *, rtx);
138169689Skanstatic int insert_var_expansion_initialization (void **, void *);
139169689Skanstatic int combine_var_copies_in_loop_exit (void **, void *);
140169689Skanstatic int release_var_copies (void **, void *);
141169689Skanstatic rtx get_expansion (struct var_to_expand *);
142132718Skan
143132718Skan/* Unroll and/or peel (depending on FLAGS) LOOPS.  */
144132718Skanvoid
145132718Skanunroll_and_peel_loops (struct loops *loops, int flags)
146132718Skan{
147132718Skan  struct loop *loop, *next;
148169689Skan  bool check;
149132718Skan
150132718Skan  /* First perform complete loop peeling (it is almost surely a win,
151132718Skan     and affects parameters for further decision a lot).  */
152132718Skan  peel_loops_completely (loops, flags);
153132718Skan
154132718Skan  /* Now decide rest of unrolling and peeling.  */
155132718Skan  decide_unrolling_and_peeling (loops, flags);
156132718Skan
157132718Skan  loop = loops->tree_root;
158132718Skan  while (loop->inner)
159132718Skan    loop = loop->inner;
160132718Skan
161132718Skan  /* Scan the loops, inner ones first.  */
162132718Skan  while (loop != loops->tree_root)
163132718Skan    {
164132718Skan      if (loop->next)
165132718Skan	{
166132718Skan	  next = loop->next;
167132718Skan	  while (next->inner)
168132718Skan	    next = next->inner;
169132718Skan	}
170132718Skan      else
171132718Skan	next = loop->outer;
172132718Skan
173169689Skan      check = true;
174132718Skan      /* And perform the appropriate transformations.  */
175132718Skan      switch (loop->lpt_decision.decision)
176132718Skan	{
177132718Skan	case LPT_PEEL_COMPLETELY:
178132718Skan	  /* Already done.  */
179169689Skan	  gcc_unreachable ();
180132718Skan	case LPT_PEEL_SIMPLE:
181132718Skan	  peel_loop_simple (loops, loop);
182132718Skan	  break;
183132718Skan	case LPT_UNROLL_CONSTANT:
184132718Skan	  unroll_loop_constant_iterations (loops, loop);
185132718Skan	  break;
186132718Skan	case LPT_UNROLL_RUNTIME:
187132718Skan	  unroll_loop_runtime_iterations (loops, loop);
188132718Skan	  break;
189132718Skan	case LPT_UNROLL_STUPID:
190132718Skan	  unroll_loop_stupid (loops, loop);
191132718Skan	  break;
192132718Skan	case LPT_NONE:
193169689Skan	  check = false;
194132718Skan	  break;
195132718Skan	default:
196169689Skan	  gcc_unreachable ();
197132718Skan	}
198132718Skan      if (check)
199132718Skan	{
200132718Skan#ifdef ENABLE_CHECKING
201132718Skan	  verify_dominators (CDI_DOMINATORS);
202132718Skan	  verify_loop_structure (loops);
203132718Skan#endif
204132718Skan	}
205132718Skan      loop = next;
206132718Skan    }
207169689Skan
208169689Skan  iv_analysis_done ();
209132718Skan}
210132718Skan
211169689Skan/* Check whether exit of the LOOP is at the end of loop body.  */
212169689Skan
213169689Skanstatic bool
214169689Skanloop_exit_at_end_p (struct loop *loop)
215169689Skan{
216169689Skan  struct niter_desc *desc = get_simple_loop_desc (loop);
217169689Skan  rtx insn;
218169689Skan
219169689Skan  if (desc->in_edge->dest != loop->latch)
220169689Skan    return false;
221169689Skan
222169689Skan  /* Check that the latch is empty.  */
223169689Skan  FOR_BB_INSNS (loop->latch, insn)
224169689Skan    {
225169689Skan      if (INSN_P (insn))
226169689Skan	return false;
227169689Skan    }
228169689Skan
229169689Skan  return true;
230169689Skan}
231169689Skan
232132718Skan/* Check whether to peel LOOPS (depending on FLAGS) completely and do so.  */
233132718Skanstatic void
234132718Skanpeel_loops_completely (struct loops *loops, int flags)
235132718Skan{
236169689Skan  struct loop *loop;
237169689Skan  unsigned i;
238132718Skan
239169689Skan  /* Scan the loops, the inner ones first.  */
240169689Skan  for (i = loops->num - 1; i > 0; i--)
241132718Skan    {
242169689Skan      loop = loops->parray[i];
243169689Skan      if (!loop)
244169689Skan	continue;
245132718Skan
246132718Skan      loop->lpt_decision.decision = LPT_NONE;
247132718Skan
248169689Skan      if (dump_file)
249169689Skan	fprintf (dump_file,
250169689Skan		 "\n;; *** Considering loop %d for complete peeling ***\n",
251132718Skan		 loop->num);
252132718Skan
253132718Skan      loop->ninsns = num_loop_insns (loop);
254132718Skan
255132718Skan      decide_peel_once_rolling (loop, flags);
256132718Skan      if (loop->lpt_decision.decision == LPT_NONE)
257132718Skan	decide_peel_completely (loop, flags);
258132718Skan
259132718Skan      if (loop->lpt_decision.decision == LPT_PEEL_COMPLETELY)
260132718Skan	{
261132718Skan	  peel_loop_completely (loops, loop);
262132718Skan#ifdef ENABLE_CHECKING
263132718Skan	  verify_dominators (CDI_DOMINATORS);
264132718Skan	  verify_loop_structure (loops);
265132718Skan#endif
266132718Skan	}
267132718Skan    }
268132718Skan}
269132718Skan
270132718Skan/* Decide whether unroll or peel LOOPS (depending on FLAGS) and how much.  */
271132718Skanstatic void
272132718Skandecide_unrolling_and_peeling (struct loops *loops, int flags)
273132718Skan{
274132718Skan  struct loop *loop = loops->tree_root, *next;
275132718Skan
276132718Skan  while (loop->inner)
277132718Skan    loop = loop->inner;
278132718Skan
279132718Skan  /* Scan the loops, inner ones first.  */
280132718Skan  while (loop != loops->tree_root)
281132718Skan    {
282132718Skan      if (loop->next)
283132718Skan	{
284132718Skan	  next = loop->next;
285132718Skan	  while (next->inner)
286132718Skan	    next = next->inner;
287132718Skan	}
288132718Skan      else
289132718Skan	next = loop->outer;
290132718Skan
291132718Skan      loop->lpt_decision.decision = LPT_NONE;
292132718Skan
293169689Skan      if (dump_file)
294169689Skan	fprintf (dump_file, "\n;; *** Considering loop %d ***\n", loop->num);
295132718Skan
296132718Skan      /* Do not peel cold areas.  */
297132718Skan      if (!maybe_hot_bb_p (loop->header))
298132718Skan	{
299169689Skan	  if (dump_file)
300169689Skan	    fprintf (dump_file, ";; Not considering loop, cold area\n");
301132718Skan	  loop = next;
302132718Skan	  continue;
303132718Skan	}
304132718Skan
305132718Skan      /* Can the loop be manipulated?  */
306132718Skan      if (!can_duplicate_loop_p (loop))
307132718Skan	{
308169689Skan	  if (dump_file)
309169689Skan	    fprintf (dump_file,
310132718Skan		     ";; Not considering loop, cannot duplicate\n");
311132718Skan	  loop = next;
312132718Skan	  continue;
313132718Skan	}
314132718Skan
315132718Skan      /* Skip non-innermost loops.  */
316132718Skan      if (loop->inner)
317132718Skan	{
318169689Skan	  if (dump_file)
319169689Skan	    fprintf (dump_file, ";; Not considering loop, is not innermost\n");
320132718Skan	  loop = next;
321132718Skan	  continue;
322132718Skan	}
323132718Skan
324132718Skan      loop->ninsns = num_loop_insns (loop);
325132718Skan      loop->av_ninsns = average_num_loop_insns (loop);
326132718Skan
327132718Skan      /* Try transformations one by one in decreasing order of
328132718Skan	 priority.  */
329132718Skan
330132718Skan      decide_unroll_constant_iterations (loop, flags);
331132718Skan      if (loop->lpt_decision.decision == LPT_NONE)
332132718Skan	decide_unroll_runtime_iterations (loop, flags);
333132718Skan      if (loop->lpt_decision.decision == LPT_NONE)
334132718Skan	decide_unroll_stupid (loop, flags);
335132718Skan      if (loop->lpt_decision.decision == LPT_NONE)
336132718Skan	decide_peel_simple (loop, flags);
337132718Skan
338132718Skan      loop = next;
339132718Skan    }
340132718Skan}
341132718Skan
342132718Skan/* Decide whether the LOOP is once rolling and suitable for complete
343132718Skan   peeling.  */
344132718Skanstatic void
345132718Skandecide_peel_once_rolling (struct loop *loop, int flags ATTRIBUTE_UNUSED)
346132718Skan{
347169689Skan  struct niter_desc *desc;
348132718Skan
349169689Skan  if (dump_file)
350169689Skan    fprintf (dump_file, "\n;; Considering peeling once rolling loop\n");
351169689Skan
352132718Skan  /* Is the loop small enough?  */
353132718Skan  if ((unsigned) PARAM_VALUE (PARAM_MAX_ONCE_PEELED_INSNS) < loop->ninsns)
354132718Skan    {
355169689Skan      if (dump_file)
356169689Skan	fprintf (dump_file, ";; Not considering loop, is too big\n");
357132718Skan      return;
358132718Skan    }
359132718Skan
360132718Skan  /* Check for simple loops.  */
361169689Skan  desc = get_simple_loop_desc (loop);
362132718Skan
363132718Skan  /* Check number of iterations.  */
364169689Skan  if (!desc->simple_p
365169689Skan      || desc->assumptions
366169689Skan      || desc->infinite
367169689Skan      || !desc->const_iter
368169689Skan      || desc->niter != 0)
369132718Skan    {
370169689Skan      if (dump_file)
371169689Skan	fprintf (dump_file,
372169689Skan		 ";; Unable to prove that the loop rolls exactly once\n");
373132718Skan      return;
374132718Skan    }
375132718Skan
376132718Skan  /* Success.  */
377169689Skan  if (dump_file)
378169689Skan    fprintf (dump_file, ";; Decided to peel exactly once rolling loop\n");
379132718Skan  loop->lpt_decision.decision = LPT_PEEL_COMPLETELY;
380132718Skan}
381132718Skan
382132718Skan/* Decide whether the LOOP is suitable for complete peeling.  */
383132718Skanstatic void
384132718Skandecide_peel_completely (struct loop *loop, int flags ATTRIBUTE_UNUSED)
385132718Skan{
386132718Skan  unsigned npeel;
387169689Skan  struct niter_desc *desc;
388132718Skan
389169689Skan  if (dump_file)
390169689Skan    fprintf (dump_file, "\n;; Considering peeling completely\n");
391132718Skan
392132718Skan  /* Skip non-innermost loops.  */
393132718Skan  if (loop->inner)
394132718Skan    {
395169689Skan      if (dump_file)
396169689Skan	fprintf (dump_file, ";; Not considering loop, is not innermost\n");
397132718Skan      return;
398132718Skan    }
399132718Skan
400132718Skan  /* Do not peel cold areas.  */
401132718Skan  if (!maybe_hot_bb_p (loop->header))
402132718Skan    {
403169689Skan      if (dump_file)
404169689Skan	fprintf (dump_file, ";; Not considering loop, cold area\n");
405132718Skan      return;
406132718Skan    }
407132718Skan
408132718Skan  /* Can the loop be manipulated?  */
409132718Skan  if (!can_duplicate_loop_p (loop))
410132718Skan    {
411169689Skan      if (dump_file)
412169689Skan	fprintf (dump_file,
413132718Skan		 ";; Not considering loop, cannot duplicate\n");
414132718Skan      return;
415132718Skan    }
416132718Skan
417132718Skan  /* npeel = number of iterations to peel.  */
418132718Skan  npeel = PARAM_VALUE (PARAM_MAX_COMPLETELY_PEELED_INSNS) / loop->ninsns;
419132718Skan  if (npeel > (unsigned) PARAM_VALUE (PARAM_MAX_COMPLETELY_PEEL_TIMES))
420132718Skan    npeel = PARAM_VALUE (PARAM_MAX_COMPLETELY_PEEL_TIMES);
421132718Skan
422132718Skan  /* Is the loop small enough?  */
423132718Skan  if (!npeel)
424132718Skan    {
425169689Skan      if (dump_file)
426169689Skan	fprintf (dump_file, ";; Not considering loop, is too big\n");
427132718Skan      return;
428132718Skan    }
429132718Skan
430132718Skan  /* Check for simple loops.  */
431169689Skan  desc = get_simple_loop_desc (loop);
432132718Skan
433132718Skan  /* Check number of iterations.  */
434169689Skan  if (!desc->simple_p
435169689Skan      || desc->assumptions
436169689Skan      || !desc->const_iter
437169689Skan      || desc->infinite)
438132718Skan    {
439169689Skan      if (dump_file)
440169689Skan	fprintf (dump_file,
441169689Skan		 ";; Unable to prove that the loop iterates constant times\n");
442132718Skan      return;
443132718Skan    }
444132718Skan
445169689Skan  if (desc->niter > npeel - 1)
446132718Skan    {
447169689Skan      if (dump_file)
448132718Skan	{
449169689Skan	  fprintf (dump_file,
450169689Skan		   ";; Not peeling loop completely, rolls too much (");
451169689Skan	  fprintf (dump_file, HOST_WIDEST_INT_PRINT_DEC, desc->niter);
452169689Skan	  fprintf (dump_file, " iterations > %d [maximum peelings])\n", npeel);
453132718Skan	}
454132718Skan      return;
455132718Skan    }
456132718Skan
457132718Skan  /* Success.  */
458169689Skan  if (dump_file)
459169689Skan    fprintf (dump_file, ";; Decided to peel loop completely\n");
460132718Skan  loop->lpt_decision.decision = LPT_PEEL_COMPLETELY;
461132718Skan}
462132718Skan
463132718Skan/* Peel all iterations of LOOP, remove exit edges and cancel the loop
464132718Skan   completely.  The transformation done:
465132718Skan
466132718Skan   for (i = 0; i < 4; i++)
467132718Skan     body;
468132718Skan
469132718Skan   ==>
470132718Skan
471132718Skan   i = 0;
472132718Skan   body; i++;
473132718Skan   body; i++;
474132718Skan   body; i++;
475132718Skan   body; i++;
476132718Skan   */
477132718Skanstatic void
478132718Skanpeel_loop_completely (struct loops *loops, struct loop *loop)
479132718Skan{
480132718Skan  sbitmap wont_exit;
481132718Skan  unsigned HOST_WIDE_INT npeel;
482132718Skan  unsigned n_remove_edges, i;
483169689Skan  edge *remove_edges, ein;
484169689Skan  struct niter_desc *desc = get_simple_loop_desc (loop);
485169689Skan  struct opt_info *opt_info = NULL;
486169689Skan
487132718Skan  npeel = desc->niter;
488132718Skan
489132718Skan  if (npeel)
490132718Skan    {
491169689Skan      bool ok;
492169689Skan
493132718Skan      wont_exit = sbitmap_alloc (npeel + 1);
494132718Skan      sbitmap_ones (wont_exit);
495132718Skan      RESET_BIT (wont_exit, 0);
496169689Skan      if (desc->noloop_assumptions)
497132718Skan	RESET_BIT (wont_exit, 1);
498132718Skan
499169689Skan      remove_edges = XCNEWVEC (edge, npeel);
500132718Skan      n_remove_edges = 0;
501132718Skan
502169689Skan      if (flag_split_ivs_in_unroller)
503169689Skan        opt_info = analyze_insns_in_loop (loop);
504169689Skan
505169689Skan      opt_info_start_duplication (opt_info);
506169689Skan      ok = duplicate_loop_to_header_edge (loop, loop_preheader_edge (loop),
507169689Skan					  loops, npeel,
508169689Skan					  wont_exit, desc->out_edge,
509169689Skan					  remove_edges, &n_remove_edges,
510169689Skan					  DLTHE_FLAG_UPDATE_FREQ
511169689Skan					  | DLTHE_FLAG_COMPLETTE_PEEL
512169689Skan					  | (opt_info
513169689Skan					     ? DLTHE_RECORD_COPY_NUMBER : 0));
514169689Skan      gcc_assert (ok);
515132718Skan
516132718Skan      free (wont_exit);
517169689Skan
518169689Skan      if (opt_info)
519169689Skan 	{
520169689Skan 	  apply_opt_in_copies (opt_info, npeel, false, true);
521169689Skan 	  free_opt_info (opt_info);
522169689Skan 	}
523132718Skan
524132718Skan      /* Remove the exit edges.  */
525132718Skan      for (i = 0; i < n_remove_edges; i++)
526132718Skan	remove_path (loops, remove_edges[i]);
527132718Skan      free (remove_edges);
528132718Skan    }
529132718Skan
530169689Skan  ein = desc->in_edge;
531169689Skan  free_simple_loop_desc (loop);
532132718Skan
533132718Skan  /* Now remove the unreachable part of the last iteration and cancel
534132718Skan     the loop.  */
535169689Skan  remove_path (loops, ein);
536132718Skan
537169689Skan  if (dump_file)
538169689Skan    fprintf (dump_file, ";; Peeled loop completely, %d times\n", (int) npeel);
539132718Skan}
540132718Skan
541169689Skan/* Decide whether to unroll LOOP iterating constant number of times
542169689Skan   and how much.  */
543169689Skan
544132718Skanstatic void
545132718Skandecide_unroll_constant_iterations (struct loop *loop, int flags)
546132718Skan{
547169689Skan  unsigned nunroll, nunroll_by_av, best_copies, best_unroll = 0, n_copies, i;
548169689Skan  struct niter_desc *desc;
549132718Skan
550132718Skan  if (!(flags & UAP_UNROLL))
551132718Skan    {
552132718Skan      /* We were not asked to, just return back silently.  */
553132718Skan      return;
554132718Skan    }
555132718Skan
556169689Skan  if (dump_file)
557169689Skan    fprintf (dump_file,
558169689Skan	     "\n;; Considering unrolling loop with constant "
559169689Skan	     "number of iterations\n");
560132718Skan
561132718Skan  /* nunroll = total number of copies of the original loop body in
562132718Skan     unrolled loop (i.e. if it is 2, we have to duplicate loop body once.  */
563132718Skan  nunroll = PARAM_VALUE (PARAM_MAX_UNROLLED_INSNS) / loop->ninsns;
564169689Skan  nunroll_by_av
565169689Skan    = PARAM_VALUE (PARAM_MAX_AVERAGE_UNROLLED_INSNS) / loop->av_ninsns;
566132718Skan  if (nunroll > nunroll_by_av)
567132718Skan    nunroll = nunroll_by_av;
568132718Skan  if (nunroll > (unsigned) PARAM_VALUE (PARAM_MAX_UNROLL_TIMES))
569132718Skan    nunroll = PARAM_VALUE (PARAM_MAX_UNROLL_TIMES);
570132718Skan
571132718Skan  /* Skip big loops.  */
572132718Skan  if (nunroll <= 1)
573132718Skan    {
574169689Skan      if (dump_file)
575169689Skan	fprintf (dump_file, ";; Not considering loop, is too big\n");
576132718Skan      return;
577132718Skan    }
578132718Skan
579132718Skan  /* Check for simple loops.  */
580169689Skan  desc = get_simple_loop_desc (loop);
581132718Skan
582132718Skan  /* Check number of iterations.  */
583169689Skan  if (!desc->simple_p || !desc->const_iter || desc->assumptions)
584132718Skan    {
585169689Skan      if (dump_file)
586169689Skan	fprintf (dump_file,
587169689Skan		 ";; Unable to prove that the loop iterates constant times\n");
588132718Skan      return;
589132718Skan    }
590132718Skan
591132718Skan  /* Check whether the loop rolls enough to consider.  */
592169689Skan  if (desc->niter < 2 * nunroll)
593132718Skan    {
594169689Skan      if (dump_file)
595169689Skan	fprintf (dump_file, ";; Not unrolling loop, doesn't roll\n");
596132718Skan      return;
597132718Skan    }
598132718Skan
599132718Skan  /* Success; now compute number of iterations to unroll.  We alter
600132718Skan     nunroll so that as few as possible copies of loop body are
601132718Skan     necessary, while still not decreasing the number of unrollings
602132718Skan     too much (at most by 1).  */
603132718Skan  best_copies = 2 * nunroll + 10;
604132718Skan
605132718Skan  i = 2 * nunroll + 2;
606169689Skan  if (i - 1 >= desc->niter)
607169689Skan    i = desc->niter - 2;
608132718Skan
609132718Skan  for (; i >= nunroll - 1; i--)
610132718Skan    {
611169689Skan      unsigned exit_mod = desc->niter % (i + 1);
612132718Skan
613169689Skan      if (!loop_exit_at_end_p (loop))
614132718Skan	n_copies = exit_mod + i + 1;
615169689Skan      else if (exit_mod != (unsigned) i
616169689Skan	       || desc->noloop_assumptions != NULL_RTX)
617132718Skan	n_copies = exit_mod + i + 2;
618132718Skan      else
619132718Skan	n_copies = i + 1;
620132718Skan
621132718Skan      if (n_copies < best_copies)
622132718Skan	{
623132718Skan	  best_copies = n_copies;
624132718Skan	  best_unroll = i;
625132718Skan	}
626132718Skan    }
627132718Skan
628169689Skan  if (dump_file)
629169689Skan    fprintf (dump_file, ";; max_unroll %d (%d copies, initial %d).\n",
630132718Skan	     best_unroll + 1, best_copies, nunroll);
631132718Skan
632132718Skan  loop->lpt_decision.decision = LPT_UNROLL_CONSTANT;
633132718Skan  loop->lpt_decision.times = best_unroll;
634169689Skan
635169689Skan  if (dump_file)
636169689Skan    fprintf (dump_file,
637169689Skan	     ";; Decided to unroll the constant times rolling loop, %d times.\n",
638169689Skan	     loop->lpt_decision.times);
639132718Skan}
640132718Skan
641132718Skan/* Unroll LOOP with constant number of iterations LOOP->LPT_DECISION.TIMES + 1
642132718Skan   times.  The transformation does this:
643132718Skan
644132718Skan   for (i = 0; i < 102; i++)
645132718Skan     body;
646132718Skan
647132718Skan   ==>
648132718Skan
649132718Skan   i = 0;
650132718Skan   body; i++;
651132718Skan   body; i++;
652132718Skan   while (i < 102)
653132718Skan     {
654132718Skan       body; i++;
655132718Skan       body; i++;
656132718Skan       body; i++;
657132718Skan       body; i++;
658132718Skan     }
659132718Skan  */
660132718Skanstatic void
661132718Skanunroll_loop_constant_iterations (struct loops *loops, struct loop *loop)
662132718Skan{
663132718Skan  unsigned HOST_WIDE_INT niter;
664132718Skan  unsigned exit_mod;
665132718Skan  sbitmap wont_exit;
666132718Skan  unsigned n_remove_edges, i;
667132718Skan  edge *remove_edges;
668132718Skan  unsigned max_unroll = loop->lpt_decision.times;
669169689Skan  struct niter_desc *desc = get_simple_loop_desc (loop);
670169689Skan  bool exit_at_end = loop_exit_at_end_p (loop);
671169689Skan  struct opt_info *opt_info = NULL;
672169689Skan  bool ok;
673169689Skan
674132718Skan  niter = desc->niter;
675132718Skan
676169689Skan  /* Should not get here (such loop should be peeled instead).  */
677169689Skan  gcc_assert (niter > max_unroll + 1);
678132718Skan
679132718Skan  exit_mod = niter % (max_unroll + 1);
680132718Skan
681132718Skan  wont_exit = sbitmap_alloc (max_unroll + 1);
682132718Skan  sbitmap_ones (wont_exit);
683132718Skan
684169689Skan  remove_edges = XCNEWVEC (edge, max_unroll + exit_mod + 1);
685132718Skan  n_remove_edges = 0;
686169689Skan  if (flag_split_ivs_in_unroller
687169689Skan      || flag_variable_expansion_in_unroller)
688169689Skan    opt_info = analyze_insns_in_loop (loop);
689169689Skan
690169689Skan  if (!exit_at_end)
691132718Skan    {
692169689Skan      /* The exit is not at the end of the loop; leave exit test
693132718Skan	 in the first copy, so that the loops that start with test
694132718Skan	 of exit condition have continuous body after unrolling.  */
695132718Skan
696169689Skan      if (dump_file)
697169689Skan	fprintf (dump_file, ";; Condition on beginning of loop.\n");
698132718Skan
699132718Skan      /* Peel exit_mod iterations.  */
700132718Skan      RESET_BIT (wont_exit, 0);
701169689Skan      if (desc->noloop_assumptions)
702132718Skan	RESET_BIT (wont_exit, 1);
703132718Skan
704169689Skan      if (exit_mod)
705169689Skan	{
706169689Skan	  opt_info_start_duplication (opt_info);
707169689Skan          ok = duplicate_loop_to_header_edge (loop, loop_preheader_edge (loop),
708169689Skan					      loops, exit_mod,
709169689Skan					      wont_exit, desc->out_edge,
710169689Skan					      remove_edges, &n_remove_edges,
711169689Skan					      DLTHE_FLAG_UPDATE_FREQ
712169689Skan					      | (opt_info && exit_mod > 1
713169689Skan						 ? DLTHE_RECORD_COPY_NUMBER
714169689Skan						   : 0));
715169689Skan	  gcc_assert (ok);
716132718Skan
717169689Skan          if (opt_info && exit_mod > 1)
718169689Skan 	    apply_opt_in_copies (opt_info, exit_mod, false, false);
719169689Skan
720169689Skan	  desc->noloop_assumptions = NULL_RTX;
721169689Skan	  desc->niter -= exit_mod;
722169689Skan	  desc->niter_max -= exit_mod;
723169689Skan	}
724169689Skan
725132718Skan      SET_BIT (wont_exit, 1);
726132718Skan    }
727132718Skan  else
728132718Skan    {
729132718Skan      /* Leave exit test in last copy, for the same reason as above if
730132718Skan	 the loop tests the condition at the end of loop body.  */
731132718Skan
732169689Skan      if (dump_file)
733169689Skan	fprintf (dump_file, ";; Condition on end of loop.\n");
734132718Skan
735132718Skan      /* We know that niter >= max_unroll + 2; so we do not need to care of
736132718Skan	 case when we would exit before reaching the loop.  So just peel
737169689Skan	 exit_mod + 1 iterations.  */
738169689Skan      if (exit_mod != max_unroll
739169689Skan	  || desc->noloop_assumptions)
740132718Skan	{
741132718Skan	  RESET_BIT (wont_exit, 0);
742169689Skan	  if (desc->noloop_assumptions)
743132718Skan	    RESET_BIT (wont_exit, 1);
744169689Skan
745169689Skan          opt_info_start_duplication (opt_info);
746169689Skan	  ok = duplicate_loop_to_header_edge (loop, loop_preheader_edge (loop),
747169689Skan					      loops, exit_mod + 1,
748169689Skan					      wont_exit, desc->out_edge,
749169689Skan					      remove_edges, &n_remove_edges,
750169689Skan					      DLTHE_FLAG_UPDATE_FREQ
751169689Skan					      | (opt_info && exit_mod > 0
752169689Skan						 ? DLTHE_RECORD_COPY_NUMBER
753169689Skan						   : 0));
754169689Skan	  gcc_assert (ok);
755169689Skan
756169689Skan          if (opt_info && exit_mod > 0)
757169689Skan  	    apply_opt_in_copies (opt_info, exit_mod + 1, false, false);
758132718Skan
759169689Skan	  desc->niter -= exit_mod + 1;
760169689Skan	  desc->niter_max -= exit_mod + 1;
761169689Skan	  desc->noloop_assumptions = NULL_RTX;
762132718Skan
763132718Skan	  SET_BIT (wont_exit, 0);
764132718Skan	  SET_BIT (wont_exit, 1);
765132718Skan	}
766132718Skan
767132718Skan      RESET_BIT (wont_exit, max_unroll);
768132718Skan    }
769132718Skan
770132718Skan  /* Now unroll the loop.  */
771169689Skan
772169689Skan  opt_info_start_duplication (opt_info);
773169689Skan  ok = duplicate_loop_to_header_edge (loop, loop_latch_edge (loop),
774169689Skan				      loops, max_unroll,
775169689Skan				      wont_exit, desc->out_edge,
776169689Skan				      remove_edges, &n_remove_edges,
777169689Skan				      DLTHE_FLAG_UPDATE_FREQ
778169689Skan				      | (opt_info
779169689Skan					 ? DLTHE_RECORD_COPY_NUMBER
780169689Skan					   : 0));
781169689Skan  gcc_assert (ok);
782132718Skan
783169689Skan  if (opt_info)
784169689Skan    {
785169689Skan      apply_opt_in_copies (opt_info, max_unroll, true, true);
786169689Skan      free_opt_info (opt_info);
787169689Skan    }
788169689Skan
789132718Skan  free (wont_exit);
790132718Skan
791169689Skan  if (exit_at_end)
792169689Skan    {
793169689Skan      basic_block exit_block = get_bb_copy (desc->in_edge->src);
794169689Skan      /* Find a new in and out edge; they are in the last copy we have made.  */
795169689Skan
796169689Skan      if (EDGE_SUCC (exit_block, 0)->dest == desc->out_edge->dest)
797169689Skan	{
798169689Skan	  desc->out_edge = EDGE_SUCC (exit_block, 0);
799169689Skan	  desc->in_edge = EDGE_SUCC (exit_block, 1);
800169689Skan	}
801169689Skan      else
802169689Skan	{
803169689Skan	  desc->out_edge = EDGE_SUCC (exit_block, 1);
804169689Skan	  desc->in_edge = EDGE_SUCC (exit_block, 0);
805169689Skan	}
806169689Skan    }
807132718Skan
808169689Skan  desc->niter /= max_unroll + 1;
809169689Skan  desc->niter_max /= max_unroll + 1;
810169689Skan  desc->niter_expr = GEN_INT (desc->niter);
811169689Skan
812132718Skan  /* Remove the edges.  */
813132718Skan  for (i = 0; i < n_remove_edges; i++)
814132718Skan    remove_path (loops, remove_edges[i]);
815132718Skan  free (remove_edges);
816132718Skan
817169689Skan  if (dump_file)
818169689Skan    fprintf (dump_file,
819169689Skan	     ";; Unrolled loop %d times, constant # of iterations %i insns\n",
820169689Skan	     max_unroll, num_loop_insns (loop));
821132718Skan}
822132718Skan
823132718Skan/* Decide whether to unroll LOOP iterating runtime computable number of times
824132718Skan   and how much.  */
825132718Skanstatic void
826132718Skandecide_unroll_runtime_iterations (struct loop *loop, int flags)
827132718Skan{
828132718Skan  unsigned nunroll, nunroll_by_av, i;
829169689Skan  struct niter_desc *desc;
830132718Skan
831132718Skan  if (!(flags & UAP_UNROLL))
832132718Skan    {
833132718Skan      /* We were not asked to, just return back silently.  */
834132718Skan      return;
835132718Skan    }
836132718Skan
837169689Skan  if (dump_file)
838169689Skan    fprintf (dump_file,
839169689Skan	     "\n;; Considering unrolling loop with runtime "
840169689Skan	     "computable number of iterations\n");
841132718Skan
842132718Skan  /* nunroll = total number of copies of the original loop body in
843132718Skan     unrolled loop (i.e. if it is 2, we have to duplicate loop body once.  */
844132718Skan  nunroll = PARAM_VALUE (PARAM_MAX_UNROLLED_INSNS) / loop->ninsns;
845132718Skan  nunroll_by_av = PARAM_VALUE (PARAM_MAX_AVERAGE_UNROLLED_INSNS) / loop->av_ninsns;
846132718Skan  if (nunroll > nunroll_by_av)
847132718Skan    nunroll = nunroll_by_av;
848132718Skan  if (nunroll > (unsigned) PARAM_VALUE (PARAM_MAX_UNROLL_TIMES))
849132718Skan    nunroll = PARAM_VALUE (PARAM_MAX_UNROLL_TIMES);
850132718Skan
851132718Skan  /* Skip big loops.  */
852132718Skan  if (nunroll <= 1)
853132718Skan    {
854169689Skan      if (dump_file)
855169689Skan	fprintf (dump_file, ";; Not considering loop, is too big\n");
856132718Skan      return;
857132718Skan    }
858132718Skan
859132718Skan  /* Check for simple loops.  */
860169689Skan  desc = get_simple_loop_desc (loop);
861132718Skan
862132718Skan  /* Check simpleness.  */
863169689Skan  if (!desc->simple_p || desc->assumptions)
864132718Skan    {
865169689Skan      if (dump_file)
866169689Skan	fprintf (dump_file,
867169689Skan		 ";; Unable to prove that the number of iterations "
868169689Skan		 "can be counted in runtime\n");
869132718Skan      return;
870132718Skan    }
871132718Skan
872169689Skan  if (desc->const_iter)
873132718Skan    {
874169689Skan      if (dump_file)
875169689Skan	fprintf (dump_file, ";; Loop iterates constant times\n");
876132718Skan      return;
877132718Skan    }
878132718Skan
879132718Skan  /* If we have profile feedback, check whether the loop rolls.  */
880132718Skan  if (loop->header->count && expected_loop_iterations (loop) < 2 * nunroll)
881132718Skan    {
882169689Skan      if (dump_file)
883169689Skan	fprintf (dump_file, ";; Not unrolling loop, doesn't roll\n");
884132718Skan      return;
885132718Skan    }
886132718Skan
887132718Skan  /* Success; now force nunroll to be power of 2, as we are unable to
888132718Skan     cope with overflows in computation of number of iterations.  */
889169689Skan  for (i = 1; 2 * i <= nunroll; i *= 2)
890169689Skan    continue;
891132718Skan
892132718Skan  loop->lpt_decision.decision = LPT_UNROLL_RUNTIME;
893132718Skan  loop->lpt_decision.times = i - 1;
894169689Skan
895169689Skan  if (dump_file)
896169689Skan    fprintf (dump_file,
897169689Skan	     ";; Decided to unroll the runtime computable "
898169689Skan	     "times rolling loop, %d times.\n",
899169689Skan	     loop->lpt_decision.times);
900132718Skan}
901132718Skan
902132718Skan/* Unroll LOOP for that we are able to count number of iterations in runtime
903132718Skan   LOOP->LPT_DECISION.TIMES + 1 times.  The transformation does this (with some
904132718Skan   extra care for case n < 0):
905132718Skan
906132718Skan   for (i = 0; i < n; i++)
907132718Skan     body;
908132718Skan
909132718Skan   ==>
910132718Skan
911132718Skan   i = 0;
912132718Skan   mod = n % 4;
913132718Skan
914132718Skan   switch (mod)
915132718Skan     {
916132718Skan       case 3:
917132718Skan         body; i++;
918132718Skan       case 2:
919132718Skan         body; i++;
920132718Skan       case 1:
921132718Skan         body; i++;
922132718Skan       case 0: ;
923132718Skan     }
924132718Skan
925132718Skan   while (i < n)
926132718Skan     {
927132718Skan       body; i++;
928132718Skan       body; i++;
929132718Skan       body; i++;
930132718Skan       body; i++;
931132718Skan     }
932132718Skan   */
933132718Skanstatic void
934132718Skanunroll_loop_runtime_iterations (struct loops *loops, struct loop *loop)
935132718Skan{
936169689Skan  rtx old_niter, niter, init_code, branch_code, tmp;
937132718Skan  unsigned i, j, p;
938132718Skan  basic_block preheader, *body, *dom_bbs, swtch, ezc_swtch;
939132718Skan  unsigned n_dom_bbs;
940132718Skan  sbitmap wont_exit;
941132718Skan  int may_exit_copy;
942132718Skan  unsigned n_peel, n_remove_edges;
943132718Skan  edge *remove_edges, e;
944132718Skan  bool extra_zero_check, last_may_exit;
945132718Skan  unsigned max_unroll = loop->lpt_decision.times;
946169689Skan  struct niter_desc *desc = get_simple_loop_desc (loop);
947169689Skan  bool exit_at_end = loop_exit_at_end_p (loop);
948169689Skan  struct opt_info *opt_info = NULL;
949169689Skan  bool ok;
950169689Skan
951169689Skan  if (flag_split_ivs_in_unroller
952169689Skan      || flag_variable_expansion_in_unroller)
953169689Skan    opt_info = analyze_insns_in_loop (loop);
954169689Skan
955132718Skan  /* Remember blocks whose dominators will have to be updated.  */
956169689Skan  dom_bbs = XCNEWVEC (basic_block, n_basic_blocks);
957132718Skan  n_dom_bbs = 0;
958132718Skan
959132718Skan  body = get_loop_body (loop);
960132718Skan  for (i = 0; i < loop->num_nodes; i++)
961132718Skan    {
962132718Skan      unsigned nldom;
963132718Skan      basic_block *ldom;
964132718Skan
965132718Skan      nldom = get_dominated_by (CDI_DOMINATORS, body[i], &ldom);
966132718Skan      for (j = 0; j < nldom; j++)
967132718Skan	if (!flow_bb_inside_loop_p (loop, ldom[j]))
968132718Skan	  dom_bbs[n_dom_bbs++] = ldom[j];
969132718Skan
970132718Skan      free (ldom);
971132718Skan    }
972132718Skan  free (body);
973132718Skan
974169689Skan  if (!exit_at_end)
975132718Skan    {
976132718Skan      /* Leave exit in first copy (for explanation why see comment in
977132718Skan	 unroll_loop_constant_iterations).  */
978132718Skan      may_exit_copy = 0;
979132718Skan      n_peel = max_unroll - 1;
980132718Skan      extra_zero_check = true;
981132718Skan      last_may_exit = false;
982132718Skan    }
983132718Skan  else
984132718Skan    {
985132718Skan      /* Leave exit in last copy (for explanation why see comment in
986132718Skan	 unroll_loop_constant_iterations).  */
987132718Skan      may_exit_copy = max_unroll;
988132718Skan      n_peel = max_unroll;
989132718Skan      extra_zero_check = false;
990132718Skan      last_may_exit = true;
991132718Skan    }
992132718Skan
993132718Skan  /* Get expression for number of iterations.  */
994132718Skan  start_sequence ();
995169689Skan  old_niter = niter = gen_reg_rtx (desc->mode);
996169689Skan  tmp = force_operand (copy_rtx (desc->niter_expr), niter);
997169689Skan  if (tmp != niter)
998169689Skan    emit_move_insn (niter, tmp);
999132718Skan
1000132718Skan  /* Count modulo by ANDing it with max_unroll; we use the fact that
1001132718Skan     the number of unrollings is a power of two, and thus this is correct
1002132718Skan     even if there is overflow in the computation.  */
1003169689Skan  niter = expand_simple_binop (desc->mode, AND,
1004132718Skan			       niter,
1005132718Skan			       GEN_INT (max_unroll),
1006132718Skan			       NULL_RTX, 0, OPTAB_LIB_WIDEN);
1007132718Skan
1008132718Skan  init_code = get_insns ();
1009132718Skan  end_sequence ();
1010132718Skan
1011132718Skan  /* Precondition the loop.  */
1012132718Skan  loop_split_edge_with (loop_preheader_edge (loop), init_code);
1013132718Skan
1014169689Skan  remove_edges = XCNEWVEC (edge, max_unroll + n_peel + 1);
1015132718Skan  n_remove_edges = 0;
1016132718Skan
1017132718Skan  wont_exit = sbitmap_alloc (max_unroll + 2);
1018132718Skan
1019132718Skan  /* Peel the first copy of loop body (almost always we must leave exit test
1020132718Skan     here; the only exception is when we have extra zero check and the number
1021169689Skan     of iterations is reliable.  Also record the place of (possible) extra
1022169689Skan     zero check.  */
1023132718Skan  sbitmap_zero (wont_exit);
1024169689Skan  if (extra_zero_check
1025169689Skan      && !desc->noloop_assumptions)
1026132718Skan    SET_BIT (wont_exit, 1);
1027132718Skan  ezc_swtch = loop_preheader_edge (loop)->src;
1028169689Skan  ok = duplicate_loop_to_header_edge (loop, loop_preheader_edge (loop),
1029169689Skan				      loops, 1,
1030169689Skan				      wont_exit, desc->out_edge,
1031169689Skan				      remove_edges, &n_remove_edges,
1032169689Skan				      DLTHE_FLAG_UPDATE_FREQ);
1033169689Skan  gcc_assert (ok);
1034132718Skan
1035132718Skan  /* Record the place where switch will be built for preconditioning.  */
1036132718Skan  swtch = loop_split_edge_with (loop_preheader_edge (loop),
1037132718Skan				NULL_RTX);
1038132718Skan
1039132718Skan  for (i = 0; i < n_peel; i++)
1040132718Skan    {
1041132718Skan      /* Peel the copy.  */
1042132718Skan      sbitmap_zero (wont_exit);
1043132718Skan      if (i != n_peel - 1 || !last_may_exit)
1044132718Skan	SET_BIT (wont_exit, 1);
1045169689Skan      ok = duplicate_loop_to_header_edge (loop, loop_preheader_edge (loop),
1046169689Skan					  loops, 1,
1047169689Skan					  wont_exit, desc->out_edge,
1048169689Skan					  remove_edges, &n_remove_edges,
1049169689Skan					  DLTHE_FLAG_UPDATE_FREQ);
1050169689Skan      gcc_assert (ok);
1051132718Skan
1052132718Skan      /* Create item for switch.  */
1053132718Skan      j = n_peel - i - (extra_zero_check ? 0 : 1);
1054132718Skan      p = REG_BR_PROB_BASE / (i + 2);
1055132718Skan
1056169689Skan      preheader = loop_split_edge_with (loop_preheader_edge (loop), NULL_RTX);
1057169689Skan      branch_code = compare_and_jump_seq (copy_rtx (niter), GEN_INT (j), EQ,
1058169689Skan					  block_label (preheader), p,
1059132718Skan					  NULL_RTX);
1060132718Skan
1061169689Skan      swtch = loop_split_edge_with (single_pred_edge (swtch), branch_code);
1062132718Skan      set_immediate_dominator (CDI_DOMINATORS, preheader, swtch);
1063169689Skan      single_pred_edge (swtch)->probability = REG_BR_PROB_BASE - p;
1064132718Skan      e = make_edge (swtch, preheader,
1065169689Skan		     single_succ_edge (swtch)->flags & EDGE_IRREDUCIBLE_LOOP);
1066132718Skan      e->probability = p;
1067132718Skan    }
1068132718Skan
1069132718Skan  if (extra_zero_check)
1070132718Skan    {
1071132718Skan      /* Add branch for zero iterations.  */
1072132718Skan      p = REG_BR_PROB_BASE / (max_unroll + 1);
1073132718Skan      swtch = ezc_swtch;
1074132718Skan      preheader = loop_split_edge_with (loop_preheader_edge (loop), NULL_RTX);
1075169689Skan      branch_code = compare_and_jump_seq (copy_rtx (niter), const0_rtx, EQ,
1076169689Skan					  block_label (preheader), p,
1077169689Skan					  NULL_RTX);
1078132718Skan
1079169689Skan      swtch = loop_split_edge_with (single_succ_edge (swtch), branch_code);
1080132718Skan      set_immediate_dominator (CDI_DOMINATORS, preheader, swtch);
1081169689Skan      single_succ_edge (swtch)->probability = REG_BR_PROB_BASE - p;
1082132718Skan      e = make_edge (swtch, preheader,
1083169689Skan		     single_succ_edge (swtch)->flags & EDGE_IRREDUCIBLE_LOOP);
1084132718Skan      e->probability = p;
1085132718Skan    }
1086132718Skan
1087132718Skan  /* Recount dominators for outer blocks.  */
1088132718Skan  iterate_fix_dominators (CDI_DOMINATORS, dom_bbs, n_dom_bbs);
1089132718Skan
1090132718Skan  /* And unroll loop.  */
1091132718Skan
1092132718Skan  sbitmap_ones (wont_exit);
1093132718Skan  RESET_BIT (wont_exit, may_exit_copy);
1094169689Skan  opt_info_start_duplication (opt_info);
1095169689Skan
1096169689Skan  ok = duplicate_loop_to_header_edge (loop, loop_latch_edge (loop),
1097169689Skan				      loops, max_unroll,
1098169689Skan				      wont_exit, desc->out_edge,
1099169689Skan				      remove_edges, &n_remove_edges,
1100169689Skan				      DLTHE_FLAG_UPDATE_FREQ
1101169689Skan				      | (opt_info
1102169689Skan					 ? DLTHE_RECORD_COPY_NUMBER
1103169689Skan					   : 0));
1104169689Skan  gcc_assert (ok);
1105169689Skan
1106169689Skan  if (opt_info)
1107169689Skan    {
1108169689Skan      apply_opt_in_copies (opt_info, max_unroll, true, true);
1109169689Skan      free_opt_info (opt_info);
1110169689Skan    }
1111132718Skan
1112132718Skan  free (wont_exit);
1113132718Skan
1114169689Skan  if (exit_at_end)
1115169689Skan    {
1116169689Skan      basic_block exit_block = get_bb_copy (desc->in_edge->src);
1117169689Skan      /* Find a new in and out edge; they are in the last copy we have
1118169689Skan	 made.  */
1119169689Skan
1120169689Skan      if (EDGE_SUCC (exit_block, 0)->dest == desc->out_edge->dest)
1121169689Skan	{
1122169689Skan	  desc->out_edge = EDGE_SUCC (exit_block, 0);
1123169689Skan	  desc->in_edge = EDGE_SUCC (exit_block, 1);
1124169689Skan	}
1125169689Skan      else
1126169689Skan	{
1127169689Skan	  desc->out_edge = EDGE_SUCC (exit_block, 1);
1128169689Skan	  desc->in_edge = EDGE_SUCC (exit_block, 0);
1129169689Skan	}
1130169689Skan    }
1131132718Skan
1132132718Skan  /* Remove the edges.  */
1133132718Skan  for (i = 0; i < n_remove_edges; i++)
1134132718Skan    remove_path (loops, remove_edges[i]);
1135132718Skan  free (remove_edges);
1136132718Skan
1137169689Skan  /* We must be careful when updating the number of iterations due to
1138169689Skan     preconditioning and the fact that the value must be valid at entry
1139169689Skan     of the loop.  After passing through the above code, we see that
1140169689Skan     the correct new number of iterations is this:  */
1141169689Skan  gcc_assert (!desc->const_iter);
1142169689Skan  desc->niter_expr =
1143169689Skan    simplify_gen_binary (UDIV, desc->mode, old_niter,
1144169689Skan			 GEN_INT (max_unroll + 1));
1145169689Skan  desc->niter_max /= max_unroll + 1;
1146169689Skan  if (exit_at_end)
1147169689Skan    {
1148169689Skan      desc->niter_expr =
1149169689Skan	simplify_gen_binary (MINUS, desc->mode, desc->niter_expr, const1_rtx);
1150169689Skan      desc->noloop_assumptions = NULL_RTX;
1151169689Skan      desc->niter_max--;
1152169689Skan    }
1153169689Skan
1154169689Skan  if (dump_file)
1155169689Skan    fprintf (dump_file,
1156169689Skan	     ";; Unrolled loop %d times, counting # of iterations "
1157169689Skan	     "in runtime, %i insns\n",
1158132718Skan	     max_unroll, num_loop_insns (loop));
1159169689Skan
1160169689Skan  if (dom_bbs)
1161169689Skan    free (dom_bbs);
1162132718Skan}
1163132718Skan
1164132718Skan/* Decide whether to simply peel LOOP and how much.  */
1165132718Skanstatic void
1166132718Skandecide_peel_simple (struct loop *loop, int flags)
1167132718Skan{
1168132718Skan  unsigned npeel;
1169169689Skan  struct niter_desc *desc;
1170132718Skan
1171132718Skan  if (!(flags & UAP_PEEL))
1172132718Skan    {
1173132718Skan      /* We were not asked to, just return back silently.  */
1174132718Skan      return;
1175132718Skan    }
1176132718Skan
1177169689Skan  if (dump_file)
1178169689Skan    fprintf (dump_file, "\n;; Considering simply peeling loop\n");
1179132718Skan
1180132718Skan  /* npeel = number of iterations to peel.  */
1181132718Skan  npeel = PARAM_VALUE (PARAM_MAX_PEELED_INSNS) / loop->ninsns;
1182132718Skan  if (npeel > (unsigned) PARAM_VALUE (PARAM_MAX_PEEL_TIMES))
1183132718Skan    npeel = PARAM_VALUE (PARAM_MAX_PEEL_TIMES);
1184132718Skan
1185132718Skan  /* Skip big loops.  */
1186132718Skan  if (!npeel)
1187132718Skan    {
1188169689Skan      if (dump_file)
1189169689Skan	fprintf (dump_file, ";; Not considering loop, is too big\n");
1190132718Skan      return;
1191132718Skan    }
1192132718Skan
1193132718Skan  /* Check for simple loops.  */
1194169689Skan  desc = get_simple_loop_desc (loop);
1195132718Skan
1196132718Skan  /* Check number of iterations.  */
1197169689Skan  if (desc->simple_p && !desc->assumptions && desc->const_iter)
1198132718Skan    {
1199169689Skan      if (dump_file)
1200169689Skan	fprintf (dump_file, ";; Loop iterates constant times\n");
1201132718Skan      return;
1202132718Skan    }
1203132718Skan
1204132718Skan  /* Do not simply peel loops with branches inside -- it increases number
1205132718Skan     of mispredicts.  */
1206169689Skan  if (num_loop_branches (loop) > 1)
1207132718Skan    {
1208169689Skan      if (dump_file)
1209169689Skan	fprintf (dump_file, ";; Not peeling, contains branches\n");
1210132718Skan      return;
1211132718Skan    }
1212132718Skan
1213132718Skan  if (loop->header->count)
1214132718Skan    {
1215132718Skan      unsigned niter = expected_loop_iterations (loop);
1216132718Skan      if (niter + 1 > npeel)
1217132718Skan	{
1218169689Skan	  if (dump_file)
1219132718Skan	    {
1220169689Skan	      fprintf (dump_file, ";; Not peeling loop, rolls too much (");
1221169689Skan	      fprintf (dump_file, HOST_WIDEST_INT_PRINT_DEC,
1222169689Skan		       (HOST_WIDEST_INT) (niter + 1));
1223169689Skan	      fprintf (dump_file, " iterations > %d [maximum peelings])\n",
1224169689Skan		       npeel);
1225132718Skan	    }
1226132718Skan	  return;
1227132718Skan	}
1228132718Skan      npeel = niter + 1;
1229132718Skan    }
1230132718Skan  else
1231132718Skan    {
1232132718Skan      /* For now we have no good heuristics to decide whether loop peeling
1233132718Skan         will be effective, so disable it.  */
1234169689Skan      if (dump_file)
1235169689Skan	fprintf (dump_file,
1236132718Skan		 ";; Not peeling loop, no evidence it will be profitable\n");
1237132718Skan      return;
1238132718Skan    }
1239132718Skan
1240132718Skan  /* Success.  */
1241132718Skan  loop->lpt_decision.decision = LPT_PEEL_SIMPLE;
1242132718Skan  loop->lpt_decision.times = npeel;
1243169689Skan
1244169689Skan  if (dump_file)
1245169689Skan    fprintf (dump_file, ";; Decided to simply peel the loop, %d times.\n",
1246169689Skan	     loop->lpt_decision.times);
1247132718Skan}
1248132718Skan
1249132718Skan/* Peel a LOOP LOOP->LPT_DECISION.TIMES times.  The transformation:
1250132718Skan   while (cond)
1251132718Skan     body;
1252132718Skan
1253132718Skan   ==>
1254132718Skan
1255132718Skan   if (!cond) goto end;
1256132718Skan   body;
1257132718Skan   if (!cond) goto end;
1258132718Skan   body;
1259132718Skan   while (cond)
1260132718Skan     body;
1261132718Skan   end: ;
1262132718Skan   */
1263132718Skanstatic void
1264132718Skanpeel_loop_simple (struct loops *loops, struct loop *loop)
1265132718Skan{
1266132718Skan  sbitmap wont_exit;
1267132718Skan  unsigned npeel = loop->lpt_decision.times;
1268169689Skan  struct niter_desc *desc = get_simple_loop_desc (loop);
1269169689Skan  struct opt_info *opt_info = NULL;
1270169689Skan  bool ok;
1271169689Skan
1272169689Skan  if (flag_split_ivs_in_unroller && npeel > 1)
1273169689Skan    opt_info = analyze_insns_in_loop (loop);
1274169689Skan
1275132718Skan  wont_exit = sbitmap_alloc (npeel + 1);
1276132718Skan  sbitmap_zero (wont_exit);
1277169689Skan
1278169689Skan  opt_info_start_duplication (opt_info);
1279169689Skan
1280169689Skan  ok = duplicate_loop_to_header_edge (loop, loop_preheader_edge (loop),
1281169689Skan				      loops, npeel, wont_exit,
1282169689Skan				      NULL, NULL,
1283169689Skan				      NULL, DLTHE_FLAG_UPDATE_FREQ
1284169689Skan				      | (opt_info
1285169689Skan					 ? DLTHE_RECORD_COPY_NUMBER
1286169689Skan					   : 0));
1287169689Skan  gcc_assert (ok);
1288132718Skan
1289132718Skan  free (wont_exit);
1290169689Skan
1291169689Skan  if (opt_info)
1292169689Skan    {
1293169689Skan      apply_opt_in_copies (opt_info, npeel, false, false);
1294169689Skan      free_opt_info (opt_info);
1295169689Skan    }
1296132718Skan
1297169689Skan  if (desc->simple_p)
1298169689Skan    {
1299169689Skan      if (desc->const_iter)
1300169689Skan	{
1301169689Skan	  desc->niter -= npeel;
1302169689Skan	  desc->niter_expr = GEN_INT (desc->niter);
1303169689Skan	  desc->noloop_assumptions = NULL_RTX;
1304169689Skan	}
1305169689Skan      else
1306169689Skan	{
1307169689Skan	  /* We cannot just update niter_expr, as its value might be clobbered
1308169689Skan	     inside loop.  We could handle this by counting the number into
1309169689Skan	     temporary just like we do in runtime unrolling, but it does not
1310169689Skan	     seem worthwhile.  */
1311169689Skan	  free_simple_loop_desc (loop);
1312169689Skan	}
1313169689Skan    }
1314169689Skan  if (dump_file)
1315169689Skan    fprintf (dump_file, ";; Peeling loop %d times\n", npeel);
1316132718Skan}
1317132718Skan
1318132718Skan/* Decide whether to unroll LOOP stupidly and how much.  */
1319132718Skanstatic void
1320132718Skandecide_unroll_stupid (struct loop *loop, int flags)
1321132718Skan{
1322132718Skan  unsigned nunroll, nunroll_by_av, i;
1323169689Skan  struct niter_desc *desc;
1324132718Skan
1325132718Skan  if (!(flags & UAP_UNROLL_ALL))
1326132718Skan    {
1327132718Skan      /* We were not asked to, just return back silently.  */
1328132718Skan      return;
1329132718Skan    }
1330132718Skan
1331169689Skan  if (dump_file)
1332169689Skan    fprintf (dump_file, "\n;; Considering unrolling loop stupidly\n");
1333132718Skan
1334132718Skan  /* nunroll = total number of copies of the original loop body in
1335132718Skan     unrolled loop (i.e. if it is 2, we have to duplicate loop body once.  */
1336132718Skan  nunroll = PARAM_VALUE (PARAM_MAX_UNROLLED_INSNS) / loop->ninsns;
1337169689Skan  nunroll_by_av
1338169689Skan    = PARAM_VALUE (PARAM_MAX_AVERAGE_UNROLLED_INSNS) / loop->av_ninsns;
1339132718Skan  if (nunroll > nunroll_by_av)
1340132718Skan    nunroll = nunroll_by_av;
1341132718Skan  if (nunroll > (unsigned) PARAM_VALUE (PARAM_MAX_UNROLL_TIMES))
1342132718Skan    nunroll = PARAM_VALUE (PARAM_MAX_UNROLL_TIMES);
1343132718Skan
1344132718Skan  /* Skip big loops.  */
1345132718Skan  if (nunroll <= 1)
1346132718Skan    {
1347169689Skan      if (dump_file)
1348169689Skan	fprintf (dump_file, ";; Not considering loop, is too big\n");
1349132718Skan      return;
1350132718Skan    }
1351132718Skan
1352132718Skan  /* Check for simple loops.  */
1353169689Skan  desc = get_simple_loop_desc (loop);
1354132718Skan
1355132718Skan  /* Check simpleness.  */
1356169689Skan  if (desc->simple_p && !desc->assumptions)
1357132718Skan    {
1358169689Skan      if (dump_file)
1359169689Skan	fprintf (dump_file, ";; The loop is simple\n");
1360132718Skan      return;
1361132718Skan    }
1362132718Skan
1363132718Skan  /* Do not unroll loops with branches inside -- it increases number
1364132718Skan     of mispredicts.  */
1365169689Skan  if (num_loop_branches (loop) > 1)
1366132718Skan    {
1367169689Skan      if (dump_file)
1368169689Skan	fprintf (dump_file, ";; Not unrolling, contains branches\n");
1369132718Skan      return;
1370132718Skan    }
1371132718Skan
1372132718Skan  /* If we have profile feedback, check whether the loop rolls.  */
1373169689Skan  if (loop->header->count
1374169689Skan      && expected_loop_iterations (loop) < 2 * nunroll)
1375132718Skan    {
1376169689Skan      if (dump_file)
1377169689Skan	fprintf (dump_file, ";; Not unrolling loop, doesn't roll\n");
1378132718Skan      return;
1379132718Skan    }
1380132718Skan
1381132718Skan  /* Success.  Now force nunroll to be power of 2, as it seems that this
1382132718Skan     improves results (partially because of better alignments, partially
1383132718Skan     because of some dark magic).  */
1384169689Skan  for (i = 1; 2 * i <= nunroll; i *= 2)
1385169689Skan    continue;
1386132718Skan
1387132718Skan  loop->lpt_decision.decision = LPT_UNROLL_STUPID;
1388132718Skan  loop->lpt_decision.times = i - 1;
1389169689Skan
1390169689Skan  if (dump_file)
1391169689Skan    fprintf (dump_file,
1392169689Skan	     ";; Decided to unroll the loop stupidly, %d times.\n",
1393169689Skan	     loop->lpt_decision.times);
1394132718Skan}
1395132718Skan
1396132718Skan/* Unroll a LOOP LOOP->LPT_DECISION.TIMES times.  The transformation:
1397132718Skan   while (cond)
1398132718Skan     body;
1399132718Skan
1400132718Skan   ==>
1401132718Skan
1402132718Skan   while (cond)
1403132718Skan     {
1404132718Skan       body;
1405132718Skan       if (!cond) break;
1406132718Skan       body;
1407132718Skan       if (!cond) break;
1408132718Skan       body;
1409132718Skan       if (!cond) break;
1410132718Skan       body;
1411132718Skan     }
1412132718Skan   */
1413132718Skanstatic void
1414132718Skanunroll_loop_stupid (struct loops *loops, struct loop *loop)
1415132718Skan{
1416132718Skan  sbitmap wont_exit;
1417132718Skan  unsigned nunroll = loop->lpt_decision.times;
1418169689Skan  struct niter_desc *desc = get_simple_loop_desc (loop);
1419169689Skan  struct opt_info *opt_info = NULL;
1420169689Skan  bool ok;
1421169689Skan
1422169689Skan  if (flag_split_ivs_in_unroller
1423169689Skan      || flag_variable_expansion_in_unroller)
1424169689Skan    opt_info = analyze_insns_in_loop (loop);
1425169689Skan
1426169689Skan
1427132718Skan  wont_exit = sbitmap_alloc (nunroll + 1);
1428132718Skan  sbitmap_zero (wont_exit);
1429169689Skan  opt_info_start_duplication (opt_info);
1430169689Skan
1431169689Skan  ok = duplicate_loop_to_header_edge (loop, loop_latch_edge (loop),
1432169689Skan				      loops, nunroll, wont_exit,
1433169689Skan				      NULL, NULL, NULL,
1434169689Skan				      DLTHE_FLAG_UPDATE_FREQ
1435169689Skan				      | (opt_info
1436169689Skan					 ? DLTHE_RECORD_COPY_NUMBER
1437169689Skan					   : 0));
1438169689Skan  gcc_assert (ok);
1439169689Skan
1440169689Skan  if (opt_info)
1441169689Skan    {
1442169689Skan      apply_opt_in_copies (opt_info, nunroll, true, true);
1443169689Skan      free_opt_info (opt_info);
1444169689Skan    }
1445132718Skan
1446132718Skan  free (wont_exit);
1447132718Skan
1448169689Skan  if (desc->simple_p)
1449169689Skan    {
1450169689Skan      /* We indeed may get here provided that there are nontrivial assumptions
1451169689Skan	 for a loop to be really simple.  We could update the counts, but the
1452169689Skan	 problem is that we are unable to decide which exit will be taken
1453169689Skan	 (not really true in case the number of iterations is constant,
1454169689Skan	 but noone will do anything with this information, so we do not
1455169689Skan	 worry about it).  */
1456169689Skan      desc->simple_p = false;
1457169689Skan    }
1458169689Skan
1459169689Skan  if (dump_file)
1460169689Skan    fprintf (dump_file, ";; Unrolled loop %d times, %i insns\n",
1461132718Skan	     nunroll, num_loop_insns (loop));
1462132718Skan}
1463132718Skan
1464169689Skan/* A hash function for information about insns to split.  */
1465146895Skan
1466169689Skanstatic hashval_t
1467169689Skansi_info_hash (const void *ivts)
1468169689Skan{
1469169689Skan  return (hashval_t) INSN_UID (((struct iv_to_split *) ivts)->insn);
1470169689Skan}
1471169689Skan
1472169689Skan/* An equality functions for information about insns to split.  */
1473169689Skan
1474169689Skanstatic int
1475169689Skansi_info_eq (const void *ivts1, const void *ivts2)
1476169689Skan{
1477169689Skan  const struct iv_to_split *i1 = ivts1;
1478169689Skan  const struct iv_to_split *i2 = ivts2;
1479169689Skan
1480169689Skan  return i1->insn == i2->insn;
1481169689Skan}
1482169689Skan
1483169689Skan/* Return a hash for VES, which is really a "var_to_expand *".  */
1484169689Skan
1485169689Skanstatic hashval_t
1486169689Skanve_info_hash (const void *ves)
1487169689Skan{
1488169689Skan  return (hashval_t) INSN_UID (((struct var_to_expand *) ves)->insn);
1489169689Skan}
1490169689Skan
1491169689Skan/* Return true if IVTS1 and IVTS2 (which are really both of type
1492169689Skan   "var_to_expand *") refer to the same instruction.  */
1493169689Skan
1494169689Skanstatic int
1495169689Skanve_info_eq (const void *ivts1, const void *ivts2)
1496169689Skan{
1497169689Skan  const struct var_to_expand *i1 = ivts1;
1498169689Skan  const struct var_to_expand *i2 = ivts2;
1499169689Skan
1500169689Skan  return i1->insn == i2->insn;
1501169689Skan}
1502169689Skan
1503169689Skan/* Returns true if REG is referenced in one insn in LOOP.  */
1504169689Skan
1505169689Skanbool
1506169689Skanreferenced_in_one_insn_in_loop_p (struct loop *loop, rtx reg)
1507169689Skan{
1508169689Skan  basic_block *body, bb;
1509169689Skan  unsigned i;
1510169689Skan  int count_ref = 0;
1511169689Skan  rtx insn;
1512169689Skan
1513169689Skan  body = get_loop_body (loop);
1514169689Skan  for (i = 0; i < loop->num_nodes; i++)
1515169689Skan    {
1516169689Skan      bb = body[i];
1517169689Skan
1518169689Skan      FOR_BB_INSNS (bb, insn)
1519169689Skan      {
1520169689Skan        if (rtx_referenced_p (reg, insn))
1521169689Skan          count_ref++;
1522169689Skan      }
1523169689Skan    }
1524169689Skan  return (count_ref  == 1);
1525169689Skan}
1526169689Skan
1527169689Skan/* Determine whether INSN contains an accumulator
1528169689Skan   which can be expanded into separate copies,
1529169689Skan   one for each copy of the LOOP body.
1530169689Skan
1531169689Skan   for (i = 0 ; i < n; i++)
1532169689Skan     sum += a[i];
1533169689Skan
1534169689Skan   ==>
1535169689Skan
1536169689Skan   sum += a[i]
1537169689Skan   ....
1538169689Skan   i = i+1;
1539169689Skan   sum1 += a[i]
1540169689Skan   ....
1541169689Skan   i = i+1
1542169689Skan   sum2 += a[i];
1543169689Skan   ....
1544169689Skan
1545169689Skan   Return NULL if INSN contains no opportunity for expansion of accumulator.
1546169689Skan   Otherwise, allocate a VAR_TO_EXPAND structure, fill it with the relevant
1547169689Skan   information and return a pointer to it.
1548169689Skan*/
1549169689Skan
1550169689Skanstatic struct var_to_expand *
1551169689Skananalyze_insn_to_expand_var (struct loop *loop, rtx insn)
1552169689Skan{
1553169689Skan  rtx set, dest, src, op1;
1554169689Skan  struct var_to_expand *ves;
1555169689Skan  enum machine_mode mode1, mode2;
1556169689Skan
1557169689Skan  set = single_set (insn);
1558169689Skan  if (!set)
1559169689Skan    return NULL;
1560169689Skan
1561169689Skan  dest = SET_DEST (set);
1562169689Skan  src = SET_SRC (set);
1563169689Skan
1564169689Skan  if (GET_CODE (src) != PLUS
1565169689Skan      && GET_CODE (src) != MINUS
1566169689Skan      && GET_CODE (src) != MULT)
1567169689Skan    return NULL;
1568169689Skan
1569169689Skan  /* Hmm, this is a bit paradoxical.  We know that INSN is a valid insn
1570169689Skan     in MD.  But if there is no optab to generate the insn, we can not
1571169689Skan     perform the variable expansion.  This can happen if an MD provides
1572169689Skan     an insn but not a named pattern to generate it, for example to avoid
1573169689Skan     producing code that needs additional mode switches like for x87/mmx.
1574169689Skan
1575169689Skan     So we check have_insn_for which looks for an optab for the operation
1576169689Skan     in SRC.  If it doesn't exist, we can't perform the expansion even
1577169689Skan     though INSN is valid.  */
1578169689Skan  if (!have_insn_for (GET_CODE (src), GET_MODE (src)))
1579169689Skan    return NULL;
1580169689Skan
1581169689Skan  if (!XEXP (src, 0))
1582169689Skan    return NULL;
1583169689Skan
1584169689Skan  op1 = XEXP (src, 0);
1585169689Skan
1586169689Skan  if (!REG_P (dest)
1587169689Skan      && !(GET_CODE (dest) == SUBREG
1588169689Skan           && REG_P (SUBREG_REG (dest))))
1589169689Skan    return NULL;
1590169689Skan
1591169689Skan  if (!rtx_equal_p (dest, op1))
1592169689Skan    return NULL;
1593169689Skan
1594169689Skan  if (!referenced_in_one_insn_in_loop_p (loop, dest))
1595169689Skan    return NULL;
1596169689Skan
1597169689Skan  if (rtx_referenced_p (dest, XEXP (src, 1)))
1598169689Skan    return NULL;
1599169689Skan
1600169689Skan  mode1 = GET_MODE (dest);
1601169689Skan  mode2 = GET_MODE (XEXP (src, 1));
1602169689Skan  if ((FLOAT_MODE_P (mode1)
1603169689Skan       || FLOAT_MODE_P (mode2))
1604169689Skan      && !flag_unsafe_math_optimizations)
1605169689Skan    return NULL;
1606169689Skan
1607169689Skan  /* Record the accumulator to expand.  */
1608169689Skan  ves = XNEW (struct var_to_expand);
1609169689Skan  ves->insn = insn;
1610169689Skan  ves->var_expansions = VEC_alloc (rtx, heap, 1);
1611169689Skan  ves->reg = copy_rtx (dest);
1612169689Skan  ves->op = GET_CODE (src);
1613169689Skan  ves->expansion_count = 0;
1614169689Skan  ves->reuse_expansion = 0;
1615169689Skan  return ves;
1616169689Skan}
1617169689Skan
1618169689Skan/* Determine whether there is an induction variable in INSN that
1619169689Skan   we would like to split during unrolling.
1620169689Skan
1621169689Skan   I.e. replace
1622169689Skan
1623169689Skan   i = i + 1;
1624169689Skan   ...
1625169689Skan   i = i + 1;
1626169689Skan   ...
1627169689Skan   i = i + 1;
1628169689Skan   ...
1629169689Skan
1630169689Skan   type chains by
1631169689Skan
1632169689Skan   i0 = i + 1
1633169689Skan   ...
1634169689Skan   i = i0 + 1
1635169689Skan   ...
1636169689Skan   i = i0 + 2
1637169689Skan   ...
1638169689Skan
1639169689Skan   Return NULL if INSN contains no interesting IVs.  Otherwise, allocate
1640169689Skan   an IV_TO_SPLIT structure, fill it with the relevant information and return a
1641169689Skan   pointer to it.  */
1642169689Skan
1643169689Skanstatic struct iv_to_split *
1644169689Skananalyze_iv_to_split_insn (rtx insn)
1645169689Skan{
1646169689Skan  rtx set, dest;
1647169689Skan  struct rtx_iv iv;
1648169689Skan  struct iv_to_split *ivts;
1649169689Skan  bool ok;
1650169689Skan
1651169689Skan  /* For now we just split the basic induction variables.  Later this may be
1652169689Skan     extended for example by selecting also addresses of memory references.  */
1653169689Skan  set = single_set (insn);
1654169689Skan  if (!set)
1655169689Skan    return NULL;
1656169689Skan
1657169689Skan  dest = SET_DEST (set);
1658169689Skan  if (!REG_P (dest))
1659169689Skan    return NULL;
1660169689Skan
1661169689Skan  if (!biv_p (insn, dest))
1662169689Skan    return NULL;
1663169689Skan
1664169689Skan  ok = iv_analyze_result (insn, dest, &iv);
1665169689Skan
1666169689Skan  /* This used to be an assert under the assumption that if biv_p returns
1667169689Skan     true that iv_analyze_result must also return true.  However, that
1668169689Skan     assumption is not strictly correct as evidenced by pr25569.
1669169689Skan
1670169689Skan     Returning NULL when iv_analyze_result returns false is safe and
1671169689Skan     avoids the problems in pr25569 until the iv_analyze_* routines
1672169689Skan     can be fixed, which is apparently hard and time consuming
1673169689Skan     according to their author.  */
1674169689Skan  if (! ok)
1675169689Skan    return NULL;
1676169689Skan
1677169689Skan  if (iv.step == const0_rtx
1678169689Skan      || iv.mode != iv.extend_mode)
1679169689Skan    return NULL;
1680169689Skan
1681169689Skan  /* Record the insn to split.  */
1682169689Skan  ivts = XNEW (struct iv_to_split);
1683169689Skan  ivts->insn = insn;
1684169689Skan  ivts->base_var = NULL_RTX;
1685169689Skan  ivts->step = iv.step;
1686169689Skan  ivts->n_loc = 1;
1687169689Skan  ivts->loc[0] = 1;
1688169689Skan
1689169689Skan  return ivts;
1690169689Skan}
1691169689Skan
1692169689Skan/* Determines which of insns in LOOP can be optimized.
1693169689Skan   Return a OPT_INFO struct with the relevant hash tables filled
1694169689Skan   with all insns to be optimized.  The FIRST_NEW_BLOCK field
1695169689Skan   is undefined for the return value.  */
1696169689Skan
1697169689Skanstatic struct opt_info *
1698169689Skananalyze_insns_in_loop (struct loop *loop)
1699169689Skan{
1700169689Skan  basic_block *body, bb;
1701169689Skan  unsigned i, num_edges = 0;
1702169689Skan  struct opt_info *opt_info = XCNEW (struct opt_info);
1703169689Skan  rtx insn;
1704169689Skan  struct iv_to_split *ivts = NULL;
1705169689Skan  struct var_to_expand *ves = NULL;
1706169689Skan  PTR *slot1;
1707169689Skan  PTR *slot2;
1708169689Skan  edge *edges = get_loop_exit_edges (loop, &num_edges);
1709169689Skan  bool can_apply = false;
1710169689Skan
1711169689Skan  iv_analysis_loop_init (loop);
1712169689Skan
1713169689Skan  body = get_loop_body (loop);
1714169689Skan
1715169689Skan  if (flag_split_ivs_in_unroller)
1716169689Skan    opt_info->insns_to_split = htab_create (5 * loop->num_nodes,
1717169689Skan                                            si_info_hash, si_info_eq, free);
1718169689Skan
1719169689Skan  /* Record the loop exit bb and loop preheader before the unrolling.  */
1720169689Skan  if (!loop_preheader_edge (loop)->src)
1721169689Skan    {
1722169689Skan      loop_split_edge_with (loop_preheader_edge (loop), NULL_RTX);
1723169689Skan      opt_info->loop_preheader = loop_split_edge_with (loop_preheader_edge (loop), NULL_RTX);
1724169689Skan    }
1725169689Skan  else
1726169689Skan    opt_info->loop_preheader = loop_preheader_edge (loop)->src;
1727169689Skan
1728169689Skan  if (num_edges == 1
1729169689Skan      && !(edges[0]->flags & EDGE_COMPLEX))
1730169689Skan    {
1731169689Skan      opt_info->loop_exit = loop_split_edge_with (edges[0], NULL_RTX);
1732169689Skan      can_apply = true;
1733169689Skan    }
1734169689Skan
1735169689Skan  if (flag_variable_expansion_in_unroller
1736169689Skan      && can_apply)
1737169689Skan    opt_info->insns_with_var_to_expand = htab_create (5 * loop->num_nodes,
1738169689Skan						      ve_info_hash, ve_info_eq, free);
1739169689Skan
1740169689Skan  for (i = 0; i < loop->num_nodes; i++)
1741169689Skan    {
1742169689Skan      bb = body[i];
1743169689Skan      if (!dominated_by_p (CDI_DOMINATORS, loop->latch, bb))
1744169689Skan	continue;
1745169689Skan
1746169689Skan      FOR_BB_INSNS (bb, insn)
1747169689Skan      {
1748169689Skan        if (!INSN_P (insn))
1749169689Skan          continue;
1750169689Skan
1751169689Skan        if (opt_info->insns_to_split)
1752169689Skan          ivts = analyze_iv_to_split_insn (insn);
1753169689Skan
1754169689Skan        if (ivts)
1755169689Skan          {
1756169689Skan            slot1 = htab_find_slot (opt_info->insns_to_split, ivts, INSERT);
1757169689Skan            *slot1 = ivts;
1758169689Skan            continue;
1759169689Skan          }
1760169689Skan
1761169689Skan        if (opt_info->insns_with_var_to_expand)
1762169689Skan          ves = analyze_insn_to_expand_var (loop, insn);
1763169689Skan
1764169689Skan        if (ves)
1765169689Skan          {
1766169689Skan            slot2 = htab_find_slot (opt_info->insns_with_var_to_expand, ves, INSERT);
1767169689Skan            *slot2 = ves;
1768169689Skan          }
1769169689Skan      }
1770169689Skan    }
1771169689Skan
1772169689Skan  free (edges);
1773169689Skan  free (body);
1774169689Skan  return opt_info;
1775169689Skan}
1776169689Skan
1777169689Skan/* Called just before loop duplication.  Records start of duplicated area
1778169689Skan   to OPT_INFO.  */
1779169689Skan
1780169689Skanstatic void
1781169689Skanopt_info_start_duplication (struct opt_info *opt_info)
1782169689Skan{
1783169689Skan  if (opt_info)
1784169689Skan    opt_info->first_new_block = last_basic_block;
1785169689Skan}
1786169689Skan
1787169689Skan/* Determine the number of iterations between initialization of the base
1788169689Skan   variable and the current copy (N_COPY).  N_COPIES is the total number
1789169689Skan   of newly created copies.  UNROLLING is true if we are unrolling
1790169689Skan   (not peeling) the loop.  */
1791169689Skan
1792169689Skanstatic unsigned
1793169689Skandetermine_split_iv_delta (unsigned n_copy, unsigned n_copies, bool unrolling)
1794169689Skan{
1795169689Skan  if (unrolling)
1796169689Skan    {
1797169689Skan      /* If we are unrolling, initialization is done in the original loop
1798169689Skan	 body (number 0).  */
1799169689Skan      return n_copy;
1800169689Skan    }
1801169689Skan  else
1802169689Skan    {
1803169689Skan      /* If we are peeling, the copy in that the initialization occurs has
1804169689Skan	 number 1.  The original loop (number 0) is the last.  */
1805169689Skan      if (n_copy)
1806169689Skan	return n_copy - 1;
1807169689Skan      else
1808169689Skan	return n_copies;
1809169689Skan    }
1810169689Skan}
1811169689Skan
1812169689Skan/* Locate in EXPR the expression corresponding to the location recorded
1813169689Skan   in IVTS, and return a pointer to the RTX for this location.  */
1814169689Skan
1815169689Skanstatic rtx *
1816169689Skanget_ivts_expr (rtx expr, struct iv_to_split *ivts)
1817169689Skan{
1818169689Skan  unsigned i;
1819169689Skan  rtx *ret = &expr;
1820169689Skan
1821169689Skan  for (i = 0; i < ivts->n_loc; i++)
1822169689Skan    ret = &XEXP (*ret, ivts->loc[i]);
1823169689Skan
1824169689Skan  return ret;
1825169689Skan}
1826169689Skan
1827169689Skan/* Allocate basic variable for the induction variable chain.  Callback for
1828169689Skan   htab_traverse.  */
1829169689Skan
1830169689Skanstatic int
1831169689Skanallocate_basic_variable (void **slot, void *data ATTRIBUTE_UNUSED)
1832169689Skan{
1833169689Skan  struct iv_to_split *ivts = *slot;
1834169689Skan  rtx expr = *get_ivts_expr (single_set (ivts->insn), ivts);
1835169689Skan
1836169689Skan  ivts->base_var = gen_reg_rtx (GET_MODE (expr));
1837169689Skan
1838169689Skan  return 1;
1839169689Skan}
1840169689Skan
1841169689Skan/* Insert initialization of basic variable of IVTS before INSN, taking
1842169689Skan   the initial value from INSN.  */
1843169689Skan
1844146895Skanstatic void
1845169689Skaninsert_base_initialization (struct iv_to_split *ivts, rtx insn)
1846132718Skan{
1847169689Skan  rtx expr = copy_rtx (*get_ivts_expr (single_set (insn), ivts));
1848132718Skan  rtx seq;
1849132718Skan
1850169689Skan  start_sequence ();
1851169689Skan  expr = force_operand (expr, ivts->base_var);
1852169689Skan  if (expr != ivts->base_var)
1853169689Skan    emit_move_insn (ivts->base_var, expr);
1854169689Skan  seq = get_insns ();
1855169689Skan  end_sequence ();
1856132718Skan
1857169689Skan  emit_insn_before (seq, insn);
1858169689Skan}
1859132718Skan
1860169689Skan/* Replace the use of induction variable described in IVTS in INSN
1861169689Skan   by base variable + DELTA * step.  */
1862169689Skan
1863169689Skanstatic void
1864169689Skansplit_iv (struct iv_to_split *ivts, rtx insn, unsigned delta)
1865169689Skan{
1866169689Skan  rtx expr, *loc, seq, incr, var;
1867169689Skan  enum machine_mode mode = GET_MODE (ivts->base_var);
1868169689Skan  rtx src, dest, set;
1869169689Skan
1870169689Skan  /* Construct base + DELTA * step.  */
1871169689Skan  if (!delta)
1872169689Skan    expr = ivts->base_var;
1873169689Skan  else
1874132718Skan    {
1875169689Skan      incr = simplify_gen_binary (MULT, mode,
1876169689Skan				  ivts->step, gen_int_mode (delta, mode));
1877169689Skan      expr = simplify_gen_binary (PLUS, GET_MODE (ivts->base_var),
1878169689Skan				  ivts->base_var, incr);
1879132718Skan    }
1880132718Skan
1881169689Skan  /* Figure out where to do the replacement.  */
1882169689Skan  loc = get_ivts_expr (single_set (insn), ivts);
1883132718Skan
1884169689Skan  /* If we can make the replacement right away, we're done.  */
1885169689Skan  if (validate_change (insn, loc, expr, 0))
1886169689Skan    return;
1887169689Skan
1888169689Skan  /* Otherwise, force EXPR into a register and try again.  */
1889169689Skan  start_sequence ();
1890169689Skan  var = gen_reg_rtx (mode);
1891169689Skan  expr = force_operand (expr, var);
1892169689Skan  if (expr != var)
1893169689Skan    emit_move_insn (var, expr);
1894132718Skan  seq = get_insns ();
1895132718Skan  end_sequence ();
1896169689Skan  emit_insn_before (seq, insn);
1897169689Skan
1898169689Skan  if (validate_change (insn, loc, var, 0))
1899169689Skan    return;
1900132718Skan
1901169689Skan  /* The last chance.  Try recreating the assignment in insn
1902169689Skan     completely from scratch.  */
1903169689Skan  set = single_set (insn);
1904169689Skan  gcc_assert (set);
1905132718Skan
1906169689Skan  start_sequence ();
1907169689Skan  *loc = var;
1908169689Skan  src = copy_rtx (SET_SRC (set));
1909169689Skan  dest = copy_rtx (SET_DEST (set));
1910169689Skan  src = force_operand (src, dest);
1911169689Skan  if (src != dest)
1912169689Skan    emit_move_insn (dest, src);
1913169689Skan  seq = get_insns ();
1914169689Skan  end_sequence ();
1915169689Skan
1916169689Skan  emit_insn_before (seq, insn);
1917169689Skan  delete_insn (insn);
1918132718Skan}
1919132718Skan
1920169689Skan
1921169689Skan/* Return one expansion of the accumulator recorded in struct VE.  */
1922169689Skan
1923169689Skanstatic rtx
1924169689Skanget_expansion (struct var_to_expand *ve)
1925132718Skan{
1926169689Skan  rtx reg;
1927132718Skan
1928169689Skan  if (ve->reuse_expansion == 0)
1929169689Skan    reg = ve->reg;
1930169689Skan  else
1931169689Skan    reg = VEC_index (rtx, ve->var_expansions, ve->reuse_expansion - 1);
1932169689Skan
1933169689Skan  if (VEC_length (rtx, ve->var_expansions) == (unsigned) ve->reuse_expansion)
1934169689Skan    ve->reuse_expansion = 0;
1935169689Skan  else
1936169689Skan    ve->reuse_expansion++;
1937169689Skan
1938169689Skan  return reg;
1939169689Skan}
1940132718Skan
1941132718Skan
1942169689Skan/* Given INSN replace the uses of the accumulator recorded in VE
1943169689Skan   with a new register.  */
1944132718Skan
1945169689Skanstatic void
1946169689Skanexpand_var_during_unrolling (struct var_to_expand *ve, rtx insn)
1947169689Skan{
1948169689Skan  rtx new_reg, set;
1949169689Skan  bool really_new_expansion = false;
1950169689Skan
1951169689Skan  set = single_set (insn);
1952169689Skan  gcc_assert (set);
1953169689Skan
1954169689Skan  /* Generate a new register only if the expansion limit has not been
1955169689Skan     reached.  Else reuse an already existing expansion.  */
1956169689Skan  if (PARAM_VALUE (PARAM_MAX_VARIABLE_EXPANSIONS) > ve->expansion_count)
1957169689Skan    {
1958169689Skan      really_new_expansion = true;
1959169689Skan      new_reg = gen_reg_rtx (GET_MODE (ve->reg));
1960169689Skan    }
1961169689Skan  else
1962169689Skan    new_reg = get_expansion (ve);
1963132718Skan
1964169689Skan  validate_change (insn, &SET_DEST (set), new_reg, 1);
1965169689Skan  validate_change (insn, &XEXP (SET_SRC (set), 0), new_reg, 1);
1966132718Skan
1967169689Skan  if (apply_change_group ())
1968169689Skan    if (really_new_expansion)
1969169689Skan      {
1970169689Skan        VEC_safe_push (rtx, heap, ve->var_expansions, new_reg);
1971169689Skan        ve->expansion_count++;
1972169689Skan      }
1973169689Skan}
1974132718Skan
1975169689Skan/* Initialize the variable expansions in loop preheader.
1976169689Skan   Callbacks for htab_traverse.  PLACE_P is the loop-preheader
1977169689Skan   basic block where the initialization of the expansions
1978169689Skan   should take place.  */
1979169689Skan
1980169689Skanstatic int
1981169689Skaninsert_var_expansion_initialization (void **slot, void *place_p)
1982169689Skan{
1983169689Skan  struct var_to_expand *ve = *slot;
1984169689Skan  basic_block place = (basic_block)place_p;
1985169689Skan  rtx seq, var, zero_init, insn;
1986169689Skan  unsigned i;
1987132718Skan
1988169689Skan  if (VEC_length (rtx, ve->var_expansions) == 0)
1989169689Skan    return 1;
1990169689Skan
1991169689Skan  start_sequence ();
1992169689Skan  if (ve->op == PLUS || ve->op == MINUS)
1993169689Skan    for (i = 0; VEC_iterate (rtx, ve->var_expansions, i, var); i++)
1994169689Skan      {
1995169689Skan        zero_init =  CONST0_RTX (GET_MODE (var));
1996169689Skan        emit_move_insn (var, zero_init);
1997169689Skan      }
1998169689Skan  else if (ve->op == MULT)
1999169689Skan    for (i = 0; VEC_iterate (rtx, ve->var_expansions, i, var); i++)
2000169689Skan      {
2001169689Skan        zero_init =  CONST1_RTX (GET_MODE (var));
2002169689Skan        emit_move_insn (var, zero_init);
2003169689Skan      }
2004169689Skan
2005169689Skan  seq = get_insns ();
2006169689Skan  end_sequence ();
2007169689Skan
2008169689Skan  insn = BB_HEAD (place);
2009169689Skan  while (!NOTE_INSN_BASIC_BLOCK_P (insn))
2010169689Skan    insn = NEXT_INSN (insn);
2011169689Skan
2012169689Skan  emit_insn_after (seq, insn);
2013169689Skan  /* Continue traversing the hash table.  */
2014169689Skan  return 1;
2015169689Skan}
2016132718Skan
2017169689Skan/*  Combine the variable expansions at the loop exit.
2018169689Skan    Callbacks for htab_traverse.  PLACE_P is the loop exit
2019169689Skan    basic block where the summation of the expansions should
2020169689Skan    take place.  */
2021132718Skan
2022169689Skanstatic int
2023169689Skancombine_var_copies_in_loop_exit (void **slot, void *place_p)
2024169689Skan{
2025169689Skan  struct var_to_expand *ve = *slot;
2026169689Skan  basic_block place = (basic_block)place_p;
2027169689Skan  rtx sum = ve->reg;
2028169689Skan  rtx expr, seq, var, insn;
2029169689Skan  unsigned i;
2030169689Skan
2031169689Skan  if (VEC_length (rtx, ve->var_expansions) == 0)
2032169689Skan    return 1;
2033169689Skan
2034169689Skan  start_sequence ();
2035169689Skan  if (ve->op == PLUS || ve->op == MINUS)
2036169689Skan    for (i = 0; VEC_iterate (rtx, ve->var_expansions, i, var); i++)
2037169689Skan      {
2038169689Skan        sum = simplify_gen_binary (PLUS, GET_MODE (ve->reg),
2039169689Skan                                   var, sum);
2040169689Skan      }
2041169689Skan  else if (ve->op == MULT)
2042169689Skan    for (i = 0; VEC_iterate (rtx, ve->var_expansions, i, var); i++)
2043169689Skan      {
2044169689Skan        sum = simplify_gen_binary (MULT, GET_MODE (ve->reg),
2045169689Skan                                   var, sum);
2046169689Skan      }
2047169689Skan
2048169689Skan  expr = force_operand (sum, ve->reg);
2049169689Skan  if (expr != ve->reg)
2050169689Skan    emit_move_insn (ve->reg, expr);
2051169689Skan  seq = get_insns ();
2052169689Skan  end_sequence ();
2053169689Skan
2054169689Skan  insn = BB_HEAD (place);
2055169689Skan  while (!NOTE_INSN_BASIC_BLOCK_P (insn))
2056169689Skan    insn = NEXT_INSN (insn);
2057169689Skan
2058169689Skan  emit_insn_after (seq, insn);
2059169689Skan
2060169689Skan  /* Continue traversing the hash table.  */
2061169689Skan  return 1;
2062169689Skan}
2063169689Skan
2064169689Skan/* Apply loop optimizations in loop copies using the
2065169689Skan   data which gathered during the unrolling.  Structure
2066169689Skan   OPT_INFO record that data.
2067169689Skan
2068169689Skan   UNROLLING is true if we unrolled (not peeled) the loop.
2069169689Skan   REWRITE_ORIGINAL_BODY is true if we should also rewrite the original body of
2070169689Skan   the loop (as it should happen in complete unrolling, but not in ordinary
2071169689Skan   peeling of the loop).  */
2072169689Skan
2073169689Skanstatic void
2074169689Skanapply_opt_in_copies (struct opt_info *opt_info,
2075169689Skan                     unsigned n_copies, bool unrolling,
2076169689Skan                     bool rewrite_original_loop)
2077169689Skan{
2078169689Skan  unsigned i, delta;
2079169689Skan  basic_block bb, orig_bb;
2080169689Skan  rtx insn, orig_insn, next;
2081169689Skan  struct iv_to_split ivts_templ, *ivts;
2082169689Skan  struct var_to_expand ve_templ, *ves;
2083169689Skan
2084169689Skan  /* Sanity check -- we need to put initialization in the original loop
2085169689Skan     body.  */
2086169689Skan  gcc_assert (!unrolling || rewrite_original_loop);
2087169689Skan
2088169689Skan  /* Allocate the basic variables (i0).  */
2089169689Skan  if (opt_info->insns_to_split)
2090169689Skan    htab_traverse (opt_info->insns_to_split, allocate_basic_variable, NULL);
2091169689Skan
2092169689Skan  for (i = opt_info->first_new_block; i < (unsigned) last_basic_block; i++)
2093169689Skan    {
2094169689Skan      bb = BASIC_BLOCK (i);
2095169689Skan      orig_bb = get_bb_original (bb);
2096169689Skan
2097169689Skan      /* bb->aux holds position in copy sequence initialized by
2098169689Skan	 duplicate_loop_to_header_edge.  */
2099169689Skan      delta = determine_split_iv_delta ((size_t)bb->aux, n_copies,
2100169689Skan					unrolling);
2101169689Skan      bb->aux = 0;
2102169689Skan      orig_insn = BB_HEAD (orig_bb);
2103169689Skan      for (insn = BB_HEAD (bb); insn != NEXT_INSN (BB_END (bb)); insn = next)
2104169689Skan        {
2105169689Skan          next = NEXT_INSN (insn);
2106169689Skan          if (!INSN_P (insn))
2107169689Skan            continue;
2108169689Skan
2109169689Skan          while (!INSN_P (orig_insn))
2110169689Skan            orig_insn = NEXT_INSN (orig_insn);
2111169689Skan
2112169689Skan          ivts_templ.insn = orig_insn;
2113169689Skan          ve_templ.insn = orig_insn;
2114169689Skan
2115169689Skan          /* Apply splitting iv optimization.  */
2116169689Skan          if (opt_info->insns_to_split)
2117169689Skan            {
2118169689Skan              ivts = htab_find (opt_info->insns_to_split, &ivts_templ);
2119169689Skan
2120169689Skan              if (ivts)
2121169689Skan                {
2122169689Skan		  gcc_assert (GET_CODE (PATTERN (insn))
2123169689Skan			      == GET_CODE (PATTERN (orig_insn)));
2124169689Skan
2125169689Skan                  if (!delta)
2126169689Skan                    insert_base_initialization (ivts, insn);
2127169689Skan                  split_iv (ivts, insn, delta);
2128169689Skan                }
2129169689Skan            }
2130169689Skan          /* Apply variable expansion optimization.  */
2131169689Skan          if (unrolling && opt_info->insns_with_var_to_expand)
2132169689Skan            {
2133169689Skan              ves = htab_find (opt_info->insns_with_var_to_expand, &ve_templ);
2134169689Skan              if (ves)
2135169689Skan                {
2136169689Skan		  gcc_assert (GET_CODE (PATTERN (insn))
2137169689Skan			      == GET_CODE (PATTERN (orig_insn)));
2138169689Skan                  expand_var_during_unrolling (ves, insn);
2139169689Skan                }
2140169689Skan            }
2141169689Skan          orig_insn = NEXT_INSN (orig_insn);
2142169689Skan        }
2143132718Skan    }
2144132718Skan
2145169689Skan  if (!rewrite_original_loop)
2146169689Skan    return;
2147169689Skan
2148169689Skan  /* Initialize the variable expansions in the loop preheader
2149169689Skan     and take care of combining them at the loop exit.  */
2150169689Skan  if (opt_info->insns_with_var_to_expand)
2151132718Skan    {
2152169689Skan      htab_traverse (opt_info->insns_with_var_to_expand,
2153169689Skan                     insert_var_expansion_initialization,
2154169689Skan                     opt_info->loop_preheader);
2155169689Skan      htab_traverse (opt_info->insns_with_var_to_expand,
2156169689Skan                     combine_var_copies_in_loop_exit,
2157169689Skan                     opt_info->loop_exit);
2158169689Skan    }
2159169689Skan
2160169689Skan  /* Rewrite also the original loop body.  Find them as originals of the blocks
2161169689Skan     in the last copied iteration, i.e. those that have
2162169689Skan     get_bb_copy (get_bb_original (bb)) == bb.  */
2163169689Skan  for (i = opt_info->first_new_block; i < (unsigned) last_basic_block; i++)
2164169689Skan    {
2165169689Skan      bb = BASIC_BLOCK (i);
2166169689Skan      orig_bb = get_bb_original (bb);
2167169689Skan      if (get_bb_copy (orig_bb) != bb)
2168169689Skan	continue;
2169169689Skan
2170169689Skan      delta = determine_split_iv_delta (0, n_copies, unrolling);
2171169689Skan      for (orig_insn = BB_HEAD (orig_bb);
2172169689Skan           orig_insn != NEXT_INSN (BB_END (bb));
2173169689Skan           orig_insn = next)
2174169689Skan        {
2175169689Skan          next = NEXT_INSN (orig_insn);
2176169689Skan
2177169689Skan          if (!INSN_P (orig_insn))
2178169689Skan 	    continue;
2179169689Skan
2180169689Skan          ivts_templ.insn = orig_insn;
2181169689Skan          if (opt_info->insns_to_split)
2182169689Skan            {
2183169689Skan              ivts = htab_find (opt_info->insns_to_split, &ivts_templ);
2184169689Skan              if (ivts)
2185169689Skan                {
2186169689Skan                  if (!delta)
2187169689Skan                    insert_base_initialization (ivts, orig_insn);
2188169689Skan                  split_iv (ivts, orig_insn, delta);
2189169689Skan                  continue;
2190169689Skan                }
2191169689Skan            }
2192169689Skan
2193169689Skan        }
2194169689Skan    }
2195169689Skan}
2196132718Skan
2197169689Skan/*  Release the data structures used for the variable expansion
2198169689Skan    optimization.  Callbacks for htab_traverse.  */
2199132718Skan
2200169689Skanstatic int
2201169689Skanrelease_var_copies (void **slot, void *data ATTRIBUTE_UNUSED)
2202169689Skan{
2203169689Skan  struct var_to_expand *ve = *slot;
2204169689Skan
2205169689Skan  VEC_free (rtx, heap, ve->var_expansions);
2206169689Skan
2207169689Skan  /* Continue traversing the hash table.  */
2208169689Skan  return 1;
2209169689Skan}
2210169689Skan
2211169689Skan/* Release OPT_INFO.  */
2212169689Skan
2213169689Skanstatic void
2214169689Skanfree_opt_info (struct opt_info *opt_info)
2215169689Skan{
2216169689Skan  if (opt_info->insns_to_split)
2217169689Skan    htab_delete (opt_info->insns_to_split);
2218169689Skan  if (opt_info->insns_with_var_to_expand)
2219169689Skan    {
2220169689Skan      htab_traverse (opt_info->insns_with_var_to_expand,
2221169689Skan                     release_var_copies, NULL);
2222169689Skan      htab_delete (opt_info->insns_with_var_to_expand);
2223132718Skan    }
2224169689Skan  free (opt_info);
2225132718Skan}
2226