1/* Loop unswitching for GNU compiler.
2   Copyright (C) 2002, 2003, 2004, 2005 Free Software Foundation, Inc.
3
4This file is part of GCC.
5
6GCC is free software; you can redistribute it and/or modify it under
7the terms of the GNU General Public License as published by the Free
8Software Foundation; either version 2, or (at your option) any later
9version.
10
11GCC is distributed in the hope that it will be useful, but WITHOUT ANY
12WARRANTY; without even the implied warranty of MERCHANTABILITY or
13FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
14for more details.
15
16You should have received a copy of the GNU General Public License
17along with GCC; see the file COPYING.  If not, write to the Free
18Software Foundation, 51 Franklin Street, Fifth Floor, Boston, MA
1902110-1301, USA.  */
20
21#include "config.h"
22#include "system.h"
23#include "coretypes.h"
24#include "tm.h"
25#include "rtl.h"
26#include "hard-reg-set.h"
27#include "obstack.h"
28#include "basic-block.h"
29#include "cfgloop.h"
30#include "cfglayout.h"
31#include "params.h"
32#include "output.h"
33#include "expr.h"
34
35/* This pass moves constant conditions out of loops, duplicating the loop
36   in progress, i.e. this code:
37
38   while (loop_cond)
39     {
40       A;
41       if (cond)
42         branch1;
43       else
44	 branch2;
45       B;
46       if (cond)
47         branch3;
48       C;
49     }
50   where nothing inside the loop alters cond is transformed
51   into
52
53   if (cond)
54     {
55       while (loop_cond)
56	 {
57	   A;
58	   branch1;
59	   B;
60	   branch3;
61	   C;
62	 }
63     }
64   else
65     {
66       while (loop_cond)
67	 {
68	   A;
69	   branch2;
70	   B;
71	   C;
72	 }
73     }
74
75  Duplicating the loop might lead to code growth exponential in number of
76  branches inside loop, so we limit the number of unswitchings performed
77  in a single loop to PARAM_MAX_UNSWITCH_LEVEL.  We only perform the
78  transformation on innermost loops, as the benefit of doing it on loops
79  containing subloops would not be very large compared to complications
80  with handling this case.  */
81
82static struct loop *unswitch_loop (struct loops *, struct loop *,
83				   basic_block, rtx, rtx);
84static void unswitch_single_loop (struct loops *, struct loop *, rtx, int);
85static rtx may_unswitch_on (basic_block, struct loop *, rtx *);
86
87/* Prepare a sequence comparing OP0 with OP1 using COMP and jumping to LABEL if
88   true, with probability PROB.  If CINSN is not NULL, it is the insn to copy
89   in order to create a jump.  */
90
91rtx
92compare_and_jump_seq (rtx op0, rtx op1, enum rtx_code comp, rtx label, int prob,
93		      rtx cinsn)
94{
95  rtx seq, jump, cond;
96  enum machine_mode mode;
97
98  mode = GET_MODE (op0);
99  if (mode == VOIDmode)
100    mode = GET_MODE (op1);
101
102  start_sequence ();
103  if (GET_MODE_CLASS (mode) == MODE_CC)
104    {
105      /* A hack -- there seems to be no easy generic way how to make a
106	 conditional jump from a ccmode comparison.  */
107      gcc_assert (cinsn);
108      cond = XEXP (SET_SRC (pc_set (cinsn)), 0);
109      gcc_assert (GET_CODE (cond) == comp);
110      gcc_assert (rtx_equal_p (op0, XEXP (cond, 0)));
111      gcc_assert (rtx_equal_p (op1, XEXP (cond, 1)));
112      emit_jump_insn (copy_insn (PATTERN (cinsn)));
113      jump = get_last_insn ();
114      JUMP_LABEL (jump) = JUMP_LABEL (cinsn);
115      LABEL_NUSES (JUMP_LABEL (jump))++;
116      redirect_jump (jump, label, 0);
117    }
118  else
119    {
120      gcc_assert (!cinsn);
121
122      op0 = force_operand (op0, NULL_RTX);
123      op1 = force_operand (op1, NULL_RTX);
124      do_compare_rtx_and_jump (op0, op1, comp, 0,
125			       mode, NULL_RTX, NULL_RTX, label);
126      jump = get_last_insn ();
127      JUMP_LABEL (jump) = label;
128      LABEL_NUSES (label)++;
129    }
130  REG_NOTES (jump) = gen_rtx_EXPR_LIST (REG_BR_PROB, GEN_INT (prob),
131					REG_NOTES (jump));
132  seq = get_insns ();
133  end_sequence ();
134
135  return seq;
136}
137
138/* Main entry point.  Perform loop unswitching on all suitable LOOPS.  */
139void
140unswitch_loops (struct loops *loops)
141{
142  int i, num;
143  struct loop *loop;
144
145  /* Go through inner loops (only original ones).  */
146  num = loops->num;
147
148  for (i = 1; i < num; i++)
149    {
150      /* Removed loop?  */
151      loop = loops->parray[i];
152      if (!loop)
153	continue;
154
155      if (loop->inner)
156	continue;
157
158      unswitch_single_loop (loops, loop, NULL_RTX, 0);
159#ifdef ENABLE_CHECKING
160      verify_dominators (CDI_DOMINATORS);
161      verify_loop_structure (loops);
162#endif
163    }
164
165  iv_analysis_done ();
166}
167
168/* Checks whether we can unswitch LOOP on condition at end of BB -- one of its
169   basic blocks (for what it means see comments below).  In case condition
170   compares loop invariant cc mode register, return the jump in CINSN.  */
171
172static rtx
173may_unswitch_on (basic_block bb, struct loop *loop, rtx *cinsn)
174{
175  rtx test, at, insn, op[2], stest;
176  struct rtx_iv iv;
177  unsigned i;
178  enum machine_mode mode;
179
180  /* BB must end in a simple conditional jump.  */
181  if (EDGE_COUNT (bb->succs) != 2)
182    return NULL_RTX;
183  if (!any_condjump_p (BB_END (bb)))
184    return NULL_RTX;
185
186  /* With branches inside loop.  */
187  if (!flow_bb_inside_loop_p (loop, EDGE_SUCC (bb, 0)->dest)
188      || !flow_bb_inside_loop_p (loop, EDGE_SUCC (bb, 1)->dest))
189    return NULL_RTX;
190
191  /* It must be executed just once each iteration (because otherwise we
192     are unable to update dominator/irreducible loop information correctly).  */
193  if (!just_once_each_iteration_p (loop, bb))
194    return NULL_RTX;
195
196  /* Condition must be invariant.  */
197  test = get_condition (BB_END (bb), &at, true, false);
198  if (!test)
199    return NULL_RTX;
200
201  for (i = 0; i < 2; i++)
202    {
203      op[i] = XEXP (test, i);
204
205      if (CONSTANT_P (op[i]))
206	continue;
207
208      insn = iv_get_reaching_def (at, op[i]);
209      if (!iv_analyze (insn, op[i], &iv))
210	return NULL_RTX;
211      if (iv.step != const0_rtx
212	  || iv.first_special)
213	return NULL_RTX;
214
215      op[i] = get_iv_value (&iv, const0_rtx);
216    }
217
218  mode = GET_MODE (op[0]);
219  if (mode == VOIDmode)
220    mode = GET_MODE (op[1]);
221  if (GET_MODE_CLASS (mode) == MODE_CC)
222    {
223      if (at != BB_END (bb))
224	return NULL_RTX;
225
226      if (!rtx_equal_p (op[0], XEXP (test, 0))
227	  || !rtx_equal_p (op[1], XEXP (test, 1)))
228	return NULL_RTX;
229
230      *cinsn = BB_END (bb);
231      return test;
232    }
233
234  stest = simplify_gen_relational (GET_CODE (test), SImode,
235				   mode, op[0], op[1]);
236  if (stest == const0_rtx
237      || stest == const_true_rtx)
238    return stest;
239
240  return canon_condition (gen_rtx_fmt_ee (GET_CODE (test), SImode,
241					  op[0], op[1]));
242}
243
244/* Reverses CONDition; returns NULL if we cannot.  */
245rtx
246reversed_condition (rtx cond)
247{
248  enum rtx_code reversed;
249  reversed = reversed_comparison_code (cond, NULL);
250  if (reversed == UNKNOWN)
251    return NULL_RTX;
252  else
253    return gen_rtx_fmt_ee (reversed,
254			   GET_MODE (cond), XEXP (cond, 0),
255			   XEXP (cond, 1));
256}
257
258/* Unswitch single LOOP.  COND_CHECKED holds list of conditions we already
259   unswitched on and are therefore known to be true in this LOOP.  NUM is
260   number of unswitchings done; do not allow it to grow too much, it is too
261   easy to create example on that the code would grow exponentially.  */
262static void
263unswitch_single_loop (struct loops *loops, struct loop *loop,
264		      rtx cond_checked, int num)
265{
266  basic_block *bbs;
267  struct loop *nloop;
268  unsigned i;
269  rtx cond, rcond = NULL_RTX, conds, rconds, acond, cinsn;
270  int repeat;
271  edge e;
272
273  /* Do not unswitch too much.  */
274  if (num > PARAM_VALUE (PARAM_MAX_UNSWITCH_LEVEL))
275    {
276      if (dump_file)
277	fprintf (dump_file, ";; Not unswitching anymore, hit max level\n");
278      return;
279    }
280
281  /* Only unswitch innermost loops.  */
282  if (loop->inner)
283    {
284      if (dump_file)
285	fprintf (dump_file, ";; Not unswitching, not innermost loop\n");
286      return;
287    }
288
289  /* We must be able to duplicate loop body.  */
290  if (!can_duplicate_loop_p (loop))
291    {
292      if (dump_file)
293	fprintf (dump_file, ";; Not unswitching, can't duplicate loop\n");
294      return;
295    }
296
297  /* The loop should not be too large, to limit code growth.  */
298  if (num_loop_insns (loop) > PARAM_VALUE (PARAM_MAX_UNSWITCH_INSNS))
299    {
300      if (dump_file)
301	fprintf (dump_file, ";; Not unswitching, loop too big\n");
302      return;
303    }
304
305  /* Do not unswitch in cold areas.  */
306  if (!maybe_hot_bb_p (loop->header))
307    {
308      if (dump_file)
309	fprintf (dump_file, ";; Not unswitching, not hot area\n");
310      return;
311    }
312
313  /* Nor if the loop usually does not roll.  */
314  if (expected_loop_iterations (loop) < 1)
315    {
316      if (dump_file)
317	fprintf (dump_file, ";; Not unswitching, loop iterations < 1\n");
318      return;
319    }
320
321  do
322    {
323      repeat = 0;
324      cinsn = NULL_RTX;
325
326      /* Find a bb to unswitch on.  */
327      bbs = get_loop_body (loop);
328      iv_analysis_loop_init (loop);
329      for (i = 0; i < loop->num_nodes; i++)
330	if ((cond = may_unswitch_on (bbs[i], loop, &cinsn)))
331	  break;
332
333      if (i == loop->num_nodes)
334	{
335	  free (bbs);
336	  return;
337	}
338
339      if (cond != const0_rtx
340	  && cond != const_true_rtx)
341	{
342	  rcond = reversed_condition (cond);
343	  if (rcond)
344	    rcond = canon_condition (rcond);
345
346	  /* Check whether the result can be predicted.  */
347	  for (acond = cond_checked; acond; acond = XEXP (acond, 1))
348	    simplify_using_condition (XEXP (acond, 0), &cond, NULL);
349	}
350
351      if (cond == const_true_rtx)
352	{
353	  /* Remove false path.  */
354	  e = FALLTHRU_EDGE (bbs[i]);
355	  remove_path (loops, e);
356	  free (bbs);
357	  repeat = 1;
358	}
359      else if (cond == const0_rtx)
360	{
361	  /* Remove true path.  */
362	  e = BRANCH_EDGE (bbs[i]);
363	  remove_path (loops, e);
364	  free (bbs);
365	  repeat = 1;
366	}
367    } while (repeat);
368
369  /* We found the condition we can unswitch on.  */
370  conds = alloc_EXPR_LIST (0, cond, cond_checked);
371  if (rcond)
372    rconds = alloc_EXPR_LIST (0, rcond, cond_checked);
373  else
374    rconds = cond_checked;
375
376  if (dump_file)
377    fprintf (dump_file, ";; Unswitching loop\n");
378
379  /* Unswitch the loop on this condition.  */
380  nloop = unswitch_loop (loops, loop, bbs[i], cond, cinsn);
381  gcc_assert (nloop);
382
383  /* Invoke itself on modified loops.  */
384  unswitch_single_loop (loops, nloop, rconds, num + 1);
385  unswitch_single_loop (loops, loop, conds, num + 1);
386
387  free_EXPR_LIST_node (conds);
388  if (rcond)
389    free_EXPR_LIST_node (rconds);
390
391  free (bbs);
392}
393
394/* Unswitch a LOOP w.r. to given basic block UNSWITCH_ON.  We only support
395   unswitching of innermost loops.  UNSWITCH_ON must be executed in every
396   iteration, i.e. it must dominate LOOP latch.  COND is the condition
397   determining which loop is entered.  Returns NULL if impossible, new loop
398   otherwise.  The new loop is entered if COND is true.  If CINSN is not
399   NULL, it is the insn in that COND is compared.  */
400
401static struct loop *
402unswitch_loop (struct loops *loops, struct loop *loop, basic_block unswitch_on,
403	       rtx cond, rtx cinsn)
404{
405  edge entry, latch_edge, true_edge, false_edge, e;
406  basic_block switch_bb, unswitch_on_alt;
407  struct loop *nloop;
408  sbitmap zero_bitmap;
409  int irred_flag, prob;
410  rtx seq;
411
412  /* Some sanity checking.  */
413  gcc_assert (flow_bb_inside_loop_p (loop, unswitch_on));
414  gcc_assert (EDGE_COUNT (unswitch_on->succs) == 2);
415  gcc_assert (just_once_each_iteration_p (loop, unswitch_on));
416  gcc_assert (!loop->inner);
417  gcc_assert (flow_bb_inside_loop_p (loop, EDGE_SUCC (unswitch_on, 0)->dest));
418  gcc_assert (flow_bb_inside_loop_p (loop, EDGE_SUCC (unswitch_on, 1)->dest));
419
420  entry = loop_preheader_edge (loop);
421
422  /* Make a copy.  */
423  irred_flag = entry->flags & EDGE_IRREDUCIBLE_LOOP;
424  entry->flags &= ~EDGE_IRREDUCIBLE_LOOP;
425  zero_bitmap = sbitmap_alloc (2);
426  sbitmap_zero (zero_bitmap);
427  if (!duplicate_loop_to_header_edge (loop, entry, loops, 1,
428	zero_bitmap, NULL, NULL, NULL, 0))
429    return NULL;
430  free (zero_bitmap);
431  entry->flags |= irred_flag;
432
433  /* Record the block with condition we unswitch on.  */
434  unswitch_on_alt = get_bb_copy (unswitch_on);
435  true_edge = BRANCH_EDGE (unswitch_on_alt);
436  false_edge = FALLTHRU_EDGE (unswitch_on);
437  latch_edge = single_succ_edge (get_bb_copy (loop->latch));
438
439  /* Create a block with the condition.  */
440  prob = true_edge->probability;
441  switch_bb = create_empty_bb (EXIT_BLOCK_PTR->prev_bb);
442  seq = compare_and_jump_seq (XEXP (cond, 0), XEXP (cond, 1), GET_CODE (cond),
443			      block_label (true_edge->dest),
444			      prob, cinsn);
445  emit_insn_after (seq, BB_END (switch_bb));
446  e = make_edge (switch_bb, true_edge->dest, 0);
447  e->probability = prob;
448  e->count = latch_edge->count * prob / REG_BR_PROB_BASE;
449  e = make_edge (switch_bb, FALLTHRU_EDGE (unswitch_on)->dest, EDGE_FALLTHRU);
450  e->probability = false_edge->probability;
451  e->count = latch_edge->count * (false_edge->probability) / REG_BR_PROB_BASE;
452
453  if (irred_flag)
454    {
455      switch_bb->flags |= BB_IRREDUCIBLE_LOOP;
456      EDGE_SUCC (switch_bb, 0)->flags |= EDGE_IRREDUCIBLE_LOOP;
457      EDGE_SUCC (switch_bb, 1)->flags |= EDGE_IRREDUCIBLE_LOOP;
458    }
459  else
460    {
461      switch_bb->flags &= ~BB_IRREDUCIBLE_LOOP;
462      EDGE_SUCC (switch_bb, 0)->flags &= ~EDGE_IRREDUCIBLE_LOOP;
463      EDGE_SUCC (switch_bb, 1)->flags &= ~EDGE_IRREDUCIBLE_LOOP;
464    }
465
466  /* Loopify from the copy of LOOP body, constructing the new loop.  */
467  nloop = loopify (loops, latch_edge,
468		   single_pred_edge (get_bb_copy (loop->header)), switch_bb,
469		   BRANCH_EDGE (switch_bb), FALLTHRU_EDGE (switch_bb), true);
470
471  /* Remove branches that are now unreachable in new loops.  */
472  remove_path (loops, true_edge);
473  remove_path (loops, false_edge);
474
475  /* One of created loops do not have to be subloop of the outer loop now,
476     so fix its placement in loop data structure.  */
477  fix_loop_placement (loop);
478  fix_loop_placement (nloop);
479
480  /* Preserve the simple loop preheaders.  */
481  loop_split_edge_with (loop_preheader_edge (loop), NULL_RTX);
482  loop_split_edge_with (loop_preheader_edge (nloop), NULL_RTX);
483
484  return nloop;
485}
486