1/* Loop unswitching for GNU compiler.
2   Copyright (C) 2002, 2003, 2004, 2005, 2007, 2008, 2009, 2010
3   Free Software Foundation, Inc.
4
5This file is part of GCC.
6
7GCC is free software; you can redistribute it and/or modify it under
8the terms of the GNU General Public License as published by the Free
9Software Foundation; either version 3, or (at your option) any later
10version.
11
12GCC is distributed in the hope that it will be useful, but WITHOUT ANY
13WARRANTY; without even the implied warranty of MERCHANTABILITY or
14FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
15for more details.
16
17You should have received a copy of the GNU General Public License
18along with GCC; see the file COPYING3.  If not see
19<http://www.gnu.org/licenses/>.  */
20
21#include "config.h"
22#include "system.h"
23#include "coretypes.h"
24#include "tm.h"
25#include "rtl.h"
26#include "hard-reg-set.h"
27#include "obstack.h"
28#include "basic-block.h"
29#include "cfgloop.h"
30#include "cfglayout.h"
31#include "params.h"
32#include "output.h"
33#include "expr.h"
34
35/* This pass moves constant conditions out of loops, duplicating the loop
36   in progress, i.e. this code:
37
38   while (loop_cond)
39     {
40       A;
41       if (cond)
42         branch1;
43       else
44	 branch2;
45       B;
46       if (cond)
47         branch3;
48       C;
49     }
50   where nothing inside the loop alters cond is transformed
51   into
52
53   if (cond)
54     {
55       while (loop_cond)
56	 {
57	   A;
58	   branch1;
59	   B;
60	   branch3;
61	   C;
62	 }
63     }
64   else
65     {
66       while (loop_cond)
67	 {
68	   A;
69	   branch2;
70	   B;
71	   C;
72	 }
73     }
74
75  Duplicating the loop might lead to code growth exponential in number of
76  branches inside loop, so we limit the number of unswitchings performed
77  in a single loop to PARAM_MAX_UNSWITCH_LEVEL.  We only perform the
78  transformation on innermost loops, as the benefit of doing it on loops
79  containing subloops would not be very large compared to complications
80  with handling this case.  */
81
82static struct loop *unswitch_loop (struct loop *, basic_block, rtx, rtx);
83static void unswitch_single_loop (struct loop *, rtx, int);
84static rtx may_unswitch_on (basic_block, struct loop *, rtx *);
85
86/* Prepare a sequence comparing OP0 with OP1 using COMP and jumping to LABEL if
87   true, with probability PROB.  If CINSN is not NULL, it is the insn to copy
88   in order to create a jump.  */
89
90rtx
91compare_and_jump_seq (rtx op0, rtx op1, enum rtx_code comp, rtx label, int prob,
92		      rtx cinsn)
93{
94  rtx seq, jump, cond;
95  enum machine_mode mode;
96
97  mode = GET_MODE (op0);
98  if (mode == VOIDmode)
99    mode = GET_MODE (op1);
100
101  start_sequence ();
102  if (GET_MODE_CLASS (mode) == MODE_CC)
103    {
104      /* A hack -- there seems to be no easy generic way how to make a
105	 conditional jump from a ccmode comparison.  */
106      gcc_assert (cinsn);
107      cond = XEXP (SET_SRC (pc_set (cinsn)), 0);
108      gcc_assert (GET_CODE (cond) == comp);
109      gcc_assert (rtx_equal_p (op0, XEXP (cond, 0)));
110      gcc_assert (rtx_equal_p (op1, XEXP (cond, 1)));
111      emit_jump_insn (copy_insn (PATTERN (cinsn)));
112      jump = get_last_insn ();
113      JUMP_LABEL (jump) = JUMP_LABEL (cinsn);
114      LABEL_NUSES (JUMP_LABEL (jump))++;
115      redirect_jump (jump, label, 0);
116    }
117  else
118    {
119      gcc_assert (!cinsn);
120
121      op0 = force_operand (op0, NULL_RTX);
122      op1 = force_operand (op1, NULL_RTX);
123      do_compare_rtx_and_jump (op0, op1, comp, 0,
124			       mode, NULL_RTX, NULL_RTX, label, -1);
125      jump = get_last_insn ();
126      JUMP_LABEL (jump) = label;
127      LABEL_NUSES (label)++;
128    }
129  add_reg_note (jump, REG_BR_PROB, GEN_INT (prob));
130
131  seq = get_insns ();
132  end_sequence ();
133
134  return seq;
135}
136
137/* Main entry point.  Perform loop unswitching on all suitable loops.  */
138void
139unswitch_loops (void)
140{
141  loop_iterator li;
142  struct loop *loop;
143
144  /* Go through inner loops (only original ones).  */
145
146  FOR_EACH_LOOP (li, loop, LI_ONLY_INNERMOST)
147    {
148      unswitch_single_loop (loop, NULL_RTX, 0);
149#ifdef ENABLE_CHECKING
150      verify_dominators (CDI_DOMINATORS);
151      verify_loop_structure ();
152#endif
153    }
154
155  iv_analysis_done ();
156}
157
158/* Checks whether we can unswitch LOOP on condition at end of BB -- one of its
159   basic blocks (for what it means see comments below).  In case condition
160   compares loop invariant cc mode register, return the jump in CINSN.  */
161
162static rtx
163may_unswitch_on (basic_block bb, struct loop *loop, rtx *cinsn)
164{
165  rtx test, at, op[2], stest;
166  struct rtx_iv iv;
167  unsigned i;
168  enum machine_mode mode;
169
170  /* BB must end in a simple conditional jump.  */
171  if (EDGE_COUNT (bb->succs) != 2)
172    return NULL_RTX;
173  if (!any_condjump_p (BB_END (bb)))
174    return NULL_RTX;
175
176  /* With branches inside loop.  */
177  if (!flow_bb_inside_loop_p (loop, EDGE_SUCC (bb, 0)->dest)
178      || !flow_bb_inside_loop_p (loop, EDGE_SUCC (bb, 1)->dest))
179    return NULL_RTX;
180
181  /* It must be executed just once each iteration (because otherwise we
182     are unable to update dominator/irreducible loop information correctly).  */
183  if (!just_once_each_iteration_p (loop, bb))
184    return NULL_RTX;
185
186  /* Condition must be invariant.  */
187  test = get_condition (BB_END (bb), &at, true, false);
188  if (!test)
189    return NULL_RTX;
190
191  for (i = 0; i < 2; i++)
192    {
193      op[i] = XEXP (test, i);
194
195      if (CONSTANT_P (op[i]))
196	continue;
197
198      if (!iv_analyze (at, op[i], &iv))
199	return NULL_RTX;
200      if (iv.step != const0_rtx
201	  || iv.first_special)
202	return NULL_RTX;
203
204      op[i] = get_iv_value (&iv, const0_rtx);
205    }
206
207  mode = GET_MODE (op[0]);
208  if (mode == VOIDmode)
209    mode = GET_MODE (op[1]);
210  if (GET_MODE_CLASS (mode) == MODE_CC)
211    {
212      if (at != BB_END (bb))
213	return NULL_RTX;
214
215      if (!rtx_equal_p (op[0], XEXP (test, 0))
216	  || !rtx_equal_p (op[1], XEXP (test, 1)))
217	return NULL_RTX;
218
219      *cinsn = BB_END (bb);
220      return test;
221    }
222
223  stest = simplify_gen_relational (GET_CODE (test), SImode,
224				   mode, op[0], op[1]);
225  if (stest == const0_rtx
226      || stest == const_true_rtx)
227    return stest;
228
229  return canon_condition (gen_rtx_fmt_ee (GET_CODE (test), SImode,
230					  op[0], op[1]));
231}
232
233/* Reverses CONDition; returns NULL if we cannot.  */
234rtx
235reversed_condition (rtx cond)
236{
237  enum rtx_code reversed;
238  reversed = reversed_comparison_code (cond, NULL);
239  if (reversed == UNKNOWN)
240    return NULL_RTX;
241  else
242    return gen_rtx_fmt_ee (reversed,
243			   GET_MODE (cond), XEXP (cond, 0),
244			   XEXP (cond, 1));
245}
246
247/* Unswitch single LOOP.  COND_CHECKED holds list of conditions we already
248   unswitched on and are therefore known to be true in this LOOP.  NUM is
249   number of unswitchings done; do not allow it to grow too much, it is too
250   easy to create example on that the code would grow exponentially.  */
251static void
252unswitch_single_loop (struct loop *loop, rtx cond_checked, int num)
253{
254  basic_block *bbs;
255  struct loop *nloop;
256  unsigned i;
257  rtx cond, rcond = NULL_RTX, conds, rconds, acond, cinsn;
258  int repeat;
259  edge e;
260
261  /* Do not unswitch too much.  */
262  if (num > PARAM_VALUE (PARAM_MAX_UNSWITCH_LEVEL))
263    {
264      if (dump_file)
265	fprintf (dump_file, ";; Not unswitching anymore, hit max level\n");
266      return;
267    }
268
269  /* Only unswitch innermost loops.  */
270  if (loop->inner)
271    {
272      if (dump_file)
273	fprintf (dump_file, ";; Not unswitching, not innermost loop\n");
274      return;
275    }
276
277  /* We must be able to duplicate loop body.  */
278  if (!can_duplicate_loop_p (loop))
279    {
280      if (dump_file)
281	fprintf (dump_file, ";; Not unswitching, can't duplicate loop\n");
282      return;
283    }
284
285  /* The loop should not be too large, to limit code growth.  */
286  if (num_loop_insns (loop) > PARAM_VALUE (PARAM_MAX_UNSWITCH_INSNS))
287    {
288      if (dump_file)
289	fprintf (dump_file, ";; Not unswitching, loop too big\n");
290      return;
291    }
292
293  /* Do not unswitch in cold areas.  */
294  if (optimize_loop_for_size_p (loop))
295    {
296      if (dump_file)
297	fprintf (dump_file, ";; Not unswitching, not hot area\n");
298      return;
299    }
300
301  /* Nor if the loop usually does not roll.  */
302  if (expected_loop_iterations (loop) < 1)
303    {
304      if (dump_file)
305	fprintf (dump_file, ";; Not unswitching, loop iterations < 1\n");
306      return;
307    }
308
309  do
310    {
311      repeat = 0;
312      cinsn = NULL_RTX;
313
314      /* Find a bb to unswitch on.  */
315      bbs = get_loop_body (loop);
316      iv_analysis_loop_init (loop);
317      for (i = 0; i < loop->num_nodes; i++)
318	if ((cond = may_unswitch_on (bbs[i], loop, &cinsn)))
319	  break;
320
321      if (i == loop->num_nodes)
322	{
323	  free (bbs);
324	  return;
325	}
326
327      if (cond != const0_rtx
328	  && cond != const_true_rtx)
329	{
330	  rcond = reversed_condition (cond);
331	  if (rcond)
332	    rcond = canon_condition (rcond);
333
334	  /* Check whether the result can be predicted.  */
335	  for (acond = cond_checked; acond; acond = XEXP (acond, 1))
336	    simplify_using_condition (XEXP (acond, 0), &cond, NULL);
337	}
338
339      if (cond == const_true_rtx)
340	{
341	  /* Remove false path.  */
342	  e = FALLTHRU_EDGE (bbs[i]);
343	  remove_path (e);
344	  free (bbs);
345	  repeat = 1;
346	}
347      else if (cond == const0_rtx)
348	{
349	  /* Remove true path.  */
350	  e = BRANCH_EDGE (bbs[i]);
351	  remove_path (e);
352	  free (bbs);
353	  repeat = 1;
354	}
355    } while (repeat);
356
357  /* We found the condition we can unswitch on.  */
358  conds = alloc_EXPR_LIST (0, cond, cond_checked);
359  if (rcond)
360    rconds = alloc_EXPR_LIST (0, rcond, cond_checked);
361  else
362    rconds = cond_checked;
363
364  if (dump_file)
365    fprintf (dump_file, ";; Unswitching loop\n");
366
367  /* Unswitch the loop on this condition.  */
368  nloop = unswitch_loop (loop, bbs[i], cond, cinsn);
369  gcc_assert (nloop);
370
371  /* Invoke itself on modified loops.  */
372  unswitch_single_loop (nloop, rconds, num + 1);
373  unswitch_single_loop (loop, conds, num + 1);
374
375  free_EXPR_LIST_node (conds);
376  if (rcond)
377    free_EXPR_LIST_node (rconds);
378
379  free (bbs);
380}
381
382/* Unswitch a LOOP w.r. to given basic block UNSWITCH_ON.  We only support
383   unswitching of innermost loops.  UNSWITCH_ON must be executed in every
384   iteration, i.e. it must dominate LOOP latch.  COND is the condition
385   determining which loop is entered.  Returns NULL if impossible, new loop
386   otherwise.  The new loop is entered if COND is true.  If CINSN is not
387   NULL, it is the insn in that COND is compared.  */
388
389static struct loop *
390unswitch_loop (struct loop *loop, basic_block unswitch_on, rtx cond, rtx cinsn)
391{
392  edge entry, latch_edge, true_edge, false_edge, e;
393  basic_block switch_bb, unswitch_on_alt;
394  struct loop *nloop;
395  int irred_flag, prob;
396  rtx seq;
397
398  /* Some sanity checking.  */
399  gcc_assert (flow_bb_inside_loop_p (loop, unswitch_on));
400  gcc_assert (EDGE_COUNT (unswitch_on->succs) == 2);
401  gcc_assert (just_once_each_iteration_p (loop, unswitch_on));
402  gcc_assert (!loop->inner);
403  gcc_assert (flow_bb_inside_loop_p (loop, EDGE_SUCC (unswitch_on, 0)->dest));
404  gcc_assert (flow_bb_inside_loop_p (loop, EDGE_SUCC (unswitch_on, 1)->dest));
405
406  entry = loop_preheader_edge (loop);
407
408  /* Make a copy.  */
409  irred_flag = entry->flags & EDGE_IRREDUCIBLE_LOOP;
410  entry->flags &= ~EDGE_IRREDUCIBLE_LOOP;
411  if (!duplicate_loop_to_header_edge (loop, entry, 1,
412			      	      NULL, NULL, NULL, 0))
413    return NULL;
414  entry->flags |= irred_flag;
415
416  /* Record the block with condition we unswitch on.  */
417  unswitch_on_alt = get_bb_copy (unswitch_on);
418  true_edge = BRANCH_EDGE (unswitch_on_alt);
419  false_edge = FALLTHRU_EDGE (unswitch_on);
420  latch_edge = single_succ_edge (get_bb_copy (loop->latch));
421
422  /* Create a block with the condition.  */
423  prob = true_edge->probability;
424  switch_bb = create_empty_bb (EXIT_BLOCK_PTR->prev_bb);
425  seq = compare_and_jump_seq (XEXP (cond, 0), XEXP (cond, 1), GET_CODE (cond),
426			      block_label (true_edge->dest),
427			      prob, cinsn);
428  emit_insn_after (seq, BB_END (switch_bb));
429  e = make_edge (switch_bb, true_edge->dest, 0);
430  e->probability = prob;
431  e->count = latch_edge->count * prob / REG_BR_PROB_BASE;
432  e = make_edge (switch_bb, FALLTHRU_EDGE (unswitch_on)->dest, EDGE_FALLTHRU);
433  e->probability = false_edge->probability;
434  e->count = latch_edge->count * (false_edge->probability) / REG_BR_PROB_BASE;
435
436  if (irred_flag)
437    {
438      switch_bb->flags |= BB_IRREDUCIBLE_LOOP;
439      EDGE_SUCC (switch_bb, 0)->flags |= EDGE_IRREDUCIBLE_LOOP;
440      EDGE_SUCC (switch_bb, 1)->flags |= EDGE_IRREDUCIBLE_LOOP;
441    }
442  else
443    {
444      switch_bb->flags &= ~BB_IRREDUCIBLE_LOOP;
445      EDGE_SUCC (switch_bb, 0)->flags &= ~EDGE_IRREDUCIBLE_LOOP;
446      EDGE_SUCC (switch_bb, 1)->flags &= ~EDGE_IRREDUCIBLE_LOOP;
447    }
448
449  /* Loopify from the copy of LOOP body, constructing the new loop.  */
450  nloop = loopify (latch_edge,
451		   single_pred_edge (get_bb_copy (loop->header)), switch_bb,
452		   BRANCH_EDGE (switch_bb), FALLTHRU_EDGE (switch_bb), true,
453		   prob, REG_BR_PROB_BASE - prob);
454
455  /* Remove branches that are now unreachable in new loops.  */
456  remove_path (true_edge);
457  remove_path (false_edge);
458
459  /* Preserve the simple loop preheaders.  */
460  split_edge (loop_preheader_edge (loop));
461  split_edge (loop_preheader_edge (nloop));
462
463  return nloop;
464}
465