1/* Loop manipulation code for GNU compiler.
2   Copyright (C) 2002-2020 Free Software Foundation, Inc.
3
4This file is part of GCC.
5
6GCC is free software; you can redistribute it and/or modify it under
7the terms of the GNU General Public License as published by the Free
8Software Foundation; either version 3, or (at your option) any later
9version.
10
11GCC is distributed in the hope that it will be useful, but WITHOUT ANY
12WARRANTY; without even the implied warranty of MERCHANTABILITY or
13FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
14for more details.
15
16You should have received a copy of the GNU General Public License
17along with GCC; see the file COPYING3.  If not see
18<http://www.gnu.org/licenses/>.  */
19
20#include "config.h"
21#include "system.h"
22#include "coretypes.h"
23#include "backend.h"
24#include "rtl.h"
25#include "tree.h"
26#include "gimple.h"
27#include "cfghooks.h"
28#include "cfganal.h"
29#include "cfgloop.h"
30#include "gimple-iterator.h"
31#include "gimplify-me.h"
32#include "tree-ssa-loop-manip.h"
33#include "dumpfile.h"
34
35static void copy_loops_to (class loop **, int,
36			   class loop *);
37static void loop_redirect_edge (edge, basic_block);
38static void remove_bbs (basic_block *, int);
39static bool rpe_enum_p (const_basic_block, const void *);
40static int find_path (edge, basic_block **);
41static void fix_loop_placements (class loop *, bool *);
42static bool fix_bb_placement (basic_block);
43static void fix_bb_placements (basic_block, bool *, bitmap);
44
45/* Checks whether basic block BB is dominated by DATA.  */
46static bool
47rpe_enum_p (const_basic_block bb, const void *data)
48{
49  return dominated_by_p (CDI_DOMINATORS, bb, (const_basic_block) data);
50}
51
52/* Remove basic blocks BBS.  NBBS is the number of the basic blocks.  */
53
54static void
55remove_bbs (basic_block *bbs, int nbbs)
56{
57  int i;
58
59  for (i = 0; i < nbbs; i++)
60    delete_basic_block (bbs[i]);
61}
62
63/* Find path -- i.e. the basic blocks dominated by edge E and put them
64   into array BBS, that will be allocated large enough to contain them.
65   E->dest must have exactly one predecessor for this to work (it is
66   easy to achieve and we do not put it here because we do not want to
67   alter anything by this function).  The number of basic blocks in the
68   path is returned.  */
69static int
70find_path (edge e, basic_block **bbs)
71{
72  gcc_assert (EDGE_COUNT (e->dest->preds) <= 1);
73
74  /* Find bbs in the path.  */
75  *bbs = XNEWVEC (basic_block, n_basic_blocks_for_fn (cfun));
76  return dfs_enumerate_from (e->dest, 0, rpe_enum_p, *bbs,
77			     n_basic_blocks_for_fn (cfun), e->dest);
78}
79
80/* Fix placement of basic block BB inside loop hierarchy --
81   Let L be a loop to that BB belongs.  Then every successor of BB must either
82     1) belong to some superloop of loop L, or
83     2) be a header of loop K such that K->outer is superloop of L
84   Returns true if we had to move BB into other loop to enforce this condition,
85   false if the placement of BB was already correct (provided that placements
86   of its successors are correct).  */
87static bool
88fix_bb_placement (basic_block bb)
89{
90  edge e;
91  edge_iterator ei;
92  class loop *loop = current_loops->tree_root, *act;
93
94  FOR_EACH_EDGE (e, ei, bb->succs)
95    {
96      if (e->dest == EXIT_BLOCK_PTR_FOR_FN (cfun))
97	continue;
98
99      act = e->dest->loop_father;
100      if (act->header == e->dest)
101	act = loop_outer (act);
102
103      if (flow_loop_nested_p (loop, act))
104	loop = act;
105    }
106
107  if (loop == bb->loop_father)
108    return false;
109
110  remove_bb_from_loops (bb);
111  add_bb_to_loop (bb, loop);
112
113  return true;
114}
115
116/* Fix placement of LOOP inside loop tree, i.e. find the innermost superloop
117   of LOOP to that leads at least one exit edge of LOOP, and set it
118   as the immediate superloop of LOOP.  Return true if the immediate superloop
119   of LOOP changed.
120
121   IRRED_INVALIDATED is set to true if a change in the loop structures might
122   invalidate the information about irreducible regions.  */
123
124static bool
125fix_loop_placement (class loop *loop, bool *irred_invalidated)
126{
127  unsigned i;
128  edge e;
129  vec<edge> exits = get_loop_exit_edges (loop);
130  class loop *father = current_loops->tree_root, *act;
131  bool ret = false;
132
133  FOR_EACH_VEC_ELT (exits, i, e)
134    {
135      act = find_common_loop (loop, e->dest->loop_father);
136      if (flow_loop_nested_p (father, act))
137	father = act;
138    }
139
140  if (father != loop_outer (loop))
141    {
142      for (act = loop_outer (loop); act != father; act = loop_outer (act))
143	act->num_nodes -= loop->num_nodes;
144      flow_loop_tree_node_remove (loop);
145      flow_loop_tree_node_add (father, loop);
146
147      /* The exit edges of LOOP no longer exits its original immediate
148	 superloops; remove them from the appropriate exit lists.  */
149      FOR_EACH_VEC_ELT (exits, i, e)
150	{
151	  /* We may need to recompute irreducible loops.  */
152	  if (e->flags & EDGE_IRREDUCIBLE_LOOP)
153	    *irred_invalidated = true;
154	  rescan_loop_exit (e, false, false);
155	}
156
157      ret = true;
158    }
159
160  exits.release ();
161  return ret;
162}
163
164/* Fix placements of basic blocks inside loop hierarchy stored in loops; i.e.
165   enforce condition stated in description of fix_bb_placement. We
166   start from basic block FROM that had some of its successors removed, so that
167   his placement no longer has to be correct, and iteratively fix placement of
168   its predecessors that may change if placement of FROM changed.  Also fix
169   placement of subloops of FROM->loop_father, that might also be altered due
170   to this change; the condition for them is similar, except that instead of
171   successors we consider edges coming out of the loops.
172
173   If the changes may invalidate the information about irreducible regions,
174   IRRED_INVALIDATED is set to true.
175
176   If LOOP_CLOSED_SSA_INVLIDATED is non-zero then all basic blocks with
177   changed loop_father are collected there. */
178
179static void
180fix_bb_placements (basic_block from,
181		   bool *irred_invalidated,
182		   bitmap loop_closed_ssa_invalidated)
183{
184  basic_block *queue, *qtop, *qbeg, *qend;
185  class loop *base_loop, *target_loop;
186  edge e;
187
188  /* We pass through blocks back-reachable from FROM, testing whether some
189     of their successors moved to outer loop.  It may be necessary to
190     iterate several times, but it is finite, as we stop unless we move
191     the basic block up the loop structure.  The whole story is a bit
192     more complicated due to presence of subloops, those are moved using
193     fix_loop_placement.  */
194
195  base_loop = from->loop_father;
196  /* If we are already in the outermost loop, the basic blocks cannot be moved
197     outside of it.  If FROM is the header of the base loop, it cannot be moved
198     outside of it, either.  In both cases, we can end now.  */
199  if (base_loop == current_loops->tree_root
200      || from == base_loop->header)
201    return;
202
203  auto_sbitmap in_queue (last_basic_block_for_fn (cfun));
204  bitmap_clear (in_queue);
205  bitmap_set_bit (in_queue, from->index);
206  /* Prevent us from going out of the base_loop.  */
207  bitmap_set_bit (in_queue, base_loop->header->index);
208
209  queue = XNEWVEC (basic_block, base_loop->num_nodes + 1);
210  qtop = queue + base_loop->num_nodes + 1;
211  qbeg = queue;
212  qend = queue + 1;
213  *qbeg = from;
214
215  while (qbeg != qend)
216    {
217      edge_iterator ei;
218      from = *qbeg;
219      qbeg++;
220      if (qbeg == qtop)
221	qbeg = queue;
222      bitmap_clear_bit (in_queue, from->index);
223
224      if (from->loop_father->header == from)
225	{
226	  /* Subloop header, maybe move the loop upward.  */
227	  if (!fix_loop_placement (from->loop_father, irred_invalidated))
228	    continue;
229	  target_loop = loop_outer (from->loop_father);
230	  if (loop_closed_ssa_invalidated)
231	    {
232	      basic_block *bbs = get_loop_body (from->loop_father);
233	      for (unsigned i = 0; i < from->loop_father->num_nodes; ++i)
234		bitmap_set_bit (loop_closed_ssa_invalidated, bbs[i]->index);
235	      free (bbs);
236	    }
237	}
238      else
239	{
240	  /* Ordinary basic block.  */
241	  if (!fix_bb_placement (from))
242	    continue;
243	  target_loop = from->loop_father;
244	  if (loop_closed_ssa_invalidated)
245	    bitmap_set_bit (loop_closed_ssa_invalidated, from->index);
246	}
247
248      FOR_EACH_EDGE (e, ei, from->succs)
249	{
250	  if (e->flags & EDGE_IRREDUCIBLE_LOOP)
251	    *irred_invalidated = true;
252	}
253
254      /* Something has changed, insert predecessors into queue.  */
255      FOR_EACH_EDGE (e, ei, from->preds)
256	{
257	  basic_block pred = e->src;
258	  class loop *nca;
259
260	  if (e->flags & EDGE_IRREDUCIBLE_LOOP)
261	    *irred_invalidated = true;
262
263	  if (bitmap_bit_p (in_queue, pred->index))
264	    continue;
265
266	  /* If it is subloop, then it either was not moved, or
267	     the path up the loop tree from base_loop do not contain
268	     it.  */
269	  nca = find_common_loop (pred->loop_father, base_loop);
270	  if (pred->loop_father != base_loop
271	      && (nca == base_loop
272		  || nca != pred->loop_father))
273	    pred = pred->loop_father->header;
274	  else if (!flow_loop_nested_p (target_loop, pred->loop_father))
275	    {
276	      /* If PRED is already higher in the loop hierarchy than the
277		 TARGET_LOOP to that we moved FROM, the change of the position
278		 of FROM does not affect the position of PRED, so there is no
279		 point in processing it.  */
280	      continue;
281	    }
282
283	  if (bitmap_bit_p (in_queue, pred->index))
284	    continue;
285
286	  /* Schedule the basic block.  */
287	  *qend = pred;
288	  qend++;
289	  if (qend == qtop)
290	    qend = queue;
291	  bitmap_set_bit (in_queue, pred->index);
292	}
293    }
294  free (queue);
295}
296
297/* Removes path beginning at edge E, i.e. remove basic blocks dominated by E
298   and update loop structures and dominators.  Return true if we were able
299   to remove the path, false otherwise (and nothing is affected then).  */
300bool
301remove_path (edge e, bool *irred_invalidated,
302	     bitmap loop_closed_ssa_invalidated)
303{
304  edge ae;
305  basic_block *rem_bbs, *bord_bbs, from, bb;
306  vec<basic_block> dom_bbs;
307  int i, nrem, n_bord_bbs;
308  bool local_irred_invalidated = false;
309  edge_iterator ei;
310  class loop *l, *f;
311
312  if (! irred_invalidated)
313    irred_invalidated = &local_irred_invalidated;
314
315  if (!can_remove_branch_p (e))
316    return false;
317
318  /* Keep track of whether we need to update information about irreducible
319     regions.  This is the case if the removed area is a part of the
320     irreducible region, or if the set of basic blocks that belong to a loop
321     that is inside an irreducible region is changed, or if such a loop is
322     removed.  */
323  if (e->flags & EDGE_IRREDUCIBLE_LOOP)
324    *irred_invalidated = true;
325
326  /* We need to check whether basic blocks are dominated by the edge
327     e, but we only have basic block dominators.  This is easy to
328     fix -- when e->dest has exactly one predecessor, this corresponds
329     to blocks dominated by e->dest, if not, split the edge.  */
330  if (!single_pred_p (e->dest))
331    e = single_pred_edge (split_edge (e));
332
333  /* It may happen that by removing path we remove one or more loops
334     we belong to.  In this case first unloop the loops, then proceed
335     normally.   We may assume that e->dest is not a header of any loop,
336     as it now has exactly one predecessor.  */
337  for (l = e->src->loop_father; loop_outer (l); l = f)
338    {
339      f = loop_outer (l);
340      if (dominated_by_p (CDI_DOMINATORS, l->latch, e->dest))
341        unloop (l, irred_invalidated, loop_closed_ssa_invalidated);
342    }
343
344  /* Identify the path.  */
345  nrem = find_path (e, &rem_bbs);
346
347  n_bord_bbs = 0;
348  bord_bbs = XNEWVEC (basic_block, n_basic_blocks_for_fn (cfun));
349  auto_sbitmap seen (last_basic_block_for_fn (cfun));
350  bitmap_clear (seen);
351
352  /* Find "border" hexes -- i.e. those with predecessor in removed path.  */
353  for (i = 0; i < nrem; i++)
354    bitmap_set_bit (seen, rem_bbs[i]->index);
355  if (!*irred_invalidated)
356    FOR_EACH_EDGE (ae, ei, e->src->succs)
357      if (ae != e && ae->dest != EXIT_BLOCK_PTR_FOR_FN (cfun)
358	  && !bitmap_bit_p (seen, ae->dest->index)
359	  && ae->flags & EDGE_IRREDUCIBLE_LOOP)
360	{
361	  *irred_invalidated = true;
362	  break;
363	}
364
365  for (i = 0; i < nrem; i++)
366    {
367      FOR_EACH_EDGE (ae, ei, rem_bbs[i]->succs)
368	if (ae->dest != EXIT_BLOCK_PTR_FOR_FN (cfun)
369	    && !bitmap_bit_p (seen, ae->dest->index))
370	  {
371	    bitmap_set_bit (seen, ae->dest->index);
372	    bord_bbs[n_bord_bbs++] = ae->dest;
373
374	    if (ae->flags & EDGE_IRREDUCIBLE_LOOP)
375	      *irred_invalidated = true;
376	  }
377    }
378
379  /* Remove the path.  */
380  from = e->src;
381  remove_branch (e);
382  dom_bbs.create (0);
383
384  /* Cancel loops contained in the path.  */
385  for (i = 0; i < nrem; i++)
386    if (rem_bbs[i]->loop_father->header == rem_bbs[i])
387      cancel_loop_tree (rem_bbs[i]->loop_father);
388
389  remove_bbs (rem_bbs, nrem);
390  free (rem_bbs);
391
392  /* Find blocks whose dominators may be affected.  */
393  bitmap_clear (seen);
394  for (i = 0; i < n_bord_bbs; i++)
395    {
396      basic_block ldom;
397
398      bb = get_immediate_dominator (CDI_DOMINATORS, bord_bbs[i]);
399      if (bitmap_bit_p (seen, bb->index))
400	continue;
401      bitmap_set_bit (seen, bb->index);
402
403      for (ldom = first_dom_son (CDI_DOMINATORS, bb);
404	   ldom;
405	   ldom = next_dom_son (CDI_DOMINATORS, ldom))
406	if (!dominated_by_p (CDI_DOMINATORS, from, ldom))
407	  dom_bbs.safe_push (ldom);
408    }
409
410  /* Recount dominators.  */
411  iterate_fix_dominators (CDI_DOMINATORS, dom_bbs, true);
412  dom_bbs.release ();
413  free (bord_bbs);
414
415  /* Fix placements of basic blocks inside loops and the placement of
416     loops in the loop tree.  */
417  fix_bb_placements (from, irred_invalidated, loop_closed_ssa_invalidated);
418  fix_loop_placements (from->loop_father, irred_invalidated);
419
420  if (local_irred_invalidated
421      && loops_state_satisfies_p (LOOPS_HAVE_MARKED_IRREDUCIBLE_REGIONS))
422    mark_irreducible_loops ();
423
424  return true;
425}
426
427/* Creates place for a new LOOP in loops structure of FN.  */
428
429void
430place_new_loop (struct function *fn, class loop *loop)
431{
432  loop->num = number_of_loops (fn);
433  vec_safe_push (loops_for_fn (fn)->larray, loop);
434}
435
436/* Given LOOP structure with filled header and latch, find the body of the
437   corresponding loop and add it to loops tree.  Insert the LOOP as a son of
438   outer.  */
439
440void
441add_loop (class loop *loop, class loop *outer)
442{
443  basic_block *bbs;
444  int i, n;
445  class loop *subloop;
446  edge e;
447  edge_iterator ei;
448
449  /* Add it to loop structure.  */
450  place_new_loop (cfun, loop);
451  flow_loop_tree_node_add (outer, loop);
452
453  /* Find its nodes.  */
454  bbs = XNEWVEC (basic_block, n_basic_blocks_for_fn (cfun));
455  n = get_loop_body_with_size (loop, bbs, n_basic_blocks_for_fn (cfun));
456
457  for (i = 0; i < n; i++)
458    {
459      if (bbs[i]->loop_father == outer)
460	{
461	  remove_bb_from_loops (bbs[i]);
462	  add_bb_to_loop (bbs[i], loop);
463	  continue;
464	}
465
466      loop->num_nodes++;
467
468      /* If we find a direct subloop of OUTER, move it to LOOP.  */
469      subloop = bbs[i]->loop_father;
470      if (loop_outer (subloop) == outer
471	  && subloop->header == bbs[i])
472	{
473	  flow_loop_tree_node_remove (subloop);
474	  flow_loop_tree_node_add (loop, subloop);
475	}
476    }
477
478  /* Update the information about loop exit edges.  */
479  for (i = 0; i < n; i++)
480    {
481      FOR_EACH_EDGE (e, ei, bbs[i]->succs)
482	{
483	  rescan_loop_exit (e, false, false);
484	}
485    }
486
487  free (bbs);
488}
489
490/* Scale profile of loop by P.  */
491
492void
493scale_loop_frequencies (class loop *loop, profile_probability p)
494{
495  basic_block *bbs;
496
497  bbs = get_loop_body (loop);
498  scale_bbs_frequencies (bbs, loop->num_nodes, p);
499  free (bbs);
500}
501
502/* Scale profile in LOOP by P.
503   If ITERATION_BOUND is non-zero, scale even further if loop is predicted
504   to iterate too many times.
505   Before caling this function, preheader block profile should be already
506   scaled to final count.  This is necessary because loop iterations are
507   determined by comparing header edge count to latch ege count and thus
508   they need to be scaled synchronously.  */
509
510void
511scale_loop_profile (class loop *loop, profile_probability p,
512		    gcov_type iteration_bound)
513{
514  edge e, preheader_e;
515  edge_iterator ei;
516
517  if (dump_file && (dump_flags & TDF_DETAILS))
518    {
519      fprintf (dump_file, ";; Scaling loop %i with scale ",
520	       loop->num);
521      p.dump (dump_file);
522      fprintf (dump_file, " bounding iterations to %i\n",
523	       (int)iteration_bound);
524    }
525
526  /* Scale the probabilities.  */
527  scale_loop_frequencies (loop, p);
528
529  if (iteration_bound == 0)
530    return;
531
532  gcov_type iterations = expected_loop_iterations_unbounded (loop, NULL, true);
533
534  if (dump_file && (dump_flags & TDF_DETAILS))
535    {
536      fprintf (dump_file, ";; guessed iterations after scaling %i\n",
537	       (int)iterations);
538    }
539
540  /* See if loop is predicted to iterate too many times.  */
541  if (iterations <= iteration_bound)
542    return;
543
544  preheader_e = loop_preheader_edge (loop);
545
546  /* We could handle also loops without preheaders, but bounding is
547     currently used only by optimizers that have preheaders constructed.  */
548  gcc_checking_assert (preheader_e);
549  profile_count count_in = preheader_e->count ();
550
551  if (count_in > profile_count::zero ()
552      && loop->header->count.initialized_p ())
553    {
554      profile_count count_delta = profile_count::zero ();
555
556      e = single_exit (loop);
557      if (e)
558	{
559	  edge other_e;
560	  FOR_EACH_EDGE (other_e, ei, e->src->succs)
561	    if (!(other_e->flags & (EDGE_ABNORMAL | EDGE_FAKE))
562		&& e != other_e)
563	      break;
564
565	  /* Probability of exit must be 1/iterations.  */
566	  count_delta = e->count ();
567	  e->probability = profile_probability::always ()
568				    .apply_scale (1, iteration_bound);
569	  other_e->probability = e->probability.invert ();
570
571	  /* In code below we only handle the following two updates.  */
572	  if (other_e->dest != loop->header
573	      && other_e->dest != loop->latch
574	      && (dump_file && (dump_flags & TDF_DETAILS)))
575	    {
576	      fprintf (dump_file, ";; giving up on update of paths from "
577		       "exit condition to latch\n");
578	    }
579	}
580      else
581        if (dump_file && (dump_flags & TDF_DETAILS))
582	  fprintf (dump_file, ";; Loop has multiple exit edges; "
583	      		      "giving up on exit condition update\n");
584
585      /* Roughly speaking we want to reduce the loop body profile by the
586	 difference of loop iterations.  We however can do better if
587	 we look at the actual profile, if it is available.  */
588      p = profile_probability::always ();
589
590      count_in = count_in.apply_scale (iteration_bound, 1);
591      p = count_in.probability_in (loop->header->count);
592      if (!(p > profile_probability::never ()))
593	p = profile_probability::very_unlikely ();
594
595      if (p == profile_probability::always ()
596	  || !p.initialized_p ())
597	return;
598
599      /* If latch exists, change its count, since we changed
600	 probability of exit.  Theoretically we should update everything from
601	 source of exit edge to latch, but for vectorizer this is enough.  */
602      if (loop->latch && loop->latch != e->src)
603	loop->latch->count += count_delta;
604
605      /* Scale the probabilities.  */
606      scale_loop_frequencies (loop, p);
607
608      /* Change latch's count back.  */
609      if (loop->latch && loop->latch != e->src)
610	loop->latch->count -= count_delta;
611
612      if (dump_file && (dump_flags & TDF_DETAILS))
613	fprintf (dump_file, ";; guessed iterations are now %i\n",
614		 (int)expected_loop_iterations_unbounded (loop, NULL, true));
615    }
616}
617
618/* Recompute dominance information for basic blocks outside LOOP.  */
619
620static void
621update_dominators_in_loop (class loop *loop)
622{
623  vec<basic_block> dom_bbs = vNULL;
624  basic_block *body;
625  unsigned i;
626
627  auto_sbitmap seen (last_basic_block_for_fn (cfun));
628  bitmap_clear (seen);
629  body = get_loop_body (loop);
630
631  for (i = 0; i < loop->num_nodes; i++)
632    bitmap_set_bit (seen, body[i]->index);
633
634  for (i = 0; i < loop->num_nodes; i++)
635    {
636      basic_block ldom;
637
638      for (ldom = first_dom_son (CDI_DOMINATORS, body[i]);
639	   ldom;
640	   ldom = next_dom_son (CDI_DOMINATORS, ldom))
641	if (!bitmap_bit_p (seen, ldom->index))
642	  {
643	    bitmap_set_bit (seen, ldom->index);
644	    dom_bbs.safe_push (ldom);
645	  }
646    }
647
648  iterate_fix_dominators (CDI_DOMINATORS, dom_bbs, false);
649  free (body);
650  dom_bbs.release ();
651}
652
653/* Creates an if region as shown above. CONDITION is used to create
654   the test for the if.
655
656   |
657   |     -------------                 -------------
658   |     |  pred_bb  |                 |  pred_bb  |
659   |     -------------                 -------------
660   |           |                             |
661   |           |                             | ENTRY_EDGE
662   |           | ENTRY_EDGE                  V
663   |           |             ====>     -------------
664   |           |                       |  cond_bb  |
665   |           |                       | CONDITION |
666   |           |                       -------------
667   |           V                        /         \
668   |     -------------         e_false /           \ e_true
669   |     |  succ_bb  |                V             V
670   |     -------------         -----------       -----------
671   |                           | false_bb |      | true_bb |
672   |                           -----------       -----------
673   |                                   \           /
674   |                                    \         /
675   |                                     V       V
676   |                                   -------------
677   |                                   |  join_bb  |
678   |                                   -------------
679   |                                         | exit_edge (result)
680   |                                         V
681   |                                    -----------
682   |                                    | succ_bb |
683   |                                    -----------
684   |
685 */
686
687edge
688create_empty_if_region_on_edge (edge entry_edge, tree condition)
689{
690
691  basic_block cond_bb, true_bb, false_bb, join_bb;
692  edge e_true, e_false, exit_edge;
693  gcond *cond_stmt;
694  tree simple_cond;
695  gimple_stmt_iterator gsi;
696
697  cond_bb = split_edge (entry_edge);
698
699  /* Insert condition in cond_bb.  */
700  gsi = gsi_last_bb (cond_bb);
701  simple_cond =
702    force_gimple_operand_gsi (&gsi, condition, true, NULL,
703			      false, GSI_NEW_STMT);
704  cond_stmt = gimple_build_cond_from_tree (simple_cond, NULL_TREE, NULL_TREE);
705  gsi = gsi_last_bb (cond_bb);
706  gsi_insert_after (&gsi, cond_stmt, GSI_NEW_STMT);
707
708  join_bb = split_edge (single_succ_edge (cond_bb));
709
710  e_true = single_succ_edge (cond_bb);
711  true_bb = split_edge (e_true);
712
713  e_false = make_edge (cond_bb, join_bb, 0);
714  false_bb = split_edge (e_false);
715
716  e_true->flags &= ~EDGE_FALLTHRU;
717  e_true->flags |= EDGE_TRUE_VALUE;
718  e_false->flags &= ~EDGE_FALLTHRU;
719  e_false->flags |= EDGE_FALSE_VALUE;
720
721  set_immediate_dominator (CDI_DOMINATORS, cond_bb, entry_edge->src);
722  set_immediate_dominator (CDI_DOMINATORS, true_bb, cond_bb);
723  set_immediate_dominator (CDI_DOMINATORS, false_bb, cond_bb);
724  set_immediate_dominator (CDI_DOMINATORS, join_bb, cond_bb);
725
726  exit_edge = single_succ_edge (join_bb);
727
728  if (single_pred_p (exit_edge->dest))
729    set_immediate_dominator (CDI_DOMINATORS, exit_edge->dest, join_bb);
730
731  return exit_edge;
732}
733
734/* create_empty_loop_on_edge
735   |
736   |    - pred_bb -                   ------ pred_bb ------
737   |   |           |                 | iv0 = initial_value |
738   |    -----|-----                   ---------|-----------
739   |         |                       ______    | entry_edge
740   |         | entry_edge           /      |   |
741   |         |             ====>   |      -V---V- loop_header -------------
742   |         V                     |     | iv_before = phi (iv0, iv_after) |
743   |    - succ_bb -                |      ---|-----------------------------
744   |   |           |               |         |
745   |    -----------                |      ---V--- loop_body ---------------
746   |                               |     | iv_after = iv_before + stride   |
747   |                               |     | if (iv_before < upper_bound)    |
748   |                               |      ---|--------------\--------------
749   |                               |         |               \ exit_e
750   |                               |         V                \
751   |                               |       - loop_latch -      V- succ_bb -
752   |                               |      |              |     |           |
753   |                               |       /-------------       -----------
754   |                                \ ___ /
755
756   Creates an empty loop as shown above, the IV_BEFORE is the SSA_NAME
757   that is used before the increment of IV. IV_BEFORE should be used for
758   adding code to the body that uses the IV.  OUTER is the outer loop in
759   which the new loop should be inserted.
760
761   Both INITIAL_VALUE and UPPER_BOUND expressions are gimplified and
762   inserted on the loop entry edge.  This implies that this function
763   should be used only when the UPPER_BOUND expression is a loop
764   invariant.  */
765
766class loop *
767create_empty_loop_on_edge (edge entry_edge,
768			   tree initial_value,
769			   tree stride, tree upper_bound,
770			   tree iv,
771			   tree *iv_before,
772			   tree *iv_after,
773			   class loop *outer)
774{
775  basic_block loop_header, loop_latch, succ_bb, pred_bb;
776  class loop *loop;
777  gimple_stmt_iterator gsi;
778  gimple_seq stmts;
779  gcond *cond_expr;
780  tree exit_test;
781  edge exit_e;
782
783  gcc_assert (entry_edge && initial_value && stride && upper_bound && iv);
784
785  /* Create header, latch and wire up the loop.  */
786  pred_bb = entry_edge->src;
787  loop_header = split_edge (entry_edge);
788  loop_latch = split_edge (single_succ_edge (loop_header));
789  succ_bb = single_succ (loop_latch);
790  make_edge (loop_header, succ_bb, 0);
791  redirect_edge_succ_nodup (single_succ_edge (loop_latch), loop_header);
792
793  /* Set immediate dominator information.  */
794  set_immediate_dominator (CDI_DOMINATORS, loop_header, pred_bb);
795  set_immediate_dominator (CDI_DOMINATORS, loop_latch, loop_header);
796  set_immediate_dominator (CDI_DOMINATORS, succ_bb, loop_header);
797
798  /* Initialize a loop structure and put it in a loop hierarchy.  */
799  loop = alloc_loop ();
800  loop->header = loop_header;
801  loop->latch = loop_latch;
802  add_loop (loop, outer);
803
804  /* TODO: Fix counts.  */
805  scale_loop_frequencies (loop, profile_probability::even ());
806
807  /* Update dominators.  */
808  update_dominators_in_loop (loop);
809
810  /* Modify edge flags.  */
811  exit_e = single_exit (loop);
812  exit_e->flags = EDGE_LOOP_EXIT | EDGE_FALSE_VALUE;
813  single_pred_edge (loop_latch)->flags = EDGE_TRUE_VALUE;
814
815  /* Construct IV code in loop.  */
816  initial_value = force_gimple_operand (initial_value, &stmts, true, iv);
817  if (stmts)
818    {
819      gsi_insert_seq_on_edge (loop_preheader_edge (loop), stmts);
820      gsi_commit_edge_inserts ();
821    }
822
823  upper_bound = force_gimple_operand (upper_bound, &stmts, true, NULL);
824  if (stmts)
825    {
826      gsi_insert_seq_on_edge (loop_preheader_edge (loop), stmts);
827      gsi_commit_edge_inserts ();
828    }
829
830  gsi = gsi_last_bb (loop_header);
831  create_iv (initial_value, stride, iv, loop, &gsi, false,
832	     iv_before, iv_after);
833
834  /* Insert loop exit condition.  */
835  cond_expr = gimple_build_cond
836    (LT_EXPR, *iv_before, upper_bound, NULL_TREE, NULL_TREE);
837
838  exit_test = gimple_cond_lhs (cond_expr);
839  exit_test = force_gimple_operand_gsi (&gsi, exit_test, true, NULL,
840					false, GSI_NEW_STMT);
841  gimple_cond_set_lhs (cond_expr, exit_test);
842  gsi = gsi_last_bb (exit_e->src);
843  gsi_insert_after (&gsi, cond_expr, GSI_NEW_STMT);
844
845  split_block_after_labels (loop_header);
846
847  return loop;
848}
849
850/* Make area between HEADER_EDGE and LATCH_EDGE a loop by connecting
851   latch to header and update loop tree and dominators
852   accordingly. Everything between them plus LATCH_EDGE destination must
853   be dominated by HEADER_EDGE destination, and back-reachable from
854   LATCH_EDGE source.  HEADER_EDGE is redirected to basic block SWITCH_BB,
855   FALSE_EDGE of SWITCH_BB to original destination of HEADER_EDGE and
856   TRUE_EDGE of SWITCH_BB to original destination of LATCH_EDGE.
857   Returns the newly created loop.  Frequencies and counts in the new loop
858   are scaled by FALSE_SCALE and in the old one by TRUE_SCALE.  */
859
860class loop *
861loopify (edge latch_edge, edge header_edge,
862	 basic_block switch_bb, edge true_edge, edge false_edge,
863	 bool redirect_all_edges, profile_probability true_scale,
864	 profile_probability false_scale)
865{
866  basic_block succ_bb = latch_edge->dest;
867  basic_block pred_bb = header_edge->src;
868  class loop *loop = alloc_loop ();
869  class loop *outer = loop_outer (succ_bb->loop_father);
870  profile_count cnt;
871
872  loop->header = header_edge->dest;
873  loop->latch = latch_edge->src;
874
875  cnt = header_edge->count ();
876
877  /* Redirect edges.  */
878  loop_redirect_edge (latch_edge, loop->header);
879  loop_redirect_edge (true_edge, succ_bb);
880
881  /* During loop versioning, one of the switch_bb edge is already properly
882     set. Do not redirect it again unless redirect_all_edges is true.  */
883  if (redirect_all_edges)
884    {
885      loop_redirect_edge (header_edge, switch_bb);
886      loop_redirect_edge (false_edge, loop->header);
887
888      /* Update dominators.  */
889      set_immediate_dominator (CDI_DOMINATORS, switch_bb, pred_bb);
890      set_immediate_dominator (CDI_DOMINATORS, loop->header, switch_bb);
891    }
892
893  set_immediate_dominator (CDI_DOMINATORS, succ_bb, switch_bb);
894
895  /* Compute new loop.  */
896  add_loop (loop, outer);
897
898  /* Add switch_bb to appropriate loop.  */
899  if (switch_bb->loop_father)
900    remove_bb_from_loops (switch_bb);
901  add_bb_to_loop (switch_bb, outer);
902
903  /* Fix counts.  */
904  if (redirect_all_edges)
905    {
906      switch_bb->count = cnt;
907    }
908  scale_loop_frequencies (loop, false_scale);
909  scale_loop_frequencies (succ_bb->loop_father, true_scale);
910  update_dominators_in_loop (loop);
911
912  return loop;
913}
914
915/* Remove the latch edge of a LOOP and update loops to indicate that
916   the LOOP was removed.  After this function, original loop latch will
917   have no successor, which caller is expected to fix somehow.
918
919   If this may cause the information about irreducible regions to become
920   invalid, IRRED_INVALIDATED is set to true.
921
922   LOOP_CLOSED_SSA_INVALIDATED, if non-NULL, is a bitmap where we store
923   basic blocks that had non-trivial update on their loop_father.*/
924
925void
926unloop (class loop *loop, bool *irred_invalidated,
927	bitmap loop_closed_ssa_invalidated)
928{
929  basic_block *body;
930  class loop *ploop;
931  unsigned i, n;
932  basic_block latch = loop->latch;
933  bool dummy = false;
934
935  if (loop_preheader_edge (loop)->flags & EDGE_IRREDUCIBLE_LOOP)
936    *irred_invalidated = true;
937
938  /* This is relatively straightforward.  The dominators are unchanged, as
939     loop header dominates loop latch, so the only thing we have to care of
940     is the placement of loops and basic blocks inside the loop tree.  We
941     move them all to the loop->outer, and then let fix_bb_placements do
942     its work.  */
943
944  body = get_loop_body (loop);
945  n = loop->num_nodes;
946  for (i = 0; i < n; i++)
947    if (body[i]->loop_father == loop)
948      {
949	remove_bb_from_loops (body[i]);
950	add_bb_to_loop (body[i], loop_outer (loop));
951      }
952  free (body);
953
954  while (loop->inner)
955    {
956      ploop = loop->inner;
957      flow_loop_tree_node_remove (ploop);
958      flow_loop_tree_node_add (loop_outer (loop), ploop);
959    }
960
961  /* Remove the loop and free its data.  */
962  delete_loop (loop);
963
964  remove_edge (single_succ_edge (latch));
965
966  /* We do not pass IRRED_INVALIDATED to fix_bb_placements here, as even if
967     there is an irreducible region inside the cancelled loop, the flags will
968     be still correct.  */
969  fix_bb_placements (latch, &dummy, loop_closed_ssa_invalidated);
970}
971
972/* Fix placement of superloops of LOOP inside loop tree, i.e. ensure that
973   condition stated in description of fix_loop_placement holds for them.
974   It is used in case when we removed some edges coming out of LOOP, which
975   may cause the right placement of LOOP inside loop tree to change.
976
977   IRRED_INVALIDATED is set to true if a change in the loop structures might
978   invalidate the information about irreducible regions.  */
979
980static void
981fix_loop_placements (class loop *loop, bool *irred_invalidated)
982{
983  class loop *outer;
984
985  while (loop_outer (loop))
986    {
987      outer = loop_outer (loop);
988      if (!fix_loop_placement (loop, irred_invalidated))
989	break;
990
991      /* Changing the placement of a loop in the loop tree may alter the
992	 validity of condition 2) of the description of fix_bb_placement
993	 for its preheader, because the successor is the header and belongs
994	 to the loop.  So call fix_bb_placements to fix up the placement
995	 of the preheader and (possibly) of its predecessors.  */
996      fix_bb_placements (loop_preheader_edge (loop)->src,
997			 irred_invalidated, NULL);
998      loop = outer;
999    }
1000}
1001
1002/* Duplicate loop bounds and other information we store about
1003   the loop into its duplicate.  */
1004
1005void
1006copy_loop_info (class loop *loop, class loop *target)
1007{
1008  gcc_checking_assert (!target->any_upper_bound && !target->any_estimate);
1009  target->any_upper_bound = loop->any_upper_bound;
1010  target->nb_iterations_upper_bound = loop->nb_iterations_upper_bound;
1011  target->any_likely_upper_bound = loop->any_likely_upper_bound;
1012  target->nb_iterations_likely_upper_bound
1013    = loop->nb_iterations_likely_upper_bound;
1014  target->any_estimate = loop->any_estimate;
1015  target->nb_iterations_estimate = loop->nb_iterations_estimate;
1016  target->estimate_state = loop->estimate_state;
1017  target->safelen = loop->safelen;
1018  target->simdlen = loop->simdlen;
1019  target->constraints = loop->constraints;
1020  target->can_be_parallel = loop->can_be_parallel;
1021  target->warned_aggressive_loop_optimizations
1022    |= loop->warned_aggressive_loop_optimizations;
1023  target->dont_vectorize = loop->dont_vectorize;
1024  target->force_vectorize = loop->force_vectorize;
1025  target->in_oacc_kernels_region = loop->in_oacc_kernels_region;
1026  target->finite_p = loop->finite_p;
1027  target->unroll = loop->unroll;
1028  target->owned_clique = loop->owned_clique;
1029}
1030
1031/* Copies copy of LOOP as subloop of TARGET loop, placing newly
1032   created loop into loops structure.  If AFTER is non-null
1033   the new loop is added at AFTER->next, otherwise in front of TARGETs
1034   sibling list.  */
1035class loop *
1036duplicate_loop (class loop *loop, class loop *target, class loop *after)
1037{
1038  class loop *cloop;
1039  cloop = alloc_loop ();
1040  place_new_loop (cfun, cloop);
1041
1042  copy_loop_info (loop, cloop);
1043
1044  /* Mark the new loop as copy of LOOP.  */
1045  set_loop_copy (loop, cloop);
1046
1047  /* Add it to target.  */
1048  flow_loop_tree_node_add (target, cloop, after);
1049
1050  return cloop;
1051}
1052
1053/* Copies structure of subloops of LOOP into TARGET loop, placing
1054   newly created loops into loop tree at the end of TARGETs sibling
1055   list in the original order.  */
1056void
1057duplicate_subloops (class loop *loop, class loop *target)
1058{
1059  class loop *aloop, *cloop, *tail;
1060
1061  for (tail = target->inner; tail && tail->next; tail = tail->next)
1062    ;
1063  for (aloop = loop->inner; aloop; aloop = aloop->next)
1064    {
1065      cloop = duplicate_loop (aloop, target, tail);
1066      tail = cloop;
1067      gcc_assert(!tail->next);
1068      duplicate_subloops (aloop, cloop);
1069    }
1070}
1071
1072/* Copies structure of subloops of N loops, stored in array COPIED_LOOPS,
1073   into TARGET loop, placing newly created loops into loop tree adding
1074   them to TARGETs sibling list at the end in order.  */
1075static void
1076copy_loops_to (class loop **copied_loops, int n, class loop *target)
1077{
1078  class loop *aloop, *tail;
1079  int i;
1080
1081  for (tail = target->inner; tail && tail->next; tail = tail->next)
1082    ;
1083  for (i = 0; i < n; i++)
1084    {
1085      aloop = duplicate_loop (copied_loops[i], target, tail);
1086      tail = aloop;
1087      gcc_assert(!tail->next);
1088      duplicate_subloops (copied_loops[i], aloop);
1089    }
1090}
1091
1092/* Redirects edge E to basic block DEST.  */
1093static void
1094loop_redirect_edge (edge e, basic_block dest)
1095{
1096  if (e->dest == dest)
1097    return;
1098
1099  redirect_edge_and_branch_force (e, dest);
1100}
1101
1102/* Check whether LOOP's body can be duplicated.  */
1103bool
1104can_duplicate_loop_p (const class loop *loop)
1105{
1106  int ret;
1107  basic_block *bbs = get_loop_body (loop);
1108
1109  ret = can_copy_bbs_p (bbs, loop->num_nodes);
1110  free (bbs);
1111
1112  return ret;
1113}
1114
1115/* Duplicates body of LOOP to given edge E NDUPL times.  Takes care of updating
1116   loop structure and dominators (order of inner subloops is retained).
1117   E's destination must be LOOP header for this to work, i.e. it must be entry
1118   or latch edge of this loop; these are unique, as the loops must have
1119   preheaders for this function to work correctly (in case E is latch, the
1120   function unrolls the loop, if E is entry edge, it peels the loop).  Store
1121   edges created by copying ORIG edge from copies corresponding to set bits in
1122   WONT_EXIT bitmap (bit 0 corresponds to original LOOP body, the other copies
1123   are numbered in order given by control flow through them) into TO_REMOVE
1124   array.  Returns false if duplication is
1125   impossible.  */
1126
1127bool
1128duplicate_loop_to_header_edge (class loop *loop, edge e,
1129			       unsigned int ndupl, sbitmap wont_exit,
1130			       edge orig, vec<edge> *to_remove,
1131			       int flags)
1132{
1133  class loop *target, *aloop;
1134  class loop **orig_loops;
1135  unsigned n_orig_loops;
1136  basic_block header = loop->header, latch = loop->latch;
1137  basic_block *new_bbs, *bbs, *first_active;
1138  basic_block new_bb, bb, first_active_latch = NULL;
1139  edge ae, latch_edge;
1140  edge spec_edges[2], new_spec_edges[2];
1141  const int SE_LATCH = 0;
1142  const int SE_ORIG = 1;
1143  unsigned i, j, n;
1144  int is_latch = (latch == e->src);
1145  profile_probability *scale_step = NULL;
1146  profile_probability scale_main = profile_probability::always ();
1147  profile_probability scale_act = profile_probability::always ();
1148  profile_count after_exit_num = profile_count::zero (),
1149	        after_exit_den = profile_count::zero ();
1150  bool scale_after_exit = false;
1151  int add_irreducible_flag;
1152  basic_block place_after;
1153  bitmap bbs_to_scale = NULL;
1154  bitmap_iterator bi;
1155
1156  gcc_assert (e->dest == loop->header);
1157  gcc_assert (ndupl > 0);
1158
1159  if (orig)
1160    {
1161      /* Orig must be edge out of the loop.  */
1162      gcc_assert (flow_bb_inside_loop_p (loop, orig->src));
1163      gcc_assert (!flow_bb_inside_loop_p (loop, orig->dest));
1164    }
1165
1166  n = loop->num_nodes;
1167  bbs = get_loop_body_in_dom_order (loop);
1168  gcc_assert (bbs[0] == loop->header);
1169  gcc_assert (bbs[n  - 1] == loop->latch);
1170
1171  /* Check whether duplication is possible.  */
1172  if (!can_copy_bbs_p (bbs, loop->num_nodes))
1173    {
1174      free (bbs);
1175      return false;
1176    }
1177  new_bbs = XNEWVEC (basic_block, loop->num_nodes);
1178
1179  /* In case we are doing loop peeling and the loop is in the middle of
1180     irreducible region, the peeled copies will be inside it too.  */
1181  add_irreducible_flag = e->flags & EDGE_IRREDUCIBLE_LOOP;
1182  gcc_assert (!is_latch || !add_irreducible_flag);
1183
1184  /* Find edge from latch.  */
1185  latch_edge = loop_latch_edge (loop);
1186
1187  if (flags & DLTHE_FLAG_UPDATE_FREQ)
1188    {
1189      /* Calculate coefficients by that we have to scale counts
1190	 of duplicated loop bodies.  */
1191      profile_count count_in = header->count;
1192      profile_count count_le = latch_edge->count ();
1193      profile_count count_out_orig = orig ? orig->count () : count_in - count_le;
1194      profile_probability prob_pass_thru = count_le.probability_in (count_in);
1195      profile_probability prob_pass_wont_exit =
1196	      (count_le + count_out_orig).probability_in (count_in);
1197
1198      if (orig && orig->probability.initialized_p ()
1199	  && !(orig->probability == profile_probability::always ()))
1200	{
1201	  /* The blocks that are dominated by a removed exit edge ORIG have
1202	     frequencies scaled by this.  */
1203	  if (orig->count ().initialized_p ())
1204	    {
1205	      after_exit_num = orig->src->count;
1206	      after_exit_den = after_exit_num - orig->count ();
1207	      scale_after_exit = true;
1208	    }
1209	  bbs_to_scale = BITMAP_ALLOC (NULL);
1210	  for (i = 0; i < n; i++)
1211	    {
1212	      if (bbs[i] != orig->src
1213		  && dominated_by_p (CDI_DOMINATORS, bbs[i], orig->src))
1214		bitmap_set_bit (bbs_to_scale, i);
1215	    }
1216	}
1217
1218      scale_step = XNEWVEC (profile_probability, ndupl);
1219
1220      for (i = 1; i <= ndupl; i++)
1221	scale_step[i - 1] = bitmap_bit_p (wont_exit, i)
1222				? prob_pass_wont_exit
1223				: prob_pass_thru;
1224
1225      /* Complete peeling is special as the probability of exit in last
1226	 copy becomes 1.  */
1227      if (flags & DLTHE_FLAG_COMPLETTE_PEEL)
1228	{
1229	  profile_count wanted_count = e->count ();
1230
1231	  gcc_assert (!is_latch);
1232	  /* First copy has count of incoming edge.  Each subsequent
1233	     count should be reduced by prob_pass_wont_exit.  Caller
1234	     should've managed the flags so all except for original loop
1235	     has won't exist set.  */
1236	  scale_act = wanted_count.probability_in (count_in);
1237	  /* Now simulate the duplication adjustments and compute header
1238	     frequency of the last copy.  */
1239	  for (i = 0; i < ndupl; i++)
1240	    wanted_count = wanted_count.apply_probability (scale_step [i]);
1241	  scale_main = wanted_count.probability_in (count_in);
1242	}
1243      /* Here we insert loop bodies inside the loop itself (for loop unrolling).
1244	 First iteration will be original loop followed by duplicated bodies.
1245	 It is necessary to scale down the original so we get right overall
1246	 number of iterations.  */
1247      else if (is_latch)
1248	{
1249	  profile_probability prob_pass_main = bitmap_bit_p (wont_exit, 0)
1250							? prob_pass_wont_exit
1251							: prob_pass_thru;
1252	  profile_probability p = prob_pass_main;
1253	  profile_count scale_main_den = count_in;
1254	  for (i = 0; i < ndupl; i++)
1255	    {
1256	      scale_main_den += count_in.apply_probability (p);
1257	      p = p * scale_step[i];
1258	    }
1259	  /* If original loop is executed COUNT_IN times, the unrolled
1260	     loop will account SCALE_MAIN_DEN times.  */
1261	  scale_main = count_in.probability_in (scale_main_den);
1262	  scale_act = scale_main * prob_pass_main;
1263	}
1264      else
1265	{
1266	  profile_count preheader_count = e->count ();
1267	  for (i = 0; i < ndupl; i++)
1268	    scale_main = scale_main * scale_step[i];
1269	  scale_act = preheader_count.probability_in (count_in);
1270	}
1271    }
1272
1273  /* Loop the new bbs will belong to.  */
1274  target = e->src->loop_father;
1275
1276  /* Original loops.  */
1277  n_orig_loops = 0;
1278  for (aloop = loop->inner; aloop; aloop = aloop->next)
1279    n_orig_loops++;
1280  orig_loops = XNEWVEC (class loop *, n_orig_loops);
1281  for (aloop = loop->inner, i = 0; aloop; aloop = aloop->next, i++)
1282    orig_loops[i] = aloop;
1283
1284  set_loop_copy (loop, target);
1285
1286  first_active = XNEWVEC (basic_block, n);
1287  if (is_latch)
1288    {
1289      memcpy (first_active, bbs, n * sizeof (basic_block));
1290      first_active_latch = latch;
1291    }
1292
1293  spec_edges[SE_ORIG] = orig;
1294  spec_edges[SE_LATCH] = latch_edge;
1295
1296  place_after = e->src;
1297  for (j = 0; j < ndupl; j++)
1298    {
1299      /* Copy loops.  */
1300      copy_loops_to (orig_loops, n_orig_loops, target);
1301
1302      /* Copy bbs.  */
1303      copy_bbs (bbs, n, new_bbs, spec_edges, 2, new_spec_edges, loop,
1304		place_after, true);
1305      place_after = new_spec_edges[SE_LATCH]->src;
1306
1307      if (flags & DLTHE_RECORD_COPY_NUMBER)
1308	for (i = 0; i < n; i++)
1309	  {
1310	    gcc_assert (!new_bbs[i]->aux);
1311	    new_bbs[i]->aux = (void *)(size_t)(j + 1);
1312	  }
1313
1314      /* Note whether the blocks and edges belong to an irreducible loop.  */
1315      if (add_irreducible_flag)
1316	{
1317	  for (i = 0; i < n; i++)
1318	    new_bbs[i]->flags |= BB_DUPLICATED;
1319	  for (i = 0; i < n; i++)
1320	    {
1321	      edge_iterator ei;
1322	      new_bb = new_bbs[i];
1323	      if (new_bb->loop_father == target)
1324		new_bb->flags |= BB_IRREDUCIBLE_LOOP;
1325
1326	      FOR_EACH_EDGE (ae, ei, new_bb->succs)
1327		if ((ae->dest->flags & BB_DUPLICATED)
1328		    && (ae->src->loop_father == target
1329			|| ae->dest->loop_father == target))
1330		  ae->flags |= EDGE_IRREDUCIBLE_LOOP;
1331	    }
1332	  for (i = 0; i < n; i++)
1333	    new_bbs[i]->flags &= ~BB_DUPLICATED;
1334	}
1335
1336      /* Redirect the special edges.  */
1337      if (is_latch)
1338	{
1339	  redirect_edge_and_branch_force (latch_edge, new_bbs[0]);
1340	  redirect_edge_and_branch_force (new_spec_edges[SE_LATCH],
1341					  loop->header);
1342	  set_immediate_dominator (CDI_DOMINATORS, new_bbs[0], latch);
1343	  latch = loop->latch = new_bbs[n - 1];
1344	  e = latch_edge = new_spec_edges[SE_LATCH];
1345	}
1346      else
1347	{
1348	  redirect_edge_and_branch_force (new_spec_edges[SE_LATCH],
1349					  loop->header);
1350	  redirect_edge_and_branch_force (e, new_bbs[0]);
1351	  set_immediate_dominator (CDI_DOMINATORS, new_bbs[0], e->src);
1352	  e = new_spec_edges[SE_LATCH];
1353	}
1354
1355      /* Record exit edge in this copy.  */
1356      if (orig && bitmap_bit_p (wont_exit, j + 1))
1357	{
1358	  if (to_remove)
1359	    to_remove->safe_push (new_spec_edges[SE_ORIG]);
1360	  force_edge_cold (new_spec_edges[SE_ORIG], true);
1361
1362	  /* Scale the frequencies of the blocks dominated by the exit.  */
1363	  if (bbs_to_scale && scale_after_exit)
1364	    {
1365	      EXECUTE_IF_SET_IN_BITMAP (bbs_to_scale, 0, i, bi)
1366		scale_bbs_frequencies_profile_count (new_bbs + i, 1, after_exit_num,
1367						     after_exit_den);
1368	    }
1369	}
1370
1371      /* Record the first copy in the control flow order if it is not
1372	 the original loop (i.e. in case of peeling).  */
1373      if (!first_active_latch)
1374	{
1375	  memcpy (first_active, new_bbs, n * sizeof (basic_block));
1376	  first_active_latch = new_bbs[n - 1];
1377	}
1378
1379      /* Set counts and frequencies.  */
1380      if (flags & DLTHE_FLAG_UPDATE_FREQ)
1381	{
1382	  scale_bbs_frequencies (new_bbs, n, scale_act);
1383	  scale_act = scale_act * scale_step[j];
1384	}
1385    }
1386  free (new_bbs);
1387  free (orig_loops);
1388
1389  /* Record the exit edge in the original loop body, and update the frequencies.  */
1390  if (orig && bitmap_bit_p (wont_exit, 0))
1391    {
1392      if (to_remove)
1393	to_remove->safe_push (orig);
1394      force_edge_cold (orig, true);
1395
1396      /* Scale the frequencies of the blocks dominated by the exit.  */
1397      if (bbs_to_scale && scale_after_exit)
1398	{
1399	  EXECUTE_IF_SET_IN_BITMAP (bbs_to_scale, 0, i, bi)
1400	    scale_bbs_frequencies_profile_count (bbs + i, 1, after_exit_num,
1401						 after_exit_den);
1402	}
1403    }
1404
1405  /* Update the original loop.  */
1406  if (!is_latch)
1407    set_immediate_dominator (CDI_DOMINATORS, e->dest, e->src);
1408  if (flags & DLTHE_FLAG_UPDATE_FREQ)
1409    {
1410      scale_bbs_frequencies (bbs, n, scale_main);
1411      free (scale_step);
1412    }
1413
1414  /* Update dominators of outer blocks if affected.  */
1415  for (i = 0; i < n; i++)
1416    {
1417      basic_block dominated, dom_bb;
1418      vec<basic_block> dom_bbs;
1419      unsigned j;
1420
1421      bb = bbs[i];
1422      bb->aux = 0;
1423
1424      dom_bbs = get_dominated_by (CDI_DOMINATORS, bb);
1425      FOR_EACH_VEC_ELT (dom_bbs, j, dominated)
1426	{
1427	  if (flow_bb_inside_loop_p (loop, dominated))
1428	    continue;
1429	  dom_bb = nearest_common_dominator (
1430			CDI_DOMINATORS, first_active[i], first_active_latch);
1431	  set_immediate_dominator (CDI_DOMINATORS, dominated, dom_bb);
1432	}
1433      dom_bbs.release ();
1434    }
1435  free (first_active);
1436
1437  free (bbs);
1438  BITMAP_FREE (bbs_to_scale);
1439
1440  return true;
1441}
1442
1443/* A callback for make_forwarder block, to redirect all edges except for
1444   MFB_KJ_EDGE to the entry part.  E is the edge for that we should decide
1445   whether to redirect it.  */
1446
1447edge mfb_kj_edge;
1448bool
1449mfb_keep_just (edge e)
1450{
1451  return e != mfb_kj_edge;
1452}
1453
1454/* True when a candidate preheader BLOCK has predecessors from LOOP.  */
1455
1456static bool
1457has_preds_from_loop (basic_block block, class loop *loop)
1458{
1459  edge e;
1460  edge_iterator ei;
1461
1462  FOR_EACH_EDGE (e, ei, block->preds)
1463    if (e->src->loop_father == loop)
1464      return true;
1465  return false;
1466}
1467
1468/* Creates a pre-header for a LOOP.  Returns newly created block.  Unless
1469   CP_SIMPLE_PREHEADERS is set in FLAGS, we only force LOOP to have single
1470   entry; otherwise we also force preheader block to have only one successor.
1471   When CP_FALLTHRU_PREHEADERS is set in FLAGS, we force the preheader block
1472   to be a fallthru predecessor to the loop header and to have only
1473   predecessors from outside of the loop.
1474   The function also updates dominators.  */
1475
1476basic_block
1477create_preheader (class loop *loop, int flags)
1478{
1479  edge e;
1480  basic_block dummy;
1481  int nentry = 0;
1482  bool irred = false;
1483  bool latch_edge_was_fallthru;
1484  edge one_succ_pred = NULL, single_entry = NULL;
1485  edge_iterator ei;
1486
1487  FOR_EACH_EDGE (e, ei, loop->header->preds)
1488    {
1489      if (e->src == loop->latch)
1490	continue;
1491      irred |= (e->flags & EDGE_IRREDUCIBLE_LOOP) != 0;
1492      nentry++;
1493      single_entry = e;
1494      if (single_succ_p (e->src))
1495	one_succ_pred = e;
1496    }
1497  gcc_assert (nentry);
1498  if (nentry == 1)
1499    {
1500      bool need_forwarder_block = false;
1501
1502      /* We do not allow entry block to be the loop preheader, since we
1503	     cannot emit code there.  */
1504      if (single_entry->src == ENTRY_BLOCK_PTR_FOR_FN (cfun))
1505        need_forwarder_block = true;
1506      else
1507        {
1508          /* If we want simple preheaders, also force the preheader to have
1509	     just a single successor and a normal edge.  */
1510          if ((flags & CP_SIMPLE_PREHEADERS)
1511	      && ((single_entry->flags & EDGE_COMPLEX)
1512		  || !single_succ_p (single_entry->src)))
1513            need_forwarder_block = true;
1514          /* If we want fallthru preheaders, also create forwarder block when
1515             preheader ends with a jump or has predecessors from loop.  */
1516          else if ((flags & CP_FALLTHRU_PREHEADERS)
1517                   && (JUMP_P (BB_END (single_entry->src))
1518                       || has_preds_from_loop (single_entry->src, loop)))
1519            need_forwarder_block = true;
1520        }
1521      if (! need_forwarder_block)
1522	return NULL;
1523    }
1524
1525  mfb_kj_edge = loop_latch_edge (loop);
1526  latch_edge_was_fallthru = (mfb_kj_edge->flags & EDGE_FALLTHRU) != 0;
1527  if (nentry == 1
1528      && ((flags & CP_FALLTHRU_PREHEADERS) == 0
1529  	  || (single_entry->flags & EDGE_CROSSING) == 0))
1530    dummy = split_edge (single_entry);
1531  else
1532    {
1533      edge fallthru = make_forwarder_block (loop->header, mfb_keep_just, NULL);
1534      dummy = fallthru->src;
1535      loop->header = fallthru->dest;
1536    }
1537
1538  /* Try to be clever in placing the newly created preheader.  The idea is to
1539     avoid breaking any "fallthruness" relationship between blocks.
1540
1541     The preheader was created just before the header and all incoming edges
1542     to the header were redirected to the preheader, except the latch edge.
1543     So the only problematic case is when this latch edge was a fallthru
1544     edge: it is not anymore after the preheader creation so we have broken
1545     the fallthruness.  We're therefore going to look for a better place.  */
1546  if (latch_edge_was_fallthru)
1547    {
1548      if (one_succ_pred)
1549	e = one_succ_pred;
1550      else
1551	e = EDGE_PRED (dummy, 0);
1552
1553      move_block_after (dummy, e->src);
1554    }
1555
1556  if (irred)
1557    {
1558      dummy->flags |= BB_IRREDUCIBLE_LOOP;
1559      single_succ_edge (dummy)->flags |= EDGE_IRREDUCIBLE_LOOP;
1560    }
1561
1562  if (dump_file)
1563    fprintf (dump_file, "Created preheader block for loop %i\n",
1564	     loop->num);
1565
1566  if (flags & CP_FALLTHRU_PREHEADERS)
1567    gcc_assert ((single_succ_edge (dummy)->flags & EDGE_FALLTHRU)
1568                && !JUMP_P (BB_END (dummy)));
1569
1570  return dummy;
1571}
1572
1573/* Create preheaders for each loop; for meaning of FLAGS see create_preheader.  */
1574
1575void
1576create_preheaders (int flags)
1577{
1578  class loop *loop;
1579
1580  if (!current_loops)
1581    return;
1582
1583  FOR_EACH_LOOP (loop, 0)
1584    create_preheader (loop, flags);
1585  loops_state_set (LOOPS_HAVE_PREHEADERS);
1586}
1587
1588/* Forces all loop latches to have only single successor.  */
1589
1590void
1591force_single_succ_latches (void)
1592{
1593  class loop *loop;
1594  edge e;
1595
1596  FOR_EACH_LOOP (loop, 0)
1597    {
1598      if (loop->latch != loop->header && single_succ_p (loop->latch))
1599	continue;
1600
1601      e = find_edge (loop->latch, loop->header);
1602      gcc_checking_assert (e != NULL);
1603
1604      split_edge (e);
1605    }
1606  loops_state_set (LOOPS_HAVE_SIMPLE_LATCHES);
1607}
1608
1609/* This function is called from loop_version.  It splits the entry edge
1610   of the loop we want to version, adds the versioning condition, and
1611   adjust the edges to the two versions of the loop appropriately.
1612   e is an incoming edge. Returns the basic block containing the
1613   condition.
1614
1615   --- edge e ---- > [second_head]
1616
1617   Split it and insert new conditional expression and adjust edges.
1618
1619    --- edge e ---> [cond expr] ---> [first_head]
1620			|
1621			+---------> [second_head]
1622
1623  THEN_PROB is the probability of then branch of the condition.
1624  ELSE_PROB is the probability of else branch. Note that they may be both
1625  REG_BR_PROB_BASE when condition is IFN_LOOP_VECTORIZED or
1626  IFN_LOOP_DIST_ALIAS.  */
1627
1628static basic_block
1629lv_adjust_loop_entry_edge (basic_block first_head, basic_block second_head,
1630			   edge e, void *cond_expr,
1631			   profile_probability then_prob,
1632			   profile_probability else_prob)
1633{
1634  basic_block new_head = NULL;
1635  edge e1;
1636
1637  gcc_assert (e->dest == second_head);
1638
1639  /* Split edge 'e'. This will create a new basic block, where we can
1640     insert conditional expr.  */
1641  new_head = split_edge (e);
1642
1643  lv_add_condition_to_bb (first_head, second_head, new_head,
1644			  cond_expr);
1645
1646  /* Don't set EDGE_TRUE_VALUE in RTL mode, as it's invalid there.  */
1647  e = single_succ_edge (new_head);
1648  e1 = make_edge (new_head, first_head,
1649		  current_ir_type () == IR_GIMPLE ? EDGE_TRUE_VALUE : 0);
1650  e1->probability = then_prob;
1651  e->probability = else_prob;
1652
1653  set_immediate_dominator (CDI_DOMINATORS, first_head, new_head);
1654  set_immediate_dominator (CDI_DOMINATORS, second_head, new_head);
1655
1656  /* Adjust loop header phi nodes.  */
1657  lv_adjust_loop_header_phi (first_head, second_head, new_head, e1);
1658
1659  return new_head;
1660}
1661
1662/* Main entry point for Loop Versioning transformation.
1663
1664   This transformation given a condition and a loop, creates
1665   -if (condition) { loop_copy1 } else { loop_copy2 },
1666   where loop_copy1 is the loop transformed in one way, and loop_copy2
1667   is the loop transformed in another way (or unchanged). COND_EXPR
1668   may be a run time test for things that were not resolved by static
1669   analysis (overlapping ranges (anti-aliasing), alignment, etc.).
1670
1671   If non-NULL, CONDITION_BB is set to the basic block containing the
1672   condition.
1673
1674   THEN_PROB is the probability of the then edge of the if.  THEN_SCALE
1675   is the ratio by that the frequencies in the original loop should
1676   be scaled.  ELSE_SCALE is the ratio by that the frequencies in the
1677   new loop should be scaled.
1678
1679   If PLACE_AFTER is true, we place the new loop after LOOP in the
1680   instruction stream, otherwise it is placed before LOOP.  */
1681
1682class loop *
1683loop_version (class loop *loop,
1684	      void *cond_expr, basic_block *condition_bb,
1685	      profile_probability then_prob, profile_probability else_prob,
1686	      profile_probability then_scale, profile_probability else_scale,
1687	      bool place_after)
1688{
1689  basic_block first_head, second_head;
1690  edge entry, latch_edge, true_edge, false_edge;
1691  int irred_flag;
1692  class loop *nloop;
1693  basic_block cond_bb;
1694
1695  /* Record entry and latch edges for the loop */
1696  entry = loop_preheader_edge (loop);
1697  irred_flag = entry->flags & EDGE_IRREDUCIBLE_LOOP;
1698  entry->flags &= ~EDGE_IRREDUCIBLE_LOOP;
1699
1700  /* Note down head of loop as first_head.  */
1701  first_head = entry->dest;
1702
1703  /* Duplicate loop.  */
1704  if (!cfg_hook_duplicate_loop_to_header_edge (loop, entry, 1,
1705					       NULL, NULL, NULL, 0))
1706    {
1707      entry->flags |= irred_flag;
1708      return NULL;
1709    }
1710
1711  /* After duplication entry edge now points to new loop head block.
1712     Note down new head as second_head.  */
1713  second_head = entry->dest;
1714
1715  /* Split loop entry edge and insert new block with cond expr.  */
1716  cond_bb =  lv_adjust_loop_entry_edge (first_head, second_head,
1717					entry, cond_expr, then_prob, else_prob);
1718  if (condition_bb)
1719    *condition_bb = cond_bb;
1720
1721  if (!cond_bb)
1722    {
1723      entry->flags |= irred_flag;
1724      return NULL;
1725    }
1726
1727  latch_edge = single_succ_edge (get_bb_copy (loop->latch));
1728
1729  extract_cond_bb_edges (cond_bb, &true_edge, &false_edge);
1730  nloop = loopify (latch_edge,
1731		   single_pred_edge (get_bb_copy (loop->header)),
1732		   cond_bb, true_edge, false_edge,
1733		   false /* Do not redirect all edges.  */,
1734		   then_scale, else_scale);
1735
1736  copy_loop_info (loop, nloop);
1737  set_loop_copy (loop, nloop);
1738
1739  /* loopify redirected latch_edge. Update its PENDING_STMTS.  */
1740  lv_flush_pending_stmts (latch_edge);
1741
1742  /* loopify redirected condition_bb's succ edge. Update its PENDING_STMTS.  */
1743  extract_cond_bb_edges (cond_bb, &true_edge, &false_edge);
1744  lv_flush_pending_stmts (false_edge);
1745  /* Adjust irreducible flag.  */
1746  if (irred_flag)
1747    {
1748      cond_bb->flags |= BB_IRREDUCIBLE_LOOP;
1749      loop_preheader_edge (loop)->flags |= EDGE_IRREDUCIBLE_LOOP;
1750      loop_preheader_edge (nloop)->flags |= EDGE_IRREDUCIBLE_LOOP;
1751      single_pred_edge (cond_bb)->flags |= EDGE_IRREDUCIBLE_LOOP;
1752    }
1753
1754  if (place_after)
1755    {
1756      basic_block *bbs = get_loop_body_in_dom_order (nloop), after;
1757      unsigned i;
1758
1759      after = loop->latch;
1760
1761      for (i = 0; i < nloop->num_nodes; i++)
1762	{
1763	  move_block_after (bbs[i], after);
1764	  after = bbs[i];
1765	}
1766      free (bbs);
1767    }
1768
1769  /* At this point condition_bb is loop preheader with two successors,
1770     first_head and second_head.   Make sure that loop preheader has only
1771     one successor.  */
1772  split_edge (loop_preheader_edge (loop));
1773  split_edge (loop_preheader_edge (nloop));
1774
1775  return nloop;
1776}
1777