1/* Loop manipulation code for GNU compiler.
2   Copyright (C) 2002-2022 Free Software Foundation, Inc.
3
4This file is part of GCC.
5
6GCC is free software; you can redistribute it and/or modify it under
7the terms of the GNU General Public License as published by the Free
8Software Foundation; either version 3, or (at your option) any later
9version.
10
11GCC is distributed in the hope that it will be useful, but WITHOUT ANY
12WARRANTY; without even the implied warranty of MERCHANTABILITY or
13FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
14for more details.
15
16You should have received a copy of the GNU General Public License
17along with GCC; see the file COPYING3.  If not see
18<http://www.gnu.org/licenses/>.  */
19
20#include "config.h"
21#include "system.h"
22#include "coretypes.h"
23#include "backend.h"
24#include "rtl.h"
25#include "tree.h"
26#include "gimple.h"
27#include "cfghooks.h"
28#include "cfganal.h"
29#include "cfgloop.h"
30#include "gimple-iterator.h"
31#include "gimplify-me.h"
32#include "tree-ssa-loop-manip.h"
33#include "dumpfile.h"
34
35static void copy_loops_to (class loop **, int,
36			   class loop *);
37static void loop_redirect_edge (edge, basic_block);
38static void remove_bbs (basic_block *, int);
39static bool rpe_enum_p (const_basic_block, const void *);
40static int find_path (edge, basic_block **);
41static void fix_loop_placements (class loop *, bool *);
42static bool fix_bb_placement (basic_block);
43static void fix_bb_placements (basic_block, bool *, bitmap);
44
45/* Checks whether basic block BB is dominated by DATA.  */
46static bool
47rpe_enum_p (const_basic_block bb, const void *data)
48{
49  return dominated_by_p (CDI_DOMINATORS, bb, (const_basic_block) data);
50}
51
52/* Remove basic blocks BBS.  NBBS is the number of the basic blocks.  */
53
54static void
55remove_bbs (basic_block *bbs, int nbbs)
56{
57  int i;
58
59  for (i = 0; i < nbbs; i++)
60    delete_basic_block (bbs[i]);
61}
62
63/* Find path -- i.e. the basic blocks dominated by edge E and put them
64   into array BBS, that will be allocated large enough to contain them.
65   E->dest must have exactly one predecessor for this to work (it is
66   easy to achieve and we do not put it here because we do not want to
67   alter anything by this function).  The number of basic blocks in the
68   path is returned.  */
69static int
70find_path (edge e, basic_block **bbs)
71{
72  gcc_assert (EDGE_COUNT (e->dest->preds) <= 1);
73
74  /* Find bbs in the path.  */
75  *bbs = XNEWVEC (basic_block, n_basic_blocks_for_fn (cfun));
76  return dfs_enumerate_from (e->dest, 0, rpe_enum_p, *bbs,
77			     n_basic_blocks_for_fn (cfun), e->dest);
78}
79
80/* Fix placement of basic block BB inside loop hierarchy --
81   Let L be a loop to that BB belongs.  Then every successor of BB must either
82     1) belong to some superloop of loop L, or
83     2) be a header of loop K such that K->outer is superloop of L
84   Returns true if we had to move BB into other loop to enforce this condition,
85   false if the placement of BB was already correct (provided that placements
86   of its successors are correct).  */
87static bool
88fix_bb_placement (basic_block bb)
89{
90  edge e;
91  edge_iterator ei;
92  class loop *loop = current_loops->tree_root, *act;
93
94  FOR_EACH_EDGE (e, ei, bb->succs)
95    {
96      if (e->dest == EXIT_BLOCK_PTR_FOR_FN (cfun))
97	continue;
98
99      act = e->dest->loop_father;
100      if (act->header == e->dest)
101	act = loop_outer (act);
102
103      if (flow_loop_nested_p (loop, act))
104	loop = act;
105    }
106
107  if (loop == bb->loop_father)
108    return false;
109
110  remove_bb_from_loops (bb);
111  add_bb_to_loop (bb, loop);
112
113  return true;
114}
115
116/* Fix placement of LOOP inside loop tree, i.e. find the innermost superloop
117   of LOOP to that leads at least one exit edge of LOOP, and set it
118   as the immediate superloop of LOOP.  Return true if the immediate superloop
119   of LOOP changed.
120
121   IRRED_INVALIDATED is set to true if a change in the loop structures might
122   invalidate the information about irreducible regions.  */
123
124static bool
125fix_loop_placement (class loop *loop, bool *irred_invalidated)
126{
127  unsigned i;
128  edge e;
129  auto_vec<edge> exits = get_loop_exit_edges (loop);
130  class loop *father = current_loops->tree_root, *act;
131  bool ret = false;
132
133  FOR_EACH_VEC_ELT (exits, i, e)
134    {
135      act = find_common_loop (loop, e->dest->loop_father);
136      if (flow_loop_nested_p (father, act))
137	father = act;
138    }
139
140  if (father != loop_outer (loop))
141    {
142      for (act = loop_outer (loop); act != father; act = loop_outer (act))
143	act->num_nodes -= loop->num_nodes;
144      flow_loop_tree_node_remove (loop);
145      flow_loop_tree_node_add (father, loop);
146
147      /* The exit edges of LOOP no longer exits its original immediate
148	 superloops; remove them from the appropriate exit lists.  */
149      FOR_EACH_VEC_ELT (exits, i, e)
150	{
151	  /* We may need to recompute irreducible loops.  */
152	  if (e->flags & EDGE_IRREDUCIBLE_LOOP)
153	    *irred_invalidated = true;
154	  rescan_loop_exit (e, false, false);
155	}
156
157      ret = true;
158    }
159
160  return ret;
161}
162
163/* Fix placements of basic blocks inside loop hierarchy stored in loops; i.e.
164   enforce condition stated in description of fix_bb_placement. We
165   start from basic block FROM that had some of its successors removed, so that
166   his placement no longer has to be correct, and iteratively fix placement of
167   its predecessors that may change if placement of FROM changed.  Also fix
168   placement of subloops of FROM->loop_father, that might also be altered due
169   to this change; the condition for them is similar, except that instead of
170   successors we consider edges coming out of the loops.
171
172   If the changes may invalidate the information about irreducible regions,
173   IRRED_INVALIDATED is set to true.
174
175   If LOOP_CLOSED_SSA_INVLIDATED is non-zero then all basic blocks with
176   changed loop_father are collected there. */
177
178static void
179fix_bb_placements (basic_block from,
180		   bool *irred_invalidated,
181		   bitmap loop_closed_ssa_invalidated)
182{
183  basic_block *queue, *qtop, *qbeg, *qend;
184  class loop *base_loop, *target_loop;
185  edge e;
186
187  /* We pass through blocks back-reachable from FROM, testing whether some
188     of their successors moved to outer loop.  It may be necessary to
189     iterate several times, but it is finite, as we stop unless we move
190     the basic block up the loop structure.  The whole story is a bit
191     more complicated due to presence of subloops, those are moved using
192     fix_loop_placement.  */
193
194  base_loop = from->loop_father;
195  /* If we are already in the outermost loop, the basic blocks cannot be moved
196     outside of it.  If FROM is the header of the base loop, it cannot be moved
197     outside of it, either.  In both cases, we can end now.  */
198  if (base_loop == current_loops->tree_root
199      || from == base_loop->header)
200    return;
201
202  auto_sbitmap in_queue (last_basic_block_for_fn (cfun));
203  bitmap_clear (in_queue);
204  bitmap_set_bit (in_queue, from->index);
205  /* Prevent us from going out of the base_loop.  */
206  bitmap_set_bit (in_queue, base_loop->header->index);
207
208  queue = XNEWVEC (basic_block, base_loop->num_nodes + 1);
209  qtop = queue + base_loop->num_nodes + 1;
210  qbeg = queue;
211  qend = queue + 1;
212  *qbeg = from;
213
214  while (qbeg != qend)
215    {
216      edge_iterator ei;
217      from = *qbeg;
218      qbeg++;
219      if (qbeg == qtop)
220	qbeg = queue;
221      bitmap_clear_bit (in_queue, from->index);
222
223      if (from->loop_father->header == from)
224	{
225	  /* Subloop header, maybe move the loop upward.  */
226	  if (!fix_loop_placement (from->loop_father, irred_invalidated))
227	    continue;
228	  target_loop = loop_outer (from->loop_father);
229	  if (loop_closed_ssa_invalidated)
230	    {
231	      basic_block *bbs = get_loop_body (from->loop_father);
232	      for (unsigned i = 0; i < from->loop_father->num_nodes; ++i)
233		bitmap_set_bit (loop_closed_ssa_invalidated, bbs[i]->index);
234	      free (bbs);
235	    }
236	}
237      else
238	{
239	  /* Ordinary basic block.  */
240	  if (!fix_bb_placement (from))
241	    continue;
242	  target_loop = from->loop_father;
243	  if (loop_closed_ssa_invalidated)
244	    bitmap_set_bit (loop_closed_ssa_invalidated, from->index);
245	}
246
247      FOR_EACH_EDGE (e, ei, from->succs)
248	{
249	  if (e->flags & EDGE_IRREDUCIBLE_LOOP)
250	    *irred_invalidated = true;
251	}
252
253      /* Something has changed, insert predecessors into queue.  */
254      FOR_EACH_EDGE (e, ei, from->preds)
255	{
256	  basic_block pred = e->src;
257	  class loop *nca;
258
259	  if (e->flags & EDGE_IRREDUCIBLE_LOOP)
260	    *irred_invalidated = true;
261
262	  if (bitmap_bit_p (in_queue, pred->index))
263	    continue;
264
265	  /* If it is subloop, then it either was not moved, or
266	     the path up the loop tree from base_loop do not contain
267	     it.  */
268	  nca = find_common_loop (pred->loop_father, base_loop);
269	  if (pred->loop_father != base_loop
270	      && (nca == base_loop
271		  || nca != pred->loop_father))
272	    pred = pred->loop_father->header;
273	  else if (!flow_loop_nested_p (target_loop, pred->loop_father))
274	    {
275	      /* If PRED is already higher in the loop hierarchy than the
276		 TARGET_LOOP to that we moved FROM, the change of the position
277		 of FROM does not affect the position of PRED, so there is no
278		 point in processing it.  */
279	      continue;
280	    }
281
282	  if (bitmap_bit_p (in_queue, pred->index))
283	    continue;
284
285	  /* Schedule the basic block.  */
286	  *qend = pred;
287	  qend++;
288	  if (qend == qtop)
289	    qend = queue;
290	  bitmap_set_bit (in_queue, pred->index);
291	}
292    }
293  free (queue);
294}
295
296/* Removes path beginning at edge E, i.e. remove basic blocks dominated by E
297   and update loop structures and dominators.  Return true if we were able
298   to remove the path, false otherwise (and nothing is affected then).  */
299bool
300remove_path (edge e, bool *irred_invalidated,
301	     bitmap loop_closed_ssa_invalidated)
302{
303  edge ae;
304  basic_block *rem_bbs, *bord_bbs, from, bb;
305  vec<basic_block> dom_bbs;
306  int i, nrem, n_bord_bbs;
307  bool local_irred_invalidated = false;
308  edge_iterator ei;
309  class loop *l, *f;
310
311  if (! irred_invalidated)
312    irred_invalidated = &local_irred_invalidated;
313
314  if (!can_remove_branch_p (e))
315    return false;
316
317  /* Keep track of whether we need to update information about irreducible
318     regions.  This is the case if the removed area is a part of the
319     irreducible region, or if the set of basic blocks that belong to a loop
320     that is inside an irreducible region is changed, or if such a loop is
321     removed.  */
322  if (e->flags & EDGE_IRREDUCIBLE_LOOP)
323    *irred_invalidated = true;
324
325  /* We need to check whether basic blocks are dominated by the edge
326     e, but we only have basic block dominators.  This is easy to
327     fix -- when e->dest has exactly one predecessor, this corresponds
328     to blocks dominated by e->dest, if not, split the edge.  */
329  if (!single_pred_p (e->dest))
330    e = single_pred_edge (split_edge (e));
331
332  /* It may happen that by removing path we remove one or more loops
333     we belong to.  In this case first unloop the loops, then proceed
334     normally.   We may assume that e->dest is not a header of any loop,
335     as it now has exactly one predecessor.  */
336  for (l = e->src->loop_father; loop_outer (l); l = f)
337    {
338      f = loop_outer (l);
339      if (dominated_by_p (CDI_DOMINATORS, l->latch, e->dest))
340        unloop (l, irred_invalidated, loop_closed_ssa_invalidated);
341    }
342
343  /* Identify the path.  */
344  nrem = find_path (e, &rem_bbs);
345
346  n_bord_bbs = 0;
347  bord_bbs = XNEWVEC (basic_block, n_basic_blocks_for_fn (cfun));
348  auto_sbitmap seen (last_basic_block_for_fn (cfun));
349  bitmap_clear (seen);
350
351  /* Find "border" hexes -- i.e. those with predecessor in removed path.  */
352  for (i = 0; i < nrem; i++)
353    bitmap_set_bit (seen, rem_bbs[i]->index);
354  if (!*irred_invalidated)
355    FOR_EACH_EDGE (ae, ei, e->src->succs)
356      if (ae != e && ae->dest != EXIT_BLOCK_PTR_FOR_FN (cfun)
357	  && !bitmap_bit_p (seen, ae->dest->index)
358	  && ae->flags & EDGE_IRREDUCIBLE_LOOP)
359	{
360	  *irred_invalidated = true;
361	  break;
362	}
363
364  for (i = 0; i < nrem; i++)
365    {
366      FOR_EACH_EDGE (ae, ei, rem_bbs[i]->succs)
367	if (ae->dest != EXIT_BLOCK_PTR_FOR_FN (cfun)
368	    && !bitmap_bit_p (seen, ae->dest->index))
369	  {
370	    bitmap_set_bit (seen, ae->dest->index);
371	    bord_bbs[n_bord_bbs++] = ae->dest;
372
373	    if (ae->flags & EDGE_IRREDUCIBLE_LOOP)
374	      *irred_invalidated = true;
375	  }
376    }
377
378  /* Remove the path.  */
379  from = e->src;
380  remove_branch (e);
381  dom_bbs.create (0);
382
383  /* Cancel loops contained in the path.  */
384  for (i = 0; i < nrem; i++)
385    if (rem_bbs[i]->loop_father->header == rem_bbs[i])
386      cancel_loop_tree (rem_bbs[i]->loop_father);
387
388  remove_bbs (rem_bbs, nrem);
389  free (rem_bbs);
390
391  /* Find blocks whose dominators may be affected.  */
392  bitmap_clear (seen);
393  for (i = 0; i < n_bord_bbs; i++)
394    {
395      basic_block ldom;
396
397      bb = get_immediate_dominator (CDI_DOMINATORS, bord_bbs[i]);
398      if (bitmap_bit_p (seen, bb->index))
399	continue;
400      bitmap_set_bit (seen, bb->index);
401
402      for (ldom = first_dom_son (CDI_DOMINATORS, bb);
403	   ldom;
404	   ldom = next_dom_son (CDI_DOMINATORS, ldom))
405	if (!dominated_by_p (CDI_DOMINATORS, from, ldom))
406	  dom_bbs.safe_push (ldom);
407    }
408
409  /* Recount dominators.  */
410  iterate_fix_dominators (CDI_DOMINATORS, dom_bbs, true);
411  dom_bbs.release ();
412  free (bord_bbs);
413
414  /* Fix placements of basic blocks inside loops and the placement of
415     loops in the loop tree.  */
416  fix_bb_placements (from, irred_invalidated, loop_closed_ssa_invalidated);
417  fix_loop_placements (from->loop_father, irred_invalidated);
418
419  if (local_irred_invalidated
420      && loops_state_satisfies_p (LOOPS_HAVE_MARKED_IRREDUCIBLE_REGIONS))
421    mark_irreducible_loops ();
422
423  return true;
424}
425
426/* Creates place for a new LOOP in loops structure of FN.  */
427
428void
429place_new_loop (struct function *fn, class loop *loop)
430{
431  loop->num = number_of_loops (fn);
432  vec_safe_push (loops_for_fn (fn)->larray, loop);
433}
434
435/* Given LOOP structure with filled header and latch, find the body of the
436   corresponding loop and add it to loops tree.  Insert the LOOP as a son of
437   outer.  */
438
439void
440add_loop (class loop *loop, class loop *outer)
441{
442  basic_block *bbs;
443  int i, n;
444  class loop *subloop;
445  edge e;
446  edge_iterator ei;
447
448  /* Add it to loop structure.  */
449  place_new_loop (cfun, loop);
450  flow_loop_tree_node_add (outer, loop);
451
452  /* Find its nodes.  */
453  bbs = XNEWVEC (basic_block, n_basic_blocks_for_fn (cfun));
454  n = get_loop_body_with_size (loop, bbs, n_basic_blocks_for_fn (cfun));
455
456  for (i = 0; i < n; i++)
457    {
458      if (bbs[i]->loop_father == outer)
459	{
460	  remove_bb_from_loops (bbs[i]);
461	  add_bb_to_loop (bbs[i], loop);
462	  continue;
463	}
464
465      loop->num_nodes++;
466
467      /* If we find a direct subloop of OUTER, move it to LOOP.  */
468      subloop = bbs[i]->loop_father;
469      if (loop_outer (subloop) == outer
470	  && subloop->header == bbs[i])
471	{
472	  flow_loop_tree_node_remove (subloop);
473	  flow_loop_tree_node_add (loop, subloop);
474	}
475    }
476
477  /* Update the information about loop exit edges.  */
478  for (i = 0; i < n; i++)
479    {
480      FOR_EACH_EDGE (e, ei, bbs[i]->succs)
481	{
482	  rescan_loop_exit (e, false, false);
483	}
484    }
485
486  free (bbs);
487}
488
489/* Scale profile of loop by P.  */
490
491void
492scale_loop_frequencies (class loop *loop, profile_probability p)
493{
494  basic_block *bbs;
495
496  bbs = get_loop_body (loop);
497  scale_bbs_frequencies (bbs, loop->num_nodes, p);
498  free (bbs);
499}
500
501/* Scale profile in LOOP by P.
502   If ITERATION_BOUND is non-zero, scale even further if loop is predicted
503   to iterate too many times.
504   Before caling this function, preheader block profile should be already
505   scaled to final count.  This is necessary because loop iterations are
506   determined by comparing header edge count to latch ege count and thus
507   they need to be scaled synchronously.  */
508
509void
510scale_loop_profile (class loop *loop, profile_probability p,
511		    gcov_type iteration_bound)
512{
513  edge e, preheader_e;
514  edge_iterator ei;
515
516  if (dump_file && (dump_flags & TDF_DETAILS))
517    {
518      fprintf (dump_file, ";; Scaling loop %i with scale ",
519	       loop->num);
520      p.dump (dump_file);
521      fprintf (dump_file, " bounding iterations to %i\n",
522	       (int)iteration_bound);
523    }
524
525  /* Scale the probabilities.  */
526  scale_loop_frequencies (loop, p);
527
528  if (iteration_bound == 0)
529    return;
530
531  gcov_type iterations = expected_loop_iterations_unbounded (loop, NULL, true);
532
533  if (dump_file && (dump_flags & TDF_DETAILS))
534    {
535      fprintf (dump_file, ";; guessed iterations after scaling %i\n",
536	       (int)iterations);
537    }
538
539  /* See if loop is predicted to iterate too many times.  */
540  if (iterations <= iteration_bound)
541    return;
542
543  preheader_e = loop_preheader_edge (loop);
544
545  /* We could handle also loops without preheaders, but bounding is
546     currently used only by optimizers that have preheaders constructed.  */
547  gcc_checking_assert (preheader_e);
548  profile_count count_in = preheader_e->count ();
549
550  if (count_in > profile_count::zero ()
551      && loop->header->count.initialized_p ())
552    {
553      profile_count count_delta = profile_count::zero ();
554
555      e = single_exit (loop);
556      if (e)
557	{
558	  edge other_e;
559	  FOR_EACH_EDGE (other_e, ei, e->src->succs)
560	    if (!(other_e->flags & (EDGE_ABNORMAL | EDGE_FAKE))
561		&& e != other_e)
562	      break;
563
564	  /* Probability of exit must be 1/iterations.  */
565	  count_delta = e->count ();
566	  e->probability = profile_probability::always ()
567				    .apply_scale (1, iteration_bound);
568	  other_e->probability = e->probability.invert ();
569
570	  /* In code below we only handle the following two updates.  */
571	  if (other_e->dest != loop->header
572	      && other_e->dest != loop->latch
573	      && (dump_file && (dump_flags & TDF_DETAILS)))
574	    {
575	      fprintf (dump_file, ";; giving up on update of paths from "
576		       "exit condition to latch\n");
577	    }
578	}
579      else
580        if (dump_file && (dump_flags & TDF_DETAILS))
581	  fprintf (dump_file, ";; Loop has multiple exit edges; "
582	      		      "giving up on exit condition update\n");
583
584      /* Roughly speaking we want to reduce the loop body profile by the
585	 difference of loop iterations.  We however can do better if
586	 we look at the actual profile, if it is available.  */
587      p = profile_probability::always ();
588
589      count_in = count_in.apply_scale (iteration_bound, 1);
590      p = count_in.probability_in (loop->header->count);
591      if (!(p > profile_probability::never ()))
592	p = profile_probability::very_unlikely ();
593
594      if (p == profile_probability::always ()
595	  || !p.initialized_p ())
596	return;
597
598      /* If latch exists, change its count, since we changed
599	 probability of exit.  Theoretically we should update everything from
600	 source of exit edge to latch, but for vectorizer this is enough.  */
601      if (loop->latch && loop->latch != e->src)
602	loop->latch->count += count_delta;
603
604      /* Scale the probabilities.  */
605      scale_loop_frequencies (loop, p);
606
607      /* Change latch's count back.  */
608      if (loop->latch && loop->latch != e->src)
609	loop->latch->count -= count_delta;
610
611      if (dump_file && (dump_flags & TDF_DETAILS))
612	fprintf (dump_file, ";; guessed iterations are now %i\n",
613		 (int)expected_loop_iterations_unbounded (loop, NULL, true));
614    }
615}
616
617/* Recompute dominance information for basic blocks outside LOOP.  */
618
619static void
620update_dominators_in_loop (class loop *loop)
621{
622  vec<basic_block> dom_bbs = vNULL;
623  basic_block *body;
624  unsigned i;
625
626  auto_sbitmap seen (last_basic_block_for_fn (cfun));
627  bitmap_clear (seen);
628  body = get_loop_body (loop);
629
630  for (i = 0; i < loop->num_nodes; i++)
631    bitmap_set_bit (seen, body[i]->index);
632
633  for (i = 0; i < loop->num_nodes; i++)
634    {
635      basic_block ldom;
636
637      for (ldom = first_dom_son (CDI_DOMINATORS, body[i]);
638	   ldom;
639	   ldom = next_dom_son (CDI_DOMINATORS, ldom))
640	if (!bitmap_bit_p (seen, ldom->index))
641	  {
642	    bitmap_set_bit (seen, ldom->index);
643	    dom_bbs.safe_push (ldom);
644	  }
645    }
646
647  iterate_fix_dominators (CDI_DOMINATORS, dom_bbs, false);
648  free (body);
649  dom_bbs.release ();
650}
651
652/* Creates an if region as shown above. CONDITION is used to create
653   the test for the if.
654
655   |
656   |     -------------                 -------------
657   |     |  pred_bb  |                 |  pred_bb  |
658   |     -------------                 -------------
659   |           |                             |
660   |           |                             | ENTRY_EDGE
661   |           | ENTRY_EDGE                  V
662   |           |             ====>     -------------
663   |           |                       |  cond_bb  |
664   |           |                       | CONDITION |
665   |           |                       -------------
666   |           V                        /         \
667   |     -------------         e_false /           \ e_true
668   |     |  succ_bb  |                V             V
669   |     -------------         -----------       -----------
670   |                           | false_bb |      | true_bb |
671   |                           -----------       -----------
672   |                                   \           /
673   |                                    \         /
674   |                                     V       V
675   |                                   -------------
676   |                                   |  join_bb  |
677   |                                   -------------
678   |                                         | exit_edge (result)
679   |                                         V
680   |                                    -----------
681   |                                    | succ_bb |
682   |                                    -----------
683   |
684 */
685
686edge
687create_empty_if_region_on_edge (edge entry_edge, tree condition)
688{
689
690  basic_block cond_bb, true_bb, false_bb, join_bb;
691  edge e_true, e_false, exit_edge;
692  gcond *cond_stmt;
693  tree simple_cond;
694  gimple_stmt_iterator gsi;
695
696  cond_bb = split_edge (entry_edge);
697
698  /* Insert condition in cond_bb.  */
699  gsi = gsi_last_bb (cond_bb);
700  simple_cond =
701    force_gimple_operand_gsi (&gsi, condition, true, NULL,
702			      false, GSI_NEW_STMT);
703  cond_stmt = gimple_build_cond_from_tree (simple_cond, NULL_TREE, NULL_TREE);
704  gsi = gsi_last_bb (cond_bb);
705  gsi_insert_after (&gsi, cond_stmt, GSI_NEW_STMT);
706
707  join_bb = split_edge (single_succ_edge (cond_bb));
708
709  e_true = single_succ_edge (cond_bb);
710  true_bb = split_edge (e_true);
711
712  e_false = make_edge (cond_bb, join_bb, 0);
713  false_bb = split_edge (e_false);
714
715  e_true->flags &= ~EDGE_FALLTHRU;
716  e_true->flags |= EDGE_TRUE_VALUE;
717  e_false->flags &= ~EDGE_FALLTHRU;
718  e_false->flags |= EDGE_FALSE_VALUE;
719
720  set_immediate_dominator (CDI_DOMINATORS, cond_bb, entry_edge->src);
721  set_immediate_dominator (CDI_DOMINATORS, true_bb, cond_bb);
722  set_immediate_dominator (CDI_DOMINATORS, false_bb, cond_bb);
723  set_immediate_dominator (CDI_DOMINATORS, join_bb, cond_bb);
724
725  exit_edge = single_succ_edge (join_bb);
726
727  if (single_pred_p (exit_edge->dest))
728    set_immediate_dominator (CDI_DOMINATORS, exit_edge->dest, join_bb);
729
730  return exit_edge;
731}
732
733/* create_empty_loop_on_edge
734   |
735   |    - pred_bb -                   ------ pred_bb ------
736   |   |           |                 | iv0 = initial_value |
737   |    -----|-----                   ---------|-----------
738   |         |                       ______    | entry_edge
739   |         | entry_edge           /      |   |
740   |         |             ====>   |      -V---V- loop_header -------------
741   |         V                     |     | iv_before = phi (iv0, iv_after) |
742   |    - succ_bb -                |      ---|-----------------------------
743   |   |           |               |         |
744   |    -----------                |      ---V--- loop_body ---------------
745   |                               |     | iv_after = iv_before + stride   |
746   |                               |     | if (iv_before < upper_bound)    |
747   |                               |      ---|--------------\--------------
748   |                               |         |               \ exit_e
749   |                               |         V                \
750   |                               |       - loop_latch -      V- succ_bb -
751   |                               |      |              |     |           |
752   |                               |       /-------------       -----------
753   |                                \ ___ /
754
755   Creates an empty loop as shown above, the IV_BEFORE is the SSA_NAME
756   that is used before the increment of IV. IV_BEFORE should be used for
757   adding code to the body that uses the IV.  OUTER is the outer loop in
758   which the new loop should be inserted.
759
760   Both INITIAL_VALUE and UPPER_BOUND expressions are gimplified and
761   inserted on the loop entry edge.  This implies that this function
762   should be used only when the UPPER_BOUND expression is a loop
763   invariant.  */
764
765class loop *
766create_empty_loop_on_edge (edge entry_edge,
767			   tree initial_value,
768			   tree stride, tree upper_bound,
769			   tree iv,
770			   tree *iv_before,
771			   tree *iv_after,
772			   class loop *outer)
773{
774  basic_block loop_header, loop_latch, succ_bb, pred_bb;
775  class loop *loop;
776  gimple_stmt_iterator gsi;
777  gimple_seq stmts;
778  gcond *cond_expr;
779  tree exit_test;
780  edge exit_e;
781
782  gcc_assert (entry_edge && initial_value && stride && upper_bound && iv);
783
784  /* Create header, latch and wire up the loop.  */
785  pred_bb = entry_edge->src;
786  loop_header = split_edge (entry_edge);
787  loop_latch = split_edge (single_succ_edge (loop_header));
788  succ_bb = single_succ (loop_latch);
789  make_edge (loop_header, succ_bb, 0);
790  redirect_edge_succ_nodup (single_succ_edge (loop_latch), loop_header);
791
792  /* Set immediate dominator information.  */
793  set_immediate_dominator (CDI_DOMINATORS, loop_header, pred_bb);
794  set_immediate_dominator (CDI_DOMINATORS, loop_latch, loop_header);
795  set_immediate_dominator (CDI_DOMINATORS, succ_bb, loop_header);
796
797  /* Initialize a loop structure and put it in a loop hierarchy.  */
798  loop = alloc_loop ();
799  loop->header = loop_header;
800  loop->latch = loop_latch;
801  add_loop (loop, outer);
802
803  /* TODO: Fix counts.  */
804  scale_loop_frequencies (loop, profile_probability::even ());
805
806  /* Update dominators.  */
807  update_dominators_in_loop (loop);
808
809  /* Modify edge flags.  */
810  exit_e = single_exit (loop);
811  exit_e->flags = EDGE_LOOP_EXIT | EDGE_FALSE_VALUE;
812  single_pred_edge (loop_latch)->flags = EDGE_TRUE_VALUE;
813
814  /* Construct IV code in loop.  */
815  initial_value = force_gimple_operand (initial_value, &stmts, true, iv);
816  if (stmts)
817    {
818      gsi_insert_seq_on_edge (loop_preheader_edge (loop), stmts);
819      gsi_commit_edge_inserts ();
820    }
821
822  upper_bound = force_gimple_operand (upper_bound, &stmts, true, NULL);
823  if (stmts)
824    {
825      gsi_insert_seq_on_edge (loop_preheader_edge (loop), stmts);
826      gsi_commit_edge_inserts ();
827    }
828
829  gsi = gsi_last_bb (loop_header);
830  create_iv (initial_value, stride, iv, loop, &gsi, false,
831	     iv_before, iv_after);
832
833  /* Insert loop exit condition.  */
834  cond_expr = gimple_build_cond
835    (LT_EXPR, *iv_before, upper_bound, NULL_TREE, NULL_TREE);
836
837  exit_test = gimple_cond_lhs (cond_expr);
838  exit_test = force_gimple_operand_gsi (&gsi, exit_test, true, NULL,
839					false, GSI_NEW_STMT);
840  gimple_cond_set_lhs (cond_expr, exit_test);
841  gsi = gsi_last_bb (exit_e->src);
842  gsi_insert_after (&gsi, cond_expr, GSI_NEW_STMT);
843
844  split_block_after_labels (loop_header);
845
846  return loop;
847}
848
849/* Remove the latch edge of a LOOP and update loops to indicate that
850   the LOOP was removed.  After this function, original loop latch will
851   have no successor, which caller is expected to fix somehow.
852
853   If this may cause the information about irreducible regions to become
854   invalid, IRRED_INVALIDATED is set to true.
855
856   LOOP_CLOSED_SSA_INVALIDATED, if non-NULL, is a bitmap where we store
857   basic blocks that had non-trivial update on their loop_father.*/
858
859void
860unloop (class loop *loop, bool *irred_invalidated,
861	bitmap loop_closed_ssa_invalidated)
862{
863  basic_block *body;
864  class loop *ploop;
865  unsigned i, n;
866  basic_block latch = loop->latch;
867  bool dummy = false;
868
869  if (loop_preheader_edge (loop)->flags & EDGE_IRREDUCIBLE_LOOP)
870    *irred_invalidated = true;
871
872  /* This is relatively straightforward.  The dominators are unchanged, as
873     loop header dominates loop latch, so the only thing we have to care of
874     is the placement of loops and basic blocks inside the loop tree.  We
875     move them all to the loop->outer, and then let fix_bb_placements do
876     its work.  */
877
878  body = get_loop_body (loop);
879  n = loop->num_nodes;
880  for (i = 0; i < n; i++)
881    if (body[i]->loop_father == loop)
882      {
883	remove_bb_from_loops (body[i]);
884	add_bb_to_loop (body[i], loop_outer (loop));
885      }
886  free (body);
887
888  while (loop->inner)
889    {
890      ploop = loop->inner;
891      flow_loop_tree_node_remove (ploop);
892      flow_loop_tree_node_add (loop_outer (loop), ploop);
893    }
894
895  /* Remove the loop and free its data.  */
896  delete_loop (loop);
897
898  remove_edge (single_succ_edge (latch));
899
900  /* We do not pass IRRED_INVALIDATED to fix_bb_placements here, as even if
901     there is an irreducible region inside the cancelled loop, the flags will
902     be still correct.  */
903  fix_bb_placements (latch, &dummy, loop_closed_ssa_invalidated);
904}
905
906/* Fix placement of superloops of LOOP inside loop tree, i.e. ensure that
907   condition stated in description of fix_loop_placement holds for them.
908   It is used in case when we removed some edges coming out of LOOP, which
909   may cause the right placement of LOOP inside loop tree to change.
910
911   IRRED_INVALIDATED is set to true if a change in the loop structures might
912   invalidate the information about irreducible regions.  */
913
914static void
915fix_loop_placements (class loop *loop, bool *irred_invalidated)
916{
917  class loop *outer;
918
919  while (loop_outer (loop))
920    {
921      outer = loop_outer (loop);
922      if (!fix_loop_placement (loop, irred_invalidated))
923	break;
924
925      /* Changing the placement of a loop in the loop tree may alter the
926	 validity of condition 2) of the description of fix_bb_placement
927	 for its preheader, because the successor is the header and belongs
928	 to the loop.  So call fix_bb_placements to fix up the placement
929	 of the preheader and (possibly) of its predecessors.  */
930      fix_bb_placements (loop_preheader_edge (loop)->src,
931			 irred_invalidated, NULL);
932      loop = outer;
933    }
934}
935
936/* Duplicate loop bounds and other information we store about
937   the loop into its duplicate.  */
938
939void
940copy_loop_info (class loop *loop, class loop *target)
941{
942  gcc_checking_assert (!target->any_upper_bound && !target->any_estimate);
943  target->any_upper_bound = loop->any_upper_bound;
944  target->nb_iterations_upper_bound = loop->nb_iterations_upper_bound;
945  target->any_likely_upper_bound = loop->any_likely_upper_bound;
946  target->nb_iterations_likely_upper_bound
947    = loop->nb_iterations_likely_upper_bound;
948  target->any_estimate = loop->any_estimate;
949  target->nb_iterations_estimate = loop->nb_iterations_estimate;
950  target->estimate_state = loop->estimate_state;
951  target->safelen = loop->safelen;
952  target->simdlen = loop->simdlen;
953  target->constraints = loop->constraints;
954  target->can_be_parallel = loop->can_be_parallel;
955  target->warned_aggressive_loop_optimizations
956    |= loop->warned_aggressive_loop_optimizations;
957  target->dont_vectorize = loop->dont_vectorize;
958  target->force_vectorize = loop->force_vectorize;
959  target->in_oacc_kernels_region = loop->in_oacc_kernels_region;
960  target->finite_p = loop->finite_p;
961  target->unroll = loop->unroll;
962  target->owned_clique = loop->owned_clique;
963}
964
965/* Copies copy of LOOP as subloop of TARGET loop, placing newly
966   created loop into loops structure.  If AFTER is non-null
967   the new loop is added at AFTER->next, otherwise in front of TARGETs
968   sibling list.  */
969class loop *
970duplicate_loop (class loop *loop, class loop *target, class loop *after)
971{
972  class loop *cloop;
973  cloop = alloc_loop ();
974  place_new_loop (cfun, cloop);
975
976  copy_loop_info (loop, cloop);
977
978  /* Mark the new loop as copy of LOOP.  */
979  set_loop_copy (loop, cloop);
980
981  /* Add it to target.  */
982  flow_loop_tree_node_add (target, cloop, after);
983
984  return cloop;
985}
986
987/* Copies structure of subloops of LOOP into TARGET loop, placing
988   newly created loops into loop tree at the end of TARGETs sibling
989   list in the original order.  */
990void
991duplicate_subloops (class loop *loop, class loop *target)
992{
993  class loop *aloop, *cloop, *tail;
994
995  for (tail = target->inner; tail && tail->next; tail = tail->next)
996    ;
997  for (aloop = loop->inner; aloop; aloop = aloop->next)
998    {
999      cloop = duplicate_loop (aloop, target, tail);
1000      tail = cloop;
1001      gcc_assert(!tail->next);
1002      duplicate_subloops (aloop, cloop);
1003    }
1004}
1005
1006/* Copies structure of subloops of N loops, stored in array COPIED_LOOPS,
1007   into TARGET loop, placing newly created loops into loop tree adding
1008   them to TARGETs sibling list at the end in order.  */
1009static void
1010copy_loops_to (class loop **copied_loops, int n, class loop *target)
1011{
1012  class loop *aloop, *tail;
1013  int i;
1014
1015  for (tail = target->inner; tail && tail->next; tail = tail->next)
1016    ;
1017  for (i = 0; i < n; i++)
1018    {
1019      aloop = duplicate_loop (copied_loops[i], target, tail);
1020      tail = aloop;
1021      gcc_assert(!tail->next);
1022      duplicate_subloops (copied_loops[i], aloop);
1023    }
1024}
1025
1026/* Redirects edge E to basic block DEST.  */
1027static void
1028loop_redirect_edge (edge e, basic_block dest)
1029{
1030  if (e->dest == dest)
1031    return;
1032
1033  redirect_edge_and_branch_force (e, dest);
1034}
1035
1036/* Check whether LOOP's body can be duplicated.  */
1037bool
1038can_duplicate_loop_p (const class loop *loop)
1039{
1040  int ret;
1041  basic_block *bbs = get_loop_body (loop);
1042
1043  ret = can_copy_bbs_p (bbs, loop->num_nodes);
1044  free (bbs);
1045
1046  return ret;
1047}
1048
1049/* Duplicates body of LOOP to given edge E NDUPL times.  Takes care of updating
1050   loop structure and dominators (order of inner subloops is retained).
1051   E's destination must be LOOP header for this to work, i.e. it must be entry
1052   or latch edge of this loop; these are unique, as the loops must have
1053   preheaders for this function to work correctly (in case E is latch, the
1054   function unrolls the loop, if E is entry edge, it peels the loop).  Store
1055   edges created by copying ORIG edge from copies corresponding to set bits in
1056   WONT_EXIT bitmap (bit 0 corresponds to original LOOP body, the other copies
1057   are numbered in order given by control flow through them) into TO_REMOVE
1058   array.  Returns false if duplication is
1059   impossible.  */
1060
1061bool
1062duplicate_loop_body_to_header_edge (class loop *loop, edge e,
1063				    unsigned int ndupl, sbitmap wont_exit,
1064				    edge orig, vec<edge> *to_remove, int flags)
1065{
1066  class loop *target, *aloop;
1067  class loop **orig_loops;
1068  unsigned n_orig_loops;
1069  basic_block header = loop->header, latch = loop->latch;
1070  basic_block *new_bbs, *bbs, *first_active;
1071  basic_block new_bb, bb, first_active_latch = NULL;
1072  edge ae, latch_edge;
1073  edge spec_edges[2], new_spec_edges[2];
1074  const int SE_LATCH = 0;
1075  const int SE_ORIG = 1;
1076  unsigned i, j, n;
1077  int is_latch = (latch == e->src);
1078  profile_probability *scale_step = NULL;
1079  profile_probability scale_main = profile_probability::always ();
1080  profile_probability scale_act = profile_probability::always ();
1081  profile_count after_exit_num = profile_count::zero (),
1082	        after_exit_den = profile_count::zero ();
1083  bool scale_after_exit = false;
1084  int add_irreducible_flag;
1085  basic_block place_after;
1086  bitmap bbs_to_scale = NULL;
1087  bitmap_iterator bi;
1088
1089  gcc_assert (e->dest == loop->header);
1090  gcc_assert (ndupl > 0);
1091
1092  if (orig)
1093    {
1094      /* Orig must be edge out of the loop.  */
1095      gcc_assert (flow_bb_inside_loop_p (loop, orig->src));
1096      gcc_assert (!flow_bb_inside_loop_p (loop, orig->dest));
1097    }
1098
1099  n = loop->num_nodes;
1100  bbs = get_loop_body_in_dom_order (loop);
1101  gcc_assert (bbs[0] == loop->header);
1102  gcc_assert (bbs[n  - 1] == loop->latch);
1103
1104  /* Check whether duplication is possible.  */
1105  if (!can_copy_bbs_p (bbs, loop->num_nodes))
1106    {
1107      free (bbs);
1108      return false;
1109    }
1110  new_bbs = XNEWVEC (basic_block, loop->num_nodes);
1111
1112  /* In case we are doing loop peeling and the loop is in the middle of
1113     irreducible region, the peeled copies will be inside it too.  */
1114  add_irreducible_flag = e->flags & EDGE_IRREDUCIBLE_LOOP;
1115  gcc_assert (!is_latch || !add_irreducible_flag);
1116
1117  /* Find edge from latch.  */
1118  latch_edge = loop_latch_edge (loop);
1119
1120  if (flags & DLTHE_FLAG_UPDATE_FREQ)
1121    {
1122      /* Calculate coefficients by that we have to scale counts
1123	 of duplicated loop bodies.  */
1124      profile_count count_in = header->count;
1125      profile_count count_le = latch_edge->count ();
1126      profile_count count_out_orig = orig ? orig->count () : count_in - count_le;
1127      profile_probability prob_pass_thru = count_le.probability_in (count_in);
1128      profile_probability prob_pass_wont_exit =
1129	      (count_le + count_out_orig).probability_in (count_in);
1130
1131      if (orig && orig->probability.initialized_p ()
1132	  && !(orig->probability == profile_probability::always ()))
1133	{
1134	  /* The blocks that are dominated by a removed exit edge ORIG have
1135	     frequencies scaled by this.  */
1136	  if (orig->count ().initialized_p ())
1137	    {
1138	      after_exit_num = orig->src->count;
1139	      after_exit_den = after_exit_num - orig->count ();
1140	      scale_after_exit = true;
1141	    }
1142	  bbs_to_scale = BITMAP_ALLOC (NULL);
1143	  for (i = 0; i < n; i++)
1144	    {
1145	      if (bbs[i] != orig->src
1146		  && dominated_by_p (CDI_DOMINATORS, bbs[i], orig->src))
1147		bitmap_set_bit (bbs_to_scale, i);
1148	    }
1149	}
1150
1151      scale_step = XNEWVEC (profile_probability, ndupl);
1152
1153      for (i = 1; i <= ndupl; i++)
1154	scale_step[i - 1] = bitmap_bit_p (wont_exit, i)
1155				? prob_pass_wont_exit
1156				: prob_pass_thru;
1157
1158      /* Complete peeling is special as the probability of exit in last
1159	 copy becomes 1.  */
1160      if (flags & DLTHE_FLAG_COMPLETTE_PEEL)
1161	{
1162	  profile_count wanted_count = e->count ();
1163
1164	  gcc_assert (!is_latch);
1165	  /* First copy has count of incoming edge.  Each subsequent
1166	     count should be reduced by prob_pass_wont_exit.  Caller
1167	     should've managed the flags so all except for original loop
1168	     has won't exist set.  */
1169	  scale_act = wanted_count.probability_in (count_in);
1170	  /* Now simulate the duplication adjustments and compute header
1171	     frequency of the last copy.  */
1172	  for (i = 0; i < ndupl; i++)
1173	    wanted_count = wanted_count.apply_probability (scale_step [i]);
1174	  scale_main = wanted_count.probability_in (count_in);
1175	}
1176      /* Here we insert loop bodies inside the loop itself (for loop unrolling).
1177	 First iteration will be original loop followed by duplicated bodies.
1178	 It is necessary to scale down the original so we get right overall
1179	 number of iterations.  */
1180      else if (is_latch)
1181	{
1182	  profile_probability prob_pass_main = bitmap_bit_p (wont_exit, 0)
1183							? prob_pass_wont_exit
1184							: prob_pass_thru;
1185	  profile_probability p = prob_pass_main;
1186	  profile_count scale_main_den = count_in;
1187	  for (i = 0; i < ndupl; i++)
1188	    {
1189	      scale_main_den += count_in.apply_probability (p);
1190	      p = p * scale_step[i];
1191	    }
1192	  /* If original loop is executed COUNT_IN times, the unrolled
1193	     loop will account SCALE_MAIN_DEN times.  */
1194	  scale_main = count_in.probability_in (scale_main_den);
1195	  scale_act = scale_main * prob_pass_main;
1196	}
1197      else
1198	{
1199	  profile_count preheader_count = e->count ();
1200	  for (i = 0; i < ndupl; i++)
1201	    scale_main = scale_main * scale_step[i];
1202	  scale_act = preheader_count.probability_in (count_in);
1203	}
1204    }
1205
1206  /* Loop the new bbs will belong to.  */
1207  target = e->src->loop_father;
1208
1209  /* Original loops.  */
1210  n_orig_loops = 0;
1211  for (aloop = loop->inner; aloop; aloop = aloop->next)
1212    n_orig_loops++;
1213  orig_loops = XNEWVEC (class loop *, n_orig_loops);
1214  for (aloop = loop->inner, i = 0; aloop; aloop = aloop->next, i++)
1215    orig_loops[i] = aloop;
1216
1217  set_loop_copy (loop, target);
1218
1219  first_active = XNEWVEC (basic_block, n);
1220  if (is_latch)
1221    {
1222      memcpy (first_active, bbs, n * sizeof (basic_block));
1223      first_active_latch = latch;
1224    }
1225
1226  spec_edges[SE_ORIG] = orig;
1227  spec_edges[SE_LATCH] = latch_edge;
1228
1229  place_after = e->src;
1230  for (j = 0; j < ndupl; j++)
1231    {
1232      /* Copy loops.  */
1233      copy_loops_to (orig_loops, n_orig_loops, target);
1234
1235      /* Copy bbs.  */
1236      copy_bbs (bbs, n, new_bbs, spec_edges, 2, new_spec_edges, loop,
1237		place_after, true);
1238      place_after = new_spec_edges[SE_LATCH]->src;
1239
1240      if (flags & DLTHE_RECORD_COPY_NUMBER)
1241	for (i = 0; i < n; i++)
1242	  {
1243	    gcc_assert (!new_bbs[i]->aux);
1244	    new_bbs[i]->aux = (void *)(size_t)(j + 1);
1245	  }
1246
1247      /* Note whether the blocks and edges belong to an irreducible loop.  */
1248      if (add_irreducible_flag)
1249	{
1250	  for (i = 0; i < n; i++)
1251	    new_bbs[i]->flags |= BB_DUPLICATED;
1252	  for (i = 0; i < n; i++)
1253	    {
1254	      edge_iterator ei;
1255	      new_bb = new_bbs[i];
1256	      if (new_bb->loop_father == target)
1257		new_bb->flags |= BB_IRREDUCIBLE_LOOP;
1258
1259	      FOR_EACH_EDGE (ae, ei, new_bb->succs)
1260		if ((ae->dest->flags & BB_DUPLICATED)
1261		    && (ae->src->loop_father == target
1262			|| ae->dest->loop_father == target))
1263		  ae->flags |= EDGE_IRREDUCIBLE_LOOP;
1264	    }
1265	  for (i = 0; i < n; i++)
1266	    new_bbs[i]->flags &= ~BB_DUPLICATED;
1267	}
1268
1269      /* Redirect the special edges.  */
1270      if (is_latch)
1271	{
1272	  redirect_edge_and_branch_force (latch_edge, new_bbs[0]);
1273	  redirect_edge_and_branch_force (new_spec_edges[SE_LATCH],
1274					  loop->header);
1275	  set_immediate_dominator (CDI_DOMINATORS, new_bbs[0], latch);
1276	  latch = loop->latch = new_bbs[n - 1];
1277	  e = latch_edge = new_spec_edges[SE_LATCH];
1278	}
1279      else
1280	{
1281	  redirect_edge_and_branch_force (new_spec_edges[SE_LATCH],
1282					  loop->header);
1283	  redirect_edge_and_branch_force (e, new_bbs[0]);
1284	  set_immediate_dominator (CDI_DOMINATORS, new_bbs[0], e->src);
1285	  e = new_spec_edges[SE_LATCH];
1286	}
1287
1288      /* Record exit edge in this copy.  */
1289      if (orig && bitmap_bit_p (wont_exit, j + 1))
1290	{
1291	  if (to_remove)
1292	    to_remove->safe_push (new_spec_edges[SE_ORIG]);
1293	  force_edge_cold (new_spec_edges[SE_ORIG], true);
1294
1295	  /* Scale the frequencies of the blocks dominated by the exit.  */
1296	  if (bbs_to_scale && scale_after_exit)
1297	    {
1298	      EXECUTE_IF_SET_IN_BITMAP (bbs_to_scale, 0, i, bi)
1299		scale_bbs_frequencies_profile_count (new_bbs + i, 1, after_exit_num,
1300						     after_exit_den);
1301	    }
1302	}
1303
1304      /* Record the first copy in the control flow order if it is not
1305	 the original loop (i.e. in case of peeling).  */
1306      if (!first_active_latch)
1307	{
1308	  memcpy (first_active, new_bbs, n * sizeof (basic_block));
1309	  first_active_latch = new_bbs[n - 1];
1310	}
1311
1312      /* Set counts and frequencies.  */
1313      if (flags & DLTHE_FLAG_UPDATE_FREQ)
1314	{
1315	  scale_bbs_frequencies (new_bbs, n, scale_act);
1316	  scale_act = scale_act * scale_step[j];
1317	}
1318    }
1319  free (new_bbs);
1320  free (orig_loops);
1321
1322  /* Record the exit edge in the original loop body, and update the frequencies.  */
1323  if (orig && bitmap_bit_p (wont_exit, 0))
1324    {
1325      if (to_remove)
1326	to_remove->safe_push (orig);
1327      force_edge_cold (orig, true);
1328
1329      /* Scale the frequencies of the blocks dominated by the exit.  */
1330      if (bbs_to_scale && scale_after_exit)
1331	{
1332	  EXECUTE_IF_SET_IN_BITMAP (bbs_to_scale, 0, i, bi)
1333	    scale_bbs_frequencies_profile_count (bbs + i, 1, after_exit_num,
1334						 after_exit_den);
1335	}
1336    }
1337
1338  /* Update the original loop.  */
1339  if (!is_latch)
1340    set_immediate_dominator (CDI_DOMINATORS, e->dest, e->src);
1341  if (flags & DLTHE_FLAG_UPDATE_FREQ)
1342    {
1343      scale_bbs_frequencies (bbs, n, scale_main);
1344      free (scale_step);
1345    }
1346
1347  /* Update dominators of outer blocks if affected.  */
1348  for (i = 0; i < n; i++)
1349    {
1350      basic_block dominated, dom_bb;
1351      unsigned j;
1352
1353      bb = bbs[i];
1354      bb->aux = 0;
1355
1356      auto_vec<basic_block> dom_bbs = get_dominated_by (CDI_DOMINATORS, bb);
1357      FOR_EACH_VEC_ELT (dom_bbs, j, dominated)
1358	{
1359	  if (flow_bb_inside_loop_p (loop, dominated))
1360	    continue;
1361	  dom_bb = nearest_common_dominator (
1362			CDI_DOMINATORS, first_active[i], first_active_latch);
1363	  set_immediate_dominator (CDI_DOMINATORS, dominated, dom_bb);
1364	}
1365    }
1366  free (first_active);
1367
1368  free (bbs);
1369  BITMAP_FREE (bbs_to_scale);
1370
1371  return true;
1372}
1373
1374/* A callback for make_forwarder block, to redirect all edges except for
1375   MFB_KJ_EDGE to the entry part.  E is the edge for that we should decide
1376   whether to redirect it.  */
1377
1378edge mfb_kj_edge;
1379bool
1380mfb_keep_just (edge e)
1381{
1382  return e != mfb_kj_edge;
1383}
1384
1385/* True when a candidate preheader BLOCK has predecessors from LOOP.  */
1386
1387static bool
1388has_preds_from_loop (basic_block block, class loop *loop)
1389{
1390  edge e;
1391  edge_iterator ei;
1392
1393  FOR_EACH_EDGE (e, ei, block->preds)
1394    if (e->src->loop_father == loop)
1395      return true;
1396  return false;
1397}
1398
1399/* Creates a pre-header for a LOOP.  Returns newly created block.  Unless
1400   CP_SIMPLE_PREHEADERS is set in FLAGS, we only force LOOP to have single
1401   entry; otherwise we also force preheader block to have only one successor.
1402   When CP_FALLTHRU_PREHEADERS is set in FLAGS, we force the preheader block
1403   to be a fallthru predecessor to the loop header and to have only
1404   predecessors from outside of the loop.
1405   The function also updates dominators.  */
1406
1407basic_block
1408create_preheader (class loop *loop, int flags)
1409{
1410  edge e;
1411  basic_block dummy;
1412  int nentry = 0;
1413  bool irred = false;
1414  bool latch_edge_was_fallthru;
1415  edge one_succ_pred = NULL, single_entry = NULL;
1416  edge_iterator ei;
1417
1418  FOR_EACH_EDGE (e, ei, loop->header->preds)
1419    {
1420      if (e->src == loop->latch)
1421	continue;
1422      irred |= (e->flags & EDGE_IRREDUCIBLE_LOOP) != 0;
1423      nentry++;
1424      single_entry = e;
1425      if (single_succ_p (e->src))
1426	one_succ_pred = e;
1427    }
1428  gcc_assert (nentry);
1429  if (nentry == 1)
1430    {
1431      bool need_forwarder_block = false;
1432
1433      /* We do not allow entry block to be the loop preheader, since we
1434	     cannot emit code there.  */
1435      if (single_entry->src == ENTRY_BLOCK_PTR_FOR_FN (cfun))
1436        need_forwarder_block = true;
1437      else
1438        {
1439          /* If we want simple preheaders, also force the preheader to have
1440	     just a single successor and a normal edge.  */
1441          if ((flags & CP_SIMPLE_PREHEADERS)
1442	      && ((single_entry->flags & EDGE_COMPLEX)
1443		  || !single_succ_p (single_entry->src)))
1444            need_forwarder_block = true;
1445          /* If we want fallthru preheaders, also create forwarder block when
1446             preheader ends with a jump or has predecessors from loop.  */
1447          else if ((flags & CP_FALLTHRU_PREHEADERS)
1448                   && (JUMP_P (BB_END (single_entry->src))
1449                       || has_preds_from_loop (single_entry->src, loop)))
1450            need_forwarder_block = true;
1451        }
1452      if (! need_forwarder_block)
1453	return NULL;
1454    }
1455
1456  mfb_kj_edge = loop_latch_edge (loop);
1457  latch_edge_was_fallthru = (mfb_kj_edge->flags & EDGE_FALLTHRU) != 0;
1458  if (nentry == 1
1459      && ((flags & CP_FALLTHRU_PREHEADERS) == 0
1460  	  || (single_entry->flags & EDGE_CROSSING) == 0))
1461    dummy = split_edge (single_entry);
1462  else
1463    {
1464      edge fallthru = make_forwarder_block (loop->header, mfb_keep_just, NULL);
1465      dummy = fallthru->src;
1466      loop->header = fallthru->dest;
1467    }
1468
1469  /* Try to be clever in placing the newly created preheader.  The idea is to
1470     avoid breaking any "fallthruness" relationship between blocks.
1471
1472     The preheader was created just before the header and all incoming edges
1473     to the header were redirected to the preheader, except the latch edge.
1474     So the only problematic case is when this latch edge was a fallthru
1475     edge: it is not anymore after the preheader creation so we have broken
1476     the fallthruness.  We're therefore going to look for a better place.  */
1477  if (latch_edge_was_fallthru)
1478    {
1479      if (one_succ_pred)
1480	e = one_succ_pred;
1481      else
1482	e = EDGE_PRED (dummy, 0);
1483
1484      move_block_after (dummy, e->src);
1485    }
1486
1487  if (irred)
1488    {
1489      dummy->flags |= BB_IRREDUCIBLE_LOOP;
1490      single_succ_edge (dummy)->flags |= EDGE_IRREDUCIBLE_LOOP;
1491    }
1492
1493  if (dump_file)
1494    fprintf (dump_file, "Created preheader block for loop %i\n",
1495	     loop->num);
1496
1497  if (flags & CP_FALLTHRU_PREHEADERS)
1498    gcc_assert ((single_succ_edge (dummy)->flags & EDGE_FALLTHRU)
1499                && !JUMP_P (BB_END (dummy)));
1500
1501  return dummy;
1502}
1503
1504/* Create preheaders for each loop; for meaning of FLAGS see create_preheader.  */
1505
1506void
1507create_preheaders (int flags)
1508{
1509  if (!current_loops)
1510    return;
1511
1512  for (auto loop : loops_list (cfun, 0))
1513    create_preheader (loop, flags);
1514  loops_state_set (LOOPS_HAVE_PREHEADERS);
1515}
1516
1517/* Forces all loop latches to have only single successor.  */
1518
1519void
1520force_single_succ_latches (void)
1521{
1522  edge e;
1523
1524  for (auto loop : loops_list (cfun, 0))
1525    {
1526      if (loop->latch != loop->header && single_succ_p (loop->latch))
1527	continue;
1528
1529      e = find_edge (loop->latch, loop->header);
1530      gcc_checking_assert (e != NULL);
1531
1532      split_edge (e);
1533    }
1534  loops_state_set (LOOPS_HAVE_SIMPLE_LATCHES);
1535}
1536
1537/* This function is called from loop_version.  It splits the entry edge
1538   of the loop we want to version, adds the versioning condition, and
1539   adjust the edges to the two versions of the loop appropriately.
1540   e is an incoming edge. Returns the basic block containing the
1541   condition.
1542
1543   --- edge e ---- > [second_head]
1544
1545   Split it and insert new conditional expression and adjust edges.
1546
1547    --- edge e ---> [cond expr] ---> [first_head]
1548			|
1549			+---------> [second_head]
1550
1551  THEN_PROB is the probability of then branch of the condition.
1552  ELSE_PROB is the probability of else branch. Note that they may be both
1553  REG_BR_PROB_BASE when condition is IFN_LOOP_VECTORIZED or
1554  IFN_LOOP_DIST_ALIAS.  */
1555
1556static basic_block
1557lv_adjust_loop_entry_edge (basic_block first_head, basic_block second_head,
1558			   edge e, void *cond_expr,
1559			   profile_probability then_prob,
1560			   profile_probability else_prob)
1561{
1562  basic_block new_head = NULL;
1563  edge e1;
1564
1565  gcc_assert (e->dest == second_head);
1566
1567  /* Split edge 'e'. This will create a new basic block, where we can
1568     insert conditional expr.  */
1569  new_head = split_edge (e);
1570
1571  lv_add_condition_to_bb (first_head, second_head, new_head,
1572			  cond_expr);
1573
1574  /* Don't set EDGE_TRUE_VALUE in RTL mode, as it's invalid there.  */
1575  e = single_succ_edge (new_head);
1576  e1 = make_edge (new_head, first_head,
1577		  current_ir_type () == IR_GIMPLE ? EDGE_TRUE_VALUE : 0);
1578  e1->probability = then_prob;
1579  e->probability = else_prob;
1580
1581  set_immediate_dominator (CDI_DOMINATORS, first_head, new_head);
1582  set_immediate_dominator (CDI_DOMINATORS, second_head, new_head);
1583
1584  /* Adjust loop header phi nodes.  */
1585  lv_adjust_loop_header_phi (first_head, second_head, new_head, e1);
1586
1587  return new_head;
1588}
1589
1590/* Main entry point for Loop Versioning transformation.
1591
1592   This transformation given a condition and a loop, creates
1593   -if (condition) { loop_copy1 } else { loop_copy2 },
1594   where loop_copy1 is the loop transformed in one way, and loop_copy2
1595   is the loop transformed in another way (or unchanged). COND_EXPR
1596   may be a run time test for things that were not resolved by static
1597   analysis (overlapping ranges (anti-aliasing), alignment, etc.).
1598
1599   If non-NULL, CONDITION_BB is set to the basic block containing the
1600   condition.
1601
1602   THEN_PROB is the probability of the then edge of the if.  THEN_SCALE
1603   is the ratio by that the frequencies in the original loop should
1604   be scaled.  ELSE_SCALE is the ratio by that the frequencies in the
1605   new loop should be scaled.
1606
1607   If PLACE_AFTER is true, we place the new loop after LOOP in the
1608   instruction stream, otherwise it is placed before LOOP.  */
1609
1610class loop *
1611loop_version (class loop *loop,
1612	      void *cond_expr, basic_block *condition_bb,
1613	      profile_probability then_prob, profile_probability else_prob,
1614	      profile_probability then_scale, profile_probability else_scale,
1615	      bool place_after)
1616{
1617  basic_block first_head, second_head;
1618  edge entry, latch_edge;
1619  int irred_flag;
1620  class loop *nloop;
1621  basic_block cond_bb;
1622
1623  /* Record entry and latch edges for the loop */
1624  entry = loop_preheader_edge (loop);
1625  irred_flag = entry->flags & EDGE_IRREDUCIBLE_LOOP;
1626  entry->flags &= ~EDGE_IRREDUCIBLE_LOOP;
1627
1628  /* Note down head of loop as first_head.  */
1629  first_head = entry->dest;
1630
1631  /* 1) Duplicate loop on the entry edge.  */
1632  if (!cfg_hook_duplicate_loop_body_to_header_edge (loop, entry, 1, NULL, NULL,
1633						    NULL, 0))
1634    {
1635      entry->flags |= irred_flag;
1636      return NULL;
1637    }
1638
1639  /* 2) loopify the duplicated new loop. */
1640  latch_edge = single_succ_edge (get_bb_copy (loop->latch));
1641  nloop = alloc_loop ();
1642  class loop *outer = loop_outer (latch_edge->dest->loop_father);
1643  edge new_header_edge = single_pred_edge (get_bb_copy (loop->header));
1644  nloop->header = new_header_edge->dest;
1645  nloop->latch = latch_edge->src;
1646  loop_redirect_edge (latch_edge, nloop->header);
1647
1648  /* Compute new loop.  */
1649  add_loop (nloop, outer);
1650  copy_loop_info (loop, nloop);
1651  set_loop_copy (loop, nloop);
1652
1653  /* loopify redirected latch_edge. Update its PENDING_STMTS.  */
1654  lv_flush_pending_stmts (latch_edge);
1655
1656  /* After duplication entry edge now points to new loop head block.
1657     Note down new head as second_head.  */
1658  second_head = entry->dest;
1659
1660  /* 3) Split loop entry edge and insert new block with cond expr.  */
1661  cond_bb =  lv_adjust_loop_entry_edge (first_head, second_head,
1662					entry, cond_expr, then_prob, else_prob);
1663  if (condition_bb)
1664    *condition_bb = cond_bb;
1665
1666  if (!cond_bb)
1667    {
1668      entry->flags |= irred_flag;
1669      return NULL;
1670    }
1671
1672  /* Add cond_bb to appropriate loop.  */
1673  if (cond_bb->loop_father)
1674    remove_bb_from_loops (cond_bb);
1675  add_bb_to_loop (cond_bb, outer);
1676
1677  /* 4) Scale the original loop and new loop frequency.  */
1678  scale_loop_frequencies (loop, then_scale);
1679  scale_loop_frequencies (nloop, else_scale);
1680  update_dominators_in_loop (loop);
1681  update_dominators_in_loop (nloop);
1682
1683  /* Adjust irreducible flag.  */
1684  if (irred_flag)
1685    {
1686      cond_bb->flags |= BB_IRREDUCIBLE_LOOP;
1687      loop_preheader_edge (loop)->flags |= EDGE_IRREDUCIBLE_LOOP;
1688      loop_preheader_edge (nloop)->flags |= EDGE_IRREDUCIBLE_LOOP;
1689      single_pred_edge (cond_bb)->flags |= EDGE_IRREDUCIBLE_LOOP;
1690    }
1691
1692  if (place_after)
1693    {
1694      basic_block *bbs = get_loop_body_in_dom_order (nloop), after;
1695      unsigned i;
1696
1697      after = loop->latch;
1698
1699      for (i = 0; i < nloop->num_nodes; i++)
1700	{
1701	  move_block_after (bbs[i], after);
1702	  after = bbs[i];
1703	}
1704      free (bbs);
1705    }
1706
1707  /* At this point condition_bb is loop preheader with two successors,
1708     first_head and second_head.   Make sure that loop preheader has only
1709     one successor.  */
1710  split_edge (loop_preheader_edge (loop));
1711  split_edge (loop_preheader_edge (nloop));
1712
1713  return nloop;
1714}
1715