1/* Vectorizer Specific Loop Manipulations
2   Copyright (C) 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2010
3   Free Software Foundation, Inc.
4   Contributed by Dorit Naishlos <dorit@il.ibm.com>
5   and Ira Rosen <irar@il.ibm.com>
6
7This file is part of GCC.
8
9GCC is free software; you can redistribute it and/or modify it under
10the terms of the GNU General Public License as published by the Free
11Software Foundation; either version 3, or (at your option) any later
12version.
13
14GCC is distributed in the hope that it will be useful, but WITHOUT ANY
15WARRANTY; without even the implied warranty of MERCHANTABILITY or
16FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
17for more details.
18
19You should have received a copy of the GNU General Public License
20along with GCC; see the file COPYING3.  If not see
21<http://www.gnu.org/licenses/>.  */
22
23#include "config.h"
24#include "system.h"
25#include "coretypes.h"
26#include "tm.h"
27#include "ggc.h"
28#include "tree.h"
29#include "basic-block.h"
30#include "diagnostic.h"
31#include "tree-flow.h"
32#include "tree-dump.h"
33#include "cfgloop.h"
34#include "cfglayout.h"
35#include "expr.h"
36#include "toplev.h"
37#include "tree-scalar-evolution.h"
38#include "tree-vectorizer.h"
39#include "langhooks.h"
40
41/*************************************************************************
42  Simple Loop Peeling Utilities
43
44  Utilities to support loop peeling for vectorization purposes.
45 *************************************************************************/
46
47
48/* Renames the use *OP_P.  */
49
50static void
51rename_use_op (use_operand_p op_p)
52{
53  tree new_name;
54
55  if (TREE_CODE (USE_FROM_PTR (op_p)) != SSA_NAME)
56    return;
57
58  new_name = get_current_def (USE_FROM_PTR (op_p));
59
60  /* Something defined outside of the loop.  */
61  if (!new_name)
62    return;
63
64  /* An ordinary ssa name defined in the loop.  */
65
66  SET_USE (op_p, new_name);
67}
68
69
70/* Renames the variables in basic block BB.  */
71
72void
73rename_variables_in_bb (basic_block bb)
74{
75  gimple_stmt_iterator gsi;
76  gimple stmt;
77  use_operand_p use_p;
78  ssa_op_iter iter;
79  edge e;
80  edge_iterator ei;
81  struct loop *loop = bb->loop_father;
82
83  for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
84    {
85      stmt = gsi_stmt (gsi);
86      FOR_EACH_SSA_USE_OPERAND (use_p, stmt, iter, SSA_OP_ALL_USES)
87	rename_use_op (use_p);
88    }
89
90  FOR_EACH_EDGE (e, ei, bb->succs)
91    {
92      if (!flow_bb_inside_loop_p (loop, e->dest))
93	continue;
94      for (gsi = gsi_start_phis (e->dest); !gsi_end_p (gsi); gsi_next (&gsi))
95        rename_use_op (PHI_ARG_DEF_PTR_FROM_EDGE (gsi_stmt (gsi), e));
96    }
97}
98
99
100/* Renames variables in new generated LOOP.  */
101
102void
103rename_variables_in_loop (struct loop *loop)
104{
105  unsigned i;
106  basic_block *bbs;
107
108  bbs = get_loop_body (loop);
109
110  for (i = 0; i < loop->num_nodes; i++)
111    rename_variables_in_bb (bbs[i]);
112
113  free (bbs);
114}
115
116typedef struct
117{
118  tree from, to;
119  basic_block bb;
120} adjust_info;
121
122DEF_VEC_O(adjust_info);
123DEF_VEC_ALLOC_O_STACK(adjust_info);
124#define VEC_adjust_info_stack_alloc(alloc) VEC_stack_alloc (adjust_info, alloc)
125
126/* A stack of values to be adjusted in debug stmts.  We have to
127   process them LIFO, so that the closest substitution applies.  If we
128   processed them FIFO, without the stack, we might substitute uses
129   with a PHI DEF that would soon become non-dominant, and when we got
130   to the suitable one, it wouldn't have anything to substitute any
131   more.  */
132static VEC(adjust_info, stack) *adjust_vec;
133
134/* Adjust any debug stmts that referenced AI->from values to use the
135   loop-closed AI->to, if the references are dominated by AI->bb and
136   not by the definition of AI->from.  */
137
138static void
139adjust_debug_stmts_now (adjust_info *ai)
140{
141  basic_block bbphi = ai->bb;
142  tree orig_def = ai->from;
143  tree new_def = ai->to;
144  imm_use_iterator imm_iter;
145  gimple stmt;
146  basic_block bbdef = gimple_bb (SSA_NAME_DEF_STMT (orig_def));
147
148  gcc_assert (dom_info_available_p (CDI_DOMINATORS));
149
150  /* Adjust any debug stmts that held onto non-loop-closed
151     references.  */
152  FOR_EACH_IMM_USE_STMT (stmt, imm_iter, orig_def)
153    {
154      use_operand_p use_p;
155      basic_block bbuse;
156
157      if (!is_gimple_debug (stmt))
158	continue;
159
160      gcc_assert (gimple_debug_bind_p (stmt));
161
162      bbuse = gimple_bb (stmt);
163
164      if ((bbuse == bbphi
165	   || dominated_by_p (CDI_DOMINATORS, bbuse, bbphi))
166	  && !(bbuse == bbdef
167	       || dominated_by_p (CDI_DOMINATORS, bbuse, bbdef)))
168	{
169	  if (new_def)
170	    FOR_EACH_IMM_USE_ON_STMT (use_p, imm_iter)
171	      SET_USE (use_p, new_def);
172	  else
173	    {
174	      gimple_debug_bind_reset_value (stmt);
175	      update_stmt (stmt);
176	    }
177	}
178    }
179}
180
181/* Adjust debug stmts as scheduled before.  */
182
183static void
184adjust_vec_debug_stmts (void)
185{
186  if (!MAY_HAVE_DEBUG_STMTS)
187    return;
188
189  gcc_assert (adjust_vec);
190
191  while (!VEC_empty (adjust_info, adjust_vec))
192    {
193      adjust_debug_stmts_now (VEC_last (adjust_info, adjust_vec));
194      VEC_pop (adjust_info, adjust_vec);
195    }
196
197  VEC_free (adjust_info, stack, adjust_vec);
198}
199
200/* Adjust any debug stmts that referenced FROM values to use the
201   loop-closed TO, if the references are dominated by BB and not by
202   the definition of FROM.  If adjust_vec is non-NULL, adjustments
203   will be postponed until adjust_vec_debug_stmts is called.  */
204
205static void
206adjust_debug_stmts (tree from, tree to, basic_block bb)
207{
208  adjust_info ai;
209
210  if (MAY_HAVE_DEBUG_STMTS && TREE_CODE (from) == SSA_NAME
211      && SSA_NAME_VAR (from) != gimple_vop (cfun))
212    {
213      ai.from = from;
214      ai.to = to;
215      ai.bb = bb;
216
217      if (adjust_vec)
218	VEC_safe_push (adjust_info, stack, adjust_vec, &ai);
219      else
220	adjust_debug_stmts_now (&ai);
221    }
222}
223
224/* Change E's phi arg in UPDATE_PHI to NEW_DEF, and record information
225   to adjust any debug stmts that referenced the old phi arg,
226   presumably non-loop-closed references left over from other
227   transformations.  */
228
229static void
230adjust_phi_and_debug_stmts (gimple update_phi, edge e, tree new_def)
231{
232  tree orig_def = PHI_ARG_DEF_FROM_EDGE (update_phi, e);
233
234  SET_PHI_ARG_DEF (update_phi, e->dest_idx, new_def);
235
236  if (MAY_HAVE_DEBUG_STMTS)
237    adjust_debug_stmts (orig_def, PHI_RESULT (update_phi),
238			gimple_bb (update_phi));
239}
240
241
242/* Update the PHI nodes of NEW_LOOP.
243
244   NEW_LOOP is a duplicate of ORIG_LOOP.
245   AFTER indicates whether NEW_LOOP executes before or after ORIG_LOOP:
246   AFTER is true if NEW_LOOP executes after ORIG_LOOP, and false if it
247   executes before it.  */
248
249static void
250slpeel_update_phis_for_duplicate_loop (struct loop *orig_loop,
251				       struct loop *new_loop, bool after)
252{
253  tree new_ssa_name;
254  gimple phi_new, phi_orig;
255  tree def;
256  edge orig_loop_latch = loop_latch_edge (orig_loop);
257  edge orig_entry_e = loop_preheader_edge (orig_loop);
258  edge new_loop_exit_e = single_exit (new_loop);
259  edge new_loop_entry_e = loop_preheader_edge (new_loop);
260  edge entry_arg_e = (after ? orig_loop_latch : orig_entry_e);
261  gimple_stmt_iterator gsi_new, gsi_orig;
262
263  /*
264     step 1. For each loop-header-phi:
265             Add the first phi argument for the phi in NEW_LOOP
266            (the one associated with the entry of NEW_LOOP)
267
268     step 2. For each loop-header-phi:
269             Add the second phi argument for the phi in NEW_LOOP
270            (the one associated with the latch of NEW_LOOP)
271
272     step 3. Update the phis in the successor block of NEW_LOOP.
273
274        case 1: NEW_LOOP was placed before ORIG_LOOP:
275                The successor block of NEW_LOOP is the header of ORIG_LOOP.
276                Updating the phis in the successor block can therefore be done
277                along with the scanning of the loop header phis, because the
278                header blocks of ORIG_LOOP and NEW_LOOP have exactly the same
279                phi nodes, organized in the same order.
280
281        case 2: NEW_LOOP was placed after ORIG_LOOP:
282                The successor block of NEW_LOOP is the original exit block of
283                ORIG_LOOP - the phis to be updated are the loop-closed-ssa phis.
284                We postpone updating these phis to a later stage (when
285                loop guards are added).
286   */
287
288
289  /* Scan the phis in the headers of the old and new loops
290     (they are organized in exactly the same order).  */
291
292  for (gsi_new = gsi_start_phis (new_loop->header),
293       gsi_orig = gsi_start_phis (orig_loop->header);
294       !gsi_end_p (gsi_new) && !gsi_end_p (gsi_orig);
295       gsi_next (&gsi_new), gsi_next (&gsi_orig))
296    {
297      source_location locus;
298      phi_new = gsi_stmt (gsi_new);
299      phi_orig = gsi_stmt (gsi_orig);
300
301      /* step 1.  */
302      def = PHI_ARG_DEF_FROM_EDGE (phi_orig, entry_arg_e);
303      locus = gimple_phi_arg_location_from_edge (phi_orig, entry_arg_e);
304      add_phi_arg (phi_new, def, new_loop_entry_e, locus);
305
306      /* step 2.  */
307      def = PHI_ARG_DEF_FROM_EDGE (phi_orig, orig_loop_latch);
308      locus = gimple_phi_arg_location_from_edge (phi_orig, orig_loop_latch);
309      if (TREE_CODE (def) != SSA_NAME)
310        continue;
311
312      new_ssa_name = get_current_def (def);
313      if (!new_ssa_name)
314	{
315	  /* This only happens if there are no definitions
316	     inside the loop. use the phi_result in this case.  */
317	  new_ssa_name = PHI_RESULT (phi_new);
318	}
319
320      /* An ordinary ssa name defined in the loop.  */
321      add_phi_arg (phi_new, new_ssa_name, loop_latch_edge (new_loop), locus);
322
323      /* Drop any debug references outside the loop, if they would
324	 become ill-formed SSA.  */
325      adjust_debug_stmts (def, NULL, single_exit (orig_loop)->dest);
326
327      /* step 3 (case 1).  */
328      if (!after)
329        {
330          gcc_assert (new_loop_exit_e == orig_entry_e);
331	  adjust_phi_and_debug_stmts (phi_orig, new_loop_exit_e, new_ssa_name);
332        }
333    }
334}
335
336
337/* Update PHI nodes for a guard of the LOOP.
338
339   Input:
340   - LOOP, GUARD_EDGE: LOOP is a loop for which we added guard code that
341        controls whether LOOP is to be executed.  GUARD_EDGE is the edge that
342        originates from the guard-bb, skips LOOP and reaches the (unique) exit
343        bb of LOOP.  This loop-exit-bb is an empty bb with one successor.
344        We denote this bb NEW_MERGE_BB because before the guard code was added
345        it had a single predecessor (the LOOP header), and now it became a merge
346        point of two paths - the path that ends with the LOOP exit-edge, and
347        the path that ends with GUARD_EDGE.
348   - NEW_EXIT_BB: New basic block that is added by this function between LOOP
349        and NEW_MERGE_BB. It is used to place loop-closed-ssa-form exit-phis.
350
351   ===> The CFG before the guard-code was added:
352        LOOP_header_bb:
353          loop_body
354          if (exit_loop) goto update_bb
355          else           goto LOOP_header_bb
356        update_bb:
357
358   ==> The CFG after the guard-code was added:
359        guard_bb:
360          if (LOOP_guard_condition) goto new_merge_bb
361          else                      goto LOOP_header_bb
362        LOOP_header_bb:
363          loop_body
364          if (exit_loop_condition) goto new_merge_bb
365          else                     goto LOOP_header_bb
366        new_merge_bb:
367          goto update_bb
368        update_bb:
369
370   ==> The CFG after this function:
371        guard_bb:
372          if (LOOP_guard_condition) goto new_merge_bb
373          else                      goto LOOP_header_bb
374        LOOP_header_bb:
375          loop_body
376          if (exit_loop_condition) goto new_exit_bb
377          else                     goto LOOP_header_bb
378        new_exit_bb:
379        new_merge_bb:
380          goto update_bb
381        update_bb:
382
383   This function:
384   1. creates and updates the relevant phi nodes to account for the new
385      incoming edge (GUARD_EDGE) into NEW_MERGE_BB. This involves:
386      1.1. Create phi nodes at NEW_MERGE_BB.
387      1.2. Update the phi nodes at the successor of NEW_MERGE_BB (denoted
388           UPDATE_BB).  UPDATE_BB was the exit-bb of LOOP before NEW_MERGE_BB
389   2. preserves loop-closed-ssa-form by creating the required phi nodes
390      at the exit of LOOP (i.e, in NEW_EXIT_BB).
391
392   There are two flavors to this function:
393
394   slpeel_update_phi_nodes_for_guard1:
395     Here the guard controls whether we enter or skip LOOP, where LOOP is a
396     prolog_loop (loop1 below), and the new phis created in NEW_MERGE_BB are
397     for variables that have phis in the loop header.
398
399   slpeel_update_phi_nodes_for_guard2:
400     Here the guard controls whether we enter or skip LOOP, where LOOP is an
401     epilog_loop (loop2 below), and the new phis created in NEW_MERGE_BB are
402     for variables that have phis in the loop exit.
403
404   I.E., the overall structure is:
405
406        loop1_preheader_bb:
407                guard1 (goto loop1/merge1_bb)
408        loop1
409        loop1_exit_bb:
410                guard2 (goto merge1_bb/merge2_bb)
411        merge1_bb
412        loop2
413        loop2_exit_bb
414        merge2_bb
415        next_bb
416
417   slpeel_update_phi_nodes_for_guard1 takes care of creating phis in
418   loop1_exit_bb and merge1_bb. These are entry phis (phis for the vars
419   that have phis in loop1->header).
420
421   slpeel_update_phi_nodes_for_guard2 takes care of creating phis in
422   loop2_exit_bb and merge2_bb. These are exit phis (phis for the vars
423   that have phis in next_bb). It also adds some of these phis to
424   loop1_exit_bb.
425
426   slpeel_update_phi_nodes_for_guard1 is always called before
427   slpeel_update_phi_nodes_for_guard2. They are both needed in order
428   to create correct data-flow and loop-closed-ssa-form.
429
430   Generally slpeel_update_phi_nodes_for_guard1 creates phis for variables
431   that change between iterations of a loop (and therefore have a phi-node
432   at the loop entry), whereas slpeel_update_phi_nodes_for_guard2 creates
433   phis for variables that are used out of the loop (and therefore have
434   loop-closed exit phis). Some variables may be both updated between
435   iterations and used after the loop. This is why in loop1_exit_bb we
436   may need both entry_phis (created by slpeel_update_phi_nodes_for_guard1)
437   and exit phis (created by slpeel_update_phi_nodes_for_guard2).
438
439   - IS_NEW_LOOP: if IS_NEW_LOOP is true, then LOOP is a newly created copy of
440     an original loop. i.e., we have:
441
442           orig_loop
443           guard_bb (goto LOOP/new_merge)
444           new_loop <-- LOOP
445           new_exit
446           new_merge
447           next_bb
448
449     If IS_NEW_LOOP is false, then LOOP is an original loop, in which case we
450     have:
451
452           new_loop
453           guard_bb (goto LOOP/new_merge)
454           orig_loop <-- LOOP
455           new_exit
456           new_merge
457           next_bb
458
459     The SSA names defined in the original loop have a current
460     reaching definition that that records the corresponding new
461     ssa-name used in the new duplicated loop copy.
462  */
463
464/* Function slpeel_update_phi_nodes_for_guard1
465
466   Input:
467   - GUARD_EDGE, LOOP, IS_NEW_LOOP, NEW_EXIT_BB - as explained above.
468   - DEFS - a bitmap of ssa names to mark new names for which we recorded
469            information.
470
471   In the context of the overall structure, we have:
472
473        loop1_preheader_bb:
474                guard1 (goto loop1/merge1_bb)
475LOOP->  loop1
476        loop1_exit_bb:
477                guard2 (goto merge1_bb/merge2_bb)
478        merge1_bb
479        loop2
480        loop2_exit_bb
481        merge2_bb
482        next_bb
483
484   For each name updated between loop iterations (i.e - for each name that has
485   an entry (loop-header) phi in LOOP) we create a new phi in:
486   1. merge1_bb (to account for the edge from guard1)
487   2. loop1_exit_bb (an exit-phi to keep LOOP in loop-closed form)
488*/
489
490static void
491slpeel_update_phi_nodes_for_guard1 (edge guard_edge, struct loop *loop,
492                                    bool is_new_loop, basic_block *new_exit_bb,
493                                    bitmap *defs)
494{
495  gimple orig_phi, new_phi;
496  gimple update_phi, update_phi2;
497  tree guard_arg, loop_arg;
498  basic_block new_merge_bb = guard_edge->dest;
499  edge e = EDGE_SUCC (new_merge_bb, 0);
500  basic_block update_bb = e->dest;
501  basic_block orig_bb = loop->header;
502  edge new_exit_e;
503  tree current_new_name;
504  gimple_stmt_iterator gsi_orig, gsi_update;
505
506  /* Create new bb between loop and new_merge_bb.  */
507  *new_exit_bb = split_edge (single_exit (loop));
508
509  new_exit_e = EDGE_SUCC (*new_exit_bb, 0);
510
511  for (gsi_orig = gsi_start_phis (orig_bb),
512       gsi_update = gsi_start_phis (update_bb);
513       !gsi_end_p (gsi_orig) && !gsi_end_p (gsi_update);
514       gsi_next (&gsi_orig), gsi_next (&gsi_update))
515    {
516      source_location loop_locus, guard_locus;;
517      orig_phi = gsi_stmt (gsi_orig);
518      update_phi = gsi_stmt (gsi_update);
519
520      /** 1. Handle new-merge-point phis  **/
521
522      /* 1.1. Generate new phi node in NEW_MERGE_BB:  */
523      new_phi = create_phi_node (SSA_NAME_VAR (PHI_RESULT (orig_phi)),
524                                 new_merge_bb);
525
526      /* 1.2. NEW_MERGE_BB has two incoming edges: GUARD_EDGE and the exit-edge
527            of LOOP. Set the two phi args in NEW_PHI for these edges:  */
528      loop_arg = PHI_ARG_DEF_FROM_EDGE (orig_phi, EDGE_SUCC (loop->latch, 0));
529      loop_locus = gimple_phi_arg_location_from_edge (orig_phi,
530						      EDGE_SUCC (loop->latch,
531								 0));
532      guard_arg = PHI_ARG_DEF_FROM_EDGE (orig_phi, loop_preheader_edge (loop));
533      guard_locus
534	= gimple_phi_arg_location_from_edge (orig_phi,
535					     loop_preheader_edge (loop));
536
537      add_phi_arg (new_phi, loop_arg, new_exit_e, loop_locus);
538      add_phi_arg (new_phi, guard_arg, guard_edge, guard_locus);
539
540      /* 1.3. Update phi in successor block.  */
541      gcc_assert (PHI_ARG_DEF_FROM_EDGE (update_phi, e) == loop_arg
542                  || PHI_ARG_DEF_FROM_EDGE (update_phi, e) == guard_arg);
543      adjust_phi_and_debug_stmts (update_phi, e, PHI_RESULT (new_phi));
544      update_phi2 = new_phi;
545
546
547      /** 2. Handle loop-closed-ssa-form phis  **/
548
549      if (!is_gimple_reg (PHI_RESULT (orig_phi)))
550	continue;
551
552      /* 2.1. Generate new phi node in NEW_EXIT_BB:  */
553      new_phi = create_phi_node (SSA_NAME_VAR (PHI_RESULT (orig_phi)),
554                                 *new_exit_bb);
555
556      /* 2.2. NEW_EXIT_BB has one incoming edge: the exit-edge of the loop.  */
557      add_phi_arg (new_phi, loop_arg, single_exit (loop), loop_locus);
558
559      /* 2.3. Update phi in successor of NEW_EXIT_BB:  */
560      gcc_assert (PHI_ARG_DEF_FROM_EDGE (update_phi2, new_exit_e) == loop_arg);
561      adjust_phi_and_debug_stmts (update_phi2, new_exit_e,
562				  PHI_RESULT (new_phi));
563
564      /* 2.4. Record the newly created name with set_current_def.
565         We want to find a name such that
566                name = get_current_def (orig_loop_name)
567         and to set its current definition as follows:
568                set_current_def (name, new_phi_name)
569
570         If LOOP is a new loop then loop_arg is already the name we're
571         looking for. If LOOP is the original loop, then loop_arg is
572         the orig_loop_name and the relevant name is recorded in its
573         current reaching definition.  */
574      if (is_new_loop)
575        current_new_name = loop_arg;
576      else
577        {
578          current_new_name = get_current_def (loop_arg);
579	  /* current_def is not available only if the variable does not
580	     change inside the loop, in which case we also don't care
581	     about recording a current_def for it because we won't be
582	     trying to create loop-exit-phis for it.  */
583	  if (!current_new_name)
584	    continue;
585        }
586      gcc_assert (get_current_def (current_new_name) == NULL_TREE);
587
588      set_current_def (current_new_name, PHI_RESULT (new_phi));
589      bitmap_set_bit (*defs, SSA_NAME_VERSION (current_new_name));
590    }
591}
592
593
594/* Function slpeel_update_phi_nodes_for_guard2
595
596   Input:
597   - GUARD_EDGE, LOOP, IS_NEW_LOOP, NEW_EXIT_BB - as explained above.
598
599   In the context of the overall structure, we have:
600
601        loop1_preheader_bb:
602                guard1 (goto loop1/merge1_bb)
603        loop1
604        loop1_exit_bb:
605                guard2 (goto merge1_bb/merge2_bb)
606        merge1_bb
607LOOP->  loop2
608        loop2_exit_bb
609        merge2_bb
610        next_bb
611
612   For each name used out side the loop (i.e - for each name that has an exit
613   phi in next_bb) we create a new phi in:
614   1. merge2_bb (to account for the edge from guard_bb)
615   2. loop2_exit_bb (an exit-phi to keep LOOP in loop-closed form)
616   3. guard2 bb (an exit phi to keep the preceding loop in loop-closed form),
617      if needed (if it wasn't handled by slpeel_update_phis_nodes_for_phi1).
618*/
619
620static void
621slpeel_update_phi_nodes_for_guard2 (edge guard_edge, struct loop *loop,
622                                    bool is_new_loop, basic_block *new_exit_bb)
623{
624  gimple orig_phi, new_phi;
625  gimple update_phi, update_phi2;
626  tree guard_arg, loop_arg;
627  basic_block new_merge_bb = guard_edge->dest;
628  edge e = EDGE_SUCC (new_merge_bb, 0);
629  basic_block update_bb = e->dest;
630  edge new_exit_e;
631  tree orig_def, orig_def_new_name;
632  tree new_name, new_name2;
633  tree arg;
634  gimple_stmt_iterator gsi;
635
636  /* Create new bb between loop and new_merge_bb.  */
637  *new_exit_bb = split_edge (single_exit (loop));
638
639  new_exit_e = EDGE_SUCC (*new_exit_bb, 0);
640
641  for (gsi = gsi_start_phis (update_bb); !gsi_end_p (gsi); gsi_next (&gsi))
642    {
643      update_phi = gsi_stmt (gsi);
644      orig_phi = update_phi;
645      orig_def = PHI_ARG_DEF_FROM_EDGE (orig_phi, e);
646      /* This loop-closed-phi actually doesn't represent a use
647         out of the loop - the phi arg is a constant.  */
648      if (TREE_CODE (orig_def) != SSA_NAME)
649        continue;
650      orig_def_new_name = get_current_def (orig_def);
651      arg = NULL_TREE;
652
653      /** 1. Handle new-merge-point phis  **/
654
655      /* 1.1. Generate new phi node in NEW_MERGE_BB:  */
656      new_phi = create_phi_node (SSA_NAME_VAR (PHI_RESULT (orig_phi)),
657                                 new_merge_bb);
658
659      /* 1.2. NEW_MERGE_BB has two incoming edges: GUARD_EDGE and the exit-edge
660            of LOOP. Set the two PHI args in NEW_PHI for these edges:  */
661      new_name = orig_def;
662      new_name2 = NULL_TREE;
663      if (orig_def_new_name)
664        {
665          new_name = orig_def_new_name;
666	  /* Some variables have both loop-entry-phis and loop-exit-phis.
667	     Such variables were given yet newer names by phis placed in
668	     guard_bb by slpeel_update_phi_nodes_for_guard1. I.e:
669	     new_name2 = get_current_def (get_current_def (orig_name)).  */
670          new_name2 = get_current_def (new_name);
671        }
672
673      if (is_new_loop)
674        {
675          guard_arg = orig_def;
676          loop_arg = new_name;
677        }
678      else
679        {
680          guard_arg = new_name;
681          loop_arg = orig_def;
682        }
683      if (new_name2)
684        guard_arg = new_name2;
685
686      add_phi_arg (new_phi, loop_arg, new_exit_e, UNKNOWN_LOCATION);
687      add_phi_arg (new_phi, guard_arg, guard_edge, UNKNOWN_LOCATION);
688
689      /* 1.3. Update phi in successor block.  */
690      gcc_assert (PHI_ARG_DEF_FROM_EDGE (update_phi, e) == orig_def);
691      adjust_phi_and_debug_stmts (update_phi, e, PHI_RESULT (new_phi));
692      update_phi2 = new_phi;
693
694
695      /** 2. Handle loop-closed-ssa-form phis  **/
696
697      /* 2.1. Generate new phi node in NEW_EXIT_BB:  */
698      new_phi = create_phi_node (SSA_NAME_VAR (PHI_RESULT (orig_phi)),
699                                 *new_exit_bb);
700
701      /* 2.2. NEW_EXIT_BB has one incoming edge: the exit-edge of the loop.  */
702      add_phi_arg (new_phi, loop_arg, single_exit (loop), UNKNOWN_LOCATION);
703
704      /* 2.3. Update phi in successor of NEW_EXIT_BB:  */
705      gcc_assert (PHI_ARG_DEF_FROM_EDGE (update_phi2, new_exit_e) == loop_arg);
706      adjust_phi_and_debug_stmts (update_phi2, new_exit_e,
707				  PHI_RESULT (new_phi));
708
709
710      /** 3. Handle loop-closed-ssa-form phis for first loop  **/
711
712      /* 3.1. Find the relevant names that need an exit-phi in
713	 GUARD_BB, i.e. names for which
714	 slpeel_update_phi_nodes_for_guard1 had not already created a
715	 phi node. This is the case for names that are used outside
716	 the loop (and therefore need an exit phi) but are not updated
717	 across loop iterations (and therefore don't have a
718	 loop-header-phi).
719
720	 slpeel_update_phi_nodes_for_guard1 is responsible for
721	 creating loop-exit phis in GUARD_BB for names that have a
722	 loop-header-phi.  When such a phi is created we also record
723	 the new name in its current definition.  If this new name
724	 exists, then guard_arg was set to this new name (see 1.2
725	 above).  Therefore, if guard_arg is not this new name, this
726	 is an indication that an exit-phi in GUARD_BB was not yet
727	 created, so we take care of it here.  */
728      if (guard_arg == new_name2)
729	continue;
730      arg = guard_arg;
731
732      /* 3.2. Generate new phi node in GUARD_BB:  */
733      new_phi = create_phi_node (SSA_NAME_VAR (PHI_RESULT (orig_phi)),
734                                 guard_edge->src);
735
736      /* 3.3. GUARD_BB has one incoming edge:  */
737      gcc_assert (EDGE_COUNT (guard_edge->src->preds) == 1);
738      add_phi_arg (new_phi, arg, EDGE_PRED (guard_edge->src, 0),
739		   UNKNOWN_LOCATION);
740
741      /* 3.4. Update phi in successor of GUARD_BB:  */
742      gcc_assert (PHI_ARG_DEF_FROM_EDGE (update_phi2, guard_edge)
743                                                                == guard_arg);
744      adjust_phi_and_debug_stmts (update_phi2, guard_edge,
745				  PHI_RESULT (new_phi));
746    }
747}
748
749
750/* Make the LOOP iterate NITERS times. This is done by adding a new IV
751   that starts at zero, increases by one and its limit is NITERS.
752
753   Assumption: the exit-condition of LOOP is the last stmt in the loop.  */
754
755void
756slpeel_make_loop_iterate_ntimes (struct loop *loop, tree niters)
757{
758  tree indx_before_incr, indx_after_incr;
759  gimple cond_stmt;
760  gimple orig_cond;
761  edge exit_edge = single_exit (loop);
762  gimple_stmt_iterator loop_cond_gsi;
763  gimple_stmt_iterator incr_gsi;
764  bool insert_after;
765  tree init = build_int_cst (TREE_TYPE (niters), 0);
766  tree step = build_int_cst (TREE_TYPE (niters), 1);
767  LOC loop_loc;
768  enum tree_code code;
769
770  orig_cond = get_loop_exit_condition (loop);
771  gcc_assert (orig_cond);
772  loop_cond_gsi = gsi_for_stmt (orig_cond);
773
774  standard_iv_increment_position (loop, &incr_gsi, &insert_after);
775  create_iv (init, step, NULL_TREE, loop,
776             &incr_gsi, insert_after, &indx_before_incr, &indx_after_incr);
777
778  indx_after_incr = force_gimple_operand_gsi (&loop_cond_gsi, indx_after_incr,
779					      true, NULL_TREE, true,
780					      GSI_SAME_STMT);
781  niters = force_gimple_operand_gsi (&loop_cond_gsi, niters, true, NULL_TREE,
782				     true, GSI_SAME_STMT);
783
784  code = (exit_edge->flags & EDGE_TRUE_VALUE) ? GE_EXPR : LT_EXPR;
785  cond_stmt = gimple_build_cond (code, indx_after_incr, niters, NULL_TREE,
786				 NULL_TREE);
787
788  gsi_insert_before (&loop_cond_gsi, cond_stmt, GSI_SAME_STMT);
789
790  /* Remove old loop exit test:  */
791  gsi_remove (&loop_cond_gsi, true);
792
793  loop_loc = find_loop_location (loop);
794  if (dump_file && (dump_flags & TDF_DETAILS))
795    {
796      if (loop_loc != UNKNOWN_LOC)
797        fprintf (dump_file, "\nloop at %s:%d: ",
798                 LOC_FILE (loop_loc), LOC_LINE (loop_loc));
799      print_gimple_stmt (dump_file, cond_stmt, 0, TDF_SLIM);
800    }
801
802  loop->nb_iterations = niters;
803}
804
805
806/* Given LOOP this function generates a new copy of it and puts it
807   on E which is either the entry or exit of LOOP.  */
808
809struct loop *
810slpeel_tree_duplicate_loop_to_edge_cfg (struct loop *loop, edge e)
811{
812  struct loop *new_loop;
813  basic_block *new_bbs, *bbs;
814  bool at_exit;
815  bool was_imm_dom;
816  basic_block exit_dest;
817  gimple phi;
818  tree phi_arg;
819  edge exit, new_exit;
820  gimple_stmt_iterator gsi;
821
822  at_exit = (e == single_exit (loop));
823  if (!at_exit && e != loop_preheader_edge (loop))
824    return NULL;
825
826  bbs = get_loop_body (loop);
827
828  /* Check whether duplication is possible.  */
829  if (!can_copy_bbs_p (bbs, loop->num_nodes))
830    {
831      free (bbs);
832      return NULL;
833    }
834
835  /* Generate new loop structure.  */
836  new_loop = duplicate_loop (loop, loop_outer (loop));
837  if (!new_loop)
838    {
839      free (bbs);
840      return NULL;
841    }
842
843  exit_dest = single_exit (loop)->dest;
844  was_imm_dom = (get_immediate_dominator (CDI_DOMINATORS,
845					  exit_dest) == loop->header ?
846		 true : false);
847
848  new_bbs = XNEWVEC (basic_block, loop->num_nodes);
849
850  exit = single_exit (loop);
851  copy_bbs (bbs, loop->num_nodes, new_bbs,
852	    &exit, 1, &new_exit, NULL,
853	    e->src);
854
855  /* Duplicating phi args at exit bbs as coming
856     also from exit of duplicated loop.  */
857  for (gsi = gsi_start_phis (exit_dest); !gsi_end_p (gsi); gsi_next (&gsi))
858    {
859      phi = gsi_stmt (gsi);
860      phi_arg = PHI_ARG_DEF_FROM_EDGE (phi, single_exit (loop));
861      if (phi_arg)
862	{
863	  edge new_loop_exit_edge;
864	  source_location locus;
865
866	  locus = gimple_phi_arg_location_from_edge (phi, single_exit (loop));
867	  if (EDGE_SUCC (new_loop->header, 0)->dest == new_loop->latch)
868	    new_loop_exit_edge = EDGE_SUCC (new_loop->header, 1);
869	  else
870	    new_loop_exit_edge = EDGE_SUCC (new_loop->header, 0);
871
872	  add_phi_arg (phi, phi_arg, new_loop_exit_edge, locus);
873	}
874    }
875
876  if (at_exit) /* Add the loop copy at exit.  */
877    {
878      redirect_edge_and_branch_force (e, new_loop->header);
879      PENDING_STMT (e) = NULL;
880      set_immediate_dominator (CDI_DOMINATORS, new_loop->header, e->src);
881      if (was_imm_dom)
882	set_immediate_dominator (CDI_DOMINATORS, exit_dest, new_loop->header);
883    }
884  else /* Add the copy at entry.  */
885    {
886      edge new_exit_e;
887      edge entry_e = loop_preheader_edge (loop);
888      basic_block preheader = entry_e->src;
889
890      if (!flow_bb_inside_loop_p (new_loop,
891				  EDGE_SUCC (new_loop->header, 0)->dest))
892        new_exit_e = EDGE_SUCC (new_loop->header, 0);
893      else
894	new_exit_e = EDGE_SUCC (new_loop->header, 1);
895
896      redirect_edge_and_branch_force (new_exit_e, loop->header);
897      PENDING_STMT (new_exit_e) = NULL;
898      set_immediate_dominator (CDI_DOMINATORS, loop->header,
899			       new_exit_e->src);
900
901      /* We have to add phi args to the loop->header here as coming
902	 from new_exit_e edge.  */
903      for (gsi = gsi_start_phis (loop->header);
904           !gsi_end_p (gsi);
905           gsi_next (&gsi))
906	{
907	  phi = gsi_stmt (gsi);
908	  phi_arg = PHI_ARG_DEF_FROM_EDGE (phi, entry_e);
909	  if (phi_arg)
910	    add_phi_arg (phi, phi_arg, new_exit_e,
911			 gimple_phi_arg_location_from_edge (phi, entry_e));
912	}
913
914      redirect_edge_and_branch_force (entry_e, new_loop->header);
915      PENDING_STMT (entry_e) = NULL;
916      set_immediate_dominator (CDI_DOMINATORS, new_loop->header, preheader);
917    }
918
919  free (new_bbs);
920  free (bbs);
921
922  return new_loop;
923}
924
925
926/* Given the condition statement COND, put it as the last statement
927   of GUARD_BB; EXIT_BB is the basic block to skip the loop;
928   Assumes that this is the single exit of the guarded loop.
929   Returns the skip edge, inserts new stmts on the COND_EXPR_STMT_LIST.  */
930
931static edge
932slpeel_add_loop_guard (basic_block guard_bb, tree cond,
933		       gimple_seq cond_expr_stmt_list,
934		       basic_block exit_bb, basic_block dom_bb)
935{
936  gimple_stmt_iterator gsi;
937  edge new_e, enter_e;
938  gimple cond_stmt;
939  gimple_seq gimplify_stmt_list = NULL;
940
941  enter_e = EDGE_SUCC (guard_bb, 0);
942  enter_e->flags &= ~EDGE_FALLTHRU;
943  enter_e->flags |= EDGE_FALSE_VALUE;
944  gsi = gsi_last_bb (guard_bb);
945
946  cond = force_gimple_operand (cond, &gimplify_stmt_list, true, NULL_TREE);
947  if (gimplify_stmt_list)
948    gimple_seq_add_seq (&cond_expr_stmt_list, gimplify_stmt_list);
949  cond_stmt = gimple_build_cond (NE_EXPR,
950				 cond, build_int_cst (TREE_TYPE (cond), 0),
951				 NULL_TREE, NULL_TREE);
952  if (cond_expr_stmt_list)
953    gsi_insert_seq_after (&gsi, cond_expr_stmt_list, GSI_NEW_STMT);
954
955  gsi = gsi_last_bb (guard_bb);
956  gsi_insert_after (&gsi, cond_stmt, GSI_NEW_STMT);
957
958  /* Add new edge to connect guard block to the merge/loop-exit block.  */
959  new_e = make_edge (guard_bb, exit_bb, EDGE_TRUE_VALUE);
960  set_immediate_dominator (CDI_DOMINATORS, exit_bb, dom_bb);
961  return new_e;
962}
963
964
965/* This function verifies that the following restrictions apply to LOOP:
966   (1) it is innermost
967   (2) it consists of exactly 2 basic blocks - header, and an empty latch.
968   (3) it is single entry, single exit
969   (4) its exit condition is the last stmt in the header
970   (5) E is the entry/exit edge of LOOP.
971 */
972
973bool
974slpeel_can_duplicate_loop_p (const struct loop *loop, const_edge e)
975{
976  edge exit_e = single_exit (loop);
977  edge entry_e = loop_preheader_edge (loop);
978  gimple orig_cond = get_loop_exit_condition (loop);
979  gimple_stmt_iterator loop_exit_gsi = gsi_last_bb (exit_e->src);
980
981  if (need_ssa_update_p (cfun))
982    return false;
983
984  if (loop->inner
985      /* All loops have an outer scope; the only case loop->outer is NULL is for
986         the function itself.  */
987      || !loop_outer (loop)
988      || loop->num_nodes != 2
989      || !empty_block_p (loop->latch)
990      || !single_exit (loop)
991      /* Verify that new loop exit condition can be trivially modified.  */
992      || (!orig_cond || orig_cond != gsi_stmt (loop_exit_gsi))
993      || (e != exit_e && e != entry_e))
994    return false;
995
996  return true;
997}
998
999#ifdef ENABLE_CHECKING
1000static void
1001slpeel_verify_cfg_after_peeling (struct loop *first_loop,
1002                                 struct loop *second_loop)
1003{
1004  basic_block loop1_exit_bb = single_exit (first_loop)->dest;
1005  basic_block loop2_entry_bb = loop_preheader_edge (second_loop)->src;
1006  basic_block loop1_entry_bb = loop_preheader_edge (first_loop)->src;
1007
1008  /* A guard that controls whether the second_loop is to be executed or skipped
1009     is placed in first_loop->exit.  first_loop->exit therefore has two
1010     successors - one is the preheader of second_loop, and the other is a bb
1011     after second_loop.
1012   */
1013  gcc_assert (EDGE_COUNT (loop1_exit_bb->succs) == 2);
1014
1015  /* 1. Verify that one of the successors of first_loop->exit is the preheader
1016        of second_loop.  */
1017
1018  /* The preheader of new_loop is expected to have two predecessors:
1019     first_loop->exit and the block that precedes first_loop.  */
1020
1021  gcc_assert (EDGE_COUNT (loop2_entry_bb->preds) == 2
1022              && ((EDGE_PRED (loop2_entry_bb, 0)->src == loop1_exit_bb
1023                   && EDGE_PRED (loop2_entry_bb, 1)->src == loop1_entry_bb)
1024               || (EDGE_PRED (loop2_entry_bb, 1)->src ==  loop1_exit_bb
1025                   && EDGE_PRED (loop2_entry_bb, 0)->src == loop1_entry_bb)));
1026
1027  /* Verify that the other successor of first_loop->exit is after the
1028     second_loop.  */
1029  /* TODO */
1030}
1031#endif
1032
1033/* If the run time cost model check determines that vectorization is
1034   not profitable and hence scalar loop should be generated then set
1035   FIRST_NITERS to prologue peeled iterations. This will allow all the
1036   iterations to be executed in the prologue peeled scalar loop.  */
1037
1038static void
1039set_prologue_iterations (basic_block bb_before_first_loop,
1040			 tree first_niters,
1041			 struct loop *loop,
1042			 unsigned int th)
1043{
1044  edge e;
1045  basic_block cond_bb, then_bb;
1046  tree var, prologue_after_cost_adjust_name;
1047  gimple_stmt_iterator gsi;
1048  gimple newphi;
1049  edge e_true, e_false, e_fallthru;
1050  gimple cond_stmt;
1051  gimple_seq gimplify_stmt_list = NULL, stmts = NULL;
1052  tree cost_pre_condition = NULL_TREE;
1053  tree scalar_loop_iters =
1054    unshare_expr (LOOP_VINFO_NITERS_UNCHANGED (loop_vec_info_for_loop (loop)));
1055
1056  e = single_pred_edge (bb_before_first_loop);
1057  cond_bb = split_edge(e);
1058
1059  e = single_pred_edge (bb_before_first_loop);
1060  then_bb = split_edge(e);
1061  set_immediate_dominator (CDI_DOMINATORS, then_bb, cond_bb);
1062
1063  e_false = make_single_succ_edge (cond_bb, bb_before_first_loop,
1064				   EDGE_FALSE_VALUE);
1065  set_immediate_dominator (CDI_DOMINATORS, bb_before_first_loop, cond_bb);
1066
1067  e_true = EDGE_PRED (then_bb, 0);
1068  e_true->flags &= ~EDGE_FALLTHRU;
1069  e_true->flags |= EDGE_TRUE_VALUE;
1070
1071  e_fallthru = EDGE_SUCC (then_bb, 0);
1072
1073  cost_pre_condition =
1074    fold_build2 (LE_EXPR, boolean_type_node, scalar_loop_iters,
1075	         build_int_cst (TREE_TYPE (scalar_loop_iters), th));
1076  cost_pre_condition =
1077    force_gimple_operand (cost_pre_condition, &gimplify_stmt_list,
1078			  true, NULL_TREE);
1079  cond_stmt = gimple_build_cond (NE_EXPR, cost_pre_condition,
1080				 build_int_cst (TREE_TYPE (cost_pre_condition),
1081						0), NULL_TREE, NULL_TREE);
1082
1083  gsi = gsi_last_bb (cond_bb);
1084  if (gimplify_stmt_list)
1085    gsi_insert_seq_after (&gsi, gimplify_stmt_list, GSI_NEW_STMT);
1086
1087  gsi = gsi_last_bb (cond_bb);
1088  gsi_insert_after (&gsi, cond_stmt, GSI_NEW_STMT);
1089
1090  var = create_tmp_var (TREE_TYPE (scalar_loop_iters),
1091			"prologue_after_cost_adjust");
1092  add_referenced_var (var);
1093  prologue_after_cost_adjust_name =
1094    force_gimple_operand (scalar_loop_iters, &stmts, false, var);
1095
1096  gsi = gsi_last_bb (then_bb);
1097  if (stmts)
1098    gsi_insert_seq_after (&gsi, stmts, GSI_NEW_STMT);
1099
1100  newphi = create_phi_node (var, bb_before_first_loop);
1101  add_phi_arg (newphi, prologue_after_cost_adjust_name, e_fallthru,
1102	       UNKNOWN_LOCATION);
1103  add_phi_arg (newphi, first_niters, e_false, UNKNOWN_LOCATION);
1104
1105  first_niters = PHI_RESULT (newphi);
1106}
1107
1108
1109/* Function slpeel_tree_peel_loop_to_edge.
1110
1111   Peel the first (last) iterations of LOOP into a new prolog (epilog) loop
1112   that is placed on the entry (exit) edge E of LOOP. After this transformation
1113   we have two loops one after the other - first-loop iterates FIRST_NITERS
1114   times, and second-loop iterates the remainder NITERS - FIRST_NITERS times.
1115   If the cost model indicates that it is profitable to emit a scalar
1116   loop instead of the vector one, then the prolog (epilog) loop will iterate
1117   for the entire unchanged scalar iterations of the loop.
1118
1119   Input:
1120   - LOOP: the loop to be peeled.
1121   - E: the exit or entry edge of LOOP.
1122        If it is the entry edge, we peel the first iterations of LOOP. In this
1123        case first-loop is LOOP, and second-loop is the newly created loop.
1124        If it is the exit edge, we peel the last iterations of LOOP. In this
1125        case, first-loop is the newly created loop, and second-loop is LOOP.
1126   - NITERS: the number of iterations that LOOP iterates.
1127   - FIRST_NITERS: the number of iterations that the first-loop should iterate.
1128   - UPDATE_FIRST_LOOP_COUNT:  specified whether this function is responsible
1129        for updating the loop bound of the first-loop to FIRST_NITERS.  If it
1130        is false, the caller of this function may want to take care of this
1131        (this can be useful if we don't want new stmts added to first-loop).
1132   - TH: cost model profitability threshold of iterations for vectorization.
1133   - CHECK_PROFITABILITY: specify whether cost model check has not occurred
1134                          during versioning and hence needs to occur during
1135			  prologue generation or whether cost model check
1136			  has not occurred during prologue generation and hence
1137			  needs to occur during epilogue generation.
1138
1139
1140   Output:
1141   The function returns a pointer to the new loop-copy, or NULL if it failed
1142   to perform the transformation.
1143
1144   The function generates two if-then-else guards: one before the first loop,
1145   and the other before the second loop:
1146   The first guard is:
1147     if (FIRST_NITERS == 0) then skip the first loop,
1148     and go directly to the second loop.
1149   The second guard is:
1150     if (FIRST_NITERS == NITERS) then skip the second loop.
1151
1152   If the optional COND_EXPR and COND_EXPR_STMT_LIST arguments are given
1153   then the generated condition is combined with COND_EXPR and the
1154   statements in COND_EXPR_STMT_LIST are emitted together with it.
1155
1156   FORNOW only simple loops are supported (see slpeel_can_duplicate_loop_p).
1157   FORNOW the resulting code will not be in loop-closed-ssa form.
1158*/
1159
1160static struct loop*
1161slpeel_tree_peel_loop_to_edge (struct loop *loop,
1162			       edge e, tree first_niters,
1163			       tree niters, bool update_first_loop_count,
1164			       unsigned int th, bool check_profitability,
1165			       tree cond_expr, gimple_seq cond_expr_stmt_list)
1166{
1167  struct loop *new_loop = NULL, *first_loop, *second_loop;
1168  edge skip_e;
1169  tree pre_condition = NULL_TREE;
1170  bitmap definitions;
1171  basic_block bb_before_second_loop, bb_after_second_loop;
1172  basic_block bb_before_first_loop;
1173  basic_block bb_between_loops;
1174  basic_block new_exit_bb;
1175  edge exit_e = single_exit (loop);
1176  LOC loop_loc;
1177  tree cost_pre_condition = NULL_TREE;
1178
1179  if (!slpeel_can_duplicate_loop_p (loop, e))
1180    return NULL;
1181
1182  /* We have to initialize cfg_hooks. Then, when calling
1183   cfg_hooks->split_edge, the function tree_split_edge
1184   is actually called and, when calling cfg_hooks->duplicate_block,
1185   the function tree_duplicate_bb is called.  */
1186  gimple_register_cfg_hooks ();
1187
1188
1189  /* 1. Generate a copy of LOOP and put it on E (E is the entry/exit of LOOP).
1190        Resulting CFG would be:
1191
1192        first_loop:
1193        do {
1194        } while ...
1195
1196        second_loop:
1197        do {
1198        } while ...
1199
1200        orig_exit_bb:
1201   */
1202
1203  if (!(new_loop = slpeel_tree_duplicate_loop_to_edge_cfg (loop, e)))
1204    {
1205      loop_loc = find_loop_location (loop);
1206      if (dump_file && (dump_flags & TDF_DETAILS))
1207        {
1208          if (loop_loc != UNKNOWN_LOC)
1209            fprintf (dump_file, "\n%s:%d: note: ",
1210                     LOC_FILE (loop_loc), LOC_LINE (loop_loc));
1211          fprintf (dump_file, "tree_duplicate_loop_to_edge_cfg failed.\n");
1212        }
1213      return NULL;
1214    }
1215
1216  if (MAY_HAVE_DEBUG_STMTS)
1217    {
1218      gcc_assert (!adjust_vec);
1219      adjust_vec = VEC_alloc (adjust_info, stack, 32);
1220    }
1221
1222  if (e == exit_e)
1223    {
1224      /* NEW_LOOP was placed after LOOP.  */
1225      first_loop = loop;
1226      second_loop = new_loop;
1227    }
1228  else
1229    {
1230      /* NEW_LOOP was placed before LOOP.  */
1231      first_loop = new_loop;
1232      second_loop = loop;
1233    }
1234
1235  definitions = ssa_names_to_replace ();
1236  slpeel_update_phis_for_duplicate_loop (loop, new_loop, e == exit_e);
1237  rename_variables_in_loop (new_loop);
1238
1239
1240  /* 2.  Add the guard code in one of the following ways:
1241
1242     2.a Add the guard that controls whether the first loop is executed.
1243         This occurs when this function is invoked for prologue or epilogue
1244	 generation and when the cost model check can be done at compile time.
1245
1246         Resulting CFG would be:
1247
1248         bb_before_first_loop:
1249         if (FIRST_NITERS == 0) GOTO bb_before_second_loop
1250                                GOTO first-loop
1251
1252         first_loop:
1253         do {
1254         } while ...
1255
1256         bb_before_second_loop:
1257
1258         second_loop:
1259         do {
1260         } while ...
1261
1262         orig_exit_bb:
1263
1264     2.b Add the cost model check that allows the prologue
1265         to iterate for the entire unchanged scalar
1266         iterations of the loop in the event that the cost
1267         model indicates that the scalar loop is more
1268         profitable than the vector one. This occurs when
1269	 this function is invoked for prologue generation
1270	 and the cost model check needs to be done at run
1271	 time.
1272
1273         Resulting CFG after prologue peeling would be:
1274
1275         if (scalar_loop_iterations <= th)
1276           FIRST_NITERS = scalar_loop_iterations
1277
1278         bb_before_first_loop:
1279         if (FIRST_NITERS == 0) GOTO bb_before_second_loop
1280                                GOTO first-loop
1281
1282         first_loop:
1283         do {
1284         } while ...
1285
1286         bb_before_second_loop:
1287
1288         second_loop:
1289         do {
1290         } while ...
1291
1292         orig_exit_bb:
1293
1294     2.c Add the cost model check that allows the epilogue
1295         to iterate for the entire unchanged scalar
1296         iterations of the loop in the event that the cost
1297         model indicates that the scalar loop is more
1298         profitable than the vector one. This occurs when
1299	 this function is invoked for epilogue generation
1300	 and the cost model check needs to be done at run
1301	 time.  This check is combined with any pre-existing
1302	 check in COND_EXPR to avoid versioning.
1303
1304         Resulting CFG after prologue peeling would be:
1305
1306         bb_before_first_loop:
1307         if ((scalar_loop_iterations <= th)
1308             ||
1309             FIRST_NITERS == 0) GOTO bb_before_second_loop
1310                                GOTO first-loop
1311
1312         first_loop:
1313         do {
1314         } while ...
1315
1316         bb_before_second_loop:
1317
1318         second_loop:
1319         do {
1320         } while ...
1321
1322         orig_exit_bb:
1323  */
1324
1325  bb_before_first_loop = split_edge (loop_preheader_edge (first_loop));
1326  bb_before_second_loop = split_edge (single_exit (first_loop));
1327
1328  /* Epilogue peeling.  */
1329  if (!update_first_loop_count)
1330    {
1331      pre_condition =
1332	fold_build2 (LE_EXPR, boolean_type_node, first_niters,
1333		     build_int_cst (TREE_TYPE (first_niters), 0));
1334      if (check_profitability)
1335	{
1336	  tree scalar_loop_iters
1337	    = unshare_expr (LOOP_VINFO_NITERS_UNCHANGED
1338					(loop_vec_info_for_loop (loop)));
1339	  cost_pre_condition =
1340	    fold_build2 (LE_EXPR, boolean_type_node, scalar_loop_iters,
1341		         build_int_cst (TREE_TYPE (scalar_loop_iters), th));
1342
1343	  pre_condition = fold_build2 (TRUTH_OR_EXPR, boolean_type_node,
1344				       cost_pre_condition, pre_condition);
1345	}
1346      if (cond_expr)
1347	{
1348	  pre_condition =
1349	    fold_build2 (TRUTH_OR_EXPR, boolean_type_node,
1350			 pre_condition,
1351			 fold_build1 (TRUTH_NOT_EXPR, boolean_type_node,
1352				      cond_expr));
1353	}
1354    }
1355
1356  /* Prologue peeling.  */
1357  else
1358    {
1359      if (check_profitability)
1360	set_prologue_iterations (bb_before_first_loop, first_niters,
1361				 loop, th);
1362
1363      pre_condition =
1364	fold_build2 (LE_EXPR, boolean_type_node, first_niters,
1365		     build_int_cst (TREE_TYPE (first_niters), 0));
1366    }
1367
1368  skip_e = slpeel_add_loop_guard (bb_before_first_loop, pre_condition,
1369				  cond_expr_stmt_list,
1370                                  bb_before_second_loop, bb_before_first_loop);
1371  slpeel_update_phi_nodes_for_guard1 (skip_e, first_loop,
1372				      first_loop == new_loop,
1373				      &new_exit_bb, &definitions);
1374
1375
1376  /* 3. Add the guard that controls whether the second loop is executed.
1377        Resulting CFG would be:
1378
1379        bb_before_first_loop:
1380        if (FIRST_NITERS == 0) GOTO bb_before_second_loop (skip first loop)
1381                               GOTO first-loop
1382
1383        first_loop:
1384        do {
1385        } while ...
1386
1387        bb_between_loops:
1388        if (FIRST_NITERS == NITERS) GOTO bb_after_second_loop (skip second loop)
1389                                    GOTO bb_before_second_loop
1390
1391        bb_before_second_loop:
1392
1393        second_loop:
1394        do {
1395        } while ...
1396
1397        bb_after_second_loop:
1398
1399        orig_exit_bb:
1400   */
1401
1402  bb_between_loops = new_exit_bb;
1403  bb_after_second_loop = split_edge (single_exit (second_loop));
1404
1405  pre_condition =
1406	fold_build2 (EQ_EXPR, boolean_type_node, first_niters, niters);
1407  skip_e = slpeel_add_loop_guard (bb_between_loops, pre_condition, NULL,
1408                                  bb_after_second_loop, bb_before_first_loop);
1409  slpeel_update_phi_nodes_for_guard2 (skip_e, second_loop,
1410                                     second_loop == new_loop, &new_exit_bb);
1411
1412  /* 4. Make first-loop iterate FIRST_NITERS times, if requested.
1413   */
1414  if (update_first_loop_count)
1415    slpeel_make_loop_iterate_ntimes (first_loop, first_niters);
1416
1417  adjust_vec_debug_stmts ();
1418
1419  BITMAP_FREE (definitions);
1420  delete_update_ssa ();
1421
1422  return new_loop;
1423}
1424
1425/* Function vect_get_loop_location.
1426
1427   Extract the location of the loop in the source code.
1428   If the loop is not well formed for vectorization, an estimated
1429   location is calculated.
1430   Return the loop location if succeed and NULL if not.  */
1431
1432LOC
1433find_loop_location (struct loop *loop)
1434{
1435  gimple stmt = NULL;
1436  basic_block bb;
1437  gimple_stmt_iterator si;
1438
1439  if (!loop)
1440    return UNKNOWN_LOC;
1441
1442  stmt = get_loop_exit_condition (loop);
1443
1444  if (stmt && gimple_location (stmt) != UNKNOWN_LOC)
1445    return gimple_location (stmt);
1446
1447  /* If we got here the loop is probably not "well formed",
1448     try to estimate the loop location */
1449
1450  if (!loop->header)
1451    return UNKNOWN_LOC;
1452
1453  bb = loop->header;
1454
1455  for (si = gsi_start_bb (bb); !gsi_end_p (si); gsi_next (&si))
1456    {
1457      stmt = gsi_stmt (si);
1458      if (gimple_location (stmt) != UNKNOWN_LOC)
1459        return gimple_location (stmt);
1460    }
1461
1462  return UNKNOWN_LOC;
1463}
1464
1465
1466/* This function builds ni_name = number of iterations loop executes
1467   on the loop preheader.  If SEQ is given the stmt is instead emitted
1468   there.  */
1469
1470static tree
1471vect_build_loop_niters (loop_vec_info loop_vinfo, gimple_seq seq)
1472{
1473  tree ni_name, var;
1474  gimple_seq stmts = NULL;
1475  edge pe;
1476  struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
1477  tree ni = unshare_expr (LOOP_VINFO_NITERS (loop_vinfo));
1478
1479  var = create_tmp_var (TREE_TYPE (ni), "niters");
1480  add_referenced_var (var);
1481  ni_name = force_gimple_operand (ni, &stmts, false, var);
1482
1483  pe = loop_preheader_edge (loop);
1484  if (stmts)
1485    {
1486      if (seq)
1487	gimple_seq_add_seq (&seq, stmts);
1488      else
1489	{
1490	  basic_block new_bb = gsi_insert_seq_on_edge_immediate (pe, stmts);
1491	  gcc_assert (!new_bb);
1492	}
1493    }
1494
1495  return ni_name;
1496}
1497
1498
1499/* This function generates the following statements:
1500
1501 ni_name = number of iterations loop executes
1502 ratio = ni_name / vf
1503 ratio_mult_vf_name = ratio * vf
1504
1505 and places them at the loop preheader edge or in COND_EXPR_STMT_LIST
1506 if that is non-NULL.  */
1507
1508static void
1509vect_generate_tmps_on_preheader (loop_vec_info loop_vinfo,
1510				 tree *ni_name_ptr,
1511				 tree *ratio_mult_vf_name_ptr,
1512				 tree *ratio_name_ptr,
1513				 gimple_seq cond_expr_stmt_list)
1514{
1515
1516  edge pe;
1517  basic_block new_bb;
1518  gimple_seq stmts;
1519  tree ni_name;
1520  tree var;
1521  tree ratio_name;
1522  tree ratio_mult_vf_name;
1523  struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
1524  tree ni = LOOP_VINFO_NITERS (loop_vinfo);
1525  int vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
1526  tree log_vf;
1527
1528  pe = loop_preheader_edge (loop);
1529
1530  /* Generate temporary variable that contains
1531     number of iterations loop executes.  */
1532
1533  ni_name = vect_build_loop_niters (loop_vinfo, cond_expr_stmt_list);
1534  log_vf = build_int_cst (TREE_TYPE (ni), exact_log2 (vf));
1535
1536  /* Create: ratio = ni >> log2(vf) */
1537
1538  ratio_name = fold_build2 (RSHIFT_EXPR, TREE_TYPE (ni_name), ni_name, log_vf);
1539  if (!is_gimple_val (ratio_name))
1540    {
1541      var = create_tmp_var (TREE_TYPE (ni), "bnd");
1542      add_referenced_var (var);
1543
1544      stmts = NULL;
1545      ratio_name = force_gimple_operand (ratio_name, &stmts, true, var);
1546      if (cond_expr_stmt_list)
1547	gimple_seq_add_seq (&cond_expr_stmt_list, stmts);
1548      else
1549	{
1550	  pe = loop_preheader_edge (loop);
1551	  new_bb = gsi_insert_seq_on_edge_immediate (pe, stmts);
1552	  gcc_assert (!new_bb);
1553	}
1554    }
1555
1556  /* Create: ratio_mult_vf = ratio << log2 (vf).  */
1557
1558  ratio_mult_vf_name = fold_build2 (LSHIFT_EXPR, TREE_TYPE (ratio_name),
1559				    ratio_name, log_vf);
1560  if (!is_gimple_val (ratio_mult_vf_name))
1561    {
1562      var = create_tmp_var (TREE_TYPE (ni), "ratio_mult_vf");
1563      add_referenced_var (var);
1564
1565      stmts = NULL;
1566      ratio_mult_vf_name = force_gimple_operand (ratio_mult_vf_name, &stmts,
1567						 true, var);
1568      if (cond_expr_stmt_list)
1569	gimple_seq_add_seq (&cond_expr_stmt_list, stmts);
1570      else
1571	{
1572	  pe = loop_preheader_edge (loop);
1573	  new_bb = gsi_insert_seq_on_edge_immediate (pe, stmts);
1574	  gcc_assert (!new_bb);
1575	}
1576    }
1577
1578  *ni_name_ptr = ni_name;
1579  *ratio_mult_vf_name_ptr = ratio_mult_vf_name;
1580  *ratio_name_ptr = ratio_name;
1581
1582  return;
1583}
1584
1585/* Function vect_can_advance_ivs_p
1586
1587   In case the number of iterations that LOOP iterates is unknown at compile
1588   time, an epilog loop will be generated, and the loop induction variables
1589   (IVs) will be "advanced" to the value they are supposed to take just before
1590   the epilog loop.  Here we check that the access function of the loop IVs
1591   and the expression that represents the loop bound are simple enough.
1592   These restrictions will be relaxed in the future.  */
1593
1594bool
1595vect_can_advance_ivs_p (loop_vec_info loop_vinfo)
1596{
1597  struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
1598  basic_block bb = loop->header;
1599  gimple phi;
1600  gimple_stmt_iterator gsi;
1601
1602  /* Analyze phi functions of the loop header.  */
1603
1604  if (vect_print_dump_info (REPORT_DETAILS))
1605    fprintf (vect_dump, "vect_can_advance_ivs_p:");
1606
1607  for (gsi = gsi_start_phis (bb); !gsi_end_p (gsi); gsi_next (&gsi))
1608    {
1609      tree access_fn = NULL;
1610      tree evolution_part;
1611
1612      phi = gsi_stmt (gsi);
1613      if (vect_print_dump_info (REPORT_DETAILS))
1614	{
1615          fprintf (vect_dump, "Analyze phi: ");
1616          print_gimple_stmt (vect_dump, phi, 0, TDF_SLIM);
1617	}
1618
1619      /* Skip virtual phi's. The data dependences that are associated with
1620         virtual defs/uses (i.e., memory accesses) are analyzed elsewhere.  */
1621
1622      if (!is_gimple_reg (SSA_NAME_VAR (PHI_RESULT (phi))))
1623	{
1624	  if (vect_print_dump_info (REPORT_DETAILS))
1625	    fprintf (vect_dump, "virtual phi. skip.");
1626	  continue;
1627	}
1628
1629      /* Skip reduction phis.  */
1630
1631      if (STMT_VINFO_DEF_TYPE (vinfo_for_stmt (phi)) == vect_reduction_def)
1632        {
1633          if (vect_print_dump_info (REPORT_DETAILS))
1634            fprintf (vect_dump, "reduc phi. skip.");
1635          continue;
1636        }
1637
1638      /* Analyze the evolution function.  */
1639
1640      access_fn = instantiate_parameters
1641	(loop, analyze_scalar_evolution (loop, PHI_RESULT (phi)));
1642
1643      if (!access_fn)
1644	{
1645	  if (vect_print_dump_info (REPORT_DETAILS))
1646	    fprintf (vect_dump, "No Access function.");
1647	  return false;
1648	}
1649
1650      if (vect_print_dump_info (REPORT_DETAILS))
1651        {
1652	  fprintf (vect_dump, "Access function of PHI: ");
1653	  print_generic_expr (vect_dump, access_fn, TDF_SLIM);
1654        }
1655
1656      evolution_part = evolution_part_in_loop_num (access_fn, loop->num);
1657
1658      if (evolution_part == NULL_TREE)
1659        {
1660	  if (vect_print_dump_info (REPORT_DETAILS))
1661	    fprintf (vect_dump, "No evolution.");
1662	  return false;
1663        }
1664
1665      /* FORNOW: We do not transform initial conditions of IVs
1666	 which evolution functions are a polynomial of degree >= 2.  */
1667
1668      if (tree_is_chrec (evolution_part))
1669	return false;
1670    }
1671
1672  return true;
1673}
1674
1675
1676/*   Function vect_update_ivs_after_vectorizer.
1677
1678     "Advance" the induction variables of LOOP to the value they should take
1679     after the execution of LOOP.  This is currently necessary because the
1680     vectorizer does not handle induction variables that are used after the
1681     loop.  Such a situation occurs when the last iterations of LOOP are
1682     peeled, because:
1683     1. We introduced new uses after LOOP for IVs that were not originally used
1684        after LOOP: the IVs of LOOP are now used by an epilog loop.
1685     2. LOOP is going to be vectorized; this means that it will iterate N/VF
1686        times, whereas the loop IVs should be bumped N times.
1687
1688     Input:
1689     - LOOP - a loop that is going to be vectorized. The last few iterations
1690              of LOOP were peeled.
1691     - NITERS - the number of iterations that LOOP executes (before it is
1692                vectorized). i.e, the number of times the ivs should be bumped.
1693     - UPDATE_E - a successor edge of LOOP->exit that is on the (only) path
1694                  coming out from LOOP on which there are uses of the LOOP ivs
1695		  (this is the path from LOOP->exit to epilog_loop->preheader).
1696
1697                  The new definitions of the ivs are placed in LOOP->exit.
1698                  The phi args associated with the edge UPDATE_E in the bb
1699                  UPDATE_E->dest are updated accordingly.
1700
1701     Assumption 1: Like the rest of the vectorizer, this function assumes
1702     a single loop exit that has a single predecessor.
1703
1704     Assumption 2: The phi nodes in the LOOP header and in update_bb are
1705     organized in the same order.
1706
1707     Assumption 3: The access function of the ivs is simple enough (see
1708     vect_can_advance_ivs_p).  This assumption will be relaxed in the future.
1709
1710     Assumption 4: Exactly one of the successors of LOOP exit-bb is on a path
1711     coming out of LOOP on which the ivs of LOOP are used (this is the path
1712     that leads to the epilog loop; other paths skip the epilog loop).  This
1713     path starts with the edge UPDATE_E, and its destination (denoted update_bb)
1714     needs to have its phis updated.
1715 */
1716
1717static void
1718vect_update_ivs_after_vectorizer (loop_vec_info loop_vinfo, tree niters,
1719				  edge update_e)
1720{
1721  struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
1722  basic_block exit_bb = single_exit (loop)->dest;
1723  gimple phi, phi1;
1724  gimple_stmt_iterator gsi, gsi1;
1725  basic_block update_bb = update_e->dest;
1726
1727  /* gcc_assert (vect_can_advance_ivs_p (loop_vinfo)); */
1728
1729  /* Make sure there exists a single-predecessor exit bb:  */
1730  gcc_assert (single_pred_p (exit_bb));
1731
1732  for (gsi = gsi_start_phis (loop->header), gsi1 = gsi_start_phis (update_bb);
1733       !gsi_end_p (gsi) && !gsi_end_p (gsi1);
1734       gsi_next (&gsi), gsi_next (&gsi1))
1735    {
1736      tree access_fn = NULL;
1737      tree evolution_part;
1738      tree init_expr;
1739      tree step_expr, off;
1740      tree type;
1741      tree var, ni, ni_name;
1742      gimple_stmt_iterator last_gsi;
1743
1744      phi = gsi_stmt (gsi);
1745      phi1 = gsi_stmt (gsi1);
1746      if (vect_print_dump_info (REPORT_DETAILS))
1747        {
1748          fprintf (vect_dump, "vect_update_ivs_after_vectorizer: phi: ");
1749	  print_gimple_stmt (vect_dump, phi, 0, TDF_SLIM);
1750        }
1751
1752      /* Skip virtual phi's.  */
1753      if (!is_gimple_reg (SSA_NAME_VAR (PHI_RESULT (phi))))
1754	{
1755	  if (vect_print_dump_info (REPORT_DETAILS))
1756	    fprintf (vect_dump, "virtual phi. skip.");
1757	  continue;
1758	}
1759
1760      /* Skip reduction phis.  */
1761      if (STMT_VINFO_DEF_TYPE (vinfo_for_stmt (phi)) == vect_reduction_def)
1762        {
1763          if (vect_print_dump_info (REPORT_DETAILS))
1764            fprintf (vect_dump, "reduc phi. skip.");
1765          continue;
1766        }
1767
1768      access_fn = analyze_scalar_evolution (loop, PHI_RESULT (phi));
1769      gcc_assert (access_fn);
1770      /* We can end up with an access_fn like
1771           (short int) {(short unsigned int) i_49, +, 1}_1
1772	 for further analysis we need to strip the outer cast but we
1773	 need to preserve the original type.  */
1774      type = TREE_TYPE (access_fn);
1775      STRIP_NOPS (access_fn);
1776      evolution_part =
1777	 unshare_expr (evolution_part_in_loop_num (access_fn, loop->num));
1778      gcc_assert (evolution_part != NULL_TREE);
1779
1780      /* FORNOW: We do not support IVs whose evolution function is a polynomial
1781         of degree >= 2 or exponential.  */
1782      gcc_assert (!tree_is_chrec (evolution_part));
1783
1784      step_expr = evolution_part;
1785      init_expr = unshare_expr (initial_condition_in_loop_num (access_fn,
1786							       loop->num));
1787      init_expr = fold_convert (type, init_expr);
1788
1789      off = fold_build2 (MULT_EXPR, TREE_TYPE (step_expr),
1790			 fold_convert (TREE_TYPE (step_expr), niters),
1791			 step_expr);
1792      if (POINTER_TYPE_P (TREE_TYPE (init_expr)))
1793	ni = fold_build2 (POINTER_PLUS_EXPR, TREE_TYPE (init_expr),
1794			  init_expr,
1795			  fold_convert (sizetype, off));
1796      else
1797	ni = fold_build2 (PLUS_EXPR, TREE_TYPE (init_expr),
1798			  init_expr,
1799			  fold_convert (TREE_TYPE (init_expr), off));
1800
1801      var = create_tmp_var (TREE_TYPE (init_expr), "tmp");
1802      add_referenced_var (var);
1803
1804      last_gsi = gsi_last_bb (exit_bb);
1805      ni_name = force_gimple_operand_gsi (&last_gsi, ni, false, var,
1806					  true, GSI_SAME_STMT);
1807
1808      /* Fix phi expressions in the successor bb.  */
1809      adjust_phi_and_debug_stmts (phi1, update_e, ni_name);
1810    }
1811}
1812
1813/* Return the more conservative threshold between the
1814   min_profitable_iters returned by the cost model and the user
1815   specified threshold, if provided.  */
1816
1817static unsigned int
1818conservative_cost_threshold (loop_vec_info loop_vinfo,
1819			     int min_profitable_iters)
1820{
1821  unsigned int th;
1822  int min_scalar_loop_bound;
1823
1824  min_scalar_loop_bound = ((PARAM_VALUE (PARAM_MIN_VECT_LOOP_BOUND)
1825			    * LOOP_VINFO_VECT_FACTOR (loop_vinfo)) - 1);
1826
1827  /* Use the cost model only if it is more conservative than user specified
1828     threshold.  */
1829  th = (unsigned) min_scalar_loop_bound;
1830  if (min_profitable_iters
1831      && (!min_scalar_loop_bound
1832          || min_profitable_iters > min_scalar_loop_bound))
1833    th = (unsigned) min_profitable_iters;
1834
1835  if (th && vect_print_dump_info (REPORT_COST))
1836    fprintf (vect_dump, "Profitability threshold is %u loop iterations.", th);
1837
1838  return th;
1839}
1840
1841/* Function vect_do_peeling_for_loop_bound
1842
1843   Peel the last iterations of the loop represented by LOOP_VINFO.
1844   The peeled iterations form a new epilog loop.  Given that the loop now
1845   iterates NITERS times, the new epilog loop iterates
1846   NITERS % VECTORIZATION_FACTOR times.
1847
1848   The original loop will later be made to iterate
1849   NITERS / VECTORIZATION_FACTOR times (this value is placed into RATIO).
1850
1851   COND_EXPR and COND_EXPR_STMT_LIST are combined with a new generated
1852   test.  */
1853
1854void
1855vect_do_peeling_for_loop_bound (loop_vec_info loop_vinfo, tree *ratio,
1856				tree cond_expr, gimple_seq cond_expr_stmt_list)
1857{
1858  tree ni_name, ratio_mult_vf_name;
1859  struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
1860  struct loop *new_loop;
1861  edge update_e;
1862  basic_block preheader;
1863  int loop_num;
1864  bool check_profitability = false;
1865  unsigned int th = 0;
1866  int min_profitable_iters;
1867
1868  if (vect_print_dump_info (REPORT_DETAILS))
1869    fprintf (vect_dump, "=== vect_do_peeling_for_loop_bound ===");
1870
1871  initialize_original_copy_tables ();
1872
1873  /* Generate the following variables on the preheader of original loop:
1874
1875     ni_name = number of iteration the original loop executes
1876     ratio = ni_name / vf
1877     ratio_mult_vf_name = ratio * vf  */
1878  vect_generate_tmps_on_preheader (loop_vinfo, &ni_name,
1879				   &ratio_mult_vf_name, ratio,
1880				   cond_expr_stmt_list);
1881
1882  loop_num  = loop->num;
1883
1884  /* If cost model check not done during versioning and
1885     peeling for alignment.  */
1886  if (!LOOP_REQUIRES_VERSIONING_FOR_ALIGNMENT (loop_vinfo)
1887      && !LOOP_REQUIRES_VERSIONING_FOR_ALIAS (loop_vinfo)
1888      && !LOOP_PEELING_FOR_ALIGNMENT (loop_vinfo)
1889      && !cond_expr)
1890    {
1891      check_profitability = true;
1892
1893      /* Get profitability threshold for vectorized loop.  */
1894      min_profitable_iters = LOOP_VINFO_COST_MODEL_MIN_ITERS (loop_vinfo);
1895
1896      th = conservative_cost_threshold (loop_vinfo,
1897					min_profitable_iters);
1898    }
1899
1900  new_loop = slpeel_tree_peel_loop_to_edge (loop, single_exit (loop),
1901                                            ratio_mult_vf_name, ni_name, false,
1902                                            th, check_profitability,
1903					    cond_expr, cond_expr_stmt_list);
1904  gcc_assert (new_loop);
1905  gcc_assert (loop_num == loop->num);
1906#ifdef ENABLE_CHECKING
1907  slpeel_verify_cfg_after_peeling (loop, new_loop);
1908#endif
1909
1910  /* A guard that controls whether the new_loop is to be executed or skipped
1911     is placed in LOOP->exit.  LOOP->exit therefore has two successors - one
1912     is the preheader of NEW_LOOP, where the IVs from LOOP are used.  The other
1913     is a bb after NEW_LOOP, where these IVs are not used.  Find the edge that
1914     is on the path where the LOOP IVs are used and need to be updated.  */
1915
1916  preheader = loop_preheader_edge (new_loop)->src;
1917  if (EDGE_PRED (preheader, 0)->src == single_exit (loop)->dest)
1918    update_e = EDGE_PRED (preheader, 0);
1919  else
1920    update_e = EDGE_PRED (preheader, 1);
1921
1922  /* Update IVs of original loop as if they were advanced
1923     by ratio_mult_vf_name steps.  */
1924  vect_update_ivs_after_vectorizer (loop_vinfo, ratio_mult_vf_name, update_e);
1925
1926  /* After peeling we have to reset scalar evolution analyzer.  */
1927  scev_reset ();
1928
1929  free_original_copy_tables ();
1930}
1931
1932
1933/* Function vect_gen_niters_for_prolog_loop
1934
1935   Set the number of iterations for the loop represented by LOOP_VINFO
1936   to the minimum between LOOP_NITERS (the original iteration count of the loop)
1937   and the misalignment of DR - the data reference recorded in
1938   LOOP_VINFO_UNALIGNED_DR (LOOP_VINFO).  As a result, after the execution of
1939   this loop, the data reference DR will refer to an aligned location.
1940
1941   The following computation is generated:
1942
1943   If the misalignment of DR is known at compile time:
1944     addr_mis = int mis = DR_MISALIGNMENT (dr);
1945   Else, compute address misalignment in bytes:
1946     addr_mis = addr & (vectype_size - 1)
1947
1948   prolog_niters = min (LOOP_NITERS, ((VF - addr_mis/elem_size)&(VF-1))/step)
1949
1950   (elem_size = element type size; an element is the scalar element whose type
1951   is the inner type of the vectype)
1952
1953   When the step of the data-ref in the loop is not 1 (as in interleaved data
1954   and SLP), the number of iterations of the prolog must be divided by the step
1955   (which is equal to the size of interleaved group).
1956
1957   The above formulas assume that VF == number of elements in the vector. This
1958   may not hold when there are multiple-types in the loop.
1959   In this case, for some data-references in the loop the VF does not represent
1960   the number of elements that fit in the vector.  Therefore, instead of VF we
1961   use TYPE_VECTOR_SUBPARTS.  */
1962
1963static tree
1964vect_gen_niters_for_prolog_loop (loop_vec_info loop_vinfo, tree loop_niters,
1965				 tree *wide_prolog_niters)
1966{
1967  struct data_reference *dr = LOOP_VINFO_UNALIGNED_DR (loop_vinfo);
1968  struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
1969  tree var;
1970  gimple_seq stmts;
1971  tree iters, iters_name;
1972  edge pe;
1973  basic_block new_bb;
1974  gimple dr_stmt = DR_STMT (dr);
1975  stmt_vec_info stmt_info = vinfo_for_stmt (dr_stmt);
1976  tree vectype = STMT_VINFO_VECTYPE (stmt_info);
1977  int vectype_align = TYPE_ALIGN (vectype) / BITS_PER_UNIT;
1978  tree niters_type = TREE_TYPE (loop_niters);
1979  int step = 1;
1980  int element_size = GET_MODE_SIZE (TYPE_MODE (TREE_TYPE (DR_REF (dr))));
1981  int nelements = TYPE_VECTOR_SUBPARTS (vectype);
1982
1983  if (STMT_VINFO_STRIDED_ACCESS (stmt_info))
1984    step = DR_GROUP_SIZE (vinfo_for_stmt (DR_GROUP_FIRST_DR (stmt_info)));
1985
1986  pe = loop_preheader_edge (loop);
1987
1988  if (LOOP_PEELING_FOR_ALIGNMENT (loop_vinfo) > 0)
1989    {
1990      int byte_misalign = LOOP_PEELING_FOR_ALIGNMENT (loop_vinfo);
1991      int elem_misalign = byte_misalign / element_size;
1992
1993      if (vect_print_dump_info (REPORT_DETAILS))
1994        fprintf (vect_dump, "known alignment = %d.", byte_misalign);
1995
1996      iters = build_int_cst (niters_type,
1997                     (((nelements - elem_misalign) & (nelements - 1)) / step));
1998    }
1999  else
2000    {
2001      gimple_seq new_stmts = NULL;
2002      tree start_addr = vect_create_addr_base_for_vector_ref (dr_stmt,
2003						&new_stmts, NULL_TREE, loop);
2004      tree ptr_type = TREE_TYPE (start_addr);
2005      tree size = TYPE_SIZE (ptr_type);
2006      tree type = lang_hooks.types.type_for_size (tree_low_cst (size, 1), 1);
2007      tree vectype_size_minus_1 = build_int_cst (type, vectype_align - 1);
2008      tree elem_size_log =
2009        build_int_cst (type, exact_log2 (vectype_align/nelements));
2010      tree nelements_minus_1 = build_int_cst (type, nelements - 1);
2011      tree nelements_tree = build_int_cst (type, nelements);
2012      tree byte_misalign;
2013      tree elem_misalign;
2014
2015      new_bb = gsi_insert_seq_on_edge_immediate (pe, new_stmts);
2016      gcc_assert (!new_bb);
2017
2018      /* Create:  byte_misalign = addr & (vectype_size - 1)  */
2019      byte_misalign =
2020        fold_build2 (BIT_AND_EXPR, type, fold_convert (type, start_addr), vectype_size_minus_1);
2021
2022      /* Create:  elem_misalign = byte_misalign / element_size  */
2023      elem_misalign =
2024        fold_build2 (RSHIFT_EXPR, type, byte_misalign, elem_size_log);
2025
2026      /* Create:  (niters_type) (nelements - elem_misalign)&(nelements - 1)  */
2027      iters = fold_build2 (MINUS_EXPR, type, nelements_tree, elem_misalign);
2028      iters = fold_build2 (BIT_AND_EXPR, type, iters, nelements_minus_1);
2029      iters = fold_convert (niters_type, iters);
2030    }
2031
2032  /* Create:  prolog_loop_niters = min (iters, loop_niters) */
2033  /* If the loop bound is known at compile time we already verified that it is
2034     greater than vf; since the misalignment ('iters') is at most vf, there's
2035     no need to generate the MIN_EXPR in this case.  */
2036  if (TREE_CODE (loop_niters) != INTEGER_CST)
2037    iters = fold_build2 (MIN_EXPR, niters_type, iters, loop_niters);
2038
2039  if (vect_print_dump_info (REPORT_DETAILS))
2040    {
2041      fprintf (vect_dump, "niters for prolog loop: ");
2042      print_generic_expr (vect_dump, iters, TDF_SLIM);
2043    }
2044
2045  var = create_tmp_var (niters_type, "prolog_loop_niters");
2046  add_referenced_var (var);
2047  stmts = NULL;
2048  iters_name = force_gimple_operand (iters, &stmts, false, var);
2049  if (types_compatible_p (sizetype, niters_type))
2050    *wide_prolog_niters = iters_name;
2051  else
2052    {
2053      gimple_seq seq = NULL;
2054      tree wide_iters = fold_convert (sizetype, iters);
2055      var = create_tmp_var (sizetype, "prolog_loop_niters");
2056      add_referenced_var (var);
2057      *wide_prolog_niters = force_gimple_operand (wide_iters, &seq, false,
2058						  var);
2059      if (seq)
2060	gimple_seq_add_seq (&stmts, seq);
2061    }
2062
2063  /* Insert stmt on loop preheader edge.  */
2064  if (stmts)
2065    {
2066      basic_block new_bb = gsi_insert_seq_on_edge_immediate (pe, stmts);
2067      gcc_assert (!new_bb);
2068    }
2069
2070  return iters_name;
2071}
2072
2073
2074/* Function vect_update_init_of_dr
2075
2076   NITERS iterations were peeled from LOOP.  DR represents a data reference
2077   in LOOP.  This function updates the information recorded in DR to
2078   account for the fact that the first NITERS iterations had already been
2079   executed.  Specifically, it updates the OFFSET field of DR.  */
2080
2081static void
2082vect_update_init_of_dr (struct data_reference *dr, tree niters)
2083{
2084  tree offset = DR_OFFSET (dr);
2085
2086  niters = fold_build2 (MULT_EXPR, sizetype,
2087			fold_convert (sizetype, niters),
2088			fold_convert (sizetype, DR_STEP (dr)));
2089  offset = fold_build2 (PLUS_EXPR, sizetype,
2090			fold_convert (sizetype, offset), niters);
2091  DR_OFFSET (dr) = offset;
2092}
2093
2094
2095/* Function vect_update_inits_of_drs
2096
2097   NITERS iterations were peeled from the loop represented by LOOP_VINFO.
2098   This function updates the information recorded for the data references in
2099   the loop to account for the fact that the first NITERS iterations had
2100   already been executed.  Specifically, it updates the initial_condition of
2101   the access_function of all the data_references in the loop.  */
2102
2103static void
2104vect_update_inits_of_drs (loop_vec_info loop_vinfo, tree niters)
2105{
2106  unsigned int i;
2107  VEC (data_reference_p, heap) *datarefs = LOOP_VINFO_DATAREFS (loop_vinfo);
2108  struct data_reference *dr;
2109
2110  if (vect_print_dump_info (REPORT_DETAILS))
2111    fprintf (vect_dump, "=== vect_update_inits_of_dr ===");
2112
2113  for (i = 0; VEC_iterate (data_reference_p, datarefs, i, dr); i++)
2114    vect_update_init_of_dr (dr, niters);
2115}
2116
2117
2118/* Function vect_do_peeling_for_alignment
2119
2120   Peel the first 'niters' iterations of the loop represented by LOOP_VINFO.
2121   'niters' is set to the misalignment of one of the data references in the
2122   loop, thereby forcing it to refer to an aligned location at the beginning
2123   of the execution of this loop.  The data reference for which we are
2124   peeling is recorded in LOOP_VINFO_UNALIGNED_DR.  */
2125
2126void
2127vect_do_peeling_for_alignment (loop_vec_info loop_vinfo)
2128{
2129  struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
2130  tree niters_of_prolog_loop, ni_name;
2131  tree n_iters;
2132  tree wide_prolog_niters;
2133  struct loop *new_loop;
2134  unsigned int th = 0;
2135  int min_profitable_iters;
2136
2137  if (vect_print_dump_info (REPORT_DETAILS))
2138    fprintf (vect_dump, "=== vect_do_peeling_for_alignment ===");
2139
2140  initialize_original_copy_tables ();
2141
2142  ni_name = vect_build_loop_niters (loop_vinfo, NULL);
2143  niters_of_prolog_loop = vect_gen_niters_for_prolog_loop (loop_vinfo, ni_name,
2144							   &wide_prolog_niters);
2145
2146
2147  /* Get profitability threshold for vectorized loop.  */
2148  min_profitable_iters = LOOP_VINFO_COST_MODEL_MIN_ITERS (loop_vinfo);
2149  th = conservative_cost_threshold (loop_vinfo,
2150				    min_profitable_iters);
2151
2152  /* Peel the prolog loop and iterate it niters_of_prolog_loop.  */
2153  new_loop =
2154    slpeel_tree_peel_loop_to_edge (loop, loop_preheader_edge (loop),
2155				   niters_of_prolog_loop, ni_name, true,
2156				   th, true, NULL_TREE, NULL);
2157
2158  gcc_assert (new_loop);
2159#ifdef ENABLE_CHECKING
2160  slpeel_verify_cfg_after_peeling (new_loop, loop);
2161#endif
2162
2163  /* Update number of times loop executes.  */
2164  n_iters = LOOP_VINFO_NITERS (loop_vinfo);
2165  LOOP_VINFO_NITERS (loop_vinfo) = fold_build2 (MINUS_EXPR,
2166		TREE_TYPE (n_iters), n_iters, niters_of_prolog_loop);
2167
2168  /* Update the init conditions of the access functions of all data refs.  */
2169  vect_update_inits_of_drs (loop_vinfo, wide_prolog_niters);
2170
2171  /* After peeling we have to reset scalar evolution analyzer.  */
2172  scev_reset ();
2173
2174  free_original_copy_tables ();
2175}
2176
2177
2178/* Function vect_create_cond_for_align_checks.
2179
2180   Create a conditional expression that represents the alignment checks for
2181   all of data references (array element references) whose alignment must be
2182   checked at runtime.
2183
2184   Input:
2185   COND_EXPR  - input conditional expression.  New conditions will be chained
2186                with logical AND operation.
2187   LOOP_VINFO - two fields of the loop information are used.
2188                LOOP_VINFO_PTR_MASK is the mask used to check the alignment.
2189                LOOP_VINFO_MAY_MISALIGN_STMTS contains the refs to be checked.
2190
2191   Output:
2192   COND_EXPR_STMT_LIST - statements needed to construct the conditional
2193                         expression.
2194   The returned value is the conditional expression to be used in the if
2195   statement that controls which version of the loop gets executed at runtime.
2196
2197   The algorithm makes two assumptions:
2198     1) The number of bytes "n" in a vector is a power of 2.
2199     2) An address "a" is aligned if a%n is zero and that this
2200        test can be done as a&(n-1) == 0.  For example, for 16
2201        byte vectors the test is a&0xf == 0.  */
2202
2203static void
2204vect_create_cond_for_align_checks (loop_vec_info loop_vinfo,
2205                                   tree *cond_expr,
2206				   gimple_seq *cond_expr_stmt_list)
2207{
2208  struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
2209  VEC(gimple,heap) *may_misalign_stmts
2210    = LOOP_VINFO_MAY_MISALIGN_STMTS (loop_vinfo);
2211  gimple ref_stmt;
2212  int mask = LOOP_VINFO_PTR_MASK (loop_vinfo);
2213  tree mask_cst;
2214  unsigned int i;
2215  tree psize;
2216  tree int_ptrsize_type;
2217  char tmp_name[20];
2218  tree or_tmp_name = NULL_TREE;
2219  tree and_tmp, and_tmp_name;
2220  gimple and_stmt;
2221  tree ptrsize_zero;
2222  tree part_cond_expr;
2223
2224  /* Check that mask is one less than a power of 2, i.e., mask is
2225     all zeros followed by all ones.  */
2226  gcc_assert ((mask != 0) && ((mask & (mask+1)) == 0));
2227
2228  /* CHECKME: what is the best integer or unsigned type to use to hold a
2229     cast from a pointer value?  */
2230  psize = TYPE_SIZE (ptr_type_node);
2231  int_ptrsize_type
2232    = lang_hooks.types.type_for_size (tree_low_cst (psize, 1), 0);
2233
2234  /* Create expression (mask & (dr_1 || ... || dr_n)) where dr_i is the address
2235     of the first vector of the i'th data reference. */
2236
2237  for (i = 0; VEC_iterate (gimple, may_misalign_stmts, i, ref_stmt); i++)
2238    {
2239      gimple_seq new_stmt_list = NULL;
2240      tree addr_base;
2241      tree addr_tmp, addr_tmp_name;
2242      tree or_tmp, new_or_tmp_name;
2243      gimple addr_stmt, or_stmt;
2244
2245      /* create: addr_tmp = (int)(address_of_first_vector) */
2246      addr_base =
2247	vect_create_addr_base_for_vector_ref (ref_stmt, &new_stmt_list,
2248					      NULL_TREE, loop);
2249      if (new_stmt_list != NULL)
2250	gimple_seq_add_seq (cond_expr_stmt_list, new_stmt_list);
2251
2252      sprintf (tmp_name, "%s%d", "addr2int", i);
2253      addr_tmp = create_tmp_var (int_ptrsize_type, tmp_name);
2254      add_referenced_var (addr_tmp);
2255      addr_tmp_name = make_ssa_name (addr_tmp, NULL);
2256      addr_stmt = gimple_build_assign_with_ops (NOP_EXPR, addr_tmp_name,
2257						addr_base, NULL_TREE);
2258      SSA_NAME_DEF_STMT (addr_tmp_name) = addr_stmt;
2259      gimple_seq_add_stmt (cond_expr_stmt_list, addr_stmt);
2260
2261      /* The addresses are OR together.  */
2262
2263      if (or_tmp_name != NULL_TREE)
2264        {
2265          /* create: or_tmp = or_tmp | addr_tmp */
2266          sprintf (tmp_name, "%s%d", "orptrs", i);
2267          or_tmp = create_tmp_var (int_ptrsize_type, tmp_name);
2268          add_referenced_var (or_tmp);
2269	  new_or_tmp_name = make_ssa_name (or_tmp, NULL);
2270	  or_stmt = gimple_build_assign_with_ops (BIT_IOR_EXPR,
2271						  new_or_tmp_name,
2272						  or_tmp_name, addr_tmp_name);
2273          SSA_NAME_DEF_STMT (new_or_tmp_name) = or_stmt;
2274	  gimple_seq_add_stmt (cond_expr_stmt_list, or_stmt);
2275          or_tmp_name = new_or_tmp_name;
2276        }
2277      else
2278        or_tmp_name = addr_tmp_name;
2279
2280    } /* end for i */
2281
2282  mask_cst = build_int_cst (int_ptrsize_type, mask);
2283
2284  /* create: and_tmp = or_tmp & mask  */
2285  and_tmp = create_tmp_var (int_ptrsize_type, "andmask" );
2286  add_referenced_var (and_tmp);
2287  and_tmp_name = make_ssa_name (and_tmp, NULL);
2288
2289  and_stmt = gimple_build_assign_with_ops (BIT_AND_EXPR, and_tmp_name,
2290					   or_tmp_name, mask_cst);
2291  SSA_NAME_DEF_STMT (and_tmp_name) = and_stmt;
2292  gimple_seq_add_stmt (cond_expr_stmt_list, and_stmt);
2293
2294  /* Make and_tmp the left operand of the conditional test against zero.
2295     if and_tmp has a nonzero bit then some address is unaligned.  */
2296  ptrsize_zero = build_int_cst (int_ptrsize_type, 0);
2297  part_cond_expr = fold_build2 (EQ_EXPR, boolean_type_node,
2298				and_tmp_name, ptrsize_zero);
2299  if (*cond_expr)
2300    *cond_expr = fold_build2 (TRUTH_AND_EXPR, boolean_type_node,
2301			      *cond_expr, part_cond_expr);
2302  else
2303    *cond_expr = part_cond_expr;
2304}
2305
2306
2307/* Function vect_vfa_segment_size.
2308
2309   Create an expression that computes the size of segment
2310   that will be accessed for a data reference.  The functions takes into
2311   account that realignment loads may access one more vector.
2312
2313   Input:
2314     DR: The data reference.
2315     VECT_FACTOR: vectorization factor.
2316
2317   Return an expression whose value is the size of segment which will be
2318   accessed by DR.  */
2319
2320static tree
2321vect_vfa_segment_size (struct data_reference *dr, tree vect_factor)
2322{
2323  tree segment_length = fold_build2 (MULT_EXPR, integer_type_node,
2324			             DR_STEP (dr), vect_factor);
2325
2326  if (vect_supportable_dr_alignment (dr) == dr_explicit_realign_optimized)
2327    {
2328      tree vector_size = TYPE_SIZE_UNIT
2329			  (STMT_VINFO_VECTYPE (vinfo_for_stmt (DR_STMT (dr))));
2330
2331      segment_length = fold_build2 (PLUS_EXPR, integer_type_node,
2332				    segment_length, vector_size);
2333    }
2334  return fold_convert (sizetype, segment_length);
2335}
2336
2337
2338/* Function vect_create_cond_for_alias_checks.
2339
2340   Create a conditional expression that represents the run-time checks for
2341   overlapping of address ranges represented by a list of data references
2342   relations passed as input.
2343
2344   Input:
2345   COND_EXPR  - input conditional expression.  New conditions will be chained
2346                with logical AND operation.
2347   LOOP_VINFO - field LOOP_VINFO_MAY_ALIAS_STMTS contains the list of ddrs
2348	        to be checked.
2349
2350   Output:
2351   COND_EXPR - conditional expression.
2352   COND_EXPR_STMT_LIST - statements needed to construct the conditional
2353                         expression.
2354
2355
2356   The returned value is the conditional expression to be used in the if
2357   statement that controls which version of the loop gets executed at runtime.
2358*/
2359
2360static void
2361vect_create_cond_for_alias_checks (loop_vec_info loop_vinfo,
2362				   tree * cond_expr,
2363				   gimple_seq * cond_expr_stmt_list)
2364{
2365  struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
2366  VEC (ddr_p, heap) * may_alias_ddrs =
2367    LOOP_VINFO_MAY_ALIAS_DDRS (loop_vinfo);
2368  tree vect_factor =
2369    build_int_cst (integer_type_node, LOOP_VINFO_VECT_FACTOR (loop_vinfo));
2370
2371  ddr_p ddr;
2372  unsigned int i;
2373  tree part_cond_expr;
2374
2375  /* Create expression
2376     ((store_ptr_0 + store_segment_length_0) < load_ptr_0)
2377     || (load_ptr_0 + load_segment_length_0) < store_ptr_0))
2378     &&
2379     ...
2380     &&
2381     ((store_ptr_n + store_segment_length_n) < load_ptr_n)
2382     || (load_ptr_n + load_segment_length_n) < store_ptr_n))  */
2383
2384  if (VEC_empty (ddr_p, may_alias_ddrs))
2385    return;
2386
2387  for (i = 0; VEC_iterate (ddr_p, may_alias_ddrs, i, ddr); i++)
2388    {
2389      struct data_reference *dr_a, *dr_b;
2390      gimple dr_group_first_a, dr_group_first_b;
2391      tree addr_base_a, addr_base_b;
2392      tree segment_length_a, segment_length_b;
2393      gimple stmt_a, stmt_b;
2394
2395      dr_a = DDR_A (ddr);
2396      stmt_a = DR_STMT (DDR_A (ddr));
2397      dr_group_first_a = DR_GROUP_FIRST_DR (vinfo_for_stmt (stmt_a));
2398      if (dr_group_first_a)
2399        {
2400	  stmt_a = dr_group_first_a;
2401	  dr_a = STMT_VINFO_DATA_REF (vinfo_for_stmt (stmt_a));
2402	}
2403
2404      dr_b = DDR_B (ddr);
2405      stmt_b = DR_STMT (DDR_B (ddr));
2406      dr_group_first_b = DR_GROUP_FIRST_DR (vinfo_for_stmt (stmt_b));
2407      if (dr_group_first_b)
2408        {
2409	  stmt_b = dr_group_first_b;
2410	  dr_b = STMT_VINFO_DATA_REF (vinfo_for_stmt (stmt_b));
2411	}
2412
2413      addr_base_a =
2414        vect_create_addr_base_for_vector_ref (stmt_a, cond_expr_stmt_list,
2415					      NULL_TREE, loop);
2416      addr_base_b =
2417        vect_create_addr_base_for_vector_ref (stmt_b, cond_expr_stmt_list,
2418					      NULL_TREE, loop);
2419
2420      segment_length_a = vect_vfa_segment_size (dr_a, vect_factor);
2421      segment_length_b = vect_vfa_segment_size (dr_b, vect_factor);
2422
2423      if (vect_print_dump_info (REPORT_DR_DETAILS))
2424	{
2425	  fprintf (vect_dump,
2426		   "create runtime check for data references ");
2427	  print_generic_expr (vect_dump, DR_REF (dr_a), TDF_SLIM);
2428	  fprintf (vect_dump, " and ");
2429	  print_generic_expr (vect_dump, DR_REF (dr_b), TDF_SLIM);
2430	}
2431
2432
2433      part_cond_expr =
2434      	fold_build2 (TRUTH_OR_EXPR, boolean_type_node,
2435	  fold_build2 (LT_EXPR, boolean_type_node,
2436	    fold_build2 (POINTER_PLUS_EXPR, TREE_TYPE (addr_base_a),
2437	      addr_base_a,
2438	      segment_length_a),
2439	    addr_base_b),
2440	  fold_build2 (LT_EXPR, boolean_type_node,
2441	    fold_build2 (POINTER_PLUS_EXPR, TREE_TYPE (addr_base_b),
2442	      addr_base_b,
2443	      segment_length_b),
2444	    addr_base_a));
2445
2446      if (*cond_expr)
2447	*cond_expr = fold_build2 (TRUTH_AND_EXPR, boolean_type_node,
2448				  *cond_expr, part_cond_expr);
2449      else
2450	*cond_expr = part_cond_expr;
2451    }
2452
2453  if (vect_print_dump_info (REPORT_VECTORIZED_LOCATIONS))
2454    fprintf (vect_dump, "created %u versioning for alias checks.\n",
2455             VEC_length (ddr_p, may_alias_ddrs));
2456}
2457
2458
2459/* Function vect_loop_versioning.
2460
2461   If the loop has data references that may or may not be aligned or/and
2462   has data reference relations whose independence was not proven then
2463   two versions of the loop need to be generated, one which is vectorized
2464   and one which isn't.  A test is then generated to control which of the
2465   loops is executed.  The test checks for the alignment of all of the
2466   data references that may or may not be aligned.  An additional
2467   sequence of runtime tests is generated for each pairs of DDRs whose
2468   independence was not proven.  The vectorized version of loop is
2469   executed only if both alias and alignment tests are passed.
2470
2471   The test generated to check which version of loop is executed
2472   is modified to also check for profitability as indicated by the
2473   cost model initially.
2474
2475   The versioning precondition(s) are placed in *COND_EXPR and
2476   *COND_EXPR_STMT_LIST.  If DO_VERSIONING is true versioning is
2477   also performed, otherwise only the conditions are generated.  */
2478
2479void
2480vect_loop_versioning (loop_vec_info loop_vinfo, bool do_versioning,
2481		      tree *cond_expr, gimple_seq *cond_expr_stmt_list)
2482{
2483  struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
2484  basic_block condition_bb;
2485  gimple_stmt_iterator gsi, cond_exp_gsi;
2486  basic_block merge_bb;
2487  basic_block new_exit_bb;
2488  edge new_exit_e, e;
2489  gimple orig_phi, new_phi;
2490  tree arg;
2491  unsigned prob = 4 * REG_BR_PROB_BASE / 5;
2492  gimple_seq gimplify_stmt_list = NULL;
2493  tree scalar_loop_iters = LOOP_VINFO_NITERS (loop_vinfo);
2494  int min_profitable_iters = 0;
2495  unsigned int th;
2496
2497  /* Get profitability threshold for vectorized loop.  */
2498  min_profitable_iters = LOOP_VINFO_COST_MODEL_MIN_ITERS (loop_vinfo);
2499
2500  th = conservative_cost_threshold (loop_vinfo,
2501				    min_profitable_iters);
2502
2503  *cond_expr =
2504    fold_build2 (GT_EXPR, boolean_type_node, scalar_loop_iters,
2505 	         build_int_cst (TREE_TYPE (scalar_loop_iters), th));
2506
2507  *cond_expr = force_gimple_operand (*cond_expr, cond_expr_stmt_list,
2508				     false, NULL_TREE);
2509
2510  if (LOOP_REQUIRES_VERSIONING_FOR_ALIGNMENT (loop_vinfo))
2511      vect_create_cond_for_align_checks (loop_vinfo, cond_expr,
2512					 cond_expr_stmt_list);
2513
2514  if (LOOP_REQUIRES_VERSIONING_FOR_ALIAS (loop_vinfo))
2515    vect_create_cond_for_alias_checks (loop_vinfo, cond_expr,
2516				       cond_expr_stmt_list);
2517
2518  *cond_expr =
2519    fold_build2 (NE_EXPR, boolean_type_node, *cond_expr, integer_zero_node);
2520  *cond_expr =
2521    force_gimple_operand (*cond_expr, &gimplify_stmt_list, true, NULL_TREE);
2522  gimple_seq_add_seq (cond_expr_stmt_list, gimplify_stmt_list);
2523
2524  /* If we only needed the extra conditions and a new loop copy
2525     bail out here.  */
2526  if (!do_versioning)
2527    return;
2528
2529  initialize_original_copy_tables ();
2530  loop_version (loop, *cond_expr, &condition_bb,
2531		prob, prob, REG_BR_PROB_BASE - prob, true);
2532  free_original_copy_tables();
2533
2534  /* Loop versioning violates an assumption we try to maintain during
2535     vectorization - that the loop exit block has a single predecessor.
2536     After versioning, the exit block of both loop versions is the same
2537     basic block (i.e. it has two predecessors). Just in order to simplify
2538     following transformations in the vectorizer, we fix this situation
2539     here by adding a new (empty) block on the exit-edge of the loop,
2540     with the proper loop-exit phis to maintain loop-closed-form.  */
2541
2542  merge_bb = single_exit (loop)->dest;
2543  gcc_assert (EDGE_COUNT (merge_bb->preds) == 2);
2544  new_exit_bb = split_edge (single_exit (loop));
2545  new_exit_e = single_exit (loop);
2546  e = EDGE_SUCC (new_exit_bb, 0);
2547
2548  for (gsi = gsi_start_phis (merge_bb); !gsi_end_p (gsi); gsi_next (&gsi))
2549    {
2550      orig_phi = gsi_stmt (gsi);
2551      new_phi = create_phi_node (SSA_NAME_VAR (PHI_RESULT (orig_phi)),
2552				  new_exit_bb);
2553      arg = PHI_ARG_DEF_FROM_EDGE (orig_phi, e);
2554      add_phi_arg (new_phi, arg, new_exit_e,
2555		   gimple_phi_arg_location_from_edge (orig_phi, e));
2556      adjust_phi_and_debug_stmts (orig_phi, e, PHI_RESULT (new_phi));
2557    }
2558
2559  /* End loop-exit-fixes after versioning.  */
2560
2561  update_ssa (TODO_update_ssa);
2562  if (*cond_expr_stmt_list)
2563    {
2564      cond_exp_gsi = gsi_last_bb (condition_bb);
2565      gsi_insert_seq_before (&cond_exp_gsi, *cond_expr_stmt_list,
2566			     GSI_SAME_STMT);
2567      *cond_expr_stmt_list = NULL;
2568    }
2569  *cond_expr = NULL_TREE;
2570}
2571
2572