tree-vect-loop-manip.c revision 1.1.1.1
1/* Vectorizer Specific Loop Manipulations
2   Copyright (C) 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2010
3   Free Software Foundation, Inc.
4   Contributed by Dorit Naishlos <dorit@il.ibm.com>
5   and Ira Rosen <irar@il.ibm.com>
6
7This file is part of GCC.
8
9GCC is free software; you can redistribute it and/or modify it under
10the terms of the GNU General Public License as published by the Free
11Software Foundation; either version 3, or (at your option) any later
12version.
13
14GCC is distributed in the hope that it will be useful, but WITHOUT ANY
15WARRANTY; without even the implied warranty of MERCHANTABILITY or
16FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
17for more details.
18
19You should have received a copy of the GNU General Public License
20along with GCC; see the file COPYING3.  If not see
21<http://www.gnu.org/licenses/>.  */
22
23#include "config.h"
24#include "system.h"
25#include "coretypes.h"
26#include "tm.h"
27#include "ggc.h"
28#include "tree.h"
29#include "basic-block.h"
30#include "diagnostic.h"
31#include "tree-flow.h"
32#include "tree-dump.h"
33#include "cfgloop.h"
34#include "cfglayout.h"
35#include "expr.h"
36#include "toplev.h"
37#include "tree-scalar-evolution.h"
38#include "tree-vectorizer.h"
39#include "langhooks.h"
40
41/*************************************************************************
42  Simple Loop Peeling Utilities
43
44  Utilities to support loop peeling for vectorization purposes.
45 *************************************************************************/
46
47
48/* Renames the use *OP_P.  */
49
50static void
51rename_use_op (use_operand_p op_p)
52{
53  tree new_name;
54
55  if (TREE_CODE (USE_FROM_PTR (op_p)) != SSA_NAME)
56    return;
57
58  new_name = get_current_def (USE_FROM_PTR (op_p));
59
60  /* Something defined outside of the loop.  */
61  if (!new_name)
62    return;
63
64  /* An ordinary ssa name defined in the loop.  */
65
66  SET_USE (op_p, new_name);
67}
68
69
70/* Renames the variables in basic block BB.  */
71
72void
73rename_variables_in_bb (basic_block bb)
74{
75  gimple_stmt_iterator gsi;
76  gimple stmt;
77  use_operand_p use_p;
78  ssa_op_iter iter;
79  edge e;
80  edge_iterator ei;
81  struct loop *loop = bb->loop_father;
82
83  for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
84    {
85      stmt = gsi_stmt (gsi);
86      FOR_EACH_SSA_USE_OPERAND (use_p, stmt, iter, SSA_OP_ALL_USES)
87	rename_use_op (use_p);
88    }
89
90  FOR_EACH_EDGE (e, ei, bb->succs)
91    {
92      if (!flow_bb_inside_loop_p (loop, e->dest))
93	continue;
94      for (gsi = gsi_start_phis (e->dest); !gsi_end_p (gsi); gsi_next (&gsi))
95        rename_use_op (PHI_ARG_DEF_PTR_FROM_EDGE (gsi_stmt (gsi), e));
96    }
97}
98
99
100/* Renames variables in new generated LOOP.  */
101
102void
103rename_variables_in_loop (struct loop *loop)
104{
105  unsigned i;
106  basic_block *bbs;
107
108  bbs = get_loop_body (loop);
109
110  for (i = 0; i < loop->num_nodes; i++)
111    rename_variables_in_bb (bbs[i]);
112
113  free (bbs);
114}
115
116typedef struct
117{
118  tree from, to;
119  basic_block bb;
120} adjust_info;
121
122DEF_VEC_O(adjust_info);
123DEF_VEC_ALLOC_O_STACK(adjust_info);
124#define VEC_adjust_info_stack_alloc(alloc) VEC_stack_alloc (adjust_info, alloc)
125
126/* A stack of values to be adjusted in debug stmts.  We have to
127   process them LIFO, so that the closest substitution applies.  If we
128   processed them FIFO, without the stack, we might substitute uses
129   with a PHI DEF that would soon become non-dominant, and when we got
130   to the suitable one, it wouldn't have anything to substitute any
131   more.  */
132static VEC(adjust_info, stack) *adjust_vec;
133
134/* Adjust any debug stmts that referenced AI->from values to use the
135   loop-closed AI->to, if the references are dominated by AI->bb and
136   not by the definition of AI->from.  */
137
138static void
139adjust_debug_stmts_now (adjust_info *ai)
140{
141  basic_block bbphi = ai->bb;
142  tree orig_def = ai->from;
143  tree new_def = ai->to;
144  imm_use_iterator imm_iter;
145  gimple stmt;
146  basic_block bbdef = gimple_bb (SSA_NAME_DEF_STMT (orig_def));
147
148  gcc_assert (dom_info_available_p (CDI_DOMINATORS));
149
150  /* Adjust any debug stmts that held onto non-loop-closed
151     references.  */
152  FOR_EACH_IMM_USE_STMT (stmt, imm_iter, orig_def)
153    {
154      use_operand_p use_p;
155      basic_block bbuse;
156
157      if (!is_gimple_debug (stmt))
158	continue;
159
160      gcc_assert (gimple_debug_bind_p (stmt));
161
162      bbuse = gimple_bb (stmt);
163
164      if ((bbuse == bbphi
165	   || dominated_by_p (CDI_DOMINATORS, bbuse, bbphi))
166	  && !(bbuse == bbdef
167	       || dominated_by_p (CDI_DOMINATORS, bbuse, bbdef)))
168	{
169	  if (new_def)
170	    FOR_EACH_IMM_USE_ON_STMT (use_p, imm_iter)
171	      SET_USE (use_p, new_def);
172	  else
173	    {
174	      gimple_debug_bind_reset_value (stmt);
175	      update_stmt (stmt);
176	    }
177	}
178    }
179}
180
181/* Adjust debug stmts as scheduled before.  */
182
183static void
184adjust_vec_debug_stmts (void)
185{
186  if (!MAY_HAVE_DEBUG_STMTS)
187    return;
188
189  gcc_assert (adjust_vec);
190
191  while (!VEC_empty (adjust_info, adjust_vec))
192    {
193      adjust_debug_stmts_now (VEC_last (adjust_info, adjust_vec));
194      VEC_pop (adjust_info, adjust_vec);
195    }
196
197  VEC_free (adjust_info, stack, adjust_vec);
198}
199
200/* Adjust any debug stmts that referenced FROM values to use the
201   loop-closed TO, if the references are dominated by BB and not by
202   the definition of FROM.  If adjust_vec is non-NULL, adjustments
203   will be postponed until adjust_vec_debug_stmts is called.  */
204
205static void
206adjust_debug_stmts (tree from, tree to, basic_block bb)
207{
208  adjust_info ai;
209
210  if (MAY_HAVE_DEBUG_STMTS && TREE_CODE (from) == SSA_NAME
211      && SSA_NAME_VAR (from) != gimple_vop (cfun))
212    {
213      ai.from = from;
214      ai.to = to;
215      ai.bb = bb;
216
217      if (adjust_vec)
218	VEC_safe_push (adjust_info, stack, adjust_vec, &ai);
219      else
220	adjust_debug_stmts_now (&ai);
221    }
222}
223
224/* Change E's phi arg in UPDATE_PHI to NEW_DEF, and record information
225   to adjust any debug stmts that referenced the old phi arg,
226   presumably non-loop-closed references left over from other
227   transformations.  */
228
229static void
230adjust_phi_and_debug_stmts (gimple update_phi, edge e, tree new_def)
231{
232  tree orig_def = PHI_ARG_DEF_FROM_EDGE (update_phi, e);
233
234  SET_PHI_ARG_DEF (update_phi, e->dest_idx, new_def);
235
236  if (MAY_HAVE_DEBUG_STMTS)
237    adjust_debug_stmts (orig_def, PHI_RESULT (update_phi),
238			gimple_bb (update_phi));
239}
240
241
242/* Update the PHI nodes of NEW_LOOP.
243
244   NEW_LOOP is a duplicate of ORIG_LOOP.
245   AFTER indicates whether NEW_LOOP executes before or after ORIG_LOOP:
246   AFTER is true if NEW_LOOP executes after ORIG_LOOP, and false if it
247   executes before it.  */
248
249static void
250slpeel_update_phis_for_duplicate_loop (struct loop *orig_loop,
251				       struct loop *new_loop, bool after)
252{
253  tree new_ssa_name;
254  gimple phi_new, phi_orig;
255  tree def;
256  edge orig_loop_latch = loop_latch_edge (orig_loop);
257  edge orig_entry_e = loop_preheader_edge (orig_loop);
258  edge new_loop_exit_e = single_exit (new_loop);
259  edge new_loop_entry_e = loop_preheader_edge (new_loop);
260  edge entry_arg_e = (after ? orig_loop_latch : orig_entry_e);
261  gimple_stmt_iterator gsi_new, gsi_orig;
262
263  /*
264     step 1. For each loop-header-phi:
265             Add the first phi argument for the phi in NEW_LOOP
266            (the one associated with the entry of NEW_LOOP)
267
268     step 2. For each loop-header-phi:
269             Add the second phi argument for the phi in NEW_LOOP
270            (the one associated with the latch of NEW_LOOP)
271
272     step 3. Update the phis in the successor block of NEW_LOOP.
273
274        case 1: NEW_LOOP was placed before ORIG_LOOP:
275                The successor block of NEW_LOOP is the header of ORIG_LOOP.
276                Updating the phis in the successor block can therefore be done
277                along with the scanning of the loop header phis, because the
278                header blocks of ORIG_LOOP and NEW_LOOP have exactly the same
279                phi nodes, organized in the same order.
280
281        case 2: NEW_LOOP was placed after ORIG_LOOP:
282                The successor block of NEW_LOOP is the original exit block of
283                ORIG_LOOP - the phis to be updated are the loop-closed-ssa phis.
284                We postpone updating these phis to a later stage (when
285                loop guards are added).
286   */
287
288
289  /* Scan the phis in the headers of the old and new loops
290     (they are organized in exactly the same order).  */
291
292  for (gsi_new = gsi_start_phis (new_loop->header),
293       gsi_orig = gsi_start_phis (orig_loop->header);
294       !gsi_end_p (gsi_new) && !gsi_end_p (gsi_orig);
295       gsi_next (&gsi_new), gsi_next (&gsi_orig))
296    {
297      source_location locus;
298      phi_new = gsi_stmt (gsi_new);
299      phi_orig = gsi_stmt (gsi_orig);
300
301      /* step 1.  */
302      def = PHI_ARG_DEF_FROM_EDGE (phi_orig, entry_arg_e);
303      locus = gimple_phi_arg_location_from_edge (phi_orig, entry_arg_e);
304      add_phi_arg (phi_new, def, new_loop_entry_e, locus);
305
306      /* step 2.  */
307      def = PHI_ARG_DEF_FROM_EDGE (phi_orig, orig_loop_latch);
308      locus = gimple_phi_arg_location_from_edge (phi_orig, orig_loop_latch);
309      if (TREE_CODE (def) != SSA_NAME)
310        continue;
311
312      new_ssa_name = get_current_def (def);
313      if (!new_ssa_name)
314	{
315	  /* This only happens if there are no definitions
316	     inside the loop. use the phi_result in this case.  */
317	  new_ssa_name = PHI_RESULT (phi_new);
318	}
319
320      /* An ordinary ssa name defined in the loop.  */
321      add_phi_arg (phi_new, new_ssa_name, loop_latch_edge (new_loop), locus);
322
323      /* Drop any debug references outside the loop, if they would
324	 become ill-formed SSA.  */
325      adjust_debug_stmts (def, NULL, single_exit (orig_loop)->dest);
326
327      /* step 3 (case 1).  */
328      if (!after)
329        {
330          gcc_assert (new_loop_exit_e == orig_entry_e);
331	  adjust_phi_and_debug_stmts (phi_orig, new_loop_exit_e, new_ssa_name);
332        }
333    }
334}
335
336
337/* Update PHI nodes for a guard of the LOOP.
338
339   Input:
340   - LOOP, GUARD_EDGE: LOOP is a loop for which we added guard code that
341        controls whether LOOP is to be executed.  GUARD_EDGE is the edge that
342        originates from the guard-bb, skips LOOP and reaches the (unique) exit
343        bb of LOOP.  This loop-exit-bb is an empty bb with one successor.
344        We denote this bb NEW_MERGE_BB because before the guard code was added
345        it had a single predecessor (the LOOP header), and now it became a merge
346        point of two paths - the path that ends with the LOOP exit-edge, and
347        the path that ends with GUARD_EDGE.
348   - NEW_EXIT_BB: New basic block that is added by this function between LOOP
349        and NEW_MERGE_BB. It is used to place loop-closed-ssa-form exit-phis.
350
351   ===> The CFG before the guard-code was added:
352        LOOP_header_bb:
353          loop_body
354          if (exit_loop) goto update_bb
355          else           goto LOOP_header_bb
356        update_bb:
357
358   ==> The CFG after the guard-code was added:
359        guard_bb:
360          if (LOOP_guard_condition) goto new_merge_bb
361          else                      goto LOOP_header_bb
362        LOOP_header_bb:
363          loop_body
364          if (exit_loop_condition) goto new_merge_bb
365          else                     goto LOOP_header_bb
366        new_merge_bb:
367          goto update_bb
368        update_bb:
369
370   ==> The CFG after this function:
371        guard_bb:
372          if (LOOP_guard_condition) goto new_merge_bb
373          else                      goto LOOP_header_bb
374        LOOP_header_bb:
375          loop_body
376          if (exit_loop_condition) goto new_exit_bb
377          else                     goto LOOP_header_bb
378        new_exit_bb:
379        new_merge_bb:
380          goto update_bb
381        update_bb:
382
383   This function:
384   1. creates and updates the relevant phi nodes to account for the new
385      incoming edge (GUARD_EDGE) into NEW_MERGE_BB. This involves:
386      1.1. Create phi nodes at NEW_MERGE_BB.
387      1.2. Update the phi nodes at the successor of NEW_MERGE_BB (denoted
388           UPDATE_BB).  UPDATE_BB was the exit-bb of LOOP before NEW_MERGE_BB
389   2. preserves loop-closed-ssa-form by creating the required phi nodes
390      at the exit of LOOP (i.e, in NEW_EXIT_BB).
391
392   There are two flavors to this function:
393
394   slpeel_update_phi_nodes_for_guard1:
395     Here the guard controls whether we enter or skip LOOP, where LOOP is a
396     prolog_loop (loop1 below), and the new phis created in NEW_MERGE_BB are
397     for variables that have phis in the loop header.
398
399   slpeel_update_phi_nodes_for_guard2:
400     Here the guard controls whether we enter or skip LOOP, where LOOP is an
401     epilog_loop (loop2 below), and the new phis created in NEW_MERGE_BB are
402     for variables that have phis in the loop exit.
403
404   I.E., the overall structure is:
405
406        loop1_preheader_bb:
407                guard1 (goto loop1/merge1_bb)
408        loop1
409        loop1_exit_bb:
410                guard2 (goto merge1_bb/merge2_bb)
411        merge1_bb
412        loop2
413        loop2_exit_bb
414        merge2_bb
415        next_bb
416
417   slpeel_update_phi_nodes_for_guard1 takes care of creating phis in
418   loop1_exit_bb and merge1_bb. These are entry phis (phis for the vars
419   that have phis in loop1->header).
420
421   slpeel_update_phi_nodes_for_guard2 takes care of creating phis in
422   loop2_exit_bb and merge2_bb. These are exit phis (phis for the vars
423   that have phis in next_bb). It also adds some of these phis to
424   loop1_exit_bb.
425
426   slpeel_update_phi_nodes_for_guard1 is always called before
427   slpeel_update_phi_nodes_for_guard2. They are both needed in order
428   to create correct data-flow and loop-closed-ssa-form.
429
430   Generally slpeel_update_phi_nodes_for_guard1 creates phis for variables
431   that change between iterations of a loop (and therefore have a phi-node
432   at the loop entry), whereas slpeel_update_phi_nodes_for_guard2 creates
433   phis for variables that are used out of the loop (and therefore have
434   loop-closed exit phis). Some variables may be both updated between
435   iterations and used after the loop. This is why in loop1_exit_bb we
436   may need both entry_phis (created by slpeel_update_phi_nodes_for_guard1)
437   and exit phis (created by slpeel_update_phi_nodes_for_guard2).
438
439   - IS_NEW_LOOP: if IS_NEW_LOOP is true, then LOOP is a newly created copy of
440     an original loop. i.e., we have:
441
442           orig_loop
443           guard_bb (goto LOOP/new_merge)
444           new_loop <-- LOOP
445           new_exit
446           new_merge
447           next_bb
448
449     If IS_NEW_LOOP is false, then LOOP is an original loop, in which case we
450     have:
451
452           new_loop
453           guard_bb (goto LOOP/new_merge)
454           orig_loop <-- LOOP
455           new_exit
456           new_merge
457           next_bb
458
459     The SSA names defined in the original loop have a current
460     reaching definition that that records the corresponding new
461     ssa-name used in the new duplicated loop copy.
462  */
463
464/* Function slpeel_update_phi_nodes_for_guard1
465
466   Input:
467   - GUARD_EDGE, LOOP, IS_NEW_LOOP, NEW_EXIT_BB - as explained above.
468   - DEFS - a bitmap of ssa names to mark new names for which we recorded
469            information.
470
471   In the context of the overall structure, we have:
472
473        loop1_preheader_bb:
474                guard1 (goto loop1/merge1_bb)
475LOOP->  loop1
476        loop1_exit_bb:
477                guard2 (goto merge1_bb/merge2_bb)
478        merge1_bb
479        loop2
480        loop2_exit_bb
481        merge2_bb
482        next_bb
483
484   For each name updated between loop iterations (i.e - for each name that has
485   an entry (loop-header) phi in LOOP) we create a new phi in:
486   1. merge1_bb (to account for the edge from guard1)
487   2. loop1_exit_bb (an exit-phi to keep LOOP in loop-closed form)
488*/
489
490static void
491slpeel_update_phi_nodes_for_guard1 (edge guard_edge, struct loop *loop,
492                                    bool is_new_loop, basic_block *new_exit_bb,
493                                    bitmap *defs)
494{
495  gimple orig_phi, new_phi;
496  gimple update_phi, update_phi2;
497  tree guard_arg, loop_arg;
498  basic_block new_merge_bb = guard_edge->dest;
499  edge e = EDGE_SUCC (new_merge_bb, 0);
500  basic_block update_bb = e->dest;
501  basic_block orig_bb = loop->header;
502  edge new_exit_e;
503  tree current_new_name;
504  gimple_stmt_iterator gsi_orig, gsi_update;
505
506  /* Create new bb between loop and new_merge_bb.  */
507  *new_exit_bb = split_edge (single_exit (loop));
508
509  new_exit_e = EDGE_SUCC (*new_exit_bb, 0);
510
511  for (gsi_orig = gsi_start_phis (orig_bb),
512       gsi_update = gsi_start_phis (update_bb);
513       !gsi_end_p (gsi_orig) && !gsi_end_p (gsi_update);
514       gsi_next (&gsi_orig), gsi_next (&gsi_update))
515    {
516      source_location loop_locus, guard_locus;;
517      orig_phi = gsi_stmt (gsi_orig);
518      update_phi = gsi_stmt (gsi_update);
519
520      /** 1. Handle new-merge-point phis  **/
521
522      /* 1.1. Generate new phi node in NEW_MERGE_BB:  */
523      new_phi = create_phi_node (SSA_NAME_VAR (PHI_RESULT (orig_phi)),
524                                 new_merge_bb);
525
526      /* 1.2. NEW_MERGE_BB has two incoming edges: GUARD_EDGE and the exit-edge
527            of LOOP. Set the two phi args in NEW_PHI for these edges:  */
528      loop_arg = PHI_ARG_DEF_FROM_EDGE (orig_phi, EDGE_SUCC (loop->latch, 0));
529      loop_locus = gimple_phi_arg_location_from_edge (orig_phi,
530						      EDGE_SUCC (loop->latch,
531								 0));
532      guard_arg = PHI_ARG_DEF_FROM_EDGE (orig_phi, loop_preheader_edge (loop));
533      guard_locus
534	= gimple_phi_arg_location_from_edge (orig_phi,
535					     loop_preheader_edge (loop));
536
537      add_phi_arg (new_phi, loop_arg, new_exit_e, loop_locus);
538      add_phi_arg (new_phi, guard_arg, guard_edge, guard_locus);
539
540      /* 1.3. Update phi in successor block.  */
541      gcc_assert (PHI_ARG_DEF_FROM_EDGE (update_phi, e) == loop_arg
542                  || PHI_ARG_DEF_FROM_EDGE (update_phi, e) == guard_arg);
543      adjust_phi_and_debug_stmts (update_phi, e, PHI_RESULT (new_phi));
544      update_phi2 = new_phi;
545
546
547      /** 2. Handle loop-closed-ssa-form phis  **/
548
549      if (!is_gimple_reg (PHI_RESULT (orig_phi)))
550	continue;
551
552      /* 2.1. Generate new phi node in NEW_EXIT_BB:  */
553      new_phi = create_phi_node (SSA_NAME_VAR (PHI_RESULT (orig_phi)),
554                                 *new_exit_bb);
555
556      /* 2.2. NEW_EXIT_BB has one incoming edge: the exit-edge of the loop.  */
557      add_phi_arg (new_phi, loop_arg, single_exit (loop), loop_locus);
558
559      /* 2.3. Update phi in successor of NEW_EXIT_BB:  */
560      gcc_assert (PHI_ARG_DEF_FROM_EDGE (update_phi2, new_exit_e) == loop_arg);
561      adjust_phi_and_debug_stmts (update_phi2, new_exit_e,
562				  PHI_RESULT (new_phi));
563
564      /* 2.4. Record the newly created name with set_current_def.
565         We want to find a name such that
566                name = get_current_def (orig_loop_name)
567         and to set its current definition as follows:
568                set_current_def (name, new_phi_name)
569
570         If LOOP is a new loop then loop_arg is already the name we're
571         looking for. If LOOP is the original loop, then loop_arg is
572         the orig_loop_name and the relevant name is recorded in its
573         current reaching definition.  */
574      if (is_new_loop)
575        current_new_name = loop_arg;
576      else
577        {
578          current_new_name = get_current_def (loop_arg);
579	  /* current_def is not available only if the variable does not
580	     change inside the loop, in which case we also don't care
581	     about recording a current_def for it because we won't be
582	     trying to create loop-exit-phis for it.  */
583	  if (!current_new_name)
584	    continue;
585        }
586      gcc_assert (get_current_def (current_new_name) == NULL_TREE);
587
588      set_current_def (current_new_name, PHI_RESULT (new_phi));
589      bitmap_set_bit (*defs, SSA_NAME_VERSION (current_new_name));
590    }
591}
592
593
594/* Function slpeel_update_phi_nodes_for_guard2
595
596   Input:
597   - GUARD_EDGE, LOOP, IS_NEW_LOOP, NEW_EXIT_BB - as explained above.
598
599   In the context of the overall structure, we have:
600
601        loop1_preheader_bb:
602                guard1 (goto loop1/merge1_bb)
603        loop1
604        loop1_exit_bb:
605                guard2 (goto merge1_bb/merge2_bb)
606        merge1_bb
607LOOP->  loop2
608        loop2_exit_bb
609        merge2_bb
610        next_bb
611
612   For each name used out side the loop (i.e - for each name that has an exit
613   phi in next_bb) we create a new phi in:
614   1. merge2_bb (to account for the edge from guard_bb)
615   2. loop2_exit_bb (an exit-phi to keep LOOP in loop-closed form)
616   3. guard2 bb (an exit phi to keep the preceding loop in loop-closed form),
617      if needed (if it wasn't handled by slpeel_update_phis_nodes_for_phi1).
618*/
619
620static void
621slpeel_update_phi_nodes_for_guard2 (edge guard_edge, struct loop *loop,
622                                    bool is_new_loop, basic_block *new_exit_bb)
623{
624  gimple orig_phi, new_phi;
625  gimple update_phi, update_phi2;
626  tree guard_arg, loop_arg;
627  basic_block new_merge_bb = guard_edge->dest;
628  edge e = EDGE_SUCC (new_merge_bb, 0);
629  basic_block update_bb = e->dest;
630  edge new_exit_e;
631  tree orig_def, orig_def_new_name;
632  tree new_name, new_name2;
633  tree arg;
634  gimple_stmt_iterator gsi;
635
636  /* Create new bb between loop and new_merge_bb.  */
637  *new_exit_bb = split_edge (single_exit (loop));
638
639  new_exit_e = EDGE_SUCC (*new_exit_bb, 0);
640
641  for (gsi = gsi_start_phis (update_bb); !gsi_end_p (gsi); gsi_next (&gsi))
642    {
643      update_phi = gsi_stmt (gsi);
644      orig_phi = update_phi;
645      orig_def = PHI_ARG_DEF_FROM_EDGE (orig_phi, e);
646      /* This loop-closed-phi actually doesn't represent a use
647         out of the loop - the phi arg is a constant.  */
648      if (TREE_CODE (orig_def) != SSA_NAME)
649        continue;
650      orig_def_new_name = get_current_def (orig_def);
651      arg = NULL_TREE;
652
653      /** 1. Handle new-merge-point phis  **/
654
655      /* 1.1. Generate new phi node in NEW_MERGE_BB:  */
656      new_phi = create_phi_node (SSA_NAME_VAR (PHI_RESULT (orig_phi)),
657                                 new_merge_bb);
658
659      /* 1.2. NEW_MERGE_BB has two incoming edges: GUARD_EDGE and the exit-edge
660            of LOOP. Set the two PHI args in NEW_PHI for these edges:  */
661      new_name = orig_def;
662      new_name2 = NULL_TREE;
663      if (orig_def_new_name)
664        {
665          new_name = orig_def_new_name;
666	  /* Some variables have both loop-entry-phis and loop-exit-phis.
667	     Such variables were given yet newer names by phis placed in
668	     guard_bb by slpeel_update_phi_nodes_for_guard1. I.e:
669	     new_name2 = get_current_def (get_current_def (orig_name)).  */
670          new_name2 = get_current_def (new_name);
671        }
672
673      if (is_new_loop)
674        {
675          guard_arg = orig_def;
676          loop_arg = new_name;
677        }
678      else
679        {
680          guard_arg = new_name;
681          loop_arg = orig_def;
682        }
683      if (new_name2)
684        guard_arg = new_name2;
685
686      add_phi_arg (new_phi, loop_arg, new_exit_e, UNKNOWN_LOCATION);
687      add_phi_arg (new_phi, guard_arg, guard_edge, UNKNOWN_LOCATION);
688
689      /* 1.3. Update phi in successor block.  */
690      gcc_assert (PHI_ARG_DEF_FROM_EDGE (update_phi, e) == orig_def);
691      adjust_phi_and_debug_stmts (update_phi, e, PHI_RESULT (new_phi));
692      update_phi2 = new_phi;
693
694
695      /** 2. Handle loop-closed-ssa-form phis  **/
696
697      /* 2.1. Generate new phi node in NEW_EXIT_BB:  */
698      new_phi = create_phi_node (SSA_NAME_VAR (PHI_RESULT (orig_phi)),
699                                 *new_exit_bb);
700
701      /* 2.2. NEW_EXIT_BB has one incoming edge: the exit-edge of the loop.  */
702      add_phi_arg (new_phi, loop_arg, single_exit (loop), UNKNOWN_LOCATION);
703
704      /* 2.3. Update phi in successor of NEW_EXIT_BB:  */
705      gcc_assert (PHI_ARG_DEF_FROM_EDGE (update_phi2, new_exit_e) == loop_arg);
706      adjust_phi_and_debug_stmts (update_phi2, new_exit_e,
707				  PHI_RESULT (new_phi));
708
709
710      /** 3. Handle loop-closed-ssa-form phis for first loop  **/
711
712      /* 3.1. Find the relevant names that need an exit-phi in
713	 GUARD_BB, i.e. names for which
714	 slpeel_update_phi_nodes_for_guard1 had not already created a
715	 phi node. This is the case for names that are used outside
716	 the loop (and therefore need an exit phi) but are not updated
717	 across loop iterations (and therefore don't have a
718	 loop-header-phi).
719
720	 slpeel_update_phi_nodes_for_guard1 is responsible for
721	 creating loop-exit phis in GUARD_BB for names that have a
722	 loop-header-phi.  When such a phi is created we also record
723	 the new name in its current definition.  If this new name
724	 exists, then guard_arg was set to this new name (see 1.2
725	 above).  Therefore, if guard_arg is not this new name, this
726	 is an indication that an exit-phi in GUARD_BB was not yet
727	 created, so we take care of it here.  */
728      if (guard_arg == new_name2)
729	continue;
730      arg = guard_arg;
731
732      /* 3.2. Generate new phi node in GUARD_BB:  */
733      new_phi = create_phi_node (SSA_NAME_VAR (PHI_RESULT (orig_phi)),
734                                 guard_edge->src);
735
736      /* 3.3. GUARD_BB has one incoming edge:  */
737      gcc_assert (EDGE_COUNT (guard_edge->src->preds) == 1);
738      add_phi_arg (new_phi, arg, EDGE_PRED (guard_edge->src, 0),
739		   UNKNOWN_LOCATION);
740
741      /* 3.4. Update phi in successor of GUARD_BB:  */
742      gcc_assert (PHI_ARG_DEF_FROM_EDGE (update_phi2, guard_edge)
743                                                                == guard_arg);
744      adjust_phi_and_debug_stmts (update_phi2, guard_edge,
745				  PHI_RESULT (new_phi));
746    }
747}
748
749
750/* Make the LOOP iterate NITERS times. This is done by adding a new IV
751   that starts at zero, increases by one and its limit is NITERS.
752
753   Assumption: the exit-condition of LOOP is the last stmt in the loop.  */
754
755void
756slpeel_make_loop_iterate_ntimes (struct loop *loop, tree niters)
757{
758  tree indx_before_incr, indx_after_incr;
759  gimple cond_stmt;
760  gimple orig_cond;
761  edge exit_edge = single_exit (loop);
762  gimple_stmt_iterator loop_cond_gsi;
763  gimple_stmt_iterator incr_gsi;
764  bool insert_after;
765  tree init = build_int_cst (TREE_TYPE (niters), 0);
766  tree step = build_int_cst (TREE_TYPE (niters), 1);
767  LOC loop_loc;
768  enum tree_code code;
769
770  orig_cond = get_loop_exit_condition (loop);
771  gcc_assert (orig_cond);
772  loop_cond_gsi = gsi_for_stmt (orig_cond);
773
774  standard_iv_increment_position (loop, &incr_gsi, &insert_after);
775  create_iv (init, step, NULL_TREE, loop,
776             &incr_gsi, insert_after, &indx_before_incr, &indx_after_incr);
777
778  indx_after_incr = force_gimple_operand_gsi (&loop_cond_gsi, indx_after_incr,
779					      true, NULL_TREE, true,
780					      GSI_SAME_STMT);
781  niters = force_gimple_operand_gsi (&loop_cond_gsi, niters, true, NULL_TREE,
782				     true, GSI_SAME_STMT);
783
784  code = (exit_edge->flags & EDGE_TRUE_VALUE) ? GE_EXPR : LT_EXPR;
785  cond_stmt = gimple_build_cond (code, indx_after_incr, niters, NULL_TREE,
786				 NULL_TREE);
787
788  gsi_insert_before (&loop_cond_gsi, cond_stmt, GSI_SAME_STMT);
789
790  /* Remove old loop exit test:  */
791  gsi_remove (&loop_cond_gsi, true);
792
793  loop_loc = find_loop_location (loop);
794  if (dump_file && (dump_flags & TDF_DETAILS))
795    {
796      if (loop_loc != UNKNOWN_LOC)
797        fprintf (dump_file, "\nloop at %s:%d: ",
798                 LOC_FILE (loop_loc), LOC_LINE (loop_loc));
799      print_gimple_stmt (dump_file, cond_stmt, 0, TDF_SLIM);
800    }
801
802  loop->nb_iterations = niters;
803}
804
805
806/* Given LOOP this function generates a new copy of it and puts it
807   on E which is either the entry or exit of LOOP.  */
808
809struct loop *
810slpeel_tree_duplicate_loop_to_edge_cfg (struct loop *loop, edge e)
811{
812  struct loop *new_loop;
813  basic_block *new_bbs, *bbs;
814  bool at_exit;
815  bool was_imm_dom;
816  basic_block exit_dest;
817  gimple phi;
818  tree phi_arg;
819  edge exit, new_exit;
820  gimple_stmt_iterator gsi;
821
822  at_exit = (e == single_exit (loop));
823  if (!at_exit && e != loop_preheader_edge (loop))
824    return NULL;
825
826  bbs = get_loop_body (loop);
827
828  /* Check whether duplication is possible.  */
829  if (!can_copy_bbs_p (bbs, loop->num_nodes))
830    {
831      free (bbs);
832      return NULL;
833    }
834
835  /* Generate new loop structure.  */
836  new_loop = duplicate_loop (loop, loop_outer (loop));
837  if (!new_loop)
838    {
839      free (bbs);
840      return NULL;
841    }
842
843  exit_dest = single_exit (loop)->dest;
844  was_imm_dom = (get_immediate_dominator (CDI_DOMINATORS,
845					  exit_dest) == loop->header ?
846		 true : false);
847
848  new_bbs = XNEWVEC (basic_block, loop->num_nodes);
849
850  exit = single_exit (loop);
851  copy_bbs (bbs, loop->num_nodes, new_bbs,
852	    &exit, 1, &new_exit, NULL,
853	    e->src);
854
855  /* Duplicating phi args at exit bbs as coming
856     also from exit of duplicated loop.  */
857  for (gsi = gsi_start_phis (exit_dest); !gsi_end_p (gsi); gsi_next (&gsi))
858    {
859      phi = gsi_stmt (gsi);
860      phi_arg = PHI_ARG_DEF_FROM_EDGE (phi, single_exit (loop));
861      if (phi_arg)
862	{
863	  edge new_loop_exit_edge;
864	  source_location locus;
865
866	  locus = gimple_phi_arg_location_from_edge (phi, single_exit (loop));
867	  if (EDGE_SUCC (new_loop->header, 0)->dest == new_loop->latch)
868	    new_loop_exit_edge = EDGE_SUCC (new_loop->header, 1);
869	  else
870	    new_loop_exit_edge = EDGE_SUCC (new_loop->header, 0);
871
872	  add_phi_arg (phi, phi_arg, new_loop_exit_edge, locus);
873	}
874    }
875
876  if (at_exit) /* Add the loop copy at exit.  */
877    {
878      redirect_edge_and_branch_force (e, new_loop->header);
879      PENDING_STMT (e) = NULL;
880      set_immediate_dominator (CDI_DOMINATORS, new_loop->header, e->src);
881      if (was_imm_dom)
882	set_immediate_dominator (CDI_DOMINATORS, exit_dest, new_loop->header);
883    }
884  else /* Add the copy at entry.  */
885    {
886      edge new_exit_e;
887      edge entry_e = loop_preheader_edge (loop);
888      basic_block preheader = entry_e->src;
889
890      if (!flow_bb_inside_loop_p (new_loop,
891				  EDGE_SUCC (new_loop->header, 0)->dest))
892        new_exit_e = EDGE_SUCC (new_loop->header, 0);
893      else
894	new_exit_e = EDGE_SUCC (new_loop->header, 1);
895
896      redirect_edge_and_branch_force (new_exit_e, loop->header);
897      PENDING_STMT (new_exit_e) = NULL;
898      set_immediate_dominator (CDI_DOMINATORS, loop->header,
899			       new_exit_e->src);
900
901      /* We have to add phi args to the loop->header here as coming
902	 from new_exit_e edge.  */
903      for (gsi = gsi_start_phis (loop->header);
904           !gsi_end_p (gsi);
905           gsi_next (&gsi))
906	{
907	  phi = gsi_stmt (gsi);
908	  phi_arg = PHI_ARG_DEF_FROM_EDGE (phi, entry_e);
909	  if (phi_arg)
910	    add_phi_arg (phi, phi_arg, new_exit_e,
911			 gimple_phi_arg_location_from_edge (phi, entry_e));
912	}
913
914      redirect_edge_and_branch_force (entry_e, new_loop->header);
915      PENDING_STMT (entry_e) = NULL;
916      set_immediate_dominator (CDI_DOMINATORS, new_loop->header, preheader);
917    }
918
919  free (new_bbs);
920  free (bbs);
921
922  return new_loop;
923}
924
925
926/* Given the condition statement COND, put it as the last statement
927   of GUARD_BB; EXIT_BB is the basic block to skip the loop;
928   Assumes that this is the single exit of the guarded loop.
929   Returns the skip edge, inserts new stmts on the COND_EXPR_STMT_LIST.  */
930
931static edge
932slpeel_add_loop_guard (basic_block guard_bb, tree cond,
933		       gimple_seq cond_expr_stmt_list,
934		       basic_block exit_bb, basic_block dom_bb)
935{
936  gimple_stmt_iterator gsi;
937  edge new_e, enter_e;
938  gimple cond_stmt;
939  gimple_seq gimplify_stmt_list = NULL;
940
941  enter_e = EDGE_SUCC (guard_bb, 0);
942  enter_e->flags &= ~EDGE_FALLTHRU;
943  enter_e->flags |= EDGE_FALSE_VALUE;
944  gsi = gsi_last_bb (guard_bb);
945
946  cond = force_gimple_operand (cond, &gimplify_stmt_list, true, NULL_TREE);
947  if (gimplify_stmt_list)
948    gimple_seq_add_seq (&cond_expr_stmt_list, gimplify_stmt_list);
949  cond_stmt = gimple_build_cond (NE_EXPR,
950				 cond, build_int_cst (TREE_TYPE (cond), 0),
951				 NULL_TREE, NULL_TREE);
952  if (cond_expr_stmt_list)
953    gsi_insert_seq_after (&gsi, cond_expr_stmt_list, GSI_NEW_STMT);
954
955  gsi = gsi_last_bb (guard_bb);
956  gsi_insert_after (&gsi, cond_stmt, GSI_NEW_STMT);
957
958  /* Add new edge to connect guard block to the merge/loop-exit block.  */
959  new_e = make_edge (guard_bb, exit_bb, EDGE_TRUE_VALUE);
960  set_immediate_dominator (CDI_DOMINATORS, exit_bb, dom_bb);
961  return new_e;
962}
963
964
965/* This function verifies that the following restrictions apply to LOOP:
966   (1) it is innermost
967   (2) it consists of exactly 2 basic blocks - header, and an empty latch.
968   (3) it is single entry, single exit
969   (4) its exit condition is the last stmt in the header
970   (5) E is the entry/exit edge of LOOP.
971 */
972
973bool
974slpeel_can_duplicate_loop_p (const struct loop *loop, const_edge e)
975{
976  edge exit_e = single_exit (loop);
977  edge entry_e = loop_preheader_edge (loop);
978  gimple orig_cond = get_loop_exit_condition (loop);
979  gimple_stmt_iterator loop_exit_gsi = gsi_last_bb (exit_e->src);
980
981  if (need_ssa_update_p (cfun))
982    return false;
983
984  if (loop->inner
985      /* All loops have an outer scope; the only case loop->outer is NULL is for
986         the function itself.  */
987      || !loop_outer (loop)
988      || loop->num_nodes != 2
989      || !empty_block_p (loop->latch)
990      || !single_exit (loop)
991      /* Verify that new loop exit condition can be trivially modified.  */
992      || (!orig_cond || orig_cond != gsi_stmt (loop_exit_gsi))
993      || (e != exit_e && e != entry_e))
994    return false;
995
996  return true;
997}
998
999#ifdef ENABLE_CHECKING
1000static void
1001slpeel_verify_cfg_after_peeling (struct loop *first_loop,
1002                                 struct loop *second_loop)
1003{
1004  basic_block loop1_exit_bb = single_exit (first_loop)->dest;
1005  basic_block loop2_entry_bb = loop_preheader_edge (second_loop)->src;
1006  basic_block loop1_entry_bb = loop_preheader_edge (first_loop)->src;
1007
1008  /* A guard that controls whether the second_loop is to be executed or skipped
1009     is placed in first_loop->exit.  first_loop->exit therefore has two
1010     successors - one is the preheader of second_loop, and the other is a bb
1011     after second_loop.
1012   */
1013  gcc_assert (EDGE_COUNT (loop1_exit_bb->succs) == 2);
1014
1015  /* 1. Verify that one of the successors of first_loop->exit is the preheader
1016        of second_loop.  */
1017
1018  /* The preheader of new_loop is expected to have two predecessors:
1019     first_loop->exit and the block that precedes first_loop.  */
1020
1021  gcc_assert (EDGE_COUNT (loop2_entry_bb->preds) == 2
1022              && ((EDGE_PRED (loop2_entry_bb, 0)->src == loop1_exit_bb
1023                   && EDGE_PRED (loop2_entry_bb, 1)->src == loop1_entry_bb)
1024               || (EDGE_PRED (loop2_entry_bb, 1)->src ==  loop1_exit_bb
1025                   && EDGE_PRED (loop2_entry_bb, 0)->src == loop1_entry_bb)));
1026
1027  /* Verify that the other successor of first_loop->exit is after the
1028     second_loop.  */
1029  /* TODO */
1030}
1031#endif
1032
1033/* If the run time cost model check determines that vectorization is
1034   not profitable and hence scalar loop should be generated then set
1035   FIRST_NITERS to prologue peeled iterations. This will allow all the
1036   iterations to be executed in the prologue peeled scalar loop.  */
1037
1038static void
1039set_prologue_iterations (basic_block bb_before_first_loop,
1040			 tree first_niters,
1041			 struct loop *loop,
1042			 unsigned int th)
1043{
1044  edge e;
1045  basic_block cond_bb, then_bb;
1046  tree var, prologue_after_cost_adjust_name;
1047  gimple_stmt_iterator gsi;
1048  gimple newphi;
1049  edge e_true, e_false, e_fallthru;
1050  gimple cond_stmt;
1051  gimple_seq gimplify_stmt_list = NULL, stmts = NULL;
1052  tree cost_pre_condition = NULL_TREE;
1053  tree scalar_loop_iters =
1054    unshare_expr (LOOP_VINFO_NITERS_UNCHANGED (loop_vec_info_for_loop (loop)));
1055
1056  e = single_pred_edge (bb_before_first_loop);
1057  cond_bb = split_edge(e);
1058
1059  e = single_pred_edge (bb_before_first_loop);
1060  then_bb = split_edge(e);
1061  set_immediate_dominator (CDI_DOMINATORS, then_bb, cond_bb);
1062
1063  e_false = make_single_succ_edge (cond_bb, bb_before_first_loop,
1064				   EDGE_FALSE_VALUE);
1065  set_immediate_dominator (CDI_DOMINATORS, bb_before_first_loop, cond_bb);
1066
1067  e_true = EDGE_PRED (then_bb, 0);
1068  e_true->flags &= ~EDGE_FALLTHRU;
1069  e_true->flags |= EDGE_TRUE_VALUE;
1070
1071  e_fallthru = EDGE_SUCC (then_bb, 0);
1072
1073  cost_pre_condition =
1074    fold_build2 (LE_EXPR, boolean_type_node, scalar_loop_iters,
1075	         build_int_cst (TREE_TYPE (scalar_loop_iters), th));
1076  cost_pre_condition =
1077    force_gimple_operand (cost_pre_condition, &gimplify_stmt_list,
1078			  true, NULL_TREE);
1079  cond_stmt = gimple_build_cond (NE_EXPR, cost_pre_condition,
1080				 build_int_cst (TREE_TYPE (cost_pre_condition),
1081						0), NULL_TREE, NULL_TREE);
1082
1083  gsi = gsi_last_bb (cond_bb);
1084  if (gimplify_stmt_list)
1085    gsi_insert_seq_after (&gsi, gimplify_stmt_list, GSI_NEW_STMT);
1086
1087  gsi = gsi_last_bb (cond_bb);
1088  gsi_insert_after (&gsi, cond_stmt, GSI_NEW_STMT);
1089
1090  var = create_tmp_var (TREE_TYPE (scalar_loop_iters),
1091			"prologue_after_cost_adjust");
1092  add_referenced_var (var);
1093  prologue_after_cost_adjust_name =
1094    force_gimple_operand (scalar_loop_iters, &stmts, false, var);
1095
1096  gsi = gsi_last_bb (then_bb);
1097  if (stmts)
1098    gsi_insert_seq_after (&gsi, stmts, GSI_NEW_STMT);
1099
1100  newphi = create_phi_node (var, bb_before_first_loop);
1101  add_phi_arg (newphi, prologue_after_cost_adjust_name, e_fallthru,
1102	       UNKNOWN_LOCATION);
1103  add_phi_arg (newphi, first_niters, e_false, UNKNOWN_LOCATION);
1104
1105  first_niters = PHI_RESULT (newphi);
1106}
1107
1108
1109/* Function slpeel_tree_peel_loop_to_edge.
1110
1111   Peel the first (last) iterations of LOOP into a new prolog (epilog) loop
1112   that is placed on the entry (exit) edge E of LOOP. After this transformation
1113   we have two loops one after the other - first-loop iterates FIRST_NITERS
1114   times, and second-loop iterates the remainder NITERS - FIRST_NITERS times.
1115   If the cost model indicates that it is profitable to emit a scalar
1116   loop instead of the vector one, then the prolog (epilog) loop will iterate
1117   for the entire unchanged scalar iterations of the loop.
1118
1119   Input:
1120   - LOOP: the loop to be peeled.
1121   - E: the exit or entry edge of LOOP.
1122        If it is the entry edge, we peel the first iterations of LOOP. In this
1123        case first-loop is LOOP, and second-loop is the newly created loop.
1124        If it is the exit edge, we peel the last iterations of LOOP. In this
1125        case, first-loop is the newly created loop, and second-loop is LOOP.
1126   - NITERS: the number of iterations that LOOP iterates.
1127   - FIRST_NITERS: the number of iterations that the first-loop should iterate.
1128   - UPDATE_FIRST_LOOP_COUNT:  specified whether this function is responsible
1129        for updating the loop bound of the first-loop to FIRST_NITERS.  If it
1130        is false, the caller of this function may want to take care of this
1131        (this can be useful if we don't want new stmts added to first-loop).
1132   - TH: cost model profitability threshold of iterations for vectorization.
1133   - CHECK_PROFITABILITY: specify whether cost model check has not occurred
1134                          during versioning and hence needs to occur during
1135			  prologue generation or whether cost model check
1136			  has not occurred during prologue generation and hence
1137			  needs to occur during epilogue generation.
1138
1139
1140   Output:
1141   The function returns a pointer to the new loop-copy, or NULL if it failed
1142   to perform the transformation.
1143
1144   The function generates two if-then-else guards: one before the first loop,
1145   and the other before the second loop:
1146   The first guard is:
1147     if (FIRST_NITERS == 0) then skip the first loop,
1148     and go directly to the second loop.
1149   The second guard is:
1150     if (FIRST_NITERS == NITERS) then skip the second loop.
1151
1152   If the optional COND_EXPR and COND_EXPR_STMT_LIST arguments are given
1153   then the generated condition is combined with COND_EXPR and the
1154   statements in COND_EXPR_STMT_LIST are emitted together with it.
1155
1156   FORNOW only simple loops are supported (see slpeel_can_duplicate_loop_p).
1157   FORNOW the resulting code will not be in loop-closed-ssa form.
1158*/
1159
1160static struct loop*
1161slpeel_tree_peel_loop_to_edge (struct loop *loop,
1162			       edge e, tree first_niters,
1163			       tree niters, bool update_first_loop_count,
1164			       unsigned int th, bool check_profitability,
1165			       tree cond_expr, gimple_seq cond_expr_stmt_list)
1166{
1167  struct loop *new_loop = NULL, *first_loop, *second_loop;
1168  edge skip_e;
1169  tree pre_condition = NULL_TREE;
1170  bitmap definitions;
1171  basic_block bb_before_second_loop, bb_after_second_loop;
1172  basic_block bb_before_first_loop;
1173  basic_block bb_between_loops;
1174  basic_block new_exit_bb;
1175  edge exit_e = single_exit (loop);
1176  LOC loop_loc;
1177  tree cost_pre_condition = NULL_TREE;
1178
1179  if (!slpeel_can_duplicate_loop_p (loop, e))
1180    return NULL;
1181
1182  /* We have to initialize cfg_hooks. Then, when calling
1183   cfg_hooks->split_edge, the function tree_split_edge
1184   is actually called and, when calling cfg_hooks->duplicate_block,
1185   the function tree_duplicate_bb is called.  */
1186  gimple_register_cfg_hooks ();
1187
1188
1189  /* 1. Generate a copy of LOOP and put it on E (E is the entry/exit of LOOP).
1190        Resulting CFG would be:
1191
1192        first_loop:
1193        do {
1194        } while ...
1195
1196        second_loop:
1197        do {
1198        } while ...
1199
1200        orig_exit_bb:
1201   */
1202
1203  if (!(new_loop = slpeel_tree_duplicate_loop_to_edge_cfg (loop, e)))
1204    {
1205      loop_loc = find_loop_location (loop);
1206      if (dump_file && (dump_flags & TDF_DETAILS))
1207        {
1208          if (loop_loc != UNKNOWN_LOC)
1209            fprintf (dump_file, "\n%s:%d: note: ",
1210                     LOC_FILE (loop_loc), LOC_LINE (loop_loc));
1211          fprintf (dump_file, "tree_duplicate_loop_to_edge_cfg failed.\n");
1212        }
1213      return NULL;
1214    }
1215
1216  if (MAY_HAVE_DEBUG_STMTS)
1217    {
1218      gcc_assert (!adjust_vec);
1219      adjust_vec = VEC_alloc (adjust_info, stack, 32);
1220    }
1221
1222  if (e == exit_e)
1223    {
1224      /* NEW_LOOP was placed after LOOP.  */
1225      first_loop = loop;
1226      second_loop = new_loop;
1227    }
1228  else
1229    {
1230      /* NEW_LOOP was placed before LOOP.  */
1231      first_loop = new_loop;
1232      second_loop = loop;
1233    }
1234
1235  definitions = ssa_names_to_replace ();
1236  slpeel_update_phis_for_duplicate_loop (loop, new_loop, e == exit_e);
1237  rename_variables_in_loop (new_loop);
1238
1239
1240  /* 2.  Add the guard code in one of the following ways:
1241
1242     2.a Add the guard that controls whether the first loop is executed.
1243         This occurs when this function is invoked for prologue or epilogue
1244	 generation and when the cost model check can be done at compile time.
1245
1246         Resulting CFG would be:
1247
1248         bb_before_first_loop:
1249         if (FIRST_NITERS == 0) GOTO bb_before_second_loop
1250                                GOTO first-loop
1251
1252         first_loop:
1253         do {
1254         } while ...
1255
1256         bb_before_second_loop:
1257
1258         second_loop:
1259         do {
1260         } while ...
1261
1262         orig_exit_bb:
1263
1264     2.b Add the cost model check that allows the prologue
1265         to iterate for the entire unchanged scalar
1266         iterations of the loop in the event that the cost
1267         model indicates that the scalar loop is more
1268         profitable than the vector one. This occurs when
1269	 this function is invoked for prologue generation
1270	 and the cost model check needs to be done at run
1271	 time.
1272
1273         Resulting CFG after prologue peeling would be:
1274
1275         if (scalar_loop_iterations <= th)
1276           FIRST_NITERS = scalar_loop_iterations
1277
1278         bb_before_first_loop:
1279         if (FIRST_NITERS == 0) GOTO bb_before_second_loop
1280                                GOTO first-loop
1281
1282         first_loop:
1283         do {
1284         } while ...
1285
1286         bb_before_second_loop:
1287
1288         second_loop:
1289         do {
1290         } while ...
1291
1292         orig_exit_bb:
1293
1294     2.c Add the cost model check that allows the epilogue
1295         to iterate for the entire unchanged scalar
1296         iterations of the loop in the event that the cost
1297         model indicates that the scalar loop is more
1298         profitable than the vector one. This occurs when
1299	 this function is invoked for epilogue generation
1300	 and the cost model check needs to be done at run
1301	 time.  This check is combined with any pre-existing
1302	 check in COND_EXPR to avoid versioning.
1303
1304         Resulting CFG after prologue peeling would be:
1305
1306         bb_before_first_loop:
1307         if ((scalar_loop_iterations <= th)
1308             ||
1309             FIRST_NITERS == 0) GOTO bb_before_second_loop
1310                                GOTO first-loop
1311
1312         first_loop:
1313         do {
1314         } while ...
1315
1316         bb_before_second_loop:
1317
1318         second_loop:
1319         do {
1320         } while ...
1321
1322         orig_exit_bb:
1323  */
1324
1325  bb_before_first_loop = split_edge (loop_preheader_edge (first_loop));
1326  bb_before_second_loop = split_edge (single_exit (first_loop));
1327
1328  /* Epilogue peeling.  */
1329  if (!update_first_loop_count)
1330    {
1331      pre_condition =
1332	fold_build2 (LE_EXPR, boolean_type_node, first_niters,
1333		     build_int_cst (TREE_TYPE (first_niters), 0));
1334      if (check_profitability)
1335	{
1336	  tree scalar_loop_iters
1337	    = unshare_expr (LOOP_VINFO_NITERS_UNCHANGED
1338					(loop_vec_info_for_loop (loop)));
1339	  cost_pre_condition =
1340	    fold_build2 (LE_EXPR, boolean_type_node, scalar_loop_iters,
1341		         build_int_cst (TREE_TYPE (scalar_loop_iters), th));
1342
1343	  pre_condition = fold_build2 (TRUTH_OR_EXPR, boolean_type_node,
1344				       cost_pre_condition, pre_condition);
1345	}
1346      if (cond_expr)
1347	{
1348	  pre_condition =
1349	    fold_build2 (TRUTH_OR_EXPR, boolean_type_node,
1350			 pre_condition,
1351			 fold_build1 (TRUTH_NOT_EXPR, boolean_type_node,
1352				      cond_expr));
1353	}
1354    }
1355
1356  /* Prologue peeling.  */
1357  else
1358    {
1359      if (check_profitability)
1360	set_prologue_iterations (bb_before_first_loop, first_niters,
1361				 loop, th);
1362
1363      pre_condition =
1364	fold_build2 (LE_EXPR, boolean_type_node, first_niters,
1365		     build_int_cst (TREE_TYPE (first_niters), 0));
1366    }
1367
1368  skip_e = slpeel_add_loop_guard (bb_before_first_loop, pre_condition,
1369				  cond_expr_stmt_list,
1370                                  bb_before_second_loop, bb_before_first_loop);
1371  slpeel_update_phi_nodes_for_guard1 (skip_e, first_loop,
1372				      first_loop == new_loop,
1373				      &new_exit_bb, &definitions);
1374
1375
1376  /* 3. Add the guard that controls whether the second loop is executed.
1377        Resulting CFG would be:
1378
1379        bb_before_first_loop:
1380        if (FIRST_NITERS == 0) GOTO bb_before_second_loop (skip first loop)
1381                               GOTO first-loop
1382
1383        first_loop:
1384        do {
1385        } while ...
1386
1387        bb_between_loops:
1388        if (FIRST_NITERS == NITERS) GOTO bb_after_second_loop (skip second loop)
1389                                    GOTO bb_before_second_loop
1390
1391        bb_before_second_loop:
1392
1393        second_loop:
1394        do {
1395        } while ...
1396
1397        bb_after_second_loop:
1398
1399        orig_exit_bb:
1400   */
1401
1402  bb_between_loops = new_exit_bb;
1403  bb_after_second_loop = split_edge (single_exit (second_loop));
1404
1405  pre_condition =
1406	fold_build2 (EQ_EXPR, boolean_type_node, first_niters, niters);
1407  skip_e = slpeel_add_loop_guard (bb_between_loops, pre_condition, NULL,
1408                                  bb_after_second_loop, bb_before_first_loop);
1409  slpeel_update_phi_nodes_for_guard2 (skip_e, second_loop,
1410                                     second_loop == new_loop, &new_exit_bb);
1411
1412  /* 4. Make first-loop iterate FIRST_NITERS times, if requested.
1413   */
1414  if (update_first_loop_count)
1415    slpeel_make_loop_iterate_ntimes (first_loop, first_niters);
1416
1417  adjust_vec_debug_stmts ();
1418
1419  BITMAP_FREE (definitions);
1420  delete_update_ssa ();
1421
1422  return new_loop;
1423}
1424
1425/* Function vect_get_loop_location.
1426
1427   Extract the location of the loop in the source code.
1428   If the loop is not well formed for vectorization, an estimated
1429   location is calculated.
1430   Return the loop location if succeed and NULL if not.  */
1431
1432LOC
1433find_loop_location (struct loop *loop)
1434{
1435  gimple stmt = NULL;
1436  basic_block bb;
1437  gimple_stmt_iterator si;
1438
1439  if (!loop)
1440    return UNKNOWN_LOC;
1441
1442  stmt = get_loop_exit_condition (loop);
1443
1444  if (stmt && gimple_location (stmt) != UNKNOWN_LOC)
1445    return gimple_location (stmt);
1446
1447  /* If we got here the loop is probably not "well formed",
1448     try to estimate the loop location */
1449
1450  if (!loop->header)
1451    return UNKNOWN_LOC;
1452
1453  bb = loop->header;
1454
1455  for (si = gsi_start_bb (bb); !gsi_end_p (si); gsi_next (&si))
1456    {
1457      stmt = gsi_stmt (si);
1458      if (gimple_location (stmt) != UNKNOWN_LOC)
1459        return gimple_location (stmt);
1460    }
1461
1462  return UNKNOWN_LOC;
1463}
1464
1465
1466/* This function builds ni_name = number of iterations loop executes
1467   on the loop preheader.  If SEQ is given the stmt is instead emitted
1468   there.  */
1469
1470static tree
1471vect_build_loop_niters (loop_vec_info loop_vinfo, gimple_seq seq)
1472{
1473  tree ni_name, var;
1474  gimple_seq stmts = NULL;
1475  edge pe;
1476  struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
1477  tree ni = unshare_expr (LOOP_VINFO_NITERS (loop_vinfo));
1478
1479  var = create_tmp_var (TREE_TYPE (ni), "niters");
1480  add_referenced_var (var);
1481  ni_name = force_gimple_operand (ni, &stmts, false, var);
1482
1483  pe = loop_preheader_edge (loop);
1484  if (stmts)
1485    {
1486      if (seq)
1487	gimple_seq_add_seq (&seq, stmts);
1488      else
1489	{
1490	  basic_block new_bb = gsi_insert_seq_on_edge_immediate (pe, stmts);
1491	  gcc_assert (!new_bb);
1492	}
1493    }
1494
1495  return ni_name;
1496}
1497
1498
1499/* This function generates the following statements:
1500
1501 ni_name = number of iterations loop executes
1502 ratio = ni_name / vf
1503 ratio_mult_vf_name = ratio * vf
1504
1505 and places them at the loop preheader edge or in COND_EXPR_STMT_LIST
1506 if that is non-NULL.  */
1507
1508static void
1509vect_generate_tmps_on_preheader (loop_vec_info loop_vinfo,
1510				 tree *ni_name_ptr,
1511				 tree *ratio_mult_vf_name_ptr,
1512				 tree *ratio_name_ptr,
1513				 gimple_seq cond_expr_stmt_list)
1514{
1515
1516  edge pe;
1517  basic_block new_bb;
1518  gimple_seq stmts;
1519  tree ni_name, ni_minus_gap_name;
1520  tree var;
1521  tree ratio_name;
1522  tree ratio_mult_vf_name;
1523  struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
1524  tree ni = LOOP_VINFO_NITERS (loop_vinfo);
1525  int vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
1526  tree log_vf;
1527
1528  pe = loop_preheader_edge (loop);
1529
1530  /* Generate temporary variable that contains
1531     number of iterations loop executes.  */
1532
1533  ni_name = vect_build_loop_niters (loop_vinfo, cond_expr_stmt_list);
1534  log_vf = build_int_cst (TREE_TYPE (ni), exact_log2 (vf));
1535
1536  /* If epilogue loop is required because of data accesses with gaps, we
1537     subtract one iteration from the total number of iterations here for
1538     correct calculation of RATIO.  */
1539  if (LOOP_VINFO_PEELING_FOR_GAPS (loop_vinfo))
1540    {
1541      ni_minus_gap_name = fold_build2 (MINUS_EXPR, TREE_TYPE (ni_name),
1542				       ni_name,
1543			               build_one_cst (TREE_TYPE (ni_name)));
1544      if (!is_gimple_val (ni_minus_gap_name))
1545	{
1546	  var = create_tmp_var (TREE_TYPE (ni), "ni_gap");
1547          add_referenced_var (var);
1548
1549          stmts = NULL;
1550          ni_minus_gap_name = force_gimple_operand (ni_minus_gap_name, &stmts,
1551						    true, var);
1552          if (cond_expr_stmt_list)
1553            gimple_seq_add_seq (&cond_expr_stmt_list, stmts);
1554          else
1555            {
1556              pe = loop_preheader_edge (loop);
1557              new_bb = gsi_insert_seq_on_edge_immediate (pe, stmts);
1558              gcc_assert (!new_bb);
1559            }
1560        }
1561    }
1562  else
1563    ni_minus_gap_name = ni_name;
1564
1565  /* Create: ratio = ni >> log2(vf) */
1566
1567  ratio_name = fold_build2 (RSHIFT_EXPR, TREE_TYPE (ni_minus_gap_name),
1568			    ni_minus_gap_name, log_vf);
1569  if (!is_gimple_val (ratio_name))
1570    {
1571      var = create_tmp_var (TREE_TYPE (ni), "bnd");
1572      add_referenced_var (var);
1573
1574      stmts = NULL;
1575      ratio_name = force_gimple_operand (ratio_name, &stmts, true, var);
1576      if (cond_expr_stmt_list)
1577	gimple_seq_add_seq (&cond_expr_stmt_list, stmts);
1578      else
1579	{
1580	  pe = loop_preheader_edge (loop);
1581	  new_bb = gsi_insert_seq_on_edge_immediate (pe, stmts);
1582	  gcc_assert (!new_bb);
1583	}
1584    }
1585
1586  /* Create: ratio_mult_vf = ratio << log2 (vf).  */
1587
1588  ratio_mult_vf_name = fold_build2 (LSHIFT_EXPR, TREE_TYPE (ratio_name),
1589				    ratio_name, log_vf);
1590  if (!is_gimple_val (ratio_mult_vf_name))
1591    {
1592      var = create_tmp_var (TREE_TYPE (ni), "ratio_mult_vf");
1593      add_referenced_var (var);
1594
1595      stmts = NULL;
1596      ratio_mult_vf_name = force_gimple_operand (ratio_mult_vf_name, &stmts,
1597						 true, var);
1598      if (cond_expr_stmt_list)
1599	gimple_seq_add_seq (&cond_expr_stmt_list, stmts);
1600      else
1601	{
1602	  pe = loop_preheader_edge (loop);
1603	  new_bb = gsi_insert_seq_on_edge_immediate (pe, stmts);
1604	  gcc_assert (!new_bb);
1605	}
1606    }
1607
1608  *ni_name_ptr = ni_name;
1609  *ratio_mult_vf_name_ptr = ratio_mult_vf_name;
1610  *ratio_name_ptr = ratio_name;
1611
1612  return;
1613}
1614
1615/* Function vect_can_advance_ivs_p
1616
1617   In case the number of iterations that LOOP iterates is unknown at compile
1618   time, an epilog loop will be generated, and the loop induction variables
1619   (IVs) will be "advanced" to the value they are supposed to take just before
1620   the epilog loop.  Here we check that the access function of the loop IVs
1621   and the expression that represents the loop bound are simple enough.
1622   These restrictions will be relaxed in the future.  */
1623
1624bool
1625vect_can_advance_ivs_p (loop_vec_info loop_vinfo)
1626{
1627  struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
1628  basic_block bb = loop->header;
1629  gimple phi;
1630  gimple_stmt_iterator gsi;
1631
1632  /* Analyze phi functions of the loop header.  */
1633
1634  if (vect_print_dump_info (REPORT_DETAILS))
1635    fprintf (vect_dump, "vect_can_advance_ivs_p:");
1636
1637  for (gsi = gsi_start_phis (bb); !gsi_end_p (gsi); gsi_next (&gsi))
1638    {
1639      tree access_fn = NULL;
1640      tree evolution_part;
1641
1642      phi = gsi_stmt (gsi);
1643      if (vect_print_dump_info (REPORT_DETAILS))
1644	{
1645          fprintf (vect_dump, "Analyze phi: ");
1646          print_gimple_stmt (vect_dump, phi, 0, TDF_SLIM);
1647	}
1648
1649      /* Skip virtual phi's. The data dependences that are associated with
1650         virtual defs/uses (i.e., memory accesses) are analyzed elsewhere.  */
1651
1652      if (!is_gimple_reg (SSA_NAME_VAR (PHI_RESULT (phi))))
1653	{
1654	  if (vect_print_dump_info (REPORT_DETAILS))
1655	    fprintf (vect_dump, "virtual phi. skip.");
1656	  continue;
1657	}
1658
1659      /* Skip reduction phis.  */
1660
1661      if (STMT_VINFO_DEF_TYPE (vinfo_for_stmt (phi)) == vect_reduction_def)
1662        {
1663          if (vect_print_dump_info (REPORT_DETAILS))
1664            fprintf (vect_dump, "reduc phi. skip.");
1665          continue;
1666        }
1667
1668      /* Analyze the evolution function.  */
1669
1670      access_fn = instantiate_parameters
1671	(loop, analyze_scalar_evolution (loop, PHI_RESULT (phi)));
1672
1673      if (!access_fn)
1674	{
1675	  if (vect_print_dump_info (REPORT_DETAILS))
1676	    fprintf (vect_dump, "No Access function.");
1677	  return false;
1678	}
1679
1680      if (vect_print_dump_info (REPORT_DETAILS))
1681        {
1682	  fprintf (vect_dump, "Access function of PHI: ");
1683	  print_generic_expr (vect_dump, access_fn, TDF_SLIM);
1684        }
1685
1686      evolution_part = evolution_part_in_loop_num (access_fn, loop->num);
1687
1688      if (evolution_part == NULL_TREE)
1689        {
1690	  if (vect_print_dump_info (REPORT_DETAILS))
1691	    fprintf (vect_dump, "No evolution.");
1692	  return false;
1693        }
1694
1695      /* FORNOW: We do not transform initial conditions of IVs
1696	 which evolution functions are a polynomial of degree >= 2.  */
1697
1698      if (tree_is_chrec (evolution_part))
1699	return false;
1700    }
1701
1702  return true;
1703}
1704
1705
1706/*   Function vect_update_ivs_after_vectorizer.
1707
1708     "Advance" the induction variables of LOOP to the value they should take
1709     after the execution of LOOP.  This is currently necessary because the
1710     vectorizer does not handle induction variables that are used after the
1711     loop.  Such a situation occurs when the last iterations of LOOP are
1712     peeled, because:
1713     1. We introduced new uses after LOOP for IVs that were not originally used
1714        after LOOP: the IVs of LOOP are now used by an epilog loop.
1715     2. LOOP is going to be vectorized; this means that it will iterate N/VF
1716        times, whereas the loop IVs should be bumped N times.
1717
1718     Input:
1719     - LOOP - a loop that is going to be vectorized. The last few iterations
1720              of LOOP were peeled.
1721     - NITERS - the number of iterations that LOOP executes (before it is
1722                vectorized). i.e, the number of times the ivs should be bumped.
1723     - UPDATE_E - a successor edge of LOOP->exit that is on the (only) path
1724                  coming out from LOOP on which there are uses of the LOOP ivs
1725		  (this is the path from LOOP->exit to epilog_loop->preheader).
1726
1727                  The new definitions of the ivs are placed in LOOP->exit.
1728                  The phi args associated with the edge UPDATE_E in the bb
1729                  UPDATE_E->dest are updated accordingly.
1730
1731     Assumption 1: Like the rest of the vectorizer, this function assumes
1732     a single loop exit that has a single predecessor.
1733
1734     Assumption 2: The phi nodes in the LOOP header and in update_bb are
1735     organized in the same order.
1736
1737     Assumption 3: The access function of the ivs is simple enough (see
1738     vect_can_advance_ivs_p).  This assumption will be relaxed in the future.
1739
1740     Assumption 4: Exactly one of the successors of LOOP exit-bb is on a path
1741     coming out of LOOP on which the ivs of LOOP are used (this is the path
1742     that leads to the epilog loop; other paths skip the epilog loop).  This
1743     path starts with the edge UPDATE_E, and its destination (denoted update_bb)
1744     needs to have its phis updated.
1745 */
1746
1747static void
1748vect_update_ivs_after_vectorizer (loop_vec_info loop_vinfo, tree niters,
1749				  edge update_e)
1750{
1751  struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
1752  basic_block exit_bb = single_exit (loop)->dest;
1753  gimple phi, phi1;
1754  gimple_stmt_iterator gsi, gsi1;
1755  basic_block update_bb = update_e->dest;
1756
1757  /* gcc_assert (vect_can_advance_ivs_p (loop_vinfo)); */
1758
1759  /* Make sure there exists a single-predecessor exit bb:  */
1760  gcc_assert (single_pred_p (exit_bb));
1761
1762  for (gsi = gsi_start_phis (loop->header), gsi1 = gsi_start_phis (update_bb);
1763       !gsi_end_p (gsi) && !gsi_end_p (gsi1);
1764       gsi_next (&gsi), gsi_next (&gsi1))
1765    {
1766      tree access_fn = NULL;
1767      tree evolution_part;
1768      tree init_expr;
1769      tree step_expr, off;
1770      tree type;
1771      tree var, ni, ni_name;
1772      gimple_stmt_iterator last_gsi;
1773
1774      phi = gsi_stmt (gsi);
1775      phi1 = gsi_stmt (gsi1);
1776      if (vect_print_dump_info (REPORT_DETAILS))
1777        {
1778          fprintf (vect_dump, "vect_update_ivs_after_vectorizer: phi: ");
1779	  print_gimple_stmt (vect_dump, phi, 0, TDF_SLIM);
1780        }
1781
1782      /* Skip virtual phi's.  */
1783      if (!is_gimple_reg (SSA_NAME_VAR (PHI_RESULT (phi))))
1784	{
1785	  if (vect_print_dump_info (REPORT_DETAILS))
1786	    fprintf (vect_dump, "virtual phi. skip.");
1787	  continue;
1788	}
1789
1790      /* Skip reduction phis.  */
1791      if (STMT_VINFO_DEF_TYPE (vinfo_for_stmt (phi)) == vect_reduction_def)
1792        {
1793          if (vect_print_dump_info (REPORT_DETAILS))
1794            fprintf (vect_dump, "reduc phi. skip.");
1795          continue;
1796        }
1797
1798      access_fn = analyze_scalar_evolution (loop, PHI_RESULT (phi));
1799      gcc_assert (access_fn);
1800      /* We can end up with an access_fn like
1801           (short int) {(short unsigned int) i_49, +, 1}_1
1802	 for further analysis we need to strip the outer cast but we
1803	 need to preserve the original type.  */
1804      type = TREE_TYPE (access_fn);
1805      STRIP_NOPS (access_fn);
1806      evolution_part =
1807	 unshare_expr (evolution_part_in_loop_num (access_fn, loop->num));
1808      gcc_assert (evolution_part != NULL_TREE);
1809
1810      /* FORNOW: We do not support IVs whose evolution function is a polynomial
1811         of degree >= 2 or exponential.  */
1812      gcc_assert (!tree_is_chrec (evolution_part));
1813
1814      step_expr = evolution_part;
1815      init_expr = unshare_expr (initial_condition_in_loop_num (access_fn,
1816							       loop->num));
1817      init_expr = fold_convert (type, init_expr);
1818
1819      off = fold_build2 (MULT_EXPR, TREE_TYPE (step_expr),
1820			 fold_convert (TREE_TYPE (step_expr), niters),
1821			 step_expr);
1822      if (POINTER_TYPE_P (TREE_TYPE (init_expr)))
1823	ni = fold_build2 (POINTER_PLUS_EXPR, TREE_TYPE (init_expr),
1824			  init_expr,
1825			  fold_convert (sizetype, off));
1826      else
1827	ni = fold_build2 (PLUS_EXPR, TREE_TYPE (init_expr),
1828			  init_expr,
1829			  fold_convert (TREE_TYPE (init_expr), off));
1830
1831      var = create_tmp_var (TREE_TYPE (init_expr), "tmp");
1832      add_referenced_var (var);
1833
1834      last_gsi = gsi_last_bb (exit_bb);
1835      ni_name = force_gimple_operand_gsi (&last_gsi, ni, false, var,
1836					  true, GSI_SAME_STMT);
1837
1838      /* Fix phi expressions in the successor bb.  */
1839      adjust_phi_and_debug_stmts (phi1, update_e, ni_name);
1840    }
1841}
1842
1843/* Return the more conservative threshold between the
1844   min_profitable_iters returned by the cost model and the user
1845   specified threshold, if provided.  */
1846
1847static unsigned int
1848conservative_cost_threshold (loop_vec_info loop_vinfo,
1849			     int min_profitable_iters)
1850{
1851  unsigned int th;
1852  int min_scalar_loop_bound;
1853
1854  min_scalar_loop_bound = ((PARAM_VALUE (PARAM_MIN_VECT_LOOP_BOUND)
1855			    * LOOP_VINFO_VECT_FACTOR (loop_vinfo)) - 1);
1856
1857  /* Use the cost model only if it is more conservative than user specified
1858     threshold.  */
1859  th = (unsigned) min_scalar_loop_bound;
1860  if (min_profitable_iters
1861      && (!min_scalar_loop_bound
1862          || min_profitable_iters > min_scalar_loop_bound))
1863    th = (unsigned) min_profitable_iters;
1864
1865  if (th && vect_print_dump_info (REPORT_COST))
1866    fprintf (vect_dump, "Profitability threshold is %u loop iterations.", th);
1867
1868  return th;
1869}
1870
1871/* Function vect_do_peeling_for_loop_bound
1872
1873   Peel the last iterations of the loop represented by LOOP_VINFO.
1874   The peeled iterations form a new epilog loop.  Given that the loop now
1875   iterates NITERS times, the new epilog loop iterates
1876   NITERS % VECTORIZATION_FACTOR times.
1877
1878   The original loop will later be made to iterate
1879   NITERS / VECTORIZATION_FACTOR times (this value is placed into RATIO).
1880
1881   COND_EXPR and COND_EXPR_STMT_LIST are combined with a new generated
1882   test.  */
1883
1884void
1885vect_do_peeling_for_loop_bound (loop_vec_info loop_vinfo, tree *ratio,
1886				tree cond_expr, gimple_seq cond_expr_stmt_list)
1887{
1888  tree ni_name, ratio_mult_vf_name;
1889  struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
1890  struct loop *new_loop;
1891  edge update_e;
1892  basic_block preheader;
1893  int loop_num;
1894  bool check_profitability = false;
1895  unsigned int th = 0;
1896  int min_profitable_iters;
1897
1898  if (vect_print_dump_info (REPORT_DETAILS))
1899    fprintf (vect_dump, "=== vect_do_peeling_for_loop_bound ===");
1900
1901  initialize_original_copy_tables ();
1902
1903  /* Generate the following variables on the preheader of original loop:
1904
1905     ni_name = number of iteration the original loop executes
1906     ratio = ni_name / vf
1907     ratio_mult_vf_name = ratio * vf  */
1908  vect_generate_tmps_on_preheader (loop_vinfo, &ni_name,
1909				   &ratio_mult_vf_name, ratio,
1910				   cond_expr_stmt_list);
1911
1912  loop_num  = loop->num;
1913
1914  /* If cost model check not done during versioning and
1915     peeling for alignment.  */
1916  if (!LOOP_REQUIRES_VERSIONING_FOR_ALIGNMENT (loop_vinfo)
1917      && !LOOP_REQUIRES_VERSIONING_FOR_ALIAS (loop_vinfo)
1918      && !LOOP_PEELING_FOR_ALIGNMENT (loop_vinfo)
1919      && !cond_expr)
1920    {
1921      check_profitability = true;
1922
1923      /* Get profitability threshold for vectorized loop.  */
1924      min_profitable_iters = LOOP_VINFO_COST_MODEL_MIN_ITERS (loop_vinfo);
1925
1926      th = conservative_cost_threshold (loop_vinfo,
1927					min_profitable_iters);
1928    }
1929
1930  new_loop = slpeel_tree_peel_loop_to_edge (loop, single_exit (loop),
1931                                            ratio_mult_vf_name, ni_name, false,
1932                                            th, check_profitability,
1933					    cond_expr, cond_expr_stmt_list);
1934  gcc_assert (new_loop);
1935  gcc_assert (loop_num == loop->num);
1936#ifdef ENABLE_CHECKING
1937  slpeel_verify_cfg_after_peeling (loop, new_loop);
1938#endif
1939
1940  /* A guard that controls whether the new_loop is to be executed or skipped
1941     is placed in LOOP->exit.  LOOP->exit therefore has two successors - one
1942     is the preheader of NEW_LOOP, where the IVs from LOOP are used.  The other
1943     is a bb after NEW_LOOP, where these IVs are not used.  Find the edge that
1944     is on the path where the LOOP IVs are used and need to be updated.  */
1945
1946  preheader = loop_preheader_edge (new_loop)->src;
1947  if (EDGE_PRED (preheader, 0)->src == single_exit (loop)->dest)
1948    update_e = EDGE_PRED (preheader, 0);
1949  else
1950    update_e = EDGE_PRED (preheader, 1);
1951
1952  /* Update IVs of original loop as if they were advanced
1953     by ratio_mult_vf_name steps.  */
1954  vect_update_ivs_after_vectorizer (loop_vinfo, ratio_mult_vf_name, update_e);
1955
1956  /* After peeling we have to reset scalar evolution analyzer.  */
1957  scev_reset ();
1958
1959  free_original_copy_tables ();
1960}
1961
1962
1963/* Function vect_gen_niters_for_prolog_loop
1964
1965   Set the number of iterations for the loop represented by LOOP_VINFO
1966   to the minimum between LOOP_NITERS (the original iteration count of the loop)
1967   and the misalignment of DR - the data reference recorded in
1968   LOOP_VINFO_UNALIGNED_DR (LOOP_VINFO).  As a result, after the execution of
1969   this loop, the data reference DR will refer to an aligned location.
1970
1971   The following computation is generated:
1972
1973   If the misalignment of DR is known at compile time:
1974     addr_mis = int mis = DR_MISALIGNMENT (dr);
1975   Else, compute address misalignment in bytes:
1976     addr_mis = addr & (vectype_size - 1)
1977
1978   prolog_niters = min (LOOP_NITERS, ((VF - addr_mis/elem_size)&(VF-1))/step)
1979
1980   (elem_size = element type size; an element is the scalar element whose type
1981   is the inner type of the vectype)
1982
1983   When the step of the data-ref in the loop is not 1 (as in interleaved data
1984   and SLP), the number of iterations of the prolog must be divided by the step
1985   (which is equal to the size of interleaved group).
1986
1987   The above formulas assume that VF == number of elements in the vector. This
1988   may not hold when there are multiple-types in the loop.
1989   In this case, for some data-references in the loop the VF does not represent
1990   the number of elements that fit in the vector.  Therefore, instead of VF we
1991   use TYPE_VECTOR_SUBPARTS.  */
1992
1993static tree
1994vect_gen_niters_for_prolog_loop (loop_vec_info loop_vinfo, tree loop_niters,
1995				 tree *wide_prolog_niters)
1996{
1997  struct data_reference *dr = LOOP_VINFO_UNALIGNED_DR (loop_vinfo);
1998  struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
1999  tree var;
2000  gimple_seq stmts;
2001  tree iters, iters_name;
2002  edge pe;
2003  basic_block new_bb;
2004  gimple dr_stmt = DR_STMT (dr);
2005  stmt_vec_info stmt_info = vinfo_for_stmt (dr_stmt);
2006  tree vectype = STMT_VINFO_VECTYPE (stmt_info);
2007  int vectype_align = TYPE_ALIGN (vectype) / BITS_PER_UNIT;
2008  tree niters_type = TREE_TYPE (loop_niters);
2009  int step = 1;
2010  int element_size = GET_MODE_SIZE (TYPE_MODE (TREE_TYPE (DR_REF (dr))));
2011  int nelements = TYPE_VECTOR_SUBPARTS (vectype);
2012
2013  if (STMT_VINFO_STRIDED_ACCESS (stmt_info))
2014    step = DR_GROUP_SIZE (vinfo_for_stmt (DR_GROUP_FIRST_DR (stmt_info)));
2015
2016  pe = loop_preheader_edge (loop);
2017
2018  if (LOOP_PEELING_FOR_ALIGNMENT (loop_vinfo) > 0)
2019    {
2020      int byte_misalign = LOOP_PEELING_FOR_ALIGNMENT (loop_vinfo);
2021      int elem_misalign = byte_misalign / element_size;
2022
2023      if (vect_print_dump_info (REPORT_DETAILS))
2024        fprintf (vect_dump, "known alignment = %d.", byte_misalign);
2025
2026      iters = build_int_cst (niters_type,
2027                     (((nelements - elem_misalign) & (nelements - 1)) / step));
2028    }
2029  else
2030    {
2031      gimple_seq new_stmts = NULL;
2032      tree start_addr = vect_create_addr_base_for_vector_ref (dr_stmt,
2033						&new_stmts, NULL_TREE, loop);
2034      tree ptr_type = TREE_TYPE (start_addr);
2035      tree size = TYPE_SIZE (ptr_type);
2036      tree type = lang_hooks.types.type_for_size (tree_low_cst (size, 1), 1);
2037      tree vectype_size_minus_1 = build_int_cst (type, vectype_align - 1);
2038      tree elem_size_log =
2039        build_int_cst (type, exact_log2 (vectype_align/nelements));
2040      tree nelements_minus_1 = build_int_cst (type, nelements - 1);
2041      tree nelements_tree = build_int_cst (type, nelements);
2042      tree byte_misalign;
2043      tree elem_misalign;
2044
2045      new_bb = gsi_insert_seq_on_edge_immediate (pe, new_stmts);
2046      gcc_assert (!new_bb);
2047
2048      /* Create:  byte_misalign = addr & (vectype_size - 1)  */
2049      byte_misalign =
2050        fold_build2 (BIT_AND_EXPR, type, fold_convert (type, start_addr), vectype_size_minus_1);
2051
2052      /* Create:  elem_misalign = byte_misalign / element_size  */
2053      elem_misalign =
2054        fold_build2 (RSHIFT_EXPR, type, byte_misalign, elem_size_log);
2055
2056      /* Create:  (niters_type) (nelements - elem_misalign)&(nelements - 1)  */
2057      iters = fold_build2 (MINUS_EXPR, type, nelements_tree, elem_misalign);
2058      iters = fold_build2 (BIT_AND_EXPR, type, iters, nelements_minus_1);
2059      iters = fold_convert (niters_type, iters);
2060    }
2061
2062  /* Create:  prolog_loop_niters = min (iters, loop_niters) */
2063  /* If the loop bound is known at compile time we already verified that it is
2064     greater than vf; since the misalignment ('iters') is at most vf, there's
2065     no need to generate the MIN_EXPR in this case.  */
2066  if (TREE_CODE (loop_niters) != INTEGER_CST)
2067    iters = fold_build2 (MIN_EXPR, niters_type, iters, loop_niters);
2068
2069  if (vect_print_dump_info (REPORT_DETAILS))
2070    {
2071      fprintf (vect_dump, "niters for prolog loop: ");
2072      print_generic_expr (vect_dump, iters, TDF_SLIM);
2073    }
2074
2075  var = create_tmp_var (niters_type, "prolog_loop_niters");
2076  add_referenced_var (var);
2077  stmts = NULL;
2078  iters_name = force_gimple_operand (iters, &stmts, false, var);
2079  if (types_compatible_p (sizetype, niters_type))
2080    *wide_prolog_niters = iters_name;
2081  else
2082    {
2083      gimple_seq seq = NULL;
2084      tree wide_iters = fold_convert (sizetype, iters);
2085      var = create_tmp_var (sizetype, "prolog_loop_niters");
2086      add_referenced_var (var);
2087      *wide_prolog_niters = force_gimple_operand (wide_iters, &seq, false,
2088						  var);
2089      if (seq)
2090	gimple_seq_add_seq (&stmts, seq);
2091    }
2092
2093  /* Insert stmt on loop preheader edge.  */
2094  if (stmts)
2095    {
2096      basic_block new_bb = gsi_insert_seq_on_edge_immediate (pe, stmts);
2097      gcc_assert (!new_bb);
2098    }
2099
2100  return iters_name;
2101}
2102
2103
2104/* Function vect_update_init_of_dr
2105
2106   NITERS iterations were peeled from LOOP.  DR represents a data reference
2107   in LOOP.  This function updates the information recorded in DR to
2108   account for the fact that the first NITERS iterations had already been
2109   executed.  Specifically, it updates the OFFSET field of DR.  */
2110
2111static void
2112vect_update_init_of_dr (struct data_reference *dr, tree niters)
2113{
2114  tree offset = DR_OFFSET (dr);
2115
2116  niters = fold_build2 (MULT_EXPR, sizetype,
2117			fold_convert (sizetype, niters),
2118			fold_convert (sizetype, DR_STEP (dr)));
2119  offset = fold_build2 (PLUS_EXPR, sizetype,
2120			fold_convert (sizetype, offset), niters);
2121  DR_OFFSET (dr) = offset;
2122}
2123
2124
2125/* Function vect_update_inits_of_drs
2126
2127   NITERS iterations were peeled from the loop represented by LOOP_VINFO.
2128   This function updates the information recorded for the data references in
2129   the loop to account for the fact that the first NITERS iterations had
2130   already been executed.  Specifically, it updates the initial_condition of
2131   the access_function of all the data_references in the loop.  */
2132
2133static void
2134vect_update_inits_of_drs (loop_vec_info loop_vinfo, tree niters)
2135{
2136  unsigned int i;
2137  VEC (data_reference_p, heap) *datarefs = LOOP_VINFO_DATAREFS (loop_vinfo);
2138  struct data_reference *dr;
2139
2140  if (vect_print_dump_info (REPORT_DETAILS))
2141    fprintf (vect_dump, "=== vect_update_inits_of_dr ===");
2142
2143  for (i = 0; VEC_iterate (data_reference_p, datarefs, i, dr); i++)
2144    vect_update_init_of_dr (dr, niters);
2145}
2146
2147
2148/* Function vect_do_peeling_for_alignment
2149
2150   Peel the first 'niters' iterations of the loop represented by LOOP_VINFO.
2151   'niters' is set to the misalignment of one of the data references in the
2152   loop, thereby forcing it to refer to an aligned location at the beginning
2153   of the execution of this loop.  The data reference for which we are
2154   peeling is recorded in LOOP_VINFO_UNALIGNED_DR.  */
2155
2156void
2157vect_do_peeling_for_alignment (loop_vec_info loop_vinfo)
2158{
2159  struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
2160  tree niters_of_prolog_loop, ni_name;
2161  tree n_iters;
2162  tree wide_prolog_niters;
2163  struct loop *new_loop;
2164  unsigned int th = 0;
2165  int min_profitable_iters;
2166
2167  if (vect_print_dump_info (REPORT_DETAILS))
2168    fprintf (vect_dump, "=== vect_do_peeling_for_alignment ===");
2169
2170  initialize_original_copy_tables ();
2171
2172  ni_name = vect_build_loop_niters (loop_vinfo, NULL);
2173  niters_of_prolog_loop = vect_gen_niters_for_prolog_loop (loop_vinfo, ni_name,
2174							   &wide_prolog_niters);
2175
2176
2177  /* Get profitability threshold for vectorized loop.  */
2178  min_profitable_iters = LOOP_VINFO_COST_MODEL_MIN_ITERS (loop_vinfo);
2179  th = conservative_cost_threshold (loop_vinfo,
2180				    min_profitable_iters);
2181
2182  /* Peel the prolog loop and iterate it niters_of_prolog_loop.  */
2183  new_loop =
2184    slpeel_tree_peel_loop_to_edge (loop, loop_preheader_edge (loop),
2185				   niters_of_prolog_loop, ni_name, true,
2186				   th, true, NULL_TREE, NULL);
2187
2188  gcc_assert (new_loop);
2189#ifdef ENABLE_CHECKING
2190  slpeel_verify_cfg_after_peeling (new_loop, loop);
2191#endif
2192
2193  /* Update number of times loop executes.  */
2194  n_iters = LOOP_VINFO_NITERS (loop_vinfo);
2195  LOOP_VINFO_NITERS (loop_vinfo) = fold_build2 (MINUS_EXPR,
2196		TREE_TYPE (n_iters), n_iters, niters_of_prolog_loop);
2197
2198  /* Update the init conditions of the access functions of all data refs.  */
2199  vect_update_inits_of_drs (loop_vinfo, wide_prolog_niters);
2200
2201  /* After peeling we have to reset scalar evolution analyzer.  */
2202  scev_reset ();
2203
2204  free_original_copy_tables ();
2205}
2206
2207
2208/* Function vect_create_cond_for_align_checks.
2209
2210   Create a conditional expression that represents the alignment checks for
2211   all of data references (array element references) whose alignment must be
2212   checked at runtime.
2213
2214   Input:
2215   COND_EXPR  - input conditional expression.  New conditions will be chained
2216                with logical AND operation.
2217   LOOP_VINFO - two fields of the loop information are used.
2218                LOOP_VINFO_PTR_MASK is the mask used to check the alignment.
2219                LOOP_VINFO_MAY_MISALIGN_STMTS contains the refs to be checked.
2220
2221   Output:
2222   COND_EXPR_STMT_LIST - statements needed to construct the conditional
2223                         expression.
2224   The returned value is the conditional expression to be used in the if
2225   statement that controls which version of the loop gets executed at runtime.
2226
2227   The algorithm makes two assumptions:
2228     1) The number of bytes "n" in a vector is a power of 2.
2229     2) An address "a" is aligned if a%n is zero and that this
2230        test can be done as a&(n-1) == 0.  For example, for 16
2231        byte vectors the test is a&0xf == 0.  */
2232
2233static void
2234vect_create_cond_for_align_checks (loop_vec_info loop_vinfo,
2235                                   tree *cond_expr,
2236				   gimple_seq *cond_expr_stmt_list)
2237{
2238  struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
2239  VEC(gimple,heap) *may_misalign_stmts
2240    = LOOP_VINFO_MAY_MISALIGN_STMTS (loop_vinfo);
2241  gimple ref_stmt;
2242  int mask = LOOP_VINFO_PTR_MASK (loop_vinfo);
2243  tree mask_cst;
2244  unsigned int i;
2245  tree psize;
2246  tree int_ptrsize_type;
2247  char tmp_name[20];
2248  tree or_tmp_name = NULL_TREE;
2249  tree and_tmp, and_tmp_name;
2250  gimple and_stmt;
2251  tree ptrsize_zero;
2252  tree part_cond_expr;
2253
2254  /* Check that mask is one less than a power of 2, i.e., mask is
2255     all zeros followed by all ones.  */
2256  gcc_assert ((mask != 0) && ((mask & (mask+1)) == 0));
2257
2258  /* CHECKME: what is the best integer or unsigned type to use to hold a
2259     cast from a pointer value?  */
2260  psize = TYPE_SIZE (ptr_type_node);
2261  int_ptrsize_type
2262    = lang_hooks.types.type_for_size (tree_low_cst (psize, 1), 0);
2263
2264  /* Create expression (mask & (dr_1 || ... || dr_n)) where dr_i is the address
2265     of the first vector of the i'th data reference. */
2266
2267  for (i = 0; VEC_iterate (gimple, may_misalign_stmts, i, ref_stmt); i++)
2268    {
2269      gimple_seq new_stmt_list = NULL;
2270      tree addr_base;
2271      tree addr_tmp, addr_tmp_name;
2272      tree or_tmp, new_or_tmp_name;
2273      gimple addr_stmt, or_stmt;
2274
2275      /* create: addr_tmp = (int)(address_of_first_vector) */
2276      addr_base =
2277	vect_create_addr_base_for_vector_ref (ref_stmt, &new_stmt_list,
2278					      NULL_TREE, loop);
2279      if (new_stmt_list != NULL)
2280	gimple_seq_add_seq (cond_expr_stmt_list, new_stmt_list);
2281
2282      sprintf (tmp_name, "%s%d", "addr2int", i);
2283      addr_tmp = create_tmp_var (int_ptrsize_type, tmp_name);
2284      add_referenced_var (addr_tmp);
2285      addr_tmp_name = make_ssa_name (addr_tmp, NULL);
2286      addr_stmt = gimple_build_assign_with_ops (NOP_EXPR, addr_tmp_name,
2287						addr_base, NULL_TREE);
2288      SSA_NAME_DEF_STMT (addr_tmp_name) = addr_stmt;
2289      gimple_seq_add_stmt (cond_expr_stmt_list, addr_stmt);
2290
2291      /* The addresses are OR together.  */
2292
2293      if (or_tmp_name != NULL_TREE)
2294        {
2295          /* create: or_tmp = or_tmp | addr_tmp */
2296          sprintf (tmp_name, "%s%d", "orptrs", i);
2297          or_tmp = create_tmp_var (int_ptrsize_type, tmp_name);
2298          add_referenced_var (or_tmp);
2299	  new_or_tmp_name = make_ssa_name (or_tmp, NULL);
2300	  or_stmt = gimple_build_assign_with_ops (BIT_IOR_EXPR,
2301						  new_or_tmp_name,
2302						  or_tmp_name, addr_tmp_name);
2303          SSA_NAME_DEF_STMT (new_or_tmp_name) = or_stmt;
2304	  gimple_seq_add_stmt (cond_expr_stmt_list, or_stmt);
2305          or_tmp_name = new_or_tmp_name;
2306        }
2307      else
2308        or_tmp_name = addr_tmp_name;
2309
2310    } /* end for i */
2311
2312  mask_cst = build_int_cst (int_ptrsize_type, mask);
2313
2314  /* create: and_tmp = or_tmp & mask  */
2315  and_tmp = create_tmp_var (int_ptrsize_type, "andmask" );
2316  add_referenced_var (and_tmp);
2317  and_tmp_name = make_ssa_name (and_tmp, NULL);
2318
2319  and_stmt = gimple_build_assign_with_ops (BIT_AND_EXPR, and_tmp_name,
2320					   or_tmp_name, mask_cst);
2321  SSA_NAME_DEF_STMT (and_tmp_name) = and_stmt;
2322  gimple_seq_add_stmt (cond_expr_stmt_list, and_stmt);
2323
2324  /* Make and_tmp the left operand of the conditional test against zero.
2325     if and_tmp has a nonzero bit then some address is unaligned.  */
2326  ptrsize_zero = build_int_cst (int_ptrsize_type, 0);
2327  part_cond_expr = fold_build2 (EQ_EXPR, boolean_type_node,
2328				and_tmp_name, ptrsize_zero);
2329  if (*cond_expr)
2330    *cond_expr = fold_build2 (TRUTH_AND_EXPR, boolean_type_node,
2331			      *cond_expr, part_cond_expr);
2332  else
2333    *cond_expr = part_cond_expr;
2334}
2335
2336
2337/* Function vect_vfa_segment_size.
2338
2339   Create an expression that computes the size of segment
2340   that will be accessed for a data reference.  The functions takes into
2341   account that realignment loads may access one more vector.
2342
2343   Input:
2344     DR: The data reference.
2345     LENGTH_FACTOR: segment length to consider.
2346
2347   Return an expression whose value is the size of segment which will be
2348   accessed by DR.  */
2349
2350static tree
2351vect_vfa_segment_size (struct data_reference *dr, tree length_factor)
2352{
2353  tree segment_length;
2354  segment_length = size_binop (MULT_EXPR,
2355			       fold_convert (sizetype, DR_STEP (dr)),
2356			       fold_convert (sizetype, length_factor));
2357  if (vect_supportable_dr_alignment (dr) == dr_explicit_realign_optimized)
2358    {
2359      tree vector_size = TYPE_SIZE_UNIT
2360			  (STMT_VINFO_VECTYPE (vinfo_for_stmt (DR_STMT (dr))));
2361
2362      segment_length = size_binop (PLUS_EXPR, segment_length, vector_size);
2363    }
2364  return segment_length;
2365}
2366
2367
2368/* Function vect_create_cond_for_alias_checks.
2369
2370   Create a conditional expression that represents the run-time checks for
2371   overlapping of address ranges represented by a list of data references
2372   relations passed as input.
2373
2374   Input:
2375   COND_EXPR  - input conditional expression.  New conditions will be chained
2376                with logical AND operation.
2377   LOOP_VINFO - field LOOP_VINFO_MAY_ALIAS_STMTS contains the list of ddrs
2378	        to be checked.
2379
2380   Output:
2381   COND_EXPR - conditional expression.
2382   COND_EXPR_STMT_LIST - statements needed to construct the conditional
2383                         expression.
2384
2385
2386   The returned value is the conditional expression to be used in the if
2387   statement that controls which version of the loop gets executed at runtime.
2388*/
2389
2390static void
2391vect_create_cond_for_alias_checks (loop_vec_info loop_vinfo,
2392				   tree * cond_expr,
2393				   gimple_seq * cond_expr_stmt_list)
2394{
2395  struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
2396  VEC (ddr_p, heap) * may_alias_ddrs =
2397    LOOP_VINFO_MAY_ALIAS_DDRS (loop_vinfo);
2398  int vect_factor = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
2399  tree scalar_loop_iters = LOOP_VINFO_NITERS (loop_vinfo);
2400
2401  ddr_p ddr;
2402  unsigned int i;
2403  tree part_cond_expr, length_factor;
2404
2405  /* Create expression
2406     ((store_ptr_0 + store_segment_length_0) < load_ptr_0)
2407     || (load_ptr_0 + load_segment_length_0) < store_ptr_0))
2408     &&
2409     ...
2410     &&
2411     ((store_ptr_n + store_segment_length_n) < load_ptr_n)
2412     || (load_ptr_n + load_segment_length_n) < store_ptr_n))  */
2413
2414  if (VEC_empty (ddr_p, may_alias_ddrs))
2415    return;
2416
2417  for (i = 0; VEC_iterate (ddr_p, may_alias_ddrs, i, ddr); i++)
2418    {
2419      struct data_reference *dr_a, *dr_b;
2420      gimple dr_group_first_a, dr_group_first_b;
2421      tree addr_base_a, addr_base_b;
2422      tree segment_length_a, segment_length_b;
2423      gimple stmt_a, stmt_b;
2424
2425      dr_a = DDR_A (ddr);
2426      stmt_a = DR_STMT (DDR_A (ddr));
2427      dr_group_first_a = DR_GROUP_FIRST_DR (vinfo_for_stmt (stmt_a));
2428      if (dr_group_first_a)
2429        {
2430	  stmt_a = dr_group_first_a;
2431	  dr_a = STMT_VINFO_DATA_REF (vinfo_for_stmt (stmt_a));
2432	}
2433
2434      dr_b = DDR_B (ddr);
2435      stmt_b = DR_STMT (DDR_B (ddr));
2436      dr_group_first_b = DR_GROUP_FIRST_DR (vinfo_for_stmt (stmt_b));
2437      if (dr_group_first_b)
2438        {
2439	  stmt_b = dr_group_first_b;
2440	  dr_b = STMT_VINFO_DATA_REF (vinfo_for_stmt (stmt_b));
2441	}
2442
2443      addr_base_a =
2444        vect_create_addr_base_for_vector_ref (stmt_a, cond_expr_stmt_list,
2445					      NULL_TREE, loop);
2446      addr_base_b =
2447        vect_create_addr_base_for_vector_ref (stmt_b, cond_expr_stmt_list,
2448					      NULL_TREE, loop);
2449
2450      if (!operand_equal_p (DR_STEP (dr_a), DR_STEP (dr_b), 0))
2451	length_factor = scalar_loop_iters;
2452      else
2453	length_factor = size_int (vect_factor);
2454      segment_length_a = vect_vfa_segment_size (dr_a, length_factor);
2455      segment_length_b = vect_vfa_segment_size (dr_b, length_factor);
2456
2457      if (vect_print_dump_info (REPORT_DR_DETAILS))
2458	{
2459	  fprintf (vect_dump,
2460		   "create runtime check for data references ");
2461	  print_generic_expr (vect_dump, DR_REF (dr_a), TDF_SLIM);
2462	  fprintf (vect_dump, " and ");
2463	  print_generic_expr (vect_dump, DR_REF (dr_b), TDF_SLIM);
2464	}
2465
2466
2467      part_cond_expr =
2468      	fold_build2 (TRUTH_OR_EXPR, boolean_type_node,
2469	  fold_build2 (LT_EXPR, boolean_type_node,
2470	    fold_build2 (POINTER_PLUS_EXPR, TREE_TYPE (addr_base_a),
2471	      addr_base_a,
2472	      segment_length_a),
2473	    addr_base_b),
2474	  fold_build2 (LT_EXPR, boolean_type_node,
2475	    fold_build2 (POINTER_PLUS_EXPR, TREE_TYPE (addr_base_b),
2476	      addr_base_b,
2477	      segment_length_b),
2478	    addr_base_a));
2479
2480      if (*cond_expr)
2481	*cond_expr = fold_build2 (TRUTH_AND_EXPR, boolean_type_node,
2482				  *cond_expr, part_cond_expr);
2483      else
2484	*cond_expr = part_cond_expr;
2485    }
2486
2487  if (vect_print_dump_info (REPORT_VECTORIZED_LOCATIONS))
2488    fprintf (vect_dump, "created %u versioning for alias checks.\n",
2489             VEC_length (ddr_p, may_alias_ddrs));
2490}
2491
2492
2493/* Function vect_loop_versioning.
2494
2495   If the loop has data references that may or may not be aligned or/and
2496   has data reference relations whose independence was not proven then
2497   two versions of the loop need to be generated, one which is vectorized
2498   and one which isn't.  A test is then generated to control which of the
2499   loops is executed.  The test checks for the alignment of all of the
2500   data references that may or may not be aligned.  An additional
2501   sequence of runtime tests is generated for each pairs of DDRs whose
2502   independence was not proven.  The vectorized version of loop is
2503   executed only if both alias and alignment tests are passed.
2504
2505   The test generated to check which version of loop is executed
2506   is modified to also check for profitability as indicated by the
2507   cost model initially.
2508
2509   The versioning precondition(s) are placed in *COND_EXPR and
2510   *COND_EXPR_STMT_LIST.  If DO_VERSIONING is true versioning is
2511   also performed, otherwise only the conditions are generated.  */
2512
2513void
2514vect_loop_versioning (loop_vec_info loop_vinfo, bool do_versioning,
2515		      tree *cond_expr, gimple_seq *cond_expr_stmt_list)
2516{
2517  struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
2518  basic_block condition_bb;
2519  gimple_stmt_iterator gsi, cond_exp_gsi;
2520  basic_block merge_bb;
2521  basic_block new_exit_bb;
2522  edge new_exit_e, e;
2523  gimple orig_phi, new_phi;
2524  tree arg;
2525  unsigned prob = 4 * REG_BR_PROB_BASE / 5;
2526  gimple_seq gimplify_stmt_list = NULL;
2527  tree scalar_loop_iters = LOOP_VINFO_NITERS (loop_vinfo);
2528  int min_profitable_iters = 0;
2529  unsigned int th;
2530
2531  /* Get profitability threshold for vectorized loop.  */
2532  min_profitable_iters = LOOP_VINFO_COST_MODEL_MIN_ITERS (loop_vinfo);
2533
2534  th = conservative_cost_threshold (loop_vinfo,
2535				    min_profitable_iters);
2536
2537  *cond_expr =
2538    fold_build2 (GT_EXPR, boolean_type_node, scalar_loop_iters,
2539 	         build_int_cst (TREE_TYPE (scalar_loop_iters), th));
2540
2541  *cond_expr = force_gimple_operand (*cond_expr, cond_expr_stmt_list,
2542				     false, NULL_TREE);
2543
2544  if (LOOP_REQUIRES_VERSIONING_FOR_ALIGNMENT (loop_vinfo))
2545      vect_create_cond_for_align_checks (loop_vinfo, cond_expr,
2546					 cond_expr_stmt_list);
2547
2548  if (LOOP_REQUIRES_VERSIONING_FOR_ALIAS (loop_vinfo))
2549    vect_create_cond_for_alias_checks (loop_vinfo, cond_expr,
2550				       cond_expr_stmt_list);
2551
2552  *cond_expr =
2553    fold_build2 (NE_EXPR, boolean_type_node, *cond_expr, integer_zero_node);
2554  *cond_expr =
2555    force_gimple_operand (*cond_expr, &gimplify_stmt_list, true, NULL_TREE);
2556  gimple_seq_add_seq (cond_expr_stmt_list, gimplify_stmt_list);
2557
2558  /* If we only needed the extra conditions and a new loop copy
2559     bail out here.  */
2560  if (!do_versioning)
2561    return;
2562
2563  initialize_original_copy_tables ();
2564  loop_version (loop, *cond_expr, &condition_bb,
2565		prob, prob, REG_BR_PROB_BASE - prob, true);
2566  free_original_copy_tables();
2567
2568  /* Loop versioning violates an assumption we try to maintain during
2569     vectorization - that the loop exit block has a single predecessor.
2570     After versioning, the exit block of both loop versions is the same
2571     basic block (i.e. it has two predecessors). Just in order to simplify
2572     following transformations in the vectorizer, we fix this situation
2573     here by adding a new (empty) block on the exit-edge of the loop,
2574     with the proper loop-exit phis to maintain loop-closed-form.  */
2575
2576  merge_bb = single_exit (loop)->dest;
2577  gcc_assert (EDGE_COUNT (merge_bb->preds) == 2);
2578  new_exit_bb = split_edge (single_exit (loop));
2579  new_exit_e = single_exit (loop);
2580  e = EDGE_SUCC (new_exit_bb, 0);
2581
2582  for (gsi = gsi_start_phis (merge_bb); !gsi_end_p (gsi); gsi_next (&gsi))
2583    {
2584      orig_phi = gsi_stmt (gsi);
2585      new_phi = create_phi_node (SSA_NAME_VAR (PHI_RESULT (orig_phi)),
2586				  new_exit_bb);
2587      arg = PHI_ARG_DEF_FROM_EDGE (orig_phi, e);
2588      add_phi_arg (new_phi, arg, new_exit_e,
2589		   gimple_phi_arg_location_from_edge (orig_phi, e));
2590      adjust_phi_and_debug_stmts (orig_phi, e, PHI_RESULT (new_phi));
2591    }
2592
2593  /* End loop-exit-fixes after versioning.  */
2594
2595  update_ssa (TODO_update_ssa);
2596  if (*cond_expr_stmt_list)
2597    {
2598      cond_exp_gsi = gsi_last_bb (condition_bb);
2599      gsi_insert_seq_before (&cond_exp_gsi, *cond_expr_stmt_list,
2600			     GSI_SAME_STMT);
2601      *cond_expr_stmt_list = NULL;
2602    }
2603  *cond_expr = NULL_TREE;
2604}
2605
2606