1/* Transformation Utilities for Loop Vectorization.
2   Copyright (C) 2003,2004,2005 Free Software Foundation, Inc.
3   Contributed by Dorit Naishlos <dorit@il.ibm.com>
4
5This file is part of GCC.
6
7GCC is free software; you can redistribute it and/or modify it under
8the terms of the GNU General Public License as published by the Free
9Software Foundation; either version 2, or (at your option) any later
10version.
11
12GCC is distributed in the hope that it will be useful, but WITHOUT ANY
13WARRANTY; without even the implied warranty of MERCHANTABILITY or
14FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
15for more details.
16
17You should have received a copy of the GNU General Public License
18along with GCC; see the file COPYING.  If not, write to the Free
19Software Foundation, 51 Franklin Street, Fifth Floor, Boston, MA
2002110-1301, USA.  */
21
22#include "config.h"
23#include "system.h"
24#include "coretypes.h"
25#include "tm.h"
26#include "ggc.h"
27#include "tree.h"
28#include "target.h"
29#include "rtl.h"
30#include "basic-block.h"
31#include "diagnostic.h"
32#include "tree-flow.h"
33#include "tree-dump.h"
34#include "timevar.h"
35#include "cfgloop.h"
36#include "expr.h"
37#include "optabs.h"
38#include "recog.h"
39#include "tree-data-ref.h"
40#include "tree-chrec.h"
41#include "tree-scalar-evolution.h"
42#include "tree-vectorizer.h"
43#include "langhooks.h"
44#include "tree-pass.h"
45#include "toplev.h"
46#include "real.h"
47
48/* Utility functions for the code transformation.  */
49static bool vect_transform_stmt (tree, block_stmt_iterator *);
50static void vect_align_data_ref (tree);
51static tree vect_create_destination_var (tree, tree);
52static tree vect_create_data_ref_ptr
53  (tree, block_stmt_iterator *, tree, tree *, bool);
54static tree vect_create_addr_base_for_vector_ref (tree, tree *, tree);
55static tree vect_get_new_vect_var (tree, enum vect_var_kind, const char *);
56static tree vect_get_vec_def_for_operand (tree, tree, tree *);
57static tree vect_init_vector (tree, tree);
58static void vect_finish_stmt_generation
59  (tree stmt, tree vec_stmt, block_stmt_iterator *bsi);
60static bool vect_is_simple_cond (tree, loop_vec_info);
61static void update_vuses_to_preheader (tree, struct loop*);
62static tree get_initial_def_for_reduction (tree, tree, tree *);
63
64/* Utility function dealing with loop peeling (not peeling itself).  */
65static void vect_generate_tmps_on_preheader
66  (loop_vec_info, tree *, tree *, tree *);
67static tree vect_build_loop_niters (loop_vec_info);
68static void vect_update_ivs_after_vectorizer (loop_vec_info, tree, edge);
69static tree vect_gen_niters_for_prolog_loop (loop_vec_info, tree);
70static void vect_update_init_of_dr (struct data_reference *, tree niters);
71static void vect_update_inits_of_drs (loop_vec_info, tree);
72static void vect_do_peeling_for_alignment (loop_vec_info, struct loops *);
73static void vect_do_peeling_for_loop_bound
74  (loop_vec_info, tree *, struct loops *);
75static int vect_min_worthwhile_factor (enum tree_code);
76
77
78/* Function vect_get_new_vect_var.
79
80   Returns a name for a new variable. The current naming scheme appends the
81   prefix "vect_" or "vect_p" (depending on the value of VAR_KIND) to
82   the name of vectorizer generated variables, and appends that to NAME if
83   provided.  */
84
85static tree
86vect_get_new_vect_var (tree type, enum vect_var_kind var_kind, const char *name)
87{
88  const char *prefix;
89  tree new_vect_var;
90
91  switch (var_kind)
92  {
93  case vect_simple_var:
94    prefix = "vect_";
95    break;
96  case vect_scalar_var:
97    prefix = "stmp_";
98    break;
99  case vect_pointer_var:
100    prefix = "vect_p";
101    break;
102  default:
103    gcc_unreachable ();
104  }
105
106  if (name)
107    new_vect_var = create_tmp_var (type, concat (prefix, name, NULL));
108  else
109    new_vect_var = create_tmp_var (type, prefix);
110
111  return new_vect_var;
112}
113
114
/* Function vect_create_addr_base_for_vector_ref.

   Create an expression that computes the address of the first memory location
   that will be accessed for a data reference.

   Input:
   STMT: The statement containing the data reference.
   NEW_STMT_LIST: Must be initialized to NULL_TREE or a statement list.
   OFFSET: Optional. If supplied, it is added (scaled by DR_STEP) to the
   initial address.

   Output:
   1. Return an SSA_NAME whose value is the address of the memory location of
      the first vector of the data reference.
   2. If new_stmt_list is not NULL_TREE after return then the caller must insert
      these statement(s) which define the returned SSA_NAME.

   FORNOW: We are only handling array accesses with step 1.  */

static tree
vect_create_addr_base_for_vector_ref (tree stmt,
                                      tree *new_stmt_list,
				      tree offset)
{
  stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
  struct data_reference *dr = STMT_VINFO_DATA_REF (stmt_info);
  /* Unshared copies: these trees are spliced into newly built stmts below,
     so they must not share structure with the data-ref's own fields.  */
  tree data_ref_base = unshare_expr (DR_BASE_ADDRESS (dr));
  tree base_name = build_fold_indirect_ref (data_ref_base);
  tree ref = DR_REF (dr);
  tree scalar_type = TREE_TYPE (ref);
  tree scalar_ptr_type = build_pointer_type (scalar_type);
  tree vec_stmt;
  tree new_temp;
  tree addr_base, addr_expr;
  tree dest, new_stmt;
  tree base_offset = unshare_expr (DR_OFFSET (dr));
  tree init = unshare_expr (DR_INIT (dr));

  /* Create base_offset: the variable (DR_OFFSET) plus constant (DR_INIT)
     offset parts of the data-ref, gimplified into DEST.  */
  base_offset = size_binop (PLUS_EXPR, base_offset, init);
  dest = create_tmp_var (TREE_TYPE (base_offset), "base_off");
  add_referenced_tmp_var (dest);
  base_offset = force_gimple_operand (base_offset, &new_stmt, false, dest);
  append_to_statement_list_force (new_stmt, new_stmt_list);

  if (offset)
    {
      tree tmp = create_tmp_var (TREE_TYPE (base_offset), "offset");
      add_referenced_tmp_var (tmp);
      /* OFFSET is in elements; scale it by the step of the data-ref to
	 obtain a byte offset before adding it to base_offset.  */
      offset = fold_build2 (MULT_EXPR, TREE_TYPE (offset), offset,
			    DR_STEP (dr));
      base_offset = fold_build2 (PLUS_EXPR, TREE_TYPE (base_offset),
				 base_offset, offset);
      base_offset = force_gimple_operand (base_offset, &new_stmt, false, tmp);
      append_to_statement_list_force (new_stmt, new_stmt_list);
    }

  /* base + base_offset */
  addr_base = fold_build2 (PLUS_EXPR, TREE_TYPE (data_ref_base), data_ref_base,
			   base_offset);

  /* addr_expr = addr_base: materialize the address in a fresh SSA_NAME
     whose underlying variable borrows the base object's name for dumps.  */
  addr_expr = vect_get_new_vect_var (scalar_ptr_type, vect_pointer_var,
                                     get_name (base_name));
  add_referenced_tmp_var (addr_expr);
  vec_stmt = build2 (MODIFY_EXPR, void_type_node, addr_expr, addr_base);
  new_temp = make_ssa_name (addr_expr, vec_stmt);
  TREE_OPERAND (vec_stmt, 0) = new_temp;
  append_to_statement_list_force (vec_stmt, new_stmt_list);

  if (vect_print_dump_info (REPORT_DETAILS))
    {
      fprintf (vect_dump, "created ");
      print_generic_expr (vect_dump, vec_stmt, TDF_SLIM);
    }
  return new_temp;
}
191
192
193/* Function vect_align_data_ref.
194
195   Handle misalignment of a memory accesses.
196
197   FORNOW: Can't handle misaligned accesses.
198   Make sure that the dataref is aligned.  */
199
200static void
201vect_align_data_ref (tree stmt)
202{
203  stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
204  struct data_reference *dr = STMT_VINFO_DATA_REF (stmt_info);
205
206  /* FORNOW: can't handle misaligned accesses;
207             all accesses expected to be aligned.  */
208  gcc_assert (aligned_access_p (dr));
209}
210
211
/* Function vect_create_data_ref_ptr.

   Create a memory reference expression for vector access, to be used in a
   vector load/store stmt. The reference is based on a new pointer to vector
   type (vp).

   Input:
   1. STMT: a stmt that references memory. Expected to be of the form
         MODIFY_EXPR <name, data-ref> or MODIFY_EXPR <data-ref, name>.
   2. BSI: block_stmt_iterator where new stmts can be added.
   3. OFFSET (optional): an offset to be added to the initial address accessed
        by the data-ref in STMT.
   4. ONLY_INIT: indicate if vp is to be updated in the loop, or remain
        pointing to the initial address.

   Output:
   1. Declare a new ptr to vector_type, and have it point to the base of the
      data reference (initial address accessed by the data reference).
      For example, for vector of type V8HI, the following code is generated:

      v8hi *vp;
      vp = (v8hi *)initial_address;

      if OFFSET is not supplied:
         initial_address = &a[init];
      if OFFSET is supplied:
         initial_address = &a[init + OFFSET];

      Return the initial_address in INITIAL_ADDRESS.

   2. If ONLY_INIT is true, return the initial pointer.  Otherwise, create
      a data-reference in the loop based on the new vector pointer vp.  This
      new data reference will by some means be updated each iteration of
      the loop.  Return the pointer vp'.

   FORNOW: handle only aligned and consecutive accesses.  */

static tree
vect_create_data_ref_ptr (tree stmt,
			  block_stmt_iterator *bsi ATTRIBUTE_UNUSED,
			  tree offset, tree *initial_address, bool only_init)
{
  tree base_name;
  stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
  loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
  struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
  tree vectype = STMT_VINFO_VECTYPE (stmt_info);
  tree vect_ptr_type;
  tree vect_ptr;
  tree tag;
  tree new_temp;
  tree vec_stmt;
  tree new_stmt_list = NULL_TREE;
  edge pe = loop_preheader_edge (loop);
  basic_block new_bb;
  tree vect_ptr_init;
  struct data_reference *dr = STMT_VINFO_DATA_REF (stmt_info);

  /* The base object; used only to derive a meaningful name for the new
     pointer and for dump output.  */
  base_name =  build_fold_indirect_ref (unshare_expr (DR_BASE_ADDRESS (dr)));

  if (vect_print_dump_info (REPORT_DETAILS))
    {
      tree data_ref_base = base_name;
      fprintf (vect_dump, "create vector-pointer variable to type: ");
      print_generic_expr (vect_dump, vectype, TDF_SLIM);
      if (TREE_CODE (data_ref_base) == VAR_DECL)
        fprintf (vect_dump, "  vectorizing a one dimensional array ref: ");
      else if (TREE_CODE (data_ref_base) == ARRAY_REF)
        fprintf (vect_dump, "  vectorizing a multidimensional array ref: ");
      else if (TREE_CODE (data_ref_base) == COMPONENT_REF)
        fprintf (vect_dump, "  vectorizing a record based array ref: ");
      else if (TREE_CODE (data_ref_base) == SSA_NAME)
        fprintf (vect_dump, "  vectorizing a pointer ref: ");
      print_generic_expr (vect_dump, base_name, TDF_SLIM);
    }

  /** (1) Create the new vector-pointer variable:  **/

  vect_ptr_type = build_pointer_type (vectype);
  vect_ptr = vect_get_new_vect_var (vect_ptr_type, vect_pointer_var,
                                    get_name (base_name));
  add_referenced_tmp_var (vect_ptr);


  /** (2) Add aliasing information to the new vector-pointer:
          (The points-to info (DR_PTR_INFO) may be defined later.)  **/

  tag = DR_MEMTAG (dr);
  gcc_assert (tag);

  /* If tag is a variable (and NOT_A_TAG) then a new type alias
     tag must be created with tag added to its may alias list.  */
  if (var_ann (tag)->mem_tag_kind == NOT_A_TAG)
    new_type_alias (vect_ptr, tag);
  else
    var_ann (vect_ptr)->type_mem_tag = tag;

  var_ann (vect_ptr)->subvars = DR_SUBVARS (dr);

  /** (3) Calculate the initial address of the vector-pointer, and set
          the vector-pointer to point to it before the loop:  **/

  /* Create: (&(base[init_val+offset]) in the loop preheader.  */
  new_temp = vect_create_addr_base_for_vector_ref (stmt, &new_stmt_list,
                                                   offset);
  pe = loop_preheader_edge (loop);
  /* The insertion must not require splitting the preheader edge; a
     returned block would mean a new bb was created, which we forbid.  */
  new_bb = bsi_insert_on_edge_immediate (pe, new_stmt_list);
  gcc_assert (!new_bb);
  *initial_address = new_temp;

  /* Create: p = (vectype *) initial_base  */
  vec_stmt = fold_convert (vect_ptr_type, new_temp);
  vec_stmt = build2 (MODIFY_EXPR, void_type_node, vect_ptr, vec_stmt);
  vect_ptr_init = make_ssa_name (vect_ptr, vec_stmt);
  TREE_OPERAND (vec_stmt, 0) = vect_ptr_init;
  new_bb = bsi_insert_on_edge_immediate (pe, vec_stmt);
  gcc_assert (!new_bb);


  /** (4) Handle the updating of the vector-pointer inside the loop: **/

  if (only_init) /* No update in loop is required.  */
    {
      /* Copy the points-to information if it exists. */
      if (DR_PTR_INFO (dr))
        duplicate_ssa_name_ptr_info (vect_ptr_init, DR_PTR_INFO (dr));
      return vect_ptr_init;
    }
  else
    {
      block_stmt_iterator incr_bsi;
      bool insert_after;
      tree indx_before_incr, indx_after_incr;
      tree incr;

      /* Create an IV that advances the pointer by the vector size each
	 iteration, at the loop's standard increment position.  */
      standard_iv_increment_position (loop, &incr_bsi, &insert_after);
      create_iv (vect_ptr_init,
		 fold_convert (vect_ptr_type, TYPE_SIZE_UNIT (vectype)),
		 NULL_TREE, loop, &incr_bsi, insert_after,
		 &indx_before_incr, &indx_after_incr);
      incr = bsi_stmt (incr_bsi);
      /* The increment stmt belongs to the vectorized code; attach a
	 stmt_vec_info so later vectorizer queries on it succeed.  */
      set_stmt_info ((tree_ann_t)stmt_ann (incr),
		     new_stmt_vec_info (incr, loop_vinfo));

      /* Copy the points-to information if it exists. */
      if (DR_PTR_INFO (dr))
	{
	  duplicate_ssa_name_ptr_info (indx_before_incr, DR_PTR_INFO (dr));
	  duplicate_ssa_name_ptr_info (indx_after_incr, DR_PTR_INFO (dr));
	}
      merge_alias_info (vect_ptr_init, indx_before_incr);
      merge_alias_info (vect_ptr_init, indx_after_incr);

      /* Uses inside the loop body reference the pre-increment value.  */
      return indx_before_incr;
    }
}
368
369
370/* Function vect_create_destination_var.
371
372   Create a new temporary of type VECTYPE.  */
373
374static tree
375vect_create_destination_var (tree scalar_dest, tree vectype)
376{
377  tree vec_dest;
378  const char *new_name;
379  tree type;
380  enum vect_var_kind kind;
381
382  kind = vectype ? vect_simple_var : vect_scalar_var;
383  type = vectype ? vectype : TREE_TYPE (scalar_dest);
384
385  gcc_assert (TREE_CODE (scalar_dest) == SSA_NAME);
386
387  new_name = get_name (scalar_dest);
388  if (!new_name)
389    new_name = "var_";
390  vec_dest = vect_get_new_vect_var (type, vect_simple_var, new_name);
391  add_referenced_tmp_var (vec_dest);
392
393  return vec_dest;
394}
395
396
397/* Function vect_init_vector.
398
399   Insert a new stmt (INIT_STMT) that initializes a new vector variable with
400   the vector elements of VECTOR_VAR. Return the DEF of INIT_STMT. It will be
401   used in the vectorization of STMT.  */
402
403static tree
404vect_init_vector (tree stmt, tree vector_var)
405{
406  stmt_vec_info stmt_vinfo = vinfo_for_stmt (stmt);
407  loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_vinfo);
408  struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
409  tree new_var;
410  tree init_stmt;
411  tree vectype = STMT_VINFO_VECTYPE (stmt_vinfo);
412  tree vec_oprnd;
413  edge pe;
414  tree new_temp;
415  basic_block new_bb;
416
417  new_var = vect_get_new_vect_var (vectype, vect_simple_var, "cst_");
418  add_referenced_tmp_var (new_var);
419
420  init_stmt = build2 (MODIFY_EXPR, vectype, new_var, vector_var);
421  new_temp = make_ssa_name (new_var, init_stmt);
422  TREE_OPERAND (init_stmt, 0) = new_temp;
423
424  pe = loop_preheader_edge (loop);
425  new_bb = bsi_insert_on_edge_immediate (pe, init_stmt);
426  gcc_assert (!new_bb);
427
428  if (vect_print_dump_info (REPORT_DETAILS))
429    {
430      fprintf (vect_dump, "created new init_stmt: ");
431      print_generic_expr (vect_dump, init_stmt, TDF_SLIM);
432    }
433
434  vec_oprnd = TREE_OPERAND (init_stmt, 0);
435  return vec_oprnd;
436}
437
438
/* Function vect_get_vec_def_for_operand.

   OP is an operand in STMT. This function returns a (vector) def that will be
   used in the vectorized stmt for STMT.

   In the case that OP is an SSA_NAME which is defined in the loop, then
   STMT_VINFO_VEC_STMT of the defining stmt holds the relevant def.

   In case OP is an invariant or constant, a new stmt that creates a vector def
   needs to be introduced.

   If SCALAR_DEF is non-NULL, the scalar def that corresponds to the
   returned vector def (the constant, invariant, defining stmt, or
   reduction initial value) is stored through it.  */

static tree
vect_get_vec_def_for_operand (tree op, tree stmt, tree *scalar_def)
{
  tree vec_oprnd;
  tree vec_stmt;
  tree def_stmt;
  stmt_vec_info def_stmt_info = NULL;
  stmt_vec_info stmt_vinfo = vinfo_for_stmt (stmt);
  tree vectype = STMT_VINFO_VECTYPE (stmt_vinfo);
  int nunits = TYPE_VECTOR_SUBPARTS (vectype);
  loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_vinfo);
  struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
  tree vec_inv;
  tree vec_cst;
  tree t = NULL_TREE;
  tree def;
  int i;
  enum vect_def_type dt;
  bool is_simple_use;

  if (vect_print_dump_info (REPORT_DETAILS))
    {
      fprintf (vect_dump, "vect_get_vec_def_for_operand: ");
      print_generic_expr (vect_dump, op, TDF_SLIM);
    }

  /* Classify OP; the analysis phase already verified all operands are
     "simple", so failure here would indicate an internal error.  */
  is_simple_use = vect_is_simple_use (op, loop_vinfo, &def_stmt, &def, &dt);
  gcc_assert (is_simple_use);
  if (vect_print_dump_info (REPORT_DETAILS))
    {
      if (def)
        {
          fprintf (vect_dump, "def =  ");
          print_generic_expr (vect_dump, def, TDF_SLIM);
        }
      if (def_stmt)
        {
          fprintf (vect_dump, "  def_stmt =  ");
          print_generic_expr (vect_dump, def_stmt, TDF_SLIM);
        }
    }

  switch (dt)
    {
    /* Case 1: operand is a constant.  */
    case vect_constant_def:
      {
	if (scalar_def)
	  *scalar_def = op;

        /* Create 'vect_cst_ = {cst,cst,...,cst}'  */
        if (vect_print_dump_info (REPORT_DETAILS))
          fprintf (vect_dump, "Create vector_cst. nunits = %d", nunits);

        /* Build the element list back-to-front so the TREE_LIST ends up
           in unit order.  */
        for (i = nunits - 1; i >= 0; --i)
          {
            t = tree_cons (NULL_TREE, op, t);
          }
        vec_cst = build_vector (vectype, t);
        return vect_init_vector (stmt, vec_cst);
      }

    /* Case 2: operand is defined outside the loop - loop invariant.  */
    case vect_invariant_def:
      {
	if (scalar_def)
	  *scalar_def = def;

        /* Create 'vec_inv = {inv,inv,..,inv}'  */
        if (vect_print_dump_info (REPORT_DETAILS))
          fprintf (vect_dump, "Create vector_inv.");

        for (i = nunits - 1; i >= 0; --i)
          {
            t = tree_cons (NULL_TREE, def, t);
          }

	/* FIXME: use build_constructor directly.  */
        vec_inv = build_constructor_from_list (vectype, t);
        return vect_init_vector (stmt, vec_inv);
      }

    /* Case 3: operand is defined inside the loop.  */
    case vect_loop_def:
      {
	if (scalar_def)
	  *scalar_def = def_stmt;

        /* Get the def from the vectorized stmt.  The defining stmt must
           already have been vectorized (stmts are processed in order).  */
        def_stmt_info = vinfo_for_stmt (def_stmt);
        vec_stmt = STMT_VINFO_VEC_STMT (def_stmt_info);
        gcc_assert (vec_stmt);
        vec_oprnd = TREE_OPERAND (vec_stmt, 0);
        return vec_oprnd;
      }

    /* Case 4: operand is defined by a loop header phi - reduction  */
    case vect_reduction_def:
      {
        gcc_assert (TREE_CODE (def_stmt) == PHI_NODE);

        /* Get the def before the loop (the initial value of the
           reduction variable, carried in on the preheader edge).  */
        op = PHI_ARG_DEF_FROM_EDGE (def_stmt, loop_preheader_edge (loop));
        return get_initial_def_for_reduction (stmt, op, scalar_def);
     }

    /* Case 5: operand is defined by loop-header phi - induction.  */
    case vect_induction_def:
      {
        if (vect_print_dump_info (REPORT_DETAILS))
          fprintf (vect_dump, "induction - unsupported.");
        internal_error ("no support for induction"); /* FORNOW */
      }

    default:
      gcc_unreachable ();
    }
}
568
569
570/* Function vect_finish_stmt_generation.
571
572   Insert a new stmt.  */
573
574static void
575vect_finish_stmt_generation (tree stmt, tree vec_stmt, block_stmt_iterator *bsi)
576{
577  bsi_insert_before (bsi, vec_stmt, BSI_SAME_STMT);
578
579  if (vect_print_dump_info (REPORT_DETAILS))
580    {
581      fprintf (vect_dump, "add new stmt: ");
582      print_generic_expr (vect_dump, vec_stmt, TDF_SLIM);
583    }
584
585  /* Make sure bsi points to the stmt that is being vectorized.  */
586  gcc_assert (stmt == bsi_stmt (*bsi));
587
588#ifdef USE_MAPPED_LOCATION
589  SET_EXPR_LOCATION (vec_stmt, EXPR_LOCATION (stmt));
590#else
591  SET_EXPR_LOCUS (vec_stmt, EXPR_LOCUS (stmt));
592#endif
593}
594
595
596#define ADJUST_IN_EPILOG 1
597
/* Function get_initial_def_for_reduction

   Input:
   STMT - a stmt that performs a reduction operation in the loop.
   INIT_VAL - the initial value of the reduction variable

   Output:
   SCALAR_DEF - a tree that holds a value to be added to the final result
	of the reduction (used for "ADJUST_IN_EPILOG" - see below).
   Return a vector variable, initialized according to the operation that STMT
	performs. This vector will be used as the initial value of the
	vector of partial results.

   Option1 ("ADJUST_IN_EPILOG"): Initialize the vector as follows:
     add:         [0,0,...,0,0]
     mult:        [1,1,...,1,1]
     min/max:     [init_val,init_val,..,init_val,init_val]
     bit and/or:  [init_val,init_val,..,init_val,init_val]
   and when necessary (e.g. add/mult case) let the caller know
   that it needs to adjust the result by init_val.

   Option2: Initialize the vector as follows:
     add:         [0,0,...,0,init_val]
     mult:        [1,1,...,1,init_val]
     min/max:     [init_val,init_val,...,init_val]
     bit and/or:  [init_val,init_val,...,init_val]
   and no adjustments are needed.

   For example, for the following code:

   s = init_val;
   for (i=0;i<n;i++)
     s = s + a[i];

   STMT is 's = s + a[i]', and the reduction variable is 's'.
   For a vector of 4 units, we want to return either [0,0,0,init_val],
   or [0,0,0,0] and let the caller know that it needs to adjust
   the result at the end by 'init_val'.

   FORNOW: We use the "ADJUST_IN_EPILOG" scheme.
   TODO: Use some cost-model to estimate which scheme is more profitable.
*/

static tree
get_initial_def_for_reduction (tree stmt, tree init_val, tree *scalar_def)
{
  stmt_vec_info stmt_vinfo = vinfo_for_stmt (stmt);
  tree vectype = STMT_VINFO_VECTYPE (stmt_vinfo);
  int nunits = GET_MODE_NUNITS (TYPE_MODE (vectype));
  /* Number of elements that get the neutral value DEF; the remaining
     element (if any) gets INIT_VAL itself.  */
  int nelements;
  enum tree_code code = TREE_CODE (TREE_OPERAND (stmt, 1));
  tree type = TREE_TYPE (init_val);
  tree def;
  tree vec, t = NULL_TREE;
  bool need_epilog_adjust;
  int i;

  gcc_assert (INTEGRAL_TYPE_P (type) || SCALAR_FLOAT_TYPE_P (type));

  switch (code)
  {
  case PLUS_EXPR:
    /* The neutral element for addition is zero.  */
    if (INTEGRAL_TYPE_P (type))
      def = build_int_cst (type, 0);
    else
      def = build_real (type, dconst0);

#ifdef ADJUST_IN_EPILOG
    /* All the 'nunits' elements are set to 0. The final result will be
       adjusted by 'init_val' at the loop epilog.  */
    nelements = nunits;
    need_epilog_adjust = true;
#else
    /* 'nunits - 1' elements are set to 0; The last element is set to
        'init_val'.  No further adjustments at the epilog are needed.  */
    nelements = nunits - 1;
    need_epilog_adjust = false;
#endif
    break;

  case MIN_EXPR:
  case MAX_EXPR:
    /* INIT_VAL is its own neutral element for min/max, so no epilog
       adjustment is ever needed.  */
    def = init_val;
    nelements = nunits;
    need_epilog_adjust = false;
    break;

  default:
    gcc_unreachable ();
  }

  /* Build the element list back-to-front so it ends up in unit order.  */
  for (i = nelements - 1; i >= 0; --i)
    t = tree_cons (NULL_TREE, def, t);

  if (nelements == nunits - 1)
    {
      /* Set the last element of the vector.  */
      t = tree_cons (NULL_TREE, init_val, t);
      nelements += 1;
    }
  gcc_assert (nelements == nunits);

  /* A VECTOR_CST requires constant elements; otherwise fall back to a
     CONSTRUCTOR for the (invariant) initial value.  */
  if (TREE_CODE (init_val) == INTEGER_CST || TREE_CODE (init_val) == REAL_CST)
    vec = build_vector (vectype, t);
  else
    vec = build_constructor_from_list (vectype, t);

  if (!need_epilog_adjust)
    *scalar_def = NULL_TREE;
  else
    *scalar_def = init_val;

  return vect_init_vector (stmt, vec);
}
712
713
714/* Function vect_create_epilog_for_reduction:
715
716   Create code at the loop-epilog to finalize the result of a reduction
717   computation.
718
719   LOOP_EXIT_VECT_DEF is a vector of partial results. We need to "reduce" it
720   into a single result, by applying the operation REDUC_CODE on the
721   partial-results-vector. For this, we need to create a new phi node at the
722   loop exit to preserve loop-closed form, as illustrated below.
723
724   STMT is the original scalar reduction stmt that is being vectorized.
725   REDUCTION_OP is the scalar reduction-variable.
726   REDUCTION_PHI is the phi-node that carries the reduction computation.
727   This function also sets the arguments for the REDUCTION_PHI:
728   The loop-entry argument is the (vectorized) initial-value of REDUCTION_OP.
729   The loop-latch argument is VECT_DEF - the vector of partial sums.
730
731     This function transforms this:
732
733        loop:
734          vec_def = phi <null, null>    # REDUCTION_PHI
735          ....
736          VECT_DEF = ...
737
738        loop_exit:
739          s_out0 = phi <s_loop>         # EXIT_PHI
740
741          use <s_out0>
742          use <s_out0>
743
744     Into:
745
746        loop:
747          vec_def = phi <vec_init, VECT_DEF> # REDUCTION_PHI
748          ....
749          VECT_DEF = ...
750
751        loop_exit:
752          s_out0 = phi <s_loop>         # EXIT_PHI
753          v_out1 = phi <VECT_DEF>       # NEW_EXIT_PHI
754
755          v_out2 = reduc_expr <v_out1>
756          s_out3 = extract_field <v_out2, 0>
757
758          use <s_out3>
759          use <s_out3>
760*/
761
762static void
763vect_create_epilog_for_reduction (tree vect_def, tree stmt, tree reduction_op,
764                                  enum tree_code reduc_code, tree reduction_phi)
765{
766  stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
767  tree vectype = STMT_VINFO_VECTYPE (stmt_info);
768  enum machine_mode mode = TYPE_MODE (vectype);
769  loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
770  struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
771  basic_block exit_bb;
772  tree scalar_dest = TREE_OPERAND (stmt, 0);
773  tree scalar_type = TREE_TYPE (scalar_dest);
774  tree new_phi;
775  block_stmt_iterator exit_bsi;
776  tree vec_dest;
777  tree new_temp;
778  tree new_name;
779  tree epilog_stmt;
780  tree new_scalar_dest, exit_phi;
781  tree bitsize, bitpos, bytesize;
782  enum tree_code code = TREE_CODE (TREE_OPERAND (stmt, 1));
783  tree scalar_initial_def;
784  tree vec_initial_def;
785  tree orig_name;
786  imm_use_iterator imm_iter;
787  use_operand_p use_p;
788  bool extract_scalar_result;
789
790  /*** 1. Create the reduction def-use cycle  ***/
791
792  /* 1.1 set the loop-entry arg of the reduction-phi:  */
793  /* For the case of reduction, vect_get_vec_def_for_operand returns
794     the scalar def before the loop, that defines the initial value
795     of the reduction variable.  */
796  vec_initial_def = vect_get_vec_def_for_operand (reduction_op, stmt,
797						  &scalar_initial_def);
798  add_phi_arg (reduction_phi, vec_initial_def, loop_preheader_edge (loop));
799
800
801  /* 1.2 set the loop-latch arg for the reduction-phi:  */
802  add_phi_arg (reduction_phi, vect_def, loop_latch_edge (loop));
803
804  if (vect_print_dump_info (REPORT_DETAILS))
805    {
806      fprintf (vect_dump, "transform reduction: created def-use cycle:");
807      print_generic_expr (vect_dump, reduction_phi, TDF_SLIM);
808      fprintf (vect_dump, "\n");
809      print_generic_expr (vect_dump, SSA_NAME_DEF_STMT (vect_def), TDF_SLIM);
810    }
811
812
813  /*** 2. Create epilog code ***/
814
815  /* 2.1 Create new loop-exit-phi to preserve loop-closed form:
816        v_out1 = phi <v_loop>  */
817
818  exit_bb = loop->single_exit->dest;
819  new_phi = create_phi_node (SSA_NAME_VAR (vect_def), exit_bb);
820  SET_PHI_ARG_DEF (new_phi, loop->single_exit->dest_idx, vect_def);
821
822  exit_bsi = bsi_start (exit_bb);
823
824
825  new_scalar_dest = vect_create_destination_var (scalar_dest, NULL);
826  bitsize = TYPE_SIZE (scalar_type);
827  bytesize = TYPE_SIZE_UNIT (scalar_type);
828
829  /* 2.2 Create the reduction code.  */
830
831  if (reduc_code < NUM_TREE_CODES)
832    {
833      /*** Case 1:  Create:
834	   v_out2 = reduc_expr <v_out1>  */
835
836      if (vect_print_dump_info (REPORT_DETAILS))
837	fprintf (vect_dump, "Reduce using direct vector reduction.");
838
839      vec_dest = vect_create_destination_var (scalar_dest, vectype);
840      epilog_stmt = build2 (MODIFY_EXPR, vectype, vec_dest,
841			build1 (reduc_code, vectype,  PHI_RESULT (new_phi)));
842      new_temp = make_ssa_name (vec_dest, epilog_stmt);
843      TREE_OPERAND (epilog_stmt, 0) = new_temp;
844      bsi_insert_after (&exit_bsi, epilog_stmt, BSI_NEW_STMT);
845
846      extract_scalar_result = true;
847    }
848  else
849    {
850      enum tree_code shift_code = 0;
851      bool have_whole_vector_shift = true;
852      enum tree_code code = TREE_CODE (TREE_OPERAND (stmt, 1)); /* CHECKME */
853      int bit_offset;
854      int element_bitsize = tree_low_cst (bitsize, 1);
855      int vec_size_in_bits = tree_low_cst (TYPE_SIZE (vectype), 1);
856      tree vec_temp;
857
858      /* The result of the reduction is expected to be at the least
859	 significant bits of the vector.  This is merely convention,
860	 as it's the extraction later that really matters, and that
861	 is also under our control.  */
862      if (vec_shr_optab->handlers[mode].insn_code != CODE_FOR_nothing)
863	shift_code = VEC_RSHIFT_EXPR;
864      else
865	have_whole_vector_shift = false;
866
867      /* Regardless of whether we have a whole vector shift, if we're
868	 emulating the operation via tree-vect-generic, we don't want
869	 to use it.  Only the first round of the reduction is likely
870	 to still be profitable via emulation.  */
871      /* ??? It might be better to emit a reduction tree code here, so that
872	 tree-vect-generic can expand the first round via bit tricks.  */
873      if (!VECTOR_MODE_P (mode))
874	have_whole_vector_shift = false;
875      else
876	{
877	  optab optab = optab_for_tree_code (code, vectype);
878	  if (optab->handlers[mode].insn_code == CODE_FOR_nothing)
879	    have_whole_vector_shift = false;
880	}
881
882      if (have_whole_vector_shift)
883        {
884	  /*** Case 2:
885	     for (offset = VS/2; offset >= element_size; offset/=2)
886	        {
887	          Create:  va' = vec_shift <va, offset>
888	          Create:  va = vop <va, va'>
889	        }  */
890
891	  if (vect_print_dump_info (REPORT_DETAILS))
892	    fprintf (vect_dump, "Reduce using vector shifts");
893
894	  vec_dest = vect_create_destination_var (scalar_dest, vectype);
895	  new_temp = PHI_RESULT (new_phi);
896
897	  for (bit_offset = vec_size_in_bits/2;
898	       bit_offset >= element_bitsize;
899	       bit_offset /= 2)
900	    {
901	      tree bitpos = size_int (bit_offset);
902
903	      epilog_stmt = build2 (MODIFY_EXPR, vectype, vec_dest,
904	      build2 (shift_code, vectype, new_temp, bitpos));
905	      new_name = make_ssa_name (vec_dest, epilog_stmt);
906	      TREE_OPERAND (epilog_stmt, 0) = new_name;
907	      bsi_insert_after (&exit_bsi, epilog_stmt, BSI_NEW_STMT);
908	      if (vect_print_dump_info (REPORT_DETAILS))
909		print_generic_expr (vect_dump, epilog_stmt, TDF_SLIM);
910
911
912	      epilog_stmt = build2 (MODIFY_EXPR, vectype, vec_dest,
913	      build2 (code, vectype, new_name, new_temp));
914	      new_temp = make_ssa_name (vec_dest, epilog_stmt);
915	      TREE_OPERAND (epilog_stmt, 0) = new_temp;
916	      bsi_insert_after (&exit_bsi, epilog_stmt, BSI_NEW_STMT);
917	      if (vect_print_dump_info (REPORT_DETAILS))
918		print_generic_expr (vect_dump, epilog_stmt, TDF_SLIM);
919	    }
920
921	  extract_scalar_result = true;
922	}
923      else
924        {
925	  tree rhs;
926
927	  /*** Case 3:
928	     Create:
929	     s = extract_field <v_out2, 0>
930	     for (offset=element_size; offset<vector_size; offset+=element_size;)
931	       {
932	         Create:  s' = extract_field <v_out2, offset>
933	         Create:  s = op <s, s'>
934	       }  */
935
936	  if (vect_print_dump_info (REPORT_DETAILS))
937	    fprintf (vect_dump, "Reduce using scalar code. ");
938
939	  vec_temp = PHI_RESULT (new_phi);
940	  vec_size_in_bits = tree_low_cst (TYPE_SIZE (vectype), 1);
941
942	  rhs = build3 (BIT_FIELD_REF, scalar_type, vec_temp, bitsize,
943			 bitsize_zero_node);
944
945	  BIT_FIELD_REF_UNSIGNED (rhs) = TYPE_UNSIGNED (scalar_type);
946	  epilog_stmt = build2 (MODIFY_EXPR, scalar_type, new_scalar_dest,
947			        rhs);
948	  new_temp = make_ssa_name (new_scalar_dest, epilog_stmt);
949	  TREE_OPERAND (epilog_stmt, 0) = new_temp;
950	  bsi_insert_after (&exit_bsi, epilog_stmt, BSI_NEW_STMT);
951	  if (vect_print_dump_info (REPORT_DETAILS))
952	    print_generic_expr (vect_dump, epilog_stmt, TDF_SLIM);
953
954	  for (bit_offset = element_bitsize;
955	       bit_offset < vec_size_in_bits;
956	       bit_offset += element_bitsize)
957	    {
958	      tree bitpos = bitsize_int (bit_offset);
959	      tree rhs = build3 (BIT_FIELD_REF, scalar_type, vec_temp, bitsize,
960				 bitpos);
961
962	      BIT_FIELD_REF_UNSIGNED (rhs) = TYPE_UNSIGNED (scalar_type);
963	      epilog_stmt = build2 (MODIFY_EXPR, scalar_type, new_scalar_dest,
964				    rhs);
965	      new_name = make_ssa_name (new_scalar_dest, epilog_stmt);
966	      TREE_OPERAND (epilog_stmt, 0) = new_name;
967	      bsi_insert_after (&exit_bsi, epilog_stmt, BSI_NEW_STMT);
968	      if (vect_print_dump_info (REPORT_DETAILS))
969		print_generic_expr (vect_dump, epilog_stmt, TDF_SLIM);
970
971
972	      epilog_stmt = build2 (MODIFY_EXPR, scalar_type, new_scalar_dest,
973				build2 (code, scalar_type, new_name, new_temp));
974	      new_temp = make_ssa_name (new_scalar_dest, epilog_stmt);
975	      TREE_OPERAND (epilog_stmt, 0) = new_temp;
976	      bsi_insert_after (&exit_bsi, epilog_stmt, BSI_NEW_STMT);
977	      if (vect_print_dump_info (REPORT_DETAILS))
978		print_generic_expr (vect_dump, epilog_stmt, TDF_SLIM);
979	    }
980
981	  extract_scalar_result = false;
982	}
983    }
984
985
986  /* 2.3  Extract the final scalar result.  Create:
987         s_out3 = extract_field <v_out2, bitpos>  */
988
989  if (extract_scalar_result)
990    {
991      tree rhs;
992
993      if (vect_print_dump_info (REPORT_DETAILS))
994	fprintf (vect_dump, "extract scalar result");
995
996      /* The result is in the low order bits.  */
997      if (BYTES_BIG_ENDIAN)
998	bitpos = size_binop (MULT_EXPR,
999		       bitsize_int (TYPE_VECTOR_SUBPARTS (vectype) - 1),
1000		       TYPE_SIZE (scalar_type));
1001      else
1002	bitpos = bitsize_zero_node;
1003
1004      rhs = build3 (BIT_FIELD_REF, scalar_type, new_temp, bitsize, bitpos);
1005      BIT_FIELD_REF_UNSIGNED (rhs) = TYPE_UNSIGNED (scalar_type);
1006      epilog_stmt = build2 (MODIFY_EXPR, scalar_type, new_scalar_dest, rhs);
1007      new_temp = make_ssa_name (new_scalar_dest, epilog_stmt);
1008      TREE_OPERAND (epilog_stmt, 0) = new_temp;
1009      bsi_insert_after (&exit_bsi, epilog_stmt, BSI_NEW_STMT);
1010      if (vect_print_dump_info (REPORT_DETAILS))
1011	print_generic_expr (vect_dump, epilog_stmt, TDF_SLIM);
1012    }
1013
1014
1015  /* 2.4 Adjust the final result by the initial value of the reduction
1016	 variable. (when such adjustment is not needed, then
1017	 'scalar_initial_def' is zero).
1018
1019	 Create:
1020	 s_out = scalar_expr <s_out, scalar_initial_def>  */
1021
1022  if (scalar_initial_def)
1023    {
1024      epilog_stmt = build2 (MODIFY_EXPR, scalar_type, new_scalar_dest,
1025                      build2 (code, scalar_type, new_temp, scalar_initial_def));
1026      new_temp = make_ssa_name (new_scalar_dest, epilog_stmt);
1027      TREE_OPERAND (epilog_stmt, 0) = new_temp;
1028      bsi_insert_after (&exit_bsi, epilog_stmt, BSI_NEW_STMT);
1029
1030      if (vect_print_dump_info (REPORT_DETAILS))
1031        print_generic_expr (vect_dump, epilog_stmt, TDF_SLIM);
1032    }
1033
1034
1035  /* 2.5 Replace uses of s_out0 with uses of s_out3  */
1036
1037  /* Find the loop-closed-use at the loop exit of the original
1038     scalar result.  (The reduction result is expected to have
1039     two immediate uses - one at the latch block, and one at the
1040     loop exit).  */
1041  exit_phi = NULL;
1042  FOR_EACH_IMM_USE_FAST (use_p, imm_iter, scalar_dest)
1043    {
1044      if (!flow_bb_inside_loop_p (loop, bb_for_stmt (USE_STMT (use_p))))
1045	{
1046	  exit_phi = USE_STMT (use_p);
1047	  break;
1048	}
1049    }
1050
1051  orig_name = PHI_RESULT (exit_phi);
1052
1053  FOR_EACH_IMM_USE_SAFE (use_p, imm_iter, orig_name)
1054    SET_USE (use_p, new_temp);
1055}
1056
1057
1058/* Function vectorizable_reduction.
1059
1060   Check if STMT performs a reduction operation that can be vectorized.
1061   If VEC_STMT is also passed, vectorize the STMT: create a vectorized
1062   stmt to replace it, put it in VEC_STMT, and insert it at BSI.
1063   Return FALSE if not a vectorizable STMT, TRUE otherwise.  */
1064
1065bool
1066vectorizable_reduction (tree stmt, block_stmt_iterator *bsi, tree *vec_stmt)
1067{
1068  tree vec_dest;
1069  tree scalar_dest;
1070  tree op0, op1;
1071  tree loop_vec_def;
1072  stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
1073  tree vectype = STMT_VINFO_VECTYPE (stmt_info);
1074  loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
1075  struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
1076  tree operation;
1077  enum tree_code code, reduc_code = 0;
1078  enum machine_mode vec_mode;
1079  int op_type;
1080  optab optab, reduc_optab;
1081  tree new_temp;
1082  tree def0, def1, def_stmt0, def_stmt1;
1083  enum vect_def_type dt0, dt1;
1084  tree new_phi;
1085  tree scalar_type;
1086  bool is_simple_use0;
1087  bool is_simple_use1;
1088
1089  /* Is vectorizable reduction?  */
1090
1091  /* Not supportable if the reduction variable is used in the loop.  */
1092  if (STMT_VINFO_RELEVANT_P (stmt_info))
1093    return false;
1094
1095  if (!STMT_VINFO_LIVE_P (stmt_info))
1096    return false;
1097
1098  /* Make sure it was already recognized as a reduction pattern.  */
1099  if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_reduction_def)
1100    return false;
1101
1102  gcc_assert (TREE_CODE (stmt) == MODIFY_EXPR);
1103
1104  operation = TREE_OPERAND (stmt, 1);
1105  code = TREE_CODE (operation);
1106  op_type = TREE_CODE_LENGTH (code);
1107
1108  if (op_type != binary_op)
1109    return false;
1110
1111  op0 = TREE_OPERAND (operation, 0);
1112  op1 = TREE_OPERAND (operation, 1);
1113  scalar_dest = TREE_OPERAND (stmt, 0);
1114  scalar_type = TREE_TYPE (scalar_dest);
1115
1116  /* Check the first operand. It is expected to be defined inside the loop.  */
1117  is_simple_use0 =
1118        vect_is_simple_use (op0, loop_vinfo, &def_stmt0, &def0, &dt0);
1119  is_simple_use1 =
1120        vect_is_simple_use (op1, loop_vinfo, &def_stmt1, &def1, &dt1);
1121
1122  gcc_assert (is_simple_use0);
1123  gcc_assert (is_simple_use1);
1124  gcc_assert (dt0 == vect_loop_def);
1125  gcc_assert (dt1 == vect_reduction_def);
1126  gcc_assert (TREE_CODE (def_stmt1) == PHI_NODE);
1127  gcc_assert (stmt == vect_is_simple_reduction (loop, def_stmt1));
1128
1129  if (STMT_VINFO_LIVE_P (vinfo_for_stmt (def_stmt1)))
1130   return false;
1131
1132  /* Supportable by target?  */
1133
1134  /* check support for the operation in the loop  */
1135  optab = optab_for_tree_code (code, vectype);
1136  if (!optab)
1137    {
1138      if (vect_print_dump_info (REPORT_DETAILS))
1139        fprintf (vect_dump, "no optab.");
1140      return false;
1141    }
1142  vec_mode = TYPE_MODE (vectype);
1143  if (optab->handlers[(int) vec_mode].insn_code == CODE_FOR_nothing)
1144    {
1145      if (vect_print_dump_info (REPORT_DETAILS))
1146        fprintf (vect_dump, "op not supported by target.");
1147      if (GET_MODE_SIZE (vec_mode) != UNITS_PER_WORD
1148          || LOOP_VINFO_VECT_FACTOR (loop_vinfo)
1149	     < vect_min_worthwhile_factor (code))
1150        return false;
1151      if (vect_print_dump_info (REPORT_DETAILS))
1152	fprintf (vect_dump, "proceeding using word mode.");
1153    }
1154
1155  /* Worthwhile without SIMD support?  */
1156  if (!VECTOR_MODE_P (TYPE_MODE (vectype))
1157      && LOOP_VINFO_VECT_FACTOR (loop_vinfo)
1158	 < vect_min_worthwhile_factor (code))
1159    {
1160      if (vect_print_dump_info (REPORT_DETAILS))
1161	fprintf (vect_dump, "not worthwhile without SIMD support.");
1162      return false;
1163    }
1164
1165  /* check support for the epilog operation  */
1166  if (!reduction_code_for_scalar_code (code, &reduc_code))
1167    return false;
1168  reduc_optab = optab_for_tree_code (reduc_code, vectype);
1169  if (!reduc_optab)
1170    {
1171      if (vect_print_dump_info (REPORT_DETAILS))
1172        fprintf (vect_dump, "no optab for reduction.");
1173      reduc_code = NUM_TREE_CODES;
1174    }
1175  if (reduc_optab->handlers[(int) vec_mode].insn_code == CODE_FOR_nothing)
1176    {
1177      if (vect_print_dump_info (REPORT_DETAILS))
1178        fprintf (vect_dump, "reduc op not supported by target.");
1179      reduc_code = NUM_TREE_CODES;
1180    }
1181
1182  if (!vec_stmt) /* transformation not required.  */
1183    {
1184      STMT_VINFO_TYPE (stmt_info) = reduc_vec_info_type;
1185      return true;
1186    }
1187
1188  /** Transform.  **/
1189
1190  if (vect_print_dump_info (REPORT_DETAILS))
1191    fprintf (vect_dump, "transform reduction.");
1192
1193  /* Create the destination vector  */
1194  vec_dest = vect_create_destination_var (scalar_dest, vectype);
1195
1196
1197  /* Create the reduction-phi that defines the reduction-operand.  */
1198  new_phi = create_phi_node (vec_dest, loop->header);
1199
1200
1201  /* Prepare the operand that is defined inside the loop body  */
1202  loop_vec_def = vect_get_vec_def_for_operand (op0, stmt, NULL);
1203
1204  /* Create the vectorized operation that computes the partial results  */
1205  *vec_stmt = build2 (MODIFY_EXPR, vectype, vec_dest,
1206                build2 (code, vectype, loop_vec_def, PHI_RESULT (new_phi)));
1207  new_temp = make_ssa_name (vec_dest, *vec_stmt);
1208  TREE_OPERAND (*vec_stmt, 0) = new_temp;
1209  vect_finish_stmt_generation (stmt, *vec_stmt, bsi);
1210
1211
1212  /* Finalize the reduction-phi (set it's arguments) and create the
1213     epilog reduction code.  */
1214  vect_create_epilog_for_reduction (new_temp, stmt, op1, reduc_code, new_phi);
1215  return true;
1216}
1217
1218
1219/* Function vectorizable_assignment.
1220
1221   Check if STMT performs an assignment (copy) that can be vectorized.
1222   If VEC_STMT is also passed, vectorize the STMT: create a vectorized
1223   stmt to replace it, put it in VEC_STMT, and insert it at BSI.
1224   Return FALSE if not a vectorizable STMT, TRUE otherwise.  */
1225
1226bool
1227vectorizable_assignment (tree stmt, block_stmt_iterator *bsi, tree *vec_stmt)
1228{
1229  tree vec_dest;
1230  tree scalar_dest;
1231  tree op;
1232  tree vec_oprnd;
1233  stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
1234  tree vectype = STMT_VINFO_VECTYPE (stmt_info);
1235  loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
1236  tree new_temp;
1237  tree def, def_stmt;
1238  enum vect_def_type dt;
1239
1240  /* Is vectorizable assignment?  */
1241  if (!STMT_VINFO_RELEVANT_P (stmt_info))
1242    return false;
1243
1244  gcc_assert (STMT_VINFO_DEF_TYPE (stmt_info) == vect_loop_def);
1245
1246  if (TREE_CODE (stmt) != MODIFY_EXPR)
1247    return false;
1248
1249  scalar_dest = TREE_OPERAND (stmt, 0);
1250  if (TREE_CODE (scalar_dest) != SSA_NAME)
1251    return false;
1252
1253  op = TREE_OPERAND (stmt, 1);
1254  if (!vect_is_simple_use (op, loop_vinfo, &def_stmt, &def, &dt))
1255    {
1256      if (vect_print_dump_info (REPORT_DETAILS))
1257        fprintf (vect_dump, "use not simple.");
1258      return false;
1259    }
1260
1261  if (!vec_stmt) /* transformation not required.  */
1262    {
1263      STMT_VINFO_TYPE (stmt_info) = assignment_vec_info_type;
1264      return true;
1265    }
1266
1267  /** Transform.  **/
1268  if (vect_print_dump_info (REPORT_DETAILS))
1269    fprintf (vect_dump, "transform assignment.");
1270
1271  /* Handle def.  */
1272  vec_dest = vect_create_destination_var (scalar_dest, vectype);
1273
1274  /* Handle use.  */
1275  op = TREE_OPERAND (stmt, 1);
1276  vec_oprnd = vect_get_vec_def_for_operand (op, stmt, NULL);
1277
1278  /* Arguments are ready. create the new vector stmt.  */
1279  *vec_stmt = build2 (MODIFY_EXPR, vectype, vec_dest, vec_oprnd);
1280  new_temp = make_ssa_name (vec_dest, *vec_stmt);
1281  TREE_OPERAND (*vec_stmt, 0) = new_temp;
1282  vect_finish_stmt_generation (stmt, *vec_stmt, bsi);
1283
1284  return true;
1285}
1286
1287
1288/* Function vect_min_worthwhile_factor.
1289
1290   For a loop where we could vectorize the operation indicated by CODE,
1291   return the minimum vectorization factor that makes it worthwhile
1292   to use generic vectors.  */
1293static int
1294vect_min_worthwhile_factor (enum tree_code code)
1295{
1296  switch (code)
1297    {
1298    case PLUS_EXPR:
1299    case MINUS_EXPR:
1300    case NEGATE_EXPR:
1301      return 4;
1302
1303    case BIT_AND_EXPR:
1304    case BIT_IOR_EXPR:
1305    case BIT_XOR_EXPR:
1306    case BIT_NOT_EXPR:
1307      return 2;
1308
1309    default:
1310      return INT_MAX;
1311    }
1312}
1313
1314
/* Function vectorizable_operation.

   Check if STMT performs a binary or unary operation that can be vectorized.
   If VEC_STMT is also passed, vectorize the STMT: create a vectorized
   stmt to replace it, put it in VEC_STMT, and insert it at BSI.
   Return FALSE if not a vectorizable STMT, TRUE otherwise.

   Shifts get special treatment: some targets define the vector shift
   pattern with a scalar (rather than vector) shift-amount operand, in
   which case operand 1 must be a loop invariant or constant and is
   used directly without widening to a vector.  */

bool
vectorizable_operation (tree stmt, block_stmt_iterator *bsi, tree *vec_stmt)
{
  tree vec_dest;
  tree scalar_dest;
  tree operation;
  tree op0, op1 = NULL;
  tree vec_oprnd0, vec_oprnd1=NULL;
  stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
  tree vectype = STMT_VINFO_VECTYPE (stmt_info);
  loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
  int i;
  enum tree_code code;
  enum machine_mode vec_mode;
  tree new_temp;
  int op_type;
  tree op;
  optab optab;
  int icode;
  enum machine_mode optab_op2_mode;
  tree def, def_stmt;
  enum vect_def_type dt;

  /* Is STMT a vectorizable binary/unary operation?   */
  if (!STMT_VINFO_RELEVANT_P (stmt_info))
    return false;

  gcc_assert (STMT_VINFO_DEF_TYPE (stmt_info) == vect_loop_def);

  if (STMT_VINFO_LIVE_P (stmt_info))
    {
      /* FORNOW: not yet supported.  */
      if (vect_print_dump_info (REPORT_DETAILS))
        fprintf (vect_dump, "value used after loop.");
      return false;
    }

  if (TREE_CODE (stmt) != MODIFY_EXPR)
    return false;

  if (TREE_CODE (TREE_OPERAND (stmt, 0)) != SSA_NAME)
    return false;

  operation = TREE_OPERAND (stmt, 1);
  code = TREE_CODE (operation);
  optab = optab_for_tree_code (code, vectype);

  /* Support only unary or binary operations.  */
  op_type = TREE_CODE_LENGTH (code);
  if (op_type != unary_op && op_type != binary_op)
    {
      if (vect_print_dump_info (REPORT_DETAILS))
	fprintf (vect_dump, "num. args = %d (not unary/binary op).", op_type);
      return false;
    }

  /* Verify every operand has a def the vectorizer understands.  Note
     that DT is overwritten each iteration, so after the loop it holds
     the def-type of the LAST operand (operand 1 for binary ops); the
     shift check further down relies on exactly that.  */
  for (i = 0; i < op_type; i++)
    {
      op = TREE_OPERAND (operation, i);
      if (!vect_is_simple_use (op, loop_vinfo, &def_stmt, &def, &dt))
	{
	  if (vect_print_dump_info (REPORT_DETAILS))
	    fprintf (vect_dump, "use not simple.");
	  return false;
	}
    }

  /* Supportable by target?  */
  if (!optab)
    {
      if (vect_print_dump_info (REPORT_DETAILS))
	fprintf (vect_dump, "no optab.");
      return false;
    }
  vec_mode = TYPE_MODE (vectype);
  icode = (int) optab->handlers[(int) vec_mode].insn_code;
  if (icode == CODE_FOR_nothing)
    {
      if (vect_print_dump_info (REPORT_DETAILS))
	fprintf (vect_dump, "op not supported by target.");
      /* No direct SIMD support; word-mode emulation is still an option
	 if the vector fits in one word and the loop is wide enough.  */
      if (GET_MODE_SIZE (vec_mode) != UNITS_PER_WORD
          || LOOP_VINFO_VECT_FACTOR (loop_vinfo)
	     < vect_min_worthwhile_factor (code))
        return false;
      if (vect_print_dump_info (REPORT_DETAILS))
	fprintf (vect_dump, "proceeding using word mode.");
    }

  /* Worthwhile without SIMD support?  */
  if (!VECTOR_MODE_P (TYPE_MODE (vectype))
      && LOOP_VINFO_VECT_FACTOR (loop_vinfo)
	 < vect_min_worthwhile_factor (code))
    {
      if (vect_print_dump_info (REPORT_DETAILS))
	fprintf (vect_dump, "not worthwhile without SIMD support.");
      return false;
    }

  if (code == LSHIFT_EXPR || code == RSHIFT_EXPR)
    {
      /* FORNOW: not yet supported.  */
      if (!VECTOR_MODE_P (vec_mode))
	return false;

      /* Invariant argument is needed for a vector shift
	 by a scalar shift operand.  (DT here is the def-type of the
	 shift amount, set by the last loop iteration above.)  */
      optab_op2_mode = insn_data[icode].operand[2].mode;
      if (! (VECTOR_MODE_P (optab_op2_mode)
	     || dt == vect_constant_def
	     || dt == vect_invariant_def))
	{
	  if (vect_print_dump_info (REPORT_DETAILS))
	    fprintf (vect_dump, "operand mode requires invariant argument.");
	  return false;
	}
    }

  if (!vec_stmt) /* transformation not required.  */
    {
      STMT_VINFO_TYPE (stmt_info) = op_vec_info_type;
      return true;
    }

  /** Transform.  **/

  if (vect_print_dump_info (REPORT_DETAILS))
    fprintf (vect_dump, "transform binary/unary operation.");

  /* Handle def.  */
  scalar_dest = TREE_OPERAND (stmt, 0);
  vec_dest = vect_create_destination_var (scalar_dest, vectype);

  /* Handle uses.  */
  op0 = TREE_OPERAND (operation, 0);
  vec_oprnd0 = vect_get_vec_def_for_operand (op0, stmt, NULL);

  if (op_type == binary_op)
    {
      op1 = TREE_OPERAND (operation, 1);

      if (code == LSHIFT_EXPR || code == RSHIFT_EXPR)
	{
	  /* Vector shl and shr insn patterns can be defined with
	     scalar operand 2 (shift operand).  In this case, use
	     constant or loop invariant op1 directly, without
	     extending it to vector mode first.  */

	  optab_op2_mode = insn_data[icode].operand[2].mode;
	  if (!VECTOR_MODE_P (optab_op2_mode))
	    {
	      if (vect_print_dump_info (REPORT_DETAILS))
		fprintf (vect_dump, "operand 1 using scalar mode.");
	      vec_oprnd1 = op1;
	    }
	}

      /* Not the scalar-shift-amount case: widen op1 to a vector.  */
      if (!vec_oprnd1)
	vec_oprnd1 = vect_get_vec_def_for_operand (op1, stmt, NULL);
    }

  /* Arguments are ready. create the new vector stmt.  */

  if (op_type == binary_op)
    *vec_stmt = build2 (MODIFY_EXPR, vectype, vec_dest,
		build2 (code, vectype, vec_oprnd0, vec_oprnd1));
  else
    *vec_stmt = build2 (MODIFY_EXPR, vectype, vec_dest,
		build1 (code, vectype, vec_oprnd0));
  new_temp = make_ssa_name (vec_dest, *vec_stmt);
  TREE_OPERAND (*vec_stmt, 0) = new_temp;
  vect_finish_stmt_generation (stmt, *vec_stmt, bsi);

  return true;
}
1496
1497
1498/* Function vectorizable_store.
1499
1500   Check if STMT defines a non scalar data-ref (array/pointer/structure) that
1501   can be vectorized.
1502   If VEC_STMT is also passed, vectorize the STMT: create a vectorized
1503   stmt to replace it, put it in VEC_STMT, and insert it at BSI.
1504   Return FALSE if not a vectorizable STMT, TRUE otherwise.  */
1505
1506bool
1507vectorizable_store (tree stmt, block_stmt_iterator *bsi, tree *vec_stmt)
1508{
1509  tree scalar_dest;
1510  tree data_ref;
1511  tree op;
1512  tree vec_oprnd1;
1513  stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
1514  struct data_reference *dr = STMT_VINFO_DATA_REF (stmt_info);
1515  tree vectype = STMT_VINFO_VECTYPE (stmt_info);
1516  loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
1517  enum machine_mode vec_mode;
1518  tree dummy;
1519  enum dr_alignment_support alignment_support_cheme;
1520  ssa_op_iter iter;
1521  tree def, def_stmt;
1522  enum vect_def_type dt;
1523
1524  /* Is vectorizable store? */
1525
1526  if (TREE_CODE (stmt) != MODIFY_EXPR)
1527    return false;
1528
1529  scalar_dest = TREE_OPERAND (stmt, 0);
1530  if (TREE_CODE (scalar_dest) != ARRAY_REF
1531      && TREE_CODE (scalar_dest) != INDIRECT_REF)
1532    return false;
1533
1534  op = TREE_OPERAND (stmt, 1);
1535  if (!vect_is_simple_use (op, loop_vinfo, &def_stmt, &def, &dt))
1536    {
1537      if (vect_print_dump_info (REPORT_DETAILS))
1538        fprintf (vect_dump, "use not simple.");
1539      return false;
1540    }
1541
1542  vec_mode = TYPE_MODE (vectype);
1543  /* FORNOW. In some cases can vectorize even if data-type not supported
1544     (e.g. - array initialization with 0).  */
1545  if (mov_optab->handlers[(int)vec_mode].insn_code == CODE_FOR_nothing)
1546    return false;
1547
1548  if (!STMT_VINFO_DATA_REF (stmt_info))
1549    return false;
1550
1551
1552  if (!vec_stmt) /* transformation not required.  */
1553    {
1554      STMT_VINFO_TYPE (stmt_info) = store_vec_info_type;
1555      return true;
1556    }
1557
1558  /** Transform.  **/
1559
1560  if (vect_print_dump_info (REPORT_DETAILS))
1561    fprintf (vect_dump, "transform store");
1562
1563  alignment_support_cheme = vect_supportable_dr_alignment (dr);
1564  gcc_assert (alignment_support_cheme);
1565  gcc_assert (alignment_support_cheme == dr_aligned);  /* FORNOW */
1566
1567  /* Handle use - get the vectorized def from the defining stmt.  */
1568  vec_oprnd1 = vect_get_vec_def_for_operand (op, stmt, NULL);
1569
1570  /* Handle def.  */
1571  /* FORNOW: make sure the data reference is aligned.  */
1572  vect_align_data_ref (stmt);
1573  data_ref = vect_create_data_ref_ptr (stmt, bsi, NULL_TREE, &dummy, false);
1574  data_ref = build_fold_indirect_ref (data_ref);
1575
1576  /* Arguments are ready. create the new vector stmt.  */
1577  *vec_stmt = build2 (MODIFY_EXPR, vectype, data_ref, vec_oprnd1);
1578  vect_finish_stmt_generation (stmt, *vec_stmt, bsi);
1579
1580  /* Copy the V_MAY_DEFS representing the aliasing of the original array
1581     element's definition to the vector's definition then update the
1582     defining statement.  The original is being deleted so the same
1583     SSA_NAMEs can be used.  */
1584  copy_virtual_operands (*vec_stmt, stmt);
1585
1586  FOR_EACH_SSA_TREE_OPERAND (def, stmt, iter, SSA_OP_VMAYDEF)
1587    {
1588      SSA_NAME_DEF_STMT (def) = *vec_stmt;
1589
1590      /* If this virtual def has a use outside the loop and a loop peel is
1591	 performed then the def may be renamed by the peel.  Mark it for
1592	 renaming so the later use will also be renamed.  */
1593      mark_sym_for_renaming (SSA_NAME_VAR (def));
1594    }
1595
1596  return true;
1597}
1598
1599
/* vectorizable_load.

   Check if STMT reads a non scalar data-ref (array/pointer/structure) that
   can be vectorized.
   If VEC_STMT is also passed, vectorize the STMT: create a vectorized
   stmt to replace it, put it in VEC_STMT, and insert it at BSI.
   Return FALSE if not a vectorizable STMT, TRUE otherwise.

   Two code-generation schemes are used, chosen by the alignment support
   reported for the data-ref: a plain (possibly misaligned) vector load,
   or the software-pipelined realignment scheme that combines two aligned
   loads with a REALIGN_LOAD_EXPR.  */

bool
vectorizable_load (tree stmt, block_stmt_iterator *bsi, tree *vec_stmt)
{
  tree scalar_dest;
  tree vec_dest = NULL;
  tree data_ref = NULL;
  tree op;
  stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
  struct data_reference *dr = STMT_VINFO_DATA_REF (stmt_info);
  tree vectype = STMT_VINFO_VECTYPE (stmt_info);
  tree new_temp;
  int mode;
  tree init_addr;
  tree new_stmt;
  tree dummy;
  basic_block new_bb;
  loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
  struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
  edge pe = loop_preheader_edge (loop);
  enum dr_alignment_support alignment_support_cheme;

  /* Is vectorizable load? */
  if (!STMT_VINFO_RELEVANT_P (stmt_info))
    return false;

  gcc_assert (STMT_VINFO_DEF_TYPE (stmt_info) == vect_loop_def);

  if (STMT_VINFO_LIVE_P (stmt_info))
    {
      /* FORNOW: not yet supported.  */
      if (vect_print_dump_info (REPORT_DETAILS))
        fprintf (vect_dump, "value used after loop.");
      return false;
    }

  if (TREE_CODE (stmt) != MODIFY_EXPR)
    return false;

  /* The lhs must be an SSA name; the rhs a memory reference.  */
  scalar_dest = TREE_OPERAND (stmt, 0);
  if (TREE_CODE (scalar_dest) != SSA_NAME)
    return false;

  op = TREE_OPERAND (stmt, 1);
  if (TREE_CODE (op) != ARRAY_REF && TREE_CODE (op) != INDIRECT_REF)
    return false;

  if (!STMT_VINFO_DATA_REF (stmt_info))
    return false;

  mode = (int) TYPE_MODE (vectype);

  /* FORNOW. In some cases can vectorize even if data-type not supported
    (e.g. - data copies).  */
  if (mov_optab->handlers[mode].insn_code == CODE_FOR_nothing)
    {
      if (vect_print_dump_info (REPORT_DETAILS))
	fprintf (vect_dump, "Aligned load, but unsupported type.");
      return false;
    }

  if (!vec_stmt) /* transformation not required.  */
    {
      STMT_VINFO_TYPE (stmt_info) = load_vec_info_type;
      return true;
    }

  /** Transform.  **/

  if (vect_print_dump_info (REPORT_DETAILS))
    fprintf (vect_dump, "transform load.");

  alignment_support_cheme = vect_supportable_dr_alignment (dr);
  gcc_assert (alignment_support_cheme);

  if (alignment_support_cheme == dr_aligned
      || alignment_support_cheme == dr_unaligned_supported)
    {
      /* Create:
         p = initial_addr;
         indx = 0;
         loop {
           vec_dest = *(p);
           indx = indx + 1;
         }
      */

      vec_dest = vect_create_destination_var (scalar_dest, vectype);
      data_ref = vect_create_data_ref_ptr (stmt, bsi, NULL_TREE, &dummy, false);
      if (aligned_access_p (dr))
        data_ref = build_fold_indirect_ref (data_ref);
      else
	{
	  /* Misaligned but supported: wrap the access in a
	     MISALIGNED_INDIRECT_REF carrying the misalignment in bits
	     (zero when the misalignment is unknown, mis == -1).  */
	  int mis = DR_MISALIGNMENT (dr);
	  tree tmis = (mis == -1 ? size_zero_node : size_int (mis));
	  tmis = size_binop (MULT_EXPR, tmis, size_int(BITS_PER_UNIT));
	  data_ref = build2 (MISALIGNED_INDIRECT_REF, vectype, data_ref, tmis);
	}
      new_stmt = build2 (MODIFY_EXPR, vectype, vec_dest, data_ref);
      new_temp = make_ssa_name (vec_dest, new_stmt);
      TREE_OPERAND (new_stmt, 0) = new_temp;
      vect_finish_stmt_generation (stmt, new_stmt, bsi);
      /* The original load is deleted, so its virtual operands can be
	 reused on the vector load.  */
      copy_virtual_operands (new_stmt, stmt);
    }
  else if (alignment_support_cheme == dr_unaligned_software_pipeline)
    {
      /* Create:
	 p1 = initial_addr;
	 msq_init = *(floor(p1))
	 p2 = initial_addr + VS - 1;
	 magic = have_builtin ? builtin_result : initial_address;
	 indx = 0;
	 loop {
	   p2' = p2 + indx * vectype_size
	   lsq = *(floor(p2'))
	   vec_dest = realign_load (msq, lsq, magic)
	   indx = indx + 1;
	   msq = lsq;
	 }
      */

      tree offset;
      tree magic;
      tree phi_stmt;
      tree msq_init;
      tree msq, lsq;
      tree dataref_ptr;
      tree params;

      /* <1> Create msq_init = *(floor(p1)) in the loop preheader  */
      vec_dest = vect_create_destination_var (scalar_dest, vectype);
      data_ref = vect_create_data_ref_ptr (stmt, bsi, NULL_TREE,
					   &init_addr, true);
      data_ref = build1 (ALIGN_INDIRECT_REF, vectype, data_ref);
      new_stmt = build2 (MODIFY_EXPR, vectype, vec_dest, data_ref);
      new_temp = make_ssa_name (vec_dest, new_stmt);
      TREE_OPERAND (new_stmt, 0) = new_temp;
      /* Inserting on the preheader edge must not split it into a new
	 basic block, hence the assert.  */
      new_bb = bsi_insert_on_edge_immediate (pe, new_stmt);
      gcc_assert (!new_bb);
      msq_init = TREE_OPERAND (new_stmt, 0);
      copy_virtual_operands (new_stmt, stmt);
      update_vuses_to_preheader (new_stmt, loop);


      /* <2> Create lsq = *(floor(p2')) in the loop  */
      offset = size_int (TYPE_VECTOR_SUBPARTS (vectype) - 1);
      vec_dest = vect_create_destination_var (scalar_dest, vectype);
      dataref_ptr = vect_create_data_ref_ptr (stmt, bsi, offset, &dummy, false);
      data_ref = build1 (ALIGN_INDIRECT_REF, vectype, dataref_ptr);
      new_stmt = build2 (MODIFY_EXPR, vectype, vec_dest, data_ref);
      new_temp = make_ssa_name (vec_dest, new_stmt);
      TREE_OPERAND (new_stmt, 0) = new_temp;
      vect_finish_stmt_generation (stmt, new_stmt, bsi);
      lsq = TREE_OPERAND (new_stmt, 0);
      copy_virtual_operands (new_stmt, stmt);


      /* <3> Compute the "magic" third operand of the realign_load,
	     either via the target's mask-for-load builtin (emitted in
	     the preheader) or falling back to the data-ref pointer.  */
      if (targetm.vectorize.builtin_mask_for_load)
	{
	  /* Create permutation mask, if required, in loop preheader.  */
	  tree builtin_decl;
	  params = build_tree_list (NULL_TREE, init_addr);
	  vec_dest = vect_create_destination_var (scalar_dest, vectype);
	  builtin_decl = targetm.vectorize.builtin_mask_for_load ();
	  new_stmt = build_function_call_expr (builtin_decl, params);
	  new_stmt = build2 (MODIFY_EXPR, vectype, vec_dest, new_stmt);
	  new_temp = make_ssa_name (vec_dest, new_stmt);
	  TREE_OPERAND (new_stmt, 0) = new_temp;
	  new_bb = bsi_insert_on_edge_immediate (pe, new_stmt);
	  gcc_assert (!new_bb);
	  magic = TREE_OPERAND (new_stmt, 0);

	  /* The result of the CALL_EXPR to this builtin is determined from
	     the value of the parameter and no global variables are touched
	     which makes the builtin a "const" function.  Requiring the
	     builtin to have the "const" attribute makes it unnecessary
	     to call mark_call_clobbered.  */
	  gcc_assert (TREE_READONLY (builtin_decl));
	}
      else
	{
	  /* Use current address instead of init_addr for reduced reg pressure.
	   */
	  magic = dataref_ptr;
	}


      /* <4> Create msq = phi <msq_init, lsq> in loop  */
      vec_dest = vect_create_destination_var (scalar_dest, vectype);
      msq = make_ssa_name (vec_dest, NULL_TREE);
      phi_stmt = create_phi_node (msq, loop->header); /* CHECKME */
      SSA_NAME_DEF_STMT (msq) = phi_stmt;
      add_phi_arg (phi_stmt, msq_init, loop_preheader_edge (loop));
      add_phi_arg (phi_stmt, lsq, loop_latch_edge (loop));


      /* <5> Create <vec_dest = realign_load (msq, lsq, magic)> in loop  */
      vec_dest = vect_create_destination_var (scalar_dest, vectype);
      new_stmt = build3 (REALIGN_LOAD_EXPR, vectype, msq, lsq, magic);
      new_stmt = build2 (MODIFY_EXPR, vectype, vec_dest, new_stmt);
      new_temp = make_ssa_name (vec_dest, new_stmt);
      TREE_OPERAND (new_stmt, 0) = new_temp;
      vect_finish_stmt_generation (stmt, new_stmt, bsi);
    }
  else
    gcc_unreachable ();

  *vec_stmt = new_stmt;
  return true;
}
1818
1819
1820/* Function vectorizable_live_operation.
1821
1822   STMT computes a value that is used outside the loop. Check if
1823   it can be supported.  */
1824
1825bool
1826vectorizable_live_operation (tree stmt,
1827                             block_stmt_iterator *bsi ATTRIBUTE_UNUSED,
1828                             tree *vec_stmt ATTRIBUTE_UNUSED)
1829{
1830  tree operation;
1831  stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
1832  loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
1833  int i;
1834  enum tree_code code;
1835  int op_type;
1836  tree op;
1837  tree def, def_stmt;
1838  enum vect_def_type dt;
1839
1840  if (!STMT_VINFO_LIVE_P (stmt_info))
1841    return false;
1842
1843  if (TREE_CODE (stmt) != MODIFY_EXPR)
1844    return false;
1845
1846  if (TREE_CODE (TREE_OPERAND (stmt, 0)) != SSA_NAME)
1847    return false;
1848
1849  operation = TREE_OPERAND (stmt, 1);
1850  code = TREE_CODE (operation);
1851
1852  op_type = TREE_CODE_LENGTH (code);
1853
1854  /* FORNOW: support only if all uses are invariant. This means
1855     that the scalar operations can remain in place, unvectorized.
1856     The original last scalar value that they compute will be used.  */
1857
1858  for (i = 0; i < op_type; i++)
1859    {
1860      op = TREE_OPERAND (operation, i);
1861      if (!vect_is_simple_use (op, loop_vinfo, &def_stmt, &def, &dt))
1862        {
1863          if (vect_print_dump_info (REPORT_DETAILS))
1864            fprintf (vect_dump, "use not simple.");
1865          return false;
1866        }
1867
1868      if (dt != vect_invariant_def && dt != vect_constant_def)
1869        return false;
1870    }
1871
1872  /* No transformation is required for the cases we currently support.  */
1873  return true;
1874}
1875
1876
1877/* Function vect_is_simple_cond.
1878
1879   Input:
1880   LOOP - the loop that is being vectorized.
1881   COND - Condition that is checked for simple use.
1882
1883   Returns whether a COND can be vectorized.  Checks whether
1884   condition operands are supportable using vec_is_simple_use.  */
1885
1886static bool
1887vect_is_simple_cond (tree cond, loop_vec_info loop_vinfo)
1888{
1889  tree lhs, rhs;
1890  tree def;
1891  enum vect_def_type dt;
1892
1893  if (!COMPARISON_CLASS_P (cond))
1894    return false;
1895
1896  lhs = TREE_OPERAND (cond, 0);
1897  rhs = TREE_OPERAND (cond, 1);
1898
1899  if (TREE_CODE (lhs) == SSA_NAME)
1900    {
1901      tree lhs_def_stmt = SSA_NAME_DEF_STMT (lhs);
1902      if (!vect_is_simple_use (lhs, loop_vinfo, &lhs_def_stmt, &def, &dt))
1903	return false;
1904    }
1905  else if (TREE_CODE (lhs) != INTEGER_CST && TREE_CODE (lhs) != REAL_CST)
1906    return false;
1907
1908  if (TREE_CODE (rhs) == SSA_NAME)
1909    {
1910      tree rhs_def_stmt = SSA_NAME_DEF_STMT (rhs);
1911      if (!vect_is_simple_use (rhs, loop_vinfo, &rhs_def_stmt, &def, &dt))
1912	return false;
1913    }
1914  else if (TREE_CODE (rhs) != INTEGER_CST  && TREE_CODE (rhs) != REAL_CST)
1915    return false;
1916
1917  return true;
1918}
1919
1920/* vectorizable_condition.
1921
1922   Check if STMT is conditional modify expression that can be vectorized.
1923   If VEC_STMT is also passed, vectorize the STMT: create a vectorized
1924   stmt using VEC_COND_EXPR  to replace it, put it in VEC_STMT, and insert it
1925   at BSI.
1926
1927   Return FALSE if not a vectorizable STMT, TRUE otherwise.  */
1928
1929bool
1930vectorizable_condition (tree stmt, block_stmt_iterator *bsi, tree *vec_stmt)
1931{
1932  tree scalar_dest = NULL_TREE;
1933  tree vec_dest = NULL_TREE;
1934  tree op = NULL_TREE;
1935  tree cond_expr, then_clause, else_clause;
1936  stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
1937  tree vectype = STMT_VINFO_VECTYPE (stmt_info);
1938  tree vec_cond_lhs, vec_cond_rhs, vec_then_clause, vec_else_clause;
1939  tree vec_compare, vec_cond_expr;
1940  tree new_temp;
1941  loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
1942  enum machine_mode vec_mode;
1943  tree def;
1944  enum vect_def_type dt;
1945
1946  if (!STMT_VINFO_RELEVANT_P (stmt_info))
1947    return false;
1948
1949  gcc_assert (STMT_VINFO_DEF_TYPE (stmt_info) == vect_loop_def);
1950
1951  if (STMT_VINFO_LIVE_P (stmt_info))
1952    {
1953      /* FORNOW: not yet supported.  */
1954      if (vect_print_dump_info (REPORT_DETAILS))
1955        fprintf (vect_dump, "value used after loop.");
1956      return false;
1957    }
1958
1959  if (TREE_CODE (stmt) != MODIFY_EXPR)
1960    return false;
1961
1962  op = TREE_OPERAND (stmt, 1);
1963
1964  if (TREE_CODE (op) != COND_EXPR)
1965    return false;
1966
1967  cond_expr = TREE_OPERAND (op, 0);
1968  then_clause = TREE_OPERAND (op, 1);
1969  else_clause = TREE_OPERAND (op, 2);
1970
1971  if (!vect_is_simple_cond (cond_expr, loop_vinfo))
1972    return false;
1973
1974  /* We do not handle two different vector types for the condition
1975     and the values.  */
1976  if (TREE_TYPE (TREE_OPERAND (cond_expr, 0)) != TREE_TYPE (vectype))
1977    return false;
1978
1979  if (TREE_CODE (then_clause) == SSA_NAME)
1980    {
1981      tree then_def_stmt = SSA_NAME_DEF_STMT (then_clause);
1982      if (!vect_is_simple_use (then_clause, loop_vinfo,
1983			       &then_def_stmt, &def, &dt))
1984	return false;
1985    }
1986  else if (TREE_CODE (then_clause) != INTEGER_CST
1987	   && TREE_CODE (then_clause) != REAL_CST)
1988    return false;
1989
1990  if (TREE_CODE (else_clause) == SSA_NAME)
1991    {
1992      tree else_def_stmt = SSA_NAME_DEF_STMT (else_clause);
1993      if (!vect_is_simple_use (else_clause, loop_vinfo,
1994			       &else_def_stmt, &def, &dt))
1995	return false;
1996    }
1997  else if (TREE_CODE (else_clause) != INTEGER_CST
1998	   && TREE_CODE (else_clause) != REAL_CST)
1999    return false;
2000
2001
2002  vec_mode = TYPE_MODE (vectype);
2003
2004  if (!vec_stmt)
2005    {
2006      STMT_VINFO_TYPE (stmt_info) = condition_vec_info_type;
2007      return expand_vec_cond_expr_p (op, vec_mode);
2008    }
2009
2010  /* Transform */
2011
2012  /* Handle def.  */
2013  scalar_dest = TREE_OPERAND (stmt, 0);
2014  vec_dest = vect_create_destination_var (scalar_dest, vectype);
2015
2016  /* Handle cond expr.  */
2017  vec_cond_lhs =
2018    vect_get_vec_def_for_operand (TREE_OPERAND (cond_expr, 0), stmt, NULL);
2019  vec_cond_rhs =
2020    vect_get_vec_def_for_operand (TREE_OPERAND (cond_expr, 1), stmt, NULL);
2021  vec_then_clause = vect_get_vec_def_for_operand (then_clause, stmt, NULL);
2022  vec_else_clause = vect_get_vec_def_for_operand (else_clause, stmt, NULL);
2023
2024  /* Arguments are ready. create the new vector stmt.  */
2025  vec_compare = build2 (TREE_CODE (cond_expr), vectype,
2026			vec_cond_lhs, vec_cond_rhs);
2027  vec_cond_expr = build (VEC_COND_EXPR, vectype,
2028			 vec_compare, vec_then_clause, vec_else_clause);
2029
2030  *vec_stmt = build2 (MODIFY_EXPR, vectype, vec_dest, vec_cond_expr);
2031  new_temp = make_ssa_name (vec_dest, *vec_stmt);
2032  TREE_OPERAND (*vec_stmt, 0) = new_temp;
2033  vect_finish_stmt_generation (stmt, *vec_stmt, bsi);
2034
2035  return true;
2036}
2037
2038/* Function vect_transform_stmt.
2039
2040   Create a vectorized stmt to replace STMT, and insert it at BSI.  */
2041
2042bool
2043vect_transform_stmt (tree stmt, block_stmt_iterator *bsi)
2044{
2045  bool is_store = false;
2046  tree vec_stmt = NULL_TREE;
2047  stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
2048  bool done;
2049
2050  if (STMT_VINFO_RELEVANT_P (stmt_info))
2051    {
2052      switch (STMT_VINFO_TYPE (stmt_info))
2053      {
2054      case op_vec_info_type:
2055	done = vectorizable_operation (stmt, bsi, &vec_stmt);
2056	gcc_assert (done);
2057	break;
2058
2059      case assignment_vec_info_type:
2060	done = vectorizable_assignment (stmt, bsi, &vec_stmt);
2061	gcc_assert (done);
2062	break;
2063
2064      case load_vec_info_type:
2065	done = vectorizable_load (stmt, bsi, &vec_stmt);
2066	gcc_assert (done);
2067	break;
2068
2069      case store_vec_info_type:
2070	done = vectorizable_store (stmt, bsi, &vec_stmt);
2071	gcc_assert (done);
2072	is_store = true;
2073	break;
2074
2075      case condition_vec_info_type:
2076	done = vectorizable_condition (stmt, bsi, &vec_stmt);
2077	gcc_assert (done);
2078	break;
2079
2080      default:
2081	if (vect_print_dump_info (REPORT_DETAILS))
2082	  fprintf (vect_dump, "stmt not supported.");
2083	gcc_unreachable ();
2084      }
2085
2086      STMT_VINFO_VEC_STMT (stmt_info) = vec_stmt;
2087    }
2088
2089  if (STMT_VINFO_LIVE_P (stmt_info))
2090    {
2091      switch (STMT_VINFO_TYPE (stmt_info))
2092      {
2093      case reduc_vec_info_type:
2094        done = vectorizable_reduction (stmt, bsi, &vec_stmt);
2095        gcc_assert (done);
2096        break;
2097
2098      default:
2099        done = vectorizable_live_operation (stmt, bsi, &vec_stmt);
2100        gcc_assert (done);
2101      }
2102
2103      if (vec_stmt)
2104        {
2105          gcc_assert (!STMT_VINFO_VEC_STMT (stmt_info));
2106          STMT_VINFO_VEC_STMT (stmt_info) = vec_stmt;
2107        }
2108    }
2109
2110  return is_store;
2111}
2112
2113
2114/* This function builds ni_name = number of iterations loop executes
2115   on the loop preheader.  */
2116
2117static tree
2118vect_build_loop_niters (loop_vec_info loop_vinfo)
2119{
2120  tree ni_name, stmt, var;
2121  edge pe;
2122  struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
2123  tree ni = unshare_expr (LOOP_VINFO_NITERS (loop_vinfo));
2124
2125  var = create_tmp_var (TREE_TYPE (ni), "niters");
2126  add_referenced_tmp_var (var);
2127  ni_name = force_gimple_operand (ni, &stmt, false, var);
2128
2129  pe = loop_preheader_edge (loop);
2130  if (stmt)
2131    {
2132      basic_block new_bb = bsi_insert_on_edge_immediate (pe, stmt);
2133      gcc_assert (!new_bb);
2134    }
2135
2136  return ni_name;
2137}
2138
2139
2140/* This function generates the following statements:
2141
2142 ni_name = number of iterations loop executes
2143 ratio = ni_name / vf
2144 ratio_mult_vf_name = ratio * vf
2145
2146 and places them at the loop preheader edge.  */
2147
2148static void
2149vect_generate_tmps_on_preheader (loop_vec_info loop_vinfo,
2150				 tree *ni_name_ptr,
2151				 tree *ratio_mult_vf_name_ptr,
2152				 tree *ratio_name_ptr)
2153{
2154
2155  edge pe;
2156  basic_block new_bb;
2157  tree stmt, ni_name;
2158  tree var;
2159  tree ratio_name;
2160  tree ratio_mult_vf_name;
2161  struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
2162  tree ni = LOOP_VINFO_NITERS (loop_vinfo);
2163  int vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
2164  tree log_vf;
2165
2166  pe = loop_preheader_edge (loop);
2167
2168  /* Generate temporary variable that contains
2169     number of iterations loop executes.  */
2170
2171  ni_name = vect_build_loop_niters (loop_vinfo);
2172  log_vf = build_int_cst (TREE_TYPE (ni), exact_log2 (vf));
2173
2174  /* Create: ratio = ni >> log2(vf) */
2175
2176  var = create_tmp_var (TREE_TYPE (ni), "bnd");
2177  add_referenced_tmp_var (var);
2178  ratio_name = make_ssa_name (var, NULL_TREE);
2179  stmt = build2 (MODIFY_EXPR, void_type_node, ratio_name,
2180	   build2 (RSHIFT_EXPR, TREE_TYPE (ni_name), ni_name, log_vf));
2181  SSA_NAME_DEF_STMT (ratio_name) = stmt;
2182
2183  pe = loop_preheader_edge (loop);
2184  new_bb = bsi_insert_on_edge_immediate (pe, stmt);
2185  gcc_assert (!new_bb);
2186
2187  /* Create: ratio_mult_vf = ratio << log2 (vf).  */
2188
2189  var = create_tmp_var (TREE_TYPE (ni), "ratio_mult_vf");
2190  add_referenced_tmp_var (var);
2191  ratio_mult_vf_name = make_ssa_name (var, NULL_TREE);
2192  stmt = build2 (MODIFY_EXPR, void_type_node, ratio_mult_vf_name,
2193	   build2 (LSHIFT_EXPR, TREE_TYPE (ratio_name), ratio_name, log_vf));
2194  SSA_NAME_DEF_STMT (ratio_mult_vf_name) = stmt;
2195
2196  pe = loop_preheader_edge (loop);
2197  new_bb = bsi_insert_on_edge_immediate (pe, stmt);
2198  gcc_assert (!new_bb);
2199
2200  *ni_name_ptr = ni_name;
2201  *ratio_mult_vf_name_ptr = ratio_mult_vf_name;
2202  *ratio_name_ptr = ratio_name;
2203
2204  return;
2205}
2206
2207
2208/* Function update_vuses_to_preheader.
2209
2210   Input:
2211   STMT - a statement with potential VUSEs.
2212   LOOP - the loop whose preheader will contain STMT.
2213
2214   It's possible to vectorize a loop even though an SSA_NAME from a VUSE
2215   appears to be defined in a V_MAY_DEF in another statement in a loop.
2216   One such case is when the VUSE is at the dereference of a __restricted__
2217   pointer in a load and the V_MAY_DEF is at the dereference of a different
2218   __restricted__ pointer in a store.  Vectorization may result in
2219   copy_virtual_uses being called to copy the problematic VUSE to a new
2220   statement that is being inserted in the loop preheader.  This procedure
2221   is called to change the SSA_NAME in the new statement's VUSE from the
2222   SSA_NAME updated in the loop to the related SSA_NAME available on the
2223   path entering the loop.
2224
2225   When this function is called, we have the following situation:
2226
2227        # vuse <name1>
2228        S1: vload
2229    do {
2230        # name1 = phi < name0 , name2>
2231
2232        # vuse <name1>
2233        S2: vload
2234
2235        # name2 = vdef <name1>
2236        S3: vstore
2237
2238    }while...
2239
2240   Stmt S1 was created in the loop preheader block as part of misaligned-load
2241   handling. This function fixes the name of the vuse of S1 from 'name1' to
2242   'name0'.  */
2243
2244static void
2245update_vuses_to_preheader (tree stmt, struct loop *loop)
2246{
2247  basic_block header_bb = loop->header;
2248  edge preheader_e = loop_preheader_edge (loop);
2249  ssa_op_iter iter;
2250  use_operand_p use_p;
2251
2252  FOR_EACH_SSA_USE_OPERAND (use_p, stmt, iter, SSA_OP_VUSE)
2253    {
2254      tree ssa_name = USE_FROM_PTR (use_p);
2255      tree def_stmt = SSA_NAME_DEF_STMT (ssa_name);
2256      tree name_var = SSA_NAME_VAR (ssa_name);
2257      basic_block bb = bb_for_stmt (def_stmt);
2258
2259      /* For a use before any definitions, def_stmt is a NOP_EXPR.  */
2260      if (!IS_EMPTY_STMT (def_stmt)
2261	  && flow_bb_inside_loop_p (loop, bb))
2262        {
2263          /* If the block containing the statement defining the SSA_NAME
2264             is in the loop then it's necessary to find the definition
2265             outside the loop using the PHI nodes of the header.  */
2266	  tree phi;
2267	  bool updated = false;
2268
2269	  for (phi = phi_nodes (header_bb); phi; phi = TREE_CHAIN (phi))
2270	    {
2271	      if (SSA_NAME_VAR (PHI_RESULT (phi)) == name_var)
2272		{
2273		  SET_USE (use_p, PHI_ARG_DEF (phi, preheader_e->dest_idx));
2274		  updated = true;
2275		  break;
2276		}
2277	    }
2278	  gcc_assert (updated);
2279	}
2280    }
2281}
2282
2283
/*   Function vect_update_ivs_after_vectorizer.

     "Advance" the induction variables of LOOP to the value they should take
     after the execution of LOOP.  This is currently necessary because the
     vectorizer does not handle induction variables that are used after the
     loop.  Such a situation occurs when the last iterations of LOOP are
     peeled, because:
     1. We introduced new uses after LOOP for IVs that were not originally used
        after LOOP: the IVs of LOOP are now used by an epilog loop.
     2. LOOP is going to be vectorized; this means that it will iterate N/VF
        times, whereas the loop IVs should be bumped N times.

     Input:
     - LOOP - a loop that is going to be vectorized. The last few iterations
              of LOOP were peeled.
     - NITERS - the number of iterations that LOOP executes (before it is
                vectorized). i.e, the number of times the ivs should be bumped.
     - UPDATE_E - a successor edge of LOOP->exit that is on the (only) path
                  coming out from LOOP on which there are uses of the LOOP ivs
		  (this is the path from LOOP->exit to epilog_loop->preheader).

                  The new definitions of the ivs are placed in LOOP->exit.
                  The phi args associated with the edge UPDATE_E in the bb
                  UPDATE_E->dest are updated accordingly.

     Assumption 1: Like the rest of the vectorizer, this function assumes
     a single loop exit that has a single predecessor.

     Assumption 2: The phi nodes in the LOOP header and in update_bb are
     organized in the same order.

     Assumption 3: The access function of the ivs is simple enough (see
     vect_can_advance_ivs_p).  This assumption will be relaxed in the future.

     Assumption 4: Exactly one of the successors of LOOP exit-bb is on a path
     coming out of LOOP on which the ivs of LOOP are used (this is the path
     that leads to the epilog loop; other paths skip the epilog loop).  This
     path starts with the edge UPDATE_E, and its destination (denoted update_bb)
     needs to have its phis updated.
 */

static void
vect_update_ivs_after_vectorizer (loop_vec_info loop_vinfo, tree niters,
				  edge update_e)
{
  struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
  basic_block exit_bb = loop->single_exit->dest;
  tree phi, phi1;
  basic_block update_bb = update_e->dest;

  /* gcc_assert (vect_can_advance_ivs_p (loop_vinfo)); */

  /* Make sure there exists a single-predecessor exit bb:  */
  gcc_assert (single_pred_p (exit_bb));

  /* Walk the header phis and the update_bb phis in lockstep; they are
     assumed to be organized in the same order (Assumption 2 above).  */
  for (phi = phi_nodes (loop->header), phi1 = phi_nodes (update_bb);
       phi && phi1;
       phi = PHI_CHAIN (phi), phi1 = PHI_CHAIN (phi1))
    {
      tree access_fn = NULL;
      tree evolution_part;
      tree init_expr;
      tree step_expr;
      tree var, stmt, ni, ni_name;
      block_stmt_iterator last_bsi;

      if (vect_print_dump_info (REPORT_DETAILS))
        {
          fprintf (vect_dump, "vect_update_ivs_after_vectorizer: phi: ");
          print_generic_expr (vect_dump, phi, TDF_SLIM);
        }

      /* Skip virtual phi's.  */
      if (!is_gimple_reg (SSA_NAME_VAR (PHI_RESULT (phi))))
	{
	  if (vect_print_dump_info (REPORT_DETAILS))
	    fprintf (vect_dump, "virtual phi. skip.");
	  continue;
	}

      /* Skip reduction phis.  */
      if (STMT_VINFO_DEF_TYPE (vinfo_for_stmt (phi)) == vect_reduction_def)
        {
          if (vect_print_dump_info (REPORT_DETAILS))
            fprintf (vect_dump, "reduc phi. skip.");
          continue;
        }

      access_fn = analyze_scalar_evolution (loop, PHI_RESULT (phi));
      gcc_assert (access_fn);
      evolution_part =
	 unshare_expr (evolution_part_in_loop_num (access_fn, loop->num));
      gcc_assert (evolution_part != NULL_TREE);

      /* FORNOW: We do not support IVs whose evolution function is a polynomial
         of degree >= 2 or exponential.  */
      gcc_assert (!tree_is_chrec (evolution_part));

      step_expr = evolution_part;
      init_expr = unshare_expr (initial_condition_in_loop_num (access_fn,
							       loop->num));

      /* ni = init + niters * step: the value the iv reaches after NITERS
	 iterations of the original (scalar) loop.
	 NOTE(review): the MULT is built in the type of NITERS while the
	 PLUS is built in the type of INIT_EXPR -- assumes the two types
	 are compatible; confirm.  */
      ni = build2 (PLUS_EXPR, TREE_TYPE (init_expr),
		  build2 (MULT_EXPR, TREE_TYPE (niters),
		       niters, step_expr), init_expr);

      var = create_tmp_var (TREE_TYPE (init_expr), "tmp");
      add_referenced_tmp_var (var);

      ni_name = force_gimple_operand (ni, &stmt, false, var);

      /* Insert stmt into exit_bb.  */
      last_bsi = bsi_last (exit_bb);
      if (stmt)
        bsi_insert_before (&last_bsi, stmt, BSI_SAME_STMT);

      /* Fix phi expressions in the successor bb.  */
      SET_PHI_ARG_DEF (phi1, update_e->dest_idx, ni_name);
    }
}
2404
2405
/* Function vect_do_peeling_for_loop_bound

   Peel the last iterations of the loop represented by LOOP_VINFO.
   The peeled iterations form a new epilog loop.  Given that the loop now
   iterates NITERS times, the new epilog loop iterates
   NITERS % VECTORIZATION_FACTOR times.

   The original loop will later be made to iterate
   NITERS / VECTORIZATION_FACTOR times (this value is placed into RATIO).  */

static void
vect_do_peeling_for_loop_bound (loop_vec_info loop_vinfo, tree *ratio,
				struct loops *loops)
{
  tree ni_name, ratio_mult_vf_name;
  struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
  struct loop *new_loop;
  edge update_e;
  basic_block preheader;
  int loop_num;

  if (vect_print_dump_info (REPORT_DETAILS))
    fprintf (vect_dump, "=== vect_do_peeling_for_loop_bound ===");

  /* Set up the original/copy mapping tables used by the peeling
     machinery; released by free_original_copy_tables below.  */
  initialize_original_copy_tables ();

  /* Generate the following variables on the preheader of original loop:

     ni_name = number of iteration the original loop executes
     ratio = ni_name / vf
     ratio_mult_vf_name = ratio * vf  */
  vect_generate_tmps_on_preheader (loop_vinfo, &ni_name,
				   &ratio_mult_vf_name, ratio);

  /* Remember the loop's number so we can assert below that peeling did
     not change it.  */
  loop_num  = loop->num;
  new_loop = slpeel_tree_peel_loop_to_edge (loop, loops, loop->single_exit,
					    ratio_mult_vf_name, ni_name, false);
  gcc_assert (new_loop);
  gcc_assert (loop_num == loop->num);
#ifdef ENABLE_CHECKING
  slpeel_verify_cfg_after_peeling (loop, new_loop);
#endif

  /* A guard that controls whether the new_loop is to be executed or skipped
     is placed in LOOP->exit.  LOOP->exit therefore has two successors - one
     is the preheader of NEW_LOOP, where the IVs from LOOP are used.  The other
     is a bb after NEW_LOOP, where these IVs are not used.  Find the edge that
     is on the path where the LOOP IVs are used and need to be updated.  */

  preheader = loop_preheader_edge (new_loop)->src;
  if (EDGE_PRED (preheader, 0)->src == loop->single_exit->dest)
    update_e = EDGE_PRED (preheader, 0);
  else
    update_e = EDGE_PRED (preheader, 1);

  /* Update IVs of original loop as if they were advanced
     by ratio_mult_vf_name steps.  */
  vect_update_ivs_after_vectorizer (loop_vinfo, ratio_mult_vf_name, update_e);

  /* After peeling we have to reset scalar evolution analyzer.  */
  scev_reset ();

  free_original_copy_tables ();
}
2470
2471
/* Function vect_gen_niters_for_prolog_loop

   Set the number of iterations for the loop represented by LOOP_VINFO
   to the minimum between LOOP_NITERS (the original iteration count of the loop)
   and the misalignment of DR - the data reference recorded in
   LOOP_VINFO_UNALIGNED_DR (LOOP_VINFO).  As a result, after the execution of
   this loop, the data reference DR will refer to an aligned location.

   The following computation is generated:

   If the misalignment of DR is known at compile time:
     addr_mis = int mis = DR_MISALIGNMENT (dr);
   Else, compute address misalignment in bytes:
     addr_mis = addr & (vectype_size - 1)

   prolog_niters = min ( LOOP_NITERS , (VF - addr_mis/elem_size)&(VF-1) )

   (elem_size = element type size; an element is the scalar element
	whose type is the inner type of the vectype)  */

static tree
vect_gen_niters_for_prolog_loop (loop_vec_info loop_vinfo, tree loop_niters)
{
  struct data_reference *dr = LOOP_VINFO_UNALIGNED_DR (loop_vinfo);
  int vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
  struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
  tree var, stmt;
  tree iters, iters_name;
  edge pe;
  basic_block new_bb;
  tree dr_stmt = DR_STMT (dr);
  stmt_vec_info stmt_info = vinfo_for_stmt (dr_stmt);
  tree vectype = STMT_VINFO_VECTYPE (stmt_info);
  /* Vector alignment in bytes.  NOTE(review): the element-size
     computations below assume this equals the vector size in bytes,
     i.e. TYPE_ALIGN (vectype) is the full vector width -- confirm for
     targets with smaller vector alignment.  */
  int vectype_align = TYPE_ALIGN (vectype) / BITS_PER_UNIT;
  tree niters_type = TREE_TYPE (loop_niters);

  pe = loop_preheader_edge (loop);

  if (LOOP_PEELING_FOR_ALIGNMENT (loop_vinfo) > 0)
    {
      /* Misalignment known at compile time: the prolog iteration count
	 is a constant.  */
      int byte_misalign = LOOP_PEELING_FOR_ALIGNMENT (loop_vinfo);
      int element_size = vectype_align/vf;
      int elem_misalign = byte_misalign / element_size;

      if (vect_print_dump_info (REPORT_DETAILS))
        fprintf (vect_dump, "known alignment = %d.", byte_misalign);
      iters = build_int_cst (niters_type, (vf - elem_misalign)&(vf-1));
    }
  else
    {
      /* Misalignment unknown: compute it at runtime from the first
	 vector address.  */
      tree new_stmts = NULL_TREE;
      tree start_addr =
        vect_create_addr_base_for_vector_ref (dr_stmt, &new_stmts, NULL_TREE);
      tree ptr_type = TREE_TYPE (start_addr);
      tree size = TYPE_SIZE (ptr_type);
      tree type = lang_hooks.types.type_for_size (tree_low_cst (size, 1), 1);
      tree vectype_size_minus_1 = build_int_cst (type, vectype_align - 1);
      tree elem_size_log =
        build_int_cst (type, exact_log2 (vectype_align/vf));
      tree vf_minus_1 = build_int_cst (type, vf - 1);
      tree vf_tree = build_int_cst (type, vf);
      tree byte_misalign;
      tree elem_misalign;

      /* Emit the address computation on the loop preheader edge.  */
      new_bb = bsi_insert_on_edge_immediate (pe, new_stmts);
      gcc_assert (!new_bb);

      /* Create:  byte_misalign = addr & (vectype_size - 1)  */
      byte_misalign =
        build2 (BIT_AND_EXPR, type, start_addr, vectype_size_minus_1);

      /* Create:  elem_misalign = byte_misalign / element_size  */
      elem_misalign =
        build2 (RSHIFT_EXPR, type, byte_misalign, elem_size_log);

      /* Create:  (niters_type) (VF - elem_misalign)&(VF - 1)  */
      iters = build2 (MINUS_EXPR, type, vf_tree, elem_misalign);
      iters = build2 (BIT_AND_EXPR, type, iters, vf_minus_1);
      iters = fold_convert (niters_type, iters);
    }

  /* Create:  prolog_loop_niters = min (iters, loop_niters) */
  /* If the loop bound is known at compile time we already verified that it is
     greater than vf; since the misalignment ('iters') is at most vf, there's
     no need to generate the MIN_EXPR in this case.  */
  if (TREE_CODE (loop_niters) != INTEGER_CST)
    iters = build2 (MIN_EXPR, niters_type, iters, loop_niters);

  if (vect_print_dump_info (REPORT_DETAILS))
    {
      fprintf (vect_dump, "niters for prolog loop: ");
      print_generic_expr (vect_dump, iters, TDF_SLIM);
    }

  var = create_tmp_var (niters_type, "prolog_loop_niters");
  add_referenced_tmp_var (var);
  iters_name = force_gimple_operand (iters, &stmt, false, var);

  /* Insert stmt on loop preheader edge.  */
  if (stmt)
    {
      basic_block new_bb = bsi_insert_on_edge_immediate (pe, stmt);
      gcc_assert (!new_bb);
    }

  return iters_name;
}
2579
2580
2581/* Function vect_update_init_of_dr
2582
2583   NITERS iterations were peeled from LOOP.  DR represents a data reference
2584   in LOOP.  This function updates the information recorded in DR to
2585   account for the fact that the first NITERS iterations had already been
2586   executed.  Specifically, it updates the OFFSET field of DR.  */
2587
2588static void
2589vect_update_init_of_dr (struct data_reference *dr, tree niters)
2590{
2591  tree offset = DR_OFFSET (dr);
2592
2593  niters = fold_build2 (MULT_EXPR, TREE_TYPE (niters), niters, DR_STEP (dr));
2594  offset = fold_build2 (PLUS_EXPR, TREE_TYPE (offset), offset, niters);
2595  DR_OFFSET (dr) = offset;
2596}
2597
2598
2599/* Function vect_update_inits_of_drs
2600
2601   NITERS iterations were peeled from the loop represented by LOOP_VINFO.
2602   This function updates the information recorded for the data references in
2603   the loop to account for the fact that the first NITERS iterations had
2604   already been executed.  Specifically, it updates the initial_condition of the
2605   access_function of all the data_references in the loop.  */
2606
2607static void
2608vect_update_inits_of_drs (loop_vec_info loop_vinfo, tree niters)
2609{
2610  unsigned int i;
2611  varray_type datarefs = LOOP_VINFO_DATAREFS (loop_vinfo);
2612
2613  if (vect_dump && (dump_flags & TDF_DETAILS))
2614    fprintf (vect_dump, "=== vect_update_inits_of_dr ===");
2615
2616  for (i = 0; i < VARRAY_ACTIVE_SIZE (datarefs); i++)
2617    {
2618      struct data_reference *dr = VARRAY_GENERIC_PTR (datarefs, i);
2619      vect_update_init_of_dr (dr, niters);
2620    }
2621}
2622
2623
/* Function vect_do_peeling_for_alignment

   Peel the first 'niters' iterations of the loop represented by LOOP_VINFO.
   'niters' is set to the misalignment of one of the data references in the
   loop, thereby forcing it to refer to an aligned location at the beginning
   of the execution of this loop.  The data reference for which we are
   peeling is recorded in LOOP_VINFO_UNALIGNED_DR.  */

static void
vect_do_peeling_for_alignment (loop_vec_info loop_vinfo, struct loops *loops)
{
  struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
  tree niters_of_prolog_loop, ni_name;
  tree n_iters;
  struct loop *new_loop;

  if (vect_print_dump_info (REPORT_DETAILS))
    fprintf (vect_dump, "=== vect_do_peeling_for_alignment ===");

  /* Set up the original/copy mapping tables used by the peeling
     machinery; released by free_original_copy_tables below.  */
  initialize_original_copy_tables ();

  /* Total iteration count of the original loop, and from it the number
     of iterations the prolog must execute to reach alignment.  */
  ni_name = vect_build_loop_niters (loop_vinfo);
  niters_of_prolog_loop = vect_gen_niters_for_prolog_loop (loop_vinfo, ni_name);

  /* Peel the prolog loop and iterate it niters_of_prolog_loop.  */
  new_loop =
	slpeel_tree_peel_loop_to_edge (loop, loops, loop_preheader_edge (loop),
				       niters_of_prolog_loop, ni_name, true);
  gcc_assert (new_loop);
#ifdef ENABLE_CHECKING
  slpeel_verify_cfg_after_peeling (new_loop, loop);
#endif

  /* Update number of times loop executes.  */
  n_iters = LOOP_VINFO_NITERS (loop_vinfo);
  LOOP_VINFO_NITERS (loop_vinfo) = fold_build2 (MINUS_EXPR,
		TREE_TYPE (n_iters), n_iters, niters_of_prolog_loop);

  /* Update the init conditions of the access functions of all data refs.  */
  vect_update_inits_of_drs (loop_vinfo, niters_of_prolog_loop);

  /* After peeling we have to reset scalar evolution analyzer.  */
  scev_reset ();

  free_original_copy_tables ();
}
2670
2671
2672/* Function vect_create_cond_for_align_checks.
2673
2674   Create a conditional expression that represents the alignment checks for
2675   all of data references (array element references) whose alignment must be
2676   checked at runtime.
2677
2678   Input:
2679   LOOP_VINFO - two fields of the loop information are used.
2680                LOOP_VINFO_PTR_MASK is the mask used to check the alignment.
2681                LOOP_VINFO_MAY_MISALIGN_STMTS contains the refs to be checked.
2682
2683   Output:
2684   COND_EXPR_STMT_LIST - statements needed to construct the conditional
2685                         expression.
2686   The returned value is the conditional expression to be used in the if
2687   statement that controls which version of the loop gets executed at runtime.
2688
2689   The algorithm makes two assumptions:
2690     1) The number of bytes "n" in a vector is a power of 2.
2691     2) An address "a" is aligned if a%n is zero and that this
2692        test can be done as a&(n-1) == 0.  For example, for 16
2693        byte vectors the test is a&0xf == 0.  */
2694
2695static tree
2696vect_create_cond_for_align_checks (loop_vec_info loop_vinfo,
2697                                   tree *cond_expr_stmt_list)
2698{
2699  VEC(tree,heap) *may_misalign_stmts
2700    = LOOP_VINFO_MAY_MISALIGN_STMTS (loop_vinfo);
2701  tree ref_stmt;
2702  int mask = LOOP_VINFO_PTR_MASK (loop_vinfo);
2703  tree mask_cst;
2704  unsigned int i;
2705  tree psize;
2706  tree int_ptrsize_type;
2707  char tmp_name[20];
2708  tree or_tmp_name = NULL_TREE;
2709  tree and_tmp, and_tmp_name, and_stmt;
2710  tree ptrsize_zero;
2711
2712  /* Check that mask is one less than a power of 2, i.e., mask is
2713     all zeros followed by all ones.  */
2714  gcc_assert ((mask != 0) && ((mask & (mask+1)) == 0));
2715
2716  /* CHECKME: what is the best integer or unsigned type to use to hold a
2717     cast from a pointer value?  */
2718  psize = TYPE_SIZE (ptr_type_node);
2719  int_ptrsize_type
2720    = lang_hooks.types.type_for_size (tree_low_cst (psize, 1), 0);
2721
2722  /* Create expression (mask & (dr_1 || ... || dr_n)) where dr_i is the address
2723     of the first vector of the i'th data reference. */
2724
2725  for (i = 0; VEC_iterate (tree, may_misalign_stmts, i, ref_stmt); i++)
2726    {
2727      tree new_stmt_list = NULL_TREE;
2728      tree addr_base;
2729      tree addr_tmp, addr_tmp_name, addr_stmt;
2730      tree or_tmp, new_or_tmp_name, or_stmt;
2731
2732      /* create: addr_tmp = (int)(address_of_first_vector) */
2733      addr_base = vect_create_addr_base_for_vector_ref (ref_stmt,
2734							&new_stmt_list,
2735							NULL_TREE);
2736
2737      if (new_stmt_list != NULL_TREE)
2738        append_to_statement_list_force (new_stmt_list, cond_expr_stmt_list);
2739
2740      sprintf (tmp_name, "%s%d", "addr2int", i);
2741      addr_tmp = create_tmp_var (int_ptrsize_type, tmp_name);
2742      add_referenced_tmp_var (addr_tmp);
2743      addr_tmp_name = make_ssa_name (addr_tmp, NULL_TREE);
2744      addr_stmt = fold_convert (int_ptrsize_type, addr_base);
2745      addr_stmt = build2 (MODIFY_EXPR, void_type_node,
2746                          addr_tmp_name, addr_stmt);
2747      SSA_NAME_DEF_STMT (addr_tmp_name) = addr_stmt;
2748      append_to_statement_list_force (addr_stmt, cond_expr_stmt_list);
2749
2750      /* The addresses are OR together.  */
2751
2752      if (or_tmp_name != NULL_TREE)
2753        {
2754          /* create: or_tmp = or_tmp | addr_tmp */
2755          sprintf (tmp_name, "%s%d", "orptrs", i);
2756          or_tmp = create_tmp_var (int_ptrsize_type, tmp_name);
2757          add_referenced_tmp_var (or_tmp);
2758          new_or_tmp_name = make_ssa_name (or_tmp, NULL_TREE);
2759          or_stmt = build2 (MODIFY_EXPR, void_type_node, new_or_tmp_name,
2760                            build2 (BIT_IOR_EXPR, int_ptrsize_type,
2761	                            or_tmp_name,
2762                                    addr_tmp_name));
2763          SSA_NAME_DEF_STMT (new_or_tmp_name) = or_stmt;
2764          append_to_statement_list_force (or_stmt, cond_expr_stmt_list);
2765          or_tmp_name = new_or_tmp_name;
2766        }
2767      else
2768        or_tmp_name = addr_tmp_name;
2769
2770    } /* end for i */
2771
2772  mask_cst = build_int_cst (int_ptrsize_type, mask);
2773
2774  /* create: and_tmp = or_tmp & mask  */
2775  and_tmp = create_tmp_var (int_ptrsize_type, "andmask" );
2776  add_referenced_tmp_var (and_tmp);
2777  and_tmp_name = make_ssa_name (and_tmp, NULL_TREE);
2778
2779  and_stmt = build2 (MODIFY_EXPR, void_type_node,
2780                     and_tmp_name,
2781                     build2 (BIT_AND_EXPR, int_ptrsize_type,
2782                             or_tmp_name, mask_cst));
2783  SSA_NAME_DEF_STMT (and_tmp_name) = and_stmt;
2784  append_to_statement_list_force (and_stmt, cond_expr_stmt_list);
2785
2786  /* Make and_tmp the left operand of the conditional test against zero.
2787     if and_tmp has a non-zero bit then some address is unaligned.  */
2788  ptrsize_zero = build_int_cst (int_ptrsize_type, 0);
2789  return build2 (EQ_EXPR, boolean_type_node,
2790                 and_tmp_name, ptrsize_zero);
2791}
2792
2793
2794/* Function vect_transform_loop.
2795
   The analysis phase has determined that the loop is vectorizable.
   Vectorize the loop - create vectorized stmts to replace the scalar
   stmts in the loop, and update the loop exit condition.  */
2799
void
vect_transform_loop (loop_vec_info loop_vinfo,
		     struct loops *loops ATTRIBUTE_UNUSED)
{
  struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
  basic_block *bbs = LOOP_VINFO_BBS (loop_vinfo);
  int nbbs = loop->num_nodes;
  block_stmt_iterator si;
  int i;
  tree ratio = NULL;
  int vectorization_factor = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
  bitmap_iterator bi;
  unsigned int j;

  if (vect_print_dump_info (REPORT_DETAILS))
    fprintf (vect_dump, "=== vec_transform_loop ===");

  /* If the loop has data references that may or may not be aligned then
     two versions of the loop need to be generated, one which is vectorized
     and one which isn't.  A test is then generated to control which of the
     loops is executed.  The test checks for the alignment of all of the
     data references that may or may not be aligned. */

  if (VEC_length (tree, LOOP_VINFO_MAY_MISALIGN_STMTS (loop_vinfo)))
    {
      struct loop *nloop;
      tree cond_expr;
      tree cond_expr_stmt_list = NULL_TREE;
      basic_block condition_bb;
      block_stmt_iterator cond_exp_bsi;
      basic_block merge_bb;
      basic_block new_exit_bb;
      edge new_exit_e, e;
      tree orig_phi, new_phi, arg;

      /* Build the runtime alignment condition and the statement list that
	 computes it; the statements are inserted into the condition block
	 further down, once that block exists.  */
      cond_expr = vect_create_cond_for_align_checks (loop_vinfo,
                                                     &cond_expr_stmt_list);
      initialize_original_copy_tables ();
      nloop = loop_version (loops, loop, cond_expr, &condition_bb, true);
      free_original_copy_tables();

      /** Loop versioning violates an assumption we try to maintain during
	 vectorization - that the loop exit block has a single predecessor.
	 After versioning, the exit block of both loop versions is the same
	 basic block (i.e. it has two predecessors). Just in order to simplify
	 following transformations in the vectorizer, we fix this situation
	 here by adding a new (empty) block on the exit-edge of the loop,
	 with the proper loop-exit phis to maintain loop-closed-form.  **/

      merge_bb = loop->single_exit->dest;
      gcc_assert (EDGE_COUNT (merge_bb->preds) == 2);
      new_exit_bb = split_edge (loop->single_exit);
      add_bb_to_loop (new_exit_bb, loop->outer);
      new_exit_e = loop->single_exit;
      /* E is the single edge that now leads from the new exit block to
	 the old merge block.  */
      e = EDGE_SUCC (new_exit_bb, 0);

      /* For each phi in the merge block, create a matching phi in the new
	 exit block and route the loop-exit value through it, so the loop
	 stays in loop-closed SSA form.  */
      for (orig_phi = phi_nodes (merge_bb); orig_phi;
	   orig_phi = PHI_CHAIN (orig_phi))
	{
          new_phi = create_phi_node (SSA_NAME_VAR (PHI_RESULT (orig_phi)),
				     new_exit_bb);
          arg = PHI_ARG_DEF_FROM_EDGE (orig_phi, e);
          add_phi_arg (new_phi, arg, new_exit_e);
	  SET_PHI_ARG_DEF (orig_phi, e->dest_idx, PHI_RESULT (new_phi));
	}

      /** end loop-exit-fixes after versioning  **/

      update_ssa (TODO_update_ssa);
      /* Insert the statements that compute the alignment condition just
	 before the branch at the end of the condition block.  */
      cond_exp_bsi = bsi_last (condition_bb);
      bsi_insert_before (&cond_exp_bsi, cond_expr_stmt_list, BSI_SAME_STMT);
    }

  /* CHECKME: we wouldn't need this if we called update_ssa once
     for all loops.  */
  bitmap_zero (vect_vnames_to_rename);

  /* Peel the loop if there are data refs with unknown alignment.
     Only one data ref with unknown store is allowed.  */

  if (LOOP_PEELING_FOR_ALIGNMENT (loop_vinfo))
    vect_do_peeling_for_alignment (loop_vinfo, loops);

  /* If the loop has a symbolic number of iterations 'n' (i.e. it's not a
     compile time constant), or it is a constant that doesn't divide by the
     vectorization factor, then an epilog loop needs to be created.
     We therefore duplicate the loop: the original loop will be vectorized,
     and will compute the first (n/VF) iterations. The second copy of the loop
     will remain scalar and will compute the remaining (n%VF) iterations.
     (VF is the vectorization factor).  */

  if (!LOOP_VINFO_NITERS_KNOWN_P (loop_vinfo)
      || (LOOP_VINFO_NITERS_KNOWN_P (loop_vinfo)
          && LOOP_VINFO_INT_NITERS (loop_vinfo) % vectorization_factor != 0))
    vect_do_peeling_for_loop_bound (loop_vinfo, &ratio, loops);
  else
    /* The iteration count divides evenly by VF: no epilog loop is needed
       and the vector loop iterates exactly niters/VF times.  */
    ratio = build_int_cst (TREE_TYPE (LOOP_VINFO_NITERS (loop_vinfo)),
		LOOP_VINFO_INT_NITERS (loop_vinfo) / vectorization_factor);

  /* 1) Make sure the loop header has exactly two entries
     2) Make sure we have a preheader basic block.  */

  gcc_assert (EDGE_COUNT (loop->header->preds) == 2);

  loop_split_edge_with (loop_preheader_edge (loop), NULL);


  /* FORNOW: the vectorizer supports only loops which body consist
     of one basic block (header + empty latch). When the vectorizer will
     support more involved loop forms, the order by which the BBs are
     traversed need to be reconsidered.  */

  for (i = 0; i < nbbs; i++)
    {
      basic_block bb = bbs[i];

      for (si = bsi_start (bb); !bsi_end_p (si);)
	{
	  tree stmt = bsi_stmt (si);
	  stmt_vec_info stmt_info;
	  bool is_store;

	  if (vect_print_dump_info (REPORT_DETAILS))
	    {
	      fprintf (vect_dump, "------>vectorizing statement: ");
	      print_generic_expr (vect_dump, stmt, TDF_SLIM);
	    }
	  stmt_info = vinfo_for_stmt (stmt);
	  gcc_assert (stmt_info);
	  /* Skip stmts the analysis phase marked as neither relevant nor
	     live; they get no vector counterpart.  */
	  if (!STMT_VINFO_RELEVANT_P (stmt_info)
	      && !STMT_VINFO_LIVE_P (stmt_info))
	    {
	      bsi_next (&si);
	      continue;
	    }
	  /* FORNOW: Verify that all stmts operate on the same number of
	             units and no inner unrolling is necessary.  */
	  gcc_assert
		(TYPE_VECTOR_SUBPARTS (STMT_VINFO_VECTYPE (stmt_info))
		 == (unsigned HOST_WIDE_INT) vectorization_factor);

	  /* -------- vectorize statement ------------ */
	  if (vect_print_dump_info (REPORT_DETAILS))
	    fprintf (vect_dump, "transform statement.");

	  is_store = vect_transform_stmt (stmt, &si);
	  if (is_store)
	    {
	      /* Free the attached stmt_vec_info and remove the stmt.
		 The `continue' skips bsi_next: SI already refers to the
		 following stmt after the removal.  */
	      stmt_ann_t ann = stmt_ann (stmt);
	      free (stmt_info);
	      set_stmt_info ((tree_ann_t)ann, NULL);
	      bsi_remove (&si);
	      continue;
	    }

	  bsi_next (&si);
	}		        /* stmts in BB */
    }				/* BBs in loop */

  slpeel_make_loop_iterate_ntimes (loop, ratio);

  /* Queue for renaming the symbols recorded in vect_vnames_to_rename
     during stmt transformation; presumably these underlie virtual
     operands of the new vector stmts — TODO confirm where the bitmap
     is populated.  */
  EXECUTE_IF_SET_IN_BITMAP (vect_vnames_to_rename, 0, j, bi)
    mark_sym_for_renaming (SSA_NAME_VAR (ssa_name (j)));

  /* The memory tags and pointers in vectorized statements need to
     have their SSA forms updated.  FIXME, why can't this be delayed
     until all the loops have been transformed?  */
  update_ssa (TODO_update_ssa);

  if (vect_print_dump_info (REPORT_VECTORIZED_LOOPS))
    fprintf (vect_dump, "LOOP VECTORIZED.");
}
2973