1/* Transformation Utilities for Loop Vectorization.
2   Copyright (C) 2003,2004,2005,2006 Free Software Foundation, Inc.
3   Contributed by Dorit Naishlos <dorit@il.ibm.com>
4
5This file is part of GCC.
6
7GCC is free software; you can redistribute it and/or modify it under
8the terms of the GNU General Public License as published by the Free
9Software Foundation; either version 2, or (at your option) any later
10version.
11
12GCC is distributed in the hope that it will be useful, but WITHOUT ANY
13WARRANTY; without even the implied warranty of MERCHANTABILITY or
14FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
15for more details.
16
17You should have received a copy of the GNU General Public License
18along with GCC; see the file COPYING.  If not, write to the Free
19Software Foundation, 51 Franklin Street, Fifth Floor, Boston, MA
2002110-1301, USA.  */
21
22#include "config.h"
23#include "system.h"
24#include "coretypes.h"
25#include "tm.h"
26#include "ggc.h"
27#include "tree.h"
28#include "target.h"
29#include "rtl.h"
30#include "basic-block.h"
31#include "diagnostic.h"
32#include "tree-flow.h"
33#include "tree-dump.h"
34#include "timevar.h"
35#include "cfgloop.h"
36#include "expr.h"
37#include "optabs.h"
38#include "recog.h"
39#include "tree-data-ref.h"
40#include "tree-chrec.h"
41#include "tree-scalar-evolution.h"
42#include "tree-vectorizer.h"
43#include "langhooks.h"
44#include "tree-pass.h"
45#include "toplev.h"
46#include "real.h"
47
48/* Utility functions for the code transformation.  */
49static bool vect_transform_stmt (tree, block_stmt_iterator *);
50static void vect_align_data_ref (tree);
51static tree vect_create_destination_var (tree, tree);
52static tree vect_create_data_ref_ptr
53  (tree, block_stmt_iterator *, tree, tree *, bool);
54static tree vect_create_addr_base_for_vector_ref (tree, tree *, tree);
55static tree vect_get_new_vect_var (tree, enum vect_var_kind, const char *);
56static tree vect_get_vec_def_for_operand (tree, tree, tree *);
57static tree vect_init_vector (tree, tree);
58static void vect_finish_stmt_generation
59  (tree stmt, tree vec_stmt, block_stmt_iterator *bsi);
60static bool vect_is_simple_cond (tree, loop_vec_info);
61static void update_vuses_to_preheader (tree, struct loop*);
62static void vect_create_epilog_for_reduction (tree, tree, enum tree_code, tree);
63static tree get_initial_def_for_reduction (tree, tree, tree *);
64
65/* Utility function dealing with loop peeling (not peeling itself).  */
66static void vect_generate_tmps_on_preheader
67  (loop_vec_info, tree *, tree *, tree *);
68static tree vect_build_loop_niters (loop_vec_info);
69static void vect_update_ivs_after_vectorizer (loop_vec_info, tree, edge);
70static tree vect_gen_niters_for_prolog_loop (loop_vec_info, tree);
71static void vect_update_init_of_dr (struct data_reference *, tree niters);
72static void vect_update_inits_of_drs (loop_vec_info, tree);
73static void vect_do_peeling_for_alignment (loop_vec_info, struct loops *);
74static void vect_do_peeling_for_loop_bound
75  (loop_vec_info, tree *, struct loops *);
76static int vect_min_worthwhile_factor (enum tree_code);
77
78
79/* Function vect_get_new_vect_var.
80
81   Returns a name for a new variable. The current naming scheme appends the
82   prefix "vect_" or "vect_p" (depending on the value of VAR_KIND) to
83   the name of vectorizer generated variables, and appends that to NAME if
84   provided.  */
85
86static tree
87vect_get_new_vect_var (tree type, enum vect_var_kind var_kind, const char *name)
88{
89  const char *prefix;
90  tree new_vect_var;
91
92  switch (var_kind)
93  {
94  case vect_simple_var:
95    prefix = "vect_";
96    break;
97  case vect_scalar_var:
98    prefix = "stmp_";
99    break;
100  case vect_pointer_var:
101    prefix = "vect_p";
102    break;
103  default:
104    gcc_unreachable ();
105  }
106
107  if (name)
108    new_vect_var = create_tmp_var (type, concat (prefix, name, NULL));
109  else
110    new_vect_var = create_tmp_var (type, prefix);
111
112  return new_vect_var;
113}
114
115
116/* Function vect_create_addr_base_for_vector_ref.
117
118   Create an expression that computes the address of the first memory location
119   that will be accessed for a data reference.
120
121   Input:
122   STMT: The statement containing the data reference.
123   NEW_STMT_LIST: Must be initialized to NULL_TREE or a statement list.
   OFFSET: Optional. If supplied, it is added to the initial address.
125
126   Output:
127   1. Return an SSA_NAME whose value is the address of the memory location of
128      the first vector of the data reference.
129   2. If new_stmt_list is not NULL_TREE after return then the caller must insert
130      these statement(s) which define the returned SSA_NAME.
131
132   FORNOW: We are only handling array accesses with step 1.  */
133
static tree
vect_create_addr_base_for_vector_ref (tree stmt,
                                      tree *new_stmt_list,
				      tree offset)
{
  stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
  struct data_reference *dr = STMT_VINFO_DATA_REF (stmt_info);
  /* Unshared copy of the base address; BASE_NAME is used only for naming.  */
  tree data_ref_base = unshare_expr (DR_BASE_ADDRESS (dr));
  tree base_name = build_fold_indirect_ref (data_ref_base);
  tree ref = DR_REF (dr);
  tree scalar_type = TREE_TYPE (ref);
  tree scalar_ptr_type = build_pointer_type (scalar_type);
  tree vec_stmt;
  tree new_temp;
  tree addr_base, addr_expr;
  tree dest, new_stmt;
  tree base_offset = unshare_expr (DR_OFFSET (dr));
  tree init = unshare_expr (DR_INIT (dr));

  /* Create base_offset = DR_OFFSET + DR_INIT, gimplified into DEST; any
     stmts needed to compute it are appended to NEW_STMT_LIST for the
     caller to insert.  */
  base_offset = size_binop (PLUS_EXPR, base_offset, init);
  dest = create_tmp_var (TREE_TYPE (base_offset), "base_off");
  add_referenced_var (dest);
  base_offset = force_gimple_operand (base_offset, &new_stmt, false, dest);
  append_to_statement_list_force (new_stmt, new_stmt_list);

  if (offset)
    {
      /* Add OFFSET (scaled by the data-ref's step) to the computed
	 base offset.  */
      tree tmp = create_tmp_var (TREE_TYPE (base_offset), "offset");
      add_referenced_var (tmp);
      offset = fold_build2 (MULT_EXPR, TREE_TYPE (offset), offset,
			    DR_STEP (dr));
      base_offset = fold_build2 (PLUS_EXPR, TREE_TYPE (base_offset),
				 base_offset, offset);
      base_offset = force_gimple_operand (base_offset, &new_stmt, false, tmp);
      append_to_statement_list_force (new_stmt, new_stmt_list);
    }

  /* base + base_offset */
  addr_base = fold_build2 (PLUS_EXPR, TREE_TYPE (data_ref_base), data_ref_base,
			   base_offset);

  /* addr_expr = addr_base: materialize the address in a fresh SSA name,
     which is the value returned to the caller.  */
  addr_expr = vect_get_new_vect_var (scalar_ptr_type, vect_pointer_var,
                                     get_name (base_name));
  add_referenced_var (addr_expr);
  vec_stmt = build2 (MODIFY_EXPR, void_type_node, addr_expr, addr_base);
  new_temp = make_ssa_name (addr_expr, vec_stmt);
  TREE_OPERAND (vec_stmt, 0) = new_temp;
  append_to_statement_list_force (vec_stmt, new_stmt_list);

  if (vect_print_dump_info (REPORT_DETAILS))
    {
      fprintf (vect_dump, "created ");
      print_generic_expr (vect_dump, vec_stmt, TDF_SLIM);
    }
  return new_temp;
}
192
193
194/* Function vect_align_data_ref.
195
196   Handle misalignment of a memory accesses.
197
198   FORNOW: Can't handle misaligned accesses.
199   Make sure that the dataref is aligned.  */
200
201static void
202vect_align_data_ref (tree stmt)
203{
204  stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
205  struct data_reference *dr = STMT_VINFO_DATA_REF (stmt_info);
206
207  /* FORNOW: can't handle misaligned accesses;
208             all accesses expected to be aligned.  */
209  gcc_assert (aligned_access_p (dr));
210}
211
212
213/* Function vect_create_data_ref_ptr.
214
215   Create a memory reference expression for vector access, to be used in a
216   vector load/store stmt. The reference is based on a new pointer to vector
217   type (vp).
218
219   Input:
220   1. STMT: a stmt that references memory. Expected to be of the form
221         MODIFY_EXPR <name, data-ref> or MODIFY_EXPR <data-ref, name>.
222   2. BSI: block_stmt_iterator where new stmts can be added.
223   3. OFFSET (optional): an offset to be added to the initial address accessed
224        by the data-ref in STMT.
225   4. ONLY_INIT: indicate if vp is to be updated in the loop, or remain
226        pointing to the initial address.
227
228   Output:
229   1. Declare a new ptr to vector_type, and have it point to the base of the
      data reference (initial address accessed by the data reference).
231      For example, for vector of type V8HI, the following code is generated:
232
233      v8hi *vp;
234      vp = (v8hi *)initial_address;
235
236      if OFFSET is not supplied:
237         initial_address = &a[init];
238      if OFFSET is supplied:
239         initial_address = &a[init + OFFSET];
240
241      Return the initial_address in INITIAL_ADDRESS.
242
243   2. If ONLY_INIT is true, return the initial pointer.  Otherwise, create
244      a data-reference in the loop based on the new vector pointer vp.  This
245      new data reference will by some means be updated each iteration of
246      the loop.  Return the pointer vp'.
247
248   FORNOW: handle only aligned and consecutive accesses.  */
249
static tree
vect_create_data_ref_ptr (tree stmt,
			  block_stmt_iterator *bsi ATTRIBUTE_UNUSED,
			  tree offset, tree *initial_address, bool only_init)
{
  tree base_name;
  stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
  loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
  struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
  tree vectype = STMT_VINFO_VECTYPE (stmt_info);
  tree vect_ptr_type;
  tree vect_ptr;
  tree tag;
  tree new_temp;
  tree vec_stmt;
  tree new_stmt_list = NULL_TREE;
  edge pe = loop_preheader_edge (loop);
  basic_block new_bb;
  tree vect_ptr_init;
  struct data_reference *dr = STMT_VINFO_DATA_REF (stmt_info);

  /* The object the data-ref accesses; used for naming and dump output.  */
  base_name =  build_fold_indirect_ref (unshare_expr (DR_BASE_ADDRESS (dr)));

  if (vect_print_dump_info (REPORT_DETAILS))
    {
      tree data_ref_base = base_name;
      fprintf (vect_dump, "create vector-pointer variable to type: ");
      print_generic_expr (vect_dump, vectype, TDF_SLIM);
      if (TREE_CODE (data_ref_base) == VAR_DECL)
        fprintf (vect_dump, "  vectorizing a one dimensional array ref: ");
      else if (TREE_CODE (data_ref_base) == ARRAY_REF)
        fprintf (vect_dump, "  vectorizing a multidimensional array ref: ");
      else if (TREE_CODE (data_ref_base) == COMPONENT_REF)
        fprintf (vect_dump, "  vectorizing a record based array ref: ");
      else if (TREE_CODE (data_ref_base) == SSA_NAME)
        fprintf (vect_dump, "  vectorizing a pointer ref: ");
      print_generic_expr (vect_dump, base_name, TDF_SLIM);
    }

  /** (1) Create the new vector-pointer variable:  **/

  vect_ptr_type = build_pointer_type (vectype);
  vect_ptr = vect_get_new_vect_var (vect_ptr_type, vect_pointer_var,
                                    get_name (base_name));
  add_referenced_var (vect_ptr);


  /** (2) Add aliasing information to the new vector-pointer:
          (The points-to info (DR_PTR_INFO) may be defined later.)  **/

  tag = DR_MEMTAG (dr);
  gcc_assert (tag);

  /* If tag is a variable (and NOT_A_TAG) then a new symbol memory
     tag must be created with tag added to its may alias list.  */
  if (!MTAG_P (tag))
    new_type_alias (vect_ptr, tag, DR_REF (dr));
  else
    var_ann (vect_ptr)->symbol_mem_tag = tag;

  var_ann (vect_ptr)->subvars = DR_SUBVARS (dr);

  /** (3) Calculate the initial address the vector-pointer, and set
          the vector-pointer to point to it before the loop:  **/

  /* Create: (&(base[init_val+offset]) in the loop preheader.  */
  new_temp = vect_create_addr_base_for_vector_ref (stmt, &new_stmt_list,
                                                   offset);
  pe = loop_preheader_edge (loop);
  /* Inserting on the preheader edge must not split it.  */
  new_bb = bsi_insert_on_edge_immediate (pe, new_stmt_list);
  gcc_assert (!new_bb);
  *initial_address = new_temp;

  /* Create: p = (vectype *) initial_base  */
  vec_stmt = fold_convert (vect_ptr_type, new_temp);
  vec_stmt = build2 (MODIFY_EXPR, void_type_node, vect_ptr, vec_stmt);
  vect_ptr_init = make_ssa_name (vect_ptr, vec_stmt);
  TREE_OPERAND (vec_stmt, 0) = vect_ptr_init;
  new_bb = bsi_insert_on_edge_immediate (pe, vec_stmt);
  gcc_assert (!new_bb);


  /** (4) Handle the updating of the vector-pointer inside the loop: **/

  if (only_init) /* No update in loop is required.  */
    {
      /* Copy the points-to information if it exists. */
      if (DR_PTR_INFO (dr))
        duplicate_ssa_name_ptr_info (vect_ptr_init, DR_PTR_INFO (dr));
      return vect_ptr_init;
    }
  else
    {
      block_stmt_iterator incr_bsi;
      bool insert_after;
      tree indx_before_incr, indx_after_incr;
      tree incr;

      /* Create an IV that bumps the pointer by the vector size each
	 iteration, and attach vectorizer stmt-info to the increment.  */
      standard_iv_increment_position (loop, &incr_bsi, &insert_after);
      create_iv (vect_ptr_init,
		 fold_convert (vect_ptr_type, TYPE_SIZE_UNIT (vectype)),
		 NULL_TREE, loop, &incr_bsi, insert_after,
		 &indx_before_incr, &indx_after_incr);
      incr = bsi_stmt (incr_bsi);
      set_stmt_info (stmt_ann (incr),
		     new_stmt_vec_info (incr, loop_vinfo));

      /* Copy the points-to information if it exists. */
      if (DR_PTR_INFO (dr))
	{
	  duplicate_ssa_name_ptr_info (indx_before_incr, DR_PTR_INFO (dr));
	  duplicate_ssa_name_ptr_info (indx_after_incr, DR_PTR_INFO (dr));
	}
      merge_alias_info (vect_ptr_init, indx_before_incr);
      merge_alias_info (vect_ptr_init, indx_after_incr);

      return indx_before_incr;
    }
}
369
370
371/* Function vect_create_destination_var.
372
373   Create a new temporary of type VECTYPE.  */
374
375static tree
376vect_create_destination_var (tree scalar_dest, tree vectype)
377{
378  tree vec_dest;
379  const char *new_name;
380  tree type;
381  enum vect_var_kind kind;
382
383  kind = vectype ? vect_simple_var : vect_scalar_var;
384  type = vectype ? vectype : TREE_TYPE (scalar_dest);
385
386  gcc_assert (TREE_CODE (scalar_dest) == SSA_NAME);
387
388  new_name = get_name (scalar_dest);
389  if (!new_name)
390    new_name = "var_";
391  vec_dest = vect_get_new_vect_var (type, vect_simple_var, new_name);
392  add_referenced_var (vec_dest);
393
394  return vec_dest;
395}
396
397
398/* Function vect_init_vector.
399
400   Insert a new stmt (INIT_STMT) that initializes a new vector variable with
401   the vector elements of VECTOR_VAR. Return the DEF of INIT_STMT. It will be
402   used in the vectorization of STMT.  */
403
404static tree
405vect_init_vector (tree stmt, tree vector_var)
406{
407  stmt_vec_info stmt_vinfo = vinfo_for_stmt (stmt);
408  loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_vinfo);
409  struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
410  tree new_var;
411  tree init_stmt;
412  tree vectype = STMT_VINFO_VECTYPE (stmt_vinfo);
413  tree vec_oprnd;
414  edge pe;
415  tree new_temp;
416  basic_block new_bb;
417
418  new_var = vect_get_new_vect_var (vectype, vect_simple_var, "cst_");
419  add_referenced_var (new_var);
420
421  init_stmt = build2 (MODIFY_EXPR, vectype, new_var, vector_var);
422  new_temp = make_ssa_name (new_var, init_stmt);
423  TREE_OPERAND (init_stmt, 0) = new_temp;
424
425  pe = loop_preheader_edge (loop);
426  new_bb = bsi_insert_on_edge_immediate (pe, init_stmt);
427  gcc_assert (!new_bb);
428
429  if (vect_print_dump_info (REPORT_DETAILS))
430    {
431      fprintf (vect_dump, "created new init_stmt: ");
432      print_generic_expr (vect_dump, init_stmt, TDF_SLIM);
433    }
434
435  vec_oprnd = TREE_OPERAND (init_stmt, 0);
436  return vec_oprnd;
437}
438
439
440/* Function vect_get_vec_def_for_operand.
441
442   OP is an operand in STMT. This function returns a (vector) def that will be
443   used in the vectorized stmt for STMT.
444
445   In the case that OP is an SSA_NAME which is defined in the loop, then
446   STMT_VINFO_VEC_STMT of the defining stmt holds the relevant def.
447
448   In case OP is an invariant or constant, a new stmt that creates a vector def
449   needs to be introduced.  */
450
static tree
vect_get_vec_def_for_operand (tree op, tree stmt, tree *scalar_def)
{
  tree vec_oprnd;
  tree vec_stmt;
  tree def_stmt;
  stmt_vec_info def_stmt_info = NULL;
  stmt_vec_info stmt_vinfo = vinfo_for_stmt (stmt);
  tree vectype = STMT_VINFO_VECTYPE (stmt_vinfo);
  int nunits = TYPE_VECTOR_SUBPARTS (vectype);
  loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_vinfo);
  struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
  tree vec_inv;
  tree vec_cst;
  tree t = NULL_TREE;
  tree def;
  int i;
  enum vect_def_type dt;
  bool is_simple_use;

  if (vect_print_dump_info (REPORT_DETAILS))
    {
      fprintf (vect_dump, "vect_get_vec_def_for_operand: ");
      print_generic_expr (vect_dump, op, TDF_SLIM);
    }

  /* Classify OP: constant, invariant, loop def, reduction or induction.
     The analysis phase already verified this succeeds.  */
  is_simple_use = vect_is_simple_use (op, loop_vinfo, &def_stmt, &def, &dt);
  gcc_assert (is_simple_use);
  if (vect_print_dump_info (REPORT_DETAILS))
    {
      if (def)
        {
          fprintf (vect_dump, "def =  ");
          print_generic_expr (vect_dump, def, TDF_SLIM);
        }
      if (def_stmt)
        {
          fprintf (vect_dump, "  def_stmt =  ");
          print_generic_expr (vect_dump, def_stmt, TDF_SLIM);
        }
    }

  switch (dt)
    {
    /* Case 1: operand is a constant.  */
    case vect_constant_def:
      {
	if (scalar_def)
	  *scalar_def = op;

        /* Create 'vect_cst_ = {cst,cst,...,cst}'  */
        if (vect_print_dump_info (REPORT_DETAILS))
          fprintf (vect_dump, "Create vector_cst. nunits = %d", nunits);

        /* Build the element list back-to-front so the cons list ends up
           in order.  */
        for (i = nunits - 1; i >= 0; --i)
          {
            t = tree_cons (NULL_TREE, op, t);
          }
        vec_cst = build_vector (vectype, t);
        return vect_init_vector (stmt, vec_cst);
      }

    /* Case 2: operand is defined outside the loop - loop invariant.  */
    case vect_invariant_def:
      {
	if (scalar_def)
	  *scalar_def = def;

        /* Create 'vec_inv = {inv,inv,..,inv}'  */
        if (vect_print_dump_info (REPORT_DETAILS))
          fprintf (vect_dump, "Create vector_inv.");

        for (i = nunits - 1; i >= 0; --i)
          {
            t = tree_cons (NULL_TREE, def, t);
          }

	/* FIXME: use build_constructor directly.  */
        vec_inv = build_constructor_from_list (vectype, t);
        return vect_init_vector (stmt, vec_inv);
      }

    /* Case 3: operand is defined inside the loop.  */
    case vect_loop_def:
      {
	if (scalar_def)
	  *scalar_def = def_stmt;

        /* Get the def from the vectorized stmt.  */
        def_stmt_info = vinfo_for_stmt (def_stmt);
        vec_stmt = STMT_VINFO_VEC_STMT (def_stmt_info);
        gcc_assert (vec_stmt);
        vec_oprnd = TREE_OPERAND (vec_stmt, 0);
        return vec_oprnd;
      }

    /* Case 4: operand is defined by a loop header phi - reduction  */
    case vect_reduction_def:
      {
        gcc_assert (TREE_CODE (def_stmt) == PHI_NODE);

        /* Get the def before the loop  */
        op = PHI_ARG_DEF_FROM_EDGE (def_stmt, loop_preheader_edge (loop));
        return get_initial_def_for_reduction (stmt, op, scalar_def);
     }

    /* Case 5: operand is defined by loop-header phi - induction.  */
    case vect_induction_def:
      {
        if (vect_print_dump_info (REPORT_DETAILS))
          fprintf (vect_dump, "induction - unsupported.");
        internal_error ("no support for induction"); /* FORNOW */
      }

    default:
      gcc_unreachable ();
    }
}
569
570
/* Function vect_finish_stmt_generation.

   Insert a new stmt (VEC_STMT) immediately before STMT, the scalar stmt
   being vectorized, and copy STMT's source location onto it.  */

static void
vect_finish_stmt_generation (tree stmt, tree vec_stmt, block_stmt_iterator *bsi)
{
  /* BSI_SAME_STMT keeps BSI pointing at STMT after the insertion.  */
  bsi_insert_before (bsi, vec_stmt, BSI_SAME_STMT);

  if (vect_print_dump_info (REPORT_DETAILS))
    {
      fprintf (vect_dump, "add new stmt: ");
      print_generic_expr (vect_dump, vec_stmt, TDF_SLIM);
    }

  /* Make sure bsi points to the stmt that is being vectorized.  */
  gcc_assert (stmt == bsi_stmt (*bsi));

  /* Propagate STMT's location so debug info points at the original
     scalar stmt.  */
#ifdef USE_MAPPED_LOCATION
  SET_EXPR_LOCATION (vec_stmt, EXPR_LOCATION (stmt));
#else
  SET_EXPR_LOCUS (vec_stmt, EXPR_LOCUS (stmt));
#endif
}
595
596
597#define ADJUST_IN_EPILOG 1
598
599/* Function get_initial_def_for_reduction
600
601   Input:
602   STMT - a stmt that performs a reduction operation in the loop.
603   INIT_VAL - the initial value of the reduction variable
604
605   Output:
606   SCALAR_DEF - a tree that holds a value to be added to the final result
607	of the reduction (used for "ADJUST_IN_EPILOG" - see below).
608   Return a vector variable, initialized according to the operation that STMT
609	performs. This vector will be used as the initial value of the
610	vector of partial results.
611
612   Option1 ("ADJUST_IN_EPILOG"): Initialize the vector as follows:
613     add:         [0,0,...,0,0]
614     mult:        [1,1,...,1,1]
615     min/max:     [init_val,init_val,..,init_val,init_val]
616     bit and/or:  [init_val,init_val,..,init_val,init_val]
617   and when necessary (e.g. add/mult case) let the caller know
618   that it needs to adjust the result by init_val.
619
620   Option2: Initialize the vector as follows:
621     add:         [0,0,...,0,init_val]
622     mult:        [1,1,...,1,init_val]
623     min/max:     [init_val,init_val,...,init_val]
624     bit and/or:  [init_val,init_val,...,init_val]
625   and no adjustments are needed.
626
627   For example, for the following code:
628
629   s = init_val;
630   for (i=0;i<n;i++)
631     s = s + a[i];
632
633   STMT is 's = s + a[i]', and the reduction variable is 's'.
634   For a vector of 4 units, we want to return either [0,0,0,init_val],
635   or [0,0,0,0] and let the caller know that it needs to adjust
636   the result at the end by 'init_val'.
637
638   FORNOW: We use the "ADJUST_IN_EPILOG" scheme.
639   TODO: Use some cost-model to estimate which scheme is more profitable.
640*/
641
static tree
get_initial_def_for_reduction (tree stmt, tree init_val, tree *scalar_def)
{
  stmt_vec_info stmt_vinfo = vinfo_for_stmt (stmt);
  tree vectype = STMT_VINFO_VECTYPE (stmt_vinfo);
  int nunits = GET_MODE_NUNITS (TYPE_MODE (vectype));
  /* Number of elements initialized with DEF; the remaining element (if
     any) is set to INIT_VAL.  */
  int nelements;
  enum tree_code code = TREE_CODE (TREE_OPERAND (stmt, 1));
  tree type = TREE_TYPE (init_val);
  tree def;
  tree vec, t = NULL_TREE;
  bool need_epilog_adjust;
  int i;

  gcc_assert (INTEGRAL_TYPE_P (type) || SCALAR_FLOAT_TYPE_P (type));

  switch (code)
  {
  case WIDEN_SUM_EXPR:
  case DOT_PROD_EXPR:
  case PLUS_EXPR:
    /* The neutral element for addition: integer or float zero.  */
    if (INTEGRAL_TYPE_P (type))
      def = build_int_cst (type, 0);
    else
      def = build_real (type, dconst0);

#ifdef ADJUST_IN_EPILOG
    /* All the 'nunits' elements are set to 0. The final result will be
       adjusted by 'init_val' at the loop epilog.  */
    nelements = nunits;
    need_epilog_adjust = true;
#else
    /* 'nunits - 1' elements are set to 0; The last element is set to
        'init_val'.  No further adjustments at the epilog are needed.  */
    nelements = nunits - 1;
    need_epilog_adjust = false;
#endif
    break;

  case MIN_EXPR:
  case MAX_EXPR:
    /* For min/max every element starts at INIT_VAL; no epilog
       adjustment is needed.  */
    def = init_val;
    nelements = nunits;
    need_epilog_adjust = false;
    break;

  default:
    gcc_unreachable ();
  }

  /* Build the element list back-to-front so the cons list is in order.  */
  for (i = nelements - 1; i >= 0; --i)
    t = tree_cons (NULL_TREE, def, t);

  if (nelements == nunits - 1)
    {
      /* Set the last element of the vector.  */
      t = tree_cons (NULL_TREE, init_val, t);
      nelements += 1;
    }
  gcc_assert (nelements == nunits);

  /* Constant elements yield a VECTOR_CST; otherwise a constructor.  */
  if (TREE_CODE (init_val) == INTEGER_CST || TREE_CODE (init_val) == REAL_CST)
    vec = build_vector (vectype, t);
  else
    vec = build_constructor_from_list (vectype, t);

  if (!need_epilog_adjust)
    *scalar_def = NULL_TREE;
  else
    *scalar_def = init_val;

  /* Emit the vector initializer in the loop preheader and return its
     SSA def.  */
  return vect_init_vector (stmt, vec);
}
715
716
717/* Function vect_create_epilog_for_reduction
718
719   Create code at the loop-epilog to finalize the result of a reduction
720   computation.
721
722   VECT_DEF is a vector of partial results.
723   REDUC_CODE is the tree-code for the epilog reduction.
724   STMT is the scalar reduction stmt that is being vectorized.
725   REDUCTION_PHI is the phi-node that carries the reduction computation.
726
727   This function:
   1. Creates the reduction def-use cycle: sets the arguments for
729      REDUCTION_PHI:
730      The loop-entry argument is the vectorized initial-value of the reduction.
731      The loop-latch argument is VECT_DEF - the vector of partial sums.
732   2. "Reduces" the vector of partial results VECT_DEF into a single result,
733      by applying the operation specified by REDUC_CODE if available, or by
734      other means (whole-vector shifts or a scalar loop).
735      The function also creates a new phi node at the loop exit to preserve
736      loop-closed form, as illustrated below.
737
738     The flow at the entry to this function:
739
740        loop:
741          vec_def = phi <null, null>            # REDUCTION_PHI
742          VECT_DEF = vector_stmt                # vectorized form of STMT
743          s_loop = scalar_stmt                  # (scalar) STMT
744        loop_exit:
745          s_out0 = phi <s_loop>                 # (scalar) EXIT_PHI
746          use <s_out0>
747          use <s_out0>
748
749     The above is transformed by this function into:
750
751        loop:
752          vec_def = phi <vec_init, VECT_DEF>    # REDUCTION_PHI
753          VECT_DEF = vector_stmt                # vectorized form of STMT
754          s_loop = scalar_stmt                  # (scalar) STMT
755        loop_exit:
756          s_out0 = phi <s_loop>                 # (scalar) EXIT_PHI
757          v_out1 = phi <VECT_DEF>               # NEW_EXIT_PHI
758          v_out2 = reduce <v_out1>
759          s_out3 = extract_field <v_out2, 0>
760          s_out4 = adjust_result <s_out3>
761          use <s_out4>
762          use <s_out4>
763*/
764
765static void
766vect_create_epilog_for_reduction (tree vect_def, tree stmt,
767                                  enum tree_code reduc_code, tree reduction_phi)
768{
769  stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
770  tree vectype;
771  enum machine_mode mode;
772  loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
773  struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
774  basic_block exit_bb;
775  tree scalar_dest;
776  tree scalar_type;
777  tree new_phi;
778  block_stmt_iterator exit_bsi;
779  tree vec_dest;
780  tree new_temp;
781  tree new_name;
782  tree epilog_stmt;
783  tree new_scalar_dest, exit_phi;
784  tree bitsize, bitpos, bytesize;
785  enum tree_code code = TREE_CODE (TREE_OPERAND (stmt, 1));
786  tree scalar_initial_def;
787  tree vec_initial_def;
788  tree orig_name;
789  imm_use_iterator imm_iter;
790  use_operand_p use_p;
791  bool extract_scalar_result;
792  tree reduction_op;
793  tree orig_stmt;
794  tree use_stmt;
795  tree operation = TREE_OPERAND (stmt, 1);
796  int op_type;
797
798  op_type = TREE_CODE_LENGTH (TREE_CODE (operation));
799  reduction_op = TREE_OPERAND (operation, op_type-1);
800  vectype = get_vectype_for_scalar_type (TREE_TYPE (reduction_op));
801  mode = TYPE_MODE (vectype);
802
803  /*** 1. Create the reduction def-use cycle  ***/
804
805  /* 1.1 set the loop-entry arg of the reduction-phi:  */
806  /* For the case of reduction, vect_get_vec_def_for_operand returns
807     the scalar def before the loop, that defines the initial value
808     of the reduction variable.  */
809  vec_initial_def = vect_get_vec_def_for_operand (reduction_op, stmt,
810						  &scalar_initial_def);
811  add_phi_arg (reduction_phi, vec_initial_def, loop_preheader_edge (loop));
812
813  /* 1.2 set the loop-latch arg for the reduction-phi:  */
814  add_phi_arg (reduction_phi, vect_def, loop_latch_edge (loop));
815
816  if (vect_print_dump_info (REPORT_DETAILS))
817    {
818      fprintf (vect_dump, "transform reduction: created def-use cycle:");
819      print_generic_expr (vect_dump, reduction_phi, TDF_SLIM);
820      fprintf (vect_dump, "\n");
821      print_generic_expr (vect_dump, SSA_NAME_DEF_STMT (vect_def), TDF_SLIM);
822    }
823
824
825  /*** 2. Create epilog code
826	  The reduction epilog code operates across the elements of the vector
827          of partial results computed by the vectorized loop.
828          The reduction epilog code consists of:
829          step 1: compute the scalar result in a vector (v_out2)
830          step 2: extract the scalar result (s_out3) from the vector (v_out2)
831          step 3: adjust the scalar result (s_out3) if needed.
832
833          Step 1 can be accomplished using one the following three schemes:
834          (scheme 1) using reduc_code, if available.
835          (scheme 2) using whole-vector shifts, if available.
836          (scheme 3) using a scalar loop. In this case steps 1+2 above are
837                     combined.
838
839          The overall epilog code looks like this:
840
841          s_out0 = phi <s_loop>         # original EXIT_PHI
842          v_out1 = phi <VECT_DEF>       # NEW_EXIT_PHI
843          v_out2 = reduce <v_out1>              # step 1
844          s_out3 = extract_field <v_out2, 0>    # step 2
845          s_out4 = adjust_result <s_out3>       # step 3
846
847          (step 3 is optional, and step2 1 and 2 may be combined).
848          Lastly, the uses of s_out0 are replaced by s_out4.
849
850	  ***/
851
852  /* 2.1 Create new loop-exit-phi to preserve loop-closed form:
853        v_out1 = phi <v_loop>  */
854
855  exit_bb = loop->single_exit->dest;
856  new_phi = create_phi_node (SSA_NAME_VAR (vect_def), exit_bb);
857  SET_PHI_ARG_DEF (new_phi, loop->single_exit->dest_idx, vect_def);
858  exit_bsi = bsi_start (exit_bb);
859
860  /* 2.2 Get the relevant tree-code to use in the epilog for schemes 2,3
861         (i.e. when reduc_code is not available) and in the final adjustment code
862         (if needed).  Also get the original scalar reduction variable as
863         defined in the loop.  In case STMT is a "pattern-stmt" (i.e. - it
864         represents a reduction pattern), the tree-code and scalar-def are
865         taken from the original stmt that the pattern-stmt (STMT) replaces.
866         Otherwise (it is a regular reduction) - the tree-code and scalar-def
867         are taken from STMT.  */
868
869  orig_stmt = STMT_VINFO_RELATED_STMT (stmt_info);
870  if (!orig_stmt)
871    {
872      /* Regular reduction  */
873      orig_stmt = stmt;
874    }
875  else
876    {
877      /* Reduction pattern  */
878      stmt_vec_info stmt_vinfo = vinfo_for_stmt (orig_stmt);
879      gcc_assert (STMT_VINFO_IN_PATTERN_P (stmt_vinfo));
880      gcc_assert (STMT_VINFO_RELATED_STMT (stmt_vinfo) == stmt);
881    }
882  code = TREE_CODE (TREE_OPERAND (orig_stmt, 1));
883  scalar_dest = TREE_OPERAND (orig_stmt, 0);
884  scalar_type = TREE_TYPE (scalar_dest);
885  new_scalar_dest = vect_create_destination_var (scalar_dest, NULL);
886  bitsize = TYPE_SIZE (scalar_type);
887  bytesize = TYPE_SIZE_UNIT (scalar_type);
888
889  /* 2.3 Create the reduction code, using one of the three schemes described
890         above.  */
891
892  if (reduc_code < NUM_TREE_CODES)
893    {
894      /*** Case 1:  Create:
895	   v_out2 = reduc_expr <v_out1>  */
896
897      if (vect_print_dump_info (REPORT_DETAILS))
898	fprintf (vect_dump, "Reduce using direct vector reduction.");
899
900      vec_dest = vect_create_destination_var (scalar_dest, vectype);
901      epilog_stmt = build2 (MODIFY_EXPR, vectype, vec_dest,
902			build1 (reduc_code, vectype,  PHI_RESULT (new_phi)));
903      new_temp = make_ssa_name (vec_dest, epilog_stmt);
904      TREE_OPERAND (epilog_stmt, 0) = new_temp;
905      bsi_insert_after (&exit_bsi, epilog_stmt, BSI_NEW_STMT);
906
907      extract_scalar_result = true;
908    }
909  else
910    {
911      enum tree_code shift_code = 0;
912      bool have_whole_vector_shift = true;
913      int bit_offset;
914      int element_bitsize = tree_low_cst (bitsize, 1);
915      int vec_size_in_bits = tree_low_cst (TYPE_SIZE (vectype), 1);
916      tree vec_temp;
917
918      if (vec_shr_optab->handlers[mode].insn_code != CODE_FOR_nothing)
919	shift_code = VEC_RSHIFT_EXPR;
920      else
921	have_whole_vector_shift = false;
922
923      /* Regardless of whether we have a whole vector shift, if we're
924	 emulating the operation via tree-vect-generic, we don't want
925	 to use it.  Only the first round of the reduction is likely
926	 to still be profitable via emulation.  */
927      /* ??? It might be better to emit a reduction tree code here, so that
928	 tree-vect-generic can expand the first round via bit tricks.  */
929      if (!VECTOR_MODE_P (mode))
930	have_whole_vector_shift = false;
931      else
932	{
933	  optab optab = optab_for_tree_code (code, vectype);
934	  if (optab->handlers[mode].insn_code == CODE_FOR_nothing)
935	    have_whole_vector_shift = false;
936	}
937
938      if (have_whole_vector_shift)
939        {
940	  /*** Case 2: Create:
941	     for (offset = VS/2; offset >= element_size; offset/=2)
942	        {
943	          Create:  va' = vec_shift <va, offset>
944	          Create:  va = vop <va, va'>
945	        }  */
946
947	  if (vect_print_dump_info (REPORT_DETAILS))
948	    fprintf (vect_dump, "Reduce using vector shifts");
949
950	  vec_dest = vect_create_destination_var (scalar_dest, vectype);
951	  new_temp = PHI_RESULT (new_phi);
952
953	  for (bit_offset = vec_size_in_bits/2;
954	       bit_offset >= element_bitsize;
955	       bit_offset /= 2)
956	    {
957	      tree bitpos = size_int (bit_offset);
958
959	      epilog_stmt = build2 (MODIFY_EXPR, vectype, vec_dest,
960	      build2 (shift_code, vectype, new_temp, bitpos));
961	      new_name = make_ssa_name (vec_dest, epilog_stmt);
962	      TREE_OPERAND (epilog_stmt, 0) = new_name;
963	      bsi_insert_after (&exit_bsi, epilog_stmt, BSI_NEW_STMT);
964
965	      epilog_stmt = build2 (MODIFY_EXPR, vectype, vec_dest,
966	      build2 (code, vectype, new_name, new_temp));
967	      new_temp = make_ssa_name (vec_dest, epilog_stmt);
968	      TREE_OPERAND (epilog_stmt, 0) = new_temp;
969	      bsi_insert_after (&exit_bsi, epilog_stmt, BSI_NEW_STMT);
970	    }
971
972	  extract_scalar_result = true;
973	}
974      else
975        {
976	  tree rhs;
977
978	  /*** Case 3: Create:
979	     s = extract_field <v_out2, 0>
980	     for (offset = element_size;
981		  offset < vector_size;
982		  offset += element_size;)
983	       {
984	         Create:  s' = extract_field <v_out2, offset>
985	         Create:  s = op <s, s'>
986	       }  */
987
988	  if (vect_print_dump_info (REPORT_DETAILS))
989	    fprintf (vect_dump, "Reduce using scalar code. ");
990
991	  vec_temp = PHI_RESULT (new_phi);
992	  vec_size_in_bits = tree_low_cst (TYPE_SIZE (vectype), 1);
993	  rhs = build3 (BIT_FIELD_REF, scalar_type, vec_temp, bitsize,
994			 bitsize_zero_node);
995	  BIT_FIELD_REF_UNSIGNED (rhs) = TYPE_UNSIGNED (scalar_type);
996	  epilog_stmt = build2 (MODIFY_EXPR, scalar_type, new_scalar_dest, rhs);
997	  new_temp = make_ssa_name (new_scalar_dest, epilog_stmt);
998	  TREE_OPERAND (epilog_stmt, 0) = new_temp;
999	  bsi_insert_after (&exit_bsi, epilog_stmt, BSI_NEW_STMT);
1000
1001	  for (bit_offset = element_bitsize;
1002	       bit_offset < vec_size_in_bits;
1003	       bit_offset += element_bitsize)
1004	    {
1005	      tree bitpos = bitsize_int (bit_offset);
1006	      tree rhs = build3 (BIT_FIELD_REF, scalar_type, vec_temp, bitsize,
1007				 bitpos);
1008
1009	      BIT_FIELD_REF_UNSIGNED (rhs) = TYPE_UNSIGNED (scalar_type);
1010	      epilog_stmt = build2 (MODIFY_EXPR, scalar_type, new_scalar_dest,
1011				    rhs);
1012	      new_name = make_ssa_name (new_scalar_dest, epilog_stmt);
1013	      TREE_OPERAND (epilog_stmt, 0) = new_name;
1014	      bsi_insert_after (&exit_bsi, epilog_stmt, BSI_NEW_STMT);
1015
1016	      epilog_stmt = build2 (MODIFY_EXPR, scalar_type, new_scalar_dest,
1017				build2 (code, scalar_type, new_name, new_temp));
1018	      new_temp = make_ssa_name (new_scalar_dest, epilog_stmt);
1019	      TREE_OPERAND (epilog_stmt, 0) = new_temp;
1020	      bsi_insert_after (&exit_bsi, epilog_stmt, BSI_NEW_STMT);
1021	    }
1022
1023	  extract_scalar_result = false;
1024	}
1025    }
1026
1027  /* 2.4  Extract the final scalar result.  Create:
1028         s_out3 = extract_field <v_out2, bitpos>  */
1029
1030  if (extract_scalar_result)
1031    {
1032      tree rhs;
1033
1034      if (vect_print_dump_info (REPORT_DETAILS))
1035	fprintf (vect_dump, "extract scalar result");
1036
1037      if (BYTES_BIG_ENDIAN)
1038	bitpos = size_binop (MULT_EXPR,
1039		       bitsize_int (TYPE_VECTOR_SUBPARTS (vectype) - 1),
1040		       TYPE_SIZE (scalar_type));
1041      else
1042	bitpos = bitsize_zero_node;
1043
1044      rhs = build3 (BIT_FIELD_REF, scalar_type, new_temp, bitsize, bitpos);
1045      BIT_FIELD_REF_UNSIGNED (rhs) = TYPE_UNSIGNED (scalar_type);
1046      epilog_stmt = build2 (MODIFY_EXPR, scalar_type, new_scalar_dest, rhs);
1047      new_temp = make_ssa_name (new_scalar_dest, epilog_stmt);
1048      TREE_OPERAND (epilog_stmt, 0) = new_temp;
1049      bsi_insert_after (&exit_bsi, epilog_stmt, BSI_NEW_STMT);
1050    }
1051
1052  /* 2.5 Adjust the final result by the initial value of the reduction
1053	 variable. (When such adjustment is not needed, then
1054	 'scalar_initial_def' is zero).
1055
1056	 Create:
1057	 s_out4 = scalar_expr <s_out3, scalar_initial_def>  */
1058
1059  if (scalar_initial_def)
1060    {
1061      epilog_stmt = build2 (MODIFY_EXPR, scalar_type, new_scalar_dest,
1062                      build2 (code, scalar_type, new_temp, scalar_initial_def));
1063      new_temp = make_ssa_name (new_scalar_dest, epilog_stmt);
1064      TREE_OPERAND (epilog_stmt, 0) = new_temp;
1065      bsi_insert_after (&exit_bsi, epilog_stmt, BSI_NEW_STMT);
1066    }
1067
1068  /* 2.6 Replace uses of s_out0 with uses of s_out4  */
1069
1070  /* Find the loop-closed-use at the loop exit of the original scalar result.
1071     (The reduction result is expected to have two immediate uses - one at the
1072     latch block, and one at the loop exit).  */
1073  exit_phi = NULL;
1074  FOR_EACH_IMM_USE_FAST (use_p, imm_iter, scalar_dest)
1075    {
1076      if (!flow_bb_inside_loop_p (loop, bb_for_stmt (USE_STMT (use_p))))
1077	{
1078	  exit_phi = USE_STMT (use_p);
1079	  break;
1080	}
1081    }
1082  /* We expect to have found an exit_phi because of loop-closed-ssa form.  */
1083  gcc_assert (exit_phi);
1084  /* Replace the uses:  */
1085  orig_name = PHI_RESULT (exit_phi);
1086  FOR_EACH_IMM_USE_STMT (use_stmt, imm_iter, orig_name)
1087    FOR_EACH_IMM_USE_ON_STMT (use_p, imm_iter)
1088      SET_USE (use_p, new_temp);
1089}
1090
1091
1092/* Function vectorizable_reduction.
1093
1094   Check if STMT performs a reduction operation that can be vectorized.
1095   If VEC_STMT is also passed, vectorize the STMT: create a vectorized
1096   stmt to replace it, put it in VEC_STMT, and insert it at BSI.
1097   Return FALSE if not a vectorizable STMT, TRUE otherwise.
1098
1099   This function also handles reduction idioms (patterns) that have been
1100   recognized in advance during vect_pattern_recog. In this case, STMT may be
1101   of this form:
1102     X = pattern_expr (arg0, arg1, ..., X)
1103   and its STMT_VINFO_RELATED_STMT points to the last stmt in the original
1104   sequence that had been detected and replaced by the pattern-stmt (STMT).
1105
1106   In some cases of reduction patterns, the type of the reduction variable X is
1107   different than the type of the other arguments of STMT.
1108   In such cases, the vectype that is used when transforming STMT into a vector
1109   stmt is different than the vectype that is used to determine the
1110   vectorization factor, because it consists of a different number of elements
1111   than the actual number of elements that are being operated upon in parallel.
1112
1113   For example, consider an accumulation of shorts into an int accumulator.
1114   On some targets it's possible to vectorize this pattern operating on 8
1115   shorts at a time (hence, the vectype for purposes of determining the
1116   vectorization factor should be V8HI); on the other hand, the vectype that
1117   is used to create the vector form is actually V4SI (the type of the result).
1118
1119   Upon entry to this function, STMT_VINFO_VECTYPE records the vectype that
1120   indicates what is the actual level of parallelism (V8HI in the example), so
1121   that the right vectorization factor would be derived. This vectype
1122   corresponds to the type of arguments to the reduction stmt, and should *NOT*
1123   be used to create the vectorized stmt. The right vectype for the vectorized
1124   stmt is obtained from the type of the result X:
1125        get_vectype_for_scalar_type (TREE_TYPE (X))
1126
1127   This means that, contrary to "regular" reductions (or "regular" stmts in
1128   general), the following equation:
1129      STMT_VINFO_VECTYPE == get_vectype_for_scalar_type (TREE_TYPE (X))
1130   does *NOT* necessarily hold for reduction patterns.  */
1131
1132bool
1133vectorizable_reduction (tree stmt, block_stmt_iterator *bsi, tree *vec_stmt)
1134{
1135  tree vec_dest;
1136  tree scalar_dest;
1137  tree op;
1138  tree loop_vec_def0, loop_vec_def1;
1139  stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
1140  tree vectype = STMT_VINFO_VECTYPE (stmt_info);
1141  loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
1142  struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
1143  tree operation;
1144  enum tree_code code, orig_code, epilog_reduc_code = 0;
1145  enum machine_mode vec_mode;
1146  int op_type;
1147  optab optab, reduc_optab;
1148  tree new_temp;
1149  tree def, def_stmt;
1150  enum vect_def_type dt;
1151  tree new_phi;
1152  tree scalar_type;
1153  bool is_simple_use;
1154  tree orig_stmt;
1155  stmt_vec_info orig_stmt_info;
1156  tree expr = NULL_TREE;
1157  int i;
1158
1159  /* 1. Is vectorizable reduction?  */
1160
1161  /* Not supportable if the reduction variable is used in the loop.  */
1162  if (STMT_VINFO_RELEVANT_P (stmt_info))
1163    return false;
1164
1165  if (!STMT_VINFO_LIVE_P (stmt_info))
1166    return false;
1167
1168  /* Make sure it was already recognized as a reduction computation.  */
1169  if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_reduction_def)
1170    return false;
1171
1172  /* 2. Has this been recognized as a reduction pattern?
1173
1174     Check if STMT represents a pattern that has been recognized
1175     in earlier analysis stages.  For stmts that represent a pattern,
1176     the STMT_VINFO_RELATED_STMT field records the last stmt in
1177     the original sequence that constitutes the pattern.  */
1178
1179  orig_stmt = STMT_VINFO_RELATED_STMT (stmt_info);
1180  if (orig_stmt)
1181    {
1182      orig_stmt_info = vinfo_for_stmt (orig_stmt);
1183      gcc_assert (STMT_VINFO_RELATED_STMT (orig_stmt_info) == stmt);
1184      gcc_assert (STMT_VINFO_IN_PATTERN_P (orig_stmt_info));
1185      gcc_assert (!STMT_VINFO_IN_PATTERN_P (stmt_info));
1186    }
1187
1188  /* 3. Check the operands of the operation. The first operands are defined
1189        inside the loop body. The last operand is the reduction variable,
1190        which is defined by the loop-header-phi.  */
1191
1192  gcc_assert (TREE_CODE (stmt) == MODIFY_EXPR);
1193
1194  operation = TREE_OPERAND (stmt, 1);
1195  code = TREE_CODE (operation);
1196  op_type = TREE_CODE_LENGTH (code);
1197
1198  if (op_type != binary_op && op_type != ternary_op)
1199    return false;
1200  scalar_dest = TREE_OPERAND (stmt, 0);
1201  scalar_type = TREE_TYPE (scalar_dest);
1202
1203  /* All uses but the last are expected to be defined in the loop.
1204     The last use is the reduction variable.  */
1205  for (i = 0; i < op_type-1; i++)
1206    {
1207      op = TREE_OPERAND (operation, i);
1208      is_simple_use = vect_is_simple_use (op, loop_vinfo, &def_stmt, &def, &dt);
1209      gcc_assert (is_simple_use);
1210      gcc_assert (dt == vect_loop_def || dt == vect_invariant_def ||
1211                  dt == vect_constant_def);
1212    }
1213
1214  op = TREE_OPERAND (operation, i);
1215  is_simple_use = vect_is_simple_use (op, loop_vinfo, &def_stmt, &def, &dt);
1216  gcc_assert (is_simple_use);
1217  gcc_assert (dt == vect_reduction_def);
1218  gcc_assert (TREE_CODE (def_stmt) == PHI_NODE);
1219  if (orig_stmt)
1220    gcc_assert (orig_stmt == vect_is_simple_reduction (loop, def_stmt));
1221  else
1222    gcc_assert (stmt == vect_is_simple_reduction (loop, def_stmt));
1223
1224  if (STMT_VINFO_LIVE_P (vinfo_for_stmt (def_stmt)))
1225    return false;
1226
1227  /* 4. Supportable by target?  */
1228
1229  /* 4.1. check support for the operation in the loop  */
1230  optab = optab_for_tree_code (code, vectype);
1231  if (!optab)
1232    {
1233      if (vect_print_dump_info (REPORT_DETAILS))
1234        fprintf (vect_dump, "no optab.");
1235      return false;
1236    }
1237  vec_mode = TYPE_MODE (vectype);
1238  if (optab->handlers[(int) vec_mode].insn_code == CODE_FOR_nothing)
1239    {
1240      if (vect_print_dump_info (REPORT_DETAILS))
1241        fprintf (vect_dump, "op not supported by target.");
1242      if (GET_MODE_SIZE (vec_mode) != UNITS_PER_WORD
1243          || LOOP_VINFO_VECT_FACTOR (loop_vinfo)
1244	     < vect_min_worthwhile_factor (code))
1245        return false;
1246      if (vect_print_dump_info (REPORT_DETAILS))
1247	fprintf (vect_dump, "proceeding using word mode.");
1248    }
1249
1250  /* Worthwhile without SIMD support?  */
1251  if (!VECTOR_MODE_P (TYPE_MODE (vectype))
1252      && LOOP_VINFO_VECT_FACTOR (loop_vinfo)
1253	 < vect_min_worthwhile_factor (code))
1254    {
1255      if (vect_print_dump_info (REPORT_DETAILS))
1256	fprintf (vect_dump, "not worthwhile without SIMD support.");
1257      return false;
1258    }
1259
1260  /* 4.2. Check support for the epilog operation.
1261
1262          If STMT represents a reduction pattern, then the type of the
1263          reduction variable may be different than the type of the rest
1264          of the arguments.  For example, consider the case of accumulation
1265          of shorts into an int accumulator; The original code:
1266                        S1: int_a = (int) short_a;
1267          orig_stmt->   S2: int_acc = plus <int_a ,int_acc>;
1268
1269          was replaced with:
1270                        STMT: int_acc = widen_sum <short_a, int_acc>
1271
1272          This means that:
1273          1. The tree-code that is used to create the vector operation in the
1274             epilog code (that reduces the partial results) is not the
1275             tree-code of STMT, but is rather the tree-code of the original
1276             stmt from the pattern that STMT is replacing. I.e, in the example
1277             above we want to use 'widen_sum' in the loop, but 'plus' in the
1278             epilog.
1279          2. The type (mode) we use to check available target support
1280             for the vector operation to be created in the *epilog*, is
1281             determined by the type of the reduction variable (in the example
1282             above we'd check this: plus_optab[vect_int_mode]).
1283             However the type (mode) we use to check available target support
1284             for the vector operation to be created *inside the loop*, is
1285             determined by the type of the other arguments to STMT (in the
1286             example we'd check this: widen_sum_optab[vect_short_mode]).
1287
1288          This is contrary to "regular" reductions, in which the types of all
1289          the arguments are the same as the type of the reduction variable.
1290          For "regular" reductions we can therefore use the same vector type
1291          (and also the same tree-code) when generating the epilog code and
1292          when generating the code inside the loop.  */
1293
1294  if (orig_stmt)
1295    {
1296      /* This is a reduction pattern: get the vectype from the type of the
1297         reduction variable, and get the tree-code from orig_stmt.  */
1298      orig_code = TREE_CODE (TREE_OPERAND (orig_stmt, 1));
1299      vectype = get_vectype_for_scalar_type (TREE_TYPE (def));
1300      vec_mode = TYPE_MODE (vectype);
1301    }
1302  else
1303    {
1304      /* Regular reduction: use the same vectype and tree-code as used for
1305         the vector code inside the loop can be used for the epilog code. */
1306      orig_code = code;
1307    }
1308
1309  if (!reduction_code_for_scalar_code (orig_code, &epilog_reduc_code))
1310    return false;
1311  reduc_optab = optab_for_tree_code (epilog_reduc_code, vectype);
1312  if (!reduc_optab)
1313    {
1314      if (vect_print_dump_info (REPORT_DETAILS))
1315        fprintf (vect_dump, "no optab for reduction.");
1316      epilog_reduc_code = NUM_TREE_CODES;
1317    }
1318  if (reduc_optab->handlers[(int) vec_mode].insn_code == CODE_FOR_nothing)
1319    {
1320      if (vect_print_dump_info (REPORT_DETAILS))
1321        fprintf (vect_dump, "reduc op not supported by target.");
1322      epilog_reduc_code = NUM_TREE_CODES;
1323    }
1324
1325  if (!vec_stmt) /* transformation not required.  */
1326    {
1327      STMT_VINFO_TYPE (stmt_info) = reduc_vec_info_type;
1328      return true;
1329    }
1330
1331  /** Transform.  **/
1332
1333  if (vect_print_dump_info (REPORT_DETAILS))
1334    fprintf (vect_dump, "transform reduction.");
1335
1336  /* Create the destination vector  */
1337  vec_dest = vect_create_destination_var (scalar_dest, vectype);
1338
1339  /* Create the reduction-phi that defines the reduction-operand.  */
1340  new_phi = create_phi_node (vec_dest, loop->header);
1341
1342  /* Prepare the operand that is defined inside the loop body  */
1343  op = TREE_OPERAND (operation, 0);
1344  loop_vec_def0 = vect_get_vec_def_for_operand (op, stmt, NULL);
1345  if (op_type == binary_op)
1346    expr = build2 (code, vectype, loop_vec_def0, PHI_RESULT (new_phi));
1347  else if (op_type == ternary_op)
1348    {
1349      op = TREE_OPERAND (operation, 1);
1350      loop_vec_def1 = vect_get_vec_def_for_operand (op, stmt, NULL);
1351      expr = build3 (code, vectype, loop_vec_def0, loop_vec_def1,
1352		     PHI_RESULT (new_phi));
1353    }
1354
1355  /* Create the vectorized operation that computes the partial results  */
1356  *vec_stmt = build2 (MODIFY_EXPR, vectype, vec_dest, expr);
1357  new_temp = make_ssa_name (vec_dest, *vec_stmt);
1358  TREE_OPERAND (*vec_stmt, 0) = new_temp;
1359  vect_finish_stmt_generation (stmt, *vec_stmt, bsi);
1360
1361  /* Finalize the reduction-phi (set it's arguments) and create the
1362     epilog reduction code.  */
1363  vect_create_epilog_for_reduction (new_temp, stmt, epilog_reduc_code, new_phi);
1364  return true;
1365}
1366
1367
1368/* Function vectorizable_assignment.
1369
1370   Check if STMT performs an assignment (copy) that can be vectorized.
1371   If VEC_STMT is also passed, vectorize the STMT: create a vectorized
1372   stmt to replace it, put it in VEC_STMT, and insert it at BSI.
1373   Return FALSE if not a vectorizable STMT, TRUE otherwise.  */
1374
1375bool
1376vectorizable_assignment (tree stmt, block_stmt_iterator *bsi, tree *vec_stmt)
1377{
1378  tree vec_dest;
1379  tree scalar_dest;
1380  tree op;
1381  tree vec_oprnd;
1382  stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
1383  tree vectype = STMT_VINFO_VECTYPE (stmt_info);
1384  loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
1385  tree new_temp;
1386  tree def, def_stmt;
1387  enum vect_def_type dt;
1388
1389  /* Is vectorizable assignment?  */
1390  if (!STMT_VINFO_RELEVANT_P (stmt_info))
1391    return false;
1392
1393  gcc_assert (STMT_VINFO_DEF_TYPE (stmt_info) == vect_loop_def);
1394
1395  if (TREE_CODE (stmt) != MODIFY_EXPR)
1396    return false;
1397
1398  scalar_dest = TREE_OPERAND (stmt, 0);
1399  if (TREE_CODE (scalar_dest) != SSA_NAME)
1400    return false;
1401
1402  op = TREE_OPERAND (stmt, 1);
1403  if (!vect_is_simple_use (op, loop_vinfo, &def_stmt, &def, &dt))
1404    {
1405      if (vect_print_dump_info (REPORT_DETAILS))
1406        fprintf (vect_dump, "use not simple.");
1407      return false;
1408    }
1409
1410  if (!vec_stmt) /* transformation not required.  */
1411    {
1412      STMT_VINFO_TYPE (stmt_info) = assignment_vec_info_type;
1413      return true;
1414    }
1415
1416  /** Transform.  **/
1417  if (vect_print_dump_info (REPORT_DETAILS))
1418    fprintf (vect_dump, "transform assignment.");
1419
1420  /* Handle def.  */
1421  vec_dest = vect_create_destination_var (scalar_dest, vectype);
1422
1423  /* Handle use.  */
1424  op = TREE_OPERAND (stmt, 1);
1425  vec_oprnd = vect_get_vec_def_for_operand (op, stmt, NULL);
1426
1427  /* Arguments are ready. create the new vector stmt.  */
1428  *vec_stmt = build2 (MODIFY_EXPR, vectype, vec_dest, vec_oprnd);
1429  new_temp = make_ssa_name (vec_dest, *vec_stmt);
1430  TREE_OPERAND (*vec_stmt, 0) = new_temp;
1431  vect_finish_stmt_generation (stmt, *vec_stmt, bsi);
1432
1433  return true;
1434}
1435
1436
1437/* Function vect_min_worthwhile_factor.
1438
1439   For a loop where we could vectorize the operation indicated by CODE,
1440   return the minimum vectorization factor that makes it worthwhile
1441   to use generic vectors.  */
1442static int
1443vect_min_worthwhile_factor (enum tree_code code)
1444{
1445  switch (code)
1446    {
1447    case PLUS_EXPR:
1448    case MINUS_EXPR:
1449    case NEGATE_EXPR:
1450      return 4;
1451
1452    case BIT_AND_EXPR:
1453    case BIT_IOR_EXPR:
1454    case BIT_XOR_EXPR:
1455    case BIT_NOT_EXPR:
1456      return 2;
1457
1458    default:
1459      return INT_MAX;
1460    }
1461}
1462
1463
1464/* Function vectorizable_operation.
1465
1466   Check if STMT performs a binary or unary operation that can be vectorized.
1467   If VEC_STMT is also passed, vectorize the STMT: create a vectorized
1468   stmt to replace it, put it in VEC_STMT, and insert it at BSI.
1469   Return FALSE if not a vectorizable STMT, TRUE otherwise.  */
1470
bool
vectorizable_operation (tree stmt, block_stmt_iterator *bsi, tree *vec_stmt)
{
  tree vec_dest;
  tree scalar_dest;
  tree operation;
  tree op0, op1 = NULL;
  tree vec_oprnd0, vec_oprnd1=NULL;
  stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
  tree vectype = STMT_VINFO_VECTYPE (stmt_info);
  loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
  int i;
  enum tree_code code;
  enum machine_mode vec_mode;
  tree new_temp;
  int op_type;
  tree op;
  optab optab;
  int icode;
  enum machine_mode optab_op2_mode;
  tree def, def_stmt;
  enum vect_def_type dt;

  /* Is STMT a vectorizable binary/unary operation?   */
  if (!STMT_VINFO_RELEVANT_P (stmt_info))
    return false;

  gcc_assert (STMT_VINFO_DEF_TYPE (stmt_info) == vect_loop_def);

  if (STMT_VINFO_LIVE_P (stmt_info))
    {
      /* FORNOW: not yet supported.  */
      if (vect_print_dump_info (REPORT_DETAILS))
        fprintf (vect_dump, "value used after loop.");
      return false;
    }

  /* Only MODIFY_EXPRs with an SSA_NAME lhs can be vectorized here.  */
  if (TREE_CODE (stmt) != MODIFY_EXPR)
    return false;

  if (TREE_CODE (TREE_OPERAND (stmt, 0)) != SSA_NAME)
    return false;

  operation = TREE_OPERAND (stmt, 1);
  code = TREE_CODE (operation);
  optab = optab_for_tree_code (code, vectype);

  /* Support only unary or binary operations.  */
  op_type = TREE_CODE_LENGTH (code);
  if (op_type != unary_op && op_type != binary_op)
    {
      if (vect_print_dump_info (REPORT_DETAILS))
	fprintf (vect_dump, "num. args = %d (not unary/binary op).", op_type);
      return false;
    }

  /* Each operand must have a def the vectorizer can handle.  Note that
     after this loop DT holds the def type of the LAST operand, which the
     shift check below relies on (for binary shifts that is the shift
     amount, operand 1).  */
  for (i = 0; i < op_type; i++)
    {
      op = TREE_OPERAND (operation, i);
      if (!vect_is_simple_use (op, loop_vinfo, &def_stmt, &def, &dt))
	{
	  if (vect_print_dump_info (REPORT_DETAILS))
	    fprintf (vect_dump, "use not simple.");
	  return false;
	}
    }

  /* Supportable by target?  */
  if (!optab)
    {
      if (vect_print_dump_info (REPORT_DETAILS))
	fprintf (vect_dump, "no optab.");
      return false;
    }
  vec_mode = TYPE_MODE (vectype);
  icode = (int) optab->handlers[(int) vec_mode].insn_code;
  if (icode == CODE_FOR_nothing)
    {
      /* No direct target support; word-mode emulation is only worthwhile
	 for large enough vectorization factors.  */
      if (vect_print_dump_info (REPORT_DETAILS))
	fprintf (vect_dump, "op not supported by target.");
      if (GET_MODE_SIZE (vec_mode) != UNITS_PER_WORD
          || LOOP_VINFO_VECT_FACTOR (loop_vinfo)
	     < vect_min_worthwhile_factor (code))
        return false;
      if (vect_print_dump_info (REPORT_DETAILS))
	fprintf (vect_dump, "proceeding using word mode.");
    }

  /* Worthwhile without SIMD support?  */
  if (!VECTOR_MODE_P (TYPE_MODE (vectype))
      && LOOP_VINFO_VECT_FACTOR (loop_vinfo)
	 < vect_min_worthwhile_factor (code))
    {
      if (vect_print_dump_info (REPORT_DETAILS))
	fprintf (vect_dump, "not worthwhile without SIMD support.");
      return false;
    }

  if (code == LSHIFT_EXPR || code == RSHIFT_EXPR)
    {
      /* FORNOW: not yet supported.  */
      if (!VECTOR_MODE_P (vec_mode))
	return false;

      /* Invariant argument is needed for a vector shift
	 by a scalar shift operand.  */
      optab_op2_mode = insn_data[icode].operand[2].mode;
      if (! (VECTOR_MODE_P (optab_op2_mode)
	     || dt == vect_constant_def
	     || dt == vect_invariant_def))
	{
	  if (vect_print_dump_info (REPORT_DETAILS))
	    fprintf (vect_dump, "operand mode requires invariant argument.");
	  return false;
	}
    }

  if (!vec_stmt) /* transformation not required.  */
    {
      STMT_VINFO_TYPE (stmt_info) = op_vec_info_type;
      return true;
    }

  /** Transform.  **/

  if (vect_print_dump_info (REPORT_DETAILS))
    fprintf (vect_dump, "transform binary/unary operation.");

  /* Handle def.  */
  scalar_dest = TREE_OPERAND (stmt, 0);
  vec_dest = vect_create_destination_var (scalar_dest, vectype);

  /* Handle uses.  */
  op0 = TREE_OPERAND (operation, 0);
  vec_oprnd0 = vect_get_vec_def_for_operand (op0, stmt, NULL);

  if (op_type == binary_op)
    {
      op1 = TREE_OPERAND (operation, 1);

      if (code == LSHIFT_EXPR || code == RSHIFT_EXPR)
	{
	  /* Vector shl and shr insn patterns can be defined with
	     scalar operand 2 (shift operand).  In this case, use
	     constant or loop invariant op1 directly, without
	     extending it to vector mode first.  */

	  optab_op2_mode = insn_data[icode].operand[2].mode;
	  if (!VECTOR_MODE_P (optab_op2_mode))
	    {
	      if (vect_print_dump_info (REPORT_DETAILS))
		fprintf (vect_dump, "operand 1 using scalar mode.");
	      vec_oprnd1 = op1;
	    }
	}

      /* Not a scalar-mode shift operand: get a vectorized def for op1.  */
      if (!vec_oprnd1)
	vec_oprnd1 = vect_get_vec_def_for_operand (op1, stmt, NULL);
    }

  /* Arguments are ready. create the new vector stmt.  */

  if (op_type == binary_op)
    *vec_stmt = build2 (MODIFY_EXPR, vectype, vec_dest,
		build2 (code, vectype, vec_oprnd0, vec_oprnd1));
  else
    *vec_stmt = build2 (MODIFY_EXPR, vectype, vec_dest,
		build1 (code, vectype, vec_oprnd0));
  new_temp = make_ssa_name (vec_dest, *vec_stmt);
  TREE_OPERAND (*vec_stmt, 0) = new_temp;
  vect_finish_stmt_generation (stmt, *vec_stmt, bsi);

  return true;
}
1645
1646
1647/* Function vectorizable_store.
1648
1649   Check if STMT defines a non scalar data-ref (array/pointer/structure) that
1650   can be vectorized.
1651   If VEC_STMT is also passed, vectorize the STMT: create a vectorized
1652   stmt to replace it, put it in VEC_STMT, and insert it at BSI.
1653   Return FALSE if not a vectorizable STMT, TRUE otherwise.  */
1654
bool
vectorizable_store (tree stmt, block_stmt_iterator *bsi, tree *vec_stmt)
{
  tree scalar_dest;
  tree data_ref;
  tree op;
  tree vec_oprnd1;
  stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
  struct data_reference *dr = STMT_VINFO_DATA_REF (stmt_info);
  tree vectype = STMT_VINFO_VECTYPE (stmt_info);
  loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
  enum machine_mode vec_mode;
  tree dummy;
  enum dr_alignment_support alignment_support_cheme;
  ssa_op_iter iter;
  tree def, def_stmt;
  enum vect_def_type dt;

  /* Is vectorizable store? */

  /* The lhs must be a memory reference (array element or pointer deref),
     and the stored value must have a def the vectorizer can handle.  */
  if (TREE_CODE (stmt) != MODIFY_EXPR)
    return false;

  scalar_dest = TREE_OPERAND (stmt, 0);
  if (TREE_CODE (scalar_dest) != ARRAY_REF
      && TREE_CODE (scalar_dest) != INDIRECT_REF)
    return false;

  op = TREE_OPERAND (stmt, 1);
  if (!vect_is_simple_use (op, loop_vinfo, &def_stmt, &def, &dt))
    {
      if (vect_print_dump_info (REPORT_DETAILS))
        fprintf (vect_dump, "use not simple.");
      return false;
    }

  /* The target must support vector moves in this mode.  */
  vec_mode = TYPE_MODE (vectype);
  /* FORNOW. In some cases can vectorize even if data-type not supported
     (e.g. - array initialization with 0).  */
  if (mov_optab->handlers[(int)vec_mode].insn_code == CODE_FOR_nothing)
    return false;

  /* A data-ref must have been recorded for this stmt during analysis.  */
  if (!STMT_VINFO_DATA_REF (stmt_info))
    return false;


  if (!vec_stmt) /* transformation not required.  */
    {
      STMT_VINFO_TYPE (stmt_info) = store_vec_info_type;
      return true;
    }

  /** Transform.  **/

  if (vect_print_dump_info (REPORT_DETAILS))
    fprintf (vect_dump, "transform store");

  /* Only aligned stores are handled so far (note: identifier spelling
     "cheme" is historical; presumably "scheme").  */
  alignment_support_cheme = vect_supportable_dr_alignment (dr);
  gcc_assert (alignment_support_cheme);
  gcc_assert (alignment_support_cheme == dr_aligned);  /* FORNOW */

  /* Handle use - get the vectorized def from the defining stmt.  */
  vec_oprnd1 = vect_get_vec_def_for_operand (op, stmt, NULL);

  /* Handle def.  */
  /* FORNOW: make sure the data reference is aligned.  */
  vect_align_data_ref (stmt);
  data_ref = vect_create_data_ref_ptr (stmt, bsi, NULL_TREE, &dummy, false);
  data_ref = build_fold_indirect_ref (data_ref);

  /* Arguments are ready. create the new vector stmt.  */
  *vec_stmt = build2 (MODIFY_EXPR, vectype, data_ref, vec_oprnd1);
  vect_finish_stmt_generation (stmt, *vec_stmt, bsi);

  /* Copy the V_MAY_DEFS representing the aliasing of the original array
     element's definition to the vector's definition then update the
     defining statement.  The original is being deleted so the same
     SSA_NAMEs can be used.  */
  copy_virtual_operands (*vec_stmt, stmt);

  FOR_EACH_SSA_TREE_OPERAND (def, stmt, iter, SSA_OP_VMAYDEF)
    {
      SSA_NAME_DEF_STMT (def) = *vec_stmt;

      /* If this virtual def has a use outside the loop and a loop peel is
	 performed then the def may be renamed by the peel.  Mark it for
	 renaming so the later use will also be renamed.  */
      mark_sym_for_renaming (SSA_NAME_VAR (def));
    }

  return true;
}
1747
1748
1749/* vectorizable_load.
1750
1751   Check if STMT reads a non scalar data-ref (array/pointer/structure) that
1752   can be vectorized.
1753   If VEC_STMT is also passed, vectorize the STMT: create a vectorized
1754   stmt to replace it, put it in VEC_STMT, and insert it at BSI.
1755   Return FALSE if not a vectorizable STMT, TRUE otherwise.  */
1756
bool
vectorizable_load (tree stmt, block_stmt_iterator *bsi, tree *vec_stmt)
{
  tree scalar_dest;
  tree vec_dest = NULL;
  tree data_ref = NULL;
  tree op;
  stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
  struct data_reference *dr = STMT_VINFO_DATA_REF (stmt_info);
  tree vectype = STMT_VINFO_VECTYPE (stmt_info);
  tree new_temp;
  int mode;
  tree init_addr;
  tree new_stmt;
  tree dummy;
  basic_block new_bb;
  loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
  struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
  edge pe = loop_preheader_edge (loop);
  enum dr_alignment_support alignment_support_cheme;

  /* Is vectorizable load?  Only stmts marked relevant by the analysis
     phase are handled.  */
  if (!STMT_VINFO_RELEVANT_P (stmt_info))
    return false;

  gcc_assert (STMT_VINFO_DEF_TYPE (stmt_info) == vect_loop_def);

  if (STMT_VINFO_LIVE_P (stmt_info))
    {
      /* FORNOW: not yet supported.  */
      if (vect_print_dump_info (REPORT_DETAILS))
        fprintf (vect_dump, "value used after loop.");
      return false;
    }

  /* The load must be a plain assignment 'SSA_NAME = ARRAY_REF' or
     'SSA_NAME = INDIRECT_REF'.  */
  if (TREE_CODE (stmt) != MODIFY_EXPR)
    return false;

  scalar_dest = TREE_OPERAND (stmt, 0);
  if (TREE_CODE (scalar_dest) != SSA_NAME)
    return false;

  op = TREE_OPERAND (stmt, 1);
  if (TREE_CODE (op) != ARRAY_REF && TREE_CODE (op) != INDIRECT_REF)
    return false;

  /* The analysis phase must have recorded a data reference for this
     access.  */
  if (!STMT_VINFO_DATA_REF (stmt_info))
    return false;

  mode = (int) TYPE_MODE (vectype);

  /* The target must have a vector move pattern for this mode.
     FORNOW. In some cases can vectorize even if data-type not supported
    (e.g. - data copies).  */
  if (mov_optab->handlers[mode].insn_code == CODE_FOR_nothing)
    {
      if (vect_print_dump_info (REPORT_DETAILS))
	fprintf (vect_dump, "Aligned load, but unsupported type.");
      return false;
    }

  if (!vec_stmt) /* transformation not required.  */
    {
      STMT_VINFO_TYPE (stmt_info) = load_vec_info_type;
      return true;
    }

  /** Transform.  **/

  if (vect_print_dump_info (REPORT_DETAILS))
    fprintf (vect_dump, "transform load.");

  alignment_support_cheme = vect_supportable_dr_alignment (dr);
  gcc_assert (alignment_support_cheme);

  if (alignment_support_cheme == dr_aligned
      || alignment_support_cheme == dr_unaligned_supported)
    {
      /* The access is either aligned or the target can load unaligned
         directly.  Create:
         p = initial_addr;
         indx = 0;
         loop {
           vec_dest = *(p);
           indx = indx + 1;
         }
      */

      vec_dest = vect_create_destination_var (scalar_dest, vectype);
      data_ref = vect_create_data_ref_ptr (stmt, bsi, NULL_TREE, &dummy, false);
      if (aligned_access_p (dr))
        data_ref = build_fold_indirect_ref (data_ref);
      else
	{
	  /* Misaligned but supported: annotate the reference with the
	     misalignment in bits; a DR_MISALIGNMENT of -1 is encoded
	     as 0.  */
	  int mis = DR_MISALIGNMENT (dr);
	  tree tmis = (mis == -1 ? size_zero_node : size_int (mis));
	  tmis = size_binop (MULT_EXPR, tmis, size_int(BITS_PER_UNIT));
	  data_ref = build2 (MISALIGNED_INDIRECT_REF, vectype, data_ref, tmis);
	}
      new_stmt = build2 (MODIFY_EXPR, vectype, vec_dest, data_ref);
      new_temp = make_ssa_name (vec_dest, new_stmt);
      TREE_OPERAND (new_stmt, 0) = new_temp;
      vect_finish_stmt_generation (stmt, new_stmt, bsi);
      copy_virtual_operands (new_stmt, stmt);
    }
  else if (alignment_support_cheme == dr_unaligned_software_pipeline)
    {
      /* Create:
	 p1 = initial_addr;
	 msq_init = *(floor(p1))
	 p2 = initial_addr + VS - 1;
	 magic = have_builtin ? builtin_result : initial_address;
	 indx = 0;
	 loop {
	   p2' = p2 + indx * vectype_size
	   lsq = *(floor(p2'))
	   vec_dest = realign_load (msq, lsq, magic)
	   indx = indx + 1;
	   msq = lsq;
	 }
      */

      tree offset;
      tree magic;
      tree phi_stmt;
      tree msq_init;
      tree msq, lsq;
      tree dataref_ptr;
      tree params;

      /* <1> Create msq_init = *(floor(p1)) in the loop preheader  */
      vec_dest = vect_create_destination_var (scalar_dest, vectype);
      data_ref = vect_create_data_ref_ptr (stmt, bsi, NULL_TREE,
					   &init_addr, true);
      data_ref = build1 (ALIGN_INDIRECT_REF, vectype, data_ref);
      new_stmt = build2 (MODIFY_EXPR, vectype, vec_dest, data_ref);
      new_temp = make_ssa_name (vec_dest, new_stmt);
      TREE_OPERAND (new_stmt, 0) = new_temp;
      /* The insertion must not have split the preheader edge.  */
      new_bb = bsi_insert_on_edge_immediate (pe, new_stmt);
      gcc_assert (!new_bb);
      msq_init = TREE_OPERAND (new_stmt, 0);
      copy_virtual_operands (new_stmt, stmt);
      /* The copied VUSEs name SSA_NAMEs defined inside the loop; rewrite
	 them to the definitions available on loop entry.  */
      update_vuses_to_preheader (new_stmt, loop);


      /* <2> Create lsq = *(floor(p2')) in the loop  */
      offset = size_int (TYPE_VECTOR_SUBPARTS (vectype) - 1);
      vec_dest = vect_create_destination_var (scalar_dest, vectype);
      dataref_ptr = vect_create_data_ref_ptr (stmt, bsi, offset, &dummy, false);
      data_ref = build1 (ALIGN_INDIRECT_REF, vectype, dataref_ptr);
      new_stmt = build2 (MODIFY_EXPR, vectype, vec_dest, data_ref);
      new_temp = make_ssa_name (vec_dest, new_stmt);
      TREE_OPERAND (new_stmt, 0) = new_temp;
      vect_finish_stmt_generation (stmt, new_stmt, bsi);
      lsq = TREE_OPERAND (new_stmt, 0);
      copy_virtual_operands (new_stmt, stmt);


      /* <3> Compute the realignment token ("magic").  */
      if (targetm.vectorize.builtin_mask_for_load)
	{
	  /* Create permutation mask, if required, in loop preheader.  */
	  tree builtin_decl;
	  params = build_tree_list (NULL_TREE, init_addr);
	  vec_dest = vect_create_destination_var (scalar_dest, vectype);
	  builtin_decl = targetm.vectorize.builtin_mask_for_load ();
	  new_stmt = build_function_call_expr (builtin_decl, params);
	  new_stmt = build2 (MODIFY_EXPR, vectype, vec_dest, new_stmt);
	  new_temp = make_ssa_name (vec_dest, new_stmt);
	  TREE_OPERAND (new_stmt, 0) = new_temp;
	  new_bb = bsi_insert_on_edge_immediate (pe, new_stmt);
	  gcc_assert (!new_bb);
	  magic = TREE_OPERAND (new_stmt, 0);

	  /* The result of the CALL_EXPR to this builtin is determined from
	     the value of the parameter and no global variables are touched
	     which makes the builtin a "const" function.  Requiring the
	     builtin to have the "const" attribute makes it unnecessary
	     to call mark_call_clobbered.  */
	  gcc_assert (TREE_READONLY (builtin_decl));
	}
      else
	{
	  /* Use current address instead of init_addr for reduced reg pressure.
	   */
	  magic = dataref_ptr;
	}


      /* <4> Create msq = phi <msq_init, lsq> in loop  */
      vec_dest = vect_create_destination_var (scalar_dest, vectype);
      msq = make_ssa_name (vec_dest, NULL_TREE);
      phi_stmt = create_phi_node (msq, loop->header); /* CHECKME */
      SSA_NAME_DEF_STMT (msq) = phi_stmt;
      add_phi_arg (phi_stmt, msq_init, loop_preheader_edge (loop));
      add_phi_arg (phi_stmt, lsq, loop_latch_edge (loop));


      /* <5> Create <vec_dest = realign_load (msq, lsq, magic)> in loop  */
      vec_dest = vect_create_destination_var (scalar_dest, vectype);
      new_stmt = build3 (REALIGN_LOAD_EXPR, vectype, msq, lsq, magic);
      new_stmt = build2 (MODIFY_EXPR, vectype, vec_dest, new_stmt);
      new_temp = make_ssa_name (vec_dest, new_stmt);
      TREE_OPERAND (new_stmt, 0) = new_temp;
      vect_finish_stmt_generation (stmt, new_stmt, bsi);
    }
  else
    gcc_unreachable ();

  *vec_stmt = new_stmt;
  return true;
}
1967
1968
1969/* Function vectorizable_live_operation.
1970
1971   STMT computes a value that is used outside the loop. Check if
1972   it can be supported.  */
1973
1974bool
1975vectorizable_live_operation (tree stmt,
1976                             block_stmt_iterator *bsi ATTRIBUTE_UNUSED,
1977                             tree *vec_stmt ATTRIBUTE_UNUSED)
1978{
1979  tree operation;
1980  stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
1981  loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
1982  int i;
1983  enum tree_code code;
1984  int op_type;
1985  tree op;
1986  tree def, def_stmt;
1987  enum vect_def_type dt;
1988
1989  if (!STMT_VINFO_LIVE_P (stmt_info))
1990    return false;
1991
1992  if (TREE_CODE (stmt) != MODIFY_EXPR)
1993    return false;
1994
1995  if (TREE_CODE (TREE_OPERAND (stmt, 0)) != SSA_NAME)
1996    return false;
1997
1998  operation = TREE_OPERAND (stmt, 1);
1999  code = TREE_CODE (operation);
2000
2001  op_type = TREE_CODE_LENGTH (code);
2002
2003  /* FORNOW: support only if all uses are invariant. This means
2004     that the scalar operations can remain in place, unvectorized.
2005     The original last scalar value that they compute will be used.  */
2006
2007  for (i = 0; i < op_type; i++)
2008    {
2009      op = TREE_OPERAND (operation, i);
2010      if (!vect_is_simple_use (op, loop_vinfo, &def_stmt, &def, &dt))
2011        {
2012          if (vect_print_dump_info (REPORT_DETAILS))
2013            fprintf (vect_dump, "use not simple.");
2014          return false;
2015        }
2016
2017      if (dt != vect_invariant_def && dt != vect_constant_def)
2018        return false;
2019    }
2020
2021  /* No transformation is required for the cases we currently support.  */
2022  return true;
2023}
2024
2025
2026/* Function vect_is_simple_cond.
2027
2028   Input:
2029   LOOP - the loop that is being vectorized.
2030   COND - Condition that is checked for simple use.
2031
   Returns whether a COND can be vectorized.  Checks whether
   condition operands are supportable using vect_is_simple_use.  */
2034
2035static bool
2036vect_is_simple_cond (tree cond, loop_vec_info loop_vinfo)
2037{
2038  tree lhs, rhs;
2039  tree def;
2040  enum vect_def_type dt;
2041
2042  if (!COMPARISON_CLASS_P (cond))
2043    return false;
2044
2045  lhs = TREE_OPERAND (cond, 0);
2046  rhs = TREE_OPERAND (cond, 1);
2047
2048  if (TREE_CODE (lhs) == SSA_NAME)
2049    {
2050      tree lhs_def_stmt = SSA_NAME_DEF_STMT (lhs);
2051      if (!vect_is_simple_use (lhs, loop_vinfo, &lhs_def_stmt, &def, &dt))
2052	return false;
2053    }
2054  else if (TREE_CODE (lhs) != INTEGER_CST && TREE_CODE (lhs) != REAL_CST)
2055    return false;
2056
2057  if (TREE_CODE (rhs) == SSA_NAME)
2058    {
2059      tree rhs_def_stmt = SSA_NAME_DEF_STMT (rhs);
2060      if (!vect_is_simple_use (rhs, loop_vinfo, &rhs_def_stmt, &def, &dt))
2061	return false;
2062    }
2063  else if (TREE_CODE (rhs) != INTEGER_CST  && TREE_CODE (rhs) != REAL_CST)
2064    return false;
2065
2066  return true;
2067}
2068
2069/* vectorizable_condition.
2070
2071   Check if STMT is conditional modify expression that can be vectorized.
2072   If VEC_STMT is also passed, vectorize the STMT: create a vectorized
2073   stmt using VEC_COND_EXPR  to replace it, put it in VEC_STMT, and insert it
2074   at BSI.
2075
2076   Return FALSE if not a vectorizable STMT, TRUE otherwise.  */
2077
bool
vectorizable_condition (tree stmt, block_stmt_iterator *bsi, tree *vec_stmt)
{
  tree scalar_dest = NULL_TREE;
  tree vec_dest = NULL_TREE;
  tree op = NULL_TREE;
  tree cond_expr, then_clause, else_clause;
  stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
  tree vectype = STMT_VINFO_VECTYPE (stmt_info);
  tree vec_cond_lhs, vec_cond_rhs, vec_then_clause, vec_else_clause;
  tree vec_compare, vec_cond_expr;
  tree new_temp;
  loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
  enum machine_mode vec_mode;
  tree def;
  enum vect_def_type dt;

  /* Only stmts marked relevant by the analysis phase are handled.  */
  if (!STMT_VINFO_RELEVANT_P (stmt_info))
    return false;

  gcc_assert (STMT_VINFO_DEF_TYPE (stmt_info) == vect_loop_def);

  if (STMT_VINFO_LIVE_P (stmt_info))
    {
      /* FORNOW: not yet supported.  */
      if (vect_print_dump_info (REPORT_DETAILS))
        fprintf (vect_dump, "value used after loop.");
      return false;
    }

  /* The stmt must be an assignment of a COND_EXPR.  */
  if (TREE_CODE (stmt) != MODIFY_EXPR)
    return false;

  op = TREE_OPERAND (stmt, 1);

  if (TREE_CODE (op) != COND_EXPR)
    return false;

  cond_expr = TREE_OPERAND (op, 0);
  then_clause = TREE_OPERAND (op, 1);
  else_clause = TREE_OPERAND (op, 2);

  if (!vect_is_simple_cond (cond_expr, loop_vinfo))
    return false;

  /* We do not handle two different vector types for the condition
     and the values.  */
  if (TREE_TYPE (TREE_OPERAND (cond_expr, 0)) != TREE_TYPE (vectype))
    return false;

  /* The then/else clauses must be simple uses (SSA names) or
     integer/real constants.  */
  if (TREE_CODE (then_clause) == SSA_NAME)
    {
      tree then_def_stmt = SSA_NAME_DEF_STMT (then_clause);
      if (!vect_is_simple_use (then_clause, loop_vinfo,
			       &then_def_stmt, &def, &dt))
	return false;
    }
  else if (TREE_CODE (then_clause) != INTEGER_CST
	   && TREE_CODE (then_clause) != REAL_CST)
    return false;

  if (TREE_CODE (else_clause) == SSA_NAME)
    {
      tree else_def_stmt = SSA_NAME_DEF_STMT (else_clause);
      if (!vect_is_simple_use (else_clause, loop_vinfo,
			       &else_def_stmt, &def, &dt))
	return false;
    }
  else if (TREE_CODE (else_clause) != INTEGER_CST
	   && TREE_CODE (else_clause) != REAL_CST)
    return false;


  vec_mode = TYPE_MODE (vectype);

  /* Transformation not required: record the stmt kind and check that
     the target can expand the vector condition in this mode.  */
  if (!vec_stmt)
    {
      STMT_VINFO_TYPE (stmt_info) = condition_vec_info_type;
      return expand_vec_cond_expr_p (op, vec_mode);
    }

  /* Transform */

  /* Handle def.  */
  scalar_dest = TREE_OPERAND (stmt, 0);
  vec_dest = vect_create_destination_var (scalar_dest, vectype);

  /* Handle cond expr - get vectorized defs for the comparison operands
     and both clauses.  */
  vec_cond_lhs =
    vect_get_vec_def_for_operand (TREE_OPERAND (cond_expr, 0), stmt, NULL);
  vec_cond_rhs =
    vect_get_vec_def_for_operand (TREE_OPERAND (cond_expr, 1), stmt, NULL);
  vec_then_clause = vect_get_vec_def_for_operand (then_clause, stmt, NULL);
  vec_else_clause = vect_get_vec_def_for_operand (else_clause, stmt, NULL);

  /* Arguments are ready. create the new vector stmt, reusing the scalar
     comparison code.  */
  vec_compare = build2 (TREE_CODE (cond_expr), vectype,
			vec_cond_lhs, vec_cond_rhs);
  vec_cond_expr = build3 (VEC_COND_EXPR, vectype,
			  vec_compare, vec_then_clause, vec_else_clause);

  *vec_stmt = build2 (MODIFY_EXPR, vectype, vec_dest, vec_cond_expr);
  new_temp = make_ssa_name (vec_dest, *vec_stmt);
  TREE_OPERAND (*vec_stmt, 0) = new_temp;
  vect_finish_stmt_generation (stmt, *vec_stmt, bsi);

  return true;
}
2186
2187/* Function vect_transform_stmt.
2188
2189   Create a vectorized stmt to replace STMT, and insert it at BSI.  */
2190
bool
vect_transform_stmt (tree stmt, block_stmt_iterator *bsi)
{
  bool is_store = false;
  tree vec_stmt = NULL_TREE;
  stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
  tree orig_stmt_in_pattern;
  bool done;

  /* Vectorize the computation: dispatch on the stmt kind that was
     recorded in STMT_VINFO_TYPE during analysis.  */
  if (STMT_VINFO_RELEVANT_P (stmt_info))
    {
      switch (STMT_VINFO_TYPE (stmt_info))
      {
      case op_vec_info_type:
	done = vectorizable_operation (stmt, bsi, &vec_stmt);
	gcc_assert (done);
	break;

      case assignment_vec_info_type:
	done = vectorizable_assignment (stmt, bsi, &vec_stmt);
	gcc_assert (done);
	break;

      case load_vec_info_type:
	done = vectorizable_load (stmt, bsi, &vec_stmt);
	gcc_assert (done);
	break;

      case store_vec_info_type:
	done = vectorizable_store (stmt, bsi, &vec_stmt);
	gcc_assert (done);
	is_store = true;
	break;

      case condition_vec_info_type:
	done = vectorizable_condition (stmt, bsi, &vec_stmt);
	gcc_assert (done);
	break;

      default:
	if (vect_print_dump_info (REPORT_DETAILS))
	  fprintf (vect_dump, "stmt not supported.");
	gcc_unreachable ();
      }

      gcc_assert (vec_stmt);
      STMT_VINFO_VEC_STMT (stmt_info) = vec_stmt;
      orig_stmt_in_pattern = STMT_VINFO_RELATED_STMT (stmt_info);
      if (orig_stmt_in_pattern)
        {
          stmt_vec_info stmt_vinfo = vinfo_for_stmt (orig_stmt_in_pattern);
          if (STMT_VINFO_IN_PATTERN_P (stmt_vinfo))
            {
              gcc_assert (STMT_VINFO_RELATED_STMT (stmt_vinfo) == stmt);

              /* STMT was inserted by the vectorizer to replace a computation
                 idiom.  ORIG_STMT_IN_PATTERN is a stmt in the original
                 sequence that computed this idiom.  We need to record a pointer
                 to VEC_STMT in the stmt_info of ORIG_STMT_IN_PATTERN.  See more
                 detail in the documentation of vect_pattern_recog.  */

              STMT_VINFO_VEC_STMT (stmt_vinfo) = vec_stmt;
            }
        }
    }

  /* Handle stmts whose scalar value is used after the loop.  */
  if (STMT_VINFO_LIVE_P (stmt_info))
    {
      switch (STMT_VINFO_TYPE (stmt_info))
      {
      case reduc_vec_info_type:
        done = vectorizable_reduction (stmt, bsi, &vec_stmt);
        gcc_assert (done);
        break;

      default:
        done = vectorizable_live_operation (stmt, bsi, &vec_stmt);
        gcc_assert (done);
      }

      if (vec_stmt)
        {
          gcc_assert (!STMT_VINFO_VEC_STMT (stmt_info));
          STMT_VINFO_VEC_STMT (stmt_info) = vec_stmt;
        }
    }

  /* Tell the caller whether STMT was a store.  */
  return is_store;
}
2280
2281
2282/* This function builds ni_name = number of iterations loop executes
2283   on the loop preheader.  */
2284
2285static tree
2286vect_build_loop_niters (loop_vec_info loop_vinfo)
2287{
2288  tree ni_name, stmt, var;
2289  edge pe;
2290  struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
2291  tree ni = unshare_expr (LOOP_VINFO_NITERS (loop_vinfo));
2292
2293  var = create_tmp_var (TREE_TYPE (ni), "niters");
2294  add_referenced_var (var);
2295  ni_name = force_gimple_operand (ni, &stmt, false, var);
2296
2297  pe = loop_preheader_edge (loop);
2298  if (stmt)
2299    {
2300      basic_block new_bb = bsi_insert_on_edge_immediate (pe, stmt);
2301      gcc_assert (!new_bb);
2302    }
2303
2304  return ni_name;
2305}
2306
2307
2308/* This function generates the following statements:
2309
2310 ni_name = number of iterations loop executes
2311 ratio = ni_name / vf
2312 ratio_mult_vf_name = ratio * vf
2313
2314 and places them at the loop preheader edge.  */
2315
2316static void
2317vect_generate_tmps_on_preheader (loop_vec_info loop_vinfo,
2318				 tree *ni_name_ptr,
2319				 tree *ratio_mult_vf_name_ptr,
2320				 tree *ratio_name_ptr)
2321{
2322
2323  edge pe;
2324  basic_block new_bb;
2325  tree stmt, ni_name;
2326  tree var;
2327  tree ratio_name;
2328  tree ratio_mult_vf_name;
2329  struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
2330  tree ni = LOOP_VINFO_NITERS (loop_vinfo);
2331  int vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
2332  tree log_vf;
2333
2334  pe = loop_preheader_edge (loop);
2335
2336  /* Generate temporary variable that contains
2337     number of iterations loop executes.  */
2338
2339  ni_name = vect_build_loop_niters (loop_vinfo);
2340  log_vf = build_int_cst (TREE_TYPE (ni), exact_log2 (vf));
2341
2342  /* Create: ratio = ni >> log2(vf) */
2343
2344  var = create_tmp_var (TREE_TYPE (ni), "bnd");
2345  add_referenced_var (var);
2346  ratio_name = make_ssa_name (var, NULL_TREE);
2347  stmt = build2 (MODIFY_EXPR, void_type_node, ratio_name,
2348	   build2 (RSHIFT_EXPR, TREE_TYPE (ni_name), ni_name, log_vf));
2349  SSA_NAME_DEF_STMT (ratio_name) = stmt;
2350
2351  pe = loop_preheader_edge (loop);
2352  new_bb = bsi_insert_on_edge_immediate (pe, stmt);
2353  gcc_assert (!new_bb);
2354
2355  /* Create: ratio_mult_vf = ratio << log2 (vf).  */
2356
2357  var = create_tmp_var (TREE_TYPE (ni), "ratio_mult_vf");
2358  add_referenced_var (var);
2359  ratio_mult_vf_name = make_ssa_name (var, NULL_TREE);
2360  stmt = build2 (MODIFY_EXPR, void_type_node, ratio_mult_vf_name,
2361	   build2 (LSHIFT_EXPR, TREE_TYPE (ratio_name), ratio_name, log_vf));
2362  SSA_NAME_DEF_STMT (ratio_mult_vf_name) = stmt;
2363
2364  pe = loop_preheader_edge (loop);
2365  new_bb = bsi_insert_on_edge_immediate (pe, stmt);
2366  gcc_assert (!new_bb);
2367
2368  *ni_name_ptr = ni_name;
2369  *ratio_mult_vf_name_ptr = ratio_mult_vf_name;
2370  *ratio_name_ptr = ratio_name;
2371
2372  return;
2373}
2374
2375
2376/* Function update_vuses_to_preheader.
2377
2378   Input:
2379   STMT - a statement with potential VUSEs.
2380   LOOP - the loop whose preheader will contain STMT.
2381
2382   It's possible to vectorize a loop even though an SSA_NAME from a VUSE
2383   appears to be defined in a V_MAY_DEF in another statement in a loop.
2384   One such case is when the VUSE is at the dereference of a __restricted__
2385   pointer in a load and the V_MAY_DEF is at the dereference of a different
   __restricted__ pointer in a store.  Vectorization may result in
   copy_virtual_operands being called to copy the problematic VUSE to a new
   statement that is being inserted in the loop preheader.  This procedure
2389   is called to change the SSA_NAME in the new statement's VUSE from the
2390   SSA_NAME updated in the loop to the related SSA_NAME available on the
2391   path entering the loop.
2392
2393   When this function is called, we have the following situation:
2394
2395        # vuse <name1>
2396        S1: vload
2397    do {
2398        # name1 = phi < name0 , name2>
2399
2400        # vuse <name1>
2401        S2: vload
2402
2403        # name2 = vdef <name1>
2404        S3: vstore
2405
2406    }while...
2407
2408   Stmt S1 was created in the loop preheader block as part of misaligned-load
2409   handling. This function fixes the name of the vuse of S1 from 'name1' to
2410   'name0'.  */
2411
2412static void
2413update_vuses_to_preheader (tree stmt, struct loop *loop)
2414{
2415  basic_block header_bb = loop->header;
2416  edge preheader_e = loop_preheader_edge (loop);
2417  ssa_op_iter iter;
2418  use_operand_p use_p;
2419
2420  FOR_EACH_SSA_USE_OPERAND (use_p, stmt, iter, SSA_OP_VUSE)
2421    {
2422      tree ssa_name = USE_FROM_PTR (use_p);
2423      tree def_stmt = SSA_NAME_DEF_STMT (ssa_name);
2424      tree name_var = SSA_NAME_VAR (ssa_name);
2425      basic_block bb = bb_for_stmt (def_stmt);
2426
2427      /* For a use before any definitions, def_stmt is a NOP_EXPR.  */
2428      if (!IS_EMPTY_STMT (def_stmt)
2429	  && flow_bb_inside_loop_p (loop, bb))
2430        {
2431          /* If the block containing the statement defining the SSA_NAME
2432             is in the loop then it's necessary to find the definition
2433             outside the loop using the PHI nodes of the header.  */
2434	  tree phi;
2435	  bool updated = false;
2436
2437	  for (phi = phi_nodes (header_bb); phi; phi = TREE_CHAIN (phi))
2438	    {
2439	      if (SSA_NAME_VAR (PHI_RESULT (phi)) == name_var)
2440		{
2441		  SET_USE (use_p, PHI_ARG_DEF (phi, preheader_e->dest_idx));
2442		  updated = true;
2443		  break;
2444		}
2445	    }
2446	  gcc_assert (updated);
2447	}
2448    }
2449}
2450
2451
2452/*   Function vect_update_ivs_after_vectorizer.
2453
2454     "Advance" the induction variables of LOOP to the value they should take
2455     after the execution of LOOP.  This is currently necessary because the
2456     vectorizer does not handle induction variables that are used after the
2457     loop.  Such a situation occurs when the last iterations of LOOP are
2458     peeled, because:
2459     1. We introduced new uses after LOOP for IVs that were not originally used
2460        after LOOP: the IVs of LOOP are now used by an epilog loop.
2461     2. LOOP is going to be vectorized; this means that it will iterate N/VF
2462        times, whereas the loop IVs should be bumped N times.
2463
2464     Input:
2465     - LOOP - a loop that is going to be vectorized. The last few iterations
2466              of LOOP were peeled.
2467     - NITERS - the number of iterations that LOOP executes (before it is
2468                vectorized). i.e, the number of times the ivs should be bumped.
2469     - UPDATE_E - a successor edge of LOOP->exit that is on the (only) path
2470                  coming out from LOOP on which there are uses of the LOOP ivs
2471		  (this is the path from LOOP->exit to epilog_loop->preheader).
2472
2473                  The new definitions of the ivs are placed in LOOP->exit.
2474                  The phi args associated with the edge UPDATE_E in the bb
2475                  UPDATE_E->dest are updated accordingly.
2476
2477     Assumption 1: Like the rest of the vectorizer, this function assumes
2478     a single loop exit that has a single predecessor.
2479
2480     Assumption 2: The phi nodes in the LOOP header and in update_bb are
2481     organized in the same order.
2482
2483     Assumption 3: The access function of the ivs is simple enough (see
2484     vect_can_advance_ivs_p).  This assumption will be relaxed in the future.
2485
2486     Assumption 4: Exactly one of the successors of LOOP exit-bb is on a path
2487     coming out of LOOP on which the ivs of LOOP are used (this is the path
2488     that leads to the epilog loop; other paths skip the epilog loop).  This
2489     path starts with the edge UPDATE_E, and its destination (denoted update_bb)
2490     needs to have its phis updated.
2491 */
2492
static void
vect_update_ivs_after_vectorizer (loop_vec_info loop_vinfo, tree niters,
				  edge update_e)
{
  struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
  basic_block exit_bb = loop->single_exit->dest;
  tree phi, phi1;
  basic_block update_bb = update_e->dest;

  /* gcc_assert (vect_can_advance_ivs_p (loop_vinfo)); */

  /* Make sure there exists a single-predecessor exit bb:  */
  gcc_assert (single_pred_p (exit_bb));

  /* Walk the header phis and the update_bb phis in lock-step; this
     relies on Assumption 2 above (both lists are organized in the same
     order).  */
  for (phi = phi_nodes (loop->header), phi1 = phi_nodes (update_bb);
       phi && phi1;
       phi = PHI_CHAIN (phi), phi1 = PHI_CHAIN (phi1))
    {
      tree access_fn = NULL;
      tree evolution_part;
      tree init_expr;
      tree step_expr;
      tree var, stmt, ni, ni_name;
      block_stmt_iterator last_bsi;

      if (vect_print_dump_info (REPORT_DETAILS))
        {
          fprintf (vect_dump, "vect_update_ivs_after_vectorizer: phi: ");
          print_generic_expr (vect_dump, phi, TDF_SLIM);
        }

      /* Skip virtual phi's.  */
      if (!is_gimple_reg (SSA_NAME_VAR (PHI_RESULT (phi))))
	{
	  if (vect_print_dump_info (REPORT_DETAILS))
	    fprintf (vect_dump, "virtual phi. skip.");
	  continue;
	}

      /* Skip reduction phis.  */
      if (STMT_VINFO_DEF_TYPE (vinfo_for_stmt (phi)) == vect_reduction_def)
        {
          if (vect_print_dump_info (REPORT_DETAILS))
            fprintf (vect_dump, "reduc phi. skip.");
          continue;
        }

      access_fn = analyze_scalar_evolution (loop, PHI_RESULT (phi));
      gcc_assert (access_fn);
      evolution_part =
	 unshare_expr (evolution_part_in_loop_num (access_fn, loop->num));
      gcc_assert (evolution_part != NULL_TREE);

      /* FORNOW: We do not support IVs whose evolution function is a polynomial
         of degree >= 2 or exponential.  */
      gcc_assert (!tree_is_chrec (evolution_part));

      step_expr = evolution_part;
      init_expr = unshare_expr (initial_condition_in_loop_num (access_fn,
							       loop->num));

      /* ni = init + niters * step: the value of the iv after NITERS
         iterations.  */
      ni = build2 (PLUS_EXPR, TREE_TYPE (init_expr),
		  build2 (MULT_EXPR, TREE_TYPE (niters),
		       niters, step_expr), init_expr);

      var = create_tmp_var (TREE_TYPE (init_expr), "tmp");
      add_referenced_var (var);

      ni_name = force_gimple_operand (ni, &stmt, false, var);

      /* Insert stmt into exit_bb.  */
      last_bsi = bsi_last (exit_bb);
      if (stmt)
        bsi_insert_before (&last_bsi, stmt, BSI_SAME_STMT);

      /* Fix phi expressions in the successor bb.  */
      SET_PHI_ARG_DEF (phi1, update_e->dest_idx, ni_name);
    }
}
2572
2573
2574/* Function vect_do_peeling_for_loop_bound
2575
2576   Peel the last iterations of the loop represented by LOOP_VINFO.
2577   The peeled iterations form a new epilog loop.  Given that the loop now
2578   iterates NITERS times, the new epilog loop iterates
2579   NITERS % VECTORIZATION_FACTOR times.
2580
2581   The original loop will later be made to iterate
2582   NITERS / VECTORIZATION_FACTOR times (this value is placed into RATIO).  */
2583
static void
vect_do_peeling_for_loop_bound (loop_vec_info loop_vinfo, tree *ratio,
				struct loops *loops)
{
  tree ni_name, ratio_mult_vf_name;
  struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
  struct loop *new_loop;
  edge update_e;
  basic_block preheader;
  int loop_num;

  if (vect_print_dump_info (REPORT_DETAILS))
    fprintf (vect_dump, "=== vect_do_peeling_for_loop_bound ===");

  initialize_original_copy_tables ();

  /* Generate the following variables on the preheader of original loop:

     ni_name = number of iteration the original loop executes
     ratio = ni_name / vf
     ratio_mult_vf_name = ratio * vf  */
  vect_generate_tmps_on_preheader (loop_vinfo, &ni_name,
				   &ratio_mult_vf_name, ratio);

  /* Peel the epilog iterations into a new loop placed at LOOP's
     single exit.  */
  loop_num  = loop->num;
  new_loop = slpeel_tree_peel_loop_to_edge (loop, loops, loop->single_exit,
					    ratio_mult_vf_name, ni_name, false);
  gcc_assert (new_loop);
  /* Peeling must not have renumbered the original loop.  */
  gcc_assert (loop_num == loop->num);
#ifdef ENABLE_CHECKING
  slpeel_verify_cfg_after_peeling (loop, new_loop);
#endif

  /* A guard that controls whether the new_loop is to be executed or skipped
     is placed in LOOP->exit.  LOOP->exit therefore has two successors - one
     is the preheader of NEW_LOOP, where the IVs from LOOP are used.  The other
     is a bb after NEW_LOOP, where these IVs are not used.  Find the edge that
     is on the path where the LOOP IVs are used and need to be updated.  */

  preheader = loop_preheader_edge (new_loop)->src;
  if (EDGE_PRED (preheader, 0)->src == loop->single_exit->dest)
    update_e = EDGE_PRED (preheader, 0);
  else
    update_e = EDGE_PRED (preheader, 1);

  /* Update IVs of original loop as if they were advanced
     by ratio_mult_vf_name steps.  */
  vect_update_ivs_after_vectorizer (loop_vinfo, ratio_mult_vf_name, update_e);

  /* After peeling we have to reset scalar evolution analyzer.  */
  scev_reset ();

  free_original_copy_tables ();
}
2638
2639
2640/* Function vect_gen_niters_for_prolog_loop
2641
2642   Set the number of iterations for the loop represented by LOOP_VINFO
2643   to the minimum between LOOP_NITERS (the original iteration count of the loop)
2644   and the misalignment of DR - the data reference recorded in
2645   LOOP_VINFO_UNALIGNED_DR (LOOP_VINFO).  As a result, after the execution of
2646   this loop, the data reference DR will refer to an aligned location.
2647
2648   The following computation is generated:
2649
2650   If the misalignment of DR is known at compile time:
2651     addr_mis = int mis = DR_MISALIGNMENT (dr);
2652   Else, compute address misalignment in bytes:
2653     addr_mis = addr & (vectype_size - 1)
2654
2655   prolog_niters = min ( LOOP_NITERS , (VF - addr_mis/elem_size)&(VF-1) )
2656
2657   (elem_size = element type size; an element is the scalar element
2658	whose type is the inner type of the vectype)  */
2659
static tree
vect_gen_niters_for_prolog_loop (loop_vec_info loop_vinfo, tree loop_niters)
{
  struct data_reference *dr = LOOP_VINFO_UNALIGNED_DR (loop_vinfo);
  int vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
  struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
  tree var, stmt;
  tree iters, iters_name;
  edge pe;
  basic_block new_bb;
  tree dr_stmt = DR_STMT (dr);
  stmt_vec_info stmt_info = vinfo_for_stmt (dr_stmt);
  tree vectype = STMT_VINFO_VECTYPE (stmt_info);
  /* Alignment of the vector type in bytes.  The divisions by VF below
     treat this as the vector size in bytes (alignment == size) — that
     is what the element_size computations rely on.  */
  int vectype_align = TYPE_ALIGN (vectype) / BITS_PER_UNIT;
  tree niters_type = TREE_TYPE (loop_niters);

  pe = loop_preheader_edge (loop);

  if (LOOP_PEELING_FOR_ALIGNMENT (loop_vinfo) > 0)
    {
      /* Misalignment is known at compile time: the prolog iteration count
	 is a constant, (VF - elem_misalign) & (VF - 1).  */
      int byte_misalign = LOOP_PEELING_FOR_ALIGNMENT (loop_vinfo);
      int element_size = vectype_align/vf;
      int elem_misalign = byte_misalign / element_size;

      if (vect_print_dump_info (REPORT_DETAILS))
        fprintf (vect_dump, "known alignment = %d.", byte_misalign);
      iters = build_int_cst (niters_type, (vf - elem_misalign)&(vf-1));
    }
  else
    {
      /* Misalignment is unknown: emit code on the preheader edge that
	 computes it from the runtime address of the data reference.  */
      tree new_stmts = NULL_TREE;
      tree start_addr =
        vect_create_addr_base_for_vector_ref (dr_stmt, &new_stmts, NULL_TREE);
      tree ptr_type = TREE_TYPE (start_addr);
      tree size = TYPE_SIZE (ptr_type);
      /* An unsigned integer type as wide as a pointer, used for the
	 mask/shift arithmetic below.  */
      tree type = lang_hooks.types.type_for_size (tree_low_cst (size, 1), 1);
      tree vectype_size_minus_1 = build_int_cst (type, vectype_align - 1);
      tree elem_size_log =
        build_int_cst (type, exact_log2 (vectype_align/vf));
      tree vf_minus_1 = build_int_cst (type, vf - 1);
      tree vf_tree = build_int_cst (type, vf);
      tree byte_misalign;
      tree elem_misalign;

      /* Insert the address computation on the preheader edge; the edge must
	 be splittable without creating a new bb.  NOTE(review): NEW_STMTS
	 may still be NULL_TREE here if no stmts were generated — presumably
	 the insertion routine tolerates an empty list; verify.  */
      new_bb = bsi_insert_on_edge_immediate (pe, new_stmts);
      gcc_assert (!new_bb);

      /* Create:  byte_misalign = addr & (vectype_size - 1)  */
      byte_misalign =
        build2 (BIT_AND_EXPR, type, start_addr, vectype_size_minus_1);

      /* Create:  elem_misalign = byte_misalign / element_size  */
      elem_misalign =
        build2 (RSHIFT_EXPR, type, byte_misalign, elem_size_log);

      /* Create:  (niters_type) (VF - elem_misalign)&(VF - 1)  */
      iters = build2 (MINUS_EXPR, type, vf_tree, elem_misalign);
      iters = build2 (BIT_AND_EXPR, type, iters, vf_minus_1);
      iters = fold_convert (niters_type, iters);
    }

  /* Create:  prolog_loop_niters = min (iters, loop_niters) */
  /* If the loop bound is known at compile time we already verified that it is
     greater than vf; since the misalignment ('iters') is at most vf, there's
     no need to generate the MIN_EXPR in this case.  */
  if (TREE_CODE (loop_niters) != INTEGER_CST)
    iters = build2 (MIN_EXPR, niters_type, iters, loop_niters);

  if (vect_print_dump_info (REPORT_DETAILS))
    {
      fprintf (vect_dump, "niters for prolog loop: ");
      print_generic_expr (vect_dump, iters, TDF_SLIM);
    }

  /* Gimplify ITERS into a new SSA name; any statements needed to compute
     it are emitted on the loop preheader edge.  */
  var = create_tmp_var (niters_type, "prolog_loop_niters");
  add_referenced_var (var);
  iters_name = force_gimple_operand (iters, &stmt, false, var);

  /* Insert stmt on loop preheader edge.  */
  if (stmt)
    {
      basic_block new_bb = bsi_insert_on_edge_immediate (pe, stmt);
      gcc_assert (!new_bb);
    }

  return iters_name;
}
2747
2748
2749/* Function vect_update_init_of_dr
2750
2751   NITERS iterations were peeled from LOOP.  DR represents a data reference
2752   in LOOP.  This function updates the information recorded in DR to
2753   account for the fact that the first NITERS iterations had already been
2754   executed.  Specifically, it updates the OFFSET field of DR.  */
2755
2756static void
2757vect_update_init_of_dr (struct data_reference *dr, tree niters)
2758{
2759  tree offset = DR_OFFSET (dr);
2760
2761  niters = fold_build2 (MULT_EXPR, TREE_TYPE (niters), niters, DR_STEP (dr));
2762  offset = fold_build2 (PLUS_EXPR, TREE_TYPE (offset), offset, niters);
2763  DR_OFFSET (dr) = offset;
2764}
2765
2766
2767/* Function vect_update_inits_of_drs
2768
2769   NITERS iterations were peeled from the loop represented by LOOP_VINFO.
2770   This function updates the information recorded for the data references in
2771   the loop to account for the fact that the first NITERS iterations had
2772   already been executed.  Specifically, it updates the initial_condition of the
2773   access_function of all the data_references in the loop.  */
2774
2775static void
2776vect_update_inits_of_drs (loop_vec_info loop_vinfo, tree niters)
2777{
2778  unsigned int i;
2779  VEC (data_reference_p, heap) *datarefs = LOOP_VINFO_DATAREFS (loop_vinfo);
2780  struct data_reference *dr;
2781
2782  if (vect_dump && (dump_flags & TDF_DETAILS))
2783    fprintf (vect_dump, "=== vect_update_inits_of_dr ===");
2784
2785  for (i = 0; VEC_iterate (data_reference_p, datarefs, i, dr); i++)
2786    vect_update_init_of_dr (dr, niters);
2787}
2788
2789
2790/* Function vect_do_peeling_for_alignment
2791
2792   Peel the first 'niters' iterations of the loop represented by LOOP_VINFO.
2793   'niters' is set to the misalignment of one of the data references in the
2794   loop, thereby forcing it to refer to an aligned location at the beginning
2795   of the execution of this loop.  The data reference for which we are
2796   peeling is recorded in LOOP_VINFO_UNALIGNED_DR.  */
2797
static void
vect_do_peeling_for_alignment (loop_vec_info loop_vinfo, struct loops *loops)
{
  struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
  tree niters_of_prolog_loop, ni_name;
  tree n_iters;
  struct loop *new_loop;

  if (vect_print_dump_info (REPORT_DETAILS))
    fprintf (vect_dump, "=== vect_do_peeling_for_alignment ===");

  /* Duplicate-bb tracking must be live across the peeling call below.  */
  initialize_original_copy_tables ();

  /* Compute the original iteration count and, from it, how many iterations
     the prolog must execute so that the unaligned DR becomes aligned.  */
  ni_name = vect_build_loop_niters (loop_vinfo);
  niters_of_prolog_loop = vect_gen_niters_for_prolog_loop (loop_vinfo, ni_name);

  /* Peel the prolog loop and iterate it niters_of_prolog_loop.  */
  new_loop =
	slpeel_tree_peel_loop_to_edge (loop, loops, loop_preheader_edge (loop),
				       niters_of_prolog_loop, ni_name, true);
  gcc_assert (new_loop);
#ifdef ENABLE_CHECKING
  slpeel_verify_cfg_after_peeling (new_loop, loop);
#endif

  /* Update number of times loop executes.  */
  n_iters = LOOP_VINFO_NITERS (loop_vinfo);
  LOOP_VINFO_NITERS (loop_vinfo) = fold_build2 (MINUS_EXPR,
		TREE_TYPE (n_iters), n_iters, niters_of_prolog_loop);

  /* Update the init conditions of the access functions of all data refs.  */
  vect_update_inits_of_drs (loop_vinfo, niters_of_prolog_loop);

  /* After peeling we have to reset scalar evolution analyzer.  */
  scev_reset ();

  free_original_copy_tables ();
}
2836
2837
2838/* Function vect_create_cond_for_align_checks.
2839
2840   Create a conditional expression that represents the alignment checks for
2841   all of data references (array element references) whose alignment must be
2842   checked at runtime.
2843
2844   Input:
2845   LOOP_VINFO - two fields of the loop information are used.
2846                LOOP_VINFO_PTR_MASK is the mask used to check the alignment.
2847                LOOP_VINFO_MAY_MISALIGN_STMTS contains the refs to be checked.
2848
2849   Output:
2850   COND_EXPR_STMT_LIST - statements needed to construct the conditional
2851                         expression.
2852   The returned value is the conditional expression to be used in the if
2853   statement that controls which version of the loop gets executed at runtime.
2854
2855   The algorithm makes two assumptions:
2856     1) The number of bytes "n" in a vector is a power of 2.
2857     2) An address "a" is aligned if a%n is zero and that this
2858        test can be done as a&(n-1) == 0.  For example, for 16
2859        byte vectors the test is a&0xf == 0.  */
2860
static tree
vect_create_cond_for_align_checks (loop_vec_info loop_vinfo,
                                   tree *cond_expr_stmt_list)
{
  VEC(tree,heap) *may_misalign_stmts
    = LOOP_VINFO_MAY_MISALIGN_STMTS (loop_vinfo);
  tree ref_stmt;
  int mask = LOOP_VINFO_PTR_MASK (loop_vinfo);
  tree mask_cst;
  unsigned int i;
  tree psize;
  tree int_ptrsize_type;
  /* Scratch buffer for generated SSA temp names ("addr2int%d",
     "orptrs%d"); 20 bytes is ample for those.  */
  char tmp_name[20];
  tree or_tmp_name = NULL_TREE;
  tree and_tmp, and_tmp_name, and_stmt;
  tree ptrsize_zero;

  /* Check that mask is one less than a power of 2, i.e., mask is
     all zeros followed by all ones.  */
  gcc_assert ((mask != 0) && ((mask & (mask+1)) == 0));

  /* CHECKME: what is the best integer or unsigned type to use to hold a
     cast from a pointer value?  */
  psize = TYPE_SIZE (ptr_type_node);
  int_ptrsize_type
    = lang_hooks.types.type_for_size (tree_low_cst (psize, 1), 0);

  /* Create expression (mask & (dr_1 || ... || dr_n)) where dr_i is the address
     of the first vector of the i'th data reference. */

  for (i = 0; VEC_iterate (tree, may_misalign_stmts, i, ref_stmt); i++)
    {
      tree new_stmt_list = NULL_TREE;
      tree addr_base;
      tree addr_tmp, addr_tmp_name, addr_stmt;
      tree or_tmp, new_or_tmp_name, or_stmt;

      /* create: addr_tmp = (int)(address_of_first_vector) */
      addr_base = vect_create_addr_base_for_vector_ref (ref_stmt,
							&new_stmt_list,
							NULL_TREE);

      /* Any stmts computing the base address go into the caller's list.  */
      if (new_stmt_list != NULL_TREE)
        append_to_statement_list_force (new_stmt_list, cond_expr_stmt_list);

      sprintf (tmp_name, "%s%d", "addr2int", i);
      addr_tmp = create_tmp_var (int_ptrsize_type, tmp_name);
      add_referenced_var (addr_tmp);
      addr_tmp_name = make_ssa_name (addr_tmp, NULL_TREE);
      addr_stmt = fold_convert (int_ptrsize_type, addr_base);
      addr_stmt = build2 (MODIFY_EXPR, void_type_node,
                          addr_tmp_name, addr_stmt);
      /* SSA form is built by hand here, so the def stmt link must be
	 set explicitly.  */
      SSA_NAME_DEF_STMT (addr_tmp_name) = addr_stmt;
      append_to_statement_list_force (addr_stmt, cond_expr_stmt_list);

      /* The addresses are OR together.  */

      if (or_tmp_name != NULL_TREE)
        {
          /* create: or_tmp = or_tmp | addr_tmp */
          sprintf (tmp_name, "%s%d", "orptrs", i);
          or_tmp = create_tmp_var (int_ptrsize_type, tmp_name);
          add_referenced_var (or_tmp);
          new_or_tmp_name = make_ssa_name (or_tmp, NULL_TREE);
          or_stmt = build2 (MODIFY_EXPR, void_type_node, new_or_tmp_name,
                            build2 (BIT_IOR_EXPR, int_ptrsize_type,
	                            or_tmp_name,
                                    addr_tmp_name));
          SSA_NAME_DEF_STMT (new_or_tmp_name) = or_stmt;
          append_to_statement_list_force (or_stmt, cond_expr_stmt_list);
          or_tmp_name = new_or_tmp_name;
        }
      else
        /* First address: nothing to OR with yet.  */
        or_tmp_name = addr_tmp_name;

    } /* end for i */

  mask_cst = build_int_cst (int_ptrsize_type, mask);

  /* create: and_tmp = or_tmp & mask  */
  and_tmp = create_tmp_var (int_ptrsize_type, "andmask" );
  add_referenced_var (and_tmp);
  and_tmp_name = make_ssa_name (and_tmp, NULL_TREE);

  and_stmt = build2 (MODIFY_EXPR, void_type_node,
                     and_tmp_name,
                     build2 (BIT_AND_EXPR, int_ptrsize_type,
                             or_tmp_name, mask_cst));
  SSA_NAME_DEF_STMT (and_tmp_name) = and_stmt;
  append_to_statement_list_force (and_stmt, cond_expr_stmt_list);

  /* Make and_tmp the left operand of the conditional test against zero.
     if and_tmp has a nonzero bit then some address is unaligned.  */
  ptrsize_zero = build_int_cst (int_ptrsize_type, 0);
  return build2 (EQ_EXPR, boolean_type_node,
                 and_tmp_name, ptrsize_zero);
}
2958
2959
2960/* Function vect_transform_loop.
2961
2962   The analysis phase has determined that the loop is vectorizable.
2963   Vectorize the loop - created vectorized stmts to replace the scalar
2964   stmts in the loop, and update the loop exit condition.  */
2965
void
vect_transform_loop (loop_vec_info loop_vinfo,
		     struct loops *loops ATTRIBUTE_UNUSED)
{
  struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
  basic_block *bbs = LOOP_VINFO_BBS (loop_vinfo);
  int nbbs = loop->num_nodes;
  block_stmt_iterator si;
  int i;
  tree ratio = NULL;
  int vectorization_factor = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
  bitmap_iterator bi;
  unsigned int j;

  if (vect_print_dump_info (REPORT_DETAILS))
    fprintf (vect_dump, "=== vec_transform_loop ===");

  /* If the loop has data references that may or may not be aligned then
     two versions of the loop need to be generated, one which is vectorized
     and one which isn't.  A test is then generated to control which of the
     loops is executed.  The test checks for the alignment of all of the
     data references that may or may not be aligned. */

  if (VEC_length (tree, LOOP_VINFO_MAY_MISALIGN_STMTS (loop_vinfo)))
    {
      struct loop *nloop;
      tree cond_expr;
      tree cond_expr_stmt_list = NULL_TREE;
      basic_block condition_bb;
      block_stmt_iterator cond_exp_bsi;
      basic_block merge_bb;
      basic_block new_exit_bb;
      edge new_exit_e, e;
      tree orig_phi, new_phi, arg;

      /* Build the runtime alignment test; COND_EXPR is true when all
	 addresses are aligned (i.e. the vectorized version may run).  */
      cond_expr = vect_create_cond_for_align_checks (loop_vinfo,
                                                     &cond_expr_stmt_list);
      initialize_original_copy_tables ();
      nloop = loop_version (loops, loop, cond_expr, &condition_bb, true);
      free_original_copy_tables();

      /** Loop versioning violates an assumption we try to maintain during
	 vectorization - that the loop exit block has a single predecessor.
	 After versioning, the exit block of both loop versions is the same
	 basic block (i.e. it has two predecessors). Just in order to simplify
	 following transformations in the vectorizer, we fix this situation
	 here by adding a new (empty) block on the exit-edge of the loop,
	 with the proper loop-exit phis to maintain loop-closed-form.  **/

      merge_bb = loop->single_exit->dest;
      gcc_assert (EDGE_COUNT (merge_bb->preds) == 2);
      new_exit_bb = split_edge (loop->single_exit);
      add_bb_to_loop (new_exit_bb, loop->outer);
      new_exit_e = loop->single_exit;
      e = EDGE_SUCC (new_exit_bb, 0);

      /* For each phi in the merge block, interpose a phi in the new exit
	 block that forwards the value coming from the vectorized loop,
	 keeping the loop in loop-closed SSA form.  */
      for (orig_phi = phi_nodes (merge_bb); orig_phi;
	   orig_phi = PHI_CHAIN (orig_phi))
	{
          new_phi = create_phi_node (SSA_NAME_VAR (PHI_RESULT (orig_phi)),
				     new_exit_bb);
          arg = PHI_ARG_DEF_FROM_EDGE (orig_phi, e);
          add_phi_arg (new_phi, arg, new_exit_e);
	  SET_PHI_ARG_DEF (orig_phi, e->dest_idx, PHI_RESULT (new_phi));
	}

      /** end loop-exit-fixes after versioning  **/

      update_ssa (TODO_update_ssa);
      /* Emit the alignment-check computation just before the branch that
	 selects between the two loop versions.  */
      cond_exp_bsi = bsi_last (condition_bb);
      bsi_insert_before (&cond_exp_bsi, cond_expr_stmt_list, BSI_SAME_STMT);
    }

  /* CHECKME: we wouldn't need this if we called update_ssa once
     for all loops.  */
  bitmap_zero (vect_vnames_to_rename);

  /* Peel the loop if there are data refs with unknown alignment.
     Only one data ref with unknown store is allowed.  */

  if (LOOP_PEELING_FOR_ALIGNMENT (loop_vinfo))
    vect_do_peeling_for_alignment (loop_vinfo, loops);

  /* If the loop has a symbolic number of iterations 'n' (i.e. it's not a
     compile time constant), or it is a constant that doesn't divide by the
     vectorization factor, then an epilog loop needs to be created.
     We therefore duplicate the loop: the original loop will be vectorized,
     and will compute the first (n/VF) iterations. The second copy of the loop
     will remain scalar and will compute the remaining (n%VF) iterations.
     (VF is the vectorization factor).  */

  if (!LOOP_VINFO_NITERS_KNOWN_P (loop_vinfo)
      || (LOOP_VINFO_NITERS_KNOWN_P (loop_vinfo)
          && LOOP_VINFO_INT_NITERS (loop_vinfo) % vectorization_factor != 0))
    vect_do_peeling_for_loop_bound (loop_vinfo, &ratio, loops);
  else
    /* NITERS divides evenly by VF: the ratio is a compile-time constant.  */
    ratio = build_int_cst (TREE_TYPE (LOOP_VINFO_NITERS (loop_vinfo)),
		LOOP_VINFO_INT_NITERS (loop_vinfo) / vectorization_factor);

  /* 1) Make sure the loop header has exactly two entries
     2) Make sure we have a preheader basic block.  */

  gcc_assert (EDGE_COUNT (loop->header->preds) == 2);

  loop_split_edge_with (loop_preheader_edge (loop), NULL);


  /* FORNOW: the vectorizer supports only loops which body consist
     of one basic block (header + empty latch). When the vectorizer will
     support more involved loop forms, the order by which the BBs are
     traversed need to be reconsidered.  */

  for (i = 0; i < nbbs; i++)
    {
      basic_block bb = bbs[i];

      /* Iterator advance is done manually: the store case below removes
	 the current stmt instead of stepping past it.  */
      for (si = bsi_start (bb); !bsi_end_p (si);)
	{
	  tree stmt = bsi_stmt (si);
	  stmt_vec_info stmt_info;
	  bool is_store;

	  if (vect_print_dump_info (REPORT_DETAILS))
	    {
	      fprintf (vect_dump, "------>vectorizing statement: ");
	      print_generic_expr (vect_dump, stmt, TDF_SLIM);
	    }
	  stmt_info = vinfo_for_stmt (stmt);
	  gcc_assert (stmt_info);
	  /* Skip stmts the analysis phase marked as neither relevant nor
	     live; they are left as scalar code.  */
	  if (!STMT_VINFO_RELEVANT_P (stmt_info)
	      && !STMT_VINFO_LIVE_P (stmt_info))
	    {
	      bsi_next (&si);
	      continue;
	    }
	  /* FORNOW: Verify that all stmts operate on the same number of
	             units and no inner unrolling is necessary.  */
	  gcc_assert
		(TYPE_VECTOR_SUBPARTS (STMT_VINFO_VECTYPE (stmt_info))
		 == (unsigned HOST_WIDE_INT) vectorization_factor);

	  /* -------- vectorize statement ------------ */
	  if (vect_print_dump_info (REPORT_DETAILS))
	    fprintf (vect_dump, "transform statement.");

	  is_store = vect_transform_stmt (stmt, &si);
	  if (is_store)
	    {
	      /* Free the attached stmt_vec_info and remove the stmt.  */
	      stmt_ann_t ann = stmt_ann (stmt);
	      free (stmt_info);
	      set_stmt_info (ann, NULL);
	      bsi_remove (&si, true);
	      continue;
	    }

	  bsi_next (&si);
	}		        /* stmts in BB */
    }				/* BBs in loop */

  /* Rewrite the loop's exit test so it executes RATIO iterations.  */
  slpeel_make_loop_iterate_ntimes (loop, ratio);

  EXECUTE_IF_SET_IN_BITMAP (vect_vnames_to_rename, 0, j, bi)
    mark_sym_for_renaming (SSA_NAME_VAR (ssa_name (j)));

  /* The memory tags and pointers in vectorized statements need to
     have their SSA forms updated.  FIXME, why can't this be delayed
     until all the loops have been transformed?  */
  update_ssa (TODO_update_ssa);

  if (vect_print_dump_info (REPORT_VECTORIZED_LOOPS))
    fprintf (vect_dump, "LOOP VECTORIZED.");
}
3139