1169689Skan/* Transformation Utilities for Loop Vectorization.
2169689Skan   Copyright (C) 2003,2004,2005,2006 Free Software Foundation, Inc.
3169689Skan   Contributed by Dorit Naishlos <dorit@il.ibm.com>
4169689Skan
5169689SkanThis file is part of GCC.
6169689Skan
7169689SkanGCC is free software; you can redistribute it and/or modify it under
8169689Skanthe terms of the GNU General Public License as published by the Free
9169689SkanSoftware Foundation; either version 2, or (at your option) any later
10169689Skanversion.
11169689Skan
12169689SkanGCC is distributed in the hope that it will be useful, but WITHOUT ANY
13169689SkanWARRANTY; without even the implied warranty of MERCHANTABILITY or
14169689SkanFITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
15169689Skanfor more details.
16169689Skan
17169689SkanYou should have received a copy of the GNU General Public License
18169689Skanalong with GCC; see the file COPYING.  If not, write to the Free
19169689SkanSoftware Foundation, 51 Franklin Street, Fifth Floor, Boston, MA
20169689Skan02110-1301, USA.  */
21169689Skan
22169689Skan#include "config.h"
23169689Skan#include "system.h"
24169689Skan#include "coretypes.h"
25169689Skan#include "tm.h"
26169689Skan#include "ggc.h"
27169689Skan#include "tree.h"
28169689Skan#include "target.h"
29169689Skan#include "rtl.h"
30169689Skan#include "basic-block.h"
31169689Skan#include "diagnostic.h"
32169689Skan#include "tree-flow.h"
33169689Skan#include "tree-dump.h"
34169689Skan#include "timevar.h"
35169689Skan#include "cfgloop.h"
36169689Skan#include "expr.h"
37169689Skan#include "optabs.h"
38169689Skan#include "recog.h"
39169689Skan#include "tree-data-ref.h"
40169689Skan#include "tree-chrec.h"
41169689Skan#include "tree-scalar-evolution.h"
42169689Skan#include "tree-vectorizer.h"
43169689Skan#include "langhooks.h"
44169689Skan#include "tree-pass.h"
45169689Skan#include "toplev.h"
46169689Skan#include "real.h"
47169689Skan
48169689Skan/* Utility functions for the code transformation.  */
49169689Skanstatic bool vect_transform_stmt (tree, block_stmt_iterator *);
50169689Skanstatic void vect_align_data_ref (tree);
51169689Skanstatic tree vect_create_destination_var (tree, tree);
52169689Skanstatic tree vect_create_data_ref_ptr
53169689Skan  (tree, block_stmt_iterator *, tree, tree *, bool);
54169689Skanstatic tree vect_create_addr_base_for_vector_ref (tree, tree *, tree);
55169689Skanstatic tree vect_get_new_vect_var (tree, enum vect_var_kind, const char *);
56169689Skanstatic tree vect_get_vec_def_for_operand (tree, tree, tree *);
57169689Skanstatic tree vect_init_vector (tree, tree);
58169689Skanstatic void vect_finish_stmt_generation
59169689Skan  (tree stmt, tree vec_stmt, block_stmt_iterator *bsi);
60169689Skanstatic bool vect_is_simple_cond (tree, loop_vec_info);
61169689Skanstatic void update_vuses_to_preheader (tree, struct loop*);
62169689Skanstatic void vect_create_epilog_for_reduction (tree, tree, enum tree_code, tree);
63169689Skanstatic tree get_initial_def_for_reduction (tree, tree, tree *);
64169689Skan
65169689Skan/* Utility function dealing with loop peeling (not peeling itself).  */
66169689Skanstatic void vect_generate_tmps_on_preheader
67169689Skan  (loop_vec_info, tree *, tree *, tree *);
68169689Skanstatic tree vect_build_loop_niters (loop_vec_info);
69169689Skanstatic void vect_update_ivs_after_vectorizer (loop_vec_info, tree, edge);
70169689Skanstatic tree vect_gen_niters_for_prolog_loop (loop_vec_info, tree);
71169689Skanstatic void vect_update_init_of_dr (struct data_reference *, tree niters);
72169689Skanstatic void vect_update_inits_of_drs (loop_vec_info, tree);
73169689Skanstatic void vect_do_peeling_for_alignment (loop_vec_info, struct loops *);
74169689Skanstatic void vect_do_peeling_for_loop_bound
75169689Skan  (loop_vec_info, tree *, struct loops *);
76169689Skanstatic int vect_min_worthwhile_factor (enum tree_code);
77169689Skan
78169689Skan
79169689Skan/* Function vect_get_new_vect_var.
80169689Skan
81169689Skan   Returns a name for a new variable. The current naming scheme appends the
82169689Skan   prefix "vect_" or "vect_p" (depending on the value of VAR_KIND) to
83169689Skan   the name of vectorizer generated variables, and appends that to NAME if
84169689Skan   provided.  */
85169689Skan
86169689Skanstatic tree
87169689Skanvect_get_new_vect_var (tree type, enum vect_var_kind var_kind, const char *name)
88169689Skan{
89169689Skan  const char *prefix;
90169689Skan  tree new_vect_var;
91169689Skan
92169689Skan  switch (var_kind)
93169689Skan  {
94169689Skan  case vect_simple_var:
95169689Skan    prefix = "vect_";
96169689Skan    break;
97169689Skan  case vect_scalar_var:
98169689Skan    prefix = "stmp_";
99169689Skan    break;
100169689Skan  case vect_pointer_var:
101169689Skan    prefix = "vect_p";
102169689Skan    break;
103169689Skan  default:
104169689Skan    gcc_unreachable ();
105169689Skan  }
106169689Skan
107169689Skan  if (name)
108169689Skan    new_vect_var = create_tmp_var (type, concat (prefix, name, NULL));
109169689Skan  else
110169689Skan    new_vect_var = create_tmp_var (type, prefix);
111169689Skan
112169689Skan  return new_vect_var;
113169689Skan}
114169689Skan
115169689Skan
116169689Skan/* Function vect_create_addr_base_for_vector_ref.
117169689Skan
118169689Skan   Create an expression that computes the address of the first memory location
119169689Skan   that will be accessed for a data reference.
120169689Skan
121169689Skan   Input:
122169689Skan   STMT: The statement containing the data reference.
123169689Skan   NEW_STMT_LIST: Must be initialized to NULL_TREE or a statement list.
124169689Skan   OFFSET: Optional. If supplied, it is be added to the initial address.
125169689Skan
126169689Skan   Output:
127169689Skan   1. Return an SSA_NAME whose value is the address of the memory location of
128169689Skan      the first vector of the data reference.
129169689Skan   2. If new_stmt_list is not NULL_TREE after return then the caller must insert
130169689Skan      these statement(s) which define the returned SSA_NAME.
131169689Skan
132169689Skan   FORNOW: We are only handling array accesses with step 1.  */
133169689Skan
134169689Skanstatic tree
135169689Skanvect_create_addr_base_for_vector_ref (tree stmt,
136169689Skan                                      tree *new_stmt_list,
137169689Skan				      tree offset)
138169689Skan{
139169689Skan  stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
140169689Skan  struct data_reference *dr = STMT_VINFO_DATA_REF (stmt_info);
141169689Skan  tree data_ref_base = unshare_expr (DR_BASE_ADDRESS (dr));
142169689Skan  tree base_name = build_fold_indirect_ref (data_ref_base);
143169689Skan  tree ref = DR_REF (dr);
144169689Skan  tree scalar_type = TREE_TYPE (ref);
145169689Skan  tree scalar_ptr_type = build_pointer_type (scalar_type);
146169689Skan  tree vec_stmt;
147169689Skan  tree new_temp;
148169689Skan  tree addr_base, addr_expr;
149169689Skan  tree dest, new_stmt;
150169689Skan  tree base_offset = unshare_expr (DR_OFFSET (dr));
151169689Skan  tree init = unshare_expr (DR_INIT (dr));
152169689Skan
153169689Skan  /* Create base_offset */
154169689Skan  base_offset = size_binop (PLUS_EXPR, base_offset, init);
155169689Skan  dest = create_tmp_var (TREE_TYPE (base_offset), "base_off");
156169689Skan  add_referenced_var (dest);
157169689Skan  base_offset = force_gimple_operand (base_offset, &new_stmt, false, dest);
158169689Skan  append_to_statement_list_force (new_stmt, new_stmt_list);
159169689Skan
160169689Skan  if (offset)
161169689Skan    {
162169689Skan      tree tmp = create_tmp_var (TREE_TYPE (base_offset), "offset");
163169689Skan      add_referenced_var (tmp);
164169689Skan      offset = fold_build2 (MULT_EXPR, TREE_TYPE (offset), offset,
165169689Skan			    DR_STEP (dr));
166169689Skan      base_offset = fold_build2 (PLUS_EXPR, TREE_TYPE (base_offset),
167169689Skan				 base_offset, offset);
168169689Skan      base_offset = force_gimple_operand (base_offset, &new_stmt, false, tmp);
169169689Skan      append_to_statement_list_force (new_stmt, new_stmt_list);
170169689Skan    }
171169689Skan
172169689Skan  /* base + base_offset */
173169689Skan  addr_base = fold_build2 (PLUS_EXPR, TREE_TYPE (data_ref_base), data_ref_base,
174169689Skan			   base_offset);
175169689Skan
176169689Skan  /* addr_expr = addr_base */
177169689Skan  addr_expr = vect_get_new_vect_var (scalar_ptr_type, vect_pointer_var,
178169689Skan                                     get_name (base_name));
179169689Skan  add_referenced_var (addr_expr);
180169689Skan  vec_stmt = build2 (MODIFY_EXPR, void_type_node, addr_expr, addr_base);
181169689Skan  new_temp = make_ssa_name (addr_expr, vec_stmt);
182169689Skan  TREE_OPERAND (vec_stmt, 0) = new_temp;
183169689Skan  append_to_statement_list_force (vec_stmt, new_stmt_list);
184169689Skan
185169689Skan  if (vect_print_dump_info (REPORT_DETAILS))
186169689Skan    {
187169689Skan      fprintf (vect_dump, "created ");
188169689Skan      print_generic_expr (vect_dump, vec_stmt, TDF_SLIM);
189169689Skan    }
190169689Skan  return new_temp;
191169689Skan}
192169689Skan
193169689Skan
194169689Skan/* Function vect_align_data_ref.
195169689Skan
196169689Skan   Handle misalignment of a memory accesses.
197169689Skan
198169689Skan   FORNOW: Can't handle misaligned accesses.
199169689Skan   Make sure that the dataref is aligned.  */
200169689Skan
201169689Skanstatic void
202169689Skanvect_align_data_ref (tree stmt)
203169689Skan{
204169689Skan  stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
205169689Skan  struct data_reference *dr = STMT_VINFO_DATA_REF (stmt_info);
206169689Skan
207169689Skan  /* FORNOW: can't handle misaligned accesses;
208169689Skan             all accesses expected to be aligned.  */
209169689Skan  gcc_assert (aligned_access_p (dr));
210169689Skan}
211169689Skan
212169689Skan
213169689Skan/* Function vect_create_data_ref_ptr.
214169689Skan
215169689Skan   Create a memory reference expression for vector access, to be used in a
216169689Skan   vector load/store stmt. The reference is based on a new pointer to vector
217169689Skan   type (vp).
218169689Skan
219169689Skan   Input:
220169689Skan   1. STMT: a stmt that references memory. Expected to be of the form
221169689Skan         MODIFY_EXPR <name, data-ref> or MODIFY_EXPR <data-ref, name>.
222169689Skan   2. BSI: block_stmt_iterator where new stmts can be added.
223169689Skan   3. OFFSET (optional): an offset to be added to the initial address accessed
224169689Skan        by the data-ref in STMT.
225169689Skan   4. ONLY_INIT: indicate if vp is to be updated in the loop, or remain
226169689Skan        pointing to the initial address.
227169689Skan
228169689Skan   Output:
229169689Skan   1. Declare a new ptr to vector_type, and have it point to the base of the
230169689Skan      data reference (initial addressed accessed by the data reference).
231169689Skan      For example, for vector of type V8HI, the following code is generated:
232169689Skan
233169689Skan      v8hi *vp;
234169689Skan      vp = (v8hi *)initial_address;
235169689Skan
236169689Skan      if OFFSET is not supplied:
237169689Skan         initial_address = &a[init];
238169689Skan      if OFFSET is supplied:
239169689Skan         initial_address = &a[init + OFFSET];
240169689Skan
241169689Skan      Return the initial_address in INITIAL_ADDRESS.
242169689Skan
243169689Skan   2. If ONLY_INIT is true, return the initial pointer.  Otherwise, create
244169689Skan      a data-reference in the loop based on the new vector pointer vp.  This
245169689Skan      new data reference will by some means be updated each iteration of
246169689Skan      the loop.  Return the pointer vp'.
247169689Skan
248169689Skan   FORNOW: handle only aligned and consecutive accesses.  */
249169689Skan
250169689Skanstatic tree
251169689Skanvect_create_data_ref_ptr (tree stmt,
252169689Skan			  block_stmt_iterator *bsi ATTRIBUTE_UNUSED,
253169689Skan			  tree offset, tree *initial_address, bool only_init)
254169689Skan{
255169689Skan  tree base_name;
256169689Skan  stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
257169689Skan  loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
258169689Skan  struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
259169689Skan  tree vectype = STMT_VINFO_VECTYPE (stmt_info);
260169689Skan  tree vect_ptr_type;
261169689Skan  tree vect_ptr;
262169689Skan  tree tag;
263169689Skan  tree new_temp;
264169689Skan  tree vec_stmt;
265169689Skan  tree new_stmt_list = NULL_TREE;
266169689Skan  edge pe = loop_preheader_edge (loop);
267169689Skan  basic_block new_bb;
268169689Skan  tree vect_ptr_init;
269169689Skan  struct data_reference *dr = STMT_VINFO_DATA_REF (stmt_info);
270169689Skan
271169689Skan  base_name =  build_fold_indirect_ref (unshare_expr (DR_BASE_ADDRESS (dr)));
272169689Skan
273169689Skan  if (vect_print_dump_info (REPORT_DETAILS))
274169689Skan    {
275169689Skan      tree data_ref_base = base_name;
276169689Skan      fprintf (vect_dump, "create vector-pointer variable to type: ");
277169689Skan      print_generic_expr (vect_dump, vectype, TDF_SLIM);
278169689Skan      if (TREE_CODE (data_ref_base) == VAR_DECL)
279169689Skan        fprintf (vect_dump, "  vectorizing a one dimensional array ref: ");
280169689Skan      else if (TREE_CODE (data_ref_base) == ARRAY_REF)
281169689Skan        fprintf (vect_dump, "  vectorizing a multidimensional array ref: ");
282169689Skan      else if (TREE_CODE (data_ref_base) == COMPONENT_REF)
283169689Skan        fprintf (vect_dump, "  vectorizing a record based array ref: ");
284169689Skan      else if (TREE_CODE (data_ref_base) == SSA_NAME)
285169689Skan        fprintf (vect_dump, "  vectorizing a pointer ref: ");
286169689Skan      print_generic_expr (vect_dump, base_name, TDF_SLIM);
287169689Skan    }
288169689Skan
289169689Skan  /** (1) Create the new vector-pointer variable:  **/
290169689Skan
291169689Skan  vect_ptr_type = build_pointer_type (vectype);
292169689Skan  vect_ptr = vect_get_new_vect_var (vect_ptr_type, vect_pointer_var,
293169689Skan                                    get_name (base_name));
294169689Skan  add_referenced_var (vect_ptr);
295169689Skan
296169689Skan
297169689Skan  /** (2) Add aliasing information to the new vector-pointer:
298169689Skan          (The points-to info (DR_PTR_INFO) may be defined later.)  **/
299169689Skan
300169689Skan  tag = DR_MEMTAG (dr);
301169689Skan  gcc_assert (tag);
302169689Skan
303169689Skan  /* If tag is a variable (and NOT_A_TAG) than a new symbol memory
304169689Skan     tag must be created with tag added to its may alias list.  */
305169689Skan  if (!MTAG_P (tag))
306169689Skan    new_type_alias (vect_ptr, tag, DR_REF (dr));
307169689Skan  else
308169689Skan    var_ann (vect_ptr)->symbol_mem_tag = tag;
309169689Skan
310169689Skan  var_ann (vect_ptr)->subvars = DR_SUBVARS (dr);
311169689Skan
312169689Skan  /** (3) Calculate the initial address the vector-pointer, and set
313169689Skan          the vector-pointer to point to it before the loop:  **/
314169689Skan
315169689Skan  /* Create: (&(base[init_val+offset]) in the loop preheader.  */
316169689Skan  new_temp = vect_create_addr_base_for_vector_ref (stmt, &new_stmt_list,
317169689Skan                                                   offset);
318169689Skan  pe = loop_preheader_edge (loop);
319169689Skan  new_bb = bsi_insert_on_edge_immediate (pe, new_stmt_list);
320169689Skan  gcc_assert (!new_bb);
321169689Skan  *initial_address = new_temp;
322169689Skan
323169689Skan  /* Create: p = (vectype *) initial_base  */
324169689Skan  vec_stmt = fold_convert (vect_ptr_type, new_temp);
325169689Skan  vec_stmt = build2 (MODIFY_EXPR, void_type_node, vect_ptr, vec_stmt);
326169689Skan  vect_ptr_init = make_ssa_name (vect_ptr, vec_stmt);
327169689Skan  TREE_OPERAND (vec_stmt, 0) = vect_ptr_init;
328169689Skan  new_bb = bsi_insert_on_edge_immediate (pe, vec_stmt);
329169689Skan  gcc_assert (!new_bb);
330169689Skan
331169689Skan
332169689Skan  /** (4) Handle the updating of the vector-pointer inside the loop: **/
333169689Skan
334169689Skan  if (only_init) /* No update in loop is required.  */
335169689Skan    {
336169689Skan      /* Copy the points-to information if it exists. */
337169689Skan      if (DR_PTR_INFO (dr))
338169689Skan        duplicate_ssa_name_ptr_info (vect_ptr_init, DR_PTR_INFO (dr));
339169689Skan      return vect_ptr_init;
340169689Skan    }
341169689Skan  else
342169689Skan    {
343169689Skan      block_stmt_iterator incr_bsi;
344169689Skan      bool insert_after;
345169689Skan      tree indx_before_incr, indx_after_incr;
346169689Skan      tree incr;
347169689Skan
348169689Skan      standard_iv_increment_position (loop, &incr_bsi, &insert_after);
349169689Skan      create_iv (vect_ptr_init,
350169689Skan		 fold_convert (vect_ptr_type, TYPE_SIZE_UNIT (vectype)),
351169689Skan		 NULL_TREE, loop, &incr_bsi, insert_after,
352169689Skan		 &indx_before_incr, &indx_after_incr);
353169689Skan      incr = bsi_stmt (incr_bsi);
354169689Skan      set_stmt_info (stmt_ann (incr),
355169689Skan		     new_stmt_vec_info (incr, loop_vinfo));
356169689Skan
357169689Skan      /* Copy the points-to information if it exists. */
358169689Skan      if (DR_PTR_INFO (dr))
359169689Skan	{
360169689Skan	  duplicate_ssa_name_ptr_info (indx_before_incr, DR_PTR_INFO (dr));
361169689Skan	  duplicate_ssa_name_ptr_info (indx_after_incr, DR_PTR_INFO (dr));
362169689Skan	}
363169689Skan      merge_alias_info (vect_ptr_init, indx_before_incr);
364169689Skan      merge_alias_info (vect_ptr_init, indx_after_incr);
365169689Skan
366169689Skan      return indx_before_incr;
367169689Skan    }
368169689Skan}
369169689Skan
370169689Skan
371169689Skan/* Function vect_create_destination_var.
372169689Skan
373169689Skan   Create a new temporary of type VECTYPE.  */
374169689Skan
375169689Skanstatic tree
376169689Skanvect_create_destination_var (tree scalar_dest, tree vectype)
377169689Skan{
378169689Skan  tree vec_dest;
379169689Skan  const char *new_name;
380169689Skan  tree type;
381169689Skan  enum vect_var_kind kind;
382169689Skan
383169689Skan  kind = vectype ? vect_simple_var : vect_scalar_var;
384169689Skan  type = vectype ? vectype : TREE_TYPE (scalar_dest);
385169689Skan
386169689Skan  gcc_assert (TREE_CODE (scalar_dest) == SSA_NAME);
387169689Skan
388169689Skan  new_name = get_name (scalar_dest);
389169689Skan  if (!new_name)
390169689Skan    new_name = "var_";
391169689Skan  vec_dest = vect_get_new_vect_var (type, vect_simple_var, new_name);
392169689Skan  add_referenced_var (vec_dest);
393169689Skan
394169689Skan  return vec_dest;
395169689Skan}
396169689Skan
397169689Skan
398169689Skan/* Function vect_init_vector.
399169689Skan
400169689Skan   Insert a new stmt (INIT_STMT) that initializes a new vector variable with
401169689Skan   the vector elements of VECTOR_VAR. Return the DEF of INIT_STMT. It will be
402169689Skan   used in the vectorization of STMT.  */
403169689Skan
404169689Skanstatic tree
405169689Skanvect_init_vector (tree stmt, tree vector_var)
406169689Skan{
407169689Skan  stmt_vec_info stmt_vinfo = vinfo_for_stmt (stmt);
408169689Skan  loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_vinfo);
409169689Skan  struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
410169689Skan  tree new_var;
411169689Skan  tree init_stmt;
412169689Skan  tree vectype = STMT_VINFO_VECTYPE (stmt_vinfo);
413169689Skan  tree vec_oprnd;
414169689Skan  edge pe;
415169689Skan  tree new_temp;
416169689Skan  basic_block new_bb;
417169689Skan
418169689Skan  new_var = vect_get_new_vect_var (vectype, vect_simple_var, "cst_");
419169689Skan  add_referenced_var (new_var);
420169689Skan
421169689Skan  init_stmt = build2 (MODIFY_EXPR, vectype, new_var, vector_var);
422169689Skan  new_temp = make_ssa_name (new_var, init_stmt);
423169689Skan  TREE_OPERAND (init_stmt, 0) = new_temp;
424169689Skan
425169689Skan  pe = loop_preheader_edge (loop);
426169689Skan  new_bb = bsi_insert_on_edge_immediate (pe, init_stmt);
427169689Skan  gcc_assert (!new_bb);
428169689Skan
429169689Skan  if (vect_print_dump_info (REPORT_DETAILS))
430169689Skan    {
431169689Skan      fprintf (vect_dump, "created new init_stmt: ");
432169689Skan      print_generic_expr (vect_dump, init_stmt, TDF_SLIM);
433169689Skan    }
434169689Skan
435169689Skan  vec_oprnd = TREE_OPERAND (init_stmt, 0);
436169689Skan  return vec_oprnd;
437169689Skan}
438169689Skan
439169689Skan
440169689Skan/* Function vect_get_vec_def_for_operand.
441169689Skan
442169689Skan   OP is an operand in STMT. This function returns a (vector) def that will be
443169689Skan   used in the vectorized stmt for STMT.
444169689Skan
445169689Skan   In the case that OP is an SSA_NAME which is defined in the loop, then
446169689Skan   STMT_VINFO_VEC_STMT of the defining stmt holds the relevant def.
447169689Skan
448169689Skan   In case OP is an invariant or constant, a new stmt that creates a vector def
449169689Skan   needs to be introduced.  */
450169689Skan
451169689Skanstatic tree
452169689Skanvect_get_vec_def_for_operand (tree op, tree stmt, tree *scalar_def)
453169689Skan{
454169689Skan  tree vec_oprnd;
455169689Skan  tree vec_stmt;
456169689Skan  tree def_stmt;
457169689Skan  stmt_vec_info def_stmt_info = NULL;
458169689Skan  stmt_vec_info stmt_vinfo = vinfo_for_stmt (stmt);
459169689Skan  tree vectype = STMT_VINFO_VECTYPE (stmt_vinfo);
460169689Skan  int nunits = TYPE_VECTOR_SUBPARTS (vectype);
461169689Skan  loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_vinfo);
462169689Skan  struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
463169689Skan  tree vec_inv;
464169689Skan  tree vec_cst;
465169689Skan  tree t = NULL_TREE;
466169689Skan  tree def;
467169689Skan  int i;
468169689Skan  enum vect_def_type dt;
469169689Skan  bool is_simple_use;
470169689Skan
471169689Skan  if (vect_print_dump_info (REPORT_DETAILS))
472169689Skan    {
473169689Skan      fprintf (vect_dump, "vect_get_vec_def_for_operand: ");
474169689Skan      print_generic_expr (vect_dump, op, TDF_SLIM);
475169689Skan    }
476169689Skan
477169689Skan  is_simple_use = vect_is_simple_use (op, loop_vinfo, &def_stmt, &def, &dt);
478169689Skan  gcc_assert (is_simple_use);
479169689Skan  if (vect_print_dump_info (REPORT_DETAILS))
480169689Skan    {
481169689Skan      if (def)
482169689Skan        {
483169689Skan          fprintf (vect_dump, "def =  ");
484169689Skan          print_generic_expr (vect_dump, def, TDF_SLIM);
485169689Skan        }
486169689Skan      if (def_stmt)
487169689Skan        {
488169689Skan          fprintf (vect_dump, "  def_stmt =  ");
489169689Skan          print_generic_expr (vect_dump, def_stmt, TDF_SLIM);
490169689Skan        }
491169689Skan    }
492169689Skan
493169689Skan  switch (dt)
494169689Skan    {
495169689Skan    /* Case 1: operand is a constant.  */
496169689Skan    case vect_constant_def:
497169689Skan      {
498169689Skan	if (scalar_def)
499169689Skan	  *scalar_def = op;
500169689Skan
501169689Skan        /* Create 'vect_cst_ = {cst,cst,...,cst}'  */
502169689Skan        if (vect_print_dump_info (REPORT_DETAILS))
503169689Skan          fprintf (vect_dump, "Create vector_cst. nunits = %d", nunits);
504169689Skan
505169689Skan        for (i = nunits - 1; i >= 0; --i)
506169689Skan          {
507169689Skan            t = tree_cons (NULL_TREE, op, t);
508169689Skan          }
509169689Skan        vec_cst = build_vector (vectype, t);
510169689Skan        return vect_init_vector (stmt, vec_cst);
511169689Skan      }
512169689Skan
513169689Skan    /* Case 2: operand is defined outside the loop - loop invariant.  */
514169689Skan    case vect_invariant_def:
515169689Skan      {
516169689Skan	if (scalar_def)
517169689Skan	  *scalar_def = def;
518169689Skan
519169689Skan        /* Create 'vec_inv = {inv,inv,..,inv}'  */
520169689Skan        if (vect_print_dump_info (REPORT_DETAILS))
521169689Skan          fprintf (vect_dump, "Create vector_inv.");
522169689Skan
523169689Skan        for (i = nunits - 1; i >= 0; --i)
524169689Skan          {
525169689Skan            t = tree_cons (NULL_TREE, def, t);
526169689Skan          }
527169689Skan
528169689Skan	/* FIXME: use build_constructor directly.  */
529169689Skan        vec_inv = build_constructor_from_list (vectype, t);
530169689Skan        return vect_init_vector (stmt, vec_inv);
531169689Skan      }
532169689Skan
533169689Skan    /* Case 3: operand is defined inside the loop.  */
534169689Skan    case vect_loop_def:
535169689Skan      {
536169689Skan	if (scalar_def)
537169689Skan	  *scalar_def = def_stmt;
538169689Skan
539169689Skan        /* Get the def from the vectorized stmt.  */
540169689Skan        def_stmt_info = vinfo_for_stmt (def_stmt);
541169689Skan        vec_stmt = STMT_VINFO_VEC_STMT (def_stmt_info);
542169689Skan        gcc_assert (vec_stmt);
543169689Skan        vec_oprnd = TREE_OPERAND (vec_stmt, 0);
544169689Skan        return vec_oprnd;
545169689Skan      }
546169689Skan
547169689Skan    /* Case 4: operand is defined by a loop header phi - reduction  */
548169689Skan    case vect_reduction_def:
549169689Skan      {
550169689Skan        gcc_assert (TREE_CODE (def_stmt) == PHI_NODE);
551169689Skan
552169689Skan        /* Get the def before the loop  */
553169689Skan        op = PHI_ARG_DEF_FROM_EDGE (def_stmt, loop_preheader_edge (loop));
554169689Skan        return get_initial_def_for_reduction (stmt, op, scalar_def);
555169689Skan     }
556169689Skan
557169689Skan    /* Case 5: operand is defined by loop-header phi - induction.  */
558169689Skan    case vect_induction_def:
559169689Skan      {
560169689Skan        if (vect_print_dump_info (REPORT_DETAILS))
561169689Skan          fprintf (vect_dump, "induction - unsupported.");
562169689Skan        internal_error ("no support for induction"); /* FORNOW */
563169689Skan      }
564169689Skan
565169689Skan    default:
566169689Skan      gcc_unreachable ();
567169689Skan    }
568169689Skan}
569169689Skan
570169689Skan
571169689Skan/* Function vect_finish_stmt_generation.
572169689Skan
573169689Skan   Insert a new stmt.  */
574169689Skan
575169689Skanstatic void
576169689Skanvect_finish_stmt_generation (tree stmt, tree vec_stmt, block_stmt_iterator *bsi)
577169689Skan{
578169689Skan  bsi_insert_before (bsi, vec_stmt, BSI_SAME_STMT);
579169689Skan
580169689Skan  if (vect_print_dump_info (REPORT_DETAILS))
581169689Skan    {
582169689Skan      fprintf (vect_dump, "add new stmt: ");
583169689Skan      print_generic_expr (vect_dump, vec_stmt, TDF_SLIM);
584169689Skan    }
585169689Skan
586169689Skan  /* Make sure bsi points to the stmt that is being vectorized.  */
587169689Skan  gcc_assert (stmt == bsi_stmt (*bsi));
588169689Skan
589169689Skan#ifdef USE_MAPPED_LOCATION
590169689Skan  SET_EXPR_LOCATION (vec_stmt, EXPR_LOCATION (stmt));
591169689Skan#else
592169689Skan  SET_EXPR_LOCUS (vec_stmt, EXPR_LOCUS (stmt));
593169689Skan#endif
594169689Skan}
595169689Skan
596169689Skan
597169689Skan#define ADJUST_IN_EPILOG 1
598169689Skan
599169689Skan/* Function get_initial_def_for_reduction
600169689Skan
601169689Skan   Input:
602169689Skan   STMT - a stmt that performs a reduction operation in the loop.
603169689Skan   INIT_VAL - the initial value of the reduction variable
604169689Skan
605169689Skan   Output:
606169689Skan   SCALAR_DEF - a tree that holds a value to be added to the final result
607169689Skan	of the reduction (used for "ADJUST_IN_EPILOG" - see below).
608169689Skan   Return a vector variable, initialized according to the operation that STMT
609169689Skan	performs. This vector will be used as the initial value of the
610169689Skan	vector of partial results.
611169689Skan
612169689Skan   Option1 ("ADJUST_IN_EPILOG"): Initialize the vector as follows:
613169689Skan     add:         [0,0,...,0,0]
614169689Skan     mult:        [1,1,...,1,1]
615169689Skan     min/max:     [init_val,init_val,..,init_val,init_val]
616169689Skan     bit and/or:  [init_val,init_val,..,init_val,init_val]
617169689Skan   and when necessary (e.g. add/mult case) let the caller know
618169689Skan   that it needs to adjust the result by init_val.
619169689Skan
620169689Skan   Option2: Initialize the vector as follows:
621169689Skan     add:         [0,0,...,0,init_val]
622169689Skan     mult:        [1,1,...,1,init_val]
623169689Skan     min/max:     [init_val,init_val,...,init_val]
624169689Skan     bit and/or:  [init_val,init_val,...,init_val]
625169689Skan   and no adjustments are needed.
626169689Skan
627169689Skan   For example, for the following code:
628169689Skan
629169689Skan   s = init_val;
630169689Skan   for (i=0;i<n;i++)
631169689Skan     s = s + a[i];
632169689Skan
633169689Skan   STMT is 's = s + a[i]', and the reduction variable is 's'.
634169689Skan   For a vector of 4 units, we want to return either [0,0,0,init_val],
635169689Skan   or [0,0,0,0] and let the caller know that it needs to adjust
636169689Skan   the result at the end by 'init_val'.
637169689Skan
638169689Skan   FORNOW: We use the "ADJUST_IN_EPILOG" scheme.
639169689Skan   TODO: Use some cost-model to estimate which scheme is more profitable.
640169689Skan*/
641169689Skan
642169689Skanstatic tree
643169689Skanget_initial_def_for_reduction (tree stmt, tree init_val, tree *scalar_def)
644169689Skan{
645169689Skan  stmt_vec_info stmt_vinfo = vinfo_for_stmt (stmt);
646169689Skan  tree vectype = STMT_VINFO_VECTYPE (stmt_vinfo);
647169689Skan  int nunits = GET_MODE_NUNITS (TYPE_MODE (vectype));
648169689Skan  int nelements;
649169689Skan  enum tree_code code = TREE_CODE (TREE_OPERAND (stmt, 1));
650169689Skan  tree type = TREE_TYPE (init_val);
651169689Skan  tree def;
652169689Skan  tree vec, t = NULL_TREE;
653169689Skan  bool need_epilog_adjust;
654169689Skan  int i;
655169689Skan
656169689Skan  gcc_assert (INTEGRAL_TYPE_P (type) || SCALAR_FLOAT_TYPE_P (type));
657169689Skan
658169689Skan  switch (code)
659169689Skan  {
660169689Skan  case WIDEN_SUM_EXPR:
661169689Skan  case DOT_PROD_EXPR:
662169689Skan  case PLUS_EXPR:
663169689Skan    if (INTEGRAL_TYPE_P (type))
664169689Skan      def = build_int_cst (type, 0);
665169689Skan    else
666169689Skan      def = build_real (type, dconst0);
667169689Skan
668169689Skan#ifdef ADJUST_IN_EPILOG
669169689Skan    /* All the 'nunits' elements are set to 0. The final result will be
670169689Skan       adjusted by 'init_val' at the loop epilog.  */
671169689Skan    nelements = nunits;
672169689Skan    need_epilog_adjust = true;
673169689Skan#else
674169689Skan    /* 'nunits - 1' elements are set to 0; The last element is set to
675169689Skan        'init_val'.  No further adjustments at the epilog are needed.  */
676169689Skan    nelements = nunits - 1;
677169689Skan    need_epilog_adjust = false;
678169689Skan#endif
679169689Skan    break;
680169689Skan
681169689Skan  case MIN_EXPR:
682169689Skan  case MAX_EXPR:
683169689Skan    def = init_val;
684169689Skan    nelements = nunits;
685169689Skan    need_epilog_adjust = false;
686169689Skan    break;
687169689Skan
688169689Skan  default:
689169689Skan    gcc_unreachable ();
690169689Skan  }
691169689Skan
692169689Skan  for (i = nelements - 1; i >= 0; --i)
693169689Skan    t = tree_cons (NULL_TREE, def, t);
694169689Skan
695169689Skan  if (nelements == nunits - 1)
696169689Skan    {
697169689Skan      /* Set the last element of the vector.  */
698169689Skan      t = tree_cons (NULL_TREE, init_val, t);
699169689Skan      nelements += 1;
700169689Skan    }
701169689Skan  gcc_assert (nelements == nunits);
702169689Skan
703169689Skan  if (TREE_CODE (init_val) == INTEGER_CST || TREE_CODE (init_val) == REAL_CST)
704169689Skan    vec = build_vector (vectype, t);
705169689Skan  else
706169689Skan    vec = build_constructor_from_list (vectype, t);
707169689Skan
708169689Skan  if (!need_epilog_adjust)
709169689Skan    *scalar_def = NULL_TREE;
710169689Skan  else
711169689Skan    *scalar_def = init_val;
712169689Skan
713169689Skan  return vect_init_vector (stmt, vec);
714169689Skan}
715169689Skan
716169689Skan
717169689Skan/* Function vect_create_epilog_for_reduction
718169689Skan
719169689Skan   Create code at the loop-epilog to finalize the result of a reduction
720169689Skan   computation.
721169689Skan
722169689Skan   VECT_DEF is a vector of partial results.
723169689Skan   REDUC_CODE is the tree-code for the epilog reduction.
724169689Skan   STMT is the scalar reduction stmt that is being vectorized.
725169689Skan   REDUCTION_PHI is the phi-node that carries the reduction computation.
726169689Skan
727169689Skan   This function:
728169689Skan   1. Creates the reduction def-use cycle: sets the the arguments for
729169689Skan      REDUCTION_PHI:
730169689Skan      The loop-entry argument is the vectorized initial-value of the reduction.
731169689Skan      The loop-latch argument is VECT_DEF - the vector of partial sums.
732169689Skan   2. "Reduces" the vector of partial results VECT_DEF into a single result,
733169689Skan      by applying the operation specified by REDUC_CODE if available, or by
734169689Skan      other means (whole-vector shifts or a scalar loop).
735169689Skan      The function also creates a new phi node at the loop exit to preserve
736169689Skan      loop-closed form, as illustrated below.
737169689Skan
738169689Skan     The flow at the entry to this function:
739169689Skan
740169689Skan        loop:
741169689Skan          vec_def = phi <null, null>            # REDUCTION_PHI
742169689Skan          VECT_DEF = vector_stmt                # vectorized form of STMT
743169689Skan          s_loop = scalar_stmt                  # (scalar) STMT
744169689Skan        loop_exit:
745169689Skan          s_out0 = phi <s_loop>                 # (scalar) EXIT_PHI
746169689Skan          use <s_out0>
747169689Skan          use <s_out0>
748169689Skan
749169689Skan     The above is transformed by this function into:
750169689Skan
751169689Skan        loop:
752169689Skan          vec_def = phi <vec_init, VECT_DEF>    # REDUCTION_PHI
753169689Skan          VECT_DEF = vector_stmt                # vectorized form of STMT
754169689Skan          s_loop = scalar_stmt                  # (scalar) STMT
755169689Skan        loop_exit:
756169689Skan          s_out0 = phi <s_loop>                 # (scalar) EXIT_PHI
757169689Skan          v_out1 = phi <VECT_DEF>               # NEW_EXIT_PHI
758169689Skan          v_out2 = reduce <v_out1>
759169689Skan          s_out3 = extract_field <v_out2, 0>
760169689Skan          s_out4 = adjust_result <s_out3>
761169689Skan          use <s_out4>
762169689Skan          use <s_out4>
763169689Skan*/
764169689Skan
765169689Skanstatic void
766169689Skanvect_create_epilog_for_reduction (tree vect_def, tree stmt,
767169689Skan                                  enum tree_code reduc_code, tree reduction_phi)
768169689Skan{
769169689Skan  stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
770169689Skan  tree vectype;
771169689Skan  enum machine_mode mode;
772169689Skan  loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
773169689Skan  struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
774169689Skan  basic_block exit_bb;
775169689Skan  tree scalar_dest;
776169689Skan  tree scalar_type;
777169689Skan  tree new_phi;
778169689Skan  block_stmt_iterator exit_bsi;
779169689Skan  tree vec_dest;
780169689Skan  tree new_temp;
781169689Skan  tree new_name;
782169689Skan  tree epilog_stmt;
783169689Skan  tree new_scalar_dest, exit_phi;
784169689Skan  tree bitsize, bitpos, bytesize;
785169689Skan  enum tree_code code = TREE_CODE (TREE_OPERAND (stmt, 1));
786169689Skan  tree scalar_initial_def;
787169689Skan  tree vec_initial_def;
788169689Skan  tree orig_name;
789169689Skan  imm_use_iterator imm_iter;
790169689Skan  use_operand_p use_p;
791169689Skan  bool extract_scalar_result;
792169689Skan  tree reduction_op;
793169689Skan  tree orig_stmt;
794169689Skan  tree use_stmt;
795169689Skan  tree operation = TREE_OPERAND (stmt, 1);
796169689Skan  int op_type;
797169689Skan
798169689Skan  op_type = TREE_CODE_LENGTH (TREE_CODE (operation));
799169689Skan  reduction_op = TREE_OPERAND (operation, op_type-1);
800169689Skan  vectype = get_vectype_for_scalar_type (TREE_TYPE (reduction_op));
801169689Skan  mode = TYPE_MODE (vectype);
802169689Skan
803169689Skan  /*** 1. Create the reduction def-use cycle  ***/
804169689Skan
805169689Skan  /* 1.1 set the loop-entry arg of the reduction-phi:  */
806169689Skan  /* For the case of reduction, vect_get_vec_def_for_operand returns
807169689Skan     the scalar def before the loop, that defines the initial value
808169689Skan     of the reduction variable.  */
809169689Skan  vec_initial_def = vect_get_vec_def_for_operand (reduction_op, stmt,
810169689Skan						  &scalar_initial_def);
811169689Skan  add_phi_arg (reduction_phi, vec_initial_def, loop_preheader_edge (loop));
812169689Skan
813169689Skan  /* 1.2 set the loop-latch arg for the reduction-phi:  */
814169689Skan  add_phi_arg (reduction_phi, vect_def, loop_latch_edge (loop));
815169689Skan
816169689Skan  if (vect_print_dump_info (REPORT_DETAILS))
817169689Skan    {
818169689Skan      fprintf (vect_dump, "transform reduction: created def-use cycle:");
819169689Skan      print_generic_expr (vect_dump, reduction_phi, TDF_SLIM);
820169689Skan      fprintf (vect_dump, "\n");
821169689Skan      print_generic_expr (vect_dump, SSA_NAME_DEF_STMT (vect_def), TDF_SLIM);
822169689Skan    }
823169689Skan
824169689Skan
825169689Skan  /*** 2. Create epilog code
826169689Skan	  The reduction epilog code operates across the elements of the vector
827169689Skan          of partial results computed by the vectorized loop.
828169689Skan          The reduction epilog code consists of:
829169689Skan          step 1: compute the scalar result in a vector (v_out2)
830169689Skan          step 2: extract the scalar result (s_out3) from the vector (v_out2)
831169689Skan          step 3: adjust the scalar result (s_out3) if needed.
832169689Skan
833169689Skan          Step 1 can be accomplished using one the following three schemes:
834169689Skan          (scheme 1) using reduc_code, if available.
835169689Skan          (scheme 2) using whole-vector shifts, if available.
836169689Skan          (scheme 3) using a scalar loop. In this case steps 1+2 above are
837169689Skan                     combined.
838169689Skan
839169689Skan          The overall epilog code looks like this:
840169689Skan
841169689Skan          s_out0 = phi <s_loop>         # original EXIT_PHI
842169689Skan          v_out1 = phi <VECT_DEF>       # NEW_EXIT_PHI
843169689Skan          v_out2 = reduce <v_out1>              # step 1
844169689Skan          s_out3 = extract_field <v_out2, 0>    # step 2
845169689Skan          s_out4 = adjust_result <s_out3>       # step 3
846169689Skan
847169689Skan          (step 3 is optional, and step2 1 and 2 may be combined).
848169689Skan          Lastly, the uses of s_out0 are replaced by s_out4.
849169689Skan
850169689Skan	  ***/
851169689Skan
852169689Skan  /* 2.1 Create new loop-exit-phi to preserve loop-closed form:
853169689Skan        v_out1 = phi <v_loop>  */
854169689Skan
855169689Skan  exit_bb = loop->single_exit->dest;
856169689Skan  new_phi = create_phi_node (SSA_NAME_VAR (vect_def), exit_bb);
857169689Skan  SET_PHI_ARG_DEF (new_phi, loop->single_exit->dest_idx, vect_def);
858169689Skan  exit_bsi = bsi_start (exit_bb);
859169689Skan
860169689Skan  /* 2.2 Get the relevant tree-code to use in the epilog for schemes 2,3
861169689Skan         (i.e. when reduc_code is not available) and in the final adjustment code
862169689Skan         (if needed).  Also get the original scalar reduction variable as
863169689Skan         defined in the loop.  In case STMT is a "pattern-stmt" (i.e. - it
864169689Skan         represents a reduction pattern), the tree-code and scalar-def are
865169689Skan         taken from the original stmt that the pattern-stmt (STMT) replaces.
866169689Skan         Otherwise (it is a regular reduction) - the tree-code and scalar-def
867169689Skan         are taken from STMT.  */
868169689Skan
869169689Skan  orig_stmt = STMT_VINFO_RELATED_STMT (stmt_info);
870169689Skan  if (!orig_stmt)
871169689Skan    {
872169689Skan      /* Regular reduction  */
873169689Skan      orig_stmt = stmt;
874169689Skan    }
875169689Skan  else
876169689Skan    {
877169689Skan      /* Reduction pattern  */
878169689Skan      stmt_vec_info stmt_vinfo = vinfo_for_stmt (orig_stmt);
879169689Skan      gcc_assert (STMT_VINFO_IN_PATTERN_P (stmt_vinfo));
880169689Skan      gcc_assert (STMT_VINFO_RELATED_STMT (stmt_vinfo) == stmt);
881169689Skan    }
882169689Skan  code = TREE_CODE (TREE_OPERAND (orig_stmt, 1));
883169689Skan  scalar_dest = TREE_OPERAND (orig_stmt, 0);
884169689Skan  scalar_type = TREE_TYPE (scalar_dest);
885169689Skan  new_scalar_dest = vect_create_destination_var (scalar_dest, NULL);
886169689Skan  bitsize = TYPE_SIZE (scalar_type);
887169689Skan  bytesize = TYPE_SIZE_UNIT (scalar_type);
888169689Skan
889169689Skan  /* 2.3 Create the reduction code, using one of the three schemes described
890169689Skan         above.  */
891169689Skan
892169689Skan  if (reduc_code < NUM_TREE_CODES)
893169689Skan    {
894169689Skan      /*** Case 1:  Create:
895169689Skan	   v_out2 = reduc_expr <v_out1>  */
896169689Skan
897169689Skan      if (vect_print_dump_info (REPORT_DETAILS))
898169689Skan	fprintf (vect_dump, "Reduce using direct vector reduction.");
899169689Skan
900169689Skan      vec_dest = vect_create_destination_var (scalar_dest, vectype);
901169689Skan      epilog_stmt = build2 (MODIFY_EXPR, vectype, vec_dest,
902169689Skan			build1 (reduc_code, vectype,  PHI_RESULT (new_phi)));
903169689Skan      new_temp = make_ssa_name (vec_dest, epilog_stmt);
904169689Skan      TREE_OPERAND (epilog_stmt, 0) = new_temp;
905169689Skan      bsi_insert_after (&exit_bsi, epilog_stmt, BSI_NEW_STMT);
906169689Skan
907169689Skan      extract_scalar_result = true;
908169689Skan    }
909169689Skan  else
910169689Skan    {
911169689Skan      enum tree_code shift_code = 0;
912169689Skan      bool have_whole_vector_shift = true;
913169689Skan      int bit_offset;
914169689Skan      int element_bitsize = tree_low_cst (bitsize, 1);
915169689Skan      int vec_size_in_bits = tree_low_cst (TYPE_SIZE (vectype), 1);
916169689Skan      tree vec_temp;
917169689Skan
918169689Skan      if (vec_shr_optab->handlers[mode].insn_code != CODE_FOR_nothing)
919169689Skan	shift_code = VEC_RSHIFT_EXPR;
920169689Skan      else
921169689Skan	have_whole_vector_shift = false;
922169689Skan
923169689Skan      /* Regardless of whether we have a whole vector shift, if we're
924169689Skan	 emulating the operation via tree-vect-generic, we don't want
925169689Skan	 to use it.  Only the first round of the reduction is likely
926169689Skan	 to still be profitable via emulation.  */
927169689Skan      /* ??? It might be better to emit a reduction tree code here, so that
928169689Skan	 tree-vect-generic can expand the first round via bit tricks.  */
929169689Skan      if (!VECTOR_MODE_P (mode))
930169689Skan	have_whole_vector_shift = false;
931169689Skan      else
932169689Skan	{
933169689Skan	  optab optab = optab_for_tree_code (code, vectype);
934169689Skan	  if (optab->handlers[mode].insn_code == CODE_FOR_nothing)
935169689Skan	    have_whole_vector_shift = false;
936169689Skan	}
937169689Skan
938169689Skan      if (have_whole_vector_shift)
939169689Skan        {
940169689Skan	  /*** Case 2: Create:
941169689Skan	     for (offset = VS/2; offset >= element_size; offset/=2)
942169689Skan	        {
943169689Skan	          Create:  va' = vec_shift <va, offset>
944169689Skan	          Create:  va = vop <va, va'>
945169689Skan	        }  */
946169689Skan
947169689Skan	  if (vect_print_dump_info (REPORT_DETAILS))
948169689Skan	    fprintf (vect_dump, "Reduce using vector shifts");
949169689Skan
950169689Skan	  vec_dest = vect_create_destination_var (scalar_dest, vectype);
951169689Skan	  new_temp = PHI_RESULT (new_phi);
952169689Skan
953169689Skan	  for (bit_offset = vec_size_in_bits/2;
954169689Skan	       bit_offset >= element_bitsize;
955169689Skan	       bit_offset /= 2)
956169689Skan	    {
957169689Skan	      tree bitpos = size_int (bit_offset);
958169689Skan
959169689Skan	      epilog_stmt = build2 (MODIFY_EXPR, vectype, vec_dest,
960169689Skan	      build2 (shift_code, vectype, new_temp, bitpos));
961169689Skan	      new_name = make_ssa_name (vec_dest, epilog_stmt);
962169689Skan	      TREE_OPERAND (epilog_stmt, 0) = new_name;
963169689Skan	      bsi_insert_after (&exit_bsi, epilog_stmt, BSI_NEW_STMT);
964169689Skan
965169689Skan	      epilog_stmt = build2 (MODIFY_EXPR, vectype, vec_dest,
966169689Skan	      build2 (code, vectype, new_name, new_temp));
967169689Skan	      new_temp = make_ssa_name (vec_dest, epilog_stmt);
968169689Skan	      TREE_OPERAND (epilog_stmt, 0) = new_temp;
969169689Skan	      bsi_insert_after (&exit_bsi, epilog_stmt, BSI_NEW_STMT);
970169689Skan	    }
971169689Skan
972169689Skan	  extract_scalar_result = true;
973169689Skan	}
974169689Skan      else
975169689Skan        {
976169689Skan	  tree rhs;
977169689Skan
978169689Skan	  /*** Case 3: Create:
979169689Skan	     s = extract_field <v_out2, 0>
980169689Skan	     for (offset = element_size;
981169689Skan		  offset < vector_size;
982169689Skan		  offset += element_size;)
983169689Skan	       {
984169689Skan	         Create:  s' = extract_field <v_out2, offset>
985169689Skan	         Create:  s = op <s, s'>
986169689Skan	       }  */
987169689Skan
988169689Skan	  if (vect_print_dump_info (REPORT_DETAILS))
989169689Skan	    fprintf (vect_dump, "Reduce using scalar code. ");
990169689Skan
991169689Skan	  vec_temp = PHI_RESULT (new_phi);
992169689Skan	  vec_size_in_bits = tree_low_cst (TYPE_SIZE (vectype), 1);
993169689Skan	  rhs = build3 (BIT_FIELD_REF, scalar_type, vec_temp, bitsize,
994169689Skan			 bitsize_zero_node);
995169689Skan	  BIT_FIELD_REF_UNSIGNED (rhs) = TYPE_UNSIGNED (scalar_type);
996169689Skan	  epilog_stmt = build2 (MODIFY_EXPR, scalar_type, new_scalar_dest, rhs);
997169689Skan	  new_temp = make_ssa_name (new_scalar_dest, epilog_stmt);
998169689Skan	  TREE_OPERAND (epilog_stmt, 0) = new_temp;
999169689Skan	  bsi_insert_after (&exit_bsi, epilog_stmt, BSI_NEW_STMT);
1000169689Skan
1001169689Skan	  for (bit_offset = element_bitsize;
1002169689Skan	       bit_offset < vec_size_in_bits;
1003169689Skan	       bit_offset += element_bitsize)
1004169689Skan	    {
1005169689Skan	      tree bitpos = bitsize_int (bit_offset);
1006169689Skan	      tree rhs = build3 (BIT_FIELD_REF, scalar_type, vec_temp, bitsize,
1007169689Skan				 bitpos);
1008169689Skan
1009169689Skan	      BIT_FIELD_REF_UNSIGNED (rhs) = TYPE_UNSIGNED (scalar_type);
1010169689Skan	      epilog_stmt = build2 (MODIFY_EXPR, scalar_type, new_scalar_dest,
1011169689Skan				    rhs);
1012169689Skan	      new_name = make_ssa_name (new_scalar_dest, epilog_stmt);
1013169689Skan	      TREE_OPERAND (epilog_stmt, 0) = new_name;
1014169689Skan	      bsi_insert_after (&exit_bsi, epilog_stmt, BSI_NEW_STMT);
1015169689Skan
1016169689Skan	      epilog_stmt = build2 (MODIFY_EXPR, scalar_type, new_scalar_dest,
1017169689Skan				build2 (code, scalar_type, new_name, new_temp));
1018169689Skan	      new_temp = make_ssa_name (new_scalar_dest, epilog_stmt);
1019169689Skan	      TREE_OPERAND (epilog_stmt, 0) = new_temp;
1020169689Skan	      bsi_insert_after (&exit_bsi, epilog_stmt, BSI_NEW_STMT);
1021169689Skan	    }
1022169689Skan
1023169689Skan	  extract_scalar_result = false;
1024169689Skan	}
1025169689Skan    }
1026169689Skan
1027169689Skan  /* 2.4  Extract the final scalar result.  Create:
1028169689Skan         s_out3 = extract_field <v_out2, bitpos>  */
1029169689Skan
1030169689Skan  if (extract_scalar_result)
1031169689Skan    {
1032169689Skan      tree rhs;
1033169689Skan
1034169689Skan      if (vect_print_dump_info (REPORT_DETAILS))
1035169689Skan	fprintf (vect_dump, "extract scalar result");
1036169689Skan
1037169689Skan      if (BYTES_BIG_ENDIAN)
1038169689Skan	bitpos = size_binop (MULT_EXPR,
1039169689Skan		       bitsize_int (TYPE_VECTOR_SUBPARTS (vectype) - 1),
1040169689Skan		       TYPE_SIZE (scalar_type));
1041169689Skan      else
1042169689Skan	bitpos = bitsize_zero_node;
1043169689Skan
1044169689Skan      rhs = build3 (BIT_FIELD_REF, scalar_type, new_temp, bitsize, bitpos);
1045169689Skan      BIT_FIELD_REF_UNSIGNED (rhs) = TYPE_UNSIGNED (scalar_type);
1046169689Skan      epilog_stmt = build2 (MODIFY_EXPR, scalar_type, new_scalar_dest, rhs);
1047169689Skan      new_temp = make_ssa_name (new_scalar_dest, epilog_stmt);
1048169689Skan      TREE_OPERAND (epilog_stmt, 0) = new_temp;
1049169689Skan      bsi_insert_after (&exit_bsi, epilog_stmt, BSI_NEW_STMT);
1050169689Skan    }
1051169689Skan
1052169689Skan  /* 2.4 Adjust the final result by the initial value of the reduction
1053169689Skan	 variable. (When such adjustment is not needed, then
1054169689Skan	 'scalar_initial_def' is zero).
1055169689Skan
1056169689Skan	 Create:
1057169689Skan	 s_out4 = scalar_expr <s_out3, scalar_initial_def>  */
1058169689Skan
1059169689Skan  if (scalar_initial_def)
1060169689Skan    {
1061169689Skan      epilog_stmt = build2 (MODIFY_EXPR, scalar_type, new_scalar_dest,
1062169689Skan                      build2 (code, scalar_type, new_temp, scalar_initial_def));
1063169689Skan      new_temp = make_ssa_name (new_scalar_dest, epilog_stmt);
1064169689Skan      TREE_OPERAND (epilog_stmt, 0) = new_temp;
1065169689Skan      bsi_insert_after (&exit_bsi, epilog_stmt, BSI_NEW_STMT);
1066169689Skan    }
1067169689Skan
1068169689Skan  /* 2.6 Replace uses of s_out0 with uses of s_out3  */
1069169689Skan
1070169689Skan  /* Find the loop-closed-use at the loop exit of the original scalar result.
1071169689Skan     (The reduction result is expected to have two immediate uses - one at the
1072169689Skan     latch block, and one at the loop exit).  */
1073169689Skan  exit_phi = NULL;
1074169689Skan  FOR_EACH_IMM_USE_FAST (use_p, imm_iter, scalar_dest)
1075169689Skan    {
1076169689Skan      if (!flow_bb_inside_loop_p (loop, bb_for_stmt (USE_STMT (use_p))))
1077169689Skan	{
1078169689Skan	  exit_phi = USE_STMT (use_p);
1079169689Skan	  break;
1080169689Skan	}
1081169689Skan    }
1082169689Skan  /* We expect to have found an exit_phi because of loop-closed-ssa form.  */
1083169689Skan  gcc_assert (exit_phi);
1084169689Skan  /* Replace the uses:  */
1085169689Skan  orig_name = PHI_RESULT (exit_phi);
1086169689Skan  FOR_EACH_IMM_USE_STMT (use_stmt, imm_iter, orig_name)
1087169689Skan    FOR_EACH_IMM_USE_ON_STMT (use_p, imm_iter)
1088169689Skan      SET_USE (use_p, new_temp);
1089169689Skan}
1090169689Skan
1091169689Skan
1092169689Skan/* Function vectorizable_reduction.
1093169689Skan
1094169689Skan   Check if STMT performs a reduction operation that can be vectorized.
1095169689Skan   If VEC_STMT is also passed, vectorize the STMT: create a vectorized
1096169689Skan   stmt to replace it, put it in VEC_STMT, and insert it at BSI.
1097169689Skan   Return FALSE if not a vectorizable STMT, TRUE otherwise.
1098169689Skan
1099169689Skan   This function also handles reduction idioms (patterns) that have been
1100169689Skan   recognized in advance during vect_pattern_recog. In this case, STMT may be
1101169689Skan   of this form:
1102169689Skan     X = pattern_expr (arg0, arg1, ..., X)
1103169689Skan   and it's STMT_VINFO_RELATED_STMT points to the last stmt in the original
1104169689Skan   sequence that had been detected and replaced by the pattern-stmt (STMT).
1105169689Skan
1106169689Skan   In some cases of reduction patterns, the type of the reduction variable X is
1107169689Skan   different than the type of the other arguments of STMT.
1108169689Skan   In such cases, the vectype that is used when transforming STMT into a vector
1109169689Skan   stmt is different than the vectype that is used to determine the
1110169689Skan   vectorization factor, because it consists of a different number of elements
1111169689Skan   than the actual number of elements that are being operated upon in parallel.
1112169689Skan
1113169689Skan   For example, consider an accumulation of shorts into an int accumulator.
1114169689Skan   On some targets it's possible to vectorize this pattern operating on 8
1115169689Skan   shorts at a time (hence, the vectype for purposes of determining the
1116169689Skan   vectorization factor should be V8HI); on the other hand, the vectype that
1117169689Skan   is used to create the vector form is actually V4SI (the type of the result).
1118169689Skan
1119169689Skan   Upon entry to this function, STMT_VINFO_VECTYPE records the vectype that
1120169689Skan   indicates what is the actual level of parallelism (V8HI in the example), so
1121169689Skan   that the right vectorization factor would be derived. This vectype
1122169689Skan   corresponds to the type of arguments to the reduction stmt, and should *NOT*
1123169689Skan   be used to create the vectorized stmt. The right vectype for the vectorized
1124169689Skan   stmt is obtained from the type of the result X:
1125169689Skan        get_vectype_for_scalar_type (TREE_TYPE (X))
1126169689Skan
1127169689Skan   This means that, contrary to "regular" reductions (or "regular" stmts in
1128169689Skan   general), the following equation:
1129169689Skan      STMT_VINFO_VECTYPE == get_vectype_for_scalar_type (TREE_TYPE (X))
1130169689Skan   does *NOT* necessarily hold for reduction patterns.  */
1131169689Skan
1132169689Skanbool
1133169689Skanvectorizable_reduction (tree stmt, block_stmt_iterator *bsi, tree *vec_stmt)
1134169689Skan{
1135169689Skan  tree vec_dest;
1136169689Skan  tree scalar_dest;
1137169689Skan  tree op;
1138169689Skan  tree loop_vec_def0, loop_vec_def1;
1139169689Skan  stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
1140169689Skan  tree vectype = STMT_VINFO_VECTYPE (stmt_info);
1141169689Skan  loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
1142169689Skan  struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
1143169689Skan  tree operation;
1144169689Skan  enum tree_code code, orig_code, epilog_reduc_code = 0;
1145169689Skan  enum machine_mode vec_mode;
1146169689Skan  int op_type;
1147169689Skan  optab optab, reduc_optab;
1148169689Skan  tree new_temp;
1149169689Skan  tree def, def_stmt;
1150169689Skan  enum vect_def_type dt;
1151169689Skan  tree new_phi;
1152169689Skan  tree scalar_type;
1153169689Skan  bool is_simple_use;
1154169689Skan  tree orig_stmt;
1155169689Skan  stmt_vec_info orig_stmt_info;
1156169689Skan  tree expr = NULL_TREE;
1157169689Skan  int i;
1158169689Skan
1159169689Skan  /* 1. Is vectorizable reduction?  */
1160169689Skan
1161169689Skan  /* Not supportable if the reduction variable is used in the loop.  */
1162169689Skan  if (STMT_VINFO_RELEVANT_P (stmt_info))
1163169689Skan    return false;
1164169689Skan
1165169689Skan  if (!STMT_VINFO_LIVE_P (stmt_info))
1166169689Skan    return false;
1167169689Skan
1168169689Skan  /* Make sure it was already recognized as a reduction computation.  */
1169169689Skan  if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_reduction_def)
1170169689Skan    return false;
1171169689Skan
1172169689Skan  /* 2. Has this been recognized as a reduction pattern?
1173169689Skan
1174169689Skan     Check if STMT represents a pattern that has been recognized
1175169689Skan     in earlier analysis stages.  For stmts that represent a pattern,
1176169689Skan     the STMT_VINFO_RELATED_STMT field records the last stmt in
1177169689Skan     the original sequence that constitutes the pattern.  */
1178169689Skan
1179169689Skan  orig_stmt = STMT_VINFO_RELATED_STMT (stmt_info);
1180169689Skan  if (orig_stmt)
1181169689Skan    {
1182169689Skan      orig_stmt_info = vinfo_for_stmt (orig_stmt);
1183169689Skan      gcc_assert (STMT_VINFO_RELATED_STMT (orig_stmt_info) == stmt);
1184169689Skan      gcc_assert (STMT_VINFO_IN_PATTERN_P (orig_stmt_info));
1185169689Skan      gcc_assert (!STMT_VINFO_IN_PATTERN_P (stmt_info));
1186169689Skan    }
1187169689Skan
1188169689Skan  /* 3. Check the operands of the operation. The first operands are defined
1189169689Skan        inside the loop body. The last operand is the reduction variable,
1190169689Skan        which is defined by the loop-header-phi.  */
1191169689Skan
1192169689Skan  gcc_assert (TREE_CODE (stmt) == MODIFY_EXPR);
1193169689Skan
1194169689Skan  operation = TREE_OPERAND (stmt, 1);
1195169689Skan  code = TREE_CODE (operation);
1196169689Skan  op_type = TREE_CODE_LENGTH (code);
1197169689Skan
1198169689Skan  if (op_type != binary_op && op_type != ternary_op)
1199169689Skan    return false;
1200169689Skan  scalar_dest = TREE_OPERAND (stmt, 0);
1201169689Skan  scalar_type = TREE_TYPE (scalar_dest);
1202169689Skan
1203169689Skan  /* All uses but the last are expected to be defined in the loop.
1204169689Skan     The last use is the reduction variable.  */
1205169689Skan  for (i = 0; i < op_type-1; i++)
1206169689Skan    {
1207169689Skan      op = TREE_OPERAND (operation, i);
1208169689Skan      is_simple_use = vect_is_simple_use (op, loop_vinfo, &def_stmt, &def, &dt);
1209169689Skan      gcc_assert (is_simple_use);
1210169689Skan      gcc_assert (dt == vect_loop_def || dt == vect_invariant_def ||
1211169689Skan                  dt == vect_constant_def);
1212169689Skan    }
1213169689Skan
1214169689Skan  op = TREE_OPERAND (operation, i);
1215169689Skan  is_simple_use = vect_is_simple_use (op, loop_vinfo, &def_stmt, &def, &dt);
1216169689Skan  gcc_assert (is_simple_use);
1217169689Skan  gcc_assert (dt == vect_reduction_def);
1218169689Skan  gcc_assert (TREE_CODE (def_stmt) == PHI_NODE);
1219169689Skan  if (orig_stmt)
1220169689Skan    gcc_assert (orig_stmt == vect_is_simple_reduction (loop, def_stmt));
1221169689Skan  else
1222169689Skan    gcc_assert (stmt == vect_is_simple_reduction (loop, def_stmt));
1223169689Skan
1224169689Skan  if (STMT_VINFO_LIVE_P (vinfo_for_stmt (def_stmt)))
1225169689Skan    return false;
1226169689Skan
1227169689Skan  /* 4. Supportable by target?  */
1228169689Skan
1229169689Skan  /* 4.1. check support for the operation in the loop  */
1230169689Skan  optab = optab_for_tree_code (code, vectype);
1231169689Skan  if (!optab)
1232169689Skan    {
1233169689Skan      if (vect_print_dump_info (REPORT_DETAILS))
1234169689Skan        fprintf (vect_dump, "no optab.");
1235169689Skan      return false;
1236169689Skan    }
1237169689Skan  vec_mode = TYPE_MODE (vectype);
1238169689Skan  if (optab->handlers[(int) vec_mode].insn_code == CODE_FOR_nothing)
1239169689Skan    {
1240169689Skan      if (vect_print_dump_info (REPORT_DETAILS))
1241169689Skan        fprintf (vect_dump, "op not supported by target.");
1242169689Skan      if (GET_MODE_SIZE (vec_mode) != UNITS_PER_WORD
1243169689Skan          || LOOP_VINFO_VECT_FACTOR (loop_vinfo)
1244169689Skan	     < vect_min_worthwhile_factor (code))
1245169689Skan        return false;
1246169689Skan      if (vect_print_dump_info (REPORT_DETAILS))
1247169689Skan	fprintf (vect_dump, "proceeding using word mode.");
1248169689Skan    }
1249169689Skan
1250169689Skan  /* Worthwhile without SIMD support?  */
1251169689Skan  if (!VECTOR_MODE_P (TYPE_MODE (vectype))
1252169689Skan      && LOOP_VINFO_VECT_FACTOR (loop_vinfo)
1253169689Skan	 < vect_min_worthwhile_factor (code))
1254169689Skan    {
1255169689Skan      if (vect_print_dump_info (REPORT_DETAILS))
1256169689Skan	fprintf (vect_dump, "not worthwhile without SIMD support.");
1257169689Skan      return false;
1258169689Skan    }
1259169689Skan
1260169689Skan  /* 4.2. Check support for the epilog operation.
1261169689Skan
1262169689Skan          If STMT represents a reduction pattern, then the type of the
1263169689Skan          reduction variable may be different than the type of the rest
1264169689Skan          of the arguments.  For example, consider the case of accumulation
1265169689Skan          of shorts into an int accumulator; The original code:
1266169689Skan                        S1: int_a = (int) short_a;
1267169689Skan          orig_stmt->   S2: int_acc = plus <int_a ,int_acc>;
1268169689Skan
1269169689Skan          was replaced with:
1270169689Skan                        STMT: int_acc = widen_sum <short_a, int_acc>
1271169689Skan
1272169689Skan          This means that:
1273169689Skan          1. The tree-code that is used to create the vector operation in the
1274169689Skan             epilog code (that reduces the partial results) is not the
1275169689Skan             tree-code of STMT, but is rather the tree-code of the original
1276169689Skan             stmt from the pattern that STMT is replacing. I.e, in the example
1277169689Skan             above we want to use 'widen_sum' in the loop, but 'plus' in the
1278169689Skan             epilog.
1279169689Skan          2. The type (mode) we use to check available target support
1280169689Skan             for the vector operation to be created in the *epilog*, is
1281169689Skan             determined by the type of the reduction variable (in the example
1282169689Skan             above we'd check this: plus_optab[vect_int_mode]).
1283169689Skan             However the type (mode) we use to check available target support
1284169689Skan             for the vector operation to be created *inside the loop*, is
1285169689Skan             determined by the type of the other arguments to STMT (in the
1286169689Skan             example we'd check this: widen_sum_optab[vect_short_mode]).
1287169689Skan
1288169689Skan          This is contrary to "regular" reductions, in which the types of all
1289169689Skan          the arguments are the same as the type of the reduction variable.
1290169689Skan          For "regular" reductions we can therefore use the same vector type
1291169689Skan          (and also the same tree-code) when generating the epilog code and
1292169689Skan          when generating the code inside the loop.  */
1293169689Skan
1294169689Skan  if (orig_stmt)
1295169689Skan    {
1296169689Skan      /* This is a reduction pattern: get the vectype from the type of the
1297169689Skan         reduction variable, and get the tree-code from orig_stmt.  */
1298169689Skan      orig_code = TREE_CODE (TREE_OPERAND (orig_stmt, 1));
1299169689Skan      vectype = get_vectype_for_scalar_type (TREE_TYPE (def));
1300169689Skan      vec_mode = TYPE_MODE (vectype);
1301169689Skan    }
1302169689Skan  else
1303169689Skan    {
1304169689Skan      /* Regular reduction: use the same vectype and tree-code as used for
1305169689Skan         the vector code inside the loop can be used for the epilog code. */
1306169689Skan      orig_code = code;
1307169689Skan    }
1308169689Skan
1309169689Skan  if (!reduction_code_for_scalar_code (orig_code, &epilog_reduc_code))
1310169689Skan    return false;
1311169689Skan  reduc_optab = optab_for_tree_code (epilog_reduc_code, vectype);
1312169689Skan  if (!reduc_optab)
1313169689Skan    {
1314169689Skan      if (vect_print_dump_info (REPORT_DETAILS))
1315169689Skan        fprintf (vect_dump, "no optab for reduction.");
1316169689Skan      epilog_reduc_code = NUM_TREE_CODES;
1317169689Skan    }
1318169689Skan  if (reduc_optab->handlers[(int) vec_mode].insn_code == CODE_FOR_nothing)
1319169689Skan    {
1320169689Skan      if (vect_print_dump_info (REPORT_DETAILS))
1321169689Skan        fprintf (vect_dump, "reduc op not supported by target.");
1322169689Skan      epilog_reduc_code = NUM_TREE_CODES;
1323169689Skan    }
1324169689Skan
1325169689Skan  if (!vec_stmt) /* transformation not required.  */
1326169689Skan    {
1327169689Skan      STMT_VINFO_TYPE (stmt_info) = reduc_vec_info_type;
1328169689Skan      return true;
1329169689Skan    }
1330169689Skan
1331169689Skan  /** Transform.  **/
1332169689Skan
1333169689Skan  if (vect_print_dump_info (REPORT_DETAILS))
1334169689Skan    fprintf (vect_dump, "transform reduction.");
1335169689Skan
1336169689Skan  /* Create the destination vector  */
1337169689Skan  vec_dest = vect_create_destination_var (scalar_dest, vectype);
1338169689Skan
1339169689Skan  /* Create the reduction-phi that defines the reduction-operand.  */
1340169689Skan  new_phi = create_phi_node (vec_dest, loop->header);
1341169689Skan
1342169689Skan  /* Prepare the operand that is defined inside the loop body  */
1343169689Skan  op = TREE_OPERAND (operation, 0);
1344169689Skan  loop_vec_def0 = vect_get_vec_def_for_operand (op, stmt, NULL);
1345169689Skan  if (op_type == binary_op)
1346169689Skan    expr = build2 (code, vectype, loop_vec_def0, PHI_RESULT (new_phi));
1347169689Skan  else if (op_type == ternary_op)
1348169689Skan    {
1349169689Skan      op = TREE_OPERAND (operation, 1);
1350169689Skan      loop_vec_def1 = vect_get_vec_def_for_operand (op, stmt, NULL);
1351169689Skan      expr = build3 (code, vectype, loop_vec_def0, loop_vec_def1,
1352169689Skan		     PHI_RESULT (new_phi));
1353169689Skan    }
1354169689Skan
1355169689Skan  /* Create the vectorized operation that computes the partial results  */
1356169689Skan  *vec_stmt = build2 (MODIFY_EXPR, vectype, vec_dest, expr);
1357169689Skan  new_temp = make_ssa_name (vec_dest, *vec_stmt);
1358169689Skan  TREE_OPERAND (*vec_stmt, 0) = new_temp;
1359169689Skan  vect_finish_stmt_generation (stmt, *vec_stmt, bsi);
1360169689Skan
1361169689Skan  /* Finalize the reduction-phi (set it's arguments) and create the
1362169689Skan     epilog reduction code.  */
1363169689Skan  vect_create_epilog_for_reduction (new_temp, stmt, epilog_reduc_code, new_phi);
1364169689Skan  return true;
1365169689Skan}
1366169689Skan
1367169689Skan
1368169689Skan/* Function vectorizable_assignment.
1369169689Skan
1370169689Skan   Check if STMT performs an assignment (copy) that can be vectorized.
1371169689Skan   If VEC_STMT is also passed, vectorize the STMT: create a vectorized
1372169689Skan   stmt to replace it, put it in VEC_STMT, and insert it at BSI.
1373169689Skan   Return FALSE if not a vectorizable STMT, TRUE otherwise.  */
1374169689Skan
1375169689Skanbool
1376169689Skanvectorizable_assignment (tree stmt, block_stmt_iterator *bsi, tree *vec_stmt)
1377169689Skan{
1378169689Skan  tree vec_dest;
1379169689Skan  tree scalar_dest;
1380169689Skan  tree op;
1381169689Skan  tree vec_oprnd;
1382169689Skan  stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
1383169689Skan  tree vectype = STMT_VINFO_VECTYPE (stmt_info);
1384169689Skan  loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
1385169689Skan  tree new_temp;
1386169689Skan  tree def, def_stmt;
1387169689Skan  enum vect_def_type dt;
1388169689Skan
1389169689Skan  /* Is vectorizable assignment?  */
1390169689Skan  if (!STMT_VINFO_RELEVANT_P (stmt_info))
1391169689Skan    return false;
1392169689Skan
1393169689Skan  gcc_assert (STMT_VINFO_DEF_TYPE (stmt_info) == vect_loop_def);
1394169689Skan
1395169689Skan  if (TREE_CODE (stmt) != MODIFY_EXPR)
1396169689Skan    return false;
1397169689Skan
1398169689Skan  scalar_dest = TREE_OPERAND (stmt, 0);
1399169689Skan  if (TREE_CODE (scalar_dest) != SSA_NAME)
1400169689Skan    return false;
1401169689Skan
1402169689Skan  op = TREE_OPERAND (stmt, 1);
1403169689Skan  if (!vect_is_simple_use (op, loop_vinfo, &def_stmt, &def, &dt))
1404169689Skan    {
1405169689Skan      if (vect_print_dump_info (REPORT_DETAILS))
1406169689Skan        fprintf (vect_dump, "use not simple.");
1407169689Skan      return false;
1408169689Skan    }
1409169689Skan
1410169689Skan  if (!vec_stmt) /* transformation not required.  */
1411169689Skan    {
1412169689Skan      STMT_VINFO_TYPE (stmt_info) = assignment_vec_info_type;
1413169689Skan      return true;
1414169689Skan    }
1415169689Skan
1416169689Skan  /** Transform.  **/
1417169689Skan  if (vect_print_dump_info (REPORT_DETAILS))
1418169689Skan    fprintf (vect_dump, "transform assignment.");
1419169689Skan
1420169689Skan  /* Handle def.  */
1421169689Skan  vec_dest = vect_create_destination_var (scalar_dest, vectype);
1422169689Skan
1423169689Skan  /* Handle use.  */
1424169689Skan  op = TREE_OPERAND (stmt, 1);
1425169689Skan  vec_oprnd = vect_get_vec_def_for_operand (op, stmt, NULL);
1426169689Skan
1427169689Skan  /* Arguments are ready. create the new vector stmt.  */
1428169689Skan  *vec_stmt = build2 (MODIFY_EXPR, vectype, vec_dest, vec_oprnd);
1429169689Skan  new_temp = make_ssa_name (vec_dest, *vec_stmt);
1430169689Skan  TREE_OPERAND (*vec_stmt, 0) = new_temp;
1431169689Skan  vect_finish_stmt_generation (stmt, *vec_stmt, bsi);
1432169689Skan
1433169689Skan  return true;
1434169689Skan}
1435169689Skan
1436169689Skan
1437169689Skan/* Function vect_min_worthwhile_factor.
1438169689Skan
1439169689Skan   For a loop where we could vectorize the operation indicated by CODE,
1440169689Skan   return the minimum vectorization factor that makes it worthwhile
1441169689Skan   to use generic vectors.  */
1442169689Skanstatic int
1443169689Skanvect_min_worthwhile_factor (enum tree_code code)
1444169689Skan{
1445169689Skan  switch (code)
1446169689Skan    {
1447169689Skan    case PLUS_EXPR:
1448169689Skan    case MINUS_EXPR:
1449169689Skan    case NEGATE_EXPR:
1450169689Skan      return 4;
1451169689Skan
1452169689Skan    case BIT_AND_EXPR:
1453169689Skan    case BIT_IOR_EXPR:
1454169689Skan    case BIT_XOR_EXPR:
1455169689Skan    case BIT_NOT_EXPR:
1456169689Skan      return 2;
1457169689Skan
1458169689Skan    default:
1459169689Skan      return INT_MAX;
1460169689Skan    }
1461169689Skan}
1462169689Skan
1463169689Skan
1464169689Skan/* Function vectorizable_operation.
1465169689Skan
1466169689Skan   Check if STMT performs a binary or unary operation that can be vectorized.
1467169689Skan   If VEC_STMT is also passed, vectorize the STMT: create a vectorized
1468169689Skan   stmt to replace it, put it in VEC_STMT, and insert it at BSI.
1469169689Skan   Return FALSE if not a vectorizable STMT, TRUE otherwise.  */
1470169689Skan
1471169689Skanbool
1472169689Skanvectorizable_operation (tree stmt, block_stmt_iterator *bsi, tree *vec_stmt)
1473169689Skan{
1474169689Skan  tree vec_dest;
1475169689Skan  tree scalar_dest;
1476169689Skan  tree operation;
1477169689Skan  tree op0, op1 = NULL;
1478169689Skan  tree vec_oprnd0, vec_oprnd1=NULL;
1479169689Skan  stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
1480169689Skan  tree vectype = STMT_VINFO_VECTYPE (stmt_info);
1481169689Skan  loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
1482169689Skan  int i;
1483169689Skan  enum tree_code code;
1484169689Skan  enum machine_mode vec_mode;
1485169689Skan  tree new_temp;
1486169689Skan  int op_type;
1487169689Skan  tree op;
1488169689Skan  optab optab;
1489169689Skan  int icode;
1490169689Skan  enum machine_mode optab_op2_mode;
1491169689Skan  tree def, def_stmt;
1492169689Skan  enum vect_def_type dt;
1493169689Skan
1494169689Skan  /* Is STMT a vectorizable binary/unary operation?   */
1495169689Skan  if (!STMT_VINFO_RELEVANT_P (stmt_info))
1496169689Skan    return false;
1497169689Skan
1498169689Skan  gcc_assert (STMT_VINFO_DEF_TYPE (stmt_info) == vect_loop_def);
1499169689Skan
1500169689Skan  if (STMT_VINFO_LIVE_P (stmt_info))
1501169689Skan    {
1502169689Skan      /* FORNOW: not yet supported.  */
1503169689Skan      if (vect_print_dump_info (REPORT_DETAILS))
1504169689Skan        fprintf (vect_dump, "value used after loop.");
1505169689Skan      return false;
1506169689Skan    }
1507169689Skan
1508169689Skan  if (TREE_CODE (stmt) != MODIFY_EXPR)
1509169689Skan    return false;
1510169689Skan
1511169689Skan  if (TREE_CODE (TREE_OPERAND (stmt, 0)) != SSA_NAME)
1512169689Skan    return false;
1513169689Skan
1514169689Skan  operation = TREE_OPERAND (stmt, 1);
1515169689Skan  code = TREE_CODE (operation);
1516169689Skan  optab = optab_for_tree_code (code, vectype);
1517169689Skan
1518169689Skan  /* Support only unary or binary operations.  */
1519169689Skan  op_type = TREE_CODE_LENGTH (code);
1520169689Skan  if (op_type != unary_op && op_type != binary_op)
1521169689Skan    {
1522169689Skan      if (vect_print_dump_info (REPORT_DETAILS))
1523169689Skan	fprintf (vect_dump, "num. args = %d (not unary/binary op).", op_type);
1524169689Skan      return false;
1525169689Skan    }
1526169689Skan
1527169689Skan  for (i = 0; i < op_type; i++)
1528169689Skan    {
1529169689Skan      op = TREE_OPERAND (operation, i);
1530169689Skan      if (!vect_is_simple_use (op, loop_vinfo, &def_stmt, &def, &dt))
1531169689Skan	{
1532169689Skan	  if (vect_print_dump_info (REPORT_DETAILS))
1533169689Skan	    fprintf (vect_dump, "use not simple.");
1534169689Skan	  return false;
1535169689Skan	}
1536169689Skan    }
1537169689Skan
1538169689Skan  /* Supportable by target?  */
1539169689Skan  if (!optab)
1540169689Skan    {
1541169689Skan      if (vect_print_dump_info (REPORT_DETAILS))
1542169689Skan	fprintf (vect_dump, "no optab.");
1543169689Skan      return false;
1544169689Skan    }
1545169689Skan  vec_mode = TYPE_MODE (vectype);
1546169689Skan  icode = (int) optab->handlers[(int) vec_mode].insn_code;
1547169689Skan  if (icode == CODE_FOR_nothing)
1548169689Skan    {
1549169689Skan      if (vect_print_dump_info (REPORT_DETAILS))
1550169689Skan	fprintf (vect_dump, "op not supported by target.");
1551169689Skan      if (GET_MODE_SIZE (vec_mode) != UNITS_PER_WORD
1552169689Skan          || LOOP_VINFO_VECT_FACTOR (loop_vinfo)
1553169689Skan	     < vect_min_worthwhile_factor (code))
1554169689Skan        return false;
1555169689Skan      if (vect_print_dump_info (REPORT_DETAILS))
1556169689Skan	fprintf (vect_dump, "proceeding using word mode.");
1557169689Skan    }
1558169689Skan
1559169689Skan  /* Worthwhile without SIMD support?  */
1560169689Skan  if (!VECTOR_MODE_P (TYPE_MODE (vectype))
1561169689Skan      && LOOP_VINFO_VECT_FACTOR (loop_vinfo)
1562169689Skan	 < vect_min_worthwhile_factor (code))
1563169689Skan    {
1564169689Skan      if (vect_print_dump_info (REPORT_DETAILS))
1565169689Skan	fprintf (vect_dump, "not worthwhile without SIMD support.");
1566169689Skan      return false;
1567169689Skan    }
1568169689Skan
1569169689Skan  if (code == LSHIFT_EXPR || code == RSHIFT_EXPR)
1570169689Skan    {
1571169689Skan      /* FORNOW: not yet supported.  */
1572169689Skan      if (!VECTOR_MODE_P (vec_mode))
1573169689Skan	return false;
1574169689Skan
1575169689Skan      /* Invariant argument is needed for a vector shift
1576169689Skan	 by a scalar shift operand.  */
1577169689Skan      optab_op2_mode = insn_data[icode].operand[2].mode;
1578169689Skan      if (! (VECTOR_MODE_P (optab_op2_mode)
1579169689Skan	     || dt == vect_constant_def
1580169689Skan	     || dt == vect_invariant_def))
1581169689Skan	{
1582169689Skan	  if (vect_print_dump_info (REPORT_DETAILS))
1583169689Skan	    fprintf (vect_dump, "operand mode requires invariant argument.");
1584169689Skan	  return false;
1585169689Skan	}
1586169689Skan    }
1587169689Skan
1588169689Skan  if (!vec_stmt) /* transformation not required.  */
1589169689Skan    {
1590169689Skan      STMT_VINFO_TYPE (stmt_info) = op_vec_info_type;
1591169689Skan      return true;
1592169689Skan    }
1593169689Skan
1594169689Skan  /** Transform.  **/
1595169689Skan
1596169689Skan  if (vect_print_dump_info (REPORT_DETAILS))
1597169689Skan    fprintf (vect_dump, "transform binary/unary operation.");
1598169689Skan
1599169689Skan  /* Handle def.  */
1600169689Skan  scalar_dest = TREE_OPERAND (stmt, 0);
1601169689Skan  vec_dest = vect_create_destination_var (scalar_dest, vectype);
1602169689Skan
1603169689Skan  /* Handle uses.  */
1604169689Skan  op0 = TREE_OPERAND (operation, 0);
1605169689Skan  vec_oprnd0 = vect_get_vec_def_for_operand (op0, stmt, NULL);
1606169689Skan
1607169689Skan  if (op_type == binary_op)
1608169689Skan    {
1609169689Skan      op1 = TREE_OPERAND (operation, 1);
1610169689Skan
1611169689Skan      if (code == LSHIFT_EXPR || code == RSHIFT_EXPR)
1612169689Skan	{
1613169689Skan	  /* Vector shl and shr insn patterns can be defined with
1614169689Skan	     scalar operand 2 (shift operand).  In this case, use
1615169689Skan	     constant or loop invariant op1 directly, without
1616169689Skan	     extending it to vector mode first.  */
1617169689Skan
1618169689Skan	  optab_op2_mode = insn_data[icode].operand[2].mode;
1619169689Skan	  if (!VECTOR_MODE_P (optab_op2_mode))
1620169689Skan	    {
1621169689Skan	      if (vect_print_dump_info (REPORT_DETAILS))
1622169689Skan		fprintf (vect_dump, "operand 1 using scalar mode.");
1623169689Skan	      vec_oprnd1 = op1;
1624169689Skan	    }
1625169689Skan	}
1626169689Skan
1627169689Skan      if (!vec_oprnd1)
1628169689Skan	vec_oprnd1 = vect_get_vec_def_for_operand (op1, stmt, NULL);
1629169689Skan    }
1630169689Skan
1631169689Skan  /* Arguments are ready. create the new vector stmt.  */
1632169689Skan
1633169689Skan  if (op_type == binary_op)
1634169689Skan    *vec_stmt = build2 (MODIFY_EXPR, vectype, vec_dest,
1635169689Skan		build2 (code, vectype, vec_oprnd0, vec_oprnd1));
1636169689Skan  else
1637169689Skan    *vec_stmt = build2 (MODIFY_EXPR, vectype, vec_dest,
1638169689Skan		build1 (code, vectype, vec_oprnd0));
1639169689Skan  new_temp = make_ssa_name (vec_dest, *vec_stmt);
1640169689Skan  TREE_OPERAND (*vec_stmt, 0) = new_temp;
1641169689Skan  vect_finish_stmt_generation (stmt, *vec_stmt, bsi);
1642169689Skan
1643169689Skan  return true;
1644169689Skan}
1645169689Skan
1646169689Skan
1647169689Skan/* Function vectorizable_store.
1648169689Skan
1649169689Skan   Check if STMT defines a non scalar data-ref (array/pointer/structure) that
1650169689Skan   can be vectorized.
1651169689Skan   If VEC_STMT is also passed, vectorize the STMT: create a vectorized
1652169689Skan   stmt to replace it, put it in VEC_STMT, and insert it at BSI.
1653169689Skan   Return FALSE if not a vectorizable STMT, TRUE otherwise.  */
1654169689Skan
1655169689Skanbool
1656169689Skanvectorizable_store (tree stmt, block_stmt_iterator *bsi, tree *vec_stmt)
1657169689Skan{
1658169689Skan  tree scalar_dest;
1659169689Skan  tree data_ref;
1660169689Skan  tree op;
1661169689Skan  tree vec_oprnd1;
1662169689Skan  stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
1663169689Skan  struct data_reference *dr = STMT_VINFO_DATA_REF (stmt_info);
1664169689Skan  tree vectype = STMT_VINFO_VECTYPE (stmt_info);
1665169689Skan  loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
1666169689Skan  enum machine_mode vec_mode;
1667169689Skan  tree dummy;
1668169689Skan  enum dr_alignment_support alignment_support_cheme;
1669169689Skan  ssa_op_iter iter;
1670169689Skan  tree def, def_stmt;
1671169689Skan  enum vect_def_type dt;
1672169689Skan
1673169689Skan  /* Is vectorizable store? */
1674169689Skan
1675169689Skan  if (TREE_CODE (stmt) != MODIFY_EXPR)
1676169689Skan    return false;
1677169689Skan
1678169689Skan  scalar_dest = TREE_OPERAND (stmt, 0);
1679169689Skan  if (TREE_CODE (scalar_dest) != ARRAY_REF
1680169689Skan      && TREE_CODE (scalar_dest) != INDIRECT_REF)
1681169689Skan    return false;
1682169689Skan
1683169689Skan  op = TREE_OPERAND (stmt, 1);
1684169689Skan  if (!vect_is_simple_use (op, loop_vinfo, &def_stmt, &def, &dt))
1685169689Skan    {
1686169689Skan      if (vect_print_dump_info (REPORT_DETAILS))
1687169689Skan        fprintf (vect_dump, "use not simple.");
1688169689Skan      return false;
1689169689Skan    }
1690169689Skan
1691169689Skan  vec_mode = TYPE_MODE (vectype);
1692169689Skan  /* FORNOW. In some cases can vectorize even if data-type not supported
1693169689Skan     (e.g. - array initialization with 0).  */
1694169689Skan  if (mov_optab->handlers[(int)vec_mode].insn_code == CODE_FOR_nothing)
1695169689Skan    return false;
1696169689Skan
1697169689Skan  if (!STMT_VINFO_DATA_REF (stmt_info))
1698169689Skan    return false;
1699169689Skan
1700169689Skan
1701169689Skan  if (!vec_stmt) /* transformation not required.  */
1702169689Skan    {
1703169689Skan      STMT_VINFO_TYPE (stmt_info) = store_vec_info_type;
1704169689Skan      return true;
1705169689Skan    }
1706169689Skan
1707169689Skan  /** Transform.  **/
1708169689Skan
1709169689Skan  if (vect_print_dump_info (REPORT_DETAILS))
1710169689Skan    fprintf (vect_dump, "transform store");
1711169689Skan
1712169689Skan  alignment_support_cheme = vect_supportable_dr_alignment (dr);
1713169689Skan  gcc_assert (alignment_support_cheme);
1714169689Skan  gcc_assert (alignment_support_cheme == dr_aligned);  /* FORNOW */
1715169689Skan
1716169689Skan  /* Handle use - get the vectorized def from the defining stmt.  */
1717169689Skan  vec_oprnd1 = vect_get_vec_def_for_operand (op, stmt, NULL);
1718169689Skan
1719169689Skan  /* Handle def.  */
1720169689Skan  /* FORNOW: make sure the data reference is aligned.  */
1721169689Skan  vect_align_data_ref (stmt);
1722169689Skan  data_ref = vect_create_data_ref_ptr (stmt, bsi, NULL_TREE, &dummy, false);
1723169689Skan  data_ref = build_fold_indirect_ref (data_ref);
1724169689Skan
1725169689Skan  /* Arguments are ready. create the new vector stmt.  */
1726169689Skan  *vec_stmt = build2 (MODIFY_EXPR, vectype, data_ref, vec_oprnd1);
1727169689Skan  vect_finish_stmt_generation (stmt, *vec_stmt, bsi);
1728169689Skan
1729169689Skan  /* Copy the V_MAY_DEFS representing the aliasing of the original array
1730169689Skan     element's definition to the vector's definition then update the
1731169689Skan     defining statement.  The original is being deleted so the same
1732169689Skan     SSA_NAMEs can be used.  */
1733169689Skan  copy_virtual_operands (*vec_stmt, stmt);
1734169689Skan
1735169689Skan  FOR_EACH_SSA_TREE_OPERAND (def, stmt, iter, SSA_OP_VMAYDEF)
1736169689Skan    {
1737169689Skan      SSA_NAME_DEF_STMT (def) = *vec_stmt;
1738169689Skan
1739169689Skan      /* If this virtual def has a use outside the loop and a loop peel is
1740169689Skan	 performed then the def may be renamed by the peel.  Mark it for
1741169689Skan	 renaming so the later use will also be renamed.  */
1742169689Skan      mark_sym_for_renaming (SSA_NAME_VAR (def));
1743169689Skan    }
1744169689Skan
1745169689Skan  return true;
1746169689Skan}
1747169689Skan
1748169689Skan
1749169689Skan/* vectorizable_load.
1750169689Skan
1751169689Skan   Check if STMT reads a non scalar data-ref (array/pointer/structure) that
1752169689Skan   can be vectorized.
1753169689Skan   If VEC_STMT is also passed, vectorize the STMT: create a vectorized
1754169689Skan   stmt to replace it, put it in VEC_STMT, and insert it at BSI.
1755169689Skan   Return FALSE if not a vectorizable STMT, TRUE otherwise.  */
1756169689Skan
1757169689Skanbool
1758169689Skanvectorizable_load (tree stmt, block_stmt_iterator *bsi, tree *vec_stmt)
1759169689Skan{
1760169689Skan  tree scalar_dest;
1761169689Skan  tree vec_dest = NULL;
1762169689Skan  tree data_ref = NULL;
1763169689Skan  tree op;
1764169689Skan  stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
1765169689Skan  struct data_reference *dr = STMT_VINFO_DATA_REF (stmt_info);
1766169689Skan  tree vectype = STMT_VINFO_VECTYPE (stmt_info);
1767169689Skan  tree new_temp;
1768169689Skan  int mode;
1769169689Skan  tree init_addr;
1770169689Skan  tree new_stmt;
1771169689Skan  tree dummy;
1772169689Skan  basic_block new_bb;
1773169689Skan  loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
1774169689Skan  struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
1775169689Skan  edge pe = loop_preheader_edge (loop);
1776169689Skan  enum dr_alignment_support alignment_support_cheme;
1777169689Skan
1778169689Skan  /* Is vectorizable load? */
1779169689Skan  if (!STMT_VINFO_RELEVANT_P (stmt_info))
1780169689Skan    return false;
1781169689Skan
1782169689Skan  gcc_assert (STMT_VINFO_DEF_TYPE (stmt_info) == vect_loop_def);
1783169689Skan
1784169689Skan  if (STMT_VINFO_LIVE_P (stmt_info))
1785169689Skan    {
1786169689Skan      /* FORNOW: not yet supported.  */
1787169689Skan      if (vect_print_dump_info (REPORT_DETAILS))
1788169689Skan        fprintf (vect_dump, "value used after loop.");
1789169689Skan      return false;
1790169689Skan    }
1791169689Skan
1792169689Skan  if (TREE_CODE (stmt) != MODIFY_EXPR)
1793169689Skan    return false;
1794169689Skan
1795169689Skan  scalar_dest = TREE_OPERAND (stmt, 0);
1796169689Skan  if (TREE_CODE (scalar_dest) != SSA_NAME)
1797169689Skan    return false;
1798169689Skan
1799169689Skan  op = TREE_OPERAND (stmt, 1);
1800169689Skan  if (TREE_CODE (op) != ARRAY_REF && TREE_CODE (op) != INDIRECT_REF)
1801169689Skan    return false;
1802169689Skan
1803169689Skan  if (!STMT_VINFO_DATA_REF (stmt_info))
1804169689Skan    return false;
1805169689Skan
1806169689Skan  mode = (int) TYPE_MODE (vectype);
1807169689Skan
1808169689Skan  /* FORNOW. In some cases can vectorize even if data-type not supported
1809169689Skan    (e.g. - data copies).  */
1810169689Skan  if (mov_optab->handlers[mode].insn_code == CODE_FOR_nothing)
1811169689Skan    {
1812169689Skan      if (vect_print_dump_info (REPORT_DETAILS))
1813169689Skan	fprintf (vect_dump, "Aligned load, but unsupported type.");
1814169689Skan      return false;
1815169689Skan    }
1816169689Skan
1817169689Skan  if (!vec_stmt) /* transformation not required.  */
1818169689Skan    {
1819169689Skan      STMT_VINFO_TYPE (stmt_info) = load_vec_info_type;
1820169689Skan      return true;
1821169689Skan    }
1822169689Skan
1823169689Skan  /** Transform.  **/
1824169689Skan
1825169689Skan  if (vect_print_dump_info (REPORT_DETAILS))
1826169689Skan    fprintf (vect_dump, "transform load.");
1827169689Skan
1828169689Skan  alignment_support_cheme = vect_supportable_dr_alignment (dr);
1829169689Skan  gcc_assert (alignment_support_cheme);
1830169689Skan
1831169689Skan  if (alignment_support_cheme == dr_aligned
1832169689Skan      || alignment_support_cheme == dr_unaligned_supported)
1833169689Skan    {
1834169689Skan      /* Create:
1835169689Skan         p = initial_addr;
1836169689Skan         indx = 0;
1837169689Skan         loop {
1838169689Skan           vec_dest = *(p);
1839169689Skan           indx = indx + 1;
1840169689Skan         }
1841169689Skan      */
1842169689Skan
1843169689Skan      vec_dest = vect_create_destination_var (scalar_dest, vectype);
1844169689Skan      data_ref = vect_create_data_ref_ptr (stmt, bsi, NULL_TREE, &dummy, false);
1845169689Skan      if (aligned_access_p (dr))
1846169689Skan        data_ref = build_fold_indirect_ref (data_ref);
1847169689Skan      else
1848169689Skan	{
1849169689Skan	  int mis = DR_MISALIGNMENT (dr);
1850169689Skan	  tree tmis = (mis == -1 ? size_zero_node : size_int (mis));
1851169689Skan	  tmis = size_binop (MULT_EXPR, tmis, size_int(BITS_PER_UNIT));
1852169689Skan	  data_ref = build2 (MISALIGNED_INDIRECT_REF, vectype, data_ref, tmis);
1853169689Skan	}
1854169689Skan      new_stmt = build2 (MODIFY_EXPR, vectype, vec_dest, data_ref);
1855169689Skan      new_temp = make_ssa_name (vec_dest, new_stmt);
1856169689Skan      TREE_OPERAND (new_stmt, 0) = new_temp;
1857169689Skan      vect_finish_stmt_generation (stmt, new_stmt, bsi);
1858169689Skan      copy_virtual_operands (new_stmt, stmt);
1859169689Skan    }
1860169689Skan  else if (alignment_support_cheme == dr_unaligned_software_pipeline)
1861169689Skan    {
1862169689Skan      /* Create:
1863169689Skan	 p1 = initial_addr;
1864169689Skan	 msq_init = *(floor(p1))
1865169689Skan	 p2 = initial_addr + VS - 1;
1866169689Skan	 magic = have_builtin ? builtin_result : initial_address;
1867169689Skan	 indx = 0;
1868169689Skan	 loop {
1869169689Skan	   p2' = p2 + indx * vectype_size
1870169689Skan	   lsq = *(floor(p2'))
1871169689Skan	   vec_dest = realign_load (msq, lsq, magic)
1872169689Skan	   indx = indx + 1;
1873169689Skan	   msq = lsq;
1874169689Skan	 }
1875169689Skan      */
1876169689Skan
1877169689Skan      tree offset;
1878169689Skan      tree magic;
1879169689Skan      tree phi_stmt;
1880169689Skan      tree msq_init;
1881169689Skan      tree msq, lsq;
1882169689Skan      tree dataref_ptr;
1883169689Skan      tree params;
1884169689Skan
1885169689Skan      /* <1> Create msq_init = *(floor(p1)) in the loop preheader  */
1886169689Skan      vec_dest = vect_create_destination_var (scalar_dest, vectype);
1887169689Skan      data_ref = vect_create_data_ref_ptr (stmt, bsi, NULL_TREE,
1888169689Skan					   &init_addr, true);
1889169689Skan      data_ref = build1 (ALIGN_INDIRECT_REF, vectype, data_ref);
1890169689Skan      new_stmt = build2 (MODIFY_EXPR, vectype, vec_dest, data_ref);
1891169689Skan      new_temp = make_ssa_name (vec_dest, new_stmt);
1892169689Skan      TREE_OPERAND (new_stmt, 0) = new_temp;
1893169689Skan      new_bb = bsi_insert_on_edge_immediate (pe, new_stmt);
1894169689Skan      gcc_assert (!new_bb);
1895169689Skan      msq_init = TREE_OPERAND (new_stmt, 0);
1896169689Skan      copy_virtual_operands (new_stmt, stmt);
1897169689Skan      update_vuses_to_preheader (new_stmt, loop);
1898169689Skan
1899169689Skan
1900169689Skan      /* <2> Create lsq = *(floor(p2')) in the loop  */
1901169689Skan      offset = size_int (TYPE_VECTOR_SUBPARTS (vectype) - 1);
1902169689Skan      vec_dest = vect_create_destination_var (scalar_dest, vectype);
1903169689Skan      dataref_ptr = vect_create_data_ref_ptr (stmt, bsi, offset, &dummy, false);
1904169689Skan      data_ref = build1 (ALIGN_INDIRECT_REF, vectype, dataref_ptr);
1905169689Skan      new_stmt = build2 (MODIFY_EXPR, vectype, vec_dest, data_ref);
1906169689Skan      new_temp = make_ssa_name (vec_dest, new_stmt);
1907169689Skan      TREE_OPERAND (new_stmt, 0) = new_temp;
1908169689Skan      vect_finish_stmt_generation (stmt, new_stmt, bsi);
1909169689Skan      lsq = TREE_OPERAND (new_stmt, 0);
1910169689Skan      copy_virtual_operands (new_stmt, stmt);
1911169689Skan
1912169689Skan
1913169689Skan      /* <3> */
1914169689Skan      if (targetm.vectorize.builtin_mask_for_load)
1915169689Skan	{
1916169689Skan	  /* Create permutation mask, if required, in loop preheader.  */
1917169689Skan	  tree builtin_decl;
1918169689Skan	  params = build_tree_list (NULL_TREE, init_addr);
1919169689Skan	  vec_dest = vect_create_destination_var (scalar_dest, vectype);
1920169689Skan	  builtin_decl = targetm.vectorize.builtin_mask_for_load ();
1921169689Skan	  new_stmt = build_function_call_expr (builtin_decl, params);
1922169689Skan	  new_stmt = build2 (MODIFY_EXPR, vectype, vec_dest, new_stmt);
1923169689Skan	  new_temp = make_ssa_name (vec_dest, new_stmt);
1924169689Skan	  TREE_OPERAND (new_stmt, 0) = new_temp;
1925169689Skan	  new_bb = bsi_insert_on_edge_immediate (pe, new_stmt);
1926169689Skan	  gcc_assert (!new_bb);
1927169689Skan	  magic = TREE_OPERAND (new_stmt, 0);
1928169689Skan
1929169689Skan	  /* The result of the CALL_EXPR to this builtin is determined from
1930169689Skan	     the value of the parameter and no global variables are touched
1931169689Skan	     which makes the builtin a "const" function.  Requiring the
1932169689Skan	     builtin to have the "const" attribute makes it unnecessary
1933169689Skan	     to call mark_call_clobbered.  */
1934169689Skan	  gcc_assert (TREE_READONLY (builtin_decl));
1935169689Skan	}
1936169689Skan      else
1937169689Skan	{
1938169689Skan	  /* Use current address instead of init_addr for reduced reg pressure.
1939169689Skan	   */
1940169689Skan	  magic = dataref_ptr;
1941169689Skan	}
1942169689Skan
1943169689Skan
1944169689Skan      /* <4> Create msq = phi <msq_init, lsq> in loop  */
1945169689Skan      vec_dest = vect_create_destination_var (scalar_dest, vectype);
1946169689Skan      msq = make_ssa_name (vec_dest, NULL_TREE);
1947169689Skan      phi_stmt = create_phi_node (msq, loop->header); /* CHECKME */
1948169689Skan      SSA_NAME_DEF_STMT (msq) = phi_stmt;
1949169689Skan      add_phi_arg (phi_stmt, msq_init, loop_preheader_edge (loop));
1950169689Skan      add_phi_arg (phi_stmt, lsq, loop_latch_edge (loop));
1951169689Skan
1952169689Skan
1953169689Skan      /* <5> Create <vec_dest = realign_load (msq, lsq, magic)> in loop  */
1954169689Skan      vec_dest = vect_create_destination_var (scalar_dest, vectype);
1955169689Skan      new_stmt = build3 (REALIGN_LOAD_EXPR, vectype, msq, lsq, magic);
1956169689Skan      new_stmt = build2 (MODIFY_EXPR, vectype, vec_dest, new_stmt);
1957169689Skan      new_temp = make_ssa_name (vec_dest, new_stmt);
1958169689Skan      TREE_OPERAND (new_stmt, 0) = new_temp;
1959169689Skan      vect_finish_stmt_generation (stmt, new_stmt, bsi);
1960169689Skan    }
1961169689Skan  else
1962169689Skan    gcc_unreachable ();
1963169689Skan
1964169689Skan  *vec_stmt = new_stmt;
1965169689Skan  return true;
1966169689Skan}
1967169689Skan
1968169689Skan
1969169689Skan/* Function vectorizable_live_operation.
1970169689Skan
1971169689Skan   STMT computes a value that is used outside the loop. Check if
1972169689Skan   it can be supported.  */
1973169689Skan
1974169689Skanbool
1975169689Skanvectorizable_live_operation (tree stmt,
1976169689Skan                             block_stmt_iterator *bsi ATTRIBUTE_UNUSED,
1977169689Skan                             tree *vec_stmt ATTRIBUTE_UNUSED)
1978169689Skan{
1979169689Skan  tree operation;
1980169689Skan  stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
1981169689Skan  loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
1982169689Skan  int i;
1983169689Skan  enum tree_code code;
1984169689Skan  int op_type;
1985169689Skan  tree op;
1986169689Skan  tree def, def_stmt;
1987169689Skan  enum vect_def_type dt;
1988169689Skan
1989169689Skan  if (!STMT_VINFO_LIVE_P (stmt_info))
1990169689Skan    return false;
1991169689Skan
1992169689Skan  if (TREE_CODE (stmt) != MODIFY_EXPR)
1993169689Skan    return false;
1994169689Skan
1995169689Skan  if (TREE_CODE (TREE_OPERAND (stmt, 0)) != SSA_NAME)
1996169689Skan    return false;
1997169689Skan
1998169689Skan  operation = TREE_OPERAND (stmt, 1);
1999169689Skan  code = TREE_CODE (operation);
2000169689Skan
2001169689Skan  op_type = TREE_CODE_LENGTH (code);
2002169689Skan
2003169689Skan  /* FORNOW: support only if all uses are invariant. This means
2004169689Skan     that the scalar operations can remain in place, unvectorized.
2005169689Skan     The original last scalar value that they compute will be used.  */
2006169689Skan
2007169689Skan  for (i = 0; i < op_type; i++)
2008169689Skan    {
2009169689Skan      op = TREE_OPERAND (operation, i);
2010169689Skan      if (!vect_is_simple_use (op, loop_vinfo, &def_stmt, &def, &dt))
2011169689Skan        {
2012169689Skan          if (vect_print_dump_info (REPORT_DETAILS))
2013169689Skan            fprintf (vect_dump, "use not simple.");
2014169689Skan          return false;
2015169689Skan        }
2016169689Skan
2017169689Skan      if (dt != vect_invariant_def && dt != vect_constant_def)
2018169689Skan        return false;
2019169689Skan    }
2020169689Skan
2021169689Skan  /* No transformation is required for the cases we currently support.  */
2022169689Skan  return true;
2023169689Skan}
2024169689Skan
2025169689Skan
2026169689Skan/* Function vect_is_simple_cond.
2027169689Skan
2028169689Skan   Input:
2029169689Skan   LOOP - the loop that is being vectorized.
2030169689Skan   COND - Condition that is checked for simple use.
2031169689Skan
2032169689Skan   Returns whether a COND can be vectorized.  Checks whether
2033169689Skan   condition operands are supportable using vec_is_simple_use.  */
2034169689Skan
2035169689Skanstatic bool
2036169689Skanvect_is_simple_cond (tree cond, loop_vec_info loop_vinfo)
2037169689Skan{
2038169689Skan  tree lhs, rhs;
2039169689Skan  tree def;
2040169689Skan  enum vect_def_type dt;
2041169689Skan
2042169689Skan  if (!COMPARISON_CLASS_P (cond))
2043169689Skan    return false;
2044169689Skan
2045169689Skan  lhs = TREE_OPERAND (cond, 0);
2046169689Skan  rhs = TREE_OPERAND (cond, 1);
2047169689Skan
2048169689Skan  if (TREE_CODE (lhs) == SSA_NAME)
2049169689Skan    {
2050169689Skan      tree lhs_def_stmt = SSA_NAME_DEF_STMT (lhs);
2051169689Skan      if (!vect_is_simple_use (lhs, loop_vinfo, &lhs_def_stmt, &def, &dt))
2052169689Skan	return false;
2053169689Skan    }
2054169689Skan  else if (TREE_CODE (lhs) != INTEGER_CST && TREE_CODE (lhs) != REAL_CST)
2055169689Skan    return false;
2056169689Skan
2057169689Skan  if (TREE_CODE (rhs) == SSA_NAME)
2058169689Skan    {
2059169689Skan      tree rhs_def_stmt = SSA_NAME_DEF_STMT (rhs);
2060169689Skan      if (!vect_is_simple_use (rhs, loop_vinfo, &rhs_def_stmt, &def, &dt))
2061169689Skan	return false;
2062169689Skan    }
2063169689Skan  else if (TREE_CODE (rhs) != INTEGER_CST  && TREE_CODE (rhs) != REAL_CST)
2064169689Skan    return false;
2065169689Skan
2066169689Skan  return true;
2067169689Skan}
2068169689Skan
2069169689Skan/* vectorizable_condition.
2070169689Skan
2071169689Skan   Check if STMT is conditional modify expression that can be vectorized.
2072169689Skan   If VEC_STMT is also passed, vectorize the STMT: create a vectorized
2073169689Skan   stmt using VEC_COND_EXPR  to replace it, put it in VEC_STMT, and insert it
2074169689Skan   at BSI.
2075169689Skan
2076169689Skan   Return FALSE if not a vectorizable STMT, TRUE otherwise.  */
2077169689Skan
2078169689Skanbool
2079169689Skanvectorizable_condition (tree stmt, block_stmt_iterator *bsi, tree *vec_stmt)
2080169689Skan{
2081169689Skan  tree scalar_dest = NULL_TREE;
2082169689Skan  tree vec_dest = NULL_TREE;
2083169689Skan  tree op = NULL_TREE;
2084169689Skan  tree cond_expr, then_clause, else_clause;
2085169689Skan  stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
2086169689Skan  tree vectype = STMT_VINFO_VECTYPE (stmt_info);
2087169689Skan  tree vec_cond_lhs, vec_cond_rhs, vec_then_clause, vec_else_clause;
2088169689Skan  tree vec_compare, vec_cond_expr;
2089169689Skan  tree new_temp;
2090169689Skan  loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
2091169689Skan  enum machine_mode vec_mode;
2092169689Skan  tree def;
2093169689Skan  enum vect_def_type dt;
2094169689Skan
2095169689Skan  if (!STMT_VINFO_RELEVANT_P (stmt_info))
2096169689Skan    return false;
2097169689Skan
2098169689Skan  gcc_assert (STMT_VINFO_DEF_TYPE (stmt_info) == vect_loop_def);
2099169689Skan
2100169689Skan  if (STMT_VINFO_LIVE_P (stmt_info))
2101169689Skan    {
2102169689Skan      /* FORNOW: not yet supported.  */
2103169689Skan      if (vect_print_dump_info (REPORT_DETAILS))
2104169689Skan        fprintf (vect_dump, "value used after loop.");
2105169689Skan      return false;
2106169689Skan    }
2107169689Skan
2108169689Skan  if (TREE_CODE (stmt) != MODIFY_EXPR)
2109169689Skan    return false;
2110169689Skan
2111169689Skan  op = TREE_OPERAND (stmt, 1);
2112169689Skan
2113169689Skan  if (TREE_CODE (op) != COND_EXPR)
2114169689Skan    return false;
2115169689Skan
2116169689Skan  cond_expr = TREE_OPERAND (op, 0);
2117169689Skan  then_clause = TREE_OPERAND (op, 1);
2118169689Skan  else_clause = TREE_OPERAND (op, 2);
2119169689Skan
2120169689Skan  if (!vect_is_simple_cond (cond_expr, loop_vinfo))
2121169689Skan    return false;
2122169689Skan
2123169689Skan  /* We do not handle two different vector types for the condition
2124169689Skan     and the values.  */
2125169689Skan  if (TREE_TYPE (TREE_OPERAND (cond_expr, 0)) != TREE_TYPE (vectype))
2126169689Skan    return false;
2127169689Skan
2128169689Skan  if (TREE_CODE (then_clause) == SSA_NAME)
2129169689Skan    {
2130169689Skan      tree then_def_stmt = SSA_NAME_DEF_STMT (then_clause);
2131169689Skan      if (!vect_is_simple_use (then_clause, loop_vinfo,
2132169689Skan			       &then_def_stmt, &def, &dt))
2133169689Skan	return false;
2134169689Skan    }
2135169689Skan  else if (TREE_CODE (then_clause) != INTEGER_CST
2136169689Skan	   && TREE_CODE (then_clause) != REAL_CST)
2137169689Skan    return false;
2138169689Skan
2139169689Skan  if (TREE_CODE (else_clause) == SSA_NAME)
2140169689Skan    {
2141169689Skan      tree else_def_stmt = SSA_NAME_DEF_STMT (else_clause);
2142169689Skan      if (!vect_is_simple_use (else_clause, loop_vinfo,
2143169689Skan			       &else_def_stmt, &def, &dt))
2144169689Skan	return false;
2145169689Skan    }
2146169689Skan  else if (TREE_CODE (else_clause) != INTEGER_CST
2147169689Skan	   && TREE_CODE (else_clause) != REAL_CST)
2148169689Skan    return false;
2149169689Skan
2150169689Skan
2151169689Skan  vec_mode = TYPE_MODE (vectype);
2152169689Skan
2153169689Skan  if (!vec_stmt)
2154169689Skan    {
2155169689Skan      STMT_VINFO_TYPE (stmt_info) = condition_vec_info_type;
2156169689Skan      return expand_vec_cond_expr_p (op, vec_mode);
2157169689Skan    }
2158169689Skan
2159169689Skan  /* Transform */
2160169689Skan
2161169689Skan  /* Handle def.  */
2162169689Skan  scalar_dest = TREE_OPERAND (stmt, 0);
2163169689Skan  vec_dest = vect_create_destination_var (scalar_dest, vectype);
2164169689Skan
2165169689Skan  /* Handle cond expr.  */
2166169689Skan  vec_cond_lhs =
2167169689Skan    vect_get_vec_def_for_operand (TREE_OPERAND (cond_expr, 0), stmt, NULL);
2168169689Skan  vec_cond_rhs =
2169169689Skan    vect_get_vec_def_for_operand (TREE_OPERAND (cond_expr, 1), stmt, NULL);
2170169689Skan  vec_then_clause = vect_get_vec_def_for_operand (then_clause, stmt, NULL);
2171169689Skan  vec_else_clause = vect_get_vec_def_for_operand (else_clause, stmt, NULL);
2172169689Skan
2173169689Skan  /* Arguments are ready. create the new vector stmt.  */
2174169689Skan  vec_compare = build2 (TREE_CODE (cond_expr), vectype,
2175169689Skan			vec_cond_lhs, vec_cond_rhs);
2176169689Skan  vec_cond_expr = build3 (VEC_COND_EXPR, vectype,
2177169689Skan			  vec_compare, vec_then_clause, vec_else_clause);
2178169689Skan
2179169689Skan  *vec_stmt = build2 (MODIFY_EXPR, vectype, vec_dest, vec_cond_expr);
2180169689Skan  new_temp = make_ssa_name (vec_dest, *vec_stmt);
2181169689Skan  TREE_OPERAND (*vec_stmt, 0) = new_temp;
2182169689Skan  vect_finish_stmt_generation (stmt, *vec_stmt, bsi);
2183169689Skan
2184169689Skan  return true;
2185169689Skan}
2186169689Skan
2187169689Skan/* Function vect_transform_stmt.
2188169689Skan
2189169689Skan   Create a vectorized stmt to replace STMT, and insert it at BSI.  */
2190169689Skan
2191169689Skanbool
2192169689Skanvect_transform_stmt (tree stmt, block_stmt_iterator *bsi)
2193169689Skan{
2194169689Skan  bool is_store = false;
2195169689Skan  tree vec_stmt = NULL_TREE;
2196169689Skan  stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
2197169689Skan  tree orig_stmt_in_pattern;
2198169689Skan  bool done;
2199169689Skan
2200169689Skan  if (STMT_VINFO_RELEVANT_P (stmt_info))
2201169689Skan    {
2202169689Skan      switch (STMT_VINFO_TYPE (stmt_info))
2203169689Skan      {
2204169689Skan      case op_vec_info_type:
2205169689Skan	done = vectorizable_operation (stmt, bsi, &vec_stmt);
2206169689Skan	gcc_assert (done);
2207169689Skan	break;
2208169689Skan
2209169689Skan      case assignment_vec_info_type:
2210169689Skan	done = vectorizable_assignment (stmt, bsi, &vec_stmt);
2211169689Skan	gcc_assert (done);
2212169689Skan	break;
2213169689Skan
2214169689Skan      case load_vec_info_type:
2215169689Skan	done = vectorizable_load (stmt, bsi, &vec_stmt);
2216169689Skan	gcc_assert (done);
2217169689Skan	break;
2218169689Skan
2219169689Skan      case store_vec_info_type:
2220169689Skan	done = vectorizable_store (stmt, bsi, &vec_stmt);
2221169689Skan	gcc_assert (done);
2222169689Skan	is_store = true;
2223169689Skan	break;
2224169689Skan
2225169689Skan      case condition_vec_info_type:
2226169689Skan	done = vectorizable_condition (stmt, bsi, &vec_stmt);
2227169689Skan	gcc_assert (done);
2228169689Skan	break;
2229169689Skan
2230169689Skan      default:
2231169689Skan	if (vect_print_dump_info (REPORT_DETAILS))
2232169689Skan	  fprintf (vect_dump, "stmt not supported.");
2233169689Skan	gcc_unreachable ();
2234169689Skan      }
2235169689Skan
2236169689Skan      gcc_assert (vec_stmt);
2237169689Skan      STMT_VINFO_VEC_STMT (stmt_info) = vec_stmt;
2238169689Skan      orig_stmt_in_pattern = STMT_VINFO_RELATED_STMT (stmt_info);
2239169689Skan      if (orig_stmt_in_pattern)
2240169689Skan        {
2241169689Skan          stmt_vec_info stmt_vinfo = vinfo_for_stmt (orig_stmt_in_pattern);
2242169689Skan          if (STMT_VINFO_IN_PATTERN_P (stmt_vinfo))
2243169689Skan            {
2244169689Skan              gcc_assert (STMT_VINFO_RELATED_STMT (stmt_vinfo) == stmt);
2245169689Skan
2246169689Skan              /* STMT was inserted by the vectorizer to replace a computation
2247169689Skan                 idiom.  ORIG_STMT_IN_PATTERN is a stmt in the original
2248169689Skan                 sequence that computed this idiom.  We need to record a pointer
2249169689Skan                 to VEC_STMT in the stmt_info of ORIG_STMT_IN_PATTERN.  See more
2250169689Skan                 detail in the documentation of vect_pattern_recog.  */
2251169689Skan
2252169689Skan              STMT_VINFO_VEC_STMT (stmt_vinfo) = vec_stmt;
2253169689Skan            }
2254169689Skan        }
2255169689Skan    }
2256169689Skan
2257169689Skan  if (STMT_VINFO_LIVE_P (stmt_info))
2258169689Skan    {
2259169689Skan      switch (STMT_VINFO_TYPE (stmt_info))
2260169689Skan      {
2261169689Skan      case reduc_vec_info_type:
2262169689Skan        done = vectorizable_reduction (stmt, bsi, &vec_stmt);
2263169689Skan        gcc_assert (done);
2264169689Skan        break;
2265169689Skan
2266169689Skan      default:
2267169689Skan        done = vectorizable_live_operation (stmt, bsi, &vec_stmt);
2268169689Skan        gcc_assert (done);
2269169689Skan      }
2270169689Skan
2271169689Skan      if (vec_stmt)
2272169689Skan        {
2273169689Skan          gcc_assert (!STMT_VINFO_VEC_STMT (stmt_info));
2274169689Skan          STMT_VINFO_VEC_STMT (stmt_info) = vec_stmt;
2275169689Skan        }
2276169689Skan    }
2277169689Skan
2278169689Skan  return is_store;
2279169689Skan}
2280169689Skan
2281169689Skan
2282169689Skan/* This function builds ni_name = number of iterations loop executes
2283169689Skan   on the loop preheader.  */
2284169689Skan
2285169689Skanstatic tree
2286169689Skanvect_build_loop_niters (loop_vec_info loop_vinfo)
2287169689Skan{
2288169689Skan  tree ni_name, stmt, var;
2289169689Skan  edge pe;
2290169689Skan  struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
2291169689Skan  tree ni = unshare_expr (LOOP_VINFO_NITERS (loop_vinfo));
2292169689Skan
2293169689Skan  var = create_tmp_var (TREE_TYPE (ni), "niters");
2294169689Skan  add_referenced_var (var);
2295169689Skan  ni_name = force_gimple_operand (ni, &stmt, false, var);
2296169689Skan
2297169689Skan  pe = loop_preheader_edge (loop);
2298169689Skan  if (stmt)
2299169689Skan    {
2300169689Skan      basic_block new_bb = bsi_insert_on_edge_immediate (pe, stmt);
2301169689Skan      gcc_assert (!new_bb);
2302169689Skan    }
2303169689Skan
2304169689Skan  return ni_name;
2305169689Skan}
2306169689Skan
2307169689Skan
2308169689Skan/* This function generates the following statements:
2309169689Skan
2310169689Skan ni_name = number of iterations loop executes
2311169689Skan ratio = ni_name / vf
2312169689Skan ratio_mult_vf_name = ratio * vf
2313169689Skan
2314169689Skan and places them at the loop preheader edge.  */
2315169689Skan
2316169689Skanstatic void
2317169689Skanvect_generate_tmps_on_preheader (loop_vec_info loop_vinfo,
2318169689Skan				 tree *ni_name_ptr,
2319169689Skan				 tree *ratio_mult_vf_name_ptr,
2320169689Skan				 tree *ratio_name_ptr)
2321169689Skan{
2322169689Skan
2323169689Skan  edge pe;
2324169689Skan  basic_block new_bb;
2325169689Skan  tree stmt, ni_name;
2326169689Skan  tree var;
2327169689Skan  tree ratio_name;
2328169689Skan  tree ratio_mult_vf_name;
2329169689Skan  struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
2330169689Skan  tree ni = LOOP_VINFO_NITERS (loop_vinfo);
2331169689Skan  int vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
2332169689Skan  tree log_vf;
2333169689Skan
2334169689Skan  pe = loop_preheader_edge (loop);
2335169689Skan
2336169689Skan  /* Generate temporary variable that contains
2337169689Skan     number of iterations loop executes.  */
2338169689Skan
2339169689Skan  ni_name = vect_build_loop_niters (loop_vinfo);
2340169689Skan  log_vf = build_int_cst (TREE_TYPE (ni), exact_log2 (vf));
2341169689Skan
2342169689Skan  /* Create: ratio = ni >> log2(vf) */
2343169689Skan
2344169689Skan  var = create_tmp_var (TREE_TYPE (ni), "bnd");
2345169689Skan  add_referenced_var (var);
2346169689Skan  ratio_name = make_ssa_name (var, NULL_TREE);
2347169689Skan  stmt = build2 (MODIFY_EXPR, void_type_node, ratio_name,
2348169689Skan	   build2 (RSHIFT_EXPR, TREE_TYPE (ni_name), ni_name, log_vf));
2349169689Skan  SSA_NAME_DEF_STMT (ratio_name) = stmt;
2350169689Skan
2351169689Skan  pe = loop_preheader_edge (loop);
2352169689Skan  new_bb = bsi_insert_on_edge_immediate (pe, stmt);
2353169689Skan  gcc_assert (!new_bb);
2354169689Skan
2355169689Skan  /* Create: ratio_mult_vf = ratio << log2 (vf).  */
2356169689Skan
2357169689Skan  var = create_tmp_var (TREE_TYPE (ni), "ratio_mult_vf");
2358169689Skan  add_referenced_var (var);
2359169689Skan  ratio_mult_vf_name = make_ssa_name (var, NULL_TREE);
2360169689Skan  stmt = build2 (MODIFY_EXPR, void_type_node, ratio_mult_vf_name,
2361169689Skan	   build2 (LSHIFT_EXPR, TREE_TYPE (ratio_name), ratio_name, log_vf));
2362169689Skan  SSA_NAME_DEF_STMT (ratio_mult_vf_name) = stmt;
2363169689Skan
2364169689Skan  pe = loop_preheader_edge (loop);
2365169689Skan  new_bb = bsi_insert_on_edge_immediate (pe, stmt);
2366169689Skan  gcc_assert (!new_bb);
2367169689Skan
2368169689Skan  *ni_name_ptr = ni_name;
2369169689Skan  *ratio_mult_vf_name_ptr = ratio_mult_vf_name;
2370169689Skan  *ratio_name_ptr = ratio_name;
2371169689Skan
2372169689Skan  return;
2373169689Skan}
2374169689Skan
2375169689Skan
2376169689Skan/* Function update_vuses_to_preheader.
2377169689Skan
2378169689Skan   Input:
2379169689Skan   STMT - a statement with potential VUSEs.
2380169689Skan   LOOP - the loop whose preheader will contain STMT.
2381169689Skan
2382169689Skan   It's possible to vectorize a loop even though an SSA_NAME from a VUSE
2383169689Skan   appears to be defined in a V_MAY_DEF in another statement in a loop.
2384169689Skan   One such case is when the VUSE is at the dereference of a __restricted__
2385169689Skan   pointer in a load and the V_MAY_DEF is at the dereference of a different
2386169689Skan   __restricted__ pointer in a store.  Vectorization may result in
2387169689Skan   copy_virtual_uses being called to copy the problematic VUSE to a new
2388169689Skan   statement that is being inserted in the loop preheader.  This procedure
2389169689Skan   is called to change the SSA_NAME in the new statement's VUSE from the
2390169689Skan   SSA_NAME updated in the loop to the related SSA_NAME available on the
2391169689Skan   path entering the loop.
2392169689Skan
2393169689Skan   When this function is called, we have the following situation:
2394169689Skan
2395169689Skan        # vuse <name1>
2396169689Skan        S1: vload
2397169689Skan    do {
2398169689Skan        # name1 = phi < name0 , name2>
2399169689Skan
2400169689Skan        # vuse <name1>
2401169689Skan        S2: vload
2402169689Skan
2403169689Skan        # name2 = vdef <name1>
2404169689Skan        S3: vstore
2405169689Skan
2406169689Skan    }while...
2407169689Skan
2408169689Skan   Stmt S1 was created in the loop preheader block as part of misaligned-load
2409169689Skan   handling. This function fixes the name of the vuse of S1 from 'name1' to
2410169689Skan   'name0'.  */
2411169689Skan
2412169689Skanstatic void
2413169689Skanupdate_vuses_to_preheader (tree stmt, struct loop *loop)
2414169689Skan{
2415169689Skan  basic_block header_bb = loop->header;
2416169689Skan  edge preheader_e = loop_preheader_edge (loop);
2417169689Skan  ssa_op_iter iter;
2418169689Skan  use_operand_p use_p;
2419169689Skan
2420169689Skan  FOR_EACH_SSA_USE_OPERAND (use_p, stmt, iter, SSA_OP_VUSE)
2421169689Skan    {
2422169689Skan      tree ssa_name = USE_FROM_PTR (use_p);
2423169689Skan      tree def_stmt = SSA_NAME_DEF_STMT (ssa_name);
2424169689Skan      tree name_var = SSA_NAME_VAR (ssa_name);
2425169689Skan      basic_block bb = bb_for_stmt (def_stmt);
2426169689Skan
2427169689Skan      /* For a use before any definitions, def_stmt is a NOP_EXPR.  */
2428169689Skan      if (!IS_EMPTY_STMT (def_stmt)
2429169689Skan	  && flow_bb_inside_loop_p (loop, bb))
2430169689Skan        {
2431169689Skan          /* If the block containing the statement defining the SSA_NAME
2432169689Skan             is in the loop then it's necessary to find the definition
2433169689Skan             outside the loop using the PHI nodes of the header.  */
2434169689Skan	  tree phi;
2435169689Skan	  bool updated = false;
2436169689Skan
2437169689Skan	  for (phi = phi_nodes (header_bb); phi; phi = TREE_CHAIN (phi))
2438169689Skan	    {
2439169689Skan	      if (SSA_NAME_VAR (PHI_RESULT (phi)) == name_var)
2440169689Skan		{
2441169689Skan		  SET_USE (use_p, PHI_ARG_DEF (phi, preheader_e->dest_idx));
2442169689Skan		  updated = true;
2443169689Skan		  break;
2444169689Skan		}
2445169689Skan	    }
2446169689Skan	  gcc_assert (updated);
2447169689Skan	}
2448169689Skan    }
2449169689Skan}
2450169689Skan
2451169689Skan
2452169689Skan/*   Function vect_update_ivs_after_vectorizer.
2453169689Skan
2454169689Skan     "Advance" the induction variables of LOOP to the value they should take
2455169689Skan     after the execution of LOOP.  This is currently necessary because the
2456169689Skan     vectorizer does not handle induction variables that are used after the
2457169689Skan     loop.  Such a situation occurs when the last iterations of LOOP are
2458169689Skan     peeled, because:
2459169689Skan     1. We introduced new uses after LOOP for IVs that were not originally used
2460169689Skan        after LOOP: the IVs of LOOP are now used by an epilog loop.
2461169689Skan     2. LOOP is going to be vectorized; this means that it will iterate N/VF
2462169689Skan        times, whereas the loop IVs should be bumped N times.
2463169689Skan
2464169689Skan     Input:
2465169689Skan     - LOOP - a loop that is going to be vectorized. The last few iterations
2466169689Skan              of LOOP were peeled.
2467169689Skan     - NITERS - the number of iterations that LOOP executes (before it is
2468169689Skan                vectorized). i.e, the number of times the ivs should be bumped.
2469169689Skan     - UPDATE_E - a successor edge of LOOP->exit that is on the (only) path
2470169689Skan                  coming out from LOOP on which there are uses of the LOOP ivs
2471169689Skan		  (this is the path from LOOP->exit to epilog_loop->preheader).
2472169689Skan
2473169689Skan                  The new definitions of the ivs are placed in LOOP->exit.
2474169689Skan                  The phi args associated with the edge UPDATE_E in the bb
2475169689Skan                  UPDATE_E->dest are updated accordingly.
2476169689Skan
2477169689Skan     Assumption 1: Like the rest of the vectorizer, this function assumes
2478169689Skan     a single loop exit that has a single predecessor.
2479169689Skan
2480169689Skan     Assumption 2: The phi nodes in the LOOP header and in update_bb are
2481169689Skan     organized in the same order.
2482169689Skan
2483169689Skan     Assumption 3: The access function of the ivs is simple enough (see
2484169689Skan     vect_can_advance_ivs_p).  This assumption will be relaxed in the future.
2485169689Skan
2486169689Skan     Assumption 4: Exactly one of the successors of LOOP exit-bb is on a path
2487169689Skan     coming out of LOOP on which the ivs of LOOP are used (this is the path
2488169689Skan     that leads to the epilog loop; other paths skip the epilog loop).  This
2489169689Skan     path starts with the edge UPDATE_E, and its destination (denoted update_bb)
2490169689Skan     needs to have its phis updated.
2491169689Skan */
2492169689Skan
2493169689Skanstatic void
2494169689Skanvect_update_ivs_after_vectorizer (loop_vec_info loop_vinfo, tree niters,
2495169689Skan				  edge update_e)
2496169689Skan{
2497169689Skan  struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
2498169689Skan  basic_block exit_bb = loop->single_exit->dest;
2499169689Skan  tree phi, phi1;
2500169689Skan  basic_block update_bb = update_e->dest;
2501169689Skan
2502169689Skan  /* gcc_assert (vect_can_advance_ivs_p (loop_vinfo)); */
2503169689Skan
2504169689Skan  /* Make sure there exists a single-predecessor exit bb:  */
2505169689Skan  gcc_assert (single_pred_p (exit_bb));
2506169689Skan
2507169689Skan  for (phi = phi_nodes (loop->header), phi1 = phi_nodes (update_bb);
2508169689Skan       phi && phi1;
2509169689Skan       phi = PHI_CHAIN (phi), phi1 = PHI_CHAIN (phi1))
2510169689Skan    {
2511169689Skan      tree access_fn = NULL;
2512169689Skan      tree evolution_part;
2513169689Skan      tree init_expr;
2514169689Skan      tree step_expr;
2515169689Skan      tree var, stmt, ni, ni_name;
2516169689Skan      block_stmt_iterator last_bsi;
2517169689Skan
2518169689Skan      if (vect_print_dump_info (REPORT_DETAILS))
2519169689Skan        {
2520169689Skan          fprintf (vect_dump, "vect_update_ivs_after_vectorizer: phi: ");
2521169689Skan          print_generic_expr (vect_dump, phi, TDF_SLIM);
2522169689Skan        }
2523169689Skan
2524169689Skan      /* Skip virtual phi's.  */
2525169689Skan      if (!is_gimple_reg (SSA_NAME_VAR (PHI_RESULT (phi))))
2526169689Skan	{
2527169689Skan	  if (vect_print_dump_info (REPORT_DETAILS))
2528169689Skan	    fprintf (vect_dump, "virtual phi. skip.");
2529169689Skan	  continue;
2530169689Skan	}
2531169689Skan
2532169689Skan      /* Skip reduction phis.  */
2533169689Skan      if (STMT_VINFO_DEF_TYPE (vinfo_for_stmt (phi)) == vect_reduction_def)
2534169689Skan        {
2535169689Skan          if (vect_print_dump_info (REPORT_DETAILS))
2536169689Skan            fprintf (vect_dump, "reduc phi. skip.");
2537169689Skan          continue;
2538169689Skan        }
2539169689Skan
2540169689Skan      access_fn = analyze_scalar_evolution (loop, PHI_RESULT (phi));
2541169689Skan      gcc_assert (access_fn);
2542169689Skan      evolution_part =
2543169689Skan	 unshare_expr (evolution_part_in_loop_num (access_fn, loop->num));
2544169689Skan      gcc_assert (evolution_part != NULL_TREE);
2545169689Skan
2546169689Skan      /* FORNOW: We do not support IVs whose evolution function is a polynomial
2547169689Skan         of degree >= 2 or exponential.  */
2548169689Skan      gcc_assert (!tree_is_chrec (evolution_part));
2549169689Skan
2550169689Skan      step_expr = evolution_part;
2551169689Skan      init_expr = unshare_expr (initial_condition_in_loop_num (access_fn,
2552169689Skan							       loop->num));
2553169689Skan
2554169689Skan      ni = build2 (PLUS_EXPR, TREE_TYPE (init_expr),
2555169689Skan		  build2 (MULT_EXPR, TREE_TYPE (niters),
2556169689Skan		       niters, step_expr), init_expr);
2557169689Skan
2558169689Skan      var = create_tmp_var (TREE_TYPE (init_expr), "tmp");
2559169689Skan      add_referenced_var (var);
2560169689Skan
2561169689Skan      ni_name = force_gimple_operand (ni, &stmt, false, var);
2562169689Skan
2563169689Skan      /* Insert stmt into exit_bb.  */
2564169689Skan      last_bsi = bsi_last (exit_bb);
2565169689Skan      if (stmt)
2566169689Skan        bsi_insert_before (&last_bsi, stmt, BSI_SAME_STMT);
2567169689Skan
2568169689Skan      /* Fix phi expressions in the successor bb.  */
2569169689Skan      SET_PHI_ARG_DEF (phi1, update_e->dest_idx, ni_name);
2570169689Skan    }
2571169689Skan}
2572169689Skan
2573169689Skan
2574169689Skan/* Function vect_do_peeling_for_loop_bound
2575169689Skan
2576169689Skan   Peel the last iterations of the loop represented by LOOP_VINFO.
2577169689Skan   The peeled iterations form a new epilog loop.  Given that the loop now
2578169689Skan   iterates NITERS times, the new epilog loop iterates
2579169689Skan   NITERS % VECTORIZATION_FACTOR times.
2580169689Skan
2581169689Skan   The original loop will later be made to iterate
2582169689Skan   NITERS / VECTORIZATION_FACTOR times (this value is placed into RATIO).  */
2583169689Skan
2584169689Skanstatic void
2585169689Skanvect_do_peeling_for_loop_bound (loop_vec_info loop_vinfo, tree *ratio,
2586169689Skan				struct loops *loops)
2587169689Skan{
2588169689Skan  tree ni_name, ratio_mult_vf_name;
2589169689Skan  struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
2590169689Skan  struct loop *new_loop;
2591169689Skan  edge update_e;
2592169689Skan  basic_block preheader;
2593169689Skan  int loop_num;
2594169689Skan
2595169689Skan  if (vect_print_dump_info (REPORT_DETAILS))
2596169689Skan    fprintf (vect_dump, "=== vect_do_peeling_for_loop_bound ===");
2597169689Skan
2598169689Skan  initialize_original_copy_tables ();
2599169689Skan
2600169689Skan  /* Generate the following variables on the preheader of original loop:
2601169689Skan
2602169689Skan     ni_name = number of iteration the original loop executes
2603169689Skan     ratio = ni_name / vf
2604169689Skan     ratio_mult_vf_name = ratio * vf  */
2605169689Skan  vect_generate_tmps_on_preheader (loop_vinfo, &ni_name,
2606169689Skan				   &ratio_mult_vf_name, ratio);
2607169689Skan
2608169689Skan  loop_num  = loop->num;
2609169689Skan  new_loop = slpeel_tree_peel_loop_to_edge (loop, loops, loop->single_exit,
2610169689Skan					    ratio_mult_vf_name, ni_name, false);
2611169689Skan  gcc_assert (new_loop);
2612169689Skan  gcc_assert (loop_num == loop->num);
2613169689Skan#ifdef ENABLE_CHECKING
2614169689Skan  slpeel_verify_cfg_after_peeling (loop, new_loop);
2615169689Skan#endif
2616169689Skan
2617169689Skan  /* A guard that controls whether the new_loop is to be executed or skipped
2618169689Skan     is placed in LOOP->exit.  LOOP->exit therefore has two successors - one
2619169689Skan     is the preheader of NEW_LOOP, where the IVs from LOOP are used.  The other
2620169689Skan     is a bb after NEW_LOOP, where these IVs are not used.  Find the edge that
2621169689Skan     is on the path where the LOOP IVs are used and need to be updated.  */
2622169689Skan
2623169689Skan  preheader = loop_preheader_edge (new_loop)->src;
2624169689Skan  if (EDGE_PRED (preheader, 0)->src == loop->single_exit->dest)
2625169689Skan    update_e = EDGE_PRED (preheader, 0);
2626169689Skan  else
2627169689Skan    update_e = EDGE_PRED (preheader, 1);
2628169689Skan
2629169689Skan  /* Update IVs of original loop as if they were advanced
2630169689Skan     by ratio_mult_vf_name steps.  */
2631169689Skan  vect_update_ivs_after_vectorizer (loop_vinfo, ratio_mult_vf_name, update_e);
2632169689Skan
2633169689Skan  /* After peeling we have to reset scalar evolution analyzer.  */
2634169689Skan  scev_reset ();
2635169689Skan
2636169689Skan  free_original_copy_tables ();
2637169689Skan}
2638169689Skan
2639169689Skan
2640169689Skan/* Function vect_gen_niters_for_prolog_loop
2641169689Skan
2642169689Skan   Set the number of iterations for the loop represented by LOOP_VINFO
2643169689Skan   to the minimum between LOOP_NITERS (the original iteration count of the loop)
2644169689Skan   and the misalignment of DR - the data reference recorded in
2645169689Skan   LOOP_VINFO_UNALIGNED_DR (LOOP_VINFO).  As a result, after the execution of
2646169689Skan   this loop, the data reference DR will refer to an aligned location.
2647169689Skan
2648169689Skan   The following computation is generated:
2649169689Skan
2650169689Skan   If the misalignment of DR is known at compile time:
2651169689Skan     addr_mis = int mis = DR_MISALIGNMENT (dr);
2652169689Skan   Else, compute address misalignment in bytes:
2653169689Skan     addr_mis = addr & (vectype_size - 1)
2654169689Skan
2655169689Skan   prolog_niters = min ( LOOP_NITERS , (VF - addr_mis/elem_size)&(VF-1) )
2656169689Skan
2657169689Skan   (elem_size = element type size; an element is the scalar element
2658169689Skan	whose type is the inner type of the vectype)  */
2659169689Skan
2660169689Skanstatic tree
2661169689Skanvect_gen_niters_for_prolog_loop (loop_vec_info loop_vinfo, tree loop_niters)
2662169689Skan{
2663169689Skan  struct data_reference *dr = LOOP_VINFO_UNALIGNED_DR (loop_vinfo);
2664169689Skan  int vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
2665169689Skan  struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
2666169689Skan  tree var, stmt;
2667169689Skan  tree iters, iters_name;
2668169689Skan  edge pe;
2669169689Skan  basic_block new_bb;
2670169689Skan  tree dr_stmt = DR_STMT (dr);
2671169689Skan  stmt_vec_info stmt_info = vinfo_for_stmt (dr_stmt);
2672169689Skan  tree vectype = STMT_VINFO_VECTYPE (stmt_info);
2673169689Skan  int vectype_align = TYPE_ALIGN (vectype) / BITS_PER_UNIT;
2674169689Skan  tree niters_type = TREE_TYPE (loop_niters);
2675169689Skan
2676169689Skan  pe = loop_preheader_edge (loop);
2677169689Skan
2678169689Skan  if (LOOP_PEELING_FOR_ALIGNMENT (loop_vinfo) > 0)
2679169689Skan    {
2680169689Skan      int byte_misalign = LOOP_PEELING_FOR_ALIGNMENT (loop_vinfo);
2681169689Skan      int element_size = vectype_align/vf;
2682169689Skan      int elem_misalign = byte_misalign / element_size;
2683169689Skan
2684169689Skan      if (vect_print_dump_info (REPORT_DETAILS))
2685169689Skan        fprintf (vect_dump, "known alignment = %d.", byte_misalign);
2686169689Skan      iters = build_int_cst (niters_type, (vf - elem_misalign)&(vf-1));
2687169689Skan    }
2688169689Skan  else
2689169689Skan    {
2690169689Skan      tree new_stmts = NULL_TREE;
2691169689Skan      tree start_addr =
2692169689Skan        vect_create_addr_base_for_vector_ref (dr_stmt, &new_stmts, NULL_TREE);
2693169689Skan      tree ptr_type = TREE_TYPE (start_addr);
2694169689Skan      tree size = TYPE_SIZE (ptr_type);
2695169689Skan      tree type = lang_hooks.types.type_for_size (tree_low_cst (size, 1), 1);
2696169689Skan      tree vectype_size_minus_1 = build_int_cst (type, vectype_align - 1);
2697169689Skan      tree elem_size_log =
2698169689Skan        build_int_cst (type, exact_log2 (vectype_align/vf));
2699169689Skan      tree vf_minus_1 = build_int_cst (type, vf - 1);
2700169689Skan      tree vf_tree = build_int_cst (type, vf);
2701169689Skan      tree byte_misalign;
2702169689Skan      tree elem_misalign;
2703169689Skan
2704169689Skan      new_bb = bsi_insert_on_edge_immediate (pe, new_stmts);
2705169689Skan      gcc_assert (!new_bb);
2706169689Skan
2707169689Skan      /* Create:  byte_misalign = addr & (vectype_size - 1)  */
2708169689Skan      byte_misalign =
2709169689Skan        build2 (BIT_AND_EXPR, type, start_addr, vectype_size_minus_1);
2710169689Skan
2711169689Skan      /* Create:  elem_misalign = byte_misalign / element_size  */
2712169689Skan      elem_misalign =
2713169689Skan        build2 (RSHIFT_EXPR, type, byte_misalign, elem_size_log);
2714169689Skan
2715169689Skan      /* Create:  (niters_type) (VF - elem_misalign)&(VF - 1)  */
2716169689Skan      iters = build2 (MINUS_EXPR, type, vf_tree, elem_misalign);
2717169689Skan      iters = build2 (BIT_AND_EXPR, type, iters, vf_minus_1);
2718169689Skan      iters = fold_convert (niters_type, iters);
2719169689Skan    }
2720169689Skan
2721169689Skan  /* Create:  prolog_loop_niters = min (iters, loop_niters) */
2722169689Skan  /* If the loop bound is known at compile time we already verified that it is
2723169689Skan     greater than vf; since the misalignment ('iters') is at most vf, there's
2724169689Skan     no need to generate the MIN_EXPR in this case.  */
2725169689Skan  if (TREE_CODE (loop_niters) != INTEGER_CST)
2726169689Skan    iters = build2 (MIN_EXPR, niters_type, iters, loop_niters);
2727169689Skan
2728169689Skan  if (vect_print_dump_info (REPORT_DETAILS))
2729169689Skan    {
2730169689Skan      fprintf (vect_dump, "niters for prolog loop: ");
2731169689Skan      print_generic_expr (vect_dump, iters, TDF_SLIM);
2732169689Skan    }
2733169689Skan
2734169689Skan  var = create_tmp_var (niters_type, "prolog_loop_niters");
2735169689Skan  add_referenced_var (var);
2736169689Skan  iters_name = force_gimple_operand (iters, &stmt, false, var);
2737169689Skan
2738169689Skan  /* Insert stmt on loop preheader edge.  */
2739169689Skan  if (stmt)
2740169689Skan    {
2741169689Skan      basic_block new_bb = bsi_insert_on_edge_immediate (pe, stmt);
2742169689Skan      gcc_assert (!new_bb);
2743169689Skan    }
2744169689Skan
2745169689Skan  return iters_name;
2746169689Skan}
2747169689Skan
2748169689Skan
2749169689Skan/* Function vect_update_init_of_dr
2750169689Skan
2751169689Skan   NITERS iterations were peeled from LOOP.  DR represents a data reference
2752169689Skan   in LOOP.  This function updates the information recorded in DR to
2753169689Skan   account for the fact that the first NITERS iterations had already been
2754169689Skan   executed.  Specifically, it updates the OFFSET field of DR.  */
2755169689Skan
2756169689Skanstatic void
2757169689Skanvect_update_init_of_dr (struct data_reference *dr, tree niters)
2758169689Skan{
2759169689Skan  tree offset = DR_OFFSET (dr);
2760169689Skan
2761169689Skan  niters = fold_build2 (MULT_EXPR, TREE_TYPE (niters), niters, DR_STEP (dr));
2762169689Skan  offset = fold_build2 (PLUS_EXPR, TREE_TYPE (offset), offset, niters);
2763169689Skan  DR_OFFSET (dr) = offset;
2764169689Skan}
2765169689Skan
2766169689Skan
2767169689Skan/* Function vect_update_inits_of_drs
2768169689Skan
2769169689Skan   NITERS iterations were peeled from the loop represented by LOOP_VINFO.
2770169689Skan   This function updates the information recorded for the data references in
2771169689Skan   the loop to account for the fact that the first NITERS iterations had
2772169689Skan   already been executed.  Specifically, it updates the initial_condition of the
2773169689Skan   access_function of all the data_references in the loop.  */
2774169689Skan
2775169689Skanstatic void
2776169689Skanvect_update_inits_of_drs (loop_vec_info loop_vinfo, tree niters)
2777169689Skan{
2778169689Skan  unsigned int i;
2779169689Skan  VEC (data_reference_p, heap) *datarefs = LOOP_VINFO_DATAREFS (loop_vinfo);
2780169689Skan  struct data_reference *dr;
2781169689Skan
2782169689Skan  if (vect_dump && (dump_flags & TDF_DETAILS))
2783169689Skan    fprintf (vect_dump, "=== vect_update_inits_of_dr ===");
2784169689Skan
2785169689Skan  for (i = 0; VEC_iterate (data_reference_p, datarefs, i, dr); i++)
2786169689Skan    vect_update_init_of_dr (dr, niters);
2787169689Skan}
2788169689Skan
2789169689Skan
2790169689Skan/* Function vect_do_peeling_for_alignment
2791169689Skan
2792169689Skan   Peel the first 'niters' iterations of the loop represented by LOOP_VINFO.
2793169689Skan   'niters' is set to the misalignment of one of the data references in the
2794169689Skan   loop, thereby forcing it to refer to an aligned location at the beginning
2795169689Skan   of the execution of this loop.  The data reference for which we are
2796169689Skan   peeling is recorded in LOOP_VINFO_UNALIGNED_DR.  */
2797169689Skan
2798169689Skanstatic void
2799169689Skanvect_do_peeling_for_alignment (loop_vec_info loop_vinfo, struct loops *loops)
2800169689Skan{
2801169689Skan  struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
2802169689Skan  tree niters_of_prolog_loop, ni_name;
2803169689Skan  tree n_iters;
2804169689Skan  struct loop *new_loop;
2805169689Skan
2806169689Skan  if (vect_print_dump_info (REPORT_DETAILS))
2807169689Skan    fprintf (vect_dump, "=== vect_do_peeling_for_alignment ===");
2808169689Skan
2809169689Skan  initialize_original_copy_tables ();
2810169689Skan
2811169689Skan  ni_name = vect_build_loop_niters (loop_vinfo);
2812169689Skan  niters_of_prolog_loop = vect_gen_niters_for_prolog_loop (loop_vinfo, ni_name);
2813169689Skan
2814169689Skan  /* Peel the prolog loop and iterate it niters_of_prolog_loop.  */
2815169689Skan  new_loop =
2816169689Skan	slpeel_tree_peel_loop_to_edge (loop, loops, loop_preheader_edge (loop),
2817169689Skan				       niters_of_prolog_loop, ni_name, true);
2818169689Skan  gcc_assert (new_loop);
2819169689Skan#ifdef ENABLE_CHECKING
2820169689Skan  slpeel_verify_cfg_after_peeling (new_loop, loop);
2821169689Skan#endif
2822169689Skan
2823169689Skan  /* Update number of times loop executes.  */
2824169689Skan  n_iters = LOOP_VINFO_NITERS (loop_vinfo);
2825169689Skan  LOOP_VINFO_NITERS (loop_vinfo) = fold_build2 (MINUS_EXPR,
2826169689Skan		TREE_TYPE (n_iters), n_iters, niters_of_prolog_loop);
2827169689Skan
2828169689Skan  /* Update the init conditions of the access functions of all data refs.  */
2829169689Skan  vect_update_inits_of_drs (loop_vinfo, niters_of_prolog_loop);
2830169689Skan
2831169689Skan  /* After peeling we have to reset scalar evolution analyzer.  */
2832169689Skan  scev_reset ();
2833169689Skan
2834169689Skan  free_original_copy_tables ();
2835169689Skan}
2836169689Skan
2837169689Skan
2838169689Skan/* Function vect_create_cond_for_align_checks.
2839169689Skan
2840169689Skan   Create a conditional expression that represents the alignment checks for
2841169689Skan   all of data references (array element references) whose alignment must be
2842169689Skan   checked at runtime.
2843169689Skan
2844169689Skan   Input:
2845169689Skan   LOOP_VINFO - two fields of the loop information are used.
2846169689Skan                LOOP_VINFO_PTR_MASK is the mask used to check the alignment.
2847169689Skan                LOOP_VINFO_MAY_MISALIGN_STMTS contains the refs to be checked.
2848169689Skan
2849169689Skan   Output:
2850169689Skan   COND_EXPR_STMT_LIST - statements needed to construct the conditional
2851169689Skan                         expression.
2852169689Skan   The returned value is the conditional expression to be used in the if
2853169689Skan   statement that controls which version of the loop gets executed at runtime.
2854169689Skan
2855169689Skan   The algorithm makes two assumptions:
2856169689Skan     1) The number of bytes "n" in a vector is a power of 2.
2857169689Skan     2) An address "a" is aligned if a%n is zero and that this
2858169689Skan        test can be done as a&(n-1) == 0.  For example, for 16
2859169689Skan        byte vectors the test is a&0xf == 0.  */
2860169689Skan
2861169689Skanstatic tree
2862169689Skanvect_create_cond_for_align_checks (loop_vec_info loop_vinfo,
2863169689Skan                                   tree *cond_expr_stmt_list)
2864169689Skan{
2865169689Skan  VEC(tree,heap) *may_misalign_stmts
2866169689Skan    = LOOP_VINFO_MAY_MISALIGN_STMTS (loop_vinfo);
2867169689Skan  tree ref_stmt;
2868169689Skan  int mask = LOOP_VINFO_PTR_MASK (loop_vinfo);
2869169689Skan  tree mask_cst;
2870169689Skan  unsigned int i;
2871169689Skan  tree psize;
2872169689Skan  tree int_ptrsize_type;
2873169689Skan  char tmp_name[20];
2874169689Skan  tree or_tmp_name = NULL_TREE;
2875169689Skan  tree and_tmp, and_tmp_name, and_stmt;
2876169689Skan  tree ptrsize_zero;
2877169689Skan
2878169689Skan  /* Check that mask is one less than a power of 2, i.e., mask is
2879169689Skan     all zeros followed by all ones.  */
2880169689Skan  gcc_assert ((mask != 0) && ((mask & (mask+1)) == 0));
2881169689Skan
2882169689Skan  /* CHECKME: what is the best integer or unsigned type to use to hold a
2883169689Skan     cast from a pointer value?  */
2884169689Skan  psize = TYPE_SIZE (ptr_type_node);
2885169689Skan  int_ptrsize_type
2886169689Skan    = lang_hooks.types.type_for_size (tree_low_cst (psize, 1), 0);
2887169689Skan
2888169689Skan  /* Create expression (mask & (dr_1 || ... || dr_n)) where dr_i is the address
2889169689Skan     of the first vector of the i'th data reference. */
2890169689Skan
2891169689Skan  for (i = 0; VEC_iterate (tree, may_misalign_stmts, i, ref_stmt); i++)
2892169689Skan    {
2893169689Skan      tree new_stmt_list = NULL_TREE;
2894169689Skan      tree addr_base;
2895169689Skan      tree addr_tmp, addr_tmp_name, addr_stmt;
2896169689Skan      tree or_tmp, new_or_tmp_name, or_stmt;
2897169689Skan
2898169689Skan      /* create: addr_tmp = (int)(address_of_first_vector) */
2899169689Skan      addr_base = vect_create_addr_base_for_vector_ref (ref_stmt,
2900169689Skan							&new_stmt_list,
2901169689Skan							NULL_TREE);
2902169689Skan
2903169689Skan      if (new_stmt_list != NULL_TREE)
2904169689Skan        append_to_statement_list_force (new_stmt_list, cond_expr_stmt_list);
2905169689Skan
2906169689Skan      sprintf (tmp_name, "%s%d", "addr2int", i);
2907169689Skan      addr_tmp = create_tmp_var (int_ptrsize_type, tmp_name);
2908169689Skan      add_referenced_var (addr_tmp);
2909169689Skan      addr_tmp_name = make_ssa_name (addr_tmp, NULL_TREE);
2910169689Skan      addr_stmt = fold_convert (int_ptrsize_type, addr_base);
2911169689Skan      addr_stmt = build2 (MODIFY_EXPR, void_type_node,
2912169689Skan                          addr_tmp_name, addr_stmt);
2913169689Skan      SSA_NAME_DEF_STMT (addr_tmp_name) = addr_stmt;
2914169689Skan      append_to_statement_list_force (addr_stmt, cond_expr_stmt_list);
2915169689Skan
2916169689Skan      /* The addresses are OR together.  */
2917169689Skan
2918169689Skan      if (or_tmp_name != NULL_TREE)
2919169689Skan        {
2920169689Skan          /* create: or_tmp = or_tmp | addr_tmp */
2921169689Skan          sprintf (tmp_name, "%s%d", "orptrs", i);
2922169689Skan          or_tmp = create_tmp_var (int_ptrsize_type, tmp_name);
2923169689Skan          add_referenced_var (or_tmp);
2924169689Skan          new_or_tmp_name = make_ssa_name (or_tmp, NULL_TREE);
2925169689Skan          or_stmt = build2 (MODIFY_EXPR, void_type_node, new_or_tmp_name,
2926169689Skan                            build2 (BIT_IOR_EXPR, int_ptrsize_type,
2927169689Skan	                            or_tmp_name,
2928169689Skan                                    addr_tmp_name));
2929169689Skan          SSA_NAME_DEF_STMT (new_or_tmp_name) = or_stmt;
2930169689Skan          append_to_statement_list_force (or_stmt, cond_expr_stmt_list);
2931169689Skan          or_tmp_name = new_or_tmp_name;
2932169689Skan        }
2933169689Skan      else
2934169689Skan        or_tmp_name = addr_tmp_name;
2935169689Skan
2936169689Skan    } /* end for i */
2937169689Skan
2938169689Skan  mask_cst = build_int_cst (int_ptrsize_type, mask);
2939169689Skan
2940169689Skan  /* create: and_tmp = or_tmp & mask  */
2941169689Skan  and_tmp = create_tmp_var (int_ptrsize_type, "andmask" );
2942169689Skan  add_referenced_var (and_tmp);
2943169689Skan  and_tmp_name = make_ssa_name (and_tmp, NULL_TREE);
2944169689Skan
2945169689Skan  and_stmt = build2 (MODIFY_EXPR, void_type_node,
2946169689Skan                     and_tmp_name,
2947169689Skan                     build2 (BIT_AND_EXPR, int_ptrsize_type,
2948169689Skan                             or_tmp_name, mask_cst));
2949169689Skan  SSA_NAME_DEF_STMT (and_tmp_name) = and_stmt;
2950169689Skan  append_to_statement_list_force (and_stmt, cond_expr_stmt_list);
2951169689Skan
2952169689Skan  /* Make and_tmp the left operand of the conditional test against zero.
2953169689Skan     if and_tmp has a nonzero bit then some address is unaligned.  */
2954169689Skan  ptrsize_zero = build_int_cst (int_ptrsize_type, 0);
2955169689Skan  return build2 (EQ_EXPR, boolean_type_node,
2956169689Skan                 and_tmp_name, ptrsize_zero);
2957169689Skan}
2958169689Skan
2959169689Skan
2960169689Skan/* Function vect_transform_loop.
2961169689Skan
2962169689Skan   The analysis phase has determined that the loop is vectorizable.
2963169689Skan   Vectorize the loop - created vectorized stmts to replace the scalar
2964169689Skan   stmts in the loop, and update the loop exit condition.  */
2965169689Skan
2966169689Skanvoid
2967169689Skanvect_transform_loop (loop_vec_info loop_vinfo,
2968169689Skan		     struct loops *loops ATTRIBUTE_UNUSED)
2969169689Skan{
2970169689Skan  struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
2971169689Skan  basic_block *bbs = LOOP_VINFO_BBS (loop_vinfo);
2972169689Skan  int nbbs = loop->num_nodes;
2973169689Skan  block_stmt_iterator si;
2974169689Skan  int i;
2975169689Skan  tree ratio = NULL;
2976169689Skan  int vectorization_factor = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
2977169689Skan  bitmap_iterator bi;
2978169689Skan  unsigned int j;
2979169689Skan
2980169689Skan  if (vect_print_dump_info (REPORT_DETAILS))
2981169689Skan    fprintf (vect_dump, "=== vec_transform_loop ===");
2982169689Skan
2983169689Skan  /* If the loop has data references that may or may not be aligned then
2984169689Skan     two versions of the loop need to be generated, one which is vectorized
2985169689Skan     and one which isn't.  A test is then generated to control which of the
2986169689Skan     loops is executed.  The test checks for the alignment of all of the
2987169689Skan     data references that may or may not be aligned. */
2988169689Skan
2989169689Skan  if (VEC_length (tree, LOOP_VINFO_MAY_MISALIGN_STMTS (loop_vinfo)))
2990169689Skan    {
2991169689Skan      struct loop *nloop;
2992169689Skan      tree cond_expr;
2993169689Skan      tree cond_expr_stmt_list = NULL_TREE;
2994169689Skan      basic_block condition_bb;
2995169689Skan      block_stmt_iterator cond_exp_bsi;
2996169689Skan      basic_block merge_bb;
2997169689Skan      basic_block new_exit_bb;
2998169689Skan      edge new_exit_e, e;
2999169689Skan      tree orig_phi, new_phi, arg;
3000169689Skan
3001169689Skan      cond_expr = vect_create_cond_for_align_checks (loop_vinfo,
3002169689Skan                                                     &cond_expr_stmt_list);
3003169689Skan      initialize_original_copy_tables ();
3004169689Skan      nloop = loop_version (loops, loop, cond_expr, &condition_bb, true);
3005169689Skan      free_original_copy_tables();
3006169689Skan
3007169689Skan      /** Loop versioning violates an assumption we try to maintain during
3008169689Skan	 vectorization - that the loop exit block has a single predecessor.
3009169689Skan	 After versioning, the exit block of both loop versions is the same
3010169689Skan	 basic block (i.e. it has two predecessors). Just in order to simplify
3011169689Skan	 following transformations in the vectorizer, we fix this situation
3012169689Skan	 here by adding a new (empty) block on the exit-edge of the loop,
3013169689Skan	 with the proper loop-exit phis to maintain loop-closed-form.  **/
3014169689Skan
3015169689Skan      merge_bb = loop->single_exit->dest;
3016169689Skan      gcc_assert (EDGE_COUNT (merge_bb->preds) == 2);
3017169689Skan      new_exit_bb = split_edge (loop->single_exit);
3018169689Skan      add_bb_to_loop (new_exit_bb, loop->outer);
3019169689Skan      new_exit_e = loop->single_exit;
3020169689Skan      e = EDGE_SUCC (new_exit_bb, 0);
3021169689Skan
3022169689Skan      for (orig_phi = phi_nodes (merge_bb); orig_phi;
3023169689Skan	   orig_phi = PHI_CHAIN (orig_phi))
3024169689Skan	{
3025169689Skan          new_phi = create_phi_node (SSA_NAME_VAR (PHI_RESULT (orig_phi)),
3026169689Skan				     new_exit_bb);
3027169689Skan          arg = PHI_ARG_DEF_FROM_EDGE (orig_phi, e);
3028169689Skan          add_phi_arg (new_phi, arg, new_exit_e);
3029169689Skan	  SET_PHI_ARG_DEF (orig_phi, e->dest_idx, PHI_RESULT (new_phi));
3030169689Skan	}
3031169689Skan
3032169689Skan      /** end loop-exit-fixes after versioning  **/
3033169689Skan
3034169689Skan      update_ssa (TODO_update_ssa);
3035169689Skan      cond_exp_bsi = bsi_last (condition_bb);
3036169689Skan      bsi_insert_before (&cond_exp_bsi, cond_expr_stmt_list, BSI_SAME_STMT);
3037169689Skan    }
3038169689Skan
3039169689Skan  /* CHECKME: we wouldn't need this if we called update_ssa once
3040169689Skan     for all loops.  */
3041169689Skan  bitmap_zero (vect_vnames_to_rename);
3042169689Skan
3043169689Skan  /* Peel the loop if there are data refs with unknown alignment.
3044169689Skan     Only one data ref with unknown store is allowed.  */
3045169689Skan
3046169689Skan  if (LOOP_PEELING_FOR_ALIGNMENT (loop_vinfo))
3047169689Skan    vect_do_peeling_for_alignment (loop_vinfo, loops);
3048169689Skan
3049169689Skan  /* If the loop has a symbolic number of iterations 'n' (i.e. it's not a
3050169689Skan     compile time constant), or it is a constant that doesn't divide by the
3051169689Skan     vectorization factor, then an epilog loop needs to be created.
3052169689Skan     We therefore duplicate the loop: the original loop will be vectorized,
3053169689Skan     and will compute the first (n/VF) iterations. The second copy of the loop
3054169689Skan     will remain scalar and will compute the remaining (n%VF) iterations.
3055169689Skan     (VF is the vectorization factor).  */
3056169689Skan
3057169689Skan  if (!LOOP_VINFO_NITERS_KNOWN_P (loop_vinfo)
3058169689Skan      || (LOOP_VINFO_NITERS_KNOWN_P (loop_vinfo)
3059169689Skan          && LOOP_VINFO_INT_NITERS (loop_vinfo) % vectorization_factor != 0))
3060169689Skan    vect_do_peeling_for_loop_bound (loop_vinfo, &ratio, loops);
3061169689Skan  else
3062169689Skan    ratio = build_int_cst (TREE_TYPE (LOOP_VINFO_NITERS (loop_vinfo)),
3063169689Skan		LOOP_VINFO_INT_NITERS (loop_vinfo) / vectorization_factor);
3064169689Skan
3065169689Skan  /* 1) Make sure the loop header has exactly two entries
3066169689Skan     2) Make sure we have a preheader basic block.  */
3067169689Skan
3068169689Skan  gcc_assert (EDGE_COUNT (loop->header->preds) == 2);
3069169689Skan
3070169689Skan  loop_split_edge_with (loop_preheader_edge (loop), NULL);
3071169689Skan
3072169689Skan
3073169689Skan  /* FORNOW: the vectorizer supports only loops which body consist
3074169689Skan     of one basic block (header + empty latch). When the vectorizer will
3075169689Skan     support more involved loop forms, the order by which the BBs are
3076169689Skan     traversed need to be reconsidered.  */
3077169689Skan
3078169689Skan  for (i = 0; i < nbbs; i++)
3079169689Skan    {
3080169689Skan      basic_block bb = bbs[i];
3081169689Skan
3082169689Skan      for (si = bsi_start (bb); !bsi_end_p (si);)
3083169689Skan	{
3084169689Skan	  tree stmt = bsi_stmt (si);
3085169689Skan	  stmt_vec_info stmt_info;
3086169689Skan	  bool is_store;
3087169689Skan
3088169689Skan	  if (vect_print_dump_info (REPORT_DETAILS))
3089169689Skan	    {
3090169689Skan	      fprintf (vect_dump, "------>vectorizing statement: ");
3091169689Skan	      print_generic_expr (vect_dump, stmt, TDF_SLIM);
3092169689Skan	    }
3093169689Skan	  stmt_info = vinfo_for_stmt (stmt);
3094169689Skan	  gcc_assert (stmt_info);
3095169689Skan	  if (!STMT_VINFO_RELEVANT_P (stmt_info)
3096169689Skan	      && !STMT_VINFO_LIVE_P (stmt_info))
3097169689Skan	    {
3098169689Skan	      bsi_next (&si);
3099169689Skan	      continue;
3100169689Skan	    }
3101169689Skan	  /* FORNOW: Verify that all stmts operate on the same number of
3102169689Skan	             units and no inner unrolling is necessary.  */
3103169689Skan	  gcc_assert
3104169689Skan		(TYPE_VECTOR_SUBPARTS (STMT_VINFO_VECTYPE (stmt_info))
3105169689Skan		 == (unsigned HOST_WIDE_INT) vectorization_factor);
3106169689Skan
3107169689Skan	  /* -------- vectorize statement ------------ */
3108169689Skan	  if (vect_print_dump_info (REPORT_DETAILS))
3109169689Skan	    fprintf (vect_dump, "transform statement.");
3110169689Skan
3111169689Skan	  is_store = vect_transform_stmt (stmt, &si);
3112169689Skan	  if (is_store)
3113169689Skan	    {
3114169689Skan	      /* Free the attached stmt_vec_info and remove the stmt.  */
3115169689Skan	      stmt_ann_t ann = stmt_ann (stmt);
3116169689Skan	      free (stmt_info);
3117169689Skan	      set_stmt_info (ann, NULL);
3118169689Skan	      bsi_remove (&si, true);
3119169689Skan	      continue;
3120169689Skan	    }
3121169689Skan
3122169689Skan	  bsi_next (&si);
3123169689Skan	}		        /* stmts in BB */
3124169689Skan    }				/* BBs in loop */
3125169689Skan
3126169689Skan  slpeel_make_loop_iterate_ntimes (loop, ratio);
3127169689Skan
3128169689Skan  EXECUTE_IF_SET_IN_BITMAP (vect_vnames_to_rename, 0, j, bi)
3129169689Skan    mark_sym_for_renaming (SSA_NAME_VAR (ssa_name (j)));
3130169689Skan
3131169689Skan  /* The memory tags and pointers in vectorized statements need to
3132169689Skan     have their SSA forms updated.  FIXME, why can't this be delayed
3133169689Skan     until all the loops have been transformed?  */
3134169689Skan  update_ssa (TODO_update_ssa);
3135169689Skan
3136169689Skan  if (vect_print_dump_info (REPORT_VECTORIZED_LOOPS))
3137169689Skan    fprintf (vect_dump, "LOOP VECTORIZED.");
3138169689Skan}
3139