1/* Data References Analysis and Manipulation Utilities for Vectorization.
2   Copyright (C) 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2010
3   Free Software Foundation, Inc.
4   Contributed by Dorit Naishlos <dorit@il.ibm.com>
5   and Ira Rosen <irar@il.ibm.com>
6
7This file is part of GCC.
8
9GCC is free software; you can redistribute it and/or modify it under
10the terms of the GNU General Public License as published by the Free
11Software Foundation; either version 3, or (at your option) any later
12version.
13
14GCC is distributed in the hope that it will be useful, but WITHOUT ANY
15WARRANTY; without even the implied warranty of MERCHANTABILITY or
16FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
17for more details.
18
19You should have received a copy of the GNU General Public License
20along with GCC; see the file COPYING3.  If not see
21<http://www.gnu.org/licenses/>.  */
22
23#include "config.h"
24#include "system.h"
25#include "coretypes.h"
26#include "tm.h"
27#include "ggc.h"
28#include "tree.h"
29#include "target.h"
30#include "basic-block.h"
31#include "diagnostic.h"
32#include "tree-flow.h"
33#include "tree-dump.h"
34#include "cfgloop.h"
35#include "expr.h"
36#include "optabs.h"
37#include "tree-chrec.h"
38#include "tree-scalar-evolution.h"
39#include "tree-vectorizer.h"
40#include "toplev.h"
41
42
/* Return the smallest scalar part of STMT.
   This is used to determine the vectype of the stmt.  We generally set the
   vectype according to the type of the result (lhs).  For stmts whose
   result-type is different from the type of the arguments (e.g., demotion,
   promotion), vectype will be reset appropriately (later).  Note that we have
   to visit the smallest datatype in this function, because that determines
   the VF.  If the smallest datatype in the loop is present only as the rhs of
   a promotion operation, we'd miss it.
   Such a case, where a variable of this datatype does not appear in the lhs
   anywhere in the loop, can only occur if it's an invariant, e.g.
   'int_x = (int) short_inv', which we'd expect to have been optimized away by
   invariant motion.  However, we cannot rely on invariant motion to always
   take invariants out of the loop, and so in the case of promotion we also
   have to check the rhs.
   LHS_SIZE_UNIT and RHS_SIZE_UNIT contain the sizes of the corresponding
   types.  */
59
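/* For example (an illustrative case, not taken from the sources):

     short s = ...;                     <-- loop invariant
     for (i = 0; i < N; i++)
       a[i] = b[i] + (int) s;           <-- 's' occurs only on the rhs of
                                            the promotion

   If invariant motion did not hoist the cast out of the loop, looking only
   at lhs types would miss 'short' here, even though it is the smallest
   scalar type and should determine the VF.  */
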
60tree
61vect_get_smallest_scalar_type (gimple stmt, HOST_WIDE_INT *lhs_size_unit,
62                               HOST_WIDE_INT *rhs_size_unit)
63{
64  tree scalar_type = gimple_expr_type (stmt);
65  HOST_WIDE_INT lhs, rhs;
66
67  lhs = rhs = TREE_INT_CST_LOW (TYPE_SIZE_UNIT (scalar_type));
68
69  if (is_gimple_assign (stmt)
70      && (gimple_assign_cast_p (stmt)
71          || gimple_assign_rhs_code (stmt) == WIDEN_MULT_EXPR
72          || gimple_assign_rhs_code (stmt) == FLOAT_EXPR))
73    {
74      tree rhs_type = TREE_TYPE (gimple_assign_rhs1 (stmt));
75
76      rhs = TREE_INT_CST_LOW (TYPE_SIZE_UNIT (rhs_type));
77      if (rhs < lhs)
78        scalar_type = rhs_type;
79    }
80
81  *lhs_size_unit = lhs;
82  *rhs_size_unit = rhs;
83  return scalar_type;
84}
85
86
87/* Find the place of the data-ref in STMT in the interleaving chain that starts
88   from FIRST_STMT. Return -1 if the data-ref is not a part of the chain.  */
89
90int
91vect_get_place_in_interleaving_chain (gimple stmt, gimple first_stmt)
92{
93  gimple next_stmt = first_stmt;
94  int result = 0;
95
96  if (first_stmt != DR_GROUP_FIRST_DR (vinfo_for_stmt (stmt)))
97    return -1;
98
99  while (next_stmt && next_stmt != stmt)
100    {
101      result++;
102      next_stmt = DR_GROUP_NEXT_DR (vinfo_for_stmt (next_stmt));
103    }
104
105  if (next_stmt)
106    return result;
107  else
108    return -1;
109}
110
111
112/* Function vect_insert_into_interleaving_chain.
113
114   Insert DRA into the interleaving chain of DRB according to DRA's INIT.  */
115
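/* For example (hypothetical INIT values): inserting a DRA with INIT 8 into
   the chain {0, 4, 12} of DRB yields {0, 4, 8, 12}; the chain is kept
   sorted by increasing INIT.  */
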
116static void
117vect_insert_into_interleaving_chain (struct data_reference *dra,
118				     struct data_reference *drb)
119{
120  gimple prev, next;
121  tree next_init;
122  stmt_vec_info stmtinfo_a = vinfo_for_stmt (DR_STMT (dra));
123  stmt_vec_info stmtinfo_b = vinfo_for_stmt (DR_STMT (drb));
124
125  prev = DR_GROUP_FIRST_DR (stmtinfo_b);
126  next = DR_GROUP_NEXT_DR (vinfo_for_stmt (prev));
127  while (next)
128    {
129      next_init = DR_INIT (STMT_VINFO_DATA_REF (vinfo_for_stmt (next)));
130      if (tree_int_cst_compare (next_init, DR_INIT (dra)) > 0)
131	{
132	  /* Insert here.  */
133	  DR_GROUP_NEXT_DR (vinfo_for_stmt (prev)) = DR_STMT (dra);
134	  DR_GROUP_NEXT_DR (stmtinfo_a) = next;
135	  return;
136	}
137      prev = next;
138      next = DR_GROUP_NEXT_DR (vinfo_for_stmt (prev));
139    }
140
141  /* We got to the end of the list. Insert here.  */
142  DR_GROUP_NEXT_DR (vinfo_for_stmt (prev)) = DR_STMT (dra);
143  DR_GROUP_NEXT_DR (stmtinfo_a) = NULL;
144}
145
146
/* Function vect_update_interleaving_chain.

   For two data-refs DRA and DRB that are a part of a chain of interleaved
   data accesses, update the interleaving chain.  DRB's INIT is smaller than
   DRA's.

   There are four possible cases:
   1. New stmts - both DRA and DRB are not a part of any chain:
      FIRST_DR = DRB
      NEXT_DR (DRB) = DRA
   2. DRB is a part of a chain and DRA is not:
      no need to update FIRST_DR
      no need to insert DRB
      insert DRA according to init
   3. DRA is a part of a chain and DRB is not:
      if (init of FIRST_DR > init of DRB)
          FIRST_DR = DRB
          NEXT(FIRST_DR) = previous FIRST_DR
      else
          insert DRB according to its init
   4. both DRA and DRB are in some interleaving chains:
      choose the chain with the smallest init of FIRST_DR
      insert the nodes of the second chain into the first one.  */
169
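/* A small illustration of case 4 (hypothetical INIT values):

     chain of DRA: {0, 8}     chain of DRB: {4, 12}

   The first INIT of DRA's chain (0) is smaller, so the nodes of DRB's chain
   are inserted into DRA's chain, giving the single chain {0, 4, 8, 12}, and
   every node is updated to point to the new FIRST_DR.  */
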
170static void
171vect_update_interleaving_chain (struct data_reference *drb,
172				struct data_reference *dra)
173{
174  stmt_vec_info stmtinfo_a = vinfo_for_stmt (DR_STMT (dra));
175  stmt_vec_info stmtinfo_b = vinfo_for_stmt (DR_STMT (drb));
176  tree next_init, init_dra_chain, init_drb_chain;
177  gimple first_a, first_b;
178  tree node_init;
179  gimple node, prev, next, first_stmt;
180
181  /* 1. New stmts - both DRA and DRB are not a part of any chain.   */
182  if (!DR_GROUP_FIRST_DR (stmtinfo_a) && !DR_GROUP_FIRST_DR (stmtinfo_b))
183    {
184      DR_GROUP_FIRST_DR (stmtinfo_a) = DR_STMT (drb);
185      DR_GROUP_FIRST_DR (stmtinfo_b) = DR_STMT (drb);
186      DR_GROUP_NEXT_DR (stmtinfo_b) = DR_STMT (dra);
187      return;
188    }
189
190  /* 2. DRB is a part of a chain and DRA is not.  */
191  if (!DR_GROUP_FIRST_DR (stmtinfo_a) && DR_GROUP_FIRST_DR (stmtinfo_b))
192    {
193      DR_GROUP_FIRST_DR (stmtinfo_a) = DR_GROUP_FIRST_DR (stmtinfo_b);
194      /* Insert DRA into the chain of DRB.  */
195      vect_insert_into_interleaving_chain (dra, drb);
196      return;
197    }
198
199  /* 3. DRA is a part of a chain and DRB is not.  */
200  if (DR_GROUP_FIRST_DR (stmtinfo_a) && !DR_GROUP_FIRST_DR (stmtinfo_b))
201    {
202      gimple old_first_stmt = DR_GROUP_FIRST_DR (stmtinfo_a);
203      tree init_old = DR_INIT (STMT_VINFO_DATA_REF (vinfo_for_stmt (
204							      old_first_stmt)));
205      gimple tmp;
206
207      if (tree_int_cst_compare (init_old, DR_INIT (drb)) > 0)
208	{
209	  /* DRB's init is smaller than the init of the stmt previously marked
210	     as the first stmt of the interleaving chain of DRA. Therefore, we
211	     update FIRST_STMT and put DRB in the head of the list.  */
212	  DR_GROUP_FIRST_DR (stmtinfo_b) = DR_STMT (drb);
213	  DR_GROUP_NEXT_DR (stmtinfo_b) = old_first_stmt;
214
215	  /* Update all the stmts in the list to point to the new FIRST_STMT.  */
216	  tmp = old_first_stmt;
217	  while (tmp)
218	    {
219	      DR_GROUP_FIRST_DR (vinfo_for_stmt (tmp)) = DR_STMT (drb);
220	      tmp = DR_GROUP_NEXT_DR (vinfo_for_stmt (tmp));
221	    }
222	}
223      else
224	{
225	  /* Insert DRB in the list of DRA.  */
226	  vect_insert_into_interleaving_chain (drb, dra);
227	  DR_GROUP_FIRST_DR (stmtinfo_b) = DR_GROUP_FIRST_DR (stmtinfo_a);
228	}
229      return;
230    }
231
232  /* 4. both DRA and DRB are in some interleaving chains.  */
233  first_a = DR_GROUP_FIRST_DR (stmtinfo_a);
234  first_b = DR_GROUP_FIRST_DR (stmtinfo_b);
235  if (first_a == first_b)
236    return;
237  init_dra_chain = DR_INIT (STMT_VINFO_DATA_REF (vinfo_for_stmt (first_a)));
238  init_drb_chain = DR_INIT (STMT_VINFO_DATA_REF (vinfo_for_stmt (first_b)));
239
240  if (tree_int_cst_compare (init_dra_chain, init_drb_chain) > 0)
241    {
      /* Insert the nodes of DRA chain into the DRB chain.
         After inserting a node, continue from this node of the DRB chain
         (don't start from the beginning).  */
245      node = DR_GROUP_FIRST_DR (stmtinfo_a);
246      prev = DR_GROUP_FIRST_DR (stmtinfo_b);
247      first_stmt = first_b;
248    }
249  else
250    {
      /* Insert the nodes of DRB chain into the DRA chain.
         After inserting a node, continue from this node of the DRA chain
         (don't start from the beginning).  */
254      node = DR_GROUP_FIRST_DR (stmtinfo_b);
255      prev = DR_GROUP_FIRST_DR (stmtinfo_a);
256      first_stmt = first_a;
257    }
258
259  while (node)
260    {
261      node_init = DR_INIT (STMT_VINFO_DATA_REF (vinfo_for_stmt (node)));
262      next = DR_GROUP_NEXT_DR (vinfo_for_stmt (prev));
263      while (next)
264	{
265	  next_init = DR_INIT (STMT_VINFO_DATA_REF (vinfo_for_stmt (next)));
266	  if (tree_int_cst_compare (next_init, node_init) > 0)
267	    {
268	      /* Insert here.  */
269	      DR_GROUP_NEXT_DR (vinfo_for_stmt (prev)) = node;
270	      DR_GROUP_NEXT_DR (vinfo_for_stmt (node)) = next;
271	      prev = node;
272	      break;
273	    }
274	  prev = next;
275	  next = DR_GROUP_NEXT_DR (vinfo_for_stmt (prev));
276	}
277      if (!next)
278	{
279	  /* We got to the end of the list. Insert here.  */
280	  DR_GROUP_NEXT_DR (vinfo_for_stmt (prev)) = node;
281	  DR_GROUP_NEXT_DR (vinfo_for_stmt (node)) = NULL;
282	  prev = node;
283	}
284      DR_GROUP_FIRST_DR (vinfo_for_stmt (node)) = first_stmt;
285      node = DR_GROUP_NEXT_DR (vinfo_for_stmt (node));
286    }
287}
288
289
290/* Function vect_equal_offsets.
291
292   Check if OFFSET1 and OFFSET2 are identical expressions.  */
293
294static bool
295vect_equal_offsets (tree offset1, tree offset2)
296{
297  bool res;
298
299  STRIP_NOPS (offset1);
300  STRIP_NOPS (offset2);
301
302  if (offset1 == offset2)
303    return true;
304
305  if (TREE_CODE (offset1) != TREE_CODE (offset2)
306      || (!BINARY_CLASS_P (offset1) && !UNARY_CLASS_P (offset1)))
307    return false;
308
309  res = vect_equal_offsets (TREE_OPERAND (offset1, 0),
310			    TREE_OPERAND (offset2, 0));
311
312  if (!res || !BINARY_CLASS_P (offset1))
313    return res;
314
315  res = vect_equal_offsets (TREE_OPERAND (offset1, 1),
316			    TREE_OPERAND (offset2, 1));
317
318  return res;
319}
320
321
/* Function vect_check_interleaving.

   Check if DRA and DRB are a part of interleaving.  If they are, insert
   DRA and DRB into an interleaving chain.  */
326
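/* For example (an illustrative loop, not taken from the sources):

     for (i = 0; i < N; i++)
       {
         p[2*i]     = x;       <-- DRA: step 2*sizeof(*p), init 0
         p[2*i + 1] = y;       <-- DRB: step 2*sizeof(*p), init sizeof(*p)
       }

   The two stores share the base and the step, both are writes, and the
   difference between their INITs is a multiple of the element size and does
   not exceed the step, so they form an interleaving group of size 2.  */
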
327static bool
328vect_check_interleaving (struct data_reference *dra,
329			 struct data_reference *drb)
330{
331  HOST_WIDE_INT type_size_a, type_size_b, diff_mod_size, step, init_a, init_b;
332
  /* Check that the data-refs have the same first location (except init) and
     that they are both either loads or stores (not one load and one store).  */
335  if ((DR_BASE_ADDRESS (dra) != DR_BASE_ADDRESS (drb)
336       && (TREE_CODE (DR_BASE_ADDRESS (dra)) != ADDR_EXPR
337	   || TREE_CODE (DR_BASE_ADDRESS (drb)) != ADDR_EXPR
338	   || TREE_OPERAND (DR_BASE_ADDRESS (dra), 0)
339	   != TREE_OPERAND (DR_BASE_ADDRESS (drb),0)))
340      || !vect_equal_offsets (DR_OFFSET (dra), DR_OFFSET (drb))
341      || !tree_int_cst_compare (DR_INIT (dra), DR_INIT (drb))
342      || DR_IS_READ (dra) != DR_IS_READ (drb))
343    return false;
344
  /* Check:
     1. data-refs are of the same type
     2. their steps are equal
     3. the step (if greater than zero) is not smaller than the difference
        between the data-refs' inits.  */
350  type_size_a = TREE_INT_CST_LOW (TYPE_SIZE_UNIT (TREE_TYPE (DR_REF (dra))));
351  type_size_b = TREE_INT_CST_LOW (TYPE_SIZE_UNIT (TREE_TYPE (DR_REF (drb))));
352
353  if (type_size_a != type_size_b
354      || tree_int_cst_compare (DR_STEP (dra), DR_STEP (drb))
355      || !types_compatible_p (TREE_TYPE (DR_REF (dra)),
356                              TREE_TYPE (DR_REF (drb))))
357    return false;
358
359  init_a = TREE_INT_CST_LOW (DR_INIT (dra));
360  init_b = TREE_INT_CST_LOW (DR_INIT (drb));
361  step = TREE_INT_CST_LOW (DR_STEP (dra));
362
363  if (init_a > init_b)
364    {
365      /* If init_a == init_b + the size of the type * k, we have an interleaving,
366	 and DRB is accessed before DRA.  */
367      diff_mod_size = (init_a - init_b) % type_size_a;
368
369      if (step && (init_a - init_b) > step)
370         return false;
371
372      if (diff_mod_size == 0)
373	{
374	  vect_update_interleaving_chain (drb, dra);
375	  if (vect_print_dump_info (REPORT_DR_DETAILS))
376	    {
377	      fprintf (vect_dump, "Detected interleaving ");
378	      print_generic_expr (vect_dump, DR_REF (dra), TDF_SLIM);
379	      fprintf (vect_dump, " and ");
380	      print_generic_expr (vect_dump, DR_REF (drb), TDF_SLIM);
381	    }
382	  return true;
383	}
384    }
385  else
386    {
387      /* If init_b == init_a + the size of the type * k, we have an
388	 interleaving, and DRA is accessed before DRB.  */
389      diff_mod_size = (init_b - init_a) % type_size_a;
390
391      if (step && (init_b - init_a) > step)
392         return false;
393
394      if (diff_mod_size == 0)
395	{
396	  vect_update_interleaving_chain (dra, drb);
397	  if (vect_print_dump_info (REPORT_DR_DETAILS))
398	    {
399	      fprintf (vect_dump, "Detected interleaving ");
400	      print_generic_expr (vect_dump, DR_REF (dra), TDF_SLIM);
401	      fprintf (vect_dump, " and ");
402	      print_generic_expr (vect_dump, DR_REF (drb), TDF_SLIM);
403	    }
404	  return true;
405	}
406    }
407
408  return false;
409}
410
/* Check if the data references pointed to by DR_I and DR_J are the same or
   belong to the same interleaving group.  Return FALSE if the data-refs are
   different, otherwise return TRUE.  */
414
415static bool
416vect_same_range_drs (data_reference_p dr_i, data_reference_p dr_j)
417{
418  gimple stmt_i = DR_STMT (dr_i);
419  gimple stmt_j = DR_STMT (dr_j);
420
421  if (operand_equal_p (DR_REF (dr_i), DR_REF (dr_j), 0)
422      || (DR_GROUP_FIRST_DR (vinfo_for_stmt (stmt_i))
423	    && DR_GROUP_FIRST_DR (vinfo_for_stmt (stmt_j))
424	    && (DR_GROUP_FIRST_DR (vinfo_for_stmt (stmt_i))
425		== DR_GROUP_FIRST_DR (vinfo_for_stmt (stmt_j)))))
426    return true;
427  else
428    return false;
429}
430
431/* If address ranges represented by DDR_I and DDR_J are equal,
432   return TRUE, otherwise return FALSE.  */
433
434static bool
435vect_vfa_range_equal (ddr_p ddr_i, ddr_p ddr_j)
436{
437  if ((vect_same_range_drs (DDR_A (ddr_i), DDR_A (ddr_j))
438       && vect_same_range_drs (DDR_B (ddr_i), DDR_B (ddr_j)))
439      || (vect_same_range_drs (DDR_A (ddr_i), DDR_B (ddr_j))
440	  && vect_same_range_drs (DDR_B (ddr_i), DDR_A (ddr_j))))
441    return true;
442  else
443    return false;
444}
445
/* Insert DDR into the LOOP_VINFO list of ddrs that may alias and need to be
   tested at run-time.  Return TRUE if DDR was successfully inserted.
   Return FALSE if versioning is not supported.  */
449
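/* The ddrs collected here are later used to emit a run-time guard that
   versions the loop.  Conceptually (a sketch only, not the exact code the
   vectorizer generates):

     if (the ranges accessed through DDR_A and DDR_B do not overlap)
       ... vectorized loop ...
     else
       ... original scalar loop ...  */
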
450static bool
451vect_mark_for_runtime_alias_test (ddr_p ddr, loop_vec_info loop_vinfo)
452{
453  struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
454
455  if ((unsigned) PARAM_VALUE (PARAM_VECT_MAX_VERSION_FOR_ALIAS_CHECKS) == 0)
456    return false;
457
458  if (vect_print_dump_info (REPORT_DR_DETAILS))
459    {
460      fprintf (vect_dump, "mark for run-time aliasing test between ");
461      print_generic_expr (vect_dump, DR_REF (DDR_A (ddr)), TDF_SLIM);
462      fprintf (vect_dump, " and ");
463      print_generic_expr (vect_dump, DR_REF (DDR_B (ddr)), TDF_SLIM);
464    }
465
466  if (optimize_loop_nest_for_size_p (loop))
467    {
468      if (vect_print_dump_info (REPORT_DR_DETAILS))
469	fprintf (vect_dump, "versioning not supported when optimizing for size.");
470      return false;
471    }
472
473  /* FORNOW: We don't support versioning with outer-loop vectorization.  */
474  if (loop->inner)
475    {
476      if (vect_print_dump_info (REPORT_DR_DETAILS))
477	fprintf (vect_dump, "versioning not yet supported for outer-loops.");
478      return false;
479    }
480
481  VEC_safe_push (ddr_p, heap, LOOP_VINFO_MAY_ALIAS_DDRS (loop_vinfo), ddr);
482  return true;
483}
484
485
/* Function vect_analyze_data_ref_dependence.

   Return TRUE if there is (or might be) a dependence between the memory
   references DRA and DRB that prevents vectorization.  Return FALSE if the
   dependence is harmless or can instead be checked at run-time by versioning
   for alias.  */
491
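/* For example (an illustrative loop, not taken from the sources):

     for (i = 0; i < N; i++)
       a[i + 3] = a[i] + 1;

   The dependence distance is 3; with a vectorization factor of 4 the
   dependence would be violated, so the loop is rejected below, whereas any
   distance >= VF is harmless.  */
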
492static bool
493vect_analyze_data_ref_dependence (struct data_dependence_relation *ddr,
494                                  loop_vec_info loop_vinfo)
495{
496  unsigned int i;
497  struct loop *loop = NULL;
498  int vectorization_factor = 0;
499  struct data_reference *dra = DDR_A (ddr);
500  struct data_reference *drb = DDR_B (ddr);
501  stmt_vec_info stmtinfo_a = vinfo_for_stmt (DR_STMT (dra));
502  stmt_vec_info stmtinfo_b = vinfo_for_stmt (DR_STMT (drb));
503  int dra_size = GET_MODE_SIZE (TYPE_MODE (TREE_TYPE (DR_REF (dra))));
504  int drb_size = GET_MODE_SIZE (TYPE_MODE (TREE_TYPE (DR_REF (drb))));
505  lambda_vector dist_v;
506  unsigned int loop_depth;
507
508  if (DDR_ARE_DEPENDENT (ddr) == chrec_known)
509    {
510      /* Independent data accesses.  */
511      vect_check_interleaving (dra, drb);
512      return false;
513    }
514
515  if (loop_vinfo)
516    {
517      loop = LOOP_VINFO_LOOP (loop_vinfo);
518      vectorization_factor = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
519    }
520
521  if ((DR_IS_READ (dra) && DR_IS_READ (drb) && loop_vinfo) || dra == drb)
522    return false;
523
524  if (DDR_ARE_DEPENDENT (ddr) == chrec_dont_know)
525    {
526      if (loop_vinfo)
527        {
528          if (vect_print_dump_info (REPORT_DR_DETAILS))
529            {
530              fprintf (vect_dump, "versioning for alias required: "
531                                  "can't determine dependence between ");
532              print_generic_expr (vect_dump, DR_REF (dra), TDF_SLIM);
533              fprintf (vect_dump, " and ");
534              print_generic_expr (vect_dump, DR_REF (drb), TDF_SLIM);
535            }
536
537          /* Add to list of ddrs that need to be tested at run-time.  */
538          return !vect_mark_for_runtime_alias_test (ddr, loop_vinfo);
539        }
540
      /* When vectorizing a basic block, an unknown dependence can still mean
         a strided access.  */
543      if (vect_check_interleaving (dra, drb))
544         return false;
545
546      if (vect_print_dump_info (REPORT_DR_DETAILS))
547        {
548          fprintf (vect_dump, "can't determine dependence between ");
549          print_generic_expr (vect_dump, DR_REF (dra), TDF_SLIM);
550          fprintf (vect_dump, " and ");
551          print_generic_expr (vect_dump, DR_REF (drb), TDF_SLIM);
552        }
553
554      return true;
555    }
556
  /* Versioning for alias is not yet supported for basic block SLP, and
     dependence distance is not applicable; hence, in the case of a known data
     dependence, basic block vectorization is impossible for now.  */
560  if (!loop_vinfo)
561    {
562      if (dra != drb && vect_check_interleaving (dra, drb))
563        return false;
564
565      if (vect_print_dump_info (REPORT_DR_DETAILS))
566        {
567          fprintf (vect_dump, "determined dependence between ");
568          print_generic_expr (vect_dump, DR_REF (dra), TDF_SLIM);
569          fprintf (vect_dump, " and ");
570          print_generic_expr (vect_dump, DR_REF (drb), TDF_SLIM);
571        }
572
573      return true;
574    }
575
576  /* Loop-based vectorization and known data dependence.  */
577  if (DDR_NUM_DIST_VECTS (ddr) == 0)
578    {
579      if (vect_print_dump_info (REPORT_DR_DETAILS))
580        {
581          fprintf (vect_dump, "versioning for alias required: bad dist vector for ");
582          print_generic_expr (vect_dump, DR_REF (dra), TDF_SLIM);
583          fprintf (vect_dump, " and ");
584          print_generic_expr (vect_dump, DR_REF (drb), TDF_SLIM);
585        }
586      /* Add to list of ddrs that need to be tested at run-time.  */
587      return !vect_mark_for_runtime_alias_test (ddr, loop_vinfo);
588    }
589
590  loop_depth = index_in_loop_nest (loop->num, DDR_LOOP_NEST (ddr));
591  for (i = 0; VEC_iterate (lambda_vector, DDR_DIST_VECTS (ddr), i, dist_v); i++)
592    {
593      int dist = dist_v[loop_depth];
594
595      if (vect_print_dump_info (REPORT_DR_DETAILS))
596	fprintf (vect_dump, "dependence distance  = %d.", dist);
597
598      /* Same loop iteration.  */
599      if (dist % vectorization_factor == 0 && dra_size == drb_size)
600	{
601	  /* Two references with distance zero have the same alignment.  */
602	  VEC_safe_push (dr_p, heap, STMT_VINFO_SAME_ALIGN_REFS (stmtinfo_a), drb);
603	  VEC_safe_push (dr_p, heap, STMT_VINFO_SAME_ALIGN_REFS (stmtinfo_b), dra);
604	  if (vect_print_dump_info (REPORT_ALIGNMENT))
605	    fprintf (vect_dump, "accesses have the same alignment.");
606	  if (vect_print_dump_info (REPORT_DR_DETAILS))
607	    {
608	      fprintf (vect_dump, "dependence distance modulo vf == 0 between ");
609	      print_generic_expr (vect_dump, DR_REF (dra), TDF_SLIM);
610	      fprintf (vect_dump, " and ");
611	      print_generic_expr (vect_dump, DR_REF (drb), TDF_SLIM);
612	    }
613
          /* For interleaving, mark that there is a read-write dependency if
             necessary.  We already checked above that one of the data-refs is
             a store.  */
616          if (DR_IS_READ (dra))
617            DR_GROUP_READ_WRITE_DEPENDENCE (stmtinfo_a) = true;
618	  else
619            {
620              if (DR_IS_READ (drb))
621                DR_GROUP_READ_WRITE_DEPENDENCE (stmtinfo_b) = true;
622	    }
623
624          continue;
625	}
626
627      if (abs (dist) >= vectorization_factor
628          || (dist > 0 && DDR_REVERSED_P (ddr)))
629	{
630	  /* Dependence distance does not create dependence, as far as
631	     vectorization is concerned, in this case. If DDR_REVERSED_P the
632	     order of the data-refs in DDR was reversed (to make distance
633	     vector positive), and the actual distance is negative.  */
634	  if (vect_print_dump_info (REPORT_DR_DETAILS))
635	    fprintf (vect_dump, "dependence distance >= VF or negative.");
636	  continue;
637	}
638
639      if (vect_print_dump_info (REPORT_UNVECTORIZED_LOCATIONS))
640	{
641	  fprintf (vect_dump, "not vectorized, possible dependence "
642    		              "between data-refs ");
643	  print_generic_expr (vect_dump, DR_REF (dra), TDF_SLIM);
644	  fprintf (vect_dump, " and ");
645	  print_generic_expr (vect_dump, DR_REF (drb), TDF_SLIM);
646	}
647
648      return true;
649    }
650
651  return false;
652}
653
654/* Function vect_analyze_data_ref_dependences.
655
   Examine all the data references in the loop (or the basic block, for basic
   block SLP), and make sure there do not exist any data dependences between
   them.  */
658
659bool
660vect_analyze_data_ref_dependences (loop_vec_info loop_vinfo,
661                                   bb_vec_info bb_vinfo)
662{
663  unsigned int i;
664  VEC (ddr_p, heap) *ddrs = NULL;
665  struct data_dependence_relation *ddr;
666
667  if (vect_print_dump_info (REPORT_DETAILS))
668    fprintf (vect_dump, "=== vect_analyze_dependences ===");
669
670  if (loop_vinfo)
671    ddrs = LOOP_VINFO_DDRS (loop_vinfo);
672  else
673    ddrs = BB_VINFO_DDRS (bb_vinfo);
674
675  for (i = 0; VEC_iterate (ddr_p, ddrs, i, ddr); i++)
676    if (vect_analyze_data_ref_dependence (ddr, loop_vinfo))
677      return false;
678
679  return true;
680}
681
682
683/* Function vect_compute_data_ref_alignment
684
685   Compute the misalignment of the data reference DR.
686
687   Output:
688   1. If during the misalignment computation it is found that the data reference
689      cannot be vectorized then false is returned.
690   2. DR_MISALIGNMENT (DR) is defined.
691
692   FOR NOW: No analysis is actually performed. Misalignment is calculated
693   only for trivial cases. TODO.  */
694
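/* For example (illustrative numbers only): for a V4SI vectype (16-byte
   alignment), a data-ref whose base is known to be 16-byte aligned and
   whose DR_INIT is 4 gets a misalignment of 4 % 16 = 4 bytes.  */
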
695static bool
696vect_compute_data_ref_alignment (struct data_reference *dr)
697{
698  gimple stmt = DR_STMT (dr);
699  stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
700  loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
701  struct loop *loop = NULL;
702  tree ref = DR_REF (dr);
703  tree vectype;
704  tree base, base_addr;
705  bool base_aligned;
706  tree misalign;
707  tree aligned_to, alignment;
708
709  if (vect_print_dump_info (REPORT_DETAILS))
710    fprintf (vect_dump, "vect_compute_data_ref_alignment:");
711
712  if (loop_vinfo)
713    loop = LOOP_VINFO_LOOP (loop_vinfo);
714
715  /* Initialize misalignment to unknown.  */
716  SET_DR_MISALIGNMENT (dr, -1);
717
718  misalign = DR_INIT (dr);
719  aligned_to = DR_ALIGNED_TO (dr);
720  base_addr = DR_BASE_ADDRESS (dr);
721  vectype = STMT_VINFO_VECTYPE (stmt_info);
722
  /* In case the dataref is in an inner-loop of the loop that is being
     vectorized (LOOP), we use the base and misalignment information
     relative to the outer-loop (LOOP).  This is ok only if the misalignment
     stays the same throughout the execution of the inner-loop, which is why
     we have to check that the stride of the dataref in the inner-loop is a
     multiple of the vector size.  */
729  if (loop && nested_in_vect_loop_p (loop, stmt))
730    {
731      tree step = DR_STEP (dr);
732      HOST_WIDE_INT dr_step = TREE_INT_CST_LOW (step);
733
734      if (dr_step % GET_MODE_SIZE (TYPE_MODE (vectype)) == 0)
735        {
736          if (vect_print_dump_info (REPORT_ALIGNMENT))
            fprintf (vect_dump, "inner step is a multiple of the vector-size.");
738	  misalign = STMT_VINFO_DR_INIT (stmt_info);
739	  aligned_to = STMT_VINFO_DR_ALIGNED_TO (stmt_info);
740	  base_addr = STMT_VINFO_DR_BASE_ADDRESS (stmt_info);
741        }
742      else
743	{
744	  if (vect_print_dump_info (REPORT_ALIGNMENT))
            fprintf (vect_dump, "inner step is not a multiple of the vector-size.");
746	  misalign = NULL_TREE;
747	}
748    }
749
750  base = build_fold_indirect_ref (base_addr);
751  alignment = ssize_int (TYPE_ALIGN (vectype)/BITS_PER_UNIT);
752
753  if ((aligned_to && tree_int_cst_compare (aligned_to, alignment) < 0)
754      || !misalign)
755    {
756      if (vect_print_dump_info (REPORT_ALIGNMENT))
757	{
758	  fprintf (vect_dump, "Unknown alignment for access: ");
759	  print_generic_expr (vect_dump, base, TDF_SLIM);
760	}
761      return true;
762    }
763
764  if ((DECL_P (base)
765       && tree_int_cst_compare (ssize_int (DECL_ALIGN_UNIT (base)),
766				alignment) >= 0)
767      || (TREE_CODE (base_addr) == SSA_NAME
768	  && tree_int_cst_compare (ssize_int (TYPE_ALIGN_UNIT (TREE_TYPE (
769						      TREE_TYPE (base_addr)))),
770				   alignment) >= 0))
771    base_aligned = true;
772  else
773    base_aligned = false;
774
775  if (!base_aligned)
776    {
777      /* Do not change the alignment of global variables if
778	 flag_section_anchors is enabled.  */
779      if (!vect_can_force_dr_alignment_p (base, TYPE_ALIGN (vectype))
780	  || (TREE_STATIC (base) && flag_section_anchors))
781	{
782	  if (vect_print_dump_info (REPORT_DETAILS))
783	    {
784	      fprintf (vect_dump, "can't force alignment of ref: ");
785	      print_generic_expr (vect_dump, ref, TDF_SLIM);
786	    }
787	  return true;
788	}
789
790      /* Force the alignment of the decl.
791	 NOTE: This is the only change to the code we make during
792	 the analysis phase, before deciding to vectorize the loop.  */
793      if (vect_print_dump_info (REPORT_DETAILS))
794	fprintf (vect_dump, "force alignment");
795      DECL_ALIGN (base) = TYPE_ALIGN (vectype);
796      DECL_USER_ALIGN (base) = 1;
797    }
798
799  /* At this point we assume that the base is aligned.  */
800  gcc_assert (base_aligned
801	      || (TREE_CODE (base) == VAR_DECL
802		  && DECL_ALIGN (base) >= TYPE_ALIGN (vectype)));
803
804  /* Modulo alignment.  */
805  misalign = size_binop (FLOOR_MOD_EXPR, misalign, alignment);
806
807  if (!host_integerp (misalign, 1))
808    {
809      /* Negative or overflowed misalignment value.  */
810      if (vect_print_dump_info (REPORT_DETAILS))
811	fprintf (vect_dump, "unexpected misalign value");
812      return false;
813    }
814
815  SET_DR_MISALIGNMENT (dr, TREE_INT_CST_LOW (misalign));
816
817  if (vect_print_dump_info (REPORT_DETAILS))
818    {
819      fprintf (vect_dump, "misalign = %d bytes of ref ", DR_MISALIGNMENT (dr));
820      print_generic_expr (vect_dump, ref, TDF_SLIM);
821    }
822
823  return true;
824}
825
826
827/* Function vect_compute_data_refs_alignment
828
829   Compute the misalignment of data references in the loop.
830   Return FALSE if a data reference is found that cannot be vectorized.  */
831
832static bool
833vect_compute_data_refs_alignment (loop_vec_info loop_vinfo,
834                                  bb_vec_info bb_vinfo)
835{
836  VEC (data_reference_p, heap) *datarefs;
837  struct data_reference *dr;
838  unsigned int i;
839
840  if (loop_vinfo)
841    datarefs = LOOP_VINFO_DATAREFS (loop_vinfo);
842  else
843    datarefs = BB_VINFO_DATAREFS (bb_vinfo);
844
845  for (i = 0; VEC_iterate (data_reference_p, datarefs, i, dr); i++)
846    if (!vect_compute_data_ref_alignment (dr))
847      return false;
848
849  return true;
850}
851
852
853/* Function vect_update_misalignment_for_peel
854
855   DR - the data reference whose misalignment is to be adjusted.
856   DR_PEEL - the data reference whose misalignment is being made
857             zero in the vector loop by the peel.
858   NPEEL - the number of iterations in the peel loop if the misalignment
859           of DR_PEEL is known at compile time.  */
860
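/* For example (illustrative numbers only): with 4-byte elements, a V4SI
   vectype (16 bytes) and NPEEL = 2, a data-ref whose misalignment was
   8 bytes becomes (8 + 2 * 4) % 16 = 0, i.e. aligned after the peel.  */
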
861static void
862vect_update_misalignment_for_peel (struct data_reference *dr,
863                                   struct data_reference *dr_peel, int npeel)
864{
865  unsigned int i;
866  VEC(dr_p,heap) *same_align_drs;
867  struct data_reference *current_dr;
868  int dr_size = GET_MODE_SIZE (TYPE_MODE (TREE_TYPE (DR_REF (dr))));
869  int dr_peel_size = GET_MODE_SIZE (TYPE_MODE (TREE_TYPE (DR_REF (dr_peel))));
870  stmt_vec_info stmt_info = vinfo_for_stmt (DR_STMT (dr));
871  stmt_vec_info peel_stmt_info = vinfo_for_stmt (DR_STMT (dr_peel));
872
  /* For interleaved data accesses the step in the loop must be multiplied by
     the size of the interleaving group.  */
875  if (STMT_VINFO_STRIDED_ACCESS (stmt_info))
876    dr_size *= DR_GROUP_SIZE (vinfo_for_stmt (DR_GROUP_FIRST_DR (stmt_info)));
877  if (STMT_VINFO_STRIDED_ACCESS (peel_stmt_info))
878    dr_peel_size *= DR_GROUP_SIZE (peel_stmt_info);
879
880  /* It can be assumed that the data refs with the same alignment as dr_peel
881     are aligned in the vector loop.  */
882  same_align_drs
883    = STMT_VINFO_SAME_ALIGN_REFS (vinfo_for_stmt (DR_STMT (dr_peel)));
884  for (i = 0; VEC_iterate (dr_p, same_align_drs, i, current_dr); i++)
885    {
886      if (current_dr != dr)
887        continue;
888      gcc_assert (DR_MISALIGNMENT (dr) / dr_size ==
889                  DR_MISALIGNMENT (dr_peel) / dr_peel_size);
890      SET_DR_MISALIGNMENT (dr, 0);
891      return;
892    }
893
894  if (known_alignment_for_access_p (dr)
895      && known_alignment_for_access_p (dr_peel))
896    {
897      int misal = DR_MISALIGNMENT (dr);
898      tree vectype = STMT_VINFO_VECTYPE (stmt_info);
899      misal += npeel * dr_size;
900      misal %= GET_MODE_SIZE (TYPE_MODE (vectype));
901      SET_DR_MISALIGNMENT (dr, misal);
902      return;
903    }
904
905  if (vect_print_dump_info (REPORT_DETAILS))
906    fprintf (vect_dump, "Setting misalignment to -1.");
907  SET_DR_MISALIGNMENT (dr, -1);
908}
909
910
911/* Function vect_verify_datarefs_alignment
912
913   Return TRUE if all data references in the loop can be
914   handled with respect to alignment.  */
915
916bool
917vect_verify_datarefs_alignment (loop_vec_info loop_vinfo, bb_vec_info bb_vinfo)
918{
919  VEC (data_reference_p, heap) *datarefs;
920  struct data_reference *dr;
921  enum dr_alignment_support supportable_dr_alignment;
922  unsigned int i;
923
924  if (loop_vinfo)
925    datarefs = LOOP_VINFO_DATAREFS (loop_vinfo);
926  else
927    datarefs = BB_VINFO_DATAREFS (bb_vinfo);
928
929  for (i = 0; VEC_iterate (data_reference_p, datarefs, i, dr); i++)
930    {
931      gimple stmt = DR_STMT (dr);
932      stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
933
934      /* For interleaving, only the alignment of the first access matters.  */
935      if (STMT_VINFO_STRIDED_ACCESS (stmt_info)
936          && DR_GROUP_FIRST_DR (stmt_info) != stmt)
937        continue;
938
939      supportable_dr_alignment = vect_supportable_dr_alignment (dr);
940      if (!supportable_dr_alignment)
941        {
942          if (vect_print_dump_info (REPORT_UNVECTORIZED_LOCATIONS))
943            {
944              if (DR_IS_READ (dr))
945                fprintf (vect_dump,
946                         "not vectorized: unsupported unaligned load.");
947              else
948                fprintf (vect_dump,
949                         "not vectorized: unsupported unaligned store.");
950            }
951          return false;
952        }
953      if (supportable_dr_alignment != dr_aligned
954          && vect_print_dump_info (REPORT_ALIGNMENT))
955        fprintf (vect_dump, "Vectorizing an unaligned access.");
956    }
957  return true;
958}
959
960
961/* Function vector_alignment_reachable_p
962
963   Return true if vector alignment for DR is reachable by peeling
964   a few loop iterations.  Return false otherwise.  */
965
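/* For example (illustrative numbers only): with 4-byte elements, a known
   misalignment of 8 bytes can be removed by peeling two iterations, whereas
   a misalignment of 2 bytes can never be peeled away, because the element
   size does not divide it.  */
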
966static bool
967vector_alignment_reachable_p (struct data_reference *dr)
968{
969  gimple stmt = DR_STMT (dr);
970  stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
971  tree vectype = STMT_VINFO_VECTYPE (stmt_info);
972
973  if (STMT_VINFO_STRIDED_ACCESS (stmt_info))
974    {
      /* For interleaved access we peel only if the number of iterations in
         the prolog loop (VF - misalignment, in elements) is a multiple of
         the number of interleaved accesses (the group size).  */
978      int elem_size, mis_in_elements;
979      int nelements = TYPE_VECTOR_SUBPARTS (vectype);
980
981      /* FORNOW: handle only known alignment.  */
982      if (!known_alignment_for_access_p (dr))
983	return false;
984
985      elem_size = GET_MODE_SIZE (TYPE_MODE (vectype)) / nelements;
986      mis_in_elements = DR_MISALIGNMENT (dr) / elem_size;
987
988      if ((nelements - mis_in_elements) % DR_GROUP_SIZE (stmt_info))
989	return false;
990    }
991
  /* If the misalignment is known at compile time then allow peeling
     only if natural alignment is reachable through peeling.  */
994  if (known_alignment_for_access_p (dr) && !aligned_access_p (dr))
995    {
996      HOST_WIDE_INT elmsize =
997		int_cst_value (TYPE_SIZE_UNIT (TREE_TYPE (vectype)));
998      if (vect_print_dump_info (REPORT_DETAILS))
999	{
1000	  fprintf (vect_dump, "data size =" HOST_WIDE_INT_PRINT_DEC, elmsize);
1001	  fprintf (vect_dump, ". misalignment = %d. ", DR_MISALIGNMENT (dr));
1002	}
1003      if (DR_MISALIGNMENT (dr) % elmsize)
1004	{
1005	  if (vect_print_dump_info (REPORT_DETAILS))
1006	    fprintf (vect_dump, "data size does not divide the misalignment.\n");
1007	  return false;
1008	}
1009    }
1010
1011  if (!known_alignment_for_access_p (dr))
1012    {
1013      tree type = (TREE_TYPE (DR_REF (dr)));
1014      tree ba = DR_BASE_OBJECT (dr);
1015      bool is_packed = false;
1016
1017      if (ba)
1018	is_packed = contains_packed_reference (ba);
1019
1020      if (vect_print_dump_info (REPORT_DETAILS))
1021	fprintf (vect_dump, "Unknown misalignment, is_packed = %d",is_packed);
1022      if (targetm.vectorize.vector_alignment_reachable (type, is_packed))
1023	return true;
1024      else
1025	return false;
1026    }
1027
1028  return true;
1029}
1030
1031/* Function vect_enhance_data_refs_alignment
1032
1033   This pass will use loop versioning and loop peeling in order to enhance
1034   the alignment of data references in the loop.
1035
   FOR NOW: we assume that whatever versioning/peeling takes place, only the
   original loop is to be vectorized; any other loops that are created by
   the transformations performed in this pass are not supposed to be
   vectorized.  This restriction will be relaxed.
1040
   This pass will require a cost model to guide it whether to apply peeling
   or versioning or a combination of the two.  For example, the scheme that
   Intel uses when given a loop with several memory accesses is as follows:
   choose one memory access ('p') whose alignment you want to force by doing
   peeling.  Then, either (1) generate a loop in which 'p' is aligned and all
   other accesses are not necessarily aligned, or (2) use loop versioning to
   generate one loop in which all accesses are aligned, and another loop in
   which only 'p' is necessarily aligned.

   ("Automatic Intra-Register Vectorization for the Intel Architecture",
   Aart J.C. Bik, Milind Girkar, Paul M. Grey and Xinmin Tian, International
   Journal of Parallel Programming, Vol. 30, No. 2, April 2002.)
1053
1054   Devising a cost model is the most critical aspect of this work. It will
1055   guide us on which access to peel for, whether to use loop versioning, how
1056   many versions to create, etc. The cost model will probably consist of
1057   generic considerations as well as target specific considerations (on
1058   powerpc for example, misaligned stores are more painful than misaligned
1059   loads).
1060
1061   Here are the general steps involved in alignment enhancements:
1062
1063     -- original loop, before alignment analysis:
1064	for (i=0; i<N; i++){
1065	  x = q[i];			# DR_MISALIGNMENT(q) = unknown
1066	  p[i] = y;			# DR_MISALIGNMENT(p) = unknown
1067	}
1068
1069     -- After vect_compute_data_refs_alignment:
1070	for (i=0; i<N; i++){
1071	  x = q[i];			# DR_MISALIGNMENT(q) = 3
1072	  p[i] = y;			# DR_MISALIGNMENT(p) = unknown
1073	}
1074
1075     -- Possibility 1: we do loop versioning:
1076     if (p is aligned) {
1077	for (i=0; i<N; i++){	# loop 1A
1078	  x = q[i];			# DR_MISALIGNMENT(q) = 3
1079	  p[i] = y;			# DR_MISALIGNMENT(p) = 0
1080	}
1081     }
1082     else {
1083	for (i=0; i<N; i++){	# loop 1B
1084	  x = q[i];			# DR_MISALIGNMENT(q) = 3
1085	  p[i] = y;			# DR_MISALIGNMENT(p) = unaligned
1086	}
1087     }
1088
1089     -- Possibility 2: we do loop peeling:
1090     for (i = 0; i < 3; i++){	# (scalar loop, not to be vectorized).
1091	x = q[i];
1092	p[i] = y;
1093     }
1094     for (i = 3; i < N; i++){	# loop 2A
1095	x = q[i];			# DR_MISALIGNMENT(q) = 0
1096	p[i] = y;			# DR_MISALIGNMENT(p) = unknown
1097     }
1098
1099     -- Possibility 3: combination of loop peeling and versioning:
1100     for (i = 0; i < 3; i++){	# (scalar loop, not to be vectorized).
1101	x = q[i];
1102	p[i] = y;
1103     }
1104     if (p is aligned) {
1105	for (i = 3; i<N; i++){	# loop 3A
1106	  x = q[i];			# DR_MISALIGNMENT(q) = 0
1107	  p[i] = y;			# DR_MISALIGNMENT(p) = 0
1108	}
1109     }
1110     else {
1111	for (i = 3; i<N; i++){	# loop 3B
1112	  x = q[i];			# DR_MISALIGNMENT(q) = 0
1113	  p[i] = y;			# DR_MISALIGNMENT(p) = unaligned
1114	}
1115     }
1116
1117     These loops are later passed to loop_transform to be vectorized. The
1118     vectorizer will use the alignment information to guide the transformation
1119     (whether to generate regular loads/stores, or with special handling for
1120     misalignment).  */
1121
1122bool
1123vect_enhance_data_refs_alignment (loop_vec_info loop_vinfo)
1124{
1125  VEC (data_reference_p, heap) *datarefs = LOOP_VINFO_DATAREFS (loop_vinfo);
1126  struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
1127  enum dr_alignment_support supportable_dr_alignment;
1128  struct data_reference *dr0 = NULL;
1129  struct data_reference *dr;
1130  unsigned int i;
1131  bool do_peeling = false;
1132  bool do_versioning = false;
1133  bool stat;
1134  gimple stmt;
1135  stmt_vec_info stmt_info;
1136  int vect_versioning_for_alias_required;
1137
1138  if (vect_print_dump_info (REPORT_DETAILS))
1139    fprintf (vect_dump, "=== vect_enhance_data_refs_alignment ===");
1140
1141  /* While cost model enhancements are expected in the future, the high level
1142     view of the code at this time is as follows:
1143
1144     A) If there is a misaligned access then see if peeling to align
1145        this access can make all data references satisfy
1146        vect_supportable_dr_alignment.  If so, update data structures
1147        as needed and return true.
1148
1149     B) If peeling wasn't possible and there is a data reference with an
1150        unknown misalignment that does not satisfy vect_supportable_dr_alignment
1151        then see if loop versioning checks can be used to make all data
1152        references satisfy vect_supportable_dr_alignment.  If so, update
1153        data structures as needed and return true.
1154
1155     C) If neither peeling nor versioning were successful then return false if
1156        any data reference does not satisfy vect_supportable_dr_alignment.
1157
1158     D) Return true (all data references satisfy vect_supportable_dr_alignment).
1159
1160     Note, Possibility 3 above (which is peeling and versioning together) is not
1161     being done at this time.  */
1162
1163  /* (1) Peeling to force alignment.  */
1164
1165  /* (1.1) Decide whether to perform peeling, and how many iterations to peel:
1166     Considerations:
1167     + How many accesses will become aligned due to the peeling
1168     - How many accesses will become unaligned due to the peeling,
1169       and the cost of misaligned accesses.
1170     - The cost of peeling (the extra runtime checks, the increase
1171       in code size).
1172
1173     The scheme we use FORNOW: peel to force the alignment of the first
1174     unsupported misaligned access in the loop.
1175
1176     TODO: Use a cost model.  */
1177
1178  for (i = 0; VEC_iterate (data_reference_p, datarefs, i, dr); i++)
1179    {
1180      stmt = DR_STMT (dr);
1181      stmt_info = vinfo_for_stmt (stmt);
1182
1183      /* For interleaving, only the alignment of the first access
1184         matters.  */
1185      if (STMT_VINFO_STRIDED_ACCESS (stmt_info)
1186          && DR_GROUP_FIRST_DR (stmt_info) != stmt)
1187        continue;
1188
1189      if (!DR_IS_READ (dr) && !aligned_access_p (dr))
1190        {
1191	  do_peeling = vector_alignment_reachable_p (dr);
1192	  if (do_peeling)
1193	    dr0 = dr;
1194	  if (!do_peeling && vect_print_dump_info (REPORT_DETAILS))
1195            fprintf (vect_dump, "vector alignment may not be reachable");
1196	  break;
1197	}
1198    }
1199
1200  vect_versioning_for_alias_required
1201    = LOOP_REQUIRES_VERSIONING_FOR_ALIAS (loop_vinfo);
1202
1203  /* Temporarily, if versioning for alias is required, we disable peeling
1204     until we support peeling and versioning.  Often peeling for alignment
1205     will require peeling for loop-bound, which in turn requires that we
1206     know how to adjust the loop ivs after the loop.  */
1207  if (vect_versioning_for_alias_required
1208      || !vect_can_advance_ivs_p (loop_vinfo)
1209      || !slpeel_can_duplicate_loop_p (loop, single_exit (loop)))
1210    do_peeling = false;
1211
1212  if (do_peeling)
1213    {
1214      int mis;
1215      int npeel = 0;
1216      gimple stmt = DR_STMT (dr0);
1217      stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
1218      tree vectype = STMT_VINFO_VECTYPE (stmt_info);
1219      int nelements = TYPE_VECTOR_SUBPARTS (vectype);
1220
1221      if (known_alignment_for_access_p (dr0))
1222        {
1223          /* Since it's known at compile time, compute the number of iterations
1224             in the peeled loop (the peeling factor) for use in updating
1225             DR_MISALIGNMENT values.  The peeling factor is the vectorization
1226             factor minus the misalignment as an element count.  */
1227          mis = DR_MISALIGNMENT (dr0);
1228          mis /= GET_MODE_SIZE (TYPE_MODE (TREE_TYPE (DR_REF (dr0))));
1229          npeel = nelements - mis;
1230
1231	  /* For interleaved data access every iteration accesses all the
1232	     members of the group, therefore we divide the number of iterations
1233	     by the group size.  */
1234	  stmt_info = vinfo_for_stmt (DR_STMT (dr0));
1235	  if (STMT_VINFO_STRIDED_ACCESS (stmt_info))
1236	    npeel /= DR_GROUP_SIZE (stmt_info);
1237
1238          if (vect_print_dump_info (REPORT_DETAILS))
1239            fprintf (vect_dump, "Try peeling by %d", npeel);
1240        }
1241
1242      /* Ensure that all data refs can be vectorized after the peel.  */
1243      for (i = 0; VEC_iterate (data_reference_p, datarefs, i, dr); i++)
1244        {
1245          int save_misalignment;
1246
1247	  if (dr == dr0)
1248	    continue;
1249
1250	  stmt = DR_STMT (dr);
1251	  stmt_info = vinfo_for_stmt (stmt);
1252	  /* For interleaving, only the alignment of the first access
1253            matters.  */
1254	  if (STMT_VINFO_STRIDED_ACCESS (stmt_info)
1255	      && DR_GROUP_FIRST_DR (stmt_info) != stmt)
1256	    continue;
1257
1258	  save_misalignment = DR_MISALIGNMENT (dr);
1259	  vect_update_misalignment_for_peel (dr, dr0, npeel);
1260	  supportable_dr_alignment = vect_supportable_dr_alignment (dr);
1261	  SET_DR_MISALIGNMENT (dr, save_misalignment);
1262
1263	  if (!supportable_dr_alignment)
1264	    {
1265	      do_peeling = false;
1266	      break;
1267	    }
1268	}
1269
1270      if (do_peeling)
1271        {
1272          /* (1.2) Update the DR_MISALIGNMENT of each data reference DR_i.
1273             If the misalignment of DR_i is identical to that of dr0 then set
1274             DR_MISALIGNMENT (DR_i) to zero.  If the misalignment of DR_i and
1275             dr0 are known at compile time then increment DR_MISALIGNMENT (DR_i)
1276             by the peeling factor times the element size of DR_i (MOD the
1277             vectorization factor times the size).  Otherwise, the
1278             misalignment of DR_i must be set to unknown.  */
1279	  for (i = 0; VEC_iterate (data_reference_p, datarefs, i, dr); i++)
1280	    if (dr != dr0)
1281	      vect_update_misalignment_for_peel (dr, dr0, npeel);
1282
1283          LOOP_VINFO_UNALIGNED_DR (loop_vinfo) = dr0;
1284          LOOP_PEELING_FOR_ALIGNMENT (loop_vinfo) = DR_MISALIGNMENT (dr0);
1285	  SET_DR_MISALIGNMENT (dr0, 0);
1286	  if (vect_print_dump_info (REPORT_ALIGNMENT))
1287            fprintf (vect_dump, "Alignment of access forced using peeling.");
1288
1289          if (vect_print_dump_info (REPORT_DETAILS))
1290            fprintf (vect_dump, "Peeling for alignment will be applied.");
1291
1292	  stat = vect_verify_datarefs_alignment (loop_vinfo, NULL);
1293	  gcc_assert (stat);
1294          return stat;
1295        }
1296    }
1297
1298
1299  /* (2) Versioning to force alignment.  */
1300
  /* Try versioning if:
     1) flag_tree_vect_loop_version is TRUE
     2) the loop is being optimized for speed
     3) there is at least one unsupported misaligned data ref with an unknown
        misalignment, and
     4) all misaligned data refs with a known misalignment are supported, and
     5) the number of runtime alignment checks is within reason (a sketch of
        the generated run-time guard follows below).  */
1308
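  /* Conceptually, the run-time guard emitted for versioning for alignment
     ORs the addresses of all potentially misaligned data-refs and tests the
     low bits against the mask computed below, roughly (a sketch only; the
     actual condition is built when the loop is versioned):

       if (((addr_1 | addr_2 | ...) & mask) == 0)
         ... vectorized loop ...
       else
         ... scalar loop ...  */
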
1309  do_versioning =
1310	flag_tree_vect_loop_version
1311	&& optimize_loop_nest_for_speed_p (loop)
1312	&& (!loop->inner); /* FORNOW */
1313
1314  if (do_versioning)
1315    {
1316      for (i = 0; VEC_iterate (data_reference_p, datarefs, i, dr); i++)
1317        {
1318	  stmt = DR_STMT (dr);
1319	  stmt_info = vinfo_for_stmt (stmt);
1320
1321	  /* For interleaving, only the alignment of the first access
1322	     matters.  */
1323	  if (aligned_access_p (dr)
1324	      || (STMT_VINFO_STRIDED_ACCESS (stmt_info)
1325		  && DR_GROUP_FIRST_DR (stmt_info) != stmt))
1326	    continue;
1327
1328	  supportable_dr_alignment = vect_supportable_dr_alignment (dr);
1329
1330          if (!supportable_dr_alignment)
1331            {
1332              gimple stmt;
1333              int mask;
1334              tree vectype;
1335
1336              if (known_alignment_for_access_p (dr)
1337                  || VEC_length (gimple,
1338                                 LOOP_VINFO_MAY_MISALIGN_STMTS (loop_vinfo))
1339                     >= (unsigned) PARAM_VALUE (PARAM_VECT_MAX_VERSION_FOR_ALIGNMENT_CHECKS))
1340                {
1341                  do_versioning = false;
1342                  break;
1343                }
1344
1345              stmt = DR_STMT (dr);
1346              vectype = STMT_VINFO_VECTYPE (vinfo_for_stmt (stmt));
1347              gcc_assert (vectype);
1348
1349              /* The rightmost bits of an aligned address must be zeros.
1350                 Construct the mask needed for this test.  For example,
1351                 GET_MODE_SIZE for the vector mode V4SI is 16 bytes so the
1352                 mask must be 15 = 0xf. */
1353              mask = GET_MODE_SIZE (TYPE_MODE (vectype)) - 1;
1354
1355              /* FORNOW: use the same mask to test all potentially unaligned
1356                 references in the loop.  The vectorizer currently supports
1357                 a single vector size, see the reference to
1358                 GET_MODE_NUNITS (TYPE_MODE (vectype)) where the
1359                 vectorization factor is computed.  */
1360              gcc_assert (!LOOP_VINFO_PTR_MASK (loop_vinfo)
1361                          || LOOP_VINFO_PTR_MASK (loop_vinfo) == mask);
1362              LOOP_VINFO_PTR_MASK (loop_vinfo) = mask;
1363              VEC_safe_push (gimple, heap,
1364                             LOOP_VINFO_MAY_MISALIGN_STMTS (loop_vinfo),
1365                             DR_STMT (dr));
1366            }
1367        }
1368
1369      /* Versioning requires at least one misaligned data reference.  */
1370      if (!LOOP_REQUIRES_VERSIONING_FOR_ALIGNMENT (loop_vinfo))
1371        do_versioning = false;
1372      else if (!do_versioning)
1373        VEC_truncate (gimple, LOOP_VINFO_MAY_MISALIGN_STMTS (loop_vinfo), 0);
1374    }
1375
1376  if (do_versioning)
1377    {
1378      VEC(gimple,heap) *may_misalign_stmts
1379        = LOOP_VINFO_MAY_MISALIGN_STMTS (loop_vinfo);
1380      gimple stmt;
1381
1382      /* It can now be assumed that the data references in the statements
1383         in LOOP_VINFO_MAY_MISALIGN_STMTS will be aligned in the version
1384         of the loop being vectorized.  */
1385      for (i = 0; VEC_iterate (gimple, may_misalign_stmts, i, stmt); i++)
1386        {
1387          stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
1388          dr = STMT_VINFO_DATA_REF (stmt_info);
1389	  SET_DR_MISALIGNMENT (dr, 0);
1390	  if (vect_print_dump_info (REPORT_ALIGNMENT))
1391            fprintf (vect_dump, "Alignment of access forced using versioning.");
1392        }
1393
1394      if (vect_print_dump_info (REPORT_DETAILS))
1395        fprintf (vect_dump, "Versioning for alignment will be applied.");
1396
1397      /* Peeling and versioning can't be done together at this time.  */
1398      gcc_assert (! (do_peeling && do_versioning));
1399
1400      stat = vect_verify_datarefs_alignment (loop_vinfo, NULL);
1401      gcc_assert (stat);
1402      return stat;
1403    }
1404
1405  /* This point is reached if neither peeling nor versioning is being done.  */
1406  gcc_assert (! (do_peeling || do_versioning));
1407
1408  stat = vect_verify_datarefs_alignment (loop_vinfo, NULL);
1409  return stat;
1410}
1411
1412
1413/* Function vect_analyze_data_refs_alignment
1414
1415   Analyze the alignment of the data-references in the loop.
1416   Return FALSE if a data reference is found that cannot be vectorized.  */
1417
1418bool
1419vect_analyze_data_refs_alignment (loop_vec_info loop_vinfo,
1420                                  bb_vec_info bb_vinfo)
1421{
1422  if (vect_print_dump_info (REPORT_DETAILS))
1423    fprintf (vect_dump, "=== vect_analyze_data_refs_alignment ===");
1424
1425  if (!vect_compute_data_refs_alignment (loop_vinfo, bb_vinfo))
1426    {
1427      if (vect_print_dump_info (REPORT_UNVECTORIZED_LOCATIONS))
1428	fprintf (vect_dump,
1429		 "not vectorized: can't calculate alignment for data ref.");
1430      return false;
1431    }
1432
1433  return true;
1434}
1435
1436
1437/* Analyze groups of strided accesses: check that DR belongs to a group of
1438   strided accesses of legal size, step, etc. Detect gaps, single element
1439   interleaving, and other special cases. Set strided access info.
1440   Collect groups of strided stores for further use in SLP analysis.  */
1441
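/* For example (an illustrative loop, not taken from the sources):

     for (i = 0; i < N; i++)
       {
         x = p[3*i];
         y = p[3*i + 2];
       }

   Here STEP is 3 elements and the group size (stride) is 3; the element
   p[3*i + 1] is never accessed, which is a gap.  Gaps are allowed for load
   groups but would make an interleaved store group invalid.  */
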
1442static bool
1443vect_analyze_group_access (struct data_reference *dr)
1444{
1445  tree step = DR_STEP (dr);
1446  tree scalar_type = TREE_TYPE (DR_REF (dr));
1447  HOST_WIDE_INT type_size = TREE_INT_CST_LOW (TYPE_SIZE_UNIT (scalar_type));
1448  gimple stmt = DR_STMT (dr);
1449  stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
1450  loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
1451  bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
1452  HOST_WIDE_INT dr_step = TREE_INT_CST_LOW (step);
1453  HOST_WIDE_INT stride;
1454  bool slp_impossible = false;
1455
1456  /* For interleaving, STRIDE is STEP counted in elements, i.e., the size of the
1457     interleaving group (including gaps).  */
1458  stride = dr_step / type_size;
1459
  /* A non-consecutive access is possible only if it is a part of
     interleaving.  */
1461  if (!DR_GROUP_FIRST_DR (vinfo_for_stmt (stmt)))
1462    {
      /* Check if this DR is a part of interleaving, and is a single
         element of the group that is accessed in the loop.  */
1465
1466      /* Gaps are supported only for loads. STEP must be a multiple of the type
1467	 size.  The size of the group must be a power of 2.  */
1468      if (DR_IS_READ (dr)
1469	  && (dr_step % type_size) == 0
1470	  && stride > 0
1471	  && exact_log2 (stride) != -1)
1472	{
1473	  DR_GROUP_FIRST_DR (vinfo_for_stmt (stmt)) = stmt;
1474	  DR_GROUP_SIZE (vinfo_for_stmt (stmt)) = stride;
1475	  if (vect_print_dump_info (REPORT_DR_DETAILS))
1476	    {
1477	      fprintf (vect_dump, "Detected single element interleaving ");
1478	      print_generic_expr (vect_dump, DR_REF (dr), TDF_SLIM);
1479	      fprintf (vect_dump, " step ");
1480	      print_generic_expr (vect_dump, step, TDF_SLIM);
1481	    }
1482	  return true;
1483	}
1484      if (vect_print_dump_info (REPORT_DETAILS))
1485	fprintf (vect_dump, "not consecutive access");
1486      return false;
1487    }
1488
1489  if (DR_GROUP_FIRST_DR (vinfo_for_stmt (stmt)) == stmt)
1490    {
1491      /* First stmt in the interleaving chain. Check the chain.  */
1492      gimple next = DR_GROUP_NEXT_DR (vinfo_for_stmt (stmt));
1493      struct data_reference *data_ref = dr;
1494      unsigned int count = 1;
1495      tree next_step;
1496      tree prev_init = DR_INIT (data_ref);
1497      gimple prev = stmt;
1498      HOST_WIDE_INT diff, count_in_bytes, gaps = 0;
1499
1500      while (next)
1501        {
          /* Skip same data-refs.  If two or more stmts share a data-ref
             (supported only for loads), we vectorize only the first stmt, and
             the rest get their vectorized loads from the first one.  */
1505          if (!tree_int_cst_compare (DR_INIT (data_ref),
1506                                     DR_INIT (STMT_VINFO_DATA_REF (
1507						   vinfo_for_stmt (next)))))
1508            {
1509              if (!DR_IS_READ (data_ref))
1510                {
1511                  if (vect_print_dump_info (REPORT_DETAILS))
1512                    fprintf (vect_dump, "Two store stmts share the same dr.");
1513                  return false;
1514                }
1515
              /* Check that there are no load-store dependencies for these
                 loads, to prevent a load-store-load sequence to the same
                 location.  */
1518              if (DR_GROUP_READ_WRITE_DEPENDENCE (vinfo_for_stmt (next))
1519                  || DR_GROUP_READ_WRITE_DEPENDENCE (vinfo_for_stmt (prev)))
1520                {
1521                  if (vect_print_dump_info (REPORT_DETAILS))
1522                    fprintf (vect_dump,
1523                             "READ_WRITE dependence in interleaving.");
1524                  return false;
1525                }
1526
1527              /* For load use the same data-ref load.  */
1528              DR_GROUP_SAME_DR_STMT (vinfo_for_stmt (next)) = prev;
1529
1530              prev = next;
1531              next = DR_GROUP_NEXT_DR (vinfo_for_stmt (next));
1532              continue;
1533            }
1534          prev = next;
1535
1536          /* Check that all the accesses have the same STEP.  */
1537          next_step = DR_STEP (STMT_VINFO_DATA_REF (vinfo_for_stmt (next)));
1538          if (tree_int_cst_compare (step, next_step))
1539            {
1540              if (vect_print_dump_info (REPORT_DETAILS))
1541                fprintf (vect_dump, "not consecutive access in interleaving");
1542              return false;
1543            }
1544
1545          data_ref = STMT_VINFO_DATA_REF (vinfo_for_stmt (next));
1546          /* Check that the distance between two accesses is equal to the type
1547             size. Otherwise, we have gaps.  */
1548          diff = (TREE_INT_CST_LOW (DR_INIT (data_ref))
1549                  - TREE_INT_CST_LOW (prev_init)) / type_size;
1550	  if (diff != 1)
1551	    {
1552	      /* FORNOW: SLP of accesses with gaps is not supported.  */
1553	      slp_impossible = true;
1554	      if (!DR_IS_READ (data_ref))
1555		{
1556		  if (vect_print_dump_info (REPORT_DETAILS))
1557		    fprintf (vect_dump, "interleaved store with gaps");
1558		  return false;
1559		}
1560
1561              gaps += diff - 1;
1562	    }
1563
1564          /* Store the gap from the previous member of the group. If there is no
1565             gap in the access, DR_GROUP_GAP is always 1.  */
1566          DR_GROUP_GAP (vinfo_for_stmt (next)) = diff;
1567
1568          prev_init = DR_INIT (data_ref);
1569          next = DR_GROUP_NEXT_DR (vinfo_for_stmt (next));
1570          /* Count the number of data-refs in the chain.  */
1571          count++;
1572        }
1573
      /* COUNT is the number of accesses found; multiply it by the size of
         the type to get COUNT_IN_BYTES.  */
1576      count_in_bytes = type_size * count;
1577
1578      /* Check that the size of the interleaving (including gaps) is not
1579         greater than STEP.  */
1580      if (dr_step && dr_step < count_in_bytes + gaps * type_size)
1581        {
1582          if (vect_print_dump_info (REPORT_DETAILS))
1583            {
1584              fprintf (vect_dump, "interleaving size is greater than step for ");
1585              print_generic_expr (vect_dump, DR_REF (dr), TDF_SLIM);
1586            }
1587          return false;
1588        }
1589
1590      /* Check that the size of the interleaving is equal to STEP for stores,
1591         i.e., that there are no gaps.  */
1592      if (dr_step && dr_step != count_in_bytes)
1593        {
1594          if (DR_IS_READ (dr))
1595            {
1596              slp_impossible = true;
1597              /* There is a gap after the last load in the group. This gap is a
1598                 difference between the stride and the number of elements. When
1599                 there is no gap, this difference should be 0.  */
1600              DR_GROUP_GAP (vinfo_for_stmt (stmt)) = stride - count;
1601            }
1602          else
1603            {
1604              if (vect_print_dump_info (REPORT_DETAILS))
1605                fprintf (vect_dump, "interleaved store with gaps");
1606              return false;
1607            }
1608        }
1609
1610      /* Check that STEP is a multiple of type size.  */
1611      if (dr_step && (dr_step % type_size) != 0)
1612        {
1613          if (vect_print_dump_info (REPORT_DETAILS))
1614            {
1615              fprintf (vect_dump, "step is not a multiple of type size: step ");
1616              print_generic_expr (vect_dump, step, TDF_SLIM);
1617              fprintf (vect_dump, " size ");
1618              print_generic_expr (vect_dump, TYPE_SIZE_UNIT (scalar_type),
1619                                  TDF_SLIM);
1620            }
1621          return false;
1622        }
1623
1624      /* FORNOW: we handle only interleaving that is a power of 2.
1625         We don't fail here if it may be still possible to vectorize the
1626         group using SLP. If not, the size of the group will be checked in
1627         vect_analyze_operations, and the vectorization will fail.  */
1628      if (exact_log2 (stride) == -1)
1629	{
1630	  if (vect_print_dump_info (REPORT_DETAILS))
1631	    fprintf (vect_dump, "interleaving is not a power of 2");
1632
1633	  if (slp_impossible)
1634	    return false;
1635	}
1636
1637      if (stride == 0)
1638        stride = count;
1639
1640      DR_GROUP_SIZE (vinfo_for_stmt (stmt)) = stride;
1641      if (vect_print_dump_info (REPORT_DETAILS))
1642        fprintf (vect_dump, "Detected interleaving of size %d", (int)stride);
1643
      /* SLP: create an SLP data structure for every interleaving group of
	 stores for further analysis in vect_analyze_slp.  */
1646      if (!DR_IS_READ (dr) && !slp_impossible)
1647        {
1648          if (loop_vinfo)
1649            VEC_safe_push (gimple, heap, LOOP_VINFO_STRIDED_STORES (loop_vinfo),
1650                           stmt);
1651          if (bb_vinfo)
1652            VEC_safe_push (gimple, heap, BB_VINFO_STRIDED_STORES (bb_vinfo),
1653                           stmt);
1654        }
1655    }
1656
1657  return true;
1658}
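
/* Illustrative example for vect_analyze_group_access (a sketch added for
   exposition; the names IN1, IN2 and OUT are hypothetical):

     for (i = 0; i < N; i++)
       {
         out[2*i]     = in1[i];
         out[2*i + 1] = in2[i];
       }

   Both stores to OUT have DR_STEP == 2 * sizeof (*out), so STRIDE is 2.
   The chain rooted at the first store has COUNT == 2 and no gaps, hence
   COUNT_IN_BYTES == DR_STEP and the group is accepted with
   DR_GROUP_SIZE == 2.  */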
1659
1660
1661/* Analyze the access pattern of the data-reference DR.
1662   In case of non-consecutive accesses call vect_analyze_group_access() to
1663   analyze groups of strided accesses.  */
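
/* For example (an illustrative sketch, not part of the original comment):
     a[i]   : DR_STEP == sizeof (a[0])      -> consecutive access
     a[3*i] : DR_STEP == 3 * sizeof (a[0])  -> analyzed by
                                               vect_analyze_group_access
     a[0]   : DR_STEP == 0                  -> invariant; rejected when it
                                               appears inside a loop  */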
1664
1665static bool
1666vect_analyze_data_ref_access (struct data_reference *dr)
1667{
1668  tree step = DR_STEP (dr);
1669  tree scalar_type = TREE_TYPE (DR_REF (dr));
1670  gimple stmt = DR_STMT (dr);
1671  stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
1672  loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
1673  struct loop *loop = NULL;
1674  HOST_WIDE_INT dr_step = TREE_INT_CST_LOW (step);
1675
1676  if (loop_vinfo)
1677    loop = LOOP_VINFO_LOOP (loop_vinfo);
1678
1679  if (loop_vinfo && !step)
1680    {
1681      if (vect_print_dump_info (REPORT_DETAILS))
1682	fprintf (vect_dump, "bad data-ref access in loop");
1683      return false;
1684    }
1685
1686  /* Don't allow invariant accesses in loops.  */
1687  if (loop_vinfo && dr_step == 0)
1688    return false;
1689
1690  if (loop && nested_in_vect_loop_p (loop, stmt))
1691    {
1692      /* Interleaved accesses are not yet supported within outer-loop
1693        vectorization for references in the inner-loop.  */
1694      DR_GROUP_FIRST_DR (vinfo_for_stmt (stmt)) = NULL;
1695
1696      /* For the rest of the analysis we use the outer-loop step.  */
1697      step = STMT_VINFO_DR_STEP (stmt_info);
1698      dr_step = TREE_INT_CST_LOW (step);
1699
1700      if (dr_step == 0)
1701	{
1702	  if (vect_print_dump_info (REPORT_ALIGNMENT))
1703	    fprintf (vect_dump, "zero step in outer loop.");
1704	  if (DR_IS_READ (dr))
1705  	    return true;
1706	  else
1707	    return false;
1708	}
1709    }
1710
1711  /* Consecutive?  */
1712  if (!tree_int_cst_compare (step, TYPE_SIZE_UNIT (scalar_type)))
1713    {
1714      /* Mark that it is not interleaving.  */
1715      DR_GROUP_FIRST_DR (vinfo_for_stmt (stmt)) = NULL;
1716      return true;
1717    }
1718
1719  if (loop && nested_in_vect_loop_p (loop, stmt))
1720    {
1721      if (vect_print_dump_info (REPORT_ALIGNMENT))
1722	fprintf (vect_dump, "strided access in outer loop.");
1723      return false;
1724    }
1725
  /* Not a consecutive access - check if it is part of an interleaving group.  */
1727  return vect_analyze_group_access (dr);
1728}
1729
1730
1731/* Function vect_analyze_data_ref_accesses.
1732
1733   Analyze the access pattern of all the data references in the loop.
1734
1735   FORNOW: the only access pattern that is considered vectorizable is a
1736	   simple step 1 (consecutive) access.
1737
1738   FORNOW: handle only arrays and pointer accesses.  */
1739
1740bool
1741vect_analyze_data_ref_accesses (loop_vec_info loop_vinfo, bb_vec_info bb_vinfo)
1742{
1743  unsigned int i;
1744  VEC (data_reference_p, heap) *datarefs;
1745  struct data_reference *dr;
1746
1747  if (vect_print_dump_info (REPORT_DETAILS))
1748    fprintf (vect_dump, "=== vect_analyze_data_ref_accesses ===");
1749
1750  if (loop_vinfo)
1751    datarefs = LOOP_VINFO_DATAREFS (loop_vinfo);
1752  else
1753    datarefs = BB_VINFO_DATAREFS (bb_vinfo);
1754
1755  for (i = 0; VEC_iterate (data_reference_p, datarefs, i, dr); i++)
1756    if (!vect_analyze_data_ref_access (dr))
1757      {
1758	if (vect_print_dump_info (REPORT_UNVECTORIZED_LOCATIONS))
1759	  fprintf (vect_dump, "not vectorized: complicated access pattern.");
1760	return false;
1761      }
1762
1763  return true;
1764}
1765
1766/* Function vect_prune_runtime_alias_test_list.
1767
   Prune a list of ddrs to be tested at run-time by versioning for alias.
   Return FALSE if the resulting list of ddrs is longer than allowed by
   PARAM_VECT_MAX_VERSION_FOR_ALIAS_CHECKS, otherwise return TRUE.  */
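
/* For instance (an illustrative example, not from the original comment): if
   the list contains the ddrs (A,B), (A,C) and a second (A,B) whose ranges are
   equal to those of the first according to vect_vfa_range_equal, the duplicate
   (A,B) entry is removed, so only two run-time alias checks remain to be
   compared against PARAM_VECT_MAX_VERSION_FOR_ALIAS_CHECKS.  */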
1771
1772bool
1773vect_prune_runtime_alias_test_list (loop_vec_info loop_vinfo)
1774{
1775  VEC (ddr_p, heap) * ddrs =
1776    LOOP_VINFO_MAY_ALIAS_DDRS (loop_vinfo);
1777  unsigned i, j;
1778
1779  if (vect_print_dump_info (REPORT_DETAILS))
1780    fprintf (vect_dump, "=== vect_prune_runtime_alias_test_list ===");
1781
1782  for (i = 0; i < VEC_length (ddr_p, ddrs); )
1783    {
1784      bool found;
1785      ddr_p ddr_i;
1786
1787      ddr_i = VEC_index (ddr_p, ddrs, i);
1788      found = false;
1789
1790      for (j = 0; j < i; j++)
1791        {
1792	  ddr_p ddr_j = VEC_index (ddr_p, ddrs, j);
1793
1794	  if (vect_vfa_range_equal (ddr_i, ddr_j))
1795	    {
1796	      if (vect_print_dump_info (REPORT_DR_DETAILS))
1797		{
1798		  fprintf (vect_dump, "found equal ranges ");
1799		  print_generic_expr (vect_dump, DR_REF (DDR_A (ddr_i)), TDF_SLIM);
1800		  fprintf (vect_dump, ", ");
1801		  print_generic_expr (vect_dump, DR_REF (DDR_B (ddr_i)), TDF_SLIM);
1802		  fprintf (vect_dump, " and ");
1803		  print_generic_expr (vect_dump, DR_REF (DDR_A (ddr_j)), TDF_SLIM);
1804		  fprintf (vect_dump, ", ");
1805		  print_generic_expr (vect_dump, DR_REF (DDR_B (ddr_j)), TDF_SLIM);
1806		}
1807	      found = true;
1808	      break;
1809	    }
1810	}
1811
1812      if (found)
1813      {
1814	VEC_ordered_remove (ddr_p, ddrs, i);
1815	continue;
1816      }
1817      i++;
1818    }
1819
1820  if (VEC_length (ddr_p, ddrs) >
1821       (unsigned) PARAM_VALUE (PARAM_VECT_MAX_VERSION_FOR_ALIAS_CHECKS))
1822    {
1823      if (vect_print_dump_info (REPORT_DR_DETAILS))
1824	{
1825	  fprintf (vect_dump,
1826		   "disable versioning for alias - max number of generated "
1827		   "checks exceeded.");
1828	}
1829
1830      VEC_truncate (ddr_p, LOOP_VINFO_MAY_ALIAS_DDRS (loop_vinfo), 0);
1831
1832      return false;
1833    }
1834
1835  return true;
1836}
1837
1838
/* Function vect_analyze_data_refs.

   Find all the data references in the loop or basic block.

   The general structure of the analysis of data refs in the vectorizer is as
   follows:
   1- vect_analyze_data_refs(loop/bb): call
      compute_data_dependences_for_loop/bb to find and analyze all data-refs
      in the loop/bb and their dependences.
   2- vect_analyze_data_ref_dependences(): apply dependence testing using
      the ddrs.
   3- vect_analyze_data_refs_alignment(): check that ref_stmt.alignment is ok.
   4- vect_analyze_data_ref_accesses(): check that ref_stmt.step is ok.  */
1853
1854bool
1855vect_analyze_data_refs (loop_vec_info loop_vinfo, bb_vec_info bb_vinfo)
1856{
1857  struct loop *loop = NULL;
1858  basic_block bb = NULL;
1859  unsigned int i;
1860  VEC (data_reference_p, heap) *datarefs;
1861  struct data_reference *dr;
1862  tree scalar_type;
1863  bool res;
1864
1865  if (vect_print_dump_info (REPORT_DETAILS))
1866    fprintf (vect_dump, "=== vect_analyze_data_refs ===\n");
1867
1868  if (loop_vinfo)
1869    {
1870      loop = LOOP_VINFO_LOOP (loop_vinfo);
1871      res = compute_data_dependences_for_loop
1872	(loop, true, &LOOP_VINFO_DATAREFS (loop_vinfo),
1873	 &LOOP_VINFO_DDRS (loop_vinfo));
1874
1875      if (!res)
1876	{
1877	  if (vect_print_dump_info (REPORT_UNVECTORIZED_LOCATIONS))
1878	    fprintf (vect_dump, "not vectorized: loop contains function calls"
1879		     " or data references that cannot be analyzed");
1880	  return false;
1881	}
1882
1883      datarefs = LOOP_VINFO_DATAREFS (loop_vinfo);
1884    }
1885  else
1886    {
1887      bb = BB_VINFO_BB (bb_vinfo);
1888      res = compute_data_dependences_for_bb (bb, true,
1889					     &BB_VINFO_DATAREFS (bb_vinfo),
1890					     &BB_VINFO_DDRS (bb_vinfo));
1891      if (!res)
1892	{
1893	  if (vect_print_dump_info (REPORT_UNVECTORIZED_LOCATIONS))
1894	    fprintf (vect_dump, "not vectorized: basic block contains function"
1895		     " calls or data references that cannot be analyzed");
1896	  return false;
1897	}
1898
1899      datarefs = BB_VINFO_DATAREFS (bb_vinfo);
1900    }
1901
  /* Go through the data-refs, check that the analysis succeeded.  Update
     the DR and vectype pointers in each stmt_vec_info struct.  */
1904
1905  for (i = 0; VEC_iterate (data_reference_p, datarefs, i, dr); i++)
1906    {
1907      gimple stmt;
1908      stmt_vec_info stmt_info;
1909      tree base, offset, init;
1910
1911      if (!dr || !DR_REF (dr))
1912        {
1913          if (vect_print_dump_info (REPORT_UNVECTORIZED_LOCATIONS))
1914	    fprintf (vect_dump, "not vectorized: unhandled data-ref ");
1915          return false;
1916        }
1917
1918      stmt = DR_STMT (dr);
1919      stmt_info = vinfo_for_stmt (stmt);
1920
1921      /* Check that analysis of the data-ref succeeded.  */
1922      if (!DR_BASE_ADDRESS (dr) || !DR_OFFSET (dr) || !DR_INIT (dr)
1923          || !DR_STEP (dr))
1924        {
1925          if (vect_print_dump_info (REPORT_UNVECTORIZED_LOCATIONS))
1926            {
1927              fprintf (vect_dump, "not vectorized: data ref analysis failed ");
1928              print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM);
1929            }
1930          return false;
1931        }
1932
1933      if (TREE_CODE (DR_BASE_ADDRESS (dr)) == INTEGER_CST)
1934        {
1935          if (vect_print_dump_info (REPORT_UNVECTORIZED_LOCATIONS))
1936            fprintf (vect_dump, "not vectorized: base addr of dr is a "
1937                     "constant");
1938          return false;
1939        }
1940
1941      base = unshare_expr (DR_BASE_ADDRESS (dr));
1942      offset = unshare_expr (DR_OFFSET (dr));
1943      init = unshare_expr (DR_INIT (dr));
1944
1945      /* Update DR field in stmt_vec_info struct.  */
1946
      /* If the dataref is in an inner-loop of the loop that is considered
	 for vectorization, we also want to analyze the access relative to
	 the outer-loop (DR contains information only relative to the
	 inner-most enclosing loop).  We do that by building a reference to the
	 first location accessed by the inner-loop, and analyzing it relative
	 to the outer-loop.  */
1953      if (loop && nested_in_vect_loop_p (loop, stmt))
1954	{
1955	  tree outer_step, outer_base, outer_init;
1956	  HOST_WIDE_INT pbitsize, pbitpos;
1957	  tree poffset;
1958	  enum machine_mode pmode;
1959	  int punsignedp, pvolatilep;
1960	  affine_iv base_iv, offset_iv;
1961	  tree dinit;
1962
1963	  /* Build a reference to the first location accessed by the
1964	     inner-loop: *(BASE+INIT). (The first location is actually
1965	     BASE+INIT+OFFSET, but we add OFFSET separately later).  */
1966          tree inner_base = build_fold_indirect_ref
1967                                (fold_build2 (POINTER_PLUS_EXPR,
1968                                              TREE_TYPE (base), base,
1969                                              fold_convert (sizetype, init)));
1970
1971	  if (vect_print_dump_info (REPORT_DETAILS))
1972	    {
1973	      fprintf (vect_dump, "analyze in outer-loop: ");
1974	      print_generic_expr (vect_dump, inner_base, TDF_SLIM);
1975	    }
1976
1977	  outer_base = get_inner_reference (inner_base, &pbitsize, &pbitpos,
1978		          &poffset, &pmode, &punsignedp, &pvolatilep, false);
1979	  gcc_assert (outer_base != NULL_TREE);
1980
1981	  if (pbitpos % BITS_PER_UNIT != 0)
1982	    {
1983	      if (vect_print_dump_info (REPORT_DETAILS))
1984		fprintf (vect_dump, "failed: bit offset alignment.\n");
1985	      return false;
1986	    }
1987
1988	  outer_base = build_fold_addr_expr (outer_base);
1989	  if (!simple_iv (loop, loop_containing_stmt (stmt), outer_base,
1990                          &base_iv, false))
1991	    {
1992	      if (vect_print_dump_info (REPORT_DETAILS))
1993		fprintf (vect_dump, "failed: evolution of base is not affine.\n");
1994	      return false;
1995	    }
1996
1997	  if (offset)
1998	    {
1999	      if (poffset)
2000		poffset = fold_build2 (PLUS_EXPR, TREE_TYPE (offset), offset,
2001                                       poffset);
2002	      else
2003		poffset = offset;
2004	    }
2005
2006	  if (!poffset)
2007	    {
2008	      offset_iv.base = ssize_int (0);
2009	      offset_iv.step = ssize_int (0);
2010	    }
2011	  else if (!simple_iv (loop, loop_containing_stmt (stmt), poffset,
2012                               &offset_iv, false))
2013	    {
2014	      if (vect_print_dump_info (REPORT_DETAILS))
2015	        fprintf (vect_dump, "evolution of offset is not affine.\n");
2016	      return false;
2017	    }
2018
2019	  outer_init = ssize_int (pbitpos / BITS_PER_UNIT);
2020	  split_constant_offset (base_iv.base, &base_iv.base, &dinit);
2021	  outer_init =  size_binop (PLUS_EXPR, outer_init, dinit);
2022	  split_constant_offset (offset_iv.base, &offset_iv.base, &dinit);
2023	  outer_init =  size_binop (PLUS_EXPR, outer_init, dinit);
2024
2025	  outer_step = size_binop (PLUS_EXPR,
2026				fold_convert (ssizetype, base_iv.step),
2027				fold_convert (ssizetype, offset_iv.step));
2028
2029	  STMT_VINFO_DR_STEP (stmt_info) = outer_step;
2030	  /* FIXME: Use canonicalize_base_object_address (base_iv.base); */
2031	  STMT_VINFO_DR_BASE_ADDRESS (stmt_info) = base_iv.base;
2032	  STMT_VINFO_DR_INIT (stmt_info) = outer_init;
2033	  STMT_VINFO_DR_OFFSET (stmt_info) =
2034				fold_convert (ssizetype, offset_iv.base);
2035	  STMT_VINFO_DR_ALIGNED_TO (stmt_info) =
2036				size_int (highest_pow2_factor (offset_iv.base));
2037
2038	  if (vect_print_dump_info (REPORT_DETAILS))
2039	    {
2040	      fprintf (vect_dump, "\touter base_address: ");
2041	      print_generic_expr (vect_dump, STMT_VINFO_DR_BASE_ADDRESS (stmt_info), TDF_SLIM);
2042	      fprintf (vect_dump, "\n\touter offset from base address: ");
2043	      print_generic_expr (vect_dump, STMT_VINFO_DR_OFFSET (stmt_info), TDF_SLIM);
2044	      fprintf (vect_dump, "\n\touter constant offset from base address: ");
2045	      print_generic_expr (vect_dump, STMT_VINFO_DR_INIT (stmt_info), TDF_SLIM);
2046	      fprintf (vect_dump, "\n\touter step: ");
2047	      print_generic_expr (vect_dump, STMT_VINFO_DR_STEP (stmt_info), TDF_SLIM);
2048	      fprintf (vect_dump, "\n\touter aligned to: ");
2049	      print_generic_expr (vect_dump, STMT_VINFO_DR_ALIGNED_TO (stmt_info), TDF_SLIM);
2050	    }
2051	}
2052
2053      if (STMT_VINFO_DATA_REF (stmt_info))
2054        {
2055          if (vect_print_dump_info (REPORT_UNVECTORIZED_LOCATIONS))
2056            {
2057              fprintf (vect_dump,
2058                       "not vectorized: more than one data ref in stmt: ");
2059              print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM);
2060            }
2061          return false;
2062        }
2063
2064      STMT_VINFO_DATA_REF (stmt_info) = dr;
2065
2066      /* Set vectype for STMT.  */
2067      scalar_type = TREE_TYPE (DR_REF (dr));
2068      STMT_VINFO_VECTYPE (stmt_info) =
2069                get_vectype_for_scalar_type (scalar_type);
2070      if (!STMT_VINFO_VECTYPE (stmt_info))
2071        {
2072          if (vect_print_dump_info (REPORT_UNVECTORIZED_LOCATIONS))
2073            {
2074              fprintf (vect_dump,
2075                       "not vectorized: no vectype for stmt: ");
2076              print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM);
2077              fprintf (vect_dump, " scalar_type: ");
2078              print_generic_expr (vect_dump, scalar_type, TDF_DETAILS);
2079            }
2080          return false;
2081        }
2082    }
2083
2084  return true;
2085}
2086
2087
2088/* Function vect_get_new_vect_var.
2089
   Returns a new variable of type TYPE.  Its name is formed by prepending a
   prefix that depends on VAR_KIND ("vect_", "stmp_" or "vect_p") to NAME,
   if NAME is provided; otherwise the prefix alone is used as the name.  */
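
/* For example (illustrative; the resulting name is only approximate, since
   create_tmp_var appends a unique index):

     vect_get_new_vect_var (ptr_type, vect_pointer_var, "a")

   creates a temporary whose name starts with "vect_pa".  */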
2094
2095tree
2096vect_get_new_vect_var (tree type, enum vect_var_kind var_kind, const char *name)
2097{
2098  const char *prefix;
2099  tree new_vect_var;
2100
2101  switch (var_kind)
2102  {
2103  case vect_simple_var:
2104    prefix = "vect_";
2105    break;
2106  case vect_scalar_var:
2107    prefix = "stmp_";
2108    break;
2109  case vect_pointer_var:
2110    prefix = "vect_p";
2111    break;
2112  default:
2113    gcc_unreachable ();
2114  }
2115
2116  if (name)
2117    {
2118      char* tmp = concat (prefix, name, NULL);
2119      new_vect_var = create_tmp_var (type, tmp);
2120      free (tmp);
2121    }
2122  else
2123    new_vect_var = create_tmp_var (type, prefix);
2124
2125  /* Mark vector typed variable as a gimple register variable.  */
2126  if (TREE_CODE (type) == VECTOR_TYPE)
2127    DECL_GIMPLE_REG_P (new_vect_var) = true;
2128
2129  return new_vect_var;
2130}
2131
2132
2133/* Function vect_create_addr_base_for_vector_ref.
2134
2135   Create an expression that computes the address of the first memory location
2136   that will be accessed for a data reference.
2137
2138   Input:
2139   STMT: The statement containing the data reference.
2140   NEW_STMT_LIST: Must be initialized to NULL_TREE or a statement list.
   OFFSET: Optional. If supplied, it is added to the initial address.
   LOOP:    Specify the loop-nest relative to which the address should be
            computed.
2143            For example, when the dataref is in an inner-loop nested in an
2144	    outer-loop that is now being vectorized, LOOP can be either the
2145	    outer-loop, or the inner-loop. The first memory location accessed
2146	    by the following dataref ('in' points to short):
2147
2148		for (i=0; i<N; i++)
2149		   for (j=0; j<M; j++)
2150		     s += in[i+j]
2151
2152	    is as follows:
2153	    if LOOP=i_loop:	&in		(relative to i_loop)
2154	    if LOOP=j_loop: 	&in+i*2B	(relative to j_loop)
2155
2156   Output:
2157   1. Return an SSA_NAME whose value is the address of the memory location of
2158      the first vector of the data reference.
   2. If NEW_STMT_LIST is not NULL_TREE after return, then the caller must
      insert the statement(s) that define the returned SSA_NAME.
2161
2162   FORNOW: We are only handling array accesses with step 1.  */
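
/* A small illustrative example (not part of the original comment): for a
   dataref a[i] with 4-byte elements, base address &a, DR_OFFSET 0, DR_INIT 0
   and OFFSET 2, the OFFSET is first scaled by the element size, so the
   returned address is &a + (0 + 2 * 4), i.e. &a[2].  */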
2163
2164tree
2165vect_create_addr_base_for_vector_ref (gimple stmt,
2166				      gimple_seq *new_stmt_list,
2167				      tree offset,
2168				      struct loop *loop)
2169{
2170  stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
2171  struct data_reference *dr = STMT_VINFO_DATA_REF (stmt_info);
2172  tree data_ref_base = unshare_expr (DR_BASE_ADDRESS (dr));
2173  tree base_name;
2174  tree data_ref_base_var;
2175  tree vec_stmt;
2176  tree addr_base, addr_expr;
2177  tree dest;
2178  gimple_seq seq = NULL;
2179  tree base_offset = unshare_expr (DR_OFFSET (dr));
2180  tree init = unshare_expr (DR_INIT (dr));
2181  tree vect_ptr_type;
2182  tree step = TYPE_SIZE_UNIT (TREE_TYPE (DR_REF (dr)));
2183  loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
2184
2185  if (loop_vinfo && loop && loop != (gimple_bb (stmt))->loop_father)
2186    {
2187      struct loop *outer_loop = LOOP_VINFO_LOOP (loop_vinfo);
2188
2189      gcc_assert (nested_in_vect_loop_p (outer_loop, stmt));
2190
2191      data_ref_base = unshare_expr (STMT_VINFO_DR_BASE_ADDRESS (stmt_info));
2192      base_offset = unshare_expr (STMT_VINFO_DR_OFFSET (stmt_info));
2193      init = unshare_expr (STMT_VINFO_DR_INIT (stmt_info));
2194    }
2195
2196  if (loop_vinfo)
2197    base_name = build_fold_indirect_ref (data_ref_base);
2198  else
2199    {
2200      base_offset = ssize_int (0);
2201      init = ssize_int (0);
2202      base_name = build_fold_indirect_ref (unshare_expr (DR_REF (dr)));
2203    }
2204
2205  data_ref_base_var = create_tmp_var (TREE_TYPE (data_ref_base), "batmp");
2206  add_referenced_var (data_ref_base_var);
2207  data_ref_base = force_gimple_operand (data_ref_base, &seq, true,
2208					data_ref_base_var);
2209  gimple_seq_add_seq (new_stmt_list, seq);
2210
2211  /* Create base_offset */
2212  base_offset = size_binop (PLUS_EXPR,
2213			    fold_convert (sizetype, base_offset),
2214			    fold_convert (sizetype, init));
2215  dest = create_tmp_var (sizetype, "base_off");
2216  add_referenced_var (dest);
2217  base_offset = force_gimple_operand (base_offset, &seq, true, dest);
2218  gimple_seq_add_seq (new_stmt_list, seq);
2219
2220  if (offset)
2221    {
2222      tree tmp = create_tmp_var (sizetype, "offset");
2223
2224      add_referenced_var (tmp);
2225      offset = fold_build2 (MULT_EXPR, sizetype,
2226			    fold_convert (sizetype, offset), step);
2227      base_offset = fold_build2 (PLUS_EXPR, sizetype,
2228				 base_offset, offset);
2229      base_offset = force_gimple_operand (base_offset, &seq, false, tmp);
2230      gimple_seq_add_seq (new_stmt_list, seq);
2231    }
2232
2233  /* base + base_offset */
2234  if (loop_vinfo)
2235    addr_base = fold_build2 (POINTER_PLUS_EXPR, TREE_TYPE (data_ref_base),
2236                             data_ref_base, base_offset);
2237  else
2238    {
2239      if (TREE_CODE (DR_REF (dr)) == INDIRECT_REF)
2240        addr_base = unshare_expr (TREE_OPERAND (DR_REF (dr), 0));
2241      else
2242        addr_base = build1 (ADDR_EXPR,
2243                            build_pointer_type (TREE_TYPE (DR_REF (dr))),
2244                            unshare_expr (DR_REF (dr)));
2245    }
2246
2247  vect_ptr_type = build_pointer_type (STMT_VINFO_VECTYPE (stmt_info));
2248
2249  vec_stmt = fold_convert (vect_ptr_type, addr_base);
2250  addr_expr = vect_get_new_vect_var (vect_ptr_type, vect_pointer_var,
2251                                     get_name (base_name));
2252  add_referenced_var (addr_expr);
2253  vec_stmt = force_gimple_operand (vec_stmt, &seq, false, addr_expr);
2254  gimple_seq_add_seq (new_stmt_list, seq);
2255
2256  if (vect_print_dump_info (REPORT_DETAILS))
2257    {
2258      fprintf (vect_dump, "created ");
2259      print_generic_expr (vect_dump, vec_stmt, TDF_SLIM);
2260    }
2261
2262  return vec_stmt;
2263}
2264
2265
2266/* Function vect_create_data_ref_ptr.
2267
2268   Create a new pointer to vector type (vp), that points to the first location
2269   accessed in the loop by STMT, along with the def-use update chain to
2270   appropriately advance the pointer through the loop iterations. Also set
2271   aliasing information for the pointer.  This vector pointer is used by the
2272   callers to this function to create a memory reference expression for vector
2273   load/store access.
2274
2275   Input:
2276   1. STMT: a stmt that references memory. Expected to be of the form
2277         GIMPLE_ASSIGN <name, data-ref> or
2278	 GIMPLE_ASSIGN <data-ref, name>.
2279   2. AT_LOOP: the loop where the vector memref is to be created.
2280   3. OFFSET (optional): an offset to be added to the initial address accessed
2281        by the data-ref in STMT.
2282   4. ONLY_INIT: indicate if vp is to be updated in the loop, or remain
2283        pointing to the initial address.
2284   5. TYPE: if not NULL indicates the required type of the data-ref.
2285
2286   Output:
2287   1. Declare a new ptr to vector_type, and have it point to the base of the
      data reference (the initial address accessed by the data reference).
2289      For example, for vector of type V8HI, the following code is generated:
2290
2291      v8hi *vp;
2292      vp = (v8hi *)initial_address;
2293
2294      if OFFSET is not supplied:
2295         initial_address = &a[init];
2296      if OFFSET is supplied:
2297         initial_address = &a[init + OFFSET];
2298
2299      Return the initial_address in INITIAL_ADDRESS.
2300
2301   2. If ONLY_INIT is true, just return the initial pointer.  Otherwise, also
2302      update the pointer in each iteration of the loop.
2303
2304      Return the increment stmt that updates the pointer in PTR_INCR.
2305
2306   3. Set INV_P to true if the access pattern of the data reference in the
2307      vectorized loop is invariant. Set it to false otherwise.
2308
2309   4. Return the pointer.  */
2310
2311tree
2312vect_create_data_ref_ptr (gimple stmt, struct loop *at_loop,
2313			  tree offset, tree *initial_address, gimple *ptr_incr,
2314			  bool only_init, bool *inv_p)
2315{
2316  tree base_name;
2317  stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
2318  loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
2319  struct loop *loop = NULL;
2320  bool nested_in_vect_loop = false;
2321  struct loop *containing_loop = NULL;
2322  tree vectype = STMT_VINFO_VECTYPE (stmt_info);
2323  tree vect_ptr_type;
2324  tree vect_ptr;
2325  tree new_temp;
2326  gimple vec_stmt;
2327  gimple_seq new_stmt_list = NULL;
2328  edge pe = NULL;
2329  basic_block new_bb;
2330  tree vect_ptr_init;
2331  struct data_reference *dr = STMT_VINFO_DATA_REF (stmt_info);
2332  tree vptr;
2333  gimple_stmt_iterator incr_gsi;
2334  bool insert_after;
2335  tree indx_before_incr, indx_after_incr;
2336  gimple incr;
2337  tree step;
2338  bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
2339  gimple_stmt_iterator gsi = gsi_for_stmt (stmt);
2340
2341  if (loop_vinfo)
2342    {
2343      loop = LOOP_VINFO_LOOP (loop_vinfo);
2344      nested_in_vect_loop = nested_in_vect_loop_p (loop, stmt);
2345      containing_loop = (gimple_bb (stmt))->loop_father;
2346      pe = loop_preheader_edge (loop);
2347    }
2348  else
2349    {
2350      gcc_assert (bb_vinfo);
2351      only_init = true;
2352      *ptr_incr = NULL;
2353    }
2354
2355  /* Check the step (evolution) of the load in LOOP, and record
2356     whether it's invariant.  */
2357  if (nested_in_vect_loop)
2358    step = STMT_VINFO_DR_STEP (stmt_info);
2359  else
2360    step = DR_STEP (STMT_VINFO_DATA_REF (stmt_info));
2361
2362  if (tree_int_cst_compare (step, size_zero_node) == 0)
2363    *inv_p = true;
2364  else
2365    *inv_p = false;
2366
2367  /* Create an expression for the first address accessed by this load
2368     in LOOP.  */
2369  base_name = build_fold_indirect_ref (unshare_expr (DR_BASE_ADDRESS (dr)));
2370
2371  if (vect_print_dump_info (REPORT_DETAILS))
2372    {
2373      tree data_ref_base = base_name;
2374      fprintf (vect_dump, "create vector-pointer variable to type: ");
2375      print_generic_expr (vect_dump, vectype, TDF_SLIM);
2376      if (TREE_CODE (data_ref_base) == VAR_DECL
2377          || TREE_CODE (data_ref_base) == ARRAY_REF)
2378        fprintf (vect_dump, "  vectorizing an array ref: ");
2379      else if (TREE_CODE (data_ref_base) == COMPONENT_REF)
2380        fprintf (vect_dump, "  vectorizing a record based array ref: ");
2381      else if (TREE_CODE (data_ref_base) == SSA_NAME)
2382        fprintf (vect_dump, "  vectorizing a pointer ref: ");
2383      print_generic_expr (vect_dump, base_name, TDF_SLIM);
2384    }
2385
2386  /** (1) Create the new vector-pointer variable:  **/
2387  vect_ptr_type = build_pointer_type (vectype);
2388  vect_ptr = vect_get_new_vect_var (vect_ptr_type, vect_pointer_var,
2389                                    get_name (base_name));
2390
2391  /* Vector types inherit the alias set of their component type by default so
2392     we need to use a ref-all pointer if the data reference does not conflict
2393     with the created vector data reference because it is not addressable.  */
2394  if (!alias_sets_conflict_p (get_deref_alias_set (vect_ptr),
2395			      get_alias_set (DR_REF (dr))))
2396    {
2397      vect_ptr_type
2398	= build_pointer_type_for_mode (vectype,
2399				       TYPE_MODE (vect_ptr_type), true);
2400      vect_ptr = vect_get_new_vect_var (vect_ptr_type, vect_pointer_var,
2401					get_name (base_name));
2402    }
2403
2404  /* Likewise for any of the data references in the stmt group.  */
2405  else if (STMT_VINFO_DR_GROUP_SIZE (stmt_info) > 1)
2406    {
2407      gimple orig_stmt = STMT_VINFO_DR_GROUP_FIRST_DR (stmt_info);
2408      do
2409	{
2410	  tree lhs = gimple_assign_lhs (orig_stmt);
2411	  if (!alias_sets_conflict_p (get_deref_alias_set (vect_ptr),
2412				      get_alias_set (lhs)))
2413	    {
2414	      vect_ptr_type
2415		= build_pointer_type_for_mode (vectype,
2416					       TYPE_MODE (vect_ptr_type), true);
2417	      vect_ptr
2418		= vect_get_new_vect_var (vect_ptr_type, vect_pointer_var,
2419					 get_name (base_name));
2420	      break;
2421	    }
2422
2423	  orig_stmt = STMT_VINFO_DR_GROUP_NEXT_DR (vinfo_for_stmt (orig_stmt));
2424	}
2425      while (orig_stmt);
2426    }
2427
2428  add_referenced_var (vect_ptr);
2429
2430  /** Note: If the dataref is in an inner-loop nested in LOOP, and we are
2431      vectorizing LOOP (i.e. outer-loop vectorization), we need to create two
2432      def-use update cycles for the pointer: One relative to the outer-loop
2433      (LOOP), which is what steps (3) and (4) below do. The other is relative
2434      to the inner-loop (which is the inner-most loop containing the dataref),
      and this is done by step (5) below.
2436
2437      When vectorizing inner-most loops, the vectorized loop (LOOP) is also the
2438      inner-most loop, and so steps (3),(4) work the same, and step (5) is
2439      redundant.  Steps (3),(4) create the following:
2440
2441	vp0 = &base_addr;
2442	LOOP:	vp1 = phi(vp0,vp2)
2443		...
2444		...
2445		vp2 = vp1 + step
2446		goto LOOP
2447
2448      If there is an inner-loop nested in loop, then step (5) will also be
2449      applied, and an additional update in the inner-loop will be created:
2450
2451	vp0 = &base_addr;
2452	LOOP:   vp1 = phi(vp0,vp2)
2453		...
2454        inner:     vp3 = phi(vp1,vp4)
2455	           vp4 = vp3 + inner_step
2456	           if () goto inner
2457		...
2458		vp2 = vp1 + step
2459		if () goto LOOP   */
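
  /* Outer-loop example (an illustrative sketch, not from the original
     comment): when vectorizing the outer I-loop of

	for (i = 0; i < N; i++)
	  for (j = 0; j < M; j++)
	    ... = in[i + j];

     steps (3)/(4) create the pointer update relative to the outer I-loop,
     and step (5) adds the additional update inside the J-loop that advances
     the pointer by DR_STEP per inner iteration.  */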
2460
  /** (3) Calculate the initial address of the vector-pointer, and set
          the vector-pointer to point to it before the loop:  **/

  /* Create &(base[init_val+offset]) in the loop preheader.  */
2465
2466  new_temp = vect_create_addr_base_for_vector_ref (stmt, &new_stmt_list,
2467                                                   offset, loop);
2468  if (new_stmt_list)
2469    {
2470      if (pe)
2471        {
2472          new_bb = gsi_insert_seq_on_edge_immediate (pe, new_stmt_list);
2473          gcc_assert (!new_bb);
2474        }
2475      else
2476        gsi_insert_seq_before (&gsi, new_stmt_list, GSI_SAME_STMT);
2477    }
2478
2479  *initial_address = new_temp;
2480
2481  /* Create: p = (vectype *) initial_base  */
2482  vec_stmt = gimple_build_assign (vect_ptr,
2483				  fold_convert (vect_ptr_type, new_temp));
2484  vect_ptr_init = make_ssa_name (vect_ptr, vec_stmt);
2485  gimple_assign_set_lhs (vec_stmt, vect_ptr_init);
2486  if (pe)
2487    {
2488      new_bb = gsi_insert_on_edge_immediate (pe, vec_stmt);
2489      gcc_assert (!new_bb);
2490    }
2491  else
2492    gsi_insert_before (&gsi, vec_stmt, GSI_SAME_STMT);
2493
2494  /** (4) Handle the updating of the vector-pointer inside the loop.
2495	  This is needed when ONLY_INIT is false, and also when AT_LOOP
2496	  is the inner-loop nested in LOOP (during outer-loop vectorization).
2497   **/
2498
2499  /* No update in loop is required.  */
2500  if (only_init && (!loop_vinfo || at_loop == loop))
2501    {
2502      /* Copy the points-to information if it exists. */
2503      if (DR_PTR_INFO (dr))
2504        duplicate_ssa_name_ptr_info (vect_ptr_init, DR_PTR_INFO (dr));
2505      vptr = vect_ptr_init;
2506    }
2507  else
2508    {
2509      /* The step of the vector pointer is the Vector Size.  */
2510      tree step = TYPE_SIZE_UNIT (vectype);
2511      /* One exception to the above is when the scalar step of the load in
2512	 LOOP is zero. In this case the step here is also zero.  */
2513      if (*inv_p)
2514	step = size_zero_node;
2515
2516      standard_iv_increment_position (loop, &incr_gsi, &insert_after);
2517
2518      create_iv (vect_ptr_init,
2519		 fold_convert (vect_ptr_type, step),
2520		 vect_ptr, loop, &incr_gsi, insert_after,
2521		 &indx_before_incr, &indx_after_incr);
2522      incr = gsi_stmt (incr_gsi);
2523      set_vinfo_for_stmt (incr, new_stmt_vec_info (incr, loop_vinfo, NULL));
2524
2525      /* Copy the points-to information if it exists. */
2526      if (DR_PTR_INFO (dr))
2527	{
2528	  duplicate_ssa_name_ptr_info (indx_before_incr, DR_PTR_INFO (dr));
2529	  duplicate_ssa_name_ptr_info (indx_after_incr, DR_PTR_INFO (dr));
2530	}
2531      if (ptr_incr)
2532	*ptr_incr = incr;
2533
2534      vptr = indx_before_incr;
2535    }
2536
2537  if (!nested_in_vect_loop || only_init)
2538    return vptr;
2539
2540
  /** (5) Handle the updating of the vector-pointer inside the inner-loop
	  nested in LOOP, if it exists:  **/
2543
2544  gcc_assert (nested_in_vect_loop);
2545  if (!only_init)
2546    {
2547      standard_iv_increment_position (containing_loop, &incr_gsi,
2548				      &insert_after);
2549      create_iv (vptr, fold_convert (vect_ptr_type, DR_STEP (dr)), vect_ptr,
2550		 containing_loop, &incr_gsi, insert_after, &indx_before_incr,
2551		 &indx_after_incr);
2552      incr = gsi_stmt (incr_gsi);
2553      set_vinfo_for_stmt (incr, new_stmt_vec_info (incr, loop_vinfo, NULL));
2554
2555      /* Copy the points-to information if it exists. */
2556      if (DR_PTR_INFO (dr))
2557	{
2558	  duplicate_ssa_name_ptr_info (indx_before_incr, DR_PTR_INFO (dr));
2559	  duplicate_ssa_name_ptr_info (indx_after_incr, DR_PTR_INFO (dr));
2560	}
2561      if (ptr_incr)
2562	*ptr_incr = incr;
2563
2564      return indx_before_incr;
2565    }
2566  else
2567    gcc_unreachable ();
2568}
2569
2570
2571/* Function bump_vector_ptr
2572
   Increment a pointer (to a vector type) by the vector size.  If requested,
   i.e. if PTR_INCR is given, then also connect the new increment stmt
   to the existing def-use update-chain of the pointer, by modifying
2576   the PTR_INCR as illustrated below:
2577
2578   The pointer def-use update-chain before this function:
2579                        DATAREF_PTR = phi (p_0, p_2)
2580                        ....
2581        PTR_INCR:       p_2 = DATAREF_PTR + step
2582
2583   The pointer def-use update-chain after this function:
2584                        DATAREF_PTR = phi (p_0, p_2)
2585                        ....
2586                        NEW_DATAREF_PTR = DATAREF_PTR + BUMP
2587                        ....
2588        PTR_INCR:       p_2 = NEW_DATAREF_PTR + step
2589
2590   Input:
2591   DATAREF_PTR - ssa_name of a pointer (to vector type) that is being updated
2592                 in the loop.
2593   PTR_INCR - optional. The stmt that updates the pointer in each iteration of
2594	      the loop.  The increment amount across iterations is expected
2595	      to be vector_size.
   GSI - location where the new update stmt is to be placed.
2597   STMT - the original scalar memory-access stmt that is being vectorized.
2598   BUMP - optional. The offset by which to bump the pointer. If not given,
2599	  the offset is assumed to be vector_size.
2600
2601   Output: Return NEW_DATAREF_PTR as illustrated above.
2602
2603*/
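
/* For example (an illustrative note, not from the original comment): when a
   group of interleaved loads needs several consecutive vector loads, each
   load after the first obtains its address by bumping DATAREF_PTR by the
   vector size (the default BUMP).  */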
2604
2605tree
2606bump_vector_ptr (tree dataref_ptr, gimple ptr_incr, gimple_stmt_iterator *gsi,
2607		 gimple stmt, tree bump)
2608{
2609  stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
2610  struct data_reference *dr = STMT_VINFO_DATA_REF (stmt_info);
2611  tree vectype = STMT_VINFO_VECTYPE (stmt_info);
2612  tree ptr_var = SSA_NAME_VAR (dataref_ptr);
2613  tree update = TYPE_SIZE_UNIT (vectype);
2614  gimple incr_stmt;
2615  ssa_op_iter iter;
2616  use_operand_p use_p;
2617  tree new_dataref_ptr;
2618
2619  if (bump)
2620    update = bump;
2621
2622  incr_stmt = gimple_build_assign_with_ops (POINTER_PLUS_EXPR, ptr_var,
2623					    dataref_ptr, update);
2624  new_dataref_ptr = make_ssa_name (ptr_var, incr_stmt);
2625  gimple_assign_set_lhs (incr_stmt, new_dataref_ptr);
2626  vect_finish_stmt_generation (stmt, incr_stmt, gsi);
2627
2628  /* Copy the points-to information if it exists. */
2629  if (DR_PTR_INFO (dr))
2630    duplicate_ssa_name_ptr_info (new_dataref_ptr, DR_PTR_INFO (dr));
2631
2632  if (!ptr_incr)
2633    return new_dataref_ptr;
2634
2635  /* Update the vector-pointer's cross-iteration increment.  */
2636  FOR_EACH_SSA_USE_OPERAND (use_p, ptr_incr, iter, SSA_OP_USE)
2637    {
2638      tree use = USE_FROM_PTR (use_p);
2639
2640      if (use == dataref_ptr)
2641        SET_USE (use_p, new_dataref_ptr);
2642      else
2643        gcc_assert (tree_int_cst_compare (use, update) == 0);
2644    }
2645
2646  return new_dataref_ptr;
2647}
2648
2649
2650/* Function vect_create_destination_var.
2651
2652   Create a new temporary of type VECTYPE.  */
2653
2654tree
2655vect_create_destination_var (tree scalar_dest, tree vectype)
2656{
2657  tree vec_dest;
2658  const char *new_name;
2659  tree type;
2660  enum vect_var_kind kind;
2661
2662  kind = vectype ? vect_simple_var : vect_scalar_var;
2663  type = vectype ? vectype : TREE_TYPE (scalar_dest);
2664
2665  gcc_assert (TREE_CODE (scalar_dest) == SSA_NAME);
2666
2667  new_name = get_name (scalar_dest);
2668  if (!new_name)
2669    new_name = "var_";
2670  vec_dest = vect_get_new_vect_var (type, kind, new_name);
2671  add_referenced_var (vec_dest);
2672
2673  return vec_dest;
2674}
2675
2676/* Function vect_strided_store_supported.
2677
   Returns TRUE if INTERLEAVE_HIGH and INTERLEAVE_LOW operations are supported,
   and FALSE otherwise.  */
2680
2681bool
2682vect_strided_store_supported (tree vectype)
2683{
2684  optab interleave_high_optab, interleave_low_optab;
2685  int mode;
2686
2687  mode = (int) TYPE_MODE (vectype);
2688
2689  /* Check that the operation is supported.  */
2690  interleave_high_optab = optab_for_tree_code (VEC_INTERLEAVE_HIGH_EXPR,
2691					       vectype, optab_default);
2692  interleave_low_optab = optab_for_tree_code (VEC_INTERLEAVE_LOW_EXPR,
2693					      vectype, optab_default);
2694  if (!interleave_high_optab || !interleave_low_optab)
2695    {
2696      if (vect_print_dump_info (REPORT_DETAILS))
2697	fprintf (vect_dump, "no optab for interleave.");
2698      return false;
2699    }
2700
2701  if (optab_handler (interleave_high_optab, mode)->insn_code
2702      == CODE_FOR_nothing
2703      || optab_handler (interleave_low_optab, mode)->insn_code
2704      == CODE_FOR_nothing)
2705    {
2706      if (vect_print_dump_info (REPORT_DETAILS))
2707	fprintf (vect_dump, "interleave op not supported by target.");
2708      return false;
2709    }
2710
2711  return true;
2712}
2713
2714
2715/* Function vect_permute_store_chain.
2716
2717   Given a chain of interleaved stores in DR_CHAIN of LENGTH that must be
2718   a power of 2, generate interleave_high/low stmts to reorder the data
2719   correctly for the stores. Return the final references for stores in
2720   RESULT_CHAIN.
2721
2722   E.g., LENGTH is 4 and the scalar type is short, i.e., VF is 8.
   The input is 4 vectors each containing 8 elements.  We assign a number to
   each element; the input sequence is:
2725
2726   1st vec:   0  1  2  3  4  5  6  7
2727   2nd vec:   8  9 10 11 12 13 14 15
2728   3rd vec:  16 17 18 19 20 21 22 23
2729   4th vec:  24 25 26 27 28 29 30 31
2730
2731   The output sequence should be:
2732
2733   1st vec:  0  8 16 24  1  9 17 25
2734   2nd vec:  2 10 18 26  3 11 19 27
   3rd vec:  4 12 20 28  5 13 21 29
2736   4th vec:  6 14 22 30  7 15 23 31
2737
2738   i.e., we interleave the contents of the four vectors in their order.
2739
2740   We use interleave_high/low instructions to create such output. The input of
2741   each interleave_high/low operation is two vectors:
2742   1st vec    2nd vec
2743   0 1 2 3    4 5 6 7
2744   the even elements of the result vector are obtained left-to-right from the
2745   high/low elements of the first vector. The odd elements of the result are
2746   obtained left-to-right from the high/low elements of the second vector.
2747   The output of interleave_high will be:   0 4 1 5
2748   and of interleave_low:                   2 6 3 7
2749
2750
2751   The permutation is done in log LENGTH stages. In each stage interleave_high
2752   and interleave_low stmts are created for each pair of vectors in DR_CHAIN,
2753   where the first argument is taken from the first half of DR_CHAIN and the
   second argument from its second half.
2755   In our example,
2756
2757   I1: interleave_high (1st vec, 3rd vec)
2758   I2: interleave_low (1st vec, 3rd vec)
2759   I3: interleave_high (2nd vec, 4th vec)
2760   I4: interleave_low (2nd vec, 4th vec)
2761
2762   The output for the first stage is:
2763
2764   I1:  0 16  1 17  2 18  3 19
2765   I2:  4 20  5 21  6 22  7 23
2766   I3:  8 24  9 25 10 26 11 27
2767   I4: 12 28 13 29 14 30 15 31
2768
2769   The output of the second stage, i.e. the final result is:
2770
2771   I1:  0  8 16 24  1  9 17 25
2772   I2:  2 10 18 26  3 11 19 27
   I3:  4 12 20 28  5 13 21 29
2774   I4:  6 14 22 30  7 15 23 31.  */
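
/* A smaller illustrative case (added for exposition): for LENGTH == 2 a
   single stage suffices:

   I1: interleave_high (1st vec, 2nd vec) =  0  8  1  9  2 10  3 11
   I2: interleave_low  (1st vec, 2nd vec) =  4 12  5 13  6 14  7 15

   which is already the required interleaving of the two input vectors.  */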
2775
2776bool
2777vect_permute_store_chain (VEC(tree,heap) *dr_chain,
2778			  unsigned int length,
2779			  gimple stmt,
2780			  gimple_stmt_iterator *gsi,
2781			  VEC(tree,heap) **result_chain)
2782{
2783  tree perm_dest, vect1, vect2, high, low;
2784  gimple perm_stmt;
2785  tree vectype = STMT_VINFO_VECTYPE (vinfo_for_stmt (stmt));
2786  int i;
2787  unsigned int j;
2788  enum tree_code high_code, low_code;
2789
2790  /* Check that the operation is supported.  */
2791  if (!vect_strided_store_supported (vectype))
2792    return false;
2793
2794  *result_chain = VEC_copy (tree, heap, dr_chain);
2795
2796  for (i = 0; i < exact_log2 (length); i++)
2797    {
2798      for (j = 0; j < length/2; j++)
2799	{
2800	  vect1 = VEC_index (tree, dr_chain, j);
2801	  vect2 = VEC_index (tree, dr_chain, j+length/2);
2802
2803	  /* Create interleaving stmt:
2804	     in the case of big endian:
2805                                high = interleave_high (vect1, vect2)
2806             and in the case of little endian:
2807                                high = interleave_low (vect1, vect2).  */
2808	  perm_dest = create_tmp_var (vectype, "vect_inter_high");
2809	  DECL_GIMPLE_REG_P (perm_dest) = 1;
2810	  add_referenced_var (perm_dest);
2811          if (BYTES_BIG_ENDIAN)
2812	    {
2813	      high_code = VEC_INTERLEAVE_HIGH_EXPR;
2814	      low_code = VEC_INTERLEAVE_LOW_EXPR;
2815	    }
2816	  else
2817	    {
2818	      low_code = VEC_INTERLEAVE_HIGH_EXPR;
2819	      high_code = VEC_INTERLEAVE_LOW_EXPR;
2820	    }
2821	  perm_stmt = gimple_build_assign_with_ops (high_code, perm_dest,
2822						    vect1, vect2);
2823	  high = make_ssa_name (perm_dest, perm_stmt);
2824	  gimple_assign_set_lhs (perm_stmt, high);
2825	  vect_finish_stmt_generation (stmt, perm_stmt, gsi);
2826	  VEC_replace (tree, *result_chain, 2*j, high);
2827
2828	  /* Create interleaving stmt:
2829             in the case of big endian:
2830                               low  = interleave_low (vect1, vect2)
2831             and in the case of little endian:
2832                               low  = interleave_high (vect1, vect2).  */
2833	  perm_dest = create_tmp_var (vectype, "vect_inter_low");
2834	  DECL_GIMPLE_REG_P (perm_dest) = 1;
2835	  add_referenced_var (perm_dest);
2836	  perm_stmt = gimple_build_assign_with_ops (low_code, perm_dest,
2837						    vect1, vect2);
2838	  low = make_ssa_name (perm_dest, perm_stmt);
2839	  gimple_assign_set_lhs (perm_stmt, low);
2840	  vect_finish_stmt_generation (stmt, perm_stmt, gsi);
2841	  VEC_replace (tree, *result_chain, 2*j+1, low);
2842	}
2843      dr_chain = VEC_copy (tree, heap, *result_chain);
2844    }
2845  return true;
2846}
2847
2848/* Function vect_setup_realignment
2849
2850   This function is called when vectorizing an unaligned load using
2851   the dr_explicit_realign[_optimized] scheme.
2852   This function generates the following code at the loop prolog:
2853
2854      p = initial_addr;
2855   x  msq_init = *(floor(p));   # prolog load
2856      realignment_token = call target_builtin;
2857    loop:
2858   x  msq = phi (msq_init, ---)
2859
2860   The stmts marked with x are generated only for the case of
2861   dr_explicit_realign_optimized.
2862
2863   The code above sets up a new (vector) pointer, pointing to the first
2864   location accessed by STMT, and a "floor-aligned" load using that pointer.
2865   It also generates code to compute the "realignment-token" (if the relevant
2866   target hook was defined), and creates a phi-node at the loop-header bb
2867   whose arguments are the result of the prolog-load (created by this
2868   function) and the result of a load that takes place in the loop (to be
2869   created by the caller to this function).
2870
2871   For the case of dr_explicit_realign_optimized:
2872   The caller to this function uses the phi-result (msq) to create the
2873   realignment code inside the loop, and sets up the missing phi argument,
2874   as follows:
2875    loop:
2876      msq = phi (msq_init, lsq)
2877      lsq = *(floor(p'));        # load in loop
2878      result = realign_load (msq, lsq, realignment_token);
2879
2880   For the case of dr_explicit_realign:
2881    loop:
2882      msq = *(floor(p)); 	# load in loop
2883      p' = p + (VS-1);
2884      lsq = *(floor(p'));	# load in loop
2885      result = realign_load (msq, lsq, realignment_token);
2886
2887   Input:
2888   STMT - (scalar) load stmt to be vectorized. This load accesses
2889          a memory location that may be unaligned.
   GSI - place where new code is to be inserted.
2891   ALIGNMENT_SUPPORT_SCHEME - which of the two misalignment handling schemes
2892			      is used.
2893
2894   Output:
2895   REALIGNMENT_TOKEN - the result of a call to the builtin_mask_for_load
2896                       target hook, if defined.
2897   Return value - the result of the loop-header phi node.  */
2898
2899tree
2900vect_setup_realignment (gimple stmt, gimple_stmt_iterator *gsi,
2901                        tree *realignment_token,
2902			enum dr_alignment_support alignment_support_scheme,
2903			tree init_addr,
2904			struct loop **at_loop)
2905{
2906  stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
2907  tree vectype = STMT_VINFO_VECTYPE (stmt_info);
2908  loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
2909  struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
2910  edge pe;
2911  tree scalar_dest = gimple_assign_lhs (stmt);
2912  tree vec_dest;
2913  gimple inc;
2914  tree ptr;
2915  tree data_ref;
2916  gimple new_stmt;
2917  basic_block new_bb;
2918  tree msq_init = NULL_TREE;
2919  tree new_temp;
2920  gimple phi_stmt;
2921  tree msq = NULL_TREE;
2922  gimple_seq stmts = NULL;
2923  bool inv_p;
2924  bool compute_in_loop = false;
2925  bool nested_in_vect_loop = nested_in_vect_loop_p (loop, stmt);
2926  struct loop *containing_loop = (gimple_bb (stmt))->loop_father;
2927  struct loop *loop_for_initial_load;
2928
2929  gcc_assert (alignment_support_scheme == dr_explicit_realign
2930	      || alignment_support_scheme == dr_explicit_realign_optimized);
2931
2932  /* We need to generate three things:
2933     1. the misalignment computation
2934     2. the extra vector load (for the optimized realignment scheme).
2935     3. the phi node for the two vectors from which the realignment is
2936      done (for the optimized realignment scheme).
2937   */
2938
2939  /* 1. Determine where to generate the misalignment computation.
2940
2941     If INIT_ADDR is NULL_TREE, this indicates that the misalignment
2942     calculation will be generated by this function, outside the loop (in the
2943     preheader).  Otherwise, INIT_ADDR had already been computed for us by the
2944     caller, inside the loop.
2945
2946     Background: If the misalignment remains fixed throughout the iterations of
2947     the loop, then both realignment schemes are applicable, and also the
2948     misalignment computation can be done outside LOOP.  This is because we are
2949     vectorizing LOOP, and so the memory accesses in LOOP advance in steps that
2950     are a multiple of VS (the Vector Size), and therefore the misalignment in
2951     different vectorized LOOP iterations is always the same.
2952     The problem arises only if the memory access is in an inner-loop nested
2953     inside LOOP, which is now being vectorized using outer-loop vectorization.
2954     This is the only case when the misalignment of the memory access may not
2955     remain fixed throughout the iterations of the inner-loop (as explained in
2956     detail in vect_supportable_dr_alignment).  In this case, not only is the
2957     optimized realignment scheme not applicable, but also the misalignment
2958     computation (and generation of the realignment token that is passed to
2959     REALIGN_LOAD) have to be done inside the loop.
2960
2961     In short, INIT_ADDR indicates whether we are in a COMPUTE_IN_LOOP mode
2962     or not, which in turn determines if the misalignment is computed inside
2963     the inner-loop, or outside LOOP.  */
2964
2965  if (init_addr != NULL_TREE)
2966    {
2967      compute_in_loop = true;
2968      gcc_assert (alignment_support_scheme == dr_explicit_realign);
2969    }
2970
2971
2972  /* 2. Determine where to generate the extra vector load.
2973
2974     For the optimized realignment scheme, instead of generating two vector
2975     loads in each iteration, we generate a single extra vector load in the
2976     preheader of the loop, and in each iteration reuse the result of the
2977     vector load from the previous iteration.  In case the memory access is in
2978     an inner-loop nested inside LOOP, which is now being vectorized using
2979     outer-loop vectorization, we need to determine whether this initial vector
2980     load should be generated at the preheader of the inner-loop, or can be
2981     generated at the preheader of LOOP.  If the memory access has no evolution
2982     in LOOP, it can be generated in the preheader of LOOP. Otherwise, it has
2983     to be generated inside LOOP (in the preheader of the inner-loop).  */
2984
2985  if (nested_in_vect_loop)
2986    {
2987      tree outerloop_step = STMT_VINFO_DR_STEP (stmt_info);
2988      bool invariant_in_outerloop =
2989            (tree_int_cst_compare (outerloop_step, size_zero_node) == 0);
2990      loop_for_initial_load = (invariant_in_outerloop ? loop : loop->inner);
2991    }
2992  else
2993    loop_for_initial_load = loop;
2994  if (at_loop)
2995    *at_loop = loop_for_initial_load;
2996
2997  /* 3. For the case of the optimized realignment, create the first vector
2998      load at the loop preheader.  */
2999
3000  if (alignment_support_scheme == dr_explicit_realign_optimized)
3001    {
3002      /* Create msq_init = *(floor(p1)) in the loop preheader  */
3003
3004      gcc_assert (!compute_in_loop);
3005      pe = loop_preheader_edge (loop_for_initial_load);
3006      vec_dest = vect_create_destination_var (scalar_dest, vectype);
3007      ptr = vect_create_data_ref_ptr (stmt, loop_for_initial_load, NULL_TREE,
3008				      &init_addr, &inc, true, &inv_p);
3009      data_ref = build1 (ALIGN_INDIRECT_REF, vectype, ptr);
3010      new_stmt = gimple_build_assign (vec_dest, data_ref);
3011      new_temp = make_ssa_name (vec_dest, new_stmt);
3012      gimple_assign_set_lhs (new_stmt, new_temp);
3013      mark_symbols_for_renaming (new_stmt);
3014      new_bb = gsi_insert_on_edge_immediate (pe, new_stmt);
3015      gcc_assert (!new_bb);
3016      msq_init = gimple_assign_lhs (new_stmt);
3017    }
3018
3019  /* 4. Create realignment token using a target builtin, if available.
3020      It is done either inside the containing loop, or before LOOP (as
3021      determined above).  */
3022
3023  if (targetm.vectorize.builtin_mask_for_load)
3024    {
3025      tree builtin_decl;
3026
      /* Compute INIT_ADDR - the initial address accessed by this memref.  */
3028      if (compute_in_loop)
3029	gcc_assert (init_addr); /* already computed by the caller.  */
3030      else
3031	{
3032	  /* Generate the INIT_ADDR computation outside LOOP.  */
3033	  init_addr = vect_create_addr_base_for_vector_ref (stmt, &stmts,
3034							NULL_TREE, loop);
3035	  pe = loop_preheader_edge (loop);
3036	  new_bb = gsi_insert_seq_on_edge_immediate (pe, stmts);
3037	  gcc_assert (!new_bb);
3038	}
3039
3040      builtin_decl = targetm.vectorize.builtin_mask_for_load ();
3041      new_stmt = gimple_build_call (builtin_decl, 1, init_addr);
3042      vec_dest =
3043	vect_create_destination_var (scalar_dest,
3044				     gimple_call_return_type (new_stmt));
3045      new_temp = make_ssa_name (vec_dest, new_stmt);
3046      gimple_call_set_lhs (new_stmt, new_temp);
3047
3048      if (compute_in_loop)
3049	gsi_insert_before (gsi, new_stmt, GSI_SAME_STMT);
3050      else
3051	{
3052	  /* Generate the misalignment computation outside LOOP.  */
3053	  pe = loop_preheader_edge (loop);
3054	  new_bb = gsi_insert_on_edge_immediate (pe, new_stmt);
3055	  gcc_assert (!new_bb);
3056	}
3057
3058      *realignment_token = gimple_call_lhs (new_stmt);
3059
3060      /* The result of the CALL_EXPR to this builtin is determined from
3061         the value of the parameter alone, and no global variables are touched,
3062         which makes the builtin a "const" function.  Requiring the
3063         builtin to have the "const" attribute makes it unnecessary
3064         to call mark_call_clobbered.  */
3065      gcc_assert (TREE_READONLY (builtin_decl));
3066    }
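
  /* For illustration only: the token depends just on the low-order bits of
     INIT_ADDR, i.e. conceptually on

       misalign = (uintptr_t) init_addr & (VS - 1);  // VS = vector size in bytes

     although the actual mask encoding is target-specific (e.g. a byte-select
     permutation).  This is why the computation can be hoisted before LOOP
     whenever the misalignment is invariant there.  */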
3067
3068  if (alignment_support_scheme == dr_explicit_realign)
3069    return msq;
3070
3071  gcc_assert (!compute_in_loop);
3072  gcc_assert (alignment_support_scheme == dr_explicit_realign_optimized);
3073
3074
3075  /* 5. Create msq = phi <msq_init, lsq> in loop  */
3076
3077  pe = loop_preheader_edge (containing_loop);
3078  vec_dest = vect_create_destination_var (scalar_dest, vectype);
3079  msq = make_ssa_name (vec_dest, NULL);
3080  phi_stmt = create_phi_node (msq, containing_loop->header);
3081  SSA_NAME_DEF_STMT (msq) = phi_stmt;
3082  add_phi_arg (phi_stmt, msq_init, pe, UNKNOWN_LOCATION);
3083
3084  return msq;
3085}
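
/* For illustration only: a scalar sketch (hypothetical helper, not part of
   the vectorizer) of what the REALIGN_LOAD <v1, v2, rt> sequence set up
   above computes, assuming vectors of VS bytes and rt encoding the byte
   misalignment of the original address:

     #include <string.h>
     #define VS 16

     static void
     realign_load_sketch (const unsigned char v1[VS],
                          const unsigned char v2[VS],
                          unsigned int misalign,      // what rt encodes
                          unsigned char result[VS])
     {
       unsigned char concat[2 * VS];

       memcpy (concat, v1, VS);
       memcpy (concat + VS, v2, VS);
       // Select the unaligned VS-byte window starting at the misalignment.
       memcpy (result, concat + misalign, VS);
     }

   In the optimized scheme v1 is MSQ (the PHI result created in step 5 above)
   and v2 is LSQ (the aligned load of the next chunk, generated by the
   caller), so each iteration reuses the previous iteration's aligned load.  */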
3086
3087
3088/* Function vect_strided_load_supported.
3089
3090   Returns TRUE if EXTRACT_EVEN and EXTRACT_ODD operations are supported,
3091   and FALSE otherwise.  */
3092
3093bool
3094vect_strided_load_supported (tree vectype)
3095{
3096  optab perm_even_optab, perm_odd_optab;
3097  int mode;
3098
3099  mode = (int) TYPE_MODE (vectype);
3100
3101  perm_even_optab = optab_for_tree_code (VEC_EXTRACT_EVEN_EXPR, vectype,
3102					 optab_default);
3103  if (!perm_even_optab)
3104    {
3105      if (vect_print_dump_info (REPORT_DETAILS))
3106	fprintf (vect_dump, "no optab for perm_even.");
3107      return false;
3108    }
3109
3110  if (optab_handler (perm_even_optab, mode)->insn_code == CODE_FOR_nothing)
3111    {
3112      if (vect_print_dump_info (REPORT_DETAILS))
3113	fprintf (vect_dump, "perm_even op not supported by target.");
3114      return false;
3115    }
3116
3117  perm_odd_optab = optab_for_tree_code (VEC_EXTRACT_ODD_EXPR, vectype,
3118					optab_default);
3119  if (!perm_odd_optab)
3120    {
3121      if (vect_print_dump_info (REPORT_DETAILS))
3122	fprintf (vect_dump, "no optab for perm_odd.");
3123      return false;
3124    }
3125
3126  if (optab_handler (perm_odd_optab, mode)->insn_code == CODE_FOR_nothing)
3127    {
3128      if (vect_print_dump_info (REPORT_DETAILS))
3129	fprintf (vect_dump, "perm_odd op not supported by target.");
3130      return false;
3131    }
3132  return true;
3133}
3134
3135
3136/* Function vect_permute_load_chain.
3137
3138   Given a chain of interleaved loads in DR_CHAIN of LENGTH that must be
3139   a power of 2, generate extract_even/odd stmts to reorder the input data
3140   correctly. Return the final references for loads in RESULT_CHAIN.
3141
3142   E.g., LENGTH is 4 and the scalar type is short, i.e., VF is 8.
3143   The input is 4 vectors each containing 8 elements.  We assign a number to
3144   each element; the input sequence is:
3145
3146   1st vec:   0  1  2  3  4  5  6  7
3147   2nd vec:   8  9 10 11 12 13 14 15
3148   3rd vec:  16 17 18 19 20 21 22 23
3149   4th vec:  24 25 26 27 28 29 30 31
3150
3151   The output sequence should be:
3152
3153   1st vec:  0 4  8 12 16 20 24 28
3154   2nd vec:  1 5  9 13 17 21 25 29
3155   3rd vec:  2 6 10 14 18 22 26 30
3156   4th vec:  3 7 11 15 19 23 27 31
3157
3158   i.e., the first output vector should contain the first elements of each
3159   interleaving group, etc.
3160
3161   We use extract_even/odd instructions to create such output. The input of each
3162   extract_even/odd operation is a pair of vectors:
3163   1st vec    2nd vec
3164   0 1 2 3    4 5 6 7
3165
3166   and the output is the vector of extracted even/odd elements. The output of
3167   extract_even will be:   0 2 4 6
3168   and of extract_odd:     1 3 5 7
3169
3170
3171   The permutation is done in log2 (LENGTH) stages.  In each stage extract_even and
3172   extract_odd stmts are created for each pair of vectors in DR_CHAIN in their
3173   order. In our example,
3174
3175   E1: extract_even (1st vec, 2nd vec)
3176   E2: extract_odd (1st vec, 2nd vec)
3177   E3: extract_even (3rd vec, 4th vec)
3178   E4: extract_odd (3rd vec, 4th vec)
3179
3180   The output for the first stage will be:
3181
3182   E1:  0  2  4  6  8 10 12 14
3183   E2:  1  3  5  7  9 11 13 15
3184   E3: 16 18 20 22 24 26 28 30
3185   E4: 17 19 21 23 25 27 29 31
3186
3187   In order to proceed and create the correct sequence for the next stage (or
3188   for the correct output, if the second stage is the last one, as in our
3189   example), we first put the output of extract_even operation and then the
3190   output of extract_odd in RESULT_CHAIN (which is then copied to DR_CHAIN).
3191   The input for the second stage is:
3192
3193   1st vec (E1):  0  2  4  6  8 10 12 14
3194   2nd vec (E3): 16 18 20 22 24 26 28 30
3195   3rd vec (E2):  1  3  5  7  9 11 13 15
3196   4th vec (E4): 17 19 21 23 25 27 29 31
3197
3198   The output of the second stage:
3199
3200   E1: 0 4  8 12 16 20 24 28
3201   E2: 2 6 10 14 18 22 26 30
3202   E3: 1 5  9 13 17 21 25 29
3203   E4: 3 7 11 15 19 23 27 31
3204
3205   And RESULT_CHAIN after reordering:
3206
3207   1st vec (E1):  0 4  8 12 16 20 24 28
3208   2nd vec (E3):  1 5  9 13 17 21 25 29
3209   3rd vec (E2):  2 6 10 14 18 22 26 30
3210   4th vec (E4):  3 7 11 15 19 23 27 31.  */
3211
3212bool
3213vect_permute_load_chain (VEC(tree,heap) *dr_chain,
3214			 unsigned int length,
3215			 gimple stmt,
3216			 gimple_stmt_iterator *gsi,
3217			 VEC(tree,heap) **result_chain)
3218{
3219  tree perm_dest, data_ref, first_vect, second_vect;
3220  gimple perm_stmt;
3221  tree vectype = STMT_VINFO_VECTYPE (vinfo_for_stmt (stmt));
3222  int i;
3223  unsigned int j;
3224
3225  /* Check that the operation is supported.  */
3226  if (!vect_strided_load_supported (vectype))
3227    return false;
3228
3229  *result_chain = VEC_copy (tree, heap, dr_chain);
3230  for (i = 0; i < exact_log2 (length); i++)
3231    {
3232      for (j = 0; j < length; j +=2)
3233	{
3234	  first_vect = VEC_index (tree, dr_chain, j);
3235	  second_vect = VEC_index (tree, dr_chain, j+1);
3236
3237	  /* data_ref = permute_even (first_data_ref, second_data_ref);  */
3238	  perm_dest = create_tmp_var (vectype, "vect_perm_even");
3239	  DECL_GIMPLE_REG_P (perm_dest) = 1;
3240	  add_referenced_var (perm_dest);
3241
3242	  perm_stmt = gimple_build_assign_with_ops (VEC_EXTRACT_EVEN_EXPR,
3243						    perm_dest, first_vect,
3244						    second_vect);
3245
3246	  data_ref = make_ssa_name (perm_dest, perm_stmt);
3247	  gimple_assign_set_lhs (perm_stmt, data_ref);
3248	  vect_finish_stmt_generation (stmt, perm_stmt, gsi);
3249	  mark_symbols_for_renaming (perm_stmt);
3250
3251	  VEC_replace (tree, *result_chain, j/2, data_ref);
3252
3253	  /* data_ref = permute_odd (first_data_ref, second_data_ref);  */
3254	  perm_dest = create_tmp_var (vectype, "vect_perm_odd");
3255	  DECL_GIMPLE_REG_P (perm_dest) = 1;
3256	  add_referenced_var (perm_dest);
3257
3258	  perm_stmt = gimple_build_assign_with_ops (VEC_EXTRACT_ODD_EXPR,
3259						    perm_dest, first_vect,
3260						    second_vect);
3261	  data_ref = make_ssa_name (perm_dest, perm_stmt);
3262	  gimple_assign_set_lhs (perm_stmt, data_ref);
3263	  vect_finish_stmt_generation (stmt, perm_stmt, gsi);
3264	  mark_symbols_for_renaming (perm_stmt);
3265
3266	  VEC_replace (tree, *result_chain, j/2+length/2, data_ref);
3267	}
3268      dr_chain = VEC_copy (tree, heap, *result_chain);
3269    }
3270  return true;
3271}
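
/* For illustration only: a scalar sketch (hypothetical, not used by the
   vectorizer) of one extract_even/extract_odd stage on two 8-element vectors
   represented as plain arrays, matching the example in the comment above:

     static void
     extract_even_odd_sketch (const int a[8], const int b[8],
                              int even[8], int odd[8])
     {
       int concat[16], k;

       for (k = 0; k < 8; k++)
         {
           concat[k] = a[k];
           concat[k + 8] = b[k];
         }
       for (k = 0; k < 8; k++)
         {
           even[k] = concat[2 * k];      // elements 0 2 4 6 ...
           odd[k]  = concat[2 * k + 1];  // elements 1 3 5 7 ...
         }
     }

   Applying this log2 (LENGTH) times, with the even results stored in the
   first half of RESULT_CHAIN and the odd results in the second half, yields
   the de-interleaved vectors shown in the comment above.  */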
3272
3273
3274/* Function vect_transform_strided_load.
3275
3276   Given a chain of input interleaved data-refs (in DR_CHAIN), build statements
3277   to perform their permutation, and record the resulting vectorized statements
3278   in the stmt_info of the corresponding scalar statements.
3279*/
3280
3281bool
3282vect_transform_strided_load (gimple stmt, VEC(tree,heap) *dr_chain, int size,
3283			     gimple_stmt_iterator *gsi)
3284{
3285  stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
3286  gimple first_stmt = DR_GROUP_FIRST_DR (stmt_info);
3287  gimple next_stmt, new_stmt;
3288  VEC(tree,heap) *result_chain = NULL;
3289  unsigned int i, gap_count;
3290  tree tmp_data_ref;
3291
3292  /* DR_CHAIN contains input data-refs that are a part of the interleaving.
3293     RESULT_CHAIN is the output of vect_permute_load_chain; it contains the
3294     permuted vectors, which are ready for vector computation.  */
3295  result_chain = VEC_alloc (tree, heap, size);
3296  /* Permute.  */
3297  if (!vect_permute_load_chain (dr_chain, size, stmt, gsi, &result_chain))
3298    return false;
3299
3300  /* Put a permuted data-ref in the VECTORIZED_STMT field.
3301     Since we scan the chain starting from its first node, their order
3302     corresponds to the order of data-refs in RESULT_CHAIN.  */
3303  next_stmt = first_stmt;
3304  gap_count = 1;
3305  for (i = 0; VEC_iterate (tree, result_chain, i, tmp_data_ref); i++)
3306    {
3307      if (!next_stmt)
3308	break;
3309
3310      /* Skip the gaps.  Loads created for the gaps will be removed by the
3311       dead code elimination pass later.  No need to check for the first stmt
3312       in the group, since it always exists.
3313       DR_GROUP_GAP is the number of steps in elements from the previous
3314       access (if there is no gap DR_GROUP_GAP is 1).  We skip loads that
3315       correspond to the gaps.
3316      */
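      /* Illustration (hypothetical group, not taken from this file): for
         accesses a[4*i], a[4*i+1] and a[4*i+3] in one interleaving group,
         the stmt for a[4*i+3] has DR_GROUP_GAP == 2, so exactly one vector
         data-ref from RESULT_CHAIN (the one standing for the missing
         a[4*i+2] slot) is skipped here before that stmt gets its vectorized
         stmt.  */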
3317      if (next_stmt != first_stmt
3318          && gap_count < DR_GROUP_GAP (vinfo_for_stmt (next_stmt)))
3319      {
3320        gap_count++;
3321        continue;
3322      }
3323
3324      while (next_stmt)
3325        {
3326	  new_stmt = SSA_NAME_DEF_STMT (tmp_data_ref);
3327	  /* We assume that if VEC_STMT is not NULL, this is a case of multiple
3328	     copies, and we put the new vector statement in the first available
3329	     RELATED_STMT.  */
3330	  if (!STMT_VINFO_VEC_STMT (vinfo_for_stmt (next_stmt)))
3331	    STMT_VINFO_VEC_STMT (vinfo_for_stmt (next_stmt)) = new_stmt;
3332	  else
3333            {
3334              if (!DR_GROUP_SAME_DR_STMT (vinfo_for_stmt (next_stmt)))
3335                {
3336 	          gimple prev_stmt =
3337		    STMT_VINFO_VEC_STMT (vinfo_for_stmt (next_stmt));
3338	          gimple rel_stmt =
3339		    STMT_VINFO_RELATED_STMT (vinfo_for_stmt (prev_stmt));
3340	          while (rel_stmt)
3341		    {
3342		      prev_stmt = rel_stmt;
3343		      rel_stmt =
3344                        STMT_VINFO_RELATED_STMT (vinfo_for_stmt (rel_stmt));
3345		    }
3346
3347  	          STMT_VINFO_RELATED_STMT (vinfo_for_stmt (prev_stmt)) =
3348                    new_stmt;
3349                }
3350            }
3351
3352	  next_stmt = DR_GROUP_NEXT_DR (vinfo_for_stmt (next_stmt));
3353	  gap_count = 1;
3354	  /* If NEXT_STMT accesses the same DR as the previous statement,
3355	     put the same TMP_DATA_REF as its vectorized statement; otherwise
3356	     get the next data-ref from RESULT_CHAIN.  */
3357	  if (!next_stmt || !DR_GROUP_SAME_DR_STMT (vinfo_for_stmt (next_stmt)))
3358	    break;
3359        }
3360    }
3361
3362  VEC_free (tree, heap, result_chain);
3363  return true;
3364}
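
/* For illustration only (hypothetical numbers): with two vector copies per
   scalar stmt, e.g. VF 8 realized with 4-element vectors, the first permuted
   data-ref assigned to a scalar load becomes its STMT_VINFO_VEC_STMT and the
   second is appended at the end of its STMT_VINFO_RELATED_STMT chain, which
   is what the walk over RELATED_STMT in the loop above implements.  */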
3365
3366/* Function vect_can_force_dr_alignment_p.
3367
3368   Returns whether the alignment of DECL can be forced to an ALIGNMENT-bit
3369   boundary.  */
3370
3371bool
3372vect_can_force_dr_alignment_p (const_tree decl, unsigned int alignment)
3373{
3374  if (TREE_CODE (decl) != VAR_DECL)
3375    return false;
3376
3377  if (DECL_EXTERNAL (decl))
3378    return false;
3379
3380  if (TREE_ASM_WRITTEN (decl))
3381    return false;
3382
3383  if (TREE_STATIC (decl))
3384    return (alignment <= MAX_OFILE_ALIGNMENT);
3385  else
3386    return (alignment <= MAX_STACK_ALIGNMENT);
3387}
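
/* For context only (a sketch, not a call made in this function): when this
   predicate returns true, the analysis code may later raise the alignment of
   the declaration along the lines of

     DECL_ALIGN (decl) = TYPE_ALIGN (vectype);
     DECL_USER_ALIGN (decl) = 1;

   so that the corresponding data reference becomes provably aligned.  */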
3388
3389/* Function vect_supportable_dr_alignment.
3390
3391   Return whether the data reference DR is supported with respect to its
3392   alignment.  */
3393
3394enum dr_alignment_support
3395vect_supportable_dr_alignment (struct data_reference *dr)
3396{
3397  gimple stmt = DR_STMT (dr);
3398  stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
3399  tree vectype = STMT_VINFO_VECTYPE (stmt_info);
3400  enum machine_mode mode = TYPE_MODE (vectype);
3401  loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
3402  struct loop *vect_loop = NULL;
3403  bool nested_in_vect_loop = false;
3404
3405  if (aligned_access_p (dr))
3406    return dr_aligned;
3407
3408  if (!loop_vinfo)
3409    /* FORNOW: Misaligned accesses are supported only in loops.  */
3410    return dr_unaligned_unsupported;
3411
3412  vect_loop = LOOP_VINFO_LOOP (loop_vinfo);
3413  nested_in_vect_loop = nested_in_vect_loop_p (vect_loop, stmt);
3414
3415  /* Possibly unaligned access.  */
3416
3417  /* We can choose between using the implicit realignment scheme (generating
3418     a misaligned_move stmt) and the explicit realignment scheme (generating
3419     aligned loads with a REALIGN_LOAD). There are two variants to the explicit
3420     realignment scheme: optimized, and unoptimized.
3421     We can optimize the realignment only if the step between consecutive
3422     vector loads is equal to the vector size.  Since the vector memory
3423     accesses advance in steps of VS (Vector Size) in the vectorized loop, it
3424     is guaranteed that the misalignment amount remains the same throughout the
3425     execution of the vectorized loop.  Therefore, we can create the
3426     "realignment token" (the permutation mask that is passed to REALIGN_LOAD)
3427     at the loop preheader.
3428
3429     However, in the case of outer-loop vectorization, when vectorizing a
3430     memory access in the inner-loop nested within the LOOP that is now being
3431     vectorized, while it is guaranteed that the misalignment of the
3432     vectorized memory access will remain the same in different outer-loop
3433     iterations, it is *not* guaranteed that it will remain the same throughout
3434     the execution of the inner-loop.  This is because the inner-loop advances
3435     with the original scalar step (and not in steps of VS).  If the inner-loop
3436     step happens to be a multiple of VS, then the misalignment remains fixed
3437     and we can use the optimized realignment scheme.  For example:
3438
3439      for (i=0; i<N; i++)
3440        for (j=0; j<M; j++)
3441          s += a[i+j];
3442
3443     When vectorizing the i-loop in the above example, the step between
3444     consecutive vector loads is 1, and so the misalignment does not remain
3445     fixed across the execution of the inner-loop, and the realignment cannot
3446     be optimized (as illustrated in the following pseudo vectorized loop):
3447
3448      for (i=0; i<N; i+=4)
3449        for (j=0; j<M; j++){
3450          vs += vp[i+j]; // misalignment of &vp[i+j] is {0,1,2,3,0,1,2,3,...}
3451                         // when j is {0,1,2,3,4,5,6,7,...} respectively.
3452                         // (assuming that we start from an aligned address).
3453          }
3454
3455     We therefore have to use the unoptimized realignment scheme:
3456
3457      for (i=0; i<N; i+=4)
3458        for (j=k; j<M; j+=4)
3459          vs += vp[i+j]; // misalignment of &vp[i+j] is always k (assuming
3460                         // that the misalignment of the initial address
3461                         // is 0).
3462
3463     The loop can then be vectorized as follows:
3464
3465      for (k=0; k<4; k++){
3466        rt = get_realignment_token (&vp[k]);
3467        for (i=0; i<N; i+=4){
3468          v1 = vp[i+k];
3469          for (j=k; j<M; j+=4){
3470            v2 = vp[i+j+VS-1];
3471            va = REALIGN_LOAD <v1,v2,rt>;
3472            vs += va;
3473            v1 = v2;
3474          }
3475        }
3476    } */
3477
3478  if (DR_IS_READ (dr))
3479    {
3480      bool is_packed = false;
3481      tree type = (TREE_TYPE (DR_REF (dr)));
3482
3483      if (optab_handler (vec_realign_load_optab, mode)->insn_code !=
3484						   	     CODE_FOR_nothing
3485	  && (!targetm.vectorize.builtin_mask_for_load
3486	      || targetm.vectorize.builtin_mask_for_load ()))
3487	{
3488	  tree vectype = STMT_VINFO_VECTYPE (stmt_info);
3489	  if (nested_in_vect_loop
3490	      && (TREE_INT_CST_LOW (DR_STEP (dr))
3491		  != GET_MODE_SIZE (TYPE_MODE (vectype))))
3492	    return dr_explicit_realign;
3493	  else
3494	    return dr_explicit_realign_optimized;
3495	}
3496      if (!known_alignment_for_access_p (dr))
3497	{
3498	  tree ba = DR_BASE_OBJECT (dr);
3499
3500	  if (ba)
3501	    is_packed = contains_packed_reference (ba);
3502	}
3503
3504      if (targetm.vectorize.
3505	  builtin_support_vector_misalignment (mode, type,
3506					       DR_MISALIGNMENT (dr), is_packed))
3507	/* Can't software pipeline the loads, but can at least do them.  */
3508	return dr_unaligned_supported;
3509    }
3510  else
3511    {
3512      bool is_packed = false;
3513      tree type = (TREE_TYPE (DR_REF (dr)));
3514
3515      if (!known_alignment_for_access_p (dr))
3516	{
3517	  tree ba = DR_BASE_OBJECT (dr);
3518
3519	  if (ba)
3520	    is_packed = contains_packed_reference (ba);
3521	}
3522
3523      if (targetm.vectorize.
3524          builtin_support_vector_misalignment (mode, type,
3525                                               DR_MISALIGNMENT (dr), is_packed))
3526        return dr_unaligned_supported;
3527    }
3528
3529  /* Unsupported.  */
3530  return dr_unaligned_unsupported;
3531}
3532
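/* For illustration only: a hypothetical caller would typically dispatch on
   the returned value along these lines (a sketch, not actual vectorizer
   code):

     switch (vect_supportable_dr_alignment (dr))
       {
       case dr_aligned:
       case dr_unaligned_supported:
         // plain (possibly misaligned) vector access
         break;
       case dr_explicit_realign:
       case dr_explicit_realign_optimized:
         // needs vect_setup_realignment to build the REALIGN_LOAD sequence
         break;
       case dr_unaligned_unsupported:
         // cannot vectorize the access as is (e.g. peel or version the loop)
         break;
       }  */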