1169689Skan/* Analysis Utilities for Loop Vectorization.
2169689Skan   Copyright (C) 2003,2004,2005,2006 Free Software Foundation, Inc.
3169689Skan   Contributed by Dorit Naishlos <dorit@il.ibm.com>
4169689Skan
5169689SkanThis file is part of GCC.
6169689Skan
7169689SkanGCC is free software; you can redistribute it and/or modify it under
8169689Skanthe terms of the GNU General Public License as published by the Free
9169689SkanSoftware Foundation; either version 2, or (at your option) any later
10169689Skanversion.
11169689Skan
12169689SkanGCC is distributed in the hope that it will be useful, but WITHOUT ANY
13169689SkanWARRANTY; without even the implied warranty of MERCHANTABILITY or
14169689SkanFITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
15169689Skanfor more details.
16169689Skan
17169689SkanYou should have received a copy of the GNU General Public License
18169689Skanalong with GCC; see the file COPYING.  If not, write to the Free
19169689SkanSoftware Foundation, 51 Franklin Street, Fifth Floor, Boston, MA
20169689Skan02110-1301, USA.  */
21169689Skan
22169689Skan#include "config.h"
23169689Skan#include "system.h"
24169689Skan#include "coretypes.h"
25169689Skan#include "tm.h"
26169689Skan#include "ggc.h"
27169689Skan#include "tree.h"
28220150Smm#include "target.h"
29169689Skan#include "basic-block.h"
30169689Skan#include "diagnostic.h"
31169689Skan#include "tree-flow.h"
32169689Skan#include "tree-dump.h"
33169689Skan#include "timevar.h"
34169689Skan#include "cfgloop.h"
35169689Skan#include "expr.h"
36169689Skan#include "optabs.h"
37169689Skan#include "params.h"
38169689Skan#include "tree-chrec.h"
39169689Skan#include "tree-data-ref.h"
40169689Skan#include "tree-scalar-evolution.h"
41169689Skan#include "tree-vectorizer.h"
42169689Skan
43169689Skan/* Main analysis functions.  */
44169689Skanstatic loop_vec_info vect_analyze_loop_form (struct loop *);
45169689Skanstatic bool vect_analyze_data_refs (loop_vec_info);
46169689Skanstatic bool vect_mark_stmts_to_be_vectorized (loop_vec_info);
47169689Skanstatic void vect_analyze_scalar_cycles (loop_vec_info);
48169689Skanstatic bool vect_analyze_data_ref_accesses (loop_vec_info);
49169689Skanstatic bool vect_analyze_data_ref_dependences (loop_vec_info);
50169689Skanstatic bool vect_analyze_data_refs_alignment (loop_vec_info);
51169689Skanstatic bool vect_compute_data_refs_alignment (loop_vec_info);
52169689Skanstatic bool vect_enhance_data_refs_alignment (loop_vec_info);
53169689Skanstatic bool vect_analyze_operations (loop_vec_info);
54169689Skanstatic bool vect_determine_vectorization_factor (loop_vec_info);
55169689Skan
56169689Skan/* Utility functions for the analyses.  */
57169689Skanstatic bool exist_non_indexing_operands_for_use_p (tree, tree);
58169689Skanstatic void vect_mark_relevant (VEC(tree,heap) **, tree, bool, bool);
59169689Skanstatic bool vect_stmt_relevant_p (tree, loop_vec_info, bool *, bool *);
60169689Skanstatic tree vect_get_loop_niters (struct loop *, tree *);
61169689Skanstatic bool vect_analyze_data_ref_dependence
62169689Skan  (struct data_dependence_relation *, loop_vec_info);
63169689Skanstatic bool vect_compute_data_ref_alignment (struct data_reference *);
64169689Skanstatic bool vect_analyze_data_ref_access (struct data_reference *);
65169689Skanstatic bool vect_can_advance_ivs_p (loop_vec_info);
66169689Skanstatic void vect_update_misalignment_for_peel
67169689Skan  (struct data_reference *, struct data_reference *, int npeel);
68169689Skan
69169689Skan
70169689Skan/* Function vect_determine_vectorization_factor
71169689Skan
72169689Skan   Determine the vectorization factor (VF). VF is the number of data elements
73169689Skan   that are operated upon in parallel in a single iteration of the vectorized
74169689Skan   loop. For example, when vectorizing a loop that operates on 4byte elements,
75169689Skan   on a target with vector size (VS) 16byte, the VF is set to 4, since 4
76169689Skan   elements can fit in a single vector register.
77169689Skan
78169689Skan   We currently support vectorization of loops in which all types operated upon
79169689Skan   are of the same size. Therefore this function currently sets VF according to
80169689Skan   the size of the types operated upon, and fails if there are multiple sizes
81169689Skan   in the loop.
82169689Skan
83169689Skan   VF is also the factor by which the loop iterations are strip-mined, e.g.:
84169689Skan   original loop:
85169689Skan        for (i=0; i<N; i++){
86169689Skan          a[i] = b[i] + c[i];
87169689Skan        }
88169689Skan
89169689Skan   vectorized loop:
90169689Skan        for (i=0; i<N; i+=VF){
91169689Skan          a[i:VF] = b[i:VF] + c[i:VF];
92169689Skan        }
93169689Skan*/
94169689Skan
95169689Skanstatic bool
96169689Skanvect_determine_vectorization_factor (loop_vec_info loop_vinfo)
97169689Skan{
98169689Skan  struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
99169689Skan  basic_block *bbs = LOOP_VINFO_BBS (loop_vinfo);
100169689Skan  int nbbs = loop->num_nodes;
101169689Skan  block_stmt_iterator si;
102169689Skan  unsigned int vectorization_factor = 0;
103169689Skan  int i;
104169689Skan  tree scalar_type;
105169689Skan
106169689Skan  if (vect_print_dump_info (REPORT_DETAILS))
107169689Skan    fprintf (vect_dump, "=== vect_determine_vectorization_factor ===");
108169689Skan
109169689Skan  for (i = 0; i < nbbs; i++)
110169689Skan    {
111169689Skan      basic_block bb = bbs[i];
112169689Skan
113169689Skan      for (si = bsi_start (bb); !bsi_end_p (si); bsi_next (&si))
114169689Skan        {
115169689Skan          tree stmt = bsi_stmt (si);
116169689Skan          unsigned int nunits;
117169689Skan          stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
118169689Skan          tree vectype;
119169689Skan
120169689Skan          if (vect_print_dump_info (REPORT_DETAILS))
121169689Skan            {
122169689Skan              fprintf (vect_dump, "==> examining statement: ");
123169689Skan              print_generic_expr (vect_dump, stmt, TDF_SLIM);
124169689Skan            }
125169689Skan
126169689Skan          gcc_assert (stmt_info);
127169689Skan          /* skip stmts which do not need to be vectorized.  */
128169689Skan          if (!STMT_VINFO_RELEVANT_P (stmt_info)
129169689Skan	      && !STMT_VINFO_LIVE_P (stmt_info))
130169689Skan            {
131169689Skan              if (vect_print_dump_info (REPORT_DETAILS))
132169689Skan                fprintf (vect_dump, "skip.");
133169689Skan              continue;
134169689Skan            }
135169689Skan
136169689Skan          if (VECTOR_MODE_P (TYPE_MODE (TREE_TYPE (stmt))))
137169689Skan            {
138169689Skan              if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS))
139169689Skan                {
140169689Skan                  fprintf (vect_dump, "not vectorized: vector stmt in loop:");
141169689Skan                  print_generic_expr (vect_dump, stmt, TDF_SLIM);
142169689Skan                }
143169689Skan              return false;
144169689Skan            }
145169689Skan
146169689Skan	  if (STMT_VINFO_VECTYPE (stmt_info))
147169689Skan	    {
148169689Skan	      vectype = STMT_VINFO_VECTYPE (stmt_info);
149169689Skan	      scalar_type = TREE_TYPE (vectype);
150169689Skan	    }
151169689Skan	  else
152169689Skan	    {
153169689Skan	      if (STMT_VINFO_DATA_REF (stmt_info))
154169689Skan		scalar_type =
155169689Skan			TREE_TYPE (DR_REF (STMT_VINFO_DATA_REF (stmt_info)));
156169689Skan	      else if (TREE_CODE (stmt) == MODIFY_EXPR)
157169689Skan		scalar_type = TREE_TYPE (TREE_OPERAND (stmt, 0));
158169689Skan	      else
159169689Skan		scalar_type = TREE_TYPE (stmt);
160169689Skan
161169689Skan	      if (vect_print_dump_info (REPORT_DETAILS))
162169689Skan		{
163169689Skan		  fprintf (vect_dump, "get vectype for scalar type:  ");
164169689Skan		  print_generic_expr (vect_dump, scalar_type, TDF_SLIM);
165169689Skan		}
166169689Skan
167169689Skan	      vectype = get_vectype_for_scalar_type (scalar_type);
168169689Skan	      if (!vectype)
169169689Skan		{
170169689Skan		  if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS))
171169689Skan		    {
172169689Skan		      fprintf (vect_dump,
173169689Skan			       "not vectorized: unsupported data-type ");
174169689Skan		      print_generic_expr (vect_dump, scalar_type, TDF_SLIM);
175169689Skan		    }
176169689Skan		  return false;
177169689Skan		}
178169689Skan	      STMT_VINFO_VECTYPE (stmt_info) = vectype;
179169689Skan            }
180169689Skan
181169689Skan          if (vect_print_dump_info (REPORT_DETAILS))
182169689Skan            {
183169689Skan              fprintf (vect_dump, "vectype: ");
184169689Skan              print_generic_expr (vect_dump, vectype, TDF_SLIM);
185169689Skan            }
186169689Skan
187169689Skan          nunits = TYPE_VECTOR_SUBPARTS (vectype);
188169689Skan          if (vect_print_dump_info (REPORT_DETAILS))
189169689Skan            fprintf (vect_dump, "nunits = %d", nunits);
190169689Skan
191169689Skan          if (vectorization_factor)
192169689Skan            {
193169689Skan              /* FORNOW: don't allow mixed units.
194169689Skan                 This restriction will be relaxed in the future.  */
195169689Skan              if (nunits != vectorization_factor)
196169689Skan                {
197169689Skan                  if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS))
198169689Skan                    fprintf (vect_dump, "not vectorized: mixed data-types");
199169689Skan                  return false;
200169689Skan                }
201169689Skan            }
202169689Skan          else
203169689Skan            vectorization_factor = nunits;
204169689Skan
205169689Skan          gcc_assert (GET_MODE_SIZE (TYPE_MODE (scalar_type))
206169689Skan                        * vectorization_factor == UNITS_PER_SIMD_WORD);
207169689Skan        }
208169689Skan    }
209169689Skan
210169689Skan  /* TODO: Analyze cost. Decide if worth while to vectorize.  */
211169689Skan
212169689Skan  if (vectorization_factor <= 1)
213169689Skan    {
214169689Skan      if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS))
215169689Skan        fprintf (vect_dump, "not vectorized: unsupported data-type");
216169689Skan      return false;
217169689Skan    }
218169689Skan  LOOP_VINFO_VECT_FACTOR (loop_vinfo) = vectorization_factor;
219169689Skan
220169689Skan  return true;
221169689Skan}
222169689Skan
223169689Skan
224169689Skan/* Function vect_analyze_operations.
225169689Skan
226169689Skan   Scan the loop stmts and make sure they are all vectorizable.  */
227169689Skan
228169689Skanstatic bool
229169689Skanvect_analyze_operations (loop_vec_info loop_vinfo)
230169689Skan{
231169689Skan  struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
232169689Skan  basic_block *bbs = LOOP_VINFO_BBS (loop_vinfo);
233169689Skan  int nbbs = loop->num_nodes;
234169689Skan  block_stmt_iterator si;
235169689Skan  unsigned int vectorization_factor = 0;
236169689Skan  int i;
237169689Skan  bool ok;
238169689Skan  tree phi;
239169689Skan  stmt_vec_info stmt_info;
240169689Skan  bool need_to_vectorize = false;
241169689Skan
242169689Skan  if (vect_print_dump_info (REPORT_DETAILS))
243169689Skan    fprintf (vect_dump, "=== vect_analyze_operations ===");
244169689Skan
245169689Skan  gcc_assert (LOOP_VINFO_VECT_FACTOR (loop_vinfo));
246169689Skan  vectorization_factor = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
247169689Skan
248169689Skan  for (i = 0; i < nbbs; i++)
249169689Skan    {
250169689Skan      basic_block bb = bbs[i];
251169689Skan
252169689Skan      for (phi = phi_nodes (bb); phi; phi = PHI_CHAIN (phi))
253169689Skan        {
254169689Skan	  stmt_info = vinfo_for_stmt (phi);
255169689Skan	  if (vect_print_dump_info (REPORT_DETAILS))
256169689Skan	    {
257169689Skan	      fprintf (vect_dump, "examining phi: ");
258169689Skan	      print_generic_expr (vect_dump, phi, TDF_SLIM);
259169689Skan	    }
260169689Skan
261169689Skan	  gcc_assert (stmt_info);
262169689Skan
263169689Skan	  if (STMT_VINFO_LIVE_P (stmt_info))
264169689Skan	    {
265169689Skan	      /* FORNOW: not yet supported.  */
266169689Skan	      if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS))
267169689Skan		fprintf (vect_dump, "not vectorized: value used after loop.");
268169689Skan	    return false;
269169689Skan	  }
270169689Skan
271169689Skan	  if (STMT_VINFO_RELEVANT_P (stmt_info))
272169689Skan	    {
273169689Skan	      /* Most likely a reduction-like computation that is used
274169689Skan	         in the loop.  */
275169689Skan	      if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS))
276169689Skan	        fprintf (vect_dump, "not vectorized: unsupported pattern.");
277169689Skan 	     return false;
278169689Skan	    }
279169689Skan	}
280169689Skan
281169689Skan      for (si = bsi_start (bb); !bsi_end_p (si); bsi_next (&si))
282169689Skan	{
283169689Skan	  tree stmt = bsi_stmt (si);
284169689Skan	  stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
285169689Skan
286169689Skan	  if (vect_print_dump_info (REPORT_DETAILS))
287169689Skan	    {
288169689Skan	      fprintf (vect_dump, "==> examining statement: ");
289169689Skan	      print_generic_expr (vect_dump, stmt, TDF_SLIM);
290169689Skan	    }
291169689Skan
292169689Skan	  gcc_assert (stmt_info);
293169689Skan
294169689Skan	  /* skip stmts which do not need to be vectorized.
295169689Skan	     this is expected to include:
296169689Skan	     - the COND_EXPR which is the loop exit condition
297169689Skan	     - any LABEL_EXPRs in the loop
298169689Skan	     - computations that are used only for array indexing or loop
299169689Skan	     control  */
300169689Skan
301169689Skan	  if (!STMT_VINFO_RELEVANT_P (stmt_info)
302169689Skan	      && !STMT_VINFO_LIVE_P (stmt_info))
303169689Skan	    {
304169689Skan	      if (vect_print_dump_info (REPORT_DETAILS))
305169689Skan	        fprintf (vect_dump, "irrelevant.");
306169689Skan	      continue;
307169689Skan	    }
308169689Skan
309169689Skan          if (STMT_VINFO_RELEVANT_P (stmt_info))
310169689Skan            {
311169689Skan              gcc_assert (!VECTOR_MODE_P (TYPE_MODE (TREE_TYPE (stmt))));
312169689Skan              gcc_assert (STMT_VINFO_VECTYPE (stmt_info));
313169689Skan
314169689Skan	      ok = (vectorizable_operation (stmt, NULL, NULL)
315169689Skan		    || vectorizable_assignment (stmt, NULL, NULL)
316169689Skan		    || vectorizable_load (stmt, NULL, NULL)
317169689Skan		    || vectorizable_store (stmt, NULL, NULL)
318169689Skan		    || vectorizable_condition (stmt, NULL, NULL));
319169689Skan
320169689Skan	      if (!ok)
321169689Skan		{
322169689Skan		  if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS))
323169689Skan		    {
324169689Skan		      fprintf (vect_dump,
325169689Skan			       "not vectorized: relevant stmt not supported: ");
326169689Skan		      print_generic_expr (vect_dump, stmt, TDF_SLIM);
327169689Skan		    }
328169689Skan		  return false;
329169689Skan		}
330169689Skan	      need_to_vectorize = true;
331169689Skan            }
332169689Skan
333169689Skan	  if (STMT_VINFO_LIVE_P (stmt_info))
334169689Skan	    {
335169689Skan	      ok = vectorizable_reduction (stmt, NULL, NULL);
336169689Skan
337169689Skan	      if (ok)
338169689Skan                need_to_vectorize = true;
339169689Skan              else
340169689Skan	        ok = vectorizable_live_operation (stmt, NULL, NULL);
341169689Skan
342169689Skan	      if (!ok)
343169689Skan		{
344169689Skan		  if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS))
345169689Skan		    {
346169689Skan		      fprintf (vect_dump,
347169689Skan			       "not vectorized: live stmt not supported: ");
348169689Skan		      print_generic_expr (vect_dump, stmt, TDF_SLIM);
349169689Skan		    }
350169689Skan		  return false;
351169689Skan		}
352169689Skan	    }
353169689Skan	} /* stmts in bb */
354169689Skan    } /* bbs */
355169689Skan
356169689Skan  /* TODO: Analyze cost. Decide if worth while to vectorize.  */
357169689Skan
358169689Skan  /* All operations in the loop are either irrelevant (deal with loop
359169689Skan     control, or dead), or only used outside the loop and can be moved
360169689Skan     out of the loop (e.g. invariants, inductions).  The loop can be
361169689Skan     optimized away by scalar optimizations.  We're better off not
362169689Skan     touching this loop.  */
363169689Skan  if (!need_to_vectorize)
364169689Skan    {
365169689Skan      if (vect_print_dump_info (REPORT_DETAILS))
366169689Skan	fprintf (vect_dump,
367169689Skan		 "All the computation can be taken out of the loop.");
368169689Skan      if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS))
369169689Skan        fprintf (vect_dump,
370169689Skan		 "not vectorized: redundant loop. no profit to vectorize.");
371169689Skan      return false;
372169689Skan    }
373169689Skan
374169689Skan  if (LOOP_VINFO_NITERS_KNOWN_P (loop_vinfo)
375169689Skan      && vect_print_dump_info (REPORT_DETAILS))
376169689Skan    fprintf (vect_dump,
377169689Skan        "vectorization_factor = %d, niters = " HOST_WIDE_INT_PRINT_DEC,
378169689Skan        vectorization_factor, LOOP_VINFO_INT_NITERS (loop_vinfo));
379169689Skan
380169689Skan  if (LOOP_VINFO_NITERS_KNOWN_P (loop_vinfo)
381169689Skan      && LOOP_VINFO_INT_NITERS (loop_vinfo) < vectorization_factor)
382169689Skan    {
383169689Skan      if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS))
384169689Skan	fprintf (vect_dump, "not vectorized: iteration count too small.");
385169689Skan      return false;
386169689Skan    }
387169689Skan
388169689Skan  if (!LOOP_VINFO_NITERS_KNOWN_P (loop_vinfo)
389169689Skan      || LOOP_VINFO_INT_NITERS (loop_vinfo) % vectorization_factor != 0
390169689Skan      || LOOP_PEELING_FOR_ALIGNMENT (loop_vinfo))
391169689Skan    {
392169689Skan      if (vect_print_dump_info (REPORT_DETAILS))
393169689Skan        fprintf (vect_dump, "epilog loop required.");
394169689Skan      if (!vect_can_advance_ivs_p (loop_vinfo))
395169689Skan        {
396169689Skan          if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS))
397169689Skan            fprintf (vect_dump,
398169689Skan                     "not vectorized: can't create epilog loop 1.");
399169689Skan          return false;
400169689Skan        }
401169689Skan      if (!slpeel_can_duplicate_loop_p (loop, loop->single_exit))
402169689Skan        {
403169689Skan          if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS))
404169689Skan            fprintf (vect_dump,
405169689Skan                     "not vectorized: can't create epilog loop 2.");
406169689Skan          return false;
407169689Skan        }
408169689Skan    }
409169689Skan
410169689Skan  return true;
411169689Skan}
412169689Skan
413169689Skan
414169689Skan/* Function exist_non_indexing_operands_for_use_p
415169689Skan
416169689Skan   USE is one of the uses attached to STMT. Check if USE is
417169689Skan   used in STMT for anything other than indexing an array.  */
418169689Skan
419169689Skanstatic bool
420169689Skanexist_non_indexing_operands_for_use_p (tree use, tree stmt)
421169689Skan{
422169689Skan  tree operand;
423169689Skan  stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
424169689Skan
425169689Skan  /* USE corresponds to some operand in STMT. If there is no data
426169689Skan     reference in STMT, then any operand that corresponds to USE
427169689Skan     is not indexing an array.  */
428169689Skan  if (!STMT_VINFO_DATA_REF (stmt_info))
429169689Skan    return true;
430169689Skan
431169689Skan  /* STMT has a data_ref. FORNOW this means that its of one of
432169689Skan     the following forms:
433169689Skan     -1- ARRAY_REF = var
434169689Skan     -2- var = ARRAY_REF
435169689Skan     (This should have been verified in analyze_data_refs).
436169689Skan
437169689Skan     'var' in the second case corresponds to a def, not a use,
438169689Skan     so USE cannot correspond to any operands that are not used
439169689Skan     for array indexing.
440169689Skan
441169689Skan     Therefore, all we need to check is if STMT falls into the
442169689Skan     first case, and whether var corresponds to USE.  */
443169689Skan
444169689Skan  if (TREE_CODE (TREE_OPERAND (stmt, 0)) == SSA_NAME)
445169689Skan    return false;
446169689Skan
447169689Skan  operand = TREE_OPERAND (stmt, 1);
448169689Skan
449169689Skan  if (TREE_CODE (operand) != SSA_NAME)
450169689Skan    return false;
451169689Skan
452169689Skan  if (operand == use)
453169689Skan    return true;
454169689Skan
455169689Skan  return false;
456169689Skan}
457169689Skan
458169689Skan
459169689Skan/* Function vect_analyze_scalar_cycles.
460169689Skan
461169689Skan   Examine the cross iteration def-use cycles of scalar variables, by
462169689Skan   analyzing the loop (scalar) PHIs; Classify each cycle as one of the
463169689Skan   following: invariant, induction, reduction, unknown.
464169689Skan
465169689Skan   Some forms of scalar cycles are not yet supported.
466169689Skan
467169689Skan   Example1: reduction: (unsupported yet)
468169689Skan
469169689Skan              loop1:
470169689Skan              for (i=0; i<N; i++)
471169689Skan                 sum += a[i];
472169689Skan
473169689Skan   Example2: induction: (unsupported yet)
474169689Skan
475169689Skan              loop2:
476169689Skan              for (i=0; i<N; i++)
477169689Skan                 a[i] = i;
478169689Skan
479169689Skan   Note: the following loop *is* vectorizable:
480169689Skan
481169689Skan              loop3:
482169689Skan              for (i=0; i<N; i++)
483169689Skan                 a[i] = b[i];
484169689Skan
485169689Skan         even though it has a def-use cycle caused by the induction variable i:
486169689Skan
487169689Skan              loop: i_2 = PHI (i_0, i_1)
488169689Skan                    a[i_2] = ...;
489169689Skan                    i_1 = i_2 + 1;
490169689Skan                    GOTO loop;
491169689Skan
492169689Skan         because the def-use cycle in loop3 is considered "not relevant" - i.e.,
493169689Skan         it does not need to be vectorized because it is only used for array
494169689Skan         indexing (see 'mark_stmts_to_be_vectorized'). The def-use cycle in
495169689Skan         loop2 on the other hand is relevant (it is being written to memory).
496169689Skan*/
497169689Skan
498169689Skanstatic void
499169689Skanvect_analyze_scalar_cycles (loop_vec_info loop_vinfo)
500169689Skan{
501169689Skan  tree phi;
502169689Skan  struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
503169689Skan  basic_block bb = loop->header;
504169689Skan  tree dummy;
505169689Skan
506169689Skan  if (vect_print_dump_info (REPORT_DETAILS))
507169689Skan    fprintf (vect_dump, "=== vect_analyze_scalar_cycles ===");
508169689Skan
509169689Skan  for (phi = phi_nodes (bb); phi; phi = PHI_CHAIN (phi))
510169689Skan    {
511169689Skan      tree access_fn = NULL;
512169689Skan      tree def = PHI_RESULT (phi);
513169689Skan      stmt_vec_info stmt_vinfo = vinfo_for_stmt (phi);
514169689Skan      tree reduc_stmt;
515169689Skan
516169689Skan      if (vect_print_dump_info (REPORT_DETAILS))
517169689Skan	{
518169689Skan          fprintf (vect_dump, "Analyze phi: ");
519169689Skan          print_generic_expr (vect_dump, phi, TDF_SLIM);
520169689Skan	}
521169689Skan
522169689Skan      /* Skip virtual phi's. The data dependences that are associated with
523169689Skan         virtual defs/uses (i.e., memory accesses) are analyzed elsewhere.  */
524169689Skan
525169689Skan      if (!is_gimple_reg (SSA_NAME_VAR (def)))
526169689Skan	{
527169689Skan	  if (vect_print_dump_info (REPORT_DETAILS))
528169689Skan	    fprintf (vect_dump, "virtual phi. skip.");
529169689Skan	  continue;
530169689Skan	}
531169689Skan
532169689Skan      STMT_VINFO_DEF_TYPE (stmt_vinfo) = vect_unknown_def_type;
533169689Skan
534169689Skan      /* Analyze the evolution function.  */
535169689Skan
536169689Skan      access_fn = analyze_scalar_evolution (loop, def);
537169689Skan
538169689Skan      if (!access_fn)
539169689Skan	continue;
540169689Skan
541169689Skan      if (vect_print_dump_info (REPORT_DETAILS))
542169689Skan        {
543169689Skan           fprintf (vect_dump, "Access function of PHI: ");
544169689Skan           print_generic_expr (vect_dump, access_fn, TDF_SLIM);
545169689Skan        }
546169689Skan
547169689Skan      if (vect_is_simple_iv_evolution (loop->num, access_fn, &dummy, &dummy))
548169689Skan	{
549169689Skan	  if (vect_print_dump_info (REPORT_DETAILS))
550169689Skan	    fprintf (vect_dump, "Detected induction.");
551169689Skan	  STMT_VINFO_DEF_TYPE (stmt_vinfo) = vect_induction_def;
552169689Skan          continue;
553169689Skan	}
554169689Skan
555169689Skan      /* TODO: handle invariant phis  */
556169689Skan
557169689Skan      reduc_stmt = vect_is_simple_reduction (loop, phi);
558169689Skan      if (reduc_stmt)
559169689Skan        {
560169689Skan          if (vect_print_dump_info (REPORT_DETAILS))
561169689Skan            fprintf (vect_dump, "Detected reduction.");
562169689Skan          STMT_VINFO_DEF_TYPE (stmt_vinfo) = vect_reduction_def;
563169689Skan          STMT_VINFO_DEF_TYPE (vinfo_for_stmt (reduc_stmt)) =
564169689Skan                                                        vect_reduction_def;
565169689Skan        }
566169689Skan      else
567169689Skan        if (vect_print_dump_info (REPORT_DETAILS))
568169689Skan          fprintf (vect_dump, "Unknown def-use cycle pattern.");
569169689Skan
570169689Skan    }
571169689Skan
572169689Skan  return;
573169689Skan}
574169689Skan
575169689Skan
576169689Skan/* Function vect_analyze_data_ref_dependence.
577169689Skan
578169689Skan   Return TRUE if there (might) exist a dependence between a memory-reference
579169689Skan   DRA and a memory-reference DRB.  */
580169689Skan
581169689Skanstatic bool
582169689Skanvect_analyze_data_ref_dependence (struct data_dependence_relation *ddr,
583169689Skan                                  loop_vec_info loop_vinfo)
584169689Skan{
585169689Skan  unsigned int i;
586169689Skan  struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
587169689Skan  int vectorization_factor = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
588169689Skan  struct data_reference *dra = DDR_A (ddr);
589169689Skan  struct data_reference *drb = DDR_B (ddr);
590169689Skan  stmt_vec_info stmtinfo_a = vinfo_for_stmt (DR_STMT (dra));
591169689Skan  stmt_vec_info stmtinfo_b = vinfo_for_stmt (DR_STMT (drb));
592169689Skan  lambda_vector dist_v;
593169689Skan  unsigned int loop_depth;
594169689Skan
595169689Skan  if (DDR_ARE_DEPENDENT (ddr) == chrec_known)
596169689Skan    return false;
597169689Skan
598169689Skan  if (DDR_ARE_DEPENDENT (ddr) == chrec_dont_know)
599169689Skan    {
600169689Skan      if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS))
601169689Skan        {
602169689Skan          fprintf (vect_dump,
603169689Skan                   "not vectorized: can't determine dependence between ");
604169689Skan          print_generic_expr (vect_dump, DR_REF (dra), TDF_SLIM);
605169689Skan          fprintf (vect_dump, " and ");
606169689Skan          print_generic_expr (vect_dump, DR_REF (drb), TDF_SLIM);
607169689Skan        }
608169689Skan      return true;
609169689Skan    }
610169689Skan
611169689Skan  if (DDR_NUM_DIST_VECTS (ddr) == 0)
612169689Skan    {
613169689Skan      if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS))
614169689Skan        {
615169689Skan          fprintf (vect_dump, "not vectorized: bad dist vector for ");
616169689Skan          print_generic_expr (vect_dump, DR_REF (dra), TDF_SLIM);
617169689Skan          fprintf (vect_dump, " and ");
618169689Skan          print_generic_expr (vect_dump, DR_REF (drb), TDF_SLIM);
619169689Skan        }
620169689Skan      return true;
621169689Skan    }
622169689Skan
623169689Skan  loop_depth = index_in_loop_nest (loop->num, DDR_LOOP_NEST (ddr));
624169689Skan  for (i = 0; VEC_iterate (lambda_vector, DDR_DIST_VECTS (ddr), i, dist_v); i++)
625169689Skan    {
626169689Skan      int dist = dist_v[loop_depth];
627169689Skan
628169689Skan      if (vect_print_dump_info (REPORT_DR_DETAILS))
629169689Skan	fprintf (vect_dump, "dependence distance  = %d.", dist);
630169689Skan
631169689Skan      /* Same loop iteration.  */
632169689Skan      if (dist % vectorization_factor == 0)
633169689Skan	{
634169689Skan	  /* Two references with distance zero have the same alignment.  */
635169689Skan	  VEC_safe_push (dr_p, heap, STMT_VINFO_SAME_ALIGN_REFS (stmtinfo_a), drb);
636169689Skan	  VEC_safe_push (dr_p, heap, STMT_VINFO_SAME_ALIGN_REFS (stmtinfo_b), dra);
637169689Skan	  if (vect_print_dump_info (REPORT_ALIGNMENT))
638169689Skan	    fprintf (vect_dump, "accesses have the same alignment.");
639169689Skan	  if (vect_print_dump_info (REPORT_DR_DETAILS))
640169689Skan	    {
641169689Skan	      fprintf (vect_dump, "dependence distance modulo vf == 0 between ");
642169689Skan	      print_generic_expr (vect_dump, DR_REF (dra), TDF_SLIM);
643169689Skan	      fprintf (vect_dump, " and ");
644169689Skan	      print_generic_expr (vect_dump, DR_REF (drb), TDF_SLIM);
645169689Skan	    }
646169689Skan	  continue;
647169689Skan	}
648169689Skan
649169689Skan      if (abs (dist) >= vectorization_factor)
650169689Skan	{
651169689Skan	  /* Dependence distance does not create dependence, as far as vectorization
652169689Skan	     is concerned, in this case.  */
653169689Skan	  if (vect_print_dump_info (REPORT_DR_DETAILS))
654169689Skan	    fprintf (vect_dump, "dependence distance >= VF.");
655169689Skan	  continue;
656169689Skan	}
657169689Skan
658169689Skan      if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS))
659169689Skan	{
660169689Skan	  fprintf (vect_dump,
661169689Skan		   "not vectorized: possible dependence between data-refs ");
662169689Skan	  print_generic_expr (vect_dump, DR_REF (dra), TDF_SLIM);
663169689Skan	  fprintf (vect_dump, " and ");
664169689Skan	  print_generic_expr (vect_dump, DR_REF (drb), TDF_SLIM);
665169689Skan	}
666169689Skan
667169689Skan      return true;
668169689Skan    }
669169689Skan
670169689Skan  return false;
671169689Skan}
672169689Skan
673169689Skan
674169689Skan/* Function vect_analyze_data_ref_dependences.
675169689Skan
676169689Skan   Examine all the data references in the loop, and make sure there do not
677169689Skan   exist any data dependences between them.  */
678169689Skan
679169689Skanstatic bool
680169689Skanvect_analyze_data_ref_dependences (loop_vec_info loop_vinfo)
681169689Skan{
682169689Skan  unsigned int i;
683169689Skan  VEC (ddr_p, heap) *ddrs = LOOP_VINFO_DDRS (loop_vinfo);
684169689Skan  struct data_dependence_relation *ddr;
685169689Skan
686169689Skan  if (vect_print_dump_info (REPORT_DETAILS))
687169689Skan    fprintf (vect_dump, "=== vect_analyze_dependences ===");
688169689Skan
689169689Skan  for (i = 0; VEC_iterate (ddr_p, ddrs, i, ddr); i++)
690169689Skan    if (vect_analyze_data_ref_dependence (ddr, loop_vinfo))
691169689Skan      return false;
692169689Skan
693169689Skan  return true;
694169689Skan}
695169689Skan
696169689Skan
697169689Skan/* Function vect_compute_data_ref_alignment
698169689Skan
699169689Skan   Compute the misalignment of the data reference DR.
700169689Skan
701169689Skan   Output:
702169689Skan   1. If during the misalignment computation it is found that the data reference
703169689Skan      cannot be vectorized then false is returned.
704169689Skan   2. DR_MISALIGNMENT (DR) is defined.
705169689Skan
706169689Skan   FOR NOW: No analysis is actually performed. Misalignment is calculated
707169689Skan   only for trivial cases. TODO.  */
708169689Skan
709169689Skanstatic bool
710169689Skanvect_compute_data_ref_alignment (struct data_reference *dr)
711169689Skan{
712169689Skan  tree stmt = DR_STMT (dr);
713169689Skan  stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
714169689Skan  tree ref = DR_REF (dr);
715169689Skan  tree vectype;
716169689Skan  tree base, base_addr;
717169689Skan  bool base_aligned;
718169689Skan  tree misalign;
719169689Skan  tree aligned_to, alignment;
720169689Skan
721169689Skan  if (vect_print_dump_info (REPORT_DETAILS))
722169689Skan    fprintf (vect_dump, "vect_compute_data_ref_alignment:");
723169689Skan
724169689Skan  /* Initialize misalignment to unknown.  */
725169689Skan  DR_MISALIGNMENT (dr) = -1;
726169689Skan
727169689Skan  misalign = DR_OFFSET_MISALIGNMENT (dr);
728169689Skan  aligned_to = DR_ALIGNED_TO (dr);
729169689Skan  base_addr = DR_BASE_ADDRESS (dr);
730169689Skan  base = build_fold_indirect_ref (base_addr);
731169689Skan  vectype = STMT_VINFO_VECTYPE (stmt_info);
732169689Skan  alignment = ssize_int (TYPE_ALIGN (vectype)/BITS_PER_UNIT);
733169689Skan
734169689Skan  if ((aligned_to && tree_int_cst_compare (aligned_to, alignment) < 0)
735169689Skan      || !misalign)
736169689Skan    {
737169689Skan      if (vect_print_dump_info (REPORT_DETAILS))
738169689Skan	{
739169689Skan	  fprintf (vect_dump, "Unknown alignment for access: ");
740169689Skan	  print_generic_expr (vect_dump, base, TDF_SLIM);
741169689Skan	}
742169689Skan      return true;
743169689Skan    }
744169689Skan
745169689Skan  if ((DECL_P (base)
746169689Skan       && tree_int_cst_compare (ssize_int (DECL_ALIGN_UNIT (base)),
747169689Skan				alignment) >= 0)
748169689Skan      || (TREE_CODE (base_addr) == SSA_NAME
749169689Skan	  && tree_int_cst_compare (ssize_int (TYPE_ALIGN_UNIT (TREE_TYPE (
750169689Skan						      TREE_TYPE (base_addr)))),
751169689Skan				   alignment) >= 0))
752169689Skan    base_aligned = true;
753169689Skan  else
754169689Skan    base_aligned = false;
755169689Skan
756169689Skan  if (!base_aligned)
757169689Skan    {
758169689Skan      /* Do not change the alignment of global variables if
759169689Skan	 flag_section_anchors is enabled.  */
760169689Skan      if (!vect_can_force_dr_alignment_p (base, TYPE_ALIGN (vectype))
761169689Skan	  || (TREE_STATIC (base) && flag_section_anchors))
762169689Skan	{
763169689Skan	  if (vect_print_dump_info (REPORT_DETAILS))
764169689Skan	    {
765169689Skan	      fprintf (vect_dump, "can't force alignment of ref: ");
766169689Skan	      print_generic_expr (vect_dump, ref, TDF_SLIM);
767169689Skan	    }
768169689Skan	  return true;
769169689Skan	}
770169689Skan
771169689Skan      /* Force the alignment of the decl.
772169689Skan	 NOTE: This is the only change to the code we make during
773169689Skan	 the analysis phase, before deciding to vectorize the loop.  */
774169689Skan      if (vect_print_dump_info (REPORT_DETAILS))
775169689Skan	fprintf (vect_dump, "force alignment");
776169689Skan      DECL_ALIGN (base) = TYPE_ALIGN (vectype);
777169689Skan      DECL_USER_ALIGN (base) = 1;
778169689Skan    }
779169689Skan
780169689Skan  /* At this point we assume that the base is aligned.  */
781169689Skan  gcc_assert (base_aligned
782169689Skan	      || (TREE_CODE (base) == VAR_DECL
783169689Skan		  && DECL_ALIGN (base) >= TYPE_ALIGN (vectype)));
784169689Skan
785169689Skan  /* Modulo alignment.  */
786169689Skan  misalign = size_binop (TRUNC_MOD_EXPR, misalign, alignment);
787169689Skan
788169689Skan  if (!host_integerp (misalign, 1))
789169689Skan    {
790169689Skan      /* Negative or overflowed misalignment value.  */
791169689Skan      if (vect_print_dump_info (REPORT_DETAILS))
792169689Skan	fprintf (vect_dump, "unexpected misalign value");
793169689Skan      return false;
794169689Skan    }
795169689Skan
796169689Skan  DR_MISALIGNMENT (dr) = TREE_INT_CST_LOW (misalign);
797169689Skan
798169689Skan  if (vect_print_dump_info (REPORT_DETAILS))
799169689Skan    {
800169689Skan      fprintf (vect_dump, "misalign = %d bytes of ref ", DR_MISALIGNMENT (dr));
801169689Skan      print_generic_expr (vect_dump, ref, TDF_SLIM);
802169689Skan    }
803169689Skan
804169689Skan  return true;
805169689Skan}
806169689Skan
807169689Skan
808169689Skan/* Function vect_compute_data_refs_alignment
809169689Skan
810169689Skan   Compute the misalignment of data references in the loop.
811169689Skan   Return FALSE if a data reference is found that cannot be vectorized.  */
812169689Skan
813169689Skanstatic bool
814169689Skanvect_compute_data_refs_alignment (loop_vec_info loop_vinfo)
815169689Skan{
816169689Skan  VEC (data_reference_p, heap) *datarefs = LOOP_VINFO_DATAREFS (loop_vinfo);
817169689Skan  struct data_reference *dr;
818169689Skan  unsigned int i;
819169689Skan
820169689Skan  for (i = 0; VEC_iterate (data_reference_p, datarefs, i, dr); i++)
821169689Skan    if (!vect_compute_data_ref_alignment (dr))
822169689Skan      return false;
823169689Skan
824169689Skan  return true;
825169689Skan}
826169689Skan
827169689Skan
828169689Skan/* Function vect_update_misalignment_for_peel
829169689Skan
830169689Skan   DR - the data reference whose misalignment is to be adjusted.
831169689Skan   DR_PEEL - the data reference whose misalignment is being made
832169689Skan             zero in the vector loop by the peel.
833169689Skan   NPEEL - the number of iterations in the peel loop if the misalignment
834169689Skan           of DR_PEEL is known at compile time.  */
835169689Skan
836169689Skanstatic void
837169689Skanvect_update_misalignment_for_peel (struct data_reference *dr,
838169689Skan                                   struct data_reference *dr_peel, int npeel)
839169689Skan{
840169689Skan  unsigned int i;
841169689Skan  int drsize;
842169689Skan  VEC(dr_p,heap) *same_align_drs;
843169689Skan  struct data_reference *current_dr;
844169689Skan
845169689Skan  if (known_alignment_for_access_p (dr)
846169689Skan      && DR_MISALIGNMENT (dr) == DR_MISALIGNMENT (dr_peel))
847169689Skan    {
848169689Skan      DR_MISALIGNMENT (dr) = 0;
849169689Skan      return;
850169689Skan    }
851169689Skan
852169689Skan  /* It can be assumed that the data refs with the same alignment as dr_peel
853169689Skan     are aligned in the vector loop.  */
854169689Skan  same_align_drs
855169689Skan    = STMT_VINFO_SAME_ALIGN_REFS (vinfo_for_stmt (DR_STMT (dr_peel)));
856169689Skan  for (i = 0; VEC_iterate (dr_p, same_align_drs, i, current_dr); i++)
857169689Skan    {
858169689Skan      if (current_dr != dr)
859169689Skan        continue;
860169689Skan      gcc_assert (DR_MISALIGNMENT (dr) == DR_MISALIGNMENT (dr_peel));
861169689Skan      DR_MISALIGNMENT (dr) = 0;
862169689Skan      return;
863169689Skan    }
864169689Skan
865169689Skan  if (known_alignment_for_access_p (dr)
866169689Skan      && known_alignment_for_access_p (dr_peel))
867169689Skan    {
868169689Skan      drsize = GET_MODE_SIZE (TYPE_MODE (TREE_TYPE (DR_REF (dr))));
869169689Skan      DR_MISALIGNMENT (dr) += npeel * drsize;
870169689Skan      DR_MISALIGNMENT (dr) %= UNITS_PER_SIMD_WORD;
871169689Skan      return;
872169689Skan    }
873169689Skan
874169689Skan  DR_MISALIGNMENT (dr) = -1;
875169689Skan}
876169689Skan
877169689Skan
878169689Skan/* Function vect_verify_datarefs_alignment
879169689Skan
880169689Skan   Return TRUE if all data references in the loop can be
881169689Skan   handled with respect to alignment.  */
882169689Skan
883169689Skanstatic bool
884169689Skanvect_verify_datarefs_alignment (loop_vec_info loop_vinfo)
885169689Skan{
886169689Skan  VEC (data_reference_p, heap) *datarefs = LOOP_VINFO_DATAREFS (loop_vinfo);
887169689Skan  struct data_reference *dr;
888169689Skan  enum dr_alignment_support supportable_dr_alignment;
889169689Skan  unsigned int i;
890169689Skan
891169689Skan  for (i = 0; VEC_iterate (data_reference_p, datarefs, i, dr); i++)
892169689Skan    {
893169689Skan      supportable_dr_alignment = vect_supportable_dr_alignment (dr);
894169689Skan      if (!supportable_dr_alignment)
895169689Skan        {
896169689Skan          if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS))
897169689Skan            {
898169689Skan              if (DR_IS_READ (dr))
899169689Skan                fprintf (vect_dump,
900169689Skan                         "not vectorized: unsupported unaligned load.");
901169689Skan              else
902169689Skan                fprintf (vect_dump,
903169689Skan                         "not vectorized: unsupported unaligned store.");
904169689Skan            }
905169689Skan          return false;
906169689Skan        }
907169689Skan      if (supportable_dr_alignment != dr_aligned
908169689Skan          && vect_print_dump_info (REPORT_ALIGNMENT))
909169689Skan        fprintf (vect_dump, "Vectorizing an unaligned access.");
910169689Skan    }
911169689Skan  return true;
912169689Skan}
913169689Skan
914169689Skan
915220150Smm/* Function vector_alignment_reachable_p
916220150Smm
917220150Smm   Return true if vector alignment for DR is reachable by peeling
918220150Smm   a few loop iterations.  Return false otherwise.  */
919220150Smm
920220150Smmstatic bool
921220150Smmvector_alignment_reachable_p (struct data_reference *dr)
922220150Smm{
923220150Smm  tree stmt = DR_STMT (dr);
924220150Smm  stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
925220150Smm  tree vectype = STMT_VINFO_VECTYPE (stmt_info);
926220150Smm
927220150Smm  /* If misalignment is known at the compile time then allow peeling
928220150Smm     only if natural alignment is reachable through peeling.  */
929220150Smm  if (known_alignment_for_access_p (dr) && !aligned_access_p (dr))
930220150Smm    {
931220150Smm      HOST_WIDE_INT elmsize =
932220150Smm		int_cst_value (TYPE_SIZE_UNIT (TREE_TYPE (vectype)));
933220150Smm      if (vect_print_dump_info (REPORT_DETAILS))
934220150Smm	{
935220150Smm	  fprintf (vect_dump, "data size =" HOST_WIDE_INT_PRINT_DEC, elmsize);
936220150Smm	  fprintf (vect_dump, ". misalignment = %d. ", DR_MISALIGNMENT (dr));
937220150Smm	}
938220150Smm      if (DR_MISALIGNMENT (dr) % elmsize)
939220150Smm	{
940220150Smm	  if (vect_print_dump_info (REPORT_DETAILS))
941220150Smm	    fprintf (vect_dump, "data size does not divide the misalignment.\n");
942220150Smm	  return false;
943220150Smm	}
944220150Smm    }
945220150Smm
946220150Smm  if (!known_alignment_for_access_p (dr))
947220150Smm    {
948220150Smm      tree type = (TREE_TYPE (DR_REF (dr)));
949220150Smm      tree ba = DR_BASE_OBJECT (dr);
950220150Smm      bool is_packed = false;
951220150Smm
952220150Smm      if (ba)
953220150Smm	is_packed = contains_packed_reference (ba);
954220150Smm
955220150Smm      if (vect_print_dump_info (REPORT_DETAILS))
956220150Smm	fprintf (vect_dump, "Unknown misalignment, is_packed = %d",is_packed);
957220150Smm      if (targetm.vectorize.vector_alignment_reachable (type, is_packed))
958220150Smm	return true;
959220150Smm      else
960220150Smm	return false;
961220150Smm    }
962220150Smm
963220150Smm  return true;
964220150Smm}
965220150Smm
966169689Skan/* Function vect_enhance_data_refs_alignment
967169689Skan
968169689Skan   This pass will use loop versioning and loop peeling in order to enhance
969169689Skan   the alignment of data references in the loop.
970169689Skan
971169689Skan   FOR NOW: we assume that whatever versioning/peeling takes place, only the
972169689Skan   original loop is to be vectorized; Any other loops that are created by
973169689Skan   the transformations performed in this pass - are not supposed to be
974169689Skan   vectorized. This restriction will be relaxed.
975169689Skan
976169689Skan   This pass will require a cost model to guide it whether to apply peeling
977169689Skan   or versioning or a combination of the two. For example, the scheme that
978169689Skan   intel uses when given a loop with several memory accesses, is as follows:
979169689Skan   choose one memory access ('p') which alignment you want to force by doing
980169689Skan   peeling. Then, either (1) generate a loop in which 'p' is aligned and all
981169689Skan   other accesses are not necessarily aligned, or (2) use loop versioning to
982169689Skan   generate one loop in which all accesses are aligned, and another loop in
983169689Skan   which only 'p' is necessarily aligned.
984169689Skan
985169689Skan   ("Automatic Intra-Register Vectorization for the Intel Architecture",
986169689Skan   Aart J.C. Bik, Milind Girkar, Paul M. Grey and Ximmin Tian, International
987169689Skan   Journal of Parallel Programming, Vol. 30, No. 2, April 2002.)
988169689Skan
989169689Skan   Devising a cost model is the most critical aspect of this work. It will
990169689Skan   guide us on which access to peel for, whether to use loop versioning, how
991169689Skan   many versions to create, etc. The cost model will probably consist of
992169689Skan   generic considerations as well as target specific considerations (on
993169689Skan   powerpc for example, misaligned stores are more painful than misaligned
994169689Skan   loads).
995169689Skan
996169689Skan   Here are the general steps involved in alignment enhancements:
997169689Skan
998169689Skan     -- original loop, before alignment analysis:
999169689Skan	for (i=0; i<N; i++){
1000169689Skan	  x = q[i];			# DR_MISALIGNMENT(q) = unknown
1001169689Skan	  p[i] = y;			# DR_MISALIGNMENT(p) = unknown
1002169689Skan	}
1003169689Skan
1004169689Skan     -- After vect_compute_data_refs_alignment:
1005169689Skan	for (i=0; i<N; i++){
1006169689Skan	  x = q[i];			# DR_MISALIGNMENT(q) = 3
1007169689Skan	  p[i] = y;			# DR_MISALIGNMENT(p) = unknown
1008169689Skan	}
1009169689Skan
1010169689Skan     -- Possibility 1: we do loop versioning:
1011169689Skan     if (p is aligned) {
1012169689Skan	for (i=0; i<N; i++){	# loop 1A
1013169689Skan	  x = q[i];			# DR_MISALIGNMENT(q) = 3
1014169689Skan	  p[i] = y;			# DR_MISALIGNMENT(p) = 0
1015169689Skan	}
1016169689Skan     }
1017169689Skan     else {
1018169689Skan	for (i=0; i<N; i++){	# loop 1B
1019169689Skan	  x = q[i];			# DR_MISALIGNMENT(q) = 3
1020169689Skan	  p[i] = y;			# DR_MISALIGNMENT(p) = unaligned
1021169689Skan	}
1022169689Skan     }
1023169689Skan
1024169689Skan     -- Possibility 2: we do loop peeling:
1025169689Skan     for (i = 0; i < 3; i++){	# (scalar loop, not to be vectorized).
1026169689Skan	x = q[i];
1027169689Skan	p[i] = y;
1028169689Skan     }
1029169689Skan     for (i = 3; i < N; i++){	# loop 2A
1030169689Skan	x = q[i];			# DR_MISALIGNMENT(q) = 0
1031169689Skan	p[i] = y;			# DR_MISALIGNMENT(p) = unknown
1032169689Skan     }
1033169689Skan
1034169689Skan     -- Possibility 3: combination of loop peeling and versioning:
1035169689Skan     for (i = 0; i < 3; i++){	# (scalar loop, not to be vectorized).
1036169689Skan	x = q[i];
1037169689Skan	p[i] = y;
1038169689Skan     }
1039169689Skan     if (p is aligned) {
1040169689Skan	for (i = 3; i<N; i++){	# loop 3A
1041169689Skan	  x = q[i];			# DR_MISALIGNMENT(q) = 0
1042169689Skan	  p[i] = y;			# DR_MISALIGNMENT(p) = 0
1043169689Skan	}
1044169689Skan     }
1045169689Skan     else {
1046169689Skan	for (i = 3; i<N; i++){	# loop 3B
1047169689Skan	  x = q[i];			# DR_MISALIGNMENT(q) = 0
1048169689Skan	  p[i] = y;			# DR_MISALIGNMENT(p) = unaligned
1049169689Skan	}
1050169689Skan     }
1051169689Skan
1052169689Skan     These loops are later passed to loop_transform to be vectorized. The
1053169689Skan     vectorizer will use the alignment information to guide the transformation
1054169689Skan     (whether to generate regular loads/stores, or with special handling for
1055169689Skan     misalignment).  */
1056169689Skan
1057169689Skanstatic bool
1058169689Skanvect_enhance_data_refs_alignment (loop_vec_info loop_vinfo)
1059169689Skan{
1060169689Skan  VEC (data_reference_p, heap) *datarefs = LOOP_VINFO_DATAREFS (loop_vinfo);
1061169689Skan  enum dr_alignment_support supportable_dr_alignment;
1062169689Skan  struct data_reference *dr0 = NULL;
1063169689Skan  struct data_reference *dr;
1064169689Skan  unsigned int i;
1065169689Skan  bool do_peeling = false;
1066169689Skan  bool do_versioning = false;
1067169689Skan  bool stat;
1068169689Skan
1069169689Skan  /* While cost model enhancements are expected in the future, the high level
1070169689Skan     view of the code at this time is as follows:
1071169689Skan
1072169689Skan     A) If there is a misaligned write then see if peeling to align this write
1073169689Skan        can make all data references satisfy vect_supportable_dr_alignment.
1074169689Skan        If so, update data structures as needed and return true.  Note that
1075169689Skan        at this time vect_supportable_dr_alignment is known to return false
1076169689Skan        for a a misaligned write.
1077169689Skan
1078169689Skan     B) If peeling wasn't possible and there is a data reference with an
1079169689Skan        unknown misalignment that does not satisfy vect_supportable_dr_alignment
1080169689Skan        then see if loop versioning checks can be used to make all data
1081169689Skan        references satisfy vect_supportable_dr_alignment.  If so, update
1082169689Skan        data structures as needed and return true.
1083169689Skan
1084169689Skan     C) If neither peeling nor versioning were successful then return false if
1085169689Skan        any data reference does not satisfy vect_supportable_dr_alignment.
1086169689Skan
1087169689Skan     D) Return true (all data references satisfy vect_supportable_dr_alignment).
1088169689Skan
1089169689Skan     Note, Possibility 3 above (which is peeling and versioning together) is not
1090169689Skan     being done at this time.  */
1091169689Skan
1092169689Skan  /* (1) Peeling to force alignment.  */
1093169689Skan
1094169689Skan  /* (1.1) Decide whether to perform peeling, and how many iterations to peel:
1095169689Skan     Considerations:
1096169689Skan     + How many accesses will become aligned due to the peeling
1097169689Skan     - How many accesses will become unaligned due to the peeling,
1098169689Skan       and the cost of misaligned accesses.
1099169689Skan     - The cost of peeling (the extra runtime checks, the increase
1100169689Skan       in code size).
1101169689Skan
1102169689Skan     The scheme we use FORNOW: peel to force the alignment of the first
1103169689Skan     misaligned store in the loop.
1104169689Skan     Rationale: misaligned stores are not yet supported.
1105169689Skan
1106169689Skan     TODO: Use a cost model.  */
1107169689Skan
1108169689Skan  for (i = 0; VEC_iterate (data_reference_p, datarefs, i, dr); i++)
1109169689Skan    if (!DR_IS_READ (dr) && !aligned_access_p (dr))
1110169689Skan      {
1111220150Smm        do_peeling = vector_alignment_reachable_p (dr);
1112220150Smm        if (do_peeling)
1113220150Smm          dr0 = dr;
1114220150Smm        if (!do_peeling && vect_print_dump_info (REPORT_DETAILS))
1115220150Smm          fprintf (vect_dump, "vector alignment may not be reachable");
1116169689Skan	break;
1117169689Skan      }
1118169689Skan
1119169689Skan  /* Often peeling for alignment will require peeling for loop-bound, which in
1120169689Skan     turn requires that we know how to adjust the loop ivs after the loop.  */
1121169689Skan  if (!vect_can_advance_ivs_p (loop_vinfo))
1122169689Skan    do_peeling = false;
1123169689Skan
1124169689Skan  if (do_peeling)
1125169689Skan    {
1126169689Skan      int mis;
1127169689Skan      int npeel = 0;
1128169689Skan
1129169689Skan      if (known_alignment_for_access_p (dr0))
1130169689Skan        {
1131169689Skan          /* Since it's known at compile time, compute the number of iterations
1132169689Skan             in the peeled loop (the peeling factor) for use in updating
1133169689Skan             DR_MISALIGNMENT values.  The peeling factor is the vectorization
1134169689Skan             factor minus the misalignment as an element count.  */
1135169689Skan          mis = DR_MISALIGNMENT (dr0);
1136169689Skan          mis /= GET_MODE_SIZE (TYPE_MODE (TREE_TYPE (DR_REF (dr0))));
1137169689Skan          npeel = LOOP_VINFO_VECT_FACTOR (loop_vinfo) - mis;
1138169689Skan        }
1139169689Skan
1140169689Skan      /* Ensure that all data refs can be vectorized after the peel.  */
1141169689Skan      for (i = 0; VEC_iterate (data_reference_p, datarefs, i, dr); i++)
1142169689Skan        {
1143169689Skan          int save_misalignment;
1144169689Skan
1145169689Skan	  if (dr == dr0)
1146169689Skan	    continue;
1147169689Skan
1148169689Skan	  save_misalignment = DR_MISALIGNMENT (dr);
1149169689Skan	  vect_update_misalignment_for_peel (dr, dr0, npeel);
1150169689Skan	  supportable_dr_alignment = vect_supportable_dr_alignment (dr);
1151169689Skan	  DR_MISALIGNMENT (dr) = save_misalignment;
1152169689Skan
1153169689Skan	  if (!supportable_dr_alignment)
1154169689Skan	    {
1155169689Skan	      do_peeling = false;
1156169689Skan	      break;
1157169689Skan	    }
1158169689Skan	}
1159169689Skan
1160169689Skan      if (do_peeling)
1161169689Skan        {
1162169689Skan          /* (1.2) Update the DR_MISALIGNMENT of each data reference DR_i.
1163169689Skan             If the misalignment of DR_i is identical to that of dr0 then set
1164169689Skan             DR_MISALIGNMENT (DR_i) to zero.  If the misalignment of DR_i and
1165169689Skan             dr0 are known at compile time then increment DR_MISALIGNMENT (DR_i)
1166169689Skan             by the peeling factor times the element size of DR_i (MOD the
1167169689Skan             vectorization factor times the size).  Otherwise, the
1168169689Skan             misalignment of DR_i must be set to unknown.  */
1169169689Skan	  for (i = 0; VEC_iterate (data_reference_p, datarefs, i, dr); i++)
1170169689Skan	    if (dr != dr0)
1171169689Skan	      vect_update_misalignment_for_peel (dr, dr0, npeel);
1172169689Skan
1173169689Skan          LOOP_VINFO_UNALIGNED_DR (loop_vinfo) = dr0;
1174169689Skan          LOOP_PEELING_FOR_ALIGNMENT (loop_vinfo) = DR_MISALIGNMENT (dr0);
1175169689Skan          DR_MISALIGNMENT (dr0) = 0;
1176169689Skan	  if (vect_print_dump_info (REPORT_ALIGNMENT))
1177169689Skan            fprintf (vect_dump, "Alignment of access forced using peeling.");
1178169689Skan
1179169689Skan          if (vect_print_dump_info (REPORT_DETAILS))
1180169689Skan            fprintf (vect_dump, "Peeling for alignment will be applied.");
1181169689Skan
1182169689Skan	  stat = vect_verify_datarefs_alignment (loop_vinfo);
1183169689Skan	  gcc_assert (stat);
1184169689Skan          return stat;
1185169689Skan        }
1186169689Skan    }
1187169689Skan
1188169689Skan
1189169689Skan  /* (2) Versioning to force alignment.  */
1190169689Skan
1191169689Skan  /* Try versioning if:
1192169689Skan     1) flag_tree_vect_loop_version is TRUE
1193169689Skan     2) optimize_size is FALSE
1194169689Skan     3) there is at least one unsupported misaligned data ref with an unknown
1195169689Skan        misalignment, and
1196169689Skan     4) all misaligned data refs with a known misalignment are supported, and
1197169689Skan     5) the number of runtime alignment checks is within reason.  */
1198169689Skan
1199169689Skan  do_versioning = flag_tree_vect_loop_version && (!optimize_size);
1200169689Skan
1201169689Skan  if (do_versioning)
1202169689Skan    {
1203169689Skan      for (i = 0; VEC_iterate (data_reference_p, datarefs, i, dr); i++)
1204169689Skan        {
1205169689Skan          if (aligned_access_p (dr))
1206169689Skan            continue;
1207169689Skan
1208169689Skan          supportable_dr_alignment = vect_supportable_dr_alignment (dr);
1209169689Skan
1210169689Skan          if (!supportable_dr_alignment)
1211169689Skan            {
1212169689Skan              tree stmt;
1213169689Skan              int mask;
1214169689Skan              tree vectype;
1215169689Skan
1216169689Skan              if (known_alignment_for_access_p (dr)
1217169689Skan                  || VEC_length (tree,
1218169689Skan                                 LOOP_VINFO_MAY_MISALIGN_STMTS (loop_vinfo))
1219169689Skan                     >= (unsigned) PARAM_VALUE (PARAM_VECT_MAX_VERSION_CHECKS))
1220169689Skan                {
1221169689Skan                  do_versioning = false;
1222169689Skan                  break;
1223169689Skan                }
1224169689Skan
1225169689Skan              stmt = DR_STMT (dr);
1226169689Skan              vectype = STMT_VINFO_VECTYPE (vinfo_for_stmt (stmt));
1227169689Skan              gcc_assert (vectype);
1228169689Skan
1229169689Skan              /* The rightmost bits of an aligned address must be zeros.
1230169689Skan                 Construct the mask needed for this test.  For example,
1231169689Skan                 GET_MODE_SIZE for the vector mode V4SI is 16 bytes so the
1232169689Skan                 mask must be 15 = 0xf. */
1233169689Skan              mask = GET_MODE_SIZE (TYPE_MODE (vectype)) - 1;
1234169689Skan
1235169689Skan              /* FORNOW: use the same mask to test all potentially unaligned
1236169689Skan                 references in the loop.  The vectorizer currently supports
1237169689Skan                 a single vector size, see the reference to
1238169689Skan                 GET_MODE_NUNITS (TYPE_MODE (vectype)) where the
1239169689Skan                 vectorization factor is computed.  */
1240169689Skan              gcc_assert (!LOOP_VINFO_PTR_MASK (loop_vinfo)
1241169689Skan                          || LOOP_VINFO_PTR_MASK (loop_vinfo) == mask);
1242169689Skan              LOOP_VINFO_PTR_MASK (loop_vinfo) = mask;
1243169689Skan              VEC_safe_push (tree, heap,
1244169689Skan                             LOOP_VINFO_MAY_MISALIGN_STMTS (loop_vinfo),
1245169689Skan                             DR_STMT (dr));
1246169689Skan            }
1247169689Skan        }
1248169689Skan
1249169689Skan      /* Versioning requires at least one misaligned data reference.  */
1250169689Skan      if (VEC_length (tree, LOOP_VINFO_MAY_MISALIGN_STMTS (loop_vinfo)) == 0)
1251169689Skan        do_versioning = false;
1252169689Skan      else if (!do_versioning)
1253169689Skan        VEC_truncate (tree, LOOP_VINFO_MAY_MISALIGN_STMTS (loop_vinfo), 0);
1254169689Skan    }
1255169689Skan
1256169689Skan  if (do_versioning)
1257169689Skan    {
1258169689Skan      VEC(tree,heap) *may_misalign_stmts
1259169689Skan        = LOOP_VINFO_MAY_MISALIGN_STMTS (loop_vinfo);
1260169689Skan      tree stmt;
1261169689Skan
1262169689Skan      /* It can now be assumed that the data references in the statements
1263169689Skan         in LOOP_VINFO_MAY_MISALIGN_STMTS will be aligned in the version
1264169689Skan         of the loop being vectorized.  */
1265169689Skan      for (i = 0; VEC_iterate (tree, may_misalign_stmts, i, stmt); i++)
1266169689Skan        {
1267169689Skan          stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
1268169689Skan          dr = STMT_VINFO_DATA_REF (stmt_info);
1269169689Skan          DR_MISALIGNMENT (dr) = 0;
1270169689Skan	  if (vect_print_dump_info (REPORT_ALIGNMENT))
1271169689Skan            fprintf (vect_dump, "Alignment of access forced using versioning.");
1272169689Skan        }
1273169689Skan
1274169689Skan      if (vect_print_dump_info (REPORT_DETAILS))
1275169689Skan        fprintf (vect_dump, "Versioning for alignment will be applied.");
1276169689Skan
1277169689Skan      /* Peeling and versioning can't be done together at this time.  */
1278169689Skan      gcc_assert (! (do_peeling && do_versioning));
1279169689Skan
1280169689Skan      stat = vect_verify_datarefs_alignment (loop_vinfo);
1281169689Skan      gcc_assert (stat);
1282169689Skan      return stat;
1283169689Skan    }
1284169689Skan
1285169689Skan  /* This point is reached if neither peeling nor versioning is being done.  */
1286169689Skan  gcc_assert (! (do_peeling || do_versioning));
1287169689Skan
1288169689Skan  stat = vect_verify_datarefs_alignment (loop_vinfo);
1289169689Skan  return stat;
1290169689Skan}
1291169689Skan
1292169689Skan
1293169689Skan/* Function vect_analyze_data_refs_alignment
1294169689Skan
1295169689Skan   Analyze the alignment of the data-references in the loop.
1296169689Skan   Return FALSE if a data reference is found that cannot be vectorized.  */
1297169689Skan
1298169689Skanstatic bool
1299169689Skanvect_analyze_data_refs_alignment (loop_vec_info loop_vinfo)
1300169689Skan{
1301169689Skan  if (vect_print_dump_info (REPORT_DETAILS))
1302169689Skan    fprintf (vect_dump, "=== vect_analyze_data_refs_alignment ===");
1303169689Skan
1304169689Skan  if (!vect_compute_data_refs_alignment (loop_vinfo))
1305169689Skan    {
1306169689Skan      if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS))
1307169689Skan	fprintf (vect_dump,
1308169689Skan		 "not vectorized: can't calculate alignment for data ref.");
1309169689Skan      return false;
1310169689Skan    }
1311169689Skan
1312169689Skan  return true;
1313169689Skan}
1314169689Skan
1315169689Skan
1316169689Skan/* Function vect_analyze_data_ref_access.
1317169689Skan
1318169689Skan   Analyze the access pattern of the data-reference DR. For now, a data access
1319169689Skan   has to be consecutive to be considered vectorizable.  */
1320169689Skan
1321169689Skanstatic bool
1322169689Skanvect_analyze_data_ref_access (struct data_reference *dr)
1323169689Skan{
1324169689Skan  tree step = DR_STEP (dr);
1325169689Skan  tree scalar_type = TREE_TYPE (DR_REF (dr));
1326169689Skan
1327169689Skan  if (!step || tree_int_cst_compare (step, TYPE_SIZE_UNIT (scalar_type)))
1328169689Skan    {
1329169689Skan      if (vect_print_dump_info (REPORT_DETAILS))
1330169689Skan	fprintf (vect_dump, "not consecutive access");
1331169689Skan      return false;
1332169689Skan    }
1333169689Skan  return true;
1334169689Skan}
1335169689Skan
1336169689Skan
1337169689Skan/* Function vect_analyze_data_ref_accesses.
1338169689Skan
1339169689Skan   Analyze the access pattern of all the data references in the loop.
1340169689Skan
1341169689Skan   FORNOW: the only access pattern that is considered vectorizable is a
1342169689Skan	   simple step 1 (consecutive) access.
1343169689Skan
1344169689Skan   FORNOW: handle only arrays and pointer accesses.  */
1345169689Skan
1346169689Skanstatic bool
1347169689Skanvect_analyze_data_ref_accesses (loop_vec_info loop_vinfo)
1348169689Skan{
1349169689Skan  unsigned int i;
1350169689Skan  VEC (data_reference_p, heap) *datarefs = LOOP_VINFO_DATAREFS (loop_vinfo);
1351169689Skan  struct data_reference *dr;
1352169689Skan
1353169689Skan  if (vect_print_dump_info (REPORT_DETAILS))
1354169689Skan    fprintf (vect_dump, "=== vect_analyze_data_ref_accesses ===");
1355169689Skan
1356169689Skan  for (i = 0; VEC_iterate (data_reference_p, datarefs, i, dr); i++)
1357169689Skan    if (!vect_analyze_data_ref_access (dr))
1358169689Skan      {
1359169689Skan	if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS))
1360169689Skan	  fprintf (vect_dump, "not vectorized: complicated access pattern.");
1361169689Skan	return false;
1362169689Skan      }
1363169689Skan
1364169689Skan  return true;
1365169689Skan}
1366169689Skan
1367169689Skan
1368169689Skan/* Function vect_analyze_data_refs.
1369169689Skan
1370169689Skan  Find all the data references in the loop.
1371169689Skan
1372169689Skan   The general structure of the analysis of data refs in the vectorizer is as
1373169689Skan   follows:
1374169689Skan   1- vect_analyze_data_refs(loop): call compute_data_dependences_for_loop to
1375169689Skan      find and analyze all data-refs in the loop and their dependences.
1376169689Skan   2- vect_analyze_dependences(): apply dependence testing using ddrs.
1377169689Skan   3- vect_analyze_drs_alignment(): check that ref_stmt.alignment is ok.
1378169689Skan   4- vect_analyze_drs_access(): check that ref_stmt.step is ok.
1379169689Skan
1380169689Skan*/
1381169689Skan
1382169689Skanstatic bool
1383169689Skanvect_analyze_data_refs (loop_vec_info loop_vinfo)
1384169689Skan{
1385169689Skan  struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
1386169689Skan  unsigned int i;
1387169689Skan  VEC (data_reference_p, heap) *datarefs;
1388169689Skan  struct data_reference *dr;
1389169689Skan  tree scalar_type;
1390169689Skan
1391169689Skan  if (vect_print_dump_info (REPORT_DETAILS))
1392169689Skan    fprintf (vect_dump, "=== vect_analyze_data_refs ===");
1393169689Skan
1394169689Skan  compute_data_dependences_for_loop (loop, false,
1395169689Skan                                     &LOOP_VINFO_DATAREFS (loop_vinfo),
1396169689Skan                                     &LOOP_VINFO_DDRS (loop_vinfo));
1397169689Skan
1398169689Skan  /* Go through the data-refs, check that the analysis succeeded. Update pointer
1399169689Skan     from stmt_vec_info struct to DR and vectype.  */
1400169689Skan  datarefs = LOOP_VINFO_DATAREFS (loop_vinfo);
1401169689Skan
1402169689Skan  for (i = 0; VEC_iterate (data_reference_p, datarefs, i, dr); i++)
1403169689Skan    {
1404169689Skan      tree stmt;
1405169689Skan      stmt_vec_info stmt_info;
1406169689Skan
1407169689Skan      if (!dr || !DR_REF (dr))
1408169689Skan        {
1409169689Skan          if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS))
1410169689Skan	    fprintf (vect_dump, "not vectorized: unhandled data-ref ");
1411169689Skan          return false;
1412169689Skan        }
1413169689Skan
1414169689Skan      /* Update DR field in stmt_vec_info struct.  */
1415169689Skan      stmt = DR_STMT (dr);
1416169689Skan      stmt_info = vinfo_for_stmt (stmt);
1417169689Skan
1418169689Skan      if (STMT_VINFO_DATA_REF (stmt_info))
1419169689Skan        {
1420169689Skan          if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS))
1421169689Skan            {
1422169689Skan              fprintf (vect_dump,
1423169689Skan                       "not vectorized: more than one data ref in stmt: ");
1424169689Skan              print_generic_expr (vect_dump, stmt, TDF_SLIM);
1425169689Skan            }
1426169689Skan          return false;
1427169689Skan        }
1428169689Skan      STMT_VINFO_DATA_REF (stmt_info) = dr;
1429169689Skan
1430169689Skan      /* Check that analysis of the data-ref succeeded.  */
1431169689Skan      if (!DR_BASE_ADDRESS (dr) || !DR_OFFSET (dr) || !DR_INIT (dr)
1432169689Skan          || !DR_STEP (dr))
1433169689Skan        {
1434169689Skan          if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS))
1435169689Skan            {
1436169689Skan              fprintf (vect_dump, "not vectorized: data ref analysis failed ");
1437169689Skan              print_generic_expr (vect_dump, stmt, TDF_SLIM);
1438169689Skan            }
1439169689Skan          return false;
1440169689Skan        }
1441169689Skan      if (!DR_MEMTAG (dr))
1442169689Skan        {
1443169689Skan          if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS))
1444169689Skan            {
1445169689Skan              fprintf (vect_dump, "not vectorized: no memory tag for ");
1446169689Skan              print_generic_expr (vect_dump, DR_REF (dr), TDF_SLIM);
1447169689Skan            }
1448169689Skan          return false;
1449169689Skan        }
1450169689Skan
1451169689Skan      /* Set vectype for STMT.  */
1452169689Skan      scalar_type = TREE_TYPE (DR_REF (dr));
1453169689Skan      STMT_VINFO_VECTYPE (stmt_info) =
1454169689Skan                get_vectype_for_scalar_type (scalar_type);
1455169689Skan      if (!STMT_VINFO_VECTYPE (stmt_info))
1456169689Skan        {
1457169689Skan          if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS))
1458169689Skan            {
1459169689Skan              fprintf (vect_dump,
1460169689Skan                       "not vectorized: no vectype for stmt: ");
1461169689Skan              print_generic_expr (vect_dump, stmt, TDF_SLIM);
1462169689Skan              fprintf (vect_dump, " scalar_type: ");
1463169689Skan              print_generic_expr (vect_dump, scalar_type, TDF_DETAILS);
1464169689Skan            }
1465169689Skan          return false;
1466169689Skan        }
1467169689Skan    }
1468169689Skan
1469169689Skan  return true;
1470169689Skan}
1471169689Skan
1472169689Skan
1473169689Skan/* Utility functions used by vect_mark_stmts_to_be_vectorized.  */
1474169689Skan
1475169689Skan/* Function vect_mark_relevant.
1476169689Skan
1477169689Skan   Mark STMT as "relevant for vectorization" and add it to WORKLIST.  */
1478169689Skan
1479169689Skanstatic void
1480169689Skanvect_mark_relevant (VEC(tree,heap) **worklist, tree stmt,
1481169689Skan		    bool relevant_p, bool live_p)
1482169689Skan{
1483169689Skan  stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
1484169689Skan  bool save_relevant_p = STMT_VINFO_RELEVANT_P (stmt_info);
1485169689Skan  bool save_live_p = STMT_VINFO_LIVE_P (stmt_info);
1486169689Skan
1487169689Skan  if (vect_print_dump_info (REPORT_DETAILS))
1488169689Skan    fprintf (vect_dump, "mark relevant %d, live %d.",relevant_p, live_p);
1489169689Skan
1490169689Skan  if (STMT_VINFO_IN_PATTERN_P (stmt_info))
1491169689Skan    {
1492169689Skan      tree pattern_stmt;
1493169689Skan
1494169689Skan      /* This is the last stmt in a sequence that was detected as a
1495169689Skan         pattern that can potentially be vectorized.  Don't mark the stmt
1496169689Skan         as relevant/live because it's not going to vectorized.
1497169689Skan         Instead mark the pattern-stmt that replaces it.  */
1498169689Skan      if (vect_print_dump_info (REPORT_DETAILS))
1499169689Skan        fprintf (vect_dump, "last stmt in pattern. don't mark relevant/live.");
1500169689Skan      pattern_stmt = STMT_VINFO_RELATED_STMT (stmt_info);
1501169689Skan      stmt_info = vinfo_for_stmt (pattern_stmt);
1502169689Skan      gcc_assert (STMT_VINFO_RELATED_STMT (stmt_info) == stmt);
1503169689Skan      save_relevant_p = STMT_VINFO_RELEVANT_P (stmt_info);
1504169689Skan      save_live_p = STMT_VINFO_LIVE_P (stmt_info);
1505169689Skan      stmt = pattern_stmt;
1506169689Skan    }
1507169689Skan
1508169689Skan  STMT_VINFO_LIVE_P (stmt_info) |= live_p;
1509169689Skan  STMT_VINFO_RELEVANT_P (stmt_info) |= relevant_p;
1510169689Skan
1511169689Skan  if (TREE_CODE (stmt) == PHI_NODE)
1512169689Skan    /* Don't put phi-nodes in the worklist. Phis that are marked relevant
1513169689Skan       or live will fail vectorization later on.  */
1514169689Skan    return;
1515169689Skan
1516169689Skan  if (STMT_VINFO_RELEVANT_P (stmt_info) == save_relevant_p
1517169689Skan      && STMT_VINFO_LIVE_P (stmt_info) == save_live_p)
1518169689Skan    {
1519169689Skan      if (vect_print_dump_info (REPORT_DETAILS))
1520169689Skan        fprintf (vect_dump, "already marked relevant/live.");
1521169689Skan      return;
1522169689Skan    }
1523169689Skan
1524169689Skan  VEC_safe_push (tree, heap, *worklist, stmt);
1525169689Skan}
1526169689Skan
1527169689Skan
1528169689Skan/* Function vect_stmt_relevant_p.
1529169689Skan
1530169689Skan   Return true if STMT in loop that is represented by LOOP_VINFO is
1531169689Skan   "relevant for vectorization".
1532169689Skan
1533169689Skan   A stmt is considered "relevant for vectorization" if:
1534169689Skan   - it has uses outside the loop.
1535169689Skan   - it has vdefs (it alters memory).
1536169689Skan   - control stmts in the loop (except for the exit condition).
1537169689Skan
1538169689Skan   CHECKME: what other side effects would the vectorizer allow?  */
1539169689Skan
1540169689Skanstatic bool
1541169689Skanvect_stmt_relevant_p (tree stmt, loop_vec_info loop_vinfo,
1542169689Skan		      bool *relevant_p, bool *live_p)
1543169689Skan{
1544169689Skan  struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
1545169689Skan  ssa_op_iter op_iter;
1546169689Skan  imm_use_iterator imm_iter;
1547169689Skan  use_operand_p use_p;
1548169689Skan  def_operand_p def_p;
1549169689Skan
1550169689Skan  *relevant_p = false;
1551169689Skan  *live_p = false;
1552169689Skan
1553169689Skan  /* cond stmt other than loop exit cond.  */
1554169689Skan  if (is_ctrl_stmt (stmt) && (stmt != LOOP_VINFO_EXIT_COND (loop_vinfo)))
1555169689Skan    *relevant_p = true;
1556169689Skan
1557169689Skan  /* changing memory.  */
1558169689Skan  if (TREE_CODE (stmt) != PHI_NODE)
1559169689Skan    if (!ZERO_SSA_OPERANDS (stmt, SSA_OP_VIRTUAL_DEFS))
1560169689Skan      {
1561169689Skan	if (vect_print_dump_info (REPORT_DETAILS))
1562169689Skan	  fprintf (vect_dump, "vec_stmt_relevant_p: stmt has vdefs.");
1563169689Skan	*relevant_p = true;
1564169689Skan      }
1565169689Skan
1566169689Skan  /* uses outside the loop.  */
1567169689Skan  FOR_EACH_PHI_OR_STMT_DEF (def_p, stmt, op_iter, SSA_OP_DEF)
1568169689Skan    {
1569169689Skan      FOR_EACH_IMM_USE_FAST (use_p, imm_iter, DEF_FROM_PTR (def_p))
1570169689Skan	{
1571169689Skan	  basic_block bb = bb_for_stmt (USE_STMT (use_p));
1572169689Skan	  if (!flow_bb_inside_loop_p (loop, bb))
1573169689Skan	    {
1574169689Skan	      if (vect_print_dump_info (REPORT_DETAILS))
1575169689Skan		fprintf (vect_dump, "vec_stmt_relevant_p: used out of loop.");
1576169689Skan
1577169689Skan	      /* We expect all such uses to be in the loop exit phis
1578169689Skan		 (because of loop closed form)   */
1579169689Skan	      gcc_assert (TREE_CODE (USE_STMT (use_p)) == PHI_NODE);
1580169689Skan	      gcc_assert (bb == loop->single_exit->dest);
1581169689Skan
1582169689Skan              *live_p = true;
1583169689Skan	    }
1584169689Skan	}
1585169689Skan    }
1586169689Skan
1587169689Skan  return (*live_p || *relevant_p);
1588169689Skan}
1589169689Skan
1590169689Skan
1591169689Skan/* Function vect_mark_stmts_to_be_vectorized.
1592169689Skan
1593169689Skan   Not all stmts in the loop need to be vectorized. For example:
1594169689Skan
1595169689Skan     for i...
1596169689Skan       for j...
1597169689Skan   1.    T0 = i + j
1598169689Skan   2.	 T1 = a[T0]
1599169689Skan
1600169689Skan   3.    j = j + 1
1601169689Skan
1602169689Skan   Stmt 1 and 3 do not need to be vectorized, because loop control and
1603169689Skan   addressing of vectorized data-refs are handled differently.
1604169689Skan
1605169689Skan   This pass detects such stmts.  */
1606169689Skan
1607169689Skanstatic bool
1608169689Skanvect_mark_stmts_to_be_vectorized (loop_vec_info loop_vinfo)
1609169689Skan{
1610169689Skan  VEC(tree,heap) *worklist;
1611169689Skan  struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
1612169689Skan  basic_block *bbs = LOOP_VINFO_BBS (loop_vinfo);
1613169689Skan  unsigned int nbbs = loop->num_nodes;
1614169689Skan  block_stmt_iterator si;
1615169689Skan  tree stmt, use;
1616169689Skan  stmt_ann_t ann;
1617169689Skan  ssa_op_iter iter;
1618169689Skan  unsigned int i;
1619169689Skan  stmt_vec_info stmt_vinfo;
1620169689Skan  basic_block bb;
1621169689Skan  tree phi;
1622169689Skan  bool relevant_p, live_p;
1623169689Skan  tree def, def_stmt;
1624169689Skan  enum vect_def_type dt;
1625169689Skan
1626169689Skan  if (vect_print_dump_info (REPORT_DETAILS))
1627169689Skan    fprintf (vect_dump, "=== vect_mark_stmts_to_be_vectorized ===");
1628169689Skan
1629169689Skan  worklist = VEC_alloc (tree, heap, 64);
1630169689Skan
1631169689Skan  /* 1. Init worklist.  */
1632169689Skan
1633169689Skan  bb = loop->header;
1634169689Skan  for (phi = phi_nodes (bb); phi; phi = PHI_CHAIN (phi))
1635169689Skan    {
1636169689Skan      if (vect_print_dump_info (REPORT_DETAILS))
1637169689Skan        {
1638169689Skan          fprintf (vect_dump, "init: phi relevant? ");
1639169689Skan          print_generic_expr (vect_dump, phi, TDF_SLIM);
1640169689Skan        }
1641169689Skan
1642169689Skan      if (vect_stmt_relevant_p (phi, loop_vinfo, &relevant_p, &live_p))
1643169689Skan	vect_mark_relevant (&worklist, phi, relevant_p, live_p);
1644169689Skan    }
1645169689Skan
1646169689Skan  for (i = 0; i < nbbs; i++)
1647169689Skan    {
1648169689Skan      bb = bbs[i];
1649169689Skan      for (si = bsi_start (bb); !bsi_end_p (si); bsi_next (&si))
1650169689Skan	{
1651169689Skan	  stmt = bsi_stmt (si);
1652169689Skan
1653169689Skan	  if (vect_print_dump_info (REPORT_DETAILS))
1654169689Skan	    {
1655169689Skan	      fprintf (vect_dump, "init: stmt relevant? ");
1656169689Skan	      print_generic_expr (vect_dump, stmt, TDF_SLIM);
1657169689Skan	    }
1658169689Skan
1659169689Skan	  if (vect_stmt_relevant_p (stmt, loop_vinfo, &relevant_p, &live_p))
1660169689Skan            vect_mark_relevant (&worklist, stmt, relevant_p, live_p);
1661169689Skan	}
1662169689Skan    }
1663169689Skan
1664169689Skan
1665169689Skan  /* 2. Process_worklist */
1666169689Skan
1667169689Skan  while (VEC_length (tree, worklist) > 0)
1668169689Skan    {
1669169689Skan      stmt = VEC_pop (tree, worklist);
1670169689Skan
1671169689Skan      if (vect_print_dump_info (REPORT_DETAILS))
1672169689Skan	{
1673169689Skan          fprintf (vect_dump, "worklist: examine stmt: ");
1674169689Skan          print_generic_expr (vect_dump, stmt, TDF_SLIM);
1675169689Skan	}
1676169689Skan
1677169689Skan      /* Examine the USEs of STMT. For each ssa-name USE thta is defined
1678169689Skan         in the loop, mark the stmt that defines it (DEF_STMT) as
1679169689Skan         relevant/irrelevant and live/dead according to the liveness and
1680169689Skan         relevance properties of STMT.
1681169689Skan       */
1682169689Skan
1683169689Skan      gcc_assert (TREE_CODE (stmt) != PHI_NODE);
1684169689Skan
1685169689Skan      ann = stmt_ann (stmt);
1686169689Skan      stmt_vinfo = vinfo_for_stmt (stmt);
1687169689Skan
1688169689Skan      relevant_p = STMT_VINFO_RELEVANT_P (stmt_vinfo);
1689169689Skan      live_p = STMT_VINFO_LIVE_P (stmt_vinfo);
1690169689Skan
1691169689Skan      /* Generally, the liveness and relevance properties of STMT are
1692169689Skan         propagated to the DEF_STMTs of its USEs:
1693169689Skan             STMT_VINFO_LIVE_P (DEF_STMT_info) <-- live_p
1694169689Skan             STMT_VINFO_RELEVANT_P (DEF_STMT_info) <-- relevant_p
1695169689Skan
1696169689Skan         Exceptions:
1697169689Skan
1698169689Skan	 (case 1)
1699169689Skan           If USE is used only for address computations (e.g. array indexing),
1700169689Skan           which does not need to be directly vectorized, then the
1701169689Skan           liveness/relevance of the respective DEF_STMT is left unchanged.
1702169689Skan
1703169689Skan	 (case 2)
1704169689Skan           If STMT has been identified as defining a reduction variable, then
1705169689Skan	   we have two cases:
1706169689Skan	   (case 2.1)
1707169689Skan	     The last use of STMT is the reduction-variable, which is defined
1708169689Skan	     by a loop-header-phi. We don't want to mark the phi as live or
1709169689Skan	     relevant (because it does not need to be vectorized, it is handled
1710169689Skan             as part of the vectorization of the reduction), so in this case we
1711169689Skan	     skip the call to vect_mark_relevant.
1712169689Skan	   (case 2.2)
1713169689Skan	     The rest of the uses of STMT are defined in the loop body. For
1714169689Skan             the def_stmt of these uses we want to set liveness/relevance
1715169689Skan             as follows:
1716169689Skan               STMT_VINFO_LIVE_P (DEF_STMT_info) <-- false
1717169689Skan               STMT_VINFO_RELEVANT_P (DEF_STMT_info) <-- true
1718169689Skan             because even though STMT is classified as live (since it defines a
1719169689Skan             value that is used across loop iterations) and irrelevant (since it
1720169689Skan             is not used inside the loop), it will be vectorized, and therefore
1721169689Skan             the corresponding DEF_STMTs need to marked as relevant.
1722169689Skan       */
1723169689Skan
1724169689Skan      /* case 2.2:  */
1725169689Skan      if (STMT_VINFO_DEF_TYPE (stmt_vinfo) == vect_reduction_def)
1726169689Skan        {
1727169689Skan          gcc_assert (!relevant_p && live_p);
1728169689Skan          relevant_p = true;
1729169689Skan          live_p = false;
1730169689Skan        }
1731169689Skan
1732169689Skan      FOR_EACH_SSA_TREE_OPERAND (use, stmt, iter, SSA_OP_USE)
1733169689Skan	{
1734169689Skan	  /* case 1: we are only interested in uses that need to be vectorized.
1735169689Skan	     Uses that are used for address computation are not considered
1736169689Skan	     relevant.
1737169689Skan	   */
1738169689Skan	  if (!exist_non_indexing_operands_for_use_p (use, stmt))
1739169689Skan	    continue;
1740169689Skan
1741169689Skan	  if (!vect_is_simple_use (use, loop_vinfo, &def_stmt, &def, &dt))
1742169689Skan            {
1743169689Skan              if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS))
1744169689Skan                fprintf (vect_dump, "not vectorized: unsupported use in stmt.");
1745169689Skan	      VEC_free (tree, heap, worklist);
1746169689Skan              return false;
1747169689Skan            }
1748169689Skan
1749169689Skan	  if (!def_stmt || IS_EMPTY_STMT (def_stmt))
1750169689Skan	    continue;
1751169689Skan
1752169689Skan          if (vect_print_dump_info (REPORT_DETAILS))
1753169689Skan            {
1754169689Skan              fprintf (vect_dump, "worklist: examine use %d: ", i);
1755169689Skan              print_generic_expr (vect_dump, use, TDF_SLIM);
1756169689Skan            }
1757169689Skan
1758169689Skan	  bb = bb_for_stmt (def_stmt);
1759169689Skan          if (!flow_bb_inside_loop_p (loop, bb))
1760169689Skan            continue;
1761169689Skan
1762169689Skan	  /* case 2.1: the reduction-use does not mark the defining-phi
1763169689Skan	     as relevant.  */
1764169689Skan	  if (STMT_VINFO_DEF_TYPE (stmt_vinfo) == vect_reduction_def
1765169689Skan	      && TREE_CODE (def_stmt) == PHI_NODE)
1766169689Skan	    continue;
1767169689Skan
1768169689Skan	  vect_mark_relevant (&worklist, def_stmt, relevant_p, live_p);
1769169689Skan	}
1770169689Skan    }				/* while worklist */
1771169689Skan
1772169689Skan  VEC_free (tree, heap, worklist);
1773169689Skan  return true;
1774169689Skan}
1775169689Skan
1776169689Skan
1777169689Skan/* Function vect_can_advance_ivs_p
1778169689Skan
1779169689Skan   In case the number of iterations that LOOP iterates is unknown at compile
1780169689Skan   time, an epilog loop will be generated, and the loop induction variables
1781169689Skan   (IVs) will be "advanced" to the value they are supposed to take just before
1782169689Skan   the epilog loop.  Here we check that the access function of the loop IVs
1783169689Skan   and the expression that represents the loop bound are simple enough.
1784169689Skan   These restrictions will be relaxed in the future.  */
1785169689Skan
1786169689Skanstatic bool
1787169689Skanvect_can_advance_ivs_p (loop_vec_info loop_vinfo)
1788169689Skan{
1789169689Skan  struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
1790169689Skan  basic_block bb = loop->header;
1791169689Skan  tree phi;
1792169689Skan
1793169689Skan  /* Analyze phi functions of the loop header.  */
1794169689Skan
1795169689Skan  if (vect_print_dump_info (REPORT_DETAILS))
1796169689Skan    fprintf (vect_dump, "=== vect_can_advance_ivs_p ===");
1797169689Skan
1798169689Skan  for (phi = phi_nodes (bb); phi; phi = PHI_CHAIN (phi))
1799169689Skan    {
1800169689Skan      tree access_fn = NULL;
1801169689Skan      tree evolution_part;
1802169689Skan
1803169689Skan      if (vect_print_dump_info (REPORT_DETAILS))
1804169689Skan	{
1805169689Skan          fprintf (vect_dump, "Analyze phi: ");
1806169689Skan          print_generic_expr (vect_dump, phi, TDF_SLIM);
1807169689Skan	}
1808169689Skan
1809169689Skan      /* Skip virtual phi's. The data dependences that are associated with
1810169689Skan         virtual defs/uses (i.e., memory accesses) are analyzed elsewhere.  */
1811169689Skan
1812169689Skan      if (!is_gimple_reg (SSA_NAME_VAR (PHI_RESULT (phi))))
1813169689Skan	{
1814169689Skan	  if (vect_print_dump_info (REPORT_DETAILS))
1815169689Skan	    fprintf (vect_dump, "virtual phi. skip.");
1816169689Skan	  continue;
1817169689Skan	}
1818169689Skan
1819169689Skan      /* Skip reduction phis.  */
1820169689Skan
1821169689Skan      if (STMT_VINFO_DEF_TYPE (vinfo_for_stmt (phi)) == vect_reduction_def)
1822169689Skan        {
1823169689Skan          if (vect_print_dump_info (REPORT_DETAILS))
1824169689Skan            fprintf (vect_dump, "reduc phi. skip.");
1825169689Skan          continue;
1826169689Skan        }
1827169689Skan
1828169689Skan      /* Analyze the evolution function.  */
1829169689Skan
1830169689Skan      access_fn = instantiate_parameters
1831169689Skan	(loop, analyze_scalar_evolution (loop, PHI_RESULT (phi)));
1832169689Skan
1833169689Skan      if (!access_fn)
1834169689Skan	{
1835169689Skan	  if (vect_print_dump_info (REPORT_DETAILS))
1836169689Skan	    fprintf (vect_dump, "No Access function.");
1837169689Skan	  return false;
1838169689Skan	}
1839169689Skan
1840169689Skan      if (vect_print_dump_info (REPORT_DETAILS))
1841169689Skan        {
1842169689Skan	  fprintf (vect_dump, "Access function of PHI: ");
1843169689Skan	  print_generic_expr (vect_dump, access_fn, TDF_SLIM);
1844169689Skan        }
1845169689Skan
1846169689Skan      evolution_part = evolution_part_in_loop_num (access_fn, loop->num);
1847169689Skan
1848169689Skan      if (evolution_part == NULL_TREE)
1849169689Skan        {
1850169689Skan	  if (vect_print_dump_info (REPORT_DETAILS))
1851169689Skan	    fprintf (vect_dump, "No evolution.");
1852169689Skan	  return false;
1853169689Skan        }
1854169689Skan
1855169689Skan      /* FORNOW: We do not transform initial conditions of IVs
1856169689Skan	 which evolution functions are a polynomial of degree >= 2.  */
1857169689Skan
1858169689Skan      if (tree_is_chrec (evolution_part))
1859169689Skan	return false;
1860169689Skan    }
1861169689Skan
1862169689Skan  return true;
1863169689Skan}
1864169689Skan
1865169689Skan
1866169689Skan/* Function vect_get_loop_niters.
1867169689Skan
1868169689Skan   Determine how many iterations the loop is executed.
1869169689Skan   If an expression that represents the number of iterations
1870169689Skan   can be constructed, place it in NUMBER_OF_ITERATIONS.
1871169689Skan   Return the loop exit condition.  */
1872169689Skan
1873169689Skanstatic tree
1874169689Skanvect_get_loop_niters (struct loop *loop, tree *number_of_iterations)
1875169689Skan{
1876169689Skan  tree niters;
1877169689Skan
1878169689Skan  if (vect_print_dump_info (REPORT_DETAILS))
1879169689Skan    fprintf (vect_dump, "=== get_loop_niters ===");
1880169689Skan
1881169689Skan  niters = number_of_iterations_in_loop (loop);
1882169689Skan
1883169689Skan  if (niters != NULL_TREE
1884169689Skan      && niters != chrec_dont_know)
1885169689Skan    {
1886169689Skan      *number_of_iterations = niters;
1887169689Skan
1888169689Skan      if (vect_print_dump_info (REPORT_DETAILS))
1889169689Skan	{
1890169689Skan	  fprintf (vect_dump, "==> get_loop_niters:" );
1891169689Skan	  print_generic_expr (vect_dump, *number_of_iterations, TDF_SLIM);
1892169689Skan	}
1893169689Skan    }
1894169689Skan
1895169689Skan  return get_loop_exit_condition (loop);
1896169689Skan}
1897169689Skan
1898169689Skan
1899169689Skan/* Function vect_analyze_loop_form.
1900169689Skan
1901169689Skan   Verify the following restrictions (some may be relaxed in the future):
1902169689Skan   - it's an inner-most loop
1903169689Skan   - number of BBs = 2 (which are the loop header and the latch)
1904169689Skan   - the loop has a pre-header
1905169689Skan   - the loop has a single entry and exit
1906169689Skan   - the loop exit condition is simple enough, and the number of iterations
1907169689Skan     can be analyzed (a countable loop).  */
1908169689Skan
1909169689Skanstatic loop_vec_info
1910169689Skanvect_analyze_loop_form (struct loop *loop)
1911169689Skan{
1912169689Skan  loop_vec_info loop_vinfo;
1913169689Skan  tree loop_cond;
1914169689Skan  tree number_of_iterations = NULL;
1915169689Skan
1916169689Skan  if (vect_print_dump_info (REPORT_DETAILS))
1917169689Skan    fprintf (vect_dump, "=== vect_analyze_loop_form ===");
1918169689Skan
1919169689Skan  if (loop->inner)
1920169689Skan    {
1921169689Skan      if (vect_print_dump_info (REPORT_OUTER_LOOPS))
1922169689Skan        fprintf (vect_dump, "not vectorized: nested loop.");
1923169689Skan      return NULL;
1924169689Skan    }
1925169689Skan
1926169689Skan  if (!loop->single_exit
1927169689Skan      || loop->num_nodes != 2
1928169689Skan      || EDGE_COUNT (loop->header->preds) != 2)
1929169689Skan    {
1930169689Skan      if (vect_print_dump_info (REPORT_BAD_FORM_LOOPS))
1931169689Skan        {
1932169689Skan          if (!loop->single_exit)
1933169689Skan            fprintf (vect_dump, "not vectorized: multiple exits.");
1934169689Skan          else if (loop->num_nodes != 2)
1935169689Skan            fprintf (vect_dump, "not vectorized: too many BBs in loop.");
1936169689Skan          else if (EDGE_COUNT (loop->header->preds) != 2)
1937169689Skan            fprintf (vect_dump, "not vectorized: too many incoming edges.");
1938169689Skan        }
1939169689Skan
1940169689Skan      return NULL;
1941169689Skan    }
1942169689Skan
1943169689Skan  /* We assume that the loop exit condition is at the end of the loop. i.e,
1944169689Skan     that the loop is represented as a do-while (with a proper if-guard
1945169689Skan     before the loop if needed), where the loop header contains all the
1946169689Skan     executable statements, and the latch is empty.  */
1947169689Skan  if (!empty_block_p (loop->latch)
1948169689Skan        || phi_nodes (loop->latch))
1949169689Skan    {
1950169689Skan      if (vect_print_dump_info (REPORT_BAD_FORM_LOOPS))
1951169689Skan        fprintf (vect_dump, "not vectorized: unexpected loop form.");
1952169689Skan      return NULL;
1953169689Skan    }
1954169689Skan
1955169689Skan  /* Make sure there exists a single-predecessor exit bb:  */
1956169689Skan  if (!single_pred_p (loop->single_exit->dest))
1957169689Skan    {
1958169689Skan      edge e = loop->single_exit;
1959169689Skan      if (!(e->flags & EDGE_ABNORMAL))
1960169689Skan	{
1961169689Skan	  split_loop_exit_edge (e);
1962169689Skan	  if (vect_print_dump_info (REPORT_DETAILS))
1963169689Skan	    fprintf (vect_dump, "split exit edge.");
1964169689Skan	}
1965169689Skan      else
1966169689Skan	{
1967169689Skan	  if (vect_print_dump_info (REPORT_BAD_FORM_LOOPS))
1968169689Skan	    fprintf (vect_dump, "not vectorized: abnormal loop exit edge.");
1969169689Skan	  return NULL;
1970169689Skan	}
1971169689Skan    }
1972169689Skan
1973169689Skan  if (empty_block_p (loop->header))
1974169689Skan    {
1975169689Skan      if (vect_print_dump_info (REPORT_BAD_FORM_LOOPS))
1976169689Skan        fprintf (vect_dump, "not vectorized: empty loop.");
1977169689Skan      return NULL;
1978169689Skan    }
1979169689Skan
1980169689Skan  loop_cond = vect_get_loop_niters (loop, &number_of_iterations);
1981169689Skan  if (!loop_cond)
1982169689Skan    {
1983169689Skan      if (vect_print_dump_info (REPORT_BAD_FORM_LOOPS))
1984169689Skan	fprintf (vect_dump, "not vectorized: complicated exit condition.");
1985169689Skan      return NULL;
1986169689Skan    }
1987169689Skan
1988169689Skan  if (!number_of_iterations)
1989169689Skan    {
1990169689Skan      if (vect_print_dump_info (REPORT_BAD_FORM_LOOPS))
1991169689Skan	fprintf (vect_dump,
1992169689Skan		 "not vectorized: number of iterations cannot be computed.");
1993169689Skan      return NULL;
1994169689Skan    }
1995169689Skan
1996169689Skan  if (chrec_contains_undetermined (number_of_iterations))
1997169689Skan    {
1998169689Skan      if (vect_print_dump_info (REPORT_BAD_FORM_LOOPS))
1999169689Skan        fprintf (vect_dump, "Infinite number of iterations.");
2000169689Skan      return false;
2001169689Skan    }
2002169689Skan
2003169689Skan  loop_vinfo = new_loop_vec_info (loop);
2004169689Skan  LOOP_VINFO_NITERS (loop_vinfo) = number_of_iterations;
2005169689Skan
2006169689Skan  if (!LOOP_VINFO_NITERS_KNOWN_P (loop_vinfo))
2007169689Skan    {
2008169689Skan      if (vect_print_dump_info (REPORT_DETAILS))
2009169689Skan        {
2010169689Skan          fprintf (vect_dump, "Symbolic number of iterations is ");
2011169689Skan          print_generic_expr (vect_dump, number_of_iterations, TDF_DETAILS);
2012169689Skan        }
2013169689Skan    }
2014169689Skan  else
2015169689Skan  if (LOOP_VINFO_INT_NITERS (loop_vinfo) == 0)
2016169689Skan    {
2017169689Skan      if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS))
2018169689Skan        fprintf (vect_dump, "not vectorized: number of iterations = 0.");
2019169689Skan      return NULL;
2020169689Skan    }
2021169689Skan
2022169689Skan  LOOP_VINFO_EXIT_COND (loop_vinfo) = loop_cond;
2023169689Skan
2024169689Skan  return loop_vinfo;
2025169689Skan}
2026169689Skan
2027169689Skan
2028169689Skan/* Function vect_analyze_loop.
2029169689Skan
2030169689Skan   Apply a set of analyses on LOOP, and create a loop_vec_info struct
2031169689Skan   for it. The different analyses will record information in the
2032169689Skan   loop_vec_info struct.  */
2033169689Skanloop_vec_info
2034169689Skanvect_analyze_loop (struct loop *loop)
2035169689Skan{
2036169689Skan  bool ok;
2037169689Skan  loop_vec_info loop_vinfo;
2038169689Skan
2039169689Skan  if (vect_print_dump_info (REPORT_DETAILS))
2040169689Skan    fprintf (vect_dump, "===== analyze_loop_nest =====");
2041169689Skan
2042169689Skan  /* Check the CFG characteristics of the loop (nesting, entry/exit, etc.  */
2043169689Skan
2044169689Skan  loop_vinfo = vect_analyze_loop_form (loop);
2045169689Skan  if (!loop_vinfo)
2046169689Skan    {
2047169689Skan      if (vect_print_dump_info (REPORT_DETAILS))
2048169689Skan	fprintf (vect_dump, "bad loop form.");
2049169689Skan      return NULL;
2050169689Skan    }
2051169689Skan
2052169689Skan  /* Find all data references in the loop (which correspond to vdefs/vuses)
2053169689Skan     and analyze their evolution in the loop.
2054169689Skan
2055169689Skan     FORNOW: Handle only simple, array references, which
2056169689Skan     alignment can be forced, and aligned pointer-references.  */
2057169689Skan
2058169689Skan  ok = vect_analyze_data_refs (loop_vinfo);
2059169689Skan  if (!ok)
2060169689Skan    {
2061169689Skan      if (vect_print_dump_info (REPORT_DETAILS))
2062169689Skan	fprintf (vect_dump, "bad data references.");
2063169689Skan      destroy_loop_vec_info (loop_vinfo);
2064169689Skan      return NULL;
2065169689Skan    }
2066169689Skan
2067169689Skan  /* Classify all cross-iteration scalar data-flow cycles.
2068169689Skan     Cross-iteration cycles caused by virtual phis are analyzed separately.  */
2069169689Skan
2070169689Skan  vect_analyze_scalar_cycles (loop_vinfo);
2071169689Skan
2072169689Skan  vect_pattern_recog (loop_vinfo);
2073169689Skan
2074169689Skan  /* Data-flow analysis to detect stmts that do not need to be vectorized.  */
2075169689Skan
2076169689Skan  ok = vect_mark_stmts_to_be_vectorized (loop_vinfo);
2077169689Skan  if (!ok)
2078169689Skan    {
2079169689Skan      if (vect_print_dump_info (REPORT_DETAILS))
2080169689Skan	fprintf (vect_dump, "unexpected pattern.");
2081169689Skan      destroy_loop_vec_info (loop_vinfo);
2082169689Skan      return NULL;
2083169689Skan    }
2084169689Skan
2085169689Skan  /* Analyze the alignment of the data-refs in the loop.
2086169689Skan     Fail if a data reference is found that cannot be vectorized.  */
2087169689Skan
2088169689Skan  ok = vect_analyze_data_refs_alignment (loop_vinfo);
2089169689Skan  if (!ok)
2090169689Skan    {
2091169689Skan      if (vect_print_dump_info (REPORT_DETAILS))
2092169689Skan	fprintf (vect_dump, "bad data alignment.");
2093169689Skan      destroy_loop_vec_info (loop_vinfo);
2094169689Skan      return NULL;
2095169689Skan    }
2096169689Skan
2097169689Skan  ok = vect_determine_vectorization_factor (loop_vinfo);
2098169689Skan  if (!ok)
2099169689Skan    {
2100169689Skan      if (vect_print_dump_info (REPORT_DETAILS))
2101169689Skan        fprintf (vect_dump, "can't determine vectorization factor.");
2102169689Skan      destroy_loop_vec_info (loop_vinfo);
2103169689Skan      return NULL;
2104169689Skan    }
2105169689Skan
2106169689Skan  /* Analyze data dependences between the data-refs in the loop.
2107169689Skan     FORNOW: fail at the first data dependence that we encounter.  */
2108169689Skan
2109169689Skan  ok = vect_analyze_data_ref_dependences (loop_vinfo);
2110169689Skan  if (!ok)
2111169689Skan    {
2112169689Skan      if (vect_print_dump_info (REPORT_DETAILS))
2113169689Skan	fprintf (vect_dump, "bad data dependence.");
2114169689Skan      destroy_loop_vec_info (loop_vinfo);
2115169689Skan      return NULL;
2116169689Skan    }
2117169689Skan
2118169689Skan  /* Analyze the access patterns of the data-refs in the loop (consecutive,
2119169689Skan     complex, etc.). FORNOW: Only handle consecutive access pattern.  */
2120169689Skan
2121169689Skan  ok = vect_analyze_data_ref_accesses (loop_vinfo);
2122169689Skan  if (!ok)
2123169689Skan    {
2124169689Skan      if (vect_print_dump_info (REPORT_DETAILS))
2125169689Skan	fprintf (vect_dump, "bad data access.");
2126169689Skan      destroy_loop_vec_info (loop_vinfo);
2127169689Skan      return NULL;
2128169689Skan    }
2129169689Skan
2130169689Skan  /* This pass will decide on using loop versioning and/or loop peeling in
2131169689Skan     order to enhance the alignment of data references in the loop.  */
2132169689Skan
2133169689Skan  ok = vect_enhance_data_refs_alignment (loop_vinfo);
2134169689Skan  if (!ok)
2135169689Skan    {
2136169689Skan      if (vect_print_dump_info (REPORT_DETAILS))
2137169689Skan	fprintf (vect_dump, "bad data alignment.");
2138169689Skan      destroy_loop_vec_info (loop_vinfo);
2139169689Skan      return NULL;
2140169689Skan    }
2141169689Skan
2142169689Skan  /* Scan all the operations in the loop and make sure they are
2143169689Skan     vectorizable.  */
2144169689Skan
2145169689Skan  ok = vect_analyze_operations (loop_vinfo);
2146169689Skan  if (!ok)
2147169689Skan    {
2148169689Skan      if (vect_print_dump_info (REPORT_DETAILS))
2149169689Skan	fprintf (vect_dump, "bad operation or unsupported loop bound.");
2150169689Skan      destroy_loop_vec_info (loop_vinfo);
2151169689Skan      return NULL;
2152169689Skan    }
2153169689Skan
2154169689Skan  LOOP_VINFO_VECTORIZABLE_P (loop_vinfo) = 1;
2155169689Skan
2156169689Skan  return loop_vinfo;
2157169689Skan}
2158