/* Loop Vectorization
   Copyright (C) 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2010
   Free Software Foundation, Inc.
   Contributed by Dorit Naishlos <dorit@il.ibm.com> and
   Ira Rosen <irar@il.ibm.com>

This file is part of GCC.

GCC is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free
Software Foundation; either version 3, or (at your option) any later
version.

GCC is distributed in the hope that it will be useful, but WITHOUT ANY
WARRANTY; without even the implied warranty of MERCHANTABILITY or
FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
for more details.

You should have received a copy of the GNU General Public License
along with GCC; see the file COPYING3.  If not see
<http://www.gnu.org/licenses/>.  */

#include "config.h"
#include "system.h"
#include "coretypes.h"
#include "tm.h"
#include "ggc.h"
#include "tree.h"
#include "basic-block.h"
#include "diagnostic.h"
#include "tree-flow.h"
#include "tree-dump.h"
#include "cfgloop.h"
#include "cfglayout.h"
#include "expr.h"
#include "recog.h"
#include "optabs.h"
#include "params.h"
#include "toplev.h"
#include "tree-chrec.h"
#include "tree-scalar-evolution.h"
#include "tree-vectorizer.h"

/* Loop Vectorization Pass.

   This pass tries to vectorize loops.

   For example, the vectorizer transforms the following simple loop:

        short a[N]; short b[N]; short c[N]; int i;

        for (i=0; i<N; i++){
          a[i] = b[i] + c[i];
        }

   as if it were manually vectorized by rewriting the source code into:

        typedef int __attribute__((mode(V8HI))) v8hi;
        short a[N];  short b[N]; short c[N];   int i;
        v8hi *pa = (v8hi*)a, *pb = (v8hi*)b, *pc = (v8hi*)c;
        v8hi va, vb, vc;

        for (i=0; i<N/8; i++){
          vb = pb[i];
          vc = pc[i];
          va = vb + vc;
          pa[i] = va;
        }

        The main entry to this pass is vectorize_loops(), in which
   the vectorizer applies a set of analyses on a given set of loops,
   followed by the actual vectorization transformation for the loops that
   successfully passed the analysis phase.
        Throughout this pass we make a distinction between two types of
   data: scalars (which are represented by SSA_NAMEs), and memory references
   ("data-refs").  These two types of data require different handling both
   during analysis and transformation.  The types of data-refs that the
   vectorizer currently supports are ARRAY_REFs whose base is an array DECL
   (not a pointer), and INDIRECT_REFs through pointers; both array and pointer
   accesses are required to have a simple (consecutive) access pattern.
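
   For example, in the loop above, 'b[i]' is an ARRAY_REF whose base is the
   array DECL 'b', whereas an access such as '*(p + i)' through some pointer
   'p' (an illustrative name, not one used below) would be an INDIRECT_REF.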

   Analysis phase:
   ===============
        The driver for the analysis phase is vect_analyze_loop().
   It applies a set of analyses, some of which rely on the scalar evolution
   analyzer (scev) developed by Sebastian Pop.

        During the analysis phase the vectorizer records some information
   per stmt in a "stmt_vec_info" struct which is attached to each stmt in the
   loop, as well as general information about the loop as a whole, which is
   recorded in a "loop_vec_info" struct attached to each loop.

   Transformation phase:
   =====================
        The loop transformation phase scans all the stmts in the loop, and
   creates a vector stmt (or a sequence of stmts) for each scalar stmt S in
   the loop that needs to be vectorized.  It inserts the vector code sequence
   just before the scalar stmt S, and records a pointer to the vector code
   in STMT_VINFO_VEC_STMT (stmt_info) (stmt_info is the stmt_vec_info struct
   attached to S).  This pointer will be used for the vectorization of
   following stmts which use the def of stmt S.  Stmt S is removed if it
   writes to memory; otherwise, we rely on dead code elimination for
   removing it.

        For example, say stmt S1 was vectorized into stmt VS1:

   VS1: vb = px[i];
   S1:  b = x[i];    STMT_VINFO_VEC_STMT (stmt_info (S1)) = VS1
   S2:  a = b;

   To vectorize stmt S2, the vectorizer first finds the stmt that defines
   the operand 'b' (S1), and gets the relevant vector def 'vb' from the
   vector stmt VS1 pointed to by STMT_VINFO_VEC_STMT (stmt_info (S1)).  The
   resulting sequence would be:

   VS1: vb = px[i];
   S1:  b = x[i];       STMT_VINFO_VEC_STMT (stmt_info (S1)) = VS1
   VS2: va = vb;
   S2:  a = b;          STMT_VINFO_VEC_STMT (stmt_info (S2)) = VS2

        Operands that are not SSA_NAMEs are data-refs that appear in
   load/store operations (like 'x[i]' in S1), and are handled differently.

   Target modeling:
   =================
        Currently the only target specific information that is used is the
   size of the vector (in bytes) - "UNITS_PER_SIMD_WORD".  Targets that can
   support different sizes of vectors will, for now, need to specify one
   value for "UNITS_PER_SIMD_WORD".  More flexibility will be added in the
   future.

        Since we only vectorize operations whose vector form can be
   expressed using existing tree codes, to verify that an operation is
   supported, the vectorizer checks the relevant optab at the relevant
   machine_mode (e.g., optab_handler (add_optab, V8HImode)->insn_code).  If
   the value found is CODE_FOR_nothing, then there's no target support, and
   we can't vectorize the stmt.

   For additional information on this project see:
   http://gcc.gnu.org/projects/tree-ssa/vectorization.html
*/

/* Function vect_determine_vectorization_factor

   Determine the vectorization factor (VF).  VF is the number of data
   elements that are operated upon in parallel in a single iteration of the
   vectorized loop.  For example, when vectorizing a loop that operates on
   4-byte elements, on a target with a vector size (VS) of 16 bytes, the VF
   is set to 4, since 4 elements can fit in a single vector register.

   We currently support vectorization of loops in which all types operated
   upon are of the same size.  Therefore this function currently sets VF
   according to the size of the types operated upon, and fails if there are
   multiple sizes in the loop.

   VF is also the factor by which the loop iterations are strip-mined, e.g.:
   original loop:
        for (i=0; i<N; i++){
          a[i] = b[i] + c[i];
        }

   vectorized loop:
        for (i=0; i<N; i+=VF){
          a[i:VF] = b[i:VF] + c[i:VF];
        }
*/

static bool
vect_determine_vectorization_factor (loop_vec_info loop_vinfo)
{
  struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
  basic_block *bbs = LOOP_VINFO_BBS (loop_vinfo);
  int nbbs = loop->num_nodes;
  gimple_stmt_iterator si;
  unsigned int vectorization_factor = 0;
  tree scalar_type;
  gimple phi;
  tree vectype;
  unsigned int nunits;
  stmt_vec_info stmt_info;
  int i;
  HOST_WIDE_INT dummy;

  if (vect_print_dump_info (REPORT_DETAILS))
    fprintf (vect_dump, "=== vect_determine_vectorization_factor ===");

  for (i = 0; i < nbbs; i++)
    {
      basic_block bb = bbs[i];

      for (si = gsi_start_phis (bb); !gsi_end_p (si); gsi_next (&si))
        {
          phi = gsi_stmt (si);
          stmt_info = vinfo_for_stmt (phi);
          if (vect_print_dump_info (REPORT_DETAILS))
            {
              fprintf (vect_dump, "==> examining phi: ");
              print_gimple_stmt (vect_dump, phi, 0, TDF_SLIM);
            }

          gcc_assert (stmt_info);

          if (STMT_VINFO_RELEVANT_P (stmt_info))
            {
              gcc_assert (!STMT_VINFO_VECTYPE (stmt_info));
              scalar_type = TREE_TYPE (PHI_RESULT (phi));

              if (vect_print_dump_info (REPORT_DETAILS))
                {
                  fprintf (vect_dump, "get vectype for scalar type:  ");
                  print_generic_expr (vect_dump, scalar_type, TDF_SLIM);
                }

              vectype = get_vectype_for_scalar_type (scalar_type);
              if (!vectype)
                {
                  if (vect_print_dump_info (REPORT_UNVECTORIZED_LOCATIONS))
                    {
                      fprintf (vect_dump,
                               "not vectorized: unsupported data-type ");
                      print_generic_expr (vect_dump, scalar_type, TDF_SLIM);
                    }
                  return false;
                }
              STMT_VINFO_VECTYPE (stmt_info) = vectype;

              if (vect_print_dump_info (REPORT_DETAILS))
                {
                  fprintf (vect_dump, "vectype: ");
                  print_generic_expr (vect_dump, vectype, TDF_SLIM);
                }

              nunits = TYPE_VECTOR_SUBPARTS (vectype);
              if (vect_print_dump_info (REPORT_DETAILS))
                fprintf (vect_dump, "nunits = %d", nunits);

              if (!vectorization_factor
                  || (nunits > vectorization_factor))
                vectorization_factor = nunits;
            }
        }

      for (si = gsi_start_bb (bb); !gsi_end_p (si); gsi_next (&si))
        {
          gimple stmt = gsi_stmt (si);
          stmt_info = vinfo_for_stmt (stmt);

          if (vect_print_dump_info (REPORT_DETAILS))
            {
              fprintf (vect_dump, "==> examining statement: ");
              print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM);
            }

          gcc_assert (stmt_info);

          /* Skip stmts which do not need to be vectorized.  */
          if (!STMT_VINFO_RELEVANT_P (stmt_info)
              && !STMT_VINFO_LIVE_P (stmt_info))
            {
              if (vect_print_dump_info (REPORT_DETAILS))
                fprintf (vect_dump, "skip.");
              continue;
            }

          if (gimple_get_lhs (stmt) == NULL_TREE)
            {
              if (vect_print_dump_info (REPORT_UNVECTORIZED_LOCATIONS))
                {
                  fprintf (vect_dump, "not vectorized: irregular stmt.");
                  print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM);
                }
              return false;
            }

          if (VECTOR_MODE_P (TYPE_MODE (gimple_expr_type (stmt))))
            {
              if (vect_print_dump_info (REPORT_UNVECTORIZED_LOCATIONS))
                {
                  fprintf (vect_dump, "not vectorized: vector stmt in loop:");
                  print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM);
                }
              return false;
            }

          if (STMT_VINFO_VECTYPE (stmt_info))
            {
              /* The only cases in which a vectype has already been set are
                 for stmts that contain a data-ref, or for "pattern-stmts"
                 (stmts generated by the vectorizer to represent/replace a
                 certain idiom).  */
              gcc_assert (STMT_VINFO_DATA_REF (stmt_info)
                          || is_pattern_stmt_p (stmt_info));
              vectype = STMT_VINFO_VECTYPE (stmt_info);
            }
          else
            {
              gcc_assert (!STMT_VINFO_DATA_REF (stmt_info)
                          && !is_pattern_stmt_p (stmt_info));

              scalar_type = vect_get_smallest_scalar_type (stmt, &dummy,
                                                           &dummy);
              if (vect_print_dump_info (REPORT_DETAILS))
                {
                  fprintf (vect_dump, "get vectype for scalar type:  ");
                  print_generic_expr (vect_dump, scalar_type, TDF_SLIM);
                }

              vectype = get_vectype_for_scalar_type (scalar_type);
              if (!vectype)
                {
                  if (vect_print_dump_info (REPORT_UNVECTORIZED_LOCATIONS))
                    {
                      fprintf (vect_dump,
                               "not vectorized: unsupported data-type ");
                      print_generic_expr (vect_dump, scalar_type, TDF_SLIM);
                    }
                  return false;
                }
              STMT_VINFO_VECTYPE (stmt_info) = vectype;
            }

          if (vect_print_dump_info (REPORT_DETAILS))
            {
              fprintf (vect_dump, "vectype: ");
              print_generic_expr (vect_dump, vectype, TDF_SLIM);
            }

          nunits = TYPE_VECTOR_SUBPARTS (vectype);
          if (vect_print_dump_info (REPORT_DETAILS))
            fprintf (vect_dump, "nunits = %d", nunits);

          if (!vectorization_factor
              || (nunits > vectorization_factor))
            vectorization_factor = nunits;
        }
    }

  /* TODO: Analyze cost.  Decide if it is worthwhile to vectorize.  */
  if (vect_print_dump_info (REPORT_DETAILS))
    fprintf (vect_dump, "vectorization factor = %d", vectorization_factor);
  if (vectorization_factor <= 1)
    {
      if (vect_print_dump_info (REPORT_UNVECTORIZED_LOCATIONS))
        fprintf (vect_dump, "not vectorized: unsupported data-type");
      return false;
    }
  LOOP_VINFO_VECT_FACTOR (loop_vinfo) = vectorization_factor;

  return true;
}


/* Function vect_is_simple_iv_evolution.

   FORNOW: A simple evolution of an induction variable in the loop is
   considered a polynomial evolution with constant step.  */
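
/* For instance, for the induction variable 'i' in

        for (i = k; i < N; i += 3)

   the scev analyzer computes an access function of the form {k, +, 3}_x
   (initial value 'k', constant step 3, in loop number x); such an evolution
   is accepted here, whereas an evolution whose step itself changes across
   iterations is not.  */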

static bool
vect_is_simple_iv_evolution (unsigned loop_nb, tree access_fn, tree *init,
                             tree *step)
{
  tree init_expr;
  tree step_expr;
  tree evolution_part = evolution_part_in_loop_num (access_fn, loop_nb);

  /* When there is no evolution in this loop, the evolution function
     is not "simple".  */
  if (evolution_part == NULL_TREE)
    return false;

  /* When the evolution is a polynomial of degree >= 2
     the evolution function is not "simple".  */
  if (tree_is_chrec (evolution_part))
    return false;

  step_expr = evolution_part;
  init_expr = unshare_expr (initial_condition_in_loop_num (access_fn,
                                                           loop_nb));

  if (vect_print_dump_info (REPORT_DETAILS))
    {
      fprintf (vect_dump, "step: ");
      print_generic_expr (vect_dump, step_expr, TDF_SLIM);
      fprintf (vect_dump, ",  init: ");
      print_generic_expr (vect_dump, init_expr, TDF_SLIM);
    }

  *init = init_expr;
  *step = step_expr;

  if (TREE_CODE (step_expr) != INTEGER_CST)
    {
      if (vect_print_dump_info (REPORT_DETAILS))
        fprintf (vect_dump, "step unknown.");
      return false;
    }

  return true;
}

/* Function vect_analyze_scalar_cycles_1.

   Examine the cross iteration def-use cycles of scalar variables
   in LOOP.  LOOP_VINFO represents the loop that is now being
   considered for vectorization (can be LOOP, or an outer-loop
   enclosing LOOP).  */

static void
vect_analyze_scalar_cycles_1 (loop_vec_info loop_vinfo, struct loop *loop)
{
  basic_block bb = loop->header;
  tree dummy;
  VEC(gimple,heap) *worklist = VEC_alloc (gimple, heap, 64);
  gimple_stmt_iterator gsi;
  bool double_reduc;

  if (vect_print_dump_info (REPORT_DETAILS))
    fprintf (vect_dump, "=== vect_analyze_scalar_cycles ===");

  /* First - identify all inductions.  Reduction detection assumes that all
     the inductions have been identified; therefore, this order must not be
     changed.  */
  for (gsi = gsi_start_phis (bb); !gsi_end_p (gsi); gsi_next (&gsi))
    {
      gimple phi = gsi_stmt (gsi);
      tree access_fn = NULL;
      tree def = PHI_RESULT (phi);
      stmt_vec_info stmt_vinfo = vinfo_for_stmt (phi);

      if (vect_print_dump_info (REPORT_DETAILS))
        {
          fprintf (vect_dump, "Analyze phi: ");
          print_gimple_stmt (vect_dump, phi, 0, TDF_SLIM);
        }

      /* Skip virtual phis.  The data dependences that are associated with
         virtual defs/uses (i.e., memory accesses) are analyzed elsewhere.  */
      if (!is_gimple_reg (SSA_NAME_VAR (def)))
        continue;

      STMT_VINFO_DEF_TYPE (stmt_vinfo) = vect_unknown_def_type;

      /* Analyze the evolution function.  */
      access_fn = analyze_scalar_evolution (loop, def);
      if (access_fn && vect_print_dump_info (REPORT_DETAILS))
        {
          fprintf (vect_dump, "Access function of PHI: ");
          print_generic_expr (vect_dump, access_fn, TDF_SLIM);
        }

      if (!access_fn
          || !vect_is_simple_iv_evolution (loop->num, access_fn,
                                           &dummy, &dummy))
        {
          VEC_safe_push (gimple, heap, worklist, phi);
          continue;
        }

      if (vect_print_dump_info (REPORT_DETAILS))
        fprintf (vect_dump, "Detected induction.");
      STMT_VINFO_DEF_TYPE (stmt_vinfo) = vect_induction_def;
    }


  /* Second - identify all reductions and nested cycles.  */
  while (VEC_length (gimple, worklist) > 0)
    {
      gimple phi = VEC_pop (gimple, worklist);
      tree def = PHI_RESULT (phi);
      stmt_vec_info stmt_vinfo = vinfo_for_stmt (phi);
      gimple reduc_stmt;
      bool nested_cycle;

      if (vect_print_dump_info (REPORT_DETAILS))
        {
          fprintf (vect_dump, "Analyze phi: ");
          print_gimple_stmt (vect_dump, phi, 0, TDF_SLIM);
        }

      gcc_assert (is_gimple_reg (SSA_NAME_VAR (def)));
      gcc_assert (STMT_VINFO_DEF_TYPE (stmt_vinfo) == vect_unknown_def_type);

      nested_cycle = (loop != LOOP_VINFO_LOOP (loop_vinfo));
      reduc_stmt = vect_is_simple_reduction (loop_vinfo, phi, !nested_cycle,
                                             &double_reduc);
      if (reduc_stmt)
        {
          if (double_reduc)
            {
              if (vect_print_dump_info (REPORT_DETAILS))
                fprintf (vect_dump, "Detected double reduction.");

              STMT_VINFO_DEF_TYPE (stmt_vinfo) = vect_double_reduction_def;
              STMT_VINFO_DEF_TYPE (vinfo_for_stmt (reduc_stmt)) =
                                                    vect_double_reduction_def;
            }
          else
            {
              if (nested_cycle)
                {
                  if (vect_print_dump_info (REPORT_DETAILS))
                    fprintf (vect_dump, "Detected vectorizable nested cycle.");

                  STMT_VINFO_DEF_TYPE (stmt_vinfo) = vect_nested_cycle;
                  STMT_VINFO_DEF_TYPE (vinfo_for_stmt (reduc_stmt)) =
                                                             vect_nested_cycle;
                }
              else
                {
                  if (vect_print_dump_info (REPORT_DETAILS))
                    fprintf (vect_dump, "Detected reduction.");

                  STMT_VINFO_DEF_TYPE (stmt_vinfo) = vect_reduction_def;
                  STMT_VINFO_DEF_TYPE (vinfo_for_stmt (reduc_stmt)) =
                                                           vect_reduction_def;
                }
            }
        }
      else
        if (vect_print_dump_info (REPORT_DETAILS))
          fprintf (vect_dump, "Unknown def-use cycle pattern.");
    }

  VEC_free (gimple, heap, worklist);
}


/* Function vect_analyze_scalar_cycles.

   Examine the cross iteration def-use cycles of scalar variables, by
   analyzing the loop-header PHIs of scalar variables; classify each
   cycle as one of the following: invariant, induction, reduction, unknown.
   We do that for the loop represented by LOOP_VINFO, and also for its
   inner loop, if it exists.
   Examples of scalar cycles:

   Example1: reduction:

              loop1:
              for (i=0; i<N; i++)
                 sum += a[i];

   Example2: induction:

              loop2:
              for (i=0; i<N; i++)
                 a[i] = i;  */

static void
vect_analyze_scalar_cycles (loop_vec_info loop_vinfo)
{
  struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);

  vect_analyze_scalar_cycles_1 (loop_vinfo, loop);

  /* When vectorizing an outer-loop, the inner-loop is executed sequentially.
     Reductions in such an inner-loop therefore have different properties
     from the reductions in the nest that gets vectorized:
     1. When vectorized, they are executed in the same order as in the
        original scalar loop, so we can't change the order of computation
        when vectorizing them.
     2. FIXME: Inner-loop reductions can be used in the inner-loop, so the
        current checks are too strict.  */

  if (loop->inner)
    vect_analyze_scalar_cycles_1 (loop_vinfo, loop->inner);
}

/* Function vect_get_loop_niters.

   Determine the number of iterations for which the loop is executed.
   If an expression that represents the number of iterations
   can be constructed, place it in NUMBER_OF_ITERATIONS.
   Return the loop exit condition.  */
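
/* For example, for the loop

        for (i = 0; i < n; i++)
          a[i] = b[i];

   the expression placed in NUMBER_OF_ITERATIONS would be 'n' (an
   illustration only; the exact expression is whatever the scalar
   evolution analysis constructs).  */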

static gimple
vect_get_loop_niters (struct loop *loop, tree *number_of_iterations)
{
  tree niters;

  if (vect_print_dump_info (REPORT_DETAILS))
    fprintf (vect_dump, "=== get_loop_niters ===");

  niters = number_of_exit_cond_executions (loop);

  if (niters != NULL_TREE
      && niters != chrec_dont_know)
    {
      *number_of_iterations = niters;

      if (vect_print_dump_info (REPORT_DETAILS))
        {
          fprintf (vect_dump, "==> get_loop_niters:");
          print_generic_expr (vect_dump, *number_of_iterations, TDF_SLIM);
        }
    }

  return get_loop_exit_condition (loop);
}


/* Function bb_in_loop_p

   Used as predicate for dfs order traversal of the loop bbs.  */

static bool
bb_in_loop_p (const_basic_block bb, const void *data)
{
  const struct loop *const loop = (const struct loop *) data;
  return flow_bb_inside_loop_p (loop, bb);
}


/* Function new_loop_vec_info.

   Create and initialize a new loop_vec_info struct for LOOP, as well as
   stmt_vec_info structs for all the stmts in LOOP.  */

static loop_vec_info
new_loop_vec_info (struct loop *loop)
{
  loop_vec_info res;
  basic_block *bbs;
  gimple_stmt_iterator si;
  unsigned int i, nbbs;

  res = (loop_vec_info) xcalloc (1, sizeof (struct _loop_vec_info));
  LOOP_VINFO_LOOP (res) = loop;

  bbs = get_loop_body (loop);

  /* Create/Update stmt_info for all stmts in the loop.  */
  for (i = 0; i < loop->num_nodes; i++)
    {
      basic_block bb = bbs[i];

      /* BBs in a nested inner-loop will have been already processed (because
         we will have called vect_analyze_loop_form for any nested inner-loop).
         Therefore, for stmts in an inner-loop we just want to update the
         STMT_VINFO_LOOP_VINFO field of their stmt_info to point to the new
         loop_info of the outer-loop we are currently considering to vectorize
         (instead of the loop_info of the inner-loop).
         For stmts in other BBs we need to create a stmt_info from scratch.  */
      if (bb->loop_father != loop)
        {
          /* Inner-loop bb.  */
          gcc_assert (loop->inner && bb->loop_father == loop->inner);
          for (si = gsi_start_phis (bb); !gsi_end_p (si); gsi_next (&si))
            {
              gimple phi = gsi_stmt (si);
              stmt_vec_info stmt_info = vinfo_for_stmt (phi);
              loop_vec_info inner_loop_vinfo =
                STMT_VINFO_LOOP_VINFO (stmt_info);
              gcc_assert (loop->inner == LOOP_VINFO_LOOP (inner_loop_vinfo));
              STMT_VINFO_LOOP_VINFO (stmt_info) = res;
            }
          for (si = gsi_start_bb (bb); !gsi_end_p (si); gsi_next (&si))
            {
              gimple stmt = gsi_stmt (si);
              stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
              loop_vec_info inner_loop_vinfo =
                STMT_VINFO_LOOP_VINFO (stmt_info);
              gcc_assert (loop->inner == LOOP_VINFO_LOOP (inner_loop_vinfo));
              STMT_VINFO_LOOP_VINFO (stmt_info) = res;
            }
        }
      else
        {
          /* bb in current nest.  */
          for (si = gsi_start_phis (bb); !gsi_end_p (si); gsi_next (&si))
            {
              gimple phi = gsi_stmt (si);
              gimple_set_uid (phi, 0);
              set_vinfo_for_stmt (phi, new_stmt_vec_info (phi, res, NULL));
            }

          for (si = gsi_start_bb (bb); !gsi_end_p (si); gsi_next (&si))
            {
              gimple stmt = gsi_stmt (si);
              gimple_set_uid (stmt, 0);
              set_vinfo_for_stmt (stmt, new_stmt_vec_info (stmt, res, NULL));
            }
        }
    }

  /* CHECKME: We want to visit all BBs before their successors (except for
     latch blocks, for which this assertion wouldn't hold).  In the simple
     case of the loop forms we allow, a dfs order of the BBs would be the
     same as a reversed postorder traversal, so we are safe.  */

  free (bbs);
  bbs = XCNEWVEC (basic_block, loop->num_nodes);
  nbbs = dfs_enumerate_from (loop->header, 0, bb_in_loop_p,
                             bbs, loop->num_nodes, loop);
  gcc_assert (nbbs == loop->num_nodes);

  LOOP_VINFO_BBS (res) = bbs;
  LOOP_VINFO_NITERS (res) = NULL;
  LOOP_VINFO_NITERS_UNCHANGED (res) = NULL;
  LOOP_VINFO_COST_MODEL_MIN_ITERS (res) = 0;
  LOOP_VINFO_VECTORIZABLE_P (res) = 0;
  LOOP_PEELING_FOR_ALIGNMENT (res) = 0;
  LOOP_VINFO_VECT_FACTOR (res) = 0;
  LOOP_VINFO_DATAREFS (res) = VEC_alloc (data_reference_p, heap, 10);
  LOOP_VINFO_DDRS (res) = VEC_alloc (ddr_p, heap, 10 * 10);
  LOOP_VINFO_UNALIGNED_DR (res) = NULL;
  LOOP_VINFO_MAY_MISALIGN_STMTS (res) =
    VEC_alloc (gimple, heap,
               PARAM_VALUE (PARAM_VECT_MAX_VERSION_FOR_ALIGNMENT_CHECKS));
  LOOP_VINFO_MAY_ALIAS_DDRS (res) =
    VEC_alloc (ddr_p, heap,
               PARAM_VALUE (PARAM_VECT_MAX_VERSION_FOR_ALIAS_CHECKS));
  LOOP_VINFO_STRIDED_STORES (res) = VEC_alloc (gimple, heap, 10);
  LOOP_VINFO_SLP_INSTANCES (res) = VEC_alloc (slp_instance, heap, 10);
  LOOP_VINFO_SLP_UNROLLING_FACTOR (res) = 1;

  return res;
}


/* Function destroy_loop_vec_info.

   Free LOOP_VINFO struct, as well as all the stmt_vec_info structs of all the
   stmts in the loop.  */

void
destroy_loop_vec_info (loop_vec_info loop_vinfo, bool clean_stmts)
{
  struct loop *loop;
  basic_block *bbs;
  int nbbs;
  gimple_stmt_iterator si;
  int j;
  VEC (slp_instance, heap) *slp_instances;
  slp_instance instance;

  if (!loop_vinfo)
    return;

  loop = LOOP_VINFO_LOOP (loop_vinfo);

  bbs = LOOP_VINFO_BBS (loop_vinfo);
  nbbs = loop->num_nodes;

  if (!clean_stmts)
    {
      free (LOOP_VINFO_BBS (loop_vinfo));
      free_data_refs (LOOP_VINFO_DATAREFS (loop_vinfo));
      free_dependence_relations (LOOP_VINFO_DDRS (loop_vinfo));
      VEC_free (gimple, heap, LOOP_VINFO_MAY_MISALIGN_STMTS (loop_vinfo));

      free (loop_vinfo);
      loop->aux = NULL;
      return;
    }

  for (j = 0; j < nbbs; j++)
    {
      basic_block bb = bbs[j];
      for (si = gsi_start_phis (bb); !gsi_end_p (si); gsi_next (&si))
        free_stmt_vec_info (gsi_stmt (si));

      for (si = gsi_start_bb (bb); !gsi_end_p (si); )
        {
          gimple stmt = gsi_stmt (si);
          stmt_vec_info stmt_info = vinfo_for_stmt (stmt);

          if (stmt_info)
            {
              /* Check if this is a "pattern stmt" (introduced by the
                 vectorizer during the pattern recognition pass).  */
              bool remove_stmt_p = false;
              gimple orig_stmt = STMT_VINFO_RELATED_STMT (stmt_info);
              if (orig_stmt)
                {
                  stmt_vec_info orig_stmt_info = vinfo_for_stmt (orig_stmt);
                  if (orig_stmt_info
                      && STMT_VINFO_IN_PATTERN_P (orig_stmt_info))
                    remove_stmt_p = true;
                }

              /* Free stmt_vec_info.  */
              free_stmt_vec_info (stmt);

              /* Remove dead "pattern stmts".  */
              if (remove_stmt_p)
                gsi_remove (&si, true);
            }
          gsi_next (&si);
        }
    }

  free (LOOP_VINFO_BBS (loop_vinfo));
  free_data_refs (LOOP_VINFO_DATAREFS (loop_vinfo));
  free_dependence_relations (LOOP_VINFO_DDRS (loop_vinfo));
  VEC_free (gimple, heap, LOOP_VINFO_MAY_MISALIGN_STMTS (loop_vinfo));
  VEC_free (ddr_p, heap, LOOP_VINFO_MAY_ALIAS_DDRS (loop_vinfo));
  slp_instances = LOOP_VINFO_SLP_INSTANCES (loop_vinfo);
  for (j = 0; VEC_iterate (slp_instance, slp_instances, j, instance); j++)
    vect_free_slp_instance (instance);

  VEC_free (slp_instance, heap, LOOP_VINFO_SLP_INSTANCES (loop_vinfo));
  VEC_free (gimple, heap, LOOP_VINFO_STRIDED_STORES (loop_vinfo));

  free (loop_vinfo);
  loop->aux = NULL;
}


/* Function vect_analyze_loop_1.

   Apply a set of analyses on LOOP, and create a loop_vec_info struct
   for it.  The different analyses will record information in the
   loop_vec_info struct.  This is a subset of the analyses applied in
   vect_analyze_loop, to be applied on an inner-loop nested in the loop
   that is now considered for (outer-loop) vectorization.  */

static loop_vec_info
vect_analyze_loop_1 (struct loop *loop)
{
  loop_vec_info loop_vinfo;

  if (vect_print_dump_info (REPORT_DETAILS))
    fprintf (vect_dump, "===== analyze_loop_nest_1 =====");

  /* Check the CFG characteristics of the loop (nesting, entry/exit, etc.)  */

  loop_vinfo = vect_analyze_loop_form (loop);
  if (!loop_vinfo)
    {
      if (vect_print_dump_info (REPORT_DETAILS))
        fprintf (vect_dump, "bad inner-loop form.");
      return NULL;
    }

  return loop_vinfo;
}


/* Function vect_analyze_loop_form.

   Verify that certain CFG restrictions hold, including:
   - the loop has a pre-header
   - the loop has a single entry and exit
   - the loop exit condition is simple enough, and the number of iterations
     can be analyzed (a countable loop).  */

loop_vec_info
vect_analyze_loop_form (struct loop *loop)
{
  loop_vec_info loop_vinfo;
  gimple loop_cond;
  tree number_of_iterations = NULL;
  loop_vec_info inner_loop_vinfo = NULL;

  if (vect_print_dump_info (REPORT_DETAILS))
    fprintf (vect_dump, "=== vect_analyze_loop_form ===");

  /* Different restrictions apply when we are considering an inner-most loop,
     vs. an outer (nested) loop.
     (FORNOW. May want to relax some of these restrictions in the future).  */

  if (!loop->inner)
    {
      /* Inner-most loop.  We currently require that the number of BBs is
         exactly 2 (the header and latch).  Vectorizable inner-most loops
         look like this:

                        (pre-header)
                           |
                          header <--------+
                           | |            |
                           | +--> latch --+
                           |
                        (exit-bb)  */

      if (loop->num_nodes != 2)
        {
          if (vect_print_dump_info (REPORT_BAD_FORM_LOOPS))
            fprintf (vect_dump, "not vectorized: control flow in loop.");
          return NULL;
        }

      if (empty_block_p (loop->header))
        {
          if (vect_print_dump_info (REPORT_BAD_FORM_LOOPS))
            fprintf (vect_dump, "not vectorized: empty loop.");
          return NULL;
        }
    }
  else
    {
      struct loop *innerloop = loop->inner;
      edge entryedge;

      /* Nested loop.  We currently require that the loop is doubly-nested,
         contains a single inner loop, and the number of BBs is exactly 5.
         Vectorizable outer-loops look like this:

			(pre-header)
			   |
			  header <---+
			   |         |
		          inner-loop |
			   |         |
			  tail ------+
			   |
		        (exit-bb)

         The inner-loop has the properties expected of inner-most loops
         as described above.  */

      if ((loop->inner)->inner || (loop->inner)->next)
        {
          if (vect_print_dump_info (REPORT_BAD_FORM_LOOPS))
            fprintf (vect_dump, "not vectorized: multiple nested loops.");
          return NULL;
        }

      /* Analyze the inner-loop.  */
      inner_loop_vinfo = vect_analyze_loop_1 (loop->inner);
      if (!inner_loop_vinfo)
        {
          if (vect_print_dump_info (REPORT_BAD_FORM_LOOPS))
            fprintf (vect_dump, "not vectorized: Bad inner loop.");
          return NULL;
        }

      if (!expr_invariant_in_loop_p (loop,
                                     LOOP_VINFO_NITERS (inner_loop_vinfo)))
        {
          if (vect_print_dump_info (REPORT_BAD_FORM_LOOPS))
            fprintf (vect_dump,
                     "not vectorized: inner-loop count not invariant.");
          destroy_loop_vec_info (inner_loop_vinfo, true);
          return NULL;
        }

      if (loop->num_nodes != 5)
        {
          if (vect_print_dump_info (REPORT_BAD_FORM_LOOPS))
            fprintf (vect_dump, "not vectorized: control flow in loop.");
          destroy_loop_vec_info (inner_loop_vinfo, true);
          return NULL;
        }

      gcc_assert (EDGE_COUNT (innerloop->header->preds) == 2);
      entryedge = EDGE_PRED (innerloop->header, 0);
      if (EDGE_PRED (innerloop->header, 0)->src == innerloop->latch)
        entryedge = EDGE_PRED (innerloop->header, 1);

      if (entryedge->src != loop->header
          || !single_exit (innerloop)
          || single_exit (innerloop)->dest != EDGE_PRED (loop->latch, 0)->src)
        {
          if (vect_print_dump_info (REPORT_BAD_FORM_LOOPS))
            fprintf (vect_dump, "not vectorized: unsupported outerloop form.");
          destroy_loop_vec_info (inner_loop_vinfo, true);
          return NULL;
        }

      if (vect_print_dump_info (REPORT_DETAILS))
        fprintf (vect_dump, "Considering outer-loop vectorization.");
    }

  if (!single_exit (loop)
      || EDGE_COUNT (loop->header->preds) != 2)
    {
      if (vect_print_dump_info (REPORT_BAD_FORM_LOOPS))
        {
          if (!single_exit (loop))
            fprintf (vect_dump, "not vectorized: multiple exits.");
          else if (EDGE_COUNT (loop->header->preds) != 2)
            fprintf (vect_dump, "not vectorized: too many incoming edges.");
        }
      if (inner_loop_vinfo)
        destroy_loop_vec_info (inner_loop_vinfo, true);
      return NULL;
    }

  /* We assume that the loop exit condition is at the end of the loop, i.e.,
     that the loop is represented as a do-while (with a proper if-guard
     before the loop if needed), where the loop header contains all the
     executable statements, and the latch is empty.  */
  if (!empty_block_p (loop->latch)
      || !gimple_seq_empty_p (phi_nodes (loop->latch)))
    {
      if (vect_print_dump_info (REPORT_BAD_FORM_LOOPS))
        fprintf (vect_dump, "not vectorized: unexpected loop form.");
      if (inner_loop_vinfo)
        destroy_loop_vec_info (inner_loop_vinfo, true);
      return NULL;
    }

  /* Make sure there exists a single-predecessor exit bb:  */
  if (!single_pred_p (single_exit (loop)->dest))
    {
      edge e = single_exit (loop);
      if (!(e->flags & EDGE_ABNORMAL))
        {
          split_loop_exit_edge (e);
          if (vect_print_dump_info (REPORT_DETAILS))
            fprintf (vect_dump, "split exit edge.");
        }
      else
        {
          if (vect_print_dump_info (REPORT_BAD_FORM_LOOPS))
            fprintf (vect_dump, "not vectorized: abnormal loop exit edge.");
          if (inner_loop_vinfo)
            destroy_loop_vec_info (inner_loop_vinfo, true);
          return NULL;
        }
    }

  loop_cond = vect_get_loop_niters (loop, &number_of_iterations);
  if (!loop_cond)
    {
      if (vect_print_dump_info (REPORT_BAD_FORM_LOOPS))
        fprintf (vect_dump, "not vectorized: complicated exit condition.");
      if (inner_loop_vinfo)
        destroy_loop_vec_info (inner_loop_vinfo, true);
      return NULL;
    }

  if (!number_of_iterations)
    {
      if (vect_print_dump_info (REPORT_BAD_FORM_LOOPS))
        fprintf (vect_dump,
                 "not vectorized: number of iterations cannot be computed.");
      if (inner_loop_vinfo)
        destroy_loop_vec_info (inner_loop_vinfo, true);
      return NULL;
    }

  if (chrec_contains_undetermined (number_of_iterations))
    {
      if (vect_print_dump_info (REPORT_BAD_FORM_LOOPS))
        fprintf (vect_dump, "Infinite number of iterations.");
      if (inner_loop_vinfo)
        destroy_loop_vec_info (inner_loop_vinfo, true);
      return NULL;
    }

  if (!NITERS_KNOWN_P (number_of_iterations))
    {
      if (vect_print_dump_info (REPORT_DETAILS))
        {
          fprintf (vect_dump, "Symbolic number of iterations is ");
          print_generic_expr (vect_dump, number_of_iterations, TDF_DETAILS);
        }
    }
  else if (TREE_INT_CST_LOW (number_of_iterations) == 0)
    {
      if (vect_print_dump_info (REPORT_UNVECTORIZED_LOCATIONS))
        fprintf (vect_dump, "not vectorized: number of iterations = 0.");
      if (inner_loop_vinfo)
        destroy_loop_vec_info (inner_loop_vinfo, false);
      return NULL;
    }

  loop_vinfo = new_loop_vec_info (loop);
  LOOP_VINFO_NITERS (loop_vinfo) = number_of_iterations;
  LOOP_VINFO_NITERS_UNCHANGED (loop_vinfo) = number_of_iterations;

  STMT_VINFO_TYPE (vinfo_for_stmt (loop_cond)) = loop_exit_ctrl_vec_info_type;

  /* CHECKME: May want to keep it around in the future.  */
  if (inner_loop_vinfo)
    destroy_loop_vec_info (inner_loop_vinfo, false);

  gcc_assert (!loop->aux);
  loop->aux = loop_vinfo;
  return loop_vinfo;
}


/* Function vect_analyze_loop_operations.

   Scan the loop stmts and make sure they are all vectorizable.  */

static bool
vect_analyze_loop_operations (loop_vec_info loop_vinfo)
{
  struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
  basic_block *bbs = LOOP_VINFO_BBS (loop_vinfo);
  int nbbs = loop->num_nodes;
  gimple_stmt_iterator si;
  unsigned int vectorization_factor = 0;
  int i;
  gimple phi;
  stmt_vec_info stmt_info;
  bool need_to_vectorize = false;
  int min_profitable_iters;
  int min_scalar_loop_bound;
  unsigned int th;
  bool only_slp_in_loop = true, ok;

  if (vect_print_dump_info (REPORT_DETAILS))
    fprintf (vect_dump, "=== vect_analyze_loop_operations ===");

  gcc_assert (LOOP_VINFO_VECT_FACTOR (loop_vinfo));
  vectorization_factor = LOOP_VINFO_VECT_FACTOR (loop_vinfo);

  for (i = 0; i < nbbs; i++)
    {
      basic_block bb = bbs[i];

      for (si = gsi_start_phis (bb); !gsi_end_p (si); gsi_next (&si))
        {
          phi = gsi_stmt (si);
          ok = true;

          stmt_info = vinfo_for_stmt (phi);
          if (vect_print_dump_info (REPORT_DETAILS))
            {
              fprintf (vect_dump, "examining phi: ");
              print_gimple_stmt (vect_dump, phi, 0, TDF_SLIM);
            }

          if (! is_loop_header_bb_p (bb))
            {
              /* An inner-loop loop-closed exit phi in outer-loop
                 vectorization (i.e., a phi in the tail of the outer-loop).
                 FORNOW: we currently don't support the case that these phis
                 are not used in the outer-loop (unless it is double
                 reduction, i.e., this phi is vect_reduction_def), because
                 this case would require us to actually do something here.  */
              if ((!STMT_VINFO_RELEVANT_P (stmt_info)
                   || STMT_VINFO_LIVE_P (stmt_info))
                  && STMT_VINFO_DEF_TYPE (stmt_info)
                     != vect_double_reduction_def)
                {
                  if (vect_print_dump_info (REPORT_DETAILS))
                    fprintf (vect_dump,
                             "Unsupported loop-closed phi in outer-loop.");
                  return false;
                }
              continue;
            }

          gcc_assert (stmt_info);

          if (STMT_VINFO_LIVE_P (stmt_info))
            {
              /* FORNOW: not yet supported.  */
              if (vect_print_dump_info (REPORT_UNVECTORIZED_LOCATIONS))
                fprintf (vect_dump, "not vectorized: value used after loop.");
              return false;
            }

          if (STMT_VINFO_RELEVANT (stmt_info) == vect_used_in_scope
              && STMT_VINFO_DEF_TYPE (stmt_info) != vect_induction_def)
            {
              /* A scalar-dependence cycle that we don't support.  */
              if (vect_print_dump_info (REPORT_UNVECTORIZED_LOCATIONS))
                fprintf (vect_dump,
                         "not vectorized: scalar dependence cycle.");
              return false;
            }

          if (STMT_VINFO_RELEVANT_P (stmt_info))
            {
              need_to_vectorize = true;
              if (STMT_VINFO_DEF_TYPE (stmt_info) == vect_induction_def)
                ok = vectorizable_induction (phi, NULL, NULL);
            }

          if (!ok)
            {
              if (vect_print_dump_info (REPORT_UNVECTORIZED_LOCATIONS))
                {
                  fprintf (vect_dump,
                           "not vectorized: relevant phi not supported: ");
                  print_gimple_stmt (vect_dump, phi, 0, TDF_SLIM);
                }
              return false;
            }
        }

      for (si = gsi_start_bb (bb); !gsi_end_p (si); gsi_next (&si))
        {
          gimple stmt = gsi_stmt (si);
          stmt_vec_info stmt_info = vinfo_for_stmt (stmt);

          gcc_assert (stmt_info);

          if (!vect_analyze_stmt (stmt, &need_to_vectorize, NULL))
            return false;

          if ((STMT_VINFO_RELEVANT_P (stmt_info)
               || VECTORIZABLE_CYCLE_DEF (STMT_VINFO_DEF_TYPE (stmt_info)))
              && !PURE_SLP_STMT (stmt_info))
            /* STMT needs both SLP and loop-based vectorization.  */
            only_slp_in_loop = false;
        }
    } /* bbs */

  /* All operations in the loop are either irrelevant (deal with loop
     control, or dead), or only used outside the loop and can be moved
     out of the loop (e.g. invariants, inductions).  The loop can be
     optimized away by scalar optimizations.  We're better off not
     touching this loop.  */
  if (!need_to_vectorize)
    {
      if (vect_print_dump_info (REPORT_DETAILS))
        fprintf (vect_dump,
                 "All the computation can be taken out of the loop.");
      if (vect_print_dump_info (REPORT_UNVECTORIZED_LOCATIONS))
        fprintf (vect_dump,
                 "not vectorized: redundant loop. no profit to vectorize.");
      return false;
    }

  /* If all the stmts in the loop can be SLPed, we perform only SLP, and the
     vectorization factor of the loop is the unrolling factor required by
     the SLP instances.  If that unrolling factor is 1, we say that we
     perform pure SLP on the loop - cross iteration parallelism is not
     exploited.  */
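  /* For instance (numbers chosen for illustration only): if the loop-based
     vectorization factor is 4 and the SLP instances require an unrolling
     factor of 2, the combined factor computed below is
     least_common_multiple (4, 2) == 4.  */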
  if (only_slp_in_loop)
    vectorization_factor = LOOP_VINFO_SLP_UNROLLING_FACTOR (loop_vinfo);
  else
    vectorization_factor = least_common_multiple (vectorization_factor,
                                LOOP_VINFO_SLP_UNROLLING_FACTOR (loop_vinfo));

  LOOP_VINFO_VECT_FACTOR (loop_vinfo) = vectorization_factor;

  if (LOOP_VINFO_NITERS_KNOWN_P (loop_vinfo)
      && vect_print_dump_info (REPORT_DETAILS))
    fprintf (vect_dump,
        "vectorization_factor = %d, niters = " HOST_WIDE_INT_PRINT_DEC,
        vectorization_factor, LOOP_VINFO_INT_NITERS (loop_vinfo));

  if (LOOP_VINFO_NITERS_KNOWN_P (loop_vinfo)
      && (LOOP_VINFO_INT_NITERS (loop_vinfo) < vectorization_factor))
    {
      if (vect_print_dump_info (REPORT_UNVECTORIZED_LOCATIONS))
        fprintf (vect_dump, "not vectorized: iteration count too small.");
      if (vect_print_dump_info (REPORT_DETAILS))
        fprintf (vect_dump, "not vectorized: iteration count smaller than "
                 "vectorization factor.");
      return false;
    }

  /* Analyze cost.  Decide if it is worthwhile to vectorize.  */

  /* Once VF is set, SLP costs should be updated since the number of created
     vector stmts depends on VF.  */
  vect_update_slp_costs_according_to_vf (loop_vinfo);

  min_profitable_iters = vect_estimate_min_profitable_iters (loop_vinfo);
  LOOP_VINFO_COST_MODEL_MIN_ITERS (loop_vinfo) = min_profitable_iters;

  if (min_profitable_iters < 0)
    {
      if (vect_print_dump_info (REPORT_UNVECTORIZED_LOCATIONS))
        fprintf (vect_dump, "not vectorized: vectorization not profitable.");
      if (vect_print_dump_info (REPORT_DETAILS))
        fprintf (vect_dump, "not vectorized: vector version will never be "
                 "profitable.");
      return false;
    }

  min_scalar_loop_bound = ((PARAM_VALUE (PARAM_MIN_VECT_LOOP_BOUND)
                            * vectorization_factor) - 1);
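  /* For instance, with PARAM_MIN_VECT_LOOP_BOUND == 2 (a value chosen for
     illustration) and a vectorization factor of 4, MIN_SCALAR_LOOP_BOUND is
     (2 * 4) - 1 == 7, so a loop known to execute at most 7 iterations is
     left scalar (see the threshold check below).  */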

  /* Use the cost model only if it is more conservative than the
     user-specified threshold.  */

  th = (unsigned) min_scalar_loop_bound;
  if (min_profitable_iters
      && (!min_scalar_loop_bound
          || min_profitable_iters > min_scalar_loop_bound))
    th = (unsigned) min_profitable_iters;

  if (LOOP_VINFO_NITERS_KNOWN_P (loop_vinfo)
      && LOOP_VINFO_INT_NITERS (loop_vinfo) <= th)
    {
      if (vect_print_dump_info (REPORT_UNVECTORIZED_LOCATIONS))
        fprintf (vect_dump, "not vectorized: vectorization not "
                 "profitable.");
      if (vect_print_dump_info (REPORT_DETAILS))
        fprintf (vect_dump, "not vectorized: iteration count smaller than "
                 "user specified loop bound parameter or minimum "
                 "profitable iterations (whichever is more conservative).");
      return false;
    }

  if (!LOOP_VINFO_NITERS_KNOWN_P (loop_vinfo)
      || LOOP_VINFO_INT_NITERS (loop_vinfo) % vectorization_factor != 0
      || LOOP_PEELING_FOR_ALIGNMENT (loop_vinfo))
    {
      if (vect_print_dump_info (REPORT_DETAILS))
        fprintf (vect_dump, "epilog loop required.");
      if (!vect_can_advance_ivs_p (loop_vinfo))
        {
          if (vect_print_dump_info (REPORT_UNVECTORIZED_LOCATIONS))
            fprintf (vect_dump,
                     "not vectorized: can't create epilog loop 1.");
          return false;
        }
      if (!slpeel_can_duplicate_loop_p (loop, single_exit (loop)))
        {
          if (vect_print_dump_info (REPORT_UNVECTORIZED_LOCATIONS))
            fprintf (vect_dump,
                     "not vectorized: can't create epilog loop 2.");
          return false;
        }
    }

  return true;
}


/* Function vect_analyze_loop.

   Apply a set of analyses on LOOP, and create a loop_vec_info struct
   for it.  The different analyses will record information in the
   loop_vec_info struct.  */
loop_vec_info
vect_analyze_loop (struct loop *loop)
{
  bool ok;
  loop_vec_info loop_vinfo;

  if (vect_print_dump_info (REPORT_DETAILS))
    fprintf (vect_dump, "===== analyze_loop_nest =====");

  if (loop_outer (loop)
      && loop_vec_info_for_loop (loop_outer (loop))
      && LOOP_VINFO_VECTORIZABLE_P (loop_vec_info_for_loop (loop_outer (loop))))
    {
      if (vect_print_dump_info (REPORT_DETAILS))
        fprintf (vect_dump, "outer-loop already vectorized.");
      return NULL;
    }

  /* Check the CFG characteristics of the loop (nesting, entry/exit, etc.)  */

  loop_vinfo = vect_analyze_loop_form (loop);
  if (!loop_vinfo)
    {
      if (vect_print_dump_info (REPORT_DETAILS))
        fprintf (vect_dump, "bad loop form.");
      return NULL;
    }

  /* Find all data references in the loop (which correspond to vdefs/vuses)
     and analyze their evolution in the loop.

     FORNOW: Handle only simple array references whose
     alignment can be forced, and aligned pointer-references.  */

  ok = vect_analyze_data_refs (loop_vinfo, NULL);
  if (!ok)
    {
      if (vect_print_dump_info (REPORT_DETAILS))
        fprintf (vect_dump, "bad data references.");
      destroy_loop_vec_info (loop_vinfo, true);
      return NULL;
    }

  /* Classify all cross-iteration scalar data-flow cycles.
     Cross-iteration cycles caused by virtual phis are analyzed
     separately.  */

  vect_analyze_scalar_cycles (loop_vinfo);

  vect_pattern_recog (loop_vinfo);

  /* Data-flow analysis to detect stmts that do not need to be
     vectorized.  */

  ok = vect_mark_stmts_to_be_vectorized (loop_vinfo);
  if (!ok)
    {
      if (vect_print_dump_info (REPORT_DETAILS))
        fprintf (vect_dump, "unexpected pattern.");
      destroy_loop_vec_info (loop_vinfo, true);
      return NULL;
    }

  /* Analyze the alignment of the data-refs in the loop.
     Fail if a data reference is found that cannot be vectorized.  */

  ok = vect_analyze_data_refs_alignment (loop_vinfo, NULL);
  if (!ok)
    {
      if (vect_print_dump_info (REPORT_DETAILS))
        fprintf (vect_dump, "bad data alignment.");
      destroy_loop_vec_info (loop_vinfo, true);
      return NULL;
    }

  ok = vect_determine_vectorization_factor (loop_vinfo);
  if (!ok)
    {
      if (vect_print_dump_info (REPORT_DETAILS))
        fprintf (vect_dump, "can't determine vectorization factor.");
      destroy_loop_vec_info (loop_vinfo, true);
      return NULL;
    }

  /* Analyze data dependences between the data-refs in the loop.
     FORNOW: fail at the first data dependence that we encounter.  */

  ok = vect_analyze_data_ref_dependences (loop_vinfo, NULL);
  if (!ok)
    {
      if (vect_print_dump_info (REPORT_DETAILS))
        fprintf (vect_dump, "bad data dependence.");
      destroy_loop_vec_info (loop_vinfo, true);
      return NULL;
    }

  /* Analyze the access patterns of the data-refs in the loop (consecutive,
     complex, etc.).  FORNOW: Only handle consecutive access pattern.  */

  ok = vect_analyze_data_ref_accesses (loop_vinfo, NULL);
  if (!ok)
    {
      if (vect_print_dump_info (REPORT_DETAILS))
        fprintf (vect_dump, "bad data access.");
      destroy_loop_vec_info (loop_vinfo, true);
      return NULL;
    }

  /* Prune the list of ddrs to be tested at run-time by versioning for alias.
     It is important to call pruning after vect_analyze_data_ref_accesses,
     since we use grouping information gathered by interleaving analysis.  */
  ok = vect_prune_runtime_alias_test_list (loop_vinfo);
  if (!ok)
    {
      if (vect_print_dump_info (REPORT_DETAILS))
        fprintf (vect_dump, "too long list of versioning for alias "
                            "run-time tests.");
      destroy_loop_vec_info (loop_vinfo, true);
      return NULL;
    }

  /* Check the SLP opportunities in the loop, analyze and build SLP trees.  */
  ok = vect_analyze_slp (loop_vinfo, NULL);
  if (ok)
    {
      /* Decide which possible SLP instances to SLP.  */
      vect_make_slp_decision (loop_vinfo);

      /* Find stmts that need to be both vectorized and SLPed.  */
      vect_detect_hybrid_slp (loop_vinfo);
    }

  /* This pass will decide on using loop versioning and/or loop peeling in
     order to enhance the alignment of data references in the loop.  */

  ok = vect_enhance_data_refs_alignment (loop_vinfo);
  if (!ok)
    {
      if (vect_print_dump_info (REPORT_DETAILS))
        fprintf (vect_dump, "bad data alignment.");
      destroy_loop_vec_info (loop_vinfo, true);
      return NULL;
    }

  /* Scan all the operations in the loop and make sure they are
     vectorizable.  */

  ok = vect_analyze_loop_operations (loop_vinfo);
  if (!ok)
    {
      if (vect_print_dump_info (REPORT_DETAILS))
        fprintf (vect_dump, "bad operation or unsupported loop bound.");
      destroy_loop_vec_info (loop_vinfo, true);
      return NULL;
    }

  LOOP_VINFO_VECTORIZABLE_P (loop_vinfo) = 1;

  return loop_vinfo;
}


/* Function reduction_code_for_scalar_code

   Input:
   CODE - tree_code of a reduction operation.

   Output:
   REDUC_CODE - the corresponding tree-code to be used to reduce the
      vector of partial results into a single scalar result (which
      will also reside in a vector), or ERROR_MARK if the operation is
      a supported reduction operation but does not have such a tree-code.

   Return FALSE if CODE currently cannot be vectorized as a reduction.  */
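
/* For example, for the scalar reduction 'sum += a[i]' (a PLUS_EXPR), the
   loop is vectorized by accumulating partial sums in the elements of a
   vector; REDUC_PLUS_EXPR is then used after the loop to combine those
   partial sums into the final scalar result.  */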

static bool
reduction_code_for_scalar_code (enum tree_code code,
                                enum tree_code *reduc_code)
{
  switch (code)
    {
      case MAX_EXPR:
        *reduc_code = REDUC_MAX_EXPR;
        return true;

      case MIN_EXPR:
        *reduc_code = REDUC_MIN_EXPR;
        return true;

      case PLUS_EXPR:
        *reduc_code = REDUC_PLUS_EXPR;
        return true;

      case MULT_EXPR:
      case MINUS_EXPR:
      case BIT_IOR_EXPR:
      case BIT_XOR_EXPR:
      case BIT_AND_EXPR:
        *reduc_code = ERROR_MARK;
        return true;

      default:
        return false;
    }
}


/* Error reporting helper for vect_is_simple_reduction below.  GIMPLE
   statement STMT is printed with the message MSG.  */

static void
report_vect_op (gimple stmt, const char *msg)
{
  fprintf (vect_dump, "%s", msg);
  print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM);
}


/* Function vect_is_simple_reduction

   (1) Detect a cross-iteration def-use cycle that represents a simple
   reduction computation.  We look for the following pattern:

   loop_header:
     a1 = phi < a0, a2 >
     a3 = ...
     a2 = operation (a3, a1)

   such that:
   1. operation is commutative and associative and it is safe to
      change the order of the computation (if CHECK_REDUCTION is true)
   2. no uses for a2 in the loop (a2 is used out of the loop)
   3. no uses of a1 in the loop besides the reduction operation.

   Condition 1 is tested here.
   Conditions 2,3 are tested in vect_mark_stmts_to_be_vectorized.

   (2) Detect a cross-iteration def-use cycle in nested loops, i.e.,
   nested cycles, if CHECK_REDUCTION is false.

   (3) Detect cycles of phi nodes in outer-loop vectorization, i.e., double
   reductions:

     a1 = phi < a0, a2 >
     inner loop (def of a3)
     a2 = phi < a3 >
*/
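
/* For example, the summation

     for (i = 0; i < N; i++)
       sum += a[i];

   exhibits pattern (1): a1 is the loop-header phi of 'sum', a3 is the
   loaded value a[i], and a2 = operation (a3, a1) is the updated sum
   carried to the next iteration.  */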
1561
1562gimple
1563vect_is_simple_reduction (loop_vec_info loop_info, gimple phi,
1564                          bool check_reduction, bool *double_reduc)
1565{
1566  struct loop *loop = (gimple_bb (phi))->loop_father;
1567  struct loop *vect_loop = LOOP_VINFO_LOOP (loop_info);
1568  edge latch_e = loop_latch_edge (loop);
1569  tree loop_arg = PHI_ARG_DEF_FROM_EDGE (phi, latch_e);
1570  gimple def_stmt, def1 = NULL, def2 = NULL;
1571  enum tree_code code;
1572  tree op1, op2, op3 = NULL_TREE, op4 = NULL_TREE;
1573  tree type;
1574  int nloop_uses;
1575  tree name;
1576  imm_use_iterator imm_iter;
1577  use_operand_p use_p;
1578  bool phi_def;
1579
1580  *double_reduc = false;
1581
1582  /* If CHECK_REDUCTION is true, we assume inner-most loop vectorization,
1583     otherwise, we assume outer loop vectorization.  */
1584  gcc_assert ((check_reduction && loop == vect_loop)
1585              || (!check_reduction && flow_loop_nested_p (vect_loop, loop)));
1586
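
  /* Conditions 2 and 3 above imply that the PHI result and the reduction
     definition each have at most one relevant use inside the loop.  Count
     the loop-internal uses of the PHI result here (the definition is
     checked the same way further below) and fail when a second use is
     found.  */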
  name = PHI_RESULT (phi);
  nloop_uses = 0;
  FOR_EACH_IMM_USE_FAST (use_p, imm_iter, name)
    {
      gimple use_stmt = USE_STMT (use_p);
      if (is_gimple_debug (use_stmt))
        continue;
      if (flow_bb_inside_loop_p (loop, gimple_bb (use_stmt))
          && vinfo_for_stmt (use_stmt)
          && !is_pattern_stmt_p (vinfo_for_stmt (use_stmt)))
        nloop_uses++;
      if (nloop_uses > 1)
        {
          if (vect_print_dump_info (REPORT_DETAILS))
            fprintf (vect_dump, "reduction used in loop.");
          return NULL;
        }
    }

  if (TREE_CODE (loop_arg) != SSA_NAME)
    {
      if (vect_print_dump_info (REPORT_DETAILS))
        {
          fprintf (vect_dump, "reduction: not ssa_name: ");
          print_generic_expr (vect_dump, loop_arg, TDF_SLIM);
        }
      return NULL;
    }

  def_stmt = SSA_NAME_DEF_STMT (loop_arg);
  if (!def_stmt)
    {
      if (vect_print_dump_info (REPORT_DETAILS))
        fprintf (vect_dump, "reduction: no def_stmt.");
      return NULL;
    }

  if (!is_gimple_assign (def_stmt) && gimple_code (def_stmt) != GIMPLE_PHI)
    {
      if (vect_print_dump_info (REPORT_DETAILS))
        print_gimple_stmt (vect_dump, def_stmt, 0, TDF_SLIM);
      return NULL;
    }

  if (is_gimple_assign (def_stmt))
    {
      name = gimple_assign_lhs (def_stmt);
      phi_def = false;
    }
  else
    {
      name = PHI_RESULT (def_stmt);
      phi_def = true;
    }

  nloop_uses = 0;
  FOR_EACH_IMM_USE_FAST (use_p, imm_iter, name)
    {
      gimple use_stmt = USE_STMT (use_p);
      if (is_gimple_debug (use_stmt))
        continue;
      if (flow_bb_inside_loop_p (loop, gimple_bb (use_stmt))
          && vinfo_for_stmt (use_stmt)
          && !is_pattern_stmt_p (vinfo_for_stmt (use_stmt)))
        nloop_uses++;
      if (nloop_uses > 1)
        {
          if (vect_print_dump_info (REPORT_DETAILS))
            fprintf (vect_dump, "reduction used in loop.");
          return NULL;
        }
    }

  /* If DEF_STMT is a phi node itself, we expect it to have a single argument
     defined in the inner loop.  */
  if (phi_def)
    {
      op1 = PHI_ARG_DEF (def_stmt, 0);

      if (gimple_phi_num_args (def_stmt) != 1
          || TREE_CODE (op1) != SSA_NAME)
        {
          if (vect_print_dump_info (REPORT_DETAILS))
            fprintf (vect_dump, "unsupported phi node definition.");

          return NULL;
        }

      def1 = SSA_NAME_DEF_STMT (op1);
      if (flow_bb_inside_loop_p (loop, gimple_bb (def_stmt))
          && loop->inner
          && flow_bb_inside_loop_p (loop->inner, gimple_bb (def1))
          && is_gimple_assign (def1))
        {
          if (vect_print_dump_info (REPORT_DETAILS))
            report_vect_op (def_stmt, "detected double reduction: ");

          *double_reduc = true;
          return def_stmt;
        }

      return NULL;
    }

  code = gimple_assign_rhs_code (def_stmt);

  if (check_reduction
      && (!commutative_tree_code (code) || !associative_tree_code (code)))
    {
      if (vect_print_dump_info (REPORT_DETAILS))
        report_vect_op (def_stmt, "reduction: not commutative/associative: ");
      return NULL;
    }

  if (get_gimple_rhs_class (code) != GIMPLE_BINARY_RHS)
    {
      if (code != COND_EXPR)
        {
          if (vect_print_dump_info (REPORT_DETAILS))
            report_vect_op (def_stmt, "reduction: not binary operation: ");

          return NULL;
        }

      op3 = TREE_OPERAND (gimple_assign_rhs1 (def_stmt), 0);
      if (COMPARISON_CLASS_P (op3))
        {
          op4 = TREE_OPERAND (op3, 1);
          op3 = TREE_OPERAND (op3, 0);
        }

      op1 = TREE_OPERAND (gimple_assign_rhs1 (def_stmt), 1);
      op2 = TREE_OPERAND (gimple_assign_rhs1 (def_stmt), 2);

      if (TREE_CODE (op1) != SSA_NAME && TREE_CODE (op2) != SSA_NAME)
        {
          if (vect_print_dump_info (REPORT_DETAILS))
            report_vect_op (def_stmt, "reduction: uses not ssa_names: ");

          return NULL;
        }
    }
  else
    {
      op1 = gimple_assign_rhs1 (def_stmt);
      op2 = gimple_assign_rhs2 (def_stmt);

      if (TREE_CODE (op1) != SSA_NAME || TREE_CODE (op2) != SSA_NAME)
        {
          if (vect_print_dump_info (REPORT_DETAILS))
            report_vect_op (def_stmt, "reduction: uses not ssa_names: ");

          return NULL;
        }
    }

  type = TREE_TYPE (gimple_assign_lhs (def_stmt));
  if ((TREE_CODE (op1) == SSA_NAME
       && !types_compatible_p (type, TREE_TYPE (op1)))
      || (TREE_CODE (op2) == SSA_NAME
          && !types_compatible_p (type, TREE_TYPE (op2)))
      || (op3 && TREE_CODE (op3) == SSA_NAME
          && !types_compatible_p (type, TREE_TYPE (op3)))
      || (op4 && TREE_CODE (op4) == SSA_NAME
          && !types_compatible_p (type, TREE_TYPE (op4))))
    {
      if (vect_print_dump_info (REPORT_DETAILS))
        {
          fprintf (vect_dump, "reduction: multiple types: operation type: ");
          print_generic_expr (vect_dump, type, TDF_SLIM);
          fprintf (vect_dump, ", operands types: ");
          print_generic_expr (vect_dump, TREE_TYPE (op1), TDF_SLIM);
          fprintf (vect_dump, ",");
          print_generic_expr (vect_dump, TREE_TYPE (op2), TDF_SLIM);
          if (op3)
            {
              fprintf (vect_dump, ",");
              print_generic_expr (vect_dump, TREE_TYPE (op3), TDF_SLIM);
            }

          if (op4)
            {
              fprintf (vect_dump, ",");
              print_generic_expr (vect_dump, TREE_TYPE (op4), TDF_SLIM);
            }
        }

      return NULL;
    }

  /* Check that it's ok to change the order of the computation.
     Generally, when vectorizing a reduction we change the order of the
     computation.  This may change the behavior of the program in some
     cases, so we need to check that this is ok.  One exception is when
     vectorizing an outer-loop: the inner-loop is executed sequentially,
     and therefore vectorizing reductions in the inner-loop during
     outer-loop vectorization is safe.  */
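
  /* For example, a vectorized float sum accumulates several independent
     partial sums that are combined only in the epilog; this reorders the
     additions, and since floating-point addition is not associative the
     result may round differently.  Hence the transformation is allowed
     only when -fassociative-math is in effect.  */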

  /* CHECKME: check for !flag_finite_math_only too?  */
  if (SCALAR_FLOAT_TYPE_P (type) && !flag_associative_math
      && check_reduction)
    {
      /* Changing the order of operations changes the semantics.  */
      if (vect_print_dump_info (REPORT_DETAILS))
        report_vect_op (def_stmt, "reduction: unsafe fp math optimization: ");
      return NULL;
    }
  else if (INTEGRAL_TYPE_P (type) && TYPE_OVERFLOW_TRAPS (type)
           && check_reduction)
    {
      /* Changing the order of operations changes the semantics.  */
      if (vect_print_dump_info (REPORT_DETAILS))
        report_vect_op (def_stmt, "reduction: unsafe int math optimization: ");
      return NULL;
    }
  else if (SAT_FIXED_POINT_TYPE_P (type) && check_reduction)
    {
      /* Changing the order of operations changes the semantics.  */
      if (vect_print_dump_info (REPORT_DETAILS))
        report_vect_op (def_stmt,
                        "reduction: unsafe fixed-point math optimization: ");
      return NULL;
    }

  /* Reduction is safe.  We're dealing with one of the following:
     1) integer arithmetic and no trapv
     2) floating point arithmetic, and special flags permit this optimization
     3) nested cycle (i.e., outer loop vectorization).  */
  if (TREE_CODE (op1) == SSA_NAME)
    def1 = SSA_NAME_DEF_STMT (op1);

  if (TREE_CODE (op2) == SSA_NAME)
    def2 = SSA_NAME_DEF_STMT (op2);

  if (code != COND_EXPR
      && (!def1 || !def2 || gimple_nop_p (def1) || gimple_nop_p (def2)))
    {
      if (vect_print_dump_info (REPORT_DETAILS))
        report_vect_op (def_stmt, "reduction: no defs for operands: ");
      return NULL;
    }

  /* Check that one def is the reduction def, defined by PHI,
     the other def is either defined in the loop ("vect_internal_def"),
     or it's an induction (defined by a loop-header phi-node).  */

  if (def2 && def2 == phi
      && (code == COND_EXPR
          || (def1 && flow_bb_inside_loop_p (loop, gimple_bb (def1))
              && (is_gimple_assign (def1)
                  || STMT_VINFO_DEF_TYPE (vinfo_for_stmt (def1))
                       == vect_induction_def
                  || (gimple_code (def1) == GIMPLE_PHI
                      && STMT_VINFO_DEF_TYPE (vinfo_for_stmt (def1))
                           == vect_internal_def
                      && !is_loop_header_bb_p (gimple_bb (def1)))))))
    {
      if (vect_print_dump_info (REPORT_DETAILS))
        report_vect_op (def_stmt, "detected reduction: ");
      return def_stmt;
    }
  else if (def1 && def1 == phi
           && (code == COND_EXPR
               || (def2 && flow_bb_inside_loop_p (loop, gimple_bb (def2))
                   && (is_gimple_assign (def2)
                       || STMT_VINFO_DEF_TYPE (vinfo_for_stmt (def2))
                            == vect_induction_def
                       || (gimple_code (def2) == GIMPLE_PHI
                           && STMT_VINFO_DEF_TYPE (vinfo_for_stmt (def2))
                                == vect_internal_def
                           && !is_loop_header_bb_p (gimple_bb (def2)))))))
    {
      if (check_reduction)
        {
          /* Swap operands (just for simplicity - so that the rest of the code
             can assume that the reduction variable is always the last (second)
             argument).  */
          if (vect_print_dump_info (REPORT_DETAILS))
            report_vect_op (def_stmt,
                            "detected reduction: need to swap operands: ");

          swap_tree_operands (def_stmt, gimple_assign_rhs1_ptr (def_stmt),
                              gimple_assign_rhs2_ptr (def_stmt));
        }
      else
        {
          if (vect_print_dump_info (REPORT_DETAILS))
            report_vect_op (def_stmt, "detected reduction: ");
        }

      return def_stmt;
    }
  else
    {
      if (vect_print_dump_info (REPORT_DETAILS))
        report_vect_op (def_stmt, "reduction: unknown pattern: ");

      return NULL;
    }
}


/* Function vect_estimate_min_profitable_iters

   Return the number of iterations required for the vector version of the
   loop to be profitable relative to the cost of the scalar version of the
   loop.

   TODO: Take profile info into account before making vectorization
   decisions, if available.  */

int
vect_estimate_min_profitable_iters (loop_vec_info loop_vinfo)
{
  int i;
  int min_profitable_iters;
  int peel_iters_prologue;
  int peel_iters_epilogue;
  int vec_inside_cost = 0;
  int vec_outside_cost = 0;
  int scalar_single_iter_cost = 0;
  int scalar_outside_cost = 0;
  int vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
  struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
  basic_block *bbs = LOOP_VINFO_BBS (loop_vinfo);
  int nbbs = loop->num_nodes;
  int byte_misalign = LOOP_PEELING_FOR_ALIGNMENT (loop_vinfo);
  int peel_guard_costs = 0;
  int innerloop_iters = 0, factor;
  VEC (slp_instance, heap) *slp_instances;
  slp_instance instance;

  /* Cost model disabled.  */
  if (!flag_vect_cost_model)
    {
      if (vect_print_dump_info (REPORT_COST))
        fprintf (vect_dump, "cost model disabled.");
      return 0;
    }

  /* Requires loop versioning tests to handle misalignment.  */
  if (LOOP_REQUIRES_VERSIONING_FOR_ALIGNMENT (loop_vinfo))
    {
      /* FIXME: Make cost depend on complexity of individual check.  */
      vec_outside_cost +=
        VEC_length (gimple, LOOP_VINFO_MAY_MISALIGN_STMTS (loop_vinfo));
      if (vect_print_dump_info (REPORT_COST))
        fprintf (vect_dump, "cost model: Adding cost of checks for loop "
                 "versioning to treat misalignment.\n");
    }

  /* Requires loop versioning with alias checks.  */
  if (LOOP_REQUIRES_VERSIONING_FOR_ALIAS (loop_vinfo))
    {
      /* FIXME: Make cost depend on complexity of individual check.  */
      vec_outside_cost +=
        VEC_length (ddr_p, LOOP_VINFO_MAY_ALIAS_DDRS (loop_vinfo));
      if (vect_print_dump_info (REPORT_COST))
        fprintf (vect_dump, "cost model: Adding cost of checks for loop "
                 "versioning aliasing.\n");
    }

  if (LOOP_REQUIRES_VERSIONING_FOR_ALIGNMENT (loop_vinfo)
      || LOOP_REQUIRES_VERSIONING_FOR_ALIAS (loop_vinfo))
    vec_outside_cost += TARG_COND_TAKEN_BRANCH_COST;

  /* Count statements in the scalar loop.  We use this as the scalar cost
     of a single iteration for now.

     TODO: Add outer loop support.

     TODO: Consider assigning different costs to different scalar
     statements.  */

  /* FORNOW.  */
  if (loop->inner)
    innerloop_iters = 50; /* FIXME */

  for (i = 0; i < nbbs; i++)
    {
      gimple_stmt_iterator si;
      basic_block bb = bbs[i];

      if (bb->loop_father == loop->inner)
        factor = innerloop_iters;
      else
        factor = 1;

      for (si = gsi_start_bb (bb); !gsi_end_p (si); gsi_next (&si))
        {
          gimple stmt = gsi_stmt (si);
          stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
          /* Skip stmts that are not vectorized inside the loop.  */
          if (!STMT_VINFO_RELEVANT_P (stmt_info)
              && (!STMT_VINFO_LIVE_P (stmt_info)
                  || STMT_VINFO_DEF_TYPE (stmt_info) != vect_reduction_def))
            continue;
          scalar_single_iter_cost += cost_for_stmt (stmt) * factor;
          vec_inside_cost += STMT_VINFO_INSIDE_OF_LOOP_COST (stmt_info)
                             * factor;
          /* FIXME: for stmts in the inner-loop in outer-loop vectorization,
             some of the "outside" costs are generated inside the
             outer-loop.  */
          vec_outside_cost += STMT_VINFO_OUTSIDE_OF_LOOP_COST (stmt_info);
        }
    }

  /* Add additional cost for the peeled instructions in the prologue and
     epilogue loop.

     FORNOW: If we don't know the value of peel_iters for prologue or epilogue
     at compile-time - we assume it's vf/2 (the worst would be vf-1).

     TODO: Build an expression that represents peel_iters for prologue and
     epilogue to be used in a run-time test.  */

  if (byte_misalign < 0)
    {
      peel_iters_prologue = vf/2;
      if (vect_print_dump_info (REPORT_COST))
        fprintf (vect_dump, "cost model: "
                 "prologue peel iters set to vf/2.");

      /* If peeling for alignment is unknown, the loop bound of the main
         loop becomes unknown.  */
      peel_iters_epilogue = vf/2;
      if (vect_print_dump_info (REPORT_COST))
        fprintf (vect_dump, "cost model: "
                 "epilogue peel iters set to vf/2 because "
                 "peeling for alignment is unknown.");

      /* If peeled iterations are unknown, count a taken branch and a
         not-taken branch per peeled loop.  Even if scalar loop iterations
         are known, vector iterations are not known since peeled prologue
         iterations are not known.  Hence guards remain the same.  */
      peel_guard_costs += 2 * (TARG_COND_TAKEN_BRANCH_COST
                               + TARG_COND_NOT_TAKEN_BRANCH_COST);
    }
  else
    {
      if (byte_misalign)
        {
          struct data_reference *dr = LOOP_VINFO_UNALIGNED_DR (loop_vinfo);
          int element_size
            = GET_MODE_SIZE (TYPE_MODE (TREE_TYPE (DR_REF (dr))));
          tree vectype = STMT_VINFO_VECTYPE (vinfo_for_stmt (DR_STMT (dr)));
          int nelements = TYPE_VECTOR_SUBPARTS (vectype);
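
          /* For example (hypothetical numbers): for V4SI with 4-byte
             elements and byte_misalign == 8, the first access is two
             elements past an aligned boundary, so peeling
             4 - 8/4 == 2 scalar iterations makes it aligned.  */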
          peel_iters_prologue = nelements - (byte_misalign / element_size);
        }
      else
        peel_iters_prologue = 0;

      if (!LOOP_VINFO_NITERS_KNOWN_P (loop_vinfo))
        {
          peel_iters_epilogue = vf/2;
          if (vect_print_dump_info (REPORT_COST))
            fprintf (vect_dump, "cost model: "
                     "epilogue peel iters set to vf/2 because "
                     "loop iterations are unknown.");

          /* If peeled iterations are known but the number of scalar loop
             iterations is unknown, count a taken branch per peeled loop.  */
          peel_guard_costs += 2 * TARG_COND_TAKEN_BRANCH_COST;

        }
      else
        {
          int niters = LOOP_VINFO_INT_NITERS (loop_vinfo);
          peel_iters_prologue = niters < peel_iters_prologue ?
                                        niters : peel_iters_prologue;
          peel_iters_epilogue = (niters - peel_iters_prologue) % vf;
        }
    }

  vec_outside_cost += (peel_iters_prologue * scalar_single_iter_cost)
                      + (peel_iters_epilogue * scalar_single_iter_cost)
                      + peel_guard_costs;

  /* FORNOW: The scalar outside cost is incremented in one of the
     following ways:

     1. The vectorizer checks for alignment and aliasing and generates
     a condition that allows dynamic vectorization.  A cost model
     check is ANDed with the versioning condition.  Hence the scalar code
     path now has the added cost of the versioning check.

       if (cost > th & versioning_check)
         jmp to vector code

     Hence the run-time scalar cost is incremented by a not-taken branch
     cost.

     2. The vectorizer then checks if a prologue is required.  If the
     cost model check was not done during versioning, it has to be done
     before the prologue check.

       if (cost <= th)
         prologue = scalar_iters
       if (prologue == 0)
         jmp to vector code
       else
         execute prologue
       if (prologue == num_iters)
         go to exit

     Hence the run-time scalar cost is incremented by a taken branch,
     plus a not-taken branch, plus a taken branch cost.

     3. The vectorizer then checks if an epilogue is required.  If the
     cost model check was not done during the prologue check, it has to
     be done with the epilogue check.

       if (prologue == 0)
         jmp to vector code
       else
         execute prologue
       if (prologue == num_iters)
         go to exit
       vector code:
         if ((cost <= th) | (scalar_iters-prologue-epilogue == 0))
           jmp to epilogue

     Hence the run-time scalar cost should be incremented by 2 taken
     branches.

     TODO: The back end may reorder the BBs differently and reverse
     conditions/branch directions.  Change the estimates below to
     something more reasonable.  */

  /* If the number of iterations is known and we do not do versioning, we can
     decide whether to vectorize at compile time.  Hence the scalar version
     does not carry cost model guard costs.  */
  if (!LOOP_VINFO_NITERS_KNOWN_P (loop_vinfo)
      || LOOP_REQUIRES_VERSIONING_FOR_ALIGNMENT (loop_vinfo)
      || LOOP_REQUIRES_VERSIONING_FOR_ALIAS (loop_vinfo))
    {
      /* Cost model check occurs at versioning.  */
      if (LOOP_REQUIRES_VERSIONING_FOR_ALIGNMENT (loop_vinfo)
          || LOOP_REQUIRES_VERSIONING_FOR_ALIAS (loop_vinfo))
        scalar_outside_cost += TARG_COND_NOT_TAKEN_BRANCH_COST;
      else
        {
          /* Cost model check occurs at prologue generation.  */
          if (LOOP_PEELING_FOR_ALIGNMENT (loop_vinfo) < 0)
            scalar_outside_cost += 2 * TARG_COND_TAKEN_BRANCH_COST
              + TARG_COND_NOT_TAKEN_BRANCH_COST;
          /* Cost model check occurs at epilogue generation.  */
          else
            scalar_outside_cost += 2 * TARG_COND_TAKEN_BRANCH_COST;
        }
    }

  /* Add SLP costs.  */
  slp_instances = LOOP_VINFO_SLP_INSTANCES (loop_vinfo);
  for (i = 0; VEC_iterate (slp_instance, slp_instances, i, instance); i++)
    {
      vec_outside_cost += SLP_INSTANCE_OUTSIDE_OF_LOOP_COST (instance);
      vec_inside_cost += SLP_INSTANCE_INSIDE_OF_LOOP_COST (instance);
    }

  /* Calculate the number of iterations required to make the vector
     version profitable, relative to the loop bodies only.  The following
     condition must hold true:
     SIC * niters + SOC > VIC * ((niters-PL_ITERS-EP_ITERS)/VF) + VOC
     where
     SIC = scalar iteration cost, VIC = vector iteration cost,
     VOC = vector outside cost, VF = vectorization factor,
     PL_ITERS = prologue iterations, EP_ITERS = epilogue iterations,
     SOC = scalar outside cost for the run-time cost model check.  */
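
  /* A worked example with hypothetical costs: SIC = 4, VIC = 6, VOC = 30,
     SOC = 0, VF = 4 and no prologue/epilogue peeling gives
     min_profitable_iters = (30 * 4) / (4 * 4 - 6) = 12, bumped to 13 by
     the correction below; i.e. the vector loop starts to pay off after 12
     iterations (before the clamping to VF and the final decrement).  */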

  if ((scalar_single_iter_cost * vf) > vec_inside_cost)
    {
      if (vec_outside_cost <= 0)
        min_profitable_iters = 1;
      else
        {
          min_profitable_iters = ((vec_outside_cost - scalar_outside_cost) * vf
                                  - vec_inside_cost * peel_iters_prologue
                                  - vec_inside_cost * peel_iters_epilogue)
                                 / ((scalar_single_iter_cost * vf)
                                    - vec_inside_cost);

          if ((scalar_single_iter_cost * vf * min_profitable_iters)
              <= ((vec_inside_cost * min_profitable_iters)
                  + ((vec_outside_cost - scalar_outside_cost) * vf)))
            min_profitable_iters++;
        }
    }
  /* The vector version will never be profitable.  */
  else
    {
      if (vect_print_dump_info (REPORT_COST))
        fprintf (vect_dump, "cost model: the vector iteration cost = %d "
                 "divided by the scalar iteration cost = %d "
                 "is greater than or equal to the vectorization factor = %d.",
                 vec_inside_cost, scalar_single_iter_cost, vf);
      return -1;
    }

  if (vect_print_dump_info (REPORT_COST))
    {
      fprintf (vect_dump, "Cost model analysis: \n");
      fprintf (vect_dump, "  Vector inside of loop cost: %d\n",
               vec_inside_cost);
      fprintf (vect_dump, "  Vector outside of loop cost: %d\n",
               vec_outside_cost);
      fprintf (vect_dump, "  Scalar iteration cost: %d\n",
               scalar_single_iter_cost);
      fprintf (vect_dump, "  Scalar outside cost: %d\n", scalar_outside_cost);
      fprintf (vect_dump, "  prologue iterations: %d\n",
               peel_iters_prologue);
      fprintf (vect_dump, "  epilogue iterations: %d\n",
               peel_iters_epilogue);
      fprintf (vect_dump, "  Calculated minimum iters for profitability: %d\n",
               min_profitable_iters);
    }

  min_profitable_iters =
        min_profitable_iters < vf ? vf : min_profitable_iters;

  /* Because the condition we create is:
     if (niters <= min_profitable_iters)
       then skip the vectorized loop.  */
  min_profitable_iters--;

  if (vect_print_dump_info (REPORT_COST))
    fprintf (vect_dump, "  Profitability threshold = %d\n",
             min_profitable_iters);

  return min_profitable_iters;
}


/* TODO: Close dependency between vect_model_*_cost and vectorizable_*
   functions.  Design better to avoid maintenance issues.  */

/* Function vect_model_reduction_cost.

   Models cost for a reduction operation, including the vector ops
   generated within the strip-mine loop, the initial definition before
   the loop, and the epilogue code that must be generated.  */

static bool
vect_model_reduction_cost (stmt_vec_info stmt_info, enum tree_code reduc_code,
                           int ncopies)
{
  int outer_cost = 0;
  enum tree_code code;
  optab optab;
  tree vectype;
  gimple stmt, orig_stmt;
  tree reduction_op;
  enum machine_mode mode;
  loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
  struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);


  /* Cost of reduction op inside loop.  */
  STMT_VINFO_INSIDE_OF_LOOP_COST (stmt_info) += ncopies * TARG_VEC_STMT_COST;

  stmt = STMT_VINFO_STMT (stmt_info);

  switch (get_gimple_rhs_class (gimple_assign_rhs_code (stmt)))
    {
    case GIMPLE_SINGLE_RHS:
      gcc_assert (TREE_OPERAND_LENGTH (gimple_assign_rhs1 (stmt))
                  == ternary_op);
      reduction_op = TREE_OPERAND (gimple_assign_rhs1 (stmt), 2);
      break;
    case GIMPLE_UNARY_RHS:
      reduction_op = gimple_assign_rhs1 (stmt);
      break;
    case GIMPLE_BINARY_RHS:
      reduction_op = gimple_assign_rhs2 (stmt);
      break;
    default:
      gcc_unreachable ();
    }

  vectype = get_vectype_for_scalar_type (TREE_TYPE (reduction_op));
  if (!vectype)
    {
      if (vect_print_dump_info (REPORT_COST))
        {
          fprintf (vect_dump, "unsupported data-type ");
          print_generic_expr (vect_dump, TREE_TYPE (reduction_op), TDF_SLIM);
        }
      return false;
    }

  mode = TYPE_MODE (vectype);
  orig_stmt = STMT_VINFO_RELATED_STMT (stmt_info);

  if (!orig_stmt)
    orig_stmt = STMT_VINFO_STMT (stmt_info);

  code = gimple_assign_rhs_code (orig_stmt);

  /* Add in the cost of the initial definition.  */
  outer_cost += TARG_SCALAR_TO_VEC_COST;

  /* Determine the cost of the epilogue code.

     If we have a reduction operator that reduces the whole vector in one
     statement (reduc_code != ERROR_MARK), the epilog is that statement
     plus a scalar extract; otherwise the epilog uses whole-vector shifts
     or element-wise extracts, costed below.  */

  if (!nested_in_vect_loop_p (loop, orig_stmt))
    {
      if (reduc_code != ERROR_MARK)
        outer_cost += TARG_VEC_STMT_COST + TARG_VEC_TO_SCALAR_COST;
      else
        {
          int vec_size_in_bits = tree_low_cst (TYPE_SIZE (vectype), 1);
          tree bitsize =
            TYPE_SIZE (TREE_TYPE (gimple_assign_lhs (orig_stmt)));
          int element_bitsize = tree_low_cst (bitsize, 1);
          int nelements = vec_size_in_bits / element_bitsize;

          optab = optab_for_tree_code (code, vectype, optab_default);

          /* We have a whole vector shift available.  */
          if (VECTOR_MODE_P (mode)
              && optab_handler (optab, mode)->insn_code != CODE_FOR_nothing
              && optab_handler (vec_shr_optab, mode)->insn_code
                 != CODE_FOR_nothing)
            /* Final reduction via vector shifts and the reduction operator.
               Also requires scalar extract.  */
            outer_cost += ((exact_log2 (nelements) * 2) * TARG_VEC_STMT_COST
                           + TARG_VEC_TO_SCALAR_COST);
          else
            /* Use extracts and reduction op for final reduction.  For N
               elements, we have N extracts and N-1 reduction ops.  */
            outer_cost += ((nelements + nelements - 1) * TARG_VEC_STMT_COST);
        }
    }

  STMT_VINFO_OUTSIDE_OF_LOOP_COST (stmt_info) = outer_cost;

  if (vect_print_dump_info (REPORT_COST))
    fprintf (vect_dump, "vect_model_reduction_cost: inside_cost = %d, "
             "outside_cost = %d .", STMT_VINFO_INSIDE_OF_LOOP_COST (stmt_info),
             STMT_VINFO_OUTSIDE_OF_LOOP_COST (stmt_info));

  return true;
}


/* Function vect_model_induction_cost.

   Models cost for induction operations.  */

static void
vect_model_induction_cost (stmt_vec_info stmt_info, int ncopies)
{
  /* Loop cost for vec_loop.  */
  STMT_VINFO_INSIDE_OF_LOOP_COST (stmt_info) = ncopies * TARG_VEC_STMT_COST;
  /* Prologue cost for vec_init and vec_step.  */
  STMT_VINFO_OUTSIDE_OF_LOOP_COST (stmt_info) = 2 * TARG_SCALAR_TO_VEC_COST;

  if (vect_print_dump_info (REPORT_COST))
    fprintf (vect_dump, "vect_model_induction_cost: inside_cost = %d, "
             "outside_cost = %d .", STMT_VINFO_INSIDE_OF_LOOP_COST (stmt_info),
             STMT_VINFO_OUTSIDE_OF_LOOP_COST (stmt_info));
}


/* Function get_initial_def_for_induction

   Input:
   IV_PHI - the loop-header phi node of the induction variable; its
   loop-entry argument is the initial value of the induction.

   Output:
   Return a vector variable, initialized with the first VF values of
   the induction variable.  E.g., for an iv with initial value 'X' and
   evolution S, for a vector of 4 units, we want to return:
   [X, X + S, X + 2*S, X + 3*S].  */

static tree
get_initial_def_for_induction (gimple iv_phi)
{
  stmt_vec_info stmt_vinfo = vinfo_for_stmt (iv_phi);
  loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_vinfo);
  struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
  tree scalar_type;
  tree vectype;
  int nunits;
  edge pe = loop_preheader_edge (loop);
  struct loop *iv_loop;
  basic_block new_bb;
  tree vec, vec_init, vec_step, t;
  tree access_fn;
  tree new_var;
  tree new_name;
  gimple init_stmt, induction_phi, new_stmt;
  tree induc_def, vec_def, vec_dest;
  tree init_expr, step_expr;
  int vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
  int i;
  bool ok;
  int ncopies;
  tree expr;
  stmt_vec_info phi_info = vinfo_for_stmt (iv_phi);
  bool nested_in_vect_loop = false;
  gimple_seq stmts = NULL;
  imm_use_iterator imm_iter;
  use_operand_p use_p;
  gimple exit_phi;
  edge latch_e;
  tree loop_arg;
  gimple_stmt_iterator si;
  basic_block bb = gimple_bb (iv_phi);
  tree stepvectype;
  tree resvectype;

  /* Is phi in an inner-loop, while vectorizing an enclosing outer-loop?  */
  if (nested_in_vect_loop_p (loop, iv_phi))
    {
      nested_in_vect_loop = true;
      iv_loop = loop->inner;
    }
  else
    iv_loop = loop;
  gcc_assert (iv_loop == (gimple_bb (iv_phi))->loop_father);

  latch_e = loop_latch_edge (iv_loop);
  loop_arg = PHI_ARG_DEF_FROM_EDGE (iv_phi, latch_e);

  access_fn = analyze_scalar_evolution (iv_loop, PHI_RESULT (iv_phi));
  gcc_assert (access_fn);
  STRIP_NOPS (access_fn);
  ok = vect_is_simple_iv_evolution (iv_loop->num, access_fn,
                                    &init_expr, &step_expr);
  gcc_assert (ok);
  pe = loop_preheader_edge (iv_loop);

  scalar_type = TREE_TYPE (init_expr);
  vectype = get_vectype_for_scalar_type (scalar_type);
  resvectype = get_vectype_for_scalar_type (TREE_TYPE (PHI_RESULT (iv_phi)));
  gcc_assert (vectype);
  nunits = TYPE_VECTOR_SUBPARTS (vectype);
  ncopies = vf / nunits;

  gcc_assert (phi_info);
  gcc_assert (ncopies >= 1);

  /* Find the first insertion point in the BB.  */
  si = gsi_after_labels (bb);

  /* Create the vector that holds the initial_value of the induction.  */
  if (nested_in_vect_loop)
    {
      /* iv_loop is nested in the loop to be vectorized.  init_expr has
         already been created during vectorization of previous stmts; we
         obtain it from the STMT_VINFO_VEC_STMT of the defining stmt.  */
      tree iv_def = PHI_ARG_DEF_FROM_EDGE (iv_phi,
                                           loop_preheader_edge (iv_loop));
      vec_init = vect_get_vec_def_for_operand (iv_def, iv_phi, NULL);
    }
  else
    {
      /* iv_loop is the loop to be vectorized.  Create:
         vec_init = [X, X+S, X+2*S, X+3*S] (S = step_expr, X = init_expr)  */
      new_var = vect_get_new_vect_var (scalar_type, vect_scalar_var, "var_");
      add_referenced_var (new_var);

      new_name = force_gimple_operand (init_expr, &stmts, false, new_var);
      if (stmts)
        {
          new_bb = gsi_insert_seq_on_edge_immediate (pe, stmts);
          gcc_assert (!new_bb);
        }

      t = tree_cons (NULL_TREE, new_name, NULL_TREE);
      for (i = 1; i < nunits; i++)
        {
          /* Create: new_name_i = new_name + step_expr  */
          enum tree_code code = POINTER_TYPE_P (scalar_type)
                                ? POINTER_PLUS_EXPR : PLUS_EXPR;
          init_stmt = gimple_build_assign_with_ops (code, new_var,
                                                    new_name, step_expr);
          new_name = make_ssa_name (new_var, init_stmt);
          gimple_assign_set_lhs (init_stmt, new_name);

          new_bb = gsi_insert_on_edge_immediate (pe, init_stmt);
          gcc_assert (!new_bb);

          if (vect_print_dump_info (REPORT_DETAILS))
            {
              fprintf (vect_dump, "created new init_stmt: ");
              print_gimple_stmt (vect_dump, init_stmt, 0, TDF_SLIM);
            }
          t = tree_cons (NULL_TREE, new_name, t);
        }
      /* Create a vector from [new_name_0, new_name_1, ...,
         new_name_nunits-1].  */
      vec = build_constructor_from_list (vectype, nreverse (t));
      vec_init = vect_init_vector (iv_phi, vec, vectype, NULL);
    }


  /* Create the vector that holds the step of the induction.  */
  if (nested_in_vect_loop)
    /* iv_loop is nested in the loop to be vectorized.  Generate:
       vec_step = [S, S, S, S]  */
    new_name = step_expr;
  else
    {
      /* iv_loop is the loop to be vectorized.  Generate:
          vec_step = [VF*S, VF*S, VF*S, VF*S]  */
      expr = build_int_cst (TREE_TYPE (step_expr), vf);
      new_name = fold_build2 (MULT_EXPR, TREE_TYPE (step_expr),
                              expr, step_expr);
    }

  t = NULL_TREE;
  for (i = 0; i < nunits; i++)
    t = tree_cons (NULL_TREE, unshare_expr (new_name), t);
  gcc_assert (CONSTANT_CLASS_P (new_name));
  stepvectype = get_vectype_for_scalar_type (TREE_TYPE (new_name));
  gcc_assert (stepvectype);
  vec = build_vector (stepvectype, t);
  vec_step = vect_init_vector (iv_phi, vec, stepvectype, NULL);


  /* Create the following def-use cycle:
     loop prolog:
         vec_init = ...
         vec_step = ...
     loop:
         vec_iv = PHI <vec_init, vec_loop>
         ...
         STMT
         ...
         vec_loop = vec_iv + vec_step;  */

  /* Create the induction-phi that defines the induction-operand.  */
  vec_dest = vect_get_new_vect_var (vectype, vect_simple_var, "vec_iv_");
  add_referenced_var (vec_dest);
  induction_phi = create_phi_node (vec_dest, iv_loop->header);
  set_vinfo_for_stmt (induction_phi,
                      new_stmt_vec_info (induction_phi, loop_vinfo, NULL));
  induc_def = PHI_RESULT (induction_phi);

  /* Create the iv update inside the loop.  */
  new_stmt = gimple_build_assign_with_ops (PLUS_EXPR, vec_dest,
                                           induc_def, vec_step);
  vec_def = make_ssa_name (vec_dest, new_stmt);
  gimple_assign_set_lhs (new_stmt, vec_def);
  gsi_insert_before (&si, new_stmt, GSI_SAME_STMT);
  set_vinfo_for_stmt (new_stmt, new_stmt_vec_info (new_stmt, loop_vinfo,
                                                   NULL));

  /* Set the arguments of the phi node:  */
  add_phi_arg (induction_phi, vec_init, pe, UNKNOWN_LOCATION);
  add_phi_arg (induction_phi, vec_def, loop_latch_edge (iv_loop),
               UNKNOWN_LOCATION);


  /* In case the vectorization factor (VF) is bigger than the number
     of elements that we can fit in a vectype (nunits), we have to generate
     more than one vector stmt - i.e - we need to "unroll" the
     vector stmt by a factor VF/nunits.  For more details see documentation
     in vectorizable_operation.  */
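
  /* For example, with VF = 8 and nunits = 4 (ncopies = 2), the first copy
     holds [X, X+S, X+2*S, X+3*S] and the second copy, created below,
     holds [X+4*S, ..., X+7*S]; effectively both advance by VF*S on every
     iteration of the vectorized loop.  */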

  if (ncopies > 1)
    {
      stmt_vec_info prev_stmt_vinfo;
      /* FORNOW.  This restriction should be relaxed.  */
      gcc_assert (!nested_in_vect_loop);

      /* Create the vector that holds the step of the induction.  */
      expr = build_int_cst (TREE_TYPE (step_expr), nunits);
      new_name = fold_build2 (MULT_EXPR, TREE_TYPE (step_expr),
                              expr, step_expr);
      t = NULL_TREE;
      for (i = 0; i < nunits; i++)
        t = tree_cons (NULL_TREE, unshare_expr (new_name), t);
      gcc_assert (CONSTANT_CLASS_P (new_name));
      vec = build_vector (stepvectype, t);
      vec_step = vect_init_vector (iv_phi, vec, stepvectype, NULL);

      vec_def = induc_def;
      prev_stmt_vinfo = vinfo_for_stmt (induction_phi);
      for (i = 1; i < ncopies; i++)
        {
          /* vec_i = vec_prev + vec_step  */
          new_stmt = gimple_build_assign_with_ops (PLUS_EXPR, vec_dest,
                                                   vec_def, vec_step);
          vec_def = make_ssa_name (vec_dest, new_stmt);
          gimple_assign_set_lhs (new_stmt, vec_def);

          gsi_insert_before (&si, new_stmt, GSI_SAME_STMT);
          if (!useless_type_conversion_p (resvectype, vectype))
            {
              new_stmt = gimple_build_assign_with_ops
                  (VIEW_CONVERT_EXPR,
                   vect_get_new_vect_var (resvectype, vect_simple_var,
                                          "vec_iv_"),
                   build1 (VIEW_CONVERT_EXPR, resvectype,
                           gimple_assign_lhs (new_stmt)), NULL_TREE);
              gimple_assign_set_lhs (new_stmt,
                                     make_ssa_name
                                       (gimple_assign_lhs (new_stmt), new_stmt));
              gsi_insert_before (&si, new_stmt, GSI_SAME_STMT);
            }
          set_vinfo_for_stmt (new_stmt,
                              new_stmt_vec_info (new_stmt, loop_vinfo, NULL));
          STMT_VINFO_RELATED_STMT (prev_stmt_vinfo) = new_stmt;
          prev_stmt_vinfo = vinfo_for_stmt (new_stmt);
        }
    }

  if (nested_in_vect_loop)
    {
      /* Find the loop-closed exit-phi of the induction, and record
         the final vector of induction results:  */
      exit_phi = NULL;
      FOR_EACH_IMM_USE_FAST (use_p, imm_iter, loop_arg)
        {
          if (!flow_bb_inside_loop_p (iv_loop, gimple_bb (USE_STMT (use_p))))
            {
              exit_phi = USE_STMT (use_p);
              break;
            }
        }
      if (exit_phi)
        {
          stmt_vec_info stmt_vinfo = vinfo_for_stmt (exit_phi);
          /* FORNOW.  Currently not supporting the case that an inner-loop
             induction is not used in the outer-loop (i.e. only outside
             the outer-loop).  */
          gcc_assert (STMT_VINFO_RELEVANT_P (stmt_vinfo)
                      && !STMT_VINFO_LIVE_P (stmt_vinfo));

          STMT_VINFO_VEC_STMT (stmt_vinfo) = new_stmt;
          if (vect_print_dump_info (REPORT_DETAILS))
            {
              fprintf (vect_dump, "vector of inductions after inner-loop:");
              print_gimple_stmt (vect_dump, new_stmt, 0, TDF_SLIM);
            }
        }
    }


  if (vect_print_dump_info (REPORT_DETAILS))
    {
      fprintf (vect_dump, "transform induction: created def-use cycle: ");
      print_gimple_stmt (vect_dump, induction_phi, 0, TDF_SLIM);
      fprintf (vect_dump, "\n");
      print_gimple_stmt (vect_dump, SSA_NAME_DEF_STMT (vec_def), 0, TDF_SLIM);
    }

  STMT_VINFO_VEC_STMT (phi_info) = induction_phi;
  if (!useless_type_conversion_p (resvectype, vectype))
    {
      new_stmt = gimple_build_assign_with_ops
         (VIEW_CONVERT_EXPR,
          vect_get_new_vect_var (resvectype, vect_simple_var, "vec_iv_"),
          build1 (VIEW_CONVERT_EXPR, resvectype, induc_def), NULL_TREE);
      induc_def = make_ssa_name (gimple_assign_lhs (new_stmt), new_stmt);
      gimple_assign_set_lhs (new_stmt, induc_def);
      si = gsi_start_bb (bb);
      gsi_insert_before (&si, new_stmt, GSI_SAME_STMT);
      set_vinfo_for_stmt (new_stmt,
                          new_stmt_vec_info (new_stmt, loop_vinfo, NULL));
      STMT_VINFO_RELATED_STMT (vinfo_for_stmt (new_stmt))
        = STMT_VINFO_RELATED_STMT (vinfo_for_stmt (induction_phi));
    }

  return induc_def;
}


/* Function get_initial_def_for_reduction

   Input:
   STMT - a stmt that performs a reduction operation in the loop.
   INIT_VAL - the initial value of the reduction variable

   Output:
   ADJUSTMENT_DEF - a tree that holds a value to be added to the final result
        of the reduction (used for adjusting the epilog - see below).
   Return a vector variable, initialized according to the operation that STMT
        performs.  This vector will be used as the initial value of the
        vector of partial results.

   Option1 (adjust in epilog): Initialize the vector as follows:
     add/bit or/xor:    [0,0,...,0,0]
     mult/bit and:      [1,1,...,1,1]
     min/max/cond_expr: [init_val,init_val,..,init_val,init_val]
   and when necessary (e.g. add/mult case) let the caller know
   that it needs to adjust the result by init_val.

   Option2: Initialize the vector as follows:
     add/bit or/xor:    [init_val,0,0,...,0]
     mult/bit and:      [init_val,1,1,...,1]
     min/max/cond_expr: [init_val,init_val,...,init_val]
   and no adjustments are needed.

   For example, for the following code:

   s = init_val;
   for (i=0;i<n;i++)
     s = s + a[i];

   STMT is 's = s + a[i]', and the reduction variable is 's'.
   For a vector of 4 units, we want to return either [0,0,0,init_val],
   or [0,0,0,0] and let the caller know that it needs to adjust
   the result at the end by 'init_val'.

   FORNOW, we use the 'adjust in epilog' scheme (Option1) when
   ADJUSTMENT_DEF is not NULL, because then the initialization vector is
   simpler (same element in all entries); otherwise we use Option2.

   A cost model should help decide between these two schemes.  */

tree
get_initial_def_for_reduction (gimple stmt, tree init_val,
                               tree *adjustment_def)
{
  stmt_vec_info stmt_vinfo = vinfo_for_stmt (stmt);
  loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_vinfo);
  struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
  tree scalar_type = TREE_TYPE (init_val);
  tree vectype = get_vectype_for_scalar_type (scalar_type);
  int nunits;
  enum tree_code code = gimple_assign_rhs_code (stmt);
  tree def_for_init;
  tree init_def;
  tree t = NULL_TREE;
  int i;
  bool nested_in_vect_loop = false;
  tree init_value;
  REAL_VALUE_TYPE real_init_val = dconst0;
  int int_init_val = 0;
  gimple def_stmt = NULL;

  gcc_assert (vectype);
  nunits = TYPE_VECTOR_SUBPARTS (vectype);

  gcc_assert (POINTER_TYPE_P (scalar_type) || INTEGRAL_TYPE_P (scalar_type)
              || SCALAR_FLOAT_TYPE_P (scalar_type));

  if (nested_in_vect_loop_p (loop, stmt))
    nested_in_vect_loop = true;
  else
    gcc_assert (loop == (gimple_bb (stmt))->loop_father);

  /* In case of a double reduction we only create a vector variable to be
     put in the reduction phi node.  The actual statement creation is done
     in vect_create_epilog_for_reduction.  */
  if (adjustment_def && nested_in_vect_loop
      && TREE_CODE (init_val) == SSA_NAME
      && (def_stmt = SSA_NAME_DEF_STMT (init_val))
      && gimple_code (def_stmt) == GIMPLE_PHI
      && flow_bb_inside_loop_p (loop, gimple_bb (def_stmt))
      && vinfo_for_stmt (def_stmt)
      && STMT_VINFO_DEF_TYPE (vinfo_for_stmt (def_stmt))
          == vect_double_reduction_def)
    {
      *adjustment_def = NULL;
      return vect_create_destination_var (init_val, vectype);
    }

  if (TREE_CONSTANT (init_val))
    {
      if (SCALAR_FLOAT_TYPE_P (scalar_type))
        init_value = build_real (scalar_type, TREE_REAL_CST (init_val));
      else
        init_value = build_int_cst (scalar_type, TREE_INT_CST_LOW (init_val));
    }
  else
    init_value = init_val;

  switch (code)
    {
      case WIDEN_SUM_EXPR:
      case DOT_PROD_EXPR:
      case PLUS_EXPR:
      case MINUS_EXPR:
      case BIT_IOR_EXPR:
      case BIT_XOR_EXPR:
      case MULT_EXPR:
      case BIT_AND_EXPR:
        /* ADJUSTMENT_DEF is NULL when called from
           vect_create_epilog_for_reduction to vectorize a double
           reduction.  */
        if (adjustment_def)
          {
            if (nested_in_vect_loop)
              *adjustment_def = vect_get_vec_def_for_operand (init_val, stmt,
                                                              NULL);
            else
              *adjustment_def = init_val;
          }

        if (code == MULT_EXPR)
          {
            real_init_val = dconst1;
            int_init_val = 1;
          }

        if (code == BIT_AND_EXPR)
          int_init_val = -1;

        if (SCALAR_FLOAT_TYPE_P (scalar_type))
          def_for_init = build_real (scalar_type, real_init_val);
        else
          def_for_init = build_int_cst (scalar_type, int_init_val);

        /* Create a vector of '0' or '1' except the first element.  */
        for (i = nunits - 2; i >= 0; --i)
          t = tree_cons (NULL_TREE, def_for_init, t);

        /* Option1: the first element is '0' or '1' as well.  */
        if (adjustment_def)
          {
            t = tree_cons (NULL_TREE, def_for_init, t);
            init_def = build_vector (vectype, t);
            break;
          }

        /* Option2: the first element is INIT_VAL.  */
        t = tree_cons (NULL_TREE, init_value, t);
        if (TREE_CONSTANT (init_val))
          init_def = build_vector (vectype, t);
        else
          init_def = build_constructor_from_list (vectype, t);

        break;

      case MIN_EXPR:
      case MAX_EXPR:
      case COND_EXPR:
        if (adjustment_def)
          {
            *adjustment_def = NULL_TREE;
            init_def = vect_get_vec_def_for_operand (init_val, stmt, NULL);
            break;
          }

        for (i = nunits - 1; i >= 0; --i)
          t = tree_cons (NULL_TREE, init_value, t);

        if (TREE_CONSTANT (init_val))
          init_def = build_vector (vectype, t);
        else
          init_def = build_constructor_from_list (vectype, t);

        break;

      default:
        gcc_unreachable ();
    }

  return init_def;
}


/* Function vect_create_epilog_for_reduction

   Create code at the loop-epilog to finalize the result of a reduction
   computation.

   VECT_DEF is a vector of partial results.
   REDUC_CODE is the tree-code for the epilog reduction.
   NCOPIES is > 1 in case the vectorization factor (VF) is bigger than the
     number of elements that we can fit in a vectype (nunits).  In this case
     we have to generate more than one vector stmt - i.e - we need to "unroll"
     the vector stmt by a factor VF/nunits.  For more details see documentation
     in vectorizable_operation.
   STMT is the scalar reduction stmt that is being vectorized.
   REDUCTION_PHI is the phi-node that carries the reduction computation.
   REDUC_INDEX is the index of the operand in the right hand side of the
     statement that is defined by REDUCTION_PHI.
   DOUBLE_REDUC is TRUE if double reduction phi nodes should be handled.

   This function:
   1. Creates the reduction def-use cycle: sets the arguments for
      REDUCTION_PHI:
      The loop-entry argument is the vectorized initial-value of the reduction.
      The loop-latch argument is VECT_DEF - the vector of partial sums.
   2. "Reduces" the vector of partial results VECT_DEF into a single result,
      by applying the operation specified by REDUC_CODE if available, or by
      other means (whole-vector shifts or a scalar loop).
      The function also creates a new phi node at the loop exit to preserve
      loop-closed form, as illustrated below.

     The flow at the entry to this function:

        loop:
          vec_def = phi <null, null>            # REDUCTION_PHI
          VECT_DEF = vector_stmt                # vectorized form of STMT
          s_loop = scalar_stmt                  # (scalar) STMT
        loop_exit:
          s_out0 = phi <s_loop>                 # (scalar) EXIT_PHI
          use <s_out0>
          use <s_out0>

     The above is transformed by this function into:

        loop:
          vec_def = phi <vec_init, VECT_DEF>    # REDUCTION_PHI
          VECT_DEF = vector_stmt                # vectorized form of STMT
          s_loop = scalar_stmt                  # (scalar) STMT
        loop_exit:
          s_out0 = phi <s_loop>                 # (scalar) EXIT_PHI
          v_out1 = phi <VECT_DEF>               # NEW_EXIT_PHI
          v_out2 = reduce <v_out1>
          s_out3 = extract_field <v_out2, 0>
          s_out4 = adjust_result <s_out3>
          use <s_out4>
          use <s_out4>
*/

static void
vect_create_epilog_for_reduction (tree vect_def, gimple stmt,
                                  int ncopies,
                                  enum tree_code reduc_code,
                                  gimple reduction_phi,
                                  int reduc_index,
                                  bool double_reduc)
{
  stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
  stmt_vec_info prev_phi_info;
  tree vectype;
  enum machine_mode mode;
  loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
  struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo), *outer_loop = NULL;
  basic_block exit_bb;
  tree scalar_dest;
  tree scalar_type;
  gimple new_phi = NULL, phi;
  gimple_stmt_iterator exit_gsi;
  tree vec_dest;
  tree new_temp = NULL_TREE;
  tree new_name;
  gimple epilog_stmt = NULL;
  tree new_scalar_dest, new_dest;
  gimple exit_phi;
  tree bitsize, bitpos;
  enum tree_code code = gimple_assign_rhs_code (stmt);
  tree adjustment_def;
  tree vec_initial_def, def;
  tree orig_name;
  imm_use_iterator imm_iter;
  use_operand_p use_p;
  bool extract_scalar_result = false;
  tree reduction_op, expr;
  gimple orig_stmt;
  gimple use_stmt;
  bool nested_in_vect_loop = false;
  VEC(gimple,heap) *phis = NULL;
  enum vect_def_type dt = vect_unknown_def_type;
  int j, i;

  if (nested_in_vect_loop_p (loop, stmt))
    {
      outer_loop = loop;
      loop = loop->inner;
      nested_in_vect_loop = true;
    }

  switch (get_gimple_rhs_class (gimple_assign_rhs_code (stmt)))
    {
    case GIMPLE_SINGLE_RHS:
      gcc_assert (TREE_OPERAND_LENGTH (gimple_assign_rhs1 (stmt))
                  == ternary_op);
      reduction_op = TREE_OPERAND (gimple_assign_rhs1 (stmt), reduc_index);
      break;
    case GIMPLE_UNARY_RHS:
      reduction_op = gimple_assign_rhs1 (stmt);
      break;
    case GIMPLE_BINARY_RHS:
      reduction_op = reduc_index ?
                     gimple_assign_rhs2 (stmt) : gimple_assign_rhs1 (stmt);
      break;
    default:
      gcc_unreachable ();
    }

  vectype = get_vectype_for_scalar_type (TREE_TYPE (reduction_op));
  gcc_assert (vectype);
  mode = TYPE_MODE (vectype);

  /*** 1. Create the reduction def-use cycle  ***/

  /* For the case of reduction, vect_get_vec_def_for_operand returns
     the scalar def before the loop that defines the initial value
     of the reduction variable.  */
  vec_initial_def = vect_get_vec_def_for_operand (reduction_op, stmt,
                                                  &adjustment_def);

  phi = reduction_phi;
  def = vect_def;
  for (j = 0; j < ncopies; j++)
    {
      /* 1.1 set the loop-entry arg of the reduction-phi:  */
      add_phi_arg (phi, vec_initial_def, loop_preheader_edge (loop),
                   UNKNOWN_LOCATION);

      /* 1.2 set the loop-latch arg for the reduction-phi:  */
      if (j > 0)
        def = vect_get_vec_def_for_stmt_copy (dt, def);
      add_phi_arg (phi, def, loop_latch_edge (loop), UNKNOWN_LOCATION);

      if (vect_print_dump_info (REPORT_DETAILS))
        {
          fprintf (vect_dump, "transform reduction: created def-use cycle: ");
          print_gimple_stmt (vect_dump, phi, 0, TDF_SLIM);
          fprintf (vect_dump, "\n");
          print_gimple_stmt (vect_dump, SSA_NAME_DEF_STMT (def), 0, TDF_SLIM);
        }

      phi = STMT_VINFO_RELATED_STMT (vinfo_for_stmt (phi));
    }

  /*** 2. Create epilog code
          The reduction epilog code operates across the elements of the vector
          of partial results computed by the vectorized loop.
          The reduction epilog code consists of:
          step 1: compute the scalar result in a vector (v_out2)
          step 2: extract the scalar result (s_out3) from the vector (v_out2)
          step 3: adjust the scalar result (s_out3) if needed.

          Step 1 can be accomplished using one of the following three schemes:
          (scheme 1) using reduc_code, if available.
          (scheme 2) using whole-vector shifts, if available.
          (scheme 3) using a scalar loop.  In this case steps 1+2 above are
                     combined.

          The overall epilog code looks like this:

          s_out0 = phi <s_loop>         # original EXIT_PHI
          v_out1 = phi <VECT_DEF>       # NEW_EXIT_PHI
          v_out2 = reduce <v_out1>              # step 1
          s_out3 = extract_field <v_out2, 0>    # step 2
          s_out4 = adjust_result <s_out3>       # step 3

          (step 3 is optional, and steps 1 and 2 may be combined).
          Lastly, the uses of s_out0 are replaced by s_out4.

          ***/
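
  /* Schematically, scheme 2 for a 4-element add reduction (ignoring
     element-ordering details) is:

       v = v + (v whole-vector-shifted by 2 elements);
       v = v + (v whole-vector-shifted by 1 element);
       s = extract element 0 of v;

     i.e. log2 (nelements) shift+add steps followed by one extract.  */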

  /* 2.1 Create new loop-exit-phi to preserve loop-closed form:
        v_out1 = phi <v_loop>  */

  exit_bb = single_exit (loop)->dest;
  def = vect_def;
  prev_phi_info = NULL;
  for (j = 0; j < ncopies; j++)
    {
      phi = create_phi_node (SSA_NAME_VAR (vect_def), exit_bb);
      set_vinfo_for_stmt (phi, new_stmt_vec_info (phi, loop_vinfo, NULL));
      if (j == 0)
        new_phi = phi;
      else
        {
          def = vect_get_vec_def_for_stmt_copy (dt, def);
          STMT_VINFO_RELATED_STMT (prev_phi_info) = phi;
        }
      SET_PHI_ARG_DEF (phi, single_exit (loop)->dest_idx, def);
      prev_phi_info = vinfo_for_stmt (phi);
    }

  exit_gsi = gsi_after_labels (exit_bb);

  /* 2.2 Get the relevant tree-code to use in the epilog for schemes 2,3
         (i.e. when reduc_code is not available) and in the final adjustment
         code (if needed).  Also get the original scalar reduction variable as
         defined in the loop.  In case STMT is a "pattern-stmt" (i.e., it
         represents a reduction pattern), the tree-code and scalar-def are
         taken from the original stmt that the pattern-stmt (STMT) replaces.
         Otherwise (it is a regular reduction), the tree-code and scalar-def
         are taken from STMT.  */

  orig_stmt = STMT_VINFO_RELATED_STMT (stmt_info);
  if (!orig_stmt)
    {
      /* Regular reduction  */
      orig_stmt = stmt;
    }
  else
    {
      /* Reduction pattern  */
      stmt_vec_info stmt_vinfo = vinfo_for_stmt (orig_stmt);
      gcc_assert (STMT_VINFO_IN_PATTERN_P (stmt_vinfo));
      gcc_assert (STMT_VINFO_RELATED_STMT (stmt_vinfo) == stmt);
    }

  code = gimple_assign_rhs_code (orig_stmt);
  scalar_dest = gimple_assign_lhs (orig_stmt);
  scalar_type = TREE_TYPE (scalar_dest);
  new_scalar_dest = vect_create_destination_var (scalar_dest, NULL);
  bitsize = TYPE_SIZE (scalar_type);

  /* For MINUS_EXPR the initial vector is [init_val,0,...,0], therefore,
     partial results are added and not subtracted.  */
  if (code == MINUS_EXPR)
    code = PLUS_EXPR;
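
  /* A sketch of why this works (illustrative, 4-element vectors assumed):
     for  s -= a[i]  the lanes of the vector of partial results hold
     { init - A0, -A1, -A2, -A3 }, where Ak is the sum of the elements
     processed in lane k; adding the lanes yields init - (A0+A1+A2+A3),
     the required scalar result.  */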

  /* In case this is a reduction in an inner-loop while vectorizing an outer
     loop, we don't need to extract a single scalar result at the end of the
     inner-loop (unless it is a double reduction, i.e., the reduction is used
     outside the outer-loop).  The final vector of partial results will be
     used in the vectorized outer-loop, or reduced to a scalar result at the
     end of the outer-loop.  */
  if (nested_in_vect_loop && !double_reduc)
    goto vect_finalize_reduction;

  /* The epilogue is created for the outer-loop, i.e., for the loop being
     vectorized.  */
  if (double_reduc)
    loop = outer_loop;

  /* FORNOW */
  gcc_assert (ncopies == 1);

  /* 2.3 Create the reduction code, using one of the three schemes described
         above.  */

  if (reduc_code != ERROR_MARK)
    {
      tree tmp;

      /*** Case 1:  Create:
           v_out2 = reduc_expr <v_out1>  */

      if (vect_print_dump_info (REPORT_DETAILS))
        fprintf (vect_dump, "Reduce using direct vector reduction.");

      vec_dest = vect_create_destination_var (scalar_dest, vectype);
      tmp = build1 (reduc_code, vectype, PHI_RESULT (new_phi));
      epilog_stmt = gimple_build_assign (vec_dest, tmp);
      new_temp = make_ssa_name (vec_dest, epilog_stmt);
      gimple_assign_set_lhs (epilog_stmt, new_temp);
      gsi_insert_before (&exit_gsi, epilog_stmt, GSI_SAME_STMT);

      extract_scalar_result = true;
    }
  else
    {
      enum tree_code shift_code = ERROR_MARK;
      bool have_whole_vector_shift = true;
      int bit_offset;
      int element_bitsize = tree_low_cst (bitsize, 1);
      int vec_size_in_bits = tree_low_cst (TYPE_SIZE (vectype), 1);
      tree vec_temp;

      if (optab_handler (vec_shr_optab, mode)->insn_code != CODE_FOR_nothing)
        shift_code = VEC_RSHIFT_EXPR;
      else
        have_whole_vector_shift = false;

      /* Regardless of whether we have a whole vector shift, if we're
         emulating the operation via tree-vect-generic, we don't want
         to use it.  Only the first round of the reduction is likely
         to still be profitable via emulation.  */
      /* ??? It might be better to emit a reduction tree code here, so that
         tree-vect-generic can expand the first round via bit tricks.  */
      if (!VECTOR_MODE_P (mode))
        have_whole_vector_shift = false;
      else
        {
          optab optab = optab_for_tree_code (code, vectype, optab_default);
          if (optab_handler (optab, mode)->insn_code == CODE_FOR_nothing)
            have_whole_vector_shift = false;
        }

      if (have_whole_vector_shift)
        {
          /*** Case 2: Create:
             for (offset = VS/2; offset >= element_size; offset/=2)
                {
                  Create:  va' = vec_shift <va, offset>
                  Create:  va = vop <va, va'>
                }  */
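
          /* For instance (an illustration assuming a 128-bit vector of
             32-bit elements), the loop below emits:

                 va' = vec_shift <va, 64>      va = vop <va, va'>
                 va' = vec_shift <va, 32>      va = vop <va, va'>

             after which a single element of VA holds the complete
             reduction value; it is extracted in step 2.4 below.  */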

          if (vect_print_dump_info (REPORT_DETAILS))
            fprintf (vect_dump, "Reduce using vector shifts");

          vec_dest = vect_create_destination_var (scalar_dest, vectype);
          new_temp = PHI_RESULT (new_phi);

          for (bit_offset = vec_size_in_bits/2;
               bit_offset >= element_bitsize;
               bit_offset /= 2)
            {
              tree bitpos = size_int (bit_offset);

              epilog_stmt = gimple_build_assign_with_ops (shift_code, vec_dest,
                                                          new_temp, bitpos);
              new_name = make_ssa_name (vec_dest, epilog_stmt);
              gimple_assign_set_lhs (epilog_stmt, new_name);
              gsi_insert_before (&exit_gsi, epilog_stmt, GSI_SAME_STMT);

              epilog_stmt = gimple_build_assign_with_ops (code, vec_dest,
                                                          new_name, new_temp);
              new_temp = make_ssa_name (vec_dest, epilog_stmt);
              gimple_assign_set_lhs (epilog_stmt, new_temp);
              gsi_insert_before (&exit_gsi, epilog_stmt, GSI_SAME_STMT);
            }

          extract_scalar_result = true;
        }
      else
        {
          tree rhs;

          /*** Case 3: Create:
             s = extract_field <v_out2, 0>
             for (offset = element_size;
                  offset < vector_size;
                  offset += element_size;)
               {
                 Create:  s' = extract_field <v_out2, offset>
                 Create:  s = op <s, s'>
               }  */
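
          /* E.g. (a sketch for a hypothetical 4-element vector of 32-bit
             elements), the code below emits:

                 s  = extract_field <v_out2, 0>
                 s' = extract_field <v_out2, 32>    s = op <s, s'>
                 s' = extract_field <v_out2, 64>    s = op <s, s'>
                 s' = extract_field <v_out2, 96>    s = op <s, s'>

             i.e. one extract per element and nelements - 1 scalar ops.  */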

          if (vect_print_dump_info (REPORT_DETAILS))
            fprintf (vect_dump, "Reduce using scalar code. ");

          vec_temp = PHI_RESULT (new_phi);
          vec_size_in_bits = tree_low_cst (TYPE_SIZE (vectype), 1);
          rhs = build3 (BIT_FIELD_REF, scalar_type, vec_temp, bitsize,
                        bitsize_zero_node);
          epilog_stmt = gimple_build_assign (new_scalar_dest, rhs);
          new_temp = make_ssa_name (new_scalar_dest, epilog_stmt);
          gimple_assign_set_lhs (epilog_stmt, new_temp);
          gsi_insert_before (&exit_gsi, epilog_stmt, GSI_SAME_STMT);

          for (bit_offset = element_bitsize;
               bit_offset < vec_size_in_bits;
               bit_offset += element_bitsize)
            {
              tree bitpos = bitsize_int (bit_offset);
              tree rhs = build3 (BIT_FIELD_REF, scalar_type, vec_temp, bitsize,
                                 bitpos);

              epilog_stmt = gimple_build_assign (new_scalar_dest, rhs);
              new_name = make_ssa_name (new_scalar_dest, epilog_stmt);
              gimple_assign_set_lhs (epilog_stmt, new_name);
              gsi_insert_before (&exit_gsi, epilog_stmt, GSI_SAME_STMT);

              epilog_stmt = gimple_build_assign_with_ops (code,
                                                          new_scalar_dest,
                                                          new_name, new_temp);
              new_temp = make_ssa_name (new_scalar_dest, epilog_stmt);
              gimple_assign_set_lhs (epilog_stmt, new_temp);
              gsi_insert_before (&exit_gsi, epilog_stmt, GSI_SAME_STMT);
            }

          extract_scalar_result = false;
        }
    }

  /* 2.4  Extract the final scalar result.  Create:
          s_out3 = extract_field <v_out2, bitpos>  */

  if (extract_scalar_result)
    {
      tree rhs;

      gcc_assert (!nested_in_vect_loop || double_reduc);
      if (vect_print_dump_info (REPORT_DETAILS))
        fprintf (vect_dump, "extract scalar result");

      if (BYTES_BIG_ENDIAN)
        bitpos = size_binop (MULT_EXPR,
                             bitsize_int (TYPE_VECTOR_SUBPARTS (vectype) - 1),
                             TYPE_SIZE (scalar_type));
      else
        bitpos = bitsize_zero_node;

      rhs = build3 (BIT_FIELD_REF, scalar_type, new_temp, bitsize, bitpos);
      epilog_stmt = gimple_build_assign (new_scalar_dest, rhs);
      new_temp = make_ssa_name (new_scalar_dest, epilog_stmt);
      gimple_assign_set_lhs (epilog_stmt, new_temp);
      gsi_insert_before (&exit_gsi, epilog_stmt, GSI_SAME_STMT);
    }

vect_finalize_reduction:

  if (double_reduc)
    loop = loop->inner;

  /* 2.5 Adjust the final result by the initial value of the reduction
         variable.  (When such adjustment is not needed, 'adjustment_def' is
         zero.)  For example, if code is PLUS we create:
         new_temp = loop_exit_def + adjustment_def  */

  if (adjustment_def)
    {
      if (nested_in_vect_loop)
        {
          gcc_assert (TREE_CODE (TREE_TYPE (adjustment_def)) == VECTOR_TYPE);
          expr = build2 (code, vectype, PHI_RESULT (new_phi), adjustment_def);
          new_dest = vect_create_destination_var (scalar_dest, vectype);
        }
      else
        {
          gcc_assert (TREE_CODE (TREE_TYPE (adjustment_def)) != VECTOR_TYPE);
          expr = build2 (code, scalar_type, new_temp, adjustment_def);
          new_dest = vect_create_destination_var (scalar_dest, scalar_type);
        }

      epilog_stmt = gimple_build_assign (new_dest, expr);
      new_temp = make_ssa_name (new_dest, epilog_stmt);
      gimple_assign_set_lhs (epilog_stmt, new_temp);
      SSA_NAME_DEF_STMT (new_temp) = epilog_stmt;
      gsi_insert_before (&exit_gsi, epilog_stmt, GSI_SAME_STMT);
    }


  /* 2.6  Handle the loop-exit phi.  */

  /* Replace uses of s_out0 with uses of s_out3:
     Find the loop-closed-use at the loop exit of the original scalar result.
     (The reduction result is expected to have two immediate uses - one at the
     latch block, and one at the loop exit).  */
  phis = VEC_alloc (gimple, heap, 10);
  FOR_EACH_IMM_USE_FAST (use_p, imm_iter, scalar_dest)
    {
      if (!flow_bb_inside_loop_p (loop, gimple_bb (USE_STMT (use_p))))
        {
          exit_phi = USE_STMT (use_p);
          VEC_quick_push (gimple, phis, exit_phi);
        }
    }

  /* We expect to have found an exit_phi because of loop-closed-ssa form.  */
  gcc_assert (!VEC_empty (gimple, phis));

  for (i = 0; VEC_iterate (gimple, phis, i, exit_phi); i++)
    {
      if (nested_in_vect_loop)
        {
          stmt_vec_info stmt_vinfo = vinfo_for_stmt (exit_phi);
          gimple vect_phi;

          /* FORNOW. Currently not supporting the case that an inner-loop
             reduction is not used in the outer-loop (but only outside the
             outer-loop), unless it is a double reduction.  */
          gcc_assert ((STMT_VINFO_RELEVANT_P (stmt_vinfo)
                       && !STMT_VINFO_LIVE_P (stmt_vinfo)) || double_reduc);

          epilog_stmt = adjustment_def ? epilog_stmt : new_phi;
          STMT_VINFO_VEC_STMT (stmt_vinfo) = epilog_stmt;
          set_vinfo_for_stmt (epilog_stmt,
                              new_stmt_vec_info (epilog_stmt, loop_vinfo,
                                                 NULL));
          if (adjustment_def)
            STMT_VINFO_RELATED_STMT (vinfo_for_stmt (epilog_stmt)) =
                STMT_VINFO_RELATED_STMT (vinfo_for_stmt (new_phi));

          if (!double_reduc
              || STMT_VINFO_DEF_TYPE (stmt_vinfo) != vect_double_reduction_def)
            continue;

          /* Handle double reduction:

             stmt1: s1 = phi <s0, s2>  - double reduction phi (outer loop)
             stmt2:   s3 = phi <s1, s4> - (regular) reduction phi (inner loop)
             stmt3:   s4 = use (s3)     - (regular) reduction stmt (inner loop)
             stmt4: s2 = phi <s4>      - double reduction stmt (outer loop)

             At that point the regular reduction (stmt2 and stmt3) is already
             vectorized, as well as the exit phi node, stmt4.
             Here we vectorize the phi node of the double reduction, stmt1,
             and update all relevant statements.  */
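
          /* In source terms, such a double reduction arises e.g. from a
             nest like this (a sketch):

                 s = init;
                 for (i = 0; i < M; i++)      <-- outer loop (stmt1, stmt4)
                   for (j = 0; j < N; j++)    <-- inner loop (stmt2, stmt3)
                     s += a[i][j];  */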

          /* Go through all the uses of s2 to find the double reduction phi
             node, i.e., stmt1 above.  */
          orig_name = PHI_RESULT (exit_phi);
          FOR_EACH_IMM_USE_STMT (use_stmt, imm_iter, orig_name)
            {
              stmt_vec_info use_stmt_vinfo = vinfo_for_stmt (use_stmt);
              stmt_vec_info new_phi_vinfo;
              tree vect_phi_init, preheader_arg, vect_phi_res, init_def;
              basic_block bb = gimple_bb (use_stmt);
              gimple use;

              /* Check that USE_STMT is really a double reduction phi
                 node.  */
              if (gimple_code (use_stmt) != GIMPLE_PHI
                  || gimple_phi_num_args (use_stmt) != 2
                  || !use_stmt_vinfo
                  || STMT_VINFO_DEF_TYPE (use_stmt_vinfo)
                      != vect_double_reduction_def
                  || bb->loop_father != outer_loop)
                continue;

              /* Create a vector phi node for the double reduction:
                 vs1 = phi <vs0, vs2>
                 vs1 was created previously in this function by a call to
                 vect_get_vec_def_for_operand and is stored in vec_initial_def;
                 vs2 is defined by EPILOG_STMT, the vectorized EXIT_PHI;
                 vs0 is created here.  */

              /* Create vector phi node.  */
              vect_phi = create_phi_node (vec_initial_def, bb);
              new_phi_vinfo = new_stmt_vec_info (vect_phi,
                                    loop_vec_info_for_loop (outer_loop), NULL);
              set_vinfo_for_stmt (vect_phi, new_phi_vinfo);

              /* Create vs0 - the initial def of the double reduction phi.  */
              preheader_arg = PHI_ARG_DEF_FROM_EDGE (use_stmt,
                                             loop_preheader_edge (outer_loop));
              init_def = get_initial_def_for_reduction (stmt, preheader_arg,
                                                        NULL);
              vect_phi_init = vect_init_vector (use_stmt, init_def, vectype,
                                                NULL);

              /* Update the phi node arguments with vs0 and vs2.  */
              add_phi_arg (vect_phi, vect_phi_init,
                           loop_preheader_edge (outer_loop), UNKNOWN_LOCATION);
              add_phi_arg (vect_phi, PHI_RESULT (epilog_stmt),
                           loop_latch_edge (outer_loop), UNKNOWN_LOCATION);
              if (vect_print_dump_info (REPORT_DETAILS))
                {
                  fprintf (vect_dump, "created double reduction phi node: ");
                  print_gimple_stmt (vect_dump, vect_phi, 0, TDF_SLIM);
                }

              vect_phi_res = PHI_RESULT (vect_phi);

              /* Replace the use, i.e., set the correct vs1 in the regular
                 reduction phi node.  FORNOW, NCOPIES is always 1, so the
                 loop is redundant.  */
              use = reduction_phi;
              for (j = 0; j < ncopies; j++)
                {
                  edge pr_edge = loop_preheader_edge (loop);
                  SET_PHI_ARG_DEF (use, pr_edge->dest_idx, vect_phi_res);
                  use = STMT_VINFO_RELATED_STMT (vinfo_for_stmt (use));
                }
            }
        }

      /* Replace the uses:  */
      orig_name = PHI_RESULT (exit_phi);
      FOR_EACH_IMM_USE_STMT (use_stmt, imm_iter, orig_name)
        FOR_EACH_IMM_USE_ON_STMT (use_p, imm_iter)
          SET_USE (use_p, new_temp);
    }

  VEC_free (gimple, heap, phis);
}


/* Function vectorizable_reduction.

   Check if STMT performs a reduction operation that can be vectorized.
   If VEC_STMT is also passed, vectorize the STMT: create a vectorized
   stmt to replace it, put it in VEC_STMT, and insert it at GSI.
   Return FALSE if not a vectorizable STMT, TRUE otherwise.

   This function also handles reduction idioms (patterns) that have been
   recognized in advance during vect_pattern_recog.  In this case, STMT may be
   of this form:
     X = pattern_expr (arg0, arg1, ..., X)
   and its STMT_VINFO_RELATED_STMT points to the last stmt in the original
   sequence that had been detected and replaced by the pattern-stmt (STMT).

   In some cases of reduction patterns, the type of the reduction variable X is
   different than the type of the other arguments of STMT.
   In such cases, the vectype that is used when transforming STMT into a vector
   stmt is different than the vectype that is used to determine the
   vectorization factor, because it consists of a different number of elements
   than the actual number of elements that are being operated upon in parallel.

   For example, consider an accumulation of shorts into an int accumulator.
   On some targets it's possible to vectorize this pattern operating on 8
   shorts at a time (hence, the vectype for purposes of determining the
   vectorization factor should be V8HI); on the other hand, the vectype that
   is used to create the vector form is actually V4SI (the type of the result).

   Upon entry to this function, STMT_VINFO_VECTYPE records the vectype that
   indicates the actual level of parallelism (V8HI in the example), so that
   the right vectorization factor is derived.  This vectype corresponds to the
   type of arguments to the reduction stmt, and should *NOT* be used to create
   the vectorized stmt.  The right vectype for the vectorized stmt is obtained
   from the type of the result X:
        get_vectype_for_scalar_type (TREE_TYPE (X))

   This means that, contrary to "regular" reductions (or "regular" stmts in
   general), the following equation:
      STMT_VINFO_VECTYPE == get_vectype_for_scalar_type (TREE_TYPE (X))
   does *NOT* necessarily hold for reduction patterns.  */
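
/* A source-level sketch of such a reduction pattern (illustrative):

       short a[N];  int acc = 0;
       for (i = 0; i < N; i++)
         acc += a[i];    <-- may be recognized as: acc = widen_sum <a[i], acc>

   Eight shorts would then be consumed per vector iteration (so V8HI
   determines the vectorization factor), while the vectorized accumulator
   update itself operates on V4SI, as described above.  */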

bool
vectorizable_reduction (gimple stmt, gimple_stmt_iterator *gsi,
                        gimple *vec_stmt)
{
  tree vec_dest;
  tree scalar_dest;
  tree loop_vec_def0 = NULL_TREE, loop_vec_def1 = NULL_TREE;
  stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
  tree vectype = STMT_VINFO_VECTYPE (stmt_info);
  loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
  struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
  enum tree_code code, orig_code, epilog_reduc_code;
  enum machine_mode vec_mode;
  int op_type;
  optab optab, reduc_optab;
  tree new_temp = NULL_TREE;
  tree def;
  gimple def_stmt;
  enum vect_def_type dt;
  gimple new_phi = NULL;
  tree scalar_type;
  bool is_simple_use;
  gimple orig_stmt;
  stmt_vec_info orig_stmt_info;
  tree expr = NULL_TREE;
  int i;
  int nunits = TYPE_VECTOR_SUBPARTS (vectype);
  int ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits;
  int epilog_copies;
  stmt_vec_info prev_stmt_info, prev_phi_info;
  gimple first_phi = NULL;
  bool single_defuse_cycle = false;
  tree reduc_def = NULL_TREE;
  gimple new_stmt = NULL;
  int j;
  tree ops[3];
  bool nested_cycle = false, found_nested_cycle_def = false;
  gimple reduc_def_stmt = NULL;
  /* By default the reduction variable is the last operand in the
     statement.  */
  int reduc_index = 2;
  bool double_reduc = false, dummy;
  basic_block def_bb;
  struct loop *def_stmt_loop, *outer_loop = NULL;
  tree def_arg;
  gimple def_arg_stmt;

  if (nested_in_vect_loop_p (loop, stmt))
    {
      outer_loop = loop;
      loop = loop->inner;
      nested_cycle = true;
    }

  gcc_assert (ncopies >= 1);

  /* FORNOW: SLP not supported.  */
  if (STMT_SLP_TYPE (stmt_info))
    return false;

  /* 1. Is vectorizable reduction?  */
  /* Not supportable if the reduction variable is used in the loop.  */
  if (STMT_VINFO_RELEVANT (stmt_info) > vect_used_in_outer)
    return false;

  /* Reductions that are not used even in an enclosing outer-loop are
     expected to be "live" (used out of the loop).  */
  if (STMT_VINFO_RELEVANT (stmt_info) == vect_unused_in_scope
      && !STMT_VINFO_LIVE_P (stmt_info))
    return false;

  /* Make sure it was already recognized as a reduction computation.  */
  if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_reduction_def
      && STMT_VINFO_DEF_TYPE (stmt_info) != vect_nested_cycle)
    return false;

  /* 2. Has this been recognized as a reduction pattern?

     Check if STMT represents a pattern that has been recognized
     in earlier analysis stages.  For stmts that represent a pattern,
     the STMT_VINFO_RELATED_STMT field records the last stmt in
     the original sequence that constitutes the pattern.  */

  orig_stmt = STMT_VINFO_RELATED_STMT (stmt_info);
  if (orig_stmt)
    {
      orig_stmt_info = vinfo_for_stmt (orig_stmt);
      gcc_assert (STMT_VINFO_RELATED_STMT (orig_stmt_info) == stmt);
      gcc_assert (STMT_VINFO_IN_PATTERN_P (orig_stmt_info));
      gcc_assert (!STMT_VINFO_IN_PATTERN_P (stmt_info));
    }

  /* 3. Check the operands of the operation.  The first operands are defined
        inside the loop body.  The last operand is the reduction variable,
        which is defined by the loop-header-phi.  */

  gcc_assert (is_gimple_assign (stmt));

  /* Flatten RHS.  */
  switch (get_gimple_rhs_class (gimple_assign_rhs_code (stmt)))
    {
    case GIMPLE_SINGLE_RHS:
      op_type = TREE_OPERAND_LENGTH (gimple_assign_rhs1 (stmt));
      if (op_type == ternary_op)
        {
          tree rhs = gimple_assign_rhs1 (stmt);
          ops[0] = TREE_OPERAND (rhs, 0);
          ops[1] = TREE_OPERAND (rhs, 1);
          ops[2] = TREE_OPERAND (rhs, 2);
          code = TREE_CODE (rhs);
        }
      else
        return false;
      break;

    case GIMPLE_BINARY_RHS:
      code = gimple_assign_rhs_code (stmt);
      op_type = TREE_CODE_LENGTH (code);
      gcc_assert (op_type == binary_op);
      ops[0] = gimple_assign_rhs1 (stmt);
      ops[1] = gimple_assign_rhs2 (stmt);
      break;

    case GIMPLE_UNARY_RHS:
      return false;

    default:
      gcc_unreachable ();
    }

  scalar_dest = gimple_assign_lhs (stmt);
  scalar_type = TREE_TYPE (scalar_dest);
  if (!POINTER_TYPE_P (scalar_type) && !INTEGRAL_TYPE_P (scalar_type)
      && !SCALAR_FLOAT_TYPE_P (scalar_type))
    return false;

  /* All uses but the last are expected to be defined in the loop.
     The last use is the reduction variable.  In case of a nested cycle this
     assumption does not hold: we use reduc_index to record the index of the
     reduction variable.  */
  for (i = 0; i < op_type - 1; i++)
    {
      /* The condition of COND_EXPR is checked in vectorizable_condition().  */
      if (i == 0 && code == COND_EXPR)
        continue;

      is_simple_use = vect_is_simple_use (ops[i], loop_vinfo, NULL, &def_stmt,
                                          &def, &dt);
      gcc_assert (is_simple_use);
      if (dt != vect_internal_def
          && dt != vect_external_def
          && dt != vect_constant_def
          && dt != vect_induction_def
          && !(dt == vect_nested_cycle && nested_cycle))
        return false;

      if (dt == vect_nested_cycle)
        {
          found_nested_cycle_def = true;
          reduc_def_stmt = def_stmt;
          reduc_index = i;
        }
    }

  is_simple_use = vect_is_simple_use (ops[i], loop_vinfo, NULL, &def_stmt,
                                      &def, &dt);
  gcc_assert (is_simple_use);
  gcc_assert (dt == vect_reduction_def
              || dt == vect_nested_cycle
              || ((dt == vect_internal_def || dt == vect_external_def
                   || dt == vect_constant_def || dt == vect_induction_def)
                   && nested_cycle && found_nested_cycle_def));
  if (!found_nested_cycle_def)
    reduc_def_stmt = def_stmt;

  gcc_assert (gimple_code (reduc_def_stmt) == GIMPLE_PHI);
  if (orig_stmt)
    gcc_assert (orig_stmt == vect_is_simple_reduction (loop_vinfo,
                                                       reduc_def_stmt,
                                                       !nested_cycle,
                                                       &dummy));
  else
    gcc_assert (stmt == vect_is_simple_reduction (loop_vinfo, reduc_def_stmt,
                                                  !nested_cycle, &dummy));

  if (STMT_VINFO_LIVE_P (vinfo_for_stmt (reduc_def_stmt)))
    return false;

  vec_mode = TYPE_MODE (vectype);

  if (code == COND_EXPR)
    {
      if (!vectorizable_condition (stmt, gsi, NULL, ops[reduc_index], 0))
        {
          if (vect_print_dump_info (REPORT_DETAILS))
            fprintf (vect_dump, "unsupported condition in reduction");

          return false;
        }
    }
  else
    {
      /* 4. Supportable by target?  */

      /* 4.1. Check support for the operation in the loop.  */
      optab = optab_for_tree_code (code, vectype, optab_default);
      if (!optab)
        {
          if (vect_print_dump_info (REPORT_DETAILS))
            fprintf (vect_dump, "no optab.");

          return false;
        }

      if (optab_handler (optab, vec_mode)->insn_code == CODE_FOR_nothing)
        {
          if (vect_print_dump_info (REPORT_DETAILS))
            fprintf (vect_dump, "op not supported by target.");

          if (GET_MODE_SIZE (vec_mode) != UNITS_PER_WORD
              || LOOP_VINFO_VECT_FACTOR (loop_vinfo)
                  < vect_min_worthwhile_factor (code))
            return false;

          if (vect_print_dump_info (REPORT_DETAILS))
            fprintf (vect_dump, "proceeding using word mode.");
        }

      /* Worthwhile without SIMD support?  */
      if (!VECTOR_MODE_P (TYPE_MODE (vectype))
          && LOOP_VINFO_VECT_FACTOR (loop_vinfo)
             < vect_min_worthwhile_factor (code))
        {
          if (vect_print_dump_info (REPORT_DETAILS))
            fprintf (vect_dump, "not worthwhile without SIMD support.");

          return false;
        }
    }

  /* 4.2. Check support for the epilog operation.

          If STMT represents a reduction pattern, then the type of the
          reduction variable may be different than the type of the rest
          of the arguments.  For example, consider the case of accumulation
          of shorts into an int accumulator; the original code:
                        S1: int_a = (int) short_a;
          orig_stmt->   S2: int_acc = plus <int_a, int_acc>;

          was replaced with:
                        STMT: int_acc = widen_sum <short_a, int_acc>

          This means that:
          1. The tree-code that is used to create the vector operation in the
             epilog code (that reduces the partial results) is not the
             tree-code of STMT, but is rather the tree-code of the original
             stmt from the pattern that STMT is replacing.  I.e., in the
             example above we want to use 'widen_sum' in the loop, but 'plus'
             in the epilog.
          2. The type (mode) we use to check available target support
             for the vector operation to be created in the *epilog*, is
             determined by the type of the reduction variable (in the example
             above we'd check this: plus_optab[vect_int_mode]).
             However the type (mode) we use to check available target support
             for the vector operation to be created *inside the loop*, is
             determined by the type of the other arguments to STMT (in the
             example we'd check this: widen_sum_optab[vect_short_mode]).

          This is contrary to "regular" reductions, in which the types of all
          the arguments are the same as the type of the reduction variable.
          For "regular" reductions we can therefore use the same vector type
          (and also the same tree-code) when generating the epilog code and
          when generating the code inside the loop.  */

  if (orig_stmt)
    {
      /* This is a reduction pattern: get the vectype from the type of the
         reduction variable, and get the tree-code from orig_stmt.  */
      orig_code = gimple_assign_rhs_code (orig_stmt);
      vectype = get_vectype_for_scalar_type (TREE_TYPE (def));
      if (!vectype)
        {
          if (vect_print_dump_info (REPORT_DETAILS))
            {
              fprintf (vect_dump, "unsupported data-type ");
              print_generic_expr (vect_dump, TREE_TYPE (def), TDF_SLIM);
            }
          return false;
        }

      vec_mode = TYPE_MODE (vectype);
    }
  else
    {
      /* Regular reduction: the vectype and tree-code used for the vector
         code inside the loop can also be used for the epilog code.  */
      orig_code = code;
    }

  if (nested_cycle)
    {
      def_bb = gimple_bb (reduc_def_stmt);
      def_stmt_loop = def_bb->loop_father;
      def_arg = PHI_ARG_DEF_FROM_EDGE (reduc_def_stmt,
                                       loop_preheader_edge (def_stmt_loop));
      if (TREE_CODE (def_arg) == SSA_NAME
          && (def_arg_stmt = SSA_NAME_DEF_STMT (def_arg))
          && gimple_code (def_arg_stmt) == GIMPLE_PHI
          && flow_bb_inside_loop_p (outer_loop, gimple_bb (def_arg_stmt))
          && vinfo_for_stmt (def_arg_stmt)
          && STMT_VINFO_DEF_TYPE (vinfo_for_stmt (def_arg_stmt))
              == vect_double_reduction_def)
        double_reduc = true;
    }

  epilog_reduc_code = ERROR_MARK;
  if (reduction_code_for_scalar_code (orig_code, &epilog_reduc_code))
    {
      reduc_optab = optab_for_tree_code (epilog_reduc_code, vectype,
                                         optab_default);
      if (!reduc_optab)
        {
          if (vect_print_dump_info (REPORT_DETAILS))
            fprintf (vect_dump, "no optab for reduction.");

          epilog_reduc_code = ERROR_MARK;
        }

      if (reduc_optab
          && optab_handler (reduc_optab, vec_mode)->insn_code
              == CODE_FOR_nothing)
        {
          if (vect_print_dump_info (REPORT_DETAILS))
            fprintf (vect_dump, "reduc op not supported by target.");

          epilog_reduc_code = ERROR_MARK;
        }
    }
  else
    {
      if (!nested_cycle || double_reduc)
        {
          if (vect_print_dump_info (REPORT_DETAILS))
            fprintf (vect_dump, "no reduc code for scalar code.");

          return false;
        }
    }

  if (double_reduc && ncopies > 1)
    {
      if (vect_print_dump_info (REPORT_DETAILS))
        fprintf (vect_dump, "multiple types in double reduction");

      return false;
    }

  if (!vec_stmt) /* transformation not required.  */
    {
      STMT_VINFO_TYPE (stmt_info) = reduc_vec_info_type;
      if (!vect_model_reduction_cost (stmt_info, epilog_reduc_code, ncopies))
        return false;
      return true;
    }

  /** Transform.  **/

  if (vect_print_dump_info (REPORT_DETAILS))
    fprintf (vect_dump, "transform reduction.");

  /* FORNOW: Multiple types are not supported for condition.  */
  if (code == COND_EXPR)
    gcc_assert (ncopies == 1);

  /* Create the destination vector.  */
  vec_dest = vect_create_destination_var (scalar_dest, vectype);

  /* In case the vectorization factor (VF) is bigger than the number
     of elements that we can fit in a vectype (nunits), we have to generate
     more than one vector stmt, i.e., we need to "unroll" the
     vector stmt by a factor VF/nunits.  For more details see documentation
     in vectorizable_operation.  */

  /* If the reduction is used in an outer loop we need to generate
     VF intermediate results, like so (e.g. for ncopies=2):
        r0 = phi (init, r0)
        r1 = phi (init, r1)
        r0 = x0 + r0;
        r1 = x1 + r1;
    (i.e. we generate VF results in 2 registers).
    In this case we have a separate def-use cycle for each copy, and therefore
    for each copy we get the vector def for the reduction variable from the
    respective phi node created for this copy.

    Otherwise (the reduction is unused in the loop nest), we can combine
    together intermediate results, like so (e.g. for ncopies=2):
        r = phi (init, r)
        r = x0 + r;
        r = x1 + r;
    (i.e. we generate VF/2 results in a single register).
    In this case for each copy we get the vector def for the reduction
    variable from the vectorized reduction operation generated in the
    previous iteration.  */

  if (STMT_VINFO_RELEVANT (stmt_info) == vect_unused_in_scope)
    {
      single_defuse_cycle = true;
      epilog_copies = 1;
    }
  else
    epilog_copies = ncopies;

  prev_stmt_info = NULL;
  prev_phi_info = NULL;
  for (j = 0; j < ncopies; j++)
    {
      if (j == 0 || !single_defuse_cycle)
        {
          /* Create the reduction-phi that defines the reduction-operand.  */
          new_phi = create_phi_node (vec_dest, loop->header);
          set_vinfo_for_stmt (new_phi, new_stmt_vec_info (new_phi, loop_vinfo,
                                                          NULL));
          /* Get the vector def for the reduction variable from the phi
             node.  */
          reduc_def = PHI_RESULT (new_phi);
        }

      if (code == COND_EXPR)
        {
          first_phi = new_phi;
          vectorizable_condition (stmt, gsi, vec_stmt, reduc_def, reduc_index);
          /* Multiple types are not supported for condition.  */
          break;
        }

      /* Handle uses.  */
      if (j == 0)
        {
          loop_vec_def0 = vect_get_vec_def_for_operand (ops[!reduc_index],
                                                        stmt, NULL);
          if (op_type == ternary_op)
            {
              if (reduc_index == 0)
                loop_vec_def1 = vect_get_vec_def_for_operand (ops[2], stmt,
                                                              NULL);
              else
                loop_vec_def1 = vect_get_vec_def_for_operand (ops[1], stmt,
                                                              NULL);
            }

          /* Get the vector def for the reduction variable from the phi
             node.  */
          first_phi = new_phi;
        }
      else
        {
          enum vect_def_type dt = vect_unknown_def_type; /* Dummy */
          loop_vec_def0 = vect_get_vec_def_for_stmt_copy (dt, loop_vec_def0);
          if (op_type == ternary_op)
            loop_vec_def1 = vect_get_vec_def_for_stmt_copy (dt, loop_vec_def1);

          if (single_defuse_cycle)
            reduc_def = gimple_assign_lhs (new_stmt);
          else
            reduc_def = PHI_RESULT (new_phi);

          STMT_VINFO_RELATED_STMT (prev_phi_info) = new_phi;
        }

      /* Arguments are ready.  Create the new vector stmt.  */
      if (op_type == binary_op)
        {
          if (reduc_index == 0)
            expr = build2 (code, vectype, reduc_def, loop_vec_def0);
          else
            expr = build2 (code, vectype, loop_vec_def0, reduc_def);
        }
      else
        {
          if (reduc_index == 0)
            expr = build3 (code, vectype, reduc_def, loop_vec_def0,
                           loop_vec_def1);
          else
            {
              if (reduc_index == 1)
                expr = build3 (code, vectype, loop_vec_def0, reduc_def,
                               loop_vec_def1);
              else
                expr = build3 (code, vectype, loop_vec_def0, loop_vec_def1,
                               reduc_def);
            }
        }

      new_stmt = gimple_build_assign (vec_dest, expr);
      new_temp = make_ssa_name (vec_dest, new_stmt);
      gimple_assign_set_lhs (new_stmt, new_temp);
      vect_finish_stmt_generation (stmt, new_stmt, gsi);

      if (j == 0)
        STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
      else
        STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;

      prev_stmt_info = vinfo_for_stmt (new_stmt);
      prev_phi_info = vinfo_for_stmt (new_phi);
    }

  /* Finalize the reduction-phi (set its arguments) and create the
     epilog reduction code.  */
  if (!single_defuse_cycle || code == COND_EXPR)
    new_temp = gimple_assign_lhs (*vec_stmt);

  vect_create_epilog_for_reduction (new_temp, stmt, epilog_copies,
                                    epilog_reduc_code, first_phi, reduc_index,
                                    double_reduc);
  return true;
}

/* Function vect_min_worthwhile_factor.

   For a loop where we could vectorize the operation indicated by CODE,
   return the minimum vectorization factor that makes it worthwhile
   to use generic vectors.  */
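
/* For instance (informal): vectorizable_reduction above falls back to
   emulating an unsupported PLUS_EXPR only when the vector mode is
   word-sized and the vectorization factor is at least 4, while the cheap
   bitwise codes already pay off at a factor of 2; for any other code,
   INT_MAX means emulation is never considered worthwhile.  */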
int
vect_min_worthwhile_factor (enum tree_code code)
{
  switch (code)
    {
    case PLUS_EXPR:
    case MINUS_EXPR:
    case NEGATE_EXPR:
      return 4;

    case BIT_AND_EXPR:
    case BIT_IOR_EXPR:
    case BIT_XOR_EXPR:
    case BIT_NOT_EXPR:
      return 2;

    default:
      return INT_MAX;
    }
}


/* Function vectorizable_induction.

   Check if PHI performs an induction computation that can be vectorized.
   If VEC_STMT is also passed, vectorize the induction PHI: create a vectorized
   phi to replace it, put it in VEC_STMT, and add it to the same basic block.
   Return FALSE if not a vectorizable STMT, TRUE otherwise.  */
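
/* For instance (a sketch, assuming a 4-wide vector type): the induction

       for (i = 0; i < n; i++)
         ... use (i) ...

   is vectorized by creating a vector phi whose initial value is
   {0, 1, 2, 3} and whose loop-carried update adds the step vector
   {4, 4, 4, 4} on every iteration.  */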

bool
vectorizable_induction (gimple phi, gimple_stmt_iterator *gsi ATTRIBUTE_UNUSED,
                        gimple *vec_stmt)
{
  stmt_vec_info stmt_info = vinfo_for_stmt (phi);
  tree vectype = STMT_VINFO_VECTYPE (stmt_info);
  loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
  struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
  int nunits = TYPE_VECTOR_SUBPARTS (vectype);
  int ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits;
  tree vec_def;

  gcc_assert (ncopies >= 1);
  /* FORNOW. This restriction should be relaxed.  */
  if (nested_in_vect_loop_p (loop, phi) && ncopies > 1)
    {
      if (vect_print_dump_info (REPORT_DETAILS))
        fprintf (vect_dump, "multiple types in nested loop.");
      return false;
    }

  if (!STMT_VINFO_RELEVANT_P (stmt_info))
    return false;

  /* FORNOW: SLP not supported.  */
  if (STMT_SLP_TYPE (stmt_info))
    return false;

  gcc_assert (STMT_VINFO_DEF_TYPE (stmt_info) == vect_induction_def);

  if (gimple_code (phi) != GIMPLE_PHI)
    return false;

  if (!vec_stmt) /* transformation not required.  */
    {
      STMT_VINFO_TYPE (stmt_info) = induc_vec_info_type;
      if (vect_print_dump_info (REPORT_DETAILS))
        fprintf (vect_dump, "=== vectorizable_induction ===");
      vect_model_induction_cost (stmt_info, ncopies);
      return true;
    }

  /** Transform.  **/

  if (vect_print_dump_info (REPORT_DETAILS))
    fprintf (vect_dump, "transform induction phi.");

  vec_def = get_initial_def_for_induction (phi);
  *vec_stmt = SSA_NAME_DEF_STMT (vec_def);
  return true;
}

/* Function vectorizable_live_operation.

   STMT computes a value that is used outside the loop.  Check if
   it can be supported.  */

bool
vectorizable_live_operation (gimple stmt,
                             gimple_stmt_iterator *gsi ATTRIBUTE_UNUSED,
                             gimple *vec_stmt ATTRIBUTE_UNUSED)
{
  stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
  loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
  struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
  int i;
  int op_type;
  tree op;
  tree def;
  gimple def_stmt;
  enum vect_def_type dt;
  enum tree_code code;
  enum gimple_rhs_class rhs_class;

  gcc_assert (STMT_VINFO_LIVE_P (stmt_info));

  if (STMT_VINFO_DEF_TYPE (stmt_info) == vect_reduction_def)
    return false;

  if (!is_gimple_assign (stmt))
    return false;

  if (TREE_CODE (gimple_assign_lhs (stmt)) != SSA_NAME)
    return false;

  /* FORNOW. CHECKME. */
  if (nested_in_vect_loop_p (loop, stmt))
    return false;

  code = gimple_assign_rhs_code (stmt);
  op_type = TREE_CODE_LENGTH (code);
  rhs_class = get_gimple_rhs_class (code);
  gcc_assert (rhs_class != GIMPLE_UNARY_RHS || op_type == unary_op);
  gcc_assert (rhs_class != GIMPLE_BINARY_RHS || op_type == binary_op);

  /* FORNOW: support only if all uses are invariant.  This means
     that the scalar operations can remain in place, unvectorized.
     The original last scalar value that they compute will be used.  */
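
  /* E.g. (a sketch):

         for (i = 0; i < n; i++)
           { t = c + 7; ... }
         ... use (t) after the loop ...

     with 'c' loop-invariant: 't' is live after the loop, but all operands
     of its defining stmt are invariant, so the scalar stmt can simply be
     left in place and its final value used.  */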

  for (i = 0; i < op_type; i++)
    {
      if (rhs_class == GIMPLE_SINGLE_RHS)
        op = TREE_OPERAND (gimple_op (stmt, 1), i);
      else
        op = gimple_op (stmt, i + 1);
      if (op
          && !vect_is_simple_use (op, loop_vinfo, NULL, &def_stmt, &def, &dt))
        {
          if (vect_print_dump_info (REPORT_DETAILS))
            fprintf (vect_dump, "use not simple.");
          return false;
        }

      if (dt != vect_external_def && dt != vect_constant_def)
        return false;
    }

  /* No transformation is required for the cases we currently support.  */
  return true;
}

/* Kill any debug uses outside LOOP of SSA names defined in STMT.  */

static void
vect_loop_kill_debug_uses (struct loop *loop, gimple stmt)
{
  ssa_op_iter op_iter;
  imm_use_iterator imm_iter;
  def_operand_p def_p;
  gimple ustmt;

  FOR_EACH_PHI_OR_STMT_DEF (def_p, stmt, op_iter, SSA_OP_DEF)
    {
      FOR_EACH_IMM_USE_STMT (ustmt, imm_iter, DEF_FROM_PTR (def_p))
        {
          basic_block bb;

          if (!is_gimple_debug (ustmt))
            continue;

          bb = gimple_bb (ustmt);

          if (!flow_bb_inside_loop_p (loop, bb))
            {
              if (gimple_debug_bind_p (ustmt))
                {
                  if (vect_print_dump_info (REPORT_DETAILS))
                    fprintf (vect_dump, "killing debug use");

                  gimple_debug_bind_reset_value (ustmt);
                  update_stmt (ustmt);
                }
              else
                gcc_unreachable ();
            }
        }
    }
}

/* Function vect_transform_loop.

   The analysis phase has determined that the loop is vectorizable.
   Vectorize the loop: create vectorized stmts to replace the scalar
   stmts in the loop, and update the loop exit condition.  */

void
vect_transform_loop (loop_vec_info loop_vinfo)
{
  struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
  basic_block *bbs = LOOP_VINFO_BBS (loop_vinfo);
  int nbbs = loop->num_nodes;
  gimple_stmt_iterator si;
  int i;
  tree ratio = NULL;
  int vectorization_factor = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
  bool strided_store;
  bool slp_scheduled = false;
  unsigned int nunits;
  tree cond_expr = NULL_TREE;
  gimple_seq cond_expr_stmt_list = NULL;
  bool do_peeling_for_loop_bound;

  if (vect_print_dump_info (REPORT_DETAILS))
    fprintf (vect_dump, "=== vec_transform_loop ===");

  /* Peel the loop if there are data refs with unknown alignment.
     Only one data ref with unknown alignment is allowed.  */

  if (LOOP_PEELING_FOR_ALIGNMENT (loop_vinfo))
    vect_do_peeling_for_alignment (loop_vinfo);

  do_peeling_for_loop_bound
    = (!LOOP_VINFO_NITERS_KNOWN_P (loop_vinfo)
       || (LOOP_VINFO_NITERS_KNOWN_P (loop_vinfo)
           && LOOP_VINFO_INT_NITERS (loop_vinfo) % vectorization_factor != 0));

  if (LOOP_REQUIRES_VERSIONING_FOR_ALIGNMENT (loop_vinfo)
      || LOOP_REQUIRES_VERSIONING_FOR_ALIAS (loop_vinfo))
    vect_loop_versioning (loop_vinfo,
                          !do_peeling_for_loop_bound,
                          &cond_expr, &cond_expr_stmt_list);

  /* If the loop has a symbolic number of iterations 'n' (i.e. it's not a
     compile time constant), or it is a constant that doesn't divide by the
     vectorization factor, then an epilog loop needs to be created.
     We therefore duplicate the loop: the original loop will be vectorized,
     and will compute the first (n/VF) iterations.  The second copy of the
     loop will remain scalar and will compute the remaining (n%VF) iterations.
     (VF is the vectorization factor.)  */
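
  /* E.g. (informal): for n = 10 and VF = 4 the vectorized loop executes
     twice (covering 8 elements) and the scalar epilog loop computes the
     remaining n % VF = 2 iterations.  */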

  if (do_peeling_for_loop_bound)
    vect_do_peeling_for_loop_bound (loop_vinfo, &ratio,
                                    cond_expr, cond_expr_stmt_list);
  else
    ratio = build_int_cst (TREE_TYPE (LOOP_VINFO_NITERS (loop_vinfo)),
                LOOP_VINFO_INT_NITERS (loop_vinfo) / vectorization_factor);

  /* 1) Make sure the loop header has exactly two entries
     2) Make sure we have a preheader basic block.  */

  gcc_assert (EDGE_COUNT (loop->header->preds) == 2);

  split_edge (loop_preheader_edge (loop));

  /* FORNOW: the vectorizer supports only loops whose body consists
     of one basic block (header + empty latch).  When the vectorizer
     supports more involved loop forms, the order in which the BBs are
     traversed needs to be reconsidered.  */

  for (i = 0; i < nbbs; i++)
    {
      basic_block bb = bbs[i];
      stmt_vec_info stmt_info;
      gimple phi;

      for (si = gsi_start_phis (bb); !gsi_end_p (si); gsi_next (&si))
        {
          phi = gsi_stmt (si);
          if (vect_print_dump_info (REPORT_DETAILS))
            {
              fprintf (vect_dump, "------>vectorizing phi: ");
              print_gimple_stmt (vect_dump, phi, 0, TDF_SLIM);
            }
          stmt_info = vinfo_for_stmt (phi);
          if (!stmt_info)
            continue;

          if (MAY_HAVE_DEBUG_STMTS && !STMT_VINFO_LIVE_P (stmt_info))
            vect_loop_kill_debug_uses (loop, phi);

          if (!STMT_VINFO_RELEVANT_P (stmt_info)
              && !STMT_VINFO_LIVE_P (stmt_info))
            continue;

          if ((TYPE_VECTOR_SUBPARTS (STMT_VINFO_VECTYPE (stmt_info))
                != (unsigned HOST_WIDE_INT) vectorization_factor)
              && vect_print_dump_info (REPORT_DETAILS))
            fprintf (vect_dump, "multiple-types.");

          if (STMT_VINFO_DEF_TYPE (stmt_info) == vect_induction_def)
            {
              if (vect_print_dump_info (REPORT_DETAILS))
                fprintf (vect_dump, "transform phi.");
              vect_transform_stmt (phi, NULL, NULL, NULL, NULL);
            }
        }

      for (si = gsi_start_bb (bb); !gsi_end_p (si);)
        {
          gimple stmt = gsi_stmt (si);
          bool is_store;

          if (vect_print_dump_info (REPORT_DETAILS))
            {
              fprintf (vect_dump, "------>vectorizing statement: ");
              print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM);
            }

          stmt_info = vinfo_for_stmt (stmt);

          /* Vector stmts created in the outer-loop during vectorization of
             stmts in an inner-loop may not have a stmt_info, and do not
             need to be vectorized.  */
          if (!stmt_info)
            {
              gsi_next (&si);
              continue;
            }

          if (MAY_HAVE_DEBUG_STMTS && !STMT_VINFO_LIVE_P (stmt_info))
            vect_loop_kill_debug_uses (loop, stmt);

          if (!STMT_VINFO_RELEVANT_P (stmt_info)
              && !STMT_VINFO_LIVE_P (stmt_info))
            {
              gsi_next (&si);
              continue;
            }

          gcc_assert (STMT_VINFO_VECTYPE (stmt_info));
          nunits =
            (unsigned int) TYPE_VECTOR_SUBPARTS (STMT_VINFO_VECTYPE (stmt_info));
          if (!STMT_SLP_TYPE (stmt_info)
              && nunits != (unsigned int) vectorization_factor
              && vect_print_dump_info (REPORT_DETAILS))
            /* For SLP, VF is set according to the unrolling factor, and not
               to the vector size, hence this print is not valid for SLP.  */
            fprintf (vect_dump, "multiple-types.");

          /* SLP.  Schedule all the SLP instances when the first SLP stmt is
             reached.  */
          if (STMT_SLP_TYPE (stmt_info))
            {
              if (!slp_scheduled)
                {
                  slp_scheduled = true;

                  if (vect_print_dump_info (REPORT_DETAILS))
                    fprintf (vect_dump, "=== scheduling SLP instances ===");

                  vect_schedule_slp (loop_vinfo, NULL);
                }

              /* Hybrid SLP stmts must be vectorized in addition to SLP.  */
              if (!vinfo_for_stmt (stmt) || PURE_SLP_STMT (stmt_info))
                {
                  gsi_next (&si);
                  continue;
                }
            }

          /* -------- vectorize statement ------------ */
          if (vect_print_dump_info (REPORT_DETAILS))
            fprintf (vect_dump, "transform statement.");

          strided_store = false;
          is_store = vect_transform_stmt (stmt, &si, &strided_store, NULL, NULL);
          if (is_store)
            {
              if (STMT_VINFO_STRIDED_ACCESS (stmt_info))
                {
                  /* Interleaving.  If IS_STORE is TRUE, the vectorization of
                     the interleaving chain was completed - free all the
                     stores in the chain.  */
                  vect_remove_stores (DR_GROUP_FIRST_DR (stmt_info));
                  gsi_remove (&si, true);
                  continue;
                }
              else
                {
                  /* Free the attached stmt_vec_info and remove the stmt.  */
                  free_stmt_vec_info (stmt);
                  gsi_remove (&si, true);
                  continue;
                }
            }
          gsi_next (&si);
        }                       /* stmts in BB */
    }                           /* BBs in loop */

  slpeel_make_loop_iterate_ntimes (loop, ratio);

  /* The memory tags and pointers in vectorized statements need to
     have their SSA forms updated.  FIXME: why can't this be delayed
     until all the loops have been transformed?  */
  update_ssa (TODO_update_ssa);

  if (vect_print_dump_info (REPORT_VECTORIZED_LOCATIONS))
    fprintf (vect_dump, "LOOP VECTORIZED.");
  if (loop->inner && vect_print_dump_info (REPORT_VECTORIZED_LOCATIONS))
    fprintf (vect_dump, "OUTER LOOP VECTORIZED.");
}