1/* SLP - Basic Block Vectorization
2   Copyright (C) 2007, 2008, 2009, 2010
3   Free Software Foundation, Inc.
4   Contributed by Dorit Naishlos <dorit@il.ibm.com>
5   and Ira Rosen <irar@il.ibm.com>
6
7This file is part of GCC.
8
9GCC is free software; you can redistribute it and/or modify it under
10the terms of the GNU General Public License as published by the Free
11Software Foundation; either version 3, or (at your option) any later
12version.
13
14GCC is distributed in the hope that it will be useful, but WITHOUT ANY
15WARRANTY; without even the implied warranty of MERCHANTABILITY or
16FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
17for more details.
18
19You should have received a copy of the GNU General Public License
20along with GCC; see the file COPYING3.  If not see
21<http://www.gnu.org/licenses/>.  */
22
23#include "config.h"
24#include "system.h"
25#include "coretypes.h"
26#include "tm.h"
27#include "ggc.h"
28#include "tree.h"
29#include "target.h"
30#include "basic-block.h"
31#include "diagnostic.h"
32#include "tree-flow.h"
33#include "tree-dump.h"
34#include "cfgloop.h"
35#include "cfglayout.h"
36#include "expr.h"
37#include "recog.h"
38#include "optabs.h"
39#include "tree-vectorizer.h"
40
41/* Extract the location of the basic block in the source code.
42   Return the basic block location if succeed and NULL if not.  */
43
44LOC
45find_bb_location (basic_block bb)
46{
47  gimple stmt = NULL;
48  gimple_stmt_iterator si;
49
50  if (!bb)
51    return UNKNOWN_LOC;
52
53  for (si = gsi_start_bb (bb); !gsi_end_p (si); gsi_next (&si))
54    {
55      stmt = gsi_stmt (si);
56      if (gimple_location (stmt) != UNKNOWN_LOC)
57        return gimple_location (stmt);
58    }
59
60  return UNKNOWN_LOC;
61}
62
63
64/* Recursively free the memory allocated for the SLP tree rooted at NODE.  */
65
66static void
67vect_free_slp_tree (slp_tree node)
68{
69  if (!node)
70    return;
71
72  if (SLP_TREE_LEFT (node))
73    vect_free_slp_tree (SLP_TREE_LEFT (node));
74
75  if (SLP_TREE_RIGHT (node))
76    vect_free_slp_tree (SLP_TREE_RIGHT (node));
77
78  VEC_free (gimple, heap, SLP_TREE_SCALAR_STMTS (node));
79
80  if (SLP_TREE_VEC_STMTS (node))
81    VEC_free (gimple, heap, SLP_TREE_VEC_STMTS (node));
82
83  free (node);
84}
85
86
87/* Free the memory allocated for the SLP instance.  */
88
89void
90vect_free_slp_instance (slp_instance instance)
91{
92  vect_free_slp_tree (SLP_INSTANCE_TREE (instance));
93  VEC_free (int, heap, SLP_INSTANCE_LOAD_PERMUTATION (instance));
94  VEC_free (slp_tree, heap, SLP_INSTANCE_LOADS (instance));
95}
96
97
98/* Get the defs for the rhs of STMT (collect them in DEF_STMTS0/1), check that
99   they are of a legal type and that they match the defs of the first stmt of
100   the SLP group (stored in FIRST_STMT_...).  */
101
102static bool
103vect_get_and_check_slp_defs (loop_vec_info loop_vinfo, bb_vec_info bb_vinfo,
104                             slp_tree slp_node, gimple stmt,
105			     VEC (gimple, heap) **def_stmts0,
106			     VEC (gimple, heap) **def_stmts1,
107			     enum vect_def_type *first_stmt_dt0,
108			     enum vect_def_type *first_stmt_dt1,
109			     tree *first_stmt_def0_type,
110			     tree *first_stmt_def1_type,
111			     tree *first_stmt_const_oprnd,
112			     int ncopies_for_cost,
113                             bool *pattern0, bool *pattern1)
114{
115  tree oprnd;
116  unsigned int i, number_of_oprnds;
117  tree def;
118  gimple def_stmt;
119  enum vect_def_type dt[2] = {vect_unknown_def_type, vect_unknown_def_type};
120  stmt_vec_info stmt_info =
121    vinfo_for_stmt (VEC_index (gimple, SLP_TREE_SCALAR_STMTS (slp_node), 0));
122  enum gimple_rhs_class rhs_class;
123  struct loop *loop = NULL;
124
125  if (loop_vinfo)
126    loop = LOOP_VINFO_LOOP (loop_vinfo);
127
128  rhs_class = get_gimple_rhs_class (gimple_assign_rhs_code (stmt));
129  number_of_oprnds = gimple_num_ops (stmt) - 1;	/* RHS only */
130
131  for (i = 0; i < number_of_oprnds; i++)
132    {
133      oprnd = gimple_op (stmt, i + 1);
134
135      if (!vect_is_simple_use (oprnd, loop_vinfo, bb_vinfo, &def_stmt, &def,
136                               &dt[i])
137	  || (!def_stmt && dt[i] != vect_constant_def))
138	{
139	  if (vect_print_dump_info (REPORT_SLP))
140	    {
141	      fprintf (vect_dump, "Build SLP failed: can't find def for ");
142	      print_generic_expr (vect_dump, oprnd, TDF_SLIM);
143	    }
144
145	  return false;
146	}
147
148      /* Check if DEF_STMT is a part of a pattern in LOOP and get the def stmt
149         from the pattern. Check that all the stmts of the node are in the
150         pattern.  */
151      if (loop && def_stmt && gimple_bb (def_stmt)
152          && flow_bb_inside_loop_p (loop, gimple_bb (def_stmt))
153          && vinfo_for_stmt (def_stmt)
154          && STMT_VINFO_IN_PATTERN_P (vinfo_for_stmt (def_stmt)))
155        {
156          if (!*first_stmt_dt0)
157            *pattern0 = true;
158          else
159            {
160              if (i == 1 && !*first_stmt_dt1)
161                *pattern1 = true;
162              else if ((i == 0 && !*pattern0) || (i == 1 && !*pattern1))
163                {
164                  if (vect_print_dump_info (REPORT_DETAILS))
165                    {
166                      fprintf (vect_dump, "Build SLP failed: some of the stmts"
167                                     " are in a pattern, and others are not ");
168                      print_generic_expr (vect_dump, oprnd, TDF_SLIM);
169                    }
170
171                  return false;
172                }
173            }
174
175          def_stmt = STMT_VINFO_RELATED_STMT (vinfo_for_stmt (def_stmt));
176          dt[i] = STMT_VINFO_DEF_TYPE (vinfo_for_stmt (def_stmt));
177
178          if (*dt == vect_unknown_def_type)
179            {
180              if (vect_print_dump_info (REPORT_DETAILS))
181                fprintf (vect_dump, "Unsupported pattern.");
182              return false;
183            }
184
185          switch (gimple_code (def_stmt))
186            {
187              case GIMPLE_PHI:
188                def = gimple_phi_result (def_stmt);
189                break;
190
191              case GIMPLE_ASSIGN:
192                def = gimple_assign_lhs (def_stmt);
193                break;
194
195              default:
196                if (vect_print_dump_info (REPORT_DETAILS))
197                  fprintf (vect_dump, "unsupported defining stmt: ");
198                return false;
199            }
200        }
201
202      if (!*first_stmt_dt0)
203	{
204	  /* op0 of the first stmt of the group - store its info.  */
205	  *first_stmt_dt0 = dt[i];
206	  if (def)
207	    *first_stmt_def0_type = TREE_TYPE (def);
208	  else
209	    *first_stmt_const_oprnd = oprnd;
210
211	  /* Analyze costs (for the first stmt of the group only).  */
212	  if (rhs_class != GIMPLE_SINGLE_RHS)
213	    /* Not memory operation (we don't call this functions for loads).  */
214	    vect_model_simple_cost (stmt_info, ncopies_for_cost, dt, slp_node);
215	  else
216	    /* Store.  */
217	    vect_model_store_cost (stmt_info, ncopies_for_cost, dt[0], slp_node);
218	}
219
220      else
221	{
222	  if (!*first_stmt_dt1 && i == 1)
223	    {
224	      /* op1 of the first stmt of the group - store its info.  */
225	      *first_stmt_dt1 = dt[i];
226	      if (def)
227		*first_stmt_def1_type = TREE_TYPE (def);
228	      else
229		{
230		  /* We assume that the stmt contains only one constant
231		     operand. We fail otherwise, to be on the safe side.  */
232		  if (*first_stmt_const_oprnd)
233		    {
234		      if (vect_print_dump_info (REPORT_SLP))
235			fprintf (vect_dump, "Build SLP failed: two constant "
236				 "oprnds in stmt");
237		      return false;
238		    }
239		  *first_stmt_const_oprnd = oprnd;
240		}
241	    }
242	  else
243	    {
244	      /* Not first stmt of the group, check that the def-stmt/s match
245		 the def-stmt/s of the first stmt.  */
246	      if ((i == 0
247		   && (*first_stmt_dt0 != dt[i]
248		       || (*first_stmt_def0_type && def
249			   && !types_compatible_p (*first_stmt_def0_type,
250						   TREE_TYPE (def)))))
251		  || (i == 1
252		      && (*first_stmt_dt1 != dt[i]
253			  || (*first_stmt_def1_type && def
254			      && !types_compatible_p (*first_stmt_def1_type,
255						      TREE_TYPE (def)))))
256		  || (!def
257		      && !types_compatible_p (TREE_TYPE (*first_stmt_const_oprnd),
258					      TREE_TYPE (oprnd))))
259		{
260		  if (vect_print_dump_info (REPORT_SLP))
261		    fprintf (vect_dump, "Build SLP failed: different types ");
262
263		  return false;
264		}
265	    }
266	}
267
268      /* Check the types of the definitions.  */
269      switch (dt[i])
270	{
271	case vect_constant_def:
272	case vect_external_def:
273	  break;
274
275	case vect_internal_def:
276	  if (i == 0)
277	    VEC_safe_push (gimple, heap, *def_stmts0, def_stmt);
278	  else
279	    VEC_safe_push (gimple, heap, *def_stmts1, def_stmt);
280	  break;
281
282	default:
283	  /* FORNOW: Not supported.  */
284	  if (vect_print_dump_info (REPORT_SLP))
285	    {
286	      fprintf (vect_dump, "Build SLP failed: illegal type of def ");
287	      print_generic_expr (vect_dump, def, TDF_SLIM);
288	    }
289
290	  return false;
291	}
292    }
293
294  return true;
295}
296
297
/* Recursively build an SLP tree starting from NODE.
   Fail (and return FALSE) if def-stmts are not isomorphic, require data
   permutation or are of unsupported types of operation. Otherwise, return
   TRUE.

   GROUP_SIZE is the number of scalar statements in the group.
   INSIDE_COST/OUTSIDE_COST accumulate the instance costs.
   MAX_NUNITS tracks the largest number of vector elements seen, used to
   recompute the unrolling factor.  LOAD_PERMUTATION records, for each load,
   its position in its interleaving chain; LOADS collects the load nodes
   that need permutation.  */

static bool
vect_build_slp_tree (loop_vec_info loop_vinfo, bb_vec_info bb_vinfo,
                     slp_tree *node, unsigned int group_size,
                     int *inside_cost, int *outside_cost,
                     int ncopies_for_cost, unsigned int *max_nunits,
                     VEC (int, heap) **load_permutation,
                     VEC (slp_tree, heap) **loads,
                     unsigned int vectorization_factor)
{
  VEC (gimple, heap) *def_stmts0 = VEC_alloc (gimple, heap, group_size);
  VEC (gimple, heap) *def_stmts1 =  VEC_alloc (gimple, heap, group_size);
  unsigned int i;
  VEC (gimple, heap) *stmts = SLP_TREE_SCALAR_STMTS (*node);
  gimple stmt = VEC_index (gimple, stmts, 0);
  enum vect_def_type first_stmt_dt0 = vect_uninitialized_def;
  enum vect_def_type first_stmt_dt1 = vect_uninitialized_def;
  enum tree_code first_stmt_code = ERROR_MARK, rhs_code;
  tree first_stmt_def1_type = NULL_TREE, first_stmt_def0_type = NULL_TREE;
  tree lhs;
  bool stop_recursion = false, need_same_oprnds = false;
  tree vectype, scalar_type, first_op1 = NULL_TREE;
  unsigned int ncopies;
  optab optab;
  int icode;
  enum machine_mode optab_op2_mode;
  enum machine_mode vec_mode;
  tree first_stmt_const_oprnd = NULL_TREE;
  struct data_reference *first_dr;
  bool pattern0 = false, pattern1 = false;
  HOST_WIDE_INT dummy;
  bool permutation = false;
  unsigned int load_place;
  gimple first_load;

  /* For every stmt in NODE find its def stmt/s.  */
  for (i = 0; VEC_iterate (gimple, stmts, i, stmt); i++)
    {
      if (vect_print_dump_info (REPORT_SLP))
	{
	  fprintf (vect_dump, "Build SLP for ");
	  print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM);
	}

      /* Only assignments and calls have an LHS we can handle.  */
      lhs = gimple_get_lhs (stmt);
      if (lhs == NULL_TREE)
	{
	  if (vect_print_dump_info (REPORT_SLP))
	    {
	      fprintf (vect_dump,
		       "Build SLP failed: not GIMPLE_ASSIGN nor GIMPLE_CALL");
	      print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM);
	    }

	  return false;
	}

      scalar_type = vect_get_smallest_scalar_type (stmt, &dummy, &dummy);
      vectype = get_vectype_for_scalar_type (scalar_type);
      if (!vectype)
        {
          if (vect_print_dump_info (REPORT_SLP))
            {
              fprintf (vect_dump, "Build SLP failed: unsupported data-type ");
              print_generic_expr (vect_dump, scalar_type, TDF_SLIM);
            }
          return false;
        }

      ncopies = vectorization_factor / TYPE_VECTOR_SUBPARTS (vectype);
      if (ncopies != 1)
        {
	  if (vect_print_dump_info (REPORT_SLP))
            fprintf (vect_dump, "SLP with multiple types ");

          /* FORNOW: multiple types are unsupported in BB SLP.  */
	  if (bb_vinfo)
	    return false;
        }

      /* In case of multiple types we need to detect the smallest type.  */
      if (*max_nunits < TYPE_VECTOR_SUBPARTS (vectype))
        *max_nunits = TYPE_VECTOR_SUBPARTS (vectype);

      if (is_gimple_call (stmt))
	rhs_code = CALL_EXPR;
      else
	rhs_code = gimple_assign_rhs_code (stmt);

      /* Check the operation.  */
      if (i == 0)
	{
	  first_stmt_code = rhs_code;

	  /* Shift arguments should be equal in all the packed stmts for a
	     vector shift with scalar shift operand.  */
	  if (rhs_code == LSHIFT_EXPR || rhs_code == RSHIFT_EXPR
	      || rhs_code == LROTATE_EXPR
	      || rhs_code == RROTATE_EXPR)
	    {
	      vec_mode = TYPE_MODE (vectype);

	      /* First see if we have a vector/vector shift.  */
	      optab = optab_for_tree_code (rhs_code, vectype,
					   optab_vector);

	      if (!optab
		  || (optab->handlers[(int) vec_mode].insn_code
		      == CODE_FOR_nothing))
		{
		  /* No vector/vector shift, try for a vector/scalar shift.  */
		  optab = optab_for_tree_code (rhs_code, vectype,
					       optab_scalar);

		  if (!optab)
		    {
		      if (vect_print_dump_info (REPORT_SLP))
			fprintf (vect_dump, "Build SLP failed: no optab.");
		      return false;
		    }
		  icode = (int) optab->handlers[(int) vec_mode].insn_code;
		  if (icode == CODE_FOR_nothing)
		    {
		      if (vect_print_dump_info (REPORT_SLP))
			fprintf (vect_dump, "Build SLP failed: "
				            "op not supported by target.");
		      return false;
		    }
		  /* A scalar second operand must be identical in all
		     statements of the group; remember it for comparison.  */
		  optab_op2_mode = insn_data[icode].operand[2].mode;
		  if (!VECTOR_MODE_P (optab_op2_mode))
		    {
		      need_same_oprnds = true;
		      first_op1 = gimple_assign_rhs2 (stmt);
		    }
		}
	    }
	}
      else
	{
	  /* Subsequent statements must match the first one's operation
	     (REALPART_EXPR/IMAGPART_EXPR pairs are allowed to mix).  */
	  if (first_stmt_code != rhs_code
	      && (first_stmt_code != IMAGPART_EXPR
		  || rhs_code != REALPART_EXPR)
	      && (first_stmt_code != REALPART_EXPR
		  || rhs_code != IMAGPART_EXPR))
	    {
	      if (vect_print_dump_info (REPORT_SLP))
		{
		  fprintf (vect_dump,
			   "Build SLP failed: different operation in stmt ");
		  print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM);
		}

	      return false;
	    }

	  if (need_same_oprnds
	      && !operand_equal_p (first_op1, gimple_assign_rhs2 (stmt), 0))
	    {
	      if (vect_print_dump_info (REPORT_SLP))
		{
		  fprintf (vect_dump,
			   "Build SLP failed: different shift arguments in ");
		  print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM);
		}

	      return false;
	    }
	}

      /* Strided store or load.  */
      if (STMT_VINFO_STRIDED_ACCESS (vinfo_for_stmt (stmt)))
	{
	  if (REFERENCE_CLASS_P (lhs))
	    {
	      /* Store.  */
	      if (!vect_get_and_check_slp_defs (loop_vinfo, bb_vinfo, *node,
						stmt, &def_stmts0, &def_stmts1,
						&first_stmt_dt0,
						&first_stmt_dt1,
						&first_stmt_def0_type,
						&first_stmt_def1_type,
						&first_stmt_const_oprnd,
						ncopies_for_cost,
                                                &pattern0, &pattern1))
		return false;
	    }
	    else
	      {
		/* Load.  */
                /* FORNOW: Check that there is no gap between the loads.  */
                if ((DR_GROUP_FIRST_DR (vinfo_for_stmt (stmt)) == stmt
                     && DR_GROUP_GAP (vinfo_for_stmt (stmt)) != 0)
                    || (DR_GROUP_FIRST_DR (vinfo_for_stmt (stmt)) != stmt
                        && DR_GROUP_GAP (vinfo_for_stmt (stmt)) != 1))
                  {
                    if (vect_print_dump_info (REPORT_SLP))
                      {
                        fprintf (vect_dump, "Build SLP failed: strided "
                                            "loads have gaps ");
                        print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM);
                      }

                    return false;
                  }

                /* Check that the size of interleaved loads group is not
                   greater than the SLP group size.  */
                if (DR_GROUP_SIZE (vinfo_for_stmt (stmt))
                    > ncopies * group_size)
                  {
                    if (vect_print_dump_info (REPORT_SLP))
                      {
                        fprintf (vect_dump, "Build SLP failed: the number of "
                                            "interleaved loads is greater than"
                                            " the SLP group size ");
                        print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM);
                      }

                    return false;
                  }

                first_load = DR_GROUP_FIRST_DR (vinfo_for_stmt (stmt));

              /* Alignment check and cost analysis are done only once,
                 for the first load of the interleaving chain.  */
              if (first_load == stmt)
                {
                  first_dr = STMT_VINFO_DATA_REF (vinfo_for_stmt (stmt));
                  if (vect_supportable_dr_alignment (first_dr)
                      == dr_unaligned_unsupported)
                    {
                      if (vect_print_dump_info (REPORT_SLP))
                        {
                          fprintf (vect_dump, "Build SLP failed: unsupported "
                                              "unaligned load ");
                          print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM);
                        }

                      return false;
                    }

                  /* Analyze costs (for the first stmt in the group).  */
                  vect_model_load_cost (vinfo_for_stmt (stmt),
                                        ncopies_for_cost, *node);
                }

              /* Store the place of this load in the interleaving chain. In
                 case that permutation is needed we later decide if a specific
                 permutation is supported.  */
              load_place = vect_get_place_in_interleaving_chain (stmt,
                                                                 first_load);
              if (load_place != i)
                permutation = true;

              VEC_safe_push (int, heap, *load_permutation, load_place);

              /* We stop the tree when we reach a group of loads.  */
              stop_recursion = true;
             continue;
           }
        } /* Strided access.  */
      else
	{
	  if (TREE_CODE_CLASS (rhs_code) == tcc_reference)
	    {
	      /* Not strided load. */
	      if (vect_print_dump_info (REPORT_SLP))
		{
		  fprintf (vect_dump, "Build SLP failed: not strided load ");
		  print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM);
		}

	      /* FORNOW: Not strided loads are not supported.  */
	      return false;
	    }

	  /* Not memory operation.  */
	  if (TREE_CODE_CLASS (rhs_code) != tcc_binary
	      && TREE_CODE_CLASS (rhs_code) != tcc_unary)
	    {
	      if (vect_print_dump_info (REPORT_SLP))
		{
		  fprintf (vect_dump, "Build SLP failed: operation");
		  fprintf (vect_dump, " unsupported ");
		  print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM);
		}

	      return false;
	    }

	  /* Find the def-stmts.  */
	  if (!vect_get_and_check_slp_defs (loop_vinfo, bb_vinfo, *node, stmt,
					    &def_stmts0, &def_stmts1,
					    &first_stmt_dt0, &first_stmt_dt1,
					    &first_stmt_def0_type,
					    &first_stmt_def1_type,
					    &first_stmt_const_oprnd,
					    ncopies_for_cost,
                                            &pattern0, &pattern1))
	    return false;
	}
    }

  /* Add the costs of the node to the overall instance costs.  */
  *inside_cost += SLP_TREE_INSIDE_OF_LOOP_COST (*node);
  *outside_cost += SLP_TREE_OUTSIDE_OF_LOOP_COST (*node);

  /* Strided loads were reached - stop the recursion.  */
  if (stop_recursion)
    {
      if (permutation)
        {
          VEC_safe_push (slp_tree, heap, *loads, *node);
          *inside_cost += TARG_VEC_PERMUTE_COST * group_size;
        }

      return true;
    }

  /* Create SLP_TREE nodes for the definition node/s.  */
  if (first_stmt_dt0 == vect_internal_def)
    {
      slp_tree left_node = XNEW (struct _slp_tree);
      SLP_TREE_SCALAR_STMTS (left_node) = def_stmts0;
      SLP_TREE_VEC_STMTS (left_node) = NULL;
      SLP_TREE_LEFT (left_node) = NULL;
      SLP_TREE_RIGHT (left_node) = NULL;
      SLP_TREE_OUTSIDE_OF_LOOP_COST (left_node) = 0;
      SLP_TREE_INSIDE_OF_LOOP_COST (left_node) = 0;
      if (!vect_build_slp_tree (loop_vinfo, bb_vinfo, &left_node, group_size,
				inside_cost, outside_cost, ncopies_for_cost,
				max_nunits, load_permutation, loads,
				vectorization_factor))
	return false;

      SLP_TREE_LEFT (*node) = left_node;
    }

  if (first_stmt_dt1 == vect_internal_def)
    {
      slp_tree right_node = XNEW (struct _slp_tree);
      SLP_TREE_SCALAR_STMTS (right_node) = def_stmts1;
      SLP_TREE_VEC_STMTS (right_node) = NULL;
      SLP_TREE_LEFT (right_node) = NULL;
      SLP_TREE_RIGHT (right_node) = NULL;
      SLP_TREE_OUTSIDE_OF_LOOP_COST (right_node) = 0;
      SLP_TREE_INSIDE_OF_LOOP_COST (right_node) = 0;
      if (!vect_build_slp_tree (loop_vinfo, bb_vinfo, &right_node, group_size,
				inside_cost, outside_cost, ncopies_for_cost,
				max_nunits, load_permutation, loads,
				vectorization_factor))
	return false;

      SLP_TREE_RIGHT (*node) = right_node;
    }

  return true;
}
658
659
660static void
661vect_print_slp_tree (slp_tree node)
662{
663  int i;
664  gimple stmt;
665
666  if (!node)
667    return;
668
669  fprintf (vect_dump, "node ");
670  for (i = 0; VEC_iterate (gimple, SLP_TREE_SCALAR_STMTS (node), i, stmt); i++)
671    {
672      fprintf (vect_dump, "\n\tstmt %d ", i);
673      print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM);
674    }
675  fprintf (vect_dump, "\n");
676
677  vect_print_slp_tree (SLP_TREE_LEFT (node));
678  vect_print_slp_tree (SLP_TREE_RIGHT (node));
679}
680
681
682/* Mark the tree rooted at NODE with MARK (PURE_SLP or HYBRID).
683   If MARK is HYBRID, it refers to a specific stmt in NODE (the stmt at index
684   J). Otherwise, MARK is PURE_SLP and J is -1, which indicates that all the
685   stmts in NODE are to be marked.  */
686
687static void
688vect_mark_slp_stmts (slp_tree node, enum slp_vect_type mark, int j)
689{
690  int i;
691  gimple stmt;
692
693  if (!node)
694    return;
695
696  for (i = 0; VEC_iterate (gimple, SLP_TREE_SCALAR_STMTS (node), i, stmt); i++)
697    if (j < 0 || i == j)
698      STMT_SLP_TYPE (vinfo_for_stmt (stmt)) = mark;
699
700  vect_mark_slp_stmts (SLP_TREE_LEFT (node), mark, j);
701  vect_mark_slp_stmts (SLP_TREE_RIGHT (node), mark, j);
702}
703
704
705/* Mark the statements of the tree rooted at NODE as relevant (vect_used).  */
706
707static void
708vect_mark_slp_stmts_relevant (slp_tree node)
709{
710  int i;
711  gimple stmt;
712  stmt_vec_info stmt_info;
713
714  if (!node)
715    return;
716
717  for (i = 0; VEC_iterate (gimple, SLP_TREE_SCALAR_STMTS (node), i, stmt); i++)
718    {
719      stmt_info = vinfo_for_stmt (stmt);
720      gcc_assert (!STMT_VINFO_RELEVANT (stmt_info)
721                  || STMT_VINFO_RELEVANT (stmt_info) == vect_used_in_scope);
722      STMT_VINFO_RELEVANT (stmt_info) = vect_used_in_scope;
723    }
724
725  vect_mark_slp_stmts_relevant (SLP_TREE_LEFT (node));
726  vect_mark_slp_stmts_relevant (SLP_TREE_RIGHT (node));
727}
728
729
/* Check if the permutation required by the SLP INSTANCE is supported.
   Reorganize the SLP nodes stored in SLP_INSTANCE_LOADS if needed.  */

static bool
vect_supported_slp_permutation_p (slp_instance instance)
{
  slp_tree node = VEC_index (slp_tree, SLP_INSTANCE_LOADS (instance), 0);
  gimple stmt = VEC_index (gimple, SLP_TREE_SCALAR_STMTS (node), 0);
  gimple first_load = DR_GROUP_FIRST_DR (vinfo_for_stmt (stmt));
  VEC (slp_tree, heap) *sorted_loads = NULL;
  int index;
  slp_tree *tmp_loads = NULL;
  int group_size = SLP_INSTANCE_GROUP_SIZE (instance), i, j;
  slp_tree load;

  /* FORNOW: The only supported loads permutation is loads from the same
     location in all the loads in the node, when the data-refs in
     nodes of LOADS constitute an interleaving chain.
     Sort the nodes according to the order of accesses in the chain.  */
  /* The two cursors advance in lock-step: I walks the permutation vector
     in strides of GROUP_SIZE (one entry per load node, since each node
     contributes GROUP_SIZE identical indices), while J walks the load
     nodes one by one.  */
  tmp_loads = (slp_tree *) xmalloc (sizeof (slp_tree) * group_size);
  for (i = 0, j = 0;
       VEC_iterate (int, SLP_INSTANCE_LOAD_PERMUTATION (instance), i, index)
       && VEC_iterate (slp_tree, SLP_INSTANCE_LOADS (instance), j, load);
       i += group_size, j++)
    {
      gimple scalar_stmt = VEC_index (gimple, SLP_TREE_SCALAR_STMTS (load), 0);
      /* Check that the loads are all in the same interleaving chain.  */
      if (DR_GROUP_FIRST_DR (vinfo_for_stmt (scalar_stmt)) != first_load)
        {
          if (vect_print_dump_info (REPORT_DETAILS))
            {
              fprintf (vect_dump, "Build SLP failed: unsupported data "
                                   "permutation ");
              print_gimple_stmt (vect_dump, scalar_stmt, 0, TDF_SLIM);
            }

          free (tmp_loads);
          return false;
        }

      /* Place the node at its position in the interleaving chain.  */
      tmp_loads[index] = load;
    }

  /* Rebuild SLP_INSTANCE_LOADS in chain order.  */
  sorted_loads = VEC_alloc (slp_tree, heap, group_size);
  for (i = 0; i < group_size; i++)
     VEC_safe_push (slp_tree, heap, sorted_loads, tmp_loads[i]);

  VEC_free (slp_tree, heap, SLP_INSTANCE_LOADS (instance));
  SLP_INSTANCE_LOADS (instance) = sorted_loads;
  free (tmp_loads);

  /* Dry-run (ANALYZE_ONLY = true) the permutation code generation to
     check that the target supports it.  */
  if (!vect_transform_slp_perm_load (stmt, NULL, NULL,
                                     SLP_INSTANCE_UNROLLING_FACTOR (instance),
                                     instance, true))
    return false;

  return true;
}
788
789
790/* Check if the required load permutation is supported.
791   LOAD_PERMUTATION contains a list of indices of the loads.
792   In SLP this permutation is relative to the order of strided stores that are
793   the base of the SLP instance.  */
794
795static bool
796vect_supported_load_permutation_p (slp_instance slp_instn, int group_size,
797                                   VEC (int, heap) *load_permutation)
798{
799  int i = 0, j, prev = -1, next, k;
800  bool supported;
801  sbitmap load_index;
802
803  /* FORNOW: permutations are only supported in SLP.  */
804  if (!slp_instn)
805    return false;
806
807  if (vect_print_dump_info (REPORT_SLP))
808    {
809      fprintf (vect_dump, "Load permutation ");
810      for (i = 0; VEC_iterate (int, load_permutation, i, next); i++)
811        fprintf (vect_dump, "%d ", next);
812    }
813
814  /* FORNOW: the only supported permutation is 0..01..1.. of length equal to
815     GROUP_SIZE and where each sequence of same drs is of GROUP_SIZE length as
816     well.  */
817  if (VEC_length (int, load_permutation)
818      != (unsigned int) (group_size * group_size))
819    return false;
820
821  supported = true;
822  load_index = sbitmap_alloc (group_size);
823  sbitmap_zero (load_index);
824  for (j = 0; j < group_size; j++)
825    {
826      for (i = j * group_size, k = 0;
827           VEC_iterate (int, load_permutation, i, next) && k < group_size;
828           i++, k++)
829       {
830         if (i != j * group_size && next != prev)
831          {
832            supported = false;
833            break;
834          }
835
836         prev = next;
837       }
838
839      if (TEST_BIT (load_index, prev))
840        {
841          supported = false;
842          break;
843        }
844
845      SET_BIT (load_index, prev);
846    }
847
848  for (j = 0; j < group_size; j++)
849    if (!TEST_BIT (load_index, j))
850      return false;
851
852  sbitmap_free (load_index);
853
854  if (supported && i == group_size * group_size
855      && vect_supported_slp_permutation_p (slp_instn))
856    return true;
857
858  return false;
859}
860
861
862/* Find the first load in the loop that belongs to INSTANCE.
863   When loads are in several SLP nodes, there can be a case in which the first
864   load does not appear in the first SLP node to be transformed, causing
865   incorrect order of statements. Since we generate all the loads together,
866   they must be inserted before the first load of the SLP instance and not
867   before the first load of the first node of the instance.  */
868static gimple
869vect_find_first_load_in_slp_instance (slp_instance instance)
870{
871  int i, j;
872  slp_tree load_node;
873  gimple first_load = NULL, load;
874
875  for (i = 0;
876       VEC_iterate (slp_tree, SLP_INSTANCE_LOADS (instance), i, load_node);
877       i++)
878    for (j = 0;
879         VEC_iterate (gimple, SLP_TREE_SCALAR_STMTS (load_node), j, load);
880         j++)
881      first_load = get_earlier_stmt (load, first_load);
882
883  return first_load;
884}
885
886
887/* Analyze an SLP instance starting from a group of strided stores. Call
888   vect_build_slp_tree to build a tree of packed stmts if possible.
889   Return FALSE if it's impossible to SLP any stmt in the loop.  */
890
891static bool
892vect_analyze_slp_instance (loop_vec_info loop_vinfo, bb_vec_info bb_vinfo,
893                           gimple stmt)
894{
895  slp_instance new_instance;
896  slp_tree node = XNEW (struct _slp_tree);
897  unsigned int group_size = DR_GROUP_SIZE (vinfo_for_stmt (stmt));
898  unsigned int unrolling_factor = 1, nunits;
899  tree vectype, scalar_type;
900  gimple next;
901  unsigned int vectorization_factor = 0;
902  int inside_cost = 0, outside_cost = 0, ncopies_for_cost;
903  unsigned int max_nunits = 0;
904  VEC (int, heap) *load_permutation;
905  VEC (slp_tree, heap) *loads;
906
907  scalar_type = TREE_TYPE (DR_REF (STMT_VINFO_DATA_REF (
908                                             vinfo_for_stmt (stmt))));
909  vectype = get_vectype_for_scalar_type (scalar_type);
910  if (!vectype)
911    {
912      if (vect_print_dump_info (REPORT_SLP))
913        {
914          fprintf (vect_dump, "Build SLP failed: unsupported data-type ");
915          print_generic_expr (vect_dump, scalar_type, TDF_SLIM);
916        }
917      return false;
918    }
919
920  nunits = TYPE_VECTOR_SUBPARTS (vectype);
921  if (loop_vinfo)
922    vectorization_factor = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
923  else
924    /* No multitypes in BB SLP.  */
925    vectorization_factor = nunits;
926
927  /* Calculate the unrolling factor.  */
928  unrolling_factor = least_common_multiple (nunits, group_size) / group_size;
929  if (unrolling_factor != 1 && !loop_vinfo)
930    {
931      if (vect_print_dump_info (REPORT_SLP))
932        fprintf (vect_dump, "Build SLP failed: unrolling required in basic"
933                            " block SLP");
934
935      return false;
936    }
937
938  /* Create a node (a root of the SLP tree) for the packed strided stores.  */
939  SLP_TREE_SCALAR_STMTS (node) = VEC_alloc (gimple, heap, group_size);
940  next = stmt;
941  /* Collect the stores and store them in SLP_TREE_SCALAR_STMTS.  */
942  while (next)
943    {
944      VEC_safe_push (gimple, heap, SLP_TREE_SCALAR_STMTS (node), next);
945      next = DR_GROUP_NEXT_DR (vinfo_for_stmt (next));
946    }
947
948  SLP_TREE_VEC_STMTS (node) = NULL;
949  SLP_TREE_NUMBER_OF_VEC_STMTS (node) = 0;
950  SLP_TREE_LEFT (node) = NULL;
951  SLP_TREE_RIGHT (node) = NULL;
952  SLP_TREE_OUTSIDE_OF_LOOP_COST (node) = 0;
953  SLP_TREE_INSIDE_OF_LOOP_COST (node) = 0;
954
955  /* Calculate the number of vector stmts to create based on the unrolling
956     factor (number of vectors is 1 if NUNITS >= GROUP_SIZE, and is
957     GROUP_SIZE / NUNITS otherwise.  */
958  ncopies_for_cost = unrolling_factor * group_size / nunits;
959
960  load_permutation = VEC_alloc (int, heap, group_size * group_size);
961  loads = VEC_alloc (slp_tree, heap, group_size);
962
963  /* Build the tree for the SLP instance.  */
964  if (vect_build_slp_tree (loop_vinfo, bb_vinfo, &node, group_size,
965                           &inside_cost, &outside_cost, ncopies_for_cost,
966			   &max_nunits, &load_permutation, &loads,
967			   vectorization_factor))
968    {
969      /* Create a new SLP instance.  */
970      new_instance = XNEW (struct _slp_instance);
971      SLP_INSTANCE_TREE (new_instance) = node;
972      SLP_INSTANCE_GROUP_SIZE (new_instance) = group_size;
973      /* Calculate the unrolling factor based on the smallest type in the
974         loop.  */
975      if (max_nunits > nunits)
976        unrolling_factor = least_common_multiple (max_nunits, group_size)
977                           / group_size;
978
979      SLP_INSTANCE_UNROLLING_FACTOR (new_instance) = unrolling_factor;
980      SLP_INSTANCE_OUTSIDE_OF_LOOP_COST (new_instance) = outside_cost;
981      SLP_INSTANCE_INSIDE_OF_LOOP_COST (new_instance) = inside_cost;
982      SLP_INSTANCE_LOADS (new_instance) = loads;
983      SLP_INSTANCE_FIRST_LOAD_STMT (new_instance) = NULL;
984      SLP_INSTANCE_LOAD_PERMUTATION (new_instance) = load_permutation;
985      if (VEC_length (slp_tree, loads))
986        {
987          if (!vect_supported_load_permutation_p (new_instance, group_size,
988                                                  load_permutation))
989            {
990              if (vect_print_dump_info (REPORT_SLP))
991                {
992                  fprintf (vect_dump, "Build SLP failed: unsupported load "
993                                      "permutation ");
994                  print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM);
995                }
996
997              vect_free_slp_instance (new_instance);
998              return false;
999            }
1000
1001          SLP_INSTANCE_FIRST_LOAD_STMT (new_instance)
1002             = vect_find_first_load_in_slp_instance (new_instance);
1003        }
1004      else
1005        VEC_free (int, heap, SLP_INSTANCE_LOAD_PERMUTATION (new_instance));
1006
1007      if (loop_vinfo)
1008        VEC_safe_push (slp_instance, heap,
1009                       LOOP_VINFO_SLP_INSTANCES (loop_vinfo),
1010  		       new_instance);
1011      else
1012        VEC_safe_push (slp_instance, heap, BB_VINFO_SLP_INSTANCES (bb_vinfo),
1013                       new_instance);
1014
1015      if (vect_print_dump_info (REPORT_SLP))
1016	vect_print_slp_tree (node);
1017
1018      return true;
1019    }
1020
1021  /* Failed to SLP.  */
1022  /* Free the allocated memory.  */
1023  vect_free_slp_tree (node);
1024  VEC_free (int, heap, load_permutation);
1025  VEC_free (slp_tree, heap, loads);
1026
1027  return false;
1028}
1029
1030
/* Check if there are stmts in the loop that can be vectorized using SLP.
   Build SLP trees of packed scalar stmts if SLP is possible.  */
1033
1034bool
1035vect_analyze_slp (loop_vec_info loop_vinfo, bb_vec_info bb_vinfo)
1036{
1037  unsigned int i;
1038  VEC (gimple, heap) *strided_stores;
1039  gimple store;
1040  bool ok = false;
1041
1042  if (vect_print_dump_info (REPORT_SLP))
1043    fprintf (vect_dump, "=== vect_analyze_slp ===");
1044
1045  if (loop_vinfo)
1046    strided_stores = LOOP_VINFO_STRIDED_STORES (loop_vinfo);
1047  else
1048    strided_stores = BB_VINFO_STRIDED_STORES (bb_vinfo);
1049
1050  for (i = 0; VEC_iterate (gimple, strided_stores, i, store); i++)
1051    if (vect_analyze_slp_instance (loop_vinfo, bb_vinfo, store))
1052      ok = true;
1053
1054  if (bb_vinfo && !ok)
1055    {
1056      if (vect_print_dump_info (REPORT_SLP))
1057        fprintf (vect_dump, "Failed to SLP the basic block.");
1058
1059      return false;
1060    }
1061
1062  return true;
1063}
1064
1065
1066/* For each possible SLP instance decide whether to SLP it and calculate overall
1067   unrolling factor needed to SLP the loop.  */
1068
1069void
1070vect_make_slp_decision (loop_vec_info loop_vinfo)
1071{
1072  unsigned int i, unrolling_factor = 1;
1073  VEC (slp_instance, heap) *slp_instances = LOOP_VINFO_SLP_INSTANCES (loop_vinfo);
1074  slp_instance instance;
1075  int decided_to_slp = 0;
1076
1077  if (vect_print_dump_info (REPORT_SLP))
1078    fprintf (vect_dump, "=== vect_make_slp_decision ===");
1079
1080  for (i = 0; VEC_iterate (slp_instance, slp_instances, i, instance); i++)
1081    {
1082      /* FORNOW: SLP if you can.  */
1083      if (unrolling_factor < SLP_INSTANCE_UNROLLING_FACTOR (instance))
1084	unrolling_factor = SLP_INSTANCE_UNROLLING_FACTOR (instance);
1085
1086      /* Mark all the stmts that belong to INSTANCE as PURE_SLP stmts. Later we
1087	 call vect_detect_hybrid_slp () to find stmts that need hybrid SLP and
1088	 loop-based vectorization. Such stmts will be marked as HYBRID.  */
1089      vect_mark_slp_stmts (SLP_INSTANCE_TREE (instance), pure_slp, -1);
1090      decided_to_slp++;
1091    }
1092
1093  LOOP_VINFO_SLP_UNROLLING_FACTOR (loop_vinfo) = unrolling_factor;
1094
1095  if (decided_to_slp && vect_print_dump_info (REPORT_SLP))
1096    fprintf (vect_dump, "Decided to SLP %d instances. Unrolling factor %d",
1097	     decided_to_slp, unrolling_factor);
1098}
1099
1100
1101/* Find stmts that must be both vectorized and SLPed (since they feed stmts that
1102   can't be SLPed) in the tree rooted at NODE. Mark such stmts as HYBRID.  */
1103
static void
vect_detect_hybrid_slp_stmts (slp_tree node)
{
  int i;
  gimple stmt;
  imm_use_iterator imm_iter;
  gimple use_stmt;
  stmt_vec_info stmt_vinfo;

  if (!node)
    return;

  /* For each pure-SLP scalar stmt of NODE whose result is an SSA name,
     scan the immediate uses of that name.  A use in a stmt that has no
     SLP type but is relevant (or is a vectorizable cycle def, e.g. a
     reduction) means the def is also needed by loop-based vectorization,
     so mark the stmts of NODE at position I as HYBRID.  */
  for (i = 0; VEC_iterate (gimple, SLP_TREE_SCALAR_STMTS (node), i, stmt); i++)
    if (PURE_SLP_STMT (vinfo_for_stmt (stmt))
	&& TREE_CODE (gimple_op (stmt, 0)) == SSA_NAME)
      FOR_EACH_IMM_USE_STMT (use_stmt, imm_iter, gimple_op (stmt, 0))
	if ((stmt_vinfo = vinfo_for_stmt (use_stmt))
	    && !STMT_SLP_TYPE (stmt_vinfo)
            && (STMT_VINFO_RELEVANT (stmt_vinfo)
                || VECTORIZABLE_CYCLE_DEF (STMT_VINFO_DEF_TYPE (stmt_vinfo))))
	  vect_mark_slp_stmts (node, hybrid, i);

  /* Recurse into the operand subtrees.  */
  vect_detect_hybrid_slp_stmts (SLP_TREE_LEFT (node));
  vect_detect_hybrid_slp_stmts (SLP_TREE_RIGHT (node));
}
1129
1130
1131/* Find stmts that must be both vectorized and SLPed.  */
1132
1133void
1134vect_detect_hybrid_slp (loop_vec_info loop_vinfo)
1135{
1136  unsigned int i;
1137  VEC (slp_instance, heap) *slp_instances = LOOP_VINFO_SLP_INSTANCES (loop_vinfo);
1138  slp_instance instance;
1139
1140  if (vect_print_dump_info (REPORT_SLP))
1141    fprintf (vect_dump, "=== vect_detect_hybrid_slp ===");
1142
1143  for (i = 0; VEC_iterate (slp_instance, slp_instances, i, instance); i++)
1144    vect_detect_hybrid_slp_stmts (SLP_INSTANCE_TREE (instance));
1145}
1146
1147
1148/* Create and initialize a new bb_vec_info struct for BB, as well as
1149   stmt_vec_info structs for all the stmts in it.  */
1150
1151static bb_vec_info
1152new_bb_vec_info (basic_block bb)
1153{
1154  bb_vec_info res = NULL;
1155  gimple_stmt_iterator gsi;
1156
1157  res = (bb_vec_info) xcalloc (1, sizeof (struct _bb_vec_info));
1158  BB_VINFO_BB (res) = bb;
1159
1160  for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
1161    {
1162      gimple stmt = gsi_stmt (gsi);
1163      gimple_set_uid (stmt, 0);
1164      set_vinfo_for_stmt (stmt, new_stmt_vec_info (stmt, NULL, res));
1165    }
1166
1167  BB_VINFO_STRIDED_STORES (res) = VEC_alloc (gimple, heap, 10);
1168  BB_VINFO_SLP_INSTANCES (res) = VEC_alloc (slp_instance, heap, 2);
1169
1170  bb->aux = res;
1171  return res;
1172}
1173
1174
1175/* Free BB_VINFO struct, as well as all the stmt_vec_info structs of all the
1176   stmts in the basic block.  */
1177
1178static void
1179destroy_bb_vec_info (bb_vec_info bb_vinfo)
1180{
1181  basic_block bb;
1182  gimple_stmt_iterator si;
1183
1184  if (!bb_vinfo)
1185    return;
1186
1187  bb = BB_VINFO_BB (bb_vinfo);
1188
1189  for (si = gsi_start_bb (bb); !gsi_end_p (si); gsi_next (&si))
1190    {
1191      gimple stmt = gsi_stmt (si);
1192      stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
1193
1194      if (stmt_info)
1195        /* Free stmt_vec_info.  */
1196        free_stmt_vec_info (stmt);
1197    }
1198
1199  VEC_free (gimple, heap, BB_VINFO_STRIDED_STORES (bb_vinfo));
1200  VEC_free (slp_instance, heap, BB_VINFO_SLP_INSTANCES (bb_vinfo));
1201  free (bb_vinfo);
1202  bb->aux = NULL;
1203}
1204
1205
1206/* Analyze statements contained in SLP tree node after recursively analyzing
1207   the subtree. Return TRUE if the operations are supported.  */
1208
1209static bool
1210vect_slp_analyze_node_operations (bb_vec_info bb_vinfo, slp_tree node)
1211{
1212  bool dummy;
1213  int i;
1214  gimple stmt;
1215
1216  if (!node)
1217    return true;
1218
1219  if (!vect_slp_analyze_node_operations (bb_vinfo, SLP_TREE_LEFT (node))
1220      || !vect_slp_analyze_node_operations (bb_vinfo, SLP_TREE_RIGHT (node)))
1221    return false;
1222
1223  for (i = 0; VEC_iterate (gimple, SLP_TREE_SCALAR_STMTS (node), i, stmt); i++)
1224    {
1225      stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
1226      gcc_assert (stmt_info);
1227      gcc_assert (PURE_SLP_STMT (stmt_info));
1228
1229      if (!vect_analyze_stmt (stmt, &dummy, node))
1230        return false;
1231    }
1232
1233  return true;
1234}
1235
1236
1237/* Analyze statements in SLP instances of the basic block. Return TRUE if the
1238   operations are supported. */
1239
1240static bool
1241vect_slp_analyze_operations (bb_vec_info bb_vinfo)
1242{
1243  VEC (slp_instance, heap) *slp_instances = BB_VINFO_SLP_INSTANCES (bb_vinfo);
1244  slp_instance instance;
1245  int i;
1246
1247  for (i = 0; VEC_iterate (slp_instance, slp_instances, i, instance); )
1248    {
1249      if (!vect_slp_analyze_node_operations (bb_vinfo,
1250                                             SLP_INSTANCE_TREE (instance)))
1251        {
1252 	  vect_free_slp_instance (instance);
1253          VEC_ordered_remove (slp_instance, slp_instances, i);
1254	}
1255      else
1256        i++;
1257    }
1258
1259  if (!VEC_length (slp_instance, slp_instances))
1260    return false;
1261
1262  return true;
1263}
1264
1265
/* Check if the basic block can be vectorized.  */
1267
bb_vec_info
vect_slp_analyze_bb (basic_block bb)
{
  bb_vec_info bb_vinfo;
  VEC (ddr_p, heap) *ddrs;
  VEC (slp_instance, heap) *slp_instances;
  slp_instance instance;
  int i, insns = 0;
  gimple_stmt_iterator gsi;

  if (vect_print_dump_info (REPORT_DETAILS))
    fprintf (vect_dump, "===vect_slp_analyze_bb===\n");

  /* Count the real instructions: debug stmts, nops and labels do not
     count towards the size limit.  */
  for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
    {
      gimple stmt = gsi_stmt (gsi);
      if (!is_gimple_debug (stmt)
	  && !gimple_nop_p (stmt)
	  && gimple_code (stmt) != GIMPLE_LABEL)
	insns++;
    }

  /* Give up on blocks where the analysis would be too expensive.  */
  if (insns > PARAM_VALUE (PARAM_SLP_MAX_INSNS_IN_BB))
    {
      if (vect_print_dump_info (REPORT_UNVECTORIZED_LOCATIONS))
        fprintf (vect_dump, "not vectorized: too many instructions in basic "
                            "block.\n");

      return NULL;
    }

  /* Create the bb_vec_info with a stmt_vec_info for each stmt in BB.  */
  bb_vinfo = new_bb_vec_info (bb);
  if (!bb_vinfo)
    return NULL;

  /* Phase 1: find and analyze the data references.  */
  if (!vect_analyze_data_refs (NULL, bb_vinfo))
    {
      if (vect_print_dump_info (REPORT_UNVECTORIZED_LOCATIONS))
        fprintf (vect_dump, "not vectorized: unhandled data-ref in basic "
                            "block.\n");

      destroy_bb_vec_info (bb_vinfo);
      return NULL;
    }

  /* Without any data dependence relations there is nothing to SLP.  */
  ddrs = BB_VINFO_DDRS (bb_vinfo);
  if (!VEC_length (ddr_p, ddrs))
    {
      if (vect_print_dump_info (REPORT_UNVECTORIZED_LOCATIONS))
        fprintf (vect_dump, "not vectorized: not enough data-refs in basic "
                            "block.\n");

      destroy_bb_vec_info (bb_vinfo);
      return NULL;
    }

  /* Phase 2: compute the alignment of the data references.  */
  if (!vect_analyze_data_refs_alignment (NULL, bb_vinfo))
    {
      if (vect_print_dump_info (REPORT_UNVECTORIZED_LOCATIONS))
        fprintf (vect_dump, "not vectorized: bad data alignment in basic "
                            "block.\n");

      destroy_bb_vec_info (bb_vinfo);
      return NULL;
    }

  /* Phase 3: check the dependences between the data references.  */
   if (!vect_analyze_data_ref_dependences (NULL, bb_vinfo))
    {
     if (vect_print_dump_info (REPORT_UNVECTORIZED_LOCATIONS))
       fprintf (vect_dump, "not vectorized: unhandled data dependence in basic"
                           " block.\n");

      destroy_bb_vec_info (bb_vinfo);
      return NULL;
    }

  /* Phase 4: analyze the access patterns (e.g. strided groups).  */
  if (!vect_analyze_data_ref_accesses (NULL, bb_vinfo))
    {
     if (vect_print_dump_info (REPORT_UNVECTORIZED_LOCATIONS))
       fprintf (vect_dump, "not vectorized: unhandled data access in basic "
                           "block.\n");

      destroy_bb_vec_info (bb_vinfo);
      return NULL;
    }

  /* Phase 5: verify that the alignments are supportable by the target.  */
   if (!vect_verify_datarefs_alignment (NULL, bb_vinfo))
    {
      if (vect_print_dump_info (REPORT_UNVECTORIZED_LOCATIONS))
        fprintf (vect_dump, "not vectorized: unsupported alignment in basic "
                            "block.\n");

      destroy_bb_vec_info (bb_vinfo);
      return NULL;
    }

  /* Check the SLP opportunities in the basic block, analyze and build SLP
     trees.  */
  if (!vect_analyze_slp (NULL, bb_vinfo))
    {
      if (vect_print_dump_info (REPORT_UNVECTORIZED_LOCATIONS))
        fprintf (vect_dump, "not vectorized: failed to find SLP opportunities "
                            "in basic block.\n");

      destroy_bb_vec_info (bb_vinfo);
      return NULL;
    }

  slp_instances = BB_VINFO_SLP_INSTANCES (bb_vinfo);

  /* Mark all the statements that we want to vectorize as pure SLP and
     relevant.  */
  for (i = 0; VEC_iterate (slp_instance, slp_instances, i, instance); i++)
    {
      vect_mark_slp_stmts (SLP_INSTANCE_TREE (instance), pure_slp, -1);
      vect_mark_slp_stmts_relevant (SLP_INSTANCE_TREE (instance));
    }

  /* Final phase: check that every operation in the surviving instances is
     supported; unsupported instances are removed inside this call.  */
  if (!vect_slp_analyze_operations (bb_vinfo))
    {
      if (vect_print_dump_info (REPORT_UNVECTORIZED_LOCATIONS))
        fprintf (vect_dump, "not vectorized: bad operation in basic block.\n");

      destroy_bb_vec_info (bb_vinfo);
      return NULL;
    }

  if (vect_print_dump_info (REPORT_DETAILS))
    fprintf (vect_dump, "Basic block will be vectorized using SLP\n");

  return bb_vinfo;
}
1400
1401
1402/* SLP costs are calculated according to SLP instance unrolling factor (i.e.,
1403   the number of created vector stmts depends on the unrolling factor). However,
1404   the actual number of vector stmts for every SLP node depends on VF which is
1405   set later in vect_analyze_operations(). Hence, SLP costs should be updated.
1406   In this function we assume that the inside costs calculated in
1407   vect_model_xxx_cost are linear in ncopies.  */
1408
1409void
1410vect_update_slp_costs_according_to_vf (loop_vec_info loop_vinfo)
1411{
1412  unsigned int i, vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
1413  VEC (slp_instance, heap) *slp_instances = LOOP_VINFO_SLP_INSTANCES (loop_vinfo);
1414  slp_instance instance;
1415
1416  if (vect_print_dump_info (REPORT_SLP))
1417    fprintf (vect_dump, "=== vect_update_slp_costs_according_to_vf ===");
1418
1419  for (i = 0; VEC_iterate (slp_instance, slp_instances, i, instance); i++)
1420    /* We assume that costs are linear in ncopies.  */
1421    SLP_INSTANCE_INSIDE_OF_LOOP_COST (instance) *= vf
1422      / SLP_INSTANCE_UNROLLING_FACTOR (instance);
1423}
1424
1425
1426/* For constant and loop invariant defs of SLP_NODE this function returns
1427   (vector) defs (VEC_OPRNDS) that will be used in the vectorized stmts.
1428   OP_NUM determines if we gather defs for operand 0 or operand 1 of the scalar
1429   stmts. NUMBER_OF_VECTORS is the number of vector defs to create.  */
1430
static void
vect_get_constant_vectors (tree op, slp_tree slp_node,
                           VEC(tree,heap) **vec_oprnds,
			   unsigned int op_num, unsigned int number_of_vectors)
{
  VEC (gimple, heap) *stmts = SLP_TREE_SCALAR_STMTS (slp_node);
  gimple stmt = VEC_index (gimple, stmts, 0);
  stmt_vec_info stmt_vinfo = vinfo_for_stmt (stmt);
  int nunits;
  tree vec_cst;
  tree t = NULL_TREE;
  int j, number_of_places_left_in_vector;
  tree vector_type;
  tree vop;
  int group_size = VEC_length (gimple, stmts);
  unsigned int vec_num, i;
  int number_of_copies = 1;
  VEC (tree, heap) *voprnds = VEC_alloc (tree, heap, number_of_vectors);
  bool constant_p, is_store;

  /* A data reference means the node is a group of stores; the scalar
     operand is then the RHS of the store rather than operand OP_NUM.  */
  if (STMT_VINFO_DATA_REF (stmt_vinfo))
    {
      is_store = true;
      op = gimple_assign_rhs1 (stmt);
    }
  else
    is_store = false;

  /* A true constant can be built with build_vector; a loop invariant
     needs a CONSTRUCTOR instead.  */
  if (CONSTANT_CLASS_P (op))
    constant_p = true;
  else
    constant_p = false;

  vector_type = get_vectype_for_scalar_type (TREE_TYPE (op));
  gcc_assert (vector_type);

  nunits = TYPE_VECTOR_SUBPARTS (vector_type);

  /* NUMBER_OF_COPIES is the number of times we need to use the same values in
     created vectors. It is greater than 1 if unrolling is performed.

     For example, we have two scalar operands, s1 and s2 (e.g., group of
     strided accesses of size two), while NUNITS is four (i.e., four scalars
     of this type can be packed in a vector). The output vector will contain
     two copies of each scalar operand: {s1, s2, s1, s2}. (NUMBER_OF_COPIES
     will be 2).

     If GROUP_SIZE > NUNITS, the scalars will be split into several vectors
     containing the operands.

     For example, NUNITS is four as before, and the group size is 8
     (s1, s2, ..., s8). We will create two vectors {s1, s2, s3, s4} and
     {s5, s6, s7, s8}.  */

  number_of_copies = least_common_multiple (nunits, group_size) / group_size;

  number_of_places_left_in_vector = nunits;
  for (j = 0; j < number_of_copies; j++)
    {
      /* Walk the scalar stmts in reverse (I is unsigned; after I reaches 0
	 the decrement wraps and VEC_iterate's bounds check terminates the
	 loop).  The reverse order compensates for tree_cons prepending.  */
      for (i = group_size - 1; VEC_iterate (gimple, stmts, i, stmt); i--)
        {
          if (is_store)
            op = gimple_assign_rhs1 (stmt);
          else
            op = gimple_op (stmt, op_num + 1);

          /* Create 'vect_ = {op0,op1,...,opn}'.  */
          t = tree_cons (NULL_TREE, op, t);

          number_of_places_left_in_vector--;

          /* Once a full vector's worth of scalars is accumulated, emit the
             vector constant/constructor and start a fresh list.  */
          if (number_of_places_left_in_vector == 0)
            {
              number_of_places_left_in_vector = nunits;

	      if (constant_p)
		vec_cst = build_vector (vector_type, t);
	      else
		vec_cst = build_constructor_from_list (vector_type, t);
              VEC_quick_push (tree, voprnds,
                              vect_init_vector (stmt, vec_cst, vector_type, NULL));
              t = NULL_TREE;
            }
        }
    }

  /* Since the vectors are created in the reverse order, we should invert
     them.  */
  vec_num = VEC_length (tree, voprnds);
  for (j = vec_num - 1; j >= 0; j--)
    {
      vop = VEC_index (tree, voprnds, j);
      VEC_quick_push (tree, *vec_oprnds, vop);
    }

  VEC_free (tree, heap, voprnds);

  /* In case that VF is greater than the unrolling factor needed for the SLP
     group of stmts, NUMBER_OF_VECTORS to be created is greater than
     NUMBER_OF_SCALARS/NUNITS or NUNITS/NUMBER_OF_SCALARS, and hence we have
     to replicate the vectors.  */
  while (number_of_vectors > VEC_length (tree, *vec_oprnds))
    {
      /* Duplicate only the first VEC_NUM vectors (the originals), not the
         copies appended by previous iterations.  */
      for (i = 0; VEC_iterate (tree, *vec_oprnds, i, vop) && i < vec_num; i++)
        VEC_quick_push (tree, *vec_oprnds, vop);
    }
}
1538
1539
1540/* Get vectorized definitions from SLP_NODE that contains corresponding
1541   vectorized def-stmts.  */
1542
1543static void
1544vect_get_slp_vect_defs (slp_tree slp_node, VEC (tree,heap) **vec_oprnds)
1545{
1546  tree vec_oprnd;
1547  gimple vec_def_stmt;
1548  unsigned int i;
1549
1550  gcc_assert (SLP_TREE_VEC_STMTS (slp_node));
1551
1552  for (i = 0;
1553       VEC_iterate (gimple, SLP_TREE_VEC_STMTS (slp_node), i, vec_def_stmt);
1554       i++)
1555    {
1556      gcc_assert (vec_def_stmt);
1557      vec_oprnd = gimple_get_lhs (vec_def_stmt);
1558      VEC_quick_push (tree, *vec_oprnds, vec_oprnd);
1559    }
1560}
1561
1562
1563/* Get vectorized definitions for SLP_NODE.
1564   If the scalar definitions are loop invariants or constants, collect them and
1565   call vect_get_constant_vectors() to create vector stmts.
1566   Otherwise, the def-stmts must be already vectorized and the vectorized stmts
1567   must be stored in the LEFT/RIGHT node of SLP_NODE, and we call
1568   vect_get_slp_vect_defs() to retrieve them.
1569   If VEC_OPRNDS1 is NULL, don't get vector defs for the second operand (from
1570   the right node. This is used when the second operand must remain scalar.  */
1571
1572void
1573vect_get_slp_defs (tree op0, tree op1, slp_tree slp_node,
1574                   VEC (tree,heap) **vec_oprnds0,
1575                   VEC (tree,heap) **vec_oprnds1)
1576{
1577  gimple first_stmt;
1578  enum tree_code code;
1579  int number_of_vects;
1580  HOST_WIDE_INT lhs_size_unit, rhs_size_unit;
1581
1582  first_stmt = VEC_index (gimple, SLP_TREE_SCALAR_STMTS (slp_node), 0);
1583  /* The number of vector defs is determined by the number of vector statements
1584     in the node from which we get those statements.  */
1585  if (SLP_TREE_LEFT (slp_node))
1586    number_of_vects = SLP_TREE_NUMBER_OF_VEC_STMTS (SLP_TREE_LEFT (slp_node));
1587  else
1588    {
1589      number_of_vects = SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node);
1590      /* Number of vector stmts was calculated according to LHS in
1591         vect_schedule_slp_instance(), fix it by replacing LHS with RHS, if
1592         necessary. See vect_get_smallest_scalar_type() for details.  */
1593      vect_get_smallest_scalar_type (first_stmt, &lhs_size_unit,
1594                                     &rhs_size_unit);
1595      if (rhs_size_unit != lhs_size_unit)
1596        {
1597          number_of_vects *= rhs_size_unit;
1598          number_of_vects /= lhs_size_unit;
1599        }
1600    }
1601
1602  /* Allocate memory for vectorized defs.  */
1603  *vec_oprnds0 = VEC_alloc (tree, heap, number_of_vects);
1604
1605  /* SLP_NODE corresponds either to a group of stores or to a group of
1606     unary/binary operations. We don't call this function for loads.  */
1607  if (SLP_TREE_LEFT (slp_node))
1608    /* The defs are already vectorized.  */
1609    vect_get_slp_vect_defs (SLP_TREE_LEFT (slp_node), vec_oprnds0);
1610  else
1611    /* Build vectors from scalar defs.  */
1612    vect_get_constant_vectors (op0, slp_node, vec_oprnds0, 0, number_of_vects);
1613
1614  if (STMT_VINFO_DATA_REF (vinfo_for_stmt (first_stmt)))
1615    /* Since we don't call this function with loads, this is a group of
1616       stores.  */
1617    return;
1618
1619  code = gimple_assign_rhs_code (first_stmt);
1620  if (get_gimple_rhs_class (code) != GIMPLE_BINARY_RHS || !vec_oprnds1)
1621    return;
1622
1623  /* The number of vector defs is determined by the number of vector statements
1624     in the node from which we get those statements.  */
1625  if (SLP_TREE_RIGHT (slp_node))
1626    number_of_vects = SLP_TREE_NUMBER_OF_VEC_STMTS (SLP_TREE_RIGHT (slp_node));
1627  else
1628    number_of_vects = SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node);
1629
1630  *vec_oprnds1 = VEC_alloc (tree, heap, number_of_vects);
1631
1632  if (SLP_TREE_RIGHT (slp_node))
1633    /* The defs are already vectorized.  */
1634    vect_get_slp_vect_defs (SLP_TREE_RIGHT (slp_node), vec_oprnds1);
1635  else
1636    /* Build vectors from scalar defs.  */
1637    vect_get_constant_vectors (op1, slp_node, vec_oprnds1, 1, number_of_vects);
1638}
1639
1640
1641/* Create NCOPIES permutation statements using the mask MASK_BYTES (by
1642   building a vector of type MASK_TYPE from it) and two input vectors placed in
1643   DR_CHAIN at FIRST_VEC_INDX and SECOND_VEC_INDX for the first copy and
1644   shifting by STRIDE elements of DR_CHAIN for every copy.
1645   (STRIDE is the number of vectorized stmts for NODE divided by the number of
1646   copies).
1647   VECT_STMTS_COUNTER specifies the index in the vectorized stmts of NODE, where
1648   the created stmts must be inserted.  */
1649
static inline void
vect_create_mask_and_perm (gimple stmt, gimple next_scalar_stmt,
                           tree mask, int first_vec_indx, int second_vec_indx,
                           gimple_stmt_iterator *gsi, slp_tree node,
                           tree builtin_decl, tree vectype,
                           VEC(tree,heap) *dr_chain,
                           int ncopies, int vect_stmts_counter)
{
  tree perm_dest;
  gimple perm_stmt = NULL;
  stmt_vec_info next_stmt_info;
  int i, stride;
  tree first_vec, second_vec, data_ref;
  VEC (tree, heap) *params = NULL;

  /* STRIDE is how far apart in SLP_TREE_VEC_STMTS consecutive copies of
     the same permutation land.  */
  stride = SLP_TREE_NUMBER_OF_VEC_STMTS (node) / ncopies;

  /* Initialize the vect stmts of NODE to properly insert the generated
     stmts later (pad the vector with NULLs up to its full length so
     VEC_replace below can store at arbitrary positions).  */
  for (i = VEC_length (gimple, SLP_TREE_VEC_STMTS (node));
       i < (int) SLP_TREE_NUMBER_OF_VEC_STMTS (node); i++)
    VEC_quick_push (gimple, SLP_TREE_VEC_STMTS (node), NULL);

  perm_dest = vect_create_destination_var (gimple_assign_lhs (stmt), vectype);
  for (i = 0; i < ncopies; i++)
    {
      /* The two input vectors for this copy, taken from DR_CHAIN.  */
      first_vec = VEC_index (tree, dr_chain, first_vec_indx);
      second_vec = VEC_index (tree, dr_chain, second_vec_indx);

      /* Build argument list for the vectorized call.  */
      VEC_free (tree, heap, params);
      params = VEC_alloc (tree, heap, 3);
      VEC_quick_push (tree, params, first_vec);
      VEC_quick_push (tree, params, second_vec);
      VEC_quick_push (tree, params, mask);

      /* Generate the permute statement:
         data_ref = builtin_decl (first_vec, second_vec, mask).  */
      perm_stmt = gimple_build_call_vec (builtin_decl, params);
      data_ref = make_ssa_name (perm_dest, perm_stmt);
      gimple_call_set_lhs (perm_stmt, data_ref);
      vect_finish_stmt_generation (stmt, perm_stmt, gsi);

      /* Store the vector statement in NODE at the slot for this copy.  */
      VEC_replace (gimple, SLP_TREE_VEC_STMTS (node),
                   stride * i + vect_stmts_counter, perm_stmt);

      /* Shift to the input vectors of the next copy.  */
      first_vec_indx += stride;
      second_vec_indx += stride;
    }

  /* Mark the scalar stmt as vectorized, recording the last generated
     permute stmt as its vectorized counterpart.  */
  next_stmt_info = vinfo_for_stmt (next_scalar_stmt);
  STMT_VINFO_VEC_STMT (next_stmt_info) = perm_stmt;
}
1704
1705
1706/* Given FIRST_MASK_ELEMENT - the mask element in element representation,
1707   return in CURRENT_MASK_ELEMENT its equivalent in target specific
1708   representation. Check that the mask is valid and return FALSE if not.
1709   Return TRUE in NEED_NEXT_VECTOR if the permutation requires to move to
1710   the next vector, i.e., the current first vector is not needed.  */
1711
static bool
vect_get_mask_element (gimple stmt, int first_mask_element, int m,
                       int mask_nunits, bool only_one_vec, int index,
                       int *mask, int *current_mask_element,
                       bool *need_next_vector, int *number_of_mask_fixes,
                       bool *mask_fixed, bool *needs_first_vector)
{
  int i;

  /* Convert to target specific representation.  */
  *current_mask_element = first_mask_element + m;
  /* Adjust the value in case it's a mask for second and third vectors.  */
  *current_mask_element -= mask_nunits * (*number_of_mask_fixes - 1);

  /* An element below MASK_NUNITS selects from the first input vector, so
     the first vector is still live for this mask.  */
  if (*current_mask_element < mask_nunits)
    *needs_first_vector = true;

  /* We have only one input vector to permute but the mask accesses values in
     the next vector as well.  */
  if (only_one_vec && *current_mask_element >= mask_nunits)
    {
      if (vect_print_dump_info (REPORT_DETAILS))
        {
          fprintf (vect_dump, "permutation requires at least two vectors ");
          print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM);
        }

      return false;
    }

  /* The mask requires the next vector.  */
  if (*current_mask_element >= mask_nunits * 2)
    {
      if (*needs_first_vector || *mask_fixed)
        {
          /* We either need the first vector too or have already moved to the
             next vector. In both cases, this permutation needs three
             vectors.  */
          if (vect_print_dump_info (REPORT_DETAILS))
            {
              fprintf (vect_dump, "permutation requires at "
                                  "least three vectors ");
              print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM);
            }

          return false;
        }

      /* We move to the next vector, dropping the first one and working with
         the second and the third - we need to adjust the values of the mask
         accordingly.  */
      *current_mask_element -= mask_nunits * *number_of_mask_fixes;

      /* Re-bias the mask elements already emitted for this mask too.  */
      for (i = 0; i < index; i++)
        mask[i] -= mask_nunits * *number_of_mask_fixes;

      (*number_of_mask_fixes)++;
      *mask_fixed = true;
    }

  /* Once the mask has been shifted, subsequent copies must advance to the
     next input vector.  */
  *need_next_vector = *mask_fixed;

  /* This was the last element of this mask. Start a new one.  */
  if (index == mask_nunits - 1)
    {
      *number_of_mask_fixes = 1;
      *mask_fixed = false;
      *needs_first_vector = false;
    }

  return true;
}
1784
1785
1786/* Generate vector permute statements from a list of loads in DR_CHAIN.
1787   If ANALYZE_ONLY is TRUE, only check that it is possible to create valid
1788   permute statements for SLP_NODE_INSTANCE.  */
1789bool
1790vect_transform_slp_perm_load (gimple stmt, VEC (tree, heap) *dr_chain,
1791                              gimple_stmt_iterator *gsi, int vf,
1792                              slp_instance slp_node_instance, bool analyze_only)
1793{
1794  stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
1795  tree mask_element_type = NULL_TREE, mask_type;
1796  int i, j, k, m, scale, mask_nunits, nunits, vec_index = 0, scalar_index;
1797  slp_tree node;
1798  tree vectype = STMT_VINFO_VECTYPE (stmt_info), builtin_decl;
1799  gimple next_scalar_stmt;
1800  int group_size = SLP_INSTANCE_GROUP_SIZE (slp_node_instance);
1801  int first_mask_element;
1802  int index, unroll_factor, *mask, current_mask_element, ncopies;
1803  bool only_one_vec = false, need_next_vector = false;
1804  int first_vec_index, second_vec_index, orig_vec_stmts_num, vect_stmts_counter;
1805  int number_of_mask_fixes = 1;
1806  bool mask_fixed = false;
1807  bool needs_first_vector = false;
1808
1809  if (!targetm.vectorize.builtin_vec_perm)
1810    {
1811      if (vect_print_dump_info (REPORT_DETAILS))
1812        {
1813          fprintf (vect_dump, "no builtin for vect permute for ");
1814          print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM);
1815        }
1816
1817       return false;
1818    }
1819
1820  builtin_decl = targetm.vectorize.builtin_vec_perm (vectype,
1821                                                     &mask_element_type);
1822  if (!builtin_decl || !mask_element_type)
1823    {
1824      if (vect_print_dump_info (REPORT_DETAILS))
1825        {
1826          fprintf (vect_dump, "no builtin for vect permute for ");
1827          print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM);
1828        }
1829
1830       return false;
1831    }
1832
1833  mask_type = get_vectype_for_scalar_type (mask_element_type);
1834  mask_nunits = TYPE_VECTOR_SUBPARTS (mask_type);
1835  mask = (int *) xmalloc (sizeof (int) * mask_nunits);
1836  nunits = TYPE_VECTOR_SUBPARTS (vectype);
1837  scale = mask_nunits / nunits;
1838  unroll_factor = SLP_INSTANCE_UNROLLING_FACTOR (slp_node_instance);
1839
1840  /* The number of vector stmts to generate based only on SLP_NODE_INSTANCE
1841     unrolling factor.  */
1842  orig_vec_stmts_num = group_size *
1843                SLP_INSTANCE_UNROLLING_FACTOR (slp_node_instance) / nunits;
1844  if (orig_vec_stmts_num == 1)
1845    only_one_vec = true;
1846
1847  /* Number of copies is determined by the final vectorization factor
1848     relatively to SLP_NODE_INSTANCE unrolling factor.  */
1849  ncopies = vf / SLP_INSTANCE_UNROLLING_FACTOR (slp_node_instance);
1850
1851  /* Generate permutation masks for every NODE. Number of masks for each NODE
1852     is equal to GROUP_SIZE.
1853     E.g., we have a group of three nodes with three loads from the same
1854     location in each node, and the vector size is 4. I.e., we have a
1855     a0b0c0a1b1c1... sequence and we need to create the following vectors:
1856     for a's: a0a0a0a1 a1a1a2a2 a2a3a3a3
1857     for b's: b0b0b0b1 b1b1b2b2 b2b3b3b3
1858     ...
1859
1860     The masks for a's should be: {0,0,0,3} {3,3,6,6} {6,9,9,9} (in target
1861     scpecific type, e.g., in bytes for Altivec.
1862     The last mask is illegal since we assume two operands for permute
1863     operation, and the mask element values can't be outside that range. Hence,
1864     the last mask must be converted into {2,5,5,5}.
1865     For the first two permutations we need the first and the second input
1866     vectors: {a0,b0,c0,a1} and {b1,c1,a2,b2}, and for the last permutation
1867     we need the second and the third vectors: {b1,c1,a2,b2} and
1868     {c2,a3,b3,c3}.  */
1869
1870  for (i = 0;
1871       VEC_iterate (slp_tree, SLP_INSTANCE_LOADS (slp_node_instance),
1872                    i, node);
1873       i++)
1874    {
1875      scalar_index = 0;
1876      index = 0;
1877      vect_stmts_counter = 0;
1878      vec_index = 0;
1879      first_vec_index = vec_index++;
1880      if (only_one_vec)
1881        second_vec_index = first_vec_index;
1882      else
1883        second_vec_index =  vec_index++;
1884
1885      for (j = 0; j < unroll_factor; j++)
1886        {
1887          for (k = 0; k < group_size; k++)
1888            {
1889              first_mask_element = (i + j * group_size) * scale;
1890              for (m = 0; m < scale; m++)
1891                {
1892                  if (!vect_get_mask_element (stmt, first_mask_element, m,
1893                                   mask_nunits, only_one_vec, index, mask,
1894                                   &current_mask_element, &need_next_vector,
1895                                   &number_of_mask_fixes, &mask_fixed,
1896                                   &needs_first_vector))
1897                    return false;
1898
1899                  mask[index++] = current_mask_element;
1900                }
1901
1902              if (index == mask_nunits)
1903                {
1904		  tree mask_vec = NULL;
1905
1906		  while (--index >= 0)
1907		    {
1908		      tree t = build_int_cst (mask_element_type, mask[index]);
1909		      mask_vec = tree_cons (NULL, t, mask_vec);
1910		    }
1911		  mask_vec = build_vector (mask_type, mask_vec);
1912		  index = 0;
1913
1914		  if (!targetm.vectorize.builtin_vec_perm_ok (vectype,
1915							      mask_vec))
1916		    {
1917		      if (vect_print_dump_info (REPORT_DETAILS))
1918			{
1919			  fprintf (vect_dump, "unsupported vect permute ");
1920			  print_generic_expr (vect_dump, mask_vec, 0);
1921			}
1922		      free (mask);
1923		      return false;
1924		    }
1925
1926                  if (!analyze_only)
1927                    {
1928                      if (need_next_vector)
1929                        {
1930                          first_vec_index = second_vec_index;
1931                          second_vec_index = vec_index;
1932                        }
1933
1934                      next_scalar_stmt = VEC_index (gimple,
1935                                SLP_TREE_SCALAR_STMTS (node), scalar_index++);
1936
1937                      vect_create_mask_and_perm (stmt, next_scalar_stmt,
1938                               mask_vec, first_vec_index, second_vec_index,
1939			       gsi, node, builtin_decl, vectype, dr_chain,
1940			       ncopies, vect_stmts_counter++);
1941                    }
1942                }
1943            }
1944        }
1945    }
1946
1947  free (mask);
1948  return true;
1949}
1950
1951
1952
1953/* Vectorize SLP instance tree in postorder.  */
1954
1955static bool
1956vect_schedule_slp_instance (slp_tree node, slp_instance instance,
1957                            unsigned int vectorization_factor)
1958{
1959  gimple stmt;
1960  bool strided_store, is_store;
1961  gimple_stmt_iterator si;
1962  stmt_vec_info stmt_info;
1963  unsigned int vec_stmts_size, nunits, group_size;
1964  tree vectype;
1965  int i;
1966  slp_tree loads_node;
1967
1968  if (!node)
1969    return false;
1970
1971  vect_schedule_slp_instance (SLP_TREE_LEFT (node), instance,
1972                              vectorization_factor);
1973  vect_schedule_slp_instance (SLP_TREE_RIGHT (node), instance,
1974                              vectorization_factor);
1975
1976  stmt = VEC_index (gimple, SLP_TREE_SCALAR_STMTS (node), 0);
1977  stmt_info = vinfo_for_stmt (stmt);
1978
1979  /* VECTYPE is the type of the destination.  */
1980  vectype = get_vectype_for_scalar_type (TREE_TYPE (gimple_assign_lhs (stmt)));
1981  nunits = (unsigned int) TYPE_VECTOR_SUBPARTS (vectype);
1982  group_size = SLP_INSTANCE_GROUP_SIZE (instance);
1983
1984  /* For each SLP instance calculate number of vector stmts to be created
1985     for the scalar stmts in each node of the SLP tree. Number of vector
1986     elements in one vector iteration is the number of scalar elements in
1987     one scalar iteration (GROUP_SIZE) multiplied by VF divided by vector
1988     size.  */
1989  vec_stmts_size = (vectorization_factor * group_size) / nunits;
1990
1991  /* In case of load permutation we have to allocate vectorized statements for
1992     all the nodes that participate in that permutation.  */
1993  if (SLP_INSTANCE_LOAD_PERMUTATION (instance))
1994    {
1995      for (i = 0;
1996           VEC_iterate (slp_tree, SLP_INSTANCE_LOADS (instance), i, loads_node);
1997           i++)
1998        {
1999          if (!SLP_TREE_VEC_STMTS (loads_node))
2000            {
2001              SLP_TREE_VEC_STMTS (loads_node) = VEC_alloc (gimple, heap,
2002                                                           vec_stmts_size);
2003              SLP_TREE_NUMBER_OF_VEC_STMTS (loads_node) = vec_stmts_size;
2004            }
2005        }
2006    }
2007
2008  if (!SLP_TREE_VEC_STMTS (node))
2009    {
2010      SLP_TREE_VEC_STMTS (node) = VEC_alloc (gimple, heap, vec_stmts_size);
2011      SLP_TREE_NUMBER_OF_VEC_STMTS (node) = vec_stmts_size;
2012    }
2013
2014  if (vect_print_dump_info (REPORT_DETAILS))
2015    {
2016      fprintf (vect_dump, "------>vectorizing SLP node starting from: ");
2017      print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM);
2018    }
2019
2020  /* Loads should be inserted before the first load.  */
2021  if (SLP_INSTANCE_FIRST_LOAD_STMT (instance)
2022      && STMT_VINFO_STRIDED_ACCESS (stmt_info)
2023      && !REFERENCE_CLASS_P (gimple_get_lhs (stmt)))
2024    si = gsi_for_stmt (SLP_INSTANCE_FIRST_LOAD_STMT (instance));
2025  else
2026    si = gsi_for_stmt (stmt);
2027
2028  is_store = vect_transform_stmt (stmt, &si, &strided_store, node, instance);
2029  if (is_store)
2030    {
2031      if (DR_GROUP_FIRST_DR (stmt_info))
2032	/* If IS_STORE is TRUE, the vectorization of the
2033	   interleaving chain was completed - free all the stores in
2034	   the chain.  */
2035	vect_remove_stores (DR_GROUP_FIRST_DR (stmt_info));
2036      else
2037	/* FORNOW: SLP originates only from strided stores.  */
2038	gcc_unreachable ();
2039
2040      return true;
2041    }
2042
2043  /* FORNOW: SLP originates only from strided stores.  */
2044  return false;
2045}
2046
2047
2048bool
2049vect_schedule_slp (loop_vec_info loop_vinfo, bb_vec_info bb_vinfo)
2050{
2051  VEC (slp_instance, heap) *slp_instances;
2052  slp_instance instance;
2053  unsigned int i, vf;
2054  bool is_store = false;
2055
2056  if (loop_vinfo)
2057    {
2058      slp_instances = LOOP_VINFO_SLP_INSTANCES (loop_vinfo);
2059      vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
2060    }
2061  else
2062    {
2063      slp_instances = BB_VINFO_SLP_INSTANCES (bb_vinfo);
2064      vf = 1;
2065    }
2066
2067  for (i = 0; VEC_iterate (slp_instance, slp_instances, i, instance); i++)
2068    {
2069      /* Schedule the tree of INSTANCE.  */
2070      is_store = vect_schedule_slp_instance (SLP_INSTANCE_TREE (instance),
2071                                             instance, vf);
2072      if (vect_print_dump_info (REPORT_VECTORIZED_LOCATIONS)
2073	  || vect_print_dump_info (REPORT_UNVECTORIZED_LOCATIONS))
2074	fprintf (vect_dump, "vectorizing stmts using SLP.");
2075    }
2076
2077  return is_store;
2078}
2079
2080
2081/* Vectorize the basic block.  */
2082
2083void
2084vect_slp_transform_bb (basic_block bb)
2085{
2086  bb_vec_info bb_vinfo = vec_info_for_bb (bb);
2087  gimple_stmt_iterator si;
2088
2089  gcc_assert (bb_vinfo);
2090
2091  if (vect_print_dump_info (REPORT_DETAILS))
2092    fprintf (vect_dump, "SLPing BB\n");
2093
2094  for (si = gsi_start_bb (bb); !gsi_end_p (si); gsi_next (&si))
2095    {
2096      gimple stmt = gsi_stmt (si);
2097      stmt_vec_info stmt_info;
2098
2099      if (vect_print_dump_info (REPORT_DETAILS))
2100        {
2101          fprintf (vect_dump, "------>SLPing statement: ");
2102          print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM);
2103        }
2104
2105      stmt_info = vinfo_for_stmt (stmt);
2106      gcc_assert (stmt_info);
2107
2108      /* Schedule all the SLP instances when the first SLP stmt is reached.  */
2109      if (STMT_SLP_TYPE (stmt_info))
2110        {
2111          vect_schedule_slp (NULL, bb_vinfo);
2112          break;
2113        }
2114    }
2115
2116  mark_sym_for_renaming (gimple_vop (cfun));
2117  /* The memory tags and pointers in vectorized statements need to
2118     have their SSA forms updated.  FIXME, why can't this be delayed
2119     until all the loops have been transformed?  */
2120  update_ssa (TODO_update_ssa);
2121
2122  if (vect_print_dump_info (REPORT_DETAILS))
2123    fprintf (vect_dump, "BASIC BLOCK VECTORIZED\n");
2124
2125  destroy_bb_vec_info (bb_vinfo);
2126}
2127
2128