1/* Statement Analysis and Transformation for Vectorization
2   Copyright (C) 2003-2015 Free Software Foundation, Inc.
3   Contributed by Dorit Naishlos <dorit@il.ibm.com>
4   and Ira Rosen <irar@il.ibm.com>
5
6This file is part of GCC.
7
8GCC is free software; you can redistribute it and/or modify it under
9the terms of the GNU General Public License as published by the Free
10Software Foundation; either version 3, or (at your option) any later
11version.
12
13GCC is distributed in the hope that it will be useful, but WITHOUT ANY
14WARRANTY; without even the implied warranty of MERCHANTABILITY or
15FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
16for more details.
17
18You should have received a copy of the GNU General Public License
19along with GCC; see the file COPYING3.  If not see
20<http://www.gnu.org/licenses/>.  */
21
22#include "config.h"
23#include "system.h"
24#include "coretypes.h"
25#include "dumpfile.h"
26#include "tm.h"
27#include "hash-set.h"
28#include "machmode.h"
29#include "vec.h"
30#include "double-int.h"
31#include "input.h"
32#include "alias.h"
33#include "symtab.h"
34#include "wide-int.h"
35#include "inchash.h"
36#include "tree.h"
37#include "fold-const.h"
38#include "stor-layout.h"
39#include "target.h"
40#include "predict.h"
41#include "hard-reg-set.h"
42#include "function.h"
43#include "dominance.h"
44#include "cfg.h"
45#include "basic-block.h"
46#include "gimple-pretty-print.h"
47#include "tree-ssa-alias.h"
48#include "internal-fn.h"
49#include "tree-eh.h"
50#include "gimple-expr.h"
51#include "is-a.h"
52#include "gimple.h"
53#include "gimplify.h"
54#include "gimple-iterator.h"
55#include "gimplify-me.h"
56#include "gimple-ssa.h"
57#include "tree-cfg.h"
58#include "tree-phinodes.h"
59#include "ssa-iterators.h"
60#include "stringpool.h"
61#include "tree-ssanames.h"
62#include "tree-ssa-loop-manip.h"
63#include "cfgloop.h"
64#include "tree-ssa-loop.h"
65#include "tree-scalar-evolution.h"
66#include "hashtab.h"
67#include "rtl.h"
68#include "flags.h"
69#include "statistics.h"
70#include "real.h"
71#include "fixed-value.h"
72#include "insn-config.h"
73#include "expmed.h"
74#include "dojump.h"
75#include "explow.h"
76#include "calls.h"
77#include "emit-rtl.h"
78#include "varasm.h"
79#include "stmt.h"
80#include "expr.h"
81#include "recog.h"		/* FIXME: for insn_data */
82#include "insn-codes.h"
83#include "optabs.h"
84#include "diagnostic-core.h"
85#include "tree-vectorizer.h"
86#include "hash-map.h"
87#include "plugin-api.h"
88#include "ipa-ref.h"
89#include "cgraph.h"
90#include "builtins.h"
91
92/* For lang_hooks.types.type_for_mode.  */
93#include "langhooks.h"
94
95/* Return the vectorized type for the given statement.  */
96
97tree
98stmt_vectype (struct _stmt_vec_info *stmt_info)
99{
100  return STMT_VINFO_VECTYPE (stmt_info);
101}
102
103/* Return TRUE iff the given statement is in an inner loop relative to
104   the loop being vectorized.  */
105bool
106stmt_in_inner_loop_p (struct _stmt_vec_info *stmt_info)
107{
108  gimple stmt = STMT_VINFO_STMT (stmt_info);
109  basic_block bb = gimple_bb (stmt);
110  loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
111  struct loop* loop;
112
113  if (!loop_vinfo)
114    return false;
115
116  loop = LOOP_VINFO_LOOP (loop_vinfo);
117
118  return (bb->loop_father == loop->inner);
119}
120
121/* Record the cost of a statement, either by directly informing the
122   target model or by saving it in a vector for later processing.
123   Return a preliminary estimate of the statement's cost.  */
124
125unsigned
126record_stmt_cost (stmt_vector_for_cost *body_cost_vec, int count,
127		  enum vect_cost_for_stmt kind, stmt_vec_info stmt_info,
128		  int misalign, enum vect_cost_model_location where)
129{
130  if (body_cost_vec)
131    {
132      tree vectype = stmt_info ? stmt_vectype (stmt_info) : NULL_TREE;
133      add_stmt_info_to_vec (body_cost_vec, count, kind,
134			    stmt_info ? STMT_VINFO_STMT (stmt_info) : NULL,
135			    misalign);
      return (unsigned)
	(builtin_vectorization_cost (kind, vectype, misalign) * count);
    }
140  else
141    {
142      loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
143      bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
144      void *target_cost_data;
145
146      if (loop_vinfo)
147	target_cost_data = LOOP_VINFO_TARGET_COST_DATA (loop_vinfo);
148      else
149	target_cost_data = BB_VINFO_TARGET_COST_DATA (bb_vinfo);
150
151      return add_stmt_cost (target_cost_data, count, kind, stmt_info,
152			    misalign, where);
153    }
154}
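
/* A minimal usage sketch (illustrative only; the surrounding variables
   are hypothetical):

     unsigned cost;

     cost = record_stmt_cost (body_cost_vec, ncopies, vector_stmt,
			      stmt_info, 0, vect_body);

   With a non-NULL cost vector the cost is only remembered for later
   processing; passing NULL instead hands it directly to the target's
   cost model via add_stmt_cost.  */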
155
156/* Return a variable of type ELEM_TYPE[NELEMS].  */
157
158static tree
159create_vector_array (tree elem_type, unsigned HOST_WIDE_INT nelems)
160{
161  return create_tmp_var (build_array_type_nelts (elem_type, nelems),
162			 "vect_array");
163}
164
165/* ARRAY is an array of vectors created by create_vector_array.
166   Return an SSA_NAME for the vector in index N.  The reference
167   is part of the vectorization of STMT and the vector is associated
168   with scalar destination SCALAR_DEST.  */
169
170static tree
171read_vector_array (gimple stmt, gimple_stmt_iterator *gsi, tree scalar_dest,
172		   tree array, unsigned HOST_WIDE_INT n)
173{
174  tree vect_type, vect, vect_name, array_ref;
175  gimple new_stmt;
176
177  gcc_assert (TREE_CODE (TREE_TYPE (array)) == ARRAY_TYPE);
178  vect_type = TREE_TYPE (TREE_TYPE (array));
179  vect = vect_create_destination_var (scalar_dest, vect_type);
180  array_ref = build4 (ARRAY_REF, vect_type, array,
181		      build_int_cst (size_type_node, n),
182		      NULL_TREE, NULL_TREE);
183
184  new_stmt = gimple_build_assign (vect, array_ref);
185  vect_name = make_ssa_name (vect, new_stmt);
186  gimple_assign_set_lhs (new_stmt, vect_name);
187  vect_finish_stmt_generation (stmt, new_stmt, gsi);
188
189  return vect_name;
190}
191
192/* ARRAY is an array of vectors created by create_vector_array.
193   Emit code to store SSA_NAME VECT in index N of the array.
194   The store is part of the vectorization of STMT.  */
195
196static void
197write_vector_array (gimple stmt, gimple_stmt_iterator *gsi, tree vect,
198		    tree array, unsigned HOST_WIDE_INT n)
199{
200  tree array_ref;
201  gimple new_stmt;
202
203  array_ref = build4 (ARRAY_REF, TREE_TYPE (vect), array,
204		      build_int_cst (size_type_node, n),
205		      NULL_TREE, NULL_TREE);
206
207  new_stmt = gimple_build_assign (array_ref, vect);
208  vect_finish_stmt_generation (stmt, new_stmt, gsi);
209}
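
/* A sketch (illustrative only, with hypothetical local names) of how the
   array helpers above cooperate when a load-lanes style access is
   vectorized:

     tree array = create_vector_array (vectype, nvectors);
     ...emit the load-lanes statement whose destination is ARRAY...
     for (i = 0; i < nvectors; i++)
       vec = read_vector_array (stmt, gsi, scalar_dest, array, i);

   For store-lanes the order is reversed: write_vector_array fills the
   array element by element before the single store-lanes statement.  */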
210
211/* PTR is a pointer to an array of type TYPE.  Return a representation
212   of *PTR.  The memory reference replaces those in FIRST_DR
213   (and its group).  */
214
215static tree
216create_array_ref (tree type, tree ptr, struct data_reference *first_dr)
217{
218  tree mem_ref, alias_ptr_type;
219
220  alias_ptr_type = reference_alias_ptr_type (DR_REF (first_dr));
221  mem_ref = build2 (MEM_REF, type, ptr, build_int_cst (alias_ptr_type, 0));
222  /* Arrays have the same alignment as their type.  */
223  set_ptr_info_alignment (get_ptr_info (ptr), TYPE_ALIGN_UNIT (type), 0);
224  return mem_ref;
225}
226
227/* Utility functions used by vect_mark_stmts_to_be_vectorized.  */
228
229/* Function vect_mark_relevant.
230
231   Mark STMT as "relevant for vectorization" and add it to WORKLIST.  */
232
233static void
234vect_mark_relevant (vec<gimple> *worklist, gimple stmt,
235		    enum vect_relevant relevant, bool live_p,
236		    bool used_in_pattern)
237{
238  stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
239  enum vect_relevant save_relevant = STMT_VINFO_RELEVANT (stmt_info);
240  bool save_live_p = STMT_VINFO_LIVE_P (stmt_info);
241  gimple pattern_stmt;
242
243  if (dump_enabled_p ())
244    dump_printf_loc (MSG_NOTE, vect_location,
245                     "mark relevant %d, live %d.\n", relevant, live_p);
246
  /* If this stmt is an original stmt in a pattern, we might need to mark its
     related pattern stmt instead of the original stmt.  However, such stmts
     may have their own uses that are not in any pattern; in such cases the
     stmt itself should be marked.  */
251  if (STMT_VINFO_IN_PATTERN_P (stmt_info))
252    {
253      bool found = false;
254      if (!used_in_pattern)
255        {
256          imm_use_iterator imm_iter;
257          use_operand_p use_p;
258          gimple use_stmt;
259          tree lhs;
260	  loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
261	  struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
262
263          if (is_gimple_assign (stmt))
264            lhs = gimple_assign_lhs (stmt);
265          else
266            lhs = gimple_call_lhs (stmt);
267
          /* This use is outside the pattern.  If LHS has other uses that are
             pattern uses, we should mark the stmt itself, and not the pattern
             stmt.  */
271	  if (lhs && TREE_CODE (lhs) == SSA_NAME)
272	    FOR_EACH_IMM_USE_FAST (use_p, imm_iter, lhs)
273	      {
274		if (is_gimple_debug (USE_STMT (use_p)))
275		  continue;
276		use_stmt = USE_STMT (use_p);
277
278		if (!flow_bb_inside_loop_p (loop, gimple_bb (use_stmt)))
279		  continue;
280
281		if (vinfo_for_stmt (use_stmt)
282		    && STMT_VINFO_IN_PATTERN_P (vinfo_for_stmt (use_stmt)))
283		  {
284		    found = true;
285		    break;
286		  }
287	      }
288        }
289
290      if (!found)
291        {
292          /* This is the last stmt in a sequence that was detected as a
293             pattern that can potentially be vectorized.  Don't mark the stmt
294             as relevant/live because it's not going to be vectorized.
295             Instead mark the pattern-stmt that replaces it.  */
296
297          pattern_stmt = STMT_VINFO_RELATED_STMT (stmt_info);
298
299          if (dump_enabled_p ())
300            dump_printf_loc (MSG_NOTE, vect_location,
301                             "last stmt in pattern. don't mark"
302                             " relevant/live.\n");
303          stmt_info = vinfo_for_stmt (pattern_stmt);
304          gcc_assert (STMT_VINFO_RELATED_STMT (stmt_info) == stmt);
305          save_relevant = STMT_VINFO_RELEVANT (stmt_info);
306          save_live_p = STMT_VINFO_LIVE_P (stmt_info);
307          stmt = pattern_stmt;
308        }
309    }
310
311  STMT_VINFO_LIVE_P (stmt_info) |= live_p;
312  if (relevant > STMT_VINFO_RELEVANT (stmt_info))
313    STMT_VINFO_RELEVANT (stmt_info) = relevant;
314
315  if (STMT_VINFO_RELEVANT (stmt_info) == save_relevant
316      && STMT_VINFO_LIVE_P (stmt_info) == save_live_p)
317    {
318      if (dump_enabled_p ())
319        dump_printf_loc (MSG_NOTE, vect_location,
320                         "already marked relevant/live.\n");
321      return;
322    }
323
324  worklist->safe_push (stmt);
325}
326
327
328/* Function vect_stmt_relevant_p.
329
   Return true if STMT, in the loop that is represented by LOOP_VINFO,
   is "relevant for vectorization".

   A stmt is considered "relevant for vectorization" if:
   - it has uses outside the loop.
   - it has vdefs (it alters memory).
   - it is a control stmt in the loop (other than the exit condition).
337
338   CHECKME: what other side effects would the vectorizer allow?  */
339
340static bool
341vect_stmt_relevant_p (gimple stmt, loop_vec_info loop_vinfo,
342		      enum vect_relevant *relevant, bool *live_p)
343{
344  struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
345  ssa_op_iter op_iter;
346  imm_use_iterator imm_iter;
347  use_operand_p use_p;
348  def_operand_p def_p;
349
350  *relevant = vect_unused_in_scope;
351  *live_p = false;
352
353  /* cond stmt other than loop exit cond.  */
354  if (is_ctrl_stmt (stmt)
355      && STMT_VINFO_TYPE (vinfo_for_stmt (stmt))
356         != loop_exit_ctrl_vec_info_type)
357    *relevant = vect_used_in_scope;
358
359  /* changing memory.  */
360  if (gimple_code (stmt) != GIMPLE_PHI)
361    if (gimple_vdef (stmt)
362	&& !gimple_clobber_p (stmt))
363      {
364	if (dump_enabled_p ())
365	  dump_printf_loc (MSG_NOTE, vect_location,
366                           "vec_stmt_relevant_p: stmt has vdefs.\n");
367	*relevant = vect_used_in_scope;
368      }
369
370  /* uses outside the loop.  */
371  FOR_EACH_PHI_OR_STMT_DEF (def_p, stmt, op_iter, SSA_OP_DEF)
372    {
373      FOR_EACH_IMM_USE_FAST (use_p, imm_iter, DEF_FROM_PTR (def_p))
374	{
375	  basic_block bb = gimple_bb (USE_STMT (use_p));
376	  if (!flow_bb_inside_loop_p (loop, bb))
377	    {
378	      if (dump_enabled_p ())
379		dump_printf_loc (MSG_NOTE, vect_location,
380                                 "vec_stmt_relevant_p: used out of loop.\n");
381
382	      if (is_gimple_debug (USE_STMT (use_p)))
383		continue;
384
	      /* We expect all such uses to be in the loop exit phis
		 (because of loop-closed SSA form).  */
387	      gcc_assert (gimple_code (USE_STMT (use_p)) == GIMPLE_PHI);
388	      gcc_assert (bb == single_exit (loop)->dest);
389
390              *live_p = true;
391	    }
392	}
393    }
394
395  return (*live_p || *relevant);
396}
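
/* For illustration (not taken from any particular testcase):

     for (i = 0; i < n; i++)
       {
	 a[i] = b[i] + 1;	<-- has a vdef, so *relevant is set
	 s += b[i];		<-- s is used after the loop, so *live_p is set
       }
     ... = s;
*/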
397
398
399/* Function exist_non_indexing_operands_for_use_p
400
401   USE is one of the uses attached to STMT.  Check if USE is
402   used in STMT for anything other than indexing an array.  */
403
404static bool
405exist_non_indexing_operands_for_use_p (tree use, gimple stmt)
406{
407  tree operand;
408  stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
409
410  /* USE corresponds to some operand in STMT.  If there is no data
411     reference in STMT, then any operand that corresponds to USE
412     is not indexing an array.  */
413  if (!STMT_VINFO_DATA_REF (stmt_info))
414    return true;
415
  /* STMT has a data_ref.  FORNOW this means that it is one of
     the following forms:
418     -1- ARRAY_REF = var
419     -2- var = ARRAY_REF
420     (This should have been verified in analyze_data_refs).
421
422     'var' in the second case corresponds to a def, not a use,
423     so USE cannot correspond to any operands that are not used
424     for array indexing.
425
426     Therefore, all we need to check is if STMT falls into the
427     first case, and whether var corresponds to USE.  */
428
429  if (!gimple_assign_copy_p (stmt))
430    {
431      if (is_gimple_call (stmt)
432	  && gimple_call_internal_p (stmt))
433	switch (gimple_call_internal_fn (stmt))
434	  {
435	  case IFN_MASK_STORE:
436	    operand = gimple_call_arg (stmt, 3);
437	    if (operand == use)
438	      return true;
439	    /* FALLTHRU */
440	  case IFN_MASK_LOAD:
441	    operand = gimple_call_arg (stmt, 2);
442	    if (operand == use)
443	      return true;
444	    break;
445	  default:
446	    break;
447	  }
448      return false;
449    }
450
451  if (TREE_CODE (gimple_assign_lhs (stmt)) == SSA_NAME)
452    return false;
453  operand = gimple_assign_rhs1 (stmt);
454  if (TREE_CODE (operand) != SSA_NAME)
455    return false;
456
457  if (operand == use)
458    return true;
459
460  return false;
461}
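
/* For example (illustrative): given "a[i_1] = x_2", the use of i_1 only
   feeds the address computation, so this function returns false for it,
   whereas for x_2, the value actually being stored, it returns true.  */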
462
463
464/*
465   Function process_use.
466
467   Inputs:
468   - a USE in STMT in a loop represented by LOOP_VINFO
469   - LIVE_P, RELEVANT - enum values to be set in the STMT_VINFO of the stmt
470     that defined USE.  This is done by calling mark_relevant and passing it
471     the WORKLIST (to add DEF_STMT to the WORKLIST in case it is relevant).
472   - FORCE is true if exist_non_indexing_operands_for_use_p check shouldn't
473     be performed.
474
475   Outputs:
476   Generally, LIVE_P and RELEVANT are used to define the liveness and
477   relevance info of the DEF_STMT of this USE:
478       STMT_VINFO_LIVE_P (DEF_STMT_info) <-- live_p
479       STMT_VINFO_RELEVANT (DEF_STMT_info) <-- relevant
480   Exceptions:
481   - case 1: If USE is used only for address computations (e.g. array indexing),
482   which does not need to be directly vectorized, then the liveness/relevance
483   of the respective DEF_STMT is left unchanged.
   - case 2: If STMT is a reduction phi and DEF_STMT is a reduction stmt, we
   skip DEF_STMT because it has already been processed.
   - case 3: If DEF_STMT and STMT are in different nests, then "relevant" will
   be modified accordingly.
488
489   Return true if everything is as expected. Return false otherwise.  */
490
491static bool
492process_use (gimple stmt, tree use, loop_vec_info loop_vinfo, bool live_p,
493	     enum vect_relevant relevant, vec<gimple> *worklist,
494	     bool force)
495{
496  struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
497  stmt_vec_info stmt_vinfo = vinfo_for_stmt (stmt);
498  stmt_vec_info dstmt_vinfo;
499  basic_block bb, def_bb;
500  tree def;
501  gimple def_stmt;
502  enum vect_def_type dt;
503
504  /* case 1: we are only interested in uses that need to be vectorized.  Uses
505     that are used for address computation are not considered relevant.  */
506  if (!force && !exist_non_indexing_operands_for_use_p (use, stmt))
507     return true;
508
509  if (!vect_is_simple_use (use, stmt, loop_vinfo, NULL, &def_stmt, &def, &dt))
510    {
511      if (dump_enabled_p ())
512        dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
513                         "not vectorized: unsupported use in stmt.\n");
514      return false;
515    }
516
517  if (!def_stmt || gimple_nop_p (def_stmt))
518    return true;
519
520  def_bb = gimple_bb (def_stmt);
521  if (!flow_bb_inside_loop_p (loop, def_bb))
522    {
523      if (dump_enabled_p ())
524	dump_printf_loc (MSG_NOTE, vect_location, "def_stmt is out of loop.\n");
525      return true;
526    }
527
528  /* case 2: A reduction phi (STMT) defined by a reduction stmt (DEF_STMT).
529     DEF_STMT must have already been processed, because this should be the
530     only way that STMT, which is a reduction-phi, was put in the worklist,
531     as there should be no other uses for DEF_STMT in the loop.  So we just
532     check that everything is as expected, and we are done.  */
533  dstmt_vinfo = vinfo_for_stmt (def_stmt);
534  bb = gimple_bb (stmt);
535  if (gimple_code (stmt) == GIMPLE_PHI
536      && STMT_VINFO_DEF_TYPE (stmt_vinfo) == vect_reduction_def
537      && gimple_code (def_stmt) != GIMPLE_PHI
538      && STMT_VINFO_DEF_TYPE (dstmt_vinfo) == vect_reduction_def
539      && bb->loop_father == def_bb->loop_father)
540    {
541      if (dump_enabled_p ())
542	dump_printf_loc (MSG_NOTE, vect_location,
543                         "reduc-stmt defining reduc-phi in the same nest.\n");
544      if (STMT_VINFO_IN_PATTERN_P (dstmt_vinfo))
545	dstmt_vinfo = vinfo_for_stmt (STMT_VINFO_RELATED_STMT (dstmt_vinfo));
546      gcc_assert (STMT_VINFO_RELEVANT (dstmt_vinfo) < vect_used_by_reduction);
547      gcc_assert (STMT_VINFO_LIVE_P (dstmt_vinfo)
548		  || STMT_VINFO_RELEVANT (dstmt_vinfo) > vect_unused_in_scope);
549      return true;
550    }
551
552  /* case 3a: outer-loop stmt defining an inner-loop stmt:
553	outer-loop-header-bb:
554		d = def_stmt
555	inner-loop:
556		stmt # use (d)
557	outer-loop-tail-bb:
558		...		  */
559  if (flow_loop_nested_p (def_bb->loop_father, bb->loop_father))
560    {
561      if (dump_enabled_p ())
562	dump_printf_loc (MSG_NOTE, vect_location,
563                         "outer-loop def-stmt defining inner-loop stmt.\n");
564
565      switch (relevant)
566	{
567	case vect_unused_in_scope:
568	  relevant = (STMT_VINFO_DEF_TYPE (stmt_vinfo) == vect_nested_cycle) ?
569		      vect_used_in_scope : vect_unused_in_scope;
570	  break;
571
572	case vect_used_in_outer_by_reduction:
573          gcc_assert (STMT_VINFO_DEF_TYPE (stmt_vinfo) != vect_reduction_def);
574	  relevant = vect_used_by_reduction;
575	  break;
576
577	case vect_used_in_outer:
578          gcc_assert (STMT_VINFO_DEF_TYPE (stmt_vinfo) != vect_reduction_def);
579	  relevant = vect_used_in_scope;
580	  break;
581
582	case vect_used_in_scope:
583	  break;
584
585	default:
586	  gcc_unreachable ();
587	}
588    }
589
590  /* case 3b: inner-loop stmt defining an outer-loop stmt:
591	outer-loop-header-bb:
592		...
593	inner-loop:
594		d = def_stmt
595	outer-loop-tail-bb (or outer-loop-exit-bb in double reduction):
596		stmt # use (d)		*/
597  else if (flow_loop_nested_p (bb->loop_father, def_bb->loop_father))
598    {
599      if (dump_enabled_p ())
600	dump_printf_loc (MSG_NOTE, vect_location,
601                         "inner-loop def-stmt defining outer-loop stmt.\n");
602
603      switch (relevant)
604        {
605        case vect_unused_in_scope:
606          relevant = (STMT_VINFO_DEF_TYPE (stmt_vinfo) == vect_reduction_def
607            || STMT_VINFO_DEF_TYPE (stmt_vinfo) == vect_double_reduction_def) ?
608                      vect_used_in_outer_by_reduction : vect_unused_in_scope;
609          break;
610
611        case vect_used_by_reduction:
612          relevant = vect_used_in_outer_by_reduction;
613          break;
614
615        case vect_used_in_scope:
616          relevant = vect_used_in_outer;
617          break;
618
619        default:
620          gcc_unreachable ();
621        }
622    }
623
624  vect_mark_relevant (worklist, def_stmt, relevant, live_p,
625                      is_pattern_stmt_p (stmt_vinfo));
626  return true;
627}
628
629
630/* Function vect_mark_stmts_to_be_vectorized.
631
632   Not all stmts in the loop need to be vectorized. For example:
633
634     for i...
635       for j...
636   1.    T0 = i + j
637   2.	 T1 = a[T0]
638
639   3.    j = j + 1
640
   Stmts 1 and 3 do not need to be vectorized, because loop control and
642   addressing of vectorized data-refs are handled differently.
643
644   This pass detects such stmts.  */
645
646bool
647vect_mark_stmts_to_be_vectorized (loop_vec_info loop_vinfo)
648{
649  struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
650  basic_block *bbs = LOOP_VINFO_BBS (loop_vinfo);
651  unsigned int nbbs = loop->num_nodes;
652  gimple_stmt_iterator si;
653  gimple stmt;
654  unsigned int i;
655  stmt_vec_info stmt_vinfo;
656  basic_block bb;
657  gimple phi;
658  bool live_p;
659  enum vect_relevant relevant, tmp_relevant;
660  enum vect_def_type def_type;
661
662  if (dump_enabled_p ())
663    dump_printf_loc (MSG_NOTE, vect_location,
664                     "=== vect_mark_stmts_to_be_vectorized ===\n");
665
666  auto_vec<gimple, 64> worklist;
667
668  /* 1. Init worklist.  */
669  for (i = 0; i < nbbs; i++)
670    {
671      bb = bbs[i];
672      for (si = gsi_start_phis (bb); !gsi_end_p (si); gsi_next (&si))
673	{
674	  phi = gsi_stmt (si);
675	  if (dump_enabled_p ())
676	    {
677	      dump_printf_loc (MSG_NOTE, vect_location, "init: phi relevant? ");
678	      dump_gimple_stmt (MSG_NOTE, TDF_SLIM, phi, 0);
679	    }
680
681	  if (vect_stmt_relevant_p (phi, loop_vinfo, &relevant, &live_p))
682	    vect_mark_relevant (&worklist, phi, relevant, live_p, false);
683	}
684      for (si = gsi_start_bb (bb); !gsi_end_p (si); gsi_next (&si))
685	{
686	  stmt = gsi_stmt (si);
687	  if (dump_enabled_p ())
688	    {
689	      dump_printf_loc (MSG_NOTE, vect_location, "init: stmt relevant? ");
690	      dump_gimple_stmt (MSG_NOTE, TDF_SLIM, stmt, 0);
691	    }
692
693	  if (vect_stmt_relevant_p (stmt, loop_vinfo, &relevant, &live_p))
694            vect_mark_relevant (&worklist, stmt, relevant, live_p, false);
695	}
696    }
697
698  /* 2. Process_worklist */
699  while (worklist.length () > 0)
700    {
701      use_operand_p use_p;
702      ssa_op_iter iter;
703
704      stmt = worklist.pop ();
705      if (dump_enabled_p ())
706	{
707          dump_printf_loc (MSG_NOTE, vect_location, "worklist: examine stmt: ");
708          dump_gimple_stmt (MSG_NOTE, TDF_SLIM, stmt, 0);
709	}
710
711      /* Examine the USEs of STMT. For each USE, mark the stmt that defines it
712	 (DEF_STMT) as relevant/irrelevant and live/dead according to the
713	 liveness and relevance properties of STMT.  */
714      stmt_vinfo = vinfo_for_stmt (stmt);
715      relevant = STMT_VINFO_RELEVANT (stmt_vinfo);
716      live_p = STMT_VINFO_LIVE_P (stmt_vinfo);
717
718      /* Generally, the liveness and relevance properties of STMT are
719	 propagated as is to the DEF_STMTs of its USEs:
720	  live_p <-- STMT_VINFO_LIVE_P (STMT_VINFO)
721	  relevant <-- STMT_VINFO_RELEVANT (STMT_VINFO)
722
723	 One exception is when STMT has been identified as defining a reduction
724	 variable; in this case we set the liveness/relevance as follows:
725	   live_p = false
726	   relevant = vect_used_by_reduction
727	 This is because we distinguish between two kinds of relevant stmts -
728	 those that are used by a reduction computation, and those that are
729	 (also) used by a regular computation.  This allows us later on to
730	 identify stmts that are used solely by a reduction, and therefore the
731	 order of the results that they produce does not have to be kept.  */
732
733      def_type = STMT_VINFO_DEF_TYPE (stmt_vinfo);
734      tmp_relevant = relevant;
735      switch (def_type)
736        {
737          case vect_reduction_def:
738	    switch (tmp_relevant)
739	      {
740	        case vect_unused_in_scope:
741	          relevant = vect_used_by_reduction;
742	          break;
743
744	        case vect_used_by_reduction:
745	          if (gimple_code (stmt) == GIMPLE_PHI)
746                    break;
747  	          /* fall through */
748
749	        default:
750	          if (dump_enabled_p ())
751	            dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
752                                     "unsupported use of reduction.\n");
753	          return false;
754	      }
755
756	    live_p = false;
757	    break;
758
759          case vect_nested_cycle:
760            if (tmp_relevant != vect_unused_in_scope
761                && tmp_relevant != vect_used_in_outer_by_reduction
762                && tmp_relevant != vect_used_in_outer)
763              {
764                if (dump_enabled_p ())
765                  dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
766                                   "unsupported use of nested cycle.\n");
767
768                return false;
769              }
770
771            live_p = false;
772            break;
773
774          case vect_double_reduction_def:
775            if (tmp_relevant != vect_unused_in_scope
776                && tmp_relevant != vect_used_by_reduction)
777              {
778                if (dump_enabled_p ())
779                  dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
780                                   "unsupported use of double reduction.\n");
781
782                return false;
783              }
784
785            live_p = false;
786            break;
787
788          default:
789            break;
790        }
791
792      if (is_pattern_stmt_p (stmt_vinfo))
793        {
794          /* Pattern statements are not inserted into the code, so
795             FOR_EACH_PHI_OR_STMT_USE optimizes their operands out, and we
796             have to scan the RHS or function arguments instead.  */
797          if (is_gimple_assign (stmt))
798            {
799	      enum tree_code rhs_code = gimple_assign_rhs_code (stmt);
800	      tree op = gimple_assign_rhs1 (stmt);
801
802	      i = 1;
803	      if (rhs_code == COND_EXPR && COMPARISON_CLASS_P (op))
804		{
805		  if (!process_use (stmt, TREE_OPERAND (op, 0), loop_vinfo,
806				    live_p, relevant, &worklist, false)
807		      || !process_use (stmt, TREE_OPERAND (op, 1), loop_vinfo,
808				       live_p, relevant, &worklist, false))
809		    return false;
810		  i = 2;
811		}
812	      for (; i < gimple_num_ops (stmt); i++)
813                {
814		  op = gimple_op (stmt, i);
815                  if (!process_use (stmt, op, loop_vinfo, live_p, relevant,
816				    &worklist, false))
817                    return false;
818                 }
819            }
820          else if (is_gimple_call (stmt))
821            {
822              for (i = 0; i < gimple_call_num_args (stmt); i++)
823                {
824                  tree arg = gimple_call_arg (stmt, i);
825                  if (!process_use (stmt, arg, loop_vinfo, live_p, relevant,
826				    &worklist, false))
827                    return false;
828                }
829            }
830        }
831      else
832        FOR_EACH_PHI_OR_STMT_USE (use_p, stmt, iter, SSA_OP_USE)
833          {
834            tree op = USE_FROM_PTR (use_p);
835            if (!process_use (stmt, op, loop_vinfo, live_p, relevant,
836			      &worklist, false))
837              return false;
838          }
839
840      if (STMT_VINFO_GATHER_P (stmt_vinfo))
841	{
842	  tree off;
843	  tree decl = vect_check_gather (stmt, loop_vinfo, NULL, &off, NULL);
844	  gcc_assert (decl);
845	  if (!process_use (stmt, off, loop_vinfo, live_p, relevant,
846			    &worklist, true))
847	    return false;
848	}
849    } /* while worklist */
850
851  return true;
852}
853
854
855/* Function vect_model_simple_cost.
856
857   Models cost for simple operations, i.e. those that only emit ncopies of a
858   single op.  Right now, this does not account for multiple insns that could
859   be generated for the single vector op.  We will handle that shortly.  */
860
861void
862vect_model_simple_cost (stmt_vec_info stmt_info, int ncopies,
863			enum vect_def_type *dt,
864			stmt_vector_for_cost *prologue_cost_vec,
865			stmt_vector_for_cost *body_cost_vec)
866{
867  int i;
868  int inside_cost = 0, prologue_cost = 0;
869
870  /* The SLP costs were already calculated during SLP tree build.  */
871  if (PURE_SLP_STMT (stmt_info))
872    return;
873
  /* FORNOW: Assuming maximum 2 args per stmt.  */
875  for (i = 0; i < 2; i++)
876    if (dt[i] == vect_constant_def || dt[i] == vect_external_def)
877      prologue_cost += record_stmt_cost (prologue_cost_vec, 1, vector_stmt,
878					 stmt_info, 0, vect_prologue);
879
880  /* Pass the inside-of-loop statements to the target-specific cost model.  */
881  inside_cost = record_stmt_cost (body_cost_vec, ncopies, vector_stmt,
882				  stmt_info, 0, vect_body);
883
884  if (dump_enabled_p ())
885    dump_printf_loc (MSG_NOTE, vect_location,
886                     "vect_model_simple_cost: inside_cost = %d, "
887                     "prologue_cost = %d .\n", inside_cost, prologue_cost);
888}
889
890
891/* Model cost for type demotion and promotion operations.  PWR is normally
892   zero for single-step promotions and demotions.  It will be one if
893   two-step promotion/demotion is required, and so on.  Each additional
894   step doubles the number of instructions required.  */
895
896static void
897vect_model_promotion_demotion_cost (stmt_vec_info stmt_info,
898				    enum vect_def_type *dt, int pwr)
899{
900  int i, tmp;
901  int inside_cost = 0, prologue_cost = 0;
902  loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
903  bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
904  void *target_cost_data;
905
906  /* The SLP costs were already calculated during SLP tree build.  */
907  if (PURE_SLP_STMT (stmt_info))
908    return;
909
910  if (loop_vinfo)
911    target_cost_data = LOOP_VINFO_TARGET_COST_DATA (loop_vinfo);
912  else
913    target_cost_data = BB_VINFO_TARGET_COST_DATA (bb_vinfo);
914
915  for (i = 0; i < pwr + 1; i++)
916    {
917      tmp = (STMT_VINFO_TYPE (stmt_info) == type_promotion_vec_info_type) ?
918	(i + 1) : i;
919      inside_cost += add_stmt_cost (target_cost_data, vect_pow2 (tmp),
920				    vec_promote_demote, stmt_info, 0,
921				    vect_body);
922    }
923
  /* FORNOW: Assuming maximum 2 args per stmt.  */
925  for (i = 0; i < 2; i++)
926    if (dt[i] == vect_constant_def || dt[i] == vect_external_def)
927      prologue_cost += add_stmt_cost (target_cost_data, 1, vector_stmt,
928				      stmt_info, 0, vect_prologue);
929
930  if (dump_enabled_p ())
931    dump_printf_loc (MSG_NOTE, vect_location,
932                     "vect_model_promotion_demotion_cost: inside_cost = %d, "
933                     "prologue_cost = %d .\n", inside_cost, prologue_cost);
934}
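
/* Worked example (illustrative): for a two-step promotion (PWR == 1) of
   a statement classified as type_promotion_vec_info_type, the loop above
   accumulates vect_pow2 (1) + vect_pow2 (2) = 2 + 4 = 6
   vec_promote_demote statements in the body cost.  */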
935
936/* Function vect_cost_group_size
937
938   For grouped load or store, return the group_size only if it is the first
939   load or store of a group, else return 1.  This ensures that group size is
940   only returned once per group.  */
941
942static int
943vect_cost_group_size (stmt_vec_info stmt_info)
944{
945  gimple first_stmt = GROUP_FIRST_ELEMENT (stmt_info);
946
947  if (first_stmt == STMT_VINFO_STMT (stmt_info))
948    return GROUP_SIZE (stmt_info);
949
950  return 1;
951}
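
/* For example (illustrative): for a four-element store group
   { S1, S2, S3, S4 }, calling this on S1 yields 4, while calling it on
   S2, S3 or S4 yields 1, so the group size is charged only once.  */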
952
953
954/* Function vect_model_store_cost
955
956   Models cost for stores.  In the case of grouped accesses, one access
957   has the overhead of the grouped access attributed to it.  */
958
959void
960vect_model_store_cost (stmt_vec_info stmt_info, int ncopies,
961		       bool store_lanes_p, enum vect_def_type dt,
962		       slp_tree slp_node,
963		       stmt_vector_for_cost *prologue_cost_vec,
964		       stmt_vector_for_cost *body_cost_vec)
965{
966  int group_size;
967  unsigned int inside_cost = 0, prologue_cost = 0;
968  struct data_reference *first_dr;
969  gimple first_stmt;
970
971  /* The SLP costs were already calculated during SLP tree build.  */
972  if (PURE_SLP_STMT (stmt_info))
973    return;
974
975  if (dt == vect_constant_def || dt == vect_external_def)
976    prologue_cost += record_stmt_cost (prologue_cost_vec, 1, scalar_to_vec,
977				       stmt_info, 0, vect_prologue);
978
979  /* Grouped access?  */
980  if (STMT_VINFO_GROUPED_ACCESS (stmt_info))
981    {
982      if (slp_node)
983        {
984          first_stmt = SLP_TREE_SCALAR_STMTS (slp_node)[0];
985          group_size = 1;
986        }
987      else
988        {
989          first_stmt = GROUP_FIRST_ELEMENT (stmt_info);
990          group_size = vect_cost_group_size (stmt_info);
991        }
992
993      first_dr = STMT_VINFO_DATA_REF (vinfo_for_stmt (first_stmt));
994    }
995  /* Not a grouped access.  */
996  else
997    {
998      group_size = 1;
999      first_dr = STMT_VINFO_DATA_REF (stmt_info);
1000    }
1001
1002  /* We assume that the cost of a single store-lanes instruction is
1003     equivalent to the cost of GROUP_SIZE separate stores.  If a grouped
1004     access is instead being provided by a permute-and-store operation,
1005     include the cost of the permutes.  */
1006  if (!store_lanes_p && group_size > 1)
1007    {
      /* Uses high and low interleave operations, or shuffle operations,
	 for each needed permute.  */
1010      int nstmts = ncopies * ceil_log2 (group_size) * group_size;
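      /* For example (illustrative): GROUP_SIZE == 4 and NCOPIES == 2
	 gives 2 * ceil_log2 (4) * 4 = 16 permute statements.  */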
1011      inside_cost = record_stmt_cost (body_cost_vec, nstmts, vec_perm,
1012				      stmt_info, 0, vect_body);
1013
1014      if (dump_enabled_p ())
1015        dump_printf_loc (MSG_NOTE, vect_location,
1016                         "vect_model_store_cost: strided group_size = %d .\n",
1017                         group_size);
1018    }
1019
1020  /* Costs of the stores.  */
1021  vect_get_store_cost (first_dr, ncopies, &inside_cost, body_cost_vec);
1022
1023  if (dump_enabled_p ())
1024    dump_printf_loc (MSG_NOTE, vect_location,
1025                     "vect_model_store_cost: inside_cost = %d, "
1026                     "prologue_cost = %d .\n", inside_cost, prologue_cost);
1027}
1028
1029
1030/* Calculate cost of DR's memory access.  */
1031void
1032vect_get_store_cost (struct data_reference *dr, int ncopies,
1033		     unsigned int *inside_cost,
1034		     stmt_vector_for_cost *body_cost_vec)
1035{
1036  int alignment_support_scheme = vect_supportable_dr_alignment (dr, false);
1037  gimple stmt = DR_STMT (dr);
1038  stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
1039
1040  switch (alignment_support_scheme)
1041    {
1042    case dr_aligned:
1043      {
1044	*inside_cost += record_stmt_cost (body_cost_vec, ncopies,
1045					  vector_store, stmt_info, 0,
1046					  vect_body);
1047
1048        if (dump_enabled_p ())
1049          dump_printf_loc (MSG_NOTE, vect_location,
1050                           "vect_model_store_cost: aligned.\n");
1051        break;
1052      }
1053
1054    case dr_unaligned_supported:
1055      {
1056        /* Here, we assign an additional cost for the unaligned store.  */
1057	*inside_cost += record_stmt_cost (body_cost_vec, ncopies,
1058					  unaligned_store, stmt_info,
1059					  DR_MISALIGNMENT (dr), vect_body);
1060        if (dump_enabled_p ())
1061          dump_printf_loc (MSG_NOTE, vect_location,
1062                           "vect_model_store_cost: unaligned supported by "
1063                           "hardware.\n");
1064        break;
1065      }
1066
1067    case dr_unaligned_unsupported:
1068      {
1069        *inside_cost = VECT_MAX_COST;
1070
1071        if (dump_enabled_p ())
1072          dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1073                           "vect_model_store_cost: unsupported access.\n");
1074        break;
1075      }
1076
1077    default:
1078      gcc_unreachable ();
1079    }
1080}
1081
1082
1083/* Function vect_model_load_cost
1084
1085   Models cost for loads.  In the case of grouped accesses, the last access
1086   has the overhead of the grouped access attributed to it.  Since unaligned
1087   accesses are supported for loads, we also account for the costs of the
1088   access scheme chosen.  */
1089
1090void
1091vect_model_load_cost (stmt_vec_info stmt_info, int ncopies,
1092		      bool load_lanes_p, slp_tree slp_node,
1093		      stmt_vector_for_cost *prologue_cost_vec,
1094		      stmt_vector_for_cost *body_cost_vec)
1095{
1096  int group_size;
1097  gimple first_stmt;
1098  struct data_reference *dr = STMT_VINFO_DATA_REF (stmt_info), *first_dr;
1099  unsigned int inside_cost = 0, prologue_cost = 0;
1100
1101  /* The SLP costs were already calculated during SLP tree build.  */
1102  if (PURE_SLP_STMT (stmt_info))
1103    return;
1104
1105  /* Grouped accesses?  */
1106  first_stmt = GROUP_FIRST_ELEMENT (stmt_info);
1107  if (STMT_VINFO_GROUPED_ACCESS (stmt_info) && first_stmt && !slp_node)
1108    {
1109      group_size = vect_cost_group_size (stmt_info);
1110      first_dr = STMT_VINFO_DATA_REF (vinfo_for_stmt (first_stmt));
1111    }
1112  /* Not a grouped access.  */
1113  else
1114    {
1115      group_size = 1;
1116      first_dr = dr;
1117    }
1118
1119  /* We assume that the cost of a single load-lanes instruction is
1120     equivalent to the cost of GROUP_SIZE separate loads.  If a grouped
1121     access is instead being provided by a load-and-permute operation,
1122     include the cost of the permutes.  */
1123  if (!load_lanes_p && group_size > 1)
1124    {
      /* Uses even and odd extract operations, or shuffle operations,
	 for each needed permute.  */
1127      int nstmts = ncopies * ceil_log2 (group_size) * group_size;
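      /* For example (illustrative): GROUP_SIZE == 8 and NCOPIES == 1
	 gives 1 * ceil_log2 (8) * 8 = 24 permute statements.  */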
1128      inside_cost = record_stmt_cost (body_cost_vec, nstmts, vec_perm,
1129				      stmt_info, 0, vect_body);
1130
1131      if (dump_enabled_p ())
1132        dump_printf_loc (MSG_NOTE, vect_location,
1133                         "vect_model_load_cost: strided group_size = %d .\n",
1134                         group_size);
1135    }
1136
1137  /* The loads themselves.  */
1138  if (STMT_VINFO_STRIDE_LOAD_P (stmt_info))
1139    {
1140      /* N scalar loads plus gathering them into a vector.  */
1141      tree vectype = STMT_VINFO_VECTYPE (stmt_info);
1142      inside_cost += record_stmt_cost (body_cost_vec,
1143				       ncopies * TYPE_VECTOR_SUBPARTS (vectype),
1144				       scalar_load, stmt_info, 0, vect_body);
1145      inside_cost += record_stmt_cost (body_cost_vec, ncopies, vec_construct,
1146				       stmt_info, 0, vect_body);
1147    }
1148  else
1149    vect_get_load_cost (first_dr, ncopies,
1150			((!STMT_VINFO_GROUPED_ACCESS (stmt_info))
1151			 || group_size > 1 || slp_node),
1152			&inside_cost, &prologue_cost,
1153			prologue_cost_vec, body_cost_vec, true);
1154
1155  if (dump_enabled_p ())
1156    dump_printf_loc (MSG_NOTE, vect_location,
1157                     "vect_model_load_cost: inside_cost = %d, "
1158                     "prologue_cost = %d .\n", inside_cost, prologue_cost);
1159}
1160
1161
1162/* Calculate cost of DR's memory access.  */
1163void
1164vect_get_load_cost (struct data_reference *dr, int ncopies,
1165		    bool add_realign_cost, unsigned int *inside_cost,
1166		    unsigned int *prologue_cost,
1167		    stmt_vector_for_cost *prologue_cost_vec,
1168		    stmt_vector_for_cost *body_cost_vec,
1169		    bool record_prologue_costs)
1170{
1171  int alignment_support_scheme = vect_supportable_dr_alignment (dr, false);
1172  gimple stmt = DR_STMT (dr);
1173  stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
1174
1175  switch (alignment_support_scheme)
1176    {
1177    case dr_aligned:
1178      {
1179	*inside_cost += record_stmt_cost (body_cost_vec, ncopies, vector_load,
1180					  stmt_info, 0, vect_body);
1181
1182        if (dump_enabled_p ())
1183          dump_printf_loc (MSG_NOTE, vect_location,
1184                           "vect_model_load_cost: aligned.\n");
1185
1186        break;
1187      }
1188    case dr_unaligned_supported:
1189      {
1190        /* Here, we assign an additional cost for the unaligned load.  */
1191	*inside_cost += record_stmt_cost (body_cost_vec, ncopies,
1192					  unaligned_load, stmt_info,
1193					  DR_MISALIGNMENT (dr), vect_body);
1194
1195        if (dump_enabled_p ())
1196          dump_printf_loc (MSG_NOTE, vect_location,
1197                           "vect_model_load_cost: unaligned supported by "
1198                           "hardware.\n");
1199
1200        break;
1201      }
1202    case dr_explicit_realign:
1203      {
1204	*inside_cost += record_stmt_cost (body_cost_vec, ncopies * 2,
1205					  vector_load, stmt_info, 0, vect_body);
1206	*inside_cost += record_stmt_cost (body_cost_vec, ncopies,
1207					  vec_perm, stmt_info, 0, vect_body);
1208
1209        /* FIXME: If the misalignment remains fixed across the iterations of
1210           the containing loop, the following cost should be added to the
1211           prologue costs.  */
1212        if (targetm.vectorize.builtin_mask_for_load)
1213	  *inside_cost += record_stmt_cost (body_cost_vec, 1, vector_stmt,
1214					    stmt_info, 0, vect_body);
1215
1216        if (dump_enabled_p ())
1217          dump_printf_loc (MSG_NOTE, vect_location,
1218                           "vect_model_load_cost: explicit realign\n");
1219
1220        break;
1221      }
1222    case dr_explicit_realign_optimized:
1223      {
1224        if (dump_enabled_p ())
1225          dump_printf_loc (MSG_NOTE, vect_location,
1226                           "vect_model_load_cost: unaligned software "
1227                           "pipelined.\n");
1228
1229        /* Unaligned software pipeline has a load of an address, an initial
1230           load, and possibly a mask operation to "prime" the loop.  However,
1231           if this is an access in a group of loads, which provide grouped
1232           access, then the above cost should only be considered for one
1233           access in the group.  Inside the loop, there is a load op
1234           and a realignment op.  */
1235
1236        if (add_realign_cost && record_prologue_costs)
1237          {
1238	    *prologue_cost += record_stmt_cost (prologue_cost_vec, 2,
1239						vector_stmt, stmt_info,
1240						0, vect_prologue);
1241            if (targetm.vectorize.builtin_mask_for_load)
1242	      *prologue_cost += record_stmt_cost (prologue_cost_vec, 1,
1243						  vector_stmt, stmt_info,
1244						  0, vect_prologue);
1245          }
1246
1247	*inside_cost += record_stmt_cost (body_cost_vec, ncopies, vector_load,
1248					  stmt_info, 0, vect_body);
1249	*inside_cost += record_stmt_cost (body_cost_vec, ncopies, vec_perm,
1250					  stmt_info, 0, vect_body);
1251
1252        if (dump_enabled_p ())
1253          dump_printf_loc (MSG_NOTE, vect_location,
1254                           "vect_model_load_cost: explicit realign optimized"
1255                           "\n");
1256
1257        break;
1258      }
1259
1260    case dr_unaligned_unsupported:
1261      {
1262        *inside_cost = VECT_MAX_COST;
1263
1264        if (dump_enabled_p ())
1265          dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1266                           "vect_model_load_cost: unsupported access.\n");
1267        break;
1268      }
1269
1270    default:
1271      gcc_unreachable ();
1272    }
1273}
1274
1275/* Insert the new stmt NEW_STMT at *GSI or at the appropriate place in
1276   the loop preheader for the vectorized stmt STMT.  */
1277
1278static void
1279vect_init_vector_1 (gimple stmt, gimple new_stmt, gimple_stmt_iterator *gsi)
1280{
1281  if (gsi)
1282    vect_finish_stmt_generation (stmt, new_stmt, gsi);
1283  else
1284    {
1285      stmt_vec_info stmt_vinfo = vinfo_for_stmt (stmt);
1286      loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_vinfo);
1287
1288      if (loop_vinfo)
1289        {
1290          struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
1291	  basic_block new_bb;
1292	  edge pe;
1293
1294          if (nested_in_vect_loop_p (loop, stmt))
1295            loop = loop->inner;
1296
1297	  pe = loop_preheader_edge (loop);
1298          new_bb = gsi_insert_on_edge_immediate (pe, new_stmt);
1299          gcc_assert (!new_bb);
1300	}
1301      else
1302       {
1303          bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_vinfo);
1304          basic_block bb;
1305          gimple_stmt_iterator gsi_bb_start;
1306
1307          gcc_assert (bb_vinfo);
1308          bb = BB_VINFO_BB (bb_vinfo);
1309          gsi_bb_start = gsi_after_labels (bb);
1310          gsi_insert_before (&gsi_bb_start, new_stmt, GSI_SAME_STMT);
1311       }
1312    }
1313
1314  if (dump_enabled_p ())
1315    {
1316      dump_printf_loc (MSG_NOTE, vect_location,
1317                       "created new init_stmt: ");
1318      dump_gimple_stmt (MSG_NOTE, TDF_SLIM, new_stmt, 0);
1319    }
1320}
1321
1322/* Function vect_init_vector.
1323
1324   Insert a new stmt (INIT_STMT) that initializes a new variable of type
1325   TYPE with the value VAL.  If TYPE is a vector type and VAL does not have
1326   vector type a vector with all elements equal to VAL is created first.
   Place the initialization at GSI if it is not NULL.  Otherwise, place the
1328   initialization at the loop preheader.
1329   Return the DEF of INIT_STMT.
1330   It will be used in the vectorization of STMT.  */
1331
1332tree
1333vect_init_vector (gimple stmt, tree val, tree type, gimple_stmt_iterator *gsi)
1334{
1335  tree new_var;
1336  gimple init_stmt;
1337  tree vec_oprnd;
1338  tree new_temp;
1339
1340  if (TREE_CODE (type) == VECTOR_TYPE
1341      && TREE_CODE (TREE_TYPE (val)) != VECTOR_TYPE)
1342    {
1343      if (!types_compatible_p (TREE_TYPE (type), TREE_TYPE (val)))
1344	{
1345	  if (CONSTANT_CLASS_P (val))
1346	    val = fold_unary (VIEW_CONVERT_EXPR, TREE_TYPE (type), val);
1347	  else
1348	    {
1349	      new_temp = make_ssa_name (TREE_TYPE (type));
1350	      init_stmt = gimple_build_assign (new_temp, NOP_EXPR, val);
1351	      vect_init_vector_1 (stmt, init_stmt, gsi);
1352	      val = new_temp;
1353	    }
1354	}
1355      val = build_vector_from_val (type, val);
1356    }
1357
1358  new_var = vect_get_new_vect_var (type, vect_simple_var, "cst_");
1359  init_stmt = gimple_build_assign  (new_var, val);
1360  new_temp = make_ssa_name (new_var, init_stmt);
1361  gimple_assign_set_lhs (init_stmt, new_temp);
1362  vect_init_vector_1 (stmt, init_stmt, gsi);
1363  vec_oprnd = gimple_assign_lhs (init_stmt);
1364  return vec_oprnd;
1365}
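
/* A minimal usage sketch (illustrative): splatting an invariant scalar
   operand OP of STMT into a vector before the loop, as done from
   vect_get_vec_def_for_operand:

     tree vectype = get_vectype_for_scalar_type (TREE_TYPE (op));
     tree vec_cst = vect_init_vector (stmt, op, vectype, NULL);

   With GSI == NULL the initialization statement is inserted on the
   preheader edge of the loop (or of the inner loop for statements
   nested in an outer loop).  */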
1366
1367
1368/* Function vect_get_vec_def_for_operand.
1369
1370   OP is an operand in STMT.  This function returns a (vector) def that will be
1371   used in the vectorized stmt for STMT.
1372
1373   In the case that OP is an SSA_NAME which is defined in the loop, then
1374   STMT_VINFO_VEC_STMT of the defining stmt holds the relevant def.
1375
1376   In case OP is an invariant or constant, a new stmt that creates a vector def
1377   needs to be introduced.  */
1378
1379tree
1380vect_get_vec_def_for_operand (tree op, gimple stmt, tree *scalar_def)
1381{
1382  tree vec_oprnd;
1383  gimple vec_stmt;
1384  gimple def_stmt;
1385  stmt_vec_info def_stmt_info = NULL;
1386  stmt_vec_info stmt_vinfo = vinfo_for_stmt (stmt);
1387  unsigned int nunits;
1388  loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_vinfo);
1389  tree def;
1390  enum vect_def_type dt;
1391  bool is_simple_use;
1392  tree vector_type;
1393
1394  if (dump_enabled_p ())
1395    {
1396      dump_printf_loc (MSG_NOTE, vect_location,
1397                       "vect_get_vec_def_for_operand: ");
1398      dump_generic_expr (MSG_NOTE, TDF_SLIM, op);
1399      dump_printf (MSG_NOTE, "\n");
1400    }
1401
1402  is_simple_use = vect_is_simple_use (op, stmt, loop_vinfo, NULL,
1403				      &def_stmt, &def, &dt);
1404  gcc_assert (is_simple_use);
1405  if (dump_enabled_p ())
1406    {
1407      int loc_printed = 0;
1408      if (def)
1409        {
1410          dump_printf_loc (MSG_NOTE, vect_location, "def =  ");
1411          loc_printed = 1;
1412          dump_generic_expr (MSG_NOTE, TDF_SLIM, def);
1413          dump_printf (MSG_NOTE, "\n");
1414        }
1415      if (def_stmt)
1416        {
1417          if (loc_printed)
1418            dump_printf (MSG_NOTE, "  def_stmt =  ");
1419          else
1420            dump_printf_loc (MSG_NOTE, vect_location, "  def_stmt =  ");
1421	  dump_gimple_stmt (MSG_NOTE, TDF_SLIM, def_stmt, 0);
1422        }
1423    }
1424
1425  switch (dt)
1426    {
1427    /* Case 1: operand is a constant.  */
1428    case vect_constant_def:
1429      {
1430	vector_type = get_vectype_for_scalar_type (TREE_TYPE (op));
1431	gcc_assert (vector_type);
1432	nunits = TYPE_VECTOR_SUBPARTS (vector_type);
1433
1434	if (scalar_def)
1435	  *scalar_def = op;
1436
1437        /* Create 'vect_cst_ = {cst,cst,...,cst}'  */
1438        if (dump_enabled_p ())
1439          dump_printf_loc (MSG_NOTE, vect_location,
1440                           "Create vector_cst. nunits = %d\n", nunits);
1441
1442        return vect_init_vector (stmt, op, vector_type, NULL);
1443      }
1444
1445    /* Case 2: operand is defined outside the loop - loop invariant.  */
1446    case vect_external_def:
1447      {
1448	vector_type = get_vectype_for_scalar_type (TREE_TYPE (def));
1449	gcc_assert (vector_type);
1450
1451	if (scalar_def)
1452	  *scalar_def = def;
1453
1454        /* Create 'vec_inv = {inv,inv,..,inv}'  */
1455        if (dump_enabled_p ())
1456          dump_printf_loc (MSG_NOTE, vect_location, "Create vector_inv.\n");
1457
1458        return vect_init_vector (stmt, def, vector_type, NULL);
1459      }
1460
1461    /* Case 3: operand is defined inside the loop.  */
1462    case vect_internal_def:
1463      {
1464	if (scalar_def)
1465	  *scalar_def = NULL/* FIXME tuples: def_stmt*/;
1466
1467        /* Get the def from the vectorized stmt.  */
1468        def_stmt_info = vinfo_for_stmt (def_stmt);
1469
1470        vec_stmt = STMT_VINFO_VEC_STMT (def_stmt_info);
1471        /* Get vectorized pattern statement.  */
1472        if (!vec_stmt
1473            && STMT_VINFO_IN_PATTERN_P (def_stmt_info)
1474            && !STMT_VINFO_RELEVANT (def_stmt_info))
1475          vec_stmt = STMT_VINFO_VEC_STMT (vinfo_for_stmt (
1476                       STMT_VINFO_RELATED_STMT (def_stmt_info)));
1477        gcc_assert (vec_stmt);
1478	if (gimple_code (vec_stmt) == GIMPLE_PHI)
1479	  vec_oprnd = PHI_RESULT (vec_stmt);
1480	else if (is_gimple_call (vec_stmt))
1481	  vec_oprnd = gimple_call_lhs (vec_stmt);
1482	else
1483	  vec_oprnd = gimple_assign_lhs (vec_stmt);
1484        return vec_oprnd;
1485      }
1486
1487    /* Case 4: operand is defined by a loop header phi - reduction  */
1488    case vect_reduction_def:
1489    case vect_double_reduction_def:
1490    case vect_nested_cycle:
1491      {
1492	struct loop *loop;
1493
1494	gcc_assert (gimple_code (def_stmt) == GIMPLE_PHI);
1495	loop = (gimple_bb (def_stmt))->loop_father;
1496
1497        /* Get the def before the loop  */
1498        op = PHI_ARG_DEF_FROM_EDGE (def_stmt, loop_preheader_edge (loop));
1499        return get_initial_def_for_reduction (stmt, op, scalar_def);
1500     }
1501
1502    /* Case 5: operand is defined by loop-header phi - induction.  */
1503    case vect_induction_def:
1504      {
1505	gcc_assert (gimple_code (def_stmt) == GIMPLE_PHI);
1506
1507        /* Get the def from the vectorized stmt.  */
1508        def_stmt_info = vinfo_for_stmt (def_stmt);
1509        vec_stmt = STMT_VINFO_VEC_STMT (def_stmt_info);
1510	if (gimple_code (vec_stmt) == GIMPLE_PHI)
1511	  vec_oprnd = PHI_RESULT (vec_stmt);
1512	else
1513	  vec_oprnd = gimple_get_lhs (vec_stmt);
1514        return vec_oprnd;
1515      }
1516
1517    default:
1518      gcc_unreachable ();
1519    }
1520}
1521
1522
1523/* Function vect_get_vec_def_for_stmt_copy
1524
1525   Return a vector-def for an operand.  This function is used when the
1526   vectorized stmt to be created (by the caller to this function) is a "copy"
1527   created in case the vectorized result cannot fit in one vector, and several
1528   copies of the vector-stmt are required.  In this case the vector-def is
1529   retrieved from the vector stmt recorded in the STMT_VINFO_RELATED_STMT field
1530   of the stmt that defines VEC_OPRND.
1531   DT is the type of the vector def VEC_OPRND.
1532
1533   Context:
1534        In case the vectorization factor (VF) is bigger than the number
1535   of elements that can fit in a vectype (nunits), we have to generate
1536   more than one vector stmt to vectorize the scalar stmt.  This situation
1537   arises when there are multiple data-types operated upon in the loop; the
1538   smallest data-type determines the VF, and as a result, when vectorizing
1539   stmts operating on wider types we need to create 'VF/nunits' "copies" of the
1540   vector stmt (each computing a vector of 'nunits' results, and together
1541   computing 'VF' results in each iteration).  This function is called when
1542   vectorizing such a stmt (e.g. vectorizing S2 in the illustration below, in
1543   which VF=16 and nunits=4, so the number of copies required is 4):
1544
1545   scalar stmt:         vectorized into:        STMT_VINFO_RELATED_STMT
1546
1547   S1: x = load         VS1.0:  vx.0 = memref0      VS1.1
1548                        VS1.1:  vx.1 = memref1      VS1.2
1549                        VS1.2:  vx.2 = memref2      VS1.3
1550                        VS1.3:  vx.3 = memref3
1551
1552   S2: z = x + ...      VSnew.0:  vz0 = vx.0 + ...  VSnew.1
1553                        VSnew.1:  vz1 = vx.1 + ...  VSnew.2
1554                        VSnew.2:  vz2 = vx.2 + ...  VSnew.3
1555                        VSnew.3:  vz3 = vx.3 + ...
1556
1557   The vectorization of S1 is explained in vectorizable_load.
1558   The vectorization of S2:
1559        To create the first vector-stmt out of the 4 copies - VSnew.0 -
1560   the function 'vect_get_vec_def_for_operand' is called to
1561   get the relevant vector-def for each operand of S2.  For operand x it
1562   returns  the vector-def 'vx.0'.
1563
1564        To create the remaining copies of the vector-stmt (VSnew.j), this
1565   function is called to get the relevant vector-def for each operand.  It is
1566   obtained from the respective VS1.j stmt, which is recorded in the
1567   STMT_VINFO_RELATED_STMT field of the stmt that defines VEC_OPRND.
1568
1569        For example, to obtain the vector-def 'vx.1' in order to create the
1570   vector stmt 'VSnew.1', this function is called with VEC_OPRND='vx.0'.
   Given 'vx.0' we obtain the stmt that defines it ('VS1.0'); from the
1572   STMT_VINFO_RELATED_STMT field of 'VS1.0' we obtain the next copy - 'VS1.1',
1573   and return its def ('vx.1').
1574   Overall, to create the above sequence this function will be called 3 times:
1575        vx.1 = vect_get_vec_def_for_stmt_copy (dt, vx.0);
1576        vx.2 = vect_get_vec_def_for_stmt_copy (dt, vx.1);
1577        vx.3 = vect_get_vec_def_for_stmt_copy (dt, vx.2);  */
1578
1579tree
1580vect_get_vec_def_for_stmt_copy (enum vect_def_type dt, tree vec_oprnd)
1581{
1582  gimple vec_stmt_for_operand;
1583  stmt_vec_info def_stmt_info;
1584
1585  /* Do nothing; can reuse same def.  */
1586  if (dt == vect_external_def || dt == vect_constant_def )
1587    return vec_oprnd;
1588
1589  vec_stmt_for_operand = SSA_NAME_DEF_STMT (vec_oprnd);
1590  def_stmt_info = vinfo_for_stmt (vec_stmt_for_operand);
1591  gcc_assert (def_stmt_info);
1592  vec_stmt_for_operand = STMT_VINFO_RELATED_STMT (def_stmt_info);
1593  gcc_assert (vec_stmt_for_operand);
1595  if (gimple_code (vec_stmt_for_operand) == GIMPLE_PHI)
1596    vec_oprnd = PHI_RESULT (vec_stmt_for_operand);
1597  else
1598    vec_oprnd = gimple_get_lhs (vec_stmt_for_operand);
1599  return vec_oprnd;
1600}
1601
1602
1603/* Get vectorized definitions for the operands to create a copy of an original
1604   stmt.  See vect_get_vec_def_for_stmt_copy () for details.  */
1605
1606static void
1607vect_get_vec_defs_for_stmt_copy (enum vect_def_type *dt,
1608				 vec<tree> *vec_oprnds0,
1609				 vec<tree> *vec_oprnds1)
1610{
1611  tree vec_oprnd = vec_oprnds0->pop ();
1612
1613  vec_oprnd = vect_get_vec_def_for_stmt_copy (dt[0], vec_oprnd);
1614  vec_oprnds0->quick_push (vec_oprnd);
1615
1616  if (vec_oprnds1 && vec_oprnds1->length ())
1617    {
1618      vec_oprnd = vec_oprnds1->pop ();
1619      vec_oprnd = vect_get_vec_def_for_stmt_copy (dt[1], vec_oprnd);
1620      vec_oprnds1->quick_push (vec_oprnd);
1621    }
1622}
1623
1624
1625/* Get vectorized definitions for OP0 and OP1.
1626   REDUC_INDEX is the index of reduction operand in case of reduction,
1627   and -1 otherwise.  */
1628
1629void
1630vect_get_vec_defs (tree op0, tree op1, gimple stmt,
1631		   vec<tree> *vec_oprnds0,
1632		   vec<tree> *vec_oprnds1,
1633		   slp_tree slp_node, int reduc_index)
1634{
1635  if (slp_node)
1636    {
1637      int nops = (op1 == NULL_TREE) ? 1 : 2;
1638      auto_vec<tree> ops (nops);
1639      auto_vec<vec<tree> > vec_defs (nops);
1640
1641      ops.quick_push (op0);
1642      if (op1)
1643        ops.quick_push (op1);
1644
1645      vect_get_slp_defs (ops, slp_node, &vec_defs, reduc_index);
1646
1647      *vec_oprnds0 = vec_defs[0];
1648      if (op1)
1649	*vec_oprnds1 = vec_defs[1];
1650    }
1651  else
1652    {
1653      tree vec_oprnd;
1654
1655      vec_oprnds0->create (1);
1656      vec_oprnd = vect_get_vec_def_for_operand (op0, stmt, NULL);
1657      vec_oprnds0->quick_push (vec_oprnd);
1658
1659      if (op1)
1660	{
1661	  vec_oprnds1->create (1);
1662	  vec_oprnd = vect_get_vec_def_for_operand (op1, stmt, NULL);
1663	  vec_oprnds1->quick_push (vec_oprnd);
1664	}
1665    }
1666}
1667
1668
1669/* Function vect_finish_stmt_generation.
1670
1671   Insert a new stmt.  */
1672
1673void
1674vect_finish_stmt_generation (gimple stmt, gimple vec_stmt,
1675			     gimple_stmt_iterator *gsi)
1676{
1677  stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
1678  loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
1679  bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
1680
1681  gcc_assert (gimple_code (stmt) != GIMPLE_LABEL);
1682
1683  if (!gsi_end_p (*gsi)
1684      && gimple_has_mem_ops (vec_stmt))
1685    {
1686      gimple at_stmt = gsi_stmt (*gsi);
1687      tree vuse = gimple_vuse (at_stmt);
1688      if (vuse && TREE_CODE (vuse) == SSA_NAME)
1689	{
1690	  tree vdef = gimple_vdef (at_stmt);
1691	  gimple_set_vuse (vec_stmt, gimple_vuse (at_stmt));
1692	  /* If we have an SSA vuse and insert a store, update virtual
1693	     SSA form to avoid triggering the renamer.  Do so only
1694	     if we can easily see all uses - which is what almost always
1695	     happens with the way vectorized stmts are inserted.  */
1696	  if ((vdef && TREE_CODE (vdef) == SSA_NAME)
1697	      && ((is_gimple_assign (vec_stmt)
1698		   && !is_gimple_reg (gimple_assign_lhs (vec_stmt)))
1699		  || (is_gimple_call (vec_stmt)
1700		      && !(gimple_call_flags (vec_stmt)
1701			   & (ECF_CONST|ECF_PURE|ECF_NOVOPS)))))
1702	    {
1703	      tree new_vdef = copy_ssa_name (vuse, vec_stmt);
1704	      gimple_set_vdef (vec_stmt, new_vdef);
1705	      SET_USE (gimple_vuse_op (at_stmt), new_vdef);
1706	    }
1707	}
1708    }
1709  gsi_insert_before (gsi, vec_stmt, GSI_SAME_STMT);
1710
1711  set_vinfo_for_stmt (vec_stmt, new_stmt_vec_info (vec_stmt, loop_vinfo,
1712                                                   bb_vinfo));
1713
1714  if (dump_enabled_p ())
1715    {
1716      dump_printf_loc (MSG_NOTE, vect_location, "add new stmt: ");
1717      dump_gimple_stmt (MSG_NOTE, TDF_SLIM, vec_stmt, 0);
1718    }
1719
1720  gimple_set_location (vec_stmt, gimple_location (stmt));
1721
1722  /* While EH edges will generally prevent vectorization, stmt might
1723     e.g. be in a must-not-throw region.  Ensure newly created stmts
1724     that could throw are part of the same region.  */
1725  int lp_nr = lookup_stmt_eh_lp (stmt);
1726  if (lp_nr != 0 && stmt_could_throw_p (vec_stmt))
1727    add_stmt_to_eh_lp (vec_stmt, lp_nr);
1728}
1729
1730/* Checks if CALL can be vectorized in type VECTYPE.  Returns
1731   a function declaration if the target has a vectorized version
1732   of the function, or NULL_TREE if the function cannot be vectorized.  */
1733
1734tree
1735vectorizable_function (gcall *call, tree vectype_out, tree vectype_in)
1736{
1737  tree fndecl = gimple_call_fndecl (call);
1738
1739  /* We only handle functions that do not read or clobber memory -- i.e.
1740     const or novops ones.  */
1741  if (!(gimple_call_flags (call) & (ECF_CONST | ECF_NOVOPS)))
1742    return NULL_TREE;
1743
1744  if (!fndecl
1745      || TREE_CODE (fndecl) != FUNCTION_DECL
1746      || !DECL_BUILT_IN (fndecl))
1747    return NULL_TREE;
1748
1749  return targetm.vectorize.builtin_vectorized_function (fndecl, vectype_out,
1750						        vectype_in);
1751}
1752
1753
1754static tree permute_vec_elements (tree, tree, tree, gimple,
1755				  gimple_stmt_iterator *);
1756
1757
1758/* Function vectorizable_mask_load_store.
1759
1760   Check if STMT performs a conditional load or store that can be vectorized.
1761   If VEC_STMT is also passed, vectorize the STMT: create a vectorized
1762   stmt to replace it, put it in VEC_STMT, and insert it at GSI.
1763   Return FALSE if not a vectorizable STMT, TRUE otherwise.  */
1764
1765static bool
1766vectorizable_mask_load_store (gimple stmt, gimple_stmt_iterator *gsi,
1767			      gimple *vec_stmt, slp_tree slp_node)
1768{
1769  tree vec_dest = NULL;
1770  stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
1771  stmt_vec_info prev_stmt_info;
1772  loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
1773  struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
1774  bool nested_in_vect_loop = nested_in_vect_loop_p (loop, stmt);
1775  struct data_reference *dr = STMT_VINFO_DATA_REF (stmt_info);
1776  tree vectype = STMT_VINFO_VECTYPE (stmt_info);
1777  tree elem_type;
1778  gimple new_stmt;
1779  tree dummy;
1780  tree dataref_ptr = NULL_TREE;
1781  gimple ptr_incr;
1782  int nunits = TYPE_VECTOR_SUBPARTS (vectype);
1783  int ncopies;
1784  int i, j;
1785  bool inv_p;
1786  tree gather_base = NULL_TREE, gather_off = NULL_TREE;
1787  tree gather_off_vectype = NULL_TREE, gather_decl = NULL_TREE;
1788  int gather_scale = 1;
1789  enum vect_def_type gather_dt = vect_unknown_def_type;
1790  bool is_store;
1791  tree mask;
1792  gimple def_stmt;
1793  tree def;
1794  enum vect_def_type dt;
1795
1796  if (slp_node != NULL)
1797    return false;
1798
1799  ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits;
1800  gcc_assert (ncopies >= 1);
1801
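  /* The mask is the third argument (index 2) of both IFN_MASK_LOAD and
     IFN_MASK_STORE.  Its scalar type must have exactly as many bits of
     precision as one element of VECTYPE.  */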
1802  is_store = gimple_call_internal_fn (stmt) == IFN_MASK_STORE;
1803  mask = gimple_call_arg (stmt, 2);
1804  if (TYPE_PRECISION (TREE_TYPE (mask))
1805      != GET_MODE_BITSIZE (TYPE_MODE (TREE_TYPE (vectype))))
1806    return false;
1807
1808  /* FORNOW. This restriction should be relaxed.  */
1809  if (nested_in_vect_loop && ncopies > 1)
1810    {
1811      if (dump_enabled_p ())
1812	dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1813			 "multiple types in nested loop.");
1814      return false;
1815    }
1816
1817  if (!STMT_VINFO_RELEVANT_P (stmt_info))
1818    return false;
1819
1820  if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def)
1821    return false;
1822
1823  if (!STMT_VINFO_DATA_REF (stmt_info))
1824    return false;
1825
1826  elem_type = TREE_TYPE (vectype);
1827
1828  if (STMT_VINFO_GROUPED_ACCESS (stmt_info))
1829    return false;
1830
1831  if (STMT_VINFO_STRIDE_LOAD_P (stmt_info))
1832    return false;
1833
1834  if (STMT_VINFO_GATHER_P (stmt_info))
1835    {
1836      gimple def_stmt;
1837      tree def;
1838      gather_decl = vect_check_gather (stmt, loop_vinfo, &gather_base,
1839				       &gather_off, &gather_scale);
1840      gcc_assert (gather_decl);
1841      if (!vect_is_simple_use_1 (gather_off, NULL, loop_vinfo, NULL,
1842				 &def_stmt, &def, &gather_dt,
1843				 &gather_off_vectype))
1844	{
1845	  if (dump_enabled_p ())
1846	    dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1847			     "gather index use not simple.");
1848	  return false;
1849	}
1850
1851      tree arglist = TYPE_ARG_TYPES (TREE_TYPE (gather_decl));
1852      tree masktype
1853	= TREE_VALUE (TREE_CHAIN (TREE_CHAIN (TREE_CHAIN (arglist))));
1854      if (TREE_CODE (masktype) == INTEGER_TYPE)
1855	{
1856	  if (dump_enabled_p ())
1857	    dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1858			     "masked gather with integer mask not supported.");
1859	  return false;
1860	}
1861    }
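  /* Otherwise require a positive step and a vector mode for which the
     target can generate a masked load or store.  */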
1862  else if (tree_int_cst_compare (nested_in_vect_loop
1863				 ? STMT_VINFO_DR_STEP (stmt_info)
1864				 : DR_STEP (dr), size_zero_node) <= 0)
1865    return false;
1866  else if (!VECTOR_MODE_P (TYPE_MODE (vectype))
1867	   || !can_vec_mask_load_store_p (TYPE_MODE (vectype), !is_store))
1868    return false;
1869
1870  if (TREE_CODE (mask) != SSA_NAME)
1871    return false;
1872
1873  if (!vect_is_simple_use (mask, stmt, loop_vinfo, NULL,
1874			   &def_stmt, &def, &dt))
1875    return false;
1876
1877  if (is_store)
1878    {
1879      tree rhs = gimple_call_arg (stmt, 3);
1880      if (!vect_is_simple_use (rhs, stmt, loop_vinfo, NULL,
1881			       &def_stmt, &def, &dt))
1882	return false;
1883    }
1884
1885  if (!vec_stmt) /* transformation not required.  */
1886    {
1887      STMT_VINFO_TYPE (stmt_info) = call_vec_info_type;
1888      if (is_store)
1889	vect_model_store_cost (stmt_info, ncopies, false, dt,
1890			       NULL, NULL, NULL);
1891      else
1892	vect_model_load_cost (stmt_info, ncopies, false, NULL, NULL, NULL);
1893      return true;
1894    }
1895
1896  /** Transform.  **/
1897
1898  if (STMT_VINFO_GATHER_P (stmt_info))
1899    {
1900      tree vec_oprnd0 = NULL_TREE, op;
1901      tree arglist = TYPE_ARG_TYPES (TREE_TYPE (gather_decl));
1902      tree rettype, srctype, ptrtype, idxtype, masktype, scaletype;
1903      tree ptr, vec_mask = NULL_TREE, mask_op = NULL_TREE, var, scale;
1904      tree perm_mask = NULL_TREE, prev_res = NULL_TREE;
1905      tree mask_perm_mask = NULL_TREE;
1906      edge pe = loop_preheader_edge (loop);
1907      gimple_seq seq;
1908      basic_block new_bb;
1909      enum { NARROW, NONE, WIDEN } modifier;
1910      int gather_off_nunits = TYPE_VECTOR_SUBPARTS (gather_off_vectype);
1911
1912      rettype = TREE_TYPE (TREE_TYPE (gather_decl));
1913      srctype = TREE_VALUE (arglist); arglist = TREE_CHAIN (arglist);
1914      ptrtype = TREE_VALUE (arglist); arglist = TREE_CHAIN (arglist);
1915      idxtype = TREE_VALUE (arglist); arglist = TREE_CHAIN (arglist);
1916      masktype = TREE_VALUE (arglist); arglist = TREE_CHAIN (arglist);
1917      scaletype = TREE_VALUE (arglist);
1918      gcc_checking_assert (types_compatible_p (srctype, rettype)
1919			   && types_compatible_p (srctype, masktype));
1920
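      /* Relate the width of the gather offset vector to the data vector:
	 equal (NONE); twice as many elements (WIDEN - odd copies reuse the
	 high half of the offset vector via a permutation); or half as many
	 (NARROW - two gather calls are emitted per data vector, the mask is
	 permuted for the odd calls and each pair of results is merged, so
	 NCOPIES is doubled).  */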
1921      if (nunits == gather_off_nunits)
1922	modifier = NONE;
1923      else if (nunits == gather_off_nunits / 2)
1924	{
1925	  unsigned char *sel = XALLOCAVEC (unsigned char, gather_off_nunits);
1926	  modifier = WIDEN;
1927
1928	  for (i = 0; i < gather_off_nunits; ++i)
1929	    sel[i] = i | nunits;
1930
1931	  perm_mask = vect_gen_perm_mask_checked (gather_off_vectype, sel);
1932	}
1933      else if (nunits == gather_off_nunits * 2)
1934	{
1935	  unsigned char *sel = XALLOCAVEC (unsigned char, nunits);
1936	  modifier = NARROW;
1937
1938	  for (i = 0; i < nunits; ++i)
1939	    sel[i] = i < gather_off_nunits
1940		     ? i : i + nunits - gather_off_nunits;
1941
1942	  perm_mask = vect_gen_perm_mask_checked (vectype, sel);
1943	  ncopies *= 2;
1944	  for (i = 0; i < nunits; ++i)
1945	    sel[i] = i | gather_off_nunits;
1946	  mask_perm_mask = vect_gen_perm_mask_checked (masktype, sel);
1947	}
1948      else
1949	gcc_unreachable ();
1950
1951      vec_dest = vect_create_destination_var (gimple_call_lhs (stmt), vectype);
1952
1953      ptr = fold_convert (ptrtype, gather_base);
1954      if (!is_gimple_min_invariant (ptr))
1955	{
1956	  ptr = force_gimple_operand (ptr, &seq, true, NULL_TREE);
1957	  new_bb = gsi_insert_seq_on_edge_immediate (pe, seq);
1958	  gcc_assert (!new_bb);
1959	}
1960
1961      scale = build_int_cst (scaletype, gather_scale);
1962
1963      prev_stmt_info = NULL;
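      /* Emit one gather call per copy.  The offset and mask operands are
	 view-converted to the builtin's argument types, and the call's
	 result back to VECTYPE, where necessary.  */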
1964      for (j = 0; j < ncopies; ++j)
1965	{
1966	  if (modifier == WIDEN && (j & 1))
1967	    op = permute_vec_elements (vec_oprnd0, vec_oprnd0,
1968				       perm_mask, stmt, gsi);
1969	  else if (j == 0)
1970	    op = vec_oprnd0
1971	      = vect_get_vec_def_for_operand (gather_off, stmt, NULL);
1972	  else
1973	    op = vec_oprnd0
1974	      = vect_get_vec_def_for_stmt_copy (gather_dt, vec_oprnd0);
1975
1976	  if (!useless_type_conversion_p (idxtype, TREE_TYPE (op)))
1977	    {
1978	      gcc_assert (TYPE_VECTOR_SUBPARTS (TREE_TYPE (op))
1979			  == TYPE_VECTOR_SUBPARTS (idxtype));
1980	      var = vect_get_new_vect_var (idxtype, vect_simple_var, NULL);
1981	      var = make_ssa_name (var);
1982	      op = build1 (VIEW_CONVERT_EXPR, idxtype, op);
1983	      new_stmt
1984		= gimple_build_assign (var, VIEW_CONVERT_EXPR, op);
1985	      vect_finish_stmt_generation (stmt, new_stmt, gsi);
1986	      op = var;
1987	    }
1988
1989	  if (mask_perm_mask && (j & 1))
1990	    mask_op = permute_vec_elements (mask_op, mask_op,
1991					    mask_perm_mask, stmt, gsi);
1992	  else
1993	    {
1994	      if (j == 0)
1995		vec_mask = vect_get_vec_def_for_operand (mask, stmt, NULL);
1996	      else
1997		{
1998		  vect_is_simple_use (vec_mask, NULL, loop_vinfo, NULL,
1999				      &def_stmt, &def, &dt);
2000		  vec_mask = vect_get_vec_def_for_stmt_copy (dt, vec_mask);
2001		}
2002
2003	      mask_op = vec_mask;
2004	      if (!useless_type_conversion_p (masktype, TREE_TYPE (vec_mask)))
2005		{
2006		  gcc_assert (TYPE_VECTOR_SUBPARTS (TREE_TYPE (mask_op))
2007			      == TYPE_VECTOR_SUBPARTS (masktype));
2008		  var = vect_get_new_vect_var (masktype, vect_simple_var,
2009					       NULL);
2010		  var = make_ssa_name (var);
2011		  mask_op = build1 (VIEW_CONVERT_EXPR, masktype, mask_op);
2012		  new_stmt
2013		    = gimple_build_assign (var, VIEW_CONVERT_EXPR, mask_op);
2014		  vect_finish_stmt_generation (stmt, new_stmt, gsi);
2015		  mask_op = var;
2016		}
2017	    }
2018
2019	  new_stmt
2020	    = gimple_build_call (gather_decl, 5, mask_op, ptr, op, mask_op,
2021				 scale);
2022
2023	  if (!useless_type_conversion_p (vectype, rettype))
2024	    {
2025	      gcc_assert (TYPE_VECTOR_SUBPARTS (vectype)
2026			  == TYPE_VECTOR_SUBPARTS (rettype));
2027	      var = vect_get_new_vect_var (rettype, vect_simple_var, NULL);
2028	      op = make_ssa_name (var, new_stmt);
2029	      gimple_call_set_lhs (new_stmt, op);
2030	      vect_finish_stmt_generation (stmt, new_stmt, gsi);
2031	      var = make_ssa_name (vec_dest);
2032	      op = build1 (VIEW_CONVERT_EXPR, vectype, op);
2033	      new_stmt = gimple_build_assign (var, VIEW_CONVERT_EXPR, op);
2034	    }
2035	  else
2036	    {
2037	      var = make_ssa_name (vec_dest, new_stmt);
2038	      gimple_call_set_lhs (new_stmt, var);
2039	    }
2040
2041	  vect_finish_stmt_generation (stmt, new_stmt, gsi);
2042
2043	  if (modifier == NARROW)
2044	    {
2045	      if ((j & 1) == 0)
2046		{
2047		  prev_res = var;
2048		  continue;
2049		}
2050	      var = permute_vec_elements (prev_res, var,
2051					  perm_mask, stmt, gsi);
2052	      new_stmt = SSA_NAME_DEF_STMT (var);
2053	    }
2054
2055	  if (prev_stmt_info == NULL)
2056	    STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
2057	  else
2058	    STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
2059	  prev_stmt_info = vinfo_for_stmt (new_stmt);
2060	}
2061
2062      /* Ensure that even with -fno-tree-dce the scalar MASK_LOAD is removed
2063	 from the IL.  */
2064      tree lhs = gimple_call_lhs (stmt);
2065      new_stmt = gimple_build_assign (lhs, build_zero_cst (TREE_TYPE (lhs)));
2066      set_vinfo_for_stmt (new_stmt, stmt_info);
2067      set_vinfo_for_stmt (stmt, NULL);
2068      STMT_VINFO_STMT (stmt_info) = new_stmt;
2069      gsi_replace (gsi, new_stmt, true);
2070      return true;
2071    }
2072  else if (is_store)
2073    {
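      /* Masked store: on the first copy create the data-ref pointer and the
	 initial vectorized rhs and mask; on later copies bump the pointer
	 and advance the rhs and mask defs.  One IFN_MASK_STORE is emitted
	 per copy.  */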
2074      tree vec_rhs = NULL_TREE, vec_mask = NULL_TREE;
2075      prev_stmt_info = NULL;
2076      for (i = 0; i < ncopies; i++)
2077	{
2078	  unsigned align, misalign;
2079
2080	  if (i == 0)
2081	    {
2082	      tree rhs = gimple_call_arg (stmt, 3);
2083	      vec_rhs = vect_get_vec_def_for_operand (rhs, stmt, NULL);
2084	      vec_mask = vect_get_vec_def_for_operand (mask, stmt, NULL);
2085	      /* We should have caught mismatched types earlier.  */
2086	      gcc_assert (useless_type_conversion_p (vectype,
2087						     TREE_TYPE (vec_rhs)));
2088	      dataref_ptr = vect_create_data_ref_ptr (stmt, vectype, NULL,
2089						      NULL_TREE, &dummy, gsi,
2090						      &ptr_incr, false, &inv_p);
2091	      gcc_assert (!inv_p);
2092	    }
2093	  else
2094	    {
2095	      vect_is_simple_use (vec_rhs, NULL, loop_vinfo, NULL, &def_stmt,
2096				  &def, &dt);
2097	      vec_rhs = vect_get_vec_def_for_stmt_copy (dt, vec_rhs);
2098	      vect_is_simple_use (vec_mask, NULL, loop_vinfo, NULL, &def_stmt,
2099				  &def, &dt);
2100	      vec_mask = vect_get_vec_def_for_stmt_copy (dt, vec_mask);
2101	      dataref_ptr = bump_vector_ptr (dataref_ptr, ptr_incr, gsi, stmt,
2102					     TYPE_SIZE_UNIT (vectype));
2103	    }
2104
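	  /* Record the alignment we may assume for the access: the vector
	     alignment if it is known to be aligned, the element alignment
	     if the misalignment is unknown, otherwise the exact
	     misalignment.  */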
2105	  align = TYPE_ALIGN_UNIT (vectype);
2106	  if (aligned_access_p (dr))
2107	    misalign = 0;
2108	  else if (DR_MISALIGNMENT (dr) == -1)
2109	    {
2110	      align = TYPE_ALIGN_UNIT (elem_type);
2111	      misalign = 0;
2112	    }
2113	  else
2114	    misalign = DR_MISALIGNMENT (dr);
2115	  set_ptr_info_alignment (get_ptr_info (dataref_ptr), align,
2116				  misalign);
2117	  new_stmt
2118	    = gimple_build_call_internal (IFN_MASK_STORE, 4, dataref_ptr,
2119					  gimple_call_arg (stmt, 1),
2120					  vec_mask, vec_rhs);
2121	  vect_finish_stmt_generation (stmt, new_stmt, gsi);
2122	  if (i == 0)
2123	    STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
2124	  else
2125	    STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
2126	  prev_stmt_info = vinfo_for_stmt (new_stmt);
2127	}
2128    }
2129  else
2130    {
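      /* Masked load: analogous to the store case above, but emit
	 IFN_MASK_LOAD and give each call a fresh SSA lhs based on
	 VEC_DEST.  */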
2131      tree vec_mask = NULL_TREE;
2132      prev_stmt_info = NULL;
2133      vec_dest = vect_create_destination_var (gimple_call_lhs (stmt), vectype);
2134      for (i = 0; i < ncopies; i++)
2135	{
2136	  unsigned align, misalign;
2137
2138	  if (i == 0)
2139	    {
2140	      vec_mask = vect_get_vec_def_for_operand (mask, stmt, NULL);
2141	      dataref_ptr = vect_create_data_ref_ptr (stmt, vectype, NULL,
2142						      NULL_TREE, &dummy, gsi,
2143						      &ptr_incr, false, &inv_p);
2144	      gcc_assert (!inv_p);
2145	    }
2146	  else
2147	    {
2148	      vect_is_simple_use (vec_mask, NULL, loop_vinfo, NULL, &def_stmt,
2149				  &def, &dt);
2150	      vec_mask = vect_get_vec_def_for_stmt_copy (dt, vec_mask);
2151	      dataref_ptr = bump_vector_ptr (dataref_ptr, ptr_incr, gsi, stmt,
2152					     TYPE_SIZE_UNIT (vectype));
2153	    }
2154
2155	  align = TYPE_ALIGN_UNIT (vectype);
2156	  if (aligned_access_p (dr))
2157	    misalign = 0;
2158	  else if (DR_MISALIGNMENT (dr) == -1)
2159	    {
2160	      align = TYPE_ALIGN_UNIT (elem_type);
2161	      misalign = 0;
2162	    }
2163	  else
2164	    misalign = DR_MISALIGNMENT (dr);
2165	  set_ptr_info_alignment (get_ptr_info (dataref_ptr), align,
2166				  misalign);
2167	  new_stmt
2168	    = gimple_build_call_internal (IFN_MASK_LOAD, 3, dataref_ptr,
2169					  gimple_call_arg (stmt, 1),
2170					  vec_mask);
2171	  gimple_call_set_lhs (new_stmt, make_ssa_name (vec_dest));
2172	  vect_finish_stmt_generation (stmt, new_stmt, gsi);
2173	  if (i == 0)
2174	    STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
2175	  else
2176	    STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
2177	  prev_stmt_info = vinfo_for_stmt (new_stmt);
2178	}
2179    }
2180
2181  if (!is_store)
2182    {
2183      /* Ensure that even with -fno-tree-dce the scalar MASK_LOAD is removed
2184	 from the IL.  */
2185      tree lhs = gimple_call_lhs (stmt);
2186      new_stmt = gimple_build_assign (lhs, build_zero_cst (TREE_TYPE (lhs)));
2187      set_vinfo_for_stmt (new_stmt, stmt_info);
2188      set_vinfo_for_stmt (stmt, NULL);
2189      STMT_VINFO_STMT (stmt_info) = new_stmt;
2190      gsi_replace (gsi, new_stmt, true);
2191    }
2192
2193  return true;
2194}
2195
2196
2197/* Function vectorizable_call.
2198
2199   Check if GS performs a function call that can be vectorized.
2200   If VEC_STMT is also passed, vectorize GS: create a vectorized
2201   stmt to replace it, put it in VEC_STMT, and insert it at GSI.
2202   Return FALSE if not a vectorizable STMT, TRUE otherwise.  */
2203
2204static bool
2205vectorizable_call (gimple gs, gimple_stmt_iterator *gsi, gimple *vec_stmt,
2206		   slp_tree slp_node)
2207{
2208  gcall *stmt;
2209  tree vec_dest;
2210  tree scalar_dest;
2211  tree op, type;
2212  tree vec_oprnd0 = NULL_TREE, vec_oprnd1 = NULL_TREE;
2213  stmt_vec_info stmt_info = vinfo_for_stmt (gs), prev_stmt_info;
2214  tree vectype_out, vectype_in;
2215  int nunits_in;
2216  int nunits_out;
2217  loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
2218  bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
2219  tree fndecl, new_temp, def, rhs_type;
2220  gimple def_stmt;
2221  enum vect_def_type dt[3]
2222    = {vect_unknown_def_type, vect_unknown_def_type, vect_unknown_def_type};
2223  gimple new_stmt = NULL;
2224  int ncopies, j;
2225  vec<tree> vargs = vNULL;
2226  enum { NARROW, NONE, WIDEN } modifier;
2227  size_t i, nargs;
2228  tree lhs;
2229
2230  if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
2231    return false;
2232
2233  if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def)
2234    return false;
2235
2236  /* Is GS a vectorizable call?   */
2237  stmt = dyn_cast <gcall *> (gs);
2238  if (!stmt)
2239    return false;
2240
2241  if (gimple_call_internal_p (stmt)
2242      && (gimple_call_internal_fn (stmt) == IFN_MASK_LOAD
2243	  || gimple_call_internal_fn (stmt) == IFN_MASK_STORE))
2244    return vectorizable_mask_load_store (stmt, gsi, vec_stmt,
2245					 slp_node);
2246
2247  if (gimple_call_lhs (stmt) == NULL_TREE
2248      || TREE_CODE (gimple_call_lhs (stmt)) != SSA_NAME)
2249    return false;
2250
2251  gcc_checking_assert (!stmt_can_throw_internal (stmt));
2252
2253  vectype_out = STMT_VINFO_VECTYPE (stmt_info);
2254
2255  /* Process function arguments.  */
2256  rhs_type = NULL_TREE;
2257  vectype_in = NULL_TREE;
2258  nargs = gimple_call_num_args (stmt);
2259
2260  /* Bail out if the function has more than three arguments; we do not have
2261     interesting builtin functions to vectorize with more than two arguments
2262     except for fma.  Zero arguments is not supported either.  */
2263  if (nargs == 0 || nargs > 3)
2264    return false;
2265
2266  /* Ignore the argument of IFN_GOMP_SIMD_LANE, it is magic.  */
2267  if (gimple_call_internal_p (stmt)
2268      && gimple_call_internal_fn (stmt) == IFN_GOMP_SIMD_LANE)
2269    {
2270      nargs = 0;
2271      rhs_type = unsigned_type_node;
2272    }
2273
2274  for (i = 0; i < nargs; i++)
2275    {
2276      tree opvectype;
2277
2278      op = gimple_call_arg (stmt, i);
2279
2280      /* We can only handle calls with arguments of the same type.  */
2281      if (rhs_type
2282	  && !types_compatible_p (rhs_type, TREE_TYPE (op)))
2283	{
2284	  if (dump_enabled_p ())
2285	    dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2286                             "argument types differ.\n");
2287	  return false;
2288	}
2289      if (!rhs_type)
2290	rhs_type = TREE_TYPE (op);
2291
2292      if (!vect_is_simple_use_1 (op, stmt, loop_vinfo, bb_vinfo,
2293				 &def_stmt, &def, &dt[i], &opvectype))
2294	{
2295	  if (dump_enabled_p ())
2296	    dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2297                             "use not simple.\n");
2298	  return false;
2299	}
2300
2301      if (!vectype_in)
2302	vectype_in = opvectype;
2303      else if (opvectype
2304	       && opvectype != vectype_in)
2305	{
2306	  if (dump_enabled_p ())
2307	    dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2308                             "argument vector types differ.\n");
2309	  return false;
2310	}
2311    }
2312  /* If all arguments are external or constant defs, use a vector type with
2313     the same size as the output vector type.  */
2314  if (!vectype_in)
2315    vectype_in = get_same_sized_vectype (rhs_type, vectype_out);
2316  if (vec_stmt)
2317    gcc_assert (vectype_in);
2318  if (!vectype_in)
2319    {
2320      if (dump_enabled_p ())
2321        {
2322          dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2323                           "no vectype for scalar type ");
2324          dump_generic_expr (MSG_MISSED_OPTIMIZATION, TDF_SLIM, rhs_type);
2325          dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
2326        }
2327
2328      return false;
2329    }
2330
2331  /* FORNOW */
2332  nunits_in = TYPE_VECTOR_SUBPARTS (vectype_in);
2333  nunits_out = TYPE_VECTOR_SUBPARTS (vectype_out);
2334  if (nunits_in == nunits_out / 2)
2335    modifier = NARROW;
2336  else if (nunits_out == nunits_in)
2337    modifier = NONE;
2338  else if (nunits_out == nunits_in / 2)
2339    modifier = WIDEN;
2340  else
2341    return false;
2342
2343  /* For now, we only vectorize functions if a target specific builtin
2344     is available.  TODO -- in some cases, it might be profitable to
2345     insert the calls for pieces of the vector, in order to be able
2346     to vectorize other operations in the loop.  */
2347  fndecl = vectorizable_function (stmt, vectype_out, vectype_in);
2348  if (fndecl == NULL_TREE)
2349    {
2350      if (gimple_call_internal_p (stmt)
2351	  && gimple_call_internal_fn (stmt) == IFN_GOMP_SIMD_LANE
2352	  && !slp_node
2353	  && loop_vinfo
2354	  && LOOP_VINFO_LOOP (loop_vinfo)->simduid
2355	  && TREE_CODE (gimple_call_arg (stmt, 0)) == SSA_NAME
2356	  && LOOP_VINFO_LOOP (loop_vinfo)->simduid
2357	     == SSA_NAME_VAR (gimple_call_arg (stmt, 0)))
2358	{
2359	  /* We can handle IFN_GOMP_SIMD_LANE by returning a
2360	     { 0, 1, 2, ... vf - 1 } vector.  */
2361	  gcc_assert (nargs == 0);
2362	}
2363      else
2364	{
2365	  if (dump_enabled_p ())
2366	    dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2367			     "function is not vectorizable.\n");
2368	  return false;
2369	}
2370    }
2371
2372  gcc_assert (!gimple_vuse (stmt));
2373
2374  if (slp_node || PURE_SLP_STMT (stmt_info))
2375    ncopies = 1;
2376  else if (modifier == NARROW)
2377    ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits_out;
2378  else
2379    ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits_in;
2380
2381  /* Sanity check: make sure that at least one copy of the vectorized stmt
2382     needs to be generated.  */
2383  gcc_assert (ncopies >= 1);
2384
2385  if (!vec_stmt) /* transformation not required.  */
2386    {
2387      STMT_VINFO_TYPE (stmt_info) = call_vec_info_type;
2388      if (dump_enabled_p ())
2389        dump_printf_loc (MSG_NOTE, vect_location, "=== vectorizable_call ==="
2390                         "\n");
2391      vect_model_simple_cost (stmt_info, ncopies, dt, NULL, NULL);
2392      return true;
2393    }
2394
2395  /** Transform.  **/
2396
2397  if (dump_enabled_p ())
2398    dump_printf_loc (MSG_NOTE, vect_location, "transform call.\n");
2399
2400  /* Handle def.  */
2401  scalar_dest = gimple_call_lhs (stmt);
2402  vec_dest = vect_create_destination_var (scalar_dest, vectype_out);
2403
2404  prev_stmt_info = NULL;
2405  switch (modifier)
2406    {
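    /* NONE: one call per copy, one vector def per argument.  NARROW: the
       result vector holds twice as many (narrower) elements as an input
       vector, so each call consumes two vector defs per argument.  No
       current target needs WIDEN.  */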
2407    case NONE:
2408      for (j = 0; j < ncopies; ++j)
2409	{
2410	  /* Build argument list for the vectorized call.  */
2411	  if (j == 0)
2412	    vargs.create (nargs);
2413	  else
2414	    vargs.truncate (0);
2415
2416	  if (slp_node)
2417	    {
2418	      auto_vec<vec<tree> > vec_defs (nargs);
2419	      vec<tree> vec_oprnds0;
2420
2421	      for (i = 0; i < nargs; i++)
2422		vargs.quick_push (gimple_call_arg (stmt, i));
2423	      vect_get_slp_defs (vargs, slp_node, &vec_defs, -1);
2424	      vec_oprnds0 = vec_defs[0];
2425
2426	      /* Arguments are ready.  Create the new vector stmt.  */
2427	      FOR_EACH_VEC_ELT (vec_oprnds0, i, vec_oprnd0)
2428		{
2429		  size_t k;
2430		  for (k = 0; k < nargs; k++)
2431		    {
2432		      vec<tree> vec_oprndsk = vec_defs[k];
2433		      vargs[k] = vec_oprndsk[i];
2434		    }
2435		  new_stmt = gimple_build_call_vec (fndecl, vargs);
2436		  new_temp = make_ssa_name (vec_dest, new_stmt);
2437		  gimple_call_set_lhs (new_stmt, new_temp);
2438		  vect_finish_stmt_generation (stmt, new_stmt, gsi);
2439		  SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
2440		}
2441
2442	      for (i = 0; i < nargs; i++)
2443		{
2444		  vec<tree> vec_oprndsi = vec_defs[i];
2445		  vec_oprndsi.release ();
2446		}
2447	      continue;
2448	    }
2449
2450	  for (i = 0; i < nargs; i++)
2451	    {
2452	      op = gimple_call_arg (stmt, i);
2453	      if (j == 0)
2454		vec_oprnd0
2455		  = vect_get_vec_def_for_operand (op, stmt, NULL);
2456	      else
2457		{
2458		  vec_oprnd0 = gimple_call_arg (new_stmt, i);
2459		  vec_oprnd0
2460                    = vect_get_vec_def_for_stmt_copy (dt[i], vec_oprnd0);
2461		}
2462
2463	      vargs.quick_push (vec_oprnd0);
2464	    }
2465
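	  /* For IFN_GOMP_SIMD_LANE there is no target builtin to call; build
	     the lane numbers { j * nunits_out, ..., j * nunits_out
	     + nunits_out - 1 } directly as a constant vector.  */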
2466	  if (gimple_call_internal_p (stmt)
2467	      && gimple_call_internal_fn (stmt) == IFN_GOMP_SIMD_LANE)
2468	    {
2469	      tree *v = XALLOCAVEC (tree, nunits_out);
2470	      int k;
2471	      for (k = 0; k < nunits_out; ++k)
2472		v[k] = build_int_cst (unsigned_type_node, j * nunits_out + k);
2473	      tree cst = build_vector (vectype_out, v);
2474	      tree new_var
2475		= vect_get_new_vect_var (vectype_out, vect_simple_var, "cst_");
2476	      gimple init_stmt = gimple_build_assign (new_var, cst);
2477	      new_temp = make_ssa_name (new_var, init_stmt);
2478	      gimple_assign_set_lhs (init_stmt, new_temp);
2479	      vect_init_vector_1 (stmt, init_stmt, NULL);
2480	      new_temp = make_ssa_name (vec_dest);
2481	      new_stmt = gimple_build_assign (new_temp,
2482					      gimple_assign_lhs (init_stmt));
2483	    }
2484	  else
2485	    {
2486	      new_stmt = gimple_build_call_vec (fndecl, vargs);
2487	      new_temp = make_ssa_name (vec_dest, new_stmt);
2488	      gimple_call_set_lhs (new_stmt, new_temp);
2489	    }
2490	  vect_finish_stmt_generation (stmt, new_stmt, gsi);
2491
2492	  if (j == 0)
2493	    STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
2494	  else
2495	    STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
2496
2497	  prev_stmt_info = vinfo_for_stmt (new_stmt);
2498	}
2499
2500      break;
2501
2502    case NARROW:
2503      for (j = 0; j < ncopies; ++j)
2504	{
2505	  /* Build argument list for the vectorized call.  */
2506	  if (j == 0)
2507	    vargs.create (nargs * 2);
2508	  else
2509	    vargs.truncate (0);
2510
2511	  if (slp_node)
2512	    {
2513	      auto_vec<vec<tree> > vec_defs (nargs);
2514	      vec<tree> vec_oprnds0;
2515
2516	      for (i = 0; i < nargs; i++)
2517		vargs.quick_push (gimple_call_arg (stmt, i));
2518	      vect_get_slp_defs (vargs, slp_node, &vec_defs, -1);
2519	      vec_oprnds0 = vec_defs[0];
2520
2521	      /* Arguments are ready.  Create the new vector stmt.  */
2522	      for (i = 0; vec_oprnds0.iterate (i, &vec_oprnd0); i += 2)
2523		{
2524		  size_t k;
2525		  vargs.truncate (0);
2526		  for (k = 0; k < nargs; k++)
2527		    {
2528		      vec<tree> vec_oprndsk = vec_defs[k];
2529		      vargs.quick_push (vec_oprndsk[i]);
2530		      vargs.quick_push (vec_oprndsk[i + 1]);
2531		    }
2532		  new_stmt = gimple_build_call_vec (fndecl, vargs);
2533		  new_temp = make_ssa_name (vec_dest, new_stmt);
2534		  gimple_call_set_lhs (new_stmt, new_temp);
2535		  vect_finish_stmt_generation (stmt, new_stmt, gsi);
2536		  SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
2537		}
2538
2539	      for (i = 0; i < nargs; i++)
2540		{
2541		  vec<tree> vec_oprndsi = vec_defs[i];
2542		  vec_oprndsi.release ();
2543		}
2544	      continue;
2545	    }
2546
2547	  for (i = 0; i < nargs; i++)
2548	    {
2549	      op = gimple_call_arg (stmt, i);
2550	      if (j == 0)
2551		{
2552		  vec_oprnd0
2553		    = vect_get_vec_def_for_operand (op, stmt, NULL);
2554		  vec_oprnd1
2555		    = vect_get_vec_def_for_stmt_copy (dt[i], vec_oprnd0);
2556		}
2557	      else
2558		{
2559		  vec_oprnd1 = gimple_call_arg (new_stmt, 2*i + 1);
2560		  vec_oprnd0
2561		    = vect_get_vec_def_for_stmt_copy (dt[i], vec_oprnd1);
2562		  vec_oprnd1
2563		    = vect_get_vec_def_for_stmt_copy (dt[i], vec_oprnd0);
2564		}
2565
2566	      vargs.quick_push (vec_oprnd0);
2567	      vargs.quick_push (vec_oprnd1);
2568	    }
2569
2570	  new_stmt = gimple_build_call_vec (fndecl, vargs);
2571	  new_temp = make_ssa_name (vec_dest, new_stmt);
2572	  gimple_call_set_lhs (new_stmt, new_temp);
2573	  vect_finish_stmt_generation (stmt, new_stmt, gsi);
2574
2575	  if (j == 0)
2576	    STMT_VINFO_VEC_STMT (stmt_info) = new_stmt;
2577	  else
2578	    STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
2579
2580	  prev_stmt_info = vinfo_for_stmt (new_stmt);
2581	}
2582
2583      *vec_stmt = STMT_VINFO_VEC_STMT (stmt_info);
2584
2585      break;
2586
2587    case WIDEN:
2588      /* No current target implements this case.  */
2589      return false;
2590    }
2591
2592  vargs.release ();
2593
2594  /* The call in STMT might prevent it from being removed in dce.
2595     We however cannot remove it here, because the ssa name it defines
2596     is mapped to the new definition.  So just replace the rhs of the
2597     statement with something harmless.  */
2598
2599  if (slp_node)
2600    return true;
2601
2602  type = TREE_TYPE (scalar_dest);
2603  if (is_pattern_stmt_p (stmt_info))
2604    lhs = gimple_call_lhs (STMT_VINFO_RELATED_STMT (stmt_info));
2605  else
2606    lhs = gimple_call_lhs (stmt);
2607  new_stmt = gimple_build_assign (lhs, build_zero_cst (type));
2608  set_vinfo_for_stmt (new_stmt, stmt_info);
2609  set_vinfo_for_stmt (stmt, NULL);
2610  STMT_VINFO_STMT (stmt_info) = new_stmt;
2611  gsi_replace (gsi, new_stmt, false);
2612
2613  return true;
2614}
2615
2616
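/* Information collected about one call argument when analyzing a call to a
   simd clone: the operand itself, its vector type and def kind, the step of
   a linear argument, and the known alignment of an invariant pointer
   argument.  */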
2617struct simd_call_arg_info
2618{
2619  tree vectype;
2620  tree op;
2621  enum vect_def_type dt;
2622  HOST_WIDE_INT linear_step;
2623  unsigned int align;
2624};
2625
2626/* Function vectorizable_simd_clone_call.
2627
2628   Check if STMT performs a function call that can be vectorized
2629   by calling a simd clone of the function.
2630   If VEC_STMT is also passed, vectorize the STMT: create a vectorized
2631   stmt to replace it, put it in VEC_STMT, and insert it at GSI.
2632   Return FALSE if not a vectorizable STMT, TRUE otherwise.  */
2633
2634static bool
2635vectorizable_simd_clone_call (gimple stmt, gimple_stmt_iterator *gsi,
2636			      gimple *vec_stmt, slp_tree slp_node)
2637{
2638  tree vec_dest;
2639  tree scalar_dest;
2640  tree op, type;
2641  tree vec_oprnd0 = NULL_TREE;
2642  stmt_vec_info stmt_info = vinfo_for_stmt (stmt), prev_stmt_info;
2643  tree vectype;
2644  unsigned int nunits;
2645  loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
2646  bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
2647  struct loop *loop = loop_vinfo ? LOOP_VINFO_LOOP (loop_vinfo) : NULL;
2648  tree fndecl, new_temp, def;
2649  gimple def_stmt;
2650  gimple new_stmt = NULL;
2651  int ncopies, j;
2652  vec<simd_call_arg_info> arginfo = vNULL;
2653  vec<tree> vargs = vNULL;
2654  size_t i, nargs;
2655  tree lhs, rtype, ratype;
2656  vec<constructor_elt, va_gc> *ret_ctor_elts;
2657
2658  /* Is STMT a vectorizable call?   */
2659  if (!is_gimple_call (stmt))
2660    return false;
2661
2662  fndecl = gimple_call_fndecl (stmt);
2663  if (fndecl == NULL_TREE)
2664    return false;
2665
2666  struct cgraph_node *node = cgraph_node::get (fndecl);
2667  if (node == NULL || node->simd_clones == NULL)
2668    return false;
2669
2670  if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
2671    return false;
2672
2673  if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def)
2674    return false;
2675
2676  if (gimple_call_lhs (stmt)
2677      && TREE_CODE (gimple_call_lhs (stmt)) != SSA_NAME)
2678    return false;
2679
2680  gcc_checking_assert (!stmt_can_throw_internal (stmt));
2681
2682  vectype = STMT_VINFO_VECTYPE (stmt_info);
2683
2684  if (loop_vinfo && nested_in_vect_loop_p (loop, stmt))
2685    return false;
2686
2687  /* FORNOW */
2688  if (slp_node || PURE_SLP_STMT (stmt_info))
2689    return false;
2690
2691  /* Process function arguments.  */
2692  nargs = gimple_call_num_args (stmt);
2693
2694  /* Bail out if the function has zero arguments.  */
2695  if (nargs == 0)
2696    return false;
2697
2698  arginfo.create (nargs);
2699
2700  for (i = 0; i < nargs; i++)
2701    {
2702      simd_call_arg_info thisarginfo;
2703      affine_iv iv;
2704
2705      thisarginfo.linear_step = 0;
2706      thisarginfo.align = 0;
2707      thisarginfo.op = NULL_TREE;
2708
2709      op = gimple_call_arg (stmt, i);
2710      if (!vect_is_simple_use_1 (op, stmt, loop_vinfo, bb_vinfo,
2711				 &def_stmt, &def, &thisarginfo.dt,
2712				 &thisarginfo.vectype)
2713	  || thisarginfo.dt == vect_uninitialized_def)
2714	{
2715	  if (dump_enabled_p ())
2716	    dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2717			     "use not simple.\n");
2718	  arginfo.release ();
2719	  return false;
2720	}
2721
2722      if (thisarginfo.dt == vect_constant_def
2723	  || thisarginfo.dt == vect_external_def)
2724	gcc_assert (thisarginfo.vectype == NULL_TREE);
2725      else
2726	gcc_assert (thisarginfo.vectype != NULL_TREE);
2727
2728      /* For linear arguments, the analyze phase should have saved
2729	 the base and step in STMT_VINFO_SIMD_CLONE_INFO.  */
2730      if (i * 2 + 3 <= STMT_VINFO_SIMD_CLONE_INFO (stmt_info).length ()
2731	  && STMT_VINFO_SIMD_CLONE_INFO (stmt_info)[i * 2 + 2])
2732	{
2733	  gcc_assert (vec_stmt);
2734	  thisarginfo.linear_step
2735	    = tree_to_shwi (STMT_VINFO_SIMD_CLONE_INFO (stmt_info)[i * 2 + 2]);
2736	  thisarginfo.op
2737	    = STMT_VINFO_SIMD_CLONE_INFO (stmt_info)[i * 2 + 1];
2738	  /* If loop has been peeled for alignment, we need to adjust it.  */
2739	  tree n1 = LOOP_VINFO_NITERS_UNCHANGED (loop_vinfo);
2740	  tree n2 = LOOP_VINFO_NITERS (loop_vinfo);
2741	  if (n1 != n2)
2742	    {
2743	      tree bias = fold_build2 (MINUS_EXPR, TREE_TYPE (n1), n1, n2);
2744	      tree step = STMT_VINFO_SIMD_CLONE_INFO (stmt_info)[i * 2 + 2];
2745	      tree opt = TREE_TYPE (thisarginfo.op);
2746	      bias = fold_convert (TREE_TYPE (step), bias);
2747	      bias = fold_build2 (MULT_EXPR, TREE_TYPE (step), bias, step);
2748	      thisarginfo.op
2749		= fold_build2 (POINTER_TYPE_P (opt)
2750			       ? POINTER_PLUS_EXPR : PLUS_EXPR, opt,
2751			       thisarginfo.op, bias);
2752	    }
2753	}
2754      else if (!vec_stmt
2755	       && thisarginfo.dt != vect_constant_def
2756	       && thisarginfo.dt != vect_external_def
2757	       && loop_vinfo
2758	       && TREE_CODE (op) == SSA_NAME
2759	       && simple_iv (loop, loop_containing_stmt (stmt), op,
2760			     &iv, false)
2761	       && tree_fits_shwi_p (iv.step))
2762	{
2763	  thisarginfo.linear_step = tree_to_shwi (iv.step);
2764	  thisarginfo.op = iv.base;
2765	}
2766      else if ((thisarginfo.dt == vect_constant_def
2767		|| thisarginfo.dt == vect_external_def)
2768	       && POINTER_TYPE_P (TREE_TYPE (op)))
2769	thisarginfo.align = get_pointer_alignment (op) / BITS_PER_UNIT;
2770
2771      arginfo.quick_push (thisarginfo);
2772    }
2773
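  /* Choose which simd clone to call.  A choice recorded during analysis is
     reused; otherwise score every clone of NODE: skip clones whose simdlen
     exceeds the vectorization factor, in-branch clones (for now), clones
     the target cannot use, and clones whose argument kinds do not match the
     info collected above; penalize a smaller simdlen and weaker argument
     alignment via THIS_BADNESS and keep the clone with the lowest
     badness.  */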
2774  unsigned int badness = 0;
2775  struct cgraph_node *bestn = NULL;
2776  if (STMT_VINFO_SIMD_CLONE_INFO (stmt_info).exists ())
2777    bestn = cgraph_node::get (STMT_VINFO_SIMD_CLONE_INFO (stmt_info)[0]);
2778  else
2779    for (struct cgraph_node *n = node->simd_clones; n != NULL;
2780	 n = n->simdclone->next_clone)
2781      {
2782	unsigned int this_badness = 0;
2783	if (n->simdclone->simdlen
2784	    > (unsigned) LOOP_VINFO_VECT_FACTOR (loop_vinfo)
2785	    || n->simdclone->nargs != nargs)
2786	  continue;
2787	if (n->simdclone->simdlen
2788	    < (unsigned) LOOP_VINFO_VECT_FACTOR (loop_vinfo))
2789	  this_badness += (exact_log2 (LOOP_VINFO_VECT_FACTOR (loop_vinfo))
2790			   - exact_log2 (n->simdclone->simdlen)) * 1024;
2791	if (n->simdclone->inbranch)
2792	  this_badness += 2048;
2793	int target_badness = targetm.simd_clone.usable (n);
2794	if (target_badness < 0)
2795	  continue;
2796	this_badness += target_badness * 512;
2797	/* FORNOW: Have to add code to add the mask argument.  */
2798	if (n->simdclone->inbranch)
2799	  continue;
2800	for (i = 0; i < nargs; i++)
2801	  {
2802	    switch (n->simdclone->args[i].arg_type)
2803	      {
2804	      case SIMD_CLONE_ARG_TYPE_VECTOR:
2805		if (!useless_type_conversion_p
2806			(n->simdclone->args[i].orig_type,
2807			 TREE_TYPE (gimple_call_arg (stmt, i))))
2808		  i = -1;
2809		else if (arginfo[i].dt == vect_constant_def
2810			 || arginfo[i].dt == vect_external_def
2811			 || arginfo[i].linear_step)
2812		  this_badness += 64;
2813		break;
2814	      case SIMD_CLONE_ARG_TYPE_UNIFORM:
2815		if (arginfo[i].dt != vect_constant_def
2816		    && arginfo[i].dt != vect_external_def)
2817		  i = -1;
2818		break;
2819	      case SIMD_CLONE_ARG_TYPE_LINEAR_CONSTANT_STEP:
2820		if (arginfo[i].dt == vect_constant_def
2821		    || arginfo[i].dt == vect_external_def
2822		    || (arginfo[i].linear_step
2823			!= n->simdclone->args[i].linear_step))
2824		  i = -1;
2825		break;
2826	      case SIMD_CLONE_ARG_TYPE_LINEAR_VARIABLE_STEP:
2827		/* FORNOW */
2828		i = -1;
2829		break;
2830	      case SIMD_CLONE_ARG_TYPE_MASK:
2831		gcc_unreachable ();
2832	      }
2833	    if (i == (size_t) -1)
2834	      break;
2835	    if (n->simdclone->args[i].alignment > arginfo[i].align)
2836	      {
2837		i = -1;
2838		break;
2839	      }
2840	    if (arginfo[i].align)
2841	      this_badness += (exact_log2 (arginfo[i].align)
2842			       - exact_log2 (n->simdclone->args[i].alignment));
2843	  }
2844	if (i == (size_t) -1)
2845	  continue;
2846	if (bestn == NULL || this_badness < badness)
2847	  {
2848	    bestn = n;
2849	    badness = this_badness;
2850	  }
2851      }
2852
2853  if (bestn == NULL)
2854    {
2855      arginfo.release ();
2856      return false;
2857    }
2858
2859  for (i = 0; i < nargs; i++)
2860    if ((arginfo[i].dt == vect_constant_def
2861	 || arginfo[i].dt == vect_external_def)
2862	&& bestn->simdclone->args[i].arg_type == SIMD_CLONE_ARG_TYPE_VECTOR)
2863      {
2864	arginfo[i].vectype
2865	  = get_vectype_for_scalar_type (TREE_TYPE (gimple_call_arg (stmt,
2866								     i)));
2867	if (arginfo[i].vectype == NULL
2868	    || (TYPE_VECTOR_SUBPARTS (arginfo[i].vectype)
2869		> bestn->simdclone->simdlen))
2870	  {
2871	    arginfo.release ();
2872	    return false;
2873	  }
2874      }
2875
2876  fndecl = bestn->decl;
2877  nunits = bestn->simdclone->simdlen;
2878  ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits;
2879
2880  /* If the function isn't const, only allow it in simd loops where the
2881     user has asserted that at least nunits consecutive iterations can be
2882     performed using SIMD instructions.  */
2883  if ((loop == NULL || (unsigned) loop->safelen < nunits)
2884      && gimple_vuse (stmt))
2885    {
2886      arginfo.release ();
2887      return false;
2888    }
2889
2890  /* Sanity check: make sure that at least one copy of the vectorized stmt
2891     needs to be generated.  */
2892  gcc_assert (ncopies >= 1);
2893
2894  if (!vec_stmt) /* transformation not required.  */
2895    {
2896      STMT_VINFO_SIMD_CLONE_INFO (stmt_info).safe_push (bestn->decl);
2897      for (i = 0; i < nargs; i++)
2898	if (bestn->simdclone->args[i].arg_type
2899	    == SIMD_CLONE_ARG_TYPE_LINEAR_CONSTANT_STEP)
2900	  {
2901	    STMT_VINFO_SIMD_CLONE_INFO (stmt_info).safe_grow_cleared (i * 2
2902									+ 1);
2903	    STMT_VINFO_SIMD_CLONE_INFO (stmt_info).safe_push (arginfo[i].op);
2904	    tree lst = POINTER_TYPE_P (TREE_TYPE (arginfo[i].op))
2905		       ? size_type_node : TREE_TYPE (arginfo[i].op);
2906	    tree ls = build_int_cst (lst, arginfo[i].linear_step);
2907	    STMT_VINFO_SIMD_CLONE_INFO (stmt_info).safe_push (ls);
2908	  }
2909      STMT_VINFO_TYPE (stmt_info) = call_simd_clone_vec_info_type;
2910      if (dump_enabled_p ())
2911	dump_printf_loc (MSG_NOTE, vect_location,
2912			 "=== vectorizable_simd_clone_call ===\n");
2913/*      vect_model_simple_cost (stmt_info, ncopies, dt, NULL, NULL); */
2914      arginfo.release ();
2915      return true;
2916    }
2917
2918  /** Transform.  **/
2919
2920  if (dump_enabled_p ())
2921    dump_printf_loc (MSG_NOTE, vect_location, "transform call.\n");
2922
2923  /* Handle def.  */
2924  scalar_dest = gimple_call_lhs (stmt);
2925  vec_dest = NULL_TREE;
2926  rtype = NULL_TREE;
2927  ratype = NULL_TREE;
2928  if (scalar_dest)
2929    {
2930      vec_dest = vect_create_destination_var (scalar_dest, vectype);
2931      rtype = TREE_TYPE (TREE_TYPE (fndecl));
2932      if (TREE_CODE (rtype) == ARRAY_TYPE)
2933	{
2934	  ratype = rtype;
2935	  rtype = TREE_TYPE (ratype);
2936	}
2937    }
2938
2939  prev_stmt_info = NULL;
2940  for (j = 0; j < ncopies; ++j)
2941    {
2942      /* Build argument list for the vectorized call.  */
2943      if (j == 0)
2944	vargs.create (nargs);
2945      else
2946	vargs.truncate (0);
2947
2948      for (i = 0; i < nargs; i++)
2949	{
2950	  unsigned int k, l, m, o;
2951	  tree atype;
2952	  op = gimple_call_arg (stmt, i);
2953	  switch (bestn->simdclone->args[i].arg_type)
2954	    {
2955	    case SIMD_CLONE_ARG_TYPE_VECTOR:
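	      /* A vector argument: if the clone's vector type has fewer
		 elements than the vectorized def, extract pieces of it with
		 BIT_FIELD_REFs; if it has more, gather several defs into a
		 CONSTRUCTOR.  */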
2956	      atype = bestn->simdclone->args[i].vector_type;
2957	      o = nunits / TYPE_VECTOR_SUBPARTS (atype);
2958	      for (m = j * o; m < (j + 1) * o; m++)
2959		{
2960		  if (TYPE_VECTOR_SUBPARTS (atype)
2961		      < TYPE_VECTOR_SUBPARTS (arginfo[i].vectype))
2962		    {
2963		      unsigned int prec = GET_MODE_BITSIZE (TYPE_MODE (atype));
2964		      k = (TYPE_VECTOR_SUBPARTS (arginfo[i].vectype)
2965			   / TYPE_VECTOR_SUBPARTS (atype));
2966		      gcc_assert ((k & (k - 1)) == 0);
2967		      if (m == 0)
2968			vec_oprnd0
2969			  = vect_get_vec_def_for_operand (op, stmt, NULL);
2970		      else
2971			{
2972			  vec_oprnd0 = arginfo[i].op;
2973			  if ((m & (k - 1)) == 0)
2974			    vec_oprnd0
2975			      = vect_get_vec_def_for_stmt_copy (arginfo[i].dt,
2976								vec_oprnd0);
2977			}
2978		      arginfo[i].op = vec_oprnd0;
2979		      vec_oprnd0
2980			= build3 (BIT_FIELD_REF, atype, vec_oprnd0,
2981				  size_int (prec),
2982				  bitsize_int ((m & (k - 1)) * prec));
2983		      new_stmt
2984			= gimple_build_assign (make_ssa_name (atype),
2985					       vec_oprnd0);
2986		      vect_finish_stmt_generation (stmt, new_stmt, gsi);
2987		      vargs.safe_push (gimple_assign_lhs (new_stmt));
2988		    }
2989		  else
2990		    {
2991		      k = (TYPE_VECTOR_SUBPARTS (atype)
2992			   / TYPE_VECTOR_SUBPARTS (arginfo[i].vectype));
2993		      gcc_assert ((k & (k - 1)) == 0);
2994		      vec<constructor_elt, va_gc> *ctor_elts;
2995		      if (k != 1)
2996			vec_alloc (ctor_elts, k);
2997		      else
2998			ctor_elts = NULL;
2999		      for (l = 0; l < k; l++)
3000			{
3001			  if (m == 0 && l == 0)
3002			    vec_oprnd0
3003			      = vect_get_vec_def_for_operand (op, stmt, NULL);
3004			  else
3005			    vec_oprnd0
3006			      = vect_get_vec_def_for_stmt_copy (arginfo[i].dt,
3007								arginfo[i].op);
3008			  arginfo[i].op = vec_oprnd0;
3009			  if (k == 1)
3010			    break;
3011			  CONSTRUCTOR_APPEND_ELT (ctor_elts, NULL_TREE,
3012						  vec_oprnd0);
3013			}
3014		      if (k == 1)
3015			vargs.safe_push (vec_oprnd0);
3016		      else
3017			{
3018			  vec_oprnd0 = build_constructor (atype, ctor_elts);
3019			  new_stmt
3020			    = gimple_build_assign (make_ssa_name (atype),
3021						   vec_oprnd0);
3022			  vect_finish_stmt_generation (stmt, new_stmt, gsi);
3023			  vargs.safe_push (gimple_assign_lhs (new_stmt));
3024			}
3025		    }
3026		}
3027	      break;
3028	    case SIMD_CLONE_ARG_TYPE_UNIFORM:
3029	      vargs.safe_push (op);
3030	      break;
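	    /* A linear argument advances by a constant step per lane.  For
	       the first copy build a PHI in the loop header that starts at
	       the (possibly bias-adjusted) base and is incremented by
	       linear_step * ncopies * nunits each iteration; later copies in
	       the same iteration simply add linear_step * j * nunits.  */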
3031	    case SIMD_CLONE_ARG_TYPE_LINEAR_CONSTANT_STEP:
3032	      if (j == 0)
3033		{
3034		  gimple_seq stmts;
3035		  arginfo[i].op
3036		    = force_gimple_operand (arginfo[i].op, &stmts, true,
3037					    NULL_TREE);
3038		  if (stmts != NULL)
3039		    {
3040		      basic_block new_bb;
3041		      edge pe = loop_preheader_edge (loop);
3042		      new_bb = gsi_insert_seq_on_edge_immediate (pe, stmts);
3043		      gcc_assert (!new_bb);
3044		    }
3045		  tree phi_res = copy_ssa_name (op);
3046		  gphi *new_phi = create_phi_node (phi_res, loop->header);
3047		  set_vinfo_for_stmt (new_phi,
3048				      new_stmt_vec_info (new_phi, loop_vinfo,
3049							 NULL));
3050		  add_phi_arg (new_phi, arginfo[i].op,
3051			       loop_preheader_edge (loop), UNKNOWN_LOCATION);
3052		  enum tree_code code
3053		    = POINTER_TYPE_P (TREE_TYPE (op))
3054		      ? POINTER_PLUS_EXPR : PLUS_EXPR;
3055		  tree type = POINTER_TYPE_P (TREE_TYPE (op))
3056			      ? sizetype : TREE_TYPE (op);
3057		  widest_int cst
3058		    = wi::mul (bestn->simdclone->args[i].linear_step,
3059			       ncopies * nunits);
3060		  tree tcst = wide_int_to_tree (type, cst);
3061		  tree phi_arg = copy_ssa_name (op);
3062		  new_stmt
3063		    = gimple_build_assign (phi_arg, code, phi_res, tcst);
3064		  gimple_stmt_iterator si = gsi_after_labels (loop->header);
3065		  gsi_insert_after (&si, new_stmt, GSI_NEW_STMT);
3066		  set_vinfo_for_stmt (new_stmt,
3067				      new_stmt_vec_info (new_stmt, loop_vinfo,
3068							 NULL));
3069		  add_phi_arg (new_phi, phi_arg, loop_latch_edge (loop),
3070			       UNKNOWN_LOCATION);
3071		  arginfo[i].op = phi_res;
3072		  vargs.safe_push (phi_res);
3073		}
3074	      else
3075		{
3076		  enum tree_code code
3077		    = POINTER_TYPE_P (TREE_TYPE (op))
3078		      ? POINTER_PLUS_EXPR : PLUS_EXPR;
3079		  tree type = POINTER_TYPE_P (TREE_TYPE (op))
3080			      ? sizetype : TREE_TYPE (op);
3081		  widest_int cst
3082		    = wi::mul (bestn->simdclone->args[i].linear_step,
3083			       j * nunits);
3084		  tree tcst = wide_int_to_tree (type, cst);
3085		  new_temp = make_ssa_name (TREE_TYPE (op));
3086		  new_stmt = gimple_build_assign (new_temp, code,
3087						  arginfo[i].op, tcst);
3088		  vect_finish_stmt_generation (stmt, new_stmt, gsi);
3089		  vargs.safe_push (new_temp);
3090		}
3091	      break;
3092	    case SIMD_CLONE_ARG_TYPE_LINEAR_VARIABLE_STEP:
3093	    default:
3094	      gcc_unreachable ();
3095	    }
3096	}
3097
3098      new_stmt = gimple_build_call_vec (fndecl, vargs);
3099      if (vec_dest)
3100	{
3101	  gcc_assert (ratype || TYPE_VECTOR_SUBPARTS (rtype) == nunits);
3102	  if (ratype)
3103	    new_temp = create_tmp_var (ratype);
3104	  else if (TYPE_VECTOR_SUBPARTS (vectype)
3105		   == TYPE_VECTOR_SUBPARTS (rtype))
3106	    new_temp = make_ssa_name (vec_dest, new_stmt);
3107	  else
3108	    new_temp = make_ssa_name (rtype, new_stmt);
3109	  gimple_call_set_lhs (new_stmt, new_temp);
3110	}
3111      vect_finish_stmt_generation (stmt, new_stmt, gsi);
3112
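      /* Bring the clone's return value back to VECTYPE: a return value with
	 more elements (or a returned array) is split into one piece per
	 vectorized stmt copy; one with fewer elements is accumulated across
	 copies into a CONSTRUCTOR; a single vector returned through an
	 array is loaded from it.  Returned arrays are clobbered after
	 use.  */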
3113      if (vec_dest)
3114	{
3115	  if (TYPE_VECTOR_SUBPARTS (vectype) < nunits)
3116	    {
3117	      unsigned int k, l;
3118	      unsigned int prec = GET_MODE_BITSIZE (TYPE_MODE (vectype));
3119	      k = nunits / TYPE_VECTOR_SUBPARTS (vectype);
3120	      gcc_assert ((k & (k - 1)) == 0);
3121	      for (l = 0; l < k; l++)
3122		{
3123		  tree t;
3124		  if (ratype)
3125		    {
3126		      t = build_fold_addr_expr (new_temp);
3127		      t = build2 (MEM_REF, vectype, t,
3128				  build_int_cst (TREE_TYPE (t),
3129						 l * prec / BITS_PER_UNIT));
3130		    }
3131		  else
3132		    t = build3 (BIT_FIELD_REF, vectype, new_temp,
3133				size_int (prec), bitsize_int (l * prec));
3134		  new_stmt
3135		    = gimple_build_assign (make_ssa_name (vectype), t);
3136		  vect_finish_stmt_generation (stmt, new_stmt, gsi);
3137		  if (j == 0 && l == 0)
3138		    STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
3139		  else
3140		    STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
3141
3142		  prev_stmt_info = vinfo_for_stmt (new_stmt);
3143		}
3144
3145	      if (ratype)
3146		{
3147		  tree clobber = build_constructor (ratype, NULL);
3148		  TREE_THIS_VOLATILE (clobber) = 1;
3149		  new_stmt = gimple_build_assign (new_temp, clobber);
3150		  vect_finish_stmt_generation (stmt, new_stmt, gsi);
3151		}
3152	      continue;
3153	    }
3154	  else if (TYPE_VECTOR_SUBPARTS (vectype) > nunits)
3155	    {
3156	      unsigned int k = (TYPE_VECTOR_SUBPARTS (vectype)
3157				/ TYPE_VECTOR_SUBPARTS (rtype));
3158	      gcc_assert ((k & (k - 1)) == 0);
3159	      if ((j & (k - 1)) == 0)
3160		vec_alloc (ret_ctor_elts, k);
3161	      if (ratype)
3162		{
3163		  unsigned int m, o = nunits / TYPE_VECTOR_SUBPARTS (rtype);
3164		  for (m = 0; m < o; m++)
3165		    {
3166		      tree tem = build4 (ARRAY_REF, rtype, new_temp,
3167					 size_int (m), NULL_TREE, NULL_TREE);
3168		      new_stmt
3169			= gimple_build_assign (make_ssa_name (rtype), tem);
3170		      vect_finish_stmt_generation (stmt, new_stmt, gsi);
3171		      CONSTRUCTOR_APPEND_ELT (ret_ctor_elts, NULL_TREE,
3172					      gimple_assign_lhs (new_stmt));
3173		    }
3174		  tree clobber = build_constructor (ratype, NULL);
3175		  TREE_THIS_VOLATILE (clobber) = 1;
3176		  new_stmt = gimple_build_assign (new_temp, clobber);
3177		  vect_finish_stmt_generation (stmt, new_stmt, gsi);
3178		}
3179	      else
3180		CONSTRUCTOR_APPEND_ELT (ret_ctor_elts, NULL_TREE, new_temp);
3181	      if ((j & (k - 1)) != k - 1)
3182		continue;
3183	      vec_oprnd0 = build_constructor (vectype, ret_ctor_elts);
3184	      new_stmt
3185		= gimple_build_assign (make_ssa_name (vec_dest), vec_oprnd0);
3186	      vect_finish_stmt_generation (stmt, new_stmt, gsi);
3187
3188	      if ((unsigned) j == k - 1)
3189		STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
3190	      else
3191		STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
3192
3193	      prev_stmt_info = vinfo_for_stmt (new_stmt);
3194	      continue;
3195	    }
3196	  else if (ratype)
3197	    {
3198	      tree t = build_fold_addr_expr (new_temp);
3199	      t = build2 (MEM_REF, vectype, t,
3200			  build_int_cst (TREE_TYPE (t), 0));
3201	      new_stmt
3202		= gimple_build_assign (make_ssa_name (vec_dest), t);
3203	      vect_finish_stmt_generation (stmt, new_stmt, gsi);
3204	      tree clobber = build_constructor (ratype, NULL);
3205	      TREE_THIS_VOLATILE (clobber) = 1;
3206	      vect_finish_stmt_generation (stmt,
3207					   gimple_build_assign (new_temp,
3208								clobber), gsi);
3209	    }
3210	}
3211
3212      if (j == 0)
3213	STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
3214      else
3215	STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
3216
3217      prev_stmt_info = vinfo_for_stmt (new_stmt);
3218    }
3219
3220  vargs.release ();
3221
3222  /* The call in STMT might prevent it from being removed in dce.
3223     We however cannot remove it here, because the ssa name it defines
3224     is mapped to the new definition.  So just replace the rhs of the
3225     statement with something harmless.  */
3226
3227  if (slp_node)
3228    return true;
3229
3230  if (scalar_dest)
3231    {
3232      type = TREE_TYPE (scalar_dest);
3233      if (is_pattern_stmt_p (stmt_info))
3234	lhs = gimple_call_lhs (STMT_VINFO_RELATED_STMT (stmt_info));
3235      else
3236	lhs = gimple_call_lhs (stmt);
3237      new_stmt = gimple_build_assign (lhs, build_zero_cst (type));
3238    }
3239  else
3240    new_stmt = gimple_build_nop ();
3241  set_vinfo_for_stmt (new_stmt, stmt_info);
3242  set_vinfo_for_stmt (stmt, NULL);
3243  STMT_VINFO_STMT (stmt_info) = new_stmt;
3244  gsi_replace (gsi, new_stmt, true);
3245  unlink_stmt_vdef (stmt);
3246
3247  return true;
3248}
3249
3250
3251/* Function vect_gen_widened_results_half
3252
3253   Create a vector stmt whose code, number of arguments, and result
3254   variable are CODE, OP_TYPE, and VEC_DEST, and whose arguments are
3255   VEC_OPRND0 and VEC_OPRND1.  The new vector stmt is to be inserted at GSI.
3256   In the case that CODE is a CALL_EXPR, this means that a call to DECL
3257   needs to be created (DECL is a function-decl of a target-builtin).
3258   STMT is the original scalar stmt that we are vectorizing.  */
3259
3260static gimple
3261vect_gen_widened_results_half (enum tree_code code,
3262			       tree decl,
3263                               tree vec_oprnd0, tree vec_oprnd1, int op_type,
3264			       tree vec_dest, gimple_stmt_iterator *gsi,
3265			       gimple stmt)
3266{
3267  gimple new_stmt;
3268  tree new_temp;
3269
3270  /* Generate half of the widened result:  */
3271  if (code == CALL_EXPR)
3272    {
3273      /* Target specific support  */
3274      if (op_type == binary_op)
3275	new_stmt = gimple_build_call (decl, 2, vec_oprnd0, vec_oprnd1);
3276      else
3277	new_stmt = gimple_build_call (decl, 1, vec_oprnd0);
3278      new_temp = make_ssa_name (vec_dest, new_stmt);
3279      gimple_call_set_lhs (new_stmt, new_temp);
3280    }
3281  else
3282    {
3283      /* Generic support */
3284      gcc_assert (op_type == TREE_CODE_LENGTH (code));
3285      if (op_type != binary_op)
3286	vec_oprnd1 = NULL;
3287      new_stmt = gimple_build_assign (vec_dest, code, vec_oprnd0, vec_oprnd1);
3288      new_temp = make_ssa_name (vec_dest, new_stmt);
3289      gimple_assign_set_lhs (new_stmt, new_temp);
3290    }
3291  vect_finish_stmt_generation (stmt, new_stmt, gsi);
3292
3293  return new_stmt;
3294}
3295
3296
3297/* Get vectorized definitions for loop-based vectorization.  For the first
3298   operand we call vect_get_vec_def_for_operand() (with OPRND containing
3299   the scalar operand), and for the rest we get a copy with
3300   vect_get_vec_def_for_stmt_copy() using the previous vector definition
3301   (stored in OPRND). See vect_get_vec_def_for_stmt_copy() for details.
3302   The vectors are collected into VEC_OPRNDS.  */
3303
3304static void
3305vect_get_loop_based_defs (tree *oprnd, gimple stmt, enum vect_def_type dt,
3306			  vec<tree> *vec_oprnds, int multi_step_cvt)
3307{
3308  tree vec_oprnd;
3309
3310  /* Get first vector operand.  */
3311  /* All the vector operands except the very first one (which is the scalar
3312     operand) are stmt copies.  */
3313  if (TREE_CODE (TREE_TYPE (*oprnd)) != VECTOR_TYPE)
3314    vec_oprnd = vect_get_vec_def_for_operand (*oprnd, stmt, NULL);
3315  else
3316    vec_oprnd = vect_get_vec_def_for_stmt_copy (dt, *oprnd);
3317
3318  vec_oprnds->quick_push (vec_oprnd);
3319
3320  /* Get second vector operand.  */
3321  vec_oprnd = vect_get_vec_def_for_stmt_copy (dt, vec_oprnd);
3322  vec_oprnds->quick_push (vec_oprnd);
3323
3324  *oprnd = vec_oprnd;
3325
3326  /* For conversion in multiple steps, continue to get operands
3327     recursively.  */
3328  if (multi_step_cvt)
3329    vect_get_loop_based_defs (oprnd, stmt, dt, vec_oprnds, multi_step_cvt - 1);
3330}
3331
3332
3333/* Create vectorized demotion statements for vector operands from VEC_OPRNDS.
3334   For multi-step conversions store the resulting vectors and call the function
3335   recursively.  */
3336
3337static void
3338vect_create_vectorized_demotion_stmts (vec<tree> *vec_oprnds,
3339				       int multi_step_cvt, gimple stmt,
3340				       vec<tree> vec_dsts,
3341				       gimple_stmt_iterator *gsi,
3342				       slp_tree slp_node, enum tree_code code,
3343				       stmt_vec_info *prev_stmt_info)
3344{
3345  unsigned int i;
3346  tree vop0, vop1, new_tmp, vec_dest;
3347  gimple new_stmt;
3348  stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
3349
3350  vec_dest = vec_dsts.pop ();
3351
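  /* Each demotion combines two input vectors into one output vector, so
     step through VEC_OPRNDS in pairs.  */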
3352  for (i = 0; i < vec_oprnds->length (); i += 2)
3353    {
3354      /* Create demotion operation.  */
3355      vop0 = (*vec_oprnds)[i];
3356      vop1 = (*vec_oprnds)[i + 1];
3357      new_stmt = gimple_build_assign (vec_dest, code, vop0, vop1);
3358      new_tmp = make_ssa_name (vec_dest, new_stmt);
3359      gimple_assign_set_lhs (new_stmt, new_tmp);
3360      vect_finish_stmt_generation (stmt, new_stmt, gsi);
3361
3362      if (multi_step_cvt)
3363	/* Store the resulting vector for next recursive call.  */
3364	(*vec_oprnds)[i/2] = new_tmp;
3365      else
3366	{
3367	  /* This is the last step of the conversion sequence. Store the
3368	     vectors in SLP_NODE or in vector info of the scalar statement
3369	     (or in STMT_VINFO_RELATED_STMT chain).  */
3370	  if (slp_node)
3371	    SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
3372	  else
3373	    {
3374	      if (!*prev_stmt_info)
3375		STMT_VINFO_VEC_STMT (stmt_info) = new_stmt;
3376	      else
3377		STMT_VINFO_RELATED_STMT (*prev_stmt_info) = new_stmt;
3378
3379	      *prev_stmt_info = vinfo_for_stmt (new_stmt);
3380	    }
3381	}
3382    }
3383
3384  /* For multi-step demotion operations we first generate demotion operations
3385     from the source type to the intermediate types, and then combine the
3386     results (stored in VEC_OPRNDS) with a demotion operation to the destination
3387     type.  */
3388  if (multi_step_cvt)
3389    {
3390      /* At each level of recursion we have half of the operands we had at the
3391	 previous level.  */
3392      vec_oprnds->truncate ((i+1)/2);
3393      vect_create_vectorized_demotion_stmts (vec_oprnds, multi_step_cvt - 1,
3394					     stmt, vec_dsts, gsi, slp_node,
3395					     VEC_PACK_TRUNC_EXPR,
3396					     prev_stmt_info);
3397    }
3398
3399  vec_dsts.quick_push (vec_dest);
3400}
3401
3402
3403/* Create vectorized promotion statements for vector operands from VEC_OPRNDS0
3404   and VEC_OPRNDS1 (for binary operations).  For multi-step conversions store
3405   the resulting vectors and call the function recursively.  */
3406
3407static void
3408vect_create_vectorized_promotion_stmts (vec<tree> *vec_oprnds0,
3409					vec<tree> *vec_oprnds1,
3410					gimple stmt, tree vec_dest,
3411					gimple_stmt_iterator *gsi,
3412					enum tree_code code1,
3413					enum tree_code code2, tree decl1,
3414					tree decl2, int op_type)
3415{
3416  int i;
3417  tree vop0, vop1, new_tmp1, new_tmp2;
3418  gimple new_stmt1, new_stmt2;
3419  vec<tree> vec_tmp = vNULL;
3420
3421  vec_tmp.create (vec_oprnds0->length () * 2);
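  /* Each input vector is widened into two result vectors (its low and high
     halves), hence the result vector needs twice the capacity.  */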
3422  FOR_EACH_VEC_ELT (*vec_oprnds0, i, vop0)
3423    {
3424      if (op_type == binary_op)
3425	vop1 = (*vec_oprnds1)[i];
3426      else
3427	vop1 = NULL_TREE;
3428
3429      /* Generate the two halves of promotion operation.  */
3430      new_stmt1 = vect_gen_widened_results_half (code1, decl1, vop0, vop1,
3431						 op_type, vec_dest, gsi, stmt);
3432      new_stmt2 = vect_gen_widened_results_half (code2, decl2, vop0, vop1,
3433						 op_type, vec_dest, gsi, stmt);
3434      if (is_gimple_call (new_stmt1))
3435	{
3436	  new_tmp1 = gimple_call_lhs (new_stmt1);
3437	  new_tmp2 = gimple_call_lhs (new_stmt2);
3438	}
3439      else
3440	{
3441	  new_tmp1 = gimple_assign_lhs (new_stmt1);
3442	  new_tmp2 = gimple_assign_lhs (new_stmt2);
3443	}
3444
3445      /* Store the results for the next step.  */
3446      vec_tmp.quick_push (new_tmp1);
3447      vec_tmp.quick_push (new_tmp2);
3448    }
3449
3450  vec_oprnds0->release ();
3451  *vec_oprnds0 = vec_tmp;
3452}
3453
3454
3455/* Check if STMT performs a conversion operation that can be vectorized.
3456   If VEC_STMT is also passed, vectorize the STMT: create a vectorized
3457   stmt to replace it, put it in VEC_STMT, and insert it at GSI.
3458   Return FALSE if not a vectorizable STMT, TRUE otherwise.  */
3459
3460static bool
3461vectorizable_conversion (gimple stmt, gimple_stmt_iterator *gsi,
3462			 gimple *vec_stmt, slp_tree slp_node)
3463{
3464  tree vec_dest;
3465  tree scalar_dest;
3466  tree op0, op1 = NULL_TREE;
3467  tree vec_oprnd0 = NULL_TREE, vec_oprnd1 = NULL_TREE;
3468  stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
3469  loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
3470  enum tree_code code, code1 = ERROR_MARK, code2 = ERROR_MARK;
3471  enum tree_code codecvt1 = ERROR_MARK, codecvt2 = ERROR_MARK;
3472  tree decl1 = NULL_TREE, decl2 = NULL_TREE;
3473  tree new_temp;
3474  tree def;
3475  gimple def_stmt;
3476  enum vect_def_type dt[2] = {vect_unknown_def_type, vect_unknown_def_type};
3477  gimple new_stmt = NULL;
3478  stmt_vec_info prev_stmt_info;
3479  int nunits_in;
3480  int nunits_out;
3481  tree vectype_out, vectype_in;
3482  int ncopies, i, j;
3483  tree lhs_type, rhs_type;
3484  enum { NARROW, NONE, WIDEN } modifier;
3485  vec<tree> vec_oprnds0 = vNULL;
3486  vec<tree> vec_oprnds1 = vNULL;
3487  tree vop0;
3488  bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
3489  int multi_step_cvt = 0;
3490  vec<tree> vec_dsts = vNULL;
3491  vec<tree> interm_types = vNULL;
3492  tree last_oprnd, intermediate_type, cvt_type = NULL_TREE;
3493  int op_type;
3494  machine_mode rhs_mode;
3495  unsigned short fltsz;
3496
3497  /* Is STMT a vectorizable conversion?   */
3498
3499  if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
3500    return false;
3501
3502  if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def)
3503    return false;
3504
3505  if (!is_gimple_assign (stmt))
3506    return false;
3507
3508  if (TREE_CODE (gimple_assign_lhs (stmt)) != SSA_NAME)
3509    return false;
3510
3511  code = gimple_assign_rhs_code (stmt);
3512  if (!CONVERT_EXPR_CODE_P (code)
3513      && code != FIX_TRUNC_EXPR
3514      && code != FLOAT_EXPR
3515      && code != WIDEN_MULT_EXPR
3516      && code != WIDEN_LSHIFT_EXPR)
3517    return false;
3518
3519  op_type = TREE_CODE_LENGTH (code);
3520
3521  /* Check types of lhs and rhs.  */
3522  scalar_dest = gimple_assign_lhs (stmt);
3523  lhs_type = TREE_TYPE (scalar_dest);
3524  vectype_out = STMT_VINFO_VECTYPE (stmt_info);
3525
3526  op0 = gimple_assign_rhs1 (stmt);
3527  rhs_type = TREE_TYPE (op0);
3528
3529  if ((code != FIX_TRUNC_EXPR && code != FLOAT_EXPR)
3530      && !((INTEGRAL_TYPE_P (lhs_type)
3531	    && INTEGRAL_TYPE_P (rhs_type))
3532	   || (SCALAR_FLOAT_TYPE_P (lhs_type)
3533	       && SCALAR_FLOAT_TYPE_P (rhs_type))))
3534    return false;
3535
3536  if ((INTEGRAL_TYPE_P (lhs_type)
3537       && (TYPE_PRECISION (lhs_type)
3538	   != GET_MODE_PRECISION (TYPE_MODE (lhs_type))))
3539      || (INTEGRAL_TYPE_P (rhs_type)
3540	  && (TYPE_PRECISION (rhs_type)
3541	      != GET_MODE_PRECISION (TYPE_MODE (rhs_type)))))
3542    {
3543      if (dump_enabled_p ())
3544	dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3545                         "type conversion to/from bit-precision unsupported."
3546                         "\n");
3547      return false;
3548    }
3549
3550  /* Check the operands of the operation.  */
3551  if (!vect_is_simple_use_1 (op0, stmt, loop_vinfo, bb_vinfo,
3552			     &def_stmt, &def, &dt[0], &vectype_in))
3553    {
3554      if (dump_enabled_p ())
3555	dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3556                         "use not simple.\n");
3557      return false;
3558    }
3559  if (op_type == binary_op)
3560    {
3561      bool ok;
3562
3563      op1 = gimple_assign_rhs2 (stmt);
3564      gcc_assert (code == WIDEN_MULT_EXPR || code == WIDEN_LSHIFT_EXPR);
3565      /* For WIDEN_MULT_EXPR, if OP0 is a constant, use the type of
3566	 OP1.  */
3567      if (CONSTANT_CLASS_P (op0))
3568	ok = vect_is_simple_use_1 (op1, stmt, loop_vinfo, bb_vinfo,
3569				   &def_stmt, &def, &dt[1], &vectype_in);
3570      else
3571	ok = vect_is_simple_use (op1, stmt, loop_vinfo, bb_vinfo, &def_stmt,
3572				 &def, &dt[1]);
3573
3574      if (!ok)
3575	{
3576          if (dump_enabled_p ())
3577            dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3578                             "use not simple.\n");
3579	  return false;
3580	}
3581    }
3582
3583  /* If op0 is an external or constant def, use a vector type of
3584     the same size as the output vector type.  */
3585  if (!vectype_in)
3586    vectype_in = get_same_sized_vectype (rhs_type, vectype_out);
3587  if (vec_stmt)
3588    gcc_assert (vectype_in);
3589  if (!vectype_in)
3590    {
3591      if (dump_enabled_p ())
3592	{
3593	  dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3594                           "no vectype for scalar type ");
3595	  dump_generic_expr (MSG_MISSED_OPTIMIZATION, TDF_SLIM, rhs_type);
3596          dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
3597	}
3598
3599      return false;
3600    }
3601
3602  nunits_in = TYPE_VECTOR_SUBPARTS (vectype_in);
3603  nunits_out = TYPE_VECTOR_SUBPARTS (vectype_out);
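  /* A conversion that produces more elements per vector than it consumes
     narrows the scalar type (a demotion); one that produces fewer widens it
     (a promotion).  */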
3604  if (nunits_in < nunits_out)
3605    modifier = NARROW;
3606  else if (nunits_out == nunits_in)
3607    modifier = NONE;
3608  else
3609    modifier = WIDEN;
3610
3611  /* Multiple types in SLP are handled by creating the appropriate number of
3612     vectorized stmts for each SLP node.  Hence, NCOPIES is always 1 in
3613     case of SLP.  */
3614  if (slp_node || PURE_SLP_STMT (stmt_info))
3615    ncopies = 1;
3616  else if (modifier == NARROW)
3617    ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits_out;
3618  else
3619    ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits_in;
3620
3621  /* Sanity check: make sure that at least one copy of the vectorized stmt
3622     needs to be generated.  */
3623  gcc_assert (ncopies >= 1);
3624
3625  /* Supportable by target?  */
3626  switch (modifier)
3627    {
3628    case NONE:
3629      if (code != FIX_TRUNC_EXPR && code != FLOAT_EXPR)
3630	return false;
3631      if (supportable_convert_operation (code, vectype_out, vectype_in,
3632					 &decl1, &code1))
3633	break;
3634      /* FALLTHRU */
3635    unsupported:
3636      if (dump_enabled_p ())
3637	dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3638                         "conversion not supported by target.\n");
3639      return false;
3640
3641    case WIDEN:
3642      if (supportable_widening_operation (code, stmt, vectype_out, vectype_in,
3643					  &code1, &code2, &multi_step_cvt,
3644					  &interm_types))
3645	{
3646	  /* Binary widening operation can only be supported directly by the
3647	     architecture.  */
3648	  gcc_assert (!(multi_step_cvt && op_type == binary_op));
3649	  break;
3650	}
3651
3652      if (code != FLOAT_EXPR
3653	  || (GET_MODE_SIZE (TYPE_MODE (lhs_type))
3654	      <= GET_MODE_SIZE (TYPE_MODE (rhs_type))))
3655	goto unsupported;
3656
3657      rhs_mode = TYPE_MODE (rhs_type);
3658      fltsz = GET_MODE_SIZE (TYPE_MODE (lhs_type));
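      /* Starting from the next wider integer mode, look for an intermediate
	 integer type, no wider than the float type, through which the
	 conversion can be done in supported steps.  */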
3659      for (rhs_mode = GET_MODE_2XWIDER_MODE (TYPE_MODE (rhs_type));
3660	   rhs_mode != VOIDmode && GET_MODE_SIZE (rhs_mode) <= fltsz;
3661	   rhs_mode = GET_MODE_2XWIDER_MODE (rhs_mode))
3662	{
3663	  cvt_type
3664	    = build_nonstandard_integer_type (GET_MODE_BITSIZE (rhs_mode), 0);
3665	  cvt_type = get_same_sized_vectype (cvt_type, vectype_in);
3666	  if (cvt_type == NULL_TREE)
3667	    goto unsupported;
3668
3669	  if (GET_MODE_SIZE (rhs_mode) == fltsz)
3670	    {
3671	      if (!supportable_convert_operation (code, vectype_out,
3672						  cvt_type, &decl1, &codecvt1))
3673		goto unsupported;
3674	    }
3675	  else if (!supportable_widening_operation (code, stmt, vectype_out,
3676						    cvt_type, &codecvt1,
3677						    &codecvt2, &multi_step_cvt,
3678						    &interm_types))
3679	    continue;
3680	  else
3681	    gcc_assert (multi_step_cvt == 0);
3682
3683	  if (supportable_widening_operation (NOP_EXPR, stmt, cvt_type,
3684					      vectype_in, &code1, &code2,
3685					      &multi_step_cvt, &interm_types))
3686	    break;
3687	}
3688
3689      if (rhs_mode == VOIDmode || GET_MODE_SIZE (rhs_mode) > fltsz)
3690	goto unsupported;
3691
3692      if (GET_MODE_SIZE (rhs_mode) == fltsz)
3693	codecvt2 = ERROR_MARK;
3694      else
3695	{
3696	  multi_step_cvt++;
3697	  interm_types.safe_push (cvt_type);
3698	  cvt_type = NULL_TREE;
3699	}
3700      break;
3701
3702    case NARROW:
3703      gcc_assert (op_type == unary_op);
3704      if (supportable_narrowing_operation (code, vectype_out, vectype_in,
3705					   &code1, &multi_step_cvt,
3706					   &interm_types))
3707	break;
3708
3709      if (code != FIX_TRUNC_EXPR
3710	  || (GET_MODE_SIZE (TYPE_MODE (lhs_type))
3711	      >= GET_MODE_SIZE (TYPE_MODE (rhs_type))))
3712	goto unsupported;
3713
3714      rhs_mode = TYPE_MODE (rhs_type);
3715      cvt_type
3716	= build_nonstandard_integer_type (GET_MODE_BITSIZE (rhs_mode), 0);
3717      cvt_type = get_same_sized_vectype (cvt_type, vectype_in);
3718      if (cvt_type == NULL_TREE)
3719	goto unsupported;
3720      if (!supportable_convert_operation (code, cvt_type, vectype_in,
3721					  &decl1, &codecvt1))
3722	goto unsupported;
3723      if (supportable_narrowing_operation (NOP_EXPR, vectype_out, cvt_type,
3724					   &code1, &multi_step_cvt,
3725					   &interm_types))
3726	break;
3727      goto unsupported;
3728
3729    default:
3730      gcc_unreachable ();
3731    }
3732
3733  if (!vec_stmt)		/* transformation not required.  */
3734    {
3735      if (dump_enabled_p ())
3736	dump_printf_loc (MSG_NOTE, vect_location,
3737                         "=== vectorizable_conversion ===\n");
3738      if (code == FIX_TRUNC_EXPR || code == FLOAT_EXPR)
3739        {
3740	  STMT_VINFO_TYPE (stmt_info) = type_conversion_vec_info_type;
3741	  vect_model_simple_cost (stmt_info, ncopies, dt, NULL, NULL);
3742	}
3743      else if (modifier == NARROW)
3744	{
3745	  STMT_VINFO_TYPE (stmt_info) = type_demotion_vec_info_type;
3746	  vect_model_promotion_demotion_cost (stmt_info, dt, multi_step_cvt);
3747	}
3748      else
3749	{
3750	  STMT_VINFO_TYPE (stmt_info) = type_promotion_vec_info_type;
3751	  vect_model_promotion_demotion_cost (stmt_info, dt, multi_step_cvt);
3752	}
3753      interm_types.release ();
3754      return true;
3755    }
3756
3757  /** Transform.  **/
3758  if (dump_enabled_p ())
3759    dump_printf_loc (MSG_NOTE, vect_location,
3760                     "transform conversion. ncopies = %d.\n", ncopies);
3761
3762  if (op_type == binary_op)
3763    {
3764      if (CONSTANT_CLASS_P (op0))
3765	op0 = fold_convert (TREE_TYPE (op1), op0);
3766      else if (CONSTANT_CLASS_P (op1))
3767	op1 = fold_convert (TREE_TYPE (op0), op1);
3768    }
3769
3770  /* In case of multi-step conversion, we first generate conversion operations
3771     to the intermediate types, and then from those types to the final one.
3772     We create vector destinations for the intermediate types (INTERM_TYPES)
3773     received from supportable_*_operation, and store them in the correct
3774     order for future use in vect_create_vectorized_*_stmts ().  */
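  /* For example, a short -> float conversion may be done by first widening
     short to int (an intermediate type) and then converting int to float.  */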
3775  vec_dsts.create (multi_step_cvt + 1);
3776  vec_dest = vect_create_destination_var (scalar_dest,
3777					  (cvt_type && modifier == WIDEN)
3778					  ? cvt_type : vectype_out);
3779  vec_dsts.quick_push (vec_dest);
3780
3781  if (multi_step_cvt)
3782    {
3783      for (i = interm_types.length () - 1;
3784	   interm_types.iterate (i, &intermediate_type); i--)
3785	{
3786	  vec_dest = vect_create_destination_var (scalar_dest,
3787						  intermediate_type);
3788	  vec_dsts.quick_push (vec_dest);
3789	}
3790    }
3791
3792  if (cvt_type)
3793    vec_dest = vect_create_destination_var (scalar_dest,
3794					    modifier == WIDEN
3795					    ? vectype_out : cvt_type);
3796
3797  if (!slp_node)
3798    {
3799      if (modifier == WIDEN)
3800	{
3801	  vec_oprnds0.create (multi_step_cvt ? vect_pow2 (multi_step_cvt) : 1);
3802	  if (op_type == binary_op)
3803	    vec_oprnds1.create (1);
3804	}
3805      else if (modifier == NARROW)
3806	vec_oprnds0.create (
3807		   2 * (multi_step_cvt ? vect_pow2 (multi_step_cvt) : 1));
3808    }
3809  else if (code == WIDEN_LSHIFT_EXPR)
3810    vec_oprnds1.create (slp_node->vec_stmts_size);
3811
3812  last_oprnd = op0;
3813  prev_stmt_info = NULL;
3814  switch (modifier)
3815    {
3816    case NONE:
3817      for (j = 0; j < ncopies; j++)
3818	{
3819	  if (j == 0)
3820	    vect_get_vec_defs (op0, NULL, stmt, &vec_oprnds0, NULL, slp_node,
3821			       -1);
3822	  else
3823	    vect_get_vec_defs_for_stmt_copy (dt, &vec_oprnds0, NULL);
3824
3825	  FOR_EACH_VEC_ELT (vec_oprnds0, i, vop0)
3826	    {
3827	      /* Arguments are ready, create the new vector stmt.  */
3828	      if (code1 == CALL_EXPR)
3829		{
3830		  new_stmt = gimple_build_call (decl1, 1, vop0);
3831		  new_temp = make_ssa_name (vec_dest, new_stmt);
3832		  gimple_call_set_lhs (new_stmt, new_temp);
3833		}
3834	      else
3835		{
3836		  gcc_assert (TREE_CODE_LENGTH (code1) == unary_op);
3837		  new_stmt = gimple_build_assign (vec_dest, code1, vop0);
3838		  new_temp = make_ssa_name (vec_dest, new_stmt);
3839		  gimple_assign_set_lhs (new_stmt, new_temp);
3840		}
3841
3842	      vect_finish_stmt_generation (stmt, new_stmt, gsi);
3843	      if (slp_node)
3844		SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
3845	    }
3846
3847	  if (j == 0)
3848	    STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
3849	  else
3850	    STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
3851	  prev_stmt_info = vinfo_for_stmt (new_stmt);
3852	}
3853      break;
3854
3855    case WIDEN:
3856      /* In case the vectorization factor (VF) is bigger than the number
3857	 of elements that we can fit in a vectype (nunits), we have to
3858	 generate more than one vector stmt, i.e., we need to "unroll"
3859	 the vector stmt by a factor VF/nunits.  */
3860      for (j = 0; j < ncopies; j++)
3861	{
3862	  /* Handle uses.  */
3863	  if (j == 0)
3864	    {
3865	      if (slp_node)
3866		{
3867		  if (code == WIDEN_LSHIFT_EXPR)
3868		    {
3869		      unsigned int k;
3870
3871		      vec_oprnd1 = op1;
3872		      /* Store vec_oprnd1 for every vector stmt to be created
3873			 for SLP_NODE.  We check during the analysis that all
3874			 the shift arguments are the same.  */
3875		      for (k = 0; k < slp_node->vec_stmts_size - 1; k++)
3876			vec_oprnds1.quick_push (vec_oprnd1);
3877
3878		      vect_get_vec_defs (op0, NULL_TREE, stmt, &vec_oprnds0, NULL,
3879					 slp_node, -1);
3880		    }
3881		  else
3882		    vect_get_vec_defs (op0, op1, stmt, &vec_oprnds0,
3883				       &vec_oprnds1, slp_node, -1);
3884		}
3885	      else
3886		{
3887		  vec_oprnd0 = vect_get_vec_def_for_operand (op0, stmt, NULL);
3888		  vec_oprnds0.quick_push (vec_oprnd0);
3889		  if (op_type == binary_op)
3890		    {
3891		      if (code == WIDEN_LSHIFT_EXPR)
3892			vec_oprnd1 = op1;
3893		      else
3894			vec_oprnd1 = vect_get_vec_def_for_operand (op1, stmt,
3895								   NULL);
3896		      vec_oprnds1.quick_push (vec_oprnd1);
3897		    }
3898		}
3899	    }
3900	  else
3901	    {
3902	      vec_oprnd0 = vect_get_vec_def_for_stmt_copy (dt[0], vec_oprnd0);
3903	      vec_oprnds0.truncate (0);
3904	      vec_oprnds0.quick_push (vec_oprnd0);
3905	      if (op_type == binary_op)
3906		{
3907		  if (code == WIDEN_LSHIFT_EXPR)
3908		    vec_oprnd1 = op1;
3909		  else
3910		    vec_oprnd1 = vect_get_vec_def_for_stmt_copy (dt[1],
3911								 vec_oprnd1);
3912		  vec_oprnds1.truncate (0);
3913		  vec_oprnds1.quick_push (vec_oprnd1);
3914		}
3915	    }
3916
3917	  /* Arguments are ready.  Create the new vector stmts.  */
3918	  for (i = multi_step_cvt; i >= 0; i--)
3919	    {
3920	      tree this_dest = vec_dsts[i];
3921	      enum tree_code c1 = code1, c2 = code2;
3922	      if (i == 0 && codecvt2 != ERROR_MARK)
3923		{
3924		  c1 = codecvt1;
3925		  c2 = codecvt2;
3926		}
3927	      vect_create_vectorized_promotion_stmts (&vec_oprnds0,
3928						      &vec_oprnds1,
3929						      stmt, this_dest, gsi,
3930						      c1, c2, decl1, decl2,
3931						      op_type);
3932	    }
3933
3934	  FOR_EACH_VEC_ELT (vec_oprnds0, i, vop0)
3935	    {
3936	      if (cvt_type)
3937		{
3938		  if (codecvt1 == CALL_EXPR)
3939		    {
3940		      new_stmt = gimple_build_call (decl1, 1, vop0);
3941		      new_temp = make_ssa_name (vec_dest, new_stmt);
3942		      gimple_call_set_lhs (new_stmt, new_temp);
3943		    }
3944		  else
3945		    {
3946		      gcc_assert (TREE_CODE_LENGTH (codecvt1) == unary_op);
3947		      new_temp = make_ssa_name (vec_dest);
3948		      new_stmt = gimple_build_assign (new_temp, codecvt1,
3949						      vop0);
3950		    }
3951
3952		  vect_finish_stmt_generation (stmt, new_stmt, gsi);
3953		}
3954	      else
3955		new_stmt = SSA_NAME_DEF_STMT (vop0);
3956
3957	      if (slp_node)
3958		SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
3959	      else
3960		{
3961		  if (!prev_stmt_info)
3962		    STMT_VINFO_VEC_STMT (stmt_info) = new_stmt;
3963		  else
3964		    STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
3965		  prev_stmt_info = vinfo_for_stmt (new_stmt);
3966		}
3967	    }
3968	}
3969
3970      *vec_stmt = STMT_VINFO_VEC_STMT (stmt_info);
3971      break;
3972
3973    case NARROW:
3974      /* In case the vectorization factor (VF) is bigger than the number
3975	 of elements that we can fit in a vectype (nunits), we have to
3976	 generate more than one vector stmt, i.e., we need to "unroll"
3977	 the vector stmt by a factor VF/nunits.  */
3978      for (j = 0; j < ncopies; j++)
3979	{
3980	  /* Handle uses.  */
3981	  if (slp_node)
3982	    vect_get_vec_defs (op0, NULL_TREE, stmt, &vec_oprnds0, NULL,
3983			       slp_node, -1);
3984	  else
3985	    {
3986	      vec_oprnds0.truncate (0);
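	      /* Gather all the input vector defs that this copy of the
		 narrowing sequence will consume.  */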
3987	      vect_get_loop_based_defs (&last_oprnd, stmt, dt[0], &vec_oprnds0,
3988					vect_pow2 (multi_step_cvt) - 1);
3989	    }
3990
3991	  /* Arguments are ready.  Create the new vector stmts.  */
3992	  if (cvt_type)
3993	    FOR_EACH_VEC_ELT (vec_oprnds0, i, vop0)
3994	      {
3995		if (codecvt1 == CALL_EXPR)
3996		  {
3997		    new_stmt = gimple_build_call (decl1, 1, vop0);
3998		    new_temp = make_ssa_name (vec_dest, new_stmt);
3999		    gimple_call_set_lhs (new_stmt, new_temp);
4000		  }
4001		else
4002		  {
4003		    gcc_assert (TREE_CODE_LENGTH (codecvt1) == unary_op);
4004		    new_temp = make_ssa_name (vec_dest);
4005		    new_stmt = gimple_build_assign (new_temp, codecvt1,
4006						    vop0);
4007		  }
4008
4009		vect_finish_stmt_generation (stmt, new_stmt, gsi);
4010		vec_oprnds0[i] = new_temp;
4011	      }
4012
4013	  vect_create_vectorized_demotion_stmts (&vec_oprnds0, multi_step_cvt,
4014						 stmt, vec_dsts, gsi,
4015						 slp_node, code1,
4016						 &prev_stmt_info);
4017	}
4018
4019      *vec_stmt = STMT_VINFO_VEC_STMT (stmt_info);
4020      break;
4021    }
4022
4023  vec_oprnds0.release ();
4024  vec_oprnds1.release ();
4025  vec_dsts.release ();
4026  interm_types.release ();
4027
4028  return true;
4029}
4030
4031
4032/* Function vectorizable_assignment.
4033
4034   Check if STMT performs an assignment (copy) that can be vectorized.
4035   If VEC_STMT is also passed, vectorize the STMT: create a vectorized
4036   stmt to replace it, put it in VEC_STMT, and insert it at GSI.
4037   Return FALSE if not a vectorizable STMT, TRUE otherwise.  */
4038
4039static bool
4040vectorizable_assignment (gimple stmt, gimple_stmt_iterator *gsi,
4041			 gimple *vec_stmt, slp_tree slp_node)
4042{
4043  tree vec_dest;
4044  tree scalar_dest;
4045  tree op;
4046  stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
4047  tree vectype = STMT_VINFO_VECTYPE (stmt_info);
4048  loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
4049  tree new_temp;
4050  tree def;
4051  gimple def_stmt;
4052  enum vect_def_type dt[2] = {vect_unknown_def_type, vect_unknown_def_type};
4053  unsigned int nunits = TYPE_VECTOR_SUBPARTS (vectype);
4054  int ncopies;
4055  int i, j;
4056  vec<tree> vec_oprnds = vNULL;
4057  tree vop;
4058  bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
4059  gimple new_stmt = NULL;
4060  stmt_vec_info prev_stmt_info = NULL;
4061  enum tree_code code;
4062  tree vectype_in;
4063
4064  /* Multiple types in SLP are handled by creating the appropriate number of
4065     vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in
4066     case of SLP.  */
4067  if (slp_node || PURE_SLP_STMT (stmt_info))
4068    ncopies = 1;
4069  else
4070    ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits;
4071
4072  gcc_assert (ncopies >= 1);
4073
4074  if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
4075    return false;
4076
4077  if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def)
4078    return false;
4079
4080  /* Is vectorizable assignment?  */
4081  if (!is_gimple_assign (stmt))
4082    return false;
4083
4084  scalar_dest = gimple_assign_lhs (stmt);
4085  if (TREE_CODE (scalar_dest) != SSA_NAME)
4086    return false;
4087
4088  code = gimple_assign_rhs_code (stmt);
4089  if (gimple_assign_single_p (stmt)
4090      || code == PAREN_EXPR
4091      || CONVERT_EXPR_CODE_P (code))
4092    op = gimple_assign_rhs1 (stmt);
4093  else
4094    return false;
4095
4096  if (code == VIEW_CONVERT_EXPR)
4097    op = TREE_OPERAND (op, 0);
4098
4099  if (!vect_is_simple_use_1 (op, stmt, loop_vinfo, bb_vinfo,
4100			     &def_stmt, &def, &dt[0], &vectype_in))
4101    {
4102      if (dump_enabled_p ())
4103        dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
4104                         "use not simple.\n");
4105      return false;
4106    }
4107
4108  /* We can handle NOP_EXPR conversions that do not change the number
4109     of elements or the vector size.  */
4110  if ((CONVERT_EXPR_CODE_P (code)
4111       || code == VIEW_CONVERT_EXPR)
4112      && (!vectype_in
4113	  || TYPE_VECTOR_SUBPARTS (vectype_in) != nunits
4114	  || (GET_MODE_SIZE (TYPE_MODE (vectype))
4115	      != GET_MODE_SIZE (TYPE_MODE (vectype_in)))))
4116    return false;
4117
4118  /* We do not handle bit-precision changes.  */
4119  if ((CONVERT_EXPR_CODE_P (code)
4120       || code == VIEW_CONVERT_EXPR)
4121      && INTEGRAL_TYPE_P (TREE_TYPE (scalar_dest))
4122      && ((TYPE_PRECISION (TREE_TYPE (scalar_dest))
4123	   != GET_MODE_PRECISION (TYPE_MODE (TREE_TYPE (scalar_dest))))
4124	  || ((TYPE_PRECISION (TREE_TYPE (op))
4125	       != GET_MODE_PRECISION (TYPE_MODE (TREE_TYPE (op))))))
4126      /* But a conversion that does not change the bit-pattern is ok.  */
4127      && !((TYPE_PRECISION (TREE_TYPE (scalar_dest))
4128	    > TYPE_PRECISION (TREE_TYPE (op)))
4129	   && TYPE_UNSIGNED (TREE_TYPE (op))))
4130    {
4131      if (dump_enabled_p ())
4132        dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
4133                         "type conversion to/from bit-precision "
4134                         "unsupported.\n");
4135      return false;
4136    }
4137
4138  if (!vec_stmt) /* transformation not required.  */
4139    {
4140      STMT_VINFO_TYPE (stmt_info) = assignment_vec_info_type;
4141      if (dump_enabled_p ())
4142        dump_printf_loc (MSG_NOTE, vect_location,
4143                         "=== vectorizable_assignment ===\n");
4144      vect_model_simple_cost (stmt_info, ncopies, dt, NULL, NULL);
4145      return true;
4146    }
4147
4148  /** Transform.  **/
4149  if (dump_enabled_p ())
4150    dump_printf_loc (MSG_NOTE, vect_location, "transform assignment.\n");
4151
4152  /* Handle def.  */
4153  vec_dest = vect_create_destination_var (scalar_dest, vectype);
4154
4155  /* Handle use.  */
4156  for (j = 0; j < ncopies; j++)
4157    {
4158      /* Handle uses.  */
4159      if (j == 0)
4160        vect_get_vec_defs (op, NULL, stmt, &vec_oprnds, NULL, slp_node, -1);
4161      else
4162        vect_get_vec_defs_for_stmt_copy (dt, &vec_oprnds, NULL);
4163
4164      /* Arguments are ready.  Create the new vector stmt.  */
4165      FOR_EACH_VEC_ELT (vec_oprnds, i, vop)
4166       {
4167	 if (CONVERT_EXPR_CODE_P (code)
4168	     || code == VIEW_CONVERT_EXPR)
4169	   vop = build1 (VIEW_CONVERT_EXPR, vectype, vop);
4170         new_stmt = gimple_build_assign (vec_dest, vop);
4171         new_temp = make_ssa_name (vec_dest, new_stmt);
4172         gimple_assign_set_lhs (new_stmt, new_temp);
4173         vect_finish_stmt_generation (stmt, new_stmt, gsi);
4174         if (slp_node)
4175           SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
4176       }
4177
4178      if (slp_node)
4179        continue;
4180
4181      if (j == 0)
4182        STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
4183      else
4184        STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
4185
4186      prev_stmt_info = vinfo_for_stmt (new_stmt);
4187    }
4188
4189  vec_oprnds.release ();
4190  return true;
4191}
4192
4193
4194/* Return TRUE if CODE (a shift operation) is supported for SCALAR_TYPE
4195   either as shift by a scalar or by a vector.  */
4196
4197bool
4198vect_supportable_shift (enum tree_code code, tree scalar_type)
4199{
4200
4201  machine_mode vec_mode;
4202  optab optab;
4203  int icode;
4204  tree vectype;
4205
4206  vectype = get_vectype_for_scalar_type (scalar_type);
4207  if (!vectype)
4208    return false;
4209
4210  optab = optab_for_tree_code (code, vectype, optab_scalar);
4211  if (!optab
4212      || optab_handler (optab, TYPE_MODE (vectype)) == CODE_FOR_nothing)
4213    {
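      /* No vector/scalar shift pattern; see whether a vector/vector shift
         is available instead.  */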
4214      optab = optab_for_tree_code (code, vectype, optab_vector);
4215      if (!optab
4216          || (optab_handler (optab, TYPE_MODE (vectype))
4217                      == CODE_FOR_nothing))
4218        return false;
4219    }
4220
4221  vec_mode = TYPE_MODE (vectype);
4222  icode = (int) optab_handler (optab, vec_mode);
4223  if (icode == CODE_FOR_nothing)
4224    return false;
4225
4226  return true;
4227}
4228
4229
4230/* Function vectorizable_shift.
4231
4232   Check if STMT performs a shift operation that can be vectorized.
4233   If VEC_STMT is also passed, vectorize the STMT: create a vectorized
4234   stmt to replace it, put it in VEC_STMT, and insert it at GSI.
4235   Return FALSE if not a vectorizable STMT, TRUE otherwise.  */
4236
4237static bool
4238vectorizable_shift (gimple stmt, gimple_stmt_iterator *gsi,
4239                    gimple *vec_stmt, slp_tree slp_node)
4240{
4241  tree vec_dest;
4242  tree scalar_dest;
4243  tree op0, op1 = NULL;
4244  tree vec_oprnd1 = NULL_TREE;
4245  stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
4246  tree vectype;
4247  loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
4248  enum tree_code code;
4249  machine_mode vec_mode;
4250  tree new_temp;
4251  optab optab;
4252  int icode;
4253  machine_mode optab_op2_mode;
4254  tree def;
4255  gimple def_stmt;
4256  enum vect_def_type dt[2] = {vect_unknown_def_type, vect_unknown_def_type};
4257  gimple new_stmt = NULL;
4258  stmt_vec_info prev_stmt_info;
4259  int nunits_in;
4260  int nunits_out;
4261  tree vectype_out;
4262  tree op1_vectype;
4263  int ncopies;
4264  int j, i;
4265  vec<tree> vec_oprnds0 = vNULL;
4266  vec<tree> vec_oprnds1 = vNULL;
4267  tree vop0, vop1;
4268  unsigned int k;
4269  bool scalar_shift_arg = true;
4270  bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
4271  int vf;
4272
4273  if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
4274    return false;
4275
4276  if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def)
4277    return false;
4278
4279  /* Is STMT a vectorizable binary/unary operation?   */
4280  if (!is_gimple_assign (stmt))
4281    return false;
4282
4283  if (TREE_CODE (gimple_assign_lhs (stmt)) != SSA_NAME)
4284    return false;
4285
4286  code = gimple_assign_rhs_code (stmt);
4287
4288  if (!(code == LSHIFT_EXPR || code == RSHIFT_EXPR || code == LROTATE_EXPR
4289      || code == RROTATE_EXPR))
4290    return false;
4291
4292  scalar_dest = gimple_assign_lhs (stmt);
4293  vectype_out = STMT_VINFO_VECTYPE (stmt_info);
4294  if (TYPE_PRECISION (TREE_TYPE (scalar_dest))
4295      != GET_MODE_PRECISION (TYPE_MODE (TREE_TYPE (scalar_dest))))
4296    {
4297      if (dump_enabled_p ())
4298        dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
4299                         "bit-precision shifts not supported.\n");
4300      return false;
4301    }
4302
4303  op0 = gimple_assign_rhs1 (stmt);
4304  if (!vect_is_simple_use_1 (op0, stmt, loop_vinfo, bb_vinfo,
4305                             &def_stmt, &def, &dt[0], &vectype))
4306    {
4307      if (dump_enabled_p ())
4308        dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
4309                         "use not simple.\n");
4310      return false;
4311    }
4312  /* If op0 is an external or constant def use a vector type with
4313     the same size as the output vector type.  */
4314  if (!vectype)
4315    vectype = get_same_sized_vectype (TREE_TYPE (op0), vectype_out);
4316  if (vec_stmt)
4317    gcc_assert (vectype);
4318  if (!vectype)
4319    {
4320      if (dump_enabled_p ())
4321        dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
4322                         "no vectype for scalar type\n");
4323      return false;
4324    }
4325
4326  nunits_out = TYPE_VECTOR_SUBPARTS (vectype_out);
4327  nunits_in = TYPE_VECTOR_SUBPARTS (vectype);
4328  if (nunits_out != nunits_in)
4329    return false;
4330
4331  op1 = gimple_assign_rhs2 (stmt);
4332  if (!vect_is_simple_use_1 (op1, stmt, loop_vinfo, bb_vinfo, &def_stmt,
4333			     &def, &dt[1], &op1_vectype))
4334    {
4335      if (dump_enabled_p ())
4336        dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
4337                         "use not simple.\n");
4338      return false;
4339    }
4340
4341  if (loop_vinfo)
4342    vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
4343  else
4344    vf = 1;
4345
4346  /* Multiple types in SLP are handled by creating the appropriate number of
4347     vectorized stmts for each SLP node.  Hence, NCOPIES is always 1 in
4348     case of SLP.  */
4349  if (slp_node || PURE_SLP_STMT (stmt_info))
4350    ncopies = 1;
4351  else
4352    ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits_in;
4353
4354  gcc_assert (ncopies >= 1);
4355
4356  /* Determine whether the shift amount is a vector, or scalar.  If the
4357     shift/rotate amount is a vector, use the vector/vector shift optabs.  */
4358
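  /* In the non-SLP case, a shift amount defined by a statement inside the
     loop must be used as a vector (rather than scalar) shift operand.  */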
4359  if (dt[1] == vect_internal_def && !slp_node)
4360    scalar_shift_arg = false;
4361  else if (dt[1] == vect_constant_def
4362	   || dt[1] == vect_external_def
4363	   || dt[1] == vect_internal_def)
4364    {
4365      /* In SLP, we need to check whether the shift count is the same for
4366	 all the statements; in loops, a constant or invariant shift count
4367	 is always a scalar shift.  */
4368      if (slp_node)
4369	{
4370	  vec<gimple> stmts = SLP_TREE_SCALAR_STMTS (slp_node);
4371	  gimple slpstmt;
4372
4373	  FOR_EACH_VEC_ELT (stmts, k, slpstmt)
4374	    if (!operand_equal_p (gimple_assign_rhs2 (slpstmt), op1, 0))
4375	      scalar_shift_arg = false;
4376	}
4377    }
4378  else
4379    {
4380      if (dump_enabled_p ())
4381        dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
4382                         "operand mode requires invariant argument.\n");
4383      return false;
4384    }
4385
4386  /* Vector shifted by vector.  */
4387  if (!scalar_shift_arg)
4388    {
4389      optab = optab_for_tree_code (code, vectype, optab_vector);
4390      if (dump_enabled_p ())
4391        dump_printf_loc (MSG_NOTE, vect_location,
4392                         "vector/vector shift/rotate found.\n");
4393
4394      if (!op1_vectype)
4395	op1_vectype = get_same_sized_vectype (TREE_TYPE (op1), vectype_out);
4396      if (op1_vectype == NULL_TREE
4397	  || TYPE_MODE (op1_vectype) != TYPE_MODE (vectype))
4398	{
4399	  if (dump_enabled_p ())
4400	    dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
4401                             "unusable type for last operand in"
4402                             " vector/vector shift/rotate.\n");
4403	  return false;
4404	}
4405    }
4406  /* See if the machine has a vector shifted by scalar insn, and if not,
4407     whether it has a vector shifted by vector insn.  */
4408  else
4409    {
4410      optab = optab_for_tree_code (code, vectype, optab_scalar);
4411      if (optab
4412          && optab_handler (optab, TYPE_MODE (vectype)) != CODE_FOR_nothing)
4413        {
4414          if (dump_enabled_p ())
4415            dump_printf_loc (MSG_NOTE, vect_location,
4416                             "vector/scalar shift/rotate found.\n");
4417        }
4418      else
4419        {
4420          optab = optab_for_tree_code (code, vectype, optab_vector);
4421          if (optab
4422               && (optab_handler (optab, TYPE_MODE (vectype))
4423                      != CODE_FOR_nothing))
4424            {
4425	      scalar_shift_arg = false;
4426
4427              if (dump_enabled_p ())
4428                dump_printf_loc (MSG_NOTE, vect_location,
4429                                 "vector/vector shift/rotate found.\n");
4430
4431              /* Unlike the other binary operators, the rhs of a shift or
4432                 rotate is an int rather than the same type as the lhs, so
4433                 make sure the scalar has the right type if we are dealing
4434		 with vectors of long long/long/short/char.  */
4435              if (dt[1] == vect_constant_def)
4436                op1 = fold_convert (TREE_TYPE (vectype), op1);
4437	      else if (!useless_type_conversion_p (TREE_TYPE (vectype),
4438						   TREE_TYPE (op1)))
4439		{
4440		  if (slp_node
4441		      && TYPE_MODE (TREE_TYPE (vectype))
4442			 != TYPE_MODE (TREE_TYPE (op1)))
4443		    {
4444                      if (dump_enabled_p ())
4445                        dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
4446                                         "unusable type for last operand in"
4447                                         " vector/vector shift/rotate.\n");
4448                      return false;
4449		    }
4450		  if (vec_stmt && !slp_node)
4451		    {
4452		      op1 = fold_convert (TREE_TYPE (vectype), op1);
4453		      op1 = vect_init_vector (stmt, op1,
4454					      TREE_TYPE (vectype), NULL);
4455		    }
4456		}
4457            }
4458        }
4459    }
4460
4461  /* Supportable by target?  */
4462  if (!optab)
4463    {
4464      if (dump_enabled_p ())
4465        dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
4466                         "no optab.\n");
4467      return false;
4468    }
4469  vec_mode = TYPE_MODE (vectype);
4470  icode = (int) optab_handler (optab, vec_mode);
4471  if (icode == CODE_FOR_nothing)
4472    {
4473      if (dump_enabled_p ())
4474        dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
4475                         "op not supported by target.\n");
4476      /* Check only during analysis.  */
4477      if (GET_MODE_SIZE (vec_mode) != UNITS_PER_WORD
4478          || (vf < vect_min_worthwhile_factor (code)
4479              && !vec_stmt))
4480        return false;
4481      if (dump_enabled_p ())
4482        dump_printf_loc (MSG_NOTE, vect_location,
4483                         "proceeding using word mode.\n");
4484    }
4485
4486  /* Worthwhile without SIMD support?  Check only during analysis.  */
4487  if (!VECTOR_MODE_P (TYPE_MODE (vectype))
4488      && vf < vect_min_worthwhile_factor (code)
4489      && !vec_stmt)
4490    {
4491      if (dump_enabled_p ())
4492        dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
4493                         "not worthwhile without SIMD support.\n");
4494      return false;
4495    }
4496
4497  if (!vec_stmt) /* transformation not required.  */
4498    {
4499      STMT_VINFO_TYPE (stmt_info) = shift_vec_info_type;
4500      if (dump_enabled_p ())
4501        dump_printf_loc (MSG_NOTE, vect_location,
4502                         "=== vectorizable_shift ===\n");
4503      vect_model_simple_cost (stmt_info, ncopies, dt, NULL, NULL);
4504      return true;
4505    }
4506
4507  /** Transform.  **/
4508
4509  if (dump_enabled_p ())
4510    dump_printf_loc (MSG_NOTE, vect_location,
4511                     "transform binary/unary operation.\n");
4512
4513  /* Handle def.  */
4514  vec_dest = vect_create_destination_var (scalar_dest, vectype);
4515
4516  prev_stmt_info = NULL;
4517  for (j = 0; j < ncopies; j++)
4518    {
4519      /* Handle uses.  */
4520      if (j == 0)
4521        {
4522          if (scalar_shift_arg)
4523            {
4524              /* Vector shl and shr insn patterns can be defined with scalar
4525                 operand 2 (shift operand).  In this case, use constant or loop
4526                 invariant op1 directly, without extending it to vector mode
4527                 first.  */
4528              optab_op2_mode = insn_data[icode].operand[2].mode;
4529              if (!VECTOR_MODE_P (optab_op2_mode))
4530                {
4531                  if (dump_enabled_p ())
4532                    dump_printf_loc (MSG_NOTE, vect_location,
4533                                     "operand 1 using scalar mode.\n");
4534                  vec_oprnd1 = op1;
4535                  vec_oprnds1.create (slp_node ? slp_node->vec_stmts_size : 1);
4536                  vec_oprnds1.quick_push (vec_oprnd1);
4537                  if (slp_node)
4538                    {
4539                      /* Store vec_oprnd1 for every vector stmt to be created
4540                         for SLP_NODE.  We check during the analysis that all
4541                         the shift arguments are the same.
4542                         TODO: Allow different constants for different vector
4543                         stmts generated for an SLP instance.  */
4544                      for (k = 0; k < slp_node->vec_stmts_size - 1; k++)
4545                        vec_oprnds1.quick_push (vec_oprnd1);
4546                    }
4547                }
4548            }
4549
4550          /* vec_oprnd1 is available if operand 1 should be of a scalar-type
4551             (a special case for certain kinds of vector shifts); otherwise,
4552             operand 1 should be of a vector type (the usual case).  */
4553          if (vec_oprnd1)
4554            vect_get_vec_defs (op0, NULL_TREE, stmt, &vec_oprnds0, NULL,
4555                               slp_node, -1);
4556          else
4557            vect_get_vec_defs (op0, op1, stmt, &vec_oprnds0, &vec_oprnds1,
4558                               slp_node, -1);
4559        }
4560      else
4561        vect_get_vec_defs_for_stmt_copy (dt, &vec_oprnds0, &vec_oprnds1);
4562
4563      /* Arguments are ready.  Create the new vector stmt.  */
4564      FOR_EACH_VEC_ELT (vec_oprnds0, i, vop0)
4565        {
4566          vop1 = vec_oprnds1[i];
4567	  new_stmt = gimple_build_assign (vec_dest, code, vop0, vop1);
4568          new_temp = make_ssa_name (vec_dest, new_stmt);
4569          gimple_assign_set_lhs (new_stmt, new_temp);
4570          vect_finish_stmt_generation (stmt, new_stmt, gsi);
4571          if (slp_node)
4572            SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
4573        }
4574
4575      if (slp_node)
4576        continue;
4577
4578      if (j == 0)
4579        STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
4580      else
4581        STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
4582      prev_stmt_info = vinfo_for_stmt (new_stmt);
4583    }
4584
4585  vec_oprnds0.release ();
4586  vec_oprnds1.release ();
4587
4588  return true;
4589}
4590
4591
4592/* Function vectorizable_operation.
4593
4594   Check if STMT performs a binary, unary or ternary operation that can
4595   be vectorized.
4596   If VEC_STMT is also passed, vectorize the STMT: create a vectorized
4597   stmt to replace it, put it in VEC_STMT, and insert it at GSI.
4598   Return FALSE if not a vectorizable STMT, TRUE otherwise.  */
4599
4600static bool
4601vectorizable_operation (gimple stmt, gimple_stmt_iterator *gsi,
4602			gimple *vec_stmt, slp_tree slp_node)
4603{
4604  tree vec_dest;
4605  tree scalar_dest;
4606  tree op0, op1 = NULL_TREE, op2 = NULL_TREE;
4607  stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
4608  tree vectype;
4609  loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
4610  enum tree_code code;
4611  machine_mode vec_mode;
4612  tree new_temp;
4613  int op_type;
4614  optab optab;
4615  int icode;
4616  tree def;
4617  gimple def_stmt;
4618  enum vect_def_type dt[3]
4619    = {vect_unknown_def_type, vect_unknown_def_type, vect_unknown_def_type};
4620  gimple new_stmt = NULL;
4621  stmt_vec_info prev_stmt_info;
4622  int nunits_in;
4623  int nunits_out;
4624  tree vectype_out;
4625  int ncopies;
4626  int j, i;
4627  vec<tree> vec_oprnds0 = vNULL;
4628  vec<tree> vec_oprnds1 = vNULL;
4629  vec<tree> vec_oprnds2 = vNULL;
4630  tree vop0, vop1, vop2;
4631  bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
4632  int vf;
4633
4634  if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
4635    return false;
4636
4637  if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def)
4638    return false;
4639
4640  /* Is STMT a vectorizable binary/unary operation?   */
4641  if (!is_gimple_assign (stmt))
4642    return false;
4643
4644  if (TREE_CODE (gimple_assign_lhs (stmt)) != SSA_NAME)
4645    return false;
4646
4647  code = gimple_assign_rhs_code (stmt);
4648
4649  /* For pointer addition, we should use the normal plus for
4650     the vector addition.  */
4651  if (code == POINTER_PLUS_EXPR)
4652    code = PLUS_EXPR;
4653
4654  /* Support only unary, binary and ternary operations.  */
4655  op_type = TREE_CODE_LENGTH (code);
4656  if (op_type != unary_op && op_type != binary_op && op_type != ternary_op)
4657    {
4658      if (dump_enabled_p ())
4659        dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
4660                         "num. args = %d (not unary/binary/ternary op).\n",
4661                         op_type);
4662      return false;
4663    }
4664
4665  scalar_dest = gimple_assign_lhs (stmt);
4666  vectype_out = STMT_VINFO_VECTYPE (stmt_info);
4667
4668  /* Most operations cannot handle bit-precision types without extra
4669     truncations.  */
4670  if ((TYPE_PRECISION (TREE_TYPE (scalar_dest))
4671       != GET_MODE_PRECISION (TYPE_MODE (TREE_TYPE (scalar_dest))))
4672      /* Exceptions are the bitwise binary operations.  */
4673      && code != BIT_IOR_EXPR
4674      && code != BIT_XOR_EXPR
4675      && code != BIT_AND_EXPR)
4676    {
4677      if (dump_enabled_p ())
4678        dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
4679                         "bit-precision arithmetic not supported.\n");
4680      return false;
4681    }
4682
4683  op0 = gimple_assign_rhs1 (stmt);
4684  if (!vect_is_simple_use_1 (op0, stmt, loop_vinfo, bb_vinfo,
4685			     &def_stmt, &def, &dt[0], &vectype))
4686    {
4687      if (dump_enabled_p ())
4688        dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
4689                         "use not simple.\n");
4690      return false;
4691    }
4692  /* If op0 is an external or constant def use a vector type with
4693     the same size as the output vector type.  */
4694  if (!vectype)
4695    vectype = get_same_sized_vectype (TREE_TYPE (op0), vectype_out);
4696  if (vec_stmt)
4697    gcc_assert (vectype);
4698  if (!vectype)
4699    {
4700      if (dump_enabled_p ())
4701        {
4702          dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
4703                           "no vectype for scalar type ");
4704          dump_generic_expr (MSG_MISSED_OPTIMIZATION, TDF_SLIM,
4705                             TREE_TYPE (op0));
4706          dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
4707        }
4708
4709      return false;
4710    }
4711
4712  nunits_out = TYPE_VECTOR_SUBPARTS (vectype_out);
4713  nunits_in = TYPE_VECTOR_SUBPARTS (vectype);
4714  if (nunits_out != nunits_in)
4715    return false;
4716
4717  if (op_type == binary_op || op_type == ternary_op)
4718    {
4719      op1 = gimple_assign_rhs2 (stmt);
4720      if (!vect_is_simple_use (op1, stmt, loop_vinfo, bb_vinfo, &def_stmt,
4721			       &def, &dt[1]))
4722	{
4723	  if (dump_enabled_p ())
4724	    dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
4725                             "use not simple.\n");
4726	  return false;
4727	}
4728    }
4729  if (op_type == ternary_op)
4730    {
4731      op2 = gimple_assign_rhs3 (stmt);
4732      if (!vect_is_simple_use (op2, stmt, loop_vinfo, bb_vinfo, &def_stmt,
4733			       &def, &dt[2]))
4734	{
4735	  if (dump_enabled_p ())
4736	    dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
4737                             "use not simple.\n");
4738	  return false;
4739	}
4740    }
4741
4742  if (loop_vinfo)
4743    vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
4744  else
4745    vf = 1;
4746
4747  /* Multiple types in SLP are handled by creating the appropriate number of
4748     vectorized stmts for each SLP node.  Hence, NCOPIES is always 1 in
4749     case of SLP.  */
4750  if (slp_node || PURE_SLP_STMT (stmt_info))
4751    ncopies = 1;
4752  else
4753    ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits_in;
4754
4755  gcc_assert (ncopies >= 1);
4756
4757  /* Shifts are handled in vectorizable_shift ().  */
4758  if (code == LSHIFT_EXPR || code == RSHIFT_EXPR || code == LROTATE_EXPR
4759      || code == RROTATE_EXPR)
4760   return false;
4761
4762  /* Supportable by target?  */
4763
4764  vec_mode = TYPE_MODE (vectype);
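  /* Highpart multiplication support is queried through can_mult_highpart_p
     rather than an optab; LAST_INSN_CODE merely marks it as supported.  */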
4765  if (code == MULT_HIGHPART_EXPR)
4766    {
4767      if (can_mult_highpart_p (vec_mode, TYPE_UNSIGNED (vectype)))
4768	icode = LAST_INSN_CODE;
4769      else
4770	icode = CODE_FOR_nothing;
4771    }
4772  else
4773    {
4774      optab = optab_for_tree_code (code, vectype, optab_default);
4775      if (!optab)
4776	{
4777          if (dump_enabled_p ())
4778            dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
4779                             "no optab.\n");
4780	  return false;
4781	}
4782      icode = (int) optab_handler (optab, vec_mode);
4783    }
4784
4785  if (icode == CODE_FOR_nothing)
4786    {
4787      if (dump_enabled_p ())
4788	dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
4789                         "op not supported by target.\n");
4790      /* Check only during analysis.  */
4791      if (GET_MODE_SIZE (vec_mode) != UNITS_PER_WORD
4792	  || (!vec_stmt && vf < vect_min_worthwhile_factor (code)))
4793        return false;
4794      if (dump_enabled_p ())
4795	dump_printf_loc (MSG_NOTE, vect_location,
4796                         "proceeding using word mode.\n");
4797    }
4798
4799  /* Worthwhile without SIMD support?  Check only during analysis.  */
4800  if (!VECTOR_MODE_P (vec_mode)
4801      && !vec_stmt
4802      && vf < vect_min_worthwhile_factor (code))
4803    {
4804      if (dump_enabled_p ())
4805        dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
4806                         "not worthwhile without SIMD support.\n");
4807      return false;
4808    }
4809
4810  if (!vec_stmt) /* transformation not required.  */
4811    {
4812      STMT_VINFO_TYPE (stmt_info) = op_vec_info_type;
4813      if (dump_enabled_p ())
4814        dump_printf_loc (MSG_NOTE, vect_location,
4815                         "=== vectorizable_operation ===\n");
4816      vect_model_simple_cost (stmt_info, ncopies, dt, NULL, NULL);
4817      return true;
4818    }
4819
4820  /** Transform.  **/
4821
4822  if (dump_enabled_p ())
4823    dump_printf_loc (MSG_NOTE, vect_location,
4824                     "transform binary/unary operation.\n");
4825
4826  /* Handle def.  */
4827  vec_dest = vect_create_destination_var (scalar_dest, vectype);
4828
4829  /* In case the vectorization factor (VF) is bigger than the number
4830     of elements that we can fit in a vectype (nunits), we have to generate
4831     more than one vector stmt, i.e., we need to "unroll" the
4832     vector stmt by a factor VF/nunits.  In doing so, we record a pointer
4833     from one copy of the vector stmt to the next, in the field
4834     STMT_VINFO_RELATED_STMT.  This is necessary in order to allow following
4835     stages to find the correct vector defs to be used when vectorizing
4836     stmts that use the defs of the current stmt.  The example below
4837     illustrates the vectorization process when VF=16 and nunits=4 (i.e.,
4838     we need to create 4 vectorized stmts):
4839
4840     before vectorization:
4841                                RELATED_STMT    VEC_STMT
4842        S1:     x = memref      -               -
4843        S2:     z = x + 1       -               -
4844
4845     step 1: vectorize stmt S1 (done in vectorizable_load. See more details
4846             there):
4847                                RELATED_STMT    VEC_STMT
4848        VS1_0:  vx0 = memref0   VS1_1           -
4849        VS1_1:  vx1 = memref1   VS1_2           -
4850        VS1_2:  vx2 = memref2   VS1_3           -
4851        VS1_3:  vx3 = memref3   -               -
4852        S1:     x = load        -               VS1_0
4853        S2:     z = x + 1       -               -
4854
4855     step2: vectorize stmt S2 (done here):
4856        To vectorize stmt S2 we first need to find the relevant vector
4857        def for the first operand 'x'.  This is, as usual, obtained from
4858        the vector stmt recorded in the STMT_VINFO_VEC_STMT of the stmt
4859        that defines 'x' (S1).  This way we find the stmt VS1_0, and the
4860        relevant vector def 'vx0'.  Having found 'vx0' we can generate
4861        the vector stmt VS2_0, and as usual, record it in the
4862        STMT_VINFO_VEC_STMT of stmt S2.
4863        When creating the second copy (VS2_1), we obtain the relevant vector
4864        def from the vector stmt recorded in the STMT_VINFO_RELATED_STMT of
4865        stmt VS1_0.  This way we find the stmt VS1_1 and the relevant
4866        vector def 'vx1'.  Using 'vx1' we create stmt VS2_1 and record a
4867        pointer to it in the STMT_VINFO_RELATED_STMT of the vector stmt VS2_0.
4868        Similarly when creating stmts VS2_2 and VS2_3.  This is the resulting
4869        chain of stmts and pointers:
4870                                RELATED_STMT    VEC_STMT
4871        VS1_0:  vx0 = memref0   VS1_1           -
4872        VS1_1:  vx1 = memref1   VS1_2           -
4873        VS1_2:  vx2 = memref2   VS1_3           -
4874        VS1_3:  vx3 = memref3   -               -
4875        S1:     x = load        -               VS1_0
4876        VS2_0:  vz0 = vx0 + v1  VS2_1           -
4877        VS2_1:  vz1 = vx1 + v1  VS2_2           -
4878        VS2_2:  vz2 = vx2 + v1  VS2_3           -
4879        VS2_3:  vz3 = vx3 + v1  -               -
4880        S2:     z = x + 1       -               VS2_0  */
4881
4882  prev_stmt_info = NULL;
4883  for (j = 0; j < ncopies; j++)
4884    {
4885      /* Handle uses.  */
4886      if (j == 0)
4887	{
4888	  if (op_type == binary_op || op_type == ternary_op)
4889	    vect_get_vec_defs (op0, op1, stmt, &vec_oprnds0, &vec_oprnds1,
4890			       slp_node, -1);
4891	  else
4892	    vect_get_vec_defs (op0, NULL_TREE, stmt, &vec_oprnds0, NULL,
4893			       slp_node, -1);
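	  /* vect_get_vec_defs handles at most two operands, so for a
	     ternary operation the third operand is fetched separately
	     below.  */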
4894	  if (op_type == ternary_op)
4895	    {
4896	      vec_oprnds2.create (1);
4897	      vec_oprnds2.quick_push (vect_get_vec_def_for_operand (op2,
4898		                                                    stmt,
4899								    NULL));
4900	    }
4901	}
4902      else
4903	{
4904	  vect_get_vec_defs_for_stmt_copy (dt, &vec_oprnds0, &vec_oprnds1);
4905	  if (op_type == ternary_op)
4906	    {
4907	      tree vec_oprnd = vec_oprnds2.pop ();
4908	      vec_oprnds2.quick_push (vect_get_vec_def_for_stmt_copy (dt[2],
4909							           vec_oprnd));
4910	    }
4911	}
4912
4913      /* Arguments are ready.  Create the new vector stmt.  */
4914      FOR_EACH_VEC_ELT (vec_oprnds0, i, vop0)
4915        {
4916	  vop1 = ((op_type == binary_op || op_type == ternary_op)
4917		  ? vec_oprnds1[i] : NULL_TREE);
4918	  vop2 = ((op_type == ternary_op)
4919		  ? vec_oprnds2[i] : NULL_TREE);
4920	  new_stmt = gimple_build_assign (vec_dest, code, vop0, vop1, vop2);
4921	  new_temp = make_ssa_name (vec_dest, new_stmt);
4922	  gimple_assign_set_lhs (new_stmt, new_temp);
4923	  vect_finish_stmt_generation (stmt, new_stmt, gsi);
4924          if (slp_node)
4925	    SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
4926        }
4927
4928      if (slp_node)
4929        continue;
4930
4931      if (j == 0)
4932	STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
4933      else
4934	STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
4935      prev_stmt_info = vinfo_for_stmt (new_stmt);
4936    }
4937
4938  vec_oprnds0.release ();
4939  vec_oprnds1.release ();
4940  vec_oprnds2.release ();
4941
4942  return true;
4943}
4944
4945/* A helper function to ensure data reference DR's base alignment
4946   for STMT_INFO.  */
4947
4948static void
4949ensure_base_align (stmt_vec_info stmt_info, struct data_reference *dr)
4950{
4951  if (!dr->aux)
4952    return;
4953
4954  if (DR_VECT_AUX (dr)->base_misaligned)
4955    {
4956      tree vectype = STMT_VINFO_VECTYPE (stmt_info);
4957      tree base_decl = DR_VECT_AUX (dr)->base_decl;
4958
4959      if (decl_in_symtab_p (base_decl))
4960	symtab_node::get (base_decl)->increase_alignment (TYPE_ALIGN (vectype));
4961      else
4962	{
4963          DECL_ALIGN (base_decl) = TYPE_ALIGN (vectype);
4964          DECL_USER_ALIGN (base_decl) = 1;
4965	}
4966      DR_VECT_AUX (dr)->base_misaligned = false;
4967    }
4968}
4969
4970
/* Given a vector type VECTYPE, return the VECTOR_CST mask that implements
   reversal of the vector elements.  If that is impossible to do,
   return NULL_TREE.  */
4974
4975static tree
4976perm_mask_for_reverse (tree vectype)
4977{
4978  int i, nunits;
4979  unsigned char *sel;
4980
4981  nunits = TYPE_VECTOR_SUBPARTS (vectype);
4982  sel = XALLOCAVEC (unsigned char, nunits);
4983
4984  for (i = 0; i < nunits; ++i)
4985    sel[i] = nunits - 1 - i;
4986
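  /* E.g. for NUNITS == 4 the selector built above is {3, 2, 1, 0}:
     element I of the result is element NUNITS - 1 - I of the input.  */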
4987  if (!can_vec_perm_p (TYPE_MODE (vectype), false, sel))
4988    return NULL_TREE;
4989  return vect_gen_perm_mask_checked (vectype, sel);
4990}
4991
4992/* Function vectorizable_store.
4993
   Check if STMT defines a non-scalar data-ref (array/pointer/structure) that
4995   can be vectorized.
4996   If VEC_STMT is also passed, vectorize the STMT: create a vectorized
4997   stmt to replace it, put it in VEC_STMT, and insert it at BSI.
4998   Return FALSE if not a vectorizable STMT, TRUE otherwise.  */
4999
5000static bool
5001vectorizable_store (gimple stmt, gimple_stmt_iterator *gsi, gimple *vec_stmt,
5002                    slp_tree slp_node)
5003{
5004  tree scalar_dest;
5005  tree data_ref;
5006  tree op;
5007  tree vec_oprnd = NULL_TREE;
5008  stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
5009  struct data_reference *dr = STMT_VINFO_DATA_REF (stmt_info), *first_dr = NULL;
5010  tree vectype = STMT_VINFO_VECTYPE (stmt_info);
5011  tree elem_type;
5012  loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
5013  struct loop *loop = NULL;
5014  machine_mode vec_mode;
5015  tree dummy;
5016  enum dr_alignment_support alignment_support_scheme;
5017  tree def;
5018  gimple def_stmt;
5019  enum vect_def_type dt;
5020  stmt_vec_info prev_stmt_info = NULL;
5021  tree dataref_ptr = NULL_TREE;
5022  tree dataref_offset = NULL_TREE;
5023  gimple ptr_incr = NULL;
5024  int nunits = TYPE_VECTOR_SUBPARTS (vectype);
5025  int ncopies;
5026  int j;
5027  gimple next_stmt, first_stmt = NULL;
5028  bool grouped_store = false;
5029  bool store_lanes_p = false;
5030  unsigned int group_size, i;
5031  vec<tree> dr_chain = vNULL;
5032  vec<tree> oprnds = vNULL;
5033  vec<tree> result_chain = vNULL;
5034  bool inv_p;
5035  bool negative = false;
5036  tree offset = NULL_TREE;
5037  vec<tree> vec_oprnds = vNULL;
5038  bool slp = (slp_node != NULL);
5039  unsigned int vec_num;
5040  bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
5041  tree aggr_type;
5042
5043  if (loop_vinfo)
5044    loop = LOOP_VINFO_LOOP (loop_vinfo);
5045
5046  /* Multiple types in SLP are handled by creating the appropriate number of
5047     vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in
5048     case of SLP.  */
5049  if (slp || PURE_SLP_STMT (stmt_info))
5050    ncopies = 1;
5051  else
5052    ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits;
5053
5054  gcc_assert (ncopies >= 1);
5055
5056  /* FORNOW. This restriction should be relaxed.  */
5057  if (loop && nested_in_vect_loop_p (loop, stmt) && ncopies > 1)
5058    {
5059      if (dump_enabled_p ())
5060        dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5061                         "multiple types in nested loop.\n");
5062      return false;
5063    }
5064
5065  if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
5066    return false;
5067
5068  if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def)
5069    return false;
5070
5071  /* Is vectorizable store? */
5072
5073  if (!is_gimple_assign (stmt))
5074    return false;
5075
5076  scalar_dest = gimple_assign_lhs (stmt);
5077  if (TREE_CODE (scalar_dest) == VIEW_CONVERT_EXPR
5078      && is_pattern_stmt_p (stmt_info))
5079    scalar_dest = TREE_OPERAND (scalar_dest, 0);
5080  if (TREE_CODE (scalar_dest) != ARRAY_REF
5081      && TREE_CODE (scalar_dest) != BIT_FIELD_REF
5082      && TREE_CODE (scalar_dest) != INDIRECT_REF
5083      && TREE_CODE (scalar_dest) != COMPONENT_REF
5084      && TREE_CODE (scalar_dest) != IMAGPART_EXPR
5085      && TREE_CODE (scalar_dest) != REALPART_EXPR
5086      && TREE_CODE (scalar_dest) != MEM_REF)
5087    return false;
5088
5089  gcc_assert (gimple_assign_single_p (stmt));
5090  op = gimple_assign_rhs1 (stmt);
5091  if (!vect_is_simple_use (op, stmt, loop_vinfo, bb_vinfo, &def_stmt,
5092			   &def, &dt))
5093    {
5094      if (dump_enabled_p ())
5095        dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5096                         "use not simple.\n");
5097      return false;
5098    }
5099
5100  elem_type = TREE_TYPE (vectype);
5101  vec_mode = TYPE_MODE (vectype);
5102
5103  /* FORNOW. In some cases can vectorize even if data-type not supported
5104     (e.g. - array initialization with 0).  */
5105  if (optab_handler (mov_optab, vec_mode) == CODE_FOR_nothing)
5106    return false;
5107
5108  if (!STMT_VINFO_DATA_REF (stmt_info))
5109    return false;
5110
5111  negative =
5112    tree_int_cst_compare (loop && nested_in_vect_loop_p (loop, stmt)
5113			  ? STMT_VINFO_DR_STEP (stmt_info) : DR_STEP (dr),
5114			  size_zero_node) < 0;
5115  if (negative && ncopies > 1)
5116    {
5117      if (dump_enabled_p ())
5118        dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5119			 "multiple types with negative step.\n");
5120      return false;
5121    }
5122
5123  if (negative)
5124    {
5125      gcc_assert (!grouped_store);
5126      alignment_support_scheme = vect_supportable_dr_alignment (dr, false);
5127      if (alignment_support_scheme != dr_aligned
5128	  && alignment_support_scheme != dr_unaligned_supported)
5129	{
5130	  if (dump_enabled_p ())
5131	    dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5132			     "negative step but alignment required.\n");
5133	  return false;
5134	}
5135      if (dt != vect_constant_def
5136	  && dt != vect_external_def
5137	  && !perm_mask_for_reverse (vectype))
5138	{
5139	  if (dump_enabled_p ())
5140	    dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5141			     "negative step and reversing not supported.\n");
5142	  return false;
5143	}
5144    }
5145
5146  if (STMT_VINFO_GROUPED_ACCESS (stmt_info))
5147    {
5148      grouped_store = true;
5149      first_stmt = GROUP_FIRST_ELEMENT (stmt_info);
5150      if (!slp && !PURE_SLP_STMT (stmt_info))
5151	{
5152	  group_size = GROUP_SIZE (vinfo_for_stmt (first_stmt));
5153	  if (vect_store_lanes_supported (vectype, group_size))
5154	    store_lanes_p = true;
5155	  else if (!vect_grouped_store_supported (vectype, group_size))
5156	    return false;
5157	}
5158
5159      if (first_stmt == stmt)
5160	{
5161          /* STMT is the leader of the group. Check the operands of all the
5162             stmts of the group.  */
5163          next_stmt = GROUP_NEXT_ELEMENT (stmt_info);
5164          while (next_stmt)
5165            {
5166	      gcc_assert (gimple_assign_single_p (next_stmt));
5167	      op = gimple_assign_rhs1 (next_stmt);
5168              if (!vect_is_simple_use (op, next_stmt, loop_vinfo, bb_vinfo,
5169				       &def_stmt, &def, &dt))
5170                {
5171                  if (dump_enabled_p ())
5172                    dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5173                                     "use not simple.\n");
5174                  return false;
5175                }
5176              next_stmt = GROUP_NEXT_ELEMENT (vinfo_for_stmt (next_stmt));
5177            }
5178        }
5179    }
5180
5181  if (!vec_stmt) /* transformation not required.  */
5182    {
5183      STMT_VINFO_TYPE (stmt_info) = store_vec_info_type;
5184      vect_model_store_cost (stmt_info, ncopies, store_lanes_p, dt,
5185			     NULL, NULL, NULL);
5186      return true;
5187    }
5188
5189  /** Transform.  **/
5190
5191  ensure_base_align (stmt_info, dr);
5192
5193  if (grouped_store)
5194    {
5195      first_dr = STMT_VINFO_DATA_REF (vinfo_for_stmt (first_stmt));
5196      group_size = GROUP_SIZE (vinfo_for_stmt (first_stmt));
5197
5198      GROUP_STORE_COUNT (vinfo_for_stmt (first_stmt))++;
5199
5200      /* FORNOW */
5201      gcc_assert (!loop || !nested_in_vect_loop_p (loop, stmt));
5202
5203      /* We vectorize all the stmts of the interleaving group when we
5204	 reach the last stmt in the group.  */
5205      if (GROUP_STORE_COUNT (vinfo_for_stmt (first_stmt))
5206	  < GROUP_SIZE (vinfo_for_stmt (first_stmt))
5207	  && !slp)
5208	{
5209	  *vec_stmt = NULL;
5210	  return true;
5211	}
5212
5213      if (slp)
5214        {
5215          grouped_store = false;
5216          /* VEC_NUM is the number of vect stmts to be created for this
5217             group.  */
5218          vec_num = SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node);
5219          first_stmt = SLP_TREE_SCALAR_STMTS (slp_node)[0];
5220          first_dr = STMT_VINFO_DATA_REF (vinfo_for_stmt (first_stmt));
5221	  op = gimple_assign_rhs1 (first_stmt);
5222        }
5223      else
5224        /* VEC_NUM is the number of vect stmts to be created for this
5225           group.  */
5226	vec_num = group_size;
5227    }
5228  else
5229    {
5230      first_stmt = stmt;
5231      first_dr = dr;
5232      group_size = vec_num = 1;
5233    }
5234
5235  if (dump_enabled_p ())
5236    dump_printf_loc (MSG_NOTE, vect_location,
5237                     "transform store. ncopies = %d\n", ncopies);
5238
5239  dr_chain.create (group_size);
5240  oprnds.create (group_size);
5241
5242  alignment_support_scheme = vect_supportable_dr_alignment (first_dr, false);
5243  gcc_assert (alignment_support_scheme);
5244  /* Targets with store-lane instructions must not require explicit
5245     realignment.  */
5246  gcc_assert (!store_lanes_p
5247	      || alignment_support_scheme == dr_aligned
5248	      || alignment_support_scheme == dr_unaligned_supported);
5249
5250  if (negative)
5251    offset = size_int (-TYPE_VECTOR_SUBPARTS (vectype) + 1);
5252
5253  if (store_lanes_p)
5254    aggr_type = build_array_type_nelts (elem_type, vec_num * nunits);
5255  else
5256    aggr_type = vectype;
5257
5258  /* In case the vectorization factor (VF) is bigger than the number
5259     of elements that we can fit in a vectype (nunits), we have to generate
5260     more than one vector stmt - i.e - we need to "unroll" the
5261     vector stmt by a factor VF/nunits.  For more details see documentation in
5262     vect_get_vec_def_for_copy_stmt.  */
5263
5264  /* In case of interleaving (non-unit grouped access):
5265
5266        S1:  &base + 2 = x2
5267        S2:  &base = x0
5268        S3:  &base + 1 = x1
5269        S4:  &base + 3 = x3
5270
     We create vectorized stores starting from the base address (the access of
     the first stmt in the chain, S2 in the above example) when the last store
     stmt of the chain (S4) is reached:
5274
5275        VS1: &base = vx2
5276	VS2: &base + vec_size*1 = vx0
5277	VS3: &base + vec_size*2 = vx1
5278	VS4: &base + vec_size*3 = vx3
5279
5280     Then permutation statements are generated:
5281
5282	VS5: vx5 = VEC_PERM_EXPR < vx0, vx3, {0, 8, 1, 9, 2, 10, 3, 11} >
5283	VS6: vx6 = VEC_PERM_EXPR < vx0, vx3, {4, 12, 5, 13, 6, 14, 7, 15} >
5284	...
5285
5286     And they are put in STMT_VINFO_VEC_STMT of the corresponding scalar stmts
5287     (the order of the data-refs in the output of vect_permute_store_chain
5288     corresponds to the order of scalar stmts in the interleaving chain - see
5289     the documentation of vect_permute_store_chain()).
5290
5291     In case of both multiple types and interleaving, above vector stores and
5292     permutation stmts are created for every copy.  The result vector stmts are
5293     put in STMT_VINFO_VEC_STMT for the first copy and in the corresponding
5294     STMT_VINFO_RELATED_STMT for the next copies.
5295  */
5296
5297  prev_stmt_info = NULL;
5298  for (j = 0; j < ncopies; j++)
5299    {
5300      gimple new_stmt;
5301
5302      if (j == 0)
5303	{
5304          if (slp)
5305            {
5306	      /* Get vectorized arguments for SLP_NODE.  */
5307              vect_get_vec_defs (op, NULL_TREE, stmt, &vec_oprnds,
5308                                 NULL, slp_node, -1);
5309
5310              vec_oprnd = vec_oprnds[0];
5311            }
5312          else
5313            {
5314	      /* For interleaved stores we collect vectorized defs for all the
5315		 stores in the group in DR_CHAIN and OPRNDS. DR_CHAIN is then
5316		 used as an input to vect_permute_store_chain(), and OPRNDS as
5317		 an input to vect_get_vec_def_for_stmt_copy() for the next copy.
5318
5319		 If the store is not grouped, GROUP_SIZE is 1, and DR_CHAIN and
5320		 OPRNDS are of size 1.  */
5321	      next_stmt = first_stmt;
5322	      for (i = 0; i < group_size; i++)
5323		{
5324		  /* Since gaps are not supported for interleaved stores,
5325		     GROUP_SIZE is the exact number of stmts in the chain.
5326		     Therefore, NEXT_STMT can't be NULL_TREE.  In case that
5327		     there is no interleaving, GROUP_SIZE is 1, and only one
5328		     iteration of the loop will be executed.  */
5329		  gcc_assert (next_stmt
5330			      && gimple_assign_single_p (next_stmt));
5331		  op = gimple_assign_rhs1 (next_stmt);
5332
5333		  vec_oprnd = vect_get_vec_def_for_operand (op, next_stmt,
5334							    NULL);
5335		  dr_chain.quick_push (vec_oprnd);
5336		  oprnds.quick_push (vec_oprnd);
5337		  next_stmt = GROUP_NEXT_ELEMENT (vinfo_for_stmt (next_stmt));
5338		}
5339	    }
5340
	  /* We should have caught mismatched types earlier.  */
5342	  gcc_assert (useless_type_conversion_p (vectype,
5343						 TREE_TYPE (vec_oprnd)));
5344	  bool simd_lane_access_p
5345	    = STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info);
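	  /* For a SIMD lane access to a local variable with zero offset we
	     can address the data directly off the variable's address with a
	     constant offset instead of creating a separate data-ref
	     pointer.  */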
5346	  if (simd_lane_access_p
5347	      && TREE_CODE (DR_BASE_ADDRESS (first_dr)) == ADDR_EXPR
5348	      && VAR_P (TREE_OPERAND (DR_BASE_ADDRESS (first_dr), 0))
5349	      && integer_zerop (DR_OFFSET (first_dr))
5350	      && integer_zerop (DR_INIT (first_dr))
5351	      && alias_sets_conflict_p (get_alias_set (aggr_type),
5352					get_alias_set (DR_REF (first_dr))))
5353	    {
5354	      dataref_ptr = unshare_expr (DR_BASE_ADDRESS (first_dr));
5355	      dataref_offset = build_int_cst (reference_alias_ptr_type
5356					      (DR_REF (first_dr)), 0);
5357	      inv_p = false;
5358	    }
5359	  else
5360	    dataref_ptr
5361	      = vect_create_data_ref_ptr (first_stmt, aggr_type,
5362					  simd_lane_access_p ? loop : NULL,
5363					  offset, &dummy, gsi, &ptr_incr,
5364					  simd_lane_access_p, &inv_p);
5365	  gcc_assert (bb_vinfo || !inv_p);
5366	}
5367      else
5368	{
5369	  /* For interleaved stores we created vectorized defs for all the
5370	     defs stored in OPRNDS in the previous iteration (previous copy).
5371	     DR_CHAIN is then used as an input to vect_permute_store_chain(),
5372	     and OPRNDS as an input to vect_get_vec_def_for_stmt_copy() for the
5373	     next copy.
5374	     If the store is not grouped, GROUP_SIZE is 1, and DR_CHAIN and
5375	     OPRNDS are of size 1.  */
5376	  for (i = 0; i < group_size; i++)
5377	    {
5378	      op = oprnds[i];
5379	      vect_is_simple_use (op, NULL, loop_vinfo, bb_vinfo, &def_stmt,
5380				  &def, &dt);
5381	      vec_oprnd = vect_get_vec_def_for_stmt_copy (dt, op);
5382	      dr_chain[i] = vec_oprnd;
5383	      oprnds[i] = vec_oprnd;
5384	    }
5385	  if (dataref_offset)
5386	    dataref_offset
5387	      = int_const_binop (PLUS_EXPR, dataref_offset,
5388				 TYPE_SIZE_UNIT (aggr_type));
5389	  else
5390	    dataref_ptr = bump_vector_ptr (dataref_ptr, ptr_incr, gsi, stmt,
5391					   TYPE_SIZE_UNIT (aggr_type));
5392	}
5393
5394      if (store_lanes_p)
5395	{
5396	  tree vec_array;
5397
5398	  /* Combine all the vectors into an array.  */
5399	  vec_array = create_vector_array (vectype, vec_num);
5400	  for (i = 0; i < vec_num; i++)
5401	    {
5402	      vec_oprnd = dr_chain[i];
5403	      write_vector_array (stmt, gsi, vec_oprnd, vec_array, i);
5404	    }
5405
5406	  /* Emit:
5407	       MEM_REF[...all elements...] = STORE_LANES (VEC_ARRAY).  */
5408	  data_ref = create_array_ref (aggr_type, dataref_ptr, first_dr);
5409	  new_stmt = gimple_build_call_internal (IFN_STORE_LANES, 1, vec_array);
5410	  gimple_call_set_lhs (new_stmt, data_ref);
5411	  vect_finish_stmt_generation (stmt, new_stmt, gsi);
5412	}
5413      else
5414	{
5415	  new_stmt = NULL;
5416	  if (grouped_store)
5417	    {
5418	      if (j == 0)
5419		result_chain.create (group_size);
5420	      /* Permute.  */
5421	      vect_permute_store_chain (dr_chain, group_size, stmt, gsi,
5422					&result_chain);
5423	    }
5424
5425	  next_stmt = first_stmt;
5426	  for (i = 0; i < vec_num; i++)
5427	    {
5428	      unsigned align, misalign;
5429
5430	      if (i > 0)
5431		/* Bump the vector pointer.  */
5432		dataref_ptr = bump_vector_ptr (dataref_ptr, ptr_incr, gsi,
5433					       stmt, NULL_TREE);
5434
5435	      if (slp)
5436		vec_oprnd = vec_oprnds[i];
5437	      else if (grouped_store)
5438		/* For grouped stores vectorized defs are interleaved in
5439		   vect_permute_store_chain().  */
5440		vec_oprnd = result_chain[i];
5441
5442	      data_ref = build2 (MEM_REF, TREE_TYPE (vec_oprnd), dataref_ptr,
5443				 dataref_offset
5444				 ? dataref_offset
5445				 : build_int_cst (reference_alias_ptr_type
5446						  (DR_REF (first_dr)), 0));
5447	      align = TYPE_ALIGN_UNIT (vectype);
5448	      if (aligned_access_p (first_dr))
5449		misalign = 0;
5450	      else if (DR_MISALIGNMENT (first_dr) == -1)
5451		{
5452		  if (DR_VECT_AUX (first_dr)->base_element_aligned)
5453		    align = TYPE_ALIGN_UNIT (elem_type);
5454		  else
5455		    align = get_object_alignment (DR_REF (first_dr))
5456			/ BITS_PER_UNIT;
5457		  misalign = 0;
5458		  TREE_TYPE (data_ref)
5459		    = build_aligned_type (TREE_TYPE (data_ref),
5460					  align * BITS_PER_UNIT);
5461		}
5462	      else
5463		{
5464		  TREE_TYPE (data_ref)
5465		    = build_aligned_type (TREE_TYPE (data_ref),
5466					  TYPE_ALIGN (elem_type));
5467		  misalign = DR_MISALIGNMENT (first_dr);
5468		}
5469	      if (dataref_offset == NULL_TREE)
5470		set_ptr_info_alignment (get_ptr_info (dataref_ptr), align,
5471					misalign);
5472
5473	      if (negative
5474		  && dt != vect_constant_def
5475		  && dt != vect_external_def)
5476		{
5477		  tree perm_mask = perm_mask_for_reverse (vectype);
5478		  tree perm_dest
5479		    = vect_create_destination_var (gimple_assign_rhs1 (stmt),
5480						   vectype);
5481		  tree new_temp = make_ssa_name (perm_dest);
5482
5483		  /* Generate the permute statement.  */
5484		  gimple perm_stmt
5485		    = gimple_build_assign (new_temp, VEC_PERM_EXPR, vec_oprnd,
5486					   vec_oprnd, perm_mask);
5487		  vect_finish_stmt_generation (stmt, perm_stmt, gsi);
5488
5489		  perm_stmt = SSA_NAME_DEF_STMT (new_temp);
5490		  vec_oprnd = new_temp;
5491		}
5492
5493	      /* Arguments are ready.  Create the new vector stmt.  */
5494	      new_stmt = gimple_build_assign (data_ref, vec_oprnd);
5495	      vect_finish_stmt_generation (stmt, new_stmt, gsi);
5496
5497	      if (slp)
5498		continue;
5499
5500	      next_stmt = GROUP_NEXT_ELEMENT (vinfo_for_stmt (next_stmt));
5501	      if (!next_stmt)
5502		break;
5503	    }
5504	}
5505      if (!slp)
5506	{
5507	  if (j == 0)
5508	    STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
5509	  else
5510	    STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
5511	  prev_stmt_info = vinfo_for_stmt (new_stmt);
5512	}
5513    }
5514
5515  dr_chain.release ();
5516  oprnds.release ();
5517  result_chain.release ();
5518  vec_oprnds.release ();
5519
5520  return true;
5521}
5522
5523/* Given a vector type VECTYPE, turns permutation SEL into the equivalent
5524   VECTOR_CST mask.  No checks are made that the target platform supports the
5525   mask, so callers may wish to test can_vec_perm_p separately, or use
5526   vect_gen_perm_mask_checked.  */
5527
5528tree
5529vect_gen_perm_mask_any (tree vectype, const unsigned char *sel)
5530{
5531  tree mask_elt_type, mask_type, mask_vec, *mask_elts;
5532  int i, nunits;
5533
5534  nunits = TYPE_VECTOR_SUBPARTS (vectype);
5535
5536  mask_elt_type = lang_hooks.types.type_for_mode
5537		    (int_mode_for_mode (TYPE_MODE (TREE_TYPE (vectype))), 1);
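  /* The selector elements are integers of the same width as the data
     elements, which is what VEC_PERM_EXPR expects.  */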
5538  mask_type = get_vectype_for_scalar_type (mask_elt_type);
5539
5540  mask_elts = XALLOCAVEC (tree, nunits);
5541  for (i = nunits - 1; i >= 0; i--)
5542    mask_elts[i] = build_int_cst (mask_elt_type, sel[i]);
5543  mask_vec = build_vector (mask_type, mask_elts);
5544
5545  return mask_vec;
5546}
5547
5548/* Checked version of vect_gen_perm_mask_any.  Asserts can_vec_perm_p,
5549   i.e. that the target supports the pattern _for arbitrary input vectors_.  */
5550
5551tree
5552vect_gen_perm_mask_checked (tree vectype, const unsigned char *sel)
5553{
5554  gcc_assert (can_vec_perm_p (TYPE_MODE (vectype), false, sel));
5555  return vect_gen_perm_mask_any (vectype, sel);
5556}
5557
/* Given vector variables X and Y that were generated for the scalar
   STMT, generate instructions to permute the vector elements of X and Y
   using the permutation mask MASK_VEC, insert them at *GSI and return the
   permuted vector variable.  */
5562
5563static tree
5564permute_vec_elements (tree x, tree y, tree mask_vec, gimple stmt,
5565		      gimple_stmt_iterator *gsi)
5566{
5567  tree vectype = TREE_TYPE (x);
5568  tree perm_dest, data_ref;
5569  gimple perm_stmt;
5570
5571  perm_dest = vect_create_destination_var (gimple_get_lhs (stmt), vectype);
5572  data_ref = make_ssa_name (perm_dest);
5573
5574  /* Generate the permute statement.  */
5575  perm_stmt = gimple_build_assign (data_ref, VEC_PERM_EXPR, x, y, mask_vec);
5576  vect_finish_stmt_generation (stmt, perm_stmt, gsi);
5577
5578  return data_ref;
5579}
5580
5581/* Hoist the definitions of all SSA uses on STMT out of the loop LOOP,
   inserting them on the loop's preheader edge.  Returns true if we
   were successful in doing so (and thus STMT can then be moved),
5584   otherwise returns false.  */
5585
5586static bool
5587hoist_defs_of_uses (gimple stmt, struct loop *loop)
5588{
5589  ssa_op_iter i;
5590  tree op;
5591  bool any = false;
5592
5593  FOR_EACH_SSA_TREE_OPERAND (op, stmt, i, SSA_OP_USE)
5594    {
5595      gimple def_stmt = SSA_NAME_DEF_STMT (op);
5596      if (!gimple_nop_p (def_stmt)
5597	  && flow_bb_inside_loop_p (loop, gimple_bb (def_stmt)))
5598	{
5599	  /* Make sure we don't need to recurse.  While we could do
	     so in simple cases, when there are more complex use webs
5601	     we don't have an easy way to preserve stmt order to fulfil
5602	     dependencies within them.  */
5603	  tree op2;
5604	  ssa_op_iter i2;
5605	  if (gimple_code (def_stmt) == GIMPLE_PHI)
5606	    return false;
5607	  FOR_EACH_SSA_TREE_OPERAND (op2, def_stmt, i2, SSA_OP_USE)
5608	    {
5609	      gimple def_stmt2 = SSA_NAME_DEF_STMT (op2);
5610	      if (!gimple_nop_p (def_stmt2)
5611		  && flow_bb_inside_loop_p (loop, gimple_bb (def_stmt2)))
5612		return false;
5613	    }
5614	  any = true;
5615	}
5616    }
5617
5618  if (!any)
5619    return true;
5620
5621  FOR_EACH_SSA_TREE_OPERAND (op, stmt, i, SSA_OP_USE)
5622    {
5623      gimple def_stmt = SSA_NAME_DEF_STMT (op);
5624      if (!gimple_nop_p (def_stmt)
5625	  && flow_bb_inside_loop_p (loop, gimple_bb (def_stmt)))
5626	{
5627	  gimple_stmt_iterator gsi = gsi_for_stmt (def_stmt);
5628	  gsi_remove (&gsi, false);
5629	  gsi_insert_on_edge_immediate (loop_preheader_edge (loop), def_stmt);
5630	}
5631    }
5632
5633  return true;
5634}
5635
5636/* vectorizable_load.
5637
   Check if STMT reads a non-scalar data-ref (array/pointer/structure) that
5639   can be vectorized.
5640   If VEC_STMT is also passed, vectorize the STMT: create a vectorized
5641   stmt to replace it, put it in VEC_STMT, and insert it at BSI.
5642   Return FALSE if not a vectorizable STMT, TRUE otherwise.  */
5643
5644static bool
5645vectorizable_load (gimple stmt, gimple_stmt_iterator *gsi, gimple *vec_stmt,
5646                   slp_tree slp_node, slp_instance slp_node_instance)
5647{
5648  tree scalar_dest;
5649  tree vec_dest = NULL;
5650  tree data_ref = NULL;
5651  stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
5652  stmt_vec_info prev_stmt_info;
5653  loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
5654  struct loop *loop = NULL;
5655  struct loop *containing_loop = (gimple_bb (stmt))->loop_father;
5656  bool nested_in_vect_loop = false;
5657  struct data_reference *dr = STMT_VINFO_DATA_REF (stmt_info), *first_dr = NULL;
5658  tree vectype = STMT_VINFO_VECTYPE (stmt_info);
5659  tree elem_type;
5660  tree new_temp;
5661  machine_mode mode;
5662  gimple new_stmt = NULL;
5663  tree dummy;
5664  enum dr_alignment_support alignment_support_scheme;
5665  tree dataref_ptr = NULL_TREE;
5666  tree dataref_offset = NULL_TREE;
5667  gimple ptr_incr = NULL;
5668  int nunits = TYPE_VECTOR_SUBPARTS (vectype);
5669  int ncopies;
5670  int i, j, group_size, group_gap;
5671  tree msq = NULL_TREE, lsq;
5672  tree offset = NULL_TREE;
5673  tree byte_offset = NULL_TREE;
5674  tree realignment_token = NULL_TREE;
5675  gphi *phi = NULL;
5676  vec<tree> dr_chain = vNULL;
5677  bool grouped_load = false;
5678  bool load_lanes_p = false;
5679  gimple first_stmt;
5680  bool inv_p;
5681  bool negative = false;
5682  bool compute_in_loop = false;
5683  struct loop *at_loop;
5684  int vec_num;
5685  bool slp = (slp_node != NULL);
5686  bool slp_perm = false;
5687  enum tree_code code;
5688  bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
5689  int vf;
5690  tree aggr_type;
5691  tree gather_base = NULL_TREE, gather_off = NULL_TREE;
5692  tree gather_off_vectype = NULL_TREE, gather_decl = NULL_TREE;
5693  int gather_scale = 1;
5694  enum vect_def_type gather_dt = vect_unknown_def_type;
5695
5696  if (loop_vinfo)
5697    {
5698      loop = LOOP_VINFO_LOOP (loop_vinfo);
5699      nested_in_vect_loop = nested_in_vect_loop_p (loop, stmt);
5700      vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
5701    }
5702  else
5703    vf = 1;
5704
5705  /* Multiple types in SLP are handled by creating the appropriate number of
5706     vectorized stmts for each SLP node.  Hence, NCOPIES is always 1 in
5707     case of SLP.  */
5708  if (slp || PURE_SLP_STMT (stmt_info))
5709    ncopies = 1;
5710  else
5711    ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits;
5712
5713  gcc_assert (ncopies >= 1);
5714
5715  /* FORNOW. This restriction should be relaxed.  */
5716  if (nested_in_vect_loop && ncopies > 1)
5717    {
5718      if (dump_enabled_p ())
5719        dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5720                         "multiple types in nested loop.\n");
5721      return false;
5722    }
5723
5724  /* Invalidate assumptions made by dependence analysis when vectorization
5725     on the unrolled body effectively re-orders stmts.  */
5726  if (ncopies > 1
5727      && STMT_VINFO_MIN_NEG_DIST (stmt_info) != 0
5728      && ((unsigned)LOOP_VINFO_VECT_FACTOR (loop_vinfo)
5729	  > STMT_VINFO_MIN_NEG_DIST (stmt_info)))
5730    {
5731      if (dump_enabled_p ())
5732	dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5733			 "cannot perform implicit CSE when unrolling "
5734			 "with negative dependence distance\n");
5735      return false;
5736    }
5737
5738  if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
5739    return false;
5740
5741  if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def)
5742    return false;
5743
5744  /* Is vectorizable load? */
5745  if (!is_gimple_assign (stmt))
5746    return false;
5747
5748  scalar_dest = gimple_assign_lhs (stmt);
5749  if (TREE_CODE (scalar_dest) != SSA_NAME)
5750    return false;
5751
5752  code = gimple_assign_rhs_code (stmt);
5753  if (code != ARRAY_REF
5754      && code != BIT_FIELD_REF
5755      && code != INDIRECT_REF
5756      && code != COMPONENT_REF
5757      && code != IMAGPART_EXPR
5758      && code != REALPART_EXPR
5759      && code != MEM_REF
5760      && TREE_CODE_CLASS (code) != tcc_declaration)
5761    return false;
5762
5763  if (!STMT_VINFO_DATA_REF (stmt_info))
5764    return false;
5765
5766  elem_type = TREE_TYPE (vectype);
5767  mode = TYPE_MODE (vectype);
5768
5769  /* FORNOW. In some cases can vectorize even if data-type not supported
5770    (e.g. - data copies).  */
5771  if (optab_handler (mov_optab, mode) == CODE_FOR_nothing)
5772    {
5773      if (dump_enabled_p ())
5774        dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5775                         "Aligned load, but unsupported type.\n");
5776      return false;
5777    }
5778
5779  /* Check if the load is a part of an interleaving chain.  */
5780  if (STMT_VINFO_GROUPED_ACCESS (stmt_info))
5781    {
5782      grouped_load = true;
5783      /* FORNOW */
5784      gcc_assert (! nested_in_vect_loop && !STMT_VINFO_GATHER_P (stmt_info));
5785
5786      first_stmt = GROUP_FIRST_ELEMENT (stmt_info);
5787
      /* If this is single-element interleaving with an element distance
	 that leaves unused vector loads around, punt - we would at least
	 create very sub-optimal code in that case (and blow up memory;
	 see PR65518).  */
5792      if (first_stmt == stmt
5793	  && !GROUP_NEXT_ELEMENT (stmt_info)
5794	  && GROUP_SIZE (stmt_info) > TYPE_VECTOR_SUBPARTS (vectype))
5795	{
5796	  if (dump_enabled_p ())
5797	    dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5798			     "single-element interleaving not supported "
5799			     "for not adjacent vector loads\n");
5800	  return false;
5801	}
5802
5803      if (!slp && !PURE_SLP_STMT (stmt_info))
5804	{
5805	  group_size = GROUP_SIZE (vinfo_for_stmt (first_stmt));
5806	  if (vect_load_lanes_supported (vectype, group_size))
5807	    load_lanes_p = true;
5808	  else if (!vect_grouped_load_supported (vectype, group_size))
5809	    return false;
5810	}
5811
5812      /* Invalidate assumptions made by dependence analysis when vectorization
5813	 on the unrolled body effectively re-orders stmts.  */
5814      if (!PURE_SLP_STMT (stmt_info)
5815	  && STMT_VINFO_MIN_NEG_DIST (stmt_info) != 0
5816	  && ((unsigned)LOOP_VINFO_VECT_FACTOR (loop_vinfo)
5817	      > STMT_VINFO_MIN_NEG_DIST (stmt_info)))
5818	{
5819	  if (dump_enabled_p ())
5820	    dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5821			     "cannot perform implicit CSE when performing "
5822			     "group loads with negative dependence distance\n");
5823	  return false;
5824	}
5825
      /* Similarly, when the stmt is a load that is both part of an SLP
	 instance and a loop-vectorized stmt via the same-dr mechanism,
	 we have to give up.  */
5829      if (STMT_VINFO_GROUP_SAME_DR_STMT (stmt_info)
5830	  && (STMT_SLP_TYPE (stmt_info)
5831	      != STMT_SLP_TYPE (vinfo_for_stmt
5832				 (STMT_VINFO_GROUP_SAME_DR_STMT (stmt_info)))))
5833	{
5834	  if (dump_enabled_p ())
5835	    dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5836			     "conflicting SLP types for CSEd load\n");
5837	  return false;
5838	}
5839    }
5840
5841
5842  if (STMT_VINFO_GATHER_P (stmt_info))
5843    {
5844      gimple def_stmt;
5845      tree def;
5846      gather_decl = vect_check_gather (stmt, loop_vinfo, &gather_base,
5847				       &gather_off, &gather_scale);
5848      gcc_assert (gather_decl);
5849      if (!vect_is_simple_use_1 (gather_off, NULL, loop_vinfo, bb_vinfo,
5850				 &def_stmt, &def, &gather_dt,
5851				 &gather_off_vectype))
5852	{
5853	  if (dump_enabled_p ())
5854	    dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5855                             "gather index use not simple.\n");
5856	  return false;
5857	}
5858    }
5859  else if (STMT_VINFO_STRIDE_LOAD_P (stmt_info))
5860    ;
5861  else
5862    {
5863      negative = tree_int_cst_compare (nested_in_vect_loop
5864				       ? STMT_VINFO_DR_STEP (stmt_info)
5865				       : DR_STEP (dr),
5866				       size_zero_node) < 0;
5867      if (negative && ncopies > 1)
5868	{
5869	  if (dump_enabled_p ())
5870	    dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5871                             "multiple types with negative step.\n");
5872	  return false;
5873	}
5874
5875      if (negative)
5876	{
5877	  if (grouped_load)
5878	    {
5879	      if (dump_enabled_p ())
5880		dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5881				 "negative step for group load not supported"
5882                                 "\n");
5883	      return false;
5884	    }
5885	  alignment_support_scheme = vect_supportable_dr_alignment (dr, false);
5886	  if (alignment_support_scheme != dr_aligned
5887	      && alignment_support_scheme != dr_unaligned_supported)
5888	    {
5889              if (dump_enabled_p ())
5890                dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5891                                 "negative step but alignment required.\n");
5892	      return false;
5893	    }
5894	  if (!perm_mask_for_reverse (vectype))
5895	    {
5896              if (dump_enabled_p ())
5897                dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5898                                 "negative step and reversing not supported."
5899                                 "\n");
5900	      return false;
5901	    }
5902	}
5903    }
5904
5905  if (!vec_stmt) /* transformation not required.  */
5906    {
5907      STMT_VINFO_TYPE (stmt_info) = load_vec_info_type;
5908      vect_model_load_cost (stmt_info, ncopies, load_lanes_p, NULL, NULL, NULL);
5909      return true;
5910    }
5911
5912  if (dump_enabled_p ())
5913    dump_printf_loc (MSG_NOTE, vect_location,
5914                     "transform load. ncopies = %d\n", ncopies);
5915
5916  /** Transform.  **/
5917
5918  ensure_base_align (stmt_info, dr);
5919
5920  if (STMT_VINFO_GATHER_P (stmt_info))
5921    {
5922      tree vec_oprnd0 = NULL_TREE, op;
5923      tree arglist = TYPE_ARG_TYPES (TREE_TYPE (gather_decl));
5924      tree rettype, srctype, ptrtype, idxtype, masktype, scaletype;
5925      tree ptr, mask, var, scale, merge, perm_mask = NULL_TREE, prev_res = NULL_TREE;
5926      edge pe = loop_preheader_edge (loop);
5927      gimple_seq seq;
5928      basic_block new_bb;
5929      enum { NARROW, NONE, WIDEN } modifier;
5930      int gather_off_nunits = TYPE_VECTOR_SUBPARTS (gather_off_vectype);
5931
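      /* The gather builtin operates on GATHER_OFF_NUNITS offsets at a time.
	 If that matches NUNITS, use it directly (NONE).  If the offset
	 vector has twice as many elements, one offset vector serves two
	 copies and the odd copies use its high half (WIDEN).  If the data
	 vector has twice as many elements, two gather results are combined
	 into each copy (NARROW).  */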
5932      if (nunits == gather_off_nunits)
5933	modifier = NONE;
5934      else if (nunits == gather_off_nunits / 2)
5935	{
5936	  unsigned char *sel = XALLOCAVEC (unsigned char, gather_off_nunits);
5937	  modifier = WIDEN;
5938
5939	  for (i = 0; i < gather_off_nunits; ++i)
5940	    sel[i] = i | nunits;
5941
5942	  perm_mask = vect_gen_perm_mask_checked (gather_off_vectype, sel);
5943	}
5944      else if (nunits == gather_off_nunits * 2)
5945	{
5946	  unsigned char *sel = XALLOCAVEC (unsigned char, nunits);
5947	  modifier = NARROW;
5948
5949	  for (i = 0; i < nunits; ++i)
5950	    sel[i] = i < gather_off_nunits
5951		     ? i : i + nunits - gather_off_nunits;
5952
5953	  perm_mask = vect_gen_perm_mask_checked (vectype, sel);
5954	  ncopies *= 2;
5955	}
5956      else
5957	gcc_unreachable ();
5958
5959      rettype = TREE_TYPE (TREE_TYPE (gather_decl));
5960      srctype = TREE_VALUE (arglist); arglist = TREE_CHAIN (arglist);
5961      ptrtype = TREE_VALUE (arglist); arglist = TREE_CHAIN (arglist);
5962      idxtype = TREE_VALUE (arglist); arglist = TREE_CHAIN (arglist);
5963      masktype = TREE_VALUE (arglist); arglist = TREE_CHAIN (arglist);
5964      scaletype = TREE_VALUE (arglist);
5965      gcc_checking_assert (types_compatible_p (srctype, rettype));
5966
5967      vec_dest = vect_create_destination_var (scalar_dest, vectype);
5968
5969      ptr = fold_convert (ptrtype, gather_base);
5970      if (!is_gimple_min_invariant (ptr))
5971	{
5972	  ptr = force_gimple_operand (ptr, &seq, true, NULL_TREE);
5973	  new_bb = gsi_insert_seq_on_edge_immediate (pe, seq);
5974	  gcc_assert (!new_bb);
5975	}
5976
5977      /* Currently we support only unconditional gather loads,
5978	 so mask should be all ones.  */
5979      if (TREE_CODE (masktype) == INTEGER_TYPE)
5980	mask = build_int_cst (masktype, -1);
5981      else if (TREE_CODE (TREE_TYPE (masktype)) == INTEGER_TYPE)
5982	{
5983	  mask = build_int_cst (TREE_TYPE (masktype), -1);
5984	  mask = build_vector_from_val (masktype, mask);
5985	  mask = vect_init_vector (stmt, mask, masktype, NULL);
5986	}
5987      else if (SCALAR_FLOAT_TYPE_P (TREE_TYPE (masktype)))
5988	{
5989	  REAL_VALUE_TYPE r;
5990	  long tmp[6];
5991	  for (j = 0; j < 6; ++j)
5992	    tmp[j] = -1;
5993	  real_from_target (&r, tmp, TYPE_MODE (TREE_TYPE (masktype)));
5994	  mask = build_real (TREE_TYPE (masktype), r);
5995	  mask = build_vector_from_val (masktype, mask);
5996	  mask = vect_init_vector (stmt, mask, masktype, NULL);
5997	}
5998      else
5999	gcc_unreachable ();
6000
6001      scale = build_int_cst (scaletype, gather_scale);
6002
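      /* MERGE supplies the values of masked-off lanes in the gather result;
	 with the all-ones mask it is never used, so a zero vector is
	 sufficient.  */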
6003      if (TREE_CODE (TREE_TYPE (rettype)) == INTEGER_TYPE)
6004	merge = build_int_cst (TREE_TYPE (rettype), 0);
6005      else if (SCALAR_FLOAT_TYPE_P (TREE_TYPE (rettype)))
6006	{
6007	  REAL_VALUE_TYPE r;
6008	  long tmp[6];
6009	  for (j = 0; j < 6; ++j)
6010	    tmp[j] = 0;
6011	  real_from_target (&r, tmp, TYPE_MODE (TREE_TYPE (rettype)));
6012	  merge = build_real (TREE_TYPE (rettype), r);
6013	}
6014      else
6015	gcc_unreachable ();
6016      merge = build_vector_from_val (rettype, merge);
6017      merge = vect_init_vector (stmt, merge, rettype, NULL);
6018
6019      prev_stmt_info = NULL;
6020      for (j = 0; j < ncopies; ++j)
6021	{
6022	  if (modifier == WIDEN && (j & 1))
6023	    op = permute_vec_elements (vec_oprnd0, vec_oprnd0,
6024				       perm_mask, stmt, gsi);
6025	  else if (j == 0)
6026	    op = vec_oprnd0
6027	      = vect_get_vec_def_for_operand (gather_off, stmt, NULL);
6028	  else
6029	    op = vec_oprnd0
6030	      = vect_get_vec_def_for_stmt_copy (gather_dt, vec_oprnd0);
6031
6032	  if (!useless_type_conversion_p (idxtype, TREE_TYPE (op)))
6033	    {
6034	      gcc_assert (TYPE_VECTOR_SUBPARTS (TREE_TYPE (op))
6035			  == TYPE_VECTOR_SUBPARTS (idxtype));
6036	      var = vect_get_new_vect_var (idxtype, vect_simple_var, NULL);
6037	      var = make_ssa_name (var);
6038	      op = build1 (VIEW_CONVERT_EXPR, idxtype, op);
6039	      new_stmt
6040		= gimple_build_assign (var, VIEW_CONVERT_EXPR, op);
6041	      vect_finish_stmt_generation (stmt, new_stmt, gsi);
6042	      op = var;
6043	    }
6044
6045	  new_stmt
6046	    = gimple_build_call (gather_decl, 5, merge, ptr, op, mask, scale);
6047
6048	  if (!useless_type_conversion_p (vectype, rettype))
6049	    {
6050	      gcc_assert (TYPE_VECTOR_SUBPARTS (vectype)
6051			  == TYPE_VECTOR_SUBPARTS (rettype));
6052	      var = vect_get_new_vect_var (rettype, vect_simple_var, NULL);
6053	      op = make_ssa_name (var, new_stmt);
6054	      gimple_call_set_lhs (new_stmt, op);
6055	      vect_finish_stmt_generation (stmt, new_stmt, gsi);
6056	      var = make_ssa_name (vec_dest);
6057	      op = build1 (VIEW_CONVERT_EXPR, vectype, op);
6058	      new_stmt
6059		= gimple_build_assign (var, VIEW_CONVERT_EXPR, op);
6060	    }
6061	  else
6062	    {
6063	      var = make_ssa_name (vec_dest, new_stmt);
6064	      gimple_call_set_lhs (new_stmt, var);
6065	    }
6066
6067	  vect_finish_stmt_generation (stmt, new_stmt, gsi);
6068
6069	  if (modifier == NARROW)
6070	    {
6071	      if ((j & 1) == 0)
6072		{
6073		  prev_res = var;
6074		  continue;
6075		}
6076	      var = permute_vec_elements (prev_res, var,
6077					  perm_mask, stmt, gsi);
6078	      new_stmt = SSA_NAME_DEF_STMT (var);
6079	    }
6080
6081	  if (prev_stmt_info == NULL)
6082	    STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
6083	  else
6084	    STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
6085	  prev_stmt_info = vinfo_for_stmt (new_stmt);
6086	}
6087      return true;
6088    }
6089  else if (STMT_VINFO_STRIDE_LOAD_P (stmt_info))
6090    {
6091      gimple_stmt_iterator incr_gsi;
6092      bool insert_after;
6093      gimple incr;
6094      tree offvar;
6095      tree ivstep;
6096      tree running_off;
6097      vec<constructor_elt, va_gc> *v = NULL;
6098      gimple_seq stmts = NULL;
6099      tree stride_base, stride_step, alias_off;
6100
6101      gcc_assert (!nested_in_vect_loop);
6102
6103      stride_base
6104	= fold_build_pointer_plus
6105	    (unshare_expr (DR_BASE_ADDRESS (dr)),
6106	     size_binop (PLUS_EXPR,
6107			 convert_to_ptrofftype (unshare_expr (DR_OFFSET (dr))),
6108			 convert_to_ptrofftype (DR_INIT (dr))));
6109      stride_step = fold_convert (sizetype, unshare_expr (DR_STEP (dr)));
6110
6111      /* For a load with loop-invariant (but other than power-of-2)
6112         stride (i.e. not a grouped access) like so:
6113
6114	   for (i = 0; i < n; i += stride)
6115	     ... = array[i];
6116
6117	 we generate a new induction variable and new accesses to
6118	 form a new vector (or vectors, depending on ncopies):
6119
6120	   for (j = 0; ; j += VF*stride)
6121	     tmp1 = array[j];
6122	     tmp2 = array[j + stride];
6123	     ...
6124	     vectemp = {tmp1, tmp2, ...}
6125         */
6126
6127      ivstep = stride_step;
6128      ivstep = fold_build2 (MULT_EXPR, TREE_TYPE (ivstep), ivstep,
6129			    build_int_cst (TREE_TYPE (ivstep), vf));
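      /* The induction variable therefore advances by VF scalar strides per
	 vectorized loop iteration; the element loads below step through it
	 one STRIDE_STEP at a time.  */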
6130
6131      standard_iv_increment_position (loop, &incr_gsi, &insert_after);
6132
6133      create_iv (stride_base, ivstep, NULL,
6134		 loop, &incr_gsi, insert_after,
6135		 &offvar, NULL);
6136      incr = gsi_stmt (incr_gsi);
6137      set_vinfo_for_stmt (incr, new_stmt_vec_info (incr, loop_vinfo, NULL));
6138
6139      stride_step = force_gimple_operand (stride_step, &stmts, true, NULL_TREE);
6140      if (stmts)
6141	gsi_insert_seq_on_edge_immediate (loop_preheader_edge (loop), stmts);
6142
6143      prev_stmt_info = NULL;
6144      running_off = offvar;
6145      alias_off = build_int_cst (reference_alias_ptr_type (DR_REF (dr)), 0);
6146      for (j = 0; j < ncopies; j++)
6147	{
6148	  tree vec_inv;
6149
6150	  vec_alloc (v, nunits);
6151	  for (i = 0; i < nunits; i++)
6152	    {
6153	      tree newref, newoff;
6154	      gimple incr;
6155	      newref = build2 (MEM_REF, TREE_TYPE (vectype),
6156			       running_off, alias_off);
6157
6158	      newref = force_gimple_operand_gsi (gsi, newref, true,
6159						 NULL_TREE, true,
6160						 GSI_SAME_STMT);
6161	      CONSTRUCTOR_APPEND_ELT (v, NULL_TREE, newref);
6162	      newoff = copy_ssa_name (running_off);
6163	      incr = gimple_build_assign (newoff, POINTER_PLUS_EXPR,
6164					  running_off, stride_step);
6165	      vect_finish_stmt_generation (stmt, incr, gsi);
6166
6167	      running_off = newoff;
6168	    }
6169
6170	  vec_inv = build_constructor (vectype, v);
6171	  new_temp = vect_init_vector (stmt, vec_inv, vectype, gsi);
6172	  new_stmt = SSA_NAME_DEF_STMT (new_temp);
6173
6174	  if (j == 0)
6175	    STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
6176	  else
6177	    STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
6178	  prev_stmt_info = vinfo_for_stmt (new_stmt);
6179	}
6180      return true;
6181    }
6182
6183  if (grouped_load)
6184    {
6185      first_stmt = GROUP_FIRST_ELEMENT (stmt_info);
6186      if (slp
6187          && !SLP_TREE_LOAD_PERMUTATION (slp_node).exists ()
6188	  && first_stmt != SLP_TREE_SCALAR_STMTS (slp_node)[0])
6189        first_stmt = SLP_TREE_SCALAR_STMTS (slp_node)[0];
6190
6191      /* Check if the chain of loads is already vectorized.  */
6192      if (STMT_VINFO_VEC_STMT (vinfo_for_stmt (first_stmt))
6193	  /* For SLP we would need to copy over SLP_TREE_VEC_STMTS.
6194	     ???  But we can only do so if there is exactly one
6195	     as we have no way to get at the rest.  Leave the CSE
6196	     opportunity alone.
6197	     ???  With the group load eventually participating
6198	     in multiple different permutations (having multiple
6199	     slp nodes which refer to the same group) the CSE
6200	     is even wrong code.  See PR56270.  */
6201	  && !slp)
6202	{
6203	  *vec_stmt = STMT_VINFO_VEC_STMT (stmt_info);
6204	  return true;
6205	}
6206      first_dr = STMT_VINFO_DATA_REF (vinfo_for_stmt (first_stmt));
6207      group_size = GROUP_SIZE (vinfo_for_stmt (first_stmt));
6208
6209      /* VEC_NUM is the number of vect stmts to be created for this group.  */
6210      if (slp)
6211	{
6212	  grouped_load = false;
6213	  vec_num = SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node);
6214          if (SLP_TREE_LOAD_PERMUTATION (slp_node).exists ())
6215            slp_perm = true;
6216	  group_gap = GROUP_GAP (vinfo_for_stmt (first_stmt));
6217    	}
6218      else
6219	{
6220	  vec_num = group_size;
6221	  group_gap = 0;
6222	}
6223    }
6224  else
6225    {
6226      first_stmt = stmt;
6227      first_dr = dr;
6228      group_size = vec_num = 1;
6229      group_gap = 0;
6230    }
6231
6232  alignment_support_scheme = vect_supportable_dr_alignment (first_dr, false);
6233  gcc_assert (alignment_support_scheme);
6234  /* Targets with load-lane instructions must not require explicit
6235     realignment.  */
6236  gcc_assert (!load_lanes_p
6237	      || alignment_support_scheme == dr_aligned
6238	      || alignment_support_scheme == dr_unaligned_supported);
6239
6240  /* In case the vectorization factor (VF) is bigger than the number
6241     of elements that we can fit in a vectype (nunits), we have to generate
6242     more than one vector stmt - i.e - we need to "unroll" the
6243     vector stmt by a factor VF/nunits.  In doing so, we record a pointer
6244     from one copy of the vector stmt to the next, in the field
6245     STMT_VINFO_RELATED_STMT.  This is necessary in order to allow following
6246     stages to find the correct vector defs to be used when vectorizing
6247     stmts that use the defs of the current stmt.  The example below
6248     illustrates the vectorization process when VF=16 and nunits=4 (i.e., we
6249     need to create 4 vectorized stmts):
6250
6251     before vectorization:
6252                                RELATED_STMT    VEC_STMT
6253        S1:     x = memref      -               -
6254        S2:     z = x + 1       -               -
6255
6256     step 1: vectorize stmt S1:
6257        We first create the vector stmt VS1_0, and, as usual, record a
6258        pointer to it in the STMT_VINFO_VEC_STMT of the scalar stmt S1.
6259        Next, we create the vector stmt VS1_1, and record a pointer to
6260        it in the STMT_VINFO_RELATED_STMT of the vector stmt VS1_0.
6261        Similarly, for VS1_2 and VS1_3.  This is the resulting chain of
6262        stmts and pointers:
6263                                RELATED_STMT    VEC_STMT
6264        VS1_0:  vx0 = memref0   VS1_1           -
6265        VS1_1:  vx1 = memref1   VS1_2           -
6266        VS1_2:  vx2 = memref2   VS1_3           -
6267        VS1_3:  vx3 = memref3   -               -
6268        S1:     x = load        -               VS1_0
6269        S2:     z = x + 1       -               -
6270
6271     See in documentation in vect_get_vec_def_for_stmt_copy for how the
6272     information we recorded in RELATED_STMT field is used to vectorize
6273     stmt S2.  */
6274
6275  /* In case of interleaving (non-unit grouped access):
6276
6277     S1:  x2 = &base + 2
6278     S2:  x0 = &base
6279     S3:  x1 = &base + 1
6280     S4:  x3 = &base + 3
6281
6282     Vectorized loads are created in the order of memory accesses
6283     starting from the access of the first stmt of the chain:
6284
6285     VS1: vx0 = &base
6286     VS2: vx1 = &base + vec_size*1
     VS3: vx2 = &base + vec_size*2
     VS4: vx3 = &base + vec_size*3
6289
6290     Then permutation statements are generated:
6291
6292     VS5: vx5 = VEC_PERM_EXPR < vx0, vx1, { 0, 2, ..., i*2 } >
6293     VS6: vx6 = VEC_PERM_EXPR < vx0, vx1, { 1, 3, ..., i*2+1 } >
6294       ...
6295
6296     And they are put in STMT_VINFO_VEC_STMT of the corresponding scalar stmts
6297     (the order of the data-refs in the output of vect_permute_load_chain
6298     corresponds to the order of scalar stmts in the interleaving chain - see
6299     the documentation of vect_permute_load_chain()).
6300     The generation of permutation stmts and recording them in
6301     STMT_VINFO_VEC_STMT is done in vect_transform_grouped_load().
6302
6303     In case of both multiple types and interleaving, the vector loads and
6304     permutation stmts above are created for every copy.  The result vector
6305     stmts are put in STMT_VINFO_VEC_STMT for the first copy and in the
6306     corresponding STMT_VINFO_RELATED_STMT for the next copies.  */
6307
6308  /* If the data reference is aligned (dr_aligned) or potentially unaligned
6309     on a target that supports unaligned accesses (dr_unaligned_supported)
6310     we generate the following code:
6311         p = initial_addr;
6312         indx = 0;
6313         loop {
6314	   p = p + indx * vectype_size;
6315           vec_dest = *(p);
6316           indx = indx + 1;
6317         }
6318
6319     Otherwise, the data reference is potentially unaligned on a target that
6320     does not support unaligned accesses (dr_explicit_realign_optimized) -
6321     then generate the following code, in which the data in each iteration is
6322     obtained by two vector loads, one from the previous iteration, and one
6323     from the current iteration:
6324         p1 = initial_addr;
6325         msq_init = *(floor(p1))
6326         p2 = initial_addr + VS - 1;
6327         realignment_token = call target_builtin;
6328         indx = 0;
6329         loop {
6330           p2 = p2 + indx * vectype_size
6331           lsq = *(floor(p2))
6332           vec_dest = realign_load (msq, lsq, realignment_token)
6333           indx = indx + 1;
6334           msq = lsq;
6335         }   */
6336
6337  /* If the misalignment remains the same throughout the execution of the
6338     loop, we can create the init_addr and permutation mask at the loop
     preheader.  Otherwise, they need to be created inside the loop.
     This can only occur when vectorizing memory accesses in an inner loop
     nested within an outer loop that is being vectorized.  */
6342
6343  if (nested_in_vect_loop
6344      && (TREE_INT_CST_LOW (DR_STEP (dr))
6345	  % GET_MODE_SIZE (TYPE_MODE (vectype)) != 0))
6346    {
6347      gcc_assert (alignment_support_scheme != dr_explicit_realign_optimized);
6348      compute_in_loop = true;
6349    }
6350
6351  if ((alignment_support_scheme == dr_explicit_realign_optimized
6352       || alignment_support_scheme == dr_explicit_realign)
6353      && !compute_in_loop)
6354    {
6355      msq = vect_setup_realignment (first_stmt, gsi, &realignment_token,
6356				    alignment_support_scheme, NULL_TREE,
6357				    &at_loop);
6358      if (alignment_support_scheme == dr_explicit_realign_optimized)
6359	{
6360	  phi = as_a <gphi *> (SSA_NAME_DEF_STMT (msq));
6361	  byte_offset = size_binop (MINUS_EXPR, TYPE_SIZE_UNIT (vectype),
6362				    size_one_node);
6363	}
6364    }
6365  else
6366    at_loop = loop;
6367
6368  if (negative)
6369    offset = size_int (-TYPE_VECTOR_SUBPARTS (vectype) + 1);
6370
6371  if (load_lanes_p)
6372    aggr_type = build_array_type_nelts (elem_type, vec_num * nunits);
6373  else
6374    aggr_type = vectype;
6375
6376  prev_stmt_info = NULL;
6377  for (j = 0; j < ncopies; j++)
6378    {
6379      /* 1. Create the vector or array pointer update chain.  */
6380      if (j == 0)
6381	{
6382	  bool simd_lane_access_p
6383	    = STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info);
6384	  if (simd_lane_access_p
6385	      && TREE_CODE (DR_BASE_ADDRESS (first_dr)) == ADDR_EXPR
6386	      && VAR_P (TREE_OPERAND (DR_BASE_ADDRESS (first_dr), 0))
6387	      && integer_zerop (DR_OFFSET (first_dr))
6388	      && integer_zerop (DR_INIT (first_dr))
6389	      && alias_sets_conflict_p (get_alias_set (aggr_type),
6390					get_alias_set (DR_REF (first_dr)))
6391	      && (alignment_support_scheme == dr_aligned
6392		  || alignment_support_scheme == dr_unaligned_supported))
6393	    {
6394	      dataref_ptr = unshare_expr (DR_BASE_ADDRESS (first_dr));
6395	      dataref_offset = build_int_cst (reference_alias_ptr_type
6396					      (DR_REF (first_dr)), 0);
6397	      inv_p = false;
6398	    }
6399	  else
6400	    dataref_ptr
6401	      = vect_create_data_ref_ptr (first_stmt, aggr_type, at_loop,
6402					  offset, &dummy, gsi, &ptr_incr,
6403					  simd_lane_access_p, &inv_p,
6404					  byte_offset);
6405	}
6406      else if (dataref_offset)
6407	dataref_offset = int_const_binop (PLUS_EXPR, dataref_offset,
6408					  TYPE_SIZE_UNIT (aggr_type));
6409      else
6410        dataref_ptr = bump_vector_ptr (dataref_ptr, ptr_incr, gsi, stmt,
6411				       TYPE_SIZE_UNIT (aggr_type));
6412
6413      if (grouped_load || slp_perm)
6414	dr_chain.create (vec_num);
6415
6416      if (load_lanes_p)
6417	{
6418	  tree vec_array;
6419
6420	  vec_array = create_vector_array (vectype, vec_num);
6421
6422	  /* Emit:
6423	       VEC_ARRAY = LOAD_LANES (MEM_REF[...all elements...]).  */
6424	  data_ref = create_array_ref (aggr_type, dataref_ptr, first_dr);
6425	  new_stmt = gimple_build_call_internal (IFN_LOAD_LANES, 1, data_ref);
6426	  gimple_call_set_lhs (new_stmt, vec_array);
6427	  vect_finish_stmt_generation (stmt, new_stmt, gsi);
6428
6429	  /* Extract each vector into an SSA_NAME.  */
6430	  for (i = 0; i < vec_num; i++)
6431	    {
6432	      new_temp = read_vector_array (stmt, gsi, scalar_dest,
6433					    vec_array, i);
6434	      dr_chain.quick_push (new_temp);
6435	    }
6436
6437	  /* Record the mapping between SSA_NAMEs and statements.  */
6438	  vect_record_grouped_load_vectors (stmt, dr_chain);
6439	}
6440      else
6441	{
6442	  for (i = 0; i < vec_num; i++)
6443	    {
6444	      if (i > 0)
6445		dataref_ptr = bump_vector_ptr (dataref_ptr, ptr_incr, gsi,
6446					       stmt, NULL_TREE);
6447
6448	      /* 2. Create the vector-load in the loop.  */
6449	      switch (alignment_support_scheme)
6450		{
6451		case dr_aligned:
6452		case dr_unaligned_supported:
6453		  {
6454		    unsigned int align, misalign;
6455
6456		    data_ref
6457		      = build2 (MEM_REF, vectype, dataref_ptr,
6458				dataref_offset
6459				? dataref_offset
6460				: build_int_cst (reference_alias_ptr_type
6461						 (DR_REF (first_dr)), 0));
6462		    align = TYPE_ALIGN_UNIT (vectype);
6463		    if (alignment_support_scheme == dr_aligned)
6464		      {
6465			gcc_assert (aligned_access_p (first_dr));
6466			misalign = 0;
6467		      }
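		    /* A DR_MISALIGNMENT of -1 means the misalignment is not
		       known at compile time; use whatever alignment can be
		       derived from the base or the object itself.  */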
6468		    else if (DR_MISALIGNMENT (first_dr) == -1)
6469		      {
6470			if (DR_VECT_AUX (first_dr)->base_element_aligned)
6471			  align = TYPE_ALIGN_UNIT (elem_type);
6472			else
6473			  align = (get_object_alignment (DR_REF (first_dr))
6474				   / BITS_PER_UNIT);
6475			misalign = 0;
6476			TREE_TYPE (data_ref)
6477			  = build_aligned_type (TREE_TYPE (data_ref),
6478						align * BITS_PER_UNIT);
6479		      }
6480		    else
6481		      {
6482			TREE_TYPE (data_ref)
6483			  = build_aligned_type (TREE_TYPE (data_ref),
6484						TYPE_ALIGN (elem_type));
6485			misalign = DR_MISALIGNMENT (first_dr);
6486		      }
6487		    if (dataref_offset == NULL_TREE)
6488		      set_ptr_info_alignment (get_ptr_info (dataref_ptr),
6489					      align, misalign);
6490		    break;
6491		  }
6492		case dr_explicit_realign:
6493		  {
6494		    tree ptr, bump;
6495
6496		    tree vs = size_int (TYPE_VECTOR_SUBPARTS (vectype));
6497
6498		    if (compute_in_loop)
6499		      msq = vect_setup_realignment (first_stmt, gsi,
6500						    &realignment_token,
6501						    dr_explicit_realign,
6502						    dataref_ptr, NULL);
6503
6504		    ptr = copy_ssa_name (dataref_ptr);
6505		    new_stmt = gimple_build_assign
6506				 (ptr, BIT_AND_EXPR, dataref_ptr,
6507				  build_int_cst
6508				  (TREE_TYPE (dataref_ptr),
6509				   -(HOST_WIDE_INT)TYPE_ALIGN_UNIT (vectype)));
6510		    vect_finish_stmt_generation (stmt, new_stmt, gsi);
6511		    data_ref
6512		      = build2 (MEM_REF, vectype, ptr,
6513				build_int_cst (reference_alias_ptr_type
6514						 (DR_REF (first_dr)), 0));
6515		    vec_dest = vect_create_destination_var (scalar_dest,
6516							    vectype);
6517		    new_stmt = gimple_build_assign (vec_dest, data_ref);
6518		    new_temp = make_ssa_name (vec_dest, new_stmt);
6519		    gimple_assign_set_lhs (new_stmt, new_temp);
6520		    gimple_set_vdef (new_stmt, gimple_vdef (stmt));
6521		    gimple_set_vuse (new_stmt, gimple_vuse (stmt));
6522		    vect_finish_stmt_generation (stmt, new_stmt, gsi);
6523		    msq = new_temp;
6524
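		    /* Compute the address of the second aligned load: the
		       original pointer advanced by one vector size minus one
		       byte, aligned down, so that the two aligned loads
		       together cover the requested unaligned vector.  */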
6525		    bump = size_binop (MULT_EXPR, vs,
6526				       TYPE_SIZE_UNIT (elem_type));
6527		    bump = size_binop (MINUS_EXPR, bump, size_one_node);
6528		    ptr = bump_vector_ptr (dataref_ptr, NULL, gsi, stmt, bump);
6529		    new_stmt = gimple_build_assign
6530				 (NULL_TREE, BIT_AND_EXPR, ptr,
6531				  build_int_cst
6532				  (TREE_TYPE (ptr),
6533				   -(HOST_WIDE_INT)TYPE_ALIGN_UNIT (vectype)));
6534		    ptr = copy_ssa_name (dataref_ptr, new_stmt);
6535		    gimple_assign_set_lhs (new_stmt, ptr);
6536		    vect_finish_stmt_generation (stmt, new_stmt, gsi);
6537		    data_ref
6538		      = build2 (MEM_REF, vectype, ptr,
6539				build_int_cst (reference_alias_ptr_type
6540						 (DR_REF (first_dr)), 0));
6541		    break;
6542		  }
6543		case dr_explicit_realign_optimized:
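		  /* For the optimized realignment scheme only the aligned
		     load of the current vector is emitted here; MSQ is
		     carried in a loop PHI and the LSQ created in step 3
		     below becomes the MSQ of the next iteration.  */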
6544		  new_temp = copy_ssa_name (dataref_ptr);
6545		  new_stmt = gimple_build_assign
6546			       (new_temp, BIT_AND_EXPR, dataref_ptr,
6547				build_int_cst
6548				  (TREE_TYPE (dataref_ptr),
6549				   -(HOST_WIDE_INT)TYPE_ALIGN_UNIT (vectype)));
6550		  vect_finish_stmt_generation (stmt, new_stmt, gsi);
6551		  data_ref
6552		    = build2 (MEM_REF, vectype, new_temp,
6553			      build_int_cst (reference_alias_ptr_type
6554					       (DR_REF (first_dr)), 0));
6555		  break;
6556		default:
6557		  gcc_unreachable ();
6558		}
6559	      vec_dest = vect_create_destination_var (scalar_dest, vectype);
6560	      new_stmt = gimple_build_assign (vec_dest, data_ref);
6561	      new_temp = make_ssa_name (vec_dest, new_stmt);
6562	      gimple_assign_set_lhs (new_stmt, new_temp);
6563	      vect_finish_stmt_generation (stmt, new_stmt, gsi);
6564
6565	      /* 3. Handle explicit realignment if necessary/supported.
6566		 Create in loop:
6567		   vec_dest = realign_load (msq, lsq, realignment_token)  */
6568	      if (alignment_support_scheme == dr_explicit_realign_optimized
6569		  || alignment_support_scheme == dr_explicit_realign)
6570		{
6571		  lsq = gimple_assign_lhs (new_stmt);
6572		  if (!realignment_token)
6573		    realignment_token = dataref_ptr;
6574		  vec_dest = vect_create_destination_var (scalar_dest, vectype);
6575		  new_stmt = gimple_build_assign (vec_dest, REALIGN_LOAD_EXPR,
6576						  msq, lsq, realignment_token);
6577		  new_temp = make_ssa_name (vec_dest, new_stmt);
6578		  gimple_assign_set_lhs (new_stmt, new_temp);
6579		  vect_finish_stmt_generation (stmt, new_stmt, gsi);
6580
6581		  if (alignment_support_scheme == dr_explicit_realign_optimized)
6582		    {
6583		      gcc_assert (phi);
6584		      if (i == vec_num - 1 && j == ncopies - 1)
6585			add_phi_arg (phi, lsq,
6586				     loop_latch_edge (containing_loop),
6587				     UNKNOWN_LOCATION);
6588		      msq = lsq;
6589		    }
6590		}
6591
6592	      /* 4. Handle invariant-load.  */
6593	      if (inv_p && !bb_vinfo)
6594		{
6595		  gcc_assert (!grouped_load);
6596		  /* If we have versioned for aliasing or the loop doesn't
6597		     have any data dependencies that would preclude this,
6598		     then we are sure this is a loop invariant load and
6599		     thus we can insert it on the preheader edge.  */
6600		  if (LOOP_VINFO_NO_DATA_DEPENDENCIES (loop_vinfo)
6601		      && !nested_in_vect_loop
6602		      && hoist_defs_of_uses (stmt, loop))
6603		    {
6604		      if (dump_enabled_p ())
6605			{
6606			  dump_printf_loc (MSG_NOTE, vect_location,
6607					   "hoisting out of the vectorized "
6608					   "loop: ");
6609			  dump_gimple_stmt (MSG_NOTE, TDF_SLIM, stmt, 0);
6610			}
6611		      tree tem = copy_ssa_name (scalar_dest);
6612		      gsi_insert_on_edge_immediate
6613			(loop_preheader_edge (loop),
6614			 gimple_build_assign (tem,
6615					      unshare_expr
6616					        (gimple_assign_rhs1 (stmt))));
6617		      new_temp = vect_init_vector (stmt, tem, vectype, NULL);
6618		    }
6619		  else
6620		    {
6621		      gimple_stmt_iterator gsi2 = *gsi;
6622		      gsi_next (&gsi2);
6623		      new_temp = vect_init_vector (stmt, scalar_dest,
6624						   vectype, &gsi2);
6625		    }
6626		  new_stmt = SSA_NAME_DEF_STMT (new_temp);
6627		  set_vinfo_for_stmt (new_stmt,
6628				      new_stmt_vec_info (new_stmt, loop_vinfo,
6629							 bb_vinfo));
6630		}
6631
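	      /* For a negative-step access the vector was loaded starting at
		 the lowest address of the group, so reverse its elements to
		 match the scalar iteration order.  */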
6632	      if (negative)
6633		{
6634		  tree perm_mask = perm_mask_for_reverse (vectype);
6635		  new_temp = permute_vec_elements (new_temp, new_temp,
6636						   perm_mask, stmt, gsi);
6637		  new_stmt = SSA_NAME_DEF_STMT (new_temp);
6638		}
6639
6640	      /* Collect vector loads and later create their permutation in
6641		 vect_transform_grouped_load ().  */
6642	      if (grouped_load || slp_perm)
6643		dr_chain.quick_push (new_temp);
6644
6645	      /* Store vector loads in the corresponding SLP_NODE.  */
6646	      if (slp && !slp_perm)
6647		SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
6648	    }
6649	  /* Bump the vector pointer to account for a gap.  */
6650	  if (slp && group_gap != 0)
6651	    {
6652	      tree bump = size_binop (MULT_EXPR,
6653				      TYPE_SIZE_UNIT (elem_type),
6654				      size_int (group_gap));
6655	      dataref_ptr = bump_vector_ptr (dataref_ptr, ptr_incr, gsi,
6656					     stmt, bump);
6657	    }
6658	}
6659
6660      if (slp && !slp_perm)
6661	continue;
6662
6663      if (slp_perm)
6664        {
6665          if (!vect_transform_slp_perm_load (slp_node, dr_chain, gsi, vf,
6666                                             slp_node_instance, false))
6667            {
6668              dr_chain.release ();
6669              return false;
6670            }
6671        }
6672      else
6673        {
6674          if (grouped_load)
6675  	    {
6676	      if (!load_lanes_p)
6677		vect_transform_grouped_load (stmt, dr_chain, group_size, gsi);
6678	      *vec_stmt = STMT_VINFO_VEC_STMT (stmt_info);
6679	    }
6680          else
6681	    {
6682	      if (j == 0)
6683	        STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
6684	      else
6685	        STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
6686	      prev_stmt_info = vinfo_for_stmt (new_stmt);
6687	    }
6688        }
6689      dr_chain.release ();
6690    }
6691
6692  return true;
6693}
6694
6695/* Function vect_is_simple_cond.
6696
6697   Input:
6698   LOOP - the loop that is being vectorized.
6699   COND - Condition that is checked for simple use.
6700
6701   Output:
6702   *COMP_VECTYPE - the vector type for the comparison.
6703
   Returns whether COND can be vectorized.  Checks whether the
   condition operands are supportable using vect_is_simple_use_1.  */
6706
6707static bool
6708vect_is_simple_cond (tree cond, gimple stmt, loop_vec_info loop_vinfo,
6709		     bb_vec_info bb_vinfo, tree *comp_vectype)
6710{
6711  tree lhs, rhs;
6712  tree def;
6713  enum vect_def_type dt;
6714  tree vectype1 = NULL_TREE, vectype2 = NULL_TREE;
6715
6716  if (!COMPARISON_CLASS_P (cond))
6717    return false;
6718
6719  lhs = TREE_OPERAND (cond, 0);
6720  rhs = TREE_OPERAND (cond, 1);
6721
6722  if (TREE_CODE (lhs) == SSA_NAME)
6723    {
6724      gimple lhs_def_stmt = SSA_NAME_DEF_STMT (lhs);
6725      if (!vect_is_simple_use_1 (lhs, stmt, loop_vinfo, bb_vinfo,
6726				 &lhs_def_stmt, &def, &dt, &vectype1))
6727	return false;
6728    }
6729  else if (TREE_CODE (lhs) != INTEGER_CST && TREE_CODE (lhs) != REAL_CST
6730	   && TREE_CODE (lhs) != FIXED_CST)
6731    return false;
6732
6733  if (TREE_CODE (rhs) == SSA_NAME)
6734    {
6735      gimple rhs_def_stmt = SSA_NAME_DEF_STMT (rhs);
6736      if (!vect_is_simple_use_1 (rhs, stmt, loop_vinfo, bb_vinfo,
6737				 &rhs_def_stmt, &def, &dt, &vectype2))
6738	return false;
6739    }
6740  else if (TREE_CODE (rhs) != INTEGER_CST && TREE_CODE (rhs) != REAL_CST
6741	   && TREE_CODE (rhs) != FIXED_CST)
6742    return false;
6743
6744  *comp_vectype = vectype1 ? vectype1 : vectype2;
6745  return true;
6746}
6747
6748/* vectorizable_condition.
6749
   Check if STMT is a conditional modify expression that can be vectorized.
6751   If VEC_STMT is also passed, vectorize the STMT: create a vectorized
6752   stmt using VEC_COND_EXPR  to replace it, put it in VEC_STMT, and insert it
6753   at GSI.
6754
   When STMT is vectorized as a nested cycle, REDUC_DEF is the vector variable
   to be used at REDUC_INDEX (in the then clause if REDUC_INDEX is 1, and in
   the else clause if it is 2).
6758
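   For example, the scalar statement

     S:  x = a > b ? c : d;

   is replaced, per vector copy, by a statement of the form

     VS: vx = VEC_COND_EXPR <va > vb, vc, vd>;
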
6759   Return FALSE if not a vectorizable STMT, TRUE otherwise.  */
6760
6761bool
6762vectorizable_condition (gimple stmt, gimple_stmt_iterator *gsi,
6763			gimple *vec_stmt, tree reduc_def, int reduc_index,
6764			slp_tree slp_node)
6765{
6766  tree scalar_dest = NULL_TREE;
6767  tree vec_dest = NULL_TREE;
6768  tree cond_expr, then_clause, else_clause;
6769  stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
6770  tree vectype = STMT_VINFO_VECTYPE (stmt_info);
6771  tree comp_vectype = NULL_TREE;
6772  tree vec_cond_lhs = NULL_TREE, vec_cond_rhs = NULL_TREE;
6773  tree vec_then_clause = NULL_TREE, vec_else_clause = NULL_TREE;
6774  tree vec_compare, vec_cond_expr;
6775  tree new_temp;
6776  loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
6777  tree def;
6778  enum vect_def_type dt, dts[4];
6779  int nunits = TYPE_VECTOR_SUBPARTS (vectype);
6780  int ncopies;
6781  enum tree_code code;
6782  stmt_vec_info prev_stmt_info = NULL;
6783  int i, j;
6784  bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
6785  vec<tree> vec_oprnds0 = vNULL;
6786  vec<tree> vec_oprnds1 = vNULL;
6787  vec<tree> vec_oprnds2 = vNULL;
6788  vec<tree> vec_oprnds3 = vNULL;
6789  tree vec_cmp_type;
6790
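  /* NCOPIES is the number of vector stmts needed per scalar iteration;
     e.g. with a vectorization factor of 8 and 4 elements per vector two
     vector COND_EXPRs are generated for each scalar COND_EXPR.  */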
6791  if (slp_node || PURE_SLP_STMT (stmt_info))
6792    ncopies = 1;
6793  else
6794    ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits;
6795
6796  gcc_assert (ncopies >= 1);
6797  if (reduc_index && ncopies > 1)
6798    return false; /* FORNOW */
6799
6800  if (reduc_index && STMT_SLP_TYPE (stmt_info))
6801    return false;
6802
6803  if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
6804    return false;
6805
6806  if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def
6807      && !(STMT_VINFO_DEF_TYPE (stmt_info) == vect_nested_cycle
6808           && reduc_def))
6809    return false;
6810
6811  /* FORNOW: not yet supported.  */
6812  if (STMT_VINFO_LIVE_P (stmt_info))
6813    {
6814      if (dump_enabled_p ())
6815        dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
6816                         "value used after loop.\n");
6817      return false;
6818    }
6819
  /* Is STMT a vectorizable conditional operation?  */
6821  if (!is_gimple_assign (stmt))
6822    return false;
6823
6824  code = gimple_assign_rhs_code (stmt);
6825
6826  if (code != COND_EXPR)
6827    return false;
6828
6829  cond_expr = gimple_assign_rhs1 (stmt);
6830  then_clause = gimple_assign_rhs2 (stmt);
6831  else_clause = gimple_assign_rhs3 (stmt);
6832
6833  if (!vect_is_simple_cond (cond_expr, stmt, loop_vinfo, bb_vinfo,
6834			    &comp_vectype)
6835      || !comp_vectype)
6836    return false;
6837
6838  if (TREE_CODE (then_clause) == SSA_NAME)
6839    {
6840      gimple then_def_stmt = SSA_NAME_DEF_STMT (then_clause);
6841      if (!vect_is_simple_use (then_clause, stmt, loop_vinfo, bb_vinfo,
6842			       &then_def_stmt, &def, &dt))
6843	return false;
6844    }
6845  else if (TREE_CODE (then_clause) != INTEGER_CST
6846	   && TREE_CODE (then_clause) != REAL_CST
6847	   && TREE_CODE (then_clause) != FIXED_CST)
6848    return false;
6849
6850  if (TREE_CODE (else_clause) == SSA_NAME)
6851    {
6852      gimple else_def_stmt = SSA_NAME_DEF_STMT (else_clause);
6853      if (!vect_is_simple_use (else_clause, stmt, loop_vinfo, bb_vinfo,
6854			       &else_def_stmt, &def, &dt))
6855	return false;
6856    }
6857  else if (TREE_CODE (else_clause) != INTEGER_CST
6858	   && TREE_CODE (else_clause) != REAL_CST
6859	   && TREE_CODE (else_clause) != FIXED_CST)
6860    return false;
6861
6862  unsigned int prec = GET_MODE_BITSIZE (TYPE_MODE (TREE_TYPE (vectype)));
  /* The result of a vector comparison should be of signed integer type.  */
6864  tree cmp_type = build_nonstandard_integer_type (prec, 0);
6865  vec_cmp_type = get_same_sized_vectype (cmp_type, vectype);
6866  if (vec_cmp_type == NULL_TREE)
6867    return false;
6868
6869  if (!vec_stmt)
6870    {
6871      STMT_VINFO_TYPE (stmt_info) = condition_vec_info_type;
6872      return expand_vec_cond_expr_p (vectype, comp_vectype);
6873    }
6874
6875  /* Transform.  */
6876
6877  if (!slp_node)
6878    {
6879      vec_oprnds0.create (1);
6880      vec_oprnds1.create (1);
6881      vec_oprnds2.create (1);
6882      vec_oprnds3.create (1);
6883    }
6884
6885  /* Handle def.  */
6886  scalar_dest = gimple_assign_lhs (stmt);
6887  vec_dest = vect_create_destination_var (scalar_dest, vectype);
6888
6889  /* Handle cond expr.  */
6890  for (j = 0; j < ncopies; j++)
6891    {
6892      gassign *new_stmt = NULL;
6893      if (j == 0)
6894	{
6895          if (slp_node)
6896            {
6897              auto_vec<tree, 4> ops;
6898	      auto_vec<vec<tree>, 4> vec_defs;
6899
6900              ops.safe_push (TREE_OPERAND (cond_expr, 0));
6901              ops.safe_push (TREE_OPERAND (cond_expr, 1));
6902              ops.safe_push (then_clause);
6903              ops.safe_push (else_clause);
6904              vect_get_slp_defs (ops, slp_node, &vec_defs, -1);
6905	      vec_oprnds3 = vec_defs.pop ();
6906	      vec_oprnds2 = vec_defs.pop ();
6907	      vec_oprnds1 = vec_defs.pop ();
6908	      vec_oprnds0 = vec_defs.pop ();
6909
6910              ops.release ();
6911              vec_defs.release ();
6912            }
6913          else
6914            {
6915	      gimple gtemp;
6916	      vec_cond_lhs =
6917	      vect_get_vec_def_for_operand (TREE_OPERAND (cond_expr, 0),
6918					    stmt, NULL);
6919	      vect_is_simple_use (TREE_OPERAND (cond_expr, 0), stmt,
6920				  loop_vinfo, NULL, &gtemp, &def, &dts[0]);
6921
6922	      vec_cond_rhs =
6923		vect_get_vec_def_for_operand (TREE_OPERAND (cond_expr, 1),
6924						stmt, NULL);
6925	      vect_is_simple_use (TREE_OPERAND (cond_expr, 1), stmt,
6926				  loop_vinfo, NULL, &gtemp, &def, &dts[1]);
6927	      if (reduc_index == 1)
6928		vec_then_clause = reduc_def;
6929	      else
6930		{
6931		  vec_then_clause = vect_get_vec_def_for_operand (then_clause,
6932		 		  			      stmt, NULL);
6933	          vect_is_simple_use (then_clause, stmt, loop_vinfo,
6934					  NULL, &gtemp, &def, &dts[2]);
6935		}
6936	      if (reduc_index == 2)
6937		vec_else_clause = reduc_def;
6938	      else
6939		{
6940		  vec_else_clause = vect_get_vec_def_for_operand (else_clause,
6941							      stmt, NULL);
6942		  vect_is_simple_use (else_clause, stmt, loop_vinfo,
6943				  NULL, &gtemp, &def, &dts[3]);
6944		}
6945	    }
6946	}
6947      else
6948	{
6949	  vec_cond_lhs = vect_get_vec_def_for_stmt_copy (dts[0],
6950							 vec_oprnds0.pop ());
6951	  vec_cond_rhs = vect_get_vec_def_for_stmt_copy (dts[1],
6952							 vec_oprnds1.pop ());
6953	  vec_then_clause = vect_get_vec_def_for_stmt_copy (dts[2],
6954							    vec_oprnds2.pop ());
6955	  vec_else_clause = vect_get_vec_def_for_stmt_copy (dts[3],
6956							    vec_oprnds3.pop ());
6957	}
6958
6959      if (!slp_node)
6960        {
6961	  vec_oprnds0.quick_push (vec_cond_lhs);
6962	  vec_oprnds1.quick_push (vec_cond_rhs);
6963	  vec_oprnds2.quick_push (vec_then_clause);
6964	  vec_oprnds3.quick_push (vec_else_clause);
6965	}
6966
6967      /* Arguments are ready.  Create the new vector stmt.  */
6968      FOR_EACH_VEC_ELT (vec_oprnds0, i, vec_cond_lhs)
6969        {
6970          vec_cond_rhs = vec_oprnds1[i];
6971          vec_then_clause = vec_oprnds2[i];
6972          vec_else_clause = vec_oprnds3[i];
6973
6974	  vec_compare = build2 (TREE_CODE (cond_expr), vec_cmp_type,
6975				vec_cond_lhs, vec_cond_rhs);
6976          vec_cond_expr = build3 (VEC_COND_EXPR, vectype,
6977 		         vec_compare, vec_then_clause, vec_else_clause);
6978
6979          new_stmt = gimple_build_assign (vec_dest, vec_cond_expr);
6980          new_temp = make_ssa_name (vec_dest, new_stmt);
6981          gimple_assign_set_lhs (new_stmt, new_temp);
6982          vect_finish_stmt_generation (stmt, new_stmt, gsi);
6983          if (slp_node)
6984            SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
6985        }
6986
6987        if (slp_node)
6988          continue;
6989
6990        if (j == 0)
6991          STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
6992        else
6993          STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
6994
6995        prev_stmt_info = vinfo_for_stmt (new_stmt);
6996    }
6997
6998  vec_oprnds0.release ();
6999  vec_oprnds1.release ();
7000  vec_oprnds2.release ();
7001  vec_oprnds3.release ();
7002
7003  return true;
7004}
7005
7006
7007/* Make sure the statement is vectorizable.  */
7008
7009bool
7010vect_analyze_stmt (gimple stmt, bool *need_to_vectorize, slp_tree node)
7011{
7012  stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
7013  bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
7014  enum vect_relevant relevance = STMT_VINFO_RELEVANT (stmt_info);
7015  bool ok;
7016  tree scalar_type, vectype;
7017  gimple pattern_stmt;
7018  gimple_seq pattern_def_seq;
7019
7020  if (dump_enabled_p ())
7021    {
7022      dump_printf_loc (MSG_NOTE, vect_location, "==> examining statement: ");
7023      dump_gimple_stmt (MSG_NOTE, TDF_SLIM, stmt, 0);
7024    }
7025
7026  if (gimple_has_volatile_ops (stmt))
7027    {
7028      if (dump_enabled_p ())
7029        dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
7030                         "not vectorized: stmt has volatile operands\n");
7031
7032      return false;
7033    }
7034
7035  /* Skip stmts that do not need to be vectorized. In loops this is expected
7036     to include:
7037     - the COND_EXPR which is the loop exit condition
7038     - any LABEL_EXPRs in the loop
7039     - computations that are used only for array indexing or loop control.
7040     In basic blocks we only analyze statements that are a part of some SLP
7041     instance, therefore, all the statements are relevant.
7042
     A pattern statement needs to be analyzed instead of the original statement
     if the original statement is not relevant.  Otherwise, we analyze both
     statements.  In basic blocks we are called from some SLP instance
     traversal; in that case we do not analyze pattern stmts instead, as
     the pattern stmts will already be part of an SLP instance.  */
7048
7049  pattern_stmt = STMT_VINFO_RELATED_STMT (stmt_info);
7050  if (!STMT_VINFO_RELEVANT_P (stmt_info)
7051      && !STMT_VINFO_LIVE_P (stmt_info))
7052    {
7053      if (STMT_VINFO_IN_PATTERN_P (stmt_info)
7054          && pattern_stmt
7055          && (STMT_VINFO_RELEVANT_P (vinfo_for_stmt (pattern_stmt))
7056              || STMT_VINFO_LIVE_P (vinfo_for_stmt (pattern_stmt))))
7057        {
7058          /* Analyze PATTERN_STMT instead of the original stmt.  */
7059          stmt = pattern_stmt;
7060          stmt_info = vinfo_for_stmt (pattern_stmt);
7061          if (dump_enabled_p ())
7062            {
7063              dump_printf_loc (MSG_NOTE, vect_location,
7064                               "==> examining pattern statement: ");
7065              dump_gimple_stmt (MSG_NOTE, TDF_SLIM, stmt, 0);
7066            }
7067        }
7068      else
7069        {
7070          if (dump_enabled_p ())
7071            dump_printf_loc (MSG_NOTE, vect_location, "irrelevant.\n");
7072
7073          return true;
7074        }
7075    }
7076  else if (STMT_VINFO_IN_PATTERN_P (stmt_info)
7077	   && node == NULL
7078           && pattern_stmt
7079           && (STMT_VINFO_RELEVANT_P (vinfo_for_stmt (pattern_stmt))
7080               || STMT_VINFO_LIVE_P (vinfo_for_stmt (pattern_stmt))))
7081    {
7082      /* Analyze PATTERN_STMT too.  */
7083      if (dump_enabled_p ())
7084        {
7085          dump_printf_loc (MSG_NOTE, vect_location,
7086                           "==> examining pattern statement: ");
7087          dump_gimple_stmt (MSG_NOTE, TDF_SLIM, stmt, 0);
7088        }
7089
7090      if (!vect_analyze_stmt (pattern_stmt, need_to_vectorize, node))
7091        return false;
7092   }
7093
7094  if (is_pattern_stmt_p (stmt_info)
7095      && node == NULL
7096      && (pattern_def_seq = STMT_VINFO_PATTERN_DEF_SEQ (stmt_info)))
7097    {
7098      gimple_stmt_iterator si;
7099
7100      for (si = gsi_start (pattern_def_seq); !gsi_end_p (si); gsi_next (&si))
7101	{
7102	  gimple pattern_def_stmt = gsi_stmt (si);
7103	  if (STMT_VINFO_RELEVANT_P (vinfo_for_stmt (pattern_def_stmt))
7104	      || STMT_VINFO_LIVE_P (vinfo_for_stmt (pattern_def_stmt)))
7105	    {
7106	      /* Analyze def stmt of STMT if it's a pattern stmt.  */
7107	      if (dump_enabled_p ())
7108		{
7109		  dump_printf_loc (MSG_NOTE, vect_location,
7110                                   "==> examining pattern def statement: ");
7111		  dump_gimple_stmt (MSG_NOTE, TDF_SLIM, pattern_def_stmt, 0);
7112		}
7113
7114	      if (!vect_analyze_stmt (pattern_def_stmt,
7115				      need_to_vectorize, node))
7116		return false;
7117	    }
7118	}
7119    }
7120
7121  switch (STMT_VINFO_DEF_TYPE (stmt_info))
7122    {
7123      case vect_internal_def:
7124        break;
7125
7126      case vect_reduction_def:
7127      case vect_nested_cycle:
7128         gcc_assert (!bb_vinfo && (relevance == vect_used_in_outer
7129                     || relevance == vect_used_in_outer_by_reduction
7130                     || relevance == vect_unused_in_scope));
7131         break;
7132
7133      case vect_induction_def:
7134      case vect_constant_def:
7135      case vect_external_def:
7136      case vect_unknown_def_type:
7137      default:
7138        gcc_unreachable ();
7139    }
7140
7141  if (bb_vinfo)
7142    {
7143      gcc_assert (PURE_SLP_STMT (stmt_info));
7144
7145      scalar_type = TREE_TYPE (gimple_get_lhs (stmt));
7146      if (dump_enabled_p ())
7147        {
7148          dump_printf_loc (MSG_NOTE, vect_location,
7149                           "get vectype for scalar type:  ");
7150          dump_generic_expr (MSG_NOTE, TDF_SLIM, scalar_type);
7151          dump_printf (MSG_NOTE, "\n");
7152        }
7153
7154      vectype = get_vectype_for_scalar_type (scalar_type);
7155      if (!vectype)
7156        {
7157          if (dump_enabled_p ())
7158            {
7159               dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
7160                                "not SLPed: unsupported data-type ");
7161               dump_generic_expr (MSG_MISSED_OPTIMIZATION, TDF_SLIM,
7162                                  scalar_type);
7163              dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
7164            }
7165          return false;
7166        }
7167
7168      if (dump_enabled_p ())
7169        {
7170          dump_printf_loc (MSG_NOTE, vect_location, "vectype:  ");
7171          dump_generic_expr (MSG_NOTE, TDF_SLIM, vectype);
7172          dump_printf (MSG_NOTE, "\n");
7173        }
7174
7175      STMT_VINFO_VECTYPE (stmt_info) = vectype;
7176   }
7177
7178  if (STMT_VINFO_RELEVANT_P (stmt_info))
7179    {
7180      gcc_assert (!VECTOR_MODE_P (TYPE_MODE (gimple_expr_type (stmt))));
7181      gcc_assert (STMT_VINFO_VECTYPE (stmt_info)
7182		  || (is_gimple_call (stmt)
7183		      && gimple_call_lhs (stmt) == NULL_TREE));
7184      *need_to_vectorize = true;
7185    }
7186
7187   ok = true;
7188   if (!bb_vinfo
7189       && (STMT_VINFO_RELEVANT_P (stmt_info)
7190           || STMT_VINFO_DEF_TYPE (stmt_info) == vect_reduction_def))
7191      ok = (vectorizable_simd_clone_call (stmt, NULL, NULL, NULL)
7192	    || vectorizable_conversion (stmt, NULL, NULL, NULL)
7193            || vectorizable_shift (stmt, NULL, NULL, NULL)
7194            || vectorizable_operation (stmt, NULL, NULL, NULL)
7195            || vectorizable_assignment (stmt, NULL, NULL, NULL)
7196            || vectorizable_load (stmt, NULL, NULL, NULL, NULL)
7197	    || vectorizable_call (stmt, NULL, NULL, NULL)
7198            || vectorizable_store (stmt, NULL, NULL, NULL)
7199            || vectorizable_reduction (stmt, NULL, NULL, NULL)
7200            || vectorizable_condition (stmt, NULL, NULL, NULL, 0, NULL));
7201    else
7202      {
7203        if (bb_vinfo)
7204	  ok = (vectorizable_simd_clone_call (stmt, NULL, NULL, node)
7205		|| vectorizable_conversion (stmt, NULL, NULL, node)
7206		|| vectorizable_shift (stmt, NULL, NULL, node)
7207                || vectorizable_operation (stmt, NULL, NULL, node)
7208                || vectorizable_assignment (stmt, NULL, NULL, node)
7209                || vectorizable_load (stmt, NULL, NULL, node, NULL)
7210		|| vectorizable_call (stmt, NULL, NULL, node)
7211                || vectorizable_store (stmt, NULL, NULL, node)
7212                || vectorizable_condition (stmt, NULL, NULL, NULL, 0, node));
7213      }
7214
7215  if (!ok)
7216    {
7217      if (dump_enabled_p ())
7218        {
7219          dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
7220                           "not vectorized: relevant stmt not ");
7221          dump_printf (MSG_MISSED_OPTIMIZATION, "supported: ");
7222          dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM, stmt, 0);
7223        }
7224
7225      return false;
7226    }
7227
7228  if (bb_vinfo)
7229    return true;
7230
  /* Stmts that are (also) "live" (i.e., used outside the loop) need
     extra handling, except for vectorizable reductions.  */
7233  if (STMT_VINFO_LIVE_P (stmt_info)
7234      && STMT_VINFO_TYPE (stmt_info) != reduc_vec_info_type)
7235    ok = vectorizable_live_operation (stmt, NULL, NULL);
7236
7237  if (!ok)
7238    {
7239      if (dump_enabled_p ())
7240        {
7241          dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
7242                           "not vectorized: live stmt not ");
7243          dump_printf (MSG_MISSED_OPTIMIZATION,  "supported: ");
7244          dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM, stmt, 0);
7245        }
7246
7247       return false;
7248    }
7249
7250  return true;
7251}
7252
7253
7254/* Function vect_transform_stmt.
7255
7256   Create a vectorized stmt to replace STMT, and insert it at BSI.  */
7257
7258bool
7259vect_transform_stmt (gimple stmt, gimple_stmt_iterator *gsi,
7260		     bool *grouped_store, slp_tree slp_node,
7261                     slp_instance slp_node_instance)
7262{
7263  bool is_store = false;
7264  gimple vec_stmt = NULL;
7265  stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
7266  bool done;
7267
7268  switch (STMT_VINFO_TYPE (stmt_info))
7269    {
7270    case type_demotion_vec_info_type:
7271    case type_promotion_vec_info_type:
7272    case type_conversion_vec_info_type:
7273      done = vectorizable_conversion (stmt, gsi, &vec_stmt, slp_node);
7274      gcc_assert (done);
7275      break;
7276
7277    case induc_vec_info_type:
7278      gcc_assert (!slp_node);
7279      done = vectorizable_induction (stmt, gsi, &vec_stmt);
7280      gcc_assert (done);
7281      break;
7282
7283    case shift_vec_info_type:
7284      done = vectorizable_shift (stmt, gsi, &vec_stmt, slp_node);
7285      gcc_assert (done);
7286      break;
7287
7288    case op_vec_info_type:
7289      done = vectorizable_operation (stmt, gsi, &vec_stmt, slp_node);
7290      gcc_assert (done);
7291      break;
7292
7293    case assignment_vec_info_type:
7294      done = vectorizable_assignment (stmt, gsi, &vec_stmt, slp_node);
7295      gcc_assert (done);
7296      break;
7297
7298    case load_vec_info_type:
7299      done = vectorizable_load (stmt, gsi, &vec_stmt, slp_node,
7300                                slp_node_instance);
7301      gcc_assert (done);
7302      break;
7303
7304    case store_vec_info_type:
7305      done = vectorizable_store (stmt, gsi, &vec_stmt, slp_node);
7306      gcc_assert (done);
7307      if (STMT_VINFO_GROUPED_ACCESS (stmt_info) && !slp_node)
7308	{
7309	  /* In case of interleaving, the whole chain is vectorized when the
7310	     last store in the chain is reached.  Store stmts before the last
	     one are skipped, and their stmt_vec_info shouldn't be freed
	     meanwhile.  */
7313	  *grouped_store = true;
7314	  if (STMT_VINFO_VEC_STMT (stmt_info))
7315	    is_store = true;
7316	  }
7317      else
7318	is_store = true;
7319      break;
7320
7321    case condition_vec_info_type:
7322      done = vectorizable_condition (stmt, gsi, &vec_stmt, NULL, 0, slp_node);
7323      gcc_assert (done);
7324      break;
7325
7326    case call_vec_info_type:
7327      done = vectorizable_call (stmt, gsi, &vec_stmt, slp_node);
7328      stmt = gsi_stmt (*gsi);
7329      if (is_gimple_call (stmt)
7330	  && gimple_call_internal_p (stmt)
7331	  && gimple_call_internal_fn (stmt) == IFN_MASK_STORE)
7332	is_store = true;
7333      break;
7334
7335    case call_simd_clone_vec_info_type:
7336      done = vectorizable_simd_clone_call (stmt, gsi, &vec_stmt, slp_node);
7337      stmt = gsi_stmt (*gsi);
7338      break;
7339
7340    case reduc_vec_info_type:
7341      done = vectorizable_reduction (stmt, gsi, &vec_stmt, slp_node);
7342      gcc_assert (done);
7343      break;
7344
7345    default:
7346      if (!STMT_VINFO_LIVE_P (stmt_info))
7347	{
7348	  if (dump_enabled_p ())
7349	    dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
7350                             "stmt not supported.\n");
7351	  gcc_unreachable ();
7352	}
7353    }
7354
7355  /* Handle inner-loop stmts whose DEF is used in the loop-nest that
7356     is being vectorized, but outside the immediately enclosing loop.  */
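  /* E.g. when vectorizing the outer loop of

	 for (i...)			<-- outer-loop, being vectorized
	   {
	     for (j...)  s_1 = ...;	<-- inner-loop def
	     ... = s_1;			<-- outer-loop use
	   }

     the vector def created for S_1 is recorded on the inner-loop exit PHI
     so that the vectorized outer-loop use can find it.  */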
7357  if (vec_stmt
7358      && STMT_VINFO_LOOP_VINFO (stmt_info)
7359      && nested_in_vect_loop_p (LOOP_VINFO_LOOP (
7360                                STMT_VINFO_LOOP_VINFO (stmt_info)), stmt)
7361      && STMT_VINFO_TYPE (stmt_info) != reduc_vec_info_type
7362      && (STMT_VINFO_RELEVANT (stmt_info) == vect_used_in_outer
7363          || STMT_VINFO_RELEVANT (stmt_info) ==
7364                                           vect_used_in_outer_by_reduction))
7365    {
7366      struct loop *innerloop = LOOP_VINFO_LOOP (
7367                                STMT_VINFO_LOOP_VINFO (stmt_info))->inner;
7368      imm_use_iterator imm_iter;
7369      use_operand_p use_p;
7370      tree scalar_dest;
7371      gimple exit_phi;
7372
7373      if (dump_enabled_p ())
7374        dump_printf_loc (MSG_NOTE, vect_location,
7375                         "Record the vdef for outer-loop vectorization.\n");
7376
      /* Find the relevant loop-exit phi-node, and record the vec_stmt there
7378        (to be used when vectorizing outer-loop stmts that use the DEF of
7379        STMT).  */
7380      if (gimple_code (stmt) == GIMPLE_PHI)
7381        scalar_dest = PHI_RESULT (stmt);
7382      else
7383        scalar_dest = gimple_assign_lhs (stmt);
7384
7385      FOR_EACH_IMM_USE_FAST (use_p, imm_iter, scalar_dest)
7386       {
7387         if (!flow_bb_inside_loop_p (innerloop, gimple_bb (USE_STMT (use_p))))
7388           {
7389             exit_phi = USE_STMT (use_p);
7390             STMT_VINFO_VEC_STMT (vinfo_for_stmt (exit_phi)) = vec_stmt;
7391           }
7392       }
7393    }
7394
7395  /* Handle stmts whose DEF is used outside the loop-nest that is
7396     being vectorized.  */
7397  if (STMT_VINFO_LIVE_P (stmt_info)
7398      && STMT_VINFO_TYPE (stmt_info) != reduc_vec_info_type)
7399    {
7400      done = vectorizable_live_operation (stmt, gsi, &vec_stmt);
7401      gcc_assert (done);
7402    }
7403
7404  if (vec_stmt)
7405    STMT_VINFO_VEC_STMT (stmt_info) = vec_stmt;
7406
7407  return is_store;
7408}
7409
7410
7411/* Remove a group of stores (for SLP or interleaving), free their
7412   stmt_vec_info.  */
7413
7414void
7415vect_remove_stores (gimple first_stmt)
7416{
7417  gimple next = first_stmt;
7418  gimple tmp;
7419  gimple_stmt_iterator next_si;
7420
7421  while (next)
7422    {
7423      stmt_vec_info stmt_info = vinfo_for_stmt (next);
7424
7425      tmp = GROUP_NEXT_ELEMENT (stmt_info);
7426      if (is_pattern_stmt_p (stmt_info))
7427	next = STMT_VINFO_RELATED_STMT (stmt_info);
7428      /* Free the attached stmt_vec_info and remove the stmt.  */
7429      next_si = gsi_for_stmt (next);
7430      unlink_stmt_vdef (next);
7431      gsi_remove (&next_si, true);
7432      release_defs (next);
7433      free_stmt_vec_info (next);
7434      next = tmp;
7435    }
7436}
7437
7438
7439/* Function new_stmt_vec_info.
7440
7441   Create and initialize a new stmt_vec_info struct for STMT.  */
7442
7443stmt_vec_info
7444new_stmt_vec_info (gimple stmt, loop_vec_info loop_vinfo,
7445                   bb_vec_info bb_vinfo)
7446{
7447  stmt_vec_info res;
7448  res = (stmt_vec_info) xcalloc (1, sizeof (struct _stmt_vec_info));
7449
7450  STMT_VINFO_TYPE (res) = undef_vec_info_type;
7451  STMT_VINFO_STMT (res) = stmt;
7452  STMT_VINFO_LOOP_VINFO (res) = loop_vinfo;
7453  STMT_VINFO_BB_VINFO (res) = bb_vinfo;
7454  STMT_VINFO_RELEVANT (res) = vect_unused_in_scope;
7455  STMT_VINFO_LIVE_P (res) = false;
7456  STMT_VINFO_VECTYPE (res) = NULL;
7457  STMT_VINFO_VEC_STMT (res) = NULL;
7458  STMT_VINFO_VECTORIZABLE (res) = true;
7459  STMT_VINFO_IN_PATTERN_P (res) = false;
7460  STMT_VINFO_RELATED_STMT (res) = NULL;
7461  STMT_VINFO_PATTERN_DEF_SEQ (res) = NULL;
7462  STMT_VINFO_DATA_REF (res) = NULL;
7463
7464  STMT_VINFO_DR_BASE_ADDRESS (res) = NULL;
7465  STMT_VINFO_DR_OFFSET (res) = NULL;
7466  STMT_VINFO_DR_INIT (res) = NULL;
7467  STMT_VINFO_DR_STEP (res) = NULL;
7468  STMT_VINFO_DR_ALIGNED_TO (res) = NULL;
7469
7470  if (gimple_code (stmt) == GIMPLE_PHI
7471      && is_loop_header_bb_p (gimple_bb (stmt)))
7472    STMT_VINFO_DEF_TYPE (res) = vect_unknown_def_type;
7473  else
7474    STMT_VINFO_DEF_TYPE (res) = vect_internal_def;
7475
7476  STMT_VINFO_SAME_ALIGN_REFS (res).create (0);
7477  STMT_SLP_TYPE (res) = loop_vect;
7478  GROUP_FIRST_ELEMENT (res) = NULL;
7479  GROUP_NEXT_ELEMENT (res) = NULL;
7480  GROUP_SIZE (res) = 0;
7481  GROUP_STORE_COUNT (res) = 0;
7482  GROUP_GAP (res) = 0;
7483  GROUP_SAME_DR_STMT (res) = NULL;
7484
7485  return res;
7486}
7487
7488
/* Create the vector that holds the stmt_vec_info structs.  */
7490
7491void
7492init_stmt_vec_info_vec (void)
7493{
7494  gcc_assert (!stmt_vec_info_vec.exists ());
7495  stmt_vec_info_vec.create (50);
7496}
7497
7498
/* Free the stmt_vec_info vector.  */
7500
7501void
7502free_stmt_vec_info_vec (void)
7503{
7504  unsigned int i;
7505  vec_void_p info;
7506  FOR_EACH_VEC_ELT (stmt_vec_info_vec, i, info)
7507    if (info != NULL)
7508      free_stmt_vec_info (STMT_VINFO_STMT ((stmt_vec_info) info));
7509  gcc_assert (stmt_vec_info_vec.exists ());
7510  stmt_vec_info_vec.release ();
7511}
7512
7513
7514/* Free stmt vectorization related info.  */
7515
7516void
7517free_stmt_vec_info (gimple stmt)
7518{
7519  stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
7520
7521  if (!stmt_info)
7522    return;
7523
7524  /* Check if this statement has a related "pattern stmt"
7525     (introduced by the vectorizer during the pattern recognition
7526     pass).  Free pattern's stmt_vec_info and def stmt's stmt_vec_info
7527     too.  */
7528  if (STMT_VINFO_IN_PATTERN_P (stmt_info))
7529    {
7530      stmt_vec_info patt_info
7531	= vinfo_for_stmt (STMT_VINFO_RELATED_STMT (stmt_info));
7532      if (patt_info)
7533	{
7534	  gimple_seq seq = STMT_VINFO_PATTERN_DEF_SEQ (patt_info);
7535	  gimple patt_stmt = STMT_VINFO_STMT (patt_info);
7536	  gimple_set_bb (patt_stmt, NULL);
7537	  tree lhs = gimple_get_lhs (patt_stmt);
7538	  if (TREE_CODE (lhs) == SSA_NAME)
7539	    release_ssa_name (lhs);
7540	  if (seq)
7541	    {
7542	      gimple_stmt_iterator si;
7543	      for (si = gsi_start (seq); !gsi_end_p (si); gsi_next (&si))
7544		{
7545		  gimple seq_stmt = gsi_stmt (si);
7546		  gimple_set_bb (seq_stmt, NULL);
7547		  lhs = gimple_get_lhs (patt_stmt);
7548		  if (TREE_CODE (lhs) == SSA_NAME)
7549		    release_ssa_name (lhs);
7550		  free_stmt_vec_info (seq_stmt);
7551		}
7552	    }
7553	  free_stmt_vec_info (patt_stmt);
7554	}
7555    }
7556
7557  STMT_VINFO_SAME_ALIGN_REFS (stmt_info).release ();
7558  STMT_VINFO_SIMD_CLONE_INFO (stmt_info).release ();
7559  set_vinfo_for_stmt (stmt, NULL);
7560  free (stmt_info);
7561}
7562
7563
7564/* Function get_vectype_for_scalar_type_and_size.
7565
7566   Returns the vector type corresponding to SCALAR_TYPE  and SIZE as supported
7567   by the target.  */
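/* For example, for SCALAR_TYPE 'int' (4 bytes) and SIZE 16 this yields a
   four-element integer vector type, or NULL_TREE if the target has no
   suitable vector mode.  */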
7568
7569static tree
7570get_vectype_for_scalar_type_and_size (tree scalar_type, unsigned size)
7571{
7572  machine_mode inner_mode = TYPE_MODE (scalar_type);
7573  machine_mode simd_mode;
7574  unsigned int nbytes = GET_MODE_SIZE (inner_mode);
7575  int nunits;
7576  tree vectype;
7577
7578  if (nbytes == 0)
7579    return NULL_TREE;
7580
7581  if (GET_MODE_CLASS (inner_mode) != MODE_INT
7582      && GET_MODE_CLASS (inner_mode) != MODE_FLOAT)
7583    return NULL_TREE;
7584
7585  /* For vector types of elements whose mode precision doesn't
     match their type's precision we use an element type of mode
7587     precision.  The vectorization routines will have to make sure
7588     they support the proper result truncation/extension.
7589     We also make sure to build vector types with INTEGER_TYPE
7590     component type only.  */
7591  if (INTEGRAL_TYPE_P (scalar_type)
7592      && (GET_MODE_BITSIZE (inner_mode) != TYPE_PRECISION (scalar_type)
7593	  || TREE_CODE (scalar_type) != INTEGER_TYPE))
7594    scalar_type = build_nonstandard_integer_type (GET_MODE_BITSIZE (inner_mode),
7595						  TYPE_UNSIGNED (scalar_type));
7596
7597  /* We shouldn't end up building VECTOR_TYPEs of non-scalar components.
7598     When the component mode passes the above test simply use a type
7599     corresponding to that mode.  The theory is that any use that
7600     would cause problems with this will disable vectorization anyway.  */
7601  else if (!SCALAR_FLOAT_TYPE_P (scalar_type)
7602	   && !INTEGRAL_TYPE_P (scalar_type))
7603    scalar_type = lang_hooks.types.type_for_mode (inner_mode, 1);
7604
7605  /* We can't build a vector type of elements with alignment bigger than
7606     their size.  */
7607  else if (nbytes < TYPE_ALIGN_UNIT (scalar_type))
7608    scalar_type = lang_hooks.types.type_for_mode (inner_mode,
7609						  TYPE_UNSIGNED (scalar_type));
7610
  /* If we fell back to using the mode, fail if there was
     no scalar type for it.  */
7613  if (scalar_type == NULL_TREE)
7614    return NULL_TREE;
7615
7616  /* If no size was supplied use the mode the target prefers.   Otherwise
7617     lookup a vector mode of the specified size.  */
7618  if (size == 0)
7619    simd_mode = targetm.vectorize.preferred_simd_mode (inner_mode);
7620  else
7621    simd_mode = mode_for_vector (inner_mode, size / nbytes);
7622  nunits = GET_MODE_SIZE (simd_mode) / nbytes;
7623  if (nunits <= 1)
7624    return NULL_TREE;
7625
7626  vectype = build_vector_type (scalar_type, nunits);
7627
7628  if (!VECTOR_MODE_P (TYPE_MODE (vectype))
7629      && !INTEGRAL_MODE_P (TYPE_MODE (vectype)))
7630    return NULL_TREE;
7631
7632  return vectype;
7633}
7634
7635unsigned int current_vector_size;
7636
7637/* Function get_vectype_for_scalar_type.
7638
7639   Returns the vector type corresponding to SCALAR_TYPE as supported
7640   by the target.  */
7641
7642tree
7643get_vectype_for_scalar_type (tree scalar_type)
7644{
7645  tree vectype;
7646  vectype = get_vectype_for_scalar_type_and_size (scalar_type,
7647						  current_vector_size);
7648  if (vectype
7649      && current_vector_size == 0)
7650    current_vector_size = GET_MODE_SIZE (TYPE_MODE (vectype));
7651  return vectype;
7652}
7653
7654/* Function get_same_sized_vectype
7655
   Returns a vector type corresponding to SCALAR_TYPE with the same size
   as VECTOR_TYPE, if supported by the target.  */
7658
7659tree
7660get_same_sized_vectype (tree scalar_type, tree vector_type)
7661{
7662  return get_vectype_for_scalar_type_and_size
7663	   (scalar_type, GET_MODE_SIZE (TYPE_MODE (vector_type)));
7664}
7665
7666/* Function vect_is_simple_use.
7667
7668   Input:
7669   LOOP_VINFO - the vect info of the loop that is being vectorized.
7670   BB_VINFO - the vect info of the basic block that is being vectorized.
7671   OPERAND - operand of STMT in the loop or bb.
   DEF_STMT - the defining stmt in case OPERAND is an SSA_NAME.
   DEF - the definition (PHI result or LHS) that OPERAND resolves to.
7673
7674   Returns whether a stmt with OPERAND can be vectorized.
7675   For loops, supportable operands are constants, loop invariants, and operands
7676   that are defined by the current iteration of the loop.  Unsupportable
7677   operands are those that are defined by a previous iteration of the loop (as
7678   is the case in reduction/induction computations).
7679   For basic blocks, supportable operands are constants and bb invariants.
7680   For now, operands defined outside the basic block are not supported.  */
7681
7682bool
7683vect_is_simple_use (tree operand, gimple stmt, loop_vec_info loop_vinfo,
7684                    bb_vec_info bb_vinfo, gimple *def_stmt,
7685		    tree *def, enum vect_def_type *dt)
7686{
7687  basic_block bb;
7688  stmt_vec_info stmt_vinfo;
7689  struct loop *loop = NULL;
7690
7691  if (loop_vinfo)
7692    loop = LOOP_VINFO_LOOP (loop_vinfo);
7693
7694  *def_stmt = NULL;
7695  *def = NULL_TREE;
7696
7697  if (dump_enabled_p ())
7698    {
7699      dump_printf_loc (MSG_NOTE, vect_location,
7700                       "vect_is_simple_use: operand ");
7701      dump_generic_expr (MSG_NOTE, TDF_SLIM, operand);
7702      dump_printf (MSG_NOTE, "\n");
7703    }
7704
7705  if (CONSTANT_CLASS_P (operand))
7706    {
7707      *dt = vect_constant_def;
7708      return true;
7709    }
7710
7711  if (is_gimple_min_invariant (operand))
7712    {
7713      *def = operand;
7714      *dt = vect_external_def;
7715      return true;
7716    }
7717
7718  if (TREE_CODE (operand) == PAREN_EXPR)
7719    {
7720      if (dump_enabled_p ())
7721        dump_printf_loc (MSG_NOTE, vect_location, "non-associatable copy.\n");
7722      operand = TREE_OPERAND (operand, 0);
7723    }
7724
7725  if (TREE_CODE (operand) != SSA_NAME)
7726    {
7727      if (dump_enabled_p ())
7728        dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
7729                         "not ssa-name.\n");
7730      return false;
7731    }
7732
7733  *def_stmt = SSA_NAME_DEF_STMT (operand);
7734  if (*def_stmt == NULL)
7735    {
7736      if (dump_enabled_p ())
7737        dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
7738                         "no def_stmt.\n");
7739      return false;
7740    }
7741
7742  if (dump_enabled_p ())
7743    {
7744      dump_printf_loc (MSG_NOTE, vect_location, "def_stmt: ");
7745      dump_gimple_stmt (MSG_NOTE, TDF_SLIM, *def_stmt, 0);
7746    }
7747
7748  /* Empty stmt is expected only in case of a function argument.
7749     (Otherwise - we expect a phi_node or a GIMPLE_ASSIGN).  */
7750  if (gimple_nop_p (*def_stmt))
7751    {
7752      *def = operand;
7753      *dt = vect_external_def;
7754      return true;
7755    }
7756
7757  bb = gimple_bb (*def_stmt);
7758
7759  if ((loop && !flow_bb_inside_loop_p (loop, bb))
7760      || (!loop && bb != BB_VINFO_BB (bb_vinfo))
7761      || (!loop && gimple_code (*def_stmt) == GIMPLE_PHI))
7762    *dt = vect_external_def;
7763  else
7764    {
7765      stmt_vinfo = vinfo_for_stmt (*def_stmt);
7766      *dt = STMT_VINFO_DEF_TYPE (stmt_vinfo);
7767    }
7768
7769  if (*dt == vect_unknown_def_type
7770      || (stmt
7771	  && *dt == vect_double_reduction_def
7772	  && gimple_code (stmt) != GIMPLE_PHI))
7773    {
7774      if (dump_enabled_p ())
7775        dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
7776                         "Unsupported pattern.\n");
7777      return false;
7778    }
7779
7780  if (dump_enabled_p ())
7781    dump_printf_loc (MSG_NOTE, vect_location, "type of def: %d.\n", *dt);
7782
7783  switch (gimple_code (*def_stmt))
7784    {
7785    case GIMPLE_PHI:
7786      *def = gimple_phi_result (*def_stmt);
7787      break;
7788
7789    case GIMPLE_ASSIGN:
7790      *def = gimple_assign_lhs (*def_stmt);
7791      break;
7792
7793    case GIMPLE_CALL:
7794      *def = gimple_call_lhs (*def_stmt);
7795      if (*def != NULL)
7796	break;
7797      /* FALLTHRU */
7798    default:
7799      if (dump_enabled_p ())
7800        dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
7801                         "unsupported defining stmt:\n");
7802      return false;
7803    }
7804
7805  return true;
7806}
7807
7808/* Function vect_is_simple_use_1.
7809
   Same as vect_is_simple_use but also determines the vector operand
7811   type of OPERAND and stores it to *VECTYPE.  If the definition of
7812   OPERAND is vect_uninitialized_def, vect_constant_def or
7813   vect_external_def *VECTYPE will be set to NULL_TREE and the caller
7814   is responsible to compute the best suited vector type for the
7815   scalar operand.  */
7816
7817bool
7818vect_is_simple_use_1 (tree operand, gimple stmt, loop_vec_info loop_vinfo,
7819		      bb_vec_info bb_vinfo, gimple *def_stmt,
7820		      tree *def, enum vect_def_type *dt, tree *vectype)
7821{
7822  if (!vect_is_simple_use (operand, stmt, loop_vinfo, bb_vinfo, def_stmt,
7823			   def, dt))
7824    return false;
7825
7826  /* Now get a vector type if the def is internal, otherwise supply
7827     NULL_TREE and leave it up to the caller to figure out a proper
7828     type for the use stmt.  */
7829  if (*dt == vect_internal_def
7830      || *dt == vect_induction_def
7831      || *dt == vect_reduction_def
7832      || *dt == vect_double_reduction_def
7833      || *dt == vect_nested_cycle)
7834    {
7835      stmt_vec_info stmt_info = vinfo_for_stmt (*def_stmt);
7836
7837      if (STMT_VINFO_IN_PATTERN_P (stmt_info)
7838          && !STMT_VINFO_RELEVANT (stmt_info)
7839          && !STMT_VINFO_LIVE_P (stmt_info))
7840	stmt_info = vinfo_for_stmt (STMT_VINFO_RELATED_STMT (stmt_info));
7841
7842      *vectype = STMT_VINFO_VECTYPE (stmt_info);
7843      gcc_assert (*vectype != NULL_TREE);
7844    }
7845  else if (*dt == vect_uninitialized_def
7846	   || *dt == vect_constant_def
7847	   || *dt == vect_external_def)
7848    *vectype = NULL_TREE;
7849  else
7850    gcc_unreachable ();
7851
7852  return true;
7853}
7854
7855
7856/* Function supportable_widening_operation
7857
7858   Check whether an operation represented by the code CODE is a
7859   widening operation that is supported by the target platform in
7860   vector form (i.e., when operating on arguments of type VECTYPE_IN
7861   producing a result of type VECTYPE_OUT).
7862
7863   Widening operations we currently support are NOP (CONVERT), FLOAT
7864   and WIDEN_MULT.  This function checks if these operations are supported
7865   by the target platform either directly (via vector tree-codes), or via
7866   target builtins.
7867
7868   Output:
7869   - CODE1 and CODE2 are codes of vector operations to be used when
7870   vectorizing the operation, if available.
7871   - MULTI_STEP_CVT determines the number of required intermediate steps in
7872   case of multi-step conversion (like char->short->int - in that case
7873   MULTI_STEP_CVT will be 1).
7874   - INTERM_TYPES contains the intermediate type required to perform the
7875   widening operation (short in the above example).  */
7876
7877bool
7878supportable_widening_operation (enum tree_code code, gimple stmt,
7879				tree vectype_out, tree vectype_in,
7880                                enum tree_code *code1, enum tree_code *code2,
7881                                int *multi_step_cvt,
7882                                vec<tree> *interm_types)
7883{
7884  stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
7885  loop_vec_info loop_info = STMT_VINFO_LOOP_VINFO (stmt_info);
7886  struct loop *vect_loop = NULL;
7887  machine_mode vec_mode;
7888  enum insn_code icode1, icode2;
7889  optab optab1, optab2;
7890  tree vectype = vectype_in;
7891  tree wide_vectype = vectype_out;
7892  enum tree_code c1, c2;
7893  int i;
7894  tree prev_type, intermediate_type;
7895  machine_mode intermediate_mode, prev_mode;
7896  optab optab3, optab4;
7897
7898  *multi_step_cvt = 0;
7899  if (loop_info)
7900    vect_loop = LOOP_VINFO_LOOP (loop_info);
7901
7902  switch (code)
7903    {
7904    case WIDEN_MULT_EXPR:
7905      /* The result of a vectorized widening operation usually requires
7906	 two vectors (because the widened results do not fit into one vector).
7907	 The generated vector results would normally be expected to be
7908	 generated in the same order as in the original scalar computation,
7909	 i.e. if 8 results are generated in each vector iteration, they are
7910	 to be organized as follows:
7911		vect1: [res1,res2,res3,res4],
7912		vect2: [res5,res6,res7,res8].
7913
7914	 However, in the special case that the result of the widening
7915	 operation is used in a reduction computation only, the order doesn't
7916	 matter (because when vectorizing a reduction we change the order of
7917	 the computation).  Some targets can take advantage of this and
7918	 generate more efficient code.  For example, targets like Altivec,
7919	 that support widen_mult using a sequence of {mult_even,mult_odd}
7920	 generate the following vectors:
7921		vect1: [res1,res3,res5,res7],
7922		vect2: [res2,res4,res6,res8].
7923
7924	 When vectorizing outer-loops, we execute the inner-loop sequentially
7925	 (each vectorized inner-loop iteration contributes to VF outer-loop
	 iterations in parallel).  We therefore don't allow changing the
7927	 order of the computation in the inner-loop during outer-loop
7928	 vectorization.  */
7929      /* TODO: Another case in which order doesn't *really* matter is when we
7930	 widen and then contract again, e.g. (short)((int)x * y >> 8).
7931	 Normally, pack_trunc performs an even/odd permute, whereas the
7932	 repack from an even/odd expansion would be an interleave, which
7933	 would be significantly simpler for e.g. AVX2.  */
7934      /* In any case, in order to avoid duplicating the code below, recurse
7935	 on VEC_WIDEN_MULT_EVEN_EXPR.  If it succeeds, all the return values
7936	 are properly set up for the caller.  If we fail, we'll continue with
7937	 a VEC_WIDEN_MULT_LO/HI_EXPR check.  */
7938      if (vect_loop
7939	  && STMT_VINFO_RELEVANT (stmt_info) == vect_used_by_reduction
7940	  && !nested_in_vect_loop_p (vect_loop, stmt)
7941	  && supportable_widening_operation (VEC_WIDEN_MULT_EVEN_EXPR,
7942					     stmt, vectype_out, vectype_in,
7943					     code1, code2, multi_step_cvt,
7944					     interm_types))
7945        {
7946          /* Elements in a vector with vect_used_by_reduction property cannot
7947             be reordered if the use chain with this property does not have the
             same operation.  One such example is s += a * b, where elements
7949             in a and b cannot be reordered.  Here we check if the vector defined
7950             by STMT is only directly used in the reduction statement.  */
7951          tree lhs = gimple_assign_lhs (stmt);
7952          use_operand_p dummy;
7953          gimple use_stmt;
7954          stmt_vec_info use_stmt_info = NULL;
7955          if (single_imm_use (lhs, &dummy, &use_stmt)
7956              && (use_stmt_info = vinfo_for_stmt (use_stmt))
7957              && STMT_VINFO_DEF_TYPE (use_stmt_info) == vect_reduction_def)
7958            return true;
7959        }
7960      c1 = VEC_WIDEN_MULT_LO_EXPR;
7961      c2 = VEC_WIDEN_MULT_HI_EXPR;
7962      break;
7963
7964    case VEC_WIDEN_MULT_EVEN_EXPR:
7965      /* Support the recursion induced just above.  */
7966      c1 = VEC_WIDEN_MULT_EVEN_EXPR;
7967      c2 = VEC_WIDEN_MULT_ODD_EXPR;
7968      break;
7969
7970    case WIDEN_LSHIFT_EXPR:
7971      c1 = VEC_WIDEN_LSHIFT_LO_EXPR;
7972      c2 = VEC_WIDEN_LSHIFT_HI_EXPR;
7973      break;
7974
7975    CASE_CONVERT:
7976      c1 = VEC_UNPACK_LO_EXPR;
7977      c2 = VEC_UNPACK_HI_EXPR;
7978      break;
7979
7980    case FLOAT_EXPR:
7981      c1 = VEC_UNPACK_FLOAT_LO_EXPR;
7982      c2 = VEC_UNPACK_FLOAT_HI_EXPR;
7983      break;
7984
7985    case FIX_TRUNC_EXPR:
7986      /* ??? Not yet implemented due to missing VEC_UNPACK_FIX_TRUNC_HI_EXPR/
7987	 VEC_UNPACK_FIX_TRUNC_LO_EXPR tree codes and optabs used for
7988	 computing the operation.  */
7989      return false;
7990
7991    default:
7992      gcc_unreachable ();
7993    }
7994
7995  if (BYTES_BIG_ENDIAN && c1 != VEC_WIDEN_MULT_EVEN_EXPR)
7996    {
7997      enum tree_code ctmp = c1;
7998      c1 = c2;
7999      c2 = ctmp;
8000    }
8001
8002  if (code == FIX_TRUNC_EXPR)
8003    {
      /* The signedness is determined from the output operand.  */
8005      optab1 = optab_for_tree_code (c1, vectype_out, optab_default);
8006      optab2 = optab_for_tree_code (c2, vectype_out, optab_default);
8007    }
8008  else
8009    {
8010      optab1 = optab_for_tree_code (c1, vectype, optab_default);
8011      optab2 = optab_for_tree_code (c2, vectype, optab_default);
8012    }

  if (!optab1 || !optab2)
    return false;

  vec_mode = TYPE_MODE (vectype);
  if ((icode1 = optab_handler (optab1, vec_mode)) == CODE_FOR_nothing
      || (icode2 = optab_handler (optab2, vec_mode)) == CODE_FOR_nothing)
    return false;

  *code1 = c1;
  *code2 = c2;

  if (insn_data[icode1].operand[0].mode == TYPE_MODE (wide_vectype)
      && insn_data[icode2].operand[0].mode == TYPE_MODE (wide_vectype))
    return true;

  /* Check if it's a multi-step conversion that can be done using intermediate
     types.  */

  prev_type = vectype;
  prev_mode = vec_mode;

  if (!CONVERT_EXPR_CODE_P (code))
    return false;

  /* We assume here that there will not be more than MAX_INTERM_CVT_STEPS
     intermediate steps in the promotion sequence.  We try
     MAX_INTERM_CVT_STEPS steps to get to WIDE_VECTYPE, and fail if we
     do not.  */
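  /* For instance, on a target with 128-bit vectors, widening V16QI operands
     to V4SI results cannot be done in a single unpacking step; the loop below
     would record the V8HI type in *INTERM_TYPES and set *MULTI_STEP_CVT to 1,
     describing the chain V16QI -> V8HI -> V4SI (assuming the target supports
     both unpacking steps; the exact modes are only illustrative).  */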
  interm_types->create (MAX_INTERM_CVT_STEPS);
  for (i = 0; i < MAX_INTERM_CVT_STEPS; i++)
    {
      intermediate_mode = insn_data[icode1].operand[0].mode;
      intermediate_type
	= lang_hooks.types.type_for_mode (intermediate_mode,
					  TYPE_UNSIGNED (prev_type));
      optab3 = optab_for_tree_code (c1, intermediate_type, optab_default);
      optab4 = optab_for_tree_code (c2, intermediate_type, optab_default);

      if (!optab3 || !optab4
	  || (icode1 = optab_handler (optab1, prev_mode)) == CODE_FOR_nothing
	  || insn_data[icode1].operand[0].mode != intermediate_mode
	  || (icode2 = optab_handler (optab2, prev_mode)) == CODE_FOR_nothing
	  || insn_data[icode2].operand[0].mode != intermediate_mode
	  || ((icode1 = optab_handler (optab3, intermediate_mode))
	      == CODE_FOR_nothing)
	  || ((icode2 = optab_handler (optab4, intermediate_mode))
	      == CODE_FOR_nothing))
	break;

      interm_types->quick_push (intermediate_type);
      (*multi_step_cvt)++;

      if (insn_data[icode1].operand[0].mode == TYPE_MODE (wide_vectype)
	  && insn_data[icode2].operand[0].mode == TYPE_MODE (wide_vectype))
	return true;

      prev_type = intermediate_type;
      prev_mode = intermediate_mode;
    }

  interm_types->release ();
  return false;
}
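
/* A minimal usage sketch for the function above (illustrative only: STMT,
   VECTYPE_OUT and VECTYPE_IN are assumed to have been determined beforehand,
   and real callers such as vectorizable_conversion add cost modelling and
   statement generation around the check):

     enum tree_code code1, code2;
     int multi_step_cvt = 0;
     vec<tree> interm_types = vNULL;

     if (supportable_widening_operation (WIDEN_MULT_EXPR, stmt,
					 vectype_out, vectype_in,
					 &code1, &code2, &multi_step_cvt,
					 &interm_types))
       ... emit MULTI_STEP_CVT + 1 layers of CODE1/CODE2 pairs, using
	   INTERM_TYPES for the intermediate vector results ...
     interm_types.release ();  */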


/* Function supportable_narrowing_operation

   Check whether an operation represented by the code CODE is a
   narrowing operation that is supported by the target platform in
   vector form (i.e., when operating on arguments of type VECTYPE_IN
   and producing a result of type VECTYPE_OUT).

   Narrowing operations we currently support are NOP (CONVERT) and
   FIX_TRUNC.  This function checks if these operations are supported by
   the target platform directly via vector tree-codes.

   Output:
   - CODE1 is the code of a vector operation to be used when
   vectorizing the operation, if available.
   - MULTI_STEP_CVT determines the number of required intermediate steps in
   case of multi-step conversion (like int->short->char - in that case
   MULTI_STEP_CVT will be 1).
   - INTERM_TYPES contains the intermediate type required to perform the
   narrowing operation (short in the above example).  */

bool
supportable_narrowing_operation (enum tree_code code,
				 tree vectype_out, tree vectype_in,
				 enum tree_code *code1, int *multi_step_cvt,
				 vec<tree> *interm_types)
{
  machine_mode vec_mode;
  enum insn_code icode1;
  optab optab1, interm_optab;
  tree vectype = vectype_in;
  tree narrow_vectype = vectype_out;
  enum tree_code c1;
  tree intermediate_type;
  machine_mode intermediate_mode, prev_mode;
  int i;
  bool uns;

  *multi_step_cvt = 0;
  switch (code)
    {
    CASE_CONVERT:
      c1 = VEC_PACK_TRUNC_EXPR;
      break;

    case FIX_TRUNC_EXPR:
      c1 = VEC_PACK_FIX_TRUNC_EXPR;
      break;

    case FLOAT_EXPR:
      /* ??? Not yet implemented due to missing VEC_PACK_FLOAT_EXPR
	 tree code and optabs used for computing the operation.  */
      return false;

    default:
      gcc_unreachable ();
    }

  if (code == FIX_TRUNC_EXPR)
    /* The signedness is determined from the output operand.  */
    optab1 = optab_for_tree_code (c1, vectype_out, optab_default);
  else
    optab1 = optab_for_tree_code (c1, vectype, optab_default);

  if (!optab1)
    return false;

  vec_mode = TYPE_MODE (vectype);
  if ((icode1 = optab_handler (optab1, vec_mode)) == CODE_FOR_nothing)
    return false;

  *code1 = c1;

  if (insn_data[icode1].operand[0].mode == TYPE_MODE (narrow_vectype))
    return true;

  /* Check if it's a multi-step conversion that can be done using intermediate
     types.  */
  prev_mode = vec_mode;
  if (code == FIX_TRUNC_EXPR)
    uns = TYPE_UNSIGNED (vectype_out);
  else
    uns = TYPE_UNSIGNED (vectype);

  /* For multi-step FIX_TRUNC_EXPR prefer a signed floating-point to integer
     conversion over an unsigned one, as unsigned FIX_TRUNC_EXPR is often more
     costly than signed.  This is safe here because every value representable
     in the narrower unsigned result type also fits in the wider signed
     intermediate type, and the remaining VEC_PACK_TRUNC_EXPR steps keep only
     the low-order bits.  */
  if (code == FIX_TRUNC_EXPR && uns)
    {
      enum insn_code icode2;

      intermediate_type
	= lang_hooks.types.type_for_mode (TYPE_MODE (vectype_out), 0);
      interm_optab
	= optab_for_tree_code (c1, intermediate_type, optab_default);
      /* Switch to the signed variant only if the target supports it for the
	 input vector mode and it produces the same output mode as the
	 unsigned one.  */
      if (interm_optab != unknown_optab
	  && ((icode2 = optab_handler (interm_optab, vec_mode))
	      != CODE_FOR_nothing)
	  && insn_data[icode1].operand[0].mode
	     == insn_data[icode2].operand[0].mode)
	{
	  uns = false;
	  optab1 = interm_optab;
	  icode1 = icode2;
	}
    }

  /* We assume here that there will not be more than MAX_INTERM_CVT_STEPS
     intermediate steps in the narrowing sequence.  We try
     MAX_INTERM_CVT_STEPS steps to get to NARROW_VECTYPE, and fail if we
     do not.  */
  interm_types->create (MAX_INTERM_CVT_STEPS);
  for (i = 0; i < MAX_INTERM_CVT_STEPS; i++)
    {
      intermediate_mode = insn_data[icode1].operand[0].mode;
      intermediate_type
	= lang_hooks.types.type_for_mode (intermediate_mode, uns);
      interm_optab
	= optab_for_tree_code (VEC_PACK_TRUNC_EXPR, intermediate_type,
			       optab_default);
      if (!interm_optab
	  || ((icode1 = optab_handler (optab1, prev_mode)) == CODE_FOR_nothing)
	  || insn_data[icode1].operand[0].mode != intermediate_mode
	  || ((icode1 = optab_handler (interm_optab, intermediate_mode))
	      == CODE_FOR_nothing))
	break;

      interm_types->quick_push (intermediate_type);
      (*multi_step_cvt)++;

      if (insn_data[icode1].operand[0].mode == TYPE_MODE (narrow_vectype))
	return true;

      prev_mode = intermediate_mode;
      optab1 = interm_optab;
    }

  interm_types->release ();
  return false;
}

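/* A minimal usage sketch for supportable_narrowing_operation (illustrative
   only: VECTYPE_OUT and VECTYPE_IN are assumed to have been determined
   beforehand, and real callers such as vectorizable_conversion add cost
   modelling and statement generation around the check):

     enum tree_code code1;
     int multi_step_cvt = 0;
     vec<tree> interm_types = vNULL;

     if (supportable_narrowing_operation (NOP_EXPR, vectype_out, vectype_in,
					  &code1, &multi_step_cvt,
					  &interm_types))
       ... emit MULTI_STEP_CVT + 1 CODE1 (packing) statements, one per level,
	   using INTERM_TYPES for the intermediate vector results ...
     interm_types.release ();  */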