tree-vect-stmts.c revision 1.3
1/* Statement Analysis and Transformation for Vectorization
2   Copyright (C) 2003-2013 Free Software Foundation, Inc.
3   Contributed by Dorit Naishlos <dorit@il.ibm.com>
4   and Ira Rosen <irar@il.ibm.com>
5
6This file is part of GCC.
7
8GCC is free software; you can redistribute it and/or modify it under
9the terms of the GNU General Public License as published by the Free
10Software Foundation; either version 3, or (at your option) any later
11version.
12
13GCC is distributed in the hope that it will be useful, but WITHOUT ANY
14WARRANTY; without even the implied warranty of MERCHANTABILITY or
15FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
16for more details.
17
18You should have received a copy of the GNU General Public License
19along with GCC; see the file COPYING3.  If not see
20<http://www.gnu.org/licenses/>.  */
21
22#include "config.h"
23#include "system.h"
24#include "coretypes.h"
25#include "dumpfile.h"
26#include "tm.h"
27#include "ggc.h"
28#include "tree.h"
29#include "target.h"
30#include "basic-block.h"
31#include "gimple-pretty-print.h"
32#include "tree-flow.h"
33#include "cfgloop.h"
34#include "expr.h"
35#include "recog.h"		/* FIXME: for insn_data */
36#include "optabs.h"
37#include "diagnostic-core.h"
38#include "tree-vectorizer.h"
40
41/* For lang_hooks.types.type_for_mode.  */
42#include "langhooks.h"
43
44/* Return the vectorized type for the given statement.  */
45
46tree
47stmt_vectype (struct _stmt_vec_info *stmt_info)
48{
49  return STMT_VINFO_VECTYPE (stmt_info);
50}
51
52/* Return TRUE iff the given statement is in an inner loop relative to
53   the loop being vectorized.  */
54bool
55stmt_in_inner_loop_p (struct _stmt_vec_info *stmt_info)
56{
57  gimple stmt = STMT_VINFO_STMT (stmt_info);
58  basic_block bb = gimple_bb (stmt);
59  loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
60  struct loop* loop;
61
62  if (!loop_vinfo)
63    return false;
64
65  loop = LOOP_VINFO_LOOP (loop_vinfo);
66
67  return (bb->loop_father == loop->inner);
68}
69
70/* Record the cost of a statement, either by directly informing the
71   target model or by saving it in a vector for later processing.
72   Return a preliminary estimate of the statement's cost.  */
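/* For example (an illustrative call, mirroring the cost helpers later in
   this file), a body cost for NCOPIES vector loads can be recorded with:

     inside_cost += record_stmt_cost (body_cost_vec, ncopies, vector_load,
				      stmt_info, 0, vect_body);

   Passing a NULL BODY_COST_VEC instead hands the cost straight to the
   target cost model via add_stmt_cost.  */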
73
74unsigned
75record_stmt_cost (stmt_vector_for_cost *body_cost_vec, int count,
76		  enum vect_cost_for_stmt kind, stmt_vec_info stmt_info,
77		  int misalign, enum vect_cost_model_location where)
78{
79  if (body_cost_vec)
80    {
81      tree vectype = stmt_info ? stmt_vectype (stmt_info) : NULL_TREE;
82      add_stmt_info_to_vec (body_cost_vec, count, kind,
83			    stmt_info ? STMT_VINFO_STMT (stmt_info) : NULL,
84			    misalign);
85      return (unsigned)
86	(builtin_vectorization_cost (kind, vectype, misalign) * count);
87
88    }
89  else
90    {
91      loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
92      bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
93      void *target_cost_data;
94
95      if (loop_vinfo)
96	target_cost_data = LOOP_VINFO_TARGET_COST_DATA (loop_vinfo);
97      else
98	target_cost_data = BB_VINFO_TARGET_COST_DATA (bb_vinfo);
99
100      return add_stmt_cost (target_cost_data, count, kind, stmt_info,
101			    misalign, where);
102    }
103}
104
105/* Return a variable of type ELEM_TYPE[NELEMS].  */
106
107static tree
108create_vector_array (tree elem_type, unsigned HOST_WIDE_INT nelems)
109{
110  return create_tmp_var (build_array_type_nelts (elem_type, nelems),
111			 "vect_array");
112}
113
114/* ARRAY is an array of vectors created by create_vector_array.
115   Return an SSA_NAME for the vector in index N.  The reference
116   is part of the vectorization of STMT and the vector is associated
117   with scalar destination SCALAR_DEST.  */
118
119static tree
120read_vector_array (gimple stmt, gimple_stmt_iterator *gsi, tree scalar_dest,
121		   tree array, unsigned HOST_WIDE_INT n)
122{
123  tree vect_type, vect, vect_name, array_ref;
124  gimple new_stmt;
125
126  gcc_assert (TREE_CODE (TREE_TYPE (array)) == ARRAY_TYPE);
127  vect_type = TREE_TYPE (TREE_TYPE (array));
128  vect = vect_create_destination_var (scalar_dest, vect_type);
129  array_ref = build4 (ARRAY_REF, vect_type, array,
130		      build_int_cst (size_type_node, n),
131		      NULL_TREE, NULL_TREE);
132
133  new_stmt = gimple_build_assign (vect, array_ref);
134  vect_name = make_ssa_name (vect, new_stmt);
135  gimple_assign_set_lhs (new_stmt, vect_name);
136  vect_finish_stmt_generation (stmt, new_stmt, gsi);
137
138  return vect_name;
139}
140
141/* ARRAY is an array of vectors created by create_vector_array.
142   Emit code to store SSA_NAME VECT in index N of the array.
143   The store is part of the vectorization of STMT.  */
144
145static void
146write_vector_array (gimple stmt, gimple_stmt_iterator *gsi, tree vect,
147		    tree array, unsigned HOST_WIDE_INT n)
148{
149  tree array_ref;
150  gimple new_stmt;
151
152  array_ref = build4 (ARRAY_REF, TREE_TYPE (vect), array,
153		      build_int_cst (size_type_node, n),
154		      NULL_TREE, NULL_TREE);
155
156  new_stmt = gimple_build_assign (array_ref, vect);
157  vect_finish_stmt_generation (stmt, new_stmt, gsi);
158}
159
160/* PTR is a pointer to an array of type TYPE.  Return a representation
161   of *PTR.  The memory reference replaces those in FIRST_DR
162   (and its group).  */
163
164static tree
165create_array_ref (tree type, tree ptr, struct data_reference *first_dr)
166{
167  tree mem_ref, alias_ptr_type;
168
169  alias_ptr_type = reference_alias_ptr_type (DR_REF (first_dr));
170  mem_ref = build2 (MEM_REF, type, ptr, build_int_cst (alias_ptr_type, 0));
171  /* Arrays have the same alignment as their type.  */
172  set_ptr_info_alignment (get_ptr_info (ptr), TYPE_ALIGN_UNIT (type), 0);
173  return mem_ref;
174}
175
176/* Utility functions used by vect_mark_stmts_to_be_vectorized.  */
177
178/* Function vect_mark_relevant.
179
180   Mark STMT as "relevant for vectorization" and add it to WORKLIST.  */
181
182static void
183vect_mark_relevant (vec<gimple> *worklist, gimple stmt,
184		    enum vect_relevant relevant, bool live_p,
185		    bool used_in_pattern)
186{
187  stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
188  enum vect_relevant save_relevant = STMT_VINFO_RELEVANT (stmt_info);
189  bool save_live_p = STMT_VINFO_LIVE_P (stmt_info);
190  gimple pattern_stmt;
191
192  if (dump_enabled_p ())
193    dump_printf_loc (MSG_NOTE, vect_location,
194                     "mark relevant %d, live %d.", relevant, live_p);
195
196  /* If this stmt is an original stmt in a pattern, we might need to mark its
197     related pattern stmt instead of the original stmt.  However, such stmts
198     may have their own uses that are not in any pattern; in such cases the
199     stmt itself should be marked.  */
200  if (STMT_VINFO_IN_PATTERN_P (stmt_info))
201    {
202      bool found = false;
203      if (!used_in_pattern)
204        {
205          imm_use_iterator imm_iter;
206          use_operand_p use_p;
207          gimple use_stmt;
208          tree lhs;
209	  loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
210	  struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
211
212          if (is_gimple_assign (stmt))
213            lhs = gimple_assign_lhs (stmt);
214          else
215            lhs = gimple_call_lhs (stmt);
216
217          /* This use is not a pattern use; if LHS has other uses that are
218             pattern uses, we should mark the stmt itself, and not the pattern
219             stmt.  */
220	  if (TREE_CODE (lhs) == SSA_NAME)
221	    FOR_EACH_IMM_USE_FAST (use_p, imm_iter, lhs)
222	      {
223		if (is_gimple_debug (USE_STMT (use_p)))
224		  continue;
225		use_stmt = USE_STMT (use_p);
226
227		if (!flow_bb_inside_loop_p (loop, gimple_bb (use_stmt)))
228		  continue;
229
230		if (vinfo_for_stmt (use_stmt)
231		    && STMT_VINFO_IN_PATTERN_P (vinfo_for_stmt (use_stmt)))
232		  {
233		    found = true;
234		    break;
235		  }
236	      }
237        }
238
239      if (!found)
240        {
241          /* This is the last stmt in a sequence that was detected as a
242             pattern that can potentially be vectorized.  Don't mark the stmt
243             as relevant/live because it's not going to be vectorized.
244             Instead mark the pattern-stmt that replaces it.  */
245
246          pattern_stmt = STMT_VINFO_RELATED_STMT (stmt_info);
247
248          if (dump_enabled_p ())
249            dump_printf_loc (MSG_NOTE, vect_location,
250                             "last stmt in pattern. don't mark"
251                             " relevant/live.");
252          stmt_info = vinfo_for_stmt (pattern_stmt);
253          gcc_assert (STMT_VINFO_RELATED_STMT (stmt_info) == stmt);
254          save_relevant = STMT_VINFO_RELEVANT (stmt_info);
255          save_live_p = STMT_VINFO_LIVE_P (stmt_info);
256          stmt = pattern_stmt;
257        }
258    }
259
260  STMT_VINFO_LIVE_P (stmt_info) |= live_p;
261  if (relevant > STMT_VINFO_RELEVANT (stmt_info))
262    STMT_VINFO_RELEVANT (stmt_info) = relevant;
263
264  if (STMT_VINFO_RELEVANT (stmt_info) == save_relevant
265      && STMT_VINFO_LIVE_P (stmt_info) == save_live_p)
266    {
267      if (dump_enabled_p ())
268        dump_printf_loc (MSG_NOTE, vect_location,
269                         "already marked relevant/live.");
270      return;
271    }
272
273  worklist->safe_push (stmt);
274}
275
276
277/* Function vect_stmt_relevant_p.
278
279   Return true if STMT in loop that is represented by LOOP_VINFO is
280   "relevant for vectorization".
281
282   A stmt is considered "relevant for vectorization" if:
283   - it has uses outside the loop.
284   - it has vdefs (it alters memory).
285   - it is a control stmt in the loop (except for the exit condition).
286
287   CHECKME: what other side effects would the vectorizer allow?  */
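/* For example (illustrative), a store "a[i] = x_1" is relevant because it
   has a vdef, while a stmt whose result is only read after the loop (say
   the final value of an accumulator) is marked live rather than relevant.  */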
288
289static bool
290vect_stmt_relevant_p (gimple stmt, loop_vec_info loop_vinfo,
291		      enum vect_relevant *relevant, bool *live_p)
292{
293  struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
294  ssa_op_iter op_iter;
295  imm_use_iterator imm_iter;
296  use_operand_p use_p;
297  def_operand_p def_p;
298
299  *relevant = vect_unused_in_scope;
300  *live_p = false;
301
302  /* cond stmt other than loop exit cond.  */
303  if (is_ctrl_stmt (stmt)
304      && STMT_VINFO_TYPE (vinfo_for_stmt (stmt))
305         != loop_exit_ctrl_vec_info_type)
306    *relevant = vect_used_in_scope;
307
308  /* changing memory.  */
309  if (gimple_code (stmt) != GIMPLE_PHI)
310    if (gimple_vdef (stmt))
311      {
312	if (dump_enabled_p ())
313	  dump_printf_loc (MSG_NOTE, vect_location,
314                           "vec_stmt_relevant_p: stmt has vdefs.");
315	*relevant = vect_used_in_scope;
316      }
317
318  /* uses outside the loop.  */
319  FOR_EACH_PHI_OR_STMT_DEF (def_p, stmt, op_iter, SSA_OP_DEF)
320    {
321      FOR_EACH_IMM_USE_FAST (use_p, imm_iter, DEF_FROM_PTR (def_p))
322	{
323	  basic_block bb = gimple_bb (USE_STMT (use_p));
324	  if (!flow_bb_inside_loop_p (loop, bb))
325	    {
326	      if (dump_enabled_p ())
327		dump_printf_loc (MSG_NOTE, vect_location,
328                                 "vec_stmt_relevant_p: used out of loop.");
329
330	      if (is_gimple_debug (USE_STMT (use_p)))
331		continue;
332
333	      /* We expect all such uses to be in the loop exit phis
334		 (because of loop-closed SSA form).  */
335	      gcc_assert (gimple_code (USE_STMT (use_p)) == GIMPLE_PHI);
336	      gcc_assert (bb == single_exit (loop)->dest);
337
338              *live_p = true;
339	    }
340	}
341    }
342
343  return (*live_p || *relevant);
344}
345
346
347/* Function exist_non_indexing_operands_for_use_p
348
349   USE is one of the uses attached to STMT.  Check if USE is
350   used in STMT for anything other than indexing an array.  */
351
352static bool
353exist_non_indexing_operands_for_use_p (tree use, gimple stmt)
354{
355  tree operand;
356  stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
357
358  /* USE corresponds to some operand in STMT.  If there is no data
359     reference in STMT, then any operand that corresponds to USE
360     is not indexing an array.  */
361  if (!STMT_VINFO_DATA_REF (stmt_info))
362    return true;
363
364  /* STMT has a data_ref.  FORNOW this means that it is of one of
365     the following forms:
366     -1- ARRAY_REF = var
367     -2- var = ARRAY_REF
368     (This should have been verified in analyze_data_refs).
369
370     'var' in the second case corresponds to a def, not a use,
371     so USE cannot correspond to any operands that are not used
372     for array indexing.
373
374     Therefore, all we need to check is if STMT falls into the
375     first case, and whether var corresponds to USE.  */
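  /* E.g. (illustrative), for "a[i_1] = x_2" the use of x_2 is a
     non-indexing use (this function returns true for it), while i_1 only
     feeds the array index (it returns false for that use).  */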
376
377  if (!gimple_assign_copy_p (stmt))
378    return false;
379  if (TREE_CODE (gimple_assign_lhs (stmt)) == SSA_NAME)
380    return false;
381  operand = gimple_assign_rhs1 (stmt);
382  if (TREE_CODE (operand) != SSA_NAME)
383    return false;
384
385  if (operand == use)
386    return true;
387
388  return false;
389}
390
391
392/*
393   Function process_use.
394
395   Inputs:
396   - a USE in STMT in a loop represented by LOOP_VINFO
397   - LIVE_P, RELEVANT - enum values to be set in the STMT_VINFO of the stmt
398     that defined USE.  This is done by calling mark_relevant and passing it
399     the WORKLIST (to add DEF_STMT to the WORKLIST in case it is relevant).
400   - FORCE is true if exist_non_indexing_operands_for_use_p check shouldn't
401     be performed.
402
403   Outputs:
404   Generally, LIVE_P and RELEVANT are used to define the liveness and
405   relevance info of the DEF_STMT of this USE:
406       STMT_VINFO_LIVE_P (DEF_STMT_info) <-- live_p
407       STMT_VINFO_RELEVANT (DEF_STMT_info) <-- relevant
408   Exceptions:
409   - case 1: If USE is used only for address computations (e.g. array indexing),
410   which does not need to be directly vectorized, then the liveness/relevance
411   of the respective DEF_STMT is left unchanged.
412   - case 2: If STMT is a reduction phi and DEF_STMT is a reduction stmt, we
413   skip DEF_STMT because it has already been processed.
414   - case 3: If DEF_STMT and STMT are in different nests, then "relevant" will
415   be modified accordingly.
416
417   Return true if everything is as expected. Return false otherwise.  */
418
419static bool
420process_use (gimple stmt, tree use, loop_vec_info loop_vinfo, bool live_p,
421	     enum vect_relevant relevant, vec<gimple> *worklist,
422	     bool force)
423{
424  struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
425  stmt_vec_info stmt_vinfo = vinfo_for_stmt (stmt);
426  stmt_vec_info dstmt_vinfo;
427  basic_block bb, def_bb;
428  tree def;
429  gimple def_stmt;
430  enum vect_def_type dt;
431
432  /* case 1: we are only interested in uses that need to be vectorized.  Uses
433     that are used for address computation are not considered relevant.  */
434  if (!force && !exist_non_indexing_operands_for_use_p (use, stmt))
435     return true;
436
437  if (!vect_is_simple_use (use, stmt, loop_vinfo, NULL, &def_stmt, &def, &dt))
438    {
439      if (dump_enabled_p ())
440        dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
441                         "not vectorized: unsupported use in stmt.");
442      return false;
443    }
444
445  if (!def_stmt || gimple_nop_p (def_stmt))
446    return true;
447
448  def_bb = gimple_bb (def_stmt);
449  if (!flow_bb_inside_loop_p (loop, def_bb))
450    {
451      if (dump_enabled_p ())
452	dump_printf_loc (MSG_NOTE, vect_location, "def_stmt is out of loop.");
453      return true;
454    }
455
456  /* case 2: A reduction phi (STMT) defined by a reduction stmt (DEF_STMT).
457     DEF_STMT must have already been processed, because this should be the
458     only way that STMT, which is a reduction-phi, was put in the worklist,
459     as there should be no other uses for DEF_STMT in the loop.  So we just
460     check that everything is as expected, and we are done.  */
461  dstmt_vinfo = vinfo_for_stmt (def_stmt);
462  bb = gimple_bb (stmt);
463  if (gimple_code (stmt) == GIMPLE_PHI
464      && STMT_VINFO_DEF_TYPE (stmt_vinfo) == vect_reduction_def
465      && gimple_code (def_stmt) != GIMPLE_PHI
466      && STMT_VINFO_DEF_TYPE (dstmt_vinfo) == vect_reduction_def
467      && bb->loop_father == def_bb->loop_father)
468    {
469      if (dump_enabled_p ())
470	dump_printf_loc (MSG_NOTE, vect_location,
471                         "reduc-stmt defining reduc-phi in the same nest.");
472      if (STMT_VINFO_IN_PATTERN_P (dstmt_vinfo))
473	dstmt_vinfo = vinfo_for_stmt (STMT_VINFO_RELATED_STMT (dstmt_vinfo));
474      gcc_assert (STMT_VINFO_RELEVANT (dstmt_vinfo) < vect_used_by_reduction);
475      gcc_assert (STMT_VINFO_LIVE_P (dstmt_vinfo)
476		  || STMT_VINFO_RELEVANT (dstmt_vinfo) > vect_unused_in_scope);
477      return true;
478    }
479
480  /* case 3a: outer-loop stmt defining an inner-loop stmt:
481	outer-loop-header-bb:
482		d = def_stmt
483	inner-loop:
484		stmt # use (d)
485	outer-loop-tail-bb:
486		...		  */
487  if (flow_loop_nested_p (def_bb->loop_father, bb->loop_father))
488    {
489      if (dump_enabled_p ())
490	dump_printf_loc (MSG_NOTE, vect_location,
491                         "outer-loop def-stmt defining inner-loop stmt.");
492
493      switch (relevant)
494	{
495	case vect_unused_in_scope:
496	  relevant = (STMT_VINFO_DEF_TYPE (stmt_vinfo) == vect_nested_cycle) ?
497		      vect_used_in_scope : vect_unused_in_scope;
498	  break;
499
500	case vect_used_in_outer_by_reduction:
501          gcc_assert (STMT_VINFO_DEF_TYPE (stmt_vinfo) != vect_reduction_def);
502	  relevant = vect_used_by_reduction;
503	  break;
504
505	case vect_used_in_outer:
506          gcc_assert (STMT_VINFO_DEF_TYPE (stmt_vinfo) != vect_reduction_def);
507	  relevant = vect_used_in_scope;
508	  break;
509
510	case vect_used_in_scope:
511	  break;
512
513	default:
514	  gcc_unreachable ();
515	}
516    }
517
518  /* case 3b: inner-loop stmt defining an outer-loop stmt:
519	outer-loop-header-bb:
520		...
521	inner-loop:
522		d = def_stmt
523	outer-loop-tail-bb (or outer-loop-exit-bb in double reduction):
524		stmt # use (d)		*/
525  else if (flow_loop_nested_p (bb->loop_father, def_bb->loop_father))
526    {
527      if (dump_enabled_p ())
528	dump_printf_loc (MSG_NOTE, vect_location,
529                         "inner-loop def-stmt defining outer-loop stmt.");
530
531      switch (relevant)
532        {
533        case vect_unused_in_scope:
534          relevant = (STMT_VINFO_DEF_TYPE (stmt_vinfo) == vect_reduction_def
535            || STMT_VINFO_DEF_TYPE (stmt_vinfo) == vect_double_reduction_def) ?
536                      vect_used_in_outer_by_reduction : vect_unused_in_scope;
537          break;
538
539        case vect_used_by_reduction:
540          relevant = vect_used_in_outer_by_reduction;
541          break;
542
543        case vect_used_in_scope:
544          relevant = vect_used_in_outer;
545          break;
546
547        default:
548          gcc_unreachable ();
549        }
550    }
551
552  vect_mark_relevant (worklist, def_stmt, relevant, live_p,
553                      is_pattern_stmt_p (stmt_vinfo));
554  return true;
555}
556
557
558/* Function vect_mark_stmts_to_be_vectorized.
559
560   Not all stmts in the loop need to be vectorized. For example:
561
562     for i...
563       for j...
564   1.    T0 = i + j
565   2.	 T1 = a[T0]
566
567   3.    j = j + 1
568
569   Stmts 1 and 3 do not need to be vectorized, because loop control and
570   addressing of vectorized data-refs are handled differently.
571
572   This pass detects such stmts.  */
573
574bool
575vect_mark_stmts_to_be_vectorized (loop_vec_info loop_vinfo)
576{
577  vec<gimple> worklist;
578  struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
579  basic_block *bbs = LOOP_VINFO_BBS (loop_vinfo);
580  unsigned int nbbs = loop->num_nodes;
581  gimple_stmt_iterator si;
582  gimple stmt;
583  unsigned int i;
584  stmt_vec_info stmt_vinfo;
585  basic_block bb;
586  gimple phi;
587  bool live_p;
588  enum vect_relevant relevant, tmp_relevant;
589  enum vect_def_type def_type;
590
591  if (dump_enabled_p ())
592    dump_printf_loc (MSG_NOTE, vect_location,
593                     "=== vect_mark_stmts_to_be_vectorized ===");
594
595  worklist.create (64);
596
597  /* 1. Init worklist.  */
598  for (i = 0; i < nbbs; i++)
599    {
600      bb = bbs[i];
601      for (si = gsi_start_phis (bb); !gsi_end_p (si); gsi_next (&si))
602	{
603	  phi = gsi_stmt (si);
604	  if (dump_enabled_p ())
605	    {
606	      dump_printf_loc (MSG_NOTE, vect_location, "init: phi relevant? ");
607	      dump_gimple_stmt (MSG_NOTE, TDF_SLIM, phi, 0);
608	    }
609
610	  if (vect_stmt_relevant_p (phi, loop_vinfo, &relevant, &live_p))
611	    vect_mark_relevant (&worklist, phi, relevant, live_p, false);
612	}
613      for (si = gsi_start_bb (bb); !gsi_end_p (si); gsi_next (&si))
614	{
615	  stmt = gsi_stmt (si);
616	  if (dump_enabled_p ())
617	    {
618	      dump_printf_loc (MSG_NOTE, vect_location, "init: stmt relevant? ");
619	      dump_gimple_stmt (MSG_NOTE, TDF_SLIM, stmt, 0);
620	    }
621
622	  if (vect_stmt_relevant_p (stmt, loop_vinfo, &relevant, &live_p))
623            vect_mark_relevant (&worklist, stmt, relevant, live_p, false);
624	}
625    }
626
627  /* 2. Process_worklist */
628  while (worklist.length () > 0)
629    {
630      use_operand_p use_p;
631      ssa_op_iter iter;
632
633      stmt = worklist.pop ();
634      if (dump_enabled_p ())
635	{
636          dump_printf_loc (MSG_NOTE, vect_location, "worklist: examine stmt: ");
637          dump_gimple_stmt (MSG_NOTE, TDF_SLIM, stmt, 0);
638	}
639
640      /* Examine the USEs of STMT. For each USE, mark the stmt that defines it
641	 (DEF_STMT) as relevant/irrelevant and live/dead according to the
642	 liveness and relevance properties of STMT.  */
643      stmt_vinfo = vinfo_for_stmt (stmt);
644      relevant = STMT_VINFO_RELEVANT (stmt_vinfo);
645      live_p = STMT_VINFO_LIVE_P (stmt_vinfo);
646
647      /* Generally, the liveness and relevance properties of STMT are
648	 propagated as is to the DEF_STMTs of its USEs:
649	  live_p <-- STMT_VINFO_LIVE_P (STMT_VINFO)
650	  relevant <-- STMT_VINFO_RELEVANT (STMT_VINFO)
651
652	 One exception is when STMT has been identified as defining a reduction
653	 variable; in this case we set the liveness/relevance as follows:
654	   live_p = false
655	   relevant = vect_used_by_reduction
656	 This is because we distinguish between two kinds of relevant stmts -
657	 those that are used by a reduction computation, and those that are
658	 (also) used by a regular computation.  This allows us later on to
659	 identify stmts that are used solely by a reduction, and therefore the
660	 order of the results that they produce does not have to be kept.  */
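      /* For instance (illustrative), for an accumulation "sum_1 = sum_0 + x_3"
	 the def stmts feeding x_3 are marked vect_used_by_reduction with
	 live_p = false, per the rule above.  */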
661
662      def_type = STMT_VINFO_DEF_TYPE (stmt_vinfo);
663      tmp_relevant = relevant;
664      switch (def_type)
665        {
666          case vect_reduction_def:
667	    switch (tmp_relevant)
668	      {
669	        case vect_unused_in_scope:
670	          relevant = vect_used_by_reduction;
671	          break;
672
673	        case vect_used_by_reduction:
674	          if (gimple_code (stmt) == GIMPLE_PHI)
675                    break;
676  	          /* fall through */
677
678	        default:
679	          if (dump_enabled_p ())
680	            dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
681                                     "unsupported use of reduction.");
682  	          worklist.release ();
683	          return false;
684	      }
685
686	    live_p = false;
687	    break;
688
689          case vect_nested_cycle:
690            if (tmp_relevant != vect_unused_in_scope
691                && tmp_relevant != vect_used_in_outer_by_reduction
692                && tmp_relevant != vect_used_in_outer)
693              {
694                if (dump_enabled_p ())
695                  dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
696                                   "unsupported use of nested cycle.");
697
698                worklist.release ();
699                return false;
700              }
701
702            live_p = false;
703            break;
704
705          case vect_double_reduction_def:
706            if (tmp_relevant != vect_unused_in_scope
707                && tmp_relevant != vect_used_by_reduction)
708              {
709                if (dump_enabled_p ())
710                  dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
711                                   "unsupported use of double reduction.");
712
713                worklist.release ();
714                return false;
715              }
716
717            live_p = false;
718            break;
719
720          default:
721            break;
722        }
723
724      if (is_pattern_stmt_p (stmt_vinfo))
725        {
726          /* Pattern statements are not inserted into the code, so
727             FOR_EACH_PHI_OR_STMT_USE optimizes their operands out, and we
728             have to scan the RHS or function arguments instead.  */
729          if (is_gimple_assign (stmt))
730            {
731	      enum tree_code rhs_code = gimple_assign_rhs_code (stmt);
732	      tree op = gimple_assign_rhs1 (stmt);
733
734	      i = 1;
735	      if (rhs_code == COND_EXPR && COMPARISON_CLASS_P (op))
736		{
737		  if (!process_use (stmt, TREE_OPERAND (op, 0), loop_vinfo,
738				    live_p, relevant, &worklist, false)
739		      || !process_use (stmt, TREE_OPERAND (op, 1), loop_vinfo,
740				       live_p, relevant, &worklist, false))
741		    {
742		      worklist.release ();
743		      return false;
744		    }
745		  i = 2;
746		}
747	      for (; i < gimple_num_ops (stmt); i++)
748                {
749		  op = gimple_op (stmt, i);
750                  if (!process_use (stmt, op, loop_vinfo, live_p, relevant,
751				    &worklist, false))
752                    {
753                      worklist.release ();
754                      return false;
755                    }
756                 }
757            }
758          else if (is_gimple_call (stmt))
759            {
760              for (i = 0; i < gimple_call_num_args (stmt); i++)
761                {
762                  tree arg = gimple_call_arg (stmt, i);
763                  if (!process_use (stmt, arg, loop_vinfo, live_p, relevant,
764				    &worklist, false))
765                    {
766                      worklist.release ();
767                      return false;
768                    }
769                }
770            }
771        }
772      else
773        FOR_EACH_PHI_OR_STMT_USE (use_p, stmt, iter, SSA_OP_USE)
774          {
775            tree op = USE_FROM_PTR (use_p);
776            if (!process_use (stmt, op, loop_vinfo, live_p, relevant,
777			      &worklist, false))
778              {
779                worklist.release ();
780                return false;
781              }
782          }
783
784      if (STMT_VINFO_GATHER_P (stmt_vinfo))
785	{
786	  tree off;
787	  tree decl = vect_check_gather (stmt, loop_vinfo, NULL, &off, NULL);
788	  gcc_assert (decl);
789	  if (!process_use (stmt, off, loop_vinfo, live_p, relevant,
790			    &worklist, true))
791	    {
792	      worklist.release ();
793	      return false;
794	    }
795	}
796    } /* while worklist */
797
798  worklist.release ();
799  return true;
800}
801
802
803/* Function vect_model_simple_cost.
804
805   Models cost for simple operations, i.e. those that only emit ncopies of a
806   single op.  Right now, this does not account for multiple insns that could
807   be generated for the single vector op.  We will handle that shortly.  */
808
809void
810vect_model_simple_cost (stmt_vec_info stmt_info, int ncopies,
811			enum vect_def_type *dt,
812			stmt_vector_for_cost *prologue_cost_vec,
813			stmt_vector_for_cost *body_cost_vec)
814{
815  int i;
816  int inside_cost = 0, prologue_cost = 0;
817
818  /* The SLP costs were already calculated during SLP tree build.  */
819  if (PURE_SLP_STMT (stmt_info))
820    return;
821
822  /* FORNOW: Assuming maximum 2 args per stmt.  */
823  for (i = 0; i < 2; i++)
824    if (dt[i] == vect_constant_def || dt[i] == vect_external_def)
825      prologue_cost += record_stmt_cost (prologue_cost_vec, 1, vector_stmt,
826					 stmt_info, 0, vect_prologue);
827
828  /* Pass the inside-of-loop statements to the target-specific cost model.  */
829  inside_cost = record_stmt_cost (body_cost_vec, ncopies, vector_stmt,
830				  stmt_info, 0, vect_body);
831
832  if (dump_enabled_p ())
833    dump_printf_loc (MSG_NOTE, vect_location,
834                     "vect_model_simple_cost: inside_cost = %d, "
835                     "prologue_cost = %d .", inside_cost, prologue_cost);
836}
837
838
839/* Model cost for type demotion and promotion operations.  PWR is normally
840   zero for single-step promotions and demotions.  It will be one if
841   two-step promotion/demotion is required, and so on.  Each additional
842   step doubles the number of instructions required.  */
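/* For instance (illustrative numbers): with PWR == 1, a statement of
   type_promotion_vec_info_type accumulates vect_pow2 (1) + vect_pow2 (2)
   == 6 vec_promote_demote counts below, while the corresponding demotion
   accumulates vect_pow2 (0) + vect_pow2 (1) == 3.  */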
843
844static void
845vect_model_promotion_demotion_cost (stmt_vec_info stmt_info,
846				    enum vect_def_type *dt, int pwr)
847{
848  int i, tmp;
849  int inside_cost = 0, prologue_cost = 0;
850  loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
851  bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
852  void *target_cost_data;
853
854  /* The SLP costs were already calculated during SLP tree build.  */
855  if (PURE_SLP_STMT (stmt_info))
856    return;
857
858  if (loop_vinfo)
859    target_cost_data = LOOP_VINFO_TARGET_COST_DATA (loop_vinfo);
860  else
861    target_cost_data = BB_VINFO_TARGET_COST_DATA (bb_vinfo);
862
863  for (i = 0; i < pwr + 1; i++)
864    {
865      tmp = (STMT_VINFO_TYPE (stmt_info) == type_promotion_vec_info_type) ?
866	(i + 1) : i;
867      inside_cost += add_stmt_cost (target_cost_data, vect_pow2 (tmp),
868				    vec_promote_demote, stmt_info, 0,
869				    vect_body);
870    }
871
872  /* FORNOW: Assuming maximum 2 args per stmt.  */
873  for (i = 0; i < 2; i++)
874    if (dt[i] == vect_constant_def || dt[i] == vect_external_def)
875      prologue_cost += add_stmt_cost (target_cost_data, 1, vector_stmt,
876				      stmt_info, 0, vect_prologue);
877
878  if (dump_enabled_p ())
879    dump_printf_loc (MSG_NOTE, vect_location,
880                     "vect_model_promotion_demotion_cost: inside_cost = %d, "
881                     "prologue_cost = %d .", inside_cost, prologue_cost);
882}
883
884/* Function vect_cost_group_size
885
886   For grouped load or store, return the group_size only if it is the first
887   load or store of a group, else return 1.  This ensures that group size is
888   only returned once per group.  */
889
890static int
891vect_cost_group_size (stmt_vec_info stmt_info)
892{
893  gimple first_stmt = GROUP_FIRST_ELEMENT (stmt_info);
894
895  if (first_stmt == STMT_VINFO_STMT (stmt_info))
896    return GROUP_SIZE (stmt_info);
897
898  return 1;
899}
900
901
902/* Function vect_model_store_cost
903
904   Models cost for stores.  In the case of grouped accesses, one access
905   has the overhead of the grouped access attributed to it.  */
906
907void
908vect_model_store_cost (stmt_vec_info stmt_info, int ncopies,
909		       bool store_lanes_p, enum vect_def_type dt,
910		       slp_tree slp_node,
911		       stmt_vector_for_cost *prologue_cost_vec,
912		       stmt_vector_for_cost *body_cost_vec)
913{
914  int group_size;
915  unsigned int inside_cost = 0, prologue_cost = 0;
916  struct data_reference *first_dr;
917  gimple first_stmt;
918
919  /* The SLP costs were already calculated during SLP tree build.  */
920  if (PURE_SLP_STMT (stmt_info))
921    return;
922
923  if (dt == vect_constant_def || dt == vect_external_def)
924    prologue_cost += record_stmt_cost (prologue_cost_vec, 1, scalar_to_vec,
925				       stmt_info, 0, vect_prologue);
926
927  /* Grouped access?  */
928  if (STMT_VINFO_GROUPED_ACCESS (stmt_info))
929    {
930      if (slp_node)
931        {
932          first_stmt = SLP_TREE_SCALAR_STMTS (slp_node)[0];
933          group_size = 1;
934        }
935      else
936        {
937          first_stmt = GROUP_FIRST_ELEMENT (stmt_info);
938          group_size = vect_cost_group_size (stmt_info);
939        }
940
941      first_dr = STMT_VINFO_DATA_REF (vinfo_for_stmt (first_stmt));
942    }
943  /* Not a grouped access.  */
944  else
945    {
946      group_size = 1;
947      first_dr = STMT_VINFO_DATA_REF (stmt_info);
948    }
949
950  /* We assume that the cost of a single store-lanes instruction is
951     equivalent to the cost of GROUP_SIZE separate stores.  If a grouped
952     access is instead being provided by a permute-and-store operation,
953     include the cost of the permutes.  */
954  if (!store_lanes_p && group_size > 1)
955    {
956      /* Uses a high and low interleave operation for each needed permute.  */
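      /* E.g. (illustrative), GROUP_SIZE == 4 and NCOPIES == 1 yield
	 1 * exact_log2 (4) * 4 == 8 vec_perm statements below.  */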
957
958      int nstmts = ncopies * exact_log2 (group_size) * group_size;
959      inside_cost = record_stmt_cost (body_cost_vec, nstmts, vec_perm,
960				      stmt_info, 0, vect_body);
961
962      if (dump_enabled_p ())
963        dump_printf_loc (MSG_NOTE, vect_location,
964                         "vect_model_store_cost: strided group_size = %d .",
965                         group_size);
966    }
967
968  /* Costs of the stores.  */
969  vect_get_store_cost (first_dr, ncopies, &inside_cost, body_cost_vec);
970
971  if (dump_enabled_p ())
972    dump_printf_loc (MSG_NOTE, vect_location,
973                     "vect_model_store_cost: inside_cost = %d, "
974                     "prologue_cost = %d .", inside_cost, prologue_cost);
975}
976
977
978/* Calculate cost of DR's memory access.  */
979void
980vect_get_store_cost (struct data_reference *dr, int ncopies,
981		     unsigned int *inside_cost,
982		     stmt_vector_for_cost *body_cost_vec)
983{
984  int alignment_support_scheme = vect_supportable_dr_alignment (dr, false);
985  gimple stmt = DR_STMT (dr);
986  stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
987
988  switch (alignment_support_scheme)
989    {
990    case dr_aligned:
991      {
992	*inside_cost += record_stmt_cost (body_cost_vec, ncopies,
993					  vector_store, stmt_info, 0,
994					  vect_body);
995
996        if (dump_enabled_p ())
997          dump_printf_loc (MSG_NOTE, vect_location,
998                           "vect_model_store_cost: aligned.");
999        break;
1000      }
1001
1002    case dr_unaligned_supported:
1003      {
1004        /* Here, we assign an additional cost for the unaligned store.  */
1005	*inside_cost += record_stmt_cost (body_cost_vec, ncopies,
1006					  unaligned_store, stmt_info,
1007					  DR_MISALIGNMENT (dr), vect_body);
1008        if (dump_enabled_p ())
1009          dump_printf_loc (MSG_NOTE, vect_location,
1010                           "vect_model_store_cost: unaligned supported by "
1011                           "hardware.");
1012        break;
1013      }
1014
1015    case dr_unaligned_unsupported:
1016      {
1017        *inside_cost = VECT_MAX_COST;
1018
1019        if (dump_enabled_p ())
1020          dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1021                           "vect_model_store_cost: unsupported access.");
1022        break;
1023      }
1024
1025    default:
1026      gcc_unreachable ();
1027    }
1028}
1029
1030
1031/* Function vect_model_load_cost
1032
1033   Models cost for loads.  In the case of grouped accesses, the last access
1034   has the overhead of the grouped access attributed to it.  Since unaligned
1035   accesses are supported for loads, we also account for the costs of the
1036   access scheme chosen.  */
1037
1038void
1039vect_model_load_cost (stmt_vec_info stmt_info, int ncopies,
1040		      bool load_lanes_p, slp_tree slp_node,
1041		      stmt_vector_for_cost *prologue_cost_vec,
1042		      stmt_vector_for_cost *body_cost_vec)
1043{
1044  int group_size;
1045  gimple first_stmt;
1046  struct data_reference *dr = STMT_VINFO_DATA_REF (stmt_info), *first_dr;
1047  unsigned int inside_cost = 0, prologue_cost = 0;
1048
1049  /* The SLP costs were already calculated during SLP tree build.  */
1050  if (PURE_SLP_STMT (stmt_info))
1051    return;
1052
1053  /* Grouped accesses?  */
1054  first_stmt = GROUP_FIRST_ELEMENT (stmt_info);
1055  if (STMT_VINFO_GROUPED_ACCESS (stmt_info) && first_stmt && !slp_node)
1056    {
1057      group_size = vect_cost_group_size (stmt_info);
1058      first_dr = STMT_VINFO_DATA_REF (vinfo_for_stmt (first_stmt));
1059    }
1060  /* Not a grouped access.  */
1061  else
1062    {
1063      group_size = 1;
1064      first_dr = dr;
1065    }
1066
1067  /* We assume that the cost of a single load-lanes instruction is
1068     equivalent to the cost of GROUP_SIZE separate loads.  If a grouped
1069     access is instead being provided by a load-and-permute operation,
1070     include the cost of the permutes.  */
1071  if (!load_lanes_p && group_size > 1)
1072    {
1073      /* Uses even and odd extract operations for each needed permute.  */
1074      int nstmts = ncopies * exact_log2 (group_size) * group_size;
1075      inside_cost += record_stmt_cost (body_cost_vec, nstmts, vec_perm,
1076				       stmt_info, 0, vect_body);
1077
1078      if (dump_enabled_p ())
1079        dump_printf_loc (MSG_NOTE, vect_location,
1080                         "vect_model_load_cost: strided group_size = %d .",
1081                         group_size);
1082    }
1083
1084  /* The loads themselves.  */
1085  if (STMT_VINFO_STRIDE_LOAD_P (stmt_info))
1086    {
1087      /* N scalar loads plus gathering them into a vector.  */
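      /* E.g. (illustrative), a V4SI vector type with NCOPIES == 2 is costed
	 as 2 * 4 scalar_load statements plus 2 vec_construct statements.  */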
1088      tree vectype = STMT_VINFO_VECTYPE (stmt_info);
1089      inside_cost += record_stmt_cost (body_cost_vec,
1090				       ncopies * TYPE_VECTOR_SUBPARTS (vectype),
1091				       scalar_load, stmt_info, 0, vect_body);
1092      inside_cost += record_stmt_cost (body_cost_vec, ncopies, vec_construct,
1093				       stmt_info, 0, vect_body);
1094    }
1095  else
1096    vect_get_load_cost (first_dr, ncopies,
1097			((!STMT_VINFO_GROUPED_ACCESS (stmt_info))
1098			 || group_size > 1 || slp_node),
1099			&inside_cost, &prologue_cost,
1100			prologue_cost_vec, body_cost_vec, true);
1101
1102  if (dump_enabled_p ())
1103    dump_printf_loc (MSG_NOTE, vect_location,
1104                     "vect_model_load_cost: inside_cost = %d, "
1105                     "prologue_cost = %d .", inside_cost, prologue_cost);
1106}
1107
1108
1109/* Calculate cost of DR's memory access.  */
1110void
1111vect_get_load_cost (struct data_reference *dr, int ncopies,
1112		    bool add_realign_cost, unsigned int *inside_cost,
1113		    unsigned int *prologue_cost,
1114		    stmt_vector_for_cost *prologue_cost_vec,
1115		    stmt_vector_for_cost *body_cost_vec,
1116		    bool record_prologue_costs)
1117{
1118  int alignment_support_scheme = vect_supportable_dr_alignment (dr, false);
1119  gimple stmt = DR_STMT (dr);
1120  stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
1121
1122  switch (alignment_support_scheme)
1123    {
1124    case dr_aligned:
1125      {
1126	*inside_cost += record_stmt_cost (body_cost_vec, ncopies, vector_load,
1127					  stmt_info, 0, vect_body);
1128
1129        if (dump_enabled_p ())
1130          dump_printf_loc (MSG_NOTE, vect_location,
1131                           "vect_model_load_cost: aligned.");
1132
1133        break;
1134      }
1135    case dr_unaligned_supported:
1136      {
1137        /* Here, we assign an additional cost for the unaligned load.  */
1138	*inside_cost += record_stmt_cost (body_cost_vec, ncopies,
1139					  unaligned_load, stmt_info,
1140					  DR_MISALIGNMENT (dr), vect_body);
1141
1142        if (dump_enabled_p ())
1143          dump_printf_loc (MSG_NOTE, vect_location,
1144                           "vect_model_load_cost: unaligned supported by "
1145                           "hardware.");
1146
1147        break;
1148      }
1149    case dr_explicit_realign:
1150      {
1151	*inside_cost += record_stmt_cost (body_cost_vec, ncopies * 2,
1152					  vector_load, stmt_info, 0, vect_body);
1153	*inside_cost += record_stmt_cost (body_cost_vec, ncopies,
1154					  vec_perm, stmt_info, 0, vect_body);
1155
1156        /* FIXME: If the misalignment remains fixed across the iterations of
1157           the containing loop, the following cost should be added to the
1158           prologue costs.  */
1159        if (targetm.vectorize.builtin_mask_for_load)
1160	  *inside_cost += record_stmt_cost (body_cost_vec, 1, vector_stmt,
1161					    stmt_info, 0, vect_body);
1162
1163        if (dump_enabled_p ())
1164          dump_printf_loc (MSG_NOTE, vect_location,
1165                           "vect_model_load_cost: explicit realign");
1166
1167        break;
1168      }
1169    case dr_explicit_realign_optimized:
1170      {
1171        if (dump_enabled_p ())
1172          dump_printf_loc (MSG_NOTE, vect_location,
1173                           "vect_model_load_cost: unaligned software "
1174                           "pipelined.");
1175
1176        /* Unaligned software pipeline has a load of an address, an initial
1177           load, and possibly a mask operation to "prime" the loop.  However,
1178           if this is an access in a group of loads, which provide grouped
1179           access, then the above cost should only be considered for one
1180           access in the group.  Inside the loop, there is a load op
1181           and a realignment op.  */
1182
1183        if (add_realign_cost && record_prologue_costs)
1184          {
1185	    *prologue_cost += record_stmt_cost (prologue_cost_vec, 2,
1186						vector_stmt, stmt_info,
1187						0, vect_prologue);
1188            if (targetm.vectorize.builtin_mask_for_load)
1189	      *prologue_cost += record_stmt_cost (prologue_cost_vec, 1,
1190						  vector_stmt, stmt_info,
1191						  0, vect_prologue);
1192          }
1193
1194	*inside_cost += record_stmt_cost (body_cost_vec, ncopies, vector_load,
1195					  stmt_info, 0, vect_body);
1196	*inside_cost += record_stmt_cost (body_cost_vec, ncopies, vec_perm,
1197					  stmt_info, 0, vect_body);
1198
1199        if (dump_enabled_p ())
1200          dump_printf_loc (MSG_NOTE, vect_location,
1201                           "vect_model_load_cost: explicit realign optimized");
1202
1203        break;
1204      }
1205
1206    case dr_unaligned_unsupported:
1207      {
1208        *inside_cost = VECT_MAX_COST;
1209
1210        if (dump_enabled_p ())
1211          dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1212                           "vect_model_load_cost: unsupported access.");
1213        break;
1214      }
1215
1216    default:
1217      gcc_unreachable ();
1218    }
1219}
1220
1221/* Insert the new stmt NEW_STMT at *GSI or at the appropriate place in
1222   the loop preheader for the vectorized stmt STMT.  */
1223
1224static void
1225vect_init_vector_1 (gimple stmt, gimple new_stmt, gimple_stmt_iterator *gsi)
1226{
1227  if (gsi)
1228    vect_finish_stmt_generation (stmt, new_stmt, gsi);
1229  else
1230    {
1231      stmt_vec_info stmt_vinfo = vinfo_for_stmt (stmt);
1232      loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_vinfo);
1233
1234      if (loop_vinfo)
1235        {
1236          struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
1237	  basic_block new_bb;
1238	  edge pe;
1239
1240          if (nested_in_vect_loop_p (loop, stmt))
1241            loop = loop->inner;
1242
1243	  pe = loop_preheader_edge (loop);
1244          new_bb = gsi_insert_on_edge_immediate (pe, new_stmt);
1245          gcc_assert (!new_bb);
1246	}
1247      else
1248       {
1249          bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_vinfo);
1250          basic_block bb;
1251          gimple_stmt_iterator gsi_bb_start;
1252
1253          gcc_assert (bb_vinfo);
1254          bb = BB_VINFO_BB (bb_vinfo);
1255          gsi_bb_start = gsi_after_labels (bb);
1256          gsi_insert_before (&gsi_bb_start, new_stmt, GSI_SAME_STMT);
1257       }
1258    }
1259
1260  if (dump_enabled_p ())
1261    {
1262      dump_printf_loc (MSG_NOTE, vect_location,
1263                       "created new init_stmt: ");
1264      dump_gimple_stmt (MSG_NOTE, TDF_SLIM, new_stmt, 0);
1265    }
1266}
1267
1268/* Function vect_init_vector.
1269
1270   Insert a new stmt (INIT_STMT) that initializes a new variable of type
1271   TYPE with the value VAL.  If TYPE is a vector type and VAL does not have
1272   vector type a vector with all elements equal to VAL is created first.
1273   Place the initialization at BSI if it is not NULL.  Otherwise, place the
1274   initialization at the loop preheader.
1275   Return the DEF of INIT_STMT.
1276   It will be used in the vectorization of STMT.  */
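/* For example (illustrative), asking for the constant 3 in a V4SI TYPE
   produces an init stmt along the lines of "cst_1 = { 3, 3, 3, 3 }",
   inserted at GSI or in the loop preheader, and returns cst_1.  */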
1277
1278tree
1279vect_init_vector (gimple stmt, tree val, tree type, gimple_stmt_iterator *gsi)
1280{
1281  tree new_var;
1282  gimple init_stmt;
1283  tree vec_oprnd;
1284  tree new_temp;
1285
1286  if (TREE_CODE (type) == VECTOR_TYPE
1287      && TREE_CODE (TREE_TYPE (val)) != VECTOR_TYPE)
1288    {
1289      if (!types_compatible_p (TREE_TYPE (type), TREE_TYPE (val)))
1290	{
1291	  if (CONSTANT_CLASS_P (val))
1292	    val = fold_unary (VIEW_CONVERT_EXPR, TREE_TYPE (type), val);
1293	  else
1294	    {
1295	      new_temp = make_ssa_name (TREE_TYPE (type), NULL);
1296	      init_stmt = gimple_build_assign_with_ops (NOP_EXPR,
1297							new_temp, val,
1298							NULL_TREE);
1299	      vect_init_vector_1 (stmt, init_stmt, gsi);
1300	      val = new_temp;
1301	    }
1302	}
1303      val = build_vector_from_val (type, val);
1304    }
1305
1306  new_var = vect_get_new_vect_var (type, vect_simple_var, "cst_");
1307  init_stmt = gimple_build_assign (new_var, val);
1308  new_temp = make_ssa_name (new_var, init_stmt);
1309  gimple_assign_set_lhs (init_stmt, new_temp);
1310  vect_init_vector_1 (stmt, init_stmt, gsi);
1311  vec_oprnd = gimple_assign_lhs (init_stmt);
1312  return vec_oprnd;
1313}
1314
1315
1316/* Function vect_get_vec_def_for_operand.
1317
1318   OP is an operand in STMT.  This function returns a (vector) def that will be
1319   used in the vectorized stmt for STMT.
1320
1321   In the case that OP is an SSA_NAME which is defined in the loop, then
1322   STMT_VINFO_VEC_STMT of the defining stmt holds the relevant def.
1323
1324   In case OP is an invariant or constant, a new stmt that creates a vector def
1325   needs to be introduced.  */
1326
1327tree
1328vect_get_vec_def_for_operand (tree op, gimple stmt, tree *scalar_def)
1329{
1330  tree vec_oprnd;
1331  gimple vec_stmt;
1332  gimple def_stmt;
1333  stmt_vec_info def_stmt_info = NULL;
1334  stmt_vec_info stmt_vinfo = vinfo_for_stmt (stmt);
1335  unsigned int nunits;
1336  loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_vinfo);
1337  tree def;
1338  enum vect_def_type dt;
1339  bool is_simple_use;
1340  tree vector_type;
1341
1342  if (dump_enabled_p ())
1343    {
1344      dump_printf_loc (MSG_NOTE, vect_location,
1345                       "vect_get_vec_def_for_operand: ");
1346      dump_generic_expr (MSG_NOTE, TDF_SLIM, op);
1347    }
1348
1349  is_simple_use = vect_is_simple_use (op, stmt, loop_vinfo, NULL,
1350				      &def_stmt, &def, &dt);
1351  gcc_assert (is_simple_use);
1352  if (dump_enabled_p ())
1353    {
1354      int loc_printed = 0;
1355      if (def)
1356        {
1357          dump_printf_loc (MSG_NOTE, vect_location, "def =  ");
1358          loc_printed = 1;
1359          dump_generic_expr (MSG_NOTE, TDF_SLIM, def);
1360        }
1361      if (def_stmt)
1362        {
1363          if (loc_printed)
1364            dump_printf (MSG_NOTE, "  def_stmt =  ");
1365          else
1366            dump_printf_loc (MSG_NOTE, vect_location, "  def_stmt =  ");
1367	  dump_gimple_stmt (MSG_NOTE, TDF_SLIM, def_stmt, 0);
1368        }
1369    }
1370
1371  switch (dt)
1372    {
1373    /* Case 1: operand is a constant.  */
1374    case vect_constant_def:
1375      {
1376	vector_type = get_vectype_for_scalar_type (TREE_TYPE (op));
1377	gcc_assert (vector_type);
1378	nunits = TYPE_VECTOR_SUBPARTS (vector_type);
1379
1380	if (scalar_def)
1381	  *scalar_def = op;
1382
1383        /* Create 'vect_cst_ = {cst,cst,...,cst}'  */
1384        if (dump_enabled_p ())
1385          dump_printf_loc (MSG_NOTE, vect_location,
1386                           "Create vector_cst. nunits = %d", nunits);
1387
1388        return vect_init_vector (stmt, op, vector_type, NULL);
1389      }
1390
1391    /* Case 2: operand is defined outside the loop - loop invariant.  */
1392    case vect_external_def:
1393      {
1394	vector_type = get_vectype_for_scalar_type (TREE_TYPE (def));
1395	gcc_assert (vector_type);
1396
1397	if (scalar_def)
1398	  *scalar_def = def;
1399
1400        /* Create 'vec_inv = {inv,inv,..,inv}'  */
1401        if (dump_enabled_p ())
1402          dump_printf_loc (MSG_NOTE, vect_location, "Create vector_inv.");
1403
1404        return vect_init_vector (stmt, def, vector_type, NULL);
1405      }
1406
1407    /* Case 3: operand is defined inside the loop.  */
1408    case vect_internal_def:
1409      {
1410	if (scalar_def)
1411	  *scalar_def = NULL/* FIXME tuples: def_stmt*/;
1412
1413        /* Get the def from the vectorized stmt.  */
1414        def_stmt_info = vinfo_for_stmt (def_stmt);
1415
1416        vec_stmt = STMT_VINFO_VEC_STMT (def_stmt_info);
1417        /* Get vectorized pattern statement.  */
1418        if (!vec_stmt
1419            && STMT_VINFO_IN_PATTERN_P (def_stmt_info)
1420            && !STMT_VINFO_RELEVANT (def_stmt_info))
1421          vec_stmt = STMT_VINFO_VEC_STMT (vinfo_for_stmt (
1422                       STMT_VINFO_RELATED_STMT (def_stmt_info)));
1423        gcc_assert (vec_stmt);
1424	if (gimple_code (vec_stmt) == GIMPLE_PHI)
1425	  vec_oprnd = PHI_RESULT (vec_stmt);
1426	else if (is_gimple_call (vec_stmt))
1427	  vec_oprnd = gimple_call_lhs (vec_stmt);
1428	else
1429	  vec_oprnd = gimple_assign_lhs (vec_stmt);
1430        return vec_oprnd;
1431      }
1432
1433    /* Case 4: operand is defined by a loop header phi - reduction  */
1434    case vect_reduction_def:
1435    case vect_double_reduction_def:
1436    case vect_nested_cycle:
1437      {
1438	struct loop *loop;
1439
1440	gcc_assert (gimple_code (def_stmt) == GIMPLE_PHI);
1441	loop = (gimple_bb (def_stmt))->loop_father;
1442
1443        /* Get the def before the loop.  */
1444        op = PHI_ARG_DEF_FROM_EDGE (def_stmt, loop_preheader_edge (loop));
1445        return get_initial_def_for_reduction (stmt, op, scalar_def);
1446     }
1447
1448    /* Case 5: operand is defined by loop-header phi - induction.  */
1449    case vect_induction_def:
1450      {
1451	gcc_assert (gimple_code (def_stmt) == GIMPLE_PHI);
1452
1453        /* Get the def from the vectorized stmt.  */
1454        def_stmt_info = vinfo_for_stmt (def_stmt);
1455        vec_stmt = STMT_VINFO_VEC_STMT (def_stmt_info);
1456	if (gimple_code (vec_stmt) == GIMPLE_PHI)
1457	  vec_oprnd = PHI_RESULT (vec_stmt);
1458	else
1459	  vec_oprnd = gimple_get_lhs (vec_stmt);
1460        return vec_oprnd;
1461      }
1462
1463    default:
1464      gcc_unreachable ();
1465    }
1466}
1467
1468
1469/* Function vect_get_vec_def_for_stmt_copy
1470
1471   Return a vector-def for an operand.  This function is used when the
1472   vectorized stmt to be created (by the caller to this function) is a "copy"
1473   created in case the vectorized result cannot fit in one vector, and several
1474   copies of the vector-stmt are required.  In this case the vector-def is
1475   retrieved from the vector stmt recorded in the STMT_VINFO_RELATED_STMT field
1476   of the stmt that defines VEC_OPRND.
1477   DT is the type of the vector def VEC_OPRND.
1478
1479   Context:
1480        In case the vectorization factor (VF) is bigger than the number
1481   of elements that can fit in a vectype (nunits), we have to generate
1482   more than one vector stmt to vectorize the scalar stmt.  This situation
1483   arises when there are multiple data-types operated upon in the loop; the
1484   smallest data-type determines the VF, and as a result, when vectorizing
1485   stmts operating on wider types we need to create 'VF/nunits' "copies" of the
1486   vector stmt (each computing a vector of 'nunits' results, and together
1487   computing 'VF' results in each iteration).  This function is called when
1488   vectorizing such a stmt (e.g. vectorizing S2 in the illustration below, in
1489   which VF=16 and nunits=4, so the number of copies required is 4):
1490
1491   scalar stmt:         vectorized into:        STMT_VINFO_RELATED_STMT
1492
1493   S1: x = load         VS1.0:  vx.0 = memref0      VS1.1
1494                        VS1.1:  vx.1 = memref1      VS1.2
1495                        VS1.2:  vx.2 = memref2      VS1.3
1496                        VS1.3:  vx.3 = memref3
1497
1498   S2: z = x + ...      VSnew.0:  vz0 = vx.0 + ...  VSnew.1
1499                        VSnew.1:  vz1 = vx.1 + ...  VSnew.2
1500                        VSnew.2:  vz2 = vx.2 + ...  VSnew.3
1501                        VSnew.3:  vz3 = vx.3 + ...
1502
1503   The vectorization of S1 is explained in vectorizable_load.
1504   The vectorization of S2:
1505        To create the first vector-stmt out of the 4 copies - VSnew.0 -
1506   the function 'vect_get_vec_def_for_operand' is called to
1507   get the relevant vector-def for each operand of S2.  For operand x it
1508   returns  the vector-def 'vx.0'.
1509
1510        To create the remaining copies of the vector-stmt (VSnew.j), this
1511   function is called to get the relevant vector-def for each operand.  It is
1512   obtained from the respective VS1.j stmt, which is recorded in the
1513   STMT_VINFO_RELATED_STMT field of the stmt that defines VEC_OPRND.
1514
1515        For example, to obtain the vector-def 'vx.1' in order to create the
1516   vector stmt 'VSnew.1', this function is called with VEC_OPRND='vx.0'.
1517   Given 'vx.0' we obtain the stmt that defines it ('VS1.0'); from the
1518   STMT_VINFO_RELATED_STMT field of 'VS1.0' we obtain the next copy - 'VS1.1',
1519   and return its def ('vx.1').
1520   Overall, to create the above sequence this function will be called 3 times:
1521        vx.1 = vect_get_vec_def_for_stmt_copy (dt, vx.0);
1522        vx.2 = vect_get_vec_def_for_stmt_copy (dt, vx.1);
1523        vx.3 = vect_get_vec_def_for_stmt_copy (dt, vx.2);  */
1524
1525tree
1526vect_get_vec_def_for_stmt_copy (enum vect_def_type dt, tree vec_oprnd)
1527{
1528  gimple vec_stmt_for_operand;
1529  stmt_vec_info def_stmt_info;
1530
1531  /* Do nothing; can reuse same def.  */
1532  if (dt == vect_external_def || dt == vect_constant_def)
1533    return vec_oprnd;
1534
1535  vec_stmt_for_operand = SSA_NAME_DEF_STMT (vec_oprnd);
1536  def_stmt_info = vinfo_for_stmt (vec_stmt_for_operand);
1537  gcc_assert (def_stmt_info);
1538  vec_stmt_for_operand = STMT_VINFO_RELATED_STMT (def_stmt_info);
1539  gcc_assert (vec_stmt_for_operand);
1541  if (gimple_code (vec_stmt_for_operand) == GIMPLE_PHI)
1542    vec_oprnd = PHI_RESULT (vec_stmt_for_operand);
1543  else
1544    vec_oprnd = gimple_get_lhs (vec_stmt_for_operand);
1545  return vec_oprnd;
1546}
1547
1548
1549/* Get vectorized definitions for the operands to create a copy of an original
1550   stmt.  See vect_get_vec_def_for_stmt_copy () for details.  */
1551
1552static void
1553vect_get_vec_defs_for_stmt_copy (enum vect_def_type *dt,
1554				 vec<tree> *vec_oprnds0,
1555				 vec<tree> *vec_oprnds1)
1556{
1557  tree vec_oprnd = vec_oprnds0->pop ();
1558
1559  vec_oprnd = vect_get_vec_def_for_stmt_copy (dt[0], vec_oprnd);
1560  vec_oprnds0->quick_push (vec_oprnd);
1561
1562  if (vec_oprnds1 && vec_oprnds1->length ())
1563    {
1564      vec_oprnd = vec_oprnds1->pop ();
1565      vec_oprnd = vect_get_vec_def_for_stmt_copy (dt[1], vec_oprnd);
1566      vec_oprnds1->quick_push (vec_oprnd);
1567    }
1568}
1569
1570
1571/* Get vectorized definitions for OP0 and OP1.
1572   REDUC_INDEX is the index of reduction operand in case of reduction,
1573   and -1 otherwise.  */
1574
1575void
1576vect_get_vec_defs (tree op0, tree op1, gimple stmt,
1577		   vec<tree> *vec_oprnds0,
1578		   vec<tree> *vec_oprnds1,
1579		   slp_tree slp_node, int reduc_index)
1580{
1581  if (slp_node)
1582    {
1583      int nops = (op1 == NULL_TREE) ? 1 : 2;
1584      vec<tree> ops;
1585      ops.create (nops);
1586      vec<vec<tree> > vec_defs;
1587      vec_defs.create (nops);
1588
1589      ops.quick_push (op0);
1590      if (op1)
1591        ops.quick_push (op1);
1592
1593      vect_get_slp_defs (ops, slp_node, &vec_defs, reduc_index);
1594
1595      *vec_oprnds0 = vec_defs[0];
1596      if (op1)
1597	*vec_oprnds1 = vec_defs[1];
1598
1599      ops.release ();
1600      vec_defs.release ();
1601    }
1602  else
1603    {
1604      tree vec_oprnd;
1605
1606      vec_oprnds0->create (1);
1607      vec_oprnd = vect_get_vec_def_for_operand (op0, stmt, NULL);
1608      vec_oprnds0->quick_push (vec_oprnd);
1609
1610      if (op1)
1611	{
1612	  vec_oprnds1->create (1);
1613	  vec_oprnd = vect_get_vec_def_for_operand (op1, stmt, NULL);
1614	  vec_oprnds1->quick_push (vec_oprnd);
1615	}
1616    }
1617}
1618
1619
1620/* Function vect_finish_stmt_generation.
1621
1622   Insert VEC_STMT before GSI and record its stmt_vec_info.  */
1623
1624void
1625vect_finish_stmt_generation (gimple stmt, gimple vec_stmt,
1626			     gimple_stmt_iterator *gsi)
1627{
1628  stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
1629  loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
1630  bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
1631
1632  gcc_assert (gimple_code (stmt) != GIMPLE_LABEL);
1633
1634  if (!gsi_end_p (*gsi)
1635      && gimple_has_mem_ops (vec_stmt))
1636    {
1637      gimple at_stmt = gsi_stmt (*gsi);
1638      tree vuse = gimple_vuse (at_stmt);
1639      if (vuse && TREE_CODE (vuse) == SSA_NAME)
1640	{
1641	  tree vdef = gimple_vdef (at_stmt);
1642	  gimple_set_vuse (vec_stmt, gimple_vuse (at_stmt));
1643	  /* If we have an SSA vuse and insert a store, update virtual
1644	     SSA form to avoid triggering the renamer.  Do so only
1645	     if we can easily see all uses - which is what almost always
1646	     happens with the way vectorized stmts are inserted.  */
1647	  if ((vdef && TREE_CODE (vdef) == SSA_NAME)
1648	      && ((is_gimple_assign (vec_stmt)
1649		   && !is_gimple_reg (gimple_assign_lhs (vec_stmt)))
1650		  || (is_gimple_call (vec_stmt)
1651		      && !(gimple_call_flags (vec_stmt)
1652			   & (ECF_CONST|ECF_PURE|ECF_NOVOPS)))))
1653	    {
1654	      tree new_vdef = copy_ssa_name (vuse, vec_stmt);
1655	      gimple_set_vdef (vec_stmt, new_vdef);
1656	      SET_USE (gimple_vuse_op (at_stmt), new_vdef);
1657	    }
1658	}
1659    }
1660  gsi_insert_before (gsi, vec_stmt, GSI_SAME_STMT);
1661
1662  set_vinfo_for_stmt (vec_stmt, new_stmt_vec_info (vec_stmt, loop_vinfo,
1663                                                   bb_vinfo));
1664
1665  if (dump_enabled_p ())
1666    {
1667      dump_printf_loc (MSG_NOTE, vect_location, "add new stmt: ");
1668      dump_gimple_stmt (MSG_NOTE, TDF_SLIM, vec_stmt, 0);
1669    }
1670
1671  gimple_set_location (vec_stmt, gimple_location (stmt));
1672}
1673
1674/* Checks if CALL can be vectorized with result type VECTYPE_OUT and argument
1675   type VECTYPE_IN.  Returns a declaration of the vectorized function if the
1676   target has one, or NULL_TREE if the function cannot be vectorized.  */
1677
1678tree
1679vectorizable_function (gimple call, tree vectype_out, tree vectype_in)
1680{
1681  tree fndecl = gimple_call_fndecl (call);
1682
1683  /* We only handle functions that do not read or clobber memory -- i.e.
1684     const or novops ones.  */
1685  if (!(gimple_call_flags (call) & (ECF_CONST | ECF_NOVOPS)))
1686    return NULL_TREE;
1687
1688  if (!fndecl
1689      || TREE_CODE (fndecl) != FUNCTION_DECL
1690      || !DECL_BUILT_IN (fndecl))
1691    return NULL_TREE;
1692
1693  return targetm.vectorize.builtin_vectorized_function (fndecl, vectype_out,
1694						        vectype_in);
1695}
1696
1697/* Function vectorizable_call.
1698
1699   Check if STMT performs a function call that can be vectorized.
1700   If VEC_STMT is also passed, vectorize the STMT: create a vectorized
1701   stmt to replace it, put it in VEC_STMT, and insert it at GSI.
1702   Return FALSE if not a vectorizable STMT, TRUE otherwise.  */
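
/* For example, a loop body such as
        a[i] = sqrtf (b[i]);
   can be vectorized here provided sqrtf resolves to a const builtin
   (e.g. with -fno-math-errno) and the target advertises a vectorized
   variant of it through targetm.vectorize.builtin_vectorized_function;
   each copy of the scalar call then becomes a single call operating on
   whole vectors.  The vector builtin itself is target specific, so this
   is only an illustration.  */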
1703
1704static bool
1705vectorizable_call (gimple stmt, gimple_stmt_iterator *gsi, gimple *vec_stmt,
1706		   slp_tree slp_node)
1707{
1708  tree vec_dest;
1709  tree scalar_dest;
1710  tree op, type;
1711  tree vec_oprnd0 = NULL_TREE, vec_oprnd1 = NULL_TREE;
1712  stmt_vec_info stmt_info = vinfo_for_stmt (stmt), prev_stmt_info;
1713  tree vectype_out, vectype_in;
1714  int nunits_in;
1715  int nunits_out;
1716  loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
1717  bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
1718  tree fndecl, new_temp, def, rhs_type;
1719  gimple def_stmt;
1720  enum vect_def_type dt[3]
1721    = {vect_unknown_def_type, vect_unknown_def_type, vect_unknown_def_type};
1722  gimple new_stmt = NULL;
1723  int ncopies, j;
1724  vec<tree> vargs = vNULL;
1725  enum { NARROW, NONE, WIDEN } modifier;
1726  size_t i, nargs;
1727  tree lhs;
1728
1729  if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
1730    return false;
1731
1732  if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def)
1733    return false;
1734
1735  /* Is STMT a vectorizable call?   */
1736  if (!is_gimple_call (stmt))
1737    return false;
1738
1739  if (TREE_CODE (gimple_call_lhs (stmt)) != SSA_NAME)
1740    return false;
1741
1742  if (stmt_can_throw_internal (stmt))
1743    return false;
1744
1745  vectype_out = STMT_VINFO_VECTYPE (stmt_info);
1746
1747  /* Process function arguments.  */
1748  rhs_type = NULL_TREE;
1749  vectype_in = NULL_TREE;
1750  nargs = gimple_call_num_args (stmt);
1751
1752  /* Bail out if the function has more than three arguments; we do not have
1753     interesting builtin functions to vectorize with more than two arguments
1754     except for fma.  A call with no arguments is not interesting either.  */
1755  if (nargs == 0 || nargs > 3)
1756    return false;
1757
1758  for (i = 0; i < nargs; i++)
1759    {
1760      tree opvectype;
1761
1762      op = gimple_call_arg (stmt, i);
1763
1764      /* We can only handle calls with arguments of the same type.  */
1765      if (rhs_type
1766	  && !types_compatible_p (rhs_type, TREE_TYPE (op)))
1767	{
1768	  if (dump_enabled_p ())
1769	    dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1770                             "argument types differ.");
1771	  return false;
1772	}
1773      if (!rhs_type)
1774	rhs_type = TREE_TYPE (op);
1775
1776      if (!vect_is_simple_use_1 (op, stmt, loop_vinfo, bb_vinfo,
1777				 &def_stmt, &def, &dt[i], &opvectype))
1778	{
1779	  if (dump_enabled_p ())
1780	    dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1781                             "use not simple.");
1782	  return false;
1783	}
1784
1785      if (!vectype_in)
1786	vectype_in = opvectype;
1787      else if (opvectype
1788	       && opvectype != vectype_in)
1789	{
1790	  if (dump_enabled_p ())
1791	    dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1792                             "argument vector types differ.");
1793	  return false;
1794	}
1795    }
1796  /* If all arguments are external or constant defs, use a vector type with
1797     the same size as the output vector type.  */
1798  if (!vectype_in)
1799    vectype_in = get_same_sized_vectype (rhs_type, vectype_out);
1800  if (vec_stmt)
1801    gcc_assert (vectype_in);
1802  if (!vectype_in)
1803    {
1804      if (dump_enabled_p ())
1805        {
1806          dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1807                           "no vectype for scalar type ");
1808          dump_generic_expr (MSG_MISSED_OPTIMIZATION, TDF_SLIM, rhs_type);
1809        }
1810
1811      return false;
1812    }
1813
1814  /* FORNOW */
1815  nunits_in = TYPE_VECTOR_SUBPARTS (vectype_in);
1816  nunits_out = TYPE_VECTOR_SUBPARTS (vectype_out);
1817  if (nunits_in == nunits_out / 2)
1818    modifier = NARROW;
1819  else if (nunits_out == nunits_in)
1820    modifier = NONE;
1821  else if (nunits_out == nunits_in / 2)
1822    modifier = WIDEN;
1823  else
1824    return false;
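
  /* For illustration: with 128-bit vectors, a call taking V4SF arguments
     and producing V2DF results has nunits_in == 4 and nunits_out == 2,
     so MODIFIER is WIDEN; the opposite direction (V2DF arguments, V4SF
     results) gives NARROW.  Illustrative types only.  */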
1825
1826  /* For now, we only vectorize functions if a target specific builtin
1827     is available.  TODO -- in some cases, it might be profitable to
1828     insert the calls for pieces of the vector, in order to be able
1829     to vectorize other operations in the loop.  */
1830  fndecl = vectorizable_function (stmt, vectype_out, vectype_in);
1831  if (fndecl == NULL_TREE)
1832    {
1833      if (dump_enabled_p ())
1834	dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1835                         "function is not vectorizable.");
1836
1837      return false;
1838    }
1839
1840  gcc_assert (!gimple_vuse (stmt));
1841
1842  if (slp_node || PURE_SLP_STMT (stmt_info))
1843    ncopies = 1;
1844  else if (modifier == NARROW)
1845    ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits_out;
1846  else
1847    ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits_in;
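
  /* E.g., with a vectorization factor of 8 and nunits_in == 4 (and
     MODIFIER == NONE), two copies of the vector call are generated.  For
     NARROW the divisor is nunits_out, since each copy already consumes
     two input vectors to produce one output vector.  */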
1848
1849  /* Sanity check: make sure that at least one copy of the vectorized stmt
1850     needs to be generated.  */
1851  gcc_assert (ncopies >= 1);
1852
1853  if (!vec_stmt) /* transformation not required.  */
1854    {
1855      STMT_VINFO_TYPE (stmt_info) = call_vec_info_type;
1856      if (dump_enabled_p ())
1857        dump_printf_loc (MSG_NOTE, vect_location, "=== vectorizable_call ===");
1858      vect_model_simple_cost (stmt_info, ncopies, dt, NULL, NULL);
1859      return true;
1860    }
1861
1862  /** Transform.  **/
1863
1864  if (dump_enabled_p ())
1865    dump_printf_loc (MSG_NOTE, vect_location, "transform call.");
1866
1867  /* Handle def.  */
1868  scalar_dest = gimple_call_lhs (stmt);
1869  vec_dest = vect_create_destination_var (scalar_dest, vectype_out);
1870
1871  prev_stmt_info = NULL;
1872  switch (modifier)
1873    {
1874    case NONE:
1875      for (j = 0; j < ncopies; ++j)
1876	{
1877	  /* Build argument list for the vectorized call.  */
1878	  if (j == 0)
1879	    vargs.create (nargs);
1880	  else
1881	    vargs.truncate (0);
1882
1883	  if (slp_node)
1884	    {
1885	      vec<vec<tree> > vec_defs;
1886	      vec_defs.create (nargs);
1887	      vec<tree> vec_oprnds0;
1888
1889	      for (i = 0; i < nargs; i++)
1890		vargs.quick_push (gimple_call_arg (stmt, i));
1891	      vect_get_slp_defs (vargs, slp_node, &vec_defs, -1);
1892	      vec_oprnds0 = vec_defs[0];
1893
1894	      /* Arguments are ready.  Create the new vector stmt.  */
1895	      FOR_EACH_VEC_ELT (vec_oprnds0, i, vec_oprnd0)
1896		{
1897		  size_t k;
1898		  for (k = 0; k < nargs; k++)
1899		    {
1900		      vec<tree> vec_oprndsk = vec_defs[k];
1901		      vargs[k] = vec_oprndsk[i];
1902		    }
1903		  new_stmt = gimple_build_call_vec (fndecl, vargs);
1904		  new_temp = make_ssa_name (vec_dest, new_stmt);
1905		  gimple_call_set_lhs (new_stmt, new_temp);
1906		  vect_finish_stmt_generation (stmt, new_stmt, gsi);
1907		  SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
1908		}
1909
1910	      for (i = 0; i < nargs; i++)
1911		{
1912		  vec<tree> vec_oprndsi = vec_defs[i];
1913		  vec_oprndsi.release ();
1914		}
1915	      vec_defs.release ();
1916	      continue;
1917	    }
1918
1919	  for (i = 0; i < nargs; i++)
1920	    {
1921	      op = gimple_call_arg (stmt, i);
1922	      if (j == 0)
1923		vec_oprnd0
1924		  = vect_get_vec_def_for_operand (op, stmt, NULL);
1925	      else
1926		{
1927		  vec_oprnd0 = gimple_call_arg (new_stmt, i);
1928		  vec_oprnd0
1929                    = vect_get_vec_def_for_stmt_copy (dt[i], vec_oprnd0);
1930		}
1931
1932	      vargs.quick_push (vec_oprnd0);
1933	    }
1934
1935	  new_stmt = gimple_build_call_vec (fndecl, vargs);
1936	  new_temp = make_ssa_name (vec_dest, new_stmt);
1937	  gimple_call_set_lhs (new_stmt, new_temp);
1938	  vect_finish_stmt_generation (stmt, new_stmt, gsi);
1939
1940	  if (j == 0)
1941	    STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
1942	  else
1943	    STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
1944
1945	  prev_stmt_info = vinfo_for_stmt (new_stmt);
1946	}
1947
1948      break;
1949
1950    case NARROW:
1951      for (j = 0; j < ncopies; ++j)
1952	{
1953	  /* Build argument list for the vectorized call.  */
1954	  if (j == 0)
1955	    vargs.create (nargs * 2);
1956	  else
1957	    vargs.truncate (0);
1958
1959	  if (slp_node)
1960	    {
1961	      vec<vec<tree> > vec_defs;
1962	      vec_defs.create (nargs);
1963	      vec<tree> vec_oprnds0;
1964
1965	      for (i = 0; i < nargs; i++)
1966		vargs.quick_push (gimple_call_arg (stmt, i));
1967	      vect_get_slp_defs (vargs, slp_node, &vec_defs, -1);
1968	      vec_oprnds0 = vec_defs[0];
1969
1970	      /* Arguments are ready.  Create the new vector stmt.  */
1971	      for (i = 0; vec_oprnds0.iterate (i, &vec_oprnd0); i += 2)
1972		{
1973		  size_t k;
1974		  vargs.truncate (0);
1975		  for (k = 0; k < nargs; k++)
1976		    {
1977		      vec<tree> vec_oprndsk = vec_defs[k];
1978		      vargs.quick_push (vec_oprndsk[i]);
1979		      vargs.quick_push (vec_oprndsk[i + 1]);
1980		    }
1981		  new_stmt = gimple_build_call_vec (fndecl, vargs);
1982		  new_temp = make_ssa_name (vec_dest, new_stmt);
1983		  gimple_call_set_lhs (new_stmt, new_temp);
1984		  vect_finish_stmt_generation (stmt, new_stmt, gsi);
1985		  SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
1986		}
1987
1988	      for (i = 0; i < nargs; i++)
1989		{
1990		  vec<tree> vec_oprndsi = vec_defs[i];
1991		  vec_oprndsi.release ();
1992		}
1993	      vec_defs.release ();
1994	      continue;
1995	    }
1996
1997	  for (i = 0; i < nargs; i++)
1998	    {
1999	      op = gimple_call_arg (stmt, i);
2000	      if (j == 0)
2001		{
2002		  vec_oprnd0
2003		    = vect_get_vec_def_for_operand (op, stmt, NULL);
2004		  vec_oprnd1
2005		    = vect_get_vec_def_for_stmt_copy (dt[i], vec_oprnd0);
2006		}
2007	      else
2008		{
2009		  vec_oprnd1 = gimple_call_arg (new_stmt, 2*i + 1);
2010		  vec_oprnd0
2011		    = vect_get_vec_def_for_stmt_copy (dt[i], vec_oprnd1);
2012		  vec_oprnd1
2013		    = vect_get_vec_def_for_stmt_copy (dt[i], vec_oprnd0);
2014		}
2015
2016	      vargs.quick_push (vec_oprnd0);
2017	      vargs.quick_push (vec_oprnd1);
2018	    }
2019
2020	  new_stmt = gimple_build_call_vec (fndecl, vargs);
2021	  new_temp = make_ssa_name (vec_dest, new_stmt);
2022	  gimple_call_set_lhs (new_stmt, new_temp);
2023	  vect_finish_stmt_generation (stmt, new_stmt, gsi);
2024
2025	  if (j == 0)
2026	    STMT_VINFO_VEC_STMT (stmt_info) = new_stmt;
2027	  else
2028	    STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
2029
2030	  prev_stmt_info = vinfo_for_stmt (new_stmt);
2031	}
2032
2033      *vec_stmt = STMT_VINFO_VEC_STMT (stmt_info);
2034
2035      break;
2036
2037    case WIDEN:
2038      /* No current target implements this case.  */
2039      return false;
2040    }
2041
2042  vargs.release ();
2043
2044  /* Update the exception handling table with the vector stmt if necessary.  */
2045  if (maybe_clean_or_replace_eh_stmt (stmt, *vec_stmt))
2046    gimple_purge_dead_eh_edges (gimple_bb (stmt));
2047
2048  /* The call in STMT might prevent it from being removed by DCE.  We
2049     cannot remove it here, however, because of the way the SSA name it
2050     defines is mapped to the new definition.  So just replace the rhs of
2051     the statement with something harmless.  */
2052
2053  if (slp_node)
2054    return true;
2055
2056  type = TREE_TYPE (scalar_dest);
2057  if (is_pattern_stmt_p (stmt_info))
2058    lhs = gimple_call_lhs (STMT_VINFO_RELATED_STMT (stmt_info));
2059  else
2060    lhs = gimple_call_lhs (stmt);
2061  new_stmt = gimple_build_assign (lhs, build_zero_cst (type));
2062  set_vinfo_for_stmt (new_stmt, stmt_info);
2063  set_vinfo_for_stmt (stmt, NULL);
2064  STMT_VINFO_STMT (stmt_info) = new_stmt;
2065  gsi_replace (gsi, new_stmt, false);
2066  SSA_NAME_DEF_STMT (gimple_assign_lhs (new_stmt)) = new_stmt;
2067
2068  return true;
2069}
2070
2071
2072/* Function vect_gen_widened_results_half
2073
2074   Create a vector stmt whose code, type, number of arguments, and result
2075   variable are CODE, OP_TYPE, and VEC_DEST, and its arguments are
2076   VEC_OPRND0 and VEC_OPRND1.  The new vector stmt is to be inserted at BSI.
2077   In the case that CODE is a CALL_EXPR, this means that a call to DECL
2078   needs to be created (DECL is a function-decl of a target-builtin).
2079   STMT is the original scalar stmt that we are vectorizing.  */
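
/* For example, with CODE == VEC_UNPACK_LO_EXPR (a unary widening code)
   this emits
        new_temp = VEC_UNPACK_LO_EXPR <VEC_OPRND0>;
   whereas with CODE == CALL_EXPR it emits a call to DECL taking one or
   two vector arguments, depending on OP_TYPE.  Illustrative only; the
   callers obtain the actual codes/decls from
   supportable_widening_operation.  */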
2080
2081static gimple
2082vect_gen_widened_results_half (enum tree_code code,
2083			       tree decl,
2084                               tree vec_oprnd0, tree vec_oprnd1, int op_type,
2085			       tree vec_dest, gimple_stmt_iterator *gsi,
2086			       gimple stmt)
2087{
2088  gimple new_stmt;
2089  tree new_temp;
2090
2091  /* Generate half of the widened result:  */
2092  if (code == CALL_EXPR)
2093    {
2094      /* Target specific support  */
2095      if (op_type == binary_op)
2096	new_stmt = gimple_build_call (decl, 2, vec_oprnd0, vec_oprnd1);
2097      else
2098	new_stmt = gimple_build_call (decl, 1, vec_oprnd0);
2099      new_temp = make_ssa_name (vec_dest, new_stmt);
2100      gimple_call_set_lhs (new_stmt, new_temp);
2101    }
2102  else
2103    {
2104      /* Generic support */
2105      gcc_assert (op_type == TREE_CODE_LENGTH (code));
2106      if (op_type != binary_op)
2107	vec_oprnd1 = NULL;
2108      new_stmt = gimple_build_assign_with_ops (code, vec_dest, vec_oprnd0,
2109					       vec_oprnd1);
2110      new_temp = make_ssa_name (vec_dest, new_stmt);
2111      gimple_assign_set_lhs (new_stmt, new_temp);
2112    }
2113  vect_finish_stmt_generation (stmt, new_stmt, gsi);
2114
2115  return new_stmt;
2116}
2117
2118
2119/* Get vectorized definitions for loop-based vectorization.  For the first
2120   operand we call vect_get_vec_def_for_operand() (with OPRND containing
2121   the scalar operand), and for the rest we get a copy with
2122   vect_get_vec_def_for_stmt_copy() using the previous vector definition
2123   (stored in OPRND). See vect_get_vec_def_for_stmt_copy() for details.
2124   The vectors are collected into VEC_OPRNDS.  */
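
/* Each invocation pushes two vector defs onto VEC_OPRNDS (a narrowing
   step consumes two input vectors per output vector) and recurses
   MULTI_STEP_CVT more times, so a call with MULTI_STEP_CVT == N collects
   2 * (N + 1) defs in total.  */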
2125
2126static void
2127vect_get_loop_based_defs (tree *oprnd, gimple stmt, enum vect_def_type dt,
2128			  vec<tree> *vec_oprnds, int multi_step_cvt)
2129{
2130  tree vec_oprnd;
2131
2132  /* Get first vector operand.  */
2133  /* All the vector operands except the very first one (which is the scalar
2134     operand) are stmt copies.  */
2135  if (TREE_CODE (TREE_TYPE (*oprnd)) != VECTOR_TYPE)
2136    vec_oprnd = vect_get_vec_def_for_operand (*oprnd, stmt, NULL);
2137  else
2138    vec_oprnd = vect_get_vec_def_for_stmt_copy (dt, *oprnd);
2139
2140  vec_oprnds->quick_push (vec_oprnd);
2141
2142  /* Get second vector operand.  */
2143  vec_oprnd = vect_get_vec_def_for_stmt_copy (dt, vec_oprnd);
2144  vec_oprnds->quick_push (vec_oprnd);
2145
2146  *oprnd = vec_oprnd;
2147
2148  /* For conversion in multiple steps, continue to get operands
2149     recursively.  */
2150  if (multi_step_cvt)
2151    vect_get_loop_based_defs (oprnd, stmt, dt, vec_oprnds,  multi_step_cvt - 1);
2152}
2153
2154
2155/* Create vectorized demotion statements for vector operands from VEC_OPRNDS.
2156   For multi-step conversions store the resulting vectors and call the function
2157   recursively.  */
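
/* For example, a two-step int -> char demotion with 128-bit vectors first
   packs pairs of V4SI vectors into V8HI vectors and then packs pairs of
   those V8HI vectors into V16QI vectors using VEC_PACK_TRUNC_EXPR.
   Illustrative types only; the intermediate types actually used come from
   supportable_narrowing_operation.  */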
2158
2159static void
2160vect_create_vectorized_demotion_stmts (vec<tree> *vec_oprnds,
2161				       int multi_step_cvt, gimple stmt,
2162				       vec<tree> vec_dsts,
2163				       gimple_stmt_iterator *gsi,
2164				       slp_tree slp_node, enum tree_code code,
2165				       stmt_vec_info *prev_stmt_info)
2166{
2167  unsigned int i;
2168  tree vop0, vop1, new_tmp, vec_dest;
2169  gimple new_stmt;
2170  stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
2171
2172  vec_dest = vec_dsts.pop ();
2173
2174  for (i = 0; i < vec_oprnds->length (); i += 2)
2175    {
2176      /* Create demotion operation.  */
2177      vop0 = (*vec_oprnds)[i];
2178      vop1 = (*vec_oprnds)[i + 1];
2179      new_stmt = gimple_build_assign_with_ops (code, vec_dest, vop0, vop1);
2180      new_tmp = make_ssa_name (vec_dest, new_stmt);
2181      gimple_assign_set_lhs (new_stmt, new_tmp);
2182      vect_finish_stmt_generation (stmt, new_stmt, gsi);
2183
2184      if (multi_step_cvt)
2185	/* Store the resulting vector for next recursive call.  */
2186	(*vec_oprnds)[i/2] = new_tmp;
2187      else
2188	{
2189	  /* This is the last step of the conversion sequence.  Store the
2190	     vectors in SLP_NODE or in the vector info of the scalar statement
2191	     (or in the STMT_VINFO_RELATED_STMT chain).  */
2192	  if (slp_node)
2193	    SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
2194	  else
2195	    {
2196	      if (!*prev_stmt_info)
2197		STMT_VINFO_VEC_STMT (stmt_info) = new_stmt;
2198	      else
2199		STMT_VINFO_RELATED_STMT (*prev_stmt_info) = new_stmt;
2200
2201	      *prev_stmt_info = vinfo_for_stmt (new_stmt);
2202	    }
2203	}
2204    }
2205
2206  /* For multi-step demotion operations we first generate demotion operations
2207     from the source type to the intermediate types, and then combine the
2208     results (stored in VEC_OPRNDS) in a demotion operation to the destination
2209     type.  */
2210  if (multi_step_cvt)
2211    {
2212      /* At each level of recursion we have half of the operands we had at the
2213	 previous level.  */
2214      vec_oprnds->truncate ((i+1)/2);
2215      vect_create_vectorized_demotion_stmts (vec_oprnds, multi_step_cvt - 1,
2216					     stmt, vec_dsts, gsi, slp_node,
2217					     VEC_PACK_TRUNC_EXPR,
2218					     prev_stmt_info);
2219    }
2220
2221  vec_dsts.quick_push (vec_dest);
2222}
2223
2224
2225/* Create vectorized promotion statements for vector operands from VEC_OPRNDS0
2226   and VEC_OPRNDS1 (for binary operations).  For multi-step conversions store
2227   the resulting vectors and call the function recursively.  */
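
/* Each vector in VEC_OPRNDS0 yields two result vectors - the low and the
   high half, produced with CODE1/CODE2 (or with calls to DECL1/DECL2) -
   so on return VEC_OPRNDS0 holds twice as many vectors, ready to be fed
   to the next conversion step if there is one.  */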
2228
2229static void
2230vect_create_vectorized_promotion_stmts (vec<tree> *vec_oprnds0,
2231					vec<tree> *vec_oprnds1,
2232					gimple stmt, tree vec_dest,
2233					gimple_stmt_iterator *gsi,
2234					enum tree_code code1,
2235					enum tree_code code2, tree decl1,
2236					tree decl2, int op_type)
2237{
2238  int i;
2239  tree vop0, vop1, new_tmp1, new_tmp2;
2240  gimple new_stmt1, new_stmt2;
2241  vec<tree> vec_tmp = vNULL;
2242
2243  vec_tmp.create (vec_oprnds0->length () * 2);
2244  FOR_EACH_VEC_ELT (*vec_oprnds0, i, vop0)
2245    {
2246      if (op_type == binary_op)
2247	vop1 = (*vec_oprnds1)[i];
2248      else
2249	vop1 = NULL_TREE;
2250
2251      /* Generate the two halves of promotion operation.  */
2252      new_stmt1 = vect_gen_widened_results_half (code1, decl1, vop0, vop1,
2253						 op_type, vec_dest, gsi, stmt);
2254      new_stmt2 = vect_gen_widened_results_half (code2, decl2, vop0, vop1,
2255						 op_type, vec_dest, gsi, stmt);
2256      if (is_gimple_call (new_stmt1))
2257	{
2258	  new_tmp1 = gimple_call_lhs (new_stmt1);
2259	  new_tmp2 = gimple_call_lhs (new_stmt2);
2260	}
2261      else
2262	{
2263	  new_tmp1 = gimple_assign_lhs (new_stmt1);
2264	  new_tmp2 = gimple_assign_lhs (new_stmt2);
2265	}
2266
2267      /* Store the results for the next step.  */
2268      vec_tmp.quick_push (new_tmp1);
2269      vec_tmp.quick_push (new_tmp2);
2270    }
2271
2272  vec_oprnds0->release ();
2273  *vec_oprnds0 = vec_tmp;
2274}
2275
2276
2277/* Check if STMT performs a conversion operation that can be vectorized.
2278   If VEC_STMT is also passed, vectorize the STMT: create a vectorized
2279   stmt to replace it, put it in VEC_STMT, and insert it at GSI.
2280   Return FALSE if not a vectorizable STMT, TRUE otherwise.  */
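
/* For example, with 128-bit vectors a stmt
        f = (float) i;    (int -> float, FLOAT_EXPR)
   keeps the number of elements (V4SI -> V4SF, modifier NONE), whereas
        d = (double) i;   (int -> double, FLOAT_EXPR)
   halves it (V4SI -> V2DF, modifier WIDEN) and therefore needs two vector
   stmts per input vector.  Illustrative types only.  */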
2281
2282static bool
2283vectorizable_conversion (gimple stmt, gimple_stmt_iterator *gsi,
2284			 gimple *vec_stmt, slp_tree slp_node)
2285{
2286  tree vec_dest;
2287  tree scalar_dest;
2288  tree op0, op1 = NULL_TREE;
2289  tree vec_oprnd0 = NULL_TREE, vec_oprnd1 = NULL_TREE;
2290  stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
2291  loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
2292  enum tree_code code, code1 = ERROR_MARK, code2 = ERROR_MARK;
2293  enum tree_code codecvt1 = ERROR_MARK, codecvt2 = ERROR_MARK;
2294  tree decl1 = NULL_TREE, decl2 = NULL_TREE;
2295  tree new_temp;
2296  tree def;
2297  gimple def_stmt;
2298  enum vect_def_type dt[2] = {vect_unknown_def_type, vect_unknown_def_type};
2299  gimple new_stmt = NULL;
2300  stmt_vec_info prev_stmt_info;
2301  int nunits_in;
2302  int nunits_out;
2303  tree vectype_out, vectype_in;
2304  int ncopies, i, j;
2305  tree lhs_type, rhs_type;
2306  enum { NARROW, NONE, WIDEN } modifier;
2307  vec<tree> vec_oprnds0 = vNULL;
2308  vec<tree> vec_oprnds1 = vNULL;
2309  tree vop0;
2310  bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
2311  int multi_step_cvt = 0;
2312  vec<tree> vec_dsts = vNULL;
2313  vec<tree> interm_types = vNULL;
2314  tree last_oprnd, intermediate_type, cvt_type = NULL_TREE;
2315  int op_type;
2316  enum machine_mode rhs_mode;
2317  unsigned short fltsz;
2318
2319  /* Is STMT a vectorizable conversion?   */
2320
2321  if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
2322    return false;
2323
2324  if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def)
2325    return false;
2326
2327  if (!is_gimple_assign (stmt))
2328    return false;
2329
2330  if (TREE_CODE (gimple_assign_lhs (stmt)) != SSA_NAME)
2331    return false;
2332
2333  code = gimple_assign_rhs_code (stmt);
2334  if (!CONVERT_EXPR_CODE_P (code)
2335      && code != FIX_TRUNC_EXPR
2336      && code != FLOAT_EXPR
2337      && code != WIDEN_MULT_EXPR
2338      && code != WIDEN_LSHIFT_EXPR)
2339    return false;
2340
2341  op_type = TREE_CODE_LENGTH (code);
2342
2343  /* Check types of lhs and rhs.  */
2344  scalar_dest = gimple_assign_lhs (stmt);
2345  lhs_type = TREE_TYPE (scalar_dest);
2346  vectype_out = STMT_VINFO_VECTYPE (stmt_info);
2347
2348  op0 = gimple_assign_rhs1 (stmt);
2349  rhs_type = TREE_TYPE (op0);
2350
2351  if ((code != FIX_TRUNC_EXPR && code != FLOAT_EXPR)
2352      && !((INTEGRAL_TYPE_P (lhs_type)
2353	    && INTEGRAL_TYPE_P (rhs_type))
2354	   || (SCALAR_FLOAT_TYPE_P (lhs_type)
2355	       && SCALAR_FLOAT_TYPE_P (rhs_type))))
2356    return false;
2357
2358  if ((INTEGRAL_TYPE_P (lhs_type)
2359       && (TYPE_PRECISION (lhs_type)
2360	   != GET_MODE_PRECISION (TYPE_MODE (lhs_type))))
2361      || (INTEGRAL_TYPE_P (rhs_type)
2362	  && (TYPE_PRECISION (rhs_type)
2363	      != GET_MODE_PRECISION (TYPE_MODE (rhs_type)))))
2364    {
2365      if (dump_enabled_p ())
2366	dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2367                         "type conversion to/from bit-precision unsupported.");
2368      return false;
2369    }
2370
2371  /* Check the operands of the operation.  */
2372  if (!vect_is_simple_use_1 (op0, stmt, loop_vinfo, bb_vinfo,
2373			     &def_stmt, &def, &dt[0], &vectype_in))
2374    {
2375      if (dump_enabled_p ())
2376	dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2377                         "use not simple.");
2378      return false;
2379    }
2380  if (op_type == binary_op)
2381    {
2382      bool ok;
2383
2384      op1 = gimple_assign_rhs2 (stmt);
2385      gcc_assert (code == WIDEN_MULT_EXPR || code == WIDEN_LSHIFT_EXPR);
2386      /* For WIDEN_MULT_EXPR, if OP0 is a constant, use the type of
2387	 OP1.  */
2388      if (CONSTANT_CLASS_P (op0))
2389	ok = vect_is_simple_use_1 (op1, stmt, loop_vinfo, bb_vinfo,
2390				   &def_stmt, &def, &dt[1], &vectype_in);
2391      else
2392	ok = vect_is_simple_use (op1, stmt, loop_vinfo, bb_vinfo, &def_stmt,
2393				 &def, &dt[1]);
2394
2395      if (!ok)
2396	{
2397          if (dump_enabled_p ())
2398            dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2399                             "use not simple.");
2400	  return false;
2401	}
2402    }
2403
2404  /* If op0 is an external or constant def, use a vector type of
2405     the same size as the output vector type.  */
2406  if (!vectype_in)
2407    vectype_in = get_same_sized_vectype (rhs_type, vectype_out);
2408  if (vec_stmt)
2409    gcc_assert (vectype_in);
2410  if (!vectype_in)
2411    {
2412      if (dump_enabled_p ())
2413	{
2414	  dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2415                           "no vectype for scalar type ");
2416	  dump_generic_expr (MSG_MISSED_OPTIMIZATION, TDF_SLIM, rhs_type);
2417	}
2418
2419      return false;
2420    }
2421
2422  nunits_in = TYPE_VECTOR_SUBPARTS (vectype_in);
2423  nunits_out = TYPE_VECTOR_SUBPARTS (vectype_out);
2424  if (nunits_in < nunits_out)
2425    modifier = NARROW;
2426  else if (nunits_out == nunits_in)
2427    modifier = NONE;
2428  else
2429    modifier = WIDEN;
2430
2431  /* Multiple types in SLP are handled by creating the appropriate number of
2432     vectorized stmts for each SLP node.  Hence, NCOPIES is always 1 in
2433     case of SLP.  */
2434  if (slp_node || PURE_SLP_STMT (stmt_info))
2435    ncopies = 1;
2436  else if (modifier == NARROW)
2437    ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits_out;
2438  else
2439    ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits_in;
2440
2441  /* Sanity check: make sure that at least one copy of the vectorized stmt
2442     needs to be generated.  */
2443  gcc_assert (ncopies >= 1);
2444
2445  /* Supportable by target?  */
2446  switch (modifier)
2447    {
2448    case NONE:
2449      if (code != FIX_TRUNC_EXPR && code != FLOAT_EXPR)
2450	return false;
2451      if (supportable_convert_operation (code, vectype_out, vectype_in,
2452					 &decl1, &code1))
2453	break;
2454      /* FALLTHRU */
2455    unsupported:
2456      if (dump_enabled_p ())
2457	dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2458                         "conversion not supported by target.");
2459      return false;
2460
2461    case WIDEN:
2462      if (supportable_widening_operation (code, stmt, vectype_out, vectype_in,
2463					  &code1, &code2, &multi_step_cvt,
2464					  &interm_types))
2465	{
2466	  /* Binary widening operation can only be supported directly by the
2467	     architecture.  */
2468	  gcc_assert (!(multi_step_cvt && op_type == binary_op));
2469	  break;
2470	}
2471
2472      if (code != FLOAT_EXPR
2473	  || (GET_MODE_SIZE (TYPE_MODE (lhs_type))
2474	      <= GET_MODE_SIZE (TYPE_MODE (rhs_type))))
2475	goto unsupported;
2476
2477      rhs_mode = TYPE_MODE (rhs_type);
2478      fltsz = GET_MODE_SIZE (TYPE_MODE (lhs_type));
2479      for (rhs_mode = GET_MODE_2XWIDER_MODE (TYPE_MODE (rhs_type));
2480	   rhs_mode != VOIDmode && GET_MODE_SIZE (rhs_mode) <= fltsz;
2481	   rhs_mode = GET_MODE_2XWIDER_MODE (rhs_mode))
2482	{
2483	  cvt_type
2484	    = build_nonstandard_integer_type (GET_MODE_BITSIZE (rhs_mode), 0);
2485	  cvt_type = get_same_sized_vectype (cvt_type, vectype_in);
2486	  if (cvt_type == NULL_TREE)
2487	    goto unsupported;
2488
2489	  if (GET_MODE_SIZE (rhs_mode) == fltsz)
2490	    {
2491	      if (!supportable_convert_operation (code, vectype_out,
2492						  cvt_type, &decl1, &codecvt1))
2493		goto unsupported;
2494	    }
2495	  else if (!supportable_widening_operation (code, stmt, vectype_out,
2496						    cvt_type, &codecvt1,
2497						    &codecvt2, &multi_step_cvt,
2498						    &interm_types))
2499	    continue;
2500	  else
2501	    gcc_assert (multi_step_cvt == 0);
2502
2503	  if (supportable_widening_operation (NOP_EXPR, stmt, cvt_type,
2504					      vectype_in, &code1, &code2,
2505					      &multi_step_cvt, &interm_types))
2506	    break;
2507	}
2508
2509      if (rhs_mode == VOIDmode || GET_MODE_SIZE (rhs_mode) > fltsz)
2510	goto unsupported;
2511
2512      if (GET_MODE_SIZE (rhs_mode) == fltsz)
2513	codecvt2 = ERROR_MARK;
2514      else
2515	{
2516	  multi_step_cvt++;
2517	  interm_types.safe_push (cvt_type);
2518	  cvt_type = NULL_TREE;
2519	}
2520      break;
2521
2522    case NARROW:
2523      gcc_assert (op_type == unary_op);
2524      if (supportable_narrowing_operation (code, vectype_out, vectype_in,
2525					   &code1, &multi_step_cvt,
2526					   &interm_types))
2527	break;
2528
2529      if (code != FIX_TRUNC_EXPR
2530	  || (GET_MODE_SIZE (TYPE_MODE (lhs_type))
2531	      >= GET_MODE_SIZE (TYPE_MODE (rhs_type))))
2532	goto unsupported;
2533
2534      rhs_mode = TYPE_MODE (rhs_type);
2535      cvt_type
2536	= build_nonstandard_integer_type (GET_MODE_BITSIZE (rhs_mode), 0);
2537      cvt_type = get_same_sized_vectype (cvt_type, vectype_in);
2538      if (cvt_type == NULL_TREE)
2539	goto unsupported;
2540      if (!supportable_convert_operation (code, cvt_type, vectype_in,
2541					  &decl1, &codecvt1))
2542	goto unsupported;
2543      if (supportable_narrowing_operation (NOP_EXPR, vectype_out, cvt_type,
2544					   &code1, &multi_step_cvt,
2545					   &interm_types))
2546	break;
2547      goto unsupported;
2548
2549    default:
2550      gcc_unreachable ();
2551    }
2552
2553  if (!vec_stmt)		/* transformation not required.  */
2554    {
2555      if (dump_enabled_p ())
2556	dump_printf_loc (MSG_NOTE, vect_location,
2557                         "=== vectorizable_conversion ===");
2558      if (code == FIX_TRUNC_EXPR || code == FLOAT_EXPR)
2559        {
2560	  STMT_VINFO_TYPE (stmt_info) = type_conversion_vec_info_type;
2561	  vect_model_simple_cost (stmt_info, ncopies, dt, NULL, NULL);
2562	}
2563      else if (modifier == NARROW)
2564	{
2565	  STMT_VINFO_TYPE (stmt_info) = type_demotion_vec_info_type;
2566	  vect_model_promotion_demotion_cost (stmt_info, dt, multi_step_cvt);
2567	}
2568      else
2569	{
2570	  STMT_VINFO_TYPE (stmt_info) = type_promotion_vec_info_type;
2571	  vect_model_promotion_demotion_cost (stmt_info, dt, multi_step_cvt);
2572	}
2573      interm_types.release ();
2574      return true;
2575    }
2576
2577  /** Transform.  **/
2578  if (dump_enabled_p ())
2579    dump_printf_loc (MSG_NOTE, vect_location,
2580                     "transform conversion. ncopies = %d.", ncopies);
2581
2582  if (op_type == binary_op)
2583    {
2584      if (CONSTANT_CLASS_P (op0))
2585	op0 = fold_convert (TREE_TYPE (op1), op0);
2586      else if (CONSTANT_CLASS_P (op1))
2587	op1 = fold_convert (TREE_TYPE (op0), op1);
2588    }
2589
2590  /* In case of multi-step conversion, we first generate conversion operations
2591     to the intermediate types, and then from those types to the final one.
2592     We create vector destinations for the intermediate type (TYPES) received
2593     from supportable_*_operation, and store them in the correct order
2594     for future use in vect_create_vectorized_*_stmts ().  */
2595  vec_dsts.create (multi_step_cvt + 1);
2596  vec_dest = vect_create_destination_var (scalar_dest,
2597					  (cvt_type && modifier == WIDEN)
2598					  ? cvt_type : vectype_out);
2599  vec_dsts.quick_push (vec_dest);
2600
2601  if (multi_step_cvt)
2602    {
2603      for (i = interm_types.length () - 1;
2604	   interm_types.iterate (i, &intermediate_type); i--)
2605	{
2606	  vec_dest = vect_create_destination_var (scalar_dest,
2607						  intermediate_type);
2608	  vec_dsts.quick_push (vec_dest);
2609	}
2610    }
2611
2612  if (cvt_type)
2613    vec_dest = vect_create_destination_var (scalar_dest,
2614					    modifier == WIDEN
2615					    ? vectype_out : cvt_type);
2616
2617  if (!slp_node)
2618    {
2619      if (modifier == WIDEN)
2620	{
2621	  vec_oprnds0.create (multi_step_cvt ? vect_pow2 (multi_step_cvt) : 1);
2622	  if (op_type == binary_op)
2623	    vec_oprnds1.create (1);
2624	}
2625      else if (modifier == NARROW)
2626	vec_oprnds0.create (
2627		   2 * (multi_step_cvt ? vect_pow2 (multi_step_cvt) : 1));
2628    }
2629  else if (code == WIDEN_LSHIFT_EXPR)
2630    vec_oprnds1.create (slp_node->vec_stmts_size);
2631
2632  last_oprnd = op0;
2633  prev_stmt_info = NULL;
2634  switch (modifier)
2635    {
2636    case NONE:
2637      for (j = 0; j < ncopies; j++)
2638	{
2639	  if (j == 0)
2640	    vect_get_vec_defs (op0, NULL, stmt, &vec_oprnds0, NULL, slp_node,
2641			       -1);
2642	  else
2643	    vect_get_vec_defs_for_stmt_copy (dt, &vec_oprnds0, NULL);
2644
2645	  FOR_EACH_VEC_ELT (vec_oprnds0, i, vop0)
2646	    {
2647	      /* Arguments are ready, create the new vector stmt.  */
2648	      if (code1 == CALL_EXPR)
2649		{
2650		  new_stmt = gimple_build_call (decl1, 1, vop0);
2651		  new_temp = make_ssa_name (vec_dest, new_stmt);
2652		  gimple_call_set_lhs (new_stmt, new_temp);
2653		}
2654	      else
2655		{
2656		  gcc_assert (TREE_CODE_LENGTH (code1) == unary_op);
2657		  new_stmt = gimple_build_assign_with_ops (code1, vec_dest,
2658							   vop0, NULL);
2659		  new_temp = make_ssa_name (vec_dest, new_stmt);
2660		  gimple_assign_set_lhs (new_stmt, new_temp);
2661		}
2662
2663	      vect_finish_stmt_generation (stmt, new_stmt, gsi);
2664	      if (slp_node)
2665		SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
2666	    }
2667
2668	  if (j == 0)
2669	    STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
2670	  else
2671	    STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
2672	  prev_stmt_info = vinfo_for_stmt (new_stmt);
2673	}
2674      break;
2675
2676    case WIDEN:
2677      /* In case the vectorization factor (VF) is bigger than the number
2678	 of elements that we can fit in a vectype (nunits), we have to
2679	 generate more than one vector stmt - i.e., we need to "unroll"
2680	 the vector stmt by a factor VF/nunits.  */
2681      for (j = 0; j < ncopies; j++)
2682	{
2683	  /* Handle uses.  */
2684	  if (j == 0)
2685	    {
2686	      if (slp_node)
2687		{
2688		  if (code == WIDEN_LSHIFT_EXPR)
2689		    {
2690		      unsigned int k;
2691
2692		      vec_oprnd1 = op1;
2693		      /* Store vec_oprnd1 for every vector stmt to be created
2694			 for SLP_NODE.  We check during the analysis that all
2695			 the shift arguments are the same.  */
2696		      for (k = 0; k < slp_node->vec_stmts_size - 1; k++)
2697			vec_oprnds1.quick_push (vec_oprnd1);
2698
2699		      vect_get_vec_defs (op0, NULL_TREE, stmt, &vec_oprnds0, NULL,
2700					 slp_node, -1);
2701		    }
2702		  else
2703		    vect_get_vec_defs (op0, op1, stmt, &vec_oprnds0,
2704				       &vec_oprnds1, slp_node, -1);
2705		}
2706	      else
2707		{
2708		  vec_oprnd0 = vect_get_vec_def_for_operand (op0, stmt, NULL);
2709		  vec_oprnds0.quick_push (vec_oprnd0);
2710		  if (op_type == binary_op)
2711		    {
2712		      if (code == WIDEN_LSHIFT_EXPR)
2713			vec_oprnd1 = op1;
2714		      else
2715			vec_oprnd1 = vect_get_vec_def_for_operand (op1, stmt,
2716								   NULL);
2717		      vec_oprnds1.quick_push (vec_oprnd1);
2718		    }
2719		}
2720	    }
2721	  else
2722	    {
2723	      vec_oprnd0 = vect_get_vec_def_for_stmt_copy (dt[0], vec_oprnd0);
2724	      vec_oprnds0.truncate (0);
2725	      vec_oprnds0.quick_push (vec_oprnd0);
2726	      if (op_type == binary_op)
2727		{
2728		  if (code == WIDEN_LSHIFT_EXPR)
2729		    vec_oprnd1 = op1;
2730		  else
2731		    vec_oprnd1 = vect_get_vec_def_for_stmt_copy (dt[1],
2732								 vec_oprnd1);
2733		  vec_oprnds1.truncate (0);
2734		  vec_oprnds1.quick_push (vec_oprnd1);
2735		}
2736	    }
2737
2738	  /* Arguments are ready.  Create the new vector stmts.  */
2739	  for (i = multi_step_cvt; i >= 0; i--)
2740	    {
2741	      tree this_dest = vec_dsts[i];
2742	      enum tree_code c1 = code1, c2 = code2;
2743	      if (i == 0 && codecvt2 != ERROR_MARK)
2744		{
2745		  c1 = codecvt1;
2746		  c2 = codecvt2;
2747		}
2748	      vect_create_vectorized_promotion_stmts (&vec_oprnds0,
2749						      &vec_oprnds1,
2750						      stmt, this_dest, gsi,
2751						      c1, c2, decl1, decl2,
2752						      op_type);
2753	    }
2754
2755	  FOR_EACH_VEC_ELT (vec_oprnds0, i, vop0)
2756	    {
2757	      if (cvt_type)
2758		{
2759		  if (codecvt1 == CALL_EXPR)
2760		    {
2761		      new_stmt = gimple_build_call (decl1, 1, vop0);
2762		      new_temp = make_ssa_name (vec_dest, new_stmt);
2763		      gimple_call_set_lhs (new_stmt, new_temp);
2764		    }
2765		  else
2766		    {
2767		      gcc_assert (TREE_CODE_LENGTH (codecvt1) == unary_op);
2768		      new_temp = make_ssa_name (vec_dest, NULL);
2769		      new_stmt = gimple_build_assign_with_ops (codecvt1,
2770							       new_temp,
2771							       vop0, NULL);
2772		    }
2773
2774		  vect_finish_stmt_generation (stmt, new_stmt, gsi);
2775		}
2776	      else
2777		new_stmt = SSA_NAME_DEF_STMT (vop0);
2778
2779	      if (slp_node)
2780		SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
2781	      else
2782		{
2783		  if (!prev_stmt_info)
2784		    STMT_VINFO_VEC_STMT (stmt_info) = new_stmt;
2785		  else
2786		    STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
2787		  prev_stmt_info = vinfo_for_stmt (new_stmt);
2788		}
2789	    }
2790	}
2791
2792      *vec_stmt = STMT_VINFO_VEC_STMT (stmt_info);
2793      break;
2794
2795    case NARROW:
2796      /* In case the vectorization factor (VF) is bigger than the number
2797	 of elements that we can fit in a vectype (nunits), we have to
2798	 generate more than one vector stmt - i.e., we need to "unroll"
2799	 the vector stmt by a factor VF/nunits.  */
2800      for (j = 0; j < ncopies; j++)
2801	{
2802	  /* Handle uses.  */
2803	  if (slp_node)
2804	    vect_get_vec_defs (op0, NULL_TREE, stmt, &vec_oprnds0, NULL,
2805			       slp_node, -1);
2806	  else
2807	    {
2808	      vec_oprnds0.truncate (0);
2809	      vect_get_loop_based_defs (&last_oprnd, stmt, dt[0], &vec_oprnds0,
2810					vect_pow2 (multi_step_cvt) - 1);
2811	    }
2812
2813	  /* Arguments are ready.  Create the new vector stmts.  */
2814	  if (cvt_type)
2815	    FOR_EACH_VEC_ELT (vec_oprnds0, i, vop0)
2816	      {
2817		if (codecvt1 == CALL_EXPR)
2818		  {
2819		    new_stmt = gimple_build_call (decl1, 1, vop0);
2820		    new_temp = make_ssa_name (vec_dest, new_stmt);
2821		    gimple_call_set_lhs (new_stmt, new_temp);
2822		  }
2823		else
2824		  {
2825		    gcc_assert (TREE_CODE_LENGTH (codecvt1) == unary_op);
2826		    new_temp = make_ssa_name (vec_dest, NULL);
2827		    new_stmt = gimple_build_assign_with_ops (codecvt1, new_temp,
2828							     vop0, NULL);
2829		  }
2830
2831		vect_finish_stmt_generation (stmt, new_stmt, gsi);
2832		vec_oprnds0[i] = new_temp;
2833	      }
2834
2835	  vect_create_vectorized_demotion_stmts (&vec_oprnds0, multi_step_cvt,
2836						 stmt, vec_dsts, gsi,
2837						 slp_node, code1,
2838						 &prev_stmt_info);
2839	}
2840
2841      *vec_stmt = STMT_VINFO_VEC_STMT (stmt_info);
2842      break;
2843    }
2844
2845  vec_oprnds0.release ();
2846  vec_oprnds1.release ();
2847  vec_dsts.release ();
2848  interm_types.release ();
2849
2850  return true;
2851}
2852
2853
2854/* Function vectorizable_assignment.
2855
2856   Check if STMT performs an assignment (copy) that can be vectorized.
2857   If VEC_STMT is also passed, vectorize the STMT: create a vectorized
2858   stmt to replace it, put it in VEC_STMT, and insert it at GSI.
2859   Return FALSE if not a vectorizable STMT, TRUE otherwise.  */
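
/* For example, a plain SSA copy
        x_1 = y_2;
   is handled here, as is a conversion that changes neither the number of
   elements nor the vector size (e.g. int <-> unsigned int); in the latter
   case the vector stmt is emitted as a VIEW_CONVERT_EXPR of the vector
   operand.  */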
2860
2861static bool
2862vectorizable_assignment (gimple stmt, gimple_stmt_iterator *gsi,
2863			 gimple *vec_stmt, slp_tree slp_node)
2864{
2865  tree vec_dest;
2866  tree scalar_dest;
2867  tree op;
2868  stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
2869  tree vectype = STMT_VINFO_VECTYPE (stmt_info);
2870  loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
2871  tree new_temp;
2872  tree def;
2873  gimple def_stmt;
2874  enum vect_def_type dt[2] = {vect_unknown_def_type, vect_unknown_def_type};
2875  unsigned int nunits = TYPE_VECTOR_SUBPARTS (vectype);
2876  int ncopies;
2877  int i, j;
2878  vec<tree> vec_oprnds = vNULL;
2879  tree vop;
2880  bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
2881  gimple new_stmt = NULL;
2882  stmt_vec_info prev_stmt_info = NULL;
2883  enum tree_code code;
2884  tree vectype_in;
2885
2886  /* Multiple types in SLP are handled by creating the appropriate number of
2887     vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in
2888     case of SLP.  */
2889  if (slp_node || PURE_SLP_STMT (stmt_info))
2890    ncopies = 1;
2891  else
2892    ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits;
2893
2894  gcc_assert (ncopies >= 1);
2895
2896  if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
2897    return false;
2898
2899  if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def)
2900    return false;
2901
2902  /* Is vectorizable assignment?  */
2903  if (!is_gimple_assign (stmt))
2904    return false;
2905
2906  scalar_dest = gimple_assign_lhs (stmt);
2907  if (TREE_CODE (scalar_dest) != SSA_NAME)
2908    return false;
2909
2910  code = gimple_assign_rhs_code (stmt);
2911  if (gimple_assign_single_p (stmt)
2912      || code == PAREN_EXPR
2913      || CONVERT_EXPR_CODE_P (code))
2914    op = gimple_assign_rhs1 (stmt);
2915  else
2916    return false;
2917
2918  if (code == VIEW_CONVERT_EXPR)
2919    op = TREE_OPERAND (op, 0);
2920
2921  if (!vect_is_simple_use_1 (op, stmt, loop_vinfo, bb_vinfo,
2922			     &def_stmt, &def, &dt[0], &vectype_in))
2923    {
2924      if (dump_enabled_p ())
2925        dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2926                         "use not simple.");
2927      return false;
2928    }
2929
2930  /* We can handle NOP_EXPR conversions that do not change the number
2931     of elements or the vector size.  */
2932  if ((CONVERT_EXPR_CODE_P (code)
2933       || code == VIEW_CONVERT_EXPR)
2934      && (!vectype_in
2935	  || TYPE_VECTOR_SUBPARTS (vectype_in) != nunits
2936	  || (GET_MODE_SIZE (TYPE_MODE (vectype))
2937	      != GET_MODE_SIZE (TYPE_MODE (vectype_in)))))
2938    return false;
2939
2940  /* We do not handle bit-precision changes.  */
2941  if ((CONVERT_EXPR_CODE_P (code)
2942       || code == VIEW_CONVERT_EXPR)
2943      && INTEGRAL_TYPE_P (TREE_TYPE (scalar_dest))
2944      && ((TYPE_PRECISION (TREE_TYPE (scalar_dest))
2945	   != GET_MODE_PRECISION (TYPE_MODE (TREE_TYPE (scalar_dest))))
2946	  || ((TYPE_PRECISION (TREE_TYPE (op))
2947	       != GET_MODE_PRECISION (TYPE_MODE (TREE_TYPE (op))))))
2948      /* But a conversion that does not change the bit-pattern is ok.  */
2949      && !((TYPE_PRECISION (TREE_TYPE (scalar_dest))
2950	    > TYPE_PRECISION (TREE_TYPE (op)))
2951	   && TYPE_UNSIGNED (TREE_TYPE (op))))
2952    {
2953      if (dump_enabled_p ())
2954        dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2955                         "type conversion to/from bit-precision "
2956                         "unsupported.");
2957      return false;
2958    }
2959
2960  if (!vec_stmt) /* transformation not required.  */
2961    {
2962      STMT_VINFO_TYPE (stmt_info) = assignment_vec_info_type;
2963      if (dump_enabled_p ())
2964        dump_printf_loc (MSG_NOTE, vect_location,
2965                         "=== vectorizable_assignment ===");
2966      vect_model_simple_cost (stmt_info, ncopies, dt, NULL, NULL);
2967      return true;
2968    }
2969
2970  /** Transform.  **/
2971  if (dump_enabled_p ())
2972    dump_printf_loc (MSG_NOTE, vect_location, "transform assignment.");
2973
2974  /* Handle def.  */
2975  vec_dest = vect_create_destination_var (scalar_dest, vectype);
2976
2977  /* Handle use.  */
2978  for (j = 0; j < ncopies; j++)
2979    {
2980      /* Handle uses.  */
2981      if (j == 0)
2982        vect_get_vec_defs (op, NULL, stmt, &vec_oprnds, NULL, slp_node, -1);
2983      else
2984        vect_get_vec_defs_for_stmt_copy (dt, &vec_oprnds, NULL);
2985
2986      /* Arguments are ready.  Create the new vector stmt.  */
2987      FOR_EACH_VEC_ELT (vec_oprnds, i, vop)
2988       {
2989	 if (CONVERT_EXPR_CODE_P (code)
2990	     || code == VIEW_CONVERT_EXPR)
2991	   vop = build1 (VIEW_CONVERT_EXPR, vectype, vop);
2992         new_stmt = gimple_build_assign (vec_dest, vop);
2993         new_temp = make_ssa_name (vec_dest, new_stmt);
2994         gimple_assign_set_lhs (new_stmt, new_temp);
2995         vect_finish_stmt_generation (stmt, new_stmt, gsi);
2996         if (slp_node)
2997           SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
2998       }
2999
3000      if (slp_node)
3001        continue;
3002
3003      if (j == 0)
3004        STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
3005      else
3006        STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
3007
3008      prev_stmt_info = vinfo_for_stmt (new_stmt);
3009    }
3010
3011  vec_oprnds.release ();
3012  return true;
3013}
3014
3015
3016/* Return TRUE if CODE (a shift operation) is supported for SCALAR_TYPE
3017   either as shift by a scalar or by a vector.  */
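
/* E.g., on a target that only provides vector-by-vector shifts this still
   returns true for a shift by a scalar amount: the scalar optab is simply
   tried first, and the shift count can later be duplicated into a vector
   by the shift vectorization code.  */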
3018
3019bool
3020vect_supportable_shift (enum tree_code code, tree scalar_type)
3021{
3022
3023  enum machine_mode vec_mode;
3024  optab optab;
3025  int icode;
3026  tree vectype;
3027
3028  vectype = get_vectype_for_scalar_type (scalar_type);
3029  if (!vectype)
3030    return false;
3031
3032  optab = optab_for_tree_code (code, vectype, optab_scalar);
3033  if (!optab
3034      || optab_handler (optab, TYPE_MODE (vectype)) == CODE_FOR_nothing)
3035    {
3036      optab = optab_for_tree_code (code, vectype, optab_vector);
3037      if (!optab
3038          || (optab_handler (optab, TYPE_MODE (vectype))
3039                      == CODE_FOR_nothing))
3040        return false;
3041    }
3042
3043  vec_mode = TYPE_MODE (vectype);
3044  icode = (int) optab_handler (optab, vec_mode);
3045  if (icode == CODE_FOR_nothing)
3046    return false;
3047
3048  return true;
3049}
3050
3051
3052/* Function vectorizable_shift.
3053
3054   Check if STMT performs a shift operation that can be vectorized.
3055   If VEC_STMT is also passed, vectorize the STMT: create a vectorized
3056   stmt to replace it, put it in VEC_STMT, and insert it at GSI.
3057   Return FALSE if not a vectorizable STMT, TRUE otherwise.  */
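
/* For example,
        a[i] = b[i] << 3;       (shift amount invariant)
   can use the target's vector-by-scalar shift patterns, whereas
        a[i] = b[i] << c[i];    (shift amount varies per element)
   requires a vector-by-vector shift.  Which optab is used is decided
   below from the definition type of the shift operand.  */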
3058
3059static bool
3060vectorizable_shift (gimple stmt, gimple_stmt_iterator *gsi,
3061                    gimple *vec_stmt, slp_tree slp_node)
3062{
3063  tree vec_dest;
3064  tree scalar_dest;
3065  tree op0, op1 = NULL;
3066  tree vec_oprnd1 = NULL_TREE;
3067  stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
3068  tree vectype;
3069  loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
3070  enum tree_code code;
3071  enum machine_mode vec_mode;
3072  tree new_temp;
3073  optab optab;
3074  int icode;
3075  enum machine_mode optab_op2_mode;
3076  tree def;
3077  gimple def_stmt;
3078  enum vect_def_type dt[2] = {vect_unknown_def_type, vect_unknown_def_type};
3079  gimple new_stmt = NULL;
3080  stmt_vec_info prev_stmt_info;
3081  int nunits_in;
3082  int nunits_out;
3083  tree vectype_out;
3084  tree op1_vectype;
3085  int ncopies;
3086  int j, i;
3087  vec<tree> vec_oprnds0 = vNULL;
3088  vec<tree> vec_oprnds1 = vNULL;
3089  tree vop0, vop1;
3090  unsigned int k;
3091  bool scalar_shift_arg = true;
3092  bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
3093  int vf;
3094
3095  if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
3096    return false;
3097
3098  if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def)
3099    return false;
3100
3101  /* Is STMT a vectorizable binary/unary operation?   */
3102  if (!is_gimple_assign (stmt))
3103    return false;
3104
3105  if (TREE_CODE (gimple_assign_lhs (stmt)) != SSA_NAME)
3106    return false;
3107
3108  code = gimple_assign_rhs_code (stmt);
3109
3110  if (!(code == LSHIFT_EXPR || code == RSHIFT_EXPR || code == LROTATE_EXPR
3111      || code == RROTATE_EXPR))
3112    return false;
3113
3114  scalar_dest = gimple_assign_lhs (stmt);
3115  vectype_out = STMT_VINFO_VECTYPE (stmt_info);
3116  if (TYPE_PRECISION (TREE_TYPE (scalar_dest))
3117      != GET_MODE_PRECISION (TYPE_MODE (TREE_TYPE (scalar_dest))))
3118    {
3119      if (dump_enabled_p ())
3120        dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3121                         "bit-precision shifts not supported.");
3122      return false;
3123    }
3124
3125  op0 = gimple_assign_rhs1 (stmt);
3126  if (!vect_is_simple_use_1 (op0, stmt, loop_vinfo, bb_vinfo,
3127                             &def_stmt, &def, &dt[0], &vectype))
3128    {
3129      if (dump_enabled_p ())
3130        dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3131                         "use not simple.");
3132      return false;
3133    }
3134  /* If op0 is an external or constant def, use a vector type with
3135     the same size as the output vector type.  */
3136  if (!vectype)
3137    vectype = get_same_sized_vectype (TREE_TYPE (op0), vectype_out);
3138  if (vec_stmt)
3139    gcc_assert (vectype);
3140  if (!vectype)
3141    {
3142      if (dump_enabled_p ())
3143        dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3144                         "no vectype for scalar type ");
3145      return false;
3146    }
3147
3148  nunits_out = TYPE_VECTOR_SUBPARTS (vectype_out);
3149  nunits_in = TYPE_VECTOR_SUBPARTS (vectype);
3150  if (nunits_out != nunits_in)
3151    return false;
3152
3153  op1 = gimple_assign_rhs2 (stmt);
3154  if (!vect_is_simple_use_1 (op1, stmt, loop_vinfo, bb_vinfo, &def_stmt,
3155			     &def, &dt[1], &op1_vectype))
3156    {
3157      if (dump_enabled_p ())
3158        dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3159                         "use not simple.");
3160      return false;
3161    }
3162
3163  if (loop_vinfo)
3164    vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
3165  else
3166    vf = 1;
3167
3168  /* Multiple types in SLP are handled by creating the appropriate number of
3169     vectorized stmts for each SLP node.  Hence, NCOPIES is always 1 in
3170     case of SLP.  */
3171  if (slp_node || PURE_SLP_STMT (stmt_info))
3172    ncopies = 1;
3173  else
3174    ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits_in;
3175
3176  gcc_assert (ncopies >= 1);
3177
3178  /* Determine whether the shift amount is a vector or a scalar.  If the
3179     shift/rotate amount is a vector, use the vector/vector shift optabs.  */
3180
3181  if (dt[1] == vect_internal_def && !slp_node)
3182    scalar_shift_arg = false;
3183  else if (dt[1] == vect_constant_def
3184	   || dt[1] == vect_external_def
3185	   || dt[1] == vect_internal_def)
3186    {
3187      /* In SLP, we need to check whether the shift count is the same in
3188	 all the stmts; in loops, if it is a constant or invariant, it is
3189	 always a scalar shift.  */
3190      if (slp_node)
3191	{
3192	  vec<gimple> stmts = SLP_TREE_SCALAR_STMTS (slp_node);
3193	  gimple slpstmt;
3194
3195	  FOR_EACH_VEC_ELT (stmts, k, slpstmt)
3196	    if (!operand_equal_p (gimple_assign_rhs2 (slpstmt), op1, 0))
3197	      scalar_shift_arg = false;
3198	}
3199    }
3200  else
3201    {
3202      if (dump_enabled_p ())
3203        dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3204                         "operand mode requires invariant argument.");
3205      return false;
3206    }
3207
3208  /* Vector shifted by vector.  */
3209  if (!scalar_shift_arg)
3210    {
3211      optab = optab_for_tree_code (code, vectype, optab_vector);
3212      if (dump_enabled_p ())
3213        dump_printf_loc (MSG_NOTE, vect_location,
3214                         "vector/vector shift/rotate found.");
3215
3216      if (!op1_vectype)
3217	op1_vectype = get_same_sized_vectype (TREE_TYPE (op1), vectype_out);
3218      if (op1_vectype == NULL_TREE
3219	  || TYPE_MODE (op1_vectype) != TYPE_MODE (vectype))
3220	{
3221	  if (dump_enabled_p ())
3222	    dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3223                             "unusable type for last operand in"
3224                             " vector/vector shift/rotate.");
3225	  return false;
3226	}
3227    }
3228  /* See if the machine has a vector shifted by scalar insn and if not
3229     then see if it has a vector shifted by vector insn.  */
3230  else
3231    {
3232      optab = optab_for_tree_code (code, vectype, optab_scalar);
3233      if (optab
3234          && optab_handler (optab, TYPE_MODE (vectype)) != CODE_FOR_nothing)
3235        {
3236          if (dump_enabled_p ())
3237            dump_printf_loc (MSG_NOTE, vect_location,
3238                             "vector/scalar shift/rotate found.");
3239        }
3240      else
3241        {
3242          optab = optab_for_tree_code (code, vectype, optab_vector);
3243          if (optab
3244               && (optab_handler (optab, TYPE_MODE (vectype))
3245                      != CODE_FOR_nothing))
3246            {
3247	      scalar_shift_arg = false;
3248
3249              if (dump_enabled_p ())
3250                dump_printf_loc (MSG_NOTE, vect_location,
3251                                 "vector/vector shift/rotate found.");
3252
3253              /* Unlike the other binary operators, shifts/rotates take an
3254                 int rhs instead of an operand of the same type as the lhs,
3255                 so make sure the scalar is the right type if we are
3256		 dealing with vectors of long long/long/short/char.  */
3257              if (dt[1] == vect_constant_def)
3258                op1 = fold_convert (TREE_TYPE (vectype), op1);
3259	      else if (!useless_type_conversion_p (TREE_TYPE (vectype),
3260						   TREE_TYPE (op1)))
3261		{
3262		  if (slp_node
3263		      && TYPE_MODE (TREE_TYPE (vectype))
3264			 != TYPE_MODE (TREE_TYPE (op1)))
3265		    {
3266                      if (dump_enabled_p ())
3267                        dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3268                                         "unusable type for last operand in"
3269                                         " vector/vector shift/rotate.");
3270		      return false;
3271		    }
3272		  if (vec_stmt && !slp_node)
3273		    {
3274		      op1 = fold_convert (TREE_TYPE (vectype), op1);
3275		      op1 = vect_init_vector (stmt, op1,
3276					      TREE_TYPE (vectype), NULL);
3277		    }
3278		}
3279            }
3280        }
3281    }
3282
3283  /* Supportable by target?  */
3284  if (!optab)
3285    {
3286      if (dump_enabled_p ())
3287        dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3288                         "no optab.");
3289      return false;
3290    }
3291  vec_mode = TYPE_MODE (vectype);
3292  icode = (int) optab_handler (optab, vec_mode);
3293  if (icode == CODE_FOR_nothing)
3294    {
3295      if (dump_enabled_p ())
3296        dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3297                         "op not supported by target.");
3298      /* Check only during analysis.  */
3299      if (GET_MODE_SIZE (vec_mode) != UNITS_PER_WORD
3300          || (vf < vect_min_worthwhile_factor (code)
3301              && !vec_stmt))
3302        return false;
3303      if (dump_enabled_p ())
3304        dump_printf_loc (MSG_NOTE, vect_location, "proceeding using word mode.");
3305    }
3306
3307  /* Worthwhile without SIMD support?  Check only during analysis.  */
3308  if (!VECTOR_MODE_P (TYPE_MODE (vectype))
3309      && vf < vect_min_worthwhile_factor (code)
3310      && !vec_stmt)
3311    {
3312      if (dump_enabled_p ())
3313        dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3314                         "not worthwhile without SIMD support.");
3315      return false;
3316    }
3317
3318  if (!vec_stmt) /* transformation not required.  */
3319    {
3320      STMT_VINFO_TYPE (stmt_info) = shift_vec_info_type;
3321      if (dump_enabled_p ())
3322        dump_printf_loc (MSG_NOTE, vect_location, "=== vectorizable_shift ===");
3323      vect_model_simple_cost (stmt_info, ncopies, dt, NULL, NULL);
3324      return true;
3325    }
3326
3327  /** Transform.  **/
3328
3329  if (dump_enabled_p ())
3330    dump_printf_loc (MSG_NOTE, vect_location,
3331                     "transform binary/unary operation.");
3332
3333  /* Handle def.  */
3334  vec_dest = vect_create_destination_var (scalar_dest, vectype);
3335
3336  prev_stmt_info = NULL;
3337  for (j = 0; j < ncopies; j++)
3338    {
3339      /* Handle uses.  */
3340      if (j == 0)
3341        {
3342          if (scalar_shift_arg)
3343            {
3344              /* Vector shl and shr insn patterns can be defined with scalar
3345                 operand 2 (shift operand).  In this case, use constant or loop
3346                 invariant op1 directly, without extending it to vector mode
3347                 first.  */
3348              optab_op2_mode = insn_data[icode].operand[2].mode;
3349              if (!VECTOR_MODE_P (optab_op2_mode))
3350                {
3351                  if (dump_enabled_p ())
3352                    dump_printf_loc (MSG_NOTE, vect_location,
3353                                     "operand 1 using scalar mode.");
3354                  vec_oprnd1 = op1;
3355                  vec_oprnds1.create (slp_node ? slp_node->vec_stmts_size : 1);
3356                  vec_oprnds1.quick_push (vec_oprnd1);
3357                  if (slp_node)
3358                    {
3359                      /* Store vec_oprnd1 for every vector stmt to be created
3360                         for SLP_NODE.  We check during the analysis that all
3361                         the shift arguments are the same.
3362                         TODO: Allow different constants for different vector
3363                         stmts generated for an SLP instance.  */
3364                      for (k = 0; k < slp_node->vec_stmts_size - 1; k++)
3365                        vec_oprnds1.quick_push (vec_oprnd1);
3366                    }
3367                }
3368            }
3369
3370          /* vec_oprnd1 is available if operand 1 should be of a scalar-type
3371             (a special case for certain kinds of vector shifts); otherwise,
3372             operand 1 should be of a vector type (the usual case).  */
3373          if (vec_oprnd1)
3374            vect_get_vec_defs (op0, NULL_TREE, stmt, &vec_oprnds0, NULL,
3375                               slp_node, -1);
3376          else
3377            vect_get_vec_defs (op0, op1, stmt, &vec_oprnds0, &vec_oprnds1,
3378                               slp_node, -1);
3379        }
3380      else
3381        vect_get_vec_defs_for_stmt_copy (dt, &vec_oprnds0, &vec_oprnds1);
3382
3383      /* Arguments are ready.  Create the new vector stmt.  */
3384      FOR_EACH_VEC_ELT (vec_oprnds0, i, vop0)
3385        {
3386          vop1 = vec_oprnds1[i];
3387          new_stmt = gimple_build_assign_with_ops (code, vec_dest, vop0, vop1);
3388          new_temp = make_ssa_name (vec_dest, new_stmt);
3389          gimple_assign_set_lhs (new_stmt, new_temp);
3390          vect_finish_stmt_generation (stmt, new_stmt, gsi);
3391          if (slp_node)
3392            SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
3393        }
3394
3395      if (slp_node)
3396        continue;
3397
3398      if (j == 0)
3399        STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
3400      else
3401        STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
3402      prev_stmt_info = vinfo_for_stmt (new_stmt);
3403    }
3404
3405  vec_oprnds0.release ();
3406  vec_oprnds1.release ();
3407
3408  return true;
3409}
3410
3411
3412static tree permute_vec_elements (tree, tree, tree, gimple,
3413				  gimple_stmt_iterator *);
3414
3415
3416/* Function vectorizable_operation.
3417
3418   Check if STMT performs a binary, unary or ternary operation that can
3419   be vectorized.
3420   If VEC_STMT is also passed, vectorize the STMT: create a vectorized
3421   stmt to replace it, put it in VEC_STMT, and insert it at BSI.
3422   Return FALSE if not a vectorizable STMT, TRUE otherwise.  */
3423
3424static bool
3425vectorizable_operation (gimple stmt, gimple_stmt_iterator *gsi,
3426			gimple *vec_stmt, slp_tree slp_node)
3427{
3428  tree vec_dest;
3429  tree scalar_dest;
3430  tree op0, op1 = NULL_TREE, op2 = NULL_TREE;
3431  stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
3432  tree vectype;
3433  loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
3434  enum tree_code code;
3435  enum machine_mode vec_mode;
3436  tree new_temp;
3437  int op_type;
3438  optab optab;
3439  int icode;
3440  tree def;
3441  gimple def_stmt;
3442  enum vect_def_type dt[3]
3443    = {vect_unknown_def_type, vect_unknown_def_type, vect_unknown_def_type};
3444  gimple new_stmt = NULL;
3445  stmt_vec_info prev_stmt_info;
3446  int nunits_in;
3447  int nunits_out;
3448  tree vectype_out;
3449  int ncopies;
3450  int j, i;
3451  vec<tree> vec_oprnds0 = vNULL;
3452  vec<tree> vec_oprnds1 = vNULL;
3453  vec<tree> vec_oprnds2 = vNULL;
3454  tree vop0, vop1, vop2;
3455  bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
3456  int vf;
3457
3458  if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
3459    return false;
3460
3461  if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def)
3462    return false;
3463
3464  /* Is STMT a vectorizable binary/unary/ternary operation?  */
3465  if (!is_gimple_assign (stmt))
3466    return false;
3467
3468  if (TREE_CODE (gimple_assign_lhs (stmt)) != SSA_NAME)
3469    return false;
3470
3471  code = gimple_assign_rhs_code (stmt);
3472
3473  /* For pointer addition, we should use the normal plus for
3474     the vector addition.  */
3475  if (code == POINTER_PLUS_EXPR)
3476    code = PLUS_EXPR;
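  /* E.g. a pointer bump  q = p + 16  (a POINTER_PLUS_EXPR; names invented
     for this sketch) is vectorized as an ordinary element-wise PLUS_EXPR
     on the pointer values.  */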
3477
3478  /* Support only unary, binary and ternary operations.  */
3479  op_type = TREE_CODE_LENGTH (code);
3480  if (op_type != unary_op && op_type != binary_op && op_type != ternary_op)
3481    {
3482      if (dump_enabled_p ())
3483        dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3484                         "num. args = %d (not unary/binary/ternary op).",
3485                         op_type);
3486      return false;
3487    }
3488
3489  scalar_dest = gimple_assign_lhs (stmt);
3490  vectype_out = STMT_VINFO_VECTYPE (stmt_info);
3491
3492  /* Most operations cannot handle bit-precision types without extra
3493     truncations.  */
3494  if ((TYPE_PRECISION (TREE_TYPE (scalar_dest))
3495       != GET_MODE_PRECISION (TYPE_MODE (TREE_TYPE (scalar_dest))))
3496      /* The exceptions are bitwise binary operations.  */
3497      && code != BIT_IOR_EXPR
3498      && code != BIT_XOR_EXPR
3499      && code != BIT_AND_EXPR)
3500    {
3501      if (dump_enabled_p ())
3502        dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3503                         "bit-precision arithmetic not supported.");
3504      return false;
3505    }
3506
3507  op0 = gimple_assign_rhs1 (stmt);
3508  if (!vect_is_simple_use_1 (op0, stmt, loop_vinfo, bb_vinfo,
3509			     &def_stmt, &def, &dt[0], &vectype))
3510    {
3511      if (dump_enabled_p ())
3512        dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3513                         "use not simple.");
3514      return false;
3515    }
3516  /* If op0 is an external or constant def use a vector type with
3517     the same size as the output vector type.  */
3518  if (!vectype)
3519    vectype = get_same_sized_vectype (TREE_TYPE (op0), vectype_out);
3520  if (vec_stmt)
3521    gcc_assert (vectype);
3522  if (!vectype)
3523    {
3524      if (dump_enabled_p ())
3525        {
3526          dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3527                           "no vectype for scalar type ");
3528          dump_generic_expr (MSG_MISSED_OPTIMIZATION, TDF_SLIM,
3529                             TREE_TYPE (op0));
3530        }
3531
3532      return false;
3533    }
3534
3535  nunits_out = TYPE_VECTOR_SUBPARTS (vectype_out);
3536  nunits_in = TYPE_VECTOR_SUBPARTS (vectype);
3537  if (nunits_out != nunits_in)
3538    return false;
3539
3540  if (op_type == binary_op || op_type == ternary_op)
3541    {
3542      op1 = gimple_assign_rhs2 (stmt);
3543      if (!vect_is_simple_use (op1, stmt, loop_vinfo, bb_vinfo, &def_stmt,
3544			       &def, &dt[1]))
3545	{
3546	  if (dump_enabled_p ())
3547	    dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3548                             "use not simple.");
3549	  return false;
3550	}
3551    }
3552  if (op_type == ternary_op)
3553    {
3554      op2 = gimple_assign_rhs3 (stmt);
3555      if (!vect_is_simple_use (op2, stmt, loop_vinfo, bb_vinfo, &def_stmt,
3556			       &def, &dt[2]))
3557	{
3558	  if (dump_enabled_p ())
3559	    dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3560                             "use not simple.");
3561	  return false;
3562	}
3563    }
3564
3565  if (loop_vinfo)
3566    vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
3567  else
3568    vf = 1;
3569
3570  /* Multiple types in SLP are handled by creating the appropriate number of
3571     vectorized stmts for each SLP node.  Hence, NCOPIES is always 1 in
3572     case of SLP.  */
3573  if (slp_node || PURE_SLP_STMT (stmt_info))
3574    ncopies = 1;
3575  else
3576    ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits_in;
3577
3578  gcc_assert (ncopies >= 1);
3579
3580  /* Shifts are handled in vectorizable_shift ().  */
3581  if (code == LSHIFT_EXPR || code == RSHIFT_EXPR || code == LROTATE_EXPR
3582      || code == RROTATE_EXPR)
3583    return false;
3584
3585  /* Supportable by target?  */
3586
3587  vec_mode = TYPE_MODE (vectype);
3588  if (code == MULT_HIGHPART_EXPR)
3589    {
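      /* A highpart multiply may only be supported as a sequence (e.g. a
	 widening multiply followed by a permute), so there is no single
	 optab to query here; LAST_INSN_CODE merely marks the operation
	 as supported.  */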
3590      if (can_mult_highpart_p (vec_mode, TYPE_UNSIGNED (vectype)))
3591	icode = LAST_INSN_CODE;
3592      else
3593	icode = CODE_FOR_nothing;
3594    }
3595  else
3596    {
3597      optab = optab_for_tree_code (code, vectype, optab_default);
3598      if (!optab)
3599	{
3600          if (dump_enabled_p ())
3601            dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3602                             "no optab.");
3603	  return false;
3604	}
3605      icode = (int) optab_handler (optab, vec_mode);
3606    }
3607
3608  if (icode == CODE_FOR_nothing)
3609    {
3610      if (dump_enabled_p ())
3611	dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3612                         "op not supported by target.");
3613      /* Check only during analysis.  */
3614      if (GET_MODE_SIZE (vec_mode) != UNITS_PER_WORD
3615	  || (!vec_stmt && vf < vect_min_worthwhile_factor (code)))
3616        return false;
3617      if (dump_enabled_p ())
3618	dump_printf_loc (MSG_NOTE, vect_location, "proceeding using word mode.");
3619    }
3620
3621  /* Worthwhile without SIMD support?  Check only during analysis.  */
3622  if (!VECTOR_MODE_P (vec_mode)
3623      && !vec_stmt
3624      && vf < vect_min_worthwhile_factor (code))
3625    {
3626      if (dump_enabled_p ())
3627        dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3628                         "not worthwhile without SIMD support.");
3629      return false;
3630    }
3631
3632  if (!vec_stmt) /* transformation not required.  */
3633    {
3634      STMT_VINFO_TYPE (stmt_info) = op_vec_info_type;
3635      if (dump_enabled_p ())
3636        dump_printf_loc (MSG_NOTE, vect_location,
3637                         "=== vectorizable_operation ===");
3638      vect_model_simple_cost (stmt_info, ncopies, dt, NULL, NULL);
3639      return true;
3640    }
3641
3642  /** Transform.  **/
3643
3644  if (dump_enabled_p ())
3645    dump_printf_loc (MSG_NOTE, vect_location,
3646                     "transform binary/unary operation.");
3647
3648  /* Handle def.  */
3649  vec_dest = vect_create_destination_var (scalar_dest, vectype);
3650
3651  /* In case the vectorization factor (VF) is bigger than the number
3652     of elements that we can fit in a vectype (nunits), we have to generate
3653     more than one vector stmt - i.e - we need to "unroll" the
3654     vector stmt by a factor VF/nunits.  In doing so, we record a pointer
3655     from one copy of the vector stmt to the next, in the field
3656     STMT_VINFO_RELATED_STMT.  This is necessary in order to allow following
3657     stages to find the correct vector defs to be used when vectorizing
3658     stmts that use the defs of the current stmt.  The example below
3659     illustrates the vectorization process when VF=16 and nunits=4 (i.e.,
3660     we need to create 4 vectorized stmts):
3661
3662     before vectorization:
3663                                RELATED_STMT    VEC_STMT
3664        S1:     x = memref      -               -
3665        S2:     z = x + 1       -               -
3666
3667     step 1: vectorize stmt S1 (done in vectorizable_load. See more details
3668             there):
3669                                RELATED_STMT    VEC_STMT
3670        VS1_0:  vx0 = memref0   VS1_1           -
3671        VS1_1:  vx1 = memref1   VS1_2           -
3672        VS1_2:  vx2 = memref2   VS1_3           -
3673        VS1_3:  vx3 = memref3   -               -
3674        S1:     x = load        -               VS1_0
3675        S2:     z = x + 1       -               -
3676
3677     step2: vectorize stmt S2 (done here):
3678        To vectorize stmt S2 we first need to find the relevant vector
3679        def for the first operand 'x'.  This is, as usual, obtained from
3680        the vector stmt recorded in the STMT_VINFO_VEC_STMT of the stmt
3681        that defines 'x' (S1).  This way we find the stmt VS1_0, and the
3682        relevant vector def 'vx0'.  Having found 'vx0' we can generate
3683        the vector stmt VS2_0, and as usual, record it in the
3684        STMT_VINFO_VEC_STMT of stmt S2.
3685        When creating the second copy (VS2_1), we obtain the relevant vector
3686        def from the vector stmt recorded in the STMT_VINFO_RELATED_STMT of
3687        stmt VS1_0.  This way we find the stmt VS1_1 and the relevant
3688        vector def 'vx1'.  Using 'vx1' we create stmt VS2_1 and record a
3689        pointer to it in the STMT_VINFO_RELATED_STMT of the vector stmt VS2_0.
3690        Similarly when creating stmts VS2_2 and VS2_3.  This is the resulting
3691        chain of stmts and pointers:
3692                                RELATED_STMT    VEC_STMT
3693        VS1_0:  vx0 = memref0   VS1_1           -
3694        VS1_1:  vx1 = memref1   VS1_2           -
3695        VS1_2:  vx2 = memref2   VS1_3           -
3696        VS1_3:  vx3 = memref3   -               -
3697        S1:     x = load        -               VS1_0
3698        VS2_0:  vz0 = vx0 + v1  VS2_1           -
3699        VS2_1:  vz1 = vx1 + v1  VS2_2           -
3700        VS2_2:  vz2 = vx2 + v1  VS2_3           -
3701        VS2_3:  vz3 = vx3 + v1  -               -
3702        S2:     z = x + 1       -               VS2_0  */
3703
3704  prev_stmt_info = NULL;
3705  for (j = 0; j < ncopies; j++)
3706    {
3707      /* Handle uses.  */
3708      if (j == 0)
3709	{
3710	  if (op_type == binary_op || op_type == ternary_op)
3711	    vect_get_vec_defs (op0, op1, stmt, &vec_oprnds0, &vec_oprnds1,
3712			       slp_node, -1);
3713	  else
3714	    vect_get_vec_defs (op0, NULL_TREE, stmt, &vec_oprnds0, NULL,
3715			       slp_node, -1);
3716	  if (op_type == ternary_op)
3717	    {
3718	      vec_oprnds2.create (1);
3719	      vec_oprnds2.quick_push (vect_get_vec_def_for_operand (op2,
3720		                                                    stmt,
3721								    NULL));
3722	    }
3723	}
3724      else
3725	{
3726	  vect_get_vec_defs_for_stmt_copy (dt, &vec_oprnds0, &vec_oprnds1);
3727	  if (op_type == ternary_op)
3728	    {
3729	      tree vec_oprnd = vec_oprnds2.pop ();
3730	      vec_oprnds2.quick_push (vect_get_vec_def_for_stmt_copy (dt[2],
3731							           vec_oprnd));
3732	    }
3733	}
3734
3735      /* Arguments are ready.  Create the new vector stmt.  */
3736      FOR_EACH_VEC_ELT (vec_oprnds0, i, vop0)
3737        {
3738	  vop1 = ((op_type == binary_op || op_type == ternary_op)
3739		  ? vec_oprnds1[i] : NULL_TREE);
3740	  vop2 = ((op_type == ternary_op)
3741		  ? vec_oprnds2[i] : NULL_TREE);
3742	  new_stmt = gimple_build_assign_with_ops (code, vec_dest,
3743						   vop0, vop1, vop2);
3744	  new_temp = make_ssa_name (vec_dest, new_stmt);
3745	  gimple_assign_set_lhs (new_stmt, new_temp);
3746	  vect_finish_stmt_generation (stmt, new_stmt, gsi);
3747          if (slp_node)
3748	    SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
3749        }
3750
3751      if (slp_node)
3752        continue;
3753
3754      if (j == 0)
3755	STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
3756      else
3757	STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
3758      prev_stmt_info = vinfo_for_stmt (new_stmt);
3759    }
3760
3761  vec_oprnds0.release ();
3762  vec_oprnds1.release ();
3763  vec_oprnds2.release ();
3764
3765  return true;
3766}
3767
3768
3769/* Function vectorizable_store.
3770
3771   Check if STMT defines a non-scalar data-ref (array/pointer/structure) that
3772   can be vectorized.
3773   If VEC_STMT is also passed, vectorize the STMT: create a vectorized
3774   stmt to replace it, put it in VEC_STMT, and insert it at BSI.
3775   Return FALSE if not a vectorizable STMT, TRUE otherwise.  */
3776
3777static bool
3778vectorizable_store (gimple stmt, gimple_stmt_iterator *gsi, gimple *vec_stmt,
3779		    slp_tree slp_node)
3780{
3781  tree scalar_dest;
3782  tree data_ref;
3783  tree op;
3784  tree vec_oprnd = NULL_TREE;
3785  stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
3786  struct data_reference *dr = STMT_VINFO_DATA_REF (stmt_info), *first_dr = NULL;
3787  tree vectype = STMT_VINFO_VECTYPE (stmt_info);
3788  tree elem_type;
3789  loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
3790  struct loop *loop = NULL;
3791  enum machine_mode vec_mode;
3792  tree dummy;
3793  enum dr_alignment_support alignment_support_scheme;
3794  tree def;
3795  gimple def_stmt;
3796  enum vect_def_type dt;
3797  stmt_vec_info prev_stmt_info = NULL;
3798  tree dataref_ptr = NULL_TREE;
3799  gimple ptr_incr = NULL;
3800  int nunits = TYPE_VECTOR_SUBPARTS (vectype);
3801  int ncopies;
3802  int j;
3803  gimple next_stmt, first_stmt = NULL;
3804  bool grouped_store = false;
3805  bool store_lanes_p = false;
3806  unsigned int group_size, i;
3807  vec<tree> dr_chain = vNULL;
3808  vec<tree> oprnds = vNULL;
3809  vec<tree> result_chain = vNULL;
3810  bool inv_p;
3811  vec<tree> vec_oprnds = vNULL;
3812  bool slp = (slp_node != NULL);
3813  unsigned int vec_num;
3814  bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
3815  tree aggr_type;
3816
3817  if (loop_vinfo)
3818    loop = LOOP_VINFO_LOOP (loop_vinfo);
3819
3820  /* Multiple types in SLP are handled by creating the appropriate number of
3821     vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in
3822     case of SLP.  */
3823  if (slp || PURE_SLP_STMT (stmt_info))
3824    ncopies = 1;
3825  else
3826    ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits;
3827
3828  gcc_assert (ncopies >= 1);
3829
3830  /* FORNOW. This restriction should be relaxed.  */
3831  if (loop && nested_in_vect_loop_p (loop, stmt) && ncopies > 1)
3832    {
3833      if (dump_enabled_p ())
3834        dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3835                         "multiple types in nested loop.");
3836      return false;
3837    }
3838
3839  if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
3840    return false;
3841
3842  if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def)
3843    return false;
3844
3845  /* Is vectorizable store? */
3846
3847  if (!is_gimple_assign (stmt))
3848    return false;
3849
3850  scalar_dest = gimple_assign_lhs (stmt);
3851  if (TREE_CODE (scalar_dest) == VIEW_CONVERT_EXPR
3852      && is_pattern_stmt_p (stmt_info))
3853    scalar_dest = TREE_OPERAND (scalar_dest, 0);
3854  if (TREE_CODE (scalar_dest) != ARRAY_REF
3855      && TREE_CODE (scalar_dest) != INDIRECT_REF
3856      && TREE_CODE (scalar_dest) != COMPONENT_REF
3857      && TREE_CODE (scalar_dest) != IMAGPART_EXPR
3858      && TREE_CODE (scalar_dest) != REALPART_EXPR
3859      && TREE_CODE (scalar_dest) != MEM_REF)
3860    return false;
3861
3862  gcc_assert (gimple_assign_single_p (stmt));
3863  op = gimple_assign_rhs1 (stmt);
3864  if (!vect_is_simple_use (op, stmt, loop_vinfo, bb_vinfo, &def_stmt,
3865			   &def, &dt))
3866    {
3867      if (dump_enabled_p ())
3868        dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3869                         "use not simple.");
3870      return false;
3871    }
3872
3873  elem_type = TREE_TYPE (vectype);
3874  vec_mode = TYPE_MODE (vectype);
3875
3876  /* FORNOW. In some cases can vectorize even if data-type not supported
3877     (e.g. - array initialization with 0).  */
3878  if (optab_handler (mov_optab, vec_mode) == CODE_FOR_nothing)
3879    return false;
3880
3881  if (!STMT_VINFO_DATA_REF (stmt_info))
3882    return false;
3883
3884  if (tree_int_cst_compare (loop && nested_in_vect_loop_p (loop, stmt)
3885			    ? STMT_VINFO_DR_STEP (stmt_info) : DR_STEP (dr),
3886			    size_zero_node) < 0)
3887    {
3888      if (dump_enabled_p ())
3889        dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3890                         "negative step for store.");
3891      return false;
3892    }
3893
3894  if (STMT_VINFO_GROUPED_ACCESS (stmt_info))
3895    {
3896      grouped_store = true;
3897      first_stmt = GROUP_FIRST_ELEMENT (stmt_info);
3898      if (!slp && !PURE_SLP_STMT (stmt_info))
3899	{
3900	  group_size = GROUP_SIZE (vinfo_for_stmt (first_stmt));
3901	  if (vect_store_lanes_supported (vectype, group_size))
3902	    store_lanes_p = true;
3903	  else if (!vect_grouped_store_supported (vectype, group_size))
3904	    return false;
3905	}
3906
3907      if (first_stmt == stmt)
3908	{
3909          /* STMT is the leader of the group. Check the operands of all the
3910             stmts of the group.  */
3911          next_stmt = GROUP_NEXT_ELEMENT (stmt_info);
3912          while (next_stmt)
3913            {
3914	      gcc_assert (gimple_assign_single_p (next_stmt));
3915	      op = gimple_assign_rhs1 (next_stmt);
3916              if (!vect_is_simple_use (op, next_stmt, loop_vinfo, bb_vinfo,
3917				       &def_stmt, &def, &dt))
3918                {
3919                  if (dump_enabled_p ())
3920                    dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3921                                     "use not simple.");
3922                  return false;
3923                }
3924              next_stmt = GROUP_NEXT_ELEMENT (vinfo_for_stmt (next_stmt));
3925            }
3926        }
3927    }
3928
3929  if (!vec_stmt) /* transformation not required.  */
3930    {
3931      STMT_VINFO_TYPE (stmt_info) = store_vec_info_type;
3932      vect_model_store_cost (stmt_info, ncopies, store_lanes_p, dt,
3933			     NULL, NULL, NULL);
3934      return true;
3935    }
3936
3937  /** Transform.  **/
3938
3939  if (grouped_store)
3940    {
3941      first_dr = STMT_VINFO_DATA_REF (vinfo_for_stmt (first_stmt));
3942      group_size = GROUP_SIZE (vinfo_for_stmt (first_stmt));
3943
3944      GROUP_STORE_COUNT (vinfo_for_stmt (first_stmt))++;
3945
3946      /* FORNOW */
3947      gcc_assert (!loop || !nested_in_vect_loop_p (loop, stmt));
3948
3949      /* We vectorize all the stmts of the interleaving group when we
3950	 reach the last stmt in the group.  */
3951      if (GROUP_STORE_COUNT (vinfo_for_stmt (first_stmt))
3952	  < GROUP_SIZE (vinfo_for_stmt (first_stmt))
3953	  && !slp)
3954	{
3955	  *vec_stmt = NULL;
3956	  return true;
3957	}
3958
3959      if (slp)
3960        {
3961          grouped_store = false;
3962          /* VEC_NUM is the number of vect stmts to be created for this
3963             group.  */
3964          vec_num = SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node);
3965          first_stmt = SLP_TREE_SCALAR_STMTS (slp_node)[0];
3966          first_dr = STMT_VINFO_DATA_REF (vinfo_for_stmt (first_stmt));
3967	  op = gimple_assign_rhs1 (first_stmt);
3968        }
3969      else
3970        /* VEC_NUM is the number of vect stmts to be created for this
3971           group.  */
3972	vec_num = group_size;
3973    }
3974  else
3975    {
3976      first_stmt = stmt;
3977      first_dr = dr;
3978      group_size = vec_num = 1;
3979    }
3980
3981  if (dump_enabled_p ())
3982    dump_printf_loc (MSG_NOTE, vect_location,
3983                     "transform store. ncopies = %d", ncopies);
3984
3985  dr_chain.create (group_size);
3986  oprnds.create (group_size);
3987
3988  alignment_support_scheme = vect_supportable_dr_alignment (first_dr, false);
3989  gcc_assert (alignment_support_scheme);
3990  /* Targets with store-lane instructions must not require explicit
3991     realignment.  */
3992  gcc_assert (!store_lanes_p
3993	      || alignment_support_scheme == dr_aligned
3994	      || alignment_support_scheme == dr_unaligned_supported);
3995
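  /* For store-lanes the whole group is written through a single array
     object holding VEC_NUM vectors' worth of elements (see the
     IFN_STORE_LANES call below); otherwise each vector is stored through
     a plain vector reference.  */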
3996  if (store_lanes_p)
3997    aggr_type = build_array_type_nelts (elem_type, vec_num * nunits);
3998  else
3999    aggr_type = vectype;
4000
4001  /* In case the vectorization factor (VF) is bigger than the number
4002     of elements that we can fit in a vectype (nunits), we have to generate
4003     more than one vector stmt - i.e - we need to "unroll" the
4004     vector stmt by a factor VF/nunits.  For more details see documentation in
4005     vect_get_vec_def_for_copy_stmt.  */
4006
4007  /* In case of interleaving (non-unit grouped access):
4008
4009        S1:  &base + 2 = x2
4010        S2:  &base = x0
4011        S3:  &base + 1 = x1
4012        S4:  &base + 3 = x3
4013
4014     We create vectorized stores starting from the base address (the access of
4015     the first stmt in the chain, S2 in the above example) when the last store
4016     stmt of the chain (S4) is reached:
4017
4018        VS1: &base = vx2
4019	VS2: &base + vec_size*1 = vx0
4020	VS3: &base + vec_size*2 = vx1
4021	VS4: &base + vec_size*3 = vx3
4022
4023     Then permutation statements are generated:
4024
4025	VS5: vx5 = VEC_PERM_EXPR < vx0, vx3, {0, 8, 1, 9, 2, 10, 3, 11} >
4026	VS6: vx6 = VEC_PERM_EXPR < vx0, vx3, {4, 12, 5, 13, 6, 14, 7, 15} >
4027	...
4028
4029     And they are put in STMT_VINFO_VEC_STMT of the corresponding scalar stmts
4030     (the order of the data-refs in the output of vect_permute_store_chain
4031     corresponds to the order of scalar stmts in the interleaving chain - see
4032     the documentation of vect_permute_store_chain()).
4033
4034     In case of both multiple types and interleaving, above vector stores and
4035     permutation stmts are created for every copy.  The result vector stmts are
4036     put in STMT_VINFO_VEC_STMT for the first copy and in the corresponding
4037     STMT_VINFO_RELATED_STMT for the next copies.
4038  */
4039
4040  prev_stmt_info = NULL;
4041  for (j = 0; j < ncopies; j++)
4042    {
4043      gimple new_stmt;
4044
4045      if (j == 0)
4046	{
4047          if (slp)
4048            {
4049	      /* Get vectorized arguments for SLP_NODE.  */
4050              vect_get_vec_defs (op, NULL_TREE, stmt, &vec_oprnds,
4051                                 NULL, slp_node, -1);
4052
4053              vec_oprnd = vec_oprnds[0];
4054            }
4055          else
4056            {
4057	      /* For interleaved stores we collect vectorized defs for all the
4058		 stores in the group in DR_CHAIN and OPRNDS. DR_CHAIN is then
4059		 used as an input to vect_permute_store_chain(), and OPRNDS as
4060		 an input to vect_get_vec_def_for_stmt_copy() for the next copy.
4061
4062		 If the store is not grouped, GROUP_SIZE is 1, and DR_CHAIN and
4063		 OPRNDS are of size 1.  */
4064	      next_stmt = first_stmt;
4065	      for (i = 0; i < group_size; i++)
4066		{
4067		  /* Since gaps are not supported for interleaved stores,
4068		     GROUP_SIZE is the exact number of stmts in the chain.
4069		     Therefore, NEXT_STMT can't be NULL_TREE.  In case that
4070		     there is no interleaving, GROUP_SIZE is 1, and only one
4071		     iteration of the loop will be executed.  */
4072		  gcc_assert (next_stmt
4073			      && gimple_assign_single_p (next_stmt));
4074		  op = gimple_assign_rhs1 (next_stmt);
4075
4076		  vec_oprnd = vect_get_vec_def_for_operand (op, next_stmt,
4077							    NULL);
4078		  dr_chain.quick_push (vec_oprnd);
4079		  oprnds.quick_push (vec_oprnd);
4080		  next_stmt = GROUP_NEXT_ELEMENT (vinfo_for_stmt (next_stmt));
4081		}
4082	    }
4083
4084	  /* We should have caught mismatched types earlier.  */
4085	  gcc_assert (useless_type_conversion_p (vectype,
4086						 TREE_TYPE (vec_oprnd)));
4087	  dataref_ptr = vect_create_data_ref_ptr (first_stmt, aggr_type, NULL,
4088						  NULL_TREE, &dummy, gsi,
4089						  &ptr_incr, false, &inv_p);
4090	  gcc_assert (bb_vinfo || !inv_p);
4091	}
4092      else
4093	{
4094	  /* For interleaved stores we created vectorized defs for all the
4095	     defs stored in OPRNDS in the previous iteration (previous copy).
4096	     DR_CHAIN is then used as an input to vect_permute_store_chain(),
4097	     and OPRNDS as an input to vect_get_vec_def_for_stmt_copy() for the
4098	     next copy.
4099	     If the store is not grouped, GROUP_SIZE is 1, and DR_CHAIN and
4100	     OPRNDS are of size 1.  */
4101	  for (i = 0; i < group_size; i++)
4102	    {
4103	      op = oprnds[i];
4104	      vect_is_simple_use (op, NULL, loop_vinfo, bb_vinfo, &def_stmt,
4105				  &def, &dt);
4106	      vec_oprnd = vect_get_vec_def_for_stmt_copy (dt, op);
4107	      dr_chain[i] = vec_oprnd;
4108	      oprnds[i] = vec_oprnd;
4109	    }
4110	  dataref_ptr = bump_vector_ptr (dataref_ptr, ptr_incr, gsi, stmt,
4111					 TYPE_SIZE_UNIT (aggr_type));
4112	}
4113
4114      if (store_lanes_p)
4115	{
4116	  tree vec_array;
4117
4118	  /* Combine all the vectors into an array.  */
4119	  vec_array = create_vector_array (vectype, vec_num);
4120	  for (i = 0; i < vec_num; i++)
4121	    {
4122	      vec_oprnd = dr_chain[i];
4123	      write_vector_array (stmt, gsi, vec_oprnd, vec_array, i);
4124	    }
4125
4126	  /* Emit:
4127	       MEM_REF[...all elements...] = STORE_LANES (VEC_ARRAY).  */
4128	  data_ref = create_array_ref (aggr_type, dataref_ptr, first_dr);
4129	  new_stmt = gimple_build_call_internal (IFN_STORE_LANES, 1, vec_array);
4130	  gimple_call_set_lhs (new_stmt, data_ref);
4131	  vect_finish_stmt_generation (stmt, new_stmt, gsi);
4132	}
4133      else
4134	{
4135	  new_stmt = NULL;
4136	  if (grouped_store)
4137	    {
4138	      if (j == 0)
4139		result_chain.create (group_size);
4140	      /* Permute.  */
4141	      vect_permute_store_chain (dr_chain, group_size, stmt, gsi,
4142					&result_chain);
4143	    }
4144
4145	  next_stmt = first_stmt;
4146	  for (i = 0; i < vec_num; i++)
4147	    {
4148	      unsigned align, misalign;
4149
4150	      if (i > 0)
4151		/* Bump the vector pointer.  */
4152		dataref_ptr = bump_vector_ptr (dataref_ptr, ptr_incr, gsi,
4153					       stmt, NULL_TREE);
4154
4155	      if (slp)
4156		vec_oprnd = vec_oprnds[i];
4157	      else if (grouped_store)
4158		/* For grouped stores vectorized defs are interleaved in
4159		   vect_permute_store_chain().  */
4160		vec_oprnd = result_chain[i];
4161
4162	      data_ref = build2 (MEM_REF, TREE_TYPE (vec_oprnd), dataref_ptr,
4163				 build_int_cst (reference_alias_ptr_type
4164						(DR_REF (first_dr)), 0));
4165	      align = TYPE_ALIGN_UNIT (vectype);
4166	      if (aligned_access_p (first_dr))
4167		misalign = 0;
4168	      else if (DR_MISALIGNMENT (first_dr) == -1)
4169		{
4170		  TREE_TYPE (data_ref)
4171		    = build_aligned_type (TREE_TYPE (data_ref),
4172					  TYPE_ALIGN (elem_type));
4173		  align = TYPE_ALIGN_UNIT (elem_type);
4174		  misalign = 0;
4175		}
4176	      else
4177		{
4178		  TREE_TYPE (data_ref)
4179		    = build_aligned_type (TREE_TYPE (data_ref),
4180					  TYPE_ALIGN (elem_type));
4181		  misalign = DR_MISALIGNMENT (first_dr);
4182		}
4183	      set_ptr_info_alignment (get_ptr_info (dataref_ptr), align,
4184				      misalign);
4185
4186	      /* Arguments are ready.  Create the new vector stmt.  */
4187	      new_stmt = gimple_build_assign (data_ref, vec_oprnd);
4188	      vect_finish_stmt_generation (stmt, new_stmt, gsi);
4189
4190	      if (slp)
4191		continue;
4192
4193	      next_stmt = GROUP_NEXT_ELEMENT (vinfo_for_stmt (next_stmt));
4194	      if (!next_stmt)
4195		break;
4196	    }
4197	}
4198      if (!slp)
4199	{
4200	  if (j == 0)
4201	    STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
4202	  else
4203	    STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
4204	  prev_stmt_info = vinfo_for_stmt (new_stmt);
4205	}
4206    }
4207
4208  dr_chain.release ();
4209  oprnds.release ();
4210  result_chain.release ();
4211  vec_oprnds.release ();
4212
4213  return true;
4214}
4215
4216/* Given a vector type VECTYPE and permutation SEL returns
4217   the VECTOR_CST mask that implements the permutation of the
4218   vector elements.  If that is impossible to do, returns NULL.  */
4219
4220tree
4221vect_gen_perm_mask (tree vectype, unsigned char *sel)
4222{
4223  tree mask_elt_type, mask_type, mask_vec, *mask_elts;
4224  int i, nunits;
4225
4226  nunits = TYPE_VECTOR_SUBPARTS (vectype);
4227
4228  if (!can_vec_perm_p (TYPE_MODE (vectype), false, sel))
4229    return NULL;
4230
4231  mask_elt_type = lang_hooks.types.type_for_mode
4232		    (int_mode_for_mode (TYPE_MODE (TREE_TYPE (vectype))), 1);
4233  mask_type = get_vectype_for_scalar_type (mask_elt_type);
4234
4235  mask_elts = XALLOCAVEC (tree, nunits);
4236  for (i = nunits - 1; i >= 0; i--)
4237    mask_elts[i] = build_int_cst (mask_elt_type, sel[i]);
4238  mask_vec = build_vector (mask_type, mask_elts);
4239
4240  return mask_vec;
4241}
4242
4243/* Given a vector type VECTYPE returns the VECTOR_CST mask that implements
4244   reversal of the vector elements.  If that is impossible to do,
4245   returns NULL.  */
4246
4247static tree
4248perm_mask_for_reverse (tree vectype)
4249{
4250  int i, nunits;
4251  unsigned char *sel;
4252
4253  nunits = TYPE_VECTOR_SUBPARTS (vectype);
4254  sel = XALLOCAVEC (unsigned char, nunits);
4255
4256  for (i = 0; i < nunits; ++i)
4257    sel[i] = nunits - 1 - i;
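  /* E.g. for NUNITS == 4 this builds the selector { 3, 2, 1, 0 }.  */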
4258
4259  return vect_gen_perm_mask (vectype, sel);
4260}
4261
4262/* Given vector variables X and Y that were generated for the scalar
4263   STMT, generate instructions to permute the vector elements of X and Y
4264   using permutation mask MASK_VEC, insert them at *GSI and return the
4265   permuted vector variable.  */
4266
4267static tree
4268permute_vec_elements (tree x, tree y, tree mask_vec, gimple stmt,
4269		      gimple_stmt_iterator *gsi)
4270{
4271  tree vectype = TREE_TYPE (x);
4272  tree perm_dest, data_ref;
4273  gimple perm_stmt;
4274
4275  perm_dest = vect_create_destination_var (gimple_assign_lhs (stmt), vectype);
4276  data_ref = make_ssa_name (perm_dest, NULL);
4277
4278  /* Generate the permute statement.  */
4279  perm_stmt = gimple_build_assign_with_ops (VEC_PERM_EXPR, data_ref,
4280					    x, y, mask_vec);
4281  vect_finish_stmt_generation (stmt, perm_stmt, gsi);
4282
4283  return data_ref;
4284}
4285
4286/* Function vectorizable_load.
4287
4288   Check if STMT reads a non-scalar data-ref (array/pointer/structure) that
4289   can be vectorized.
4290   If VEC_STMT is also passed, vectorize the STMT: create a vectorized
4291   stmt to replace it, put it in VEC_STMT, and insert it at BSI.
4292   Return FALSE if not a vectorizable STMT, TRUE otherwise.  */
4293
4294static bool
4295vectorizable_load (gimple stmt, gimple_stmt_iterator *gsi, gimple *vec_stmt,
4296		   slp_tree slp_node, slp_instance slp_node_instance)
4297{
4298  tree scalar_dest;
4299  tree vec_dest = NULL;
4300  tree data_ref = NULL;
4301  stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
4302  stmt_vec_info prev_stmt_info;
4303  loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
4304  struct loop *loop = NULL;
4305  struct loop *containing_loop = (gimple_bb (stmt))->loop_father;
4306  bool nested_in_vect_loop = false;
4307  struct data_reference *dr = STMT_VINFO_DATA_REF (stmt_info), *first_dr;
4308  tree vectype = STMT_VINFO_VECTYPE (stmt_info);
4309  tree elem_type;
4310  tree new_temp;
4311  enum machine_mode mode;
4312  gimple new_stmt = NULL;
4313  tree dummy;
4314  enum dr_alignment_support alignment_support_scheme;
4315  tree dataref_ptr = NULL_TREE;
4316  gimple ptr_incr = NULL;
4317  int nunits = TYPE_VECTOR_SUBPARTS (vectype);
4318  int ncopies;
4319  int i, j, group_size;
4320  tree msq = NULL_TREE, lsq;
4321  tree offset = NULL_TREE;
4322  tree byte_offset = NULL_TREE;
4323  tree realignment_token = NULL_TREE;
4324  gimple phi = NULL;
4325  vec<tree> dr_chain = vNULL;
4326  bool grouped_load = false;
4327  bool load_lanes_p = false;
4328  gimple first_stmt;
4329  bool inv_p;
4330  bool negative = false;
4331  bool compute_in_loop = false;
4332  struct loop *at_loop;
4333  int vec_num;
4334  bool slp = (slp_node != NULL);
4335  bool slp_perm = false;
4336  enum tree_code code;
4337  bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
4338  int vf;
4339  tree aggr_type;
4340  tree gather_base = NULL_TREE, gather_off = NULL_TREE;
4341  tree gather_off_vectype = NULL_TREE, gather_decl = NULL_TREE;
4342  int gather_scale = 1;
4343  enum vect_def_type gather_dt = vect_unknown_def_type;
4344
4345  if (loop_vinfo)
4346    {
4347      loop = LOOP_VINFO_LOOP (loop_vinfo);
4348      nested_in_vect_loop = nested_in_vect_loop_p (loop, stmt);
4349      vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
4350    }
4351  else
4352    vf = 1;
4353
4354  /* Multiple types in SLP are handled by creating the appropriate number of
4355     vectorized stmts for each SLP node.  Hence, NCOPIES is always 1 in
4356     case of SLP.  */
4357  if (slp || PURE_SLP_STMT (stmt_info))
4358    ncopies = 1;
4359  else
4360    ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits;
4361
4362  gcc_assert (ncopies >= 1);
4363
4364  /* FORNOW. This restriction should be relaxed.  */
4365  if (nested_in_vect_loop && ncopies > 1)
4366    {
4367      if (dump_enabled_p ())
4368        dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
4369                         "multiple types in nested loop.");
4370      return false;
4371    }
4372
4373  /* Invalidate assumptions made by dependence analysis when vectorization
4374     on the unrolled body effectively re-orders stmts.  */
4375  if (ncopies > 1
4376      && STMT_VINFO_MIN_NEG_DIST (stmt_info) != 0
4377      && ((unsigned)LOOP_VINFO_VECT_FACTOR (loop_vinfo)
4378	  > STMT_VINFO_MIN_NEG_DIST (stmt_info)))
4379    {
4380      if (dump_enabled_p ())
4381	dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
4382			 "cannot perform implicit CSE when unrolling "
4383			 "with negative dependence distance\n");
4384      return false;
4385    }
4386
4387  if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
4388    return false;
4389
4390  if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def)
4391    return false;
4392
4393  /* Is vectorizable load? */
4394  if (!is_gimple_assign (stmt))
4395    return false;
4396
4397  scalar_dest = gimple_assign_lhs (stmt);
4398  if (TREE_CODE (scalar_dest) != SSA_NAME)
4399    return false;
4400
4401  code = gimple_assign_rhs_code (stmt);
4402  if (code != ARRAY_REF
4403      && code != INDIRECT_REF
4404      && code != COMPONENT_REF
4405      && code != IMAGPART_EXPR
4406      && code != REALPART_EXPR
4407      && code != MEM_REF
4408      && TREE_CODE_CLASS (code) != tcc_declaration)
4409    return false;
4410
4411  if (!STMT_VINFO_DATA_REF (stmt_info))
4412    return false;
4413
4414  elem_type = TREE_TYPE (vectype);
4415  mode = TYPE_MODE (vectype);
4416
4417  /* FORNOW. In some cases can vectorize even if data-type not supported
4418    (e.g. - data copies).  */
4419  if (optab_handler (mov_optab, mode) == CODE_FOR_nothing)
4420    {
4421      if (dump_enabled_p ())
4422        dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
4423                         "Aligned load, but unsupported type.");
4424      return false;
4425    }
4426
4427  /* Check if the load is a part of an interleaving chain.  */
4428  if (STMT_VINFO_GROUPED_ACCESS (stmt_info))
4429    {
4430      grouped_load = true;
4431      /* FORNOW */
4432      gcc_assert (! nested_in_vect_loop && !STMT_VINFO_GATHER_P (stmt_info));
4433
4434      first_stmt = GROUP_FIRST_ELEMENT (stmt_info);
4435
4436      /* If this is single-element interleaving with an element distance
4437         that leaves unused vector loads around, punt - at best we create
4438	 very sub-optimal code in that case (and blow up memory,
4439	 see PR65518).  */
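      /* E.g. with V4SI and only a[8 * i] read (made-up numbers), the group
	 spans 8 elements but only one is used, so the extra vector loads
	 covering the gap would be entirely wasted.  */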
4440      if (first_stmt == stmt
4441	  && !GROUP_NEXT_ELEMENT (stmt_info)
4442	  && GROUP_SIZE (stmt_info) > TYPE_VECTOR_SUBPARTS (vectype))
4443	{
4444	  if (dump_enabled_p ())
4445	    dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
4446			     "single-element interleaving not supported "
4447			     "for not adjacent vector loads\n");
4448	  return false;
4449	}
4450
4451      if (!slp && !PURE_SLP_STMT (stmt_info))
4452	{
4453	  group_size = GROUP_SIZE (vinfo_for_stmt (first_stmt));
4454	  if (vect_load_lanes_supported (vectype, group_size))
4455	    load_lanes_p = true;
4456	  else if (!vect_grouped_load_supported (vectype, group_size))
4457	    return false;
4458	}
4459
4460      /* Invalidate assumptions made by dependence analysis when vectorization
4461	 on the unrolled body effectively re-orders stmts.  */
4462      if (!PURE_SLP_STMT (stmt_info)
4463	  && STMT_VINFO_MIN_NEG_DIST (stmt_info) != 0
4464	  && ((unsigned)LOOP_VINFO_VECT_FACTOR (loop_vinfo)
4465	      > STMT_VINFO_MIN_NEG_DIST (stmt_info)))
4466	{
4467	  if (dump_enabled_p ())
4468	    dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
4469			     "cannot perform implicit CSE when performing "
4470			     "group loads with negative dependence distance\n");
4471	  return false;
4472	}
4473    }
4474
4475
4476  if (STMT_VINFO_GATHER_P (stmt_info))
4477    {
4478      gimple def_stmt;
4479      tree def;
4480      gather_decl = vect_check_gather (stmt, loop_vinfo, &gather_base,
4481				       &gather_off, &gather_scale);
4482      gcc_assert (gather_decl);
4483      if (!vect_is_simple_use_1 (gather_off, NULL, loop_vinfo, bb_vinfo,
4484				 &def_stmt, &def, &gather_dt,
4485				 &gather_off_vectype))
4486	{
4487	  if (dump_enabled_p ())
4488	    dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
4489                             "gather index use not simple.");
4490	  return false;
4491	}
4492    }
4493  else if (STMT_VINFO_STRIDE_LOAD_P (stmt_info))
4494    ;
4495  else
4496    {
4497      negative = tree_int_cst_compare (nested_in_vect_loop
4498				       ? STMT_VINFO_DR_STEP (stmt_info)
4499				       : DR_STEP (dr),
4500				       size_zero_node) < 0;
4501      if (negative && ncopies > 1)
4502	{
4503	  if (dump_enabled_p ())
4504	    dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
4505                             "multiple types with negative step.");
4506	  return false;
4507	}
4508
4509      if (negative)
4510	{
4511	  gcc_assert (!grouped_load);
4512	  alignment_support_scheme = vect_supportable_dr_alignment (dr, false);
4513	  if (alignment_support_scheme != dr_aligned
4514	      && alignment_support_scheme != dr_unaligned_supported)
4515	    {
4516              if (dump_enabled_p ())
4517                dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
4518                                 "negative step but alignment required.");
4519	      return false;
4520	    }
4521	  if (!perm_mask_for_reverse (vectype))
4522	    {
4523              if (dump_enabled_p ())
4524                dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
4525                                 "negative step and reversing not supported.");
4526	      return false;
4527	    }
4528	}
4529    }
4530
4531  if (!vec_stmt) /* transformation not required.  */
4532    {
4533      STMT_VINFO_TYPE (stmt_info) = load_vec_info_type;
4534      vect_model_load_cost (stmt_info, ncopies, load_lanes_p, NULL, NULL, NULL);
4535      return true;
4536    }
4537
4538  if (dump_enabled_p ())
4539    dump_printf_loc (MSG_NOTE, vect_location,
4540                     "transform load. ncopies = %d", ncopies);
4541
4542  /** Transform.  **/
4543
4544  if (STMT_VINFO_GATHER_P (stmt_info))
4545    {
4546      tree vec_oprnd0 = NULL_TREE, op;
4547      tree arglist = TYPE_ARG_TYPES (TREE_TYPE (gather_decl));
4548      tree rettype, srctype, ptrtype, idxtype, masktype, scaletype;
4549      tree ptr, mask, var, scale, perm_mask = NULL_TREE, prev_res = NULL_TREE;
4550      edge pe = loop_preheader_edge (loop);
4551      gimple_seq seq;
4552      basic_block new_bb;
4553      enum { NARROW, NONE, WIDEN } modifier;
4554      int gather_off_nunits = TYPE_VECTOR_SUBPARTS (gather_off_vectype);
4555
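      /* The vector of gather offsets need not have the same number of
	 elements as the data vector; NONE/WIDEN/NARROW below record how
	 the two relate.  */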
4556      if (nunits == gather_off_nunits)
4557	modifier = NONE;
4558      else if (nunits == gather_off_nunits / 2)
4559	{
4560	  unsigned char *sel = XALLOCAVEC (unsigned char, gather_off_nunits);
4561	  modifier = WIDEN;
4562
4563	  for (i = 0; i < gather_off_nunits; ++i)
4564	    sel[i] = i | nunits;
4565
4566	  perm_mask = vect_gen_perm_mask (gather_off_vectype, sel);
4567	  gcc_assert (perm_mask != NULL_TREE);
4568	}
4569      else if (nunits == gather_off_nunits * 2)
4570	{
4571	  unsigned char *sel = XALLOCAVEC (unsigned char, nunits);
4572	  modifier = NARROW;
4573
4574	  for (i = 0; i < nunits; ++i)
4575	    sel[i] = i < gather_off_nunits
4576		     ? i : i + nunits - gather_off_nunits;
4577
4578	  perm_mask = vect_gen_perm_mask (vectype, sel);
4579	  gcc_assert (perm_mask != NULL_TREE);
4580	  ncopies *= 2;
4581	}
4582      else
4583	gcc_unreachable ();
4584
4585      rettype = TREE_TYPE (TREE_TYPE (gather_decl));
4586      srctype = TREE_VALUE (arglist); arglist = TREE_CHAIN (arglist);
4587      ptrtype = TREE_VALUE (arglist); arglist = TREE_CHAIN (arglist);
4588      idxtype = TREE_VALUE (arglist); arglist = TREE_CHAIN (arglist);
4589      masktype = TREE_VALUE (arglist); arglist = TREE_CHAIN (arglist);
4590      scaletype = TREE_VALUE (arglist);
4591      gcc_checking_assert (types_compatible_p (srctype, rettype)
4592			   && types_compatible_p (srctype, masktype));
4593
4594      vec_dest = vect_create_destination_var (scalar_dest, vectype);
4595
4596      ptr = fold_convert (ptrtype, gather_base);
4597      if (!is_gimple_min_invariant (ptr))
4598	{
4599	  ptr = force_gimple_operand (ptr, &seq, true, NULL_TREE);
4600	  new_bb = gsi_insert_seq_on_edge_immediate (pe, seq);
4601	  gcc_assert (!new_bb);
4602	}
4603
4604      /* Currently we support only unconditional gather loads,
4605	 so mask should be all ones.  */
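      /* For a floating-point mask type this means reinterpreting the
	 all-ones bit pattern as a REAL_CST (via real_from_target below)
	 rather than using -1.0.  */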
4606      if (TREE_CODE (TREE_TYPE (masktype)) == INTEGER_TYPE)
4607	mask = build_int_cst (TREE_TYPE (masktype), -1);
4608      else if (SCALAR_FLOAT_TYPE_P (TREE_TYPE (masktype)))
4609	{
4610	  REAL_VALUE_TYPE r;
4611	  long tmp[6];
4612	  for (j = 0; j < 6; ++j)
4613	    tmp[j] = -1;
4614	  real_from_target (&r, tmp, TYPE_MODE (TREE_TYPE (masktype)));
4615	  mask = build_real (TREE_TYPE (masktype), r);
4616	}
4617      else
4618	gcc_unreachable ();
4619      mask = build_vector_from_val (masktype, mask);
4620      mask = vect_init_vector (stmt, mask, masktype, NULL);
4621
4622      scale = build_int_cst (scaletype, gather_scale);
4623
4624      prev_stmt_info = NULL;
4625      for (j = 0; j < ncopies; ++j)
4626	{
4627	  if (modifier == WIDEN && (j & 1))
4628	    op = permute_vec_elements (vec_oprnd0, vec_oprnd0,
4629				       perm_mask, stmt, gsi);
4630	  else if (j == 0)
4631	    op = vec_oprnd0
4632	      = vect_get_vec_def_for_operand (gather_off, stmt, NULL);
4633	  else
4634	    op = vec_oprnd0
4635	      = vect_get_vec_def_for_stmt_copy (gather_dt, vec_oprnd0);
4636
4637	  if (!useless_type_conversion_p (idxtype, TREE_TYPE (op)))
4638	    {
4639	      gcc_assert (TYPE_VECTOR_SUBPARTS (TREE_TYPE (op))
4640			  == TYPE_VECTOR_SUBPARTS (idxtype));
4641	      var = vect_get_new_vect_var (idxtype, vect_simple_var, NULL);
4642	      var = make_ssa_name (var, NULL);
4643	      op = build1 (VIEW_CONVERT_EXPR, idxtype, op);
4644	      new_stmt
4645		= gimple_build_assign_with_ops (VIEW_CONVERT_EXPR, var,
4646						op, NULL_TREE);
4647	      vect_finish_stmt_generation (stmt, new_stmt, gsi);
4648	      op = var;
4649	    }
4650
4651	  new_stmt
4652	    = gimple_build_call (gather_decl, 5, mask, ptr, op, mask, scale);
4653
4654	  if (!useless_type_conversion_p (vectype, rettype))
4655	    {
4656	      gcc_assert (TYPE_VECTOR_SUBPARTS (vectype)
4657			  == TYPE_VECTOR_SUBPARTS (rettype));
4658	      var = vect_get_new_vect_var (rettype, vect_simple_var, NULL);
4659	      op = make_ssa_name (var, new_stmt);
4660	      gimple_call_set_lhs (new_stmt, op);
4661	      vect_finish_stmt_generation (stmt, new_stmt, gsi);
4662	      var = make_ssa_name (vec_dest, NULL);
4663	      op = build1 (VIEW_CONVERT_EXPR, vectype, op);
4664	      new_stmt
4665		= gimple_build_assign_with_ops (VIEW_CONVERT_EXPR, var, op,
4666						NULL_TREE);
4667	    }
4668	  else
4669	    {
4670	      var = make_ssa_name (vec_dest, new_stmt);
4671	      gimple_call_set_lhs (new_stmt, var);
4672	    }
4673
4674	  vect_finish_stmt_generation (stmt, new_stmt, gsi);
4675
4676	  if (modifier == NARROW)
4677	    {
4678	      if ((j & 1) == 0)
4679		{
4680		  prev_res = var;
4681		  continue;
4682		}
4683	      var = permute_vec_elements (prev_res, var,
4684					  perm_mask, stmt, gsi);
4685	      new_stmt = SSA_NAME_DEF_STMT (var);
4686	    }
4687
4688	  if (prev_stmt_info == NULL)
4689	    STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
4690	  else
4691	    STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
4692	  prev_stmt_info = vinfo_for_stmt (new_stmt);
4693	}
4694      return true;
4695    }
4696  else if (STMT_VINFO_STRIDE_LOAD_P (stmt_info))
4697    {
4698      gimple_stmt_iterator incr_gsi;
4699      bool insert_after;
4700      gimple incr;
4701      tree offvar;
4702      tree ivstep;
4703      tree running_off;
4704      vec<constructor_elt, va_gc> *v = NULL;
4705      gimple_seq stmts = NULL;
4706      tree stride_base, stride_step, alias_off;
4707
4708      gcc_assert (!nested_in_vect_loop);
4709
4710      stride_base
4711	= fold_build_pointer_plus
4712	    (unshare_expr (DR_BASE_ADDRESS (dr)),
4713	     size_binop (PLUS_EXPR,
4714			 convert_to_ptrofftype (unshare_expr (DR_OFFSET (dr))),
4715			 convert_to_ptrofftype (DR_INIT (dr))));
4716      stride_step = fold_convert (sizetype, unshare_expr (DR_STEP (dr)));
4717
4718      /* For a load with loop-invariant (but other than power-of-2)
4719         stride (i.e. not a grouped access) like so:
4720
4721	   for (i = 0; i < n; i += stride)
4722	     ... = array[i];
4723
4724	 we generate a new induction variable and new accesses to
4725	 form a new vector (or vectors, depending on ncopies):
4726
4727	   for (j = 0; ; j += VF*stride)
4728	     tmp1 = array[j];
4729	     tmp2 = array[j + stride];
4730	     ...
4731	     vectemp = {tmp1, tmp2, ...}
4732         */
4733
4734      ivstep = stride_step;
4735      ivstep = fold_build2 (MULT_EXPR, TREE_TYPE (ivstep), ivstep,
4736			    build_int_cst (TREE_TYPE (ivstep), vf));
4737
4738      standard_iv_increment_position (loop, &incr_gsi, &insert_after);
4739
4740      create_iv (stride_base, ivstep, NULL,
4741		 loop, &incr_gsi, insert_after,
4742		 &offvar, NULL);
4743      incr = gsi_stmt (incr_gsi);
4744      set_vinfo_for_stmt (incr, new_stmt_vec_info (incr, loop_vinfo, NULL));
4745
4746      stride_step = force_gimple_operand (stride_step, &stmts, true, NULL_TREE);
4747      if (stmts)
4748	gsi_insert_seq_on_edge_immediate (loop_preheader_edge (loop), stmts);
4749
4750      prev_stmt_info = NULL;
4751      running_off = offvar;
4752      alias_off = build_int_cst (reference_alias_ptr_type (DR_REF (dr)), 0);
4753      for (j = 0; j < ncopies; j++)
4754	{
4755	  tree vec_inv;
4756
4757	  vec_alloc (v, nunits);
4758	  for (i = 0; i < nunits; i++)
4759	    {
4760	      tree newref, newoff;
4761	      gimple incr;
4762	      newref = build2 (MEM_REF, TREE_TYPE (vectype),
4763			       running_off, alias_off);
4764
4765	      newref = force_gimple_operand_gsi (gsi, newref, true,
4766						 NULL_TREE, true,
4767						 GSI_SAME_STMT);
4768	      CONSTRUCTOR_APPEND_ELT (v, NULL_TREE, newref);
4769	      newoff = copy_ssa_name (running_off, NULL);
4770	      incr = gimple_build_assign_with_ops (POINTER_PLUS_EXPR, newoff,
4771						   running_off, stride_step);
4772	      vect_finish_stmt_generation (stmt, incr, gsi);
4773
4774	      running_off = newoff;
4775	    }
4776
4777	  vec_inv = build_constructor (vectype, v);
4778	  new_temp = vect_init_vector (stmt, vec_inv, vectype, gsi);
4779	  new_stmt = SSA_NAME_DEF_STMT (new_temp);
4780
4781	  if (j == 0)
4782	    STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
4783	  else
4784	    STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
4785	  prev_stmt_info = vinfo_for_stmt (new_stmt);
4786	}
4787      return true;
4788    }
4789
4790  if (grouped_load)
4791    {
4792      first_stmt = GROUP_FIRST_ELEMENT (stmt_info);
4793      if (slp
4794          && !SLP_INSTANCE_LOAD_PERMUTATION (slp_node_instance).exists ()
4795	  && first_stmt != SLP_TREE_SCALAR_STMTS (slp_node)[0])
4796        first_stmt = SLP_TREE_SCALAR_STMTS (slp_node)[0];
4797
4798      /* Check if the chain of loads is already vectorized.  */
4799      if (STMT_VINFO_VEC_STMT (vinfo_for_stmt (first_stmt)))
4800	{
4801	  *vec_stmt = STMT_VINFO_VEC_STMT (stmt_info);
4802	  return true;
4803	}
4804      first_dr = STMT_VINFO_DATA_REF (vinfo_for_stmt (first_stmt));
4805      group_size = GROUP_SIZE (vinfo_for_stmt (first_stmt));
4806
4807      /* VEC_NUM is the number of vect stmts to be created for this group.  */
4808      if (slp)
4809	{
4810	  grouped_load = false;
4811	  vec_num = SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node);
4812          if (SLP_INSTANCE_LOAD_PERMUTATION (slp_node_instance).exists ())
4813            slp_perm = true;
4814    	}
4815      else
4816	vec_num = group_size;
4817    }
4818  else
4819    {
4820      first_stmt = stmt;
4821      first_dr = dr;
4822      group_size = vec_num = 1;
4823    }
4824
4825  alignment_support_scheme = vect_supportable_dr_alignment (first_dr, false);
4826  gcc_assert (alignment_support_scheme);
4827  /* Targets with load-lane instructions must not require explicit
4828     realignment.  */
4829  gcc_assert (!load_lanes_p
4830	      || alignment_support_scheme == dr_aligned
4831	      || alignment_support_scheme == dr_unaligned_supported);
4832
4833  /* In case the vectorization factor (VF) is bigger than the number
4834     of elements that we can fit in a vectype (nunits), we have to generate
4835     more than one vector stmt - i.e - we need to "unroll" the
4836     more than one vector stmt - i.e., we need to "unroll" the
4837     from one copy of the vector stmt to the next, in the field
4838     STMT_VINFO_RELATED_STMT.  This is necessary in order to allow following
4839     stages to find the correct vector defs to be used when vectorizing
4840     stmts that use the defs of the current stmt.  The example below
4841     illustrates the vectorization process when VF=16 and nunits=4 (i.e., we
4842     need to create 4 vectorized stmts):
4843
4844     before vectorization:
4845                                RELATED_STMT    VEC_STMT
4846        S1:     x = memref      -               -
4847        S2:     z = x + 1       -               -
4848
4849     step 1: vectorize stmt S1:
4850        We first create the vector stmt VS1_0, and, as usual, record a
4851        pointer to it in the STMT_VINFO_VEC_STMT of the scalar stmt S1.
4852        Next, we create the vector stmt VS1_1, and record a pointer to
4853        it in the STMT_VINFO_RELATED_STMT of the vector stmt VS1_0.
4854        Similarly, for VS1_2 and VS1_3.  This is the resulting chain of
4855        stmts and pointers:
4856                                RELATED_STMT    VEC_STMT
4857        VS1_0:  vx0 = memref0   VS1_1           -
4858        VS1_1:  vx1 = memref1   VS1_2           -
4859        VS1_2:  vx2 = memref2   VS1_3           -
4860        VS1_3:  vx3 = memref3   -               -
4861        S1:     x = load        -               VS1_0
4862        S2:     z = x + 1       -               -
4863
4864     See the documentation of vect_get_vec_def_for_stmt_copy for how the
4865     information we recorded in RELATED_STMT field is used to vectorize
4866     stmt S2.  */
4867
4868  /* In case of interleaving (non-unit grouped access):
4869
4870     S1:  x2 = &base + 2
4871     S2:  x0 = &base
4872     S3:  x1 = &base + 1
4873     S4:  x3 = &base + 3
4874
4875     Vectorized loads are created in the order of memory accesses
4876     starting from the access of the first stmt of the chain:
4877
4878     VS1: vx0 = &base
4879     VS2: vx1 = &base + vec_size*1
4880     VS3: vx3 = &base + vec_size*2
4881     VS4: vx4 = &base + vec_size*3
4882
4883     Then permutation statements are generated:
4884
4885     VS5: vx5 = VEC_PERM_EXPR < vx0, vx1, { 0, 2, ..., i*2 } >
4886     VS6: vx6 = VEC_PERM_EXPR < vx0, vx1, { 1, 3, ..., i*2+1 } >
4887       ...
4888
4889     And they are put in STMT_VINFO_VEC_STMT of the corresponding scalar stmts
4890     (the order of the data-refs in the output of vect_permute_load_chain
4891     corresponds to the order of scalar stmts in the interleaving chain - see
4892     the documentation of vect_permute_load_chain()).
4893     The generation of permutation stmts and recording them in
4894     STMT_VINFO_VEC_STMT is done in vect_transform_grouped_load().
4895
4896     In case of both multiple types and interleaving, the vector loads and
4897     permutation stmts above are created for every copy.  The result vector
4898     stmts are put in STMT_VINFO_VEC_STMT for the first copy and in the
4899     corresponding STMT_VINFO_RELATED_STMT for the next copies.  */
4900
4901  /* If the data reference is aligned (dr_aligned) or potentially unaligned
4902     on a target that supports unaligned accesses (dr_unaligned_supported)
4903     we generate the following code:
4904         p = initial_addr;
4905         indx = 0;
4906         loop {
4907	   p = p + indx * vectype_size;
4908           vec_dest = *(p);
4909           indx = indx + 1;
4910         }
4911
4912     Otherwise, the data reference is potentially unaligned on a target that
4913     does not support unaligned accesses (dr_explicit_realign_optimized) -
4914     then generate the following code, in which the data in each iteration is
4915     obtained by two vector loads, one from the previous iteration, and one
4916     from the current iteration:
4917         p1 = initial_addr;
4918         msq_init = *(floor(p1))
4919         p2 = initial_addr + VS - 1;
4920         realignment_token = call target_builtin;
4921         indx = 0;
4922         loop {
4923           p2 = p2 + indx * vectype_size
4924           lsq = *(floor(p2))
4925           vec_dest = realign_load (msq, lsq, realignment_token)
4926           indx = indx + 1;
4927           msq = lsq;
4928         }   */
4929
4930  /* If the misalignment remains the same throughout the execution of the
4931     loop, we can create the init_addr and permutation mask at the loop
4932     preheader.  Otherwise, it needs to be created inside the loop.
4933     This can only occur when vectorizing memory accesses in the inner-loop
4934     nested within an outer-loop that is being vectorized.  */
4935
4936  if (nested_in_vect_loop
4937      && (TREE_INT_CST_LOW (DR_STEP (dr))
4938	  % GET_MODE_SIZE (TYPE_MODE (vectype)) != 0))
4939    {
4940      gcc_assert (alignment_support_scheme != dr_explicit_realign_optimized);
4941      compute_in_loop = true;
4942    }
4943
4944  if ((alignment_support_scheme == dr_explicit_realign_optimized
4945       || alignment_support_scheme == dr_explicit_realign)
4946      && !compute_in_loop)
4947    {
4948      msq = vect_setup_realignment (first_stmt, gsi, &realignment_token,
4949				    alignment_support_scheme, NULL_TREE,
4950				    &at_loop);
4951      if (alignment_support_scheme == dr_explicit_realign_optimized)
4952	{
4953	  phi = SSA_NAME_DEF_STMT (msq);
4954	  byte_offset = size_binop (MINUS_EXPR, TYPE_SIZE_UNIT (vectype),
4955				    size_one_node);
4956	}
4957    }
4958  else
4959    at_loop = loop;
4960
4961  if (negative)
4962    offset = size_int (-TYPE_VECTOR_SUBPARTS (vectype) + 1);
4963
4964  if (load_lanes_p)
4965    aggr_type = build_array_type_nelts (elem_type, vec_num * nunits);
4966  else
4967    aggr_type = vectype;
4968
4969  prev_stmt_info = NULL;
4970  for (j = 0; j < ncopies; j++)
4971    {
4972      /* 1. Create the vector or array pointer update chain.  */
4973      if (j == 0)
4974        dataref_ptr = vect_create_data_ref_ptr (first_stmt, aggr_type, at_loop,
4975						offset, &dummy, gsi,
4976						&ptr_incr, false, &inv_p,
4977						byte_offset);
4978      else
4979        dataref_ptr = bump_vector_ptr (dataref_ptr, ptr_incr, gsi, stmt,
4980				       TYPE_SIZE_UNIT (aggr_type));
4981
4982      if (grouped_load || slp_perm)
4983	dr_chain.create (vec_num);
4984
4985      if (load_lanes_p)
4986	{
4987	  tree vec_array;
4988
4989	  vec_array = create_vector_array (vectype, vec_num);
4990
4991	  /* Emit:
4992	       VEC_ARRAY = LOAD_LANES (MEM_REF[...all elements...]).  */
4993	  data_ref = create_array_ref (aggr_type, dataref_ptr, first_dr);
4994	  new_stmt = gimple_build_call_internal (IFN_LOAD_LANES, 1, data_ref);
4995	  gimple_call_set_lhs (new_stmt, vec_array);
4996	  vect_finish_stmt_generation (stmt, new_stmt, gsi);
4997
4998	  /* Extract each vector into an SSA_NAME.  */
4999	  for (i = 0; i < vec_num; i++)
5000	    {
5001	      new_temp = read_vector_array (stmt, gsi, scalar_dest,
5002					    vec_array, i);
5003	      dr_chain.quick_push (new_temp);
5004	    }
5005
5006	  /* Record the mapping between SSA_NAMEs and statements.  */
5007	  vect_record_grouped_load_vectors (stmt, dr_chain);
5008	}
5009      else
5010	{
5011	  for (i = 0; i < vec_num; i++)
5012	    {
5013	      if (i > 0)
5014		dataref_ptr = bump_vector_ptr (dataref_ptr, ptr_incr, gsi,
5015					       stmt, NULL_TREE);
5016
5017	      /* 2. Create the vector-load in the loop.  */
5018	      switch (alignment_support_scheme)
5019		{
5020		case dr_aligned:
5021		case dr_unaligned_supported:
5022		  {
5023		    unsigned int align, misalign;
5024
5025		    data_ref
5026		      = build2 (MEM_REF, vectype, dataref_ptr,
5027				build_int_cst (reference_alias_ptr_type
5028					       (DR_REF (first_dr)), 0));
5029		    align = TYPE_ALIGN_UNIT (vectype);
5030		    if (alignment_support_scheme == dr_aligned)
5031		      {
5032			gcc_assert (aligned_access_p (first_dr));
5033			misalign = 0;
5034		      }
5035		    else if (DR_MISALIGNMENT (first_dr) == -1)
5036		      {
5037			TREE_TYPE (data_ref)
5038			  = build_aligned_type (TREE_TYPE (data_ref),
5039						TYPE_ALIGN (elem_type));
5040			align = TYPE_ALIGN_UNIT (elem_type);
5041			misalign = 0;
5042		      }
5043		    else
5044		      {
5045			TREE_TYPE (data_ref)
5046			  = build_aligned_type (TREE_TYPE (data_ref),
5047						TYPE_ALIGN (elem_type));
5048			misalign = DR_MISALIGNMENT (first_dr);
5049		      }
5050		    set_ptr_info_alignment (get_ptr_info (dataref_ptr),
5051					    align, misalign);
5052		    break;
5053		  }
5054		case dr_explicit_realign:
5055		  {
5056		    tree ptr, bump;
5057		    tree vs_minus_1;
5058
5059		    vs_minus_1 = size_int (TYPE_VECTOR_SUBPARTS (vectype) - 1);
5060
5061		    if (compute_in_loop)
5062		      msq = vect_setup_realignment (first_stmt, gsi,
5063						    &realignment_token,
5064						    dr_explicit_realign,
5065						    dataref_ptr, NULL);
5066
5067		    ptr = copy_ssa_name (dataref_ptr, NULL);
5068		    new_stmt = gimple_build_assign_with_ops
5069				 (BIT_AND_EXPR, ptr, dataref_ptr,
5070				  build_int_cst
5071				  (TREE_TYPE (dataref_ptr),
5072				   -(HOST_WIDE_INT)TYPE_ALIGN_UNIT (vectype)));
5073		    vect_finish_stmt_generation (stmt, new_stmt, gsi);
5074		    data_ref
5075		      = build2 (MEM_REF, vectype, ptr,
5076				build_int_cst (reference_alias_ptr_type
5077						 (DR_REF (first_dr)), 0));
5078		    vec_dest = vect_create_destination_var (scalar_dest,
5079							    vectype);
5080		    new_stmt = gimple_build_assign (vec_dest, data_ref);
5081		    new_temp = make_ssa_name (vec_dest, new_stmt);
5082		    gimple_assign_set_lhs (new_stmt, new_temp);
5083		    gimple_set_vdef (new_stmt, gimple_vdef (stmt));
5084		    gimple_set_vuse (new_stmt, gimple_vuse (stmt));
5085		    vect_finish_stmt_generation (stmt, new_stmt, gsi);
5086		    msq = new_temp;
5087
5088		    bump = size_binop (MULT_EXPR, vs_minus_1,
5089				       TYPE_SIZE_UNIT (elem_type));
5090		    ptr = bump_vector_ptr (dataref_ptr, NULL, gsi, stmt, bump);
5091		    new_stmt = gimple_build_assign_with_ops
5092				 (BIT_AND_EXPR, NULL_TREE, ptr,
5093				  build_int_cst
5094				  (TREE_TYPE (ptr),
5095				   -(HOST_WIDE_INT)TYPE_ALIGN_UNIT (vectype)));
5096		    ptr = copy_ssa_name (dataref_ptr, new_stmt);
5097		    gimple_assign_set_lhs (new_stmt, ptr);
5098		    vect_finish_stmt_generation (stmt, new_stmt, gsi);
5099		    data_ref
5100		      = build2 (MEM_REF, vectype, ptr,
5101				build_int_cst (reference_alias_ptr_type
5102						 (DR_REF (first_dr)), 0));
5103		    break;
5104		  }
5105		case dr_explicit_realign_optimized:
5106		  new_temp = copy_ssa_name (dataref_ptr, NULL);
5107		  new_stmt = gimple_build_assign_with_ops
5108			       (BIT_AND_EXPR, new_temp, dataref_ptr,
5109				build_int_cst
5110				  (TREE_TYPE (dataref_ptr),
5111				   -(HOST_WIDE_INT)TYPE_ALIGN_UNIT (vectype)));
5112		  vect_finish_stmt_generation (stmt, new_stmt, gsi);
5113		  data_ref
5114		    = build2 (MEM_REF, vectype, new_temp,
5115			      build_int_cst (reference_alias_ptr_type
5116					       (DR_REF (first_dr)), 0));
5117		  break;
5118		default:
5119		  gcc_unreachable ();
5120		}
5121	      vec_dest = vect_create_destination_var (scalar_dest, vectype);
5122	      new_stmt = gimple_build_assign (vec_dest, data_ref);
5123	      new_temp = make_ssa_name (vec_dest, new_stmt);
5124	      gimple_assign_set_lhs (new_stmt, new_temp);
5125	      vect_finish_stmt_generation (stmt, new_stmt, gsi);
5126
5127	      /* 3. Handle explicit realignment if necessary/supported.
5128		 Create in loop:
5129		   vec_dest = realign_load (msq, lsq, realignment_token)  */
5130	      if (alignment_support_scheme == dr_explicit_realign_optimized
5131		  || alignment_support_scheme == dr_explicit_realign)
5132		{
5133		  lsq = gimple_assign_lhs (new_stmt);
5134		  if (!realignment_token)
5135		    realignment_token = dataref_ptr;
5136		  vec_dest = vect_create_destination_var (scalar_dest, vectype);
5137		  new_stmt
5138		    = gimple_build_assign_with_ops (REALIGN_LOAD_EXPR,
5139						    vec_dest, msq, lsq,
5140						    realignment_token);
5141		  new_temp = make_ssa_name (vec_dest, new_stmt);
5142		  gimple_assign_set_lhs (new_stmt, new_temp);
5143		  vect_finish_stmt_generation (stmt, new_stmt, gsi);
5144
5145		  if (alignment_support_scheme == dr_explicit_realign_optimized)
5146		    {
5147		      gcc_assert (phi);
5148		      if (i == vec_num - 1 && j == ncopies - 1)
5149			add_phi_arg (phi, lsq,
5150				     loop_latch_edge (containing_loop),
5151				     UNKNOWN_LOCATION);
5152		      msq = lsq;
5153		    }
5154		}
5155
5156	      /* 4. Handle invariant-load.  */
5157	      if (inv_p && !bb_vinfo)
5158		{
5159		  gimple_stmt_iterator gsi2 = *gsi;
5160		  gcc_assert (!grouped_load);
5161		  gsi_next (&gsi2);
5162		  new_temp = vect_init_vector (stmt, scalar_dest,
5163					       vectype, &gsi2);
5164		  new_stmt = SSA_NAME_DEF_STMT (new_temp);
5165		}
5166
5167	      if (negative)
5168		{
5169		  tree perm_mask = perm_mask_for_reverse (vectype);
5170		  new_temp = permute_vec_elements (new_temp, new_temp,
5171						   perm_mask, stmt, gsi);
5172		  new_stmt = SSA_NAME_DEF_STMT (new_temp);
5173		}
5174
5175	      /* Collect vector loads and later create their permutation in
5176		 vect_transform_grouped_load ().  */
5177	      if (grouped_load || slp_perm)
5178		dr_chain.quick_push (new_temp);
5179
5180	      /* Store vector loads in the corresponding SLP_NODE.  */
5181	      if (slp && !slp_perm)
5182		SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
5183	    }
5184	}
5185
5186      if (slp && !slp_perm)
5187	continue;
5188
5189      if (slp_perm)
5190        {
5191          if (!vect_transform_slp_perm_load (stmt, dr_chain, gsi, vf,
5192                                             slp_node_instance, false))
5193            {
5194              dr_chain.release ();
5195              return false;
5196            }
5197        }
5198      else
5199        {
5200          if (grouped_load)
5201  	    {
5202	      if (!load_lanes_p)
5203		vect_transform_grouped_load (stmt, dr_chain, group_size, gsi);
5204	      *vec_stmt = STMT_VINFO_VEC_STMT (stmt_info);
5205	    }
5206          else
5207	    {
5208	      if (j == 0)
5209	        STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
5210	      else
5211	        STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
5212	      prev_stmt_info = vinfo_for_stmt (new_stmt);
5213	    }
5214        }
5215      dr_chain.release ();
5216    }
5217
5218  return true;
5219}
5220
5221/* Function vect_is_simple_cond.
5222
5223   Input:
5224   LOOP - the loop that is being vectorized.
5225   COND - Condition that is checked for simple use.
5226
5227   Output:
5228   *COMP_VECTYPE - the vector type for the comparison.
5229
5230   Returns whether a COND can be vectorized.  Checks whether
5231   condition operands are supportable using vect_is_simple_use.  */
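
/* For example (SSA names here are illustrative, not taken from any
   particular testcase): conditions such as

     a_1 < 16        or        x_2 != y_3

   are simple provided each SSA_NAME operand has a supportable definition
   per vect_is_simple_use_1; an operand that is neither an SSA_NAME nor an
   integer/real/fixed-point constant makes the condition unvectorizable.  */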
5232
5233static bool
5234vect_is_simple_cond (tree cond, gimple stmt, loop_vec_info loop_vinfo,
5235		     bb_vec_info bb_vinfo, tree *comp_vectype)
5236{
5237  tree lhs, rhs;
5238  tree def;
5239  enum vect_def_type dt;
5240  tree vectype1 = NULL_TREE, vectype2 = NULL_TREE;
5241
5242  if (!COMPARISON_CLASS_P (cond))
5243    return false;
5244
5245  lhs = TREE_OPERAND (cond, 0);
5246  rhs = TREE_OPERAND (cond, 1);
5247
5248  if (TREE_CODE (lhs) == SSA_NAME)
5249    {
5250      gimple lhs_def_stmt = SSA_NAME_DEF_STMT (lhs);
5251      if (!vect_is_simple_use_1 (lhs, stmt, loop_vinfo, bb_vinfo,
5252				 &lhs_def_stmt, &def, &dt, &vectype1))
5253	return false;
5254    }
5255  else if (TREE_CODE (lhs) != INTEGER_CST && TREE_CODE (lhs) != REAL_CST
5256	   && TREE_CODE (lhs) != FIXED_CST)
5257    return false;
5258
5259  if (TREE_CODE (rhs) == SSA_NAME)
5260    {
5261      gimple rhs_def_stmt = SSA_NAME_DEF_STMT (rhs);
5262      if (!vect_is_simple_use_1 (rhs, stmt, loop_vinfo, bb_vinfo,
5263				 &rhs_def_stmt, &def, &dt, &vectype2))
5264	return false;
5265    }
5266  else if (TREE_CODE (rhs) != INTEGER_CST && TREE_CODE (rhs) != REAL_CST
5267	   && TREE_CODE (rhs) != FIXED_CST)
5268    return false;
5269
5270  *comp_vectype = vectype1 ? vectype1 : vectype2;
5271  return true;
5272}
5273
5274/* vectorizable_condition.
5275
5276   Check if STMT is a conditional modify expression that can be vectorized.
5277   If VEC_STMT is also passed, vectorize the STMT: create a vectorized
5278   stmt using VEC_COND_EXPR  to replace it, put it in VEC_STMT, and insert it
5279   at GSI.
5280
5281   When STMT is vectorized as nested cycle, REDUC_DEF is the vector variable
5282   to be used at REDUC_INDEX (in the then clause if REDUC_INDEX is 1, and in
5283   the else clause if it is 2).
5284
5285   Return FALSE if not a vectorizable STMT, TRUE otherwise.  */
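
/* As an illustrative sketch (the SSA names and the four-element vector
   type are assumptions, not taken from a particular testcase), a scalar
   statement

     x_1 = a_2 < b_3 ? c_4 : d_5;

   is replaced by something like

     vx_6 = VEC_COND_EXPR <va_7 < vb_8, vc_9, vd_10>;

   where va_7, vb_8, vc_9 and vd_10 are the vector defs created for the
   comparison operands and the then/else clauses.  */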
5286
5287bool
5288vectorizable_condition (gimple stmt, gimple_stmt_iterator *gsi,
5289			gimple *vec_stmt, tree reduc_def, int reduc_index,
5290			slp_tree slp_node)
5291{
5292  tree scalar_dest = NULL_TREE;
5293  tree vec_dest = NULL_TREE;
5294  tree cond_expr, then_clause, else_clause;
5295  stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
5296  tree vectype = STMT_VINFO_VECTYPE (stmt_info);
5297  tree comp_vectype = NULL_TREE;
5298  tree vec_cond_lhs = NULL_TREE, vec_cond_rhs = NULL_TREE;
5299  tree vec_then_clause = NULL_TREE, vec_else_clause = NULL_TREE;
5300  tree vec_compare, vec_cond_expr;
5301  tree new_temp;
5302  loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
5303  tree def;
5304  enum vect_def_type dt, dts[4];
5305  int nunits = TYPE_VECTOR_SUBPARTS (vectype);
5306  int ncopies;
5307  enum tree_code code;
5308  stmt_vec_info prev_stmt_info = NULL;
5309  int i, j;
5310  bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
5311  vec<tree> vec_oprnds0 = vNULL;
5312  vec<tree> vec_oprnds1 = vNULL;
5313  vec<tree> vec_oprnds2 = vNULL;
5314  vec<tree> vec_oprnds3 = vNULL;
5315  tree vec_cmp_type = vectype;
5316
5317  if (slp_node || PURE_SLP_STMT (stmt_info))
5318    ncopies = 1;
5319  else
5320    ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits;
5321
5322  gcc_assert (ncopies >= 1);
5323  if (reduc_index && ncopies > 1)
5324    return false; /* FORNOW */
5325
5326  if (reduc_index && STMT_SLP_TYPE (stmt_info))
5327    return false;
5328
5329  if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
5330    return false;
5331
5332  if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def
5333      && !(STMT_VINFO_DEF_TYPE (stmt_info) == vect_nested_cycle
5334           && reduc_def))
5335    return false;
5336
5337  /* FORNOW: not yet supported.  */
5338  if (STMT_VINFO_LIVE_P (stmt_info))
5339    {
5340      if (dump_enabled_p ())
5341        dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5342                         "value used after loop.");
5343      return false;
5344    }
5345
5346  /* Is this a vectorizable conditional operation?  */
5347  if (!is_gimple_assign (stmt))
5348    return false;
5349
5350  code = gimple_assign_rhs_code (stmt);
5351
5352  if (code != COND_EXPR)
5353    return false;
5354
5355  cond_expr = gimple_assign_rhs1 (stmt);
5356  then_clause = gimple_assign_rhs2 (stmt);
5357  else_clause = gimple_assign_rhs3 (stmt);
5358
5359  if (!vect_is_simple_cond (cond_expr, stmt, loop_vinfo, bb_vinfo,
5360			    &comp_vectype)
5361      || !comp_vectype)
5362    return false;
5363
5364  if (TREE_CODE (then_clause) == SSA_NAME)
5365    {
5366      gimple then_def_stmt = SSA_NAME_DEF_STMT (then_clause);
5367      if (!vect_is_simple_use (then_clause, stmt, loop_vinfo, bb_vinfo,
5368			       &then_def_stmt, &def, &dt))
5369	return false;
5370    }
5371  else if (TREE_CODE (then_clause) != INTEGER_CST
5372	   && TREE_CODE (then_clause) != REAL_CST
5373	   && TREE_CODE (then_clause) != FIXED_CST)
5374    return false;
5375
5376  if (TREE_CODE (else_clause) == SSA_NAME)
5377    {
5378      gimple else_def_stmt = SSA_NAME_DEF_STMT (else_clause);
5379      if (!vect_is_simple_use (else_clause, stmt, loop_vinfo, bb_vinfo,
5380			       &else_def_stmt, &def, &dt))
5381	return false;
5382    }
5383  else if (TREE_CODE (else_clause) != INTEGER_CST
5384	   && TREE_CODE (else_clause) != REAL_CST
5385	   && TREE_CODE (else_clause) != FIXED_CST)
5386    return false;
5387
5388  if (!INTEGRAL_TYPE_P (TREE_TYPE (vectype)))
5389    {
5390      unsigned int prec = GET_MODE_BITSIZE (TYPE_MODE (TREE_TYPE (vectype)));
5391      tree cmp_type = build_nonstandard_integer_type (prec, 1);
5392      vec_cmp_type = get_same_sized_vectype (cmp_type, vectype);
5393      if (vec_cmp_type == NULL_TREE)
5394	return false;
5395    }
5396
5397  if (!vec_stmt)
5398    {
5399      STMT_VINFO_TYPE (stmt_info) = condition_vec_info_type;
5400      return expand_vec_cond_expr_p (vectype, comp_vectype);
5401    }
5402
5403  /* Transform.  */
5404
5405  if (!slp_node)
5406    {
5407      vec_oprnds0.create (1);
5408      vec_oprnds1.create (1);
5409      vec_oprnds2.create (1);
5410      vec_oprnds3.create (1);
5411    }
5412
5413  /* Handle def.  */
5414  scalar_dest = gimple_assign_lhs (stmt);
5415  vec_dest = vect_create_destination_var (scalar_dest, vectype);
5416
5417  /* Handle cond expr.  */
5418  for (j = 0; j < ncopies; j++)
5419    {
5420      gimple new_stmt = NULL;
5421      if (j == 0)
5422	{
5423          if (slp_node)
5424            {
5425              vec<tree> ops;
5426	      ops.create (4);
5427	      vec<vec<tree> > vec_defs;
5428
5429	      vec_defs.create (4);
5430              ops.safe_push (TREE_OPERAND (cond_expr, 0));
5431              ops.safe_push (TREE_OPERAND (cond_expr, 1));
5432              ops.safe_push (then_clause);
5433              ops.safe_push (else_clause);
5434              vect_get_slp_defs (ops, slp_node, &vec_defs, -1);
5435	      vec_oprnds3 = vec_defs.pop ();
5436	      vec_oprnds2 = vec_defs.pop ();
5437	      vec_oprnds1 = vec_defs.pop ();
5438	      vec_oprnds0 = vec_defs.pop ();
5439
5440              ops.release ();
5441              vec_defs.release ();
5442            }
5443          else
5444            {
5445	      gimple gtemp;
5446	      vec_cond_lhs =
5447	      vect_get_vec_def_for_operand (TREE_OPERAND (cond_expr, 0),
5448					    stmt, NULL);
5449	      vect_is_simple_use (TREE_OPERAND (cond_expr, 0), stmt,
5450				  loop_vinfo, NULL, &gtemp, &def, &dts[0]);
5451
5452	      vec_cond_rhs =
5453		vect_get_vec_def_for_operand (TREE_OPERAND (cond_expr, 1),
5454						stmt, NULL);
5455	      vect_is_simple_use (TREE_OPERAND (cond_expr, 1), stmt,
5456				  loop_vinfo, NULL, &gtemp, &def, &dts[1]);
5457	      if (reduc_index == 1)
5458		vec_then_clause = reduc_def;
5459	      else
5460		{
5461		  vec_then_clause = vect_get_vec_def_for_operand (then_clause,
5462		 		  			      stmt, NULL);
5463	          vect_is_simple_use (then_clause, stmt, loop_vinfo,
5464					  NULL, &gtemp, &def, &dts[2]);
5465		}
5466	      if (reduc_index == 2)
5467		vec_else_clause = reduc_def;
5468	      else
5469		{
5470		  vec_else_clause = vect_get_vec_def_for_operand (else_clause,
5471							      stmt, NULL);
5472		  vect_is_simple_use (else_clause, stmt, loop_vinfo,
5473				  NULL, &gtemp, &def, &dts[3]);
5474		}
5475	    }
5476	}
5477      else
5478	{
5479	  vec_cond_lhs = vect_get_vec_def_for_stmt_copy (dts[0],
5480							 vec_oprnds0.pop ());
5481	  vec_cond_rhs = vect_get_vec_def_for_stmt_copy (dts[1],
5482							 vec_oprnds1.pop ());
5483	  vec_then_clause = vect_get_vec_def_for_stmt_copy (dts[2],
5484							    vec_oprnds2.pop ());
5485	  vec_else_clause = vect_get_vec_def_for_stmt_copy (dts[3],
5486							    vec_oprnds3.pop ());
5487	}
5488
5489      if (!slp_node)
5490        {
5491	  vec_oprnds0.quick_push (vec_cond_lhs);
5492	  vec_oprnds1.quick_push (vec_cond_rhs);
5493	  vec_oprnds2.quick_push (vec_then_clause);
5494	  vec_oprnds3.quick_push (vec_else_clause);
5495	}
5496
5497      /* Arguments are ready.  Create the new vector stmt.  */
5498      FOR_EACH_VEC_ELT (vec_oprnds0, i, vec_cond_lhs)
5499        {
5500          vec_cond_rhs = vec_oprnds1[i];
5501          vec_then_clause = vec_oprnds2[i];
5502          vec_else_clause = vec_oprnds3[i];
5503
5504	  vec_compare = build2 (TREE_CODE (cond_expr), vec_cmp_type,
5505				vec_cond_lhs, vec_cond_rhs);
5506          vec_cond_expr = build3 (VEC_COND_EXPR, vectype,
5507 		         vec_compare, vec_then_clause, vec_else_clause);
5508
5509          new_stmt = gimple_build_assign (vec_dest, vec_cond_expr);
5510          new_temp = make_ssa_name (vec_dest, new_stmt);
5511          gimple_assign_set_lhs (new_stmt, new_temp);
5512          vect_finish_stmt_generation (stmt, new_stmt, gsi);
5513          if (slp_node)
5514            SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
5515        }
5516
5517        if (slp_node)
5518          continue;
5519
5520        if (j == 0)
5521          STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
5522        else
5523          STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
5524
5525        prev_stmt_info = vinfo_for_stmt (new_stmt);
5526    }
5527
5528  vec_oprnds0.release ();
5529  vec_oprnds1.release ();
5530  vec_oprnds2.release ();
5531  vec_oprnds3.release ();
5532
5533  return true;
5534}
5535
5536
5537/* Make sure the statement is vectorizable.  */
5538
5539bool
5540vect_analyze_stmt (gimple stmt, bool *need_to_vectorize, slp_tree node)
5541{
5542  stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
5543  bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
5544  enum vect_relevant relevance = STMT_VINFO_RELEVANT (stmt_info);
5545  bool ok;
5546  tree scalar_type, vectype;
5547  gimple pattern_stmt;
5548  gimple_seq pattern_def_seq;
5549
5550  if (dump_enabled_p ())
5551    {
5552      dump_printf_loc (MSG_NOTE, vect_location, "==> examining statement: ");
5553      dump_gimple_stmt (MSG_NOTE, TDF_SLIM, stmt, 0);
5554    }
5555
5556  if (gimple_has_volatile_ops (stmt))
5557    {
5558      if (dump_enabled_p ())
5559        dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5560                         "not vectorized: stmt has volatile operands");
5561
5562      return false;
5563    }
5564
5565  /* Skip stmts that do not need to be vectorized. In loops this is expected
5566     to include:
5567     - the COND_EXPR which is the loop exit condition
5568     - any LABEL_EXPRs in the loop
5569     - computations that are used only for array indexing or loop control.
5570     In basic blocks we only analyze statements that are a part of some SLP
5571     instance, therefore, all the statements are relevant.
5572
5573     A pattern statement needs to be analyzed instead of the original statement
5574     if the original statement is not relevant.  Otherwise, we analyze both
5575     statements.  In basic blocks we are called from some SLP instance
5576     traversal; in that case don't analyze pattern stmts instead, since the
5577     pattern stmts will already be part of the SLP instance.  */
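
  /* For instance (an illustrative loop, not from a particular testcase), in

       for (i = 0; i < n; i++)
         a[i] = b[i] + 1;

     the exit condition i < n and the increment i = i + 1 are used only for
     loop control and therefore need not be vectorized themselves.  */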
5578
5579  pattern_stmt = STMT_VINFO_RELATED_STMT (stmt_info);
5580  if (!STMT_VINFO_RELEVANT_P (stmt_info)
5581      && !STMT_VINFO_LIVE_P (stmt_info))
5582    {
5583      if (STMT_VINFO_IN_PATTERN_P (stmt_info)
5584          && pattern_stmt
5585          && (STMT_VINFO_RELEVANT_P (vinfo_for_stmt (pattern_stmt))
5586              || STMT_VINFO_LIVE_P (vinfo_for_stmt (pattern_stmt))))
5587        {
5588          /* Analyze PATTERN_STMT instead of the original stmt.  */
5589          stmt = pattern_stmt;
5590          stmt_info = vinfo_for_stmt (pattern_stmt);
5591          if (dump_enabled_p ())
5592            {
5593              dump_printf_loc (MSG_NOTE, vect_location,
5594                               "==> examining pattern statement: ");
5595              dump_gimple_stmt (MSG_NOTE, TDF_SLIM, stmt, 0);
5596            }
5597        }
5598      else
5599        {
5600          if (dump_enabled_p ())
5601            dump_printf_loc (MSG_NOTE, vect_location, "irrelevant.");
5602
5603          return true;
5604        }
5605    }
5606  else if (STMT_VINFO_IN_PATTERN_P (stmt_info)
5607	   && node == NULL
5608           && pattern_stmt
5609           && (STMT_VINFO_RELEVANT_P (vinfo_for_stmt (pattern_stmt))
5610               || STMT_VINFO_LIVE_P (vinfo_for_stmt (pattern_stmt))))
5611    {
5612      /* Analyze PATTERN_STMT too.  */
5613      if (dump_enabled_p ())
5614        {
5615          dump_printf_loc (MSG_NOTE, vect_location,
5616                           "==> examining pattern statement: ");
5617          dump_gimple_stmt (MSG_NOTE, TDF_SLIM, stmt, 0);
5618        }
5619
5620      if (!vect_analyze_stmt (pattern_stmt, need_to_vectorize, node))
5621        return false;
5622   }
5623
5624  if (is_pattern_stmt_p (stmt_info)
5625      && node == NULL
5626      && (pattern_def_seq = STMT_VINFO_PATTERN_DEF_SEQ (stmt_info)))
5627    {
5628      gimple_stmt_iterator si;
5629
5630      for (si = gsi_start (pattern_def_seq); !gsi_end_p (si); gsi_next (&si))
5631	{
5632	  gimple pattern_def_stmt = gsi_stmt (si);
5633	  if (STMT_VINFO_RELEVANT_P (vinfo_for_stmt (pattern_def_stmt))
5634	      || STMT_VINFO_LIVE_P (vinfo_for_stmt (pattern_def_stmt)))
5635	    {
5636	      /* Analyze def stmt of STMT if it's a pattern stmt.  */
5637	      if (dump_enabled_p ())
5638		{
5639		  dump_printf_loc (MSG_NOTE, vect_location,
5640                                   "==> examining pattern def statement: ");
5641		  dump_gimple_stmt (MSG_NOTE, TDF_SLIM, pattern_def_stmt, 0);
5642		}
5643
5644	      if (!vect_analyze_stmt (pattern_def_stmt,
5645				      need_to_vectorize, node))
5646		return false;
5647	    }
5648	}
5649    }
5650
5651  switch (STMT_VINFO_DEF_TYPE (stmt_info))
5652    {
5653      case vect_internal_def:
5654        break;
5655
5656      case vect_reduction_def:
5657      case vect_nested_cycle:
5658         gcc_assert (!bb_vinfo && (relevance == vect_used_in_outer
5659                     || relevance == vect_used_in_outer_by_reduction
5660                     || relevance == vect_unused_in_scope));
5661         break;
5662
5663      case vect_induction_def:
5664      case vect_constant_def:
5665      case vect_external_def:
5666      case vect_unknown_def_type:
5667      default:
5668        gcc_unreachable ();
5669    }
5670
5671  if (bb_vinfo)
5672    {
5673      gcc_assert (PURE_SLP_STMT (stmt_info));
5674
5675      scalar_type = TREE_TYPE (gimple_get_lhs (stmt));
5676      if (dump_enabled_p ())
5677        {
5678          dump_printf_loc (MSG_NOTE, vect_location,
5679                           "get vectype for scalar type:  ");
5680          dump_generic_expr (MSG_NOTE, TDF_SLIM, scalar_type);
5681        }
5682
5683      vectype = get_vectype_for_scalar_type (scalar_type);
5684      if (!vectype)
5685        {
5686          if (dump_enabled_p ())
5687            {
5688               dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5689                                "not SLPed: unsupported data-type ");
5690               dump_generic_expr (MSG_MISSED_OPTIMIZATION, TDF_SLIM,
5691                                  scalar_type);
5692            }
5693          return false;
5694        }
5695
5696      if (dump_enabled_p ())
5697        {
5698          dump_printf_loc (MSG_NOTE, vect_location, "vectype:  ");
5699          dump_generic_expr (MSG_NOTE, TDF_SLIM, vectype);
5700        }
5701
5702      STMT_VINFO_VECTYPE (stmt_info) = vectype;
5703   }
5704
5705  if (STMT_VINFO_RELEVANT_P (stmt_info))
5706    {
5707      gcc_assert (!VECTOR_MODE_P (TYPE_MODE (gimple_expr_type (stmt))));
5708      gcc_assert (STMT_VINFO_VECTYPE (stmt_info));
5709      *need_to_vectorize = true;
5710    }
5711
5712   ok = true;
5713   if (!bb_vinfo
5714       && (STMT_VINFO_RELEVANT_P (stmt_info)
5715           || STMT_VINFO_DEF_TYPE (stmt_info) == vect_reduction_def))
5716      ok = (vectorizable_conversion (stmt, NULL, NULL, NULL)
5717            || vectorizable_shift (stmt, NULL, NULL, NULL)
5718            || vectorizable_operation (stmt, NULL, NULL, NULL)
5719            || vectorizable_assignment (stmt, NULL, NULL, NULL)
5720            || vectorizable_load (stmt, NULL, NULL, NULL, NULL)
5721	    || vectorizable_call (stmt, NULL, NULL, NULL)
5722            || vectorizable_store (stmt, NULL, NULL, NULL)
5723            || vectorizable_reduction (stmt, NULL, NULL, NULL)
5724            || vectorizable_condition (stmt, NULL, NULL, NULL, 0, NULL));
5725    else
5726      {
5727        if (bb_vinfo)
5728	  ok = (vectorizable_conversion (stmt, NULL, NULL, node)
5729		|| vectorizable_shift (stmt, NULL, NULL, node)
5730                || vectorizable_operation (stmt, NULL, NULL, node)
5731                || vectorizable_assignment (stmt, NULL, NULL, node)
5732                || vectorizable_load (stmt, NULL, NULL, node, NULL)
5733		|| vectorizable_call (stmt, NULL, NULL, node)
5734                || vectorizable_store (stmt, NULL, NULL, node)
5735                || vectorizable_condition (stmt, NULL, NULL, NULL, 0, node));
5736      }
5737
5738  if (!ok)
5739    {
5740      if (dump_enabled_p ())
5741        {
5742          dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5743                           "not vectorized: relevant stmt not ");
5744          dump_printf (MSG_MISSED_OPTIMIZATION, "supported: ");
5745          dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM, stmt, 0);
5746        }
5747
5748      return false;
5749    }
5750
5751  if (bb_vinfo)
5752    return true;
5753
5754  /* Stmts that are (also) "live" (i.e., that are used outside of the loop)
5755      need extra handling, except for vectorizable reductions.  */
5756  if (STMT_VINFO_LIVE_P (stmt_info)
5757      && STMT_VINFO_TYPE (stmt_info) != reduc_vec_info_type)
5758    ok = vectorizable_live_operation (stmt, NULL, NULL);
5759
5760  if (!ok)
5761    {
5762      if (dump_enabled_p ())
5763        {
5764          dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5765                           "not vectorized: live stmt not ");
5766          dump_printf (MSG_MISSED_OPTIMIZATION,  "supported: ");
5767          dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM, stmt, 0);
5768        }
5769
5770       return false;
5771    }
5772
5773  return true;
5774}
5775
5776
5777/* Function vect_transform_stmt.
5778
5779   Create a vectorized stmt to replace STMT, and insert it at BSI.  */
5780   Create a vectorized stmt to replace STMT, and insert it at GSI.  */
5781bool
5782vect_transform_stmt (gimple stmt, gimple_stmt_iterator *gsi,
5783		     bool *grouped_store, slp_tree slp_node,
5784                     slp_instance slp_node_instance)
5785{
5786  bool is_store = false;
5787  gimple vec_stmt = NULL;
5788  stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
5789  bool done;
5790
5791  switch (STMT_VINFO_TYPE (stmt_info))
5792    {
5793    case type_demotion_vec_info_type:
5794    case type_promotion_vec_info_type:
5795    case type_conversion_vec_info_type:
5796      done = vectorizable_conversion (stmt, gsi, &vec_stmt, slp_node);
5797      gcc_assert (done);
5798      break;
5799
5800    case induc_vec_info_type:
5801      gcc_assert (!slp_node);
5802      done = vectorizable_induction (stmt, gsi, &vec_stmt);
5803      gcc_assert (done);
5804      break;
5805
5806    case shift_vec_info_type:
5807      done = vectorizable_shift (stmt, gsi, &vec_stmt, slp_node);
5808      gcc_assert (done);
5809      break;
5810
5811    case op_vec_info_type:
5812      done = vectorizable_operation (stmt, gsi, &vec_stmt, slp_node);
5813      gcc_assert (done);
5814      break;
5815
5816    case assignment_vec_info_type:
5817      done = vectorizable_assignment (stmt, gsi, &vec_stmt, slp_node);
5818      gcc_assert (done);
5819      break;
5820
5821    case load_vec_info_type:
5822      done = vectorizable_load (stmt, gsi, &vec_stmt, slp_node,
5823                                slp_node_instance);
5824      gcc_assert (done);
5825      break;
5826
5827    case store_vec_info_type:
5828      done = vectorizable_store (stmt, gsi, &vec_stmt, slp_node);
5829      gcc_assert (done);
5830      if (STMT_VINFO_GROUPED_ACCESS (stmt_info) && !slp_node)
5831	{
5832	  /* In case of interleaving, the whole chain is vectorized when the
5833	     last store in the chain is reached.  Store stmts before the last
5834	     one are skipped, and their vec_stmt_info shouldn't be freed
5835	     meanwhile.  */
5836	  *grouped_store = true;
5837	  if (STMT_VINFO_VEC_STMT (stmt_info))
5838	    is_store = true;
5839	  }
5840      else
5841	is_store = true;
5842      break;
5843
5844    case condition_vec_info_type:
5845      done = vectorizable_condition (stmt, gsi, &vec_stmt, NULL, 0, slp_node);
5846      gcc_assert (done);
5847      break;
5848
5849    case call_vec_info_type:
5850      done = vectorizable_call (stmt, gsi, &vec_stmt, slp_node);
5851      stmt = gsi_stmt (*gsi);
5852      break;
5853
5854    case reduc_vec_info_type:
5855      done = vectorizable_reduction (stmt, gsi, &vec_stmt, slp_node);
5856      gcc_assert (done);
5857      break;
5858
5859    default:
5860      if (!STMT_VINFO_LIVE_P (stmt_info))
5861	{
5862	  if (dump_enabled_p ())
5863	    dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5864                             "stmt not supported.");
5865	  gcc_unreachable ();
5866	}
5867    }
5868
5869  /* Handle inner-loop stmts whose DEF is used in the loop-nest that
5870     is being vectorized, but outside the immediately enclosing loop.  */
5871  if (vec_stmt
5872      && STMT_VINFO_LOOP_VINFO (stmt_info)
5873      && nested_in_vect_loop_p (LOOP_VINFO_LOOP (
5874                                STMT_VINFO_LOOP_VINFO (stmt_info)), stmt)
5875      && STMT_VINFO_TYPE (stmt_info) != reduc_vec_info_type
5876      && (STMT_VINFO_RELEVANT (stmt_info) == vect_used_in_outer
5877          || STMT_VINFO_RELEVANT (stmt_info) ==
5878                                           vect_used_in_outer_by_reduction))
5879    {
5880      struct loop *innerloop = LOOP_VINFO_LOOP (
5881                                STMT_VINFO_LOOP_VINFO (stmt_info))->inner;
5882      imm_use_iterator imm_iter;
5883      use_operand_p use_p;
5884      tree scalar_dest;
5885      gimple exit_phi;
5886
5887      if (dump_enabled_p ())
5888        dump_printf_loc (MSG_NOTE, vect_location,
5889                         "Record the vdef for outer-loop vectorization.");
5890
5891      /* Find the relevant loop-exit phi-node, and record the vec_stmt there
5892        (to be used when vectorizing outer-loop stmts that use the DEF of
5893        STMT).  */
5894      if (gimple_code (stmt) == GIMPLE_PHI)
5895        scalar_dest = PHI_RESULT (stmt);
5896      else
5897        scalar_dest = gimple_assign_lhs (stmt);
5898
5899      FOR_EACH_IMM_USE_FAST (use_p, imm_iter, scalar_dest)
5900       {
5901         if (!flow_bb_inside_loop_p (innerloop, gimple_bb (USE_STMT (use_p))))
5902           {
5903             exit_phi = USE_STMT (use_p);
5904             STMT_VINFO_VEC_STMT (vinfo_for_stmt (exit_phi)) = vec_stmt;
5905           }
5906       }
5907    }
5908
5909  /* Handle stmts whose DEF is used outside the loop-nest that is
5910     being vectorized.  */
5911  if (STMT_VINFO_LIVE_P (stmt_info)
5912      && STMT_VINFO_TYPE (stmt_info) != reduc_vec_info_type)
5913    {
5914      done = vectorizable_live_operation (stmt, gsi, &vec_stmt);
5915      gcc_assert (done);
5916    }
5917
5918  if (vec_stmt)
5919    STMT_VINFO_VEC_STMT (stmt_info) = vec_stmt;
5920
5921  return is_store;
5922}
5923
5924
5925/* Remove a group of stores (for SLP or interleaving), free their
5926   stmt_vec_info.  */
5927
5928void
5929vect_remove_stores (gimple first_stmt)
5930{
5931  gimple next = first_stmt;
5932  gimple tmp;
5933  gimple_stmt_iterator next_si;
5934
5935  while (next)
5936    {
5937      stmt_vec_info stmt_info = vinfo_for_stmt (next);
5938
5939      tmp = GROUP_NEXT_ELEMENT (stmt_info);
5940      if (is_pattern_stmt_p (stmt_info))
5941	next = STMT_VINFO_RELATED_STMT (stmt_info);
5942      /* Free the attached stmt_vec_info and remove the stmt.  */
5943      next_si = gsi_for_stmt (next);
5944      unlink_stmt_vdef (next);
5945      gsi_remove (&next_si, true);
5946      release_defs (next);
5947      free_stmt_vec_info (next);
5948      next = tmp;
5949    }
5950}
5951
5952
5953/* Function new_stmt_vec_info.
5954
5955   Create and initialize a new stmt_vec_info struct for STMT.  */
5956
5957stmt_vec_info
5958new_stmt_vec_info (gimple stmt, loop_vec_info loop_vinfo,
5959                   bb_vec_info bb_vinfo)
5960{
5961  stmt_vec_info res;
5962  res = (stmt_vec_info) xcalloc (1, sizeof (struct _stmt_vec_info));
5963
5964  STMT_VINFO_TYPE (res) = undef_vec_info_type;
5965  STMT_VINFO_STMT (res) = stmt;
5966  STMT_VINFO_LOOP_VINFO (res) = loop_vinfo;
5967  STMT_VINFO_BB_VINFO (res) = bb_vinfo;
5968  STMT_VINFO_RELEVANT (res) = vect_unused_in_scope;
5969  STMT_VINFO_LIVE_P (res) = false;
5970  STMT_VINFO_VECTYPE (res) = NULL;
5971  STMT_VINFO_VEC_STMT (res) = NULL;
5972  STMT_VINFO_VECTORIZABLE (res) = true;
5973  STMT_VINFO_IN_PATTERN_P (res) = false;
5974  STMT_VINFO_RELATED_STMT (res) = NULL;
5975  STMT_VINFO_PATTERN_DEF_SEQ (res) = NULL;
5976  STMT_VINFO_DATA_REF (res) = NULL;
5977
5978  STMT_VINFO_DR_BASE_ADDRESS (res) = NULL;
5979  STMT_VINFO_DR_OFFSET (res) = NULL;
5980  STMT_VINFO_DR_INIT (res) = NULL;
5981  STMT_VINFO_DR_STEP (res) = NULL;
5982  STMT_VINFO_DR_ALIGNED_TO (res) = NULL;
5983
5984  if (gimple_code (stmt) == GIMPLE_PHI
5985      && is_loop_header_bb_p (gimple_bb (stmt)))
5986    STMT_VINFO_DEF_TYPE (res) = vect_unknown_def_type;
5987  else
5988    STMT_VINFO_DEF_TYPE (res) = vect_internal_def;
5989
5990  STMT_VINFO_SAME_ALIGN_REFS (res).create (0);
5991  STMT_SLP_TYPE (res) = loop_vect;
5992  GROUP_FIRST_ELEMENT (res) = NULL;
5993  GROUP_NEXT_ELEMENT (res) = NULL;
5994  GROUP_SIZE (res) = 0;
5995  GROUP_STORE_COUNT (res) = 0;
5996  GROUP_GAP (res) = 0;
5997  GROUP_SAME_DR_STMT (res) = NULL;
5998  GROUP_READ_WRITE_DEPENDENCE (res) = false;
5999
6000  return res;
6001}
6002
6003
6004/* Create a vector for stmt_vec_info.  */
6005
6006void
6007init_stmt_vec_info_vec (void)
6008{
6009  gcc_assert (!stmt_vec_info_vec.exists ());
6010  stmt_vec_info_vec.create (50);
6011}
6012
6013
6014/* Free the stmt_vec_info vector.  */
6015
6016void
6017free_stmt_vec_info_vec (void)
6018{
6019  unsigned int i;
6020  vec_void_p info;
6021  FOR_EACH_VEC_ELT (stmt_vec_info_vec, i, info)
6022    if (info != NULL)
6023      free_stmt_vec_info (STMT_VINFO_STMT ((stmt_vec_info) info));
6024  gcc_assert (stmt_vec_info_vec.exists ());
6025  stmt_vec_info_vec.release ();
6026}
6027
6028
6029/* Free stmt vectorization related info.  */
6030
6031void
6032free_stmt_vec_info (gimple stmt)
6033{
6034  stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
6035
6036  if (!stmt_info)
6037    return;
6038
6039  /* Check if this statement has a related "pattern stmt"
6040     (introduced by the vectorizer during the pattern recognition
6041     pass).  Free pattern's stmt_vec_info and def stmt's stmt_vec_info
6042     too.  */
6043  if (STMT_VINFO_IN_PATTERN_P (stmt_info))
6044    {
6045      stmt_vec_info patt_info
6046	= vinfo_for_stmt (STMT_VINFO_RELATED_STMT (stmt_info));
6047      if (patt_info)
6048	{
6049	  gimple_seq seq = STMT_VINFO_PATTERN_DEF_SEQ (patt_info);
6050	  if (seq)
6051	    {
6052	      gimple_stmt_iterator si;
6053	      for (si = gsi_start (seq); !gsi_end_p (si); gsi_next (&si))
6054		free_stmt_vec_info (gsi_stmt (si));
6055	    }
6056	  free_stmt_vec_info (STMT_VINFO_RELATED_STMT (stmt_info));
6057	}
6058    }
6059
6060  STMT_VINFO_SAME_ALIGN_REFS (stmt_info).release ();
6061  set_vinfo_for_stmt (stmt, NULL);
6062  free (stmt_info);
6063}
6064
6065
6066/* Function get_vectype_for_scalar_type_and_size.
6067
6068   Returns the vector type corresponding to SCALAR_TYPE  and SIZE as supported
6069   by the target.  */
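
/* For example (assuming a target that provides 16-byte vector modes), a
   SCALAR_TYPE of int and a SIZE of 16 would yield a vector type with four
   int elements, while a SIZE of 0 lets the target's preferred SIMD mode
   determine the number of elements instead.  */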
6070
6071static tree
6072get_vectype_for_scalar_type_and_size (tree scalar_type, unsigned size)
6073{
6074  enum machine_mode inner_mode = TYPE_MODE (scalar_type);
6075  enum machine_mode simd_mode;
6076  unsigned int nbytes = GET_MODE_SIZE (inner_mode);
6077  int nunits;
6078  tree vectype;
6079
6080  if (nbytes == 0)
6081    return NULL_TREE;
6082
6083  if (GET_MODE_CLASS (inner_mode) != MODE_INT
6084      && GET_MODE_CLASS (inner_mode) != MODE_FLOAT)
6085    return NULL_TREE;
6086
6087  /* For vector types of elements whose mode precision doesn't
6088     match their type's precision we use an element type of mode
6089     precision.  The vectorization routines will have to make sure
6090     they support the proper result truncation/extension.
6091     We also make sure to build vector types with INTEGER_TYPE
6092     component type only.  */
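
  /* For instance (illustrative only): a 3-bit bit-field type whose mode is
     QImode, or a boolean or enumeral type, is replaced here by a
     mode-precision INTEGER_TYPE of the same signedness before the vector
     type is built.  */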
6093  if (INTEGRAL_TYPE_P (scalar_type)
6094      && (GET_MODE_BITSIZE (inner_mode) != TYPE_PRECISION (scalar_type)
6095	  || TREE_CODE (scalar_type) != INTEGER_TYPE))
6096    scalar_type = build_nonstandard_integer_type (GET_MODE_BITSIZE (inner_mode),
6097						  TYPE_UNSIGNED (scalar_type));
6098
6099  /* We shouldn't end up building VECTOR_TYPEs of non-scalar components.
6100     When the component mode passes the above test simply use a type
6101     corresponding to that mode.  The theory is that any use that
6102     would cause problems with this will disable vectorization anyway.  */
6103  else if (!SCALAR_FLOAT_TYPE_P (scalar_type)
6104	   && !INTEGRAL_TYPE_P (scalar_type))
6105    scalar_type = lang_hooks.types.type_for_mode (inner_mode, 1);
6106
6107  /* We can't build a vector type of elements with alignment bigger than
6108     their size.  */
6109  else if (nbytes < TYPE_ALIGN_UNIT (scalar_type))
6110    scalar_type = lang_hooks.types.type_for_mode (inner_mode,
6111						  TYPE_UNSIGNED (scalar_type));
6112
6113  /* If we fell back to using the mode, fail if there was
6114     no scalar type for it.  */
6115  if (scalar_type == NULL_TREE)
6116    return NULL_TREE;
6117
6118  /* If no size was supplied, use the mode the target prefers.  Otherwise
6119     look up a vector mode of the specified size.  */
6120  if (size == 0)
6121    simd_mode = targetm.vectorize.preferred_simd_mode (inner_mode);
6122  else
6123    simd_mode = mode_for_vector (inner_mode, size / nbytes);
6124  nunits = GET_MODE_SIZE (simd_mode) / nbytes;
6125  if (nunits <= 1)
6126    return NULL_TREE;
6127
6128  vectype = build_vector_type (scalar_type, nunits);
6129  if (dump_enabled_p ())
6130    {
6131      dump_printf_loc (MSG_NOTE, vect_location,
6132                       "get vectype with %d units of type ", nunits);
6133      dump_generic_expr (MSG_NOTE, TDF_SLIM, scalar_type);
6134    }
6135
6136  if (!vectype)
6137    return NULL_TREE;
6138
6139  if (dump_enabled_p ())
6140    {
6141      dump_printf_loc (MSG_NOTE, vect_location, "vectype: ");
6142      dump_generic_expr (MSG_NOTE, TDF_SLIM, vectype);
6143    }
6144
6145  if (!VECTOR_MODE_P (TYPE_MODE (vectype))
6146      && !INTEGRAL_MODE_P (TYPE_MODE (vectype)))
6147    {
6148      if (dump_enabled_p ())
6149        dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
6150                         "mode not supported by target.");
6151      return NULL_TREE;
6152    }
6153
6154  return vectype;
6155}
6156
6157unsigned int current_vector_size;
6158
6159/* Function get_vectype_for_scalar_type.
6160
6161   Returns the vector type corresponding to SCALAR_TYPE as supported
6162   by the target.  */
6163
6164tree
6165get_vectype_for_scalar_type (tree scalar_type)
6166{
6167  tree vectype;
6168  vectype = get_vectype_for_scalar_type_and_size (scalar_type,
6169						  current_vector_size);
6170  if (vectype
6171      && current_vector_size == 0)
6172    current_vector_size = GET_MODE_SIZE (TYPE_MODE (vectype));
6173  return vectype;
6174}
6175
6176/* Function get_same_sized_vectype
6177
6178   Returns a vector type corresponding to SCALAR_TYPE of size
6179   VECTOR_TYPE if supported by the target.  */
6180
6181tree
6182get_same_sized_vectype (tree scalar_type, tree vector_type)
6183{
6184  return get_vectype_for_scalar_type_and_size
6185	   (scalar_type, GET_MODE_SIZE (TYPE_MODE (vector_type)));
6186}
6187
6188/* Function vect_is_simple_use.
6189
6190   Input:
6191   LOOP_VINFO - the vect info of the loop that is being vectorized.
6192   BB_VINFO - the vect info of the basic block that is being vectorized.
6193   OPERAND - operand of STMT in the loop or bb.
6194   DEF - the defining stmt in case OPERAND is an SSA_NAME.
6195
6196   Returns whether a stmt with OPERAND can be vectorized.
6197   For loops, supportable operands are constants, loop invariants, and operands
6198   that are defined by the current iteration of the loop.  Unsupportable
6199   operands are those that are defined by a previous iteration of the loop (as
6200   is the case in reduction/induction computations).
6201   For basic blocks, supportable operands are constants and bb invariants.
6202   For now, operands defined outside the basic block are not supported.  */
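
/* For example (an illustrative loop), when vectorizing

     for (i = 0; i < n; i++)
       a[i] = b[i] * 4 + inv_3;

   the constant 4 is classified as vect_constant_def, the loop-invariant
   inv_3 as vect_external_def, and the value loaded from b[i] in the
   current iteration as vect_internal_def.  */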
6203
6204bool
6205vect_is_simple_use (tree operand, gimple stmt, loop_vec_info loop_vinfo,
6206                    bb_vec_info bb_vinfo, gimple *def_stmt,
6207		    tree *def, enum vect_def_type *dt)
6208{
6209  basic_block bb;
6210  stmt_vec_info stmt_vinfo;
6211  struct loop *loop = NULL;
6212
6213  if (loop_vinfo)
6214    loop = LOOP_VINFO_LOOP (loop_vinfo);
6215
6216  *def_stmt = NULL;
6217  *def = NULL_TREE;
6218
6219  if (dump_enabled_p ())
6220    {
6221      dump_printf_loc (MSG_NOTE, vect_location,
6222                       "vect_is_simple_use: operand ");
6223      dump_generic_expr (MSG_NOTE, TDF_SLIM, operand);
6224    }
6225
6226  if (CONSTANT_CLASS_P (operand))
6227    {
6228      *dt = vect_constant_def;
6229      return true;
6230    }
6231
6232  if (is_gimple_min_invariant (operand))
6233    {
6234      *def = operand;
6235      *dt = vect_external_def;
6236      return true;
6237    }
6238
6239  if (TREE_CODE (operand) == PAREN_EXPR)
6240    {
6241      if (dump_enabled_p ())
6242        dump_printf_loc (MSG_NOTE, vect_location, "non-associatable copy.");
6243      operand = TREE_OPERAND (operand, 0);
6244    }
6245
6246  if (TREE_CODE (operand) != SSA_NAME)
6247    {
6248      if (dump_enabled_p ())
6249        dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
6250                         "not ssa-name.");
6251      return false;
6252    }
6253
6254  *def_stmt = SSA_NAME_DEF_STMT (operand);
6255  if (*def_stmt == NULL)
6256    {
6257      if (dump_enabled_p ())
6258        dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
6259                         "no def_stmt.");
6260      return false;
6261    }
6262
6263  if (dump_enabled_p ())
6264    {
6265      dump_printf_loc (MSG_NOTE, vect_location, "def_stmt: ");
6266      dump_gimple_stmt (MSG_NOTE, TDF_SLIM, *def_stmt, 0);
6267    }
6268
6269  /* Empty stmt is expected only in case of a function argument.
6270     (Otherwise - we expect a phi_node or a GIMPLE_ASSIGN).  */
6271  if (gimple_nop_p (*def_stmt))
6272    {
6273      *def = operand;
6274      *dt = vect_external_def;
6275      return true;
6276    }
6277
6278  bb = gimple_bb (*def_stmt);
6279
6280  if ((loop && !flow_bb_inside_loop_p (loop, bb))
6281      || (!loop && bb != BB_VINFO_BB (bb_vinfo))
6282      || (!loop && gimple_code (*def_stmt) == GIMPLE_PHI))
6283    *dt = vect_external_def;
6284  else
6285    {
6286      stmt_vinfo = vinfo_for_stmt (*def_stmt);
6287      *dt = STMT_VINFO_DEF_TYPE (stmt_vinfo);
6288    }
6289
6290  if (*dt == vect_unknown_def_type
6291      || (stmt
6292	  && *dt == vect_double_reduction_def
6293	  && gimple_code (stmt) != GIMPLE_PHI))
6294    {
6295      if (dump_enabled_p ())
6296        dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
6297                         "Unsupported pattern.");
6298      return false;
6299    }
6300
6301  if (dump_enabled_p ())
6302    dump_printf_loc (MSG_NOTE, vect_location, "type of def: %d.", *dt);
6303
6304  switch (gimple_code (*def_stmt))
6305    {
6306    case GIMPLE_PHI:
6307      *def = gimple_phi_result (*def_stmt);
6308      break;
6309
6310    case GIMPLE_ASSIGN:
6311      *def = gimple_assign_lhs (*def_stmt);
6312      break;
6313
6314    case GIMPLE_CALL:
6315      *def = gimple_call_lhs (*def_stmt);
6316      if (*def != NULL)
6317	break;
6318      /* FALLTHRU */
6319    default:
6320      if (dump_enabled_p ())
6321        dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
6322                         "unsupported defining stmt: ");
6323      return false;
6324    }
6325
6326  return true;
6327}
6328
6329/* Function vect_is_simple_use_1.
6330
6331   Same as vect_is_simple_use but also determines the vector operand
6332   type of OPERAND and stores it to *VECTYPE.  If the definition of
6333   OPERAND is vect_uninitialized_def, vect_constant_def or
6334   vect_external_def, *VECTYPE will be set to NULL_TREE and the caller
6335   is responsible for computing the best suited vector type for the
6336   scalar operand.  */
6337
6338bool
6339vect_is_simple_use_1 (tree operand, gimple stmt, loop_vec_info loop_vinfo,
6340		      bb_vec_info bb_vinfo, gimple *def_stmt,
6341		      tree *def, enum vect_def_type *dt, tree *vectype)
6342{
6343  if (!vect_is_simple_use (operand, stmt, loop_vinfo, bb_vinfo, def_stmt,
6344			   def, dt))
6345    return false;
6346
6347  /* Now get a vector type if the def is internal, otherwise supply
6348     NULL_TREE and leave it up to the caller to figure out a proper
6349     type for the use stmt.  */
6350  if (*dt == vect_internal_def
6351      || *dt == vect_induction_def
6352      || *dt == vect_reduction_def
6353      || *dt == vect_double_reduction_def
6354      || *dt == vect_nested_cycle)
6355    {
6356      stmt_vec_info stmt_info = vinfo_for_stmt (*def_stmt);
6357
6358      if (STMT_VINFO_IN_PATTERN_P (stmt_info)
6359          && !STMT_VINFO_RELEVANT (stmt_info)
6360          && !STMT_VINFO_LIVE_P (stmt_info))
6361	stmt_info = vinfo_for_stmt (STMT_VINFO_RELATED_STMT (stmt_info));
6362
6363      *vectype = STMT_VINFO_VECTYPE (stmt_info);
6364      gcc_assert (*vectype != NULL_TREE);
6365    }
6366  else if (*dt == vect_uninitialized_def
6367	   || *dt == vect_constant_def
6368	   || *dt == vect_external_def)
6369    *vectype = NULL_TREE;
6370  else
6371    gcc_unreachable ();
6372
6373  return true;
6374}
6375
6376
6377/* Function supportable_widening_operation
6378
6379   Check whether an operation represented by the code CODE is a
6380   widening operation that is supported by the target platform in
6381   vector form (i.e., when operating on arguments of type VECTYPE_IN
6382   producing a result of type VECTYPE_OUT).
6383
6384   Widening operations we currently support are NOP (CONVERT), FLOAT
6385   and WIDEN_MULT.  This function checks if these operations are supported
6386   by the target platform either directly (via vector tree-codes), or via
6387   target builtins.
6388
6389   Output:
6390   - CODE1 and CODE2 are codes of vector operations to be used when
6391   vectorizing the operation, if available.
6392   - MULTI_STEP_CVT determines the number of required intermediate steps in
6393   case of multi-step conversion (like char->short->int - in that case
6394   MULTI_STEP_CVT will be 1).
6395   - INTERM_TYPES contains the intermediate type required to perform the
6396   widening operation (short in the above example).  */
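
/* As an illustrative sketch (the vector modes below are assumptions about
   the target, not requirements): converting a vector of chars to vectors
   of ints takes two steps,

     V16QI -> 2 x V8HI   (VEC_UNPACK_LO_EXPR / VEC_UNPACK_HI_EXPR)
     V8HI  -> 2 x V4SI   (VEC_UNPACK_LO_EXPR / VEC_UNPACK_HI_EXPR)

   so *MULTI_STEP_CVT would be 1 and *INTERM_TYPES would record the
   intermediate short-element vector type.  */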
6397
6398bool
6399supportable_widening_operation (enum tree_code code, gimple stmt,
6400				tree vectype_out, tree vectype_in,
6401                                enum tree_code *code1, enum tree_code *code2,
6402                                int *multi_step_cvt,
6403                                vec<tree> *interm_types)
6404{
6405  stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
6406  loop_vec_info loop_info = STMT_VINFO_LOOP_VINFO (stmt_info);
6407  struct loop *vect_loop = NULL;
6408  enum machine_mode vec_mode;
6409  enum insn_code icode1, icode2;
6410  optab optab1, optab2;
6411  tree vectype = vectype_in;
6412  tree wide_vectype = vectype_out;
6413  enum tree_code c1, c2;
6414  int i;
6415  tree prev_type, intermediate_type;
6416  enum machine_mode intermediate_mode, prev_mode;
6417  optab optab3, optab4;
6418
6419  *multi_step_cvt = 0;
6420  if (loop_info)
6421    vect_loop = LOOP_VINFO_LOOP (loop_info);
6422
6423  switch (code)
6424    {
6425    case WIDEN_MULT_EXPR:
6426      /* The result of a vectorized widening operation usually requires
6427	 two vectors (because the widened results do not fit into one vector).
6428	 The generated vector results would normally be expected to be
6429	 generated in the same order as in the original scalar computation,
6430	 i.e. if 8 results are generated in each vector iteration, they are
6431	 to be organized as follows:
6432		vect1: [res1,res2,res3,res4],
6433		vect2: [res5,res6,res7,res8].
6434
6435	 However, in the special case that the result of the widening
6436	 operation is used in a reduction computation only, the order doesn't
6437	 matter (because when vectorizing a reduction we change the order of
6438	 the computation).  Some targets can take advantage of this and
6439	 generate more efficient code.  For example, targets like Altivec,
6440	 that support widen_mult using a sequence of {mult_even,mult_odd}
6441	 generate the following vectors:
6442		vect1: [res1,res3,res5,res7],
6443		vect2: [res2,res4,res6,res8].
6444
6445	 When vectorizing outer-loops, we execute the inner-loop sequentially
6446	 (each vectorized inner-loop iteration contributes to VF outer-loop
6447	 iterations in parallel).  We therefore don't allow changing the
6448	 order of the computation in the inner-loop during outer-loop
6449	 vectorization.  */
6450      /* TODO: Another case in which order doesn't *really* matter is when we
6451	 widen and then contract again, e.g. (short)((int)x * y >> 8).
6452	 Normally, pack_trunc performs an even/odd permute, whereas the
6453	 repack from an even/odd expansion would be an interleave, which
6454	 would be significantly simpler for e.g. AVX2.  */
6455      /* In any case, in order to avoid duplicating the code below, recurse
6456	 on VEC_WIDEN_MULT_EVEN_EXPR.  If it succeeds, all the return values
6457	 are properly set up for the caller.  If we fail, we'll continue with
6458	 a VEC_WIDEN_MULT_LO/HI_EXPR check.  */
6459      if (vect_loop
6460	  && STMT_VINFO_RELEVANT (stmt_info) == vect_used_by_reduction
6461	  && !nested_in_vect_loop_p (vect_loop, stmt)
6462	  && supportable_widening_operation (VEC_WIDEN_MULT_EVEN_EXPR,
6463					     stmt, vectype_out, vectype_in,
6464					     code1, code2, multi_step_cvt,
6465					     interm_types))
6466        {
6467          /* Elements in a vector with the vect_used_by_reduction property
6468             cannot be reordered if the use chain carrying this property does
6469             not have the same operation.  One such example is s += a * b, where
6470             the elements in a and b cannot be reordered.  Here we check that the
6471             vector defined by STMT is only used directly in the reduction statement.  */
6472          tree lhs = gimple_assign_lhs (stmt);
6473          use_operand_p dummy;
6474          gimple use_stmt;
6475          stmt_vec_info use_stmt_info = NULL;
6476          if (single_imm_use (lhs, &dummy, &use_stmt)
6477              && (use_stmt_info = vinfo_for_stmt (use_stmt))
6478              && STMT_VINFO_DEF_TYPE (use_stmt_info) == vect_reduction_def)
6479            return true;
6480        }
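      /* Fall through: either the even/odd form is not usable here or the
         target does not support it; check the order-preserving LO/HI pair
         instead.  */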
6481      c1 = VEC_WIDEN_MULT_LO_EXPR;
6482      c2 = VEC_WIDEN_MULT_HI_EXPR;
6483      break;
6484
6485    case VEC_WIDEN_MULT_EVEN_EXPR:
6486      /* Support the recursion induced just above.  */
6487      c1 = VEC_WIDEN_MULT_EVEN_EXPR;
6488      c2 = VEC_WIDEN_MULT_ODD_EXPR;
6489      break;
6490
6491    case WIDEN_LSHIFT_EXPR:
6492      c1 = VEC_WIDEN_LSHIFT_LO_EXPR;
6493      c2 = VEC_WIDEN_LSHIFT_HI_EXPR;
6494      break;
6495
6496    CASE_CONVERT:
6497      c1 = VEC_UNPACK_LO_EXPR;
6498      c2 = VEC_UNPACK_HI_EXPR;
6499      break;
6500
6501    case FLOAT_EXPR:
6502      c1 = VEC_UNPACK_FLOAT_LO_EXPR;
6503      c2 = VEC_UNPACK_FLOAT_HI_EXPR;
6504      break;
6505
6506    case FIX_TRUNC_EXPR:
6507      /* ??? Not yet implemented due to missing VEC_UNPACK_FIX_TRUNC_HI_EXPR/
6508	 VEC_UNPACK_FIX_TRUNC_LO_EXPR tree codes and optabs used for
6509	 computing the operation.  */
6510      return false;
6511
6512    default:
6513      gcc_unreachable ();
6514    }
6515
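  /* On big-endian targets the first scalar elements live in the most
     significant half of a vector, so the roles of the LO and HI variants
     are swapped; the even/odd pair is independent of element order.  */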
6516  if (BYTES_BIG_ENDIAN && c1 != VEC_WIDEN_MULT_EVEN_EXPR)
6517    {
6518      enum tree_code ctmp = c1;
6519      c1 = c2;
6520      c2 = ctmp;
6521    }
6522
6523  if (code == FIX_TRUNC_EXPR)
6524    {
6525      /* The signedness is determined from output operand.  */
6526      /* The signedness is determined from the output operand.  */
6527      optab2 = optab_for_tree_code (c2, vectype_out, optab_default);
6528    }
6529  else
6530    {
6531      optab1 = optab_for_tree_code (c1, vectype, optab_default);
6532      optab2 = optab_for_tree_code (c2, vectype, optab_default);
6533    }
6534
6535  if (!optab1 || !optab2)
6536    return false;
6537
6538  vec_mode = TYPE_MODE (vectype);
6539  if ((icode1 = optab_handler (optab1, vec_mode)) == CODE_FOR_nothing
6540       || (icode2 = optab_handler (optab2, vec_mode)) == CODE_FOR_nothing)
6541    return false;
6542
6543  *code1 = c1;
6544  *code2 = c2;
6545
6546  if (insn_data[icode1].operand[0].mode == TYPE_MODE (wide_vectype)
6547      && insn_data[icode2].operand[0].mode == TYPE_MODE (wide_vectype))
6548    return true;
6549
6550  /* Check if it's a multi-step conversion that can be done using intermediate
6551     types.  */
6552
6553  prev_type = vectype;
6554  prev_mode = vec_mode;
6555
6556  if (!CONVERT_EXPR_CODE_P (code))
6557    return false;
6558
6559  /* We assume here that there will not be more than MAX_INTERM_CVT_STEPS
6560     intermediate steps in the promotion sequence.  We try
6561     MAX_INTERM_CVT_STEPS to get to WIDE_VECTYPE, and fail if we do
6562     not.  */
6563  interm_types->create (MAX_INTERM_CVT_STEPS);
6564  for (i = 0; i < MAX_INTERM_CVT_STEPS; i++)
6565    {
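      /* Each iteration adds one intermediate step: the result mode of the
         previous step becomes the input of the next, until the mode of
         WIDE_VECTYPE is reached.  */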
6566      intermediate_mode = insn_data[icode1].operand[0].mode;
6567      intermediate_type
6568	= lang_hooks.types.type_for_mode (intermediate_mode,
6569					  TYPE_UNSIGNED (prev_type));
6570      optab3 = optab_for_tree_code (c1, intermediate_type, optab_default);
6571      optab4 = optab_for_tree_code (c2, intermediate_type, optab_default);
6572
6573      if (!optab3 || !optab4
6574          || (icode1 = optab_handler (optab1, prev_mode)) == CODE_FOR_nothing
6575	  || insn_data[icode1].operand[0].mode != intermediate_mode
6576	  || (icode2 = optab_handler (optab2, prev_mode)) == CODE_FOR_nothing
6577	  || insn_data[icode2].operand[0].mode != intermediate_mode
6578	  || ((icode1 = optab_handler (optab3, intermediate_mode))
6579	      == CODE_FOR_nothing)
6580	  || ((icode2 = optab_handler (optab4, intermediate_mode))
6581	      == CODE_FOR_nothing))
6582	break;
6583
6584      interm_types->quick_push (intermediate_type);
6585      (*multi_step_cvt)++;
6586
6587      if (insn_data[icode1].operand[0].mode == TYPE_MODE (wide_vectype)
6588	  && insn_data[icode2].operand[0].mode == TYPE_MODE (wide_vectype))
6589	return true;
6590
6591      prev_type = intermediate_type;
6592      prev_mode = intermediate_mode;
6593    }
6594
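  /* The mode of WIDE_VECTYPE was not reached within MAX_INTERM_CVT_STEPS
     steps; discard any intermediate types collected and fail.  */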
6595  interm_types->release ();
6596  return false;
6597}
6598
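/* Illustrative sketch only, not part of the original file: the calling
   convention documented above, for a widening conversion such as
   char -> int where one intermediate step (short) is expected, i.e.
   MULTI_STEP_CVT comes back as 1 and INTERM_TYPES holds the intermediate
   vector type.  The function name and the way the vector types are
   obtained are hypothetical.  */

static bool
example_query_widening (gimple stmt, tree char_vectype, tree int_vectype)
{
  enum tree_code code1, code2;
  int multi_step_cvt;
  vec<tree> interm_types = vNULL;

  /* Ask whether a widening NOP conversion from CHAR_VECTYPE to INT_VECTYPE
     is supported, possibly via intermediate types.  */
  if (!supportable_widening_operation (NOP_EXPR, stmt, int_vectype,
				       char_vectype, &code1, &code2,
				       &multi_step_cvt, &interm_types))
    return false;

  /* For char -> short -> int we would expect MULTI_STEP_CVT == 1 and a
     single short vector type in INTERM_TYPES.  */
  interm_types.release ();
  return true;
}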
6599
6600/* Function supportable_narrowing_operation
6601
6602   Check whether an operation represented by the code CODE is a
6603   narrowing operation that is supported by the target platform in
6604   vector form (i.e., when operating on arguments of type VECTYPE_IN
6605   and producing a result of type VECTYPE_OUT).
6606
6607   Narrowing operations we currently support are NOP (CONVERT) and
6608   FIX_TRUNC.  This function checks if these operations are supported by
6609   the target platform directly via vector tree-codes.
6610
6611   Output:
6612   - CODE1 is the code of a vector operation to be used when
6613   vectorizing the operation, if available.
6614   - MULTI_STEP_CVT determines the number of required intermediate steps in
6615   case of multi-step conversion (like int->short->char - in that case
6616   MULTI_STEP_CVT will be 1).
6617   - INTERM_TYPES contains the intermediate type required to perform the
6618   narrowing operation (short in the above example).   */
6619
6620bool
6621supportable_narrowing_operation (enum tree_code code,
6622				 tree vectype_out, tree vectype_in,
6623				 enum tree_code *code1, int *multi_step_cvt,
6624                                 vec<tree> *interm_types)
6625{
6626  enum machine_mode vec_mode;
6627  enum insn_code icode1;
6628  optab optab1, interm_optab;
6629  tree vectype = vectype_in;
6630  tree narrow_vectype = vectype_out;
6631  enum tree_code c1;
6632  tree intermediate_type;
6633  enum machine_mode intermediate_mode, prev_mode;
6634  int i;
6635  bool uns;
6636
6637  *multi_step_cvt = 0;
6638  switch (code)
6639    {
6640    CASE_CONVERT:
6641      c1 = VEC_PACK_TRUNC_EXPR;
6642      break;
6643
6644    case FIX_TRUNC_EXPR:
6645      c1 = VEC_PACK_FIX_TRUNC_EXPR;
6646      break;
6647
6648    case FLOAT_EXPR:
6649      /* ??? Not yet implemented due to missing VEC_PACK_FLOAT_EXPR
6650	 tree code and optabs used for computing the operation.  */
6651      return false;
6652
6653    default:
6654      gcc_unreachable ();
6655    }
6656
6657  if (code == FIX_TRUNC_EXPR)
6658    /* The signedness is determined from the output operand.  */
6659    optab1 = optab_for_tree_code (c1, vectype_out, optab_default);
6660  else
6661    optab1 = optab_for_tree_code (c1, vectype, optab_default);
6662
6663  if (!optab1)
6664    return false;
6665
6666  vec_mode = TYPE_MODE (vectype);
6667  if ((icode1 = optab_handler (optab1, vec_mode)) == CODE_FOR_nothing)
6668    return false;
6669
6670  *code1 = c1;
6671
6672  if (insn_data[icode1].operand[0].mode == TYPE_MODE (narrow_vectype))
6673    return true;
6674
6675  /* Check if it's a multi-step conversion that can be done using intermediate
6676     types.  */
6677  prev_mode = vec_mode;
6678  if (code == FIX_TRUNC_EXPR)
6679    uns = TYPE_UNSIGNED (vectype_out);
6680  else
6681    uns = TYPE_UNSIGNED (vectype);
6682
6683  /* For multi-step FIX_TRUNC_EXPR prefer a signed float-to-integer
6684     conversion over an unsigned one, as unsigned FIX_TRUNC_EXPR is often more
6685     costly than signed.  */
6686  if (code == FIX_TRUNC_EXPR && uns)
6687    {
6688      enum insn_code icode2;
6689
6690      intermediate_type
6691	= lang_hooks.types.type_for_mode (TYPE_MODE (vectype_out), 0);
6692      interm_optab
6693	= optab_for_tree_code (c1, intermediate_type, optab_default);
6694      if (interm_optab != unknown_optab
6695	  && (icode2 = optab_handler (interm_optab, vec_mode)) != CODE_FOR_nothing
6696	  && insn_data[icode1].operand[0].mode
6697	     == insn_data[icode2].operand[0].mode)
6698	{
6699	  uns = false;
6700	  optab1 = interm_optab;
6701	  icode1 = icode2;
6702	}
6703    }
6704
6705  /* We assume here that there will not be more than MAX_INTERM_CVT_STEPS
6706     intermediate steps in the narrowing sequence.  We try
6707     MAX_INTERM_CVT_STEPS to get to NARROW_VECTYPE, and fail if we do not.  */
6708  interm_types->create (MAX_INTERM_CVT_STEPS);
6709  for (i = 0; i < MAX_INTERM_CVT_STEPS; i++)
6710    {
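      /* As in the widening case, each iteration adds one intermediate
         step, packing into ever narrower vectors until the mode of
         NARROW_VECTYPE is reached.  */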
6711      intermediate_mode = insn_data[icode1].operand[0].mode;
6712      intermediate_type
6713	= lang_hooks.types.type_for_mode (intermediate_mode, uns);
6714      interm_optab
6715	= optab_for_tree_code (VEC_PACK_TRUNC_EXPR, intermediate_type,
6716			       optab_default);
6717      if (!interm_optab
6718	  || ((icode1 = optab_handler (optab1, prev_mode)) == CODE_FOR_nothing)
6719	  || insn_data[icode1].operand[0].mode != intermediate_mode
6720	  || ((icode1 = optab_handler (interm_optab, intermediate_mode))
6721	      == CODE_FOR_nothing))
6722	break;
6723
6724      interm_types->quick_push (intermediate_type);
6725      (*multi_step_cvt)++;
6726
6727      if (insn_data[icode1].operand[0].mode == TYPE_MODE (narrow_vectype))
6728	return true;
6729
6730      prev_mode = intermediate_mode;
6731      optab1 = interm_optab;
6732    }
6733
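  /* The mode of NARROW_VECTYPE was not reached within MAX_INTERM_CVT_STEPS
     steps; discard any intermediate types collected and fail.  */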
6734  interm_types->release ();
6735  return false;
6736}
6737
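/* Illustrative sketch only, not part of the original file: the narrowing
   counterpart of the example after supportable_widening_operation, e.g.
   int -> char via one intermediate short step, in which case
   MULTI_STEP_CVT is 1 and INTERM_TYPES holds the short vector type.  The
   function name and parameters are hypothetical.  */

static bool
example_query_narrowing (tree int_vectype, tree char_vectype)
{
  enum tree_code code1;
  int multi_step_cvt;
  vec<tree> interm_types = vNULL;

  /* Ask whether a narrowing NOP conversion from INT_VECTYPE down to
     CHAR_VECTYPE is supported, possibly via intermediate types.  */
  if (!supportable_narrowing_operation (NOP_EXPR, char_vectype, int_vectype,
					&code1, &multi_step_cvt,
					&interm_types))
    return false;

  /* CODE1 is VEC_PACK_TRUNC_EXPR for plain conversions; each of the
     MULTI_STEP_CVT + 1 steps packs two input vectors into one narrower
     result vector.  */
  interm_types.release ();
  return true;
}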