1;; GCC machine description for SSE instructions
2;; Copyright (C) 2005, 2006, 2007
3;; Free Software Foundation, Inc.
4;;
5;; This file is part of GCC.
6;;
7;; GCC is free software; you can redistribute it and/or modify
8;; it under the terms of the GNU General Public License as published by
9;; the Free Software Foundation; either version 2, or (at your option)
10;; any later version.
11;;
12;; GCC is distributed in the hope that it will be useful,
13;; but WITHOUT ANY WARRANTY; without even the implied warranty of
14;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
15;; GNU General Public License for more details.
16;;
17;; You should have received a copy of the GNU General Public License
18;; along with GCC; see the file COPYING.  If not, write to
19;; the Free Software Foundation, 51 Franklin Street, Fifth Floor,
20;; Boston, MA 02110-1301, USA.
21
22
23;; 16 byte integral modes handled by SSE, minus TImode, which gets
24;; special-cased for TARGET_64BIT.
;; Each define_mode_macro below names a list of vector modes; patterns in
;; this file written with <mode>/<MODE> use them as the mode of their
;; operands (see "mov<mode>" and "push<mode>1").
25(define_mode_macro SSEMODEI [V16QI V8HI V4SI V2DI])
26
27;; All 16-byte vector modes handled by SSE
28(define_mode_macro SSEMODE [V16QI V8HI V4SI V2DI V4SF V2DF])
29
30;; Mix-n-match
;; NOTE(review): the digits in each name appear to list the element widths
;; in bytes of the integer vector modes included (1 = V16QI, 2 = V8HI,
;; 4 = V4SI, 8 = V2DI) -- confirm against the users of these macros.
31(define_mode_macro SSEMODE12 [V16QI V8HI])
32(define_mode_macro SSEMODE24 [V8HI V4SI])
33(define_mode_macro SSEMODE14 [V16QI V4SI])
34(define_mode_macro SSEMODE124 [V16QI V8HI V4SI])
35(define_mode_macro SSEMODE248 [V8HI V4SI V2DI])
36
37;; Mapping from integer vector mode to mnemonic suffix
;; (b, w, d, q for V16QI, V8HI, V4SI, V2DI respectively).
38(define_mode_attr ssevecsize [(V16QI "b") (V8HI "w") (V4SI "d") (V2DI "q")])
39
40;; Patterns whose name begins with "sse{,2,3}_" are invoked by intrinsics.
41
42;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
43;;
44;; Move patterns
45;;
46;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
47
48;; All of these patterns are enabled for SSE1 as well as SSE2.
49;; This is essential for maintaining stable calling conventions.
50
;; Move expander for the SSEMODEI integer vector modes.  The RTL template
;; only describes the operands; the insns themselves are produced by
;; ix86_expand_vector_move, after which DONE ends the expansion.
51(define_expand "mov<mode>"
52  [(set (match_operand:SSEMODEI 0 "nonimmediate_operand" "")
53	(match_operand:SSEMODEI 1 "nonimmediate_operand" ""))]
54  "TARGET_SSE"
55{
56  ix86_expand_vector_move (<MODE>mode, operands);
57  DONE;
58})
59
;; Move insn for the SSEMODEI integer vector modes.
;;   alternative 0: SSE constant ("C") into a register, opcode chosen by
;;		    standard_sse_constant_opcode.
;;   alternative 1: register or memory into a register.
;;   alternative 2: register into memory.
;; The insn condition rejects memory-to-memory moves.  Alternatives 1 and 2
;; emit movaps when the "mode" attribute is V4SF and movdqa otherwise.
60(define_insn "*mov<mode>_internal"
61  [(set (match_operand:SSEMODEI 0 "nonimmediate_operand" "=x,x ,m")
62	(match_operand:SSEMODEI 1 "nonimmediate_or_sse_const_operand"  "C ,xm,x"))]
63  "TARGET_SSE && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
64{
65  switch (which_alternative)
66    {
67    case 0:
68      return standard_sse_constant_opcode (insn, operands[1]);
69    case 1:
70    case 2:
71      if (get_attr_mode (insn) == MODE_V4SF)
72	return "movaps\t{%1, %0|%0, %1}";
73      else
74	return "movdqa\t{%1, %0|%0, %1}";
75    default:
76      gcc_unreachable ();
77    }
78}
79  [(set_attr "type" "sselog1,ssemov,ssemov")
;; "mode" is V4SF when optimize_size is nonzero, when TARGET_SSE2 is zero,
;; or for the store alternative when TARGET_SSE_TYPELESS_STORES is nonzero;
;; otherwise it is TI.
80   (set (attr "mode")
81	(if_then_else
82	  (ior (ior (ne (symbol_ref "optimize_size") (const_int 0))
83		    (eq (symbol_ref "TARGET_SSE2") (const_int 0)))
84	       (and (eq_attr "alternative" "2")
85	  	    (ne (symbol_ref "TARGET_SSE_TYPELESS_STORES")
86		        (const_int 0))))
87	  (const_string "V4SF")
88	  (const_string "TI")))])
89
;; Move expander for V4SF; all work is delegated to
;; ix86_expand_vector_move.
90(define_expand "movv4sf"
91  [(set (match_operand:V4SF 0 "nonimmediate_operand" "")
92	(match_operand:V4SF 1 "nonimmediate_operand" ""))]
93  "TARGET_SSE"
94{
95  ix86_expand_vector_move (V4SFmode, operands);
96  DONE;
97})
98
;; Move insn for V4SF.
;;   alternative 0: SSE constant ("C") into a register, opcode chosen by
;;		    standard_sse_constant_opcode.
;;   alternative 1: register or memory into a register (movaps).
;;   alternative 2: register into memory (movaps).
;; No alternative accepts memory for both operands, so the insn condition
;; rejects memory-to-memory moves up front, exactly as the SSEMODEI and
;; V2DF move insns in this file do.
99(define_insn "*movv4sf_internal"
100  [(set (match_operand:V4SF 0 "nonimmediate_operand" "=x,x,m")
101	(match_operand:V4SF 1 "nonimmediate_or_sse_const_operand" "C,xm,x"))]
102  "TARGET_SSE && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
103{
104  switch (which_alternative)
105    {
106    case 0:
107      return standard_sse_constant_opcode (insn, operands[1]);
108    case 1:
109    case 2:
110      return "movaps\t{%1, %0|%0, %1}";
111    default:
112      gcc_unreachable ();
113    }
114}
115  [(set_attr "type" "sselog1,ssemov,ssemov")
116   (set_attr "mode" "V4SF")])
117
;; After reload, rewrite a V4SF register load whose source satisfies
;; zero_extended_scalar_load_operand as a vec_merge (mask 1) of the
;; duplicated SF scalar with an all-zero vector.
118(define_split
119  [(set (match_operand:V4SF 0 "register_operand" "")
120	(match_operand:V4SF 1 "zero_extended_scalar_load_operand" ""))]
121  "TARGET_SSE && reload_completed"
122  [(set (match_dup 0)
123	(vec_merge:V4SF
124	  (vec_duplicate:V4SF (match_dup 1))
125	  (match_dup 2)
126	  (const_int 1)))]
127{
  /* Narrow the source to its SFmode piece at byte offset 0; operand 2 is
     the V4SF zero constant.  */
128  operands[1] = simplify_gen_subreg (SFmode, operands[1], V4SFmode, 0);
129  operands[2] = CONST0_RTX (V4SFmode);
130})
131
;; Move expander for V2DF; all work is delegated to
;; ix86_expand_vector_move.  Note the condition is TARGET_SSE, not
;; TARGET_SSE2 (see the calling-convention remark at the top of this
;; section).
132(define_expand "movv2df"
133  [(set (match_operand:V2DF 0 "nonimmediate_operand" "")
134	(match_operand:V2DF 1 "nonimmediate_operand" ""))]
135  "TARGET_SSE"
136{
137  ix86_expand_vector_move (V2DFmode, operands);
138  DONE;
139})
140
;; Move insn for V2DF.  Same alternative layout as the other *mov..._internal
;; insns: constant load, load/register copy, store.  Memory-to-memory moves
;; are rejected by the insn condition.  Alternatives 1 and 2 emit movaps
;; when the "mode" attribute is V4SF and movapd otherwise.
141(define_insn "*movv2df_internal"
142  [(set (match_operand:V2DF 0 "nonimmediate_operand" "=x,x,m")
143	(match_operand:V2DF 1 "nonimmediate_or_sse_const_operand" "C,xm,x"))]
144  "TARGET_SSE && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
145{
146  switch (which_alternative)
147    {
148    case 0:
149      return standard_sse_constant_opcode (insn, operands[1]);
150    case 1:
151    case 2:
152      if (get_attr_mode (insn) == MODE_V4SF)
153	return "movaps\t{%1, %0|%0, %1}";
154      else
155	return "movapd\t{%1, %0|%0, %1}";
156    default:
157      gcc_unreachable ();
158    }
159}
160  [(set_attr "type" "sselog1,ssemov,ssemov")
;; "mode" is V4SF when optimize_size is nonzero, when TARGET_SSE2 is zero,
;; or for the store alternative when TARGET_SSE_TYPELESS_STORES is nonzero;
;; otherwise it is V2DF.
161   (set (attr "mode")
162	(if_then_else
163	  (ior (ior (ne (symbol_ref "optimize_size") (const_int 0))
164		    (eq (symbol_ref "TARGET_SSE2") (const_int 0)))
165	       (and (eq_attr "alternative" "2")
166	  	    (ne (symbol_ref "TARGET_SSE_TYPELESS_STORES")
167		        (const_int 0))))
168	  (const_string "V4SF")
169	  (const_string "V2DF")))])
170
;; After reload, rewrite a V2DF register load whose source satisfies
;; zero_extended_scalar_load_operand as a vec_concat of the DF scalar with
;; a DF zero.
171(define_split
172  [(set (match_operand:V2DF 0 "register_operand" "")
173	(match_operand:V2DF 1 "zero_extended_scalar_load_operand" ""))]
174  "TARGET_SSE2 && reload_completed"
175  [(set (match_dup 0) (vec_concat:V2DF (match_dup 1) (match_dup 2)))]
176{
  /* Narrow the source to its DFmode piece at byte offset 0; operand 2 is
     the DFmode zero constant.  */
177  operands[1] = simplify_gen_subreg (DFmode, operands[1], V2DFmode, 0);
178  operands[2] = CONST0_RTX (DFmode);
179})
180
;; Push expander for every SSEMODE vector mode; the register operand is
;; handed to ix86_expand_push, which emits the actual insns.
181(define_expand "push<mode>1"
182  [(match_operand:SSEMODE 0 "register_operand" "")]
183  "TARGET_SSE"
184{
185  ix86_expand_push (<MODE>mode, operands[0]);
186  DONE;
187})
188
;; Misaligned-move expander for every SSEMODE vector mode; delegated to
;; ix86_expand_vector_move_misalign.
189(define_expand "movmisalign<mode>"
190  [(set (match_operand:SSEMODE 0 "nonimmediate_operand" "")
191	(match_operand:SSEMODE 1 "nonimmediate_operand" ""))]
192  "TARGET_SSE"
193{
194  ix86_expand_vector_move_misalign (<MODE>mode, operands);
195  DONE;
196})
197
;; Unaligned V4SF move (movups), represented as UNSPEC_MOVU.  Alternative 0
;; loads from register or memory into a register, alternative 1 stores a
;; register to memory; memory-to-memory is rejected by the insn condition.
;; The "mode" attribute is V4SF to match the single-precision operands and
;; the movups mnemonic (V2DF belongs to the movupd pattern).
198(define_insn "sse_movups"
199  [(set (match_operand:V4SF 0 "nonimmediate_operand" "=x,m")
200	(unspec:V4SF [(match_operand:V4SF 1 "nonimmediate_operand" "xm,x")]
201		     UNSPEC_MOVU))]
202  "TARGET_SSE && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
203  "movups\t{%1, %0|%0, %1}"
204  [(set_attr "type" "ssemov")
205   (set_attr "mode" "V4SF")])
206
;; Unaligned V2DF move (movupd), represented as UNSPEC_MOVU.  Load and
;; store alternatives; memory-to-memory is rejected by the insn condition.
207(define_insn "sse2_movupd"
208  [(set (match_operand:V2DF 0 "nonimmediate_operand" "=x,m")
209	(unspec:V2DF [(match_operand:V2DF 1 "nonimmediate_operand" "xm,x")]
210		     UNSPEC_MOVU))]
211  "TARGET_SSE2 && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
212  "movupd\t{%1, %0|%0, %1}"
213  [(set_attr "type" "ssemov")
214   (set_attr "mode" "V2DF")])
215
;; Unaligned V16QI move (movdqu), represented as UNSPEC_MOVU.  Load and
;; store alternatives; memory-to-memory is rejected by the insn condition.
216(define_insn "sse2_movdqu"
217  [(set (match_operand:V16QI 0 "nonimmediate_operand" "=x,m")
218	(unspec:V16QI [(match_operand:V16QI 1 "nonimmediate_operand" "xm,x")]
219		      UNSPEC_MOVU))]
220  "TARGET_SSE2 && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
221  "movdqu\t{%1, %0|%0, %1}"
222  [(set_attr "type" "ssemov")
223   (set_attr "mode" "TI")])
224
;; movntps: store of a V4SF SSE register to memory, represented as
;; UNSPEC_MOVNT.
225(define_insn "sse_movntv4sf"
226  [(set (match_operand:V4SF 0 "memory_operand" "=m")
227	(unspec:V4SF [(match_operand:V4SF 1 "register_operand" "x")]
228		     UNSPEC_MOVNT))]
229  "TARGET_SSE"
230  "movntps\t{%1, %0|%0, %1}"
231  [(set_attr "type" "ssemov")
232   (set_attr "mode" "V4SF")])
233
;; movntpd: store of a V2DF SSE register to memory (UNSPEC_MOVNT).
;; NOTE(review): "type" is ssecvt here but ssemov on sse_movntv4sf --
;; confirm whether the difference is intentional.
234(define_insn "sse2_movntv2df"
235  [(set (match_operand:V2DF 0 "memory_operand" "=m")
236	(unspec:V2DF [(match_operand:V2DF 1 "register_operand" "x")]
237		     UNSPEC_MOVNT))]
238  "TARGET_SSE2"
239  "movntpd\t{%1, %0|%0, %1}"
240  [(set_attr "type" "ssecvt")
241   (set_attr "mode" "V2DF")])
242
;; movntdq: store of a V2DI SSE register to memory (UNSPEC_MOVNT).
;; NOTE(review): "type" is ssecvt here but ssemov on sse_movntv4sf --
;; confirm whether the difference is intentional.
243(define_insn "sse2_movntv2di"
244  [(set (match_operand:V2DI 0 "memory_operand" "=m")
245	(unspec:V2DI [(match_operand:V2DI 1 "register_operand" "x")]
246		     UNSPEC_MOVNT))]
247  "TARGET_SSE2"
248  "movntdq\t{%1, %0|%0, %1}"
249  [(set_attr "type" "ssecvt")
250   (set_attr "mode" "TI")])
251
;; movnti: store of an SImode general register ("r") to memory, represented
;; as UNSPEC_MOVNT.  Both operands are SImode and no SSE register is
;; involved, so the "mode" attribute is SI rather than a vector mode.
252(define_insn "sse2_movntsi"
253  [(set (match_operand:SI 0 "memory_operand" "=m")
254	(unspec:SI [(match_operand:SI 1 "register_operand" "r")]
255		   UNSPEC_MOVNT))]
256  "TARGET_SSE2"
257  "movnti\t{%1, %0|%0, %1}"
258  [(set_attr "type" "ssecvt")
259   (set_attr "mode" "SI")])
260
;; lddqu: V16QI load from memory into an SSE register, represented as
;; UNSPEC_LDQQU.  Memory source only.
261(define_insn "sse3_lddqu"
262  [(set (match_operand:V16QI 0 "register_operand" "=x")
263	(unspec:V16QI [(match_operand:V16QI 1 "memory_operand" "m")]
264		      UNSPEC_LDQQU))]
265  "TARGET_SSE3"
266  "lddqu\t{%1, %0|%0, %1}"
267  [(set_attr "type" "ssecvt")
268   (set_attr "mode" "TI")])
269
270;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
271;;
272;; Parallel single-precision floating point arithmetic
273;;
274;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
275
;; V4SF negation; expansion is delegated to
;; ix86_expand_fp_absneg_operator with code NEG.
276(define_expand "negv4sf2"
277  [(set (match_operand:V4SF 0 "register_operand" "")
278	(neg:V4SF (match_operand:V4SF 1 "nonimmediate_operand" "")))]
279  "TARGET_SSE"
280  "ix86_expand_fp_absneg_operator (NEG, V4SFmode, operands); DONE;")
281
;; V4SF absolute value; expansion is delegated to
;; ix86_expand_fp_absneg_operator with code ABS.
282(define_expand "absv4sf2"
283  [(set (match_operand:V4SF 0 "register_operand" "")
284	(abs:V4SF (match_operand:V4SF 1 "nonimmediate_operand" "")))]
285  "TARGET_SSE"
286  "ix86_expand_fp_absneg_operator (ABS, V4SFmode, operands); DONE;")
287
;; V4SF addition.  The expander lets ix86_fixup_binary_operands_no_copy
;; adjust the operands and then falls through to emit the template.
288(define_expand "addv4sf3"
289  [(set (match_operand:V4SF 0 "register_operand" "")
290	(plus:V4SF (match_operand:V4SF 1 "nonimmediate_operand" "")
291		   (match_operand:V4SF 2 "nonimmediate_operand" "")))]
292  "TARGET_SSE"
293  "ix86_fixup_binary_operands_no_copy (PLUS, V4SFmode, operands);")
294
;; addps.  Operand 1 is tied to the destination; the "%" marks operands 1
;; and 2 as commutative.
295(define_insn "*addv4sf3"
296  [(set (match_operand:V4SF 0 "register_operand" "=x")
297	(plus:V4SF (match_operand:V4SF 1 "nonimmediate_operand" "%0")
298		   (match_operand:V4SF 2 "nonimmediate_operand" "xm")))]
299  "TARGET_SSE && ix86_binary_operator_ok (PLUS, V4SFmode, operands)"
300  "addps\t{%2, %0|%0, %2}"
301  [(set_attr "type" "sseadd")
302   (set_attr "mode" "V4SF")])
303
;; addss.  vec_merge mask 1: element 0 comes from the sum, the remaining
;; elements from operand 1 (which is tied to the destination).
304(define_insn "sse_vmaddv4sf3"
305  [(set (match_operand:V4SF 0 "register_operand" "=x")
306	(vec_merge:V4SF
307	  (plus:V4SF (match_operand:V4SF 1 "register_operand" "0")
308		     (match_operand:V4SF 2 "nonimmediate_operand" "xm"))
309	  (match_dup 1)
310	  (const_int 1)))]
311  "TARGET_SSE && ix86_binary_operator_ok (PLUS, V4SFmode, operands)"
312  "addss\t{%2, %0|%0, %2}"
313  [(set_attr "type" "sseadd")
314   (set_attr "mode" "SF")])
315
;; V4SF subtraction.  Not commutative, so operand 1 must already be a
;; register; ix86_fixup_binary_operands_no_copy adjusts the operands before
;; the template is emitted.
316(define_expand "subv4sf3"
317  [(set (match_operand:V4SF 0 "register_operand" "")
318	(minus:V4SF (match_operand:V4SF 1 "register_operand" "")
319		    (match_operand:V4SF 2 "nonimmediate_operand" "")))]
320  "TARGET_SSE"
321  "ix86_fixup_binary_operands_no_copy (MINUS, V4SFmode, operands);")
322
;; subps.  Operand 1 is tied to the destination.
323(define_insn "*subv4sf3"
324  [(set (match_operand:V4SF 0 "register_operand" "=x")
325	(minus:V4SF (match_operand:V4SF 1 "register_operand" "0")
326		    (match_operand:V4SF 2 "nonimmediate_operand" "xm")))]
327  "TARGET_SSE"
328  "subps\t{%2, %0|%0, %2}"
329  [(set_attr "type" "sseadd")
330   (set_attr "mode" "V4SF")])
331
;; subss.  vec_merge mask 1: element 0 comes from the difference, the
;; remaining elements from operand 1.
332(define_insn "sse_vmsubv4sf3"
333  [(set (match_operand:V4SF 0 "register_operand" "=x")
334	(vec_merge:V4SF
335	  (minus:V4SF (match_operand:V4SF 1 "register_operand" "0")
336		      (match_operand:V4SF 2 "nonimmediate_operand" "xm"))
337	  (match_dup 1)
338	  (const_int 1)))]
339  "TARGET_SSE"
340  "subss\t{%2, %0|%0, %2}"
341  [(set_attr "type" "sseadd")
342   (set_attr "mode" "SF")])
343
;; V4SF multiplication.  The expander lets
;; ix86_fixup_binary_operands_no_copy adjust the operands and then emits
;; the template.
344(define_expand "mulv4sf3"
345  [(set (match_operand:V4SF 0 "register_operand" "")
346	(mult:V4SF (match_operand:V4SF 1 "nonimmediate_operand" "")
347		   (match_operand:V4SF 2 "nonimmediate_operand" "")))]
348  "TARGET_SSE"
349  "ix86_fixup_binary_operands_no_copy (MULT, V4SFmode, operands);")
350
;; mulps.  Operand 1 is tied to the destination; "%" marks operands 1 and
;; 2 as commutative.
351(define_insn "*mulv4sf3"
352  [(set (match_operand:V4SF 0 "register_operand" "=x")
353	(mult:V4SF (match_operand:V4SF 1 "nonimmediate_operand" "%0")
354		   (match_operand:V4SF 2 "nonimmediate_operand" "xm")))]
355  "TARGET_SSE && ix86_binary_operator_ok (MULT, V4SFmode, operands)"
356  "mulps\t{%2, %0|%0, %2}"
357  [(set_attr "type" "ssemul")
358   (set_attr "mode" "V4SF")])
359
;; mulss.  vec_merge mask 1: element 0 comes from the product, the
;; remaining elements from operand 1.
360(define_insn "sse_vmmulv4sf3"
361  [(set (match_operand:V4SF 0 "register_operand" "=x")
362	(vec_merge:V4SF
363	  (mult:V4SF (match_operand:V4SF 1 "register_operand" "0")
364		     (match_operand:V4SF 2 "nonimmediate_operand" "xm"))
365	  (match_dup 1)
366	  (const_int 1)))]
367  "TARGET_SSE && ix86_binary_operator_ok (MULT, V4SFmode, operands)"
368  "mulss\t{%2, %0|%0, %2}"
369  [(set_attr "type" "ssemul")
370   (set_attr "mode" "SF")])
371
;; V4SF division.  Not commutative, so operand 1 must already be a
;; register; ix86_fixup_binary_operands_no_copy adjusts the operands before
;; the template is emitted.
372(define_expand "divv4sf3"
373  [(set (match_operand:V4SF 0 "register_operand" "")
374	(div:V4SF (match_operand:V4SF 1 "register_operand" "")
375		  (match_operand:V4SF 2 "nonimmediate_operand" "")))]
376  "TARGET_SSE"
377  "ix86_fixup_binary_operands_no_copy (DIV, V4SFmode, operands);")
378
;; divps.  Operand 1 is tied to the destination.
379(define_insn "*divv4sf3"
380  [(set (match_operand:V4SF 0 "register_operand" "=x")
381	(div:V4SF (match_operand:V4SF 1 "register_operand" "0")
382		  (match_operand:V4SF 2 "nonimmediate_operand" "xm")))]
383  "TARGET_SSE"
384  "divps\t{%2, %0|%0, %2}"
385  [(set_attr "type" "ssediv")
386   (set_attr "mode" "V4SF")])
387
;; divss.  vec_merge mask 1: element 0 comes from the quotient, the
;; remaining elements from operand 1.
388(define_insn "sse_vmdivv4sf3"
389  [(set (match_operand:V4SF 0 "register_operand" "=x")
390	(vec_merge:V4SF
391	  (div:V4SF (match_operand:V4SF 1 "register_operand" "0")
392		    (match_operand:V4SF 2 "nonimmediate_operand" "xm"))
393	  (match_dup 1)
394	  (const_int 1)))]
395  "TARGET_SSE"
396  "divss\t{%2, %0|%0, %2}"
397  [(set_attr "type" "ssediv")
398   (set_attr "mode" "SF")])
399
;; rcpps, represented as UNSPEC_RCP applied to the whole V4SF operand.
400(define_insn "sse_rcpv4sf2"
401  [(set (match_operand:V4SF 0 "register_operand" "=x")
402	(unspec:V4SF
403	 [(match_operand:V4SF 1 "nonimmediate_operand" "xm")] UNSPEC_RCP))]
404  "TARGET_SSE"
405  "rcpps\t{%1, %0|%0, %1}"
406  [(set_attr "type" "sse")
407   (set_attr "mode" "V4SF")])
408
;; rcpss.  vec_merge mask 1: element 0 comes from UNSPEC_RCP of operand 1,
;; the remaining elements from operand 2, which is tied to the destination.
409(define_insn "sse_vmrcpv4sf2"
410  [(set (match_operand:V4SF 0 "register_operand" "=x")
411	(vec_merge:V4SF
412	  (unspec:V4SF [(match_operand:V4SF 1 "nonimmediate_operand" "xm")]
413		       UNSPEC_RCP)
414	  (match_operand:V4SF 2 "register_operand" "0")
415	  (const_int 1)))]
416  "TARGET_SSE"
417  "rcpss\t{%1, %0|%0, %1}"
418  [(set_attr "type" "sse")
419   (set_attr "mode" "SF")])
420
;; rsqrtps, represented as UNSPEC_RSQRT applied to the whole V4SF operand.
421(define_insn "sse_rsqrtv4sf2"
422  [(set (match_operand:V4SF 0 "register_operand" "=x")
423	(unspec:V4SF
424	  [(match_operand:V4SF 1 "nonimmediate_operand" "xm")] UNSPEC_RSQRT))]
425  "TARGET_SSE"
426  "rsqrtps\t{%1, %0|%0, %1}"
427  [(set_attr "type" "sse")
428   (set_attr "mode" "V4SF")])
429
;; rsqrtss.  vec_merge mask 1: element 0 comes from UNSPEC_RSQRT of
;; operand 1, the remaining elements from operand 2, which is tied to the
;; destination.
430(define_insn "sse_vmrsqrtv4sf2"
431  [(set (match_operand:V4SF 0 "register_operand" "=x")
432	(vec_merge:V4SF
433	  (unspec:V4SF [(match_operand:V4SF 1 "nonimmediate_operand" "xm")]
434		       UNSPEC_RSQRT)
435	  (match_operand:V4SF 2 "register_operand" "0")
436	  (const_int 1)))]
437  "TARGET_SSE"
438  "rsqrtss\t{%1, %0|%0, %1}"
439  [(set_attr "type" "sse")
440   (set_attr "mode" "SF")])
441
;; sqrtps: element-wise square root of a V4SF register or memory operand.
442(define_insn "sqrtv4sf2"
443  [(set (match_operand:V4SF 0 "register_operand" "=x")
444	(sqrt:V4SF (match_operand:V4SF 1 "nonimmediate_operand" "xm")))]
445  "TARGET_SSE"
446  "sqrtps\t{%1, %0|%0, %1}"
447  [(set_attr "type" "sse")
448   (set_attr "mode" "V4SF")])
449
;; sqrtss.  vec_merge mask 1: element 0 comes from the square root of
;; operand 1, the remaining elements from operand 2, which is tied to the
;; destination.
450(define_insn "sse_vmsqrtv4sf2"
451  [(set (match_operand:V4SF 0 "register_operand" "=x")
452	(vec_merge:V4SF
453	  (sqrt:V4SF (match_operand:V4SF 1 "nonimmediate_operand" "xm"))
454	  (match_operand:V4SF 2 "register_operand" "0")
455	  (const_int 1)))]
456  "TARGET_SSE"
457  "sqrtss\t{%1, %0|%0, %1}"
458  [(set_attr "type" "sse")
459   (set_attr "mode" "SF")])
460
461;; ??? For !flag_finite_math_only, the representation with SMIN/SMAX
462;; isn't really correct, as those rtl operators aren't defined when 
463;; applied to NaNs.  Hopefully the optimizers won't get too smart on us.
464
;; V4SF signed maximum.  Unless flag_finite_math_only is set, operand 1 is
;; forced into a register first, so that the non-commutative "*smaxv4sf3"
;; insn (which requires a register operand 1) can match.
465(define_expand "smaxv4sf3"
466  [(set (match_operand:V4SF 0 "register_operand" "")
467	(smax:V4SF (match_operand:V4SF 1 "nonimmediate_operand" "")
468		   (match_operand:V4SF 2 "nonimmediate_operand" "")))]
469  "TARGET_SSE"
470{
471  if (!flag_finite_math_only)
472    operands[1] = force_reg (V4SFmode, operands[1]);
473  ix86_fixup_binary_operands_no_copy (SMAX, V4SFmode, operands);
474})
475
;; maxps, commutative form ("%"); only enabled under
;; flag_finite_math_only.
476(define_insn "*smaxv4sf3_finite"
477  [(set (match_operand:V4SF 0 "register_operand" "=x")
478	(smax:V4SF (match_operand:V4SF 1 "nonimmediate_operand" "%0")
479		   (match_operand:V4SF 2 "nonimmediate_operand" "xm")))]
480  "TARGET_SSE && flag_finite_math_only
481   && ix86_binary_operator_ok (SMAX, V4SFmode, operands)"
482  "maxps\t{%2, %0|%0, %2}"
483  [(set_attr "type" "sse")
484   (set_attr "mode" "V4SF")])
485
;; maxps, non-commutative form: operand 1 must be a register tied to the
;; destination.
486(define_insn "*smaxv4sf3"
487  [(set (match_operand:V4SF 0 "register_operand" "=x")
488	(smax:V4SF (match_operand:V4SF 1 "register_operand" "0")
489		   (match_operand:V4SF 2 "nonimmediate_operand" "xm")))]
490  "TARGET_SSE"
491  "maxps\t{%2, %0|%0, %2}"
492  [(set_attr "type" "sse")
493   (set_attr "mode" "V4SF")])
494
;; maxss.  vec_merge mask 1: element 0 comes from the maximum, the
;; remaining elements from operand 1.
495(define_insn "sse_vmsmaxv4sf3"
496  [(set (match_operand:V4SF 0 "register_operand" "=x")
497	(vec_merge:V4SF
498	 (smax:V4SF (match_operand:V4SF 1 "register_operand" "0")
499		    (match_operand:V4SF 2 "nonimmediate_operand" "xm"))
500	 (match_dup 1)
501	 (const_int 1)))]
502  "TARGET_SSE"
503  "maxss\t{%2, %0|%0, %2}"
504  [(set_attr "type" "sse")
505   (set_attr "mode" "SF")])
506
;; V4SF signed minimum.  Unless flag_finite_math_only is set, operand 1 is
;; forced into a register first, so that the non-commutative "*sminv4sf3"
;; insn (which requires a register operand 1) can match.
507(define_expand "sminv4sf3"
508  [(set (match_operand:V4SF 0 "register_operand" "")
509	(smin:V4SF (match_operand:V4SF 1 "nonimmediate_operand" "")
510		   (match_operand:V4SF 2 "nonimmediate_operand" "")))]
511  "TARGET_SSE"
512{
513  if (!flag_finite_math_only)
514    operands[1] = force_reg (V4SFmode, operands[1]);
515  ix86_fixup_binary_operands_no_copy (SMIN, V4SFmode, operands);
516})
517
;; minps, commutative form ("%"); only enabled under
;; flag_finite_math_only.
518(define_insn "*sminv4sf3_finite"
519  [(set (match_operand:V4SF 0 "register_operand" "=x")
520	(smin:V4SF (match_operand:V4SF 1 "nonimmediate_operand" "%0")
521		   (match_operand:V4SF 2 "nonimmediate_operand" "xm")))]
522  "TARGET_SSE && flag_finite_math_only
523   && ix86_binary_operator_ok (SMIN, V4SFmode, operands)"
524  "minps\t{%2, %0|%0, %2}"
525  [(set_attr "type" "sse")
526   (set_attr "mode" "V4SF")])
527
;; minps, non-commutative form: operand 1 must be a register tied to the
;; destination.
528(define_insn "*sminv4sf3"
529  [(set (match_operand:V4SF 0 "register_operand" "=x")
530	(smin:V4SF (match_operand:V4SF 1 "register_operand" "0")
531		   (match_operand:V4SF 2 "nonimmediate_operand" "xm")))]
532  "TARGET_SSE"
533  "minps\t{%2, %0|%0, %2}"
534  [(set_attr "type" "sse")
535   (set_attr "mode" "V4SF")])
536
;; minss.  vec_merge mask 1: element 0 comes from the minimum, the
;; remaining elements from operand 1.
537(define_insn "sse_vmsminv4sf3"
538  [(set (match_operand:V4SF 0 "register_operand" "=x")
539	(vec_merge:V4SF
540	 (smin:V4SF (match_operand:V4SF 1 "register_operand" "0")
541		    (match_operand:V4SF 2 "nonimmediate_operand" "xm"))
542	 (match_dup 1)
543	 (const_int 1)))]
544  "TARGET_SSE"
545  "minss\t{%2, %0|%0, %2}"
546  [(set_attr "type" "sse")
547   (set_attr "mode" "SF")])
548
549;; These versions of the min/max patterns implement exactly the operations
550;;   min = (op1 < op2 ? op1 : op2)
551;;   max = (!(op1 < op2) ? op1 : op2)
552;; Their operands are not commutative, and thus they may be used in the
553;; presence of -0.0 and NaN.
554
;; minps with fixed operand order, kept opaque as UNSPEC_IEEE_MIN.
;; Operand 1 is tied to the destination and there is no "%" commutativity
;; marker.
555(define_insn "*ieee_sminv4sf3"
556  [(set (match_operand:V4SF 0 "register_operand" "=x")
557	(unspec:V4SF [(match_operand:V4SF 1 "register_operand" "0")
558		      (match_operand:V4SF 2 "nonimmediate_operand" "xm")]
559		     UNSPEC_IEEE_MIN))]
560  "TARGET_SSE"
561  "minps\t{%2, %0|%0, %2}"
562  [(set_attr "type" "sseadd")
563   (set_attr "mode" "V4SF")])
564
;; maxps with fixed operand order, kept opaque as UNSPEC_IEEE_MAX.
565(define_insn "*ieee_smaxv4sf3"
566  [(set (match_operand:V4SF 0 "register_operand" "=x")
567	(unspec:V4SF [(match_operand:V4SF 1 "register_operand" "0")
568		      (match_operand:V4SF 2 "nonimmediate_operand" "xm")]
569		     UNSPEC_IEEE_MAX))]
570  "TARGET_SSE"
571  "maxps\t{%2, %0|%0, %2}"
572  [(set_attr "type" "sseadd")
573   (set_attr "mode" "V4SF")])
574
;; minpd with fixed operand order (UNSPEC_IEEE_MIN); requires TARGET_SSE2.
575(define_insn "*ieee_sminv2df3"
576  [(set (match_operand:V2DF 0 "register_operand" "=x")
577	(unspec:V2DF [(match_operand:V2DF 1 "register_operand" "0")
578		      (match_operand:V2DF 2 "nonimmediate_operand" "xm")]
579		     UNSPEC_IEEE_MIN))]
580  "TARGET_SSE2"
581  "minpd\t{%2, %0|%0, %2}"
582  [(set_attr "type" "sseadd")
583   (set_attr "mode" "V2DF")])
584
;; maxpd with fixed operand order (UNSPEC_IEEE_MAX); requires TARGET_SSE2.
585(define_insn "*ieee_smaxv2df3"
586  [(set (match_operand:V2DF 0 "register_operand" "=x")
587	(unspec:V2DF [(match_operand:V2DF 1 "register_operand" "0")
588		      (match_operand:V2DF 2 "nonimmediate_operand" "xm")]
589		     UNSPEC_IEEE_MAX))]
590  "TARGET_SSE2"
591  "maxpd\t{%2, %0|%0, %2}"
592  [(set_attr "type" "sseadd")
593   (set_attr "mode" "V2DF")])
594
;; addsubps: subtract in the even elements, add in the odd elements:
;;   dest[0] = op1[0] - op2[0]	  dest[1] = op1[1] + op2[1]
;;   dest[2] = op1[2] - op2[2]	  dest[3] = op1[3] + op2[3]
;; In a vec_merge, a set mask bit N selects element N from the FIRST
;; vector.  The first vector here is the PLUS, so the mask must have bits
;; 1 and 3 set: (const_int 10).  A mask of 5 would describe the opposite
;; (add in elements 0/2, subtract in 1/3), which is not what the
;; instruction computes.
595(define_insn "sse3_addsubv4sf3"
596  [(set (match_operand:V4SF 0 "register_operand" "=x")
597	(vec_merge:V4SF
598	  (plus:V4SF
599	    (match_operand:V4SF 1 "register_operand" "0")
600	    (match_operand:V4SF 2 "nonimmediate_operand" "xm"))
601	  (minus:V4SF (match_dup 1) (match_dup 2))
602	  (const_int 10)))]
603  "TARGET_SSE3"
604  "addsubps\t{%2, %0|%0, %2}"
605  [(set_attr "type" "sseadd")
606   (set_attr "mode" "V4SF")])
607
;; haddps.  The RTL spells out the result element by element:
;;   { op1[0]+op1[1], op1[2]+op1[3], op2[0]+op2[1], op2[2]+op2[3] }
;; Operand 1 is tied to the destination.
608(define_insn "sse3_haddv4sf3"
609  [(set (match_operand:V4SF 0 "register_operand" "=x")
610	(vec_concat:V4SF
611	  (vec_concat:V2SF
612	    (plus:SF
613	      (vec_select:SF 
614		(match_operand:V4SF 1 "register_operand" "0")
615		(parallel [(const_int 0)]))
616	      (vec_select:SF (match_dup 1) (parallel [(const_int 1)])))
617	    (plus:SF
618	      (vec_select:SF (match_dup 1) (parallel [(const_int 2)]))
619	      (vec_select:SF (match_dup 1) (parallel [(const_int 3)]))))
620	  (vec_concat:V2SF
621	    (plus:SF
622	      (vec_select:SF
623		(match_operand:V4SF 2 "nonimmediate_operand" "xm")
624		(parallel [(const_int 0)]))
625	      (vec_select:SF (match_dup 2) (parallel [(const_int 1)])))
626	    (plus:SF
627	      (vec_select:SF (match_dup 2) (parallel [(const_int 2)]))
628	      (vec_select:SF (match_dup 2) (parallel [(const_int 3)]))))))]
629  "TARGET_SSE3"
630  "haddps\t{%2, %0|%0, %2}"
631  [(set_attr "type" "sseadd")
632   (set_attr "mode" "V4SF")])
633
;; hsubps.  Same shape as sse3_haddv4sf3 with MINUS in place of PLUS:
;;   { op1[0]-op1[1], op1[2]-op1[3], op2[0]-op2[1], op2[2]-op2[3] }
634(define_insn "sse3_hsubv4sf3"
635  [(set (match_operand:V4SF 0 "register_operand" "=x")
636	(vec_concat:V4SF
637	  (vec_concat:V2SF
638	    (minus:SF
639	      (vec_select:SF 
640		(match_operand:V4SF 1 "register_operand" "0")
641		(parallel [(const_int 0)]))
642	      (vec_select:SF (match_dup 1) (parallel [(const_int 1)])))
643	    (minus:SF
644	      (vec_select:SF (match_dup 1) (parallel [(const_int 2)]))
645	      (vec_select:SF (match_dup 1) (parallel [(const_int 3)]))))
646	  (vec_concat:V2SF
647	    (minus:SF
648	      (vec_select:SF
649		(match_operand:V4SF 2 "nonimmediate_operand" "xm")
650		(parallel [(const_int 0)]))
651	      (vec_select:SF (match_dup 2) (parallel [(const_int 1)])))
652	    (minus:SF
653	      (vec_select:SF (match_dup 2) (parallel [(const_int 2)]))
654	      (vec_select:SF (match_dup 2) (parallel [(const_int 3)]))))))]
655  "TARGET_SSE3"
656  "hsubps\t{%2, %0|%0, %2}"
657  [(set_attr "type" "sseadd")
658   (set_attr "mode" "V4SF")])
659
;; Sum reduction of a V4SF vector.  With SSE3 this is two haddps insns
;; through a fresh temporary; otherwise ix86_expand_reduc_v4sf is called
;; with gen_addv4sf3 as the combining operation.
660(define_expand "reduc_splus_v4sf"
661  [(match_operand:V4SF 0 "register_operand" "")
662   (match_operand:V4SF 1 "register_operand" "")]
663  "TARGET_SSE"
664{
665  if (TARGET_SSE3)
666    {
667      rtx tmp = gen_reg_rtx (V4SFmode);
668      emit_insn (gen_sse3_haddv4sf3 (tmp, operands[1], operands[1]));
669      emit_insn (gen_sse3_haddv4sf3 (operands[0], tmp, tmp));
670    }
671  else
672    ix86_expand_reduc_v4sf (gen_addv4sf3, operands[0], operands[1]);
673  DONE;
674})
675
;; Maximum reduction of a V4SF vector via ix86_expand_reduc_v4sf with
;; gen_smaxv4sf3 as the combining operation.
676(define_expand "reduc_smax_v4sf"
677  [(match_operand:V4SF 0 "register_operand" "")
678   (match_operand:V4SF 1 "register_operand" "")]
679  "TARGET_SSE"
680{
681  ix86_expand_reduc_v4sf (gen_smaxv4sf3, operands[0], operands[1]);
682  DONE;
683})
684
;; Minimum reduction of a V4SF vector via ix86_expand_reduc_v4sf with
;; gen_sminv4sf3 as the combining operation.
685(define_expand "reduc_smin_v4sf"
686  [(match_operand:V4SF 0 "register_operand" "")
687   (match_operand:V4SF 1 "register_operand" "")]
688  "TARGET_SSE"
689{
690  ix86_expand_reduc_v4sf (gen_sminv4sf3, operands[0], operands[1]);
691  DONE;
692})
693
694;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
695;;
696;; Parallel single-precision floating point comparisons
697;;
698;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
699
;; cmp<cond>ps.  Operand 3 is any operator accepted by
;; sse_comparison_operator; the %D3 output modifier prints it as part of
;; the mnemonic.  Operand 1 is tied to the destination.
700(define_insn "sse_maskcmpv4sf3"
701  [(set (match_operand:V4SF 0 "register_operand" "=x")
702	(match_operator:V4SF 3 "sse_comparison_operator"
703		[(match_operand:V4SF 1 "register_operand" "0")
704		 (match_operand:V4SF 2 "nonimmediate_operand" "xm")]))]
705  "TARGET_SSE"
706  "cmp%D3ps\t{%2, %0|%0, %2}"
707  [(set_attr "type" "ssecmp")
708   (set_attr "mode" "V4SF")])
709
;; cmp<cond>ss.  vec_merge mask 1: element 0 comes from the comparison,
;; the remaining elements from operand 1.  Unlike the packed form above,
;; operand 2 is register-only here.
710(define_insn "sse_vmmaskcmpv4sf3"
711  [(set (match_operand:V4SF 0 "register_operand" "=x")
712	(vec_merge:V4SF
713	 (match_operator:V4SF 3 "sse_comparison_operator"
714		[(match_operand:V4SF 1 "register_operand" "0")
715		 (match_operand:V4SF 2 "register_operand" "x")])
716	 (match_dup 1)
717	 (const_int 1)))]
718  "TARGET_SSE"
719  "cmp%D3ss\t{%2, %0|%0, %2}"
720  [(set_attr "type" "ssecmp")
721   (set_attr "mode" "SF")])
722
;; comiss: compare element 0 of the two V4SF operands and set FLAGS_REG in
;; CCFP mode.
723(define_insn "sse_comi"
724  [(set (reg:CCFP FLAGS_REG)
725	(compare:CCFP
726	  (vec_select:SF
727	    (match_operand:V4SF 0 "register_operand" "x")
728	    (parallel [(const_int 0)]))
729	  (vec_select:SF
730	    (match_operand:V4SF 1 "nonimmediate_operand" "xm")
731	    (parallel [(const_int 0)]))))]
732  "TARGET_SSE"
733  "comiss\t{%1, %0|%0, %1}"
734  [(set_attr "type" "ssecomi")
735   (set_attr "mode" "SF")])
736
;; ucomiss: same comparison of element 0, but FLAGS_REG is set in CCFPU
;; mode.
737(define_insn "sse_ucomi"
738  [(set (reg:CCFPU FLAGS_REG)
739	(compare:CCFPU
740	  (vec_select:SF
741	    (match_operand:V4SF 0 "register_operand" "x")
742	    (parallel [(const_int 0)]))
743	  (vec_select:SF
744	    (match_operand:V4SF 1 "nonimmediate_operand" "xm")
745	    (parallel [(const_int 0)]))))]
746  "TARGET_SSE"
747  "ucomiss\t{%1, %0|%0, %1}"
748  [(set_attr "type" "ssecomi")
749   (set_attr "mode" "SF")])
750
;; Vector conditional select for V4SF: operand 0 = (operand 4 <op3>
;; operand 5) ? operand 1 : operand 2.  Operand 3 has an empty predicate,
;; so any comparison operator reaches ix86_expand_fp_vcond; the expansion
;; FAILs when that function returns zero.
751(define_expand "vcondv4sf"
752  [(set (match_operand:V4SF 0 "register_operand" "")
753        (if_then_else:V4SF
754          (match_operator 3 ""
755            [(match_operand:V4SF 4 "nonimmediate_operand" "")
756             (match_operand:V4SF 5 "nonimmediate_operand" "")])
757          (match_operand:V4SF 1 "general_operand" "")
758          (match_operand:V4SF 2 "general_operand" "")))]
759  "TARGET_SSE"
760{
761  if (ix86_expand_fp_vcond (operands))
762    DONE;
763  else
764    FAIL;
765})
766
767;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
768;;
769;; Parallel single-precision floating point logical operations
770;;
771;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
772
;; Bitwise AND on V4SF.  The expander lets
;; ix86_fixup_binary_operands_no_copy adjust the operands and then emits
;; the template.
773(define_expand "andv4sf3"
774  [(set (match_operand:V4SF 0 "register_operand" "")
775	(and:V4SF (match_operand:V4SF 1 "nonimmediate_operand" "")
776		  (match_operand:V4SF 2 "nonimmediate_operand" "")))]
777  "TARGET_SSE"
778  "ix86_fixup_binary_operands_no_copy (AND, V4SFmode, operands);")
779
;; andps.  Operand 1 is tied to the destination; "%" marks operands 1 and
;; 2 as commutative.
780(define_insn "*andv4sf3"
781  [(set (match_operand:V4SF 0 "register_operand" "=x")
782	(and:V4SF (match_operand:V4SF 1 "nonimmediate_operand" "%0")
783		  (match_operand:V4SF 2 "nonimmediate_operand" "xm")))]
784  "TARGET_SSE && ix86_binary_operator_ok (AND, V4SFmode, operands)"
785  "andps\t{%2, %0|%0, %2}"
786  [(set_attr "type" "sselog")
787   (set_attr "mode" "V4SF")])
788
;; andnps: (~operand 1) & operand 2.  The complemented operand is the one
;; tied to the destination.
789(define_insn "sse_nandv4sf3"
790  [(set (match_operand:V4SF 0 "register_operand" "=x")
791	(and:V4SF (not:V4SF (match_operand:V4SF 1 "register_operand" "0"))
792		  (match_operand:V4SF 2 "nonimmediate_operand" "xm")))]
793  "TARGET_SSE"
794  "andnps\t{%2, %0|%0, %2}"
795  [(set_attr "type" "sselog")
796   (set_attr "mode" "V4SF")])
797
;; Bitwise inclusive OR on V4SF.  The expander lets
;; ix86_fixup_binary_operands_no_copy adjust the operands and then emits
;; the template.
798(define_expand "iorv4sf3"
799  [(set (match_operand:V4SF 0 "register_operand" "")
800	(ior:V4SF (match_operand:V4SF 1 "nonimmediate_operand" "")
801		  (match_operand:V4SF 2 "nonimmediate_operand" "")))]
802  "TARGET_SSE"
803  "ix86_fixup_binary_operands_no_copy (IOR, V4SFmode, operands);")
804
;; orps.  Operand 1 is tied to the destination; "%" marks operands 1 and 2
;; as commutative.
805(define_insn "*iorv4sf3"
806  [(set (match_operand:V4SF 0 "register_operand" "=x")
807	(ior:V4SF (match_operand:V4SF 1 "nonimmediate_operand" "%0")
808		  (match_operand:V4SF 2 "nonimmediate_operand" "xm")))]
809  "TARGET_SSE && ix86_binary_operator_ok (IOR, V4SFmode, operands)"
810  "orps\t{%2, %0|%0, %2}"
811  [(set_attr "type" "sselog")
812   (set_attr "mode" "V4SF")])
813
;; Bitwise exclusive OR on V4SF.  The expander lets
;; ix86_fixup_binary_operands_no_copy adjust the operands and then emits
;; the template.
814(define_expand "xorv4sf3"
815  [(set (match_operand:V4SF 0 "register_operand" "")
816	(xor:V4SF (match_operand:V4SF 1 "nonimmediate_operand" "")
817		  (match_operand:V4SF 2 "nonimmediate_operand" "")))]
818  "TARGET_SSE"
819  "ix86_fixup_binary_operands_no_copy (XOR, V4SFmode, operands);")
820
;; xorps.  Operand 1 is tied to the destination; "%" marks operands 1 and
;; 2 as commutative.
821(define_insn "*xorv4sf3"
822  [(set (match_operand:V4SF 0 "register_operand" "=x")
823	(xor:V4SF (match_operand:V4SF 1 "nonimmediate_operand" "%0")
824		  (match_operand:V4SF 2 "nonimmediate_operand" "xm")))]
825  "TARGET_SSE && ix86_binary_operator_ok (XOR, V4SFmode, operands)"
826  "xorps\t{%2, %0|%0, %2}"
827  [(set_attr "type" "sselog")
828   (set_attr "mode" "V4SF")])
829
830;; Also define scalar versions.  These are used for abs, neg, and
831;; conditional move.  Using subregs into vector modes causes register
832;; allocation lossage.  These patterns do not allow memory operands
833;; because the native instructions read the full 128-bits.
834
;; Scalar SF bitwise AND, emitted as the packed andps.  Both inputs are
;; register-only, and the "mode" attribute is V4SF even though the
;; operands are SF.
835(define_insn "*andsf3"
836  [(set (match_operand:SF 0 "register_operand" "=x")
837	(and:SF (match_operand:SF 1 "register_operand" "0")
838		(match_operand:SF 2 "register_operand" "x")))]
839  "TARGET_SSE"
840  "andps\t{%2, %0|%0, %2}"
841  [(set_attr "type" "sselog")
842   (set_attr "mode" "V4SF")])
843
;; Scalar SF and-not, (~operand 1) & operand 2, emitted as andnps.
;; Register-only inputs.
844(define_insn "*nandsf3"
845  [(set (match_operand:SF 0 "register_operand" "=x")
846	(and:SF (not:SF (match_operand:SF 1 "register_operand" "0"))
847		(match_operand:SF 2 "register_operand" "x")))]
848  "TARGET_SSE"
849  "andnps\t{%2, %0|%0, %2}"
850  [(set_attr "type" "sselog")
851   (set_attr "mode" "V4SF")])
852
;; Scalar SF bitwise inclusive OR, emitted as orps.  Register-only inputs.
853(define_insn "*iorsf3"
854  [(set (match_operand:SF 0 "register_operand" "=x")
855	(ior:SF (match_operand:SF 1 "register_operand" "0")
856		(match_operand:SF 2 "register_operand" "x")))]
857  "TARGET_SSE"
858  "orps\t{%2, %0|%0, %2}"
859  [(set_attr "type" "sselog")
860   (set_attr "mode" "V4SF")])
861
;; Scalar SF bitwise exclusive OR, emitted as xorps.  Register-only inputs.
862(define_insn "*xorsf3"
863  [(set (match_operand:SF 0 "register_operand" "=x")
864	(xor:SF (match_operand:SF 1 "register_operand" "0")
865		(match_operand:SF 2 "register_operand" "x")))]
866  "TARGET_SSE"
867  "xorps\t{%2, %0|%0, %2}"
868  [(set_attr "type" "sselog")
869   (set_attr "mode" "V4SF")])
870
871;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
872;;
873;; Parallel single-precision floating point conversion operations
874;;
875;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
876
877(define_insn "sse_cvtpi2ps"
878  [(set (match_operand:V4SF 0 "register_operand" "=x")
879	(vec_merge:V4SF
880	  (vec_duplicate:V4SF
881	    (float:V2SF (match_operand:V2SI 2 "nonimmediate_operand" "ym")))
882	  (match_operand:V4SF 1 "register_operand" "0")
883	  (const_int 3)))]
884  "TARGET_SSE"
885  "cvtpi2ps\t{%2, %0|%0, %2}"
886  [(set_attr "type" "ssecvt")
887   (set_attr "mode" "V4SF")])
888
;; cvtps2pi: convert V4SF operand 1 (SSE register or memory) to integers
;; through UNSPEC_FIX_NOTRUNC and keep elements 0 and 1 as a V2SI value
;; in an MMX register ("y").
;; NOTE(review): the unspec name suggests a non-truncating conversion;
;; its exact rounding behaviour is not visible in this chunk.
(define_insn "sse_cvtps2pi"
  [(set (match_operand:V2SI 0 "register_operand" "=y")
	(vec_select:V2SI
	  (unspec:V4SI [(match_operand:V4SF 1 "nonimmediate_operand" "xm")]
		       UNSPEC_FIX_NOTRUNC)
	  (parallel [(const_int 0) (const_int 1)])))]
  "TARGET_SSE"
  "cvtps2pi\t{%1, %0|%0, %1}"
  [(set_attr "type" "ssecvt")
   (set_attr "unit" "mmx")
   (set_attr "mode" "DI")])
900
;; cvttps2pi: convert V4SF operand 1 (SSE register or memory) to integers
;; with the RTL "fix" operation and keep elements 0 and 1 as a V2SI
;; value in an MMX register ("y").
(define_insn "sse_cvttps2pi"
  [(set (match_operand:V2SI 0 "register_operand" "=y")
	(vec_select:V2SI
	  (fix:V4SI (match_operand:V4SF 1 "nonimmediate_operand" "xm"))
	  (parallel [(const_int 0) (const_int 1)])))]
  "TARGET_SSE"
  "cvttps2pi\t{%1, %0|%0, %1}"
  [(set_attr "type" "ssecvt")
   (set_attr "unit" "mmx")
   (set_attr "mode" "SF")])
911
;; cvtsi2ss: convert SImode operand 2 to SF and place it in element 0 of
;; V4SF operand 1 (vec_merge mask 1); operand 1 is tied to the
;; destination, so the other elements are preserved.
;; Two alternatives: source in a general register ("r") or in memory
;; ("m"); they differ only in the decode attributes.
(define_insn "sse_cvtsi2ss"
  [(set (match_operand:V4SF 0 "register_operand" "=x,x")
	(vec_merge:V4SF
	  (vec_duplicate:V4SF
	    (float:SF (match_operand:SI 2 "nonimmediate_operand" "r,m")))
	  (match_operand:V4SF 1 "register_operand" "0,0")
	  (const_int 1)))]
  "TARGET_SSE"
  "cvtsi2ss\t{%2, %0|%0, %2}"
  [(set_attr "type" "sseicvt")
   (set_attr "athlon_decode" "vector,double")
   (set_attr "amdfam10_decode" "vector,double")
   (set_attr "mode" "SF")])
925
;; cvtsi2ssq: DImode-source variant of sse_cvtsi2ss, available only when
;; TARGET_64BIT.  Converts operand 2 to SF and places it in element 0 of
;; operand 1 (tied to the destination).
(define_insn "sse_cvtsi2ssq"
  [(set (match_operand:V4SF 0 "register_operand" "=x,x")
	(vec_merge:V4SF
	  (vec_duplicate:V4SF
	    (float:SF (match_operand:DI 2 "nonimmediate_operand" "r,rm")))
	  (match_operand:V4SF 1 "register_operand" "0,0")
	  (const_int 1)))]
  "TARGET_SSE && TARGET_64BIT"
  "cvtsi2ssq\t{%2, %0|%0, %2}"
  [(set_attr "type" "sseicvt")
   (set_attr "athlon_decode" "vector,double")
   (set_attr "amdfam10_decode" "vector,double")
   (set_attr "mode" "SF")])
939
;; cvtss2si: convert element 0 of V4SF operand 1 to an SImode integer in
;; a general register, via UNSPEC_FIX_NOTRUNC.
;; Two alternatives: source in an SSE register ("x") or in memory ("m").
(define_insn "sse_cvtss2si"
  [(set (match_operand:SI 0 "register_operand" "=r,r")
	(unspec:SI
	  [(vec_select:SF
	     (match_operand:V4SF 1 "nonimmediate_operand" "x,m")
	     (parallel [(const_int 0)]))]
	  UNSPEC_FIX_NOTRUNC))]
  "TARGET_SSE"
  "cvtss2si\t{%1, %0|%0, %1}"
  [(set_attr "type" "sseicvt")
   (set_attr "athlon_decode" "double,vector")
   (set_attr "amdfam10_decode" "double,double")
   (set_attr "mode" "SI")])
953
;; cvtss2siq: DImode-result variant of sse_cvtss2si, available only when
;; TARGET_64BIT.  Converts element 0 of operand 1 via UNSPEC_FIX_NOTRUNC.
(define_insn "sse_cvtss2siq"
  [(set (match_operand:DI 0 "register_operand" "=r,r")
	(unspec:DI
	  [(vec_select:SF
	     (match_operand:V4SF 1 "nonimmediate_operand" "x,m")
	     (parallel [(const_int 0)]))]
	  UNSPEC_FIX_NOTRUNC))]
  "TARGET_SSE && TARGET_64BIT"
  "cvtss2siq\t{%1, %0|%0, %1}"
  [(set_attr "type" "sseicvt")
   (set_attr "athlon_decode" "double,vector")
   (set_attr "amdfam10_decode" "double,double")
   (set_attr "mode" "DI")])
967
;; cvttss2si: convert element 0 of V4SF operand 1 to an SImode integer
;; in a general register using the RTL "fix" operation.
;; Two alternatives: source in an SSE register ("x") or in memory ("m").
(define_insn "sse_cvttss2si"
  [(set (match_operand:SI 0 "register_operand" "=r,r")
	(fix:SI
	  (vec_select:SF
	    (match_operand:V4SF 1 "nonimmediate_operand" "x,m")
	    (parallel [(const_int 0)]))))]
  "TARGET_SSE"
  "cvttss2si\t{%1, %0|%0, %1}"
  [(set_attr "type" "sseicvt")
   (set_attr "athlon_decode" "double,vector")
   (set_attr "amdfam10_decode" "double,double")
   (set_attr "mode" "SI")])
980
;; cvttss2siq: DImode-result variant of sse_cvttss2si, available only
;; when TARGET_64BIT.
(define_insn "sse_cvttss2siq"
  [(set (match_operand:DI 0 "register_operand" "=r,r")
	(fix:DI
	  (vec_select:SF
	    (match_operand:V4SF 1 "nonimmediate_operand" "x,m")
	    (parallel [(const_int 0)]))))]
  "TARGET_SSE && TARGET_64BIT"
  "cvttss2siq\t{%1, %0|%0, %1}"
  [(set_attr "type" "sseicvt")
   (set_attr "athlon_decode" "double,vector")
   (set_attr "amdfam10_decode" "double,double")
   (set_attr "mode" "DI")])
993
;; cvtdq2ps: convert the four SImode elements of operand 1 (SSE register
;; or memory) to a V4SF result.
;; The "mode" attribute is V4SF because the insn produces packed
;; single-precision values; it was previously V2DF, which described the
;; wrong data type.
;; NOTE(review): if the port derives an operand-size prefix from a V2DF
;; mode attribute, the old value also overestimated the insn length.
(define_insn "sse2_cvtdq2ps"
  [(set (match_operand:V4SF 0 "register_operand" "=x")
	(float:V4SF (match_operand:V4SI 1 "nonimmediate_operand" "xm")))]
  "TARGET_SSE2"
  "cvtdq2ps\t{%1, %0|%0, %1}"
  [(set_attr "type" "ssecvt")
   (set_attr "mode" "V4SF")])
1001
;; cvtps2dq: convert V4SF operand 1 (SSE register or memory) to V4SI via
;; UNSPEC_FIX_NOTRUNC.
(define_insn "sse2_cvtps2dq"
  [(set (match_operand:V4SI 0 "register_operand" "=x")
	(unspec:V4SI [(match_operand:V4SF 1 "nonimmediate_operand" "xm")]
		     UNSPEC_FIX_NOTRUNC))]
  "TARGET_SSE2"
  "cvtps2dq\t{%1, %0|%0, %1}"
  [(set_attr "type" "ssecvt")
   (set_attr "mode" "TI")])
1010
;; cvttps2dq: convert V4SF operand 1 (SSE register or memory) to V4SI
;; using the RTL "fix" operation.
(define_insn "sse2_cvttps2dq"
  [(set (match_operand:V4SI 0 "register_operand" "=x")
	(fix:V4SI (match_operand:V4SF 1 "nonimmediate_operand" "xm")))]
  "TARGET_SSE2"
  "cvttps2dq\t{%1, %0|%0, %1}"
  [(set_attr "type" "ssecvt")
   (set_attr "mode" "TI")])
1018
1019;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
1020;;
1021;; Parallel single-precision floating point element swizzling
1022;;
1023;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
1024
;; Result = { op2[2], op2[3], op1[2], op1[3] }, expressed as a selection
;; of elements 6, 7, 2, 3 from the concatenation of operands 1 and 2.
;; Operand 1 is tied to the destination in every alternative.
;; Alternatives:
;;   0: register source           -> movhlps
;;   1: offsettable memory source -> movlps using the %H2 form of the
;;      operand (NOTE(review): %H presumably addresses the high half)
;;   2: memory destination        -> movhps
;; The insn condition rejects the case where operands 1 and 2 are both
;; memory.
(define_insn "sse_movhlps"
  [(set (match_operand:V4SF 0 "nonimmediate_operand"     "=x,x,m")
	(vec_select:V4SF
	  (vec_concat:V8SF
	    (match_operand:V4SF 1 "nonimmediate_operand" " 0,0,0")
	    (match_operand:V4SF 2 "nonimmediate_operand" " x,o,x"))
	  (parallel [(const_int 6)
		     (const_int 7)
		     (const_int 2)
		     (const_int 3)])))]
  "TARGET_SSE && !(MEM_P (operands[1]) && MEM_P (operands[2]))"
  "@
   movhlps\t{%2, %0|%0, %2}
   movlps\t{%H2, %0|%0, %H2}
   movhps\t{%2, %0|%0, %2}"
  [(set_attr "type" "ssemov")
   (set_attr "mode" "V4SF,V2SF,V2SF")])
1042
;; Result = { op1[0], op1[1], op2[0], op2[1] }, expressed as a selection
;; of elements 0, 1, 4, 5 from the concatenation of operands 1 and 2.
;; Operand 1 is tied to the destination in every alternative.
;; Alternatives:
;;   0: register source                -> movlhps
;;   1: memory source                  -> movhps
;;   2: offsettable memory destination -> movlps storing to the %H0 form
;;      of the destination
;; Operand validity is additionally checked by ix86_binary_operator_ok.
(define_insn "sse_movlhps"
  [(set (match_operand:V4SF 0 "nonimmediate_operand"     "=x,x,o")
	(vec_select:V4SF
	  (vec_concat:V8SF
	    (match_operand:V4SF 1 "nonimmediate_operand" " 0,0,0")
	    (match_operand:V4SF 2 "nonimmediate_operand" " x,m,x"))
	  (parallel [(const_int 0)
		     (const_int 1)
		     (const_int 4)
		     (const_int 5)])))]
  "TARGET_SSE && ix86_binary_operator_ok (UNKNOWN, V4SFmode, operands)"
  "@
   movlhps\t{%2, %0|%0, %2}
   movhps\t{%2, %0|%0, %2}
   movlps\t{%2, %H0|%H0, %2}"
  [(set_attr "type" "ssemov")
   (set_attr "mode" "V4SF,V2SF,V2SF")])
1060
;; unpckhps: interleave the high halves of operands 1 and 2.
;; Result = { op1[2], op2[2], op1[3], op2[3] } (elements 2, 6, 3, 7 of
;; the concatenation).  Operand 1 is tied to the destination.
(define_insn "sse_unpckhps"
  [(set (match_operand:V4SF 0 "register_operand" "=x")
	(vec_select:V4SF
	  (vec_concat:V8SF
	    (match_operand:V4SF 1 "register_operand" "0")
	    (match_operand:V4SF 2 "nonimmediate_operand" "xm"))
	  (parallel [(const_int 2) (const_int 6)
		     (const_int 3) (const_int 7)])))]
  "TARGET_SSE"
  "unpckhps\t{%2, %0|%0, %2}"
  [(set_attr "type" "sselog")
   (set_attr "mode" "V4SF")])
1073
;; unpcklps: interleave the low halves of operands 1 and 2.
;; Result = { op1[0], op2[0], op1[1], op2[1] } (elements 0, 4, 1, 5 of
;; the concatenation).  Operand 1 is tied to the destination.
(define_insn "sse_unpcklps"
  [(set (match_operand:V4SF 0 "register_operand" "=x")
	(vec_select:V4SF
	  (vec_concat:V8SF
	    (match_operand:V4SF 1 "register_operand" "0")
	    (match_operand:V4SF 2 "nonimmediate_operand" "xm"))
	  (parallel [(const_int 0) (const_int 4)
		     (const_int 1) (const_int 5)])))]
  "TARGET_SSE"
  "unpcklps\t{%2, %0|%0, %2}"
  [(set_attr "type" "sselog")
   (set_attr "mode" "V4SF")])
1086
1087;; These are modeled with the same vec_concat as the others so that we
1088;; capture users of shufps that can use the new instructions
;; movshdup: duplicate the odd-numbered elements of operand 1.
;; Operand 1 is concatenated with itself (match_dup), so selecting
;; elements 1, 1, 7, 7 yields { op1[1], op1[1], op1[3], op1[3] }.
(define_insn "sse3_movshdup"
  [(set (match_operand:V4SF 0 "register_operand" "=x")
	(vec_select:V4SF
	  (vec_concat:V8SF
	    (match_operand:V4SF 1 "nonimmediate_operand" "xm")
	    (match_dup 1))
	  (parallel [(const_int 1)
		     (const_int 1)
		     (const_int 7)
		     (const_int 7)])))]
  "TARGET_SSE3"
  "movshdup\t{%1, %0|%0, %1}"
  [(set_attr "type" "sse")
   (set_attr "mode" "V4SF")])
1103
;; movsldup: duplicate the even-numbered elements of operand 1.
;; Operand 1 is concatenated with itself (match_dup), so selecting
;; elements 0, 0, 6, 6 yields { op1[0], op1[0], op1[2], op1[2] }.
(define_insn "sse3_movsldup"
  [(set (match_operand:V4SF 0 "register_operand" "=x")
	(vec_select:V4SF
	  (vec_concat:V8SF
	    (match_operand:V4SF 1 "nonimmediate_operand" "xm")
	    (match_dup 1))
	  (parallel [(const_int 0)
		     (const_int 0)
		     (const_int 6)
		     (const_int 6)])))]
  "TARGET_SSE3"
  "movsldup\t{%1, %0|%0, %1}"
  [(set_attr "type" "sse")
   (set_attr "mode" "V4SF")])
1118
;; Expander for shufps with an 8-bit immediate (operand 3).
;; The immediate is split into four 2-bit selectors.  The two low
;; selectors index operand 1 (values 0..3); the two high selectors index
;; operand 2 and are biased by 4 so that they address the second half of
;; the 8-element concatenation used by sse_shufps_1.
(define_expand "sse_shufps"
  [(match_operand:V4SF 0 "register_operand" "")
   (match_operand:V4SF 1 "register_operand" "")
   (match_operand:V4SF 2 "nonimmediate_operand" "")
   (match_operand:SI 3 "const_int_operand" "")]
  "TARGET_SSE"
{
  int imm = INTVAL (operands[3]);
  rtx sel0 = GEN_INT ((imm >> 0) & 3);
  rtx sel1 = GEN_INT ((imm >> 2) & 3);
  rtx sel2 = GEN_INT (((imm >> 4) & 3) + 4);
  rtx sel3 = GEN_INT (((imm >> 6) & 3) + 4);

  emit_insn (gen_sse_shufps_1 (operands[0], operands[1], operands[2],
			       sel0, sel1, sel2, sel3));
  DONE;
})
1134
;; shufps with the selectors exposed as four separate constant operands.
;; Operands 3 and 4 (0..3) pick elements from operand 1; operands 5 and
;; 6 (4..7) pick elements from operand 2 within the concatenation.
;; The output code folds the selectors back into the 8-bit immediate,
;; removing the bias of 4 from operands 5 and 6, and stores it in
;; operands[3] for printing.
(define_insn "sse_shufps_1"
  [(set (match_operand:V4SF 0 "register_operand" "=x")
	(vec_select:V4SF
	  (vec_concat:V8SF
	    (match_operand:V4SF 1 "register_operand" "0")
	    (match_operand:V4SF 2 "nonimmediate_operand" "xm"))
	  (parallel [(match_operand 3 "const_0_to_3_operand" "")
		     (match_operand 4 "const_0_to_3_operand" "")
		     (match_operand 5 "const_4_to_7_operand" "")
		     (match_operand 6 "const_4_to_7_operand" "")])))]
  "TARGET_SSE"
{
  int imm = (INTVAL (operands[3]) << 0)
	    | (INTVAL (operands[4]) << 2)
	    | ((INTVAL (operands[5]) - 4) << 4)
	    | ((INTVAL (operands[6]) - 4) << 6);

  operands[3] = GEN_INT (imm);
  return "shufps\t{%3, %2, %0|%0, %2, %3}";
}
  [(set_attr "type" "sselog")
   (set_attr "mode" "V4SF")])
1158
;; Extract elements 2 and 3 of V4SF operand 1 as a V2SF value.
;; Alternatives:
;;   0: register -> memory                -> movhps
;;   1: register -> register              -> movhlps
;;   2: offsettable memory -> register    -> movlps on the %H1 form of
;;      the source
(define_insn "sse_storehps"
  [(set (match_operand:V2SF 0 "nonimmediate_operand" "=m,x,x")
	(vec_select:V2SF
	  (match_operand:V4SF 1 "nonimmediate_operand" "x,x,o")
	  (parallel [(const_int 2) (const_int 3)])))]
  "TARGET_SSE"
  "@
   movhps\t{%1, %0|%0, %1}
   movhlps\t{%1, %0|%0, %1}
   movlps\t{%H1, %0|%0, %H1}"
  [(set_attr "type" "ssemov")
   (set_attr "mode" "V2SF,V4SF,V2SF")])
1171
;; Replace the high half of a V4SF value: result is the concatenation of
;; elements 0 and 1 of operand 1 (tied to the destination) with V2SF
;; operand 2.
;; Alternatives:
;;   0: operand 2 in memory               -> movhps
;;   1: operand 2 in a register           -> movlhps
;;   2: destination in offsettable memory -> movlps storing to the %H0
;;      form of the destination
(define_insn "sse_loadhps"
  [(set (match_operand:V4SF 0 "nonimmediate_operand" "=x,x,o")
	(vec_concat:V4SF
	  (vec_select:V2SF
	    (match_operand:V4SF 1 "nonimmediate_operand" "0,0,0")
	    (parallel [(const_int 0) (const_int 1)]))
	  (match_operand:V2SF 2 "nonimmediate_operand" "m,x,x")))]
  "TARGET_SSE"
  "@
   movhps\t{%2, %0|%0, %2}
   movlhps\t{%2, %0|%0, %2}
   movlps\t{%2, %H0|%H0, %2}"
  [(set_attr "type" "ssemov")
   (set_attr "mode" "V2SF,V4SF,V2SF")])
1186
;; Extract elements 0 and 1 of V4SF operand 1 as a V2SF value.
;; Alternatives:
;;   0: register -> memory   -> movlps
;;   1: register -> register -> movaps (attributed as V4SF mode)
;;   2: memory -> register   -> movlps
(define_insn "sse_storelps"
  [(set (match_operand:V2SF 0 "nonimmediate_operand" "=m,x,x")
	(vec_select:V2SF
	  (match_operand:V4SF 1 "nonimmediate_operand" "x,x,m")
	  (parallel [(const_int 0) (const_int 1)])))]
  "TARGET_SSE"
  "@
   movlps\t{%1, %0|%0, %1}
   movaps\t{%1, %0|%0, %1}
   movlps\t{%1, %0|%0, %1}"
  [(set_attr "type" "ssemov")
   (set_attr "mode" "V2SF,V4SF,V2SF")])
1199
;; Replace the low half of a V4SF value: result is the concatenation of
;; V2SF operand 2 with elements 2 and 3 of operand 1.
;; Alternatives:
;;   0: operand 2 tied to the destination, operand 1 in a register
;;      -> shufps with immediate 0xe4
;;   1: operand 1 tied to the destination, operand 2 in memory -> movlps
;;   2: destination in memory (tied to operand 1), operand 2 in a
;;      register -> movlps
(define_insn "sse_loadlps"
  [(set (match_operand:V4SF 0 "nonimmediate_operand" "=x,x,m")
	(vec_concat:V4SF
	  (match_operand:V2SF 2 "nonimmediate_operand" "0,m,x")
	  (vec_select:V2SF
	    (match_operand:V4SF 1 "nonimmediate_operand" "x,0,0")
	    (parallel [(const_int 2) (const_int 3)]))))]
  "TARGET_SSE"
  "@
   shufps\t{$0xe4, %1, %0|%0, %1, 0xe4}
   movlps\t{%2, %0|%0, %2}
   movlps\t{%2, %0|%0, %2}"
  [(set_attr "type" "sselog,ssemov,ssemov")
   (set_attr "mode" "V4SF,V2SF,V2SF")])
1214
;; movss register form: element 0 of the result comes from operand 2 and
;; the remaining elements from operand 1 (vec_merge mask 1).  Operand 1
;; is tied to the destination.
(define_insn "sse_movss"
  [(set (match_operand:V4SF 0 "register_operand" "=x")
	(vec_merge:V4SF
	  (match_operand:V4SF 2 "register_operand" "x")
	  (match_operand:V4SF 1 "register_operand" "0")
	  (const_int 1)))]
  "TARGET_SSE"
  "movss\t{%2, %0|%0, %2}"
  [(set_attr "type" "ssemov")
   (set_attr "mode" "SF")])
1225
;; Broadcast SF operand 1 into all four elements of the V4SF result.
;; Operand 1 is tied to the destination, so the insn shuffles the
;; destination register with itself using shufps and immediate 0.
(define_insn "*vec_dupv4sf"
  [(set (match_operand:V4SF 0 "register_operand" "=x")
	(vec_duplicate:V4SF
	  (match_operand:SF 1 "register_operand" "0")))]
  "TARGET_SSE"
  "shufps\t{$0, %0, %0|%0, %0, 0}"
  [(set_attr "type" "sselog1")
   (set_attr "mode" "V4SF")])
1234
1235;; ??? In theory we can match memory for the MMX alternative, but allowing
1236;; nonimmediate_operand for operand 2 and *not* allowing memory for the SSE
1237;; alternatives pretty much forces the MMX alternative to be chosen.
;; Build a V2SF value from two SF operands.
;; Alternatives:
;;   0: SSE, operand 1 tied, operand 2 in a register -> unpcklps
;;   1: SSE, operand 1 in memory, operand 2 matching "C" -> movss
;;   2: MMX, operand 1 tied, operand 2 in a register -> punpckldq
;;   3: MMX, operand 1 in memory, operand 2 matching "C" -> movd
;; NOTE(review): "C" together with reg_or_0_operand presumably means a
;; constant zero upper element.
(define_insn "*sse_concatv2sf"
  [(set (match_operand:V2SF 0 "register_operand"     "=x,x,*y,*y")
	(vec_concat:V2SF
	  (match_operand:SF 1 "nonimmediate_operand" " 0,m, 0, m")
	  (match_operand:SF 2 "reg_or_0_operand"     " x,C,*y, C")))]
  "TARGET_SSE"
  "@
   unpcklps\t{%2, %0|%0, %2}
   movss\t{%1, %0|%0, %1}
   punpckldq\t{%2, %0|%0, %2}
   movd\t{%1, %0|%0, %1}"
  [(set_attr "type" "sselog,ssemov,mmxcvt,mmxmov")
   (set_attr "mode" "V4SF,SF,DI,DI")])
1251
;; Build a V4SF value from two V2SF halves.  Operand 1 (low half) is
;; tied to the destination; operand 2 (high half) comes from a register
;; (movlhps) or from memory (movhps).
(define_insn "*sse_concatv4sf"
  [(set (match_operand:V4SF 0 "register_operand"   "=x,x")
	(vec_concat:V4SF
	  (match_operand:V2SF 1 "register_operand" " 0,0")
	  (match_operand:V2SF 2 "nonimmediate_operand" " x,m")))]
  "TARGET_SSE"
  "@
   movlhps\t{%2, %0|%0, %2}
   movhps\t{%2, %0|%0, %2}"
  [(set_attr "type" "ssemov")
   (set_attr "mode" "V4SF,V2SF")])
1263
;; V4SF vector initialization.  All the work is delegated to
;; ix86_expand_vector_init, called with its first argument false, the
;; destination (operand 0) and the initializer (operand 1).
(define_expand "vec_initv4sf"
  [(match_operand:V4SF 0 "register_operand" "")
   (match_operand 1 "" "")]
  "TARGET_SSE"
{
  ix86_expand_vector_init (false, operands[0], operands[1]);
  DONE;
})
1272
;; Set element 0 of a V4SF value from SF operand 2 (vec_merge mask 1);
;; the remaining elements come from operand 1.
;; Alternatives:
;;   0: operand 1 tied to the destination, operand 2 in an SSE register
;;      -> movss
;;   1: operand 1 matching "C", operand 2 in memory -> movss
;;   2: operand 1 matching "C", operand 2 in a general register -> movd
;;   3: destination in memory (tied to operand 1) -> "#", i.e. the insn
;;      must be split before output
(define_insn "*vec_setv4sf_0"
  [(set (match_operand:V4SF 0 "nonimmediate_operand"  "=x,x,Y ,m")
	(vec_merge:V4SF
	  (vec_duplicate:V4SF
	    (match_operand:SF 2 "general_operand"     " x,m,*r,x*rfF"))
	  (match_operand:V4SF 1 "vector_move_operand" " 0,C,C ,0")
	  (const_int 1)))]
  "TARGET_SSE"
  "@
   movss\t{%2, %0|%0, %2}
   movss\t{%2, %0|%0, %2}
   movd\t{%2, %0|%0, %2}
   #"
  [(set_attr "type" "ssemov")
   (set_attr "mode" "SF")])
1288
;; After reload, setting element 0 of a V4SF value that lives in memory
;; (source and destination are the same memory operand) is just an
;; SFmode store to the first element of that memory location.
(define_split
  [(set (match_operand:V4SF 0 "memory_operand" "")
	(vec_merge:V4SF
	  (vec_duplicate:V4SF
	    (match_operand:SF 1 "nonmemory_operand" ""))
	  (match_dup 0)
	  (const_int 1)))]
  "TARGET_SSE && reload_completed"
  [(const_int 0)]
{
  rtx elt0_mem = adjust_address (operands[0], SFmode, 0);

  emit_move_insn (elt0_mem, operands[1]);
  DONE;
})
1302
;; Set one element of a V4SF vector.  Operand 0 is the vector, operand 1
;; the new SF value and operand 2 the constant element index.  The work
;; is delegated to ix86_expand_vector_set, called with its first
;; argument false.
(define_expand "vec_setv4sf"
  [(match_operand:V4SF 0 "register_operand" "")
   (match_operand:SF 1 "register_operand" "")
   (match_operand 2 "const_int_operand" "")]
  "TARGET_SSE"
{
  ix86_expand_vector_set (false, operands[0], operands[1],
			  INTVAL (operands[2]));
  DONE;
})
1313
;; Extract element 0 of a V4SF value as an SF value.  The insn is never
;; output directly ("#"); after reload it is split into a plain SFmode
;; move.  A register source is re-expressed as the same hard register in
;; SFmode, any other source through gen_lowpart.  The insn condition
;; rejects memory-to-memory forms.
(define_insn_and_split "*vec_extractv4sf_0"
  [(set (match_operand:SF 0 "nonimmediate_operand" "=x,m,fr")
	(vec_select:SF
	  (match_operand:V4SF 1 "nonimmediate_operand" "xm,x,m")
	  (parallel [(const_int 0)])))]
  "TARGET_SSE && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
  "#"
  "&& reload_completed"
  [(const_int 0)]
{
  rtx src = operands[1];
  rtx low = (REG_P (src)
	     ? gen_rtx_REG (SFmode, REGNO (src))
	     : gen_lowpart (SFmode, src));

  emit_move_insn (operands[0], low);
  DONE;
})
1332
;; Extract one element of a V4SF vector.  Operand 0 receives the SF
;; value, operand 1 is the vector and operand 2 the constant element
;; index.  The work is delegated to ix86_expand_vector_extract, called
;; with its first argument false.
(define_expand "vec_extractv4sf"
  [(match_operand:SF 0 "register_operand" "")
   (match_operand:V4SF 1 "register_operand" "")
   (match_operand 2 "const_int_operand" "")]
  "TARGET_SSE"
{
  ix86_expand_vector_extract (false, operands[0], operands[1],
			      INTVAL (operands[2]));
  DONE;
})
1343
1344;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
1345;;
1346;; Parallel double-precision floating point arithmetic
1347;;
1348;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
1349
;; V2DF negation.  Expansion is handed to
;; ix86_expand_fp_absneg_operator with code NEG; the RTL template is
;; not emitted because the preparation code ends with DONE.
(define_expand "negv2df2"
  [(set (match_operand:V2DF 0 "register_operand" "")
	(neg:V2DF (match_operand:V2DF 1 "nonimmediate_operand" "")))]
  "TARGET_SSE2"
{
  ix86_expand_fp_absneg_operator (NEG, V2DFmode, operands);
  DONE;
})
1355
;; V2DF absolute value.  Expansion is handed to
;; ix86_expand_fp_absneg_operator with code ABS; the RTL template is
;; not emitted because the preparation code ends with DONE.
(define_expand "absv2df2"
  [(set (match_operand:V2DF 0 "register_operand" "")
	(abs:V2DF (match_operand:V2DF 1 "nonimmediate_operand" "")))]
  "TARGET_SSE2"
{
  ix86_expand_fp_absneg_operator (ABS, V2DFmode, operands);
  DONE;
})
1361
;; Packed double-precision addition.  The preparation code lets
;; ix86_fixup_binary_operands_no_copy adjust the operands for PLUS, then
;; the RTL template is emitted.
(define_expand "addv2df3"
  [(set (match_operand:V2DF 0 "register_operand" "")
	(plus:V2DF (match_operand:V2DF 1 "nonimmediate_operand" "")
		   (match_operand:V2DF 2 "nonimmediate_operand" "")))]
  "TARGET_SSE2"
{
  ix86_fixup_binary_operands_no_copy (PLUS, V2DFmode, operands);
})
1368
;; addpd: packed double-precision add.  Operands 1 and 2 are marked
;; commutative ("%"); operand 1 is tied to the destination and operand 2
;; may be a register or memory.  ix86_binary_operator_ok further
;; restricts the accepted operand combinations.
(define_insn "*addv2df3"
  [(set (match_operand:V2DF 0 "register_operand" "=x")
	(plus:V2DF (match_operand:V2DF 1 "nonimmediate_operand" "%0")
		   (match_operand:V2DF 2 "nonimmediate_operand" "xm")))]
  "TARGET_SSE2 && ix86_binary_operator_ok (PLUS, V2DFmode, operands)"
  "addpd\t{%2, %0|%0, %2}"
  [(set_attr "type" "sseadd")
   (set_attr "mode" "V2DF")])
1377
;; addsd in vector-merge form: element 0 of the result is
;; op1[0] + op2[0]; the other element is copied from operand 1
;; (vec_merge mask 1).  Operand 1 is tied to the destination.
;; The mode passed to ix86_binary_operator_ok is V2DFmode, the mode of
;; the operands of this pattern (it previously named V4SFmode, copied
;; from the single-precision pattern).
(define_insn "sse2_vmaddv2df3"
  [(set (match_operand:V2DF 0 "register_operand" "=x")
	(vec_merge:V2DF
	  (plus:V2DF (match_operand:V2DF 1 "register_operand" "0")
		     (match_operand:V2DF 2 "nonimmediate_operand" "xm"))
	  (match_dup 1)
	  (const_int 1)))]
  "TARGET_SSE2 && ix86_binary_operator_ok (PLUS, V2DFmode, operands)"
  "addsd\t{%2, %0|%0, %2}"
  [(set_attr "type" "sseadd")
   (set_attr "mode" "DF")])
1389
;; Packed double-precision subtraction.  The preparation code lets
;; ix86_fixup_binary_operands_no_copy adjust the operands for MINUS,
;; then the RTL template is emitted.
(define_expand "subv2df3"
  [(set (match_operand:V2DF 0 "register_operand" "")
	(minus:V2DF (match_operand:V2DF 1 "nonimmediate_operand" "")
		    (match_operand:V2DF 2 "nonimmediate_operand" "")))]
  "TARGET_SSE2"
{
  ix86_fixup_binary_operands_no_copy (MINUS, V2DFmode, operands);
})
1396
;; subpd: packed double-precision subtract, operand 1 - operand 2.
;; Operand 1 must be a register tied to the destination; operand 2 may
;; be a register or memory.
(define_insn "*subv2df3"
  [(set (match_operand:V2DF 0 "register_operand" "=x")
	(minus:V2DF (match_operand:V2DF 1 "register_operand" "0")
		    (match_operand:V2DF 2 "nonimmediate_operand" "xm")))]
  "TARGET_SSE2"
  "subpd\t{%2, %0|%0, %2}"
  [(set_attr "type" "sseadd")
   (set_attr "mode" "V2DF")])
1405
;; subsd in vector-merge form: element 0 of the result is
;; op1[0] - op2[0]; the other element is copied from operand 1
;; (vec_merge mask 1).  Operand 1 is tied to the destination.
(define_insn "sse2_vmsubv2df3"
  [(set (match_operand:V2DF 0 "register_operand" "=x")
	(vec_merge:V2DF
	  (minus:V2DF (match_operand:V2DF 1 "register_operand" "0")
		      (match_operand:V2DF 2 "nonimmediate_operand" "xm"))
	  (match_dup 1)
	  (const_int 1)))]
  "TARGET_SSE2"
  "subsd\t{%2, %0|%0, %2}"
  [(set_attr "type" "sseadd")
   (set_attr "mode" "DF")])
1417
;; Packed double-precision multiplication.  The preparation code lets
;; ix86_fixup_binary_operands_no_copy adjust the operands for MULT, then
;; the RTL template is emitted.
(define_expand "mulv2df3"
  [(set (match_operand:V2DF 0 "register_operand" "")
	(mult:V2DF (match_operand:V2DF 1 "nonimmediate_operand" "")
		   (match_operand:V2DF 2 "nonimmediate_operand" "")))]
  "TARGET_SSE2"
{
  ix86_fixup_binary_operands_no_copy (MULT, V2DFmode, operands);
})
1424
;; mulpd: packed double-precision multiply.  Operands 1 and 2 are marked
;; commutative ("%"); operand 1 is tied to the destination and operand 2
;; may be a register or memory.  ix86_binary_operator_ok further
;; restricts the accepted operand combinations.
(define_insn "*mulv2df3"
  [(set (match_operand:V2DF 0 "register_operand" "=x")
	(mult:V2DF (match_operand:V2DF 1 "nonimmediate_operand" "%0")
		   (match_operand:V2DF 2 "nonimmediate_operand" "xm")))]
  "TARGET_SSE2 && ix86_binary_operator_ok (MULT, V2DFmode, operands)"
  "mulpd\t{%2, %0|%0, %2}"
  [(set_attr "type" "ssemul")
   (set_attr "mode" "V2DF")])
1433
;; mulsd in vector-merge form: element 0 of the result is
;; op1[0] * op2[0]; the other element is copied from operand 1
;; (vec_merge mask 1).  Operand 1 is tied to the destination.
(define_insn "sse2_vmmulv2df3"
  [(set (match_operand:V2DF 0 "register_operand" "=x")
	(vec_merge:V2DF
	  (mult:V2DF (match_operand:V2DF 1 "register_operand" "0")
		     (match_operand:V2DF 2 "nonimmediate_operand" "xm"))
	  (match_dup 1)
	  (const_int 1)))]
  "TARGET_SSE2 && ix86_binary_operator_ok (MULT, V2DFmode, operands)"
  "mulsd\t{%2, %0|%0, %2}"
  [(set_attr "type" "ssemul")
   (set_attr "mode" "DF")])
1445
;; Packed double-precision division.  Operand 1 (the dividend) must
;; already be a register.  The preparation code lets
;; ix86_fixup_binary_operands_no_copy adjust the operands for DIV, then
;; the RTL template is emitted.
(define_expand "divv2df3"
  [(set (match_operand:V2DF 0 "register_operand" "")
	(div:V2DF (match_operand:V2DF 1 "register_operand" "")
		  (match_operand:V2DF 2 "nonimmediate_operand" "")))]
  "TARGET_SSE2"
{
  ix86_fixup_binary_operands_no_copy (DIV, V2DFmode, operands);
})
1452
;; divpd: packed double-precision divide, operand 1 / operand 2.
;; Operand 1 must be a register tied to the destination; operand 2 may
;; be a register or memory.
(define_insn "*divv2df3"
  [(set (match_operand:V2DF 0 "register_operand" "=x")
	(div:V2DF (match_operand:V2DF 1 "register_operand" "0")
		  (match_operand:V2DF 2 "nonimmediate_operand" "xm")))]
  "TARGET_SSE2"
  "divpd\t{%2, %0|%0, %2}"
  [(set_attr "type" "ssediv")
   (set_attr "mode" "V2DF")])
1461
;; divsd in vector-merge form: element 0 of the result is
;; op1[0] / op2[0]; the other element is copied from operand 1
;; (vec_merge mask 1).  Operand 1 is tied to the destination.
(define_insn "sse2_vmdivv2df3"
  [(set (match_operand:V2DF 0 "register_operand" "=x")
	(vec_merge:V2DF
	  (div:V2DF (match_operand:V2DF 1 "register_operand" "0")
		    (match_operand:V2DF 2 "nonimmediate_operand" "xm"))
	  (match_dup 1)
	  (const_int 1)))]
  "TARGET_SSE2"
  "divsd\t{%2, %0|%0, %2}"
  [(set_attr "type" "ssediv")
   (set_attr "mode" "DF")])
1473
;; sqrtpd: packed double-precision square root of operand 1 (register or
;; memory) into a register destination.
(define_insn "sqrtv2df2"
  [(set (match_operand:V2DF 0 "register_operand" "=x")
	(sqrt:V2DF (match_operand:V2DF 1 "nonimmediate_operand" "xm")))]
  "TARGET_SSE2"
  "sqrtpd\t{%1, %0|%0, %1}"
  [(set_attr "type" "sse")
   (set_attr "mode" "V2DF")])
1481
;; sqrtsd in vector-merge form: element 0 of the result is
;; sqrt (op1[0]); the other element is copied from operand 2, which is
;; tied to the destination (vec_merge mask 1).
;; Operand 1 uses nonimmediate_operand so that the predicate agrees with
;; its "xm" constraint, as in sqrtv2df2 and the other vm patterns; with
;; register_operand the memory alternative could not be matched
;; directly.
(define_insn "sse2_vmsqrtv2df2"
  [(set (match_operand:V2DF 0 "register_operand" "=x")
	(vec_merge:V2DF
	  (sqrt:V2DF (match_operand:V2DF 1 "nonimmediate_operand" "xm"))
	  (match_operand:V2DF 2 "register_operand" "0")
	  (const_int 1)))]
  "TARGET_SSE2"
  "sqrtsd\t{%1, %0|%0, %1}"
  [(set_attr "type" "sse")
   (set_attr "mode" "DF")])
1492
1493;; ??? For !flag_finite_math_only, the representation with SMIN/SMAX
1494;; isn't really correct, as those rtl operators aren't defined when 
1495;; applied to NaNs.  Hopefully the optimizers won't get too smart on us.
1496
;; Packed double-precision maximum.  Unless flag_finite_math_only is
;; set, operand 1 is forced into a register before
;; ix86_fixup_binary_operands_no_copy adjusts the operands for SMAX.
(define_expand "smaxv2df3"
  [(set (match_operand:V2DF 0 "register_operand" "")
	(smax:V2DF (match_operand:V2DF 1 "nonimmediate_operand" "")
		   (match_operand:V2DF 2 "nonimmediate_operand" "")))]
  "TARGET_SSE2"
{
  if (!flag_finite_math_only)
    operands[1] = force_reg (V2DFmode, operands[1]);
  ix86_fixup_binary_operands_no_copy (SMAX, V2DFmode, operands);
})
1507
;; maxpd, commutative form: matched only when flag_finite_math_only is
;; set.  Operands 1 and 2 are marked commutative ("%") and
;; ix86_binary_operator_ok restricts the accepted operand combinations.
(define_insn "*smaxv2df3_finite"
  [(set (match_operand:V2DF 0 "register_operand" "=x")
	(smax:V2DF (match_operand:V2DF 1 "nonimmediate_operand" "%0")
		   (match_operand:V2DF 2 "nonimmediate_operand" "xm")))]
  "TARGET_SSE2 && flag_finite_math_only
   && ix86_binary_operator_ok (SMAX, V2DFmode, operands)"
  "maxpd\t{%2, %0|%0, %2}"
  [(set_attr "type" "sseadd")
   (set_attr "mode" "V2DF")])
1517
;; maxpd, non-commutative form: operand 1 must be a register tied to the
;; destination and the operands are not marked as swappable.
(define_insn "*smaxv2df3"
  [(set (match_operand:V2DF 0 "register_operand" "=x")
	(smax:V2DF (match_operand:V2DF 1 "register_operand" "0")
		   (match_operand:V2DF 2 "nonimmediate_operand" "xm")))]
  "TARGET_SSE2"
  "maxpd\t{%2, %0|%0, %2}"
  [(set_attr "type" "sseadd")
   (set_attr "mode" "V2DF")])
1526
;; maxsd in vector-merge form: element 0 of the result is the smax of
;; op1[0] and op2[0]; the other element is copied from operand 1
;; (vec_merge mask 1).  Operand 1 is tied to the destination.
(define_insn "sse2_vmsmaxv2df3"
  [(set (match_operand:V2DF 0 "register_operand" "=x")
	(vec_merge:V2DF
	  (smax:V2DF (match_operand:V2DF 1 "register_operand" "0")
		     (match_operand:V2DF 2 "nonimmediate_operand" "xm"))
	  (match_dup 1)
	  (const_int 1)))]
  "TARGET_SSE2"
  "maxsd\t{%2, %0|%0, %2}"
  [(set_attr "type" "sseadd")
   (set_attr "mode" "DF")])
1538
;; Packed double-precision minimum.  Unless flag_finite_math_only is
;; set, operand 1 is forced into a register before
;; ix86_fixup_binary_operands_no_copy adjusts the operands for SMIN.
(define_expand "sminv2df3"
  [(set (match_operand:V2DF 0 "register_operand" "")
	(smin:V2DF (match_operand:V2DF 1 "nonimmediate_operand" "")
		   (match_operand:V2DF 2 "nonimmediate_operand" "")))]
  "TARGET_SSE2"
{
  if (!flag_finite_math_only)
    operands[1] = force_reg (V2DFmode, operands[1]);
  ix86_fixup_binary_operands_no_copy (SMIN, V2DFmode, operands);
})
1549
;; minpd, commutative form: matched only when flag_finite_math_only is
;; set.  Operands 1 and 2 are marked commutative ("%") and
;; ix86_binary_operator_ok restricts the accepted operand combinations.
(define_insn "*sminv2df3_finite"
  [(set (match_operand:V2DF 0 "register_operand" "=x")
	(smin:V2DF (match_operand:V2DF 1 "nonimmediate_operand" "%0")
		   (match_operand:V2DF 2 "nonimmediate_operand" "xm")))]
  "TARGET_SSE2 && flag_finite_math_only
   && ix86_binary_operator_ok (SMIN, V2DFmode, operands)"
  "minpd\t{%2, %0|%0, %2}"
  [(set_attr "type" "sseadd")
   (set_attr "mode" "V2DF")])
1559
;; minpd, non-commutative form: operand 1 must be a register tied to the
;; destination and the operands are not marked as swappable.
(define_insn "*sminv2df3"
  [(set (match_operand:V2DF 0 "register_operand" "=x")
	(smin:V2DF (match_operand:V2DF 1 "register_operand" "0")
		   (match_operand:V2DF 2 "nonimmediate_operand" "xm")))]
  "TARGET_SSE2"
  "minpd\t{%2, %0|%0, %2}"
  [(set_attr "type" "sseadd")
   (set_attr "mode" "V2DF")])
1568
;; minsd in vector-merge form: element 0 of the result is the smin of
;; op1[0] and op2[0]; the other element is copied from operand 1
;; (vec_merge mask 1).  Operand 1 is tied to the destination.
(define_insn "sse2_vmsminv2df3"
  [(set (match_operand:V2DF 0 "register_operand" "=x")
	(vec_merge:V2DF
	  (smin:V2DF (match_operand:V2DF 1 "register_operand" "0")
		     (match_operand:V2DF 2 "nonimmediate_operand" "xm"))
	  (match_dup 1)
	  (const_int 1)))]
  "TARGET_SSE2"
  "minsd\t{%2, %0|%0, %2}"
  [(set_attr "type" "sseadd")
   (set_attr "mode" "DF")])
1580
;; addsubpd: element 0 of the result is op1[0] - op2[0] and element 1 is
;; op1[1] + op2[1].  Operand 1 is tied to the destination.
;; In a vec_merge a set mask bit selects the element from the first
;; vector (the sum) and a clear bit from the second (the difference).
;; The mask is therefore 2: bit 1 set takes element 1 from the sum, bit
;; 0 clear takes element 0 from the difference.  The previous mask of 1
;; described the two lanes the wrong way round.
(define_insn "sse3_addsubv2df3"
  [(set (match_operand:V2DF 0 "register_operand" "=x")
	(vec_merge:V2DF
	  (plus:V2DF
	    (match_operand:V2DF 1 "register_operand" "0")
	    (match_operand:V2DF 2 "nonimmediate_operand" "xm"))
	  (minus:V2DF (match_dup 1) (match_dup 2))
	  (const_int 2)))]
  "TARGET_SSE3"
  "addsubpd\t{%2, %0|%0, %2}"
  [(set_attr "type" "sseadd")
   (set_attr "mode" "V2DF")])
1593
;; haddpd: horizontal add.
;; Result = { op1[0] + op1[1], op2[0] + op2[1] }.
;; Operand 1 is tied to the destination; operand 2 may be a register or
;; memory.
(define_insn "sse3_haddv2df3"
  [(set (match_operand:V2DF 0 "register_operand" "=x")
	(vec_concat:V2DF
	  (plus:DF
	    (vec_select:DF
	      (match_operand:V2DF 1 "register_operand" "0")
	      (parallel [(const_int 0)]))
	    (vec_select:DF (match_dup 1) (parallel [(const_int 1)])))
	  (plus:DF
	    (vec_select:DF
	      (match_operand:V2DF 2 "nonimmediate_operand" "xm")
	      (parallel [(const_int 0)]))
	    (vec_select:DF (match_dup 2) (parallel [(const_int 1)])))))]
  "TARGET_SSE3"
  "haddpd\t{%2, %0|%0, %2}"
  [(set_attr "type" "sseadd")
   (set_attr "mode" "V2DF")])
1611
;; hsubpd: horizontal subtract.
;; Result = { op1[0] - op1[1], op2[0] - op2[1] }.
;; Operand 1 is tied to the destination; operand 2 may be a register or
;; memory.
(define_insn "sse3_hsubv2df3"
  [(set (match_operand:V2DF 0 "register_operand" "=x")
	(vec_concat:V2DF
	  (minus:DF
	    (vec_select:DF
	      (match_operand:V2DF 1 "register_operand" "0")
	      (parallel [(const_int 0)]))
	    (vec_select:DF (match_dup 1) (parallel [(const_int 1)])))
	  (minus:DF
	    (vec_select:DF
	      (match_operand:V2DF 2 "nonimmediate_operand" "xm")
	      (parallel [(const_int 0)]))
	    (vec_select:DF (match_dup 2) (parallel [(const_int 1)])))))]
  "TARGET_SSE3"
  "hsubpd\t{%2, %0|%0, %2}"
  [(set_attr "type" "sseadd")
   (set_attr "mode" "V2DF")])
1629
;; Sum reduction of a V2DF vector.  Emits sse3_haddv2df3 with operand 1
;; as both sources, so each element of operand 0 holds
;; op1[0] + op1[1].
(define_expand "reduc_splus_v2df"
  [(match_operand:V2DF 0 "register_operand" "")
   (match_operand:V2DF 1 "register_operand" "")]
  "TARGET_SSE3"
{
  emit_insn (gen_sse3_haddv2df3 (operands[0], operands[1], operands[1]));
  DONE;
})
1638
1639;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
1640;;
1641;; Parallel double-precision floating point comparisons
1642;;
1643;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
1644
;; cmp<cond>pd: packed double-precision comparison.  Operand 3 is the
;; comparison operator (accepted by sse_comparison_operator) applied to
;; operands 1 and 2; operand 1 is tied to the destination.
;; NOTE(review): the %D3 modifier presumably prints the condition name
;; of operator 3 inside the mnemonic.
(define_insn "sse2_maskcmpv2df3"
  [(set (match_operand:V2DF 0 "register_operand" "=x")
	(match_operator:V2DF 3 "sse_comparison_operator"
		[(match_operand:V2DF 1 "register_operand" "0")
		 (match_operand:V2DF 2 "nonimmediate_operand" "xm")]))]
  "TARGET_SSE2"
  "cmp%D3pd\t{%2, %0|%0, %2}"
  [(set_attr "type" "ssecmp")
   (set_attr "mode" "V2DF")])
1654
;; cmp<cond>sd in vector-merge form: element 0 of the result is the
;; comparison (operator 3) of op1[0] and op2[0]; the other element is
;; copied from operand 1 (vec_merge mask 1).  Operand 1 is tied to the
;; destination.
(define_insn "sse2_vmmaskcmpv2df3"
  [(set (match_operand:V2DF 0 "register_operand" "=x")
	(vec_merge:V2DF
	  (match_operator:V2DF 3 "sse_comparison_operator"
		[(match_operand:V2DF 1 "register_operand" "0")
		 (match_operand:V2DF 2 "nonimmediate_operand" "xm")])
	  (match_dup 1)
	  (const_int 1)))]
  "TARGET_SSE2"
  "cmp%D3sd\t{%2, %0|%0, %2}"
  [(set_attr "type" "ssecmp")
   (set_attr "mode" "DF")])
1667
;; comisd: compare element 0 of operand 0 with element 0 of operand 1
;; and set FLAGS_REG in CCFP mode.  Operand 0 must be an SSE register;
;; operand 1 may be a register or memory.
(define_insn "sse2_comi"
  [(set (reg:CCFP FLAGS_REG)
	(compare:CCFP
	  (vec_select:DF
	    (match_operand:V2DF 0 "register_operand" "x")
	    (parallel [(const_int 0)]))
	  (vec_select:DF
	    (match_operand:V2DF 1 "nonimmediate_operand" "xm")
	    (parallel [(const_int 0)]))))]
  "TARGET_SSE2"
  "comisd\t{%1, %0|%0, %1}"
  [(set_attr "type" "ssecomi")
   (set_attr "mode" "DF")])
1681
;; ucomisd: compare element 0 of operand 0 with element 0 of operand 1
;; and set FLAGS_REG in CCFPU mode.  Operand 0 must be an SSE register;
;; operand 1 may be a register or memory.
(define_insn "sse2_ucomi"
  [(set (reg:CCFPU FLAGS_REG)
	(compare:CCFPU
	  (vec_select:DF
	    (match_operand:V2DF 0 "register_operand" "x")
	    (parallel [(const_int 0)]))
	  (vec_select:DF
	    (match_operand:V2DF 1 "nonimmediate_operand" "xm")
	    (parallel [(const_int 0)]))))]
  "TARGET_SSE2"
  "ucomisd\t{%1, %0|%0, %1}"
  [(set_attr "type" "ssecomi")
   (set_attr "mode" "DF")])
1695
;; Vector conditional move for V2DF: operand 0 receives operand 1 or
;; operand 2 depending on comparison operator 3 applied to operands 4
;; and 5.  Expansion is delegated to ix86_expand_fp_vcond; the expander
;; fails when that function returns zero.
(define_expand "vcondv2df"
  [(set (match_operand:V2DF 0 "register_operand" "")
	(if_then_else:V2DF
	  (match_operator 3 ""
	    [(match_operand:V2DF 4 "nonimmediate_operand" "")
	     (match_operand:V2DF 5 "nonimmediate_operand" "")])
	  (match_operand:V2DF 1 "general_operand" "")
	  (match_operand:V2DF 2 "general_operand" "")))]
  "TARGET_SSE2"
{
  if (!ix86_expand_fp_vcond (operands))
    FAIL;
  DONE;
})
1711
1712;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
1713;;
1714;; Parallel double-precision floating point logical operations
1715;;
1716;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
1717
;; Bitwise AND on V2DF values.  The preparation code lets
;; ix86_fixup_binary_operands_no_copy adjust the operands for AND, then
;; the RTL template is emitted.
(define_expand "andv2df3"
  [(set (match_operand:V2DF 0 "register_operand" "")
	(and:V2DF (match_operand:V2DF 1 "nonimmediate_operand" "")
		  (match_operand:V2DF 2 "nonimmediate_operand" "")))]
  "TARGET_SSE2"
{
  ix86_fixup_binary_operands_no_copy (AND, V2DFmode, operands);
})
1724
;; andpd: bitwise AND of two V2DF values.  Operands 1 and 2 are marked
;; commutative ("%"); operand 1 is tied to the destination and operand 2
;; may be a register or memory.  ix86_binary_operator_ok further
;; restricts the accepted operand combinations.
(define_insn "*andv2df3"
  [(set (match_operand:V2DF 0 "register_operand" "=x")
	(and:V2DF (match_operand:V2DF 1 "nonimmediate_operand" "%0")
		  (match_operand:V2DF 2 "nonimmediate_operand" "xm")))]
  "TARGET_SSE2 && ix86_binary_operator_ok (AND, V2DFmode, operands)"
  "andpd\t{%2, %0|%0, %2}"
  [(set_attr "type" "sselog")
   (set_attr "mode" "V2DF")])
1733
;; andnpd: operand 0 = (~operand 1) & operand 2 on V2DF values.
;; Operand 1 must be a register tied to the destination; operand 2 may
;; be a register or memory.
(define_insn "sse2_nandv2df3"
  [(set (match_operand:V2DF 0 "register_operand" "=x")
	(and:V2DF (not:V2DF (match_operand:V2DF 1 "register_operand" "0"))
		  (match_operand:V2DF 2 "nonimmediate_operand" "xm")))]
  "TARGET_SSE2"
  "andnpd\t{%2, %0|%0, %2}"
  [(set_attr "type" "sselog")
   (set_attr "mode" "V2DF")])
1742
;; Bitwise inclusive OR on V2DF values.  The preparation code lets
;; ix86_fixup_binary_operands_no_copy adjust the operands for IOR, then
;; the RTL template is emitted.
(define_expand "iorv2df3"
  [(set (match_operand:V2DF 0 "register_operand" "")
	(ior:V2DF (match_operand:V2DF 1 "nonimmediate_operand" "")
		  (match_operand:V2DF 2 "nonimmediate_operand" "")))]
  "TARGET_SSE2"
{
  ix86_fixup_binary_operands_no_copy (IOR, V2DFmode, operands);
})
1749
;; orpd: bitwise inclusive OR of two V2DF values.  Operands 1 and 2 are
;; marked commutative ("%"); operand 1 is tied to the destination and
;; operand 2 may be a register or memory.  ix86_binary_operator_ok
;; further restricts the accepted operand combinations.
(define_insn "*iorv2df3"
  [(set (match_operand:V2DF 0 "register_operand" "=x")
	(ior:V2DF (match_operand:V2DF 1 "nonimmediate_operand" "%0")
		  (match_operand:V2DF 2 "nonimmediate_operand" "xm")))]
  "TARGET_SSE2 && ix86_binary_operator_ok (IOR, V2DFmode, operands)"
  "orpd\t{%2, %0|%0, %2}"
  [(set_attr "type" "sselog")
   (set_attr "mode" "V2DF")])
1758
;; Bitwise exclusive OR on V2DF values.  The preparation code lets
;; ix86_fixup_binary_operands_no_copy adjust the operands for XOR, then
;; the RTL template is emitted.
(define_expand "xorv2df3"
  [(set (match_operand:V2DF 0 "register_operand" "")
	(xor:V2DF (match_operand:V2DF 1 "nonimmediate_operand" "")
		  (match_operand:V2DF 2 "nonimmediate_operand" "")))]
  "TARGET_SSE2"
{
  ix86_fixup_binary_operands_no_copy (XOR, V2DFmode, operands);
})
1765
;; xorpd: bitwise exclusive OR of two V2DF values.  Operands 1 and 2 are
;; marked commutative ("%"); operand 1 is tied to the destination and
;; operand 2 may be a register or memory.  ix86_binary_operator_ok
;; further restricts the accepted operand combinations.
(define_insn "*xorv2df3"
  [(set (match_operand:V2DF 0 "register_operand" "=x")
	(xor:V2DF (match_operand:V2DF 1 "nonimmediate_operand" "%0")
		  (match_operand:V2DF 2 "nonimmediate_operand" "xm")))]
  "TARGET_SSE2 && ix86_binary_operator_ok (XOR, V2DFmode, operands)"
  "xorpd\t{%2, %0|%0, %2}"
  [(set_attr "type" "sselog")
   (set_attr "mode" "V2DF")])
1774
;; Also define scalar versions.  These are used for abs, neg, and
;; conditional move.  Using subregs into vector modes causes register
;; allocation lossage.  These patterns do not allow memory operands
;; because the native instructions read the full 128 bits.
1779
;; Scalar DF bitwise AND carried out with the packed andpd instruction.
;; Both inputs must be SSE registers (no memory alternative); operand 1 is
;; tied to the destination.  The "mode" attribute is V2DF because the
;; emitted instruction is the packed form.
(define_insn "*anddf3"
  [(set (match_operand:DF 0 "register_operand" "=x")
	(and:DF (match_operand:DF 1 "register_operand" "0")
		(match_operand:DF 2 "register_operand" "x")))]
  "TARGET_SSE2"
  "andpd\t{%2, %0|%0, %2}"
  [(set_attr "type" "sselog")
   (set_attr "mode" "V2DF")])
1788
;; Scalar DF AND-NOT: operand 0 = (NOT operand 1) AND operand 2, carried out
;; with the packed andnpd instruction.  Both inputs must be SSE registers;
;; operand 1 (the complemented one) is tied to the destination.
(define_insn "*nanddf3"
  [(set (match_operand:DF 0 "register_operand" "=x")
	(and:DF (not:DF (match_operand:DF 1 "register_operand" "0"))
		(match_operand:DF 2 "register_operand" "x")))]
  "TARGET_SSE2"
  "andnpd\t{%2, %0|%0, %2}"
  [(set_attr "type" "sselog")
   (set_attr "mode" "V2DF")])
1797
;; Scalar DF bitwise inclusive OR carried out with the packed orpd
;; instruction.  Both inputs must be SSE registers; operand 1 is tied to
;; the destination.
(define_insn "*iordf3"
  [(set (match_operand:DF 0 "register_operand" "=x")
	(ior:DF (match_operand:DF 1 "register_operand" "0")
		(match_operand:DF 2 "register_operand" "x")))]
  "TARGET_SSE2"
  "orpd\t{%2, %0|%0, %2}"
  [(set_attr "type" "sselog")
   (set_attr "mode" "V2DF")])
1806
;; Scalar DF bitwise exclusive OR carried out with the packed xorpd
;; instruction.  Both inputs must be SSE registers; operand 1 is tied to
;; the destination.
(define_insn "*xordf3"
  [(set (match_operand:DF 0 "register_operand" "=x")
	(xor:DF (match_operand:DF 1 "register_operand" "0")
		(match_operand:DF 2 "register_operand" "x")))]
  "TARGET_SSE2"
  "xorpd\t{%2, %0|%0, %2}"
  [(set_attr "type" "sselog")
   (set_attr "mode" "V2DF")])
1815
1816;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
1817;;
1818;; Parallel double-precision floating point conversion operations
1819;;
1820;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
1821
;; Convert two packed 32-bit integers (V2SI) to two doubles (V2DF) with
;; cvtpi2pd.  Alternative 0 reads the source from an MMX register ("y") and
;; is tagged unit "mmx"; alternative 1 reads it from memory.
(define_insn "sse2_cvtpi2pd"
  [(set (match_operand:V2DF 0 "register_operand" "=x,x")
	(float:V2DF (match_operand:V2SI 1 "nonimmediate_operand" "y,m")))]
  "TARGET_SSE2"
  "cvtpi2pd\t{%1, %0|%0, %1}"
  [(set_attr "type" "ssecvt")
   (set_attr "unit" "mmx,*")
   (set_attr "mode" "V2DF")])
1830
;; Convert two doubles (V2DF, register or memory) to two 32-bit integers
;; placed in an MMX register, using cvtpd2pi.  The conversion is modelled
;; as UNSPEC_FIX_NOTRUNC rather than the RTL "fix" code, i.e. it is not
;; described as a truncating conversion.
(define_insn "sse2_cvtpd2pi"
  [(set (match_operand:V2SI 0 "register_operand" "=y")
	(unspec:V2SI [(match_operand:V2DF 1 "nonimmediate_operand" "xm")]
		     UNSPEC_FIX_NOTRUNC))]
  "TARGET_SSE2"
  "cvtpd2pi\t{%1, %0|%0, %1}"
  [(set_attr "type" "ssecvt")
   (set_attr "unit" "mmx")
   (set_attr "mode" "DI")])
1840
;; Truncating conversion (RTL "fix") of two doubles (V2DF, register or
;; memory) to two 32-bit integers placed in an MMX register, using
;; cvttpd2pi.
;; NOTE(review): the "mode" attribute is TI here while sse2_cvtpd2pi, which
;; has the same operand shapes, uses DI -- confirm which is intended.
(define_insn "sse2_cvttpd2pi"
  [(set (match_operand:V2SI 0 "register_operand" "=y")
	(fix:V2SI (match_operand:V2DF 1 "nonimmediate_operand" "xm")))]
  "TARGET_SSE2"
  "cvttpd2pi\t{%1, %0|%0, %1}"
  [(set_attr "type" "ssecvt")
   (set_attr "unit" "mmx")
   (set_attr "mode" "TI")])
1849
;; Convert a 32-bit integer (operand 2, general register or memory) to
;; double and merge it into element 0 of operand 1; the vec_merge mask
;; (const_int 1) takes only element 0 from the converted value, so element 1
;; of operand 1 is preserved.  Operand 1 is tied to the destination.
(define_insn "sse2_cvtsi2sd"
  [(set (match_operand:V2DF 0 "register_operand" "=x,x")
	(vec_merge:V2DF
	  (vec_duplicate:V2DF
	    (float:DF (match_operand:SI 2 "nonimmediate_operand" "r,m")))
	  (match_operand:V2DF 1 "register_operand" "0,0")
	  (const_int 1)))]
  "TARGET_SSE2"
  "cvtsi2sd\t{%2, %0|%0, %2}"
  [(set_attr "type" "sseicvt")
   (set_attr "mode" "DF")
   (set_attr "athlon_decode" "double,direct")
   (set_attr "amdfam10_decode" "vector,double")])
1863
;; 64-bit variant of sse2_cvtsi2sd: convert a DImode integer (operand 2,
;; general register or memory) to double and merge it into element 0 of
;; operand 1, preserving element 1.  Only enabled for TARGET_64BIT.
(define_insn "sse2_cvtsi2sdq"
  [(set (match_operand:V2DF 0 "register_operand" "=x,x")
	(vec_merge:V2DF
	  (vec_duplicate:V2DF
	    (float:DF (match_operand:DI 2 "nonimmediate_operand" "r,m")))
	  (match_operand:V2DF 1 "register_operand" "0,0")
	  (const_int 1)))]
  "TARGET_SSE2 && TARGET_64BIT"
  "cvtsi2sdq\t{%2, %0|%0, %2}"
  [(set_attr "type" "sseicvt")
   (set_attr "mode" "DF")
   (set_attr "athlon_decode" "double,direct")
   (set_attr "amdfam10_decode" "vector,double")])
1877
;; Convert element 0 of a V2DF (SSE register or memory) to a 32-bit integer
;; in a general register with cvtsd2si.  The conversion is modelled as
;; UNSPEC_FIX_NOTRUNC rather than the truncating RTL "fix" code.
(define_insn "sse2_cvtsd2si"
  [(set (match_operand:SI 0 "register_operand" "=r,r")
	(unspec:SI
	  [(vec_select:DF
	     (match_operand:V2DF 1 "nonimmediate_operand" "x,m")
	     (parallel [(const_int 0)]))]
	  UNSPEC_FIX_NOTRUNC))]
  "TARGET_SSE2"
  "cvtsd2si\t{%1, %0|%0, %1}"
  [(set_attr "type" "sseicvt")
   (set_attr "athlon_decode" "double,vector")
   (set_attr "amdfam10_decode" "double,double")
   (set_attr "mode" "SI")])
1891
;; 64-bit variant of sse2_cvtsd2si: convert element 0 of a V2DF to a DImode
;; integer in a general register with cvtsd2siq, modelled as
;; UNSPEC_FIX_NOTRUNC.  Only enabled for TARGET_64BIT.
(define_insn "sse2_cvtsd2siq"
  [(set (match_operand:DI 0 "register_operand" "=r,r")
	(unspec:DI
	  [(vec_select:DF
	     (match_operand:V2DF 1 "nonimmediate_operand" "x,m")
	     (parallel [(const_int 0)]))]
	  UNSPEC_FIX_NOTRUNC))]
  "TARGET_SSE2 && TARGET_64BIT"
  "cvtsd2siq\t{%1, %0|%0, %1}"
  [(set_attr "type" "sseicvt")
   (set_attr "athlon_decode" "double,vector")
   (set_attr "amdfam10_decode" "double,double")
   (set_attr "mode" "DI")])
1905
;; Truncating conversion (RTL "fix") of element 0 of a V2DF (SSE register
;; or memory) to a 32-bit integer in a general register, using cvttsd2si.
(define_insn "sse2_cvttsd2si"
  [(set (match_operand:SI 0 "register_operand" "=r,r")
	(fix:SI
	  (vec_select:DF
	    (match_operand:V2DF 1 "nonimmediate_operand" "x,m")
	    (parallel [(const_int 0)]))))]
  "TARGET_SSE2"
  "cvttsd2si\t{%1, %0|%0, %1}"
  [(set_attr "type" "sseicvt")
   (set_attr "mode" "SI")
   (set_attr "athlon_decode" "double,vector")
   (set_attr "amdfam10_decode" "double,double")])
1918
;; 64-bit variant of sse2_cvttsd2si: truncating conversion of element 0 of
;; a V2DF to a DImode integer in a general register, using cvttsd2siq.
;; Only enabled for TARGET_64BIT.
(define_insn "sse2_cvttsd2siq"
  [(set (match_operand:DI 0 "register_operand" "=r,r")
	(fix:DI
	  (vec_select:DF
	    (match_operand:V2DF 1 "nonimmediate_operand" "x,m")
	    (parallel [(const_int 0)]))))]
  "TARGET_SSE2 && TARGET_64BIT"
  "cvttsd2siq\t{%1, %0|%0, %1}"
  [(set_attr "type" "sseicvt")
   (set_attr "mode" "DI")
   (set_attr "athlon_decode" "double,vector")
   (set_attr "amdfam10_decode" "double,double")])
1931
;; Convert elements 0 and 1 of a V4SI (SSE register or memory) to two
;; doubles with cvtdq2pd; elements 2 and 3 of the source are not used.
(define_insn "sse2_cvtdq2pd"
  [(set (match_operand:V2DF 0 "register_operand" "=x")
	(float:V2DF
	  (vec_select:V2SI
	    (match_operand:V4SI 1 "nonimmediate_operand" "xm")
	    (parallel [(const_int 0) (const_int 1)]))))]
  "TARGET_SSE2"
  "cvtdq2pd\t{%1, %0|%0, %1}"
  [(set_attr "type" "ssecvt")
   (set_attr "mode" "V2DF")])
1942
;; Expander for cvtpd2dq.  The V4SI result is described as the two
;; converted integers (UNSPEC_FIX_NOTRUNC of operand 1) concatenated with a
;; V2SI zero vector; operand 2 is not supplied by the caller but filled in
;; here with that zero constant.
(define_expand "sse2_cvtpd2dq"
  [(set (match_operand:V4SI 0 "register_operand" "")
	(vec_concat:V4SI
	  (unspec:V2SI
	    [(match_operand:V2DF 1 "nonimmediate_operand" "")]
	    UNSPEC_FIX_NOTRUNC)
	  (match_dup 2)))]
  "TARGET_SSE2"
{
  /* Upper half of the result: all-zero V2SI.  */
  operands[2] = CONST0_RTX (V2SImode);
})
1951
;; Matching insn for the sse2_cvtpd2dq expander: two doubles converted
;; (UNSPEC_FIX_NOTRUNC) into the first V2SI half of the result, the second
;; half being a constant zero vector (operand 2, const0_operand).
(define_insn "*sse2_cvtpd2dq"
  [(set (match_operand:V4SI 0 "register_operand" "=x")
	(vec_concat:V4SI
	  (unspec:V2SI [(match_operand:V2DF 1 "nonimmediate_operand" "xm")]
		       UNSPEC_FIX_NOTRUNC)
	  (match_operand:V2SI 2 "const0_operand" "")))]
  "TARGET_SSE2"
  "cvtpd2dq\t{%1, %0|%0, %1}"
  [(set_attr "type" "ssecvt")
   (set_attr "mode" "TI")
   (set_attr "amdfam10_decode" "double")])
1963
;; Expander for cvttpd2dq.  The V4SI result is described as the truncating
;; conversion (RTL "fix") of operand 1 concatenated with a V2SI zero vector;
;; operand 2 is not supplied by the caller but filled in here with that
;; zero constant.
(define_expand "sse2_cvttpd2dq"
  [(set (match_operand:V4SI 0 "register_operand" "")
	(vec_concat:V4SI
	  (fix:V2SI
	    (match_operand:V2DF 1 "nonimmediate_operand" ""))
	  (match_dup 2)))]
  "TARGET_SSE2"
{
  /* Upper half of the result: all-zero V2SI.  */
  operands[2] = CONST0_RTX (V2SImode);
})
1971
;; Matching insn for the sse2_cvttpd2dq expander: two doubles converted with
;; truncation (RTL "fix") into the first V2SI half of the result, the second
;; half being a constant zero vector (operand 2, const0_operand).
(define_insn "*sse2_cvttpd2dq"
  [(set (match_operand:V4SI 0 "register_operand" "=x")
	(vec_concat:V4SI
	  (fix:V2SI (match_operand:V2DF 1 "nonimmediate_operand" "xm"))
	  (match_operand:V2SI 2 "const0_operand" "")))]
  "TARGET_SSE2"
  "cvttpd2dq\t{%1, %0|%0, %1}"
  [(set_attr "type" "ssecvt")
   (set_attr "mode" "TI")
   (set_attr "amdfam10_decode" "double")])
1982
;; cvtsd2ss: narrow operand 2 (V2DF, SSE register or memory) to single
;; precision and merge the result into element 0 of operand 1 (V4SF); the
;; vec_merge mask (const_int 1) keeps the remaining elements of operand 1.
;; Operand 1 is tied to the destination.
(define_insn "sse2_cvtsd2ss"
  [(set (match_operand:V4SF 0 "register_operand" "=x,x")
	(vec_merge:V4SF
	  (vec_duplicate:V4SF
	    (float_truncate:V2SF
	      (match_operand:V2DF 2 "nonimmediate_operand" "x,m")))
	  (match_operand:V4SF 1 "register_operand" "0,0")
	  (const_int 1)))]
  "TARGET_SSE2"
  "cvtsd2ss\t{%2, %0|%0, %2}"
  [(set_attr "type" "ssecvt")
   (set_attr "athlon_decode" "vector,double")
   (set_attr "amdfam10_decode" "vector,double")
   (set_attr "mode" "SF")])
1997
;; cvtss2sd: widen the low single-precision elements of operand 2 (V4SF,
;; SSE register or memory) to double and merge the result into element 0 of
;; operand 1 (V2DF); the vec_merge mask (const_int 1) keeps element 1 of
;; operand 1.  Operand 1 is tied to the destination.
(define_insn "sse2_cvtss2sd"
  [(set (match_operand:V2DF 0 "register_operand" "=x,x")
	(vec_merge:V2DF
	  (float_extend:V2DF
	    (vec_select:V2SF
	      (match_operand:V4SF 2 "nonimmediate_operand" "x,m")
	      (parallel [(const_int 0) (const_int 1)])))
	  (match_operand:V2DF 1 "register_operand" "0,0")
	  (const_int 1)))]
  "TARGET_SSE2"
  "cvtss2sd\t{%2, %0|%0, %2}"
  [(set_attr "type" "ssecvt")
   (set_attr "amdfam10_decode" "vector,double")
   (set_attr "mode" "DF")])
2012
;; Expander for cvtpd2ps.  The V4SF result is described as operand 1
;; narrowed to V2SF concatenated with a V2SF zero vector; operand 2 is not
;; supplied by the caller but filled in here with that zero constant.
;; Constraints are meaningless in a define_expand, so operand 1 carries an
;; empty constraint string like the other expanders in this file.
(define_expand "sse2_cvtpd2ps"
  [(set (match_operand:V4SF 0 "register_operand" "")
	(vec_concat:V4SF
	  (float_truncate:V2SF
	    (match_operand:V2DF 1 "nonimmediate_operand" ""))
	  (match_dup 2)))]
  "TARGET_SSE2"
  "operands[2] = CONST0_RTX (V2SFmode);")
2021
;; Matching insn for the sse2_cvtpd2ps expander: two doubles narrowed to
;; single precision in the first V2SF half of the result, the second half
;; being a constant zero vector (operand 2, const0_operand).
(define_insn "*sse2_cvtpd2ps"
  [(set (match_operand:V4SF 0 "register_operand" "=x")
	(vec_concat:V4SF
	  (float_truncate:V2SF
	    (match_operand:V2DF 1 "nonimmediate_operand" "xm"))
	  (match_operand:V2SF 2 "const0_operand" "")))]
  "TARGET_SSE2"
  "cvtpd2ps\t{%1, %0|%0, %1}"
  [(set_attr "type" "ssecvt")
   (set_attr "mode" "V4SF")
   (set_attr "amdfam10_decode" "double")])
2033
;; Widen elements 0 and 1 of a V4SF (SSE register or memory) to two doubles
;; with cvtps2pd; elements 2 and 3 of the source are not used.
(define_insn "sse2_cvtps2pd"
  [(set (match_operand:V2DF 0 "register_operand" "=x")
	(float_extend:V2DF
	  (vec_select:V2SF
	    (match_operand:V4SF 1 "nonimmediate_operand" "xm")
	    (parallel [(const_int 0) (const_int 1)]))))]
  "TARGET_SSE2"
  "cvtps2pd\t{%1, %0|%0, %1}"
  [(set_attr "type" "ssecvt")
   (set_attr "mode" "V2DF")
   (set_attr "amdfam10_decode" "direct")])
2045
2046;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
2047;;
2048;; Parallel double-precision floating point element swizzling
2049;;
2050;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
2051
;; Interleave high elements: the result selects elements 1 and 3 of the
;; concatenation of operands 1 and 2, i.e. { operand 1[1], operand 2[1] }.
;; Alternatives:
;;   0  reg <- (0, reg)   unpckhpd.
;;   1  reg <- (mem, 0)   destination already holds operand 2; movlpd loads
;;                        the high half of the memory operand (%H1) into the
;;                        low element.
;;   2  mem <- (reg, 0)   destination memory already holds operand 2; movhpd
;;                        stores the high element of operand 1.
;; The condition rejects the case where both inputs are memory.
(define_insn "sse2_unpckhpd"
  [(set (match_operand:V2DF 0 "nonimmediate_operand"     "=x,x,m")
	(vec_select:V2DF
	  (vec_concat:V4DF
	    (match_operand:V2DF 1 "nonimmediate_operand" " 0,o,x")
	    (match_operand:V2DF 2 "nonimmediate_operand" " x,0,0"))
	  (parallel [(const_int 1)
		     (const_int 3)])))]
  "TARGET_SSE2 && !(MEM_P (operands[1]) && MEM_P (operands[2]))"
  "@
   unpckhpd\t{%2, %0|%0, %2}
   movlpd\t{%H1, %0|%0, %H1}
   movhpd\t{%1, %0|%0, %1}"
  [(set_attr "type" "sselog,ssemov,ssemov")
   (set_attr "mode" "V2DF,V1DF,V1DF")])
2067
;; Duplicate element 0 of operand 1 into both elements of the result
;; (elements 0 and 2 of operand 1 concatenated with itself).
;; Alternative 0 (register destination) emits movddup.  Alternative 1
;; (offsettable memory destination, register source) emits "#", i.e. it has
;; no direct assembler form and must be split into other insns.
(define_insn "*sse3_movddup"
  [(set (match_operand:V2DF 0 "nonimmediate_operand"     "=x,o")
	(vec_select:V2DF
	  (vec_concat:V4DF
	    (match_operand:V2DF 1 "nonimmediate_operand" "xm,x")
	    (match_dup 1))
	  (parallel [(const_int 0)
		     (const_int 2)])))]
  "TARGET_SSE3 && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
  "@
   movddup\t{%1, %0|%0, %1}
   #"
  [(set_attr "type" "sselog1,ssemov")
   (set_attr "mode" "V2DF")])
2082
;; After reload, split "duplicate element 0 of a register into a V2DF
;; memory destination" into two scalar DF stores of the same register, one
;; at byte offset 0 and one at byte offset 8 of the destination.
(define_split
  [(set (match_operand:V2DF 0 "memory_operand" "")
	(vec_select:V2DF
	  (vec_concat:V4DF
	    (match_operand:V2DF 1 "register_operand" "")
	    (match_dup 1))
	  (parallel [(const_int 0) (const_int 2)])))]
  "TARGET_SSE3 && reload_completed"
  [(const_int 0)]
{
  /* View the source hard register in DFmode (its element 0).  */
  rtx elt0 = gen_rtx_REG (DFmode, REGNO (operands[1]));
  rtx slot;

  /* Low half of the destination.  */
  slot = adjust_address (operands[0], DFmode, 0);
  emit_move_insn (slot, elt0);

  /* High half of the destination.  */
  slot = adjust_address (operands[0], DFmode, 8);
  emit_move_insn (slot, elt0);

  DONE;
})
2099
;; Interleave low elements: the result selects elements 0 and 2 of the
;; concatenation of operands 1 and 2, i.e. { operand 1[0], operand 2[0] }.
;; Operand 1 is tied to the destination in every alternative.
;; Alternatives:
;;   0  reg <- (0, reg)   unpcklpd.
;;   1  reg <- (0, mem)   movhpd loads the memory operand into the high
;;                        element.
;;   2  mem <- (0, reg)   movlpd stores the low element of operand 2 into
;;                        the high half of the destination (%H0).
(define_insn "sse2_unpcklpd"
  [(set (match_operand:V2DF 0 "nonimmediate_operand"     "=x,x,o")
	(vec_select:V2DF
	  (vec_concat:V4DF
	    (match_operand:V2DF 1 "nonimmediate_operand" " 0,0,0")
	    (match_operand:V2DF 2 "nonimmediate_operand" " x,m,x"))
	  (parallel [(const_int 0)
		     (const_int 2)])))]
  "TARGET_SSE2 && !(MEM_P (operands[1]) && MEM_P (operands[2]))"
  "@
   unpcklpd\t{%2, %0|%0, %2}
   movhpd\t{%2, %0|%0, %2}
   movlpd\t{%2, %H0|%H0, %2}"
  [(set_attr "type" "sselog,ssemov,ssemov")
   (set_attr "mode" "V2DF,V1DF,V1DF")])
2115
;; Expander taking the shufpd immediate as operand 3 (a const_int) and
;; re-expressing it as the two element selectors expected by
;; sse2_shufpd_1: bit 0 of the immediate selects element 0 or 1 (from
;; operand 1), bit 1 selects element 2 or 3 (from operand 2) of the
;; four-element concatenation.
(define_expand "sse2_shufpd"
  [(match_operand:V2DF 0 "register_operand" "")
   (match_operand:V2DF 1 "register_operand" "")
   (match_operand:V2DF 2 "nonimmediate_operand" "")
   (match_operand:SI 3 "const_int_operand" "")]
  "TARGET_SSE2"
{
  int imm = INTVAL (operands[3]);
  rtx sel_lo = GEN_INT (imm & 1);		/* 0 or 1 */
  rtx sel_hi = GEN_INT ((imm & 2) ? 3 : 2);	/* 2 or 3 */

  emit_insn (gen_sse2_shufpd_1 (operands[0], operands[1], operands[2],
				sel_lo, sel_hi));
  DONE;
})
2129
;; shufpd expressed as a vec_select from the concatenation of operands 1
;; and 2: operand 3 (0..1) picks the low result element from operand 1,
;; operand 4 (2..3) picks the high result element from operand 2.  The
;; output code folds the two selectors back into the 2-bit immediate and
;; overwrites operands[3] with it before printing.
(define_insn "sse2_shufpd_1"
  [(set (match_operand:V2DF 0 "register_operand" "=x")
	(vec_select:V2DF
	  (vec_concat:V4DF
	    (match_operand:V2DF 1 "register_operand" "0")
	    (match_operand:V2DF 2 "nonimmediate_operand" "xm"))
	  (parallel
	    [(match_operand 3 "const_0_to_1_operand" "")
	     (match_operand 4 "const_2_to_3_operand" "")])))]
  "TARGET_SSE2"
{
  /* Bit 0: selector for the low element (already 0 or 1).
     Bit 1: selector for the high element, rebased from 2..3 to 0..1.  */
  int bit0 = INTVAL (operands[3]);
  int bit1 = INTVAL (operands[4]) - 2;

  operands[3] = GEN_INT (bit0 | (bit1 << 1));

  return "shufpd\t{%3, %2, %0|%0, %2, %3}";
}
  [(set_attr "type" "sselog")
   (set_attr "mode" "V2DF")])
2149
;; Extract element 1 (the high double) of a V2DF into a DF destination.
;; Alternatives:
;;   0  mem <- reg        movhpd store.
;;   1  reg <- same reg   unpckhpd %0, %0 moves the high element down.
;;   2  reg <- mem        emitted as "#": no direct assembler form, must be
;;                        split into other insns.
;; The condition rejects memory-to-memory.
(define_insn "sse2_storehpd"
  [(set (match_operand:DF 0 "nonimmediate_operand"     "=m,x,x*fr")
	(vec_select:DF
	  (match_operand:V2DF 1 "nonimmediate_operand" " x,0,o")
	  (parallel [(const_int 1)])))]
  "TARGET_SSE2 && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
  "@
   movhpd\t{%1, %0|%0, %1}
   unpckhpd\t%0, %0
   #"
  [(set_attr "type" "ssemov,sselog1,ssemov")
   (set_attr "mode" "V1DF,V2DF,DF")])
2162
;; After reload, extracting element 1 of a V2DF that lives in memory into a
;; register becomes a plain DF load from byte offset 8 of that memory.
(define_split
  [(set (match_operand:DF 0 "register_operand" "")
	(vec_select:DF
	  (match_operand:V2DF 1 "memory_operand" "")
	  (parallel [(const_int 1)])))]
  "TARGET_SSE2 && reload_completed"
  [(set (match_dup 0)
	(match_dup 1))]
  "operands[1] = adjust_address (operands[1], DFmode, 8);")
2173
;; Extract element 0 (the low double) of a V2DF into a DF destination.
;; Alternative 0 (mem <- reg) emits a movlpd store.  Alternatives 1
;; (reg <- reg) and 2 (reg <- mem) are emitted as "#": they have no direct
;; assembler form here and must be split into other insns.
;; The condition rejects memory-to-memory.
(define_insn "sse2_storelpd"
  [(set (match_operand:DF 0 "nonimmediate_operand"     "=m,x,x*fr")
	(vec_select:DF
	  (match_operand:V2DF 1 "nonimmediate_operand" " x,x,m")
	  (parallel [(const_int 0)])))]
  "TARGET_SSE2 && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
  "@
   movlpd\t{%1, %0|%0, %1}
   #
   #"
  [(set_attr "type" "ssemov")
   (set_attr "mode" "V1DF,DF,DF")])
2186
;; After reload, extracting element 0 of a V2DF into a register becomes an
;; ordinary DF move: a register source is re-viewed in DFmode under the
;; same register number, any other source goes through gen_lowpart.
(define_split
  [(set (match_operand:DF 0 "register_operand" "")
	(vec_select:DF
	  (match_operand:V2DF 1 "nonimmediate_operand" "")
	  (parallel [(const_int 0)])))]
  "TARGET_SSE2 && reload_completed"
  [(const_int 0)]
{
  rtx src = operands[1];
  rtx low;

  if (!REG_P (src))
    low = gen_lowpart (DFmode, src);
  else
    low = gen_rtx_REG (DFmode, REGNO (src));

  emit_move_insn (operands[0], low);
  DONE;
})
2203
;; Replace the high element of a V2DF:
;;   operand 0 = { operand 1[0], operand 2 }.
;; Operand 1 is tied to the destination in every alternative.
;; Alternatives:
;;   0  reg <- (0, mem)    movhpd loads the new high element.
;;   1  reg <- (0, reg)    unpcklpd pairs the kept low element with the low
;;                         element of operand 2.
;;   2  mem <- (0, x*fr)   emitted as "#": no direct assembler form, must be
;;                         split into other insns.
;;
;; A former "reg <- (reg, 0)" alternative emitted "shufpd $1, %1, %0".
;; With the destination tied to operand 2 that computes
;; { %0[1], %1[0] } -- the untouched high element of the destination in the
;; low slot and operand 1's low element in the high slot -- instead of the
;; required { %1[0], %2 }.  shufpd always takes its low result element from
;; the destination, so no immediate makes that alternative correct; it has
;; been removed and the register allocator uses alternative 1 instead.
(define_insn "sse2_loadhpd"
  [(set (match_operand:V2DF 0 "nonimmediate_operand"     "=x,x,o")
	(vec_concat:V2DF
	  (vec_select:DF
	    (match_operand:V2DF 1 "nonimmediate_operand" " 0,0,0")
	    (parallel [(const_int 0)]))
	  (match_operand:DF 2 "nonimmediate_operand"     " m,x,x*fr")))]
  "TARGET_SSE2 && !(MEM_P (operands[1]) && MEM_P (operands[2]))"
  "@
   movhpd\t{%2, %0|%0, %2}
   unpcklpd\t{%2, %0|%0, %2}
   #"
  [(set_attr "type" "ssemov,sselog,other")
   (set_attr "mode" "V1DF,V2DF,DF")])
2219
;; After reload, replacing the high element of a V2DF that lives in memory
;; (the low element being kept, via match_dup 0) becomes a plain DF store of
;; operand 1 at byte offset 8 of that memory.
(define_split
  [(set (match_operand:V2DF 0 "memory_operand" "")
	(vec_concat:V2DF
	  (vec_select:DF
	    (match_dup 0)
	    (parallel [(const_int 0)]))
	  (match_operand:DF 1 "register_operand" "")))]
  "TARGET_SSE2 && reload_completed"
  [(set (match_dup 0)
	(match_dup 1))]
  "operands[0] = adjust_address (operands[0], DFmode, 8);")
2230
;; Replace the low element of a V2DF:
;;   operand 0 = { operand 2, operand 1[1] }.
;; Alternatives (destination <- operand 2, operand 1):
;;   0  reg <- (mem, C)    movsd load; operand 1 is the constant accepted by
;;                         constraint "C".
;;   1  reg <- (mem, 0)    movlpd loads the new low element.
;;   2  reg <- (reg, 0)    movsd register move of the low element.
;;   3  reg <- (0, reg)    destination already holds operand 2 in its low
;;                         element; shufpd $2 keeps that and takes the high
;;                         element from the shuffle source.
;;   4  reg <- (0, mem)    movhpd loads the high half of operand 1 (%H1).
;;   5  mem <- (x*fr, 0)   emitted as "#": must be split into other insns.
;;
;; In alternative 3 the shuffle source has to be operand 1.  Operand 2 is
;; the same register as operand 0 there, so naming %2 as the source made the
;; shufpd a no-op and left the stale high element in place.
(define_insn "sse2_loadlpd"
  [(set (match_operand:V2DF 0 "nonimmediate_operand"    "=x,x,x,x,x,m")
	(vec_concat:V2DF
	  (match_operand:DF 2 "nonimmediate_operand"    " m,m,x,0,0,x*fr")
	  (vec_select:DF
	    (match_operand:V2DF 1 "vector_move_operand" " C,0,0,x,o,0")
	    (parallel [(const_int 1)]))))]
  "TARGET_SSE2 && !(MEM_P (operands[1]) && MEM_P (operands[2]))"
  "@
   movsd\t{%2, %0|%0, %2}
   movlpd\t{%2, %0|%0, %2}
   movsd\t{%2, %0|%0, %2}
   shufpd\t{$2, %1, %0|%0, %1, 2}
   movhpd\t{%H1, %0|%0, %H1}
   #"
  [(set_attr "type" "ssemov,ssemov,ssemov,sselog,ssemov,other")
   (set_attr "mode" "DF,V1DF,V1DF,V2DF,V1DF,DF")])
2248
;; After reload, replacing the low element of a V2DF that lives in memory
;; (the high element being kept, via match_dup 0) becomes a plain DF store
;; of operand 1.  The low element is the first vec_concat operand, so the
;; store goes to byte offset 0 of the destination; storing at offset 8
;; would overwrite the high element that the pattern says is preserved.
(define_split
  [(set (match_operand:V2DF 0 "memory_operand" "")
	(vec_concat:V2DF
	  (match_operand:DF 1 "register_operand" "")
	  (vec_select:DF (match_dup 0) (parallel [(const_int 1)]))))]
  "TARGET_SSE2 && reload_completed"
  [(set (match_dup 0) (match_dup 1))]
{
  operands[0] = adjust_address (operands[0], DFmode, 0);
})
2259
2260;; Not sure these two are ever used, but it doesn't hurt to have
2261;; them. -aoliva
;; Extract element 1 of a V2DF when only SSE1 is available
;; (!TARGET_SSE2 && TARGET_SSE), using single-precision move instructions.
;; Alternatives:
;;   0  mem <- reg   movhps store.
;;   1  reg <- reg   movhlps.
;;   2  reg <- mem   movlps load from the high half of the source (%H1).
;; The condition rejects memory-to-memory.
(define_insn "*vec_extractv2df_1_sse"
  [(set (match_operand:DF 0 "nonimmediate_operand" "=m,x,x")
	(vec_select:DF
	  (match_operand:V2DF 1 "nonimmediate_operand" "x,x,o")
	  (parallel [(const_int 1)])))]
  "!TARGET_SSE2 && TARGET_SSE
   && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
  "@
   movhps\t{%1, %0|%0, %1}
   movhlps\t{%1, %0|%0, %1}
   movlps\t{%H1, %0|%0, %H1}"
  [(set_attr "type" "ssemov")
   (set_attr "mode" "V2SF,V4SF,V2SF")])
2275
;; Extract element 0 of a V2DF when only SSE1 is available
;; (!TARGET_SSE2 && TARGET_SSE), using single-precision move instructions.
;; Alternatives:
;;   0  mem <- reg   movlps store.
;;   1  reg <- reg   movaps (whole-register copy).
;;   2  reg <- mem   movlps load.
;; The condition rejects memory-to-memory.
(define_insn "*vec_extractv2df_0_sse"
  [(set (match_operand:DF 0 "nonimmediate_operand" "=m,x,x")
	(vec_select:DF
	  (match_operand:V2DF 1 "nonimmediate_operand" "x,x,m")
	  (parallel [(const_int 0)])))]
  "!TARGET_SSE2 && TARGET_SSE
   && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
  "@
   movlps\t{%1, %0|%0, %1}
   movaps\t{%1, %0|%0, %1}
   movlps\t{%1, %0|%0, %1}"
  [(set_attr "type" "ssemov")
   (set_attr "mode" "V2SF,V4SF,V2SF")])
2289
;; Merge the low element of operand 2 with the high element of operand 1:
;;   operand 0 = { operand 2[0], operand 1[1] }   (vec_merge mask 1).
;; Alternatives (destination <- operand 2, operand 1):
;;   0  reg <- (reg, 0)   movsd register move of the low element.
;;   1  reg <- (mem, 0)   movlpd loads the new low element.
;;   2  mem <- (reg, 0)   movlpd stores the new low element.
;;   3  reg <- (0, reg)   destination already holds operand 2; shufpd $2
;;                        keeps its low element and takes the high element
;;                        from the shuffle source.
;;   4  reg <- (0, mem)   movhps loads the high half of operand 1 (%H1).
;;   5  mem <- (0, reg)   movhps stores the high element of operand 1 into
;;                        the high half of the destination (%H0).
;;
;; In alternative 3 the shuffle source has to be operand 1.  Operand 2 is
;; the same register as operand 0 there, so naming %2 as the source made the
;; shufpd a no-op and left the stale high element in place.
(define_insn "sse2_movsd"
  [(set (match_operand:V2DF 0 "nonimmediate_operand"   "=x,x,m,x,x,o")
	(vec_merge:V2DF
	  (match_operand:V2DF 2 "nonimmediate_operand" " x,m,x,0,0,0")
	  (match_operand:V2DF 1 "nonimmediate_operand" " 0,0,0,x,o,x")
	  (const_int 1)))]
  "TARGET_SSE2"
  "@
   movsd\t{%2, %0|%0, %2}
   movlpd\t{%2, %0|%0, %2}
   movlpd\t{%2, %0|%0, %2}
   shufpd\t{$2, %1, %0|%0, %1, 2}
   movhps\t{%H1, %0|%0, %H1}
   movhps\t{%1, %H0|%H0, %1}"
  [(set_attr "type" "ssemov,ssemov,ssemov,sselog,ssemov,ssemov")
   (set_attr "mode" "DF,V1DF,V1DF,V2DF,V1DF,V1DF")])
2306
;; Broadcast a DF value (SSE register or memory) into both elements of a
;; V2DF register using the SSE3 movddup instruction.
(define_insn "*vec_dupv2df_sse3"
  [(set (match_operand:V2DF 0 "register_operand" "=x")
	(vec_duplicate:V2DF
	  (match_operand:DF 1 "nonimmediate_operand" "xm")))]
  "TARGET_SSE3"
  "movddup\t{%1, %0|%0, %1}"
  [(set_attr "type" "sselog1")
   (set_attr "mode" "DF")])
2315
;; Broadcast a DF value into both elements of a V2DF register on plain
;; SSE2.  The source is tied to the destination, so "unpcklpd %0, %0"
;; copies the low element into the high one.
;; The "mode" attribute is V2DF, matching the other unpcklpd alternatives
;; in this file (it was previously V4SF, which does not describe a
;; packed-double instruction).
(define_insn "*vec_dupv2df"
  [(set (match_operand:V2DF 0 "register_operand" "=x")
	(vec_duplicate:V2DF
	  (match_operand:DF 1 "register_operand" "0")))]
  "TARGET_SSE2"
  "unpcklpd\t%0, %0"
  [(set_attr "type" "sselog1")
   (set_attr "mode" "V2DF")])
2324
;; vec_concat of a DF value with itself (match_dup 1), i.e. a broadcast,
;; implemented with the SSE3 movddup instruction.  The source may be an SSE
;; register or memory.
(define_insn "*vec_concatv2df_sse3"
  [(set (match_operand:V2DF 0 "register_operand" "=x")
	(vec_concat:V2DF
	  (match_operand:DF 1 "nonimmediate_operand" "xm")
	  (match_dup 1)))]
  "TARGET_SSE3"
  "movddup\t{%1, %0|%0, %1}"
  [(set_attr "type" "sselog1")
   (set_attr "mode" "DF")])
2334
;; Build a V2DF from two DF values: { operand 1, operand 2 }.
;; Alternatives (destination <- operand 1, operand 2):
;;   0  Y <- (0, Y)   unpcklpd.
;;   1  Y <- (0, m)   movhpd loads the high element.
;;   2  Y <- (m, C)   movsd loads the low element; operand 2 is the constant
;;                    accepted by constraint "C".
;;   3  x <- (0, x)   movlhps.
;;   4  x <- (0, m)   movhps loads the high element.
;; The pattern is enabled for TARGET_SSE; the first three alternatives use
;; the "Y" register constraint and packed/scalar-double instructions, the
;; last two use "x" and single-precision move instructions.
(define_insn "*vec_concatv2df"
  [(set (match_operand:V2DF 0 "register_operand"     "=Y,Y,Y,x,x")
	(vec_concat:V2DF
	  (match_operand:DF 1 "nonimmediate_operand" " 0,0,m,0,0")
	  (match_operand:DF 2 "vector_move_operand"  " Y,m,C,x,m")))]
  "TARGET_SSE"
  "@
   unpcklpd\t{%2, %0|%0, %2}
   movhpd\t{%2, %0|%0, %2}
   movsd\t{%1, %0|%0, %1}
   movlhps\t{%2, %0|%0, %2}
   movhps\t{%2, %0|%0, %2}"
  [(set_attr "type" "sselog,ssemov,ssemov,ssemov,ssemov")
   (set_attr "mode" "V2DF,V1DF,DF,V4SF,V2SF")])
2349
;; Named expander for setting one element of a V2DF register: operand 0 is
;; the vector, operand 1 the new DF value, operand 2 the constant element
;; index.  All work is delegated to ix86_expand_vector_set (defined outside
;; this file), called with "false" as its first argument.
(define_expand "vec_setv2df"
  [(match_operand:V2DF 0 "register_operand" "")
   (match_operand:DF 1 "register_operand" "")
   (match_operand 2 "const_int_operand" "")]
  "TARGET_SSE"
{
  rtx vec = operands[0];
  rtx val = operands[1];

  ix86_expand_vector_set (false, vec, val, INTVAL (operands[2]));
  DONE;
})
2360
;; Named expander for extracting one element of a V2DF register: operand 0
;; is the DF destination, operand 1 the vector, operand 2 the constant
;; element index.  All work is delegated to ix86_expand_vector_extract
;; (defined outside this file), called with "false" as its first argument.
(define_expand "vec_extractv2df"
  [(match_operand:DF 0 "register_operand" "")
   (match_operand:V2DF 1 "register_operand" "")
   (match_operand 2 "const_int_operand" "")]
  "TARGET_SSE"
{
  rtx dest = operands[0];
  rtx vec = operands[1];

  ix86_expand_vector_extract (false, dest, vec, INTVAL (operands[2]));
  DONE;
})
2371
;; Named expander for initialising a V2DF register (operand 0) from
;; operand 1, which has no predicate or mode here.  All work is delegated
;; to ix86_expand_vector_init (defined outside this file), called with
;; "false" as its first argument.
(define_expand "vec_initv2df"
  [(match_operand:V2DF 0 "register_operand" "")
   (match_operand 1 "" "")]
  "TARGET_SSE"
{
  rtx target = operands[0];
  rtx init = operands[1];

  ix86_expand_vector_init (false, target, init);
  DONE;
})
2380
2381;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
2382;;
2383;; Parallel integral arithmetic
2384;;
2385;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
2386
;; Integer vector negation for every SSEMODEI mode, expanded as
;; (0 - operand 1).  Operand 2 is not supplied by the caller: it is created
;; here as a register holding the zero vector of the same mode.
(define_expand "neg<mode>2"
  [(set (match_operand:SSEMODEI 0 "register_operand" "")
	(minus:SSEMODEI
	  (match_dup 2)
	  (match_operand:SSEMODEI 1 "nonimmediate_operand" "")))]
  "TARGET_SSE2"
{
  /* Minuend: an all-zero vector forced into a register.  */
  operands[2] = force_reg (<MODE>mode, CONST0_RTX (<MODE>mode));
})
2394
;; Expander for integer vector addition in every SSEMODEI mode.  Both
;; inputs are accepted as register-or-memory; before the SET is emitted the
;; operand array is passed to ix86_fixup_binary_operands_no_copy (defined
;; outside this file; presumably it massages the operands so the matching
;; insn's condition holds -- confirm against i386.c).
(define_expand "add<mode>3"
  [(set (match_operand:SSEMODEI 0 "register_operand" "")
	(plus:SSEMODEI
	  (match_operand:SSEMODEI 1 "nonimmediate_operand" "")
	  (match_operand:SSEMODEI 2 "nonimmediate_operand" "")))]
  "TARGET_SSE2"
{
  ix86_fixup_binary_operands_no_copy (PLUS, <MODE>mode, operands);
})
2401
;; Integer vector addition for every SSEMODEI mode.  The mnemonic is "padd"
;; followed by the element-size suffix from the ssevecsize attribute
;; (b, w, d or q).  The "%" on operand 1 marks operands 1 and 2 as
;; commutative; operand 1 is tied to the destination.
(define_insn "*add<mode>3"
  [(set (match_operand:SSEMODEI 0 "register_operand" "=x")
	(plus:SSEMODEI
	  (match_operand:SSEMODEI 1 "nonimmediate_operand" "%0")
	  (match_operand:SSEMODEI 2 "nonimmediate_operand" "xm")))]
  "TARGET_SSE2 && ix86_binary_operator_ok (PLUS, <MODE>mode, operands)"
  "padd<ssevecsize>\t{%2, %0|%0, %2}"
  [(set_attr "type" "sseiadd")
   (set_attr "mode" "TI")])
2411
;; Signed saturating addition (RTL ss_plus) for the byte and word vector
;; modes in SSEMODE12, emitted as paddsb / paddsw.  The "%" on operand 1
;; marks operands 1 and 2 as commutative; operand 1 is tied to the
;; destination.
(define_insn "sse2_ssadd<mode>3"
  [(set (match_operand:SSEMODE12 0 "register_operand" "=x")
	(ss_plus:SSEMODE12
	  (match_operand:SSEMODE12 1 "nonimmediate_operand" "%0")
	  (match_operand:SSEMODE12 2 "nonimmediate_operand" "xm")))]
  "TARGET_SSE2 && ix86_binary_operator_ok (SS_PLUS, <MODE>mode, operands)"
  "padds<ssevecsize>\t{%2, %0|%0, %2}"
  [(set_attr "type" "sseiadd")
   (set_attr "mode" "TI")])
2421
;; Unsigned saturating addition (RTL us_plus) for the byte and word vector
;; modes in SSEMODE12, emitted as paddusb / paddusw.  The "%" on operand 1
;; marks operands 1 and 2 as commutative; operand 1 is tied to the
;; destination.
(define_insn "sse2_usadd<mode>3"
  [(set (match_operand:SSEMODE12 0 "register_operand" "=x")
	(us_plus:SSEMODE12
	  (match_operand:SSEMODE12 1 "nonimmediate_operand" "%0")
	  (match_operand:SSEMODE12 2 "nonimmediate_operand" "xm")))]
  "TARGET_SSE2 && ix86_binary_operator_ok (US_PLUS, <MODE>mode, operands)"
  "paddus<ssevecsize>\t{%2, %0|%0, %2}"
  [(set_attr "type" "sseiadd")
   (set_attr "mode" "TI")])
2431
;; Expander for integer vector subtraction in every SSEMODEI mode.  The
;; minuend (operand 1) must be a register, the subtrahend (operand 2) may be
;; register or memory.  Before the SET is emitted the operand array is
;; passed to ix86_fixup_binary_operands_no_copy (defined outside this file).
(define_expand "sub<mode>3"
  [(set (match_operand:SSEMODEI 0 "register_operand" "")
	(minus:SSEMODEI
	  (match_operand:SSEMODEI 1 "register_operand" "")
	  (match_operand:SSEMODEI 2 "nonimmediate_operand" "")))]
  "TARGET_SSE2"
{
  ix86_fixup_binary_operands_no_copy (MINUS, <MODE>mode, operands);
})
2438
;; Integer vector subtraction for every SSEMODEI mode: operand 0 =
;; operand 1 - operand 2.  The mnemonic is "psub" followed by the
;; element-size suffix from the ssevecsize attribute.  Operand 1 is tied to
;; the destination; there is no commutative marker.
(define_insn "*sub<mode>3"
  [(set (match_operand:SSEMODEI 0 "register_operand" "=x")
	(minus:SSEMODEI
	  (match_operand:SSEMODEI 1 "register_operand" "0")
	  (match_operand:SSEMODEI 2 "nonimmediate_operand" "xm")))]
  "TARGET_SSE2"
  "psub<ssevecsize>\t{%2, %0|%0, %2}"
  [(set_attr "type" "sseiadd")
   (set_attr "mode" "TI")])
2448
;; Signed saturating subtraction (RTL ss_minus) for the byte and word
;; vector modes in SSEMODE12, emitted as psubsb / psubsw.  Operand 1 is
;; tied to the destination.
(define_insn "sse2_sssub<mode>3"
  [(set (match_operand:SSEMODE12 0 "register_operand" "=x")
	(ss_minus:SSEMODE12
	  (match_operand:SSEMODE12 1 "register_operand" "0")
	  (match_operand:SSEMODE12 2 "nonimmediate_operand" "xm")))]
  "TARGET_SSE2"
  "psubs<ssevecsize>\t{%2, %0|%0, %2}"
  [(set_attr "type" "sseiadd")
   (set_attr "mode" "TI")])
2458
;; Unsigned saturating subtraction (RTL us_minus) for the byte and word
;; vector modes in SSEMODE12, emitted as psubusb / psubusw.  Operand 1 is
;; tied to the destination.
(define_insn "sse2_ussub<mode>3"
  [(set (match_operand:SSEMODE12 0 "register_operand" "=x")
	(us_minus:SSEMODE12
	  (match_operand:SSEMODE12 1 "register_operand" "0")
	  (match_operand:SSEMODE12 2 "nonimmediate_operand" "xm")))]
  "TARGET_SSE2"
  "psubus<ssevecsize>\t{%2, %0|%0, %2}"
  [(set_attr "type" "sseiadd")
   (set_attr "mode" "TI")])
2468
;; V16QI multiply, expanded into a sequence: unpack the bytes of both inputs
;; into words, multiply the words with mulv8hi3, then interleave the
;; product bytes back into a single V16QI result.  Twelve V16QI temporaries
;; are used for the intermediate values.
(define_expand "mulv16qi3"
  [(set (match_operand:V16QI 0 "register_operand" "")
	(mult:V16QI
	  (match_operand:V16QI 1 "register_operand" "")
	  (match_operand:V16QI 2 "register_operand" "")))]
  "TARGET_SSE2"
{
  rtx tmp[12];
  rtx a = operands[1];
  rtx b = operands[2];
  int k;

  for (k = 0; k < 12; k++)
    tmp[k] = gen_reg_rtx (V16QImode);

  /* Interleave each input with itself so that every word has a source
     byte in its low byte.  The high byte of each word is a don't-care;
     letting it be a copy of the low byte is simpler than zeroing it.  */
  emit_insn (gen_sse2_punpckhbw (tmp[0], a, a));
  emit_insn (gen_sse2_punpckhbw (tmp[1], b, b));
  emit_insn (gen_sse2_punpcklbw (tmp[2], a, a));
  emit_insn (gen_sse2_punpcklbw (tmp[3], b, b));

  /* Word multiplies.  The trailing annotations picture the byte layout of
     each result: a dot is a don't-care byte, letters are result bytes with
     A the most significant.  */
  emit_insn (gen_mulv8hi3 (gen_lowpart (V8HImode, tmp[4]),
			   gen_lowpart (V8HImode, tmp[0]),
			   gen_lowpart (V8HImode, tmp[1])));
							/* .A.B.C.D.E.F.G.H */
  emit_insn (gen_mulv8hi3 (gen_lowpart (V8HImode, tmp[5]),
			   gen_lowpart (V8HImode, tmp[2]),
			   gen_lowpart (V8HImode, tmp[3])));
							/* .I.J.K.L.M.N.O.P */

  /* Pull out the wanted bytes and merge them back together.  */
  emit_insn (gen_sse2_punpckhbw (tmp[6], tmp[5], tmp[4]));
							/* ..AI..BJ..CK..DL */
  emit_insn (gen_sse2_punpcklbw (tmp[7], tmp[5], tmp[4]));
							/* ..EM..FN..GO..HP */
  emit_insn (gen_sse2_punpckhbw (tmp[8], tmp[7], tmp[6]));
							/* ....AEIM....BFJN */
  emit_insn (gen_sse2_punpcklbw (tmp[9], tmp[7], tmp[6]));
							/* ....CGKO....DHLP */
  emit_insn (gen_sse2_punpckhbw (tmp[10], tmp[9], tmp[8]));
							/* ........ACEGIKMO */
  emit_insn (gen_sse2_punpcklbw (tmp[11], tmp[9], tmp[8]));
							/* ........BDFHJLNP */

  /* Final interleave straight into the destination.  */
  emit_insn (gen_sse2_punpcklbw (operands[0], tmp[11], tmp[10]));
							/* ABCDEFGHIJKLMNOP */
  DONE;
})
2512
;; Expander for V8HI multiplication.  Both inputs are accepted as
;; register-or-memory; before the SET is emitted the operand array is
;; passed to ix86_fixup_binary_operands_no_copy (defined outside this file;
;; presumably it massages the operands so the matching insn's condition
;; holds -- confirm against i386.c).
(define_expand "mulv8hi3"
  [(set (match_operand:V8HI 0 "register_operand" "")
	(mult:V8HI
	  (match_operand:V8HI 1 "nonimmediate_operand" "")
	  (match_operand:V8HI 2 "nonimmediate_operand" "")))]
  "TARGET_SSE2"
{
  ix86_fixup_binary_operands_no_copy (MULT, V8HImode, operands);
})
2519
;; V8HI multiplication (mult:V8HI, i.e. the product in the element mode),
;; emitted as pmullw.  The "%" on operand 1 marks operands 1 and 2 as
;; commutative; operand 1 is tied to the destination.
(define_insn "*mulv8hi3"
  [(set (match_operand:V8HI 0 "register_operand" "=x")
	(mult:V8HI (match_operand:V8HI 1 "nonimmediate_operand" "%0")
		   (match_operand:V8HI 2 "nonimmediate_operand" "xm")))]
  "TARGET_SSE2 && ix86_binary_operator_ok (MULT, V8HImode, operands)"
  "pmullw\t{%2, %0|%0, %2}"
  [(set_attr "type" "sseimul")
   (set_attr "mode" "TI")])
2528
;; Signed high-part multiply: both V8HI inputs are sign-extended to V8SI,
;; multiplied, shifted right by 16 and truncated back to V8HI, i.e. the
;; upper 16 bits of each 32-bit product.  Emitted as pmulhw.  Operands 1
;; and 2 are commutative ("%"); operand 1 is tied to the destination.
(define_insn "sse2_smulv8hi3_highpart"
  [(set (match_operand:V8HI 0 "register_operand" "=x")
	(truncate:V8HI
	  (lshiftrt:V8SI
	    (mult:V8SI
	      (sign_extend:V8SI
		(match_operand:V8HI 1 "nonimmediate_operand" "%0"))
	      (sign_extend:V8SI
		(match_operand:V8HI 2 "nonimmediate_operand" "xm")))
	    (const_int 16))))]
  "TARGET_SSE2 && ix86_binary_operator_ok (MULT, V8HImode, operands)"
  "pmulhw\t{%2, %0|%0, %2}"
  [(set_attr "type" "sseimul")
   (set_attr "mode" "TI")])
2543
;; Unsigned high-part multiply: both V8HI inputs are zero-extended to V8SI,
;; multiplied, shifted right by 16 and truncated back to V8HI, i.e. the
;; upper 16 bits of each 32-bit product.  Emitted as pmulhuw.  Operands 1
;; and 2 are commutative ("%"); operand 1 is tied to the destination.
(define_insn "sse2_umulv8hi3_highpart"
  [(set (match_operand:V8HI 0 "register_operand" "=x")
	(truncate:V8HI
	  (lshiftrt:V8SI
	    (mult:V8SI
	      (zero_extend:V8SI
		(match_operand:V8HI 1 "nonimmediate_operand" "%0"))
	      (zero_extend:V8SI
		(match_operand:V8HI 2 "nonimmediate_operand" "xm")))
	    (const_int 16))))]
  "TARGET_SSE2 && ix86_binary_operator_ok (MULT, V8HImode, operands)"
  "pmulhuw\t{%2, %0|%0, %2}"
  [(set_attr "type" "sseimul")
   (set_attr "mode" "TI")])
2558
;; Widening unsigned multiply: elements 0 and 2 of each V4SI input are
;; zero-extended to 64 bits and multiplied, giving a V2DI result.  Emitted
;; as pmuludq.  Operands 1 and 2 are commutative ("%"); operand 1 is tied
;; to the destination.
;; The mode passed to ix86_binary_operator_ok is V4SImode, the mode of
;; operands 1 and 2 (it was previously V8HImode, apparently copied from the
;; V8HI high-part patterns).
(define_insn "sse2_umulv2siv2di3"
  [(set (match_operand:V2DI 0 "register_operand" "=x")
	(mult:V2DI
	  (zero_extend:V2DI
	    (vec_select:V2SI
	      (match_operand:V4SI 1 "nonimmediate_operand" "%0")
	      (parallel [(const_int 0) (const_int 2)])))
	  (zero_extend:V2DI
	    (vec_select:V2SI
	      (match_operand:V4SI 2 "nonimmediate_operand" "xm")
	      (parallel [(const_int 0) (const_int 2)])))))]
  "TARGET_SSE2 && ix86_binary_operator_ok (MULT, V4SImode, operands)"
  "pmuludq\t{%2, %0|%0, %2}"
  [(set_attr "type" "sseimul")
   (set_attr "mode" "TI")])
2574
;; Multiply-and-add of adjacent words (pmaddwd).  The even-indexed elements
;; (0, 2, 4, 6) of both V8HI inputs are sign-extended to V4SI and
;; multiplied; the same is done for the odd-indexed elements (1, 3, 5, 7);
;; the two V4SI products are then added.  The odd-element selects refer to
;; the same operands through match_dup.  Operands 1 and 2 are commutative
;; ("%"); operand 1 is tied to the destination.
(define_insn "sse2_pmaddwd"
  [(set (match_operand:V4SI 0 "register_operand" "=x")
	(plus:V4SI
	  (mult:V4SI
	    (sign_extend:V4SI
	      (vec_select:V4HI
		(match_operand:V8HI 1 "nonimmediate_operand" "%0")
		(parallel [(const_int 0)
			   (const_int 2)
			   (const_int 4)
			   (const_int 6)])))
	    (sign_extend:V4SI
	      (vec_select:V4HI
		(match_operand:V8HI 2 "nonimmediate_operand" "xm")
		(parallel [(const_int 0)
			   (const_int 2)
			   (const_int 4)
			   (const_int 6)]))))
	  (mult:V4SI
	    (sign_extend:V4SI
	      (vec_select:V4HI (match_dup 1)
		(parallel [(const_int 1)
			   (const_int 3)
			   (const_int 5)
			   (const_int 7)])))
	    (sign_extend:V4SI
	      (vec_select:V4HI (match_dup 2)
		(parallel [(const_int 1)
			   (const_int 3)
			   (const_int 5)
			   (const_int 7)]))))))]
  "TARGET_SSE2"
  "pmaddwd\t{%2, %0|%0, %2}"
  [(set_attr "type" "sseiadd")
   (set_attr "mode" "TI")])
2610
;; V4SI multiply, expanded into a sequence built on the widening
;; sse2_umulv2siv2di3 pattern (which multiplies elements 0 and 2): one
;; multiply of the inputs as they are, one of the inputs shifted down by one
;; element, then a shuffle and interleave to gather the four low product
;; halves into the destination.
(define_expand "mulv4si3"
  [(set (match_operand:V4SI 0 "register_operand" "")
	(mult:V4SI
	  (match_operand:V4SI 1 "register_operand" "")
	  (match_operand:V4SI 2 "register_operand" "")))]
  "TARGET_SSE2"
{
  rtx dest = operands[0];
  rtx a = operands[1];
  rtx b = operands[2];
  rtx prod_even = gen_reg_rtx (V4SImode);
  rtx a_down = gen_reg_rtx (V4SImode);
  rtx b_down = gen_reg_rtx (V4SImode);
  rtx prod_odd = gen_reg_rtx (V4SImode);
  rtx pack_even = gen_reg_rtx (V4SImode);
  rtx pack_odd = gen_reg_rtx (V4SImode);
  rtx shift_bits = GEN_INT (32);

  /* Products of elements 2 and 0.  */
  emit_insn (gen_sse2_umulv2siv2di3 (gen_lowpart (V2DImode, prod_even),
				     a, b));

  /* Shift both inputs down by one element (32 bits of the whole 128-bit
     value) so that elements 3 and 1 land in the slots of elements 2 and 0.
     For K8, at least, this is faster than using a shuffle.  */
  emit_insn (gen_sse2_lshrti3 (gen_lowpart (TImode, a_down),
			       gen_lowpart (TImode, a), shift_bits));
  emit_insn (gen_sse2_lshrti3 (gen_lowpart (TImode, b_down),
			       gen_lowpart (TImode, b), shift_bits));

  /* Products of elements 3 and 1.  */
  emit_insn (gen_sse2_umulv2siv2di3 (gen_lowpart (V2DImode, prod_odd),
				     a_down, b_down));

  /* Bring the result held in element 2 down to element 1; the contents of
     elements 2 and 3 afterwards do not matter.  */
  emit_insn (gen_sse2_pshufd_1 (pack_even, prod_even,
				const0_rtx, const2_rtx,
				const0_rtx, const0_rtx));
  emit_insn (gen_sse2_pshufd_1 (pack_odd, prod_odd,
				const0_rtx, const2_rtx,
				const0_rtx, const0_rtx));

  /* Interleave the two partial results into the destination.  */
  emit_insn (gen_sse2_punpckldq (dest, pack_even, pack_odd));
  DONE;
})
2656
;; V2DI multiply, expanded into an insn sequence: the product of the
;; inputs as given (t1), plus the two cross products formed by
;; multiplying each input by the other input shifted right by 32 bits
;; (t4, t5), each cross product being shifted left by 32 bits before the
;; final two V2DI additions.
2657(define_expand "mulv2di3"
2658  [(set (match_operand:V2DI 0 "register_operand" "")
2659	(mult:V2DI (match_operand:V2DI 1 "register_operand" "")
2660		   (match_operand:V2DI 2 "register_operand" "")))]
2661  "TARGET_SSE2"
2662{
2663  rtx t1, t2, t3, t4, t5, t6, thirtytwo;
2664  rtx op0, op1, op2;
2665
2666  op0 = operands[0];
2667  op1 = operands[1];
2668  op2 = operands[2];
2669  t1 = gen_reg_rtx (V2DImode);
2670  t2 = gen_reg_rtx (V2DImode);
2671  t3 = gen_reg_rtx (V2DImode);
2672  t4 = gen_reg_rtx (V2DImode);
2673  t5 = gen_reg_rtx (V2DImode);
2674  t6 = gen_reg_rtx (V2DImode);
2675  thirtytwo = GEN_INT (32);
2676
2677  /* Multiply low parts.  */
2678  emit_insn (gen_sse2_umulv2siv2di3 (t1, gen_lowpart (V4SImode, op1),
2679				     gen_lowpart (V4SImode, op2)));
2680
2681  /* Shift input vectors right 32 bits so we can multiply high parts.  */
2682  emit_insn (gen_lshrv2di3 (t2, op1, thirtytwo));
2683  emit_insn (gen_lshrv2di3 (t3, op2, thirtytwo));
2684
2685  /* Multiply high parts by low parts.  */
2686  emit_insn (gen_sse2_umulv2siv2di3 (t4, gen_lowpart (V4SImode, op1),
2687				     gen_lowpart (V4SImode, t3)));
2688  emit_insn (gen_sse2_umulv2siv2di3 (t5, gen_lowpart (V4SImode, op2),
2689				     gen_lowpart (V4SImode, t2)));
2690
2691  /* Shift them back.  */
2692  emit_insn (gen_ashlv2di3 (t4, t4, thirtytwo));
2693  emit_insn (gen_ashlv2di3 (t5, t5, thirtytwo));
2694
2695  /* Add the three parts together.  */
2696  emit_insn (gen_addv2di3 (t6, t1, t4));
2697  emit_insn (gen_addv2di3 (op0, t6, t5));
2698  DONE;
2699})
2700
;; Signed V8HI dot-product expander: operand 0 (V4SI) receives
;; operand 3 (V4SI accumulator) plus the result of sse2_pmaddwd applied
;; to operands 1 and 2.
2701(define_expand "sdot_prodv8hi"
2702  [(match_operand:V4SI 0 "register_operand" "")
2703   (match_operand:V8HI 1 "nonimmediate_operand" "")
2704   (match_operand:V8HI 2 "nonimmediate_operand" "")
2705   (match_operand:V4SI 3 "register_operand" "")]
2706  "TARGET_SSE2"
2707{
2708  /* Fresh pseudo for the pmaddwd result, so operand 3 is only read.  */
2709  rtx t = gen_reg_rtx (V4SImode);
2710  emit_insn (gen_sse2_pmaddwd (t, operands[1], operands[2]));
2711  emit_insn (gen_addv4si3 (operands[0], operands[3], t));
2712  DONE;
2713})
2713
;; Unsigned V4SI dot-product expander: operand 0 (V2DI) receives
;; operand 3 (V2DI accumulator) plus two sse2_umulv2siv2di3 products,
;; one of operands 1 and 2 as given and one of both operands after a
;; 32-bit right shift of the whole TImode value.
2714(define_expand "udot_prodv4si"
2715  [(match_operand:V2DI 0 "register_operand" "") 
2716   (match_operand:V4SI 1 "register_operand" "") 
2717   (match_operand:V4SI 2 "register_operand" "")
2718   (match_operand:V2DI 3 "register_operand" "")]
2719  "TARGET_SSE2"
2720{
2721  rtx t1, t2, t3, t4;
2722
2723  /* First product, accumulated with operand 3.  */
2724  t1 = gen_reg_rtx (V2DImode);
2725  emit_insn (gen_sse2_umulv2siv2di3 (t1, operands[1], operands[2]));
2726  emit_insn (gen_addv2di3 (t1, t1, operands[3]));
2727
2728  /* Shift both inputs right by 32 bits as TImode values.  */
2729  t2 = gen_reg_rtx (V4SImode);
2730  t3 = gen_reg_rtx (V4SImode);
2731  emit_insn (gen_sse2_lshrti3 (gen_lowpart (TImode, t2),
2732                               gen_lowpart (TImode, operands[1]),
2733                               GEN_INT (32)));
2734  emit_insn (gen_sse2_lshrti3 (gen_lowpart (TImode, t3),
2735                               gen_lowpart (TImode, operands[2]),
2736                               GEN_INT (32)));
2737
2738  /* Second product, from the shifted inputs.  */
2739  t4 = gen_reg_rtx (V2DImode);
2740  emit_insn (gen_sse2_umulv2siv2di3 (t4, t2, t3));
2741
2742  emit_insn (gen_addv2di3 (operands[0], t1, t4));
2743  DONE;
2744})
2742
;; Arithmetic right shift for the SSEMODE24 modes (V8HI, V4SI); emits
;; psraw / psrad.  Operand 1 is tied to the destination; the count
;; (operand 2, TImode) is an SSE register or an immediate.
2743(define_insn "ashr<mode>3"
2744  [(set (match_operand:SSEMODE24 0 "register_operand" "=x")
2745	(ashiftrt:SSEMODE24
2746	  (match_operand:SSEMODE24 1 "register_operand" "0")
2747	  (match_operand:TI 2 "nonmemory_operand" "xn")))]
2748  "TARGET_SSE2"
2749  "psra<ssevecsize>\t{%2, %0|%0, %2}"
2750  [(set_attr "type" "sseishft")
2751   (set_attr "mode" "TI")])
2752
;; Logical right shift for the SSEMODE248 modes (V8HI, V4SI, V2DI);
;; emits psrlw / psrld / psrlq.  Operand 1 is tied to the destination;
;; the count (operand 2, TImode) is an SSE register or an immediate.
2753(define_insn "lshr<mode>3"
2754  [(set (match_operand:SSEMODE248 0 "register_operand" "=x")
2755	(lshiftrt:SSEMODE248
2756	  (match_operand:SSEMODE248 1 "register_operand" "0")
2757	  (match_operand:TI 2 "nonmemory_operand" "xn")))]
2758  "TARGET_SSE2"
2759  "psrl<ssevecsize>\t{%2, %0|%0, %2}"
2760  [(set_attr "type" "sseishft")
2761   (set_attr "mode" "TI")])
2762
;; Left shift for the SSEMODE248 modes (V8HI, V4SI, V2DI); emits
;; psllw / pslld / psllq.  Operand 1 is tied to the destination; the
;; count (operand 2, TImode) is an SSE register or an immediate.
2763(define_insn "ashl<mode>3"
2764  [(set (match_operand:SSEMODE248 0 "register_operand" "=x")
2765	(ashift:SSEMODE248
2766	  (match_operand:SSEMODE248 1 "register_operand" "0")
2767	  (match_operand:TI 2 "nonmemory_operand" "xn")))]
2768  "TARGET_SSE2"
2769  "psll<ssevecsize>\t{%2, %0|%0, %2}"
2770  [(set_attr "type" "sseishft")
2771   (set_attr "mode" "TI")])
2772
;; Left shift of a whole TImode register by a constant; emits pslldq.
;; The RTL count (operand 2) must satisfy const_0_to_255_mul_8_operand
;; and is divided by 8 at output time to form the pslldq immediate.
2773(define_insn "sse2_ashlti3"
2774  [(set (match_operand:TI 0 "register_operand" "=x")
2775	(ashift:TI (match_operand:TI 1 "register_operand" "0")
2776		   (match_operand:SI 2 "const_0_to_255_mul_8_operand" "n")))]
2777  "TARGET_SSE2"
2778{
2779  operands[2] = GEN_INT (INTVAL (operands[2]) / 8);
2780  return "pslldq\t{%2, %0|%0, %2}";
2781}
2782  [(set_attr "type" "sseishft")
2783   (set_attr "mode" "TI")])
2784
;; Whole-vector left shift expander for the SSEMODEI modes.  FAILs unless
;; the count satisfies const_0_to_255_mul_8_operand; otherwise operands 0
;; and 1 are re-viewed as TImode with gen_lowpart so that the ashift:TI
;; template is emitted.
2785(define_expand "vec_shl_<mode>"
2786  [(set (match_operand:SSEMODEI 0 "register_operand" "")
2787        (ashift:TI (match_operand:SSEMODEI 1 "register_operand" "")
2788		   (match_operand:SI 2 "general_operand" "")))]
2789  "TARGET_SSE2"
2790{
2791  if (!const_0_to_255_mul_8_operand (operands[2], SImode))
2792    FAIL;
2793  operands[0] = gen_lowpart (TImode, operands[0]);
2794  operands[1] = gen_lowpart (TImode, operands[1]);
2795})
2796
;; Logical right shift of a whole TImode register by a constant; emits
;; psrldq.  The RTL count (operand 2) must satisfy
;; const_0_to_255_mul_8_operand and is divided by 8 at output time to
;; form the psrldq immediate.
2797(define_insn "sse2_lshrti3"
2798  [(set (match_operand:TI 0 "register_operand" "=x")
2799 	(lshiftrt:TI (match_operand:TI 1 "register_operand" "0")
2800		     (match_operand:SI 2 "const_0_to_255_mul_8_operand" "n")))]
2801  "TARGET_SSE2"
2802{
2803  operands[2] = GEN_INT (INTVAL (operands[2]) / 8);
2804  return "psrldq\t{%2, %0|%0, %2}";
2805}
2806  [(set_attr "type" "sseishft")
2807   (set_attr "mode" "TI")])
2808
;; Whole-vector right shift expander for the SSEMODEI modes.  FAILs
;; unless the count satisfies const_0_to_255_mul_8_operand; otherwise
;; operands 0 and 1 are re-viewed as TImode with gen_lowpart so that the
;; lshiftrt:TI template is emitted.
2809(define_expand "vec_shr_<mode>"
2810  [(set (match_operand:SSEMODEI 0 "register_operand" "")
2811        (lshiftrt:TI (match_operand:SSEMODEI 1 "register_operand" "")
2812		     (match_operand:SI 2 "general_operand" "")))]
2813  "TARGET_SSE2"
2814{
2815  if (!const_0_to_255_mul_8_operand (operands[2], SImode))
2816    FAIL;
2817  operands[0] = gen_lowpart (TImode, operands[0]);
2818  operands[1] = gen_lowpart (TImode, operands[1]);
2819})
2820
;; Unsigned V16QI maximum expander.  The operands are passed through
;; ix86_fixup_binary_operands_no_copy (defined outside this file;
;; presumably it makes them acceptable to the matching insn below --
;; confirm there) before the umax template is emitted.
2821(define_expand "umaxv16qi3"
2822  [(set (match_operand:V16QI 0 "register_operand" "")
2823	(umax:V16QI (match_operand:V16QI 1 "nonimmediate_operand" "")
2824		    (match_operand:V16QI 2 "nonimmediate_operand" "")))]
2825  "TARGET_SSE2"
2826  "ix86_fixup_binary_operands_no_copy (UMAX, V16QImode, operands);")
2827
;; Unsigned V16QI maximum; emits pmaxub.  Operand 1 is tied to the
;; destination and marked commutative ("%"); operand 2 may be a register
;; or memory.
2828(define_insn "*umaxv16qi3"
2829  [(set (match_operand:V16QI 0 "register_operand" "=x")
2830	(umax:V16QI (match_operand:V16QI 1 "nonimmediate_operand" "%0")
2831		    (match_operand:V16QI 2 "nonimmediate_operand" "xm")))]
2832  "TARGET_SSE2 && ix86_binary_operator_ok (UMAX, V16QImode, operands)"
2833  "pmaxub\t{%2, %0|%0, %2}"
2834  [(set_attr "type" "sseiadd")
2835   (set_attr "mode" "TI")])
2836
;; Signed V8HI maximum expander.  The operands are passed through
;; ix86_fixup_binary_operands_no_copy (defined outside this file;
;; presumably it makes them acceptable to the matching insn below --
;; confirm there) before the smax template is emitted.
2837(define_expand "smaxv8hi3"
2838  [(set (match_operand:V8HI 0 "register_operand" "")
2839	(smax:V8HI (match_operand:V8HI 1 "nonimmediate_operand" "")
2840		   (match_operand:V8HI 2 "nonimmediate_operand" "")))]
2841  "TARGET_SSE2"
2842  "ix86_fixup_binary_operands_no_copy (SMAX, V8HImode, operands);")
2843
;; Signed V8HI maximum; emits pmaxsw.  Operand 1 is tied to the
;; destination and marked commutative ("%"); operand 2 may be a register
;; or memory.
2844(define_insn "*smaxv8hi3"
2845  [(set (match_operand:V8HI 0 "register_operand" "=x")
2846	(smax:V8HI (match_operand:V8HI 1 "nonimmediate_operand" "%0")
2847		   (match_operand:V8HI 2 "nonimmediate_operand" "xm")))]
2848  "TARGET_SSE2 && ix86_binary_operator_ok (SMAX, V8HImode, operands)"
2849  "pmaxsw\t{%2, %0|%0, %2}"
2850  [(set_attr "type" "sseiadd")
2851   (set_attr "mode" "TI")])
2852
;; Unsigned V8HI maximum, emitted as two sets: an unsigned saturating
;; subtract (us_minus) of operand 2 from operand 1, followed by a plus
;; that adds operand 2 back and stores into the original destination
;; (kept in operand 3).  If the destination is rtx_equal_p to operand 2,
;; the intermediate result goes to a fresh pseudo instead, so operand 2
;; is still intact for the addition.
2853(define_expand "umaxv8hi3"
2854  [(set (match_operand:V8HI 0 "register_operand" "=x")
2855	(us_minus:V8HI (match_operand:V8HI 1 "register_operand" "0")
2856		       (match_operand:V8HI 2 "nonimmediate_operand" "xm")))
2857   (set (match_dup 3)
2858	(plus:V8HI (match_dup 0) (match_dup 2)))]
2859  "TARGET_SSE2"
2860{
2861  operands[3] = operands[0];
2862  if (rtx_equal_p (operands[0], operands[2]))
2863    operands[0] = gen_reg_rtx (V8HImode);
2864})
2865
;; Signed maximum for the SSEMODE14 modes (V16QI, V4SI), built as a
;; vector conditional through ix86_expand_int_vcond.  The xops array
;; uses the same operand numbering as the vcond<mode> expander:
;; 0 = destination, 1/2 = values selected when the comparison is
;; true/false, 3 = comparison rtx, 4/5 = comparison operands.  Here:
;; op0 = (op1 > op2) ? op1 : op2.
2866(define_expand "smax<mode>3"
2867  [(set (match_operand:SSEMODE14 0 "register_operand" "")
2868	(smax:SSEMODE14 (match_operand:SSEMODE14 1 "register_operand" "")
2869			(match_operand:SSEMODE14 2 "register_operand" "")))]
2870  "TARGET_SSE2"
2871{
2872  rtx xops[6];
2873  bool ok;
2874
2875  xops[0] = operands[0];
2876  xops[1] = operands[1];
2877  xops[2] = operands[2];
2878  xops[3] = gen_rtx_GT (VOIDmode, operands[1], operands[2]);
2879  xops[4] = operands[1];
2880  xops[5] = operands[2];
2881  ok = ix86_expand_int_vcond (xops);
2882  gcc_assert (ok);
2883  DONE;
2884})
2885
;; Unsigned V4SI maximum, built as a vector conditional through
;; ix86_expand_int_vcond with an unsigned greater-than (GTU) comparison:
;; op0 = (op1 >u op2) ? op1 : op2.  The xops array uses the vcond
;; operand numbering (0 = destination, 1/2 = selected values,
;; 3 = comparison, 4/5 = comparison operands).
2886(define_expand "umaxv4si3"
2887  [(set (match_operand:V4SI 0 "register_operand" "")
2888	(umax:V4SI (match_operand:V4SI 1 "register_operand" "")
2889		   (match_operand:V4SI 2 "register_operand" "")))]
2890  "TARGET_SSE2"
2891{
2892  rtx xops[6];
2893  bool ok;
2894
2895  xops[0] = operands[0];
2896  xops[1] = operands[1];
2897  xops[2] = operands[2];
2898  xops[3] = gen_rtx_GTU (VOIDmode, operands[1], operands[2]);
2899  xops[4] = operands[1];
2900  xops[5] = operands[2];
2901  ok = ix86_expand_int_vcond (xops);
2902  gcc_assert (ok);
2903  DONE;
2904})
2905
;; Unsigned V16QI minimum expander.  The operands are passed through
;; ix86_fixup_binary_operands_no_copy (defined outside this file;
;; presumably it makes them acceptable to the matching insn below --
;; confirm there) before the umin template is emitted.
2906(define_expand "uminv16qi3"
2907  [(set (match_operand:V16QI 0 "register_operand" "")
2908	(umin:V16QI (match_operand:V16QI 1 "nonimmediate_operand" "")
2909		    (match_operand:V16QI 2 "nonimmediate_operand" "")))]
2910  "TARGET_SSE2"
2911  "ix86_fixup_binary_operands_no_copy (UMIN, V16QImode, operands);")
2912
;; Unsigned V16QI minimum; emits pminub.  Operand 1 is tied to the
;; destination and marked commutative ("%"); operand 2 may be a register
;; or memory.
2913(define_insn "*uminv16qi3"
2914  [(set (match_operand:V16QI 0 "register_operand" "=x")
2915	(umin:V16QI (match_operand:V16QI 1 "nonimmediate_operand" "%0")
2916		    (match_operand:V16QI 2 "nonimmediate_operand" "xm")))]
2917  "TARGET_SSE2 && ix86_binary_operator_ok (UMIN, V16QImode, operands)"
2918  "pminub\t{%2, %0|%0, %2}"
2919  [(set_attr "type" "sseiadd")
2920   (set_attr "mode" "TI")])
2921
;; Signed V8HI minimum expander.  The operands are passed through
;; ix86_fixup_binary_operands_no_copy (defined outside this file;
;; presumably it makes them acceptable to the matching insn below --
;; confirm there) before the smin template is emitted.
2922(define_expand "sminv8hi3"
2923  [(set (match_operand:V8HI 0 "register_operand" "")
2924	(smin:V8HI (match_operand:V8HI 1 "nonimmediate_operand" "")
2925		   (match_operand:V8HI 2 "nonimmediate_operand" "")))]
2926  "TARGET_SSE2"
2927  "ix86_fixup_binary_operands_no_copy (SMIN, V8HImode, operands);")
2928
;; Signed V8HI minimum; emits pminsw.  Operand 1 is tied to the
;; destination and marked commutative ("%"); operand 2 may be a register
;; or memory.
2929(define_insn "*sminv8hi3"
2930  [(set (match_operand:V8HI 0 "register_operand" "=x")
2931	(smin:V8HI (match_operand:V8HI 1 "nonimmediate_operand" "%0")
2932		   (match_operand:V8HI 2 "nonimmediate_operand" "xm")))]
2933  "TARGET_SSE2 && ix86_binary_operator_ok (SMIN, V8HImode, operands)"
2934  "pminsw\t{%2, %0|%0, %2}"
2935  [(set_attr "type" "sseiadd")
2936   (set_attr "mode" "TI")])
2937
;; Signed minimum for the SSEMODE14 modes (V16QI, V4SI), built as a
;; vector conditional through ix86_expand_int_vcond.  Same comparison as
;; smax<mode>3 (op1 > op2) but with the selected values swapped:
;; op0 = (op1 > op2) ? op2 : op1.
2938(define_expand "smin<mode>3"
2939  [(set (match_operand:SSEMODE14 0 "register_operand" "")
2940	(smin:SSEMODE14 (match_operand:SSEMODE14 1 "register_operand" "")
2941			(match_operand:SSEMODE14 2 "register_operand" "")))]
2942  "TARGET_SSE2"
2943{
2944  rtx xops[6];
2945  bool ok;
2946
2947  xops[0] = operands[0];
2948  xops[1] = operands[2];
2949  xops[2] = operands[1];
2950  xops[3] = gen_rtx_GT (VOIDmode, operands[1], operands[2]);
2951  xops[4] = operands[1];
2952  xops[5] = operands[2];
2953  ok = ix86_expand_int_vcond (xops);
2954  gcc_assert (ok);
2955  DONE;
2956})
2957
;; Unsigned minimum for the SSEMODE24 modes (V8HI, V4SI), built as a
;; vector conditional through ix86_expand_int_vcond with an unsigned
;; greater-than (GTU) comparison and swapped selected values:
;; op0 = (op1 >u op2) ? op2 : op1.
2958(define_expand "umin<mode>3"
2959  [(set (match_operand:SSEMODE24 0 "register_operand" "")
2960	(umin:SSEMODE24 (match_operand:SSEMODE24 1 "register_operand" "")
2961			(match_operand:SSEMODE24 2 "register_operand" "")))]
2962  "TARGET_SSE2"
2963{
2964  rtx xops[6];
2965  bool ok;
2966
2967  xops[0] = operands[0];
2968  xops[1] = operands[2];
2969  xops[2] = operands[1];
2970  xops[3] = gen_rtx_GTU (VOIDmode, operands[1], operands[2]);
2971  xops[4] = operands[1];
2972  xops[5] = operands[2];
2973  ok = ix86_expand_int_vcond (xops);
2974  gcc_assert (ok);
2975  DONE;
2976})
2977
2978;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
2979;;
2980;; Parallel integral comparisons
2981;;
2982;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
2983
;; Element-wise equality compare for the SSEMODE124 modes (V16QI, V8HI,
;; V4SI); emits pcmpeqb / pcmpeqw / pcmpeqd.  Operand 1 is tied to the
;; destination and marked commutative ("%").
2984(define_insn "sse2_eq<mode>3"
2985  [(set (match_operand:SSEMODE124 0 "register_operand" "=x")
2986	(eq:SSEMODE124
2987	  (match_operand:SSEMODE124 1 "nonimmediate_operand" "%0")
2988	  (match_operand:SSEMODE124 2 "nonimmediate_operand" "xm")))]
2989  "TARGET_SSE2 && ix86_binary_operator_ok (EQ, <MODE>mode, operands)"
2990  "pcmpeq<ssevecsize>\t{%2, %0|%0, %2}"
2991  [(set_attr "type" "ssecmp")
2992   (set_attr "mode" "TI")])
2993
;; Element-wise greater-than compare (gt) for the SSEMODE124 modes
;; (V16QI, V8HI, V4SI); emits pcmpgtb / pcmpgtw / pcmpgtd.  Not
;; commutative: operand 1 must be the register tied to the destination.
2994(define_insn "sse2_gt<mode>3"
2995  [(set (match_operand:SSEMODE124 0 "register_operand" "=x")
2996	(gt:SSEMODE124
2997	  (match_operand:SSEMODE124 1 "register_operand" "0")
2998	  (match_operand:SSEMODE124 2 "nonimmediate_operand" "xm")))]
2999  "TARGET_SSE2"
3000  "pcmpgt<ssevecsize>\t{%2, %0|%0, %2}"
3001  [(set_attr "type" "ssecmp")
3002   (set_attr "mode" "TI")])
3003
;; Vector conditional for the SSEMODE124 modes: operand 0 receives
;; operand 1 where comparison 3 (applied to operands 4 and 5) holds and
;; operand 2 elsewhere.  All work is delegated to ix86_expand_int_vcond;
;; the expansion FAILs when that helper returns false.
3004(define_expand "vcond<mode>"
3005  [(set (match_operand:SSEMODE124 0 "register_operand" "")
3006        (if_then_else:SSEMODE124
3007          (match_operator 3 ""
3008            [(match_operand:SSEMODE124 4 "nonimmediate_operand" "")
3009             (match_operand:SSEMODE124 5 "nonimmediate_operand" "")])
3010          (match_operand:SSEMODE124 1 "general_operand" "")
3011          (match_operand:SSEMODE124 2 "general_operand" "")))]
3012  "TARGET_SSE2"
3013{
3014  if (ix86_expand_int_vcond (operands))
3015    DONE;
3016  else
3017    FAIL;
3018})
3019
;; Counterpart of vcond<mode> under the vcondu name, for the SSEMODE124
;; modes.  Template and body are identical to vcond<mode>: the work is
;; delegated to ix86_expand_int_vcond, and the expansion FAILs when that
;; helper returns false.
3020(define_expand "vcondu<mode>"
3021  [(set (match_operand:SSEMODE124 0 "register_operand" "")
3022        (if_then_else:SSEMODE124
3023          (match_operator 3 ""
3024            [(match_operand:SSEMODE124 4 "nonimmediate_operand" "")
3025             (match_operand:SSEMODE124 5 "nonimmediate_operand" "")])
3026          (match_operand:SSEMODE124 1 "general_operand" "")
3027          (match_operand:SSEMODE124 2 "general_operand" "")))]
3028  "TARGET_SSE2"
3029{
3030  if (ix86_expand_int_vcond (operands))
3031    DONE;
3032  else
3033    FAIL;
3034})
3035
3036;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
3037;;
3038;; Parallel integral logical operations
3039;;
3040;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
3041
;; One's complement for the SSEMODEI modes, expressed as an XOR with an
;; all-ones vector.  The preparation code builds a CONST_VECTOR whose
;; every element is constm1_rtx and forces it into a register as
;; operand 2.
3042(define_expand "one_cmpl<mode>2"
3043  [(set (match_operand:SSEMODEI 0 "register_operand" "")
3044	(xor:SSEMODEI (match_operand:SSEMODEI 1 "nonimmediate_operand" "")
3045		      (match_dup 2)))]
3046  "TARGET_SSE2"
3047{
3048  int i, n = GET_MODE_NUNITS (<MODE>mode);
3049  rtvec v = rtvec_alloc (n);
3050
3051  for (i = 0; i < n; ++i)
3052    RTVEC_ELT (v, i) = constm1_rtx;
3053
3054  operands[2] = force_reg (<MODE>mode, gen_rtx_CONST_VECTOR (<MODE>mode, v));
3055})
3056
;; Bitwise AND expander for the SSEMODEI modes.  The operands are passed
;; through ix86_fixup_binary_operands_no_copy (defined outside this
;; file) before the and template is emitted.
3057(define_expand "and<mode>3"
3058  [(set (match_operand:SSEMODEI 0 "register_operand" "")
3059	(and:SSEMODEI (match_operand:SSEMODEI 1 "nonimmediate_operand" "")
3060		      (match_operand:SSEMODEI 2 "nonimmediate_operand" "")))]
3061  "TARGET_SSE2"
3062  "ix86_fixup_binary_operands_no_copy (AND, <MODE>mode, operands);")
3063
;; Bitwise AND for the SSEMODEI modes; emits pand regardless of element
;; size.  Operand 1 is tied to the destination and marked commutative.
3064(define_insn "*and<mode>3"
3065  [(set (match_operand:SSEMODEI 0 "register_operand" "=x")
3066	(and:SSEMODEI
3067	  (match_operand:SSEMODEI 1 "nonimmediate_operand" "%0")
3068	  (match_operand:SSEMODEI 2 "nonimmediate_operand" "xm")))]
3069  "TARGET_SSE2 && ix86_binary_operator_ok (AND, <MODE>mode, operands)"
3070  "pand\t{%2, %0|%0, %2}"
3071  [(set_attr "type" "sselog")
3072   (set_attr "mode" "TI")])
3073
;; AND-NOT for the SSEMODEI modes; emits pandn.  Despite the "nand"
;; name the RTL is (and (not op1) op2): only operand 1, the one tied to
;; the destination, is complemented.
3074(define_insn "sse2_nand<mode>3"
3075  [(set (match_operand:SSEMODEI 0 "register_operand" "=x")
3076	(and:SSEMODEI
3077	  (not:SSEMODEI (match_operand:SSEMODEI 1 "register_operand" "0"))
3078	  (match_operand:SSEMODEI 2 "nonimmediate_operand" "xm")))]
3079  "TARGET_SSE2"
3080  "pandn\t{%2, %0|%0, %2}"
3081  [(set_attr "type" "sselog")
3082   (set_attr "mode" "TI")])
3083
;; Bitwise inclusive-OR expander for the SSEMODEI modes.  The operands
;; are passed through ix86_fixup_binary_operands_no_copy (defined
;; outside this file) before the ior template is emitted.
3084(define_expand "ior<mode>3"
3085  [(set (match_operand:SSEMODEI 0 "register_operand" "")
3086	(ior:SSEMODEI (match_operand:SSEMODEI 1 "nonimmediate_operand" "")
3087		      (match_operand:SSEMODEI 2 "nonimmediate_operand" "")))]
3088  "TARGET_SSE2"
3089  "ix86_fixup_binary_operands_no_copy (IOR, <MODE>mode, operands);")
3090
;; Bitwise inclusive OR for the SSEMODEI modes; emits por regardless of
;; element size.  Operand 1 is tied to the destination and marked
;; commutative.
3091(define_insn "*ior<mode>3"
3092  [(set (match_operand:SSEMODEI 0 "register_operand" "=x")
3093	(ior:SSEMODEI
3094	  (match_operand:SSEMODEI 1 "nonimmediate_operand" "%0")
3095	  (match_operand:SSEMODEI 2 "nonimmediate_operand" "xm")))]
3096  "TARGET_SSE2 && ix86_binary_operator_ok (IOR, <MODE>mode, operands)"
3097  "por\t{%2, %0|%0, %2}"
3098  [(set_attr "type" "sselog")
3099   (set_attr "mode" "TI")])
3100
;; Bitwise exclusive-OR expander for the SSEMODEI modes.  The operands
;; are passed through ix86_fixup_binary_operands_no_copy (defined
;; outside this file) before the xor template is emitted.
3101(define_expand "xor<mode>3"
3102  [(set (match_operand:SSEMODEI 0 "register_operand" "")
3103	(xor:SSEMODEI (match_operand:SSEMODEI 1 "nonimmediate_operand" "")
3104		      (match_operand:SSEMODEI 2 "nonimmediate_operand" "")))]
3105  "TARGET_SSE2"
3106  "ix86_fixup_binary_operands_no_copy (XOR, <MODE>mode, operands);")
3107
;; Bitwise exclusive OR for the SSEMODEI modes; emits pxor regardless of
;; element size.  Operand 1 is tied to the destination and marked
;; commutative.
3108(define_insn "*xor<mode>3"
3109  [(set (match_operand:SSEMODEI 0 "register_operand" "=x")
3110	(xor:SSEMODEI
3111	  (match_operand:SSEMODEI 1 "nonimmediate_operand" "%0")
3112	  (match_operand:SSEMODEI 2 "nonimmediate_operand" "xm")))]
3113  "TARGET_SSE2 && ix86_binary_operator_ok (XOR, <MODE>mode, operands)"
3114  "pxor\t{%2, %0|%0, %2}"
3115  [(set_attr "type" "sselog")
3116   (set_attr "mode" "TI")])
3117
3118;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
3119;;
3120;; Parallel integral element swizzling
3121;;
3122;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
3123
;; packsswb: signed-saturating truncation (ss_truncate) of two V8HI
;; inputs to V8QI each, concatenated into a V16QI result with operand 1
;; supplying the low half.
3124(define_insn "sse2_packsswb"
3125  [(set (match_operand:V16QI 0 "register_operand" "=x")
3126	(vec_concat:V16QI
3127	  (ss_truncate:V8QI
3128	    (match_operand:V8HI 1 "register_operand" "0"))
3129	  (ss_truncate:V8QI
3130	    (match_operand:V8HI 2 "nonimmediate_operand" "xm"))))]
3131  "TARGET_SSE2"
3132  "packsswb\t{%2, %0|%0, %2}"
3133  [(set_attr "type" "sselog")
3134   (set_attr "mode" "TI")])
3135
;; packssdw: signed-saturating truncation (ss_truncate) of two V4SI
;; inputs to V4HI each, concatenated into a V8HI result with operand 1
;; supplying the low half.
3136(define_insn "sse2_packssdw"
3137  [(set (match_operand:V8HI 0 "register_operand" "=x")
3138	(vec_concat:V8HI
3139	  (ss_truncate:V4HI
3140	    (match_operand:V4SI 1 "register_operand" "0"))
3141	  (ss_truncate:V4HI
3142	    (match_operand:V4SI 2 "nonimmediate_operand" "xm"))))]
3143  "TARGET_SSE2"
3144  "packssdw\t{%2, %0|%0, %2}"
3145  [(set_attr "type" "sselog")
3146   (set_attr "mode" "TI")])
3147
;; packuswb: unsigned-saturating truncation (us_truncate) of two V8HI
;; inputs to V8QI each, concatenated into a V16QI result with operand 1
;; supplying the low half.
3148(define_insn "sse2_packuswb"
3149  [(set (match_operand:V16QI 0 "register_operand" "=x")
3150	(vec_concat:V16QI
3151	  (us_truncate:V8QI
3152	    (match_operand:V8HI 1 "register_operand" "0"))
3153	  (us_truncate:V8QI
3154	    (match_operand:V8HI 2 "nonimmediate_operand" "xm"))))]
3155  "TARGET_SSE2"
3156  "packuswb\t{%2, %0|%0, %2}"
3157  [(set_attr "type" "sselog")
3158   (set_attr "mode" "TI")])
3159
;; punpckhbw: interleave the high eight bytes of operands 1 and 2.  In
;; the 32-element concatenation, indices 8..15 come from operand 1 and
;; 24..31 from operand 2; the selection alternates between the two.
3160(define_insn "sse2_punpckhbw"
3161  [(set (match_operand:V16QI 0 "register_operand" "=x")
3162	(vec_select:V16QI
3163	  (vec_concat:V32QI
3164	    (match_operand:V16QI 1 "register_operand" "0")
3165	    (match_operand:V16QI 2 "nonimmediate_operand" "xm"))
3166	  (parallel [(const_int 8)  (const_int 24)
3167		     (const_int 9)  (const_int 25)
3168		     (const_int 10) (const_int 26)
3169		     (const_int 11) (const_int 27)
3170		     (const_int 12) (const_int 28) 
3171		     (const_int 13) (const_int 29)
3172		     (const_int 14) (const_int 30)
3173		     (const_int 15) (const_int 31)])))]
3174  "TARGET_SSE2"
3175  "punpckhbw\t{%2, %0|%0, %2}"
3176  [(set_attr "type" "sselog")
3177   (set_attr "mode" "TI")])
3178
;; punpcklbw: interleave the low eight bytes of operands 1 and 2.  In
;; the 32-element concatenation, indices 0..7 come from operand 1 and
;; 16..23 from operand 2; the selection alternates between the two.
3179(define_insn "sse2_punpcklbw"
3180  [(set (match_operand:V16QI 0 "register_operand" "=x")
3181	(vec_select:V16QI
3182	  (vec_concat:V32QI
3183	    (match_operand:V16QI 1 "register_operand" "0")
3184	    (match_operand:V16QI 2 "nonimmediate_operand" "xm"))
3185	  (parallel [(const_int 0) (const_int 16)
3186		     (const_int 1) (const_int 17)
3187		     (const_int 2) (const_int 18)
3188		     (const_int 3) (const_int 19)
3189		     (const_int 4) (const_int 20)
3190		     (const_int 5) (const_int 21)
3191		     (const_int 6) (const_int 22)
3192		     (const_int 7) (const_int 23)])))]
3193  "TARGET_SSE2"
3194  "punpcklbw\t{%2, %0|%0, %2}"
3195  [(set_attr "type" "sselog")
3196   (set_attr "mode" "TI")])
3197
;; punpckhwd: interleave the high four halfwords of operands 1 and 2
;; (indices 4..7 from operand 1, 12..15 from operand 2 in the
;; 16-element concatenation).
3198(define_insn "sse2_punpckhwd"
3199  [(set (match_operand:V8HI 0 "register_operand" "=x")
3200	(vec_select:V8HI
3201	  (vec_concat:V16HI
3202	    (match_operand:V8HI 1 "register_operand" "0")
3203	    (match_operand:V8HI 2 "nonimmediate_operand" "xm"))
3204	  (parallel [(const_int 4) (const_int 12)
3205		     (const_int 5) (const_int 13)
3206		     (const_int 6) (const_int 14)
3207		     (const_int 7) (const_int 15)])))]
3208  "TARGET_SSE2"
3209  "punpckhwd\t{%2, %0|%0, %2}"
3210  [(set_attr "type" "sselog")
3211   (set_attr "mode" "TI")])
3212
;; punpcklwd: interleave the low four halfwords of operands 1 and 2
;; (indices 0..3 from operand 1, 8..11 from operand 2 in the
;; 16-element concatenation).
3213(define_insn "sse2_punpcklwd"
3214  [(set (match_operand:V8HI 0 "register_operand" "=x")
3215	(vec_select:V8HI
3216	  (vec_concat:V16HI
3217	    (match_operand:V8HI 1 "register_operand" "0")
3218	    (match_operand:V8HI 2 "nonimmediate_operand" "xm"))
3219	  (parallel [(const_int 0) (const_int 8)
3220		     (const_int 1) (const_int 9)
3221		     (const_int 2) (const_int 10)
3222		     (const_int 3) (const_int 11)])))]
3223  "TARGET_SSE2"
3224  "punpcklwd\t{%2, %0|%0, %2}"
3225  [(set_attr "type" "sselog")
3226   (set_attr "mode" "TI")])
3227
;; punpckhdq: interleave the high two SImode elements of operands 1 and
;; 2 (indices 2,3 from operand 1 and 6,7 from operand 2 in the
;; 8-element concatenation).
3228(define_insn "sse2_punpckhdq"
3229  [(set (match_operand:V4SI 0 "register_operand" "=x")
3230	(vec_select:V4SI
3231	  (vec_concat:V8SI
3232	    (match_operand:V4SI 1 "register_operand" "0")
3233	    (match_operand:V4SI 2 "nonimmediate_operand" "xm"))
3234	  (parallel [(const_int 2) (const_int 6)
3235		     (const_int 3) (const_int 7)])))]
3236  "TARGET_SSE2"
3237  "punpckhdq\t{%2, %0|%0, %2}"
3238  [(set_attr "type" "sselog")
3239   (set_attr "mode" "TI")])
3240
;; punpckldq: interleave the low two SImode elements of operands 1 and
;; 2 (indices 0,1 from operand 1 and 4,5 from operand 2 in the
;; 8-element concatenation).
3241(define_insn "sse2_punpckldq"
3242  [(set (match_operand:V4SI 0 "register_operand" "=x")
3243	(vec_select:V4SI
3244	  (vec_concat:V8SI
3245	    (match_operand:V4SI 1 "register_operand" "0")
3246	    (match_operand:V4SI 2 "nonimmediate_operand" "xm"))
3247	  (parallel [(const_int 0) (const_int 4)
3248		     (const_int 1) (const_int 5)])))]
3249  "TARGET_SSE2"
3250  "punpckldq\t{%2, %0|%0, %2}"
3251  [(set_attr "type" "sselog")
3252   (set_attr "mode" "TI")])
3253
;; punpckhqdq: result is { operand 1 element 1, operand 2 element 1 }
;; (indices 1 and 3 of the 4-element concatenation).
3254(define_insn "sse2_punpckhqdq"
3255  [(set (match_operand:V2DI 0 "register_operand" "=x")
3256	(vec_select:V2DI
3257	  (vec_concat:V4DI
3258	    (match_operand:V2DI 1 "register_operand" "0")
3259	    (match_operand:V2DI 2 "nonimmediate_operand" "xm"))
3260	  (parallel [(const_int 1)
3261		     (const_int 3)])))]
3262  "TARGET_SSE2"
3263  "punpckhqdq\t{%2, %0|%0, %2}"
3264  [(set_attr "type" "sselog")
3265   (set_attr "mode" "TI")])
3266
;; punpcklqdq: result is { operand 1 element 0, operand 2 element 0 }
;; (indices 0 and 2 of the 4-element concatenation).
3267(define_insn "sse2_punpcklqdq"
3268  [(set (match_operand:V2DI 0 "register_operand" "=x")
3269	(vec_select:V2DI
3270	  (vec_concat:V4DI
3271	    (match_operand:V2DI 1 "register_operand" "0")
3272	    (match_operand:V2DI 2 "nonimmediate_operand" "xm"))
3273	  (parallel [(const_int 0)
3274		     (const_int 2)])))]
3275  "TARGET_SSE2"
3276  "punpcklqdq\t{%2, %0|%0, %2}"
3277  [(set_attr "type" "sselog")
3278   (set_attr "mode" "TI")])
3279
;; pinsrw expander.  Takes the value as SImode and the element index as
;; operand 3 (const_0_to_7_operand); the preparation code narrows the
;; value to its HImode lowpart and converts the index N into the
;; one-bit vec_merge mask (1 << N) that *sse2_pinsrw matches.
3280(define_expand "sse2_pinsrw"
3281  [(set (match_operand:V8HI 0 "register_operand" "")
3282	(vec_merge:V8HI
3283	  (vec_duplicate:V8HI
3284	    (match_operand:SI 2 "nonimmediate_operand" ""))
3285	  (match_operand:V8HI 1 "register_operand" "")
3286	  (match_operand:SI 3 "const_0_to_7_operand" "")))]
3287  "TARGET_SSE2"
3288{
3289  operands[2] = gen_lowpart (HImode, operands[2]);
3290  operands[3] = GEN_INT ((1 << INTVAL (operands[3])));
3291})
3292
;; pinsrw insn.  Operand 3 is the one-bit vec_merge mask
;; (const_pow2_1_to_128_operand); at output time it is converted back
;; to an element index with exact_log2 for use as the immediate.
3293(define_insn "*sse2_pinsrw"
3294  [(set (match_operand:V8HI 0 "register_operand" "=x")
3295	(vec_merge:V8HI
3296	  (vec_duplicate:V8HI
3297	    (match_operand:HI 2 "nonimmediate_operand" "rm"))
3298	  (match_operand:V8HI 1 "register_operand" "0")
3299	  (match_operand:SI 3 "const_pow2_1_to_128_operand" "n")))]
3300  "TARGET_SSE2"
3301{
3302  operands[3] = GEN_INT (exact_log2 (INTVAL (operands[3])));
3303  return "pinsrw\t{%3, %k2, %0|%0, %k2, %3}";
3304}
3305  [(set_attr "type" "sselog")
3306   (set_attr "mode" "TI")])
3307
;; pextrw: select one HImode element of a V8HI register (index given by
;; operand 2, const_0_to_7_operand) and zero-extend it into an SImode
;; general register.
3308(define_insn "sse2_pextrw"
3309  [(set (match_operand:SI 0 "register_operand" "=r")
3310	(zero_extend:SI
3311	  (vec_select:HI
3312	    (match_operand:V8HI 1 "register_operand" "x")
3313	    (parallel [(match_operand:SI 2 "const_0_to_7_operand" "n")]))))]
3314  "TARGET_SSE2"
3315  "pextrw\t{%2, %1, %0|%0, %1, %2}"
3316  [(set_attr "type" "sselog")
3317   (set_attr "mode" "TI")])
3318
;; pshufd expander taking the packed 8-bit immediate as operand 2.  The
;; immediate is split into four 2-bit element selectors (bits 1:0, 3:2,
;; 5:4, 7:6) which are passed to sse2_pshufd_1.
3319(define_expand "sse2_pshufd"
3320  [(match_operand:V4SI 0 "register_operand" "")
3321   (match_operand:V4SI 1 "nonimmediate_operand" "")
3322   (match_operand:SI 2 "const_int_operand" "")]
3323  "TARGET_SSE2"
3324{
3325  int mask = INTVAL (operands[2]);
3326  emit_insn (gen_sse2_pshufd_1 (operands[0], operands[1],
3327				GEN_INT ((mask >> 0) & 3),
3328				GEN_INT ((mask >> 2) & 3),
3329				GEN_INT ((mask >> 4) & 3),
3330				GEN_INT ((mask >> 6) & 3)));
3331  DONE;
3332})
3333
;; pshufd insn with the shuffle written as an explicit vec_select.
;; Operands 2..5 are the four source-element indices (each
;; const_0_to_3_operand); at output time they are packed, two bits
;; apiece starting from bit 0, into the immediate, which replaces
;; operand 2.
3334(define_insn "sse2_pshufd_1"
3335  [(set (match_operand:V4SI 0 "register_operand" "=x")
3336	(vec_select:V4SI
3337	  (match_operand:V4SI 1 "nonimmediate_operand" "xm")
3338	  (parallel [(match_operand 2 "const_0_to_3_operand" "")
3339		     (match_operand 3 "const_0_to_3_operand" "")
3340		     (match_operand 4 "const_0_to_3_operand" "")
3341		     (match_operand 5 "const_0_to_3_operand" "")])))]
3342  "TARGET_SSE2"
3343{
3344  int mask = 0;
3345  mask |= INTVAL (operands[2]) << 0;
3346  mask |= INTVAL (operands[3]) << 2;
3347  mask |= INTVAL (operands[4]) << 4;
3348  mask |= INTVAL (operands[5]) << 6;
3349  operands[2] = GEN_INT (mask);
3350
3351  return "pshufd\t{%2, %1, %0|%0, %1, %2}";
3352}
3353  [(set_attr "type" "sselog1")
3354   (set_attr "mode" "TI")])
3355
;; pshuflw expander taking the packed 8-bit immediate as operand 2.  The
;; immediate is split into four 2-bit element selectors (bits 1:0, 3:2,
;; 5:4, 7:6) which are passed to sse2_pshuflw_1.
3356(define_expand "sse2_pshuflw"
3357  [(match_operand:V8HI 0 "register_operand" "")
3358   (match_operand:V8HI 1 "nonimmediate_operand" "")
3359   (match_operand:SI 2 "const_int_operand" "")]
3360  "TARGET_SSE2"
3361{
3362  int mask = INTVAL (operands[2]);
3363  emit_insn (gen_sse2_pshuflw_1 (operands[0], operands[1],
3364				 GEN_INT ((mask >> 0) & 3),
3365				 GEN_INT ((mask >> 2) & 3),
3366				 GEN_INT ((mask >> 4) & 3),
3367				 GEN_INT ((mask >> 6) & 3)));
3368  DONE;
3369})
3370
;; pshuflw insn: result elements 0..3 are chosen from source elements
;; 0..3 by operands 2..5; elements 4..7 are passed through unchanged
;; (fixed indices 4,5,6,7).  At output time the four selectors are
;; packed, two bits apiece, into the immediate that replaces operand 2.
3371(define_insn "sse2_pshuflw_1"
3372  [(set (match_operand:V8HI 0 "register_operand" "=x")
3373	(vec_select:V8HI
3374	  (match_operand:V8HI 1 "nonimmediate_operand" "xm")
3375	  (parallel [(match_operand 2 "const_0_to_3_operand" "")
3376		     (match_operand 3 "const_0_to_3_operand" "")
3377		     (match_operand 4 "const_0_to_3_operand" "")
3378		     (match_operand 5 "const_0_to_3_operand" "")
3379		     (const_int 4)
3380		     (const_int 5)
3381		     (const_int 6)
3382		     (const_int 7)])))]
3383  "TARGET_SSE2"
3384{
3385  int mask = 0;
3386  mask |= INTVAL (operands[2]) << 0;
3387  mask |= INTVAL (operands[3]) << 2;
3388  mask |= INTVAL (operands[4]) << 4;
3389  mask |= INTVAL (operands[5]) << 6;
3390  operands[2] = GEN_INT (mask);
3391
3392  return "pshuflw\t{%2, %1, %0|%0, %1, %2}";
3393}
3394  [(set_attr "type" "sselog")
3395   (set_attr "mode" "TI")])
3396
;; pshufhw expander taking the packed 8-bit immediate as operand 2.
;; Each 2-bit selector is biased by 4 so that it names an element in the
;; range 4..7, as sse2_pshufhw_1 requires.
3397(define_expand "sse2_pshufhw"
3398  [(match_operand:V8HI 0 "register_operand" "")
3399   (match_operand:V8HI 1 "nonimmediate_operand" "")
3400   (match_operand:SI 2 "const_int_operand" "")]
3401  "TARGET_SSE2"
3402{
3403  int mask = INTVAL (operands[2]);
3404  emit_insn (gen_sse2_pshufhw_1 (operands[0], operands[1],
3405				 GEN_INT (((mask >> 0) & 3) + 4),
3406				 GEN_INT (((mask >> 2) & 3) + 4),
3407				 GEN_INT (((mask >> 4) & 3) + 4),
3408				 GEN_INT (((mask >> 6) & 3) + 4)));
3409  DONE;
3410})
3411
;; pshufhw insn: result elements 0..3 are passed through unchanged
;; (fixed indices 0,1,2,3); elements 4..7 are chosen from source
;; elements 4..7 by operands 2..5.  At output time the bias of 4 is
;; removed from each selector and the four 2-bit fields are packed into
;; the immediate that replaces operand 2.
3412(define_insn "sse2_pshufhw_1"
3413  [(set (match_operand:V8HI 0 "register_operand" "=x")
3414	(vec_select:V8HI
3415	  (match_operand:V8HI 1 "nonimmediate_operand" "xm")
3416	  (parallel [(const_int 0)
3417		     (const_int 1)
3418		     (const_int 2)
3419		     (const_int 3)
3420		     (match_operand 2 "const_4_to_7_operand" "")
3421		     (match_operand 3 "const_4_to_7_operand" "")
3422		     (match_operand 4 "const_4_to_7_operand" "")
3423		     (match_operand 5 "const_4_to_7_operand" "")])))]
3424  "TARGET_SSE2"
3425{
3426  int mask = 0;
3427  mask |= (INTVAL (operands[2]) - 4) << 0;
3428  mask |= (INTVAL (operands[3]) - 4) << 2;
3429  mask |= (INTVAL (operands[4]) - 4) << 4;
3430  mask |= (INTVAL (operands[5]) - 4) << 6;
3431  operands[2] = GEN_INT (mask);
3432
3433  return "pshufhw\t{%2, %1, %0|%0, %1, %2}";
3434}
3435  [(set_attr "type" "sselog")
3436   (set_attr "mode" "TI")])
3437
;; Load an SImode value into element 0 of a V4SI register: vec_merge
;; (mask 1) of the duplicated scalar with operand 2, which the
;; preparation code sets to the V4SI zero constant.  Enabled under
;; TARGET_SSE, not TARGET_SSE2, despite the name.
3438(define_expand "sse2_loadd"
3439  [(set (match_operand:V4SI 0 "register_operand" "")
3440	(vec_merge:V4SI
3441	  (vec_duplicate:V4SI
3442	    (match_operand:SI 1 "nonimmediate_operand" ""))
3443	  (match_dup 2)
3444	  (const_int 1)))]
3445  "TARGET_SSE"
3446  "operands[2] = CONST0_RTX (V4SImode);")
3447
;; Insert an SImode value (operand 2) into element 0 of a V4SI, taking
;; the remaining elements from operand 1.  Three alternatives:
;;   0: operand 1 is zero (C), source in memory or a general register,
;;      destination class Y -> movd
;;   1: operand 1 is zero (C), source in memory -> movss
;;   2: operand 1 tied to the destination, source in an SSE register
;;      -> movss
3448(define_insn "sse2_loadld"
3449  [(set (match_operand:V4SI 0 "register_operand"       "=Y,x,x")
3450	(vec_merge:V4SI
3451	  (vec_duplicate:V4SI
3452	    (match_operand:SI 2 "nonimmediate_operand" "mr,m,x"))
3453	  (match_operand:V4SI 1 "reg_or_0_operand"     " C,C,0")
3454	  (const_int 1)))]
3455  "TARGET_SSE"
3456  "@
3457   movd\t{%2, %0|%0, %2}
3458   movss\t{%2, %0|%0, %2}
3459   movss\t{%2, %0|%0, %2}"
3460  [(set_attr "type" "ssemov")
3461   (set_attr "mode" "TI,V4SF,SF")])
3462
3463;; ??? The hardware supports more, but TARGET_INTER_UNIT_MOVES must
3464;; be taken into account, and movdi isn't fully populated even without.
;; Extract element 0 of a V4SI register as SImode.  The insn itself has
;; no assembler output ("#"); once reload has completed it is split
;; into a plain SImode move whose source is the same hard register
;; number viewed in SImode.
3465(define_insn_and_split "sse2_stored"
3466  [(set (match_operand:SI 0 "nonimmediate_operand" "=mx")
3467	(vec_select:SI
3468	  (match_operand:V4SI 1 "register_operand" "x")
3469	  (parallel [(const_int 0)])))]
3470  "TARGET_SSE"
3471  "#"
3472  "&& reload_completed"
3473  [(set (match_dup 0) (match_dup 1))]
3474{
3475  operands[1] = gen_rtx_REG (SImode, REGNO (operands[1]));
3476})
3477
;; Named expander for extracting element 0 of a V2DI register as DImode.
;; It has no preparation code; the template is emitted as written.
3478(define_expand "sse_storeq"
3479  [(set (match_operand:DI 0 "nonimmediate_operand" "")
3480	(vec_select:DI
3481	  (match_operand:V2DI 1 "register_operand" "")
3482	  (parallel [(const_int 0)])))]
3483  "TARGET_SSE"
3484  "")
3485
3486;; ??? The hardware supports more, but TARGET_INTER_UNIT_MOVES must
3487;; be taken into account, and movdi isn't fully populated even without.
;; Insn for extracting element 0 of a V2DI register as DImode.  It has
;; no assembler output ("#") and is expected to be split; a define_split
;; with the same template is present in this file.
3488(define_insn "*sse2_storeq"
3489  [(set (match_operand:DI 0 "nonimmediate_operand" "=mx")
3490	(vec_select:DI
3491	  (match_operand:V2DI 1 "register_operand" "x")
3492	  (parallel [(const_int 0)])))]
3493  "TARGET_SSE"
3494  "#")
3495
;; After reload, rewrite "extract element 0 of a V2DI register as
;; DImode" into a plain DImode move whose source is the same hard
;; register number viewed in DImode.
3496(define_split
3497  [(set (match_operand:DI 0 "nonimmediate_operand" "")
3498	(vec_select:DI
3499	  (match_operand:V2DI 1 "register_operand" "")
3500	  (parallel [(const_int 0)])))]
3501  "TARGET_SSE && reload_completed"
3502  [(set (match_dup 0) (match_dup 1))]
3503{
3504  operands[1] = gen_rtx_REG (DImode, REGNO (operands[1]));
3505})
3506
;; Extract element 1 of a V2DI as DImode when SSE2 is available.
;; Alternatives:
;;   0: register -> memory, movhps
;;   1: register in place (source tied to the destination), psrldq by 8
;;   2: offsettable memory -> register, movq from %H1
;; The insn condition rejects the memory-to-memory combination.
3507(define_insn "*vec_extractv2di_1_sse2"
3508  [(set (match_operand:DI 0 "nonimmediate_operand" "=m,x,x")
3509	(vec_select:DI
3510	  (match_operand:V2DI 1 "nonimmediate_operand" "x,0,o")
3511	  (parallel [(const_int 1)])))]
3512  "TARGET_SSE2 && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
3513  "@
3514   movhps\t{%1, %0|%0, %1}
3515   psrldq\t{$8, %0|%0, 8}
3516   movq\t{%H1, %0|%0, %H1}"
3517  [(set_attr "type" "ssemov,sseishft,ssemov")
3518   (set_attr "memory" "*,none,*")
3519   (set_attr "mode" "V2SF,TI,TI")])
3520
3521;; Not sure this is ever used, but it doesn't hurt to have it. -aoliva
;; Extract element 1 of a V2DI as DImode when only SSE1 is available
;; (the condition requires !TARGET_SSE2).  Alternatives:
;;   0: register -> memory, movhps
;;   1: register -> register, movhlps
;;   2: offsettable memory -> register, movlps from %H1
;; The insn condition rejects the memory-to-memory combination.
3522(define_insn "*vec_extractv2di_1_sse"
3523  [(set (match_operand:DI 0 "nonimmediate_operand" "=m,x,x")
3524	(vec_select:DI
3525	  (match_operand:V2DI 1 "nonimmediate_operand" "x,x,o")
3526	  (parallel [(const_int 1)])))]
3527  "!TARGET_SSE2 && TARGET_SSE
3528   && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
3529  "@
3530   movhps\t{%1, %0|%0, %1}
3531   movhlps\t{%1, %0|%0, %1}
3532   movlps\t{%H1, %0|%0, %H1}"
3533  [(set_attr "type" "ssemov")
3534   (set_attr "mode" "V2SF,V4SF,V2SF")])
3535
;; Broadcast an SI value held in a vector register to all four elements
;; of a V4SI register.  Alternatives:
;;   0: "Y" registers, pshufd with selector 0;
;;   1: "x" registers with the source tied to the destination, shufps
;;      with selector 0 applied to the register itself.
;; NOTE(review): "Y" is presumably the SSE2-capable register class,
;; which would make alternative 1 the SSE1 fallback -- confirm against
;; the i386 constraint definitions.
(define_insn "*vec_dupv4si"
  [(set (match_operand:V4SI 0 "register_operand" "=Y,x")
	(vec_duplicate:V4SI
	  (match_operand:SI 1 "register_operand" " Y,0")))]
  "TARGET_SSE"
  "@
   pshufd\t{$0, %1, %0|%0, %1, 0}
   shufps\t{$0, %0, %0|%0, %0, 0}"
  [(set_attr "type" "sselog1")
   (set_attr "mode" "TI,V4SF")])
3546
;; Broadcast a DI value to both elements of a V2DI register.  In both
;; alternatives the source is tied to the destination ("0"), so the
;; instruction only has to copy the low quadword of the register into
;; its own high quadword:
;;   0: "Y" register, punpcklqdq %0, %0;
;;   1: "x" register, movlhps %0, %0.
;; Both operands of each template are the same register, so one string
;; serves AT&T and Intel syntax alike.
(define_insn "*vec_dupv2di"
  [(set (match_operand:V2DI 0 "register_operand" "=Y,x")
	(vec_duplicate:V2DI
	  (match_operand:DI 1 "register_operand" " 0,0")))]
  "TARGET_SSE"
  "@
   punpcklqdq\t%0, %0
   movlhps\t%0, %0"
  [(set_attr "type" "sselog1,ssemov")
   (set_attr "mode" "TI,V4SF")])
3557
3558;; ??? In theory we can match memory for the MMX alternative, but allowing
3559;; nonimmediate_operand for operand 2 and *not* allowing memory for the SSE
3560;; alternatives pretty much forces the MMX alternative to be chosen.
;; Build a V2SI from two SI values when SSE2 is enabled: operand 1
;; becomes element 0 and operand 2 element 1.  Alternatives 0-1 target
;; "Y" registers, alternatives 2-3 target MMX registers ("y"; the "*"
;; keeps them out of register-preference computation).
;;   0,2: operand 1 is already in the destination; punpckldq
;;        interleaves operand 2 in as the upper element;
;;   1,3: operand 2 is the constant zero ("C"), so a single movd of
;;        operand 1 from a general register or memory is emitted
;;        (this relies on movd clearing the rest of the destination).
(define_insn "*sse2_concatv2si"
  [(set (match_operand:V2SI 0 "register_operand"     "=Y, Y,*y,*y")
	(vec_concat:V2SI
	  (match_operand:SI 1 "nonimmediate_operand" " 0,rm, 0,rm")
	  (match_operand:SI 2 "reg_or_0_operand"     " Y, C,*y, C")))]
  "TARGET_SSE2"
  "@
   punpckldq\t{%2, %0|%0, %2}
   movd\t{%1, %0|%0, %1}
   punpckldq\t{%2, %0|%0, %2}
   movd\t{%1, %0|%0, %1}"
  [(set_attr "type" "sselog,ssemov,mmxcvt,mmxmov")
   (set_attr "mode" "TI,TI,DI,DI")])
3574
;; Build a V2SI from two SI values using only SSE1 (or MMX)
;; instructions: operand 1 becomes element 0, operand 2 element 1.
;;   0: SSE register, operand 1 tied to the destination; unpcklps
;;      interleaves operand 2 in;
;;   1: SSE register, operand 2 is constant zero; movss loads operand 1
;;      from memory only (no general-register source in this
;;      alternative);
;;   2: MMX register, operand 1 tied; punpckldq;
;;   3: MMX register, operand 2 is constant zero; movd from a general
;;      register or memory.
(define_insn "*sse1_concatv2si"
  [(set (match_operand:V2SI 0 "register_operand"     "=x,x,*y,*y")
	(vec_concat:V2SI
	  (match_operand:SI 1 "nonimmediate_operand" " 0,m, 0,*rm")
	  (match_operand:SI 2 "reg_or_0_operand"     " x,C,*y,C")))]
  "TARGET_SSE"
  "@
   unpcklps\t{%2, %0|%0, %2}
   movss\t{%1, %0|%0, %1}
   punpckldq\t{%2, %0|%0, %2}
   movd\t{%1, %0|%0, %1}"
  [(set_attr "type" "sselog,ssemov,mmxcvt,mmxmov")
   (set_attr "mode" "V4SF,V4SF,DI,DI")])
3588
;; Concatenate two V2SI halves into a V4SI.  Operand 1 (the low half)
;; is always tied to the destination; the instruction only has to place
;; operand 2 in the high half:
;;   0: "Y" register source, punpcklqdq;
;;   1: "x" register source, movlhps;
;;   2: memory source, movhps.
(define_insn "*vec_concatv4si_1"
  [(set (match_operand:V4SI 0 "register_operand"       "=Y,x,x")
	(vec_concat:V4SI
	  (match_operand:V2SI 1 "register_operand"     " 0,0,0")
	  (match_operand:V2SI 2 "nonimmediate_operand" " Y,x,m")))]
  "TARGET_SSE"
  "@
   punpcklqdq\t{%2, %0|%0, %2}
   movlhps\t{%2, %0|%0, %2}
   movhps\t{%2, %0|%0, %2}"
  [(set_attr "type" "sselog,ssemov,ssemov")
   (set_attr "mode" "TI,V4SF,V2SF")])
3601
;; Build a V2DI from two DI values: operand 1 becomes element 0 and
;; operand 2 element 1.  Alternatives:
;;   0: operand 1 in memory, operand 2 constant zero ("C"): movq load;
;;   1: operand 1 in an MMX register, operand 2 zero: movq2dq
;;      (discouraged by "?");
;;   2: operand 1 tied to the destination, operand 2 in a "Y" register:
;;      punpcklqdq;
;;   3: operand 1 tied, operand 2 in an "x" register: movlhps;
;;   4: operand 1 tied, operand 2 in memory: movhps loads the high half;
;;   5: operand 2 tied to the destination, operand 1 in memory: movlps
;;      loads the low half.
(define_insn "*vec_concatv2di"
  [(set (match_operand:V2DI 0 "register_operand"     "=Y,?Y,Y,x,x,x")
	(vec_concat:V2DI
	  (match_operand:DI 1 "nonimmediate_operand" " m,*y,0,0,0,m")
	  (match_operand:DI 2 "vector_move_operand"  " C, C,Y,x,m,0")))]
  "TARGET_SSE"
  "@
   movq\t{%1, %0|%0, %1}
   movq2dq\t{%1, %0|%0, %1}
   punpcklqdq\t{%2, %0|%0, %2}
   movlhps\t{%2, %0|%0, %2}
   movhps\t{%2, %0|%0, %2}
   movlps\t{%1, %0|%0, %1}"
  [(set_attr "type" "ssemov,ssemov,sselog,ssemov,ssemov,ssemov")
   (set_attr "mode" "TI,TI,TI,V4SF,V2SF,V2SF")])
3617
;; Expander "vec_setv2di": store DI scalar operand 1 into the element of
;; V2DI register operand 0 selected by the constant index operand 2.
;; No RTL template is used; the helper emits everything.
(define_expand "vec_setv2di"
  [(match_operand:V2DI 0 "register_operand" "")
   (match_operand:DI 1 "register_operand" "")
   (match_operand 2 "const_int_operand" "")]
  "TARGET_SSE"
{
  /* Element index, guaranteed constant by the operand predicate.  */
  const HOST_WIDE_INT elt = INTVAL (operands[2]);

  ix86_expand_vector_set (false, operands[0], operands[1], elt);
  DONE;
})
3628
;; Expander "vec_extractv2di": copy the element of V2DI register
;; operand 1 selected by the constant index operand 2 into DI register
;; operand 0.  No RTL template is used; the helper emits everything.
(define_expand "vec_extractv2di"
  [(match_operand:DI 0 "register_operand" "")
   (match_operand:V2DI 1 "register_operand" "")
   (match_operand 2 "const_int_operand" "")]
  "TARGET_SSE"
{
  /* Element index, guaranteed constant by the operand predicate.  */
  const HOST_WIDE_INT elt = INTVAL (operands[2]);

  ix86_expand_vector_extract (false, operands[0], operands[1], elt);
  DONE;
})
3639
;; Expander "vec_initv2di": initialise V2DI register operand 0 from the
;; element description in operand 1 (no predicate or mode is imposed on
;; operand 1 here).  The helper emits all the code.
(define_expand "vec_initv2di"
  [(match_operand:V2DI 0 "register_operand" "")
   (match_operand 1 "" "")]
  "TARGET_SSE"
{
  rtx target = operands[0];
  rtx vals = operands[1];

  ix86_expand_vector_init (false, target, vals);
  DONE;
})
3648
;; Expander "vec_setv4si": store SI scalar operand 1 into the element of
;; V4SI register operand 0 selected by the constant index operand 2.
;; No RTL template is used; the helper emits everything.
(define_expand "vec_setv4si"
  [(match_operand:V4SI 0 "register_operand" "")
   (match_operand:SI 1 "register_operand" "")
   (match_operand 2 "const_int_operand" "")]
  "TARGET_SSE"
{
  /* Element index, guaranteed constant by the operand predicate.  */
  const HOST_WIDE_INT elt = INTVAL (operands[2]);

  ix86_expand_vector_set (false, operands[0], operands[1], elt);
  DONE;
})
3659
;; Expander "vec_extractv4si": copy the element of V4SI register
;; operand 1 selected by the constant index operand 2 into SI register
;; operand 0.  No RTL template is used; the helper emits everything.
(define_expand "vec_extractv4si"
  [(match_operand:SI 0 "register_operand" "")
   (match_operand:V4SI 1 "register_operand" "")
   (match_operand 2 "const_int_operand" "")]
  "TARGET_SSE"
{
  /* Element index, guaranteed constant by the operand predicate.  */
  const HOST_WIDE_INT elt = INTVAL (operands[2]);

  ix86_expand_vector_extract (false, operands[0], operands[1], elt);
  DONE;
})
3670
;; Expander "vec_initv4si": initialise V4SI register operand 0 from the
;; element description in operand 1 (no predicate or mode is imposed on
;; operand 1 here).  The helper emits all the code.
(define_expand "vec_initv4si"
  [(match_operand:V4SI 0 "register_operand" "")
   (match_operand 1 "" "")]
  "TARGET_SSE"
{
  rtx target = operands[0];
  rtx vals = operands[1];

  ix86_expand_vector_init (false, target, vals);
  DONE;
})
3679
;; Expander "vec_setv8hi": store HI scalar operand 1 into the element of
;; V8HI register operand 0 selected by the constant index operand 2.
;; No RTL template is used; the helper emits everything.
(define_expand "vec_setv8hi"
  [(match_operand:V8HI 0 "register_operand" "")
   (match_operand:HI 1 "register_operand" "")
   (match_operand 2 "const_int_operand" "")]
  "TARGET_SSE"
{
  /* Element index, guaranteed constant by the operand predicate.  */
  const HOST_WIDE_INT elt = INTVAL (operands[2]);

  ix86_expand_vector_set (false, operands[0], operands[1], elt);
  DONE;
})
3690
;; Expander "vec_extractv8hi": copy the element of V8HI register
;; operand 1 selected by the constant index operand 2 into HI register
;; operand 0.  No RTL template is used; the helper emits everything.
(define_expand "vec_extractv8hi"
  [(match_operand:HI 0 "register_operand" "")
   (match_operand:V8HI 1 "register_operand" "")
   (match_operand 2 "const_int_operand" "")]
  "TARGET_SSE"
{
  /* Element index, guaranteed constant by the operand predicate.  */
  const HOST_WIDE_INT elt = INTVAL (operands[2]);

  ix86_expand_vector_extract (false, operands[0], operands[1], elt);
  DONE;
})
3701
;; Expander "vec_initv8hi": initialise V8HI register operand 0 from the
;; element description in operand 1 (no predicate or mode is imposed on
;; operand 1 here).  The helper emits all the code.
(define_expand "vec_initv8hi"
  [(match_operand:V8HI 0 "register_operand" "")
   (match_operand 1 "" "")]
  "TARGET_SSE"
{
  rtx target = operands[0];
  rtx vals = operands[1];

  ix86_expand_vector_init (false, target, vals);
  DONE;
})
3710
;; Expander "vec_setv16qi": store QI scalar operand 1 into the element
;; of V16QI register operand 0 selected by the constant index operand 2.
;; No RTL template is used; the helper emits everything.
(define_expand "vec_setv16qi"
  [(match_operand:V16QI 0 "register_operand" "")
   (match_operand:QI 1 "register_operand" "")
   (match_operand 2 "const_int_operand" "")]
  "TARGET_SSE"
{
  /* Element index, guaranteed constant by the operand predicate.  */
  const HOST_WIDE_INT elt = INTVAL (operands[2]);

  ix86_expand_vector_set (false, operands[0], operands[1], elt);
  DONE;
})
3721
;; Expander "vec_extractv16qi": copy the element of V16QI register
;; operand 1 selected by the constant index operand 2 into QI register
;; operand 0.  No RTL template is used; the helper emits everything.
(define_expand "vec_extractv16qi"
  [(match_operand:QI 0 "register_operand" "")
   (match_operand:V16QI 1 "register_operand" "")
   (match_operand 2 "const_int_operand" "")]
  "TARGET_SSE"
{
  /* Element index, guaranteed constant by the operand predicate.  */
  const HOST_WIDE_INT elt = INTVAL (operands[2]);

  ix86_expand_vector_extract (false, operands[0], operands[1], elt);
  DONE;
})
3732
;; Expander "vec_initv16qi": initialise V16QI register operand 0 from
;; the element description in operand 1 (no predicate or mode is imposed
;; on operand 1 here).  The helper emits all the code.
(define_expand "vec_initv16qi"
  [(match_operand:V16QI 0 "register_operand" "")
   (match_operand 1 "" "")]
  "TARGET_SSE"
{
  rtx target = operands[0];
  rtx vals = operands[1];

  ix86_expand_vector_init (false, target, vals);
  DONE;
})
3741
3742;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
3743;;
3744;; Miscellaneous
3745;;
3746;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
3747
;; Unsigned rounding average of bytes (pavgb).  The template spells the
;; operation out in a widened mode: both V16QI inputs are zero-extended
;; to V16HI, added together, 1 is added to every element, the sum is
;; shifted right logically by one bit and truncated back to V16QI.
;; Operand 1 is tied to the destination and marked commutative ("%").
;; NOTE(review): the all-ones constant is written as const_vector:V16QI
;; although it is an operand of plus:V16HI -- confirm the mode mismatch
;; is intentional before touching it.
(define_insn "sse2_uavgv16qi3"
  [(set (match_operand:V16QI 0 "register_operand" "=x")
	(truncate:V16QI
	  (lshiftrt:V16HI
	    (plus:V16HI
	      (plus:V16HI
		(zero_extend:V16HI
		  (match_operand:V16QI 1 "nonimmediate_operand" "%0"))
		(zero_extend:V16HI
		  (match_operand:V16QI 2 "nonimmediate_operand" "xm")))
	      (const_vector:V16QI [(const_int 1) (const_int 1)
				   (const_int 1) (const_int 1)
				   (const_int 1) (const_int 1)
				   (const_int 1) (const_int 1)
				   (const_int 1) (const_int 1)
				   (const_int 1) (const_int 1)
				   (const_int 1) (const_int 1)
				   (const_int 1) (const_int 1)]))
	    (const_int 1))))]
  "TARGET_SSE2 && ix86_binary_operator_ok (PLUS, V16QImode, operands)"
  "pavgb\t{%2, %0|%0, %2}"
  [(set_attr "type" "sseiadd")
   (set_attr "mode" "TI")])
3771
;; Unsigned rounding average of 16-bit words (pavgw).  Both V8HI inputs
;; are zero-extended to V8SI, added together, 1 is added to every
;; element, the sum is shifted right logically by one bit and truncated
;; back to V8HI.  Operand 1 is tied to the destination and marked
;; commutative ("%").
;; NOTE(review): the all-ones constant is written as const_vector:V8HI
;; although it is an operand of plus:V8SI -- confirm the mode mismatch
;; is intentional before touching it.
(define_insn "sse2_uavgv8hi3"
  [(set (match_operand:V8HI 0 "register_operand" "=x")
	(truncate:V8HI
	  (lshiftrt:V8SI
	    (plus:V8SI
	      (plus:V8SI
		(zero_extend:V8SI
		  (match_operand:V8HI 1 "nonimmediate_operand" "%0"))
		(zero_extend:V8SI
		  (match_operand:V8HI 2 "nonimmediate_operand" "xm")))
	      (const_vector:V8HI [(const_int 1) (const_int 1)
				  (const_int 1) (const_int 1)
				  (const_int 1) (const_int 1)
				  (const_int 1) (const_int 1)]))
	    (const_int 1))))]
  "TARGET_SSE2 && ix86_binary_operator_ok (PLUS, V8HImode, operands)"
  "pavgw\t{%2, %0|%0, %2}"
  [(set_attr "type" "sseiadd")
   (set_attr "mode" "TI")])
3791
3792;; The correct representation for this is absolutely enormous, and 
3793;; surely not generally useful.
;; psadbw, modelled as an opaque unspec (UNSPEC_PSADBW) taking two
;; V16QI inputs and producing a V2DI result.  Operand 1 is tied to the
;; destination; operand 2 may be an SSE register or memory.
(define_insn "sse2_psadbw"
  [(set (match_operand:V2DI 0 "register_operand" "=x")
	(unspec:V2DI [(match_operand:V16QI 1 "register_operand" "0")
		      (match_operand:V16QI 2 "nonimmediate_operand" "xm")]
		     UNSPEC_PSADBW))]
  "TARGET_SSE2"
  "psadbw\t{%2, %0|%0, %2}"
  [(set_attr "type" "sseiadd")
   (set_attr "mode" "TI")])
3803
;; movmskps: derive an SI value in a general register from a V4SF
;; register.  Modelled as an opaque unspec (UNSPEC_MOVMSK).
(define_insn "sse_movmskps"
  [(set (match_operand:SI 0 "register_operand" "=r")
	(unspec:SI [(match_operand:V4SF 1 "register_operand" "x")]
		   UNSPEC_MOVMSK))]
  "TARGET_SSE"
  "movmskps\t{%1, %0|%0, %1}"
  [(set_attr "type" "ssecvt")
   (set_attr "mode" "V4SF")])
3812
;; movmskpd: derive an SI value in a general register from a V2DF
;; register.  Modelled as an opaque unspec (UNSPEC_MOVMSK).
(define_insn "sse2_movmskpd"
  [(set (match_operand:SI 0 "register_operand" "=r")
	(unspec:SI [(match_operand:V2DF 1 "register_operand" "x")]
		   UNSPEC_MOVMSK))]
  "TARGET_SSE2"
  "movmskpd\t{%1, %0|%0, %1}"
  [(set_attr "type" "ssecvt")
   (set_attr "mode" "V2DF")])
3821
;; pmovmskb: derive an SI value in a general register from a V16QI
;; register.  Modelled as an opaque unspec (UNSPEC_MOVMSK).
;; NOTE(review): the "mode" attribute is V2DF although the input is an
;; integer vector.  Attribute computations outside this chunk may depend
;; on it, so confirm before changing it.
(define_insn "sse2_pmovmskb"
  [(set (match_operand:SI 0 "register_operand" "=r")
	(unspec:SI [(match_operand:V16QI 1 "register_operand" "x")]
		   UNSPEC_MOVMSK))]
  "TARGET_SSE2"
  "pmovmskb\t{%1, %0|%0, %1}"
  [(set_attr "type" "ssecvt")
   (set_attr "mode" "V2DF")])
3830
;; Named expander for the maskmovdqu store.  Operand 0 is the V16QI
;; memory destination; it also appears as an input of the unspec
;; (match_dup 0), so the old memory contents are part of the result.
;; Operands 1 and 2 are the two V16QI register inputs.  The preparation
;; string is empty: the template is emitted as written and is expected
;; to be matched by one of the "*sse2_maskmovdqu*" insns.
(define_expand "sse2_maskmovdqu"
  [(set (match_operand:V16QI 0 "memory_operand" "")
	(unspec:V16QI [(match_operand:V16QI 1 "register_operand" "x")
		       (match_operand:V16QI 2 "register_operand" "x")
		       (match_dup 0)]
		      UNSPEC_MASKMOV))]
  "TARGET_SSE2"
  "")
3839
;; 32-bit form of maskmovdqu (condition: !TARGET_64BIT).  The
;; destination address is an SImode register restricted by the "D"
;; constraint; it is not printed in the assembly template, so the
;; instruction addresses memory through that register implicitly.
;; Only the two XMM operands are printed.
(define_insn "*sse2_maskmovdqu"
  [(set (mem:V16QI (match_operand:SI 0 "register_operand" "D"))
	(unspec:V16QI [(match_operand:V16QI 1 "register_operand" "x")
		       (match_operand:V16QI 2 "register_operand" "x")
		       (mem:V16QI (match_dup 0))]
		      UNSPEC_MASKMOV))]
  "TARGET_SSE2 && !TARGET_64BIT"
  ;; @@@ check ordering of operands in intel/nonintel syntax
  "maskmovdqu\t{%2, %1|%1, %2}"
  [(set_attr "type" "ssecvt")
   (set_attr "mode" "TI")])
3851
;; 64-bit form of maskmovdqu (condition: TARGET_64BIT).  Identical to
;; the 32-bit pattern except that the implicit address register
;; (constraint "D") is DImode.
(define_insn "*sse2_maskmovdqu_rex64"
  [(set (mem:V16QI (match_operand:DI 0 "register_operand" "D"))
	(unspec:V16QI [(match_operand:V16QI 1 "register_operand" "x")
		       (match_operand:V16QI 2 "register_operand" "x")
		       (mem:V16QI (match_dup 0))]
		      UNSPEC_MASKMOV))]
  "TARGET_SSE2 && TARGET_64BIT"
  ;; @@@ check ordering of operands in intel/nonintel syntax
  "maskmovdqu\t{%2, %1|%1, %2}"
  [(set_attr "type" "ssecvt")
   (set_attr "mode" "TI")])
3863
;; ldmxcsr: load the MXCSR register from the SImode memory operand.
;; Expressed as an unspec_volatile with no destination.
(define_insn "sse_ldmxcsr"
  [(unspec_volatile [(match_operand:SI 0 "memory_operand" "m")]
		    UNSPECV_LDMXCSR)]
  "TARGET_SSE"
  "ldmxcsr\t%0"
  [(set_attr "type" "sse")
   (set_attr "memory" "load")])
3871
;; stmxcsr: store the MXCSR register into the SImode memory operand.
;; The source is an unspec_volatile whose only argument is a dummy
;; (const_int 0).
(define_insn "sse_stmxcsr"
  [(set (match_operand:SI 0 "memory_operand" "=m")
	(unspec_volatile:SI [(const_int 0)] UNSPECV_STMXCSR))]
  "TARGET_SSE"
  "stmxcsr\t%0"
  [(set_attr "type" "sse")
   (set_attr "memory" "store")])
3879
;; Named expander for sfence.  The fence is represented as a set of a
;; volatile BLKmode memory reference to an unspec of itself; the memory
;; reference is created here, with a SCRATCH as its address.
(define_expand "sse_sfence"
  [(set (match_dup 0)
	(unspec:BLK [(match_dup 0)] UNSPEC_SFENCE))]
  "TARGET_SSE || TARGET_3DNOW_A"
{
  /* Build the placeholder memory first, mark it volatile, then install
     it as operand 0 for both match_dups in the template.  */
  rtx fence_mem = gen_rtx_MEM (BLKmode, gen_rtx_SCRATCH (Pmode));

  MEM_VOLATILE_P (fence_mem) = 1;
  operands[0] = fence_mem;
})
3888
;; Matcher for the sfence RTL: a BLKmode operand set to
;; UNSPEC_SFENCE of itself.  Operand 0 has no predicate and is not
;; printed; the template is the bare mnemonic.
(define_insn "*sse_sfence"
  [(set (match_operand:BLK 0 "" "")
	(unspec:BLK [(match_dup 0)] UNSPEC_SFENCE))]
  "TARGET_SSE || TARGET_3DNOW_A"
  "sfence"
  [(set_attr "type" "sse")
   (set_attr "memory" "unknown")])
3896
;; clflush on the address given by operand 0.  The operand is an
;; address ("address_operand", constraint "p") and is printed with %a0,
;; i.e. as a memory reference to that address.
(define_insn "sse2_clflush"
  [(unspec_volatile [(match_operand 0 "address_operand" "p")]
		    UNSPECV_CLFLUSH)]
  "TARGET_SSE2"
  "clflush\t%a0"
  [(set_attr "type" "sse")
   (set_attr "memory" "unknown")])
3904
;; Named expander for mfence.  The fence is represented as a set of a
;; volatile BLKmode memory reference to an unspec of itself; the memory
;; reference is created here, with a SCRATCH as its address.
(define_expand "sse2_mfence"
  [(set (match_dup 0)
	(unspec:BLK [(match_dup 0)] UNSPEC_MFENCE))]
  "TARGET_SSE2"
{
  /* Build the placeholder memory first, mark it volatile, then install
     it as operand 0 for both match_dups in the template.  */
  rtx fence_mem = gen_rtx_MEM (BLKmode, gen_rtx_SCRATCH (Pmode));

  MEM_VOLATILE_P (fence_mem) = 1;
  operands[0] = fence_mem;
})
3913
;; Matcher for the mfence RTL: a BLKmode operand set to
;; UNSPEC_MFENCE of itself.  Operand 0 has no predicate and is not
;; printed; the template is the bare mnemonic.
(define_insn "*sse2_mfence"
  [(set (match_operand:BLK 0 "" "")
	(unspec:BLK [(match_dup 0)] UNSPEC_MFENCE))]
  "TARGET_SSE2"
  "mfence"
  [(set_attr "type" "sse")
   (set_attr "memory" "unknown")])
3921
;; Named expander for lfence.  The fence is represented as a set of a
;; volatile BLKmode memory reference to an unspec of itself; the memory
;; reference is created here, with a SCRATCH as its address.
(define_expand "sse2_lfence"
  [(set (match_dup 0)
	(unspec:BLK [(match_dup 0)] UNSPEC_LFENCE))]
  "TARGET_SSE2"
{
  /* Build the placeholder memory first, mark it volatile, then install
     it as operand 0 for both match_dups in the template.  */
  rtx fence_mem = gen_rtx_MEM (BLKmode, gen_rtx_SCRATCH (Pmode));

  MEM_VOLATILE_P (fence_mem) = 1;
  operands[0] = fence_mem;
})
3930
;; Matcher for the lfence RTL: a BLKmode operand set to
;; UNSPEC_LFENCE of itself.  Operand 0 has no predicate and is not
;; printed; the template is the bare mnemonic.
(define_insn "*sse2_lfence"
  [(set (match_operand:BLK 0 "" "")
	(unspec:BLK [(match_dup 0)] UNSPEC_LFENCE))]
  "TARGET_SSE2"
  "lfence"
  [(set_attr "type" "sse")
   (set_attr "memory" "unknown")])
3938
;; mwait.  The two SImode inputs are pinned to specific registers by
;; the "a" and "c" constraints; the instruction reads them implicitly,
;; so the template prints no operands.  One pattern serves both 32-bit
;; and 64-bit targets (see the in-line comment below).
(define_insn "sse3_mwait"
  [(unspec_volatile [(match_operand:SI 0 "register_operand" "a")
		     (match_operand:SI 1 "register_operand" "c")]
		    UNSPECV_MWAIT)]
  "TARGET_SSE3"
;; 64bit version is "mwait %rax,%rcx". But only lower 32bits are used.
;; Since 32bit register operands are implicitly zero extended to 64bit,
;; we only need to set up 32bit registers.
  "mwait"
  [(set_attr "length" "3")])
3949
;; monitor, 32-bit targets only (condition: !TARGET_64BIT).  The three
;; SImode inputs are pinned to registers by the "a", "c" and "d"
;; constraints.  Unlike the 64-bit pattern, this template prints the
;; three operands explicitly.
(define_insn "sse3_monitor"
  [(unspec_volatile [(match_operand:SI 0 "register_operand" "a")
		     (match_operand:SI 1 "register_operand" "c")
		     (match_operand:SI 2 "register_operand" "d")]
		    UNSPECV_MONITOR)]
  "TARGET_SSE3 && !TARGET_64BIT"
  "monitor\t%0, %1, %2"
  [(set_attr "length" "3")])
3958
;; monitor, 64-bit targets only (condition: TARGET_64BIT).  Operand 0
;; is DImode (constraint "a"); operands 1 and 2 stay SImode
;; (constraints "c" and "d") for the reason given in the in-line comment
;; below.  The template prints the bare mnemonic with implicit operands.
(define_insn "sse3_monitor64"
  [(unspec_volatile [(match_operand:DI 0 "register_operand" "a")
		     (match_operand:SI 1 "register_operand" "c")
		     (match_operand:SI 2 "register_operand" "d")]
		    UNSPECV_MONITOR)]
  "TARGET_SSE3 && TARGET_64BIT"
;; 64bit version is "monitor %rax,%rcx,%rdx". But only lower 32bits in
;; RCX and RDX are used.  Since 32bit register operands are implicitly
;; zero extended to 64bit, we only need to set up 32bit registers.
  "monitor"
  [(set_attr "length" "3")])
3970
3971;; SSSE3
;; phaddw, 128-bit form: horizontal add of 16-bit elements.  Each result
;; element is the sum of one adjacent pair of source elements.  Result
;; elements 0-3 are the pairs (0,1) (2,3) (4,5) (6,7) of operand 1;
;; result elements 4-7 are the same pairs of operand 2.  Operand 1 is
;; tied to the destination; operand 2 is an SSE register or memory.
(define_insn "ssse3_phaddwv8hi3"
  [(set (match_operand:V8HI 0 "register_operand" "=x")
	(vec_concat:V8HI
	  (vec_concat:V4HI
	    (vec_concat:V2HI
	      (plus:HI
		(vec_select:HI
		  (match_operand:V8HI 1 "register_operand" "0")
		  (parallel [(const_int 0)]))
		(vec_select:HI (match_dup 1) (parallel [(const_int 1)])))
	      (plus:HI
		(vec_select:HI (match_dup 1) (parallel [(const_int 2)]))
		(vec_select:HI (match_dup 1) (parallel [(const_int 3)]))))
	    (vec_concat:V2HI
	      (plus:HI
		(vec_select:HI (match_dup 1) (parallel [(const_int 4)]))
		(vec_select:HI (match_dup 1) (parallel [(const_int 5)])))
	      (plus:HI
		(vec_select:HI (match_dup 1) (parallel [(const_int 6)]))
		(vec_select:HI (match_dup 1) (parallel [(const_int 7)])))))
	  (vec_concat:V4HI
	    (vec_concat:V2HI
	      (plus:HI
		(vec_select:HI
		  (match_operand:V8HI 2 "nonimmediate_operand" "xm")
		  (parallel [(const_int 0)]))
		(vec_select:HI (match_dup 2) (parallel [(const_int 1)])))
	      (plus:HI
		(vec_select:HI (match_dup 2) (parallel [(const_int 2)]))
		(vec_select:HI (match_dup 2) (parallel [(const_int 3)]))))
	    (vec_concat:V2HI
	      (plus:HI
		(vec_select:HI (match_dup 2) (parallel [(const_int 4)]))
		(vec_select:HI (match_dup 2) (parallel [(const_int 5)])))
	      (plus:HI
		(vec_select:HI (match_dup 2) (parallel [(const_int 6)]))
		(vec_select:HI (match_dup 2) (parallel [(const_int 7)])))))))]
  "TARGET_SSSE3"
  "phaddw\t{%2, %0|%0, %2}"
  [(set_attr "type" "sseiadd")
   (set_attr "mode" "TI")])
4013
;; phaddw, 64-bit MMX form ("y" registers): horizontal add of 16-bit
;; elements.  Result elements 0-1 are the sums of the pairs (0,1) (2,3)
;; of operand 1; result elements 2-3 are the same pairs of operand 2.
;; Operand 1 is tied to the destination; operand 2 is an MMX register
;; or memory.
(define_insn "ssse3_phaddwv4hi3"
  [(set (match_operand:V4HI 0 "register_operand" "=y")
	(vec_concat:V4HI
	  (vec_concat:V2HI
	    (plus:HI
	      (vec_select:HI
		(match_operand:V4HI 1 "register_operand" "0")
		(parallel [(const_int 0)]))
	      (vec_select:HI (match_dup 1) (parallel [(const_int 1)])))
	    (plus:HI
	      (vec_select:HI (match_dup 1) (parallel [(const_int 2)]))
	      (vec_select:HI (match_dup 1) (parallel [(const_int 3)]))))
	  (vec_concat:V2HI
	    (plus:HI
	      (vec_select:HI
		(match_operand:V4HI 2 "nonimmediate_operand" "ym")
		(parallel [(const_int 0)]))
	      (vec_select:HI (match_dup 2) (parallel [(const_int 1)])))
	    (plus:HI
	      (vec_select:HI (match_dup 2) (parallel [(const_int 2)]))
	      (vec_select:HI (match_dup 2) (parallel [(const_int 3)]))))))]
  "TARGET_SSSE3"
  "phaddw\t{%2, %0|%0, %2}"
  [(set_attr "type" "sseiadd")
   (set_attr "mode" "DI")])
4039
;; phaddd, 128-bit form: horizontal add of 32-bit elements.  Result
;; elements 0-1 are the sums of the pairs (0,1) (2,3) of operand 1;
;; result elements 2-3 are the same pairs of operand 2.  Operand 1 is
;; tied to the destination; operand 2 is an SSE register or memory.
(define_insn "ssse3_phadddv4si3"
  [(set (match_operand:V4SI 0 "register_operand" "=x")
	(vec_concat:V4SI
	  (vec_concat:V2SI
	    (plus:SI
	      (vec_select:SI
		(match_operand:V4SI 1 "register_operand" "0")
		(parallel [(const_int 0)]))
	      (vec_select:SI (match_dup 1) (parallel [(const_int 1)])))
	    (plus:SI
	      (vec_select:SI (match_dup 1) (parallel [(const_int 2)]))
	      (vec_select:SI (match_dup 1) (parallel [(const_int 3)]))))
	  (vec_concat:V2SI
	    (plus:SI
	      (vec_select:SI
		(match_operand:V4SI 2 "nonimmediate_operand" "xm")
		(parallel [(const_int 0)]))
	      (vec_select:SI (match_dup 2) (parallel [(const_int 1)])))
	    (plus:SI
	      (vec_select:SI (match_dup 2) (parallel [(const_int 2)]))
	      (vec_select:SI (match_dup 2) (parallel [(const_int 3)]))))))]
  "TARGET_SSSE3"
  "phaddd\t{%2, %0|%0, %2}"
  [(set_attr "type" "sseiadd")
   (set_attr "mode" "TI")])
4065
;; phaddd, 64-bit MMX form ("y" registers): result element 0 is the sum
;; of the two elements of operand 1, result element 1 the sum of the
;; two elements of operand 2.  Operand 1 is tied to the destination;
;; operand 2 is an MMX register or memory.
(define_insn "ssse3_phadddv2si3"
  [(set (match_operand:V2SI 0 "register_operand" "=y")
	(vec_concat:V2SI
	  (plus:SI
	    (vec_select:SI
	      (match_operand:V2SI 1 "register_operand" "0")
	      (parallel [(const_int 0)]))
	    (vec_select:SI (match_dup 1) (parallel [(const_int 1)])))
	  (plus:SI
	    (vec_select:SI
	      (match_operand:V2SI 2 "nonimmediate_operand" "ym")
	      (parallel [(const_int 0)]))
	    (vec_select:SI (match_dup 2) (parallel [(const_int 1)])))))]
  "TARGET_SSSE3"
  "phaddd\t{%2, %0|%0, %2}"
  [(set_attr "type" "sseiadd")
   (set_attr "mode" "DI")])
4083
;; phaddsw, 128-bit form: horizontal add of 16-bit elements using
;; signed-saturating addition (ss_plus).  Result elements 0-3 are the
;; pairs (0,1) (2,3) (4,5) (6,7) of operand 1; result elements 4-7 are
;; the same pairs of operand 2.  Operand 1 is tied to the destination;
;; operand 2 is an SSE register or memory.
(define_insn "ssse3_phaddswv8hi3"
  [(set (match_operand:V8HI 0 "register_operand" "=x")
	(vec_concat:V8HI
	  (vec_concat:V4HI
	    (vec_concat:V2HI
	      (ss_plus:HI
		(vec_select:HI
		  (match_operand:V8HI 1 "register_operand" "0")
		  (parallel [(const_int 0)]))
		(vec_select:HI (match_dup 1) (parallel [(const_int 1)])))
	      (ss_plus:HI
		(vec_select:HI (match_dup 1) (parallel [(const_int 2)]))
		(vec_select:HI (match_dup 1) (parallel [(const_int 3)]))))
	    (vec_concat:V2HI
	      (ss_plus:HI
		(vec_select:HI (match_dup 1) (parallel [(const_int 4)]))
		(vec_select:HI (match_dup 1) (parallel [(const_int 5)])))
	      (ss_plus:HI
		(vec_select:HI (match_dup 1) (parallel [(const_int 6)]))
		(vec_select:HI (match_dup 1) (parallel [(const_int 7)])))))
	  (vec_concat:V4HI
	    (vec_concat:V2HI
	      (ss_plus:HI
		(vec_select:HI
		  (match_operand:V8HI 2 "nonimmediate_operand" "xm")
		  (parallel [(const_int 0)]))
		(vec_select:HI (match_dup 2) (parallel [(const_int 1)])))
	      (ss_plus:HI
		(vec_select:HI (match_dup 2) (parallel [(const_int 2)]))
		(vec_select:HI (match_dup 2) (parallel [(const_int 3)]))))
	    (vec_concat:V2HI
	      (ss_plus:HI
		(vec_select:HI (match_dup 2) (parallel [(const_int 4)]))
		(vec_select:HI (match_dup 2) (parallel [(const_int 5)])))
	      (ss_plus:HI
		(vec_select:HI (match_dup 2) (parallel [(const_int 6)]))
		(vec_select:HI (match_dup 2) (parallel [(const_int 7)])))))))]
  "TARGET_SSSE3"
  "phaddsw\t{%2, %0|%0, %2}"
  [(set_attr "type" "sseiadd")
   (set_attr "mode" "TI")])
4125
;; phaddsw, 64-bit MMX form ("y" registers): horizontal add of 16-bit
;; elements using signed-saturating addition (ss_plus).  Result
;; elements 0-1 are the pairs (0,1) (2,3) of operand 1; result elements
;; 2-3 are the same pairs of operand 2.  Operand 1 is tied to the
;; destination; operand 2 is an MMX register or memory.
(define_insn "ssse3_phaddswv4hi3"
  [(set (match_operand:V4HI 0 "register_operand" "=y")
	(vec_concat:V4HI
	  (vec_concat:V2HI
	    (ss_plus:HI
	      (vec_select:HI
		(match_operand:V4HI 1 "register_operand" "0")
		(parallel [(const_int 0)]))
	      (vec_select:HI (match_dup 1) (parallel [(const_int 1)])))
	    (ss_plus:HI
	      (vec_select:HI (match_dup 1) (parallel [(const_int 2)]))
	      (vec_select:HI (match_dup 1) (parallel [(const_int 3)]))))
	  (vec_concat:V2HI
	    (ss_plus:HI
	      (vec_select:HI
		(match_operand:V4HI 2 "nonimmediate_operand" "ym")
		(parallel [(const_int 0)]))
	      (vec_select:HI (match_dup 2) (parallel [(const_int 1)])))
	    (ss_plus:HI
	      (vec_select:HI (match_dup 2) (parallel [(const_int 2)]))
	      (vec_select:HI (match_dup 2) (parallel [(const_int 3)]))))))]
  "TARGET_SSSE3"
  "phaddsw\t{%2, %0|%0, %2}"
  [(set_attr "type" "sseiadd")
   (set_attr "mode" "DI")])
4151
;; phsubw, 128-bit form: horizontal subtract of 16-bit elements.  Each
;; result element is the even-indexed element of a pair minus the
;; odd-indexed one.  Result elements 0-3 are the pairs (0,1) (2,3)
;; (4,5) (6,7) of operand 1; result elements 4-7 are the same pairs of
;; operand 2.  Operand 1 is tied to the destination; operand 2 is an SSE
;; register or memory.
(define_insn "ssse3_phsubwv8hi3"
  [(set (match_operand:V8HI 0 "register_operand" "=x")
	(vec_concat:V8HI
	  (vec_concat:V4HI
	    (vec_concat:V2HI
	      (minus:HI
		(vec_select:HI
		  (match_operand:V8HI 1 "register_operand" "0")
		  (parallel [(const_int 0)]))
		(vec_select:HI (match_dup 1) (parallel [(const_int 1)])))
	      (minus:HI
		(vec_select:HI (match_dup 1) (parallel [(const_int 2)]))
		(vec_select:HI (match_dup 1) (parallel [(const_int 3)]))))
	    (vec_concat:V2HI
	      (minus:HI
		(vec_select:HI (match_dup 1) (parallel [(const_int 4)]))
		(vec_select:HI (match_dup 1) (parallel [(const_int 5)])))
	      (minus:HI
		(vec_select:HI (match_dup 1) (parallel [(const_int 6)]))
		(vec_select:HI (match_dup 1) (parallel [(const_int 7)])))))
	  (vec_concat:V4HI
	    (vec_concat:V2HI
	      (minus:HI
		(vec_select:HI
		  (match_operand:V8HI 2 "nonimmediate_operand" "xm")
		  (parallel [(const_int 0)]))
		(vec_select:HI (match_dup 2) (parallel [(const_int 1)])))
	      (minus:HI
		(vec_select:HI (match_dup 2) (parallel [(const_int 2)]))
		(vec_select:HI (match_dup 2) (parallel [(const_int 3)]))))
	    (vec_concat:V2HI
	      (minus:HI
		(vec_select:HI (match_dup 2) (parallel [(const_int 4)]))
		(vec_select:HI (match_dup 2) (parallel [(const_int 5)])))
	      (minus:HI
		(vec_select:HI (match_dup 2) (parallel [(const_int 6)]))
		(vec_select:HI (match_dup 2) (parallel [(const_int 7)])))))))]
  "TARGET_SSSE3"
  "phsubw\t{%2, %0|%0, %2}"
  [(set_attr "type" "sseiadd")
   (set_attr "mode" "TI")])
4193
;; phsubw, 64-bit MMX form ("y" registers): horizontal subtract of
;; 16-bit elements (even-indexed element minus odd-indexed element of
;; each pair).  Result elements 0-1 are the pairs (0,1) (2,3) of
;; operand 1; result elements 2-3 are the same pairs of operand 2.
;; Operand 1 is tied to the destination; operand 2 is an MMX register
;; or memory.
(define_insn "ssse3_phsubwv4hi3"
  [(set (match_operand:V4HI 0 "register_operand" "=y")
	(vec_concat:V4HI
	  (vec_concat:V2HI
	    (minus:HI
	      (vec_select:HI
		(match_operand:V4HI 1 "register_operand" "0")
		(parallel [(const_int 0)]))
	      (vec_select:HI (match_dup 1) (parallel [(const_int 1)])))
	    (minus:HI
	      (vec_select:HI (match_dup 1) (parallel [(const_int 2)]))
	      (vec_select:HI (match_dup 1) (parallel [(const_int 3)]))))
	  (vec_concat:V2HI
	    (minus:HI
	      (vec_select:HI
		(match_operand:V4HI 2 "nonimmediate_operand" "ym")
		(parallel [(const_int 0)]))
	      (vec_select:HI (match_dup 2) (parallel [(const_int 1)])))
	    (minus:HI
	      (vec_select:HI (match_dup 2) (parallel [(const_int 2)]))
	      (vec_select:HI (match_dup 2) (parallel [(const_int 3)]))))))]
  "TARGET_SSSE3"
  "phsubw\t{%2, %0|%0, %2}"
  [(set_attr "type" "sseiadd")
   (set_attr "mode" "DI")])
4219
;; phsubd, 128-bit form: horizontal subtract of 32-bit elements
;; (even-indexed element minus odd-indexed element of each pair).
;; Result elements 0-1 are the pairs (0,1) (2,3) of operand 1; result
;; elements 2-3 are the same pairs of operand 2.  Operand 1 is tied to
;; the destination; operand 2 is an SSE register or memory.
(define_insn "ssse3_phsubdv4si3"
  [(set (match_operand:V4SI 0 "register_operand" "=x")
	(vec_concat:V4SI
	  (vec_concat:V2SI
	    (minus:SI
	      (vec_select:SI
		(match_operand:V4SI 1 "register_operand" "0")
		(parallel [(const_int 0)]))
	      (vec_select:SI (match_dup 1) (parallel [(const_int 1)])))
	    (minus:SI
	      (vec_select:SI (match_dup 1) (parallel [(const_int 2)]))
	      (vec_select:SI (match_dup 1) (parallel [(const_int 3)]))))
	  (vec_concat:V2SI
	    (minus:SI
	      (vec_select:SI
		(match_operand:V4SI 2 "nonimmediate_operand" "xm")
		(parallel [(const_int 0)]))
	      (vec_select:SI (match_dup 2) (parallel [(const_int 1)])))
	    (minus:SI
	      (vec_select:SI (match_dup 2) (parallel [(const_int 2)]))
	      (vec_select:SI (match_dup 2) (parallel [(const_int 3)]))))))]
  "TARGET_SSSE3"
  "phsubd\t{%2, %0|%0, %2}"
  [(set_attr "type" "sseiadd")
   (set_attr "mode" "TI")])
4245
;; phsubd, 64-bit MMX form ("y" registers): result element 0 is
;; element 0 minus element 1 of operand 1, result element 1 is
;; element 0 minus element 1 of operand 2.  Operand 1 is tied to the
;; destination; operand 2 is an MMX register or memory.
(define_insn "ssse3_phsubdv2si3"
  [(set (match_operand:V2SI 0 "register_operand" "=y")
	(vec_concat:V2SI
	  (minus:SI
	    (vec_select:SI
	      (match_operand:V2SI 1 "register_operand" "0")
	      (parallel [(const_int 0)]))
	    (vec_select:SI (match_dup 1) (parallel [(const_int 1)])))
	  (minus:SI
	    (vec_select:SI
	      (match_operand:V2SI 2 "nonimmediate_operand" "ym")
	      (parallel [(const_int 0)]))
	    (vec_select:SI (match_dup 2) (parallel [(const_int 1)])))))]
  "TARGET_SSSE3"
  "phsubd\t{%2, %0|%0, %2}"
  [(set_attr "type" "sseiadd")
   (set_attr "mode" "DI")])
4263
;; phsubsw, 128-bit form: horizontal subtract of 16-bit elements using
;; signed-saturating subtraction (ss_minus; even-indexed element minus
;; odd-indexed element of each pair).  Result elements 0-3 are the
;; pairs (0,1) (2,3) (4,5) (6,7) of operand 1; result elements 4-7 are
;; the same pairs of operand 2.  Operand 1 is tied to the destination;
;; operand 2 is an SSE register or memory.
(define_insn "ssse3_phsubswv8hi3"
  [(set (match_operand:V8HI 0 "register_operand" "=x")
	(vec_concat:V8HI
	  (vec_concat:V4HI
	    (vec_concat:V2HI
	      (ss_minus:HI
		(vec_select:HI
		  (match_operand:V8HI 1 "register_operand" "0")
		  (parallel [(const_int 0)]))
		(vec_select:HI (match_dup 1) (parallel [(const_int 1)])))
	      (ss_minus:HI
		(vec_select:HI (match_dup 1) (parallel [(const_int 2)]))
		(vec_select:HI (match_dup 1) (parallel [(const_int 3)]))))
	    (vec_concat:V2HI
	      (ss_minus:HI
		(vec_select:HI (match_dup 1) (parallel [(const_int 4)]))
		(vec_select:HI (match_dup 1) (parallel [(const_int 5)])))
	      (ss_minus:HI
		(vec_select:HI (match_dup 1) (parallel [(const_int 6)]))
		(vec_select:HI (match_dup 1) (parallel [(const_int 7)])))))
	  (vec_concat:V4HI
	    (vec_concat:V2HI
	      (ss_minus:HI
		(vec_select:HI
		  (match_operand:V8HI 2 "nonimmediate_operand" "xm")
		  (parallel [(const_int 0)]))
		(vec_select:HI (match_dup 2) (parallel [(const_int 1)])))
	      (ss_minus:HI
		(vec_select:HI (match_dup 2) (parallel [(const_int 2)]))
		(vec_select:HI (match_dup 2) (parallel [(const_int 3)]))))
	    (vec_concat:V2HI
	      (ss_minus:HI
		(vec_select:HI (match_dup 2) (parallel [(const_int 4)]))
		(vec_select:HI (match_dup 2) (parallel [(const_int 5)])))
	      (ss_minus:HI
		(vec_select:HI (match_dup 2) (parallel [(const_int 6)]))
		(vec_select:HI (match_dup 2) (parallel [(const_int 7)])))))))]
  "TARGET_SSSE3"
  "phsubsw\t{%2, %0|%0, %2}"
  [(set_attr "type" "sseiadd")
   (set_attr "mode" "TI")])
4305
;; phsubsw, 64-bit MMX form ("y" registers): horizontal subtract of
;; 16-bit elements using signed-saturating subtraction (ss_minus;
;; even-indexed element minus odd-indexed element of each pair).
;; Result elements 0-1 are the pairs (0,1) (2,3) of operand 1; result
;; elements 2-3 are the same pairs of operand 2.  Operand 1 is tied to
;; the destination; operand 2 is an MMX register or memory.
(define_insn "ssse3_phsubswv4hi3"
  [(set (match_operand:V4HI 0 "register_operand" "=y")
	(vec_concat:V4HI
	  (vec_concat:V2HI
	    (ss_minus:HI
	      (vec_select:HI
		(match_operand:V4HI 1 "register_operand" "0")
		(parallel [(const_int 0)]))
	      (vec_select:HI (match_dup 1) (parallel [(const_int 1)])))
	    (ss_minus:HI
	      (vec_select:HI (match_dup 1) (parallel [(const_int 2)]))
	      (vec_select:HI (match_dup 1) (parallel [(const_int 3)]))))
	  (vec_concat:V2HI
	    (ss_minus:HI
	      (vec_select:HI
		(match_operand:V4HI 2 "nonimmediate_operand" "ym")
		(parallel [(const_int 0)]))
	      (vec_select:HI (match_dup 2) (parallel [(const_int 1)])))
	    (ss_minus:HI
	      (vec_select:HI (match_dup 2) (parallel [(const_int 2)]))
	      (vec_select:HI (match_dup 2) (parallel [(const_int 3)]))))))]
  "TARGET_SSSE3"
  "phsubsw\t{%2, %0|%0, %2}"
  [(set_attr "type" "sseiadd")
   (set_attr "mode" "DI")])
4331
;; pmaddubsw, 128-bit form.  For each of the 8 result words:
;;   result[i] = ss_plus (zext (op1[2i])   * sext (op2[2i]),
;;                        zext (op1[2i+1]) * sext (op2[2i+1]))
;; i.e. the even-indexed and the odd-indexed bytes are selected
;; separately, operand 1 bytes are zero-extended and operand 2 bytes
;; sign-extended to 16 bits, multiplied, and the two products are added
;; with signed saturation.
;;
;; Every vec_select picks 8 bytes, so its mode is V8QI; that is also
;; what the enclosing zero_extend/sign_extend to V8HI requires.
;;
;; Operand 1 is tied to the destination but is NOT commutative with
;; operand 2: the two operands are extended differently, so swapping
;; them would change the result.  No "%" modifier must be used here.
(define_insn "ssse3_pmaddubswv8hi3"
  [(set (match_operand:V8HI 0 "register_operand" "=x")
	(ss_plus:V8HI
	  (mult:V8HI
	    (zero_extend:V8HI
	      (vec_select:V8QI
		(match_operand:V16QI 1 "nonimmediate_operand" "0")
		(parallel [(const_int 0)
			   (const_int 2)
			   (const_int 4)
			   (const_int 6)
			   (const_int 8)
			   (const_int 10)
			   (const_int 12)
			   (const_int 14)])))
	    (sign_extend:V8HI
	      (vec_select:V8QI
		(match_operand:V16QI 2 "nonimmediate_operand" "xm")
		(parallel [(const_int 0)
			   (const_int 2)
			   (const_int 4)
			   (const_int 6)
			   (const_int 8)
			   (const_int 10)
			   (const_int 12)
			   (const_int 14)]))))
	  (mult:V8HI
	    (zero_extend:V8HI
	      (vec_select:V8QI (match_dup 1)
		(parallel [(const_int 1)
			   (const_int 3)
			   (const_int 5)
			   (const_int 7)
			   (const_int 9)
			   (const_int 11)
			   (const_int 13)
			   (const_int 15)])))
	    (sign_extend:V8HI
	      (vec_select:V8QI (match_dup 2)
		(parallel [(const_int 1)
			   (const_int 3)
			   (const_int 5)
			   (const_int 7)
			   (const_int 9)
			   (const_int 11)
			   (const_int 13)
			   (const_int 15)]))))))]
  "TARGET_SSSE3"
  "pmaddubsw\t{%2, %0|%0, %2}"
  [(set_attr "type" "sseiadd")
   (set_attr "mode" "TI")])
4383
;; pmaddubsw, 64-bit MMX form ("y" registers).  For each of the 4
;; result words:
;;   result[i] = ss_plus (zext (op1[2i])   * sext (op2[2i]),
;;                        zext (op1[2i+1]) * sext (op2[2i+1]))
;; Operand 1 bytes are zero-extended and operand 2 bytes sign-extended
;; to 16 bits before multiplying; the two products are added with
;; signed saturation.
;;
;; Every vec_select picks 4 bytes, so its mode is V4QI; that is also
;; what the enclosing zero_extend/sign_extend to V4HI requires.
;;
;; Operand 1 is tied to the destination but is NOT commutative with
;; operand 2: the two operands are extended differently, so swapping
;; them would change the result.  No "%" modifier must be used here.
(define_insn "ssse3_pmaddubswv4hi3"
  [(set (match_operand:V4HI 0 "register_operand" "=y")
	(ss_plus:V4HI
	  (mult:V4HI
	    (zero_extend:V4HI
	      (vec_select:V4QI
		(match_operand:V8QI 1 "nonimmediate_operand" "0")
		(parallel [(const_int 0)
			   (const_int 2)
			   (const_int 4)
			   (const_int 6)])))
	    (sign_extend:V4HI
	      (vec_select:V4QI
		(match_operand:V8QI 2 "nonimmediate_operand" "ym")
		(parallel [(const_int 0)
			   (const_int 2)
			   (const_int 4)
			   (const_int 6)]))))
	  (mult:V4HI
	    (zero_extend:V4HI
	      (vec_select:V4QI (match_dup 1)
		(parallel [(const_int 1)
			   (const_int 3)
			   (const_int 5)
			   (const_int 7)])))
	    (sign_extend:V4HI
	      (vec_select:V4QI (match_dup 2)
		(parallel [(const_int 1)
			   (const_int 3)
			   (const_int 5)
			   (const_int 7)]))))))]
  "TARGET_SSSE3"
  "pmaddubsw\t{%2, %0|%0, %2}"
  [(set_attr "type" "sseiadd")
   (set_attr "mode" "DI")])
4419
;; pmulhrsw, 128-bit form.  As written in the template: both V8HI inputs
;; are sign-extended to V8SI and multiplied; the product is shifted
;; right logically by 14, 1 is added to every element, the sum is
;; shifted right logically by 1 and truncated back to V8HI.  Operand 1
;; is tied to the destination and marked commutative ("%").
;; NOTE(review): the all-ones constant is written as const_vector:V8HI
;; although it is an operand of plus:V8SI -- confirm the mode mismatch
;; is intentional before touching it.
(define_insn "ssse3_pmulhrswv8hi3"
  [(set (match_operand:V8HI 0 "register_operand" "=x")
	(truncate:V8HI
	  (lshiftrt:V8SI
	    (plus:V8SI
	      (lshiftrt:V8SI
		(mult:V8SI
		  (sign_extend:V8SI
		    (match_operand:V8HI 1 "nonimmediate_operand" "%0"))
		  (sign_extend:V8SI
		    (match_operand:V8HI 2 "nonimmediate_operand" "xm")))
		(const_int 14))
	      (const_vector:V8HI [(const_int 1) (const_int 1)
				  (const_int 1) (const_int 1)
				  (const_int 1) (const_int 1)
				  (const_int 1) (const_int 1)]))
	    (const_int 1))))]
  "TARGET_SSSE3 && ix86_binary_operator_ok (MULT, V8HImode, operands)"
  "pmulhrsw\t{%2, %0|%0, %2}"
  [(set_attr "type" "sseimul")
   (set_attr "mode" "TI")])
4441
;; SSSE3 pmulhrsw on four HImode elements held in an MMX register.
;; As written in the RTL: both inputs are sign-extended to SImode
;; elements and multiplied, the product is shifted right by 14, a
;; vector of ones is added, and the sum is shifted right by one more
;; bit before being truncated back to HImode elements.
(define_insn "ssse3_pmulhrswv4hi3"
  [(set (match_operand:V4HI 0 "register_operand" "=y")
        (truncate:V4HI
          (lshiftrt:V4SI
            (plus:V4SI
              (lshiftrt:V4SI
                (mult:V4SI
                  (sign_extend:V4SI (match_operand:V4HI 1 "nonimmediate_operand" "%0"))
                  (sign_extend:V4SI (match_operand:V4HI 2 "nonimmediate_operand" "ym")))
                (const_int 14))
              (const_vector:V4HI
                [(const_int 1) (const_int 1) (const_int 1) (const_int 1)]))
            (const_int 1))))]
  "TARGET_SSSE3 && ix86_binary_operator_ok (MULT, V4HImode, operands)"
  "pmulhrsw\t{%2, %0|%0, %2}"
  [(set_attr "type" "sseimul") (set_attr "mode" "DI")])
4461
;; SSSE3 pshufb on a 16-byte SSE register.  The operation is kept
;; opaque as UNSPEC_PSHUFB applied to operand 1 (tied to the
;; destination) and operand 2 (SSE register or memory).
(define_insn "ssse3_pshufbv16qi3"
  [(set (match_operand:V16QI 0 "register_operand" "=x")
        (unspec:V16QI
          [(match_operand:V16QI 1 "register_operand" "0")
           (match_operand:V16QI 2 "nonimmediate_operand" "xm")]
          UNSPEC_PSHUFB))]
  "TARGET_SSSE3"
  "pshufb\t{%2, %0|%0, %2}"
  [(set_attr "type" "sselog1") (set_attr "mode" "TI")])
4471
;; SSSE3 pshufb on an 8-byte MMX register.  The operation is kept
;; opaque as UNSPEC_PSHUFB applied to operand 1 (tied to the
;; destination) and operand 2 (MMX register or memory).
(define_insn "ssse3_pshufbv8qi3"
  [(set (match_operand:V8QI 0 "register_operand" "=y")
        (unspec:V8QI
          [(match_operand:V8QI 1 "register_operand" "0")
           (match_operand:V8QI 2 "nonimmediate_operand" "ym")]
          UNSPEC_PSHUFB))]
  "TARGET_SSSE3"
  "pshufb\t{%2, %0|%0, %2}"
  [(set_attr "type" "sselog1") (set_attr "mode" "DI")])
4481
;; SSSE3 psignb/psignw/psignd on SSE registers, one insn per mode in
;; SSEMODE124; the mnemonic suffix comes from <ssevecsize>.  The
;; operation is kept opaque as UNSPEC_PSIGN applied to operand 1 (tied
;; to the destination) and operand 2 (SSE register or memory).
(define_insn "ssse3_psign<mode>3"
  [(set (match_operand:SSEMODE124 0 "register_operand" "=x")
        (unspec:SSEMODE124
          [(match_operand:SSEMODE124 1 "register_operand" "0")
           (match_operand:SSEMODE124 2 "nonimmediate_operand" "xm")]
          UNSPEC_PSIGN))]
  "TARGET_SSSE3"
  "psign<ssevecsize>\t{%2, %0|%0, %2}"
  [(set_attr "type" "sselog1") (set_attr "mode" "TI")])
4491
;; SSSE3 psign on MMX registers, one insn per mode in MMXMODEI; the
;; mnemonic suffix comes from <mmxvecsize>.  The operation is kept
;; opaque as UNSPEC_PSIGN applied to operand 1 (tied to the
;; destination) and operand 2 (MMX register or memory).
(define_insn "ssse3_psign<mode>3"
  [(set (match_operand:MMXMODEI 0 "register_operand" "=y")
        (unspec:MMXMODEI
          [(match_operand:MMXMODEI 1 "register_operand" "0")
           (match_operand:MMXMODEI 2 "nonimmediate_operand" "ym")]
          UNSPEC_PSIGN))]
  "TARGET_SSSE3"
  "psign<mmxvecsize>\t{%2, %0|%0, %2}"
  [(set_attr "type" "sselog1") (set_attr "mode" "DI")])
4501
;; SSSE3 palignr on a 128-bit SSE register, kept opaque as
;; UNSPEC_PALIGNR.  Operand 1 is tied to the destination, operand 2 may
;; be a register or memory, and operand 3 is an immediate accepted by
;; const_0_to_255_mul_8_operand.
(define_insn "ssse3_palignrti"
  [(set (match_operand:TI 0 "register_operand" "=x")
        (unspec:TI
          [(match_operand:TI 1 "register_operand" "0")
           (match_operand:TI 2 "nonimmediate_operand" "xm")
           (match_operand:SI 3 "const_0_to_255_mul_8_operand" "n")]
          UNSPEC_PALIGNR))]
  "TARGET_SSSE3"
{
  /* The immediate printed in the assembly is operand 3 divided by 8
     (presumably a bit count turned into a byte count, as the predicate
     name suggests).  */
  const HOST_WIDE_INT scaled = INTVAL (operands[3]) / 8;

  operands[3] = GEN_INT (scaled);
  return "palignr\t{%3, %2, %0|%0, %2, %3}";
}
  [(set_attr "type" "sseishft") (set_attr "mode" "TI")])
4515
;; SSSE3 palignr on a 64-bit MMX register, kept opaque as
;; UNSPEC_PALIGNR.  Operand 1 is tied to the destination, operand 2 may
;; be a register or memory, and operand 3 is an immediate accepted by
;; const_0_to_255_mul_8_operand.
(define_insn "ssse3_palignrdi"
  [(set (match_operand:DI 0 "register_operand" "=y")
        (unspec:DI
          [(match_operand:DI 1 "register_operand" "0")
           (match_operand:DI 2 "nonimmediate_operand" "ym")
           (match_operand:SI 3 "const_0_to_255_mul_8_operand" "n")]
          UNSPEC_PALIGNR))]
  "TARGET_SSSE3"
{
  /* The immediate printed in the assembly is operand 3 divided by 8
     (presumably a bit count turned into a byte count, as the predicate
     name suggests).  */
  const HOST_WIDE_INT scaled = INTVAL (operands[3]) / 8;

  operands[3] = GEN_INT (scaled);
  return "palignr\t{%3, %2, %0|%0, %2, %3}";
}
  [(set_attr "type" "sseishft") (set_attr "mode" "DI")])
4529
;; Element-wise absolute value for the SSEMODE124 integer vector modes,
;; emitted as pabsb/pabsw/pabsd (suffix taken from <ssevecsize>).  The
;; source may be an SSE register or memory; the destination is not tied
;; to it.
(define_insn "abs<mode>2"
  [(set (match_operand:SSEMODE124 0 "register_operand" "=x")
        (abs:SSEMODE124
          (match_operand:SSEMODE124 1 "nonimmediate_operand" "xm")))]
  "TARGET_SSSE3"
  "pabs<ssevecsize>\t{%1, %0|%0, %1}"
  [(set_attr "type" "sselog1") (set_attr "mode" "TI")])
4537
;; Element-wise absolute value for the MMXMODEI integer vector modes,
;; emitted as pabs with the suffix taken from <mmxvecsize>.  The source
;; may be an MMX register or memory; the destination is not tied to it.
(define_insn "abs<mode>2"
  [(set (match_operand:MMXMODEI 0 "register_operand" "=y")
        (abs:MMXMODEI
          (match_operand:MMXMODEI 1 "nonimmediate_operand" "ym")))]
  "TARGET_SSSE3"
  "pabs<mmxvecsize>\t{%1, %0|%0, %1}"
  [(set_attr "type" "sselog1") (set_attr "mode" "DI")])
4545
4546;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
4547;;
4548;; AMD SSE4A instructions
4549;;
4550;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
4551
;; SSE4A movntsd taking a V2DF register: element 0 of operand 1 is
;; selected and stored to the DFmode memory operand 0, with the store
;; wrapped in UNSPEC_MOVNT.
(define_insn "sse4a_vmmovntv2df"
  [(set (match_operand:DF 0 "memory_operand" "=m")
        (unspec:DF
          [(vec_select:DF
             (match_operand:V2DF 1 "register_operand" "x")
             (parallel [(const_int 0)]))]
          UNSPEC_MOVNT))]
  "TARGET_SSE4A"
  "movntsd\t{%1, %0|%0, %1}"
  [(set_attr "type" "ssemov") (set_attr "mode" "DF")])
4562
;; SSE4A movntsd taking a scalar DFmode value in an SSE register and
;; storing it to the DFmode memory operand 0, wrapped in UNSPEC_MOVNT.
(define_insn "sse4a_movntdf"
  [(set (match_operand:DF 0 "memory_operand" "=m")
        (unspec:DF
          [(match_operand:DF 1 "register_operand" "x")]
          UNSPEC_MOVNT))]
  "TARGET_SSE4A"
  "movntsd\t{%1, %0|%0, %1}"
  [(set_attr "type" "ssemov") (set_attr "mode" "DF")])
4571
;; SSE4A movntss taking a V4SF register: element 0 of operand 1 is
;; selected and stored to the SFmode memory operand 0, with the store
;; wrapped in UNSPEC_MOVNT.
(define_insn "sse4a_vmmovntv4sf"
  [(set (match_operand:SF 0 "memory_operand" "=m")
        (unspec:SF
          [(vec_select:SF
             (match_operand:V4SF 1 "register_operand" "x")
             (parallel [(const_int 0)]))]
          UNSPEC_MOVNT))]
  "TARGET_SSE4A"
  "movntss\t{%1, %0|%0, %1}"
  [(set_attr "type" "ssemov") (set_attr "mode" "SF")])
4582
;; SSE4A movntss taking a scalar SFmode value in an SSE register and
;; storing it to the SFmode memory operand 0, wrapped in UNSPEC_MOVNT.
(define_insn "sse4a_movntsf"
  [(set (match_operand:SF 0 "memory_operand" "=m")
        (unspec:SF
          [(match_operand:SF 1 "register_operand" "x")]
          UNSPEC_MOVNT))]
  "TARGET_SSE4A"
  "movntss\t{%1, %0|%0, %1}"
  [(set_attr "type" "ssemov") (set_attr "mode" "SF")])
4591
;; SSE4A extrq, immediate form.  Operand 1 is tied to the destination;
;; operands 2 and 3 are integer constants printed as the two
;; immediates.  The operation is kept opaque as UNSPEC_EXTRQI.
(define_insn "sse4a_extrqi"
  [(set (match_operand:V2DI 0 "register_operand" "=x")
        (unspec:V2DI
          [(match_operand:V2DI 1 "register_operand" "0")
           (match_operand 2 "const_int_operand" "")
           (match_operand 3 "const_int_operand" "")]
          UNSPEC_EXTRQI))]
  "TARGET_SSE4A"
  "extrq\t{%3, %2, %0|%0, %2, %3}"
  [(set_attr "type" "sse") (set_attr "mode" "TI")])
4602
;; SSE4A extrq, register form.  Operand 1 is tied to the destination
;; and operand 2 is a V16QI value in an SSE register.  The operation is
;; kept opaque as UNSPEC_EXTRQ.
(define_insn "sse4a_extrq"
  [(set (match_operand:V2DI 0 "register_operand" "=x")
        (unspec:V2DI
          [(match_operand:V2DI 1 "register_operand" "0")
           (match_operand:V16QI 2 "register_operand" "x")]
          UNSPEC_EXTRQ))]
  "TARGET_SSE4A"
  "extrq\t{%2, %0|%0, %2}"
  [(set_attr "type" "sse") (set_attr "mode" "TI")])
4612
;; SSE4A insertq, immediate form.  Operand 1 is tied to the
;; destination, operand 2 is a second SSE register, and operands 3 and
;; 4 are integer constants printed as the two immediates.  The
;; operation is kept opaque as UNSPEC_INSERTQI.
(define_insn "sse4a_insertqi"
  [(set (match_operand:V2DI 0 "register_operand" "=x")
        (unspec:V2DI
          [(match_operand:V2DI 1 "register_operand" "0")
           (match_operand:V2DI 2 "register_operand" "x")
           (match_operand 3 "const_int_operand" "")
           (match_operand 4 "const_int_operand" "")]
          UNSPEC_INSERTQI))]
  "TARGET_SSE4A"
  "insertq\t{%4, %3, %2, %0|%0, %2, %3, %4}"
  [(set_attr "type" "sseins") (set_attr "mode" "TI")])
4624
;; SSE4A insertq, register form.  Operand 1 is tied to the destination
;; and operand 2 is a second SSE register.  The operation is kept
;; opaque as UNSPEC_INSERTQ.
(define_insn "sse4a_insertq"
  [(set (match_operand:V2DI 0 "register_operand" "=x")
        (unspec:V2DI
          [(match_operand:V2DI 1 "register_operand" "0")
           (match_operand:V2DI 2 "register_operand" "x")]
          UNSPEC_INSERTQ))]
  "TARGET_SSE4A"
  "insertq\t{%2, %0|%0, %2}"
  [(set_attr "type" "sseins") (set_attr "mode" "TI")])
4634