1;; GCC machine description for SSE instructions
2;; Copyright (C) 2005
3;; Free Software Foundation, Inc.
4;;
5;; This file is part of GCC.
6;;
7;; GCC is free software; you can redistribute it and/or modify
8;; it under the terms of the GNU General Public License as published by
9;; the Free Software Foundation; either version 2, or (at your option)
10;; any later version.
11;;
12;; GCC is distributed in the hope that it will be useful,
13;; but WITHOUT ANY WARRANTY; without even the implied warranty of
14;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
15;; GNU General Public License for more details.
16;;
17;; You should have received a copy of the GNU General Public License
18;; along with GCC; see the file COPYING.  If not, write to
19;; the Free Software Foundation, 51 Franklin Street, Fifth Floor,
20;; Boston, MA 02110-1301, USA.
21
22
23;; 16 byte integral modes handled by SSE, minus TImode, which gets
24;; special-cased for TARGET_64BIT.
;; Mode macros: a pattern written with one of these names is instantiated
;; once per listed mode.  In the patterns below, SSEMODEI drives "mov<mode>"
;; and "*mov<mode>_internal"; SSEMODE drives "push<mode>1" and
;; "movmisalign<mode>".
25(define_mode_macro SSEMODEI [V16QI V8HI V4SI V2DI])
26
27;; All 16-byte vector modes handled by SSE
28(define_mode_macro SSEMODE [V16QI V8HI V4SI V2DI V4SF V2DF])
29
30;; Mix-n-match
31(define_mode_macro SSEMODE12 [V16QI V8HI])
32(define_mode_macro SSEMODE24 [V8HI V4SI])
33(define_mode_macro SSEMODE14 [V16QI V4SI])
34(define_mode_macro SSEMODE124 [V16QI V8HI V4SI])
35(define_mode_macro SSEMODE248 [V8HI V4SI V2DI])
36
37;; Mapping from integer vector mode to mnemonic suffix
;; (b/w/d/q for 16, 8, 4 and 2 elements respectively).
38(define_mode_attr ssevecsize [(V16QI "b") (V8HI "w") (V4SI "d") (V2DI "q")])
39
40;; Patterns whose name begins with "sse{,2,3}_" are invoked by intrinsics.
41
42;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
43;;
44;; Move patterns
45;;
46;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
47
48;; All of these patterns are enabled for SSE1 as well as SSE2.
49;; This is essential for maintaining stable calling conventions.
50
;; Move expander for every integer vector mode in SSEMODEI.  All of the
;; work is delegated to ix86_expand_vector_move; because the body ends in
;; DONE, the RTL template itself is never emitted.
51(define_expand "mov<mode>"
52  [(set (match_operand:SSEMODEI 0 "nonimmediate_operand" "")
53	(match_operand:SSEMODEI 1 "nonimmediate_operand" ""))]
54  "TARGET_SSE"
55{
56  ix86_expand_vector_move (<MODE>mode, operands);
57  DONE;
58})
59
;; Move insn for the SSEMODEI integer vector modes.
;;   alternative 0: constant matched by "C" -> register, emitted as a self-XOR
;;   alternative 1: register or memory -> register
;;   alternative 2: register -> memory
;; Memory-to-memory moves are rejected by the insn condition.  The "mode"
;; attribute decides between the single-precision encodings (xorps/movaps)
;; and the integer ones (pxor/movdqa).
60(define_insn "*mov<mode>_internal"
61  [(set (match_operand:SSEMODEI 0 "nonimmediate_operand" "=x,x ,m")
62	(match_operand:SSEMODEI 1 "vector_move_operand"  "C ,xm,x"))]
63  "TARGET_SSE && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
64{
65  switch (which_alternative)
66    {
67    case 0:
      /* Clear the destination register.  */
68      if (get_attr_mode (insn) == MODE_V4SF)
69	return "xorps\t%0, %0";
70      else
71	return "pxor\t%0, %0";
72    case 1:
73    case 2:
      /* Loads, register copies and stores share one template.  */
74      if (get_attr_mode (insn) == MODE_V4SF)
75	return "movaps\t{%1, %0|%0, %1}";
76      else
77	return "movdqa\t{%1, %0|%0, %1}";
78    default:
79      gcc_unreachable ();
80    }
81}
82  [(set_attr "type" "sselog1,ssemov,ssemov")
   ;; Mode selection: V4SF whenever SSE2 is unavailable; otherwise V4SF when
   ;; optimizing for size (and, for the store alternative, also when
   ;; TARGET_SSE_TYPELESS_STORES is set); TI in all remaining cases.
83   (set (attr "mode")
84	(cond [(eq (symbol_ref "TARGET_SSE2") (const_int 0))
85		 (const_string "V4SF")
86
87	       (eq_attr "alternative" "0,1")
88		 (if_then_else
89		   (ne (symbol_ref "optimize_size")
90		       (const_int 0))
91		   (const_string "V4SF")
92		   (const_string "TI"))
93	       (eq_attr "alternative" "2")
94		 (if_then_else
95		   (ior (ne (symbol_ref "TARGET_SSE_TYPELESS_STORES")
96			    (const_int 0))
97			(ne (symbol_ref "optimize_size")
98			    (const_int 0)))
99		   (const_string "V4SF")
100		   (const_string "TI"))]
101	       (const_string "TI")))])
102
;; Move expander for V4SF.  Delegates entirely to ix86_expand_vector_move;
;; DONE means the template is never emitted as written.
103(define_expand "movv4sf"
104  [(set (match_operand:V4SF 0 "nonimmediate_operand" "")
105	(match_operand:V4SF 1 "nonimmediate_operand" ""))]
106  "TARGET_SSE"
107{
108  ix86_expand_vector_move (V4SFmode, operands);
109  DONE;
110})
111
;; Move insn for V4SF.
;;   alternative 0: constant matched by "C" -> register, emitted as xorps
;;   alternative 1: register or memory -> register (movaps)
;;   alternative 2: register -> memory (movaps)
;; Memory-to-memory moves are rejected in the insn condition, exactly as in
;; *mov<mode>_internal and *movv2df_internal: no alternative pairs "m" with
;; "m", so recognizing such a set would only defer the failure to reload.
112(define_insn "*movv4sf_internal"
113  [(set (match_operand:V4SF 0 "nonimmediate_operand" "=x,x,m")
114	(match_operand:V4SF 1 "vector_move_operand" "C,xm,x"))]
115  "TARGET_SSE && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
116  "@
117   xorps\t%0, %0
118   movaps\t{%1, %0|%0, %1}
119   movaps\t{%1, %0|%0, %1}"
120  [(set_attr "type" "sselog1,ssemov,ssemov")
121   (set_attr "mode" "V4SF")])
122
;; After reload, rewrite a V4SF register load whose source satisfies
;; zero_extended_scalar_load_operand as a vec_merge: element 0 (mask 1)
;; comes from the duplicated SFmode scalar, the rest from a zero vector.
123(define_split
124  [(set (match_operand:V4SF 0 "register_operand" "")
125	(match_operand:V4SF 1 "zero_extended_scalar_load_operand" ""))]
126  "TARGET_SSE && reload_completed"
127  [(set (match_dup 0)
128	(vec_merge:V4SF
129	  (vec_duplicate:V4SF (match_dup 1))
130	  (match_dup 2)
131	  (const_int 1)))]
132{
  /* View the source as its SFmode part at byte offset 0.  */
133  operands[1] = simplify_gen_subreg (SFmode, operands[1], V4SFmode, 0);
  /* Operand 2 supplies the zero elements.  */
134  operands[2] = CONST0_RTX (V4SFmode);
135})
136
;; Move expander for V2DF.  Delegates entirely to ix86_expand_vector_move.
;; Note that it is enabled under TARGET_SSE, not TARGET_SSE2 (see the
;; calling-convention comment at the head of the move patterns).
137(define_expand "movv2df"
138  [(set (match_operand:V2DF 0 "nonimmediate_operand" "")
139	(match_operand:V2DF 1 "nonimmediate_operand" ""))]
140  "TARGET_SSE"
141{
142  ix86_expand_vector_move (V2DFmode, operands);
143  DONE;
144})
145
;; Move insn for V2DF.
;;   alternative 0: constant matched by "C" -> register, emitted as a self-XOR
;;   alternative 1: register or memory -> register
;;   alternative 2: register -> memory
;; Memory-to-memory moves are rejected by the insn condition.  The "mode"
;; attribute decides between the single-precision encodings (xorps/movaps)
;; and the double-precision ones (xorpd/movapd).
146(define_insn "*movv2df_internal"
147  [(set (match_operand:V2DF 0 "nonimmediate_operand" "=x,x,m")
148	(match_operand:V2DF 1 "vector_move_operand" "C,xm,x"))]
149  "TARGET_SSE && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
150{
151  switch (which_alternative)
152    {
153    case 0:
      /* Clear the destination register.  */
154      if (get_attr_mode (insn) == MODE_V4SF)
155	return "xorps\t%0, %0";
156      else
157	return "xorpd\t%0, %0";
158    case 1:
159    case 2:
      /* Loads, register copies and stores share one template.  */
160      if (get_attr_mode (insn) == MODE_V4SF)
161	return "movaps\t{%1, %0|%0, %1}";
162      else
163	return "movapd\t{%1, %0|%0, %1}";
164    default:
165      gcc_unreachable ();
166    }
167}
168  [(set_attr "type" "sselog1,ssemov,ssemov")
   ;; Mode selection: V4SF whenever SSE2 is unavailable; otherwise V4SF when
   ;; optimizing for size (and, for the store alternative, also when
   ;; TARGET_SSE_TYPELESS_STORES is set); V2DF in all remaining cases.
169   (set (attr "mode")
170	(cond [(eq (symbol_ref "TARGET_SSE2") (const_int 0))
171		 (const_string "V4SF")
172	       (eq_attr "alternative" "0,1")
173		 (if_then_else
174		   (ne (symbol_ref "optimize_size")
175		       (const_int 0))
176		   (const_string "V4SF")
177		   (const_string "V2DF"))
178	       (eq_attr "alternative" "2")
179		 (if_then_else
180		   (ior (ne (symbol_ref "TARGET_SSE_TYPELESS_STORES")
181			    (const_int 0))
182			(ne (symbol_ref "optimize_size")
183			    (const_int 0)))
184		   (const_string "V4SF")
185		   (const_string "V2DF"))]
186	       (const_string "V2DF")))])
187
;; After reload (SSE2 only), rewrite a V2DF register load whose source
;; satisfies zero_extended_scalar_load_operand as a vec_concat of the
;; DFmode scalar with a DFmode zero.
188(define_split
189  [(set (match_operand:V2DF 0 "register_operand" "")
190	(match_operand:V2DF 1 "zero_extended_scalar_load_operand" ""))]
191  "TARGET_SSE2 && reload_completed"
192  [(set (match_dup 0) (vec_concat:V2DF (match_dup 1) (match_dup 2)))]
193{
  /* View the source as its DFmode part at byte offset 0.  */
194  operands[1] = simplify_gen_subreg (DFmode, operands[1], V2DFmode, 0);
  /* Operand 2 is the zero for the second element.  */
195  operands[2] = CONST0_RTX (DFmode);
196})
197
;; Push expander for every mode in SSEMODE; operand 0 must be a register.
;; All of the work is done by ix86_expand_push.
198(define_expand "push<mode>1"
199  [(match_operand:SSEMODE 0 "register_operand" "")]
200  "TARGET_SSE"
201{
202  ix86_expand_push (<MODE>mode, operands[0]);
203  DONE;
204})
205
;; Misaligned-move expander for every mode in SSEMODE.  Delegates to
;; ix86_expand_vector_move_misalign; the template is never emitted directly.
206(define_expand "movmisalign<mode>"
207  [(set (match_operand:SSEMODE 0 "nonimmediate_operand" "")
208	(match_operand:SSEMODE 1 "nonimmediate_operand" ""))]
209  "TARGET_SSE"
210{
211  ix86_expand_vector_move_misalign (<MODE>mode, operands);
212  DONE;
213})
214
;; Unaligned V4SF move (movups), kept distinct from the ordinary move by
;; UNSPEC_MOVU.  Alternative 0 loads/copies into a register, alternative 1
;; stores to memory; memory-to-memory is rejected by the condition.
;; The "mode" attribute is V4SF: this is a packed single-precision
;; instruction, so it must not be classified as V2DF like sse2_movupd.
215(define_insn "sse_movups"
216  [(set (match_operand:V4SF 0 "nonimmediate_operand" "=x,m")
217	(unspec:V4SF [(match_operand:V4SF 1 "nonimmediate_operand" "xm,x")]
218		     UNSPEC_MOVU))]
219  "TARGET_SSE && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
220  "movups\t{%1, %0|%0, %1}"
221  [(set_attr "type" "ssemov")
222   (set_attr "mode" "V4SF")])
223
;; Unaligned V2DF move (movupd), marked with UNSPEC_MOVU.  Alternative 0
;; targets a register, alternative 1 stores to memory; memory-to-memory is
;; rejected by the condition.  Requires SSE2.
224(define_insn "sse2_movupd"
225  [(set (match_operand:V2DF 0 "nonimmediate_operand" "=x,m")
226	(unspec:V2DF [(match_operand:V2DF 1 "nonimmediate_operand" "xm,x")]
227		     UNSPEC_MOVU))]
228  "TARGET_SSE2 && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
229  "movupd\t{%1, %0|%0, %1}"
230  [(set_attr "type" "ssemov")
231   (set_attr "mode" "V2DF")])
232
;; Unaligned integer vector move (movdqu), expressed on V16QI and marked
;; with UNSPEC_MOVU.  Alternative 0 targets a register, alternative 1 stores
;; to memory; memory-to-memory is rejected by the condition.  Requires SSE2.
233(define_insn "sse2_movdqu"
234  [(set (match_operand:V16QI 0 "nonimmediate_operand" "=x,m")
235	(unspec:V16QI [(match_operand:V16QI 1 "nonimmediate_operand" "xm,x")]
236		      UNSPEC_MOVU))]
237  "TARGET_SSE2 && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
238  "movdqu\t{%1, %0|%0, %1}"
239  [(set_attr "type" "ssemov")
240   (set_attr "mode" "TI")])
241
;; V4SF store marked with UNSPEC_MOVNT (movntps): SSE register source,
;; memory destination only.
242(define_insn "sse_movntv4sf"
243  [(set (match_operand:V4SF 0 "memory_operand" "=m")
244	(unspec:V4SF [(match_operand:V4SF 1 "register_operand" "x")]
245		     UNSPEC_MOVNT))]
246  "TARGET_SSE"
247  "movntps\t{%1, %0|%0, %1}"
248  [(set_attr "type" "ssemov")
249   (set_attr "mode" "V4SF")])
250
;; V2DF store marked with UNSPEC_MOVNT (movntpd): SSE register source,
;; memory destination only.  Requires SSE2.
;; NOTE(review): "type" is "ssecvt" here whereas sse_movntv4sf uses
;; "ssemov" -- confirm whether the difference is intended.
251(define_insn "sse2_movntv2df"
252  [(set (match_operand:V2DF 0 "memory_operand" "=m")
253	(unspec:V2DF [(match_operand:V2DF 1 "register_operand" "x")]
254		     UNSPEC_MOVNT))]
255  "TARGET_SSE2"
256  "movntpd\t{%1, %0|%0, %1}"
257  [(set_attr "type" "ssecvt")
258   (set_attr "mode" "V2DF")])
259
;; V2DI store marked with UNSPEC_MOVNT (movntdq): SSE register source,
;; memory destination only.  Requires SSE2.
;; NOTE(review): "type" is "ssecvt" here whereas sse_movntv4sf uses
;; "ssemov" -- confirm whether the difference is intended.
260(define_insn "sse2_movntv2di"
261  [(set (match_operand:V2DI 0 "memory_operand" "=m")
262	(unspec:V2DI [(match_operand:V2DI 1 "register_operand" "x")]
263		     UNSPEC_MOVNT))]
264  "TARGET_SSE2"
265  "movntdq\t{%1, %0|%0, %1}"
266  [(set_attr "type" "ssecvt")
267   (set_attr "mode" "TI")])
268
;; SImode store marked with UNSPEC_MOVNT (movnti): general register source
;; (constraint "r"), memory destination only.  Requires SSE2.
;; The "mode" attribute is SI because the insn moves a 32-bit integer from
;; a general register; it is not a packed double-precision operation.
269(define_insn "sse2_movntsi"
270  [(set (match_operand:SI 0 "memory_operand" "=m")
271	(unspec:SI [(match_operand:SI 1 "register_operand" "r")]
272		   UNSPEC_MOVNT))]
273  "TARGET_SSE2"
274  "movnti\t{%1, %0|%0, %1}"
275  [(set_attr "type" "ssecvt")
276   (set_attr "mode" "SI")])
277
;; SSE3 load (lddqu) of a V16QI value from memory into an SSE register,
;; represented with UNSPEC_LDQQU.  The source must be a memory operand.
278(define_insn "sse3_lddqu"
279  [(set (match_operand:V16QI 0 "register_operand" "=x")
280	(unspec:V16QI [(match_operand:V16QI 1 "memory_operand" "m")]
281		      UNSPEC_LDQQU))]
282  "TARGET_SSE3"
283  "lddqu\t{%1, %0|%0, %1}"
284  [(set_attr "type" "ssecvt")
285   (set_attr "mode" "TI")])
286
287;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
288;;
289;; Parallel single-precision floating point arithmetic
290;;
291;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
292
;; V4SF negation expander.  The whole expansion is performed by
;; ix86_expand_fp_absneg_operator (NEG, ...); the template is not emitted.
293(define_expand "negv4sf2"
294  [(set (match_operand:V4SF 0 "register_operand" "")
295	(neg:V4SF (match_operand:V4SF 1 "nonimmediate_operand" "")))]
296  "TARGET_SSE"
297  "ix86_expand_fp_absneg_operator (NEG, V4SFmode, operands); DONE;")
298
;; V4SF absolute-value expander.  The whole expansion is performed by
;; ix86_expand_fp_absneg_operator (ABS, ...); the template is not emitted.
299(define_expand "absv4sf2"
300  [(set (match_operand:V4SF 0 "register_operand" "")
301	(abs:V4SF (match_operand:V4SF 1 "nonimmediate_operand" "")))]
302  "TARGET_SSE"
303  "ix86_expand_fp_absneg_operator (ABS, V4SFmode, operands); DONE;")
304
;; V4SF addition expander.  ix86_fixup_binary_operands_no_copy adjusts the
;; operands in place; there is no DONE, so the template is then emitted and
;; is expected to match *addv4sf3.
305(define_expand "addv4sf3"
306  [(set (match_operand:V4SF 0 "register_operand" "")
307	(plus:V4SF (match_operand:V4SF 1 "nonimmediate_operand" "")
308		   (match_operand:V4SF 2 "nonimmediate_operand" "")))]
309  "TARGET_SSE"
310  "ix86_fixup_binary_operands_no_copy (PLUS, V4SFmode, operands);")
311
;; Packed single-precision add (addps).  Operand 1 is tied to the output
;; ("0") and marked commutative ("%"); operand 2 may be a register or
;; memory.  ix86_binary_operator_ok further restricts the operand mix.
312(define_insn "*addv4sf3"
313  [(set (match_operand:V4SF 0 "register_operand" "=x")
314	(plus:V4SF (match_operand:V4SF 1 "nonimmediate_operand" "%0")
315		   (match_operand:V4SF 2 "nonimmediate_operand" "xm")))]
316  "TARGET_SSE && ix86_binary_operator_ok (PLUS, V4SFmode, operands)"
317  "addps\t{%2, %0|%0, %2}"
318  [(set_attr "type" "sseadd")
319   (set_attr "mode" "V4SF")])
320
;; Scalar single-precision add (addss) in vector form: vec_merge mask 1
;; takes element 0 from the sum and every other element from operand 1.
321(define_insn "sse_vmaddv4sf3"
322  [(set (match_operand:V4SF 0 "register_operand" "=x")
323	(vec_merge:V4SF
324	  (plus:V4SF (match_operand:V4SF 1 "nonimmediate_operand" "%0")
325		     (match_operand:V4SF 2 "nonimmediate_operand" "xm"))
326	  (match_dup 1)
327	  (const_int 1)))]
328  "TARGET_SSE && ix86_binary_operator_ok (PLUS, V4SFmode, operands)"
329  "addss\t{%2, %0|%0, %2}"
330  [(set_attr "type" "sseadd")
331   (set_attr "mode" "SF")])
332
;; V4SF subtraction expander.  Operand 1 must already be a register (the
;; operation is not commutative); ix86_fixup_binary_operands_no_copy
;; adjusts the operands and the template is then emitted.
333(define_expand "subv4sf3"
334  [(set (match_operand:V4SF 0 "register_operand" "")
335	(minus:V4SF (match_operand:V4SF 1 "register_operand" "")
336		    (match_operand:V4SF 2 "nonimmediate_operand" "")))]
337  "TARGET_SSE"
338  "ix86_fixup_binary_operands_no_copy (MINUS, V4SFmode, operands);")
339
;; Packed single-precision subtract (subps): operand 0 = operand 1 -
;; operand 2, with operand 1 tied to the output register.
340(define_insn "*subv4sf3"
341  [(set (match_operand:V4SF 0 "register_operand" "=x")
342	(minus:V4SF (match_operand:V4SF 1 "register_operand" "0")
343		    (match_operand:V4SF 2 "nonimmediate_operand" "xm")))]
344  "TARGET_SSE"
345  "subps\t{%2, %0|%0, %2}"
346  [(set_attr "type" "sseadd")
347   (set_attr "mode" "V4SF")])
348
;; Scalar single-precision subtract (subss) in vector form: vec_merge mask 1
;; takes element 0 from the difference and the rest from operand 1.
349(define_insn "sse_vmsubv4sf3"
350  [(set (match_operand:V4SF 0 "register_operand" "=x")
351	(vec_merge:V4SF
352	  (minus:V4SF (match_operand:V4SF 1 "register_operand" "0")
353		      (match_operand:V4SF 2 "nonimmediate_operand" "xm"))
354	  (match_dup 1)
355	  (const_int 1)))]
356  "TARGET_SSE"
357  "subss\t{%2, %0|%0, %2}"
358  [(set_attr "type" "sseadd")
359   (set_attr "mode" "SF")])
360
;; V4SF multiplication expander.  ix86_fixup_binary_operands_no_copy
;; adjusts the operands in place; the template is then emitted and is
;; expected to match *mulv4sf3.
361(define_expand "mulv4sf3"
362  [(set (match_operand:V4SF 0 "register_operand" "")
363	(mult:V4SF (match_operand:V4SF 1 "nonimmediate_operand" "")
364		   (match_operand:V4SF 2 "nonimmediate_operand" "")))]
365  "TARGET_SSE"
366  "ix86_fixup_binary_operands_no_copy (MULT, V4SFmode, operands);")
367
;; Packed single-precision multiply (mulps).  Operand 1 is tied to the
;; output and marked commutative; ix86_binary_operator_ok restricts the
;; operand mix.
368(define_insn "*mulv4sf3"
369  [(set (match_operand:V4SF 0 "register_operand" "=x")
370	(mult:V4SF (match_operand:V4SF 1 "nonimmediate_operand" "%0")
371		   (match_operand:V4SF 2 "nonimmediate_operand" "xm")))]
372  "TARGET_SSE && ix86_binary_operator_ok (MULT, V4SFmode, operands)"
373  "mulps\t{%2, %0|%0, %2}"
374  [(set_attr "type" "ssemul")
375   (set_attr "mode" "V4SF")])
376
;; Scalar single-precision multiply (mulss) in vector form: vec_merge mask 1
;; takes element 0 from the product and the rest from operand 1.
377(define_insn "sse_vmmulv4sf3"
378  [(set (match_operand:V4SF 0 "register_operand" "=x")
379	(vec_merge:V4SF
380	  (mult:V4SF (match_operand:V4SF 1 "nonimmediate_operand" "%0")
381		     (match_operand:V4SF 2 "nonimmediate_operand" "xm"))
382	  (match_dup 1)
383	  (const_int 1)))]
384  "TARGET_SSE && ix86_binary_operator_ok (MULT, V4SFmode, operands)"
385  "mulss\t{%2, %0|%0, %2}"
386  [(set_attr "type" "ssemul")
387   (set_attr "mode" "SF")])
388
;; V4SF division expander.  Operand 1 must already be a register (the
;; operation is not commutative); ix86_fixup_binary_operands_no_copy
;; adjusts the operands and the template is then emitted.
389(define_expand "divv4sf3"
390  [(set (match_operand:V4SF 0 "register_operand" "")
391	(div:V4SF (match_operand:V4SF 1 "register_operand" "")
392		  (match_operand:V4SF 2 "nonimmediate_operand" "")))]
393  "TARGET_SSE"
394  "ix86_fixup_binary_operands_no_copy (DIV, V4SFmode, operands);")
395
;; Packed single-precision divide (divps): operand 0 = operand 1 /
;; operand 2, with operand 1 tied to the output register.
396(define_insn "*divv4sf3"
397  [(set (match_operand:V4SF 0 "register_operand" "=x")
398	(div:V4SF (match_operand:V4SF 1 "register_operand" "0")
399		  (match_operand:V4SF 2 "nonimmediate_operand" "xm")))]
400  "TARGET_SSE"
401  "divps\t{%2, %0|%0, %2}"
402  [(set_attr "type" "ssediv")
403   (set_attr "mode" "V4SF")])
404
;; Scalar single-precision divide (divss) in vector form: vec_merge mask 1
;; takes element 0 from the quotient and the rest from operand 1.
405(define_insn "sse_vmdivv4sf3"
406  [(set (match_operand:V4SF 0 "register_operand" "=x")
407	(vec_merge:V4SF
408	  (div:V4SF (match_operand:V4SF 1 "register_operand" "0")
409		    (match_operand:V4SF 2 "nonimmediate_operand" "xm"))
410	  (match_dup 1)
411	  (const_int 1)))]
412  "TARGET_SSE"
413  "divss\t{%2, %0|%0, %2}"
414  [(set_attr "type" "ssediv")
415   (set_attr "mode" "SF")])
416
;; Packed reciprocal (rcpps), represented as UNSPEC_RCP of operand 1.
;; The source may be a register or memory and is not tied to the output.
417(define_insn "sse_rcpv4sf2"
418  [(set (match_operand:V4SF 0 "register_operand" "=x")
419	(unspec:V4SF
420	 [(match_operand:V4SF 1 "nonimmediate_operand" "xm")] UNSPEC_RCP))]
421  "TARGET_SSE"
422  "rcpps\t{%1, %0|%0, %1}"
423  [(set_attr "type" "sse")
424   (set_attr "mode" "V4SF")])
425
;; Scalar reciprocal (rcpss) in vector form: element 0 comes from
;; UNSPEC_RCP of operand 1; the other elements come from operand 2, which
;; is tied to the output register.
426(define_insn "sse_vmrcpv4sf2"
427  [(set (match_operand:V4SF 0 "register_operand" "=x")
428	(vec_merge:V4SF
429	  (unspec:V4SF [(match_operand:V4SF 1 "nonimmediate_operand" "xm")]
430		       UNSPEC_RCP)
431	  (match_operand:V4SF 2 "register_operand" "0")
432	  (const_int 1)))]
433  "TARGET_SSE"
434  "rcpss\t{%1, %0|%0, %1}"
435  [(set_attr "type" "sse")
436   (set_attr "mode" "SF")])
437
;; Packed reciprocal square root (rsqrtps), represented as UNSPEC_RSQRT of
;; operand 1.  The source may be a register or memory.
438(define_insn "sse_rsqrtv4sf2"
439  [(set (match_operand:V4SF 0 "register_operand" "=x")
440	(unspec:V4SF
441	  [(match_operand:V4SF 1 "nonimmediate_operand" "xm")] UNSPEC_RSQRT))]
442  "TARGET_SSE"
443  "rsqrtps\t{%1, %0|%0, %1}"
444  [(set_attr "type" "sse")
445   (set_attr "mode" "V4SF")])
446
;; Scalar reciprocal square root (rsqrtss) in vector form: element 0 comes
;; from UNSPEC_RSQRT of operand 1; the other elements come from operand 2,
;; which is tied to the output register.
447(define_insn "sse_vmrsqrtv4sf2"
448  [(set (match_operand:V4SF 0 "register_operand" "=x")
449	(vec_merge:V4SF
450	  (unspec:V4SF [(match_operand:V4SF 1 "nonimmediate_operand" "xm")]
451		       UNSPEC_RSQRT)
452	  (match_operand:V4SF 2 "register_operand" "0")
453	  (const_int 1)))]
454  "TARGET_SSE"
455  "rsqrtss\t{%1, %0|%0, %1}"
456  [(set_attr "type" "sse")
457   (set_attr "mode" "SF")])
458
;; Packed single-precision square root (sqrtps).  The source may be a
;; register or memory and is not tied to the output.
459(define_insn "sqrtv4sf2"
460  [(set (match_operand:V4SF 0 "register_operand" "=x")
461	(sqrt:V4SF (match_operand:V4SF 1 "nonimmediate_operand" "xm")))]
462  "TARGET_SSE"
463  "sqrtps\t{%1, %0|%0, %1}"
464  [(set_attr "type" "sse")
465   (set_attr "mode" "V4SF")])
466
;; Scalar square root (sqrtss) in vector form: element 0 is sqrt of
;; operand 1; the other elements come from operand 2, which is tied to the
;; output register.
467(define_insn "sse_vmsqrtv4sf2"
468  [(set (match_operand:V4SF 0 "register_operand" "=x")
469	(vec_merge:V4SF
470	  (sqrt:V4SF (match_operand:V4SF 1 "nonimmediate_operand" "xm"))
471	  (match_operand:V4SF 2 "register_operand" "0")
472	  (const_int 1)))]
473  "TARGET_SSE"
474  "sqrtss\t{%1, %0|%0, %1}"
475  [(set_attr "type" "sse")
476   (set_attr "mode" "SF")])
477
478;; ??? For !flag_finite_math_only, the representation with SMIN/SMAX
479;; isn't really correct, as those rtl operators aren't defined when 
480;; applied to NaNs.  Hopefully the optimizers won't get too smart on us.
481
;; V4SF signed-maximum expander.  Unless flag_finite_math_only is set,
;; operand 1 is first forced into a register -- presumably so that the
;; non-commutative *smaxv4sf3 pattern (register-only operand 1) can match.
;; The operands are then fixed up and the template is emitted.
482(define_expand "smaxv4sf3"
483  [(set (match_operand:V4SF 0 "register_operand" "")
484	(smax:V4SF (match_operand:V4SF 1 "nonimmediate_operand" "")
485		   (match_operand:V4SF 2 "nonimmediate_operand" "")))]
486  "TARGET_SSE"
487{
488  if (!flag_finite_math_only)
489    operands[1] = force_reg (V4SFmode, operands[1]);
490  ix86_fixup_binary_operands_no_copy (SMAX, V4SFmode, operands);
491})
492
;; Packed maximum (maxps), commutative form.  Only available with
;; flag_finite_math_only; operand 1 carries the "%" commutativity marker.
493(define_insn "*smaxv4sf3_finite"
494  [(set (match_operand:V4SF 0 "register_operand" "=x")
495	(smax:V4SF (match_operand:V4SF 1 "nonimmediate_operand" "%0")
496		   (match_operand:V4SF 2 "nonimmediate_operand" "xm")))]
497  "TARGET_SSE && flag_finite_math_only
498   && ix86_binary_operator_ok (SMAX, V4SFmode, operands)"
499  "maxps\t{%2, %0|%0, %2}"
500  [(set_attr "type" "sse")
501   (set_attr "mode" "V4SF")])
502
;; Packed maximum (maxps), non-commutative form: operand 1 must be a
;; register tied to the output, and no "%" marker is present.  Being listed
;; after *smaxv4sf3_finite, it is the fallback when that pattern's
;; condition does not hold.
503(define_insn "*smaxv4sf3"
504  [(set (match_operand:V4SF 0 "register_operand" "=x")
505	(smax:V4SF (match_operand:V4SF 1 "register_operand" "0")
506		   (match_operand:V4SF 2 "nonimmediate_operand" "xm")))]
507  "TARGET_SSE"
508  "maxps\t{%2, %0|%0, %2}"
509  [(set_attr "type" "sse")
510   (set_attr "mode" "V4SF")])
511
;; Scalar maximum (maxss), commutative form for flag_finite_math_only:
;; element 0 is the maximum, the other elements come from operand 1.
512(define_insn "*sse_vmsmaxv4sf3_finite"
513  [(set (match_operand:V4SF 0 "register_operand" "=x")
514	(vec_merge:V4SF
515	 (smax:V4SF (match_operand:V4SF 1 "nonimmediate_operand" "%0")
516		    (match_operand:V4SF 2 "nonimmediate_operand" "xm"))
517	 (match_dup 1)
518	 (const_int 1)))]
519  "TARGET_SSE && flag_finite_math_only
520   && ix86_binary_operator_ok (SMAX, V4SFmode, operands)"
521  "maxss\t{%2, %0|%0, %2}"
522  [(set_attr "type" "sse")
523   (set_attr "mode" "SF")])
524
;; Scalar maximum (maxss), non-commutative form: operand 1 must be a
;; register tied to the output.  Element 0 is the maximum, the other
;; elements come from operand 1.
525(define_insn "sse_vmsmaxv4sf3"
526  [(set (match_operand:V4SF 0 "register_operand" "=x")
527	(vec_merge:V4SF
528	 (smax:V4SF (match_operand:V4SF 1 "register_operand" "0")
529		    (match_operand:V4SF 2 "nonimmediate_operand" "xm"))
530	 (match_dup 1)
531	 (const_int 1)))]
532  "TARGET_SSE"
533  "maxss\t{%2, %0|%0, %2}"
534  [(set_attr "type" "sse")
535   (set_attr "mode" "SF")])
536
;; V4SF signed-minimum expander.  Unless flag_finite_math_only is set,
;; operand 1 is first forced into a register -- presumably so that the
;; non-commutative *sminv4sf3 pattern (register-only operand 1) can match.
;; The operands are then fixed up and the template is emitted.
537(define_expand "sminv4sf3"
538  [(set (match_operand:V4SF 0 "register_operand" "")
539	(smin:V4SF (match_operand:V4SF 1 "nonimmediate_operand" "")
540		   (match_operand:V4SF 2 "nonimmediate_operand" "")))]
541  "TARGET_SSE"
542{
543  if (!flag_finite_math_only)
544    operands[1] = force_reg (V4SFmode, operands[1]);
545  ix86_fixup_binary_operands_no_copy (SMIN, V4SFmode, operands);
546})
547
;; Packed minimum (minps), commutative form.  Only available with
;; flag_finite_math_only; operand 1 carries the "%" commutativity marker.
548(define_insn "*sminv4sf3_finite"
549  [(set (match_operand:V4SF 0 "register_operand" "=x")
550	(smin:V4SF (match_operand:V4SF 1 "nonimmediate_operand" "%0")
551		   (match_operand:V4SF 2 "nonimmediate_operand" "xm")))]
552  "TARGET_SSE && flag_finite_math_only
553   && ix86_binary_operator_ok (SMIN, V4SFmode, operands)"
554  "minps\t{%2, %0|%0, %2}"
555  [(set_attr "type" "sse")
556   (set_attr "mode" "V4SF")])
557
;; Packed minimum (minps), non-commutative form: operand 1 must be a
;; register tied to the output, and no "%" marker is present.  Being listed
;; after *sminv4sf3_finite, it is the fallback when that pattern's
;; condition does not hold.
558(define_insn "*sminv4sf3"
559  [(set (match_operand:V4SF 0 "register_operand" "=x")
560	(smin:V4SF (match_operand:V4SF 1 "register_operand" "0")
561		   (match_operand:V4SF 2 "nonimmediate_operand" "xm")))]
562  "TARGET_SSE"
563  "minps\t{%2, %0|%0, %2}"
564  [(set_attr "type" "sse")
565   (set_attr "mode" "V4SF")])
566
;; Scalar minimum (minss), commutative form for flag_finite_math_only:
;; element 0 is the minimum, the other elements come from operand 1.
567(define_insn "*sse_vmsminv4sf3_finite"
568  [(set (match_operand:V4SF 0 "register_operand" "=x")
569	(vec_merge:V4SF
570	 (smin:V4SF (match_operand:V4SF 1 "nonimmediate_operand" "%0")
571		    (match_operand:V4SF 2 "nonimmediate_operand" "xm"))
572	 (match_dup 1)
573	 (const_int 1)))]
574  "TARGET_SSE && flag_finite_math_only
575   && ix86_binary_operator_ok (SMIN, V4SFmode, operands)"
576  "minss\t{%2, %0|%0, %2}"
577  [(set_attr "type" "sse")
578   (set_attr "mode" "SF")])
579
;; Scalar minimum (minss), non-commutative form: operand 1 must be a
;; register tied to the output.  Element 0 is the minimum, the other
;; elements come from operand 1.
580(define_insn "sse_vmsminv4sf3"
581  [(set (match_operand:V4SF 0 "register_operand" "=x")
582	(vec_merge:V4SF
583	 (smin:V4SF (match_operand:V4SF 1 "register_operand" "0")
584		    (match_operand:V4SF 2 "nonimmediate_operand" "xm"))
585	 (match_dup 1)
586	 (const_int 1)))]
587  "TARGET_SSE"
588  "minss\t{%2, %0|%0, %2}"
589  [(set_attr "type" "sse")
590   (set_attr "mode" "SF")])
591
592;; These versions of the min/max patterns implement exactly the operations
593;;   min = (op1 < op2 ? op1 : op2)
594;;   max = (!(op1 < op2) ? op1 : op2)
595;; Their operands are not commutative, and thus they may be used in the
596;; presence of -0.0 and NaN.
597
;; Order-sensitive V4SF minimum (minps) expressed as UNSPEC_IEEE_MIN, so
;; the two operands are never treated as interchangeable.  Operand 1 is
;; tied to the output register.
598(define_insn "*ieee_sminv4sf3"
599  [(set (match_operand:V4SF 0 "register_operand" "=x")
600	(unspec:V4SF [(match_operand:V4SF 1 "register_operand" "0")
601		      (match_operand:V4SF 2 "nonimmediate_operand" "xm")]
602		     UNSPEC_IEEE_MIN))]
603  "TARGET_SSE"
604  "minps\t{%2, %0|%0, %2}"
605  [(set_attr "type" "sseadd")
606   (set_attr "mode" "V4SF")])
607
;; Order-sensitive V4SF maximum (maxps) expressed as UNSPEC_IEEE_MAX, so
;; the two operands are never treated as interchangeable.  Operand 1 is
;; tied to the output register.
608(define_insn "*ieee_smaxv4sf3"
609  [(set (match_operand:V4SF 0 "register_operand" "=x")
610	(unspec:V4SF [(match_operand:V4SF 1 "register_operand" "0")
611		      (match_operand:V4SF 2 "nonimmediate_operand" "xm")]
612		     UNSPEC_IEEE_MAX))]
613  "TARGET_SSE"
614  "maxps\t{%2, %0|%0, %2}"
615  [(set_attr "type" "sseadd")
616   (set_attr "mode" "V4SF")])
617
;; Order-sensitive V2DF minimum (minpd) expressed as UNSPEC_IEEE_MIN.
;; Operand 1 is tied to the output register.  Requires SSE2.
618(define_insn "*ieee_sminv2df3"
619  [(set (match_operand:V2DF 0 "register_operand" "=x")
620	(unspec:V2DF [(match_operand:V2DF 1 "register_operand" "0")
621		      (match_operand:V2DF 2 "nonimmediate_operand" "xm")]
622		     UNSPEC_IEEE_MIN))]
623  "TARGET_SSE2"
624  "minpd\t{%2, %0|%0, %2}"
625  [(set_attr "type" "sseadd")
626   (set_attr "mode" "V2DF")])
627
;; Order-sensitive V2DF maximum (maxpd) expressed as UNSPEC_IEEE_MAX.
;; Operand 1 is tied to the output register.  Requires SSE2.
628(define_insn "*ieee_smaxv2df3"
629  [(set (match_operand:V2DF 0 "register_operand" "=x")
630	(unspec:V2DF [(match_operand:V2DF 1 "register_operand" "0")
631		      (match_operand:V2DF 2 "nonimmediate_operand" "xm")]
632		     UNSPEC_IEEE_MAX))]
633  "TARGET_SSE2"
634  "maxpd\t{%2, %0|%0, %2}"
635  [(set_attr "type" "sseadd")
636   (set_attr "mode" "V2DF")])
637
;; SSE3 addsubps: subtract in elements 0 and 2, add in elements 1 and 3.
;; In a vec_merge, a set mask bit selects the element from the first
;; vector (the sum here) and a clear bit selects it from the second (the
;; difference).  The sum must therefore be chosen for elements 1 and 3,
;; i.e. mask 0b1010 = 10.
638(define_insn "sse3_addsubv4sf3"
639  [(set (match_operand:V4SF 0 "register_operand" "=x")
640	(vec_merge:V4SF
641	  (plus:V4SF
642	    (match_operand:V4SF 1 "register_operand" "0")
643	    (match_operand:V4SF 2 "nonimmediate_operand" "xm"))
644	  (minus:V4SF (match_dup 1) (match_dup 2))
645	  (const_int 10)))]
646  "TARGET_SSE3"
647  "addsubps\t{%2, %0|%0, %2}"
648  [(set_attr "type" "sseadd")
649   (set_attr "mode" "V4SF")])
650
;; SSE3 horizontal add (haddps).  Result elements, in order:
;;   op1[0]+op1[1], op1[2]+op1[3], op2[0]+op2[1], op2[2]+op2[3]
;; Operand 1 is tied to the output; operand 2 may be a register or memory.
651(define_insn "sse3_haddv4sf3"
652  [(set (match_operand:V4SF 0 "register_operand" "=x")
653	(vec_concat:V4SF
654	  (vec_concat:V2SF
655	    (plus:SF
656	      (vec_select:SF 
657		(match_operand:V4SF 1 "register_operand" "0")
658		(parallel [(const_int 0)]))
659	      (vec_select:SF (match_dup 1) (parallel [(const_int 1)])))
660	    (plus:SF
661	      (vec_select:SF (match_dup 1) (parallel [(const_int 2)]))
662	      (vec_select:SF (match_dup 1) (parallel [(const_int 3)]))))
663	  (vec_concat:V2SF
664	    (plus:SF
665	      (vec_select:SF
666		(match_operand:V4SF 2 "nonimmediate_operand" "xm")
667		(parallel [(const_int 0)]))
668	      (vec_select:SF (match_dup 2) (parallel [(const_int 1)])))
669	    (plus:SF
670	      (vec_select:SF (match_dup 2) (parallel [(const_int 2)]))
671	      (vec_select:SF (match_dup 2) (parallel [(const_int 3)]))))))]
672  "TARGET_SSE3"
673  "haddps\t{%2, %0|%0, %2}"
674  [(set_attr "type" "sseadd")
675   (set_attr "mode" "V4SF")])
676
;; SSE3 horizontal subtract (hsubps).  Result elements, in order:
;;   op1[0]-op1[1], op1[2]-op1[3], op2[0]-op2[1], op2[2]-op2[3]
;; Operand 1 is tied to the output; operand 2 may be a register or memory.
677(define_insn "sse3_hsubv4sf3"
678  [(set (match_operand:V4SF 0 "register_operand" "=x")
679	(vec_concat:V4SF
680	  (vec_concat:V2SF
681	    (minus:SF
682	      (vec_select:SF 
683		(match_operand:V4SF 1 "register_operand" "0")
684		(parallel [(const_int 0)]))
685	      (vec_select:SF (match_dup 1) (parallel [(const_int 1)])))
686	    (minus:SF
687	      (vec_select:SF (match_dup 1) (parallel [(const_int 2)]))
688	      (vec_select:SF (match_dup 1) (parallel [(const_int 3)]))))
689	  (vec_concat:V2SF
690	    (minus:SF
691	      (vec_select:SF
692		(match_operand:V4SF 2 "nonimmediate_operand" "xm")
693		(parallel [(const_int 0)]))
694	      (vec_select:SF (match_dup 2) (parallel [(const_int 1)])))
695	    (minus:SF
696	      (vec_select:SF (match_dup 2) (parallel [(const_int 2)]))
697	      (vec_select:SF (match_dup 2) (parallel [(const_int 3)]))))))]
698  "TARGET_SSE3"
699  "hsubps\t{%2, %0|%0, %2}"
700  [(set_attr "type" "sseadd")
701   (set_attr "mode" "V4SF")])
702
;; Sum-reduction expander for V4SF.  With SSE3 it emits two haddps steps
;; through a fresh temporary; otherwise it falls back to
;; ix86_expand_reduc_v4sf using gen_addv4sf3 as the combining operation.
703(define_expand "reduc_splus_v4sf"
704  [(match_operand:V4SF 0 "register_operand" "")
705   (match_operand:V4SF 1 "register_operand" "")]
706  "TARGET_SSE"
707{
708  if (TARGET_SSE3)
709    {
710      rtx tmp = gen_reg_rtx (V4SFmode);
      /* First haddps: pairwise sums of the input with itself.  */
711      emit_insn (gen_sse3_haddv4sf3 (tmp, operands[1], operands[1]));
      /* Second haddps: pairwise sums of those partial sums.  */
712      emit_insn (gen_sse3_haddv4sf3 (operands[0], tmp, tmp));
713    }
714  else
715    ix86_expand_reduc_v4sf (gen_addv4sf3, operands[0], operands[1]);
716  DONE;
717})
718
;; Maximum-reduction expander for V4SF: ix86_expand_reduc_v4sf with
;; gen_smaxv4sf3 as the combining operation.
719(define_expand "reduc_smax_v4sf"
720  [(match_operand:V4SF 0 "register_operand" "")
721   (match_operand:V4SF 1 "register_operand" "")]
722  "TARGET_SSE"
723{
724  ix86_expand_reduc_v4sf (gen_smaxv4sf3, operands[0], operands[1]);
725  DONE;
726})
727
;; Minimum-reduction expander for V4SF: ix86_expand_reduc_v4sf with
;; gen_sminv4sf3 as the combining operation.
728(define_expand "reduc_smin_v4sf"
729  [(match_operand:V4SF 0 "register_operand" "")
730   (match_operand:V4SF 1 "register_operand" "")]
731  "TARGET_SSE"
732{
733  ix86_expand_reduc_v4sf (gen_sminv4sf3, operands[0], operands[1]);
734  DONE;
735})
736
737;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
738;;
739;; Parallel single-precision floating point comparisons
740;;
741;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
742
;; Packed single-precision compare producing a V4SF result (cmp<cc>ps).
;; Operand 3 is the comparison operator; the "%D3" output modifier prints
;; it as part of the mnemonic.  Operand 1 is tied to the output.
743(define_insn "sse_maskcmpv4sf3"
744  [(set (match_operand:V4SF 0 "register_operand" "=x")
745	(match_operator:V4SF 3 "sse_comparison_operator"
746		[(match_operand:V4SF 1 "register_operand" "0")
747		 (match_operand:V4SF 2 "nonimmediate_operand" "xm")]))]
748  "TARGET_SSE"
749  "cmp%D3ps\t{%2, %0|%0, %2}"
750  [(set_attr "type" "ssecmp")
751   (set_attr "mode" "V4SF")])
752
;; Scalar single-precision compare (cmp<cc>ss) in vector form: element 0
;; holds the comparison result, the other elements come from operand 1.
;; Unlike sse_maskcmpv4sf3, operand 2 must be a register here.
753(define_insn "sse_vmmaskcmpv4sf3"
754  [(set (match_operand:V4SF 0 "register_operand" "=x")
755	(vec_merge:V4SF
756	 (match_operator:V4SF 3 "sse_comparison_operator"
757		[(match_operand:V4SF 1 "register_operand" "0")
758		 (match_operand:V4SF 2 "register_operand" "x")])
759	 (match_dup 1)
760	 (const_int 1)))]
761  "TARGET_SSE"
762  "cmp%D3ss\t{%2, %0|%0, %2}"
763  [(set_attr "type" "ssecmp")
764   (set_attr "mode" "SF")])
765
;; comiss: compare element 0 of operand 0 with element 0 of operand 1 and
;; set FLAGS_REG in CCFP mode.  Operand 0 must be a register; operand 1 may
;; be a register or memory.
766(define_insn "sse_comi"
767  [(set (reg:CCFP FLAGS_REG)
768	(compare:CCFP
769	  (vec_select:SF
770	    (match_operand:V4SF 0 "register_operand" "x")
771	    (parallel [(const_int 0)]))
772	  (vec_select:SF
773	    (match_operand:V4SF 1 "nonimmediate_operand" "xm")
774	    (parallel [(const_int 0)]))))]
775  "TARGET_SSE"
776  "comiss\t{%1, %0|%0, %1}"
777  [(set_attr "type" "ssecomi")
778   (set_attr "mode" "SF")])
779
;; ucomiss: compare element 0 of operand 0 with element 0 of operand 1 and
;; set FLAGS_REG in CCFPU mode.  Operand 0 must be a register; operand 1
;; may be a register or memory.
780(define_insn "sse_ucomi"
781  [(set (reg:CCFPU FLAGS_REG)
782	(compare:CCFPU
783	  (vec_select:SF
784	    (match_operand:V4SF 0 "register_operand" "x")
785	    (parallel [(const_int 0)]))
786	  (vec_select:SF
787	    (match_operand:V4SF 1 "nonimmediate_operand" "xm")
788	    (parallel [(const_int 0)]))))]
789  "TARGET_SSE"
790  "ucomiss\t{%1, %0|%0, %1}"
791  [(set_attr "type" "ssecomi")
792   (set_attr "mode" "SF")])
793
;; Vector conditional-select expander for V4SF.  Operand 3 is the
;; comparison of operands 4 and 5 (its predicate is empty, so any operator
;; is accepted); operands 1 and 2 are the values selected between.
;; Expansion is delegated to ix86_expand_fp_vcond, and the expander FAILs
;; when that function returns zero.
794(define_expand "vcondv4sf"
795  [(set (match_operand:V4SF 0 "register_operand" "")
796        (if_then_else:V4SF
797          (match_operator 3 ""
798            [(match_operand:V4SF 4 "nonimmediate_operand" "")
799             (match_operand:V4SF 5 "nonimmediate_operand" "")])
800          (match_operand:V4SF 1 "general_operand" "")
801          (match_operand:V4SF 2 "general_operand" "")))]
802  "TARGET_SSE"
803{
804  if (ix86_expand_fp_vcond (operands))
805    DONE;
806  else
807    FAIL;
808})
809
810;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
811;;
812;; Parallel single-precision floating point logical operations
813;;
814;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
815
;; V4SF bitwise-AND expander.  ix86_fixup_binary_operands_no_copy adjusts
;; the operands in place; the template is then emitted and is expected to
;; match *andv4sf3.
816(define_expand "andv4sf3"
817  [(set (match_operand:V4SF 0 "register_operand" "")
818	(and:V4SF (match_operand:V4SF 1 "nonimmediate_operand" "")
819		  (match_operand:V4SF 2 "nonimmediate_operand" "")))]
820  "TARGET_SSE"
821  "ix86_fixup_binary_operands_no_copy (AND, V4SFmode, operands);")
822
;; Packed bitwise AND (andps).  Operand 1 is tied to the output and marked
;; commutative; ix86_binary_operator_ok restricts the operand mix.
823(define_insn "*andv4sf3"
824  [(set (match_operand:V4SF 0 "register_operand" "=x")
825	(and:V4SF (match_operand:V4SF 1 "nonimmediate_operand" "%0")
826		  (match_operand:V4SF 2 "nonimmediate_operand" "xm")))]
827  "TARGET_SSE && ix86_binary_operator_ok (AND, V4SFmode, operands)"
828  "andps\t{%2, %0|%0, %2}"
829  [(set_attr "type" "sselog")
830   (set_attr "mode" "V4SF")])
831
;; Packed AND-NOT (andnps): operand 0 = (~operand 1) & operand 2.  The
;; complemented operand is the one tied to the output register.
832(define_insn "sse_nandv4sf3"
833  [(set (match_operand:V4SF 0 "register_operand" "=x")
834	(and:V4SF (not:V4SF (match_operand:V4SF 1 "register_operand" "0"))
835		  (match_operand:V4SF 2 "nonimmediate_operand" "xm")))]
836  "TARGET_SSE"
837  "andnps\t{%2, %0|%0, %2}"
838  [(set_attr "type" "sselog")
839   (set_attr "mode" "V4SF")])
840
;; V4SF bitwise-OR expander.  ix86_fixup_binary_operands_no_copy adjusts
;; the operands in place; the template is then emitted and is expected to
;; match *iorv4sf3.
841(define_expand "iorv4sf3"
842  [(set (match_operand:V4SF 0 "register_operand" "")
843	(ior:V4SF (match_operand:V4SF 1 "nonimmediate_operand" "")
844		  (match_operand:V4SF 2 "nonimmediate_operand" "")))]
845  "TARGET_SSE"
846  "ix86_fixup_binary_operands_no_copy (IOR, V4SFmode, operands);")
847
;; Packed bitwise OR (orps).  Operand 1 is tied to the output and marked
;; commutative; ix86_binary_operator_ok restricts the operand mix.
848(define_insn "*iorv4sf3"
849  [(set (match_operand:V4SF 0 "register_operand" "=x")
850	(ior:V4SF (match_operand:V4SF 1 "nonimmediate_operand" "%0")
851		  (match_operand:V4SF 2 "nonimmediate_operand" "xm")))]
852  "TARGET_SSE && ix86_binary_operator_ok (IOR, V4SFmode, operands)"
853  "orps\t{%2, %0|%0, %2}"
854  [(set_attr "type" "sselog")
855   (set_attr "mode" "V4SF")])
856
;; V4SF bitwise-XOR expander.  ix86_fixup_binary_operands_no_copy adjusts
;; the operands in place; the template is then emitted and is expected to
;; match *xorv4sf3.
857(define_expand "xorv4sf3"
858  [(set (match_operand:V4SF 0 "register_operand" "")
859	(xor:V4SF (match_operand:V4SF 1 "nonimmediate_operand" "")
860		  (match_operand:V4SF 2 "nonimmediate_operand" "")))]
861  "TARGET_SSE"
862  "ix86_fixup_binary_operands_no_copy (XOR, V4SFmode, operands);")
863
;; Packed bitwise XOR (xorps).  Operand 1 is tied to the output and marked
;; commutative; ix86_binary_operator_ok restricts the operand mix.
864(define_insn "*xorv4sf3"
865  [(set (match_operand:V4SF 0 "register_operand" "=x")
866	(xor:V4SF (match_operand:V4SF 1 "nonimmediate_operand" "%0")
867		  (match_operand:V4SF 2 "nonimmediate_operand" "xm")))]
868  "TARGET_SSE && ix86_binary_operator_ok (XOR, V4SFmode, operands)"
869  "xorps\t{%2, %0|%0, %2}"
870  [(set_attr "type" "sselog")
871   (set_attr "mode" "V4SF")])
872
873;; Also define scalar versions.  These are used for abs, neg, and
874;; conditional move.  Using subregs into vector modes causes register
875;; allocation lossage.  These patterns do not allow memory operands
876;; because the native instructions read the full 128-bits.
877
;; Scalar SFmode bitwise AND implemented with the full-width andps.  Both
;; inputs are register-only, and "mode" is V4SF because the instruction
;; operates on the whole register.
878(define_insn "*andsf3"
879  [(set (match_operand:SF 0 "register_operand" "=x")
880	(and:SF (match_operand:SF 1 "register_operand" "0")
881		(match_operand:SF 2 "register_operand" "x")))]
882  "TARGET_SSE"
883  "andps\t{%2, %0|%0, %2}"
884  [(set_attr "type" "sselog")
885   (set_attr "mode" "V4SF")])
886
;; Scalar SFmode AND-NOT implemented with the full-width andnps:
;; operand 0 = (~operand 1) & operand 2.  Both inputs are register-only.
887(define_insn "*nandsf3"
888  [(set (match_operand:SF 0 "register_operand" "=x")
889	(and:SF (not:SF (match_operand:SF 1 "register_operand" "0"))
890		(match_operand:SF 2 "register_operand" "x")))]
891  "TARGET_SSE"
892  "andnps\t{%2, %0|%0, %2}"
893  [(set_attr "type" "sselog")
894   (set_attr "mode" "V4SF")])
895
;; Scalar SFmode bitwise OR implemented with the full-width orps.  Both
;; inputs are register-only.
896(define_insn "*iorsf3"
897  [(set (match_operand:SF 0 "register_operand" "=x")
898	(ior:SF (match_operand:SF 1 "register_operand" "0")
899		(match_operand:SF 2 "register_operand" "x")))]
900  "TARGET_SSE"
901  "orps\t{%2, %0|%0, %2}"
902  [(set_attr "type" "sselog")
903   (set_attr "mode" "V4SF")])
904
;; Scalar SF bitwise exclusive OR on SSE registers only.  Emitted as the
;; full-width xorps, hence the V4SF "mode" attribute.
905(define_insn "*xorsf3"
906  [(set (match_operand:SF 0 "register_operand" "=x")
907	(xor:SF (match_operand:SF 1 "register_operand" "0")
908		(match_operand:SF 2 "register_operand" "x")))]
909  "TARGET_SSE"
910  "xorps\t{%2, %0|%0, %2}"
911  [(set_attr "type" "sselog")
912   (set_attr "mode" "V4SF")])
913
914;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
915;;
916;; Parallel single-precision floating point conversion operations
917;;
918;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
919
;; Convert the two SI elements of operand 2 (MMX register or memory) to SF
;; and merge them into elements 0 and 1 of the result (merge mask 3).  The
;; other elements come from operand 1, which is tied to the destination.
920(define_insn "sse_cvtpi2ps"
921  [(set (match_operand:V4SF 0 "register_operand" "=x")
922	(vec_merge:V4SF
923	  (vec_duplicate:V4SF
924	    (float:V2SF (match_operand:V2SI 2 "nonimmediate_operand" "ym")))
925	  (match_operand:V4SF 1 "register_operand" "0")
926	  (const_int 3)))]
927  "TARGET_SSE"
928  "cvtpi2ps\t{%2, %0|%0, %2}"
929  [(set_attr "type" "ssecvt")
930   (set_attr "mode" "V4SF")])
931
;; Float-to-integer conversion of operand 1, modelled as the
;; UNSPEC_FIX_NOTRUNC unspec; elements 0 and 1 of the V4SI result are kept
;; as a V2SI in an MMX register ("y").  Emitted as cvtps2pi.
932(define_insn "sse_cvtps2pi"
933  [(set (match_operand:V2SI 0 "register_operand" "=y")
934	(vec_select:V2SI
935	  (unspec:V4SI [(match_operand:V4SF 1 "nonimmediate_operand" "xm")]
936		       UNSPEC_FIX_NOTRUNC)
937	  (parallel [(const_int 0) (const_int 1)])))]
938  "TARGET_SSE"
939  "cvtps2pi\t{%1, %0|%0, %1}"
940  [(set_attr "type" "ssecvt")
941   (set_attr "unit" "mmx")
942   (set_attr "mode" "DI")])
943
;; Float-to-integer conversion of operand 1 expressed with the RTL "fix"
;; operator; elements 0 and 1 of the V4SI result are kept as a V2SI in an
;; MMX register ("y").  Emitted as cvttps2pi.
;; The "mode" attribute is DI, matching sse_cvtps2pi: the destination is a
;; 64-bit MMX value, not a scalar SF.
944(define_insn "sse_cvttps2pi"
945  [(set (match_operand:V2SI 0 "register_operand" "=y")
946	(vec_select:V2SI
947	  (fix:V4SI (match_operand:V4SF 1 "nonimmediate_operand" "xm"))
948	  (parallel [(const_int 0) (const_int 1)])))]
949  "TARGET_SSE"
950  "cvttps2pi\t{%1, %0|%0, %1}"
951  [(set_attr "type" "ssecvt")
952   (set_attr "unit" "mmx")
953   (set_attr "mode" "DI")])
954
;; Convert SI operand 2 to SF and place it in element 0 of the result
;; (merge mask 1); the other elements come from operand 1, which is tied to
;; the destination.  The two alternatives (integer register / memory source)
;; differ only in their athlon_decode attribute.
955(define_insn "sse_cvtsi2ss"
956  [(set (match_operand:V4SF 0 "register_operand" "=x,x")
957	(vec_merge:V4SF
958	  (vec_duplicate:V4SF
959	    (float:SF (match_operand:SI 2 "nonimmediate_operand" "r,m")))
960	  (match_operand:V4SF 1 "register_operand" "0,0")
961	  (const_int 1)))]
962  "TARGET_SSE"
963  "cvtsi2ss\t{%2, %0|%0, %2}"
964  [(set_attr "type" "sseicvt")
965   (set_attr "athlon_decode" "vector,double")
966   (set_attr "mode" "SF")])
967
;; DI-source variant of sse_cvtsi2ss: convert DI operand 2 to SF into
;; element 0 of the result (merge mask 1), other elements from operand 1.
;; Additionally requires TARGET_64BIT.
;; NOTE(review): the second alternative is "rm" here but plain "m" in
;; sse_cvtsi2ss -- confirm the difference is intentional.
968(define_insn "sse_cvtsi2ssq"
969  [(set (match_operand:V4SF 0 "register_operand" "=x,x")
970	(vec_merge:V4SF
971	  (vec_duplicate:V4SF
972	    (float:SF (match_operand:DI 2 "nonimmediate_operand" "r,rm")))
973	  (match_operand:V4SF 1 "register_operand" "0,0")
974	  (const_int 1)))]
975  "TARGET_SSE && TARGET_64BIT"
976  "cvtsi2ssq\t{%2, %0|%0, %2}"
977  [(set_attr "type" "sseicvt")
978   (set_attr "athlon_decode" "vector,double")
979   (set_attr "mode" "SF")])
980
;; Convert element 0 of V4SF operand 1 (register or memory) to an SI in a
;; general register; the conversion is modelled as UNSPEC_FIX_NOTRUNC.
;; Emitted as cvtss2si.
981(define_insn "sse_cvtss2si"
982  [(set (match_operand:SI 0 "register_operand" "=r,r")
983	(unspec:SI
984	  [(vec_select:SF
985	     (match_operand:V4SF 1 "nonimmediate_operand" "x,m")
986	     (parallel [(const_int 0)]))]
987	  UNSPEC_FIX_NOTRUNC))]
988  "TARGET_SSE"
989  "cvtss2si\t{%1, %0|%0, %1}"
990  [(set_attr "type" "sseicvt")
991   (set_attr "athlon_decode" "double,vector")
992   (set_attr "mode" "SI")])
993
;; DI-result variant of sse_cvtss2si: element 0 of operand 1 converted via
;; UNSPEC_FIX_NOTRUNC into a 64-bit general register.  Requires
;; TARGET_64BIT in addition to TARGET_SSE.
994(define_insn "sse_cvtss2siq"
995  [(set (match_operand:DI 0 "register_operand" "=r,r")
996	(unspec:DI
997	  [(vec_select:SF
998	     (match_operand:V4SF 1 "nonimmediate_operand" "x,m")
999	     (parallel [(const_int 0)]))]
1000	  UNSPEC_FIX_NOTRUNC))]
1001  "TARGET_SSE && TARGET_64BIT"
1002  "cvtss2siq\t{%1, %0|%0, %1}"
1003  [(set_attr "type" "sseicvt")
1004   (set_attr "athlon_decode" "double,vector")
1005   (set_attr "mode" "DI")])
1006
;; Convert element 0 of V4SF operand 1 to an SI in a general register,
;; expressed with the RTL "fix" operator (contrast with the
;; UNSPEC_FIX_NOTRUNC form used by sse_cvtss2si).  Emitted as cvttss2si.
1007(define_insn "sse_cvttss2si"
1008  [(set (match_operand:SI 0 "register_operand" "=r,r")
1009	(fix:SI
1010	  (vec_select:SF
1011	    (match_operand:V4SF 1 "nonimmediate_operand" "x,m")
1012	    (parallel [(const_int 0)]))))]
1013  "TARGET_SSE"
1014  "cvttss2si\t{%1, %0|%0, %1}"
1015  [(set_attr "type" "sseicvt")
1016   (set_attr "athlon_decode" "double,vector")
1017   (set_attr "mode" "SI")])
1018
;; DI-result variant of sse_cvttss2si: "fix" of element 0 of operand 1 into
;; a 64-bit general register.  Requires TARGET_64BIT in addition to
;; TARGET_SSE.
1019(define_insn "sse_cvttss2siq"
1020  [(set (match_operand:DI 0 "register_operand" "=r,r")
1021	(fix:DI
1022	  (vec_select:SF
1023	    (match_operand:V4SF 1 "nonimmediate_operand" "x,m")
1024	    (parallel [(const_int 0)]))))]
1025  "TARGET_SSE && TARGET_64BIT"
1026  "cvttss2siq\t{%1, %0|%0, %1}"
1027  [(set_attr "type" "sseicvt")
1028   (set_attr "athlon_decode" "double,vector")
1029   (set_attr "mode" "DI")])
1030
;; Convert the four SI elements of operand 1 (register or memory) to a
;; V4SF.  Emitted as cvtdq2ps.
;; The "mode" attribute is V4SF because the insn produces packed
;; single-precision values; it was previously mislabelled V2DF.
1031(define_insn "sse2_cvtdq2ps"
1032  [(set (match_operand:V4SF 0 "register_operand" "=x")
1033	(float:V4SF (match_operand:V4SI 1 "nonimmediate_operand" "xm")))]
1034  "TARGET_SSE2"
1035  "cvtdq2ps\t{%1, %0|%0, %1}"
1036  [(set_attr "type" "ssecvt")
1037   (set_attr "mode" "V4SF")])
1038
;; Convert V4SF operand 1 to a V4SI; the conversion is modelled as
;; UNSPEC_FIX_NOTRUNC.  Emitted as cvtps2dq.
1039(define_insn "sse2_cvtps2dq"
1040  [(set (match_operand:V4SI 0 "register_operand" "=x")
1041	(unspec:V4SI [(match_operand:V4SF 1 "nonimmediate_operand" "xm")]
1042		     UNSPEC_FIX_NOTRUNC))]
1043  "TARGET_SSE2"
1044  "cvtps2dq\t{%1, %0|%0, %1}"
1045  [(set_attr "type" "ssecvt")
1046   (set_attr "mode" "TI")])
1047
;; Convert V4SF operand 1 to a V4SI using the RTL "fix" operator.
;; Emitted as cvttps2dq.
1048(define_insn "sse2_cvttps2dq"
1049  [(set (match_operand:V4SI 0 "register_operand" "=x")
1050	(fix:V4SI (match_operand:V4SF 1 "nonimmediate_operand" "xm")))]
1051  "TARGET_SSE2"
1052  "cvttps2dq\t{%1, %0|%0, %1}"
1053  [(set_attr "type" "ssecvt")
1054   (set_attr "mode" "TI")])
1055
1056;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
1057;;
1058;; Parallel single-precision floating point element swizzling
1059;;
1060;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
1061
;; Result is { op2[2], op2[3], op1[2], op1[3] }: the high half of operand 2
;; lands in the low half of the destination and the high half is kept from
;; operand 1.  Operand 1 is therefore the operand tied to the destination
;; in every alternative, and operand 2 is the one that is read:
;;   0: reg <- reg  movhlps copies the high half of operand 2 down.
;;   1: reg <- mem  movlps loads the high 8 bytes (%H2) of operand 2 into
;;                  the low half of the destination.
;;   2: mem <- reg  movhps stores the high half of operand 2 into the low
;;                  8 bytes of the destination.
;; Previously alternatives 1 and 2 tied operand 2 and read operand 1, which
;; produced { op1[2], op1[3], op2[2], op2[3] } -- not what the RTL says.
1062(define_insn "sse_movhlps"
1063  [(set (match_operand:V4SF 0 "nonimmediate_operand"     "=x,x,m")
1064	(vec_select:V4SF
1065	  (vec_concat:V8SF
1066	    (match_operand:V4SF 1 "nonimmediate_operand" " 0,0,0")
1067	    (match_operand:V4SF 2 "nonimmediate_operand" " x,o,x"))
1068	  (parallel [(const_int 6)
1069		     (const_int 7)
1070		     (const_int 2)
1071		     (const_int 3)])))]
1072  "TARGET_SSE && !(MEM_P (operands[1]) && MEM_P (operands[2]))"
1073  "@
1074   movhlps\t{%2, %0|%0, %2}
1075   movlps\t{%H2, %0|%0, %H2}
1076   movhps\t{%2, %0|%0, %2}"
1077  [(set_attr "type" "ssemov")
1078   (set_attr "mode" "V4SF,V2SF,V2SF")])
1079
;; Result is { op1[0], op1[1], op2[0], op2[1] }: the low half of operand 2
;; is placed in the high half of the destination; operand 1 is tied to the
;; destination in every alternative.
;;   0: reg <- reg  movlhps
;;   1: reg <- mem  movhps loads operand 2 into the high half
;;   2: mem <- reg  movlps stores operand 2 at the high 8 bytes (%H0)
1080(define_insn "sse_movlhps"
1081  [(set (match_operand:V4SF 0 "nonimmediate_operand"     "=x,x,o")
1082	(vec_select:V4SF
1083	  (vec_concat:V8SF
1084	    (match_operand:V4SF 1 "nonimmediate_operand" " 0,0,0")
1085	    (match_operand:V4SF 2 "nonimmediate_operand" " x,m,x"))
1086	  (parallel [(const_int 0)
1087		     (const_int 1)
1088		     (const_int 4)
1089		     (const_int 5)])))]
1090  "TARGET_SSE && ix86_binary_operator_ok (UNKNOWN, V4SFmode, operands)"
1091  "@
1092   movlhps\t{%2, %0|%0, %2}
1093   movhps\t{%2, %0|%0, %2}
1094   movlps\t{%2, %H0|%H0, %2}"
1095  [(set_attr "type" "ssemov")
1096   (set_attr "mode" "V4SF,V2SF,V2SF")])
1097
;; Interleave the high halves of operands 1 and 2:
;; result = { op1[2], op2[2], op1[3], op2[3] }.  Operand 1 is tied to the
;; destination.  Emitted as unpckhps.
1098(define_insn "sse_unpckhps"
1099  [(set (match_operand:V4SF 0 "register_operand" "=x")
1100	(vec_select:V4SF
1101	  (vec_concat:V8SF
1102	    (match_operand:V4SF 1 "register_operand" "0")
1103	    (match_operand:V4SF 2 "nonimmediate_operand" "xm"))
1104	  (parallel [(const_int 2) (const_int 6)
1105		     (const_int 3) (const_int 7)])))]
1106  "TARGET_SSE"
1107  "unpckhps\t{%2, %0|%0, %2}"
1108  [(set_attr "type" "sselog")
1109   (set_attr "mode" "V4SF")])
1110
;; Interleave the low halves of operands 1 and 2:
;; result = { op1[0], op2[0], op1[1], op2[1] }.  Operand 1 is tied to the
;; destination.  Emitted as unpcklps.
1111(define_insn "sse_unpcklps"
1112  [(set (match_operand:V4SF 0 "register_operand" "=x")
1113	(vec_select:V4SF
1114	  (vec_concat:V8SF
1115	    (match_operand:V4SF 1 "register_operand" "0")
1116	    (match_operand:V4SF 2 "nonimmediate_operand" "xm"))
1117	  (parallel [(const_int 0) (const_int 4)
1118		     (const_int 1) (const_int 5)])))]
1119  "TARGET_SSE"
1120  "unpcklps\t{%2, %0|%0, %2}"
1121  [(set_attr "type" "sselog")
1122   (set_attr "mode" "V4SF")])
1123
1124;; These are modeled with the same vec_concat as the others so that we
1125;; capture users of shufps that can use the new instructions
;; Duplicate the odd-numbered elements of operand 1.  The vec_concat joins
;; operand 1 with itself, so selector indices 1,1,7,7 pick
;; { op1[1], op1[1], op1[3], op1[3] }.  Emitted as movshdup.
1126(define_insn "sse3_movshdup"
1127  [(set (match_operand:V4SF 0 "register_operand" "=x")
1128	(vec_select:V4SF
1129	  (vec_concat:V8SF
1130	    (match_operand:V4SF 1 "nonimmediate_operand" "xm")
1131	    (match_dup 1))
1132	  (parallel [(const_int 1)
1133		     (const_int 1)
1134		     (const_int 7)
1135		     (const_int 7)])))]
1136  "TARGET_SSE3"
1137  "movshdup\t{%1, %0|%0, %1}"
1138  [(set_attr "type" "sse")
1139   (set_attr "mode" "V4SF")])
1140
;; Duplicate the even-numbered elements of operand 1.  The vec_concat joins
;; operand 1 with itself, so selector indices 0,0,6,6 pick
;; { op1[0], op1[0], op1[2], op1[2] }.  Emitted as movsldup.
1141(define_insn "sse3_movsldup"
1142  [(set (match_operand:V4SF 0 "register_operand" "=x")
1143	(vec_select:V4SF
1144	  (vec_concat:V8SF
1145	    (match_operand:V4SF 1 "nonimmediate_operand" "xm")
1146	    (match_dup 1))
1147	  (parallel [(const_int 0)
1148		     (const_int 0)
1149		     (const_int 6)
1150		     (const_int 6)])))]
1151  "TARGET_SSE3"
1152  "movsldup\t{%1, %0|%0, %1}"
1153  [(set_attr "type" "sse")
1154   (set_attr "mode" "V4SF")])
1155
;; Expander taking the shufps immediate as operand 3.  The 8-bit mask is
;; split into four 2-bit fields which become explicit selector indices for
;; sse_shufps_1: the two low fields index operand 1 (0..3), the two high
;; fields index operand 2 and are biased by 4 (4..7).
1156(define_expand "sse_shufps"
1157  [(match_operand:V4SF 0 "register_operand" "")
1158   (match_operand:V4SF 1 "register_operand" "")
1159   (match_operand:V4SF 2 "nonimmediate_operand" "")
1160   (match_operand:SI 3 "const_int_operand" "")]
1161  "TARGET_SSE"
1162{
1163  int mask = INTVAL (operands[3]);
1164  emit_insn (gen_sse_shufps_1 (operands[0], operands[1], operands[2],
1165			       GEN_INT ((mask >> 0) & 3),
1166			       GEN_INT ((mask >> 2) & 3),
1167			       GEN_INT (((mask >> 4) & 3) + 4),
1168			       GEN_INT (((mask >> 6) & 3) + 4)));
1169  DONE;
1170})
1171
;; shufps with the selection spelled out as a vec_select over the
;; concatenation of operands 1 and 2.  Operands 3 and 4 are indices 0..3
;; (into operand 1), operands 5 and 6 are indices 4..7 (into operand 2).
;; The output code folds the four indices back into the 8-bit immediate,
;; removing the +4 bias from operands 5 and 6, and overwrites operands[3]
;; with it so the template can print it as %3.
1172(define_insn "sse_shufps_1"
1173  [(set (match_operand:V4SF 0 "register_operand" "=x")
1174	(vec_select:V4SF
1175	  (vec_concat:V8SF
1176	    (match_operand:V4SF 1 "register_operand" "0")
1177	    (match_operand:V4SF 2 "nonimmediate_operand" "xm"))
1178	  (parallel [(match_operand 3 "const_0_to_3_operand" "")
1179		     (match_operand 4 "const_0_to_3_operand" "")
1180		     (match_operand 5 "const_4_to_7_operand" "")
1181		     (match_operand 6 "const_4_to_7_operand" "")])))]
1182  "TARGET_SSE"
1183{
1184  int mask = 0;
1185  mask |= INTVAL (operands[3]) << 0;
1186  mask |= INTVAL (operands[4]) << 2;
1187  mask |= (INTVAL (operands[5]) - 4) << 4;
1188  mask |= (INTVAL (operands[6]) - 4) << 6;
1189  operands[3] = GEN_INT (mask);
1190
1191  return "shufps\t{%3, %2, %0|%0, %2, %3}";
1192}
1193  [(set_attr "type" "sselog")
1194   (set_attr "mode" "V4SF")])
1195
;; Extract the high half (elements 2 and 3) of V4SF operand 1 as a V2SF.
;;   0: mem <- reg  movhps store
;;   1: reg <- reg  movhlps
;;   2: reg <- mem  movlps load from the high 8 bytes (%H1) of operand 1
1196(define_insn "sse_storehps"
1197  [(set (match_operand:V2SF 0 "nonimmediate_operand" "=m,x,x")
1198	(vec_select:V2SF
1199	  (match_operand:V4SF 1 "nonimmediate_operand" "x,x,o")
1200	  (parallel [(const_int 2) (const_int 3)])))]
1201  "TARGET_SSE"
1202  "@
1203   movhps\t{%1, %0|%0, %1}
1204   movhlps\t{%1, %0|%0, %1}
1205   movlps\t{%H1, %0|%0, %H1}"
1206  [(set_attr "type" "ssemov")
1207   (set_attr "mode" "V2SF,V4SF,V2SF")])
1208
;; Replace the high half of a V4SF: result = { op1[0], op1[1], op2 } where
;; operand 2 is a V2SF.  Operand 1 is tied to the destination.
;;   0: reg, op2 in memory    movhps load
;;   1: reg, op2 in register  movlhps
;;   2: mem, op2 in register  movlps store at the high 8 bytes (%H0)
1209(define_insn "sse_loadhps"
1210  [(set (match_operand:V4SF 0 "nonimmediate_operand" "=x,x,o")
1211	(vec_concat:V4SF
1212	  (vec_select:V2SF
1213	    (match_operand:V4SF 1 "nonimmediate_operand" "0,0,0")
1214	    (parallel [(const_int 0) (const_int 1)]))
1215	  (match_operand:V2SF 2 "nonimmediate_operand" "m,x,x")))]
1216  "TARGET_SSE"
1217  "@
1218   movhps\t{%2, %0|%0, %2}
1219   movlhps\t{%2, %0|%0, %2}
1220   movlps\t{%2, %H0|%H0, %2}"
1221  [(set_attr "type" "ssemov")
1222   (set_attr "mode" "V2SF,V4SF,V2SF")])
1223
;; Extract the low half (elements 0 and 1) of V4SF operand 1 as a V2SF.
;;   0: mem <- reg  movlps store
;;   1: reg <- reg  movaps (whole-register copy)
;;   2: reg <- mem  movlps load
1224(define_insn "sse_storelps"
1225  [(set (match_operand:V2SF 0 "nonimmediate_operand" "=m,x,x")
1226	(vec_select:V2SF
1227	  (match_operand:V4SF 1 "nonimmediate_operand" "x,x,m")
1228	  (parallel [(const_int 0) (const_int 1)])))]
1229  "TARGET_SSE"
1230  "@
1231   movlps\t{%1, %0|%0, %1}
1232   movaps\t{%1, %0|%0, %1}
1233   movlps\t{%1, %0|%0, %1}"
1234  [(set_attr "type" "ssemov")
1235   (set_attr "mode" "V2SF,V4SF,V2SF")])
1236
;; Replace the low half of a V4SF: result = { op2, op1[2], op1[3] } where
;; operand 2 is a V2SF.
;;   0: op2 tied to the destination, op1 in a register: shufps with
;;      immediate 0xe4 keeps the destination's low half and takes
;;      elements 2 and 3 from operand 1.
;;   1: op1 tied, op2 in memory: movlps load.
;;   2: destination in memory (op1 tied), op2 in a register: movlps store.
1237(define_insn "sse_loadlps"
1238  [(set (match_operand:V4SF 0 "nonimmediate_operand" "=x,x,m")
1239	(vec_concat:V4SF
1240	  (match_operand:V2SF 2 "nonimmediate_operand" "0,m,x")
1241	  (vec_select:V2SF
1242	    (match_operand:V4SF 1 "nonimmediate_operand" "x,0,0")
1243	    (parallel [(const_int 2) (const_int 3)]))))]
1244  "TARGET_SSE"
1245  "@
1246   shufps\t{$0xe4, %1, %0|%0, %1, 0xe4}
1247   movlps\t{%2, %0|%0, %2}
1248   movlps\t{%2, %0|%0, %2}"
1249  [(set_attr "type" "sselog,ssemov,ssemov")
1250   (set_attr "mode" "V4SF,V2SF,V2SF")])
1251
;; Register-to-register movss: element 0 of the result comes from operand 2
;; (merge mask 1), the remaining elements from operand 1, which is tied to
;; the destination.
1252(define_insn "sse_movss"
1253  [(set (match_operand:V4SF 0 "register_operand" "=x")
1254	(vec_merge:V4SF
1255	  (match_operand:V4SF 2 "register_operand" "x")
1256	  (match_operand:V4SF 1 "register_operand" "0")
1257	  (const_int 1)))]
1258  "TARGET_SSE"
1259  "movss\t{%2, %0|%0, %2}"
1260  [(set_attr "type" "ssemov")
1261   (set_attr "mode" "SF")])
1262
;; Broadcast SF operand 1 to all four elements.  The scalar is tied to the
;; destination register, and shufps with immediate 0 on that register
;; replicates element 0.
1263(define_insn "*vec_dupv4sf"
1264  [(set (match_operand:V4SF 0 "register_operand" "=x")
1265	(vec_duplicate:V4SF
1266	  (match_operand:SF 1 "register_operand" "0")))]
1267  "TARGET_SSE"
1268  "shufps\t{$0, %0, %0|%0, %0, 0}"
1269  [(set_attr "type" "sselog1")
1270   (set_attr "mode" "V4SF")])
1271
1272;; ??? In theory we can match memory for the MMX alternative, but allowing
1273;; nonimmediate_operand for operand 2 and *not* allowing memory for the SSE
1274;; alternatives pretty much forces the MMX alternative to be chosen.
;; Build a V2SF from two SF values.  Alternatives 0-1 use SSE registers,
;; alternatives 2-3 use MMX registers (both marked "*").
;;   0: op1 tied, op2 in an SSE register   unpcklps
;;   1: op1 in memory, op2 constraint "C"  movss load
;;   2: op1 tied, op2 in an MMX register   punpckldq
;;   3: op1 in memory, op2 constraint "C"  movd load
;; NOTE(review): "C" together with reg_or_0_operand suggests the zero
;; constant -- confirm against the constraint definitions.
1275(define_insn "*sse_concatv2sf"
1276  [(set (match_operand:V2SF 0 "register_operand"     "=x,x,*y,*y")
1277	(vec_concat:V2SF
1278	  (match_operand:SF 1 "nonimmediate_operand" " 0,m, 0, m")
1279	  (match_operand:SF 2 "reg_or_0_operand"     " x,C,*y, C")))]
1280  "TARGET_SSE"
1281  "@
1282   unpcklps\t{%2, %0|%0, %2}
1283   movss\t{%1, %0|%0, %1}
1284   punpckldq\t{%2, %0|%0, %2}
1285   movd\t{%1, %0|%0, %1}"
1286  [(set_attr "type" "sselog,ssemov,mmxcvt,mmxmov")
1287   (set_attr "mode" "V4SF,SF,DI,DI")])
1288
;; Build a V4SF from two V2SF halves.  Operand 1 (the low half) is tied to
;; the destination; operand 2 supplies the high half via movlhps when it is
;; in a register or movhps when it is in memory.
1289(define_insn "*sse_concatv4sf"
1290  [(set (match_operand:V4SF 0 "register_operand"   "=x,x")
1291	(vec_concat:V4SF
1292	  (match_operand:V2SF 1 "register_operand" " 0,0")
1293	  (match_operand:V2SF 2 "nonimmediate_operand" " x,m")))]
1294  "TARGET_SSE"
1295  "@
1296   movlhps\t{%2, %0|%0, %2}
1297   movhps\t{%2, %0|%0, %2}"
1298  [(set_attr "type" "ssemov")
1299   (set_attr "mode" "V4SF,V2SF")])
1300
;; V4SF vector initialisation expander: all work is delegated to
;; ix86_expand_vector_init, called with `false' as its first argument.
1301(define_expand "vec_initv4sf"
1302  [(match_operand:V4SF 0 "register_operand" "")
1303   (match_operand 1 "" "")]
1304  "TARGET_SSE"
1305{
1306  ix86_expand_vector_init (false, operands[0], operands[1]);
1307  DONE;
1308})
1309
;; Set element 0 of a V4SF from SF operand 2 (merge mask 1); the remaining
;; elements come from operand 1.
;;   0: op1 tied, op2 in an SSE register       movss
;;   1: op1 constraint "C", op2 in memory      movss load
;;   2: op1 constraint "C", op2 constraint *r  movd
;;   3: destination in memory, op1 tied        "#", i.e. must be split
;; NOTE(review): "C" with vector_move_operand presumably denotes the zero
;; vector -- confirm against the constraint definitions.
1310(define_insn "*vec_setv4sf_0"
1311  [(set (match_operand:V4SF 0 "nonimmediate_operand"  "=x,x,Y ,m")
1312	(vec_merge:V4SF
1313	  (vec_duplicate:V4SF
1314	    (match_operand:SF 2 "general_operand"     " x,m,*r,x*rfF"))
1315	  (match_operand:V4SF 1 "vector_move_operand" " 0,C,C ,0")
1316	  (const_int 1)))]
1317  "TARGET_SSE"
1318  "@
1319   movss\t{%2, %0|%0, %2}
1320   movss\t{%2, %0|%0, %2}
1321   movd\t{%2, %0|%0, %2}
1322   #"
1323  [(set_attr "type" "ssemov")
1324   (set_attr "mode" "SF")])
1325
;; After reload, merging a non-memory scalar into element 0 of a V4SF that
;; lives in memory (where the vector merged with is the destination itself)
;; is rewritten as a plain move of the scalar to the SFmode word at offset 0
;; of that memory.
1326(define_split
1327  [(set (match_operand:V4SF 0 "memory_operand" "")
1328	(vec_merge:V4SF
1329	  (vec_duplicate:V4SF
1330	    (match_operand:SF 1 "nonmemory_operand" ""))
1331	  (match_dup 0)
1332	  (const_int 1)))]
1333  "TARGET_SSE && reload_completed"
1334  [(const_int 0)]
1335{
1336  emit_move_insn (adjust_address (operands[0], SFmode, 0), operands[1]);
1337  DONE;
1338})
1339
;; V4SF element-set expander: operand 2 is a constant element index whose
;; value is passed, with the vector and the scalar, to
;; ix86_expand_vector_set (first argument `false').
1340(define_expand "vec_setv4sf"
1341  [(match_operand:V4SF 0 "register_operand" "")
1342   (match_operand:SF 1 "register_operand" "")
1343   (match_operand 2 "const_int_operand" "")]
1344  "TARGET_SSE"
1345{
1346  ix86_expand_vector_set (false, operands[0], operands[1],
1347			  INTVAL (operands[2]));
1348  DONE;
1349})
1350
;; Extract element 0 of a V4SF.  The insn has no assembler output of its
;; own ("#"); after reload it is split into an ordinary SFmode move.  A
;; register source is re-expressed as an SFmode register with the same
;; register number, anything else goes through gen_lowpart.  The insn
;; condition rejects the case where both operands are memory.
1351(define_insn_and_split "*vec_extractv4sf_0"
1352  [(set (match_operand:SF 0 "nonimmediate_operand" "=x,m,fr")
1353	(vec_select:SF
1354	  (match_operand:V4SF 1 "nonimmediate_operand" "xm,x,m")
1355	  (parallel [(const_int 0)])))]
1356  "TARGET_SSE && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
1357  "#"
1358  "&& reload_completed"
1359  [(const_int 0)]
1360{
1361  rtx op1 = operands[1];
1362  if (REG_P (op1))
1363    op1 = gen_rtx_REG (SFmode, REGNO (op1));
1364  else
1365    op1 = gen_lowpart (SFmode, op1);
1366  emit_move_insn (operands[0], op1);
1367  DONE;
1368})
1369
;; V4SF element-extract expander: operand 2 is a constant element index
;; whose value is passed, with the destination and the vector, to
;; ix86_expand_vector_extract (first argument `false').
1370(define_expand "vec_extractv4sf"
1371  [(match_operand:SF 0 "register_operand" "")
1372   (match_operand:V4SF 1 "register_operand" "")
1373   (match_operand 2 "const_int_operand" "")]
1374  "TARGET_SSE"
1375{
1376  ix86_expand_vector_extract (false, operands[0], operands[1],
1377			      INTVAL (operands[2]));
1378  DONE;
1379})
1380
1381;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
1382;;
1383;; Parallel double-precision floating point arithmetic
1384;;
1385;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
1386
;; V2DF negation expander; fully handled by
;; ix86_expand_fp_absneg_operator (NEG, ...), after which expansion is DONE.
1387(define_expand "negv2df2"
1388  [(set (match_operand:V2DF 0 "register_operand" "")
1389	(neg:V2DF (match_operand:V2DF 1 "nonimmediate_operand" "")))]
1390  "TARGET_SSE2"
1391  "ix86_expand_fp_absneg_operator (NEG, V2DFmode, operands); DONE;")
1392
;; V2DF absolute-value expander; fully handled by
;; ix86_expand_fp_absneg_operator (ABS, ...), after which expansion is DONE.
1393(define_expand "absv2df2"
1394  [(set (match_operand:V2DF 0 "register_operand" "")
1395	(abs:V2DF (match_operand:V2DF 1 "nonimmediate_operand" "")))]
1396  "TARGET_SSE2"
1397  "ix86_expand_fp_absneg_operator (ABS, V2DFmode, operands); DONE;")
1398
;; Expander for V2DF addition.  The operands are passed through
;; ix86_fixup_binary_operands_no_copy before the pattern is emitted.
1399(define_expand "addv2df3"
1400  [(set (match_operand:V2DF 0 "register_operand" "")
1401	(plus:V2DF (match_operand:V2DF 1 "nonimmediate_operand" "")
1402		   (match_operand:V2DF 2 "nonimmediate_operand" "")))]
1403  "TARGET_SSE2"
1404  "ix86_fixup_binary_operands_no_copy (PLUS, V2DFmode, operands);")
1405
;; Matcher for V2DF addition.  Operand 1 is tied to the destination and
;; commutative ("%0"); operand 2 may be a register or memory.  Emitted as
;; addpd.
1406(define_insn "*addv2df3"
1407  [(set (match_operand:V2DF 0 "register_operand" "=x")
1408	(plus:V2DF (match_operand:V2DF 1 "nonimmediate_operand" "%0")
1409		   (match_operand:V2DF 2 "nonimmediate_operand" "xm")))]
1410  "TARGET_SSE2 && ix86_binary_operator_ok (PLUS, V2DFmode, operands)"
1411  "addpd\t{%2, %0|%0, %2}"
1412  [(set_attr "type" "sseadd")
1413   (set_attr "mode" "V2DF")])
1414
;; Scalar-in-vector add: element 0 of the result is op1[0] + op2[0] (merge
;; mask 1); element 1 is copied from operand 1.  Emitted as addsd.
;; The insn condition checks the operands in V2DFmode, the mode of this
;; pattern (it previously named V4SFmode).
;; NOTE(review): operand 1 is both commutative ("%") and the source of the
;; preserved upper element (match_dup 1); confirm that swapping operands 1
;; and 2 cannot change which vector supplies element 1.
1415(define_insn "sse2_vmaddv2df3"
1416  [(set (match_operand:V2DF 0 "register_operand" "=x")
1417	(vec_merge:V2DF
1418	  (plus:V2DF (match_operand:V2DF 1 "nonimmediate_operand" "%0")
1419		     (match_operand:V2DF 2 "nonimmediate_operand" "xm"))
1420	  (match_dup 1)
1421	  (const_int 1)))]
1422  "TARGET_SSE2 && ix86_binary_operator_ok (PLUS, V2DFmode, operands)"
1423  "addsd\t{%2, %0|%0, %2}"
1424  [(set_attr "type" "sseadd")
1425   (set_attr "mode" "DF")])
1426
;; Expander for V2DF subtraction.  The operands are passed through
;; ix86_fixup_binary_operands_no_copy before the pattern is emitted.
1427(define_expand "subv2df3"
1428  [(set (match_operand:V2DF 0 "register_operand" "")
1429	(minus:V2DF (match_operand:V2DF 1 "nonimmediate_operand" "")
1430		    (match_operand:V2DF 2 "nonimmediate_operand" "")))]
1431  "TARGET_SSE2"
1432  "ix86_fixup_binary_operands_no_copy (MINUS, V2DFmode, operands);")
1433
;; Matcher for V2DF subtraction.  Operand 1 must be a register tied to the
;; destination (no commutative marker); operand 2 may be a register or
;; memory.  Emitted as subpd.
1434(define_insn "*subv2df3"
1435  [(set (match_operand:V2DF 0 "register_operand" "=x")
1436	(minus:V2DF (match_operand:V2DF 1 "register_operand" "0")
1437		    (match_operand:V2DF 2 "nonimmediate_operand" "xm")))]
1438  "TARGET_SSE2"
1439  "subpd\t{%2, %0|%0, %2}"
1440  [(set_attr "type" "sseadd")
1441   (set_attr "mode" "V2DF")])
1442
;; Scalar-in-vector subtract: element 0 of the result is op1[0] - op2[0]
;; (merge mask 1); element 1 is copied from operand 1, which is tied to the
;; destination.  Emitted as subsd.
1443(define_insn "sse2_vmsubv2df3"
1444  [(set (match_operand:V2DF 0 "register_operand" "=x")
1445	(vec_merge:V2DF
1446	  (minus:V2DF (match_operand:V2DF 1 "register_operand" "0")
1447		      (match_operand:V2DF 2 "nonimmediate_operand" "xm"))
1448	  (match_dup 1)
1449	  (const_int 1)))]
1450  "TARGET_SSE2"
1451  "subsd\t{%2, %0|%0, %2}"
1452  [(set_attr "type" "sseadd")
1453   (set_attr "mode" "DF")])
1454
;; Expander for V2DF multiplication.  The operands are passed through
;; ix86_fixup_binary_operands_no_copy before the pattern is emitted.
1455(define_expand "mulv2df3"
1456  [(set (match_operand:V2DF 0 "register_operand" "")
1457	(mult:V2DF (match_operand:V2DF 1 "nonimmediate_operand" "")
1458		   (match_operand:V2DF 2 "nonimmediate_operand" "")))]
1459  "TARGET_SSE2"
1460  "ix86_fixup_binary_operands_no_copy (MULT, V2DFmode, operands);")
1461
;; Matcher for V2DF multiplication.  Operand 1 is tied to the destination
;; and commutative ("%0"); operand 2 may be a register or memory.  Emitted
;; as mulpd.
1462(define_insn "*mulv2df3"
1463  [(set (match_operand:V2DF 0 "register_operand" "=x")
1464	(mult:V2DF (match_operand:V2DF 1 "nonimmediate_operand" "%0")
1465		   (match_operand:V2DF 2 "nonimmediate_operand" "xm")))]
1466  "TARGET_SSE2 && ix86_binary_operator_ok (MULT, V2DFmode, operands)"
1467  "mulpd\t{%2, %0|%0, %2}"
1468  [(set_attr "type" "ssemul")
1469   (set_attr "mode" "V2DF")])
1470
;; Scalar-in-vector multiply: element 0 of the result is op1[0] * op2[0]
;; (merge mask 1); element 1 is copied from operand 1.  Emitted as mulsd.
;; NOTE(review): operand 1 is both commutative ("%") and the source of the
;; preserved upper element (match_dup 1); confirm that swapping operands 1
;; and 2 cannot change which vector supplies element 1.
1471(define_insn "sse2_vmmulv2df3"
1472  [(set (match_operand:V2DF 0 "register_operand" "=x")
1473	(vec_merge:V2DF
1474	  (mult:V2DF (match_operand:V2DF 1 "nonimmediate_operand" "%0")
1475		     (match_operand:V2DF 2 "nonimmediate_operand" "xm"))
1476	  (match_dup 1)
1477	  (const_int 1)))]
1478  "TARGET_SSE2 && ix86_binary_operator_ok (MULT, V2DFmode, operands)"
1479  "mulsd\t{%2, %0|%0, %2}"
1480  [(set_attr "type" "ssemul")
1481   (set_attr "mode" "DF")])
1482
;; Expander for V2DF division.  Unlike the add/sub/mul expanders, operand 1
;; already has to satisfy register_operand.  The operands are passed
;; through ix86_fixup_binary_operands_no_copy before the pattern is emitted.
1483(define_expand "divv2df3"
1484  [(set (match_operand:V2DF 0 "register_operand" "")
1485	(div:V2DF (match_operand:V2DF 1 "register_operand" "")
1486		  (match_operand:V2DF 2 "nonimmediate_operand" "")))]
1487  "TARGET_SSE2"
1488  "ix86_fixup_binary_operands_no_copy (DIV, V2DFmode, operands);")
1489
;; Matcher for V2DF division.  Operand 1 (the dividend) must be a register
;; tied to the destination; operand 2 may be a register or memory.
;; Emitted as divpd.
1490(define_insn "*divv2df3"
1491  [(set (match_operand:V2DF 0 "register_operand" "=x")
1492	(div:V2DF (match_operand:V2DF 1 "register_operand" "0")
1493		  (match_operand:V2DF 2 "nonimmediate_operand" "xm")))]
1494  "TARGET_SSE2"
1495  "divpd\t{%2, %0|%0, %2}"
1496  [(set_attr "type" "ssediv")
1497   (set_attr "mode" "V2DF")])
1498
;; Scalar-in-vector divide: element 0 of the result is op1[0] / op2[0]
;; (merge mask 1); element 1 is copied from operand 1, which is tied to the
;; destination.  Emitted as divsd.
1499(define_insn "sse2_vmdivv2df3"
1500  [(set (match_operand:V2DF 0 "register_operand" "=x")
1501	(vec_merge:V2DF
1502	  (div:V2DF (match_operand:V2DF 1 "register_operand" "0")
1503		    (match_operand:V2DF 2 "nonimmediate_operand" "xm"))
1504	  (match_dup 1)
1505	  (const_int 1)))]
1506  "TARGET_SSE2"
1507  "divsd\t{%2, %0|%0, %2}"
1508  [(set_attr "type" "ssediv")
1509   (set_attr "mode" "DF")])
1510
;; Element-wise V2DF square root of operand 1 (register or memory).
;; Emitted as sqrtpd; the source is not tied to the destination.
1511(define_insn "sqrtv2df2"
1512  [(set (match_operand:V2DF 0 "register_operand" "=x")
1513	(sqrt:V2DF (match_operand:V2DF 1 "nonimmediate_operand" "xm")))]
1514  "TARGET_SSE2"
1515  "sqrtpd\t{%1, %0|%0, %1}"
1516  [(set_attr "type" "sse")
1517   (set_attr "mode" "V2DF")])
1518
;; Scalar-in-vector square root: element 0 of the result is sqrt (op1[0])
;; (merge mask 1); element 1 is copied from operand 2, which is tied to the
;; destination.  Emitted as sqrtsd.
;; Operand 1 uses nonimmediate_operand so that the predicate agrees with
;; its "xm" constraint, and the "mode" attribute is DF like the other
;; scalar double-precision (...sd) patterns in this file.
1519(define_insn "sse2_vmsqrtv2df2"
1520  [(set (match_operand:V2DF 0 "register_operand" "=x")
1521	(vec_merge:V2DF
1522	  (sqrt:V2DF (match_operand:V2DF 1 "nonimmediate_operand" "xm"))
1523	  (match_operand:V2DF 2 "register_operand" "0")
1524	  (const_int 1)))]
1525  "TARGET_SSE2"
1526  "sqrtsd\t{%1, %0|%0, %1}"
1527  [(set_attr "type" "sse")
1528   (set_attr "mode" "DF")])
1529
1530;; ??? For !flag_finite_math_only, the representation with SMIN/SMAX
1531;; isn't really correct, as those rtl operators aren't defined when 
1532;; applied to NaNs.  Hopefully the optimizers won't get too smart on us.
1533
;; Expander for V2DF maximum.  Unless flag_finite_math_only is set,
;; operand 1 is first forced into a register; the operands then go through
;; ix86_fixup_binary_operands_no_copy.
1534(define_expand "smaxv2df3"
1535  [(set (match_operand:V2DF 0 "register_operand" "")
1536	(smax:V2DF (match_operand:V2DF 1 "nonimmediate_operand" "")
1537		   (match_operand:V2DF 2 "nonimmediate_operand" "")))]
1538  "TARGET_SSE2"
1539{
1540  if (!flag_finite_math_only)
1541    operands[1] = force_reg (V2DFmode, operands[1]);
1542  ix86_fixup_binary_operands_no_copy (SMAX, V2DFmode, operands);
1543})
1544
;; V2DF maximum, available only under flag_finite_math_only.  In this
;; variant operand 1 is commutative ("%0") and may be any nonimmediate
;; operand.  Emitted as maxpd.
1545(define_insn "*smaxv2df3_finite"
1546  [(set (match_operand:V2DF 0 "register_operand" "=x")
1547	(smax:V2DF (match_operand:V2DF 1 "nonimmediate_operand" "%0")
1548		   (match_operand:V2DF 2 "nonimmediate_operand" "xm")))]
1549  "TARGET_SSE2 && flag_finite_math_only
1550   && ix86_binary_operator_ok (SMAX, V2DFmode, operands)"
1551  "maxpd\t{%2, %0|%0, %2}"
1552  [(set_attr "type" "sseadd")
1553   (set_attr "mode" "V2DF")])
1554
;; V2DF maximum without the finite-math requirement.  Operand 1 must be a
;; register tied to the destination and is not marked commutative, so the
;; operand order is preserved.  Emitted as maxpd.
1555(define_insn "*smaxv2df3"
1556  [(set (match_operand:V2DF 0 "register_operand" "=x")
1557	(smax:V2DF (match_operand:V2DF 1 "register_operand" "0")
1558		   (match_operand:V2DF 2 "nonimmediate_operand" "xm")))]
1559  "TARGET_SSE2"
1560  "maxpd\t{%2, %0|%0, %2}"
1561  [(set_attr "type" "sseadd")
1562   (set_attr "mode" "V2DF")])
1563
;; Scalar-in-vector maximum, available only under flag_finite_math_only:
;; element 0 is the maximum of op1[0] and op2[0] (merge mask 1), element 1
;; is copied from operand 1.  Emitted as maxsd.
;; NOTE(review): operand 1 is both commutative ("%") and the source of the
;; preserved upper element (match_dup 1); confirm that swapping operands 1
;; and 2 cannot change which vector supplies element 1.
1564(define_insn "*sse2_vmsmaxv2df3_finite"
1565  [(set (match_operand:V2DF 0 "register_operand" "=x")
1566	(vec_merge:V2DF
1567	  (smax:V2DF (match_operand:V2DF 1 "nonimmediate_operand" "%0")
1568		     (match_operand:V2DF 2 "nonimmediate_operand" "xm"))
1569	  (match_dup 1)
1570	  (const_int 1)))]
1571  "TARGET_SSE2 && flag_finite_math_only
1572   && ix86_binary_operator_ok (SMAX, V2DFmode, operands)"
1573  "maxsd\t{%2, %0|%0, %2}"
1574  [(set_attr "type" "sseadd")
1575   (set_attr "mode" "DF")])
1576
;; Scalar-in-vector maximum without the finite-math requirement: element 0
;; is the maximum of op1[0] and op2[0] (merge mask 1), element 1 is copied
;; from operand 1, a register tied to the destination.  Emitted as maxsd.
1577(define_insn "sse2_vmsmaxv2df3"
1578  [(set (match_operand:V2DF 0 "register_operand" "=x")
1579	(vec_merge:V2DF
1580	  (smax:V2DF (match_operand:V2DF 1 "register_operand" "0")
1581		     (match_operand:V2DF 2 "nonimmediate_operand" "xm"))
1582	  (match_dup 1)
1583	  (const_int 1)))]
1584  "TARGET_SSE2"
1585  "maxsd\t{%2, %0|%0, %2}"
1586  [(set_attr "type" "sseadd")
1587   (set_attr "mode" "DF")])
1588
;; Expander for V2DF minimum.  Unless flag_finite_math_only is set,
;; operand 1 is first forced into a register; the operands then go through
;; ix86_fixup_binary_operands_no_copy.
1589(define_expand "sminv2df3"
1590  [(set (match_operand:V2DF 0 "register_operand" "")
1591	(smin:V2DF (match_operand:V2DF 1 "nonimmediate_operand" "")
1592		   (match_operand:V2DF 2 "nonimmediate_operand" "")))]
1593  "TARGET_SSE2"
1594{
1595  if (!flag_finite_math_only)
1596    operands[1] = force_reg (V2DFmode, operands[1]);
1597  ix86_fixup_binary_operands_no_copy (SMIN, V2DFmode, operands);
1598})
1599
;; V2DF minimum, available only under flag_finite_math_only.  In this
;; variant operand 1 is commutative ("%0") and may be any nonimmediate
;; operand.  Emitted as minpd.
1600(define_insn "*sminv2df3_finite"
1601  [(set (match_operand:V2DF 0 "register_operand" "=x")
1602	(smin:V2DF (match_operand:V2DF 1 "nonimmediate_operand" "%0")
1603		   (match_operand:V2DF 2 "nonimmediate_operand" "xm")))]
1604  "TARGET_SSE2 && flag_finite_math_only
1605   && ix86_binary_operator_ok (SMIN, V2DFmode, operands)"
1606  "minpd\t{%2, %0|%0, %2}"
1607  [(set_attr "type" "sseadd")
1608   (set_attr "mode" "V2DF")])
1609
;; V2DF minimum without the finite-math requirement.  Operand 1 must be a
;; register tied to the destination and is not marked commutative, so the
;; operand order is preserved.  Emitted as minpd.
1610(define_insn "*sminv2df3"
1611  [(set (match_operand:V2DF 0 "register_operand" "=x")
1612	(smin:V2DF (match_operand:V2DF 1 "register_operand" "0")
1613		   (match_operand:V2DF 2 "nonimmediate_operand" "xm")))]
1614  "TARGET_SSE2"
1615  "minpd\t{%2, %0|%0, %2}"
1616  [(set_attr "type" "sseadd")
1617   (set_attr "mode" "V2DF")])
1618
;; Scalar-in-vector minimum, available only under flag_finite_math_only:
;; element 0 is the minimum of op1[0] and op2[0] (merge mask 1), element 1
;; is copied from operand 1.  Emitted as minsd.
;; NOTE(review): operand 1 is both commutative ("%") and the source of the
;; preserved upper element (match_dup 1); confirm that swapping operands 1
;; and 2 cannot change which vector supplies element 1.
1619(define_insn "*sse2_vmsminv2df3_finite"
1620  [(set (match_operand:V2DF 0 "register_operand" "=x")
1621	(vec_merge:V2DF
1622	  (smin:V2DF (match_operand:V2DF 1 "nonimmediate_operand" "%0")
1623		     (match_operand:V2DF 2 "nonimmediate_operand" "xm"))
1624	  (match_dup 1)
1625	  (const_int 1)))]
1626  "TARGET_SSE2 && flag_finite_math_only
1627   && ix86_binary_operator_ok (SMIN, V2DFmode, operands)"
1628  "minsd\t{%2, %0|%0, %2}"
1629  [(set_attr "type" "sseadd")
1630   (set_attr "mode" "DF")])
1631
;; Scalar-in-vector minimum without the finite-math requirement: element 0
;; is the minimum of op1[0] and op2[0] (merge mask 1), element 1 is copied
;; from operand 1, a register tied to the destination.  Emitted as minsd.
1632(define_insn "sse2_vmsminv2df3"
1633  [(set (match_operand:V2DF 0 "register_operand" "=x")
1634	(vec_merge:V2DF
1635	  (smin:V2DF (match_operand:V2DF 1 "register_operand" "0")
1636		     (match_operand:V2DF 2 "nonimmediate_operand" "xm"))
1637	  (match_dup 1)
1638	  (const_int 1)))]
1639  "TARGET_SSE2"
1640  "minsd\t{%2, %0|%0, %2}"
1641  [(set_attr "type" "sseadd")
1642   (set_attr "mode" "DF")])
1643
;; addsubpd: element 0 of the result is op1[0] - op2[0] and element 1 is
;; op1[1] + op2[1].  A set bit in the vec_merge mask selects the
;; corresponding element from the first vector (the sum), so the mask is 2:
;; element 1 from the plus, element 0 from the minus.  (The mask was
;; previously 1, which described the add and the subtract the wrong way
;; round.)  Operand 1 is tied to the destination.
1644(define_insn "sse3_addsubv2df3"
1645  [(set (match_operand:V2DF 0 "register_operand" "=x")
1646	(vec_merge:V2DF
1647	  (plus:V2DF
1648	    (match_operand:V2DF 1 "register_operand" "0")
1649	    (match_operand:V2DF 2 "nonimmediate_operand" "xm"))
1650	  (minus:V2DF (match_dup 1) (match_dup 2))
1651	  (const_int 2)))]
1652  "TARGET_SSE3"
1653  "addsubpd\t{%2, %0|%0, %2}"
1654  [(set_attr "type" "sseadd")
1655   (set_attr "mode" "V2DF")])
1656
;; Horizontal add: result = { op1[0] + op1[1], op2[0] + op2[1] }.
;; Operand 1 is tied to the destination; operand 2 may be a register or
;; memory.  Emitted as haddpd.
1657(define_insn "sse3_haddv2df3"
1658  [(set (match_operand:V2DF 0 "register_operand" "=x")
1659	(vec_concat:V2DF
1660	  (plus:DF
1661	    (vec_select:DF
1662	      (match_operand:V2DF 1 "register_operand" "0")
1663	      (parallel [(const_int 0)]))
1664	    (vec_select:DF (match_dup 1) (parallel [(const_int 1)])))
1665	  (plus:DF
1666	    (vec_select:DF
1667	      (match_operand:V2DF 2 "nonimmediate_operand" "xm")
1668	      (parallel [(const_int 0)]))
1669	    (vec_select:DF (match_dup 2) (parallel [(const_int 1)])))))]
1670  "TARGET_SSE3"
1671  "haddpd\t{%2, %0|%0, %2}"
1672  [(set_attr "type" "sseadd")
1673   (set_attr "mode" "V2DF")])
1674
;; Horizontal subtract: result = { op1[0] - op1[1], op2[0] - op2[1] }.
;; Operand 1 is tied to the destination; operand 2 may be a register or
;; memory.  Emitted as hsubpd.
1675(define_insn "sse3_hsubv2df3"
1676  [(set (match_operand:V2DF 0 "register_operand" "=x")
1677	(vec_concat:V2DF
1678	  (minus:DF
1679	    (vec_select:DF
1680	      (match_operand:V2DF 1 "register_operand" "0")
1681	      (parallel [(const_int 0)]))
1682	    (vec_select:DF (match_dup 1) (parallel [(const_int 1)])))
1683	  (minus:DF
1684	    (vec_select:DF
1685	      (match_operand:V2DF 2 "nonimmediate_operand" "xm")
1686	      (parallel [(const_int 0)]))
1687	    (vec_select:DF (match_dup 2) (parallel [(const_int 1)])))))]
1688  "TARGET_SSE3"
1689  "hsubpd\t{%2, %0|%0, %2}"
1690  [(set_attr "type" "sseadd")
1691   (set_attr "mode" "V2DF")])
1692
;; Sum reduction of a V2DF: emits sse3_haddv2df3 with operand 1 used as
;; both inputs, so both result elements hold op1[0] + op1[1].
1693(define_expand "reduc_splus_v2df"
1694  [(match_operand:V2DF 0 "register_operand" "")
1695   (match_operand:V2DF 1 "register_operand" "")]
1696  "TARGET_SSE3"
1697{
1698  emit_insn (gen_sse3_haddv2df3 (operands[0], operands[1], operands[1]));
1699  DONE;
1700})
1701
1702;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
1703;;
1704;; Parallel double-precision floating point comparisons
1705;;
1706;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
1707
;; Packed double comparison.  Operator 3 is any code accepted by
;; sse_comparison_operator; operand 1 is tied to the destination.  The
;; mnemonic is built as cmp<cond>pd from the %D3 output modifier
;; (presumably the condition name of operator 3 -- confirm in the operand
;; printer).
1708(define_insn "sse2_maskcmpv2df3"
1709  [(set (match_operand:V2DF 0 "register_operand" "=x")
1710	(match_operator:V2DF 3 "sse_comparison_operator"
1711		[(match_operand:V2DF 1 "register_operand" "0")
1712		 (match_operand:V2DF 2 "nonimmediate_operand" "xm")]))]
1713  "TARGET_SSE2"
1714  "cmp%D3pd\t{%2, %0|%0, %2}"
1715  [(set_attr "type" "ssecmp")
1716   (set_attr "mode" "V2DF")])
1717
;; Scalar-in-vector comparison: element 0 holds the result of comparison
;; operator 3 (merge mask 1), element 1 is copied from operand 1, which is
;; tied to the destination.  The mnemonic is built as cmp<cond>sd from the
;; %D3 output modifier.
1718(define_insn "sse2_vmmaskcmpv2df3"
1719  [(set (match_operand:V2DF 0 "register_operand" "=x")
1720	(vec_merge:V2DF
1721	  (match_operator:V2DF 3 "sse_comparison_operator"
1722		[(match_operand:V2DF 1 "register_operand" "0")
1723		 (match_operand:V2DF 2 "nonimmediate_operand" "xm")])
1724	  (match_dup 1)
1725	  (const_int 1)))]
1726  "TARGET_SSE2"
1727  "cmp%D3sd\t{%2, %0|%0, %2}"
1728  [(set_attr "type" "ssecmp")
1729   (set_attr "mode" "DF")])
1730
;; Compare element 0 of operand 0 with element 0 of operand 1 and set
;; FLAGS_REG in CCFP mode.  Operand 0 must be a register; operand 1 may be
;; a register or memory.  Emitted as comisd.
1731(define_insn "sse2_comi"
1732  [(set (reg:CCFP FLAGS_REG)
1733	(compare:CCFP
1734	  (vec_select:DF
1735	    (match_operand:V2DF 0 "register_operand" "x")
1736	    (parallel [(const_int 0)]))
1737	  (vec_select:DF
1738	    (match_operand:V2DF 1 "nonimmediate_operand" "xm")
1739	    (parallel [(const_int 0)]))))]
1740  "TARGET_SSE2"
1741  "comisd\t{%1, %0|%0, %1}"
1742  [(set_attr "type" "ssecomi")
1743   (set_attr "mode" "DF")])
1744
;; Same comparison as sse2_comi but setting FLAGS_REG in CCFPU mode and
;; emitted as ucomisd.
1745(define_insn "sse2_ucomi"
1746  [(set (reg:CCFPU FLAGS_REG)
1747	(compare:CCFPU
1748	  (vec_select:DF
1749	    (match_operand:V2DF 0 "register_operand" "x")
1750	    (parallel [(const_int 0)]))
1751	  (vec_select:DF
1752	    (match_operand:V2DF 1 "nonimmediate_operand" "xm")
1753	    (parallel [(const_int 0)]))))]
1754  "TARGET_SSE2"
1755  "ucomisd\t{%1, %0|%0, %1}"
1756  [(set_attr "type" "ssecomi")
1757   (set_attr "mode" "DF")])
1758
;; V2DF vector conditional: operand 0 = (op4 <operator 3> op5) ? op1 : op2.
;; Expansion is delegated to ix86_expand_fp_vcond; the expander succeeds
;; (DONE) when that returns nonzero and FAILs otherwise.
1759(define_expand "vcondv2df"
1760  [(set (match_operand:V2DF 0 "register_operand" "")
1761        (if_then_else:V2DF
1762          (match_operator 3 ""
1763            [(match_operand:V2DF 4 "nonimmediate_operand" "")
1764             (match_operand:V2DF 5 "nonimmediate_operand" "")])
1765          (match_operand:V2DF 1 "general_operand" "")
1766          (match_operand:V2DF 2 "general_operand" "")))]
1767  "TARGET_SSE2"
1768{
1769  if (ix86_expand_fp_vcond (operands))
1770    DONE;
1771  else
1772    FAIL;
1773})
1774
1775;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
1776;;
1777;; Parallel double-precision floating point logical operations
1778;;
1779;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
1780
;; Expander for the V2DF bitwise AND.  The operands are passed through
;; ix86_fixup_binary_operands_no_copy before the pattern is emitted.
1781(define_expand "andv2df3"
1782  [(set (match_operand:V2DF 0 "register_operand" "")
1783	(and:V2DF (match_operand:V2DF 1 "nonimmediate_operand" "")
1784		  (match_operand:V2DF 2 "nonimmediate_operand" "")))]
1785  "TARGET_SSE2"
1786  "ix86_fixup_binary_operands_no_copy (AND, V2DFmode, operands);")
1787
;; Matcher for the V2DF AND.  Operand 1 is tied to the destination and
;; commutative ("%0"); operand 2 may be a register or memory.  Emitted as
;; andpd.
1788(define_insn "*andv2df3"
1789  [(set (match_operand:V2DF 0 "register_operand" "=x")
1790	(and:V2DF (match_operand:V2DF 1 "nonimmediate_operand" "%0")
1791		  (match_operand:V2DF 2 "nonimmediate_operand" "xm")))]
1792  "TARGET_SSE2 && ix86_binary_operator_ok (AND, V2DFmode, operands)"
1793  "andpd\t{%2, %0|%0, %2}"
1794  [(set_attr "type" "sselog")
1795   (set_attr "mode" "V2DF")])
1796
;; andnpd: operand 0 = (NOT operand 1) AND operand 2.  Not commutative,
;; so operand 1 must be the register tied to the destination.
1797(define_insn "sse2_nandv2df3"
1798  [(set (match_operand:V2DF 0 "register_operand" "=x")
1799	(and:V2DF (not:V2DF (match_operand:V2DF 1 "register_operand" "0"))
1800		  (match_operand:V2DF 2 "nonimmediate_operand" "xm")))]
1801  "TARGET_SSE2"
1802  "andnpd\t{%2, %0|%0, %2}"
1803  [(set_attr "type" "sselog")
1804   (set_attr "mode" "V2DF")])
1805
;; Bitwise inclusive OR of two V2DF vectors.  The expander only fixes up
;; the operands via ix86_fixup_binary_operands_no_copy; the anonymous
;; insn below then matches and emits orpd.
1806(define_expand "iorv2df3"
1807  [(set (match_operand:V2DF 0 "register_operand" "")
1808	(ior:V2DF (match_operand:V2DF 1 "nonimmediate_operand" "")
1809		  (match_operand:V2DF 2 "nonimmediate_operand" "")))]
1810  "TARGET_SSE2"
1811  "ix86_fixup_binary_operands_no_copy (IOR, V2DFmode, operands);")
1812
;; orpd: operand 1 is tied to the destination ("%0" marks the pair
;; commutative); operand 2 may be a register or memory.
1813(define_insn "*iorv2df3"
1814  [(set (match_operand:V2DF 0 "register_operand" "=x")
1815	(ior:V2DF (match_operand:V2DF 1 "nonimmediate_operand" "%0")
1816		  (match_operand:V2DF 2 "nonimmediate_operand" "xm")))]
1817  "TARGET_SSE2 && ix86_binary_operator_ok (IOR, V2DFmode, operands)"
1818  "orpd\t{%2, %0|%0, %2}"
1819  [(set_attr "type" "sselog")
1820   (set_attr "mode" "V2DF")])
1821
;; Bitwise exclusive OR of two V2DF vectors.  The expander only fixes up
;; the operands via ix86_fixup_binary_operands_no_copy; the anonymous
;; insn below then matches and emits xorpd.
1822(define_expand "xorv2df3"
1823  [(set (match_operand:V2DF 0 "register_operand" "")
1824	(xor:V2DF (match_operand:V2DF 1 "nonimmediate_operand" "")
1825		  (match_operand:V2DF 2 "nonimmediate_operand" "")))]
1826  "TARGET_SSE2"
1827  "ix86_fixup_binary_operands_no_copy (XOR, V2DFmode, operands);")
1828
;; xorpd: operand 1 is tied to the destination ("%0" marks the pair
;; commutative); operand 2 may be a register or memory.
1829(define_insn "*xorv2df3"
1830  [(set (match_operand:V2DF 0 "register_operand" "=x")
1831	(xor:V2DF (match_operand:V2DF 1 "nonimmediate_operand" "%0")
1832		  (match_operand:V2DF 2 "nonimmediate_operand" "xm")))]
1833  "TARGET_SSE2 && ix86_binary_operator_ok (XOR, V2DFmode, operands)"
1834  "xorpd\t{%2, %0|%0, %2}"
1835  [(set_attr "type" "sselog")
1836   (set_attr "mode" "V2DF")])
1837
1838;; Also define scalar versions.  These are used for abs, neg, and
1839;; conditional move.  Using subregs into vector modes causes register
1840;; allocation lossage.  These patterns do not allow memory operands
1841;; because the native instructions read the full 128 bits.
1842
;; Scalar DF bitwise AND done with the packed andpd instruction.
;; Both inputs must be registers; operand 1 is tied to the destination.
1843(define_insn "*anddf3"
1844  [(set (match_operand:DF 0 "register_operand" "=x")
1845	(and:DF (match_operand:DF 1 "register_operand" "0")
1846		(match_operand:DF 2 "register_operand" "x")))]
1847  "TARGET_SSE2"
1848  "andpd\t{%2, %0|%0, %2}"
1849  [(set_attr "type" "sselog")
1850   (set_attr "mode" "V2DF")])
1851
;; Scalar DF and-not done with the packed andnpd instruction:
;; operand 0 = (NOT operand 1) AND operand 2, registers only.
1852(define_insn "*nanddf3"
1853  [(set (match_operand:DF 0 "register_operand" "=x")
1854	(and:DF (not:DF (match_operand:DF 1 "register_operand" "0"))
1855		(match_operand:DF 2 "register_operand" "x")))]
1856  "TARGET_SSE2"
1857  "andnpd\t{%2, %0|%0, %2}"
1858  [(set_attr "type" "sselog")
1859   (set_attr "mode" "V2DF")])
1860
;; Scalar DF bitwise inclusive OR done with the packed orpd instruction.
;; Both inputs must be registers; operand 1 is tied to the destination.
1861(define_insn "*iordf3"
1862  [(set (match_operand:DF 0 "register_operand" "=x")
1863	(ior:DF (match_operand:DF 1 "register_operand" "0")
1864		(match_operand:DF 2 "register_operand" "x")))]
1865  "TARGET_SSE2"
1866  "orpd\t{%2, %0|%0, %2}"
1867  [(set_attr "type" "sselog")
1868   (set_attr "mode" "V2DF")])
1869
;; Scalar DF bitwise exclusive OR done with the packed xorpd instruction.
;; Both inputs must be registers; operand 1 is tied to the destination.
1870(define_insn "*xordf3"
1871  [(set (match_operand:DF 0 "register_operand" "=x")
1872	(xor:DF (match_operand:DF 1 "register_operand" "0")
1873		(match_operand:DF 2 "register_operand" "x")))]
1874  "TARGET_SSE2"
1875  "xorpd\t{%2, %0|%0, %2}"
1876  [(set_attr "type" "sselog")
1877   (set_attr "mode" "V2DF")])
1878
1879;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
1880;;
1881;; Parallel double-precision floating point conversion operations
1882;;
1883;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
1884
;; cvtpi2pd: convert a V2SI integer pair to V2DF.  Alternative 0 takes
;; the source from a "y" register and is tagged with unit "mmx";
;; alternative 1 takes it from memory.
1885(define_insn "sse2_cvtpi2pd"
1886  [(set (match_operand:V2DF 0 "register_operand" "=x,x")
1887	(float:V2DF (match_operand:V2SI 1 "nonimmediate_operand" "y,m")))]
1888  "TARGET_SSE2"
1889  "cvtpi2pd\t{%1, %0|%0, %1}"
1890  [(set_attr "type" "ssecvt")
1891   (set_attr "unit" "mmx,*")
1892   (set_attr "mode" "V2DF")])
1893
;; cvtpd2pi: convert V2DF to V2SI in a "y" register.  The conversion is
;; modelled as UNSPEC_FIX_NOTRUNC rather than with the `fix' rtx code.
1894(define_insn "sse2_cvtpd2pi"
1895  [(set (match_operand:V2SI 0 "register_operand" "=y")
1896	(unspec:V2SI [(match_operand:V2DF 1 "nonimmediate_operand" "xm")]
1897		     UNSPEC_FIX_NOTRUNC))]
1898  "TARGET_SSE2"
1899  "cvtpd2pi\t{%1, %0|%0, %1}"
1900  [(set_attr "type" "ssecvt")
1901   (set_attr "unit" "mmx")
1902   (set_attr "mode" "DI")])
1903
;; cvttpd2pi: convert V2DF to V2SI in a "y" register, modelled with the
;; `fix' rtx code.
;; NOTE(review): the "mode" attribute here is TI while sse2_cvtpd2pi
;; uses DI -- confirm which one is intended.
1904(define_insn "sse2_cvttpd2pi"
1905  [(set (match_operand:V2SI 0 "register_operand" "=y")
1906	(fix:V2SI (match_operand:V2DF 1 "nonimmediate_operand" "xm")))]
1907  "TARGET_SSE2"
1908  "cvttpd2pi\t{%1, %0|%0, %1}"
1909  [(set_attr "type" "ssecvt")
1910   (set_attr "unit" "mmx")
1911   (set_attr "mode" "TI")])
1912
;; cvtsi2sd: convert the SImode operand 2 (general register or memory)
;; to DF and merge it into element 0 of operand 1 (vec_merge mask 1);
;; the other element of operand 1, which is tied to the destination,
;; is kept.
1913(define_insn "sse2_cvtsi2sd"
1914  [(set (match_operand:V2DF 0 "register_operand" "=x,x")
1915	(vec_merge:V2DF
1916	  (vec_duplicate:V2DF
1917	    (float:DF (match_operand:SI 2 "nonimmediate_operand" "r,m")))
1918	  (match_operand:V2DF 1 "register_operand" "0,0")
1919	  (const_int 1)))]
1920  "TARGET_SSE2"
1921  "cvtsi2sd\t{%2, %0|%0, %2}"
1922  [(set_attr "type" "sseicvt")
1923   (set_attr "mode" "DF")
1924   (set_attr "athlon_decode" "double,direct")])
1925
;; cvtsi2sdq: DImode-source variant of sse2_cvtsi2sd, enabled only when
;; TARGET_64BIT.  The converted value is merged into element 0 of
;; operand 1, which is tied to the destination.
1926(define_insn "sse2_cvtsi2sdq"
1927  [(set (match_operand:V2DF 0 "register_operand" "=x,x")
1928	(vec_merge:V2DF
1929	  (vec_duplicate:V2DF
1930	    (float:DF (match_operand:DI 2 "nonimmediate_operand" "r,m")))
1931	  (match_operand:V2DF 1 "register_operand" "0,0")
1932	  (const_int 1)))]
1933  "TARGET_SSE2 && TARGET_64BIT"
1934  "cvtsi2sdq\t{%2, %0|%0, %2}"
1935  [(set_attr "type" "sseicvt")
1936   (set_attr "mode" "DF")
1937   (set_attr "athlon_decode" "double,direct")])
1938
;; cvtsd2si: convert element 0 of the V2DF operand 1 to an SImode
;; general register.  Modelled as UNSPEC_FIX_NOTRUNC rather than `fix'.
1939(define_insn "sse2_cvtsd2si"
1940  [(set (match_operand:SI 0 "register_operand" "=r,r")
1941	(unspec:SI
1942	  [(vec_select:DF
1943	     (match_operand:V2DF 1 "nonimmediate_operand" "x,m")
1944	     (parallel [(const_int 0)]))]
1945	  UNSPEC_FIX_NOTRUNC))]
1946  "TARGET_SSE2"
1947  "cvtsd2si\t{%1, %0|%0, %1}"
1948  [(set_attr "type" "sseicvt")
1949   (set_attr "athlon_decode" "double,vector")
1950   (set_attr "mode" "SI")])
1951
;; cvtsd2siq: DImode-result variant of sse2_cvtsd2si, enabled only when
;; TARGET_64BIT.  Modelled as UNSPEC_FIX_NOTRUNC rather than `fix'.
1952(define_insn "sse2_cvtsd2siq"
1953  [(set (match_operand:DI 0 "register_operand" "=r,r")
1954	(unspec:DI
1955	  [(vec_select:DF
1956	     (match_operand:V2DF 1 "nonimmediate_operand" "x,m")
1957	     (parallel [(const_int 0)]))]
1958	  UNSPEC_FIX_NOTRUNC))]
1959  "TARGET_SSE2 && TARGET_64BIT"
1960  "cvtsd2siq\t{%1, %0|%0, %1}"
1961  [(set_attr "type" "sseicvt")
1962   (set_attr "athlon_decode" "double,vector")
1963   (set_attr "mode" "DI")])
1964
;; cvttsd2si: convert element 0 of the V2DF operand 1 to an SImode
;; general register, modelled with the `fix' rtx code.
1965(define_insn "sse2_cvttsd2si"
1966  [(set (match_operand:SI 0 "register_operand" "=r,r")
1967	(fix:SI
1968	  (vec_select:DF
1969	    (match_operand:V2DF 1 "nonimmediate_operand" "x,m")
1970	    (parallel [(const_int 0)]))))]
1971  "TARGET_SSE2"
1972  "cvttsd2si\t{%1, %0|%0, %1}"
1973  [(set_attr "type" "sseicvt")
1974   (set_attr "mode" "SI")
1975   (set_attr "athlon_decode" "double,vector")])
1976
;; cvttsd2siq: DImode-result variant of sse2_cvttsd2si, enabled only
;; when TARGET_64BIT.
1977(define_insn "sse2_cvttsd2siq"
1978  [(set (match_operand:DI 0 "register_operand" "=r,r")
1979	(fix:DI
1980	  (vec_select:DF
1981	    (match_operand:V2DF 1 "nonimmediate_operand" "x,m")
1982	    (parallel [(const_int 0)]))))]
1983  "TARGET_SSE2 && TARGET_64BIT"
1984  "cvttsd2siq\t{%1, %0|%0, %1}"
1985  [(set_attr "type" "sseicvt")
1986   (set_attr "mode" "DI")
1987   (set_attr "athlon_decode" "double,vector")])
1988
;; cvtdq2pd: convert elements 0 and 1 of the V4SI operand 1 to V2DF.
1989(define_insn "sse2_cvtdq2pd"
1990  [(set (match_operand:V2DF 0 "register_operand" "=x")
1991	(float:V2DF
1992	  (vec_select:V2SI
1993	    (match_operand:V4SI 1 "nonimmediate_operand" "xm")
1994	    (parallel [(const_int 0) (const_int 1)]))))]
1995  "TARGET_SSE2"
1996  "cvtdq2pd\t{%1, %0|%0, %1}"
1997  [(set_attr "type" "ssecvt")
1998   (set_attr "mode" "V2DF")])
1999
;; cvtpd2dq: convert V2DF to two SImode integers placed in the low half
;; of a V4SI result; the high half is a V2SI zero vector.  The expander
;; supplies that zero vector as operand 2 (match_dup) so the insn below
;; can match it with const0_operand.
2000(define_expand "sse2_cvtpd2dq"
2001  [(set (match_operand:V4SI 0 "register_operand" "")
2002	(vec_concat:V4SI
2003	  (unspec:V2SI [(match_operand:V2DF 1 "nonimmediate_operand" "")]
2004		       UNSPEC_FIX_NOTRUNC)
2005	  (match_dup 2)))]
2006  "TARGET_SSE2"
2007  "operands[2] = CONST0_RTX (V2SImode);")
2008
;; Matching insn for the expander above; operand 2 must be zero.
2009(define_insn "*sse2_cvtpd2dq"
2010  [(set (match_operand:V4SI 0 "register_operand" "=x")
2011	(vec_concat:V4SI
2012	  (unspec:V2SI [(match_operand:V2DF 1 "nonimmediate_operand" "xm")]
2013		       UNSPEC_FIX_NOTRUNC)
2014	  (match_operand:V2SI 2 "const0_operand" "")))]
2015  "TARGET_SSE2"
2016  "cvtpd2dq\t{%1, %0|%0, %1}"
2017  [(set_attr "type" "ssecvt")
2018   (set_attr "mode" "TI")])
2019
;; cvttpd2dq: like sse2_cvtpd2dq but the conversion is modelled with the
;; `fix' rtx code.  The expander supplies the V2SI zero vector that
;; forms the high half of the V4SI result as operand 2.
2020(define_expand "sse2_cvttpd2dq"
2021  [(set (match_operand:V4SI 0 "register_operand" "")
2022	(vec_concat:V4SI
2023	  (fix:V2SI (match_operand:V2DF 1 "nonimmediate_operand" ""))
2024	  (match_dup 2)))]
2025  "TARGET_SSE2"
2026  "operands[2] = CONST0_RTX (V2SImode);")
2027
;; Matching insn for the expander above; operand 2 must be zero.
2028(define_insn "*sse2_cvttpd2dq"
2029  [(set (match_operand:V4SI 0 "register_operand" "=x")
2030	(vec_concat:V4SI
2031	  (fix:V2SI (match_operand:V2DF 1 "nonimmediate_operand" "xm"))
2032	  (match_operand:V2SI 2 "const0_operand" "")))]
2033  "TARGET_SSE2"
2034  "cvttpd2dq\t{%1, %0|%0, %1}"
2035  [(set_attr "type" "ssecvt")
2036   (set_attr "mode" "TI")])
2037
;; cvtsd2ss: narrow the V2DF operand 2 (register or memory) and merge
;; the result into element 0 of the V4SF operand 1 (vec_merge mask 1);
;; operand 1 is tied to the destination so its other elements are kept.
2038(define_insn "sse2_cvtsd2ss"
2039  [(set (match_operand:V4SF 0 "register_operand" "=x,x")
2040	(vec_merge:V4SF
2041	  (vec_duplicate:V4SF
2042	    (float_truncate:V2SF
2043	      (match_operand:V2DF 2 "nonimmediate_operand" "x,m")))
2044	  (match_operand:V4SF 1 "register_operand" "0,0")
2045	  (const_int 1)))]
2046  "TARGET_SSE2"
2047  "cvtsd2ss\t{%2, %0|%0, %2}"
2048  [(set_attr "type" "ssecvt")
2049   (set_attr "athlon_decode" "vector,double")
2050   (set_attr "mode" "SF")])
2051
;; cvtss2sd: widen elements 0 and 1 of the V4SF operand 2 to V2DF and
;; merge element 0 of that into operand 1 (vec_merge mask 1); operand 1
;; is tied to the destination so its other element is kept.
2052(define_insn "sse2_cvtss2sd"
2053  [(set (match_operand:V2DF 0 "register_operand" "=x")
2054	(vec_merge:V2DF
2055	  (float_extend:V2DF
2056	    (vec_select:V2SF
2057	      (match_operand:V4SF 2 "nonimmediate_operand" "xm")
2058	      (parallel [(const_int 0) (const_int 1)])))
2059	  (match_operand:V2DF 1 "register_operand" "0")
2060	  (const_int 1)))]
2061  "TARGET_SSE2"
2062  "cvtss2sd\t{%2, %0|%0, %2}"
2063  [(set_attr "type" "ssecvt")
2064   (set_attr "mode" "DF")])
2065
;; cvtpd2ps: narrow V2DF to two SFmode values placed in the low half of
;; a V4SF result; the high half is a V2SF zero vector, which the
;; expander supplies as operand 2 (match_dup).
;; The constraint string on operand 1 is left empty, as in the other
;; expanders in this file: constraints belong on the matching insn.
2066(define_expand "sse2_cvtpd2ps"
2067  [(set (match_operand:V4SF 0 "register_operand" "")
2068	(vec_concat:V4SF
2069	  (float_truncate:V2SF
2070	    (match_operand:V2DF 1 "nonimmediate_operand" ""))
2071	  (match_dup 2)))]
2072  "TARGET_SSE2"
2073  "operands[2] = CONST0_RTX (V2SFmode);")
2074
;; cvtpd2ps insn: low half of the V4SF result is the narrowed V2DF
;; operand 1; operand 2 (the high half) must be a zero vector.
2075(define_insn "*sse2_cvtpd2ps"
2076  [(set (match_operand:V4SF 0 "register_operand" "=x")
2077	(vec_concat:V4SF
2078	  (float_truncate:V2SF
2079	    (match_operand:V2DF 1 "nonimmediate_operand" "xm"))
2080	  (match_operand:V2SF 2 "const0_operand" "")))]
2081  "TARGET_SSE2"
2082  "cvtpd2ps\t{%1, %0|%0, %1}"
2083  [(set_attr "type" "ssecvt")
2084   (set_attr "mode" "V4SF")])
2085
;; cvtps2pd: widen elements 0 and 1 of the V4SF operand 1 to V2DF.
2086(define_insn "sse2_cvtps2pd"
2087  [(set (match_operand:V2DF 0 "register_operand" "=x")
2088	(float_extend:V2DF
2089	  (vec_select:V2SF
2090	    (match_operand:V4SF 1 "nonimmediate_operand" "xm")
2091	    (parallel [(const_int 0) (const_int 1)]))))]
2092  "TARGET_SSE2"
2093  "cvtps2pd\t{%1, %0|%0, %1}"
2094  [(set_attr "type" "ssecvt")
2095   (set_attr "mode" "V2DF")])
2096
2097;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
2098;;
2099;; Parallel double-precision floating point element swizzling
2100;;
2101;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
2102
;; Interleave high doubles: result = { operand1[1], operand2[1] }
;; (elements 1 and 3 of the concatenation).  Alternatives:
;;   0: register form, unpckhpd.
;;   1: operand 1 in offsettable memory, operand 2 tied to the
;;      destination; movlpd loads from %H1 into the destination.
;;   2: memory destination with operand 2 tied to it; movhpd stores
;;      from operand 1.
;; The condition rejects operands 1 and 2 both being memory.
2103(define_insn "sse2_unpckhpd"
2104  [(set (match_operand:V2DF 0 "nonimmediate_operand"     "=x,x,m")
2105	(vec_select:V2DF
2106	  (vec_concat:V4DF
2107	    (match_operand:V2DF 1 "nonimmediate_operand" " 0,o,x")
2108	    (match_operand:V2DF 2 "nonimmediate_operand" " x,0,0"))
2109	  (parallel [(const_int 1)
2110		     (const_int 3)])))]
2111  "TARGET_SSE2 && !(MEM_P (operands[1]) && MEM_P (operands[2]))"
2112  "@
2113   unpckhpd\t{%2, %0|%0, %2}
2114   movlpd\t{%H1, %0|%0, %H1}
2115   movhpd\t{%1, %0|%0, %1}"
2116  [(set_attr "type" "sselog,ssemov,ssemov")
2117   (set_attr "mode" "V2DF,V1DF,V1DF")])
2118
;; SSE3 movddup on a V2DF: selects elements 0 and 2 of operand 1
;; concatenated with itself, i.e. both result elements are operand1[0].
;; Alternative 0 emits movddup into a register; alternative 1 (memory
;; destination, register source) emits "#" and therefore has to be split.
;; The condition rejects a memory-to-memory form.  This pattern only has
;; operands 0 and 1 (the second vec_concat arm is a match_dup), so the
;; MEM_P test must look at those two and not at operands[2].
2119(define_insn "*sse3_movddup"
2120  [(set (match_operand:V2DF 0 "nonimmediate_operand"     "=x,o")
2121	(vec_select:V2DF
2122	  (vec_concat:V4DF
2123	    (match_operand:V2DF 1 "nonimmediate_operand" "xm,x")
2124	    (match_dup 1))
2125	  (parallel [(const_int 0)
2126		     (const_int 2)])))]
2127  "TARGET_SSE3 && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
2128  "@
2129   movddup\t{%1, %0|%0, %1}
2130   #"
2131  [(set_attr "type" "sselog,ssemov")
2132   (set_attr "mode" "V2DF")])
2133
;; After reload, split a movddup-style duplicate of a register's low
;; double into a V2DF memory destination: the DFmode view of the source
;; register is stored twice, at byte offsets 0 and 8 of the destination.
2134(define_split
2135  [(set (match_operand:V2DF 0 "memory_operand" "")
2136	(vec_select:V2DF
2137	  (vec_concat:V4DF
2138	    (match_operand:V2DF 1 "register_operand" "")
2139	    (match_dup 1))
2140	  (parallel [(const_int 0)
2141		     (const_int 2)])))]
2142  "TARGET_SSE3 && reload_completed"
2143  [(const_int 0)]
2144{
2145  rtx low = gen_rtx_REG (DFmode, REGNO (operands[1]));
2146  emit_move_insn (adjust_address (operands[0], DFmode, 0), low);
2147  emit_move_insn (adjust_address (operands[0], DFmode, 8), low);
2148  DONE;
2149})
2150
;; Interleave low doubles: result = { operand1[0], operand2[0] }
;; (elements 0 and 2 of the concatenation).  Operand 1 is always tied
;; to the destination.  Alternatives:
;;   0: register form, unpcklpd.
;;   1: operand 2 in memory; movhpd loads it into the destination.
;;   2: offsettable memory destination; movlpd stores operand 2 to %H0.
2151(define_insn "sse2_unpcklpd"
2152  [(set (match_operand:V2DF 0 "nonimmediate_operand"     "=x,x,o")
2153	(vec_select:V2DF
2154	  (vec_concat:V4DF
2155	    (match_operand:V2DF 1 "nonimmediate_operand" " 0,0,0")
2156	    (match_operand:V2DF 2 "nonimmediate_operand" " x,m,x"))
2157	  (parallel [(const_int 0)
2158		     (const_int 2)])))]
2159  "TARGET_SSE2 && !(MEM_P (operands[1]) && MEM_P (operands[2]))"
2160  "@
2161   unpcklpd\t{%2, %0|%0, %2}
2162   movhpd\t{%2, %0|%0, %2}
2163   movlpd\t{%2, %H0|%H0, %2}"
2164  [(set_attr "type" "sselog,ssemov,ssemov")
2165   (set_attr "mode" "V2DF,V1DF,V1DF")])
2166
;; shufpd with a 2-bit immediate in operand 3.  The immediate is decoded
;; into the two element selectors that sse2_shufpd_1 expects: bit 0
;; picks element 0 or 1, bit 1 picks element 2 or 3 of the
;; concatenation of operands 1 and 2.
2167(define_expand "sse2_shufpd"
2168  [(match_operand:V2DF 0 "register_operand" "")
2169   (match_operand:V2DF 1 "register_operand" "")
2170   (match_operand:V2DF 2 "nonimmediate_operand" "")
2171   (match_operand:SI 3 "const_int_operand" "")]
2172  "TARGET_SSE2"
2173{
2174  int mask = INTVAL (operands[3]);
2175  emit_insn (gen_sse2_shufpd_1 (operands[0], operands[1], operands[2],
2176				GEN_INT (mask & 1),
2177				GEN_INT (mask & 2 ? 3 : 2)));
2178  DONE;
2179})
2180
;; shufpd in vec_select form.  Operand 3 (0..1) selects the first result
;; element from operand 1, operand 4 (2..3) selects the second from
;; operand 2.  The output code folds both selectors back into the
;; instruction's immediate -- bit 0 from operand 3, bit 1 from
;; (operand 4 - 2) -- and overwrites operands[3] with it for printing.
2181(define_insn "sse2_shufpd_1"
2182  [(set (match_operand:V2DF 0 "register_operand" "=x")
2183	(vec_select:V2DF
2184	  (vec_concat:V4DF
2185	    (match_operand:V2DF 1 "register_operand" "0")
2186	    (match_operand:V2DF 2 "nonimmediate_operand" "xm"))
2187	  (parallel [(match_operand 3 "const_0_to_1_operand" "")
2188		     (match_operand 4 "const_2_to_3_operand" "")])))]
2189  "TARGET_SSE2"
2190{
2191  int mask;
2192  mask = INTVAL (operands[3]);
2193  mask |= (INTVAL (operands[4]) - 2) << 1;
2194  operands[3] = GEN_INT (mask);
2195
2196  return "shufpd\t{%3, %2, %0|%0, %2, %3}";
2197}
2198  [(set_attr "type" "sselog")
2199   (set_attr "mode" "V2DF")])
2200
;; Extract element 1 (the high double) of the V2DF operand 1.
;; Alternatives:
;;   0: store to memory with movhpd.
;;   1: source tied to the destination register; unpckhpd %0, %0.
;;   2: source in offsettable memory; emits "#" and must be split.
;; The condition rejects a memory-to-memory form.
2201(define_insn "sse2_storehpd"
2202  [(set (match_operand:DF 0 "nonimmediate_operand"     "=m,x,x*fr")
2203	(vec_select:DF
2204	  (match_operand:V2DF 1 "nonimmediate_operand" " x,0,o")
2205	  (parallel [(const_int 1)])))]
2206  "TARGET_SSE2 && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
2207  "@
2208   movhpd\t{%1, %0|%0, %1}
2209   unpckhpd\t%0, %0
2210   #"
2211  [(set_attr "type" "ssemov,sselog1,ssemov")
2212   (set_attr "mode" "V1DF,V2DF,DF")])
2213
;; After reload, extracting element 1 of a V2DF in memory into a
;; register becomes a plain DFmode load from byte offset 8.
2214(define_split
2215  [(set (match_operand:DF 0 "register_operand" "")
2216	(vec_select:DF
2217	  (match_operand:V2DF 1 "memory_operand" "")
2218	  (parallel [(const_int 1)])))]
2219  "TARGET_SSE2 && reload_completed"
2220  [(set (match_dup 0) (match_dup 1))]
2221{
2222  operands[1] = adjust_address (operands[1], DFmode, 8);
2223})
2224
;; Extract element 0 (the low double) of the V2DF operand 1.
;; Alternative 0 stores to memory with movlpd; alternatives 1 and 2
;; (register destination) emit "#" and must be split.
;; The condition rejects a memory-to-memory form.
2225(define_insn "sse2_storelpd"
2226  [(set (match_operand:DF 0 "nonimmediate_operand"     "=m,x,x*fr")
2227	(vec_select:DF
2228	  (match_operand:V2DF 1 "nonimmediate_operand" " x,x,m")
2229	  (parallel [(const_int 0)])))]
2230  "TARGET_SSE2 && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
2231  "@
2232   movlpd\t{%1, %0|%0, %1}
2233   #
2234   #"
2235  [(set_attr "type" "ssemov")
2236   (set_attr "mode" "V1DF,DF,DF")])
2237
;; After reload, extracting element 0 of a V2DF into a register becomes
;; a plain DFmode move.  A register source is re-expressed as a DFmode
;; REG with the same register number; any other source goes through
;; gen_lowpart.
2238(define_split
2239  [(set (match_operand:DF 0 "register_operand" "")
2240	(vec_select:DF
2241	  (match_operand:V2DF 1 "nonimmediate_operand" "")
2242	  (parallel [(const_int 0)])))]
2243  "TARGET_SSE2 && reload_completed"
2244  [(const_int 0)]
2245{
2246  rtx op1 = operands[1];
2247  if (REG_P (op1))
2248    op1 = gen_rtx_REG (DFmode, REGNO (op1));
2249  else
2250    op1 = gen_lowpart (DFmode, op1);
2251  emit_move_insn (operands[0], op1);
2252  DONE;
2253})
2254
;; Replace the high double: result = { operand1[0], operand2 }.
;; Alternatives:
;;   0: operand 2 in memory, movhpd.
;;   1: operand 2 in a register, unpcklpd.
;;   2: operand 2 tied to the destination, shufpd with immediate 1.
;;   3: offsettable memory destination tied to operand 1; emits "#" and
;;      must be split.
;; The condition rejects operands 1 and 2 both being memory.
2255(define_insn "sse2_loadhpd"
2256  [(set (match_operand:V2DF 0 "nonimmediate_operand"     "=x,x,x,o")
2257	(vec_concat:V2DF
2258	  (vec_select:DF
2259	    (match_operand:V2DF 1 "nonimmediate_operand" " 0,0,x,0")
2260	    (parallel [(const_int 0)]))
2261	  (match_operand:DF 2 "nonimmediate_operand"     " m,x,0,x*fr")))]
2262  "TARGET_SSE2 && !(MEM_P (operands[1]) && MEM_P (operands[2]))"
2263  "@
2264   movhpd\t{%2, %0|%0, %2}
2265   unpcklpd\t{%2, %0|%0, %2}
2266   shufpd\t{$1, %1, %0|%0, %1, 1}
2267   #"
2268  [(set_attr "type" "ssemov,sselog,sselog,other")
2269   (set_attr "mode" "V1DF,V2DF,V2DF,DF")])
2270
;; After reload, replacing the high double of a V2DF in memory (element
;; 0 is kept via match_dup 0) becomes a plain DFmode store of operand 1
;; at byte offset 8.
2271(define_split
2272  [(set (match_operand:V2DF 0 "memory_operand" "")
2273	(vec_concat:V2DF
2274	  (vec_select:DF (match_dup 0) (parallel [(const_int 0)]))
2275	  (match_operand:DF 1 "register_operand" "")))]
2276  "TARGET_SSE2 && reload_completed"
2277  [(set (match_dup 0) (match_dup 1))]
2278{
2279  operands[0] = adjust_address (operands[0], DFmode, 8);
2280})
2281
;; Replace the low double: result = { operand2, operand1[1] }.
;; Alternatives:
;;   0: operand 1 matches constraint "C", operand 2 in memory; movsd.
;;   1: operand 2 in memory, operand 1 tied to the destination; movlpd.
;;   2: operand 2 in a register, operand 1 tied to the destination; movsd.
;;   3: operand 2 tied to the destination, operand 1 in a register;
;;      shufpd with immediate 2.
;;   4: operand 2 tied to the destination, operand 1 in offsettable
;;      memory; movhpd from %H1.
;;   5: memory destination tied to operand 1; emits "#" and must be split.
;; The condition rejects operands 1 and 2 both being memory.
2282(define_insn "sse2_loadlpd"
2283  [(set (match_operand:V2DF 0 "nonimmediate_operand"    "=x,x,x,x,x,m")
2284	(vec_concat:V2DF
2285	  (match_operand:DF 2 "nonimmediate_operand"    " m,m,x,0,0,x*fr")
2286	  (vec_select:DF
2287	    (match_operand:V2DF 1 "vector_move_operand" " C,0,0,x,o,0")
2288	    (parallel [(const_int 1)]))))]
2289  "TARGET_SSE2 && !(MEM_P (operands[1]) && MEM_P (operands[2]))"
2290  "@
2291   movsd\t{%2, %0|%0, %2}
2292   movlpd\t{%2, %0|%0, %2}
2293   movsd\t{%2, %0|%0, %2}
2294   shufpd\t{$2, %2, %0|%0, %2, 2}
2295   movhpd\t{%H1, %0|%0, %H1}
2296   #"
2297  [(set_attr "type" "ssemov,ssemov,ssemov,sselog,ssemov,other")
2298   (set_attr "mode" "DF,V1DF,V1DF,V2DF,V1DF,DF")])
2299
;; After reload, replacing the low double of a V2DF in memory becomes a
;; plain DFmode store.  In the matched pattern operand 1 is the FIRST
;; arm of the vec_concat (element 0) and element 1 of the memory is kept
;; via match_dup 0, so the scalar must be written at byte offset 0;
;; writing at offset 8 would clobber the element that is preserved.
2300(define_split
2301  [(set (match_operand:V2DF 0 "memory_operand" "")
2302	(vec_concat:V2DF
2303	  (match_operand:DF 1 "register_operand" "")
2304	  (vec_select:DF (match_dup 0) (parallel [(const_int 1)]))))]
2305  "TARGET_SSE2 && reload_completed"
2306  [(set (match_dup 0) (match_dup 1))]
2307{
2308  operands[0] = adjust_address (operands[0], DFmode, 0);
2309})
2310
;; movsd-style merge: element 0 of the result comes from operand 2,
;; element 1 from operand 1 (vec_merge mask 1).  Six alternatives cover
;; register and memory placements of the operands; the one tied to the
;; destination is operand 1 in alternatives 0-2 and operand 2 in
;; alternatives 3-5.
2311(define_insn "sse2_movsd"
2312  [(set (match_operand:V2DF 0 "nonimmediate_operand"   "=x,x,m,x,x,o")
2313	(vec_merge:V2DF
2314	  (match_operand:V2DF 2 "nonimmediate_operand" " x,m,x,0,0,0")
2315	  (match_operand:V2DF 1 "nonimmediate_operand" " 0,0,0,x,o,x")
2316	  (const_int 1)))]
2317  "TARGET_SSE2"
2318  "@
2319   movsd\t{%2, %0|%0, %2}
2320   movlpd\t{%2, %0|%0, %2}
2321   movlpd\t{%2, %0|%0, %2}
2322   shufpd\t{$2, %2, %0|%0, %2, 2}
2323   movhps\t{%H1, %0|%0, %H1}
2324   movhps\t{%1, %H0|%H0, %1}"
2325  [(set_attr "type" "ssemov,ssemov,ssemov,sselog,ssemov,ssemov")
2326   (set_attr "mode" "DF,V1DF,V1DF,V2DF,V1DF,V1DF")])
2327
;; Broadcast a DF scalar (register or memory) to both elements of a
;; V2DF using the SSE3 movddup instruction.
2328(define_insn "*vec_dupv2df_sse3"
2329  [(set (match_operand:V2DF 0 "register_operand" "=x")
2330	(vec_duplicate:V2DF
2331	  (match_operand:DF 1 "nonimmediate_operand" "xm")))]
2332  "TARGET_SSE3"
2333  "movddup\t{%1, %0|%0, %1}"
2334  [(set_attr "type" "sselog1")
2335   (set_attr "mode" "DF")])
2336
;; SSE2 broadcast of a DF scalar: the source is tied to the destination
;; register and duplicated in place with unpcklpd %0, %0.
2337(define_insn "*vec_dupv2df"
2338  [(set (match_operand:V2DF 0 "register_operand" "=x")
2339	(vec_duplicate:V2DF
2340	  (match_operand:DF 1 "register_operand" "0")))]
2341  "TARGET_SSE2"
2342  "unpcklpd\t%0, %0"
2343  [(set_attr "type" "sselog1")
2344   (set_attr "mode" "V4SF")])
2345
;; vec_concat of a DF scalar with itself (match_dup 1): the same
;; broadcast as vec_duplicate, emitted as SSE3 movddup.
2346(define_insn "*vec_concatv2df_sse3"
2347  [(set (match_operand:V2DF 0 "register_operand" "=x")
2348	(vec_concat:V2DF
2349	  (match_operand:DF 1 "nonimmediate_operand" "xm")
2350	  (match_dup 1)))]
2351  "TARGET_SSE3"
2352  "movddup\t{%1, %0|%0, %1}"
2353  [(set_attr "type" "sselog1")
2354   (set_attr "mode" "DF")])
2355
;; Build a V2DF from two DF scalars: result = { operand1, operand2 }.
;; Alternatives 0-2 use the "Y" register constraint and emit
;; unpcklpd / movhpd / movsd (alternative 2 loads operand 1 from memory
;; with operand 2 matching "C"); alternatives 3-4 use the "x" constraint
;; and emit movlhps / movhps.
;; NOTE(review): the insn condition is only TARGET_SSE; presumably the
;; "Y" constraint restricts alternatives 0-2 to SSE2 -- confirm.
2356(define_insn "*vec_concatv2df"
2357  [(set (match_operand:V2DF 0 "register_operand"     "=Y,Y,Y,x,x")
2358	(vec_concat:V2DF
2359	  (match_operand:DF 1 "nonimmediate_operand" " 0,0,m,0,0")
2360	  (match_operand:DF 2 "vector_move_operand"  " Y,m,C,x,m")))]
2361  "TARGET_SSE"
2362  "@
2363   unpcklpd\t{%2, %0|%0, %2}
2364   movhpd\t{%2, %0|%0, %2}
2365   movsd\t{%1, %0|%0, %1}
2366   movlhps\t{%2, %0|%0, %2}
2367   movhps\t{%2, %0|%0, %2}"
2368  [(set_attr "type" "sselog,ssemov,ssemov,ssemov,ssemov")
2369   (set_attr "mode" "V2DF,V1DF,DF,V4SF,V2SF")])
2370
;; Set one element of the V2DF operand 0 to the DF operand 1; operand 2
;; is the constant element index.  All work is delegated to
;; ix86_expand_vector_set, called with `false' as its first argument.
2371(define_expand "vec_setv2df"
2372  [(match_operand:V2DF 0 "register_operand" "")
2373   (match_operand:DF 1 "register_operand" "")
2374   (match_operand 2 "const_int_operand" "")]
2375  "TARGET_SSE"
2376{
2377  ix86_expand_vector_set (false, operands[0], operands[1],
2378			  INTVAL (operands[2]));
2379  DONE;
2380})
2381
;; Extract one element of the V2DF operand 1 into the DF operand 0;
;; operand 2 is the constant element index.  All work is delegated to
;; ix86_expand_vector_extract, called with `false' as its first argument.
2382(define_expand "vec_extractv2df"
2383  [(match_operand:DF 0 "register_operand" "")
2384   (match_operand:V2DF 1 "register_operand" "")
2385   (match_operand 2 "const_int_operand" "")]
2386  "TARGET_SSE"
2387{
2388  ix86_expand_vector_extract (false, operands[0], operands[1],
2389			      INTVAL (operands[2]));
2390  DONE;
2391})
2392
;; Initialise the V2DF operand 0 from operand 1 (no predicate or mode is
;; imposed on it here).  All work is delegated to
;; ix86_expand_vector_init, called with `false' as its first argument.
2393(define_expand "vec_initv2df"
2394  [(match_operand:V2DF 0 "register_operand" "")
2395   (match_operand 1 "" "")]
2396  "TARGET_SSE"
2397{
2398  ix86_expand_vector_init (false, operands[0], operands[1]);
2399  DONE;
2400})
2401
2402;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
2403;;
2404;; Parallel integral arithmetic
2405;;
2406;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
2407
;; Integer vector negate for every SSEMODEI mode, expressed as
;; (0 - operand 1).  The zero vector is forced into a register and
;; supplied as operand 2.
2408(define_expand "neg<mode>2"
2409  [(set (match_operand:SSEMODEI 0 "register_operand" "")
2410	(minus:SSEMODEI
2411	  (match_dup 2)
2412	  (match_operand:SSEMODEI 1 "nonimmediate_operand" "")))]
2413  "TARGET_SSE2"
2414  "operands[2] = force_reg (<MODE>mode, CONST0_RTX (<MODE>mode));")
2415
;; Integer vector add for every SSEMODEI mode.  The expander only fixes
;; up the operands via ix86_fixup_binary_operands_no_copy; the anonymous
;; insn below then matches.
2416(define_expand "add<mode>3"
2417  [(set (match_operand:SSEMODEI 0 "register_operand" "")
2418	(plus:SSEMODEI (match_operand:SSEMODEI 1 "nonimmediate_operand" "")
2419		       (match_operand:SSEMODEI 2 "nonimmediate_operand" "")))]
2420  "TARGET_SSE2"
2421  "ix86_fixup_binary_operands_no_copy (PLUS, <MODE>mode, operands);")
2422
;; padd{b,w,d,q}: the mnemonic suffix comes from the ssevecsize mode
;; attribute.  Operand 1 is tied to the destination and commutative
;; with operand 2 ("%0").
2423(define_insn "*add<mode>3"
2424  [(set (match_operand:SSEMODEI 0 "register_operand" "=x")
2425	(plus:SSEMODEI
2426	  (match_operand:SSEMODEI 1 "nonimmediate_operand" "%0")
2427	  (match_operand:SSEMODEI 2 "nonimmediate_operand" "xm")))]
2428  "TARGET_SSE2 && ix86_binary_operator_ok (PLUS, <MODE>mode, operands)"
2429  "padd<ssevecsize>\t{%2, %0|%0, %2}"
2430  [(set_attr "type" "sseiadd")
2431   (set_attr "mode" "TI")])
2432
;; padds{b,w}: ss_plus (signed saturating add) for the SSEMODE12 modes
;; (V16QI and V8HI).
2433(define_insn "sse2_ssadd<mode>3"
2434  [(set (match_operand:SSEMODE12 0 "register_operand" "=x")
2435	(ss_plus:SSEMODE12
2436	  (match_operand:SSEMODE12 1 "nonimmediate_operand" "%0")
2437	  (match_operand:SSEMODE12 2 "nonimmediate_operand" "xm")))]
2438  "TARGET_SSE2 && ix86_binary_operator_ok (SS_PLUS, <MODE>mode, operands)"
2439  "padds<ssevecsize>\t{%2, %0|%0, %2}"
2440  [(set_attr "type" "sseiadd")
2441   (set_attr "mode" "TI")])
2442
;; paddus{b,w}: us_plus (unsigned saturating add) for the SSEMODE12
;; modes (V16QI and V8HI).
2443(define_insn "sse2_usadd<mode>3"
2444  [(set (match_operand:SSEMODE12 0 "register_operand" "=x")
2445	(us_plus:SSEMODE12
2446	  (match_operand:SSEMODE12 1 "nonimmediate_operand" "%0")
2447	  (match_operand:SSEMODE12 2 "nonimmediate_operand" "xm")))]
2448  "TARGET_SSE2 && ix86_binary_operator_ok (US_PLUS, <MODE>mode, operands)"
2449  "paddus<ssevecsize>\t{%2, %0|%0, %2}"
2450  [(set_attr "type" "sseiadd")
2451   (set_attr "mode" "TI")])
2452
;; Integer vector subtract for every SSEMODEI mode.  Operand 1 (the
;; minuend) must already be a register; the expander passes the operands
;; through ix86_fixup_binary_operands_no_copy.
2453(define_expand "sub<mode>3"
2454  [(set (match_operand:SSEMODEI 0 "register_operand" "")
2455	(minus:SSEMODEI (match_operand:SSEMODEI 1 "register_operand" "")
2456			(match_operand:SSEMODEI 2 "nonimmediate_operand" "")))]
2457  "TARGET_SSE2"
2458  "ix86_fixup_binary_operands_no_copy (MINUS, <MODE>mode, operands);")
2459
;; psub{b,w,d,q}: not commutative, so operand 1 is a register tied to
;; the destination; operand 2 may be a register or memory.
2460(define_insn "*sub<mode>3"
2461  [(set (match_operand:SSEMODEI 0 "register_operand" "=x")
2462	(minus:SSEMODEI
2463	  (match_operand:SSEMODEI 1 "register_operand" "0")
2464	  (match_operand:SSEMODEI 2 "nonimmediate_operand" "xm")))]
2465  "TARGET_SSE2"
2466  "psub<ssevecsize>\t{%2, %0|%0, %2}"
2467  [(set_attr "type" "sseiadd")
2468   (set_attr "mode" "TI")])
2469
;; psubs{b,w}: ss_minus (signed saturating subtract) for the SSEMODE12
;; modes (V16QI and V8HI).
2470(define_insn "sse2_sssub<mode>3"
2471  [(set (match_operand:SSEMODE12 0 "register_operand" "=x")
2472	(ss_minus:SSEMODE12
2473	  (match_operand:SSEMODE12 1 "register_operand" "0")
2474	  (match_operand:SSEMODE12 2 "nonimmediate_operand" "xm")))]
2475  "TARGET_SSE2"
2476  "psubs<ssevecsize>\t{%2, %0|%0, %2}"
2477  [(set_attr "type" "sseiadd")
2478   (set_attr "mode" "TI")])
2479
;; psubus{b,w}: us_minus (unsigned saturating subtract) for the
;; SSEMODE12 modes (V16QI and V8HI).
2480(define_insn "sse2_ussub<mode>3"
2481  [(set (match_operand:SSEMODE12 0 "register_operand" "=x")
2482	(us_minus:SSEMODE12
2483	  (match_operand:SSEMODE12 1 "register_operand" "0")
2484	  (match_operand:SSEMODE12 2 "nonimmediate_operand" "xm")))]
2485  "TARGET_SSE2"
2486  "psubus<ssevecsize>\t{%2, %0|%0, %2}"
2487  [(set_attr "type" "sseiadd")
2488   (set_attr "mode" "TI")])
2489
;; V16QI multiply, expanded into a sequence of existing patterns:
;; both inputs are unpacked with punpckhbw/punpcklbw (each source paired
;; with itself), the halves are multiplied as V8HI with gen_mulv8hi3,
;; and a chain of punpck{h,l}bw steps gathers the result bytes into
;; operand 0.  Twelve fresh V16QI pseudo registers hold the
;; intermediates; the V8HI multiplies access them through gen_lowpart.
2490(define_expand "mulv16qi3"
2491  [(set (match_operand:V16QI 0 "register_operand" "")
2492	(mult:V16QI (match_operand:V16QI 1 "register_operand" "")
2493		    (match_operand:V16QI 2 "register_operand" "")))]
2494  "TARGET_SSE2"
2495{
2496  rtx t[12], op0;
2497  int i;
2498
2499  for (i = 0; i < 12; ++i)
2500    t[i] = gen_reg_rtx (V16QImode);
2501
2502  /* Unpack data such that we've got a source byte in each low byte of
2503     each word.  We don't care what goes into the high byte of each word.
2504     Rather than trying to get zero in there, most convenient is to let
2505     it be a copy of the low byte.  */
2506  emit_insn (gen_sse2_punpckhbw (t[0], operands[1], operands[1]));
2507  emit_insn (gen_sse2_punpckhbw (t[1], operands[2], operands[2]));
2508  emit_insn (gen_sse2_punpcklbw (t[2], operands[1], operands[1]));
2509  emit_insn (gen_sse2_punpcklbw (t[3], operands[2], operands[2]));
2510
2511  /* Multiply words.  The end-of-line annotations here give a picture of what
2512     the output of that instruction looks like.  Dot means don't care; the 
2513     letters are the bytes of the result with A being the most significant.  */
2514  emit_insn (gen_mulv8hi3 (gen_lowpart (V8HImode, t[4]), /* .A.B.C.D.E.F.G.H */
2515			   gen_lowpart (V8HImode, t[0]),
2516			   gen_lowpart (V8HImode, t[1])));
2517  emit_insn (gen_mulv8hi3 (gen_lowpart (V8HImode, t[5]), /* .I.J.K.L.M.N.O.P */
2518			   gen_lowpart (V8HImode, t[2]),
2519			   gen_lowpart (V8HImode, t[3])));
2520
2521  /* Extract the relevant bytes and merge them back together.  */
2522  emit_insn (gen_sse2_punpckhbw (t[6], t[5], t[4]));	/* ..AI..BJ..CK..DL */
2523  emit_insn (gen_sse2_punpcklbw (t[7], t[5], t[4]));	/* ..EM..FN..GO..HP */
2524  emit_insn (gen_sse2_punpckhbw (t[8], t[7], t[6]));	/* ....AEIM....BFJN */
2525  emit_insn (gen_sse2_punpcklbw (t[9], t[7], t[6]));	/* ....CGKO....DHLP */
2526  emit_insn (gen_sse2_punpckhbw (t[10], t[9], t[8]));	/* ........ACEGIKMO */
2527  emit_insn (gen_sse2_punpcklbw (t[11], t[9], t[8]));	/* ........BDFHJLNP */
2528
2529  op0 = operands[0];
2530  emit_insn (gen_sse2_punpcklbw (op0, t[11], t[10]));	/* ABCDEFGHIJKLMNOP */
2531  DONE;
2532})
2533
;; V8HI multiply.  The expander only fixes up the operands via
;; ix86_fixup_binary_operands_no_copy; the anonymous insn below then
;; matches and emits pmullw.
2534(define_expand "mulv8hi3"
2535  [(set (match_operand:V8HI 0 "register_operand" "")
2536	(mult:V8HI (match_operand:V8HI 1 "nonimmediate_operand" "")
2537		   (match_operand:V8HI 2 "nonimmediate_operand" "")))]
2538  "TARGET_SSE2"
2539  "ix86_fixup_binary_operands_no_copy (MULT, V8HImode, operands);")
2540
;; pmullw: operand 1 is tied to the destination and commutative with
;; operand 2 ("%0"); operand 2 may be a register or memory.
2541(define_insn "*mulv8hi3"
2542  [(set (match_operand:V8HI 0 "register_operand" "=x")
2543	(mult:V8HI (match_operand:V8HI 1 "nonimmediate_operand" "%0")
2544		   (match_operand:V8HI 2 "nonimmediate_operand" "xm")))]
2545  "TARGET_SSE2 && ix86_binary_operator_ok (MULT, V8HImode, operands)"
2546  "pmullw\t{%2, %0|%0, %2}"
2547  [(set_attr "type" "sseimul")
2548   (set_attr "mode" "TI")])
2549
;; pmulhw: signed high-part multiply.  Both V8HI inputs are
;; sign-extended to V8SI, multiplied, shifted right by 16 and truncated
;; back to V8HI.
2550(define_insn "sse2_smulv8hi3_highpart"
2551  [(set (match_operand:V8HI 0 "register_operand" "=x")
2552	(truncate:V8HI
2553	  (lshiftrt:V8SI
2554	    (mult:V8SI
2555	      (sign_extend:V8SI
2556		(match_operand:V8HI 1 "nonimmediate_operand" "%0"))
2557	      (sign_extend:V8SI
2558		(match_operand:V8HI 2 "nonimmediate_operand" "xm")))
2559	    (const_int 16))))]
2560  "TARGET_SSE2 && ix86_binary_operator_ok (MULT, V8HImode, operands)"
2561  "pmulhw\t{%2, %0|%0, %2}"
2562  [(set_attr "type" "sseimul")
2563   (set_attr "mode" "TI")])
2564
;; pmulhuw: unsigned high-part multiply.  Both V8HI inputs are
;; zero-extended to V8SI, multiplied, shifted right by 16 and truncated
;; back to V8HI.
2565(define_insn "sse2_umulv8hi3_highpart"
2566  [(set (match_operand:V8HI 0 "register_operand" "=x")
2567	(truncate:V8HI
2568	  (lshiftrt:V8SI
2569	    (mult:V8SI
2570	      (zero_extend:V8SI
2571		(match_operand:V8HI 1 "nonimmediate_operand" "%0"))
2572	      (zero_extend:V8SI
2573		(match_operand:V8HI 2 "nonimmediate_operand" "xm")))
2574	    (const_int 16))))]
2575  "TARGET_SSE2 && ix86_binary_operator_ok (MULT, V8HImode, operands)"
2576  "pmulhuw\t{%2, %0|%0, %2}"
2577  [(set_attr "type" "sseimul")
2578   (set_attr "mode" "TI")])
2579
;; pmuludq: unsigned widening multiply.  Elements 0 and 2 of each V4SI
;; input are zero-extended to DImode and multiplied, giving a V2DI
;; result.  Operand 1 is tied to the destination and commutative with
;; operand 2 ("%0").
;; The operands checked by ix86_binary_operator_ok are V4SI, so V4SImode
;; is the mode passed to it (not V8HImode, which belongs to the pmulh*
;; patterns).
2580(define_insn "sse2_umulv2siv2di3"
2581  [(set (match_operand:V2DI 0 "register_operand" "=x")
2582	(mult:V2DI
2583	  (zero_extend:V2DI
2584	    (vec_select:V2SI
2585	      (match_operand:V4SI 1 "nonimmediate_operand" "%0")
2586	      (parallel [(const_int 0) (const_int 2)])))
2587	  (zero_extend:V2DI
2588	    (vec_select:V2SI
2589	      (match_operand:V4SI 2 "nonimmediate_operand" "xm")
2590	      (parallel [(const_int 0) (const_int 2)])))))]
2591  "TARGET_SSE2 && ix86_binary_operator_ok (MULT, V4SImode, operands)"
2592  "pmuludq\t{%2, %0|%0, %2}"
2593  [(set_attr "type" "sseimul")
2594   (set_attr "mode" "TI")])
2595
;; pmaddwd: multiply-and-add of signed words.  The even-indexed
;; elements (0,2,4,6) of the two V8HI inputs are sign-extended and
;; multiplied, likewise the odd-indexed elements (1,3,5,7), and the two
;; V4SI product vectors are added.
;; NOTE(review): unlike the neighbouring commutative patterns, the
;; condition does not call ix86_binary_operator_ok even though both
;; inputs are nonimmediate_operand -- confirm this is intentional.
2596(define_insn "sse2_pmaddwd"
2597  [(set (match_operand:V4SI 0 "register_operand" "=x")
2598	(plus:V4SI
2599	  (mult:V4SI
2600	    (sign_extend:V4SI
2601	      (vec_select:V4HI
2602		(match_operand:V8HI 1 "nonimmediate_operand" "%0")
2603		(parallel [(const_int 0)
2604			   (const_int 2)
2605			   (const_int 4)
2606			   (const_int 6)])))
2607	    (sign_extend:V4SI
2608	      (vec_select:V4HI
2609		(match_operand:V8HI 2 "nonimmediate_operand" "xm")
2610		(parallel [(const_int 0)
2611			   (const_int 2)
2612			   (const_int 4)
2613			   (const_int 6)]))))
2614	  (mult:V4SI
2615	    (sign_extend:V4SI
2616	      (vec_select:V4HI (match_dup 1)
2617		(parallel [(const_int 1)
2618			   (const_int 3)
2619			   (const_int 5)
2620			   (const_int 7)])))
2621	    (sign_extend:V4SI
2622	      (vec_select:V4HI (match_dup 2)
2623		(parallel [(const_int 1)
2624			   (const_int 3)
2625			   (const_int 5)
2626			   (const_int 7)]))))))]
2627  "TARGET_SSE2"
2628  "pmaddwd\t{%2, %0|%0, %2}"
2629  [(set_attr "type" "sseiadd")
2630   (set_attr "mode" "TI")])
2631
;; V4SI multiply, expanded into a sequence of existing patterns:
;; gen_sse2_umulv2siv2di3 multiplies elements 0 and 2; both inputs are
;; then shifted with gen_sse2_lshrti3 (TImode, constant 32) so that
;; elements 1 and 3 take their place and are multiplied the same way;
;; gen_sse2_pshufd_1 and gen_sse2_punpckldq finally gather the products
;; into operand 0.  Six fresh V4SI pseudo registers hold the
;; intermediates, viewed through gen_lowpart where another mode is
;; needed.
2632(define_expand "mulv4si3"
2633  [(set (match_operand:V4SI 0 "register_operand" "")
2634	(mult:V4SI (match_operand:V4SI 1 "register_operand" "")
2635		   (match_operand:V4SI 2 "register_operand" "")))]
2636  "TARGET_SSE2"
2637{
2638  rtx t1, t2, t3, t4, t5, t6, thirtytwo;
2639  rtx op0, op1, op2;
2640
2641  op0 = operands[0];
2642  op1 = operands[1];
2643  op2 = operands[2];
2644  t1 = gen_reg_rtx (V4SImode);
2645  t2 = gen_reg_rtx (V4SImode);
2646  t3 = gen_reg_rtx (V4SImode);
2647  t4 = gen_reg_rtx (V4SImode);
2648  t5 = gen_reg_rtx (V4SImode);
2649  t6 = gen_reg_rtx (V4SImode);
2650  thirtytwo = GEN_INT (32);
2651
2652  /* Multiply elements 2 and 0.  */
2653  emit_insn (gen_sse2_umulv2siv2di3 (gen_lowpart (V2DImode, t1), op1, op2));
2654
2655  /* Shift both input vectors down one element, so that elements 3 and 1
2656     are now in the slots for elements 2 and 0.  For K8, at least, this is
2657     faster than using a shuffle.  */
2658  emit_insn (gen_sse2_lshrti3 (gen_lowpart (TImode, t2),
2659			       gen_lowpart (TImode, op1), thirtytwo));
2660  emit_insn (gen_sse2_lshrti3 (gen_lowpart (TImode, t3),
2661			       gen_lowpart (TImode, op2), thirtytwo));
2662
2663  /* Multiply elements 3 and 1.  */
2664  emit_insn (gen_sse2_umulv2siv2di3 (gen_lowpart (V2DImode, t4), t2, t3));
2665
2666  /* Move the results in element 2 down to element 1; we don't care what
2667     goes in elements 2 and 3.  */
2668  emit_insn (gen_sse2_pshufd_1 (t5, t1, const0_rtx, const2_rtx,
2669				const0_rtx, const0_rtx));
2670  emit_insn (gen_sse2_pshufd_1 (t6, t4, const0_rtx, const2_rtx,
2671				const0_rtx, const0_rtx));
2672
2673  /* Merge the parts back together.  */
2674  emit_insn (gen_sse2_punpckldq (op0, t5, t6));
2675  DONE;
2676})
2677
;; Expander for V2DI multiplication, enabled by TARGET_SSE2.  The RTL
;; template is never emitted (the C body ends in DONE).  The result is
;; built as t1 + (t4 << 32) + (t5 << 32), where t1 is the product of the
;; two inputs viewed as V4SI, and t4/t5 are the products of each input
;; with the other input shifted right by 32 bits.  All multiplies go
;; through sse2_umulv2siv2di3; the shifts and adds use lshrv2di3,
;; ashlv2di3 and addv2di3.
2678(define_expand "mulv2di3"
2679  [(set (match_operand:V2DI 0 "register_operand" "")
2680	(mult:V2DI (match_operand:V2DI 1 "register_operand" "")
2681		   (match_operand:V2DI 2 "register_operand" "")))]
2682  "TARGET_SSE2"
2683{
2684  rtx t1, t2, t3, t4, t5, t6, thirtytwo;
2685  rtx op0, op1, op2;
2686
2687  op0 = operands[0];
2688  op1 = operands[1];
2689  op2 = operands[2];
2690  t1 = gen_reg_rtx (V2DImode);
2691  t2 = gen_reg_rtx (V2DImode);
2692  t3 = gen_reg_rtx (V2DImode);
2693  t4 = gen_reg_rtx (V2DImode);
2694  t5 = gen_reg_rtx (V2DImode);
2695  t6 = gen_reg_rtx (V2DImode);
2696  thirtytwo = GEN_INT (32);
2697
2698  /* Multiply low parts.  */
2699  emit_insn (gen_sse2_umulv2siv2di3 (t1, gen_lowpart (V4SImode, op1),
2700				     gen_lowpart (V4SImode, op2)));
2701
2702  /* Shift input vectors right 32 bits so the high parts can be multiplied.  */
2703  emit_insn (gen_lshrv2di3 (t2, op1, thirtytwo));
2704  emit_insn (gen_lshrv2di3 (t3, op2, thirtytwo));
2705
2706  /* Multiply high parts by low parts.  */
2707  emit_insn (gen_sse2_umulv2siv2di3 (t4, gen_lowpart (V4SImode, op1),
2708				     gen_lowpart (V4SImode, t3)));
2709  emit_insn (gen_sse2_umulv2siv2di3 (t5, gen_lowpart (V4SImode, op2),
2710				     gen_lowpart (V4SImode, t2)));
2711
2712  /* Shift them back.  */
2713  emit_insn (gen_ashlv2di3 (t4, t4, thirtytwo));
2714  emit_insn (gen_ashlv2di3 (t5, t5, thirtytwo));
2715
2716  /* Add the three parts together.  */
2717  emit_insn (gen_addv2di3 (t6, t1, t4));
2718  emit_insn (gen_addv2di3 (op0, t6, t5));
2719  DONE;
2720})
2721
;; Per-element arithmetic right shift (ashiftrt) for the SSEMODE24 modes
;; (V8HI, V4SI).  Emits psra<ssevecsize>, i.e. psraw or psrad.  Operand 1 is
;; tied to the destination ("0"); the TImode count in operand 2 is either
;; an "x" register or an integer constant ("n").
2722(define_insn "ashr<mode>3"
2723  [(set (match_operand:SSEMODE24 0 "register_operand" "=x")
2724	(ashiftrt:SSEMODE24
2725	  (match_operand:SSEMODE24 1 "register_operand" "0")
2726	  (match_operand:TI 2 "nonmemory_operand" "xn")))]
2727  "TARGET_SSE2"
2728  "psra<ssevecsize>\t{%2, %0|%0, %2}"
2729  [(set_attr "type" "sseishft")
2730   (set_attr "mode" "TI")])
2731
;; Per-element logical right shift (lshiftrt) for the SSEMODE248 modes
;; (V8HI, V4SI, V2DI).  Emits psrl<ssevecsize>, i.e. psrlw, psrld or psrlq.
;; Operand 1 is tied to the destination; the TImode count in operand 2 is
;; either an "x" register or an integer constant ("n").
2732(define_insn "lshr<mode>3"
2733  [(set (match_operand:SSEMODE248 0 "register_operand" "=x")
2734	(lshiftrt:SSEMODE248
2735	  (match_operand:SSEMODE248 1 "register_operand" "0")
2736	  (match_operand:TI 2 "nonmemory_operand" "xn")))]
2737  "TARGET_SSE2"
2738  "psrl<ssevecsize>\t{%2, %0|%0, %2}"
2739  [(set_attr "type" "sseishft")
2740   (set_attr "mode" "TI")])
2741
;; Per-element left shift (ashift) for the SSEMODE248 modes (V8HI, V4SI,
;; V2DI).  Emits psll<ssevecsize>, i.e. psllw, pslld or psllq.  Operand 1 is
;; tied to the destination; the TImode count in operand 2 is either an "x"
;; register or an integer constant ("n").
2742(define_insn "ashl<mode>3"
2743  [(set (match_operand:SSEMODE248 0 "register_operand" "=x")
2744	(ashift:SSEMODE248
2745	  (match_operand:SSEMODE248 1 "register_operand" "0")
2746	  (match_operand:TI 2 "nonmemory_operand" "xn")))]
2747  "TARGET_SSE2"
2748  "psll<ssevecsize>\t{%2, %0|%0, %2}"
2749  [(set_attr "type" "sseishft")
2750   (set_attr "mode" "TI")])
2751
;; Left shift of a whole TImode register by a constant.  The RTL count in
;; operand 2 is expressed in bits and must satisfy
;; const_0_to_255_mul_8_operand; the output code divides it by 8 before
;; printing because the pslldq immediate is emitted as that quotient.
;; Operand 1 is tied to the destination.
2752(define_insn "sse2_ashlti3"
2753  [(set (match_operand:TI 0 "register_operand" "=x")
2754	(ashift:TI (match_operand:TI 1 "register_operand" "0")
2755		   (match_operand:SI 2 "const_0_to_255_mul_8_operand" "n")))]
2756  "TARGET_SSE2"
2757{
2758  operands[2] = GEN_INT (INTVAL (operands[2]) / 8);
2759  return "pslldq\t{%2, %0|%0, %2}";
2760}
2761  [(set_attr "type" "sseishft")
2762   (set_attr "mode" "TI")])
2763
;; Whole-vector left shift expander for every SSEMODEI mode.  The
;; preparation code FAILs unless operand 2 satisfies
;; const_0_to_255_mul_8_operand, then replaces operands 0 and 1 with their
;; TImode lowparts so the emitted set is a TImode ashift.
;; NOTE(review): the resulting RTL has the shape of sse2_ashlti3 and is
;; presumably recognized by it -- confirm.
2764(define_expand "vec_shl_<mode>"
2765  [(set (match_operand:SSEMODEI 0 "register_operand" "")
2766        (ashift:TI (match_operand:SSEMODEI 1 "register_operand" "")
2767		   (match_operand:SI 2 "general_operand" "")))]
2768  "TARGET_SSE2"
2769{
2770  if (!const_0_to_255_mul_8_operand (operands[2], SImode))
2771    FAIL;
2772  operands[0] = gen_lowpart (TImode, operands[0]);
2773  operands[1] = gen_lowpart (TImode, operands[1]);
2774})
2775
;; Logical right shift of a whole TImode register by a constant.  The RTL
;; count in operand 2 is expressed in bits and must satisfy
;; const_0_to_255_mul_8_operand; the output code divides it by 8 before
;; printing because the psrldq immediate is emitted as that quotient.
;; Operand 1 is tied to the destination.
2776(define_insn "sse2_lshrti3"
2777  [(set (match_operand:TI 0 "register_operand" "=x")
2778 	(lshiftrt:TI (match_operand:TI 1 "register_operand" "0")
2779		     (match_operand:SI 2 "const_0_to_255_mul_8_operand" "n")))]
2780  "TARGET_SSE2"
2781{
2782  operands[2] = GEN_INT (INTVAL (operands[2]) / 8);
2783  return "psrldq\t{%2, %0|%0, %2}";
2784}
2785  [(set_attr "type" "sseishft")
2786   (set_attr "mode" "TI")])
2787
;; Whole-vector logical right shift expander for every SSEMODEI mode.  The
;; preparation code FAILs unless operand 2 satisfies
;; const_0_to_255_mul_8_operand, then replaces operands 0 and 1 with their
;; TImode lowparts so the emitted set is a TImode lshiftrt.
;; NOTE(review): the resulting RTL has the shape of sse2_lshrti3 and is
;; presumably recognized by it -- confirm.
2788(define_expand "vec_shr_<mode>"
2789  [(set (match_operand:SSEMODEI 0 "register_operand" "")
2790        (lshiftrt:TI (match_operand:SSEMODEI 1 "register_operand" "")
2791		     (match_operand:SI 2 "general_operand" "")))]
2792  "TARGET_SSE2"
2793{
2794  if (!const_0_to_255_mul_8_operand (operands[2], SImode))
2795    FAIL;
2796  operands[0] = gen_lowpart (TImode, operands[0]);
2797  operands[1] = gen_lowpart (TImode, operands[1]);
2798})
2799
;; Named expander for unsigned maximum on V16QI.  Before the template is
;; emitted, the operands are passed through
;; ix86_fixup_binary_operands_no_copy (UMAX, V16QImode, operands).
2800(define_expand "umaxv16qi3"
2801  [(set (match_operand:V16QI 0 "register_operand" "")
2802	(umax:V16QI (match_operand:V16QI 1 "nonimmediate_operand" "")
2803		    (match_operand:V16QI 2 "nonimmediate_operand" "")))]
2804  "TARGET_SSE2"
2805  "ix86_fixup_binary_operands_no_copy (UMAX, V16QImode, operands);")
2806
;; Matching insn for unsigned V16QI maximum; emits pmaxub.  The "%0"
;; constraint ties operand 1 to the destination and marks operands 1 and 2
;; as commutative.  The insn condition additionally requires
;; ix86_binary_operator_ok to accept the operands.
2807(define_insn "*umaxv16qi3"
2808  [(set (match_operand:V16QI 0 "register_operand" "=x")
2809	(umax:V16QI (match_operand:V16QI 1 "nonimmediate_operand" "%0")
2810		    (match_operand:V16QI 2 "nonimmediate_operand" "xm")))]
2811  "TARGET_SSE2 && ix86_binary_operator_ok (UMAX, V16QImode, operands)"
2812  "pmaxub\t{%2, %0|%0, %2}"
2813  [(set_attr "type" "sseiadd")
2814   (set_attr "mode" "TI")])
2815
;; Named expander for signed maximum on V8HI.  Before the template is
;; emitted, the operands are passed through
;; ix86_fixup_binary_operands_no_copy (SMAX, V8HImode, operands).
2816(define_expand "smaxv8hi3"
2817  [(set (match_operand:V8HI 0 "register_operand" "")
2818	(smax:V8HI (match_operand:V8HI 1 "nonimmediate_operand" "")
2819		   (match_operand:V8HI 2 "nonimmediate_operand" "")))]
2820  "TARGET_SSE2"
2821  "ix86_fixup_binary_operands_no_copy (SMAX, V8HImode, operands);")
2822
;; Matching insn for signed V8HI maximum; emits pmaxsw.  The "%0"
;; constraint ties operand 1 to the destination and marks operands 1 and 2
;; as commutative.  The insn condition additionally requires
;; ix86_binary_operator_ok to accept the operands.
2823(define_insn "*smaxv8hi3"
2824  [(set (match_operand:V8HI 0 "register_operand" "=x")
2825	(smax:V8HI (match_operand:V8HI 1 "nonimmediate_operand" "%0")
2826		   (match_operand:V8HI 2 "nonimmediate_operand" "xm")))]
2827  "TARGET_SSE2 && ix86_binary_operator_ok (SMAX, V8HImode, operands)"
2828  "pmaxsw\t{%2, %0|%0, %2}"
2829  [(set_attr "type" "sseiadd")
2830   (set_attr "mode" "TI")])
2831
;; Expander for unsigned maximum on V8HI, synthesized from two sets:
;; first an unsigned saturating subtract (us_minus) of operand 2 from
;; operand 1, then an addition of operand 2 to that difference, using the
;; identity umax (a, b) = us_minus (a, b) + b.
;; The preparation code saves the real destination in operands[3].  If the
;; destination is the same rtx as operand 2, a fresh V8HI pseudo is
;; substituted for operand 0 so the subtract does not overwrite operand 2
;; before the add reads it.
2832(define_expand "umaxv8hi3"
2833  [(set (match_operand:V8HI 0 "register_operand" "=x")
2834	(us_minus:V8HI (match_operand:V8HI 1 "register_operand" "0")
2835		       (match_operand:V8HI 2 "nonimmediate_operand" "xm")))
2836   (set (match_dup 3)
2837	(plus:V8HI (match_dup 0) (match_dup 2)))]
2838  "TARGET_SSE2"
2839{
2840  operands[3] = operands[0];
2841  if (rtx_equal_p (operands[0], operands[2]))
2842    operands[0] = gen_reg_rtx (V8HImode);
2843})
2844
;; Expander for signed maximum on the SSEMODE14 modes (V16QI, V4SI).  It
;; is implemented as a vector conditional select: the six-entry xops array
;; uses the same operand layout as vcond<mode> (0 = destination, 1 = value
;; when the comparison holds, 2 = value otherwise, 3 = comparison, 4/5 =
;; compared values) and is handed to ix86_expand_int_vcond.  With
;; GT (op1, op2) selecting op1 else op2, the result is the larger value.
;; A false return from ix86_expand_int_vcond trips gcc_assert.
2845(define_expand "smax<mode>3"
2846  [(set (match_operand:SSEMODE14 0 "register_operand" "")
2847	(smax:SSEMODE14 (match_operand:SSEMODE14 1 "register_operand" "")
2848			(match_operand:SSEMODE14 2 "register_operand" "")))]
2849  "TARGET_SSE2"
2850{
2851  rtx xops[6];
2852  bool ok;
2853
2854  xops[0] = operands[0];
2855  xops[1] = operands[1];
2856  xops[2] = operands[2];
2857  xops[3] = gen_rtx_GT (VOIDmode, operands[1], operands[2]);
2858  xops[4] = operands[1];
2859  xops[5] = operands[2];
2860  ok = ix86_expand_int_vcond (xops);
2861  gcc_assert (ok);
2862  DONE;
2863})
2864
;; Expander for unsigned maximum on V4SI, implemented as a vector
;; conditional select.  The xops array uses the vcond<mode> operand layout
;; (0 = destination, 1 = value when the comparison holds, 2 = value
;; otherwise, 3 = comparison, 4/5 = compared values) and is handed to
;; ix86_expand_int_vcond.  With GTU (op1, op2) selecting op1 else op2, the
;; result is the larger unsigned value.  A false return trips gcc_assert.
2865(define_expand "umaxv4si3"
2866  [(set (match_operand:V4SI 0 "register_operand" "")
2867	(umax:V4SI (match_operand:V4SI 1 "register_operand" "")
2868		   (match_operand:V4SI 2 "register_operand" "")))]
2869  "TARGET_SSE2"
2870{
2871  rtx xops[6];
2872  bool ok;
2873
2874  xops[0] = operands[0];
2875  xops[1] = operands[1];
2876  xops[2] = operands[2];
2877  xops[3] = gen_rtx_GTU (VOIDmode, operands[1], operands[2]);
2878  xops[4] = operands[1];
2879  xops[5] = operands[2];
2880  ok = ix86_expand_int_vcond (xops);
2881  gcc_assert (ok);
2882  DONE;
2883})
2884
;; Named expander for unsigned minimum on V16QI.  Before the template is
;; emitted, the operands are passed through
;; ix86_fixup_binary_operands_no_copy (UMIN, V16QImode, operands).
2885(define_expand "uminv16qi3"
2886  [(set (match_operand:V16QI 0 "register_operand" "")
2887	(umin:V16QI (match_operand:V16QI 1 "nonimmediate_operand" "")
2888		    (match_operand:V16QI 2 "nonimmediate_operand" "")))]
2889  "TARGET_SSE2"
2890  "ix86_fixup_binary_operands_no_copy (UMIN, V16QImode, operands);")
2891
;; Matching insn for unsigned V16QI minimum; emits pminub.  The "%0"
;; constraint ties operand 1 to the destination and marks operands 1 and 2
;; as commutative.  The insn condition additionally requires
;; ix86_binary_operator_ok to accept the operands.
2892(define_insn "*uminv16qi3"
2893  [(set (match_operand:V16QI 0 "register_operand" "=x")
2894	(umin:V16QI (match_operand:V16QI 1 "nonimmediate_operand" "%0")
2895		    (match_operand:V16QI 2 "nonimmediate_operand" "xm")))]
2896  "TARGET_SSE2 && ix86_binary_operator_ok (UMIN, V16QImode, operands)"
2897  "pminub\t{%2, %0|%0, %2}"
2898  [(set_attr "type" "sseiadd")
2899   (set_attr "mode" "TI")])
2900
;; Named expander for signed minimum on V8HI.  Before the template is
;; emitted, the operands are passed through
;; ix86_fixup_binary_operands_no_copy (SMIN, V8HImode, operands).
2901(define_expand "sminv8hi3"
2902  [(set (match_operand:V8HI 0 "register_operand" "")
2903	(smin:V8HI (match_operand:V8HI 1 "nonimmediate_operand" "")
2904		   (match_operand:V8HI 2 "nonimmediate_operand" "")))]
2905  "TARGET_SSE2"
2906  "ix86_fixup_binary_operands_no_copy (SMIN, V8HImode, operands);")
2907
;; Matching insn for signed V8HI minimum; emits pminsw.  The "%0"
;; constraint ties operand 1 to the destination and marks operands 1 and 2
;; as commutative.  The insn condition additionally requires
;; ix86_binary_operator_ok to accept the operands.
2908(define_insn "*sminv8hi3"
2909  [(set (match_operand:V8HI 0 "register_operand" "=x")
2910	(smin:V8HI (match_operand:V8HI 1 "nonimmediate_operand" "%0")
2911		   (match_operand:V8HI 2 "nonimmediate_operand" "xm")))]
2912  "TARGET_SSE2 && ix86_binary_operator_ok (SMIN, V8HImode, operands)"
2913  "pminsw\t{%2, %0|%0, %2}"
2914  [(set_attr "type" "sseiadd")
2915   (set_attr "mode" "TI")])
2916
;; Expander for signed minimum on the SSEMODE14 modes (V16QI, V4SI),
;; implemented as a vector conditional select through
;; ix86_expand_int_vcond.  The xops array uses the vcond<mode> operand
;; layout (1 = value when the comparison holds, 2 = value otherwise).
;; Note that xops[1] and xops[2] are deliberately swapped relative to
;; operands[1] and operands[2]: when GT (op1, op2) holds the result is op2,
;; otherwise op1, i.e. the smaller value.  A false return trips gcc_assert.
2917(define_expand "smin<mode>3"
2918  [(set (match_operand:SSEMODE14 0 "register_operand" "")
2919	(smin:SSEMODE14 (match_operand:SSEMODE14 1 "register_operand" "")
2920			(match_operand:SSEMODE14 2 "register_operand" "")))]
2921  "TARGET_SSE2"
2922{
2923  rtx xops[6];
2924  bool ok;
2925
2926  xops[0] = operands[0];
2927  xops[1] = operands[2];
2928  xops[2] = operands[1];
2929  xops[3] = gen_rtx_GT (VOIDmode, operands[1], operands[2]);
2930  xops[4] = operands[1];
2931  xops[5] = operands[2];
2932  ok = ix86_expand_int_vcond (xops);
2933  gcc_assert (ok);
2934  DONE;
2935})
2936
;; Expander for unsigned minimum on the SSEMODE24 modes (V8HI, V4SI),
;; implemented as a vector conditional select through
;; ix86_expand_int_vcond.  The xops array uses the vcond<mode> operand
;; layout (1 = value when the comparison holds, 2 = value otherwise).
;; Note that xops[1] and xops[2] are deliberately swapped relative to
;; operands[1] and operands[2]: when GTU (op1, op2) holds the result is
;; op2, otherwise op1, i.e. the smaller unsigned value.  A false return
;; trips gcc_assert.
2937(define_expand "umin<mode>3"
2938  [(set (match_operand:SSEMODE24 0 "register_operand" "")
2939	(umin:SSEMODE24 (match_operand:SSEMODE24 1 "register_operand" "")
2940			(match_operand:SSEMODE24 2 "register_operand" "")))]
2941  "TARGET_SSE2"
2942{
2943  rtx xops[6];
2944  bool ok;
2945
2946  xops[0] = operands[0];
2947  xops[1] = operands[2];
2948  xops[2] = operands[1];
2949  xops[3] = gen_rtx_GTU (VOIDmode, operands[1], operands[2]);
2950  xops[4] = operands[1];
2951  xops[5] = operands[2];
2952  ok = ix86_expand_int_vcond (xops);
2953  gcc_assert (ok);
2954  DONE;
2955})
2956
2957;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
2958;;
2959;; Parallel integral comparisons
2960;;
2961;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
2962
;; Element-wise equality comparison for the SSEMODE124 modes (V16QI, V8HI,
;; V4SI).  Emits pcmpeq<ssevecsize>, i.e. pcmpeqb, pcmpeqw or pcmpeqd.
;; The "%0" constraint ties operand 1 to the destination and marks
;; operands 1 and 2 as commutative; the insn condition additionally
;; requires ix86_binary_operator_ok to accept the operands.
2963(define_insn "sse2_eq<mode>3"
2964  [(set (match_operand:SSEMODE124 0 "register_operand" "=x")
2965	(eq:SSEMODE124
2966	  (match_operand:SSEMODE124 1 "nonimmediate_operand" "%0")
2967	  (match_operand:SSEMODE124 2 "nonimmediate_operand" "xm")))]
2968  "TARGET_SSE2 && ix86_binary_operator_ok (EQ, <MODE>mode, operands)"
2969  "pcmpeq<ssevecsize>\t{%2, %0|%0, %2}"
2970  [(set_attr "type" "ssecmp")
2971   (set_attr "mode" "TI")])
2972
;; Element-wise greater-than comparison (gt) for the SSEMODE124 modes
;; (V16QI, V8HI, V4SI).  Emits pcmpgt<ssevecsize>, i.e. pcmpgtb, pcmpgtw or
;; pcmpgtd.  Unlike the equality pattern there is no "%" marker: operand 1
;; must be a register tied to the destination, operand 2 may be a register
;; or memory.
2973(define_insn "sse2_gt<mode>3"
2974  [(set (match_operand:SSEMODE124 0 "register_operand" "=x")
2975	(gt:SSEMODE124
2976	  (match_operand:SSEMODE124 1 "register_operand" "0")
2977	  (match_operand:SSEMODE124 2 "nonimmediate_operand" "xm")))]
2978  "TARGET_SSE2"
2979  "pcmpgt<ssevecsize>\t{%2, %0|%0, %2}"
2980  [(set_attr "type" "ssecmp")
2981   (set_attr "mode" "TI")])
2982
;; Vector conditional select expander for the SSEMODE124 modes.  Operand
;; layout: 0 = destination, 3 = comparison operator applied to operands 4
;; and 5, 1 = value selected when the comparison holds, 2 = value selected
;; otherwise.  All the work is delegated to ix86_expand_int_vcond; the
;; expansion is DONE when it returns true and FAILs otherwise.
2983(define_expand "vcond<mode>"
2984  [(set (match_operand:SSEMODE124 0 "register_operand" "")
2985        (if_then_else:SSEMODE124
2986          (match_operator 3 ""
2987            [(match_operand:SSEMODE124 4 "nonimmediate_operand" "")
2988             (match_operand:SSEMODE124 5 "nonimmediate_operand" "")])
2989          (match_operand:SSEMODE124 1 "general_operand" "")
2990          (match_operand:SSEMODE124 2 "general_operand" "")))]
2991  "TARGET_SSE2"
2992{
2993  if (ix86_expand_int_vcond (operands))
2994    DONE;
2995  else
2996    FAIL;
2997})
2998
;; Expander named vcondu<mode> for the SSEMODE124 modes.  Its template and
;; preparation code are the same as vcond<mode>: operand 3 is the
;; comparison operator over operands 4 and 5, operand 1 is selected when it
;; holds and operand 2 otherwise.  The work is delegated to
;; ix86_expand_int_vcond; the expansion is DONE when it returns true and
;; FAILs otherwise.
2999(define_expand "vcondu<mode>"
3000  [(set (match_operand:SSEMODE124 0 "register_operand" "")
3001        (if_then_else:SSEMODE124
3002          (match_operator 3 ""
3003            [(match_operand:SSEMODE124 4 "nonimmediate_operand" "")
3004             (match_operand:SSEMODE124 5 "nonimmediate_operand" "")])
3005          (match_operand:SSEMODE124 1 "general_operand" "")
3006          (match_operand:SSEMODE124 2 "general_operand" "")))]
3007  "TARGET_SSE2"
3008{
3009  if (ix86_expand_int_vcond (operands))
3010    DONE;
3011  else
3012    FAIL;
3013})
3014
3015;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
3016;;
3017;; Parallel integral logical operations
3018;;
3019;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
3020
;; Bitwise complement expander for every SSEMODEI mode, expressed as an
;; XOR with an all-ones vector.  The preparation code builds a
;; CONST_VECTOR whose GET_MODE_NUNITS (<MODE>mode) elements are all
;; constm1_rtx, forces it into a register, and stores that register as
;; operand 2 for the (match_dup 2) in the template.
3021(define_expand "one_cmpl<mode>2"
3022  [(set (match_operand:SSEMODEI 0 "register_operand" "")
3023	(xor:SSEMODEI (match_operand:SSEMODEI 1 "nonimmediate_operand" "")
3024		      (match_dup 2)))]
3025  "TARGET_SSE2"
3026{
3027  int i, n = GET_MODE_NUNITS (<MODE>mode);
3028  rtvec v = rtvec_alloc (n);
3029
3030  for (i = 0; i < n; ++i)
3031    RTVEC_ELT (v, i) = constm1_rtx;
3032
3033  operands[2] = force_reg (<MODE>mode, gen_rtx_CONST_VECTOR (<MODE>mode, v));
3034})
3035
;; Named expander for bitwise AND on every SSEMODEI mode.  Before the
;; template is emitted, the operands are passed through
;; ix86_fixup_binary_operands_no_copy (AND, <MODE>mode, operands).
3036(define_expand "and<mode>3"
3037  [(set (match_operand:SSEMODEI 0 "register_operand" "")
3038	(and:SSEMODEI (match_operand:SSEMODEI 1 "nonimmediate_operand" "")
3039		      (match_operand:SSEMODEI 2 "nonimmediate_operand" "")))]
3040  "TARGET_SSE2"
3041  "ix86_fixup_binary_operands_no_copy (AND, <MODE>mode, operands);")
3042
;; Matching insn for bitwise AND on every SSEMODEI mode; emits pand
;; regardless of element size.  The "%0" constraint ties operand 1 to the
;; destination and marks operands 1 and 2 as commutative; the insn
;; condition additionally requires ix86_binary_operator_ok.
3043(define_insn "*and<mode>3"
3044  [(set (match_operand:SSEMODEI 0 "register_operand" "=x")
3045	(and:SSEMODEI
3046	  (match_operand:SSEMODEI 1 "nonimmediate_operand" "%0")
3047	  (match_operand:SSEMODEI 2 "nonimmediate_operand" "xm")))]
3048  "TARGET_SSE2 && ix86_binary_operator_ok (AND, <MODE>mode, operands)"
3049  "pand\t{%2, %0|%0, %2}"
3050  [(set_attr "type" "sselog")
3051   (set_attr "mode" "TI")])
3052
;; AND-NOT for every SSEMODEI mode: the result is (NOT operand 1) AND
;; operand 2, emitted as pandn.  Despite the "nand" in the pattern name,
;; only operand 1 is complemented.  Operand 1 must be a register tied to
;; the destination; operand 2 may be a register or memory.
3053(define_insn "sse2_nand<mode>3"
3054  [(set (match_operand:SSEMODEI 0 "register_operand" "=x")
3055	(and:SSEMODEI
3056	  (not:SSEMODEI (match_operand:SSEMODEI 1 "register_operand" "0"))
3057	  (match_operand:SSEMODEI 2 "nonimmediate_operand" "xm")))]
3058  "TARGET_SSE2"
3059  "pandn\t{%2, %0|%0, %2}"
3060  [(set_attr "type" "sselog")
3061   (set_attr "mode" "TI")])
3062
;; Named expander for bitwise inclusive OR on every SSEMODEI mode.  Before
;; the template is emitted, the operands are passed through
;; ix86_fixup_binary_operands_no_copy (IOR, <MODE>mode, operands).
3063(define_expand "ior<mode>3"
3064  [(set (match_operand:SSEMODEI 0 "register_operand" "")
3065	(ior:SSEMODEI (match_operand:SSEMODEI 1 "nonimmediate_operand" "")
3066		      (match_operand:SSEMODEI 2 "nonimmediate_operand" "")))]
3067  "TARGET_SSE2"
3068  "ix86_fixup_binary_operands_no_copy (IOR, <MODE>mode, operands);")
3069
;; Matching insn for bitwise inclusive OR on every SSEMODEI mode; emits
;; por regardless of element size.  The "%0" constraint ties operand 1 to
;; the destination and marks operands 1 and 2 as commutative; the insn
;; condition additionally requires ix86_binary_operator_ok.
3070(define_insn "*ior<mode>3"
3071  [(set (match_operand:SSEMODEI 0 "register_operand" "=x")
3072	(ior:SSEMODEI
3073	  (match_operand:SSEMODEI 1 "nonimmediate_operand" "%0")
3074	  (match_operand:SSEMODEI 2 "nonimmediate_operand" "xm")))]
3075  "TARGET_SSE2 && ix86_binary_operator_ok (IOR, <MODE>mode, operands)"
3076  "por\t{%2, %0|%0, %2}"
3077  [(set_attr "type" "sselog")
3078   (set_attr "mode" "TI")])
3079
;; Named expander for bitwise exclusive OR on every SSEMODEI mode.  Before
;; the template is emitted, the operands are passed through
;; ix86_fixup_binary_operands_no_copy (XOR, <MODE>mode, operands).
3080(define_expand "xor<mode>3"
3081  [(set (match_operand:SSEMODEI 0 "register_operand" "")
3082	(xor:SSEMODEI (match_operand:SSEMODEI 1 "nonimmediate_operand" "")
3083		      (match_operand:SSEMODEI 2 "nonimmediate_operand" "")))]
3084  "TARGET_SSE2"
3085  "ix86_fixup_binary_operands_no_copy (XOR, <MODE>mode, operands);")
3086
;; Matching insn for bitwise exclusive OR on every SSEMODEI mode; emits
;; pxor regardless of element size.  The "%0" constraint ties operand 1 to
;; the destination and marks operands 1 and 2 as commutative; the insn
;; condition additionally requires ix86_binary_operator_ok.
3087(define_insn "*xor<mode>3"
3088  [(set (match_operand:SSEMODEI 0 "register_operand" "=x")
3089	(xor:SSEMODEI
3090	  (match_operand:SSEMODEI 1 "nonimmediate_operand" "%0")
3091	  (match_operand:SSEMODEI 2 "nonimmediate_operand" "xm")))]
3092  "TARGET_SSE2 && ix86_binary_operator_ok (XOR, <MODE>mode, operands)"
3093  "pxor\t{%2, %0|%0, %2}"
3094  [(set_attr "type" "sselog")
3095   (set_attr "mode" "TI")])
3096
3097;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
3098;;
3099;; Parallel integral element swizzling
3100;;
3101;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
3102
;; Pack two V8HI vectors into one V16QI with signed saturation
;; (ss_truncate); emits packsswb.  The narrowed operand 1 forms the low
;; half of the vec_concat and the narrowed operand 2 the high half.
;; Operand 1 is tied to the destination.
3103(define_insn "sse2_packsswb"
3104  [(set (match_operand:V16QI 0 "register_operand" "=x")
3105	(vec_concat:V16QI
3106	  (ss_truncate:V8QI
3107	    (match_operand:V8HI 1 "register_operand" "0"))
3108	  (ss_truncate:V8QI
3109	    (match_operand:V8HI 2 "nonimmediate_operand" "xm"))))]
3110  "TARGET_SSE2"
3111  "packsswb\t{%2, %0|%0, %2}"
3112  [(set_attr "type" "sselog")
3113   (set_attr "mode" "TI")])
3114
;; Pack two V4SI vectors into one V8HI with signed saturation
;; (ss_truncate); emits packssdw.  The narrowed operand 1 forms the low
;; half of the vec_concat and the narrowed operand 2 the high half.
;; Operand 1 is tied to the destination.
3115(define_insn "sse2_packssdw"
3116  [(set (match_operand:V8HI 0 "register_operand" "=x")
3117	(vec_concat:V8HI
3118	  (ss_truncate:V4HI
3119	    (match_operand:V4SI 1 "register_operand" "0"))
3120	  (ss_truncate:V4HI
3121	    (match_operand:V4SI 2 "nonimmediate_operand" "xm"))))]
3122  "TARGET_SSE2"
3123  "packssdw\t{%2, %0|%0, %2}"
3124  [(set_attr "type" "sselog")
3125   (set_attr "mode" "TI")])
3126
;; Pack two V8HI vectors into one V16QI with unsigned saturation
;; (us_truncate); emits packuswb.  The narrowed operand 1 forms the low
;; half of the vec_concat and the narrowed operand 2 the high half.
;; Operand 1 is tied to the destination.
3127(define_insn "sse2_packuswb"
3128  [(set (match_operand:V16QI 0 "register_operand" "=x")
3129	(vec_concat:V16QI
3130	  (us_truncate:V8QI
3131	    (match_operand:V8HI 1 "register_operand" "0"))
3132	  (us_truncate:V8QI
3133	    (match_operand:V8HI 2 "nonimmediate_operand" "xm"))))]
3134  "TARGET_SSE2"
3135  "packuswb\t{%2, %0|%0, %2}"
3136  [(set_attr "type" "sselog")
3137   (set_attr "mode" "TI")])
3138
;; Interleave the high eight bytes of two V16QI vectors; emits punpckhbw.
;; The selection indexes the 32-element concatenation of operand 1
;; (indices 0-15) and operand 2 (indices 16-31), alternating elements
;; 8..15 of operand 1 with elements 24..31, i.e. 8..15 of operand 2.
;; Operand 1 is tied to the destination.
3139(define_insn "sse2_punpckhbw"
3140  [(set (match_operand:V16QI 0 "register_operand" "=x")
3141	(vec_select:V16QI
3142	  (vec_concat:V32QI
3143	    (match_operand:V16QI 1 "register_operand" "0")
3144	    (match_operand:V16QI 2 "nonimmediate_operand" "xm"))
3145	  (parallel [(const_int 8)  (const_int 24)
3146		     (const_int 9)  (const_int 25)
3147		     (const_int 10) (const_int 26)
3148		     (const_int 11) (const_int 27)
3149		     (const_int 12) (const_int 28) 
3150		     (const_int 13) (const_int 29)
3151		     (const_int 14) (const_int 30)
3152		     (const_int 15) (const_int 31)])))]
3153  "TARGET_SSE2"
3154  "punpckhbw\t{%2, %0|%0, %2}"
3155  [(set_attr "type" "sselog")
3156   (set_attr "mode" "TI")])
3157
;; Interleave the low eight bytes of two V16QI vectors; emits punpcklbw.
;; The selection indexes the 32-element concatenation of operand 1
;; (indices 0-15) and operand 2 (indices 16-31), alternating elements
;; 0..7 of operand 1 with elements 16..23, i.e. 0..7 of operand 2.
;; Operand 1 is tied to the destination.
3158(define_insn "sse2_punpcklbw"
3159  [(set (match_operand:V16QI 0 "register_operand" "=x")
3160	(vec_select:V16QI
3161	  (vec_concat:V32QI
3162	    (match_operand:V16QI 1 "register_operand" "0")
3163	    (match_operand:V16QI 2 "nonimmediate_operand" "xm"))
3164	  (parallel [(const_int 0) (const_int 16)
3165		     (const_int 1) (const_int 17)
3166		     (const_int 2) (const_int 18)
3167		     (const_int 3) (const_int 19)
3168		     (const_int 4) (const_int 20)
3169		     (const_int 5) (const_int 21)
3170		     (const_int 6) (const_int 22)
3171		     (const_int 7) (const_int 23)])))]
3172  "TARGET_SSE2"
3173  "punpcklbw\t{%2, %0|%0, %2}"
3174  [(set_attr "type" "sselog")
3175   (set_attr "mode" "TI")])
3176
;; Interleave the high four words of two V8HI vectors; emits punpckhwd.
;; The selection indexes the 16-element concatenation of operand 1
;; (indices 0-7) and operand 2 (indices 8-15), alternating elements 4..7
;; of operand 1 with elements 12..15, i.e. 4..7 of operand 2.
;; Operand 1 is tied to the destination.
3177(define_insn "sse2_punpckhwd"
3178  [(set (match_operand:V8HI 0 "register_operand" "=x")
3179	(vec_select:V8HI
3180	  (vec_concat:V16HI
3181	    (match_operand:V8HI 1 "register_operand" "0")
3182	    (match_operand:V8HI 2 "nonimmediate_operand" "xm"))
3183	  (parallel [(const_int 4) (const_int 12)
3184		     (const_int 5) (const_int 13)
3185		     (const_int 6) (const_int 14)
3186		     (const_int 7) (const_int 15)])))]
3187  "TARGET_SSE2"
3188  "punpckhwd\t{%2, %0|%0, %2}"
3189  [(set_attr "type" "sselog")
3190   (set_attr "mode" "TI")])
3191
;; Interleave the low four words of two V8HI vectors; emits punpcklwd.
;; The selection indexes the 16-element concatenation of operand 1
;; (indices 0-7) and operand 2 (indices 8-15), alternating elements 0..3
;; of operand 1 with elements 8..11, i.e. 0..3 of operand 2.
;; Operand 1 is tied to the destination.
3192(define_insn "sse2_punpcklwd"
3193  [(set (match_operand:V8HI 0 "register_operand" "=x")
3194	(vec_select:V8HI
3195	  (vec_concat:V16HI
3196	    (match_operand:V8HI 1 "register_operand" "0")
3197	    (match_operand:V8HI 2 "nonimmediate_operand" "xm"))
3198	  (parallel [(const_int 0) (const_int 8)
3199		     (const_int 1) (const_int 9)
3200		     (const_int 2) (const_int 10)
3201		     (const_int 3) (const_int 11)])))]
3202  "TARGET_SSE2"
3203  "punpcklwd\t{%2, %0|%0, %2}"
3204  [(set_attr "type" "sselog")
3205   (set_attr "mode" "TI")])
3206
;; Interleave the high two doublewords of two V4SI vectors; emits
;; punpckhdq.  The selection indexes the 8-element concatenation of
;; operand 1 (indices 0-3) and operand 2 (indices 4-7), producing elements
;; 2, 6, 3, 7.  Operand 1 is tied to the destination.
3207(define_insn "sse2_punpckhdq"
3208  [(set (match_operand:V4SI 0 "register_operand" "=x")
3209	(vec_select:V4SI
3210	  (vec_concat:V8SI
3211	    (match_operand:V4SI 1 "register_operand" "0")
3212	    (match_operand:V4SI 2 "nonimmediate_operand" "xm"))
3213	  (parallel [(const_int 2) (const_int 6)
3214		     (const_int 3) (const_int 7)])))]
3215  "TARGET_SSE2"
3216  "punpckhdq\t{%2, %0|%0, %2}"
3217  [(set_attr "type" "sselog")
3218   (set_attr "mode" "TI")])
3219
;; Interleave the low two doublewords of two V4SI vectors; emits
;; punpckldq.  The selection indexes the 8-element concatenation of
;; operand 1 (indices 0-3) and operand 2 (indices 4-7), producing elements
;; 0, 4, 1, 5.  Operand 1 is tied to the destination.
3220(define_insn "sse2_punpckldq"
3221  [(set (match_operand:V4SI 0 "register_operand" "=x")
3222	(vec_select:V4SI
3223	  (vec_concat:V8SI
3224	    (match_operand:V4SI 1 "register_operand" "0")
3225	    (match_operand:V4SI 2 "nonimmediate_operand" "xm"))
3226	  (parallel [(const_int 0) (const_int 4)
3227		     (const_int 1) (const_int 5)])))]
3228  "TARGET_SSE2"
3229  "punpckldq\t{%2, %0|%0, %2}"
3230  [(set_attr "type" "sselog")
3231   (set_attr "mode" "TI")])
3232
;; Combine the high quadwords of two V2DI vectors; emits punpckhqdq.  The
;; selection indexes the 4-element concatenation of operand 1 (indices
;; 0-1) and operand 2 (indices 2-3), producing elements 1 and 3.
;; Operand 1 is tied to the destination.
3233(define_insn "sse2_punpckhqdq"
3234  [(set (match_operand:V2DI 0 "register_operand" "=x")
3235	(vec_select:V2DI
3236	  (vec_concat:V4DI
3237	    (match_operand:V2DI 1 "register_operand" "0")
3238	    (match_operand:V2DI 2 "nonimmediate_operand" "xm"))
3239	  (parallel [(const_int 1)
3240		     (const_int 3)])))]
3241  "TARGET_SSE2"
3242  "punpckhqdq\t{%2, %0|%0, %2}"
3243  [(set_attr "type" "sselog")
3244   (set_attr "mode" "TI")])
3245
;; Combine the low quadwords of two V2DI vectors; emits punpcklqdq.  The
;; selection indexes the 4-element concatenation of operand 1 (indices
;; 0-1) and operand 2 (indices 2-3), producing elements 0 and 2.
;; Operand 1 is tied to the destination.
3246(define_insn "sse2_punpcklqdq"
3247  [(set (match_operand:V2DI 0 "register_operand" "=x")
3248	(vec_select:V2DI
3249	  (vec_concat:V4DI
3250	    (match_operand:V2DI 1 "register_operand" "0")
3251	    (match_operand:V2DI 2 "nonimmediate_operand" "xm"))
3252	  (parallel [(const_int 0)
3253		     (const_int 2)])))]
3254  "TARGET_SSE2"
3255  "punpcklqdq\t{%2, %0|%0, %2}"
3256  [(set_attr "type" "sselog")
3257   (set_attr "mode" "TI")])
3258
;; Expander for inserting a 16-bit value into one element of a V8HI
;; vector.  Callers pass the value as SImode in operand 2 and the element
;; index (0..7) in operand 3.  The preparation code narrows operand 2 to
;; its HImode lowpart and converts the index into the single-bit vec_merge
;; mask 1 << index, which is the form the *sse2_pinsrw insn matches.
3259(define_expand "sse2_pinsrw"
3260  [(set (match_operand:V8HI 0 "register_operand" "")
3261	(vec_merge:V8HI
3262	  (vec_duplicate:V8HI
3263	    (match_operand:SI 2 "nonimmediate_operand" ""))
3264	  (match_operand:V8HI 1 "register_operand" "")
3265	  (match_operand:SI 3 "const_0_to_7_operand" "")))]
3266  "TARGET_SSE2"
3267{
3268  operands[2] = gen_lowpart (HImode, operands[2]);
3269  operands[3] = GEN_INT ((1 << INTVAL (operands[3])));
3270})
3271
;; Insn for inserting an HImode value (register or memory) into one
;; element of a V8HI vector; emits pinsrw.  Operand 3 is a power-of-two
;; vec_merge mask (const_pow2_1_to_128_operand); the output code turns it
;; back into an element index with exact_log2 before printing.
;; NOTE(review): the %k2 modifier presumably prints operand 2 with a
;; 32-bit register name -- confirm against print_operand.
3272(define_insn "*sse2_pinsrw"
3273  [(set (match_operand:V8HI 0 "register_operand" "=x")
3274	(vec_merge:V8HI
3275	  (vec_duplicate:V8HI
3276	    (match_operand:HI 2 "nonimmediate_operand" "rm"))
3277	  (match_operand:V8HI 1 "register_operand" "0")
3278	  (match_operand:SI 3 "const_pow2_1_to_128_operand" "n")))]
3279  "TARGET_SSE2"
3280{
3281  operands[3] = GEN_INT (exact_log2 (INTVAL (operands[3])));
3282  return "pinsrw\t{%3, %k2, %0|%0, %k2, %3}";
3283}
3284  [(set_attr "type" "sselog")
3285   (set_attr "mode" "TI")])
3286
;; Extract one HImode element of a V8HI register, selected by the constant
;; index in operand 2 (0..7), and zero-extend it into an SImode general
;; register; emits pextrw.
3287(define_insn "sse2_pextrw"
3288  [(set (match_operand:SI 0 "register_operand" "=r")
3289	(zero_extend:SI
3290	  (vec_select:HI
3291	    (match_operand:V8HI 1 "register_operand" "x")
3292	    (parallel [(match_operand:SI 2 "const_0_to_7_operand" "n")]))))]
3293  "TARGET_SSE2"
3294  "pextrw\t{%2, %1, %0|%0, %1, %2}"
3295  [(set_attr "type" "sselog")
3296   (set_attr "mode" "TI")])
3297
;; Expander taking a V4SI destination, a V4SI source and a constant
;; integer shuffle mask.  The mask is split into four 2-bit fields (bits
;; 0-1, 2-3, 4-5 and 6-7), each passed as a separate selector operand to
;; sse2_pshufd_1.  The template itself is never emitted (DONE).
3298(define_expand "sse2_pshufd"
3299  [(match_operand:V4SI 0 "register_operand" "")
3300   (match_operand:V4SI 1 "nonimmediate_operand" "")
3301   (match_operand:SI 2 "const_int_operand" "")]
3302  "TARGET_SSE2"
3303{
3304  int mask = INTVAL (operands[2]);
3305  emit_insn (gen_sse2_pshufd_1 (operands[0], operands[1],
3306				GEN_INT ((mask >> 0) & 3),
3307				GEN_INT ((mask >> 2) & 3),
3308				GEN_INT ((mask >> 4) & 3),
3309				GEN_INT ((mask >> 6) & 3)));
3310  DONE;
3311})
3312
;; Insn form of pshufd: a vec_select of four V4SI elements, with operands
;; 2..5 (each in 0..3) naming the source element for result elements 0..3.
;; The output code packs the four selectors back into one immediate,
;; two bits each starting at bit 0, and overwrites operands[2] with it so
;; that %2 prints the combined mask.
3313(define_insn "sse2_pshufd_1"
3314  [(set (match_operand:V4SI 0 "register_operand" "=x")
3315	(vec_select:V4SI
3316	  (match_operand:V4SI 1 "nonimmediate_operand" "xm")
3317	  (parallel [(match_operand 2 "const_0_to_3_operand" "")
3318		     (match_operand 3 "const_0_to_3_operand" "")
3319		     (match_operand 4 "const_0_to_3_operand" "")
3320		     (match_operand 5 "const_0_to_3_operand" "")])))]
3321  "TARGET_SSE2"
3322{
3323  int mask = 0;
3324  mask |= INTVAL (operands[2]) << 0;
3325  mask |= INTVAL (operands[3]) << 2;
3326  mask |= INTVAL (operands[4]) << 4;
3327  mask |= INTVAL (operands[5]) << 6;
3328  operands[2] = GEN_INT (mask);
3329
3330  return "pshufd\t{%2, %1, %0|%0, %1, %2}";
3331}
3332  [(set_attr "type" "sselog1")
3333   (set_attr "mode" "TI")])
3334
;; Expander taking a V8HI destination, a V8HI source and a constant
;; integer shuffle mask.  The mask is split into four 2-bit fields (bits
;; 0-1, 2-3, 4-5 and 6-7), each passed as a separate selector operand to
;; sse2_pshuflw_1.  The template itself is never emitted (DONE).
3335(define_expand "sse2_pshuflw"
3336  [(match_operand:V8HI 0 "register_operand" "")
3337   (match_operand:V8HI 1 "nonimmediate_operand" "")
3338   (match_operand:SI 2 "const_int_operand" "")]
3339  "TARGET_SSE2"
3340{
3341  int mask = INTVAL (operands[2]);
3342  emit_insn (gen_sse2_pshuflw_1 (operands[0], operands[1],
3343				 GEN_INT ((mask >> 0) & 3),
3344				 GEN_INT ((mask >> 2) & 3),
3345				 GEN_INT ((mask >> 4) & 3),
3346				 GEN_INT ((mask >> 6) & 3)));
3347  DONE;
3348})
3349
;; Insn form of pshuflw: result elements 0..3 are chosen from source
;; elements 0..3 by operands 2..5, while result elements 4..7 are fixed to
;; source elements 4..7.  The output code packs the four selectors into
;; one immediate, two bits each starting at bit 0, and overwrites
;; operands[2] with it so that %2 prints the combined mask.
3350(define_insn "sse2_pshuflw_1"
3351  [(set (match_operand:V8HI 0 "register_operand" "=x")
3352	(vec_select:V8HI
3353	  (match_operand:V8HI 1 "nonimmediate_operand" "xm")
3354	  (parallel [(match_operand 2 "const_0_to_3_operand" "")
3355		     (match_operand 3 "const_0_to_3_operand" "")
3356		     (match_operand 4 "const_0_to_3_operand" "")
3357		     (match_operand 5 "const_0_to_3_operand" "")
3358		     (const_int 4)
3359		     (const_int 5)
3360		     (const_int 6)
3361		     (const_int 7)])))]
3362  "TARGET_SSE2"
3363{
3364  int mask = 0;
3365  mask |= INTVAL (operands[2]) << 0;
3366  mask |= INTVAL (operands[3]) << 2;
3367  mask |= INTVAL (operands[4]) << 4;
3368  mask |= INTVAL (operands[5]) << 6;
3369  operands[2] = GEN_INT (mask);
3370
3371  return "pshuflw\t{%2, %1, %0|%0, %1, %2}";
3372}
3373  [(set_attr "type" "sselog")
3374   (set_attr "mode" "TI")])
3375
;; Expander taking a V8HI destination, a V8HI source and a constant
;; integer shuffle mask.  The mask is split into four 2-bit fields (bits
;; 0-1, 2-3, 4-5 and 6-7); 4 is added to each so the selectors passed to
;; sse2_pshufhw_1 are element indices in the range 4..7.  The template
;; itself is never emitted (DONE).
3376(define_expand "sse2_pshufhw"
3377  [(match_operand:V8HI 0 "register_operand" "")
3378   (match_operand:V8HI 1 "nonimmediate_operand" "")
3379   (match_operand:SI 2 "const_int_operand" "")]
3380  "TARGET_SSE2"
3381{
3382  int mask = INTVAL (operands[2]);
3383  emit_insn (gen_sse2_pshufhw_1 (operands[0], operands[1],
3384				 GEN_INT (((mask >> 0) & 3) + 4),
3385				 GEN_INT (((mask >> 2) & 3) + 4),
3386				 GEN_INT (((mask >> 4) & 3) + 4),
3387				 GEN_INT (((mask >> 6) & 3) + 4)));
3388  DONE;
3389})
3390
;; Insn form of pshufhw: result elements 0..3 are fixed to source elements
;; 0..3, while result elements 4..7 are chosen from source elements 4..7
;; by operands 2..5.  The output code subtracts 4 from each selector,
;; packs the four 2-bit values into one immediate starting at bit 0, and
;; overwrites operands[2] with it so that %2 prints the combined mask.
3391(define_insn "sse2_pshufhw_1"
3392  [(set (match_operand:V8HI 0 "register_operand" "=x")
3393	(vec_select:V8HI
3394	  (match_operand:V8HI 1 "nonimmediate_operand" "xm")
3395	  (parallel [(const_int 0)
3396		     (const_int 1)
3397		     (const_int 2)
3398		     (const_int 3)
3399		     (match_operand 2 "const_4_to_7_operand" "")
3400		     (match_operand 3 "const_4_to_7_operand" "")
3401		     (match_operand 4 "const_4_to_7_operand" "")
3402		     (match_operand 5 "const_4_to_7_operand" "")])))]
3403  "TARGET_SSE2"
3404{
3405  int mask = 0;
3406  mask |= (INTVAL (operands[2]) - 4) << 0;
3407  mask |= (INTVAL (operands[3]) - 4) << 2;
3408  mask |= (INTVAL (operands[4]) - 4) << 4;
3409  mask |= (INTVAL (operands[5]) - 4) << 6;
3410  operands[2] = GEN_INT (mask);
3411
3412  return "pshufhw\t{%2, %1, %0|%0, %1, %2}";
3413}
3414  [(set_attr "type" "sselog")
3415   (set_attr "mode" "TI")])
3416
;; Expander that places an SImode value in element 0 of a V4SI register
;; and zeros the rest: the vec_merge mask (const_int 1) takes only element
;; 0 from the duplicated scalar, and the preparation code sets operand 2,
;; the source of the remaining elements, to CONST0_RTX (V4SImode).
;; Note the condition is TARGET_SSE, not TARGET_SSE2, despite the name.
3417(define_expand "sse2_loadd"
3418  [(set (match_operand:V4SI 0 "register_operand" "")
3419	(vec_merge:V4SI
3420	  (vec_duplicate:V4SI
3421	    (match_operand:SI 1 "nonimmediate_operand" ""))
3422	  (match_dup 2)
3423	  (const_int 1)))]
3424  "TARGET_SSE"
3425  "operands[2] = CONST0_RTX (V4SImode);")
3426
;; Insn that replaces element 0 of a V4SI value (operand 1) with the
;; SImode operand 2, per the vec_merge mask (const_int 1).  Three
;; alternatives:
;;   0: operand 1 matches "C", scalar from memory or a general register
;;      -> movd
;;   1: operand 1 matches "C", scalar from memory -> movss
;;   2: operand 1 tied to the destination, scalar in an "x" register
;;      -> movss
;; NOTE(review): "C" is presumably the all-zeros constant and "Y" the
;; SSE2-only register class -- confirm in the i386 constraint definitions.
3427(define_insn "sse2_loadld"
3428  [(set (match_operand:V4SI 0 "register_operand"       "=Y,x,x")
3429	(vec_merge:V4SI
3430	  (vec_duplicate:V4SI
3431	    (match_operand:SI 2 "nonimmediate_operand" "mr,m,x"))
3432	  (match_operand:V4SI 1 "reg_or_0_operand"     " C,C,0")
3433	  (const_int 1)))]
3434  "TARGET_SSE"
3435  "@
3436   movd\t{%2, %0|%0, %2}
3437   movss\t{%2, %0|%0, %2}
3438   movss\t{%2, %0|%0, %2}"
3439  [(set_attr "type" "ssemov")
3440   (set_attr "mode" "TI,V4SF,SF")])
3441
3442;; ??? The hardware supports more, but TARGET_INTER_UNIT_MOVES must
3443;; be taken into account, and movdi isn't fully populated even without.
;; Extract element 0 of a V4SI register into an SImode destination
;; (memory or "x" register).  The insn has no assembler output of its own
;; ("#"): once reload_completed is set it is split into a plain SImode
;; move whose source is the same hard register number re-expressed as an
;; SImode REG.
3444(define_insn_and_split "sse2_stored"
3445  [(set (match_operand:SI 0 "nonimmediate_operand" "=mx")
3446	(vec_select:SI
3447	  (match_operand:V4SI 1 "register_operand" "x")
3448	  (parallel [(const_int 0)])))]
3449  "TARGET_SSE"
3450  "#"
3451  "&& reload_completed"
3452  [(set (match_dup 0) (match_dup 1))]
3453{
3454  operands[1] = gen_rtx_REG (SImode, REGNO (operands[1]));
3455})
3456
;; Named expander that extracts element 0 of a V2DI register into a DImode
;; destination.  There is no preparation code; the template is emitted
;; unchanged.
3457(define_expand "sse_storeq"
3458  [(set (match_operand:DI 0 "nonimmediate_operand" "")
3459	(vec_select:DI
3460	  (match_operand:V2DI 1 "register_operand" "")
3461	  (parallel [(const_int 0)])))]
3462  "TARGET_SSE"
3463  "")
3464
3465;; ??? The hardware supports more, but TARGET_INTER_UNIT_MOVES must
3466;; be taken into account, and movdi isn't fully populated even without.
;; Insn matching the extraction of element 0 of a V2DI register into a
;; DImode destination (memory or "x" register).  Its output template is
;; "#", so it must be split before final assembly output; a define_split
;; with the same RTL shape follows this pattern.
3467(define_insn "*sse2_storeq"
3468  [(set (match_operand:DI 0 "nonimmediate_operand" "=mx")
3469	(vec_select:DI
3470	  (match_operand:V2DI 1 "register_operand" "x")
3471	  (parallel [(const_int 0)])))]
3472  "TARGET_SSE"
3473  "#")
3474
;; Post-reload split for the extraction of element 0 of a V2DI register:
;; once reload_completed is set, the vec_select becomes a plain DImode
;; move whose source is the same hard register number re-expressed as a
;; DImode REG.
3475(define_split
3476  [(set (match_operand:DI 0 "nonimmediate_operand" "")
3477	(vec_select:DI
3478	  (match_operand:V2DI 1 "register_operand" "")
3479	  (parallel [(const_int 0)])))]
3480  "TARGET_SSE && reload_completed"
3481  [(set (match_dup 0) (match_dup 1))]
3482{
3483  operands[1] = gen_rtx_REG (DImode, REGNO (operands[1]));
3484})
3485
;; Extract element 1 (the high quadword) of a V2DI value when TARGET_SSE2
;; is set.  The condition rejects memory-to-memory operand pairs.  Three
;; alternatives:
;;   0: register source, memory destination -> movhps
;;   1: source tied to the destination register -> psrldq by 8
;;   2: offsettable memory source ("o"), register destination -> movq
;; NOTE(review): %H1 presumably addresses the upper 8 bytes of the memory
;; operand -- confirm against print_operand.
3486(define_insn "*vec_extractv2di_1_sse2"
3487  [(set (match_operand:DI 0 "nonimmediate_operand" "=m,x,x")
3488	(vec_select:DI
3489	  (match_operand:V2DI 1 "nonimmediate_operand" "x,0,o")
3490	  (parallel [(const_int 1)])))]
3491  "TARGET_SSE2 && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
3492  "@
3493   movhps\t{%1, %0|%0, %1}
3494   psrldq\t{$8, %0|%0, 8}
3495   movq\t{%H1, %0|%0, %H1}"
3496  [(set_attr "type" "ssemov,sseishft,ssemov")
3497   (set_attr "memory" "*,none,*")
3498   (set_attr "mode" "V2SF,TI,TI")])
3499
3500;; Not sure this is ever used, but it doesn't hurt to have it. -aoliva
;; SSE1-only counterpart (condition: !TARGET_SSE2 && TARGET_SSE) for
;; extracting element 1 of a V2DI value.  The condition rejects
;; memory-to-memory operand pairs.  Three alternatives:
;;   0: register source, memory destination -> movhps
;;   1: register source, register destination -> movhlps
;;   2: offsettable memory source ("o"), register destination -> movlps
;; NOTE(review): %H1 presumably addresses the upper 8 bytes of the memory
;; operand -- confirm against print_operand.
3501(define_insn "*vec_extractv2di_1_sse"
3502  [(set (match_operand:DI 0 "nonimmediate_operand" "=m,x,x")
3503	(vec_select:DI
3504	  (match_operand:V2DI 1 "nonimmediate_operand" "x,x,o")
3505	  (parallel [(const_int 1)])))]
3506  "!TARGET_SSE2 && TARGET_SSE
3507   && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
3508  "@
3509   movhps\t{%1, %0|%0, %1}
3510   movhlps\t{%1, %0|%0, %1}
3511   movlps\t{%H1, %0|%0, %H1}"
3512  [(set_attr "type" "ssemov")
3513   (set_attr "mode" "V2SF,V4SF,V2SF")])
3514
;; Broadcast an SImode register value to all four elements of a V4SI
;; register.  Two alternatives:
;;   0: "Y" registers -> pshufd with immediate 0
;;   1: source tied to the destination -> shufps of the register with
;;      itself, immediate 0
;; NOTE(review): "Y" is presumably the SSE2-only register class, which
;; would explain why the pshufd form is restricted to it -- confirm.
3515(define_insn "*vec_dupv4si"
3516  [(set (match_operand:V4SI 0 "register_operand" "=Y,x")
3517	(vec_duplicate:V4SI
3518	  (match_operand:SI 1 "register_operand" " Y,0")))]
3519  "TARGET_SSE"
3520  "@
3521   pshufd\t{$0, %1, %0|%0, %1, 0}
3522   shufps\t{$0, %0, %0|%0, %0, 0}"
3523  [(set_attr "type" "sselog1")
3524   (set_attr "mode" "TI,V4SF")])
3525
;; Broadcast a DImode register value to both elements of a V2DI register.
;; In both alternatives the source is tied to the destination, so the
;; instruction operates on the register with itself:
;;   0: "Y" destination -> punpcklqdq %0, %0
;;   1: "x" destination -> movlhps %0, %0
;; Because both operands are the same register, the templates need no
;; AT&T/Intel {..|..} variants.
3526(define_insn "*vec_dupv2di"
3527  [(set (match_operand:V2DI 0 "register_operand" "=Y,x")
3528	(vec_duplicate:V2DI
3529	  (match_operand:DI 1 "register_operand" " 0,0")))]
3530  "TARGET_SSE"
3531  "@
3532   punpcklqdq\t%0, %0
3533   movlhps\t%0, %0"
3534  [(set_attr "type" "sselog1,ssemov")
3535   (set_attr "mode" "TI,V4SF")])
3536
3537;; ??? In theory we can match memory for the MMX alternative, but allowing
3538;; nonimmediate_operand for operand 2 and *not* allowing memory for the SSE
3539;; alternatives pretty much forces the MMX alternative to be chosen.
;; Build a V2SI value from two SImode values under TARGET_SSE2.  Four
;; alternatives, two with a "Y" destination and two with a "*y" one:
;;   0, 2: operand 1 tied to the destination, operand 2 in a register
;;         -> punpckldq
;;   1, 3: operand 2 matches "C", operand 1 from a general register or
;;         memory -> movd
;; The "type" attribute marks alternatives 0-1 as SSE (sselog, ssemov) and
;; 2-3 as MMX (mmxcvt, mmxmov).
3540(define_insn "*sse2_concatv2si"
3541  [(set (match_operand:V2SI 0 "register_operand"     "=Y, Y,*y,*y")
3542	(vec_concat:V2SI
3543	  (match_operand:SI 1 "nonimmediate_operand" " 0,rm, 0,rm")
3544	  (match_operand:SI 2 "reg_or_0_operand"     " Y, C,*y, C")))]
3545  "TARGET_SSE2"
3546  "@
3547   punpckldq\t{%2, %0|%0, %2}
3548   movd\t{%1, %0|%0, %1}
3549   punpckldq\t{%2, %0|%0, %2}
3550   movd\t{%1, %0|%0, %1}"
3551  [(set_attr "type" "sselog,ssemov,mmxcvt,mmxmov")
3552   (set_attr "mode" "TI,TI,DI,DI")])
3553
;; Builds a V2SI from two SImode values; the condition is TARGET_SSE only.
;; Alternatives, in order:
;;   0: "x" destination, operand 1 tied to it, operand 2 in an "x" register;
;;   1: "x" destination, operand 1 in memory, operand 2 constant zero ("C");
;;   2: MMX ("y") destination, operand 1 tied to it, operand 2 in MMX;
;;   3: MMX destination, operand 1 in a general register or memory,
;;      operand 2 constant zero.
(define_insn "*sse1_concatv2si"
  [(set (match_operand:V2SI 0 "register_operand"     "=x,x,*y,*y")
	(vec_concat:V2SI
	  (match_operand:SI 1 "nonimmediate_operand" " 0,m, 0,*rm")
	  (match_operand:SI 2 "reg_or_0_operand"     " x,C,*y,C")))]
  "TARGET_SSE"
  "@
   unpcklps\t{%2, %0|%0, %2}
   movss\t{%1, %0|%0, %1}
   punpckldq\t{%2, %0|%0, %2}
   movd\t{%1, %0|%0, %1}"
  [(set_attr "type" "sselog,ssemov,mmxcvt,mmxmov")
   (set_attr "mode" "V4SF,V4SF,DI,DI")])
3567
;; Builds a V4SI from two V2SI halves.  Operand 1 is always tied to the
;; destination; the alternatives differ only in where operand 2 lives:
;;   0: "Y" register (punpcklqdq);
;;   1: "x" register (movlhps);
;;   2: memory (movhps).
(define_insn "*vec_concatv4si_1"
  [(set (match_operand:V4SI 0 "register_operand"       "=Y,x,x")
	(vec_concat:V4SI
	  (match_operand:V2SI 1 "register_operand"     " 0,0,0")
	  (match_operand:V2SI 2 "nonimmediate_operand" " Y,x,m")))]
  "TARGET_SSE"
  "@
   punpcklqdq\t{%2, %0|%0, %2}
   movlhps\t{%2, %0|%0, %2}
   movhps\t{%2, %0|%0, %2}"
  [(set_attr "type" "sselog,ssemov,ssemov")
   (set_attr "mode" "TI,V4SF,V2SF")])
3580
;; Builds a V2DI from two DImode values.  Alternatives, in order:
;;   0: "Y" destination, operand 1 in memory, operand 2 constant zero;
;;   1: "Y" destination, operand 1 in an MMX register, operand 2 zero;
;;   2: "Y" destination, operand 1 tied to it, operand 2 in a "Y" register;
;;   3: "x" destination, operand 1 tied to it, operand 2 in an "x" register;
;;   4: "x" destination, operand 1 tied to it, operand 2 in memory.
;;
;; There is deliberately no alternative that ties operand 2 to the
;; destination and loads operand 1 with movlps.  A DImode value tied to
;; the destination sits in its low quadword; movlps would overwrite exactly
;; that quadword and leave the high one undefined, so the result would not
;; be (operand 1, operand 2).
(define_insn "*vec_concatv2di"
  [(set (match_operand:V2DI 0 "register_operand"     "=Y,?Y,Y,x,x")
	(vec_concat:V2DI
	  (match_operand:DI 1 "nonimmediate_operand" " m,*y,0,0,0")
	  (match_operand:DI 2 "vector_move_operand"  " C, C,Y,x,m")))]
  "TARGET_SSE"
  "@
   movq\t{%1, %0|%0, %1}
   movq2dq\t{%1, %0|%0, %1}
   punpcklqdq\t{%2, %0|%0, %2}
   movlhps\t{%2, %0|%0, %2}
   movhps\t{%2, %0|%0, %2}"
  [(set_attr "type" "ssemov,ssemov,sselog,ssemov,ssemov")
   (set_attr "mode" "TI,TI,TI,V4SF,V2SF")])
3596
;; Expander for "vec_setv2di": operand 0 is the V2DI register, operand 1
;; the DImode value, operand 2 a constant integer.  All of the work is
;; handed to ix86_expand_vector_set.
(define_expand "vec_setv2di"
  [(match_operand:V2DI 0 "register_operand" "")
   (match_operand:DI 1 "register_operand" "")
   (match_operand 2 "const_int_operand" "")]
  "TARGET_SSE"
{
  rtx vec = operands[0];
  rtx val = operands[1];
  HOST_WIDE_INT elt = INTVAL (operands[2]);

  ix86_expand_vector_set (false, vec, val, elt);
  DONE;
})
3607
;; Expander for "vec_extractv2di": operand 0 is the DImode destination,
;; operand 1 the V2DI register, operand 2 a constant integer.  All of the
;; work is handed to ix86_expand_vector_extract.
(define_expand "vec_extractv2di"
  [(match_operand:DI 0 "register_operand" "")
   (match_operand:V2DI 1 "register_operand" "")
   (match_operand 2 "const_int_operand" "")]
  "TARGET_SSE"
{
  rtx dest = operands[0];
  rtx vec = operands[1];
  HOST_WIDE_INT elt = INTVAL (operands[2]);

  ix86_expand_vector_extract (false, dest, vec, elt);
  DONE;
})
3618
;; Expander for "vec_initv2di": operand 0 is the V2DI register to fill,
;; operand 1 (no predicate) is passed through unchanged to
;; ix86_expand_vector_init.
(define_expand "vec_initv2di"
  [(match_operand:V2DI 0 "register_operand" "")
   (match_operand 1 "" "")]
  "TARGET_SSE"
{
  rtx target = operands[0];
  rtx vals = operands[1];

  ix86_expand_vector_init (false, target, vals);
  DONE;
})
3627
;; Expander for "vec_setv4si": operand 0 is the V4SI register, operand 1
;; the SImode value, operand 2 a constant integer.  All of the work is
;; handed to ix86_expand_vector_set.
(define_expand "vec_setv4si"
  [(match_operand:V4SI 0 "register_operand" "")
   (match_operand:SI 1 "register_operand" "")
   (match_operand 2 "const_int_operand" "")]
  "TARGET_SSE"
{
  rtx vec = operands[0];
  rtx val = operands[1];
  HOST_WIDE_INT elt = INTVAL (operands[2]);

  ix86_expand_vector_set (false, vec, val, elt);
  DONE;
})
3638
;; Expander for "vec_extractv4si": operand 0 is the SImode destination,
;; operand 1 the V4SI register, operand 2 a constant integer.  All of the
;; work is handed to ix86_expand_vector_extract.
(define_expand "vec_extractv4si"
  [(match_operand:SI 0 "register_operand" "")
   (match_operand:V4SI 1 "register_operand" "")
   (match_operand 2 "const_int_operand" "")]
  "TARGET_SSE"
{
  rtx dest = operands[0];
  rtx vec = operands[1];
  HOST_WIDE_INT elt = INTVAL (operands[2]);

  ix86_expand_vector_extract (false, dest, vec, elt);
  DONE;
})
3649
;; Expander for "vec_initv4si": operand 0 is the V4SI register to fill,
;; operand 1 (no predicate) is passed through unchanged to
;; ix86_expand_vector_init.
(define_expand "vec_initv4si"
  [(match_operand:V4SI 0 "register_operand" "")
   (match_operand 1 "" "")]
  "TARGET_SSE"
{
  rtx target = operands[0];
  rtx vals = operands[1];

  ix86_expand_vector_init (false, target, vals);
  DONE;
})
3658
;; Expander for "vec_setv8hi": operand 0 is the V8HI register, operand 1
;; the HImode value, operand 2 a constant integer.  All of the work is
;; handed to ix86_expand_vector_set.
(define_expand "vec_setv8hi"
  [(match_operand:V8HI 0 "register_operand" "")
   (match_operand:HI 1 "register_operand" "")
   (match_operand 2 "const_int_operand" "")]
  "TARGET_SSE"
{
  rtx vec = operands[0];
  rtx val = operands[1];
  HOST_WIDE_INT elt = INTVAL (operands[2]);

  ix86_expand_vector_set (false, vec, val, elt);
  DONE;
})
3669
;; Expander for "vec_extractv8hi": operand 0 is the HImode destination,
;; operand 1 the V8HI register, operand 2 a constant integer.  All of the
;; work is handed to ix86_expand_vector_extract.
(define_expand "vec_extractv8hi"
  [(match_operand:HI 0 "register_operand" "")
   (match_operand:V8HI 1 "register_operand" "")
   (match_operand 2 "const_int_operand" "")]
  "TARGET_SSE"
{
  rtx dest = operands[0];
  rtx vec = operands[1];
  HOST_WIDE_INT elt = INTVAL (operands[2]);

  ix86_expand_vector_extract (false, dest, vec, elt);
  DONE;
})
3680
;; Expander for "vec_initv8hi": operand 0 is the V8HI register to fill,
;; operand 1 (no predicate) is passed through unchanged to
;; ix86_expand_vector_init.
(define_expand "vec_initv8hi"
  [(match_operand:V8HI 0 "register_operand" "")
   (match_operand 1 "" "")]
  "TARGET_SSE"
{
  rtx target = operands[0];
  rtx vals = operands[1];

  ix86_expand_vector_init (false, target, vals);
  DONE;
})
3689
;; Expander for "vec_setv16qi": operand 0 is the V16QI register, operand 1
;; the QImode value, operand 2 a constant integer.  All of the work is
;; handed to ix86_expand_vector_set.
(define_expand "vec_setv16qi"
  [(match_operand:V16QI 0 "register_operand" "")
   (match_operand:QI 1 "register_operand" "")
   (match_operand 2 "const_int_operand" "")]
  "TARGET_SSE"
{
  rtx vec = operands[0];
  rtx val = operands[1];
  HOST_WIDE_INT elt = INTVAL (operands[2]);

  ix86_expand_vector_set (false, vec, val, elt);
  DONE;
})
3700
;; Expander for "vec_extractv16qi": operand 0 is the QImode destination,
;; operand 1 the V16QI register, operand 2 a constant integer.  All of the
;; work is handed to ix86_expand_vector_extract.
(define_expand "vec_extractv16qi"
  [(match_operand:QI 0 "register_operand" "")
   (match_operand:V16QI 1 "register_operand" "")
   (match_operand 2 "const_int_operand" "")]
  "TARGET_SSE"
{
  rtx dest = operands[0];
  rtx vec = operands[1];
  HOST_WIDE_INT elt = INTVAL (operands[2]);

  ix86_expand_vector_extract (false, dest, vec, elt);
  DONE;
})
3711
;; Expander for "vec_initv16qi": operand 0 is the V16QI register to fill,
;; operand 1 (no predicate) is passed through unchanged to
;; ix86_expand_vector_init.
(define_expand "vec_initv16qi"
  [(match_operand:V16QI 0 "register_operand" "")
   (match_operand 1 "" "")]
  "TARGET_SSE"
{
  rtx target = operands[0];
  rtx vals = operands[1];

  ix86_expand_vector_init (false, target, vals);
  DONE;
})
3720
3721;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
3722;;
3723;; Miscellaneous
3724;;
3725;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
3726
;; Unsigned byte average, described as: zero-extend both V16QI inputs to
;; V16HI, add them, add a vector of ones, shift right by one, truncate
;; back to V16QI.
;;
;; The vector of ones is an operand of the V16HI addition, so it carries
;; V16HI mode like the other addend rather than the narrow V16QI mode.
(define_insn "sse2_uavgv16qi3"
  [(set (match_operand:V16QI 0 "register_operand" "=x")
	(truncate:V16QI
	  (lshiftrt:V16HI
	    (plus:V16HI
	      (plus:V16HI
		(zero_extend:V16HI
		  (match_operand:V16QI 1 "nonimmediate_operand" "%0"))
		(zero_extend:V16HI
		  (match_operand:V16QI 2 "nonimmediate_operand" "xm")))
	      (const_vector:V16HI [(const_int 1) (const_int 1)
				   (const_int 1) (const_int 1)
				   (const_int 1) (const_int 1)
				   (const_int 1) (const_int 1)
				   (const_int 1) (const_int 1)
				   (const_int 1) (const_int 1)
				   (const_int 1) (const_int 1)
				   (const_int 1) (const_int 1)]))
	    (const_int 1))))]
  "TARGET_SSE2 && ix86_binary_operator_ok (PLUS, V16QImode, operands)"
  "pavgb\t{%2, %0|%0, %2}"
  [(set_attr "type" "sseiadd")
   (set_attr "mode" "TI")])
3750
;; Unsigned word average, described as: zero-extend both V8HI inputs to
;; V8SI, add them, add a vector of ones, shift right by one, truncate
;; back to V8HI.
;;
;; The vector of ones is an operand of the V8SI addition, so it carries
;; V8SI mode like the other addend rather than the narrow V8HI mode.
(define_insn "sse2_uavgv8hi3"
  [(set (match_operand:V8HI 0 "register_operand" "=x")
	(truncate:V8HI
	  (lshiftrt:V8SI
	    (plus:V8SI
	      (plus:V8SI
		(zero_extend:V8SI
		  (match_operand:V8HI 1 "nonimmediate_operand" "%0"))
		(zero_extend:V8SI
		  (match_operand:V8HI 2 "nonimmediate_operand" "xm")))
	      (const_vector:V8SI [(const_int 1) (const_int 1)
				  (const_int 1) (const_int 1)
				  (const_int 1) (const_int 1)
				  (const_int 1) (const_int 1)]))
	    (const_int 1))))]
  "TARGET_SSE2 && ix86_binary_operator_ok (PLUS, V8HImode, operands)"
  "pavgw\t{%2, %0|%0, %2}"
  [(set_attr "type" "sseiadd")
   (set_attr "mode" "TI")])
3770
;; The correct representation for this is absolutely enormous, and
;; surely not generally useful.
;;
;; The operation is therefore kept opaque as UNSPEC_PSADBW over the two
;; V16QI inputs, producing a V2DI result.  Operand 1 is tied to the
;; destination; operand 2 may be a register or memory.
(define_insn "sse2_psadbw"
  [(set (match_operand:V2DI 0 "register_operand" "=x")
	(unspec:V2DI [(match_operand:V16QI 1 "register_operand" "0")
		      (match_operand:V16QI 2 "nonimmediate_operand" "xm")]
		     UNSPEC_PSADBW))]
  "TARGET_SSE2"
  "psadbw\t{%2, %0|%0, %2}"
  [(set_attr "type" "sseiadd")
   (set_attr "mode" "TI")])
3782
;; movmskps: V4SF SSE register in, SImode general register out.  The
;; operation is represented opaquely as UNSPEC_MOVMSK.
(define_insn "sse_movmskps"
  [(set (match_operand:SI 0 "register_operand" "=r")
	(unspec:SI [(match_operand:V4SF 1 "register_operand" "x")]
		   UNSPEC_MOVMSK))]
  "TARGET_SSE"
  "movmskps\t{%1, %0|%0, %1}"
  [(set_attr "type" "ssecvt")
   (set_attr "mode" "V4SF")])
3791
;; movmskpd: V2DF SSE register in, SImode general register out.  Uses the
;; same UNSPEC_MOVMSK code as the V4SF variant; the operand mode tells
;; them apart.
(define_insn "sse2_movmskpd"
  [(set (match_operand:SI 0 "register_operand" "=r")
	(unspec:SI [(match_operand:V2DF 1 "register_operand" "x")]
		   UNSPEC_MOVMSK))]
  "TARGET_SSE2"
  "movmskpd\t{%1, %0|%0, %1}"
  [(set_attr "type" "ssecvt")
   (set_attr "mode" "V2DF")])
3800
;; pmovmskb: V16QI SSE register in, SImode general register out, again
;; represented as UNSPEC_MOVMSK.
;; NOTE(review): the "mode" attribute is V2DF although the input is an
;; integer vector.  This may be deliberate (the attribute can feed
;; prefix/length computation defined outside this file) -- confirm before
;; changing it.
(define_insn "sse2_pmovmskb"
  [(set (match_operand:SI 0 "register_operand" "=r")
	(unspec:SI [(match_operand:V16QI 1 "register_operand" "x")]
		   UNSPEC_MOVMSK))]
  "TARGET_SSE2"
  "pmovmskb\t{%1, %0|%0, %1}"
  [(set_attr "type" "ssecvt")
   (set_attr "mode" "V2DF")])
3809
;; Named entry point for maskmovdqu.  Operand 0 is the V16QI memory
;; destination; it also appears as an input of the unspec (match_dup 0).
;; Operands 1 and 2 are V16QI registers.  The preparation statement is
;; empty, so the pattern is emitted exactly as written here.
(define_expand "sse2_maskmovdqu"
  [(set (match_operand:V16QI 0 "memory_operand" "")
	(unspec:V16QI [(match_operand:V16QI 1 "register_operand" "x")
		       (match_operand:V16QI 2 "register_operand" "x")
		       (match_dup 0)]
		      UNSPEC_MASKMOV))]
  "TARGET_SSE2"
  "")
3818
;; 32-bit form of maskmovdqu (condition: !TARGET_64BIT).  The store
;; address is an SImode register restricted by the "D" constraint and is
;; not printed in the assembler template; only operands 1 and 2 are.
(define_insn "*sse2_maskmovdqu"
  [(set (mem:V16QI (match_operand:SI 0 "register_operand" "D"))
	(unspec:V16QI [(match_operand:V16QI 1 "register_operand" "x")
		       (match_operand:V16QI 2 "register_operand" "x")
		       (mem:V16QI (match_dup 0))]
		      UNSPEC_MASKMOV))]
  "TARGET_SSE2 && !TARGET_64BIT"
  ;; @@@ check ordering of operands in intel/nonintel syntax
  "maskmovdqu\t{%2, %1|%1, %2}"
  [(set_attr "type" "ssecvt")
   (set_attr "mode" "TI")])
3830
;; 64-bit form of maskmovdqu (condition: TARGET_64BIT).  Identical to the
;; 32-bit pattern except that the address register is DImode.
(define_insn "*sse2_maskmovdqu_rex64"
  [(set (mem:V16QI (match_operand:DI 0 "register_operand" "D"))
	(unspec:V16QI [(match_operand:V16QI 1 "register_operand" "x")
		       (match_operand:V16QI 2 "register_operand" "x")
		       (mem:V16QI (match_dup 0))]
		      UNSPEC_MASKMOV))]
  "TARGET_SSE2 && TARGET_64BIT"
  ;; @@@ check ordering of operands in intel/nonintel syntax
  "maskmovdqu\t{%2, %1|%1, %2}"
  [(set_attr "type" "ssecvt")
   (set_attr "mode" "TI")])
3842
;; ldmxcsr with an SImode memory operand.  Modelled as an unspec_volatile
;; (UNSPECV_LDMXCSR) with no visible destination; the "memory" attribute
;; marks it as a load.
(define_insn "sse_ldmxcsr"
  [(unspec_volatile [(match_operand:SI 0 "memory_operand" "m")]
		    UNSPECV_LDMXCSR)]
  "TARGET_SSE"
  "ldmxcsr\t%0"
  [(set_attr "type" "sse")
   (set_attr "memory" "load")])
3850
;; stmxcsr into an SImode memory operand.  The stored value is an
;; unspec_volatile (UNSPECV_STMXCSR) whose (const_int 0) operand is only a
;; placeholder; the "memory" attribute marks it as a store.
(define_insn "sse_stmxcsr"
  [(set (match_operand:SI 0 "memory_operand" "=m")
	(unspec_volatile:SI [(const_int 0)] UNSPECV_STMXCSR))]
  "TARGET_SSE"
  "stmxcsr\t%0"
  [(set_attr "type" "sse")
   (set_attr "memory" "store")])
3858
;; Named entry point for sfence.  Takes no operands from the caller: the
;; preparation code builds operand 0 itself as a volatile BLKmode MEM
;; whose address is a Pmode SCRATCH, and the pattern then sets that MEM
;; from an UNSPEC_SFENCE of itself.
(define_expand "sse_sfence"
  [(set (match_dup 0)
	(unspec:BLK [(match_dup 0)] UNSPEC_SFENCE))]
  "TARGET_SSE || TARGET_3DNOW_A"
{
  rtx fence_mem = gen_rtx_MEM (BLKmode, gen_rtx_SCRATCH (Pmode));

  MEM_VOLATILE_P (fence_mem) = 1;
  operands[0] = fence_mem;
})
3867
;; Matcher for the sfence pattern: a BLKmode operand (no predicate, no
;; constraint) set from an UNSPEC_SFENCE of itself.  The template prints
;; no operands, and the "memory" attribute is "unknown".
(define_insn "*sse_sfence"
  [(set (match_operand:BLK 0 "" "")
	(unspec:BLK [(match_dup 0)] UNSPEC_SFENCE))]
  "TARGET_SSE || TARGET_3DNOW_A"
  "sfence"
  [(set_attr "type" "sse")
   (set_attr "memory" "unknown")])
3875
;; clflush on an address operand ("p" constraint), printed with the %a
;; operand modifier.  Modelled as an unspec_volatile (UNSPECV_CLFLUSH)
;; with the "memory" attribute set to "unknown".
(define_insn "sse2_clflush"
  [(unspec_volatile [(match_operand 0 "address_operand" "p")]
		    UNSPECV_CLFLUSH)]
  "TARGET_SSE2"
  "clflush\t%a0"
  [(set_attr "type" "sse")
   (set_attr "memory" "unknown")])
3883
;; Named entry point for mfence.  Takes no operands from the caller: the
;; preparation code builds operand 0 itself as a volatile BLKmode MEM
;; whose address is a Pmode SCRATCH, and the pattern then sets that MEM
;; from an UNSPEC_MFENCE of itself.
(define_expand "sse2_mfence"
  [(set (match_dup 0)
	(unspec:BLK [(match_dup 0)] UNSPEC_MFENCE))]
  "TARGET_SSE2"
{
  rtx fence_mem = gen_rtx_MEM (BLKmode, gen_rtx_SCRATCH (Pmode));

  MEM_VOLATILE_P (fence_mem) = 1;
  operands[0] = fence_mem;
})
3892
;; Matcher for the mfence pattern: a BLKmode operand (no predicate, no
;; constraint) set from an UNSPEC_MFENCE of itself.  The template prints
;; no operands, and the "memory" attribute is "unknown".
(define_insn "*sse2_mfence"
  [(set (match_operand:BLK 0 "" "")
	(unspec:BLK [(match_dup 0)] UNSPEC_MFENCE))]
  "TARGET_SSE2"
  "mfence"
  [(set_attr "type" "sse")
   (set_attr "memory" "unknown")])
3900
;; Named entry point for lfence.  Takes no operands from the caller: the
;; preparation code builds operand 0 itself as a volatile BLKmode MEM
;; whose address is a Pmode SCRATCH, and the pattern then sets that MEM
;; from an UNSPEC_LFENCE of itself.
(define_expand "sse2_lfence"
  [(set (match_dup 0)
	(unspec:BLK [(match_dup 0)] UNSPEC_LFENCE))]
  "TARGET_SSE2"
{
  rtx fence_mem = gen_rtx_MEM (BLKmode, gen_rtx_SCRATCH (Pmode));

  MEM_VOLATILE_P (fence_mem) = 1;
  operands[0] = fence_mem;
})
3909
;; Matcher for the lfence pattern: a BLKmode operand (no predicate, no
;; constraint) set from an UNSPEC_LFENCE of itself.  The template prints
;; no operands, and the "memory" attribute is "unknown".
(define_insn "*sse2_lfence"
  [(set (match_operand:BLK 0 "" "")
	(unspec:BLK [(match_dup 0)] UNSPEC_LFENCE))]
  "TARGET_SSE2"
  "lfence"
  [(set_attr "type" "sse")
   (set_attr "memory" "unknown")])
3917
;; mwait.  Both SImode inputs are pinned to specific registers by their
;; constraints ("a" for operand 0, "c" for operand 1).  The insn is an
;; unspec_volatile (UNSPECV_MWAIT) and its length is given explicitly as
;; 3 rather than derived from a "type" attribute.
(define_insn "sse3_mwait"
  [(unspec_volatile [(match_operand:SI 0 "register_operand" "a")
		     (match_operand:SI 1 "register_operand" "c")]
		    UNSPECV_MWAIT)]
  "TARGET_SSE3"
  "mwait\t%0, %1"
  [(set_attr "length" "3")])
3925
;; monitor.  The three SImode inputs are pinned to specific registers by
;; their constraints ("a", "c" and "d" for operands 0, 1 and 2).  The insn
;; is an unspec_volatile (UNSPECV_MONITOR) with an explicit length of 3.
;; NOTE(review): every operand is SImode and the condition does not
;; mention TARGET_64BIT -- confirm this is adequate for 64-bit targets.
(define_insn "sse3_monitor"
  [(unspec_volatile [(match_operand:SI 0 "register_operand" "a")
		     (match_operand:SI 1 "register_operand" "c")
		     (match_operand:SI 2 "register_operand" "d")]
		    UNSPECV_MONITOR)]
  "TARGET_SSE3"
  "monitor\t%0, %1, %2"
  [(set_attr "length" "3")])
3934