Deleted Added
full compact
sse.md (171826) sse.md (219639)
1;; GCC machine description for SSE instructions
2;; Copyright (C) 2005, 2006
3;; Free Software Foundation, Inc.
4;;
5;; This file is part of GCC.
6;;
7;; GCC is free software; you can redistribute it and/or modify
8;; it under the terms of the GNU General Public License as published by
9;; the Free Software Foundation; either version 2, or (at your option)
10;; any later version.
11;;
12;; GCC is distributed in the hope that it will be useful,
13;; but WITHOUT ANY WARRANTY; without even the implied warranty of
14;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15;; GNU General Public License for more details.
16;;
17;; You should have received a copy of the GNU General Public License
18;; along with GCC; see the file COPYING. If not, write to
19;; the Free Software Foundation, 51 Franklin Street, Fifth Floor,
20;; Boston, MA 02110-1301, USA.
21
22
23;; 16 byte integral modes handled by SSE, minus TImode, which gets
24;; special-cased for TARGET_64BIT.
25(define_mode_macro SSEMODEI [V16QI V8HI V4SI V2DI])
26
27;; All 16-byte vector modes handled by SSE
28(define_mode_macro SSEMODE [V16QI V8HI V4SI V2DI V4SF V2DF])
29
30;; Mix-n-match: subsets of the integer vector modes.  The digits in each
;; name are the element sizes in bytes that the subset contains
;; (1 = V16QI, 2 = V8HI, 4 = V4SI, 8 = V2DI).
31(define_mode_macro SSEMODE12 [V16QI V8HI])
32(define_mode_macro SSEMODE24 [V8HI V4SI])
33(define_mode_macro SSEMODE14 [V16QI V4SI])
34(define_mode_macro SSEMODE124 [V16QI V8HI V4SI])
35(define_mode_macro SSEMODE248 [V8HI V4SI V2DI])
36
37;; Mapping from integer vector mode to mnemonic suffix
;; (b/w/d/q for V16QI/V8HI/V4SI/V2DI respectively).
38(define_mode_attr ssevecsize [(V16QI "b") (V8HI "w") (V4SI "d") (V2DI "q")])
39
40;; Patterns whose name begins with "sse{,2,3}_" are invoked by intrinsics.
41
42;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
43;;
44;; Move patterns
45;;
46;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
47
48;; All of these patterns are enabled for SSE1 as well as SSE2.
49;; This is essential for maintaining stable calling conventions.
50
;; Move expander for every integer vector mode in SSEMODEI.  The whole
;; expansion is delegated to ix86_expand_vector_move; DONE means the RTL
;; template of this pattern is not emitted itself.
51(define_expand "mov<mode>"
52 [(set (match_operand:SSEMODEI 0 "nonimmediate_operand" "")
53 (match_operand:SSEMODEI 1 "nonimmediate_operand" ""))]
54 "TARGET_SSE"
55{
56 ix86_expand_vector_move (<MODE>mode, operands);
57 DONE;
58})
59
;; Integer vector move insn.  Alternatives:
;;   0: load a standard SSE constant ("C") into a register,
;;   1: register or memory -> register,
;;   2: register -> memory.
;; The insn condition rejects memory-to-memory moves.  Alternatives 1 and 2
;; print movaps when the computed "mode" attribute is V4SF, movdqa otherwise.
60(define_insn "*mov<mode>_internal"
61 [(set (match_operand:SSEMODEI 0 "nonimmediate_operand" "=x,x ,m")
62 (match_operand:SSEMODEI 1 "nonimmediate_or_sse_const_operand" "C ,xm,x"))]
63 "TARGET_SSE && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
64{
65 switch (which_alternative)
66 {
67 case 0:
68 return standard_sse_constant_opcode (insn, operands[1]);
69 case 1:
70 case 2:
71 if (get_attr_mode (insn) == MODE_V4SF)
72 return "movaps\t{%1, %0|%0, %1}";
73 else
74 return "movdqa\t{%1, %0|%0, %1}";
75 default:
76 gcc_unreachable ();
77 }
78}
;; "mode" is V4SF when optimizing for size, when SSE2 is not available, or
;; for the store alternative when TARGET_SSE_TYPELESS_STORES is set;
;; otherwise it is TI.
79 [(set_attr "type" "sselog1,ssemov,ssemov")
80 (set (attr "mode")
81 (if_then_else
82 (ior (ior (ne (symbol_ref "optimize_size") (const_int 0))
83 (eq (symbol_ref "TARGET_SSE2") (const_int 0)))
84 (and (eq_attr "alternative" "2")
85 (ne (symbol_ref "TARGET_SSE_TYPELESS_STORES")
86 (const_int 0))))
87 (const_string "V4SF")
88 (const_string "TI")))])
89
;; V4SF move expander; delegates entirely to ix86_expand_vector_move.
90(define_expand "movv4sf"
91 [(set (match_operand:V4SF 0 "nonimmediate_operand" "")
92 (match_operand:V4SF 1 "nonimmediate_operand" ""))]
93 "TARGET_SSE"
94{
95 ix86_expand_vector_move (V4SFmode, operands);
96 DONE;
97})
98
;; V4SF move insn.  Alternative 0 loads a standard SSE constant ("C");
;; alternatives 1 and 2 are register/memory loads and register stores, both
;; printed as movaps.  Memory-to-memory moves are rejected in the insn
;; condition, as in the integer-vector and V2DF move insns in this file, and
;; the impossible default case uses gcc_unreachable () like those patterns.
99(define_insn "*movv4sf_internal"
100 [(set (match_operand:V4SF 0 "nonimmediate_operand" "=x,x,m")
101 (match_operand:V4SF 1 "nonimmediate_or_sse_const_operand" "C,xm,x"))]
102 "TARGET_SSE && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
103{
104 switch (which_alternative)
105 {
106 case 0:
107 return standard_sse_constant_opcode (insn, operands[1]);
108 case 1:
109 case 2:
110 return "movaps\t{%1, %0|%0, %1}";
111 default:
112 gcc_unreachable ();
113 }
114}
115 [(set_attr "type" "sselog1,ssemov,ssemov")
116 (set_attr "mode" "V4SF")])
117
;; After reload, rewrite a V4SF register load whose source matches
;; zero_extended_scalar_load_operand.  Operand 1 is narrowed to SFmode and
;; duplicated; vec_merge mask 1 keeps element 0 from it and takes the
;; remaining elements from the zero vector placed in operand 2.
118(define_split
119 [(set (match_operand:V4SF 0 "register_operand" "")
120 (match_operand:V4SF 1 "zero_extended_scalar_load_operand" ""))]
121 "TARGET_SSE && reload_completed"
122 [(set (match_dup 0)
123 (vec_merge:V4SF
124 (vec_duplicate:V4SF (match_dup 1))
125 (match_dup 2)
126 (const_int 1)))]
127{
128 operands[1] = simplify_gen_subreg (SFmode, operands[1], V4SFmode, 0);
129 operands[2] = CONST0_RTX (V4SFmode);
130})
131
;; V2DF move expander; delegates entirely to ix86_expand_vector_move.
;; Enabled for TARGET_SSE (not only SSE2), like the other move patterns here.
132(define_expand "movv2df"
133 [(set (match_operand:V2DF 0 "nonimmediate_operand" "")
134 (match_operand:V2DF 1 "nonimmediate_operand" ""))]
135 "TARGET_SSE"
136{
137 ix86_expand_vector_move (V2DFmode, operands);
138 DONE;
139})
140
;; V2DF move insn.  Alternative 0 loads a standard SSE constant ("C");
;; alternatives 1 and 2 are loads/register copies and stores.  The insn
;; condition rejects memory-to-memory moves.  Alternatives 1 and 2 print
;; movaps when the computed "mode" attribute is V4SF, movapd otherwise.
141(define_insn "*movv2df_internal"
142 [(set (match_operand:V2DF 0 "nonimmediate_operand" "=x,x,m")
143 (match_operand:V2DF 1 "nonimmediate_or_sse_const_operand" "C,xm,x"))]
144 "TARGET_SSE && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
145{
146 switch (which_alternative)
147 {
148 case 0:
149 return standard_sse_constant_opcode (insn, operands[1]);
150 case 1:
151 case 2:
152 if (get_attr_mode (insn) == MODE_V4SF)
153 return "movaps\t{%1, %0|%0, %1}";
154 else
155 return "movapd\t{%1, %0|%0, %1}";
156 default:
157 gcc_unreachable ();
158 }
159}
;; "mode" is V4SF when optimizing for size, when SSE2 is not available, or
;; for the store alternative when TARGET_SSE_TYPELESS_STORES is set;
;; otherwise it is V2DF.
160 [(set_attr "type" "sselog1,ssemov,ssemov")
161 (set (attr "mode")
162 (if_then_else
163 (ior (ior (ne (symbol_ref "optimize_size") (const_int 0))
164 (eq (symbol_ref "TARGET_SSE2") (const_int 0)))
165 (and (eq_attr "alternative" "2")
166 (ne (symbol_ref "TARGET_SSE_TYPELESS_STORES")
167 (const_int 0))))
168 (const_string "V4SF")
169 (const_string "V2DF")))])
170
;; After reload (SSE2 only), rewrite a V2DF register load whose source
;; matches zero_extended_scalar_load_operand as a vec_concat of the source
;; narrowed to DFmode (operand 1) with a DFmode zero (operand 2).
171(define_split
172 [(set (match_operand:V2DF 0 "register_operand" "")
173 (match_operand:V2DF 1 "zero_extended_scalar_load_operand" ""))]
174 "TARGET_SSE2 && reload_completed"
175 [(set (match_dup 0) (vec_concat:V2DF (match_dup 1) (match_dup 2)))]
176{
177 operands[1] = simplify_gen_subreg (DFmode, operands[1], V2DFmode, 0);
178 operands[2] = CONST0_RTX (DFmode);
179})
180
;; Push expander for every mode in SSEMODE; operand 0 must be a register.
;; The expansion is delegated to ix86_expand_push.
181(define_expand "push<mode>1"
182 [(match_operand:SSEMODE 0 "register_operand" "")]
183 "TARGET_SSE"
184{
185 ix86_expand_push (<MODE>mode, operands[0]);
186 DONE;
187})
188
;; Misaligned move expander for every mode in SSEMODE; the expansion is
;; delegated to ix86_expand_vector_move_misalign.
189(define_expand "movmisalign<mode>"
190 [(set (match_operand:SSEMODE 0 "nonimmediate_operand" "")
191 (match_operand:SSEMODE 1 "nonimmediate_operand" ""))]
192 "TARGET_SSE"
193{
194 ix86_expand_vector_move_misalign (<MODE>mode, operands);
195 DONE;
196})
197
;; movups: V4SF move wrapped in UNSPEC_MOVU.  Alternatives are
;; register/memory -> register and register -> memory; memory-to-memory is
;; rejected by the insn condition.  The "mode" attribute is V4SF to match the
;; V4SF operands and the single-precision mnemonic (it previously said V2DF,
;; which belongs to the movupd pattern that follows).
198(define_insn "sse_movups"
199 [(set (match_operand:V4SF 0 "nonimmediate_operand" "=x,m")
200 (unspec:V4SF [(match_operand:V4SF 1 "nonimmediate_operand" "xm,x")]
201 UNSPEC_MOVU))]
202 "TARGET_SSE && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
203 "movups\t{%1, %0|%0, %1}"
204 [(set_attr "type" "ssemov")
205 (set_attr "mode" "V4SF")])
206
;; movupd: V2DF move wrapped in UNSPEC_MOVU (SSE2).  Alternatives are
;; register/memory -> register and register -> memory; memory-to-memory is
;; rejected by the insn condition.
207(define_insn "sse2_movupd"
208 [(set (match_operand:V2DF 0 "nonimmediate_operand" "=x,m")
209 (unspec:V2DF [(match_operand:V2DF 1 "nonimmediate_operand" "xm,x")]
210 UNSPEC_MOVU))]
211 "TARGET_SSE2 && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
212 "movupd\t{%1, %0|%0, %1}"
213 [(set_attr "type" "ssemov")
214 (set_attr "mode" "V2DF")])
215
;; movdqu: V16QI move wrapped in UNSPEC_MOVU (SSE2).  Alternatives are
;; register/memory -> register and register -> memory; memory-to-memory is
;; rejected by the insn condition.
216(define_insn "sse2_movdqu"
217 [(set (match_operand:V16QI 0 "nonimmediate_operand" "=x,m")
218 (unspec:V16QI [(match_operand:V16QI 1 "nonimmediate_operand" "xm,x")]
219 UNSPEC_MOVU))]
220 "TARGET_SSE2 && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
221 "movdqu\t{%1, %0|%0, %1}"
222 [(set_attr "type" "ssemov")
223 (set_attr "mode" "TI")])
224
;; movntps: store of a V4SF SSE register to memory, wrapped in UNSPEC_MOVNT.
225(define_insn "sse_movntv4sf"
226 [(set (match_operand:V4SF 0 "memory_operand" "=m")
227 (unspec:V4SF [(match_operand:V4SF 1 "register_operand" "x")]
228 UNSPEC_MOVNT))]
229 "TARGET_SSE"
230 "movntps\t{%1, %0|%0, %1}"
231 [(set_attr "type" "ssemov")
232 (set_attr "mode" "V4SF")])
233
;; movntpd: store of a V2DF SSE register to memory, wrapped in UNSPEC_MOVNT.
;; NOTE(review): "type" is ssecvt here while sse_movntv4sf uses ssemov;
;; confirm whether the difference is intentional.
234(define_insn "sse2_movntv2df"
235 [(set (match_operand:V2DF 0 "memory_operand" "=m")
236 (unspec:V2DF [(match_operand:V2DF 1 "register_operand" "x")]
237 UNSPEC_MOVNT))]
238 "TARGET_SSE2"
239 "movntpd\t{%1, %0|%0, %1}"
240 [(set_attr "type" "ssecvt")
241 (set_attr "mode" "V2DF")])
242
;; movntdq: store of a V2DI SSE register to memory, wrapped in UNSPEC_MOVNT.
;; NOTE(review): "type" is ssecvt here while sse_movntv4sf uses ssemov;
;; confirm whether the difference is intentional.
243(define_insn "sse2_movntv2di"
244 [(set (match_operand:V2DI 0 "memory_operand" "=m")
245 (unspec:V2DI [(match_operand:V2DI 1 "register_operand" "x")]
246 UNSPEC_MOVNT))]
247 "TARGET_SSE2"
248 "movntdq\t{%1, %0|%0, %1}"
249 [(set_attr "type" "ssecvt")
250 (set_attr "mode" "TI")])
251
;; movnti: store of an SImode general register ("r") to memory, wrapped in
;; UNSPEC_MOVNT.  The "mode" attribute is SI to match the SImode operands;
;; it previously said V2DF, which describes neither operand.
;; NOTE(review): "type" is left as ssecvt, although sse_movntv4sf uses
;; ssemov; confirm against the scheduling descriptions before changing it.
252(define_insn "sse2_movntsi"
253 [(set (match_operand:SI 0 "memory_operand" "=m")
254 (unspec:SI [(match_operand:SI 1 "register_operand" "r")]
255 UNSPEC_MOVNT))]
256 "TARGET_SSE2"
257 "movnti\t{%1, %0|%0, %1}"
258 [(set_attr "type" "ssecvt")
259 (set_attr "mode" "SI")])
260
;; lddqu (SSE3): V16QI load from memory into an SSE register.  The unspec
;; constant is spelled UNSPEC_LDQQU here; it is defined outside this file.
261(define_insn "sse3_lddqu"
262 [(set (match_operand:V16QI 0 "register_operand" "=x")
263 (unspec:V16QI [(match_operand:V16QI 1 "memory_operand" "m")]
264 UNSPEC_LDQQU))]
265 "TARGET_SSE3"
266 "lddqu\t{%1, %0|%0, %1}"
267 [(set_attr "type" "ssecvt")
268 (set_attr "mode" "TI")])
269
270;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
271;;
272;; Parallel single-precision floating point arithmetic
273;;
274;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
275
;; V4SF negation expander; delegates to ix86_expand_fp_absneg_operator
;; with code NEG.
276(define_expand "negv4sf2"
277 [(set (match_operand:V4SF 0 "register_operand" "")
278 (neg:V4SF (match_operand:V4SF 1 "nonimmediate_operand" "")))]
279 "TARGET_SSE"
280 "ix86_expand_fp_absneg_operator (NEG, V4SFmode, operands); DONE;")
281
;; V4SF absolute-value expander; delegates to
;; ix86_expand_fp_absneg_operator with code ABS.
282(define_expand "absv4sf2"
283 [(set (match_operand:V4SF 0 "register_operand" "")
284 (abs:V4SF (match_operand:V4SF 1 "nonimmediate_operand" "")))]
285 "TARGET_SSE"
286 "ix86_expand_fp_absneg_operator (ABS, V4SFmode, operands); DONE;")
287
;; V4SF addition expander.  The operands are adjusted by
;; ix86_fixup_binary_operands_no_copy and then the template is emitted
;; (there is no DONE).
288(define_expand "addv4sf3"
289 [(set (match_operand:V4SF 0 "register_operand" "")
290 (plus:V4SF (match_operand:V4SF 1 "nonimmediate_operand" "")
291 (match_operand:V4SF 2 "nonimmediate_operand" "")))]
292 "TARGET_SSE"
293 "ix86_fixup_binary_operands_no_copy (PLUS, V4SFmode, operands);")
294
;; addps.  Operand 1 is tied to the destination ("0"); the "%" marks
;; operands 1 and 2 as commutative.
295(define_insn "*addv4sf3"
296 [(set (match_operand:V4SF 0 "register_operand" "=x")
297 (plus:V4SF (match_operand:V4SF 1 "nonimmediate_operand" "%0")
298 (match_operand:V4SF 2 "nonimmediate_operand" "xm")))]
299 "TARGET_SSE && ix86_binary_operator_ok (PLUS, V4SFmode, operands)"
300 "addps\t{%2, %0|%0, %2}"
301 [(set_attr "type" "sseadd")
302 (set_attr "mode" "V4SF")])
303
;; addss.  vec_merge mask 1 takes element 0 from the vector sum and the
;; remaining elements unchanged from operand 1 (tied to the destination).
304(define_insn "sse_vmaddv4sf3"
305 [(set (match_operand:V4SF 0 "register_operand" "=x")
306 (vec_merge:V4SF
307 (plus:V4SF (match_operand:V4SF 1 "register_operand" "0")
308 (match_operand:V4SF 2 "nonimmediate_operand" "xm"))
309 (match_dup 1)
310 (const_int 1)))]
311 "TARGET_SSE && ix86_binary_operator_ok (PLUS, V4SFmode, operands)"
312 "addss\t{%2, %0|%0, %2}"
313 [(set_attr "type" "sseadd")
314 (set_attr "mode" "SF")])
315
;; V4SF subtraction expander.  Operand 1 must already be a register; the
;; operands are adjusted by ix86_fixup_binary_operands_no_copy and then the
;; template is emitted.
316(define_expand "subv4sf3"
317 [(set (match_operand:V4SF 0 "register_operand" "")
318 (minus:V4SF (match_operand:V4SF 1 "register_operand" "")
319 (match_operand:V4SF 2 "nonimmediate_operand" "")))]
320 "TARGET_SSE"
321 "ix86_fixup_binary_operands_no_copy (MINUS, V4SFmode, operands);")
322
;; subps.  Operand 1 is a register tied to the destination; operand 2 may
;; be a register or memory.
323(define_insn "*subv4sf3"
324 [(set (match_operand:V4SF 0 "register_operand" "=x")
325 (minus:V4SF (match_operand:V4SF 1 "register_operand" "0")
326 (match_operand:V4SF 2 "nonimmediate_operand" "xm")))]
327 "TARGET_SSE"
328 "subps\t{%2, %0|%0, %2}"
329 [(set_attr "type" "sseadd")
330 (set_attr "mode" "V4SF")])
331
;; subss.  vec_merge mask 1 takes element 0 from the vector difference and
;; the remaining elements unchanged from operand 1 (tied to the destination).
332(define_insn "sse_vmsubv4sf3"
333 [(set (match_operand:V4SF 0 "register_operand" "=x")
334 (vec_merge:V4SF
335 (minus:V4SF (match_operand:V4SF 1 "register_operand" "0")
336 (match_operand:V4SF 2 "nonimmediate_operand" "xm"))
337 (match_dup 1)
338 (const_int 1)))]
339 "TARGET_SSE"
340 "subss\t{%2, %0|%0, %2}"
341 [(set_attr "type" "sseadd")
342 (set_attr "mode" "SF")])
343
;; V4SF multiplication expander.  The operands are adjusted by
;; ix86_fixup_binary_operands_no_copy and then the template is emitted.
344(define_expand "mulv4sf3"
345 [(set (match_operand:V4SF 0 "register_operand" "")
346 (mult:V4SF (match_operand:V4SF 1 "nonimmediate_operand" "")
347 (match_operand:V4SF 2 "nonimmediate_operand" "")))]
348 "TARGET_SSE"
349 "ix86_fixup_binary_operands_no_copy (MULT, V4SFmode, operands);")
350
;; mulps.  Operand 1 is tied to the destination ("0"); the "%" marks
;; operands 1 and 2 as commutative.
351(define_insn "*mulv4sf3"
352 [(set (match_operand:V4SF 0 "register_operand" "=x")
353 (mult:V4SF (match_operand:V4SF 1 "nonimmediate_operand" "%0")
354 (match_operand:V4SF 2 "nonimmediate_operand" "xm")))]
355 "TARGET_SSE && ix86_binary_operator_ok (MULT, V4SFmode, operands)"
356 "mulps\t{%2, %0|%0, %2}"
357 [(set_attr "type" "ssemul")
358 (set_attr "mode" "V4SF")])
359
;; mulss.  vec_merge mask 1 takes element 0 from the vector product and the
;; remaining elements unchanged from operand 1 (tied to the destination).
360(define_insn "sse_vmmulv4sf3"
361 [(set (match_operand:V4SF 0 "register_operand" "=x")
362 (vec_merge:V4SF
363 (mult:V4SF (match_operand:V4SF 1 "register_operand" "0")
364 (match_operand:V4SF 2 "nonimmediate_operand" "xm"))
365 (match_dup 1)
366 (const_int 1)))]
367 "TARGET_SSE && ix86_binary_operator_ok (MULT, V4SFmode, operands)"
368 "mulss\t{%2, %0|%0, %2}"
369 [(set_attr "type" "ssemul")
370 (set_attr "mode" "SF")])
371
;; V4SF division expander.  Operand 1 must already be a register; the
;; operands are adjusted by ix86_fixup_binary_operands_no_copy and then the
;; template is emitted.
372(define_expand "divv4sf3"
373 [(set (match_operand:V4SF 0 "register_operand" "")
374 (div:V4SF (match_operand:V4SF 1 "register_operand" "")
375 (match_operand:V4SF 2 "nonimmediate_operand" "")))]
376 "TARGET_SSE"
377 "ix86_fixup_binary_operands_no_copy (DIV, V4SFmode, operands);")
378
;; divps.  Operand 1 is a register tied to the destination; operand 2 may
;; be a register or memory.
379(define_insn "*divv4sf3"
380 [(set (match_operand:V4SF 0 "register_operand" "=x")
381 (div:V4SF (match_operand:V4SF 1 "register_operand" "0")
382 (match_operand:V4SF 2 "nonimmediate_operand" "xm")))]
383 "TARGET_SSE"
384 "divps\t{%2, %0|%0, %2}"
385 [(set_attr "type" "ssediv")
386 (set_attr "mode" "V4SF")])
387
;; divss.  vec_merge mask 1 takes element 0 from the vector quotient and the
;; remaining elements unchanged from operand 1 (tied to the destination).
388(define_insn "sse_vmdivv4sf3"
389 [(set (match_operand:V4SF 0 "register_operand" "=x")
390 (vec_merge:V4SF
391 (div:V4SF (match_operand:V4SF 1 "register_operand" "0")
392 (match_operand:V4SF 2 "nonimmediate_operand" "xm"))
393 (match_dup 1)
394 (const_int 1)))]
395 "TARGET_SSE"
396 "divss\t{%2, %0|%0, %2}"
397 [(set_attr "type" "ssediv")
398 (set_attr "mode" "SF")])
399
;; rcpps, modelled as UNSPEC_RCP of the V4SF source (register or memory).
400(define_insn "sse_rcpv4sf2"
401 [(set (match_operand:V4SF 0 "register_operand" "=x")
402 (unspec:V4SF
403 [(match_operand:V4SF 1 "nonimmediate_operand" "xm")] UNSPEC_RCP))]
404 "TARGET_SSE"
405 "rcpps\t{%1, %0|%0, %1}"
406 [(set_attr "type" "sse")
407 (set_attr "mode" "V4SF")])
408
;; rcpss.  vec_merge mask 1 takes element 0 from UNSPEC_RCP of operand 1 and
;; the remaining elements from operand 2, which is tied to the destination.
409(define_insn "sse_vmrcpv4sf2"
410 [(set (match_operand:V4SF 0 "register_operand" "=x")
411 (vec_merge:V4SF
412 (unspec:V4SF [(match_operand:V4SF 1 "nonimmediate_operand" "xm")]
413 UNSPEC_RCP)
414 (match_operand:V4SF 2 "register_operand" "0")
415 (const_int 1)))]
416 "TARGET_SSE"
417 "rcpss\t{%1, %0|%0, %1}"
418 [(set_attr "type" "sse")
419 (set_attr "mode" "SF")])
420
;; rsqrtps, modelled as UNSPEC_RSQRT of the V4SF source (register or memory).
421(define_insn "sse_rsqrtv4sf2"
422 [(set (match_operand:V4SF 0 "register_operand" "=x")
423 (unspec:V4SF
424 [(match_operand:V4SF 1 "nonimmediate_operand" "xm")] UNSPEC_RSQRT))]
425 "TARGET_SSE"
426 "rsqrtps\t{%1, %0|%0, %1}"
427 [(set_attr "type" "sse")
428 (set_attr "mode" "V4SF")])
429
;; rsqrtss.  vec_merge mask 1 takes element 0 from UNSPEC_RSQRT of operand 1
;; and the remaining elements from operand 2, which is tied to the
;; destination.
430(define_insn "sse_vmrsqrtv4sf2"
431 [(set (match_operand:V4SF 0 "register_operand" "=x")
432 (vec_merge:V4SF
433 (unspec:V4SF [(match_operand:V4SF 1 "nonimmediate_operand" "xm")]
434 UNSPEC_RSQRT)
435 (match_operand:V4SF 2 "register_operand" "0")
436 (const_int 1)))]
437 "TARGET_SSE"
438 "rsqrtss\t{%1, %0|%0, %1}"
439 [(set_attr "type" "sse")
440 (set_attr "mode" "SF")])
441
;; sqrtps: V4SF square root of a register or memory source.
442(define_insn "sqrtv4sf2"
443 [(set (match_operand:V4SF 0 "register_operand" "=x")
444 (sqrt:V4SF (match_operand:V4SF 1 "nonimmediate_operand" "xm")))]
445 "TARGET_SSE"
446 "sqrtps\t{%1, %0|%0, %1}"
447 [(set_attr "type" "sse")
448 (set_attr "mode" "V4SF")])
449
;; sqrtss.  vec_merge mask 1 takes element 0 from the square root of
;; operand 1 and the remaining elements from operand 2, which is tied to
;; the destination.
450(define_insn "sse_vmsqrtv4sf2"
451 [(set (match_operand:V4SF 0 "register_operand" "=x")
452 (vec_merge:V4SF
453 (sqrt:V4SF (match_operand:V4SF 1 "nonimmediate_operand" "xm"))
454 (match_operand:V4SF 2 "register_operand" "0")
455 (const_int 1)))]
456 "TARGET_SSE"
457 "sqrtss\t{%1, %0|%0, %1}"
458 [(set_attr "type" "sse")
459 (set_attr "mode" "SF")])
460
461;; ??? For !flag_finite_math_only, the representation with SMIN/SMAX
462;; isn't really correct, as those rtl operators aren't defined when
463;; applied to NaNs. Hopefully the optimizers won't get too smart on us.
464
;; V4SF signed-maximum expander.  Unless flag_finite_math_only is set,
;; operand 1 is first forced into a register; the operands are then adjusted
;; by ix86_fixup_binary_operands_no_copy and the template is emitted.
465(define_expand "smaxv4sf3"
466 [(set (match_operand:V4SF 0 "register_operand" "")
467 (smax:V4SF (match_operand:V4SF 1 "nonimmediate_operand" "")
468 (match_operand:V4SF 2 "nonimmediate_operand" "")))]
469 "TARGET_SSE"
470{
471 if (!flag_finite_math_only)
472 operands[1] = force_reg (V4SFmode, operands[1]);
473 ix86_fixup_binary_operands_no_copy (SMAX, V4SFmode, operands);
474})
475
;; maxps, variant enabled only with flag_finite_math_only.  Here the "%"
;; marks operands 1 and 2 as commutative.
476(define_insn "*smaxv4sf3_finite"
477 [(set (match_operand:V4SF 0 "register_operand" "=x")
478 (smax:V4SF (match_operand:V4SF 1 "nonimmediate_operand" "%0")
479 (match_operand:V4SF 2 "nonimmediate_operand" "xm")))]
480 "TARGET_SSE && flag_finite_math_only
481 && ix86_binary_operator_ok (SMAX, V4SFmode, operands)"
482 "maxps\t{%2, %0|%0, %2}"
483 [(set_attr "type" "sse")
484 (set_attr "mode" "V4SF")])
485
;; maxps, general variant (no flag_finite_math_only requirement).
;; Operand 1 must be a register tied to the destination and the operands are
;; not marked commutative.
486(define_insn "*smaxv4sf3"
487 [(set (match_operand:V4SF 0 "register_operand" "=x")
488 (smax:V4SF (match_operand:V4SF 1 "register_operand" "0")
489 (match_operand:V4SF 2 "nonimmediate_operand" "xm")))]
490 "TARGET_SSE"
491 "maxps\t{%2, %0|%0, %2}"
492 [(set_attr "type" "sse")
493 (set_attr "mode" "V4SF")])
494
;; maxss.  vec_merge mask 1 takes element 0 from the vector maximum and the
;; remaining elements unchanged from operand 1 (tied to the destination).
495(define_insn "sse_vmsmaxv4sf3"
496 [(set (match_operand:V4SF 0 "register_operand" "=x")
497 (vec_merge:V4SF
498 (smax:V4SF (match_operand:V4SF 1 "register_operand" "0")
499 (match_operand:V4SF 2 "nonimmediate_operand" "xm"))
500 (match_dup 1)
501 (const_int 1)))]
502 "TARGET_SSE"
503 "maxss\t{%2, %0|%0, %2}"
504 [(set_attr "type" "sse")
505 (set_attr "mode" "SF")])
506
;; V4SF signed-minimum expander.  Unless flag_finite_math_only is set,
;; operand 1 is first forced into a register; the operands are then adjusted
;; by ix86_fixup_binary_operands_no_copy and the template is emitted.
507(define_expand "sminv4sf3"
508 [(set (match_operand:V4SF 0 "register_operand" "")
509 (smin:V4SF (match_operand:V4SF 1 "nonimmediate_operand" "")
510 (match_operand:V4SF 2 "nonimmediate_operand" "")))]
511 "TARGET_SSE"
512{
513 if (!flag_finite_math_only)
514 operands[1] = force_reg (V4SFmode, operands[1]);
515 ix86_fixup_binary_operands_no_copy (SMIN, V4SFmode, operands);
516})
517
;; minps, variant enabled only with flag_finite_math_only.  Here the "%"
;; marks operands 1 and 2 as commutative.
518(define_insn "*sminv4sf3_finite"
519 [(set (match_operand:V4SF 0 "register_operand" "=x")
520 (smin:V4SF (match_operand:V4SF 1 "nonimmediate_operand" "%0")
521 (match_operand:V4SF 2 "nonimmediate_operand" "xm")))]
522 "TARGET_SSE && flag_finite_math_only
523 && ix86_binary_operator_ok (SMIN, V4SFmode, operands)"
524 "minps\t{%2, %0|%0, %2}"
525 [(set_attr "type" "sse")
526 (set_attr "mode" "V4SF")])
527
;; minps, general variant (no flag_finite_math_only requirement).
;; Operand 1 must be a register tied to the destination and the operands are
;; not marked commutative.
528(define_insn "*sminv4sf3"
529 [(set (match_operand:V4SF 0 "register_operand" "=x")
530 (smin:V4SF (match_operand:V4SF 1 "register_operand" "0")
531 (match_operand:V4SF 2 "nonimmediate_operand" "xm")))]
532 "TARGET_SSE"
533 "minps\t{%2, %0|%0, %2}"
534 [(set_attr "type" "sse")
535 (set_attr "mode" "V4SF")])
536
;; minss.  vec_merge mask 1 takes element 0 from the vector minimum and the
;; remaining elements unchanged from operand 1 (tied to the destination).
537(define_insn "sse_vmsminv4sf3"
538 [(set (match_operand:V4SF 0 "register_operand" "=x")
539 (vec_merge:V4SF
540 (smin:V4SF (match_operand:V4SF 1 "register_operand" "0")
541 (match_operand:V4SF 2 "nonimmediate_operand" "xm"))
542 (match_dup 1)
543 (const_int 1)))]
544 "TARGET_SSE"
545 "minss\t{%2, %0|%0, %2}"
546 [(set_attr "type" "sse")
547 (set_attr "mode" "SF")])
548
549;; These versions of the min/max patterns implement exactly the operations
550;; min = (op1 < op2 ? op1 : op2)
551;; max = (!(op1 < op2) ? op1 : op2)
552;; Their operands are not commutative, and thus they may be used in the
553;; presence of -0.0 and NaN.
554
;; minps expressed as UNSPEC_IEEE_MIN over (operand 1, operand 2) rather
;; than smin; the operand order is fixed (no "%" commutativity marker).
555(define_insn "*ieee_sminv4sf3"
556 [(set (match_operand:V4SF 0 "register_operand" "=x")
557 (unspec:V4SF [(match_operand:V4SF 1 "register_operand" "0")
558 (match_operand:V4SF 2 "nonimmediate_operand" "xm")]
559 UNSPEC_IEEE_MIN))]
560 "TARGET_SSE"
561 "minps\t{%2, %0|%0, %2}"
562 [(set_attr "type" "sseadd")
563 (set_attr "mode" "V4SF")])
564
;; maxps expressed as UNSPEC_IEEE_MAX over (operand 1, operand 2) rather
;; than smax; the operand order is fixed (no "%" commutativity marker).
565(define_insn "*ieee_smaxv4sf3"
566 [(set (match_operand:V4SF 0 "register_operand" "=x")
567 (unspec:V4SF [(match_operand:V4SF 1 "register_operand" "0")
568 (match_operand:V4SF 2 "nonimmediate_operand" "xm")]
569 UNSPEC_IEEE_MAX))]
570 "TARGET_SSE"
571 "maxps\t{%2, %0|%0, %2}"
572 [(set_attr "type" "sseadd")
573 (set_attr "mode" "V4SF")])
574
;; minpd (SSE2) expressed as UNSPEC_IEEE_MIN over (operand 1, operand 2);
;; the operand order is fixed (no "%" commutativity marker).
575(define_insn "*ieee_sminv2df3"
576 [(set (match_operand:V2DF 0 "register_operand" "=x")
577 (unspec:V2DF [(match_operand:V2DF 1 "register_operand" "0")
578 (match_operand:V2DF 2 "nonimmediate_operand" "xm")]
579 UNSPEC_IEEE_MIN))]
580 "TARGET_SSE2"
581 "minpd\t{%2, %0|%0, %2}"
582 [(set_attr "type" "sseadd")
583 (set_attr "mode" "V2DF")])
584
;; maxpd (SSE2) expressed as UNSPEC_IEEE_MAX over (operand 1, operand 2);
;; the operand order is fixed (no "%" commutativity marker).
585(define_insn "*ieee_smaxv2df3"
586 [(set (match_operand:V2DF 0 "register_operand" "=x")
587 (unspec:V2DF [(match_operand:V2DF 1 "register_operand" "0")
588 (match_operand:V2DF 2 "nonimmediate_operand" "xm")]
589 UNSPEC_IEEE_MAX))]
590 "TARGET_SSE2"
591 "maxpd\t{%2, %0|%0, %2}"
592 [(set_attr "type" "sseadd")
593 (set_attr "mode" "V2DF")])
594
;; addsubps (SSE3).  The instruction subtracts in elements 0 and 2 and adds
;; in elements 1 and 3.  A set bit i in the vec_merge mask selects element i
;; from the first arm (the sum), so the mask must be 10 (binary 1010); the
;; previous value 5 described the sum in elements 0 and 2, the opposite of
;; what the instruction computes.
595(define_insn "sse3_addsubv4sf3"
596 [(set (match_operand:V4SF 0 "register_operand" "=x")
597 (vec_merge:V4SF
598 (plus:V4SF
599 (match_operand:V4SF 1 "register_operand" "0")
600 (match_operand:V4SF 2 "nonimmediate_operand" "xm"))
601 (minus:V4SF (match_dup 1) (match_dup 2))
602 (const_int 10)))]
603 "TARGET_SSE3"
604 "addsubps\t{%2, %0|%0, %2}"
605 [(set_attr "type" "sseadd")
606 (set_attr "mode" "V4SF")])
607
;; haddps (SSE3).  Result elements, low to high:
;;   op1[0]+op1[1], op1[2]+op1[3], op2[0]+op2[1], op2[2]+op2[3].
;; Operand 1 is tied to the destination.
608(define_insn "sse3_haddv4sf3"
609 [(set (match_operand:V4SF 0 "register_operand" "=x")
610 (vec_concat:V4SF
611 (vec_concat:V2SF
612 (plus:SF
613 (vec_select:SF
614 (match_operand:V4SF 1 "register_operand" "0")
615 (parallel [(const_int 0)]))
616 (vec_select:SF (match_dup 1) (parallel [(const_int 1)])))
617 (plus:SF
618 (vec_select:SF (match_dup 1) (parallel [(const_int 2)]))
619 (vec_select:SF (match_dup 1) (parallel [(const_int 3)]))))
620 (vec_concat:V2SF
621 (plus:SF
622 (vec_select:SF
623 (match_operand:V4SF 2 "nonimmediate_operand" "xm")
624 (parallel [(const_int 0)]))
625 (vec_select:SF (match_dup 2) (parallel [(const_int 1)])))
626 (plus:SF
627 (vec_select:SF (match_dup 2) (parallel [(const_int 2)]))
628 (vec_select:SF (match_dup 2) (parallel [(const_int 3)]))))))]
629 "TARGET_SSE3"
630 "haddps\t{%2, %0|%0, %2}"
631 [(set_attr "type" "sseadd")
632 (set_attr "mode" "V4SF")])
633
;; hsubps (SSE3).  Result elements, low to high:
;;   op1[0]-op1[1], op1[2]-op1[3], op2[0]-op2[1], op2[2]-op2[3].
;; Operand 1 is tied to the destination.
634(define_insn "sse3_hsubv4sf3"
635 [(set (match_operand:V4SF 0 "register_operand" "=x")
636 (vec_concat:V4SF
637 (vec_concat:V2SF
638 (minus:SF
639 (vec_select:SF
640 (match_operand:V4SF 1 "register_operand" "0")
641 (parallel [(const_int 0)]))
642 (vec_select:SF (match_dup 1) (parallel [(const_int 1)])))
643 (minus:SF
644 (vec_select:SF (match_dup 1) (parallel [(const_int 2)]))
645 (vec_select:SF (match_dup 1) (parallel [(const_int 3)]))))
646 (vec_concat:V2SF
647 (minus:SF
648 (vec_select:SF
649 (match_operand:V4SF 2 "nonimmediate_operand" "xm")
650 (parallel [(const_int 0)]))
651 (vec_select:SF (match_dup 2) (parallel [(const_int 1)])))
652 (minus:SF
653 (vec_select:SF (match_dup 2) (parallel [(const_int 2)]))
654 (vec_select:SF (match_dup 2) (parallel [(const_int 3)]))))))]
655 "TARGET_SSE3"
656 "hsubps\t{%2, %0|%0, %2}"
657 [(set_attr "type" "sseadd")
658 (set_attr "mode" "V4SF")])
659
;; Sum reduction of a V4SF vector (operand 1) into operand 0.  Without SSE3
;; the work is handed to ix86_expand_reduc_v4sf with gen_addv4sf3; with SSE3
;; two haddps steps are emitted, the second consuming the result of the
;; first.
660(define_expand "reduc_splus_v4sf"
661 [(match_operand:V4SF 0 "register_operand" "")
662 (match_operand:V4SF 1 "register_operand" "")]
663 "TARGET_SSE"
664{
665 if (!TARGET_SSE3)
666 ix86_expand_reduc_v4sf (gen_addv4sf3, operands[0], operands[1]);
667 else
668 {
669 rtx pair_sums = gen_reg_rtx (V4SFmode);
670 emit_insn (gen_sse3_haddv4sf3 (pair_sums, operands[1], operands[1]));
671 emit_insn (gen_sse3_haddv4sf3 (operands[0], pair_sums, pair_sums));
672 }
673 DONE;
674})
675
;; Maximum reduction of a V4SF vector (operand 1) into operand 0, delegated
;; to ix86_expand_reduc_v4sf with gen_smaxv4sf3 as the combining generator.
676(define_expand "reduc_smax_v4sf"
677 [(match_operand:V4SF 0 "register_operand" "")
678 (match_operand:V4SF 1 "register_operand" "")]
679 "TARGET_SSE"
680{
681 ix86_expand_reduc_v4sf (gen_smaxv4sf3, operands[0], operands[1]);
682 DONE;
683})
684
;; Minimum reduction of a V4SF vector (operand 1) into operand 0, delegated
;; to ix86_expand_reduc_v4sf with gen_sminv4sf3 as the combining generator.
685(define_expand "reduc_smin_v4sf"
686 [(match_operand:V4SF 0 "register_operand" "")
687 (match_operand:V4SF 1 "register_operand" "")]
688 "TARGET_SSE"
689{
690 ix86_expand_reduc_v4sf (gen_sminv4sf3, operands[0], operands[1]);
691 DONE;
692})
693
694;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
695;;
696;; Parallel single-precision floating point comparisons
697;;
698;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
699
;; cmp<cond>ps.  Operand 3 is the comparison operator (accepted by
;; sse_comparison_operator); "%D3" prints its condition suffix into the
;; mnemonic.  Operand 1 is tied to the destination.
700(define_insn "sse_maskcmpv4sf3"
701 [(set (match_operand:V4SF 0 "register_operand" "=x")
702 (match_operator:V4SF 3 "sse_comparison_operator"
703 [(match_operand:V4SF 1 "register_operand" "0")
704 (match_operand:V4SF 2 "nonimmediate_operand" "xm")]))]
705 "TARGET_SSE"
706 "cmp%D3ps\t{%2, %0|%0, %2}"
707 [(set_attr "type" "ssecmp")
708 (set_attr "mode" "V4SF")])
709
;; cmp<cond>ss.  vec_merge mask 1 takes element 0 from the comparison result
;; and the remaining elements from operand 1 (tied to the destination).
;; Unlike the packed form, operand 2 must be a register here.
710(define_insn "sse_vmmaskcmpv4sf3"
711 [(set (match_operand:V4SF 0 "register_operand" "=x")
712 (vec_merge:V4SF
713 (match_operator:V4SF 3 "sse_comparison_operator"
714 [(match_operand:V4SF 1 "register_operand" "0")
715 (match_operand:V4SF 2 "register_operand" "x")])
716 (match_dup 1)
717 (const_int 1)))]
718 "TARGET_SSE"
719 "cmp%D3ss\t{%2, %0|%0, %2}"
720 [(set_attr "type" "ssecmp")
721 (set_attr "mode" "SF")])
722
;; comiss: compares element 0 of operand 0 with element 0 of operand 1 and
;; sets FLAGS_REG in CCFP mode.
723(define_insn "sse_comi"
724 [(set (reg:CCFP FLAGS_REG)
725 (compare:CCFP
726 (vec_select:SF
727 (match_operand:V4SF 0 "register_operand" "x")
728 (parallel [(const_int 0)]))
729 (vec_select:SF
730 (match_operand:V4SF 1 "nonimmediate_operand" "xm")
731 (parallel [(const_int 0)]))))]
732 "TARGET_SSE"
733 "comiss\t{%1, %0|%0, %1}"
734 [(set_attr "type" "ssecomi")
735 (set_attr "mode" "SF")])
736
;; ucomiss: compares element 0 of operand 0 with element 0 of operand 1 and
;; sets FLAGS_REG in CCFPU mode.
737(define_insn "sse_ucomi"
738 [(set (reg:CCFPU FLAGS_REG)
739 (compare:CCFPU
740 (vec_select:SF
741 (match_operand:V4SF 0 "register_operand" "x")
742 (parallel [(const_int 0)]))
743 (vec_select:SF
744 (match_operand:V4SF 1 "nonimmediate_operand" "xm")
745 (parallel [(const_int 0)]))))]
746 "TARGET_SSE"
747 "ucomiss\t{%1, %0|%0, %1}"
748 [(set_attr "type" "ssecomi")
749 (set_attr "mode" "SF")])
750
;; V4SF vector conditional select: operand 0 = (op4 <cmp3> op5) ? op1 : op2.
;; The expansion is attempted by ix86_expand_fp_vcond; the expander FAILs
;; when that helper returns zero and is DONE otherwise.
751(define_expand "vcondv4sf"
752 [(set (match_operand:V4SF 0 "register_operand" "")
753 (if_then_else:V4SF
754 (match_operator 3 ""
755 [(match_operand:V4SF 4 "nonimmediate_operand" "")
756 (match_operand:V4SF 5 "nonimmediate_operand" "")])
757 (match_operand:V4SF 1 "general_operand" "")
758 (match_operand:V4SF 2 "general_operand" "")))]
759 "TARGET_SSE"
760{
761 if (!ix86_expand_fp_vcond (operands))
762 FAIL;
763 else
764 DONE;
765})
766
767;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
768;;
769;; Parallel single-precision floating point logical operations
770;;
771;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
772
;; V4SF bitwise-AND expander.  The operands are adjusted by
;; ix86_fixup_binary_operands_no_copy and then the template is emitted.
773(define_expand "andv4sf3"
774 [(set (match_operand:V4SF 0 "register_operand" "")
775 (and:V4SF (match_operand:V4SF 1 "nonimmediate_operand" "")
776 (match_operand:V4SF 2 "nonimmediate_operand" "")))]
777 "TARGET_SSE"
778 "ix86_fixup_binary_operands_no_copy (AND, V4SFmode, operands);")
779
;; andps.  Operand 1 is tied to the destination ("0"); the "%" marks
;; operands 1 and 2 as commutative.
780(define_insn "*andv4sf3"
781 [(set (match_operand:V4SF 0 "register_operand" "=x")
782 (and:V4SF (match_operand:V4SF 1 "nonimmediate_operand" "%0")
783 (match_operand:V4SF 2 "nonimmediate_operand" "xm")))]
784 "TARGET_SSE && ix86_binary_operator_ok (AND, V4SFmode, operands)"
785 "andps\t{%2, %0|%0, %2}"
786 [(set_attr "type" "sselog")
787 (set_attr "mode" "V4SF")])
788
;; andnps: (NOT operand 1) AND operand 2.  The complemented operand is the
;; one tied to the destination.
789(define_insn "sse_nandv4sf3"
790 [(set (match_operand:V4SF 0 "register_operand" "=x")
791 (and:V4SF (not:V4SF (match_operand:V4SF 1 "register_operand" "0"))
792 (match_operand:V4SF 2 "nonimmediate_operand" "xm")))]
793 "TARGET_SSE"
794 "andnps\t{%2, %0|%0, %2}"
795 [(set_attr "type" "sselog")
796 (set_attr "mode" "V4SF")])
797
;; V4SF bitwise-OR expander.  The operands are adjusted by
;; ix86_fixup_binary_operands_no_copy and then the template is emitted.
798(define_expand "iorv4sf3"
799 [(set (match_operand:V4SF 0 "register_operand" "")
800 (ior:V4SF (match_operand:V4SF 1 "nonimmediate_operand" "")
801 (match_operand:V4SF 2 "nonimmediate_operand" "")))]
802 "TARGET_SSE"
803 "ix86_fixup_binary_operands_no_copy (IOR, V4SFmode, operands);")
804
;; orps.  Operand 1 is tied to the destination ("0"); the "%" marks
;; operands 1 and 2 as commutative.
805(define_insn "*iorv4sf3"
806 [(set (match_operand:V4SF 0 "register_operand" "=x")
807 (ior:V4SF (match_operand:V4SF 1 "nonimmediate_operand" "%0")
808 (match_operand:V4SF 2 "nonimmediate_operand" "xm")))]
809 "TARGET_SSE && ix86_binary_operator_ok (IOR, V4SFmode, operands)"
810 "orps\t{%2, %0|%0, %2}"
811 [(set_attr "type" "sselog")
812 (set_attr "mode" "V4SF")])
813
;; V4SF bitwise-XOR expander.  The operands are adjusted by
;; ix86_fixup_binary_operands_no_copy and then the template is emitted.
814(define_expand "xorv4sf3"
815 [(set (match_operand:V4SF 0 "register_operand" "")
816 (xor:V4SF (match_operand:V4SF 1 "nonimmediate_operand" "")
817 (match_operand:V4SF 2 "nonimmediate_operand" "")))]
818 "TARGET_SSE"
819 "ix86_fixup_binary_operands_no_copy (XOR, V4SFmode, operands);")
820
;; xorps.  Operand 1 is tied to the destination ("0"); the "%" marks
;; operands 1 and 2 as commutative.
821(define_insn "*xorv4sf3"
822 [(set (match_operand:V4SF 0 "register_operand" "=x")
823 (xor:V4SF (match_operand:V4SF 1 "nonimmediate_operand" "%0")
824 (match_operand:V4SF 2 "nonimmediate_operand" "xm")))]
825 "TARGET_SSE && ix86_binary_operator_ok (XOR, V4SFmode, operands)"
826 "xorps\t{%2, %0|%0, %2}"
827 [(set_attr "type" "sselog")
828 (set_attr "mode" "V4SF")])
829
830;; Also define scalar versions. These are used for abs, neg, and
831;; conditional move. Using subregs into vector modes causes register
832;; allocation lossage. These patterns do not allow memory operands
833;; because the native instructions read the full 128-bits.
834
;; Scalar SF bitwise AND, emitted as the packed andps.  Both inputs must be
;; registers; the "mode" attribute is V4SF because the packed instruction is
;; used.
835(define_insn "*andsf3"
836 [(set (match_operand:SF 0 "register_operand" "=x")
837 (and:SF (match_operand:SF 1 "register_operand" "0")
838 (match_operand:SF 2 "register_operand" "x")))]
839 "TARGET_SSE"
840 "andps\t{%2, %0|%0, %2}"
841 [(set_attr "type" "sselog")
842 (set_attr "mode" "V4SF")])
843
;; Scalar SF (NOT operand 1) AND operand 2, emitted as the packed andnps.
;; Both inputs must be registers.
844(define_insn "*nandsf3"
845 [(set (match_operand:SF 0 "register_operand" "=x")
846 (and:SF (not:SF (match_operand:SF 1 "register_operand" "0"))
847 (match_operand:SF 2 "register_operand" "x")))]
848 "TARGET_SSE"
849 "andnps\t{%2, %0|%0, %2}"
850 [(set_attr "type" "sselog")
851 (set_attr "mode" "V4SF")])
852
;; Scalar SF bitwise OR, emitted as the packed orps.  Both inputs must be
;; registers.
853(define_insn "*iorsf3"
854 [(set (match_operand:SF 0 "register_operand" "=x")
855 (ior:SF (match_operand:SF 1 "register_operand" "0")
856 (match_operand:SF 2 "register_operand" "x")))]
857 "TARGET_SSE"
858 "orps\t{%2, %0|%0, %2}"
859 [(set_attr "type" "sselog")
860 (set_attr "mode" "V4SF")])
861
;; Scalar SF bitwise XOR, emitted as the packed xorps.  Both inputs must be
;; registers.
862(define_insn "*xorsf3"
863 [(set (match_operand:SF 0 "register_operand" "=x")
864 (xor:SF (match_operand:SF 1 "register_operand" "0")
865 (match_operand:SF 2 "register_operand" "x")))]
866 "TARGET_SSE"
867 "xorps\t{%2, %0|%0, %2}"
868 [(set_attr "type" "sselog")
869 (set_attr "mode" "V4SF")])
870
871;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
872;;
873;; Parallel single-precision floating point conversion operations
874;;
875;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
876
;; cvtpi2ps: converts a V2SI operand (MMX register "y" or memory) to V2SF.
;; vec_merge mask 3 places the converted values in elements 0 and 1 and
;; keeps elements 2 and 3 from operand 1 (tied to the destination).
877(define_insn "sse_cvtpi2ps"
878 [(set (match_operand:V4SF 0 "register_operand" "=x")
879 (vec_merge:V4SF
880 (vec_duplicate:V4SF
881 (float:V2SF (match_operand:V2SI 2 "nonimmediate_operand" "ym")))
882 (match_operand:V4SF 1 "register_operand" "0")
883 (const_int 3)))]
884 "TARGET_SSE"
885 "cvtpi2ps\t{%2, %0|%0, %2}"
886 [(set_attr "type" "ssecvt")
887 (set_attr "mode" "V4SF")])
888
;; cvtps2pi: UNSPEC_FIX_NOTRUNC conversion of a V4SF source to integers;
;; elements 0 and 1 of the result go to an MMX register ("y").
889(define_insn "sse_cvtps2pi"
890 [(set (match_operand:V2SI 0 "register_operand" "=y")
891 (vec_select:V2SI
892 (unspec:V4SI [(match_operand:V4SF 1 "nonimmediate_operand" "xm")]
893 UNSPEC_FIX_NOTRUNC)
894 (parallel [(const_int 0) (const_int 1)])))]
895 "TARGET_SSE"
896 "cvtps2pi\t{%1, %0|%0, %1}"
897 [(set_attr "type" "ssecvt")
898 (set_attr "unit" "mmx")
899 (set_attr "mode" "DI")])
900
;; cvttps2pi: truncating (fix) conversion of a V4SF source to integers;
;; elements 0 and 1 of the result go to an MMX register ("y").
;; NOTE(review): "mode" is SF here but DI in sse_cvtps2pi, which has the
;; same operand shapes; confirm which value is intended.
901(define_insn "sse_cvttps2pi"
902 [(set (match_operand:V2SI 0 "register_operand" "=y")
903 (vec_select:V2SI
904 (fix:V4SI (match_operand:V4SF 1 "nonimmediate_operand" "xm"))
905 (parallel [(const_int 0) (const_int 1)])))]
906 "TARGET_SSE"
907 "cvttps2pi\t{%1, %0|%0, %1}"
908 [(set_attr "type" "ssecvt")
909 (set_attr "unit" "mmx")
910 (set_attr "mode" "SF")])
911
;; cvtsi2ss: converts an SImode integer (general register or memory) to SF.
;; vec_merge mask 1 places it in element 0 and keeps the other elements from
;; operand 1 (tied to the destination).  The two alternatives differ only in
;; the source (register vs. memory) and in the athlon_decode attribute.
912(define_insn "sse_cvtsi2ss"
913 [(set (match_operand:V4SF 0 "register_operand" "=x,x")
914 (vec_merge:V4SF
915 (vec_duplicate:V4SF
916 (float:SF (match_operand:SI 2 "nonimmediate_operand" "r,m")))
917 (match_operand:V4SF 1 "register_operand" "0,0")
918 (const_int 1)))]
919 "TARGET_SSE"
920 "cvtsi2ss\t{%2, %0|%0, %2}"
921 [(set_attr "type" "sseicvt")
922 (set_attr "athlon_decode" "vector,double")
923 (set_attr "mode" "SF")])
924
;; cvtsi2ssq (64-bit only): converts a DImode integer to SF.  vec_merge
;; mask 1 places it in element 0 and keeps the other elements from operand 1
;; (tied to the destination).
925(define_insn "sse_cvtsi2ssq"
926 [(set (match_operand:V4SF 0 "register_operand" "=x,x")
927 (vec_merge:V4SF
928 (vec_duplicate:V4SF
929 (float:SF (match_operand:DI 2 "nonimmediate_operand" "r,rm")))
930 (match_operand:V4SF 1 "register_operand" "0,0")
931 (const_int 1)))]
932 "TARGET_SSE && TARGET_64BIT"
933 "cvtsi2ssq\t{%2, %0|%0, %2}"
934 [(set_attr "type" "sseicvt")
935 (set_attr "athlon_decode" "vector,double")
936 (set_attr "mode" "SF")])
937
;; cvtss2si: non-truncating conversion (UNSPEC_FIX_NOTRUNC) of element 0 of
;; the V4SF operand 1 to an SI general register.
938(define_insn "sse_cvtss2si"
939 [(set (match_operand:SI 0 "register_operand" "=r,r")
940 (unspec:SI
941 [(vec_select:SF
942 (match_operand:V4SF 1 "nonimmediate_operand" "x,m")
943 (parallel [(const_int 0)]))]
944 UNSPEC_FIX_NOTRUNC))]
945 "TARGET_SSE"
946 "cvtss2si\t{%1, %0|%0, %1}"
947 [(set_attr "type" "sseicvt")
948 (set_attr "athlon_decode" "double,vector")
949 (set_attr "mode" "SI")])
950
;; cvtss2siq: DI-result variant of sse_cvtss2si (element 0 of operand 1,
;; UNSPEC_FIX_NOTRUNC).  Only enabled for TARGET_64BIT.
951(define_insn "sse_cvtss2siq"
952 [(set (match_operand:DI 0 "register_operand" "=r,r")
953 (unspec:DI
954 [(vec_select:SF
955 (match_operand:V4SF 1 "nonimmediate_operand" "x,m")
956 (parallel [(const_int 0)]))]
957 UNSPEC_FIX_NOTRUNC))]
958 "TARGET_SSE && TARGET_64BIT"
959 "cvtss2siq\t{%1, %0|%0, %1}"
960 [(set_attr "type" "sseicvt")
961 (set_attr "athlon_decode" "double,vector")
962 (set_attr "mode" "DI")])
963
;; cvttss2si: "fix" conversion of element 0 of the V4SF operand 1 to an SI
;; general register.
964(define_insn "sse_cvttss2si"
965 [(set (match_operand:SI 0 "register_operand" "=r,r")
966 (fix:SI
967 (vec_select:SF
968 (match_operand:V4SF 1 "nonimmediate_operand" "x,m")
969 (parallel [(const_int 0)]))))]
970 "TARGET_SSE"
971 "cvttss2si\t{%1, %0|%0, %1}"
972 [(set_attr "type" "sseicvt")
973 (set_attr "athlon_decode" "double,vector")
974 (set_attr "mode" "SI")])
975
;; cvttss2siq: DI-result variant of sse_cvttss2si.  Only enabled for
;; TARGET_64BIT.
976(define_insn "sse_cvttss2siq"
977 [(set (match_operand:DI 0 "register_operand" "=r,r")
978 (fix:DI
979 (vec_select:SF
980 (match_operand:V4SF 1 "nonimmediate_operand" "x,m")
981 (parallel [(const_int 0)]))))]
982 "TARGET_SSE && TARGET_64BIT"
983 "cvttss2siq\t{%1, %0|%0, %1}"
984 [(set_attr "type" "sseicvt")
985 (set_attr "athlon_decode" "double,vector")
986 (set_attr "mode" "DI")])
987
;; cvtdq2ps: element-wise "float" conversion of the V4SI operand 1 to V4SF.
;; The "mode" attribute is V4SF to match the mode this insn produces (the
;; pattern has no double-precision operand).
988(define_insn "sse2_cvtdq2ps"
989 [(set (match_operand:V4SF 0 "register_operand" "=x")
990 (float:V4SF (match_operand:V4SI 1 "nonimmediate_operand" "xm")))]
991 "TARGET_SSE2"
992 "cvtdq2ps\t{%1, %0|%0, %1}"
993 [(set_attr "type" "ssecvt")
994 (set_attr "mode" "V4SF")])
995
;; cvtps2dq: non-truncating conversion (UNSPEC_FIX_NOTRUNC) of the V4SF
;; operand 1 to V4SI.
996(define_insn "sse2_cvtps2dq"
997 [(set (match_operand:V4SI 0 "register_operand" "=x")
998 (unspec:V4SI [(match_operand:V4SF 1 "nonimmediate_operand" "xm")]
999 UNSPEC_FIX_NOTRUNC))]
1000 "TARGET_SSE2"
1001 "cvtps2dq\t{%1, %0|%0, %1}"
1002 [(set_attr "type" "ssecvt")
1003 (set_attr "mode" "TI")])
1004
;; cvttps2dq: element-wise "fix" conversion of the V4SF operand 1 to V4SI.
1005(define_insn "sse2_cvttps2dq"
1006 [(set (match_operand:V4SI 0 "register_operand" "=x")
1007 (fix:V4SI (match_operand:V4SF 1 "nonimmediate_operand" "xm")))]
1008 "TARGET_SSE2"
1009 "cvttps2dq\t{%1, %0|%0, %1}"
1010 [(set_attr "type" "ssecvt")
1011 (set_attr "mode" "TI")])
1012
1013;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
1014;;
1015;; Parallel single-precision floating point element swizzling
1016;;
1017;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
1018
;; movhlps: the result takes elements 6,7,2,3 of (operand 1 ++ operand 2),
;; i.e. the upper half of operand 2 followed by the upper half of operand 1.
;; Three alternatives: register/register (movhlps), register with an
;; offsettable memory operand 2 (movlps, printed with the %H modifier), and
;; a memory destination (movhps).  The insn condition rejects the case where
;; operands 1 and 2 are both memory.
1019(define_insn "sse_movhlps"
1020 [(set (match_operand:V4SF 0 "nonimmediate_operand" "=x,x,m")
1021 (vec_select:V4SF
1022 (vec_concat:V8SF
1023 (match_operand:V4SF 1 "nonimmediate_operand" " 0,0,0")
1024 (match_operand:V4SF 2 "nonimmediate_operand" " x,o,x"))
1025 (parallel [(const_int 6)
1026 (const_int 7)
1027 (const_int 2)
1028 (const_int 3)])))]
1029 "TARGET_SSE && !(MEM_P (operands[1]) && MEM_P (operands[2]))"
1030 "@
1031 movhlps\t{%2, %0|%0, %2}
1032 movlps\t{%H2, %0|%0, %H2}
1033 movhps\t{%2, %0|%0, %2}"
1034 [(set_attr "type" "ssemov")
1035 (set_attr "mode" "V4SF,V2SF,V2SF")])
1036
;; movlhps: the result takes elements 0,1,4,5 of (operand 1 ++ operand 2),
;; i.e. the lower half of operand 1 followed by the lower half of operand 2.
;; Three alternatives: register/register (movlhps), memory operand 2
;; (movhps), and an offsettable memory destination (movlps, printed with
;; the %H modifier on operand 0).  Operand validity is additionally checked
;; by ix86_binary_operator_ok.
1037(define_insn "sse_movlhps"
1038 [(set (match_operand:V4SF 0 "nonimmediate_operand" "=x,x,o")
1039 (vec_select:V4SF
1040 (vec_concat:V8SF
1041 (match_operand:V4SF 1 "nonimmediate_operand" " 0,0,0")
1042 (match_operand:V4SF 2 "nonimmediate_operand" " x,m,x"))
1043 (parallel [(const_int 0)
1044 (const_int 1)
1045 (const_int 4)
1046 (const_int 5)])))]
1047 "TARGET_SSE && ix86_binary_operator_ok (UNKNOWN, V4SFmode, operands)"
1048 "@
1049 movlhps\t{%2, %0|%0, %2}
1050 movhps\t{%2, %0|%0, %2}
1051 movlps\t{%2, %H0|%H0, %2}"
1052 [(set_attr "type" "ssemov")
1053 (set_attr "mode" "V4SF,V2SF,V2SF")])
1054
;; unpckhps: the result takes elements 2,6,3,7 of (operand 1 ++ operand 2),
;; interleaving the upper halves of the two inputs.
1055(define_insn "sse_unpckhps"
1056 [(set (match_operand:V4SF 0 "register_operand" "=x")
1057 (vec_select:V4SF
1058 (vec_concat:V8SF
1059 (match_operand:V4SF 1 "register_operand" "0")
1060 (match_operand:V4SF 2 "nonimmediate_operand" "xm"))
1061 (parallel [(const_int 2) (const_int 6)
1062 (const_int 3) (const_int 7)])))]
1063 "TARGET_SSE"
1064 "unpckhps\t{%2, %0|%0, %2}"
1065 [(set_attr "type" "sselog")
1066 (set_attr "mode" "V4SF")])
1067
;; unpcklps: the result takes elements 0,4,1,5 of (operand 1 ++ operand 2),
;; interleaving the lower halves of the two inputs.
1068(define_insn "sse_unpcklps"
1069 [(set (match_operand:V4SF 0 "register_operand" "=x")
1070 (vec_select:V4SF
1071 (vec_concat:V8SF
1072 (match_operand:V4SF 1 "register_operand" "0")
1073 (match_operand:V4SF 2 "nonimmediate_operand" "xm"))
1074 (parallel [(const_int 0) (const_int 4)
1075 (const_int 1) (const_int 5)])))]
1076 "TARGET_SSE"
1077 "unpcklps\t{%2, %0|%0, %2}"
1078 [(set_attr "type" "sselog")
1079 (set_attr "mode" "V4SF")])
1080
1081;; These are modeled with the same vec_concat as the others so that we
1082;; capture users of shufps that can use the new instructions
;; movshdup: operand 1 is concatenated with itself (match_dup 1) and
;; elements 1,1,7,7 are selected, so the result is {op1[1], op1[1],
;; op1[3], op1[3]}.
1083(define_insn "sse3_movshdup"
1084 [(set (match_operand:V4SF 0 "register_operand" "=x")
1085 (vec_select:V4SF
1086 (vec_concat:V8SF
1087 (match_operand:V4SF 1 "nonimmediate_operand" "xm")
1088 (match_dup 1))
1089 (parallel [(const_int 1)
1090 (const_int 1)
1091 (const_int 7)
1092 (const_int 7)])))]
1093 "TARGET_SSE3"
1094 "movshdup\t{%1, %0|%0, %1}"
1095 [(set_attr "type" "sse")
1096 (set_attr "mode" "V4SF")])
1097
;; movsldup: operand 1 is concatenated with itself (match_dup 1) and
;; elements 0,0,6,6 are selected, so the result is {op1[0], op1[0],
;; op1[2], op1[2]}.
1098(define_insn "sse3_movsldup"
1099 [(set (match_operand:V4SF 0 "register_operand" "=x")
1100 (vec_select:V4SF
1101 (vec_concat:V8SF
1102 (match_operand:V4SF 1 "nonimmediate_operand" "xm")
1103 (match_dup 1))
1104 (parallel [(const_int 0)
1105 (const_int 0)
1106 (const_int 6)
1107 (const_int 6)])))]
1108 "TARGET_SSE3"
1109 "movsldup\t{%1, %0|%0, %1}"
1110 [(set_attr "type" "sse")
1111 (set_attr "mode" "V4SF")])
1112
;; Expander for shufps taking the immediate as one const_int (operand 3).
;; It splits the mask into four 2-bit selectors and emits sse_shufps_1; the
;; two upper selectors are biased by 4 so that they index the operand 2
;; half of the vec_concat used by sse_shufps_1.
1113(define_expand "sse_shufps"
1114 [(match_operand:V4SF 0 "register_operand" "")
1115 (match_operand:V4SF 1 "register_operand" "")
1116 (match_operand:V4SF 2 "nonimmediate_operand" "")
1117 (match_operand:SI 3 "const_int_operand" "")]
1118 "TARGET_SSE"
1119{
1120 int mask = INTVAL (operands[3]);
1121 emit_insn (gen_sse_shufps_1 (operands[0], operands[1], operands[2],
1122 GEN_INT ((mask >> 0) & 3),
1123 GEN_INT ((mask >> 2) & 3),
1124 GEN_INT (((mask >> 4) & 3) + 4),
1125 GEN_INT (((mask >> 6) & 3) + 4)));
1126 DONE;
1127})
1128
;; shufps with the selection spelled out as a vec_select over
;; (operand 1 ++ operand 2): operands 3 and 4 index operand 1 (0..3),
;; operands 5 and 6 index operand 2 (4..7).  The output code removes the
;; bias of 4 from operands 5/6, repacks the four selectors into one
;; immediate and overwrites operands[3] with it before printing.
1129(define_insn "sse_shufps_1"
1130 [(set (match_operand:V4SF 0 "register_operand" "=x")
1131 (vec_select:V4SF
1132 (vec_concat:V8SF
1133 (match_operand:V4SF 1 "register_operand" "0")
1134 (match_operand:V4SF 2 "nonimmediate_operand" "xm"))
1135 (parallel [(match_operand 3 "const_0_to_3_operand" "")
1136 (match_operand 4 "const_0_to_3_operand" "")
1137 (match_operand 5 "const_4_to_7_operand" "")
1138 (match_operand 6 "const_4_to_7_operand" "")])))]
1139 "TARGET_SSE"
1140{
1141 int mask = 0;
1142 mask |= INTVAL (operands[3]) << 0;
1143 mask |= INTVAL (operands[4]) << 2;
1144 mask |= (INTVAL (operands[5]) - 4) << 4;
1145 mask |= (INTVAL (operands[6]) - 4) << 6;
1146 operands[3] = GEN_INT (mask);
1147
1148 return "shufps\t{%3, %2, %0|%0, %2, %3}";
1149}
1150 [(set_attr "type" "sselog")
1151 (set_attr "mode" "V4SF")])
1152
;; Extract elements 2 and 3 (the upper V2SF half) of the V4SF operand 1.
;; Alternatives: store to memory (movhps), register to register (movhlps),
;; and load from an offsettable memory source (movlps with %H1).
1153(define_insn "sse_storehps"
1154 [(set (match_operand:V2SF 0 "nonimmediate_operand" "=m,x,x")
1155 (vec_select:V2SF
1156 (match_operand:V4SF 1 "nonimmediate_operand" "x,x,o")
1157 (parallel [(const_int 2) (const_int 3)])))]
1158 "TARGET_SSE"
1159 "@
1160 movhps\t{%1, %0|%0, %1}
1161 movhlps\t{%1, %0|%0, %1}
1162 movlps\t{%H1, %0|%0, %H1}"
1163 [(set_attr "type" "ssemov")
1164 (set_attr "mode" "V2SF,V4SF,V2SF")])
1165
;; Build a V4SF from the lower half (elements 0,1) of operand 1 and the V2SF
;; operand 2 as the upper half.  Alternatives: operand 2 in memory (movhps),
;; operand 2 in a register (movlhps), and an offsettable memory destination
;; (movlps, printed with %H0).
1166(define_insn "sse_loadhps"
1167 [(set (match_operand:V4SF 0 "nonimmediate_operand" "=x,x,o")
1168 (vec_concat:V4SF
1169 (vec_select:V2SF
1170 (match_operand:V4SF 1 "nonimmediate_operand" "0,0,0")
1171 (parallel [(const_int 0) (const_int 1)]))
1172 (match_operand:V2SF 2 "nonimmediate_operand" "m,x,x")))]
1173 "TARGET_SSE"
1174 "@
1175 movhps\t{%2, %0|%0, %2}
1176 movlhps\t{%2, %0|%0, %2}
1177 movlps\t{%2, %H0|%H0, %2}"
1178 [(set_attr "type" "ssemov")
1179 (set_attr "mode" "V2SF,V4SF,V2SF")])
1180
;; Extract elements 0 and 1 (the lower V2SF half) of the V4SF operand 1.
;; Alternatives: store to memory (movlps), register to register (movaps),
;; and load from memory (movlps).
1181(define_insn "sse_storelps"
1182 [(set (match_operand:V2SF 0 "nonimmediate_operand" "=m,x,x")
1183 (vec_select:V2SF
1184 (match_operand:V4SF 1 "nonimmediate_operand" "x,x,m")
1185 (parallel [(const_int 0) (const_int 1)])))]
1186 "TARGET_SSE"
1187 "@
1188 movlps\t{%1, %0|%0, %1}
1189 movaps\t{%1, %0|%0, %1}
1190 movlps\t{%1, %0|%0, %1}"
1191 [(set_attr "type" "ssemov")
1192 (set_attr "mode" "V2SF,V4SF,V2SF")])
1193
;; Build a V4SF from the V2SF operand 2 as the lower half and the upper half
;; (elements 2,3) of operand 1.  In alternative 0 the destination is tied to
;; operand 2 and shufps with immediate 0xe4 is used; in alternatives 1 and 2
;; the destination is tied to operand 1 and movlps moves operand 2 from
;; memory or to a memory destination respectively.
1194(define_insn "sse_loadlps"
1195 [(set (match_operand:V4SF 0 "nonimmediate_operand" "=x,x,m")
1196 (vec_concat:V4SF
1197 (match_operand:V2SF 2 "nonimmediate_operand" "0,m,x")
1198 (vec_select:V2SF
1199 (match_operand:V4SF 1 "nonimmediate_operand" "x,0,0")
1200 (parallel [(const_int 2) (const_int 3)]))))]
1201 "TARGET_SSE"
1202 "@
1203 shufps\t{$0xe4, %1, %0|%0, %1, 0xe4}
1204 movlps\t{%2, %0|%0, %2}
1205 movlps\t{%2, %0|%0, %2}"
1206 [(set_attr "type" "sselog,ssemov,ssemov")
1207 (set_attr "mode" "V4SF,V2SF,V2SF")])
1208
;; movss (register form): vec_merge with mask 1 of operand 2 into operand 1;
;; the destination is tied to operand 1.
1209(define_insn "sse_movss"
1210 [(set (match_operand:V4SF 0 "register_operand" "=x")
1211 (vec_merge:V4SF
1212 (match_operand:V4SF 2 "register_operand" "x")
1213 (match_operand:V4SF 1 "register_operand" "0")
1214 (const_int 1)))]
1215 "TARGET_SSE"
1216 "movss\t{%2, %0|%0, %2}"
1217 [(set_attr "type" "ssemov")
1218 (set_attr "mode" "SF")])
1219
;; Broadcast the SF operand 1 into all four elements.  The source is tied to
;; the destination register and the insn is emitted as shufps with
;; immediate 0 of the register with itself.
1220(define_insn "*vec_dupv4sf"
1221 [(set (match_operand:V4SF 0 "register_operand" "=x")
1222 (vec_duplicate:V4SF
1223 (match_operand:SF 1 "register_operand" "0")))]
1224 "TARGET_SSE"
1225 "shufps\t{$0, %0, %0|%0, %0, 0}"
1226 [(set_attr "type" "sselog1")
1227 (set_attr "mode" "V4SF")])
1228
1229;; ??? In theory we can match memory for the MMX alternative, but allowing
1230;; nonimmediate_operand for operand 2 and *not* allowing memory for the SSE
1231;; alternatives pretty much forces the MMX alternative to be chosen.
;; Concatenate two SF values into a V2SF.  Alternatives 0/1 use "x"
;; registers (unpcklps, or movss from memory when operand 2 matches "C");
;; alternatives 2/3 are the corresponding "*y" forms (punpckldq / movd).
1232(define_insn "*sse_concatv2sf"
1233 [(set (match_operand:V2SF 0 "register_operand" "=x,x,*y,*y")
1234 (vec_concat:V2SF
1235 (match_operand:SF 1 "nonimmediate_operand" " 0,m, 0, m")
1236 (match_operand:SF 2 "reg_or_0_operand" " x,C,*y, C")))]
1237 "TARGET_SSE"
1238 "@
1239 unpcklps\t{%2, %0|%0, %2}
1240 movss\t{%1, %0|%0, %1}
1241 punpckldq\t{%2, %0|%0, %2}
1242 movd\t{%1, %0|%0, %1}"
1243 [(set_attr "type" "sselog,ssemov,mmxcvt,mmxmov")
1244 (set_attr "mode" "V4SF,SF,DI,DI")])
1245
;; Concatenate two V2SF values into a V4SF.  Operand 1 is tied to the
;; destination; operand 2 comes from a register (movlhps) or memory (movhps).
1246(define_insn "*sse_concatv4sf"
1247 [(set (match_operand:V4SF 0 "register_operand" "=x,x")
1248 (vec_concat:V4SF
1249 (match_operand:V2SF 1 "register_operand" " 0,0")
1250 (match_operand:V2SF 2 "nonimmediate_operand" " x,m")))]
1251 "TARGET_SSE"
1252 "@
1253 movlhps\t{%2, %0|%0, %2}
1254 movhps\t{%2, %0|%0, %2}"
1255 [(set_attr "type" "ssemov")
1256 (set_attr "mode" "V4SF,V2SF")])
1257
;; Standard-named expander for V4SF vector initialization; all work is
;; delegated to ix86_expand_vector_init, called with "false" as its first
;; argument.
1258(define_expand "vec_initv4sf"
1259 [(match_operand:V4SF 0 "register_operand" "")
1260 (match_operand 1 "" "")]
1261 "TARGET_SSE"
1262{
1263 ix86_expand_vector_init (false, operands[0], operands[1]);
1264 DONE;
1265})
1266
;; Set element 0 of a V4SF: operand 2 (SF) is merged under mask 1 into
;; operand 1, which is either tied to the destination ("0") or matches "C".
;; The first three alternatives emit movss/movd; the memory-destination
;; alternative outputs "#" and is left to be split.
1267(define_insn "*vec_setv4sf_0"
1268 [(set (match_operand:V4SF 0 "nonimmediate_operand" "=x,x,Y ,m")
1269 (vec_merge:V4SF
1270 (vec_duplicate:V4SF
1271 (match_operand:SF 2 "general_operand" " x,m,*r,x*rfF"))
1272 (match_operand:V4SF 1 "vector_move_operand" " 0,C,C ,0")
1273 (const_int 1)))]
1274 "TARGET_SSE"
1275 "@
1276 movss\t{%2, %0|%0, %2}
1277 movss\t{%2, %0|%0, %2}
1278 movd\t{%2, %0|%0, %2}
1279 #"
1280 [(set_attr "type" "ssemov")
1281 (set_attr "mode" "SF")])
1282
;; After reload, setting element 0 of a V4SF that lives in memory (operand 0
;; merged with itself) is rewritten as a plain SFmode move of operand 1 to
;; offset 0 of that memory location.
1283(define_split
1284 [(set (match_operand:V4SF 0 "memory_operand" "")
1285 (vec_merge:V4SF
1286 (vec_duplicate:V4SF
1287 (match_operand:SF 1 "nonmemory_operand" ""))
1288 (match_dup 0)
1289 (const_int 1)))]
1290 "TARGET_SSE && reload_completed"
1291 [(const_int 0)]
1292{
1293 emit_move_insn (adjust_address (operands[0], SFmode, 0), operands[1]);
1294 DONE;
1295})
1296
;; Standard-named expander for setting one V4SF element; operand 2 is the
;; constant element index.  All work is delegated to ix86_expand_vector_set.
1297(define_expand "vec_setv4sf"
1298 [(match_operand:V4SF 0 "register_operand" "")
1299 (match_operand:SF 1 "register_operand" "")
1300 (match_operand 2 "const_int_operand" "")]
1301 "TARGET_SSE"
1302{
1303 ix86_expand_vector_set (false, operands[0], operands[1],
1304 INTVAL (operands[2]));
1305 DONE;
1306})
1307
;; Extract element 0 of a V4SF.  The insn always outputs "#" and is split
;; after reload into an SFmode move: a register source is re-created as an
;; SFmode REG with the same register number, any other source goes through
;; gen_lowpart.  The insn condition rejects memory-to-memory.
1308(define_insn_and_split "*vec_extractv4sf_0"
1309 [(set (match_operand:SF 0 "nonimmediate_operand" "=x,m,fr")
1310 (vec_select:SF
1311 (match_operand:V4SF 1 "nonimmediate_operand" "xm,x,m")
1312 (parallel [(const_int 0)])))]
1313 "TARGET_SSE && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
1314 "#"
1315 "&& reload_completed"
1316 [(const_int 0)]
1317{
1318 rtx op1 = operands[1];
1319 if (REG_P (op1))
1320 op1 = gen_rtx_REG (SFmode, REGNO (op1));
1321 else
1322 op1 = gen_lowpart (SFmode, op1);
1323 emit_move_insn (operands[0], op1);
1324 DONE;
1325})
1326
;; Standard-named expander for extracting one V4SF element; operand 2 is the
;; constant element index.  All work is delegated to
;; ix86_expand_vector_extract.
1327(define_expand "vec_extractv4sf"
1328 [(match_operand:SF 0 "register_operand" "")
1329 (match_operand:V4SF 1 "register_operand" "")
1330 (match_operand 2 "const_int_operand" "")]
1331 "TARGET_SSE"
1332{
1333 ix86_expand_vector_extract (false, operands[0], operands[1],
1334 INTVAL (operands[2]));
1335 DONE;
1336})
1337
1338;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
1339;;
1340;; Parallel double-precision floating point arithmetic
1341;;
1342;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
1343
;; V2DF negation expander; the expansion is done entirely by
;; ix86_expand_fp_absneg_operator (NEG).
1344(define_expand "negv2df2"
1345 [(set (match_operand:V2DF 0 "register_operand" "")
1346 (neg:V2DF (match_operand:V2DF 1 "nonimmediate_operand" "")))]
1347 "TARGET_SSE2"
1348 "ix86_expand_fp_absneg_operator (NEG, V2DFmode, operands); DONE;")
1349
;; V2DF absolute-value expander; the expansion is done entirely by
;; ix86_expand_fp_absneg_operator (ABS).
1350(define_expand "absv2df2"
1351 [(set (match_operand:V2DF 0 "register_operand" "")
1352 (abs:V2DF (match_operand:V2DF 1 "nonimmediate_operand" "")))]
1353 "TARGET_SSE2"
1354 "ix86_expand_fp_absneg_operator (ABS, V2DFmode, operands); DONE;")
1355
;; V2DF addition expander.  Operands are first passed through
;; ix86_fixup_binary_operands_no_copy, then the pattern below is emitted.
1356(define_expand "addv2df3"
1357 [(set (match_operand:V2DF 0 "register_operand" "")
1358 (plus:V2DF (match_operand:V2DF 1 "nonimmediate_operand" "")
1359 (match_operand:V2DF 2 "nonimmediate_operand" "")))]
1360 "TARGET_SSE2"
1361 "ix86_fixup_binary_operands_no_copy (PLUS, V2DFmode, operands);")
1362
;; addpd: packed V2DF addition.  Operand 1 is marked commutative ("%0") and
;; tied to the destination; validity is checked by ix86_binary_operator_ok.
1363(define_insn "*addv2df3"
1364 [(set (match_operand:V2DF 0 "register_operand" "=x")
1365 (plus:V2DF (match_operand:V2DF 1 "nonimmediate_operand" "%0")
1366 (match_operand:V2DF 2 "nonimmediate_operand" "xm")))]
1367 "TARGET_SSE2 && ix86_binary_operator_ok (PLUS, V2DFmode, operands)"
1368 "addpd\t{%2, %0|%0, %2}"
1369 [(set_attr "type" "sseadd")
1370 (set_attr "mode" "V2DF")])
1371
;; addsd: the V2DF sum of operands 1 and 2 is merged under mask 1 into
;; operand 1 (match_dup 1), so only element 0 receives the sum and the
;; other element is carried over from operand 1.
;; The insn condition passes V2DFmode to ix86_binary_operator_ok, the mode
;; this pattern actually operates on (as sse2_vmmulv2df3 does).
1372(define_insn "sse2_vmaddv2df3"
1373 [(set (match_operand:V2DF 0 "register_operand" "=x")
1374 (vec_merge:V2DF
1375 (plus:V2DF (match_operand:V2DF 1 "register_operand" "0")
1376 (match_operand:V2DF 2 "nonimmediate_operand" "xm"))
1377 (match_dup 1)
1378 (const_int 1)))]
1379 "TARGET_SSE2 && ix86_binary_operator_ok (PLUS, V2DFmode, operands)"
1380 "addsd\t{%2, %0|%0, %2}"
1381 [(set_attr "type" "sseadd")
1382 (set_attr "mode" "DF")])
1383
;; V2DF subtraction expander.  Operands are first passed through
;; ix86_fixup_binary_operands_no_copy, then the pattern below is emitted.
1384(define_expand "subv2df3"
1385 [(set (match_operand:V2DF 0 "register_operand" "")
1386 (minus:V2DF (match_operand:V2DF 1 "nonimmediate_operand" "")
1387 (match_operand:V2DF 2 "nonimmediate_operand" "")))]
1388 "TARGET_SSE2"
1389 "ix86_fixup_binary_operands_no_copy (MINUS, V2DFmode, operands);")
1390
;; subpd: packed V2DF subtraction, operand 1 minus operand 2.  Operand 1
;; must be a register tied to the destination (no commutative marker).
1391(define_insn "*subv2df3"
1392 [(set (match_operand:V2DF 0 "register_operand" "=x")
1393 (minus:V2DF (match_operand:V2DF 1 "register_operand" "0")
1394 (match_operand:V2DF 2 "nonimmediate_operand" "xm")))]
1395 "TARGET_SSE2"
1396 "subpd\t{%2, %0|%0, %2}"
1397 [(set_attr "type" "sseadd")
1398 (set_attr "mode" "V2DF")])
1399
;; subsd: the V2DF difference of operands 1 and 2 is merged under mask 1
;; into operand 1 (match_dup 1).
1400(define_insn "sse2_vmsubv2df3"
1401 [(set (match_operand:V2DF 0 "register_operand" "=x")
1402 (vec_merge:V2DF
1403 (minus:V2DF (match_operand:V2DF 1 "register_operand" "0")
1404 (match_operand:V2DF 2 "nonimmediate_operand" "xm"))
1405 (match_dup 1)
1406 (const_int 1)))]
1407 "TARGET_SSE2"
1408 "subsd\t{%2, %0|%0, %2}"
1409 [(set_attr "type" "sseadd")
1410 (set_attr "mode" "DF")])
1411
;; V2DF multiplication expander.  Operands are first passed through
;; ix86_fixup_binary_operands_no_copy, then the pattern below is emitted.
1412(define_expand "mulv2df3"
1413 [(set (match_operand:V2DF 0 "register_operand" "")
1414 (mult:V2DF (match_operand:V2DF 1 "nonimmediate_operand" "")
1415 (match_operand:V2DF 2 "nonimmediate_operand" "")))]
1416 "TARGET_SSE2"
1417 "ix86_fixup_binary_operands_no_copy (MULT, V2DFmode, operands);")
1418
;; mulpd: packed V2DF multiplication.  Operand 1 is marked commutative
;; ("%0"); validity is checked by ix86_binary_operator_ok.
1419(define_insn "*mulv2df3"
1420 [(set (match_operand:V2DF 0 "register_operand" "=x")
1421 (mult:V2DF (match_operand:V2DF 1 "nonimmediate_operand" "%0")
1422 (match_operand:V2DF 2 "nonimmediate_operand" "xm")))]
1423 "TARGET_SSE2 && ix86_binary_operator_ok (MULT, V2DFmode, operands)"
1424 "mulpd\t{%2, %0|%0, %2}"
1425 [(set_attr "type" "ssemul")
1426 (set_attr "mode" "V2DF")])
1427
;; mulsd: the V2DF product of operands 1 and 2 is merged under mask 1 into
;; operand 1 (match_dup 1).
1428(define_insn "sse2_vmmulv2df3"
1429 [(set (match_operand:V2DF 0 "register_operand" "=x")
1430 (vec_merge:V2DF
1431 (mult:V2DF (match_operand:V2DF 1 "register_operand" "0")
1432 (match_operand:V2DF 2 "nonimmediate_operand" "xm"))
1433 (match_dup 1)
1434 (const_int 1)))]
1435 "TARGET_SSE2 && ix86_binary_operator_ok (MULT, V2DFmode, operands)"
1436 "mulsd\t{%2, %0|%0, %2}"
1437 [(set_attr "type" "ssemul")
1438 (set_attr "mode" "DF")])
1439
;; V2DF division expander.  Unlike the add/sub/mul expanders, operand 1 is
;; already required to be a register here.  Operands are passed through
;; ix86_fixup_binary_operands_no_copy before the pattern is emitted.
1440(define_expand "divv2df3"
1441 [(set (match_operand:V2DF 0 "register_operand" "")
1442 (div:V2DF (match_operand:V2DF 1 "register_operand" "")
1443 (match_operand:V2DF 2 "nonimmediate_operand" "")))]
1444 "TARGET_SSE2"
1445 "ix86_fixup_binary_operands_no_copy (DIV, V2DFmode, operands);")
1446
;; divpd: packed V2DF division, operand 1 divided by operand 2; operand 1 is
;; tied to the destination.
1447(define_insn "*divv2df3"
1448 [(set (match_operand:V2DF 0 "register_operand" "=x")
1449 (div:V2DF (match_operand:V2DF 1 "register_operand" "0")
1450 (match_operand:V2DF 2 "nonimmediate_operand" "xm")))]
1451 "TARGET_SSE2"
1452 "divpd\t{%2, %0|%0, %2}"
1453 [(set_attr "type" "ssediv")
1454 (set_attr "mode" "V2DF")])
1455
;; divsd: the V2DF quotient of operands 1 and 2 is merged under mask 1 into
;; operand 1 (match_dup 1).
1456(define_insn "sse2_vmdivv2df3"
1457 [(set (match_operand:V2DF 0 "register_operand" "=x")
1458 (vec_merge:V2DF
1459 (div:V2DF (match_operand:V2DF 1 "register_operand" "0")
1460 (match_operand:V2DF 2 "nonimmediate_operand" "xm"))
1461 (match_dup 1)
1462 (const_int 1)))]
1463 "TARGET_SSE2"
1464 "divsd\t{%2, %0|%0, %2}"
1465 [(set_attr "type" "ssediv")
1466 (set_attr "mode" "DF")])
1467
;; sqrtpd: packed V2DF square root of operand 1 (register or memory).
1468(define_insn "sqrtv2df2"
1469 [(set (match_operand:V2DF 0 "register_operand" "=x")
1470 (sqrt:V2DF (match_operand:V2DF 1 "nonimmediate_operand" "xm")))]
1471 "TARGET_SSE2"
1472 "sqrtpd\t{%1, %0|%0, %1}"
1473 [(set_attr "type" "sse")
1474 (set_attr "mode" "V2DF")])
1475
;; sqrtsd: the V2DF square root of operand 1 is merged under mask 1 into
;; operand 2, which is tied to the destination.
;; Operand 1 uses the nonimmediate_operand predicate so that it agrees with
;; its "xm" constraint: with register_operand the memory alternative could
;; never be matched directly and a memory source was always forced through
;; a register first.
1476(define_insn "sse2_vmsqrtv2df2"
1477 [(set (match_operand:V2DF 0 "register_operand" "=x")
1478 (vec_merge:V2DF
1479 (sqrt:V2DF (match_operand:V2DF 1 "nonimmediate_operand" "xm"))
1480 (match_operand:V2DF 2 "register_operand" "0")
1481 (const_int 1)))]
1482 "TARGET_SSE2"
1483 "sqrtsd\t{%1, %0|%0, %1}"
1484 [(set_attr "type" "sse")
1485 (set_attr "mode" "DF")])
1486
1487;; ??? For !flag_finite_math_only, the representation with SMIN/SMAX
1488;; isn't really correct, as those rtl operators aren't defined when
1489;; applied to NaNs. Hopefully the optimizers won't get too smart on us.
1490
;; V2DF signed-max expander.  When flag_finite_math_only is not set,
;; operand 1 is forced into a register before the generic operand fixup;
;; this matches the non-finite insn below, which requires a register
;; operand 1 and carries no commutative marker.
1491(define_expand "smaxv2df3"
1492 [(set (match_operand:V2DF 0 "register_operand" "")
1493 (smax:V2DF (match_operand:V2DF 1 "nonimmediate_operand" "")
1494 (match_operand:V2DF 2 "nonimmediate_operand" "")))]
1495 "TARGET_SSE2"
1496{
1497 if (!flag_finite_math_only)
1498 operands[1] = force_reg (V2DFmode, operands[1]);
1499 ix86_fixup_binary_operands_no_copy (SMAX, V2DFmode, operands);
1500})
1501
;; maxpd, finite-math variant: only enabled with flag_finite_math_only, in
;; which case operand 1 is marked commutative ("%0").
1502(define_insn "*smaxv2df3_finite"
1503 [(set (match_operand:V2DF 0 "register_operand" "=x")
1504 (smax:V2DF (match_operand:V2DF 1 "nonimmediate_operand" "%0")
1505 (match_operand:V2DF 2 "nonimmediate_operand" "xm")))]
1506 "TARGET_SSE2 && flag_finite_math_only
1507 && ix86_binary_operator_ok (SMAX, V2DFmode, operands)"
1508 "maxpd\t{%2, %0|%0, %2}"
1509 [(set_attr "type" "sseadd")
1510 (set_attr "mode" "V2DF")])
1511
;; maxpd, general variant: operand 1 must be a register tied to the
;; destination and is not marked commutative.
1512(define_insn "*smaxv2df3"
1513 [(set (match_operand:V2DF 0 "register_operand" "=x")
1514 (smax:V2DF (match_operand:V2DF 1 "register_operand" "0")
1515 (match_operand:V2DF 2 "nonimmediate_operand" "xm")))]
1516 "TARGET_SSE2"
1517 "maxpd\t{%2, %0|%0, %2}"
1518 [(set_attr "type" "sseadd")
1519 (set_attr "mode" "V2DF")])
1520
;; maxsd: the V2DF smax of operands 1 and 2 is merged under mask 1 into
;; operand 1 (match_dup 1).
1521(define_insn "sse2_vmsmaxv2df3"
1522 [(set (match_operand:V2DF 0 "register_operand" "=x")
1523 (vec_merge:V2DF
1524 (smax:V2DF (match_operand:V2DF 1 "register_operand" "0")
1525 (match_operand:V2DF 2 "nonimmediate_operand" "xm"))
1526 (match_dup 1)
1527 (const_int 1)))]
1528 "TARGET_SSE2"
1529 "maxsd\t{%2, %0|%0, %2}"
1530 [(set_attr "type" "sseadd")
1531 (set_attr "mode" "DF")])
1532
;; V2DF signed-min expander.  When flag_finite_math_only is not set,
;; operand 1 is forced into a register before the generic operand fixup;
;; this matches the non-finite insn below, which requires a register
;; operand 1 and carries no commutative marker.
1533(define_expand "sminv2df3"
1534 [(set (match_operand:V2DF 0 "register_operand" "")
1535 (smin:V2DF (match_operand:V2DF 1 "nonimmediate_operand" "")
1536 (match_operand:V2DF 2 "nonimmediate_operand" "")))]
1537 "TARGET_SSE2"
1538{
1539 if (!flag_finite_math_only)
1540 operands[1] = force_reg (V2DFmode, operands[1]);
1541 ix86_fixup_binary_operands_no_copy (SMIN, V2DFmode, operands);
1542})
1543
;; minpd, finite-math variant: only enabled with flag_finite_math_only, in
;; which case operand 1 is marked commutative ("%0").
1544(define_insn "*sminv2df3_finite"
1545 [(set (match_operand:V2DF 0 "register_operand" "=x")
1546 (smin:V2DF (match_operand:V2DF 1 "nonimmediate_operand" "%0")
1547 (match_operand:V2DF 2 "nonimmediate_operand" "xm")))]
1548 "TARGET_SSE2 && flag_finite_math_only
1549 && ix86_binary_operator_ok (SMIN, V2DFmode, operands)"
1550 "minpd\t{%2, %0|%0, %2}"
1551 [(set_attr "type" "sseadd")
1552 (set_attr "mode" "V2DF")])
1553
;; minpd, general variant: operand 1 must be a register tied to the
;; destination and is not marked commutative.
1554(define_insn "*sminv2df3"
1555 [(set (match_operand:V2DF 0 "register_operand" "=x")
1556 (smin:V2DF (match_operand:V2DF 1 "register_operand" "0")
1557 (match_operand:V2DF 2 "nonimmediate_operand" "xm")))]
1558 "TARGET_SSE2"
1559 "minpd\t{%2, %0|%0, %2}"
1560 [(set_attr "type" "sseadd")
1561 (set_attr "mode" "V2DF")])
1562
;; minsd: the V2DF smin of operands 1 and 2 is merged under mask 1 into
;; operand 1 (match_dup 1).
1563(define_insn "sse2_vmsminv2df3"
1564 [(set (match_operand:V2DF 0 "register_operand" "=x")
1565 (vec_merge:V2DF
1566 (smin:V2DF (match_operand:V2DF 1 "register_operand" "0")
1567 (match_operand:V2DF 2 "nonimmediate_operand" "xm"))
1568 (match_dup 1)
1569 (const_int 1)))]
1570 "TARGET_SSE2"
1571 "minsd\t{%2, %0|%0, %2}"
1572 [(set_attr "type" "sseadd")
1573 (set_attr "mode" "DF")])
1574
;; addsubpd: element 0 of the result is operand 1 minus operand 2 and
;; element 1 is operand 1 plus operand 2.
;; In a vec_merge, a set bit i in the selector takes element i from the
;; first arm (here the plus) and a clear bit takes it from the second arm
;; (the minus).  The selector is therefore 2: bit 1 set picks the sum for
;; the high element, bit 0 clear picks the difference for the low element.
1575(define_insn "sse3_addsubv2df3"
1576 [(set (match_operand:V2DF 0 "register_operand" "=x")
1577 (vec_merge:V2DF
1578 (plus:V2DF
1579 (match_operand:V2DF 1 "register_operand" "0")
1580 (match_operand:V2DF 2 "nonimmediate_operand" "xm"))
1581 (minus:V2DF (match_dup 1) (match_dup 2))
1582 (const_int 2)))]
1583 "TARGET_SSE3"
1584 "addsubpd\t{%2, %0|%0, %2}"
1585 [(set_attr "type" "sseadd")
1586 (set_attr "mode" "V2DF")])
1587
;; haddpd: result element 0 is op1[0] + op1[1], result element 1 is
;; op2[0] + op2[1].  Operand 1 is tied to the destination.
1588(define_insn "sse3_haddv2df3"
1589 [(set (match_operand:V2DF 0 "register_operand" "=x")
1590 (vec_concat:V2DF
1591 (plus:DF
1592 (vec_select:DF
1593 (match_operand:V2DF 1 "register_operand" "0")
1594 (parallel [(const_int 0)]))
1595 (vec_select:DF (match_dup 1) (parallel [(const_int 1)])))
1596 (plus:DF
1597 (vec_select:DF
1598 (match_operand:V2DF 2 "nonimmediate_operand" "xm")
1599 (parallel [(const_int 0)]))
1600 (vec_select:DF (match_dup 2) (parallel [(const_int 1)])))))]
1601 "TARGET_SSE3"
1602 "haddpd\t{%2, %0|%0, %2}"
1603 [(set_attr "type" "sseadd")
1604 (set_attr "mode" "V2DF")])
1605
;; hsubpd: result element 0 is op1[0] - op1[1], result element 1 is
;; op2[0] - op2[1].  Operand 1 is tied to the destination.
1606(define_insn "sse3_hsubv2df3"
1607 [(set (match_operand:V2DF 0 "register_operand" "=x")
1608 (vec_concat:V2DF
1609 (minus:DF
1610 (vec_select:DF
1611 (match_operand:V2DF 1 "register_operand" "0")
1612 (parallel [(const_int 0)]))
1613 (vec_select:DF (match_dup 1) (parallel [(const_int 1)])))
1614 (minus:DF
1615 (vec_select:DF
1616 (match_operand:V2DF 2 "nonimmediate_operand" "xm")
1617 (parallel [(const_int 0)]))
1618 (vec_select:DF (match_dup 2) (parallel [(const_int 1)])))))]
1619 "TARGET_SSE3"
1620 "hsubpd\t{%2, %0|%0, %2}"
1621 [(set_attr "type" "sseadd")
1622 (set_attr "mode" "V2DF")])
1623
;; V2DF sum-reduction expander: emits sse3_haddv2df3 with operand 1 used as
;; both inputs, so each result element holds op1[0] + op1[1].
1624(define_expand "reduc_splus_v2df"
1625 [(match_operand:V2DF 0 "register_operand" "")
1626 (match_operand:V2DF 1 "register_operand" "")]
1627 "TARGET_SSE3"
1628{
1629 emit_insn (gen_sse3_haddv2df3 (operands[0], operands[1], operands[1]));
1630 DONE;
1631})
1632
1633;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
1634;;
1635;; Parallel double-precision floating point comparisons
1636;;
1637;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
1638
;; cmp<cond>pd: packed V2DF comparison.  The comparison code is operand 3
;; (any sse_comparison_operator) and is printed into the mnemonic via %D3.
1639(define_insn "sse2_maskcmpv2df3"
1640 [(set (match_operand:V2DF 0 "register_operand" "=x")
1641 (match_operator:V2DF 3 "sse_comparison_operator"
1642 [(match_operand:V2DF 1 "register_operand" "0")
1643 (match_operand:V2DF 2 "nonimmediate_operand" "xm")]))]
1644 "TARGET_SSE2"
1645 "cmp%D3pd\t{%2, %0|%0, %2}"
1646 [(set_attr "type" "ssecmp")
1647 (set_attr "mode" "V2DF")])
1648
;; cmp<cond>sd: the V2DF comparison result is merged under mask 1 into
;; operand 1 (match_dup 1).  The comparison code is printed via %D3.
1649(define_insn "sse2_vmmaskcmpv2df3"
1650 [(set (match_operand:V2DF 0 "register_operand" "=x")
1651 (vec_merge:V2DF
1652 (match_operator:V2DF 3 "sse_comparison_operator"
1653 [(match_operand:V2DF 1 "register_operand" "0")
1654 (match_operand:V2DF 2 "nonimmediate_operand" "xm")])
1655 (match_dup 1)
1656 (const_int 1)))]
1657 "TARGET_SSE2"
1658 "cmp%D3sd\t{%2, %0|%0, %2}"
1659 [(set_attr "type" "ssecmp")
1660 (set_attr "mode" "DF")])
1661
;; comisd: compare element 0 of operand 0 with element 0 of operand 1 and
;; set FLAGS_REG in CCFP mode.
1662(define_insn "sse2_comi"
1663 [(set (reg:CCFP FLAGS_REG)
1664 (compare:CCFP
1665 (vec_select:DF
1666 (match_operand:V2DF 0 "register_operand" "x")
1667 (parallel [(const_int 0)]))
1668 (vec_select:DF
1669 (match_operand:V2DF 1 "nonimmediate_operand" "xm")
1670 (parallel [(const_int 0)]))))]
1671 "TARGET_SSE2"
1672 "comisd\t{%1, %0|%0, %1}"
1673 [(set_attr "type" "ssecomi")
1674 (set_attr "mode" "DF")])
1675
;; ucomisd: same operand shape as sse2_comi, but FLAGS_REG is set in CCFPU
;; mode.
1676(define_insn "sse2_ucomi"
1677 [(set (reg:CCFPU FLAGS_REG)
1678 (compare:CCFPU
1679 (vec_select:DF
1680 (match_operand:V2DF 0 "register_operand" "x")
1681 (parallel [(const_int 0)]))
1682 (vec_select:DF
1683 (match_operand:V2DF 1 "nonimmediate_operand" "xm")
1684 (parallel [(const_int 0)]))))]
1685 "TARGET_SSE2"
1686 "ucomisd\t{%1, %0|%0, %1}"
1687 [(set_attr "type" "ssecomi")
1688 (set_attr "mode" "DF")])
1689
;; V2DF vector conditional-select expander: operand 3 compares operands 4
;; and 5, and operands 1 / 2 are the two arms of the if_then_else.  The
;; expansion is attempted by ix86_expand_fp_vcond; the expander FAILs when
;; that returns false.
1690(define_expand "vcondv2df"
1691 [(set (match_operand:V2DF 0 "register_operand" "")
1692 (if_then_else:V2DF
1693 (match_operator 3 ""
1694 [(match_operand:V2DF 4 "nonimmediate_operand" "")
1695 (match_operand:V2DF 5 "nonimmediate_operand" "")])
1696 (match_operand:V2DF 1 "general_operand" "")
1697 (match_operand:V2DF 2 "general_operand" "")))]
1698 "TARGET_SSE2"
1699{
1700 if (ix86_expand_fp_vcond (operands))
1701 DONE;
1702 else
1703 FAIL;
1704})
1705
1706;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
1707;;
1708;; Parallel double-precision floating point logical operations
1709;;
1710;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
1711
;; V2DF bitwise-AND expander.  Operands are first passed through
;; ix86_fixup_binary_operands_no_copy, then the pattern below is emitted.
1712(define_expand "andv2df3"
1713 [(set (match_operand:V2DF 0 "register_operand" "")
1714 (and:V2DF (match_operand:V2DF 1 "nonimmediate_operand" "")
1715 (match_operand:V2DF 2 "nonimmediate_operand" "")))]
1716 "TARGET_SSE2"
1717 "ix86_fixup_binary_operands_no_copy (AND, V2DFmode, operands);")
1718
;; andpd: packed V2DF bitwise AND.  Operand 1 is marked commutative ("%0");
;; validity is checked by ix86_binary_operator_ok.
1719(define_insn "*andv2df3"
1720 [(set (match_operand:V2DF 0 "register_operand" "=x")
1721 (and:V2DF (match_operand:V2DF 1 "nonimmediate_operand" "%0")
1722 (match_operand:V2DF 2 "nonimmediate_operand" "xm")))]
1723 "TARGET_SSE2 && ix86_binary_operator_ok (AND, V2DFmode, operands)"
1724 "andpd\t{%2, %0|%0, %2}"
1725 [(set_attr "type" "sselog")
1726 (set_attr "mode" "V2DF")])
1727
;; andnpd: (NOT operand 1) AND operand 2.  Despite the "nand" in the pattern
;; name, the complement applies to operand 1 only, not to the AND result.
1728(define_insn "sse2_nandv2df3"
1729 [(set (match_operand:V2DF 0 "register_operand" "=x")
1730 (and:V2DF (not:V2DF (match_operand:V2DF 1 "register_operand" "0"))
1731 (match_operand:V2DF 2 "nonimmediate_operand" "xm")))]
1732 "TARGET_SSE2"
1733 "andnpd\t{%2, %0|%0, %2}"
1734 [(set_attr "type" "sselog")
1735 (set_attr "mode" "V2DF")])
1736
;; V2DF bitwise-OR expander.  Operands are first passed through
;; ix86_fixup_binary_operands_no_copy, then the pattern below is emitted.
1737(define_expand "iorv2df3"
1738 [(set (match_operand:V2DF 0 "register_operand" "")
1739 (ior:V2DF (match_operand:V2DF 1 "nonimmediate_operand" "")
1740 (match_operand:V2DF 2 "nonimmediate_operand" "")))]
1741 "TARGET_SSE2"
1742 "ix86_fixup_binary_operands_no_copy (IOR, V2DFmode, operands);")
1743
;; orpd: packed V2DF bitwise OR.  Operand 1 is marked commutative ("%0");
;; validity is checked by ix86_binary_operator_ok.
1744(define_insn "*iorv2df3"
1745 [(set (match_operand:V2DF 0 "register_operand" "=x")
1746 (ior:V2DF (match_operand:V2DF 1 "nonimmediate_operand" "%0")
1747 (match_operand:V2DF 2 "nonimmediate_operand" "xm")))]
1748 "TARGET_SSE2 && ix86_binary_operator_ok (IOR, V2DFmode, operands)"
1749 "orpd\t{%2, %0|%0, %2}"
1750 [(set_attr "type" "sselog")
1751 (set_attr "mode" "V2DF")])
1752
;; V2DF bitwise-XOR expander.  Operands are first passed through
;; ix86_fixup_binary_operands_no_copy, then the pattern below is emitted.
1753(define_expand "xorv2df3"
1754 [(set (match_operand:V2DF 0 "register_operand" "")
1755 (xor:V2DF (match_operand:V2DF 1 "nonimmediate_operand" "")
1756 (match_operand:V2DF 2 "nonimmediate_operand" "")))]
1757 "TARGET_SSE2"
1758 "ix86_fixup_binary_operands_no_copy (XOR, V2DFmode, operands);")
1759
;; xorpd: packed V2DF bitwise XOR.  Operand 1 is marked commutative ("%0");
;; validity is checked by ix86_binary_operator_ok.
1760(define_insn "*xorv2df3"
1761 [(set (match_operand:V2DF 0 "register_operand" "=x")
1762 (xor:V2DF (match_operand:V2DF 1 "nonimmediate_operand" "%0")
1763 (match_operand:V2DF 2 "nonimmediate_operand" "xm")))]
1764 "TARGET_SSE2 && ix86_binary_operator_ok (XOR, V2DFmode, operands)"
1765 "xorpd\t{%2, %0|%0, %2}"
1766 [(set_attr "type" "sselog")
1767 (set_attr "mode" "V2DF")])
1768
1769;; Also define scalar versions. These are used for abs, neg, and
1770;; conditional move. Using subregs into vector modes causes register
1771;; allocation lossage. These patterns do not allow memory operands
1772;; because the native instructions read the full 128-bits.
1773
;; Scalar DF bitwise AND emitted as andpd.  Both inputs must be registers;
;; the "mode" attribute is V2DF because the packed instruction is used.
1774(define_insn "*anddf3"
1775 [(set (match_operand:DF 0 "register_operand" "=x")
1776 (and:DF (match_operand:DF 1 "register_operand" "0")
1777 (match_operand:DF 2 "register_operand" "x")))]
1778 "TARGET_SSE2"
1779 "andpd\t{%2, %0|%0, %2}"
1780 [(set_attr "type" "sselog")
1781 (set_attr "mode" "V2DF")])
1782
;; Scalar DF (NOT operand 1) AND operand 2, emitted as andnpd.  Both inputs
;; must be registers.
1783(define_insn "*nanddf3"
1784 [(set (match_operand:DF 0 "register_operand" "=x")
1785 (and:DF (not:DF (match_operand:DF 1 "register_operand" "0"))
1786 (match_operand:DF 2 "register_operand" "x")))]
1787 "TARGET_SSE2"
1788 "andnpd\t{%2, %0|%0, %2}"
1789 [(set_attr "type" "sselog")
1790 (set_attr "mode" "V2DF")])
1791
;; Scalar DF bitwise OR emitted as orpd.  Both inputs must be registers.
1792(define_insn "*iordf3"
1793 [(set (match_operand:DF 0 "register_operand" "=x")
1794 (ior:DF (match_operand:DF 1 "register_operand" "0")
1795 (match_operand:DF 2 "register_operand" "x")))]
1796 "TARGET_SSE2"
1797 "orpd\t{%2, %0|%0, %2}"
1798 [(set_attr "type" "sselog")
1799 (set_attr "mode" "V2DF")])
1800
;; Scalar DF bitwise XOR emitted as xorpd.  Both inputs must be registers.
1801(define_insn "*xordf3"
1802 [(set (match_operand:DF 0 "register_operand" "=x")
1803 (xor:DF (match_operand:DF 1 "register_operand" "0")
1804 (match_operand:DF 2 "register_operand" "x")))]
1805 "TARGET_SSE2"
1806 "xorpd\t{%2, %0|%0, %2}"
1807 [(set_attr "type" "sselog")
1808 (set_attr "mode" "V2DF")])
1809
1810;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
1811;;
1812;; Parallel double-precision floating point conversion operations
1813;;
1814;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
1815
;; cvtpi2pd: "float" conversion of the V2SI operand 1 to V2DF.  The "unit"
;; attribute is "mmx" only for the first ("y") alternative, not for the
;; memory alternative.
1816(define_insn "sse2_cvtpi2pd"
1817 [(set (match_operand:V2DF 0 "register_operand" "=x,x")
1818 (float:V2DF (match_operand:V2SI 1 "nonimmediate_operand" "y,m")))]
1819 "TARGET_SSE2"
1820 "cvtpi2pd\t{%1, %0|%0, %1}"
1821 [(set_attr "type" "ssecvt")
1822 (set_attr "unit" "mmx,*")
1823 (set_attr "mode" "V2DF")])
1824
;; cvtpd2pi: non-truncating conversion (UNSPEC_FIX_NOTRUNC) of the V2DF
;; operand 1 to a V2SI result in a "y" register.
1825(define_insn "sse2_cvtpd2pi"
1826 [(set (match_operand:V2SI 0 "register_operand" "=y")
1827 (unspec:V2SI [(match_operand:V2DF 1 "nonimmediate_operand" "xm")]
1828 UNSPEC_FIX_NOTRUNC))]
1829 "TARGET_SSE2"
1830 "cvtpd2pi\t{%1, %0|%0, %1}"
1831 [(set_attr "type" "ssecvt")
1832 (set_attr "unit" "mmx")
1833 (set_attr "mode" "DI")])
1834
;; cvttpd2pi: "fix" conversion of the V2DF operand 1 to a V2SI result in a
;; "y" register.
;; NOTE(review): "mode" is TI here but DI on sse2_cvtpd2pi -- confirm
;; whether the difference is intentional.
1835(define_insn "sse2_cvttpd2pi"
1836 [(set (match_operand:V2SI 0 "register_operand" "=y")
1837 (fix:V2SI (match_operand:V2DF 1 "nonimmediate_operand" "xm")))]
1838 "TARGET_SSE2"
1839 "cvttpd2pi\t{%1, %0|%0, %1}"
1840 [(set_attr "type" "ssecvt")
1841 (set_attr "unit" "mmx")
1842 (set_attr "mode" "TI")])
1843
;; cvtsi2sd: convert the SI operand 2 to DF and merge it into operand 1
;; under vec_merge mask 1.  Alternative 0 takes operand 2 from a general
;; register, alternative 1 from memory.
1844(define_insn "sse2_cvtsi2sd"
1845 [(set (match_operand:V2DF 0 "register_operand" "=x,x")
1846 (vec_merge:V2DF
1847 (vec_duplicate:V2DF
1848 (float:DF (match_operand:SI 2 "nonimmediate_operand" "r,m")))
1849 (match_operand:V2DF 1 "register_operand" "0,0")
1850 (const_int 1)))]
1851 "TARGET_SSE2"
1852 "cvtsi2sd\t{%2, %0|%0, %2}"
1853 [(set_attr "type" "sseicvt")
1854 (set_attr "mode" "DF")
1855 (set_attr "athlon_decode" "double,direct")])
1856
;; DImode-source variant of sse2_cvtsi2sd: converts DI operand 2 to DF and
;; merges it into element 0 of operand 1.  Enabled only with TARGET_64BIT.
1857(define_insn "sse2_cvtsi2sdq"
1858 [(set (match_operand:V2DF 0 "register_operand" "=x,x")
1859 (vec_merge:V2DF
1860 (vec_duplicate:V2DF
1861 (float:DF (match_operand:DI 2 "nonimmediate_operand" "r,m")))
1862 (match_operand:V2DF 1 "register_operand" "0,0")
1863 (const_int 1)))]
1864 "TARGET_SSE2 && TARGET_64BIT"
1865 "cvtsi2sdq\t{%2, %0|%0, %2}"
1866 [(set_attr "type" "sseicvt")
1867 (set_attr "mode" "DF")
1868 (set_attr "athlon_decode" "double,direct")])
1869
;; Convert element 0 of V2DF operand 1 to an SImode general register with
;; cvtsd2si.  Modelled as UNSPEC_FIX_NOTRUNC (not a plain fix).
1870(define_insn "sse2_cvtsd2si"
1871 [(set (match_operand:SI 0 "register_operand" "=r,r")
1872 (unspec:SI
1873 [(vec_select:DF
1874 (match_operand:V2DF 1 "nonimmediate_operand" "x,m")
1875 (parallel [(const_int 0)]))]
1876 UNSPEC_FIX_NOTRUNC))]
1877 "TARGET_SSE2"
1878 "cvtsd2si\t{%1, %0|%0, %1}"
1879 [(set_attr "type" "sseicvt")
1880 (set_attr "athlon_decode" "double,vector")
1881 (set_attr "mode" "SI")])
1882
;; DImode-result variant of sse2_cvtsd2si (UNSPEC_FIX_NOTRUNC of element 0).
;; Enabled only with TARGET_64BIT.
1883(define_insn "sse2_cvtsd2siq"
1884 [(set (match_operand:DI 0 "register_operand" "=r,r")
1885 (unspec:DI
1886 [(vec_select:DF
1887 (match_operand:V2DF 1 "nonimmediate_operand" "x,m")
1888 (parallel [(const_int 0)]))]
1889 UNSPEC_FIX_NOTRUNC))]
1890 "TARGET_SSE2 && TARGET_64BIT"
1891 "cvtsd2siq\t{%1, %0|%0, %1}"
1892 [(set_attr "type" "sseicvt")
1893 (set_attr "athlon_decode" "double,vector")
1894 (set_attr "mode" "DI")])
1895
;; Convert element 0 of V2DF operand 1 to SImode with cvttsd2si, expressed
;; as (fix:SI ...).
1896(define_insn "sse2_cvttsd2si"
1897 [(set (match_operand:SI 0 "register_operand" "=r,r")
1898 (fix:SI
1899 (vec_select:DF
1900 (match_operand:V2DF 1 "nonimmediate_operand" "x,m")
1901 (parallel [(const_int 0)]))))]
1902 "TARGET_SSE2"
1903 "cvttsd2si\t{%1, %0|%0, %1}"
1904 [(set_attr "type" "sseicvt")
1905 (set_attr "mode" "SI")
1906 (set_attr "athlon_decode" "double,vector")])
1907
;; DImode-result variant of sse2_cvttsd2si ((fix:DI ...) of element 0).
;; Enabled only with TARGET_64BIT.
1908(define_insn "sse2_cvttsd2siq"
1909 [(set (match_operand:DI 0 "register_operand" "=r,r")
1910 (fix:DI
1911 (vec_select:DF
1912 (match_operand:V2DF 1 "nonimmediate_operand" "x,m")
1913 (parallel [(const_int 0)]))))]
1914 "TARGET_SSE2 && TARGET_64BIT"
1915 "cvttsd2siq\t{%1, %0|%0, %1}"
1916 [(set_attr "type" "sseicvt")
1917 (set_attr "mode" "DI")
1918 (set_attr "athlon_decode" "double,vector")])
1919
;; Convert elements 0 and 1 of V4SI operand 1 to a V2DF result (cvtdq2pd).
1920(define_insn "sse2_cvtdq2pd"
1921 [(set (match_operand:V2DF 0 "register_operand" "=x")
1922 (float:V2DF
1923 (vec_select:V2SI
1924 (match_operand:V4SI 1 "nonimmediate_operand" "xm")
1925 (parallel [(const_int 0) (const_int 1)]))))]
1926 "TARGET_SSE2"
1927 "cvtdq2pd\t{%1, %0|%0, %1}"
1928 [(set_attr "type" "ssecvt")
1929 (set_attr "mode" "V2DF")])
1930
;; Expander for cvtpd2dq: the V4SI result is the concatenation of the
;; converted V2SI (UNSPEC_FIX_NOTRUNC of operand 1) with a zero V2SI, which
;; the preparation statement supplies as operand 2.
1931(define_expand "sse2_cvtpd2dq"
1932 [(set (match_operand:V4SI 0 "register_operand" "")
1933 (vec_concat:V4SI
1934 (unspec:V2SI [(match_operand:V2DF 1 "nonimmediate_operand" "")]
1935 UNSPEC_FIX_NOTRUNC)
1936 (match_dup 2)))]
1937 "TARGET_SSE2"
1938 "operands[2] = CONST0_RTX (V2SImode);")
1939
;; Matching insn for the sse2_cvtpd2dq expander.  Operand 2 must satisfy
;; const0_operand; it does not appear in the output template.
1940(define_insn "*sse2_cvtpd2dq"
1941 [(set (match_operand:V4SI 0 "register_operand" "=x")
1942 (vec_concat:V4SI
1943 (unspec:V2SI [(match_operand:V2DF 1 "nonimmediate_operand" "xm")]
1944 UNSPEC_FIX_NOTRUNC)
1945 (match_operand:V2SI 2 "const0_operand" "")))]
1946 "TARGET_SSE2"
1947 "cvtpd2dq\t{%1, %0|%0, %1}"
1948 [(set_attr "type" "ssecvt")
1949 (set_attr "mode" "TI")])
1950
;; Expander for cvttpd2dq: (fix:V2SI operand 1) concatenated with a zero
;; V2SI, which the preparation statement supplies as operand 2.
1951(define_expand "sse2_cvttpd2dq"
1952 [(set (match_operand:V4SI 0 "register_operand" "")
1953 (vec_concat:V4SI
1954 (fix:V2SI (match_operand:V2DF 1 "nonimmediate_operand" ""))
1955 (match_dup 2)))]
1956 "TARGET_SSE2"
1957 "operands[2] = CONST0_RTX (V2SImode);")
1958
;; Matching insn for the sse2_cvttpd2dq expander.  Operand 2 must satisfy
;; const0_operand; it does not appear in the output template.
1959(define_insn "*sse2_cvttpd2dq"
1960 [(set (match_operand:V4SI 0 "register_operand" "=x")
1961 (vec_concat:V4SI
1962 (fix:V2SI (match_operand:V2DF 1 "nonimmediate_operand" "xm"))
1963 (match_operand:V2SI 2 "const0_operand" "")))]
1964 "TARGET_SSE2"
1965 "cvttpd2dq\t{%1, %0|%0, %1}"
1966 [(set_attr "type" "ssecvt")
1967 (set_attr "mode" "TI")])
1968
;; cvtsd2ss: float_truncate of V2DF operand 2 is merged into element 0 of
;; V4SF operand 1 (vec_merge mask 1).  Operand 1 is tied to the destination.
1969(define_insn "sse2_cvtsd2ss"
1970 [(set (match_operand:V4SF 0 "register_operand" "=x,x")
1971 (vec_merge:V4SF
1972 (vec_duplicate:V4SF
1973 (float_truncate:V2SF
1974 (match_operand:V2DF 2 "nonimmediate_operand" "x,m")))
1975 (match_operand:V4SF 1 "register_operand" "0,0")
1976 (const_int 1)))]
1977 "TARGET_SSE2"
1978 "cvtsd2ss\t{%2, %0|%0, %2}"
1979 [(set_attr "type" "ssecvt")
1980 (set_attr "athlon_decode" "vector,double")
1981 (set_attr "mode" "SF")])
1982
;; cvtss2sd: float_extend of elements 0-1 of V4SF operand 2 is merged into
;; element 0 of V2DF operand 1 (vec_merge mask 1).  Operand 1 is tied to the
;; destination.
1983(define_insn "sse2_cvtss2sd"
1984 [(set (match_operand:V2DF 0 "register_operand" "=x")
1985 (vec_merge:V2DF
1986 (float_extend:V2DF
1987 (vec_select:V2SF
1988 (match_operand:V4SF 2 "nonimmediate_operand" "xm")
1989 (parallel [(const_int 0) (const_int 1)])))
1990 (match_operand:V2DF 1 "register_operand" "0")
1991 (const_int 1)))]
1992 "TARGET_SSE2"
1993 "cvtss2sd\t{%2, %0|%0, %2}"
1994 [(set_attr "type" "ssecvt")
1995 (set_attr "mode" "DF")])
1996
;; Expander for cvtpd2ps: float_truncate of V2DF operand 1 concatenated with
;; a zero V2SF, which the preparation statement supplies as operand 2.
;; NOTE(review): operand 1 carries an "xm" constraint string, unlike the
;; other expanders in this section which use "" -- confirm it is unneeded.
1997(define_expand "sse2_cvtpd2ps"
1998 [(set (match_operand:V4SF 0 "register_operand" "")
1999 (vec_concat:V4SF
2000 (float_truncate:V2SF
2001 (match_operand:V2DF 1 "nonimmediate_operand" "xm"))
2002 (match_dup 2)))]
2003 "TARGET_SSE2"
2004 "operands[2] = CONST0_RTX (V2SFmode);")
2005
;; Matching insn for the sse2_cvtpd2ps expander.  Operand 2 must satisfy
;; const0_operand; it does not appear in the output template.
2006(define_insn "*sse2_cvtpd2ps"
2007 [(set (match_operand:V4SF 0 "register_operand" "=x")
2008 (vec_concat:V4SF
2009 (float_truncate:V2SF
2010 (match_operand:V2DF 1 "nonimmediate_operand" "xm"))
2011 (match_operand:V2SF 2 "const0_operand" "")))]
2012 "TARGET_SSE2"
2013 "cvtpd2ps\t{%1, %0|%0, %1}"
2014 [(set_attr "type" "ssecvt")
2015 (set_attr "mode" "V4SF")])
2016
;; cvtps2pd: float_extend elements 0 and 1 of V4SF operand 1 to V2DF.
2017(define_insn "sse2_cvtps2pd"
2018 [(set (match_operand:V2DF 0 "register_operand" "=x")
2019 (float_extend:V2DF
2020 (vec_select:V2SF
2021 (match_operand:V4SF 1 "nonimmediate_operand" "xm")
2022 (parallel [(const_int 0) (const_int 1)]))))]
2023 "TARGET_SSE2"
2024 "cvtps2pd\t{%1, %0|%0, %1}"
2025 [(set_attr "type" "ssecvt")
2026 (set_attr "mode" "V2DF")])
2027
2028;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
2029;;
2030;; Parallel double-precision floating point element swizzling
2031;;
2032;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
2033
;; Result = { operand1[1], operand2[1] } (elements 1 and 3 of the
;; concatenation).  Three alternatives:
;;   0: both in registers, operand 1 tied to dest   -> unpckhpd
;;   1: operand 1 in offsettable memory, operand 2 tied to dest
;;      -> movlpd from the high half of operand 1 (%H1)
;;   2: dest is memory and equals operand 2         -> movhpd store
;; The condition rejects operands 1 and 2 both being memory.
2034(define_insn "sse2_unpckhpd"
2035 [(set (match_operand:V2DF 0 "nonimmediate_operand" "=x,x,m")
2036 (vec_select:V2DF
2037 (vec_concat:V4DF
2038 (match_operand:V2DF 1 "nonimmediate_operand" " 0,o,x")
2039 (match_operand:V2DF 2 "nonimmediate_operand" " x,0,0"))
2040 (parallel [(const_int 1)
2041 (const_int 3)])))]
2042 "TARGET_SSE2 && !(MEM_P (operands[1]) && MEM_P (operands[2]))"
2043 "@
2044 unpckhpd\t{%2, %0|%0, %2}
2045 movlpd\t{%H1, %0|%0, %H1}
2046 movhpd\t{%1, %0|%0, %1}"
2047 [(set_attr "type" "sselog,ssemov,ssemov")
2048 (set_attr "mode" "V2DF,V1DF,V1DF")])
2049
;; Result = { operand1[0], operand1[0] } (elements 0 and 2 of operand 1
;; concatenated with itself).  The register-destination alternative emits
;; movddup; the memory-destination alternative outputs "#" and is handled
;; by the define_split that follows.  Requires TARGET_SSE3.
2050(define_insn "*sse3_movddup"
2051 [(set (match_operand:V2DF 0 "nonimmediate_operand" "=x,o")
2052 (vec_select:V2DF
2053 (vec_concat:V4DF
2054 (match_operand:V2DF 1 "nonimmediate_operand" "xm,x")
2055 (match_dup 1))
2056 (parallel [(const_int 0)
2057 (const_int 2)])))]
2058 "TARGET_SSE3 && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
2059 "@
2060 movddup\t{%1, %0|%0, %1}
2061 #"
2062 [(set_attr "type" "sselog1,ssemov")
2063 (set_attr "mode" "V2DF")])
2064
;; After reload, split the memory-destination form of the movddup pattern
;; into two DFmode stores of the same register: one at byte offset 0 and
;; one at byte offset 8 of the destination.
2065(define_split
2066 [(set (match_operand:V2DF 0 "memory_operand" "")
2067 (vec_select:V2DF
2068 (vec_concat:V4DF
2069 (match_operand:V2DF 1 "register_operand" "")
2070 (match_dup 1))
2071 (parallel [(const_int 0)
2072 (const_int 2)])))]
2073 "TARGET_SSE3 && reload_completed"
2074 [(const_int 0)]
2075{
2076 rtx low = gen_rtx_REG (DFmode, REGNO (operands[1]));
2077 emit_move_insn (adjust_address (operands[0], DFmode, 0), low);
2078 emit_move_insn (adjust_address (operands[0], DFmode, 8), low);
2079 DONE;
2080})
2081
;; Result = { operand1[0], operand2[0] } (elements 0 and 2 of the
;; concatenation).  Operand 1 is always tied to the destination:
;;   0: operand 2 in a register  -> unpcklpd
;;   1: operand 2 in memory      -> movhpd load into the high half
;;   2: dest in offsettable memory -> movlpd store to the high half (%H0)
2082(define_insn "sse2_unpcklpd"
2083 [(set (match_operand:V2DF 0 "nonimmediate_operand" "=x,x,o")
2084 (vec_select:V2DF
2085 (vec_concat:V4DF
2086 (match_operand:V2DF 1 "nonimmediate_operand" " 0,0,0")
2087 (match_operand:V2DF 2 "nonimmediate_operand" " x,m,x"))
2088 (parallel [(const_int 0)
2089 (const_int 2)])))]
2090 "TARGET_SSE2 && !(MEM_P (operands[1]) && MEM_P (operands[2]))"
2091 "@
2092 unpcklpd\t{%2, %0|%0, %2}
2093 movhpd\t{%2, %0|%0, %2}
2094 movlpd\t{%2, %H0|%H0, %2}"
2095 [(set_attr "type" "sselog,ssemov,ssemov")
2096 (set_attr "mode" "V2DF,V1DF,V1DF")])
2097
;; Expander taking the shufpd immediate as operand 3.  It decodes the mask
;; into the two element selectors expected by sse2_shufpd_1: bit 0 picks
;; element 0 or 1, bit 1 picks element 2 or 3 of the operand concatenation.
2098(define_expand "sse2_shufpd"
2099 [(match_operand:V2DF 0 "register_operand" "")
2100 (match_operand:V2DF 1 "register_operand" "")
2101 (match_operand:V2DF 2 "nonimmediate_operand" "")
2102 (match_operand:SI 3 "const_int_operand" "")]
2103 "TARGET_SSE2"
2104{
2105 int mask = INTVAL (operands[3]);
2106 emit_insn (gen_sse2_shufpd_1 (operands[0], operands[1], operands[2],
2107 GEN_INT (mask & 1),
2108 GEN_INT (mask & 2 ? 3 : 2)));
2109 DONE;
2110})
2111
;; shufpd with explicit element selectors: operand 3 (0..1) selects from
;; operand 1, operand 4 (2..3) selects from operand 2.  The output code
;; re-encodes the two selectors into the instruction immediate, overwriting
;; operands[3] with it before printing.
2112(define_insn "sse2_shufpd_1"
2113 [(set (match_operand:V2DF 0 "register_operand" "=x")
2114 (vec_select:V2DF
2115 (vec_concat:V4DF
2116 (match_operand:V2DF 1 "register_operand" "0")
2117 (match_operand:V2DF 2 "nonimmediate_operand" "xm"))
2118 (parallel [(match_operand 3 "const_0_to_1_operand" "")
2119 (match_operand 4 "const_2_to_3_operand" "")])))]
2120 "TARGET_SSE2"
2121{
2122 int mask;
2123 mask = INTVAL (operands[3]);
2124 mask |= (INTVAL (operands[4]) - 2) << 1;
2125 operands[3] = GEN_INT (mask);
2126
2127 return "shufpd\t{%3, %2, %0|%0, %2, %3}";
2128}
2129 [(set_attr "type" "sselog")
2130 (set_attr "mode" "V2DF")])
2131
;; Extract element 1 of V2DF operand 1 as a DF value:
;;   0: store to memory              -> movhpd
;;   1: source tied to destination   -> unpckhpd %0, %0
;;   2: source in offsettable memory -> "#", split after reload by the
;;      define_split that follows
2132(define_insn "sse2_storehpd"
2133 [(set (match_operand:DF 0 "nonimmediate_operand" "=m,x,x*fr")
2134 (vec_select:DF
2135 (match_operand:V2DF 1 "nonimmediate_operand" " x,0,o")
2136 (parallel [(const_int 1)])))]
2137 "TARGET_SSE2 && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
2138 "@
2139 movhpd\t{%1, %0|%0, %1}
2140 unpckhpd\t%0, %0
2141 #"
2142 [(set_attr "type" "ssemov,sselog1,ssemov")
2143 (set_attr "mode" "V1DF,V2DF,DF")])
2144
;; After reload, extracting element 1 from a V2DF in memory becomes a plain
;; DFmode load from byte offset 8 of that memory operand.
2145(define_split
2146 [(set (match_operand:DF 0 "register_operand" "")
2147 (vec_select:DF
2148 (match_operand:V2DF 1 "memory_operand" "")
2149 (parallel [(const_int 1)])))]
2150 "TARGET_SSE2 && reload_completed"
2151 [(set (match_dup 0) (match_dup 1))]
2152{
2153 operands[1] = adjust_address (operands[1], DFmode, 8);
2154})
2155
;; Extract element 0 of V2DF operand 1 as a DF value.  Only the
;; register-to-memory alternative emits an instruction (movlpd); the two
;; register-destination alternatives output "#" and are handled by the
;; define_split that follows.
2156(define_insn "sse2_storelpd"
2157 [(set (match_operand:DF 0 "nonimmediate_operand" "=m,x,x*fr")
2158 (vec_select:DF
2159 (match_operand:V2DF 1 "nonimmediate_operand" " x,x,m")
2160 (parallel [(const_int 0)])))]
2161 "TARGET_SSE2 && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
2162 "@
2163 movlpd\t{%1, %0|%0, %1}
2164 #
2165 #"
2166 [(set_attr "type" "ssemov")
2167 (set_attr "mode" "V1DF,DF,DF")])
2168
;; After reload, extracting element 0 into a register becomes an ordinary
;; DFmode move.  A register source is re-expressed as a DFmode REG with the
;; same register number; any other source goes through gen_lowpart.
2169(define_split
2170 [(set (match_operand:DF 0 "register_operand" "")
2171 (vec_select:DF
2172 (match_operand:V2DF 1 "nonimmediate_operand" "")
2173 (parallel [(const_int 0)])))]
2174 "TARGET_SSE2 && reload_completed"
2175 [(const_int 0)]
2176{
2177 rtx op1 = operands[1];
2178 if (REG_P (op1))
2179 op1 = gen_rtx_REG (DFmode, REGNO (op1));
2180 else
2181 op1 = gen_lowpart (DFmode, op1);
2182 emit_move_insn (operands[0], op1);
2183 DONE;
2184})
2185
;; Result = { operand1[0], operand2 }: replace the high element of V2DF
;; operand 1 with DF operand 2.  Operand 1 is always tied to the
;; destination:
;;   0: operand 2 in memory   -> movhpd load into the high half
;;   1: operand 2 in register -> unpcklpd
;;   2: dest in offsettable memory -> "#", split after reload into a DFmode
;;      store at byte offset 8
;; A former alternative (operand 1 in "x", operand 2 tied to dest, emitting
;; "shufpd $1, %1, %0") was removed: that instruction leaves
;; { dest[1], operand1[0] } in the destination, not { operand1[0], operand2 },
;; and no single instruction implements that operand arrangement.
2186(define_insn "sse2_loadhpd"
2187 [(set (match_operand:V2DF 0 "nonimmediate_operand" "=x,x,o")
2188 (vec_concat:V2DF
2189 (vec_select:DF
2190 (match_operand:V2DF 1 "nonimmediate_operand" " 0,0,0")
2191 (parallel [(const_int 0)]))
2192 (match_operand:DF 2 "nonimmediate_operand" " m,x,x*fr")))]
2193 "TARGET_SSE2 && !(MEM_P (operands[1]) && MEM_P (operands[2]))"
2194 "@
2195 movhpd\t{%2, %0|%0, %2}
2196 unpcklpd\t{%2, %0|%0, %2}
2198 #"
2199 [(set_attr "type" "ssemov,sselog,other")
2200 (set_attr "mode" "V1DF,V2DF,DF")])
2201
;; After reload, replacing the high element of a V2DF in memory with a
;; register DF value becomes a DFmode store at byte offset 8; the low
;; element (byte offset 0) is left untouched.
2202(define_split
2203 [(set (match_operand:V2DF 0 "memory_operand" "")
2204 (vec_concat:V2DF
2205 (vec_select:DF (match_dup 0) (parallel [(const_int 0)]))
2206 (match_operand:DF 1 "register_operand" "")))]
2207 "TARGET_SSE2 && reload_completed"
2208 [(set (match_dup 0) (match_dup 1))]
2209{
2210 operands[0] = adjust_address (operands[0], DFmode, 8);
2211})
2212
;; Result = { operand2, operand1[1] }: replace the low element of V2DF
;; operand 1 with DF operand 2.  Alternatives:
;;   0: operand 1 is constant "C", operand 2 in memory    -> movsd load
;;   1: operand 1 tied to dest, operand 2 in memory       -> movlpd load
;;   2: operand 1 tied to dest, operand 2 in register     -> movsd
;;   3: operand 2 tied to dest, operand 1 in register     -> shufpd $2
;;   4: operand 2 tied to dest, operand 1 in offsettable memory
;;      -> movhpd load from the high half of operand 1 (%H1)
;;   5: dest is memory, operand 1 tied to it -> "#", split after reload
;; In alternative 3 the shufpd source must be operand 1: immediate 2 keeps
;; dest[0] (operand 2) in the low half and takes src[1] for the high half.
;; Naming %2 there would shuffle the destination with itself, because
;; operand 2 is tied to %0.
2213(define_insn "sse2_loadlpd"
2214 [(set (match_operand:V2DF 0 "nonimmediate_operand" "=x,x,x,x,x,m")
2215 (vec_concat:V2DF
2216 (match_operand:DF 2 "nonimmediate_operand" " m,m,x,0,0,x*fr")
2217 (vec_select:DF
2218 (match_operand:V2DF 1 "vector_move_operand" " C,0,0,x,o,0")
2219 (parallel [(const_int 1)]))))]
2220 "TARGET_SSE2 && !(MEM_P (operands[1]) && MEM_P (operands[2]))"
2221 "@
2222 movsd\t{%2, %0|%0, %2}
2223 movlpd\t{%2, %0|%0, %2}
2224 movsd\t{%2, %0|%0, %2}
2225 shufpd\t{$2, %1, %0|%0, %1, 2}
2226 movhpd\t{%H1, %0|%0, %H1}
2227 #"
2228 [(set_attr "type" "ssemov,ssemov,ssemov,sselog,ssemov,other")
2229 (set_attr "mode" "DF,V1DF,V1DF,V2DF,V1DF,DF")])
2230
;; After reload, replacing the LOW element of a V2DF in memory with a
;; register DF value becomes a DFmode store at byte offset 0.  The pattern
;; keeps element 1 of the destination (the vec_select of match_dup 0), so
;; the high half at byte offset 8 must not be written.
2231(define_split
2232 [(set (match_operand:V2DF 0 "memory_operand" "")
2233 (vec_concat:V2DF
2234 (match_operand:DF 1 "register_operand" "")
2235 (vec_select:DF (match_dup 0) (parallel [(const_int 1)]))))]
2236 "TARGET_SSE2 && reload_completed"
2237 [(set (match_dup 0) (match_dup 1))]
2238{
2239 operands[0] = adjust_address (operands[0], DFmode, 0);
2240})
2241
2242;; Not sure these two are ever used, but it doesn't hurt to have
2243;; them. -aoliva
;; Extract element 1 of a V2DF when only SSE1 is available (condition is
;; !TARGET_SSE2 && TARGET_SSE), using single-precision move instructions:
;;   0: store to memory          -> movhps
;;   1: register to register     -> movhlps
;;   2: load from the high half of an offsettable memory operand -> movlps
2244(define_insn "*vec_extractv2df_1_sse"
2245 [(set (match_operand:DF 0 "nonimmediate_operand" "=m,x,x")
2246 (vec_select:DF
2247 (match_operand:V2DF 1 "nonimmediate_operand" "x,x,o")
2248 (parallel [(const_int 1)])))]
2249 "!TARGET_SSE2 && TARGET_SSE
2250 && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
2251 "@
2252 movhps\t{%1, %0|%0, %1}
2253 movhlps\t{%1, %0|%0, %1}
2254 movlps\t{%H1, %0|%0, %H1}"
2255 [(set_attr "type" "ssemov")
2256 (set_attr "mode" "V2SF,V4SF,V2SF")])
2257
;; Extract element 0 of a V2DF when only SSE1 is available (condition is
;; !TARGET_SSE2 && TARGET_SSE):
;;   0: store to memory      -> movlps
;;   1: register to register -> movaps (copies the whole register)
;;   2: load from memory     -> movlps
2258(define_insn "*vec_extractv2df_0_sse"
2259 [(set (match_operand:DF 0 "nonimmediate_operand" "=m,x,x")
2260 (vec_select:DF
2261 (match_operand:V2DF 1 "nonimmediate_operand" "x,x,m")
2262 (parallel [(const_int 0)])))]
2263 "!TARGET_SSE2 && TARGET_SSE
2264 && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
2265 "@
2266 movlps\t{%1, %0|%0, %1}
2267 movaps\t{%1, %0|%0, %1}
2268 movlps\t{%1, %0|%0, %1}"
2269 [(set_attr "type" "ssemov")
2270 (set_attr "mode" "V2SF,V4SF,V2SF")])
2271
;; Result = { operand2[0], operand1[1] } (vec_merge with mask 1 takes
;; element 0 from operand 2 and element 1 from operand 1).  Alternatives:
;;   0: operand 1 tied to dest, operand 2 in register -> movsd
;;   1: operand 1 tied to dest, operand 2 in memory   -> movlpd load
;;   2: dest is memory (= operand 1), operand 2 in register -> movlpd store
;;   3: operand 2 tied to dest, operand 1 in register -> shufpd $2
;;   4: operand 2 tied to dest, operand 1 in offsettable memory
;;      -> movhps load from the high half of operand 1 (%H1)
;;   5: dest is offsettable memory (= operand 2), operand 1 in register
;;      -> movhps store to the high half of dest (%H0)
;; In alternative 3 the shufpd source must be operand 1: immediate 2 keeps
;; dest[0] (operand 2) in the low half and takes src[1] for the high half.
;; Naming %2 there would shuffle the destination with itself, because
;; operand 2 is tied to %0.
2272(define_insn "sse2_movsd"
2273 [(set (match_operand:V2DF 0 "nonimmediate_operand" "=x,x,m,x,x,o")
2274 (vec_merge:V2DF
2275 (match_operand:V2DF 2 "nonimmediate_operand" " x,m,x,0,0,0")
2276 (match_operand:V2DF 1 "nonimmediate_operand" " 0,0,0,x,o,x")
2277 (const_int 1)))]
2278 "TARGET_SSE2"
2279 "@
2280 movsd\t{%2, %0|%0, %2}
2281 movlpd\t{%2, %0|%0, %2}
2282 movlpd\t{%2, %0|%0, %2}
2283 shufpd\t{$2, %1, %0|%0, %1, 2}
2284 movhps\t{%H1, %0|%0, %H1}
2285 movhps\t{%1, %H0|%H0, %1}"
2286 [(set_attr "type" "ssemov,ssemov,ssemov,sselog,ssemov,ssemov")
2287 (set_attr "mode" "DF,V1DF,V1DF,V2DF,V1DF,V1DF")])
2288
;; Broadcast a DF operand (register or memory) to both elements of a V2DF
;; with movddup.  Requires TARGET_SSE3.
2289(define_insn "*vec_dupv2df_sse3"
2290 [(set (match_operand:V2DF 0 "register_operand" "=x")
2291 (vec_duplicate:V2DF
2292 (match_operand:DF 1 "nonimmediate_operand" "xm")))]
2293 "TARGET_SSE3"
2294 "movddup\t{%1, %0|%0, %1}"
2295 [(set_attr "type" "sselog1")
2296 (set_attr "mode" "DF")])
2297
;; Broadcast a DF register operand (tied to the destination) to both
;; elements of a V2DF with "unpcklpd %0, %0".  Requires TARGET_SSE2.
;; The "mode" attribute is V2DF, matching the unpcklpd alternative of
;; sse2_unpcklpd; unpcklpd is a packed-double instruction.
2298(define_insn "*vec_dupv2df"
2299 [(set (match_operand:V2DF 0 "register_operand" "=x")
2300 (vec_duplicate:V2DF
2301 (match_operand:DF 1 "register_operand" "0")))]
2302 "TARGET_SSE2"
2303 "unpcklpd\t%0, %0"
2304 [(set_attr "type" "sselog1")
2305 (set_attr "mode" "V2DF")])
2306
;; Concatenation of a DF operand with itself (match_dup 1), emitted as
;; movddup.  Requires TARGET_SSE3.
2307(define_insn "*vec_concatv2df_sse3"
2308 [(set (match_operand:V2DF 0 "register_operand" "=x")
2309 (vec_concat:V2DF
2310 (match_operand:DF 1 "nonimmediate_operand" "xm")
2311 (match_dup 1)))]
2312 "TARGET_SSE3"
2313 "movddup\t{%1, %0|%0, %1}"
2314 [(set_attr "type" "sselog1")
2315 (set_attr "mode" "DF")])
2316
;; Build a V2DF as { operand1, operand2 }.  Alternatives:
;;   0: "Y" registers, operand 1 tied to dest       -> unpcklpd
;;   1: operand 2 in memory, operand 1 tied to dest -> movhpd
;;   2: operand 1 in memory, operand 2 constant "C" -> movsd load
;;   3: "x" registers, operand 1 tied to dest       -> movlhps
;;   4: operand 2 in memory, operand 1 tied to dest -> movhps
;; The insn condition is only TARGET_SSE; the first three alternatives use
;; the "Y" constraint while the last two use "x" with single-precision
;; move instructions.
2317(define_insn "*vec_concatv2df"
2318 [(set (match_operand:V2DF 0 "register_operand" "=Y,Y,Y,x,x")
2319 (vec_concat:V2DF
2320 (match_operand:DF 1 "nonimmediate_operand" " 0,0,m,0,0")
2321 (match_operand:DF 2 "vector_move_operand" " Y,m,C,x,m")))]
2322 "TARGET_SSE"
2323 "@
2324 unpcklpd\t{%2, %0|%0, %2}
2325 movhpd\t{%2, %0|%0, %2}
2326 movsd\t{%1, %0|%0, %1}
2327 movlhps\t{%2, %0|%0, %2}
2328 movhps\t{%2, %0|%0, %2}"
2329 [(set_attr "type" "sselog,ssemov,ssemov,ssemov,ssemov")
2330 (set_attr "mode" "V2DF,V1DF,DF,V4SF,V2SF")])
2331
;; Expander that sets one element of a V2DF: delegates to
;; ix86_expand_vector_set with the vector (operand 0), the DF value
;; (operand 1) and the constant element index (operand 2).
2332(define_expand "vec_setv2df"
2333 [(match_operand:V2DF 0 "register_operand" "")
2334 (match_operand:DF 1 "register_operand" "")
2335 (match_operand 2 "const_int_operand" "")]
2336 "TARGET_SSE"
2337{
2338 ix86_expand_vector_set (false, operands[0], operands[1],
2339 INTVAL (operands[2]));
2340 DONE;
2341})
2342
;; Expander that extracts one element of a V2DF: delegates to
;; ix86_expand_vector_extract with the DF destination (operand 0), the
;; vector (operand 1) and the constant element index (operand 2).
2343(define_expand "vec_extractv2df"
2344 [(match_operand:DF 0 "register_operand" "")
2345 (match_operand:V2DF 1 "register_operand" "")
2346 (match_operand 2 "const_int_operand" "")]
2347 "TARGET_SSE"
2348{
2349 ix86_expand_vector_extract (false, operands[0], operands[1],
2350 INTVAL (operands[2]));
2351 DONE;
2352})
2353
;; Expander that initializes a V2DF register from operand 1 by delegating
;; to ix86_expand_vector_init.  Operand 1 has no predicate or mode here.
2354(define_expand "vec_initv2df"
2355 [(match_operand:V2DF 0 "register_operand" "")
2356 (match_operand 1 "" "")]
2357 "TARGET_SSE"
2358{
2359 ix86_expand_vector_init (false, operands[0], operands[1]);
2360 DONE;
2361})
2362
2363;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
2364;;
2365;; Parallel integral arithmetic
2366;;
2367;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
2368
;; Integer vector negation for every SSEMODEI mode, expanded as
;; (0 - operand 1).  The zero vector is forced into a register and supplied
;; as operand 2 by the preparation statement.
2369(define_expand "neg<mode>2"
2370 [(set (match_operand:SSEMODEI 0 "register_operand" "")
2371 (minus:SSEMODEI
2372 (match_dup 2)
2373 (match_operand:SSEMODEI 1 "nonimmediate_operand" "")))]
2374 "TARGET_SSE2"
2375 "operands[2] = force_reg (<MODE>mode, CONST0_RTX (<MODE>mode));")
2376
;; Integer vector addition expander for every SSEMODEI mode.  Operands are
;; adjusted by ix86_fixup_binary_operands_no_copy before the pattern is
;; emitted.
2377(define_expand "add<mode>3"
2378 [(set (match_operand:SSEMODEI 0 "register_operand" "")
2379 (plus:SSEMODEI (match_operand:SSEMODEI 1 "nonimmediate_operand" "")
2380 (match_operand:SSEMODEI 2 "nonimmediate_operand" "")))]
2381 "TARGET_SSE2"
2382 "ix86_fixup_binary_operands_no_copy (PLUS, <MODE>mode, operands);")
2383
;; Integer vector addition insn: emits padd with the element-size suffix
;; from the ssevecsize attribute (b/w/d/q).  Operand 1 is commutative ("%")
;; and tied to the destination.
2384(define_insn "*add<mode>3"
2385 [(set (match_operand:SSEMODEI 0 "register_operand" "=x")
2386 (plus:SSEMODEI
2387 (match_operand:SSEMODEI 1 "nonimmediate_operand" "%0")
2388 (match_operand:SSEMODEI 2 "nonimmediate_operand" "xm")))]
2389 "TARGET_SSE2 && ix86_binary_operator_ok (PLUS, <MODE>mode, operands)"
2390 "padd<ssevecsize>\t{%2, %0|%0, %2}"
2391 [(set_attr "type" "sseiadd")
2392 (set_attr "mode" "TI")])
2393
;; Signed saturating addition (ss_plus) for the SSEMODE12 modes
;; (V16QI, V8HI): emits paddsb / paddsw.
2394(define_insn "sse2_ssadd<mode>3"
2395 [(set (match_operand:SSEMODE12 0 "register_operand" "=x")
2396 (ss_plus:SSEMODE12
2397 (match_operand:SSEMODE12 1 "nonimmediate_operand" "%0")
2398 (match_operand:SSEMODE12 2 "nonimmediate_operand" "xm")))]
2399 "TARGET_SSE2 && ix86_binary_operator_ok (SS_PLUS, <MODE>mode, operands)"
2400 "padds<ssevecsize>\t{%2, %0|%0, %2}"
2401 [(set_attr "type" "sseiadd")
2402 (set_attr "mode" "TI")])
2403
;; Unsigned saturating addition (us_plus) for the SSEMODE12 modes
;; (V16QI, V8HI): emits paddusb / paddusw.
2404(define_insn "sse2_usadd<mode>3"
2405 [(set (match_operand:SSEMODE12 0 "register_operand" "=x")
2406 (us_plus:SSEMODE12
2407 (match_operand:SSEMODE12 1 "nonimmediate_operand" "%0")
2408 (match_operand:SSEMODE12 2 "nonimmediate_operand" "xm")))]
2409 "TARGET_SSE2 && ix86_binary_operator_ok (US_PLUS, <MODE>mode, operands)"
2410 "paddus<ssevecsize>\t{%2, %0|%0, %2}"
2411 [(set_attr "type" "sseiadd")
2412 (set_attr "mode" "TI")])
2413
;; Integer vector subtraction expander for every SSEMODEI mode.  Operand 1
;; must be a register; operands are adjusted by
;; ix86_fixup_binary_operands_no_copy before the pattern is emitted.
2414(define_expand "sub<mode>3"
2415 [(set (match_operand:SSEMODEI 0 "register_operand" "")
2416 (minus:SSEMODEI (match_operand:SSEMODEI 1 "register_operand" "")
2417 (match_operand:SSEMODEI 2 "nonimmediate_operand" "")))]
2418 "TARGET_SSE2"
2419 "ix86_fixup_binary_operands_no_copy (MINUS, <MODE>mode, operands);")
2420
;; Integer vector subtraction insn: emits psub with the element-size suffix
;; from ssevecsize.  Operand 1 is tied to the destination; no commutative
;; marker is used.
2421(define_insn "*sub<mode>3"
2422 [(set (match_operand:SSEMODEI 0 "register_operand" "=x")
2423 (minus:SSEMODEI
2424 (match_operand:SSEMODEI 1 "register_operand" "0")
2425 (match_operand:SSEMODEI 2 "nonimmediate_operand" "xm")))]
2426 "TARGET_SSE2"
2427 "psub<ssevecsize>\t{%2, %0|%0, %2}"
2428 [(set_attr "type" "sseiadd")
2429 (set_attr "mode" "TI")])
2430
;; Signed saturating subtraction (ss_minus) for the SSEMODE12 modes
;; (V16QI, V8HI): emits psubsb / psubsw.
2431(define_insn "sse2_sssub<mode>3"
2432 [(set (match_operand:SSEMODE12 0 "register_operand" "=x")
2433 (ss_minus:SSEMODE12
2434 (match_operand:SSEMODE12 1 "register_operand" "0")
2435 (match_operand:SSEMODE12 2 "nonimmediate_operand" "xm")))]
2436 "TARGET_SSE2"
2437 "psubs<ssevecsize>\t{%2, %0|%0, %2}"
2438 [(set_attr "type" "sseiadd")
2439 (set_attr "mode" "TI")])
2440
;; Unsigned saturating subtraction (us_minus) for the SSEMODE12 modes
;; (V16QI, V8HI): emits psubusb / psubusw.
2441(define_insn "sse2_ussub<mode>3"
2442 [(set (match_operand:SSEMODE12 0 "register_operand" "=x")
2443 (us_minus:SSEMODE12
2444 (match_operand:SSEMODE12 1 "register_operand" "0")
2445 (match_operand:SSEMODE12 2 "nonimmediate_operand" "xm")))]
2446 "TARGET_SSE2"
2447 "psubus<ssevecsize>\t{%2, %0|%0, %2}"
2448 [(set_attr "type" "sseiadd")
2449 (set_attr "mode" "TI")])
2450
;; V16QI multiplication synthesized from word multiplies.  Twelve V16QI
;; temporaries are allocated.  Each input is unpacked with itself into the
;; high and low halves (punpckhbw / punpcklbw), the halves are multiplied
;; as V8HI via gen_mulv8hi3, and the result bytes are gathered back into
;; one vector by a sequence of seven punpck{h,l}bw steps.
2451(define_expand "mulv16qi3"
2452 [(set (match_operand:V16QI 0 "register_operand" "")
2453 (mult:V16QI (match_operand:V16QI 1 "register_operand" "")
2454 (match_operand:V16QI 2 "register_operand" "")))]
2455 "TARGET_SSE2"
2456{
2457 rtx t[12], op0;
2458 int i;
2459
2460 for (i = 0; i < 12; ++i)
2461 t[i] = gen_reg_rtx (V16QImode);
2462
2463 /* Unpack data such that we've got a source byte in each low byte of
2464 each word. We don't care what goes into the high byte of each word.
2465 Rather than trying to get zero in there, most convenient is to let
2466 it be a copy of the low byte. */
2467 emit_insn (gen_sse2_punpckhbw (t[0], operands[1], operands[1]));
2468 emit_insn (gen_sse2_punpckhbw (t[1], operands[2], operands[2]));
2469 emit_insn (gen_sse2_punpcklbw (t[2], operands[1], operands[1]));
2470 emit_insn (gen_sse2_punpcklbw (t[3], operands[2], operands[2]));
2471
2472 /* Multiply words. The end-of-line annotations here give a picture of what
2473 the output of that instruction looks like. Dot means don't care; the
2474 letters are the bytes of the result with A being the most significant. */
2475 emit_insn (gen_mulv8hi3 (gen_lowpart (V8HImode, t[4]), /* .A.B.C.D.E.F.G.H */
2476 gen_lowpart (V8HImode, t[0]),
2477 gen_lowpart (V8HImode, t[1])));
2478 emit_insn (gen_mulv8hi3 (gen_lowpart (V8HImode, t[5]), /* .I.J.K.L.M.N.O.P */
2479 gen_lowpart (V8HImode, t[2]),
2480 gen_lowpart (V8HImode, t[3])));
2481
2482 /* Extract the relevant bytes and merge them back together. */
2483 emit_insn (gen_sse2_punpckhbw (t[6], t[5], t[4])); /* ..AI..BJ..CK..DL */
2484 emit_insn (gen_sse2_punpcklbw (t[7], t[5], t[4])); /* ..EM..FN..GO..HP */
2485 emit_insn (gen_sse2_punpckhbw (t[8], t[7], t[6])); /* ....AEIM....BFJN */
2486 emit_insn (gen_sse2_punpcklbw (t[9], t[7], t[6])); /* ....CGKO....DHLP */
2487 emit_insn (gen_sse2_punpckhbw (t[10], t[9], t[8])); /* ........ACEGIKMO */
2488 emit_insn (gen_sse2_punpcklbw (t[11], t[9], t[8])); /* ........BDFHJLNP */
2489
2490 op0 = operands[0];
2491 emit_insn (gen_sse2_punpcklbw (op0, t[11], t[10])); /* ABCDEFGHIJKLMNOP */
2492 DONE;
2493})
2494
;; V8HI multiplication expander.  Operands are adjusted by
;; ix86_fixup_binary_operands_no_copy before the pattern is emitted.
2495(define_expand "mulv8hi3"
2496 [(set (match_operand:V8HI 0 "register_operand" "")
2497 (mult:V8HI (match_operand:V8HI 1 "nonimmediate_operand" "")
2498 (match_operand:V8HI 2 "nonimmediate_operand" "")))]
2499 "TARGET_SSE2"
2500 "ix86_fixup_binary_operands_no_copy (MULT, V8HImode, operands);")
2501
;; V8HI multiplication insn, emitted as pmullw.  Operand 1 is commutative
;; ("%") and tied to the destination.
2502(define_insn "*mulv8hi3"
2503 [(set (match_operand:V8HI 0 "register_operand" "=x")
2504 (mult:V8HI (match_operand:V8HI 1 "nonimmediate_operand" "%0")
2505 (match_operand:V8HI 2 "nonimmediate_operand" "xm")))]
2506 "TARGET_SSE2 && ix86_binary_operator_ok (MULT, V8HImode, operands)"
2507 "pmullw\t{%2, %0|%0, %2}"
2508 [(set_attr "type" "sseimul")
2509 (set_attr "mode" "TI")])
2510
;; Signed high-part multiply (pmulhw): both V8HI operands are sign-extended
;; to V8SI, multiplied, shifted right by 16 and truncated back to V8HI.
2511(define_insn "sse2_smulv8hi3_highpart"
2512 [(set (match_operand:V8HI 0 "register_operand" "=x")
2513 (truncate:V8HI
2514 (lshiftrt:V8SI
2515 (mult:V8SI
2516 (sign_extend:V8SI
2517 (match_operand:V8HI 1 "nonimmediate_operand" "%0"))
2518 (sign_extend:V8SI
2519 (match_operand:V8HI 2 "nonimmediate_operand" "xm")))
2520 (const_int 16))))]
2521 "TARGET_SSE2 && ix86_binary_operator_ok (MULT, V8HImode, operands)"
2522 "pmulhw\t{%2, %0|%0, %2}"
2523 [(set_attr "type" "sseimul")
2524 (set_attr "mode" "TI")])
2525
;; Unsigned high-part multiply (pmulhuw): both V8HI operands are
;; zero-extended to V8SI, multiplied, shifted right by 16 and truncated
;; back to V8HI.
2526(define_insn "sse2_umulv8hi3_highpart"
2527 [(set (match_operand:V8HI 0 "register_operand" "=x")
2528 (truncate:V8HI
2529 (lshiftrt:V8SI
2530 (mult:V8SI
2531 (zero_extend:V8SI
2532 (match_operand:V8HI 1 "nonimmediate_operand" "%0"))
2533 (zero_extend:V8SI
2534 (match_operand:V8HI 2 "nonimmediate_operand" "xm")))
2535 (const_int 16))))]
2536 "TARGET_SSE2 && ix86_binary_operator_ok (MULT, V8HImode, operands)"
2537 "pmulhuw\t{%2, %0|%0, %2}"
2538 [(set_attr "type" "sseimul")
2539 (set_attr "mode" "TI")])
2540
;; Widening unsigned multiply (pmuludq): elements 0 and 2 of each V4SI
;; operand are zero-extended to DI and multiplied, giving a V2DI result.
;; Operand 1 is commutative ("%") and tied to the destination.
;; The operand check is passed V4SImode, the mode of operands 1 and 2.
2541(define_insn "sse2_umulv2siv2di3"
2542 [(set (match_operand:V2DI 0 "register_operand" "=x")
2543 (mult:V2DI
2544 (zero_extend:V2DI
2545 (vec_select:V2SI
2546 (match_operand:V4SI 1 "nonimmediate_operand" "%0")
2547 (parallel [(const_int 0) (const_int 2)])))
2548 (zero_extend:V2DI
2549 (vec_select:V2SI
2550 (match_operand:V4SI 2 "nonimmediate_operand" "xm")
2551 (parallel [(const_int 0) (const_int 2)])))))]
2552 "TARGET_SSE2 && ix86_binary_operator_ok (MULT, V4SImode, operands)"
2553 "pmuludq\t{%2, %0|%0, %2}"
2554 [(set_attr "type" "sseimul")
2555 (set_attr "mode" "TI")])
2556
;; pmaddwd: multiply-and-add of signed words.  The even-indexed elements
;; (0,2,4,6) of both V8HI operands are sign-extended to V4SI and multiplied;
;; the same is done for the odd-indexed elements (1,3,5,7); the two V4SI
;; products are then added.
;; NOTE(review): unlike the neighbouring commutative multiply patterns, the
;; condition here does not call ix86_binary_operator_ok -- confirm whether
;; that is intended.
2557(define_insn "sse2_pmaddwd"
2558 [(set (match_operand:V4SI 0 "register_operand" "=x")
2559 (plus:V4SI
2560 (mult:V4SI
2561 (sign_extend:V4SI
2562 (vec_select:V4HI
2563 (match_operand:V8HI 1 "nonimmediate_operand" "%0")
2564 (parallel [(const_int 0)
2565 (const_int 2)
2566 (const_int 4)
2567 (const_int 6)])))
2568 (sign_extend:V4SI
2569 (vec_select:V4HI
2570 (match_operand:V8HI 2 "nonimmediate_operand" "xm")
2571 (parallel [(const_int 0)
2572 (const_int 2)
2573 (const_int 4)
2574 (const_int 6)]))))
2575 (mult:V4SI
2576 (sign_extend:V4SI
2577 (vec_select:V4HI (match_dup 1)
2578 (parallel [(const_int 1)
2579 (const_int 3)
2580 (const_int 5)
2581 (const_int 7)])))
2582 (sign_extend:V4SI
2583 (vec_select:V4HI (match_dup 2)
2584 (parallel [(const_int 1)
2585 (const_int 3)
2586 (const_int 5)
2587 (const_int 7)]))))))]
2588 "TARGET_SSE2"
2589 "pmaddwd\t{%2, %0|%0, %2}"
2590 [(set_attr "type" "sseiadd")
2591 (set_attr "mode" "TI")])
2592
;; V4SI multiplication synthesized from two widening unsigned multiplies
;; (sse2_umulv2siv2di3).  One multiply handles elements 0 and 2; the inputs
;; are then shifted right by 32 bits as TImode values so that a second
;; multiply handles elements 1 and 3.  pshufd moves the wanted 32-bit
;; results into elements 0 and 1 of each product, and punpckldq interleaves
;; the two into the final vector.
2593(define_expand "mulv4si3"
2594 [(set (match_operand:V4SI 0 "register_operand" "")
2595 (mult:V4SI (match_operand:V4SI 1 "register_operand" "")
2596 (match_operand:V4SI 2 "register_operand" "")))]
2597 "TARGET_SSE2"
2598{
2599 rtx t1, t2, t3, t4, t5, t6, thirtytwo;
2600 rtx op0, op1, op2;
2601
2602 op0 = operands[0];
2603 op1 = operands[1];
2604 op2 = operands[2];
2605 t1 = gen_reg_rtx (V4SImode);
2606 t2 = gen_reg_rtx (V4SImode);
2607 t3 = gen_reg_rtx (V4SImode);
2608 t4 = gen_reg_rtx (V4SImode);
2609 t5 = gen_reg_rtx (V4SImode);
2610 t6 = gen_reg_rtx (V4SImode);
2611 thirtytwo = GEN_INT (32);
2612
2613 /* Multiply elements 2 and 0. */
2614 emit_insn (gen_sse2_umulv2siv2di3 (gen_lowpart (V2DImode, t1), op1, op2));
2615
2616 /* Shift both input vectors down one element, so that elements 3 and 1
2617 are now in the slots for elements 2 and 0. For K8, at least, this is
2618 faster than using a shuffle. */
2619 emit_insn (gen_sse2_lshrti3 (gen_lowpart (TImode, t2),
2620 gen_lowpart (TImode, op1), thirtytwo));
2621 emit_insn (gen_sse2_lshrti3 (gen_lowpart (TImode, t3),
2622 gen_lowpart (TImode, op2), thirtytwo));
2623
2624 /* Multiply elements 3 and 1. */
2625 emit_insn (gen_sse2_umulv2siv2di3 (gen_lowpart (V2DImode, t4), t2, t3));
2626
2627 /* Move the results in element 2 down to element 1; we don't care what
2628 goes in elements 2 and 3. */
2629 emit_insn (gen_sse2_pshufd_1 (t5, t1, const0_rtx, const2_rtx,
2630 const0_rtx, const0_rtx));
2631 emit_insn (gen_sse2_pshufd_1 (t6, t4, const0_rtx, const2_rtx,
2632 const0_rtx, const0_rtx));
2633
2634 /* Merge the parts back together. */
2635 emit_insn (gen_sse2_punpckldq (op0, t5, t6));
2636 DONE;
2637})
2638
;; V2DI multiplication synthesized from 32x32->64 unsigned multiplies
;; (sse2_umulv2siv2di3).  With each 64-bit element split into high and low
;; 32-bit halves, the result is lo1*lo2 + ((lo1*hi2) << 32) +
;; ((lo2*hi1) << 32); the hi1*hi2 term is not computed.
2639(define_expand "mulv2di3"
2640 [(set (match_operand:V2DI 0 "register_operand" "")
2641 (mult:V2DI (match_operand:V2DI 1 "register_operand" "")
2642 (match_operand:V2DI 2 "register_operand" "")))]
2643 "TARGET_SSE2"
2644{
2645 rtx t1, t2, t3, t4, t5, t6, thirtytwo;
2646 rtx op0, op1, op2;
2647
2648 op0 = operands[0];
2649 op1 = operands[1];
2650 op2 = operands[2];
2651 t1 = gen_reg_rtx (V2DImode);
2652 t2 = gen_reg_rtx (V2DImode);
2653 t3 = gen_reg_rtx (V2DImode);
2654 t4 = gen_reg_rtx (V2DImode);
2655 t5 = gen_reg_rtx (V2DImode);
2656 t6 = gen_reg_rtx (V2DImode);
2657 thirtytwo = GEN_INT (32);
2658
2659 /* Multiply low parts. */
2660 emit_insn (gen_sse2_umulv2siv2di3 (t1, gen_lowpart (V4SImode, op1),
2661 gen_lowpart (V4SImode, op2)));
2662
2663 /* Shift input vectors right 32 bits so we can multiply high parts. */
2664 emit_insn (gen_lshrv2di3 (t2, op1, thirtytwo));
2665 emit_insn (gen_lshrv2di3 (t3, op2, thirtytwo));
2666
2667 /* Multiply high parts by low parts. */
2668 emit_insn (gen_sse2_umulv2siv2di3 (t4, gen_lowpart (V4SImode, op1),
2669 gen_lowpart (V4SImode, t3)));
2670 emit_insn (gen_sse2_umulv2siv2di3 (t5, gen_lowpart (V4SImode, op2),
2671 gen_lowpart (V4SImode, t2)));
2672
2673 /* Shift them back. */
2674 emit_insn (gen_ashlv2di3 (t4, t4, thirtytwo));
2675 emit_insn (gen_ashlv2di3 (t5, t5, thirtytwo));
2676
2677 /* Add the three parts together. */
2678 emit_insn (gen_addv2di3 (t6, t1, t4));
2679 emit_insn (gen_addv2di3 (op0, t6, t5));
2680 DONE;
2681})
2682
;; Signed dot-product expander: operand 0 = operand 3 + pmaddwd (operand 1,
;; operand 2).  The pmaddwd result goes through a fresh V4SI temporary.
2683(define_expand "sdot_prodv8hi"
2684 [(match_operand:V4SI 0 "register_operand" "")
2685 (match_operand:V8HI 1 "nonimmediate_operand" "")
2686 (match_operand:V8HI 2 "nonimmediate_operand" "")
2687 (match_operand:V4SI 3 "register_operand" "")]
2688 "TARGET_SSE2"
2689{
2690 rtx t = gen_reg_rtx (V4SImode);
2691 emit_insn (gen_sse2_pmaddwd (t, operands[1], operands[2]));
2692 emit_insn (gen_addv4si3 (operands[0], operands[3], t));
2693 DONE;
2694})
2695
;; Unsigned dot-product expander for V4SI inputs with a V2DI accumulator.
;; Elements 0 and 2 are multiplied with sse2_umulv2siv2di3 and added to
;; operand 3; both inputs are then shifted right by 32 bits as TImode
;; values so a second widening multiply covers elements 1 and 3; the two
;; partial sums are added into operand 0.
2696(define_expand "udot_prodv4si"
2697 [(match_operand:V2DI 0 "register_operand" "")
2698 (match_operand:V4SI 1 "register_operand" "")
2699 (match_operand:V4SI 2 "register_operand" "")
2700 (match_operand:V2DI 3 "register_operand" "")]
2701 "TARGET_SSE2"
2702{
2703 rtx t1, t2, t3, t4;
2704
2705 t1 = gen_reg_rtx (V2DImode);
2706 emit_insn (gen_sse2_umulv2siv2di3 (t1, operands[1], operands[2]));
2707 emit_insn (gen_addv2di3 (t1, t1, operands[3]));
2708
2709 t2 = gen_reg_rtx (V4SImode);
2710 t3 = gen_reg_rtx (V4SImode);
2711 emit_insn (gen_sse2_lshrti3 (gen_lowpart (TImode, t2),
2712 gen_lowpart (TImode, operands[1]),
2713 GEN_INT (32)));
2714 emit_insn (gen_sse2_lshrti3 (gen_lowpart (TImode, t3),
2715 gen_lowpart (TImode, operands[2]),
2716 GEN_INT (32)));
2717
2718 t4 = gen_reg_rtx (V2DImode);
2719 emit_insn (gen_sse2_umulv2siv2di3 (t4, t2, t3));
2720
2721 emit_insn (gen_addv2di3 (operands[0], t1, t4));
2722 DONE;
2723})
2724
;; Arithmetic right shift for the SSEMODE24 modes (V8HI, V4SI): emits
;; psraw / psrad.  The shift count (operand 2) is a TImode register or an
;; integer constant (constraints "xn").
2725(define_insn "ashr<mode>3"
2726 [(set (match_operand:SSEMODE24 0 "register_operand" "=x")
2727 (ashiftrt:SSEMODE24
2728 (match_operand:SSEMODE24 1 "register_operand" "0")
2729 (match_operand:TI 2 "nonmemory_operand" "xn")))]
2730 "TARGET_SSE2"
2731 "psra<ssevecsize>\t{%2, %0|%0, %2}"
2732 [(set_attr "type" "sseishft")
2733 (set_attr "mode" "TI")])
2734
;; Logical right shift for the SSEMODE248 modes (V8HI, V4SI, V2DI): emits
;; psrlw / psrld / psrlq.  The shift count (operand 2) is a TImode register
;; or an integer constant (constraints "xn").
2735(define_insn "lshr<mode>3"
2736 [(set (match_operand:SSEMODE248 0 "register_operand" "=x")
2737 (lshiftrt:SSEMODE248
2738 (match_operand:SSEMODE248 1 "register_operand" "0")
2739 (match_operand:TI 2 "nonmemory_operand" "xn")))]
2740 "TARGET_SSE2"
2741 "psrl<ssevecsize>\t{%2, %0|%0, %2}"
2742 [(set_attr "type" "sseishft")
2743 (set_attr "mode" "TI")])
2744
;; Left shift for the SSEMODE248 modes (V8HI, V4SI, V2DI): emits
;; psllw / pslld / psllq.  The shift count (operand 2) is a TImode register
;; or an integer constant (constraints "xn").
2745(define_insn "ashl<mode>3"
2746 [(set (match_operand:SSEMODE248 0 "register_operand" "=x")
2747 (ashift:SSEMODE248
2748 (match_operand:SSEMODE248 1 "register_operand" "0")
2749 (match_operand:TI 2 "nonmemory_operand" "xn")))]
2750 "TARGET_SSE2"
2751 "psll<ssevecsize>\t{%2, %0|%0, %2}"
2752 [(set_attr "type" "sseishft")
2753 (set_attr "mode" "TI")])
2754
;; Whole-register (TImode) left shift, emitted as pslldq.  The RTL shift
;; count is in bits and must satisfy const_0_to_255_mul_8_operand; the
;; output code divides it by 8 to form the byte count the instruction is
;; printed with.
2755(define_insn "sse2_ashlti3"
2756 [(set (match_operand:TI 0 "register_operand" "=x")
2757 (ashift:TI (match_operand:TI 1 "register_operand" "0")
2758 (match_operand:SI 2 "const_0_to_255_mul_8_operand" "n")))]
2759 "TARGET_SSE2"
2760{
2761 operands[2] = GEN_INT (INTVAL (operands[2]) / 8);
2762 return "pslldq\t{%2, %0|%0, %2}";
2763}
2764 [(set_attr "type" "sseishft")
2765 (set_attr "mode" "TI")])
2766
;; Whole-vector left shift for the SSEMODEI integer vector modes.
;; The expander FAILs unless the shift count satisfies
;; const_0_to_255_mul_8_operand; otherwise it replaces operands 0 and 1
;; with their TImode lowparts so the emitted set is a TImode ashift.
2767(define_expand "vec_shl_<mode>"
2768 [(set (match_operand:SSEMODEI 0 "register_operand" "")
2769 (ashift:TI (match_operand:SSEMODEI 1 "register_operand" "")
2770 (match_operand:SI 2 "general_operand" "")))]
2771 "TARGET_SSE2"
2772{
/* The predicate is only general_operand, so validate the count here.  */
2773 if (!const_0_to_255_mul_8_operand (operands[2], SImode))
2774 FAIL;
2775 operands[0] = gen_lowpart (TImode, operands[0]);
2776 operands[1] = gen_lowpart (TImode, operands[1]);
2777})
2778
;; Whole-register (TImode) logical right shift by a constant.  In the RTL
;; the count (operand 2) is expressed in bits and must satisfy
;; const_0_to_255_mul_8_operand; the output code divides it by 8 before
;; printing it as the psrldq immediate.
2779(define_insn "sse2_lshrti3"
2780 [(set (match_operand:TI 0 "register_operand" "=x")
2781 (lshiftrt:TI (match_operand:TI 1 "register_operand" "0")
2782 (match_operand:SI 2 "const_0_to_255_mul_8_operand" "n")))]
2783 "TARGET_SSE2"
2784{
/* Rewrite the bit count as the value printed for psrldq.  */
2785 operands[2] = GEN_INT (INTVAL (operands[2]) / 8);
2786 return "psrldq\t{%2, %0|%0, %2}";
2787}
2788 [(set_attr "type" "sseishft")
2789 (set_attr "mode" "TI")])
2790
;; Whole-vector logical right shift for the SSEMODEI integer vector modes.
;; The expander FAILs unless the shift count satisfies
;; const_0_to_255_mul_8_operand; otherwise it replaces operands 0 and 1
;; with their TImode lowparts so the emitted set is a TImode lshiftrt.
2791(define_expand "vec_shr_<mode>"
2792 [(set (match_operand:SSEMODEI 0 "register_operand" "")
2793 (lshiftrt:TI (match_operand:SSEMODEI 1 "register_operand" "")
2794 (match_operand:SI 2 "general_operand" "")))]
2795 "TARGET_SSE2"
2796{
/* The predicate is only general_operand, so validate the count here.  */
2797 if (!const_0_to_255_mul_8_operand (operands[2], SImode))
2798 FAIL;
2799 operands[0] = gen_lowpart (TImode, operands[0]);
2800 operands[1] = gen_lowpart (TImode, operands[1]);
2801})
2802
;; Expander for unsigned byte maximum (V16QI).  Both inputs may be
;; nonimmediate; the preparation statement hands the operands to
;; ix86_fixup_binary_operands_no_copy before the umax set is emitted.
;; NOTE(review): presumably that helper legitimizes the operand combination
;; for the matching insn below -- its definition is not in this file.
2803(define_expand "umaxv16qi3"
2804 [(set (match_operand:V16QI 0 "register_operand" "")
2805 (umax:V16QI (match_operand:V16QI 1 "nonimmediate_operand" "")
2806 (match_operand:V16QI 2 "nonimmediate_operand" "")))]
2807 "TARGET_SSE2"
2808 "ix86_fixup_binary_operands_no_copy (UMAX, V16QImode, operands);")
2809
;; Unsigned byte maximum, emitted as pmaxub.  Operand 1 is tied to the
;; destination and marked commutative ("%0"); operand 2 may be a register
;; or memory ("xm").  Only matches when ix86_binary_operator_ok accepts
;; the operand combination.
2810(define_insn "*umaxv16qi3"
2811 [(set (match_operand:V16QI 0 "register_operand" "=x")
2812 (umax:V16QI (match_operand:V16QI 1 "nonimmediate_operand" "%0")
2813 (match_operand:V16QI 2 "nonimmediate_operand" "xm")))]
2814 "TARGET_SSE2 && ix86_binary_operator_ok (UMAX, V16QImode, operands)"
2815 "pmaxub\t{%2, %0|%0, %2}"
2816 [(set_attr "type" "sseiadd")
2817 (set_attr "mode" "TI")])
2818
;; Expander for signed word maximum (V8HI).  Both inputs may be
;; nonimmediate; the preparation statement hands the operands to
;; ix86_fixup_binary_operands_no_copy before the smax set is emitted.
;; NOTE(review): presumably that helper legitimizes the operand combination
;; for the matching insn below -- its definition is not in this file.
2819(define_expand "smaxv8hi3"
2820 [(set (match_operand:V8HI 0 "register_operand" "")
2821 (smax:V8HI (match_operand:V8HI 1 "nonimmediate_operand" "")
2822 (match_operand:V8HI 2 "nonimmediate_operand" "")))]
2823 "TARGET_SSE2"
2824 "ix86_fixup_binary_operands_no_copy (SMAX, V8HImode, operands);")
2825
;; Signed word maximum, emitted as pmaxsw.  Operand 1 is tied to the
;; destination and marked commutative ("%0"); operand 2 may be a register
;; or memory ("xm").  Only matches when ix86_binary_operator_ok accepts
;; the operand combination.
2826(define_insn "*smaxv8hi3"
2827 [(set (match_operand:V8HI 0 "register_operand" "=x")
2828 (smax:V8HI (match_operand:V8HI 1 "nonimmediate_operand" "%0")
2829 (match_operand:V8HI 2 "nonimmediate_operand" "xm")))]
2830 "TARGET_SSE2 && ix86_binary_operator_ok (SMAX, V8HImode, operands)"
2831 "pmaxsw\t{%2, %0|%0, %2}"
2832 [(set_attr "type" "sseiadd")
2833 (set_attr "mode" "TI")])
2834
;; Unsigned word maximum (V8HI), synthesized from two operations:
;;
;;   tmp  = operand1 -us operand2     (unsigned saturating subtract)
;;   dest = tmp + operand2
;;
;; If a > b the subtract yields a - b and the add restores a; otherwise the
;; subtract saturates to 0 and the add yields b.  Either way the result is
;; max (a, b).
;;
;; Constraint strings are left empty, matching the other expanders in this
;; file: a define_expand only uses the predicates of its operands.
(define_expand "umaxv8hi3"
  [(set (match_operand:V8HI 0 "register_operand" "")
        (us_minus:V8HI (match_operand:V8HI 1 "register_operand" "")
                       (match_operand:V8HI 2 "nonimmediate_operand" "")))
   (set (match_dup 3)
        (plus:V8HI (match_dup 0) (match_dup 2)))]
  "TARGET_SSE2"
{
  /* Operand 3 is the caller's destination; it receives the final sum.  */
  operands[3] = operands[0];

  /* The first set writes operand 0 and the second still reads operand 2.
     If they are the same rtx, put the intermediate difference in a fresh
     pseudo so operand 2 is not overwritten before the add.  */
  if (rtx_equal_p (operands[0], operands[2]))
    operands[0] = gen_reg_rtx (V8HImode);
})
2847
;; Signed maximum for the SSEMODE14 modes (V16QI, V4SI), expanded as the
;; vector conditional  op0 = (op1 > op2) ? op1 : op2  through
;; ix86_expand_int_vcond.
(define_expand "smax<mode>3"
  [(set (match_operand:SSEMODE14 0 "register_operand" "")
        (smax:SSEMODE14 (match_operand:SSEMODE14 1 "register_operand" "")
                        (match_operand:SSEMODE14 2 "register_operand" "")))]
  "TARGET_SSE2"
{
  /* Operand vector in the layout of the vcond<mode> expander:
     [0] destination, [1] value if true, [2] value if false,
     [3] comparison, [4]/[5] the comparison's operands.  */
  rtx vcond_ops[6];
  rtx lhs = operands[1];
  rtx rhs = operands[2];
  bool expanded;

  vcond_ops[0] = operands[0];
  vcond_ops[1] = lhs;
  vcond_ops[2] = rhs;
  vcond_ops[3] = gen_rtx_GT (VOIDmode, lhs, rhs);
  vcond_ops[4] = lhs;
  vcond_ops[5] = rhs;

  /* The expansion is required to succeed for these modes.  */
  expanded = ix86_expand_int_vcond (vcond_ops);
  gcc_assert (expanded);
  DONE;
})
2867
;; Unsigned V4SI maximum, expanded as the vector conditional
;; op0 = (op1 >u op2) ? op1 : op2  through ix86_expand_int_vcond.
(define_expand "umaxv4si3"
  [(set (match_operand:V4SI 0 "register_operand" "")
        (umax:V4SI (match_operand:V4SI 1 "register_operand" "")
                   (match_operand:V4SI 2 "register_operand" "")))]
  "TARGET_SSE2"
{
  /* Operand vector in the layout of the vcond<mode> expander:
     [0] destination, [1] value if true, [2] value if false,
     [3] comparison, [4]/[5] the comparison's operands.  */
  rtx vcond_ops[6];
  rtx lhs = operands[1];
  rtx rhs = operands[2];
  bool expanded;

  vcond_ops[0] = operands[0];
  vcond_ops[1] = lhs;
  vcond_ops[2] = rhs;
  vcond_ops[3] = gen_rtx_GTU (VOIDmode, lhs, rhs);
  vcond_ops[4] = lhs;
  vcond_ops[5] = rhs;

  /* The expansion is required to succeed for this mode.  */
  expanded = ix86_expand_int_vcond (vcond_ops);
  gcc_assert (expanded);
  DONE;
})
2887
;; Expander for unsigned byte minimum (V16QI).  Both inputs may be
;; nonimmediate; the preparation statement hands the operands to
;; ix86_fixup_binary_operands_no_copy before the umin set is emitted.
;; NOTE(review): presumably that helper legitimizes the operand combination
;; for the matching insn below -- its definition is not in this file.
2888(define_expand "uminv16qi3"
2889 [(set (match_operand:V16QI 0 "register_operand" "")
2890 (umin:V16QI (match_operand:V16QI 1 "nonimmediate_operand" "")
2891 (match_operand:V16QI 2 "nonimmediate_operand" "")))]
2892 "TARGET_SSE2"
2893 "ix86_fixup_binary_operands_no_copy (UMIN, V16QImode, operands);")
2894
;; Unsigned byte minimum, emitted as pminub.  Operand 1 is tied to the
;; destination and marked commutative ("%0"); operand 2 may be a register
;; or memory ("xm").  Only matches when ix86_binary_operator_ok accepts
;; the operand combination.
2895(define_insn "*uminv16qi3"
2896 [(set (match_operand:V16QI 0 "register_operand" "=x")
2897 (umin:V16QI (match_operand:V16QI 1 "nonimmediate_operand" "%0")
2898 (match_operand:V16QI 2 "nonimmediate_operand" "xm")))]
2899 "TARGET_SSE2 && ix86_binary_operator_ok (UMIN, V16QImode, operands)"
2900 "pminub\t{%2, %0|%0, %2}"
2901 [(set_attr "type" "sseiadd")
2902 (set_attr "mode" "TI")])
2903
;; Expander for signed word minimum (V8HI).  Both inputs may be
;; nonimmediate; the preparation statement hands the operands to
;; ix86_fixup_binary_operands_no_copy before the smin set is emitted.
;; NOTE(review): presumably that helper legitimizes the operand combination
;; for the matching insn below -- its definition is not in this file.
2904(define_expand "sminv8hi3"
2905 [(set (match_operand:V8HI 0 "register_operand" "")
2906 (smin:V8HI (match_operand:V8HI 1 "nonimmediate_operand" "")
2907 (match_operand:V8HI 2 "nonimmediate_operand" "")))]
2908 "TARGET_SSE2"
2909 "ix86_fixup_binary_operands_no_copy (SMIN, V8HImode, operands);")
2910
;; Signed word minimum, emitted as pminsw.  Operand 1 is tied to the
;; destination and marked commutative ("%0"); operand 2 may be a register
;; or memory ("xm").  Only matches when ix86_binary_operator_ok accepts
;; the operand combination.
2911(define_insn "*sminv8hi3"
2912 [(set (match_operand:V8HI 0 "register_operand" "=x")
2913 (smin:V8HI (match_operand:V8HI 1 "nonimmediate_operand" "%0")
2914 (match_operand:V8HI 2 "nonimmediate_operand" "xm")))]
2915 "TARGET_SSE2 && ix86_binary_operator_ok (SMIN, V8HImode, operands)"
2916 "pminsw\t{%2, %0|%0, %2}"
2917 [(set_attr "type" "sseiadd")
2918 (set_attr "mode" "TI")])
2919
;; Signed minimum for the SSEMODE14 modes (V16QI, V4SI), expanded as the
;; vector conditional  op0 = (op1 > op2) ? op2 : op1  through
;; ix86_expand_int_vcond.
(define_expand "smin<mode>3"
  [(set (match_operand:SSEMODE14 0 "register_operand" "")
        (smin:SSEMODE14 (match_operand:SSEMODE14 1 "register_operand" "")
                        (match_operand:SSEMODE14 2 "register_operand" "")))]
  "TARGET_SSE2"
{
  /* Operand vector in the layout of the vcond<mode> expander:
     [0] destination, [1] value if true, [2] value if false,
     [3] comparison, [4]/[5] the comparison's operands.  */
  rtx vcond_ops[6];
  rtx lhs = operands[1];
  rtx rhs = operands[2];
  bool expanded;

  vcond_ops[0] = operands[0];
  /* Where lhs > rhs holds, the smaller value is rhs.  */
  vcond_ops[1] = rhs;
  vcond_ops[2] = lhs;
  vcond_ops[3] = gen_rtx_GT (VOIDmode, lhs, rhs);
  vcond_ops[4] = lhs;
  vcond_ops[5] = rhs;

  /* The expansion is required to succeed for these modes.  */
  expanded = ix86_expand_int_vcond (vcond_ops);
  gcc_assert (expanded);
  DONE;
})
2939
;; Unsigned minimum for the SSEMODE24 modes (V8HI, V4SI), expanded as the
;; vector conditional  op0 = (op1 >u op2) ? op2 : op1  through
;; ix86_expand_int_vcond.
(define_expand "umin<mode>3"
  [(set (match_operand:SSEMODE24 0 "register_operand" "")
        (umin:SSEMODE24 (match_operand:SSEMODE24 1 "register_operand" "")
                        (match_operand:SSEMODE24 2 "register_operand" "")))]
  "TARGET_SSE2"
{
  /* Operand vector in the layout of the vcond<mode> expander:
     [0] destination, [1] value if true, [2] value if false,
     [3] comparison, [4]/[5] the comparison's operands.  */
  rtx vcond_ops[6];
  rtx lhs = operands[1];
  rtx rhs = operands[2];
  bool expanded;

  vcond_ops[0] = operands[0];
  /* Where lhs >u rhs holds, the smaller value is rhs.  */
  vcond_ops[1] = rhs;
  vcond_ops[2] = lhs;
  vcond_ops[3] = gen_rtx_GTU (VOIDmode, lhs, rhs);
  vcond_ops[4] = lhs;
  vcond_ops[5] = rhs;

  /* The expansion is required to succeed for these modes.  */
  expanded = ix86_expand_int_vcond (vcond_ops);
  gcc_assert (expanded);
  DONE;
})
2959
2960;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
2961;;
2962;; Parallel integral comparisons
2963;;
2964;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
2965
;; Element-wise equality comparison for the SSEMODE124 modes (V16QI, V8HI,
;; V4SI).  Emits pcmpeqb / pcmpeqw / pcmpeqd.  Operand 1 is tied to the
;; destination and marked commutative ("%0"); operand 2 may be a register
;; or memory.  Only matches when ix86_binary_operator_ok accepts the
;; operand combination.
2966(define_insn "sse2_eq<mode>3"
2967 [(set (match_operand:SSEMODE124 0 "register_operand" "=x")
2968 (eq:SSEMODE124
2969 (match_operand:SSEMODE124 1 "nonimmediate_operand" "%0")
2970 (match_operand:SSEMODE124 2 "nonimmediate_operand" "xm")))]
2971 "TARGET_SSE2 && ix86_binary_operator_ok (EQ, <MODE>mode, operands)"
2972 "pcmpeq<ssevecsize>\t{%2, %0|%0, %2}"
2973 [(set_attr "type" "ssecmp")
2974 (set_attr "mode" "TI")])
2975
;; Element-wise greater-than comparison (RTL code gt) for the SSEMODE124
;; modes (V16QI, V8HI, V4SI).  Emits pcmpgtb / pcmpgtw / pcmpgtd.
;; Not commutative: operand 1 must be a register tied to the destination
;; ("0"), operand 2 may be a register or memory ("xm").
2976(define_insn "sse2_gt<mode>3"
2977 [(set (match_operand:SSEMODE124 0 "register_operand" "=x")
2978 (gt:SSEMODE124
2979 (match_operand:SSEMODE124 1 "register_operand" "0")
2980 (match_operand:SSEMODE124 2 "nonimmediate_operand" "xm")))]
2981 "TARGET_SSE2"
2982 "pcmpgt<ssevecsize>\t{%2, %0|%0, %2}"
2983 [(set_attr "type" "ssecmp")
2984 (set_attr "mode" "TI")])
2985
;; Vector conditional select for the SSEMODE124 modes (V16QI, V8HI, V4SI):
;;   op0 = (op4 <operator 3> op5) ? op1 : op2
;; All of the work is delegated to ix86_expand_int_vcond; the expander
;; FAILs when that helper reports it could not expand the operation.
(define_expand "vcond<mode>"
  [(set (match_operand:SSEMODE124 0 "register_operand" "")
        (if_then_else:SSEMODE124
          (match_operator 3 ""
            [(match_operand:SSEMODE124 4 "nonimmediate_operand" "")
             (match_operand:SSEMODE124 5 "nonimmediate_operand" "")])
          (match_operand:SSEMODE124 1 "general_operand" "")
          (match_operand:SSEMODE124 2 "general_operand" "")))]
  "TARGET_SSE2"
{
  /* Give up unless the helper emitted the whole sequence itself.  */
  if (!ix86_expand_int_vcond (operands))
    FAIL;
  DONE;
})
3001
;; Vector conditional select under the "vcondu" name for the SSEMODE124
;; modes (V16QI, V8HI, V4SI):
;;   op0 = (op4 <operator 3> op5) ? op1 : op2
;; All of the work is delegated to ix86_expand_int_vcond; the expander
;; FAILs when that helper reports it could not expand the operation.
(define_expand "vcondu<mode>"
  [(set (match_operand:SSEMODE124 0 "register_operand" "")
        (if_then_else:SSEMODE124
          (match_operator 3 ""
            [(match_operand:SSEMODE124 4 "nonimmediate_operand" "")
             (match_operand:SSEMODE124 5 "nonimmediate_operand" "")])
          (match_operand:SSEMODE124 1 "general_operand" "")
          (match_operand:SSEMODE124 2 "general_operand" "")))]
  "TARGET_SSE2"
{
  /* Give up unless the helper emitted the whole sequence itself.  */
  if (!ix86_expand_int_vcond (operands))
    FAIL;
  DONE;
})
3017
3018;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
3019;;
3020;; Parallel integral logical operations
3021;;
3022;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
3023
;; One's complement for the SSEMODEI integer vector modes, implemented as
;; an XOR of operand 1 with a vector whose elements are all -1.  The
;; all-ones vector is built here and forced into a register (operand 2).
(define_expand "one_cmpl<mode>2"
  [(set (match_operand:SSEMODEI 0 "register_operand" "")
        (xor:SSEMODEI (match_operand:SSEMODEI 1 "nonimmediate_operand" "")
                      (match_dup 2)))]
  "TARGET_SSE2"
{
  int nunits = GET_MODE_NUNITS (<MODE>mode);
  rtvec ones = rtvec_alloc (nunits);
  rtx all_ones;
  int elt;

  /* Every element of the constant is -1.  */
  for (elt = nunits - 1; elt >= 0; elt--)
    RTVEC_ELT (ones, elt) = constm1_rtx;

  all_ones = gen_rtx_CONST_VECTOR (<MODE>mode, ones);
  operands[2] = force_reg (<MODE>mode, all_ones);
})
3038
;; Expander for bitwise AND on the SSEMODEI integer vector modes.  Both
;; inputs may be nonimmediate; the preparation statement hands the operands
;; to ix86_fixup_binary_operands_no_copy before the and set is emitted.
;; NOTE(review): presumably that helper legitimizes the operand combination
;; for the matching insn below -- its definition is not in this file.
3039(define_expand "and<mode>3"
3040 [(set (match_operand:SSEMODEI 0 "register_operand" "")
3041 (and:SSEMODEI (match_operand:SSEMODEI 1 "nonimmediate_operand" "")
3042 (match_operand:SSEMODEI 2 "nonimmediate_operand" "")))]
3043 "TARGET_SSE2"
3044 "ix86_fixup_binary_operands_no_copy (AND, <MODE>mode, operands);")
3045
;; Bitwise AND for the SSEMODEI modes, emitted as pand regardless of the
;; element size.  Operand 1 is tied to the destination and marked
;; commutative ("%0"); operand 2 may be a register or memory.  Only
;; matches when ix86_binary_operator_ok accepts the operand combination.
3046(define_insn "*and<mode>3"
3047 [(set (match_operand:SSEMODEI 0 "register_operand" "=x")
3048 (and:SSEMODEI
3049 (match_operand:SSEMODEI 1 "nonimmediate_operand" "%0")
3050 (match_operand:SSEMODEI 2 "nonimmediate_operand" "xm")))]
3051 "TARGET_SSE2 && ix86_binary_operator_ok (AND, <MODE>mode, operands)"
3052 "pand\t{%2, %0|%0, %2}"
3053 [(set_attr "type" "sselog")
3054 (set_attr "mode" "TI")])
3055
;; AND-NOT for the SSEMODEI modes: op0 = (~op1) & op2, emitted as pandn.
;; Note that it is operand 1 (tied to the destination) that is
;; complemented, so the pattern is not commutative.
3056(define_insn "sse2_nand<mode>3"
3057 [(set (match_operand:SSEMODEI 0 "register_operand" "=x")
3058 (and:SSEMODEI
3059 (not:SSEMODEI (match_operand:SSEMODEI 1 "register_operand" "0"))
3060 (match_operand:SSEMODEI 2 "nonimmediate_operand" "xm")))]
3061 "TARGET_SSE2"
3062 "pandn\t{%2, %0|%0, %2}"
3063 [(set_attr "type" "sselog")
3064 (set_attr "mode" "TI")])
3065
;; Expander for bitwise inclusive OR on the SSEMODEI integer vector modes.
;; Both inputs may be nonimmediate; the preparation statement hands the
;; operands to ix86_fixup_binary_operands_no_copy before the ior set is
;; emitted.
;; NOTE(review): presumably that helper legitimizes the operand combination
;; for the matching insn below -- its definition is not in this file.
3066(define_expand "ior<mode>3"
3067 [(set (match_operand:SSEMODEI 0 "register_operand" "")
3068 (ior:SSEMODEI (match_operand:SSEMODEI 1 "nonimmediate_operand" "")
3069 (match_operand:SSEMODEI 2 "nonimmediate_operand" "")))]
3070 "TARGET_SSE2"
3071 "ix86_fixup_binary_operands_no_copy (IOR, <MODE>mode, operands);")
3072
;; Bitwise inclusive OR for the SSEMODEI modes, emitted as por regardless
;; of the element size.  Operand 1 is tied to the destination and marked
;; commutative ("%0"); operand 2 may be a register or memory.  Only
;; matches when ix86_binary_operator_ok accepts the operand combination.
3073(define_insn "*ior<mode>3"
3074 [(set (match_operand:SSEMODEI 0 "register_operand" "=x")
3075 (ior:SSEMODEI
3076 (match_operand:SSEMODEI 1 "nonimmediate_operand" "%0")
3077 (match_operand:SSEMODEI 2 "nonimmediate_operand" "xm")))]
3078 "TARGET_SSE2 && ix86_binary_operator_ok (IOR, <MODE>mode, operands)"
3079 "por\t{%2, %0|%0, %2}"
3080 [(set_attr "type" "sselog")
3081 (set_attr "mode" "TI")])
3082
;; Expander for bitwise exclusive OR on the SSEMODEI integer vector modes.
;; Both inputs may be nonimmediate; the preparation statement hands the
;; operands to ix86_fixup_binary_operands_no_copy before the xor set is
;; emitted.
;; NOTE(review): presumably that helper legitimizes the operand combination
;; for the matching insn below -- its definition is not in this file.
3083(define_expand "xor<mode>3"
3084 [(set (match_operand:SSEMODEI 0 "register_operand" "")
3085 (xor:SSEMODEI (match_operand:SSEMODEI 1 "nonimmediate_operand" "")
3086 (match_operand:SSEMODEI 2 "nonimmediate_operand" "")))]
3087 "TARGET_SSE2"
3088 "ix86_fixup_binary_operands_no_copy (XOR, <MODE>mode, operands);")
3089
;; Bitwise exclusive OR for the SSEMODEI modes, emitted as pxor regardless
;; of the element size.  Operand 1 is tied to the destination and marked
;; commutative ("%0"); operand 2 may be a register or memory.  Only
;; matches when ix86_binary_operator_ok accepts the operand combination.
3090(define_insn "*xor<mode>3"
3091 [(set (match_operand:SSEMODEI 0 "register_operand" "=x")
3092 (xor:SSEMODEI
3093 (match_operand:SSEMODEI 1 "nonimmediate_operand" "%0")
3094 (match_operand:SSEMODEI 2 "nonimmediate_operand" "xm")))]
3095 "TARGET_SSE2 && ix86_binary_operator_ok (XOR, <MODE>mode, operands)"
3096 "pxor\t{%2, %0|%0, %2}"
3097 [(set_attr "type" "sselog")
3098 (set_attr "mode" "TI")])
3099
3100;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
3101;;
3102;; Parallel integral element swizzling
3103;;
3104;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
3105
;; packsswb: narrow two V8HI inputs to bytes with signed saturation
;; (ss_truncate) and concatenate them into one V16QI result.  The half
;; derived from operand 1 comes first in the vec_concat; operand 1 is
;; tied to the destination.
3106(define_insn "sse2_packsswb"
3107 [(set (match_operand:V16QI 0 "register_operand" "=x")
3108 (vec_concat:V16QI
3109 (ss_truncate:V8QI
3110 (match_operand:V8HI 1 "register_operand" "0"))
3111 (ss_truncate:V8QI
3112 (match_operand:V8HI 2 "nonimmediate_operand" "xm"))))]
3113 "TARGET_SSE2"
3114 "packsswb\t{%2, %0|%0, %2}"
3115 [(set_attr "type" "sselog")
3116 (set_attr "mode" "TI")])
3117
;; packssdw: narrow two V4SI inputs to words with signed saturation
;; (ss_truncate) and concatenate them into one V8HI result.  The half
;; derived from operand 1 comes first in the vec_concat; operand 1 is
;; tied to the destination.
3118(define_insn "sse2_packssdw"
3119 [(set (match_operand:V8HI 0 "register_operand" "=x")
3120 (vec_concat:V8HI
3121 (ss_truncate:V4HI
3122 (match_operand:V4SI 1 "register_operand" "0"))
3123 (ss_truncate:V4HI
3124 (match_operand:V4SI 2 "nonimmediate_operand" "xm"))))]
3125 "TARGET_SSE2"
3126 "packssdw\t{%2, %0|%0, %2}"
3127 [(set_attr "type" "sselog")
3128 (set_attr "mode" "TI")])
3129
;; packuswb: narrow two V8HI inputs to bytes with unsigned saturation
;; (us_truncate) and concatenate them into one V16QI result.  The half
;; derived from operand 1 comes first in the vec_concat; operand 1 is
;; tied to the destination.
3130(define_insn "sse2_packuswb"
3131 [(set (match_operand:V16QI 0 "register_operand" "=x")
3132 (vec_concat:V16QI
3133 (us_truncate:V8QI
3134 (match_operand:V8HI 1 "register_operand" "0"))
3135 (us_truncate:V8QI
3136 (match_operand:V8HI 2 "nonimmediate_operand" "xm"))))]
3137 "TARGET_SSE2"
3138 "packuswb\t{%2, %0|%0, %2}"
3139 [(set_attr "type" "sselog")
3140 (set_attr "mode" "TI")])
3141
;; punpckhbw: interleave the high eight bytes of the two inputs.
;; The selector indexes the 32-element concatenation op1:op2, so 8..15
;; are the upper bytes of operand 1 and 24..31 the upper bytes of
;; operand 2; they alternate op1, op2, op1, op2, ...
3142(define_insn "sse2_punpckhbw"
3143 [(set (match_operand:V16QI 0 "register_operand" "=x")
3144 (vec_select:V16QI
3145 (vec_concat:V32QI
3146 (match_operand:V16QI 1 "register_operand" "0")
3147 (match_operand:V16QI 2 "nonimmediate_operand" "xm"))
3148 (parallel [(const_int 8) (const_int 24)
3149 (const_int 9) (const_int 25)
3150 (const_int 10) (const_int 26)
3151 (const_int 11) (const_int 27)
3152 (const_int 12) (const_int 28)
3153 (const_int 13) (const_int 29)
3154 (const_int 14) (const_int 30)
3155 (const_int 15) (const_int 31)])))]
3156 "TARGET_SSE2"
3157 "punpckhbw\t{%2, %0|%0, %2}"
3158 [(set_attr "type" "sselog")
3159 (set_attr "mode" "TI")])
3160
;; punpcklbw: interleave the low eight bytes of the two inputs.
;; The selector indexes the 32-element concatenation op1:op2, so 0..7
;; are the lower bytes of operand 1 and 16..23 the lower bytes of
;; operand 2; they alternate op1, op2, op1, op2, ...
3161(define_insn "sse2_punpcklbw"
3162 [(set (match_operand:V16QI 0 "register_operand" "=x")
3163 (vec_select:V16QI
3164 (vec_concat:V32QI
3165 (match_operand:V16QI 1 "register_operand" "0")
3166 (match_operand:V16QI 2 "nonimmediate_operand" "xm"))
3167 (parallel [(const_int 0) (const_int 16)
3168 (const_int 1) (const_int 17)
3169 (const_int 2) (const_int 18)
3170 (const_int 3) (const_int 19)
3171 (const_int 4) (const_int 20)
3172 (const_int 5) (const_int 21)
3173 (const_int 6) (const_int 22)
3174 (const_int 7) (const_int 23)])))]
3175 "TARGET_SSE2"
3176 "punpcklbw\t{%2, %0|%0, %2}"
3177 [(set_attr "type" "sselog")
3178 (set_attr "mode" "TI")])
3179
;; punpckhwd: interleave the high four words of the two inputs.
;; The selector indexes the 16-element concatenation op1:op2, so 4..7
;; come from operand 1 and 12..15 from operand 2, alternating.
3180(define_insn "sse2_punpckhwd"
3181 [(set (match_operand:V8HI 0 "register_operand" "=x")
3182 (vec_select:V8HI
3183 (vec_concat:V16HI
3184 (match_operand:V8HI 1 "register_operand" "0")
3185 (match_operand:V8HI 2 "nonimmediate_operand" "xm"))
3186 (parallel [(const_int 4) (const_int 12)
3187 (const_int 5) (const_int 13)
3188 (const_int 6) (const_int 14)
3189 (const_int 7) (const_int 15)])))]
3190 "TARGET_SSE2"
3191 "punpckhwd\t{%2, %0|%0, %2}"
3192 [(set_attr "type" "sselog")
3193 (set_attr "mode" "TI")])
3194
;; punpcklwd: interleave the low four words of the two inputs.
;; The selector indexes the 16-element concatenation op1:op2, so 0..3
;; come from operand 1 and 8..11 from operand 2, alternating.
3195(define_insn "sse2_punpcklwd"
3196 [(set (match_operand:V8HI 0 "register_operand" "=x")
3197 (vec_select:V8HI
3198 (vec_concat:V16HI
3199 (match_operand:V8HI 1 "register_operand" "0")
3200 (match_operand:V8HI 2 "nonimmediate_operand" "xm"))
3201 (parallel [(const_int 0) (const_int 8)
3202 (const_int 1) (const_int 9)
3203 (const_int 2) (const_int 10)
3204 (const_int 3) (const_int 11)])))]
3205 "TARGET_SSE2"
3206 "punpcklwd\t{%2, %0|%0, %2}"
3207 [(set_attr "type" "sselog")
3208 (set_attr "mode" "TI")])
3209
;; punpckhdq: interleave the high two doublewords of the two inputs.
;; The selector indexes the 8-element concatenation op1:op2, so 2..3
;; come from operand 1 and 6..7 from operand 2, alternating.
3210(define_insn "sse2_punpckhdq"
3211 [(set (match_operand:V4SI 0 "register_operand" "=x")
3212 (vec_select:V4SI
3213 (vec_concat:V8SI
3214 (match_operand:V4SI 1 "register_operand" "0")
3215 (match_operand:V4SI 2 "nonimmediate_operand" "xm"))
3216 (parallel [(const_int 2) (const_int 6)
3217 (const_int 3) (const_int 7)])))]
3218 "TARGET_SSE2"
3219 "punpckhdq\t{%2, %0|%0, %2}"
3220 [(set_attr "type" "sselog")
3221 (set_attr "mode" "TI")])
3222
;; punpckldq: interleave the low two doublewords of the two inputs.
;; The selector indexes the 8-element concatenation op1:op2, so 0..1
;; come from operand 1 and 4..5 from operand 2, alternating.
3223(define_insn "sse2_punpckldq"
3224 [(set (match_operand:V4SI 0 "register_operand" "=x")
3225 (vec_select:V4SI
3226 (vec_concat:V8SI
3227 (match_operand:V4SI 1 "register_operand" "0")
3228 (match_operand:V4SI 2 "nonimmediate_operand" "xm"))
3229 (parallel [(const_int 0) (const_int 4)
3230 (const_int 1) (const_int 5)])))]
3231 "TARGET_SSE2"
3232 "punpckldq\t{%2, %0|%0, %2}"
3233 [(set_attr "type" "sselog")
3234 (set_attr "mode" "TI")])
3235
;; punpckhqdq: build a V2DI from the high quadword of each input.
;; In the 4-element concatenation op1:op2, index 1 is the high element of
;; operand 1 and index 3 the high element of operand 2.
3236(define_insn "sse2_punpckhqdq"
3237 [(set (match_operand:V2DI 0 "register_operand" "=x")
3238 (vec_select:V2DI
3239 (vec_concat:V4DI
3240 (match_operand:V2DI 1 "register_operand" "0")
3241 (match_operand:V2DI 2 "nonimmediate_operand" "xm"))
3242 (parallel [(const_int 1)
3243 (const_int 3)])))]
3244 "TARGET_SSE2"
3245 "punpckhqdq\t{%2, %0|%0, %2}"
3246 [(set_attr "type" "sselog")
3247 (set_attr "mode" "TI")])
3248
;; punpcklqdq: build a V2DI from the low quadword of each input.
;; In the 4-element concatenation op1:op2, index 0 is the low element of
;; operand 1 and index 2 the low element of operand 2.
3249(define_insn "sse2_punpcklqdq"
3250 [(set (match_operand:V2DI 0 "register_operand" "=x")
3251 (vec_select:V2DI
3252 (vec_concat:V4DI
3253 (match_operand:V2DI 1 "register_operand" "0")
3254 (match_operand:V2DI 2 "nonimmediate_operand" "xm"))
3255 (parallel [(const_int 0)
3256 (const_int 2)])))]
3257 "TARGET_SSE2"
3258 "punpcklqdq\t{%2, %0|%0, %2}"
3259 [(set_attr "type" "sselog")
3260 (set_attr "mode" "TI")])
3261
;; Expander for inserting a word into a V8HI.  Callers pass the value as
;; an SImode operand (2) and the element number 0..7 as operand 3.  The
;; preparation code narrows operand 2 to its HImode lowpart and converts
;; the element number into the one-hot vec_merge mask (1 << index) that
;; the *sse2_pinsrw insn expects.
3262(define_expand "sse2_pinsrw"
3263 [(set (match_operand:V8HI 0 "register_operand" "")
3264 (vec_merge:V8HI
3265 (vec_duplicate:V8HI
3266 (match_operand:SI 2 "nonimmediate_operand" ""))
3267 (match_operand:V8HI 1 "register_operand" "")
3268 (match_operand:SI 3 "const_0_to_7_operand" "")))]
3269 "TARGET_SSE2"
3270{
3271 operands[2] = gen_lowpart (HImode, operands[2]);
/* Element index -> single-bit merge mask.  */
3272 operands[3] = GEN_INT ((1 << INTVAL (operands[3])));
3273})
3274
;; pinsrw: merge the HImode value (operand 2, register or memory) into the
;; V8HI in operand 1 at the position given by the one-hot mask in operand 3
;; (predicate const_pow2_1_to_128_operand).  The output code converts the
;; mask back into an element index with exact_log2 for the instruction's
;; immediate; operand 2 is printed with the %k modifier.
3275(define_insn "*sse2_pinsrw"
3276 [(set (match_operand:V8HI 0 "register_operand" "=x")
3277 (vec_merge:V8HI
3278 (vec_duplicate:V8HI
3279 (match_operand:HI 2 "nonimmediate_operand" "rm"))
3280 (match_operand:V8HI 1 "register_operand" "0")
3281 (match_operand:SI 3 "const_pow2_1_to_128_operand" "n")))]
3282 "TARGET_SSE2"
3283{
/* Single-bit merge mask -> element index.  */
3284 operands[3] = GEN_INT (exact_log2 (INTVAL (operands[3])));
3285 return "pinsrw\t{%3, %k2, %0|%0, %k2, %3}";
3286}
3287 [(set_attr "type" "sselog")
3288 (set_attr "mode" "TI")])
3289
;; pextrw: select the word at constant index operand 2 (0..7) from the
;; V8HI in operand 1 and zero-extend it into the SImode general register
;; operand 0.
3290(define_insn "sse2_pextrw"
3291 [(set (match_operand:SI 0 "register_operand" "=r")
3292 (zero_extend:SI
3293 (vec_select:HI
3294 (match_operand:V8HI 1 "register_operand" "x")
3295 (parallel [(match_operand:SI 2 "const_0_to_7_operand" "n")]))))]
3296 "TARGET_SSE2"
3297 "pextrw\t{%2, %1, %0|%0, %1, %2}"
3298 [(set_attr "type" "sselog")
3299 (set_attr "mode" "TI")])
3300
;; Expander for a V4SI shuffle controlled by one packed constant
;; (operand 2).  The constant holds four 2-bit source-element selectors,
;; lowest bits first; they are unpacked and passed individually to
;; sse2_pshufd_1, which represents the shuffle as a vec_select.
(define_expand "sse2_pshufd"
  [(match_operand:V4SI 0 "register_operand" "")
   (match_operand:V4SI 1 "nonimmediate_operand" "")
   (match_operand:SI 2 "const_int_operand" "")]
  "TARGET_SSE2"
{
  int imm = INTVAL (operands[2]);
  rtx sel0 = GEN_INT ((imm >> 0) & 3);
  rtx sel1 = GEN_INT ((imm >> 2) & 3);
  rtx sel2 = GEN_INT ((imm >> 4) & 3);
  rtx sel3 = GEN_INT ((imm >> 6) & 3);

  emit_insn (gen_sse2_pshufd_1 (operands[0], operands[1],
                                sel0, sel1, sel2, sel3));
  DONE;
})
3315
;; pshufd represented as a vec_select with four independent selectors
;; (operands 2..5, each 0..3).  At output time the selectors are packed
;; back into the single immediate, two bits per element with operand 2 in
;; the lowest bits, and printed through operand 2.
(define_insn "sse2_pshufd_1"
  [(set (match_operand:V4SI 0 "register_operand" "=x")
        (vec_select:V4SI
          (match_operand:V4SI 1 "nonimmediate_operand" "xm")
          (parallel [(match_operand 2 "const_0_to_3_operand" "")
                     (match_operand 3 "const_0_to_3_operand" "")
                     (match_operand 4 "const_0_to_3_operand" "")
                     (match_operand 5 "const_0_to_3_operand" "")])))]
  "TARGET_SSE2"
{
  int imm = (INTVAL (operands[2])
             | (INTVAL (operands[3]) << 2)
             | (INTVAL (operands[4]) << 4)
             | (INTVAL (operands[5]) << 6));

  /* Reuse operand 2 to carry the packed immediate into the template.  */
  operands[2] = GEN_INT (imm);
  return "pshufd\t{%2, %1, %0|%0, %1, %2}";
}
  [(set_attr "type" "sselog1")
   (set_attr "mode" "TI")])
3337
;; Expander for a shuffle of the low four words of a V8HI controlled by
;; one packed constant (operand 2).  The constant holds four 2-bit
;; selectors, lowest bits first; they are unpacked and passed individually
;; to sse2_pshuflw_1.
(define_expand "sse2_pshuflw"
  [(match_operand:V8HI 0 "register_operand" "")
   (match_operand:V8HI 1 "nonimmediate_operand" "")
   (match_operand:SI 2 "const_int_operand" "")]
  "TARGET_SSE2"
{
  int imm = INTVAL (operands[2]);
  rtx sel0 = GEN_INT ((imm >> 0) & 3);
  rtx sel1 = GEN_INT ((imm >> 2) & 3);
  rtx sel2 = GEN_INT ((imm >> 4) & 3);
  rtx sel3 = GEN_INT ((imm >> 6) & 3);

  emit_insn (gen_sse2_pshuflw_1 (operands[0], operands[1],
                                 sel0, sel1, sel2, sel3));
  DONE;
})
3352
;; pshuflw represented as a vec_select: result elements 0..3 are chosen by
;; operands 2..5 (each 0..3), while elements 4..7 are copied unchanged
;; (fixed indices 4, 5, 6, 7).  At output time the selectors are packed
;; into the single immediate, two bits each with operand 2 lowest.
(define_insn "sse2_pshuflw_1"
  [(set (match_operand:V8HI 0 "register_operand" "=x")
        (vec_select:V8HI
          (match_operand:V8HI 1 "nonimmediate_operand" "xm")
          (parallel [(match_operand 2 "const_0_to_3_operand" "")
                     (match_operand 3 "const_0_to_3_operand" "")
                     (match_operand 4 "const_0_to_3_operand" "")
                     (match_operand 5 "const_0_to_3_operand" "")
                     (const_int 4)
                     (const_int 5)
                     (const_int 6)
                     (const_int 7)])))]
  "TARGET_SSE2"
{
  int imm = (INTVAL (operands[2])
             | (INTVAL (operands[3]) << 2)
             | (INTVAL (operands[4]) << 4)
             | (INTVAL (operands[5]) << 6));

  /* Reuse operand 2 to carry the packed immediate into the template.  */
  operands[2] = GEN_INT (imm);
  return "pshuflw\t{%2, %1, %0|%0, %1, %2}";
}
  [(set_attr "type" "sselog")
   (set_attr "mode" "TI")])
3378
;; Expander for a shuffle of the high four words of a V8HI controlled by
;; one packed constant (operand 2).  The constant holds four 2-bit
;; selectors, lowest bits first; each is biased by 4 so that it names an
;; element 4..7 of the source, as sse2_pshufhw_1 requires.
(define_expand "sse2_pshufhw"
  [(match_operand:V8HI 0 "register_operand" "")
   (match_operand:V8HI 1 "nonimmediate_operand" "")
   (match_operand:SI 2 "const_int_operand" "")]
  "TARGET_SSE2"
{
  int imm = INTVAL (operands[2]);
  rtx sel0 = GEN_INT (((imm >> 0) & 3) + 4);
  rtx sel1 = GEN_INT (((imm >> 2) & 3) + 4);
  rtx sel2 = GEN_INT (((imm >> 4) & 3) + 4);
  rtx sel3 = GEN_INT (((imm >> 6) & 3) + 4);

  emit_insn (gen_sse2_pshufhw_1 (operands[0], operands[1],
                                 sel0, sel1, sel2, sel3));
  DONE;
})
3393
;; pshufhw represented as a vec_select: result elements 0..3 are copied
;; unchanged (fixed indices 0, 1, 2, 3), while elements 4..7 are chosen by
;; operands 2..5 (each 4..7).  At output time the bias of 4 is removed and
;; the selectors are packed into the single immediate, two bits each with
;; operand 2 lowest.
(define_insn "sse2_pshufhw_1"
  [(set (match_operand:V8HI 0 "register_operand" "=x")
        (vec_select:V8HI
          (match_operand:V8HI 1 "nonimmediate_operand" "xm")
          (parallel [(const_int 0)
                     (const_int 1)
                     (const_int 2)
                     (const_int 3)
                     (match_operand 2 "const_4_to_7_operand" "")
                     (match_operand 3 "const_4_to_7_operand" "")
                     (match_operand 4 "const_4_to_7_operand" "")
                     (match_operand 5 "const_4_to_7_operand" "")])))]
  "TARGET_SSE2"
{
  int imm = ((INTVAL (operands[2]) - 4)
             | ((INTVAL (operands[3]) - 4) << 2)
             | ((INTVAL (operands[4]) - 4) << 4)
             | ((INTVAL (operands[5]) - 4) << 6));

  /* Reuse operand 2 to carry the packed immediate into the template.  */
  operands[2] = GEN_INT (imm);
  return "pshufhw\t{%2, %1, %0|%0, %1, %2}";
}
  [(set_attr "type" "sselog")
   (set_attr "mode" "TI")])
3419
;; Expander that places an SImode value in element 0 of a V4SI and zeroes
;; the remaining elements: the duplicated scalar is vec_merge'd with mask 1
;; into an all-zero vector, which the preparation statement supplies as
;; operand 2 (CONST0_RTX (V4SImode)).  Note the condition is TARGET_SSE,
;; not TARGET_SSE2, despite the pattern name.
3420(define_expand "sse2_loadd"
3421 [(set (match_operand:V4SI 0 "register_operand" "")
3422 (vec_merge:V4SI
3423 (vec_duplicate:V4SI
3424 (match_operand:SI 1 "nonimmediate_operand" ""))
3425 (match_dup 2)
3426 (const_int 1)))]
3427 "TARGET_SSE"
3428 "operands[2] = CONST0_RTX (V4SImode);")
3429
;; Insert the SImode operand 2 into element 0 of a V4SI (vec_merge mask 1);
;; the other elements come from operand 1.  Three alternatives:
;;   1. operand 1 matches "C", scalar from memory or a general register:
;;      movd.
;;   2. operand 1 matches "C", scalar from memory: movss.
;;   3. operand 1 is tied to the destination ("0"), scalar in an "x"
;;      register: movss.
3430(define_insn "sse2_loadld"
3431 [(set (match_operand:V4SI 0 "register_operand" "=Y,x,x")
3432 (vec_merge:V4SI
3433 (vec_duplicate:V4SI
3434 (match_operand:SI 2 "nonimmediate_operand" "mr,m,x"))
3435 (match_operand:V4SI 1 "reg_or_0_operand" " C,C,0")
3436 (const_int 1)))]
3437 "TARGET_SSE"
3438 "@
3439 movd\t{%2, %0|%0, %2}
3440 movss\t{%2, %0|%0, %2}
3441 movss\t{%2, %0|%0, %2}"
3442 [(set_attr "type" "ssemov")
3443 (set_attr "mode" "TI,V4SF,SF")])
3444
3445;; ??? The hardware supports more, but TARGET_INTER_UNIT_MOVES must
3446;; be taken into account, and movdi isn't fully populated even without.
;; Extract element 0 of a V4SI into an SImode destination (memory or an
;; "x" register).  The insn itself has no assembler output ("#"); after
;; reload it is split into a plain SImode move whose source is the same
;; hard register as operand 1, re-expressed in SImode.
3447(define_insn_and_split "sse2_stored"
3448 [(set (match_operand:SI 0 "nonimmediate_operand" "=mx")
3449 (vec_select:SI
3450 (match_operand:V4SI 1 "register_operand" "x")
3451 (parallel [(const_int 0)])))]
3452 "TARGET_SSE"
3453 "#"
3454 "&& reload_completed"
3455 [(set (match_dup 0) (match_dup 1))]
3456{
/* Same register number, viewed as SImode.  */
3457 operands[1] = gen_rtx_REG (SImode, REGNO (operands[1]));
3458})
3459
;; Named expander for extracting element 0 of a V2DI into a DImode
;; destination.  It has no preparation code; it simply emits the
;; vec_select set, which the *sse2_storeq insn in this file matches.
3460(define_expand "sse_storeq"
3461 [(set (match_operand:DI 0 "nonimmediate_operand" "")
3462 (vec_select:DI
3463 (match_operand:V2DI 1 "register_operand" "")
3464 (parallel [(const_int 0)])))]
3465 "TARGET_SSE"
3466 "")
3467
3468;; ??? The hardware supports more, but TARGET_INTER_UNIT_MOVES must
3469;; be taken into account, and movdi isn't fully populated even without.
;; Extract element 0 of a V2DI into a DImode destination (memory or an "x"
;; register).  The insn has no assembler output of its own ("#"), so it
;; must be split before final; the define_split that follows handles it
;; once reload has completed.
3470(define_insn "*sse2_storeq"
3471 [(set (match_operand:DI 0 "nonimmediate_operand" "=mx")
3472 (vec_select:DI
3473 (match_operand:V2DI 1 "register_operand" "x")
3474 (parallel [(const_int 0)])))]
3475 "TARGET_SSE"
3476 "#")
3477
;; Post-reload split for the element-0 V2DI extraction: replace the
;; vec_select by a plain DImode move whose source is the same hard register
;; as operand 1, re-expressed in DImode.
3478(define_split
3479 [(set (match_operand:DI 0 "nonimmediate_operand" "")
3480 (vec_select:DI
3481 (match_operand:V2DI 1 "register_operand" "")
3482 (parallel [(const_int 0)])))]
3483 "TARGET_SSE && reload_completed"
3484 [(set (match_dup 0) (match_dup 1))]
3485{
/* Same register number, viewed as DImode.  */
3486 operands[1] = gen_rtx_REG (DImode, REGNO (operands[1]));
3487})
3488
;; Extract element 1 (the high quadword) of a V2DI when SSE2 is available.
;; The condition rejects the memory-to-memory combination.  Alternatives:
;;   1. register source, memory destination: movhps.
;;   2. source tied to the destination register: psrldq by 8 in place.
;;   3. offsettable memory source ("o"), register destination: movq from
;;      the operand printed with the %H modifier.
3489(define_insn "*vec_extractv2di_1_sse2"
3490 [(set (match_operand:DI 0 "nonimmediate_operand" "=m,x,x")
3491 (vec_select:DI
3492 (match_operand:V2DI 1 "nonimmediate_operand" "x,0,o")
3493 (parallel [(const_int 1)])))]
3494 "TARGET_SSE2 && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
3495 "@
3496 movhps\t{%1, %0|%0, %1}
3497 psrldq\t{$8, %0|%0, 8}
3498 movq\t{%H1, %0|%0, %H1}"
3499 [(set_attr "type" "ssemov,sseishft,ssemov")
3500 (set_attr "memory" "*,none,*")
3501 (set_attr "mode" "V2SF,TI,TI")])
3502
3503;; Not sure this is ever used, but it doesn't hurt to have it. -aoliva
;; Extract element 1 (the high quadword) of a V2DI when only SSE1 is
;; available (the condition requires !TARGET_SSE2).  The memory-to-memory
;; combination is rejected.  Alternatives:
;;   1. register source, memory destination: movhps.
;;   2. register source, register destination: movhlps.
;;   3. offsettable memory source ("o"), register destination: movlps from
;;      the operand printed with the %H modifier.
3504(define_insn "*vec_extractv2di_1_sse"
3505 [(set (match_operand:DI 0 "nonimmediate_operand" "=m,x,x")
3506 (vec_select:DI
3507 (match_operand:V2DI 1 "nonimmediate_operand" "x,x,o")
3508 (parallel [(const_int 1)])))]
3509 "!TARGET_SSE2 && TARGET_SSE
3510 && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
3511 "@
3512 movhps\t{%1, %0|%0, %1}
3513 movhlps\t{%1, %0|%0, %1}
3514 movlps\t{%H1, %0|%0, %H1}"
3515 [(set_attr "type" "ssemov")
3516 (set_attr "mode" "V2SF,V4SF,V2SF")])
3517
;; Broadcast an SImode register value to all four elements of a V4SI.
;; Alternative 1 ("Y" registers) uses pshufd with selector 0 from
;; operand 1; alternative 2 requires operand 1 to be tied to the
;; destination and uses shufps with selector 0 on the destination itself.
3518(define_insn "*vec_dupv4si"
3519 [(set (match_operand:V4SI 0 "register_operand" "=Y,x")
3520 (vec_duplicate:V4SI
3521 (match_operand:SI 1 "register_operand" " Y,0")))]
3522 "TARGET_SSE"
3523 "@
3524 pshufd\t{$0, %1, %0|%0, %1, 0}
3525 shufps\t{$0, %0, %0|%0, %0, 0}"
3526 [(set_attr "type" "sselog1")
3527 (set_attr "mode" "TI,V4SF")])
3528
;; Broadcast a DImode register value to both elements of a V2DI.  In both
;; alternatives operand 1 is tied to the destination, so the instruction
;; operates on %0 alone: punpcklqdq for the "Y" alternative, movlhps for
;; the "x" alternative.  Both instruction operands are %0, so the templates
;; need no {att|intel} dialect braces.
3529(define_insn "*vec_dupv2di"
3530 [(set (match_operand:V2DI 0 "register_operand" "=Y,x")
3531 (vec_duplicate:V2DI
3532 (match_operand:DI 1 "register_operand" " 0,0")))]
3533 "TARGET_SSE"
3534 "@
3535 punpcklqdq\t%0, %0
3536 movlhps\t%0, %0"
3537 [(set_attr "type" "sselog1,ssemov")
3538 (set_attr "mode" "TI,TI,V4SF")])
3539
3540;; ??? In theory we can match memory for the MMX alternative, but allowing
3541;; nonimmediate_operand for operand 2 and *not* allowing memory for the SSE
3542;; alternatives pretty much forces the MMX alternative to be chosen.
;; Build a V2SI from two SImode values (operand 1 is element 0) on SSE2.
;; Alternatives 1-2 target "Y" registers, 3-4 target "y" registers (the
;; latter carry the mmxcvt/mmxmov types).  When operand 2 is a register,
;; operand 1 must already be in the destination and punpckldq appends
;; operand 2; when operand 2 matches "C", a movd of operand 1 suffices.
3543(define_insn "*sse2_concatv2si"
3544 [(set (match_operand:V2SI 0 "register_operand" "=Y, Y,*y,*y")
3545 (vec_concat:V2SI
3546 (match_operand:SI 1 "nonimmediate_operand" " 0,rm, 0,rm")
3547 (match_operand:SI 2 "reg_or_0_operand" " Y, C,*y, C")))]
3548 "TARGET_SSE2"
3549 "@
3550 punpckldq\t{%2, %0|%0, %2}
3551 movd\t{%1, %0|%0, %1}
3552 punpckldq\t{%2, %0|%0, %2}
3553 movd\t{%1, %0|%0, %1}"
3554 [(set_attr "type" "sselog,ssemov,mmxcvt,mmxmov")
3555 (set_attr "mode" "TI,TI,DI,DI")])
3556
;; Build a V2SI from two SImode values (operand 1 is element 0); enabled
;; for TARGET_SSE.  Alternatives 1-2 target "x" registers and use
;; unpcklps / movss; alternatives 3-4 target "y" registers and use
;; punpckldq / movd.  When operand 2 is a register, operand 1 must already
;; be in the destination; when operand 2 matches "C", a single move of
;; operand 1 suffices.
3557(define_insn "*sse1_concatv2si"
3558 [(set (match_operand:V2SI 0 "register_operand" "=x,x,*y,*y")
3559 (vec_concat:V2SI
3560 (match_operand:SI 1 "nonimmediate_operand" " 0,m, 0,*rm")
3561 (match_operand:SI 2 "reg_or_0_operand" " x,C,*y,C")))]
3562 "TARGET_SSE"
3563 "@
3564 unpcklps\t{%2, %0|%0, %2}
3565 movss\t{%1, %0|%0, %1}
3566 punpckldq\t{%2, %0|%0, %2}
3567 movd\t{%1, %0|%0, %1}"
3568 [(set_attr "type" "sselog,ssemov,mmxcvt,mmxmov")
3569 (set_attr "mode" "V4SF,V4SF,DI,DI")])
3570
;; Build a V4SI from two V2SI halves.  Operand 1 (the low half) is always
;; tied to the destination; operand 2 supplies the high half:
;;   1. "Y" register: punpcklqdq.
;;   2. "x" register: movlhps.
;;   3. memory: movhps.
3571(define_insn "*vec_concatv4si_1"
3572 [(set (match_operand:V4SI 0 "register_operand" "=Y,x,x")
3573 (vec_concat:V4SI
3574 (match_operand:V2SI 1 "register_operand" " 0,0,0")
3575 (match_operand:V2SI 2 "nonimmediate_operand" " Y,x,m")))]
3576 "TARGET_SSE"
3577 "@
3578 punpcklqdq\t{%2, %0|%0, %2}
3579 movlhps\t{%2, %0|%0, %2}
3580 movhps\t{%2, %0|%0, %2}"
3581 [(set_attr "type" "sselog,ssemov,ssemov")
3582 (set_attr "mode" "TI,V4SF,V2SF")])
3583
;; Build a V2DI from two DImode values (operand 1 is element 0).
;; Alternatives, in order:
;;   1. op1 in memory, op2 matches "C": movq.
;;   2. op1 in a "y" register, op2 matches "C": movq2dq.
;;   3. op1 tied to the destination, op2 in a "Y" register: punpcklqdq.
;;   4. op1 tied to the destination, op2 in an "x" register: movlhps.
;;   5. op1 tied to the destination, op2 in memory: movhps.
;;   6. op2 tied to the destination, op1 in memory: movlps loads op1.
3584(define_insn "*vec_concatv2di"
3585 [(set (match_operand:V2DI 0 "register_operand" "=Y,?Y,Y,x,x,x")
3586 (vec_concat:V2DI
3587 (match_operand:DI 1 "nonimmediate_operand" " m,*y,0,0,0,m")
3588 (match_operand:DI 2 "vector_move_operand" " C, C,Y,x,m,0")))]
3589 "TARGET_SSE"
3590 "@
3591 movq\t{%1, %0|%0, %1}
3592 movq2dq\t{%1, %0|%0, %1}
3593 punpcklqdq\t{%2, %0|%0, %2}
3594 movlhps\t{%2, %0|%0, %2}
3595 movhps\t{%2, %0|%0, %2}
3596 movlps\t{%1, %0|%0, %1}"
3597 [(set_attr "type" "ssemov,ssemov,sselog,ssemov,ssemov,ssemov")
3598 (set_attr "mode" "TI,TI,TI,V4SF,V2SF,V2SF")])
3599
;; Set one element of a V2DI: operand 0 is the vector, operand 1 the DImode
;; value, operand 2 the constant element index.  All code generation is
;; delegated to ix86_expand_vector_set.
;; NOTE(review): the leading `false' argument is presumably the helper's
;; "MMX allowed" flag -- confirm against its definition.
3600(define_expand "vec_setv2di"
3601 [(match_operand:V2DI 0 "register_operand" "")
3602 (match_operand:DI 1 "register_operand" "")
3603 (match_operand 2 "const_int_operand" "")]
3604 "TARGET_SSE"
3605{
3606 ix86_expand_vector_set (false, operands[0], operands[1],
3607 INTVAL (operands[2]));
3608 DONE;
3609})
3610
;; Extract one element of a V2DI: operand 0 is the DImode result, operand 1
;; the vector, operand 2 the constant element index.  All code generation
;; is delegated to ix86_expand_vector_extract.
;; NOTE(review): the leading `false' argument is presumably the helper's
;; "MMX allowed" flag -- confirm against its definition.
3611(define_expand "vec_extractv2di"
3612 [(match_operand:DI 0 "register_operand" "")
3613 (match_operand:V2DI 1 "register_operand" "")
3614 (match_operand 2 "const_int_operand" "")]
3615 "TARGET_SSE"
3616{
3617 ix86_expand_vector_extract (false, operands[0], operands[1],
3618 INTVAL (operands[2]));
3619 DONE;
3620})
3621
;; Initialize a V2DI (operand 0) from the element description in operand 1,
;; which has no predicate or mode here.  All code generation is delegated
;; to ix86_expand_vector_init.
;; NOTE(review): the leading `false' argument is presumably the helper's
;; "MMX allowed" flag -- confirm against its definition.
3622(define_expand "vec_initv2di"
3623 [(match_operand:V2DI 0 "register_operand" "")
3624 (match_operand 1 "" "")]
3625 "TARGET_SSE"
3626{
3627 ix86_expand_vector_init (false, operands[0], operands[1]);
3628 DONE;
3629})
3630
;; Set one element of a V4SI: operand 0 is the vector, operand 1 the SImode
;; value, operand 2 the constant element index.  All code generation is
;; delegated to ix86_expand_vector_set.
;; NOTE(review): the leading `false' argument is presumably the helper's
;; "MMX allowed" flag -- confirm against its definition.
3631(define_expand "vec_setv4si"
3632 [(match_operand:V4SI 0 "register_operand" "")
3633 (match_operand:SI 1 "register_operand" "")
3634 (match_operand 2 "const_int_operand" "")]
3635 "TARGET_SSE"
3636{
3637 ix86_expand_vector_set (false, operands[0], operands[1],
3638 INTVAL (operands[2]));
3639 DONE;
3640})
3641
;; Extract one element of a V4SI: operand 0 is the SImode result, operand 1
;; the vector, operand 2 the constant element index.  All code generation
;; is delegated to ix86_expand_vector_extract.
;; NOTE(review): the leading `false' argument is presumably the helper's
;; "MMX allowed" flag -- confirm against its definition.
3642(define_expand "vec_extractv4si"
3643 [(match_operand:SI 0 "register_operand" "")
3644 (match_operand:V4SI 1 "register_operand" "")
3645 (match_operand 2 "const_int_operand" "")]
3646 "TARGET_SSE"
3647{
3648 ix86_expand_vector_extract (false, operands[0], operands[1],
3649 INTVAL (operands[2]));
3650 DONE;
3651})
3652
;; Initialize a V4SI (operand 0) from the element description in operand 1,
;; which has no predicate or mode here.  All code generation is delegated
;; to ix86_expand_vector_init.
;; NOTE(review): the leading `false' argument is presumably the helper's
;; "MMX allowed" flag -- confirm against its definition.
3653(define_expand "vec_initv4si"
3654 [(match_operand:V4SI 0 "register_operand" "")
3655 (match_operand 1 "" "")]
3656 "TARGET_SSE"
3657{
3658 ix86_expand_vector_init (false, operands[0], operands[1]);
3659 DONE;
3660})
3661
;; Set one element of a V8HI: operand 0 is the vector, operand 1 the HImode
;; value, operand 2 the constant element index.  All code generation is
;; delegated to ix86_expand_vector_set.
;; NOTE(review): the leading `false' argument is presumably the helper's
;; "MMX allowed" flag -- confirm against its definition.
3662(define_expand "vec_setv8hi"
3663 [(match_operand:V8HI 0 "register_operand" "")
3664 (match_operand:HI 1 "register_operand" "")
3665 (match_operand 2 "const_int_operand" "")]
3666 "TARGET_SSE"
3667{
3668 ix86_expand_vector_set (false, operands[0], operands[1],
3669 INTVAL (operands[2]));
3670 DONE;
3671})
3672
;; Extract one element of a V8HI: operand 0 is the HImode result, operand 1
;; the vector, operand 2 the constant element index.  All code generation
;; is delegated to ix86_expand_vector_extract.
;; NOTE(review): the leading `false' argument is presumably the helper's
;; "MMX allowed" flag -- confirm against its definition.
3673(define_expand "vec_extractv8hi"
3674 [(match_operand:HI 0 "register_operand" "")
3675 (match_operand:V8HI 1 "register_operand" "")
3676 (match_operand 2 "const_int_operand" "")]
3677 "TARGET_SSE"
3678{
3679 ix86_expand_vector_extract (false, operands[0], operands[1],
3680 INTVAL (operands[2]));
3681 DONE;
3682})
3683
;; Initialize a V8HI (operand 0) from the element description in operand 1,
;; which has no predicate or mode here.  All code generation is delegated
;; to ix86_expand_vector_init.
;; NOTE(review): the leading `false' argument is presumably the helper's
;; "MMX allowed" flag -- confirm against its definition.
3684(define_expand "vec_initv8hi"
3685 [(match_operand:V8HI 0 "register_operand" "")
3686 (match_operand 1 "" "")]
3687 "TARGET_SSE"
3688{
3689 ix86_expand_vector_init (false, operands[0], operands[1]);
3690 DONE;
3691})
3692
;; Set one element of a V16QI: operand 0 is the vector, operand 1 the
;; QImode value, operand 2 the constant element index.  All code generation
;; is delegated to ix86_expand_vector_set.
;; NOTE(review): the leading `false' argument is presumably the helper's
;; "MMX allowed" flag -- confirm against its definition.
3693(define_expand "vec_setv16qi"
3694 [(match_operand:V16QI 0 "register_operand" "")
3695 (match_operand:QI 1 "register_operand" "")
3696 (match_operand 2 "const_int_operand" "")]
3697 "TARGET_SSE"
3698{
3699 ix86_expand_vector_set (false, operands[0], operands[1],
3700 INTVAL (operands[2]));
3701 DONE;
3702})
3703
;; Extract one element of a V16QI: operand 0 is the QImode result,
;; operand 1 the vector, operand 2 the constant element index.  All code
;; generation is delegated to ix86_expand_vector_extract.
;; NOTE(review): the leading `false' argument is presumably the helper's
;; "MMX allowed" flag -- confirm against its definition.
3704(define_expand "vec_extractv16qi"
3705 [(match_operand:QI 0 "register_operand" "")
3706 (match_operand:V16QI 1 "register_operand" "")
3707 (match_operand 2 "const_int_operand" "")]
3708 "TARGET_SSE"
3709{
3710 ix86_expand_vector_extract (false, operands[0], operands[1],
3711 INTVAL (operands[2]));
3712 DONE;
3713})
3714
;; Expander "vec_initv16qi": passes V16QI destination operand 0 and the
;; unconstrained initializer operand 1 to ix86_expand_vector_init.
;; Enabled under TARGET_SSE.
3715(define_expand "vec_initv16qi"
3716 [(match_operand:V16QI 0 "register_operand" "")
3717 (match_operand 1 "" "")]
3718 "TARGET_SSE"
3719{
3720 ix86_expand_vector_init (false, operands[0], operands[1]);
3721 DONE;
3722})
3723
3724;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
3725;;
3726;; Miscellaneous
3727;;
3728;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
3729
;; pavgb: per-element (op1 + op2 + 1) >> 1 computed on operands
;; zero-extended to V16HI, then truncated back to V16QI.  Operand 1 is
;; commutative with operand 2 ("%0") and tied to the destination.
;; The all-ones addend is expressed in V16HImode so that both operands
;; of the outer plus:V16HI have the same mode as the result.
3730(define_insn "sse2_uavgv16qi3"
3731 [(set (match_operand:V16QI 0 "register_operand" "=x")
3732 (truncate:V16QI
3733 (lshiftrt:V16HI
3734 (plus:V16HI
3735 (plus:V16HI
3736 (zero_extend:V16HI
3737 (match_operand:V16QI 1 "nonimmediate_operand" "%0"))
3738 (zero_extend:V16HI
3739 (match_operand:V16QI 2 "nonimmediate_operand" "xm")))
3740 (const_vector:V16HI [(const_int 1) (const_int 1)
3741 (const_int 1) (const_int 1)
3742 (const_int 1) (const_int 1)
3743 (const_int 1) (const_int 1)
3744 (const_int 1) (const_int 1)
3745 (const_int 1) (const_int 1)
3746 (const_int 1) (const_int 1)
3747 (const_int 1) (const_int 1)]))
3748 (const_int 1))))]
3749 "TARGET_SSE2 && ix86_binary_operator_ok (PLUS, V16QImode, operands)"
3750 "pavgb\t{%2, %0|%0, %2}"
3751 [(set_attr "type" "sseiadd")
3752 (set_attr "mode" "TI")])
3753
;; pavgw: per-element (op1 + op2 + 1) >> 1 computed on operands
;; zero-extended to V8SI, then truncated back to V8HI.  Operand 1 is
;; commutative with operand 2 ("%0") and tied to the destination.
;; The all-ones addend is expressed in V8SImode so that both operands
;; of the outer plus:V8SI have the same mode as the result.
3754(define_insn "sse2_uavgv8hi3"
3755 [(set (match_operand:V8HI 0 "register_operand" "=x")
3756 (truncate:V8HI
3757 (lshiftrt:V8SI
3758 (plus:V8SI
3759 (plus:V8SI
3760 (zero_extend:V8SI
3761 (match_operand:V8HI 1 "nonimmediate_operand" "%0"))
3762 (zero_extend:V8SI
3763 (match_operand:V8HI 2 "nonimmediate_operand" "xm")))
3764 (const_vector:V8SI [(const_int 1) (const_int 1)
3765 (const_int 1) (const_int 1)
3766 (const_int 1) (const_int 1)
3767 (const_int 1) (const_int 1)]))
3768 (const_int 1))))]
3769 "TARGET_SSE2 && ix86_binary_operator_ok (PLUS, V8HImode, operands)"
3770 "pavgw\t{%2, %0|%0, %2}"
3771 [(set_attr "type" "sseiadd")
3772 (set_attr "mode" "TI")])
3773
3774;; The correct representation for this is absolutely enormous, and
3775;; surely not generally useful.
;; Hence psadbw is modelled as an opaque UNSPEC_PSADBW of two V16QI
;; inputs producing a V2DI result; operand 1 is tied to the destination.
3776(define_insn "sse2_psadbw"
3777 [(set (match_operand:V2DI 0 "register_operand" "=x")
3778 (unspec:V2DI [(match_operand:V16QI 1 "register_operand" "0")
3779 (match_operand:V16QI 2 "nonimmediate_operand" "xm")]
3780 UNSPEC_PSADBW))]
3781 "TARGET_SSE2"
3782 "psadbw\t{%2, %0|%0, %2}"
3783 [(set_attr "type" "sseiadd")
3784 (set_attr "mode" "TI")])
3785
;; movmskps: V4SF SSE register -> SI general register, modelled as
;; UNSPEC_MOVMSK.  Requires TARGET_SSE.
3786(define_insn "sse_movmskps"
3787 [(set (match_operand:SI 0 "register_operand" "=r")
3788 (unspec:SI [(match_operand:V4SF 1 "register_operand" "x")]
3789 UNSPEC_MOVMSK))]
3790 "TARGET_SSE"
3791 "movmskps\t{%1, %0|%0, %1}"
3792 [(set_attr "type" "ssecvt")
3793 (set_attr "mode" "V4SF")])
3794
;; movmskpd: V2DF SSE register -> SI general register, modelled as
;; UNSPEC_MOVMSK.  Requires TARGET_SSE2.
3795(define_insn "sse2_movmskpd"
3796 [(set (match_operand:SI 0 "register_operand" "=r")
3797 (unspec:SI [(match_operand:V2DF 1 "register_operand" "x")]
3798 UNSPEC_MOVMSK))]
3799 "TARGET_SSE2"
3800 "movmskpd\t{%1, %0|%0, %1}"
3801 [(set_attr "type" "ssecvt")
3802 (set_attr "mode" "V2DF")])
3803
;; pmovmskb: V16QI SSE register -> SI general register, modelled as
;; UNSPEC_MOVMSK.  Requires TARGET_SSE2.
;; NOTE(review): the "mode" attribute is V2DF although the source is an
;; integer vector; confirm whether this is deliberate (e.g. to steer
;; attributes derived from "mode" elsewhere) before changing it.
3804(define_insn "sse2_pmovmskb"
3805 [(set (match_operand:SI 0 "register_operand" "=r")
3806 (unspec:SI [(match_operand:V16QI 1 "register_operand" "x")]
3807 UNSPEC_MOVMSK))]
3808 "TARGET_SSE2"
3809 "pmovmskb\t{%1, %0|%0, %1}"
3810 [(set_attr "type" "ssecvt")
3811 (set_attr "mode" "V2DF")])
3812
;; Named expander for maskmovdqu.  It only builds the RTL: memory
;; operand 0 is set to UNSPEC_MASKMOV of operands 1, 2 and the old
;; contents of operand 0 (match_dup).  The preparation statement is
;; empty; the pattern shape matches the "*sse2_maskmovdqu*" insns.
3813(define_expand "sse2_maskmovdqu"
3814 [(set (match_operand:V16QI 0 "memory_operand" "")
3815 (unspec:V16QI [(match_operand:V16QI 1 "register_operand" "x")
3816 (match_operand:V16QI 2 "register_operand" "x")
3817 (match_dup 0)]
3818 UNSPEC_MASKMOV))]
3819 "TARGET_SSE2"
3820 "")
3821
;; 32-bit maskmovdqu (!TARGET_64BIT).  The store address is SImode
;; operand 0, constrained to class "D"; the address register is
;; implicit in the instruction, so the template prints only operands 1
;; and 2.
3822(define_insn "*sse2_maskmovdqu"
3823 [(set (mem:V16QI (match_operand:SI 0 "register_operand" "D"))
3824 (unspec:V16QI [(match_operand:V16QI 1 "register_operand" "x")
3825 (match_operand:V16QI 2 "register_operand" "x")
3826 (mem:V16QI (match_dup 0))]
3827 UNSPEC_MASKMOV))]
3828 "TARGET_SSE2 && !TARGET_64BIT"
3829 ;; @@@ check ordering of operands in intel/nonintel syntax
3830 "maskmovdqu\t{%2, %1|%1, %2}"
3831 [(set_attr "type" "ssecvt")
3832 (set_attr "mode" "TI")])
3833
;; 64-bit maskmovdqu (TARGET_64BIT).  Identical to the 32-bit pattern
;; except that the address operand 0 is DImode.
3834(define_insn "*sse2_maskmovdqu_rex64"
3835 [(set (mem:V16QI (match_operand:DI 0 "register_operand" "D"))
3836 (unspec:V16QI [(match_operand:V16QI 1 "register_operand" "x")
3837 (match_operand:V16QI 2 "register_operand" "x")
3838 (mem:V16QI (match_dup 0))]
3839 UNSPEC_MASKMOV))]
3840 "TARGET_SSE2 && TARGET_64BIT"
3841 ;; @@@ check ordering of operands in intel/nonintel syntax
3842 "maskmovdqu\t{%2, %1|%1, %2}"
3843 [(set_attr "type" "ssecvt")
3844 (set_attr "mode" "TI")])
3845
;; ldmxcsr from SI memory operand 0.  Modelled as unspec_volatile
;; (UNSPECV_LDMXCSR) with no destination; "memory" attribute is "load".
3846(define_insn "sse_ldmxcsr"
3847 [(unspec_volatile [(match_operand:SI 0 "memory_operand" "m")]
3848 UNSPECV_LDMXCSR)]
3849 "TARGET_SSE"
3850 "ldmxcsr\t%0"
3851 [(set_attr "type" "sse")
3852 (set_attr "memory" "load")])
3853
;; stmxcsr into SI memory operand 0.  The source is an unspec_volatile
;; (UNSPECV_STMXCSR) with a dummy (const_int 0) input; "memory"
;; attribute is "store".
3854(define_insn "sse_stmxcsr"
3855 [(set (match_operand:SI 0 "memory_operand" "=m")
3856 (unspec_volatile:SI [(const_int 0)] UNSPECV_STMXCSR))]
3857 "TARGET_SSE"
3858 "stmxcsr\t%0"
3859 [(set_attr "type" "sse")
3860 (set_attr "memory" "store")])
3861
;; Expander for sfence.  operands[0] is not supplied by the caller: the
;; preparation code creates a volatile BLKmode MEM whose address is a
;; Pmode SCRATCH, and the pattern sets that MEM to UNSPEC_SFENCE of
;; itself.  Enabled for TARGET_SSE or TARGET_3DNOW_A.
3862(define_expand "sse_sfence"
3863 [(set (match_dup 0)
3864 (unspec:BLK [(match_dup 0)] UNSPEC_SFENCE))]
3865 "TARGET_SSE || TARGET_3DNOW_A"
3866{
3867 operands[0] = gen_rtx_MEM (BLKmode, gen_rtx_SCRATCH (Pmode));
3868 MEM_VOLATILE_P (operands[0]) = 1;
3869})
3870
;; Insn matching the RTL built by the "sse_sfence" expander; emits
;; "sfence".  The BLK operand has no predicate or constraint and the
;; "memory" attribute is "unknown".
3871(define_insn "*sse_sfence"
3872 [(set (match_operand:BLK 0 "" "")
3873 (unspec:BLK [(match_dup 0)] UNSPEC_SFENCE))]
3874 "TARGET_SSE || TARGET_3DNOW_A"
3875 "sfence"
3876 [(set_attr "type" "sse")
3877 (set_attr "memory" "unknown")])
3878
;; clflush of the address in operand 0 (an address_operand, printed
;; with the %a modifier).  Modelled as unspec_volatile UNSPECV_CLFLUSH.
3879(define_insn "sse2_clflush"
3880 [(unspec_volatile [(match_operand 0 "address_operand" "p")]
3881 UNSPECV_CLFLUSH)]
3882 "TARGET_SSE2"
3883 "clflush\t%a0"
3884 [(set_attr "type" "sse")
3885 (set_attr "memory" "unknown")])
3886
;; Expander for mfence.  As with sse_sfence, operands[0] is created
;; here as a volatile BLKmode MEM at a Pmode SCRATCH address and set to
;; UNSPEC_MFENCE of itself.  Requires TARGET_SSE2.
3887(define_expand "sse2_mfence"
3888 [(set (match_dup 0)
3889 (unspec:BLK [(match_dup 0)] UNSPEC_MFENCE))]
3890 "TARGET_SSE2"
3891{
3892 operands[0] = gen_rtx_MEM (BLKmode, gen_rtx_SCRATCH (Pmode));
3893 MEM_VOLATILE_P (operands[0]) = 1;
3894})
3895
;; Insn matching the RTL built by the "sse2_mfence" expander; emits
;; "mfence".  "memory" attribute is "unknown".
3896(define_insn "*sse2_mfence"
3897 [(set (match_operand:BLK 0 "" "")
3898 (unspec:BLK [(match_dup 0)] UNSPEC_MFENCE))]
3899 "TARGET_SSE2"
3900 "mfence"
3901 [(set_attr "type" "sse")
3902 (set_attr "memory" "unknown")])
3903
;; Expander for lfence.  operands[0] is created here as a volatile
;; BLKmode MEM at a Pmode SCRATCH address and set to UNSPEC_LFENCE of
;; itself.  Requires TARGET_SSE2.
3904(define_expand "sse2_lfence"
3905 [(set (match_dup 0)
3906 (unspec:BLK [(match_dup 0)] UNSPEC_LFENCE))]
3907 "TARGET_SSE2"
3908{
3909 operands[0] = gen_rtx_MEM (BLKmode, gen_rtx_SCRATCH (Pmode));
3910 MEM_VOLATILE_P (operands[0]) = 1;
3911})
3912
;; Insn matching the RTL built by the "sse2_lfence" expander; emits
;; "lfence".  "memory" attribute is "unknown".
3913(define_insn "*sse2_lfence"
3914 [(set (match_operand:BLK 0 "" "")
3915 (unspec:BLK [(match_dup 0)] UNSPEC_LFENCE))]
3916 "TARGET_SSE2"
3917 "lfence"
3918 [(set_attr "type" "sse")
3919 (set_attr "memory" "unknown")])
3920
;; mwait.  The two SI inputs are pinned by constraint to register
;; classes "a" and "c"; the instruction uses them implicitly, so the
;; output template has no operands.  Length is fixed at 3 bytes.
3921(define_insn "sse3_mwait"
3922 [(unspec_volatile [(match_operand:SI 0 "register_operand" "a")
3923 (match_operand:SI 1 "register_operand" "c")]
3924 UNSPECV_MWAIT)]
3925 "TARGET_SSE3"
3926;; 64bit version is "mwait %rax,%rcx". But only lower 32bits are used.
3927;; Since 32bit register operands are implicitly zero extended to 64bit,
3928;; we only need to set up 32bit registers.
3929 "mwait"
3930 [(set_attr "length" "3")])
3931
;; 32-bit monitor (!TARGET_64BIT).  The three SI inputs are pinned to
;; register classes "a", "c" and "d" and are printed explicitly in the
;; template.  Length is fixed at 3 bytes.
3932(define_insn "sse3_monitor"
3933 [(unspec_volatile [(match_operand:SI 0 "register_operand" "a")
3934 (match_operand:SI 1 "register_operand" "c")
3935 (match_operand:SI 2 "register_operand" "d")]
3936 UNSPECV_MONITOR)]
3937 "TARGET_SSE3 && !TARGET_64BIT"
3938 "monitor\t%0, %1, %2"
3939 [(set_attr "length" "3")])
3940
;; 64-bit monitor (TARGET_64BIT).  Operand 0 (class "a") is DImode;
;; operands 1 and 2 (classes "c" and "d") stay SImode for the reason
;; given in the comment below.  The template prints no operands.
3941(define_insn "sse3_monitor64"
3942 [(unspec_volatile [(match_operand:DI 0 "register_operand" "a")
3943 (match_operand:SI 1 "register_operand" "c")
3944 (match_operand:SI 2 "register_operand" "d")]
3945 UNSPECV_MONITOR)]
3946 "TARGET_SSE3 && TARGET_64BIT"
3947;; 64bit version is "monitor %rax,%rcx,%rdx". But only lower 32bits in
3948;; RCX and RDX are used. Since 32bit register operands are implicitly
3949;; zero extended to 64bit, we only need to set up 32bit registers.
3950 "monitor"
3951 [(set_attr "length" "3")])
1;; GCC machine description for SSE instructions
2;; Copyright (C) 2005, 2006
3;; Free Software Foundation, Inc.
4;;
5;; This file is part of GCC.
6;;
7;; GCC is free software; you can redistribute it and/or modify
8;; it under the terms of the GNU General Public License as published by
9;; the Free Software Foundation; either version 2, or (at your option)
10;; any later version.
11;;
12;; GCC is distributed in the hope that it will be useful,
13;; but WITHOUT ANY WARRANTY; without even the implied warranty of
14;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15;; GNU General Public License for more details.
16;;
17;; You should have received a copy of the GNU General Public License
18;; along with GCC; see the file COPYING. If not, write to
19;; the Free Software Foundation, 51 Franklin Street, Fifth Floor,
20;; Boston, MA 02110-1301, USA.
21
22
23;; 16 byte integral modes handled by SSE, minus TImode, which gets
24;; special-cased for TARGET_64BIT.
25(define_mode_macro SSEMODEI [V16QI V8HI V4SI V2DI])
26
27;; All 16-byte vector modes handled by SSE
;; (the four integer modes above plus V4SF and V2DF).
28(define_mode_macro SSEMODE [V16QI V8HI V4SI V2DI V4SF V2DF])
29
30;; Mix-n-match
;; Subsets of the integer vector modes.  Judging by the lists, the
;; digits in each name give the element sizes in bytes that the macro
;; covers (1 = QI, 2 = HI, 4 = SI, 8 = DI).
31(define_mode_macro SSEMODE12 [V16QI V8HI])
32(define_mode_macro SSEMODE24 [V8HI V4SI])
33(define_mode_macro SSEMODE14 [V16QI V4SI])
34(define_mode_macro SSEMODE124 [V16QI V8HI V4SI])
35(define_mode_macro SSEMODE248 [V8HI V4SI V2DI])
36
37;; Mapping from integer vector mode to mnemonic suffix
;; (b/w/d/q for V16QI/V8HI/V4SI/V2DI respectively).
38(define_mode_attr ssevecsize [(V16QI "b") (V8HI "w") (V4SI "d") (V2DI "q")])
39
40;; Patterns whose name begins with "sse{,2,3}_" are invoked by intrinsics.
41
42;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
43;;
44;; Move patterns
45;;
46;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
47
48;; All of these patterns are enabled for SSE1 as well as SSE2.
49;; This is essential for maintaining stable calling conventions.
50
;; Move expander for every integer vector mode in SSEMODEI.  All work
;; is delegated to ix86_expand_vector_move; the RTL template is never
;; emitted because the preparation code ends with DONE.
51(define_expand "mov<mode>"
52 [(set (match_operand:SSEMODEI 0 "nonimmediate_operand" "")
53 (match_operand:SSEMODEI 1 "nonimmediate_operand" ""))]
54 "TARGET_SSE"
55{
56 ix86_expand_vector_move (<MODE>mode, operands);
57 DONE;
58})
59
;; Move insn for the SSEMODEI integer vector modes; memory-to-memory
;; moves are rejected by the condition.
;;   alternative 0: "C" constant -> register; the opcode comes from
;;                  standard_sse_constant_opcode.
;;   alternative 1: register/memory -> register.
;;   alternative 2: register -> memory.
;; Alternatives 1 and 2 emit movaps when the "mode" attribute is V4SF
;; and movdqa otherwise.  "mode" is V4SF when optimize_size is nonzero,
;; when TARGET_SSE2 is zero, or for alternative 2 when
;; TARGET_SSE_TYPELESS_STORES is nonzero; otherwise it is TI.
60(define_insn "*mov<mode>_internal"
61 [(set (match_operand:SSEMODEI 0 "nonimmediate_operand" "=x,x ,m")
62 (match_operand:SSEMODEI 1 "nonimmediate_or_sse_const_operand" "C ,xm,x"))]
63 "TARGET_SSE && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
64{
65 switch (which_alternative)
66 {
67 case 0:
68 return standard_sse_constant_opcode (insn, operands[1]);
69 case 1:
70 case 2:
71 if (get_attr_mode (insn) == MODE_V4SF)
72 return "movaps\t{%1, %0|%0, %1}";
73 else
74 return "movdqa\t{%1, %0|%0, %1}";
75 default:
76 gcc_unreachable ();
77 }
78}
79 [(set_attr "type" "sselog1,ssemov,ssemov")
80 (set (attr "mode")
81 (if_then_else
82 (ior (ior (ne (symbol_ref "optimize_size") (const_int 0))
83 (eq (symbol_ref "TARGET_SSE2") (const_int 0)))
84 (and (eq_attr "alternative" "2")
85 (ne (symbol_ref "TARGET_SSE_TYPELESS_STORES")
86 (const_int 0))))
87 (const_string "V4SF")
88 (const_string "TI")))])
89
;; V4SF move expander; delegates entirely to ix86_expand_vector_move
;; and finishes with DONE.
90(define_expand "movv4sf"
91 [(set (match_operand:V4SF 0 "nonimmediate_operand" "")
92 (match_operand:V4SF 1 "nonimmediate_operand" ""))]
93 "TARGET_SSE"
94{
95 ix86_expand_vector_move (V4SFmode, operands);
96 DONE;
97})
98
;; V4SF move insn.
;;   alternative 0: "C" constant -> register; the opcode comes from
;;                  standard_sse_constant_opcode.
;;   alternatives 1 and 2: register/memory -> register and
;;                  register -> memory, always movaps.
;; Memory-to-memory moves are rejected by the condition, matching the
;; integer-vector and V2DF move insns in this file, and the impossible
;; default case uses gcc_unreachable () as those patterns do.
99(define_insn "*movv4sf_internal"
100 [(set (match_operand:V4SF 0 "nonimmediate_operand" "=x,x,m")
101 (match_operand:V4SF 1 "nonimmediate_or_sse_const_operand" "C,xm,x"))]
102 "TARGET_SSE && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
103{
104 switch (which_alternative)
105 {
106 case 0:
107 return standard_sse_constant_opcode (insn, operands[1]);
108 case 1:
109 case 2:
110 return "movaps\t{%1, %0|%0, %1}";
111 default:
112 gcc_unreachable ();
113 }
114}
115 [(set_attr "type" "sselog1,ssemov,ssemov")
116 (set_attr "mode" "V4SF")])
117
;; After reload, split a V4SF register load whose source satisfies
;; zero_extended_scalar_load_operand into a vec_merge: operand 1 is
;; narrowed to its SFmode subreg at offset 0 and duplicated, then
;; merged under mask 1 with an all-zero V4SF vector (operand 2).
118(define_split
119 [(set (match_operand:V4SF 0 "register_operand" "")
120 (match_operand:V4SF 1 "zero_extended_scalar_load_operand" ""))]
121 "TARGET_SSE && reload_completed"
122 [(set (match_dup 0)
123 (vec_merge:V4SF
124 (vec_duplicate:V4SF (match_dup 1))
125 (match_dup 2)
126 (const_int 1)))]
127{
128 operands[1] = simplify_gen_subreg (SFmode, operands[1], V4SFmode, 0);
129 operands[2] = CONST0_RTX (V4SFmode);
130})
131
;; V2DF move expander; delegates entirely to ix86_expand_vector_move
;; and finishes with DONE.  Note the condition is TARGET_SSE, not
;; TARGET_SSE2.
132(define_expand "movv2df"
133 [(set (match_operand:V2DF 0 "nonimmediate_operand" "")
134 (match_operand:V2DF 1 "nonimmediate_operand" ""))]
135 "TARGET_SSE"
136{
137 ix86_expand_vector_move (V2DFmode, operands);
138 DONE;
139})
140
;; V2DF move insn; memory-to-memory moves are rejected by the
;; condition.
;;   alternative 0: "C" constant -> register; the opcode comes from
;;                  standard_sse_constant_opcode.
;;   alternatives 1 and 2: movaps when the "mode" attribute is V4SF,
;;                  movapd otherwise.
;; "mode" is V4SF when optimize_size is nonzero, when TARGET_SSE2 is
;; zero, or for alternative 2 when TARGET_SSE_TYPELESS_STORES is
;; nonzero; otherwise it is V2DF.
141(define_insn "*movv2df_internal"
142 [(set (match_operand:V2DF 0 "nonimmediate_operand" "=x,x,m")
143 (match_operand:V2DF 1 "nonimmediate_or_sse_const_operand" "C,xm,x"))]
144 "TARGET_SSE && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
145{
146 switch (which_alternative)
147 {
148 case 0:
149 return standard_sse_constant_opcode (insn, operands[1]);
150 case 1:
151 case 2:
152 if (get_attr_mode (insn) == MODE_V4SF)
153 return "movaps\t{%1, %0|%0, %1}";
154 else
155 return "movapd\t{%1, %0|%0, %1}";
156 default:
157 gcc_unreachable ();
158 }
159}
160 [(set_attr "type" "sselog1,ssemov,ssemov")
161 (set (attr "mode")
162 (if_then_else
163 (ior (ior (ne (symbol_ref "optimize_size") (const_int 0))
164 (eq (symbol_ref "TARGET_SSE2") (const_int 0)))
165 (and (eq_attr "alternative" "2")
166 (ne (symbol_ref "TARGET_SSE_TYPELESS_STORES")
167 (const_int 0))))
168 (const_string "V4SF")
169 (const_string "V2DF")))])
170
;; After reload (TARGET_SSE2 only), split a V2DF register load whose
;; source satisfies zero_extended_scalar_load_operand into a
;; vec_concat of the DFmode subreg at offset 0 of operand 1 and a
;; DFmode zero (operand 2).
171(define_split
172 [(set (match_operand:V2DF 0 "register_operand" "")
173 (match_operand:V2DF 1 "zero_extended_scalar_load_operand" ""))]
174 "TARGET_SSE2 && reload_completed"
175 [(set (match_dup 0) (vec_concat:V2DF (match_dup 1) (match_dup 2)))]
176{
177 operands[1] = simplify_gen_subreg (DFmode, operands[1], V2DFmode, 0);
178 operands[2] = CONST0_RTX (DFmode);
179})
180
;; Push expander for every mode in SSEMODE; register operand 0 is
;; handed to ix86_expand_push, which emits the actual insns.
181(define_expand "push<mode>1"
182 [(match_operand:SSEMODE 0 "register_operand" "")]
183 "TARGET_SSE"
184{
185 ix86_expand_push (<MODE>mode, operands[0]);
186 DONE;
187})
188
;; Misaligned-move expander for every mode in SSEMODE; delegates to
;; ix86_expand_vector_move_misalign and finishes with DONE.
189(define_expand "movmisalign<mode>"
190 [(set (match_operand:SSEMODE 0 "nonimmediate_operand" "")
191 (match_operand:SSEMODE 1 "nonimmediate_operand" ""))]
192 "TARGET_SSE"
193{
194 ix86_expand_vector_move_misalign (<MODE>mode, operands);
195 DONE;
196})
197
;; movups: V4SF move wrapped in UNSPEC_MOVU, register/memory ->
;; register or register -> memory; memory-to-memory is rejected by the
;; condition.  The "mode" attribute is V4SF because this is the
;; single-precision form (V2DF belongs to sse2_movupd).
198(define_insn "sse_movups"
199 [(set (match_operand:V4SF 0 "nonimmediate_operand" "=x,m")
200 (unspec:V4SF [(match_operand:V4SF 1 "nonimmediate_operand" "xm,x")]
201 UNSPEC_MOVU))]
202 "TARGET_SSE && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
203 "movups\t{%1, %0|%0, %1}"
204 [(set_attr "type" "ssemov")
205 (set_attr "mode" "V4SF")])
206
;; movupd: V2DF move wrapped in UNSPEC_MOVU, register/memory ->
;; register or register -> memory; memory-to-memory is rejected by the
;; condition.  Requires TARGET_SSE2.
207(define_insn "sse2_movupd"
208 [(set (match_operand:V2DF 0 "nonimmediate_operand" "=x,m")
209 (unspec:V2DF [(match_operand:V2DF 1 "nonimmediate_operand" "xm,x")]
210 UNSPEC_MOVU))]
211 "TARGET_SSE2 && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
212 "movupd\t{%1, %0|%0, %1}"
213 [(set_attr "type" "ssemov")
214 (set_attr "mode" "V2DF")])
215
;; movdqu: V16QI move wrapped in UNSPEC_MOVU, register/memory ->
;; register or register -> memory; memory-to-memory is rejected by the
;; condition.  Requires TARGET_SSE2.
216(define_insn "sse2_movdqu"
217 [(set (match_operand:V16QI 0 "nonimmediate_operand" "=x,m")
218 (unspec:V16QI [(match_operand:V16QI 1 "nonimmediate_operand" "xm,x")]
219 UNSPEC_MOVU))]
220 "TARGET_SSE2 && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
221 "movdqu\t{%1, %0|%0, %1}"
222 [(set_attr "type" "ssemov")
223 (set_attr "mode" "TI")])
224
;; movntps: store of V4SF register operand 1 to memory operand 0,
;; modelled as UNSPEC_MOVNT.
225(define_insn "sse_movntv4sf"
226 [(set (match_operand:V4SF 0 "memory_operand" "=m")
227 (unspec:V4SF [(match_operand:V4SF 1 "register_operand" "x")]
228 UNSPEC_MOVNT))]
229 "TARGET_SSE"
230 "movntps\t{%1, %0|%0, %1}"
231 [(set_attr "type" "ssemov")
232 (set_attr "mode" "V4SF")])
233
;; movntpd: store of V2DF register operand 1 to memory operand 0,
;; modelled as UNSPEC_MOVNT.
;; NOTE(review): "type" is ssecvt here but ssemov on sse_movntv4sf;
;; confirm whether the difference is intended.
234(define_insn "sse2_movntv2df"
235 [(set (match_operand:V2DF 0 "memory_operand" "=m")
236 (unspec:V2DF [(match_operand:V2DF 1 "register_operand" "x")]
237 UNSPEC_MOVNT))]
238 "TARGET_SSE2"
239 "movntpd\t{%1, %0|%0, %1}"
240 [(set_attr "type" "ssecvt")
241 (set_attr "mode" "V2DF")])
242
;; movntdq: store of V2DI register operand 1 to memory operand 0,
;; modelled as UNSPEC_MOVNT.
243(define_insn "sse2_movntv2di"
244 [(set (match_operand:V2DI 0 "memory_operand" "=m")
245 (unspec:V2DI [(match_operand:V2DI 1 "register_operand" "x")]
246 UNSPEC_MOVNT))]
247 "TARGET_SSE2"
248 "movntdq\t{%1, %0|%0, %1}"
249 [(set_attr "type" "ssecvt")
250 (set_attr "mode" "TI")])
251
;; movnti: store of SI general register operand 1 ("r") to memory
;; operand 0, modelled as UNSPEC_MOVNT.
;; NOTE(review): "mode" is V2DF although both operands are SImode;
;; confirm whether this is deliberate before changing it.
252(define_insn "sse2_movntsi"
253 [(set (match_operand:SI 0 "memory_operand" "=m")
254 (unspec:SI [(match_operand:SI 1 "register_operand" "r")]
255 UNSPEC_MOVNT))]
256 "TARGET_SSE2"
257 "movnti\t{%1, %0|%0, %1}"
258 [(set_attr "type" "ssecvt")
259 (set_attr "mode" "V2DF")])
260
;; lddqu: load of V16QI memory operand 1 into register operand 0,
;; modelled as UNSPEC_LDQQU.  Requires TARGET_SSE3.
261(define_insn "sse3_lddqu"
262 [(set (match_operand:V16QI 0 "register_operand" "=x")
263 (unspec:V16QI [(match_operand:V16QI 1 "memory_operand" "m")]
264 UNSPEC_LDQQU))]
265 "TARGET_SSE3"
266 "lddqu\t{%1, %0|%0, %1}"
267 [(set_attr "type" "ssecvt")
268 (set_attr "mode" "TI")])
269
270;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
271;;
272;; Parallel single-precision floating point arithmetic
273;;
274;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
275
;; V4SF negation expander; delegates to ix86_expand_fp_absneg_operator
;; with code NEG and finishes with DONE.
276(define_expand "negv4sf2"
277 [(set (match_operand:V4SF 0 "register_operand" "")
278 (neg:V4SF (match_operand:V4SF 1 "nonimmediate_operand" "")))]
279 "TARGET_SSE"
280 "ix86_expand_fp_absneg_operator (NEG, V4SFmode, operands); DONE;")
281
;; V4SF absolute-value expander; delegates to
;; ix86_expand_fp_absneg_operator with code ABS and finishes with DONE.
282(define_expand "absv4sf2"
283 [(set (match_operand:V4SF 0 "register_operand" "")
284 (abs:V4SF (match_operand:V4SF 1 "nonimmediate_operand" "")))]
285 "TARGET_SSE"
286 "ix86_expand_fp_absneg_operator (ABS, V4SFmode, operands); DONE;")
287
;; V4SF addition expander.  The operands are first adjusted by
;; ix86_fixup_binary_operands_no_copy; there is no DONE, so the plus
;; RTL in the template is then emitted.
288(define_expand "addv4sf3"
289 [(set (match_operand:V4SF 0 "register_operand" "")
290 (plus:V4SF (match_operand:V4SF 1 "nonimmediate_operand" "")
291 (match_operand:V4SF 2 "nonimmediate_operand" "")))]
292 "TARGET_SSE"
293 "ix86_fixup_binary_operands_no_copy (PLUS, V4SFmode, operands);")
294
;; addps.  Operand 1 is commutative with operand 2 ("%0") and tied to
;; the destination; the operand combination must also pass
;; ix86_binary_operator_ok.
295(define_insn "*addv4sf3"
296 [(set (match_operand:V4SF 0 "register_operand" "=x")
297 (plus:V4SF (match_operand:V4SF 1 "nonimmediate_operand" "%0")
298 (match_operand:V4SF 2 "nonimmediate_operand" "xm")))]
299 "TARGET_SSE && ix86_binary_operator_ok (PLUS, V4SFmode, operands)"
300 "addps\t{%2, %0|%0, %2}"
301 [(set_attr "type" "sseadd")
302 (set_attr "mode" "V4SF")])
303
;; addss: the V4SF sum of operands 1 and 2 is merged with operand 1
;; under vec_merge mask 1, so only element 0 takes the sum and the
;; other elements keep operand 1's values.  "mode" attribute is SF.
304(define_insn "sse_vmaddv4sf3"
305 [(set (match_operand:V4SF 0 "register_operand" "=x")
306 (vec_merge:V4SF
307 (plus:V4SF (match_operand:V4SF 1 "register_operand" "0")
308 (match_operand:V4SF 2 "nonimmediate_operand" "xm"))
309 (match_dup 1)
310 (const_int 1)))]
311 "TARGET_SSE && ix86_binary_operator_ok (PLUS, V4SFmode, operands)"
312 "addss\t{%2, %0|%0, %2}"
313 [(set_attr "type" "sseadd")
314 (set_attr "mode" "SF")])
315
;; V4SF subtraction expander.  Operand 1 must already be a register;
;; operands are adjusted by ix86_fixup_binary_operands_no_copy and the
;; minus RTL in the template is then emitted.
316(define_expand "subv4sf3"
317 [(set (match_operand:V4SF 0 "register_operand" "")
318 (minus:V4SF (match_operand:V4SF 1 "register_operand" "")
319 (match_operand:V4SF 2 "nonimmediate_operand" "")))]
320 "TARGET_SSE"
321 "ix86_fixup_binary_operands_no_copy (MINUS, V4SFmode, operands);")
322
;; subps.  Not commutative: operand 1 is a register tied to the
;; destination, operand 2 may be a register or memory.
323(define_insn "*subv4sf3"
324 [(set (match_operand:V4SF 0 "register_operand" "=x")
325 (minus:V4SF (match_operand:V4SF 1 "register_operand" "0")
326 (match_operand:V4SF 2 "nonimmediate_operand" "xm")))]
327 "TARGET_SSE"
328 "subps\t{%2, %0|%0, %2}"
329 [(set_attr "type" "sseadd")
330 (set_attr "mode" "V4SF")])
331
;; subss: the V4SF difference of operands 1 and 2 is merged with
;; operand 1 under vec_merge mask 1, so only element 0 takes the
;; difference.  "mode" attribute is SF.
332(define_insn "sse_vmsubv4sf3"
333 [(set (match_operand:V4SF 0 "register_operand" "=x")
334 (vec_merge:V4SF
335 (minus:V4SF (match_operand:V4SF 1 "register_operand" "0")
336 (match_operand:V4SF 2 "nonimmediate_operand" "xm"))
337 (match_dup 1)
338 (const_int 1)))]
339 "TARGET_SSE"
340 "subss\t{%2, %0|%0, %2}"
341 [(set_attr "type" "sseadd")
342 (set_attr "mode" "SF")])
343
;; V4SF multiplication expander.  Operands are adjusted by
;; ix86_fixup_binary_operands_no_copy and the mult RTL in the template
;; is then emitted.
344(define_expand "mulv4sf3"
345 [(set (match_operand:V4SF 0 "register_operand" "")
346 (mult:V4SF (match_operand:V4SF 1 "nonimmediate_operand" "")
347 (match_operand:V4SF 2 "nonimmediate_operand" "")))]
348 "TARGET_SSE"
349 "ix86_fixup_binary_operands_no_copy (MULT, V4SFmode, operands);")
350
;; mulps.  Operand 1 is commutative with operand 2 ("%0") and tied to
;; the destination; the operand combination must also pass
;; ix86_binary_operator_ok.
351(define_insn "*mulv4sf3"
352 [(set (match_operand:V4SF 0 "register_operand" "=x")
353 (mult:V4SF (match_operand:V4SF 1 "nonimmediate_operand" "%0")
354 (match_operand:V4SF 2 "nonimmediate_operand" "xm")))]
355 "TARGET_SSE && ix86_binary_operator_ok (MULT, V4SFmode, operands)"
356 "mulps\t{%2, %0|%0, %2}"
357 [(set_attr "type" "ssemul")
358 (set_attr "mode" "V4SF")])
359
;; mulss: the V4SF product of operands 1 and 2 is merged with operand 1
;; under vec_merge mask 1, so only element 0 takes the product.
;; "mode" attribute is SF.
360(define_insn "sse_vmmulv4sf3"
361 [(set (match_operand:V4SF 0 "register_operand" "=x")
362 (vec_merge:V4SF
363 (mult:V4SF (match_operand:V4SF 1 "register_operand" "0")
364 (match_operand:V4SF 2 "nonimmediate_operand" "xm"))
365 (match_dup 1)
366 (const_int 1)))]
367 "TARGET_SSE && ix86_binary_operator_ok (MULT, V4SFmode, operands)"
368 "mulss\t{%2, %0|%0, %2}"
369 [(set_attr "type" "ssemul")
370 (set_attr "mode" "SF")])
371
;; V4SF division expander.  Operand 1 must already be a register;
;; operands are adjusted by ix86_fixup_binary_operands_no_copy and the
;; div RTL in the template is then emitted.
372(define_expand "divv4sf3"
373 [(set (match_operand:V4SF 0 "register_operand" "")
374 (div:V4SF (match_operand:V4SF 1 "register_operand" "")
375 (match_operand:V4SF 2 "nonimmediate_operand" "")))]
376 "TARGET_SSE"
377 "ix86_fixup_binary_operands_no_copy (DIV, V4SFmode, operands);")
378
;; divps.  Not commutative: operand 1 is a register tied to the
;; destination, operand 2 may be a register or memory.
379(define_insn "*divv4sf3"
380 [(set (match_operand:V4SF 0 "register_operand" "=x")
381 (div:V4SF (match_operand:V4SF 1 "register_operand" "0")
382 (match_operand:V4SF 2 "nonimmediate_operand" "xm")))]
383 "TARGET_SSE"
384 "divps\t{%2, %0|%0, %2}"
385 [(set_attr "type" "ssediv")
386 (set_attr "mode" "V4SF")])
387
;; divss: the V4SF quotient of operands 1 and 2 is merged with
;; operand 1 under vec_merge mask 1, so only element 0 takes the
;; quotient.  "mode" attribute is SF.
388(define_insn "sse_vmdivv4sf3"
389 [(set (match_operand:V4SF 0 "register_operand" "=x")
390 (vec_merge:V4SF
391 (div:V4SF (match_operand:V4SF 1 "register_operand" "0")
392 (match_operand:V4SF 2 "nonimmediate_operand" "xm"))
393 (match_dup 1)
394 (const_int 1)))]
395 "TARGET_SSE"
396 "divss\t{%2, %0|%0, %2}"
397 [(set_attr "type" "ssediv")
398 (set_attr "mode" "SF")])
399
;; rcpps, modelled as the opaque UNSPEC_RCP of V4SF operand 1
;; (register or memory).  The destination is not tied to the source.
400(define_insn "sse_rcpv4sf2"
401 [(set (match_operand:V4SF 0 "register_operand" "=x")
402 (unspec:V4SF
403 [(match_operand:V4SF 1 "nonimmediate_operand" "xm")] UNSPEC_RCP))]
404 "TARGET_SSE"
405 "rcpps\t{%1, %0|%0, %1}"
406 [(set_attr "type" "sse")
407 (set_attr "mode" "V4SF")])
408
;; rcpss: UNSPEC_RCP of operand 1 is merged under vec_merge mask 1 with
;; operand 2, which is tied to the destination and supplies the
;; elements other than element 0.
409(define_insn "sse_vmrcpv4sf2"
410 [(set (match_operand:V4SF 0 "register_operand" "=x")
411 (vec_merge:V4SF
412 (unspec:V4SF [(match_operand:V4SF 1 "nonimmediate_operand" "xm")]
413 UNSPEC_RCP)
414 (match_operand:V4SF 2 "register_operand" "0")
415 (const_int 1)))]
416 "TARGET_SSE"
417 "rcpss\t{%1, %0|%0, %1}"
418 [(set_attr "type" "sse")
419 (set_attr "mode" "SF")])
420
;; rsqrtps, modelled as the opaque UNSPEC_RSQRT of V4SF operand 1
;; (register or memory).
421(define_insn "sse_rsqrtv4sf2"
422 [(set (match_operand:V4SF 0 "register_operand" "=x")
423 (unspec:V4SF
424 [(match_operand:V4SF 1 "nonimmediate_operand" "xm")] UNSPEC_RSQRT))]
425 "TARGET_SSE"
426 "rsqrtps\t{%1, %0|%0, %1}"
427 [(set_attr "type" "sse")
428 (set_attr "mode" "V4SF")])
429
;; rsqrtss: UNSPEC_RSQRT of operand 1 is merged under vec_merge mask 1
;; with operand 2, which is tied to the destination and supplies the
;; elements other than element 0.
430(define_insn "sse_vmrsqrtv4sf2"
431 [(set (match_operand:V4SF 0 "register_operand" "=x")
432 (vec_merge:V4SF
433 (unspec:V4SF [(match_operand:V4SF 1 "nonimmediate_operand" "xm")]
434 UNSPEC_RSQRT)
435 (match_operand:V4SF 2 "register_operand" "0")
436 (const_int 1)))]
437 "TARGET_SSE"
438 "rsqrtss\t{%1, %0|%0, %1}"
439 [(set_attr "type" "sse")
440 (set_attr "mode" "SF")])
441
;; sqrtps: V4SF square root expressed with the generic sqrt RTL code.
;; The pattern name carries no "sse_" prefix and no "*".
442(define_insn "sqrtv4sf2"
443 [(set (match_operand:V4SF 0 "register_operand" "=x")
444 (sqrt:V4SF (match_operand:V4SF 1 "nonimmediate_operand" "xm")))]
445 "TARGET_SSE"
446 "sqrtps\t{%1, %0|%0, %1}"
447 [(set_attr "type" "sse")
448 (set_attr "mode" "V4SF")])
449
;; sqrtss: sqrt of operand 1 is merged under vec_merge mask 1 with
;; operand 2, which is tied to the destination and supplies the
;; elements other than element 0.
450(define_insn "sse_vmsqrtv4sf2"
451 [(set (match_operand:V4SF 0 "register_operand" "=x")
452 (vec_merge:V4SF
453 (sqrt:V4SF (match_operand:V4SF 1 "nonimmediate_operand" "xm"))
454 (match_operand:V4SF 2 "register_operand" "0")
455 (const_int 1)))]
456 "TARGET_SSE"
457 "sqrtss\t{%1, %0|%0, %1}"
458 [(set_attr "type" "sse")
459 (set_attr "mode" "SF")])
460
461;; ??? For !flag_finite_math_only, the representation with SMIN/SMAX
462;; isn't really correct, as those rtl operators aren't defined when
463;; applied to NaNs. Hopefully the optimizers won't get too smart on us.
464
;; V4SF smax expander.  Unless flag_finite_math_only is set, operand 1
;; is forced into a register first, which is what the non-commutative
;; "*smaxv4sf3" insn requires.  Operands are then adjusted by
;; ix86_fixup_binary_operands_no_copy and the smax RTL is emitted.
465(define_expand "smaxv4sf3"
466 [(set (match_operand:V4SF 0 "register_operand" "")
467 (smax:V4SF (match_operand:V4SF 1 "nonimmediate_operand" "")
468 (match_operand:V4SF 2 "nonimmediate_operand" "")))]
469 "TARGET_SSE"
470{
471 if (!flag_finite_math_only)
472 operands[1] = force_reg (V4SFmode, operands[1]);
473 ix86_fixup_binary_operands_no_copy (SMAX, V4SFmode, operands);
474})
475
;; maxps, commutative form ("%0" on operand 1).  Only available when
;; flag_finite_math_only is set and ix86_binary_operator_ok accepts
;; the operands.
476(define_insn "*smaxv4sf3_finite"
477 [(set (match_operand:V4SF 0 "register_operand" "=x")
478 (smax:V4SF (match_operand:V4SF 1 "nonimmediate_operand" "%0")
479 (match_operand:V4SF 2 "nonimmediate_operand" "xm")))]
480 "TARGET_SSE && flag_finite_math_only
481 && ix86_binary_operator_ok (SMAX, V4SFmode, operands)"
482 "maxps\t{%2, %0|%0, %2}"
483 [(set_attr "type" "sse")
484 (set_attr "mode" "V4SF")])
485
;; maxps, non-commutative form: operand 1 is a register tied to the
;; destination with no "%" marker, so operand order is preserved.
486(define_insn "*smaxv4sf3"
487 [(set (match_operand:V4SF 0 "register_operand" "=x")
488 (smax:V4SF (match_operand:V4SF 1 "register_operand" "0")
489 (match_operand:V4SF 2 "nonimmediate_operand" "xm")))]
490 "TARGET_SSE"
491 "maxps\t{%2, %0|%0, %2}"
492 [(set_attr "type" "sse")
493 (set_attr "mode" "V4SF")])
494
;; maxss: smax of operands 1 and 2 is merged with operand 1 under
;; vec_merge mask 1, so only element 0 takes the maximum.
495(define_insn "sse_vmsmaxv4sf3"
496 [(set (match_operand:V4SF 0 "register_operand" "=x")
497 (vec_merge:V4SF
498 (smax:V4SF (match_operand:V4SF 1 "register_operand" "0")
499 (match_operand:V4SF 2 "nonimmediate_operand" "xm"))
500 (match_dup 1)
501 (const_int 1)))]
502 "TARGET_SSE"
503 "maxss\t{%2, %0|%0, %2}"
504 [(set_attr "type" "sse")
505 (set_attr "mode" "SF")])
506
;; V4SF smin expander.  Unless flag_finite_math_only is set, operand 1
;; is forced into a register first, which is what the non-commutative
;; "*sminv4sf3" insn requires.  Operands are then adjusted by
;; ix86_fixup_binary_operands_no_copy and the smin RTL is emitted.
507(define_expand "sminv4sf3"
508 [(set (match_operand:V4SF 0 "register_operand" "")
509 (smin:V4SF (match_operand:V4SF 1 "nonimmediate_operand" "")
510 (match_operand:V4SF 2 "nonimmediate_operand" "")))]
511 "TARGET_SSE"
512{
513 if (!flag_finite_math_only)
514 operands[1] = force_reg (V4SFmode, operands[1]);
515 ix86_fixup_binary_operands_no_copy (SMIN, V4SFmode, operands);
516})
517
;; minps, commutative form ("%0" on operand 1).  Only available when
;; flag_finite_math_only is set and ix86_binary_operator_ok accepts
;; the operands.
518(define_insn "*sminv4sf3_finite"
519 [(set (match_operand:V4SF 0 "register_operand" "=x")
520 (smin:V4SF (match_operand:V4SF 1 "nonimmediate_operand" "%0")
521 (match_operand:V4SF 2 "nonimmediate_operand" "xm")))]
522 "TARGET_SSE && flag_finite_math_only
523 && ix86_binary_operator_ok (SMIN, V4SFmode, operands)"
524 "minps\t{%2, %0|%0, %2}"
525 [(set_attr "type" "sse")
526 (set_attr "mode" "V4SF")])
527
;; minps, non-commutative form: operand 1 is a register tied to the
;; destination with no "%" marker, so operand order is preserved.
528(define_insn "*sminv4sf3"
529 [(set (match_operand:V4SF 0 "register_operand" "=x")
530 (smin:V4SF (match_operand:V4SF 1 "register_operand" "0")
531 (match_operand:V4SF 2 "nonimmediate_operand" "xm")))]
532 "TARGET_SSE"
533 "minps\t{%2, %0|%0, %2}"
534 [(set_attr "type" "sse")
535 (set_attr "mode" "V4SF")])
536
;; minss: smin of operands 1 and 2 is merged with operand 1 under
;; vec_merge mask 1, so only element 0 takes the minimum.
537(define_insn "sse_vmsminv4sf3"
538 [(set (match_operand:V4SF 0 "register_operand" "=x")
539 (vec_merge:V4SF
540 (smin:V4SF (match_operand:V4SF 1 "register_operand" "0")
541 (match_operand:V4SF 2 "nonimmediate_operand" "xm"))
542 (match_dup 1)
543 (const_int 1)))]
544 "TARGET_SSE"
545 "minss\t{%2, %0|%0, %2}"
546 [(set_attr "type" "sse")
547 (set_attr "mode" "SF")])
548
549;; These versions of the min/max patterns implement exactly the operations
550;; min = (op1 < op2 ? op1 : op2)
551;; max = (!(op1 < op2) ? op1 : op2)
552;; Their operands are not commutative, and thus they may be used in the
553;; presence of -0.0 and NaN.
554
;; minps as UNSPEC_IEEE_MIN of V4SF operands 1 and 2.  Being an unspec
;; with a fixed operand order, it is never treated as commutative.
555(define_insn "*ieee_sminv4sf3"
556 [(set (match_operand:V4SF 0 "register_operand" "=x")
557 (unspec:V4SF [(match_operand:V4SF 1 "register_operand" "0")
558 (match_operand:V4SF 2 "nonimmediate_operand" "xm")]
559 UNSPEC_IEEE_MIN))]
560 "TARGET_SSE"
561 "minps\t{%2, %0|%0, %2}"
562 [(set_attr "type" "sseadd")
563 (set_attr "mode" "V4SF")])
564
;; maxps as UNSPEC_IEEE_MAX of V4SF operands 1 and 2, with a fixed
;; operand order.
565(define_insn "*ieee_smaxv4sf3"
566 [(set (match_operand:V4SF 0 "register_operand" "=x")
567 (unspec:V4SF [(match_operand:V4SF 1 "register_operand" "0")
568 (match_operand:V4SF 2 "nonimmediate_operand" "xm")]
569 UNSPEC_IEEE_MAX))]
570 "TARGET_SSE"
571 "maxps\t{%2, %0|%0, %2}"
572 [(set_attr "type" "sseadd")
573 (set_attr "mode" "V4SF")])
574
;; minpd as UNSPEC_IEEE_MIN of V2DF operands 1 and 2, with a fixed
;; operand order.  Requires TARGET_SSE2.
575(define_insn "*ieee_sminv2df3"
576 [(set (match_operand:V2DF 0 "register_operand" "=x")
577 (unspec:V2DF [(match_operand:V2DF 1 "register_operand" "0")
578 (match_operand:V2DF 2 "nonimmediate_operand" "xm")]
579 UNSPEC_IEEE_MIN))]
580 "TARGET_SSE2"
581 "minpd\t{%2, %0|%0, %2}"
582 [(set_attr "type" "sseadd")
583 (set_attr "mode" "V2DF")])
584
;; maxpd as UNSPEC_IEEE_MAX of V2DF operands 1 and 2, with a fixed
;; operand order.  Requires TARGET_SSE2.
585(define_insn "*ieee_smaxv2df3"
586 [(set (match_operand:V2DF 0 "register_operand" "=x")
587 (unspec:V2DF [(match_operand:V2DF 1 "register_operand" "0")
588 (match_operand:V2DF 2 "nonimmediate_operand" "xm")]
589 UNSPEC_IEEE_MAX))]
590 "TARGET_SSE2"
591 "maxpd\t{%2, %0|%0, %2}"
592 [(set_attr "type" "sseadd")
593 (set_attr "mode" "V2DF")])
594
;; addsubps: subtracts in elements 0 and 2 and adds in elements 1
;; and 3.  In a vec_merge a set mask bit selects the element from the
;; first arm (here the plus), so the mask must be 0b1010 = 10: the
;; sum goes to elements 1 and 3 and the difference to elements 0 and 2.
595(define_insn "sse3_addsubv4sf3"
596 [(set (match_operand:V4SF 0 "register_operand" "=x")
597 (vec_merge:V4SF
598 (plus:V4SF
599 (match_operand:V4SF 1 "register_operand" "0")
600 (match_operand:V4SF 2 "nonimmediate_operand" "xm"))
601 (minus:V4SF (match_dup 1) (match_dup 2))
602 (const_int 10)))]
603 "TARGET_SSE3"
604 "addsubps\t{%2, %0|%0, %2}"
605 [(set_attr "type" "sseadd")
606 (set_attr "mode" "V4SF")])
607
;; haddps, written out element by element.  With a = operand 1 and
;; b = operand 2 the result is
;;   { a[0]+a[1], a[2]+a[3], b[0]+b[1], b[2]+b[3] }.
;; Operand 1 is tied to the destination.
608(define_insn "sse3_haddv4sf3"
609 [(set (match_operand:V4SF 0 "register_operand" "=x")
610 (vec_concat:V4SF
611 (vec_concat:V2SF
612 (plus:SF
613 (vec_select:SF
614 (match_operand:V4SF 1 "register_operand" "0")
615 (parallel [(const_int 0)]))
616 (vec_select:SF (match_dup 1) (parallel [(const_int 1)])))
617 (plus:SF
618 (vec_select:SF (match_dup 1) (parallel [(const_int 2)]))
619 (vec_select:SF (match_dup 1) (parallel [(const_int 3)]))))
620 (vec_concat:V2SF
621 (plus:SF
622 (vec_select:SF
623 (match_operand:V4SF 2 "nonimmediate_operand" "xm")
624 (parallel [(const_int 0)]))
625 (vec_select:SF (match_dup 2) (parallel [(const_int 1)])))
626 (plus:SF
627 (vec_select:SF (match_dup 2) (parallel [(const_int 2)]))
628 (vec_select:SF (match_dup 2) (parallel [(const_int 3)]))))))]
629 "TARGET_SSE3"
630 "haddps\t{%2, %0|%0, %2}"
631 [(set_attr "type" "sseadd")
632 (set_attr "mode" "V4SF")])
633
;; hsubps, written out element by element.  With a = operand 1 and
;; b = operand 2 the result is
;;   { a[0]-a[1], a[2]-a[3], b[0]-b[1], b[2]-b[3] }.
;; Operand 1 is tied to the destination.
634(define_insn "sse3_hsubv4sf3"
635 [(set (match_operand:V4SF 0 "register_operand" "=x")
636 (vec_concat:V4SF
637 (vec_concat:V2SF
638 (minus:SF
639 (vec_select:SF
640 (match_operand:V4SF 1 "register_operand" "0")
641 (parallel [(const_int 0)]))
642 (vec_select:SF (match_dup 1) (parallel [(const_int 1)])))
643 (minus:SF
644 (vec_select:SF (match_dup 1) (parallel [(const_int 2)]))
645 (vec_select:SF (match_dup 1) (parallel [(const_int 3)]))))
646 (vec_concat:V2SF
647 (minus:SF
648 (vec_select:SF
649 (match_operand:V4SF 2 "nonimmediate_operand" "xm")
650 (parallel [(const_int 0)]))
651 (vec_select:SF (match_dup 2) (parallel [(const_int 1)])))
652 (minus:SF
653 (vec_select:SF (match_dup 2) (parallel [(const_int 2)]))
654 (vec_select:SF (match_dup 2) (parallel [(const_int 3)]))))))]
655 "TARGET_SSE3"
656 "hsubps\t{%2, %0|%0, %2}"
657 [(set_attr "type" "sseadd")
658 (set_attr "mode" "V4SF")])
659
;; Sum reduction of V4SF operand 1 into V4SF operand 0.
;; With TARGET_SSE3 it emits two sse3_haddv4sf3 insns: the first adds
;; operand 1 to itself pairwise into a fresh temporary, the second
;; does the same with the temporary into operand 0.  Otherwise it
;; calls ix86_expand_reduc_v4sf with gen_addv4sf3 as the combiner.
660(define_expand "reduc_splus_v4sf"
661 [(match_operand:V4SF 0 "register_operand" "")
662 (match_operand:V4SF 1 "register_operand" "")]
663 "TARGET_SSE"
664{
665 if (TARGET_SSE3)
666 {
667 rtx tmp = gen_reg_rtx (V4SFmode);
668 emit_insn (gen_sse3_haddv4sf3 (tmp, operands[1], operands[1]));
669 emit_insn (gen_sse3_haddv4sf3 (operands[0], tmp, tmp));
670 }
671 else
672 ix86_expand_reduc_v4sf (gen_addv4sf3, operands[0], operands[1]);
673 DONE;
674})
675
;; Max reduction of V4SF operand 1 into operand 0: calls
;; ix86_expand_reduc_v4sf with gen_smaxv4sf3 as the combiner.
676(define_expand "reduc_smax_v4sf"
677 [(match_operand:V4SF 0 "register_operand" "")
678 (match_operand:V4SF 1 "register_operand" "")]
679 "TARGET_SSE"
680{
681 ix86_expand_reduc_v4sf (gen_smaxv4sf3, operands[0], operands[1]);
682 DONE;
683})
684
;; Min reduction of V4SF operand 1 into operand 0: calls
;; ix86_expand_reduc_v4sf with gen_sminv4sf3 as the combiner.
685(define_expand "reduc_smin_v4sf"
686 [(match_operand:V4SF 0 "register_operand" "")
687 (match_operand:V4SF 1 "register_operand" "")]
688 "TARGET_SSE"
689{
690 ix86_expand_reduc_v4sf (gen_sminv4sf3, operands[0], operands[1]);
691 DONE;
692})
693
694;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
695;;
696;; Parallel single-precision floating point comparisons
697;;
698;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
699
;; cmp<cond>ps: V4SF comparison whose RTL code is matched by
;; sse_comparison_operator (operand 3).  The condition suffix is
;; printed from operand 3 with the %D modifier.  Operand 1 is tied to
;; the destination.
700(define_insn "sse_maskcmpv4sf3"
701 [(set (match_operand:V4SF 0 "register_operand" "=x")
702 (match_operator:V4SF 3 "sse_comparison_operator"
703 [(match_operand:V4SF 1 "register_operand" "0")
704 (match_operand:V4SF 2 "nonimmediate_operand" "xm")]))]
705 "TARGET_SSE"
706 "cmp%D3ps\t{%2, %0|%0, %2}"
707 [(set_attr "type" "ssecmp")
708 (set_attr "mode" "V4SF")])
709
;; Scalar (cmpss) form of the mask compare: the vec_merge with mask 1
;; takes element 0 from the comparison and the remaining elements from
;; operand 1.  Unlike the packed form, operand 2 is register-only here.
710(define_insn "sse_vmmaskcmpv4sf3"
711 [(set (match_operand:V4SF 0 "register_operand" "=x")
712 (vec_merge:V4SF
713 (match_operator:V4SF 3 "sse_comparison_operator"
714 [(match_operand:V4SF 1 "register_operand" "0")
715 (match_operand:V4SF 2 "register_operand" "x")])
716 (match_dup 1)
717 (const_int 1)))]
718 "TARGET_SSE"
719 "cmp%D3ss\t{%2, %0|%0, %2}"
720 [(set_attr "type" "ssecmp")
721 (set_attr "mode" "SF")])
722
;; comiss: compares element 0 of operand 0 with element 0 of operand 1
;; and sets FLAGS_REG in CCFP mode.  Operand 1 may be in memory.
723(define_insn "sse_comi"
724 [(set (reg:CCFP FLAGS_REG)
725 (compare:CCFP
726 (vec_select:SF
727 (match_operand:V4SF 0 "register_operand" "x")
728 (parallel [(const_int 0)]))
729 (vec_select:SF
730 (match_operand:V4SF 1 "nonimmediate_operand" "xm")
731 (parallel [(const_int 0)]))))]
732 "TARGET_SSE"
733 "comiss\t{%1, %0|%0, %1}"
734 [(set_attr "type" "ssecomi")
735 (set_attr "mode" "SF")])
736
;; ucomiss: same operand shape as sse_comi, but the flags result is
;; expressed in CCFPU mode instead of CCFP.
737(define_insn "sse_ucomi"
738 [(set (reg:CCFPU FLAGS_REG)
739 (compare:CCFPU
740 (vec_select:SF
741 (match_operand:V4SF 0 "register_operand" "x")
742 (parallel [(const_int 0)]))
743 (vec_select:SF
744 (match_operand:V4SF 1 "nonimmediate_operand" "xm")
745 (parallel [(const_int 0)]))))]
746 "TARGET_SSE"
747 "ucomiss\t{%1, %0|%0, %1}"
748 [(set_attr "type" "ssecomi")
749 (set_attr "mode" "SF")])
750
;; Vector conditional select for V4SF: operands 4 and 5 are compared by
;; operator 3, selecting between operands 1 and 2 into operand 0.  All
;; the work is done by ix86_expand_fp_vcond; the expander FAILs when
;; that helper returns false.
751(define_expand "vcondv4sf"
752 [(set (match_operand:V4SF 0 "register_operand" "")
753 (if_then_else:V4SF
754 (match_operator 3 ""
755 [(match_operand:V4SF 4 "nonimmediate_operand" "")
756 (match_operand:V4SF 5 "nonimmediate_operand" "")])
757 (match_operand:V4SF 1 "general_operand" "")
758 (match_operand:V4SF 2 "general_operand" "")))]
759 "TARGET_SSE"
760{
761 if (ix86_expand_fp_vcond (operands))
762 DONE;
763 else
764 FAIL;
765})
766
767;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
768;;
769;; Parallel single-precision floating point logical operations
770;;
771;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
772
;; Named expander for V4SF bitwise AND.  The preparation statement only
;; runs ix86_fixup_binary_operands_no_copy on the operands; the RTL
;; template itself is then emitted.
773(define_expand "andv4sf3"
774 [(set (match_operand:V4SF 0 "register_operand" "")
775 (and:V4SF (match_operand:V4SF 1 "nonimmediate_operand" "")
776 (match_operand:V4SF 2 "nonimmediate_operand" "")))]
777 "TARGET_SSE"
778 "ix86_fixup_binary_operands_no_copy (AND, V4SFmode, operands);")
779
;; Matcher for V4SF AND, emitted as andps.  Operand 1 is marked
;; commutative ("%0") and the pattern additionally requires
;; ix86_binary_operator_ok to accept the operand combination.
780(define_insn "*andv4sf3"
781 [(set (match_operand:V4SF 0 "register_operand" "=x")
782 (and:V4SF (match_operand:V4SF 1 "nonimmediate_operand" "%0")
783 (match_operand:V4SF 2 "nonimmediate_operand" "xm")))]
784 "TARGET_SSE && ix86_binary_operator_ok (AND, V4SFmode, operands)"
785 "andps\t{%2, %0|%0, %2}"
786 [(set_attr "type" "sselog")
787 (set_attr "mode" "V4SF")])
788
;; andnps: computes (~operand 1) & operand 2.  Because the complemented
;; input is the one tied to the destination, the pattern is not
;; commutative and operand 1 must be a register.
789(define_insn "sse_nandv4sf3"
790 [(set (match_operand:V4SF 0 "register_operand" "=x")
791 (and:V4SF (not:V4SF (match_operand:V4SF 1 "register_operand" "0"))
792 (match_operand:V4SF 2 "nonimmediate_operand" "xm")))]
793 "TARGET_SSE"
794 "andnps\t{%2, %0|%0, %2}"
795 [(set_attr "type" "sselog")
796 (set_attr "mode" "V4SF")])
797
;; Named expander for V4SF bitwise inclusive OR; operands are adjusted
;; by ix86_fixup_binary_operands_no_copy before the template is emitted.
798(define_expand "iorv4sf3"
799 [(set (match_operand:V4SF 0 "register_operand" "")
800 (ior:V4SF (match_operand:V4SF 1 "nonimmediate_operand" "")
801 (match_operand:V4SF 2 "nonimmediate_operand" "")))]
802 "TARGET_SSE"
803 "ix86_fixup_binary_operands_no_copy (IOR, V4SFmode, operands);")
804
;; Matcher for V4SF inclusive OR, emitted as orps.  Commutative in
;; operand 1; gated on ix86_binary_operator_ok.
805(define_insn "*iorv4sf3"
806 [(set (match_operand:V4SF 0 "register_operand" "=x")
807 (ior:V4SF (match_operand:V4SF 1 "nonimmediate_operand" "%0")
808 (match_operand:V4SF 2 "nonimmediate_operand" "xm")))]
809 "TARGET_SSE && ix86_binary_operator_ok (IOR, V4SFmode, operands)"
810 "orps\t{%2, %0|%0, %2}"
811 [(set_attr "type" "sselog")
812 (set_attr "mode" "V4SF")])
813
;; Named expander for V4SF bitwise exclusive OR; operands are adjusted
;; by ix86_fixup_binary_operands_no_copy before the template is emitted.
814(define_expand "xorv4sf3"
815 [(set (match_operand:V4SF 0 "register_operand" "")
816 (xor:V4SF (match_operand:V4SF 1 "nonimmediate_operand" "")
817 (match_operand:V4SF 2 "nonimmediate_operand" "")))]
818 "TARGET_SSE"
819 "ix86_fixup_binary_operands_no_copy (XOR, V4SFmode, operands);")
820
;; Matcher for V4SF exclusive OR, emitted as xorps.  Commutative in
;; operand 1; gated on ix86_binary_operator_ok.
821(define_insn "*xorv4sf3"
822 [(set (match_operand:V4SF 0 "register_operand" "=x")
823 (xor:V4SF (match_operand:V4SF 1 "nonimmediate_operand" "%0")
824 (match_operand:V4SF 2 "nonimmediate_operand" "xm")))]
825 "TARGET_SSE && ix86_binary_operator_ok (XOR, V4SFmode, operands)"
826 "xorps\t{%2, %0|%0, %2}"
827 [(set_attr "type" "sselog")
828 (set_attr "mode" "V4SF")])
829
830;; Also define scalar versions. These are used for abs, neg, and
831;; conditional move. Using subregs into vector modes causes register
832;; allocation lossage. These patterns do not allow memory operands
833;; because the native instructions read the full 128-bits.
834
;; Scalar SFmode AND implemented with the packed andps instruction.
;; Both inputs are register-only; the "mode" attribute is V4SF because
;; the emitted instruction is the full-width one.
835(define_insn "*andsf3"
836 [(set (match_operand:SF 0 "register_operand" "=x")
837 (and:SF (match_operand:SF 1 "register_operand" "0")
838 (match_operand:SF 2 "register_operand" "x")))]
839 "TARGET_SSE"
840 "andps\t{%2, %0|%0, %2}"
841 [(set_attr "type" "sselog")
842 (set_attr "mode" "V4SF")])
843
;; Scalar SFmode and-not, (~operand 1) & operand 2, implemented with the
;; packed andnps instruction on register-only operands.
844(define_insn "*nandsf3"
845 [(set (match_operand:SF 0 "register_operand" "=x")
846 (and:SF (not:SF (match_operand:SF 1 "register_operand" "0"))
847 (match_operand:SF 2 "register_operand" "x")))]
848 "TARGET_SSE"
849 "andnps\t{%2, %0|%0, %2}"
850 [(set_attr "type" "sselog")
851 (set_attr "mode" "V4SF")])
852
;; Scalar SFmode inclusive OR implemented with the packed orps
;; instruction on register-only operands.
853(define_insn "*iorsf3"
854 [(set (match_operand:SF 0 "register_operand" "=x")
855 (ior:SF (match_operand:SF 1 "register_operand" "0")
856 (match_operand:SF 2 "register_operand" "x")))]
857 "TARGET_SSE"
858 "orps\t{%2, %0|%0, %2}"
859 [(set_attr "type" "sselog")
860 (set_attr "mode" "V4SF")])
861
;; Scalar SFmode exclusive OR implemented with the packed xorps
;; instruction on register-only operands.
862(define_insn "*xorsf3"
863 [(set (match_operand:SF 0 "register_operand" "=x")
864 (xor:SF (match_operand:SF 1 "register_operand" "0")
865 (match_operand:SF 2 "register_operand" "x")))]
866 "TARGET_SSE"
867 "xorps\t{%2, %0|%0, %2}"
868 [(set_attr "type" "sselog")
869 (set_attr "mode" "V4SF")])
870
871;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
872;;
873;; Parallel single-precision floating point conversion operations
874;;
875;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
876
;; cvtpi2ps: converts the V2SI operand 2 (MMX register or memory) to
;; floats and merges them into elements 0 and 1 (mask 3) of operand 1,
;; which is tied to the destination.
877(define_insn "sse_cvtpi2ps"
878 [(set (match_operand:V4SF 0 "register_operand" "=x")
879 (vec_merge:V4SF
880 (vec_duplicate:V4SF
881 (float:V2SF (match_operand:V2SI 2 "nonimmediate_operand" "ym")))
882 (match_operand:V4SF 1 "register_operand" "0")
883 (const_int 3)))]
884 "TARGET_SSE"
885 "cvtpi2ps\t{%2, %0|%0, %2}"
886 [(set_attr "type" "ssecvt")
887 (set_attr "mode" "V4SF")])
888
;; cvtps2pi: non-truncating float-to-int conversion (modeled with
;; UNSPEC_FIX_NOTRUNC) of operand 1; elements 0 and 1 of the V4SI
;; result are written to the MMX-register destination.
889(define_insn "sse_cvtps2pi"
890 [(set (match_operand:V2SI 0 "register_operand" "=y")
891 (vec_select:V2SI
892 (unspec:V4SI [(match_operand:V4SF 1 "nonimmediate_operand" "xm")]
893 UNSPEC_FIX_NOTRUNC)
894 (parallel [(const_int 0) (const_int 1)])))]
895 "TARGET_SSE"
896 "cvtps2pi\t{%1, %0|%0, %1}"
897 [(set_attr "type" "ssecvt")
898 (set_attr "unit" "mmx")
899 (set_attr "mode" "DI")])
900
;; cvttps2pi: truncating (fix) conversion of operand 1; elements 0 and 1
;; of the V4SI result go to the MMX-register destination.
;; NOTE(review): the "mode" attribute here is SF while the otherwise
;; parallel sse_cvtps2pi uses DI -- confirm which is intended.
901(define_insn "sse_cvttps2pi"
902 [(set (match_operand:V2SI 0 "register_operand" "=y")
903 (vec_select:V2SI
904 (fix:V4SI (match_operand:V4SF 1 "nonimmediate_operand" "xm"))
905 (parallel [(const_int 0) (const_int 1)])))]
906 "TARGET_SSE"
907 "cvttps2pi\t{%1, %0|%0, %1}"
908 [(set_attr "type" "ssecvt")
909 (set_attr "unit" "mmx")
910 (set_attr "mode" "SF")])
911
;; cvtsi2ss: converts the SImode operand 2 to float and merges it into
;; element 0 of operand 1 (tied to the destination).  Two alternatives,
;; register source and memory source, carry different athlon_decode
;; values.
912(define_insn "sse_cvtsi2ss"
913 [(set (match_operand:V4SF 0 "register_operand" "=x,x")
914 (vec_merge:V4SF
915 (vec_duplicate:V4SF
916 (float:SF (match_operand:SI 2 "nonimmediate_operand" "r,m")))
917 (match_operand:V4SF 1 "register_operand" "0,0")
918 (const_int 1)))]
919 "TARGET_SSE"
920 "cvtsi2ss\t{%2, %0|%0, %2}"
921 [(set_attr "type" "sseicvt")
922 (set_attr "athlon_decode" "vector,double")
923 (set_attr "mode" "SF")])
924
;; 64-bit variant of sse_cvtsi2ss: DImode source, cvtsi2ssq mnemonic,
;; only available when TARGET_64BIT.
;; NOTE(review): the second alternative's constraint is "rm" whereas
;; sse_cvtsi2ss uses plain "m" -- confirm the overlap with alternative
;; one is intentional.
925(define_insn "sse_cvtsi2ssq"
926 [(set (match_operand:V4SF 0 "register_operand" "=x,x")
927 (vec_merge:V4SF
928 (vec_duplicate:V4SF
929 (float:SF (match_operand:DI 2 "nonimmediate_operand" "r,rm")))
930 (match_operand:V4SF 1 "register_operand" "0,0")
931 (const_int 1)))]
932 "TARGET_SSE && TARGET_64BIT"
933 "cvtsi2ssq\t{%2, %0|%0, %2}"
934 [(set_attr "type" "sseicvt")
935 (set_attr "athlon_decode" "vector,double")
936 (set_attr "mode" "SF")])
937
;; cvtss2si: non-truncating conversion (UNSPEC_FIX_NOTRUNC) of element 0
;; of operand 1 to an SImode general register.  Alternatives: SSE
;; register source or memory source.
938(define_insn "sse_cvtss2si"
939 [(set (match_operand:SI 0 "register_operand" "=r,r")
940 (unspec:SI
941 [(vec_select:SF
942 (match_operand:V4SF 1 "nonimmediate_operand" "x,m")
943 (parallel [(const_int 0)]))]
944 UNSPEC_FIX_NOTRUNC))]
945 "TARGET_SSE"
946 "cvtss2si\t{%1, %0|%0, %1}"
947 [(set_attr "type" "sseicvt")
948 (set_attr "athlon_decode" "double,vector")
949 (set_attr "mode" "SI")])
950
;; 64-bit variant of sse_cvtss2si: DImode destination, cvtss2siq
;; mnemonic, only available when TARGET_64BIT.
951(define_insn "sse_cvtss2siq"
952 [(set (match_operand:DI 0 "register_operand" "=r,r")
953 (unspec:DI
954 [(vec_select:SF
955 (match_operand:V4SF 1 "nonimmediate_operand" "x,m")
956 (parallel [(const_int 0)]))]
957 UNSPEC_FIX_NOTRUNC))]
958 "TARGET_SSE && TARGET_64BIT"
959 "cvtss2siq\t{%1, %0|%0, %1}"
960 [(set_attr "type" "sseicvt")
961 (set_attr "athlon_decode" "double,vector")
962 (set_attr "mode" "DI")])
963
;; cvttss2si: truncating (fix) conversion of element 0 of operand 1 to
;; an SImode general register.
964(define_insn "sse_cvttss2si"
965 [(set (match_operand:SI 0 "register_operand" "=r,r")
966 (fix:SI
967 (vec_select:SF
968 (match_operand:V4SF 1 "nonimmediate_operand" "x,m")
969 (parallel [(const_int 0)]))))]
970 "TARGET_SSE"
971 "cvttss2si\t{%1, %0|%0, %1}"
972 [(set_attr "type" "sseicvt")
973 (set_attr "athlon_decode" "double,vector")
974 (set_attr "mode" "SI")])
975
;; 64-bit variant of sse_cvttss2si: DImode destination, cvttss2siq
;; mnemonic, only available when TARGET_64BIT.
976(define_insn "sse_cvttss2siq"
977 [(set (match_operand:DI 0 "register_operand" "=r,r")
978 (fix:DI
979 (vec_select:SF
980 (match_operand:V4SF 1 "nonimmediate_operand" "x,m")
981 (parallel [(const_int 0)]))))]
982 "TARGET_SSE && TARGET_64BIT"
983 "cvttss2siq\t{%1, %0|%0, %1}"
984 [(set_attr "type" "sseicvt")
985 (set_attr "athlon_decode" "double,vector")
986 (set_attr "mode" "DI")])
987
;; cvtdq2ps (SSE2): converts the four signed dwords of operand 1
;; (register or memory) to packed single-precision floats.  The "mode"
;; attribute is V4SF so that it agrees with the V4SF destination of the
;; pattern rather than naming a double-precision vector mode.
988(define_insn "sse2_cvtdq2ps"
989 [(set (match_operand:V4SF 0 "register_operand" "=x")
990 (float:V4SF (match_operand:V4SI 1 "nonimmediate_operand" "xm")))]
991 "TARGET_SSE2"
992 "cvtdq2ps\t{%1, %0|%0, %1}"
993 [(set_attr "type" "ssecvt")
994 (set_attr "mode" "V4SF")])
995
;; cvtps2dq (SSE2): non-truncating packed float-to-dword conversion,
;; modeled as UNSPEC_FIX_NOTRUNC over the V4SF source.
996(define_insn "sse2_cvtps2dq"
997 [(set (match_operand:V4SI 0 "register_operand" "=x")
998 (unspec:V4SI [(match_operand:V4SF 1 "nonimmediate_operand" "xm")]
999 UNSPEC_FIX_NOTRUNC))]
1000 "TARGET_SSE2"
1001 "cvtps2dq\t{%1, %0|%0, %1}"
1002 [(set_attr "type" "ssecvt")
1003 (set_attr "mode" "TI")])
1004
;; cvttps2dq (SSE2): truncating packed float-to-dword conversion,
;; expressed directly with the fix rtx.
1005(define_insn "sse2_cvttps2dq"
1006 [(set (match_operand:V4SI 0 "register_operand" "=x")
1007 (fix:V4SI (match_operand:V4SF 1 "nonimmediate_operand" "xm")))]
1008 "TARGET_SSE2"
1009 "cvttps2dq\t{%1, %0|%0, %1}"
1010 [(set_attr "type" "ssecvt")
1011 (set_attr "mode" "TI")])
1012
1013;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
1014;;
1015;; Parallel single-precision floating point element swizzling
1016;;
1017;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
1018
;; Selects elements {6,7,2,3} of the concatenation (operand 1, operand 2):
;; the high half of operand 2 followed by the high half of operand 1.
;; Three alternatives: reg/reg via movhlps, offsettable-memory source
;; via movlps on %H2, and memory destination via movhps.  The condition
;; rejects the case where operands 1 and 2 are both memory.
1019(define_insn "sse_movhlps"
1020 [(set (match_operand:V4SF 0 "nonimmediate_operand" "=x,x,m")
1021 (vec_select:V4SF
1022 (vec_concat:V8SF
1023 (match_operand:V4SF 1 "nonimmediate_operand" " 0,0,0")
1024 (match_operand:V4SF 2 "nonimmediate_operand" " x,o,x"))
1025 (parallel [(const_int 6)
1026 (const_int 7)
1027 (const_int 2)
1028 (const_int 3)])))]
1029 "TARGET_SSE && !(MEM_P (operands[1]) && MEM_P (operands[2]))"
1030 "@
1031 movhlps\t{%2, %0|%0, %2}
1032 movlps\t{%H2, %0|%0, %H2}
1033 movhps\t{%2, %0|%0, %2}"
1034 [(set_attr "type" "ssemov")
1035 (set_attr "mode" "V4SF,V2SF,V2SF")])
1036
;; Selects elements {0,1,4,5} of the concatenation (operand 1, operand 2):
;; the low half of operand 1 followed by the low half of operand 2.
;; Three alternatives: reg/reg via movlhps, memory source via movhps,
;; and offsettable-memory destination via movlps on %H0.
1037(define_insn "sse_movlhps"
1038 [(set (match_operand:V4SF 0 "nonimmediate_operand" "=x,x,o")
1039 (vec_select:V4SF
1040 (vec_concat:V8SF
1041 (match_operand:V4SF 1 "nonimmediate_operand" " 0,0,0")
1042 (match_operand:V4SF 2 "nonimmediate_operand" " x,m,x"))
1043 (parallel [(const_int 0)
1044 (const_int 1)
1045 (const_int 4)
1046 (const_int 5)])))]
1047 "TARGET_SSE && ix86_binary_operator_ok (UNKNOWN, V4SFmode, operands)"
1048 "@
1049 movlhps\t{%2, %0|%0, %2}
1050 movhps\t{%2, %0|%0, %2}
1051 movlps\t{%2, %H0|%H0, %2}"
1052 [(set_attr "type" "ssemov")
1053 (set_attr "mode" "V4SF,V2SF,V2SF")])
1054
;; unpckhps: interleaves the high halves of operands 1 and 2, i.e.
;; elements {2,6,3,7} of their concatenation.
1055(define_insn "sse_unpckhps"
1056 [(set (match_operand:V4SF 0 "register_operand" "=x")
1057 (vec_select:V4SF
1058 (vec_concat:V8SF
1059 (match_operand:V4SF 1 "register_operand" "0")
1060 (match_operand:V4SF 2 "nonimmediate_operand" "xm"))
1061 (parallel [(const_int 2) (const_int 6)
1062 (const_int 3) (const_int 7)])))]
1063 "TARGET_SSE"
1064 "unpckhps\t{%2, %0|%0, %2}"
1065 [(set_attr "type" "sselog")
1066 (set_attr "mode" "V4SF")])
1067
;; unpcklps: interleaves the low halves of operands 1 and 2, i.e.
;; elements {0,4,1,5} of their concatenation.
1068(define_insn "sse_unpcklps"
1069 [(set (match_operand:V4SF 0 "register_operand" "=x")
1070 (vec_select:V4SF
1071 (vec_concat:V8SF
1072 (match_operand:V4SF 1 "register_operand" "0")
1073 (match_operand:V4SF 2 "nonimmediate_operand" "xm"))
1074 (parallel [(const_int 0) (const_int 4)
1075 (const_int 1) (const_int 5)])))]
1076 "TARGET_SSE"
1077 "unpcklps\t{%2, %0|%0, %2}"
1078 [(set_attr "type" "sselog")
1079 (set_attr "mode" "V4SF")])
1080
1081;; These are modeled with the same vec_concat as the others so that we
1082;; capture users of shufps that can use the new instructions
;; movshdup (SSE3): operand 1 is concatenated with itself and elements
;; {1,1,7,7} are selected; index 7 refers to element 3 of the second
;; copy, so the result duplicates the odd-numbered elements.
1083(define_insn "sse3_movshdup"
1084 [(set (match_operand:V4SF 0 "register_operand" "=x")
1085 (vec_select:V4SF
1086 (vec_concat:V8SF
1087 (match_operand:V4SF 1 "nonimmediate_operand" "xm")
1088 (match_dup 1))
1089 (parallel [(const_int 1)
1090 (const_int 1)
1091 (const_int 7)
1092 (const_int 7)])))]
1093 "TARGET_SSE3"
1094 "movshdup\t{%1, %0|%0, %1}"
1095 [(set_attr "type" "sse")
1096 (set_attr "mode" "V4SF")])
1097
;; movsldup (SSE3): operand 1 is concatenated with itself and elements
;; {0,0,6,6} are selected; index 6 refers to element 2 of the second
;; copy, so the result duplicates the even-numbered elements.
1098(define_insn "sse3_movsldup"
1099 [(set (match_operand:V4SF 0 "register_operand" "=x")
1100 (vec_select:V4SF
1101 (vec_concat:V8SF
1102 (match_operand:V4SF 1 "nonimmediate_operand" "xm")
1103 (match_dup 1))
1104 (parallel [(const_int 0)
1105 (const_int 0)
1106 (const_int 6)
1107 (const_int 6)])))]
1108 "TARGET_SSE3"
1109 "movsldup\t{%1, %0|%0, %1}"
1110 [(set_attr "type" "sse")
1111 (set_attr "mode" "V4SF")])
1112
;; Expander taking the shufps immediate as operand 3.  The 8-bit mask is
;; split into four 2-bit selectors; the upper two get 4 added so that
;; they index into the second half of the (operand 1, operand 2)
;; concatenation used by sse_shufps_1.
1113(define_expand "sse_shufps"
1114 [(match_operand:V4SF 0 "register_operand" "")
1115 (match_operand:V4SF 1 "register_operand" "")
1116 (match_operand:V4SF 2 "nonimmediate_operand" "")
1117 (match_operand:SI 3 "const_int_operand" "")]
1118 "TARGET_SSE"
1119{
1120 int mask = INTVAL (operands[3]);
1121 emit_insn (gen_sse_shufps_1 (operands[0], operands[1], operands[2],
1122 GEN_INT ((mask >> 0) & 3),
1123 GEN_INT ((mask >> 2) & 3),
1124 GEN_INT (((mask >> 4) & 3) + 4),
1125 GEN_INT (((mask >> 6) & 3) + 4)));
1126 DONE;
1127})
1128
;; The shufps insn proper.  Operands 3 and 4 select from operand 1
;; (indices 0..3) and operands 5 and 6 select from operand 2 (indices
;; 4..7).  The output code packs the four selectors back into the
;; instruction's 8-bit immediate (subtracting 4 from the upper pair)
;; and overwrites operands[3] with it so that %3 prints the immediate.
1129(define_insn "sse_shufps_1"
1130 [(set (match_operand:V4SF 0 "register_operand" "=x")
1131 (vec_select:V4SF
1132 (vec_concat:V8SF
1133 (match_operand:V4SF 1 "register_operand" "0")
1134 (match_operand:V4SF 2 "nonimmediate_operand" "xm"))
1135 (parallel [(match_operand 3 "const_0_to_3_operand" "")
1136 (match_operand 4 "const_0_to_3_operand" "")
1137 (match_operand 5 "const_4_to_7_operand" "")
1138 (match_operand 6 "const_4_to_7_operand" "")])))]
1139 "TARGET_SSE"
1140{
1141 int mask = 0;
1142 mask |= INTVAL (operands[3]) << 0;
1143 mask |= INTVAL (operands[4]) << 2;
1144 mask |= (INTVAL (operands[5]) - 4) << 4;
1145 mask |= (INTVAL (operands[6]) - 4) << 6;
1146 operands[3] = GEN_INT (mask);
1147
1148 return "shufps\t{%3, %2, %0|%0, %2, %3}";
1149}
1150 [(set_attr "type" "sselog")
1151 (set_attr "mode" "V4SF")])
1152
;; Extracts the high half (elements 2 and 3) of operand 1 as V2SF.
;; Alternatives: store to memory (movhps), register to register
;; (movhlps), and load from offsettable memory (movlps on %H1).
1153(define_insn "sse_storehps"
1154 [(set (match_operand:V2SF 0 "nonimmediate_operand" "=m,x,x")
1155 (vec_select:V2SF
1156 (match_operand:V4SF 1 "nonimmediate_operand" "x,x,o")
1157 (parallel [(const_int 2) (const_int 3)])))]
1158 "TARGET_SSE"
1159 "@
1160 movhps\t{%1, %0|%0, %1}
1161 movhlps\t{%1, %0|%0, %1}
1162 movlps\t{%H1, %0|%0, %H1}"
1163 [(set_attr "type" "ssemov")
1164 (set_attr "mode" "V2SF,V4SF,V2SF")])
1165
;; Replaces the high half of operand 1 with the V2SF operand 2, keeping
;; elements 0 and 1.  Alternatives: memory source (movhps), register
;; source (movlhps), and offsettable-memory destination (movlps on %H0).
1166(define_insn "sse_loadhps"
1167 [(set (match_operand:V4SF 0 "nonimmediate_operand" "=x,x,o")
1168 (vec_concat:V4SF
1169 (vec_select:V2SF
1170 (match_operand:V4SF 1 "nonimmediate_operand" "0,0,0")
1171 (parallel [(const_int 0) (const_int 1)]))
1172 (match_operand:V2SF 2 "nonimmediate_operand" "m,x,x")))]
1173 "TARGET_SSE"
1174 "@
1175 movhps\t{%2, %0|%0, %2}
1176 movlhps\t{%2, %0|%0, %2}
1177 movlps\t{%2, %H0|%H0, %2}"
1178 [(set_attr "type" "ssemov")
1179 (set_attr "mode" "V2SF,V4SF,V2SF")])
1180
;; Extracts the low half (elements 0 and 1) of operand 1 as V2SF.
;; Alternatives: store to memory (movlps), register to register (a full
;; movaps copy), and load from memory (movlps).
1181(define_insn "sse_storelps"
1182 [(set (match_operand:V2SF 0 "nonimmediate_operand" "=m,x,x")
1183 (vec_select:V2SF
1184 (match_operand:V4SF 1 "nonimmediate_operand" "x,x,m")
1185 (parallel [(const_int 0) (const_int 1)])))]
1186 "TARGET_SSE"
1187 "@
1188 movlps\t{%1, %0|%0, %1}
1189 movaps\t{%1, %0|%0, %1}
1190 movlps\t{%1, %0|%0, %1}"
1191 [(set_attr "type" "ssemov")
1192 (set_attr "mode" "V2SF,V4SF,V2SF")])
1193
;; Replaces the low half of operand 1 with the V2SF operand 2, keeping
;; elements 2 and 3.  When operand 2 is already in the destination
;; register the first alternative uses shufps with immediate 0xe4 to
;; pull the high half from operand 1; the other two alternatives use
;; movlps for a memory source or a memory destination.
1194(define_insn "sse_loadlps"
1195 [(set (match_operand:V4SF 0 "nonimmediate_operand" "=x,x,m")
1196 (vec_concat:V4SF
1197 (match_operand:V2SF 2 "nonimmediate_operand" "0,m,x")
1198 (vec_select:V2SF
1199 (match_operand:V4SF 1 "nonimmediate_operand" "x,0,0")
1200 (parallel [(const_int 2) (const_int 3)]))))]
1201 "TARGET_SSE"
1202 "@
1203 shufps\t{$0xe4, %1, %0|%0, %1, 0xe4}
1204 movlps\t{%2, %0|%0, %2}
1205 movlps\t{%2, %0|%0, %2}"
1206 [(set_attr "type" "sselog,ssemov,ssemov")
1207 (set_attr "mode" "V4SF,V2SF,V2SF")])
1208
;; Register-to-register movss: element 0 comes from operand 2, the
;; remaining elements from operand 1 (tied to the destination).
1209(define_insn "sse_movss"
1210 [(set (match_operand:V4SF 0 "register_operand" "=x")
1211 (vec_merge:V4SF
1212 (match_operand:V4SF 2 "register_operand" "x")
1213 (match_operand:V4SF 1 "register_operand" "0")
1214 (const_int 1)))]
1215 "TARGET_SSE"
1216 "movss\t{%2, %0|%0, %2}"
1217 [(set_attr "type" "ssemov")
1218 (set_attr "mode" "SF")])
1219
;; Broadcasts an SFmode value to all four V4SF elements.  The scalar
;; input is tied to the destination register and the broadcast is done
;; in place with shufps using immediate 0.
1220(define_insn "*vec_dupv4sf"
1221 [(set (match_operand:V4SF 0 "register_operand" "=x")
1222 (vec_duplicate:V4SF
1223 (match_operand:SF 1 "register_operand" "0")))]
1224 "TARGET_SSE"
1225 "shufps\t{$0, %0, %0|%0, %0, 0}"
1226 [(set_attr "type" "sselog1")
1227 (set_attr "mode" "V4SF")])
1228
1229;; ??? In theory we can match memory for the MMX alternative, but allowing
1230;; nonimmediate_operand for operand 2 and *not* allowing memory for the SSE
1231;; alternatives pretty much forces the MMX alternative to be chosen.
;; Builds a V2SF from two SFmode values.  Four alternatives: SSE
;; register pair (unpcklps), memory scalar with a zero upper element
;; (movss), MMX register pair (punpckldq), and memory scalar with a zero
;; upper element into an MMX register (movd).  Operand 2 is either a
;; register or the constant matched by constraint "C".
1232(define_insn "*sse_concatv2sf"
1233 [(set (match_operand:V2SF 0 "register_operand" "=x,x,*y,*y")
1234 (vec_concat:V2SF
1235 (match_operand:SF 1 "nonimmediate_operand" " 0,m, 0, m")
1236 (match_operand:SF 2 "reg_or_0_operand" " x,C,*y, C")))]
1237 "TARGET_SSE"
1238 "@
1239 unpcklps\t{%2, %0|%0, %2}
1240 movss\t{%1, %0|%0, %1}
1241 punpckldq\t{%2, %0|%0, %2}
1242 movd\t{%1, %0|%0, %1}"
1243 [(set_attr "type" "sselog,ssemov,mmxcvt,mmxmov")
1244 (set_attr "mode" "V4SF,SF,DI,DI")])
1245
;; Builds a V4SF from two V2SF halves.  The low half (operand 1) is tied
;; to the destination; the high half comes from a register (movlhps) or
;; from memory (movhps).
1246(define_insn "*sse_concatv4sf"
1247 [(set (match_operand:V4SF 0 "register_operand" "=x,x")
1248 (vec_concat:V4SF
1249 (match_operand:V2SF 1 "register_operand" " 0,0")
1250 (match_operand:V2SF 2 "nonimmediate_operand" " x,m")))]
1251 "TARGET_SSE"
1252 "@
1253 movlhps\t{%2, %0|%0, %2}
1254 movhps\t{%2, %0|%0, %2}"
1255 [(set_attr "type" "ssemov")
1256 (set_attr "mode" "V4SF,V2SF")])
1257
;; V4SF vector initialisation expander.  Hands operand 0 (target) and
;; operand 1 (initialiser) to ix86_expand_vector_init with a false
;; first argument.
1258(define_expand "vec_initv4sf"
1259 [(match_operand:V4SF 0 "register_operand" "")
1260 (match_operand 1 "" "")]
1261 "TARGET_SSE"
1262{
1263 ix86_expand_vector_init (false, operands[0], operands[1]);
1264 DONE;
1265})
1266
;; Sets element 0 of a V4SF from the SFmode operand 2, with the other
;; elements taken from operand 1.  Alternatives: register into the tied
;; vector (movss), memory load with constant-"C" remainder (movss),
;; general register with constant-"C" remainder (movd), and a memory
;; destination which emits "#" and is therefore left to a splitter.
1267(define_insn "*vec_setv4sf_0"
1268 [(set (match_operand:V4SF 0 "nonimmediate_operand" "=x,x,Y ,m")
1269 (vec_merge:V4SF
1270 (vec_duplicate:V4SF
1271 (match_operand:SF 2 "general_operand" " x,m,*r,x*rfF"))
1272 (match_operand:V4SF 1 "vector_move_operand" " 0,C,C ,0")
1273 (const_int 1)))]
1274 "TARGET_SSE"
1275 "@
1276 movss\t{%2, %0|%0, %2}
1277 movss\t{%2, %0|%0, %2}
1278 movd\t{%2, %0|%0, %2}
1279 #"
1280 [(set_attr "type" "ssemov")
1281 (set_attr "mode" "SF")])
1282
;; Post-reload split for an element-0 set whose destination is memory
;; and whose remaining elements are the destination itself: it becomes
;; a plain SFmode move of operand 1 to offset 0 of that memory.
1283(define_split
1284 [(set (match_operand:V4SF 0 "memory_operand" "")
1285 (vec_merge:V4SF
1286 (vec_duplicate:V4SF
1287 (match_operand:SF 1 "nonmemory_operand" ""))
1288 (match_dup 0)
1289 (const_int 1)))]
1290 "TARGET_SSE && reload_completed"
1291 [(const_int 0)]
1292{
1293 emit_move_insn (adjust_address (operands[0], SFmode, 0), operands[1]);
1294 DONE;
1295})
1296
;; V4SF element-set expander: operand 0 is the vector, operand 1 the new
;; SFmode value and operand 2 the constant element index.  The work is
;; delegated to ix86_expand_vector_set.
1297(define_expand "vec_setv4sf"
1298 [(match_operand:V4SF 0 "register_operand" "")
1299 (match_operand:SF 1 "register_operand" "")
1300 (match_operand 2 "const_int_operand" "")]
1301 "TARGET_SSE"
1302{
1303 ix86_expand_vector_set (false, operands[0], operands[1],
1304 INTVAL (operands[2]));
1305 DONE;
1306})
1307
;; Extraction of element 0 of a V4SF.  The insn template is "#", so it
;; is always split once reload has completed: a register source is
;; re-expressed as an SFmode register with the same REGNO, a memory
;; source goes through gen_lowpart, and the result is an ordinary move.
;; The insn condition excludes memory-to-memory.
1308(define_insn_and_split "*vec_extractv4sf_0"
1309 [(set (match_operand:SF 0 "nonimmediate_operand" "=x,m,fr")
1310 (vec_select:SF
1311 (match_operand:V4SF 1 "nonimmediate_operand" "xm,x,m")
1312 (parallel [(const_int 0)])))]
1313 "TARGET_SSE && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
1314 "#"
1315 "&& reload_completed"
1316 [(const_int 0)]
1317{
1318 rtx op1 = operands[1];
1319 if (REG_P (op1))
1320 op1 = gen_rtx_REG (SFmode, REGNO (op1));
1321 else
1322 op1 = gen_lowpart (SFmode, op1);
1323 emit_move_insn (operands[0], op1);
1324 DONE;
1325})
1326
;; V4SF element-extract expander: operand 0 is the SFmode result,
;; operand 1 the vector and operand 2 the constant element index.  The
;; work is delegated to ix86_expand_vector_extract.
1327(define_expand "vec_extractv4sf"
1328 [(match_operand:SF 0 "register_operand" "")
1329 (match_operand:V4SF 1 "register_operand" "")
1330 (match_operand 2 "const_int_operand" "")]
1331 "TARGET_SSE"
1332{
1333 ix86_expand_vector_extract (false, operands[0], operands[1],
1334 INTVAL (operands[2]));
1335 DONE;
1336})
1337
1338;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
1339;;
1340;; Parallel double-precision floating point arithmetic
1341;;
1342;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
1343
;; V2DF negation expander (SSE2).  The RTL template is never emitted as
;; is: ix86_expand_fp_absneg_operator generates the code and the
;; preparation statement ends with DONE.
1344(define_expand "negv2df2"
1345 [(set (match_operand:V2DF 0 "register_operand" "")
1346 (neg:V2DF (match_operand:V2DF 1 "nonimmediate_operand" "")))]
1347 "TARGET_SSE2"
1348 "ix86_expand_fp_absneg_operator (NEG, V2DFmode, operands); DONE;")
1349
;; V2DF absolute-value expander (SSE2).  As with negv2df2, the code is
;; produced by ix86_expand_fp_absneg_operator and the expander is DONE.
1350(define_expand "absv2df2"
1351 [(set (match_operand:V2DF 0 "register_operand" "")
1352 (abs:V2DF (match_operand:V2DF 1 "nonimmediate_operand" "")))]
1353 "TARGET_SSE2"
1354 "ix86_expand_fp_absneg_operator (ABS, V2DFmode, operands); DONE;")
1355
;; Named expander for V2DF addition (SSE2); operands are adjusted by
;; ix86_fixup_binary_operands_no_copy before the template is emitted.
1356(define_expand "addv2df3"
1357 [(set (match_operand:V2DF 0 "register_operand" "")
1358 (plus:V2DF (match_operand:V2DF 1 "nonimmediate_operand" "")
1359 (match_operand:V2DF 2 "nonimmediate_operand" "")))]
1360 "TARGET_SSE2"
1361 "ix86_fixup_binary_operands_no_copy (PLUS, V2DFmode, operands);")
1362
;; Matcher for V2DF addition, emitted as addpd.  Commutative in
;; operand 1; gated on ix86_binary_operator_ok.
1363(define_insn "*addv2df3"
1364 [(set (match_operand:V2DF 0 "register_operand" "=x")
1365 (plus:V2DF (match_operand:V2DF 1 "nonimmediate_operand" "%0")
1366 (match_operand:V2DF 2 "nonimmediate_operand" "xm")))]
1367 "TARGET_SSE2 && ix86_binary_operator_ok (PLUS, V2DFmode, operands)"
1368 "addpd\t{%2, %0|%0, %2}"
1369 [(set_attr "type" "sseadd")
1370 (set_attr "mode" "V2DF")])
1371
;; Scalar double add (addsd): element 0 is operand 1 + operand 2, the
;; upper element is copied from operand 1 (vec_merge mask 1).  The
;; operand check names V2DFmode, the mode this pattern actually
;; operates on, matching the other V2DF patterns in this section.
1372(define_insn "sse2_vmaddv2df3"
1373 [(set (match_operand:V2DF 0 "register_operand" "=x")
1374 (vec_merge:V2DF
1375 (plus:V2DF (match_operand:V2DF 1 "register_operand" "0")
1376 (match_operand:V2DF 2 "nonimmediate_operand" "xm"))
1377 (match_dup 1)
1378 (const_int 1)))]
1379 "TARGET_SSE2 && ix86_binary_operator_ok (PLUS, V2DFmode, operands)"
1380 "addsd\t{%2, %0|%0, %2}"
1381 [(set_attr "type" "sseadd")
1382 (set_attr "mode" "DF")])
1383
;; Named expander for V2DF subtraction (SSE2); operands are adjusted by
;; ix86_fixup_binary_operands_no_copy before the template is emitted.
1384(define_expand "subv2df3"
1385 [(set (match_operand:V2DF 0 "register_operand" "")
1386 (minus:V2DF (match_operand:V2DF 1 "nonimmediate_operand" "")
1387 (match_operand:V2DF 2 "nonimmediate_operand" "")))]
1388 "TARGET_SSE2"
1389 "ix86_fixup_binary_operands_no_copy (MINUS, V2DFmode, operands);")
1390
;; Matcher for V2DF subtraction, emitted as subpd.  Not commutative:
;; operand 1 must be a register tied to the destination.
1391(define_insn "*subv2df3"
1392 [(set (match_operand:V2DF 0 "register_operand" "=x")
1393 (minus:V2DF (match_operand:V2DF 1 "register_operand" "0")
1394 (match_operand:V2DF 2 "nonimmediate_operand" "xm")))]
1395 "TARGET_SSE2"
1396 "subpd\t{%2, %0|%0, %2}"
1397 [(set_attr "type" "sseadd")
1398 (set_attr "mode" "V2DF")])
1399
;; Scalar double subtract (subsd): element 0 is operand 1 - operand 2,
;; the upper element is copied from operand 1 (vec_merge mask 1).
1400(define_insn "sse2_vmsubv2df3"
1401 [(set (match_operand:V2DF 0 "register_operand" "=x")
1402 (vec_merge:V2DF
1403 (minus:V2DF (match_operand:V2DF 1 "register_operand" "0")
1404 (match_operand:V2DF 2 "nonimmediate_operand" "xm"))
1405 (match_dup 1)
1406 (const_int 1)))]
1407 "TARGET_SSE2"
1408 "subsd\t{%2, %0|%0, %2}"
1409 [(set_attr "type" "sseadd")
1410 (set_attr "mode" "DF")])
1411
;; Named expander for V2DF multiplication (SSE2); operands are adjusted
;; by ix86_fixup_binary_operands_no_copy before the template is emitted.
1412(define_expand "mulv2df3"
1413 [(set (match_operand:V2DF 0 "register_operand" "")
1414 (mult:V2DF (match_operand:V2DF 1 "nonimmediate_operand" "")
1415 (match_operand:V2DF 2 "nonimmediate_operand" "")))]
1416 "TARGET_SSE2"
1417 "ix86_fixup_binary_operands_no_copy (MULT, V2DFmode, operands);")
1418
;; Matcher for V2DF multiplication, emitted as mulpd.  Commutative in
;; operand 1; gated on ix86_binary_operator_ok.
1419(define_insn "*mulv2df3"
1420 [(set (match_operand:V2DF 0 "register_operand" "=x")
1421 (mult:V2DF (match_operand:V2DF 1 "nonimmediate_operand" "%0")
1422 (match_operand:V2DF 2 "nonimmediate_operand" "xm")))]
1423 "TARGET_SSE2 && ix86_binary_operator_ok (MULT, V2DFmode, operands)"
1424 "mulpd\t{%2, %0|%0, %2}"
1425 [(set_attr "type" "ssemul")
1426 (set_attr "mode" "V2DF")])
1427
;; Scalar double multiply (mulsd): element 0 is operand 1 * operand 2,
;; the upper element is copied from operand 1 (vec_merge mask 1).
1428(define_insn "sse2_vmmulv2df3"
1429 [(set (match_operand:V2DF 0 "register_operand" "=x")
1430 (vec_merge:V2DF
1431 (mult:V2DF (match_operand:V2DF 1 "register_operand" "0")
1432 (match_operand:V2DF 2 "nonimmediate_operand" "xm"))
1433 (match_dup 1)
1434 (const_int 1)))]
1435 "TARGET_SSE2 && ix86_binary_operator_ok (MULT, V2DFmode, operands)"
1436 "mulsd\t{%2, %0|%0, %2}"
1437 [(set_attr "type" "ssemul")
1438 (set_attr "mode" "DF")])
1439
;; Named expander for V2DF division (SSE2).  Operand 1 is already
;; required to be a register by its predicate; the operands are then
;; passed through ix86_fixup_binary_operands_no_copy.
1440(define_expand "divv2df3"
1441 [(set (match_operand:V2DF 0 "register_operand" "")
1442 (div:V2DF (match_operand:V2DF 1 "register_operand" "")
1443 (match_operand:V2DF 2 "nonimmediate_operand" "")))]
1444 "TARGET_SSE2"
1445 "ix86_fixup_binary_operands_no_copy (DIV, V2DFmode, operands);")
1446
;; Matcher for V2DF division, emitted as divpd.  Not commutative:
;; operand 1 must be a register tied to the destination.
1447(define_insn "*divv2df3"
1448 [(set (match_operand:V2DF 0 "register_operand" "=x")
1449 (div:V2DF (match_operand:V2DF 1 "register_operand" "0")
1450 (match_operand:V2DF 2 "nonimmediate_operand" "xm")))]
1451 "TARGET_SSE2"
1452 "divpd\t{%2, %0|%0, %2}"
1453 [(set_attr "type" "ssediv")
1454 (set_attr "mode" "V2DF")])
1455
;; Scalar double divide (divsd): element 0 is operand 1 / operand 2,
;; the upper element is copied from operand 1 (vec_merge mask 1).
1456(define_insn "sse2_vmdivv2df3"
1457 [(set (match_operand:V2DF 0 "register_operand" "=x")
1458 (vec_merge:V2DF
1459 (div:V2DF (match_operand:V2DF 1 "register_operand" "0")
1460 (match_operand:V2DF 2 "nonimmediate_operand" "xm"))
1461 (match_dup 1)
1462 (const_int 1)))]
1463 "TARGET_SSE2"
1464 "divsd\t{%2, %0|%0, %2}"
1465 [(set_attr "type" "ssediv")
1466 (set_attr "mode" "DF")])
1467
;; Packed double square root (sqrtpd).  The source may be a register or
;; memory and is not tied to the destination.
1468(define_insn "sqrtv2df2"
1469 [(set (match_operand:V2DF 0 "register_operand" "=x")
1470 (sqrt:V2DF (match_operand:V2DF 1 "nonimmediate_operand" "xm")))]
1471 "TARGET_SSE2"
1472 "sqrtpd\t{%1, %0|%0, %1}"
1473 [(set_attr "type" "sse")
1474 (set_attr "mode" "V2DF")])
1475
;; Scalar double square root (sqrtsd): element 0 is sqrt of element 0
;; of operand 1; the upper element is copied from operand 2, which is
;; tied to the destination.  Operand 1 uses nonimmediate_operand so the
;; predicate agrees with its "xm" constraint and a memory source can be
;; matched directly, as in the other scalar V2DF patterns here.
1476(define_insn "sse2_vmsqrtv2df2"
1477 [(set (match_operand:V2DF 0 "register_operand" "=x")
1478 (vec_merge:V2DF
1479 (sqrt:V2DF (match_operand:V2DF 1 "nonimmediate_operand" "xm"))
1480 (match_operand:V2DF 2 "register_operand" "0")
1481 (const_int 1)))]
1482 "TARGET_SSE2"
1483 "sqrtsd\t{%1, %0|%0, %1}"
1484 [(set_attr "type" "sse")
1485 (set_attr "mode" "DF")])
1486
1487;; ??? For !flag_finite_math_only, the representation with SMIN/SMAX
1488;; isn't really correct, as those rtl operators aren't defined when
1489;; applied to NaNs. Hopefully the optimizers won't get too smart on us.
1490
;; Named expander for V2DF maximum (SSE2).  When flag_finite_math_only
;; is not set, operand 1 is first forced into a register, so that the
;; non-commutative *smaxv2df3 form can match; then the usual binary
;; operand fixup runs.
1491(define_expand "smaxv2df3"
1492 [(set (match_operand:V2DF 0 "register_operand" "")
1493 (smax:V2DF (match_operand:V2DF 1 "nonimmediate_operand" "")
1494 (match_operand:V2DF 2 "nonimmediate_operand" "")))]
1495 "TARGET_SSE2"
1496{
1497 if (!flag_finite_math_only)
1498 operands[1] = force_reg (V2DFmode, operands[1]);
1499 ix86_fixup_binary_operands_no_copy (SMAX, V2DFmode, operands);
1500})
1501
;; maxpd matcher used only under flag_finite_math_only.  In that case
;; the operands are treated as commutative ("%0") and operand 1 may be
;; memory, subject to ix86_binary_operator_ok.
1502(define_insn "*smaxv2df3_finite"
1503 [(set (match_operand:V2DF 0 "register_operand" "=x")
1504 (smax:V2DF (match_operand:V2DF 1 "nonimmediate_operand" "%0")
1505 (match_operand:V2DF 2 "nonimmediate_operand" "xm")))]
1506 "TARGET_SSE2 && flag_finite_math_only
1507 && ix86_binary_operator_ok (SMAX, V2DFmode, operands)"
1508 "maxpd\t{%2, %0|%0, %2}"
1509 [(set_attr "type" "sseadd")
1510 (set_attr "mode" "V2DF")])
1511
;; General maxpd matcher: operand order is fixed (no "%"), with operand
;; 1 a register tied to the destination.
1512(define_insn "*smaxv2df3"
1513 [(set (match_operand:V2DF 0 "register_operand" "=x")
1514 (smax:V2DF (match_operand:V2DF 1 "register_operand" "0")
1515 (match_operand:V2DF 2 "nonimmediate_operand" "xm")))]
1516 "TARGET_SSE2"
1517 "maxpd\t{%2, %0|%0, %2}"
1518 [(set_attr "type" "sseadd")
1519 (set_attr "mode" "V2DF")])
1520
;; Scalar double maximum (maxsd): element 0 is smax of operands 1 and 2,
;; the upper element is copied from operand 1 (vec_merge mask 1).
1521(define_insn "sse2_vmsmaxv2df3"
1522 [(set (match_operand:V2DF 0 "register_operand" "=x")
1523 (vec_merge:V2DF
1524 (smax:V2DF (match_operand:V2DF 1 "register_operand" "0")
1525 (match_operand:V2DF 2 "nonimmediate_operand" "xm"))
1526 (match_dup 1)
1527 (const_int 1)))]
1528 "TARGET_SSE2"
1529 "maxsd\t{%2, %0|%0, %2}"
1530 [(set_attr "type" "sseadd")
1531 (set_attr "mode" "DF")])
1532
;; Named expander for V2DF minimum (SSE2).  When flag_finite_math_only
;; is not set, operand 1 is first forced into a register, so that the
;; non-commutative *sminv2df3 form can match; then the usual binary
;; operand fixup runs.
1533(define_expand "sminv2df3"
1534 [(set (match_operand:V2DF 0 "register_operand" "")
1535 (smin:V2DF (match_operand:V2DF 1 "nonimmediate_operand" "")
1536 (match_operand:V2DF 2 "nonimmediate_operand" "")))]
1537 "TARGET_SSE2"
1538{
1539 if (!flag_finite_math_only)
1540 operands[1] = force_reg (V2DFmode, operands[1]);
1541 ix86_fixup_binary_operands_no_copy (SMIN, V2DFmode, operands);
1542})
1543
;; minpd matcher used only under flag_finite_math_only.  In that case
;; the operands are treated as commutative ("%0") and operand 1 may be
;; memory, subject to ix86_binary_operator_ok.
1544(define_insn "*sminv2df3_finite"
1545 [(set (match_operand:V2DF 0 "register_operand" "=x")
1546 (smin:V2DF (match_operand:V2DF 1 "nonimmediate_operand" "%0")
1547 (match_operand:V2DF 2 "nonimmediate_operand" "xm")))]
1548 "TARGET_SSE2 && flag_finite_math_only
1549 && ix86_binary_operator_ok (SMIN, V2DFmode, operands)"
1550 "minpd\t{%2, %0|%0, %2}"
1551 [(set_attr "type" "sseadd")
1552 (set_attr "mode" "V2DF")])
1553
;; General minpd matcher: operand order is fixed (no "%"), with operand
;; 1 a register tied to the destination.
1554(define_insn "*sminv2df3"
1555 [(set (match_operand:V2DF 0 "register_operand" "=x")
1556 (smin:V2DF (match_operand:V2DF 1 "register_operand" "0")
1557 (match_operand:V2DF 2 "nonimmediate_operand" "xm")))]
1558 "TARGET_SSE2"
1559 "minpd\t{%2, %0|%0, %2}"
1560 [(set_attr "type" "sseadd")
1561 (set_attr "mode" "V2DF")])
1562
;; Scalar double minimum (minsd): element 0 is smin of operands 1 and 2,
;; the upper element is copied from operand 1 (vec_merge mask 1).
1563(define_insn "sse2_vmsminv2df3"
1564 [(set (match_operand:V2DF 0 "register_operand" "=x")
1565 (vec_merge:V2DF
1566 (smin:V2DF (match_operand:V2DF 1 "register_operand" "0")
1567 (match_operand:V2DF 2 "nonimmediate_operand" "xm"))
1568 (match_dup 1)
1569 (const_int 1)))]
1570 "TARGET_SSE2"
1571 "minsd\t{%2, %0|%0, %2}"
1572 [(set_attr "type" "sseadd")
1573 (set_attr "mode" "DF")])
1574
;; addsubpd (SSE3): a vec_merge with mask 1 of the element-wise sum
;; (first arm) and the element-wise difference (second arm) of
;; operands 1 and 2.
;; NOTE(review): which arm supplies element 0 under mask 1 should be
;; checked against the vec_merge definition and the addsubpd semantics.
1575(define_insn "sse3_addsubv2df3"
1576 [(set (match_operand:V2DF 0 "register_operand" "=x")
1577 (vec_merge:V2DF
1578 (plus:V2DF
1579 (match_operand:V2DF 1 "register_operand" "0")
1580 (match_operand:V2DF 2 "nonimmediate_operand" "xm"))
1581 (minus:V2DF (match_dup 1) (match_dup 2))
1582 (const_int 1)))]
1583 "TARGET_SSE3"
1584 "addsubpd\t{%2, %0|%0, %2}"
1585 [(set_attr "type" "sseadd")
1586 (set_attr "mode" "V2DF")])
1587
;; haddpd (SSE3): result element 0 is the sum of the two elements of
;; operand 1; result element 1 is the sum of the two elements of
;; operand 2.
1588(define_insn "sse3_haddv2df3"
1589 [(set (match_operand:V2DF 0 "register_operand" "=x")
1590 (vec_concat:V2DF
1591 (plus:DF
1592 (vec_select:DF
1593 (match_operand:V2DF 1 "register_operand" "0")
1594 (parallel [(const_int 0)]))
1595 (vec_select:DF (match_dup 1) (parallel [(const_int 1)])))
1596 (plus:DF
1597 (vec_select:DF
1598 (match_operand:V2DF 2 "nonimmediate_operand" "xm")
1599 (parallel [(const_int 0)]))
1600 (vec_select:DF (match_dup 2) (parallel [(const_int 1)])))))]
1601 "TARGET_SSE3"
1602 "haddpd\t{%2, %0|%0, %2}"
1603 [(set_attr "type" "sseadd")
1604 (set_attr "mode" "V2DF")])
1605
;; hsubpd (SSE3): result element 0 is element 0 minus element 1 of
;; operand 1; result element 1 is element 0 minus element 1 of
;; operand 2.
1606(define_insn "sse3_hsubv2df3"
1607 [(set (match_operand:V2DF 0 "register_operand" "=x")
1608 (vec_concat:V2DF
1609 (minus:DF
1610 (vec_select:DF
1611 (match_operand:V2DF 1 "register_operand" "0")
1612 (parallel [(const_int 0)]))
1613 (vec_select:DF (match_dup 1) (parallel [(const_int 1)])))
1614 (minus:DF
1615 (vec_select:DF
1616 (match_operand:V2DF 2 "nonimmediate_operand" "xm")
1617 (parallel [(const_int 0)]))
1618 (vec_select:DF (match_dup 2) (parallel [(const_int 1)])))))]
1619 "TARGET_SSE3"
1620 "hsubpd\t{%2, %0|%0, %2}"
1621 [(set_attr "type" "sseadd")
1622 (set_attr "mode" "V2DF")])
1623
;; Sum-reduction expander for V2DF.  Only available with SSE3: a single
;; sse3_haddv2df3 with operand 1 used as both inputs.
1624(define_expand "reduc_splus_v2df"
1625 [(match_operand:V2DF 0 "register_operand" "")
1626 (match_operand:V2DF 1 "register_operand" "")]
1627 "TARGET_SSE3"
1628{
1629 emit_insn (gen_sse3_haddv2df3 (operands[0], operands[1], operands[1]));
1630 DONE;
1631})
1632
1633;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
1634;;
1635;; Parallel double-precision floating point comparisons
1636;;
1637;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
1638
;; Packed double-precision compare (cmp..pd).  Operator 3 is any rtx
;; accepted by sse_comparison_operator; operand 1 is tied to the
;; destination.
;; NOTE(review): %D3 presumably prints the condition suffix derived from
;; operator 3 -- confirm against the target's print_operand.
1639(define_insn "sse2_maskcmpv2df3"
1640 [(set (match_operand:V2DF 0 "register_operand" "=x")
1641 (match_operator:V2DF 3 "sse_comparison_operator"
1642 [(match_operand:V2DF 1 "register_operand" "0")
1643 (match_operand:V2DF 2 "nonimmediate_operand" "xm")]))]
1644 "TARGET_SSE2"
1645 "cmp%D3pd\t{%2, %0|%0, %2}"
1646 [(set_attr "type" "ssecmp")
1647 (set_attr "mode" "V2DF")])
1648
;; Scalar double-precision compare (cmp..sd).  The vec_merge with mask 1
;; takes element 0 from the comparison result and element 1 unchanged
;; from operand 1, which is tied to the destination.
1649(define_insn "sse2_vmmaskcmpv2df3"
1650 [(set (match_operand:V2DF 0 "register_operand" "=x")
1651 (vec_merge:V2DF
1652 (match_operator:V2DF 3 "sse_comparison_operator"
1653 [(match_operand:V2DF 1 "register_operand" "0")
1654 (match_operand:V2DF 2 "nonimmediate_operand" "xm")])
1655 (match_dup 1)
1656 (const_int 1)))]
1657 "TARGET_SSE2"
1658 "cmp%D3sd\t{%2, %0|%0, %2}"
1659 [(set_attr "type" "ssecmp")
1660 (set_attr "mode" "DF")])
1661
;; comisd: compares element 0 of operand 0 with element 0 of operand 1
;; and sets FLAGS_REG in CCFP mode.  Operand 1 may be in memory.
1662(define_insn "sse2_comi"
1663 [(set (reg:CCFP FLAGS_REG)
1664 (compare:CCFP
1665 (vec_select:DF
1666 (match_operand:V2DF 0 "register_operand" "x")
1667 (parallel [(const_int 0)]))
1668 (vec_select:DF
1669 (match_operand:V2DF 1 "nonimmediate_operand" "xm")
1670 (parallel [(const_int 0)]))))]
1671 "TARGET_SSE2"
1672 "comisd\t{%1, %0|%0, %1}"
1673 [(set_attr "type" "ssecomi")
1674 (set_attr "mode" "DF")])
1675
;; ucomisd: same operand shape as sse2_comi, but the flags are set in
;; CCFPU mode rather than CCFP.
1676(define_insn "sse2_ucomi"
1677 [(set (reg:CCFPU FLAGS_REG)
1678 (compare:CCFPU
1679 (vec_select:DF
1680 (match_operand:V2DF 0 "register_operand" "x")
1681 (parallel [(const_int 0)]))
1682 (vec_select:DF
1683 (match_operand:V2DF 1 "nonimmediate_operand" "xm")
1684 (parallel [(const_int 0)]))))]
1685 "TARGET_SSE2"
1686 "ucomisd\t{%1, %0|%0, %1}"
1687 [(set_attr "type" "ssecomi")
1688 (set_attr "mode" "DF")])
1689
;; Vector conditional select on V2DF: operand 0 = (operand 4 <op3>
;; operand 5) ? operand 1 : operand 2.  All the work is delegated to
;; ix86_expand_fp_vcond; the expansion FAILs when that helper returns zero.
1690(define_expand "vcondv2df"
1691 [(set (match_operand:V2DF 0 "register_operand" "")
1692 (if_then_else:V2DF
1693 (match_operator 3 ""
1694 [(match_operand:V2DF 4 "nonimmediate_operand" "")
1695 (match_operand:V2DF 5 "nonimmediate_operand" "")])
1696 (match_operand:V2DF 1 "general_operand" "")
1697 (match_operand:V2DF 2 "general_operand" "")))]
1698 "TARGET_SSE2"
1699{
1700 if (ix86_expand_fp_vcond (operands))
1701 DONE;
1702 else
1703 FAIL;
1704})
1705
1706;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
1707;;
1708;; Parallel double-precision floating point logical operations
1709;;
1710;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
1711
;; Bitwise AND of two V2DF vectors.  Before the pattern is emitted the
;; operands are passed through ix86_fixup_binary_operands_no_copy.
1712(define_expand "andv2df3"
1713 [(set (match_operand:V2DF 0 "register_operand" "")
1714 (and:V2DF (match_operand:V2DF 1 "nonimmediate_operand" "")
1715 (match_operand:V2DF 2 "nonimmediate_operand" "")))]
1716 "TARGET_SSE2"
1717 "ix86_fixup_binary_operands_no_copy (AND, V2DFmode, operands);")
1718
;; andpd.  The "%" on operand 1 marks the operation commutative, so
;; either input may end up tied to the destination; the insn is only
;; recognized when ix86_binary_operator_ok accepts the operands.
1719(define_insn "*andv2df3"
1720 [(set (match_operand:V2DF 0 "register_operand" "=x")
1721 (and:V2DF (match_operand:V2DF 1 "nonimmediate_operand" "%0")
1722 (match_operand:V2DF 2 "nonimmediate_operand" "xm")))]
1723 "TARGET_SSE2 && ix86_binary_operator_ok (AND, V2DFmode, operands)"
1724 "andpd\t{%2, %0|%0, %2}"
1725 [(set_attr "type" "sselog")
1726 (set_attr "mode" "V2DF")])
1727
;; andnpd: (NOT operand 1) AND operand 2.  Not commutative, so operand 1
;; must be a register and is tied to the destination.
1728(define_insn "sse2_nandv2df3"
1729 [(set (match_operand:V2DF 0 "register_operand" "=x")
1730 (and:V2DF (not:V2DF (match_operand:V2DF 1 "register_operand" "0"))
1731 (match_operand:V2DF 2 "nonimmediate_operand" "xm")))]
1732 "TARGET_SSE2"
1733 "andnpd\t{%2, %0|%0, %2}"
1734 [(set_attr "type" "sselog")
1735 (set_attr "mode" "V2DF")])
1736
;; Bitwise inclusive OR of two V2DF vectors.  Before the pattern is
;; emitted the operands are passed through
;; ix86_fixup_binary_operands_no_copy.
1737(define_expand "iorv2df3"
1738 [(set (match_operand:V2DF 0 "register_operand" "")
1739 (ior:V2DF (match_operand:V2DF 1 "nonimmediate_operand" "")
1740 (match_operand:V2DF 2 "nonimmediate_operand" "")))]
1741 "TARGET_SSE2"
1742 "ix86_fixup_binary_operands_no_copy (IOR, V2DFmode, operands);")
1743
;; orpd.  Commutative ("%" on operand 1); recognized only when
;; ix86_binary_operator_ok accepts the operands.
1744(define_insn "*iorv2df3"
1745 [(set (match_operand:V2DF 0 "register_operand" "=x")
1746 (ior:V2DF (match_operand:V2DF 1 "nonimmediate_operand" "%0")
1747 (match_operand:V2DF 2 "nonimmediate_operand" "xm")))]
1748 "TARGET_SSE2 && ix86_binary_operator_ok (IOR, V2DFmode, operands)"
1749 "orpd\t{%2, %0|%0, %2}"
1750 [(set_attr "type" "sselog")
1751 (set_attr "mode" "V2DF")])
1752
;; Bitwise exclusive OR of two V2DF vectors.  Before the pattern is
;; emitted the operands are passed through
;; ix86_fixup_binary_operands_no_copy.
1753(define_expand "xorv2df3"
1754 [(set (match_operand:V2DF 0 "register_operand" "")
1755 (xor:V2DF (match_operand:V2DF 1 "nonimmediate_operand" "")
1756 (match_operand:V2DF 2 "nonimmediate_operand" "")))]
1757 "TARGET_SSE2"
1758 "ix86_fixup_binary_operands_no_copy (XOR, V2DFmode, operands);")
1759
;; xorpd.  Commutative ("%" on operand 1); recognized only when
;; ix86_binary_operator_ok accepts the operands.
1760(define_insn "*xorv2df3"
1761 [(set (match_operand:V2DF 0 "register_operand" "=x")
1762 (xor:V2DF (match_operand:V2DF 1 "nonimmediate_operand" "%0")
1763 (match_operand:V2DF 2 "nonimmediate_operand" "xm")))]
1764 "TARGET_SSE2 && ix86_binary_operator_ok (XOR, V2DFmode, operands)"
1765 "xorpd\t{%2, %0|%0, %2}"
1766 [(set_attr "type" "sselog")
1767 (set_attr "mode" "V2DF")])
1768
1769;; Also define scalar versions. These are used for abs, neg, and
1770;; conditional move. Using subregs into vector modes causes register
1771;; allocation lossage. These patterns do not allow memory operands
1772;; because the native instructions read the full 128 bits.
1773
;; Scalar DF bitwise AND, register operands only.  It is emitted as the
;; packed andpd instruction, hence the V2DF "mode" attribute.
1774(define_insn "*anddf3"
1775 [(set (match_operand:DF 0 "register_operand" "=x")
1776 (and:DF (match_operand:DF 1 "register_operand" "0")
1777 (match_operand:DF 2 "register_operand" "x")))]
1778 "TARGET_SSE2"
1779 "andpd\t{%2, %0|%0, %2}"
1780 [(set_attr "type" "sselog")
1781 (set_attr "mode" "V2DF")])
1782
;; Scalar DF and-not, register operands only: (NOT operand 1) AND
;; operand 2, emitted as the packed andnpd instruction.
1783(define_insn "*nanddf3"
1784 [(set (match_operand:DF 0 "register_operand" "=x")
1785 (and:DF (not:DF (match_operand:DF 1 "register_operand" "0"))
1786 (match_operand:DF 2 "register_operand" "x")))]
1787 "TARGET_SSE2"
1788 "andnpd\t{%2, %0|%0, %2}"
1789 [(set_attr "type" "sselog")
1790 (set_attr "mode" "V2DF")])
1791
;; Scalar DF bitwise inclusive OR, register operands only, emitted as the
;; packed orpd instruction.
1792(define_insn "*iordf3"
1793 [(set (match_operand:DF 0 "register_operand" "=x")
1794 (ior:DF (match_operand:DF 1 "register_operand" "0")
1795 (match_operand:DF 2 "register_operand" "x")))]
1796 "TARGET_SSE2"
1797 "orpd\t{%2, %0|%0, %2}"
1798 [(set_attr "type" "sselog")
1799 (set_attr "mode" "V2DF")])
1800
;; Scalar DF bitwise exclusive OR, register operands only, emitted as the
;; packed xorpd instruction.
1801(define_insn "*xordf3"
1802 [(set (match_operand:DF 0 "register_operand" "=x")
1803 (xor:DF (match_operand:DF 1 "register_operand" "0")
1804 (match_operand:DF 2 "register_operand" "x")))]
1805 "TARGET_SSE2"
1806 "xorpd\t{%2, %0|%0, %2}"
1807 [(set_attr "type" "sselog")
1808 (set_attr "mode" "V2DF")])
1809
1810;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
1811;;
1812;; Parallel double-precision floating point conversion operations
1813;;
1814;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
1815
;; cvtpi2pd: converts a V2SI vector to V2DF.  The source is either an MMX
;; register (constraint "y") or memory; only the register alternative is
;; tagged with unit "mmx".
1816(define_insn "sse2_cvtpi2pd"
1817 [(set (match_operand:V2DF 0 "register_operand" "=x,x")
1818 (float:V2DF (match_operand:V2SI 1 "nonimmediate_operand" "y,m")))]
1819 "TARGET_SSE2"
1820 "cvtpi2pd\t{%1, %0|%0, %1}"
1821 [(set_attr "type" "ssecvt")
1822 (set_attr "unit" "mmx,*")
1823 (set_attr "mode" "V2DF")])
1824
;; cvtpd2pi: converts V2DF to V2SI in an MMX register ("y").  The
;; conversion is modelled as UNSPEC_FIX_NOTRUNC rather than a plain "fix"
;; rtx.
1825(define_insn "sse2_cvtpd2pi"
1826 [(set (match_operand:V2SI 0 "register_operand" "=y")
1827 (unspec:V2SI [(match_operand:V2DF 1 "nonimmediate_operand" "xm")]
1828 UNSPEC_FIX_NOTRUNC))]
1829 "TARGET_SSE2"
1830 "cvtpd2pi\t{%1, %0|%0, %1}"
1831 [(set_attr "type" "ssecvt")
1832 (set_attr "unit" "mmx")
1833 (set_attr "mode" "DI")])
1834
;; cvttpd2pi: converts V2DF to V2SI in an MMX register ("y") using the
;; "fix" rtx, i.e. the truncating conversion.
1835(define_insn "sse2_cvttpd2pi"
1836 [(set (match_operand:V2SI 0 "register_operand" "=y")
1837 (fix:V2SI (match_operand:V2DF 1 "nonimmediate_operand" "xm")))]
1838 "TARGET_SSE2"
1839 "cvttpd2pi\t{%1, %0|%0, %1}"
1840 [(set_attr "type" "ssecvt")
1841 (set_attr "unit" "mmx")
1842 (set_attr "mode" "TI")])
1843
;; cvtsi2sd: converts SI operand 2 (general register or memory) to DF and
;; places it in element 0; element 1 is kept from operand 1, which is tied
;; to the destination (vec_merge mask 1).
1844(define_insn "sse2_cvtsi2sd"
1845 [(set (match_operand:V2DF 0 "register_operand" "=x,x")
1846 (vec_merge:V2DF
1847 (vec_duplicate:V2DF
1848 (float:DF (match_operand:SI 2 "nonimmediate_operand" "r,m")))
1849 (match_operand:V2DF 1 "register_operand" "0,0")
1850 (const_int 1)))]
1851 "TARGET_SSE2"
1852 "cvtsi2sd\t{%2, %0|%0, %2}"
1853 [(set_attr "type" "sseicvt")
1854 (set_attr "mode" "DF")
1855 (set_attr "athlon_decode" "double,direct")])
1856
;; cvtsi2sdq: DImode-source variant of sse2_cvtsi2sd; additionally
;; requires TARGET_64BIT.
1857(define_insn "sse2_cvtsi2sdq"
1858 [(set (match_operand:V2DF 0 "register_operand" "=x,x")
1859 (vec_merge:V2DF
1860 (vec_duplicate:V2DF
1861 (float:DF (match_operand:DI 2 "nonimmediate_operand" "r,m")))
1862 (match_operand:V2DF 1 "register_operand" "0,0")
1863 (const_int 1)))]
1864 "TARGET_SSE2 && TARGET_64BIT"
1865 "cvtsi2sdq\t{%2, %0|%0, %2}"
1866 [(set_attr "type" "sseicvt")
1867 (set_attr "mode" "DF")
1868 (set_attr "athlon_decode" "double,direct")])
1869
;; cvtsd2si: converts element 0 of V2DF operand 1 to SI in a general
;; register.  Modelled as UNSPEC_FIX_NOTRUNC rather than a "fix" rtx.
1870(define_insn "sse2_cvtsd2si"
1871 [(set (match_operand:SI 0 "register_operand" "=r,r")
1872 (unspec:SI
1873 [(vec_select:DF
1874 (match_operand:V2DF 1 "nonimmediate_operand" "x,m")
1875 (parallel [(const_int 0)]))]
1876 UNSPEC_FIX_NOTRUNC))]
1877 "TARGET_SSE2"
1878 "cvtsd2si\t{%1, %0|%0, %1}"
1879 [(set_attr "type" "sseicvt")
1880 (set_attr "athlon_decode" "double,vector")
1881 (set_attr "mode" "SI")])
1882
;; cvtsd2siq: DImode-result variant of sse2_cvtsd2si; additionally
;; requires TARGET_64BIT.
1883(define_insn "sse2_cvtsd2siq"
1884 [(set (match_operand:DI 0 "register_operand" "=r,r")
1885 (unspec:DI
1886 [(vec_select:DF
1887 (match_operand:V2DF 1 "nonimmediate_operand" "x,m")
1888 (parallel [(const_int 0)]))]
1889 UNSPEC_FIX_NOTRUNC))]
1890 "TARGET_SSE2 && TARGET_64BIT"
1891 "cvtsd2siq\t{%1, %0|%0, %1}"
1892 [(set_attr "type" "sseicvt")
1893 (set_attr "athlon_decode" "double,vector")
1894 (set_attr "mode" "DI")])
1895
;; cvttsd2si: converts element 0 of V2DF operand 1 to SI with the "fix"
;; rtx, i.e. the truncating conversion.
1896(define_insn "sse2_cvttsd2si"
1897 [(set (match_operand:SI 0 "register_operand" "=r,r")
1898 (fix:SI
1899 (vec_select:DF
1900 (match_operand:V2DF 1 "nonimmediate_operand" "x,m")
1901 (parallel [(const_int 0)]))))]
1902 "TARGET_SSE2"
1903 "cvttsd2si\t{%1, %0|%0, %1}"
1904 [(set_attr "type" "sseicvt")
1905 (set_attr "mode" "SI")
1906 (set_attr "athlon_decode" "double,vector")])
1907
;; cvttsd2siq: DImode-result variant of sse2_cvttsd2si; additionally
;; requires TARGET_64BIT.
1908(define_insn "sse2_cvttsd2siq"
1909 [(set (match_operand:DI 0 "register_operand" "=r,r")
1910 (fix:DI
1911 (vec_select:DF
1912 (match_operand:V2DF 1 "nonimmediate_operand" "x,m")
1913 (parallel [(const_int 0)]))))]
1914 "TARGET_SSE2 && TARGET_64BIT"
1915 "cvttsd2siq\t{%1, %0|%0, %1}"
1916 [(set_attr "type" "sseicvt")
1917 (set_attr "mode" "DI")
1918 (set_attr "athlon_decode" "double,vector")])
1919
;; cvtdq2pd: converts elements 0 and 1 of V4SI operand 1 to a V2DF
;; vector; elements 2 and 3 of the source are not used.
1920(define_insn "sse2_cvtdq2pd"
1921 [(set (match_operand:V2DF 0 "register_operand" "=x")
1922 (float:V2DF
1923 (vec_select:V2SI
1924 (match_operand:V4SI 1 "nonimmediate_operand" "xm")
1925 (parallel [(const_int 0) (const_int 1)]))))]
1926 "TARGET_SSE2"
1927 "cvtdq2pd\t{%1, %0|%0, %1}"
1928 [(set_attr "type" "ssecvt")
1929 (set_attr "mode" "V2DF")])
1930
;; Expander for cvtpd2dq.  The V4SI result is the concatenation of the
;; converted V2DF input (low half) and operand 2, which the preparation
;; statement sets to the V2SI zero constant.
1931(define_expand "sse2_cvtpd2dq"
1932 [(set (match_operand:V4SI 0 "register_operand" "")
1933 (vec_concat:V4SI
1934 (unspec:V2SI [(match_operand:V2DF 1 "nonimmediate_operand" "")]
1935 UNSPEC_FIX_NOTRUNC)
1936 (match_dup 2)))]
1937 "TARGET_SSE2"
1938 "operands[2] = CONST0_RTX (V2SImode);")
1939
;; cvtpd2dq: matches the rtl built by the sse2_cvtpd2dq expander; the
;; upper half of the result must be a zero constant (const0_operand).
1940(define_insn "*sse2_cvtpd2dq"
1941 [(set (match_operand:V4SI 0 "register_operand" "=x")
1942 (vec_concat:V4SI
1943 (unspec:V2SI [(match_operand:V2DF 1 "nonimmediate_operand" "xm")]
1944 UNSPEC_FIX_NOTRUNC)
1945 (match_operand:V2SI 2 "const0_operand" "")))]
1946 "TARGET_SSE2"
1947 "cvtpd2dq\t{%1, %0|%0, %1}"
1948 [(set_attr "type" "ssecvt")
1949 (set_attr "mode" "TI")])
1950
;; Expander for cvttpd2dq (truncating, "fix" rtx).  Operand 2, the upper
;; half of the V4SI result, is set to the V2SI zero constant.
1951(define_expand "sse2_cvttpd2dq"
1952 [(set (match_operand:V4SI 0 "register_operand" "")
1953 (vec_concat:V4SI
1954 (fix:V2SI (match_operand:V2DF 1 "nonimmediate_operand" ""))
1955 (match_dup 2)))]
1956 "TARGET_SSE2"
1957 "operands[2] = CONST0_RTX (V2SImode);")
1958
;; cvttpd2dq: matches the rtl built by the sse2_cvttpd2dq expander; the
;; upper half of the result must be a zero constant (const0_operand).
1959(define_insn "*sse2_cvttpd2dq"
1960 [(set (match_operand:V4SI 0 "register_operand" "=x")
1961 (vec_concat:V4SI
1962 (fix:V2SI (match_operand:V2DF 1 "nonimmediate_operand" "xm"))
1963 (match_operand:V2SI 2 "const0_operand" "")))]
1964 "TARGET_SSE2"
1965 "cvttpd2dq\t{%1, %0|%0, %1}"
1966 [(set_attr "type" "ssecvt")
1967 (set_attr "mode" "TI")])
1968
;; cvtsd2ss: narrows operand 2 (V2DF, register or memory) to single
;; precision and merges the result into element 0 of V4SF operand 1,
;; which is tied to the destination; the other elements are preserved
;; (vec_merge mask 1).
1969(define_insn "sse2_cvtsd2ss"
1970 [(set (match_operand:V4SF 0 "register_operand" "=x,x")
1971 (vec_merge:V4SF
1972 (vec_duplicate:V4SF
1973 (float_truncate:V2SF
1974 (match_operand:V2DF 2 "nonimmediate_operand" "x,m")))
1975 (match_operand:V4SF 1 "register_operand" "0,0")
1976 (const_int 1)))]
1977 "TARGET_SSE2"
1978 "cvtsd2ss\t{%2, %0|%0, %2}"
1979 [(set_attr "type" "ssecvt")
1980 (set_attr "athlon_decode" "vector,double")
1981 (set_attr "mode" "SF")])
1982
;; cvtss2sd: widens the low elements of V4SF operand 2 to double
;; precision and merges element 0 of that into operand 1, which is tied
;; to the destination; element 1 is preserved (vec_merge mask 1).
1983(define_insn "sse2_cvtss2sd"
1984 [(set (match_operand:V2DF 0 "register_operand" "=x")
1985 (vec_merge:V2DF
1986 (float_extend:V2DF
1987 (vec_select:V2SF
1988 (match_operand:V4SF 2 "nonimmediate_operand" "xm")
1989 (parallel [(const_int 0) (const_int 1)])))
1990 (match_operand:V2DF 1 "register_operand" "0")
1991 (const_int 1)))]
1992 "TARGET_SSE2"
1993 "cvtss2sd\t{%2, %0|%0, %2}"
1994 [(set_attr "type" "ssecvt")
1995 (set_attr "mode" "DF")])
1996
;; Expander for cvtpd2ps.  The V4SF result is the narrowed V2DF input in
;; the low half and operand 2, set here to the V2SF zero constant, in the
;; upper half.
;; NOTE(review): operand 1 carries an "xm" constraint although the
;; sibling expanders in this section use empty constraint strings.
1997(define_expand "sse2_cvtpd2ps"
1998 [(set (match_operand:V4SF 0 "register_operand" "")
1999 (vec_concat:V4SF
2000 (float_truncate:V2SF
2001 (match_operand:V2DF 1 "nonimmediate_operand" "xm"))
2002 (match_dup 2)))]
2003 "TARGET_SSE2"
2004 "operands[2] = CONST0_RTX (V2SFmode);")
2005
;; cvtpd2ps: matches the rtl built by the sse2_cvtpd2ps expander; the
;; upper half of the result must be a zero constant (const0_operand).
2006(define_insn "*sse2_cvtpd2ps"
2007 [(set (match_operand:V4SF 0 "register_operand" "=x")
2008 (vec_concat:V4SF
2009 (float_truncate:V2SF
2010 (match_operand:V2DF 1 "nonimmediate_operand" "xm"))
2011 (match_operand:V2SF 2 "const0_operand" "")))]
2012 "TARGET_SSE2"
2013 "cvtpd2ps\t{%1, %0|%0, %1}"
2014 [(set_attr "type" "ssecvt")
2015 (set_attr "mode" "V4SF")])
2016
;; cvtps2pd: widens elements 0 and 1 of V4SF operand 1 to a V2DF vector;
;; elements 2 and 3 of the source are not used.
2017(define_insn "sse2_cvtps2pd"
2018 [(set (match_operand:V2DF 0 "register_operand" "=x")
2019 (float_extend:V2DF
2020 (vec_select:V2SF
2021 (match_operand:V4SF 1 "nonimmediate_operand" "xm")
2022 (parallel [(const_int 0) (const_int 1)]))))]
2023 "TARGET_SSE2"
2024 "cvtps2pd\t{%1, %0|%0, %1}"
2025 [(set_attr "type" "ssecvt")
2026 (set_attr "mode" "V2DF")])
2027
2028;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
2029;;
2030;; Parallel double-precision floating point element swizzling
2031;;
2032;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
2033
;; Selects elements 1 and 3 of the concatenation of operands 1 and 2,
;; i.e. result = { operand1[1], operand2[1] }.  Three alternatives:
;;   0: both in registers, operand 1 tied to the destination: unpckhpd.
;;   1: operand 1 in offsettable memory, operand 2 tied: movlpd from %H1.
;;   2: destination in memory (tied to operand 2): movhpd stores from
;;      operand 1.
;; The condition rejects the case where both inputs are memory.
;; NOTE(review): %H presumably addresses the upper 8 bytes of the memory
;; operand -- confirm against print_operand.
2034(define_insn "sse2_unpckhpd"
2035 [(set (match_operand:V2DF 0 "nonimmediate_operand" "=x,x,m")
2036 (vec_select:V2DF
2037 (vec_concat:V4DF
2038 (match_operand:V2DF 1 "nonimmediate_operand" " 0,o,x")
2039 (match_operand:V2DF 2 "nonimmediate_operand" " x,0,0"))
2040 (parallel [(const_int 1)
2041 (const_int 3)])))]
2042 "TARGET_SSE2 && !(MEM_P (operands[1]) && MEM_P (operands[2]))"
2043 "@
2044 unpckhpd\t{%2, %0|%0, %2}
2045 movlpd\t{%H1, %0|%0, %H1}
2046 movhpd\t{%1, %0|%0, %1}"
2047 [(set_attr "type" "sselog,ssemov,ssemov")
2048 (set_attr "mode" "V2DF,V1DF,V1DF")])
2049
;; Duplicates element 0 of operand 1 into both result elements (elements
;; 0 and 2 of operand 1 concatenated with itself).  With a register
;; destination this is movddup; the memory-destination alternative
;; outputs "#" and therefore has to be split.
2050(define_insn "*sse3_movddup"
2051 [(set (match_operand:V2DF 0 "nonimmediate_operand" "=x,o")
2052 (vec_select:V2DF
2053 (vec_concat:V4DF
2054 (match_operand:V2DF 1 "nonimmediate_operand" "xm,x")
2055 (match_dup 1))
2056 (parallel [(const_int 0)
2057 (const_int 2)])))]
2058 "TARGET_SSE3 && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
2059 "@
2060 movddup\t{%1, %0|%0, %1}
2061 #"
2062 [(set_attr "type" "sselog1,ssemov")
2063 (set_attr "mode" "V2DF")])
2064
;; After reload, splits the duplicate-low-element-to-memory form into two
;; DFmode stores: the source register, viewed in DFmode, is written to
;; byte offsets 0 and 8 of the destination.
2065(define_split
2066 [(set (match_operand:V2DF 0 "memory_operand" "")
2067 (vec_select:V2DF
2068 (vec_concat:V4DF
2069 (match_operand:V2DF 1 "register_operand" "")
2070 (match_dup 1))
2071 (parallel [(const_int 0)
2072 (const_int 2)])))]
2073 "TARGET_SSE3 && reload_completed"
2074 [(const_int 0)]
2075{
2076 rtx low = gen_rtx_REG (DFmode, REGNO (operands[1]));
2077 emit_move_insn (adjust_address (operands[0], DFmode, 0), low);
2078 emit_move_insn (adjust_address (operands[0], DFmode, 8), low);
2079 DONE;
2080})
2081
;; Selects elements 0 and 2 of the concatenation of operands 1 and 2,
;; i.e. result = { operand1[0], operand2[0] }.  Operand 1 is always tied
;; to the destination.  Three alternatives:
;;   0: operand 2 in a register: unpcklpd.
;;   1: operand 2 in memory: movhpd loads it into the high half.
;;   2: destination in offsettable memory: movlpd stores operand 2 to %H0.
;; NOTE(review): %H presumably addresses the upper 8 bytes of the memory
;; operand -- confirm against print_operand.
2082(define_insn "sse2_unpcklpd"
2083 [(set (match_operand:V2DF 0 "nonimmediate_operand" "=x,x,o")
2084 (vec_select:V2DF
2085 (vec_concat:V4DF
2086 (match_operand:V2DF 1 "nonimmediate_operand" " 0,0,0")
2087 (match_operand:V2DF 2 "nonimmediate_operand" " x,m,x"))
2088 (parallel [(const_int 0)
2089 (const_int 2)])))]
2090 "TARGET_SSE2 && !(MEM_P (operands[1]) && MEM_P (operands[2]))"
2091 "@
2092 unpcklpd\t{%2, %0|%0, %2}
2093 movhpd\t{%2, %0|%0, %2}
2094 movlpd\t{%2, %H0|%H0, %2}"
2095 [(set_attr "type" "sselog,ssemov,ssemov")
2096 (set_attr "mode" "V2DF,V1DF,V1DF")])
2097
;; Intrinsic-level shufpd.  Decodes the immediate in operand 3 into the
;; two element selectors expected by sse2_shufpd_1: bit 0 becomes a
;; selector in 0..1 and bit 1 becomes a selector in 2..3 (indices into
;; the concatenation of operands 1 and 2).
2098(define_expand "sse2_shufpd"
2099 [(match_operand:V2DF 0 "register_operand" "")
2100 (match_operand:V2DF 1 "register_operand" "")
2101 (match_operand:V2DF 2 "nonimmediate_operand" "")
2102 (match_operand:SI 3 "const_int_operand" "")]
2103 "TARGET_SSE2"
2104{
2105 int mask = INTVAL (operands[3]);
2106 emit_insn (gen_sse2_shufpd_1 (operands[0], operands[1], operands[2],
2107 GEN_INT (mask & 1),
2108 GEN_INT (mask & 2 ? 3 : 2)));
2109 DONE;
2110})
2111
;; shufpd with explicit element selectors.  Operand 3 (0..1) picks the
;; result's element 0 from operand 1; operand 4 (2..3) picks element 1
;; from operand 2.  The output code folds both selectors back into the
;; 2-bit immediate and overwrites operands[3] with it for printing.
2112(define_insn "sse2_shufpd_1"
2113 [(set (match_operand:V2DF 0 "register_operand" "=x")
2114 (vec_select:V2DF
2115 (vec_concat:V4DF
2116 (match_operand:V2DF 1 "register_operand" "0")
2117 (match_operand:V2DF 2 "nonimmediate_operand" "xm"))
2118 (parallel [(match_operand 3 "const_0_to_1_operand" "")
2119 (match_operand 4 "const_2_to_3_operand" "")])))]
2120 "TARGET_SSE2"
2121{
2122 int mask;
2123 mask = INTVAL (operands[3]);
2124 mask |= (INTVAL (operands[4]) - 2) << 1;
2125 operands[3] = GEN_INT (mask);
2126
2127 return "shufpd\t{%3, %2, %0|%0, %2, %3}";
2128}
2129 [(set_attr "type" "sselog")
2130 (set_attr "mode" "V2DF")])
2131
;; Extracts element 1 of V2DF operand 1 into a DF destination.
;;   0: destination in memory: movhpd store.
;;   1: in place within one SSE register: unpckhpd %0, %0.
;;   2: source in offsettable memory: "#", left to a splitter.
2132(define_insn "sse2_storehpd"
2133 [(set (match_operand:DF 0 "nonimmediate_operand" "=m,x,x*fr")
2134 (vec_select:DF
2135 (match_operand:V2DF 1 "nonimmediate_operand" " x,0,o")
2136 (parallel [(const_int 1)])))]
2137 "TARGET_SSE2 && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
2138 "@
2139 movhpd\t{%1, %0|%0, %1}
2140 unpckhpd\t%0, %0
2141 #"
2142 [(set_attr "type" "ssemov,sselog1,ssemov")
2143 (set_attr "mode" "V1DF,V2DF,DF")])
2144
;; After reload, turns "extract element 1 of a V2DF in memory" into a
;; plain DFmode load from byte offset 8 of that memory.
2145(define_split
2146 [(set (match_operand:DF 0 "register_operand" "")
2147 (vec_select:DF
2148 (match_operand:V2DF 1 "memory_operand" "")
2149 (parallel [(const_int 1)])))]
2150 "TARGET_SSE2 && reload_completed"
2151 [(set (match_dup 0) (match_dup 1))]
2152{
2153 operands[1] = adjust_address (operands[1], DFmode, 8);
2154})
2155
;; Extracts element 0 of V2DF operand 1 into a DF destination.  The
;; memory-destination alternative is a movlpd store; the two
;; register-destination alternatives output "#" and are left to a
;; splitter.
2156(define_insn "sse2_storelpd"
2157 [(set (match_operand:DF 0 "nonimmediate_operand" "=m,x,x*fr")
2158 (vec_select:DF
2159 (match_operand:V2DF 1 "nonimmediate_operand" " x,x,m")
2160 (parallel [(const_int 0)])))]
2161 "TARGET_SSE2 && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
2162 "@
2163 movlpd\t{%1, %0|%0, %1}
2164 #
2165 #"
2166 [(set_attr "type" "ssemov")
2167 (set_attr "mode" "V1DF,DF,DF")])
2168
;; After reload, turns "extract element 0 of a V2DF" with a register
;; destination into an ordinary DFmode move.  A register source is
;; re-created as a DFmode REG with the same register number; any other
;; source goes through gen_lowpart.
2169(define_split
2170 [(set (match_operand:DF 0 "register_operand" "")
2171 (vec_select:DF
2172 (match_operand:V2DF 1 "nonimmediate_operand" "")
2173 (parallel [(const_int 0)])))]
2174 "TARGET_SSE2 && reload_completed"
2175 [(const_int 0)]
2176{
2177 rtx op1 = operands[1];
2178 if (REG_P (op1))
2179 op1 = gen_rtx_REG (DFmode, REGNO (op1));
2180 else
2181 op1 = gen_lowpart (DFmode, op1);
2182 emit_move_insn (operands[0], op1);
2183 DONE;
2184})
2185
;; Replaces element 1 of V2DF operand 1 with DF operand 2:
;; result = { operand1[0], operand2 }.  Operand 1 is tied to the
;; destination in every alternative.
;;   0: operand 2 in memory: movhpd load into the high half.
;;   1: operand 2 in an SSE register: unpcklpd.
;;   2: destination in offsettable memory: "#", left to a splitter.
;; There is deliberately no alternative with operand 2 tied to the
;; destination and operand 1 in another register: shufpd always takes the
;; result's low element from its destination and the high element from
;; its source, so "shufpd $1, %1, %0" would produce
;; { old %0[1], operand1[0] } instead of { operand1[0], operand2 }.
2186(define_insn "sse2_loadhpd"
2187 [(set (match_operand:V2DF 0 "nonimmediate_operand" "=x,x,o")
2188 (vec_concat:V2DF
2189 (vec_select:DF
2190 (match_operand:V2DF 1 "nonimmediate_operand" " 0,0,0")
2191 (parallel [(const_int 0)]))
2192 (match_operand:DF 2 "nonimmediate_operand" " m,x,x*fr")))]
2193 "TARGET_SSE2 && !(MEM_P (operands[1]) && MEM_P (operands[2]))"
2194 "@
2195 movhpd\t{%2, %0|%0, %2}
2196 unpcklpd\t{%2, %0|%0, %2}
2198 #"
2199 [(set_attr "type" "ssemov,sselog,other")
2200 (set_attr "mode" "V1DF,V2DF,DF")])
2201
;; After reload, turns "replace element 1 of a V2DF in memory with a DF
;; register" into a DFmode store at byte offset 8 of that memory; element
;; 0 is left untouched.
2202(define_split
2203 [(set (match_operand:V2DF 0 "memory_operand" "")
2204 (vec_concat:V2DF
2205 (vec_select:DF (match_dup 0) (parallel [(const_int 0)]))
2206 (match_operand:DF 1 "register_operand" "")))]
2207 "TARGET_SSE2 && reload_completed"
2208 [(set (match_dup 0) (match_dup 1))]
2209{
2210 operands[0] = adjust_address (operands[0], DFmode, 8);
2211})
2212
;; Replaces element 0 of V2DF operand 1 with DF operand 2:
;; result = { operand2, operand1[1] }.
;;   0: operand 1 is the constant "C", operand 2 in memory: movsd load.
;;   1: operand 1 tied, operand 2 in memory: movlpd load.
;;   2: operand 1 tied, operand 2 in a register: movsd.
;;   3: operand 2 tied, operand 1 in a register: shufpd $2 keeps the
;;      destination's low element and takes the high element from its
;;      source, so the source must be operand 1 (%2 is the destination
;;      register itself and would make the instruction a no-op).
;;   4: operand 2 tied, operand 1 in offsettable memory: movhpd from %H1.
;;   5: destination in memory: "#", left to a splitter.
2213(define_insn "sse2_loadlpd"
2214 [(set (match_operand:V2DF 0 "nonimmediate_operand" "=x,x,x,x,x,m")
2215 (vec_concat:V2DF
2216 (match_operand:DF 2 "nonimmediate_operand" " m,m,x,0,0,x*fr")
2217 (vec_select:DF
2218 (match_operand:V2DF 1 "vector_move_operand" " C,0,0,x,o,0")
2219 (parallel [(const_int 1)]))))]
2220 "TARGET_SSE2 && !(MEM_P (operands[1]) && MEM_P (operands[2]))"
2221 "@
2222 movsd\t{%2, %0|%0, %2}
2223 movlpd\t{%2, %0|%0, %2}
2224 movsd\t{%2, %0|%0, %2}
2225 shufpd\t{$2, %1, %0|%0, %1, 2}
2226 movhpd\t{%H1, %0|%0, %H1}
2227 #"
2228 [(set_attr "type" "ssemov,ssemov,ssemov,sselog,ssemov,other")
2229 (set_attr "mode" "DF,V1DF,V1DF,V2DF,V1DF,DF")])
2230
;; After reload, turns "replace element 0 of a V2DF in memory with a DF
;; register" into a DFmode store.  The pattern keeps element 1 of the
;; memory and overwrites element 0, so the store goes to byte offset 0
;; (offset 8 would clobber the element that must be preserved).
2231(define_split
2232 [(set (match_operand:V2DF 0 "memory_operand" "")
2233 (vec_concat:V2DF
2234 (match_operand:DF 1 "register_operand" "")
2235 (vec_select:DF (match_dup 0) (parallel [(const_int 1)]))))]
2236 "TARGET_SSE2 && reload_completed"
2237 [(set (match_dup 0) (match_dup 1))]
2238{
2239 operands[0] = adjust_address (operands[0], DFmode, 0);
2240})
2241
2242;; Not sure these two are ever used, but it doesn't hurt to have
2243;; them. -aoliva
;; SSE1-only (TARGET_SSE without TARGET_SSE2) extraction of element 1 of
;; a V2DF, done with single-precision moves: movhps store to memory,
;; movhlps register to register, or movlps load from %H1 when the source
;; is offsettable memory.
2244(define_insn "*vec_extractv2df_1_sse"
2245 [(set (match_operand:DF 0 "nonimmediate_operand" "=m,x,x")
2246 (vec_select:DF
2247 (match_operand:V2DF 1 "nonimmediate_operand" "x,x,o")
2248 (parallel [(const_int 1)])))]
2249 "!TARGET_SSE2 && TARGET_SSE
2250 && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
2251 "@
2252 movhps\t{%1, %0|%0, %1}
2253 movhlps\t{%1, %0|%0, %1}
2254 movlps\t{%H1, %0|%0, %H1}"
2255 [(set_attr "type" "ssemov")
2256 (set_attr "mode" "V2SF,V4SF,V2SF")])
2257
;; SSE1-only (TARGET_SSE without TARGET_SSE2) extraction of element 0 of
;; a V2DF: movlps store to memory, movaps register to register, or
;; movlps load from memory.
2258(define_insn "*vec_extractv2df_0_sse"
2259 [(set (match_operand:DF 0 "nonimmediate_operand" "=m,x,x")
2260 (vec_select:DF
2261 (match_operand:V2DF 1 "nonimmediate_operand" "x,x,m")
2262 (parallel [(const_int 0)])))]
2263 "!TARGET_SSE2 && TARGET_SSE
2264 && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
2265 "@
2266 movlps\t{%1, %0|%0, %1}
2267 movaps\t{%1, %0|%0, %1}
2268 movlps\t{%1, %0|%0, %1}"
2269 [(set_attr "type" "ssemov")
2270 (set_attr "mode" "V2SF,V4SF,V2SF")])
2271
;; Merge of two V2DF vectors with mask 1: element 0 comes from operand 2,
;; element 1 from operand 1, i.e. result = { operand2[0], operand1[1] }.
;;   0: operand 1 tied, operand 2 in a register: movsd.
;;   1: operand 1 tied, operand 2 in memory: movlpd load.
;;   2: destination in memory (tied to operand 1): movlpd store.
;;   3: operand 2 tied, operand 1 in a register: shufpd $2 keeps the
;;      destination's low element and takes the high element from its
;;      source, so the source must be operand 1 (%2 is the destination
;;      register itself and would make the instruction a no-op).
;;   4: operand 2 tied, operand 1 in offsettable memory: movhps from %H1.
;;   5: destination in offsettable memory (tied to operand 2): movhps
;;      store of operand 1 to %H0.
2272(define_insn "sse2_movsd"
2273 [(set (match_operand:V2DF 0 "nonimmediate_operand" "=x,x,m,x,x,o")
2274 (vec_merge:V2DF
2275 (match_operand:V2DF 2 "nonimmediate_operand" " x,m,x,0,0,0")
2276 (match_operand:V2DF 1 "nonimmediate_operand" " 0,0,0,x,o,x")
2277 (const_int 1)))]
2278 "TARGET_SSE2"
2279 "@
2280 movsd\t{%2, %0|%0, %2}
2281 movlpd\t{%2, %0|%0, %2}
2282 movlpd\t{%2, %0|%0, %2}
2283 shufpd\t{$2, %1, %0|%0, %1, 2}
2284 movhps\t{%H1, %0|%0, %H1}
2285 movhps\t{%1, %H0|%H0, %1}"
2286 [(set_attr "type" "ssemov,ssemov,ssemov,sselog,ssemov,ssemov")
2287 (set_attr "mode" "DF,V1DF,V1DF,V2DF,V1DF,V1DF")])
2288
;; SSE3 broadcast of a DF value (register or memory) into both elements
;; of a V2DF register, using movddup.
2289(define_insn "*vec_dupv2df_sse3"
2290 [(set (match_operand:V2DF 0 "register_operand" "=x")
2291 (vec_duplicate:V2DF
2292 (match_operand:DF 1 "nonimmediate_operand" "xm")))]
2293 "TARGET_SSE3"
2294 "movddup\t{%1, %0|%0, %1}"
2295 [(set_attr "type" "sselog1")
2296 (set_attr "mode" "DF")])
2297
;; SSE2 broadcast of a DF value into both elements of a V2DF register.
;; The input must already live in the destination register; unpcklpd of
;; the register with itself copies element 0 into element 1.
;; The "mode" attribute is V2DF, matching the other unpcklpd alternatives
;; in this file, because unpcklpd is a packed double-precision
;; instruction.
2298(define_insn "*vec_dupv2df"
2299 [(set (match_operand:V2DF 0 "register_operand" "=x")
2300 (vec_duplicate:V2DF
2301 (match_operand:DF 1 "register_operand" "0")))]
2302 "TARGET_SSE2"
2303 "unpcklpd\t%0, %0"
2304 [(set_attr "type" "sselog1")
2305 (set_attr "mode" "V2DF")])
2306
;; SSE3: concatenation of a DF value with itself, i.e. the same broadcast
;; as vec_duplicate written as a vec_concat; emitted as movddup.
2307(define_insn "*vec_concatv2df_sse3"
2308 [(set (match_operand:V2DF 0 "register_operand" "=x")
2309 (vec_concat:V2DF
2310 (match_operand:DF 1 "nonimmediate_operand" "xm")
2311 (match_dup 1)))]
2312 "TARGET_SSE3"
2313 "movddup\t{%1, %0|%0, %1}"
2314 [(set_attr "type" "sselog1")
2315 (set_attr "mode" "DF")])
2316
;; Builds a V2DF from two DF values: result = { operand 1, operand 2 }.
;;   0: both in registers: unpcklpd.
;;   1: operand 2 in memory: movhpd load into the high half.
;;   2: operand 1 in memory, operand 2 the constant "C": movsd load.
;;   3/4: the same as 0/1 using movlhps / movhps.
;; NOTE(review): alternatives 0-2 use constraint "Y" and 3-4 use "x";
;; presumably "Y" restricts them to SSE2 targets while the insn as a
;; whole is enabled for plain TARGET_SSE -- confirm in the constraint
;; definitions.
2317(define_insn "*vec_concatv2df"
2318 [(set (match_operand:V2DF 0 "register_operand" "=Y,Y,Y,x,x")
2319 (vec_concat:V2DF
2320 (match_operand:DF 1 "nonimmediate_operand" " 0,0,m,0,0")
2321 (match_operand:DF 2 "vector_move_operand" " Y,m,C,x,m")))]
2322 "TARGET_SSE"
2323 "@
2324 unpcklpd\t{%2, %0|%0, %2}
2325 movhpd\t{%2, %0|%0, %2}
2326 movsd\t{%1, %0|%0, %1}
2327 movlhps\t{%2, %0|%0, %2}
2328 movhps\t{%2, %0|%0, %2}"
2329 [(set_attr "type" "sselog,ssemov,ssemov,ssemov,ssemov")
2330 (set_attr "mode" "V2DF,V1DF,DF,V4SF,V2SF")])
2331
;; Sets one element of V2DF operand 0 to DF operand 1; operand 2 is the
;; constant element index.  Delegates to ix86_expand_vector_set with
;; `false' as its first argument.
2332(define_expand "vec_setv2df"
2333 [(match_operand:V2DF 0 "register_operand" "")
2334 (match_operand:DF 1 "register_operand" "")
2335 (match_operand 2 "const_int_operand" "")]
2336 "TARGET_SSE"
2337{
2338 ix86_expand_vector_set (false, operands[0], operands[1],
2339 INTVAL (operands[2]));
2340 DONE;
2341})
2342
;; Extracts one element of V2DF operand 1 into DF operand 0; operand 2 is
;; the constant element index.  Delegates to ix86_expand_vector_extract
;; with `false' as its first argument.
2343(define_expand "vec_extractv2df"
2344 [(match_operand:DF 0 "register_operand" "")
2345 (match_operand:V2DF 1 "register_operand" "")
2346 (match_operand 2 "const_int_operand" "")]
2347 "TARGET_SSE"
2348{
2349 ix86_expand_vector_extract (false, operands[0], operands[1],
2350 INTVAL (operands[2]));
2351 DONE;
2352})
2353
;; Initializes V2DF operand 0 from operand 1, which has no predicate or
;; mode here.  Delegates to ix86_expand_vector_init with `false' as its
;; first argument.
2354(define_expand "vec_initv2df"
2355 [(match_operand:V2DF 0 "register_operand" "")
2356 (match_operand 1 "" "")]
2357 "TARGET_SSE"
2358{
2359 ix86_expand_vector_init (false, operands[0], operands[1]);
2360 DONE;
2361})
2362
2363;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
2364;;
2365;; Parallel integral arithmetic
2366;;
2367;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
2368
;; Integer vector negation for every mode in SSEMODEI, expressed as
;; 0 - operand 1.  The zero vector is forced into a register and becomes
;; operand 2 of the subtraction.
2369(define_expand "neg<mode>2"
2370 [(set (match_operand:SSEMODEI 0 "register_operand" "")
2371 (minus:SSEMODEI
2372 (match_dup 2)
2373 (match_operand:SSEMODEI 1 "nonimmediate_operand" "")))]
2374 "TARGET_SSE2"
2375 "operands[2] = force_reg (<MODE>mode, CONST0_RTX (<MODE>mode));")
2376
;; Integer vector addition for every mode in SSEMODEI.  Before the
;; pattern is emitted the operands are passed through
;; ix86_fixup_binary_operands_no_copy.
2377(define_expand "add<mode>3"
2378 [(set (match_operand:SSEMODEI 0 "register_operand" "")
2379 (plus:SSEMODEI (match_operand:SSEMODEI 1 "nonimmediate_operand" "")
2380 (match_operand:SSEMODEI 2 "nonimmediate_operand" "")))]
2381 "TARGET_SSE2"
2382 "ix86_fixup_binary_operands_no_copy (PLUS, <MODE>mode, operands);")
2383
;; paddb/paddw/paddd/paddq: the mnemonic suffix comes from the ssevecsize
;; mode attribute.  Commutative ("%" on operand 1); recognized only when
;; ix86_binary_operator_ok accepts the operands.
2384(define_insn "*add<mode>3"
2385 [(set (match_operand:SSEMODEI 0 "register_operand" "=x")
2386 (plus:SSEMODEI
2387 (match_operand:SSEMODEI 1 "nonimmediate_operand" "%0")
2388 (match_operand:SSEMODEI 2 "nonimmediate_operand" "xm")))]
2389 "TARGET_SSE2 && ix86_binary_operator_ok (PLUS, <MODE>mode, operands)"
2390 "padd<ssevecsize>\t{%2, %0|%0, %2}"
2391 [(set_attr "type" "sseiadd")
2392 (set_attr "mode" "TI")])
2393
;; paddsb/paddsw: signed saturating addition (ss_plus) for the modes in
;; SSEMODE12 (V16QI, V8HI).  Commutative.
2394(define_insn "sse2_ssadd<mode>3"
2395 [(set (match_operand:SSEMODE12 0 "register_operand" "=x")
2396 (ss_plus:SSEMODE12
2397 (match_operand:SSEMODE12 1 "nonimmediate_operand" "%0")
2398 (match_operand:SSEMODE12 2 "nonimmediate_operand" "xm")))]
2399 "TARGET_SSE2 && ix86_binary_operator_ok (SS_PLUS, <MODE>mode, operands)"
2400 "padds<ssevecsize>\t{%2, %0|%0, %2}"
2401 [(set_attr "type" "sseiadd")
2402 (set_attr "mode" "TI")])
2403
;; paddusb/paddusw: unsigned saturating addition (us_plus) for the modes
;; in SSEMODE12 (V16QI, V8HI).  Commutative.
2404(define_insn "sse2_usadd<mode>3"
2405 [(set (match_operand:SSEMODE12 0 "register_operand" "=x")
2406 (us_plus:SSEMODE12
2407 (match_operand:SSEMODE12 1 "nonimmediate_operand" "%0")
2408 (match_operand:SSEMODE12 2 "nonimmediate_operand" "xm")))]
2409 "TARGET_SSE2 && ix86_binary_operator_ok (US_PLUS, <MODE>mode, operands)"
2410 "paddus<ssevecsize>\t{%2, %0|%0, %2}"
2411 [(set_attr "type" "sseiadd")
2412 (set_attr "mode" "TI")])
2413
;; Integer vector subtraction for every mode in SSEMODEI.  Operand 1 must
;; be a register (the operation is not commutative).  The operands are
;; passed through ix86_fixup_binary_operands_no_copy before emission.
2414(define_expand "sub<mode>3"
2415 [(set (match_operand:SSEMODEI 0 "register_operand" "")
2416 (minus:SSEMODEI (match_operand:SSEMODEI 1 "register_operand" "")
2417 (match_operand:SSEMODEI 2 "nonimmediate_operand" "")))]
2418 "TARGET_SSE2"
2419 "ix86_fixup_binary_operands_no_copy (MINUS, <MODE>mode, operands);")
2420
;; psubb/psubw/psubd/psubq: operand 1 minus operand 2, with operand 1
;; tied to the destination register.
2421(define_insn "*sub<mode>3"
2422 [(set (match_operand:SSEMODEI 0 "register_operand" "=x")
2423 (minus:SSEMODEI
2424 (match_operand:SSEMODEI 1 "register_operand" "0")
2425 (match_operand:SSEMODEI 2 "nonimmediate_operand" "xm")))]
2426 "TARGET_SSE2"
2427 "psub<ssevecsize>\t{%2, %0|%0, %2}"
2428 [(set_attr "type" "sseiadd")
2429 (set_attr "mode" "TI")])
2430
;; psubsb/psubsw: signed saturating subtraction (ss_minus) for the modes
;; in SSEMODE12 (V16QI, V8HI); operand 1 is tied to the destination.
2431(define_insn "sse2_sssub<mode>3"
2432 [(set (match_operand:SSEMODE12 0 "register_operand" "=x")
2433 (ss_minus:SSEMODE12
2434 (match_operand:SSEMODE12 1 "register_operand" "0")
2435 (match_operand:SSEMODE12 2 "nonimmediate_operand" "xm")))]
2436 "TARGET_SSE2"
2437 "psubs<ssevecsize>\t{%2, %0|%0, %2}"
2438 [(set_attr "type" "sseiadd")
2439 (set_attr "mode" "TI")])
2440
;; psubusb/psubusw: unsigned saturating subtraction (us_minus) for the
;; modes in SSEMODE12 (V16QI, V8HI); operand 1 is tied to the
;; destination.
2441(define_insn "sse2_ussub<mode>3"
2442 [(set (match_operand:SSEMODE12 0 "register_operand" "=x")
2443 (us_minus:SSEMODE12
2444 (match_operand:SSEMODE12 1 "register_operand" "0")
2445 (match_operand:SSEMODE12 2 "nonimmediate_operand" "xm")))]
2446 "TARGET_SSE2"
2447 "psubus<ssevecsize>\t{%2, %0|%0, %2}"
2448 [(set_attr "type" "sseiadd")
2449 (set_attr "mode" "TI")])
2450
;; V16QI multiply, synthesized from V8HI multiplies.  The expander
;; allocates 12 V16QI temporaries, unpacks each source so that every
;; 16-bit word holds one source byte in its low byte, multiplies the high
;; and low halves with gen_mulv8hi3 (viewing the temporaries as V8HI via
;; gen_lowpart), and then gathers the low byte of every product back into
;; operand 0 with a sequence of punpckhbw/punpcklbw steps.
2451(define_expand "mulv16qi3"
2452 [(set (match_operand:V16QI 0 "register_operand" "")
2453 (mult:V16QI (match_operand:V16QI 1 "register_operand" "")
2454 (match_operand:V16QI 2 "register_operand" "")))]
2455 "TARGET_SSE2"
2456{
2457 rtx t[12], op0;
2458 int i;
2459
2460 for (i = 0; i < 12; ++i)
2461 t[i] = gen_reg_rtx (V16QImode);
2462
2463 /* Unpack data such that we've got a source byte in each low byte of
2464 each word. We don't care what goes into the high byte of each word.
2465 Rather than trying to get zero in there, most convenient is to let
2466 it be a copy of the low byte. */
2467 emit_insn (gen_sse2_punpckhbw (t[0], operands[1], operands[1]));
2468 emit_insn (gen_sse2_punpckhbw (t[1], operands[2], operands[2]));
2469 emit_insn (gen_sse2_punpcklbw (t[2], operands[1], operands[1]));
2470 emit_insn (gen_sse2_punpcklbw (t[3], operands[2], operands[2]));
2471
2472 /* Multiply words. The end-of-line annotations here give a picture of what
2473 the output of that instruction looks like. Dot means don't care; the
2474 letters are the bytes of the result with A being the most significant. */
2475 emit_insn (gen_mulv8hi3 (gen_lowpart (V8HImode, t[4]), /* .A.B.C.D.E.F.G.H */
2476 gen_lowpart (V8HImode, t[0]),
2477 gen_lowpart (V8HImode, t[1])));
2478 emit_insn (gen_mulv8hi3 (gen_lowpart (V8HImode, t[5]), /* .I.J.K.L.M.N.O.P */
2479 gen_lowpart (V8HImode, t[2]),
2480 gen_lowpart (V8HImode, t[3])));
2481
2482 /* Extract the relevant bytes and merge them back together. */
2483 emit_insn (gen_sse2_punpckhbw (t[6], t[5], t[4])); /* ..AI..BJ..CK..DL */
2484 emit_insn (gen_sse2_punpcklbw (t[7], t[5], t[4])); /* ..EM..FN..GO..HP */
2485 emit_insn (gen_sse2_punpckhbw (t[8], t[7], t[6])); /* ....AEIM....BFJN */
2486 emit_insn (gen_sse2_punpcklbw (t[9], t[7], t[6])); /* ....CGKO....DHLP */
2487 emit_insn (gen_sse2_punpckhbw (t[10], t[9], t[8])); /* ........ACEGIKMO */
2488 emit_insn (gen_sse2_punpcklbw (t[11], t[9], t[8])); /* ........BDFHJLNP */
2489
2490 op0 = operands[0];
2491 emit_insn (gen_sse2_punpcklbw (op0, t[11], t[10])); /* ABCDEFGHIJKLMNOP */
2492 DONE;
2493})
2494
;; V8HI multiply.  Before the pattern is emitted the operands are passed
;; through ix86_fixup_binary_operands_no_copy.
2495(define_expand "mulv8hi3"
2496 [(set (match_operand:V8HI 0 "register_operand" "")
2497 (mult:V8HI (match_operand:V8HI 1 "nonimmediate_operand" "")
2498 (match_operand:V8HI 2 "nonimmediate_operand" "")))]
2499 "TARGET_SSE2"
2500 "ix86_fixup_binary_operands_no_copy (MULT, V8HImode, operands);")
2501
;; pmullw: V8HI multiply in V8HI mode (the result elements are as wide as
;; the inputs).  Commutative ("%" on operand 1); recognized only when
;; ix86_binary_operator_ok accepts the operands.
2502(define_insn "*mulv8hi3"
2503 [(set (match_operand:V8HI 0 "register_operand" "=x")
2504 (mult:V8HI (match_operand:V8HI 1 "nonimmediate_operand" "%0")
2505 (match_operand:V8HI 2 "nonimmediate_operand" "xm")))]
2506 "TARGET_SSE2 && ix86_binary_operator_ok (MULT, V8HImode, operands)"
2507 "pmullw\t{%2, %0|%0, %2}"
2508 [(set_attr "type" "sseimul")
2509 (set_attr "mode" "TI")])
2510
;; pmulhw: signed high-part multiply.  Both V8HI inputs are sign-extended
;; to V8SI, multiplied, shifted right logically by 16 and truncated back
;; to V8HI, which leaves the upper 16 bits of each product.
2511(define_insn "sse2_smulv8hi3_highpart"
2512 [(set (match_operand:V8HI 0 "register_operand" "=x")
2513 (truncate:V8HI
2514 (lshiftrt:V8SI
2515 (mult:V8SI
2516 (sign_extend:V8SI
2517 (match_operand:V8HI 1 "nonimmediate_operand" "%0"))
2518 (sign_extend:V8SI
2519 (match_operand:V8HI 2 "nonimmediate_operand" "xm")))
2520 (const_int 16))))]
2521 "TARGET_SSE2 && ix86_binary_operator_ok (MULT, V8HImode, operands)"
2522 "pmulhw\t{%2, %0|%0, %2}"
2523 [(set_attr "type" "sseimul")
2524 (set_attr "mode" "TI")])
2525
;; pmulhuw: unsigned high-part multiply.  Both V8HI inputs are
;; zero-extended to V8SI, multiplied, shifted right logically by 16 and
;; truncated back to V8HI, which leaves the upper 16 bits of each product.
2526(define_insn "sse2_umulv8hi3_highpart"
2527 [(set (match_operand:V8HI 0 "register_operand" "=x")
2528 (truncate:V8HI
2529 (lshiftrt:V8SI
2530 (mult:V8SI
2531 (zero_extend:V8SI
2532 (match_operand:V8HI 1 "nonimmediate_operand" "%0"))
2533 (zero_extend:V8SI
2534 (match_operand:V8HI 2 "nonimmediate_operand" "xm")))
2535 (const_int 16))))]
2536 "TARGET_SSE2 && ix86_binary_operator_ok (MULT, V8HImode, operands)"
2537 "pmulhuw\t{%2, %0|%0, %2}"
2538 [(set_attr "type" "sseimul")
2539 (set_attr "mode" "TI")])
2540
;; Widening unsigned multiply, emitted as pmuludq: elements 0 and 2 of each
;; V4SI input are selected, zero-extended to V2DI and multiplied, giving a
;; V2DI result.  Operands 1 and 2 are V4SI, so V4SImode is the mode handed
;; to ix86_binary_operator_ok.
2541(define_insn "sse2_umulv2siv2di3"
2542 [(set (match_operand:V2DI 0 "register_operand" "=x")
2543 (mult:V2DI
2544 (zero_extend:V2DI
2545 (vec_select:V2SI
2546 (match_operand:V4SI 1 "nonimmediate_operand" "%0")
2547 (parallel [(const_int 0) (const_int 2)])))
2548 (zero_extend:V2DI
2549 (vec_select:V2SI
2550 (match_operand:V4SI 2 "nonimmediate_operand" "xm")
2551 (parallel [(const_int 0) (const_int 2)])))))]
2552 "TARGET_SSE2 && ix86_binary_operator_ok (MULT, V4SImode, operands)"
2553 "pmuludq\t{%2, %0|%0, %2}"
2554 [(set_attr "type" "sseimul")
2555 (set_attr "mode" "TI")])
2556
;; Multiply-and-add, emitted as pmaddwd.  The even-numbered V8HI elements
;; (0,2,4,6) of both inputs are sign-extended to V4SI and multiplied, the
;; odd-numbered elements (1,3,5,7) likewise, and the two V4SI products are
;; added together.
2557(define_insn "sse2_pmaddwd"
2558 [(set (match_operand:V4SI 0 "register_operand" "=x")
2559 (plus:V4SI
2560 (mult:V4SI
2561 (sign_extend:V4SI
2562 (vec_select:V4HI
2563 (match_operand:V8HI 1 "nonimmediate_operand" "%0")
2564 (parallel [(const_int 0)
2565 (const_int 2)
2566 (const_int 4)
2567 (const_int 6)])))
2568 (sign_extend:V4SI
2569 (vec_select:V4HI
2570 (match_operand:V8HI 2 "nonimmediate_operand" "xm")
2571 (parallel [(const_int 0)
2572 (const_int 2)
2573 (const_int 4)
2574 (const_int 6)]))))
2575 (mult:V4SI
2576 (sign_extend:V4SI
2577 (vec_select:V4HI (match_dup 1)
2578 (parallel [(const_int 1)
2579 (const_int 3)
2580 (const_int 5)
2581 (const_int 7)])))
2582 (sign_extend:V4SI
2583 (vec_select:V4HI (match_dup 2)
2584 (parallel [(const_int 1)
2585 (const_int 3)
2586 (const_int 5)
2587 (const_int 7)]))))))]
2588 "TARGET_SSE2"
2589 "pmaddwd\t{%2, %0|%0, %2}"
2590 [(set_attr "type" "sseiadd")
2591 (set_attr "mode" "TI")])
2592
;; V4SI multiply synthesized from two sse2_umulv2siv2di3 (pmuludq)
;; multiplies: one on elements 0/2 of the inputs, and one on elements 1/3
;; after both inputs have been shifted right by 32 bits as TImode values.
;; The two product vectors are then shuffled with pshufd and interleaved
;; with punpckldq into operand 0.  The RTL template is never emitted; the
;; C body ends in DONE.
2593(define_expand "mulv4si3"
2594 [(set (match_operand:V4SI 0 "register_operand" "")
2595 (mult:V4SI (match_operand:V4SI 1 "register_operand" "")
2596 (match_operand:V4SI 2 "register_operand" "")))]
2597 "TARGET_SSE2"
2598{
2599 rtx t1, t2, t3, t4, t5, t6, thirtytwo;
2600 rtx op0, op1, op2;
2601
2602 op0 = operands[0];
2603 op1 = operands[1];
2604 op2 = operands[2];
2605 t1 = gen_reg_rtx (V4SImode);
2606 t2 = gen_reg_rtx (V4SImode);
2607 t3 = gen_reg_rtx (V4SImode);
2608 t4 = gen_reg_rtx (V4SImode);
2609 t5 = gen_reg_rtx (V4SImode);
2610 t6 = gen_reg_rtx (V4SImode);
2611 thirtytwo = GEN_INT (32);
2612
2613 /* Multiply elements 2 and 0. */
2614 emit_insn (gen_sse2_umulv2siv2di3 (gen_lowpart (V2DImode, t1), op1, op2));
2615
2616 /* Shift both input vectors down one element, so that elements 3 and 1
2617 are now in the slots for elements 2 and 0. For K8, at least, this is
2618 faster than using a shuffle. */
2619 emit_insn (gen_sse2_lshrti3 (gen_lowpart (TImode, t2),
2620 gen_lowpart (TImode, op1), thirtytwo));
2621 emit_insn (gen_sse2_lshrti3 (gen_lowpart (TImode, t3),
2622 gen_lowpart (TImode, op2), thirtytwo));
2623
2624 /* Multiply elements 3 and 1. */
2625 emit_insn (gen_sse2_umulv2siv2di3 (gen_lowpart (V2DImode, t4), t2, t3));
2626
2627 /* Move the results in element 2 down to element 1; we don't care what
2628 goes in elements 2 and 3. */
2629 emit_insn (gen_sse2_pshufd_1 (t5, t1, const0_rtx, const2_rtx,
2630 const0_rtx, const0_rtx));
2631 emit_insn (gen_sse2_pshufd_1 (t6, t4, const0_rtx, const2_rtx,
2632 const0_rtx, const0_rtx));
2633
2634 /* Merge the parts back together. */
2635 emit_insn (gen_sse2_punpckldq (op0, t5, t6));
2636 DONE;
2637})
2638
;; V2DI multiply built from 32x32->64 partial products
;; (sse2_umulv2siv2di3).  With each 64-bit element split into a low and a
;; high 32-bit half, the result is
;;   lo1*lo2 + ((lo1*hi2) << 32) + ((lo2*hi1) << 32);
;; no hi1*hi2 product is computed.  The RTL template is never emitted;
;; the C body ends in DONE.
2639(define_expand "mulv2di3"
2640 [(set (match_operand:V2DI 0 "register_operand" "")
2641 (mult:V2DI (match_operand:V2DI 1 "register_operand" "")
2642 (match_operand:V2DI 2 "register_operand" "")))]
2643 "TARGET_SSE2"
2644{
2645 rtx t1, t2, t3, t4, t5, t6, thirtytwo;
2646 rtx op0, op1, op2;
2647
2648 op0 = operands[0];
2649 op1 = operands[1];
2650 op2 = operands[2];
2651 t1 = gen_reg_rtx (V2DImode);
2652 t2 = gen_reg_rtx (V2DImode);
2653 t3 = gen_reg_rtx (V2DImode);
2654 t4 = gen_reg_rtx (V2DImode);
2655 t5 = gen_reg_rtx (V2DImode);
2656 t6 = gen_reg_rtx (V2DImode);
2657 thirtytwo = GEN_INT (32);
2658
2659 /* Multiply low parts. */
2660 emit_insn (gen_sse2_umulv2siv2di3 (t1, gen_lowpart (V4SImode, op1),
2661 gen_lowpart (V4SImode, op2)));
2662
2663 /* Shift input vectors right 32 bits so we can multiply high parts. */
2664 emit_insn (gen_lshrv2di3 (t2, op1, thirtytwo));
2665 emit_insn (gen_lshrv2di3 (t3, op2, thirtytwo));
2666
2667 /* Multiply high parts by low parts. */
2668 emit_insn (gen_sse2_umulv2siv2di3 (t4, gen_lowpart (V4SImode, op1),
2669 gen_lowpart (V4SImode, t3)));
2670 emit_insn (gen_sse2_umulv2siv2di3 (t5, gen_lowpart (V4SImode, op2),
2671 gen_lowpart (V4SImode, t2)));
2672
2673 /* Shift them back. */
2674 emit_insn (gen_ashlv2di3 (t4, t4, thirtytwo));
2675 emit_insn (gen_ashlv2di3 (t5, t5, thirtytwo));
2676
2677 /* Add the three parts together. */
2678 emit_insn (gen_addv2di3 (t6, t1, t4));
2679 emit_insn (gen_addv2di3 (op0, t6, t5));
2680 DONE;
2681})
2682
;; Signed dot product step: operand 0 = operand 3 + pmaddwd (operand 1,
;; operand 2), with the pmaddwd result held in a fresh V4SI register.
2683(define_expand "sdot_prodv8hi"
2684 [(match_operand:V4SI 0 "register_operand" "")
2685 (match_operand:V8HI 1 "nonimmediate_operand" "")
2686 (match_operand:V8HI 2 "nonimmediate_operand" "")
2687 (match_operand:V4SI 3 "register_operand" "")]
2688 "TARGET_SSE2"
2689{
2690 rtx madd = gen_reg_rtx (V4SImode);
 /* Pairwise products summed into V4SI, then accumulated onto operand 3. */
2691 emit_insn (gen_sse2_pmaddwd (madd, operands[1], operands[2]));
2692 emit_insn (gen_addv4si3 (operands[0], operands[3], madd));
2693 DONE;
2694})
2695
;; Unsigned dot product step for V4SI inputs with a V2DI accumulator:
;; operand 0 = operand 3 + (widened products of elements 0/2)
;;                       + (widened products of elements 1/3).
2696(define_expand "udot_prodv4si"
2697 [(match_operand:V2DI 0 "register_operand" "")
2698 (match_operand:V4SI 1 "register_operand" "")
2699 (match_operand:V4SI 2 "register_operand" "")
2700 (match_operand:V2DI 3 "register_operand" "")]
2701 "TARGET_SSE2"
2702{
2703 rtx even_sum, op1_odd, op2_odd, odd_prod;
2704
 /* Products of elements 0 and 2, added to the incoming accumulator. */
2705 even_sum = gen_reg_rtx (V2DImode);
2706 emit_insn (gen_sse2_umulv2siv2di3 (even_sum, operands[1], operands[2]));
2707 emit_insn (gen_addv2di3 (even_sum, even_sum, operands[3]));
2708
 /* Shift both inputs right by 32 bits (as TImode) so that elements 1
    and 3 occupy the slots the multiply reads. */
2709 op1_odd = gen_reg_rtx (V4SImode);
2710 op2_odd = gen_reg_rtx (V4SImode);
2711 emit_insn (gen_sse2_lshrti3 (gen_lowpart (TImode, op1_odd),
2712 gen_lowpart (TImode, operands[1]),
2713 GEN_INT (32)));
2714 emit_insn (gen_sse2_lshrti3 (gen_lowpart (TImode, op2_odd),
2715 gen_lowpart (TImode, operands[2]),
2716 GEN_INT (32)));
2717
 /* Products of the former elements 1 and 3. */
2718 odd_prod = gen_reg_rtx (V2DImode);
2719 emit_insn (gen_sse2_umulv2siv2di3 (odd_prod, op1_odd, op2_odd));
2720
2721 emit_insn (gen_addv2di3 (operands[0], even_sum, odd_prod));
2722 DONE;
2723})
2724
;; Arithmetic right shift for the SSEMODE24 modes (V8HI, V4SI), emitted as
;; psra<ssevecsize>.  The shift count (operand 2) is TImode and may be a
;; register or an integer constant ("xn"); operand 1 is tied to operand 0.
2725(define_insn "ashr<mode>3"
2726 [(set (match_operand:SSEMODE24 0 "register_operand" "=x")
2727 (ashiftrt:SSEMODE24
2728 (match_operand:SSEMODE24 1 "register_operand" "0")
2729 (match_operand:TI 2 "nonmemory_operand" "xn")))]
2730 "TARGET_SSE2"
2731 "psra<ssevecsize>\t{%2, %0|%0, %2}"
2732 [(set_attr "type" "sseishft")
2733 (set_attr "mode" "TI")])
2734
;; Logical right shift for the SSEMODE248 modes (V8HI, V4SI, V2DI), emitted
;; as psrl<ssevecsize>.  The shift count (operand 2) is TImode and may be a
;; register or an integer constant ("xn"); operand 1 is tied to operand 0.
2735(define_insn "lshr<mode>3"
2736 [(set (match_operand:SSEMODE248 0 "register_operand" "=x")
2737 (lshiftrt:SSEMODE248
2738 (match_operand:SSEMODE248 1 "register_operand" "0")
2739 (match_operand:TI 2 "nonmemory_operand" "xn")))]
2740 "TARGET_SSE2"
2741 "psrl<ssevecsize>\t{%2, %0|%0, %2}"
2742 [(set_attr "type" "sseishft")
2743 (set_attr "mode" "TI")])
2744
;; Left shift for the SSEMODE248 modes (V8HI, V4SI, V2DI), emitted as
;; psll<ssevecsize>.  The shift count (operand 2) is TImode and may be a
;; register or an integer constant ("xn"); operand 1 is tied to operand 0.
2745(define_insn "ashl<mode>3"
2746 [(set (match_operand:SSEMODE248 0 "register_operand" "=x")
2747 (ashift:SSEMODE248
2748 (match_operand:SSEMODE248 1 "register_operand" "0")
2749 (match_operand:TI 2 "nonmemory_operand" "xn")))]
2750 "TARGET_SSE2"
2751 "psll<ssevecsize>\t{%2, %0|%0, %2}"
2752 [(set_attr "type" "sseishft")
2753 (set_attr "mode" "TI")])
2754
;; Whole-register (TImode) left shift, emitted as pslldq.  Operand 2 must
;; satisfy const_0_to_255_mul_8_operand; the output code divides it by 8
;; before printing it as the pslldq immediate.
2755(define_insn "sse2_ashlti3"
2756 [(set (match_operand:TI 0 "register_operand" "=x")
2757 (ashift:TI (match_operand:TI 1 "register_operand" "0")
2758 (match_operand:SI 2 "const_0_to_255_mul_8_operand" "n")))]
2759 "TARGET_SSE2"
2760{
2761 operands[2] = GEN_INT (INTVAL (operands[2]) / 8);
2762 return "pslldq\t{%2, %0|%0, %2}";
2763}
2764 [(set_attr "type" "sseishft")
2765 (set_attr "mode" "TI")])
2766
;; Whole-vector left shift for the SSEMODEI modes.  The expander FAILs
;; unless operand 2 satisfies const_0_to_255_mul_8_operand; otherwise
;; operands 0 and 1 are replaced by their TImode lowparts so that the
;; emitted set is a TImode ashift.
2767(define_expand "vec_shl_<mode>"
2768 [(set (match_operand:SSEMODEI 0 "register_operand" "")
2769 (ashift:TI (match_operand:SSEMODEI 1 "register_operand" "")
2770 (match_operand:SI 2 "general_operand" "")))]
2771 "TARGET_SSE2"
2772{
2773 if (!const_0_to_255_mul_8_operand (operands[2], SImode))
2774 FAIL;
2775 operands[0] = gen_lowpart (TImode, operands[0]);
2776 operands[1] = gen_lowpart (TImode, operands[1]);
2777})
2778
;; Whole-register (TImode) logical right shift, emitted as psrldq.  Operand
;; 2 must satisfy const_0_to_255_mul_8_operand; the output code divides it
;; by 8 before printing it as the psrldq immediate.
2779(define_insn "sse2_lshrti3"
2780 [(set (match_operand:TI 0 "register_operand" "=x")
2781 (lshiftrt:TI (match_operand:TI 1 "register_operand" "0")
2782 (match_operand:SI 2 "const_0_to_255_mul_8_operand" "n")))]
2783 "TARGET_SSE2"
2784{
2785 operands[2] = GEN_INT (INTVAL (operands[2]) / 8);
2786 return "psrldq\t{%2, %0|%0, %2}";
2787}
2788 [(set_attr "type" "sseishft")
2789 (set_attr "mode" "TI")])
2790
;; Whole-vector logical right shift for the SSEMODEI modes.  The expander
;; FAILs unless operand 2 satisfies const_0_to_255_mul_8_operand; otherwise
;; operands 0 and 1 are replaced by their TImode lowparts so that the
;; emitted set is a TImode lshiftrt.
2791(define_expand "vec_shr_<mode>"
2792 [(set (match_operand:SSEMODEI 0 "register_operand" "")
2793 (lshiftrt:TI (match_operand:SSEMODEI 1 "register_operand" "")
2794 (match_operand:SI 2 "general_operand" "")))]
2795 "TARGET_SSE2"
2796{
2797 if (!const_0_to_255_mul_8_operand (operands[2], SImode))
2798 FAIL;
2799 operands[0] = gen_lowpart (TImode, operands[0]);
2800 operands[1] = gen_lowpart (TImode, operands[1]);
2801})
2802
;; Named expander for the unsigned V16QI maximum.  The operands are passed
;; through ix86_fixup_binary_operands_no_copy (defined outside this file)
;; before the (umax:V16QI ...) set is emitted.
2803(define_expand "umaxv16qi3"
2804 [(set (match_operand:V16QI 0 "register_operand" "")
2805 (umax:V16QI (match_operand:V16QI 1 "nonimmediate_operand" "")
2806 (match_operand:V16QI 2 "nonimmediate_operand" "")))]
2807 "TARGET_SSE2"
2808 "ix86_fixup_binary_operands_no_copy (UMAX, V16QImode, operands);")
2809
;; Matches an unsigned V16QI maximum and emits pmaxub.  Operand 1 is tied
;; to the destination and marked commutative ("%0").
2810(define_insn "*umaxv16qi3"
2811 [(set (match_operand:V16QI 0 "register_operand" "=x")
2812 (umax:V16QI (match_operand:V16QI 1 "nonimmediate_operand" "%0")
2813 (match_operand:V16QI 2 "nonimmediate_operand" "xm")))]
2814 "TARGET_SSE2 && ix86_binary_operator_ok (UMAX, V16QImode, operands)"
2815 "pmaxub\t{%2, %0|%0, %2}"
2816 [(set_attr "type" "sseiadd")
2817 (set_attr "mode" "TI")])
2818
;; Named expander for the signed V8HI maximum.  The operands are passed
;; through ix86_fixup_binary_operands_no_copy (defined outside this file)
;; before the (smax:V8HI ...) set is emitted.
2819(define_expand "smaxv8hi3"
2820 [(set (match_operand:V8HI 0 "register_operand" "")
2821 (smax:V8HI (match_operand:V8HI 1 "nonimmediate_operand" "")
2822 (match_operand:V8HI 2 "nonimmediate_operand" "")))]
2823 "TARGET_SSE2"
2824 "ix86_fixup_binary_operands_no_copy (SMAX, V8HImode, operands);")
2825
;; Matches a signed V8HI maximum and emits pmaxsw.  Operand 1 is tied to
;; the destination and marked commutative ("%0").
2826(define_insn "*smaxv8hi3"
2827 [(set (match_operand:V8HI 0 "register_operand" "=x")
2828 (smax:V8HI (match_operand:V8HI 1 "nonimmediate_operand" "%0")
2829 (match_operand:V8HI 2 "nonimmediate_operand" "xm")))]
2830 "TARGET_SSE2 && ix86_binary_operator_ok (SMAX, V8HImode, operands)"
2831 "pmaxsw\t{%2, %0|%0, %2}"
2832 [(set_attr "type" "sseiadd")
2833 (set_attr "mode" "TI")])
2834
;; Unsigned V8HI maximum expanded as two sets: an unsigned saturating
;; subtract (us_minus) of operand 2 from operand 1, followed by adding
;; operand 2 back to that difference.  operands[3] receives the original
;; destination for the second set; if the destination is the same rtx as
;; operand 2, the intermediate difference goes to a fresh register instead
;; so that operand 2 is still intact for the addition.
2835(define_expand "umaxv8hi3"
2836 [(set (match_operand:V8HI 0 "register_operand" "=x")
2837 (us_minus:V8HI (match_operand:V8HI 1 "register_operand" "0")
2838 (match_operand:V8HI 2 "nonimmediate_operand" "xm")))
2839 (set (match_dup 3)
2840 (plus:V8HI (match_dup 0) (match_dup 2)))]
2841 "TARGET_SSE2"
2842{
2843 operands[3] = operands[0];
2844 if (rtx_equal_p (operands[0], operands[2]))
2845 operands[0] = gen_reg_rtx (V8HImode);
2846})
2847
;; Signed maximum for the SSEMODE14 modes (V16QI, V4SI), expanded as the
;; conditional select "op1 > op2 ? op1 : op2" via ix86_expand_int_vcond.
2848(define_expand "smax<mode>3"
2849 [(set (match_operand:SSEMODE14 0 "register_operand" "")
2850 (smax:SSEMODE14 (match_operand:SSEMODE14 1 "register_operand" "")
2851 (match_operand:SSEMODE14 2 "register_operand" "")))]
2852 "TARGET_SSE2"
2853{
2854 rtx vcond_ops[6];
2855 bool expanded;
2856
 /* Slots 3..5: the comparison and its two comparands. */
2857 vcond_ops[4] = operands[1];
2858 vcond_ops[5] = operands[2];
2859 vcond_ops[3] = gen_rtx_GT (VOIDmode, operands[1], operands[2]);
 /* Slots 0..2: destination, value if true, value if false. */
2860 vcond_ops[0] = operands[0];
2861 vcond_ops[1] = operands[1];
2862 vcond_ops[2] = operands[2];
2863 expanded = ix86_expand_int_vcond (vcond_ops);
2864 gcc_assert (expanded);
2865 DONE;
2866})
2867
;; Unsigned V4SI maximum, expanded as the conditional select
;; "op1 >u op2 ? op1 : op2" via ix86_expand_int_vcond.
2868(define_expand "umaxv4si3"
2869 [(set (match_operand:V4SI 0 "register_operand" "")
2870 (umax:V4SI (match_operand:V4SI 1 "register_operand" "")
2871 (match_operand:V4SI 2 "register_operand" "")))]
2872 "TARGET_SSE2"
2873{
2874 rtx vcond_ops[6];
2875 bool expanded;
2876
 /* Slots 3..5: the unsigned comparison and its two comparands. */
2877 vcond_ops[4] = operands[1];
2878 vcond_ops[5] = operands[2];
2879 vcond_ops[3] = gen_rtx_GTU (VOIDmode, operands[1], operands[2]);
 /* Slots 0..2: destination, value if true, value if false. */
2880 vcond_ops[0] = operands[0];
2881 vcond_ops[1] = operands[1];
2882 vcond_ops[2] = operands[2];
2883 expanded = ix86_expand_int_vcond (vcond_ops);
2884 gcc_assert (expanded);
2885 DONE;
2886})
2887
;; Named expander for the unsigned V16QI minimum.  The operands are passed
;; through ix86_fixup_binary_operands_no_copy (defined outside this file)
;; before the (umin:V16QI ...) set is emitted.
2888(define_expand "uminv16qi3"
2889 [(set (match_operand:V16QI 0 "register_operand" "")
2890 (umin:V16QI (match_operand:V16QI 1 "nonimmediate_operand" "")
2891 (match_operand:V16QI 2 "nonimmediate_operand" "")))]
2892 "TARGET_SSE2"
2893 "ix86_fixup_binary_operands_no_copy (UMIN, V16QImode, operands);")
2894
;; Matches an unsigned V16QI minimum and emits pminub.  Operand 1 is tied
;; to the destination and marked commutative ("%0").
2895(define_insn "*uminv16qi3"
2896 [(set (match_operand:V16QI 0 "register_operand" "=x")
2897 (umin:V16QI (match_operand:V16QI 1 "nonimmediate_operand" "%0")
2898 (match_operand:V16QI 2 "nonimmediate_operand" "xm")))]
2899 "TARGET_SSE2 && ix86_binary_operator_ok (UMIN, V16QImode, operands)"
2900 "pminub\t{%2, %0|%0, %2}"
2901 [(set_attr "type" "sseiadd")
2902 (set_attr "mode" "TI")])
2903
;; Named expander for the signed V8HI minimum.  The operands are passed
;; through ix86_fixup_binary_operands_no_copy (defined outside this file)
;; before the (smin:V8HI ...) set is emitted.
2904(define_expand "sminv8hi3"
2905 [(set (match_operand:V8HI 0 "register_operand" "")
2906 (smin:V8HI (match_operand:V8HI 1 "nonimmediate_operand" "")
2907 (match_operand:V8HI 2 "nonimmediate_operand" "")))]
2908 "TARGET_SSE2"
2909 "ix86_fixup_binary_operands_no_copy (SMIN, V8HImode, operands);")
2910
;; Matches a signed V8HI minimum and emits pminsw.  Operand 1 is tied to
;; the destination and marked commutative ("%0").
2911(define_insn "*sminv8hi3"
2912 [(set (match_operand:V8HI 0 "register_operand" "=x")
2913 (smin:V8HI (match_operand:V8HI 1 "nonimmediate_operand" "%0")
2914 (match_operand:V8HI 2 "nonimmediate_operand" "xm")))]
2915 "TARGET_SSE2 && ix86_binary_operator_ok (SMIN, V8HImode, operands)"
2916 "pminsw\t{%2, %0|%0, %2}"
2917 [(set_attr "type" "sseiadd")
2918 (set_attr "mode" "TI")])
2919
;; Signed minimum for the SSEMODE14 modes (V16QI, V4SI), expanded as the
;; conditional select "op1 > op2 ? op2 : op1" via ix86_expand_int_vcond.
2920(define_expand "smin<mode>3"
2921 [(set (match_operand:SSEMODE14 0 "register_operand" "")
2922 (smin:SSEMODE14 (match_operand:SSEMODE14 1 "register_operand" "")
2923 (match_operand:SSEMODE14 2 "register_operand" "")))]
2924 "TARGET_SSE2"
2925{
2926 rtx vcond_ops[6];
2927 bool expanded;
2928
 /* Slots 3..5: the comparison and its two comparands. */
2929 vcond_ops[4] = operands[1];
2930 vcond_ops[5] = operands[2];
2931 vcond_ops[3] = gen_rtx_GT (VOIDmode, operands[1], operands[2]);
 /* Slots 0..2: destination, then the two values swapped relative to
    the comparison so that the smaller one is selected. */
2932 vcond_ops[0] = operands[0];
2933 vcond_ops[1] = operands[2];
2934 vcond_ops[2] = operands[1];
2935 expanded = ix86_expand_int_vcond (vcond_ops);
2936 gcc_assert (expanded);
2937 DONE;
2938})
2939
;; Unsigned minimum for the SSEMODE24 modes (V8HI, V4SI), expanded as the
;; conditional select "op1 >u op2 ? op2 : op1" via ix86_expand_int_vcond.
2940(define_expand "umin<mode>3"
2941 [(set (match_operand:SSEMODE24 0 "register_operand" "")
2942 (umin:SSEMODE24 (match_operand:SSEMODE24 1 "register_operand" "")
2943 (match_operand:SSEMODE24 2 "register_operand" "")))]
2944 "TARGET_SSE2"
2945{
2946 rtx vcond_ops[6];
2947 bool expanded;
2948
 /* Slots 3..5: the unsigned comparison and its two comparands. */
2949 vcond_ops[4] = operands[1];
2950 vcond_ops[5] = operands[2];
2951 vcond_ops[3] = gen_rtx_GTU (VOIDmode, operands[1], operands[2]);
 /* Slots 0..2: destination, then the two values swapped relative to
    the comparison so that the smaller one is selected. */
2952 vcond_ops[0] = operands[0];
2953 vcond_ops[1] = operands[2];
2954 vcond_ops[2] = operands[1];
2955 expanded = ix86_expand_int_vcond (vcond_ops);
2956 gcc_assert (expanded);
2957 DONE;
2958})
2959
2960;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
2961;;
2962;; Parallel integral comparisons
2963;;
2964;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
2965
;; Element-wise equality comparison for the SSEMODE124 modes (V16QI, V8HI,
;; V4SI), emitted as pcmpeq<ssevecsize>.  Operand 1 is tied to the
;; destination and marked commutative ("%0").
2966(define_insn "sse2_eq<mode>3"
2967 [(set (match_operand:SSEMODE124 0 "register_operand" "=x")
2968 (eq:SSEMODE124
2969 (match_operand:SSEMODE124 1 "nonimmediate_operand" "%0")
2970 (match_operand:SSEMODE124 2 "nonimmediate_operand" "xm")))]
2971 "TARGET_SSE2 && ix86_binary_operator_ok (EQ, <MODE>mode, operands)"
2972 "pcmpeq<ssevecsize>\t{%2, %0|%0, %2}"
2973 [(set_attr "type" "ssecmp")
2974 (set_attr "mode" "TI")])
2975
;; Element-wise greater-than comparison (gt) for the SSEMODE124 modes
;; (V16QI, V8HI, V4SI), emitted as pcmpgt<ssevecsize>.  Not commutative:
;; operand 1 must be a register tied to the destination ("0").
2976(define_insn "sse2_gt<mode>3"
2977 [(set (match_operand:SSEMODE124 0 "register_operand" "=x")
2978 (gt:SSEMODE124
2979 (match_operand:SSEMODE124 1 "register_operand" "0")
2980 (match_operand:SSEMODE124 2 "nonimmediate_operand" "xm")))]
2981 "TARGET_SSE2"
2982 "pcmpgt<ssevecsize>\t{%2, %0|%0, %2}"
2983 [(set_attr "type" "ssecmp")
2984 (set_attr "mode" "TI")])
2985
;; Vector conditional select for the SSEMODE124 modes:
;;   operand 0 = (operand 4 <operator 3> operand 5) ? operand 1 : operand 2.
;; All the work is delegated to ix86_expand_int_vcond; the expander FAILs
;; when that helper returns false.
2986(define_expand "vcond<mode>"
2987 [(set (match_operand:SSEMODE124 0 "register_operand" "")
2988 (if_then_else:SSEMODE124
2989 (match_operator 3 ""
2990 [(match_operand:SSEMODE124 4 "nonimmediate_operand" "")
2991 (match_operand:SSEMODE124 5 "nonimmediate_operand" "")])
2992 (match_operand:SSEMODE124 1 "general_operand" "")
2993 (match_operand:SSEMODE124 2 "general_operand" "")))]
2994 "TARGET_SSE2"
2995{
2996 if (!ix86_expand_int_vcond (operands))
2997 FAIL;
2998 DONE;
3000})
3001
;; Same operand layout as the pattern template shows for a conditional
;; select (operator 3 applied to operands 4 and 5 chooses between operands
;; 1 and 2), registered under the "vcondu" name.  All the work is
;; delegated to ix86_expand_int_vcond; the expander FAILs when that helper
;; returns false.
3002(define_expand "vcondu<mode>"
3003 [(set (match_operand:SSEMODE124 0 "register_operand" "")
3004 (if_then_else:SSEMODE124
3005 (match_operator 3 ""
3006 [(match_operand:SSEMODE124 4 "nonimmediate_operand" "")
3007 (match_operand:SSEMODE124 5 "nonimmediate_operand" "")])
3008 (match_operand:SSEMODE124 1 "general_operand" "")
3009 (match_operand:SSEMODE124 2 "general_operand" "")))]
3010 "TARGET_SSE2"
3011{
3012 if (!ix86_expand_int_vcond (operands))
3013 FAIL;
3014 DONE;
3016})
3017
3018;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
3019;;
3020;; Parallel integral logical operations
3021;;
3022;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
3023
;; One's complement for the SSEMODEI modes, expressed as an XOR with a
;; constant vector whose every element is constm1_rtx.  The constant is
;; built here and forced into a register as operand 2.
3024(define_expand "one_cmpl<mode>2"
3025 [(set (match_operand:SSEMODEI 0 "register_operand" "")
3026 (xor:SSEMODEI (match_operand:SSEMODEI 1 "nonimmediate_operand" "")
3027 (match_dup 2)))]
3028 "TARGET_SSE2"
3029{
3030 int nunits = GET_MODE_NUNITS (<MODE>mode);
3031 rtvec all_ones = rtvec_alloc (nunits);
3032 int idx = nunits;
 /* Fill every element with -1; the fill order is irrelevant. */
3033 while (idx-- > 0)
3034 RTVEC_ELT (all_ones, idx) = constm1_rtx;
3035
3036 operands[2] = force_reg (<MODE>mode,
 gen_rtx_CONST_VECTOR (<MODE>mode, all_ones));
3037})
3038
;; Named expander for bitwise AND on the SSEMODEI modes.  The operands are
;; passed through ix86_fixup_binary_operands_no_copy (defined outside this
;; file) before the (and ...) set is emitted.
3039(define_expand "and<mode>3"
3040 [(set (match_operand:SSEMODEI 0 "register_operand" "")
3041 (and:SSEMODEI (match_operand:SSEMODEI 1 "nonimmediate_operand" "")
3042 (match_operand:SSEMODEI 2 "nonimmediate_operand" "")))]
3043 "TARGET_SSE2"
3044 "ix86_fixup_binary_operands_no_copy (AND, <MODE>mode, operands);")
3045
;; Matches a bitwise AND in any SSEMODEI mode and emits pand.  Operand 1 is
;; tied to the destination and marked commutative ("%0").
3046(define_insn "*and<mode>3"
3047 [(set (match_operand:SSEMODEI 0 "register_operand" "=x")
3048 (and:SSEMODEI
3049 (match_operand:SSEMODEI 1 "nonimmediate_operand" "%0")
3050 (match_operand:SSEMODEI 2 "nonimmediate_operand" "xm")))]
3051 "TARGET_SSE2 && ix86_binary_operator_ok (AND, <MODE>mode, operands)"
3052 "pand\t{%2, %0|%0, %2}"
3053 [(set_attr "type" "sselog")
3054 (set_attr "mode" "TI")])
3055
;; AND-NOT, emitted as pandn: the result is (~operand 1) & operand 2.
;; Despite the "nand" in the name, only operand 1 is complemented.
;; Operand 1 must be a register tied to the destination ("0").
3056(define_insn "sse2_nand<mode>3"
3057 [(set (match_operand:SSEMODEI 0 "register_operand" "=x")
3058 (and:SSEMODEI
3059 (not:SSEMODEI (match_operand:SSEMODEI 1 "register_operand" "0"))
3060 (match_operand:SSEMODEI 2 "nonimmediate_operand" "xm")))]
3061 "TARGET_SSE2"
3062 "pandn\t{%2, %0|%0, %2}"
3063 [(set_attr "type" "sselog")
3064 (set_attr "mode" "TI")])
3065
;; Named expander for bitwise inclusive OR on the SSEMODEI modes.  The
;; operands are passed through ix86_fixup_binary_operands_no_copy (defined
;; outside this file) before the (ior ...) set is emitted.
3066(define_expand "ior<mode>3"
3067 [(set (match_operand:SSEMODEI 0 "register_operand" "")
3068 (ior:SSEMODEI (match_operand:SSEMODEI 1 "nonimmediate_operand" "")
3069 (match_operand:SSEMODEI 2 "nonimmediate_operand" "")))]
3070 "TARGET_SSE2"
3071 "ix86_fixup_binary_operands_no_copy (IOR, <MODE>mode, operands);")
3072
;; Matches a bitwise inclusive OR in any SSEMODEI mode and emits por.
;; Operand 1 is tied to the destination and marked commutative ("%0").
3073(define_insn "*ior<mode>3"
3074 [(set (match_operand:SSEMODEI 0 "register_operand" "=x")
3075 (ior:SSEMODEI
3076 (match_operand:SSEMODEI 1 "nonimmediate_operand" "%0")
3077 (match_operand:SSEMODEI 2 "nonimmediate_operand" "xm")))]
3078 "TARGET_SSE2 && ix86_binary_operator_ok (IOR, <MODE>mode, operands)"
3079 "por\t{%2, %0|%0, %2}"
3080 [(set_attr "type" "sselog")
3081 (set_attr "mode" "TI")])
3082
;; Named expander for bitwise exclusive OR on the SSEMODEI modes.  The
;; operands are passed through ix86_fixup_binary_operands_no_copy (defined
;; outside this file) before the (xor ...) set is emitted.
3083(define_expand "xor<mode>3"
3084 [(set (match_operand:SSEMODEI 0 "register_operand" "")
3085 (xor:SSEMODEI (match_operand:SSEMODEI 1 "nonimmediate_operand" "")
3086 (match_operand:SSEMODEI 2 "nonimmediate_operand" "")))]
3087 "TARGET_SSE2"
3088 "ix86_fixup_binary_operands_no_copy (XOR, <MODE>mode, operands);")
3089
;; Matches a bitwise exclusive OR in any SSEMODEI mode and emits pxor.
;; Operand 1 is tied to the destination and marked commutative ("%0").
3090(define_insn "*xor<mode>3"
3091 [(set (match_operand:SSEMODEI 0 "register_operand" "=x")
3092 (xor:SSEMODEI
3093 (match_operand:SSEMODEI 1 "nonimmediate_operand" "%0")
3094 (match_operand:SSEMODEI 2 "nonimmediate_operand" "xm")))]
3095 "TARGET_SSE2 && ix86_binary_operator_ok (XOR, <MODE>mode, operands)"
3096 "pxor\t{%2, %0|%0, %2}"
3097 [(set_attr "type" "sselog")
3098 (set_attr "mode" "TI")])
3099
3100;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
3101;;
3102;; Parallel integral element swizzling
3103;;
3104;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
3105
;; packsswb: each V8HI input is narrowed to V8QI with signed saturation
;; (ss_truncate) and the two halves are concatenated, operand 1 first,
;; into a V16QI result.
3106(define_insn "sse2_packsswb"
3107 [(set (match_operand:V16QI 0 "register_operand" "=x")
3108 (vec_concat:V16QI
3109 (ss_truncate:V8QI
3110 (match_operand:V8HI 1 "register_operand" "0"))
3111 (ss_truncate:V8QI
3112 (match_operand:V8HI 2 "nonimmediate_operand" "xm"))))]
3113 "TARGET_SSE2"
3114 "packsswb\t{%2, %0|%0, %2}"
3115 [(set_attr "type" "sselog")
3116 (set_attr "mode" "TI")])
3117
;; packssdw: each V4SI input is narrowed to V4HI with signed saturation
;; (ss_truncate) and the two halves are concatenated, operand 1 first,
;; into a V8HI result.
3118(define_insn "sse2_packssdw"
3119 [(set (match_operand:V8HI 0 "register_operand" "=x")
3120 (vec_concat:V8HI
3121 (ss_truncate:V4HI
3122 (match_operand:V4SI 1 "register_operand" "0"))
3123 (ss_truncate:V4HI
3124 (match_operand:V4SI 2 "nonimmediate_operand" "xm"))))]
3125 "TARGET_SSE2"
3126 "packssdw\t{%2, %0|%0, %2}"
3127 [(set_attr "type" "sselog")
3128 (set_attr "mode" "TI")])
3129
;; packuswb: each V8HI input is narrowed to V8QI with unsigned saturation
;; (us_truncate) and the two halves are concatenated, operand 1 first,
;; into a V16QI result.
3130(define_insn "sse2_packuswb"
3131 [(set (match_operand:V16QI 0 "register_operand" "=x")
3132 (vec_concat:V16QI
3133 (us_truncate:V8QI
3134 (match_operand:V8HI 1 "register_operand" "0"))
3135 (us_truncate:V8QI
3136 (match_operand:V8HI 2 "nonimmediate_operand" "xm"))))]
3137 "TARGET_SSE2"
3138 "packuswb\t{%2, %0|%0, %2}"
3139 [(set_attr "type" "sselog")
3140 (set_attr "mode" "TI")])
3141
;; punpckhbw: interleaves bytes 8..15 of operand 1 with bytes 8..15 of
;; operand 2 (indices 24..31 of the 32-byte concatenation), operand 1's
;; byte first in each pair.
3142(define_insn "sse2_punpckhbw"
3143 [(set (match_operand:V16QI 0 "register_operand" "=x")
3144 (vec_select:V16QI
3145 (vec_concat:V32QI
3146 (match_operand:V16QI 1 "register_operand" "0")
3147 (match_operand:V16QI 2 "nonimmediate_operand" "xm"))
3148 (parallel [(const_int 8) (const_int 24)
3149 (const_int 9) (const_int 25)
3150 (const_int 10) (const_int 26)
3151 (const_int 11) (const_int 27)
3152 (const_int 12) (const_int 28)
3153 (const_int 13) (const_int 29)
3154 (const_int 14) (const_int 30)
3155 (const_int 15) (const_int 31)])))]
3156 "TARGET_SSE2"
3157 "punpckhbw\t{%2, %0|%0, %2}"
3158 [(set_attr "type" "sselog")
3159 (set_attr "mode" "TI")])
3160
;; punpcklbw: interleaves bytes 0..7 of operand 1 with bytes 0..7 of
;; operand 2 (indices 16..23 of the 32-byte concatenation), operand 1's
;; byte first in each pair.
3161(define_insn "sse2_punpcklbw"
3162 [(set (match_operand:V16QI 0 "register_operand" "=x")
3163 (vec_select:V16QI
3164 (vec_concat:V32QI
3165 (match_operand:V16QI 1 "register_operand" "0")
3166 (match_operand:V16QI 2 "nonimmediate_operand" "xm"))
3167 (parallel [(const_int 0) (const_int 16)
3168 (const_int 1) (const_int 17)
3169 (const_int 2) (const_int 18)
3170 (const_int 3) (const_int 19)
3171 (const_int 4) (const_int 20)
3172 (const_int 5) (const_int 21)
3173 (const_int 6) (const_int 22)
3174 (const_int 7) (const_int 23)])))]
3175 "TARGET_SSE2"
3176 "punpcklbw\t{%2, %0|%0, %2}"
3177 [(set_attr "type" "sselog")
3178 (set_attr "mode" "TI")])
3179
;; punpckhwd: interleaves HI elements 4..7 of operand 1 with elements 4..7
;; of operand 2 (indices 12..15 of the concatenation), operand 1's element
;; first in each pair.
3180(define_insn "sse2_punpckhwd"
3181 [(set (match_operand:V8HI 0 "register_operand" "=x")
3182 (vec_select:V8HI
3183 (vec_concat:V16HI
3184 (match_operand:V8HI 1 "register_operand" "0")
3185 (match_operand:V8HI 2 "nonimmediate_operand" "xm"))
3186 (parallel [(const_int 4) (const_int 12)
3187 (const_int 5) (const_int 13)
3188 (const_int 6) (const_int 14)
3189 (const_int 7) (const_int 15)])))]
3190 "TARGET_SSE2"
3191 "punpckhwd\t{%2, %0|%0, %2}"
3192 [(set_attr "type" "sselog")
3193 (set_attr "mode" "TI")])
3194
;; punpcklwd: interleaves HI elements 0..3 of operand 1 with elements 0..3
;; of operand 2 (indices 8..11 of the concatenation), operand 1's element
;; first in each pair.
3195(define_insn "sse2_punpcklwd"
3196 [(set (match_operand:V8HI 0 "register_operand" "=x")
3197 (vec_select:V8HI
3198 (vec_concat:V16HI
3199 (match_operand:V8HI 1 "register_operand" "0")
3200 (match_operand:V8HI 2 "nonimmediate_operand" "xm"))
3201 (parallel [(const_int 0) (const_int 8)
3202 (const_int 1) (const_int 9)
3203 (const_int 2) (const_int 10)
3204 (const_int 3) (const_int 11)])))]
3205 "TARGET_SSE2"
3206 "punpcklwd\t{%2, %0|%0, %2}"
3207 [(set_attr "type" "sselog")
3208 (set_attr "mode" "TI")])
3209
;; punpckhdq: interleaves SI elements 2..3 of operand 1 with elements 2..3
;; of operand 2 (indices 6..7 of the concatenation), operand 1's element
;; first in each pair.
3210(define_insn "sse2_punpckhdq"
3211 [(set (match_operand:V4SI 0 "register_operand" "=x")
3212 (vec_select:V4SI
3213 (vec_concat:V8SI
3214 (match_operand:V4SI 1 "register_operand" "0")
3215 (match_operand:V4SI 2 "nonimmediate_operand" "xm"))
3216 (parallel [(const_int 2) (const_int 6)
3217 (const_int 3) (const_int 7)])))]
3218 "TARGET_SSE2"
3219 "punpckhdq\t{%2, %0|%0, %2}"
3220 [(set_attr "type" "sselog")
3221 (set_attr "mode" "TI")])
3222
;; punpckldq: interleaves SI elements 0..1 of operand 1 with elements 0..1
;; of operand 2 (indices 4..5 of the concatenation), operand 1's element
;; first in each pair.
3223(define_insn "sse2_punpckldq"
3224 [(set (match_operand:V4SI 0 "register_operand" "=x")
3225 (vec_select:V4SI
3226 (vec_concat:V8SI
3227 (match_operand:V4SI 1 "register_operand" "0")
3228 (match_operand:V4SI 2 "nonimmediate_operand" "xm"))
3229 (parallel [(const_int 0) (const_int 4)
3230 (const_int 1) (const_int 5)])))]
3231 "TARGET_SSE2"
3232 "punpckldq\t{%2, %0|%0, %2}"
3233 [(set_attr "type" "sselog")
3234 (set_attr "mode" "TI")])
3235
;; punpckhqdq: the result holds DI element 1 of operand 1 followed by DI
;; element 1 of operand 2 (index 3 of the concatenation).
3236(define_insn "sse2_punpckhqdq"
3237 [(set (match_operand:V2DI 0 "register_operand" "=x")
3238 (vec_select:V2DI
3239 (vec_concat:V4DI
3240 (match_operand:V2DI 1 "register_operand" "0")
3241 (match_operand:V2DI 2 "nonimmediate_operand" "xm"))
3242 (parallel [(const_int 1)
3243 (const_int 3)])))]
3244 "TARGET_SSE2"
3245 "punpckhqdq\t{%2, %0|%0, %2}"
3246 [(set_attr "type" "sselog")
3247 (set_attr "mode" "TI")])
3248
;; punpcklqdq: the result holds DI element 0 of operand 1 followed by DI
;; element 0 of operand 2 (index 2 of the concatenation).
3249(define_insn "sse2_punpcklqdq"
3250 [(set (match_operand:V2DI 0 "register_operand" "=x")
3251 (vec_select:V2DI
3252 (vec_concat:V4DI
3253 (match_operand:V2DI 1 "register_operand" "0")
3254 (match_operand:V2DI 2 "nonimmediate_operand" "xm"))
3255 (parallel [(const_int 0)
3256 (const_int 2)])))]
3257 "TARGET_SSE2"
3258 "punpcklqdq\t{%2, %0|%0, %2}"
3259 [(set_attr "type" "sselog")
3260 (set_attr "mode" "TI")])
3261
;; Expander for inserting a 16-bit value into one element of a V8HI
;; vector.  Operand 2 arrives as SImode and is narrowed to its HImode
;; lowpart; operand 3 arrives as an element index 0..7 and is rewritten as
;; the one-hot vec_merge mask (1 << index) that "*sse2_pinsrw" expects.
3262(define_expand "sse2_pinsrw"
3263 [(set (match_operand:V8HI 0 "register_operand" "")
3264 (vec_merge:V8HI
3265 (vec_duplicate:V8HI
3266 (match_operand:SI 2 "nonimmediate_operand" ""))
3267 (match_operand:V8HI 1 "register_operand" "")
3268 (match_operand:SI 3 "const_0_to_7_operand" "")))]
3269 "TARGET_SSE2"
3270{
3271 operands[2] = gen_lowpart (HImode, operands[2]);
3272 operands[3] = GEN_INT ((1 << INTVAL (operands[3])));
3273})
3274
;; Matches the vec_merge form of a 16-bit element insert and emits pinsrw.
;; Operand 3 is a power-of-two merge mask (const_pow2_1_to_128_operand);
;; the output code converts it back to an element index with exact_log2
;; before printing.
3275(define_insn "*sse2_pinsrw"
3276 [(set (match_operand:V8HI 0 "register_operand" "=x")
3277 (vec_merge:V8HI
3278 (vec_duplicate:V8HI
3279 (match_operand:HI 2 "nonimmediate_operand" "rm"))
3280 (match_operand:V8HI 1 "register_operand" "0")
3281 (match_operand:SI 3 "const_pow2_1_to_128_operand" "n")))]
3282 "TARGET_SSE2"
3283{
3284 operands[3] = GEN_INT (exact_log2 (INTVAL (operands[3])));
3285 return "pinsrw\t{%3, %k2, %0|%0, %k2, %3}";
3286}
3287 [(set_attr "type" "sselog")
3288 (set_attr "mode" "TI")])
3289
;; pextrw: selects the HI element of operand 1 given by the constant index
;; in operand 2 (0..7) and zero-extends it into an SImode register.
3290(define_insn "sse2_pextrw"
3291 [(set (match_operand:SI 0 "register_operand" "=r")
3292 (zero_extend:SI
3293 (vec_select:HI
3294 (match_operand:V8HI 1 "register_operand" "x")
3295 (parallel [(match_operand:SI 2 "const_0_to_7_operand" "n")]))))]
3296 "TARGET_SSE2"
3297 "pextrw\t{%2, %1, %0|%0, %1, %2}"
3298 [(set_attr "type" "sselog")
3299 (set_attr "mode" "TI")])
3300
;; Expander taking the pshufd selector as a single immediate (operand 2).
;; The immediate is split into four 2-bit element selectors, lowest bits
;; first, and handed to sse2_pshufd_1.
3301(define_expand "sse2_pshufd"
3302 [(match_operand:V4SI 0 "register_operand" "")
3303 (match_operand:V4SI 1 "nonimmediate_operand" "")
3304 (match_operand:SI 2 "const_int_operand" "")]
3305 "TARGET_SSE2"
3306{
3307 int imm8 = INTVAL (operands[2]);
3308 emit_insn (gen_sse2_pshufd_1 (operands[0], operands[1],
3309 GEN_INT (imm8 & 3),
3310 GEN_INT ((imm8 >> 2) & 3),
3311 GEN_INT ((imm8 >> 4) & 3),
3312 GEN_INT ((imm8 >> 6) & 3)));
3313 DONE;
3314})
3315
;; pshufd with the shuffle spelled out as a vec_select of four element
;; indices (operands 2..5, each 0..3).  The output code packs the indices
;; back into one immediate, two bits per element, and prints it as
;; operand 2.
3316(define_insn "sse2_pshufd_1"
3317 [(set (match_operand:V4SI 0 "register_operand" "=x")
3318 (vec_select:V4SI
3319 (match_operand:V4SI 1 "nonimmediate_operand" "xm")
3320 (parallel [(match_operand 2 "const_0_to_3_operand" "")
3321 (match_operand 3 "const_0_to_3_operand" "")
3322 (match_operand 4 "const_0_to_3_operand" "")
3323 (match_operand 5 "const_0_to_3_operand" "")])))]
3324 "TARGET_SSE2"
3325{
3326 int imm;
3327 imm = INTVAL (operands[2]);
3328 imm |= INTVAL (operands[3]) << 2;
3329 imm |= INTVAL (operands[4]) << 4;
3330 imm |= INTVAL (operands[5]) << 6;
3331 operands[2] = GEN_INT (imm);
3332
3333 return "pshufd\t{%2, %1, %0|%0, %1, %2}";
3334}
3335 [(set_attr "type" "sselog1")
3336 (set_attr "mode" "TI")])
3337
;; Expander taking the pshuflw selector as a single immediate (operand 2).
;; The immediate is split into four 2-bit element selectors, lowest bits
;; first, and handed to sse2_pshuflw_1.
3338(define_expand "sse2_pshuflw"
3339 [(match_operand:V8HI 0 "register_operand" "")
3340 (match_operand:V8HI 1 "nonimmediate_operand" "")
3341 (match_operand:SI 2 "const_int_operand" "")]
3342 "TARGET_SSE2"
3343{
3344 int imm8 = INTVAL (operands[2]);
3345 emit_insn (gen_sse2_pshuflw_1 (operands[0], operands[1],
3346 GEN_INT (imm8 & 3),
3347 GEN_INT ((imm8 >> 2) & 3),
3348 GEN_INT ((imm8 >> 4) & 3),
3349 GEN_INT ((imm8 >> 6) & 3)));
3350 DONE;
3351})
3352
;; pshuflw as a vec_select: result elements 0..3 are chosen by operands
;; 2..5 (each 0..3) while elements 4..7 are passed through unchanged.  The
;; output code packs the four indices into one immediate, two bits per
;; element, and prints it as operand 2.
3353(define_insn "sse2_pshuflw_1"
3354 [(set (match_operand:V8HI 0 "register_operand" "=x")
3355 (vec_select:V8HI
3356 (match_operand:V8HI 1 "nonimmediate_operand" "xm")
3357 (parallel [(match_operand 2 "const_0_to_3_operand" "")
3358 (match_operand 3 "const_0_to_3_operand" "")
3359 (match_operand 4 "const_0_to_3_operand" "")
3360 (match_operand 5 "const_0_to_3_operand" "")
3361 (const_int 4)
3362 (const_int 5)
3363 (const_int 6)
3364 (const_int 7)])))]
3365 "TARGET_SSE2"
3366{
3367 int imm;
3368 imm = INTVAL (operands[2]);
3369 imm |= INTVAL (operands[3]) << 2;
3370 imm |= INTVAL (operands[4]) << 4;
3371 imm |= INTVAL (operands[5]) << 6;
3372 operands[2] = GEN_INT (imm);
3373
3374 return "pshuflw\t{%2, %1, %0|%0, %1, %2}";
3375}
3376 [(set_attr "type" "sselog")
3377 (set_attr "mode" "TI")])
3378
;; Expander taking the pshufhw selector as a single immediate (operand 2).
;; Each 2-bit field is biased by 4 so that it names one of elements 4..7,
;; which is the index range sse2_pshufhw_1 accepts.
3379(define_expand "sse2_pshufhw"
3380 [(match_operand:V8HI 0 "register_operand" "")
3381 (match_operand:V8HI 1 "nonimmediate_operand" "")
3382 (match_operand:SI 2 "const_int_operand" "")]
3383 "TARGET_SSE2"
3384{
3385 int imm8 = INTVAL (operands[2]);
3386 emit_insn (gen_sse2_pshufhw_1 (operands[0], operands[1],
3387 GEN_INT (4 + (imm8 & 3)),
3388 GEN_INT (4 + ((imm8 >> 2) & 3)),
3389 GEN_INT (4 + ((imm8 >> 4) & 3)),
3390 GEN_INT (4 + ((imm8 >> 6) & 3))));
3391 DONE;
3392})
3393
;; pshufhw as a vec_select: result elements 0..3 are passed through
;; unchanged while elements 4..7 are chosen by operands 2..5 (each 4..7).
;; The output code removes the bias of 4 from each index, packs the four
;; 2-bit fields into one immediate and prints it as operand 2.
3394(define_insn "sse2_pshufhw_1"
3395 [(set (match_operand:V8HI 0 "register_operand" "=x")
3396 (vec_select:V8HI
3397 (match_operand:V8HI 1 "nonimmediate_operand" "xm")
3398 (parallel [(const_int 0)
3399 (const_int 1)
3400 (const_int 2)
3401 (const_int 3)
3402 (match_operand 2 "const_4_to_7_operand" "")
3403 (match_operand 3 "const_4_to_7_operand" "")
3404 (match_operand 4 "const_4_to_7_operand" "")
3405 (match_operand 5 "const_4_to_7_operand" "")])))]
3406 "TARGET_SSE2"
3407{
3408 int imm;
3409 imm = INTVAL (operands[2]) - 4;
3410 imm |= (INTVAL (operands[3]) - 4) << 2;
3411 imm |= (INTVAL (operands[4]) - 4) << 4;
3412 imm |= (INTVAL (operands[5]) - 4) << 6;
3413 operands[2] = GEN_INT (imm);
3414
3415 return "pshufhw\t{%2, %1, %0|%0, %1, %2}";
3416}
3417 [(set_attr "type" "sselog")
3418 (set_attr "mode" "TI")])
3419
;; Expander that places an SImode value in element 0 of a V4SI register
;; (vec_merge mask 1) with the remaining elements taken from operand 2,
;; which the preparation statement sets to the V4SI zero constant.
3420(define_expand "sse2_loadd"
3421 [(set (match_operand:V4SI 0 "register_operand" "")
3422 (vec_merge:V4SI
3423 (vec_duplicate:V4SI
3424 (match_operand:SI 1 "nonimmediate_operand" ""))
3425 (match_dup 2)
3426 (const_int 1)))]
3427 "TARGET_SSE"
3428 "operands[2] = CONST0_RTX (V4SImode);")
3429
;; Insert an SImode value (operand 2) into element 0 of a V4SI vector, the
;; other elements coming from operand 1.  Three alternatives: movd or
;; movss when operand 1 satisfies the "C" constraint, and movss from a
;; register source when operand 1 is tied to the destination.
3430(define_insn "sse2_loadld"
3431 [(set (match_operand:V4SI 0 "register_operand" "=Y,x,x")
3432 (vec_merge:V4SI
3433 (vec_duplicate:V4SI
3434 (match_operand:SI 2 "nonimmediate_operand" "mr,m,x"))
3435 (match_operand:V4SI 1 "reg_or_0_operand" " C,C,0")
3436 (const_int 1)))]
3437 "TARGET_SSE"
3438 "@
3439 movd\t{%2, %0|%0, %2}
3440 movss\t{%2, %0|%0, %2}
3441 movss\t{%2, %0|%0, %2}"
3442 [(set_attr "type" "ssemov")
3443 (set_attr "mode" "TI,V4SF,SF")])
3444
3445;; ??? The hardware supports more, but TARGET_INTER_UNIT_MOVES must
3446;; be taken into account, and movdi isn't fully populated even without.
;; Extract element 0 of a V4SI register as an SImode value.  The insn has
;; no assembly of its own ("#"); after reload it is split into a plain
;; SImode move whose source is the same hard register viewed in SImode.
3447(define_insn_and_split "sse2_stored"
3448 [(set (match_operand:SI 0 "nonimmediate_operand" "=mx")
3449 (vec_select:SI
3450 (match_operand:V4SI 1 "register_operand" "x")
3451 (parallel [(const_int 0)])))]
3452 "TARGET_SSE"
3453 "#"
3454 "&& reload_completed"
3455 [(set (match_dup 0) (match_dup 1))]
3456{
3457 operands[1] = gen_rtx_REG (SImode, REGNO (operands[1]));
3458})
3459
;; Named expander for extracting element 0 of a V2DI register as a DImode
;; value.  It has no preparation code; the emitted set has the shape that
;; "*sse2_storeq" matches.
3460(define_expand "sse_storeq"
3461 [(set (match_operand:DI 0 "nonimmediate_operand" "")
3462 (vec_select:DI
3463 (match_operand:V2DI 1 "register_operand" "")
3464 (parallel [(const_int 0)])))]
3465 "TARGET_SSE"
3466 "")
3467
3468;; ??? The hardware supports more, but TARGET_INTER_UNIT_MOVES must
3469;; be taken into account, and movdi isn't fully populated even without.
;; Matcher for the element-0 extract of a V2DI register.  The output template
;; is "#", so no assembly is printed directly; the insn has to be split.
3470(define_insn "*sse2_storeq"
3471 [(set (match_operand:DI 0 "nonimmediate_operand" "=mx")
3472 (vec_select:DI
3473 (match_operand:V2DI 1 "register_operand" "x")
3474 (parallel [(const_int 0)])))]
3475 "TARGET_SSE"
3476 "#")
3477
;; After reload, rewrite the element-0 extract of a V2DI register as a plain
;; DImode move: operand 1 is replaced by the same hard register (same REGNO)
;; viewed in DImode.
3478(define_split
3479 [(set (match_operand:DI 0 "nonimmediate_operand" "")
3480 (vec_select:DI
3481 (match_operand:V2DI 1 "register_operand" "")
3482 (parallel [(const_int 0)])))]
3483 "TARGET_SSE && reload_completed"
3484 [(set (match_dup 0) (match_dup 1))]
3485{
3486 operands[1] = gen_rtx_REG (DImode, REGNO (operands[1]));
3487})
3488
;; Extract element 1 of a V2DI when SSE2 is available.  The insn condition
;; rejects the memory-to-memory case.  Alternatives:
;;   0: register source, memory destination            -> movhps
;;   1: source tied to the destination register        -> psrldq by 8 bytes
;;   2: offsettable memory source, register destination -> movq of %H1
;;      (presumably %H1 addresses the upper 8 bytes of the memory operand;
;;      confirm against the operand printer in i386.c)
3489(define_insn "*vec_extractv2di_1_sse2"
3490 [(set (match_operand:DI 0 "nonimmediate_operand" "=m,x,x")
3491 (vec_select:DI
3492 (match_operand:V2DI 1 "nonimmediate_operand" "x,0,o")
3493 (parallel [(const_int 1)])))]
3494 "TARGET_SSE2 && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
3495 "@
3496 movhps\t{%1, %0|%0, %1}
3497 psrldq\t{$8, %0|%0, 8}
3498 movq\t{%H1, %0|%0, %H1}"
3499 [(set_attr "type" "ssemov,sseishft,ssemov")
3500 (set_attr "memory" "*,none,*")
3501 (set_attr "mode" "V2SF,TI,TI")])
3502
3503;; Not sure this is ever used, but it doesn't hurt to have it. -aoliva
;; Extract element 1 of a V2DI when only SSE1 is available (the condition
;; requires !TARGET_SSE2) and the operands are not both in memory.
;; Alternatives: movhps to memory, movhlps register-to-register, and movlps
;; from %H1 of an offsettable memory source.
3504(define_insn "*vec_extractv2di_1_sse"
3505 [(set (match_operand:DI 0 "nonimmediate_operand" "=m,x,x")
3506 (vec_select:DI
3507 (match_operand:V2DI 1 "nonimmediate_operand" "x,x,o")
3508 (parallel [(const_int 1)])))]
3509 "!TARGET_SSE2 && TARGET_SSE
3510 && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
3511 "@
3512 movhps\t{%1, %0|%0, %1}
3513 movhlps\t{%1, %0|%0, %1}
3514 movlps\t{%H1, %0|%0, %H1}"
3515 [(set_attr "type" "ssemov")
3516 (set_attr "mode" "V2SF,V4SF,V2SF")])
3517
;; Broadcast an SImode register value to all four elements of a V4SI.
;; Alternative 0 uses pshufd with selector 0 on a separate source register;
;; alternative 1 ties the source to the destination and uses shufps with
;; selector 0 on the register itself.
3518(define_insn "*vec_dupv4si"
3519 [(set (match_operand:V4SI 0 "register_operand" "=Y,x")
3520 (vec_duplicate:V4SI
3521 (match_operand:SI 1 "register_operand" " Y,0")))]
3522 "TARGET_SSE"
3523 "@
3524 pshufd\t{$0, %1, %0|%0, %1, 0}
3525 shufps\t{$0, %0, %0|%0, %0, 0}"
3526 [(set_attr "type" "sselog1")
3527 (set_attr "mode" "TI,V4SF")])
3528
;; Duplicate a DImode register value into both elements of a V2DI.  In both
;; alternatives the source is tied to the destination ("0"), so the
;; instruction (punpcklqdq or movlhps) names the same register twice; that
;; is why the templates need no AT&T/Intel operand-order braces.
3529(define_insn "*vec_dupv2di"
3530 [(set (match_operand:V2DI 0 "register_operand" "=Y,x")
3531 (vec_duplicate:V2DI
3532 (match_operand:DI 1 "register_operand" " 0,0")))]
3533 "TARGET_SSE"
3534 "@
3535 punpcklqdq\t%0, %0
3536 movlhps\t%0, %0"
3537 [(set_attr "type" "sselog1,ssemov")
3538 (set_attr "mode" "TI,V4SF")])
3539
3540;; ??? In theory we can match memory for the MMX alternative, but allowing
3541;; nonimmediate_operand for operand 2 and *not* allowing memory for the SSE
3542;; alternatives pretty much forces the MMX alternative to be chosen.
;; Concatenate two SImode values into a V2SI (SSE2 variant).
;; Alternatives 0 and 1 use "Y" registers, alternatives 2 and 3 use "y"
;; registers.  When operand 2 is the zero constant ("C") a single movd of
;; operand 1 is emitted; otherwise operand 1 is tied to the destination and
;; punpckldq merges in operand 2.
3543(define_insn "*sse2_concatv2si"
3544 [(set (match_operand:V2SI 0 "register_operand" "=Y, Y,*y,*y")
3545 (vec_concat:V2SI
3546 (match_operand:SI 1 "nonimmediate_operand" " 0,rm, 0,rm")
3547 (match_operand:SI 2 "reg_or_0_operand" " Y, C,*y, C")))]
3548 "TARGET_SSE2"
3549 "@
3550 punpckldq\t{%2, %0|%0, %2}
3551 movd\t{%1, %0|%0, %1}
3552 punpckldq\t{%2, %0|%0, %2}
3553 movd\t{%1, %0|%0, %1}"
3554 [(set_attr "type" "sselog,ssemov,mmxcvt,mmxmov")
3555 (set_attr "mode" "TI,TI,DI,DI")])
3556
;; Concatenate two SImode values into a V2SI, enabled for plain TARGET_SSE.
;; The "x"-register alternatives use unpcklps (operand 1 tied to the
;; destination) or movss from memory when operand 2 is zero; the
;; "y"-register alternatives use punpckldq / movd as in the SSE2 variant.
3557(define_insn "*sse1_concatv2si"
3558 [(set (match_operand:V2SI 0 "register_operand" "=x,x,*y,*y")
3559 (vec_concat:V2SI
3560 (match_operand:SI 1 "nonimmediate_operand" " 0,m, 0,*rm")
3561 (match_operand:SI 2 "reg_or_0_operand" " x,C,*y,C")))]
3562 "TARGET_SSE"
3563 "@
3564 unpcklps\t{%2, %0|%0, %2}
3565 movss\t{%1, %0|%0, %1}
3566 punpckldq\t{%2, %0|%0, %2}
3567 movd\t{%1, %0|%0, %1}"
3568 [(set_attr "type" "sselog,ssemov,mmxcvt,mmxmov")
3569 (set_attr "mode" "V4SF,V4SF,DI,DI")])
3570
;; Concatenate two V2SI values into a V4SI.  Operand 1 is always tied to the
;; destination; operand 2 is merged in with punpcklqdq ("Y" register),
;; movlhps ("x" register) or movhps (memory).
3571(define_insn "*vec_concatv4si_1"
3572 [(set (match_operand:V4SI 0 "register_operand" "=Y,x,x")
3573 (vec_concat:V4SI
3574 (match_operand:V2SI 1 "register_operand" " 0,0,0")
3575 (match_operand:V2SI 2 "nonimmediate_operand" " Y,x,m")))]
3576 "TARGET_SSE"
3577 "@
3578 punpcklqdq\t{%2, %0|%0, %2}
3579 movlhps\t{%2, %0|%0, %2}
3580 movhps\t{%2, %0|%0, %2}"
3581 [(set_attr "type" "sselog,ssemov,ssemov")
3582 (set_attr "mode" "TI,V4SF,V2SF")])
3583
;; Concatenate two DImode values into a V2DI.  Six alternatives:
;;   0: operand 1 in memory, operand 2 zero        -> movq
;;   1: operand 1 in a "y" register, operand 2 zero -> movq2dq
;;   2: operand 1 tied to dest, operand 2 in "Y"   -> punpcklqdq
;;   3: operand 1 tied to dest, operand 2 in "x"   -> movlhps
;;   4: operand 1 tied to dest, operand 2 in memory -> movhps
;;   5: operand 2 tied to dest, operand 1 in memory -> movlps
3584(define_insn "*vec_concatv2di"
3585 [(set (match_operand:V2DI 0 "register_operand" "=Y,?Y,Y,x,x,x")
3586 (vec_concat:V2DI
3587 (match_operand:DI 1 "nonimmediate_operand" " m,*y,0,0,0,m")
3588 (match_operand:DI 2 "vector_move_operand" " C, C,Y,x,m,0")))]
3589 "TARGET_SSE"
3590 "@
3591 movq\t{%1, %0|%0, %1}
3592 movq2dq\t{%1, %0|%0, %1}
3593 punpcklqdq\t{%2, %0|%0, %2}
3594 movlhps\t{%2, %0|%0, %2}
3595 movhps\t{%2, %0|%0, %2}
3596 movlps\t{%1, %0|%0, %1}"
3597 [(set_attr "type" "ssemov,ssemov,sselog,ssemov,ssemov,ssemov")
3598 (set_attr "mode" "TI,TI,TI,V4SF,V2SF,V2SF")])
3599
;; Set one element of a V2DI register: operand 0 is the vector, operand 1 the
;; DImode value and operand 2 the constant element index.  All work is
;; delegated to ix86_expand_vector_set; DONE means the template above is
;; never emitted.  (The leading `false' argument is presumably the helper's
;; mmx_ok flag -- confirm in i386.c.)
3600(define_expand "vec_setv2di"
3601 [(match_operand:V2DI 0 "register_operand" "")
3602 (match_operand:DI 1 "register_operand" "")
3603 (match_operand 2 "const_int_operand" "")]
3604 "TARGET_SSE"
3605{
3606 ix86_expand_vector_set (false, operands[0], operands[1],
3607 INTVAL (operands[2]));
3608 DONE;
3609})
3610
;; Extract one element of a V2DI register: operand 0 receives the DImode
;; value, operand 1 is the vector and operand 2 the constant element index.
;; Delegates to ix86_expand_vector_extract and finishes with DONE.
3611(define_expand "vec_extractv2di"
3612 [(match_operand:DI 0 "register_operand" "")
3613 (match_operand:V2DI 1 "register_operand" "")
3614 (match_operand 2 "const_int_operand" "")]
3615 "TARGET_SSE"
3616{
3617 ix86_expand_vector_extract (false, operands[0], operands[1],
3618 INTVAL (operands[2]));
3619 DONE;
3620})
3621
;; Initialise a V2DI register (operand 0) from operand 1, which has no
;; predicate or mode here; the helper ix86_expand_vector_init interprets it.
;; Finishes with DONE, so the template is never emitted.
3622(define_expand "vec_initv2di"
3623 [(match_operand:V2DI 0 "register_operand" "")
3624 (match_operand 1 "" "")]
3625 "TARGET_SSE"
3626{
3627 ix86_expand_vector_init (false, operands[0], operands[1]);
3628 DONE;
3629})
3630
;; Set one element of a V4SI register: operand 0 is the vector, operand 1 the
;; SImode value and operand 2 the constant element index.  Delegates to
;; ix86_expand_vector_set and finishes with DONE.
3631(define_expand "vec_setv4si"
3632 [(match_operand:V4SI 0 "register_operand" "")
3633 (match_operand:SI 1 "register_operand" "")
3634 (match_operand 2 "const_int_operand" "")]
3635 "TARGET_SSE"
3636{
3637 ix86_expand_vector_set (false, operands[0], operands[1],
3638 INTVAL (operands[2]));
3639 DONE;
3640})
3641
;; Extract one element of a V4SI register into SImode operand 0; operand 2 is
;; the constant element index.  Delegates to ix86_expand_vector_extract and
;; finishes with DONE.
3642(define_expand "vec_extractv4si"
3643 [(match_operand:SI 0 "register_operand" "")
3644 (match_operand:V4SI 1 "register_operand" "")
3645 (match_operand 2 "const_int_operand" "")]
3646 "TARGET_SSE"
3647{
3648 ix86_expand_vector_extract (false, operands[0], operands[1],
3649 INTVAL (operands[2]));
3650 DONE;
3651})
3652
;; Initialise a V4SI register (operand 0) from the unconstrained operand 1 by
;; calling ix86_expand_vector_init; finishes with DONE.
3653(define_expand "vec_initv4si"
3654 [(match_operand:V4SI 0 "register_operand" "")
3655 (match_operand 1 "" "")]
3656 "TARGET_SSE"
3657{
3658 ix86_expand_vector_init (false, operands[0], operands[1]);
3659 DONE;
3660})
3661
;; Set one element of a V8HI register: operand 0 is the vector, operand 1 the
;; HImode value and operand 2 the constant element index.  Delegates to
;; ix86_expand_vector_set and finishes with DONE.
3662(define_expand "vec_setv8hi"
3663 [(match_operand:V8HI 0 "register_operand" "")
3664 (match_operand:HI 1 "register_operand" "")
3665 (match_operand 2 "const_int_operand" "")]
3666 "TARGET_SSE"
3667{
3668 ix86_expand_vector_set (false, operands[0], operands[1],
3669 INTVAL (operands[2]));
3670 DONE;
3671})
3672
;; Extract one element of a V8HI register into HImode operand 0; operand 2 is
;; the constant element index.  Delegates to ix86_expand_vector_extract and
;; finishes with DONE.
3673(define_expand "vec_extractv8hi"
3674 [(match_operand:HI 0 "register_operand" "")
3675 (match_operand:V8HI 1 "register_operand" "")
3676 (match_operand 2 "const_int_operand" "")]
3677 "TARGET_SSE"
3678{
3679 ix86_expand_vector_extract (false, operands[0], operands[1],
3680 INTVAL (operands[2]));
3681 DONE;
3682})
3683
;; Initialise a V8HI register (operand 0) from the unconstrained operand 1 by
;; calling ix86_expand_vector_init; finishes with DONE.
3684(define_expand "vec_initv8hi"
3685 [(match_operand:V8HI 0 "register_operand" "")
3686 (match_operand 1 "" "")]
3687 "TARGET_SSE"
3688{
3689 ix86_expand_vector_init (false, operands[0], operands[1]);
3690 DONE;
3691})
3692
;; Set one element of a V16QI register: operand 0 is the vector, operand 1
;; the QImode value and operand 2 the constant element index.  Delegates to
;; ix86_expand_vector_set and finishes with DONE.
3693(define_expand "vec_setv16qi"
3694 [(match_operand:V16QI 0 "register_operand" "")
3695 (match_operand:QI 1 "register_operand" "")
3696 (match_operand 2 "const_int_operand" "")]
3697 "TARGET_SSE"
3698{
3699 ix86_expand_vector_set (false, operands[0], operands[1],
3700 INTVAL (operands[2]));
3701 DONE;
3702})
3703
;; Extract one element of a V16QI register into QImode operand 0; operand 2
;; is the constant element index.  Delegates to ix86_expand_vector_extract
;; and finishes with DONE.
3704(define_expand "vec_extractv16qi"
3705 [(match_operand:QI 0 "register_operand" "")
3706 (match_operand:V16QI 1 "register_operand" "")
3707 (match_operand 2 "const_int_operand" "")]
3708 "TARGET_SSE"
3709{
3710 ix86_expand_vector_extract (false, operands[0], operands[1],
3711 INTVAL (operands[2]));
3712 DONE;
3713})
3714
;; Initialise a V16QI register (operand 0) from the unconstrained operand 1
;; by calling ix86_expand_vector_init; finishes with DONE.
3715(define_expand "vec_initv16qi"
3716 [(match_operand:V16QI 0 "register_operand" "")
3717 (match_operand 1 "" "")]
3718 "TARGET_SSE"
3719{
3720 ix86_expand_vector_init (false, operands[0], operands[1]);
3721 DONE;
3722})
3723
3724;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
3725;;
3726;; Miscellaneous
3727;;
3728;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
3729
;; PAVGB: unsigned rounding average of two V16QI vectors.  The RTL
;; zero-extends both inputs, adds them, adds a vector of ones, shifts right
;; by one and truncates back to V16QI.  Operand 1 is marked commutative
;; ("%0") and the insn condition applies ix86_binary_operator_ok.
;; NOTE(review): the constant vector of ones is written in V16QI mode while
;; the surrounding plus is V16HI -- confirm this mode mismatch is intended.
3730(define_insn "sse2_uavgv16qi3"
3731 [(set (match_operand:V16QI 0 "register_operand" "=x")
3732 (truncate:V16QI
3733 (lshiftrt:V16HI
3734 (plus:V16HI
3735 (plus:V16HI
3736 (zero_extend:V16HI
3737 (match_operand:V16QI 1 "nonimmediate_operand" "%0"))
3738 (zero_extend:V16HI
3739 (match_operand:V16QI 2 "nonimmediate_operand" "xm")))
3740 (const_vector:V16QI [(const_int 1) (const_int 1)
3741 (const_int 1) (const_int 1)
3742 (const_int 1) (const_int 1)
3743 (const_int 1) (const_int 1)
3744 (const_int 1) (const_int 1)
3745 (const_int 1) (const_int 1)
3746 (const_int 1) (const_int 1)
3747 (const_int 1) (const_int 1)]))
3748 (const_int 1))))]
3749 "TARGET_SSE2 && ix86_binary_operator_ok (PLUS, V16QImode, operands)"
3750 "pavgb\t{%2, %0|%0, %2}"
3751 [(set_attr "type" "sseiadd")
3752 (set_attr "mode" "TI")])
3753
;; PAVGW: unsigned rounding average of two V8HI vectors.  The RTL
;; zero-extends both inputs to V8SI, adds them, adds a vector of ones, shifts
;; right by one and truncates back to V8HI.  Operand 1 is marked commutative.
;; NOTE(review): the constant vector of ones is written in V8HI mode while
;; the surrounding plus is V8SI -- confirm this mode mismatch is intended.
3754(define_insn "sse2_uavgv8hi3"
3755 [(set (match_operand:V8HI 0 "register_operand" "=x")
3756 (truncate:V8HI
3757 (lshiftrt:V8SI
3758 (plus:V8SI
3759 (plus:V8SI
3760 (zero_extend:V8SI
3761 (match_operand:V8HI 1 "nonimmediate_operand" "%0"))
3762 (zero_extend:V8SI
3763 (match_operand:V8HI 2 "nonimmediate_operand" "xm")))
3764 (const_vector:V8HI [(const_int 1) (const_int 1)
3765 (const_int 1) (const_int 1)
3766 (const_int 1) (const_int 1)
3767 (const_int 1) (const_int 1)]))
3768 (const_int 1))))]
3769 "TARGET_SSE2 && ix86_binary_operator_ok (PLUS, V8HImode, operands)"
3770 "pavgw\t{%2, %0|%0, %2}"
3771 [(set_attr "type" "sseiadd")
3772 (set_attr "mode" "TI")])
3773
3774;; The correct representation for this is absolutely enormous, and
3775;; surely not generally useful.
;; PSADBW on two V16QI inputs producing a V2DI result.  The operation is kept
;; opaque as UNSPEC_PSADBW rather than spelled out in RTL; operand 1 is tied
;; to the destination and operand 2 may be a register or memory.
3776(define_insn "sse2_psadbw"
3777 [(set (match_operand:V2DI 0 "register_operand" "=x")
3778 (unspec:V2DI [(match_operand:V16QI 1 "register_operand" "0")
3779 (match_operand:V16QI 2 "nonimmediate_operand" "xm")]
3780 UNSPEC_PSADBW))]
3781 "TARGET_SSE2"
3782 "psadbw\t{%2, %0|%0, %2}"
3783 [(set_attr "type" "sseiadd")
3784 (set_attr "mode" "TI")])
3785
;; MOVMSKPS: produce an SImode value in a general register from a V4SF
;; register, represented as UNSPEC_MOVMSK.
3786(define_insn "sse_movmskps"
3787 [(set (match_operand:SI 0 "register_operand" "=r")
3788 (unspec:SI [(match_operand:V4SF 1 "register_operand" "x")]
3789 UNSPEC_MOVMSK))]
3790 "TARGET_SSE"
3791 "movmskps\t{%1, %0|%0, %1}"
3792 [(set_attr "type" "ssecvt")
3793 (set_attr "mode" "V4SF")])
3794
;; MOVMSKPD: produce an SImode value in a general register from a V2DF
;; register, represented as UNSPEC_MOVMSK.
3795(define_insn "sse2_movmskpd"
3796 [(set (match_operand:SI 0 "register_operand" "=r")
3797 (unspec:SI [(match_operand:V2DF 1 "register_operand" "x")]
3798 UNSPEC_MOVMSK))]
3799 "TARGET_SSE2"
3800 "movmskpd\t{%1, %0|%0, %1}"
3801 [(set_attr "type" "ssecvt")
3802 (set_attr "mode" "V2DF")])
3803
;; PMOVMSKB: produce an SImode value in a general register from a V16QI
;; register, represented as UNSPEC_MOVMSK.
;; NOTE(review): the "mode" attribute is V2DF although the input is an
;; integer vector (the other integer patterns here use TI) -- confirm whether
;; this is deliberate, e.g. for length/prefix computation, before changing it.
3804(define_insn "sse2_pmovmskb"
3805 [(set (match_operand:SI 0 "register_operand" "=r")
3806 (unspec:SI [(match_operand:V16QI 1 "register_operand" "x")]
3807 UNSPEC_MOVMSK))]
3808 "TARGET_SSE2"
3809 "pmovmskb\t{%1, %0|%0, %1}"
3810 [(set_attr "type" "ssecvt")
3811 (set_attr "mode" "V2DF")])
3812
;; Named expander for MASKMOVDQU.  The V16QI memory destination (operand 0)
;; is set to UNSPEC_MASKMOV of operands 1 and 2 and its own previous
;; contents (match_dup 0).  The preparation code is empty.
3813(define_expand "sse2_maskmovdqu"
3814 [(set (match_operand:V16QI 0 "memory_operand" "")
3815 (unspec:V16QI [(match_operand:V16QI 1 "register_operand" "x")
3816 (match_operand:V16QI 2 "register_operand" "x")
3817 (match_dup 0)]
3818 UNSPEC_MASKMOV))]
3819 "TARGET_SSE2"
3820 "")
3821
;; 32-bit MASKMOVDQU insn.  The destination address is SImode operand 0,
;; which must live in the register selected by the "D" constraint; the
;; address does not appear in the assembly template, only operands 1 and 2.
3822(define_insn "*sse2_maskmovdqu"
3823 [(set (mem:V16QI (match_operand:SI 0 "register_operand" "D"))
3824 (unspec:V16QI [(match_operand:V16QI 1 "register_operand" "x")
3825 (match_operand:V16QI 2 "register_operand" "x")
3826 (mem:V16QI (match_dup 0))]
3827 UNSPEC_MASKMOV))]
3828 "TARGET_SSE2 && !TARGET_64BIT"
3829 ;; @@@ check ordering of operands in intel/nonintel syntax
3830 "maskmovdqu\t{%2, %1|%1, %2}"
3831 [(set_attr "type" "ssecvt")
3832 (set_attr "mode" "TI")])
3833
;; 64-bit MASKMOVDQU insn.  Identical to the 32-bit pattern except that the
;; address register (operand 0, "D" constraint) is DImode and the condition
;; requires TARGET_64BIT.
3834(define_insn "*sse2_maskmovdqu_rex64"
3835 [(set (mem:V16QI (match_operand:DI 0 "register_operand" "D"))
3836 (unspec:V16QI [(match_operand:V16QI 1 "register_operand" "x")
3837 (match_operand:V16QI 2 "register_operand" "x")
3838 (mem:V16QI (match_dup 0))]
3839 UNSPEC_MASKMOV))]
3840 "TARGET_SSE2 && TARGET_64BIT"
3841 ;; @@@ check ordering of operands in intel/nonintel syntax
3842 "maskmovdqu\t{%2, %1|%1, %2}"
3843 [(set_attr "type" "ssecvt")
3844 (set_attr "mode" "TI")])
3845
;; LDMXCSR from an SImode memory operand.  Modelled as an unspec_volatile
;; with no destination; the "memory" attribute is "load".
3846(define_insn "sse_ldmxcsr"
3847 [(unspec_volatile [(match_operand:SI 0 "memory_operand" "m")]
3848 UNSPECV_LDMXCSR)]
3849 "TARGET_SSE"
3850 "ldmxcsr\t%0"
3851 [(set_attr "type" "sse")
3852 (set_attr "memory" "load")])
3853
;; STMXCSR into an SImode memory operand.  The stored value is an
;; unspec_volatile with a dummy (const_int 0) input; the "memory" attribute
;; is "store".
3854(define_insn "sse_stmxcsr"
3855 [(set (match_operand:SI 0 "memory_operand" "=m")
3856 (unspec_volatile:SI [(const_int 0)] UNSPECV_STMXCSR))]
3857 "TARGET_SSE"
3858 "stmxcsr\t%0"
3859 [(set_attr "type" "sse")
3860 (set_attr "memory" "store")])
3861
;; Expander for SFENCE.  Operand 0 is created here as a volatile BLKmode MEM
;; whose address is a SCRATCH; the pattern then sets that MEM to
;; UNSPEC_SFENCE of itself.
3862(define_expand "sse_sfence"
3863 [(set (match_dup 0)
3864 (unspec:BLK [(match_dup 0)] UNSPEC_SFENCE))]
3865 "TARGET_SSE || TARGET_3DNOW_A"
3866{
3867 operands[0] = gen_rtx_MEM (BLKmode, gen_rtx_SCRATCH (Pmode));
3868 MEM_VOLATILE_P (operands[0]) = 1;
3869})
3870
;; Insn matching the RTL built by the sse_sfence expander; emits "sfence".
;; Operand 0 has no predicate or constraint and "memory" is "unknown".
3871(define_insn "*sse_sfence"
3872 [(set (match_operand:BLK 0 "" "")
3873 (unspec:BLK [(match_dup 0)] UNSPEC_SFENCE))]
3874 "TARGET_SSE || TARGET_3DNOW_A"
3875 "sfence"
3876 [(set_attr "type" "sse")
3877 (set_attr "memory" "unknown")])
3878
;; CLFLUSH of the address given by operand 0 (an address_operand, printed
;; with the %a modifier).  Modelled as an unspec_volatile with "memory"
;; attribute "unknown".
3879(define_insn "sse2_clflush"
3880 [(unspec_volatile [(match_operand 0 "address_operand" "p")]
3881 UNSPECV_CLFLUSH)]
3882 "TARGET_SSE2"
3883 "clflush\t%a0"
3884 [(set_attr "type" "sse")
3885 (set_attr "memory" "unknown")])
3886
;; Expander for MFENCE.  Operand 0 is created here as a volatile BLKmode MEM
;; whose address is a SCRATCH; the pattern then sets that MEM to
;; UNSPEC_MFENCE of itself.
3887(define_expand "sse2_mfence"
3888 [(set (match_dup 0)
3889 (unspec:BLK [(match_dup 0)] UNSPEC_MFENCE))]
3890 "TARGET_SSE2"
3891{
3892 operands[0] = gen_rtx_MEM (BLKmode, gen_rtx_SCRATCH (Pmode));
3893 MEM_VOLATILE_P (operands[0]) = 1;
3894})
3895
;; Insn matching the RTL built by the sse2_mfence expander; emits "mfence".
3896(define_insn "*sse2_mfence"
3897 [(set (match_operand:BLK 0 "" "")
3898 (unspec:BLK [(match_dup 0)] UNSPEC_MFENCE))]
3899 "TARGET_SSE2"
3900 "mfence"
3901 [(set_attr "type" "sse")
3902 (set_attr "memory" "unknown")])
3903
;; Expander for LFENCE.  Operand 0 is created here as a volatile BLKmode MEM
;; whose address is a SCRATCH; the pattern then sets that MEM to
;; UNSPEC_LFENCE of itself.
3904(define_expand "sse2_lfence"
3905 [(set (match_dup 0)
3906 (unspec:BLK [(match_dup 0)] UNSPEC_LFENCE))]
3907 "TARGET_SSE2"
3908{
3909 operands[0] = gen_rtx_MEM (BLKmode, gen_rtx_SCRATCH (Pmode));
3910 MEM_VOLATILE_P (operands[0]) = 1;
3911})
3912
;; Insn matching the RTL built by the sse2_lfence expander; emits "lfence".
3913(define_insn "*sse2_lfence"
3914 [(set (match_operand:BLK 0 "" "")
3915 (unspec:BLK [(match_dup 0)] UNSPEC_LFENCE))]
3916 "TARGET_SSE2"
3917 "lfence"
3918 [(set_attr "type" "sse")
3919 (set_attr "memory" "unknown")])
3920
;; MWAIT.  The two SImode inputs are pinned to fixed registers by the "a" and
;; "c" constraints, so the template prints the bare mnemonic with no
;; operands.  The insn length is given explicitly as 3 bytes.
3921(define_insn "sse3_mwait"
3922 [(unspec_volatile [(match_operand:SI 0 "register_operand" "a")
3923 (match_operand:SI 1 "register_operand" "c")]
3924 UNSPECV_MWAIT)]
3925 "TARGET_SSE3"
3926;; 64bit version is "mwait %rax,%rcx". But only lower 32bits are used.
3927;; Since 32bit register operands are implicitly zero extended to 64bit,
3928;; we only need to set up 32bit registers.
3929 "mwait"
3930 [(set_attr "length" "3")])
3931
;; MONITOR for 32-bit targets.  The three SImode inputs are pinned to fixed
;; registers by the "a", "c" and "d" constraints and are printed explicitly
;; after the mnemonic.  The insn length is given explicitly as 3 bytes.
3932(define_insn "sse3_monitor"
3933 [(unspec_volatile [(match_operand:SI 0 "register_operand" "a")
3934 (match_operand:SI 1 "register_operand" "c")
3935 (match_operand:SI 2 "register_operand" "d")]
3936 UNSPECV_MONITOR)]
3937 "TARGET_SSE3 && !TARGET_64BIT"
3938 "monitor\t%0, %1, %2"
3939 [(set_attr "length" "3")])
3940
;; MONITOR for 64-bit targets.  Operand 0 is DImode ("a" constraint) while
;; operands 1 and 2 stay SImode ("c" and "d"); the template prints the bare
;; mnemonic with no operands.
3941(define_insn "sse3_monitor64"
3942 [(unspec_volatile [(match_operand:DI 0 "register_operand" "a")
3943 (match_operand:SI 1 "register_operand" "c")
3944 (match_operand:SI 2 "register_operand" "d")]
3945 UNSPECV_MONITOR)]
3946 "TARGET_SSE3 && TARGET_64BIT"
3947;; 64bit version is "monitor %rax,%rcx,%rdx". But only lower 32bits in
3948;; RCX and RDX are used. Since 32bit register operands are implicitly
3949;; zero extended to 64bit, we only need to set up 32bit registers.
3950 "monitor"
3951 [(set_attr "length" "3")])
3952
3953;; SSSE3
;; PHADDW, V8HI form: horizontal add.  The first half of the result holds the
;; sums of adjacent element pairs (0+1, 2+3, 4+5, 6+7) of operand 1, which is
;; tied to the destination; the second half holds the same pair sums of
;; operand 2 (register or memory).
3954(define_insn "ssse3_phaddwv8hi3"
3955 [(set (match_operand:V8HI 0 "register_operand" "=x")
3956 (vec_concat:V8HI
3957 (vec_concat:V4HI
3958 (vec_concat:V2HI
3959 (plus:HI
3960 (vec_select:HI
3961 (match_operand:V8HI 1 "register_operand" "0")
3962 (parallel [(const_int 0)]))
3963 (vec_select:HI (match_dup 1) (parallel [(const_int 1)])))
3964 (plus:HI
3965 (vec_select:HI (match_dup 1) (parallel [(const_int 2)]))
3966 (vec_select:HI (match_dup 1) (parallel [(const_int 3)]))))
3967 (vec_concat:V2HI
3968 (plus:HI
3969 (vec_select:HI (match_dup 1) (parallel [(const_int 4)]))
3970 (vec_select:HI (match_dup 1) (parallel [(const_int 5)])))
3971 (plus:HI
3972 (vec_select:HI (match_dup 1) (parallel [(const_int 6)]))
3973 (vec_select:HI (match_dup 1) (parallel [(const_int 7)])))))
3974 (vec_concat:V4HI
3975 (vec_concat:V2HI
3976 (plus:HI
3977 (vec_select:HI
3978 (match_operand:V8HI 2 "nonimmediate_operand" "xm")
3979 (parallel [(const_int 0)]))
3980 (vec_select:HI (match_dup 2) (parallel [(const_int 1)])))
3981 (plus:HI
3982 (vec_select:HI (match_dup 2) (parallel [(const_int 2)]))
3983 (vec_select:HI (match_dup 2) (parallel [(const_int 3)]))))
3984 (vec_concat:V2HI
3985 (plus:HI
3986 (vec_select:HI (match_dup 2) (parallel [(const_int 4)]))
3987 (vec_select:HI (match_dup 2) (parallel [(const_int 5)])))
3988 (plus:HI
3989 (vec_select:HI (match_dup 2) (parallel [(const_int 6)]))
3990 (vec_select:HI (match_dup 2) (parallel [(const_int 7)])))))))]
3991 "TARGET_SSSE3"
3992 "phaddw\t{%2, %0|%0, %2}"
3993 [(set_attr "type" "sseiadd")
3994 (set_attr "mode" "TI")])
3995
;; PHADDW, V4HI form using "y"-constrained registers: the first half of the
;; result holds the pair sums (0+1, 2+3) of operand 1 (tied to the
;; destination), the second half the pair sums of operand 2.
3996(define_insn "ssse3_phaddwv4hi3"
3997 [(set (match_operand:V4HI 0 "register_operand" "=y")
3998 (vec_concat:V4HI
3999 (vec_concat:V2HI
4000 (plus:HI
4001 (vec_select:HI
4002 (match_operand:V4HI 1 "register_operand" "0")
4003 (parallel [(const_int 0)]))
4004 (vec_select:HI (match_dup 1) (parallel [(const_int 1)])))
4005 (plus:HI
4006 (vec_select:HI (match_dup 1) (parallel [(const_int 2)]))
4007 (vec_select:HI (match_dup 1) (parallel [(const_int 3)]))))
4008 (vec_concat:V2HI
4009 (plus:HI
4010 (vec_select:HI
4011 (match_operand:V4HI 2 "nonimmediate_operand" "ym")
4012 (parallel [(const_int 0)]))
4013 (vec_select:HI (match_dup 2) (parallel [(const_int 1)])))
4014 (plus:HI
4015 (vec_select:HI (match_dup 2) (parallel [(const_int 2)]))
4016 (vec_select:HI (match_dup 2) (parallel [(const_int 3)]))))))]
4017 "TARGET_SSSE3"
4018 "phaddw\t{%2, %0|%0, %2}"
4019 [(set_attr "type" "sseiadd")
4020 (set_attr "mode" "DI")])
4021
;; PHADDD, V4SI form: the first half of the result holds the pair sums
;; (0+1, 2+3) of operand 1 (tied to the destination), the second half the
;; pair sums of operand 2 (register or memory).
4022(define_insn "ssse3_phadddv4si3"
4023 [(set (match_operand:V4SI 0 "register_operand" "=x")
4024 (vec_concat:V4SI
4025 (vec_concat:V2SI
4026 (plus:SI
4027 (vec_select:SI
4028 (match_operand:V4SI 1 "register_operand" "0")
4029 (parallel [(const_int 0)]))
4030 (vec_select:SI (match_dup 1) (parallel [(const_int 1)])))
4031 (plus:SI
4032 (vec_select:SI (match_dup 1) (parallel [(const_int 2)]))
4033 (vec_select:SI (match_dup 1) (parallel [(const_int 3)]))))
4034 (vec_concat:V2SI
4035 (plus:SI
4036 (vec_select:SI
4037 (match_operand:V4SI 2 "nonimmediate_operand" "xm")
4038 (parallel [(const_int 0)]))
4039 (vec_select:SI (match_dup 2) (parallel [(const_int 1)])))
4040 (plus:SI
4041 (vec_select:SI (match_dup 2) (parallel [(const_int 2)]))
4042 (vec_select:SI (match_dup 2) (parallel [(const_int 3)]))))))]
4043 "TARGET_SSSE3"
4044 "phaddd\t{%2, %0|%0, %2}"
4045 [(set_attr "type" "sseiadd")
4046 (set_attr "mode" "TI")])
4047
;; PHADDD, V2SI form using "y"-constrained registers: element 0 of the result
;; is the sum of both elements of operand 1 (tied to the destination),
;; element 1 the sum of both elements of operand 2.
4048(define_insn "ssse3_phadddv2si3"
4049 [(set (match_operand:V2SI 0 "register_operand" "=y")
4050 (vec_concat:V2SI
4051 (plus:SI
4052 (vec_select:SI
4053 (match_operand:V2SI 1 "register_operand" "0")
4054 (parallel [(const_int 0)]))
4055 (vec_select:SI (match_dup 1) (parallel [(const_int 1)])))
4056 (plus:SI
4057 (vec_select:SI
4058 (match_operand:V2SI 2 "nonimmediate_operand" "ym")
4059 (parallel [(const_int 0)]))
4060 (vec_select:SI (match_dup 2) (parallel [(const_int 1)])))))]
4061 "TARGET_SSSE3"
4062 "phaddd\t{%2, %0|%0, %2}"
4063 [(set_attr "type" "sseiadd")
4064 (set_attr "mode" "DI")])
4065
;; PHADDSW, V8HI form: same pairing as phaddw (adjacent pairs of operand 1
;; fill the first half of the result, pairs of operand 2 the second half),
;; but each pair is combined with signed-saturating addition (ss_plus).
4066(define_insn "ssse3_phaddswv8hi3"
4067 [(set (match_operand:V8HI 0 "register_operand" "=x")
4068 (vec_concat:V8HI
4069 (vec_concat:V4HI
4070 (vec_concat:V2HI
4071 (ss_plus:HI
4072 (vec_select:HI
4073 (match_operand:V8HI 1 "register_operand" "0")
4074 (parallel [(const_int 0)]))
4075 (vec_select:HI (match_dup 1) (parallel [(const_int 1)])))
4076 (ss_plus:HI
4077 (vec_select:HI (match_dup 1) (parallel [(const_int 2)]))
4078 (vec_select:HI (match_dup 1) (parallel [(const_int 3)]))))
4079 (vec_concat:V2HI
4080 (ss_plus:HI
4081 (vec_select:HI (match_dup 1) (parallel [(const_int 4)]))
4082 (vec_select:HI (match_dup 1) (parallel [(const_int 5)])))
4083 (ss_plus:HI
4084 (vec_select:HI (match_dup 1) (parallel [(const_int 6)]))
4085 (vec_select:HI (match_dup 1) (parallel [(const_int 7)])))))
4086 (vec_concat:V4HI
4087 (vec_concat:V2HI
4088 (ss_plus:HI
4089 (vec_select:HI
4090 (match_operand:V8HI 2 "nonimmediate_operand" "xm")
4091 (parallel [(const_int 0)]))
4092 (vec_select:HI (match_dup 2) (parallel [(const_int 1)])))
4093 (ss_plus:HI
4094 (vec_select:HI (match_dup 2) (parallel [(const_int 2)]))
4095 (vec_select:HI (match_dup 2) (parallel [(const_int 3)]))))
4096 (vec_concat:V2HI
4097 (ss_plus:HI
4098 (vec_select:HI (match_dup 2) (parallel [(const_int 4)]))
4099 (vec_select:HI (match_dup 2) (parallel [(const_int 5)])))
4100 (ss_plus:HI
4101 (vec_select:HI (match_dup 2) (parallel [(const_int 6)]))
4102 (vec_select:HI (match_dup 2) (parallel [(const_int 7)])))))))]
4103 "TARGET_SSSE3"
4104 "phaddsw\t{%2, %0|%0, %2}"
4105 [(set_attr "type" "sseiadd")
4106 (set_attr "mode" "TI")])
4107
;; PHADDSW, V4HI form using "y"-constrained registers: adjacent pairs of
;; operand 1 (tied to the destination) and then of operand 2 are combined
;; with signed-saturating addition (ss_plus).
4108(define_insn "ssse3_phaddswv4hi3"
4109 [(set (match_operand:V4HI 0 "register_operand" "=y")
4110 (vec_concat:V4HI
4111 (vec_concat:V2HI
4112 (ss_plus:HI
4113 (vec_select:HI
4114 (match_operand:V4HI 1 "register_operand" "0")
4115 (parallel [(const_int 0)]))
4116 (vec_select:HI (match_dup 1) (parallel [(const_int 1)])))
4117 (ss_plus:HI
4118 (vec_select:HI (match_dup 1) (parallel [(const_int 2)]))
4119 (vec_select:HI (match_dup 1) (parallel [(const_int 3)]))))
4120 (vec_concat:V2HI
4121 (ss_plus:HI
4122 (vec_select:HI
4123 (match_operand:V4HI 2 "nonimmediate_operand" "ym")
4124 (parallel [(const_int 0)]))
4125 (vec_select:HI (match_dup 2) (parallel [(const_int 1)])))
4126 (ss_plus:HI
4127 (vec_select:HI (match_dup 2) (parallel [(const_int 2)]))
4128 (vec_select:HI (match_dup 2) (parallel [(const_int 3)]))))))]
4129 "TARGET_SSSE3"
4130 "phaddsw\t{%2, %0|%0, %2}"
4131 [(set_attr "type" "sseiadd")
4132 (set_attr "mode" "DI")])
4133
;; PHSUBW, V8HI form: horizontal subtract.  For each adjacent pair the
;; odd-indexed element is subtracted from the even-indexed one (0-1, 2-3,
;; 4-5, 6-7).  Differences from operand 1 (tied to the destination) fill the
;; first half of the result, those from operand 2 the second half.
4134(define_insn "ssse3_phsubwv8hi3"
4135 [(set (match_operand:V8HI 0 "register_operand" "=x")
4136 (vec_concat:V8HI
4137 (vec_concat:V4HI
4138 (vec_concat:V2HI
4139 (minus:HI
4140 (vec_select:HI
4141 (match_operand:V8HI 1 "register_operand" "0")
4142 (parallel [(const_int 0)]))
4143 (vec_select:HI (match_dup 1) (parallel [(const_int 1)])))
4144 (minus:HI
4145 (vec_select:HI (match_dup 1) (parallel [(const_int 2)]))
4146 (vec_select:HI (match_dup 1) (parallel [(const_int 3)]))))
4147 (vec_concat:V2HI
4148 (minus:HI
4149 (vec_select:HI (match_dup 1) (parallel [(const_int 4)]))
4150 (vec_select:HI (match_dup 1) (parallel [(const_int 5)])))
4151 (minus:HI
4152 (vec_select:HI (match_dup 1) (parallel [(const_int 6)]))
4153 (vec_select:HI (match_dup 1) (parallel [(const_int 7)])))))
4154 (vec_concat:V4HI
4155 (vec_concat:V2HI
4156 (minus:HI
4157 (vec_select:HI
4158 (match_operand:V8HI 2 "nonimmediate_operand" "xm")
4159 (parallel [(const_int 0)]))
4160 (vec_select:HI (match_dup 2) (parallel [(const_int 1)])))
4161 (minus:HI
4162 (vec_select:HI (match_dup 2) (parallel [(const_int 2)]))
4163 (vec_select:HI (match_dup 2) (parallel [(const_int 3)]))))
4164 (vec_concat:V2HI
4165 (minus:HI
4166 (vec_select:HI (match_dup 2) (parallel [(const_int 4)]))
4167 (vec_select:HI (match_dup 2) (parallel [(const_int 5)])))
4168 (minus:HI
4169 (vec_select:HI (match_dup 2) (parallel [(const_int 6)]))
4170 (vec_select:HI (match_dup 2) (parallel [(const_int 7)])))))))]
4171 "TARGET_SSSE3"
4172 "phsubw\t{%2, %0|%0, %2}"
4173 [(set_attr "type" "sseiadd")
4174 (set_attr "mode" "TI")])
4175
;; PHSUBW, V4HI form using "y"-constrained registers: pair differences
;; (0-1, 2-3) of operand 1 (tied to the destination) followed by the pair
;; differences of operand 2.
4176(define_insn "ssse3_phsubwv4hi3"
4177 [(set (match_operand:V4HI 0 "register_operand" "=y")
4178 (vec_concat:V4HI
4179 (vec_concat:V2HI
4180 (minus:HI
4181 (vec_select:HI
4182 (match_operand:V4HI 1 "register_operand" "0")
4183 (parallel [(const_int 0)]))
4184 (vec_select:HI (match_dup 1) (parallel [(const_int 1)])))
4185 (minus:HI
4186 (vec_select:HI (match_dup 1) (parallel [(const_int 2)]))
4187 (vec_select:HI (match_dup 1) (parallel [(const_int 3)]))))
4188 (vec_concat:V2HI
4189 (minus:HI
4190 (vec_select:HI
4191 (match_operand:V4HI 2 "nonimmediate_operand" "ym")
4192 (parallel [(const_int 0)]))
4193 (vec_select:HI (match_dup 2) (parallel [(const_int 1)])))
4194 (minus:HI
4195 (vec_select:HI (match_dup 2) (parallel [(const_int 2)]))
4196 (vec_select:HI (match_dup 2) (parallel [(const_int 3)]))))))]
4197 "TARGET_SSSE3"
4198 "phsubw\t{%2, %0|%0, %2}"
4199 [(set_attr "type" "sseiadd")
4200 (set_attr "mode" "DI")])
4201
;; PHSUBD, V4SI form: pair differences (0-1, 2-3) of operand 1 (tied to the
;; destination) followed by the pair differences of operand 2 (register or
;; memory).
4202(define_insn "ssse3_phsubdv4si3"
4203 [(set (match_operand:V4SI 0 "register_operand" "=x")
4204 (vec_concat:V4SI
4205 (vec_concat:V2SI
4206 (minus:SI
4207 (vec_select:SI
4208 (match_operand:V4SI 1 "register_operand" "0")
4209 (parallel [(const_int 0)]))
4210 (vec_select:SI (match_dup 1) (parallel [(const_int 1)])))
4211 (minus:SI
4212 (vec_select:SI (match_dup 1) (parallel [(const_int 2)]))
4213 (vec_select:SI (match_dup 1) (parallel [(const_int 3)]))))
4214 (vec_concat:V2SI
4215 (minus:SI
4216 (vec_select:SI
4217 (match_operand:V4SI 2 "nonimmediate_operand" "xm")
4218 (parallel [(const_int 0)]))
4219 (vec_select:SI (match_dup 2) (parallel [(const_int 1)])))
4220 (minus:SI
4221 (vec_select:SI (match_dup 2) (parallel [(const_int 2)]))
4222 (vec_select:SI (match_dup 2) (parallel [(const_int 3)]))))))]
4223 "TARGET_SSSE3"
4224 "phsubd\t{%2, %0|%0, %2}"
4225 [(set_attr "type" "sseiadd")
4226 (set_attr "mode" "TI")])
4227
;; PHSUBD, V2SI form using "y"-constrained registers: element 0 of the result
;; is element 0 minus element 1 of operand 1 (tied to the destination);
;; element 1 is the same difference taken from operand 2.
4228(define_insn "ssse3_phsubdv2si3"
4229 [(set (match_operand:V2SI 0 "register_operand" "=y")
4230 (vec_concat:V2SI
4231 (minus:SI
4232 (vec_select:SI
4233 (match_operand:V2SI 1 "register_operand" "0")
4234 (parallel [(const_int 0)]))
4235 (vec_select:SI (match_dup 1) (parallel [(const_int 1)])))
4236 (minus:SI
4237 (vec_select:SI
4238 (match_operand:V2SI 2 "nonimmediate_operand" "ym")
4239 (parallel [(const_int 0)]))
4240 (vec_select:SI (match_dup 2) (parallel [(const_int 1)])))))]
4241 "TARGET_SSSE3"
4242 "phsubd\t{%2, %0|%0, %2}"
4243 [(set_attr "type" "sseiadd")
4244 (set_attr "mode" "DI")])
4245
;; PHSUBSW, V8HI form: same pairing as phsubw (even-indexed minus odd-indexed
;; element; operand 1 pairs first, operand 2 pairs second), but each
;; difference uses signed-saturating subtraction (ss_minus).
4246(define_insn "ssse3_phsubswv8hi3"
4247 [(set (match_operand:V8HI 0 "register_operand" "=x")
4248 (vec_concat:V8HI
4249 (vec_concat:V4HI
4250 (vec_concat:V2HI
4251 (ss_minus:HI
4252 (vec_select:HI
4253 (match_operand:V8HI 1 "register_operand" "0")
4254 (parallel [(const_int 0)]))
4255 (vec_select:HI (match_dup 1) (parallel [(const_int 1)])))
4256 (ss_minus:HI
4257 (vec_select:HI (match_dup 1) (parallel [(const_int 2)]))
4258 (vec_select:HI (match_dup 1) (parallel [(const_int 3)]))))
4259 (vec_concat:V2HI
4260 (ss_minus:HI
4261 (vec_select:HI (match_dup 1) (parallel [(const_int 4)]))
4262 (vec_select:HI (match_dup 1) (parallel [(const_int 5)])))
4263 (ss_minus:HI
4264 (vec_select:HI (match_dup 1) (parallel [(const_int 6)]))
4265 (vec_select:HI (match_dup 1) (parallel [(const_int 7)])))))
4266 (vec_concat:V4HI
4267 (vec_concat:V2HI
4268 (ss_minus:HI
4269 (vec_select:HI
4270 (match_operand:V8HI 2 "nonimmediate_operand" "xm")
4271 (parallel [(const_int 0)]))
4272 (vec_select:HI (match_dup 2) (parallel [(const_int 1)])))
4273 (ss_minus:HI
4274 (vec_select:HI (match_dup 2) (parallel [(const_int 2)]))
4275 (vec_select:HI (match_dup 2) (parallel [(const_int 3)]))))
4276 (vec_concat:V2HI
4277 (ss_minus:HI
4278 (vec_select:HI (match_dup 2) (parallel [(const_int 4)]))
4279 (vec_select:HI (match_dup 2) (parallel [(const_int 5)])))
4280 (ss_minus:HI
4281 (vec_select:HI (match_dup 2) (parallel [(const_int 6)]))
4282 (vec_select:HI (match_dup 2) (parallel [(const_int 7)])))))))]
4283 "TARGET_SSSE3"
4284 "phsubsw\t{%2, %0|%0, %2}"
4285 [(set_attr "type" "sseiadd")
4286 (set_attr "mode" "TI")])
4287
;; PHSUBSW, V4HI form using "y"-constrained registers: pair differences of
;; operand 1 (tied to the destination) and then of operand 2, each computed
;; with signed-saturating subtraction (ss_minus).
4288(define_insn "ssse3_phsubswv4hi3"
4289 [(set (match_operand:V4HI 0 "register_operand" "=y")
4290 (vec_concat:V4HI
4291 (vec_concat:V2HI
4292 (ss_minus:HI
4293 (vec_select:HI
4294 (match_operand:V4HI 1 "register_operand" "0")
4295 (parallel [(const_int 0)]))
4296 (vec_select:HI (match_dup 1) (parallel [(const_int 1)])))
4297 (ss_minus:HI
4298 (vec_select:HI (match_dup 1) (parallel [(const_int 2)]))
4299 (vec_select:HI (match_dup 1) (parallel [(const_int 3)]))))
4300 (vec_concat:V2HI
4301 (ss_minus:HI
4302 (vec_select:HI
4303 (match_operand:V4HI 2 "nonimmediate_operand" "ym")
4304 (parallel [(const_int 0)]))
4305 (vec_select:HI (match_dup 2) (parallel [(const_int 1)])))
4306 (ss_minus:HI
4307 (vec_select:HI (match_dup 2) (parallel [(const_int 2)]))
4308 (vec_select:HI (match_dup 2) (parallel [(const_int 3)]))))))]
4309 "TARGET_SSSE3"
4310 "phsubsw\t{%2, %0|%0, %2}"
4311 [(set_attr "type" "sseiadd")
4312 (set_attr "mode" "DI")])
4313
;; PMADDUBSW, V8HI form: multiply the bytes of operand 1, taken as unsigned
;; (zero_extend), by the corresponding bytes of operand 2, taken as signed
;; (sign_extend), then add the product of each even-indexed byte to the
;; product of the following odd-indexed byte with signed saturation.
;;
;; Because the two inputs are extended differently they are NOT
;; interchangeable, so operand 1 must not carry the "%" commutative marker;
;; it is a register tied to the destination, like the sibling SSSE3 patterns.
;; Each vec_select picks 8 of the 16 bytes, so its result mode is V8QI.
4314(define_insn "ssse3_pmaddubswv8hi3"
4315 [(set (match_operand:V8HI 0 "register_operand" "=x")
4316 (ss_plus:V8HI
4317 (mult:V8HI
4318 (zero_extend:V8HI
4319 (vec_select:V8QI
4320 (match_operand:V16QI 1 "register_operand" "0")
4321 (parallel [(const_int 0)
4322 (const_int 2)
4323 (const_int 4)
4324 (const_int 6)
4325 (const_int 8)
4326 (const_int 10)
4327 (const_int 12)
4328 (const_int 14)])))
4329 (sign_extend:V8HI
4330 (vec_select:V8QI
4331 (match_operand:V16QI 2 "nonimmediate_operand" "xm")
4332 (parallel [(const_int 0)
4333 (const_int 2)
4334 (const_int 4)
4335 (const_int 6)
4336 (const_int 8)
4337 (const_int 10)
4338 (const_int 12)
4339 (const_int 14)]))))
4340 (mult:V8HI
4341 (zero_extend:V8HI
4342 (vec_select:V8QI (match_dup 1)
4343 (parallel [(const_int 1)
4344 (const_int 3)
4345 (const_int 5)
4346 (const_int 7)
4347 (const_int 9)
4348 (const_int 11)
4349 (const_int 13)
4350 (const_int 15)])))
4351 (sign_extend:V8HI
4352 (vec_select:V8QI (match_dup 2)
4353 (parallel [(const_int 1)
4354 (const_int 3)
4355 (const_int 5)
4356 (const_int 7)
4357 (const_int 9)
4358 (const_int 11)
4359 (const_int 13)
4360 (const_int 15)]))))))]
4361 "TARGET_SSSE3"
4362 "pmaddubsw\t{%2, %0|%0, %2}"
4363 [(set_attr "type" "sseiadd")
4364 (set_attr "mode" "TI")])
4365
;; PMADDUBSW, V4HI form using "y"-constrained registers: multiply the bytes
;; of operand 1, taken as unsigned (zero_extend), by the corresponding bytes
;; of operand 2, taken as signed (sign_extend), then add the product of each
;; even-indexed byte to the product of the following odd-indexed byte with
;; signed saturation.
;;
;; Because the two inputs are extended differently they are NOT
;; interchangeable, so operand 1 must not carry the "%" commutative marker;
;; it is a register tied to the destination, like the sibling SSSE3 patterns.
;; Each vec_select picks 4 of the 8 bytes, so its result mode is V4QI.
4366(define_insn "ssse3_pmaddubswv4hi3"
4367 [(set (match_operand:V4HI 0 "register_operand" "=y")
4368 (ss_plus:V4HI
4369 (mult:V4HI
4370 (zero_extend:V4HI
4371 (vec_select:V4QI
4372 (match_operand:V8QI 1 "register_operand" "0")
4373 (parallel [(const_int 0)
4374 (const_int 2)
4375 (const_int 4)
4376 (const_int 6)])))
4377 (sign_extend:V4HI
4378 (vec_select:V4QI
4379 (match_operand:V8QI 2 "nonimmediate_operand" "ym")
4380 (parallel [(const_int 0)
4381 (const_int 2)
4382 (const_int 4)
4383 (const_int 6)]))))
4384 (mult:V4HI
4385 (zero_extend:V4HI
4386 (vec_select:V4QI (match_dup 1)
4387 (parallel [(const_int 1)
4388 (const_int 3)
4389 (const_int 5)
4390 (const_int 7)])))
4391 (sign_extend:V4HI
4392 (vec_select:V4QI (match_dup 2)
4393 (parallel [(const_int 1)
4394 (const_int 3)
4395 (const_int 5)
4396 (const_int 7)]))))))]
4397 "TARGET_SSSE3"
4398 "pmaddubsw\t{%2, %0|%0, %2}"
4399 [(set_attr "type" "sseiadd")
4400 (set_attr "mode" "DI")])
4401
;; PMULHRSW, V8HI form.  The RTL sign-extends both inputs to V8SI, multiplies
;; them, shifts the product right by 14, adds a vector of ones, shifts right
;; by one more bit and truncates to V8HI.  Both inputs are sign-extended, so
;; operand 1 is marked commutative ("%0").
;; NOTE(review): the constant vector of ones is written in V8HI mode while
;; the surrounding plus is V8SI -- confirm this mode mismatch is intended.
4402(define_insn "ssse3_pmulhrswv8hi3"
4403 [(set (match_operand:V8HI 0 "register_operand" "=x")
4404 (truncate:V8HI
4405 (lshiftrt:V8SI
4406 (plus:V8SI
4407 (lshiftrt:V8SI
4408 (mult:V8SI
4409 (sign_extend:V8SI
4410 (match_operand:V8HI 1 "nonimmediate_operand" "%0"))
4411 (sign_extend:V8SI
4412 (match_operand:V8HI 2 "nonimmediate_operand" "xm")))
4413 (const_int 14))
4414 (const_vector:V8HI [(const_int 1) (const_int 1)
4415 (const_int 1) (const_int 1)
4416 (const_int 1) (const_int 1)
4417 (const_int 1) (const_int 1)]))
4418 (const_int 1))))]
4419 "TARGET_SSSE3 && ix86_binary_operator_ok (MULT, V8HImode, operands)"
4420 "pmulhrsw\t{%2, %0|%0, %2}"
4421 [(set_attr "type" "sseimul")
4422 (set_attr "mode" "TI")])
4423
;; SSSE3 pmulhrsw, 64-bit (MMX register) form.
;;
;; Per 16-bit lane the RTL below describes: sign-extend both inputs to
;; 32 bits, multiply, shift right logically by 14, add 1, shift right
;; logically by 1, and truncate back to 16 bits.
;;
;; Both inputs are sign-extended, so the multiply is commutative and
;; operand 1 carries the "%" modifier; ix86_binary_operator_ok is
;; consulted with MULT in the insn condition.
;;
;; NOTE(review): the vector of ones is spelled const_vector:V4HI although
;; it is an operand of plus:V4SI -- confirm the mode before changing it.
(define_insn "ssse3_pmulhrswv4hi3"
  [(set (match_operand:V4HI 0 "register_operand" "=y")
        (truncate:V4HI
          (lshiftrt:V4SI
            (plus:V4SI
              (lshiftrt:V4SI
                (mult:V4SI
                  (sign_extend:V4SI
                    (match_operand:V4HI 1 "nonimmediate_operand" "%0"))
                  (sign_extend:V4SI
                    (match_operand:V4HI 2 "nonimmediate_operand" "ym")))
                (const_int 14))
              (const_vector:V4HI
                [(const_int 1) (const_int 1) (const_int 1) (const_int 1)]))
            (const_int 1))))]
  "TARGET_SSSE3 && ix86_binary_operator_ok (MULT, V4HImode, operands)"
  "pmulhrsw\t{%2, %0|%0, %2}"
  [(set_attr "type" "sseimul") (set_attr "mode" "DI")])
4443
;; SSSE3 pshufb, 128-bit form.  The operation is wrapped in
;; UNSPEC_PSHUFB, so the RTL does not spell out its semantics.
;; Operand 1 is tied to the destination register; operand 2 may be an
;; SSE register or memory.
(define_insn "ssse3_pshufbv16qi3"
  [(set (match_operand:V16QI 0 "register_operand" "=x")
        (unspec:V16QI
          [(match_operand:V16QI 1 "register_operand" "0")
           (match_operand:V16QI 2 "nonimmediate_operand" "xm")]
          UNSPEC_PSHUFB))]
  "TARGET_SSSE3"
  "pshufb\t{%2, %0|%0, %2}"
  [(set_attr "type" "sselog1") (set_attr "mode" "TI")])
4453
;; SSSE3 pshufb, 64-bit (MMX register) form.  The operation is wrapped
;; in UNSPEC_PSHUFB, so the RTL does not spell out its semantics.
;; Operand 1 is tied to the destination register; operand 2 may be an
;; MMX register or memory.
(define_insn "ssse3_pshufbv8qi3"
  [(set (match_operand:V8QI 0 "register_operand" "=y")
        (unspec:V8QI
          [(match_operand:V8QI 1 "register_operand" "0")
           (match_operand:V8QI 2 "nonimmediate_operand" "ym")]
          UNSPEC_PSHUFB))]
  "TARGET_SSSE3"
  "pshufb\t{%2, %0|%0, %2}"
  [(set_attr "type" "sselog1") (set_attr "mode" "DI")])
4463
;; SSSE3 psignb / psignw / psignd, 128-bit forms.  One insn is generated
;; for each mode in SSEMODE124 (V16QI, V8HI, V4SI); <ssevecsize> supplies
;; the matching mnemonic suffix.  The operation itself is wrapped in
;; UNSPEC_PSIGN.  Operand 1 is tied to the destination register.
(define_insn "ssse3_psign<mode>3"
  [(set (match_operand:SSEMODE124 0 "register_operand" "=x")
        (unspec:SSEMODE124
          [(match_operand:SSEMODE124 1 "register_operand" "0")
           (match_operand:SSEMODE124 2 "nonimmediate_operand" "xm")]
          UNSPEC_PSIGN))]
  "TARGET_SSSE3"
  "psign<ssevecsize>\t{%2, %0|%0, %2}"
  [(set_attr "type" "sselog1") (set_attr "mode" "TI")])
4473
;; SSSE3 psign, 64-bit (MMX register) forms.  One insn is generated for
;; each mode in MMXMODEI; <mmxvecsize> supplies the mnemonic suffix (both
;; are defined outside this file section).  The operation itself is
;; wrapped in UNSPEC_PSIGN.  Operand 1 is tied to the destination
;; register.
(define_insn "ssse3_psign<mode>3"
  [(set (match_operand:MMXMODEI 0 "register_operand" "=y")
        (unspec:MMXMODEI
          [(match_operand:MMXMODEI 1 "register_operand" "0")
           (match_operand:MMXMODEI 2 "nonimmediate_operand" "ym")]
          UNSPEC_PSIGN))]
  "TARGET_SSSE3"
  "psign<mmxvecsize>\t{%2, %0|%0, %2}"
  [(set_attr "type" "sselog1") (set_attr "mode" "DI")])
4483
;; SSSE3 palignr, 128-bit form, operating on TImode values.  The
;; operation is wrapped in UNSPEC_PALIGNR.  Operand 1 is tied to the
;; destination; operand 2 may be an SSE register or memory; operand 3 is
;; an immediate accepted by const_0_to_255_mul_8_operand.
(define_insn "ssse3_palignrti"
  [(set (match_operand:TI 0 "register_operand" "=x")
        (unspec:TI
          [(match_operand:TI 1 "register_operand" "0")
           (match_operand:TI 2 "nonimmediate_operand" "xm")
           (match_operand:SI 3 "const_0_to_255_mul_8_operand" "n")]
          UNSPEC_PALIGNR))]
  "TARGET_SSSE3"
{
  /* The immediate printed in the assembly is operand 3 divided by 8
     (presumably a bit count being converted to a byte count -- confirm
     against the predicate and the intrinsic that builds this insn).  */
  operands[3] = GEN_INT (INTVAL (operands[3]) / 8);

  return "palignr\t{%3, %2, %0|%0, %2, %3}";
}
  [(set_attr "type" "sseishft") (set_attr "mode" "TI")])
4497
;; SSSE3 palignr, 64-bit (MMX register) form, operating on DImode
;; values.  The operation is wrapped in UNSPEC_PALIGNR.  Operand 1 is
;; tied to the destination; operand 2 may be an MMX register or memory;
;; operand 3 is an immediate accepted by const_0_to_255_mul_8_operand.
(define_insn "ssse3_palignrdi"
  [(set (match_operand:DI 0 "register_operand" "=y")
        (unspec:DI
          [(match_operand:DI 1 "register_operand" "0")
           (match_operand:DI 2 "nonimmediate_operand" "ym")
           (match_operand:SI 3 "const_0_to_255_mul_8_operand" "n")]
          UNSPEC_PALIGNR))]
  "TARGET_SSSE3"
{
  /* The immediate printed in the assembly is operand 3 divided by 8
     (presumably a bit count being converted to a byte count -- confirm
     against the predicate and the intrinsic that builds this insn).  */
  operands[3] = GEN_INT (INTVAL (operands[3]) / 8);

  return "palignr\t{%3, %2, %0|%0, %2, %3}";
}
  [(set_attr "type" "sseishft") (set_attr "mode" "DI")])
4511
;; Element-wise absolute value on SSE registers, emitted as pabsb, pabsw
;; or pabsd.  One insn is generated for each mode in SSEMODE124 (V16QI,
;; V8HI, V4SI); <ssevecsize> supplies the mnemonic suffix.  The source
;; may be an SSE register or memory and is not tied to the destination.
(define_insn "abs<mode>2"
  [(set (match_operand:SSEMODE124 0 "register_operand" "=x")
        (abs:SSEMODE124
          (match_operand:SSEMODE124 1 "nonimmediate_operand" "xm")))]
  "TARGET_SSSE3"
  "pabs<ssevecsize>\t{%1, %0|%0, %1}"
  [(set_attr "type" "sselog1") (set_attr "mode" "TI")])
4519
;; Element-wise absolute value on MMX registers, emitted as pabs with a
;; size suffix.  One insn is generated for each mode in MMXMODEI;
;; <mmxvecsize> supplies the suffix (both are defined outside this file
;; section).  The source may be an MMX register or memory and is not
;; tied to the destination.
(define_insn "abs<mode>2"
  [(set (match_operand:MMXMODEI 0 "register_operand" "=y")
        (abs:MMXMODEI
          (match_operand:MMXMODEI 1 "nonimmediate_operand" "ym")))]
  "TARGET_SSSE3"
  "pabs<mmxvecsize>\t{%1, %0|%0, %1}"
  [(set_attr "type" "sselog1") (set_attr "mode" "DI")])