;; GCC machine description for SSE instructions
;; Copyright (C) 2005, 2006, 2007
;; Free Software Foundation, Inc.
;;
;; This file is part of GCC.
;;
;; GCC is free software; you can redistribute it and/or modify
;; it under the terms of the GNU General Public License as published by
;; the Free Software Foundation; either version 2, or (at your option)
;; any later version.
;;
;; GCC is distributed in the hope that it will be useful,
;; but WITHOUT ANY WARRANTY; without even the implied warranty of
;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
;; GNU General Public License for more details.
;;
;; You should have received a copy of the GNU General Public License
;; along with GCC; see the file COPYING. If not, write to
;; the Free Software Foundation, 51 Franklin Street, Fifth Floor,
;; Boston, MA 02110-1301, USA.


;; 16 byte integral modes handled by SSE, minus TImode, which gets
;; special-cased for TARGET_64BIT.
(define_mode_macro SSEMODEI [V16QI V8HI V4SI V2DI])

;; All 16-byte vector modes handled by SSE
(define_mode_macro SSEMODE [V16QI V8HI V4SI V2DI V4SF V2DF])

;; Mix-n-match
;; Subsets of the integer vector modes.  The digits in each macro name
;; are the element sizes in bytes of the modes it contains
;; (1 = V16QI, 2 = V8HI, 4 = V4SI, 8 = V2DI).
(define_mode_macro SSEMODE12 [V16QI V8HI])
(define_mode_macro SSEMODE24 [V8HI V4SI])
(define_mode_macro SSEMODE14 [V16QI V4SI])
(define_mode_macro SSEMODE124 [V16QI V8HI V4SI])
(define_mode_macro SSEMODE248 [V8HI V4SI V2DI])

;; Mapping from integer vector mode to mnemonic suffix
(define_mode_attr ssevecsize [(V16QI "b") (V8HI "w") (V4SI "d") (V2DI "q")])

;; Patterns whose name begins with "sse{,2,3}_" are invoked by intrinsics.

;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;;
;; Move patterns
;;
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;

;; All of these patterns are enabled for SSE1 as well as SSE2.
;; This is essential for maintaining stable calling conventions.
;; Move expander for the 16-byte integer vector modes.  All the work is
;; delegated to ix86_expand_vector_move.
(define_expand "mov<mode>"
  [(set (match_operand:SSEMODEI 0 "nonimmediate_operand" "")
        (match_operand:SSEMODEI 1 "nonimmediate_operand" ""))]
  "TARGET_SSE"
{
  ix86_expand_vector_move (<MODE>mode, operands);
  DONE;
})

;; Integer vector move insn.  Alternative 0 loads an SSE constant, the
;; other two are register/memory moves.  Memory-to-memory is rejected by
;; the insn condition.  The "mode" attribute is V4SF (so movaps is used)
;; when optimizing for size, when SSE2 is not available, or for stores
;; when TARGET_SSE_TYPELESS_STORES is set; otherwise it is TI (movdqa).
(define_insn "*mov<mode>_internal"
  [(set (match_operand:SSEMODEI 0 "nonimmediate_operand" "=x,x ,m")
        (match_operand:SSEMODEI 1 "nonimmediate_or_sse_const_operand" "C ,xm,x"))]
  "TARGET_SSE && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
{
  switch (which_alternative)
    {
    case 0:
      return standard_sse_constant_opcode (insn, operands[1]);
    case 1:
    case 2:
      if (get_attr_mode (insn) == MODE_V4SF)
        return "movaps\t{%1, %0|%0, %1}";
      else
        return "movdqa\t{%1, %0|%0, %1}";
    default:
      gcc_unreachable ();
    }
}
  [(set_attr "type" "sselog1,ssemov,ssemov")
   (set (attr "mode")
        (if_then_else
          (ior (ior (ne (symbol_ref "optimize_size") (const_int 0))
                    (eq (symbol_ref "TARGET_SSE2") (const_int 0)))
               (and (eq_attr "alternative" "2")
                    (ne (symbol_ref "TARGET_SSE_TYPELESS_STORES")
                        (const_int 0))))
          (const_string "V4SF")
          (const_string "TI")))])

;; Move expander for V4SF; delegates to ix86_expand_vector_move.
(define_expand "movv4sf"
  [(set (match_operand:V4SF 0 "nonimmediate_operand" "")
        (match_operand:V4SF 1 "nonimmediate_operand" ""))]
  "TARGET_SSE"
{
  ix86_expand_vector_move (V4SFmode, operands);
  DONE;
})

;; V4SF move insn.  Alternative 0 loads an SSE constant; alternatives 1
;; and 2 always emit movaps.
;; The insn condition rejects memory-to-memory moves, exactly like the
;; integer and V2DF "*_internal" move patterns in this file, and the
;; impossible default case uses gcc_unreachable like those patterns do.
(define_insn "*movv4sf_internal"
  [(set (match_operand:V4SF 0 "nonimmediate_operand" "=x,x,m")
        (match_operand:V4SF 1 "nonimmediate_or_sse_const_operand" "C,xm,x"))]
  "TARGET_SSE && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
{
  switch (which_alternative)
    {
    case 0:
      return standard_sse_constant_opcode (insn, operands[1]);
    case 1:
    case 2:
      return "movaps\t{%1, %0|%0, %1}";
    default:
      gcc_unreachable ();
    }
}
  [(set_attr "type" "sselog1,ssemov,ssemov")
   (set_attr "mode" "V4SF")])

;; After reload, rewrite a V4SF load whose source satisfies
;; zero_extended_scalar_load_operand as a vec_merge of the duplicated
;; SFmode value (element 0) with the V4SF zero constant.
(define_split
  [(set (match_operand:V4SF 0 "register_operand" "")
        (match_operand:V4SF 1 "zero_extended_scalar_load_operand" ""))]
  "TARGET_SSE && reload_completed"
  [(set (match_dup 0)
        (vec_merge:V4SF
          (vec_duplicate:V4SF (match_dup 1))
          (match_dup 2)
          (const_int 1)))]
{
  operands[1] = simplify_gen_subreg (SFmode, operands[1], V4SFmode, 0);
  operands[2] = CONST0_RTX (V4SFmode);
})

;; Move expander for V2DF; delegates to ix86_expand_vector_move.
(define_expand "movv2df"
  [(set (match_operand:V2DF 0 "nonimmediate_operand" "")
        (match_operand:V2DF 1 "nonimmediate_operand" ""))]
  "TARGET_SSE"
{
  ix86_expand_vector_move (V2DFmode, operands);
  DONE;
})

;; V2DF move insn.  Same structure as the integer vector move: the
;; "mode" attribute selects movaps (V4SF) when optimizing for size,
;; without SSE2, or for typeless stores, and movapd (V2DF) otherwise.
(define_insn "*movv2df_internal"
  [(set (match_operand:V2DF 0 "nonimmediate_operand" "=x,x,m")
        (match_operand:V2DF 1 "nonimmediate_or_sse_const_operand" "C,xm,x"))]
  "TARGET_SSE && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
{
  switch (which_alternative)
    {
    case 0:
      return standard_sse_constant_opcode (insn, operands[1]);
    case 1:
    case 2:
      if (get_attr_mode (insn) == MODE_V4SF)
        return "movaps\t{%1, %0|%0, %1}";
      else
        return "movapd\t{%1, %0|%0, %1}";
    default:
      gcc_unreachable ();
    }
}
  [(set_attr "type" "sselog1,ssemov,ssemov")
   (set (attr "mode")
        (if_then_else
          (ior (ior (ne (symbol_ref "optimize_size") (const_int 0))
                    (eq (symbol_ref "TARGET_SSE2") (const_int 0)))
               (and (eq_attr "alternative" "2")
                    (ne (symbol_ref "TARGET_SSE_TYPELESS_STORES")
                        (const_int 0))))
          (const_string "V4SF")
          (const_string "V2DF")))])

;; After reload, rewrite a V2DF load whose source satisfies
;; zero_extended_scalar_load_operand as a vec_concat of the DFmode
;; value with the DFmode zero constant.
(define_split
  [(set (match_operand:V2DF 0 "register_operand" "")
        (match_operand:V2DF 1 "zero_extended_scalar_load_operand" ""))]
  "TARGET_SSE2 && reload_completed"
  [(set (match_dup 0) (vec_concat:V2DF (match_dup 1) (match_dup 2)))]
{
  operands[1] = simplify_gen_subreg (DFmode, operands[1], V2DFmode, 0);
  operands[2] = CONST0_RTX (DFmode);
})

;; Push expander for every 16-byte SSE vector mode; delegates to
;; ix86_expand_push.
(define_expand "push<mode>1"
  [(match_operand:SSEMODE 0 "register_operand" "")]
  "TARGET_SSE"
{
  ix86_expand_push (<MODE>mode, operands[0]);
  DONE;
})
;; Misaligned move expander for every 16-byte SSE vector mode;
;; delegates to ix86_expand_vector_move_misalign.
(define_expand "movmisalign<mode>"
  [(set (match_operand:SSEMODE 0 "nonimmediate_operand" "")
        (match_operand:SSEMODE 1 "nonimmediate_operand" ""))]
  "TARGET_SSE"
{
  ix86_expand_vector_move_misalign (<MODE>mode, operands);
  DONE;
})

;; movups on V4SF data, load or store; memory-to-memory is rejected.
;; The "mode" attribute is V4SF to match the operand mode and the
;; packed-single instruction emitted (it previously read V2DF, which
;; describes movupd, not movups).
(define_insn "sse_movups"
  [(set (match_operand:V4SF 0 "nonimmediate_operand" "=x,m")
        (unspec:V4SF [(match_operand:V4SF 1 "nonimmediate_operand" "xm,x")]
                     UNSPEC_MOVU))]
  "TARGET_SSE && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
  "movups\t{%1, %0|%0, %1}"
  [(set_attr "type" "ssemov")
   (set_attr "mode" "V4SF")])

;; movupd on V2DF data, load or store; memory-to-memory is rejected.
(define_insn "sse2_movupd"
  [(set (match_operand:V2DF 0 "nonimmediate_operand" "=x,m")
        (unspec:V2DF [(match_operand:V2DF 1 "nonimmediate_operand" "xm,x")]
                     UNSPEC_MOVU))]
  "TARGET_SSE2 && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
  "movupd\t{%1, %0|%0, %1}"
  [(set_attr "type" "ssemov")
   (set_attr "mode" "V2DF")])

;; movdqu on V16QI data, load or store; memory-to-memory is rejected.
(define_insn "sse2_movdqu"
  [(set (match_operand:V16QI 0 "nonimmediate_operand" "=x,m")
        (unspec:V16QI [(match_operand:V16QI 1 "nonimmediate_operand" "xm,x")]
                      UNSPEC_MOVU))]
  "TARGET_SSE2 && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
  "movdqu\t{%1, %0|%0, %1}"
  [(set_attr "type" "ssemov")
   (set_attr "mode" "TI")])

;; movntps: store of a V4SF register to memory (UNSPEC_MOVNT).
(define_insn "sse_movntv4sf"
  [(set (match_operand:V4SF 0 "memory_operand" "=m")
        (unspec:V4SF [(match_operand:V4SF 1 "register_operand" "x")]
                     UNSPEC_MOVNT))]
  "TARGET_SSE"
  "movntps\t{%1, %0|%0, %1}"
  [(set_attr "type" "ssemov")
   (set_attr "mode" "V4SF")])

;; movntpd: store of a V2DF register to memory (UNSPEC_MOVNT).
(define_insn "sse2_movntv2df"
  [(set (match_operand:V2DF 0 "memory_operand" "=m")
        (unspec:V2DF [(match_operand:V2DF 1 "register_operand" "x")]
                     UNSPEC_MOVNT))]
  "TARGET_SSE2"
  "movntpd\t{%1, %0|%0, %1}"
  [(set_attr "type" "ssecvt")
   (set_attr "mode" "V2DF")])

;; movntdq: store of a V2DI register to memory (UNSPEC_MOVNT).
(define_insn "sse2_movntv2di"
  [(set (match_operand:V2DI 0 "memory_operand" "=m")
        (unspec:V2DI [(match_operand:V2DI 1 "register_operand" "x")]
                     UNSPEC_MOVNT))]
  "TARGET_SSE2"
  "movntdq\t{%1, %0|%0, %1}"
  [(set_attr "type" "ssecvt")
   (set_attr "mode" "TI")])

;; movnti: store of an SImode general register ("r") to memory.
;; The "mode" attribute is SI to match the operand mode (it previously
;; read V2DF although no vector data is involved).
(define_insn "sse2_movntsi"
  [(set (match_operand:SI 0 "memory_operand" "=m")
        (unspec:SI [(match_operand:SI 1 "register_operand" "r")]
                   UNSPEC_MOVNT))]
  "TARGET_SSE2"
  "movnti\t{%1, %0|%0, %1}"
  [(set_attr "type" "ssecvt")
   (set_attr "mode" "SI")])

;; lddqu: load of V16QI data from memory into an SSE register.
(define_insn "sse3_lddqu"
  [(set (match_operand:V16QI 0 "register_operand" "=x")
        (unspec:V16QI [(match_operand:V16QI 1 "memory_operand" "m")]
                      UNSPEC_LDQQU))]
  "TARGET_SSE3"
  "lddqu\t{%1, %0|%0, %1}"
  [(set_attr "type" "ssecvt")
   (set_attr "mode" "TI")])

;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;;
;; Parallel single-precision floating point arithmetic
;;
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;

;; V4SF negation; expanded by ix86_expand_fp_absneg_operator.
(define_expand "negv4sf2"
  [(set (match_operand:V4SF 0 "register_operand" "")
        (neg:V4SF (match_operand:V4SF 1 "nonimmediate_operand" "")))]
  "TARGET_SSE"
  "ix86_expand_fp_absneg_operator (NEG, V4SFmode, operands); DONE;")

;; V4SF absolute value; expanded by ix86_expand_fp_absneg_operator.
(define_expand "absv4sf2"
  [(set (match_operand:V4SF 0 "register_operand" "")
        (abs:V4SF (match_operand:V4SF 1 "nonimmediate_operand" "")))]
  "TARGET_SSE"
  "ix86_expand_fp_absneg_operator (ABS, V4SFmode, operands); DONE;")

;; V4SF addition.  The expander only fixes up the operands; the insn
;; below matches the resulting RTL.
(define_expand "addv4sf3"
  [(set (match_operand:V4SF 0 "register_operand" "")
        (plus:V4SF (match_operand:V4SF 1 "nonimmediate_operand" "")
                   (match_operand:V4SF 2 "nonimmediate_operand" "")))]
  "TARGET_SSE"
  "ix86_fixup_binary_operands_no_copy (PLUS, V4SFmode, operands);")

;; addps.  Operands 1 and 2 are commutative ("%").
(define_insn "*addv4sf3"
  [(set (match_operand:V4SF 0 "register_operand" "=x")
        (plus:V4SF (match_operand:V4SF 1 "nonimmediate_operand" "%0")
                   (match_operand:V4SF 2 "nonimmediate_operand" "xm")))]
  "TARGET_SSE && ix86_binary_operator_ok (PLUS, V4SFmode, operands)"
  "addps\t{%2, %0|%0, %2}"
  [(set_attr "type" "sseadd")
   (set_attr "mode" "V4SF")])

;; addss.  Element 0 receives the sum; the remaining elements are taken
;; from operand 1 (vec_merge mask 1).
(define_insn "sse_vmaddv4sf3"
  [(set (match_operand:V4SF 0 "register_operand" "=x")
        (vec_merge:V4SF
          (plus:V4SF (match_operand:V4SF 1 "register_operand" "0")
                     (match_operand:V4SF 2 "nonimmediate_operand" "xm"))
          (match_dup 1)
          (const_int 1)))]
  "TARGET_SSE && ix86_binary_operator_ok (PLUS, V4SFmode, operands)"
  "addss\t{%2, %0|%0, %2}"
  [(set_attr "type" "sseadd")
   (set_attr "mode" "SF")])

;; V4SF subtraction expander; only fixes up the operands.
(define_expand "subv4sf3"
  [(set (match_operand:V4SF 0 "register_operand" "")
        (minus:V4SF (match_operand:V4SF 1 "register_operand" "")
                    (match_operand:V4SF 2 "nonimmediate_operand" "")))]
  "TARGET_SSE"
  "ix86_fixup_binary_operands_no_copy (MINUS, V4SFmode, operands);")

;; subps.  Operand 1 is tied to the destination.
(define_insn "*subv4sf3"
  [(set (match_operand:V4SF 0 "register_operand" "=x")
        (minus:V4SF (match_operand:V4SF 1 "register_operand" "0")
                    (match_operand:V4SF 2 "nonimmediate_operand" "xm")))]
  "TARGET_SSE"
  "subps\t{%2, %0|%0, %2}"
  [(set_attr "type" "sseadd")
   (set_attr "mode" "V4SF")])

;; subss.  Element 0 receives the difference; the remaining elements
;; are taken from operand 1.
(define_insn "sse_vmsubv4sf3"
  [(set (match_operand:V4SF 0 "register_operand" "=x")
        (vec_merge:V4SF
          (minus:V4SF (match_operand:V4SF 1 "register_operand" "0")
                      (match_operand:V4SF 2 "nonimmediate_operand" "xm"))
          (match_dup 1)
          (const_int 1)))]
  "TARGET_SSE"
  "subss\t{%2, %0|%0, %2}"
  [(set_attr "type" "sseadd")
   (set_attr "mode" "SF")])

;; V4SF multiplication expander; only fixes up the operands.
(define_expand "mulv4sf3"
  [(set (match_operand:V4SF 0 "register_operand" "")
        (mult:V4SF (match_operand:V4SF 1 "nonimmediate_operand" "")
                   (match_operand:V4SF 2 "nonimmediate_operand" "")))]
  "TARGET_SSE"
  "ix86_fixup_binary_operands_no_copy (MULT, V4SFmode, operands);")

;; mulps.  Operands 1 and 2 are commutative ("%").
(define_insn "*mulv4sf3"
  [(set (match_operand:V4SF 0 "register_operand" "=x")
        (mult:V4SF (match_operand:V4SF 1 "nonimmediate_operand" "%0")
                   (match_operand:V4SF 2 "nonimmediate_operand" "xm")))]
  "TARGET_SSE && ix86_binary_operator_ok (MULT, V4SFmode, operands)"
  "mulps\t{%2, %0|%0, %2}"
  [(set_attr "type" "ssemul")
   (set_attr "mode" "V4SF")])

;; mulss.  Element 0 receives the product; the remaining elements are
;; taken from operand 1.
(define_insn "sse_vmmulv4sf3"
  [(set (match_operand:V4SF 0 "register_operand" "=x")
        (vec_merge:V4SF
          (mult:V4SF (match_operand:V4SF 1 "register_operand" "0")
                     (match_operand:V4SF 2 "nonimmediate_operand" "xm"))
          (match_dup 1)
          (const_int 1)))]
  "TARGET_SSE && ix86_binary_operator_ok (MULT, V4SFmode, operands)"
  "mulss\t{%2, %0|%0, %2}"
  [(set_attr "type" "ssemul")
   (set_attr "mode" "SF")])

;; V4SF division expander; only fixes up the operands.
(define_expand "divv4sf3"
  [(set (match_operand:V4SF 0 "register_operand" "")
        (div:V4SF (match_operand:V4SF 1 "register_operand" "")
                  (match_operand:V4SF 2 "nonimmediate_operand" "")))]
  "TARGET_SSE"
  "ix86_fixup_binary_operands_no_copy (DIV, V4SFmode, operands);")

;; divps.  Operand 1 is tied to the destination.
(define_insn "*divv4sf3"
  [(set (match_operand:V4SF 0 "register_operand" "=x")
        (div:V4SF (match_operand:V4SF 1 "register_operand" "0")
                  (match_operand:V4SF 2 "nonimmediate_operand" "xm")))]
  "TARGET_SSE"
  "divps\t{%2, %0|%0, %2}"
  [(set_attr "type" "ssediv")
   (set_attr "mode" "V4SF")])

;; divss.  Element 0 receives the quotient; the remaining elements are
;; taken from operand 1.
(define_insn "sse_vmdivv4sf3"
  [(set (match_operand:V4SF 0 "register_operand" "=x")
        (vec_merge:V4SF
          (div:V4SF (match_operand:V4SF 1 "register_operand" "0")
                    (match_operand:V4SF 2 "nonimmediate_operand" "xm"))
          (match_dup 1)
          (const_int 1)))]
  "TARGET_SSE"
  "divss\t{%2, %0|%0, %2}"
  [(set_attr "type" "ssediv")
   (set_attr "mode" "SF")])

;; rcpps, represented as UNSPEC_RCP on the whole vector.
(define_insn "sse_rcpv4sf2"
  [(set (match_operand:V4SF 0 "register_operand" "=x")
        (unspec:V4SF
         [(match_operand:V4SF 1 "nonimmediate_operand" "xm")] UNSPEC_RCP))]
  "TARGET_SSE"
  "rcpps\t{%1, %0|%0, %1}"
  [(set_attr "type" "sse")
   (set_attr "mode" "V4SF")])

;; rcpss.  Element 0 is UNSPEC_RCP of operand 1; the remaining elements
;; come from operand 2, which is tied to the destination.
(define_insn "sse_vmrcpv4sf2"
  [(set (match_operand:V4SF 0 "register_operand" "=x")
        (vec_merge:V4SF
          (unspec:V4SF [(match_operand:V4SF 1 "nonimmediate_operand" "xm")]
                       UNSPEC_RCP)
          (match_operand:V4SF 2 "register_operand" "0")
          (const_int 1)))]
  "TARGET_SSE"
  "rcpss\t{%1, %0|%0, %1}"
  [(set_attr "type" "sse")
   (set_attr "mode" "SF")])

;; rsqrtps, represented as UNSPEC_RSQRT on the whole vector.
(define_insn "sse_rsqrtv4sf2"
  [(set (match_operand:V4SF 0 "register_operand" "=x")
        (unspec:V4SF
          [(match_operand:V4SF 1 "nonimmediate_operand" "xm")] UNSPEC_RSQRT))]
  "TARGET_SSE"
  "rsqrtps\t{%1, %0|%0, %1}"
  [(set_attr "type" "sse")
   (set_attr "mode" "V4SF")])

;; rsqrtss.  Element 0 is UNSPEC_RSQRT of operand 1; the remaining
;; elements come from operand 2, which is tied to the destination.
(define_insn "sse_vmrsqrtv4sf2"
  [(set (match_operand:V4SF 0 "register_operand" "=x")
        (vec_merge:V4SF
          (unspec:V4SF [(match_operand:V4SF 1 "nonimmediate_operand" "xm")]
                       UNSPEC_RSQRT)
          (match_operand:V4SF 2 "register_operand" "0")
          (const_int 1)))]
  "TARGET_SSE"
  "rsqrtss\t{%1, %0|%0, %1}"
  [(set_attr "type" "sse")
   (set_attr "mode" "SF")])

;; sqrtps on the whole vector.
(define_insn "sqrtv4sf2"
  [(set (match_operand:V4SF 0 "register_operand" "=x")
        (sqrt:V4SF (match_operand:V4SF 1 "nonimmediate_operand" "xm")))]
  "TARGET_SSE"
  "sqrtps\t{%1, %0|%0, %1}"
  [(set_attr "type" "sse")
   (set_attr "mode" "V4SF")])

;; sqrtss.  Element 0 is the square root of operand 1's element 0; the
;; remaining elements come from operand 2, tied to the destination.
(define_insn "sse_vmsqrtv4sf2"
  [(set (match_operand:V4SF 0 "register_operand" "=x")
        (vec_merge:V4SF
          (sqrt:V4SF (match_operand:V4SF 1 "nonimmediate_operand" "xm"))
          (match_operand:V4SF 2 "register_operand" "0")
          (const_int 1)))]
  "TARGET_SSE"
  "sqrtss\t{%1, %0|%0, %1}"
  [(set_attr "type" "sse")
   (set_attr "mode" "SF")])

;; ??? For !flag_finite_math_only, the representation with SMIN/SMAX
;; isn't really correct, as those rtl operators aren't defined when
;; applied to NaNs. Hopefully the optimizers won't get too smart on us.
;; V4SF maximum.  Unless -ffinite-math-only is in effect, operand 1 is
;; forced into a register before the generic operand fixup runs.
(define_expand "smaxv4sf3"
  [(set (match_operand:V4SF 0 "register_operand" "")
        (smax:V4SF
          (match_operand:V4SF 1 "nonimmediate_operand" "")
          (match_operand:V4SF 2 "nonimmediate_operand" "")))]
  "TARGET_SSE"
{
  if (!flag_finite_math_only)
    {
      operands[1] = force_reg (V4SFmode, operands[1]);
    }
  ix86_fixup_binary_operands_no_copy (SMAX, V4SFmode, operands);
})

;; maxps, finite-math variant: the operands are commutative ("%").
(define_insn "*smaxv4sf3_finite"
  [(set (match_operand:V4SF 0 "register_operand" "=x")
        (smax:V4SF
          (match_operand:V4SF 1 "nonimmediate_operand" "%0")
          (match_operand:V4SF 2 "nonimmediate_operand" "xm")))]
  "TARGET_SSE && flag_finite_math_only
   && ix86_binary_operator_ok (SMAX, V4SFmode, operands)"
  "maxps\t{%2, %0|%0, %2}"
  [(set_attr "type" "sse")
   (set_attr "mode" "V4SF")])

;; maxps, general variant: operand 1 must be the destination register
;; and the operands are not marked commutative.
(define_insn "*smaxv4sf3"
  [(set (match_operand:V4SF 0 "register_operand" "=x")
        (smax:V4SF
          (match_operand:V4SF 1 "register_operand" "0")
          (match_operand:V4SF 2 "nonimmediate_operand" "xm")))]
  "TARGET_SSE"
  "maxps\t{%2, %0|%0, %2}"
  [(set_attr "type" "sse")
   (set_attr "mode" "V4SF")])

;; maxss.  Element 0 receives the maximum; the remaining elements are
;; taken from operand 1 (vec_merge mask 1).
(define_insn "sse_vmsmaxv4sf3"
  [(set (match_operand:V4SF 0 "register_operand" "=x")
        (vec_merge:V4SF
          (smax:V4SF
            (match_operand:V4SF 1 "register_operand" "0")
            (match_operand:V4SF 2 "nonimmediate_operand" "xm"))
          (match_dup 1)
          (const_int 1)))]
  "TARGET_SSE"
  "maxss\t{%2, %0|%0, %2}"
  [(set_attr "type" "sse")
   (set_attr "mode" "SF")])

;; V4SF minimum.  Unless -ffinite-math-only is in effect, operand 1 is
;; forced into a register before the generic operand fixup runs.
(define_expand "sminv4sf3"
  [(set (match_operand:V4SF 0 "register_operand" "")
        (smin:V4SF
          (match_operand:V4SF 1 "nonimmediate_operand" "")
          (match_operand:V4SF 2 "nonimmediate_operand" "")))]
  "TARGET_SSE"
{
  if (!flag_finite_math_only)
    {
      operands[1] = force_reg (V4SFmode, operands[1]);
    }
  ix86_fixup_binary_operands_no_copy (SMIN, V4SFmode, operands);
})

;; minps, finite-math variant: the operands are commutative ("%").
(define_insn "*sminv4sf3_finite"
  [(set (match_operand:V4SF 0 "register_operand" "=x")
        (smin:V4SF
          (match_operand:V4SF 1 "nonimmediate_operand" "%0")
          (match_operand:V4SF 2 "nonimmediate_operand" "xm")))]
  "TARGET_SSE && flag_finite_math_only
   && ix86_binary_operator_ok (SMIN, V4SFmode, operands)"
  "minps\t{%2, %0|%0, %2}"
  [(set_attr "type" "sse")
   (set_attr "mode" "V4SF")])

;; minps, general variant: operand 1 must be the destination register
;; and the operands are not marked commutative.
(define_insn "*sminv4sf3"
  [(set (match_operand:V4SF 0 "register_operand" "=x")
        (smin:V4SF
          (match_operand:V4SF 1 "register_operand" "0")
          (match_operand:V4SF 2 "nonimmediate_operand" "xm")))]
  "TARGET_SSE"
  "minps\t{%2, %0|%0, %2}"
  [(set_attr "type" "sse")
   (set_attr "mode" "V4SF")])

;; minss.  Element 0 receives the minimum; the remaining elements are
;; taken from operand 1 (vec_merge mask 1).
(define_insn "sse_vmsminv4sf3"
  [(set (match_operand:V4SF 0 "register_operand" "=x")
        (vec_merge:V4SF
          (smin:V4SF
            (match_operand:V4SF 1 "register_operand" "0")
            (match_operand:V4SF 2 "nonimmediate_operand" "xm"))
          (match_dup 1)
          (const_int 1)))]
  "TARGET_SSE"
  "minss\t{%2, %0|%0, %2}"
  [(set_attr "type" "sse")
   (set_attr "mode" "SF")])

;; These versions of the min/max patterns implement exactly the operations
;;   min = (op1 < op2 ? op1 : op2)
;;   max = (!(op1 < op2) ? op1 : op2)
;; Their operands are not commutative, and thus they may be used in the
;; presence of -0.0 and NaN.
;; minps expressed as UNSPEC_IEEE_MIN so that the operand order is
;; preserved (operand 1 is tied to the destination).
(define_insn "*ieee_sminv4sf3"
  [(set (match_operand:V4SF 0 "register_operand" "=x")
        (unspec:V4SF [(match_operand:V4SF 1 "register_operand" "0")
                      (match_operand:V4SF 2 "nonimmediate_operand" "xm")]
                     UNSPEC_IEEE_MIN))]
  "TARGET_SSE"
  "minps\t{%2, %0|%0, %2}"
  [(set_attr "type" "sseadd")
   (set_attr "mode" "V4SF")])

;; maxps expressed as UNSPEC_IEEE_MAX.
(define_insn "*ieee_smaxv4sf3"
  [(set (match_operand:V4SF 0 "register_operand" "=x")
        (unspec:V4SF [(match_operand:V4SF 1 "register_operand" "0")
                      (match_operand:V4SF 2 "nonimmediate_operand" "xm")]
                     UNSPEC_IEEE_MAX))]
  "TARGET_SSE"
  "maxps\t{%2, %0|%0, %2}"
  [(set_attr "type" "sseadd")
   (set_attr "mode" "V4SF")])

;; minpd expressed as UNSPEC_IEEE_MIN.
(define_insn "*ieee_sminv2df3"
  [(set (match_operand:V2DF 0 "register_operand" "=x")
        (unspec:V2DF [(match_operand:V2DF 1 "register_operand" "0")
                      (match_operand:V2DF 2 "nonimmediate_operand" "xm")]
                     UNSPEC_IEEE_MIN))]
  "TARGET_SSE2"
  "minpd\t{%2, %0|%0, %2}"
  [(set_attr "type" "sseadd")
   (set_attr "mode" "V2DF")])

;; maxpd expressed as UNSPEC_IEEE_MAX.
(define_insn "*ieee_smaxv2df3"
  [(set (match_operand:V2DF 0 "register_operand" "=x")
        (unspec:V2DF [(match_operand:V2DF 1 "register_operand" "0")
                      (match_operand:V2DF 2 "nonimmediate_operand" "xm")]
                     UNSPEC_IEEE_MAX))]
  "TARGET_SSE2"
  "maxpd\t{%2, %0|%0, %2}"
  [(set_attr "type" "sseadd")
   (set_attr "mode" "V2DF")])

;; addsubps.  The instruction subtracts in elements 0 and 2 and adds in
;; elements 1 and 3.  In a vec_merge a set mask bit selects the element
;; from the first vector (the sum here), so the mask must have bits 1
;; and 3 set: 0b1010 = 10.  The mask was previously 5, which described
;; the sum in elements 0 and 2, i.e. the opposite of what addsubps does.
(define_insn "sse3_addsubv4sf3"
  [(set (match_operand:V4SF 0 "register_operand" "=x")
        (vec_merge:V4SF
          (plus:V4SF
            (match_operand:V4SF 1 "register_operand" "0")
            (match_operand:V4SF 2 "nonimmediate_operand" "xm"))
          (minus:V4SF (match_dup 1) (match_dup 2))
          (const_int 10)))]
  "TARGET_SSE3"
  "addsubps\t{%2, %0|%0, %2}"
  [(set_attr "type" "sseadd")
   (set_attr "mode" "V4SF")])

;; haddps.  Result elements, low to high:
;;   op1[0]+op1[1], op1[2]+op1[3], op2[0]+op2[1], op2[2]+op2[3].
(define_insn "sse3_haddv4sf3"
  [(set (match_operand:V4SF 0 "register_operand" "=x")
        (vec_concat:V4SF
          (vec_concat:V2SF
            (plus:SF
              (vec_select:SF
                (match_operand:V4SF 1 "register_operand" "0")
                (parallel [(const_int 0)]))
              (vec_select:SF (match_dup 1) (parallel [(const_int 1)])))
            (plus:SF
              (vec_select:SF (match_dup 1) (parallel [(const_int 2)]))
              (vec_select:SF (match_dup 1) (parallel [(const_int 3)]))))
          (vec_concat:V2SF
            (plus:SF
              (vec_select:SF
                (match_operand:V4SF 2 "nonimmediate_operand" "xm")
                (parallel [(const_int 0)]))
              (vec_select:SF (match_dup 2) (parallel [(const_int 1)])))
            (plus:SF
              (vec_select:SF (match_dup 2) (parallel [(const_int 2)]))
              (vec_select:SF (match_dup 2) (parallel [(const_int 3)]))))))]
  "TARGET_SSE3"
  "haddps\t{%2, %0|%0, %2}"
  [(set_attr "type" "sseadd")
   (set_attr "mode" "V4SF")])

;; hsubps.  Result elements, low to high:
;;   op1[0]-op1[1], op1[2]-op1[3], op2[0]-op2[1], op2[2]-op2[3].
(define_insn "sse3_hsubv4sf3"
  [(set (match_operand:V4SF 0 "register_operand" "=x")
        (vec_concat:V4SF
          (vec_concat:V2SF
            (minus:SF
              (vec_select:SF
                (match_operand:V4SF 1 "register_operand" "0")
                (parallel [(const_int 0)]))
              (vec_select:SF (match_dup 1) (parallel [(const_int 1)])))
            (minus:SF
              (vec_select:SF (match_dup 1) (parallel [(const_int 2)]))
              (vec_select:SF (match_dup 1) (parallel [(const_int 3)]))))
          (vec_concat:V2SF
            (minus:SF
              (vec_select:SF
                (match_operand:V4SF 2 "nonimmediate_operand" "xm")
                (parallel [(const_int 0)]))
              (vec_select:SF (match_dup 2) (parallel [(const_int 1)])))
            (minus:SF
              (vec_select:SF (match_dup 2) (parallel [(const_int 2)]))
              (vec_select:SF (match_dup 2) (parallel [(const_int 3)]))))))]
  "TARGET_SSE3"
  "hsubps\t{%2, %0|%0, %2}"
  [(set_attr "type" "sseadd")
   (set_attr "mode" "V4SF")])

;; Sum reduction of a V4SF vector.  With SSE3 two haddps instructions
;; are emitted (the first on operand 1 with itself, the second on that
;; intermediate with itself); otherwise ix86_expand_reduc_v4sf is used
;; with the addv4sf3 generator.
(define_expand "reduc_splus_v4sf"
  [(match_operand:V4SF 0 "register_operand" "")
   (match_operand:V4SF 1 "register_operand" "")]
  "TARGET_SSE"
{
  if (TARGET_SSE3)
    {
      rtx tmp = gen_reg_rtx (V4SFmode);
      emit_insn (gen_sse3_haddv4sf3 (tmp, operands[1], operands[1]));
      emit_insn (gen_sse3_haddv4sf3 (operands[0], tmp, tmp));
    }
  else
    ix86_expand_reduc_v4sf (gen_addv4sf3, operands[0], operands[1]);
  DONE;
})

;; Maximum reduction of a V4SF vector via ix86_expand_reduc_v4sf.
(define_expand "reduc_smax_v4sf"
  [(match_operand:V4SF 0 "register_operand" "")
   (match_operand:V4SF 1 "register_operand" "")]
  "TARGET_SSE"
{
  ix86_expand_reduc_v4sf (gen_smaxv4sf3, operands[0], operands[1]);
  DONE;
})

;; Minimum reduction of a V4SF vector via ix86_expand_reduc_v4sf.
(define_expand "reduc_smin_v4sf"
  [(match_operand:V4SF 0 "register_operand" "")
   (match_operand:V4SF 1 "register_operand" "")]
  "TARGET_SSE"
{
  ix86_expand_reduc_v4sf (gen_sminv4sf3, operands[0], operands[1]);
  DONE;
})

;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;;
;; Parallel single-precision floating point comparisons
;;
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;

;; cmp<cond>ps.  The comparison code is operator 3 and is printed in
;; the mnemonic through the %D3 operand modifier.
(define_insn "sse_maskcmpv4sf3"
  [(set (match_operand:V4SF 0 "register_operand" "=x")
        (match_operator:V4SF 3 "sse_comparison_operator"
                [(match_operand:V4SF 1 "register_operand" "0")
                 (match_operand:V4SF 2 "nonimmediate_operand" "xm")]))]
  "TARGET_SSE"
  "cmp%D3ps\t{%2, %0|%0, %2}"
  [(set_attr "type" "ssecmp")
   (set_attr "mode" "V4SF")])

;; cmp<cond>ss.  Element 0 receives the comparison result; the
;; remaining elements are taken from operand 1.  Operand 2 must be a
;; register here.
(define_insn "sse_vmmaskcmpv4sf3"
  [(set (match_operand:V4SF 0 "register_operand" "=x")
        (vec_merge:V4SF
         (match_operator:V4SF 3 "sse_comparison_operator"
                [(match_operand:V4SF 1 "register_operand" "0")
                 (match_operand:V4SF 2 "register_operand" "x")])
         (match_dup 1)
         (const_int 1)))]
  "TARGET_SSE"
  "cmp%D3ss\t{%2, %0|%0, %2}"
  [(set_attr "type" "ssecmp")
   (set_attr "mode" "SF")])

;; comiss: compares element 0 of both operands and sets the flags
;; register in CCFP mode.
(define_insn "sse_comi"
  [(set (reg:CCFP FLAGS_REG)
        (compare:CCFP
          (vec_select:SF
            (match_operand:V4SF 0 "register_operand" "x")
            (parallel [(const_int 0)]))
          (vec_select:SF
            (match_operand:V4SF 1 "nonimmediate_operand" "xm")
            (parallel [(const_int 0)]))))]
  "TARGET_SSE"
  "comiss\t{%1, %0|%0, %1}"
  [(set_attr "type" "ssecomi")
   (set_attr "mode" "SF")])
;; ucomiss: compares element 0 of both operands and sets the flags
;; register in CCFPU mode.
(define_insn "sse_ucomi"
  [(set (reg:CCFPU FLAGS_REG)
        (compare:CCFPU
          (vec_select:SF
            (match_operand:V4SF 0 "register_operand" "x")
            (parallel [(const_int 0)]))
          (vec_select:SF
            (match_operand:V4SF 1 "nonimmediate_operand" "xm")
            (parallel [(const_int 0)]))))]
  "TARGET_SSE"
  "ucomiss\t{%1, %0|%0, %1}"
  [(set_attr "type" "ssecomi")
   (set_attr "mode" "SF")])

;; Vector conditional move on V4SF.  Operator 3 compares operands 4 and
;; 5; operands 1 and 2 are the two values selected between.  The
;; expansion succeeds or fails according to ix86_expand_fp_vcond.
(define_expand "vcondv4sf"
  [(set (match_operand:V4SF 0 "register_operand" "")
        (if_then_else:V4SF
          (match_operator 3 ""
            [(match_operand:V4SF 4 "nonimmediate_operand" "")
             (match_operand:V4SF 5 "nonimmediate_operand" "")])
          (match_operand:V4SF 1 "general_operand" "")
          (match_operand:V4SF 2 "general_operand" "")))]
  "TARGET_SSE"
{
  if (!ix86_expand_fp_vcond (operands))
    FAIL;
  DONE;
})

;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;;
;; Parallel single-precision floating point logical operations
;;
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;

;; Bitwise AND on V4SF; the expander only fixes up the operands.
(define_expand "andv4sf3"
  [(set (match_operand:V4SF 0 "register_operand" "")
        (and:V4SF
          (match_operand:V4SF 1 "nonimmediate_operand" "")
          (match_operand:V4SF 2 "nonimmediate_operand" "")))]
  "TARGET_SSE"
  "ix86_fixup_binary_operands_no_copy (AND, V4SFmode, operands);")

;; andps.  Operands 1 and 2 are commutative ("%").
(define_insn "*andv4sf3"
  [(set (match_operand:V4SF 0 "register_operand" "=x")
        (and:V4SF
          (match_operand:V4SF 1 "nonimmediate_operand" "%0")
          (match_operand:V4SF 2 "nonimmediate_operand" "xm")))]
  "TARGET_SSE && ix86_binary_operator_ok (AND, V4SFmode, operands)"
  "andps\t{%2, %0|%0, %2}"
  [(set_attr "type" "sselog")
   (set_attr "mode" "V4SF")])

;; andnps: (NOT operand 1) AND operand 2.
(define_insn "sse_nandv4sf3"
  [(set (match_operand:V4SF 0 "register_operand" "=x")
        (and:V4SF
          (not:V4SF (match_operand:V4SF 1 "register_operand" "0"))
          (match_operand:V4SF 2 "nonimmediate_operand" "xm")))]
  "TARGET_SSE"
  "andnps\t{%2, %0|%0, %2}"
  [(set_attr "type" "sselog")
   (set_attr "mode" "V4SF")])

;; Bitwise inclusive OR on V4SF; the expander only fixes up the operands.
(define_expand "iorv4sf3"
  [(set (match_operand:V4SF 0 "register_operand" "")
        (ior:V4SF
          (match_operand:V4SF 1 "nonimmediate_operand" "")
          (match_operand:V4SF 2 "nonimmediate_operand" "")))]
  "TARGET_SSE"
  "ix86_fixup_binary_operands_no_copy (IOR, V4SFmode, operands);")

;; orps.  Operands 1 and 2 are commutative ("%").
(define_insn "*iorv4sf3"
  [(set (match_operand:V4SF 0 "register_operand" "=x")
        (ior:V4SF
          (match_operand:V4SF 1 "nonimmediate_operand" "%0")
          (match_operand:V4SF 2 "nonimmediate_operand" "xm")))]
  "TARGET_SSE && ix86_binary_operator_ok (IOR, V4SFmode, operands)"
  "orps\t{%2, %0|%0, %2}"
  [(set_attr "type" "sselog")
   (set_attr "mode" "V4SF")])

;; Bitwise exclusive OR on V4SF; the expander only fixes up the operands.
(define_expand "xorv4sf3"
  [(set (match_operand:V4SF 0 "register_operand" "")
        (xor:V4SF
          (match_operand:V4SF 1 "nonimmediate_operand" "")
          (match_operand:V4SF 2 "nonimmediate_operand" "")))]
  "TARGET_SSE"
  "ix86_fixup_binary_operands_no_copy (XOR, V4SFmode, operands);")

;; xorps.  Operands 1 and 2 are commutative ("%").
(define_insn "*xorv4sf3"
  [(set (match_operand:V4SF 0 "register_operand" "=x")
        (xor:V4SF
          (match_operand:V4SF 1 "nonimmediate_operand" "%0")
          (match_operand:V4SF 2 "nonimmediate_operand" "xm")))]
  "TARGET_SSE && ix86_binary_operator_ok (XOR, V4SFmode, operands)"
  "xorps\t{%2, %0|%0, %2}"
  [(set_attr "type" "sselog")
   (set_attr "mode" "V4SF")])

;; Also define scalar versions. These are used for abs, neg, and
;; conditional move. Using subregs into vector modes causes register
;; allocation lossage. These patterns do not allow memory operands
;; because the native instructions read the full 128-bits.
;; andps on SFmode values held in SSE registers (registers only).
(define_insn "*andsf3"
  [(set (match_operand:SF 0 "register_operand" "=x")
        (and:SF (match_operand:SF 1 "register_operand" "0")
                (match_operand:SF 2 "register_operand" "x")))]
  "TARGET_SSE"
  "andps\t{%2, %0|%0, %2}"
  [(set_attr "type" "sselog")
   (set_attr "mode" "V4SF")])

;; andnps on SFmode values: (NOT operand 1) AND operand 2.
(define_insn "*nandsf3"
  [(set (match_operand:SF 0 "register_operand" "=x")
        (and:SF (not:SF (match_operand:SF 1 "register_operand" "0"))
                (match_operand:SF 2 "register_operand" "x")))]
  "TARGET_SSE"
  "andnps\t{%2, %0|%0, %2}"
  [(set_attr "type" "sselog")
   (set_attr "mode" "V4SF")])

;; orps on SFmode values held in SSE registers.
(define_insn "*iorsf3"
  [(set (match_operand:SF 0 "register_operand" "=x")
        (ior:SF (match_operand:SF 1 "register_operand" "0")
                (match_operand:SF 2 "register_operand" "x")))]
  "TARGET_SSE"
  "orps\t{%2, %0|%0, %2}"
  [(set_attr "type" "sselog")
   (set_attr "mode" "V4SF")])

;; xorps on SFmode values held in SSE registers.
(define_insn "*xorsf3"
  [(set (match_operand:SF 0 "register_operand" "=x")
        (xor:SF (match_operand:SF 1 "register_operand" "0")
                (match_operand:SF 2 "register_operand" "x")))]
  "TARGET_SSE"
  "xorps\t{%2, %0|%0, %2}"
  [(set_attr "type" "sselog")
   (set_attr "mode" "V4SF")])

;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;;
;; Parallel single-precision floating point conversion operations
;;
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;

;; cvtpi2ps: converts a V2SI value (MMX register "y" or memory) to
;; float and merges it into elements 0 and 1 of operand 1 (mask 3).
(define_insn "sse_cvtpi2ps"
  [(set (match_operand:V4SF 0 "register_operand" "=x")
        (vec_merge:V4SF
          (vec_duplicate:V4SF
            (float:V2SF (match_operand:V2SI 2 "nonimmediate_operand" "ym")))
          (match_operand:V4SF 1 "register_operand" "0")
          (const_int 3)))]
  "TARGET_SSE"
  "cvtpi2ps\t{%2, %0|%0, %2}"
  [(set_attr "type" "ssecvt")
   (set_attr "mode" "V4SF")])

;; cvtps2pi: UNSPEC_FIX_NOTRUNC conversion of elements 0 and 1 into an
;; MMX register.
(define_insn "sse_cvtps2pi"
  [(set (match_operand:V2SI 0 "register_operand" "=y")
        (vec_select:V2SI
          (unspec:V4SI [(match_operand:V4SF 1 "nonimmediate_operand" "xm")]
                       UNSPEC_FIX_NOTRUNC)
          (parallel [(const_int 0) (const_int 1)])))]
  "TARGET_SSE"
  "cvtps2pi\t{%1, %0|%0, %1}"
  [(set_attr "type" "ssecvt")
   (set_attr "unit" "mmx")
   (set_attr "mode" "DI")])

;; cvttps2pi: "fix" conversion of elements 0 and 1 into an MMX register.
(define_insn "sse_cvttps2pi"
  [(set (match_operand:V2SI 0 "register_operand" "=y")
        (vec_select:V2SI
          (fix:V4SI (match_operand:V4SF 1 "nonimmediate_operand" "xm"))
          (parallel [(const_int 0) (const_int 1)])))]
  "TARGET_SSE"
  "cvttps2pi\t{%1, %0|%0, %1}"
  [(set_attr "type" "ssecvt")
   (set_attr "unit" "mmx")
   (set_attr "mode" "SF")])

;; cvtsi2ss: converts an SImode integer to float in element 0; the
;; remaining elements come from operand 1.
(define_insn "sse_cvtsi2ss"
  [(set (match_operand:V4SF 0 "register_operand" "=x,x")
        (vec_merge:V4SF
          (vec_duplicate:V4SF
            (float:SF (match_operand:SI 2 "nonimmediate_operand" "r,m")))
          (match_operand:V4SF 1 "register_operand" "0,0")
          (const_int 1)))]
  "TARGET_SSE"
  "cvtsi2ss\t{%2, %0|%0, %2}"
  [(set_attr "type" "sseicvt")
   (set_attr "athlon_decode" "vector,double")
   (set_attr "amdfam10_decode" "vector,double")
   (set_attr "mode" "SF")])

;; cvtsi2ssq: DImode source variant of the above, 64-bit targets only.
(define_insn "sse_cvtsi2ssq"
  [(set (match_operand:V4SF 0 "register_operand" "=x,x")
        (vec_merge:V4SF
          (vec_duplicate:V4SF
            (float:SF (match_operand:DI 2 "nonimmediate_operand" "r,rm")))
          (match_operand:V4SF 1 "register_operand" "0,0")
          (const_int 1)))]
  "TARGET_SSE && TARGET_64BIT"
  "cvtsi2ssq\t{%2, %0|%0, %2}"
  [(set_attr "type" "sseicvt")
   (set_attr "athlon_decode" "vector,double")
   (set_attr "amdfam10_decode" "vector,double")
   (set_attr "mode" "SF")])

;; cvtss2si: UNSPEC_FIX_NOTRUNC conversion of element 0 to SImode.
(define_insn "sse_cvtss2si"
  [(set (match_operand:SI 0 "register_operand" "=r,r")
        (unspec:SI
          [(vec_select:SF
             (match_operand:V4SF 1 "nonimmediate_operand" "x,m")
             (parallel [(const_int 0)]))]
          UNSPEC_FIX_NOTRUNC))]
  "TARGET_SSE"
  "cvtss2si\t{%1, %0|%0, %1}"
  [(set_attr "type" "sseicvt")
   (set_attr "athlon_decode" "double,vector")
   (set_attr "amdfam10_decode" "double,double")
   (set_attr "mode" "SI")])

;; cvtss2siq: DImode result variant, 64-bit targets only.
(define_insn "sse_cvtss2siq"
  [(set (match_operand:DI 0 "register_operand" "=r,r")
        (unspec:DI
          [(vec_select:SF
             (match_operand:V4SF 1 "nonimmediate_operand" "x,m")
             (parallel [(const_int 0)]))]
          UNSPEC_FIX_NOTRUNC))]
  "TARGET_SSE && TARGET_64BIT"
  "cvtss2siq\t{%1, %0|%0, %1}"
  [(set_attr "type" "sseicvt")
   (set_attr "athlon_decode" "double,vector")
   (set_attr "amdfam10_decode" "double,double")
   (set_attr "mode" "DI")])

;; cvttss2si: "fix" conversion of element 0 to SImode.
(define_insn "sse_cvttss2si"
  [(set (match_operand:SI 0 "register_operand" "=r,r")
        (fix:SI
          (vec_select:SF
            (match_operand:V4SF 1 "nonimmediate_operand" "x,m")
            (parallel [(const_int 0)]))))]
  "TARGET_SSE"
  "cvttss2si\t{%1, %0|%0, %1}"
  [(set_attr "type" "sseicvt")
   (set_attr "athlon_decode" "double,vector")
   (set_attr "amdfam10_decode" "double,double")
   (set_attr "mode" "SI")])

;; cvttss2siq: DImode result variant, 64-bit targets only.
(define_insn "sse_cvttss2siq"
  [(set (match_operand:DI 0 "register_operand" "=r,r")
        (fix:DI
          (vec_select:SF
            (match_operand:V4SF 1 "nonimmediate_operand" "x,m")
            (parallel [(const_int 0)]))))]
  "TARGET_SSE && TARGET_64BIT"
  "cvttss2siq\t{%1, %0|%0, %1}"
  [(set_attr "type" "sseicvt")
   (set_attr "athlon_decode" "double,vector")
   (set_attr "amdfam10_decode" "double,double")
   (set_attr "mode" "DI")])

;; cvtdq2ps: converts V4SI to V4SF.
;; The "mode" attribute is V4SF to match the packed-single result (it
;; previously read V2DF although no double-precision data is involved).
(define_insn "sse2_cvtdq2ps"
  [(set (match_operand:V4SF 0 "register_operand" "=x")
        (float:V4SF (match_operand:V4SI 1 "nonimmediate_operand" "xm")))]
  "TARGET_SSE2"
  "cvtdq2ps\t{%1, %0|%0, %1}"
  [(set_attr "type" "ssecvt")
   (set_attr "mode" "V4SF")])

;; cvtps2dq: UNSPEC_FIX_NOTRUNC conversion of V4SF to V4SI.
(define_insn "sse2_cvtps2dq"
  [(set (match_operand:V4SI 0 "register_operand" "=x")
        (unspec:V4SI [(match_operand:V4SF 1 "nonimmediate_operand" "xm")]
                     UNSPEC_FIX_NOTRUNC))]
  "TARGET_SSE2"
  "cvtps2dq\t{%1, %0|%0, %1}"
  [(set_attr "type" "ssecvt")
   (set_attr "mode" "TI")])

;; cvttps2dq: "fix" conversion of V4SF to V4SI.
(define_insn "sse2_cvttps2dq"
  [(set (match_operand:V4SI 0 "register_operand" "=x")
        (fix:V4SI (match_operand:V4SF 1 "nonimmediate_operand" "xm")))]
  "TARGET_SSE2"
  "cvttps2dq\t{%1, %0|%0, %1}"
  [(set_attr "type" "ssecvt")
   (set_attr "mode" "TI")])

;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;;
;; Parallel single-precision floating point element swizzling
;;
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;

;; Selects elements 6, 7, 2, 3 of the concatenation (operand 1,
;; operand 2): the high half of operand 2 followed by the high half of
;; operand 1.  The three alternatives emit movhlps (register source),
;; movlps from the high part of an offsettable memory source (%H2), and
;; movhps to a memory destination.
(define_insn "sse_movhlps"
  [(set (match_operand:V4SF 0 "nonimmediate_operand"     "=x,x,m")
        (vec_select:V4SF
          (vec_concat:V8SF
            (match_operand:V4SF 1 "nonimmediate_operand" " 0,0,0")
            (match_operand:V4SF 2 "nonimmediate_operand" " x,o,x"))
          (parallel [(const_int 6)
                     (const_int 7)
                     (const_int 2)
                     (const_int 3)])))]
  "TARGET_SSE && !(MEM_P (operands[1]) && MEM_P (operands[2]))"
  "@
   movhlps\t{%2, %0|%0, %2}
   movlps\t{%H2, %0|%0, %H2}
   movhps\t{%2, %0|%0, %2}"
  [(set_attr "type" "ssemov")
   (set_attr "mode" "V4SF,V2SF,V2SF")])

;; Selects elements 0, 1, 4, 5 of the concatenation (operand 1,
;; operand 2): the low half of operand 1 followed by the low half of
;; operand 2.  The three alternatives emit movlhps (register source),
;; movhps from a memory source, and movlps to the high part of an
;; offsettable memory destination (%H0).
(define_insn "sse_movlhps"
  [(set (match_operand:V4SF 0 "nonimmediate_operand"     "=x,x,o")
        (vec_select:V4SF
          (vec_concat:V8SF
            (match_operand:V4SF 1 "nonimmediate_operand" " 0,0,0")
            (match_operand:V4SF 2 "nonimmediate_operand" " x,m,x"))
          (parallel [(const_int 0)
                     (const_int 1)
                     (const_int 4)
                     (const_int 5)])))]
  "TARGET_SSE && ix86_binary_operator_ok (UNKNOWN, V4SFmode, operands)"
  "@
   movlhps\t{%2, %0|%0, %2}
   movhps\t{%2, %0|%0, %2}
   movlps\t{%2, %H0|%H0, %2}"
  [(set_attr "type" "ssemov")
   (set_attr "mode" "V4SF,V2SF,V2SF")])

(define_insn "sse_unpckhps"
  [(set (match_operand:V4SF 0 "register_operand" "=x")
        (vec_select:V4SF
          (vec_concat:V8SF
            (match_operand:V4SF 1 "register_operand" "0")
            (match_operand:V4SF 2 "nonimmediate_operand" "xm"))
          (parallel [(const_int 2) (const_int 6)
(const_int 3) (const_int 7)])))] 1069 "TARGET_SSE" 1070 "unpckhps\t{%2, %0|%0, %2}" 1071 [(set_attr "type" "sselog") 1072 (set_attr "mode" "V4SF")]) 1073 1074(define_insn "sse_unpcklps" 1075 [(set (match_operand:V4SF 0 "register_operand" "=x") 1076 (vec_select:V4SF 1077 (vec_concat:V8SF 1078 (match_operand:V4SF 1 "register_operand" "0") 1079 (match_operand:V4SF 2 "nonimmediate_operand" "xm")) 1080 (parallel [(const_int 0) (const_int 4) 1081 (const_int 1) (const_int 5)])))] 1082 "TARGET_SSE" 1083 "unpcklps\t{%2, %0|%0, %2}" 1084 [(set_attr "type" "sselog") 1085 (set_attr "mode" "V4SF")]) 1086 1087;; These are modeled with the same vec_concat as the others so that we 1088;; capture users of shufps that can use the new instructions 1089(define_insn "sse3_movshdup" 1090 [(set (match_operand:V4SF 0 "register_operand" "=x") 1091 (vec_select:V4SF 1092 (vec_concat:V8SF 1093 (match_operand:V4SF 1 "nonimmediate_operand" "xm") 1094 (match_dup 1)) 1095 (parallel [(const_int 1) 1096 (const_int 1) 1097 (const_int 7) 1098 (const_int 7)])))] 1099 "TARGET_SSE3" 1100 "movshdup\t{%1, %0|%0, %1}" 1101 [(set_attr "type" "sse") 1102 (set_attr "mode" "V4SF")]) 1103 1104(define_insn "sse3_movsldup" 1105 [(set (match_operand:V4SF 0 "register_operand" "=x") 1106 (vec_select:V4SF 1107 (vec_concat:V8SF 1108 (match_operand:V4SF 1 "nonimmediate_operand" "xm") 1109 (match_dup 1)) 1110 (parallel [(const_int 0) 1111 (const_int 0) 1112 (const_int 6) 1113 (const_int 6)])))] 1114 "TARGET_SSE3" 1115 "movsldup\t{%1, %0|%0, %1}" 1116 [(set_attr "type" "sse") 1117 (set_attr "mode" "V4SF")]) 1118 1119(define_expand "sse_shufps" 1120 [(match_operand:V4SF 0 "register_operand" "") 1121 (match_operand:V4SF 1 "register_operand" "") 1122 (match_operand:V4SF 2 "nonimmediate_operand" "") 1123 (match_operand:SI 3 "const_int_operand" "")] 1124 "TARGET_SSE" 1125{ 1126 int mask = INTVAL (operands[3]); 1127 emit_insn (gen_sse_shufps_1 (operands[0], operands[1], operands[2], 1128 GEN_INT ((mask >> 0) & 3), 1129 GEN_INT 
((mask >> 2) & 3), 1130 GEN_INT (((mask >> 4) & 3) + 4), 1131 GEN_INT (((mask >> 6) & 3) + 4))); 1132 DONE; 1133}) 1134 1135(define_insn "sse_shufps_1" 1136 [(set (match_operand:V4SF 0 "register_operand" "=x") 1137 (vec_select:V4SF 1138 (vec_concat:V8SF 1139 (match_operand:V4SF 1 "register_operand" "0") 1140 (match_operand:V4SF 2 "nonimmediate_operand" "xm")) 1141 (parallel [(match_operand 3 "const_0_to_3_operand" "") 1142 (match_operand 4 "const_0_to_3_operand" "") 1143 (match_operand 5 "const_4_to_7_operand" "") 1144 (match_operand 6 "const_4_to_7_operand" "")])))] 1145 "TARGET_SSE" 1146{ 1147 int mask = 0; 1148 mask |= INTVAL (operands[3]) << 0; 1149 mask |= INTVAL (operands[4]) << 2; 1150 mask |= (INTVAL (operands[5]) - 4) << 4; 1151 mask |= (INTVAL (operands[6]) - 4) << 6; 1152 operands[3] = GEN_INT (mask); 1153 1154 return "shufps\t{%3, %2, %0|%0, %2, %3}"; 1155} 1156 [(set_attr "type" "sselog") 1157 (set_attr "mode" "V4SF")]) 1158 1159(define_insn "sse_storehps" 1160 [(set (match_operand:V2SF 0 "nonimmediate_operand" "=m,x,x") 1161 (vec_select:V2SF 1162 (match_operand:V4SF 1 "nonimmediate_operand" "x,x,o") 1163 (parallel [(const_int 2) (const_int 3)])))] 1164 "TARGET_SSE" 1165 "@ 1166 movhps\t{%1, %0|%0, %1} 1167 movhlps\t{%1, %0|%0, %1} 1168 movlps\t{%H1, %0|%0, %H1}" 1169 [(set_attr "type" "ssemov") 1170 (set_attr "mode" "V2SF,V4SF,V2SF")]) 1171 1172(define_insn "sse_loadhps" 1173 [(set (match_operand:V4SF 0 "nonimmediate_operand" "=x,x,o") 1174 (vec_concat:V4SF 1175 (vec_select:V2SF 1176 (match_operand:V4SF 1 "nonimmediate_operand" "0,0,0") 1177 (parallel [(const_int 0) (const_int 1)])) 1178 (match_operand:V2SF 2 "nonimmediate_operand" "m,x,x")))] 1179 "TARGET_SSE" 1180 "@ 1181 movhps\t{%2, %0|%0, %2} 1182 movlhps\t{%2, %0|%0, %2} 1183 movlps\t{%2, %H0|%H0, %2}" 1184 [(set_attr "type" "ssemov") 1185 (set_attr "mode" "V2SF,V4SF,V2SF")]) 1186 1187(define_insn "sse_storelps" 1188 [(set (match_operand:V2SF 0 "nonimmediate_operand" "=m,x,x") 1189 
(vec_select:V2SF 1190 (match_operand:V4SF 1 "nonimmediate_operand" "x,x,m") 1191 (parallel [(const_int 0) (const_int 1)])))] 1192 "TARGET_SSE" 1193 "@ 1194 movlps\t{%1, %0|%0, %1} 1195 movaps\t{%1, %0|%0, %1} 1196 movlps\t{%1, %0|%0, %1}" 1197 [(set_attr "type" "ssemov") 1198 (set_attr "mode" "V2SF,V4SF,V2SF")]) 1199 1200(define_insn "sse_loadlps" 1201 [(set (match_operand:V4SF 0 "nonimmediate_operand" "=x,x,m") 1202 (vec_concat:V4SF 1203 (match_operand:V2SF 2 "nonimmediate_operand" "0,m,x") 1204 (vec_select:V2SF 1205 (match_operand:V4SF 1 "nonimmediate_operand" "x,0,0") 1206 (parallel [(const_int 2) (const_int 3)]))))] 1207 "TARGET_SSE" 1208 "@ 1209 shufps\t{$0xe4, %1, %0|%0, %1, 0xe4} 1210 movlps\t{%2, %0|%0, %2} 1211 movlps\t{%2, %0|%0, %2}" 1212 [(set_attr "type" "sselog,ssemov,ssemov") 1213 (set_attr "mode" "V4SF,V2SF,V2SF")]) 1214 1215(define_insn "sse_movss" 1216 [(set (match_operand:V4SF 0 "register_operand" "=x") 1217 (vec_merge:V4SF 1218 (match_operand:V4SF 2 "register_operand" "x") 1219 (match_operand:V4SF 1 "register_operand" "0") 1220 (const_int 1)))] 1221 "TARGET_SSE" 1222 "movss\t{%2, %0|%0, %2}" 1223 [(set_attr "type" "ssemov") 1224 (set_attr "mode" "SF")]) 1225 1226(define_insn "*vec_dupv4sf" 1227 [(set (match_operand:V4SF 0 "register_operand" "=x") 1228 (vec_duplicate:V4SF 1229 (match_operand:SF 1 "register_operand" "0")))] 1230 "TARGET_SSE" 1231 "shufps\t{$0, %0, %0|%0, %0, 0}" 1232 [(set_attr "type" "sselog1") 1233 (set_attr "mode" "V4SF")]) 1234 1235;; ??? In theory we can match memory for the MMX alternative, but allowing 1236;; nonimmediate_operand for operand 2 and *not* allowing memory for the SSE 1237;; alternatives pretty much forces the MMX alternative to be chosen. 
;; Build a V2SF from two SF values.  Alternatives 0/1 use SSE registers
;; (unpcklps, or movss when operand 2 is the zero constant "C");
;; alternatives 2/3 are the corresponding MMX forms (punpckldq, movd).
(define_insn "*sse_concatv2sf"
  [(set (match_operand:V2SF 0 "register_operand" "=x,x,*y,*y")
        (vec_concat:V2SF
          (match_operand:SF 1 "nonimmediate_operand" " 0,m, 0, m")
          (match_operand:SF 2 "reg_or_0_operand" " x,C,*y, C")))]
  "TARGET_SSE"
  "@
   unpcklps\t{%2, %0|%0, %2}
   movss\t{%1, %0|%0, %1}
   punpckldq\t{%2, %0|%0, %2}
   movd\t{%1, %0|%0, %1}"
  [(set_attr "type" "sselog,ssemov,mmxcvt,mmxmov")
   (set_attr "mode" "V4SF,SF,DI,DI")])

;; Build a V4SF from two V2SF halves: operand 1 is the low half (tied to
;; the destination), operand 2 the high half (register or memory).
(define_insn "*sse_concatv4sf"
  [(set (match_operand:V4SF 0 "register_operand" "=x,x")
        (vec_concat:V4SF
          (match_operand:V2SF 1 "register_operand" " 0,0")
          (match_operand:V2SF 2 "nonimmediate_operand" " x,m")))]
  "TARGET_SSE"
  "@
   movlhps\t{%2, %0|%0, %2}
   movhps\t{%2, %0|%0, %2}"
  [(set_attr "type" "ssemov")
   (set_attr "mode" "V4SF,V2SF")])

;; Standard vec_init expander; all work is delegated to
;; ix86_expand_vector_init.
(define_expand "vec_initv4sf"
  [(match_operand:V4SF 0 "register_operand" "")
   (match_operand 1 "" "")]
  "TARGET_SSE"
{
  ix86_expand_vector_init (false, operands[0], operands[1]);
  DONE;
})

;; Set element 0 of a V4SF to the SF operand 2; the other elements come
;; from operand 1 (vec_merge mask 1).  The last alternative (memory
;; destination) outputs "#" and is handled by the define_split below.
(define_insn "*vec_setv4sf_0"
  [(set (match_operand:V4SF 0 "nonimmediate_operand" "=x,x,Y ,m")
        (vec_merge:V4SF
          (vec_duplicate:V4SF
            (match_operand:SF 2 "general_operand" " x,m,*r,x*rfF"))
          (match_operand:V4SF 1 "vector_move_operand" " 0,C,C ,0")
          (const_int 1)))]
  "TARGET_SSE"
  "@
   movss\t{%2, %0|%0, %2}
   movss\t{%2, %0|%0, %2}
   movd\t{%2, %0|%0, %2}
   #"
  [(set_attr "type" "ssemov")
   (set_attr "mode" "SF")])

;; After reload, setting element 0 of a V4SF in memory becomes a plain SF
;; store to offset 0 of that memory.
(define_split
  [(set (match_operand:V4SF 0 "memory_operand" "")
        (vec_merge:V4SF
          (vec_duplicate:V4SF
            (match_operand:SF 1 "nonmemory_operand" ""))
          (match_dup 0)
          (const_int 1)))]
  "TARGET_SSE && reload_completed"
  [(const_int 0)]
{
  emit_move_insn (adjust_address (operands[0], SFmode, 0), operands[1]);
  DONE;
})

;; Standard vec_set expander; operand 2 is the constant element index.
(define_expand "vec_setv4sf"
  [(match_operand:V4SF 0 "register_operand" "")
   (match_operand:SF 1 "register_operand" "")
   (match_operand 2 "const_int_operand" "")]
  "TARGET_SSE"
{
  ix86_expand_vector_set (false, operands[0], operands[1],
                          INTVAL (operands[2]));
  DONE;
})

;; Extract element 0 of a V4SF.  Always split after reload into a plain
;; SFmode move from the low part of operand 1.
(define_insn_and_split "*vec_extractv4sf_0"
  [(set (match_operand:SF 0 "nonimmediate_operand" "=x,m,fr")
        (vec_select:SF
          (match_operand:V4SF 1 "nonimmediate_operand" "xm,x,m")
          (parallel [(const_int 0)])))]
  "TARGET_SSE && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
  "#"
  "&& reload_completed"
  [(const_int 0)]
{
  rtx op1 = operands[1];
  /* A hard register is re-expressed directly in SFmode; anything else
     goes through gen_lowpart.  */
  if (REG_P (op1))
    op1 = gen_rtx_REG (SFmode, REGNO (op1));
  else
    op1 = gen_lowpart (SFmode, op1);
  emit_move_insn (operands[0], op1);
  DONE;
})

;; Standard vec_extract expander; operand 2 is the constant element index.
(define_expand "vec_extractv4sf"
  [(match_operand:SF 0 "register_operand" "")
   (match_operand:V4SF 1 "register_operand" "")
   (match_operand 2 "const_int_operand" "")]
  "TARGET_SSE"
{
  ix86_expand_vector_extract (false, operands[0], operands[1],
                              INTVAL (operands[2]));
  DONE;
})

;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;;
;; Parallel double-precision floating point arithmetic
;;
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;

;; V2DF negation, expanded by ix86_expand_fp_absneg_operator.
(define_expand "negv2df2"
  [(set (match_operand:V2DF 0 "register_operand" "")
        (neg:V2DF (match_operand:V2DF 1 "nonimmediate_operand" "")))]
  "TARGET_SSE2"
  "ix86_expand_fp_absneg_operator (NEG, V2DFmode, operands); DONE;")

;; V2DF absolute value, expanded by ix86_expand_fp_absneg_operator.
(define_expand "absv2df2"
  [(set (match_operand:V2DF 0 "register_operand" "")
        (abs:V2DF (match_operand:V2DF 1 "nonimmediate_operand" "")))]
  "TARGET_SSE2"
  "ix86_expand_fp_absneg_operator (ABS, V2DFmode, operands); DONE;")

;; Packed V2DF addition.  The expander only fixes up the operands; the
;; insn below does the matching.
(define_expand "addv2df3"
  [(set (match_operand:V2DF 0 "register_operand" "")
        (plus:V2DF (match_operand:V2DF 1 "nonimmediate_operand" "")
                   (match_operand:V2DF 2 "nonimmediate_operand" "")))]
  "TARGET_SSE2"
  "ix86_fixup_binary_operands_no_copy (PLUS, V2DFmode, operands);")

(define_insn "*addv2df3"
  [(set (match_operand:V2DF 0 "register_operand" "=x")
        (plus:V2DF (match_operand:V2DF 1 "nonimmediate_operand" "%0")
                   (match_operand:V2DF 2 "nonimmediate_operand" "xm")))]
  "TARGET_SSE2 && ix86_binary_operator_ok (PLUS, V2DFmode, operands)"
  "addpd\t{%2, %0|%0, %2}"
  [(set_attr "type" "sseadd")
   (set_attr "mode" "V2DF")])

;; Scalar add on element 0; element 1 is kept from operand 1
;; (vec_merge mask 1).  The operand check uses V2DFmode, the mode of this
;; pattern's operands (it previously named V4SFmode).
(define_insn "sse2_vmaddv2df3"
  [(set (match_operand:V2DF 0 "register_operand" "=x")
        (vec_merge:V2DF
          (plus:V2DF (match_operand:V2DF 1 "register_operand" "0")
                     (match_operand:V2DF 2 "nonimmediate_operand" "xm"))
          (match_dup 1)
          (const_int 1)))]
  "TARGET_SSE2 && ix86_binary_operator_ok (PLUS, V2DFmode, operands)"
  "addsd\t{%2, %0|%0, %2}"
  [(set_attr "type" "sseadd")
   (set_attr "mode" "DF")])

;; Packed V2DF subtraction.
(define_expand "subv2df3"
  [(set (match_operand:V2DF 0 "register_operand" "")
        (minus:V2DF (match_operand:V2DF 1 "nonimmediate_operand" "")
                    (match_operand:V2DF 2 "nonimmediate_operand" "")))]
  "TARGET_SSE2"
  "ix86_fixup_binary_operands_no_copy (MINUS, V2DFmode, operands);")

(define_insn "*subv2df3"
  [(set (match_operand:V2DF 0 "register_operand" "=x")
        (minus:V2DF (match_operand:V2DF 1 "register_operand" "0")
                    (match_operand:V2DF 2 "nonimmediate_operand" "xm")))]
  "TARGET_SSE2"
  "subpd\t{%2, %0|%0, %2}"
  [(set_attr "type" "sseadd")
   (set_attr "mode" "V2DF")])

;; Scalar subtract on element 0; element 1 is kept from operand 1.
(define_insn "sse2_vmsubv2df3"
  [(set (match_operand:V2DF 0 "register_operand" "=x")
        (vec_merge:V2DF
          (minus:V2DF (match_operand:V2DF 1 "register_operand" "0")
                      (match_operand:V2DF 2 "nonimmediate_operand" "xm"))
          (match_dup 1)
          (const_int 1)))]
  "TARGET_SSE2"
  "subsd\t{%2, %0|%0, %2}"
  [(set_attr "type" "sseadd")
   (set_attr "mode" "DF")])

;; Packed V2DF multiplication.
(define_expand "mulv2df3"
  [(set (match_operand:V2DF 0 "register_operand" "")
        (mult:V2DF (match_operand:V2DF 1 "nonimmediate_operand" "")
                   (match_operand:V2DF 2 "nonimmediate_operand" "")))]
  "TARGET_SSE2"
  "ix86_fixup_binary_operands_no_copy (MULT, V2DFmode, operands);")

(define_insn "*mulv2df3"
  [(set (match_operand:V2DF 0 "register_operand" "=x")
        (mult:V2DF (match_operand:V2DF 1 "nonimmediate_operand" "%0")
                   (match_operand:V2DF 2 "nonimmediate_operand" "xm")))]
  "TARGET_SSE2 && ix86_binary_operator_ok (MULT, V2DFmode, operands)"
  "mulpd\t{%2, %0|%0, %2}"
  [(set_attr "type" "ssemul")
   (set_attr "mode" "V2DF")])

;; Scalar multiply on element 0; element 1 is kept from operand 1.
(define_insn "sse2_vmmulv2df3"
  [(set (match_operand:V2DF 0 "register_operand" "=x")
        (vec_merge:V2DF
          (mult:V2DF (match_operand:V2DF 1 "register_operand" "0")
                     (match_operand:V2DF 2 "nonimmediate_operand" "xm"))
          (match_dup 1)
          (const_int 1)))]
  "TARGET_SSE2 && ix86_binary_operator_ok (MULT, V2DFmode, operands)"
  "mulsd\t{%2, %0|%0, %2}"
  [(set_attr "type" "ssemul")
   (set_attr "mode" "DF")])

;; Packed V2DF division.
(define_expand "divv2df3"
  [(set (match_operand:V2DF 0 "register_operand" "")
        (div:V2DF (match_operand:V2DF 1 "register_operand" "")
                  (match_operand:V2DF 2 "nonimmediate_operand" "")))]
  "TARGET_SSE2"
  "ix86_fixup_binary_operands_no_copy (DIV, V2DFmode, operands);")

(define_insn "*divv2df3"
  [(set (match_operand:V2DF 0 "register_operand" "=x")
        (div:V2DF (match_operand:V2DF 1 "register_operand" "0")
                  (match_operand:V2DF 2 "nonimmediate_operand" "xm")))]
  "TARGET_SSE2"
  "divpd\t{%2, %0|%0, %2}"
  [(set_attr "type" "ssediv")
   (set_attr "mode" "V2DF")])

;; Scalar divide on element 0; element 1 is kept from operand 1.
(define_insn "sse2_vmdivv2df3"
  [(set (match_operand:V2DF 0 "register_operand" "=x")
        (vec_merge:V2DF
          (div:V2DF (match_operand:V2DF 1 "register_operand" "0")
                    (match_operand:V2DF 2 "nonimmediate_operand" "xm"))
          (match_dup 1)
          (const_int 1)))]
  "TARGET_SSE2"
  "divsd\t{%2, %0|%0, %2}"
  [(set_attr "type" "ssediv")
   (set_attr "mode" "DF")])

;; Packed V2DF square root.
(define_insn "sqrtv2df2"
  [(set (match_operand:V2DF 0 "register_operand" "=x")
        (sqrt:V2DF (match_operand:V2DF 1 "nonimmediate_operand" "xm")))]
  "TARGET_SSE2"
  "sqrtpd\t{%1, %0|%0, %1}"
  [(set_attr "type" "sse")
   (set_attr "mode" "V2DF")])

;; Scalar square root of element 0 of operand 1; element 1 is taken from
;; operand 2, which is tied to the destination.  The predicate for
;; operand 1 is nonimmediate_operand so that it agrees with the "xm"
;; constraint (it was register_operand, which never let the memory
;; alternative match).
(define_insn "sse2_vmsqrtv2df2"
  [(set (match_operand:V2DF 0 "register_operand" "=x")
        (vec_merge:V2DF
          (sqrt:V2DF (match_operand:V2DF 1 "nonimmediate_operand" "xm"))
          (match_operand:V2DF 2 "register_operand" "0")
          (const_int 1)))]
  "TARGET_SSE2"
  "sqrtsd\t{%1, %0|%0, %1}"
  [(set_attr "type" "sse")
   (set_attr "mode" "DF")])

;; ??? For !flag_finite_math_only, the representation with SMIN/SMAX
;; isn't really correct, as those rtl operators aren't defined when
;; applied to NaNs.  Hopefully the optimizers won't get too smart on us.

;; Packed V2DF maximum.  Unless flag_finite_math_only is set, operand 1 is
;; forced into a register before the usual operand fixup, so that the
;; non-commutative "*smaxv2df3" insn below can match.
(define_expand "smaxv2df3"
  [(set (match_operand:V2DF 0 "register_operand" "")
        (smax:V2DF (match_operand:V2DF 1 "nonimmediate_operand" "")
                   (match_operand:V2DF 2 "nonimmediate_operand" "")))]
  "TARGET_SSE2"
{
  if (!flag_finite_math_only)
    operands[1] = force_reg (V2DFmode, operands[1]);
  ix86_fixup_binary_operands_no_copy (SMAX, V2DFmode, operands);
})

;; Commutative form ("%0"), only valid under flag_finite_math_only.
(define_insn "*smaxv2df3_finite"
  [(set (match_operand:V2DF 0 "register_operand" "=x")
        (smax:V2DF (match_operand:V2DF 1 "nonimmediate_operand" "%0")
                   (match_operand:V2DF 2 "nonimmediate_operand" "xm")))]
  "TARGET_SSE2 && flag_finite_math_only
   && ix86_binary_operator_ok (SMAX, V2DFmode, operands)"
  "maxpd\t{%2, %0|%0, %2}"
  [(set_attr "type" "sseadd")
   (set_attr "mode" "V2DF")])

;; Non-commutative form: operand 1 must be the destination register.
(define_insn "*smaxv2df3"
  [(set (match_operand:V2DF 0 "register_operand" "=x")
        (smax:V2DF (match_operand:V2DF 1 "register_operand" "0")
                   (match_operand:V2DF 2 "nonimmediate_operand" "xm")))]
  "TARGET_SSE2"
  "maxpd\t{%2, %0|%0, %2}"
  [(set_attr "type" "sseadd")
   (set_attr "mode" "V2DF")])

;; Scalar maximum on element 0; element 1 is kept from operand 1.
(define_insn "sse2_vmsmaxv2df3"
  [(set (match_operand:V2DF 0 "register_operand" "=x")
        (vec_merge:V2DF
          (smax:V2DF (match_operand:V2DF 1 "register_operand" "0")
                     (match_operand:V2DF 2 "nonimmediate_operand" "xm"))
          (match_dup 1)
          (const_int 1)))]
  "TARGET_SSE2"
  "maxsd\t{%2, %0|%0, %2}"
  [(set_attr "type" "sseadd")
   (set_attr "mode" "DF")])

;; Packed V2DF minimum; same structure as smaxv2df3.
(define_expand "sminv2df3"
  [(set (match_operand:V2DF 0 "register_operand" "")
        (smin:V2DF (match_operand:V2DF 1 "nonimmediate_operand" "")
                   (match_operand:V2DF 2 "nonimmediate_operand" "")))]
  "TARGET_SSE2"
{
  if (!flag_finite_math_only)
    operands[1] = force_reg (V2DFmode, operands[1]);
  ix86_fixup_binary_operands_no_copy (SMIN, V2DFmode, operands);
})

;; Commutative form ("%0"), only valid under flag_finite_math_only.
(define_insn "*sminv2df3_finite"
  [(set (match_operand:V2DF 0 "register_operand" "=x")
        (smin:V2DF (match_operand:V2DF 1 "nonimmediate_operand" "%0")
                   (match_operand:V2DF 2 "nonimmediate_operand" "xm")))]
  "TARGET_SSE2 && flag_finite_math_only
   && ix86_binary_operator_ok (SMIN, V2DFmode, operands)"
  "minpd\t{%2, %0|%0, %2}"
  [(set_attr "type" "sseadd")
   (set_attr "mode" "V2DF")])

;; Non-commutative form: operand 1 must be the destination register.
(define_insn "*sminv2df3"
  [(set (match_operand:V2DF 0 "register_operand" "=x")
        (smin:V2DF (match_operand:V2DF 1 "register_operand" "0")
                   (match_operand:V2DF 2 "nonimmediate_operand" "xm")))]
  "TARGET_SSE2"
  "minpd\t{%2, %0|%0, %2}"
  [(set_attr "type" "sseadd")
   (set_attr "mode" "V2DF")])

;; Scalar minimum on element 0; element 1 is kept from operand 1.
(define_insn "sse2_vmsminv2df3"
  [(set (match_operand:V2DF 0 "register_operand" "=x")
        (vec_merge:V2DF
          (smin:V2DF (match_operand:V2DF 1 "register_operand" "0")
                     (match_operand:V2DF 2 "nonimmediate_operand" "xm"))
          (match_dup 1)
          (const_int 1)))]
  "TARGET_SSE2"
  "minsd\t{%2, %0|%0, %2}"
  [(set_attr "type" "sseadd")
   (set_attr "mode" "DF")])

;; addsubpd subtracts in element 0 and adds in element 1.  In vec_merge a
;; set mask bit selects the element from the first arm (the sum), so the
;; mask must be 2: element 1 from PLUS, element 0 from MINUS.  (A mask of
;; 1 would describe the opposite lane assignment.)
(define_insn "sse3_addsubv2df3"
  [(set (match_operand:V2DF 0 "register_operand" "=x")
        (vec_merge:V2DF
          (plus:V2DF
            (match_operand:V2DF 1 "register_operand" "0")
            (match_operand:V2DF 2 "nonimmediate_operand" "xm"))
          (minus:V2DF (match_dup 1) (match_dup 2))
          (const_int 2)))]
  "TARGET_SSE3"
  "addsubpd\t{%2, %0|%0, %2}"
  [(set_attr "type" "sseadd")
   (set_attr "mode" "V2DF")])

;; Horizontal add: result element 0 is op1[0] + op1[1], result element 1
;; is op2[0] + op2[1].
(define_insn "sse3_haddv2df3"
  [(set (match_operand:V2DF 0 "register_operand" "=x")
        (vec_concat:V2DF
          (plus:DF
            (vec_select:DF
              (match_operand:V2DF 1 "register_operand" "0")
              (parallel [(const_int 0)]))
            (vec_select:DF (match_dup 1) (parallel [(const_int 1)])))
          (plus:DF
            (vec_select:DF
              (match_operand:V2DF 2 "nonimmediate_operand" "xm")
              (parallel [(const_int 0)]))
            (vec_select:DF (match_dup 2) (parallel [(const_int 1)])))))]
  "TARGET_SSE3"
  "haddpd\t{%2, %0|%0, %2}"
  [(set_attr "type" "sseadd")
   (set_attr "mode" "V2DF")])

;; Horizontal subtract: result element 0 is op1[0] - op1[1], result
;; element 1 is op2[0] - op2[1].
(define_insn "sse3_hsubv2df3"
  [(set (match_operand:V2DF 0 "register_operand" "=x")
        (vec_concat:V2DF
          (minus:DF
            (vec_select:DF
              (match_operand:V2DF 1 "register_operand" "0")
              (parallel [(const_int 0)]))
            (vec_select:DF (match_dup 1) (parallel [(const_int 1)])))
          (minus:DF
            (vec_select:DF
              (match_operand:V2DF 2 "nonimmediate_operand" "xm")
              (parallel [(const_int 0)]))
            (vec_select:DF (match_dup 2) (parallel [(const_int 1)])))))]
  "TARGET_SSE3"
  "hsubpd\t{%2, %0|%0, %2}"
  [(set_attr "type" "sseadd")
   (set_attr "mode" "V2DF")])

;; Sum reduction of a V2DF, implemented as a horizontal add of operand 1
;; with itself.
(define_expand "reduc_splus_v2df"
  [(match_operand:V2DF 0 "register_operand" "")
   (match_operand:V2DF 1 "register_operand" "")]
  "TARGET_SSE3"
{
  emit_insn (gen_sse3_haddv2df3 (operands[0], operands[1], operands[1]));
  DONE;
})

;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;;
;; Parallel double-precision floating point comparisons
;;
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;

;; Packed compare producing a mask; the comparison code in operand 3 is
;; printed into the mnemonic through %D3.
(define_insn "sse2_maskcmpv2df3"
  [(set (match_operand:V2DF 0 "register_operand" "=x")
        (match_operator:V2DF 3 "sse_comparison_operator"
          [(match_operand:V2DF 1 "register_operand" "0")
           (match_operand:V2DF 2 "nonimmediate_operand" "xm")]))]
  "TARGET_SSE2"
  "cmp%D3pd\t{%2, %0|%0, %2}"
  [(set_attr "type" "ssecmp")
   (set_attr "mode" "V2DF")])

;; Scalar compare on element 0; element 1 is kept from operand 1.
(define_insn "sse2_vmmaskcmpv2df3"
  [(set (match_operand:V2DF 0 "register_operand" "=x")
        (vec_merge:V2DF
          (match_operator:V2DF 3 "sse_comparison_operator"
            [(match_operand:V2DF 1 "register_operand" "0")
             (match_operand:V2DF 2 "nonimmediate_operand" "xm")])
          (match_dup 1)
          (const_int 1)))]
  "TARGET_SSE2"
  "cmp%D3sd\t{%2, %0|%0, %2}"
  [(set_attr "type" "ssecmp")
   (set_attr "mode" "DF")])

;; Compare element 0 of both operands and set the flags register in
;; CCFPmode (comisd).
(define_insn "sse2_comi"
  [(set (reg:CCFP FLAGS_REG)
        (compare:CCFP
          (vec_select:DF
            (match_operand:V2DF 0 "register_operand" "x")
            (parallel [(const_int 0)]))
          (vec_select:DF
            (match_operand:V2DF 1 "nonimmediate_operand" "xm")
            (parallel [(const_int 0)]))))]
  "TARGET_SSE2"
  "comisd\t{%1, %0|%0, %1}"
  [(set_attr "type" "ssecomi")
   (set_attr "mode" "DF")])

;; As sse2_comi, but in CCFPUmode (ucomisd).
(define_insn "sse2_ucomi"
  [(set (reg:CCFPU FLAGS_REG)
        (compare:CCFPU
          (vec_select:DF
            (match_operand:V2DF 0 "register_operand" "x")
            (parallel [(const_int 0)]))
          (vec_select:DF
            (match_operand:V2DF 1 "nonimmediate_operand" "xm")
            (parallel [(const_int 0)]))))]
  "TARGET_SSE2"
  "ucomisd\t{%1, %0|%0, %1}"
  [(set_attr "type" "ssecomi")
   (set_attr "mode" "DF")])

;; Vector conditional move; FAILs when ix86_expand_fp_vcond cannot handle
;; the requested comparison.
(define_expand "vcondv2df"
  [(set (match_operand:V2DF 0 "register_operand" "")
        (if_then_else:V2DF
          (match_operator 3 ""
            [(match_operand:V2DF 4 "nonimmediate_operand" "")
             (match_operand:V2DF 5 "nonimmediate_operand" "")])
          (match_operand:V2DF 1 "general_operand" "")
          (match_operand:V2DF 2 "general_operand" "")))]
  "TARGET_SSE2"
{
  if (ix86_expand_fp_vcond (operands))
    DONE;
  else
    FAIL;
})

;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;;
;; Parallel double-precision floating point logical operations
;;
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;

;; Packed V2DF bitwise AND.
(define_expand "andv2df3"
  [(set (match_operand:V2DF 0 "register_operand" "")
        (and:V2DF (match_operand:V2DF 1 "nonimmediate_operand" "")
                  (match_operand:V2DF 2 "nonimmediate_operand" "")))]
  "TARGET_SSE2"
  "ix86_fixup_binary_operands_no_copy (AND, V2DFmode, operands);")

(define_insn "*andv2df3"
  [(set (match_operand:V2DF 0 "register_operand" "=x")
        (and:V2DF (match_operand:V2DF 1 "nonimmediate_operand" "%0")
                  (match_operand:V2DF 2 "nonimmediate_operand" "xm")))]
  "TARGET_SSE2 && ix86_binary_operator_ok (AND, V2DFmode, operands)"
  "andpd\t{%2, %0|%0, %2}"
  [(set_attr "type" "sselog")
   (set_attr "mode" "V2DF")])

;; Packed V2DF and-not: (~operand 1) & operand 2.
(define_insn "sse2_nandv2df3"
  [(set (match_operand:V2DF 0 "register_operand" "=x")
        (and:V2DF (not:V2DF (match_operand:V2DF 1 "register_operand" "0"))
                  (match_operand:V2DF 2 "nonimmediate_operand" "xm")))]
  "TARGET_SSE2"
  "andnpd\t{%2, %0|%0, %2}"
  [(set_attr "type" "sselog")
   (set_attr "mode" "V2DF")])

;; Packed V2DF bitwise inclusive OR.
(define_expand "iorv2df3"
  [(set (match_operand:V2DF 0 "register_operand" "")
        (ior:V2DF (match_operand:V2DF 1 "nonimmediate_operand" "")
                  (match_operand:V2DF 2 "nonimmediate_operand" "")))]
  "TARGET_SSE2"
  "ix86_fixup_binary_operands_no_copy (IOR, V2DFmode, operands);")

(define_insn "*iorv2df3"
  [(set (match_operand:V2DF 0 "register_operand" "=x")
        (ior:V2DF (match_operand:V2DF 1 "nonimmediate_operand" "%0")
                  (match_operand:V2DF 2 "nonimmediate_operand" "xm")))]
  "TARGET_SSE2 && ix86_binary_operator_ok (IOR, V2DFmode, operands)"
  "orpd\t{%2, %0|%0, %2}"
  [(set_attr "type" "sselog")
   (set_attr "mode" "V2DF")])

;; Packed V2DF bitwise exclusive OR.
(define_expand "xorv2df3"
  [(set (match_operand:V2DF 0 "register_operand" "")
        (xor:V2DF (match_operand:V2DF 1 "nonimmediate_operand" "")
                  (match_operand:V2DF 2 "nonimmediate_operand" "")))]
  "TARGET_SSE2"
  "ix86_fixup_binary_operands_no_copy (XOR, V2DFmode, operands);")

(define_insn "*xorv2df3"
  [(set (match_operand:V2DF 0 "register_operand" "=x")
        (xor:V2DF (match_operand:V2DF 1 "nonimmediate_operand" "%0")
                  (match_operand:V2DF 2 "nonimmediate_operand" "xm")))]
  "TARGET_SSE2 && ix86_binary_operator_ok (XOR, V2DFmode, operands)"
  "xorpd\t{%2, %0|%0, %2}"
  [(set_attr "type" "sselog")
   (set_attr "mode" "V2DF")])

;; Also define scalar versions.  These are used for abs, neg, and
;; conditional move.  Using subregs into vector modes causes register
;; allocation lossage.  These patterns do not allow memory operands
;; because the native instructions read the full 128-bits.

;; DF bitwise AND on SSE registers (andpd).
(define_insn "*anddf3"
  [(set (match_operand:DF 0 "register_operand" "=x")
        (and:DF
          (match_operand:DF 1 "register_operand" "0")
          (match_operand:DF 2 "register_operand" "x")))]
  "TARGET_SSE2"
  "andpd\t{%2, %0|%0, %2}"
  [(set_attr "type" "sselog")
   (set_attr "mode" "V2DF")])

;; DF and-not: (~operand 1) & operand 2 (andnpd).
(define_insn "*nanddf3"
  [(set (match_operand:DF 0 "register_operand" "=x")
        (and:DF
          (not:DF (match_operand:DF 1 "register_operand" "0"))
          (match_operand:DF 2 "register_operand" "x")))]
  "TARGET_SSE2"
  "andnpd\t{%2, %0|%0, %2}"
  [(set_attr "type" "sselog")
   (set_attr "mode" "V2DF")])

;; DF bitwise inclusive OR (orpd).
(define_insn "*iordf3"
  [(set (match_operand:DF 0 "register_operand" "=x")
        (ior:DF
          (match_operand:DF 1 "register_operand" "0")
          (match_operand:DF 2 "register_operand" "x")))]
  "TARGET_SSE2"
  "orpd\t{%2, %0|%0, %2}"
  [(set_attr "type" "sselog")
   (set_attr "mode" "V2DF")])

;; DF bitwise exclusive OR (xorpd).
(define_insn "*xordf3"
  [(set (match_operand:DF 0 "register_operand" "=x")
        (xor:DF
          (match_operand:DF 1 "register_operand" "0")
          (match_operand:DF 2 "register_operand" "x")))]
  "TARGET_SSE2"
  "xorpd\t{%2, %0|%0, %2}"
  [(set_attr "type" "sselog")
   (set_attr "mode" "V2DF")])

;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;;
;; Parallel double-precision floating point conversion operations
;;
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;

;; V2SI (MMX register or memory) -> V2DF.  Only the MMX-register
;; alternative is tagged with unit "mmx".
(define_insn "sse2_cvtpi2pd"
  [(set (match_operand:V2DF 0 "register_operand" "=x,x")
        (float:V2DF
          (match_operand:V2SI 1 "nonimmediate_operand" "y,m")))]
  "TARGET_SSE2"
  "cvtpi2pd\t{%1, %0|%0, %1}"
  [(set_attr "type" "ssecvt")
   (set_attr "unit" "mmx,*")
   (set_attr "mode" "V2DF")])

;; V2DF -> V2SI in an MMX register, non-truncating (UNSPEC_FIX_NOTRUNC).
(define_insn "sse2_cvtpd2pi"
  [(set (match_operand:V2SI 0 "register_operand" "=y")
        (unspec:V2SI
          [(match_operand:V2DF 1 "nonimmediate_operand" "xm")]
          UNSPEC_FIX_NOTRUNC))]
  "TARGET_SSE2"
  "cvtpd2pi\t{%1, %0|%0, %1}"
  [(set_attr "type" "ssecvt")
   (set_attr "unit" "mmx")
   (set_attr "mode" "DI")])

;; V2DF -> V2SI in an MMX register, truncating (FIX).
(define_insn "sse2_cvttpd2pi"
  [(set (match_operand:V2SI 0 "register_operand" "=y")
        (fix:V2SI
          (match_operand:V2DF 1 "nonimmediate_operand" "xm")))]
  "TARGET_SSE2"
  "cvttpd2pi\t{%1, %0|%0, %1}"
  [(set_attr "type" "ssecvt")
   (set_attr "unit" "mmx")
   (set_attr "mode" "TI")])

;; SImode operand 2 -> DF, merged into element 0 of operand 1
;; (vec_merge mask 1); element 1 is kept from operand 1.
(define_insn "sse2_cvtsi2sd"
  [(set (match_operand:V2DF 0 "register_operand" "=x,x")
        (vec_merge:V2DF
          (vec_duplicate:V2DF
            (float:DF
              (match_operand:SI 2 "nonimmediate_operand" "r,m")))
          (match_operand:V2DF 1 "register_operand" "0,0")
          (const_int 1)))]
  "TARGET_SSE2"
  "cvtsi2sd\t{%2, %0|%0, %2}"
  [(set_attr "type" "sseicvt")
   (set_attr "athlon_decode" "double,direct")
   (set_attr "amdfam10_decode" "vector,double")
   (set_attr "mode" "DF")])

;; DImode source form of sse2_cvtsi2sd; requires TARGET_64BIT.
(define_insn "sse2_cvtsi2sdq"
  [(set (match_operand:V2DF 0 "register_operand" "=x,x")
        (vec_merge:V2DF
          (vec_duplicate:V2DF
            (float:DF
              (match_operand:DI 2 "nonimmediate_operand" "r,m")))
          (match_operand:V2DF 1 "register_operand" "0,0")
          (const_int 1)))]
  "TARGET_SSE2 && TARGET_64BIT"
  "cvtsi2sdq\t{%2, %0|%0, %2}"
  [(set_attr "type" "sseicvt")
   (set_attr "athlon_decode" "double,direct")
   (set_attr "amdfam10_decode" "vector,double")
   (set_attr "mode" "DF")])

;; Element 0 of operand 1 -> SImode, non-truncating.
(define_insn "sse2_cvtsd2si"
  [(set (match_operand:SI 0 "register_operand" "=r,r")
        (unspec:SI
          [(vec_select:DF
             (match_operand:V2DF 1 "nonimmediate_operand" "x,m")
             (parallel [(const_int 0)]))]
          UNSPEC_FIX_NOTRUNC))]
  "TARGET_SSE2"
  "cvtsd2si\t{%1, %0|%0, %1}"
  [(set_attr "type" "sseicvt")
   (set_attr "athlon_decode" "double,vector")
   (set_attr "amdfam10_decode" "double,double")
   (set_attr "mode" "SI")])

;; DImode result form of sse2_cvtsd2si; requires TARGET_64BIT.
(define_insn "sse2_cvtsd2siq"
  [(set (match_operand:DI 0 "register_operand" "=r,r")
        (unspec:DI
          [(vec_select:DF
             (match_operand:V2DF 1 "nonimmediate_operand" "x,m")
             (parallel [(const_int 0)]))]
          UNSPEC_FIX_NOTRUNC))]
  "TARGET_SSE2 && TARGET_64BIT"
  "cvtsd2siq\t{%1, %0|%0, %1}"
  [(set_attr "type" "sseicvt")
   (set_attr "athlon_decode" "double,vector")
   (set_attr "amdfam10_decode" "double,double")
   (set_attr "mode" "DI")])

;; Element 0 of operand 1 -> SImode, truncating.
(define_insn "sse2_cvttsd2si"
  [(set (match_operand:SI 0 "register_operand" "=r,r")
        (fix:SI
          (vec_select:DF
            (match_operand:V2DF 1 "nonimmediate_operand" "x,m")
            (parallel [(const_int 0)]))))]
  "TARGET_SSE2"
  "cvttsd2si\t{%1, %0|%0, %1}"
  [(set_attr "type" "sseicvt")
   (set_attr "athlon_decode" "double,vector")
   (set_attr "amdfam10_decode" "double,double")
   (set_attr "mode" "SI")])

;; DImode result form of sse2_cvttsd2si; requires TARGET_64BIT.
(define_insn "sse2_cvttsd2siq"
  [(set (match_operand:DI 0 "register_operand" "=r,r")
        (fix:DI
          (vec_select:DF
            (match_operand:V2DF 1 "nonimmediate_operand" "x,m")
            (parallel [(const_int 0)]))))]
  "TARGET_SSE2 && TARGET_64BIT"
  "cvttsd2siq\t{%1, %0|%0, %1}"
  [(set_attr "type" "sseicvt")
   (set_attr "athlon_decode" "double,vector")
   (set_attr "amdfam10_decode" "double,double")
   (set_attr "mode" "DI")])

;; Elements 0 and 1 of the V4SI operand 1 -> V2DF.
(define_insn "sse2_cvtdq2pd"
  [(set (match_operand:V2DF 0 "register_operand" "=x")
        (float:V2DF
          (vec_select:V2SI
            (match_operand:V4SI 1 "nonimmediate_operand" "xm")
            (parallel [(const_int 0) (const_int 1)]))))]
  "TARGET_SSE2"
  "cvtdq2pd\t{%1, %0|%0, %1}"
  [(set_attr "type" "ssecvt")
   (set_attr "mode" "V2DF")])

;; V2DF -> low half of a V4SI, non-truncating.  The expander supplies the
;; V2SI zero constant that forms the high half.
(define_expand "sse2_cvtpd2dq"
  [(set (match_operand:V4SI 0 "register_operand" "")
        (vec_concat:V4SI
          (unspec:V2SI
            [(match_operand:V2DF 1 "nonimmediate_operand" "")]
            UNSPEC_FIX_NOTRUNC)
          (match_dup 2)))]
  "TARGET_SSE2"
  "operands[2] = CONST0_RTX (V2SImode);")

(define_insn "*sse2_cvtpd2dq"
  [(set (match_operand:V4SI 0 "register_operand" "=x")
        (vec_concat:V4SI
          (unspec:V2SI
            [(match_operand:V2DF 1 "nonimmediate_operand" "xm")]
            UNSPEC_FIX_NOTRUNC)
          (match_operand:V2SI 2 "const0_operand" "")))]
  "TARGET_SSE2"
  "cvtpd2dq\t{%1, %0|%0, %1}"
  [(set_attr "type" "ssecvt")
   (set_attr "amdfam10_decode" "double")
   (set_attr "mode" "TI")])

;; Truncating counterpart of sse2_cvtpd2dq; again the high half is the
;; V2SI zero constant supplied by the expander.
(define_expand "sse2_cvttpd2dq"
  [(set (match_operand:V4SI 0 "register_operand" "")
        (vec_concat:V4SI
          (fix:V2SI
            (match_operand:V2DF 1 "nonimmediate_operand" ""))
          (match_dup 2)))]
  "TARGET_SSE2"
  "operands[2] = CONST0_RTX (V2SImode);")

(define_insn "*sse2_cvttpd2dq"
  [(set (match_operand:V4SI 0 "register_operand" "=x")
        (vec_concat:V4SI
          (fix:V2SI
            (match_operand:V2DF 1 "nonimmediate_operand" "xm"))
          (match_operand:V2SI 2 "const0_operand" "")))]
  "TARGET_SSE2"
  "cvttpd2dq\t{%1, %0|%0, %1}"
  [(set_attr "type" "ssecvt")
   (set_attr "amdfam10_decode" "double")
   (set_attr "mode" "TI")])

;; Narrow the V2DF operand 2 and merge the result into element 0 of the
;; V4SF operand 1 (vec_merge mask 1); other elements come from operand 1.
(define_insn "sse2_cvtsd2ss"
  [(set (match_operand:V4SF 0 "register_operand" "=x,x")
        (vec_merge:V4SF
          (vec_duplicate:V4SF
            (float_truncate:V2SF
              (match_operand:V2DF 2 "nonimmediate_operand" "x,m")))
          (match_operand:V4SF 1 "register_operand" "0,0")
          (const_int 1)))]
  "TARGET_SSE2"
  "cvtsd2ss\t{%2, %0|%0, %2}"
  [(set_attr "type" "ssecvt")
   (set_attr "athlon_decode" "vector,double")
   (set_attr "amdfam10_decode" "vector,double")
   (set_attr "mode" "SF")])

(define_insn "sse2_cvtss2sd"
  [(set (match_operand:V2DF 0 "register_operand" "=x,x")
(vec_merge:V2DF 2001 (float_extend:V2DF 2002 (vec_select:V2SF 2003 (match_operand:V4SF 2 "nonimmediate_operand" "x,m") 2004 (parallel [(const_int 0) (const_int 1)]))) 2005 (match_operand:V2DF 1 "register_operand" "0,0") 2006 (const_int 1)))] 2007 "TARGET_SSE2" 2008 "cvtss2sd\t{%2, %0|%0, %2}" 2009 [(set_attr "type" "ssecvt") 2010 (set_attr "amdfam10_decode" "vector,double") 2011 (set_attr "mode" "DF")]) 2012 2013(define_expand "sse2_cvtpd2ps" 2014 [(set (match_operand:V4SF 0 "register_operand" "") 2015 (vec_concat:V4SF 2016 (float_truncate:V2SF 2017 (match_operand:V2DF 1 "nonimmediate_operand" "xm")) 2018 (match_dup 2)))] 2019 "TARGET_SSE2" 2020 "operands[2] = CONST0_RTX (V2SFmode);") 2021 2022(define_insn "*sse2_cvtpd2ps" 2023 [(set (match_operand:V4SF 0 "register_operand" "=x") 2024 (vec_concat:V4SF 2025 (float_truncate:V2SF 2026 (match_operand:V2DF 1 "nonimmediate_operand" "xm")) 2027 (match_operand:V2SF 2 "const0_operand" "")))] 2028 "TARGET_SSE2" 2029 "cvtpd2ps\t{%1, %0|%0, %1}" 2030 [(set_attr "type" "ssecvt") 2031 (set_attr "mode" "V4SF") 2032 (set_attr "amdfam10_decode" "double")]) 2033 2034(define_insn "sse2_cvtps2pd" 2035 [(set (match_operand:V2DF 0 "register_operand" "=x") 2036 (float_extend:V2DF 2037 (vec_select:V2SF 2038 (match_operand:V4SF 1 "nonimmediate_operand" "xm") 2039 (parallel [(const_int 0) (const_int 1)]))))] 2040 "TARGET_SSE2" 2041 "cvtps2pd\t{%1, %0|%0, %1}" 2042 [(set_attr "type" "ssecvt") 2043 (set_attr "mode" "V2DF") 2044 (set_attr "amdfam10_decode" "direct")]) 2045 2046;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; 2047;; 2048;; Parallel double-precision floating point element swizzling 2049;; 2050;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; 2051 2052(define_insn "sse2_unpckhpd" 2053 [(set (match_operand:V2DF 0 "nonimmediate_operand" "=x,x,m") 2054 (vec_select:V2DF 2055 (vec_concat:V4DF 2056 (match_operand:V2DF 1 "nonimmediate_operand" " 0,o,x") 2057 (match_operand:V2DF 2 
"nonimmediate_operand" " x,0,0")) 2058 (parallel [(const_int 1) 2059 (const_int 3)])))] 2060 "TARGET_SSE2 && !(MEM_P (operands[1]) && MEM_P (operands[2]))" 2061 "@ 2062 unpckhpd\t{%2, %0|%0, %2} 2063 movlpd\t{%H1, %0|%0, %H1} 2064 movhpd\t{%1, %0|%0, %1}" 2065 [(set_attr "type" "sselog,ssemov,ssemov") 2066 (set_attr "mode" "V2DF,V1DF,V1DF")]) 2067 2068(define_insn "*sse3_movddup" 2069 [(set (match_operand:V2DF 0 "nonimmediate_operand" "=x,o") 2070 (vec_select:V2DF 2071 (vec_concat:V4DF 2072 (match_operand:V2DF 1 "nonimmediate_operand" "xm,x") 2073 (match_dup 1)) 2074 (parallel [(const_int 0) 2075 (const_int 2)])))] 2076 "TARGET_SSE3 && !(MEM_P (operands[0]) && MEM_P (operands[1]))" 2077 "@ 2078 movddup\t{%1, %0|%0, %1} 2079 #" 2080 [(set_attr "type" "sselog1,ssemov") 2081 (set_attr "mode" "V2DF")]) 2082 2083(define_split 2084 [(set (match_operand:V2DF 0 "memory_operand" "") 2085 (vec_select:V2DF 2086 (vec_concat:V4DF 2087 (match_operand:V2DF 1 "register_operand" "") 2088 (match_dup 1)) 2089 (parallel [(const_int 0) 2090 (const_int 2)])))] 2091 "TARGET_SSE3 && reload_completed" 2092 [(const_int 0)] 2093{ 2094 rtx low = gen_rtx_REG (DFmode, REGNO (operands[1])); 2095 emit_move_insn (adjust_address (operands[0], DFmode, 0), low); 2096 emit_move_insn (adjust_address (operands[0], DFmode, 8), low); 2097 DONE; 2098}) 2099 2100(define_insn "sse2_unpcklpd" 2101 [(set (match_operand:V2DF 0 "nonimmediate_operand" "=x,x,o") 2102 (vec_select:V2DF 2103 (vec_concat:V4DF 2104 (match_operand:V2DF 1 "nonimmediate_operand" " 0,0,0") 2105 (match_operand:V2DF 2 "nonimmediate_operand" " x,m,x")) 2106 (parallel [(const_int 0) 2107 (const_int 2)])))] 2108 "TARGET_SSE2 && !(MEM_P (operands[1]) && MEM_P (operands[2]))" 2109 "@ 2110 unpcklpd\t{%2, %0|%0, %2} 2111 movhpd\t{%2, %0|%0, %2} 2112 movlpd\t{%2, %H0|%H0, %2}" 2113 [(set_attr "type" "sselog,ssemov,ssemov") 2114 (set_attr "mode" "V2DF,V1DF,V1DF")]) 2115 2116(define_expand "sse2_shufpd" 2117 [(match_operand:V2DF 0 "register_operand" 
"") 2118 (match_operand:V2DF 1 "register_operand" "") 2119 (match_operand:V2DF 2 "nonimmediate_operand" "") 2120 (match_operand:SI 3 "const_int_operand" "")] 2121 "TARGET_SSE2" 2122{ 2123 int mask = INTVAL (operands[3]); 2124 emit_insn (gen_sse2_shufpd_1 (operands[0], operands[1], operands[2], 2125 GEN_INT (mask & 1), 2126 GEN_INT (mask & 2 ? 3 : 2))); 2127 DONE; 2128}) 2129 2130(define_insn "sse2_shufpd_1" 2131 [(set (match_operand:V2DF 0 "register_operand" "=x") 2132 (vec_select:V2DF 2133 (vec_concat:V4DF 2134 (match_operand:V2DF 1 "register_operand" "0") 2135 (match_operand:V2DF 2 "nonimmediate_operand" "xm")) 2136 (parallel [(match_operand 3 "const_0_to_1_operand" "") 2137 (match_operand 4 "const_2_to_3_operand" "")])))] 2138 "TARGET_SSE2" 2139{ 2140 int mask; 2141 mask = INTVAL (operands[3]); 2142 mask |= (INTVAL (operands[4]) - 2) << 1; 2143 operands[3] = GEN_INT (mask); 2144 2145 return "shufpd\t{%3, %2, %0|%0, %2, %3}"; 2146} 2147 [(set_attr "type" "sselog") 2148 (set_attr "mode" "V2DF")]) 2149 2150(define_insn "sse2_storehpd" 2151 [(set (match_operand:DF 0 "nonimmediate_operand" "=m,x,x*fr") 2152 (vec_select:DF 2153 (match_operand:V2DF 1 "nonimmediate_operand" " x,0,o") 2154 (parallel [(const_int 1)])))] 2155 "TARGET_SSE2 && !(MEM_P (operands[0]) && MEM_P (operands[1]))" 2156 "@ 2157 movhpd\t{%1, %0|%0, %1} 2158 unpckhpd\t%0, %0 2159 #" 2160 [(set_attr "type" "ssemov,sselog1,ssemov") 2161 (set_attr "mode" "V1DF,V2DF,DF")]) 2162 2163(define_split 2164 [(set (match_operand:DF 0 "register_operand" "") 2165 (vec_select:DF 2166 (match_operand:V2DF 1 "memory_operand" "") 2167 (parallel [(const_int 1)])))] 2168 "TARGET_SSE2 && reload_completed" 2169 [(set (match_dup 0) (match_dup 1))] 2170{ 2171 operands[1] = adjust_address (operands[1], DFmode, 8); 2172}) 2173 2174(define_insn "sse2_storelpd" 2175 [(set (match_operand:DF 0 "nonimmediate_operand" "=m,x,x*fr") 2176 (vec_select:DF 2177 (match_operand:V2DF 1 "nonimmediate_operand" " x,x,m") 2178 (parallel 
[(const_int 0)])))] 2179 "TARGET_SSE2 && !(MEM_P (operands[0]) && MEM_P (operands[1]))" 2180 "@ 2181 movlpd\t{%1, %0|%0, %1} 2182 # 2183 #" 2184 [(set_attr "type" "ssemov") 2185 (set_attr "mode" "V1DF,DF,DF")]) 2186 2187(define_split 2188 [(set (match_operand:DF 0 "register_operand" "") 2189 (vec_select:DF 2190 (match_operand:V2DF 1 "nonimmediate_operand" "") 2191 (parallel [(const_int 0)])))] 2192 "TARGET_SSE2 && reload_completed" 2193 [(const_int 0)] 2194{ 2195 rtx op1 = operands[1]; 2196 if (REG_P (op1)) 2197 op1 = gen_rtx_REG (DFmode, REGNO (op1)); 2198 else 2199 op1 = gen_lowpart (DFmode, op1); 2200 emit_move_insn (operands[0], op1); 2201 DONE; 2202}) 2203 2204(define_insn "sse2_loadhpd" 2205 [(set (match_operand:V2DF 0 "nonimmediate_operand" "=x,x,x,o") 2206 (vec_concat:V2DF 2207 (vec_select:DF 2208 (match_operand:V2DF 1 "nonimmediate_operand" " 0,0,x,0") 2209 (parallel [(const_int 0)])) 2210 (match_operand:DF 2 "nonimmediate_operand" " m,x,0,x*fr")))] 2211 "TARGET_SSE2 && !(MEM_P (operands[1]) && MEM_P (operands[2]))" 2212 "@ 2213 movhpd\t{%2, %0|%0, %2} 2214 unpcklpd\t{%2, %0|%0, %2} 2215 shufpd\t{$1, %1, %0|%0, %1, 1} 2216 #" 2217 [(set_attr "type" "ssemov,sselog,sselog,other") 2218 (set_attr "mode" "V1DF,V2DF,V2DF,DF")]) 2219 2220(define_split 2221 [(set (match_operand:V2DF 0 "memory_operand" "") 2222 (vec_concat:V2DF 2223 (vec_select:DF (match_dup 0) (parallel [(const_int 0)])) 2224 (match_operand:DF 1 "register_operand" "")))] 2225 "TARGET_SSE2 && reload_completed" 2226 [(set (match_dup 0) (match_dup 1))] 2227{ 2228 operands[0] = adjust_address (operands[0], DFmode, 8); 2229}) 2230 2231(define_insn "sse2_loadlpd" 2232 [(set (match_operand:V2DF 0 "nonimmediate_operand" "=x,x,x,x,x,m") 2233 (vec_concat:V2DF 2234 (match_operand:DF 2 "nonimmediate_operand" " m,m,x,0,0,x*fr") 2235 (vec_select:DF 2236 (match_operand:V2DF 1 "vector_move_operand" " C,0,0,x,o,0") 2237 (parallel [(const_int 1)]))))] 2238 "TARGET_SSE2 && !(MEM_P (operands[1]) && MEM_P 
(operands[2]))" 2239 "@ 2240 movsd\t{%2, %0|%0, %2} 2241 movlpd\t{%2, %0|%0, %2} 2242 movsd\t{%2, %0|%0, %2} 2243 shufpd\t{$2, %2, %0|%0, %2, 2} 2244 movhpd\t{%H1, %0|%0, %H1} 2245 #" 2246 [(set_attr "type" "ssemov,ssemov,ssemov,sselog,ssemov,other") 2247 (set_attr "mode" "DF,V1DF,V1DF,V2DF,V1DF,DF")]) 2248 2249(define_split 2250 [(set (match_operand:V2DF 0 "memory_operand" "") 2251 (vec_concat:V2DF 2252 (match_operand:DF 1 "register_operand" "") 2253 (vec_select:DF (match_dup 0) (parallel [(const_int 1)]))))] 2254 "TARGET_SSE2 && reload_completed" 2255 [(set (match_dup 0) (match_dup 1))] 2256{ 2257 operands[0] = adjust_address (operands[0], DFmode, 8); 2258}) 2259 2260;; Not sure these two are ever used, but it doesn't hurt to have 2261;; them. -aoliva 2262(define_insn "*vec_extractv2df_1_sse" 2263 [(set (match_operand:DF 0 "nonimmediate_operand" "=m,x,x") 2264 (vec_select:DF 2265 (match_operand:V2DF 1 "nonimmediate_operand" "x,x,o") 2266 (parallel [(const_int 1)])))] 2267 "!TARGET_SSE2 && TARGET_SSE 2268 && !(MEM_P (operands[0]) && MEM_P (operands[1]))" 2269 "@ 2270 movhps\t{%1, %0|%0, %1} 2271 movhlps\t{%1, %0|%0, %1} 2272 movlps\t{%H1, %0|%0, %H1}" 2273 [(set_attr "type" "ssemov") 2274 (set_attr "mode" "V2SF,V4SF,V2SF")]) 2275 2276(define_insn "*vec_extractv2df_0_sse" 2277 [(set (match_operand:DF 0 "nonimmediate_operand" "=m,x,x") 2278 (vec_select:DF 2279 (match_operand:V2DF 1 "nonimmediate_operand" "x,x,m") 2280 (parallel [(const_int 0)])))] 2281 "!TARGET_SSE2 && TARGET_SSE 2282 && !(MEM_P (operands[0]) && MEM_P (operands[1]))" 2283 "@ 2284 movlps\t{%1, %0|%0, %1} 2285 movaps\t{%1, %0|%0, %1} 2286 movlps\t{%1, %0|%0, %1}" 2287 [(set_attr "type" "ssemov") 2288 (set_attr "mode" "V2SF,V4SF,V2SF")]) 2289 2290(define_insn "sse2_movsd" 2291 [(set (match_operand:V2DF 0 "nonimmediate_operand" "=x,x,m,x,x,o") 2292 (vec_merge:V2DF 2293 (match_operand:V2DF 2 "nonimmediate_operand" " x,m,x,0,0,0") 2294 (match_operand:V2DF 1 "nonimmediate_operand" " 0,0,0,x,o,x") 2295 
(const_int 1)))] 2296 "TARGET_SSE2" 2297 "@ 2298 movsd\t{%2, %0|%0, %2} 2299 movlpd\t{%2, %0|%0, %2} 2300 movlpd\t{%2, %0|%0, %2} 2301 shufpd\t{$2, %2, %0|%0, %2, 2} 2302 movhps\t{%H1, %0|%0, %H1} 2303 movhps\t{%1, %H0|%H0, %1}" 2304 [(set_attr "type" "ssemov,ssemov,ssemov,sselog,ssemov,ssemov") 2305 (set_attr "mode" "DF,V1DF,V1DF,V2DF,V1DF,V1DF")]) 2306 2307(define_insn "*vec_dupv2df_sse3" 2308 [(set (match_operand:V2DF 0 "register_operand" "=x") 2309 (vec_duplicate:V2DF 2310 (match_operand:DF 1 "nonimmediate_operand" "xm")))] 2311 "TARGET_SSE3" 2312 "movddup\t{%1, %0|%0, %1}" 2313 [(set_attr "type" "sselog1") 2314 (set_attr "mode" "DF")]) 2315 2316(define_insn "*vec_dupv2df" 2317 [(set (match_operand:V2DF 0 "register_operand" "=x") 2318 (vec_duplicate:V2DF 2319 (match_operand:DF 1 "register_operand" "0")))] 2320 "TARGET_SSE2" 2321 "unpcklpd\t%0, %0" 2322 [(set_attr "type" "sselog1") 2323 (set_attr "mode" "V4SF")]) 2324 2325(define_insn "*vec_concatv2df_sse3" 2326 [(set (match_operand:V2DF 0 "register_operand" "=x") 2327 (vec_concat:V2DF 2328 (match_operand:DF 1 "nonimmediate_operand" "xm") 2329 (match_dup 1)))] 2330 "TARGET_SSE3" 2331 "movddup\t{%1, %0|%0, %1}" 2332 [(set_attr "type" "sselog1") 2333 (set_attr "mode" "DF")]) 2334 2335(define_insn "*vec_concatv2df" 2336 [(set (match_operand:V2DF 0 "register_operand" "=Y,Y,Y,x,x") 2337 (vec_concat:V2DF 2338 (match_operand:DF 1 "nonimmediate_operand" " 0,0,m,0,0") 2339 (match_operand:DF 2 "vector_move_operand" " Y,m,C,x,m")))] 2340 "TARGET_SSE" 2341 "@ 2342 unpcklpd\t{%2, %0|%0, %2} 2343 movhpd\t{%2, %0|%0, %2} 2344 movsd\t{%1, %0|%0, %1} 2345 movlhps\t{%2, %0|%0, %2} 2346 movhps\t{%2, %0|%0, %2}" 2347 [(set_attr "type" "sselog,ssemov,ssemov,ssemov,ssemov") 2348 (set_attr "mode" "V2DF,V1DF,DF,V4SF,V2SF")]) 2349 2350(define_expand "vec_setv2df" 2351 [(match_operand:V2DF 0 "register_operand" "") 2352 (match_operand:DF 1 "register_operand" "") 2353 (match_operand 2 "const_int_operand" "")] 2354 "TARGET_SSE" 2355{ 2356 
ix86_expand_vector_set (false, operands[0], operands[1], 2357 INTVAL (operands[2])); 2358 DONE; 2359}) 2360 2361(define_expand "vec_extractv2df" 2362 [(match_operand:DF 0 "register_operand" "") 2363 (match_operand:V2DF 1 "register_operand" "") 2364 (match_operand 2 "const_int_operand" "")] 2365 "TARGET_SSE" 2366{ 2367 ix86_expand_vector_extract (false, operands[0], operands[1], 2368 INTVAL (operands[2])); 2369 DONE; 2370}) 2371 2372(define_expand "vec_initv2df" 2373 [(match_operand:V2DF 0 "register_operand" "") 2374 (match_operand 1 "" "")] 2375 "TARGET_SSE" 2376{ 2377 ix86_expand_vector_init (false, operands[0], operands[1]); 2378 DONE; 2379}) 2380 2381;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; 2382;; 2383;; Parallel integral arithmetic 2384;; 2385;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; 2386 2387(define_expand "neg<mode>2" 2388 [(set (match_operand:SSEMODEI 0 "register_operand" "") 2389 (minus:SSEMODEI 2390 (match_dup 2) 2391 (match_operand:SSEMODEI 1 "nonimmediate_operand" "")))] 2392 "TARGET_SSE2" 2393 "operands[2] = force_reg (<MODE>mode, CONST0_RTX (<MODE>mode));") 2394 2395(define_expand "add<mode>3" 2396 [(set (match_operand:SSEMODEI 0 "register_operand" "") 2397 (plus:SSEMODEI (match_operand:SSEMODEI 1 "nonimmediate_operand" "") 2398 (match_operand:SSEMODEI 2 "nonimmediate_operand" "")))] 2399 "TARGET_SSE2" 2400 "ix86_fixup_binary_operands_no_copy (PLUS, <MODE>mode, operands);") 2401 2402(define_insn "*add<mode>3" 2403 [(set (match_operand:SSEMODEI 0 "register_operand" "=x") 2404 (plus:SSEMODEI 2405 (match_operand:SSEMODEI 1 "nonimmediate_operand" "%0") 2406 (match_operand:SSEMODEI 2 "nonimmediate_operand" "xm")))] 2407 "TARGET_SSE2 && ix86_binary_operator_ok (PLUS, <MODE>mode, operands)" 2408 "padd<ssevecsize>\t{%2, %0|%0, %2}" 2409 [(set_attr "type" "sseiadd") 2410 (set_attr "mode" "TI")]) 2411 2412(define_insn "sse2_ssadd<mode>3" 2413 [(set (match_operand:SSEMODE12 0 "register_operand" "=x") 2414 
(ss_plus:SSEMODE12 2415 (match_operand:SSEMODE12 1 "nonimmediate_operand" "%0") 2416 (match_operand:SSEMODE12 2 "nonimmediate_operand" "xm")))] 2417 "TARGET_SSE2 && ix86_binary_operator_ok (SS_PLUS, <MODE>mode, operands)" 2418 "padds<ssevecsize>\t{%2, %0|%0, %2}" 2419 [(set_attr "type" "sseiadd") 2420 (set_attr "mode" "TI")]) 2421 2422(define_insn "sse2_usadd<mode>3" 2423 [(set (match_operand:SSEMODE12 0 "register_operand" "=x") 2424 (us_plus:SSEMODE12 2425 (match_operand:SSEMODE12 1 "nonimmediate_operand" "%0") 2426 (match_operand:SSEMODE12 2 "nonimmediate_operand" "xm")))] 2427 "TARGET_SSE2 && ix86_binary_operator_ok (US_PLUS, <MODE>mode, operands)" 2428 "paddus<ssevecsize>\t{%2, %0|%0, %2}" 2429 [(set_attr "type" "sseiadd") 2430 (set_attr "mode" "TI")]) 2431 2432(define_expand "sub<mode>3" 2433 [(set (match_operand:SSEMODEI 0 "register_operand" "") 2434 (minus:SSEMODEI (match_operand:SSEMODEI 1 "register_operand" "") 2435 (match_operand:SSEMODEI 2 "nonimmediate_operand" "")))] 2436 "TARGET_SSE2" 2437 "ix86_fixup_binary_operands_no_copy (MINUS, <MODE>mode, operands);") 2438 2439(define_insn "*sub<mode>3" 2440 [(set (match_operand:SSEMODEI 0 "register_operand" "=x") 2441 (minus:SSEMODEI 2442 (match_operand:SSEMODEI 1 "register_operand" "0") 2443 (match_operand:SSEMODEI 2 "nonimmediate_operand" "xm")))] 2444 "TARGET_SSE2" 2445 "psub<ssevecsize>\t{%2, %0|%0, %2}" 2446 [(set_attr "type" "sseiadd") 2447 (set_attr "mode" "TI")]) 2448 2449(define_insn "sse2_sssub<mode>3" 2450 [(set (match_operand:SSEMODE12 0 "register_operand" "=x") 2451 (ss_minus:SSEMODE12 2452 (match_operand:SSEMODE12 1 "register_operand" "0") 2453 (match_operand:SSEMODE12 2 "nonimmediate_operand" "xm")))] 2454 "TARGET_SSE2" 2455 "psubs<ssevecsize>\t{%2, %0|%0, %2}" 2456 [(set_attr "type" "sseiadd") 2457 (set_attr "mode" "TI")]) 2458 2459(define_insn "sse2_ussub<mode>3" 2460 [(set (match_operand:SSEMODE12 0 "register_operand" "=x") 2461 (us_minus:SSEMODE12 2462 (match_operand:SSEMODE12 1 
"register_operand" "0") 2463 (match_operand:SSEMODE12 2 "nonimmediate_operand" "xm")))] 2464 "TARGET_SSE2" 2465 "psubus<ssevecsize>\t{%2, %0|%0, %2}" 2466 [(set_attr "type" "sseiadd") 2467 (set_attr "mode" "TI")]) 2468 2469(define_expand "mulv16qi3" 2470 [(set (match_operand:V16QI 0 "register_operand" "") 2471 (mult:V16QI (match_operand:V16QI 1 "register_operand" "") 2472 (match_operand:V16QI 2 "register_operand" "")))] 2473 "TARGET_SSE2" 2474{ 2475 rtx t[12], op0; 2476 int i; 2477 2478 for (i = 0; i < 12; ++i) 2479 t[i] = gen_reg_rtx (V16QImode); 2480 2481 /* Unpack data such that we've got a source byte in each low byte of 2482 each word. We don't care what goes into the high byte of each word. 2483 Rather than trying to get zero in there, most convenient is to let 2484 it be a copy of the low byte. */ 2485 emit_insn (gen_sse2_punpckhbw (t[0], operands[1], operands[1])); 2486 emit_insn (gen_sse2_punpckhbw (t[1], operands[2], operands[2])); 2487 emit_insn (gen_sse2_punpcklbw (t[2], operands[1], operands[1])); 2488 emit_insn (gen_sse2_punpcklbw (t[3], operands[2], operands[2])); 2489 2490 /* Multiply words. The end-of-line annotations here give a picture of what 2491 the output of that instruction looks like. Dot means don't care; the 2492 letters are the bytes of the result with A being the most significant. */ 2493 emit_insn (gen_mulv8hi3 (gen_lowpart (V8HImode, t[4]), /* .A.B.C.D.E.F.G.H */ 2494 gen_lowpart (V8HImode, t[0]), 2495 gen_lowpart (V8HImode, t[1]))); 2496 emit_insn (gen_mulv8hi3 (gen_lowpart (V8HImode, t[5]), /* .I.J.K.L.M.N.O.P */ 2497 gen_lowpart (V8HImode, t[2]), 2498 gen_lowpart (V8HImode, t[3]))); 2499 2500 /* Extract the relevant bytes and merge them back together. 
*/ 2501 emit_insn (gen_sse2_punpckhbw (t[6], t[5], t[4])); /* ..AI..BJ..CK..DL */ 2502 emit_insn (gen_sse2_punpcklbw (t[7], t[5], t[4])); /* ..EM..FN..GO..HP */ 2503 emit_insn (gen_sse2_punpckhbw (t[8], t[7], t[6])); /* ....AEIM....BFJN */ 2504 emit_insn (gen_sse2_punpcklbw (t[9], t[7], t[6])); /* ....CGKO....DHLP */ 2505 emit_insn (gen_sse2_punpckhbw (t[10], t[9], t[8])); /* ........ACEGIKMO */ 2506 emit_insn (gen_sse2_punpcklbw (t[11], t[9], t[8])); /* ........BDFHJLNP */ 2507 2508 op0 = operands[0]; 2509 emit_insn (gen_sse2_punpcklbw (op0, t[11], t[10])); /* ABCDEFGHIJKLMNOP */ 2510 DONE; 2511}) 2512 2513(define_expand "mulv8hi3" 2514 [(set (match_operand:V8HI 0 "register_operand" "") 2515 (mult:V8HI (match_operand:V8HI 1 "nonimmediate_operand" "") 2516 (match_operand:V8HI 2 "nonimmediate_operand" "")))] 2517 "TARGET_SSE2" 2518 "ix86_fixup_binary_operands_no_copy (MULT, V8HImode, operands);") 2519 2520(define_insn "*mulv8hi3" 2521 [(set (match_operand:V8HI 0 "register_operand" "=x") 2522 (mult:V8HI (match_operand:V8HI 1 "nonimmediate_operand" "%0") 2523 (match_operand:V8HI 2 "nonimmediate_operand" "xm")))] 2524 "TARGET_SSE2 && ix86_binary_operator_ok (MULT, V8HImode, operands)" 2525 "pmullw\t{%2, %0|%0, %2}" 2526 [(set_attr "type" "sseimul") 2527 (set_attr "mode" "TI")]) 2528 2529(define_insn "sse2_smulv8hi3_highpart" 2530 [(set (match_operand:V8HI 0 "register_operand" "=x") 2531 (truncate:V8HI 2532 (lshiftrt:V8SI 2533 (mult:V8SI 2534 (sign_extend:V8SI 2535 (match_operand:V8HI 1 "nonimmediate_operand" "%0")) 2536 (sign_extend:V8SI 2537 (match_operand:V8HI 2 "nonimmediate_operand" "xm"))) 2538 (const_int 16))))] 2539 "TARGET_SSE2 && ix86_binary_operator_ok (MULT, V8HImode, operands)" 2540 "pmulhw\t{%2, %0|%0, %2}" 2541 [(set_attr "type" "sseimul") 2542 (set_attr "mode" "TI")]) 2543 2544(define_insn "sse2_umulv8hi3_highpart" 2545 [(set (match_operand:V8HI 0 "register_operand" "=x") 2546 (truncate:V8HI 2547 (lshiftrt:V8SI 2548 (mult:V8SI 2549 (zero_extend:V8SI 2550 
(match_operand:V8HI 1 "nonimmediate_operand" "%0")) 2551 (zero_extend:V8SI 2552 (match_operand:V8HI 2 "nonimmediate_operand" "xm"))) 2553 (const_int 16))))] 2554 "TARGET_SSE2 && ix86_binary_operator_ok (MULT, V8HImode, operands)" 2555 "pmulhuw\t{%2, %0|%0, %2}" 2556 [(set_attr "type" "sseimul") 2557 (set_attr "mode" "TI")]) 2558 2559(define_insn "sse2_umulv2siv2di3" 2560 [(set (match_operand:V2DI 0 "register_operand" "=x") 2561 (mult:V2DI 2562 (zero_extend:V2DI 2563 (vec_select:V2SI 2564 (match_operand:V4SI 1 "nonimmediate_operand" "%0") 2565 (parallel [(const_int 0) (const_int 2)]))) 2566 (zero_extend:V2DI 2567 (vec_select:V2SI 2568 (match_operand:V4SI 2 "nonimmediate_operand" "xm") 2569 (parallel [(const_int 0) (const_int 2)])))))] 2570 "TARGET_SSE2 && ix86_binary_operator_ok (MULT, V8HImode, operands)" 2571 "pmuludq\t{%2, %0|%0, %2}" 2572 [(set_attr "type" "sseimul") 2573 (set_attr "mode" "TI")]) 2574 2575(define_insn "sse2_pmaddwd" 2576 [(set (match_operand:V4SI 0 "register_operand" "=x") 2577 (plus:V4SI 2578 (mult:V4SI 2579 (sign_extend:V4SI 2580 (vec_select:V4HI 2581 (match_operand:V8HI 1 "nonimmediate_operand" "%0") 2582 (parallel [(const_int 0) 2583 (const_int 2) 2584 (const_int 4) 2585 (const_int 6)]))) 2586 (sign_extend:V4SI 2587 (vec_select:V4HI 2588 (match_operand:V8HI 2 "nonimmediate_operand" "xm") 2589 (parallel [(const_int 0) 2590 (const_int 2) 2591 (const_int 4) 2592 (const_int 6)])))) 2593 (mult:V4SI 2594 (sign_extend:V4SI 2595 (vec_select:V4HI (match_dup 1) 2596 (parallel [(const_int 1) 2597 (const_int 3) 2598 (const_int 5) 2599 (const_int 7)]))) 2600 (sign_extend:V4SI 2601 (vec_select:V4HI (match_dup 2) 2602 (parallel [(const_int 1) 2603 (const_int 3) 2604 (const_int 5) 2605 (const_int 7)]))))))] 2606 "TARGET_SSE2" 2607 "pmaddwd\t{%2, %0|%0, %2}" 2608 [(set_attr "type" "sseiadd") 2609 (set_attr "mode" "TI")]) 2610 2611(define_expand "mulv4si3" 2612 [(set (match_operand:V4SI 0 "register_operand" "") 2613 (mult:V4SI (match_operand:V4SI 1 
"register_operand" "") 2614 (match_operand:V4SI 2 "register_operand" "")))] 2615 "TARGET_SSE2" 2616{ 2617 rtx t1, t2, t3, t4, t5, t6, thirtytwo; 2618 rtx op0, op1, op2; 2619 2620 op0 = operands[0]; 2621 op1 = operands[1]; 2622 op2 = operands[2]; 2623 t1 = gen_reg_rtx (V4SImode); 2624 t2 = gen_reg_rtx (V4SImode); 2625 t3 = gen_reg_rtx (V4SImode); 2626 t4 = gen_reg_rtx (V4SImode); 2627 t5 = gen_reg_rtx (V4SImode); 2628 t6 = gen_reg_rtx (V4SImode); 2629 thirtytwo = GEN_INT (32); 2630 2631 /* Multiply elements 2 and 0. */ 2632 emit_insn (gen_sse2_umulv2siv2di3 (gen_lowpart (V2DImode, t1), op1, op2)); 2633 2634 /* Shift both input vectors down one element, so that elements 3 and 1 2635 are now in the slots for elements 2 and 0. For K8, at least, this is 2636 faster than using a shuffle. */ 2637 emit_insn (gen_sse2_lshrti3 (gen_lowpart (TImode, t2), 2638 gen_lowpart (TImode, op1), thirtytwo)); 2639 emit_insn (gen_sse2_lshrti3 (gen_lowpart (TImode, t3), 2640 gen_lowpart (TImode, op2), thirtytwo)); 2641 2642 /* Multiply elements 3 and 1. */ 2643 emit_insn (gen_sse2_umulv2siv2di3 (gen_lowpart (V2DImode, t4), t2, t3)); 2644 2645 /* Move the results in element 2 down to element 1; we don't care what 2646 goes in elements 2 and 3. */ 2647 emit_insn (gen_sse2_pshufd_1 (t5, t1, const0_rtx, const2_rtx, 2648 const0_rtx, const0_rtx)); 2649 emit_insn (gen_sse2_pshufd_1 (t6, t4, const0_rtx, const2_rtx, 2650 const0_rtx, const0_rtx)); 2651 2652 /* Merge the parts back together. 
*/ 2653 emit_insn (gen_sse2_punpckldq (op0, t5, t6)); 2654 DONE; 2655}) 2656 2657(define_expand "mulv2di3" 2658 [(set (match_operand:V2DI 0 "register_operand" "") 2659 (mult:V2DI (match_operand:V2DI 1 "register_operand" "") 2660 (match_operand:V2DI 2 "register_operand" "")))] 2661 "TARGET_SSE2" 2662{ 2663 rtx t1, t2, t3, t4, t5, t6, thirtytwo; 2664 rtx op0, op1, op2; 2665 2666 op0 = operands[0]; 2667 op1 = operands[1]; 2668 op2 = operands[2]; 2669 t1 = gen_reg_rtx (V2DImode); 2670 t2 = gen_reg_rtx (V2DImode); 2671 t3 = gen_reg_rtx (V2DImode); 2672 t4 = gen_reg_rtx (V2DImode); 2673 t5 = gen_reg_rtx (V2DImode); 2674 t6 = gen_reg_rtx (V2DImode); 2675 thirtytwo = GEN_INT (32); 2676 2677 /* Multiply low parts. */ 2678 emit_insn (gen_sse2_umulv2siv2di3 (t1, gen_lowpart (V4SImode, op1), 2679 gen_lowpart (V4SImode, op2))); 2680 2681 /* Shift input vectors left 32 bits so we can multiply high parts. */ 2682 emit_insn (gen_lshrv2di3 (t2, op1, thirtytwo)); 2683 emit_insn (gen_lshrv2di3 (t3, op2, thirtytwo)); 2684 2685 /* Multiply high parts by low parts. */ 2686 emit_insn (gen_sse2_umulv2siv2di3 (t4, gen_lowpart (V4SImode, op1), 2687 gen_lowpart (V4SImode, t3))); 2688 emit_insn (gen_sse2_umulv2siv2di3 (t5, gen_lowpart (V4SImode, op2), 2689 gen_lowpart (V4SImode, t2))); 2690 2691 /* Shift them back. */ 2692 emit_insn (gen_ashlv2di3 (t4, t4, thirtytwo)); 2693 emit_insn (gen_ashlv2di3 (t5, t5, thirtytwo)); 2694 2695 /* Add the three parts together. 
*/ 2696 emit_insn (gen_addv2di3 (t6, t1, t4)); 2697 emit_insn (gen_addv2di3 (op0, t6, t5)); 2698 DONE; 2699}) 2700 2701(define_expand "sdot_prodv8hi" 2702 [(match_operand:V4SI 0 "register_operand" "") 2703 (match_operand:V8HI 1 "nonimmediate_operand" "") 2704 (match_operand:V8HI 2 "nonimmediate_operand" "") 2705 (match_operand:V4SI 3 "register_operand" "")] 2706 "TARGET_SSE2" 2707{ 2708 rtx t = gen_reg_rtx (V4SImode); 2709 emit_insn (gen_sse2_pmaddwd (t, operands[1], operands[2])); 2710 emit_insn (gen_addv4si3 (operands[0], operands[3], t)); 2711 DONE; 2712}) 2713 2714(define_expand "udot_prodv4si" 2715 [(match_operand:V2DI 0 "register_operand" "") 2716 (match_operand:V4SI 1 "register_operand" "") 2717 (match_operand:V4SI 2 "register_operand" "") 2718 (match_operand:V2DI 3 "register_operand" "")] 2719 "TARGET_SSE2" 2720{ 2721 rtx t1, t2, t3, t4; 2722 2723 t1 = gen_reg_rtx (V2DImode); 2724 emit_insn (gen_sse2_umulv2siv2di3 (t1, operands[1], operands[2])); 2725 emit_insn (gen_addv2di3 (t1, t1, operands[3])); 2726 2727 t2 = gen_reg_rtx (V4SImode); 2728 t3 = gen_reg_rtx (V4SImode); 2729 emit_insn (gen_sse2_lshrti3 (gen_lowpart (TImode, t2), 2730 gen_lowpart (TImode, operands[1]), 2731 GEN_INT (32))); 2732 emit_insn (gen_sse2_lshrti3 (gen_lowpart (TImode, t3), 2733 gen_lowpart (TImode, operands[2]), 2734 GEN_INT (32))); 2735 2736 t4 = gen_reg_rtx (V2DImode); 2737 emit_insn (gen_sse2_umulv2siv2di3 (t4, t2, t3)); 2738 2739 emit_insn (gen_addv2di3 (operands[0], t1, t4)); 2740 DONE; 2741}) 2742 2743(define_insn "ashr<mode>3" 2744 [(set (match_operand:SSEMODE24 0 "register_operand" "=x") 2745 (ashiftrt:SSEMODE24 2746 (match_operand:SSEMODE24 1 "register_operand" "0") 2747 (match_operand:TI 2 "nonmemory_operand" "xn")))] 2748 "TARGET_SSE2" 2749 "psra<ssevecsize>\t{%2, %0|%0, %2}" 2750 [(set_attr "type" "sseishft") 2751 (set_attr "mode" "TI")]) 2752 2753(define_insn "lshr<mode>3" 2754 [(set (match_operand:SSEMODE248 0 "register_operand" "=x") 2755 (lshiftrt:SSEMODE248 2756 
(match_operand:SSEMODE248 1 "register_operand" "0")
          (match_operand:TI 2 "nonmemory_operand" "xn")))]
  "TARGET_SSE2"
  "psrl<ssevecsize>\t{%2, %0|%0, %2}"
  [(set_attr "type" "sseishft")
   (set_attr "mode" "TI")])

;; Left shift of each 16/32/64-bit element (psllw/pslld/psllq).
;; Operand 2 is the TImode count: an SSE register or an immediate.
(define_insn "ashl<mode>3"
  [(set (match_operand:SSEMODE248 0 "register_operand" "=x")
        (ashift:SSEMODE248
          (match_operand:SSEMODE248 1 "register_operand" "0")
          (match_operand:TI 2 "nonmemory_operand" "xn")))]
  "TARGET_SSE2"
  "psll<ssevecsize>\t{%2, %0|%0, %2}"
  [(set_attr "type" "sseishft")
   (set_attr "mode" "TI")])

;; Whole-register left shift.  The RTL count (operand 2) is in bits and
;; must be a multiple of 8; it is divided by 8 before being printed as
;; the pslldq immediate.
(define_insn "sse2_ashlti3"
  [(set (match_operand:TI 0 "register_operand" "=x")
        (ashift:TI (match_operand:TI 1 "register_operand" "0")
                   (match_operand:SI 2 "const_0_to_255_mul_8_operand" "n")))]
  "TARGET_SSE2"
{
  HOST_WIDE_INT bit_count = INTVAL (operands[2]);

  operands[2] = GEN_INT (bit_count / 8);
  return "pslldq\t{%2, %0|%0, %2}";
}
  [(set_attr "type" "sseishft")
   (set_attr "mode" "TI")])

;; Vector-wide left shift for any integer vector mode.  Fails unless the
;; count satisfies const_0_to_255_mul_8_operand; otherwise both vector
;; operands are re-viewed as TImode so the TImode shift insn can match.
(define_expand "vec_shl_<mode>"
  [(set (match_operand:SSEMODEI 0 "register_operand" "")
        (ashift:TI (match_operand:SSEMODEI 1 "register_operand" "")
                   (match_operand:SI 2 "general_operand" "")))]
  "TARGET_SSE2"
{
  /* Only constant bit counts that are multiples of 8 are expressible.  */
  if (!const_0_to_255_mul_8_operand (operands[2], SImode))
    FAIL;

  operands[0] = gen_lowpart (TImode, operands[0]);
  operands[1] = gen_lowpart (TImode, operands[1]);
})

;; Whole-register logical right shift.  As with sse2_ashlti3, the bit
;; count in operand 2 is divided by 8 before being printed for psrldq.
(define_insn "sse2_lshrti3"
  [(set (match_operand:TI 0 "register_operand" "=x")
        (lshiftrt:TI (match_operand:TI 1 "register_operand" "0")
                     (match_operand:SI 2 "const_0_to_255_mul_8_operand" "n")))]
  "TARGET_SSE2"
{
  HOST_WIDE_INT bit_count = INTVAL (operands[2]);

  operands[2] = GEN_INT (bit_count / 8);
  return "psrldq\t{%2, %0|%0, %2}";
}
  [(set_attr "type" "sseishft")
   (set_attr "mode" "TI")])

;; Vector-wide logical right shift; mirrors vec_shl_<mode>.
(define_expand "vec_shr_<mode>"
  [(set (match_operand:SSEMODEI 0 "register_operand" "")
        (lshiftrt:TI (match_operand:SSEMODEI 1 "register_operand" "")
                     (match_operand:SI 2 "general_operand" "")))]
  "TARGET_SSE2"
{
  /* Only constant bit counts that are multiples of 8 are expressible.  */
  if (!const_0_to_255_mul_8_operand (operands[2], SImode))
    FAIL;

  operands[0] = gen_lowpart (TImode, operands[0]);
  operands[1] = gen_lowpart (TImode, operands[1]);
})

;; Unsigned byte maximum.  The expander only fixes up the operands; the
;; insn below does the matching.
(define_expand "umaxv16qi3"
  [(set (match_operand:V16QI 0 "register_operand" "")
        (umax:V16QI (match_operand:V16QI 1 "nonimmediate_operand" "")
                    (match_operand:V16QI 2 "nonimmediate_operand" "")))]
  "TARGET_SSE2"
{
  ix86_fixup_binary_operands_no_copy (UMAX, V16QImode, operands);
})

(define_insn "*umaxv16qi3"
  [(set (match_operand:V16QI 0 "register_operand" "=x")
        (umax:V16QI (match_operand:V16QI 1 "nonimmediate_operand" "%0")
                    (match_operand:V16QI 2 "nonimmediate_operand" "xm")))]
  "TARGET_SSE2 && ix86_binary_operator_ok (UMAX, V16QImode, operands)"
  "pmaxub\t{%2, %0|%0, %2}"
  [(set_attr "type" "sseiadd")
   (set_attr "mode" "TI")])

;; Signed 16-bit maximum.
(define_expand "smaxv8hi3"
  [(set (match_operand:V8HI 0 "register_operand" "")
        (smax:V8HI (match_operand:V8HI 1 "nonimmediate_operand" "")
                   (match_operand:V8HI 2 "nonimmediate_operand" "")))]
  "TARGET_SSE2"
{
  ix86_fixup_binary_operands_no_copy (SMAX, V8HImode, operands);
})

(define_insn "*smaxv8hi3"
  [(set (match_operand:V8HI 0 "register_operand" "=x")
        (smax:V8HI (match_operand:V8HI 1 "nonimmediate_operand" "%0")
                   (match_operand:V8HI 2 "nonimmediate_operand" "xm")))]
  "TARGET_SSE2 && ix86_binary_operator_ok (SMAX, V8HImode, operands)"
  "pmaxsw\t{%2, %0|%0, %2}"
  [(set_attr "type" "sseiadd")
   (set_attr "mode" "TI")])

;; Unsigned 16-bit maximum, emitted as two sets: a saturating unsigned
;; subtract (op1 - op2) followed by an add of op2.  Operand 3 is the real
;; destination; when the destination is the same rtx as operand 2, the
;; intermediate result goes to a fresh pseudo so operand 2 is still
;; intact for the add.
(define_expand "umaxv8hi3"
  [(set (match_operand:V8HI 0 "register_operand" "=x")
        (us_minus:V8HI (match_operand:V8HI 1 "register_operand" "0")
                       (match_operand:V8HI 2 "nonimmediate_operand" "xm")))
   (set (match_dup 3)
        (plus:V8HI (match_dup 0) (match_dup 2)))]
  "TARGET_SSE2"
{
  operands[3] = operands[0];
  if (rtx_equal_p (operands[0], operands[2]))
    operands[0] = gen_reg_rtx (V8HImode);
})

;; Signed maximum for byte and dword elements, expanded through the
;; integer vcond expander as: dest = (op1 > op2) ? op1 : op2.
(define_expand "smax<mode>3"
  [(set (match_operand:SSEMODE14 0 "register_operand" "")
        (smax:SSEMODE14 (match_operand:SSEMODE14 1 "register_operand" "")
                        (match_operand:SSEMODE14 2 "register_operand" "")))]
  "TARGET_SSE2"
{
  rtx cmp = gen_rtx_GT (VOIDmode, operands[1], operands[2]);
  rtx xops[6];
  bool ok;

  /* Same operand layout as the vcond patterns: destination, value if
     true, value if false, comparison, then the two compared operands.  */
  xops[0] = operands[0];
  xops[1] = operands[1];
  xops[2] = operands[2];
  xops[3] = cmp;
  xops[4] = operands[1];
  xops[5] = operands[2];

  ok = ix86_expand_int_vcond (xops);
  gcc_assert (ok);
  DONE;
})

;; Unsigned dword maximum: dest = (op1 >u op2) ? op1 : op2.
(define_expand "umaxv4si3"
  [(set (match_operand:V4SI 0 "register_operand" "")
        (umax:V4SI (match_operand:V4SI 1 "register_operand" "")
                   (match_operand:V4SI 2 "register_operand" "")))]
  "TARGET_SSE2"
{
  rtx cmp = gen_rtx_GTU (VOIDmode, operands[1], operands[2]);
  rtx xops[6];
  bool ok;

  /* Same operand layout as the vcond patterns: destination, value if
     true, value if false, comparison, then the two compared operands.  */
  xops[0] = operands[0];
  xops[1] = operands[1];
  xops[2] = operands[2];
  xops[3] = cmp;
  xops[4] = operands[1];
  xops[5] = operands[2];

  ok = ix86_expand_int_vcond (xops);
  gcc_assert (ok);
  DONE;
})

;; Unsigned byte minimum.
(define_expand "uminv16qi3"
  [(set (match_operand:V16QI 0 "register_operand" "")
        (umin:V16QI (match_operand:V16QI 1 "nonimmediate_operand" "")
                    (match_operand:V16QI 2 "nonimmediate_operand" "")))]
  "TARGET_SSE2"
{
  ix86_fixup_binary_operands_no_copy (UMIN, V16QImode, operands);
})

(define_insn "*uminv16qi3"
  [(set (match_operand:V16QI 0 "register_operand" "=x")
        (umin:V16QI (match_operand:V16QI 1 "nonimmediate_operand" "%0")
                    (match_operand:V16QI 2 "nonimmediate_operand" "xm")))]
  "TARGET_SSE2 && ix86_binary_operator_ok (UMIN, V16QImode, operands)"
  "pminub\t{%2, %0|%0, %2}"
  [(set_attr "type" "sseiadd")
   (set_attr "mode" "TI")])

;; Signed 16-bit minimum.
(define_expand "sminv8hi3"
  [(set (match_operand:V8HI 0 "register_operand" "")
        (smin:V8HI (match_operand:V8HI 1 "nonimmediate_operand" "")
                   (match_operand:V8HI 2 "nonimmediate_operand" "")))]
  "TARGET_SSE2"
{
  ix86_fixup_binary_operands_no_copy (SMIN, V8HImode, operands);
})

(define_insn "*sminv8hi3"
  [(set (match_operand:V8HI 0 "register_operand" "=x")
        (smin:V8HI (match_operand:V8HI 1 "nonimmediate_operand" "%0")
                   (match_operand:V8HI 2 "nonimmediate_operand" "xm")))]
  "TARGET_SSE2 && ix86_binary_operator_ok (SMIN, V8HImode, operands)"
  "pminsw\t{%2, %0|%0, %2}"
  [(set_attr "type" "sseiadd")
   (set_attr "mode" "TI")])

;; Signed minimum for byte and dword elements.  Uses the same GT
;; comparison as smax but with the selected values swapped:
;; dest = (op1 > op2) ? op2 : op1.
(define_expand "smin<mode>3"
  [(set (match_operand:SSEMODE14 0 "register_operand" "")
        (smin:SSEMODE14 (match_operand:SSEMODE14 1 "register_operand" "")
                        (match_operand:SSEMODE14 2 "register_operand" "")))]
  "TARGET_SSE2"
{
  rtx cmp = gen_rtx_GT (VOIDmode, operands[1], operands[2]);
  rtx xops[6];
  bool ok;

  /* Value-if-true and value-if-false are deliberately op2 and op1.  */
  xops[0] = operands[0];
  xops[1] = operands[2];
  xops[2] = operands[1];
  xops[3] = cmp;
  xops[4] = operands[1];
  xops[5] = operands[2];

  ok = ix86_expand_int_vcond (xops);
  gcc_assert (ok);
  DONE;
})

;; Unsigned minimum for word and dword elements:
;; dest = (op1 >u op2) ? op2 : op1.
(define_expand "umin<mode>3"
  [(set (match_operand:SSEMODE24 0 "register_operand" "")
        (umin:SSEMODE24 (match_operand:SSEMODE24 1 "register_operand" "")
                        (match_operand:SSEMODE24 2 "register_operand" "")))]
  "TARGET_SSE2"
{
  rtx cmp = gen_rtx_GTU (VOIDmode, operands[1], operands[2]);
  rtx xops[6];
  bool ok;

  /* Value-if-true and value-if-false are deliberately op2 and op1.  */
  xops[0] = operands[0];
  xops[1] = operands[2];
  xops[2] = operands[1];
  xops[3] = cmp;
  xops[4] = operands[1];
  xops[5] = operands[2];

  ok = ix86_expand_int_vcond (xops);
  gcc_assert (ok);
  DONE;
})

;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;;
;; Parallel integral comparisons
;;
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;

;; Element-wise equality compare for byte/word/dword vectors (pcmpeq*).
;; Operand 1 carries the "%" commutative marker.
(define_insn "sse2_eq<mode>3"
  [(set (match_operand:SSEMODE124 0 "register_operand" "=x")
        (eq:SSEMODE124
          (match_operand:SSEMODE124 1 "nonimmediate_operand" "%0")
          (match_operand:SSEMODE124 2 "nonimmediate_operand" "xm")))]
  "TARGET_SSE2 && ix86_binary_operator_ok (EQ, <MODE>mode, operands)"
  "pcmpeq<ssevecsize>\t{%2, %0|%0, %2}"
  [(set_attr "type" "ssecmp")
   (set_attr "mode" "TI")])

;; Element-wise greater-than compare (pcmpgt*).  Not commutative, so
;; operand 1 must be the register tied to the destination.
(define_insn "sse2_gt<mode>3"
  [(set (match_operand:SSEMODE124 0 "register_operand" "=x")
        (gt:SSEMODE124
          (match_operand:SSEMODE124 1 "register_operand" "0")
          (match_operand:SSEMODE124 2 "nonimmediate_operand" "xm")))]
  "TARGET_SSE2"
  "pcmpgt<ssevecsize>\t{%2, %0|%0, %2}"
  [(set_attr "type" "ssecmp")
   (set_attr "mode" "TI")])

;; Vector conditional select: op0 = (op4 <cmp3> op5) ? op1 : op2.
;; All of the work is delegated to ix86_expand_int_vcond; the expansion
;; fails when that helper returns false.
(define_expand "vcond<mode>"
  [(set (match_operand:SSEMODE124 0 "register_operand" "")
        (if_then_else:SSEMODE124
          (match_operator 3 ""
            [(match_operand:SSEMODE124 4 "nonimmediate_operand" "")
             (match_operand:SSEMODE124 5 "nonimmediate_operand" "")])
          (match_operand:SSEMODE124 1 "general_operand" "")
          (match_operand:SSEMODE124 2 "general_operand" "")))]
  "TARGET_SSE2"
{
  if (!ix86_expand_int_vcond (operands))
    FAIL;
  DONE;
})

;; Same shape as vcond<mode>, under the vcondu name, and handed to the
;; same helper.
(define_expand "vcondu<mode>"
  [(set (match_operand:SSEMODE124 0 "register_operand" "")
        (if_then_else:SSEMODE124
          (match_operator 3 ""
            [(match_operand:SSEMODE124 4 "nonimmediate_operand" "")
             (match_operand:SSEMODE124 5 "nonimmediate_operand" "")])
          (match_operand:SSEMODE124 1 "general_operand" "")
          (match_operand:SSEMODE124 2 "general_operand" "")))]
  "TARGET_SSE2"
{
  if (!ix86_expand_int_vcond (operands))
    FAIL;
  DONE;
})

;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;;
;; Parallel integral logical operations
;;
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;

;; Bitwise NOT, expressed as XOR with an all-ones constant vector that is
;; built here and forced into a register as operand 2.
(define_expand "one_cmpl<mode>2"
  [(set (match_operand:SSEMODEI 0 "register_operand" "")
        (xor:SSEMODEI (match_operand:SSEMODEI 1 "nonimmediate_operand" "")
                      (match_dup 2)))]
  "TARGET_SSE2"
{
  int nunits = GET_MODE_NUNITS (<MODE>mode);
  rtvec ones = rtvec_alloc (nunits);
  int idx;

  /* Every element of the mask is -1.  */
  for (idx = 0; idx < nunits; idx++)
    RTVEC_ELT (ones, idx) = constm1_rtx;

  operands[2] = force_reg (<MODE>mode,
                           gen_rtx_CONST_VECTOR (<MODE>mode, ones));
})

;; Bitwise AND.  The expander only fixes up operands for the insn below.
(define_expand "and<mode>3"
  [(set (match_operand:SSEMODEI 0 "register_operand" "")
        (and:SSEMODEI (match_operand:SSEMODEI 1 "nonimmediate_operand" "")
                      (match_operand:SSEMODEI 2 "nonimmediate_operand" "")))]
  "TARGET_SSE2"
{
  ix86_fixup_binary_operands_no_copy (AND, <MODE>mode, operands);
})

(define_insn "*and<mode>3"
  [(set (match_operand:SSEMODEI 0 "register_operand" "=x")
        (and:SSEMODEI
          (match_operand:SSEMODEI 1 "nonimmediate_operand" "%0")
          (match_operand:SSEMODEI 2 "nonimmediate_operand" "xm")))]
  "TARGET_SSE2 && ix86_binary_operator_ok (AND, <MODE>mode, operands)"
  "pand\t{%2, %0|%0, %2}"
  [(set_attr "type" "sselog")
   (set_attr "mode" "TI")])

;; AND with the complement of operand 1: op0 = ~op1 & op2 (pandn).
(define_insn "sse2_nand<mode>3"
  [(set (match_operand:SSEMODEI 0 "register_operand" "=x")
        (and:SSEMODEI
          (not:SSEMODEI (match_operand:SSEMODEI 1 "register_operand" "0"))
          (match_operand:SSEMODEI 2 "nonimmediate_operand" "xm")))]
  "TARGET_SSE2"
  "pandn\t{%2, %0|%0, %2}"
  [(set_attr "type" "sselog")
   (set_attr "mode" "TI")])

;; Bitwise inclusive OR.
(define_expand "ior<mode>3"
  [(set (match_operand:SSEMODEI 0 "register_operand" "")
        (ior:SSEMODEI (match_operand:SSEMODEI 1 "nonimmediate_operand" "")
                      (match_operand:SSEMODEI 2 "nonimmediate_operand" "")))]
  "TARGET_SSE2"
{
  ix86_fixup_binary_operands_no_copy (IOR, <MODE>mode, operands);
})

(define_insn "*ior<mode>3"
  [(set (match_operand:SSEMODEI 0 "register_operand" "=x")
        (ior:SSEMODEI
          (match_operand:SSEMODEI 1 "nonimmediate_operand" "%0")
          (match_operand:SSEMODEI 2 "nonimmediate_operand" "xm")))]
  "TARGET_SSE2 && ix86_binary_operator_ok (IOR, <MODE>mode, operands)"
  "por\t{%2, %0|%0, %2}"
  [(set_attr "type" "sselog")
   (set_attr "mode" "TI")])

;; Bitwise exclusive OR.
(define_expand "xor<mode>3"
  [(set (match_operand:SSEMODEI 0 "register_operand" "")
        (xor:SSEMODEI (match_operand:SSEMODEI 1 "nonimmediate_operand" "")
                      (match_operand:SSEMODEI 2 "nonimmediate_operand" "")))]
  "TARGET_SSE2"
{
  ix86_fixup_binary_operands_no_copy (XOR, <MODE>mode, operands);
})

(define_insn "*xor<mode>3"
  [(set (match_operand:SSEMODEI 0 "register_operand" "=x")
        (xor:SSEMODEI
          (match_operand:SSEMODEI 1 "nonimmediate_operand" "%0")
          (match_operand:SSEMODEI 2 "nonimmediate_operand" "xm")))]
  "TARGET_SSE2 && ix86_binary_operator_ok (XOR, <MODE>mode, operands)"
  "pxor\t{%2, %0|%0, %2}"
  [(set_attr "type" "sselog")
   (set_attr "mode" "TI")])

;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;;
;; Parallel integral element swizzling
;;
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;

;; Pack words to bytes with signed saturation; operand 1 supplies the
;; first half of the result and operand 2 the second half.
(define_insn "sse2_packsswb"
  [(set (match_operand:V16QI 0 "register_operand" "=x")
        (vec_concat:V16QI
          (ss_truncate:V8QI
            (match_operand:V8HI 1 "register_operand" "0"))
          (ss_truncate:V8QI
            (match_operand:V8HI 2 "nonimmediate_operand" "xm"))))]
  "TARGET_SSE2"
  "packsswb\t{%2, %0|%0, %2}"
  [(set_attr "type" "sselog")
   (set_attr "mode" "TI")])

;; Pack dwords to words with signed saturation.
(define_insn "sse2_packssdw"
  [(set (match_operand:V8HI 0 "register_operand" "=x")
        (vec_concat:V8HI
          (ss_truncate:V4HI
            (match_operand:V4SI 1 "register_operand" "0"))
          (ss_truncate:V4HI
            (match_operand:V4SI 2 "nonimmediate_operand" "xm"))))]
  "TARGET_SSE2"
  "packssdw\t{%2, %0|%0, %2}"
  [(set_attr "type" "sselog")
   (set_attr "mode" "TI")])

;; Pack words to bytes with unsigned saturation.
(define_insn "sse2_packuswb"
  [(set (match_operand:V16QI 0 "register_operand" "=x")
        (vec_concat:V16QI
          (us_truncate:V8QI
            (match_operand:V8HI 1 "register_operand" "0"))
          (us_truncate:V8QI
            (match_operand:V8HI 2 "nonimmediate_operand" "xm"))))]
  "TARGET_SSE2"
  "packuswb\t{%2, %0|%0, %2}"
  [(set_attr "type" "sselog")
   (set_attr "mode" "TI")])

;; The punpck* patterns below all select from the concatenation of
;; operand 1 and operand 2, alternating one element from each.

;; Interleave the bytes at indices 8..15 of each operand.
(define_insn "sse2_punpckhbw"
  [(set (match_operand:V16QI 0 "register_operand" "=x")
        (vec_select:V16QI
          (vec_concat:V32QI
            (match_operand:V16QI 1 "register_operand" "0")
            (match_operand:V16QI 2 "nonimmediate_operand" "xm"))
          (parallel [(const_int 8)  (const_int 24)
                     (const_int 9)  (const_int 25)
                     (const_int 10) (const_int 26)
                     (const_int 11) (const_int 27)
                     (const_int 12) (const_int 28)
                     (const_int 13) (const_int 29)
                     (const_int 14) (const_int 30)
                     (const_int 15) (const_int 31)])))]
  "TARGET_SSE2"
  "punpckhbw\t{%2, %0|%0, %2}"
  [(set_attr "type" "sselog")
   (set_attr "mode" "TI")])

;; Interleave the bytes at indices 0..7 of each operand.
(define_insn "sse2_punpcklbw"
  [(set (match_operand:V16QI 0 "register_operand" "=x")
        (vec_select:V16QI
          (vec_concat:V32QI
            (match_operand:V16QI 1 "register_operand" "0")
            (match_operand:V16QI 2 "nonimmediate_operand" "xm"))
          (parallel [(const_int 0) (const_int 16)
                     (const_int 1) (const_int 17)
                     (const_int 2) (const_int 18)
                     (const_int 3) (const_int 19)
                     (const_int 4) (const_int 20)
                     (const_int 5) (const_int 21)
                     (const_int 6) (const_int 22)
                     (const_int 7) (const_int 23)])))]
  "TARGET_SSE2"
  "punpcklbw\t{%2, %0|%0, %2}"
  [(set_attr "type" "sselog")
   (set_attr "mode" "TI")])

;; Interleave the words at indices 4..7 of each operand.
(define_insn "sse2_punpckhwd"
  [(set (match_operand:V8HI 0 "register_operand" "=x")
        (vec_select:V8HI
          (vec_concat:V16HI
            (match_operand:V8HI 1 "register_operand" "0")
            (match_operand:V8HI 2 "nonimmediate_operand" "xm"))
          (parallel [(const_int 4) (const_int 12)
                     (const_int 5) (const_int 13)
                     (const_int 6) (const_int 14)
                     (const_int 7) (const_int 15)])))]
  "TARGET_SSE2"
  "punpckhwd\t{%2, %0|%0, %2}"
  [(set_attr "type" "sselog")
   (set_attr "mode" "TI")])

;; Interleave the words at indices 0..3 of each operand.
(define_insn "sse2_punpcklwd"
  [(set (match_operand:V8HI 0 "register_operand" "=x")
        (vec_select:V8HI
          (vec_concat:V16HI
            (match_operand:V8HI 1 "register_operand" "0")
            (match_operand:V8HI 2 "nonimmediate_operand" "xm"))
          (parallel [(const_int 0) (const_int 8)
                     (const_int 1) (const_int 9)
                     (const_int 2) (const_int 10)
                     (const_int 3) (const_int 11)])))]
  "TARGET_SSE2"
  "punpcklwd\t{%2, %0|%0, %2}"
  [(set_attr "type" "sselog")
   (set_attr "mode" "TI")])

;; Interleave the dwords at indices 2..3 of each operand.
(define_insn "sse2_punpckhdq"
  [(set (match_operand:V4SI 0 "register_operand" "=x")
        (vec_select:V4SI
          (vec_concat:V8SI
            (match_operand:V4SI 1 "register_operand" "0")
            (match_operand:V4SI 2 "nonimmediate_operand" "xm"))
          (parallel [(const_int 2) (const_int 6)
                     (const_int 3) (const_int 7)])))]
  "TARGET_SSE2"
  "punpckhdq\t{%2, %0|%0, %2}"
  [(set_attr "type" "sselog")
   (set_attr "mode" "TI")])

;; Interleave the dwords at indices 0..1 of each operand.
(define_insn "sse2_punpckldq"
  [(set (match_operand:V4SI 0 "register_operand" "=x")
        (vec_select:V4SI
          (vec_concat:V8SI
            (match_operand:V4SI 1 "register_operand" "0")
            (match_operand:V4SI 2 "nonimmediate_operand" "xm"))
          (parallel [(const_int 0) (const_int 4)
                     (const_int 1) (const_int 5)])))]
  "TARGET_SSE2"
  "punpckldq\t{%2, %0|%0, %2}"
  [(set_attr "type" "sselog")
   (set_attr "mode" "TI")])

;; Combine element 1 of each operand.
(define_insn "sse2_punpckhqdq"
  [(set (match_operand:V2DI 0 "register_operand" "=x")
        (vec_select:V2DI
          (vec_concat:V4DI
            (match_operand:V2DI 1 "register_operand" "0")
            (match_operand:V2DI 2 "nonimmediate_operand" "xm"))
          (parallel [(const_int 1)
                     (const_int 3)])))]
  "TARGET_SSE2"
  "punpckhqdq\t{%2, %0|%0, %2}"
  [(set_attr "type" "sselog")
   (set_attr "mode" "TI")])

;; Combine element 0 of each operand.
(define_insn "sse2_punpcklqdq"
  [(set (match_operand:V2DI 0 "register_operand" "=x")
        (vec_select:V2DI
          (vec_concat:V4DI
            (match_operand:V2DI 1 "register_operand" "0")
            (match_operand:V2DI 2 "nonimmediate_operand" "xm"))
          (parallel [(const_int 0)
                     (const_int 2)])))]
  "TARGET_SSE2"
  "punpcklqdq\t{%2, %0|%0, %2}"
  [(set_attr "type" "sselog")
   (set_attr "mode" "TI")])

;; Insert a 16-bit value into one word of a V8HI.  The expander receives
;; an SImode value and an element index 0..7; it narrows the value to
;; HImode and turns the index into the one-hot vec_merge mask that the
;; insn below matches.
(define_expand "sse2_pinsrw"
  [(set (match_operand:V8HI 0 "register_operand" "")
        (vec_merge:V8HI
          (vec_duplicate:V8HI
            (match_operand:SI 2 "nonimmediate_operand" ""))
          (match_operand:V8HI 1 "register_operand" "")
          (match_operand:SI 3 "const_0_to_7_operand" "")))]
  "TARGET_SSE2"
{
  operands[2] = gen_lowpart (HImode, operands[2]);
  /* Element index -> single-bit merge mask.  */
  operands[3] = GEN_INT ((1 << INTVAL (operands[3])));
})

;; The insn form carries the one-hot mask; it is converted back to an
;; element index with exact_log2 when the instruction is printed.
(define_insn "*sse2_pinsrw"
  [(set (match_operand:V8HI 0 "register_operand" "=x")
        (vec_merge:V8HI
          (vec_duplicate:V8HI
            (match_operand:HI 2 "nonimmediate_operand" "rm"))
          (match_operand:V8HI 1 "register_operand" "0")
          (match_operand:SI 3 "const_pow2_1_to_128_operand" "n")))]
  "TARGET_SSE2"
{
  int elt_index = exact_log2 (INTVAL (operands[3]));

  operands[3] = GEN_INT (elt_index);
  return "pinsrw\t{%3, %k2, %0|%0, %k2, %3}";
}
  [(set_attr "type" "sselog")
   (set_attr "mode" "TI")])

;; Extract one word, selected by operand 2, zero-extended into a
;; general register.
(define_insn "sse2_pextrw"
  [(set (match_operand:SI 0 "register_operand" "=r")
        (zero_extend:SI
          (vec_select:HI
            (match_operand:V8HI 1 "register_operand" "x")
            (parallel [(match_operand:SI 2 "const_0_to_7_operand" "n")]))))]
  "TARGET_SSE2"
  "pextrw\t{%2, %1, %0|%0, %1, %2}"
  [(set_attr "type" "sselog")
   (set_attr "mode" "TI")])

;; pshufd taking a packed 8-bit selector: splits the mask into four
;; 2-bit element selectors and emits sse2_pshufd_1.
(define_expand "sse2_pshufd"
  [(match_operand:V4SI 0 "register_operand" "")
   (match_operand:V4SI 1 "nonimmediate_operand" "")
   (match_operand:SI 2 "const_int_operand" "")]
  "TARGET_SSE2"
{
  int mask = INTVAL (operands[2]);
  rtx sel0 = GEN_INT ((mask >> 0) & 3);
  rtx sel1 = GEN_INT ((mask >> 2) & 3);
  rtx sel2 = GEN_INT ((mask >> 4) & 3);
  rtx sel3 = GEN_INT ((mask >> 6) & 3);

  emit_insn (gen_sse2_pshufd_1 (operands[0], operands[1],
                                sel0, sel1, sel2, sel3));
  DONE;
})

;; pshufd as an explicit vec_select; the four selectors are re-packed
;; into a single immediate at output time.
(define_insn "sse2_pshufd_1"
  [(set (match_operand:V4SI 0 "register_operand" "=x")
        (vec_select:V4SI
          (match_operand:V4SI 1 "nonimmediate_operand" "xm")
          (parallel [(match_operand 2 "const_0_to_3_operand" "")
                     (match_operand 3 "const_0_to_3_operand" "")
                     (match_operand 4 "const_0_to_3_operand" "")
                     (match_operand 5 "const_0_to_3_operand" "")])))]
  "TARGET_SSE2"
{
  /* Two bits per selector, lowest element first.  */
  int mask = (INTVAL (operands[2]) << 0
              | INTVAL (operands[3]) << 2
              | INTVAL (operands[4]) << 4
              | INTVAL (operands[5]) << 6);

  operands[2] = GEN_INT (mask);

  return "pshufd\t{%2, %1, %0|%0, %1, %2}";
}
  [(set_attr "type" "sselog1")
   (set_attr "mode" "TI")])

;; pshuflw taking a packed 8-bit selector; see sse2_pshufd.
(define_expand "sse2_pshuflw"
  [(match_operand:V8HI 0 "register_operand" "")
   (match_operand:V8HI 1 "nonimmediate_operand" "")
   (match_operand:SI 2 "const_int_operand" "")]
  "TARGET_SSE2"
{
  int mask = INTVAL (operands[2]);
  rtx sel0 = GEN_INT ((mask >> 0) & 3);
  rtx sel1 = GEN_INT ((mask >> 2) & 3);
  rtx sel2 = GEN_INT ((mask >> 4) & 3);
  rtx sel3 = GEN_INT ((mask >> 6) & 3);

  emit_insn (gen_sse2_pshuflw_1 (operands[0], operands[1],
                                 sel0, sel1, sel2, sel3));
  DONE;
})

;; Shuffle words 0..3; words 4..7 are selected in place.
(define_insn "sse2_pshuflw_1"
  [(set (match_operand:V8HI 0 "register_operand" "=x")
        (vec_select:V8HI
          (match_operand:V8HI 1 "nonimmediate_operand" "xm")
          (parallel [(match_operand 2 "const_0_to_3_operand" "")
                     (match_operand 3 "const_0_to_3_operand" "")
                     (match_operand 4 "const_0_to_3_operand" "")
                     (match_operand 5 "const_0_to_3_operand" "")
                     (const_int 4)
                     (const_int 5)
                     (const_int 6)
                     (const_int 7)])))]
  "TARGET_SSE2"
{
  /* Two bits per selector, lowest element first.  */
  int mask = (INTVAL (operands[2]) << 0
              | INTVAL (operands[3]) << 2
              | INTVAL (operands[4]) << 4
              | INTVAL (operands[5]) << 6);

  operands[2] = GEN_INT (mask);

  return "pshuflw\t{%2, %1, %0|%0, %1, %2}";
}
  [(set_attr "type" "sselog")
   (set_attr "mode" "TI")])

;; pshufhw taking a packed 8-bit selector.  Each 2-bit field is biased
;; by 4 so the selectors index words 4..7 of the source.
(define_expand "sse2_pshufhw"
  [(match_operand:V8HI 0 "register_operand" "")
   (match_operand:V8HI 1 "nonimmediate_operand" "")
   (match_operand:SI 2 "const_int_operand" "")]
  "TARGET_SSE2"
{
  int mask = INTVAL (operands[2]);
  rtx sel0 = GEN_INT (((mask >> 0) & 3) + 4);
  rtx sel1 = GEN_INT (((mask >> 2) & 3) + 4);
  rtx sel2 = GEN_INT (((mask >> 4) & 3) + 4);
  rtx sel3 = GEN_INT (((mask >> 6) & 3) + 4);

  emit_insn (gen_sse2_pshufhw_1 (operands[0], operands[1],
                                 sel0, sel1, sel2, sel3));
  DONE;
})

;; Shuffle words 4..7; words 0..3 are selected in place.  The bias of 4
;; is removed again when the immediate is rebuilt.
(define_insn "sse2_pshufhw_1"
  [(set (match_operand:V8HI 0 "register_operand" "=x")
        (vec_select:V8HI
          (match_operand:V8HI 1 "nonimmediate_operand" "xm")
          (parallel [(const_int 0)
                     (const_int 1)
                     (const_int 2)
                     (const_int 3)
                     (match_operand 2 "const_4_to_7_operand" "")
                     (match_operand 3 "const_4_to_7_operand" "")
                     (match_operand 4 "const_4_to_7_operand" "")
                     (match_operand 5 "const_4_to_7_operand" "")])))]
  "TARGET_SSE2"
{
  /* Two bits per selector after removing the +4 bias.  */
  int mask = ((INTVAL (operands[2]) - 4) << 0
              | (INTVAL (operands[3]) - 4) << 2
              | (INTVAL (operands[4]) - 4) << 4
              | (INTVAL (operands[5]) - 4) << 6);

  operands[2] = GEN_INT (mask);

  return "pshufhw\t{%2, %1, %0|%0, %1, %2}";
}
  [(set_attr "type" "sselog")
   (set_attr "mode" "TI")])

;; Load a 32-bit scalar into element 0 of a V4SI, merging with a zero
;; vector that is supplied here as operand 2.
(define_expand "sse2_loadd"
  [(set (match_operand:V4SI 0 "register_operand" "")
        (vec_merge:V4SI
          (vec_duplicate:V4SI
            (match_operand:SI 1 "nonimmediate_operand" ""))
          (match_dup 2)
          (const_int 1)))]
  "TARGET_SSE"
{
  operands[2] = CONST0_RTX (V4SImode);
})

;; Set element 0 from a scalar; the remaining elements come from
;; operand 1, which is either zero or the destination register itself.
(define_insn "sse2_loadld"
  [(set (match_operand:V4SI 0 "register_operand" "=Y,x,x")
        (vec_merge:V4SI
          (vec_duplicate:V4SI
            (match_operand:SI 2 "nonimmediate_operand" "mr,m,x"))
          (match_operand:V4SI 1 "reg_or_0_operand" " C,C,0")
          (const_int 1)))]
  "TARGET_SSE"
  "@
   movd\t{%2, %0|%0, %2}
   movss\t{%2, %0|%0, %2}
   movss\t{%2, %0|%0, %2}"
  [(set_attr "type" "ssemov")
   (set_attr "mode" "TI,V4SF,SF")])

;; ??? The hardware supports more, but TARGET_INTER_UNIT_MOVES must
;; be taken into account, and movdi isn't fully populated even without.
;; Extract element 0 as SImode.  After reload this is split into a plain
;; SImode move from the same hard register number.
(define_insn_and_split "sse2_stored"
  [(set (match_operand:SI 0 "nonimmediate_operand" "=mx")
        (vec_select:SI
          (match_operand:V4SI 1 "register_operand" "x")
          (parallel [(const_int 0)])))]
  "TARGET_SSE"
  "#"
  "&& reload_completed"
  [(set (match_dup 0) (match_dup 1))]
{
  operands[1] = gen_rtx_REG (SImode, REGNO (operands[1]));
})

;; Named entry point for extracting element 0 of a V2DI; it has no
;; preparation code of its own.
(define_expand "sse_storeq"
  [(set (match_operand:DI 0 "nonimmediate_operand" "")
        (vec_select:DI
          (match_operand:V2DI 1 "register_operand" "")
          (parallel [(const_int 0)])))]
  "TARGET_SSE"
  "")

;; ??? The hardware supports more, but TARGET_INTER_UNIT_MOVES must
;; be taken into account, and movdi isn't fully populated even without.
;; Extract element 0 of a V2DI as DImode.  Always emitted as "#" and
;; lowered by the define_split that follows.
(define_insn "*sse2_storeq"
  [(set (match_operand:DI 0 "nonimmediate_operand" "=mx")
        (vec_select:DI
          (match_operand:V2DI 1 "register_operand" "x")
          (parallel [(const_int 0)])))]
  "TARGET_SSE"
  "#")

;; After reload, rewrite the element-0 extract as a plain DImode move
;; from the same hard register number.
(define_split
  [(set (match_operand:DI 0 "nonimmediate_operand" "")
        (vec_select:DI
          (match_operand:V2DI 1 "register_operand" "")
          (parallel [(const_int 0)])))]
  "TARGET_SSE && reload_completed"
  [(set (match_dup 0) (match_dup 1))]
{
  unsigned int src_regno = REGNO (operands[1]);

  operands[1] = gen_rtx_REG (DImode, src_regno);
})

;; Extract element 1 of a V2DI (SSE2 variant).  Alternatives, in order:
;; store to memory, shift the register in place, load from an
;; offsettable memory operand using the %H1 address.
(define_insn "*vec_extractv2di_1_sse2"
  [(set (match_operand:DI 0 "nonimmediate_operand" "=m,x,x")
        (vec_select:DI
          (match_operand:V2DI 1 "nonimmediate_operand" "x,0,o")
          (parallel [(const_int 1)])))]
  "TARGET_SSE2 && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
  "@
   movhps\t{%1, %0|%0, %1}
   psrldq\t{$8, %0|%0, 8}
   movq\t{%H1, %0|%0, %H1}"
  [(set_attr "type" "ssemov,sseishft,ssemov")
   (set_attr "memory" "*,none,*")
   (set_attr "mode" "V2SF,TI,TI")])

;; Not sure this is ever used, but it doesn't hurt to have it.  -aoliva
;; Extract element 1 of a V2DI when only SSE1 is available.
(define_insn "*vec_extractv2di_1_sse"
  [(set (match_operand:DI 0 "nonimmediate_operand" "=m,x,x")
        (vec_select:DI
          (match_operand:V2DI 1 "nonimmediate_operand" "x,x,o")
          (parallel [(const_int 1)])))]
  "!TARGET_SSE2 && TARGET_SSE
   && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
  "@
   movhps\t{%1, %0|%0, %1}
   movhlps\t{%1, %0|%0, %1}
   movlps\t{%H1, %0|%0, %H1}"
  [(set_attr "type" "ssemov")
   (set_attr "mode" "V2SF,V4SF,V2SF")])

;; Broadcast an SImode value held in an SSE register to all four
;; elements: pshufd in the first alternative, in-place shufps in the
;; second.
(define_insn "*vec_dupv4si"
  [(set (match_operand:V4SI 0 "register_operand" "=Y,x")
        (vec_duplicate:V4SI
          (match_operand:SI 1 "register_operand" " Y,0")))]
  "TARGET_SSE"
  "@
   pshufd\t{$0, %1, %0|%0, %1, 0}
   shufps\t{$0, %0, %0|%0, %0, 0}"
  [(set_attr "type" "sselog1")
   (set_attr "mode" "TI,V4SF")])

;; Broadcast a DImode value to both elements.  The source is tied to the
;; destination in both alternatives, so the templates name only %0.
(define_insn "*vec_dupv2di"
  [(set (match_operand:V2DI 0 "register_operand" "=Y,x")
        (vec_duplicate:V2DI
          (match_operand:DI 1 "register_operand" " 0,0")))]
  "TARGET_SSE"
  "@
   punpcklqdq\t%0, %0
   movlhps\t%0, %0"
  [(set_attr "type" "sselog1,ssemov")
   (set_attr "mode" "TI,V4SF")])

;; ??? In theory we can match memory for the MMX alternative, but allowing
;; nonimmediate_operand for operand 2 and *not* allowing memory for the SSE
;; alternatives pretty much forces the MMX alternative to be chosen.
;; Build a V2SI from two SImode values (SSE2 variant).  Alternatives 0
;; and 1 use SSE registers, 2 and 3 use MMX registers; a zero operand 2
;; is handled with a plain movd.
(define_insn "*sse2_concatv2si"
  [(set (match_operand:V2SI 0 "register_operand"     "=Y, Y,*y,*y")
        (vec_concat:V2SI
          (match_operand:SI 1 "nonimmediate_operand" " 0,rm, 0,rm")
          (match_operand:SI 2 "reg_or_0_operand"     " Y, C,*y, C")))]
  "TARGET_SSE2"
  "@
   punpckldq\t{%2, %0|%0, %2}
   movd\t{%1, %0|%0, %1}
   punpckldq\t{%2, %0|%0, %2}
   movd\t{%1, %0|%0, %1}"
  [(set_attr "type" "sselog,ssemov,mmxcvt,mmxmov")
   (set_attr "mode" "TI,TI,DI,DI")])

;; Build a V2SI from two SImode values (SSE1 variant).
(define_insn "*sse1_concatv2si"
  [(set (match_operand:V2SI 0 "register_operand"     "=x,x,*y,*y")
        (vec_concat:V2SI
          (match_operand:SI 1 "nonimmediate_operand" " 0,m, 0,*rm")
          (match_operand:SI 2 "reg_or_0_operand"     " x,C,*y,C")))]
  "TARGET_SSE"
  "@
   unpcklps\t{%2, %0|%0, %2}
   movss\t{%1, %0|%0, %1}
   punpckldq\t{%2, %0|%0, %2}
   movd\t{%1, %0|%0, %1}"
  [(set_attr "type" "sselog,ssemov,mmxcvt,mmxmov")
   (set_attr "mode" "V4SF,V4SF,DI,DI")])

;; Concatenate two V2SI halves into a V4SI; operand 1 is tied to the
;; destination in every alternative.
(define_insn "*vec_concatv4si_1"
  [(set (match_operand:V4SI 0 "register_operand"       "=Y,x,x")
        (vec_concat:V4SI
          (match_operand:V2SI 1 "register_operand"     " 0,0,0")
          (match_operand:V2SI 2 "nonimmediate_operand" " Y,x,m")))]
  "TARGET_SSE"
  "@
   punpcklqdq\t{%2, %0|%0, %2}
   movlhps\t{%2, %0|%0, %2}
   movhps\t{%2, %0|%0, %2}"
  [(set_attr "type" "sselog,ssemov,ssemov")
   (set_attr "mode" "TI,V4SF,V2SF")])

;; Concatenate two DImode values into a V2DI.  The first two
;; alternatives pair operand 1 with a constant-zero operand 2; the last
;; four tie one operand to the destination and bring in the other.
(define_insn "*vec_concatv2di"
  [(set (match_operand:V2DI 0 "register_operand"     "=Y,?Y,Y,x,x,x")
        (vec_concat:V2DI
          (match_operand:DI 1 "nonimmediate_operand" " m,*y,0,0,0,m")
          (match_operand:DI 2 "vector_move_operand"  " C, C,Y,x,m,0")))]
  "TARGET_SSE"
  "@
   movq\t{%1, %0|%0, %1}
   movq2dq\t{%1, %0|%0, %1}
   punpcklqdq\t{%2, %0|%0, %2}
   movlhps\t{%2, %0|%0, %2}
   movhps\t{%2, %0|%0, %2}
   movlps\t{%1, %0|%0, %1}"
  [(set_attr "type" "ssemov,ssemov,sselog,ssemov,ssemov,ssemov")
   (set_attr "mode" "TI,TI,TI,V4SF,V2SF,V2SF")])

;; The vec_set / vec_extract / vec_init expanders below are thin
;; wrappers.  Each passes its operands to the matching
;; ix86_expand_vector_* helper with a first argument of false, and for
;; set/extract the element index is taken from operand 2.

;; V2DI: set one element.
(define_expand "vec_setv2di"
  [(match_operand:V2DI 0 "register_operand" "")
   (match_operand:DI 1 "register_operand" "")
   (match_operand 2 "const_int_operand" "")]
  "TARGET_SSE"
{
  ix86_expand_vector_set (false, operands[0], operands[1],
                          INTVAL (operands[2]));
  DONE;
})

;; V2DI: extract one element.
(define_expand "vec_extractv2di"
  [(match_operand:DI 0 "register_operand" "")
   (match_operand:V2DI 1 "register_operand" "")
   (match_operand 2 "const_int_operand" "")]
  "TARGET_SSE"
{
  ix86_expand_vector_extract (false, operands[0], operands[1],
                              INTVAL (operands[2]));
  DONE;
})

;; V2DI: initialize from operand 1.
(define_expand "vec_initv2di"
  [(match_operand:V2DI 0 "register_operand" "")
   (match_operand 1 "" "")]
  "TARGET_SSE"
{
  ix86_expand_vector_init (false, operands[0], operands[1]);
  DONE;
})

;; V4SI: set one element.
(define_expand "vec_setv4si"
  [(match_operand:V4SI 0 "register_operand" "")
   (match_operand:SI 1 "register_operand" "")
   (match_operand 2 "const_int_operand" "")]
  "TARGET_SSE"
{
  ix86_expand_vector_set (false, operands[0], operands[1],
                          INTVAL (operands[2]));
  DONE;
})

;; V4SI: extract one element.
(define_expand "vec_extractv4si"
  [(match_operand:SI 0 "register_operand" "")
   (match_operand:V4SI 1 "register_operand" "")
   (match_operand 2 "const_int_operand" "")]
  "TARGET_SSE"
{
  ix86_expand_vector_extract (false, operands[0], operands[1],
                              INTVAL (operands[2]));
  DONE;
})

;; V4SI: initialize from operand 1.
(define_expand "vec_initv4si"
  [(match_operand:V4SI 0 "register_operand" "")
   (match_operand 1 "" "")]
  "TARGET_SSE"
{
  ix86_expand_vector_init (false, operands[0], operands[1]);
  DONE;
})

;; V8HI: set one element.
(define_expand "vec_setv8hi"
  [(match_operand:V8HI 0 "register_operand" "")
   (match_operand:HI 1 "register_operand" "")
   (match_operand 2 "const_int_operand" "")]
  "TARGET_SSE"
{
  ix86_expand_vector_set (false, operands[0], operands[1],
                          INTVAL (operands[2]));
  DONE;
})

;; V8HI: extract one element.
(define_expand "vec_extractv8hi"
  [(match_operand:HI 0 "register_operand" "")
   (match_operand:V8HI 1 "register_operand" "")
   (match_operand 2 "const_int_operand" "")]
  "TARGET_SSE"
{
  ix86_expand_vector_extract (false, operands[0], operands[1],
                              INTVAL (operands[2]));
  DONE;
})

;; V8HI: initialize from operand 1.
(define_expand "vec_initv8hi"
  [(match_operand:V8HI 0 "register_operand" "")
   (match_operand 1 "" "")]
  "TARGET_SSE"
{
  ix86_expand_vector_init (false, operands[0], operands[1]);
  DONE;
})

;; V16QI: set one element.
(define_expand "vec_setv16qi"
  [(match_operand:V16QI 0 "register_operand" "")
   (match_operand:QI 1 "register_operand" "")
   (match_operand 2 "const_int_operand" "")]
  "TARGET_SSE"
{
  ix86_expand_vector_set (false, operands[0], operands[1],
                          INTVAL (operands[2]));
  DONE;
})

;; V16QI: extract one element.
(define_expand "vec_extractv16qi"
  [(match_operand:QI 0 "register_operand" "")
   (match_operand:V16QI 1 "register_operand" "")
   (match_operand 2 "const_int_operand" "")]
  "TARGET_SSE"
{
  ix86_expand_vector_extract (false, operands[0], operands[1],
                              INTVAL (operands[2]));
  DONE;
})

;; V16QI: initialize from operand 1.
(define_expand "vec_initv16qi"
  [(match_operand:V16QI 0 "register_operand" "")
   (match_operand 1 "" "")]
  "TARGET_SSE"
{
  ix86_expand_vector_init (false, operands[0], operands[1]);
  DONE;
})

;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;;
;; Miscellaneous
;;
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;

;; Unsigned byte average, written out as: zero-extend both inputs, add
;; them, add a vector of ones, shift right by one, truncate (pavgb).
(define_insn "sse2_uavgv16qi3"
  [(set (match_operand:V16QI 0 "register_operand" "=x")
        (truncate:V16QI
          (lshiftrt:V16HI
            (plus:V16HI
              (plus:V16HI
                (zero_extend:V16HI
                  (match_operand:V16QI 1 "nonimmediate_operand" "%0"))
                (zero_extend:V16HI
                  (match_operand:V16QI 2 "nonimmediate_operand" "xm")))
              (const_vector:V16QI [(const_int 1) (const_int 1)
                                   (const_int 1) (const_int 1)
                                   (const_int 1) (const_int 1)
                                   (const_int 1) (const_int 1)
                                   (const_int 1) (const_int 1)
                                   (const_int 1) (const_int 1)
                                   (const_int 1) (const_int 1)
                                   (const_int 1) (const_int 1)]))
            (const_int 1))))]
  "TARGET_SSE2 && ix86_binary_operator_ok (PLUS, V16QImode, operands)"
  "pavgb\t{%2, %0|%0, %2}"
  [(set_attr "type" "sseiadd")
   (set_attr "mode" "TI")])

;; Unsigned word average, same construction as the byte form (pavgw).
(define_insn "sse2_uavgv8hi3"
  [(set (match_operand:V8HI 0 "register_operand" "=x")
        (truncate:V8HI
          (lshiftrt:V8SI
            (plus:V8SI
              (plus:V8SI
                (zero_extend:V8SI
                  (match_operand:V8HI 1 "nonimmediate_operand" "%0"))
                (zero_extend:V8SI
                  (match_operand:V8HI 2 "nonimmediate_operand" "xm")))
              (const_vector:V8HI [(const_int 1) (const_int 1)
                                  (const_int 1) (const_int 1)
                                  (const_int 1) (const_int 1)
                                  (const_int 1) (const_int 1)]))
            (const_int 1))))]
  "TARGET_SSE2 && ix86_binary_operator_ok (PLUS, V8HImode, operands)"
  "pavgw\t{%2, %0|%0, %2}"
  [(set_attr "type" "sseiadd")
   (set_attr "mode" "TI")])

;; The correct representation for this is absolutely enormous, and
;; surely not generally useful.
(define_insn "sse2_psadbw"
  [(set (match_operand:V2DI 0 "register_operand" "=x")
        (unspec:V2DI [(match_operand:V16QI 1 "register_operand" "0")
                      (match_operand:V16QI 2 "nonimmediate_operand" "xm")]
                     UNSPEC_PSADBW))]
  "TARGET_SSE2"
  "psadbw\t{%2, %0|%0, %2}"
  [(set_attr "type" "sseiadd")
   (set_attr "mode" "TI")])

;; Mask extraction: each pattern moves a mask derived from an SSE
;; register (operand 1) into a general register (operand 0).  All three
;; are modelled as UNSPEC_MOVMSK.
(define_insn "sse_movmskps"
  [(set (match_operand:SI 0 "register_operand" "=r")
        (unspec:SI [(match_operand:V4SF 1 "register_operand" "x")]
                   UNSPEC_MOVMSK))]
  "TARGET_SSE"
  "movmskps\t{%1, %0|%0, %1}"
  [(set_attr "type" "ssecvt")
   (set_attr "mode" "V4SF")])

(define_insn "sse2_movmskpd"
  [(set (match_operand:SI 0 "register_operand" "=r")
        (unspec:SI [(match_operand:V2DF 1 "register_operand" "x")]
                   UNSPEC_MOVMSK))]
  "TARGET_SSE2"
  "movmskpd\t{%1, %0|%0, %1}"
  [(set_attr "type" "ssecvt")
   (set_attr "mode" "V2DF")])

;; NOTE(review): the "mode" attribute below is V2DF although the source
;; operand is V16QI; this looks copied from sse2_movmskpd.  It is left
;; unchanged because other attributes may be derived from "mode" --
;; confirm against the attribute definitions before changing it.
(define_insn "sse2_pmovmskb"
  [(set (match_operand:SI 0 "register_operand" "=r")
        (unspec:SI [(match_operand:V16QI 1 "register_operand" "x")]
                   UNSPEC_MOVMSK))]
  "TARGET_SSE2"
  "pmovmskb\t{%1, %0|%0, %1}"
  [(set_attr "type" "ssecvt")
   (set_attr "mode" "V2DF")])

;; Masked byte store.  The expander takes the destination as a memory
;; operand; the two insns below match the same unspec with the address
;; held in a register of class "D", one for 32-bit and one for 64-bit
;; addresses.
(define_expand "sse2_maskmovdqu"
  [(set (match_operand:V16QI 0 "memory_operand" "")
        (unspec:V16QI [(match_operand:V16QI 1 "register_operand" "x")
                       (match_operand:V16QI 2 "register_operand" "x")
                       (match_dup 0)]
                      UNSPEC_MASKMOV))]
  "TARGET_SSE2"
  "")

(define_insn "*sse2_maskmovdqu"
  [(set (mem:V16QI (match_operand:SI 0 "register_operand" "D"))
        (unspec:V16QI [(match_operand:V16QI 1 "register_operand" "x")
                       (match_operand:V16QI 2 "register_operand" "x")
                       (mem:V16QI (match_dup 0))]
                      UNSPEC_MASKMOV))]
  "TARGET_SSE2 && !TARGET_64BIT"
  ;; @@@ check ordering of operands in intel/nonintel syntax
  "maskmovdqu\t{%2, %1|%1, %2}"
  [(set_attr "type" "ssecvt")
   (set_attr "mode" "TI")])

(define_insn "*sse2_maskmovdqu_rex64"
  [(set (mem:V16QI (match_operand:DI 0 "register_operand" "D"))
        (unspec:V16QI [(match_operand:V16QI 1 "register_operand" "x")
                       (match_operand:V16QI 2 "register_operand" "x")
                       (mem:V16QI (match_dup 0))]
                      UNSPEC_MASKMOV))]
  "TARGET_SSE2 && TARGET_64BIT"
  ;; @@@ check ordering of operands in intel/nonintel syntax
  "maskmovdqu\t{%2, %1|%1, %2}"
  [(set_attr "type" "ssecvt")
   (set_attr "mode" "TI")])

;; MXCSR load and store.  Both go through memory and are volatile
;; unspecs.
(define_insn "sse_ldmxcsr"
  [(unspec_volatile [(match_operand:SI 0 "memory_operand" "m")]
                    UNSPECV_LDMXCSR)]
  "TARGET_SSE"
  "ldmxcsr\t%0"
  [(set_attr "type" "sse")
   (set_attr "memory" "load")])

(define_insn "sse_stmxcsr"
  [(set (match_operand:SI 0 "memory_operand" "=m")
        (unspec_volatile:SI [(const_int 0)] UNSPECV_STMXCSR))]
  "TARGET_SSE"
  "stmxcsr\t%0"
  [(set_attr "type" "sse")
   (set_attr "memory" "store")])

;; Fences.  Each expander creates a volatile BLKmode MEM with a scratch
;; address and sets it to an unspec of itself; the matching insn emits
;; the bare instruction.
(define_expand "sse_sfence"
  [(set (match_dup 0)
        (unspec:BLK [(match_dup 0)] UNSPEC_SFENCE))]
  "TARGET_SSE || TARGET_3DNOW_A"
{
  operands[0] = gen_rtx_MEM (BLKmode, gen_rtx_SCRATCH (Pmode));
  MEM_VOLATILE_P (operands[0]) = 1;
})

(define_insn "*sse_sfence"
  [(set (match_operand:BLK 0 "" "")
        (unspec:BLK [(match_dup 0)] UNSPEC_SFENCE))]
  "TARGET_SSE || TARGET_3DNOW_A"
  "sfence"
  [(set_attr "type" "sse")
   (set_attr "memory" "unknown")])

;; Cache-line flush of the address in operand 0.
(define_insn "sse2_clflush"
  [(unspec_volatile [(match_operand 0 "address_operand" "p")]
                    UNSPECV_CLFLUSH)]
  "TARGET_SSE2"
  "clflush\t%a0"
  [(set_attr "type" "sse")
   (set_attr "memory" "unknown")])

(define_expand "sse2_mfence"
  [(set (match_dup 0)
        (unspec:BLK [(match_dup 0)] UNSPEC_MFENCE))]
  "TARGET_SSE2"
{
  operands[0] = gen_rtx_MEM (BLKmode, gen_rtx_SCRATCH (Pmode));
  MEM_VOLATILE_P (operands[0]) = 1;
})

(define_insn "*sse2_mfence"
  [(set (match_operand:BLK 0 "" "")
        (unspec:BLK [(match_dup 0)] UNSPEC_MFENCE))]
  "TARGET_SSE2"
  "mfence"
  [(set_attr "type" "sse")
   (set_attr "memory" "unknown")])

(define_expand "sse2_lfence"
  [(set (match_dup 0)
        (unspec:BLK [(match_dup 0)] UNSPEC_LFENCE))]
  "TARGET_SSE2"
{
  operands[0] = gen_rtx_MEM (BLKmode, gen_rtx_SCRATCH (Pmode));
  MEM_VOLATILE_P (operands[0]) = 1;
})

(define_insn "*sse2_lfence"
  [(set (match_operand:BLK 0 "" "")
        (unspec:BLK [(match_dup 0)] UNSPEC_LFENCE))]
  "TARGET_SSE2"
  "lfence"
  [(set_attr "type" "sse")
   (set_attr "memory" "unknown")])

;; MONITOR/MWAIT.  The operands are pinned to fixed registers through
;; the "a", "c" and "d" constraints.
(define_insn "sse3_mwait"
  [(unspec_volatile [(match_operand:SI 0 "register_operand" "a")
                     (match_operand:SI 1 "register_operand" "c")]
                    UNSPECV_MWAIT)]
  "TARGET_SSE3"
;; 64bit version is "mwait %rax,%rcx". But only lower 32bits are used.
;; Since 32bit register operands are implicitly zero extended to 64bit,
;; we only need to set up 32bit registers.
  "mwait"
  [(set_attr "length" "3")])

(define_insn "sse3_monitor"
  [(unspec_volatile [(match_operand:SI 0 "register_operand" "a")
                     (match_operand:SI 1 "register_operand" "c")
                     (match_operand:SI 2 "register_operand" "d")]
                    UNSPECV_MONITOR)]
  "TARGET_SSE3 && !TARGET_64BIT"
  "monitor\t%0, %1, %2"
  [(set_attr "length" "3")])

(define_insn "sse3_monitor64"
  [(unspec_volatile [(match_operand:DI 0 "register_operand" "a")
                     (match_operand:SI 1 "register_operand" "c")
                     (match_operand:SI 2 "register_operand" "d")]
                    UNSPECV_MONITOR)]
  "TARGET_SSE3 && TARGET_64BIT"
;; 64bit version is "monitor %rax,%rcx,%rdx". But only lower 32bits in
;; RCX and RDX are used.  Since 32bit register operands are implicitly
;; zero extended to 64bit, we only need to set up 32bit registers.
  "monitor"
  [(set_attr "length" "3")])

;; SSSE3

;; Horizontal add/subtract.  Every pattern below combines adjacent
;; element pairs (0,1), (2,3), ... of operand 1 to form the low half of
;; the result and the same pairs of operand 2 to form the high half.
;; The "x" variants work on SSE registers (mode TI), the "y" variants on
;; MMX registers (mode DI).

;; phaddw, 8 x HI: pairwise plus.
(define_insn "ssse3_phaddwv8hi3"
  [(set (match_operand:V8HI 0 "register_operand" "=x")
        (vec_concat:V8HI
          (vec_concat:V4HI
            (vec_concat:V2HI
              (plus:HI
                (vec_select:HI
                  (match_operand:V8HI 1 "register_operand" "0")
                  (parallel [(const_int 0)]))
                (vec_select:HI (match_dup 1) (parallel [(const_int 1)])))
              (plus:HI
                (vec_select:HI (match_dup 1) (parallel [(const_int 2)]))
                (vec_select:HI (match_dup 1) (parallel [(const_int 3)]))))
            (vec_concat:V2HI
              (plus:HI
                (vec_select:HI (match_dup 1) (parallel [(const_int 4)]))
                (vec_select:HI (match_dup 1) (parallel [(const_int 5)])))
              (plus:HI
                (vec_select:HI (match_dup 1) (parallel [(const_int 6)]))
                (vec_select:HI (match_dup 1) (parallel [(const_int 7)])))))
          (vec_concat:V4HI
            (vec_concat:V2HI
              (plus:HI
                (vec_select:HI
                  (match_operand:V8HI 2 "nonimmediate_operand" "xm")
                  (parallel [(const_int 0)]))
                (vec_select:HI (match_dup 2) (parallel [(const_int 1)])))
              (plus:HI
                (vec_select:HI (match_dup 2) (parallel [(const_int 2)]))
                (vec_select:HI (match_dup 2) (parallel [(const_int 3)]))))
            (vec_concat:V2HI
              (plus:HI
                (vec_select:HI (match_dup 2) (parallel [(const_int 4)]))
                (vec_select:HI (match_dup 2) (parallel [(const_int 5)])))
              (plus:HI
                (vec_select:HI (match_dup 2) (parallel [(const_int 6)]))
                (vec_select:HI (match_dup 2) (parallel [(const_int 7)])))))))]
  "TARGET_SSSE3"
  "phaddw\t{%2, %0|%0, %2}"
  [(set_attr "type" "sseiadd")
   (set_attr "mode" "TI")])

;; phaddw, 4 x HI (MMX registers).
(define_insn "ssse3_phaddwv4hi3"
  [(set (match_operand:V4HI 0 "register_operand" "=y")
        (vec_concat:V4HI
          (vec_concat:V2HI
            (plus:HI
              (vec_select:HI
                (match_operand:V4HI 1 "register_operand" "0")
                (parallel [(const_int 0)]))
              (vec_select:HI (match_dup 1) (parallel [(const_int 1)])))
            (plus:HI
              (vec_select:HI (match_dup 1) (parallel [(const_int 2)]))
              (vec_select:HI (match_dup 1) (parallel [(const_int 3)]))))
          (vec_concat:V2HI
            (plus:HI
              (vec_select:HI
                (match_operand:V4HI 2 "nonimmediate_operand" "ym")
                (parallel [(const_int 0)]))
              (vec_select:HI (match_dup 2) (parallel [(const_int 1)])))
            (plus:HI
              (vec_select:HI (match_dup 2) (parallel [(const_int 2)]))
              (vec_select:HI (match_dup 2) (parallel [(const_int 3)]))))))]
  "TARGET_SSSE3"
  "phaddw\t{%2, %0|%0, %2}"
  [(set_attr "type" "sseiadd")
   (set_attr "mode" "DI")])

;; phaddd, 4 x SI.
(define_insn "ssse3_phadddv4si3"
  [(set (match_operand:V4SI 0 "register_operand" "=x")
        (vec_concat:V4SI
          (vec_concat:V2SI
            (plus:SI
              (vec_select:SI
                (match_operand:V4SI 1 "register_operand" "0")
                (parallel [(const_int 0)]))
              (vec_select:SI (match_dup 1) (parallel [(const_int 1)])))
            (plus:SI
              (vec_select:SI (match_dup 1) (parallel [(const_int 2)]))
              (vec_select:SI (match_dup 1) (parallel [(const_int 3)]))))
          (vec_concat:V2SI
            (plus:SI
              (vec_select:SI
                (match_operand:V4SI 2 "nonimmediate_operand" "xm")
                (parallel [(const_int 0)]))
              (vec_select:SI (match_dup 2) (parallel [(const_int 1)])))
            (plus:SI
              (vec_select:SI (match_dup 2) (parallel [(const_int 2)]))
              (vec_select:SI (match_dup 2) (parallel [(const_int 3)]))))))]
  "TARGET_SSSE3"
  "phaddd\t{%2, %0|%0, %2}"
  [(set_attr "type" "sseiadd")
   (set_attr "mode" "TI")])

;; phaddd, 2 x SI (MMX registers).
(define_insn "ssse3_phadddv2si3"
  [(set (match_operand:V2SI 0 "register_operand" "=y")
        (vec_concat:V2SI
          (plus:SI
            (vec_select:SI
              (match_operand:V2SI 1 "register_operand" "0")
              (parallel [(const_int 0)]))
            (vec_select:SI (match_dup 1) (parallel [(const_int 1)])))
          (plus:SI
            (vec_select:SI
              (match_operand:V2SI 2 "nonimmediate_operand" "ym")
              (parallel [(const_int 0)]))
            (vec_select:SI (match_dup 2) (parallel [(const_int 1)])))))]
  "TARGET_SSSE3"
  "phaddd\t{%2, %0|%0, %2}"
  [(set_attr "type" "sseiadd")
   (set_attr "mode" "DI")])

;; phaddsw, 8 x HI: pairwise ss_plus (signed saturating add).
(define_insn "ssse3_phaddswv8hi3"
  [(set (match_operand:V8HI 0 "register_operand" "=x")
        (vec_concat:V8HI
          (vec_concat:V4HI
            (vec_concat:V2HI
              (ss_plus:HI
                (vec_select:HI
                  (match_operand:V8HI 1 "register_operand" "0")
                  (parallel [(const_int 0)]))
                (vec_select:HI (match_dup 1) (parallel [(const_int 1)])))
              (ss_plus:HI
                (vec_select:HI (match_dup 1) (parallel [(const_int 2)]))
                (vec_select:HI (match_dup 1) (parallel [(const_int 3)]))))
            (vec_concat:V2HI
              (ss_plus:HI
                (vec_select:HI (match_dup 1) (parallel [(const_int 4)]))
                (vec_select:HI (match_dup 1) (parallel [(const_int 5)])))
              (ss_plus:HI
                (vec_select:HI (match_dup 1) (parallel [(const_int 6)]))
                (vec_select:HI (match_dup 1) (parallel [(const_int 7)])))))
          (vec_concat:V4HI
            (vec_concat:V2HI
              (ss_plus:HI
                (vec_select:HI
                  (match_operand:V8HI 2 "nonimmediate_operand" "xm")
                  (parallel [(const_int 0)]))
                (vec_select:HI (match_dup 2) (parallel [(const_int 1)])))
              (ss_plus:HI
                (vec_select:HI (match_dup 2) (parallel [(const_int 2)]))
                (vec_select:HI (match_dup 2) (parallel [(const_int 3)]))))
            (vec_concat:V2HI
              (ss_plus:HI
                (vec_select:HI (match_dup 2) (parallel [(const_int 4)]))
                (vec_select:HI (match_dup 2) (parallel [(const_int 5)])))
              (ss_plus:HI
                (vec_select:HI (match_dup 2) (parallel [(const_int 6)]))
                (vec_select:HI (match_dup 2) (parallel [(const_int 7)])))))))]
  "TARGET_SSSE3"
  "phaddsw\t{%2, %0|%0, %2}"
  [(set_attr "type" "sseiadd")
   (set_attr "mode" "TI")])

;; phaddsw, 4 x HI (MMX registers).
(define_insn "ssse3_phaddswv4hi3"
  [(set (match_operand:V4HI 0 "register_operand" "=y")
        (vec_concat:V4HI
          (vec_concat:V2HI
            (ss_plus:HI
              (vec_select:HI
                (match_operand:V4HI 1 "register_operand" "0")
                (parallel [(const_int 0)]))
              (vec_select:HI (match_dup 1) (parallel [(const_int 1)])))
            (ss_plus:HI
              (vec_select:HI (match_dup 1) (parallel [(const_int 2)]))
              (vec_select:HI (match_dup 1) (parallel [(const_int 3)]))))
          (vec_concat:V2HI
            (ss_plus:HI
              (vec_select:HI
                (match_operand:V4HI 2 "nonimmediate_operand" "ym")
                (parallel [(const_int 0)]))
              (vec_select:HI (match_dup 2) (parallel [(const_int 1)])))
            (ss_plus:HI
              (vec_select:HI (match_dup 2) (parallel [(const_int 2)]))
              (vec_select:HI (match_dup 2) (parallel [(const_int 3)]))))))]
  "TARGET_SSSE3"
  "phaddsw\t{%2, %0|%0, %2}"
  [(set_attr "type" "sseiadd")
   (set_attr "mode" "DI")])

;; phsubw, 8 x HI: pairwise minus (even element minus odd element).
(define_insn "ssse3_phsubwv8hi3"
  [(set (match_operand:V8HI 0 "register_operand" "=x")
        (vec_concat:V8HI
          (vec_concat:V4HI
            (vec_concat:V2HI
              (minus:HI
                (vec_select:HI
                  (match_operand:V8HI 1 "register_operand" "0")
                  (parallel [(const_int 0)]))
                (vec_select:HI (match_dup 1) (parallel [(const_int 1)])))
              (minus:HI
                (vec_select:HI (match_dup 1) (parallel [(const_int 2)]))
                (vec_select:HI (match_dup 1) (parallel [(const_int 3)]))))
            (vec_concat:V2HI
              (minus:HI
                (vec_select:HI (match_dup 1) (parallel [(const_int 4)]))
                (vec_select:HI (match_dup 1) (parallel [(const_int 5)])))
              (minus:HI
                (vec_select:HI (match_dup 1) (parallel [(const_int 6)]))
                (vec_select:HI (match_dup 1) (parallel [(const_int 7)])))))
          (vec_concat:V4HI
            (vec_concat:V2HI
              (minus:HI
                (vec_select:HI
                  (match_operand:V8HI 2 "nonimmediate_operand" "xm")
                  (parallel [(const_int 0)]))
                (vec_select:HI (match_dup 2) (parallel [(const_int 1)])))
              (minus:HI
                (vec_select:HI (match_dup 2) (parallel [(const_int 2)]))
                (vec_select:HI (match_dup 2) (parallel [(const_int 3)]))))
            (vec_concat:V2HI
              (minus:HI
                (vec_select:HI (match_dup 2) (parallel [(const_int 4)]))
                (vec_select:HI (match_dup 2) (parallel [(const_int 5)])))
              (minus:HI
                (vec_select:HI (match_dup 2) (parallel [(const_int 6)]))
                (vec_select:HI (match_dup 2) (parallel [(const_int 7)])))))))]
  "TARGET_SSSE3"
  "phsubw\t{%2, %0|%0, %2}"
  [(set_attr "type" "sseiadd")
   (set_attr "mode" "TI")])

;; phsubw, 4 x HI (MMX registers).
(define_insn "ssse3_phsubwv4hi3"
  [(set (match_operand:V4HI 0 "register_operand" "=y")
        (vec_concat:V4HI
          (vec_concat:V2HI
            (minus:HI
              (vec_select:HI
                (match_operand:V4HI 1 "register_operand" "0")
                (parallel [(const_int 0)]))
              (vec_select:HI (match_dup 1) (parallel [(const_int 1)])))
            (minus:HI
              (vec_select:HI (match_dup 1) (parallel [(const_int 2)]))
              (vec_select:HI (match_dup 1) (parallel [(const_int 3)]))))
          (vec_concat:V2HI
            (minus:HI
              (vec_select:HI
                (match_operand:V4HI 2 "nonimmediate_operand" "ym")
                (parallel [(const_int 0)]))
              (vec_select:HI (match_dup 2) (parallel [(const_int 1)])))
            (minus:HI
              (vec_select:HI (match_dup 2) (parallel [(const_int 2)]))
              (vec_select:HI (match_dup 2) (parallel [(const_int 3)]))))))]
  "TARGET_SSSE3"
  "phsubw\t{%2, %0|%0, %2}"
  [(set_attr "type" "sseiadd")
   (set_attr "mode" "DI")])

;; phsubd, 4 x SI.
(define_insn "ssse3_phsubdv4si3"
  [(set (match_operand:V4SI 0 "register_operand" "=x")
        (vec_concat:V4SI
          (vec_concat:V2SI
            (minus:SI
              (vec_select:SI
                (match_operand:V4SI 1 "register_operand" "0")
                (parallel [(const_int 0)]))
              (vec_select:SI (match_dup 1) (parallel [(const_int 1)])))
            (minus:SI
              (vec_select:SI (match_dup 1) (parallel [(const_int 2)]))
              (vec_select:SI (match_dup 1) (parallel [(const_int 3)]))))
          (vec_concat:V2SI
            (minus:SI
              (vec_select:SI
                (match_operand:V4SI 2 "nonimmediate_operand" "xm")
                (parallel [(const_int 0)]))
              (vec_select:SI (match_dup 2) (parallel [(const_int 1)])))
            (minus:SI
              (vec_select:SI (match_dup 2) (parallel [(const_int 2)]))
              (vec_select:SI (match_dup 2) (parallel [(const_int 3)]))))))]
  "TARGET_SSSE3"
  "phsubd\t{%2, %0|%0, %2}"
  [(set_attr "type" "sseiadd")
   (set_attr "mode" "TI")])

;; phsubd, 2 x SI (MMX registers).
(define_insn "ssse3_phsubdv2si3"
  [(set (match_operand:V2SI 0 "register_operand" "=y")
        (vec_concat:V2SI
          (minus:SI
            (vec_select:SI
              (match_operand:V2SI 1 "register_operand" "0")
              (parallel [(const_int 0)]))
            (vec_select:SI (match_dup 1) (parallel [(const_int 1)])))
          (minus:SI
            (vec_select:SI
              (match_operand:V2SI 2 "nonimmediate_operand" "ym")
              (parallel [(const_int 0)]))
            (vec_select:SI (match_dup 2) (parallel [(const_int 1)])))))]
  "TARGET_SSSE3"
  "phsubd\t{%2, %0|%0, %2}"
  [(set_attr "type" "sseiadd")
   (set_attr "mode" "DI")])

;; phsubsw, 8 x HI: pairwise ss_minus (signed saturating subtract).
(define_insn "ssse3_phsubswv8hi3"
  [(set (match_operand:V8HI 0 "register_operand" "=x")
        (vec_concat:V8HI
          (vec_concat:V4HI
            (vec_concat:V2HI
              (ss_minus:HI
                (vec_select:HI
                  (match_operand:V8HI 1 "register_operand" "0")
                  (parallel [(const_int 0)]))
                (vec_select:HI (match_dup 1) (parallel [(const_int 1)])))
              (ss_minus:HI
                (vec_select:HI (match_dup 1) (parallel [(const_int 2)]))
                (vec_select:HI (match_dup 1) (parallel [(const_int 3)]))))
            (vec_concat:V2HI
              (ss_minus:HI
                (vec_select:HI (match_dup 1) (parallel [(const_int 4)]))
                (vec_select:HI (match_dup 1) (parallel [(const_int 5)])))
              (ss_minus:HI
                (vec_select:HI (match_dup 1) (parallel [(const_int 6)]))
                (vec_select:HI (match_dup 1) (parallel [(const_int 7)])))))
          (vec_concat:V4HI
            (vec_concat:V2HI
              (ss_minus:HI
                (vec_select:HI
                  (match_operand:V8HI 2 "nonimmediate_operand" "xm")
                  (parallel [(const_int 0)]))
                (vec_select:HI (match_dup 2) (parallel [(const_int 1)])))
              (ss_minus:HI
                (vec_select:HI (match_dup 2) (parallel [(const_int 2)]))
                (vec_select:HI (match_dup 2) (parallel [(const_int 3)]))))
            (vec_concat:V2HI
              (ss_minus:HI
                (vec_select:HI (match_dup 2) (parallel [(const_int 4)]))
                (vec_select:HI (match_dup 2) (parallel [(const_int 5)])))
              (ss_minus:HI
                (vec_select:HI (match_dup 2) (parallel [(const_int 6)]))
                (vec_select:HI (match_dup 2) (parallel [(const_int 7)])))))))]
  "TARGET_SSSE3"
  "phsubsw\t{%2, %0|%0, %2}"
  [(set_attr "type" "sseiadd")
   (set_attr "mode" "TI")])

;; phsubsw, 4 x HI (MMX registers).
(define_insn "ssse3_phsubswv4hi3"
  [(set (match_operand:V4HI 0 "register_operand" "=y")
        (vec_concat:V4HI
          (vec_concat:V2HI
            (ss_minus:HI
              (vec_select:HI
                (match_operand:V4HI 1 "register_operand" "0")
                (parallel [(const_int 0)]))
              (vec_select:HI (match_dup 1) (parallel [(const_int 1)])))
            (ss_minus:HI
              (vec_select:HI (match_dup 1) (parallel [(const_int 2)]))
              (vec_select:HI (match_dup 1) (parallel [(const_int 3)]))))
          (vec_concat:V2HI
            (ss_minus:HI
              (vec_select:HI
                (match_operand:V4HI 2 "nonimmediate_operand" "ym")
                (parallel [(const_int 0)]))
              (vec_select:HI (match_dup 2) (parallel [(const_int 1)])))
            (ss_minus:HI
              (vec_select:HI (match_dup 2) (parallel [(const_int 2)]))
              (vec_select:HI (match_dup 2) (parallel [(const_int 3)]))))))]
  "TARGET_SSSE3"
  "phsubsw\t{%2, %0|%0, %2}"
  [(set_attr "type" "sseiadd")
   (set_attr "mode" "DI")])

;; pmaddubsw: multiply the zero-extended bytes of operand 1 by the
;; sign-extended bytes of operand 2, then ss_plus the even-lane and
;; odd-lane products.
;;
;; The two inputs are extended differently, so the operation is NOT
;; commutative: operand 1 must stay tied to the destination and carries
;; no "%" modifier.  Each vec_select picks 8 of the 16 bytes, so its
;; mode is V8QI.
(define_insn "ssse3_pmaddubswv8hi3"
  [(set (match_operand:V8HI 0 "register_operand" "=x")
        (ss_plus:V8HI
          (mult:V8HI
            (zero_extend:V8HI
              (vec_select:V8QI
                (match_operand:V16QI 1 "register_operand" "0")
                (parallel [(const_int 0)
                           (const_int 2)
                           (const_int 4)
                           (const_int 6)
                           (const_int 8)
                           (const_int 10)
                           (const_int 12)
                           (const_int 14)])))
            (sign_extend:V8HI
              (vec_select:V8QI
                (match_operand:V16QI 2 "nonimmediate_operand" "xm")
                (parallel [(const_int 0)
                           (const_int 2)
                           (const_int 4)
                           (const_int 6)
                           (const_int 8)
                           (const_int 10)
                           (const_int 12)
                           (const_int 14)]))))
          (mult:V8HI
            (zero_extend:V8HI
              (vec_select:V8QI (match_dup 1)
                (parallel [(const_int 1)
                           (const_int 3)
                           (const_int 5)
                           (const_int 7)
                           (const_int 9)
                           (const_int 11)
                           (const_int 13)
                           (const_int 15)])))
            (sign_extend:V8HI
              (vec_select:V8QI (match_dup 2)
                (parallel [(const_int 1)
                           (const_int 3)
                           (const_int 5)
                           (const_int 7)
                           (const_int 9)
                           (const_int 11)
                           (const_int 13)
                           (const_int 15)]))))))]
  "TARGET_SSSE3"
  "pmaddubsw\t{%2, %0|%0, %2}"
  [(set_attr "type" "sseiadd")
   (set_attr "mode" "TI")])

;; MMX-register pmaddubsw.  Same non-commutative shape as the V8HI
;; pattern; each vec_select picks 4 of the 8 bytes, so its mode is V4QI.
(define_insn "ssse3_pmaddubswv4hi3"
  [(set (match_operand:V4HI 0 "register_operand" "=y")
        (ss_plus:V4HI
          (mult:V4HI
            (zero_extend:V4HI
              (vec_select:V4QI
                (match_operand:V8QI 1 "register_operand" "0")
                (parallel [(const_int 0)
                           (const_int 2)
                           (const_int 4)
                           (const_int 6)])))
            (sign_extend:V4HI
              (vec_select:V4QI
                (match_operand:V8QI 2 "nonimmediate_operand" "ym")
                (parallel [(const_int 0)
                           (const_int 2)
                           (const_int 4)
                           (const_int 6)]))))
          (mult:V4HI
            (zero_extend:V4HI
              (vec_select:V4QI (match_dup 1)
                (parallel [(const_int 1)
                           (const_int 3)
                           (const_int 5)
                           (const_int 7)])))
            (sign_extend:V4HI
              (vec_select:V4QI (match_dup 2)
                (parallel [(const_int 1)
                           (const_int 3)
                           (const_int 5)
                           (const_int 7)]))))))]
  "TARGET_SSSE3"
  "pmaddubsw\t{%2, %0|%0, %2}"
  [(set_attr "type" "sseiadd")
   (set_attr "mode" "DI")])

;; pmulhrsw: sign-extended multiply, shift right by 14, add a vector of
;; ones, shift right by 1 and truncate back to HImode elements.  Both
;; inputs are sign-extended, so the "%" on operand 1 is kept.
(define_insn "ssse3_pmulhrswv8hi3"
  [(set (match_operand:V8HI 0 "register_operand" "=x")
        (truncate:V8HI
          (lshiftrt:V8SI
            (plus:V8SI
              (lshiftrt:V8SI
                (mult:V8SI
                  (sign_extend:V8SI
                    (match_operand:V8HI 1 "nonimmediate_operand" "%0"))
                  (sign_extend:V8SI
                    (match_operand:V8HI 2 "nonimmediate_operand" "xm")))
                (const_int 14))
              (const_vector:V8HI [(const_int 1) (const_int 1)
                                  (const_int 1) (const_int 1)
                                  (const_int 1) (const_int 1)
                                  (const_int 1) (const_int 1)]))
            (const_int 1))))]
  "TARGET_SSSE3 && ix86_binary_operator_ok (MULT, V8HImode, operands)"
  "pmulhrsw\t{%2, %0|%0, %2}"
  [(set_attr "type" "sseimul")
   (set_attr "mode" "TI")])

(define_insn "ssse3_pmulhrswv4hi3"
  [(set (match_operand:V4HI 0 "register_operand" "=y")
        (truncate:V4HI
          (lshiftrt:V4SI
            (plus:V4SI
              (lshiftrt:V4SI
                (mult:V4SI
                  (sign_extend:V4SI
                    (match_operand:V4HI 1 "nonimmediate_operand" "%0"))
                  (sign_extend:V4SI
                    (match_operand:V4HI 2 "nonimmediate_operand" "ym")))
                (const_int 14))
              (const_vector:V4HI [(const_int 1) (const_int 1)
                                  (const_int 1) (const_int 1)]))
            (const_int 1))))]
  "TARGET_SSSE3 && ix86_binary_operator_ok (MULT, V4HImode, operands)"
  "pmulhrsw\t{%2, %0|%0, %2}"
  [(set_attr "type" "sseimul")
   (set_attr "mode" "DI")])

;; pshufb, modelled as UNSPEC_PSHUFB of operands 1 and 2.
(define_insn "ssse3_pshufbv16qi3"
  [(set (match_operand:V16QI 0 "register_operand" "=x")
        (unspec:V16QI [(match_operand:V16QI 1 "register_operand" "0")
                       (match_operand:V16QI 2 "nonimmediate_operand" "xm")]
                      UNSPEC_PSHUFB))]
  "TARGET_SSSE3"
  "pshufb\t{%2, %0|%0, %2}"
  [(set_attr "type" "sselog1")
   (set_attr "mode" "TI")])

(define_insn "ssse3_pshufbv8qi3"
  [(set (match_operand:V8QI 0 "register_operand" "=y")
        (unspec:V8QI [(match_operand:V8QI 1 "register_operand" "0")
                      (match_operand:V8QI 2 "nonimmediate_operand" "ym")]
                     UNSPEC_PSHUFB))]
  "TARGET_SSSE3"
  "pshufb\t{%2, %0|%0, %2}"
  [(set_attr "type" "sselog1")
   (set_attr "mode" "DI")])

;; psign{b,w,d}, modelled as UNSPEC_PSIGN; the mnemonic suffix comes
;; from the <ssevecsize> / <mmxvecsize> mode attributes.
(define_insn "ssse3_psign<mode>3"
  [(set (match_operand:SSEMODE124 0 "register_operand" "=x")
        (unspec:SSEMODE124
          [(match_operand:SSEMODE124 1 "register_operand" "0")
           (match_operand:SSEMODE124 2 "nonimmediate_operand" "xm")]
          UNSPEC_PSIGN))]
  "TARGET_SSSE3"
  "psign<ssevecsize>\t{%2, %0|%0, %2}"
  [(set_attr "type" "sselog1")
   (set_attr "mode" "TI")])

(define_insn "ssse3_psign<mode>3"
  [(set (match_operand:MMXMODEI 0 "register_operand" "=y")
        (unspec:MMXMODEI
          [(match_operand:MMXMODEI 1 "register_operand" "0")
           (match_operand:MMXMODEI 2 "nonimmediate_operand" "ym")]
          UNSPEC_PSIGN))]
  "TARGET_SSSE3"
  "psign<mmxvecsize>\t{%2, %0|%0, %2}"
  [(set_attr "type" "sselog1")
   (set_attr "mode" "DI")])

;; palignr.  Operand 3 must satisfy const_0_to_255_mul_8_operand; the
;; output code divides it by 8 before printing it as the immediate.
(define_insn "ssse3_palignrti"
  [(set (match_operand:TI 0 "register_operand" "=x")
        (unspec:TI [(match_operand:TI 1 "register_operand" "0")
                    (match_operand:TI 2 "nonimmediate_operand" "xm")
                    (match_operand:SI 3 "const_0_to_255_mul_8_operand" "n")]
                   UNSPEC_PALIGNR))]
  "TARGET_SSSE3"
{
  operands[3] = GEN_INT (INTVAL (operands[3]) / 8);
  return "palignr\t{%3, %2, %0|%0, %2, %3}";
}
  [(set_attr "type" "sseishft")
   (set_attr "mode" "TI")])

(define_insn "ssse3_palignrdi"
  [(set (match_operand:DI 0 "register_operand" "=y")
        (unspec:DI [(match_operand:DI 1 "register_operand" "0")
                    (match_operand:DI 2 "nonimmediate_operand" "ym")
                    (match_operand:SI 3 "const_0_to_255_mul_8_operand" "n")]
                   UNSPEC_PALIGNR))]
  "TARGET_SSSE3"
{
  operands[3] = GEN_INT (INTVAL (operands[3]) / 8);
  return "palignr\t{%3, %2, %0|%0, %2, %3}";
}
  [(set_attr "type" "sseishft")
   (set_attr "mode" "DI")])

;; pabs{b,w,d}: element-wise abs of operand 1.
(define_insn "abs<mode>2"
  [(set (match_operand:SSEMODE124 0 "register_operand" "=x")
        (abs:SSEMODE124
          (match_operand:SSEMODE124 1 "nonimmediate_operand" "xm")))]
  "TARGET_SSSE3"
  "pabs<ssevecsize>\t{%1, %0|%0, %1}"
  [(set_attr "type" "sselog1")
   (set_attr "mode" "TI")])

(define_insn "abs<mode>2"
  [(set (match_operand:MMXMODEI 0 "register_operand" "=y")
        (abs:MMXMODEI
          (match_operand:MMXMODEI 1 "nonimmediate_operand" "ym")))]
  "TARGET_SSSE3"
  "pabs<mmxvecsize>\t{%1, %0|%0, %1}"
  [(set_attr "type" "sselog1")
   (set_attr "mode" "DI")])

;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;;
;; AMD SSE4A instructions
;;
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;

;; movntsd / movntss stores, modelled as UNSPEC_MOVNT.  The "vm"
;; variants store element 0 of a vector register; the plain variants
;; store a scalar register.
(define_insn "sse4a_vmmovntv2df"
  [(set (match_operand:DF 0 "memory_operand" "=m")
        (unspec:DF [(vec_select:DF
                      (match_operand:V2DF 1 "register_operand" "x")
                      (parallel [(const_int 0)]))]
                   UNSPEC_MOVNT))]
  "TARGET_SSE4A"
  "movntsd\t{%1, %0|%0, %1}"
  [(set_attr "type" "ssemov")
   (set_attr "mode" "DF")])

(define_insn "sse4a_movntdf"
  [(set (match_operand:DF 0 "memory_operand" "=m")
        (unspec:DF [(match_operand:DF 1 "register_operand" "x")]
                   UNSPEC_MOVNT))]
  "TARGET_SSE4A"
  "movntsd\t{%1, %0|%0, %1}"
  [(set_attr "type" "ssemov")
   (set_attr "mode" "DF")])

(define_insn "sse4a_vmmovntv4sf"
  [(set (match_operand:SF 0 "memory_operand" "=m")
        (unspec:SF [(vec_select:SF
                      (match_operand:V4SF 1 "register_operand" "x")
                      (parallel [(const_int 0)]))]
                   UNSPEC_MOVNT))]
  "TARGET_SSE4A"
  "movntss\t{%1, %0|%0, %1}"
  [(set_attr "type" "ssemov")
   (set_attr "mode" "SF")])

(define_insn "sse4a_movntsf"
  [(set (match_operand:SF 0 "memory_operand" "=m")
        (unspec:SF [(match_operand:SF 1 "register_operand" "x")]
                   UNSPEC_MOVNT))]
  "TARGET_SSE4A"
  "movntss\t{%1, %0|%0, %1}"
  [(set_attr "type" "ssemov")
   (set_attr "mode" "SF")])

;; extrq with two immediate operands (operands 2 and 3).
(define_insn "sse4a_extrqi"
  [(set (match_operand:V2DI 0 "register_operand" "=x")
        (unspec:V2DI [(match_operand:V2DI 1 "register_operand" "0")
                      (match_operand 2 "const_int_operand" "")
                      (match_operand 3 "const_int_operand" "")]
                     UNSPEC_EXTRQI))]
  "TARGET_SSE4A"
  "extrq\t{%3, %2, %0|%0, %2, %3}"
  [(set_attr "type" "sse")
   (set_attr "mode" "TI")])

;; extrq with a register second operand (operand 2).
(define_insn "sse4a_extrq"
  [(set (match_operand:V2DI 0 "register_operand" "=x")
        (unspec:V2DI [(match_operand:V2DI 1 "register_operand" "0")
                      (match_operand:V16QI 2 "register_operand" "x")]
                     UNSPEC_EXTRQ))]
  "TARGET_SSE4A"
  "extrq\t{%2, %0|%0, %2}"
  [(set_attr "type" "sse")
   (set_attr "mode" "TI")])

;; insertq with two immediate operands (operands 3 and 4).
(define_insn "sse4a_insertqi"
  [(set (match_operand:V2DI 0 "register_operand" "=x")
        (unspec:V2DI [(match_operand:V2DI 1 "register_operand" "0")
                      (match_operand:V2DI 2 "register_operand" "x")
                      (match_operand 3 "const_int_operand" "")
                      (match_operand 4 "const_int_operand" "")]
                     UNSPEC_INSERTQI))]
  "TARGET_SSE4A"
  "insertq\t{%4, %3, %2, %0|%0, %2, %3, %4}"
  [(set_attr "type" "sseins")
   (set_attr "mode" "TI")])

;; insertq with only register operands.
(define_insn "sse4a_insertq"
  [(set (match_operand:V2DI 0 "register_operand" "=x")
        (unspec:V2DI [(match_operand:V2DI 1 "register_operand" "0")
                      (match_operand:V2DI 2 "register_operand" "x")]
                     UNSPEC_INSERTQ))]
  "TARGET_SSE4A"
  "insertq\t{%2, %0|%0, %2}"
  [(set_attr "type" "sseins")
   (set_attr "mode" "TI")])