;; GCC machine description for SSE instructions
;; Copyright (C) 2005, 2006
;; Free Software Foundation, Inc.
;;
;; This file is part of GCC.
;;
;; GCC is free software; you can redistribute it and/or modify
;; it under the terms of the GNU General Public License as published by
;; the Free Software Foundation; either version 2, or (at your option)
;; any later version.
;;
;; GCC is distributed in the hope that it will be useful,
;; but WITHOUT ANY WARRANTY; without even the implied warranty of
;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
;; GNU General Public License for more details.
;;
;; You should have received a copy of the GNU General Public License
;; along with GCC; see the file COPYING.  If not, write to
;; the Free Software Foundation, 51 Franklin Street, Fifth Floor,
;; Boston, MA 02110-1301, USA.


;; The 16-byte integer vector modes that SSE handles.  TImode is left
;; out on purpose: it gets special-cased for TARGET_64BIT.
(define_mode_macro SSEMODEI
  [V16QI V8HI V4SI V2DI])

;; Every 16-byte vector mode that SSE handles: the integer modes of
;; SSEMODEI plus the two floating-point modes.
(define_mode_macro SSEMODE
  [V16QI V8HI V4SI V2DI V4SF V2DF])

;; Mix-n-match subsets of the integer vector modes.  The digits in each
;; macro name appear to list the element sizes, in bytes, of the modes
;; it iterates over.
(define_mode_macro SSEMODE12
  [V16QI V8HI])
(define_mode_macro SSEMODE24
  [V8HI V4SI])
(define_mode_macro SSEMODE14
  [V16QI V4SI])
(define_mode_macro SSEMODE124
  [V16QI V8HI V4SI])
(define_mode_macro SSEMODE248
  [V8HI V4SI V2DI])

;; Mnemonic suffix letter for each integer vector mode.
(define_mode_attr ssevecsize
  [(V16QI "b")
   (V8HI "w")
   (V4SI "d")
   (V2DI "q")])

;; Patterns whose name begins with "sse{,2,3}_" are invoked by intrinsics.

;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;;
;; Move patterns
;;
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;

;; All of these patterns are enabled for SSE1 as well as SSE2.
;; This is essential for maintaining stable calling conventions.

;; Generic move expander for the 16-byte integer vector modes.  All the
;; work is delegated to ix86_expand_vector_move.
(define_expand "mov<mode>"
  [(set (match_operand:SSEMODEI 0 "nonimmediate_operand" "")
        (match_operand:SSEMODEI 1 "nonimmediate_operand" ""))]
  "TARGET_SSE"
{
  ix86_expand_vector_move (<MODE>mode, operands);
  DONE;
})

;; Integer vector move.  Alternative 0 materializes an SSE constant,
;; alternative 1 is a load or register copy, alternative 2 is a store.
;; Memory-to-memory moves are rejected by the insn condition.
(define_insn "*mov<mode>_internal"
  [(set (match_operand:SSEMODEI 0 "nonimmediate_operand" "=x,x ,m")
        (match_operand:SSEMODEI 1 "nonimmediate_or_sse_const_operand" "C ,xm,x"))]
  "TARGET_SSE && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
{
  switch (which_alternative)
    {
    case 0:
      return standard_sse_constant_opcode (insn, operands[1]);
    case 1:
    case 2:
      /* The "mode" attribute computed below picks the mnemonic.  */
      if (get_attr_mode (insn) == MODE_V4SF)
        return "movaps\t{%1, %0|%0, %1}";
      else
        return "movdqa\t{%1, %0|%0, %1}";
    default:
      gcc_unreachable ();
    }
}
  [(set_attr "type" "sselog1,ssemov,ssemov")
   ;; V4SF (movaps) when optimizing for size, when SSE2 is unavailable,
   ;; or for the store alternative when TARGET_SSE_TYPELESS_STORES is
   ;; set; TI (movdqa) otherwise.
   (set (attr "mode")
        (if_then_else
          (ior (ior (ne (symbol_ref "optimize_size") (const_int 0))
                    (eq (symbol_ref "TARGET_SSE2") (const_int 0)))
               (and (eq_attr "alternative" "2")
                    (ne (symbol_ref "TARGET_SSE_TYPELESS_STORES")
                        (const_int 0))))
          (const_string "V4SF")
          (const_string "TI")))])

;; V4SF move expander; delegates to ix86_expand_vector_move.
(define_expand "movv4sf"
  [(set (match_operand:V4SF 0 "nonimmediate_operand" "")
        (match_operand:V4SF 1 "nonimmediate_operand" ""))]
  "TARGET_SSE"
{
  ix86_expand_vector_move (V4SFmode, operands);
  DONE;
})

;; V4SF move.  Same three alternatives as the integer and V2DF move
;; insns in this file.  The condition rejects memory-to-memory moves,
;; which no alternative can satisfy, matching those sibling patterns.
(define_insn "*movv4sf_internal"
  [(set (match_operand:V4SF 0 "nonimmediate_operand" "=x,x,m")
        (match_operand:V4SF 1 "nonimmediate_or_sse_const_operand" "C,xm,x"))]
  "TARGET_SSE && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
{
  switch (which_alternative)
    {
    case 0:
      return standard_sse_constant_opcode (insn, operands[1]);
    case 1:
    case 2:
      return "movaps\t{%1, %0|%0, %1}";
    default:
      /* Only three alternatives exist; use the same unreachable marker
         as the other move insns rather than a bare abort.  */
      gcc_unreachable ();
    }
}
  [(set_attr "type" "sselog1,ssemov,ssemov")
   (set_attr "mode" "V4SF")])

;; After reload, rewrite a register load whose source satisfies
;; zero_extended_scalar_load_operand as a vec_merge that places the
;; SFmode scalar in element 0 of an all-zero V4SF vector.
(define_split
  [(set (match_operand:V4SF 0 "register_operand" "")
        (match_operand:V4SF 1 "zero_extended_scalar_load_operand" ""))]
  "TARGET_SSE && reload_completed"
  [(set (match_dup 0)
        (vec_merge:V4SF
          (vec_duplicate:V4SF (match_dup 1))
          (match_dup 2)
          (const_int 1)))]
{
  operands[1] = simplify_gen_subreg (SFmode, operands[1], V4SFmode, 0);
  operands[2] = CONST0_RTX (V4SFmode);
})

;; V2DF move expander; delegates to ix86_expand_vector_move.
(define_expand "movv2df"
  [(set (match_operand:V2DF 0 "nonimmediate_operand" "")
        (match_operand:V2DF 1 "nonimmediate_operand" ""))]
  "TARGET_SSE"
{
  ix86_expand_vector_move (V2DFmode, operands);
  DONE;
})

;; V2DF move.  Alternative 0 materializes an SSE constant, alternative 1
;; is a load or register copy, alternative 2 is a store.
(define_insn "*movv2df_internal"
  [(set (match_operand:V2DF 0 "nonimmediate_operand" "=x,x,m")
        (match_operand:V2DF 1 "nonimmediate_or_sse_const_operand" "C,xm,x"))]
  "TARGET_SSE && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
{
  switch (which_alternative)
    {
    case 0:
      return standard_sse_constant_opcode (insn, operands[1]);
    case 1:
    case 2:
      /* The "mode" attribute computed below picks the mnemonic.  */
      if (get_attr_mode (insn) == MODE_V4SF)
        return "movaps\t{%1, %0|%0, %1}";
      else
        return "movapd\t{%1, %0|%0, %1}";
    default:
      gcc_unreachable ();
    }
}
  [(set_attr "type" "sselog1,ssemov,ssemov")
   ;; Same selection rule as *mov<mode>_internal, with V2DF (movapd) as
   ;; the fallback instead of TI.
   (set (attr "mode")
        (if_then_else
          (ior (ior (ne (symbol_ref "optimize_size") (const_int 0))
                    (eq (symbol_ref "TARGET_SSE2") (const_int 0)))
               (and (eq_attr "alternative" "2")
                    (ne (symbol_ref "TARGET_SSE_TYPELESS_STORES")
                        (const_int 0))))
          (const_string "V4SF")
          (const_string "V2DF")))])

;; After reload, rewrite a register load whose source satisfies
;; zero_extended_scalar_load_operand as the concatenation of the DFmode
;; scalar with a DFmode zero.
(define_split
  [(set (match_operand:V2DF 0 "register_operand" "")
        (match_operand:V2DF 1 "zero_extended_scalar_load_operand" ""))]
  "TARGET_SSE2 && reload_completed"
  [(set (match_dup 0) (vec_concat:V2DF (match_dup 1) (match_dup 2)))]
{
  operands[1] = simplify_gen_subreg (DFmode, operands[1], V2DFmode, 0);
  operands[2] = CONST0_RTX (DFmode);
})

;; Push of a 16-byte vector register; delegates to ix86_expand_push.
(define_expand "push<mode>1"
  [(match_operand:SSEMODE 0 "register_operand" "")]
  "TARGET_SSE"
{
  ix86_expand_push (<MODE>mode, operands[0]);
  DONE;
})

;; Misaligned vector move; delegates to ix86_expand_vector_move_misalign.
(define_expand "movmisalign<mode>"
  [(set (match_operand:SSEMODE 0 "nonimmediate_operand" "")
        (match_operand:SSEMODE 1 "nonimmediate_operand" ""))]
  "TARGET_SSE"
{
  ix86_expand_vector_move_misalign (<MODE>mode, operands);
  DONE;
})

;; Unaligned V4SF move (load or store; never memory-to-memory).
;; The mode attribute is V4SF: this is the single-precision instruction,
;; not the V2DF one handled by sse2_movupd below.
(define_insn "sse_movups"
  [(set (match_operand:V4SF 0 "nonimmediate_operand" "=x,m")
        (unspec:V4SF [(match_operand:V4SF 1 "nonimmediate_operand" "xm,x")]
                     UNSPEC_MOVU))]
  "TARGET_SSE && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
  "movups\t{%1, %0|%0, %1}"
  [(set_attr "type" "ssemov")
   (set_attr "mode" "V4SF")])

;; Unaligned V2DF move (load or store; never memory-to-memory).
(define_insn "sse2_movupd"
  [(set (match_operand:V2DF 0 "nonimmediate_operand" "=x,m")
        (unspec:V2DF [(match_operand:V2DF 1 "nonimmediate_operand" "xm,x")]
                     UNSPEC_MOVU))]
  "TARGET_SSE2 && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
  "movupd\t{%1, %0|%0, %1}"
  [(set_attr "type" "ssemov")
   (set_attr "mode" "V2DF")])

;; Unaligned V16QI move (load or store; never memory-to-memory).
(define_insn "sse2_movdqu"
  [(set (match_operand:V16QI 0 "nonimmediate_operand" "=x,m")
        (unspec:V16QI [(match_operand:V16QI 1 "nonimmediate_operand" "xm,x")]
                      UNSPEC_MOVU))]
  "TARGET_SSE2 && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
  "movdqu\t{%1, %0|%0, %1}"
  [(set_attr "type" "ssemov")
   (set_attr "mode" "TI")])

;; Store of a V4SF register to memory via movntps.
(define_insn "sse_movntv4sf"
  [(set (match_operand:V4SF 0 "memory_operand" "=m")
        (unspec:V4SF [(match_operand:V4SF 1 "register_operand" "x")]
                     UNSPEC_MOVNT))]
  "TARGET_SSE"
  "movntps\t{%1, %0|%0, %1}"
  [(set_attr "type" "ssemov")
   (set_attr "mode" "V4SF")])

;; Store of a V2DF register to memory via movntpd.
(define_insn "sse2_movntv2df"
  [(set (match_operand:V2DF 0 "memory_operand" "=m")
        (unspec:V2DF [(match_operand:V2DF 1 "register_operand" "x")]
                     UNSPEC_MOVNT))]
  "TARGET_SSE2"
  "movntpd\t{%1, %0|%0, %1}"
  [(set_attr "type" "ssecvt")
   (set_attr "mode" "V2DF")])

;; Store of a V2DI register to memory via movntdq.
(define_insn "sse2_movntv2di"
  [(set (match_operand:V2DI 0 "memory_operand" "=m")
        (unspec:V2DI [(match_operand:V2DI 1 "register_operand" "x")]
                     UNSPEC_MOVNT))]
  "TARGET_SSE2"
  "movntdq\t{%1, %0|%0, %1}"
  [(set_attr "type" "ssecvt")
   (set_attr "mode" "TI")])

;; Store of a general-purpose SImode register to memory via movnti.
;; The operands are scalar SImode, so the mode attribute is SI rather
;; than a vector floating-point mode.
(define_insn "sse2_movntsi"
  [(set (match_operand:SI 0 "memory_operand" "=m")
        (unspec:SI [(match_operand:SI 1 "register_operand" "r")]
                   UNSPEC_MOVNT))]
  "TARGET_SSE2"
  "movnti\t{%1, %0|%0, %1}"
  [(set_attr "type" "ssecvt")
   (set_attr "mode" "SI")])

;; Load of a V16QI value from memory via lddqu.
;; NOTE(review): the UNSPEC_LDQQU spelling presumably matches the
;; constant declared outside this file -- do not "correct" it here
;; without checking that declaration.
(define_insn "sse3_lddqu"
  [(set (match_operand:V16QI 0 "register_operand" "=x")
        (unspec:V16QI [(match_operand:V16QI 1 "memory_operand" "m")]
                      UNSPEC_LDQQU))]
  "TARGET_SSE3"
  "lddqu\t{%1, %0|%0, %1}"
  [(set_attr "type" "ssecvt")
   (set_attr "mode" "TI")])

;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;;
;; Parallel single-precision floating point arithmetic
;;
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;

;; V4SF negation; expanded by ix86_expand_fp_absneg_operator.
(define_expand "negv4sf2"
  [(set (match_operand:V4SF 0 "register_operand" "")
        (neg:V4SF (match_operand:V4SF 1 "nonimmediate_operand" "")))]
  "TARGET_SSE"
  "ix86_expand_fp_absneg_operator (NEG, V4SFmode, operands); DONE;")

;; V4SF absolute value; expanded by ix86_expand_fp_absneg_operator.
(define_expand "absv4sf2"
  [(set (match_operand:V4SF 0 "register_operand" "")
        (abs:V4SF (match_operand:V4SF 1 "nonimmediate_operand" "")))]
  "TARGET_SSE"
  "ix86_expand_fp_absneg_operator (ABS, V4SFmode, operands); DONE;")

;; V4SF addition.  The expander only fixes up the operands; the insn
;; below does the matching.
(define_expand "addv4sf3"
  [(set (match_operand:V4SF 0 "register_operand" "")
        (plus:V4SF (match_operand:V4SF 1 "nonimmediate_operand" "")
                   (match_operand:V4SF 2 "nonimmediate_operand" "")))]
  "TARGET_SSE"
  "ix86_fixup_binary_operands_no_copy (PLUS, V4SFmode, operands);")

;; Packed add; operand 1 is tied to the destination and commutative
;; with operand 2 ("%0").
(define_insn "*addv4sf3"
  [(set (match_operand:V4SF 0 "register_operand" "=x")
        (plus:V4SF (match_operand:V4SF 1 "nonimmediate_operand" "%0")
                   (match_operand:V4SF 2 "nonimmediate_operand" "xm")))]
  "TARGET_SSE && ix86_binary_operator_ok (PLUS, V4SFmode, operands)"
  "addps\t{%2, %0|%0, %2}"
  [(set_attr "type" "sseadd")
   (set_attr "mode" "V4SF")])

;; Scalar add: merge mask 1 takes only element 0 from the sum; the
;; remaining elements come from operand 1.
(define_insn "sse_vmaddv4sf3"
  [(set (match_operand:V4SF 0 "register_operand" "=x")
        (vec_merge:V4SF
          (plus:V4SF (match_operand:V4SF 1 "register_operand" "0")
                     (match_operand:V4SF 2 "nonimmediate_operand" "xm"))
          (match_dup 1)
          (const_int 1)))]
  "TARGET_SSE && ix86_binary_operator_ok (PLUS, V4SFmode, operands)"
  "addss\t{%2, %0|%0, %2}"
  [(set_attr "type" "sseadd")
   (set_attr "mode" "SF")])

;; V4SF subtraction.  Operand 1 must be a register (not commutative).
(define_expand "subv4sf3"
  [(set (match_operand:V4SF 0 "register_operand" "")
        (minus:V4SF (match_operand:V4SF 1 "register_operand" "")
                    (match_operand:V4SF 2 "nonimmediate_operand" "")))]
  "TARGET_SSE"
  "ix86_fixup_binary_operands_no_copy (MINUS, V4SFmode, operands);")

;; Packed subtract.
(define_insn "*subv4sf3"
  [(set (match_operand:V4SF 0 "register_operand" "=x")
        (minus:V4SF (match_operand:V4SF 1 "register_operand" "0")
                    (match_operand:V4SF 2 "nonimmediate_operand" "xm")))]
  "TARGET_SSE"
  "subps\t{%2, %0|%0, %2}"
  [(set_attr "type" "sseadd")
   (set_attr "mode" "V4SF")])

;; Scalar subtract: element 0 from the difference, the rest from
;; operand 1.
(define_insn "sse_vmsubv4sf3"
  [(set (match_operand:V4SF 0 "register_operand" "=x")
        (vec_merge:V4SF
          (minus:V4SF (match_operand:V4SF 1 "register_operand" "0")
                      (match_operand:V4SF 2 "nonimmediate_operand" "xm"))
          (match_dup 1)
          (const_int 1)))]
  "TARGET_SSE"
  "subss\t{%2, %0|%0, %2}"
  [(set_attr "type" "sseadd")
   (set_attr "mode" "SF")])

;; V4SF multiplication.
(define_expand "mulv4sf3"
  [(set (match_operand:V4SF 0 "register_operand" "")
        (mult:V4SF (match_operand:V4SF 1 "nonimmediate_operand" "")
                   (match_operand:V4SF 2 "nonimmediate_operand" "")))]
  "TARGET_SSE"
  "ix86_fixup_binary_operands_no_copy (MULT, V4SFmode, operands);")

;; Packed multiply; operands 1 and 2 are commutative ("%0").
(define_insn "*mulv4sf3"
  [(set (match_operand:V4SF 0 "register_operand" "=x")
        (mult:V4SF (match_operand:V4SF 1 "nonimmediate_operand" "%0")
                   (match_operand:V4SF 2 "nonimmediate_operand" "xm")))]
  "TARGET_SSE && ix86_binary_operator_ok (MULT, V4SFmode, operands)"
  "mulps\t{%2, %0|%0, %2}"
  [(set_attr "type" "ssemul")
   (set_attr "mode" "V4SF")])

;; Scalar multiply: element 0 from the product, the rest from operand 1.
(define_insn "sse_vmmulv4sf3"
  [(set (match_operand:V4SF 0 "register_operand" "=x")
        (vec_merge:V4SF
          (mult:V4SF (match_operand:V4SF 1 "register_operand" "0")
                     (match_operand:V4SF 2 "nonimmediate_operand" "xm"))
          (match_dup 1)
          (const_int 1)))]
  "TARGET_SSE && ix86_binary_operator_ok (MULT, V4SFmode, operands)"
  "mulss\t{%2, %0|%0, %2}"
  [(set_attr "type" "ssemul")
   (set_attr "mode" "SF")])

;; V4SF division.  Operand 1 must be a register (not commutative).
(define_expand "divv4sf3"
  [(set (match_operand:V4SF 0 "register_operand" "")
        (div:V4SF (match_operand:V4SF 1 "register_operand" "")
                  (match_operand:V4SF 2 "nonimmediate_operand" "")))]
  "TARGET_SSE"
  "ix86_fixup_binary_operands_no_copy (DIV, V4SFmode, operands);")

;; Packed divide.
(define_insn "*divv4sf3"
  [(set (match_operand:V4SF 0 "register_operand" "=x")
        (div:V4SF (match_operand:V4SF 1 "register_operand" "0")
                  (match_operand:V4SF 2 "nonimmediate_operand" "xm")))]
  "TARGET_SSE"
  "divps\t{%2, %0|%0, %2}"
  [(set_attr "type" "ssediv")
   (set_attr "mode" "V4SF")])

;; Scalar divide: element 0 from the quotient, the rest from operand 1.
(define_insn "sse_vmdivv4sf3"
  [(set (match_operand:V4SF 0 "register_operand" "=x")
        (vec_merge:V4SF
          (div:V4SF (match_operand:V4SF 1 "register_operand" "0")
                    (match_operand:V4SF 2 "nonimmediate_operand" "xm"))
          (match_dup 1)
          (const_int 1)))]
  "TARGET_SSE"
  "divss\t{%2, %0|%0, %2}"
  [(set_attr "type" "ssediv")
   (set_attr "mode" "SF")])

;; Packed rcpps, modelled as an unspec.
(define_insn "sse_rcpv4sf2"
  [(set (match_operand:V4SF 0 "register_operand" "=x")
        (unspec:V4SF
          [(match_operand:V4SF 1 "nonimmediate_operand" "xm")] UNSPEC_RCP))]
  "TARGET_SSE"
  "rcpps\t{%1, %0|%0, %1}"
  [(set_attr "type" "sse")
   (set_attr "mode" "V4SF")])

;; Scalar rcpss: element 0 from the unspec of operand 1, the rest from
;; operand 2, which is tied to the destination.
(define_insn "sse_vmrcpv4sf2"
  [(set (match_operand:V4SF 0 "register_operand" "=x")
        (vec_merge:V4SF
          (unspec:V4SF [(match_operand:V4SF 1 "nonimmediate_operand" "xm")]
                       UNSPEC_RCP)
          (match_operand:V4SF 2 "register_operand" "0")
          (const_int 1)))]
  "TARGET_SSE"
  "rcpss\t{%1, %0|%0, %1}"
  [(set_attr "type" "sse")
   (set_attr "mode" "SF")])

;; Packed rsqrtps, modelled as an unspec.
(define_insn "sse_rsqrtv4sf2"
  [(set (match_operand:V4SF 0 "register_operand" "=x")
        (unspec:V4SF
          [(match_operand:V4SF 1 "nonimmediate_operand" "xm")] UNSPEC_RSQRT))]
  "TARGET_SSE"
  "rsqrtps\t{%1, %0|%0, %1}"
  [(set_attr "type" "sse")
   (set_attr "mode" "V4SF")])

;; Scalar rsqrtss: element 0 from the unspec of operand 1, the rest
;; from operand 2, which is tied to the destination.
(define_insn "sse_vmrsqrtv4sf2"
  [(set (match_operand:V4SF 0 "register_operand" "=x")
        (vec_merge:V4SF
          (unspec:V4SF [(match_operand:V4SF 1 "nonimmediate_operand" "xm")]
                       UNSPEC_RSQRT)
          (match_operand:V4SF 2 "register_operand" "0")
          (const_int 1)))]
  "TARGET_SSE"
  "rsqrtss\t{%1, %0|%0, %1}"
  [(set_attr "type" "sse")
   (set_attr "mode" "SF")])

;; Packed square root.
(define_insn "sqrtv4sf2"
  [(set (match_operand:V4SF 0 "register_operand" "=x")
        (sqrt:V4SF (match_operand:V4SF 1 "nonimmediate_operand" "xm")))]
  "TARGET_SSE"
  "sqrtps\t{%1, %0|%0, %1}"
  [(set_attr "type" "sse")
   (set_attr "mode" "V4SF")])

;; Scalar square root: element 0 from sqrt of operand 1, the rest from
;; operand 2, which is tied to the destination.
(define_insn "sse_vmsqrtv4sf2"
  [(set (match_operand:V4SF 0 "register_operand" "=x")
        (vec_merge:V4SF
          (sqrt:V4SF (match_operand:V4SF 1 "nonimmediate_operand" "xm"))
          (match_operand:V4SF 2 "register_operand" "0")
          (const_int 1)))]
  "TARGET_SSE"
  "sqrtss\t{%1, %0|%0, %1}"
  [(set_attr "type" "sse")
   (set_attr "mode" "SF")])

;; ??? For !flag_finite_math_only, the representation with SMIN/SMAX
;; isn't really correct, as those rtl operators aren't defined when
;; applied to NaNs.  Hopefully the optimizers won't get too smart on us.

;; V4SF maximum.  Unless -ffinite-math-only is in effect, operand 1 is
;; first forced into a register, so that the non-commutative
;; "*smaxv4sf3" insn (which requires a register there) can match.
(define_expand "smaxv4sf3"
  [(set (match_operand:V4SF 0 "register_operand" "")
        (smax:V4SF
          (match_operand:V4SF 1 "nonimmediate_operand" "")
          (match_operand:V4SF 2 "nonimmediate_operand" "")))]
  "TARGET_SSE"
{
  if (!flag_finite_math_only)
    operands[1] = force_reg (V4SFmode, operands[1]);

  ix86_fixup_binary_operands_no_copy (SMAX, V4SFmode, operands);
})

;; Finite-math variant: the operands are marked commutative ("%0").
(define_insn "*smaxv4sf3_finite"
  [(set (match_operand:V4SF 0 "register_operand" "=x")
        (smax:V4SF
          (match_operand:V4SF 1 "nonimmediate_operand" "%0")
          (match_operand:V4SF 2 "nonimmediate_operand" "xm")))]
  "TARGET_SSE && flag_finite_math_only
   && ix86_binary_operator_ok (SMAX, V4SFmode, operands)"
  "maxps\t{%2, %0|%0, %2}"
  [(set_attr "type" "sse")
   (set_attr "mode" "V4SF")])

;; General variant: operand order is fixed, operand 1 is a register
;; tied to the destination.
(define_insn "*smaxv4sf3"
  [(set (match_operand:V4SF 0 "register_operand" "=x")
        (smax:V4SF
          (match_operand:V4SF 1 "register_operand" "0")
          (match_operand:V4SF 2 "nonimmediate_operand" "xm")))]
  "TARGET_SSE"
  "maxps\t{%2, %0|%0, %2}"
  [(set_attr "type" "sse")
   (set_attr "mode" "V4SF")])

;; Scalar maximum: merge mask 1 takes element 0 from the smax result
;; and every other element from operand 1.
(define_insn "sse_vmsmaxv4sf3"
  [(set (match_operand:V4SF 0 "register_operand" "=x")
        (vec_merge:V4SF
          (smax:V4SF
            (match_operand:V4SF 1 "register_operand" "0")
            (match_operand:V4SF 2 "nonimmediate_operand" "xm"))
          (match_dup 1)
          (const_int 1)))]
  "TARGET_SSE"
  "maxss\t{%2, %0|%0, %2}"
  [(set_attr "type" "sse")
   (set_attr "mode" "SF")])

;; V4SF minimum.  Mirrors smaxv4sf3: operand 1 is forced into a
;; register unless -ffinite-math-only is in effect.
(define_expand "sminv4sf3"
  [(set (match_operand:V4SF 0 "register_operand" "")
        (smin:V4SF
          (match_operand:V4SF 1 "nonimmediate_operand" "")
          (match_operand:V4SF 2 "nonimmediate_operand" "")))]
  "TARGET_SSE"
{
  if (!flag_finite_math_only)
    operands[1] = force_reg (V4SFmode, operands[1]);

  ix86_fixup_binary_operands_no_copy (SMIN, V4SFmode, operands);
})

;; Finite-math variant: the operands are marked commutative ("%0").
(define_insn "*sminv4sf3_finite"
  [(set (match_operand:V4SF 0 "register_operand" "=x")
        (smin:V4SF
          (match_operand:V4SF 1 "nonimmediate_operand" "%0")
          (match_operand:V4SF 2 "nonimmediate_operand" "xm")))]
  "TARGET_SSE && flag_finite_math_only
   && ix86_binary_operator_ok (SMIN, V4SFmode, operands)"
  "minps\t{%2, %0|%0, %2}"
  [(set_attr "type" "sse")
   (set_attr "mode" "V4SF")])

;; General variant: operand order is fixed, operand 1 is a register
;; tied to the destination.
(define_insn "*sminv4sf3"
  [(set (match_operand:V4SF 0 "register_operand" "=x")
        (smin:V4SF
          (match_operand:V4SF 1 "register_operand" "0")
          (match_operand:V4SF 2 "nonimmediate_operand" "xm")))]
  "TARGET_SSE"
  "minps\t{%2, %0|%0, %2}"
  [(set_attr "type" "sse")
   (set_attr "mode" "V4SF")])

;; Scalar minimum: merge mask 1 takes element 0 from the smin result
;; and every other element from operand 1.
(define_insn "sse_vmsminv4sf3"
  [(set (match_operand:V4SF 0 "register_operand" "=x")
        (vec_merge:V4SF
          (smin:V4SF
            (match_operand:V4SF 1 "register_operand" "0")
            (match_operand:V4SF 2 "nonimmediate_operand" "xm"))
          (match_dup 1)
          (const_int 1)))]
  "TARGET_SSE"
  "minss\t{%2, %0|%0, %2}"
  [(set_attr "type" "sse")
   (set_attr "mode" "SF")])

;; These versions of the min/max patterns implement exactly the operations
;;   min = (op1 < op2 ? op1 : op2)
;;   max = (!(op1 < op2) ? op1 : op2)
;; Their operands are not commutative, and thus they may be used in the
;; presence of -0.0 and NaN.

;; Order-sensitive V4SF minimum, modelled as an unspec so the operands
;; are never treated as commutative.
(define_insn "*ieee_sminv4sf3"
  [(set (match_operand:V4SF 0 "register_operand" "=x")
        (unspec:V4SF [(match_operand:V4SF 1 "register_operand" "0")
                      (match_operand:V4SF 2 "nonimmediate_operand" "xm")]
                     UNSPEC_IEEE_MIN))]
  "TARGET_SSE"
  "minps\t{%2, %0|%0, %2}"
  [(set_attr "type" "sseadd")
   (set_attr "mode" "V4SF")])

;; Order-sensitive V4SF maximum.
(define_insn "*ieee_smaxv4sf3"
  [(set (match_operand:V4SF 0 "register_operand" "=x")
        (unspec:V4SF [(match_operand:V4SF 1 "register_operand" "0")
                      (match_operand:V4SF 2 "nonimmediate_operand" "xm")]
                     UNSPEC_IEEE_MAX))]
  "TARGET_SSE"
  "maxps\t{%2, %0|%0, %2}"
  [(set_attr "type" "sseadd")
   (set_attr "mode" "V4SF")])

;; Order-sensitive V2DF minimum.
(define_insn "*ieee_sminv2df3"
  [(set (match_operand:V2DF 0 "register_operand" "=x")
        (unspec:V2DF [(match_operand:V2DF 1 "register_operand" "0")
                      (match_operand:V2DF 2 "nonimmediate_operand" "xm")]
                     UNSPEC_IEEE_MIN))]
  "TARGET_SSE2"
  "minpd\t{%2, %0|%0, %2}"
  [(set_attr "type" "sseadd")
   (set_attr "mode" "V2DF")])

;; Order-sensitive V2DF maximum.
(define_insn "*ieee_smaxv2df3"
  [(set (match_operand:V2DF 0 "register_operand" "=x")
        (unspec:V2DF [(match_operand:V2DF 1 "register_operand" "0")
                      (match_operand:V2DF 2 "nonimmediate_operand" "xm")]
                     UNSPEC_IEEE_MAX))]
  "TARGET_SSE2"
  "maxpd\t{%2, %0|%0, %2}"
  [(set_attr "type" "sseadd")
   (set_attr "mode" "V2DF")])

;; addsubps subtracts in elements 0 and 2 and adds in elements 1 and 3.
;; In a vec_merge a set mask bit selects the element from the first
;; arm (the PLUS here), so the mask must have bits 1 and 3 set:
;; binary 1010 = 10.
(define_insn "sse3_addsubv4sf3"
  [(set (match_operand:V4SF 0 "register_operand" "=x")
        (vec_merge:V4SF
          (plus:V4SF
            (match_operand:V4SF 1 "register_operand" "0")
            (match_operand:V4SF 2 "nonimmediate_operand" "xm"))
          (minus:V4SF (match_dup 1) (match_dup 2))
          (const_int 10)))]
  "TARGET_SSE3"
  "addsubps\t{%2, %0|%0, %2}"
  [(set_attr "type" "sseadd")
   (set_attr "mode" "V4SF")])

;; Horizontal add.  Result elements, low to high:
;;   op1[0]+op1[1], op1[2]+op1[3], op2[0]+op2[1], op2[2]+op2[3].
(define_insn "sse3_haddv4sf3"
  [(set (match_operand:V4SF 0 "register_operand" "=x")
        (vec_concat:V4SF
          (vec_concat:V2SF
            (plus:SF
              (vec_select:SF
                (match_operand:V4SF 1 "register_operand" "0")
                (parallel [(const_int 0)]))
              (vec_select:SF (match_dup 1) (parallel [(const_int 1)])))
            (plus:SF
              (vec_select:SF (match_dup 1) (parallel [(const_int 2)]))
              (vec_select:SF (match_dup 1) (parallel [(const_int 3)]))))
          (vec_concat:V2SF
            (plus:SF
              (vec_select:SF
                (match_operand:V4SF 2 "nonimmediate_operand" "xm")
                (parallel [(const_int 0)]))
              (vec_select:SF (match_dup 2) (parallel [(const_int 1)])))
            (plus:SF
              (vec_select:SF (match_dup 2) (parallel [(const_int 2)]))
              (vec_select:SF (match_dup 2) (parallel [(const_int 3)]))))))]
  "TARGET_SSE3"
  "haddps\t{%2, %0|%0, %2}"
  [(set_attr "type" "sseadd")
   (set_attr "mode" "V4SF")])

;; Horizontal subtract.  Result elements, low to high:
;;   op1[0]-op1[1], op1[2]-op1[3], op2[0]-op2[1], op2[2]-op2[3].
(define_insn "sse3_hsubv4sf3"
  [(set (match_operand:V4SF 0 "register_operand" "=x")
        (vec_concat:V4SF
          (vec_concat:V2SF
            (minus:SF
              (vec_select:SF
                (match_operand:V4SF 1 "register_operand" "0")
                (parallel [(const_int 0)]))
              (vec_select:SF (match_dup 1) (parallel [(const_int 1)])))
            (minus:SF
              (vec_select:SF (match_dup 1) (parallel [(const_int 2)]))
              (vec_select:SF (match_dup 1) (parallel [(const_int 3)]))))
          (vec_concat:V2SF
            (minus:SF
              (vec_select:SF
                (match_operand:V4SF 2 "nonimmediate_operand" "xm")
                (parallel [(const_int 0)]))
              (vec_select:SF (match_dup 2) (parallel [(const_int 1)])))
            (minus:SF
              (vec_select:SF (match_dup 2) (parallel [(const_int 2)]))
              (vec_select:SF (match_dup 2) (parallel [(const_int 3)]))))))]
  "TARGET_SSE3"
  "hsubps\t{%2, %0|%0, %2}"
  [(set_attr "type" "sseadd")
   (set_attr "mode" "V4SF")])

;; Sum reduction of a V4SF vector.  With SSE3 this is two horizontal
;; adds through a temporary; otherwise ix86_expand_reduc_v4sf is used
;; with the plain vector add.
(define_expand "reduc_splus_v4sf"
  [(match_operand:V4SF 0 "register_operand" "")
   (match_operand:V4SF 1 "register_operand" "")]
  "TARGET_SSE"
{
  if (TARGET_SSE3)
    {
      rtx tmp = gen_reg_rtx (V4SFmode);
      emit_insn (gen_sse3_haddv4sf3 (tmp, operands[1], operands[1]));
      emit_insn (gen_sse3_haddv4sf3 (operands[0], tmp, tmp));
    }
  else
    ix86_expand_reduc_v4sf (gen_addv4sf3, operands[0], operands[1]);
  DONE;
})

;; Maximum reduction of a V4SF vector via ix86_expand_reduc_v4sf.
(define_expand "reduc_smax_v4sf"
  [(match_operand:V4SF 0 "register_operand" "")
   (match_operand:V4SF 1 "register_operand" "")]
  "TARGET_SSE"
{
  ix86_expand_reduc_v4sf (gen_smaxv4sf3, operands[0], operands[1]);
  DONE;
})

;; Minimum reduction of a V4SF vector via ix86_expand_reduc_v4sf.
(define_expand "reduc_smin_v4sf"
  [(match_operand:V4SF 0 "register_operand" "")
   (match_operand:V4SF 1 "register_operand" "")]
  "TARGET_SSE"
{
  ix86_expand_reduc_v4sf (gen_sminv4sf3, operands[0], operands[1]);
  DONE;
})

;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;;
;; Parallel single-precision floating point comparisons
;;
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;

;; Packed compare; the comparison code of operator 3 is printed into
;; the mnemonic through the %D3 output modifier.
(define_insn "sse_maskcmpv4sf3"
  [(set (match_operand:V4SF 0 "register_operand" "=x")
        (match_operator:V4SF 3 "sse_comparison_operator"
          [(match_operand:V4SF 1 "register_operand" "0")
           (match_operand:V4SF 2 "nonimmediate_operand" "xm")]))]
  "TARGET_SSE"
  "cmp%D3ps\t{%2, %0|%0, %2}"
  [(set_attr "type" "ssecmp")
   (set_attr "mode" "V4SF")])

;; Scalar compare: element 0 from the comparison, the rest from
;; operand 1.  Operand 2 is restricted to a register here.
(define_insn "sse_vmmaskcmpv4sf3"
  [(set (match_operand:V4SF 0 "register_operand" "=x")
        (vec_merge:V4SF
          (match_operator:V4SF 3 "sse_comparison_operator"
            [(match_operand:V4SF 1 "register_operand" "0")
             (match_operand:V4SF 2 "register_operand" "x")])
          (match_dup 1)
          (const_int 1)))]
  "TARGET_SSE"
  "cmp%D3ss\t{%2, %0|%0, %2}"
  [(set_attr "type" "ssecmp")
   (set_attr "mode" "SF")])

;; Compare element 0 of both operands and set the flags register
;; (CCFP mode).
(define_insn "sse_comi"
  [(set (reg:CCFP FLAGS_REG)
        (compare:CCFP
          (vec_select:SF
            (match_operand:V4SF 0 "register_operand" "x")
            (parallel [(const_int 0)]))
          (vec_select:SF
            (match_operand:V4SF 1 "nonimmediate_operand" "xm")
            (parallel [(const_int 0)]))))]
  "TARGET_SSE"
  "comiss\t{%1, %0|%0, %1}"
  [(set_attr "type" "ssecomi")
   (set_attr "mode" "SF")])

;; Compare element 0 of both operands and set the flags register
;; (CCFPU mode).
(define_insn "sse_ucomi"
  [(set (reg:CCFPU FLAGS_REG)
        (compare:CCFPU
          (vec_select:SF
            (match_operand:V4SF 0 "register_operand" "x")
            (parallel [(const_int 0)]))
          (vec_select:SF
            (match_operand:V4SF 1 "nonimmediate_operand" "xm")
            (parallel [(const_int 0)]))))]
  "TARGET_SSE"
  "ucomiss\t{%1, %0|%0, %1}"
  [(set_attr "type" "ssecomi")
   (set_attr "mode" "SF")])

;; Vector conditional move: operand 0 receives operand 1 or operand 2
;; element-wise, depending on comparison operator 3 applied to
;; operands 4 and 5.  All work is done by ix86_expand_fp_vcond; the
;; expansion fails when that helper reports failure.
(define_expand "vcondv4sf"
  [(set (match_operand:V4SF 0 "register_operand" "")
        (if_then_else:V4SF
          (match_operator 3 ""
            [(match_operand:V4SF 4 "nonimmediate_operand" "")
             (match_operand:V4SF 5 "nonimmediate_operand" "")])
          (match_operand:V4SF 1 "general_operand" "")
          (match_operand:V4SF 2 "general_operand" "")))]
  "TARGET_SSE"
{
  if (!ix86_expand_fp_vcond (operands))
    FAIL;
  DONE;
})

;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;;
;; Parallel single-precision floating point logical operations
;;
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;

;; Bitwise AND on V4SF.  The expander only fixes up the operands.
(define_expand "andv4sf3"
  [(set (match_operand:V4SF 0 "register_operand" "")
        (and:V4SF
          (match_operand:V4SF 1 "nonimmediate_operand" "")
          (match_operand:V4SF 2 "nonimmediate_operand" "")))]
  "TARGET_SSE"
  "ix86_fixup_binary_operands_no_copy (AND, V4SFmode, operands);")

;; andps; operands 1 and 2 are commutative ("%0").
(define_insn "*andv4sf3"
  [(set (match_operand:V4SF 0 "register_operand" "=x")
        (and:V4SF
          (match_operand:V4SF 1 "nonimmediate_operand" "%0")
          (match_operand:V4SF 2 "nonimmediate_operand" "xm")))]
  "TARGET_SSE && ix86_binary_operator_ok (AND, V4SFmode, operands)"
  "andps\t{%2, %0|%0, %2}"
  [(set_attr "type" "sselog")
   (set_attr "mode" "V4SF")])

;; andnps: (NOT operand 1) AND operand 2.  Operand 1 is tied to the
;; destination.
(define_insn "sse_nandv4sf3"
  [(set (match_operand:V4SF 0 "register_operand" "=x")
        (and:V4SF
          (not:V4SF (match_operand:V4SF 1 "register_operand" "0"))
          (match_operand:V4SF 2 "nonimmediate_operand" "xm")))]
  "TARGET_SSE"
  "andnps\t{%2, %0|%0, %2}"
  [(set_attr "type" "sselog")
   (set_attr "mode" "V4SF")])

;; Bitwise inclusive OR on V4SF.
(define_expand "iorv4sf3"
  [(set (match_operand:V4SF 0 "register_operand" "")
        (ior:V4SF
          (match_operand:V4SF 1 "nonimmediate_operand" "")
          (match_operand:V4SF 2 "nonimmediate_operand" "")))]
  "TARGET_SSE"
  "ix86_fixup_binary_operands_no_copy (IOR, V4SFmode, operands);")

;; orps; operands 1 and 2 are commutative ("%0").
(define_insn "*iorv4sf3"
  [(set (match_operand:V4SF 0 "register_operand" "=x")
        (ior:V4SF
          (match_operand:V4SF 1 "nonimmediate_operand" "%0")
          (match_operand:V4SF 2 "nonimmediate_operand" "xm")))]
  "TARGET_SSE && ix86_binary_operator_ok (IOR, V4SFmode, operands)"
  "orps\t{%2, %0|%0, %2}"
  [(set_attr "type" "sselog")
   (set_attr "mode" "V4SF")])

;; Bitwise exclusive OR on V4SF.
(define_expand "xorv4sf3"
  [(set (match_operand:V4SF 0 "register_operand" "")
        (xor:V4SF
          (match_operand:V4SF 1 "nonimmediate_operand" "")
          (match_operand:V4SF 2 "nonimmediate_operand" "")))]
  "TARGET_SSE"
  "ix86_fixup_binary_operands_no_copy (XOR, V4SFmode, operands);")

;; xorps; operands 1 and 2 are commutative ("%0").
(define_insn "*xorv4sf3"
  [(set (match_operand:V4SF 0 "register_operand" "=x")
        (xor:V4SF
          (match_operand:V4SF 1 "nonimmediate_operand" "%0")
          (match_operand:V4SF 2 "nonimmediate_operand" "xm")))]
  "TARGET_SSE && ix86_binary_operator_ok (XOR, V4SFmode, operands)"
  "xorps\t{%2, %0|%0, %2}"
  [(set_attr "type" "sselog")
   (set_attr "mode" "V4SF")])

;; Also define scalar versions.  These are used for abs, neg, and
;; conditional move.  Using subregs into vector modes causes register
;; allocation lossage.  These patterns do not allow memory operands
;; because the native instructions read the full 128-bits.

;; Scalar SFmode AND implemented with the packed instruction; both
;; inputs must be registers.
(define_insn "*andsf3"
  [(set (match_operand:SF 0 "register_operand" "=x")
        (and:SF (match_operand:SF 1 "register_operand" "0")
                (match_operand:SF 2 "register_operand" "x")))]
  "TARGET_SSE"
  "andps\t{%2, %0|%0, %2}"
  [(set_attr "type" "sselog")
   (set_attr "mode" "V4SF")])

;; Scalar SFmode (NOT op1) AND op2; both inputs must be registers.
(define_insn "*nandsf3"
  [(set (match_operand:SF 0 "register_operand" "=x")
        (and:SF (not:SF (match_operand:SF 1 "register_operand" "0"))
                (match_operand:SF 2 "register_operand" "x")))]
  "TARGET_SSE"
  "andnps\t{%2, %0|%0, %2}"
  [(set_attr "type" "sselog")
   (set_attr "mode" "V4SF")])

;; Scalar SFmode inclusive OR; both inputs must be registers.
(define_insn "*iorsf3"
  [(set (match_operand:SF 0 "register_operand" "=x")
        (ior:SF (match_operand:SF 1 "register_operand" "0")
                (match_operand:SF 2 "register_operand" "x")))]
  "TARGET_SSE"
  "orps\t{%2, %0|%0, %2}"
  [(set_attr "type" "sselog")
   (set_attr "mode" "V4SF")])

;; Scalar SFmode exclusive OR; both inputs must be registers.
(define_insn "*xorsf3"
  [(set (match_operand:SF 0 "register_operand" "=x")
        (xor:SF (match_operand:SF 1 "register_operand" "0")
                (match_operand:SF 2 "register_operand" "x")))]
  "TARGET_SSE"
  "xorps\t{%2, %0|%0, %2}"
  [(set_attr "type" "sselog")
   (set_attr "mode" "V4SF")])

;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;;
;; Parallel single-precision floating point conversion operations
;;
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;

;; Convert a V2SI value (MMX register or memory) to floats and merge
;; them into the low two elements (mask 3) of operand 1.
(define_insn "sse_cvtpi2ps"
  [(set (match_operand:V4SF 0 "register_operand" "=x")
        (vec_merge:V4SF
          (vec_duplicate:V4SF
            (float:V2SF (match_operand:V2SI 2 "nonimmediate_operand" "ym")))
          (match_operand:V4SF 1 "register_operand" "0")
          (const_int 3)))]
  "TARGET_SSE"
  "cvtpi2ps\t{%2, %0|%0, %2}"
  [(set_attr "type" "ssecvt")
   (set_attr "mode" "V4SF")])

;; Convert the low two floats of operand 1 to a V2SI value in an MMX
;; register (UNSPEC_FIX_NOTRUNC form).
(define_insn "sse_cvtps2pi"
  [(set (match_operand:V2SI 0 "register_operand" "=y")
        (vec_select:V2SI
          (unspec:V4SI [(match_operand:V4SF 1 "nonimmediate_operand" "xm")]
                       UNSPEC_FIX_NOTRUNC)
          (parallel [(const_int 0) (const_int 1)])))]
  "TARGET_SSE"
  "cvtps2pi\t{%1, %0|%0, %1}"
  [(set_attr "type" "ssecvt")
   (set_attr "unit" "mmx")
   (set_attr "mode" "DI")])

;; Truncating counterpart of sse_cvtps2pi.  It produces the same V2SI
;; result in an MMX register, so it carries the same DI mode attribute.
(define_insn "sse_cvttps2pi"
  [(set (match_operand:V2SI 0 "register_operand" "=y")
        (vec_select:V2SI
          (fix:V4SI (match_operand:V4SF 1 "nonimmediate_operand" "xm"))
          (parallel [(const_int 0) (const_int 1)])))]
  "TARGET_SSE"
  "cvttps2pi\t{%1, %0|%0, %1}"
  [(set_attr "type" "ssecvt")
   (set_attr "unit" "mmx")
   (set_attr "mode" "DI")])

;; Convert an SImode integer to float and merge it into element 0 of
;; operand 1.  Alternative 0 takes a register source, alternative 1 a
;; memory source; athlon_decode is given per alternative.
(define_insn "sse_cvtsi2ss"
  [(set (match_operand:V4SF 0 "register_operand" "=x,x")
        (vec_merge:V4SF
          (vec_duplicate:V4SF
            (float:SF (match_operand:SI 2 "nonimmediate_operand" "r,m")))
          (match_operand:V4SF 1 "register_operand" "0,0")
          (const_int 1)))]
  "TARGET_SSE"
  "cvtsi2ss\t{%2, %0|%0, %2}"
  [(set_attr "type" "sseicvt")
   (set_attr "athlon_decode" "vector,double")
   (set_attr "mode" "SF")])

;; DImode variant of sse_cvtsi2ss (64-bit targets only).  The second
;; alternative is memory-only, as in sse_cvtsi2ss, so that the
;; per-alternative athlon_decode value always describes the operand
;; kind actually chosen; register sources still match alternative 0.
(define_insn "sse_cvtsi2ssq"
  [(set (match_operand:V4SF 0 "register_operand" "=x,x")
        (vec_merge:V4SF
          (vec_duplicate:V4SF
            (float:SF (match_operand:DI 2 "nonimmediate_operand" "r,m")))
          (match_operand:V4SF 1 "register_operand" "0,0")
          (const_int 1)))]
  "TARGET_SSE && TARGET_64BIT"
  "cvtsi2ssq\t{%2, %0|%0, %2}"
  [(set_attr "type" "sseicvt")
   (set_attr "athlon_decode" "vector,double")
   (set_attr "mode" "SF")])

;; Convert element 0 of operand 1 to an SImode integer
;; (UNSPEC_FIX_NOTRUNC form).
(define_insn "sse_cvtss2si"
  [(set (match_operand:SI 0 "register_operand" "=r,r")
        (unspec:SI
          [(vec_select:SF
             (match_operand:V4SF 1 "nonimmediate_operand" "x,m")
             (parallel [(const_int 0)]))]
          UNSPEC_FIX_NOTRUNC))]
  "TARGET_SSE"
  "cvtss2si\t{%1, %0|%0, %1}"
  [(set_attr "type" "sseicvt")
   (set_attr "athlon_decode" "double,vector")
   (set_attr "mode" "SI")])

;; DImode variant of sse_cvtss2si (64-bit targets only).
(define_insn "sse_cvtss2siq"
  [(set (match_operand:DI 0 "register_operand" "=r,r")
        (unspec:DI
          [(vec_select:SF
             (match_operand:V4SF 1 "nonimmediate_operand" "x,m")
             (parallel [(const_int 0)]))]
          UNSPEC_FIX_NOTRUNC))]
  "TARGET_SSE && TARGET_64BIT"
  "cvtss2siq\t{%1, %0|%0, %1}"
  [(set_attr "type" "sseicvt")
   (set_attr "athlon_decode" "double,vector")
   (set_attr "mode" "DI")])

;; Truncating conversion of element 0 of operand 1 to an SImode integer.
(define_insn "sse_cvttss2si"
  [(set (match_operand:SI 0 "register_operand" "=r,r")
        (fix:SI
          (vec_select:SF
            (match_operand:V4SF 1 "nonimmediate_operand" "x,m")
            (parallel [(const_int 0)]))))]
  "TARGET_SSE"
  "cvttss2si\t{%1, %0|%0, %1}"
  [(set_attr "type" "sseicvt")
   (set_attr "athlon_decode" "double,vector")
   (set_attr "mode" "SI")])

;; DImode variant of sse_cvttss2si (64-bit targets only).
(define_insn "sse_cvttss2siq"
  [(set (match_operand:DI 0 "register_operand" "=r,r")
        (fix:DI
          (vec_select:SF
            (match_operand:V4SF 1 "nonimmediate_operand" "x,m")
            (parallel [(const_int 0)]))))]
  "TARGET_SSE && TARGET_64BIT"
  "cvttss2siq\t{%1, %0|%0, %1}"
  [(set_attr "type" "sseicvt")
   (set_attr "athlon_decode" "double,vector")
   (set_attr "mode" "DI")])

;; Convert four SImode integers to four floats.  The result is V4SF,
;; so that is the mode attribute (not V2DF).
(define_insn "sse2_cvtdq2ps"
  [(set (match_operand:V4SF 0 "register_operand" "=x")
        (float:V4SF (match_operand:V4SI 1 "nonimmediate_operand" "xm")))]
  "TARGET_SSE2"
  "cvtdq2ps\t{%1, %0|%0, %1}"
  [(set_attr "type" "ssecvt")
   (set_attr "mode" "V4SF")])

;; Convert four floats to four SImode integers (UNSPEC_FIX_NOTRUNC
;; form).
(define_insn "sse2_cvtps2dq"
  [(set (match_operand:V4SI 0 "register_operand" "=x")
        (unspec:V4SI [(match_operand:V4SF 1 "nonimmediate_operand" "xm")]
                     UNSPEC_FIX_NOTRUNC))]
  "TARGET_SSE2"
  "cvtps2dq\t{%1, %0|%0, %1}"
  [(set_attr "type" "ssecvt")
   (set_attr "mode" "TI")])

;; Truncating conversion of four floats to four SImode integers.
(define_insn "sse2_cvttps2dq"
  [(set (match_operand:V4SI 0 "register_operand" "=x")
        (fix:V4SI (match_operand:V4SF 1 "nonimmediate_operand" "xm")))]
  "TARGET_SSE2"
  "cvttps2dq\t{%1, %0|%0, %1}"
  [(set_attr "type" "ssecvt")
   (set_attr "mode" "TI")])

;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;;
;; Parallel single-precision floating point element swizzling
;;
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;

;; Result = { op2[2], op2[3], op1[2], op1[3] } (indices 6,7,2,3 of the
;; concatenation of operands 1 and 2).  Alternatives: register source
;; (movhlps), offsettable memory source read through %H2 (movlps), and
;; memory destination (movhps).  Both inputs may not be memory.
(define_insn "sse_movhlps"
  [(set (match_operand:V4SF 0 "nonimmediate_operand" "=x,x,m")
        (vec_select:V4SF
          (vec_concat:V8SF
            (match_operand:V4SF 1 "nonimmediate_operand" " 0,0,0")
            (match_operand:V4SF 2 "nonimmediate_operand" " x,o,x"))
          (parallel [(const_int 6)
                     (const_int 7)
                     (const_int 2)
                     (const_int 3)])))]
  "TARGET_SSE && !(MEM_P (operands[1]) && MEM_P (operands[2]))"
  "@
   movhlps\t{%2, %0|%0, %2}
   movlps\t{%H2, %0|%0, %H2}
   movhps\t{%2, %0|%0, %2}"
  [(set_attr "type" "ssemov")
   (set_attr "mode" "V4SF,V2SF,V2SF")])

;; Result = { op1[0], op1[1], op2[0], op2[1] } (indices 0,1,4,5 of the
;; concatenation).  Alternatives: register source (movlhps), memory
;; source (movhps), and offsettable memory destination written through
;; %H0 (movlps).
(define_insn "sse_movlhps"
  [(set (match_operand:V4SF 0 "nonimmediate_operand" "=x,x,o")
        (vec_select:V4SF
          (vec_concat:V8SF
            (match_operand:V4SF 1 "nonimmediate_operand" " 0,0,0")
            (match_operand:V4SF 2 "nonimmediate_operand" " x,m,x"))
          (parallel [(const_int 0)
                     (const_int 1)
                     (const_int 4)
                     (const_int 5)])))]
  "TARGET_SSE && ix86_binary_operator_ok (UNKNOWN, V4SFmode, operands)"
  "@
   movlhps\t{%2, %0|%0, %2}
   movhps\t{%2, %0|%0, %2}
   movlps\t{%2, %H0|%H0, %2}"
  [(set_attr "type" "ssemov")
   (set_attr "mode" "V4SF,V2SF,V2SF")])

;; Interleave the high halves: { op1[2], op2[2], op1[3], op2[3] }.
(define_insn "sse_unpckhps"
  [(set (match_operand:V4SF 0 "register_operand" "=x")
        (vec_select:V4SF
          (vec_concat:V8SF
            (match_operand:V4SF 1 "register_operand" "0")
            (match_operand:V4SF 2 "nonimmediate_operand" "xm"))
          (parallel [(const_int 2) (const_int 6)
                     (const_int 3) (const_int 7)])))]
  "TARGET_SSE"
  "unpckhps\t{%2, %0|%0, %2}"
  [(set_attr "type" "sselog")
   (set_attr "mode" "V4SF")])

;; Interleave the low halves: { op1[0], op2[0], op1[1], op2[1] }.
(define_insn "sse_unpcklps"
  [(set (match_operand:V4SF 0 "register_operand" "=x")
        (vec_select:V4SF
          (vec_concat:V8SF
            (match_operand:V4SF 1 "register_operand" "0")
            (match_operand:V4SF 2 "nonimmediate_operand" "xm"))
          (parallel [(const_int 0) (const_int 4)
                     (const_int 1) (const_int 5)])))]
  "TARGET_SSE"
  "unpcklps\t{%2, %0|%0, %2}"
  [(set_attr "type" "sselog")
   (set_attr "mode" "V4SF")])

;; These are modeled with the same vec_concat as the others so that we
;; capture users of shufps that can use the new instructions

;; Operand 1 is concatenated with itself; indices 1,1,7,7 therefore pick
;; { op1[1], op1[1], op1[3], op1[3] }.
(define_insn "sse3_movshdup"
  [(set (match_operand:V4SF 0 "register_operand" "=x")
        (vec_select:V4SF
          (vec_concat:V8SF
            (match_operand:V4SF 1 "nonimmediate_operand" "xm")
            (match_dup 1))
          (parallel [(const_int 1)
                     (const_int 1)
                     (const_int 7)
                     (const_int 7)])))]
  "TARGET_SSE3"
  "movshdup\t{%1, %0|%0, %1}"
  [(set_attr "type" "sse")
   (set_attr "mode" "V4SF")])

;; Indices 0,0,6,6 of the self-concatenation pick
;; { op1[0], op1[0], op1[2], op1[2] }.
(define_insn "sse3_movsldup"
  [(set (match_operand:V4SF 0 "register_operand" "=x")
        (vec_select:V4SF
          (vec_concat:V8SF
            (match_operand:V4SF 1 "nonimmediate_operand" "xm")
            (match_dup 1))
          (parallel [(const_int 0)
                     (const_int 0)
                     (const_int 6)
                     (const_int 6)])))]
  "TARGET_SSE3"
  "movsldup\t{%1, %0|%0, %1}"
  [(set_attr "type" "sse")
   (set_attr "mode" "V4SF")])

;; Expander taking the 8-bit shuffle immediate as operand 3.  The
;; immediate is split into four 2-bit selectors; the upper two are biased
;; by 4 so that they index into operand 2 within the concatenation used
;; by sse_shufps_1.
(define_expand "sse_shufps"
  [(match_operand:V4SF 0 "register_operand" "")
   (match_operand:V4SF 1 "register_operand" "")
   (match_operand:V4SF 2 "nonimmediate_operand" "")
   (match_operand:SI 3 "const_int_operand" "")]
  "TARGET_SSE"
{
  int imm = INTVAL (operands[3]);
  rtx sel0 = GEN_INT ((imm >> 0) & 3);
  rtx sel1 = GEN_INT ((imm >> 2) & 3);
  rtx sel2 = GEN_INT (((imm >> 4) & 3) + 4);
  rtx sel3 = GEN_INT (((imm >> 6) & 3) + 4);

  emit_insn (gen_sse_shufps_1 (operands[0], operands[1], operands[2],
                               sel0, sel1, sel2, sel3));
  DONE;
})

;; The shuffle itself.  Operands 3 and 4 select from operand 1 (0..3),
;; operands 5 and 6 from operand 2 (4..7).  At output time the four
;; selectors are packed back into a single immediate, which replaces
;; operands[3] for printing.
(define_insn "sse_shufps_1"
  [(set (match_operand:V4SF 0 "register_operand" "=x")
        (vec_select:V4SF
          (vec_concat:V8SF
            (match_operand:V4SF 1 "register_operand" "0")
            (match_operand:V4SF 2 "nonimmediate_operand" "xm"))
          (parallel [(match_operand 3 "const_0_to_3_operand" "")
                     (match_operand 4 "const_0_to_3_operand" "")
                     (match_operand 5 "const_4_to_7_operand" "")
                     (match_operand 6 "const_4_to_7_operand" "")])))]
  "TARGET_SSE"
{
  int imm = ((INTVAL (operands[3]) << 0)
             | (INTVAL (operands[4]) << 2)
             | ((INTVAL (operands[5]) - 4) << 4)
             | ((INTVAL (operands[6]) - 4) << 6));

  operands[3] = GEN_INT (imm);
  return "shufps\t{%3, %2, %0|%0, %2, %3}";
}
  [(set_attr "type" "sselog")
   (set_attr "mode" "V4SF")])

;; Extract elements 2 and 3 of operand 1 as a V2SF: store to memory
;; (movhps), register to register (movhlps), or load the upper half of
;; an offsettable memory operand through %H1 (movlps).
(define_insn "sse_storehps"
  [(set (match_operand:V2SF 0 "nonimmediate_operand" "=m,x,x")
        (vec_select:V2SF
          (match_operand:V4SF 1 "nonimmediate_operand" "x,x,o")
          (parallel [(const_int 2) (const_int 3)])))]
  "TARGET_SSE"
  "@
   movhps\t{%1, %0|%0, %1}
   movhlps\t{%1, %0|%0, %1}
   movlps\t{%H1, %0|%0, %H1}"
  [(set_attr "type" "ssemov")
   (set_attr "mode" "V2SF,V4SF,V2SF")])

;; Keep elements 0 and 1 of operand 1 and replace the upper half with the
;; V2SF operand 2.
(define_insn "sse_loadhps"
  [(set (match_operand:V4SF 0 "nonimmediate_operand" "=x,x,o")
        (vec_concat:V4SF
          (vec_select:V2SF
            (match_operand:V4SF 1 "nonimmediate_operand" "0,0,0")
            (parallel [(const_int 0) (const_int 1)]))
          (match_operand:V2SF 2 "nonimmediate_operand" "m,x,x")))]
  "TARGET_SSE"
  "@
   movhps\t{%2, %0|%0, %2}
   movlhps\t{%2, %0|%0, %2}
   movlps\t{%2, %H0|%H0, %2}"
  [(set_attr "type" "ssemov")
   (set_attr "mode" "V2SF,V4SF,V2SF")])

;; Extract elements 0 and 1 of operand 1 as a V2SF.  The register to
;; register alternative is emitted as a movaps.
(define_insn "sse_storelps"
  [(set (match_operand:V2SF 0 "nonimmediate_operand" "=m,x,x")
        (vec_select:V2SF
          (match_operand:V4SF 1 "nonimmediate_operand" "x,x,m")
          (parallel [(const_int 0) (const_int 1)])))]
  "TARGET_SSE"
  "@
   movlps\t{%1, %0|%0, %1}
   movaps\t{%1, %0|%0, %1}
   movlps\t{%1, %0|%0, %1}"
  [(set_attr "type" "ssemov")
   (set_attr "mode" "V2SF,V4SF,V2SF")])

;; Lower half from the V2SF operand 2, upper half from elements 2 and 3
;; of operand 1.  When operand 2 is already in the destination register
;; a shufps with immediate 0xe4 is emitted instead of a movlps.
(define_insn "sse_loadlps"
  [(set (match_operand:V4SF 0 "nonimmediate_operand" "=x,x,m")
        (vec_concat:V4SF
          (match_operand:V2SF 2 "nonimmediate_operand" "0,m,x")
          (vec_select:V2SF
            (match_operand:V4SF 1 "nonimmediate_operand" "x,0,0")
            (parallel [(const_int 2) (const_int 3)]))))]
  "TARGET_SSE"
  "@
   shufps\t{$0xe4, %1, %0|%0, %1, 0xe4}
   movlps\t{%2, %0|%0, %2}
   movlps\t{%2, %0|%0, %2}"
  [(set_attr "type" "sselog,ssemov,ssemov")
   (set_attr "mode" "V4SF,V2SF,V2SF")])

;; Element 0 from operand 2, remaining elements from operand 1
;; (vec_merge mask 1).
(define_insn "sse_movss"
  [(set (match_operand:V4SF 0 "register_operand" "=x")
        (vec_merge:V4SF
          (match_operand:V4SF 2 "register_operand" "x")
          (match_operand:V4SF 1 "register_operand" "0")
          (const_int 1)))]
  "TARGET_SSE"
  "movss\t{%2, %0|%0, %2}"
  [(set_attr "type" "ssemov")
   (set_attr "mode" "SF")])

;; Broadcast the SFmode operand 1, which must already occupy the
;; destination register, with a shufps of the register against itself.
(define_insn "*vec_dupv4sf"
  [(set (match_operand:V4SF 0 "register_operand" "=x")
        (vec_duplicate:V4SF
          (match_operand:SF 1 "register_operand" "0")))]
  "TARGET_SSE"
  "shufps\t{$0, %0, %0|%0, %0, 0}"
  [(set_attr "type" "sselog1")
   (set_attr "mode" "V4SF")])

;; ??? In theory we can match memory for the MMX alternative, but allowing
;; nonimmediate_operand for operand 2 and *not* allowing memory for the SSE
;; alternatives pretty much forces the MMX alternative to be chosen.
;; Build a V2SF from two SFmode values.  Alternatives 0-1 use SSE
;; registers (unpcklps, or movss when operand 2 is the constant matched
;; by "C"); alternatives 2-3 use MMX registers (punpckldq / movd).
(define_insn "*sse_concatv2sf"
  [(set (match_operand:V2SF 0 "register_operand" "=x,x,*y,*y")
        (vec_concat:V2SF
          (match_operand:SF 1 "nonimmediate_operand" " 0,m, 0, m")
          (match_operand:SF 2 "reg_or_0_operand" " x,C,*y, C")))]
  "TARGET_SSE"
  "@
   unpcklps\t{%2, %0|%0, %2}
   movss\t{%1, %0|%0, %1}
   punpckldq\t{%2, %0|%0, %2}
   movd\t{%1, %0|%0, %1}"
  [(set_attr "type" "sselog,ssemov,mmxcvt,mmxmov")
   (set_attr "mode" "V4SF,SF,DI,DI")])

;; Build a V4SF from two V2SF halves; operand 1 is already in the
;; destination register.
(define_insn "*sse_concatv4sf"
  [(set (match_operand:V4SF 0 "register_operand" "=x,x")
        (vec_concat:V4SF
          (match_operand:V2SF 1 "register_operand" " 0,0")
          (match_operand:V2SF 2 "nonimmediate_operand" " x,m")))]
  "TARGET_SSE"
  "@
   movlhps\t{%2, %0|%0, %2}
   movhps\t{%2, %0|%0, %2}"
  [(set_attr "type" "ssemov")
   (set_attr "mode" "V4SF,V2SF")])

;; Vector initialisation is delegated to ix86_expand_vector_init.
(define_expand "vec_initv4sf"
  [(match_operand:V4SF 0 "register_operand" "")
   (match_operand 1 "" "")]
  "TARGET_SSE"
{
  ix86_expand_vector_init (false, operands[0], operands[1]);
  DONE;
})

;; Set element 0 of a V4SF from the SFmode operand 2 (vec_merge mask 1).
;; The last alternative (memory destination) outputs "#" and relies on
;; the define_split that follows.
(define_insn "*vec_setv4sf_0"
  [(set (match_operand:V4SF 0 "nonimmediate_operand" "=x,x,Y ,m")
        (vec_merge:V4SF
          (vec_duplicate:V4SF
            (match_operand:SF 2 "general_operand" " x,m,*r,x*rfF"))
          (match_operand:V4SF 1 "vector_move_operand" " 0,C,C ,0")
          (const_int 1)))]
  "TARGET_SSE"
  "@
   movss\t{%2, %0|%0, %2}
   movss\t{%2, %0|%0, %2}
   movd\t{%2, %0|%0, %2}
   #"
  [(set_attr "type" "ssemov")
   (set_attr "mode" "SF")])

;; After reload, setting element 0 of a V4SF in memory is just an SFmode
;; store to offset 0 of that memory.
(define_split
  [(set (match_operand:V4SF 0 "memory_operand" "")
        (vec_merge:V4SF
          (vec_duplicate:V4SF
            (match_operand:SF 1 "nonmemory_operand" ""))
          (match_dup 0)
          (const_int 1)))]
  "TARGET_SSE && reload_completed"
  [(const_int 0)]
{
  emit_move_insn (adjust_address (operands[0], SFmode, 0), operands[1]);
  DONE;
})

;; Generic element insertion is delegated to ix86_expand_vector_set;
;; operand 2 is the constant element index.
(define_expand "vec_setv4sf"
  [(match_operand:V4SF 0 "register_operand" "")
   (match_operand:SF 1 "register_operand" "")
   (match_operand 2 "const_int_operand" "")]
  "TARGET_SSE"
{
  ix86_expand_vector_set (false, operands[0], operands[1],
                          INTVAL (operands[2]));
  DONE;
})

;; Extract element 0 of a V4SF.  Always split after reload into a plain
;; SFmode move: a register source is re-expressed as an SFmode REG with
;; the same register number, anything else goes through gen_lowpart.
(define_insn_and_split "*vec_extractv4sf_0"
  [(set (match_operand:SF 0 "nonimmediate_operand" "=x,m,fr")
        (vec_select:SF
          (match_operand:V4SF 1 "nonimmediate_operand" "xm,x,m")
          (parallel [(const_int 0)])))]
  "TARGET_SSE && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
  "#"
  "&& reload_completed"
  [(const_int 0)]
{
  rtx op1 = operands[1];
  if (REG_P (op1))
    op1 = gen_rtx_REG (SFmode, REGNO (op1));
  else
    op1 = gen_lowpart (SFmode, op1);
  emit_move_insn (operands[0], op1);
  DONE;
})

;; Generic element extraction is delegated to ix86_expand_vector_extract;
;; operand 2 is the constant element index.
(define_expand "vec_extractv4sf"
  [(match_operand:SF 0 "register_operand" "")
   (match_operand:V4SF 1 "register_operand" "")
   (match_operand 2 "const_int_operand" "")]
  "TARGET_SSE"
{
  ix86_expand_vector_extract (false, operands[0], operands[1],
                              INTVAL (operands[2]));
  DONE;
})

;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;;
;; Parallel double-precision floating point arithmetic
;;
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;

;; Negation and absolute value are expanded by
;; ix86_expand_fp_absneg_operator.
(define_expand "negv2df2"
  [(set (match_operand:V2DF 0 "register_operand" "")
        (neg:V2DF (match_operand:V2DF 1 "nonimmediate_operand" "")))]
  "TARGET_SSE2"
  "ix86_expand_fp_absneg_operator (NEG, V2DFmode, operands); DONE;")

(define_expand "absv2df2"
  [(set (match_operand:V2DF 0 "register_operand" "")
        (abs:V2DF (match_operand:V2DF 1 "nonimmediate_operand" "")))]
  "TARGET_SSE2"
  "ix86_expand_fp_absneg_operator (ABS, V2DFmode, operands); DONE;")

;; Packed add.  The expander fixes up the operands in place; the insn is
;; commutative ("%0").
(define_expand "addv2df3"
  [(set (match_operand:V2DF 0 "register_operand" "")
        (plus:V2DF (match_operand:V2DF 1 "nonimmediate_operand" "")
                   (match_operand:V2DF 2 "nonimmediate_operand" "")))]
  "TARGET_SSE2"
  "ix86_fixup_binary_operands_no_copy (PLUS, V2DFmode, operands);")

(define_insn "*addv2df3"
  [(set (match_operand:V2DF 0 "register_operand" "=x")
        (plus:V2DF (match_operand:V2DF 1 "nonimmediate_operand" "%0")
                   (match_operand:V2DF 2 "nonimmediate_operand" "xm")))]
  "TARGET_SSE2 && ix86_binary_operator_ok (PLUS, V2DFmode, operands)"
  "addpd\t{%2, %0|%0, %2}"
  [(set_attr "type" "sseadd")
   (set_attr "mode" "V2DF")])

;; Scalar add: element 0 is op1[0] + op2[0], element 1 is copied from
;; operand 1 (vec_merge mask 1).  The operand check is made in V2DFmode,
;; the mode of this pattern's operands (it previously named V4SFmode).
(define_insn "sse2_vmaddv2df3"
  [(set (match_operand:V2DF 0 "register_operand" "=x")
        (vec_merge:V2DF
          (plus:V2DF (match_operand:V2DF 1 "register_operand" "0")
                     (match_operand:V2DF 2 "nonimmediate_operand" "xm"))
          (match_dup 1)
          (const_int 1)))]
  "TARGET_SSE2 && ix86_binary_operator_ok (PLUS, V2DFmode, operands)"
  "addsd\t{%2, %0|%0, %2}"
  [(set_attr "type" "sseadd")
   (set_attr "mode" "DF")])

;; Packed subtract.
(define_expand "subv2df3"
  [(set (match_operand:V2DF 0 "register_operand" "")
        (minus:V2DF (match_operand:V2DF 1 "nonimmediate_operand" "")
                    (match_operand:V2DF 2 "nonimmediate_operand" "")))]
  "TARGET_SSE2"
  "ix86_fixup_binary_operands_no_copy (MINUS, V2DFmode, operands);")

(define_insn "*subv2df3"
  [(set (match_operand:V2DF 0 "register_operand" "=x")
        (minus:V2DF (match_operand:V2DF 1 "register_operand" "0")
                    (match_operand:V2DF 2 "nonimmediate_operand" "xm")))]
  "TARGET_SSE2"
  "subpd\t{%2, %0|%0, %2}"
  [(set_attr "type" "sseadd")
   (set_attr "mode" "V2DF")])

;; Scalar subtract on element 0; element 1 is copied from operand 1.
(define_insn "sse2_vmsubv2df3"
  [(set (match_operand:V2DF 0 "register_operand" "=x")
        (vec_merge:V2DF
          (minus:V2DF (match_operand:V2DF 1 "register_operand" "0")
                      (match_operand:V2DF 2 "nonimmediate_operand" "xm"))
          (match_dup 1)
          (const_int 1)))]
  "TARGET_SSE2"
  "subsd\t{%2, %0|%0, %2}"
  [(set_attr "type" "sseadd")
   (set_attr "mode" "DF")])

;; Packed multiply (commutative).
(define_expand "mulv2df3"
  [(set (match_operand:V2DF 0 "register_operand" "")
        (mult:V2DF (match_operand:V2DF 1 "nonimmediate_operand" "")
                   (match_operand:V2DF 2 "nonimmediate_operand" "")))]
  "TARGET_SSE2"
  "ix86_fixup_binary_operands_no_copy (MULT, V2DFmode, operands);")

(define_insn "*mulv2df3"
  [(set (match_operand:V2DF 0 "register_operand" "=x")
        (mult:V2DF (match_operand:V2DF 1 "nonimmediate_operand" "%0")
                   (match_operand:V2DF 2 "nonimmediate_operand" "xm")))]
  "TARGET_SSE2 && ix86_binary_operator_ok (MULT, V2DFmode, operands)"
  "mulpd\t{%2, %0|%0, %2}"
  [(set_attr "type" "ssemul")
   (set_attr "mode" "V2DF")])

;; Scalar multiply on element 0; element 1 is copied from operand 1.
(define_insn "sse2_vmmulv2df3"
  [(set (match_operand:V2DF 0 "register_operand" "=x")
        (vec_merge:V2DF
          (mult:V2DF (match_operand:V2DF 1 "register_operand" "0")
                     (match_operand:V2DF 2 "nonimmediate_operand" "xm"))
          (match_dup 1)
          (const_int 1)))]
  "TARGET_SSE2 && ix86_binary_operator_ok (MULT, V2DFmode, operands)"
  "mulsd\t{%2, %0|%0, %2}"
  [(set_attr "type" "ssemul")
   (set_attr "mode" "DF")])

;; Packed divide.
(define_expand "divv2df3"
  [(set (match_operand:V2DF 0 "register_operand" "")
        (div:V2DF (match_operand:V2DF 1 "register_operand" "")
                  (match_operand:V2DF 2 "nonimmediate_operand" "")))]
  "TARGET_SSE2"
  "ix86_fixup_binary_operands_no_copy (DIV, V2DFmode, operands);")

(define_insn "*divv2df3"
  [(set (match_operand:V2DF 0 "register_operand" "=x")
        (div:V2DF (match_operand:V2DF 1 "register_operand" "0")
                  (match_operand:V2DF 2 "nonimmediate_operand" "xm")))]
  "TARGET_SSE2"
  "divpd\t{%2, %0|%0, %2}"
  [(set_attr "type" "ssediv")
   (set_attr "mode" "V2DF")])

;; Scalar divide on element 0; element 1 is copied from operand 1.
(define_insn "sse2_vmdivv2df3"
  [(set (match_operand:V2DF 0 "register_operand" "=x")
        (vec_merge:V2DF
          (div:V2DF (match_operand:V2DF 1 "register_operand" "0")
                    (match_operand:V2DF 2 "nonimmediate_operand" "xm"))
          (match_dup 1)
          (const_int 1)))]
  "TARGET_SSE2"
  "divsd\t{%2, %0|%0, %2}"
  [(set_attr "type" "ssediv")
   (set_attr "mode" "DF")])

;; Packed square root.
(define_insn "sqrtv2df2"
  [(set (match_operand:V2DF 0 "register_operand" "=x")
        (sqrt:V2DF (match_operand:V2DF 1 "nonimmediate_operand" "xm")))]
  "TARGET_SSE2"
  "sqrtpd\t{%1, %0|%0, %1}"
  [(set_attr "type" "sse")
   (set_attr "mode" "V2DF")])

;; Scalar square root: element 0 is sqrt (op1[0]), element 1 is copied
;; from operand 2, which is tied to the destination.  The predicate on
;; operand 1 is nonimmediate_operand so that it agrees with the "xm"
;; constraint (and with sqrtv2df2); with register_operand the memory
;; alternative could never be matched.
(define_insn "sse2_vmsqrtv2df2"
  [(set (match_operand:V2DF 0 "register_operand" "=x")
        (vec_merge:V2DF
          (sqrt:V2DF (match_operand:V2DF 1 "nonimmediate_operand" "xm"))
          (match_operand:V2DF 2 "register_operand" "0")
          (const_int 1)))]
  "TARGET_SSE2"
  "sqrtsd\t{%1, %0|%0, %1}"
  [(set_attr "type" "sse")
   (set_attr "mode" "DF")])

;; ??? For !flag_finite_math_only, the representation with SMIN/SMAX
;; isn't really correct, as those rtl operators aren't defined when
;; applied to NaNs.  Hopefully the optimizers won't get too smart on us.

;; Packed maximum.  Unless flag_finite_math_only is set, operand 1 is
;; forced into a register before the generic operand fix-up, so that the
;; non-commutative "*smaxv2df3" pattern can match.
(define_expand "smaxv2df3"
  [(set (match_operand:V2DF 0 "register_operand" "")
        (smax:V2DF (match_operand:V2DF 1 "nonimmediate_operand" "")
                   (match_operand:V2DF 2 "nonimmediate_operand" "")))]
  "TARGET_SSE2"
{
  if (!flag_finite_math_only)
    operands[1] = force_reg (V2DFmode, operands[1]);
  ix86_fixup_binary_operands_no_copy (SMAX, V2DFmode, operands);
})

;; Commutative form ("%0"), only with flag_finite_math_only.
(define_insn "*smaxv2df3_finite"
  [(set (match_operand:V2DF 0 "register_operand" "=x")
        (smax:V2DF (match_operand:V2DF 1 "nonimmediate_operand" "%0")
                   (match_operand:V2DF 2 "nonimmediate_operand" "xm")))]
  "TARGET_SSE2 && flag_finite_math_only
   && ix86_binary_operator_ok (SMAX, V2DFmode, operands)"
  "maxpd\t{%2, %0|%0, %2}"
  [(set_attr "type" "sseadd")
   (set_attr "mode" "V2DF")])

;; Non-commutative form: operand 1 must be the destination register.
(define_insn "*smaxv2df3"
  [(set (match_operand:V2DF 0 "register_operand" "=x")
        (smax:V2DF (match_operand:V2DF 1 "register_operand" "0")
                   (match_operand:V2DF 2 "nonimmediate_operand" "xm")))]
  "TARGET_SSE2"
  "maxpd\t{%2, %0|%0, %2}"
  [(set_attr "type" "sseadd")
   (set_attr "mode" "V2DF")])

;; Scalar maximum on element 0; element 1 is copied from operand 1.
(define_insn "sse2_vmsmaxv2df3"
  [(set (match_operand:V2DF 0 "register_operand" "=x")
        (vec_merge:V2DF
          (smax:V2DF (match_operand:V2DF 1 "register_operand" "0")
                     (match_operand:V2DF 2 "nonimmediate_operand" "xm"))
          (match_dup 1)
          (const_int 1)))]
  "TARGET_SSE2"
  "maxsd\t{%2, %0|%0, %2}"
  [(set_attr "type" "sseadd")
   (set_attr "mode" "DF")])

;; Packed minimum; structured exactly like the maximum patterns above.
(define_expand "sminv2df3"
  [(set (match_operand:V2DF 0 "register_operand" "")
        (smin:V2DF (match_operand:V2DF 1 "nonimmediate_operand" "")
                   (match_operand:V2DF 2 "nonimmediate_operand" "")))]
  "TARGET_SSE2"
{
  if (!flag_finite_math_only)
    operands[1] = force_reg (V2DFmode, operands[1]);
  ix86_fixup_binary_operands_no_copy (SMIN, V2DFmode, operands);
})

(define_insn "*sminv2df3_finite"
  [(set (match_operand:V2DF 0 "register_operand" "=x")
        (smin:V2DF (match_operand:V2DF 1 "nonimmediate_operand" "%0")
                   (match_operand:V2DF 2 "nonimmediate_operand" "xm")))]
  "TARGET_SSE2 && flag_finite_math_only
   && ix86_binary_operator_ok (SMIN, V2DFmode, operands)"
  "minpd\t{%2, %0|%0, %2}"
  [(set_attr "type" "sseadd")
   (set_attr "mode" "V2DF")])

(define_insn "*sminv2df3"
  [(set (match_operand:V2DF 0 "register_operand" "=x")
        (smin:V2DF (match_operand:V2DF 1 "register_operand" "0")
                   (match_operand:V2DF 2 "nonimmediate_operand" "xm")))]
  "TARGET_SSE2"
  "minpd\t{%2, %0|%0, %2}"
  [(set_attr "type" "sseadd")
   (set_attr "mode" "V2DF")])

;; Scalar minimum on element 0; element 1 is copied from operand 1.
(define_insn "sse2_vmsminv2df3"
  [(set (match_operand:V2DF 0 "register_operand" "=x")
        (vec_merge:V2DF
          (smin:V2DF (match_operand:V2DF 1 "register_operand" "0")
                     (match_operand:V2DF 2 "nonimmediate_operand" "xm"))
          (match_dup 1)
          (const_int 1)))]
  "TARGET_SSE2"
  "minsd\t{%2, %0|%0, %2}"
  [(set_attr "type" "sseadd")
   (set_attr "mode" "DF")])

;; ADDSUBPD subtracts in element 0 and adds in element 1.  In a
;; vec_merge a set mask bit selects the element from the first arm, so
;; the PLUS arm must supply element 1 only: the mask is 2.  (A mask of 1
;; would describe the add in element 0, the opposite of what the
;; instruction does.)
(define_insn "sse3_addsubv2df3"
  [(set (match_operand:V2DF 0 "register_operand" "=x")
        (vec_merge:V2DF
          (plus:V2DF
            (match_operand:V2DF 1 "register_operand" "0")
            (match_operand:V2DF 2 "nonimmediate_operand" "xm"))
          (minus:V2DF (match_dup 1) (match_dup 2))
          (const_int 2)))]
  "TARGET_SSE3"
  "addsubpd\t{%2, %0|%0, %2}"
  [(set_attr "type" "sseadd")
   (set_attr "mode" "V2DF")])

;; Horizontal add: { op1[0] + op1[1], op2[0] + op2[1] }.
(define_insn "sse3_haddv2df3"
  [(set (match_operand:V2DF 0 "register_operand" "=x")
        (vec_concat:V2DF
          (plus:DF
            (vec_select:DF
              (match_operand:V2DF 1 "register_operand" "0")
              (parallel [(const_int 0)]))
            (vec_select:DF (match_dup 1) (parallel [(const_int 1)])))
          (plus:DF
            (vec_select:DF
              (match_operand:V2DF 2 "nonimmediate_operand" "xm")
              (parallel [(const_int 0)]))
            (vec_select:DF (match_dup 2) (parallel [(const_int 1)])))))]
  "TARGET_SSE3"
  "haddpd\t{%2, %0|%0, %2}"
  [(set_attr "type" "sseadd")
   (set_attr "mode" "V2DF")])

;; Horizontal subtract: { op1[0] - op1[1], op2[0] - op2[1] }.
(define_insn "sse3_hsubv2df3"
  [(set (match_operand:V2DF 0 "register_operand" "=x")
        (vec_concat:V2DF
          (minus:DF
            (vec_select:DF
              (match_operand:V2DF 1 "register_operand" "0")
              (parallel [(const_int 0)]))
            (vec_select:DF (match_dup 1) (parallel [(const_int 1)])))
          (minus:DF
            (vec_select:DF
              (match_operand:V2DF 2 "nonimmediate_operand" "xm")
              (parallel [(const_int 0)]))
            (vec_select:DF (match_dup 2) (parallel [(const_int 1)])))))]
  "TARGET_SSE3"
  "hsubpd\t{%2, %0|%0, %2}"
  [(set_attr "type" "sseadd")
   (set_attr "mode" "V2DF")])

;; Sum reduction: a horizontal add of operand 1 with itself.
(define_expand "reduc_splus_v2df"
  [(match_operand:V2DF 0 "register_operand" "")
   (match_operand:V2DF 1 "register_operand" "")]
  "TARGET_SSE3"
{
  emit_insn (gen_sse3_haddv2df3 (operands[0], operands[1], operands[1]));
  DONE;
})

;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;;
;; Parallel double-precision floating point comparisons
;;
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;

;; Packed compare; the comparison code is operator 3 and is printed into
;; the mnemonic through %D3.
(define_insn "sse2_maskcmpv2df3"
  [(set (match_operand:V2DF 0 "register_operand" "=x")
        (match_operator:V2DF 3 "sse_comparison_operator"
          [(match_operand:V2DF 1 "register_operand" "0")
           (match_operand:V2DF 2 "nonimmediate_operand" "xm")]))]
  "TARGET_SSE2"
  "cmp%D3pd\t{%2, %0|%0, %2}"
  [(set_attr "type" "ssecmp")
   (set_attr "mode" "V2DF")])

;; Scalar compare on element 0; element 1 is copied from operand 1.
(define_insn "sse2_vmmaskcmpv2df3"
  [(set (match_operand:V2DF 0 "register_operand" "=x")
        (vec_merge:V2DF
          (match_operator:V2DF 3 "sse_comparison_operator"
            [(match_operand:V2DF 1 "register_operand" "0")
             (match_operand:V2DF 2 "nonimmediate_operand" "xm")])
          (match_dup 1)
          (const_int 1)))]
  "TARGET_SSE2"
  "cmp%D3sd\t{%2, %0|%0, %2}"
  [(set_attr "type" "ssecmp")
   (set_attr "mode" "DF")])

;; COMISD: compare element 0 of both operands, setting the flags
;; register in CCFPmode.
(define_insn "sse2_comi"
  [(set (reg:CCFP FLAGS_REG)
        (compare:CCFP
          (vec_select:DF
            (match_operand:V2DF 0 "register_operand" "x")
            (parallel [(const_int 0)]))
          (vec_select:DF
            (match_operand:V2DF 1 "nonimmediate_operand" "xm")
            (parallel [(const_int 0)]))))]
  "TARGET_SSE2"
  "comisd\t{%1, %0|%0, %1}"
  [(set_attr "type" "ssecomi")
   (set_attr "mode" "DF")])

;; UCOMISD: as above, in CCFPUmode.
(define_insn "sse2_ucomi"
  [(set (reg:CCFPU FLAGS_REG)
        (compare:CCFPU
          (vec_select:DF
            (match_operand:V2DF 0 "register_operand" "x")
            (parallel [(const_int 0)]))
          (vec_select:DF
            (match_operand:V2DF 1 "nonimmediate_operand" "xm")
            (parallel [(const_int 0)]))))]
  "TARGET_SSE2"
  "ucomisd\t{%1, %0|%0, %1}"
  [(set_attr "type" "ssecomi")
   (set_attr "mode" "DF")])

;; Vector conditional move; the expansion succeeds or fails according to
;; the return value of ix86_expand_fp_vcond.
(define_expand "vcondv2df"
  [(set (match_operand:V2DF 0 "register_operand" "")
        (if_then_else:V2DF
          (match_operator 3 ""
            [(match_operand:V2DF 4 "nonimmediate_operand" "")
             (match_operand:V2DF 5 "nonimmediate_operand" "")])
          (match_operand:V2DF 1 "general_operand" "")
          (match_operand:V2DF 2 "general_operand" "")))]
  "TARGET_SSE2"
{
  if (ix86_expand_fp_vcond (operands))
    DONE;
  else
    FAIL;
})

;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;;
;; Parallel double-precision floating point logical operations
;;
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;

;; Packed AND (commutative).
(define_expand "andv2df3"
  [(set (match_operand:V2DF 0 "register_operand" "")
        (and:V2DF (match_operand:V2DF 1 "nonimmediate_operand" "")
                  (match_operand:V2DF 2 "nonimmediate_operand" "")))]
  "TARGET_SSE2"
  "ix86_fixup_binary_operands_no_copy (AND, V2DFmode, operands);")

(define_insn "*andv2df3"
  [(set (match_operand:V2DF 0 "register_operand" "=x")
        (and:V2DF (match_operand:V2DF 1 "nonimmediate_operand" "%0")
                  (match_operand:V2DF 2 "nonimmediate_operand" "xm")))]
  "TARGET_SSE2 && ix86_binary_operator_ok (AND, V2DFmode, operands)"
  "andpd\t{%2, %0|%0, %2}"
  [(set_attr "type" "sselog")
   (set_attr "mode" "V2DF")])

;; Packed AND-NOT: op0 = ~op1 & op2.
(define_insn "sse2_nandv2df3"
  [(set (match_operand:V2DF 0 "register_operand" "=x")
        (and:V2DF (not:V2DF (match_operand:V2DF 1 "register_operand" "0"))
                  (match_operand:V2DF 2 "nonimmediate_operand" "xm")))]
  "TARGET_SSE2"
  "andnpd\t{%2, %0|%0, %2}"
  [(set_attr "type" "sselog")
   (set_attr "mode" "V2DF")])

;; Packed inclusive OR (commutative).
(define_expand "iorv2df3"
  [(set (match_operand:V2DF 0 "register_operand" "")
        (ior:V2DF (match_operand:V2DF 1 "nonimmediate_operand" "")
                  (match_operand:V2DF 2 "nonimmediate_operand" "")))]
  "TARGET_SSE2"
  "ix86_fixup_binary_operands_no_copy (IOR, V2DFmode, operands);")

(define_insn "*iorv2df3"
  [(set (match_operand:V2DF 0 "register_operand" "=x")
        (ior:V2DF (match_operand:V2DF 1 "nonimmediate_operand" "%0")
                  (match_operand:V2DF 2 "nonimmediate_operand" "xm")))]
  "TARGET_SSE2 && ix86_binary_operator_ok (IOR, V2DFmode, operands)"
  "orpd\t{%2, %0|%0, %2}"
  [(set_attr "type" "sselog")
   (set_attr "mode" "V2DF")])

;; Packed exclusive OR (commutative).
(define_expand "xorv2df3"
  [(set (match_operand:V2DF 0 "register_operand" "")
        (xor:V2DF (match_operand:V2DF 1 "nonimmediate_operand" "")
                  (match_operand:V2DF 2 "nonimmediate_operand" "")))]
  "TARGET_SSE2"
  "ix86_fixup_binary_operands_no_copy (XOR, V2DFmode, operands);")

(define_insn "*xorv2df3"
  [(set (match_operand:V2DF 0 "register_operand" "=x")
        (xor:V2DF (match_operand:V2DF 1 "nonimmediate_operand" "%0")
                  (match_operand:V2DF 2 "nonimmediate_operand" "xm")))]
  "TARGET_SSE2 && ix86_binary_operator_ok (XOR, V2DFmode, operands)"
  "xorpd\t{%2, %0|%0, %2}"
  [(set_attr "type" "sselog")
   (set_attr "mode" "V2DF")])

1769;; Also define scalar versions. These are used for abs, neg, and 1770;; conditional move. Using subregs into vector modes causes register 1771;; allocation lossage. These patterns do not allow memory operands 1772;; because the native instructions read the full 128-bits. 1773 1774(define_insn "*anddf3" 1775 [(set (match_operand:DF 0 "register_operand" "=x") 1776 (and:DF (match_operand:DF 1 "register_operand" "0") 1777 (match_operand:DF 2 "register_operand" "x")))] 1778 "TARGET_SSE2" 1779 "andpd\t{%2, %0|%0, %2}" 1780 [(set_attr "type" "sselog") 1781 (set_attr "mode" "V2DF")]) 1782 1783(define_insn "*nanddf3" 1784 [(set (match_operand:DF 0 "register_operand" "=x") 1785 (and:DF (not:DF (match_operand:DF 1 "register_operand" "0")) 1786 (match_operand:DF 2 "register_operand" "x")))] 1787 "TARGET_SSE2" 1788 "andnpd\t{%2, %0|%0, %2}" 1789 [(set_attr "type" "sselog") 1790 (set_attr "mode" "V2DF")]) 1791 1792(define_insn "*iordf3" 1793 [(set (match_operand:DF 0 "register_operand" "=x") 1794 (ior:DF (match_operand:DF 1 "register_operand" "0") 1795 (match_operand:DF 2 "register_operand" "x")))] 1796 "TARGET_SSE2" 1797 "orpd\t{%2, %0|%0, %2}" 1798 [(set_attr "type" "sselog") 1799 (set_attr "mode" "V2DF")]) 1800 1801(define_insn "*xordf3" 1802 [(set (match_operand:DF 0 "register_operand" "=x") 1803 (xor:DF (match_operand:DF 1 "register_operand" "0") 1804 (match_operand:DF 2 "register_operand" "x")))] 1805 "TARGET_SSE2" 1806 "xorpd\t{%2, %0|%0, %2}" 1807 [(set_attr "type" "sselog") 1808 (set_attr "mode" "V2DF")]) 1809 1810;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; 1811;; 1812;; Parallel double-precision floating point conversion operations 1813;; 1814;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; 1815 1816(define_insn "sse2_cvtpi2pd" 1817 [(set (match_operand:V2DF 0 "register_operand" "=x,x") 1818 (float:V2DF (match_operand:V2SI 1 "nonimmediate_operand" "y,m")))] 1819 "TARGET_SSE2" 1820 "cvtpi2pd\t{%1, %0|%0, %1}" 1821 
[(set_attr "type" "ssecvt") 1822 (set_attr "unit" "mmx,*") 1823 (set_attr "mode" "V2DF")]) 1824 1825(define_insn "sse2_cvtpd2pi" 1826 [(set (match_operand:V2SI 0 "register_operand" "=y") 1827 (unspec:V2SI [(match_operand:V2DF 1 "nonimmediate_operand" "xm")] 1828 UNSPEC_FIX_NOTRUNC))] 1829 "TARGET_SSE2" 1830 "cvtpd2pi\t{%1, %0|%0, %1}" 1831 [(set_attr "type" "ssecvt") 1832 (set_attr "unit" "mmx") 1833 (set_attr "mode" "DI")]) 1834 1835(define_insn "sse2_cvttpd2pi" 1836 [(set (match_operand:V2SI 0 "register_operand" "=y") 1837 (fix:V2SI (match_operand:V2DF 1 "nonimmediate_operand" "xm")))] 1838 "TARGET_SSE2" 1839 "cvttpd2pi\t{%1, %0|%0, %1}" 1840 [(set_attr "type" "ssecvt") 1841 (set_attr "unit" "mmx") 1842 (set_attr "mode" "TI")]) 1843 1844(define_insn "sse2_cvtsi2sd" 1845 [(set (match_operand:V2DF 0 "register_operand" "=x,x") 1846 (vec_merge:V2DF 1847 (vec_duplicate:V2DF 1848 (float:DF (match_operand:SI 2 "nonimmediate_operand" "r,m"))) 1849 (match_operand:V2DF 1 "register_operand" "0,0") 1850 (const_int 1)))] 1851 "TARGET_SSE2" 1852 "cvtsi2sd\t{%2, %0|%0, %2}" 1853 [(set_attr "type" "sseicvt") 1854 (set_attr "mode" "DF") 1855 (set_attr "athlon_decode" "double,direct")]) 1856 1857(define_insn "sse2_cvtsi2sdq" 1858 [(set (match_operand:V2DF 0 "register_operand" "=x,x") 1859 (vec_merge:V2DF 1860 (vec_duplicate:V2DF 1861 (float:DF (match_operand:DI 2 "nonimmediate_operand" "r,m"))) 1862 (match_operand:V2DF 1 "register_operand" "0,0") 1863 (const_int 1)))] 1864 "TARGET_SSE2 && TARGET_64BIT" 1865 "cvtsi2sdq\t{%2, %0|%0, %2}" 1866 [(set_attr "type" "sseicvt") 1867 (set_attr "mode" "DF") 1868 (set_attr "athlon_decode" "double,direct")]) 1869 1870(define_insn "sse2_cvtsd2si" 1871 [(set (match_operand:SI 0 "register_operand" "=r,r") 1872 (unspec:SI 1873 [(vec_select:DF 1874 (match_operand:V2DF 1 "nonimmediate_operand" "x,m") 1875 (parallel [(const_int 0)]))] 1876 UNSPEC_FIX_NOTRUNC))] 1877 "TARGET_SSE2" 1878 "cvtsd2si\t{%1, %0|%0, %1}" 1879 [(set_attr "type" "sseicvt") 1880 
(set_attr "athlon_decode" "double,vector")
   (set_attr "mode" "SI")])

;; DImode-result variant of sse2_cvtsd2si; additionally requires
;; TARGET_64BIT.
(define_insn "sse2_cvtsd2siq"
  [(set (match_operand:DI 0 "register_operand" "=r,r")
        (unspec:DI
          [(vec_select:DF
             (match_operand:V2DF 1 "nonimmediate_operand" "x,m")
             (parallel [(const_int 0)]))]
          UNSPEC_FIX_NOTRUNC))]
  "TARGET_SSE2 && TARGET_64BIT"
  "cvtsd2siq\t{%1, %0|%0, %1}"
  [(set_attr "type" "sseicvt")
   (set_attr "athlon_decode" "double,vector")
   (set_attr "mode" "DI")])

;; Element 0 of the V2DF operand converted to SImode with the RTL `fix'
;; code; emits cvttsd2si.
(define_insn "sse2_cvttsd2si"
  [(set (match_operand:SI 0 "register_operand" "=r,r")
        (fix:SI
          (vec_select:DF
            (match_operand:V2DF 1 "nonimmediate_operand" "x,m")
            (parallel [(const_int 0)]))))]
  "TARGET_SSE2"
  "cvttsd2si\t{%1, %0|%0, %1}"
  [(set_attr "type" "sseicvt")
   (set_attr "mode" "SI")
   (set_attr "athlon_decode" "double,vector")])

;; DImode-result variant of sse2_cvttsd2si; additionally requires
;; TARGET_64BIT.
(define_insn "sse2_cvttsd2siq"
  [(set (match_operand:DI 0 "register_operand" "=r,r")
        (fix:DI
          (vec_select:DF
            (match_operand:V2DF 1 "nonimmediate_operand" "x,m")
            (parallel [(const_int 0)]))))]
  "TARGET_SSE2 && TARGET_64BIT"
  "cvttsd2siq\t{%1, %0|%0, %1}"
  [(set_attr "type" "sseicvt")
   (set_attr "mode" "DI")
   (set_attr "athlon_decode" "double,vector")])

;; Convert elements 0 and 1 of a V4SI operand to a V2DF result.
(define_insn "sse2_cvtdq2pd"
  [(set (match_operand:V2DF 0 "register_operand" "=x")
        (float:V2DF
          (vec_select:V2SI
            (match_operand:V4SI 1 "nonimmediate_operand" "xm")
            (parallel [(const_int 0) (const_int 1)]))))]
  "TARGET_SSE2"
  "cvtdq2pd\t{%1, %0|%0, %1}"
  [(set_attr "type" "ssecvt")
   (set_attr "mode" "V2DF")])

;; Expander: supplies the V2SI zero vector as operand 2, i.e. the second
;; half of the V4SI vec_concat.
(define_expand "sse2_cvtpd2dq"
  [(set (match_operand:V4SI 0 "register_operand" "")
        (vec_concat:V4SI
          (unspec:V2SI [(match_operand:V2DF 1 "nonimmediate_operand" "")]
                       UNSPEC_FIX_NOTRUNC)
          (match_dup 2)))]
  "TARGET_SSE2"
  "operands[2] = CONST0_RTX (V2SImode);")

(define_insn "*sse2_cvtpd2dq"
[(set (match_operand:V4SI 0 "register_operand" "=x")
        (vec_concat:V4SI
          (unspec:V2SI [(match_operand:V2DF 1 "nonimmediate_operand" "xm")]
                       UNSPEC_FIX_NOTRUNC)
          (match_operand:V2SI 2 "const0_operand" "")))]
  "TARGET_SSE2"
  "cvtpd2dq\t{%1, %0|%0, %1}"
  [(set_attr "type" "ssecvt")
   (set_attr "mode" "TI")])

;; Expander: supplies the V2SI zero vector as operand 2, i.e. the second
;; half of the V4SI vec_concat.
(define_expand "sse2_cvttpd2dq"
  [(set (match_operand:V4SI 0 "register_operand" "")
        (vec_concat:V4SI
          (fix:V2SI (match_operand:V2DF 1 "nonimmediate_operand" ""))
          (match_dup 2)))]
  "TARGET_SSE2"
  "operands[2] = CONST0_RTX (V2SImode);")

;; Matching insn: the second vec_concat half must satisfy const0_operand.
(define_insn "*sse2_cvttpd2dq"
  [(set (match_operand:V4SI 0 "register_operand" "=x")
        (vec_concat:V4SI
          (fix:V2SI (match_operand:V2DF 1 "nonimmediate_operand" "xm"))
          (match_operand:V2SI 2 "const0_operand" "")))]
  "TARGET_SSE2"
  "cvttpd2dq\t{%1, %0|%0, %1}"
  [(set_attr "type" "ssecvt")
   (set_attr "mode" "TI")])

;; cvtsd2ss: the float_truncate of operand 2 is merged into operand 1
;; under vec_merge mask 1.  Operand 1 is tied to the destination.
(define_insn "sse2_cvtsd2ss"
  [(set (match_operand:V4SF 0 "register_operand" "=x,x")
        (vec_merge:V4SF
          (vec_duplicate:V4SF
            (float_truncate:V2SF
              (match_operand:V2DF 2 "nonimmediate_operand" "x,m")))
          (match_operand:V4SF 1 "register_operand" "0,0")
          (const_int 1)))]
  "TARGET_SSE2"
  "cvtsd2ss\t{%2, %0|%0, %2}"
  [(set_attr "type" "ssecvt")
   (set_attr "athlon_decode" "vector,double")
   (set_attr "mode" "SF")])

;; cvtss2sd: the float_extend of elements 0-1 of operand 2 is merged into
;; operand 1 under vec_merge mask 1.  Operand 1 is tied to the destination.
(define_insn "sse2_cvtss2sd"
  [(set (match_operand:V2DF 0 "register_operand" "=x")
        (vec_merge:V2DF
          (float_extend:V2DF
            (vec_select:V2SF
              (match_operand:V4SF 2 "nonimmediate_operand" "xm")
              (parallel [(const_int 0) (const_int 1)])))
          (match_operand:V2DF 1 "register_operand" "0")
          (const_int 1)))]
  "TARGET_SSE2"
  "cvtss2sd\t{%2, %0|%0, %2}"
  [(set_attr "type" "ssecvt")
   (set_attr "mode" "DF")])

;; Expander: supplies the V2SF zero vector as operand 2, i.e. the second
;; half of the V4SF vec_concat.
(define_expand "sse2_cvtpd2ps"
  [(set (match_operand:V4SF 0 "register_operand" "")
(vec_concat:V4SF
          (float_truncate:V2SF
            (match_operand:V2DF 1 "nonimmediate_operand" "xm"))
          (match_dup 2)))]
  "TARGET_SSE2"
  "operands[2] = CONST0_RTX (V2SFmode);")

;; Matching insn: the second vec_concat half must satisfy const0_operand.
(define_insn "*sse2_cvtpd2ps"
  [(set (match_operand:V4SF 0 "register_operand" "=x")
        (vec_concat:V4SF
          (float_truncate:V2SF
            (match_operand:V2DF 1 "nonimmediate_operand" "xm"))
          (match_operand:V2SF 2 "const0_operand" "")))]
  "TARGET_SSE2"
  "cvtpd2ps\t{%1, %0|%0, %1}"
  [(set_attr "type" "ssecvt")
   (set_attr "mode" "V4SF")])

;; Extend elements 0 and 1 of a V4SF operand to a V2DF result.
(define_insn "sse2_cvtps2pd"
  [(set (match_operand:V2DF 0 "register_operand" "=x")
        (float_extend:V2DF
          (vec_select:V2SF
            (match_operand:V4SF 1 "nonimmediate_operand" "xm")
            (parallel [(const_int 0) (const_int 1)]))))]
  "TARGET_SSE2"
  "cvtps2pd\t{%1, %0|%0, %1}"
  [(set_attr "type" "ssecvt")
   (set_attr "mode" "V2DF")])

;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;;
;; Parallel double-precision floating point element swizzling
;;
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;

;; Select elements 1 and 3 of the concatenation of operands 1 and 2, i.e.
;; the high element of each.  The condition rejects the case where both
;; inputs are memory.
(define_insn "sse2_unpckhpd"
  [(set (match_operand:V2DF 0 "nonimmediate_operand" "=x,x,m")
        (vec_select:V2DF
          (vec_concat:V4DF
            (match_operand:V2DF 1 "nonimmediate_operand" " 0,o,x")
            (match_operand:V2DF 2 "nonimmediate_operand" " x,0,0"))
          (parallel [(const_int 1)
                     (const_int 3)])))]
  "TARGET_SSE2 && !(MEM_P (operands[1]) && MEM_P (operands[2]))"
  "@
   unpckhpd\t{%2, %0|%0, %2}
   movlpd\t{%H1, %0|%0, %H1}
   movhpd\t{%1, %0|%0, %1}"
  [(set_attr "type" "sselog,ssemov,ssemov")
   (set_attr "mode" "V2DF,V1DF,V1DF")])

;; Duplicate element 0 of operand 1 into both result elements.  The
;; memory-destination alternative emits "#" and relies on being split.
(define_insn "*sse3_movddup"
  [(set (match_operand:V2DF 0 "nonimmediate_operand" "=x,o")
        (vec_select:V2DF
          (vec_concat:V4DF
            (match_operand:V2DF 1 "nonimmediate_operand" "xm,x")
            (match_dup 1))
          (parallel [(const_int 0)
(const_int 2)])))]
  "TARGET_SSE3 && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
  "@
   movddup\t{%1, %0|%0, %1}
   #"
  [(set_attr "type" "sselog1,ssemov")
   (set_attr "mode" "V2DF")])

;; After reload, a register-to-memory element-0 duplication becomes two
;; DFmode stores of the same register, at byte offsets 0 and 8.
(define_split
  [(set (match_operand:V2DF 0 "memory_operand" "")
        (vec_select:V2DF
          (vec_concat:V4DF
            (match_operand:V2DF 1 "register_operand" "")
            (match_dup 1))
          (parallel [(const_int 0)
                     (const_int 2)])))]
  "TARGET_SSE3 && reload_completed"
  [(const_int 0)]
{
  /* View the source register in DFmode; the same value is stored twice.  */
  rtx lo_half = gen_rtx_REG (DFmode, REGNO (operands[1]));

  emit_move_insn (adjust_address (operands[0], DFmode, 0), lo_half);
  emit_move_insn (adjust_address (operands[0], DFmode, 8), lo_half);
  DONE;
})

;; Select elements 0 and 2 of the concatenation of operands 1 and 2, i.e.
;; the low element of each.  Operand 1 is tied to the destination in every
;; alternative.
(define_insn "sse2_unpcklpd"
  [(set (match_operand:V2DF 0 "nonimmediate_operand" "=x,x,o")
        (vec_select:V2DF
          (vec_concat:V4DF
            (match_operand:V2DF 1 "nonimmediate_operand" " 0,0,0")
            (match_operand:V2DF 2 "nonimmediate_operand" " x,m,x"))
          (parallel [(const_int 0)
                     (const_int 2)])))]
  "TARGET_SSE2 && !(MEM_P (operands[1]) && MEM_P (operands[2]))"
  "@
   unpcklpd\t{%2, %0|%0, %2}
   movhpd\t{%2, %0|%0, %2}
   movlpd\t{%2, %H0|%H0, %2}"
  [(set_attr "type" "sselog,ssemov,ssemov")
   (set_attr "mode" "V2DF,V1DF,V1DF")])

;; Expander taking a 2-bit immediate in operand 3.  Bit 0 becomes the
;; first selector (0 or 1) and bit 1 becomes the second selector (2 or 3)
;; passed to sse2_shufpd_1.
(define_expand "sse2_shufpd"
  [(match_operand:V2DF 0 "register_operand" "")
   (match_operand:V2DF 1 "register_operand" "")
   (match_operand:V2DF 2 "nonimmediate_operand" "")
   (match_operand:SI 3 "const_int_operand" "")]
  "TARGET_SSE2"
{
  int mask = INTVAL (operands[3]);
  emit_insn (gen_sse2_shufpd_1 (operands[0], operands[1], operands[2],
                                GEN_INT (mask & 1),
                                GEN_INT (mask & 2 ?
3 : 2)));
  DONE;
})

;; shufpd with explicit element selectors: operand 3 (0..1) picks from
;; operand 1 and operand 4 (2..3) picks from operand 2.  The output code
;; folds both selectors back into the 2-bit immediate.
(define_insn "sse2_shufpd_1"
  [(set (match_operand:V2DF 0 "register_operand" "=x")
        (vec_select:V2DF
          (vec_concat:V4DF
            (match_operand:V2DF 1 "register_operand" "0")
            (match_operand:V2DF 2 "nonimmediate_operand" "xm"))
          (parallel [(match_operand 3 "const_0_to_1_operand" "")
                     (match_operand 4 "const_2_to_3_operand" "")])))]
  "TARGET_SSE2"
{
  /* Bit 0 comes from operand 3; bit 1 from operand 4 rebased to 0..1.  */
  int imm = INTVAL (operands[3]) | ((INTVAL (operands[4]) - 2) << 1);

  operands[3] = GEN_INT (imm);

  return "shufpd\t{%3, %2, %0|%0, %2, %3}";
}
  [(set_attr "type" "sselog")
   (set_attr "mode" "V2DF")])

;; Extract element 1 of a V2DF value as DF.  The memory-source alternative
;; emits "#" and relies on being split.
(define_insn "sse2_storehpd"
  [(set (match_operand:DF 0 "nonimmediate_operand" "=m,x,x*fr")
        (vec_select:DF
          (match_operand:V2DF 1 "nonimmediate_operand" " x,0,o")
          (parallel [(const_int 1)])))]
  "TARGET_SSE2 && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
  "@
   movhpd\t{%1, %0|%0, %1}
   unpckhpd\t%0, %0
   #"
  [(set_attr "type" "ssemov,sselog1,ssemov")
   (set_attr "mode" "V1DF,V2DF,DF")])

;; After reload, extracting element 1 from memory is a plain DFmode load
;; from byte offset 8.
(define_split
  [(set (match_operand:DF 0 "register_operand" "")
        (vec_select:DF
          (match_operand:V2DF 1 "memory_operand" "")
          (parallel [(const_int 1)])))]
  "TARGET_SSE2 && reload_completed"
  [(set (match_dup 0) (match_dup 1))]
{
  operands[1] = adjust_address (operands[1], DFmode, 8);
})

;; Extract element 0 of a V2DF value as DF.  Only the store alternative
;; emits an instruction directly; the other two emit "#".
(define_insn "sse2_storelpd"
  [(set (match_operand:DF 0 "nonimmediate_operand" "=m,x,x*fr")
        (vec_select:DF
          (match_operand:V2DF 1 "nonimmediate_operand" " x,x,m")
          (parallel [(const_int 0)])))]
  "TARGET_SSE2 && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
  "@
   movlpd\t{%1, %0|%0, %1}
   #
   #"
  [(set_attr "type" "ssemov")
   (set_attr "mode" "V1DF,DF,DF")])

;; After reload, extracting element 0 into a register is a DFmode move
;; from the DFmode view of the source (register or memory).
(define_split
  [(set (match_operand:DF 0 "register_operand" "")
        (vec_select:DF
          (match_operand:V2DF 1 "nonimmediate_operand" "")
          (parallel [(const_int 0)])))]
  "TARGET_SSE2 && reload_completed"
  [(const_int 0)]
{
  rtx src = operands[1];

  /* A hard register is re-created in DFmode; anything else goes through
     gen_lowpart.  */
  if (REG_P (src))
    src = gen_rtx_REG (DFmode, REGNO (src));
  else
    src = gen_lowpart (DFmode, src);
  emit_move_insn (operands[0], src);
  DONE;
})

;; Build { element 0 of operand 1, operand 2 }: replace the high element.
;; NOTE(review): alternative 2 ties operand 2 to the destination and emits
;; `shufpd $1, %1, %0'.  That appears to yield { %0[1], %1[0] } rather
;; than { %1[0], %2 } -- confirm against the SHUFPD definition before
;; relying on this alternative.
(define_insn "sse2_loadhpd"
  [(set (match_operand:V2DF 0 "nonimmediate_operand" "=x,x,x,o")
        (vec_concat:V2DF
          (vec_select:DF
            (match_operand:V2DF 1 "nonimmediate_operand" " 0,0,x,0")
            (parallel [(const_int 0)]))
          (match_operand:DF 2 "nonimmediate_operand" " m,x,0,x*fr")))]
  "TARGET_SSE2 && !(MEM_P (operands[1]) && MEM_P (operands[2]))"
  "@
   movhpd\t{%2, %0|%0, %2}
   unpcklpd\t{%2, %0|%0, %2}
   shufpd\t{$1, %1, %0|%0, %1, 1}
   #"
  [(set_attr "type" "ssemov,sselog,sselog,other")
   (set_attr "mode" "V1DF,V2DF,V2DF,DF")])

;; After reload, replacing the high element of a V2DF in memory is a
;; DFmode store at byte offset 8.
(define_split
  [(set (match_operand:V2DF 0 "memory_operand" "")
        (vec_concat:V2DF
          (vec_select:DF (match_dup 0) (parallel [(const_int 0)]))
          (match_operand:DF 1 "register_operand" "")))]
  "TARGET_SSE2 && reload_completed"
  [(set (match_dup 0) (match_dup 1))]
{
  operands[0] = adjust_address (operands[0], DFmode, 8);
})

;; Build { operand 2, element 1 of operand 1 }: replace the low element.
;; In alternative 3 operand 2 is tied to the destination, so the high
;; element has to come from operand 1; shufpd therefore names %1 as its
;; source.  (Naming %2 there would shuffle the destination with itself
;; and never read operand 1.)
(define_insn "sse2_loadlpd"
  [(set (match_operand:V2DF 0 "nonimmediate_operand" "=x,x,x,x,x,m")
        (vec_concat:V2DF
          (match_operand:DF 2 "nonimmediate_operand" " m,m,x,0,0,x*fr")
          (vec_select:DF
            (match_operand:V2DF 1 "vector_move_operand" " C,0,0,x,o,0")
            (parallel [(const_int 1)]))))]
  "TARGET_SSE2 && !(MEM_P (operands[1]) && MEM_P (operands[2]))"
  "@
   movsd\t{%2, %0|%0, %2}
   movlpd\t{%2, %0|%0, %2}
   movsd\t{%2, %0|%0, %2}
   shufpd\t{$2, %1, %0|%0, %1, 2}
   movhpd\t{%H1, %0|%0, %H1}
   #"
  [(set_attr "type" "ssemov,ssemov,ssemov,sselog,ssemov,other")
   (set_attr "mode" "DF,V1DF,V1DF,V2DF,V1DF,DF")])

(define_split
  [(set
(match_operand:V2DF 0 "memory_operand" "")
        (vec_concat:V2DF
          (match_operand:DF 1 "register_operand" "")
          (vec_select:DF (match_dup 0) (parallel [(const_int 1)]))))]
  "TARGET_SSE2 && reload_completed"
  [(set (match_dup 0) (match_dup 1))]
{
  /* The register supplies element 0 of the vec_concat, so it is stored at
     byte offset 0.  Element 1 (offset 8) is the part kept from memory
     and must not be overwritten.  */
  operands[0] = adjust_address (operands[0], DFmode, 0);
})

;; Not sure these two are ever used, but it doesn't hurt to have
;; them.  -aoliva
;; Extract element 1 of a V2DF value when only SSE1 is available.
(define_insn "*vec_extractv2df_1_sse"
  [(set (match_operand:DF 0 "nonimmediate_operand" "=m,x,x")
        (vec_select:DF
          (match_operand:V2DF 1 "nonimmediate_operand" "x,x,o")
          (parallel [(const_int 1)])))]
  "!TARGET_SSE2 && TARGET_SSE
   && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
  "@
   movhps\t{%1, %0|%0, %1}
   movhlps\t{%1, %0|%0, %1}
   movlps\t{%H1, %0|%0, %H1}"
  [(set_attr "type" "ssemov")
   (set_attr "mode" "V2SF,V4SF,V2SF")])

;; Extract element 0 of a V2DF value when only SSE1 is available.
(define_insn "*vec_extractv2df_0_sse"
  [(set (match_operand:DF 0 "nonimmediate_operand" "=m,x,x")
        (vec_select:DF
          (match_operand:V2DF 1 "nonimmediate_operand" "x,x,m")
          (parallel [(const_int 0)])))]
  "!TARGET_SSE2 && TARGET_SSE
   && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
  "@
   movlps\t{%1, %0|%0, %1}
   movaps\t{%1, %0|%0, %1}
   movlps\t{%1, %0|%0, %1}"
  [(set_attr "type" "ssemov")
   (set_attr "mode" "V2SF,V4SF,V2SF")])

;; vec_merge with mask 1: element 0 comes from operand 2 and element 1
;; from operand 1.  In alternative 3 operand 2 is tied to the destination,
;; so the high element has to come from operand 1; shufpd therefore names
;; %1 as its source.  (Naming %2 there would shuffle the destination with
;; itself and never read operand 1.)
(define_insn "sse2_movsd"
  [(set (match_operand:V2DF 0 "nonimmediate_operand" "=x,x,m,x,x,o")
        (vec_merge:V2DF
          (match_operand:V2DF 2 "nonimmediate_operand" " x,m,x,0,0,0")
          (match_operand:V2DF 1 "nonimmediate_operand" " 0,0,0,x,o,x")
          (const_int 1)))]
  "TARGET_SSE2"
  "@
   movsd\t{%2, %0|%0, %2}
   movlpd\t{%2, %0|%0, %2}
   movlpd\t{%2, %0|%0, %2}
   shufpd\t{$2, %1, %0|%0, %1, 2}
   movhps\t{%H1, %0|%0, %H1}
   movhps\t{%1, %H0|%H0, %1}"
  [(set_attr "type" "ssemov,ssemov,ssemov,sselog,ssemov,ssemov")
   (set_attr "mode" "DF,V1DF,V1DF,V2DF,V1DF,V1DF")])

;; Broadcast a DF value (register or memory) to both elements using
;; movddup; SSE3 only.
(define_insn "*vec_dupv2df_sse3"
  [(set (match_operand:V2DF 0 "register_operand" "=x")
        (vec_duplicate:V2DF
          (match_operand:DF 1 "nonimmediate_operand" "xm")))]
  "TARGET_SSE3"
  "movddup\t{%1, %0|%0, %1}"
  [(set_attr "type" "sselog1")
   (set_attr "mode" "DF")])

;; SSE2 broadcast: the source is tied to the destination register and
;; unpcklpd is applied to it in place.
(define_insn "*vec_dupv2df"
  [(set (match_operand:V2DF 0 "register_operand" "=x")
        (vec_duplicate:V2DF
          (match_operand:DF 1 "register_operand" "0")))]
  "TARGET_SSE2"
  "unpcklpd\t%0, %0"
  [(set_attr "type" "sselog1")
   (set_attr "mode" "V4SF")])

;; Concatenation of a DF value with itself; same instruction as the SSE3
;; vec_duplicate form.
(define_insn "*vec_concatv2df_sse3"
  [(set (match_operand:V2DF 0 "register_operand" "=x")
        (vec_concat:V2DF
          (match_operand:DF 1 "nonimmediate_operand" "xm")
          (match_dup 1)))]
  "TARGET_SSE3"
  "movddup\t{%1, %0|%0, %1}"
  [(set_attr "type" "sselog1")
   (set_attr "mode" "DF")])

;; General two-operand V2DF concatenation.  The first three alternatives
;; use the "Y" register constraint and the last two use "x".
(define_insn "*vec_concatv2df"
  [(set (match_operand:V2DF 0 "register_operand" "=Y,Y,Y,x,x")
        (vec_concat:V2DF
          (match_operand:DF 1 "nonimmediate_operand" " 0,0,m,0,0")
          (match_operand:DF 2 "vector_move_operand" " Y,m,C,x,m")))]
  "TARGET_SSE"
  "@
   unpcklpd\t{%2, %0|%0, %2}
   movhpd\t{%2, %0|%0, %2}
   movsd\t{%1, %0|%0, %1}
   movlhps\t{%2, %0|%0, %2}
   movhps\t{%2, %0|%0, %2}"
  [(set_attr "type" "sselog,ssemov,ssemov,ssemov,ssemov")
   (set_attr "mode" "V2DF,V1DF,DF,V4SF,V2SF")])

;; Standard-named expander; all work is delegated to
;; ix86_expand_vector_set with the element index from operand 2.
(define_expand "vec_setv2df"
  [(match_operand:V2DF 0 "register_operand" "")
   (match_operand:DF 1 "register_operand" "")
   (match_operand 2 "const_int_operand" "")]
  "TARGET_SSE"
{
  ix86_expand_vector_set (false, operands[0], operands[1],
                          INTVAL (operands[2]));
  DONE;
})

;; Standard-named expander; all work is delegated to
;; ix86_expand_vector_extract with the element index from operand 2.
(define_expand "vec_extractv2df"
  [(match_operand:DF 0 "register_operand" "")
   (match_operand:V2DF 1 "register_operand" "")
   (match_operand 2 "const_int_operand" "")]
  "TARGET_SSE"
{
ix86_expand_vector_extract (false, operands[0], operands[1],
                              INTVAL (operands[2]));
  DONE;
})

;; Standard-named expander; all work is delegated to
;; ix86_expand_vector_init.
(define_expand "vec_initv2df"
  [(match_operand:V2DF 0 "register_operand" "")
   (match_operand 1 "" "")]
  "TARGET_SSE"
{
  ix86_expand_vector_init (false, operands[0], operands[1]);
  DONE;
})

;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;;
;; Parallel integral arithmetic
;;
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;

;; Negation is expressed as (0 - x); the zero vector is forced into a
;; register and becomes operand 2.
(define_expand "neg<mode>2"
  [(set (match_operand:SSEMODEI 0 "register_operand" "")
        (minus:SSEMODEI
          (match_dup 2)
          (match_operand:SSEMODEI 1 "nonimmediate_operand" "")))]
  "TARGET_SSE2"
  "operands[2] = force_reg (<MODE>mode, CONST0_RTX (<MODE>mode));")

;; Integer vector add for every SSEMODEI mode; operands are first passed
;; through ix86_fixup_binary_operands_no_copy.
(define_expand "add<mode>3"
  [(set (match_operand:SSEMODEI 0 "register_operand" "")
        (plus:SSEMODEI (match_operand:SSEMODEI 1 "nonimmediate_operand" "")
                       (match_operand:SSEMODEI 2 "nonimmediate_operand" "")))]
  "TARGET_SSE2"
  "ix86_fixup_binary_operands_no_copy (PLUS, <MODE>mode, operands);")

;; Matching insn: padd with the <ssevecsize> suffix; operand 1 is marked
;; commutative ("%").
(define_insn "*add<mode>3"
  [(set (match_operand:SSEMODEI 0 "register_operand" "=x")
        (plus:SSEMODEI
          (match_operand:SSEMODEI 1 "nonimmediate_operand" "%0")
          (match_operand:SSEMODEI 2 "nonimmediate_operand" "xm")))]
  "TARGET_SSE2 && ix86_binary_operator_ok (PLUS, <MODE>mode, operands)"
  "padd<ssevecsize>\t{%2, %0|%0, %2}"
  [(set_attr "type" "sseiadd")
   (set_attr "mode" "TI")])

;; ss_plus for the V16QI and V8HI modes (SSEMODE12); emits padds.
(define_insn "sse2_ssadd<mode>3"
  [(set (match_operand:SSEMODE12 0 "register_operand" "=x")
        (ss_plus:SSEMODE12
          (match_operand:SSEMODE12 1 "nonimmediate_operand" "%0")
          (match_operand:SSEMODE12 2 "nonimmediate_operand" "xm")))]
  "TARGET_SSE2 && ix86_binary_operator_ok (SS_PLUS, <MODE>mode, operands)"
  "padds<ssevecsize>\t{%2, %0|%0, %2}"
  [(set_attr "type" "sseiadd")
   (set_attr "mode" "TI")])

;; us_plus for the V16QI and V8HI modes (SSEMODE12); emits paddus.
(define_insn "sse2_usadd<mode>3"
  [(set (match_operand:SSEMODE12 0 "register_operand" "=x")
        (us_plus:SSEMODE12
          (match_operand:SSEMODE12 1 "nonimmediate_operand" "%0")
          (match_operand:SSEMODE12 2 "nonimmediate_operand" "xm")))]
  "TARGET_SSE2 && ix86_binary_operator_ok (US_PLUS, <MODE>mode, operands)"
  "paddus<ssevecsize>\t{%2, %0|%0, %2}"
  [(set_attr "type" "sseiadd")
   (set_attr "mode" "TI")])

;; Integer vector subtract for every SSEMODEI mode; operand 1 must
;; already be a register.
(define_expand "sub<mode>3"
  [(set (match_operand:SSEMODEI 0 "register_operand" "")
        (minus:SSEMODEI (match_operand:SSEMODEI 1 "register_operand" "")
                        (match_operand:SSEMODEI 2 "nonimmediate_operand" "")))]
  "TARGET_SSE2"
  "ix86_fixup_binary_operands_no_copy (MINUS, <MODE>mode, operands);")

;; Matching insn: psub with the <ssevecsize> suffix; operand 1 is tied to
;; the destination.
(define_insn "*sub<mode>3"
  [(set (match_operand:SSEMODEI 0 "register_operand" "=x")
        (minus:SSEMODEI
          (match_operand:SSEMODEI 1 "register_operand" "0")
          (match_operand:SSEMODEI 2 "nonimmediate_operand" "xm")))]
  "TARGET_SSE2"
  "psub<ssevecsize>\t{%2, %0|%0, %2}"
  [(set_attr "type" "sseiadd")
   (set_attr "mode" "TI")])

;; ss_minus for the V16QI and V8HI modes (SSEMODE12); emits psubs.
(define_insn "sse2_sssub<mode>3"
  [(set (match_operand:SSEMODE12 0 "register_operand" "=x")
        (ss_minus:SSEMODE12
          (match_operand:SSEMODE12 1 "register_operand" "0")
          (match_operand:SSEMODE12 2 "nonimmediate_operand" "xm")))]
  "TARGET_SSE2"
  "psubs<ssevecsize>\t{%2, %0|%0, %2}"
  [(set_attr "type" "sseiadd")
   (set_attr "mode" "TI")])

;; us_minus for the V16QI and V8HI modes (SSEMODE12); emits psubus.
(define_insn "sse2_ussub<mode>3"
  [(set (match_operand:SSEMODE12 0 "register_operand" "=x")
        (us_minus:SSEMODE12
          (match_operand:SSEMODE12 1 "register_operand" "0")
          (match_operand:SSEMODE12 2 "nonimmediate_operand" "xm")))]
  "TARGET_SSE2"
  "psubus<ssevecsize>\t{%2, %0|%0, %2}"
  [(set_attr "type" "sseiadd")
   (set_attr "mode" "TI")])

;; V16QI multiply, synthesized from byte unpacks and two V8HI multiplies.
(define_expand "mulv16qi3"
  [(set (match_operand:V16QI 0 "register_operand" "")
        (mult:V16QI
(match_operand:V16QI 1 "register_operand" "")
                    (match_operand:V16QI 2 "register_operand" "")))]
  "TARGET_SSE2"
{
  rtx t[12], op0;
  int i;

  /* Twelve V16QI temporaries hold the intermediate shuffles/products.  */
  for (i = 0; i < 12; ++i)
    t[i] = gen_reg_rtx (V16QImode);

  /* Unpack data such that we've got a source byte in each low byte of
     each word.  We don't care what goes into the high byte of each word.
     Rather than trying to get zero in there, most convenient is to let
     it be a copy of the low byte.  */
  emit_insn (gen_sse2_punpckhbw (t[0], operands[1], operands[1]));
  emit_insn (gen_sse2_punpckhbw (t[1], operands[2], operands[2]));
  emit_insn (gen_sse2_punpcklbw (t[2], operands[1], operands[1]));
  emit_insn (gen_sse2_punpcklbw (t[3], operands[2], operands[2]));

  /* Multiply words.  The end-of-line annotations here give a picture of what
     the output of that instruction looks like.  Dot means don't care; the
     letters are the bytes of the result with A being the most significant.  */
  emit_insn (gen_mulv8hi3 (gen_lowpart (V8HImode, t[4]), /* .A.B.C.D.E.F.G.H */
                           gen_lowpart (V8HImode, t[0]),
                           gen_lowpart (V8HImode, t[1])));
  emit_insn (gen_mulv8hi3 (gen_lowpart (V8HImode, t[5]), /* .I.J.K.L.M.N.O.P */
                           gen_lowpart (V8HImode, t[2]),
                           gen_lowpart (V8HImode, t[3])));

  /* Extract the relevant bytes and merge them back together.  */
  emit_insn (gen_sse2_punpckhbw (t[6], t[5], t[4]));    /* ..AI..BJ..CK..DL */
  emit_insn (gen_sse2_punpcklbw (t[7], t[5], t[4]));    /* ..EM..FN..GO..HP */
  emit_insn (gen_sse2_punpckhbw (t[8], t[7], t[6]));    /* ....AEIM....BFJN */
  emit_insn (gen_sse2_punpcklbw (t[9], t[7], t[6]));    /* ....CGKO....DHLP */
  emit_insn (gen_sse2_punpckhbw (t[10], t[9], t[8]));   /* ........ACEGIKMO */
  emit_insn (gen_sse2_punpcklbw (t[11], t[9], t[8]));   /* ........BDFHJLNP */

  op0 = operands[0];
  emit_insn (gen_sse2_punpcklbw (op0, t[11], t[10]));   /* ABCDEFGHIJKLMNOP */
  DONE;
})

;; V8HI multiply; operands are first passed through
;; ix86_fixup_binary_operands_no_copy.
(define_expand "mulv8hi3"
  [(set (match_operand:V8HI 0 "register_operand" "")
        (mult:V8HI (match_operand:V8HI 1 "nonimmediate_operand" "")
                   (match_operand:V8HI 2 "nonimmediate_operand" "")))]
  "TARGET_SSE2"
  "ix86_fixup_binary_operands_no_copy (MULT, V8HImode, operands);")

;; Matching insn for the V8HI multiply; emits pmullw.
(define_insn "*mulv8hi3"
  [(set (match_operand:V8HI 0 "register_operand" "=x")
        (mult:V8HI (match_operand:V8HI 1 "nonimmediate_operand" "%0")
                   (match_operand:V8HI 2 "nonimmediate_operand" "xm")))]
  "TARGET_SSE2 && ix86_binary_operator_ok (MULT, V8HImode, operands)"
  "pmullw\t{%2, %0|%0, %2}"
  [(set_attr "type" "sseimul")
   (set_attr "mode" "TI")])

;; Signed high-part multiply: sign-extend both inputs to V8SI, multiply,
;; shift right by 16 and truncate back to V8HI.
(define_insn "sse2_smulv8hi3_highpart"
  [(set (match_operand:V8HI 0 "register_operand" "=x")
        (truncate:V8HI
          (lshiftrt:V8SI
            (mult:V8SI
              (sign_extend:V8SI
                (match_operand:V8HI 1 "nonimmediate_operand" "%0"))
              (sign_extend:V8SI
                (match_operand:V8HI 2 "nonimmediate_operand" "xm")))
            (const_int 16))))]
  "TARGET_SSE2 && ix86_binary_operator_ok (MULT, V8HImode, operands)"
  "pmulhw\t{%2, %0|%0, %2}"
  [(set_attr "type" "sseimul")
   (set_attr "mode" "TI")])

;; Unsigned high-part multiply: same shape, using zero_extend.
(define_insn "sse2_umulv8hi3_highpart"
  [(set (match_operand:V8HI 0 "register_operand" "=x")
        (truncate:V8HI
          (lshiftrt:V8SI
            (mult:V8SI
              (zero_extend:V8SI
(match_operand:V8HI 1 "nonimmediate_operand" "%0"))
              (zero_extend:V8SI
                (match_operand:V8HI 2 "nonimmediate_operand" "xm")))
            (const_int 16))))]
  "TARGET_SSE2 && ix86_binary_operator_ok (MULT, V8HImode, operands)"
  "pmulhuw\t{%2, %0|%0, %2}"
  [(set_attr "type" "sseimul")
   (set_attr "mode" "TI")])

;; Widening unsigned multiply of elements 0 and 2 of two V4SI operands,
;; giving a V2DI result.  The operand check is made in V4SImode, the mode
;; of operands 1 and 2.
(define_insn "sse2_umulv2siv2di3"
  [(set (match_operand:V2DI 0 "register_operand" "=x")
        (mult:V2DI
          (zero_extend:V2DI
            (vec_select:V2SI
              (match_operand:V4SI 1 "nonimmediate_operand" "%0")
              (parallel [(const_int 0) (const_int 2)])))
          (zero_extend:V2DI
            (vec_select:V2SI
              (match_operand:V4SI 2 "nonimmediate_operand" "xm")
              (parallel [(const_int 0) (const_int 2)])))))]
  "TARGET_SSE2 && ix86_binary_operator_ok (MULT, V4SImode, operands)"
  "pmuludq\t{%2, %0|%0, %2}"
  [(set_attr "type" "sseimul")
   (set_attr "mode" "TI")])

;; pmaddwd: sign-extended products of the even-indexed V8HI elements plus
;; sign-extended products of the odd-indexed elements, as a V4SI result.
(define_insn "sse2_pmaddwd"
  [(set (match_operand:V4SI 0 "register_operand" "=x")
        (plus:V4SI
          (mult:V4SI
            (sign_extend:V4SI
              (vec_select:V4HI
                (match_operand:V8HI 1 "nonimmediate_operand" "%0")
                (parallel [(const_int 0)
                           (const_int 2)
                           (const_int 4)
                           (const_int 6)])))
            (sign_extend:V4SI
              (vec_select:V4HI
                (match_operand:V8HI 2 "nonimmediate_operand" "xm")
                (parallel [(const_int 0)
                           (const_int 2)
                           (const_int 4)
                           (const_int 6)]))))
          (mult:V4SI
            (sign_extend:V4SI
              (vec_select:V4HI (match_dup 1)
                (parallel [(const_int 1)
                           (const_int 3)
                           (const_int 5)
                           (const_int 7)])))
            (sign_extend:V4SI
              (vec_select:V4HI (match_dup 2)
                (parallel [(const_int 1)
                           (const_int 3)
                           (const_int 5)
                           (const_int 7)]))))))]
  "TARGET_SSE2"
  "pmaddwd\t{%2, %0|%0, %2}"
  [(set_attr "type" "sseiadd")
   (set_attr "mode" "TI")])

;; V4SI multiply, synthesized from two widening multiplies (elements 0/2,
;; then 1/3 after a byte shift) whose results are shuffled and
;; interleaved.
(define_expand "mulv4si3"
  [(set (match_operand:V4SI 0 "register_operand" "")
        (mult:V4SI (match_operand:V4SI 1 "register_operand" "")
                   (match_operand:V4SI 2 "register_operand" "")))]
  "TARGET_SSE2"
{
  rtx t1, t2, t3, t4, t5, t6, thirtytwo;
  rtx op0, op1, op2;

  op0 = operands[0];
  op1 = operands[1];
  op2 = operands[2];
  t1 = gen_reg_rtx (V4SImode);
  t2 = gen_reg_rtx (V4SImode);
  t3 = gen_reg_rtx (V4SImode);
  t4 = gen_reg_rtx (V4SImode);
  t5 = gen_reg_rtx (V4SImode);
  t6 = gen_reg_rtx (V4SImode);
  thirtytwo = GEN_INT (32);

  /* Multiply elements 2 and 0.  */
  emit_insn (gen_sse2_umulv2siv2di3 (gen_lowpart (V2DImode, t1), op1, op2));

  /* Shift both input vectors down one element, so that elements 3 and 1
     are now in the slots for elements 2 and 0.  For K8, at least, this is
     faster than using a shuffle.  */
  emit_insn (gen_sse2_lshrti3 (gen_lowpart (TImode, t2),
                               gen_lowpart (TImode, op1), thirtytwo));
  emit_insn (gen_sse2_lshrti3 (gen_lowpart (TImode, t3),
                               gen_lowpart (TImode, op2), thirtytwo));

  /* Multiply elements 3 and 1.  */
  emit_insn (gen_sse2_umulv2siv2di3 (gen_lowpart (V2DImode, t4), t2, t3));

  /* Move the results in element 2 down to element 1; we don't care what
     goes in elements 2 and 3.  */
  emit_insn (gen_sse2_pshufd_1 (t5, t1, const0_rtx, const2_rtx,
                                const0_rtx, const0_rtx));
  emit_insn (gen_sse2_pshufd_1 (t6, t4, const0_rtx, const2_rtx,
                                const0_rtx, const0_rtx));

  /* Merge the parts back together.
*/
  emit_insn (gen_sse2_punpckldq (op0, t5, t6));
  DONE;
})

;; V2DI multiply, synthesized from three 32x32->64 unsigned multiplies:
;; low*low plus the two cross products shifted up by 32 bits.
(define_expand "mulv2di3"
  [(set (match_operand:V2DI 0 "register_operand" "")
        (mult:V2DI (match_operand:V2DI 1 "register_operand" "")
                   (match_operand:V2DI 2 "register_operand" "")))]
  "TARGET_SSE2"
{
  rtx t1, t2, t3, t4, t5, t6, thirtytwo;
  rtx op0, op1, op2;

  op0 = operands[0];
  op1 = operands[1];
  op2 = operands[2];
  t1 = gen_reg_rtx (V2DImode);
  t2 = gen_reg_rtx (V2DImode);
  t3 = gen_reg_rtx (V2DImode);
  t4 = gen_reg_rtx (V2DImode);
  t5 = gen_reg_rtx (V2DImode);
  t6 = gen_reg_rtx (V2DImode);
  thirtytwo = GEN_INT (32);

  /* Multiply low parts.  */
  emit_insn (gen_sse2_umulv2siv2di3 (t1, gen_lowpart (V4SImode, op1),
                                     gen_lowpart (V4SImode, op2)));

  /* Shift input vectors right 32 bits (logical) so that the high half of
     each 64-bit element lands in the low half, where the widening
     multiply reads it.  */
  emit_insn (gen_lshrv2di3 (t2, op1, thirtytwo));
  emit_insn (gen_lshrv2di3 (t3, op2, thirtytwo));

  /* Multiply high parts by low parts.  */
  emit_insn (gen_sse2_umulv2siv2di3 (t4, gen_lowpart (V4SImode, op1),
                                     gen_lowpart (V4SImode, t3)));
  emit_insn (gen_sse2_umulv2siv2di3 (t5, gen_lowpart (V4SImode, op2),
                                     gen_lowpart (V4SImode, t2)));

  /* Shift them back.  */
  emit_insn (gen_ashlv2di3 (t4, t4, thirtytwo));
  emit_insn (gen_ashlv2di3 (t5, t5, thirtytwo));

  /* Add the three parts together.
*/
  emit_insn (gen_addv2di3 (t6, t1, t4));
  emit_insn (gen_addv2di3 (op0, t6, t5));
  DONE;
})

;; Signed dot product of two V8HI vectors accumulated into V4SI: a
;; pmaddwd into a fresh register followed by an add of operand 3.
(define_expand "sdot_prodv8hi"
  [(match_operand:V4SI 0 "register_operand" "")
   (match_operand:V8HI 1 "nonimmediate_operand" "")
   (match_operand:V8HI 2 "nonimmediate_operand" "")
   (match_operand:V4SI 3 "register_operand" "")]
  "TARGET_SSE2"
{
  rtx prod = gen_reg_rtx (V4SImode);

  emit_insn (gen_sse2_pmaddwd (prod, operands[1], operands[2]));
  emit_insn (gen_addv4si3 (operands[0], operands[3], prod));
  DONE;
})

;; Unsigned dot product of two V4SI vectors accumulated into V2DI.
;; Elements 0/2 are multiplied directly.  Both inputs are then shifted
;; right by 32 bits as TImode values and multiplied again, and the two
;; products plus the accumulator are summed.
(define_expand "udot_prodv4si"
  [(match_operand:V2DI 0 "register_operand" "")
   (match_operand:V4SI 1 "register_operand" "")
   (match_operand:V4SI 2 "register_operand" "")
   (match_operand:V2DI 3 "register_operand" "")]
  "TARGET_SSE2"
{
  rtx even_sum, shifted1, shifted2, odd_prod;

  /* Product of the unshifted inputs, plus the incoming accumulator.  */
  even_sum = gen_reg_rtx (V2DImode);
  emit_insn (gen_sse2_umulv2siv2di3 (even_sum, operands[1], operands[2]));
  emit_insn (gen_addv2di3 (even_sum, even_sum, operands[3]));

  /* Shift each input right by 32 bits, viewed as TImode.  */
  shifted1 = gen_reg_rtx (V4SImode);
  shifted2 = gen_reg_rtx (V4SImode);
  emit_insn (gen_sse2_lshrti3 (gen_lowpart (TImode, shifted1),
                               gen_lowpart (TImode, operands[1]),
                               GEN_INT (32)));
  emit_insn (gen_sse2_lshrti3 (gen_lowpart (TImode, shifted2),
                               gen_lowpart (TImode, operands[2]),
                               GEN_INT (32)));

  /* Product of the shifted inputs.  */
  odd_prod = gen_reg_rtx (V2DImode);
  emit_insn (gen_sse2_umulv2siv2di3 (odd_prod, shifted1, shifted2));

  emit_insn (gen_addv2di3 (operands[0], even_sum, odd_prod));
  DONE;
})

;; Arithmetic right shift for V8HI and V4SI (SSEMODE24); the count is a
;; TImode register or an immediate.
(define_insn "ashr<mode>3"
  [(set (match_operand:SSEMODE24 0 "register_operand" "=x")
        (ashiftrt:SSEMODE24
          (match_operand:SSEMODE24 1 "register_operand" "0")
          (match_operand:TI 2 "nonmemory_operand" "xn")))]
  "TARGET_SSE2"
  "psra<ssevecsize>\t{%2, %0|%0, %2}"
  [(set_attr "type" "sseishft")
   (set_attr "mode" "TI")])

;; Logical right shift for V8HI, V4SI and V2DI (SSEMODE248).
(define_insn "lshr<mode>3"
  [(set (match_operand:SSEMODE248 0 "register_operand" "=x")
        (lshiftrt:SSEMODE248
(match_operand:SSEMODE248 1 "register_operand" "0")
          (match_operand:TI 2 "nonmemory_operand" "xn")))]
  "TARGET_SSE2"
  "psrl<ssevecsize>\t{%2, %0|%0, %2}"
  [(set_attr "type" "sseishft")
   (set_attr "mode" "TI")])

;; Left shift for V8HI, V4SI and V2DI (SSEMODE248); the count is a TImode
;; register or an immediate.
(define_insn "ashl<mode>3"
  [(set (match_operand:SSEMODE248 0 "register_operand" "=x")
        (ashift:SSEMODE248
          (match_operand:SSEMODE248 1 "register_operand" "0")
          (match_operand:TI 2 "nonmemory_operand" "xn")))]
  "TARGET_SSE2"
  "psll<ssevecsize>\t{%2, %0|%0, %2}"
  [(set_attr "type" "sseishft")
   (set_attr "mode" "TI")])

;; Whole-register left shift.  The RTL count must satisfy
;; const_0_to_255_mul_8_operand and is divided by 8 before it is printed
;; as the pslldq immediate.
(define_insn "sse2_ashlti3"
  [(set (match_operand:TI 0 "register_operand" "=x")
        (ashift:TI (match_operand:TI 1 "register_operand" "0")
                   (match_operand:SI 2 "const_0_to_255_mul_8_operand" "n")))]
  "TARGET_SSE2"
{
  operands[2] = GEN_INT (INTVAL (operands[2]) / 8);
  return "pslldq\t{%2, %0|%0, %2}";
}
  [(set_attr "type" "sseishft")
   (set_attr "mode" "TI")])

;; Whole-vector left shift for any SSEMODEI mode.  FAILs unless the count
;; satisfies const_0_to_255_mul_8_operand; otherwise both vector operands
;; are re-viewed as TImode so the TImode shift pattern applies.
(define_expand "vec_shl_<mode>"
  [(set (match_operand:SSEMODEI 0 "register_operand" "")
        (ashift:TI (match_operand:SSEMODEI 1 "register_operand" "")
                   (match_operand:SI 2 "general_operand" "")))]
  "TARGET_SSE2"
{
  if (!const_0_to_255_mul_8_operand (operands[2], SImode))
    FAIL;
  operands[0] = gen_lowpart (TImode, operands[0]);
  operands[1] = gen_lowpart (TImode, operands[1]);
})

;; Whole-register logical right shift.  The RTL count must satisfy
;; const_0_to_255_mul_8_operand and is divided by 8 before it is printed
;; as the psrldq immediate.
(define_insn "sse2_lshrti3"
  [(set (match_operand:TI 0 "register_operand" "=x")
        (lshiftrt:TI (match_operand:TI 1 "register_operand" "0")
                     (match_operand:SI 2 "const_0_to_255_mul_8_operand" "n")))]
  "TARGET_SSE2"
{
  operands[2] = GEN_INT (INTVAL (operands[2]) / 8);
  return "psrldq\t{%2, %0|%0, %2}";
}
  [(set_attr "type" "sseishft")
   (set_attr "mode" "TI")])

;; Whole-vector right shift for any SSEMODEI mode; mirrors the left-shift
;; expander's operand handling.
(define_expand "vec_shr_<mode>"
  [(set (match_operand:SSEMODEI 0 "register_operand" "")
        (lshiftrt:TI (match_operand:SSEMODEI 1 "register_operand" "")
                     (match_operand:SI 2 "general_operand" "")))]
  "TARGET_SSE2"
{
  if (!const_0_to_255_mul_8_operand (operands[2], SImode))
    FAIL;
  operands[0] = gen_lowpart (TImode, operands[0]);
  operands[1] = gen_lowpart (TImode, operands[1]);
})

;; Unsigned V16QI maximum; operands are first passed through
;; ix86_fixup_binary_operands_no_copy.
(define_expand "umaxv16qi3"
  [(set (match_operand:V16QI 0 "register_operand" "")
        (umax:V16QI (match_operand:V16QI 1 "nonimmediate_operand" "")
                    (match_operand:V16QI 2 "nonimmediate_operand" "")))]
  "TARGET_SSE2"
  "ix86_fixup_binary_operands_no_copy (UMAX, V16QImode, operands);")

;; Matching insn for the unsigned V16QI maximum; emits pmaxub.
(define_insn "*umaxv16qi3"
  [(set (match_operand:V16QI 0 "register_operand" "=x")
        (umax:V16QI (match_operand:V16QI 1 "nonimmediate_operand" "%0")
                    (match_operand:V16QI 2 "nonimmediate_operand" "xm")))]
  "TARGET_SSE2 && ix86_binary_operator_ok (UMAX, V16QImode, operands)"
  "pmaxub\t{%2, %0|%0, %2}"
  [(set_attr "type" "sseiadd")
   (set_attr "mode" "TI")])

;; Signed V8HI maximum; operands are first passed through
;; ix86_fixup_binary_operands_no_copy.
(define_expand "smaxv8hi3"
  [(set (match_operand:V8HI 0 "register_operand" "")
        (smax:V8HI (match_operand:V8HI 1 "nonimmediate_operand" "")
                   (match_operand:V8HI 2 "nonimmediate_operand" "")))]
  "TARGET_SSE2"
  "ix86_fixup_binary_operands_no_copy (SMAX, V8HImode, operands);")

;; Matching insn for the signed V8HI maximum; emits pmaxsw.
(define_insn "*smaxv8hi3"
  [(set (match_operand:V8HI 0 "register_operand" "=x")
        (smax:V8HI (match_operand:V8HI 1 "nonimmediate_operand" "%0")
                   (match_operand:V8HI 2 "nonimmediate_operand" "xm")))]
  "TARGET_SSE2 && ix86_binary_operator_ok (SMAX, V8HImode, operands)"
  "pmaxsw\t{%2, %0|%0, %2}"
  [(set_attr "type" "sseiadd")
   (set_attr "mode" "TI")])

;; Unsigned V8HI maximum, built as a us_minus of operands 1 and 2
;; followed by a plus of operand 2.  The original destination becomes
;; operand 3, the target of the second set.
(define_expand "umaxv8hi3"
  [(set (match_operand:V8HI 0 "register_operand" "=x")
        (us_minus:V8HI (match_operand:V8HI 1 "register_operand" "0")
                       (match_operand:V8HI 2 "nonimmediate_operand" "xm")))
   (set (match_dup 3)
        (plus:V8HI (match_dup 0) (match_dup 2)))]
  "TARGET_SSE2"
{
  operands[3] =
operands[0]; 2844 if (rtx_equal_p (operands[0], operands[2])) 2845 operands[0] = gen_reg_rtx (V8HImode); 2846}) 2847 2848(define_expand "smax<mode>3" 2849 [(set (match_operand:SSEMODE14 0 "register_operand" "") 2850 (smax:SSEMODE14 (match_operand:SSEMODE14 1 "register_operand" "") 2851 (match_operand:SSEMODE14 2 "register_operand" "")))] 2852 "TARGET_SSE2" 2853{ 2854 rtx xops[6]; 2855 bool ok; 2856 2857 xops[0] = operands[0]; 2858 xops[1] = operands[1]; 2859 xops[2] = operands[2]; 2860 xops[3] = gen_rtx_GT (VOIDmode, operands[1], operands[2]); 2861 xops[4] = operands[1]; 2862 xops[5] = operands[2]; 2863 ok = ix86_expand_int_vcond (xops); 2864 gcc_assert (ok); 2865 DONE; 2866}) 2867 2868(define_expand "umaxv4si3" 2869 [(set (match_operand:V4SI 0 "register_operand" "") 2870 (umax:V4SI (match_operand:V4SI 1 "register_operand" "") 2871 (match_operand:V4SI 2 "register_operand" "")))] 2872 "TARGET_SSE2" 2873{ 2874 rtx xops[6]; 2875 bool ok; 2876 2877 xops[0] = operands[0]; 2878 xops[1] = operands[1]; 2879 xops[2] = operands[2]; 2880 xops[3] = gen_rtx_GTU (VOIDmode, operands[1], operands[2]); 2881 xops[4] = operands[1]; 2882 xops[5] = operands[2]; 2883 ok = ix86_expand_int_vcond (xops); 2884 gcc_assert (ok); 2885 DONE; 2886}) 2887 2888(define_expand "uminv16qi3" 2889 [(set (match_operand:V16QI 0 "register_operand" "") 2890 (umin:V16QI (match_operand:V16QI 1 "nonimmediate_operand" "") 2891 (match_operand:V16QI 2 "nonimmediate_operand" "")))] 2892 "TARGET_SSE2" 2893 "ix86_fixup_binary_operands_no_copy (UMIN, V16QImode, operands);") 2894 2895(define_insn "*uminv16qi3" 2896 [(set (match_operand:V16QI 0 "register_operand" "=x") 2897 (umin:V16QI (match_operand:V16QI 1 "nonimmediate_operand" "%0") 2898 (match_operand:V16QI 2 "nonimmediate_operand" "xm")))] 2899 "TARGET_SSE2 && ix86_binary_operator_ok (UMIN, V16QImode, operands)" 2900 "pminub\t{%2, %0|%0, %2}" 2901 [(set_attr "type" "sseiadd") 2902 (set_attr "mode" "TI")]) 2903 2904(define_expand "sminv8hi3" 2905 [(set 
(match_operand:V8HI 0 "register_operand" "") 2906 (smin:V8HI (match_operand:V8HI 1 "nonimmediate_operand" "") 2907 (match_operand:V8HI 2 "nonimmediate_operand" "")))] 2908 "TARGET_SSE2" 2909 "ix86_fixup_binary_operands_no_copy (SMIN, V8HImode, operands);") 2910 2911(define_insn "*sminv8hi3" 2912 [(set (match_operand:V8HI 0 "register_operand" "=x") 2913 (smin:V8HI (match_operand:V8HI 1 "nonimmediate_operand" "%0") 2914 (match_operand:V8HI 2 "nonimmediate_operand" "xm")))] 2915 "TARGET_SSE2 && ix86_binary_operator_ok (SMIN, V8HImode, operands)" 2916 "pminsw\t{%2, %0|%0, %2}" 2917 [(set_attr "type" "sseiadd") 2918 (set_attr "mode" "TI")]) 2919 2920(define_expand "smin<mode>3" 2921 [(set (match_operand:SSEMODE14 0 "register_operand" "") 2922 (smin:SSEMODE14 (match_operand:SSEMODE14 1 "register_operand" "") 2923 (match_operand:SSEMODE14 2 "register_operand" "")))] 2924 "TARGET_SSE2" 2925{ 2926 rtx xops[6]; 2927 bool ok; 2928 2929 xops[0] = operands[0]; 2930 xops[1] = operands[2]; 2931 xops[2] = operands[1]; 2932 xops[3] = gen_rtx_GT (VOIDmode, operands[1], operands[2]); 2933 xops[4] = operands[1]; 2934 xops[5] = operands[2]; 2935 ok = ix86_expand_int_vcond (xops); 2936 gcc_assert (ok); 2937 DONE; 2938}) 2939 2940(define_expand "umin<mode>3" 2941 [(set (match_operand:SSEMODE24 0 "register_operand" "") 2942 (umin:SSEMODE24 (match_operand:SSEMODE24 1 "register_operand" "") 2943 (match_operand:SSEMODE24 2 "register_operand" "")))] 2944 "TARGET_SSE2" 2945{ 2946 rtx xops[6]; 2947 bool ok; 2948 2949 xops[0] = operands[0]; 2950 xops[1] = operands[2]; 2951 xops[2] = operands[1]; 2952 xops[3] = gen_rtx_GTU (VOIDmode, operands[1], operands[2]); 2953 xops[4] = operands[1]; 2954 xops[5] = operands[2]; 2955 ok = ix86_expand_int_vcond (xops); 2956 gcc_assert (ok); 2957 DONE; 2958}) 2959 2960;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; 2961;; 2962;; Parallel integral comparisons 2963;; 2964;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; 
;; Element-wise equality for V16QI, V8HI and V4SI (pcmpeqb/w/d, chosen
;; through <ssevecsize>).  The "%" marks operands 1 and 2 as commutative;
;; operand 1 is tied to the destination.
(define_insn "sse2_eq<mode>3"
  [(set (match_operand:SSEMODE124 0 "register_operand" "=x")
        (eq:SSEMODE124
          (match_operand:SSEMODE124 1 "nonimmediate_operand" "%0")
          (match_operand:SSEMODE124 2 "nonimmediate_operand" "xm")))]
  "TARGET_SSE2 && ix86_binary_operator_ok (EQ, <MODE>mode, operands)"
  "pcmpeq<ssevecsize>\t{%2, %0|%0, %2}"
  [(set_attr "type" "ssecmp")
   (set_attr "mode" "TI")])

;; Element-wise signed greater-than for the same modes (pcmpgtb/w/d).
;; Not commutative: operand 1 must be the register tied to the
;; destination, operand 2 may be a register or memory.
(define_insn "sse2_gt<mode>3"
  [(set (match_operand:SSEMODE124 0 "register_operand" "=x")
        (gt:SSEMODE124
          (match_operand:SSEMODE124 1 "register_operand" "0")
          (match_operand:SSEMODE124 2 "nonimmediate_operand" "xm")))]
  "TARGET_SSE2"
  "pcmpgt<ssevecsize>\t{%2, %0|%0, %2}"
  [(set_attr "type" "ssecmp")
   (set_attr "mode" "TI")])

;; Vector conditional select: operand 0 = (operand 4 <operator 3> operand 5)
;; ? operand 1 : operand 2.  All the work is delegated to
;; ix86_expand_int_vcond; the expander fails when that helper reports
;; failure.
(define_expand "vcond<mode>"
  [(set (match_operand:SSEMODE124 0 "register_operand" "")
        (if_then_else:SSEMODE124
          (match_operator 3 ""
            [(match_operand:SSEMODE124 4 "nonimmediate_operand" "")
             (match_operand:SSEMODE124 5 "nonimmediate_operand" "")])
          (match_operand:SSEMODE124 1 "general_operand" "")
          (match_operand:SSEMODE124 2 "general_operand" "")))]
  "TARGET_SSE2"
{
  if (!ix86_expand_int_vcond (operands))
    FAIL;
  DONE;
})

;; Same as vcond<mode> under the vcondu<mode> name; the pattern and the
;; preparation code are identical.
(define_expand "vcondu<mode>"
  [(set (match_operand:SSEMODE124 0 "register_operand" "")
        (if_then_else:SSEMODE124
          (match_operator 3 ""
            [(match_operand:SSEMODE124 4 "nonimmediate_operand" "")
             (match_operand:SSEMODE124 5 "nonimmediate_operand" "")])
          (match_operand:SSEMODE124 1 "general_operand" "")
          (match_operand:SSEMODE124 2 "general_operand" "")))]
  "TARGET_SSE2"
{
  if (!ix86_expand_int_vcond (operands))
    FAIL;
  DONE;
})

;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;;
;; Parallel integral logical operations
;;
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;

;; One's complement, expressed as XOR with a vector whose elements are
;; all constm1_rtx.  The preparation code builds that constant vector
;; and forces it into a register as operand 2.
(define_expand "one_cmpl<mode>2"
  [(set (match_operand:SSEMODEI 0 "register_operand" "")
        (xor:SSEMODEI (match_operand:SSEMODEI 1 "nonimmediate_operand" "")
                      (match_dup 2)))]
  "TARGET_SSE2"
{
  int nunits = GET_MODE_NUNITS (<MODE>mode);
  rtvec ones = rtvec_alloc (nunits);
  rtx all_ones;
  int k;

  for (k = 0; k < nunits; k++)
    RTVEC_ELT (ones, k) = constm1_rtx;

  all_ones = gen_rtx_CONST_VECTOR (<MODE>mode, ones);
  operands[2] = force_reg (<MODE>mode, all_ones);
})

;; Bitwise AND for every 16-byte integer vector mode; the expander only
;; passes the operands through ix86_fixup_binary_operands_no_copy.
(define_expand "and<mode>3"
  [(set (match_operand:SSEMODEI 0 "register_operand" "")
        (and:SSEMODEI (match_operand:SSEMODEI 1 "nonimmediate_operand" "")
                      (match_operand:SSEMODEI 2 "nonimmediate_operand" "")))]
  "TARGET_SSE2"
  "ix86_fixup_binary_operands_no_copy (AND, <MODE>mode, operands);")

;; pand; commutative, operand 1 tied to the destination.
(define_insn "*and<mode>3"
  [(set (match_operand:SSEMODEI 0 "register_operand" "=x")
        (and:SSEMODEI
          (match_operand:SSEMODEI 1 "nonimmediate_operand" "%0")
          (match_operand:SSEMODEI 2 "nonimmediate_operand" "xm")))]
  "TARGET_SSE2 && ix86_binary_operator_ok (AND, <MODE>mode, operands)"
  "pand\t{%2, %0|%0, %2}"
  [(set_attr "type" "sselog")
   (set_attr "mode" "TI")])

;; pandn: (NOT operand 1) AND operand 2.  The complemented operand is the
;; one tied to the destination.
(define_insn "sse2_nand<mode>3"
  [(set (match_operand:SSEMODEI 0 "register_operand" "=x")
        (and:SSEMODEI
          (not:SSEMODEI (match_operand:SSEMODEI 1 "register_operand" "0"))
          (match_operand:SSEMODEI 2 "nonimmediate_operand" "xm")))]
  "TARGET_SSE2"
  "pandn\t{%2, %0|%0, %2}"
  [(set_attr "type" "sselog")
   (set_attr "mode" "TI")])

;; Bitwise inclusive OR; expander plus matching insn (por).
(define_expand "ior<mode>3"
  [(set (match_operand:SSEMODEI 0 "register_operand" "")
        (ior:SSEMODEI (match_operand:SSEMODEI 1 "nonimmediate_operand" "")
                      (match_operand:SSEMODEI 2 "nonimmediate_operand" "")))]
  "TARGET_SSE2"
  "ix86_fixup_binary_operands_no_copy (IOR, <MODE>mode, operands);")

(define_insn "*ior<mode>3"
  [(set (match_operand:SSEMODEI 0 "register_operand" "=x")
        (ior:SSEMODEI
          (match_operand:SSEMODEI 1 "nonimmediate_operand" "%0")
          (match_operand:SSEMODEI 2 "nonimmediate_operand" "xm")))]
  "TARGET_SSE2 && ix86_binary_operator_ok (IOR, <MODE>mode, operands)"
  "por\t{%2, %0|%0, %2}"
  [(set_attr "type" "sselog")
   (set_attr "mode" "TI")])

;; Bitwise exclusive OR; expander plus matching insn (pxor).
(define_expand "xor<mode>3"
  [(set (match_operand:SSEMODEI 0 "register_operand" "")
        (xor:SSEMODEI (match_operand:SSEMODEI 1 "nonimmediate_operand" "")
                      (match_operand:SSEMODEI 2 "nonimmediate_operand" "")))]
  "TARGET_SSE2"
  "ix86_fixup_binary_operands_no_copy (XOR, <MODE>mode, operands);")

(define_insn "*xor<mode>3"
  [(set (match_operand:SSEMODEI 0 "register_operand" "=x")
        (xor:SSEMODEI
          (match_operand:SSEMODEI 1 "nonimmediate_operand" "%0")
          (match_operand:SSEMODEI 2 "nonimmediate_operand" "xm")))]
  "TARGET_SSE2 && ix86_binary_operator_ok (XOR, <MODE>mode, operands)"
  "pxor\t{%2, %0|%0, %2}"
  [(set_attr "type" "sselog")
   (set_attr "mode" "TI")])

;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;;
;; Parallel integral element swizzling
;;
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;

;; Pack instructions: each V8HI (or V4SI) input is narrowed with signed
;; (ss_truncate) or unsigned (us_truncate) saturation, and the two narrow
;; halves are concatenated with operand 1 first.

(define_insn "sse2_packsswb"
  [(set (match_operand:V16QI 0 "register_operand" "=x")
        (vec_concat:V16QI
          (ss_truncate:V8QI
            (match_operand:V8HI 1 "register_operand" "0"))
          (ss_truncate:V8QI
            (match_operand:V8HI 2 "nonimmediate_operand" "xm"))))]
  "TARGET_SSE2"
  "packsswb\t{%2, %0|%0, %2}"
  [(set_attr "type" "sselog")
   (set_attr "mode" "TI")])

(define_insn "sse2_packssdw"
  [(set (match_operand:V8HI 0 "register_operand" "=x")
        (vec_concat:V8HI
          (ss_truncate:V4HI
            (match_operand:V4SI 1 "register_operand" "0"))
          (ss_truncate:V4HI
            (match_operand:V4SI 2 "nonimmediate_operand" "xm"))))]
  "TARGET_SSE2"
  "packssdw\t{%2, %0|%0, %2}"
  [(set_attr "type" "sselog")
   (set_attr "mode" "TI")])

(define_insn "sse2_packuswb"
  [(set (match_operand:V16QI 0 "register_operand" "=x")
        (vec_concat:V16QI
          (us_truncate:V8QI
            (match_operand:V8HI 1 "register_operand" "0"))
          (us_truncate:V8QI
            (match_operand:V8HI 2 "nonimmediate_operand" "xm"))))]
  "TARGET_SSE2"
  "packuswb\t{%2, %0|%0, %2}"
  [(set_attr "type" "sselog")
   (set_attr "mode" "TI")])

;; Unpack (interleave) instructions.  Each pattern concatenates operands
;; 1 and 2 into a double-width vector and selects alternating elements
;; from the high halves (punpckh*) or the low halves (punpckl*) of the
;; two inputs; the index lists below spell out the interleaving.

(define_insn "sse2_punpckhbw"
  [(set (match_operand:V16QI 0 "register_operand" "=x")
        (vec_select:V16QI
          (vec_concat:V32QI
            (match_operand:V16QI 1 "register_operand" "0")
            (match_operand:V16QI 2 "nonimmediate_operand" "xm"))
          (parallel [(const_int 8)  (const_int 24)
                     (const_int 9)  (const_int 25)
                     (const_int 10) (const_int 26)
                     (const_int 11) (const_int 27)
                     (const_int 12) (const_int 28)
                     (const_int 13) (const_int 29)
                     (const_int 14) (const_int 30)
                     (const_int 15) (const_int 31)])))]
  "TARGET_SSE2"
  "punpckhbw\t{%2, %0|%0, %2}"
  [(set_attr "type" "sselog")
   (set_attr "mode" "TI")])

(define_insn "sse2_punpcklbw"
  [(set (match_operand:V16QI 0 "register_operand" "=x")
        (vec_select:V16QI
          (vec_concat:V32QI
            (match_operand:V16QI 1 "register_operand" "0")
            (match_operand:V16QI 2 "nonimmediate_operand" "xm"))
          (parallel [(const_int 0) (const_int 16)
                     (const_int 1) (const_int 17)
                     (const_int 2) (const_int 18)
                     (const_int 3) (const_int 19)
                     (const_int 4) (const_int 20)
                     (const_int 5) (const_int 21)
                     (const_int 6) (const_int 22)
                     (const_int 7) (const_int 23)])))]
  "TARGET_SSE2"
  "punpcklbw\t{%2, %0|%0, %2}"
  [(set_attr "type" "sselog")
   (set_attr "mode" "TI")])

(define_insn "sse2_punpckhwd"
  [(set (match_operand:V8HI 0 "register_operand" "=x")
        (vec_select:V8HI
          (vec_concat:V16HI
            (match_operand:V8HI 1 "register_operand" "0")
            (match_operand:V8HI 2 "nonimmediate_operand" "xm"))
          (parallel [(const_int 4) (const_int 12)
                     (const_int 5) (const_int 13)
                     (const_int 6) (const_int 14)
                     (const_int 7) (const_int 15)])))]
  "TARGET_SSE2"
  "punpckhwd\t{%2, %0|%0, %2}"
  [(set_attr "type" "sselog")
   (set_attr "mode" "TI")])

(define_insn "sse2_punpcklwd"
  [(set (match_operand:V8HI 0 "register_operand" "=x")
        (vec_select:V8HI
          (vec_concat:V16HI
            (match_operand:V8HI 1 "register_operand" "0")
            (match_operand:V8HI 2 "nonimmediate_operand" "xm"))
          (parallel [(const_int 0) (const_int 8)
                     (const_int 1) (const_int 9)
                     (const_int 2) (const_int 10)
                     (const_int 3) (const_int 11)])))]
  "TARGET_SSE2"
  "punpcklwd\t{%2, %0|%0, %2}"
  [(set_attr "type" "sselog")
   (set_attr "mode" "TI")])

(define_insn "sse2_punpckhdq"
  [(set (match_operand:V4SI 0 "register_operand" "=x")
        (vec_select:V4SI
          (vec_concat:V8SI
            (match_operand:V4SI 1 "register_operand" "0")
            (match_operand:V4SI 2 "nonimmediate_operand" "xm"))
          (parallel [(const_int 2) (const_int 6)
                     (const_int 3) (const_int 7)])))]
  "TARGET_SSE2"
  "punpckhdq\t{%2, %0|%0, %2}"
  [(set_attr "type" "sselog")
   (set_attr "mode" "TI")])

(define_insn "sse2_punpckldq"
  [(set (match_operand:V4SI 0 "register_operand" "=x")
        (vec_select:V4SI
          (vec_concat:V8SI
            (match_operand:V4SI 1 "register_operand" "0")
            (match_operand:V4SI 2 "nonimmediate_operand" "xm"))
          (parallel [(const_int 0) (const_int 4)
                     (const_int 1) (const_int 5)])))]
  "TARGET_SSE2"
  "punpckldq\t{%2, %0|%0, %2}"
  [(set_attr "type" "sselog")
   (set_attr "mode" "TI")])

(define_insn "sse2_punpckhqdq"
  [(set (match_operand:V2DI 0 "register_operand" "=x")
        (vec_select:V2DI
          (vec_concat:V4DI
            (match_operand:V2DI 1 "register_operand" "0")
            (match_operand:V2DI 2 "nonimmediate_operand" "xm"))
          (parallel [(const_int 1)
                     (const_int 3)])))]
  "TARGET_SSE2"
  "punpckhqdq\t{%2, %0|%0, %2}"
  [(set_attr "type" "sselog")
   (set_attr "mode" "TI")])

(define_insn "sse2_punpcklqdq"
  [(set (match_operand:V2DI 0 "register_operand" "=x")
        (vec_select:V2DI
          (vec_concat:V4DI
            (match_operand:V2DI 1 "register_operand" "0")
            (match_operand:V2DI 2 "nonimmediate_operand" "xm"))
          (parallel [(const_int 0)
                     (const_int 2)])))]
  "TARGET_SSE2"
  "punpcklqdq\t{%2, %0|%0, %2}"
  [(set_attr "type" "sselog")
   (set_attr "mode" "TI")])

;; Insert a 16-bit value into one element of a V8HI vector.  The expander
;; takes the element number (0 to 7) as operand 3 and converts it to the
;; one-bit vec_merge mask 1 << N; the SImode value is narrowed to HImode.
(define_expand "sse2_pinsrw"
  [(set (match_operand:V8HI 0 "register_operand" "")
        (vec_merge:V8HI
          (vec_duplicate:V8HI
            (match_operand:SI 2 "nonimmediate_operand" ""))
          (match_operand:V8HI 1 "register_operand" "")
          (match_operand:SI 3 "const_0_to_7_operand" "")))]
  "TARGET_SSE2"
{
  int lane = INTVAL (operands[3]);

  operands[2] = gen_lowpart (HImode, operands[2]);
  operands[3] = GEN_INT (1 << lane);
})

;; pinsrw.  Operand 3 is the power-of-two merge mask produced by the
;; expander above; the output code turns it back into an element number
;; with exact_log2 before printing.
(define_insn "*sse2_pinsrw"
  [(set (match_operand:V8HI 0 "register_operand" "=x")
        (vec_merge:V8HI
          (vec_duplicate:V8HI
            (match_operand:HI 2 "nonimmediate_operand" "rm"))
          (match_operand:V8HI 1 "register_operand" "0")
          (match_operand:SI 3 "const_pow2_1_to_128_operand" "n")))]
  "TARGET_SSE2"
{
  int lane = exact_log2 (INTVAL (operands[3]));

  operands[3] = GEN_INT (lane);
  return "pinsrw\t{%3, %k2, %0|%0, %k2, %3}";
}
  [(set_attr "type" "sselog")
   (set_attr "mode" "TI")])

;; pextrw: select one V8HI element (index 0 to 7) and zero-extend it into
;; an SImode general register.
(define_insn "sse2_pextrw"
  [(set (match_operand:SI 0 "register_operand" "=r")
        (zero_extend:SI
          (vec_select:HI
            (match_operand:V8HI 1 "register_operand" "x")
            (parallel [(match_operand:SI 2 "const_0_to_7_operand" "n")]))))]
  "TARGET_SSE2"
  "pextrw\t{%2, %1, %0|%0, %1, %2}"
  [(set_attr "type" "sselog")
   (set_attr "mode" "TI")])

;; pshufd expander.  Operand 2 is a packed immediate holding four 2-bit
;; element selectors; it is split into four separate constants for
;; sse2_pshufd_1, lowest field first.
(define_expand "sse2_pshufd"
  [(match_operand:V4SI 0 "register_operand" "")
   (match_operand:V4SI 1 "nonimmediate_operand" "")
   (match_operand:SI 2 "const_int_operand" "")]
  "TARGET_SSE2"
{
  int imm = INTVAL (operands[2]);
  rtx sel0 = GEN_INT ((imm >> 0) & 3);
  rtx sel1 = GEN_INT ((imm >> 2) & 3);
  rtx sel2 = GEN_INT ((imm >> 4) & 3);
  rtx sel3 = GEN_INT ((imm >> 6) & 3);

  emit_insn (gen_sse2_pshufd_1 (operands[0], operands[1],
                                sel0, sel1, sel2, sel3));
  DONE;
})

;; pshufd with the four selectors as separate operands 2 to 5 (each 0 to
;; 3).  The output code packs them back into one immediate, reusing
;; operand 2 to hold it.
(define_insn "sse2_pshufd_1"
  [(set (match_operand:V4SI 0 "register_operand" "=x")
        (vec_select:V4SI
          (match_operand:V4SI 1 "nonimmediate_operand" "xm")
          (parallel [(match_operand 2 "const_0_to_3_operand" "")
                     (match_operand 3 "const_0_to_3_operand" "")
                     (match_operand 4 "const_0_to_3_operand" "")
                     (match_operand 5 "const_0_to_3_operand" "")])))]
  "TARGET_SSE2"
{
  int imm = 0;

  imm |= INTVAL (operands[2]) << 0;
  imm |= INTVAL (operands[3]) << 2;
  imm |= INTVAL (operands[4]) << 4;
  imm |= INTVAL (operands[5]) << 6;
  operands[2] = GEN_INT (imm);

  return "pshufd\t{%2, %1, %0|%0, %1, %2}";
}
  [(set_attr "type" "sselog1")
   (set_attr "mode" "TI")])

;; pshuflw expander; the immediate is split exactly as for sse2_pshufd.
(define_expand "sse2_pshuflw"
  [(match_operand:V8HI 0 "register_operand" "")
   (match_operand:V8HI 1 "nonimmediate_operand" "")
   (match_operand:SI 2 "const_int_operand" "")]
  "TARGET_SSE2"
{
  int imm = INTVAL (operands[2]);
  rtx sel0 = GEN_INT ((imm >> 0) & 3);
  rtx sel1 = GEN_INT ((imm >> 2) & 3);
  rtx sel2 = GEN_INT ((imm >> 4) & 3);
  rtx sel3 = GEN_INT ((imm >> 6) & 3);

  emit_insn (gen_sse2_pshuflw_1 (operands[0], operands[1],
                                 sel0, sel1, sel2, sel3));
  DONE;
})

;; pshuflw: elements 0 to 3 are chosen by operands 2 to 5, elements 4 to
;; 7 are copied unchanged (fixed indices 4, 5, 6, 7).
(define_insn "sse2_pshuflw_1"
  [(set (match_operand:V8HI 0 "register_operand" "=x")
        (vec_select:V8HI
          (match_operand:V8HI 1 "nonimmediate_operand" "xm")
          (parallel [(match_operand 2 "const_0_to_3_operand" "")
                     (match_operand 3 "const_0_to_3_operand" "")
                     (match_operand 4 "const_0_to_3_operand" "")
                     (match_operand 5 "const_0_to_3_operand" "")
                     (const_int 4)
                     (const_int 5)
                     (const_int 6)
                     (const_int 7)])))]
  "TARGET_SSE2"
{
  int imm = 0;

  imm |= INTVAL (operands[2]) << 0;
  imm |= INTVAL (operands[3]) << 2;
  imm |= INTVAL (operands[4]) << 4;
  imm |= INTVAL (operands[5]) << 6;
  operands[2] = GEN_INT (imm);

  return "pshuflw\t{%2, %1, %0|%0, %1, %2}";
}
  [(set_attr "type" "sselog")
   (set_attr "mode" "TI")])

;; pshufhw expander.  Each 2-bit field of the immediate is biased by 4 so
;; that the selectors are element numbers 4 to 7, as sse2_pshufhw_1
;; requires.
(define_expand "sse2_pshufhw"
  [(match_operand:V8HI 0 "register_operand" "")
   (match_operand:V8HI 1 "nonimmediate_operand" "")
   (match_operand:SI 2 "const_int_operand" "")]
  "TARGET_SSE2"
{
  int imm = INTVAL (operands[2]);
  rtx sel0 = GEN_INT (((imm >> 0) & 3) + 4);
  rtx sel1 = GEN_INT (((imm >> 2) & 3) + 4);
  rtx sel2 = GEN_INT (((imm >> 4) & 3) + 4);
  rtx sel3 = GEN_INT (((imm >> 6) & 3) + 4);

  emit_insn (gen_sse2_pshufhw_1 (operands[0], operands[1],
                                 sel0, sel1, sel2, sel3));
  DONE;
})

;; pshufhw: elements 0 to 3 are copied unchanged, elements 4 to 7 are
;; chosen by operands 2 to 5.  The output code removes the bias of 4
;; again while packing the immediate.
(define_insn "sse2_pshufhw_1"
  [(set (match_operand:V8HI 0 "register_operand" "=x")
        (vec_select:V8HI
          (match_operand:V8HI 1 "nonimmediate_operand" "xm")
          (parallel [(const_int 0)
                     (const_int 1)
                     (const_int 2)
                     (const_int 3)
                     (match_operand 2 "const_4_to_7_operand" "")
                     (match_operand 3 "const_4_to_7_operand" "")
                     (match_operand 4 "const_4_to_7_operand" "")
                     (match_operand 5 "const_4_to_7_operand" "")])))]
  "TARGET_SSE2"
{
  int imm = 0;

  imm |= (INTVAL (operands[2]) - 4) << 0;
  imm |= (INTVAL (operands[3]) - 4) << 2;
  imm |= (INTVAL (operands[4]) - 4) << 4;
  imm |= (INTVAL (operands[5]) - 4) << 6;
  operands[2] = GEN_INT (imm);

  return "pshufhw\t{%2, %1, %0|%0, %1, %2}";
}
  [(set_attr "type" "sselog")
   (set_attr "mode" "TI")])

;; Load an SImode value into element 0 of a V4SI vector whose remaining
;; elements come from the zero vector supplied as operand 2.
(define_expand "sse2_loadd"
  [(set (match_operand:V4SI 0 "register_operand" "")
        (vec_merge:V4SI
          (vec_duplicate:V4SI
            (match_operand:SI 1 "nonimmediate_operand" ""))
          (match_dup 2)
          (const_int 1)))]
  "TARGET_SSE"
  "operands[2] = CONST0_RTX (V4SImode);")

;; Merge an SImode value into element 0 of operand 1, which is either
;; zero (alternatives 0 and 1) or the destination itself (alternative 2).
(define_insn "sse2_loadld"
  [(set (match_operand:V4SI 0 "register_operand" "=Y,x,x")
        (vec_merge:V4SI
          (vec_duplicate:V4SI
            (match_operand:SI 2 "nonimmediate_operand" "mr,m,x"))
          (match_operand:V4SI 1 "reg_or_0_operand" " C,C,0")
          (const_int 1)))]
  "TARGET_SSE"
  "@
   movd\t{%2, %0|%0, %2}
   movss\t{%2, %0|%0, %2}
   movss\t{%2, %0|%0, %2}"
  [(set_attr "type" "ssemov")
   (set_attr "mode" "TI,V4SF,SF")])

;; ??? The hardware supports more, but TARGET_INTER_UNIT_MOVES must
;; be taken into account, and movdi isn't fully populated even without.
;;
;; Extract element 0 of a V4SI vector.  The insn is never output directly
;; (template "#"); after reload it is split into a plain SImode move from
;; the same hard register.
(define_insn_and_split "sse2_stored"
  [(set (match_operand:SI 0 "nonimmediate_operand" "=mx")
        (vec_select:SI
          (match_operand:V4SI 1 "register_operand" "x")
          (parallel [(const_int 0)])))]
  "TARGET_SSE"
  "#"
  "&& reload_completed"
  [(set (match_dup 0) (match_dup 1))]
{
  /* Refer to the source register in SImode instead of V4SImode.  */
  operands[1] = gen_rtx_REG (SImode, REGNO (operands[1]));
})

;; Named entry point for extracting element 0 of a V2DI vector; it has
;; no preparation code and simply emits the pattern.
(define_expand "sse_storeq"
  [(set (match_operand:DI 0 "nonimmediate_operand" "")
        (vec_select:DI
          (match_operand:V2DI 1 "register_operand" "")
          (parallel [(const_int 0)])))]
  "TARGET_SSE"
  "")

;; ??? The hardware supports more, but TARGET_INTER_UNIT_MOVES must
;; be taken into account, and movdi isn't fully populated even without.
;; Extract element 0 of a V2DI vector into a DImode register or memory.
;; Written as a single define_insn_and_split, in the same form as
;; sse2_stored: the insn is never output directly (template "#") and,
;; once reload has completed, it is split into a plain DImode move from
;; the same hard register.  The split condition is the insn condition
;; plus reload_completed, i.e. "TARGET_SSE && reload_completed".
(define_insn_and_split "*sse2_storeq"
  [(set (match_operand:DI 0 "nonimmediate_operand" "=mx")
        (vec_select:DI
          (match_operand:V2DI 1 "register_operand" "x")
          (parallel [(const_int 0)])))]
  "TARGET_SSE"
  "#"
  "&& reload_completed"
  [(set (match_dup 0) (match_dup 1))]
{
  /* Refer to the source register in DImode instead of V2DImode.  */
  operands[1] = gen_rtx_REG (DImode, REGNO (operands[1]));
})

;; Extract element 1 (the upper DImode half) of a V2DI vector when SSE2
;; is available.  Alternatives:
;;   0: register source to memory destination (movhps);
;;   1: source tied to the destination register, shifted right by 8 with
;;      psrldq (the "memory" attribute is "none" for this alternative);
;;   2: offsettable memory source, loaded from the %H1 form of the
;;      address with movq.
;; The condition rejects the memory-to-memory combination.
(define_insn "*vec_extractv2di_1_sse2"
  [(set (match_operand:DI 0 "nonimmediate_operand" "=m,x,x")
        (vec_select:DI
          (match_operand:V2DI 1 "nonimmediate_operand" "x,0,o")
          (parallel [(const_int 1)])))]
  "TARGET_SSE2 && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
  "@
   movhps\t{%1, %0|%0, %1}
   psrldq\t{$8, %0|%0, 8}
   movq\t{%H1, %0|%0, %H1}"
  [(set_attr "type" "ssemov,sseishft,ssemov")
   (set_attr "memory" "*,none,*")
   (set_attr "mode" "V2SF,TI,TI")])

;; Not sure this is ever used, but it doesn't hurt to have it.
;; -aoliva
;;
;; Extract element 1 (the upper DImode half) of a V2DI vector when only
;; SSE1 is available (the condition requires !TARGET_SSE2).  Alternatives:
;;   0: register source to memory destination (movhps);
;;   1: register to register (movhlps);
;;   2: offsettable memory source, loaded from the %H1 form of the
;;      address with movlps.
;; The condition rejects the memory-to-memory combination.
(define_insn "*vec_extractv2di_1_sse"
  [(set (match_operand:DI 0 "nonimmediate_operand" "=m,x,x")
        (vec_select:DI
          (match_operand:V2DI 1 "nonimmediate_operand" "x,x,o")
          (parallel [(const_int 1)])))]
  "!TARGET_SSE2 && TARGET_SSE
   && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
  "@
   movhps\t{%1, %0|%0, %1}
   movhlps\t{%1, %0|%0, %1}
   movlps\t{%H1, %0|%0, %H1}"
  [(set_attr "type" "ssemov")
   (set_attr "mode" "V2SF,V4SF,V2SF")])

;; Broadcast an SImode register value to all four V4SI elements.
;; Alternative 0 uses pshufd with selector 0 on a separate source
;; register; alternative 1 requires the source to be tied to the
;; destination and uses shufps with selector 0.
(define_insn "*vec_dupv4si"
  [(set (match_operand:V4SI 0 "register_operand" "=Y,x")
        (vec_duplicate:V4SI
          (match_operand:SI 1 "register_operand" " Y,0")))]
  "TARGET_SSE"
  "@
   pshufd\t{$0, %1, %0|%0, %1, 0}
   shufps\t{$0, %0, %0|%0, %0, 0}"
  [(set_attr "type" "sselog1")
   (set_attr "mode" "TI,V4SF")])

;; Broadcast a DImode register value to both V2DI elements.  In both
;; alternatives the source is tied to the destination, so the templates
;; name operand 0 twice; since both printed operands are the same, no
;; separate AT&T / Intel operand orderings are written.
(define_insn "*vec_dupv2di"
  [(set (match_operand:V2DI 0 "register_operand" "=Y,x")
        (vec_duplicate:V2DI
          (match_operand:DI 1 "register_operand" " 0,0")))]
  "TARGET_SSE"
  "@
   punpcklqdq\t%0, %0
   movlhps\t%0, %0"
  [(set_attr "type" "sselog1,ssemov")
   (set_attr "mode" "TI,V4SF")])

;; ??? In theory we can match memory for the MMX alternative, but allowing
;; nonimmediate_operand for operand 2 and *not* allowing memory for the SSE
;; alternatives pretty much forces the MMX alternative to be chosen.
;; Build a V2SI vector from two SImode values when SSE2 is available.
;; Alternatives 0 and 2 interleave operand 2 into the destination, which
;; already holds operand 1 (punpckldq); alternatives 1 and 3 load operand
;; 1 with movd when operand 2 is zero.  The last two alternatives are the
;; MMX ones, as their type attributes show.
(define_insn "*sse2_concatv2si"
  [(set (match_operand:V2SI 0 "register_operand" "=Y, Y,*y,*y")
        (vec_concat:V2SI
          (match_operand:SI 1 "nonimmediate_operand" " 0,rm, 0,rm")
          (match_operand:SI 2 "reg_or_0_operand" " Y, C,*y, C")))]
  "TARGET_SSE2"
  "@
   punpckldq\t{%2, %0|%0, %2}
   movd\t{%1, %0|%0, %1}
   punpckldq\t{%2, %0|%0, %2}
   movd\t{%1, %0|%0, %1}"
  [(set_attr "type" "sselog,ssemov,mmxcvt,mmxmov")
   (set_attr "mode" "TI,TI,DI,DI")])

;; The same construction under the TARGET_SSE condition; its first two
;; alternatives use unpcklps and movss instead.
(define_insn "*sse1_concatv2si"
  [(set (match_operand:V2SI 0 "register_operand" "=x,x,*y,*y")
        (vec_concat:V2SI
          (match_operand:SI 1 "nonimmediate_operand" " 0,m, 0,*rm")
          (match_operand:SI 2 "reg_or_0_operand" " x,C,*y,C")))]
  "TARGET_SSE"
  "@
   unpcklps\t{%2, %0|%0, %2}
   movss\t{%1, %0|%0, %1}
   punpckldq\t{%2, %0|%0, %2}
   movd\t{%1, %0|%0, %1}"
  [(set_attr "type" "sselog,ssemov,mmxcvt,mmxmov")
   (set_attr "mode" "V4SF,V4SF,DI,DI")])

;; Concatenate two V2SI halves into a V4SI vector.  Operand 1 (the low
;; half) is always tied to the destination; operand 2 comes from a
;; register (punpcklqdq or movlhps) or from memory (movhps).
(define_insn "*vec_concatv4si_1"
  [(set (match_operand:V4SI 0 "register_operand" "=Y,x,x")
        (vec_concat:V4SI
          (match_operand:V2SI 1 "register_operand" " 0,0,0")
          (match_operand:V2SI 2 "nonimmediate_operand" " Y,x,m")))]
  "TARGET_SSE"
  "@
   punpcklqdq\t{%2, %0|%0, %2}
   movlhps\t{%2, %0|%0, %2}
   movhps\t{%2, %0|%0, %2}"
  [(set_attr "type" "sselog,ssemov,ssemov")
   (set_attr "mode" "TI,V4SF,V2SF")])

;; Concatenate two DImode values into a V2DI vector.  Alternatives 0 and
;; 1 load operand 1 while operand 2 is zero (movq, movq2dq); alternatives
;; 2 to 4 add operand 2 to a destination that already holds operand 1;
;; alternative 5 loads operand 1 from memory with movlps into a
;; destination tied to operand 2.
(define_insn "*vec_concatv2di"
  [(set (match_operand:V2DI 0 "register_operand" "=Y,?Y,Y,x,x,x")
        (vec_concat:V2DI
          (match_operand:DI 1 "nonimmediate_operand" " m,*y,0,0,0,m")
          (match_operand:DI 2 "vector_move_operand" " C, C,Y,x,m,0")))]
  "TARGET_SSE"
  "@
   movq\t{%1, %0|%0, %1}
   movq2dq\t{%1, %0|%0, %1}
   punpcklqdq\t{%2, %0|%0, %2}
   movlhps\t{%2, %0|%0, %2}
   movhps\t{%2, %0|%0, %2}
   movlps\t{%1, %0|%0, %1}"
  [(set_attr "type" "ssemov,ssemov,sselog,ssemov,ssemov,ssemov")
   (set_attr "mode" "TI,TI,TI,V4SF,V2SF,V2SF")])

;; Standard-name expanders for element set / element extract / vector
;; initialisation.  For each of V2DI, V4SI, V8HI and V16QI they forward
;; to ix86_expand_vector_set, ix86_expand_vector_extract and
;; ix86_expand_vector_init respectively, always passing false as the
;; first argument.  For set and extract, operand 2 is the constant
;; element number.

(define_expand "vec_setv2di"
  [(match_operand:V2DI 0 "register_operand" "")
   (match_operand:DI 1 "register_operand" "")
   (match_operand 2 "const_int_operand" "")]
  "TARGET_SSE"
{
  rtx vec = operands[0];
  rtx val = operands[1];

  ix86_expand_vector_set (false, vec, val, INTVAL (operands[2]));
  DONE;
})

(define_expand "vec_extractv2di"
  [(match_operand:DI 0 "register_operand" "")
   (match_operand:V2DI 1 "register_operand" "")
   (match_operand 2 "const_int_operand" "")]
  "TARGET_SSE"
{
  rtx dest = operands[0];
  rtx vec = operands[1];

  ix86_expand_vector_extract (false, dest, vec, INTVAL (operands[2]));
  DONE;
})

(define_expand "vec_initv2di"
  [(match_operand:V2DI 0 "register_operand" "")
   (match_operand 1 "" "")]
  "TARGET_SSE"
{
  rtx vec = operands[0];
  rtx vals = operands[1];

  ix86_expand_vector_init (false, vec, vals);
  DONE;
})

(define_expand "vec_setv4si"
  [(match_operand:V4SI 0 "register_operand" "")
   (match_operand:SI 1 "register_operand" "")
   (match_operand 2 "const_int_operand" "")]
  "TARGET_SSE"
{
  rtx vec = operands[0];
  rtx val = operands[1];

  ix86_expand_vector_set (false, vec, val, INTVAL (operands[2]));
  DONE;
})

(define_expand "vec_extractv4si"
  [(match_operand:SI 0 "register_operand" "")
   (match_operand:V4SI 1 "register_operand" "")
   (match_operand 2 "const_int_operand" "")]
  "TARGET_SSE"
{
  rtx dest = operands[0];
  rtx vec = operands[1];

  ix86_expand_vector_extract (false, dest, vec, INTVAL (operands[2]));
  DONE;
})

(define_expand "vec_initv4si"
  [(match_operand:V4SI 0 "register_operand" "")
   (match_operand 1 "" "")]
  "TARGET_SSE"
{
  rtx vec = operands[0];
  rtx vals = operands[1];

  ix86_expand_vector_init (false, vec, vals);
  DONE;
})

(define_expand "vec_setv8hi"
  [(match_operand:V8HI 0 "register_operand" "")
   (match_operand:HI 1 "register_operand" "")
   (match_operand 2 "const_int_operand" "")]
  "TARGET_SSE"
{
  rtx vec = operands[0];
  rtx val = operands[1];

  ix86_expand_vector_set (false, vec, val, INTVAL (operands[2]));
  DONE;
})

(define_expand "vec_extractv8hi"
  [(match_operand:HI 0 "register_operand" "")
   (match_operand:V8HI 1 "register_operand" "")
   (match_operand 2 "const_int_operand" "")]
  "TARGET_SSE"
{
  rtx dest = operands[0];
  rtx vec = operands[1];

  ix86_expand_vector_extract (false, dest, vec, INTVAL (operands[2]));
  DONE;
})

(define_expand "vec_initv8hi"
  [(match_operand:V8HI 0 "register_operand" "")
   (match_operand 1 "" "")]
  "TARGET_SSE"
{
  rtx vec = operands[0];
  rtx vals = operands[1];

  ix86_expand_vector_init (false, vec, vals);
  DONE;
})

(define_expand "vec_setv16qi"
  [(match_operand:V16QI 0 "register_operand" "")
   (match_operand:QI 1 "register_operand" "")
   (match_operand 2 "const_int_operand" "")]
  "TARGET_SSE"
{
  rtx vec = operands[0];
  rtx val = operands[1];

  ix86_expand_vector_set (false, vec, val, INTVAL (operands[2]));
  DONE;
})

(define_expand "vec_extractv16qi"
  [(match_operand:QI 0 "register_operand" "")
   (match_operand:V16QI 1 "register_operand" "")
   (match_operand 2 "const_int_operand" "")]
  "TARGET_SSE"
{
  rtx dest = operands[0];
  rtx vec = operands[1];

  ix86_expand_vector_extract (false, dest, vec, INTVAL (operands[2]));
  DONE;
})

(define_expand "vec_initv16qi"
  [(match_operand:V16QI 0 "register_operand" "")
   (match_operand 1 "" "")]
  "TARGET_SSE"
{
  rtx vec = operands[0];
  rtx vals = operands[1];

  ix86_expand_vector_init (false, vec, vals);
  DONE;
})

;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;;
;; Miscellaneous
;;
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;

;; pavgb: unsigned byte average.  The RTL zero-extends both inputs to
;; V16HI, adds them, adds a vector of ones, shifts right by one and
;; truncates back to V16QI, i.e. (a + b + 1) >> 1 per element.
(define_insn "sse2_uavgv16qi3"
  [(set (match_operand:V16QI 0 "register_operand" "=x")
        (truncate:V16QI
          (lshiftrt:V16HI
            (plus:V16HI
              (plus:V16HI
                (zero_extend:V16HI
                  (match_operand:V16QI 1 "nonimmediate_operand" "%0"))
                (zero_extend:V16HI
                  (match_operand:V16QI 2 "nonimmediate_operand" "xm")))
              (const_vector:V16QI [(const_int 1) (const_int 1)
                                   (const_int 1) (const_int 1)
                                   (const_int 1) (const_int 1)
                                   (const_int 1) (const_int 1)
                                   (const_int 1) (const_int 1)
                                   (const_int 1) (const_int 1)
                                   (const_int 1) (const_int 1)
                                   (const_int 1) (const_int 1)]))
            (const_int 1))))]
  "TARGET_SSE2 && ix86_binary_operator_ok (PLUS, V16QImode, operands)"
  "pavgb\t{%2, %0|%0, %2}"
  [(set_attr "type" "sseiadd")
   (set_attr "mode" "TI")])

;; pavgw: the same rounding average for V8HI, computed in V8SI.
(define_insn "sse2_uavgv8hi3"
  [(set (match_operand:V8HI 0 "register_operand" "=x")
        (truncate:V8HI
          (lshiftrt:V8SI
            (plus:V8SI
              (plus:V8SI
                (zero_extend:V8SI
                  (match_operand:V8HI 1 "nonimmediate_operand" "%0"))
                (zero_extend:V8SI
                  (match_operand:V8HI 2 "nonimmediate_operand" "xm")))
              (const_vector:V8HI [(const_int 1) (const_int 1)
                                  (const_int 1) (const_int 1)
                                  (const_int 1) (const_int 1)
                                  (const_int 1) (const_int 1)]))
            (const_int 1))))]
  "TARGET_SSE2 && ix86_binary_operator_ok (PLUS, V8HImode, operands)"
  "pavgw\t{%2, %0|%0, %2}"
  [(set_attr "type" "sseiadd")
   (set_attr "mode" "TI")])

;; The correct representation for this is absolutely enormous, and
;; surely not generally useful.
;; psadbw: two V16QI inputs, one V2DI result.  Kept opaque as
;; UNSPEC_PSADBW instead of being spelled out in RTL.
(define_insn "sse2_psadbw"
  [(set (match_operand:V2DI 0 "register_operand" "=x")
        (unspec:V2DI [(match_operand:V16QI 1 "register_operand" "0")
                      (match_operand:V16QI 2 "nonimmediate_operand" "xm")]
                     UNSPEC_PSADBW))]
  "TARGET_SSE2"
  "psadbw\t{%2, %0|%0, %2}"
  [(set_attr "type" "sseiadd")
   (set_attr "mode" "TI")])

;; movmsk family: an SSE register in, an SImode general register out.
;; All three variants share UNSPEC_MOVMSK and differ only in source mode.
(define_insn "sse_movmskps"
  [(set (match_operand:SI 0 "register_operand" "=r")
        (unspec:SI [(match_operand:V4SF 1 "register_operand" "x")]
                   UNSPEC_MOVMSK))]
  "TARGET_SSE"
  "movmskps\t{%1, %0|%0, %1}"
  [(set_attr "type" "ssecvt")
   (set_attr "mode" "V4SF")])

(define_insn "sse2_movmskpd"
  [(set (match_operand:SI 0 "register_operand" "=r")
        (unspec:SI [(match_operand:V2DF 1 "register_operand" "x")]
                   UNSPEC_MOVMSK))]
  "TARGET_SSE2"
  "movmskpd\t{%1, %0|%0, %1}"
  [(set_attr "type" "ssecvt")
   (set_attr "mode" "V2DF")])

;; NOTE(review): the source operand is V16QI but the mode attribute is
;; V2DF -- presumably deliberate; confirm before changing.
(define_insn "sse2_pmovmskb"
  [(set (match_operand:SI 0 "register_operand" "=r")
        (unspec:SI [(match_operand:V16QI 1 "register_operand" "x")]
                   UNSPEC_MOVMSK))]
  "TARGET_SSE2"
  "pmovmskb\t{%1, %0|%0, %1}"
  [(set_attr "type" "ssecvt")
   (set_attr "mode" "V2DF")])

;; Named entry point for maskmovdqu.  Operand 0 is the destination memory,
;; which also appears as an input of the unspec (match_dup 0).  There is
;; no preparation code; the two insns below do the matching.
(define_expand "sse2_maskmovdqu"
  [(set (match_operand:V16QI 0 "memory_operand" "")
        (unspec:V16QI [(match_operand:V16QI 1 "register_operand" "x")
                       (match_operand:V16QI 2 "register_operand" "x")
                       (match_dup 0)]
                      UNSPEC_MASKMOV))]
  "TARGET_SSE2"
  "")

;; 32-bit form: the address is an SImode register under constraint "D".
(define_insn "*sse2_maskmovdqu"
  [(set (mem:V16QI (match_operand:SI 0 "register_operand" "D"))
        (unspec:V16QI [(match_operand:V16QI 1 "register_operand" "x")
                       (match_operand:V16QI 2 "register_operand" "x")
                       (mem:V16QI (match_dup 0))]
                      UNSPEC_MASKMOV))]
  "TARGET_SSE2 && !TARGET_64BIT"
  ;; @@@ check ordering of operands in intel/nonintel syntax
  "maskmovdqu\t{%2, %1|%1, %2}"
  [(set_attr "type" "ssecvt")
   (set_attr "mode" "TI")])

;; 64-bit form: identical apart from the DImode address register.
(define_insn "*sse2_maskmovdqu_rex64"
  [(set (mem:V16QI (match_operand:DI 0 "register_operand" "D"))
        (unspec:V16QI [(match_operand:V16QI 1 "register_operand" "x")
                       (match_operand:V16QI 2 "register_operand" "x")
                       (mem:V16QI (match_dup 0))]
                      UNSPEC_MASKMOV))]
  "TARGET_SSE2 && TARGET_64BIT"
  ;; @@@ check ordering of operands in intel/nonintel syntax
  "maskmovdqu\t{%2, %1|%1, %2}"
  [(set_attr "type" "ssecvt")
   (set_attr "mode" "TI")])

;; ldmxcsr takes an SImode memory operand; modelled as a volatile unspec.
(define_insn "sse_ldmxcsr"
  [(unspec_volatile [(match_operand:SI 0 "memory_operand" "m")]
                    UNSPECV_LDMXCSR)]
  "TARGET_SSE"
  "ldmxcsr\t%0"
  [(set_attr "type" "sse")
   (set_attr "memory" "load")])

;; stmxcsr writes an SImode memory operand from a volatile unspec.
(define_insn "sse_stmxcsr"
  [(set (match_operand:SI 0 "memory_operand" "=m")
        (unspec_volatile:SI [(const_int 0)] UNSPECV_STMXCSR))]
  "TARGET_SSE"
  "stmxcsr\t%0"
  [(set_attr "type" "sse")
   (set_attr "memory" "store")])

;; The three fence expanders share one shape: operand 0 is a volatile
;; BLKmode MEM at a scratch address, set from an unspec of itself.
(define_expand "sse_sfence"
  [(set (match_dup 0)
        (unspec:BLK [(match_dup 0)] UNSPEC_SFENCE))]
  "TARGET_SSE || TARGET_3DNOW_A"
{
  rtx fence_mem = gen_rtx_MEM (BLKmode, gen_rtx_SCRATCH (Pmode));

  MEM_VOLATILE_P (fence_mem) = 1;
  operands[0] = fence_mem;
})

(define_insn "*sse_sfence"
  [(set (match_operand:BLK 0 "" "")
        (unspec:BLK [(match_dup 0)] UNSPEC_SFENCE))]
  "TARGET_SSE || TARGET_3DNOW_A"
  "sfence"
  [(set_attr "type" "sse")
   (set_attr "memory" "unknown")])

;; clflush on an address operand; %a0 prints operand 0 as an address.
(define_insn "sse2_clflush"
  [(unspec_volatile [(match_operand 0 "address_operand" "p")]
                    UNSPECV_CLFLUSH)]
  "TARGET_SSE2"
  "clflush\t%a0"
  [(set_attr "type" "sse")
   (set_attr "memory" "unknown")])

(define_expand "sse2_mfence"
  [(set (match_dup 0)
        (unspec:BLK [(match_dup 0)] UNSPEC_MFENCE))]
  "TARGET_SSE2"
{
  rtx fence_mem = gen_rtx_MEM (BLKmode, gen_rtx_SCRATCH (Pmode));

  MEM_VOLATILE_P (fence_mem) = 1;
  operands[0] = fence_mem;
})

(define_insn "*sse2_mfence"
  [(set (match_operand:BLK 0 "" "")
        (unspec:BLK [(match_dup 0)] UNSPEC_MFENCE))]
  "TARGET_SSE2"
  "mfence"
  [(set_attr "type" "sse")
   (set_attr "memory" "unknown")])

(define_expand "sse2_lfence"
  [(set (match_dup 0)
        (unspec:BLK [(match_dup 0)] UNSPEC_LFENCE))]
  "TARGET_SSE2"
{
  rtx fence_mem = gen_rtx_MEM (BLKmode, gen_rtx_SCRATCH (Pmode));

  MEM_VOLATILE_P (fence_mem) = 1;
  operands[0] = fence_mem;
})

(define_insn "*sse2_lfence"
  [(set (match_operand:BLK 0 "" "")
        (unspec:BLK [(match_dup 0)] UNSPEC_LFENCE))]
  "TARGET_SSE2"
  "lfence"
  [(set_attr "type" "sse")
   (set_attr "memory" "unknown")])

;; mwait with its operands pinned by the "a" and "c" constraints.
(define_insn "sse3_mwait"
  [(unspec_volatile [(match_operand:SI 0 "register_operand" "a")
                     (match_operand:SI 1 "register_operand" "c")]
                    UNSPECV_MWAIT)]
  "TARGET_SSE3"
;; The 64bit form is "mwait %rax,%rcx", but only the low 32 bits are used.
;; 32bit register operands are implicitly zero extended to 64bit, so
;; setting up the 32bit registers is sufficient.
  "mwait"
  [(set_attr "length" "3")])

;; 32-bit monitor: all three operands are SImode ("a", "c", "d").
(define_insn "sse3_monitor"
  [(unspec_volatile [(match_operand:SI 0 "register_operand" "a")
                     (match_operand:SI 1 "register_operand" "c")
                     (match_operand:SI 2 "register_operand" "d")]
                    UNSPECV_MONITOR)]
  "TARGET_SSE3 && !TARGET_64BIT"
  "monitor\t%0, %1, %2"
  [(set_attr "length" "3")])

;; 64-bit monitor: operand 0 widens to DImode, the other two stay SImode.
(define_insn "sse3_monitor64"
  [(unspec_volatile [(match_operand:DI 0 "register_operand" "a")
                     (match_operand:SI 1 "register_operand" "c")
                     (match_operand:SI 2 "register_operand" "d")]
                    UNSPECV_MONITOR)]
  "TARGET_SSE3 && TARGET_64BIT"
;; The 64bit form is "monitor %rax,%rcx,%rdx", but only the low 32 bits of
;; RCX and RDX are used.  32bit register operands are implicitly zero
;; extended to 64bit, so setting up the 32bit registers is sufficient.
  "monitor"
  [(set_attr "length" "3")])
;; GCC machine description for SSE instructions
;; Copyright (C) 2005, 2006
;; Free Software Foundation, Inc.
;;
;; This file is part of GCC.
;;
;; GCC is free software; you can redistribute it and/or modify
;; it under the terms of the GNU General Public License as published by
;; the Free Software Foundation; either version 2, or (at your option)
;; any later version.
;;
;; GCC is distributed in the hope that it will be useful,
;; but WITHOUT ANY WARRANTY; without even the implied warranty of
;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
;; GNU General Public License for more details.
;;
;; You should have received a copy of the GNU General Public License
;; along with GCC; see the file COPYING.  If not, write to
;; the Free Software Foundation, 51 Franklin Street, Fifth Floor,
;; Boston, MA 02110-1301, USA.


;; The 16-byte integer vector modes handled by SSE.  TImode is left out
;; on purpose: it gets special-cased for TARGET_64BIT.
(define_mode_macro SSEMODEI [V16QI V8HI V4SI V2DI])

;; Every 16-byte vector mode handled by SSE, integer and floating point.
(define_mode_macro SSEMODE [V16QI V8HI V4SI V2DI V4SF V2DF])

;; Mix-n-match subsets; the digits in each name give the element sizes
;; in bytes of the modes it lists.
(define_mode_macro SSEMODE12 [V16QI V8HI])
(define_mode_macro SSEMODE24 [V8HI V4SI])
(define_mode_macro SSEMODE14 [V16QI V4SI])
(define_mode_macro SSEMODE124 [V16QI V8HI V4SI])
(define_mode_macro SSEMODE248 [V8HI V4SI V2DI])

;; Integer vector mode -> mnemonic suffix letter.
(define_mode_attr ssevecsize [(V16QI "b") (V8HI "w") (V4SI "d") (V2DI "q")])

;; Patterns whose name begins with "sse{,2,3}_" are invoked by intrinsics.

;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;;
;; Move patterns
;;
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;

;; All of these patterns are enabled for SSE1 as well as SSE2.
;; This is essential for maintaining stable calling conventions.
;; Standard move expander for the 16-byte integer vector modes.  All the
;; work is delegated to ix86_expand_vector_move.
(define_expand "mov<mode>"
  [(set (match_operand:SSEMODEI 0 "nonimmediate_operand" "")
        (match_operand:SSEMODEI 1 "nonimmediate_operand" ""))]
  "TARGET_SSE"
{
  ix86_expand_vector_move (<MODE>mode, operands);
  DONE;
})

;; Integer vector move insn.  Alternatives:
;;   0: SSE constant ("C") into a register,
;;   1: register or memory into a register,
;;   2: register into memory.
;; Memory-to-memory moves are rejected by the insn condition.
(define_insn "*mov<mode>_internal"
  [(set (match_operand:SSEMODEI 0 "nonimmediate_operand" "=x,x ,m")
        (match_operand:SSEMODEI 1 "nonimmediate_or_sse_const_operand"  "C ,xm,x"))]
  "TARGET_SSE && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
{
  switch (which_alternative)
    {
    case 0:
      return standard_sse_constant_opcode (insn, operands[1]);
    case 1:
    case 2:
      /* The "mode" attribute computed below selects the mnemonic.  */
      if (get_attr_mode (insn) == MODE_V4SF)
        return "movaps\t{%1, %0|%0, %1}";
      else
        return "movdqa\t{%1, %0|%0, %1}";
    default:
      gcc_unreachable ();
    }
}
  [(set_attr "type" "sselog1,ssemov,ssemov")
   ;; V4SF when optimizing for size, when SSE2 is unavailable, or for
   ;; stores (alternative 2) under TARGET_SSE_TYPELESS_STORES; TI otherwise.
   (set (attr "mode")
        (if_then_else
          (ior (ior (ne (symbol_ref "optimize_size") (const_int 0))
                    (eq (symbol_ref "TARGET_SSE2") (const_int 0)))
               (and (eq_attr "alternative" "2")
                    (ne (symbol_ref "TARGET_SSE_TYPELESS_STORES")
                        (const_int 0))))
          (const_string "V4SF")
          (const_string "TI")))])

;; Standard move expander for V4SF.
(define_expand "movv4sf"
  [(set (match_operand:V4SF 0 "nonimmediate_operand" "")
        (match_operand:V4SF 1 "nonimmediate_operand" ""))]
  "TARGET_SSE"
{
  ix86_expand_vector_move (V4SFmode, operands);
  DONE;
})

;; V4SF move insn; same three alternatives as the integer variant, always
;; emitted as movaps.  The condition rejects memory-to-memory moves, as
;; the integer and V2DF move insns in this file do, and the impossible
;; default case uses gcc_unreachable like its siblings.
(define_insn "*movv4sf_internal"
  [(set (match_operand:V4SF 0 "nonimmediate_operand" "=x,x,m")
        (match_operand:V4SF 1 "nonimmediate_or_sse_const_operand" "C,xm,x"))]
  "TARGET_SSE && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
{
  switch (which_alternative)
    {
    case 0:
      return standard_sse_constant_opcode (insn, operands[1]);
    case 1:
    case 2:
      return "movaps\t{%1, %0|%0, %1}";
    default:
      gcc_unreachable ();
    }
}
  [(set_attr "type" "sselog1,ssemov,ssemov")
   (set_attr "mode" "V4SF")])

;; After reload, rewrite a V4SF load of a zero-extended scalar as a
;; vec_merge of the duplicated SFmode value with a zero vector, keeping
;; element 0 (mask 1) from the scalar.
(define_split
  [(set (match_operand:V4SF 0 "register_operand" "")
        (match_operand:V4SF 1 "zero_extended_scalar_load_operand" ""))]
  "TARGET_SSE && reload_completed"
  [(set (match_dup 0)
        (vec_merge:V4SF
          (vec_duplicate:V4SF (match_dup 1))
          (match_dup 2)
          (const_int 1)))]
{
  operands[1] = simplify_gen_subreg (SFmode, operands[1], V4SFmode, 0);
  operands[2] = CONST0_RTX (V4SFmode);
})

;; Standard move expander for V2DF.
(define_expand "movv2df"
  [(set (match_operand:V2DF 0 "nonimmediate_operand" "")
        (match_operand:V2DF 1 "nonimmediate_operand" ""))]
  "TARGET_SSE"
{
  ix86_expand_vector_move (V2DFmode, operands);
  DONE;
})

;; V2DF move insn.  Emits movaps when the computed mode attribute is V4SF
;; and movapd otherwise.
(define_insn "*movv2df_internal"
  [(set (match_operand:V2DF 0 "nonimmediate_operand" "=x,x,m")
        (match_operand:V2DF 1 "nonimmediate_or_sse_const_operand" "C,xm,x"))]
  "TARGET_SSE && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
{
  switch (which_alternative)
    {
    case 0:
      return standard_sse_constant_opcode (insn, operands[1]);
    case 1:
    case 2:
      if (get_attr_mode (insn) == MODE_V4SF)
        return "movaps\t{%1, %0|%0, %1}";
      else
        return "movapd\t{%1, %0|%0, %1}";
    default:
      gcc_unreachable ();
    }
}
  [(set_attr "type" "sselog1,ssemov,ssemov")
   ;; Same selection rule as the integer move, with V2DF as the fallback.
   (set (attr "mode")
        (if_then_else
          (ior (ior (ne (symbol_ref "optimize_size") (const_int 0))
                    (eq (symbol_ref "TARGET_SSE2") (const_int 0)))
               (and (eq_attr "alternative" "2")
                    (ne (symbol_ref "TARGET_SSE_TYPELESS_STORES")
                        (const_int 0))))
          (const_string "V4SF")
          (const_string "V2DF")))])

;; After reload, rewrite a V2DF load of a zero-extended scalar as the
;; concatenation of the DFmode value with DFmode zero.
(define_split
  [(set (match_operand:V2DF 0 "register_operand" "")
        (match_operand:V2DF 1 "zero_extended_scalar_load_operand" ""))]
  "TARGET_SSE2 && reload_completed"
  [(set (match_dup 0) (vec_concat:V2DF (match_dup 1) (match_dup 2)))]
{
  operands[1] = simplify_gen_subreg (DFmode, operands[1], V2DFmode, 0);
  operands[2] = CONST0_RTX (DFmode);
})

;; Push expander for every SSE vector mode; delegates to ix86_expand_push.
(define_expand "push<mode>1"
  [(match_operand:SSEMODE 0 "register_operand" "")]
  "TARGET_SSE"
{
  ix86_expand_push (<MODE>mode, operands[0]);
  DONE;
})
;; Misaligned move expander for every SSE vector mode; delegates to
;; ix86_expand_vector_move_misalign.
(define_expand "movmisalign<mode>"
  [(set (match_operand:SSEMODE 0 "nonimmediate_operand" "")
        (match_operand:SSEMODE 1 "nonimmediate_operand" ""))]
  "TARGET_SSE"
{
  ix86_expand_vector_move_misalign (<MODE>mode, operands);
  DONE;
})

;; Unaligned V4SF move (UNSPEC_MOVU).  The mode attribute is V4SF, which
;; matches the operand mode and every other V4SF pattern in this file.
(define_insn "sse_movups"
  [(set (match_operand:V4SF 0 "nonimmediate_operand" "=x,m")
        (unspec:V4SF [(match_operand:V4SF 1 "nonimmediate_operand" "xm,x")]
                     UNSPEC_MOVU))]
  "TARGET_SSE && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
  "movups\t{%1, %0|%0, %1}"
  [(set_attr "type" "ssemov")
   (set_attr "mode" "V4SF")])

;; Unaligned V2DF move.
(define_insn "sse2_movupd"
  [(set (match_operand:V2DF 0 "nonimmediate_operand" "=x,m")
        (unspec:V2DF [(match_operand:V2DF 1 "nonimmediate_operand" "xm,x")]
                     UNSPEC_MOVU))]
  "TARGET_SSE2 && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
  "movupd\t{%1, %0|%0, %1}"
  [(set_attr "type" "ssemov")
   (set_attr "mode" "V2DF")])

;; Unaligned V16QI move.
(define_insn "sse2_movdqu"
  [(set (match_operand:V16QI 0 "nonimmediate_operand" "=x,m")
        (unspec:V16QI [(match_operand:V16QI 1 "nonimmediate_operand" "xm,x")]
                      UNSPEC_MOVU))]
  "TARGET_SSE2 && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
  "movdqu\t{%1, %0|%0, %1}"
  [(set_attr "type" "ssemov")
   (set_attr "mode" "TI")])

;; UNSPEC_MOVNT stores: register source, memory destination.
(define_insn "sse_movntv4sf"
  [(set (match_operand:V4SF 0 "memory_operand" "=m")
        (unspec:V4SF [(match_operand:V4SF 1 "register_operand" "x")]
                     UNSPEC_MOVNT))]
  "TARGET_SSE"
  "movntps\t{%1, %0|%0, %1}"
  [(set_attr "type" "ssemov")
   (set_attr "mode" "V4SF")])

(define_insn "sse2_movntv2df"
  [(set (match_operand:V2DF 0 "memory_operand" "=m")
        (unspec:V2DF [(match_operand:V2DF 1 "register_operand" "x")]
                     UNSPEC_MOVNT))]
  "TARGET_SSE2"
  "movntpd\t{%1, %0|%0, %1}"
  [(set_attr "type" "ssecvt")
   (set_attr "mode" "V2DF")])

(define_insn "sse2_movntv2di"
  [(set (match_operand:V2DI 0 "memory_operand" "=m")
        (unspec:V2DI [(match_operand:V2DI 1 "register_operand" "x")]
                     UNSPEC_MOVNT))]
  "TARGET_SSE2"
  "movntdq\t{%1, %0|%0, %1}"
  [(set_attr "type" "ssecvt")
   (set_attr "mode" "TI")])

;; NOTE(review): the operands are SImode with a general register source,
;; yet the mode attribute is V2DF -- left unchanged; confirm it is intended.
(define_insn "sse2_movntsi"
  [(set (match_operand:SI 0 "memory_operand" "=m")
        (unspec:SI [(match_operand:SI 1 "register_operand" "r")]
                   UNSPEC_MOVNT))]
  "TARGET_SSE2"
  "movnti\t{%1, %0|%0, %1}"
  [(set_attr "type" "ssecvt")
   (set_attr "mode" "V2DF")])

;; lddqu: V16QI load from memory into a register.
(define_insn "sse3_lddqu"
  [(set (match_operand:V16QI 0 "register_operand" "=x")
        (unspec:V16QI [(match_operand:V16QI 1 "memory_operand" "m")]
                      UNSPEC_LDQQU))]
  "TARGET_SSE3"
  "lddqu\t{%1, %0|%0, %1}"
  [(set_attr "type" "ssecvt")
   (set_attr "mode" "TI")])

;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;;
;; Parallel single-precision floating point arithmetic
;;
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;

;; Negation and absolute value are expanded by
;; ix86_expand_fp_absneg_operator; there is no direct insn for them here.
(define_expand "negv4sf2"
  [(set (match_operand:V4SF 0 "register_operand" "")
        (neg:V4SF (match_operand:V4SF 1 "nonimmediate_operand" "")))]
  "TARGET_SSE"
  "ix86_expand_fp_absneg_operator (NEG, V4SFmode, operands); DONE;")

(define_expand "absv4sf2"
  [(set (match_operand:V4SF 0 "register_operand" "")
        (abs:V4SF (match_operand:V4SF 1 "nonimmediate_operand" "")))]
  "TARGET_SSE"
  "ix86_expand_fp_absneg_operator (ABS, V4SFmode, operands); DONE;")

;; Each arithmetic operation below comes as a group of three:
;;   - a named expander that fixes up the operands,
;;   - the packed insn (..ps),
;;   - the "vm" insn (..ss): vec_merge with mask 1 takes element 0 from
;;     the operation and the remaining elements from operand 1.

(define_expand "addv4sf3"
  [(set (match_operand:V4SF 0 "register_operand" "")
        (plus:V4SF (match_operand:V4SF 1 "nonimmediate_operand" "")
                   (match_operand:V4SF 2 "nonimmediate_operand" "")))]
  "TARGET_SSE"
  "ix86_fixup_binary_operands_no_copy (PLUS, V4SFmode, operands);")

(define_insn "*addv4sf3"
  [(set (match_operand:V4SF 0 "register_operand" "=x")
        (plus:V4SF (match_operand:V4SF 1 "nonimmediate_operand" "%0")
                   (match_operand:V4SF 2 "nonimmediate_operand" "xm")))]
  "TARGET_SSE && ix86_binary_operator_ok (PLUS, V4SFmode, operands)"
  "addps\t{%2, %0|%0, %2}"
  [(set_attr "type" "sseadd")
   (set_attr "mode" "V4SF")])

(define_insn "sse_vmaddv4sf3"
  [(set (match_operand:V4SF 0 "register_operand" "=x")
        (vec_merge:V4SF
          (plus:V4SF (match_operand:V4SF 1 "register_operand" "0")
                     (match_operand:V4SF 2 "nonimmediate_operand" "xm"))
          (match_dup 1)
          (const_int 1)))]
  "TARGET_SSE && ix86_binary_operator_ok (PLUS, V4SFmode, operands)"
  "addss\t{%2, %0|%0, %2}"
  [(set_attr "type" "sseadd")
   (set_attr "mode" "SF")])

;; Subtraction is not commutative: operand 1 must be a register and no
;; "%" constraint modifier is used.
(define_expand "subv4sf3"
  [(set (match_operand:V4SF 0 "register_operand" "")
        (minus:V4SF (match_operand:V4SF 1 "register_operand" "")
                    (match_operand:V4SF 2 "nonimmediate_operand" "")))]
  "TARGET_SSE"
  "ix86_fixup_binary_operands_no_copy (MINUS, V4SFmode, operands);")

(define_insn "*subv4sf3"
  [(set (match_operand:V4SF 0 "register_operand" "=x")
        (minus:V4SF (match_operand:V4SF 1 "register_operand" "0")
                    (match_operand:V4SF 2 "nonimmediate_operand" "xm")))]
  "TARGET_SSE"
  "subps\t{%2, %0|%0, %2}"
  [(set_attr "type" "sseadd")
   (set_attr "mode" "V4SF")])

(define_insn "sse_vmsubv4sf3"
  [(set (match_operand:V4SF 0 "register_operand" "=x")
        (vec_merge:V4SF
          (minus:V4SF (match_operand:V4SF 1 "register_operand" "0")
                      (match_operand:V4SF 2 "nonimmediate_operand" "xm"))
          (match_dup 1)
          (const_int 1)))]
  "TARGET_SSE"
  "subss\t{%2, %0|%0, %2}"
  [(set_attr "type" "sseadd")
   (set_attr "mode" "SF")])

(define_expand "mulv4sf3"
  [(set (match_operand:V4SF 0 "register_operand" "")
        (mult:V4SF (match_operand:V4SF 1 "nonimmediate_operand" "")
                   (match_operand:V4SF 2 "nonimmediate_operand" "")))]
  "TARGET_SSE"
  "ix86_fixup_binary_operands_no_copy (MULT, V4SFmode, operands);")

(define_insn "*mulv4sf3"
  [(set (match_operand:V4SF 0 "register_operand" "=x")
        (mult:V4SF (match_operand:V4SF 1 "nonimmediate_operand" "%0")
                   (match_operand:V4SF 2 "nonimmediate_operand" "xm")))]
  "TARGET_SSE && ix86_binary_operator_ok (MULT, V4SFmode, operands)"
  "mulps\t{%2, %0|%0, %2}"
  [(set_attr "type" "ssemul")
   (set_attr "mode" "V4SF")])

(define_insn "sse_vmmulv4sf3"
  [(set (match_operand:V4SF 0 "register_operand" "=x")
        (vec_merge:V4SF
          (mult:V4SF (match_operand:V4SF 1 "register_operand" "0")
                     (match_operand:V4SF 2 "nonimmediate_operand" "xm"))
          (match_dup 1)
          (const_int 1)))]
  "TARGET_SSE && ix86_binary_operator_ok (MULT, V4SFmode, operands)"
  "mulss\t{%2, %0|%0, %2}"
  [(set_attr "type" "ssemul")
   (set_attr "mode" "SF")])

(define_expand "divv4sf3"
  [(set (match_operand:V4SF 0 "register_operand" "")
        (div:V4SF (match_operand:V4SF 1 "register_operand" "")
                  (match_operand:V4SF 2 "nonimmediate_operand" "")))]
  "TARGET_SSE"
  "ix86_fixup_binary_operands_no_copy (DIV, V4SFmode, operands);")

(define_insn "*divv4sf3"
  [(set (match_operand:V4SF 0 "register_operand" "=x")
        (div:V4SF (match_operand:V4SF 1 "register_operand" "0")
                  (match_operand:V4SF 2 "nonimmediate_operand" "xm")))]
  "TARGET_SSE"
  "divps\t{%2, %0|%0, %2}"
  [(set_attr "type" "ssediv")
   (set_attr "mode" "V4SF")])

(define_insn "sse_vmdivv4sf3"
  [(set (match_operand:V4SF 0 "register_operand" "=x")
        (vec_merge:V4SF
          (div:V4SF (match_operand:V4SF 1 "register_operand" "0")
                    (match_operand:V4SF 2 "nonimmediate_operand" "xm"))
          (match_dup 1)
          (const_int 1)))]
  "TARGET_SSE"
  "divss\t{%2, %0|%0, %2}"
  [(set_attr "type" "ssediv")
   (set_attr "mode" "SF")])

;; rcp and rsqrt are unspecs; sqrt uses the rtl sqrt operator.  In the
;; "vm" forms the remaining elements come from operand 2, which is tied to
;; the destination, and the template prints only operand 1.

(define_insn "sse_rcpv4sf2"
  [(set (match_operand:V4SF 0 "register_operand" "=x")
        (unspec:V4SF
          [(match_operand:V4SF 1 "nonimmediate_operand" "xm")] UNSPEC_RCP))]
  "TARGET_SSE"
  "rcpps\t{%1, %0|%0, %1}"
  [(set_attr "type" "sse")
   (set_attr "mode" "V4SF")])

(define_insn "sse_vmrcpv4sf2"
  [(set (match_operand:V4SF 0 "register_operand" "=x")
        (vec_merge:V4SF
          (unspec:V4SF [(match_operand:V4SF 1 "nonimmediate_operand" "xm")]
                       UNSPEC_RCP)
          (match_operand:V4SF 2 "register_operand" "0")
          (const_int 1)))]
  "TARGET_SSE"
  "rcpss\t{%1, %0|%0, %1}"
  [(set_attr "type" "sse")
   (set_attr "mode" "SF")])

(define_insn "sse_rsqrtv4sf2"
  [(set (match_operand:V4SF 0 "register_operand" "=x")
        (unspec:V4SF
          [(match_operand:V4SF 1 "nonimmediate_operand" "xm")] UNSPEC_RSQRT))]
  "TARGET_SSE"
  "rsqrtps\t{%1, %0|%0, %1}"
  [(set_attr "type" "sse")
   (set_attr "mode" "V4SF")])

(define_insn "sse_vmrsqrtv4sf2"
  [(set (match_operand:V4SF 0 "register_operand" "=x")
        (vec_merge:V4SF
          (unspec:V4SF [(match_operand:V4SF 1 "nonimmediate_operand" "xm")]
                       UNSPEC_RSQRT)
          (match_operand:V4SF 2 "register_operand" "0")
          (const_int 1)))]
  "TARGET_SSE"
  "rsqrtss\t{%1, %0|%0, %1}"
  [(set_attr "type" "sse")
   (set_attr "mode" "SF")])

(define_insn "sqrtv4sf2"
  [(set (match_operand:V4SF 0 "register_operand" "=x")
        (sqrt:V4SF (match_operand:V4SF 1 "nonimmediate_operand" "xm")))]
  "TARGET_SSE"
  "sqrtps\t{%1, %0|%0, %1}"
  [(set_attr "type" "sse")
   (set_attr "mode" "V4SF")])

(define_insn "sse_vmsqrtv4sf2"
  [(set (match_operand:V4SF 0 "register_operand" "=x")
        (vec_merge:V4SF
          (sqrt:V4SF (match_operand:V4SF 1 "nonimmediate_operand" "xm"))
          (match_operand:V4SF 2 "register_operand" "0")
          (const_int 1)))]
  "TARGET_SSE"
  "sqrtss\t{%1, %0|%0, %1}"
  [(set_attr "type" "sse")
   (set_attr "mode" "SF")])

;; ??? For !flag_finite_math_only, the representation with SMIN/SMAX
;; isn't really correct, as those rtl operators aren't defined when
;; applied to NaNs.  Hopefully the optimizers won't get too smart on us.
;; Max expander.  Unless flag_finite_math_only is set, operand 1 is forced
;; into a register before the generic binary-operand fixup runs.
(define_expand "smaxv4sf3"
  [(set (match_operand:V4SF 0 "register_operand" "")
        (smax:V4SF (match_operand:V4SF 1 "nonimmediate_operand" "")
                   (match_operand:V4SF 2 "nonimmediate_operand" "")))]
  "TARGET_SSE"
{
  if (!flag_finite_math_only)
    {
      operands[1] = force_reg (V4SFmode, operands[1]);
    }

  ix86_fixup_binary_operands_no_copy (SMAX, V4SFmode, operands);
})

;; Commutative form ("%0"), valid only under flag_finite_math_only.
(define_insn "*smaxv4sf3_finite"
  [(set (match_operand:V4SF 0 "register_operand" "=x")
        (smax:V4SF (match_operand:V4SF 1 "nonimmediate_operand" "%0")
                   (match_operand:V4SF 2 "nonimmediate_operand" "xm")))]
  "TARGET_SSE && flag_finite_math_only
   && ix86_binary_operator_ok (SMAX, V4SFmode, operands)"
  "maxps\t{%2, %0|%0, %2}"
  [(set_attr "type" "sse")
   (set_attr "mode" "V4SF")])

;; General form: operand 1 is a register tied to the destination and the
;; operands are not marked commutative.
(define_insn "*smaxv4sf3"
  [(set (match_operand:V4SF 0 "register_operand" "=x")
        (smax:V4SF (match_operand:V4SF 1 "register_operand" "0")
                   (match_operand:V4SF 2 "nonimmediate_operand" "xm")))]
  "TARGET_SSE"
  "maxps\t{%2, %0|%0, %2}"
  [(set_attr "type" "sse")
   (set_attr "mode" "V4SF")])

;; maxss: vec_merge with mask 1 takes element 0 from the smax and the
;; other elements from operand 1.
(define_insn "sse_vmsmaxv4sf3"
  [(set (match_operand:V4SF 0 "register_operand" "=x")
        (vec_merge:V4SF
          (smax:V4SF (match_operand:V4SF 1 "register_operand" "0")
                     (match_operand:V4SF 2 "nonimmediate_operand" "xm"))
          (match_dup 1)
          (const_int 1)))]
  "TARGET_SSE"
  "maxss\t{%2, %0|%0, %2}"
  [(set_attr "type" "sse")
   (set_attr "mode" "SF")])

;; Min expander; mirrors the max expander.
(define_expand "sminv4sf3"
  [(set (match_operand:V4SF 0 "register_operand" "")
        (smin:V4SF (match_operand:V4SF 1 "nonimmediate_operand" "")
                   (match_operand:V4SF 2 "nonimmediate_operand" "")))]
  "TARGET_SSE"
{
  if (!flag_finite_math_only)
    {
      operands[1] = force_reg (V4SFmode, operands[1]);
    }

  ix86_fixup_binary_operands_no_copy (SMIN, V4SFmode, operands);
})

;; Commutative form ("%0"), valid only under flag_finite_math_only.
(define_insn "*sminv4sf3_finite"
  [(set (match_operand:V4SF 0 "register_operand" "=x")
        (smin:V4SF (match_operand:V4SF 1 "nonimmediate_operand" "%0")
                   (match_operand:V4SF 2 "nonimmediate_operand" "xm")))]
  "TARGET_SSE && flag_finite_math_only
   && ix86_binary_operator_ok (SMIN, V4SFmode, operands)"
  "minps\t{%2, %0|%0, %2}"
  [(set_attr "type" "sse")
   (set_attr "mode" "V4SF")])

;; General form: operand 1 is a register tied to the destination.
(define_insn "*sminv4sf3"
  [(set (match_operand:V4SF 0 "register_operand" "=x")
        (smin:V4SF (match_operand:V4SF 1 "register_operand" "0")
                   (match_operand:V4SF 2 "nonimmediate_operand" "xm")))]
  "TARGET_SSE"
  "minps\t{%2, %0|%0, %2}"
  [(set_attr "type" "sse")
   (set_attr "mode" "V4SF")])

;; minss: element 0 from the smin, other elements from operand 1.
(define_insn "sse_vmsminv4sf3"
  [(set (match_operand:V4SF 0 "register_operand" "=x")
        (vec_merge:V4SF
          (smin:V4SF (match_operand:V4SF 1 "register_operand" "0")
                     (match_operand:V4SF 2 "nonimmediate_operand" "xm"))
          (match_dup 1)
          (const_int 1)))]
  "TARGET_SSE"
  "minss\t{%2, %0|%0, %2}"
  [(set_attr "type" "sse")
   (set_attr "mode" "SF")])

;; These versions of the min/max patterns implement exactly the operations
;;   min = (op1 < op2 ? op1 : op2)
;;   max = (!(op1 < op2) ? op1 : op2)
;; Their operands are not commutative, and thus they may be used in the
;; presence of -0.0 and NaN.
;; IEEE min/max as unspecs: operand 1 is tied to the destination and the
;; operand order is fixed (no "%" modifier).

(define_insn "*ieee_sminv4sf3"
  [(set (match_operand:V4SF 0 "register_operand" "=x")
        (unspec:V4SF [(match_operand:V4SF 1 "register_operand" "0")
                      (match_operand:V4SF 2 "nonimmediate_operand" "xm")]
                     UNSPEC_IEEE_MIN))]
  "TARGET_SSE"
  "minps\t{%2, %0|%0, %2}"
  [(set_attr "type" "sseadd")
   (set_attr "mode" "V4SF")])

(define_insn "*ieee_smaxv4sf3"
  [(set (match_operand:V4SF 0 "register_operand" "=x")
        (unspec:V4SF [(match_operand:V4SF 1 "register_operand" "0")
                      (match_operand:V4SF 2 "nonimmediate_operand" "xm")]
                     UNSPEC_IEEE_MAX))]
  "TARGET_SSE"
  "maxps\t{%2, %0|%0, %2}"
  [(set_attr "type" "sseadd")
   (set_attr "mode" "V4SF")])

(define_insn "*ieee_sminv2df3"
  [(set (match_operand:V2DF 0 "register_operand" "=x")
        (unspec:V2DF [(match_operand:V2DF 1 "register_operand" "0")
                      (match_operand:V2DF 2 "nonimmediate_operand" "xm")]
                     UNSPEC_IEEE_MIN))]
  "TARGET_SSE2"
  "minpd\t{%2, %0|%0, %2}"
  [(set_attr "type" "sseadd")
   (set_attr "mode" "V2DF")])

(define_insn "*ieee_smaxv2df3"
  [(set (match_operand:V2DF 0 "register_operand" "=x")
        (unspec:V2DF [(match_operand:V2DF 1 "register_operand" "0")
                      (match_operand:V2DF 2 "nonimmediate_operand" "xm")]
                     UNSPEC_IEEE_MAX))]
  "TARGET_SSE2"
  "maxpd\t{%2, %0|%0, %2}"
  [(set_attr "type" "sseadd")
   (set_attr "mode" "V2DF")])

;; addsubps: a vec_merge, under mask 5, of the element-wise sum and the
;; element-wise difference of operands 1 and 2.
(define_insn "sse3_addsubv4sf3"
  [(set (match_operand:V4SF 0 "register_operand" "=x")
        (vec_merge:V4SF
          (plus:V4SF
            (match_operand:V4SF 1 "register_operand" "0")
            (match_operand:V4SF 2 "nonimmediate_operand" "xm"))
          (minus:V4SF (match_dup 1) (match_dup 2))
          (const_int 5)))]
  "TARGET_SSE3"
  "addsubps\t{%2, %0|%0, %2}"
  [(set_attr "type" "sseadd")
   (set_attr "mode" "V4SF")])

;; haddps: the result is the concatenation of the pairwise sums of
;; operand 1 (elements 0+1, 2+3) and of operand 2 (elements 0+1, 2+3).
(define_insn "sse3_haddv4sf3"
  [(set (match_operand:V4SF 0 "register_operand" "=x")
        (vec_concat:V4SF
          (vec_concat:V2SF
            (plus:SF
              (vec_select:SF
                (match_operand:V4SF 1 "register_operand" "0")
                (parallel [(const_int 0)]))
              (vec_select:SF (match_dup 1) (parallel [(const_int 1)])))
            (plus:SF
              (vec_select:SF (match_dup 1) (parallel [(const_int 2)]))
              (vec_select:SF (match_dup 1) (parallel [(const_int 3)]))))
          (vec_concat:V2SF
            (plus:SF
              (vec_select:SF
                (match_operand:V4SF 2 "nonimmediate_operand" "xm")
                (parallel [(const_int 0)]))
              (vec_select:SF (match_dup 2) (parallel [(const_int 1)])))
            (plus:SF
              (vec_select:SF (match_dup 2) (parallel [(const_int 2)]))
              (vec_select:SF (match_dup 2) (parallel [(const_int 3)]))))))]
  "TARGET_SSE3"
  "haddps\t{%2, %0|%0, %2}"
  [(set_attr "type" "sseadd")
   (set_attr "mode" "V4SF")])

;; hsubps: same shape as haddps, with pairwise differences.
(define_insn "sse3_hsubv4sf3"
  [(set (match_operand:V4SF 0 "register_operand" "=x")
        (vec_concat:V4SF
          (vec_concat:V2SF
            (minus:SF
              (vec_select:SF
                (match_operand:V4SF 1 "register_operand" "0")
                (parallel [(const_int 0)]))
              (vec_select:SF (match_dup 1) (parallel [(const_int 1)])))
            (minus:SF
              (vec_select:SF (match_dup 1) (parallel [(const_int 2)]))
              (vec_select:SF (match_dup 1) (parallel [(const_int 3)]))))
          (vec_concat:V2SF
            (minus:SF
              (vec_select:SF
                (match_operand:V4SF 2 "nonimmediate_operand" "xm")
                (parallel [(const_int 0)]))
              (vec_select:SF (match_dup 2) (parallel [(const_int 1)])))
            (minus:SF
              (vec_select:SF (match_dup 2) (parallel [(const_int 2)]))
              (vec_select:SF (match_dup 2) (parallel [(const_int 3)]))))))]
  "TARGET_SSE3"
  "hsubps\t{%2, %0|%0, %2}"
  [(set_attr "type" "sseadd")
   (set_attr "mode" "V4SF")])

;; Sum reduction.  With SSE3, two haddps steps through a temporary;
;; otherwise the generic helper is driven with the add generator.
(define_expand "reduc_splus_v4sf"
  [(match_operand:V4SF 0 "register_operand" "")
   (match_operand:V4SF 1 "register_operand" "")]
  "TARGET_SSE"
{
  if (!TARGET_SSE3)
    ix86_expand_reduc_v4sf (gen_addv4sf3, operands[0], operands[1]);
  else
    {
      rtx partial = gen_reg_rtx (V4SFmode);

      emit_insn (gen_sse3_haddv4sf3 (partial, operands[1], operands[1]));
      emit_insn (gen_sse3_haddv4sf3 (operands[0], partial, partial));
    }
  DONE;
})

;; Max reduction through the generic helper.
(define_expand "reduc_smax_v4sf"
  [(match_operand:V4SF 0 "register_operand" "")
   (match_operand:V4SF 1 "register_operand" "")]
  "TARGET_SSE"
{
  ix86_expand_reduc_v4sf (gen_smaxv4sf3, operands[0], operands[1]);
  DONE;
})

;; Min reduction through the generic helper.
(define_expand "reduc_smin_v4sf"
  [(match_operand:V4SF 0 "register_operand" "")
   (match_operand:V4SF 1 "register_operand" "")]
  "TARGET_SSE"
{
  ix86_expand_reduc_v4sf (gen_sminv4sf3, operands[0], operands[1]);
  DONE;
})

;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;;
;; Parallel single-precision floating point comparisons
;;
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;

;; Packed compare.  The comparison is operator 3; %D3 in the template
;; prints the condition part of the mnemonic.
(define_insn "sse_maskcmpv4sf3"
  [(set (match_operand:V4SF 0 "register_operand" "=x")
        (match_operator:V4SF 3 "sse_comparison_operator"
          [(match_operand:V4SF 1 "register_operand" "0")
           (match_operand:V4SF 2 "nonimmediate_operand" "xm")]))]
  "TARGET_SSE"
  "cmp%D3ps\t{%2, %0|%0, %2}"
  [(set_attr "type" "ssecmp")
   (set_attr "mode" "V4SF")])

;; Scalar compare: element 0 from the comparison, the rest from operand 1.
;; Operand 2 is restricted to a register here.
(define_insn "sse_vmmaskcmpv4sf3"
  [(set (match_operand:V4SF 0 "register_operand" "=x")
        (vec_merge:V4SF
          (match_operator:V4SF 3 "sse_comparison_operator"
            [(match_operand:V4SF 1 "register_operand" "0")
             (match_operand:V4SF 2 "register_operand" "x")])
          (match_dup 1)
          (const_int 1)))]
  "TARGET_SSE"
  "cmp%D3ss\t{%2, %0|%0, %2}"
  [(set_attr "type" "ssecmp")
   (set_attr "mode" "SF")])

;; comiss: compares element 0 of both operands and sets FLAGS_REG in
;; CCFP mode.
(define_insn "sse_comi"
  [(set (reg:CCFP FLAGS_REG)
        (compare:CCFP
          (vec_select:SF
            (match_operand:V4SF 0 "register_operand" "x")
            (parallel [(const_int 0)]))
          (vec_select:SF
            (match_operand:V4SF 1 "nonimmediate_operand" "xm")
            (parallel [(const_int 0)]))))]
  "TARGET_SSE"
  "comiss\t{%1, %0|%0, %1}"
  [(set_attr "type" "ssecomi")
   (set_attr "mode" "SF")])
;; ucomiss: compares element 0 of both operands and sets FLAGS_REG in
;; CCFPU mode.
(define_insn "sse_ucomi"
  [(set (reg:CCFPU FLAGS_REG)
        (compare:CCFPU
          (vec_select:SF
            (match_operand:V4SF 0 "register_operand" "x")
            (parallel [(const_int 0)]))
          (vec_select:SF
            (match_operand:V4SF 1 "nonimmediate_operand" "xm")
            (parallel [(const_int 0)]))))]
  "TARGET_SSE"
  "ucomiss\t{%1, %0|%0, %1}"
  [(set_attr "type" "ssecomi")
   (set_attr "mode" "SF")])

;; Vector conditional move.  Operator 3 compares operands 4 and 5;
;; operands 1 and 2 are the two arms.  The expansion is attempted by
;; ix86_expand_fp_vcond and the pattern FAILs when that returns zero.
(define_expand "vcondv4sf"
  [(set (match_operand:V4SF 0 "register_operand" "")
        (if_then_else:V4SF
          (match_operator 3 ""
            [(match_operand:V4SF 4 "nonimmediate_operand" "")
             (match_operand:V4SF 5 "nonimmediate_operand" "")])
          (match_operand:V4SF 1 "general_operand" "")
          (match_operand:V4SF 2 "general_operand" "")))]
  "TARGET_SSE"
{
  if (!ix86_expand_fp_vcond (operands))
    FAIL;
  DONE;
})

;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;;
;; Parallel single-precision floating point logical operations
;;
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;

;; and / ior / xor each pair a named expander (operand fixup) with a
;; commutative insn ("%0").  andnps has only an insn.

(define_expand "andv4sf3"
  [(set (match_operand:V4SF 0 "register_operand" "")
        (and:V4SF (match_operand:V4SF 1 "nonimmediate_operand" "")
                  (match_operand:V4SF 2 "nonimmediate_operand" "")))]
  "TARGET_SSE"
  "ix86_fixup_binary_operands_no_copy (AND, V4SFmode, operands);")

(define_insn "*andv4sf3"
  [(set (match_operand:V4SF 0 "register_operand" "=x")
        (and:V4SF (match_operand:V4SF 1 "nonimmediate_operand" "%0")
                  (match_operand:V4SF 2 "nonimmediate_operand" "xm")))]
  "TARGET_SSE && ix86_binary_operator_ok (AND, V4SFmode, operands)"
  "andps\t{%2, %0|%0, %2}"
  [(set_attr "type" "sselog")
   (set_attr "mode" "V4SF")])

;; andnps: the complement applies to operand 1, which is tied to the
;; destination.
(define_insn "sse_nandv4sf3"
  [(set (match_operand:V4SF 0 "register_operand" "=x")
        (and:V4SF (not:V4SF (match_operand:V4SF 1 "register_operand" "0"))
                  (match_operand:V4SF 2 "nonimmediate_operand" "xm")))]
  "TARGET_SSE"
  "andnps\t{%2, %0|%0, %2}"
  [(set_attr "type" "sselog")
   (set_attr "mode" "V4SF")])

(define_expand "iorv4sf3"
  [(set (match_operand:V4SF 0 "register_operand" "")
        (ior:V4SF (match_operand:V4SF 1 "nonimmediate_operand" "")
                  (match_operand:V4SF 2 "nonimmediate_operand" "")))]
  "TARGET_SSE"
  "ix86_fixup_binary_operands_no_copy (IOR, V4SFmode, operands);")

(define_insn "*iorv4sf3"
  [(set (match_operand:V4SF 0 "register_operand" "=x")
        (ior:V4SF (match_operand:V4SF 1 "nonimmediate_operand" "%0")
                  (match_operand:V4SF 2 "nonimmediate_operand" "xm")))]
  "TARGET_SSE && ix86_binary_operator_ok (IOR, V4SFmode, operands)"
  "orps\t{%2, %0|%0, %2}"
  [(set_attr "type" "sselog")
   (set_attr "mode" "V4SF")])

(define_expand "xorv4sf3"
  [(set (match_operand:V4SF 0 "register_operand" "")
        (xor:V4SF (match_operand:V4SF 1 "nonimmediate_operand" "")
                  (match_operand:V4SF 2 "nonimmediate_operand" "")))]
  "TARGET_SSE"
  "ix86_fixup_binary_operands_no_copy (XOR, V4SFmode, operands);")

(define_insn "*xorv4sf3"
  [(set (match_operand:V4SF 0 "register_operand" "=x")
        (xor:V4SF (match_operand:V4SF 1 "nonimmediate_operand" "%0")
                  (match_operand:V4SF 2 "nonimmediate_operand" "xm")))]
  "TARGET_SSE && ix86_binary_operator_ok (XOR, V4SFmode, operands)"
  "xorps\t{%2, %0|%0, %2}"
  [(set_attr "type" "sselog")
   (set_attr "mode" "V4SF")])

;; Also define scalar versions.  These are used for abs, neg, and
;; conditional move.  Using subregs into vector modes causes register
;; allocation lossage.  These patterns do not allow memory operands
;; because the native instructions read the full 128-bits.
;; Scalar SF logical operations.  Both inputs must be registers; the
;; instructions emitted are the packed forms (andps/andnps/orps/xorps),
;; hence the V4SF "mode" attribute.
(define_insn "*andsf3"
  [(set (match_operand:SF 0 "register_operand" "=x")
        (and:SF (match_operand:SF 1 "register_operand" "0")
                (match_operand:SF 2 "register_operand" "x")))]
  "TARGET_SSE"
  "andps\t{%2, %0|%0, %2}"
  [(set_attr "type" "sselog")
   (set_attr "mode" "V4SF")])

(define_insn "*nandsf3"
  [(set (match_operand:SF 0 "register_operand" "=x")
        (and:SF (not:SF (match_operand:SF 1 "register_operand" "0"))
                (match_operand:SF 2 "register_operand" "x")))]
  "TARGET_SSE"
  "andnps\t{%2, %0|%0, %2}"
  [(set_attr "type" "sselog")
   (set_attr "mode" "V4SF")])

(define_insn "*iorsf3"
  [(set (match_operand:SF 0 "register_operand" "=x")
        (ior:SF (match_operand:SF 1 "register_operand" "0")
                (match_operand:SF 2 "register_operand" "x")))]
  "TARGET_SSE"
  "orps\t{%2, %0|%0, %2}"
  [(set_attr "type" "sselog")
   (set_attr "mode" "V4SF")])

(define_insn "*xorsf3"
  [(set (match_operand:SF 0 "register_operand" "=x")
        (xor:SF (match_operand:SF 1 "register_operand" "0")
                (match_operand:SF 2 "register_operand" "x")))]
  "TARGET_SSE"
  "xorps\t{%2, %0|%0, %2}"
  [(set_attr "type" "sselog")
   (set_attr "mode" "V4SF")])

;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;;
;; Parallel single-precision floating point conversion operations
;;
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;

;; cvtpi2ps: convert the V2SI operand 2 (MMX register or memory) to
;; float and merge it into elements 0 and 1 (mask 3) of operand 1.
(define_insn "sse_cvtpi2ps"
  [(set (match_operand:V4SF 0 "register_operand" "=x")
        (vec_merge:V4SF
          (vec_duplicate:V4SF
            (float:V2SF (match_operand:V2SI 2 "nonimmediate_operand" "ym")))
          (match_operand:V4SF 1 "register_operand" "0")
          (const_int 3)))]
  "TARGET_SSE"
  "cvtpi2ps\t{%2, %0|%0, %2}"
  [(set_attr "type" "ssecvt")
   (set_attr "mode" "V4SF")])

;; cvtps2pi: non-truncating conversion (UNSPEC_FIX_NOTRUNC) of the two
;; low elements of operand 1 into an MMX register.
(define_insn "sse_cvtps2pi"
  [(set (match_operand:V2SI 0 "register_operand" "=y")
        (vec_select:V2SI
          (unspec:V4SI [(match_operand:V4SF 1 "nonimmediate_operand" "xm")]
                       UNSPEC_FIX_NOTRUNC)
          (parallel [(const_int 0) (const_int 1)])))]
  "TARGET_SSE"
  "cvtps2pi\t{%1, %0|%0, %1}"
  [(set_attr "type" "ssecvt")
   (set_attr "unit" "mmx")
   (set_attr "mode" "DI")])

;; cvttps2pi: as above, but expressed with the truncating `fix' rtx.
(define_insn "sse_cvttps2pi"
  [(set (match_operand:V2SI 0 "register_operand" "=y")
        (vec_select:V2SI
          (fix:V4SI (match_operand:V4SF 1 "nonimmediate_operand" "xm"))
          (parallel [(const_int 0) (const_int 1)])))]
  "TARGET_SSE"
  "cvttps2pi\t{%1, %0|%0, %1}"
  [(set_attr "type" "ssecvt")
   (set_attr "unit" "mmx")
   (set_attr "mode" "SF")])

;; cvtsi2ss: convert SImode operand 2 to SF and merge it into element 0
;; of operand 1.
(define_insn "sse_cvtsi2ss"
  [(set (match_operand:V4SF 0 "register_operand" "=x,x")
        (vec_merge:V4SF
          (vec_duplicate:V4SF
            (float:SF (match_operand:SI 2 "nonimmediate_operand" "r,m")))
          (match_operand:V4SF 1 "register_operand" "0,0")
          (const_int 1)))]
  "TARGET_SSE"
  "cvtsi2ss\t{%2, %0|%0, %2}"
  [(set_attr "type" "sseicvt")
   (set_attr "athlon_decode" "vector,double")
   (set_attr "mode" "SF")])

;; 64-bit-only variant of the above taking a DImode source.
(define_insn "sse_cvtsi2ssq"
  [(set (match_operand:V4SF 0 "register_operand" "=x,x")
        (vec_merge:V4SF
          (vec_duplicate:V4SF
            (float:SF (match_operand:DI 2 "nonimmediate_operand" "r,rm")))
          (match_operand:V4SF 1 "register_operand" "0,0")
          (const_int 1)))]
  "TARGET_SSE && TARGET_64BIT"
  "cvtsi2ssq\t{%2, %0|%0, %2}"
  [(set_attr "type" "sseicvt")
   (set_attr "athlon_decode" "vector,double")
   (set_attr "mode" "SF")])

;; cvtss2si: non-truncating conversion of element 0 of operand 1 to SI.
(define_insn "sse_cvtss2si"
  [(set (match_operand:SI 0 "register_operand" "=r,r")
        (unspec:SI
          [(vec_select:SF
             (match_operand:V4SF 1 "nonimmediate_operand" "x,m")
             (parallel [(const_int 0)]))]
          UNSPEC_FIX_NOTRUNC))]
  "TARGET_SSE"
  "cvtss2si\t{%1, %0|%0, %1}"
  [(set_attr "type" "sseicvt")
   (set_attr "athlon_decode" "double,vector")
   (set_attr "mode" "SI")])

;; 64-bit-only variant producing a DImode result.
(define_insn "sse_cvtss2siq"
  [(set (match_operand:DI 0 "register_operand" "=r,r")
        (unspec:DI
          [(vec_select:SF
             (match_operand:V4SF 1 "nonimmediate_operand" "x,m")
             (parallel [(const_int 0)]))]
          UNSPEC_FIX_NOTRUNC))]
  "TARGET_SSE && TARGET_64BIT"
  "cvtss2siq\t{%1, %0|%0, %1}"
  [(set_attr "type" "sseicvt")
   (set_attr "athlon_decode" "double,vector")
   (set_attr "mode" "DI")])

;; cvttss2si: truncating (`fix') conversion of element 0 to SI.
(define_insn "sse_cvttss2si"
  [(set (match_operand:SI 0 "register_operand" "=r,r")
        (fix:SI
          (vec_select:SF
            (match_operand:V4SF 1 "nonimmediate_operand" "x,m")
            (parallel [(const_int 0)]))))]
  "TARGET_SSE"
  "cvttss2si\t{%1, %0|%0, %1}"
  [(set_attr "type" "sseicvt")
   (set_attr "athlon_decode" "double,vector")
   (set_attr "mode" "SI")])

;; 64-bit-only variant producing a DImode result.
(define_insn "sse_cvttss2siq"
  [(set (match_operand:DI 0 "register_operand" "=r,r")
        (fix:DI
          (vec_select:SF
            (match_operand:V4SF 1 "nonimmediate_operand" "x,m")
            (parallel [(const_int 0)]))))]
  "TARGET_SSE && TARGET_64BIT"
  "cvttss2siq\t{%1, %0|%0, %1}"
  [(set_attr "type" "sseicvt")
   (set_attr "athlon_decode" "double,vector")
   (set_attr "mode" "DI")])

;; cvtdq2ps: V4SI -> V4SF.  The "mode" attribute is V4SF because the
;; instruction produces packed single-precision values (it was
;; previously mislabelled V2DF).
(define_insn "sse2_cvtdq2ps"
  [(set (match_operand:V4SF 0 "register_operand" "=x")
        (float:V4SF (match_operand:V4SI 1 "nonimmediate_operand" "xm")))]
  "TARGET_SSE2"
  "cvtdq2ps\t{%1, %0|%0, %1}"
  [(set_attr "type" "ssecvt")
   (set_attr "mode" "V4SF")])

;; cvtps2dq: non-truncating V4SF -> V4SI conversion.
(define_insn "sse2_cvtps2dq"
  [(set (match_operand:V4SI 0 "register_operand" "=x")
        (unspec:V4SI [(match_operand:V4SF 1 "nonimmediate_operand" "xm")]
                     UNSPEC_FIX_NOTRUNC))]
  "TARGET_SSE2"
  "cvtps2dq\t{%1, %0|%0, %1}"
  [(set_attr "type" "ssecvt")
   (set_attr "mode" "TI")])

;; cvttps2dq: truncating V4SF -> V4SI conversion.
(define_insn "sse2_cvttps2dq"
  [(set (match_operand:V4SI 0 "register_operand" "=x")
        (fix:V4SI (match_operand:V4SF 1 "nonimmediate_operand" "xm")))]
  "TARGET_SSE2"
  "cvttps2dq\t{%1, %0|%0, %1}"
  [(set_attr "type" "ssecvt")
   (set_attr "mode" "TI")])

;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;;
;; Parallel single-precision floating point element swizzling
;;
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;

;; Result is { op2[2], op2[3], op1[2], op1[3] } (indices 6,7,2,3 of the
;; op1:op2 concatenation).  Three alternatives: register source
;; (movhlps), offsettable memory source (movlps from the high half, %H2),
;; and memory destination (movhps store).
(define_insn "sse_movhlps"
  [(set (match_operand:V4SF 0 "nonimmediate_operand" "=x,x,m")
        (vec_select:V4SF
          (vec_concat:V8SF
            (match_operand:V4SF 1 "nonimmediate_operand" " 0,0,0")
            (match_operand:V4SF 2 "nonimmediate_operand" " x,o,x"))
          (parallel [(const_int 6)
                     (const_int 7)
                     (const_int 2)
                     (const_int 3)])))]
  "TARGET_SSE && !(MEM_P (operands[1]) && MEM_P (operands[2]))"
  "@
   movhlps\t{%2, %0|%0, %2}
   movlps\t{%H2, %0|%0, %H2}
   movhps\t{%2, %0|%0, %2}"
  [(set_attr "type" "ssemov")
   (set_attr "mode" "V4SF,V2SF,V2SF")])

;; Result is { op1[0], op1[1], op2[0], op2[1] } (indices 0,1,4,5).
;; Alternatives: register source (movlhps), memory source (movhps load),
;; and offsettable memory destination (movlps into the high half, %H0).
(define_insn "sse_movlhps"
  [(set (match_operand:V4SF 0 "nonimmediate_operand" "=x,x,o")
        (vec_select:V4SF
          (vec_concat:V8SF
            (match_operand:V4SF 1 "nonimmediate_operand" " 0,0,0")
            (match_operand:V4SF 2 "nonimmediate_operand" " x,m,x"))
          (parallel [(const_int 0)
                     (const_int 1)
                     (const_int 4)
                     (const_int 5)])))]
  "TARGET_SSE && ix86_binary_operator_ok (UNKNOWN, V4SFmode, operands)"
  "@
   movlhps\t{%2, %0|%0, %2}
   movhps\t{%2, %0|%0, %2}
   movlps\t{%2, %H0|%H0, %2}"
  [(set_attr "type" "ssemov")
   (set_attr "mode" "V4SF,V2SF,V2SF")])

;; unpckhps: interleave the high halves, selecting indices 2,6,3,7.
(define_insn "sse_unpckhps"
  [(set (match_operand:V4SF 0 "register_operand" "=x")
        (vec_select:V4SF
          (vec_concat:V8SF
            (match_operand:V4SF 1 "register_operand" "0")
            (match_operand:V4SF 2 "nonimmediate_operand" "xm"))
          (parallel [(const_int 2) (const_int 6)
                     (const_int 3) (const_int 7)])))]
  "TARGET_SSE"
  "unpckhps\t{%2, %0|%0, %2}"
  [(set_attr "type" "sselog")
   (set_attr "mode" "V4SF")])

;; unpcklps: interleave the low halves, selecting indices 0,4,1,5.
(define_insn "sse_unpcklps"
  [(set (match_operand:V4SF 0 "register_operand" "=x")
        (vec_select:V4SF
          (vec_concat:V8SF
            (match_operand:V4SF 1 "register_operand" "0")
            (match_operand:V4SF 2 "nonimmediate_operand" "xm"))
          (parallel [(const_int 0) (const_int 4)
                     (const_int 1) (const_int 5)])))]
  "TARGET_SSE"
  "unpcklps\t{%2, %0|%0, %2}"
  [(set_attr "type" "sselog")
   (set_attr "mode" "V4SF")])

;; These are modeled with the same vec_concat as the others so that we
;; capture users of shufps that can use the new instructions.
(define_insn "sse3_movshdup"
  [(set (match_operand:V4SF 0 "register_operand" "=x")
        (vec_select:V4SF
          (vec_concat:V8SF
            (match_operand:V4SF 1 "nonimmediate_operand" "xm")
            (match_dup 1))
          (parallel [(const_int 1)
                     (const_int 1)
                     (const_int 7)
                     (const_int 7)])))]
  "TARGET_SSE3"
  "movshdup\t{%1, %0|%0, %1}"
  [(set_attr "type" "sse")
   (set_attr "mode" "V4SF")])

;; movsldup: operand 1 concatenated with itself, selecting 0,0,6,6.
(define_insn "sse3_movsldup"
  [(set (match_operand:V4SF 0 "register_operand" "=x")
        (vec_select:V4SF
          (vec_concat:V8SF
            (match_operand:V4SF 1 "nonimmediate_operand" "xm")
            (match_dup 1))
          (parallel [(const_int 0)
                     (const_int 0)
                     (const_int 6)
                     (const_int 6)])))]
  "TARGET_SSE3"
  "movsldup\t{%1, %0|%0, %1}"
  [(set_attr "type" "sse")
   (set_attr "mode" "V4SF")])

;; Intrinsic-facing shufps expander.  Operand 3 is the 8-bit immediate;
;; it is split into four 2-bit selectors.  The upper two are biased by 4
;; so they index into operand 2 within the op1:op2 concatenation used by
;; sse_shufps_1.
(define_expand "sse_shufps"
  [(match_operand:V4SF 0 "register_operand" "")
   (match_operand:V4SF 1 "register_operand" "")
   (match_operand:V4SF 2 "nonimmediate_operand" "")
   (match_operand:SI 3 "const_int_operand" "")]
  "TARGET_SSE"
{
  const int imm = INTVAL (operands[3]);
  rtx sel0 = GEN_INT ((imm >> 0) & 3);
  rtx sel1 = GEN_INT ((imm >> 2) & 3);
  rtx sel2 = GEN_INT (((imm >> 4) & 3) + 4);
  rtx sel3 = GEN_INT (((imm >> 6) & 3) + 4);

  emit_insn (gen_sse_shufps_1 (operands[0], operands[1], operands[2],
                               sel0, sel1, sel2, sel3));
  DONE;
})

;; shufps with the selectors spelled out as vec_select indices.  At
;; output time the four indices are packed back into the single
;; immediate byte (undoing the +4 bias on the upper two) and stored in
;; operands[3] for the template.
(define_insn "sse_shufps_1"
  [(set (match_operand:V4SF 0 "register_operand" "=x")
        (vec_select:V4SF
          (vec_concat:V8SF
            (match_operand:V4SF 1 "register_operand" "0")
            (match_operand:V4SF 2 "nonimmediate_operand" "xm"))
          (parallel [(match_operand 3 "const_0_to_3_operand" "")
                     (match_operand 4 "const_0_to_3_operand" "")
                     (match_operand 5 "const_4_to_7_operand" "")
                     (match_operand 6 "const_4_to_7_operand" "")])))]
  "TARGET_SSE"
{
  int imm = (INTVAL (operands[3])
             | (INTVAL (operands[4]) << 2)
             | ((INTVAL (operands[5]) - 4) << 4)
             | ((INTVAL (operands[6]) - 4) << 6));

  operands[3] = GEN_INT (imm);
  return "shufps\t{%3, %2, %0|%0, %2, %3}";
}
  [(set_attr "type" "sselog")
   (set_attr "mode" "V4SF")])

;; Extract elements 2 and 3 of operand 1 as a V2SF: store to memory
;; (movhps), register to register (movhlps), or load the high half of an
;; offsettable memory operand (movlps with %H1).
(define_insn "sse_storehps"
  [(set (match_operand:V2SF 0 "nonimmediate_operand" "=m,x,x")
        (vec_select:V2SF
          (match_operand:V4SF 1 "nonimmediate_operand" "x,x,o")
          (parallel [(const_int 2) (const_int 3)])))]
  "TARGET_SSE"
  "@
   movhps\t{%1, %0|%0, %1}
   movhlps\t{%1, %0|%0, %1}
   movlps\t{%H1, %0|%0, %H1}"
  [(set_attr "type" "ssemov")
   (set_attr "mode" "V2SF,V4SF,V2SF")])

;; Keep elements 0 and 1 of operand 1 and place the V2SF operand 2 in
;; the upper half.
(define_insn "sse_loadhps"
  [(set (match_operand:V4SF 0 "nonimmediate_operand" "=x,x,o")
        (vec_concat:V4SF
          (vec_select:V2SF
            (match_operand:V4SF 1 "nonimmediate_operand" "0,0,0")
            (parallel [(const_int 0) (const_int 1)]))
          (match_operand:V2SF 2 "nonimmediate_operand" "m,x,x")))]
  "TARGET_SSE"
  "@
   movhps\t{%2, %0|%0, %2}
   movlhps\t{%2, %0|%0, %2}
   movlps\t{%2, %H0|%H0, %2}"
  [(set_attr "type" "ssemov")
   (set_attr "mode" "V2SF,V4SF,V2SF")])

;; Extract elements 0 and 1 of operand 1 as a V2SF.  The
;; register-to-register alternative uses a full movaps.
(define_insn "sse_storelps"
  [(set (match_operand:V2SF 0 "nonimmediate_operand" "=m,x,x")
        (vec_select:V2SF
          (match_operand:V4SF 1 "nonimmediate_operand" "x,x,m")
          (parallel [(const_int 0) (const_int 1)])))]
  "TARGET_SSE"
  "@
   movlps\t{%1, %0|%0, %1}
   movaps\t{%1, %0|%0, %1}
   movlps\t{%1, %0|%0, %1}"
  [(set_attr "type" "ssemov")
   (set_attr "mode" "V2SF,V4SF,V2SF")])

;; Place the V2SF operand 2 in the lower half and keep elements 2 and 3
;; of operand 1.  When operand 2 is already in the destination register
;; a shufps with immediate 0xe4 is used; otherwise movlps.
(define_insn "sse_loadlps"
  [(set (match_operand:V4SF 0 "nonimmediate_operand" "=x,x,m")
        (vec_concat:V4SF
          (match_operand:V2SF 2 "nonimmediate_operand" "0,m,x")
          (vec_select:V2SF
            (match_operand:V4SF 1 "nonimmediate_operand" "x,0,0")
            (parallel [(const_int 2) (const_int 3)]))))]
  "TARGET_SSE"
  "@
   shufps\t{$0xe4, %1, %0|%0, %1, 0xe4}
   movlps\t{%2, %0|%0, %2}
   movlps\t{%2, %0|%0, %2}"
  [(set_attr "type" "sselog,ssemov,ssemov")
   (set_attr "mode" "V4SF,V2SF,V2SF")])

;; movss register form: element 0 comes from operand 2, the remaining
;; elements from operand 1 (tied to the destination).
(define_insn "sse_movss"
  [(set (match_operand:V4SF 0 "register_operand" "=x")
        (vec_merge:V4SF
          (match_operand:V4SF 2 "register_operand" "x")
          (match_operand:V4SF 1 "register_operand" "0")
          (const_int 1)))]
  "TARGET_SSE"
  "movss\t{%2, %0|%0, %2}"
  [(set_attr "type" "ssemov")
   (set_attr "mode" "SF")])

;; Broadcast the SF in the destination register to all four elements
;; with a self-shufps using immediate 0.
(define_insn "*vec_dupv4sf"
  [(set (match_operand:V4SF 0 "register_operand" "=x")
        (vec_duplicate:V4SF
          (match_operand:SF 1 "register_operand" "0")))]
  "TARGET_SSE"
  "shufps\t{$0, %0, %0|%0, %0, 0}"
  [(set_attr "type" "sselog1")
   (set_attr "mode" "V4SF")])

;; ??? In theory we can match memory for the MMX alternative, but allowing
;; nonimmediate_operand for operand 2 and *not* allowing memory for the SSE
;; alternatives pretty much forces the MMX alternative to be chosen.
;; Build a V2SF from two SF values.  SSE alternatives use unpcklps or,
;; when operand 2 is the constant zero (constraint C), a movss load; the
;; MMX alternatives use punpckldq / movd.
(define_insn "*sse_concatv2sf"
  [(set (match_operand:V2SF 0 "register_operand" "=x,x,*y,*y")
        (vec_concat:V2SF
          (match_operand:SF 1 "nonimmediate_operand" " 0,m, 0, m")
          (match_operand:SF 2 "reg_or_0_operand" " x,C,*y, C")))]
  "TARGET_SSE"
  "@
   unpcklps\t{%2, %0|%0, %2}
   movss\t{%1, %0|%0, %1}
   punpckldq\t{%2, %0|%0, %2}
   movd\t{%1, %0|%0, %1}"
  [(set_attr "type" "sselog,ssemov,mmxcvt,mmxmov")
   (set_attr "mode" "V4SF,SF,DI,DI")])

;; Build a V4SF from two V2SF halves: movlhps for a register upper half,
;; movhps for a memory upper half.
(define_insn "*sse_concatv4sf"
  [(set (match_operand:V4SF 0 "register_operand" "=x,x")
        (vec_concat:V4SF
          (match_operand:V2SF 1 "register_operand" " 0,0")
          (match_operand:V2SF 2 "nonimmediate_operand" " x,m")))]
  "TARGET_SSE"
  "@
   movlhps\t{%2, %0|%0, %2}
   movhps\t{%2, %0|%0, %2}"
  [(set_attr "type" "ssemov")
   (set_attr "mode" "V4SF,V2SF")])

;; Vector initialisation is handled entirely by ix86_expand_vector_init.
(define_expand "vec_initv4sf"
  [(match_operand:V4SF 0 "register_operand" "")
   (match_operand 1 "" "")]
  "TARGET_SSE"
{
  ix86_expand_vector_init (false, operands[0], operands[1]);
  DONE;
})

;; Set element 0 of a V4SF from the SF operand 2.  The last alternative
;; (memory destination) has template "#" and is handled by the
;; define_split that follows.
(define_insn "*vec_setv4sf_0"
  [(set (match_operand:V4SF 0 "nonimmediate_operand" "=x,x,Y ,m")
        (vec_merge:V4SF
          (vec_duplicate:V4SF
            (match_operand:SF 2 "general_operand" " x,m,*r,x*rfF"))
          (match_operand:V4SF 1 "vector_move_operand" " 0,C,C ,0")
          (const_int 1)))]
  "TARGET_SSE"
  "@
   movss\t{%2, %0|%0, %2}
   movss\t{%2, %0|%0, %2}
   movd\t{%2, %0|%0, %2}
   #"
  [(set_attr "type" "ssemov")
   (set_attr "mode" "SF")])

;; After reload, setting element 0 of a V4SF in memory becomes a plain
;; SFmode store to offset 0 of that memory.
(define_split
  [(set (match_operand:V4SF 0 "memory_operand" "")
        (vec_merge:V4SF
          (vec_duplicate:V4SF
            (match_operand:SF 1 "nonmemory_operand" ""))
          (match_dup 0)
          (const_int 1)))]
  "TARGET_SSE && reload_completed"
  [(const_int 0)]
{
  emit_move_insn (adjust_address (operands[0], SFmode, 0), operands[1]);
  DONE;
})

;; Generic element insert; operand 2 is the element index.
(define_expand "vec_setv4sf"
  [(match_operand:V4SF 0 "register_operand" "")
   (match_operand:SF 1 "register_operand" "")
   (match_operand 2 "const_int_operand" "")]
  "TARGET_SSE"
{
  ix86_expand_vector_set (false, operands[0], operands[1],
                          INTVAL (operands[2]));
  DONE;
})

;; Extract element 0 of a V4SF.  Always split after reload into an SFmode
;; move: a register source is re-expressed as the same hard register in
;; SFmode, anything else goes through gen_lowpart.
(define_insn_and_split "*vec_extractv4sf_0"
  [(set (match_operand:SF 0 "nonimmediate_operand" "=x,m,fr")
        (vec_select:SF
          (match_operand:V4SF 1 "nonimmediate_operand" "xm,x,m")
          (parallel [(const_int 0)])))]
  "TARGET_SSE && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
  "#"
  "&& reload_completed"
  [(const_int 0)]
{
  rtx op1 = operands[1];
  if (REG_P (op1))
    op1 = gen_rtx_REG (SFmode, REGNO (op1));
  else
    op1 = gen_lowpart (SFmode, op1);
  emit_move_insn (operands[0], op1);
  DONE;
})

;; Generic element extract; operand 2 is the element index.
(define_expand "vec_extractv4sf"
  [(match_operand:SF 0 "register_operand" "")
   (match_operand:V4SF 1 "register_operand" "")
   (match_operand 2 "const_int_operand" "")]
  "TARGET_SSE"
{
  ix86_expand_vector_extract (false, operands[0], operands[1],
                              INTVAL (operands[2]));
  DONE;
})

;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;;
;; Parallel double-precision floating point arithmetic
;;
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;

;; neg/abs are expanded by ix86_expand_fp_absneg_operator.
(define_expand "negv2df2"
  [(set (match_operand:V2DF 0 "register_operand" "")
        (neg:V2DF (match_operand:V2DF 1 "nonimmediate_operand" "")))]
  "TARGET_SSE2"
  "ix86_expand_fp_absneg_operator (NEG, V2DFmode, operands); DONE;")

(define_expand "absv2df2"
  [(set (match_operand:V2DF 0 "register_operand" "")
        (abs:V2DF (match_operand:V2DF 1 "nonimmediate_operand" "")))]
  "TARGET_SSE2"
  "ix86_expand_fp_absneg_operator (ABS, V2DFmode, operands); DONE;")

;; Packed add.
(define_expand "addv2df3"
  [(set (match_operand:V2DF 0 "register_operand" "")
        (plus:V2DF (match_operand:V2DF 1 "nonimmediate_operand" "")
                   (match_operand:V2DF 2 "nonimmediate_operand" "")))]
  "TARGET_SSE2"
  "ix86_fixup_binary_operands_no_copy (PLUS, V2DFmode, operands);")

(define_insn "*addv2df3"
  [(set (match_operand:V2DF 0 "register_operand" "=x")
        (plus:V2DF (match_operand:V2DF 1 "nonimmediate_operand" "%0")
                   (match_operand:V2DF 2 "nonimmediate_operand" "xm")))]
  "TARGET_SSE2 && ix86_binary_operator_ok (PLUS, V2DFmode, operands)"
  "addpd\t{%2, %0|%0, %2}"
  [(set_attr "type" "sseadd")
   (set_attr "mode" "V2DF")])

;; Scalar add in element 0 (addsd); element 1 is taken from operand 1.
;; The operator check is passed V2DFmode, the mode of these operands
;; (it previously named V4SFmode).
(define_insn "sse2_vmaddv2df3"
  [(set (match_operand:V2DF 0 "register_operand" "=x")
        (vec_merge:V2DF
          (plus:V2DF (match_operand:V2DF 1 "register_operand" "0")
                     (match_operand:V2DF 2 "nonimmediate_operand" "xm"))
          (match_dup 1)
          (const_int 1)))]
  "TARGET_SSE2 && ix86_binary_operator_ok (PLUS, V2DFmode, operands)"
  "addsd\t{%2, %0|%0, %2}"
  [(set_attr "type" "sseadd")
   (set_attr "mode" "DF")])

;; Packed subtract.
(define_expand "subv2df3"
  [(set (match_operand:V2DF 0 "register_operand" "")
        (minus:V2DF (match_operand:V2DF 1 "nonimmediate_operand" "")
                    (match_operand:V2DF 2 "nonimmediate_operand" "")))]
  "TARGET_SSE2"
  "ix86_fixup_binary_operands_no_copy (MINUS, V2DFmode, operands);")

(define_insn "*subv2df3"
  [(set (match_operand:V2DF 0 "register_operand" "=x")
        (minus:V2DF (match_operand:V2DF 1 "register_operand" "0")
                    (match_operand:V2DF 2 "nonimmediate_operand" "xm")))]
  "TARGET_SSE2"
  "subpd\t{%2, %0|%0, %2}"
  [(set_attr "type" "sseadd")
   (set_attr "mode" "V2DF")])

;; Scalar subtract in element 0 (subsd).
(define_insn "sse2_vmsubv2df3"
  [(set (match_operand:V2DF 0 "register_operand" "=x")
        (vec_merge:V2DF
          (minus:V2DF (match_operand:V2DF 1 "register_operand" "0")
                      (match_operand:V2DF 2 "nonimmediate_operand" "xm"))
          (match_dup 1)
          (const_int 1)))]
  "TARGET_SSE2"
  "subsd\t{%2, %0|%0, %2}"
  [(set_attr "type" "sseadd")
   (set_attr "mode" "DF")])

;; Packed multiply.
(define_expand "mulv2df3"
  [(set (match_operand:V2DF 0 "register_operand" "")
        (mult:V2DF (match_operand:V2DF 1 "nonimmediate_operand" "")
                   (match_operand:V2DF 2 "nonimmediate_operand" "")))]
  "TARGET_SSE2"
  "ix86_fixup_binary_operands_no_copy (MULT, V2DFmode, operands);")

(define_insn "*mulv2df3"
  [(set (match_operand:V2DF 0 "register_operand" "=x")
        (mult:V2DF (match_operand:V2DF 1 "nonimmediate_operand" "%0")
                   (match_operand:V2DF 2 "nonimmediate_operand" "xm")))]
  "TARGET_SSE2 && ix86_binary_operator_ok (MULT, V2DFmode, operands)"
  "mulpd\t{%2, %0|%0, %2}"
  [(set_attr "type" "ssemul")
   (set_attr "mode" "V2DF")])

;; Scalar multiply in element 0 (mulsd).
(define_insn "sse2_vmmulv2df3"
  [(set (match_operand:V2DF 0 "register_operand" "=x")
        (vec_merge:V2DF
          (mult:V2DF (match_operand:V2DF 1 "register_operand" "0")
                     (match_operand:V2DF 2 "nonimmediate_operand" "xm"))
          (match_dup 1)
          (const_int 1)))]
  "TARGET_SSE2 && ix86_binary_operator_ok (MULT, V2DFmode, operands)"
  "mulsd\t{%2, %0|%0, %2}"
  [(set_attr "type" "ssemul")
   (set_attr "mode" "DF")])

;; Packed divide.
(define_expand "divv2df3"
  [(set (match_operand:V2DF 0 "register_operand" "")
        (div:V2DF (match_operand:V2DF 1 "register_operand" "")
                  (match_operand:V2DF 2 "nonimmediate_operand" "")))]
  "TARGET_SSE2"
  "ix86_fixup_binary_operands_no_copy (DIV, V2DFmode, operands);")

(define_insn "*divv2df3"
  [(set (match_operand:V2DF 0 "register_operand" "=x")
        (div:V2DF (match_operand:V2DF 1 "register_operand" "0")
                  (match_operand:V2DF 2 "nonimmediate_operand" "xm")))]
  "TARGET_SSE2"
  "divpd\t{%2, %0|%0, %2}"
  [(set_attr "type" "ssediv")
   (set_attr "mode" "V2DF")])

;; Scalar divide in element 0 (divsd).
(define_insn "sse2_vmdivv2df3"
  [(set (match_operand:V2DF 0 "register_operand" "=x")
        (vec_merge:V2DF
          (div:V2DF (match_operand:V2DF 1 "register_operand" "0")
                    (match_operand:V2DF 2 "nonimmediate_operand" "xm"))
          (match_dup 1)
          (const_int 1)))]
  "TARGET_SSE2"
  "divsd\t{%2, %0|%0, %2}"
  [(set_attr "type" "ssediv")
   (set_attr "mode" "DF")])

;; Packed square root.
(define_insn "sqrtv2df2"
  [(set (match_operand:V2DF 0 "register_operand" "=x")
        (sqrt:V2DF (match_operand:V2DF 1 "nonimmediate_operand" "xm")))]
  "TARGET_SSE2"
  "sqrtpd\t{%1, %0|%0, %1}"
  [(set_attr "type" "sse")
   (set_attr "mode" "V2DF")])

;; Scalar square root of element 0 of operand 1 (sqrtsd); element 1 is
;; taken from operand 2.  Operand 1 uses nonimmediate_operand so that the
;; predicate agrees with its "xm" constraint (it was register_operand,
;; which made the memory alternative unmatchable).
(define_insn "sse2_vmsqrtv2df2"
  [(set (match_operand:V2DF 0 "register_operand" "=x")
        (vec_merge:V2DF
          (sqrt:V2DF (match_operand:V2DF 1 "nonimmediate_operand" "xm"))
          (match_operand:V2DF 2 "register_operand" "0")
          (const_int 1)))]
  "TARGET_SSE2"
  "sqrtsd\t{%1, %0|%0, %1}"
  [(set_attr "type" "sse")
   (set_attr "mode" "DF")])

;; ??? For !flag_finite_math_only, the representation with SMIN/SMAX
;; isn't really correct, as those rtl operators aren't defined when
;; applied to NaNs.  Hopefully the optimizers won't get too smart on us.
;; Packed maximum.  Unless flag_finite_math_only is set, operand 1 is
;; forced into a register before the usual binary-operand fixup.
(define_expand "smaxv2df3"
  [(set (match_operand:V2DF 0 "register_operand" "")
        (smax:V2DF (match_operand:V2DF 1 "nonimmediate_operand" "")
                   (match_operand:V2DF 2 "nonimmediate_operand" "")))]
  "TARGET_SSE2"
{
  if (!flag_finite_math_only)
    operands[1] = force_reg (V2DFmode, operands[1]);
  ix86_fixup_binary_operands_no_copy (SMAX, V2DFmode, operands);
})

;; Commutative form ("%0"), only available with flag_finite_math_only.
(define_insn "*smaxv2df3_finite"
  [(set (match_operand:V2DF 0 "register_operand" "=x")
        (smax:V2DF (match_operand:V2DF 1 "nonimmediate_operand" "%0")
                   (match_operand:V2DF 2 "nonimmediate_operand" "xm")))]
  "TARGET_SSE2 && flag_finite_math_only
   && ix86_binary_operator_ok (SMAX, V2DFmode, operands)"
  "maxpd\t{%2, %0|%0, %2}"
  [(set_attr "type" "sseadd")
   (set_attr "mode" "V2DF")])

;; Non-commutative form: operand 1 must be the destination register.
(define_insn "*smaxv2df3"
  [(set (match_operand:V2DF 0 "register_operand" "=x")
        (smax:V2DF (match_operand:V2DF 1 "register_operand" "0")
                   (match_operand:V2DF 2 "nonimmediate_operand" "xm")))]
  "TARGET_SSE2"
  "maxpd\t{%2, %0|%0, %2}"
  [(set_attr "type" "sseadd")
   (set_attr "mode" "V2DF")])

;; Scalar maximum in element 0 (maxsd); element 1 comes from operand 1.
(define_insn "sse2_vmsmaxv2df3"
  [(set (match_operand:V2DF 0 "register_operand" "=x")
        (vec_merge:V2DF
          (smax:V2DF (match_operand:V2DF 1 "register_operand" "0")
                     (match_operand:V2DF 2 "nonimmediate_operand" "xm"))
          (match_dup 1)
          (const_int 1)))]
  "TARGET_SSE2"
  "maxsd\t{%2, %0|%0, %2}"
  [(set_attr "type" "sseadd")
   (set_attr "mode" "DF")])

;; Packed minimum; same structure as the maximum patterns.
(define_expand "sminv2df3"
  [(set (match_operand:V2DF 0 "register_operand" "")
        (smin:V2DF (match_operand:V2DF 1 "nonimmediate_operand" "")
                   (match_operand:V2DF 2 "nonimmediate_operand" "")))]
  "TARGET_SSE2"
{
  if (!flag_finite_math_only)
    operands[1] = force_reg (V2DFmode, operands[1]);
  ix86_fixup_binary_operands_no_copy (SMIN, V2DFmode, operands);
})

(define_insn "*sminv2df3_finite"
  [(set (match_operand:V2DF 0 "register_operand" "=x")
        (smin:V2DF (match_operand:V2DF 1 "nonimmediate_operand" "%0")
                   (match_operand:V2DF 2 "nonimmediate_operand" "xm")))]
  "TARGET_SSE2 && flag_finite_math_only
   && ix86_binary_operator_ok (SMIN, V2DFmode, operands)"
  "minpd\t{%2, %0|%0, %2}"
  [(set_attr "type" "sseadd")
   (set_attr "mode" "V2DF")])

(define_insn "*sminv2df3"
  [(set (match_operand:V2DF 0 "register_operand" "=x")
        (smin:V2DF (match_operand:V2DF 1 "register_operand" "0")
                   (match_operand:V2DF 2 "nonimmediate_operand" "xm")))]
  "TARGET_SSE2"
  "minpd\t{%2, %0|%0, %2}"
  [(set_attr "type" "sseadd")
   (set_attr "mode" "V2DF")])

;; Scalar minimum in element 0 (minsd).
(define_insn "sse2_vmsminv2df3"
  [(set (match_operand:V2DF 0 "register_operand" "=x")
        (vec_merge:V2DF
          (smin:V2DF (match_operand:V2DF 1 "register_operand" "0")
                     (match_operand:V2DF 2 "nonimmediate_operand" "xm"))
          (match_dup 1)
          (const_int 1)))]
  "TARGET_SSE2"
  "minsd\t{%2, %0|%0, %2}"
  [(set_attr "type" "sseadd")
   (set_attr "mode" "DF")])

;; addsubpd subtracts in element 0 and adds in element 1.  In vec_merge a
;; set mask bit selects the element from the first arm (the sum), so the
;; mask must be 2 (bit 1 only): element 1 = op1 + op2, element 0 =
;; op1 - op2.  The previous mask of 1 described the opposite assignment.
(define_insn "sse3_addsubv2df3"
  [(set (match_operand:V2DF 0 "register_operand" "=x")
        (vec_merge:V2DF
          (plus:V2DF
            (match_operand:V2DF 1 "register_operand" "0")
            (match_operand:V2DF 2 "nonimmediate_operand" "xm"))
          (minus:V2DF (match_dup 1) (match_dup 2))
          (const_int 2)))]
  "TARGET_SSE3"
  "addsubpd\t{%2, %0|%0, %2}"
  [(set_attr "type" "sseadd")
   (set_attr "mode" "V2DF")])

;; haddpd: result is { op1[0] + op1[1], op2[0] + op2[1] }.
(define_insn "sse3_haddv2df3"
  [(set (match_operand:V2DF 0 "register_operand" "=x")
        (vec_concat:V2DF
          (plus:DF
            (vec_select:DF
              (match_operand:V2DF 1 "register_operand" "0")
              (parallel [(const_int 0)]))
            (vec_select:DF (match_dup 1) (parallel [(const_int 1)])))
          (plus:DF
            (vec_select:DF
              (match_operand:V2DF 2 "nonimmediate_operand" "xm")
              (parallel [(const_int 0)]))
            (vec_select:DF (match_dup 2) (parallel [(const_int 1)])))))]
  "TARGET_SSE3"
  "haddpd\t{%2, %0|%0, %2}"
  [(set_attr "type" "sseadd")
   (set_attr "mode" "V2DF")])

;; hsubpd: result is { op1[0] - op1[1], op2[0] - op2[1] }.
(define_insn "sse3_hsubv2df3"
  [(set (match_operand:V2DF 0 "register_operand" "=x")
        (vec_concat:V2DF
          (minus:DF
            (vec_select:DF
              (match_operand:V2DF 1 "register_operand" "0")
              (parallel [(const_int 0)]))
            (vec_select:DF (match_dup 1) (parallel [(const_int 1)])))
          (minus:DF
            (vec_select:DF
              (match_operand:V2DF 2 "nonimmediate_operand" "xm")
              (parallel [(const_int 0)]))
            (vec_select:DF (match_dup 2) (parallel [(const_int 1)])))))]
  "TARGET_SSE3"
  "hsubpd\t{%2, %0|%0, %2}"
  [(set_attr "type" "sseadd")
   (set_attr "mode" "V2DF")])

;; Sum reduction of a V2DF: a horizontal add of operand 1 with itself.
(define_expand "reduc_splus_v2df"
  [(match_operand:V2DF 0 "register_operand" "")
   (match_operand:V2DF 1 "register_operand" "")]
  "TARGET_SSE3"
{
  emit_insn (gen_sse3_haddv2df3 (operands[0], operands[1], operands[1]));
  DONE;
})

;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;;
;; Parallel double-precision floating point comparisons
;;
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;

;; Packed mask compare; operator 3 supplies the condition suffix via %D3.
(define_insn "sse2_maskcmpv2df3"
  [(set (match_operand:V2DF 0 "register_operand" "=x")
        (match_operator:V2DF 3 "sse_comparison_operator"
          [(match_operand:V2DF 1 "register_operand" "0")
           (match_operand:V2DF 2 "nonimmediate_operand" "xm")]))]
  "TARGET_SSE2"
  "cmp%D3pd\t{%2, %0|%0, %2}"
  [(set_attr "type" "ssecmp")
   (set_attr "mode" "V2DF")])

;; Scalar mask compare in element 0; element 1 comes from operand 1.
(define_insn "sse2_vmmaskcmpv2df3"
  [(set (match_operand:V2DF 0 "register_operand" "=x")
        (vec_merge:V2DF
          (match_operator:V2DF 3 "sse_comparison_operator"
            [(match_operand:V2DF 1 "register_operand" "0")
             (match_operand:V2DF 2 "nonimmediate_operand" "xm")])
          (match_dup 1)
          (const_int 1)))]
  "TARGET_SSE2"
  "cmp%D3sd\t{%2, %0|%0, %2}"
  [(set_attr "type" "ssecmp")
   (set_attr "mode" "DF")])

;; comisd on element 0 of both operands; sets FLAGS_REG in CCFP mode.
(define_insn "sse2_comi"
  [(set (reg:CCFP FLAGS_REG)
        (compare:CCFP
          (vec_select:DF
            (match_operand:V2DF 0 "register_operand" "x")
            (parallel [(const_int 0)]))
          (vec_select:DF
            (match_operand:V2DF 1 "nonimmediate_operand" "xm")
            (parallel [(const_int 0)]))))]
  "TARGET_SSE2"
  "comisd\t{%1, %0|%0, %1}"
  [(set_attr "type" "ssecomi")
   (set_attr "mode" "DF")])

;; ucomisd on element 0 of both operands; sets FLAGS_REG in CCFPU mode.
(define_insn "sse2_ucomi"
  [(set (reg:CCFPU FLAGS_REG)
        (compare:CCFPU
          (vec_select:DF
            (match_operand:V2DF 0 "register_operand" "x")
            (parallel [(const_int 0)]))
          (vec_select:DF
            (match_operand:V2DF 1 "nonimmediate_operand" "xm")
            (parallel [(const_int 0)]))))]
  "TARGET_SSE2"
  "ucomisd\t{%1, %0|%0, %1}"
  [(set_attr "type" "ssecomi")
   (set_attr "mode" "DF")])

;; V2DF conditional select, delegated to ix86_expand_fp_vcond; the
;; expansion fails when that helper returns false.
(define_expand "vcondv2df"
  [(set (match_operand:V2DF 0 "register_operand" "")
        (if_then_else:V2DF
          (match_operator 3 ""
            [(match_operand:V2DF 4 "nonimmediate_operand" "")
             (match_operand:V2DF 5 "nonimmediate_operand" "")])
          (match_operand:V2DF 1 "general_operand" "")
          (match_operand:V2DF 2 "general_operand" "")))]
  "TARGET_SSE2"
{
  if (ix86_expand_fp_vcond (operands))
    DONE;
  else
    FAIL;
})

;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;;
;; Parallel double-precision floating point logical operations
;;
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;

;; Packed AND.
(define_expand "andv2df3"
  [(set (match_operand:V2DF 0 "register_operand" "")
        (and:V2DF (match_operand:V2DF 1 "nonimmediate_operand" "")
                  (match_operand:V2DF 2 "nonimmediate_operand" "")))]
  "TARGET_SSE2"
  "ix86_fixup_binary_operands_no_copy (AND, V2DFmode, operands);")

(define_insn "*andv2df3"
  [(set (match_operand:V2DF 0 "register_operand" "=x")
        (and:V2DF (match_operand:V2DF 1 "nonimmediate_operand" "%0")
                  (match_operand:V2DF 2 "nonimmediate_operand" "xm")))]
  "TARGET_SSE2 && ix86_binary_operator_ok (AND, V2DFmode, operands)"
  "andpd\t{%2, %0|%0, %2}"
  [(set_attr "type" "sselog")
   (set_attr "mode" "V2DF")])

;; andnpd: (~operand 1) & operand 2.
(define_insn "sse2_nandv2df3"
  [(set (match_operand:V2DF 0 "register_operand" "=x")
        (and:V2DF (not:V2DF (match_operand:V2DF 1 "register_operand" "0"))
                  (match_operand:V2DF 2 "nonimmediate_operand" "xm")))]
  "TARGET_SSE2"
  "andnpd\t{%2, %0|%0, %2}"
  [(set_attr "type" "sselog")
   (set_attr "mode" "V2DF")])

;; Packed inclusive OR.
(define_expand "iorv2df3"
  [(set (match_operand:V2DF 0 "register_operand" "")
        (ior:V2DF (match_operand:V2DF 1 "nonimmediate_operand" "")
                  (match_operand:V2DF 2 "nonimmediate_operand" "")))]
  "TARGET_SSE2"
  "ix86_fixup_binary_operands_no_copy (IOR, V2DFmode, operands);")

(define_insn "*iorv2df3"
  [(set (match_operand:V2DF 0 "register_operand" "=x")
        (ior:V2DF (match_operand:V2DF 1 "nonimmediate_operand" "%0")
                  (match_operand:V2DF 2 "nonimmediate_operand" "xm")))]
  "TARGET_SSE2 && ix86_binary_operator_ok (IOR, V2DFmode, operands)"
  "orpd\t{%2, %0|%0, %2}"
  [(set_attr "type" "sselog")
   (set_attr "mode" "V2DF")])

;; Packed exclusive OR.
(define_expand "xorv2df3"
  [(set (match_operand:V2DF 0 "register_operand" "")
        (xor:V2DF (match_operand:V2DF 1 "nonimmediate_operand" "")
                  (match_operand:V2DF 2 "nonimmediate_operand" "")))]
  "TARGET_SSE2"
  "ix86_fixup_binary_operands_no_copy (XOR, V2DFmode, operands);")

(define_insn "*xorv2df3"
  [(set (match_operand:V2DF 0 "register_operand" "=x")
        (xor:V2DF (match_operand:V2DF 1 "nonimmediate_operand" "%0")
                  (match_operand:V2DF 2 "nonimmediate_operand" "xm")))]
  "TARGET_SSE2 && ix86_binary_operator_ok (XOR, V2DFmode, operands)"
  "xorpd\t{%2, %0|%0, %2}"
  [(set_attr "type" "sselog")
   (set_attr "mode" "V2DF")])

;; Also define scalar versions.  These are used for abs, neg, and
;; conditional move.  Using subregs into vector modes causes register
;; allocation lossage.  These patterns do not allow memory operands
;; because the native instructions read the full 128-bits.

;; Scalar DF bitwise AND, register operands only; emitted as the packed
;; andpd instruction (hence mode attribute V2DF).
(define_insn "*anddf3"
  [(set (match_operand:DF 0 "register_operand" "=x")
        (and:DF (match_operand:DF 1 "register_operand" "0")
                (match_operand:DF 2 "register_operand" "x")))]
  "TARGET_SSE2"
  "andpd\t{%2, %0|%0, %2}"
  [(set_attr "type" "sselog")
   (set_attr "mode" "V2DF")])

;; Scalar DF (~operand 1) & operand 2; emitted as andnpd.
(define_insn "*nanddf3"
  [(set (match_operand:DF 0 "register_operand" "=x")
        (and:DF (not:DF (match_operand:DF 1 "register_operand" "0"))
                (match_operand:DF 2 "register_operand" "x")))]
  "TARGET_SSE2"
  "andnpd\t{%2, %0|%0, %2}"
  [(set_attr "type" "sselog")
   (set_attr "mode" "V2DF")])

;; Scalar DF bitwise inclusive OR; emitted as orpd.
(define_insn "*iordf3"
  [(set (match_operand:DF 0 "register_operand" "=x")
        (ior:DF (match_operand:DF 1 "register_operand" "0")
                (match_operand:DF 2 "register_operand" "x")))]
  "TARGET_SSE2"
  "orpd\t{%2, %0|%0, %2}"
  [(set_attr "type" "sselog")
   (set_attr "mode" "V2DF")])

;; Scalar DF bitwise exclusive OR; emitted as xorpd.
(define_insn "*xordf3"
  [(set (match_operand:DF 0 "register_operand" "=x")
        (xor:DF (match_operand:DF 1 "register_operand" "0")
                (match_operand:DF 2 "register_operand" "x")))]
  "TARGET_SSE2"
  "xorpd\t{%2, %0|%0, %2}"
  [(set_attr "type" "sselog")
   (set_attr "mode" "V2DF")])

;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;;
;; Parallel double-precision floating point conversion operations
;;
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;

;; V2SI -> V2DF conversion (float); the source is either a "y" register
;; or memory.  Emitted as cvtpi2pd.
(define_insn "sse2_cvtpi2pd"
  [(set (match_operand:V2DF 0 "register_operand" "=x,x")
        (float:V2DF (match_operand:V2SI 1 "nonimmediate_operand" "y,m")))]
  "TARGET_SSE2"
  "cvtpi2pd\t{%1, %0|%0, %1}"
[(set_attr "type" "ssecvt")
   (set_attr "unit" "mmx,*")
   (set_attr "mode" "V2DF")])

;; V2DF -> V2SI conversion expressed as UNSPEC_FIX_NOTRUNC, with the
;; result in a "y" register.  Emitted as cvtpd2pi.
(define_insn "sse2_cvtpd2pi"
  [(set (match_operand:V2SI 0 "register_operand" "=y")
        (unspec:V2SI [(match_operand:V2DF 1 "nonimmediate_operand" "xm")]
                     UNSPEC_FIX_NOTRUNC))]
  "TARGET_SSE2"
  "cvtpd2pi\t{%1, %0|%0, %1}"
  [(set_attr "type" "ssecvt")
   (set_attr "unit" "mmx")
   (set_attr "mode" "DI")])

;; V2DF -> V2SI conversion expressed with the RTL `fix' operator, with
;; the result in a "y" register.  Emitted as cvttpd2pi.
(define_insn "sse2_cvttpd2pi"
  [(set (match_operand:V2SI 0 "register_operand" "=y")
        (fix:V2SI (match_operand:V2DF 1 "nonimmediate_operand" "xm")))]
  "TARGET_SSE2"
  "cvttpd2pi\t{%1, %0|%0, %1}"
  [(set_attr "type" "ssecvt")
   (set_attr "unit" "mmx")
   (set_attr "mode" "TI")])

;; Convert the SI operand 2 to DF and merge it into element 0 of
;; operand 1 (vec_merge mask 1); the other element is kept.  Emitted as
;; cvtsi2sd.
(define_insn "sse2_cvtsi2sd"
  [(set (match_operand:V2DF 0 "register_operand" "=x,x")
        (vec_merge:V2DF
          (vec_duplicate:V2DF
            (float:DF (match_operand:SI 2 "nonimmediate_operand" "r,m")))
          (match_operand:V2DF 1 "register_operand" "0,0")
          (const_int 1)))]
  "TARGET_SSE2"
  "cvtsi2sd\t{%2, %0|%0, %2}"
  [(set_attr "type" "sseicvt")
   (set_attr "mode" "DF")
   (set_attr "athlon_decode" "double,direct")])

;; DImode-source variant of sse2_cvtsi2sd; only enabled for
;; TARGET_64BIT.  Emitted as cvtsi2sdq.
(define_insn "sse2_cvtsi2sdq"
  [(set (match_operand:V2DF 0 "register_operand" "=x,x")
        (vec_merge:V2DF
          (vec_duplicate:V2DF
            (float:DF (match_operand:DI 2 "nonimmediate_operand" "r,m")))
          (match_operand:V2DF 1 "register_operand" "0,0")
          (const_int 1)))]
  "TARGET_SSE2 && TARGET_64BIT"
  "cvtsi2sdq\t{%2, %0|%0, %2}"
  [(set_attr "type" "sseicvt")
   (set_attr "mode" "DF")
   (set_attr "athlon_decode" "double,direct")])

;; Convert element 0 of the V2DF operand to SI, expressed as
;; UNSPEC_FIX_NOTRUNC.  Emitted as cvtsd2si.
(define_insn "sse2_cvtsd2si"
  [(set (match_operand:SI 0 "register_operand" "=r,r")
        (unspec:SI
          [(vec_select:DF
             (match_operand:V2DF 1 "nonimmediate_operand" "x,m")
             (parallel [(const_int 0)]))]
          UNSPEC_FIX_NOTRUNC))]
  "TARGET_SSE2"
  "cvtsd2si\t{%1, %0|%0, %1}"
  [(set_attr "type" "sseicvt")
(set_attr "athlon_decode" "double,vector")
   (set_attr "mode" "SI")])

;; DImode-result variant of sse2_cvtsd2si; only enabled for
;; TARGET_64BIT.  Emitted as cvtsd2siq.
(define_insn "sse2_cvtsd2siq"
  [(set (match_operand:DI 0 "register_operand" "=r,r")
        (unspec:DI
          [(vec_select:DF
             (match_operand:V2DF 1 "nonimmediate_operand" "x,m")
             (parallel [(const_int 0)]))]
          UNSPEC_FIX_NOTRUNC))]
  "TARGET_SSE2 && TARGET_64BIT"
  "cvtsd2siq\t{%1, %0|%0, %1}"
  [(set_attr "type" "sseicvt")
   (set_attr "athlon_decode" "double,vector")
   (set_attr "mode" "DI")])

;; Convert element 0 of the V2DF operand to SI using the RTL `fix'
;; operator.  Emitted as cvttsd2si.
(define_insn "sse2_cvttsd2si"
  [(set (match_operand:SI 0 "register_operand" "=r,r")
        (fix:SI
          (vec_select:DF
            (match_operand:V2DF 1 "nonimmediate_operand" "x,m")
            (parallel [(const_int 0)]))))]
  "TARGET_SSE2"
  "cvttsd2si\t{%1, %0|%0, %1}"
  [(set_attr "type" "sseicvt")
   (set_attr "mode" "SI")
   (set_attr "athlon_decode" "double,vector")])

;; DImode-result variant of sse2_cvttsd2si; only enabled for
;; TARGET_64BIT.  Emitted as cvttsd2siq.
(define_insn "sse2_cvttsd2siq"
  [(set (match_operand:DI 0 "register_operand" "=r,r")
        (fix:DI
          (vec_select:DF
            (match_operand:V2DF 1 "nonimmediate_operand" "x,m")
            (parallel [(const_int 0)]))))]
  "TARGET_SSE2 && TARGET_64BIT"
  "cvttsd2siq\t{%1, %0|%0, %1}"
  [(set_attr "type" "sseicvt")
   (set_attr "mode" "DI")
   (set_attr "athlon_decode" "double,vector")])

;; Convert elements 0 and 1 of a V4SI operand to V2DF.  Emitted as
;; cvtdq2pd.
(define_insn "sse2_cvtdq2pd"
  [(set (match_operand:V2DF 0 "register_operand" "=x")
        (float:V2DF
          (vec_select:V2SI
            (match_operand:V4SI 1 "nonimmediate_operand" "xm")
            (parallel [(const_int 0) (const_int 1)]))))]
  "TARGET_SSE2"
  "cvtdq2pd\t{%1, %0|%0, %1}"
  [(set_attr "type" "ssecvt")
   (set_attr "mode" "V2DF")])

;; V2DF -> V2SI (UNSPEC_FIX_NOTRUNC) concatenated with a V2SI zero to
;; form a V4SI result.  The expander supplies the zero vector as
;; operand 2.
(define_expand "sse2_cvtpd2dq"
  [(set (match_operand:V4SI 0 "register_operand" "")
        (vec_concat:V4SI
          (unspec:V2SI [(match_operand:V2DF 1 "nonimmediate_operand" "")]
                       UNSPEC_FIX_NOTRUNC)
          (match_dup 2)))]
  "TARGET_SSE2"
  "operands[2] = CONST0_RTX (V2SImode);")

(define_insn "*sse2_cvtpd2dq"
[(set (match_operand:V4SI 0 "register_operand" "=x")
        (vec_concat:V4SI
          (unspec:V2SI [(match_operand:V2DF 1 "nonimmediate_operand" "xm")]
                       UNSPEC_FIX_NOTRUNC)
          (match_operand:V2SI 2 "const0_operand" "")))]
  "TARGET_SSE2"
  "cvtpd2dq\t{%1, %0|%0, %1}"
  [(set_attr "type" "ssecvt")
   (set_attr "mode" "TI")])

;; V2DF -> V2SI via the RTL `fix' operator, concatenated with a V2SI
;; zero to form a V4SI result.  The expander supplies the zero vector as
;; operand 2.
(define_expand "sse2_cvttpd2dq"
  [(set (match_operand:V4SI 0 "register_operand" "")
        (vec_concat:V4SI
          (fix:V2SI (match_operand:V2DF 1 "nonimmediate_operand" ""))
          (match_dup 2)))]
  "TARGET_SSE2"
  "operands[2] = CONST0_RTX (V2SImode);")

;; Matching insn for sse2_cvttpd2dq; emitted as cvttpd2dq.
(define_insn "*sse2_cvttpd2dq"
  [(set (match_operand:V4SI 0 "register_operand" "=x")
        (vec_concat:V4SI
          (fix:V2SI (match_operand:V2DF 1 "nonimmediate_operand" "xm"))
          (match_operand:V2SI 2 "const0_operand" "")))]
  "TARGET_SSE2"
  "cvttpd2dq\t{%1, %0|%0, %1}"
  [(set_attr "type" "ssecvt")
   (set_attr "mode" "TI")])

;; Merge a float_truncate of operand 2 into element 0 of the V4SF
;; operand 1 (vec_merge mask 1).  Emitted as cvtsd2ss.
(define_insn "sse2_cvtsd2ss"
  [(set (match_operand:V4SF 0 "register_operand" "=x,x")
        (vec_merge:V4SF
          (vec_duplicate:V4SF
            (float_truncate:V2SF
              (match_operand:V2DF 2 "nonimmediate_operand" "x,m")))
          (match_operand:V4SF 1 "register_operand" "0,0")
          (const_int 1)))]
  "TARGET_SSE2"
  "cvtsd2ss\t{%2, %0|%0, %2}"
  [(set_attr "type" "ssecvt")
   (set_attr "athlon_decode" "vector,double")
   (set_attr "mode" "SF")])

;; Merge a float_extend of the low elements of operand 2 into element 0
;; of the V2DF operand 1 (vec_merge mask 1).  Emitted as cvtss2sd.
(define_insn "sse2_cvtss2sd"
  [(set (match_operand:V2DF 0 "register_operand" "=x")
        (vec_merge:V2DF
          (float_extend:V2DF
            (vec_select:V2SF
              (match_operand:V4SF 2 "nonimmediate_operand" "xm")
              (parallel [(const_int 0) (const_int 1)])))
          (match_operand:V2DF 1 "register_operand" "0")
          (const_int 1)))]
  "TARGET_SSE2"
  "cvtss2sd\t{%2, %0|%0, %2}"
  [(set_attr "type" "ssecvt")
   (set_attr "mode" "DF")])

(define_expand "sse2_cvtpd2ps"
  [(set (match_operand:V4SF 0 "register_operand" "")
(vec_concat:V4SF
          (float_truncate:V2SF
            (match_operand:V2DF 1 "nonimmediate_operand" "xm"))
          (match_dup 2)))]
  "TARGET_SSE2"
  "operands[2] = CONST0_RTX (V2SFmode);")

;; float_truncate of a V2DF operand to V2SF, concatenated with a V2SF
;; zero (operand 2) to form a V4SF result.  Emitted as cvtpd2ps.
(define_insn "*sse2_cvtpd2ps"
  [(set (match_operand:V4SF 0 "register_operand" "=x")
        (vec_concat:V4SF
          (float_truncate:V2SF
            (match_operand:V2DF 1 "nonimmediate_operand" "xm"))
          (match_operand:V2SF 2 "const0_operand" "")))]
  "TARGET_SSE2"
  "cvtpd2ps\t{%1, %0|%0, %1}"
  [(set_attr "type" "ssecvt")
   (set_attr "mode" "V4SF")])

;; float_extend elements 0 and 1 of a V4SF operand to V2DF.  Emitted as
;; cvtps2pd.
(define_insn "sse2_cvtps2pd"
  [(set (match_operand:V2DF 0 "register_operand" "=x")
        (float_extend:V2DF
          (vec_select:V2SF
            (match_operand:V4SF 1 "nonimmediate_operand" "xm")
            (parallel [(const_int 0) (const_int 1)]))))]
  "TARGET_SSE2"
  "cvtps2pd\t{%1, %0|%0, %1}"
  [(set_attr "type" "ssecvt")
   (set_attr "mode" "V2DF")])

;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;;
;; Parallel double-precision floating point element swizzling
;;
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;

;; Result is (element 1 of operand 1, element 1 of operand 2), i.e.
;; elements 1 and 3 of the concatenation.  Alternatives:
;;   0: reg/reg, unpckhpd;
;;   1: operand 1 in offsettable memory, its high half (%H1) is loaded
;;      with movlpd;
;;   2: destination in memory, movhpd from operand 1 into it.
(define_insn "sse2_unpckhpd"
  [(set (match_operand:V2DF 0 "nonimmediate_operand" "=x,x,m")
        (vec_select:V2DF
          (vec_concat:V4DF
            (match_operand:V2DF 1 "nonimmediate_operand" " 0,o,x")
            (match_operand:V2DF 2 "nonimmediate_operand" " x,0,0"))
          (parallel [(const_int 1)
                     (const_int 3)])))]
  "TARGET_SSE2 && !(MEM_P (operands[1]) && MEM_P (operands[2]))"
  "@
   unpckhpd\t{%2, %0|%0, %2}
   movlpd\t{%H1, %0|%0, %H1}
   movhpd\t{%1, %0|%0, %1}"
  [(set_attr "type" "sselog,ssemov,ssemov")
   (set_attr "mode" "V2DF,V1DF,V1DF")])

(define_insn "*sse3_movddup"
  [(set (match_operand:V2DF 0 "nonimmediate_operand" "=x,o")
        (vec_select:V2DF
          (vec_concat:V4DF
            (match_operand:V2DF 1 "nonimmediate_operand" "xm,x")
            (match_dup 1))
          (parallel [(const_int 0)
(const_int 2)])))]
  "TARGET_SSE3 && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
  "@
   movddup\t{%1, %0|%0, %1}
   #"
  [(set_attr "type" "sselog1,ssemov")
   (set_attr "mode" "V2DF")])

;; After reload, split the memory-destination form of the pattern above
;; (element 0 of a register duplicated into both halves of a V2DF in
;; memory) into two DFmode stores of the register, at byte offsets 0
;; and 8.
(define_split
  [(set (match_operand:V2DF 0 "memory_operand" "")
        (vec_select:V2DF
          (vec_concat:V4DF
            (match_operand:V2DF 1 "register_operand" "")
            (match_dup 1))
          (parallel [(const_int 0)
                     (const_int 2)])))]
  "TARGET_SSE3 && reload_completed"
  [(const_int 0)]
{
  rtx low = gen_rtx_REG (DFmode, REGNO (operands[1]));
  emit_move_insn (adjust_address (operands[0], DFmode, 0), low);
  emit_move_insn (adjust_address (operands[0], DFmode, 8), low);
  DONE;
})

;; Result is (element 0 of operand 1, element 0 of operand 2), i.e.
;; elements 0 and 2 of the concatenation.  Alternatives:
;;   0: reg/reg, unpcklpd;
;;   1: operand 2 in memory, loaded with movhpd;
;;   2: destination in offsettable memory, movlpd of operand 2 into its
;;      high half (%H0).
(define_insn "sse2_unpcklpd"
  [(set (match_operand:V2DF 0 "nonimmediate_operand" "=x,x,o")
        (vec_select:V2DF
          (vec_concat:V4DF
            (match_operand:V2DF 1 "nonimmediate_operand" " 0,0,0")
            (match_operand:V2DF 2 "nonimmediate_operand" " x,m,x"))
          (parallel [(const_int 0)
                     (const_int 2)])))]
  "TARGET_SSE2 && !(MEM_P (operands[1]) && MEM_P (operands[2]))"
  "@
   unpcklpd\t{%2, %0|%0, %2}
   movhpd\t{%2, %0|%0, %2}
   movlpd\t{%2, %H0|%H0, %2}"
  [(set_attr "type" "sselog,ssemov,ssemov")
   (set_attr "mode" "V2DF,V1DF,V1DF")])

;; Expander taking a 2-bit immediate mask in operand 3.  Bit 0 becomes
;; the first selector (0 or 1) and bit 1 becomes the second selector
;; (2 or 3) passed to sse2_shufpd_1.
(define_expand "sse2_shufpd"
  [(match_operand:V2DF 0 "register_operand" "")
   (match_operand:V2DF 1 "register_operand" "")
   (match_operand:V2DF 2 "nonimmediate_operand" "")
   (match_operand:SI 3 "const_int_operand" "")]
  "TARGET_SSE2"
{
  int mask = INTVAL (operands[3]);
  emit_insn (gen_sse2_shufpd_1 (operands[0], operands[1], operands[2],
                                GEN_INT (mask & 1),
                                GEN_INT (mask & 2 ?
3 : 2)));
  DONE;
})

;; Select one element from operand 1 (selector 0..1, operand 3) and one
;; from operand 2 (selector 2..3, operand 4).  The output code rebuilds
;; the shufpd immediate: bit 0 = operand 3, bit 1 = operand 4 - 2.
(define_insn "sse2_shufpd_1"
  [(set (match_operand:V2DF 0 "register_operand" "=x")
        (vec_select:V2DF
          (vec_concat:V4DF
            (match_operand:V2DF 1 "register_operand" "0")
            (match_operand:V2DF 2 "nonimmediate_operand" "xm"))
          (parallel [(match_operand 3 "const_0_to_1_operand" "")
                     (match_operand 4 "const_2_to_3_operand" "")])))]
  "TARGET_SSE2"
{
  int mask;
  mask = INTVAL (operands[3]);
  mask |= (INTVAL (operands[4]) - 2) << 1;
  operands[3] = GEN_INT (mask);

  return "shufpd\t{%3, %2, %0|%0, %2, %3}";
}
  [(set_attr "type" "sselog")
   (set_attr "mode" "V2DF")])

;; Extract element 1 of a V2DF.  Alternatives:
;;   0: store to memory with movhpd;
;;   1: in place within one register, unpckhpd;
;;   2: source in offsettable memory, split after reload (see below).
(define_insn "sse2_storehpd"
  [(set (match_operand:DF 0 "nonimmediate_operand" "=m,x,x*fr")
        (vec_select:DF
          (match_operand:V2DF 1 "nonimmediate_operand" " x,0,o")
          (parallel [(const_int 1)])))]
  "TARGET_SSE2 && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
  "@
   movhpd\t{%1, %0|%0, %1}
   unpckhpd\t%0, %0
   #"
  [(set_attr "type" "ssemov,sselog1,ssemov")
   (set_attr "mode" "V1DF,V2DF,DF")])

;; Element 1 of a V2DF in memory is the DFmode value at byte offset 8.
(define_split
  [(set (match_operand:DF 0 "register_operand" "")
        (vec_select:DF
          (match_operand:V2DF 1 "memory_operand" "")
          (parallel [(const_int 1)])))]
  "TARGET_SSE2 && reload_completed"
  [(set (match_dup 0) (match_dup 1))]
{
  operands[1] = adjust_address (operands[1], DFmode, 8);
})

;; Extract element 0 of a V2DF.  Alternative 0 stores with movlpd; the
;; other two are split after reload into a plain DFmode move.
(define_insn "sse2_storelpd"
  [(set (match_operand:DF 0 "nonimmediate_operand" "=m,x,x*fr")
        (vec_select:DF
          (match_operand:V2DF 1 "nonimmediate_operand" " x,x,m")
          (parallel [(const_int 0)])))]
  "TARGET_SSE2 && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
  "@
   movlpd\t{%1, %0|%0, %1}
   #
   #"
  [(set_attr "type" "ssemov")
   (set_attr "mode" "V1DF,DF,DF")])

;; Element 0 is the low part of the vector: for a register source use
;; the same hard register in DFmode, otherwise take the DFmode lowpart.
(define_split
  [(set (match_operand:DF 0 "register_operand" "")
        (vec_select:DF
          (match_operand:V2DF 1 "nonimmediate_operand" "")
          (parallel [(const_int 0)])))]
  "TARGET_SSE2 && reload_completed"
  [(const_int 0)]
{
  rtx op1 = operands[1];
  if (REG_P (op1))
    op1 = gen_rtx_REG (DFmode, REGNO (op1));
  else
    op1 = gen_lowpart (DFmode, op1);
  emit_move_insn (operands[0], op1);
  DONE;
})

;; Replace element 1 of operand 1 with the DF operand 2:
;;   result = (operand 1 [0], operand 2).
;; Alternatives:
;;   0: operand 2 in memory, movhpd;
;;   1: operand 2 in a register, unpcklpd;
;;   2: destination in memory, split after reload into a DFmode store.
;; A former alternative (operand 1 in a free register, operand 2 tied
;; to the destination, "shufpd $1, %1, %0") has been removed.  shufpd
;; always takes the low result element from the destination register,
;; so it produced (dest[1], operand 1 [0]) rather than
;; (operand 1 [0], operand 2), and no immediate can fix that.
(define_insn "sse2_loadhpd"
  [(set (match_operand:V2DF 0 "nonimmediate_operand" "=x,x,o")
        (vec_concat:V2DF
          (vec_select:DF
            (match_operand:V2DF 1 "nonimmediate_operand" " 0,0,0")
            (parallel [(const_int 0)]))
          (match_operand:DF 2 "nonimmediate_operand" " m,x,x*fr")))]
  "TARGET_SSE2 && !(MEM_P (operands[1]) && MEM_P (operands[2]))"
  "@
   movhpd\t{%2, %0|%0, %2}
   unpcklpd\t{%2, %0|%0, %2}
   #"
  [(set_attr "type" "ssemov,sselog,other")
   (set_attr "mode" "V1DF,V2DF,DF")])

;; Memory-destination form of sse2_loadhpd: only the high element
;; changes, so store operand 1 as DFmode at byte offset 8.
(define_split
  [(set (match_operand:V2DF 0 "memory_operand" "")
        (vec_concat:V2DF
          (vec_select:DF (match_dup 0) (parallel [(const_int 0)]))
          (match_operand:DF 1 "register_operand" "")))]
  "TARGET_SSE2 && reload_completed"
  [(set (match_dup 0) (match_dup 1))]
{
  operands[0] = adjust_address (operands[0], DFmode, 8);
})

;; Replace element 0 of operand 1 with the DF operand 2:
;;   result = (operand 2, operand 1 [1]).
;; Alternatives:
;;   0: operand 1 is the zero vector, movsd load from memory;
;;   1: movlpd load into the destination;
;;   2: movsd from a register;
;;   3: operand 2 already in the destination, high element taken from
;;      register operand 1 with shufpd $2.  The shufpd source must be
;;      operand 1; using %2 (which is tied to %0) would leave the
;;      destination unchanged.
;;   4: operand 2 already in the destination, high element loaded from
;;      the high half of memory operand 1 with movhpd;
;;   5: destination in memory, split after reload.
(define_insn "sse2_loadlpd"
  [(set (match_operand:V2DF 0 "nonimmediate_operand" "=x,x,x,x,x,m")
        (vec_concat:V2DF
          (match_operand:DF 2 "nonimmediate_operand" " m,m,x,0,0,x*fr")
          (vec_select:DF
            (match_operand:V2DF 1 "vector_move_operand" " C,0,0,x,o,0")
            (parallel [(const_int 1)]))))]
  "TARGET_SSE2 && !(MEM_P (operands[1]) && MEM_P (operands[2]))"
  "@
   movsd\t{%2, %0|%0, %2}
   movlpd\t{%2, %0|%0, %2}
   movsd\t{%2, %0|%0, %2}
   shufpd\t{$2, %1, %0|%0, %1, 2}
   movhpd\t{%H1, %0|%0, %H1}
   #"
  [(set_attr "type" "ssemov,ssemov,ssemov,sselog,ssemov,other")
   (set_attr "mode" "DF,V1DF,V1DF,V2DF,V1DF,DF")])

(define_split
  [(set
(match_operand:V2DF 0 "memory_operand" "")
        (vec_concat:V2DF
          (match_operand:DF 1 "register_operand" "")
          (vec_select:DF (match_dup 0) (parallel [(const_int 1)]))))]
  "TARGET_SSE2 && reload_completed"
  [(set (match_dup 0) (match_dup 1))]
{
  /* The pattern keeps element 1 of the memory vector and replaces
     element 0, so the DFmode store goes to byte offset 0.  Offset 8
     would overwrite the element that must be preserved and leave
     element 0 stale.  */
  operands[0] = adjust_address (operands[0], DFmode, 0);
})

;; Not sure these two are ever used, but it doesn't hurt to have
;; them.  -aoliva
;; Extract element 1 of a V2DF when only SSE1 is available, using
;; movhps / movhlps / movlps on the high half.
(define_insn "*vec_extractv2df_1_sse"
  [(set (match_operand:DF 0 "nonimmediate_operand" "=m,x,x")
        (vec_select:DF
          (match_operand:V2DF 1 "nonimmediate_operand" "x,x,o")
          (parallel [(const_int 1)])))]
  "!TARGET_SSE2 && TARGET_SSE
   && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
  "@
   movhps\t{%1, %0|%0, %1}
   movhlps\t{%1, %0|%0, %1}
   movlps\t{%H1, %0|%0, %H1}"
  [(set_attr "type" "ssemov")
   (set_attr "mode" "V2SF,V4SF,V2SF")])

;; Extract element 0 of a V2DF when only SSE1 is available, using
;; movlps / movaps.
(define_insn "*vec_extractv2df_0_sse"
  [(set (match_operand:DF 0 "nonimmediate_operand" "=m,x,x")
        (vec_select:DF
          (match_operand:V2DF 1 "nonimmediate_operand" "x,x,m")
          (parallel [(const_int 0)])))]
  "!TARGET_SSE2 && TARGET_SSE
   && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
  "@
   movlps\t{%1, %0|%0, %1}
   movaps\t{%1, %0|%0, %1}
   movlps\t{%1, %0|%0, %1}"
  [(set_attr "type" "ssemov")
   (set_attr "mode" "V2SF,V4SF,V2SF")])

;; vec_merge with mask 1: element 0 comes from operand 2, element 1
;; from operand 1.  Alternatives:
;;   0: movsd reg -> reg;
;;   1: movlpd load of operand 2;
;;   2: movlpd store of operand 2 into the memory destination;
;;   3: operand 2 already in the destination, high element taken from
;;      register operand 1 with shufpd $2.  The shufpd source must be
;;      operand 1; using %2 (which is tied to %0) would leave the
;;      destination unchanged.
;;   4: operand 2 already in the destination, high element loaded from
;;      the high half of memory operand 1 with movhps;
;;   5: memory destination, movhps store into its high half.
(define_insn "sse2_movsd"
  [(set (match_operand:V2DF 0 "nonimmediate_operand" "=x,x,m,x,x,o")
        (vec_merge:V2DF
          (match_operand:V2DF 2 "nonimmediate_operand" " x,m,x,0,0,0")
          (match_operand:V2DF 1 "nonimmediate_operand" " 0,0,0,x,o,x")
          (const_int 1)))]
  "TARGET_SSE2"
  "@
   movsd\t{%2, %0|%0, %2}
   movlpd\t{%2, %0|%0, %2}
   movlpd\t{%2, %0|%0, %2}
   shufpd\t{$2, %1, %0|%0, %1, 2}
   movhps\t{%H1, %0|%0, %H1}
   movhps\t{%1, %H0|%H0, %1}"
  [(set_attr "type" "ssemov,ssemov,ssemov,sselog,ssemov,ssemov")
   (set_attr "mode" "DF,V1DF,V1DF,V2DF,V1DF,V1DF")])

;; Duplicate a DF value into both elements of a V2DF; SSE3 form,
;; emitted as movddup.
(define_insn "*vec_dupv2df_sse3"
  [(set (match_operand:V2DF 0 "register_operand" "=x")
        (vec_duplicate:V2DF
          (match_operand:DF 1 "nonimmediate_operand" "xm")))]
  "TARGET_SSE3"
  "movddup\t{%1, %0|%0, %1}"
  [(set_attr "type" "sselog1")
   (set_attr "mode" "DF")])

;; Duplicate a DF value that is already in the destination register,
;; emitted as unpcklpd.  The mode attribute is V2DF, matching every
;; other unpcklpd pattern in this file.
(define_insn "*vec_dupv2df"
  [(set (match_operand:V2DF 0 "register_operand" "=x")
        (vec_duplicate:V2DF
          (match_operand:DF 1 "register_operand" "0")))]
  "TARGET_SSE2"
  "unpcklpd\t%0, %0"
  [(set_attr "type" "sselog1")
   (set_attr "mode" "V2DF")])

;; vec_concat of a DF value with itself; SSE3 form, emitted as movddup.
(define_insn "*vec_concatv2df_sse3"
  [(set (match_operand:V2DF 0 "register_operand" "=x")
        (vec_concat:V2DF
          (match_operand:DF 1 "nonimmediate_operand" "xm")
          (match_dup 1)))]
  "TARGET_SSE3"
  "movddup\t{%1, %0|%0, %1}"
  [(set_attr "type" "sselog1")
   (set_attr "mode" "DF")])

;; Build a V2DF from two DF values (operand 1 low, operand 2 high).
;; The first three alternatives use the "Y" constraint and emit
;; unpcklpd / movhpd / movsd; the last two use "x" and emit
;; movlhps / movhps.
(define_insn "*vec_concatv2df"
  [(set (match_operand:V2DF 0 "register_operand" "=Y,Y,Y,x,x")
        (vec_concat:V2DF
          (match_operand:DF 1 "nonimmediate_operand" " 0,0,m,0,0")
          (match_operand:DF 2 "vector_move_operand" " Y,m,C,x,m")))]
  "TARGET_SSE"
  "@
   unpcklpd\t{%2, %0|%0, %2}
   movhpd\t{%2, %0|%0, %2}
   movsd\t{%1, %0|%0, %1}
   movlhps\t{%2, %0|%0, %2}
   movhps\t{%2, %0|%0, %2}"
  [(set_attr "type" "sselog,ssemov,ssemov,ssemov,ssemov")
   (set_attr "mode" "V2DF,V1DF,DF,V4SF,V2SF")])

;; Set element number operand 2 of the V2DF operand 0 to operand 1;
;; delegated to ix86_expand_vector_set.
(define_expand "vec_setv2df"
  [(match_operand:V2DF 0 "register_operand" "")
   (match_operand:DF 1 "register_operand" "")
   (match_operand 2 "const_int_operand" "")]
  "TARGET_SSE"
{
  ix86_expand_vector_set (false, operands[0], operands[1],
                          INTVAL (operands[2]));
  DONE;
})

(define_expand "vec_extractv2df"
  [(match_operand:DF 0 "register_operand" "")
   (match_operand:V2DF 1 "register_operand" "")
   (match_operand 2 "const_int_operand" "")]
  "TARGET_SSE"
{
ix86_expand_vector_extract (false, operands[0], operands[1],
                              INTVAL (operands[2]));
  DONE;
})

;; Initialise the V2DF operand 0 from operand 1; delegated to
;; ix86_expand_vector_init.
(define_expand "vec_initv2df"
  [(match_operand:V2DF 0 "register_operand" "")
   (match_operand 1 "" "")]
  "TARGET_SSE"
{
  ix86_expand_vector_init (false, operands[0], operands[1]);
  DONE;
})

;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;;
;; Parallel integral arithmetic
;;
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;

;; Integer vector negation, expressed as 0 - operand 1; the zero vector
;; is forced into a register as operand 2.
(define_expand "neg<mode>2"
  [(set (match_operand:SSEMODEI 0 "register_operand" "")
        (minus:SSEMODEI
          (match_dup 2)
          (match_operand:SSEMODEI 1 "nonimmediate_operand" "")))]
  "TARGET_SSE2"
  "operands[2] = force_reg (<MODE>mode, CONST0_RTX (<MODE>mode));")

;; Integer vector addition for every SSEMODEI mode.
(define_expand "add<mode>3"
  [(set (match_operand:SSEMODEI 0 "register_operand" "")
        (plus:SSEMODEI (match_operand:SSEMODEI 1 "nonimmediate_operand" "")
                       (match_operand:SSEMODEI 2 "nonimmediate_operand" "")))]
  "TARGET_SSE2"
  "ix86_fixup_binary_operands_no_copy (PLUS, <MODE>mode, operands);")

;; Matching insn for add<mode>3; emitted as padd{b,w,d,q} via the
;; ssevecsize mode attribute.
(define_insn "*add<mode>3"
  [(set (match_operand:SSEMODEI 0 "register_operand" "=x")
        (plus:SSEMODEI
          (match_operand:SSEMODEI 1 "nonimmediate_operand" "%0")
          (match_operand:SSEMODEI 2 "nonimmediate_operand" "xm")))]
  "TARGET_SSE2 && ix86_binary_operator_ok (PLUS, <MODE>mode, operands)"
  "padd<ssevecsize>\t{%2, %0|%0, %2}"
  [(set_attr "type" "sseiadd")
   (set_attr "mode" "TI")])

;; ss_plus on V16QI/V8HI; emitted as padds{b,w}.
(define_insn "sse2_ssadd<mode>3"
  [(set (match_operand:SSEMODE12 0 "register_operand" "=x")
        (ss_plus:SSEMODE12
          (match_operand:SSEMODE12 1 "nonimmediate_operand" "%0")
          (match_operand:SSEMODE12 2 "nonimmediate_operand" "xm")))]
  "TARGET_SSE2 && ix86_binary_operator_ok (SS_PLUS, <MODE>mode, operands)"
  "padds<ssevecsize>\t{%2, %0|%0, %2}"
  [(set_attr "type" "sseiadd")
   (set_attr "mode" "TI")])

;; us_plus on V16QI/V8HI; emitted as paddus{b,w}.
(define_insn "sse2_usadd<mode>3"
  [(set (match_operand:SSEMODE12 0 "register_operand" "=x")
        (us_plus:SSEMODE12
          (match_operand:SSEMODE12 1 "nonimmediate_operand" "%0")
          (match_operand:SSEMODE12 2 "nonimmediate_operand" "xm")))]
  "TARGET_SSE2 && ix86_binary_operator_ok (US_PLUS, <MODE>mode, operands)"
  "paddus<ssevecsize>\t{%2, %0|%0, %2}"
  [(set_attr "type" "sseiadd")
   (set_attr "mode" "TI")])

;; Integer vector subtraction for every SSEMODEI mode.
(define_expand "sub<mode>3"
  [(set (match_operand:SSEMODEI 0 "register_operand" "")
        (minus:SSEMODEI (match_operand:SSEMODEI 1 "register_operand" "")
                        (match_operand:SSEMODEI 2 "nonimmediate_operand" "")))]
  "TARGET_SSE2"
  "ix86_fixup_binary_operands_no_copy (MINUS, <MODE>mode, operands);")

;; Matching insn for sub<mode>3; emitted as psub{b,w,d,q}.  Operand 1
;; is tied to the destination (no commutative "%" marker).
(define_insn "*sub<mode>3"
  [(set (match_operand:SSEMODEI 0 "register_operand" "=x")
        (minus:SSEMODEI
          (match_operand:SSEMODEI 1 "register_operand" "0")
          (match_operand:SSEMODEI 2 "nonimmediate_operand" "xm")))]
  "TARGET_SSE2"
  "psub<ssevecsize>\t{%2, %0|%0, %2}"
  [(set_attr "type" "sseiadd")
   (set_attr "mode" "TI")])

;; ss_minus on V16QI/V8HI; emitted as psubs{b,w}.
(define_insn "sse2_sssub<mode>3"
  [(set (match_operand:SSEMODE12 0 "register_operand" "=x")
        (ss_minus:SSEMODE12
          (match_operand:SSEMODE12 1 "register_operand" "0")
          (match_operand:SSEMODE12 2 "nonimmediate_operand" "xm")))]
  "TARGET_SSE2"
  "psubs<ssevecsize>\t{%2, %0|%0, %2}"
  [(set_attr "type" "sseiadd")
   (set_attr "mode" "TI")])

;; us_minus on V16QI/V8HI; emitted as psubus{b,w}.
(define_insn "sse2_ussub<mode>3"
  [(set (match_operand:SSEMODE12 0 "register_operand" "=x")
        (us_minus:SSEMODE12
          (match_operand:SSEMODE12 1 "register_operand" "0")
          (match_operand:SSEMODE12 2 "nonimmediate_operand" "xm")))]
  "TARGET_SSE2"
  "psubus<ssevecsize>\t{%2, %0|%0, %2}"
  [(set_attr "type" "sseiadd")
   (set_attr "mode" "TI")])

(define_expand "mulv16qi3"
  [(set (match_operand:V16QI 0 "register_operand" "")
        (mult:V16QI
(match_operand:V16QI 1 "register_operand" "")
                    (match_operand:V16QI 2 "register_operand" "")))]
  "TARGET_SSE2"
{
  /* V16QI multiply synthesised from two V8HI multiplies: the bytes are
     widened into words with punpck{h,l}bw, multiplied with mulv8hi3,
     and the low bytes of the products are gathered back with a
     sequence of byte unpacks.  */
  rtx t[12], op0;
  int i;

  for (i = 0; i < 12; ++i)
    t[i] = gen_reg_rtx (V16QImode);

  /* Unpack data such that we've got a source byte in each low byte of
     each word.  We don't care what goes into the high byte of each word.
     Rather than trying to get zero in there, most convenient is to let
     it be a copy of the low byte.  */
  emit_insn (gen_sse2_punpckhbw (t[0], operands[1], operands[1]));
  emit_insn (gen_sse2_punpckhbw (t[1], operands[2], operands[2]));
  emit_insn (gen_sse2_punpcklbw (t[2], operands[1], operands[1]));
  emit_insn (gen_sse2_punpcklbw (t[3], operands[2], operands[2]));

  /* Multiply words.  The end-of-line annotations here give a picture of what
     the output of that instruction looks like.  Dot means don't care; the
     letters are the bytes of the result with A being the most significant.  */
  emit_insn (gen_mulv8hi3 (gen_lowpart (V8HImode, t[4]), /* .A.B.C.D.E.F.G.H */
                           gen_lowpart (V8HImode, t[0]),
                           gen_lowpart (V8HImode, t[1])));
  emit_insn (gen_mulv8hi3 (gen_lowpart (V8HImode, t[5]), /* .I.J.K.L.M.N.O.P */
                           gen_lowpart (V8HImode, t[2]),
                           gen_lowpart (V8HImode, t[3])));

  /* Extract the relevant bytes and merge them back together.  */
  emit_insn (gen_sse2_punpckhbw (t[6], t[5], t[4])); /* ..AI..BJ..CK..DL */
  emit_insn (gen_sse2_punpcklbw (t[7], t[5], t[4])); /* ..EM..FN..GO..HP */
  emit_insn (gen_sse2_punpckhbw (t[8], t[7], t[6])); /* ....AEIM....BFJN */
  emit_insn (gen_sse2_punpcklbw (t[9], t[7], t[6])); /* ....CGKO....DHLP */
  emit_insn (gen_sse2_punpckhbw (t[10], t[9], t[8])); /* ........ACEGIKMO */
  emit_insn (gen_sse2_punpcklbw (t[11], t[9], t[8])); /* ........BDFHJLNP */

  op0 = operands[0];
  emit_insn (gen_sse2_punpcklbw (op0, t[11], t[10])); /* ABCDEFGHIJKLMNOP */
  DONE;
})

;; V8HI multiply.
(define_expand "mulv8hi3"
  [(set (match_operand:V8HI 0 "register_operand" "")
        (mult:V8HI (match_operand:V8HI 1 "nonimmediate_operand" "")
                   (match_operand:V8HI 2 "nonimmediate_operand" "")))]
  "TARGET_SSE2"
  "ix86_fixup_binary_operands_no_copy (MULT, V8HImode, operands);")

;; Matching insn for mulv8hi3; emitted as pmullw.
(define_insn "*mulv8hi3"
  [(set (match_operand:V8HI 0 "register_operand" "=x")
        (mult:V8HI (match_operand:V8HI 1 "nonimmediate_operand" "%0")
                   (match_operand:V8HI 2 "nonimmediate_operand" "xm")))]
  "TARGET_SSE2 && ix86_binary_operator_ok (MULT, V8HImode, operands)"
  "pmullw\t{%2, %0|%0, %2}"
  [(set_attr "type" "sseimul")
   (set_attr "mode" "TI")])

;; High 16 bits of each sign-extended V8HI x V8HI product (product
;; shifted right by 16, then truncated).  Emitted as pmulhw.
(define_insn "sse2_smulv8hi3_highpart"
  [(set (match_operand:V8HI 0 "register_operand" "=x")
        (truncate:V8HI
          (lshiftrt:V8SI
            (mult:V8SI
              (sign_extend:V8SI
                (match_operand:V8HI 1 "nonimmediate_operand" "%0"))
              (sign_extend:V8SI
                (match_operand:V8HI 2 "nonimmediate_operand" "xm")))
            (const_int 16))))]
  "TARGET_SSE2 && ix86_binary_operator_ok (MULT, V8HImode, operands)"
  "pmulhw\t{%2, %0|%0, %2}"
  [(set_attr "type" "sseimul")
   (set_attr "mode" "TI")])

(define_insn "sse2_umulv8hi3_highpart"
  [(set (match_operand:V8HI 0 "register_operand" "=x")
        (truncate:V8HI
          (lshiftrt:V8SI
            (mult:V8SI
              (zero_extend:V8SI
(match_operand:V8HI 1 "nonimmediate_operand" "%0"))
              (zero_extend:V8SI
                (match_operand:V8HI 2 "nonimmediate_operand" "xm")))
            (const_int 16))))]
  "TARGET_SSE2 && ix86_binary_operator_ok (MULT, V8HImode, operands)"
  "pmulhuw\t{%2, %0|%0, %2}"
  [(set_attr "type" "sseimul")
   (set_attr "mode" "TI")])

;; Widening unsigned multiply: elements 0 and 2 of each V4SI operand are
;; zero-extended to DI and multiplied, giving a V2DI result.  Emitted as
;; pmuludq.  The operands checked by ix86_binary_operator_ok are V4SI,
;; so that is the mode passed to it.
(define_insn "sse2_umulv2siv2di3"
  [(set (match_operand:V2DI 0 "register_operand" "=x")
        (mult:V2DI
          (zero_extend:V2DI
            (vec_select:V2SI
              (match_operand:V4SI 1 "nonimmediate_operand" "%0")
              (parallel [(const_int 0) (const_int 2)])))
          (zero_extend:V2DI
            (vec_select:V2SI
              (match_operand:V4SI 2 "nonimmediate_operand" "xm")
              (parallel [(const_int 0) (const_int 2)])))))]
  "TARGET_SSE2 && ix86_binary_operator_ok (MULT, V4SImode, operands)"
  "pmuludq\t{%2, %0|%0, %2}"
  [(set_attr "type" "sseimul")
   (set_attr "mode" "TI")])

;; For each pair of adjacent V8HI elements: sign-extend, multiply the
;; even elements (0,2,4,6) and the odd elements (1,3,5,7) of the two
;; operands, and add the two products into one V4SI element.  Emitted
;; as pmaddwd.
(define_insn "sse2_pmaddwd"
  [(set (match_operand:V4SI 0 "register_operand" "=x")
        (plus:V4SI
          (mult:V4SI
            (sign_extend:V4SI
              (vec_select:V4HI
                (match_operand:V8HI 1 "nonimmediate_operand" "%0")
                (parallel [(const_int 0)
                           (const_int 2)
                           (const_int 4)
                           (const_int 6)])))
            (sign_extend:V4SI
              (vec_select:V4HI
                (match_operand:V8HI 2 "nonimmediate_operand" "xm")
                (parallel [(const_int 0)
                           (const_int 2)
                           (const_int 4)
                           (const_int 6)]))))
          (mult:V4SI
            (sign_extend:V4SI
              (vec_select:V4HI (match_dup 1)
                (parallel [(const_int 1)
                           (const_int 3)
                           (const_int 5)
                           (const_int 7)])))
            (sign_extend:V4SI
              (vec_select:V4HI (match_dup 2)
                (parallel [(const_int 1)
                           (const_int 3)
                           (const_int 5)
                           (const_int 7)]))))))]
  "TARGET_SSE2"
  "pmaddwd\t{%2, %0|%0, %2}"
  [(set_attr "type" "sseiadd")
   (set_attr "mode" "TI")])

;; V4SI multiply synthesised from two pmuludq widening multiplies (even
;; and odd elements), followed by a shuffle and an unpack to gather the
;; low 32 bits of each product.
(define_expand "mulv4si3"
  [(set (match_operand:V4SI 0 "register_operand" "")
        (mult:V4SI (match_operand:V4SI 1 "register_operand" "")
                   (match_operand:V4SI 2 "register_operand" "")))]
  "TARGET_SSE2"
{
  rtx t1, t2, t3, t4, t5, t6, thirtytwo;
  rtx op0, op1, op2;

  op0 = operands[0];
  op1 = operands[1];
  op2 = operands[2];
  t1 = gen_reg_rtx (V4SImode);
  t2 = gen_reg_rtx (V4SImode);
  t3 = gen_reg_rtx (V4SImode);
  t4 = gen_reg_rtx (V4SImode);
  t5 = gen_reg_rtx (V4SImode);
  t6 = gen_reg_rtx (V4SImode);
  thirtytwo = GEN_INT (32);

  /* Multiply elements 2 and 0.  */
  emit_insn (gen_sse2_umulv2siv2di3 (gen_lowpart (V2DImode, t1), op1, op2));

  /* Shift both input vectors down one element, so that elements 3 and 1
     are now in the slots for elements 2 and 0.  For K8, at least, this is
     faster than using a shuffle.  */
  emit_insn (gen_sse2_lshrti3 (gen_lowpart (TImode, t2),
                               gen_lowpart (TImode, op1), thirtytwo));
  emit_insn (gen_sse2_lshrti3 (gen_lowpart (TImode, t3),
                               gen_lowpart (TImode, op2), thirtytwo));

  /* Multiply elements 3 and 1.  */
  emit_insn (gen_sse2_umulv2siv2di3 (gen_lowpart (V2DImode, t4), t2, t3));

  /* Move the results in element 2 down to element 1; we don't care what
     goes in elements 2 and 3.  */
  emit_insn (gen_sse2_pshufd_1 (t5, t1, const0_rtx, const2_rtx,
                                const0_rtx, const0_rtx));
  emit_insn (gen_sse2_pshufd_1 (t6, t4, const0_rtx, const2_rtx,
                                const0_rtx, const0_rtx));

  /* Merge the parts back together.
*/
  emit_insn (gen_sse2_punpckldq (op0, t5, t6));
  DONE;
})

;; V2DI multiply synthesised from three pmuludq widening multiplies:
;; low*low plus the two cross products (low*high) shifted left by 32.
(define_expand "mulv2di3"
  [(set (match_operand:V2DI 0 "register_operand" "")
        (mult:V2DI (match_operand:V2DI 1 "register_operand" "")
                   (match_operand:V2DI 2 "register_operand" "")))]
  "TARGET_SSE2"
{
  rtx t1, t2, t3, t4, t5, t6, thirtytwo;
  rtx op0, op1, op2;

  op0 = operands[0];
  op1 = operands[1];
  op2 = operands[2];
  t1 = gen_reg_rtx (V2DImode);
  t2 = gen_reg_rtx (V2DImode);
  t3 = gen_reg_rtx (V2DImode);
  t4 = gen_reg_rtx (V2DImode);
  t5 = gen_reg_rtx (V2DImode);
  t6 = gen_reg_rtx (V2DImode);
  thirtytwo = GEN_INT (32);

  /* Multiply low parts.  */
  emit_insn (gen_sse2_umulv2siv2di3 (t1, gen_lowpart (V4SImode, op1),
                                     gen_lowpart (V4SImode, op2)));

  /* Shift input vectors right 32 bits (logical shift, gen_lshrv2di3) so
     that the high parts land in the low halves and can be multiplied.  */
  emit_insn (gen_lshrv2di3 (t2, op1, thirtytwo));
  emit_insn (gen_lshrv2di3 (t3, op2, thirtytwo));

  /* Multiply high parts by low parts.  */
  emit_insn (gen_sse2_umulv2siv2di3 (t4, gen_lowpart (V4SImode, op1),
                                     gen_lowpart (V4SImode, t3)));
  emit_insn (gen_sse2_umulv2siv2di3 (t5, gen_lowpart (V4SImode, op2),
                                     gen_lowpart (V4SImode, t2)));

  /* Shift them back.  */
  emit_insn (gen_ashlv2di3 (t4, t4, thirtytwo));
  emit_insn (gen_ashlv2di3 (t5, t5, thirtytwo));

  /* Add the three parts together.
*/
  emit_insn (gen_addv2di3 (t6, t1, t4));
  emit_insn (gen_addv2di3 (op0, t6, t5));
  DONE;
})

;; Signed dot product step: operand 0 = operand 3 + pmaddwd (operand 1,
;; operand 2), using a fresh V4SI temporary for the pmaddwd result.
(define_expand "sdot_prodv8hi"
  [(match_operand:V4SI 0 "register_operand" "")
   (match_operand:V8HI 1 "nonimmediate_operand" "")
   (match_operand:V8HI 2 "nonimmediate_operand" "")
   (match_operand:V4SI 3 "register_operand" "")]
  "TARGET_SSE2"
{
  rtx t = gen_reg_rtx (V4SImode);
  emit_insn (gen_sse2_pmaddwd (t, operands[1], operands[2]));
  emit_insn (gen_addv4si3 (operands[0], operands[3], t));
  DONE;
})

;; Unsigned dot product step on V4SI inputs with a V2DI accumulator
;; (operand 3).  Elements 0 and 2 are multiplied directly; both inputs
;; are then shifted right by 32 bits as TImode values so that the
;; remaining elements sit in the slots the widening multiply reads, and
;; the second product is added in.
(define_expand "udot_prodv4si"
  [(match_operand:V2DI 0 "register_operand" "")
   (match_operand:V4SI 1 "register_operand" "")
   (match_operand:V4SI 2 "register_operand" "")
   (match_operand:V2DI 3 "register_operand" "")]
  "TARGET_SSE2"
{
  rtx t1, t2, t3, t4;

  t1 = gen_reg_rtx (V2DImode);
  emit_insn (gen_sse2_umulv2siv2di3 (t1, operands[1], operands[2]));
  emit_insn (gen_addv2di3 (t1, t1, operands[3]));

  t2 = gen_reg_rtx (V4SImode);
  t3 = gen_reg_rtx (V4SImode);
  emit_insn (gen_sse2_lshrti3 (gen_lowpart (TImode, t2),
                               gen_lowpart (TImode, operands[1]),
                               GEN_INT (32)));
  emit_insn (gen_sse2_lshrti3 (gen_lowpart (TImode, t3),
                               gen_lowpart (TImode, operands[2]),
                               GEN_INT (32)));

  t4 = gen_reg_rtx (V2DImode);
  emit_insn (gen_sse2_umulv2siv2di3 (t4, t2, t3));

  emit_insn (gen_addv2di3 (operands[0], t1, t4));
  DONE;
})

;; Arithmetic right shift of V8HI/V4SI elements by a TImode register or
;; an immediate count; emitted as psra{w,d}.
(define_insn "ashr<mode>3"
  [(set (match_operand:SSEMODE24 0 "register_operand" "=x")
        (ashiftrt:SSEMODE24
          (match_operand:SSEMODE24 1 "register_operand" "0")
          (match_operand:TI 2 "nonmemory_operand" "xn")))]
  "TARGET_SSE2"
  "psra<ssevecsize>\t{%2, %0|%0, %2}"
  [(set_attr "type" "sseishft")
   (set_attr "mode" "TI")])

(define_insn "lshr<mode>3"
  [(set (match_operand:SSEMODE248 0 "register_operand" "=x")
        (lshiftrt:SSEMODE248
(match_operand:SSEMODE248 1 "register_operand" "0") 2739 (match_operand:TI 2 "nonmemory_operand" "xn")))] 2740 "TARGET_SSE2" 2741 "psrl<ssevecsize>\t{%2, %0|%0, %2}" 2742 [(set_attr "type" "sseishft") 2743 (set_attr "mode" "TI")]) 2744 2745(define_insn "ashl<mode>3" 2746 [(set (match_operand:SSEMODE248 0 "register_operand" "=x") 2747 (ashift:SSEMODE248 2748 (match_operand:SSEMODE248 1 "register_operand" "0") 2749 (match_operand:TI 2 "nonmemory_operand" "xn")))] 2750 "TARGET_SSE2" 2751 "psll<ssevecsize>\t{%2, %0|%0, %2}" 2752 [(set_attr "type" "sseishft") 2753 (set_attr "mode" "TI")]) 2754 2755(define_insn "sse2_ashlti3" 2756 [(set (match_operand:TI 0 "register_operand" "=x") 2757 (ashift:TI (match_operand:TI 1 "register_operand" "0") 2758 (match_operand:SI 2 "const_0_to_255_mul_8_operand" "n")))] 2759 "TARGET_SSE2" 2760{ 2761 operands[2] = GEN_INT (INTVAL (operands[2]) / 8); 2762 return "pslldq\t{%2, %0|%0, %2}"; 2763} 2764 [(set_attr "type" "sseishft") 2765 (set_attr "mode" "TI")]) 2766 2767(define_expand "vec_shl_<mode>" 2768 [(set (match_operand:SSEMODEI 0 "register_operand" "") 2769 (ashift:TI (match_operand:SSEMODEI 1 "register_operand" "") 2770 (match_operand:SI 2 "general_operand" "")))] 2771 "TARGET_SSE2" 2772{ 2773 if (!const_0_to_255_mul_8_operand (operands[2], SImode)) 2774 FAIL; 2775 operands[0] = gen_lowpart (TImode, operands[0]); 2776 operands[1] = gen_lowpart (TImode, operands[1]); 2777}) 2778 2779(define_insn "sse2_lshrti3" 2780 [(set (match_operand:TI 0 "register_operand" "=x") 2781 (lshiftrt:TI (match_operand:TI 1 "register_operand" "0") 2782 (match_operand:SI 2 "const_0_to_255_mul_8_operand" "n")))] 2783 "TARGET_SSE2" 2784{ 2785 operands[2] = GEN_INT (INTVAL (operands[2]) / 8); 2786 return "psrldq\t{%2, %0|%0, %2}"; 2787} 2788 [(set_attr "type" "sseishft") 2789 (set_attr "mode" "TI")]) 2790 2791(define_expand "vec_shr_<mode>" 2792 [(set (match_operand:SSEMODEI 0 "register_operand" "") 2793 (lshiftrt:TI (match_operand:SSEMODEI 1 "register_operand" 
"") 2794 (match_operand:SI 2 "general_operand" "")))] 2795 "TARGET_SSE2" 2796{ 2797 if (!const_0_to_255_mul_8_operand (operands[2], SImode)) 2798 FAIL; 2799 operands[0] = gen_lowpart (TImode, operands[0]); 2800 operands[1] = gen_lowpart (TImode, operands[1]); 2801}) 2802 2803(define_expand "umaxv16qi3" 2804 [(set (match_operand:V16QI 0 "register_operand" "") 2805 (umax:V16QI (match_operand:V16QI 1 "nonimmediate_operand" "") 2806 (match_operand:V16QI 2 "nonimmediate_operand" "")))] 2807 "TARGET_SSE2" 2808 "ix86_fixup_binary_operands_no_copy (UMAX, V16QImode, operands);") 2809 2810(define_insn "*umaxv16qi3" 2811 [(set (match_operand:V16QI 0 "register_operand" "=x") 2812 (umax:V16QI (match_operand:V16QI 1 "nonimmediate_operand" "%0") 2813 (match_operand:V16QI 2 "nonimmediate_operand" "xm")))] 2814 "TARGET_SSE2 && ix86_binary_operator_ok (UMAX, V16QImode, operands)" 2815 "pmaxub\t{%2, %0|%0, %2}" 2816 [(set_attr "type" "sseiadd") 2817 (set_attr "mode" "TI")]) 2818 2819(define_expand "smaxv8hi3" 2820 [(set (match_operand:V8HI 0 "register_operand" "") 2821 (smax:V8HI (match_operand:V8HI 1 "nonimmediate_operand" "") 2822 (match_operand:V8HI 2 "nonimmediate_operand" "")))] 2823 "TARGET_SSE2" 2824 "ix86_fixup_binary_operands_no_copy (SMAX, V8HImode, operands);") 2825 2826(define_insn "*smaxv8hi3" 2827 [(set (match_operand:V8HI 0 "register_operand" "=x") 2828 (smax:V8HI (match_operand:V8HI 1 "nonimmediate_operand" "%0") 2829 (match_operand:V8HI 2 "nonimmediate_operand" "xm")))] 2830 "TARGET_SSE2 && ix86_binary_operator_ok (SMAX, V8HImode, operands)" 2831 "pmaxsw\t{%2, %0|%0, %2}" 2832 [(set_attr "type" "sseiadd") 2833 (set_attr "mode" "TI")]) 2834 2835(define_expand "umaxv8hi3" 2836 [(set (match_operand:V8HI 0 "register_operand" "=x") 2837 (us_minus:V8HI (match_operand:V8HI 1 "register_operand" "0") 2838 (match_operand:V8HI 2 "nonimmediate_operand" "xm"))) 2839 (set (match_dup 3) 2840 (plus:V8HI (match_dup 0) (match_dup 2)))] 2841 "TARGET_SSE2" 2842{ 2843 operands[3] = 
operands[0]; 2844 if (rtx_equal_p (operands[0], operands[2])) 2845 operands[0] = gen_reg_rtx (V8HImode); 2846}) 2847 2848(define_expand "smax<mode>3" 2849 [(set (match_operand:SSEMODE14 0 "register_operand" "") 2850 (smax:SSEMODE14 (match_operand:SSEMODE14 1 "register_operand" "") 2851 (match_operand:SSEMODE14 2 "register_operand" "")))] 2852 "TARGET_SSE2" 2853{ 2854 rtx xops[6]; 2855 bool ok; 2856 2857 xops[0] = operands[0]; 2858 xops[1] = operands[1]; 2859 xops[2] = operands[2]; 2860 xops[3] = gen_rtx_GT (VOIDmode, operands[1], operands[2]); 2861 xops[4] = operands[1]; 2862 xops[5] = operands[2]; 2863 ok = ix86_expand_int_vcond (xops); 2864 gcc_assert (ok); 2865 DONE; 2866}) 2867 2868(define_expand "umaxv4si3" 2869 [(set (match_operand:V4SI 0 "register_operand" "") 2870 (umax:V4SI (match_operand:V4SI 1 "register_operand" "") 2871 (match_operand:V4SI 2 "register_operand" "")))] 2872 "TARGET_SSE2" 2873{ 2874 rtx xops[6]; 2875 bool ok; 2876 2877 xops[0] = operands[0]; 2878 xops[1] = operands[1]; 2879 xops[2] = operands[2]; 2880 xops[3] = gen_rtx_GTU (VOIDmode, operands[1], operands[2]); 2881 xops[4] = operands[1]; 2882 xops[5] = operands[2]; 2883 ok = ix86_expand_int_vcond (xops); 2884 gcc_assert (ok); 2885 DONE; 2886}) 2887 2888(define_expand "uminv16qi3" 2889 [(set (match_operand:V16QI 0 "register_operand" "") 2890 (umin:V16QI (match_operand:V16QI 1 "nonimmediate_operand" "") 2891 (match_operand:V16QI 2 "nonimmediate_operand" "")))] 2892 "TARGET_SSE2" 2893 "ix86_fixup_binary_operands_no_copy (UMIN, V16QImode, operands);") 2894 2895(define_insn "*uminv16qi3" 2896 [(set (match_operand:V16QI 0 "register_operand" "=x") 2897 (umin:V16QI (match_operand:V16QI 1 "nonimmediate_operand" "%0") 2898 (match_operand:V16QI 2 "nonimmediate_operand" "xm")))] 2899 "TARGET_SSE2 && ix86_binary_operator_ok (UMIN, V16QImode, operands)" 2900 "pminub\t{%2, %0|%0, %2}" 2901 [(set_attr "type" "sseiadd") 2902 (set_attr "mode" "TI")]) 2903 2904(define_expand "sminv8hi3" 2905 [(set 
(match_operand:V8HI 0 "register_operand" "") 2906 (smin:V8HI (match_operand:V8HI 1 "nonimmediate_operand" "") 2907 (match_operand:V8HI 2 "nonimmediate_operand" "")))] 2908 "TARGET_SSE2" 2909 "ix86_fixup_binary_operands_no_copy (SMIN, V8HImode, operands);") 2910 2911(define_insn "*sminv8hi3" 2912 [(set (match_operand:V8HI 0 "register_operand" "=x") 2913 (smin:V8HI (match_operand:V8HI 1 "nonimmediate_operand" "%0") 2914 (match_operand:V8HI 2 "nonimmediate_operand" "xm")))] 2915 "TARGET_SSE2 && ix86_binary_operator_ok (SMIN, V8HImode, operands)" 2916 "pminsw\t{%2, %0|%0, %2}" 2917 [(set_attr "type" "sseiadd") 2918 (set_attr "mode" "TI")]) 2919 2920(define_expand "smin<mode>3" 2921 [(set (match_operand:SSEMODE14 0 "register_operand" "") 2922 (smin:SSEMODE14 (match_operand:SSEMODE14 1 "register_operand" "") 2923 (match_operand:SSEMODE14 2 "register_operand" "")))] 2924 "TARGET_SSE2" 2925{ 2926 rtx xops[6]; 2927 bool ok; 2928 2929 xops[0] = operands[0]; 2930 xops[1] = operands[2]; 2931 xops[2] = operands[1]; 2932 xops[3] = gen_rtx_GT (VOIDmode, operands[1], operands[2]); 2933 xops[4] = operands[1]; 2934 xops[5] = operands[2]; 2935 ok = ix86_expand_int_vcond (xops); 2936 gcc_assert (ok); 2937 DONE; 2938}) 2939 2940(define_expand "umin<mode>3" 2941 [(set (match_operand:SSEMODE24 0 "register_operand" "") 2942 (umin:SSEMODE24 (match_operand:SSEMODE24 1 "register_operand" "") 2943 (match_operand:SSEMODE24 2 "register_operand" "")))] 2944 "TARGET_SSE2" 2945{ 2946 rtx xops[6]; 2947 bool ok; 2948 2949 xops[0] = operands[0]; 2950 xops[1] = operands[2]; 2951 xops[2] = operands[1]; 2952 xops[3] = gen_rtx_GTU (VOIDmode, operands[1], operands[2]); 2953 xops[4] = operands[1]; 2954 xops[5] = operands[2]; 2955 ok = ix86_expand_int_vcond (xops); 2956 gcc_assert (ok); 2957 DONE; 2958}) 2959 2960;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; 2961;; 2962;; Parallel integral comparisons 2963;; 2964;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; 

;; Element-wise equality compare for V16QI/V8HI/V4SI
;; (pcmpeqb/pcmpeqw/pcmpeqd).  Operand 1 is marked commutative ("%") and
;; tied to the destination; the insn condition additionally requires
;; ix86_binary_operator_ok.
(define_insn "sse2_eq<mode>3"
  [(set (match_operand:SSEMODE124 0 "register_operand" "=x")
        (eq:SSEMODE124
          (match_operand:SSEMODE124 1 "nonimmediate_operand" "%0")
          (match_operand:SSEMODE124 2 "nonimmediate_operand" "xm")))]
  "TARGET_SSE2 && ix86_binary_operator_ok (EQ, <MODE>mode, operands)"
  "pcmpeq<ssevecsize>\t{%2, %0|%0, %2}"
  [(set_attr "type" "ssecmp")
   (set_attr "mode" "TI")])

;; Element-wise greater-than compare for V16QI/V8HI/V4SI
;; (pcmpgtb/pcmpgtw/pcmpgtd).  Not commutative: operand 1 must be the
;; register tied to the destination.
(define_insn "sse2_gt<mode>3"
  [(set (match_operand:SSEMODE124 0 "register_operand" "=x")
        (gt:SSEMODE124
          (match_operand:SSEMODE124 1 "register_operand" "0")
          (match_operand:SSEMODE124 2 "nonimmediate_operand" "xm")))]
  "TARGET_SSE2"
  "pcmpgt<ssevecsize>\t{%2, %0|%0, %2}"
  [(set_attr "type" "ssecmp")
   (set_attr "mode" "TI")])

;; Vector conditional select:
;;   operand 0 = (operand 4 <operator 3> operand 5) ? operand 1 : operand 2
;; All of the work is delegated to ix86_expand_int_vcond; the expansion
;; FAILs when that function returns false.
(define_expand "vcond<mode>"
  [(set (match_operand:SSEMODE124 0 "register_operand" "")
        (if_then_else:SSEMODE124
          (match_operator 3 ""
            [(match_operand:SSEMODE124 4 "nonimmediate_operand" "")
             (match_operand:SSEMODE124 5 "nonimmediate_operand" "")])
          (match_operand:SSEMODE124 1 "general_operand" "")
          (match_operand:SSEMODE124 2 "general_operand" "")))]
  "TARGET_SSE2"
{
  if (ix86_expand_int_vcond (operands))
    DONE;
  else
    FAIL;
})

;; Same template and same expansion code as vcond<mode>, under the
;; vcondu<mode> name.
(define_expand "vcondu<mode>"
  [(set (match_operand:SSEMODE124 0 "register_operand" "")
        (if_then_else:SSEMODE124
          (match_operator 3 ""
            [(match_operand:SSEMODE124 4 "nonimmediate_operand" "")
             (match_operand:SSEMODE124 5 "nonimmediate_operand" "")])
          (match_operand:SSEMODE124 1 "general_operand" "")
          (match_operand:SSEMODE124 2 "general_operand" "")))]
  "TARGET_SSE2"
{
  if (ix86_expand_int_vcond (operands))
    DONE;
  else
    FAIL;
})

;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;;
;; Parallel integral logical operations
;;
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;

;; One's complement, expressed as XOR with an all-ones vector.  The
;; preparation code builds a CONST_VECTOR of constm1_rtx elements and
;; forces it into a register as operand 2.
(define_expand "one_cmpl<mode>2"
  [(set (match_operand:SSEMODEI 0 "register_operand" "")
        (xor:SSEMODEI (match_operand:SSEMODEI 1 "nonimmediate_operand" "")
                      (match_dup 2)))]
  "TARGET_SSE2"
{
  int i, n = GET_MODE_NUNITS (<MODE>mode);
  rtvec v = rtvec_alloc (n);

  for (i = 0; i < n; ++i)
    RTVEC_ELT (v, i) = constm1_rtx;

  operands[2] = force_reg (<MODE>mode, gen_rtx_CONST_VECTOR (<MODE>mode, v));
})

;; Bitwise AND on any integer vector mode.  The expander only calls
;; ix86_fixup_binary_operands_no_copy; the insn below emits pand.
(define_expand "and<mode>3"
  [(set (match_operand:SSEMODEI 0 "register_operand" "")
        (and:SSEMODEI (match_operand:SSEMODEI 1 "nonimmediate_operand" "")
                      (match_operand:SSEMODEI 2 "nonimmediate_operand" "")))]
  "TARGET_SSE2"
  "ix86_fixup_binary_operands_no_copy (AND, <MODE>mode, operands);")

(define_insn "*and<mode>3"
  [(set (match_operand:SSEMODEI 0 "register_operand" "=x")
        (and:SSEMODEI
          (match_operand:SSEMODEI 1 "nonimmediate_operand" "%0")
          (match_operand:SSEMODEI 2 "nonimmediate_operand" "xm")))]
  "TARGET_SSE2 && ix86_binary_operator_ok (AND, <MODE>mode, operands)"
  "pand\t{%2, %0|%0, %2}"
  [(set_attr "type" "sselog")
   (set_attr "mode" "TI")])

;; AND-NOT: (~operand 1) & operand 2, emitted as pandn.  Operand 1 is the
;; complemented input and is tied to the destination.
(define_insn "sse2_nand<mode>3"
  [(set (match_operand:SSEMODEI 0 "register_operand" "=x")
        (and:SSEMODEI
          (not:SSEMODEI (match_operand:SSEMODEI 1 "register_operand" "0"))
          (match_operand:SSEMODEI 2 "nonimmediate_operand" "xm")))]
  "TARGET_SSE2"
  "pandn\t{%2, %0|%0, %2}"
  [(set_attr "type" "sselog")
   (set_attr "mode" "TI")])

;; Bitwise inclusive OR (por); same expander/insn split as and<mode>3.
(define_expand "ior<mode>3"
  [(set (match_operand:SSEMODEI 0 "register_operand" "")
        (ior:SSEMODEI (match_operand:SSEMODEI 1 "nonimmediate_operand" "")
                      (match_operand:SSEMODEI 2 "nonimmediate_operand" "")))]
  "TARGET_SSE2"
  "ix86_fixup_binary_operands_no_copy (IOR, <MODE>mode, operands);")

(define_insn "*ior<mode>3"
  [(set (match_operand:SSEMODEI 0 "register_operand" "=x")
        (ior:SSEMODEI
          (match_operand:SSEMODEI 1 "nonimmediate_operand" "%0")
          (match_operand:SSEMODEI 2 "nonimmediate_operand" "xm")))]
  "TARGET_SSE2 && ix86_binary_operator_ok (IOR, <MODE>mode, operands)"
  "por\t{%2, %0|%0, %2}"
  [(set_attr "type" "sselog")
   (set_attr "mode" "TI")])

;; Bitwise exclusive OR (pxor); same expander/insn split as and<mode>3.
(define_expand "xor<mode>3"
  [(set (match_operand:SSEMODEI 0 "register_operand" "")
        (xor:SSEMODEI (match_operand:SSEMODEI 1 "nonimmediate_operand" "")
                      (match_operand:SSEMODEI 2 "nonimmediate_operand" "")))]
  "TARGET_SSE2"
  "ix86_fixup_binary_operands_no_copy (XOR, <MODE>mode, operands);")

(define_insn "*xor<mode>3"
  [(set (match_operand:SSEMODEI 0 "register_operand" "=x")
        (xor:SSEMODEI
          (match_operand:SSEMODEI 1 "nonimmediate_operand" "%0")
          (match_operand:SSEMODEI 2 "nonimmediate_operand" "xm")))]
  "TARGET_SSE2 && ix86_binary_operator_ok (XOR, <MODE>mode, operands)"
  "pxor\t{%2, %0|%0, %2}"
  [(set_attr "type" "sselog")
   (set_attr "mode" "TI")])

;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;;
;; Parallel integral element swizzling
;;
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;

;; Pack patterns: each input is narrowed with a saturating truncate
;; (ss_truncate = signed, us_truncate = unsigned) and the two halves are
;; concatenated, operand 1 first.
(define_insn "sse2_packsswb"
  [(set (match_operand:V16QI 0 "register_operand" "=x")
        (vec_concat:V16QI
          (ss_truncate:V8QI
            (match_operand:V8HI 1 "register_operand" "0"))
          (ss_truncate:V8QI
            (match_operand:V8HI 2 "nonimmediate_operand" "xm"))))]
  "TARGET_SSE2"
  "packsswb\t{%2, %0|%0, %2}"
  [(set_attr "type" "sselog")
   (set_attr "mode" "TI")])

(define_insn "sse2_packssdw"
  [(set (match_operand:V8HI 0 "register_operand" "=x")
        (vec_concat:V8HI
          (ss_truncate:V4HI
            (match_operand:V4SI 1 "register_operand" "0"))
          (ss_truncate:V4HI
            (match_operand:V4SI 2 "nonimmediate_operand" "xm"))))]
  "TARGET_SSE2"
  "packssdw\t{%2, %0|%0, %2}"
  [(set_attr "type" "sselog")
   (set_attr "mode" "TI")])

(define_insn "sse2_packuswb"
  [(set (match_operand:V16QI 0 "register_operand" "=x")
        (vec_concat:V16QI
          (us_truncate:V8QI
            (match_operand:V8HI 1 "register_operand" "0"))
          (us_truncate:V8QI
            (match_operand:V8HI 2 "nonimmediate_operand" "xm"))))]
  "TARGET_SSE2"
  "packuswb\t{%2, %0|%0, %2}"
  [(set_attr "type" "sselog")
   (set_attr "mode" "TI")])

;; Unpack (interleave) patterns.  Each is a vec_select over the
;; double-width concatenation of operand 1 and operand 2; the index list
;; alternates between an element of operand 1 and the element of operand
;; 2 at the same position.  The "h" variants take the upper half of each
;; input, the "l" variants the lower half.
(define_insn "sse2_punpckhbw"
  [(set (match_operand:V16QI 0 "register_operand" "=x")
        (vec_select:V16QI
          (vec_concat:V32QI
            (match_operand:V16QI 1 "register_operand" "0")
            (match_operand:V16QI 2 "nonimmediate_operand" "xm"))
          (parallel [(const_int 8)  (const_int 24)
                     (const_int 9)  (const_int 25)
                     (const_int 10) (const_int 26)
                     (const_int 11) (const_int 27)
                     (const_int 12) (const_int 28)
                     (const_int 13) (const_int 29)
                     (const_int 14) (const_int 30)
                     (const_int 15) (const_int 31)])))]
  "TARGET_SSE2"
  "punpckhbw\t{%2, %0|%0, %2}"
  [(set_attr "type" "sselog")
   (set_attr "mode" "TI")])

(define_insn "sse2_punpcklbw"
  [(set (match_operand:V16QI 0 "register_operand" "=x")
        (vec_select:V16QI
          (vec_concat:V32QI
            (match_operand:V16QI 1 "register_operand" "0")
            (match_operand:V16QI 2 "nonimmediate_operand" "xm"))
          (parallel [(const_int 0) (const_int 16)
                     (const_int 1) (const_int 17)
                     (const_int 2) (const_int 18)
                     (const_int 3) (const_int 19)
                     (const_int 4) (const_int 20)
                     (const_int 5) (const_int 21)
                     (const_int 6) (const_int 22)
                     (const_int 7) (const_int 23)])))]
  "TARGET_SSE2"
  "punpcklbw\t{%2, %0|%0, %2}"
  [(set_attr "type" "sselog")
   (set_attr "mode" "TI")])

(define_insn "sse2_punpckhwd"
  [(set (match_operand:V8HI 0 "register_operand" "=x")
        (vec_select:V8HI
          (vec_concat:V16HI
            (match_operand:V8HI 1 "register_operand" "0")
            (match_operand:V8HI 2 "nonimmediate_operand" "xm"))
          (parallel [(const_int 4) (const_int 12)
                     (const_int 5) (const_int 13)
                     (const_int 6) (const_int 14)
                     (const_int 7) (const_int 15)])))]
  "TARGET_SSE2"
  "punpckhwd\t{%2, %0|%0, %2}"
  [(set_attr "type" "sselog")
   (set_attr "mode" "TI")])

(define_insn "sse2_punpcklwd"
  [(set (match_operand:V8HI 0 "register_operand" "=x")
        (vec_select:V8HI
          (vec_concat:V16HI
            (match_operand:V8HI 1 "register_operand" "0")
            (match_operand:V8HI 2 "nonimmediate_operand" "xm"))
          (parallel [(const_int 0) (const_int 8)
                     (const_int 1) (const_int 9)
                     (const_int 2) (const_int 10)
                     (const_int 3) (const_int 11)])))]
  "TARGET_SSE2"
  "punpcklwd\t{%2, %0|%0, %2}"
  [(set_attr "type" "sselog")
   (set_attr "mode" "TI")])

(define_insn "sse2_punpckhdq"
  [(set (match_operand:V4SI 0 "register_operand" "=x")
        (vec_select:V4SI
          (vec_concat:V8SI
            (match_operand:V4SI 1 "register_operand" "0")
            (match_operand:V4SI 2 "nonimmediate_operand" "xm"))
          (parallel [(const_int 2) (const_int 6)
                     (const_int 3) (const_int 7)])))]
  "TARGET_SSE2"
  "punpckhdq\t{%2, %0|%0, %2}"
  [(set_attr "type" "sselog")
   (set_attr "mode" "TI")])

(define_insn "sse2_punpckldq"
  [(set (match_operand:V4SI 0 "register_operand" "=x")
        (vec_select:V4SI
          (vec_concat:V8SI
            (match_operand:V4SI 1 "register_operand" "0")
            (match_operand:V4SI 2 "nonimmediate_operand" "xm"))
          (parallel [(const_int 0) (const_int 4)
                     (const_int 1) (const_int 5)])))]
  "TARGET_SSE2"
  "punpckldq\t{%2, %0|%0, %2}"
  [(set_attr "type" "sselog")
   (set_attr "mode" "TI")])

(define_insn "sse2_punpckhqdq"
  [(set (match_operand:V2DI 0 "register_operand" "=x")
        (vec_select:V2DI
          (vec_concat:V4DI
            (match_operand:V2DI 1 "register_operand" "0")
            (match_operand:V2DI 2 "nonimmediate_operand" "xm"))
          (parallel [(const_int 1)
                     (const_int 3)])))]
  "TARGET_SSE2"
  "punpckhqdq\t{%2, %0|%0, %2}"
  [(set_attr "type" "sselog")
   (set_attr "mode" "TI")])

(define_insn "sse2_punpcklqdq"
  [(set (match_operand:V2DI 0 "register_operand" "=x")
        (vec_select:V2DI
          (vec_concat:V4DI
            (match_operand:V2DI 1 "register_operand" "0")
            (match_operand:V2DI 2 "nonimmediate_operand" "xm"))
          (parallel [(const_int 0)
                     (const_int 2)])))]
  "TARGET_SSE2"
  "punpcklqdq\t{%2, %0|%0, %2}"
  [(set_attr "type" "sselog")
   (set_attr "mode" "TI")])

;; Insert a word into a V8HI vector.  The expander takes an SImode value
;; and an element index 0..7; it narrows the value to HImode and turns
;; the index into the one-hot vec_merge mask (1 << index) that the insn
;; below expects.
(define_expand "sse2_pinsrw"
  [(set (match_operand:V8HI 0 "register_operand" "")
        (vec_merge:V8HI
          (vec_duplicate:V8HI
            (match_operand:SI 2 "nonimmediate_operand" ""))
          (match_operand:V8HI 1 "register_operand" "")
          (match_operand:SI 3 "const_0_to_7_operand" "")))]
  "TARGET_SSE2"
{
  operands[2] = gen_lowpart (HImode, operands[2]);
  operands[3] = GEN_INT ((1 << INTVAL (operands[3])));
})

;; Matching insn: the one-hot mask is converted back to an element index
;; with exact_log2 before the instruction is printed.
(define_insn "*sse2_pinsrw"
  [(set (match_operand:V8HI 0 "register_operand" "=x")
        (vec_merge:V8HI
          (vec_duplicate:V8HI
            (match_operand:HI 2 "nonimmediate_operand" "rm"))
          (match_operand:V8HI 1 "register_operand" "0")
          (match_operand:SI 3 "const_pow2_1_to_128_operand" "n")))]
  "TARGET_SSE2"
{
  operands[3] = GEN_INT (exact_log2 (INTVAL (operands[3])));
  return "pinsrw\t{%3, %k2, %0|%0, %k2, %3}";
}
  [(set_attr "type" "sselog")
   (set_attr "mode" "TI")])

;; Extract word element operand 2 (0..7) of a V8HI register,
;; zero-extended into an SImode general register.
(define_insn "sse2_pextrw"
  [(set (match_operand:SI 0 "register_operand" "=r")
        (zero_extend:SI
          (vec_select:HI
            (match_operand:V8HI 1 "register_operand" "x")
            (parallel [(match_operand:SI 2 "const_0_to_7_operand" "n")]))))]
  "TARGET_SSE2"
  "pextrw\t{%2, %1, %0|%0, %1, %2}"
  [(set_attr "type" "sselog")
   (set_attr "mode" "TI")])

;; pshufd expander: splits the immediate in operand 2 into four 2-bit
;; element selectors (lowest bits first) and hands them to sse2_pshufd_1.
(define_expand "sse2_pshufd"
  [(match_operand:V4SI 0 "register_operand" "")
   (match_operand:V4SI 1 "nonimmediate_operand" "")
   (match_operand:SI 2 "const_int_operand" "")]
  "TARGET_SSE2"
{
  int mask = INTVAL (operands[2]);
  emit_insn (gen_sse2_pshufd_1 (operands[0], operands[1],
                                GEN_INT ((mask >> 0) & 3),
                                GEN_INT ((mask >> 2) & 3),
                                GEN_INT ((mask >> 4) & 3),
                                GEN_INT ((mask >> 6) & 3)));
  DONE;
})

;; pshufd as an explicit vec_select.  The output code packs the four
;; selectors back into one immediate, overwriting operands[2] for
;; printing.
(define_insn "sse2_pshufd_1"
  [(set (match_operand:V4SI 0 "register_operand" "=x")
        (vec_select:V4SI
          (match_operand:V4SI 1 "nonimmediate_operand" "xm")
          (parallel [(match_operand 2 "const_0_to_3_operand" "")
                     (match_operand 3 "const_0_to_3_operand" "")
                     (match_operand 4 "const_0_to_3_operand" "")
                     (match_operand 5 "const_0_to_3_operand" "")])))]
  "TARGET_SSE2"
{
  int mask = 0;
  mask |= INTVAL (operands[2]) << 0;
  mask |= INTVAL (operands[3]) << 2;
  mask |= INTVAL (operands[4]) << 4;
  mask |= INTVAL (operands[5]) << 6;
  operands[2] = GEN_INT (mask);

  return "pshufd\t{%2, %1, %0|%0, %1, %2}";
}
  [(set_attr "type" "sselog1")
   (set_attr "mode" "TI")])

;; pshuflw expander: same immediate decoding as sse2_pshufd.
(define_expand "sse2_pshuflw"
  [(match_operand:V8HI 0 "register_operand" "")
   (match_operand:V8HI 1 "nonimmediate_operand" "")
   (match_operand:SI 2 "const_int_operand" "")]
  "TARGET_SSE2"
{
  int mask = INTVAL (operands[2]);
  emit_insn (gen_sse2_pshuflw_1 (operands[0], operands[1],
                                 GEN_INT ((mask >> 0) & 3),
                                 GEN_INT ((mask >> 2) & 3),
                                 GEN_INT ((mask >> 4) & 3),
                                 GEN_INT ((mask >> 6) & 3)));
  DONE;
})

;; pshuflw: elements 0..3 are selected by operands 2..5 (each 0..3);
;; elements 4..7 are passed through unchanged.
(define_insn "sse2_pshuflw_1"
  [(set (match_operand:V8HI 0 "register_operand" "=x")
        (vec_select:V8HI
          (match_operand:V8HI 1 "nonimmediate_operand" "xm")
          (parallel [(match_operand 2 "const_0_to_3_operand" "")
                     (match_operand 3 "const_0_to_3_operand" "")
                     (match_operand 4 "const_0_to_3_operand" "")
                     (match_operand 5 "const_0_to_3_operand" "")
                     (const_int 4)
                     (const_int 5)
                     (const_int 6)
                     (const_int 7)])))]
  "TARGET_SSE2"
{
  int mask = 0;
  mask |= INTVAL (operands[2]) << 0;
  mask |= INTVAL (operands[3]) << 2;
  mask |= INTVAL (operands[4]) << 4;
  mask |= INTVAL (operands[5]) << 6;
  operands[2] = GEN_INT (mask);

  return "pshuflw\t{%2, %1, %0|%0, %1, %2}";
}
  [(set_attr "type" "sselog")
   (set_attr "mode" "TI")])

;; pshufhw expander: decodes the immediate like sse2_pshufd, then adds 4
;; to every selector so that it names an element of the upper half, as
;; the vec_select in sse2_pshufhw_1 requires.
(define_expand "sse2_pshufhw"
  [(match_operand:V8HI 0 "register_operand" "")
   (match_operand:V8HI 1 "nonimmediate_operand" "")
   (match_operand:SI 2 "const_int_operand" "")]
  "TARGET_SSE2"
{
  int mask = INTVAL (operands[2]);
  emit_insn (gen_sse2_pshufhw_1 (operands[0], operands[1],
                                 GEN_INT (((mask >> 0) & 3) + 4),
                                 GEN_INT (((mask >> 2) & 3) + 4),
                                 GEN_INT (((mask >> 4) & 3) + 4),
                                 GEN_INT (((mask >> 6) & 3) + 4)));
  DONE;
})

;; pshufhw: elements 0..3 are passed through unchanged; elements 4..7 are
;; selected by operands 2..5 (each 4..7).  The output code removes the
;; bias of 4 again when it rebuilds the immediate.
(define_insn "sse2_pshufhw_1"
  [(set (match_operand:V8HI 0 "register_operand" "=x")
        (vec_select:V8HI
          (match_operand:V8HI 1 "nonimmediate_operand" "xm")
          (parallel [(const_int 0)
                     (const_int 1)
                     (const_int 2)
                     (const_int 3)
                     (match_operand 2 "const_4_to_7_operand" "")
                     (match_operand 3 "const_4_to_7_operand" "")
                     (match_operand 4 "const_4_to_7_operand" "")
                     (match_operand 5 "const_4_to_7_operand" "")])))]
  "TARGET_SSE2"
{
  int mask = 0;
  mask |= (INTVAL (operands[2]) - 4) << 0;
  mask |= (INTVAL (operands[3]) - 4) << 2;
  mask |= (INTVAL (operands[4]) - 4) << 4;
  mask |= (INTVAL (operands[5]) - 4) << 6;
  operands[2] = GEN_INT (mask);

  return "pshufhw\t{%2, %1, %0|%0, %1, %2}";
}
  [(set_attr "type" "sselog")
   (set_attr "mode" "TI")])

;; Load an SImode value into element 0 of a V4SI register.  The remaining
;; elements come from operand 2, which the preparation code sets to the
;; V4SI zero constant.
(define_expand "sse2_loadd"
  [(set (match_operand:V4SI 0 "register_operand" "")
        (vec_merge:V4SI
          (vec_duplicate:V4SI
            (match_operand:SI 1 "nonimmediate_operand" ""))
          (match_dup 2)
          (const_int 1)))]
  "TARGET_SSE"
  "operands[2] = CONST0_RTX (V4SImode);")

;; Set element 0 of a V4SI register from SImode operand 2.  In the first
;; two alternatives the other elements come from the zero constant ("C")
;; and the value is loaded with movd or movss; in the third they are kept
;; from the register tied to the destination and movss merges in the new
;; element.
(define_insn "sse2_loadld"
  [(set (match_operand:V4SI 0 "register_operand" "=Y,x,x")
        (vec_merge:V4SI
          (vec_duplicate:V4SI
            (match_operand:SI 2 "nonimmediate_operand" "mr,m,x"))
          (match_operand:V4SI 1 "reg_or_0_operand" " C,C,0")
          (const_int 1)))]
  "TARGET_SSE"
  "@
   movd\t{%2, %0|%0, %2}
   movss\t{%2, %0|%0, %2}
   movss\t{%2, %0|%0, %2}"
  [(set_attr "type" "ssemov")
   (set_attr "mode" "TI,V4SF,SF")])

;; ??? The hardware supports more, but TARGET_INTER_UNIT_MOVES must
;; be taken into account, and movdi isn't fully populated even without.
;;
;; Extract element 0 of a V4SI register.  No assembly is output directly
;; ("#"): after reload the insn is split into a plain SImode move whose
;; source is the same hard register viewed in SImode.
(define_insn_and_split "sse2_stored"
  [(set (match_operand:SI 0 "nonimmediate_operand" "=mx")
        (vec_select:SI
          (match_operand:V4SI 1 "register_operand" "x")
          (parallel [(const_int 0)])))]
  "TARGET_SSE"
  "#"
  "&& reload_completed"
  [(set (match_dup 0) (match_dup 1))]
{
  operands[1] = gen_rtx_REG (SImode, REGNO (operands[1]));
})

;; Named entry point for extracting element 0 of a V2DI register; the
;; empty preparation statement means the template is emitted as written.
(define_expand "sse_storeq"
  [(set (match_operand:DI 0 "nonimmediate_operand" "")
        (vec_select:DI
          (match_operand:V2DI 1 "register_operand" "")
          (parallel [(const_int 0)])))]
  "TARGET_SSE"
  "")

;; ??? The hardware supports more, but TARGET_INTER_UNIT_MOVES must
;; be taken into account, and movdi isn't fully populated even without.
;; Extract element 0 of a V2DI register.  Always split ("#"); see the
;; define_split that follows.
(define_insn "*sse2_storeq"
  [(set (match_operand:DI 0 "nonimmediate_operand" "=mx")
        (vec_select:DI
          (match_operand:V2DI 1 "register_operand" "x")
          (parallel [(const_int 0)])))]
  "TARGET_SSE"
  "#")

;; After reload, rewrite the element-0 extraction as a plain DImode move
;; whose source is the same hard register viewed in DImode.
(define_split
  [(set (match_operand:DI 0 "nonimmediate_operand" "")
        (vec_select:DI
          (match_operand:V2DI 1 "register_operand" "")
          (parallel [(const_int 0)])))]
  "TARGET_SSE && reload_completed"
  [(set (match_dup 0) (match_dup 1))]
{
  operands[1] = gen_rtx_REG (DImode, REGNO (operands[1]));
})

;; Extract element 1 (the upper quadword) of a V2DI value, SSE2 version.
;; Alternatives: store the upper half to memory (movhps); shift the
;; register tied to the destination right by 8 bytes in place (psrldq);
;; load from an offsettable memory operand printed with the %H modifier
;; (movq).  Memory-to-memory is excluded by the insn condition.
(define_insn "*vec_extractv2di_1_sse2"
  [(set (match_operand:DI 0 "nonimmediate_operand" "=m,x,x")
        (vec_select:DI
          (match_operand:V2DI 1 "nonimmediate_operand" "x,0,o")
          (parallel [(const_int 1)])))]
  "TARGET_SSE2 && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
  "@
   movhps\t{%1, %0|%0, %1}
   psrldq\t{$8, %0|%0, 8}
   movq\t{%H1, %0|%0, %H1}"
  [(set_attr "type" "ssemov,sseishft,ssemov")
   (set_attr "memory" "*,none,*")
   (set_attr "mode" "V2SF,TI,TI")])

;; Not sure this is ever used, but it doesn't hurt to have it.  -aoliva
;;
;; SSE1-only counterpart of the pattern above (condition requires
;; !TARGET_SSE2): the register and load alternatives use movhlps and
;; movlps instead.
(define_insn "*vec_extractv2di_1_sse"
  [(set (match_operand:DI 0 "nonimmediate_operand" "=m,x,x")
        (vec_select:DI
          (match_operand:V2DI 1 "nonimmediate_operand" "x,x,o")
          (parallel [(const_int 1)])))]
  "!TARGET_SSE2 && TARGET_SSE
   && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
  "@
   movhps\t{%1, %0|%0, %1}
   movhlps\t{%1, %0|%0, %1}
   movlps\t{%H1, %0|%0, %H1}"
  [(set_attr "type" "ssemov")
   (set_attr "mode" "V2SF,V4SF,V2SF")])

;; Broadcast an SImode register to all four V4SI elements: pshufd with
;; immediate 0 in the first alternative, shufps on the register tied to
;; the destination in the second.
(define_insn "*vec_dupv4si"
  [(set (match_operand:V4SI 0 "register_operand" "=Y,x")
        (vec_duplicate:V4SI
          (match_operand:SI 1 "register_operand" " Y,0")))]
  "TARGET_SSE"
  "@
   pshufd\t{$0, %1, %0|%0, %1, 0}
   shufps\t{$0, %0, %0|%0, %0, 0}"
  [(set_attr "type" "sselog1")
   (set_attr "mode" "TI,V4SF")])

;; Broadcast a DImode register to both V2DI elements.  The source is tied
;; to the destination in both alternatives, so each template names %0
;; twice.
(define_insn "*vec_dupv2di"
  [(set (match_operand:V2DI 0 "register_operand" "=Y,x")
        (vec_duplicate:V2DI
          (match_operand:DI 1 "register_operand" " 0,0")))]
  "TARGET_SSE"
  "@
   punpcklqdq\t%0, %0
   movlhps\t%0, %0"
  [(set_attr "type" "sselog1,ssemov")
   (set_attr "mode" "TI,V4SF")])

;; ??? In theory we can match memory for the MMX alternative, but allowing
;; nonimmediate_operand for operand 2 and *not* allowing memory for the SSE
;; alternatives pretty much forces the MMX alternative to be chosen.
;; Build a V2SI from two SImode values (SSE2 version).  Alternatives 1
;; and 3 interleave operand 2 into the register tied to the destination
;; (punpckldq); alternatives 2 and 4 have a zero second element ("C") and
;; just load operand 1 with movd.  The last two alternatives use MMX
;; registers ("y").
(define_insn "*sse2_concatv2si"
  [(set (match_operand:V2SI 0 "register_operand" "=Y, Y,*y,*y")
        (vec_concat:V2SI
          (match_operand:SI 1 "nonimmediate_operand" " 0,rm, 0,rm")
          (match_operand:SI 2 "reg_or_0_operand" " Y, C,*y, C")))]
  "TARGET_SSE2"
  "@
   punpckldq\t{%2, %0|%0, %2}
   movd\t{%1, %0|%0, %1}
   punpckldq\t{%2, %0|%0, %2}
   movd\t{%1, %0|%0, %1}"
  [(set_attr "type" "sselog,ssemov,mmxcvt,mmxmov")
   (set_attr "mode" "TI,TI,DI,DI")])

;; Same operation under plain TARGET_SSE: the SSE alternatives use
;; unpcklps and movss instead of punpckldq and movd.
(define_insn "*sse1_concatv2si"
  [(set (match_operand:V2SI 0 "register_operand" "=x,x,*y,*y")
        (vec_concat:V2SI
          (match_operand:SI 1 "nonimmediate_operand" " 0,m, 0,*rm")
          (match_operand:SI 2 "reg_or_0_operand" " x,C,*y,C")))]
  "TARGET_SSE"
  "@
   unpcklps\t{%2, %0|%0, %2}
   movss\t{%1, %0|%0, %1}
   punpckldq\t{%2, %0|%0, %2}
   movd\t{%1, %0|%0, %1}"
  [(set_attr "type" "sselog,ssemov,mmxcvt,mmxmov")
   (set_attr "mode" "V4SF,V4SF,DI,DI")])

;; Concatenate two V2SI halves into a V4SI.  Operand 1 is always tied to
;; the destination; operand 2 supplies the upper half from a register
;; (punpcklqdq or movlhps) or from memory (movhps).
(define_insn "*vec_concatv4si_1"
  [(set (match_operand:V4SI 0 "register_operand" "=Y,x,x")
        (vec_concat:V4SI
          (match_operand:V2SI 1 "register_operand" " 0,0,0")
          (match_operand:V2SI 2 "nonimmediate_operand" " Y,x,m")))]
  "TARGET_SSE"
  "@
   punpcklqdq\t{%2, %0|%0, %2}
   movlhps\t{%2, %0|%0, %2}
   movhps\t{%2, %0|%0, %2}"
  [(set_attr "type" "sselog,ssemov,ssemov")
   (set_attr "mode" "TI,V4SF,V2SF")])

;; Concatenate two DImode values into a V2DI.  Alternatives, in order:
;; operand 1 from memory with zero upper half (movq); operand 1 from an
;; MMX register with zero upper half (movq2dq); operand 2 from a register
;; into the upper half (punpcklqdq, movlhps) or from memory (movhps);
;; operand 1 from memory into the lower half with operand 2 tied to the
;; destination (movlps).
(define_insn "*vec_concatv2di"
  [(set (match_operand:V2DI 0 "register_operand" "=Y,?Y,Y,x,x,x")
        (vec_concat:V2DI
          (match_operand:DI 1 "nonimmediate_operand" " m,*y,0,0,0,m")
          (match_operand:DI 2 "vector_move_operand" " C, C,Y,x,m,0")))]
  "TARGET_SSE"
  "@
   movq\t{%1, %0|%0, %1}
   movq2dq\t{%1, %0|%0, %1}
   punpcklqdq\t{%2, %0|%0, %2}
   movlhps\t{%2, %0|%0, %2}
   movhps\t{%2, %0|%0, %2}
   movlps\t{%1, %0|%0, %1}"
  [(set_attr "type" "ssemov,ssemov,sselog,ssemov,ssemov,ssemov")
   (set_attr "mode" "TI,TI,TI,V4SF,V2SF,V2SF")])

;; The vec_set / vec_extract / vec_init expanders below are thin wrappers:
;; each forwards its operands to ix86_expand_vector_set,
;; ix86_expand_vector_extract or ix86_expand_vector_init respectively,
;; always passing false as the first argument.  For set and extract,
;; operand 2 is the constant element index.

(define_expand "vec_setv2di"
  [(match_operand:V2DI 0 "register_operand" "")
   (match_operand:DI 1 "register_operand" "")
   (match_operand 2 "const_int_operand" "")]
  "TARGET_SSE"
{
  ix86_expand_vector_set (false, operands[0], operands[1],
                          INTVAL (operands[2]));
  DONE;
})

(define_expand "vec_extractv2di"
  [(match_operand:DI 0 "register_operand" "")
   (match_operand:V2DI 1 "register_operand" "")
   (match_operand 2 "const_int_operand" "")]
  "TARGET_SSE"
{
  ix86_expand_vector_extract (false, operands[0], operands[1],
                              INTVAL (operands[2]));
  DONE;
})

(define_expand "vec_initv2di"
  [(match_operand:V2DI 0 "register_operand" "")
   (match_operand 1 "" "")]
  "TARGET_SSE"
{
  ix86_expand_vector_init (false, operands[0], operands[1]);
  DONE;
})

(define_expand "vec_setv4si"
  [(match_operand:V4SI 0 "register_operand" "")
   (match_operand:SI 1 "register_operand" "")
   (match_operand 2 "const_int_operand" "")]
  "TARGET_SSE"
{
  ix86_expand_vector_set (false, operands[0], operands[1],
                          INTVAL (operands[2]));
  DONE;
})

(define_expand "vec_extractv4si"
  [(match_operand:SI 0 "register_operand" "")
   (match_operand:V4SI 1 "register_operand" "")
   (match_operand 2 "const_int_operand" "")]
  "TARGET_SSE"
{
  ix86_expand_vector_extract (false, operands[0], operands[1],
                              INTVAL (operands[2]));
  DONE;
})

(define_expand "vec_initv4si"
  [(match_operand:V4SI 0 "register_operand" "")
   (match_operand 1 "" "")]
  "TARGET_SSE"
{
  ix86_expand_vector_init (false, operands[0], operands[1]);
  DONE;
})

(define_expand "vec_setv8hi"
  [(match_operand:V8HI 0 "register_operand" "")
   (match_operand:HI 1 "register_operand" "")
   (match_operand 2 "const_int_operand" "")]
  "TARGET_SSE"
{
  ix86_expand_vector_set (false, operands[0], operands[1],
                          INTVAL (operands[2]));
  DONE;
})

(define_expand "vec_extractv8hi"
  [(match_operand:HI 0 "register_operand" "")
   (match_operand:V8HI 1 "register_operand" "")
   (match_operand 2 "const_int_operand" "")]
  "TARGET_SSE"
{
  ix86_expand_vector_extract (false, operands[0], operands[1],
                              INTVAL (operands[2]));
  DONE;
})

(define_expand "vec_initv8hi"
  [(match_operand:V8HI 0 "register_operand" "")
   (match_operand 1 "" "")]
  "TARGET_SSE"
{
  ix86_expand_vector_init (false, operands[0], operands[1]);
  DONE;
})

(define_expand "vec_setv16qi"
  [(match_operand:V16QI 0 "register_operand" "")
   (match_operand:QI 1 "register_operand" "")
   (match_operand 2 "const_int_operand" "")]
  "TARGET_SSE"
{
  ix86_expand_vector_set (false, operands[0], operands[1],
                          INTVAL (operands[2]));
  DONE;
})

(define_expand "vec_extractv16qi"
  [(match_operand:QI 0 "register_operand" "")
   (match_operand:V16QI 1 "register_operand" "")
   (match_operand 2 "const_int_operand" "")]
  "TARGET_SSE"
{
  ix86_expand_vector_extract (false, operands[0], operands[1],
                              INTVAL (operands[2]));
  DONE;
})

(define_expand "vec_initv16qi"
  [(match_operand:V16QI 0 "register_operand" "")
   (match_operand 1 "" "")]
  "TARGET_SSE"
{
  ix86_expand_vector_init (false, operands[0], operands[1]);
  DONE;
})

;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;;
;; Miscellaneous
;;
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;

;; Unsigned rounding average of bytes (pavgb): both inputs are
;; zero-extended to the double-width element mode, summed, incremented by
;; one, shifted right by one and truncated back.
;; NOTE(review): the vector of ones is written as const_vector:V16QI even
;; though it is an operand of a V16HI plus; confirm the mode mismatch is
;; intended before relying on this RTL for simplification.
(define_insn "sse2_uavgv16qi3"
  [(set (match_operand:V16QI 0 "register_operand" "=x")
        (truncate:V16QI
          (lshiftrt:V16HI
            (plus:V16HI
              (plus:V16HI
                (zero_extend:V16HI
                  (match_operand:V16QI 1 "nonimmediate_operand" "%0"))
                (zero_extend:V16HI
                  (match_operand:V16QI 2 "nonimmediate_operand" "xm")))
              (const_vector:V16QI [(const_int 1) (const_int 1)
                                   (const_int 1) (const_int 1)
                                   (const_int 1) (const_int 1)
                                   (const_int 1) (const_int 1)
                                   (const_int 1) (const_int 1)
                                   (const_int 1) (const_int 1)
                                   (const_int 1) (const_int 1)
                                   (const_int 1) (const_int 1)]))
            (const_int 1))))]
  "TARGET_SSE2 && ix86_binary_operator_ok (PLUS, V16QImode, operands)"
  "pavgb\t{%2, %0|%0, %2}"
  [(set_attr "type" "sseiadd")
   (set_attr "mode" "TI")])

;; Unsigned rounding average of words (pavgw); same construction as
;; sse2_uavgv16qi3 with V8HI widened to V8SI.
;; NOTE(review): same mode question as above -- the ones vector is
;; const_vector:V8HI inside a V8SI plus.
(define_insn "sse2_uavgv8hi3"
  [(set (match_operand:V8HI 0 "register_operand" "=x")
        (truncate:V8HI
          (lshiftrt:V8SI
            (plus:V8SI
              (plus:V8SI
                (zero_extend:V8SI
                  (match_operand:V8HI 1 "nonimmediate_operand" "%0"))
                (zero_extend:V8SI
                  (match_operand:V8HI 2 "nonimmediate_operand" "xm")))
              (const_vector:V8HI [(const_int 1) (const_int 1)
                                  (const_int 1) (const_int 1)
                                  (const_int 1) (const_int 1)
                                  (const_int 1) (const_int 1)]))
            (const_int 1))))]
  "TARGET_SSE2 && ix86_binary_operator_ok (PLUS, V8HImode, operands)"
  "pavgw\t{%2, %0|%0, %2}"
  [(set_attr "type" "sseiadd")
   (set_attr "mode" "TI")])

;; The correct representation for this is absolutely enormous, and
;; surely not generally useful.
;; psadbw is modelled opaquely: two V16QI inputs go into an
;; UNSPEC_PSADBW whose result is V2DI.  Operand 1 is tied to the output
;; register (constraint "0"); operand 2 may be a register or memory.
(define_insn "sse2_psadbw"
  [(set (match_operand:V2DI 0 "register_operand" "=x")
        (unspec:V2DI [(match_operand:V16QI 1 "register_operand" "0")
                      (match_operand:V16QI 2 "nonimmediate_operand" "xm")]
                     UNSPEC_PSADBW))]
  "TARGET_SSE2"
  "psadbw\t{%2, %0|%0, %2}"
  [(set_attr "type" "sseiadd")
   (set_attr "mode" "TI")])

;; movmskps: SImode general register result computed from a V4SF SSE
;; register through UNSPEC_MOVMSK.
(define_insn "sse_movmskps"
  [(set (match_operand:SI 0 "register_operand" "=r")
        (unspec:SI [(match_operand:V4SF 1 "register_operand" "x")]
                   UNSPEC_MOVMSK))]
  "TARGET_SSE"
  "movmskps\t{%1, %0|%0, %1}"
  [(set_attr "type" "ssecvt")
   (set_attr "mode" "V4SF")])

;; movmskpd: as sse_movmskps, but the source is a V2DF register and the
;; pattern requires TARGET_SSE2.
(define_insn "sse2_movmskpd"
  [(set (match_operand:SI 0 "register_operand" "=r")
        (unspec:SI [(match_operand:V2DF 1 "register_operand" "x")]
                   UNSPEC_MOVMSK))]
  "TARGET_SSE2"
  "movmskpd\t{%1, %0|%0, %1}"
  [(set_attr "type" "ssecvt")
   (set_attr "mode" "V2DF")])

;; pmovmskb: as above with a V16QI source.
;; NOTE(review): the "mode" attribute is "V2DF" although the operand is
;; V16QI, the same value sse2_movmskpd uses.  Other attributes may be
;; derived from it, so confirm against the attribute definitions in
;; i386.md before changing it.
(define_insn "sse2_pmovmskb"
  [(set (match_operand:SI 0 "register_operand" "=r")
        (unspec:SI [(match_operand:V16QI 1 "register_operand" "x")]
                   UNSPEC_MOVMSK))]
  "TARGET_SSE2"
  "pmovmskb\t{%1, %0|%0, %1}"
  [(set_attr "type" "ssecvt")
   (set_attr "mode" "V2DF")])

;; Named entry point for maskmovdqu.  Operand 0 is the V16QI memory
;; destination; it also appears inside the unspec (match_dup 0) together
;; with the two V16QI register operands, so the old memory contents are
;; an input of the operation.  The preparation code is empty; the
;; pattern is emitted as written.
(define_expand "sse2_maskmovdqu"
  [(set (match_operand:V16QI 0 "memory_operand" "")
        (unspec:V16QI [(match_operand:V16QI 1 "register_operand" "x")
                       (match_operand:V16QI 2 "register_operand" "x")
                       (match_dup 0)]
                      UNSPEC_MASKMOV))]
  "TARGET_SSE2"
  "")

;; 32-bit form: the destination address is an SImode register
;; restricted by constraint "D".  Only operands 1 and 2 are printed;
;; the address register does not appear in the assembler template.
(define_insn "*sse2_maskmovdqu"
  [(set (mem:V16QI (match_operand:SI 0 "register_operand" "D"))
        (unspec:V16QI [(match_operand:V16QI 1 "register_operand" "x")
                       (match_operand:V16QI 2 "register_operand" "x")
                       (mem:V16QI (match_dup 0))]
                      UNSPEC_MASKMOV))]
  "TARGET_SSE2 && !TARGET_64BIT"
  ;; @@@ check ordering of operands in intel/nonintel syntax
  "maskmovdqu\t{%2, %1|%1, %2}"
  [(set_attr "type" "ssecvt")
   (set_attr "mode" "TI")])

;; 64-bit form: identical except that the address register is DImode
;; and the pattern is enabled only for TARGET_64BIT.
(define_insn "*sse2_maskmovdqu_rex64"
  [(set (mem:V16QI (match_operand:DI 0 "register_operand" "D"))
        (unspec:V16QI [(match_operand:V16QI 1 "register_operand" "x")
                       (match_operand:V16QI 2 "register_operand" "x")
                       (mem:V16QI (match_dup 0))]
                      UNSPEC_MASKMOV))]
  "TARGET_SSE2 && TARGET_64BIT"
  ;; @@@ check ordering of operands in intel/nonintel syntax
  "maskmovdqu\t{%2, %1|%1, %2}"
  [(set_attr "type" "ssecvt")
   (set_attr "mode" "TI")])

;; ldmxcsr: an unspec_volatile taking one SImode memory operand;
;; the "memory" attribute is "load".
(define_insn "sse_ldmxcsr"
  [(unspec_volatile [(match_operand:SI 0 "memory_operand" "m")]
                    UNSPECV_LDMXCSR)]
  "TARGET_SSE"
  "ldmxcsr\t%0"
  [(set_attr "type" "sse")
   (set_attr "memory" "load")])

;; stmxcsr: sets an SImode memory operand from an unspec_volatile with
;; a dummy (const_int 0) argument; the "memory" attribute is "store".
(define_insn "sse_stmxcsr"
  [(set (match_operand:SI 0 "memory_operand" "=m")
        (unspec_volatile:SI [(const_int 0)] UNSPECV_STMXCSR))]
  "TARGET_SSE"
  "stmxcsr\t%0"
  [(set_attr "type" "sse")
   (set_attr "memory" "store")])

;; sfence expander.  It takes no operands from the caller: the
;; preparation code builds operand 0 itself as a BLKmode MEM whose
;; address is a Pmode SCRATCH and marks that MEM volatile.  The
;; resulting (set mem (unspec [mem])) is matched by "*sse_sfence".
;; Enabled for TARGET_SSE or TARGET_3DNOW_A.
(define_expand "sse_sfence"
  [(set (match_dup 0)
        (unspec:BLK [(match_dup 0)] UNSPEC_SFENCE))]
  "TARGET_SSE || TARGET_3DNOW_A"
{
  operands[0] = gen_rtx_MEM (BLKmode, gen_rtx_SCRATCH (Pmode));
  MEM_VOLATILE_P (operands[0]) = 1;
})

;; Insn matched by the sfence expander; operand 0 has no predicate and
;; is not printed.
(define_insn "*sse_sfence"
  [(set (match_operand:BLK 0 "" "")
        (unspec:BLK [(match_dup 0)] UNSPEC_SFENCE))]
  "TARGET_SSE || TARGET_3DNOW_A"
  "sfence"
  [(set_attr "type" "sse")
   (set_attr "memory" "unknown")])

;; clflush: unspec_volatile on an address operand (constraint "p"),
;; printed with the %a modifier.
(define_insn "sse2_clflush"
  [(unspec_volatile [(match_operand 0 "address_operand" "p")]
                    UNSPECV_CLFLUSH)]
  "TARGET_SSE2"
  "clflush\t%a0"
  [(set_attr "type" "sse")
   (set_attr "memory" "unknown")])

;; mfence expander; same construction as the sfence expander (volatile
;; BLKmode MEM with a Pmode SCRATCH address), using UNSPEC_MFENCE and
;; requiring TARGET_SSE2.
(define_expand "sse2_mfence"
  [(set (match_dup 0)
        (unspec:BLK [(match_dup 0)] UNSPEC_MFENCE))]
  "TARGET_SSE2"
{
  operands[0] = gen_rtx_MEM (BLKmode, gen_rtx_SCRATCH (Pmode));
  MEM_VOLATILE_P (operands[0]) = 1;
})

;; Insn matched by the mfence expander.
(define_insn "*sse2_mfence"
  [(set (match_operand:BLK 0 "" "")
        (unspec:BLK [(match_dup 0)] UNSPEC_MFENCE))]
  "TARGET_SSE2"
  "mfence"
  [(set_attr "type" "sse")
   (set_attr "memory" "unknown")])

;; lfence expander; same construction again, using UNSPEC_LFENCE.
(define_expand "sse2_lfence"
  [(set (match_dup 0)
        (unspec:BLK [(match_dup 0)] UNSPEC_LFENCE))]
  "TARGET_SSE2"
{
  operands[0] = gen_rtx_MEM (BLKmode, gen_rtx_SCRATCH (Pmode));
  MEM_VOLATILE_P (operands[0]) = 1;
})

;; Insn matched by the lfence expander.
(define_insn "*sse2_lfence"
  [(set (match_operand:BLK 0 "" "")
        (unspec:BLK [(match_dup 0)] UNSPEC_LFENCE))]
  "TARGET_SSE2"
  "lfence"
  [(set_attr "type" "sse")
   (set_attr "memory" "unknown")])

;; mwait: both SImode operands are pinned to fixed registers through
;; constraints "a" and "c"; the template prints no operands.
(define_insn "sse3_mwait"
  [(unspec_volatile [(match_operand:SI 0 "register_operand" "a")
                     (match_operand:SI 1 "register_operand" "c")]
                    UNSPECV_MWAIT)]
  "TARGET_SSE3"
;; 64bit version is "mwait %rax,%rcx". But only lower 32bits are used.
;; Since 32bit register operands are implicitly zero extended to 64bit,
;; we only need to set up 32bit registers.
  "mwait"
  [(set_attr "length" "3")])

;; monitor, 32-bit form: three SImode operands pinned by constraints
;; "a", "c" and "d", all printed explicitly in the template.
(define_insn "sse3_monitor"
  [(unspec_volatile [(match_operand:SI 0 "register_operand" "a")
                     (match_operand:SI 1 "register_operand" "c")
                     (match_operand:SI 2 "register_operand" "d")]
                    UNSPECV_MONITOR)]
  "TARGET_SSE3 && !TARGET_64BIT"
  "monitor\t%0, %1, %2"
  [(set_attr "length" "3")])

;; monitor, 64-bit form: operand 0 is DImode, operands 1 and 2 stay
;; SImode, and the template prints no operands.
(define_insn "sse3_monitor64"
  [(unspec_volatile [(match_operand:DI 0 "register_operand" "a")
                     (match_operand:SI 1 "register_operand" "c")
                     (match_operand:SI 2 "register_operand" "d")]
                    UNSPECV_MONITOR)]
  "TARGET_SSE3 && TARGET_64BIT"
;; 64bit version is "monitor %rax,%rcx,%rdx". But only lower 32bits in
;; RCX and RDX are used. Since 32bit register operands are implicitly
;; zero extended to 64bit, we only need to set up 32bit registers.
  "monitor"
  [(set_attr "length" "3")])
|