;; GCC machine description for SSE instructions
;; Copyright (C) 2005
;; Free Software Foundation, Inc.
;;
;; This file is part of GCC.
;;
;; GCC is free software; you can redistribute it and/or modify
;; it under the terms of the GNU General Public License as published by
;; the Free Software Foundation; either version 2, or (at your option)
;; any later version.
;;
;; GCC is distributed in the hope that it will be useful,
;; but WITHOUT ANY WARRANTY; without even the implied warranty of
;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
;; GNU General Public License for more details.
;;
;; You should have received a copy of the GNU General Public License
;; along with GCC; see the file COPYING.  If not, write to
;; the Free Software Foundation, 51 Franklin Street, Fifth Floor,
;; Boston, MA 02110-1301, USA.


;; The 16-byte integer vector modes handled by SSE.  TImode is left out
;; on purpose: it gets special-cased for TARGET_64BIT.
(define_mode_macro SSEMODEI [V16QI V8HI V4SI V2DI])

;; Every 16-byte vector mode handled by SSE, integer and floating point.
(define_mode_macro SSEMODE [V16QI V8HI V4SI V2DI V4SF V2DF])

;; Assorted subsets of the integer vector modes.  The digits in each name
;; list the modes that are included (1 = V16QI, 2 = V8HI, 4 = V4SI,
;; 8 = V2DI).
(define_mode_macro SSEMODE12 [V16QI V8HI])
(define_mode_macro SSEMODE24 [V8HI V4SI])
(define_mode_macro SSEMODE14 [V16QI V4SI])
(define_mode_macro SSEMODE124 [V16QI V8HI V4SI])
(define_mode_macro SSEMODE248 [V8HI V4SI V2DI])

;; Mnemonic suffix letter for each integer vector mode.
(define_mode_attr ssevecsize [(V16QI "b") (V8HI "w") (V4SI "d") (V2DI "q")])

;; Patterns whose name begins with "sse{,2,3}_" are invoked by intrinsics.

;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;;
;; Move patterns
;;
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;

;; All of these patterns are enabled for SSE1 as well as SSE2.
;; This is essential for maintaining stable calling conventions.

;; Expand a move of any 16-byte integer vector mode.  All of the work is
;; delegated to ix86_expand_vector_move.
(define_expand "mov<mode>"
  [(set (match_operand:SSEMODEI 0 "nonimmediate_operand" "")
        (match_operand:SSEMODEI 1 "nonimmediate_operand" ""))]
  "TARGET_SSE"
{
  ix86_expand_vector_move (<MODE>mode, operands);
  DONE;
})

;; Integer vector move.  Alternative 0 zeroes the destination with an xor
;; of the register with itself, alternative 1 loads or copies into a
;; register, alternative 2 stores.  The "mode" attribute computed below
;; chooses between the ps mnemonic (V4SF) and the integer mnemonic (TI).
(define_insn "*mov<mode>_internal"
  [(set (match_operand:SSEMODEI 0 "nonimmediate_operand" "=x,x ,m")
        (match_operand:SSEMODEI 1 "vector_move_operand" "C ,xm,x"))]
  "TARGET_SSE && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
{
  switch (which_alternative)
    {
    case 0:
      if (get_attr_mode (insn) == MODE_V4SF)
        return "xorps\t%0, %0";
      else
        return "pxor\t%0, %0";
    case 1:
    case 2:
      if (get_attr_mode (insn) == MODE_V4SF)
        return "movaps\t{%1, %0|%0, %1}";
      else
        return "movdqa\t{%1, %0|%0, %1}";
    default:
      gcc_unreachable ();
    }
}
  [(set_attr "type" "sselog1,ssemov,ssemov")
   (set (attr "mode")
        ;; Without SSE2 only the ps forms are available.
        (cond [(eq (symbol_ref "TARGET_SSE2") (const_int 0))
                 (const_string "V4SF")

               (eq_attr "alternative" "0,1")
                 (if_then_else
                   (ne (symbol_ref "optimize_size")
                       (const_int 0))
                   (const_string "V4SF")
                   (const_string "TI"))
               (eq_attr "alternative" "2")
                 (if_then_else
                   (ior (ne (symbol_ref "TARGET_SSE_TYPELESS_STORES")
                            (const_int 0))
                        (ne (symbol_ref "optimize_size")
                            (const_int 0)))
                   (const_string "V4SF")
                   (const_string "TI"))]
               (const_string "TI")))])

;; Expand a V4SF move via ix86_expand_vector_move.
(define_expand "movv4sf"
  [(set (match_operand:V4SF 0 "nonimmediate_operand" "")
        (match_operand:V4SF 1 "nonimmediate_operand" ""))]
  "TARGET_SSE"
{
  ix86_expand_vector_move (V4SFmode, operands);
  DONE;
})

;; V4SF move: zero the register, load/copy, or store.
;; NOTE(review): unlike *mov<mode>_internal and *movv2df_internal, the
;; condition here does not reject memory-to-memory moves -- confirm that
;; this is intentional.
(define_insn "*movv4sf_internal"
  [(set (match_operand:V4SF 0 "nonimmediate_operand" "=x,x,m")
        (match_operand:V4SF 1 "vector_move_operand" "C,xm,x"))]
  "TARGET_SSE"
  "@
   xorps\t%0, %0
   movaps\t{%1, %0|%0, %1}
   movaps\t{%1, %0|%0, %1}"
  [(set_attr "type" "sselog1,ssemov,ssemov")
   (set_attr "mode" "V4SF")])

;; After reload, rewrite a V4SF register load from a
;; zero_extended_scalar_load_operand as a vec_merge of the duplicated SFmode
;; subreg of the source into element 0 of a zero vector.
(define_split
  [(set (match_operand:V4SF 0 "register_operand" "")
        (match_operand:V4SF 1 "zero_extended_scalar_load_operand" ""))]
  "TARGET_SSE && reload_completed"
  [(set (match_dup 0)
        (vec_merge:V4SF
          (vec_duplicate:V4SF (match_dup 1))
          (match_dup 2)
          (const_int 1)))]
{
  operands[1] = simplify_gen_subreg (SFmode, operands[1], V4SFmode, 0);
  operands[2] = CONST0_RTX (V4SFmode);
})

;; Expand a V2DF move via ix86_expand_vector_move.
(define_expand "movv2df"
  [(set (match_operand:V2DF 0 "nonimmediate_operand" "")
        (match_operand:V2DF 1 "nonimmediate_operand" ""))]
  "TARGET_SSE"
{
  ix86_expand_vector_move (V2DFmode, operands);
  DONE;
})

;; V2DF move.  Same alternative layout as *mov<mode>_internal; the "mode"
;; attribute chooses between the ps mnemonic (V4SF) and the pd mnemonic
;; (V2DF).
(define_insn "*movv2df_internal"
  [(set (match_operand:V2DF 0 "nonimmediate_operand" "=x,x,m")
        (match_operand:V2DF 1 "vector_move_operand" "C,xm,x"))]
  "TARGET_SSE && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
{
  switch (which_alternative)
    {
    case 0:
      if (get_attr_mode (insn) == MODE_V4SF)
        return "xorps\t%0, %0";
      else
        return "xorpd\t%0, %0";
    case 1:
    case 2:
      if (get_attr_mode (insn) == MODE_V4SF)
        return "movaps\t{%1, %0|%0, %1}";
      else
        return "movapd\t{%1, %0|%0, %1}";
    default:
      gcc_unreachable ();
    }
}
  [(set_attr "type" "sselog1,ssemov,ssemov")
   (set (attr "mode")
        ;; Without SSE2 only the ps forms are available.
        (cond [(eq (symbol_ref "TARGET_SSE2") (const_int 0))
                 (const_string "V4SF")
               (eq_attr "alternative" "0,1")
                 (if_then_else
                   (ne (symbol_ref "optimize_size")
                       (const_int 0))
                   (const_string "V4SF")
                   (const_string "V2DF"))
               (eq_attr "alternative" "2")
                 (if_then_else
                   (ior (ne (symbol_ref "TARGET_SSE_TYPELESS_STORES")
                            (const_int 0))
                        (ne (symbol_ref "optimize_size")
                            (const_int 0)))
                   (const_string "V4SF")
                   (const_string "V2DF"))]
               (const_string "V2DF")))])

;; After reload, rewrite a V2DF register load from a
;; zero_extended_scalar_load_operand as the concatenation of the DFmode
;; subreg of the source with a DFmode zero.
(define_split
  [(set (match_operand:V2DF 0 "register_operand" "")
        (match_operand:V2DF 1 "zero_extended_scalar_load_operand" ""))]
  "TARGET_SSE2 && reload_completed"
  [(set (match_dup 0) (vec_concat:V2DF (match_dup 1) (match_dup 2)))]
{
  operands[1] = simplify_gen_subreg (DFmode, operands[1], V2DFmode, 0);
  operands[2] = CONST0_RTX (DFmode);
})

;; Push a 16-byte vector register; handled by ix86_expand_push.
(define_expand "push<mode>1"
  [(match_operand:SSEMODE 0 "register_operand" "")]
  "TARGET_SSE"
{
  ix86_expand_push (<MODE>mode, operands[0]);
  DONE;
})

;; Misaligned vector move; handled by ix86_expand_vector_move_misalign.
(define_expand "movmisalign<mode>"
  [(set (match_operand:SSEMODE 0 "nonimmediate_operand" "")
        (match_operand:SSEMODE 1 "nonimmediate_operand" ""))]
  "TARGET_SSE"
{
  ix86_expand_vector_move_misalign (<MODE>mode, operands);
  DONE;
})

;; movups on V4SF.  The insn mode is V4SF, matching the operand mode and
;; the other V4SF move patterns in this file.
(define_insn "sse_movups"
  [(set (match_operand:V4SF 0 "nonimmediate_operand" "=x,m")
        (unspec:V4SF [(match_operand:V4SF 1 "nonimmediate_operand" "xm,x")]
                     UNSPEC_MOVU))]
  "TARGET_SSE && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
  "movups\t{%1, %0|%0, %1}"
  [(set_attr "type" "ssemov")
   (set_attr "mode" "V4SF")])

;; movupd on V2DF.
(define_insn "sse2_movupd"
  [(set (match_operand:V2DF 0 "nonimmediate_operand" "=x,m")
        (unspec:V2DF [(match_operand:V2DF 1 "nonimmediate_operand" "xm,x")]
                     UNSPEC_MOVU))]
  "TARGET_SSE2 && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
  "movupd\t{%1, %0|%0, %1}"
  [(set_attr "type" "ssemov")
   (set_attr "mode" "V2DF")])

;; movdqu on V16QI.
(define_insn "sse2_movdqu"
  [(set (match_operand:V16QI 0 "nonimmediate_operand" "=x,m")
        (unspec:V16QI [(match_operand:V16QI 1 "nonimmediate_operand" "xm,x")]
                      UNSPEC_MOVU))]
  "TARGET_SSE2 && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
  "movdqu\t{%1, %0|%0, %1}"
  [(set_attr "type" "ssemov")
   (set_attr "mode" "TI")])

;; movntps: store an SSE register to memory (UNSPEC_MOVNT).
(define_insn "sse_movntv4sf"
  [(set (match_operand:V4SF 0 "memory_operand" "=m")
        (unspec:V4SF [(match_operand:V4SF 1 "register_operand" "x")]
                     UNSPEC_MOVNT))]
  "TARGET_SSE"
  "movntps\t{%1, %0|%0, %1}"
  [(set_attr "type" "ssemov")
   (set_attr "mode" "V4SF")])

;; movntpd: V2DF counterpart of sse_movntv4sf.  Typed "ssemov" like that
;; pattern, since it is the same register-to-memory store.
(define_insn "sse2_movntv2df"
  [(set (match_operand:V2DF 0 "memory_operand" "=m")
        (unspec:V2DF [(match_operand:V2DF 1 "register_operand" "x")]
                     UNSPEC_MOVNT))]
  "TARGET_SSE2"
  "movntpd\t{%1, %0|%0, %1}"
  [(set_attr "type" "ssemov")
   (set_attr "mode" "V2DF")])

;; movntdq: V2DI counterpart of sse_movntv4sf.  Typed "ssemov" like that
;; pattern, since it is the same register-to-memory store.
(define_insn "sse2_movntv2di"
  [(set (match_operand:V2DI 0 "memory_operand" "=m")
        (unspec:V2DI [(match_operand:V2DI 1 "register_operand" "x")]
                     UNSPEC_MOVNT))]
  "TARGET_SSE2"
  "movntdq\t{%1, %0|%0, %1}"
  [(set_attr "type" "ssecvt")
   (set_attr "mode" "TI")])

;; movnti: store an SImode general register ("r") to memory.  The insn
;; mode is SI to match the operands; no vector register is involved.
(define_insn "sse2_movntsi"
  [(set (match_operand:SI 0 "memory_operand" "=m")
        (unspec:SI [(match_operand:SI 1 "register_operand" "r")]
                   UNSPEC_MOVNT))]
  "TARGET_SSE2"
  "movnti\t{%1, %0|%0, %1}"
  [(set_attr "type" "ssecvt")
   (set_attr "mode" "SI")])

;; lddqu: load V16QI from memory into an SSE register.
(define_insn "sse3_lddqu"
  [(set (match_operand:V16QI 0 "register_operand" "=x")
        (unspec:V16QI [(match_operand:V16QI 1 "memory_operand" "m")]
                      UNSPEC_LDQQU))]
  "TARGET_SSE3"
  "lddqu\t{%1, %0|%0, %1}"
  [(set_attr "type" "ssecvt")
   (set_attr "mode" "TI")])

;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;;
;; Parallel single-precision floating point arithmetic
;;
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;

;; V4SF negate; expanded by ix86_expand_fp_absneg_operator.
(define_expand "negv4sf2"
  [(set (match_operand:V4SF 0 "register_operand" "")
        (neg:V4SF (match_operand:V4SF 1 "nonimmediate_operand" "")))]
  "TARGET_SSE"
  "ix86_expand_fp_absneg_operator (NEG, V4SFmode, operands); DONE;")

;; V4SF absolute value; expanded by ix86_expand_fp_absneg_operator.
(define_expand "absv4sf2"
  [(set (match_operand:V4SF 0 "register_operand" "")
        (abs:V4SF (match_operand:V4SF 1 "nonimmediate_operand" "")))]
  "TARGET_SSE"
  "ix86_expand_fp_absneg_operator (ABS, V4SFmode, operands); DONE;")

;; V4SF addition.  The expander only fixes up the operands; the insn below
;; matches the resulting RTL.
(define_expand "addv4sf3"
  [(set (match_operand:V4SF 0 "register_operand" "")
        (plus:V4SF (match_operand:V4SF 1 "nonimmediate_operand" "")
                   (match_operand:V4SF 2 "nonimmediate_operand" "")))]
  "TARGET_SSE"
  "ix86_fixup_binary_operands_no_copy (PLUS, V4SFmode, operands);")

(define_insn "*addv4sf3"
  [(set (match_operand:V4SF 0 "register_operand" "=x")
        (plus:V4SF (match_operand:V4SF 1 "nonimmediate_operand" "%0")
                   (match_operand:V4SF 2 "nonimmediate_operand" "xm")))]
  "TARGET_SSE && ix86_binary_operator_ok (PLUS, V4SFmode, operands)"
  "addps\t{%2, %0|%0, %2}"
  [(set_attr "type" "sseadd")
   (set_attr "mode" "V4SF")])

;; addss: the sum replaces element 0 only (vec_merge mask 1); the other
;; elements come from operand 1.
(define_insn "sse_vmaddv4sf3"
  [(set (match_operand:V4SF 0 "register_operand" "=x")
        (vec_merge:V4SF
          (plus:V4SF (match_operand:V4SF 1 "nonimmediate_operand" "%0")
                     (match_operand:V4SF 2 "nonimmediate_operand" "xm"))
          (match_dup 1)
          (const_int 1)))]
  "TARGET_SSE && ix86_binary_operator_ok (PLUS, V4SFmode, operands)"
  "addss\t{%2, %0|%0, %2}"
  [(set_attr "type" "sseadd")
   (set_attr "mode" "SF")])

;; V4SF subtraction.  Operand 1 must be a register and is tied to the
;; destination.
(define_expand "subv4sf3"
  [(set (match_operand:V4SF 0 "register_operand" "")
        (minus:V4SF (match_operand:V4SF 1 "register_operand" "")
                    (match_operand:V4SF 2 "nonimmediate_operand" "")))]
  "TARGET_SSE"
  "ix86_fixup_binary_operands_no_copy (MINUS, V4SFmode, operands);")

(define_insn "*subv4sf3"
  [(set (match_operand:V4SF 0 "register_operand" "=x")
        (minus:V4SF (match_operand:V4SF 1 "register_operand" "0")
                    (match_operand:V4SF 2 "nonimmediate_operand" "xm")))]
  "TARGET_SSE"
  "subps\t{%2, %0|%0, %2}"
  [(set_attr "type" "sseadd")
   (set_attr "mode" "V4SF")])

;; subss: the difference replaces element 0 only.
(define_insn "sse_vmsubv4sf3"
  [(set (match_operand:V4SF 0 "register_operand" "=x")
        (vec_merge:V4SF
          (minus:V4SF (match_operand:V4SF 1 "register_operand" "0")
                      (match_operand:V4SF 2 "nonimmediate_operand" "xm"))
          (match_dup 1)
          (const_int 1)))]
  "TARGET_SSE"
  "subss\t{%2, %0|%0, %2}"
  [(set_attr "type" "sseadd")
   (set_attr "mode" "SF")])

;; V4SF multiplication.
(define_expand "mulv4sf3"
  [(set (match_operand:V4SF 0 "register_operand" "")
        (mult:V4SF (match_operand:V4SF 1 "nonimmediate_operand" "")
                   (match_operand:V4SF 2 "nonimmediate_operand" "")))]
  "TARGET_SSE"
  "ix86_fixup_binary_operands_no_copy (MULT, V4SFmode, operands);")

(define_insn "*mulv4sf3"
  [(set (match_operand:V4SF 0 "register_operand" "=x")
        (mult:V4SF (match_operand:V4SF 1 "nonimmediate_operand" "%0")
                   (match_operand:V4SF 2 "nonimmediate_operand" "xm")))]
  "TARGET_SSE && ix86_binary_operator_ok (MULT, V4SFmode, operands)"
  "mulps\t{%2, %0|%0, %2}"
  [(set_attr "type" "ssemul")
   (set_attr "mode" "V4SF")])

;; mulss: the product replaces element 0 only.
(define_insn "sse_vmmulv4sf3"
  [(set (match_operand:V4SF 0 "register_operand" "=x")
        (vec_merge:V4SF
          (mult:V4SF (match_operand:V4SF 1 "nonimmediate_operand" "%0")
                     (match_operand:V4SF 2 "nonimmediate_operand" "xm"))
          (match_dup 1)
          (const_int 1)))]
  "TARGET_SSE && ix86_binary_operator_ok (MULT, V4SFmode, operands)"
  "mulss\t{%2, %0|%0, %2}"
  [(set_attr "type" "ssemul")
   (set_attr "mode" "SF")])

;; V4SF division.  Operand 1 must be a register and is tied to the
;; destination.
(define_expand "divv4sf3"
  [(set (match_operand:V4SF 0 "register_operand" "")
        (div:V4SF (match_operand:V4SF 1 "register_operand" "")
                  (match_operand:V4SF 2 "nonimmediate_operand" "")))]
  "TARGET_SSE"
  "ix86_fixup_binary_operands_no_copy (DIV, V4SFmode, operands);")

(define_insn "*divv4sf3"
  [(set (match_operand:V4SF 0 "register_operand" "=x")
        (div:V4SF (match_operand:V4SF 1 "register_operand" "0")
                  (match_operand:V4SF 2 "nonimmediate_operand" "xm")))]
  "TARGET_SSE"
  "divps\t{%2, %0|%0, %2}"
  [(set_attr "type" "ssediv")
   (set_attr "mode" "V4SF")])

;; divss: the quotient replaces element 0 only.
(define_insn "sse_vmdivv4sf3"
  [(set (match_operand:V4SF 0 "register_operand" "=x")
        (vec_merge:V4SF
          (div:V4SF (match_operand:V4SF 1 "register_operand" "0")
                    (match_operand:V4SF 2 "nonimmediate_operand" "xm"))
          (match_dup 1)
          (const_int 1)))]
  "TARGET_SSE"
  "divss\t{%2, %0|%0, %2}"
  [(set_attr "type" "ssediv")
   (set_attr "mode" "SF")])

;; rcpps: UNSPEC_RCP applied to all four elements of operand 1.
(define_insn "sse_rcpv4sf2"
  [(set (match_operand:V4SF 0 "register_operand" "=x")
        (unspec:V4SF
          [(match_operand:V4SF 1 "nonimmediate_operand" "xm")] UNSPEC_RCP))]
  "TARGET_SSE"
  "rcpps\t{%1, %0|%0, %1}"
  [(set_attr "type" "sse")
   (set_attr "mode" "V4SF")])

;; rcpss: UNSPEC_RCP result goes into element 0; the remaining elements
;; come from operand 2, which is tied to the destination.
(define_insn "sse_vmrcpv4sf2"
  [(set (match_operand:V4SF 0 "register_operand" "=x")
        (vec_merge:V4SF
          (unspec:V4SF [(match_operand:V4SF 1 "nonimmediate_operand" "xm")]
                       UNSPEC_RCP)
          (match_operand:V4SF 2 "register_operand" "0")
          (const_int 1)))]
  "TARGET_SSE"
  "rcpss\t{%1, %0|%0, %1}"
  [(set_attr "type" "sse")
   (set_attr "mode" "SF")])

;; rsqrtps: UNSPEC_RSQRT applied to all four elements of operand 1.
(define_insn "sse_rsqrtv4sf2"
  [(set (match_operand:V4SF 0 "register_operand" "=x")
        (unspec:V4SF
          [(match_operand:V4SF 1 "nonimmediate_operand" "xm")] UNSPEC_RSQRT))]
  "TARGET_SSE"
  "rsqrtps\t{%1, %0|%0, %1}"
  [(set_attr "type" "sse")
   (set_attr "mode" "V4SF")])

;; rsqrtss: UNSPEC_RSQRT result goes into element 0; the remaining
;; elements come from operand 2, which is tied to the destination.
(define_insn "sse_vmrsqrtv4sf2"
  [(set (match_operand:V4SF 0 "register_operand" "=x")
        (vec_merge:V4SF
          (unspec:V4SF [(match_operand:V4SF 1 "nonimmediate_operand" "xm")]
                       UNSPEC_RSQRT)
          (match_operand:V4SF 2 "register_operand" "0")
          (const_int 1)))]
  "TARGET_SSE"
  "rsqrtss\t{%1, %0|%0, %1}"
  [(set_attr "type" "sse")
   (set_attr "mode" "SF")])

;; sqrtps: square root of all four elements.
(define_insn "sqrtv4sf2"
  [(set (match_operand:V4SF 0 "register_operand" "=x")
        (sqrt:V4SF (match_operand:V4SF 1 "nonimmediate_operand" "xm")))]
  "TARGET_SSE"
  "sqrtps\t{%1, %0|%0, %1}"
  [(set_attr "type" "sse")
   (set_attr "mode" "V4SF")])

;; sqrtss: square root goes into element 0; the remaining elements come
;; from operand 2, which is tied to the destination.
(define_insn "sse_vmsqrtv4sf2"
  [(set (match_operand:V4SF 0 "register_operand" "=x")
        (vec_merge:V4SF
          (sqrt:V4SF (match_operand:V4SF 1 "nonimmediate_operand" "xm"))
          (match_operand:V4SF 2 "register_operand" "0")
          (const_int 1)))]
  "TARGET_SSE"
  "sqrtss\t{%1, %0|%0, %1}"
  [(set_attr "type" "sse")
   (set_attr "mode" "SF")])

;; ???
;; For !flag_finite_math_only, the representation with SMIN/SMAX
;; isn't really correct, as those rtl operators aren't defined when
;; applied to NaNs.  Hopefully the optimizers won't get too smart on us.

;; V4SF maximum.  When NaNs must be honoured (!flag_finite_math_only)
;; operand 1 is forced into a register first, so that only the
;; non-commutative "*smaxv4sf3" insn below can match.
(define_expand "smaxv4sf3"
  [(set (match_operand:V4SF 0 "register_operand" "")
        (smax:V4SF (match_operand:V4SF 1 "nonimmediate_operand" "")
                   (match_operand:V4SF 2 "nonimmediate_operand" "")))]
  "TARGET_SSE"
{
  if (!flag_finite_math_only)
    operands[1] = force_reg (V4SFmode, operands[1]);
  ix86_fixup_binary_operands_no_copy (SMAX, V4SFmode, operands);
})

;; Commutative form ("%0"), only enabled under flag_finite_math_only.
(define_insn "*smaxv4sf3_finite"
  [(set (match_operand:V4SF 0 "register_operand" "=x")
        (smax:V4SF (match_operand:V4SF 1 "nonimmediate_operand" "%0")
                   (match_operand:V4SF 2 "nonimmediate_operand" "xm")))]
  "TARGET_SSE && flag_finite_math_only
   && ix86_binary_operator_ok (SMAX, V4SFmode, operands)"
  "maxps\t{%2, %0|%0, %2}"
  [(set_attr "type" "sse")
   (set_attr "mode" "V4SF")])

;; Non-commutative form: operand 1 is a register tied to the destination.
(define_insn "*smaxv4sf3"
  [(set (match_operand:V4SF 0 "register_operand" "=x")
        (smax:V4SF (match_operand:V4SF 1 "register_operand" "0")
                   (match_operand:V4SF 2 "nonimmediate_operand" "xm")))]
  "TARGET_SSE"
  "maxps\t{%2, %0|%0, %2}"
  [(set_attr "type" "sse")
   (set_attr "mode" "V4SF")])

;; maxss, commutative form for flag_finite_math_only; the result replaces
;; element 0 only.
(define_insn "*sse_vmsmaxv4sf3_finite"
  [(set (match_operand:V4SF 0 "register_operand" "=x")
        (vec_merge:V4SF
          (smax:V4SF (match_operand:V4SF 1 "nonimmediate_operand" "%0")
                     (match_operand:V4SF 2 "nonimmediate_operand" "xm"))
          (match_dup 1)
          (const_int 1)))]
  "TARGET_SSE && flag_finite_math_only
   && ix86_binary_operator_ok (SMAX, V4SFmode, operands)"
  "maxss\t{%2, %0|%0, %2}"
  [(set_attr "type" "sse")
   (set_attr "mode" "SF")])

;; maxss, non-commutative form.
(define_insn "sse_vmsmaxv4sf3"
  [(set (match_operand:V4SF 0 "register_operand" "=x")
        (vec_merge:V4SF
          (smax:V4SF (match_operand:V4SF 1 "register_operand" "0")
                     (match_operand:V4SF 2 "nonimmediate_operand" "xm"))
          (match_dup 1)
          (const_int 1)))]
  "TARGET_SSE"
  "maxss\t{%2, %0|%0, %2}"
  [(set_attr "type" "sse")
   (set_attr "mode" "SF")])

;; V4SF minimum; same structure as smaxv4sf3 above.
(define_expand "sminv4sf3"
  [(set (match_operand:V4SF 0 "register_operand" "")
        (smin:V4SF (match_operand:V4SF 1 "nonimmediate_operand" "")
                   (match_operand:V4SF 2 "nonimmediate_operand" "")))]
  "TARGET_SSE"
{
  if (!flag_finite_math_only)
    operands[1] = force_reg (V4SFmode, operands[1]);
  ix86_fixup_binary_operands_no_copy (SMIN, V4SFmode, operands);
})

;; Commutative form ("%0"), only enabled under flag_finite_math_only.
(define_insn "*sminv4sf3_finite"
  [(set (match_operand:V4SF 0 "register_operand" "=x")
        (smin:V4SF (match_operand:V4SF 1 "nonimmediate_operand" "%0")
                   (match_operand:V4SF 2 "nonimmediate_operand" "xm")))]
  "TARGET_SSE && flag_finite_math_only
   && ix86_binary_operator_ok (SMIN, V4SFmode, operands)"
  "minps\t{%2, %0|%0, %2}"
  [(set_attr "type" "sse")
   (set_attr "mode" "V4SF")])

;; Non-commutative form: operand 1 is a register tied to the destination.
(define_insn "*sminv4sf3"
  [(set (match_operand:V4SF 0 "register_operand" "=x")
        (smin:V4SF (match_operand:V4SF 1 "register_operand" "0")
                   (match_operand:V4SF 2 "nonimmediate_operand" "xm")))]
  "TARGET_SSE"
  "minps\t{%2, %0|%0, %2}"
  [(set_attr "type" "sse")
   (set_attr "mode" "V4SF")])

;; minss, commutative form for flag_finite_math_only; the result replaces
;; element 0 only.
(define_insn "*sse_vmsminv4sf3_finite"
  [(set (match_operand:V4SF 0 "register_operand" "=x")
        (vec_merge:V4SF
          (smin:V4SF (match_operand:V4SF 1 "nonimmediate_operand" "%0")
                     (match_operand:V4SF 2 "nonimmediate_operand" "xm"))
          (match_dup 1)
          (const_int 1)))]
  "TARGET_SSE && flag_finite_math_only
   && ix86_binary_operator_ok (SMIN, V4SFmode, operands)"
  "minss\t{%2, %0|%0, %2}"
  [(set_attr "type" "sse")
   (set_attr "mode" "SF")])

;; minss, non-commutative form.
(define_insn "sse_vmsminv4sf3"
  [(set (match_operand:V4SF 0 "register_operand" "=x")
        (vec_merge:V4SF
          (smin:V4SF (match_operand:V4SF 1 "register_operand" "0")
                     (match_operand:V4SF 2 "nonimmediate_operand" "xm"))
          (match_dup 1)
          (const_int 1)))]
  "TARGET_SSE"
  "minss\t{%2, %0|%0, %2}"
  [(set_attr "type" "sse")
   (set_attr "mode" "SF")])

;; These versions of the min/max patterns implement exactly the operations
;;   min = (op1 < op2 ? op1 : op2)
;;   max = (!(op1 < op2) ? op1 : op2)
;; Their operands are not commutative, and thus they may be used in the
;; presence of -0.0 and NaN.

(define_insn "*ieee_sminv4sf3"
  [(set (match_operand:V4SF 0 "register_operand" "=x")
        (unspec:V4SF [(match_operand:V4SF 1 "register_operand" "0")
                      (match_operand:V4SF 2 "nonimmediate_operand" "xm")]
                     UNSPEC_IEEE_MIN))]
  "TARGET_SSE"
  "minps\t{%2, %0|%0, %2}"
  [(set_attr "type" "sseadd")
   (set_attr "mode" "V4SF")])

(define_insn "*ieee_smaxv4sf3"
  [(set (match_operand:V4SF 0 "register_operand" "=x")
        (unspec:V4SF [(match_operand:V4SF 1 "register_operand" "0")
                      (match_operand:V4SF 2 "nonimmediate_operand" "xm")]
                     UNSPEC_IEEE_MAX))]
  "TARGET_SSE"
  "maxps\t{%2, %0|%0, %2}"
  [(set_attr "type" "sseadd")
   (set_attr "mode" "V4SF")])

(define_insn "*ieee_sminv2df3"
  [(set (match_operand:V2DF 0 "register_operand" "=x")
        (unspec:V2DF [(match_operand:V2DF 1 "register_operand" "0")
                      (match_operand:V2DF 2 "nonimmediate_operand" "xm")]
                     UNSPEC_IEEE_MIN))]
  "TARGET_SSE2"
  "minpd\t{%2, %0|%0, %2}"
  [(set_attr "type" "sseadd")
   (set_attr "mode" "V2DF")])

(define_insn "*ieee_smaxv2df3"
  [(set (match_operand:V2DF 0 "register_operand" "=x")
        (unspec:V2DF [(match_operand:V2DF 1 "register_operand" "0")
                      (match_operand:V2DF 2 "nonimmediate_operand" "xm")]
                     UNSPEC_IEEE_MAX))]
  "TARGET_SSE2"
  "maxpd\t{%2, %0|%0, %2}"
  [(set_attr "type" "sseadd")
   (set_attr "mode" "V2DF")])

;; addsubps subtracts in elements 0 and 2 and adds in elements 1 and 3.
;; A set bit I in the vec_merge mask selects element I from the first
;; (PLUS) vector, so the mask must be 0b1010 = 10 for the RTL to describe
;; what the instruction computes.
(define_insn "sse3_addsubv4sf3"
  [(set (match_operand:V4SF 0 "register_operand" "=x")
        (vec_merge:V4SF
          (plus:V4SF
            (match_operand:V4SF 1 "register_operand" "0")
            (match_operand:V4SF 2 "nonimmediate_operand" "xm"))
          (minus:V4SF (match_dup 1) (match_dup 2))
          (const_int 10)))]
  "TARGET_SSE3"
  "addsubps\t{%2, %0|%0, %2}"
  [(set_attr "type" "sseadd")
   (set_attr "mode" "V4SF")])

;; haddps: result is { op1[0]+op1[1], op1[2]+op1[3],
;;                     op2[0]+op2[1], op2[2]+op2[3] }.
(define_insn "sse3_haddv4sf3"
  [(set (match_operand:V4SF 0 "register_operand" "=x")
        (vec_concat:V4SF
          (vec_concat:V2SF
            (plus:SF
              (vec_select:SF
                (match_operand:V4SF 1 "register_operand" "0")
                (parallel [(const_int 0)]))
              (vec_select:SF (match_dup 1) (parallel [(const_int 1)])))
            (plus:SF
              (vec_select:SF (match_dup 1) (parallel [(const_int 2)]))
              (vec_select:SF (match_dup 1) (parallel [(const_int 3)]))))
          (vec_concat:V2SF
            (plus:SF
              (vec_select:SF
                (match_operand:V4SF 2 "nonimmediate_operand" "xm")
                (parallel [(const_int 0)]))
              (vec_select:SF (match_dup 2) (parallel [(const_int 1)])))
            (plus:SF
              (vec_select:SF (match_dup 2) (parallel [(const_int 2)]))
              (vec_select:SF (match_dup 2) (parallel [(const_int 3)]))))))]
  "TARGET_SSE3"
  "haddps\t{%2, %0|%0, %2}"
  [(set_attr "type" "sseadd")
   (set_attr "mode" "V4SF")])

;; hsubps: result is { op1[0]-op1[1], op1[2]-op1[3],
;;                     op2[0]-op2[1], op2[2]-op2[3] }.
(define_insn "sse3_hsubv4sf3"
  [(set (match_operand:V4SF 0 "register_operand" "=x")
        (vec_concat:V4SF
          (vec_concat:V2SF
            (minus:SF
              (vec_select:SF
                (match_operand:V4SF 1 "register_operand" "0")
                (parallel [(const_int 0)]))
              (vec_select:SF (match_dup 1) (parallel [(const_int 1)])))
            (minus:SF
              (vec_select:SF (match_dup 1) (parallel [(const_int 2)]))
              (vec_select:SF (match_dup 1) (parallel [(const_int 3)]))))
          (vec_concat:V2SF
            (minus:SF
              (vec_select:SF
                (match_operand:V4SF 2 "nonimmediate_operand" "xm")
                (parallel [(const_int 0)]))
              (vec_select:SF (match_dup 2) (parallel [(const_int 1)])))
            (minus:SF
              (vec_select:SF (match_dup 2) (parallel [(const_int 2)]))
              (vec_select:SF (match_dup 2) (parallel [(const_int 3)]))))))]
  "TARGET_SSE3"
  "hsubps\t{%2, %0|%0, %2}"
  [(set_attr "type" "sseadd")
   (set_attr "mode" "V4SF")])

;; Sum reduction of a V4SF.  With SSE3 this is two haddps steps through a
;; fresh temporary; otherwise ix86_expand_reduc_v4sf is used with addv4sf3.
(define_expand "reduc_splus_v4sf"
  [(match_operand:V4SF 0 "register_operand" "")
   (match_operand:V4SF 1 "register_operand" "")]
  "TARGET_SSE"
{
  if (TARGET_SSE3)
    {
      rtx tmp = gen_reg_rtx (V4SFmode);
      emit_insn (gen_sse3_haddv4sf3 (tmp, operands[1], operands[1]));
      emit_insn (gen_sse3_haddv4sf3 (operands[0], tmp, tmp));
    }
  else
    ix86_expand_reduc_v4sf (gen_addv4sf3, operands[0], operands[1]);
  DONE;
})

;; Maximum reduction of a V4SF via ix86_expand_reduc_v4sf.
(define_expand "reduc_smax_v4sf"
  [(match_operand:V4SF 0 "register_operand" "")
   (match_operand:V4SF 1 "register_operand" "")]
  "TARGET_SSE"
{
  ix86_expand_reduc_v4sf (gen_smaxv4sf3, operands[0], operands[1]);
  DONE;
})

;; Minimum reduction of a V4SF via ix86_expand_reduc_v4sf.
(define_expand "reduc_smin_v4sf"
  [(match_operand:V4SF 0 "register_operand" "")
   (match_operand:V4SF 1 "register_operand" "")]
  "TARGET_SSE"
{
  ix86_expand_reduc_v4sf (gen_sminv4sf3, operands[0], operands[1]);
  DONE;
})

;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;;
;; Parallel single-precision floating point comparisons
;;
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;

;; cmp<cond>ps: operand 3 is the comparison operator; %D3 prints the
;; condition part of the mnemonic.
(define_insn "sse_maskcmpv4sf3"
  [(set (match_operand:V4SF 0 "register_operand" "=x")
        (match_operator:V4SF 3 "sse_comparison_operator"
                [(match_operand:V4SF 1 "register_operand" "0")
                 (match_operand:V4SF 2 "nonimmediate_operand" "xm")]))]
  "TARGET_SSE"
  "cmp%D3ps\t{%2, %0|%0, %2}"
  [(set_attr "type" "ssecmp")
   (set_attr "mode" "V4SF")])

;; cmp<cond>ss: the comparison result replaces element 0 only.  Operand 2
;; is restricted to a register here.
(define_insn "sse_vmmaskcmpv4sf3"
  [(set (match_operand:V4SF 0 "register_operand" "=x")
        (vec_merge:V4SF
         (match_operator:V4SF 3 "sse_comparison_operator"
                [(match_operand:V4SF 1 "register_operand" "0")
                 (match_operand:V4SF 2 "register_operand" "x")])
         (match_dup 1)
         (const_int 1)))]
  "TARGET_SSE"
  "cmp%D3ss\t{%2, %0|%0, %2}"
  [(set_attr "type" "ssecmp")
   (set_attr "mode" "SF")])

;; comiss: compare element 0 of the two operands and set the flags
;; register in CCFP mode.
(define_insn "sse_comi"
  [(set (reg:CCFP FLAGS_REG)
        (compare:CCFP
          (vec_select:SF
            (match_operand:V4SF 0 "register_operand" "x")
            (parallel [(const_int 0)]))
          (vec_select:SF
            (match_operand:V4SF 1 "nonimmediate_operand" "xm")
            (parallel [(const_int 0)]))))]
  "TARGET_SSE"
  "comiss\t{%1, %0|%0, %1}"
  [(set_attr "type" "ssecomi")
   (set_attr "mode" "SF")])

;; ucomiss: as sse_comi, but the flags are set in CCFPU mode.
(define_insn "sse_ucomi"
  [(set (reg:CCFPU FLAGS_REG)
        (compare:CCFPU
          (vec_select:SF
            (match_operand:V4SF 0 "register_operand" "x")
            (parallel [(const_int 0)]))
          (vec_select:SF
            (match_operand:V4SF 1 "nonimmediate_operand" "xm")
            (parallel [(const_int 0)]))))]
  "TARGET_SSE"
  "ucomiss\t{%1, %0|%0, %1}"
  [(set_attr "type" "ssecomi")
   (set_attr "mode" "SF")])

;; Vector conditional select on V4SF.  The expansion is attempted by
;; ix86_expand_fp_vcond; if that reports failure the expander FAILs.
(define_expand "vcondv4sf"
  [(set (match_operand:V4SF 0 "register_operand" "")
        (if_then_else:V4SF
          (match_operator 3 ""
            [(match_operand:V4SF 4 "nonimmediate_operand" "")
             (match_operand:V4SF 5 "nonimmediate_operand" "")])
          (match_operand:V4SF 1 "general_operand" "")
          (match_operand:V4SF 2 "general_operand" "")))]
  "TARGET_SSE"
{
  if (!ix86_expand_fp_vcond (operands))
    FAIL;
  DONE;
})

;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;;
;; Parallel single-precision floating point logical operations
;;
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;

;; Bitwise AND on V4SF.  The expander only fixes up the operands; the insn
;; that follows matches the result.
(define_expand "andv4sf3"
  [(set (match_operand:V4SF 0 "register_operand" "")
        (and:V4SF (match_operand:V4SF 1 "nonimmediate_operand" "")
                  (match_operand:V4SF 2 "nonimmediate_operand" "")))]
  "TARGET_SSE"
{
  ix86_fixup_binary_operands_no_copy (AND, V4SFmode, operands);
})

(define_insn "*andv4sf3"
  [(set (match_operand:V4SF 0 "register_operand" "=x")
        (and:V4SF (match_operand:V4SF 1 "nonimmediate_operand" "%0")
                  (match_operand:V4SF 2 "nonimmediate_operand" "xm")))]
  "TARGET_SSE && ix86_binary_operator_ok (AND, V4SFmode, operands)"
  "andps\t{%2, %0|%0, %2}"
  [(set_attr "type" "sselog")
   (set_attr "mode" "V4SF")])

;; andnps: (NOT operand 1) AND operand 2.  Operand 1 is tied to the
;; destination.
(define_insn "sse_nandv4sf3"
  [(set (match_operand:V4SF 0 "register_operand" "=x")
        (and:V4SF (not:V4SF (match_operand:V4SF 1 "register_operand" "0"))
                  (match_operand:V4SF 2 "nonimmediate_operand" "xm")))]
  "TARGET_SSE"
  "andnps\t{%2, %0|%0, %2}"
  [(set_attr "type" "sselog")
   (set_attr "mode" "V4SF")])

;; Bitwise inclusive OR on V4SF.
(define_expand "iorv4sf3"
  [(set (match_operand:V4SF 0 "register_operand" "")
        (ior:V4SF (match_operand:V4SF 1 "nonimmediate_operand" "")
                  (match_operand:V4SF 2 "nonimmediate_operand" "")))]
  "TARGET_SSE"
{
  ix86_fixup_binary_operands_no_copy (IOR, V4SFmode, operands);
})

(define_insn "*iorv4sf3"
  [(set (match_operand:V4SF 0 "register_operand" "=x")
        (ior:V4SF (match_operand:V4SF 1 "nonimmediate_operand" "%0")
                  (match_operand:V4SF 2 "nonimmediate_operand" "xm")))]
  "TARGET_SSE && ix86_binary_operator_ok (IOR, V4SFmode, operands)"
  "orps\t{%2, %0|%0, %2}"
  [(set_attr "type" "sselog")
   (set_attr "mode" "V4SF")])

;; Bitwise exclusive OR on V4SF.
(define_expand "xorv4sf3"
  [(set (match_operand:V4SF 0 "register_operand" "")
        (xor:V4SF (match_operand:V4SF 1 "nonimmediate_operand" "")
                  (match_operand:V4SF 2 "nonimmediate_operand" "")))]
  "TARGET_SSE"
{
  ix86_fixup_binary_operands_no_copy (XOR, V4SFmode, operands);
})

(define_insn "*xorv4sf3"
  [(set (match_operand:V4SF 0 "register_operand" "=x")
        (xor:V4SF (match_operand:V4SF 1 "nonimmediate_operand" "%0")
                  (match_operand:V4SF 2 "nonimmediate_operand" "xm")))]
  "TARGET_SSE && ix86_binary_operator_ok (XOR, V4SFmode, operands)"
  "xorps\t{%2, %0|%0, %2}"
  [(set_attr "type" "sselog")
   (set_attr "mode" "V4SF")])

;; Also define scalar versions.  These are used for abs, neg, and
;; conditional move.  Using subregs into vector modes causes register
;; allocation lossage.
;; These patterns do not allow memory operands
;; because the native instructions read the full 128-bits.

;; Scalar SF bitwise AND, emitted as the full-width andps.
(define_insn "*andsf3"
  [(set (match_operand:SF 0 "register_operand" "=x")
        (and:SF (match_operand:SF 1 "register_operand" "0")
                (match_operand:SF 2 "register_operand" "x")))]
  "TARGET_SSE"
  "andps\t{%2, %0|%0, %2}"
  [(set_attr "type" "sselog")
   (set_attr "mode" "V4SF")])

;; Scalar SF (NOT operand 1) AND operand 2, emitted as andnps.
(define_insn "*nandsf3"
  [(set (match_operand:SF 0 "register_operand" "=x")
        (and:SF (not:SF (match_operand:SF 1 "register_operand" "0"))
                (match_operand:SF 2 "register_operand" "x")))]
  "TARGET_SSE"
  "andnps\t{%2, %0|%0, %2}"
  [(set_attr "type" "sselog")
   (set_attr "mode" "V4SF")])

;; Scalar SF bitwise inclusive OR, emitted as orps.
(define_insn "*iorsf3"
  [(set (match_operand:SF 0 "register_operand" "=x")
        (ior:SF (match_operand:SF 1 "register_operand" "0")
                (match_operand:SF 2 "register_operand" "x")))]
  "TARGET_SSE"
  "orps\t{%2, %0|%0, %2}"
  [(set_attr "type" "sselog")
   (set_attr "mode" "V4SF")])

;; Scalar SF bitwise exclusive OR, emitted as xorps.
(define_insn "*xorsf3"
  [(set (match_operand:SF 0 "register_operand" "=x")
        (xor:SF (match_operand:SF 1 "register_operand" "0")
                (match_operand:SF 2 "register_operand" "x")))]
  "TARGET_SSE"
  "xorps\t{%2, %0|%0, %2}"
  [(set_attr "type" "sselog")
   (set_attr "mode" "V4SF")])

;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;;
;; Parallel single-precision floating point conversion operations
;;
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;

;; cvtpi2ps: convert a V2SI (MMX register "y" or memory) to float and merge
;; it into elements 0 and 1 (mask 3) of operand 1.
(define_insn "sse_cvtpi2ps"
  [(set (match_operand:V4SF 0 "register_operand" "=x")
        (vec_merge:V4SF
          (vec_duplicate:V4SF
            (float:V2SF (match_operand:V2SI 2 "nonimmediate_operand" "ym")))
          (match_operand:V4SF 1 "register_operand" "0")
          (const_int 3)))]
  "TARGET_SSE"
  "cvtpi2ps\t{%2, %0|%0, %2}"
  [(set_attr "type" "ssecvt")
   (set_attr "mode" "V4SF")])

;; cvtps2pi: convert elements 0 and 1 to integers (UNSPEC_FIX_NOTRUNC) and
;; place them in an MMX register.
(define_insn "sse_cvtps2pi"
  [(set (match_operand:V2SI 0 "register_operand" "=y")
        (vec_select:V2SI
          (unspec:V4SI [(match_operand:V4SF 1 "nonimmediate_operand" "xm")]
                       UNSPEC_FIX_NOTRUNC)
          (parallel [(const_int 0) (const_int 1)])))]
  "TARGET_SSE"
  "cvtps2pi\t{%1, %0|%0, %1}"
  [(set_attr "type" "ssecvt")
   (set_attr "unit" "mmx")
   (set_attr "mode" "DI")])

;; cvttps2pi: as sse_cvtps2pi, but with the "fix" (truncating) conversion.
;; NOTE(review): the mode attribute is "SF" here but "DI" on sse_cvtps2pi,
;; which has the same operand shapes -- confirm which one is intended.
(define_insn "sse_cvttps2pi"
  [(set (match_operand:V2SI 0 "register_operand" "=y")
        (vec_select:V2SI
          (fix:V4SI (match_operand:V4SF 1 "nonimmediate_operand" "xm"))
          (parallel [(const_int 0) (const_int 1)])))]
  "TARGET_SSE"
  "cvttps2pi\t{%1, %0|%0, %1}"
  [(set_attr "type" "ssecvt")
   (set_attr "unit" "mmx")
   (set_attr "mode" "SF")])

;; cvtsi2ss: convert an SImode integer to float and merge it into element 0
;; of operand 1.  Alternative 0 takes a register source, alternative 1 a
;; memory source; athlon_decode differs between them.
(define_insn "sse_cvtsi2ss"
  [(set (match_operand:V4SF 0 "register_operand" "=x,x")
        (vec_merge:V4SF
          (vec_duplicate:V4SF
            (float:SF (match_operand:SI 2 "nonimmediate_operand" "r,m")))
          (match_operand:V4SF 1 "register_operand" "0,0")
          (const_int 1)))]
  "TARGET_SSE"
  "cvtsi2ss\t{%2, %0|%0, %2}"
  [(set_attr "type" "sseicvt")
   (set_attr "athlon_decode" "vector,double")
   (set_attr "mode" "SF")])

;; cvtsi2ssq: DImode-source variant of sse_cvtsi2ss for 64-bit targets.
;; The alternatives are kept disjoint (register / memory), as in
;; sse_cvtsi2ss, so that each athlon_decode value applies to exactly one
;; kind of source operand.
(define_insn "sse_cvtsi2ssq"
  [(set (match_operand:V4SF 0 "register_operand" "=x,x")
        (vec_merge:V4SF
          (vec_duplicate:V4SF
            (float:SF (match_operand:DI 2 "nonimmediate_operand" "r,m")))
          (match_operand:V4SF 1 "register_operand" "0,0")
          (const_int 1)))]
  "TARGET_SSE && TARGET_64BIT"
  "cvtsi2ssq\t{%2, %0|%0, %2}"
  [(set_attr "type" "sseicvt")
   (set_attr "athlon_decode" "vector,double")
   (set_attr "mode" "SF")])

;; cvtss2si: convert element 0 to an SImode integer (UNSPEC_FIX_NOTRUNC).
(define_insn "sse_cvtss2si"
  [(set (match_operand:SI 0 "register_operand" "=r,r")
        (unspec:SI
          [(vec_select:SF
             (match_operand:V4SF 1 "nonimmediate_operand" "x,m")
             (parallel [(const_int 0)]))]
          UNSPEC_FIX_NOTRUNC))]
  "TARGET_SSE"
  "cvtss2si\t{%1, %0|%0, %1}"
  [(set_attr "type" "sseicvt")
   (set_attr "athlon_decode" "double,vector")
   (set_attr "mode" "SI")])

;; cvtss2siq: DImode-result variant of sse_cvtss2si for 64-bit targets.
(define_insn "sse_cvtss2siq"
  [(set (match_operand:DI 0 "register_operand" "=r,r")
        (unspec:DI
          [(vec_select:SF
             (match_operand:V4SF 1 "nonimmediate_operand" "x,m")
             (parallel [(const_int 0)]))]
          UNSPEC_FIX_NOTRUNC))]
  "TARGET_SSE && TARGET_64BIT"
  "cvtss2siq\t{%1, %0|%0, %1}"
  [(set_attr "type" "sseicvt")
   (set_attr "athlon_decode" "double,vector")
   (set_attr "mode" "DI")])

;; cvttss2si: convert element 0 to an SImode integer with "fix".
(define_insn "sse_cvttss2si"
  [(set (match_operand:SI 0 "register_operand" "=r,r")
        (fix:SI
          (vec_select:SF
            (match_operand:V4SF 1 "nonimmediate_operand" "x,m")
            (parallel [(const_int 0)]))))]
  "TARGET_SSE"
  "cvttss2si\t{%1, %0|%0, %1}"
  [(set_attr "type" "sseicvt")
   (set_attr "athlon_decode" "double,vector")
   (set_attr "mode" "SI")])

;; cvttss2siq: DImode-result variant of sse_cvttss2si for 64-bit targets.
(define_insn "sse_cvttss2siq"
  [(set (match_operand:DI 0 "register_operand" "=r,r")
        (fix:DI
          (vec_select:SF
            (match_operand:V4SF 1 "nonimmediate_operand" "x,m")
            (parallel [(const_int 0)]))))]
  "TARGET_SSE && TARGET_64BIT"
  "cvttss2siq\t{%1, %0|%0, %1}"
  [(set_attr "type" "sseicvt")
   (set_attr "athlon_decode" "double,vector")
   (set_attr "mode" "DI")])

;; cvtdq2ps: convert V4SI to V4SF.  The insn mode is V4SF, matching the
;; result; no double-precision data is involved.
(define_insn "sse2_cvtdq2ps"
  [(set (match_operand:V4SF 0 "register_operand" "=x")
        (float:V4SF (match_operand:V4SI 1 "nonimmediate_operand" "xm")))]
  "TARGET_SSE2"
  "cvtdq2ps\t{%1, %0|%0, %1}"
  [(set_attr "type" "ssecvt")
   (set_attr "mode" "V4SF")])

;; cvtps2dq: convert V4SF to V4SI (UNSPEC_FIX_NOTRUNC).
(define_insn "sse2_cvtps2dq"
  [(set (match_operand:V4SI 0 "register_operand" "=x")
        (unspec:V4SI [(match_operand:V4SF 1 "nonimmediate_operand" "xm")]
                     UNSPEC_FIX_NOTRUNC))]
  "TARGET_SSE2"
  "cvtps2dq\t{%1, %0|%0, %1}"
  [(set_attr "type" "ssecvt")
   (set_attr "mode" "TI")])

(define_insn "sse2_cvttps2dq"
  [(set (match_operand:V4SI 0 "register_operand" "=x")
        (fix:V4SI (match_operand:V4SF 1 "nonimmediate_operand" "xm")))]
  "TARGET_SSE2"
1052 "cvttps2dq\t{%1, %0|%0, %1}" 1053 [(set_attr "type" "ssecvt") 1054 (set_attr "mode" "TI")]) 1055 1056;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; 1057;; 1058;; Parallel single-precision floating point element swizzling 1059;; 1060;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; 1061 1062(define_insn "sse_movhlps" 1063 [(set (match_operand:V4SF 0 "nonimmediate_operand" "=x,x,m") 1064 (vec_select:V4SF 1065 (vec_concat:V8SF 1066 (match_operand:V4SF 1 "nonimmediate_operand" " 0,o,x") 1067 (match_operand:V4SF 2 "nonimmediate_operand" " x,0,0")) 1068 (parallel [(const_int 6) 1069 (const_int 7) 1070 (const_int 2) 1071 (const_int 3)])))] 1072 "TARGET_SSE && !(MEM_P (operands[1]) && MEM_P (operands[2]))" 1073 "@ 1074 movhlps\t{%2, %0|%0, %2} 1075 movlps\t{%H1, %0|%0, %H1} 1076 movhps\t{%1, %0|%0, %1}" 1077 [(set_attr "type" "ssemov") 1078 (set_attr "mode" "V4SF,V2SF,V2SF")]) 1079 1080(define_insn "sse_movlhps" 1081 [(set (match_operand:V4SF 0 "nonimmediate_operand" "=x,x,o") 1082 (vec_select:V4SF 1083 (vec_concat:V8SF 1084 (match_operand:V4SF 1 "nonimmediate_operand" " 0,0,0") 1085 (match_operand:V4SF 2 "nonimmediate_operand" " x,m,x")) 1086 (parallel [(const_int 0) 1087 (const_int 1) 1088 (const_int 4) 1089 (const_int 5)])))] 1090 "TARGET_SSE && ix86_binary_operator_ok (UNKNOWN, V4SFmode, operands)" 1091 "@ 1092 movlhps\t{%2, %0|%0, %2} 1093 movhps\t{%2, %0|%0, %2} 1094 movlps\t{%2, %H0|%H0, %2}" 1095 [(set_attr "type" "ssemov") 1096 (set_attr "mode" "V4SF,V2SF,V2SF")]) 1097 1098(define_insn "sse_unpckhps" 1099 [(set (match_operand:V4SF 0 "register_operand" "=x") 1100 (vec_select:V4SF 1101 (vec_concat:V8SF 1102 (match_operand:V4SF 1 "register_operand" "0") 1103 (match_operand:V4SF 2 "nonimmediate_operand" "xm")) 1104 (parallel [(const_int 2) (const_int 6) 1105 (const_int 3) (const_int 7)])))] 1106 "TARGET_SSE" 1107 "unpckhps\t{%2, %0|%0, %2}" 1108 [(set_attr "type" "sselog") 1109 (set_attr "mode" "V4SF")]) 1110 

;; Interleave the low halves: elements 0, 4, 1, 5 of the concatenation.
(define_insn "sse_unpcklps"
  [(set (match_operand:V4SF 0 "register_operand" "=x")
        (vec_select:V4SF
          (vec_concat:V8SF
            (match_operand:V4SF 1 "register_operand" "0")
            (match_operand:V4SF 2 "nonimmediate_operand" "xm"))
          (parallel [(const_int 0) (const_int 4)
                     (const_int 1) (const_int 5)])))]
  "TARGET_SSE"
  "unpcklps\t{%2, %0|%0, %2}"
  [(set_attr "type" "sselog")
   (set_attr "mode" "V4SF")])

;; These are modeled with the same vec_concat as the others so that we
;; capture users of shufps that can use the new instructions
(define_insn "sse3_movshdup"
  [(set (match_operand:V4SF 0 "register_operand" "=x")
        (vec_select:V4SF
          (vec_concat:V8SF
            (match_operand:V4SF 1 "nonimmediate_operand" "xm")
            (match_dup 1))
          (parallel [(const_int 1)
                     (const_int 1)
                     (const_int 7)
                     (const_int 7)])))]
  "TARGET_SSE3"
  "movshdup\t{%1, %0|%0, %1}"
  [(set_attr "type" "sse")
   (set_attr "mode" "V4SF")])

;; Same modeling as movshdup, selecting elements 0, 0, 6, 6.
(define_insn "sse3_movsldup"
  [(set (match_operand:V4SF 0 "register_operand" "=x")
        (vec_select:V4SF
          (vec_concat:V8SF
            (match_operand:V4SF 1 "nonimmediate_operand" "xm")
            (match_dup 1))
          (parallel [(const_int 0)
                     (const_int 0)
                     (const_int 6)
                     (const_int 6)])))]
  "TARGET_SSE3"
  "movsldup\t{%1, %0|%0, %1}"
  [(set_attr "type" "sse")
   (set_attr "mode" "V4SF")])

;; Entry point taking the 8-bit shufps immediate in operand 3.  The
;; immediate is split into four 2-bit selectors; the upper two are
;; biased by 4 so that they index the second half of the V8SF concat
;; used by sse_shufps_1.
(define_expand "sse_shufps"
  [(match_operand:V4SF 0 "register_operand" "")
   (match_operand:V4SF 1 "register_operand" "")
   (match_operand:V4SF 2 "nonimmediate_operand" "")
   (match_operand:SI 3 "const_int_operand" "")]
  "TARGET_SSE"
{
  int imm = INTVAL (operands[3]);
  rtx sel0 = GEN_INT (imm & 3);
  rtx sel1 = GEN_INT ((imm >> 2) & 3);
  rtx sel2 = GEN_INT (((imm >> 4) & 3) + 4);
  rtx sel3 = GEN_INT (((imm >> 6) & 3) + 4);

  emit_insn (gen_sse_shufps_1 (operands[0], operands[1], operands[2],
                               sel0, sel1, sel2, sel3));
  DONE;
})

;; The shuffle proper.  Operands 3-6 are the per-element selectors; at
;; output time they are folded back into the single immediate byte,
;; which is stored in operands[3] for printing.
(define_insn "sse_shufps_1"
  [(set (match_operand:V4SF 0 "register_operand" "=x")
        (vec_select:V4SF
          (vec_concat:V8SF
            (match_operand:V4SF 1 "register_operand" "0")
            (match_operand:V4SF 2 "nonimmediate_operand" "xm"))
          (parallel [(match_operand 3 "const_0_to_3_operand" "")
                     (match_operand 4 "const_0_to_3_operand" "")
                     (match_operand 5 "const_4_to_7_operand" "")
                     (match_operand 6 "const_4_to_7_operand" "")])))]
  "TARGET_SSE"
{
  int imm = (INTVAL (operands[3])
             | (INTVAL (operands[4]) << 2)
             | ((INTVAL (operands[5]) - 4) << 4)
             | ((INTVAL (operands[6]) - 4) << 6));

  operands[3] = GEN_INT (imm);
  return "shufps\t{%3, %2, %0|%0, %2, %3}";
}
  [(set_attr "type" "sselog")
   (set_attr "mode" "V4SF")])

;; Extract the high two elements (2 and 3) of a V4SF.
(define_insn "sse_storehps"
  [(set (match_operand:V2SF 0 "nonimmediate_operand" "=m,x,x")
        (vec_select:V2SF
          (match_operand:V4SF 1 "nonimmediate_operand" "x,x,o")
          (parallel [(const_int 2) (const_int 3)])))]
  "TARGET_SSE"
  "@
   movhps\t{%1, %0|%0, %1}
   movhlps\t{%1, %0|%0, %1}
   movlps\t{%H1, %0|%0, %H1}"
  [(set_attr "type" "ssemov")
   (set_attr "mode" "V2SF,V4SF,V2SF")])

;; Keep the low two elements of operand 1 and place operand 2 in the
;; high half.
(define_insn "sse_loadhps"
  [(set (match_operand:V4SF 0 "nonimmediate_operand" "=x,x,o")
        (vec_concat:V4SF
          (vec_select:V2SF
            (match_operand:V4SF 1 "nonimmediate_operand" "0,0,0")
            (parallel [(const_int 0) (const_int 1)]))
          (match_operand:V2SF 2 "nonimmediate_operand" "m,x,x")))]
  "TARGET_SSE"
  "@
   movhps\t{%2, %0|%0, %2}
   movlhps\t{%2, %0|%0, %2}
   movlps\t{%2, %H0|%H0, %2}"
  [(set_attr "type" "ssemov")
   (set_attr "mode" "V2SF,V4SF,V2SF")])

;; Extract the low two elements (0 and 1) of a V4SF.
(define_insn "sse_storelps"
  [(set (match_operand:V2SF 0 "nonimmediate_operand" "=m,x,x")
        (vec_select:V2SF
          (match_operand:V4SF 1 "nonimmediate_operand" "x,x,m")
          (parallel [(const_int 0) (const_int 1)])))]
  "TARGET_SSE"
  "@
   movlps\t{%1, %0|%0, %1}
   movaps\t{%1, %0|%0, %1}
   movlps\t{%1, %0|%0, %1}"
  [(set_attr "type" "ssemov")
   (set_attr "mode" "V2SF,V4SF,V2SF")])

;; Place operand 2 in the low half and keep the high two elements of
;; operand 1.
(define_insn "sse_loadlps"
  [(set (match_operand:V4SF 0 "nonimmediate_operand" "=x,x,m")
        (vec_concat:V4SF
          (match_operand:V2SF 2 "nonimmediate_operand" "0,m,x")
          (vec_select:V2SF
            (match_operand:V4SF 1 "nonimmediate_operand" "x,0,0")
            (parallel [(const_int 2) (const_int 3)]))))]
  "TARGET_SSE"
  "@
   shufps\t{$0xe4, %1, %0|%0, %1, 0xe4}
   movlps\t{%2, %0|%0, %2}
   movlps\t{%2, %0|%0, %2}"
  [(set_attr "type" "sselog,ssemov,ssemov")
   (set_attr "mode" "V4SF,V2SF,V2SF")])

;; Replace element 0 of operand 1 with element 0 of operand 2.
(define_insn "sse_movss"
  [(set (match_operand:V4SF 0 "register_operand" "=x")
        (vec_merge:V4SF
          (match_operand:V4SF 2 "register_operand" "x")
          (match_operand:V4SF 1 "register_operand" "0")
          (const_int 1)))]
  "TARGET_SSE"
  "movss\t{%2, %0|%0, %2}"
  [(set_attr "type" "ssemov")
   (set_attr "mode" "SF")])

;; Broadcast a scalar already living in the destination register to
;; all four elements.
(define_insn "*vec_dupv4sf"
  [(set (match_operand:V4SF 0 "register_operand" "=x")
        (vec_duplicate:V4SF
          (match_operand:SF 1 "register_operand" "0")))]
  "TARGET_SSE"
  "shufps\t{$0, %0, %0|%0, %0, 0}"
  [(set_attr "type" "sselog1")
   (set_attr "mode" "V4SF")])

;; ??? In theory we can match memory for the MMX alternative, but allowing
;; nonimmediate_operand for operand 2 and *not* allowing memory for the SSE
;; alternatives pretty much forces the MMX alternative to be chosen.
;; Build a V2SF from two scalars.  Alternatives 0-1 use SSE registers,
;; alternatives 2-3 use MMX registers.
(define_insn "*sse_concatv2sf"
  [(set (match_operand:V2SF 0 "register_operand"     "=x,x,*y,*y")
        (vec_concat:V2SF
          (match_operand:SF 1 "nonimmediate_operand" " 0,m, 0, m")
          (match_operand:SF 2 "reg_or_0_operand"     " x,C,*y, C")))]
  "TARGET_SSE"
  "@
   unpcklps\t{%2, %0|%0, %2}
   movss\t{%1, %0|%0, %1}
   punpckldq\t{%2, %0|%0, %2}
   movd\t{%1, %0|%0, %1}"
  [(set_attr "type" "sselog,ssemov,mmxcvt,mmxmov")
   (set_attr "mode" "V4SF,SF,DI,DI")])

;; Build a V4SF from two V2SF halves.
(define_insn "*sse_concatv4sf"
  [(set (match_operand:V4SF 0 "register_operand"       "=x,x")
        (vec_concat:V4SF
          (match_operand:V2SF 1 "register_operand"     " 0,0")
          (match_operand:V2SF 2 "nonimmediate_operand" " x,m")))]
  "TARGET_SSE"
  "@
   movlhps\t{%2, %0|%0, %2}
   movhps\t{%2, %0|%0, %2}"
  [(set_attr "type" "ssemov")
   (set_attr "mode" "V4SF,V2SF")])

;; Vector initialization is delegated to ix86_expand_vector_init.
(define_expand "vec_initv4sf"
  [(match_operand:V4SF 0 "register_operand" "")
   (match_operand 1 "" "")]
  "TARGET_SSE"
{
  ix86_expand_vector_init (false, operands[0], operands[1]);
  DONE;
})

;; Set element 0 of a V4SF from a scalar.  The memory-destination
;; alternative emits "#" and is handled by the split that follows.
(define_insn "*vec_setv4sf_0"
  [(set (match_operand:V4SF 0 "nonimmediate_operand"  "=x,x,Y ,m")
        (vec_merge:V4SF
          (vec_duplicate:V4SF
            (match_operand:SF 2 "general_operand"     " x,m,*r,x*rfF"))
          (match_operand:V4SF 1 "vector_move_operand" " 0,C,C ,0")
          (const_int 1)))]
  "TARGET_SSE"
  "@
   movss\t{%2, %0|%0, %2}
   movss\t{%2, %0|%0, %2}
   movd\t{%2, %0|%0, %2}
   #"
  [(set_attr "type" "ssemov")
   (set_attr "mode" "SF")])

;; After reload, storing into element 0 of a vector in memory becomes a
;; plain SFmode store at offset 0 of that memory.
(define_split
  [(set (match_operand:V4SF 0 "memory_operand" "")
        (vec_merge:V4SF
          (vec_duplicate:V4SF
            (match_operand:SF 1 "nonmemory_operand" ""))
          (match_dup 0)
          (const_int 1)))]
  "TARGET_SSE && reload_completed"
  [(const_int 0)]
{
  emit_move_insn (adjust_address (operands[0], SFmode, 0), operands[1]);
  DONE;
})

;; Generic element insertion; operand 2 is the element index.
(define_expand "vec_setv4sf"
  [(match_operand:V4SF 0 "register_operand" "")
   (match_operand:SF 1 "register_operand" "")
   (match_operand 2 "const_int_operand" "")]
  "TARGET_SSE"
{
  ix86_expand_vector_set (false, operands[0], operands[1],
                          INTVAL (operands[2]));
  DONE;
})

;; Extract element 0 of a V4SF.  Always split after reload into an
;; SFmode move from the same register number, or from the low part of
;; the memory operand.
(define_insn_and_split "*vec_extractv4sf_0"
  [(set (match_operand:SF 0 "nonimmediate_operand" "=x,m,fr")
        (vec_select:SF
          (match_operand:V4SF 1 "nonimmediate_operand" "xm,x,m")
          (parallel [(const_int 0)])))]
  "TARGET_SSE && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
  "#"
  "&& reload_completed"
  [(const_int 0)]
{
  rtx op1 = operands[1];
  if (REG_P (op1))
    op1 = gen_rtx_REG (SFmode, REGNO (op1));
  else
    op1 = gen_lowpart (SFmode, op1);
  emit_move_insn (operands[0], op1);
  DONE;
})

;; Generic element extraction; operand 2 is the element index.
(define_expand "vec_extractv4sf"
  [(match_operand:SF 0 "register_operand" "")
   (match_operand:V4SF 1 "register_operand" "")
   (match_operand 2 "const_int_operand" "")]
  "TARGET_SSE"
{
  ix86_expand_vector_extract (false, operands[0], operands[1],
                              INTVAL (operands[2]));
  DONE;
})

;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;;
;; Parallel double-precision floating point arithmetic
;;
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;

;; Negation and absolute value are expanded by
;; ix86_expand_fp_absneg_operator.
(define_expand "negv2df2"
  [(set (match_operand:V2DF 0 "register_operand" "")
        (neg:V2DF (match_operand:V2DF 1 "nonimmediate_operand" "")))]
  "TARGET_SSE2"
  "ix86_expand_fp_absneg_operator (NEG, V2DFmode, operands); DONE;")

(define_expand "absv2df2"
  [(set (match_operand:V2DF 0 "register_operand" "")
        (abs:V2DF (match_operand:V2DF 1 "nonimmediate_operand" "")))]
  "TARGET_SSE2"
  "ix86_expand_fp_absneg_operator (ABS, V2DFmode, operands); DONE;")

;; Packed double addition.
(define_expand "addv2df3"
  [(set (match_operand:V2DF 0 "register_operand" "")
        (plus:V2DF (match_operand:V2DF 1 "nonimmediate_operand" "")
                   (match_operand:V2DF 2 "nonimmediate_operand" "")))]
  "TARGET_SSE2"
  "ix86_fixup_binary_operands_no_copy (PLUS, V2DFmode, operands);")

(define_insn "*addv2df3"
  [(set (match_operand:V2DF 0 "register_operand" "=x")
        (plus:V2DF (match_operand:V2DF 1 "nonimmediate_operand" "%0")
                   (match_operand:V2DF 2 "nonimmediate_operand" "xm")))]
  "TARGET_SSE2 && ix86_binary_operator_ok (PLUS, V2DFmode, operands)"
  "addpd\t{%2, %0|%0, %2}"
  [(set_attr "type" "sseadd")
   (set_attr "mode" "V2DF")])

;; Scalar add in element 0; element 1 is taken from operand 1.  The
;; operands are V2DF, so V2DFmode is what the operand check is given.
(define_insn "sse2_vmaddv2df3"
  [(set (match_operand:V2DF 0 "register_operand" "=x")
        (vec_merge:V2DF
          (plus:V2DF (match_operand:V2DF 1 "nonimmediate_operand" "%0")
                     (match_operand:V2DF 2 "nonimmediate_operand" "xm"))
          (match_dup 1)
          (const_int 1)))]
  "TARGET_SSE2 && ix86_binary_operator_ok (PLUS, V2DFmode, operands)"
  "addsd\t{%2, %0|%0, %2}"
  [(set_attr "type" "sseadd")
   (set_attr "mode" "DF")])

;; Packed double subtraction.
(define_expand "subv2df3"
  [(set (match_operand:V2DF 0 "register_operand" "")
        (minus:V2DF (match_operand:V2DF 1 "nonimmediate_operand" "")
                    (match_operand:V2DF 2 "nonimmediate_operand" "")))]
  "TARGET_SSE2"
  "ix86_fixup_binary_operands_no_copy (MINUS, V2DFmode, operands);")

(define_insn "*subv2df3"
  [(set (match_operand:V2DF 0 "register_operand" "=x")
        (minus:V2DF (match_operand:V2DF 1 "register_operand" "0")
                    (match_operand:V2DF 2 "nonimmediate_operand" "xm")))]
  "TARGET_SSE2"
  "subpd\t{%2, %0|%0, %2}"
  [(set_attr "type" "sseadd")
   (set_attr "mode" "V2DF")])

;; Scalar subtract in element 0; element 1 is taken from operand 1.
(define_insn "sse2_vmsubv2df3"
  [(set (match_operand:V2DF 0 "register_operand" "=x")
        (vec_merge:V2DF
          (minus:V2DF (match_operand:V2DF 1 "register_operand" "0")
                      (match_operand:V2DF 2 "nonimmediate_operand" "xm"))
          (match_dup 1)
          (const_int 1)))]
  "TARGET_SSE2"
  "subsd\t{%2, %0|%0, %2}"
  [(set_attr "type" "sseadd")
   (set_attr "mode" "DF")])

;; Packed double multiplication.
(define_expand "mulv2df3"
  [(set (match_operand:V2DF 0 "register_operand" "")
        (mult:V2DF (match_operand:V2DF 1 "nonimmediate_operand" "")
                   (match_operand:V2DF 2 "nonimmediate_operand" "")))]
  "TARGET_SSE2"
  "ix86_fixup_binary_operands_no_copy (MULT, V2DFmode, operands);")

(define_insn "*mulv2df3"
  [(set (match_operand:V2DF 0 "register_operand" "=x")
        (mult:V2DF (match_operand:V2DF 1 "nonimmediate_operand" "%0")
                   (match_operand:V2DF 2 "nonimmediate_operand" "xm")))]
  "TARGET_SSE2 && ix86_binary_operator_ok (MULT, V2DFmode, operands)"
  "mulpd\t{%2, %0|%0, %2}"
  [(set_attr "type" "ssemul")
   (set_attr "mode" "V2DF")])

;; Scalar multiply in element 0; element 1 is taken from operand 1.
(define_insn "sse2_vmmulv2df3"
  [(set (match_operand:V2DF 0 "register_operand" "=x")
        (vec_merge:V2DF
          (mult:V2DF (match_operand:V2DF 1 "nonimmediate_operand" "%0")
                     (match_operand:V2DF 2 "nonimmediate_operand" "xm"))
          (match_dup 1)
          (const_int 1)))]
  "TARGET_SSE2 && ix86_binary_operator_ok (MULT, V2DFmode, operands)"
  "mulsd\t{%2, %0|%0, %2}"
  [(set_attr "type" "ssemul")
   (set_attr "mode" "DF")])

;; Packed double division.
(define_expand "divv2df3"
  [(set (match_operand:V2DF 0 "register_operand" "")
        (div:V2DF (match_operand:V2DF 1 "register_operand" "")
                  (match_operand:V2DF 2 "nonimmediate_operand" "")))]
  "TARGET_SSE2"
  "ix86_fixup_binary_operands_no_copy (DIV, V2DFmode, operands);")

(define_insn "*divv2df3"
  [(set (match_operand:V2DF 0 "register_operand" "=x")
        (div:V2DF (match_operand:V2DF 1 "register_operand" "0")
                  (match_operand:V2DF 2 "nonimmediate_operand" "xm")))]
  "TARGET_SSE2"
  "divpd\t{%2, %0|%0, %2}"
  [(set_attr "type" "ssediv")
   (set_attr "mode" "V2DF")])

;; Scalar divide in element 0; element 1 is taken from operand 1.
(define_insn "sse2_vmdivv2df3"
  [(set (match_operand:V2DF 0 "register_operand" "=x")
        (vec_merge:V2DF
          (div:V2DF (match_operand:V2DF 1 "register_operand" "0")
                    (match_operand:V2DF 2 "nonimmediate_operand" "xm"))
          (match_dup 1)
          (const_int 1)))]
  "TARGET_SSE2"
  "divsd\t{%2, %0|%0, %2}"
  [(set_attr "type" "ssediv")
   (set_attr "mode" "DF")])

;; Packed double square root.
(define_insn "sqrtv2df2"
  [(set (match_operand:V2DF 0 "register_operand" "=x")
        (sqrt:V2DF (match_operand:V2DF 1 "nonimmediate_operand" "xm")))]
  "TARGET_SSE2"
  "sqrtpd\t{%1, %0|%0, %1}"
  [(set_attr "type" "sse")
   (set_attr "mode" "V2DF")])

;; Scalar square root of element 0 of operand 1; element 1 comes from
;; operand 2.  The predicate of operand 1 agrees with its "xm"
;; constraint, and the insn mode is DF since sqrtsd is a
;; double-precision scalar operation.
(define_insn "sse2_vmsqrtv2df2"
  [(set (match_operand:V2DF 0 "register_operand" "=x")
        (vec_merge:V2DF
          (sqrt:V2DF (match_operand:V2DF 1 "nonimmediate_operand" "xm"))
          (match_operand:V2DF 2 "register_operand" "0")
          (const_int 1)))]
  "TARGET_SSE2"
  "sqrtsd\t{%1, %0|%0, %1}"
  [(set_attr "type" "sse")
   (set_attr "mode" "DF")])

;; ??? For !flag_finite_math_only, the representation with SMIN/SMAX
;; isn't really correct, as those rtl operators aren't defined when
;; applied to NaNs.  Hopefully the optimizers won't get too smart on us.

;; Packed double maximum.  Unless flag_finite_math_only is set, operand 1
;; is forced into a register before the generic operand fixup.
(define_expand "smaxv2df3"
  [(set (match_operand:V2DF 0 "register_operand" "")
        (smax:V2DF (match_operand:V2DF 1 "nonimmediate_operand" "")
                   (match_operand:V2DF 2 "nonimmediate_operand" "")))]
  "TARGET_SSE2"
{
  if (!flag_finite_math_only)
    operands[1] = force_reg (V2DFmode, operands[1]);
  ix86_fixup_binary_operands_no_copy (SMAX, V2DFmode, operands);
})

;; Commutative form, only valid under flag_finite_math_only.
(define_insn "*smaxv2df3_finite"
  [(set (match_operand:V2DF 0 "register_operand" "=x")
        (smax:V2DF (match_operand:V2DF 1 "nonimmediate_operand" "%0")
                   (match_operand:V2DF 2 "nonimmediate_operand" "xm")))]
  "TARGET_SSE2 && flag_finite_math_only
   && ix86_binary_operator_ok (SMAX, V2DFmode, operands)"
  "maxpd\t{%2, %0|%0, %2}"
  [(set_attr "type" "sseadd")
   (set_attr "mode" "V2DF")])

;; Non-commutative form with operand 1 tied to the destination register.
(define_insn "*smaxv2df3"
  [(set (match_operand:V2DF 0 "register_operand" "=x")
        (smax:V2DF (match_operand:V2DF 1 "register_operand" "0")
                   (match_operand:V2DF 2 "nonimmediate_operand" "xm")))]
  "TARGET_SSE2"
  "maxpd\t{%2, %0|%0, %2}"
  [(set_attr "type" "sseadd")
   (set_attr "mode" "V2DF")])

;; Scalar maximum in element 0, commutative form for finite math.
(define_insn "*sse2_vmsmaxv2df3_finite"
  [(set (match_operand:V2DF 0 "register_operand" "=x")
        (vec_merge:V2DF
          (smax:V2DF (match_operand:V2DF 1 "nonimmediate_operand" "%0")
                     (match_operand:V2DF 2 "nonimmediate_operand" "xm"))
          (match_dup 1)
          (const_int 1)))]
  "TARGET_SSE2 && flag_finite_math_only
   && ix86_binary_operator_ok (SMAX, V2DFmode, operands)"
  "maxsd\t{%2, %0|%0, %2}"
  [(set_attr "type" "sseadd")
   (set_attr "mode" "DF")])

;; Scalar maximum in element 0, non-commutative form.
(define_insn "sse2_vmsmaxv2df3"
  [(set (match_operand:V2DF 0 "register_operand" "=x")
        (vec_merge:V2DF
          (smax:V2DF (match_operand:V2DF 1 "register_operand" "0")
                     (match_operand:V2DF 2 "nonimmediate_operand" "xm"))
          (match_dup 1)
          (const_int 1)))]
  "TARGET_SSE2"
  "maxsd\t{%2, %0|%0, %2}"
  [(set_attr "type" "sseadd")
   (set_attr "mode" "DF")])

;; Packed double minimum; same structure as the maximum patterns.
(define_expand "sminv2df3"
  [(set (match_operand:V2DF 0 "register_operand" "")
        (smin:V2DF (match_operand:V2DF 1 "nonimmediate_operand" "")
                   (match_operand:V2DF 2 "nonimmediate_operand" "")))]
  "TARGET_SSE2"
{
  if (!flag_finite_math_only)
    operands[1] = force_reg (V2DFmode, operands[1]);
  ix86_fixup_binary_operands_no_copy (SMIN, V2DFmode, operands);
})

;; Commutative form, only valid under flag_finite_math_only.
(define_insn "*sminv2df3_finite"
  [(set (match_operand:V2DF 0 "register_operand" "=x")
        (smin:V2DF (match_operand:V2DF 1 "nonimmediate_operand" "%0")
                   (match_operand:V2DF 2 "nonimmediate_operand" "xm")))]
  "TARGET_SSE2 && flag_finite_math_only
   && ix86_binary_operator_ok (SMIN, V2DFmode, operands)"
  "minpd\t{%2, %0|%0, %2}"
  [(set_attr "type" "sseadd")
   (set_attr "mode" "V2DF")])

;; Non-commutative form with operand 1 tied to the destination register.
(define_insn "*sminv2df3"
  [(set (match_operand:V2DF 0 "register_operand" "=x")
        (smin:V2DF (match_operand:V2DF 1 "register_operand" "0")
                   (match_operand:V2DF 2 "nonimmediate_operand" "xm")))]
  "TARGET_SSE2"
  "minpd\t{%2, %0|%0, %2}"
  [(set_attr "type" "sseadd")
   (set_attr "mode" "V2DF")])

;; Scalar minimum in element 0, commutative form for finite math.
(define_insn "*sse2_vmsminv2df3_finite"
  [(set (match_operand:V2DF 0 "register_operand" "=x")
        (vec_merge:V2DF
          (smin:V2DF (match_operand:V2DF 1 "nonimmediate_operand" "%0")
                     (match_operand:V2DF 2 "nonimmediate_operand" "xm"))
          (match_dup 1)
          (const_int 1)))]
  "TARGET_SSE2 && flag_finite_math_only
   && ix86_binary_operator_ok (SMIN, V2DFmode, operands)"
  "minsd\t{%2, %0|%0, %2}"
  [(set_attr "type" "sseadd")
   (set_attr "mode" "DF")])

;; Scalar minimum in element 0, non-commutative form.
(define_insn "sse2_vmsminv2df3"
  [(set (match_operand:V2DF 0 "register_operand" "=x")
        (vec_merge:V2DF
          (smin:V2DF (match_operand:V2DF 1 "register_operand" "0")
                     (match_operand:V2DF 2 "nonimmediate_operand" "xm"))
          (match_dup 1)
          (const_int 1)))]
  "TARGET_SSE2"
  "minsd\t{%2, %0|%0, %2}"
  [(set_attr "type" "sseadd")
   (set_attr "mode" "DF")])

;; vec_merge with mask 1 of a packed sum and a packed difference of the
;; same two operands.
(define_insn "sse3_addsubv2df3"
  [(set (match_operand:V2DF 0 "register_operand" "=x")
        (vec_merge:V2DF
          (plus:V2DF
            (match_operand:V2DF 1 "register_operand" "0")
            (match_operand:V2DF 2 "nonimmediate_operand" "xm"))
          (minus:V2DF (match_dup 1) (match_dup 2))
          (const_int 1)))]
  "TARGET_SSE3"
  "addsubpd\t{%2, %0|%0, %2}"
  [(set_attr "type" "sseadd")
   (set_attr "mode" "V2DF")])

;; Horizontal add: result element 0 is the sum of both elements of
;; operand 1, result element 1 the sum of both elements of operand 2.
(define_insn "sse3_haddv2df3"
  [(set (match_operand:V2DF 0 "register_operand" "=x")
        (vec_concat:V2DF
          (plus:DF
            (vec_select:DF
              (match_operand:V2DF 1 "register_operand" "0")
              (parallel [(const_int 0)]))
            (vec_select:DF (match_dup 1) (parallel [(const_int 1)])))
          (plus:DF
            (vec_select:DF
              (match_operand:V2DF 2 "nonimmediate_operand" "xm")
              (parallel [(const_int 0)]))
            (vec_select:DF (match_dup 2) (parallel [(const_int 1)])))))]
  "TARGET_SSE3"
  "haddpd\t{%2, %0|%0, %2}"
  [(set_attr "type" "sseadd")
   (set_attr "mode" "V2DF")])

;; Horizontal subtract: element 0 minus element 1 of each operand.
(define_insn "sse3_hsubv2df3"
  [(set (match_operand:V2DF 0 "register_operand" "=x")
        (vec_concat:V2DF
          (minus:DF
            (vec_select:DF
              (match_operand:V2DF 1 "register_operand" "0")
              (parallel [(const_int 0)]))
            (vec_select:DF (match_dup 1) (parallel [(const_int 1)])))
          (minus:DF
            (vec_select:DF
              (match_operand:V2DF 2 "nonimmediate_operand" "xm")
              (parallel [(const_int 0)]))
            (vec_select:DF (match_dup 2) (parallel [(const_int 1)])))))]
  "TARGET_SSE3"
  "hsubpd\t{%2, %0|%0, %2}"
  [(set_attr "type" "sseadd")
   (set_attr "mode" "V2DF")])

;; Sum reduction, implemented as a horizontal add of operand 1 with
;; itself.
(define_expand "reduc_splus_v2df"
  [(match_operand:V2DF 0 "register_operand" "")
   (match_operand:V2DF 1 "register_operand" "")]
  "TARGET_SSE3"
{
  emit_insn (gen_sse3_haddv2df3 (operands[0], operands[1], operands[1]));
  DONE;
})

;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;;
;; Parallel double-precision floating point comparisons
;;
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;

;; Packed compare; the comparison operator (operand 3) selects the
;; condition printed through %D3.
(define_insn "sse2_maskcmpv2df3"
  [(set (match_operand:V2DF 0 "register_operand" "=x")
        (match_operator:V2DF 3 "sse_comparison_operator"
          [(match_operand:V2DF 1 "register_operand" "0")
           (match_operand:V2DF 2 "nonimmediate_operand" "xm")]))]
  "TARGET_SSE2"
  "cmp%D3pd\t{%2, %0|%0, %2}"
  [(set_attr "type" "ssecmp")
   (set_attr "mode" "V2DF")])

;; Scalar compare in element 0; element 1 is taken from operand 1.
(define_insn "sse2_vmmaskcmpv2df3"
  [(set (match_operand:V2DF 0 "register_operand" "=x")
        (vec_merge:V2DF
          (match_operator:V2DF 3 "sse_comparison_operator"
            [(match_operand:V2DF 1 "register_operand" "0")
             (match_operand:V2DF 2 "nonimmediate_operand" "xm")])
          (match_dup 1)
          (const_int 1)))]
  "TARGET_SSE2"
  "cmp%D3sd\t{%2, %0|%0, %2}"
  [(set_attr "type" "ssecmp")
   (set_attr "mode" "DF")])

;; Compare element 0 of both operands, setting the flags register in
;; CCFP mode.
(define_insn "sse2_comi"
  [(set (reg:CCFP FLAGS_REG)
        (compare:CCFP
          (vec_select:DF
            (match_operand:V2DF 0 "register_operand" "x")
            (parallel [(const_int 0)]))
          (vec_select:DF
            (match_operand:V2DF 1 "nonimmediate_operand" "xm")
            (parallel [(const_int 0)]))))]
  "TARGET_SSE2"
  "comisd\t{%1, %0|%0, %1}"
  [(set_attr "type" "ssecomi")
   (set_attr "mode" "DF")])

;; Same comparison, setting the flags register in CCFPU mode.
(define_insn "sse2_ucomi"
  [(set (reg:CCFPU FLAGS_REG)
        (compare:CCFPU
          (vec_select:DF
            (match_operand:V2DF 0 "register_operand" "x")
            (parallel [(const_int 0)]))
          (vec_select:DF
            (match_operand:V2DF 1 "nonimmediate_operand" "xm")
            (parallel [(const_int 0)]))))]
  "TARGET_SSE2"
  "ucomisd\t{%1, %0|%0, %1}"
  [(set_attr "type" "ssecomi")
   (set_attr "mode" "DF")])

;; Vector conditional move.  The expansion is left to
;; ix86_expand_fp_vcond; the expander fails when that returns false.
(define_expand "vcondv2df"
  [(set (match_operand:V2DF 0 "register_operand" "")
        (if_then_else:V2DF
          (match_operator 3 ""
            [(match_operand:V2DF 4 "nonimmediate_operand" "")
             (match_operand:V2DF 5 "nonimmediate_operand" "")])
          (match_operand:V2DF 1 "general_operand" "")
          (match_operand:V2DF 2 "general_operand" "")))]
  "TARGET_SSE2"
{
  if (!ix86_expand_fp_vcond (operands))
    FAIL;
  DONE;
})

;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;;
;; Parallel double-precision floating point logical operations
;;
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;

;; Packed double bitwise AND.
(define_expand "andv2df3"
  [(set (match_operand:V2DF 0 "register_operand" "")
        (and:V2DF (match_operand:V2DF 1 "nonimmediate_operand" "")
                  (match_operand:V2DF 2 "nonimmediate_operand" "")))]
  "TARGET_SSE2"
  "ix86_fixup_binary_operands_no_copy (AND, V2DFmode, operands);")

(define_insn "*andv2df3"
  [(set (match_operand:V2DF 0 "register_operand" "=x")
        (and:V2DF (match_operand:V2DF 1 "nonimmediate_operand" "%0")
                  (match_operand:V2DF 2 "nonimmediate_operand" "xm")))]
  "TARGET_SSE2 && ix86_binary_operator_ok (AND, V2DFmode, operands)"
  "andpd\t{%2, %0|%0, %2}"
  [(set_attr "type" "sselog")
   (set_attr "mode" "V2DF")])

;; Packed double and-not: the complement of operand 1 ANDed with
;; operand 2.
(define_insn "sse2_nandv2df3"
  [(set (match_operand:V2DF 0 "register_operand" "=x")
        (and:V2DF (not:V2DF (match_operand:V2DF 1 "register_operand" "0"))
                  (match_operand:V2DF 2 "nonimmediate_operand" "xm")))]
  "TARGET_SSE2"
  "andnpd\t{%2, %0|%0, %2}"
  [(set_attr "type" "sselog")
   (set_attr "mode" "V2DF")])

;; Packed double bitwise inclusive OR.
(define_expand "iorv2df3"
  [(set (match_operand:V2DF 0 "register_operand" "")
        (ior:V2DF (match_operand:V2DF 1 "nonimmediate_operand" "")
                  (match_operand:V2DF 2 "nonimmediate_operand" "")))]
  "TARGET_SSE2"
  "ix86_fixup_binary_operands_no_copy (IOR, V2DFmode, operands);")

(define_insn "*iorv2df3"
  [(set (match_operand:V2DF 0 "register_operand" "=x")
        (ior:V2DF (match_operand:V2DF 1 "nonimmediate_operand" "%0")
                  (match_operand:V2DF 2 "nonimmediate_operand" "xm")))]
  "TARGET_SSE2 && ix86_binary_operator_ok (IOR, V2DFmode, operands)"
  "orpd\t{%2, %0|%0, %2}"
  [(set_attr "type" "sselog")
   (set_attr "mode" "V2DF")])

;; Packed double bitwise exclusive OR.
(define_expand "xorv2df3"
  [(set (match_operand:V2DF 0 "register_operand" "")
        (xor:V2DF (match_operand:V2DF 1 "nonimmediate_operand" "")
                  (match_operand:V2DF 2 "nonimmediate_operand" "")))]
  "TARGET_SSE2"
  "ix86_fixup_binary_operands_no_copy (XOR, V2DFmode, operands);")

(define_insn "*xorv2df3"
  [(set (match_operand:V2DF 0 "register_operand" "=x")
        (xor:V2DF (match_operand:V2DF 1 "nonimmediate_operand" "%0")
                  (match_operand:V2DF 2 "nonimmediate_operand" "xm")))]
  "TARGET_SSE2 && ix86_binary_operator_ok (XOR, V2DFmode, operands)"
  "xorpd\t{%2, %0|%0, %2}"
  [(set_attr "type" "sselog")
   (set_attr "mode" "V2DF")])

;; Also define scalar versions.  These are used for abs, neg, and
;; conditional move.  Using subregs into vector modes causes register
;; allocation lossage.  These patterns do not allow memory operands
;; because the native instructions read the full 128-bits.
1842 1843(define_insn "*anddf3" 1844 [(set (match_operand:DF 0 "register_operand" "=x") 1845 (and:DF (match_operand:DF 1 "register_operand" "0") 1846 (match_operand:DF 2 "register_operand" "x")))] 1847 "TARGET_SSE2" 1848 "andpd\t{%2, %0|%0, %2}" 1849 [(set_attr "type" "sselog") 1850 (set_attr "mode" "V2DF")]) 1851 1852(define_insn "*nanddf3" 1853 [(set (match_operand:DF 0 "register_operand" "=x") 1854 (and:DF (not:DF (match_operand:DF 1 "register_operand" "0")) 1855 (match_operand:DF 2 "register_operand" "x")))] 1856 "TARGET_SSE2" 1857 "andnpd\t{%2, %0|%0, %2}" 1858 [(set_attr "type" "sselog") 1859 (set_attr "mode" "V2DF")]) 1860 1861(define_insn "*iordf3" 1862 [(set (match_operand:DF 0 "register_operand" "=x") 1863 (ior:DF (match_operand:DF 1 "register_operand" "0") 1864 (match_operand:DF 2 "register_operand" "x")))] 1865 "TARGET_SSE2" 1866 "orpd\t{%2, %0|%0, %2}" 1867 [(set_attr "type" "sselog") 1868 (set_attr "mode" "V2DF")]) 1869 1870(define_insn "*xordf3" 1871 [(set (match_operand:DF 0 "register_operand" "=x") 1872 (xor:DF (match_operand:DF 1 "register_operand" "0") 1873 (match_operand:DF 2 "register_operand" "x")))] 1874 "TARGET_SSE2" 1875 "xorpd\t{%2, %0|%0, %2}" 1876 [(set_attr "type" "sselog") 1877 (set_attr "mode" "V2DF")]) 1878 1879;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; 1880;; 1881;; Parallel double-precision floating point conversion operations 1882;; 1883;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; 1884 1885(define_insn "sse2_cvtpi2pd" 1886 [(set (match_operand:V2DF 0 "register_operand" "=x,x") 1887 (float:V2DF (match_operand:V2SI 1 "nonimmediate_operand" "y,m")))] 1888 "TARGET_SSE2" 1889 "cvtpi2pd\t{%1, %0|%0, %1}" 1890 [(set_attr "type" "ssecvt") 1891 (set_attr "unit" "mmx,*") 1892 (set_attr "mode" "V2DF")]) 1893 1894(define_insn "sse2_cvtpd2pi" 1895 [(set (match_operand:V2SI 0 "register_operand" "=y") 1896 (unspec:V2SI [(match_operand:V2DF 1 "nonimmediate_operand" "xm")] 1897 
UNSPEC_FIX_NOTRUNC))]
  "TARGET_SSE2"
  "cvtpd2pi\t{%1, %0|%0, %1}"
  [(set_attr "type" "ssecvt")
   (set_attr "unit" "mmx")
   (set_attr "mode" "DI")])

;; Convert V2DF to V2SI using the "fix" RTL code (cvttpd2pi).
(define_insn "sse2_cvttpd2pi"
  [(set (match_operand:V2SI 0 "register_operand" "=y")
        (fix:V2SI (match_operand:V2DF 1 "nonimmediate_operand" "xm")))]
  "TARGET_SSE2"
  "cvttpd2pi\t{%1, %0|%0, %1}"
  [(set_attr "type" "ssecvt")
   (set_attr "unit" "mmx")
   (set_attr "mode" "TI")])

;; Convert an SImode integer to DF and merge it into element 0 of
;; operand 1 (vec_merge mask 1); the other element is kept.
(define_insn "sse2_cvtsi2sd"
  [(set (match_operand:V2DF 0 "register_operand" "=x,x")
        (vec_merge:V2DF
          (vec_duplicate:V2DF
            (float:DF (match_operand:SI 2 "nonimmediate_operand" "r,m")))
          (match_operand:V2DF 1 "register_operand" "0,0")
          (const_int 1)))]
  "TARGET_SSE2"
  "cvtsi2sd\t{%2, %0|%0, %2}"
  [(set_attr "type" "sseicvt")
   (set_attr "mode" "DF")
   (set_attr "athlon_decode" "double,direct")])

;; DImode-source variant of sse2_cvtsi2sd; additionally requires
;; TARGET_64BIT.
(define_insn "sse2_cvtsi2sdq"
  [(set (match_operand:V2DF 0 "register_operand" "=x,x")
        (vec_merge:V2DF
          (vec_duplicate:V2DF
            (float:DF (match_operand:DI 2 "nonimmediate_operand" "r,m")))
          (match_operand:V2DF 1 "register_operand" "0,0")
          (const_int 1)))]
  "TARGET_SSE2 && TARGET_64BIT"
  "cvtsi2sdq\t{%2, %0|%0, %2}"
  [(set_attr "type" "sseicvt")
   (set_attr "mode" "DF")
   (set_attr "athlon_decode" "double,direct")])

;; Convert element 0 of a V2DF operand to SImode (cvtsd2si); the
;; conversion is an UNSPEC_FIX_NOTRUNC unspec.
(define_insn "sse2_cvtsd2si"
  [(set (match_operand:SI 0 "register_operand" "=r,r")
        (unspec:SI
          [(vec_select:DF
             (match_operand:V2DF 1 "nonimmediate_operand" "x,m")
             (parallel [(const_int 0)]))]
          UNSPEC_FIX_NOTRUNC))]
  "TARGET_SSE2"
  "cvtsd2si\t{%1, %0|%0, %1}"
  [(set_attr "type" "sseicvt")
   (set_attr "athlon_decode" "double,vector")
   (set_attr "mode" "SI")])

;; DImode-result variant of sse2_cvtsd2si; additionally requires
;; TARGET_64BIT.
(define_insn "sse2_cvtsd2siq"
  [(set (match_operand:DI 0 "register_operand" "=r,r")
        (unspec:DI
          [(vec_select:DF
             (match_operand:V2DF 1 "nonimmediate_operand" "x,m")
             (parallel [(const_int 0)]))]
          UNSPEC_FIX_NOTRUNC))]
  "TARGET_SSE2 && TARGET_64BIT"
  "cvtsd2siq\t{%1, %0|%0, %1}"
  [(set_attr "type" "sseicvt")
   (set_attr "athlon_decode" "double,vector")
   (set_attr "mode" "DI")])

;; Convert element 0 of a V2DF operand to SImode with the "fix" RTL code
;; (cvttsd2si).
(define_insn "sse2_cvttsd2si"
  [(set (match_operand:SI 0 "register_operand" "=r,r")
        (fix:SI
          (vec_select:DF
            (match_operand:V2DF 1 "nonimmediate_operand" "x,m")
            (parallel [(const_int 0)]))))]
  "TARGET_SSE2"
  "cvttsd2si\t{%1, %0|%0, %1}"
  [(set_attr "type" "sseicvt")
   (set_attr "mode" "SI")
   (set_attr "athlon_decode" "double,vector")])

;; DImode-result variant of sse2_cvttsd2si; additionally requires
;; TARGET_64BIT.
(define_insn "sse2_cvttsd2siq"
  [(set (match_operand:DI 0 "register_operand" "=r,r")
        (fix:DI
          (vec_select:DF
            (match_operand:V2DF 1 "nonimmediate_operand" "x,m")
            (parallel [(const_int 0)]))))]
  "TARGET_SSE2 && TARGET_64BIT"
  "cvttsd2siq\t{%1, %0|%0, %1}"
  [(set_attr "type" "sseicvt")
   (set_attr "mode" "DI")
   (set_attr "athlon_decode" "double,vector")])

;; Convert elements 0 and 1 of a V4SI operand to V2DF (cvtdq2pd).
(define_insn "sse2_cvtdq2pd"
  [(set (match_operand:V2DF 0 "register_operand" "=x")
        (float:V2DF
          (vec_select:V2SI
            (match_operand:V4SI 1 "nonimmediate_operand" "xm")
            (parallel [(const_int 0) (const_int 1)]))))]
  "TARGET_SSE2"
  "cvtdq2pd\t{%1, %0|%0, %1}"
  [(set_attr "type" "ssecvt")
   (set_attr "mode" "V2DF")])

;; Expander for cvtpd2dq: the V2SI conversion result is concatenated with
;; a V2SI zero vector, which the preparation statement supplies as
;; operand 2.
(define_expand "sse2_cvtpd2dq"
  [(set (match_operand:V4SI 0 "register_operand" "")
        (vec_concat:V4SI
          (unspec:V2SI [(match_operand:V2DF 1 "nonimmediate_operand" "")]
                       UNSPEC_FIX_NOTRUNC)
          (match_dup 2)))]
  "TARGET_SSE2"
  "operands[2] = CONST0_RTX (V2SImode);")

;; Matching insn for the expander above; operand 2 must be constant zero.
(define_insn "*sse2_cvtpd2dq"
  [(set (match_operand:V4SI 0 "register_operand" "=x")
        (vec_concat:V4SI
          (unspec:V2SI [(match_operand:V2DF 1 "nonimmediate_operand" "xm")]
                       UNSPEC_FIX_NOTRUNC)
          (match_operand:V2SI 2 "const0_operand" "")))]
  "TARGET_SSE2"
  "cvtpd2dq\t{%1, %0|%0, %1}"
  [(set_attr "type" "ssecvt")
   (set_attr "mode" "TI")])

;; Expander for cvttpd2dq: same shape as sse2_cvtpd2dq but using the
;; "fix" RTL code for the conversion.
(define_expand "sse2_cvttpd2dq"
  [(set (match_operand:V4SI 0 "register_operand" "")
        (vec_concat:V4SI
          (fix:V2SI (match_operand:V2DF 1 "nonimmediate_operand" ""))
          (match_dup 2)))]
  "TARGET_SSE2"
  "operands[2] = CONST0_RTX (V2SImode);")

;; Matching insn for sse2_cvttpd2dq; operand 2 must be constant zero.
(define_insn "*sse2_cvttpd2dq"
  [(set (match_operand:V4SI 0 "register_operand" "=x")
        (vec_concat:V4SI
          (fix:V2SI (match_operand:V2DF 1 "nonimmediate_operand" "xm"))
          (match_operand:V2SI 2 "const0_operand" "")))]
  "TARGET_SSE2"
  "cvttpd2dq\t{%1, %0|%0, %1}"
  [(set_attr "type" "ssecvt")
   (set_attr "mode" "TI")])

;; cvtsd2ss: the float_truncate of operand 2 is merged into element 0 of
;; the V4SF operand 1 (vec_merge mask 1).
(define_insn "sse2_cvtsd2ss"
  [(set (match_operand:V4SF 0 "register_operand" "=x,x")
        (vec_merge:V4SF
          (vec_duplicate:V4SF
            (float_truncate:V2SF
              (match_operand:V2DF 2 "nonimmediate_operand" "x,m")))
          (match_operand:V4SF 1 "register_operand" "0,0")
          (const_int 1)))]
  "TARGET_SSE2"
  "cvtsd2ss\t{%2, %0|%0, %2}"
  [(set_attr "type" "ssecvt")
   (set_attr "athlon_decode" "vector,double")
   (set_attr "mode" "SF")])

;; cvtss2sd: the float_extend of the low elements of operand 2 is merged
;; into element 0 of the V2DF operand 1 (vec_merge mask 1).
(define_insn "sse2_cvtss2sd"
  [(set (match_operand:V2DF 0 "register_operand" "=x")
        (vec_merge:V2DF
          (float_extend:V2DF
            (vec_select:V2SF
              (match_operand:V4SF 2 "nonimmediate_operand" "xm")
              (parallel [(const_int 0) (const_int 1)])))
          (match_operand:V2DF 1 "register_operand" "0")
          (const_int 1)))]
  "TARGET_SSE2"
  "cvtss2sd\t{%2, %0|%0, %2}"
  [(set_attr "type" "ssecvt")
   (set_attr "mode" "DF")])

;; Expander for cvtpd2ps: the V2SF truncation result is concatenated with
;; a V2SF zero vector supplied as operand 2 by the preparation statement.
(define_expand "sse2_cvtpd2ps"
  [(set (match_operand:V4SF 0 "register_operand" "")
        (vec_concat:V4SF
          (float_truncate:V2SF
            (match_operand:V2DF 1 "nonimmediate_operand" "xm"))
          (match_dup 2)))]
  "TARGET_SSE2"
  "operands[2] = CONST0_RTX (V2SFmode);")

;; Matching insn for sse2_cvtpd2ps; operand 2 must be constant zero.
(define_insn "*sse2_cvtpd2ps"
  [(set (match_operand:V4SF 0 "register_operand" "=x")
        (vec_concat:V4SF
          (float_truncate:V2SF
            (match_operand:V2DF 1 "nonimmediate_operand" "xm"))
          (match_operand:V2SF 2 "const0_operand" "")))]
  "TARGET_SSE2"
  "cvtpd2ps\t{%1, %0|%0, %1}"
  [(set_attr "type" "ssecvt")
   (set_attr "mode" "V4SF")])

;; Extend elements 0 and 1 of a V4SF operand to V2DF (cvtps2pd).
(define_insn "sse2_cvtps2pd"
  [(set (match_operand:V2DF 0 "register_operand" "=x")
        (float_extend:V2DF
          (vec_select:V2SF
            (match_operand:V4SF 1 "nonimmediate_operand" "xm")
            (parallel [(const_int 0) (const_int 1)]))))]
  "TARGET_SSE2"
  "cvtps2pd\t{%1, %0|%0, %1}"
  [(set_attr "type" "ssecvt")
   (set_attr "mode" "V2DF")])

;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;;
;; Parallel double-precision floating point element swizzling
;;
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;

;; Select element 1 of operand 1 and element 1 of operand 2 (indices 1
;; and 3 of the concatenation).  The three alternatives emit unpckhpd,
;; movlpd from the high half of a memory operand 1 (%H1), or movhpd to a
;; memory destination.
(define_insn "sse2_unpckhpd"
  [(set (match_operand:V2DF 0 "nonimmediate_operand" "=x,x,m")
        (vec_select:V2DF
          (vec_concat:V4DF
            (match_operand:V2DF 1 "nonimmediate_operand" " 0,o,x")
            (match_operand:V2DF 2 "nonimmediate_operand" " x,0,0"))
          (parallel [(const_int 1)
                     (const_int 3)])))]
  "TARGET_SSE2 && !(MEM_P (operands[1]) && MEM_P (operands[2]))"
  "@
   unpckhpd\t{%2, %0|%0, %2}
   movlpd\t{%H1, %0|%0, %H1}
   movhpd\t{%1, %0|%0, %1}"
  [(set_attr "type" "sselog,ssemov,ssemov")
   (set_attr "mode" "V2DF,V1DF,V1DF")])

;; Duplicate element 0 of operand 1 into both elements of the destination
;; (movddup).  The memory-destination alternative is emitted as "#" and is
;; handled by the define_split that follows.
;; The condition rejects a memory destination combined with a memory
;; source.  This pattern only has operands 0 and 1 (the second vec_concat
;; input is a match_dup of operand 1), so the test must be on operands[0]
;; and operands[1]; operands[2] is never set by this pattern.
(define_insn "*sse3_movddup"
  [(set (match_operand:V2DF 0 "nonimmediate_operand" "=x,o")
        (vec_select:V2DF
          (vec_concat:V4DF
            (match_operand:V2DF 1 "nonimmediate_operand" "xm,x")
            (match_dup 1))
          (parallel [(const_int 0)
                     (const_int 2)])))]
  "TARGET_SSE3 && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
  "@
   movddup\t{%1, %0|%0, %1}
   #"
  [(set_attr "type" "sselog,ssemov")
   (set_attr "mode" "V2DF")])

;; After reload, a movddup into memory becomes two DFmode stores of the
;; low element of the source register, at byte offsets 0 and 8.
(define_split
  [(set (match_operand:V2DF 0 "memory_operand" "")
        (vec_select:V2DF
          (vec_concat:V4DF
            (match_operand:V2DF 1 "register_operand" "")
            (match_dup 1))
          (parallel [(const_int 0)
                     (const_int 2)])))]
  "TARGET_SSE3 && reload_completed"
  [(const_int 0)]
{
  rtx low = gen_rtx_REG (DFmode, REGNO (operands[1]));
  emit_move_insn (adjust_address (operands[0], DFmode, 0), low);
  emit_move_insn (adjust_address (operands[0], DFmode, 8), low);
  DONE;
})

;; Select element 0 of operand 1 and element 0 of operand 2 (indices 0
;; and 2 of the concatenation).  The alternatives emit unpcklpd, movhpd
;; from memory, or movlpd into the high half of a memory destination
;; (%H0).
(define_insn "sse2_unpcklpd"
  [(set (match_operand:V2DF 0 "nonimmediate_operand" "=x,x,o")
        (vec_select:V2DF
          (vec_concat:V4DF
            (match_operand:V2DF 1 "nonimmediate_operand" " 0,0,0")
            (match_operand:V2DF 2 "nonimmediate_operand" " x,m,x"))
          (parallel [(const_int 0)
                     (const_int 2)])))]
  "TARGET_SSE2 && !(MEM_P (operands[1]) && MEM_P (operands[2]))"
  "@
   unpcklpd\t{%2, %0|%0, %2}
   movhpd\t{%2, %0|%0, %2}
   movlpd\t{%2, %H0|%H0, %2}"
  [(set_attr "type" "sselog,ssemov,ssemov")
   (set_attr "mode" "V2DF,V1DF,V1DF")])

;; Expander taking the shufpd immediate as operand 3.  Bit 0 of the mask
;; becomes the first selector (0 or 1) and bit 1 becomes the second
;; selector (2 or 3) passed to sse2_shufpd_1.
(define_expand "sse2_shufpd"
  [(match_operand:V2DF 0 "register_operand" "")
   (match_operand:V2DF 1 "register_operand" "")
   (match_operand:V2DF 2 "nonimmediate_operand" "")
   (match_operand:SI 3 "const_int_operand" "")]
  "TARGET_SSE2"
{
  int mask = INTVAL (operands[3]);
  emit_insn (gen_sse2_shufpd_1 (operands[0], operands[1], operands[2],
                                GEN_INT (mask & 1),
                                GEN_INT (mask & 2 ?
3 : 2)));
  DONE;
})

;; shufpd with the element selectors as separate operands: operand 3
;; picks index 0 or 1 and operand 4 picks index 2 or 3 of the
;; concatenation.  The output code folds both selectors back into the
;; two-bit immediate before printing the instruction.
(define_insn "sse2_shufpd_1"
  [(set (match_operand:V2DF 0 "register_operand" "=x")
        (vec_select:V2DF
          (vec_concat:V4DF
            (match_operand:V2DF 1 "register_operand" "0")
            (match_operand:V2DF 2 "nonimmediate_operand" "xm"))
          (parallel [(match_operand 3 "const_0_to_1_operand" "")
                     (match_operand 4 "const_2_to_3_operand" "")])))]
  "TARGET_SSE2"
{
  int mask;
  mask = INTVAL (operands[3]);
  mask |= (INTVAL (operands[4]) - 2) << 1;
  operands[3] = GEN_INT (mask);

  return "shufpd\t{%3, %2, %0|%0, %2, %3}";
}
  [(set_attr "type" "sselog")
   (set_attr "mode" "V2DF")])

;; Extract element 1 of a V2DF operand as a DF value.  Alternatives:
;; movhpd to memory, unpckhpd in place, or "#" for a memory source, which
;; the following define_split handles.
(define_insn "sse2_storehpd"
  [(set (match_operand:DF 0 "nonimmediate_operand" "=m,x,x*fr")
        (vec_select:DF
          (match_operand:V2DF 1 "nonimmediate_operand" " x,0,o")
          (parallel [(const_int 1)])))]
  "TARGET_SSE2 && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
  "@
   movhpd\t{%1, %0|%0, %1}
   unpckhpd\t%0, %0
   #"
  [(set_attr "type" "ssemov,sselog1,ssemov")
   (set_attr "mode" "V1DF,V2DF,DF")])

;; After reload, extracting element 1 from memory is a plain DFmode load
;; from byte offset 8 of the vector.
(define_split
  [(set (match_operand:DF 0 "register_operand" "")
        (vec_select:DF
          (match_operand:V2DF 1 "memory_operand" "")
          (parallel [(const_int 1)])))]
  "TARGET_SSE2 && reload_completed"
  [(set (match_dup 0) (match_dup 1))]
{
  operands[1] = adjust_address (operands[1], DFmode, 8);
})

;; Extract element 0 of a V2DF operand as a DF value.  Only the store to
;; memory has a direct template (movlpd); the other alternatives are "#"
;; and are handled by the following define_split.
(define_insn "sse2_storelpd"
  [(set (match_operand:DF 0 "nonimmediate_operand" "=m,x,x*fr")
        (vec_select:DF
          (match_operand:V2DF 1 "nonimmediate_operand" " x,x,m")
          (parallel [(const_int 0)])))]
  "TARGET_SSE2 && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
  "@
   movlpd\t{%1, %0|%0, %1}
   #
   #"
  [(set_attr "type" "ssemov")
   (set_attr "mode" "V1DF,DF,DF")])

;; After reload, extracting element 0 into a register becomes a DFmode
;; move: a register source is re-typed as a DFmode REG with the same
;; register number, anything else goes through gen_lowpart.
(define_split
  [(set (match_operand:DF 0 "register_operand" "")
        (vec_select:DF
          (match_operand:V2DF 1 "nonimmediate_operand" "")
          (parallel [(const_int 0)])))]
  "TARGET_SSE2 && reload_completed"
  [(const_int 0)]
{
  rtx op1 = operands[1];
  if (REG_P (op1))
    op1 = gen_rtx_REG (DFmode, REGNO (op1));
  else
    op1 = gen_lowpart (DFmode, op1);
  emit_move_insn (operands[0], op1);
  DONE;
})

;; Build a V2DF from element 0 of operand 1 (low) and the DF operand 2
;; (high).  Alternatives: movhpd from memory, unpcklpd from a register,
;; and a memory destination ("#") split after reload.
;; There is deliberately no alternative with operand 2 tied to the
;; destination.  shufpd always takes the low result element from the
;; destination register, so "shufpd $1, %1, %0" would produce
;; { dest[1], op1[0] } rather than the required { op1[0], op2 }.
(define_insn "sse2_loadhpd"
  [(set (match_operand:V2DF 0 "nonimmediate_operand" "=x,x,o")
        (vec_concat:V2DF
          (vec_select:DF
            (match_operand:V2DF 1 "nonimmediate_operand" " 0,0,0")
            (parallel [(const_int 0)]))
          (match_operand:DF 2 "nonimmediate_operand" " m,x,x*fr")))]
  "TARGET_SSE2 && !(MEM_P (operands[1]) && MEM_P (operands[2]))"
  "@
   movhpd\t{%2, %0|%0, %2}
   unpcklpd\t{%2, %0|%0, %2}
   #"
  [(set_attr "type" "ssemov,sselog,other")
   (set_attr "mode" "V1DF,V2DF,DF")])

;; After reload, replacing the high element of a vector in memory is a
;; DFmode store to byte offset 8.
(define_split
  [(set (match_operand:V2DF 0 "memory_operand" "")
        (vec_concat:V2DF
          (vec_select:DF (match_dup 0) (parallel [(const_int 0)]))
          (match_operand:DF 1 "register_operand" "")))]
  "TARGET_SSE2 && reload_completed"
  [(set (match_dup 0) (match_dup 1))]
{
  operands[0] = adjust_address (operands[0], DFmode, 8);
})

;; Build a V2DF from the DF operand 2 (low) and element 1 of operand 1
;; (high).  The last alternative (memory destination) is "#" and is split
;; after reload.
;; Alternative 3 has operand 2 tied to the destination, so the high
;; element has to come from operand 1: shufpd with immediate 2 keeps the
;; destination's element 0 and takes element 1 of its source operand,
;; hence the source printed is %1.
(define_insn "sse2_loadlpd"
  [(set (match_operand:V2DF 0 "nonimmediate_operand" "=x,x,x,x,x,m")
        (vec_concat:V2DF
          (match_operand:DF 2 "nonimmediate_operand" " m,m,x,0,0,x*fr")
          (vec_select:DF
            (match_operand:V2DF 1 "vector_move_operand" " C,0,0,x,o,0")
            (parallel [(const_int 1)]))))]
  "TARGET_SSE2 && !(MEM_P (operands[1]) && MEM_P (operands[2]))"
  "@
   movsd\t{%2, %0|%0, %2}
   movlpd\t{%2, %0|%0, %2}
   movsd\t{%2, %0|%0, %2}
   shufpd\t{$2, %1, %0|%0, %1, 2}
   movhpd\t{%H1, %0|%0, %H1}
   #"
  [(set_attr "type" "ssemov,ssemov,ssemov,sselog,ssemov,other")
   (set_attr "mode" "DF,V1DF,V1DF,V2DF,V1DF,DF")])

(define_split
  [(set
(match_operand:V2DF 0 "memory_operand" "")
        (vec_concat:V2DF
          (match_operand:DF 1 "register_operand" "")
          (vec_select:DF (match_dup 0) (parallel [(const_int 1)]))))]
  "TARGET_SSE2 && reload_completed"
  [(set (match_dup 0) (match_dup 1))]
{
  /* Operand 1 is the first (low) input of the vec_concat and the high
     element is the one already in memory, so the DFmode store goes to
     byte offset 0.  Offset 8 would overwrite the element that has to be
     preserved.  */
  operands[0] = adjust_address (operands[0], DFmode, 0);
})

;; vec_merge with mask 1: element 0 comes from operand 2, element 1 from
;; operand 1.
;; Alternative 3 has operand 2 tied to the destination, so the high
;; element has to be fetched from operand 1: shufpd with immediate 2
;; keeps the destination's element 0 and takes element 1 of its source
;; operand, hence the source printed is %1.
(define_insn "sse2_movsd"
  [(set (match_operand:V2DF 0 "nonimmediate_operand" "=x,x,m,x,x,o")
        (vec_merge:V2DF
          (match_operand:V2DF 2 "nonimmediate_operand" " x,m,x,0,0,0")
          (match_operand:V2DF 1 "nonimmediate_operand" " 0,0,0,x,o,x")
          (const_int 1)))]
  "TARGET_SSE2"
  "@
   movsd\t{%2, %0|%0, %2}
   movlpd\t{%2, %0|%0, %2}
   movlpd\t{%2, %0|%0, %2}
   shufpd\t{$2, %1, %0|%0, %1, 2}
   movhps\t{%H1, %0|%0, %H1}
   movhps\t{%1, %H0|%H0, %1}"
  [(set_attr "type" "ssemov,ssemov,ssemov,sselog,ssemov,ssemov")
   (set_attr "mode" "DF,V1DF,V1DF,V2DF,V1DF,V1DF")])

;; Broadcast a DF value to both elements with movddup (SSE3).
(define_insn "*vec_dupv2df_sse3"
  [(set (match_operand:V2DF 0 "register_operand" "=x")
        (vec_duplicate:V2DF
          (match_operand:DF 1 "nonimmediate_operand" "xm")))]
  "TARGET_SSE3"
  "movddup\t{%1, %0|%0, %1}"
  [(set_attr "type" "sselog1")
   (set_attr "mode" "DF")])

;; Broadcast a DF value already in the destination register to both
;; elements with unpcklpd.  The mode attribute is V2DF, the same as the
;; unpcklpd alternative of sse2_unpcklpd.
(define_insn "*vec_dupv2df"
  [(set (match_operand:V2DF 0 "register_operand" "=x")
        (vec_duplicate:V2DF
          (match_operand:DF 1 "register_operand" "0")))]
  "TARGET_SSE2"
  "unpcklpd\t%0, %0"
  [(set_attr "type" "sselog1")
   (set_attr "mode" "V2DF")])

;; Concatenation of a DF value with itself, emitted as movddup (SSE3).
(define_insn "*vec_concatv2df_sse3"
  [(set (match_operand:V2DF 0 "register_operand" "=x")
        (vec_concat:V2DF
          (match_operand:DF 1 "nonimmediate_operand" "xm")
          (match_dup 1)))]
  "TARGET_SSE3"
  "movddup\t{%1, %0|%0, %1}"
  [(set_attr "type" "sselog1")
   (set_attr "mode" "DF")])

;; General concatenation of two DF values into a V2DF.  Operand 2 may be
;; the constant accepted by the "C" constraint, in which case only
;; operand 1 is loaded (movsd).
(define_insn "*vec_concatv2df"
  [(set (match_operand:V2DF 0 "register_operand" "=Y,Y,Y,x,x")
        (vec_concat:V2DF
          (match_operand:DF 1 "nonimmediate_operand" " 0,0,m,0,0")
          (match_operand:DF 2 "vector_move_operand" " Y,m,C,x,m")))]
  "TARGET_SSE"
  "@
   unpcklpd\t{%2, %0|%0, %2}
   movhpd\t{%2, %0|%0, %2}
   movsd\t{%1, %0|%0, %1}
   movlhps\t{%2, %0|%0, %2}
   movhps\t{%2, %0|%0, %2}"
  [(set_attr "type" "sselog,ssemov,ssemov,ssemov,ssemov")
   (set_attr "mode" "V2DF,V1DF,DF,V4SF,V2SF")])

;; Set element operands[2] of the V2DF operand 0 to the DF operand 1;
;; all work is delegated to ix86_expand_vector_set.
(define_expand "vec_setv2df"
  [(match_operand:V2DF 0 "register_operand" "")
   (match_operand:DF 1 "register_operand" "")
   (match_operand 2 "const_int_operand" "")]
  "TARGET_SSE"
{
  ix86_expand_vector_set (false, operands[0], operands[1],
                          INTVAL (operands[2]));
  DONE;
})

;; Extract element operands[2] of the V2DF operand 1 into the DF
;; operand 0; all work is delegated to ix86_expand_vector_extract.
(define_expand "vec_extractv2df"
  [(match_operand:DF 0 "register_operand" "")
   (match_operand:V2DF 1 "register_operand" "")
   (match_operand 2 "const_int_operand" "")]
  "TARGET_SSE"
{
  ix86_expand_vector_extract (false, operands[0], operands[1],
                              INTVAL (operands[2]));
  DONE;
})

;; Initialize the V2DF operand 0 from operand 1; all work is delegated
;; to ix86_expand_vector_init.
(define_expand "vec_initv2df"
  [(match_operand:V2DF 0 "register_operand" "")
   (match_operand 1 "" "")]
  "TARGET_SSE"
{
  ix86_expand_vector_init (false, operands[0], operands[1]);
  DONE;
})

;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;;
;; Parallel integral arithmetic
;;
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;

;; Negation is expanded as (0 - operand 1); the zero vector is forced
;; into a register and used as operand 2.
(define_expand "neg<mode>2"
  [(set (match_operand:SSEMODEI 0 "register_operand" "")
        (minus:SSEMODEI
          (match_dup 2)
          (match_operand:SSEMODEI 1 "nonimmediate_operand" "")))]
  "TARGET_SSE2"
  "operands[2] = force_reg (<MODE>mode, CONST0_RTX (<MODE>mode));")

;; Integer vector addition.  The operands are passed through
;; ix86_fixup_binary_operands_no_copy before the pattern is emitted.
(define_expand "add<mode>3"
  [(set (match_operand:SSEMODEI 0 "register_operand" "")
        (plus:SSEMODEI (match_operand:SSEMODEI 1 "nonimmediate_operand" "")
                       (match_operand:SSEMODEI 2 "nonimmediate_operand" "")))]
  "TARGET_SSE2"
  "ix86_fixup_binary_operands_no_copy (PLUS, <MODE>mode, operands);")

;; padd{b,w,d,q}; the suffix comes from the ssevecsize mode attribute.
(define_insn "*add<mode>3"
  [(set (match_operand:SSEMODEI 0 "register_operand" "=x")
        (plus:SSEMODEI
          (match_operand:SSEMODEI 1 "nonimmediate_operand" "%0")
          (match_operand:SSEMODEI 2 "nonimmediate_operand" "xm")))]
  "TARGET_SSE2 && ix86_binary_operator_ok (PLUS, <MODE>mode, operands)"
  "padd<ssevecsize>\t{%2, %0|%0, %2}"
  [(set_attr "type" "sseiadd")
   (set_attr "mode" "TI")])

;; Signed saturating addition (ss_plus) for V16QI and V8HI: padds{b,w}.
(define_insn "sse2_ssadd<mode>3"
  [(set (match_operand:SSEMODE12 0 "register_operand" "=x")
        (ss_plus:SSEMODE12
          (match_operand:SSEMODE12 1 "nonimmediate_operand" "%0")
          (match_operand:SSEMODE12 2 "nonimmediate_operand" "xm")))]
  "TARGET_SSE2 && ix86_binary_operator_ok (SS_PLUS, <MODE>mode, operands)"
  "padds<ssevecsize>\t{%2, %0|%0, %2}"
  [(set_attr "type" "sseiadd")
   (set_attr "mode" "TI")])

;; Unsigned saturating addition (us_plus) for V16QI and V8HI:
;; paddus{b,w}.
(define_insn "sse2_usadd<mode>3"
  [(set (match_operand:SSEMODE12 0 "register_operand" "=x")
        (us_plus:SSEMODE12
          (match_operand:SSEMODE12 1 "nonimmediate_operand" "%0")
          (match_operand:SSEMODE12 2 "nonimmediate_operand" "xm")))]
  "TARGET_SSE2 && ix86_binary_operator_ok (US_PLUS, <MODE>mode, operands)"
  "paddus<ssevecsize>\t{%2, %0|%0, %2}"
  [(set_attr "type" "sseiadd")
   (set_attr "mode" "TI")])

;; Integer vector subtraction.  Operand 1 must be a register; the
;; operands are passed through ix86_fixup_binary_operands_no_copy.
(define_expand "sub<mode>3"
  [(set (match_operand:SSEMODEI 0 "register_operand" "")
        (minus:SSEMODEI (match_operand:SSEMODEI 1 "register_operand" "")
                        (match_operand:SSEMODEI 2 "nonimmediate_operand" "")))]
  "TARGET_SSE2"
  "ix86_fixup_binary_operands_no_copy (MINUS, <MODE>mode, operands);")

;; psub{b,w,d,q}; the suffix comes from the ssevecsize mode attribute.
(define_insn "*sub<mode>3"
  [(set (match_operand:SSEMODEI 0 "register_operand" "=x")
        (minus:SSEMODEI
          (match_operand:SSEMODEI 1 "register_operand" "0")
          (match_operand:SSEMODEI 2 "nonimmediate_operand" "xm")))]
  "TARGET_SSE2"
  "psub<ssevecsize>\t{%2, %0|%0, %2}"
  [(set_attr "type" "sseiadd")
   (set_attr "mode" "TI")])

;; Signed saturating subtraction (ss_minus) for V16QI and V8HI:
;; psubs{b,w}.
(define_insn "sse2_sssub<mode>3"
  [(set (match_operand:SSEMODE12 0 "register_operand" "=x")
        (ss_minus:SSEMODE12
          (match_operand:SSEMODE12 1 "register_operand" "0")
          (match_operand:SSEMODE12 2 "nonimmediate_operand" "xm")))]
  "TARGET_SSE2"
  "psubs<ssevecsize>\t{%2, %0|%0, %2}"
  [(set_attr "type" "sseiadd")
   (set_attr "mode" "TI")])

;; Unsigned saturating subtraction (us_minus) for V16QI and V8HI:
;; psubus{b,w}.
(define_insn "sse2_ussub<mode>3"
  [(set (match_operand:SSEMODE12 0 "register_operand" "=x")
        (us_minus:SSEMODE12
          (match_operand:SSEMODE12 1 "register_operand" "0")
          (match_operand:SSEMODE12 2 "nonimmediate_operand" "xm")))]
  "TARGET_SSE2"
  "psubus<ssevecsize>\t{%2, %0|%0, %2}"
  [(set_attr "type" "sseiadd")
   (set_attr "mode" "TI")])

;; V16QI multiplication, synthesized from byte unpacks, two V8HI
;; multiplies and a sequence of byte unpacks that gathers the result
;; bytes.  Twelve V16QI temporaries are allocated up front.
(define_expand "mulv16qi3"
  [(set (match_operand:V16QI 0 "register_operand" "")
        (mult:V16QI (match_operand:V16QI 1 "register_operand" "")
                    (match_operand:V16QI 2 "register_operand" "")))]
  "TARGET_SSE2"
{
  rtx t[12], op0;
  int i;

  for (i = 0; i < 12; ++i)
    t[i] = gen_reg_rtx (V16QImode);

  /* Unpack data such that we've got a source byte in each low byte of
     each word.  We don't care what goes into the high byte of each word.
     Rather than trying to get zero in there, most convenient is to let
     it be a copy of the low byte.  */
  emit_insn (gen_sse2_punpckhbw (t[0], operands[1], operands[1]));
  emit_insn (gen_sse2_punpckhbw (t[1], operands[2], operands[2]));
  emit_insn (gen_sse2_punpcklbw (t[2], operands[1], operands[1]));
  emit_insn (gen_sse2_punpcklbw (t[3], operands[2], operands[2]));

  /* Multiply words.  The end-of-line annotations here give a picture of what
     the output of that instruction looks like.  Dot means don't care; the
     letters are the bytes of the result with A being the most significant.
*/
  emit_insn (gen_mulv8hi3 (gen_lowpart (V8HImode, t[4]), /* .A.B.C.D.E.F.G.H */
                           gen_lowpart (V8HImode, t[0]),
                           gen_lowpart (V8HImode, t[1])));
  emit_insn (gen_mulv8hi3 (gen_lowpart (V8HImode, t[5]), /* .I.J.K.L.M.N.O.P */
                           gen_lowpart (V8HImode, t[2]),
                           gen_lowpart (V8HImode, t[3])));

  /* Extract the relevant bytes and merge them back together.  */
  emit_insn (gen_sse2_punpckhbw (t[6], t[5], t[4]));   /* ..AI..BJ..CK..DL */
  emit_insn (gen_sse2_punpcklbw (t[7], t[5], t[4]));   /* ..EM..FN..GO..HP */
  emit_insn (gen_sse2_punpckhbw (t[8], t[7], t[6]));   /* ....AEIM....BFJN */
  emit_insn (gen_sse2_punpcklbw (t[9], t[7], t[6]));   /* ....CGKO....DHLP */
  emit_insn (gen_sse2_punpckhbw (t[10], t[9], t[8]));  /* ........ACEGIKMO */
  emit_insn (gen_sse2_punpcklbw (t[11], t[9], t[8]));  /* ........BDFHJLNP */

  op0 = operands[0];
  emit_insn (gen_sse2_punpcklbw (op0, t[11], t[10]));  /* ABCDEFGHIJKLMNOP */
  DONE;
})

;; V8HI multiplication.  The operands are passed through
;; ix86_fixup_binary_operands_no_copy before the pattern is emitted.
(define_expand "mulv8hi3"
  [(set (match_operand:V8HI 0 "register_operand" "")
        (mult:V8HI (match_operand:V8HI 1 "nonimmediate_operand" "")
                   (match_operand:V8HI 2 "nonimmediate_operand" "")))]
  "TARGET_SSE2"
  "ix86_fixup_binary_operands_no_copy (MULT, V8HImode, operands);")

;; pmullw.
(define_insn "*mulv8hi3"
  [(set (match_operand:V8HI 0 "register_operand" "=x")
        (mult:V8HI (match_operand:V8HI 1 "nonimmediate_operand" "%0")
                   (match_operand:V8HI 2 "nonimmediate_operand" "xm")))]
  "TARGET_SSE2 && ix86_binary_operator_ok (MULT, V8HImode, operands)"
  "pmullw\t{%2, %0|%0, %2}"
  [(set_attr "type" "sseimul")
   (set_attr "mode" "TI")])

;; pmulhw: sign-extend both V8HI inputs to V8SI, multiply, shift right by
;; 16 and truncate back to V8HI.
(define_insn "sse2_smulv8hi3_highpart"
  [(set (match_operand:V8HI 0 "register_operand" "=x")
        (truncate:V8HI
          (lshiftrt:V8SI
            (mult:V8SI
              (sign_extend:V8SI
                (match_operand:V8HI 1 "nonimmediate_operand" "%0"))
              (sign_extend:V8SI
                (match_operand:V8HI 2 "nonimmediate_operand" "xm")))
            (const_int 16))))]
  "TARGET_SSE2 && ix86_binary_operator_ok (MULT, V8HImode, operands)"
  "pmulhw\t{%2, %0|%0, %2}"
  [(set_attr "type" "sseimul")
   (set_attr "mode" "TI")])

;; pmulhuw: same shape as sse2_smulv8hi3_highpart with zero extension.
(define_insn "sse2_umulv8hi3_highpart"
  [(set (match_operand:V8HI 0 "register_operand" "=x")
        (truncate:V8HI
          (lshiftrt:V8SI
            (mult:V8SI
              (zero_extend:V8SI
                (match_operand:V8HI 1 "nonimmediate_operand" "%0"))
              (zero_extend:V8SI
                (match_operand:V8HI 2 "nonimmediate_operand" "xm")))
            (const_int 16))))]
  "TARGET_SSE2 && ix86_binary_operator_ok (MULT, V8HImode, operands)"
  "pmulhuw\t{%2, %0|%0, %2}"
  [(set_attr "type" "sseimul")
   (set_attr "mode" "TI")])

;; pmuludq: zero-extend elements 0 and 2 of each V4SI input to DImode
;; and multiply them, giving a V2DI result.
;; The inputs checked by ix86_binary_operator_ok are V4SI, so V4SImode is
;; the mode passed to it.
(define_insn "sse2_umulv2siv2di3"
  [(set (match_operand:V2DI 0 "register_operand" "=x")
        (mult:V2DI
          (zero_extend:V2DI
            (vec_select:V2SI
              (match_operand:V4SI 1 "nonimmediate_operand" "%0")
              (parallel [(const_int 0) (const_int 2)])))
          (zero_extend:V2DI
            (vec_select:V2SI
              (match_operand:V4SI 2 "nonimmediate_operand" "xm")
              (parallel [(const_int 0) (const_int 2)])))))]
  "TARGET_SSE2 && ix86_binary_operator_ok (MULT, V4SImode, operands)"
  "pmuludq\t{%2, %0|%0, %2}"
  [(set_attr "type" "sseimul")
   (set_attr "mode" "TI")])

;; pmaddwd: multiply the even-indexed V8HI elements and the odd-indexed
;; V8HI elements of the two inputs as sign-extended SImode values, then
;; add the two V4SI products.
(define_insn "sse2_pmaddwd"
  [(set (match_operand:V4SI 0 "register_operand" "=x")
        (plus:V4SI
          (mult:V4SI
            (sign_extend:V4SI
              (vec_select:V4HI
                (match_operand:V8HI 1 "nonimmediate_operand" "%0")
                (parallel [(const_int 0)
                           (const_int 2)
                           (const_int 4)
                           (const_int 6)])))
            (sign_extend:V4SI
              (vec_select:V4HI
                (match_operand:V8HI 2 "nonimmediate_operand" "xm")
                (parallel [(const_int 0)
                           (const_int 2)
                           (const_int 4)
                           (const_int 6)]))))
          (mult:V4SI
            (sign_extend:V4SI
              (vec_select:V4HI (match_dup 1)
                (parallel [(const_int 1)
                           (const_int 3)
                           (const_int 5)
                           (const_int 7)])))
(sign_extend:V4SI
              (vec_select:V4HI (match_dup 2)
                (parallel [(const_int 1)
                           (const_int 3)
                           (const_int 5)
                           (const_int 7)]))))))]
  "TARGET_SSE2"
  "pmaddwd\t{%2, %0|%0, %2}"
  [(set_attr "type" "sseiadd")
   (set_attr "mode" "TI")])

;; V4SI multiplication, synthesized from two sse2_umulv2siv2di3
;; multiplies (one on the inputs as given, one on the inputs shifted
;; right by 32 bits as TImode values), followed by a pshufd of each
;; product and a punpckldq that interleaves the two.
(define_expand "mulv4si3"
  [(set (match_operand:V4SI 0 "register_operand" "")
        (mult:V4SI (match_operand:V4SI 1 "register_operand" "")
                   (match_operand:V4SI 2 "register_operand" "")))]
  "TARGET_SSE2"
{
  rtx t1, t2, t3, t4, t5, t6, thirtytwo;
  rtx op0, op1, op2;

  op0 = operands[0];
  op1 = operands[1];
  op2 = operands[2];
  t1 = gen_reg_rtx (V4SImode);
  t2 = gen_reg_rtx (V4SImode);
  t3 = gen_reg_rtx (V4SImode);
  t4 = gen_reg_rtx (V4SImode);
  t5 = gen_reg_rtx (V4SImode);
  t6 = gen_reg_rtx (V4SImode);
  thirtytwo = GEN_INT (32);

  /* Multiply elements 2 and 0.  */
  emit_insn (gen_sse2_umulv2siv2di3 (gen_lowpart (V2DImode, t1), op1, op2));

  /* Shift both input vectors down one element, so that elements 3 and 1
     are now in the slots for elements 2 and 0.  For K8, at least, this is
     faster than using a shuffle.  */
  emit_insn (gen_sse2_lshrti3 (gen_lowpart (TImode, t2),
                               gen_lowpart (TImode, op1), thirtytwo));
  emit_insn (gen_sse2_lshrti3 (gen_lowpart (TImode, t3),
                               gen_lowpart (TImode, op2), thirtytwo));

  /* Multiply elements 3 and 1.  */
  emit_insn (gen_sse2_umulv2siv2di3 (gen_lowpart (V2DImode, t4), t2, t3));

  /* Move the results in element 2 down to element 1; we don't care what
     goes in elements 2 and 3.  */
  emit_insn (gen_sse2_pshufd_1 (t5, t1, const0_rtx, const2_rtx,
                                const0_rtx, const0_rtx));
  emit_insn (gen_sse2_pshufd_1 (t6, t4, const0_rtx, const2_rtx,
                                const0_rtx, const0_rtx));

  /* Merge the parts back together.
*/
  emit_insn (gen_sse2_punpckldq (op0, t5, t6));
  DONE;
})

;; V2DI multiplication, synthesized from three sse2_umulv2siv2di3
;; multiplies: low*low plus the two cross products (low*high and
;; high*low), each cross product shifted left by 32 bits before the
;; final additions.
(define_expand "mulv2di3"
  [(set (match_operand:V2DI 0 "register_operand" "")
        (mult:V2DI (match_operand:V2DI 1 "register_operand" "")
                   (match_operand:V2DI 2 "register_operand" "")))]
  "TARGET_SSE2"
{
  rtx t1, t2, t3, t4, t5, t6, thirtytwo;
  rtx op0, op1, op2;

  op0 = operands[0];
  op1 = operands[1];
  op2 = operands[2];
  t1 = gen_reg_rtx (V2DImode);
  t2 = gen_reg_rtx (V2DImode);
  t3 = gen_reg_rtx (V2DImode);
  t4 = gen_reg_rtx (V2DImode);
  t5 = gen_reg_rtx (V2DImode);
  t6 = gen_reg_rtx (V2DImode);
  thirtytwo = GEN_INT (32);

  /* Multiply low parts.  */
  emit_insn (gen_sse2_umulv2siv2di3 (t1, gen_lowpart (V4SImode, op1),
                                     gen_lowpart (V4SImode, op2)));

  /* Shift input vectors right 32 bits so that the high part of each
     element lands in the low slot, where it can be multiplied.  */
  emit_insn (gen_lshrv2di3 (t2, op1, thirtytwo));
  emit_insn (gen_lshrv2di3 (t3, op2, thirtytwo));

  /* Multiply high parts by low parts.  */
  emit_insn (gen_sse2_umulv2siv2di3 (t4, gen_lowpart (V4SImode, op1),
                                     gen_lowpart (V4SImode, t3)));
  emit_insn (gen_sse2_umulv2siv2di3 (t5, gen_lowpart (V4SImode, op2),
                                     gen_lowpart (V4SImode, t2)));

  /* Shift them back.  */
  emit_insn (gen_ashlv2di3 (t4, t4, thirtytwo));
  emit_insn (gen_ashlv2di3 (t5, t5, thirtytwo));

  /* Add the three parts together.
*/
  emit_insn (gen_addv2di3 (t6, t1, t4));
  emit_insn (gen_addv2di3 (op0, t6, t5));
  DONE;
})

;; Arithmetic right shift of V8HI/V4SI by the TImode count in operand 2:
;; psra{w,d}.
(define_insn "ashr<mode>3"
  [(set (match_operand:SSEMODE24 0 "register_operand" "=x")
        (ashiftrt:SSEMODE24
          (match_operand:SSEMODE24 1 "register_operand" "0")
          (match_operand:TI 2 "nonmemory_operand" "xn")))]
  "TARGET_SSE2"
  "psra<ssevecsize>\t{%2, %0|%0, %2}"
  [(set_attr "type" "sseishft")
   (set_attr "mode" "TI")])

;; Logical right shift of V8HI/V4SI/V2DI by the TImode count in
;; operand 2: psrl{w,d,q}.
(define_insn "lshr<mode>3"
  [(set (match_operand:SSEMODE248 0 "register_operand" "=x")
        (lshiftrt:SSEMODE248
          (match_operand:SSEMODE248 1 "register_operand" "0")
          (match_operand:TI 2 "nonmemory_operand" "xn")))]
  "TARGET_SSE2"
  "psrl<ssevecsize>\t{%2, %0|%0, %2}"
  [(set_attr "type" "sseishft")
   (set_attr "mode" "TI")])

;; Left shift of V8HI/V4SI/V2DI by the TImode count in operand 2:
;; psll{w,d,q}.
(define_insn "ashl<mode>3"
  [(set (match_operand:SSEMODE248 0 "register_operand" "=x")
        (ashift:SSEMODE248
          (match_operand:SSEMODE248 1 "register_operand" "0")
          (match_operand:TI 2 "nonmemory_operand" "xn")))]
  "TARGET_SSE2"
  "psll<ssevecsize>\t{%2, %0|%0, %2}"
  [(set_attr "type" "sseishft")
   (set_attr "mode" "TI")])

;; Whole-register left shift (pslldq).  Operand 2 is matched by
;; const_0_to_255_mul_8_operand; the output code divides it by 8 before
;; printing the instruction's immediate.
(define_insn "sse2_ashlti3"
  [(set (match_operand:TI 0 "register_operand" "=x")
        (ashift:TI (match_operand:TI 1 "register_operand" "0")
                   (match_operand:SI 2 "const_0_to_255_mul_8_operand" "n")))]
  "TARGET_SSE2"
{
  operands[2] = GEN_INT (INTVAL (operands[2]) / 8);
  return "pslldq\t{%2, %0|%0, %2}";
}
  [(set_attr "type" "sseishft")
   (set_attr "mode" "TI")])

;; Whole-vector left shift for any SSEMODEI vector.  The expander FAILs
;; unless the count satisfies const_0_to_255_mul_8_operand, then views
;; both vector operands as TImode.
(define_expand "vec_shl_<mode>"
  [(set (match_operand:SSEMODEI 0 "register_operand" "")
        (ashift:TI (match_operand:SSEMODEI 1 "register_operand" "")
                   (match_operand:SI 2 "general_operand" "")))]
  "TARGET_SSE2"
{
  if (!const_0_to_255_mul_8_operand (operands[2], SImode))
    FAIL;
  operands[0] = gen_lowpart (TImode, operands[0]);
  operands[1] = gen_lowpart
(TImode, operands[1]);
})

;; Whole-register logical right shift (psrldq).  Operand 2 is matched by
;; const_0_to_255_mul_8_operand; the output code divides it by 8 before
;; printing the instruction's immediate.
(define_insn "sse2_lshrti3"
  [(set (match_operand:TI 0 "register_operand" "=x")
        (lshiftrt:TI (match_operand:TI 1 "register_operand" "0")
                     (match_operand:SI 2 "const_0_to_255_mul_8_operand" "n")))]
  "TARGET_SSE2"
{
  operands[2] = GEN_INT (INTVAL (operands[2]) / 8);
  return "psrldq\t{%2, %0|%0, %2}";
}
  [(set_attr "type" "sseishft")
   (set_attr "mode" "TI")])

;; Whole-vector right shift for any SSEMODEI vector.  The expander FAILs
;; unless the count satisfies const_0_to_255_mul_8_operand, then views
;; both vector operands as TImode.
(define_expand "vec_shr_<mode>"
  [(set (match_operand:SSEMODEI 0 "register_operand" "")
        (lshiftrt:TI (match_operand:SSEMODEI 1 "register_operand" "")
                     (match_operand:SI 2 "general_operand" "")))]
  "TARGET_SSE2"
{
  if (!const_0_to_255_mul_8_operand (operands[2], SImode))
    FAIL;
  operands[0] = gen_lowpart (TImode, operands[0]);
  operands[1] = gen_lowpart (TImode, operands[1]);
})

;; Unsigned V16QI maximum.  The operands are passed through
;; ix86_fixup_binary_operands_no_copy before the pattern is emitted.
(define_expand "umaxv16qi3"
  [(set (match_operand:V16QI 0 "register_operand" "")
        (umax:V16QI (match_operand:V16QI 1 "nonimmediate_operand" "")
                    (match_operand:V16QI 2 "nonimmediate_operand" "")))]
  "TARGET_SSE2"
  "ix86_fixup_binary_operands_no_copy (UMAX, V16QImode, operands);")

;; pmaxub.
(define_insn "*umaxv16qi3"
  [(set (match_operand:V16QI 0 "register_operand" "=x")
        (umax:V16QI (match_operand:V16QI 1 "nonimmediate_operand" "%0")
                    (match_operand:V16QI 2 "nonimmediate_operand" "xm")))]
  "TARGET_SSE2 && ix86_binary_operator_ok (UMAX, V16QImode, operands)"
  "pmaxub\t{%2, %0|%0, %2}"
  [(set_attr "type" "sseiadd")
   (set_attr "mode" "TI")])

;; Signed V8HI maximum.  The operands are passed through
;; ix86_fixup_binary_operands_no_copy before the pattern is emitted.
(define_expand "smaxv8hi3"
  [(set (match_operand:V8HI 0 "register_operand" "")
        (smax:V8HI (match_operand:V8HI 1 "nonimmediate_operand" "")
                   (match_operand:V8HI 2 "nonimmediate_operand" "")))]
  "TARGET_SSE2"
  "ix86_fixup_binary_operands_no_copy (SMAX, V8HImode, operands);")

;; pmaxsw.
(define_insn "*smaxv8hi3"
  [(set (match_operand:V8HI 0 "register_operand" "=x")
        (smax:V8HI
(match_operand:V8HI 1 "nonimmediate_operand" "%0")
                   (match_operand:V8HI 2 "nonimmediate_operand" "xm")))]
  "TARGET_SSE2 && ix86_binary_operator_ok (SMAX, V8HImode, operands)"
  "pmaxsw\t{%2, %0|%0, %2}"
  [(set_attr "type" "sseiadd")
   (set_attr "mode" "TI")])

;; Unsigned V8HI maximum, expanded as two sets: an unsigned saturating
;; subtraction (operand 1 - operand 2) followed by adding operand 2 back.
;; Operand 3 is the real destination.  If the destination is the same
;; rtx as operand 2, the intermediate result goes to a fresh register so
;; that operand 2 is still intact for the addition.
(define_expand "umaxv8hi3"
  [(set (match_operand:V8HI 0 "register_operand" "=x")
        (us_minus:V8HI (match_operand:V8HI 1 "register_operand" "0")
                       (match_operand:V8HI 2 "nonimmediate_operand" "xm")))
   (set (match_dup 3)
        (plus:V8HI (match_dup 0) (match_dup 2)))]
  "TARGET_SSE2"
{
  operands[3] = operands[0];
  if (rtx_equal_p (operands[0], operands[2]))
    operands[0] = gen_reg_rtx (V8HImode);
})

;; Signed maximum for V16QI and V4SI, expanded through
;; ix86_expand_int_vcond.  The xops array is filled as: destination,
;; the two values to choose between (operand 1, operand 2), the
;; comparison (operand 1 GT operand 2) and the two compared operands.
(define_expand "smax<mode>3"
  [(set (match_operand:SSEMODE14 0 "register_operand" "")
        (smax:SSEMODE14 (match_operand:SSEMODE14 1 "register_operand" "")
                        (match_operand:SSEMODE14 2 "register_operand" "")))]
  "TARGET_SSE2"
{
  rtx xops[6];
  bool ok;

  xops[0] = operands[0];
  xops[1] = operands[1];
  xops[2] = operands[2];
  xops[3] = gen_rtx_GT (VOIDmode, operands[1], operands[2]);
  xops[4] = operands[1];
  xops[5] = operands[2];
  ok = ix86_expand_int_vcond (xops);
  gcc_assert (ok);
  DONE;
})

;; Unsigned V4SI maximum; same scheme as smax<mode>3 but with an
;; unsigned (GTU) comparison.
(define_expand "umaxv4si3"
  [(set (match_operand:V4SI 0 "register_operand" "")
        (umax:V4SI (match_operand:V4SI 1 "register_operand" "")
                   (match_operand:V4SI 2 "register_operand" "")))]
  "TARGET_SSE2"
{
  rtx xops[6];
  bool ok;

  xops[0] = operands[0];
  xops[1] = operands[1];
  xops[2] = operands[2];
  xops[3] = gen_rtx_GTU (VOIDmode, operands[1], operands[2]);
  xops[4] = operands[1];
  xops[5] = operands[2];
  ok = ix86_expand_int_vcond (xops);
  gcc_assert (ok);
  DONE;
})

;; Unsigned V16QI minimum.  The operands are passed through
;; ix86_fixup_binary_operands_no_copy before the pattern is emitted.
(define_expand "uminv16qi3"
  [(set (match_operand:V16QI 0 "register_operand" "")
        (umin:V16QI (match_operand:V16QI 1 "nonimmediate_operand" "")
                    (match_operand:V16QI 2 "nonimmediate_operand" "")))]
  "TARGET_SSE2"
  "ix86_fixup_binary_operands_no_copy (UMIN, V16QImode, operands);")

;; pminub.
(define_insn "*uminv16qi3"
  [(set (match_operand:V16QI 0 "register_operand" "=x")
        (umin:V16QI (match_operand:V16QI 1 "nonimmediate_operand" "%0")
                    (match_operand:V16QI 2 "nonimmediate_operand" "xm")))]
  "TARGET_SSE2 && ix86_binary_operator_ok (UMIN, V16QImode, operands)"
  "pminub\t{%2, %0|%0, %2}"
  [(set_attr "type" "sseiadd")
   (set_attr "mode" "TI")])

;; Signed V8HI minimum.  The operands are passed through
;; ix86_fixup_binary_operands_no_copy before the pattern is emitted.
(define_expand "sminv8hi3"
  [(set (match_operand:V8HI 0 "register_operand" "")
        (smin:V8HI (match_operand:V8HI 1 "nonimmediate_operand" "")
                   (match_operand:V8HI 2 "nonimmediate_operand" "")))]
  "TARGET_SSE2"
  "ix86_fixup_binary_operands_no_copy (SMIN, V8HImode, operands);")

;; pminsw.
(define_insn "*sminv8hi3"
  [(set (match_operand:V8HI 0 "register_operand" "=x")
        (smin:V8HI (match_operand:V8HI 1 "nonimmediate_operand" "%0")
                   (match_operand:V8HI 2 "nonimmediate_operand" "xm")))]
  "TARGET_SSE2 && ix86_binary_operator_ok (SMIN, V8HImode, operands)"
  "pminsw\t{%2, %0|%0, %2}"
  [(set_attr "type" "sseiadd")
   (set_attr "mode" "TI")])

;; Signed minimum for V16QI and V4SI, expanded through
;; ix86_expand_int_vcond.  The comparison is still operand 1 GT
;; operand 2, but the two values to choose between are passed in swapped
;; order (operand 2 first), which is the only difference from
;; smax<mode>3.
(define_expand "smin<mode>3"
  [(set (match_operand:SSEMODE14 0 "register_operand" "")
        (smin:SSEMODE14 (match_operand:SSEMODE14 1 "register_operand" "")
                        (match_operand:SSEMODE14 2 "register_operand" "")))]
  "TARGET_SSE2"
{
  rtx xops[6];
  bool ok;

  xops[0] = operands[0];
  xops[1] = operands[2];
  xops[2] = operands[1];
  xops[3] = gen_rtx_GT (VOIDmode, operands[1], operands[2]);
  xops[4] = operands[1];
  xops[5] = operands[2];
  ok = ix86_expand_int_vcond (xops);
  gcc_assert (ok);
  DONE;
})

(define_expand "umin<mode>3"
  [(set (match_operand:SSEMODE24 0 "register_operand" "")
        (umin:SSEMODE24 (match_operand:SSEMODE24 1 "register_operand"
"") 2940 (match_operand:SSEMODE24 2 "register_operand" "")))] 2941 "TARGET_SSE2" 2942{ 2943 rtx xops[6]; 2944 bool ok; 2945 2946 xops[0] = operands[0]; 2947 xops[1] = operands[2]; 2948 xops[2] = operands[1]; 2949 xops[3] = gen_rtx_GTU (VOIDmode, operands[1], operands[2]); 2950 xops[4] = operands[1]; 2951 xops[5] = operands[2]; 2952 ok = ix86_expand_int_vcond (xops); 2953 gcc_assert (ok); 2954 DONE; 2955}) 2956 2957;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; 2958;; 2959;; Parallel integral comparisons 2960;; 2961;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; 2962 2963(define_insn "sse2_eq<mode>3" 2964 [(set (match_operand:SSEMODE124 0 "register_operand" "=x") 2965 (eq:SSEMODE124 2966 (match_operand:SSEMODE124 1 "nonimmediate_operand" "%0") 2967 (match_operand:SSEMODE124 2 "nonimmediate_operand" "xm")))] 2968 "TARGET_SSE2 && ix86_binary_operator_ok (EQ, <MODE>mode, operands)" 2969 "pcmpeq<ssevecsize>\t{%2, %0|%0, %2}" 2970 [(set_attr "type" "ssecmp") 2971 (set_attr "mode" "TI")]) 2972 2973(define_insn "sse2_gt<mode>3" 2974 [(set (match_operand:SSEMODE124 0 "register_operand" "=x") 2975 (gt:SSEMODE124 2976 (match_operand:SSEMODE124 1 "register_operand" "0") 2977 (match_operand:SSEMODE124 2 "nonimmediate_operand" "xm")))] 2978 "TARGET_SSE2" 2979 "pcmpgt<ssevecsize>\t{%2, %0|%0, %2}" 2980 [(set_attr "type" "ssecmp") 2981 (set_attr "mode" "TI")]) 2982 2983(define_expand "vcond<mode>" 2984 [(set (match_operand:SSEMODE124 0 "register_operand" "") 2985 (if_then_else:SSEMODE124 2986 (match_operator 3 "" 2987 [(match_operand:SSEMODE124 4 "nonimmediate_operand" "") 2988 (match_operand:SSEMODE124 5 "nonimmediate_operand" "")]) 2989 (match_operand:SSEMODE124 1 "general_operand" "") 2990 (match_operand:SSEMODE124 2 "general_operand" "")))] 2991 "TARGET_SSE2" 2992{ 2993 if (ix86_expand_int_vcond (operands)) 2994 DONE; 2995 else 2996 FAIL; 2997}) 2998 2999(define_expand "vcondu<mode>" 3000 [(set (match_operand:SSEMODE124 0 
"register_operand" "") 3001 (if_then_else:SSEMODE124 3002 (match_operator 3 "" 3003 [(match_operand:SSEMODE124 4 "nonimmediate_operand" "") 3004 (match_operand:SSEMODE124 5 "nonimmediate_operand" "")]) 3005 (match_operand:SSEMODE124 1 "general_operand" "") 3006 (match_operand:SSEMODE124 2 "general_operand" "")))] 3007 "TARGET_SSE2" 3008{ 3009 if (ix86_expand_int_vcond (operands)) 3010 DONE; 3011 else 3012 FAIL; 3013}) 3014 3015;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; 3016;; 3017;; Parallel integral logical operations 3018;; 3019;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; 3020 3021(define_expand "one_cmpl<mode>2" 3022 [(set (match_operand:SSEMODEI 0 "register_operand" "") 3023 (xor:SSEMODEI (match_operand:SSEMODEI 1 "nonimmediate_operand" "") 3024 (match_dup 2)))] 3025 "TARGET_SSE2" 3026{ 3027 int i, n = GET_MODE_NUNITS (<MODE>mode); 3028 rtvec v = rtvec_alloc (n); 3029 3030 for (i = 0; i < n; ++i) 3031 RTVEC_ELT (v, i) = constm1_rtx; 3032 3033 operands[2] = force_reg (<MODE>mode, gen_rtx_CONST_VECTOR (<MODE>mode, v)); 3034}) 3035 3036(define_expand "and<mode>3" 3037 [(set (match_operand:SSEMODEI 0 "register_operand" "") 3038 (and:SSEMODEI (match_operand:SSEMODEI 1 "nonimmediate_operand" "") 3039 (match_operand:SSEMODEI 2 "nonimmediate_operand" "")))] 3040 "TARGET_SSE2" 3041 "ix86_fixup_binary_operands_no_copy (AND, <MODE>mode, operands);") 3042 3043(define_insn "*and<mode>3" 3044 [(set (match_operand:SSEMODEI 0 "register_operand" "=x") 3045 (and:SSEMODEI 3046 (match_operand:SSEMODEI 1 "nonimmediate_operand" "%0") 3047 (match_operand:SSEMODEI 2 "nonimmediate_operand" "xm")))] 3048 "TARGET_SSE2 && ix86_binary_operator_ok (AND, <MODE>mode, operands)" 3049 "pand\t{%2, %0|%0, %2}" 3050 [(set_attr "type" "sselog") 3051 (set_attr "mode" "TI")]) 3052 3053(define_insn "sse2_nand<mode>3" 3054 [(set (match_operand:SSEMODEI 0 "register_operand" "=x") 3055 (and:SSEMODEI 3056 (not:SSEMODEI (match_operand:SSEMODEI 1 
"register_operand" "0")) 3057 (match_operand:SSEMODEI 2 "nonimmediate_operand" "xm")))] 3058 "TARGET_SSE2" 3059 "pandn\t{%2, %0|%0, %2}" 3060 [(set_attr "type" "sselog") 3061 (set_attr "mode" "TI")]) 3062 3063(define_expand "ior<mode>3" 3064 [(set (match_operand:SSEMODEI 0 "register_operand" "") 3065 (ior:SSEMODEI (match_operand:SSEMODEI 1 "nonimmediate_operand" "") 3066 (match_operand:SSEMODEI 2 "nonimmediate_operand" "")))] 3067 "TARGET_SSE2" 3068 "ix86_fixup_binary_operands_no_copy (IOR, <MODE>mode, operands);") 3069 3070(define_insn "*ior<mode>3" 3071 [(set (match_operand:SSEMODEI 0 "register_operand" "=x") 3072 (ior:SSEMODEI 3073 (match_operand:SSEMODEI 1 "nonimmediate_operand" "%0") 3074 (match_operand:SSEMODEI 2 "nonimmediate_operand" "xm")))] 3075 "TARGET_SSE2 && ix86_binary_operator_ok (IOR, <MODE>mode, operands)" 3076 "por\t{%2, %0|%0, %2}" 3077 [(set_attr "type" "sselog") 3078 (set_attr "mode" "TI")]) 3079 3080(define_expand "xor<mode>3" 3081 [(set (match_operand:SSEMODEI 0 "register_operand" "") 3082 (xor:SSEMODEI (match_operand:SSEMODEI 1 "nonimmediate_operand" "") 3083 (match_operand:SSEMODEI 2 "nonimmediate_operand" "")))] 3084 "TARGET_SSE2" 3085 "ix86_fixup_binary_operands_no_copy (XOR, <MODE>mode, operands);") 3086 3087(define_insn "*xor<mode>3" 3088 [(set (match_operand:SSEMODEI 0 "register_operand" "=x") 3089 (xor:SSEMODEI 3090 (match_operand:SSEMODEI 1 "nonimmediate_operand" "%0") 3091 (match_operand:SSEMODEI 2 "nonimmediate_operand" "xm")))] 3092 "TARGET_SSE2 && ix86_binary_operator_ok (XOR, <MODE>mode, operands)" 3093 "pxor\t{%2, %0|%0, %2}" 3094 [(set_attr "type" "sselog") 3095 (set_attr "mode" "TI")]) 3096 3097;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; 3098;; 3099;; Parallel integral element swizzling 3100;; 3101;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; 3102 3103(define_insn "sse2_packsswb" 3104 [(set (match_operand:V16QI 0 "register_operand" "=x") 3105 (vec_concat:V16QI 3106 
(ss_truncate:V8QI 3107 (match_operand:V8HI 1 "register_operand" "0")) 3108 (ss_truncate:V8QI 3109 (match_operand:V8HI 2 "nonimmediate_operand" "xm"))))] 3110 "TARGET_SSE2" 3111 "packsswb\t{%2, %0|%0, %2}" 3112 [(set_attr "type" "sselog") 3113 (set_attr "mode" "TI")]) 3114 3115(define_insn "sse2_packssdw" 3116 [(set (match_operand:V8HI 0 "register_operand" "=x") 3117 (vec_concat:V8HI 3118 (ss_truncate:V4HI 3119 (match_operand:V4SI 1 "register_operand" "0")) 3120 (ss_truncate:V4HI 3121 (match_operand:V4SI 2 "nonimmediate_operand" "xm"))))] 3122 "TARGET_SSE2" 3123 "packssdw\t{%2, %0|%0, %2}" 3124 [(set_attr "type" "sselog") 3125 (set_attr "mode" "TI")]) 3126 3127(define_insn "sse2_packuswb" 3128 [(set (match_operand:V16QI 0 "register_operand" "=x") 3129 (vec_concat:V16QI 3130 (us_truncate:V8QI 3131 (match_operand:V8HI 1 "register_operand" "0")) 3132 (us_truncate:V8QI 3133 (match_operand:V8HI 2 "nonimmediate_operand" "xm"))))] 3134 "TARGET_SSE2" 3135 "packuswb\t{%2, %0|%0, %2}" 3136 [(set_attr "type" "sselog") 3137 (set_attr "mode" "TI")]) 3138 3139(define_insn "sse2_punpckhbw" 3140 [(set (match_operand:V16QI 0 "register_operand" "=x") 3141 (vec_select:V16QI 3142 (vec_concat:V32QI 3143 (match_operand:V16QI 1 "register_operand" "0") 3144 (match_operand:V16QI 2 "nonimmediate_operand" "xm")) 3145 (parallel [(const_int 8) (const_int 24) 3146 (const_int 9) (const_int 25) 3147 (const_int 10) (const_int 26) 3148 (const_int 11) (const_int 27) 3149 (const_int 12) (const_int 28) 3150 (const_int 13) (const_int 29) 3151 (const_int 14) (const_int 30) 3152 (const_int 15) (const_int 31)])))] 3153 "TARGET_SSE2" 3154 "punpckhbw\t{%2, %0|%0, %2}" 3155 [(set_attr "type" "sselog") 3156 (set_attr "mode" "TI")]) 3157 3158(define_insn "sse2_punpcklbw" 3159 [(set (match_operand:V16QI 0 "register_operand" "=x") 3160 (vec_select:V16QI 3161 (vec_concat:V32QI 3162 (match_operand:V16QI 1 "register_operand" "0") 3163 (match_operand:V16QI 2 "nonimmediate_operand" "xm")) 3164 (parallel [(const_int 0) 
(const_int 16) 3165 (const_int 1) (const_int 17) 3166 (const_int 2) (const_int 18) 3167 (const_int 3) (const_int 19) 3168 (const_int 4) (const_int 20) 3169 (const_int 5) (const_int 21) 3170 (const_int 6) (const_int 22) 3171 (const_int 7) (const_int 23)])))] 3172 "TARGET_SSE2" 3173 "punpcklbw\t{%2, %0|%0, %2}" 3174 [(set_attr "type" "sselog") 3175 (set_attr "mode" "TI")]) 3176 3177(define_insn "sse2_punpckhwd" 3178 [(set (match_operand:V8HI 0 "register_operand" "=x") 3179 (vec_select:V8HI 3180 (vec_concat:V16HI 3181 (match_operand:V8HI 1 "register_operand" "0") 3182 (match_operand:V8HI 2 "nonimmediate_operand" "xm")) 3183 (parallel [(const_int 4) (const_int 12) 3184 (const_int 5) (const_int 13) 3185 (const_int 6) (const_int 14) 3186 (const_int 7) (const_int 15)])))] 3187 "TARGET_SSE2" 3188 "punpckhwd\t{%2, %0|%0, %2}" 3189 [(set_attr "type" "sselog") 3190 (set_attr "mode" "TI")]) 3191 3192(define_insn "sse2_punpcklwd" 3193 [(set (match_operand:V8HI 0 "register_operand" "=x") 3194 (vec_select:V8HI 3195 (vec_concat:V16HI 3196 (match_operand:V8HI 1 "register_operand" "0") 3197 (match_operand:V8HI 2 "nonimmediate_operand" "xm")) 3198 (parallel [(const_int 0) (const_int 8) 3199 (const_int 1) (const_int 9) 3200 (const_int 2) (const_int 10) 3201 (const_int 3) (const_int 11)])))] 3202 "TARGET_SSE2" 3203 "punpcklwd\t{%2, %0|%0, %2}" 3204 [(set_attr "type" "sselog") 3205 (set_attr "mode" "TI")]) 3206 3207(define_insn "sse2_punpckhdq" 3208 [(set (match_operand:V4SI 0 "register_operand" "=x") 3209 (vec_select:V4SI 3210 (vec_concat:V8SI 3211 (match_operand:V4SI 1 "register_operand" "0") 3212 (match_operand:V4SI 2 "nonimmediate_operand" "xm")) 3213 (parallel [(const_int 2) (const_int 6) 3214 (const_int 3) (const_int 7)])))] 3215 "TARGET_SSE2" 3216 "punpckhdq\t{%2, %0|%0, %2}" 3217 [(set_attr "type" "sselog") 3218 (set_attr "mode" "TI")]) 3219 3220(define_insn "sse2_punpckldq" 3221 [(set (match_operand:V4SI 0 "register_operand" "=x") 3222 (vec_select:V4SI 3223 (vec_concat:V8SI 
3224 (match_operand:V4SI 1 "register_operand" "0") 3225 (match_operand:V4SI 2 "nonimmediate_operand" "xm")) 3226 (parallel [(const_int 0) (const_int 4) 3227 (const_int 1) (const_int 5)])))] 3228 "TARGET_SSE2" 3229 "punpckldq\t{%2, %0|%0, %2}" 3230 [(set_attr "type" "sselog") 3231 (set_attr "mode" "TI")]) 3232 3233(define_insn "sse2_punpckhqdq" 3234 [(set (match_operand:V2DI 0 "register_operand" "=x") 3235 (vec_select:V2DI 3236 (vec_concat:V4DI 3237 (match_operand:V2DI 1 "register_operand" "0") 3238 (match_operand:V2DI 2 "nonimmediate_operand" "xm")) 3239 (parallel [(const_int 1) 3240 (const_int 3)])))] 3241 "TARGET_SSE2" 3242 "punpckhqdq\t{%2, %0|%0, %2}" 3243 [(set_attr "type" "sselog") 3244 (set_attr "mode" "TI")]) 3245 3246(define_insn "sse2_punpcklqdq" 3247 [(set (match_operand:V2DI 0 "register_operand" "=x") 3248 (vec_select:V2DI 3249 (vec_concat:V4DI 3250 (match_operand:V2DI 1 "register_operand" "0") 3251 (match_operand:V2DI 2 "nonimmediate_operand" "xm")) 3252 (parallel [(const_int 0) 3253 (const_int 2)])))] 3254 "TARGET_SSE2" 3255 "punpcklqdq\t{%2, %0|%0, %2}" 3256 [(set_attr "type" "sselog") 3257 (set_attr "mode" "TI")]) 3258 3259(define_expand "sse2_pinsrw" 3260 [(set (match_operand:V8HI 0 "register_operand" "") 3261 (vec_merge:V8HI 3262 (vec_duplicate:V8HI 3263 (match_operand:SI 2 "nonimmediate_operand" "")) 3264 (match_operand:V8HI 1 "register_operand" "") 3265 (match_operand:SI 3 "const_0_to_7_operand" "")))] 3266 "TARGET_SSE2" 3267{ 3268 operands[2] = gen_lowpart (HImode, operands[2]); 3269 operands[3] = GEN_INT ((1 << INTVAL (operands[3]))); 3270}) 3271 3272(define_insn "*sse2_pinsrw" 3273 [(set (match_operand:V8HI 0 "register_operand" "=x") 3274 (vec_merge:V8HI 3275 (vec_duplicate:V8HI 3276 (match_operand:HI 2 "nonimmediate_operand" "rm")) 3277 (match_operand:V8HI 1 "register_operand" "0") 3278 (match_operand:SI 3 "const_pow2_1_to_128_operand" "n")))] 3279 "TARGET_SSE2" 3280{ 3281 operands[3] = GEN_INT (exact_log2 (INTVAL (operands[3]))); 3282 
return "pinsrw\t{%3, %k2, %0|%0, %k2, %3}"; 3283} 3284 [(set_attr "type" "sselog") 3285 (set_attr "mode" "TI")]) 3286 3287(define_insn "sse2_pextrw" 3288 [(set (match_operand:SI 0 "register_operand" "=r") 3289 (zero_extend:SI 3290 (vec_select:HI 3291 (match_operand:V8HI 1 "register_operand" "x") 3292 (parallel [(match_operand:SI 2 "const_0_to_7_operand" "n")]))))] 3293 "TARGET_SSE2" 3294 "pextrw\t{%2, %1, %0|%0, %1, %2}" 3295 [(set_attr "type" "sselog") 3296 (set_attr "mode" "TI")]) 3297 3298(define_expand "sse2_pshufd" 3299 [(match_operand:V4SI 0 "register_operand" "") 3300 (match_operand:V4SI 1 "nonimmediate_operand" "") 3301 (match_operand:SI 2 "const_int_operand" "")] 3302 "TARGET_SSE2" 3303{ 3304 int mask = INTVAL (operands[2]); 3305 emit_insn (gen_sse2_pshufd_1 (operands[0], operands[1], 3306 GEN_INT ((mask >> 0) & 3), 3307 GEN_INT ((mask >> 2) & 3), 3308 GEN_INT ((mask >> 4) & 3), 3309 GEN_INT ((mask >> 6) & 3))); 3310 DONE; 3311}) 3312 3313(define_insn "sse2_pshufd_1" 3314 [(set (match_operand:V4SI 0 "register_operand" "=x") 3315 (vec_select:V4SI 3316 (match_operand:V4SI 1 "nonimmediate_operand" "xm") 3317 (parallel [(match_operand 2 "const_0_to_3_operand" "") 3318 (match_operand 3 "const_0_to_3_operand" "") 3319 (match_operand 4 "const_0_to_3_operand" "") 3320 (match_operand 5 "const_0_to_3_operand" "")])))] 3321 "TARGET_SSE2" 3322{ 3323 int mask = 0; 3324 mask |= INTVAL (operands[2]) << 0; 3325 mask |= INTVAL (operands[3]) << 2; 3326 mask |= INTVAL (operands[4]) << 4; 3327 mask |= INTVAL (operands[5]) << 6; 3328 operands[2] = GEN_INT (mask); 3329 3330 return "pshufd\t{%2, %1, %0|%0, %1, %2}"; 3331} 3332 [(set_attr "type" "sselog1") 3333 (set_attr "mode" "TI")]) 3334 3335(define_expand "sse2_pshuflw" 3336 [(match_operand:V8HI 0 "register_operand" "") 3337 (match_operand:V8HI 1 "nonimmediate_operand" "") 3338 (match_operand:SI 2 "const_int_operand" "")] 3339 "TARGET_SSE2" 3340{ 3341 int mask = INTVAL (operands[2]); 3342 emit_insn (gen_sse2_pshuflw_1 
(operands[0], operands[1], 3343 GEN_INT ((mask >> 0) & 3), 3344 GEN_INT ((mask >> 2) & 3), 3345 GEN_INT ((mask >> 4) & 3), 3346 GEN_INT ((mask >> 6) & 3))); 3347 DONE; 3348}) 3349 3350(define_insn "sse2_pshuflw_1" 3351 [(set (match_operand:V8HI 0 "register_operand" "=x") 3352 (vec_select:V8HI 3353 (match_operand:V8HI 1 "nonimmediate_operand" "xm") 3354 (parallel [(match_operand 2 "const_0_to_3_operand" "") 3355 (match_operand 3 "const_0_to_3_operand" "") 3356 (match_operand 4 "const_0_to_3_operand" "") 3357 (match_operand 5 "const_0_to_3_operand" "") 3358 (const_int 4) 3359 (const_int 5) 3360 (const_int 6) 3361 (const_int 7)])))] 3362 "TARGET_SSE2" 3363{ 3364 int mask = 0; 3365 mask |= INTVAL (operands[2]) << 0; 3366 mask |= INTVAL (operands[3]) << 2; 3367 mask |= INTVAL (operands[4]) << 4; 3368 mask |= INTVAL (operands[5]) << 6; 3369 operands[2] = GEN_INT (mask); 3370 3371 return "pshuflw\t{%2, %1, %0|%0, %1, %2}"; 3372} 3373 [(set_attr "type" "sselog") 3374 (set_attr "mode" "TI")]) 3375 3376(define_expand "sse2_pshufhw" 3377 [(match_operand:V8HI 0 "register_operand" "") 3378 (match_operand:V8HI 1 "nonimmediate_operand" "") 3379 (match_operand:SI 2 "const_int_operand" "")] 3380 "TARGET_SSE2" 3381{ 3382 int mask = INTVAL (operands[2]); 3383 emit_insn (gen_sse2_pshufhw_1 (operands[0], operands[1], 3384 GEN_INT (((mask >> 0) & 3) + 4), 3385 GEN_INT (((mask >> 2) & 3) + 4), 3386 GEN_INT (((mask >> 4) & 3) + 4), 3387 GEN_INT (((mask >> 6) & 3) + 4))); 3388 DONE; 3389}) 3390 3391(define_insn "sse2_pshufhw_1" 3392 [(set (match_operand:V8HI 0 "register_operand" "=x") 3393 (vec_select:V8HI 3394 (match_operand:V8HI 1 "nonimmediate_operand" "xm") 3395 (parallel [(const_int 0) 3396 (const_int 1) 3397 (const_int 2) 3398 (const_int 3) 3399 (match_operand 2 "const_4_to_7_operand" "") 3400 (match_operand 3 "const_4_to_7_operand" "") 3401 (match_operand 4 "const_4_to_7_operand" "") 3402 (match_operand 5 "const_4_to_7_operand" "")])))] 3403 "TARGET_SSE2" 3404{ 3405 int mask = 0; 
3406 mask |= (INTVAL (operands[2]) - 4) << 0; 3407 mask |= (INTVAL (operands[3]) - 4) << 2; 3408 mask |= (INTVAL (operands[4]) - 4) << 4; 3409 mask |= (INTVAL (operands[5]) - 4) << 6; 3410 operands[2] = GEN_INT (mask); 3411 3412 return "pshufhw\t{%2, %1, %0|%0, %1, %2}"; 3413} 3414 [(set_attr "type" "sselog") 3415 (set_attr "mode" "TI")]) 3416 3417(define_expand "sse2_loadd" 3418 [(set (match_operand:V4SI 0 "register_operand" "") 3419 (vec_merge:V4SI 3420 (vec_duplicate:V4SI 3421 (match_operand:SI 1 "nonimmediate_operand" "")) 3422 (match_dup 2) 3423 (const_int 1)))] 3424 "TARGET_SSE" 3425 "operands[2] = CONST0_RTX (V4SImode);") 3426 3427(define_insn "sse2_loadld" 3428 [(set (match_operand:V4SI 0 "register_operand" "=Y,x,x") 3429 (vec_merge:V4SI 3430 (vec_duplicate:V4SI 3431 (match_operand:SI 2 "nonimmediate_operand" "mr,m,x")) 3432 (match_operand:V4SI 1 "reg_or_0_operand" " C,C,0") 3433 (const_int 1)))] 3434 "TARGET_SSE" 3435 "@ 3436 movd\t{%2, %0|%0, %2} 3437 movss\t{%2, %0|%0, %2} 3438 movss\t{%2, %0|%0, %2}" 3439 [(set_attr "type" "ssemov") 3440 (set_attr "mode" "TI,V4SF,SF")]) 3441 3442;; ??? The hardware supports more, but TARGET_INTER_UNIT_MOVES must 3443;; be taken into account, and movdi isn't fully populated even without. 3444(define_insn_and_split "sse2_stored" 3445 [(set (match_operand:SI 0 "nonimmediate_operand" "=mx") 3446 (vec_select:SI 3447 (match_operand:V4SI 1 "register_operand" "x") 3448 (parallel [(const_int 0)])))] 3449 "TARGET_SSE" 3450 "#" 3451 "&& reload_completed" 3452 [(set (match_dup 0) (match_dup 1))] 3453{ 3454 operands[1] = gen_rtx_REG (SImode, REGNO (operands[1])); 3455}) 3456 3457(define_expand "sse_storeq" 3458 [(set (match_operand:DI 0 "nonimmediate_operand" "") 3459 (vec_select:DI 3460 (match_operand:V2DI 1 "register_operand" "") 3461 (parallel [(const_int 0)])))] 3462 "TARGET_SSE" 3463 "") 3464 3465;; ??? 
;; The hardware supports more, but TARGET_INTER_UNIT_MOVES must
;; be taken into account, and movdi isn't fully populated even without.
;; Extract element 0 of a V2DI as DImode.  Always emitted as "#"; the
;; define_split below replaces it after reload.
(define_insn "*sse2_storeq"
  [(set (match_operand:DI 0 "nonimmediate_operand" "=mx")
        (vec_select:DI
          (match_operand:V2DI 1 "register_operand" "x")
          (parallel [(const_int 0)])))]
  "TARGET_SSE"
  "#")

;; After reload, rewrite the element-0 extract as a plain DImode move from
;; the same hard register.
(define_split
  [(set (match_operand:DI 0 "nonimmediate_operand" "")
        (vec_select:DI
          (match_operand:V2DI 1 "register_operand" "")
          (parallel [(const_int 0)])))]
  "TARGET_SSE && reload_completed"
  [(set (match_dup 0) (match_dup 1))]
{
  operands[1] = gen_rtx_REG (DImode, REGNO (operands[1]));
})

;; Extract element 1 (the high quadword) of a V2DI, SSE2 flavour:
;;   alt 0: store the high half to memory (movhps)
;;   alt 1: shift the register right by 8 bytes in place (psrldq)
;;   alt 2: load the high half from offsettable memory (movq of %H1)
(define_insn "*vec_extractv2di_1_sse2"
  [(set (match_operand:DI 0 "nonimmediate_operand" "=m,x,x")
        (vec_select:DI
          (match_operand:V2DI 1 "nonimmediate_operand" "x,0,o")
          (parallel [(const_int 1)])))]
  "TARGET_SSE2 && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
  "@
   movhps\t{%1, %0|%0, %1}
   psrldq\t{$8, %0|%0, 8}
   movq\t{%H1, %0|%0, %H1}"
  [(set_attr "type" "ssemov,sseishft,ssemov")
   (set_attr "memory" "*,none,*")
   (set_attr "mode" "V2SF,TI,TI")])

;; Not sure this is ever used, but it doesn't hurt to have it.
;; -aoliva
;; Extract element 1 of a V2DI when only SSE1 is available:
;;   alt 0: store the high half to memory (movhps)
;;   alt 1: register-to-register copy of the high half (movhlps)
;;   alt 2: load the high half from offsettable memory (movlps of %H1)
(define_insn "*vec_extractv2di_1_sse"
  [(set (match_operand:DI 0 "nonimmediate_operand" "=m,x,x")
        (vec_select:DI
          (match_operand:V2DI 1 "nonimmediate_operand" "x,x,o")
          (parallel [(const_int 1)])))]
  "!TARGET_SSE2 && TARGET_SSE
   && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
  "@
   movhps\t{%1, %0|%0, %1}
   movhlps\t{%1, %0|%0, %1}
   movlps\t{%H1, %0|%0, %H1}"
  [(set_attr "type" "ssemov")
   (set_attr "mode" "V2SF,V4SF,V2SF")])

;; Broadcast an SImode register to all four elements: pshufd $0 for the
;; "Y" alternative, shufps $0 in place for the "x" alternative.
(define_insn "*vec_dupv4si"
  [(set (match_operand:V4SI 0 "register_operand" "=Y,x")
        (vec_duplicate:V4SI
          (match_operand:SI 1 "register_operand" " Y,0")))]
  "TARGET_SSE"
  "@
   pshufd\t{$0, %1, %0|%0, %1, 0}
   shufps\t{$0, %0, %0|%0, %0, 0}"
  [(set_attr "type" "sselog1")
   (set_attr "mode" "TI,V4SF")])

;; Broadcast a DImode value already in the destination register to both
;; elements: punpcklqdq or movlhps of the register with itself.
(define_insn "*vec_dupv2di"
  [(set (match_operand:V2DI 0 "register_operand" "=Y,x")
        (vec_duplicate:V2DI
          (match_operand:DI 1 "register_operand" " 0,0")))]
  "TARGET_SSE"
  "@
   punpcklqdq\t%0, %0
   movlhps\t%0, %0"
  [(set_attr "type" "sselog1,ssemov")
   (set_attr "mode" "TI,V4SF")])

;; ??? In theory we can match memory for the MMX alternative, but allowing
;; nonimmediate_operand for operand 2 and *not* allowing memory for the SSE
;; alternatives pretty much forces the MMX alternative to be chosen.
;; Build a V2SI from two SImode values (SSE2 flavour).  When operand 2 is
;; a register the two are interleaved with punpckldq; when it is zero,
;; movd loads operand 1 into the low element.
(define_insn "*sse2_concatv2si"
  [(set (match_operand:V2SI 0 "register_operand" "=Y, Y,*y,*y")
        (vec_concat:V2SI
          (match_operand:SI 1 "nonimmediate_operand" " 0,rm, 0,rm")
          (match_operand:SI 2 "reg_or_0_operand" " Y, C,*y, C")))]
  "TARGET_SSE2"
  "@
   punpckldq\t{%2, %0|%0, %2}
   movd\t{%1, %0|%0, %1}
   punpckldq\t{%2, %0|%0, %2}
   movd\t{%1, %0|%0, %1}"
  [(set_attr "type" "sselog,ssemov,mmxcvt,mmxmov")
   (set_attr "mode" "TI,TI,DI,DI")])

;; Same operation with SSE1 instructions in the XMM alternatives
;; (unpcklps / movss).
(define_insn "*sse1_concatv2si"
  [(set (match_operand:V2SI 0 "register_operand" "=x,x,*y,*y")
        (vec_concat:V2SI
          (match_operand:SI 1 "nonimmediate_operand" " 0,m, 0,*rm")
          (match_operand:SI 2 "reg_or_0_operand" " x,C,*y,C")))]
  "TARGET_SSE"
  "@
   unpcklps\t{%2, %0|%0, %2}
   movss\t{%1, %0|%0, %1}
   punpckldq\t{%2, %0|%0, %2}
   movd\t{%1, %0|%0, %1}"
  [(set_attr "type" "sselog,ssemov,mmxcvt,mmxmov")
   (set_attr "mode" "V4SF,V4SF,DI,DI")])

;; Build a V4SI from two V2SI halves; operand 1 is already in the low half
;; of the destination and operand 2 is moved into the high half.
(define_insn "*vec_concatv4si_1"
  [(set (match_operand:V4SI 0 "register_operand" "=Y,x,x")
        (vec_concat:V4SI
          (match_operand:V2SI 1 "register_operand" " 0,0,0")
          (match_operand:V2SI 2 "nonimmediate_operand" " Y,x,m")))]
  "TARGET_SSE"
  "@
   punpcklqdq\t{%2, %0|%0, %2}
   movlhps\t{%2, %0|%0, %2}
   movhps\t{%2, %0|%0, %2}"
  [(set_attr "type" "sselog,ssemov,ssemov")
   (set_attr "mode" "TI,V4SF,V2SF")])

;; Build a V2DI from two DImode values.  The alternatives cover: zero high
;; half with the low half loaded from memory (movq) or from an MMX register
;; (movq2dq); low half already in place with the high half taken from a
;; register or memory; and high half already in place with the low half
;; loaded from memory (movlps).
(define_insn "*vec_concatv2di"
  [(set (match_operand:V2DI 0 "register_operand" "=Y,?Y,Y,x,x,x")
        (vec_concat:V2DI
          (match_operand:DI 1 "nonimmediate_operand" " m,*y,0,0,0,m")
          (match_operand:DI 2 "vector_move_operand" " C, C,Y,x,m,0")))]
  "TARGET_SSE"
  "@
   movq\t{%1, %0|%0, %1}
   movq2dq\t{%1, %0|%0, %1}
   punpcklqdq\t{%2, %0|%0, %2}
   movlhps\t{%2, %0|%0, %2}
   movhps\t{%2, %0|%0, %2}
   movlps\t{%1, %0|%0, %1}"
  [(set_attr "type" "ssemov,ssemov,sselog,ssemov,ssemov,ssemov")
   (set_attr "mode" "TI,TI,TI,V4SF,V2SF,V2SF")])

;; Generic element set / extract / init expanders for each integer vector
;; mode.  Each one forwards to the corresponding ix86_expand_vector_*
;; helper, passing false as its first argument and, for set and extract,
;; the constant element index from operand 2.
(define_expand "vec_setv2di"
  [(match_operand:V2DI 0 "register_operand" "")
   (match_operand:DI 1 "register_operand" "")
   (match_operand 2 "const_int_operand" "")]
  "TARGET_SSE"
{
  ix86_expand_vector_set (false, operands[0], operands[1],
                          INTVAL (operands[2]));
  DONE;
})

(define_expand "vec_extractv2di"
  [(match_operand:DI 0 "register_operand" "")
   (match_operand:V2DI 1 "register_operand" "")
   (match_operand 2 "const_int_operand" "")]
  "TARGET_SSE"
{
  ix86_expand_vector_extract (false, operands[0], operands[1],
                              INTVAL (operands[2]));
  DONE;
})

(define_expand "vec_initv2di"
  [(match_operand:V2DI 0 "register_operand" "")
   (match_operand 1 "" "")]
  "TARGET_SSE"
{
  ix86_expand_vector_init (false, operands[0], operands[1]);
  DONE;
})

(define_expand "vec_setv4si"
  [(match_operand:V4SI 0 "register_operand" "")
   (match_operand:SI 1 "register_operand" "")
   (match_operand 2 "const_int_operand" "")]
  "TARGET_SSE"
{
  ix86_expand_vector_set (false, operands[0], operands[1],
                          INTVAL (operands[2]));
  DONE;
})

(define_expand "vec_extractv4si"
  [(match_operand:SI 0 "register_operand" "")
   (match_operand:V4SI 1 "register_operand" "")
   (match_operand 2 "const_int_operand" "")]
  "TARGET_SSE"
{
  ix86_expand_vector_extract (false, operands[0], operands[1],
                              INTVAL (operands[2]));
  DONE;
})

(define_expand "vec_initv4si"
  [(match_operand:V4SI 0 "register_operand" "")
   (match_operand 1 "" "")]
  "TARGET_SSE"
{
  ix86_expand_vector_init (false, operands[0], operands[1]);
  DONE;
})

(define_expand "vec_setv8hi"
  [(match_operand:V8HI 0 "register_operand" "")
   (match_operand:HI 1 "register_operand" "")
   (match_operand 2 "const_int_operand" "")]
  "TARGET_SSE"
{
  ix86_expand_vector_set (false, operands[0], operands[1],
                          INTVAL (operands[2]));
  DONE;
})

(define_expand "vec_extractv8hi"
  [(match_operand:HI 0 "register_operand" "")
   (match_operand:V8HI 1 "register_operand" "")
   (match_operand 2 "const_int_operand" "")]
  "TARGET_SSE"
{
  ix86_expand_vector_extract (false, operands[0], operands[1],
                              INTVAL (operands[2]));
  DONE;
})

(define_expand "vec_initv8hi"
  [(match_operand:V8HI 0 "register_operand" "")
   (match_operand 1 "" "")]
  "TARGET_SSE"
{
  ix86_expand_vector_init (false, operands[0], operands[1]);
  DONE;
})

(define_expand "vec_setv16qi"
  [(match_operand:V16QI 0 "register_operand" "")
   (match_operand:QI 1 "register_operand" "")
   (match_operand 2 "const_int_operand" "")]
  "TARGET_SSE"
{
  ix86_expand_vector_set (false, operands[0], operands[1],
                          INTVAL (operands[2]));
  DONE;
})

(define_expand "vec_extractv16qi"
  [(match_operand:QI 0 "register_operand" "")
   (match_operand:V16QI 1 "register_operand" "")
   (match_operand 2 "const_int_operand" "")]
  "TARGET_SSE"
{
  ix86_expand_vector_extract (false, operands[0], operands[1],
                              INTVAL (operands[2]));
  DONE;
})

(define_expand "vec_initv16qi"
  [(match_operand:V16QI 0 "register_operand" "")
   (match_operand 1 "" "")]
  "TARGET_SSE"
{
  ix86_expand_vector_init (false, operands[0], operands[1]);
  DONE;
})

;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;;
;; Miscellaneous
;;
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;

;; Unsigned rounding average of bytes: both inputs are zero-extended,
;; summed together with a vector of ones, shifted right by one and
;; truncated back, i.e. (a + b + 1) >> 1 per element.
(define_insn "sse2_uavgv16qi3"
  [(set (match_operand:V16QI 0 "register_operand" "=x")
        (truncate:V16QI
          (lshiftrt:V16HI
            (plus:V16HI
              (plus:V16HI
                (zero_extend:V16HI
                  (match_operand:V16QI 1 "nonimmediate_operand" "%0"))
                (zero_extend:V16HI
                  (match_operand:V16QI 2 "nonimmediate_operand" "xm")))
              (const_vector:V16QI [(const_int 1) (const_int 1)
                                   (const_int 1) (const_int 1)
                                   (const_int 1) (const_int 1)
                                   (const_int 1) (const_int 1)
                                   (const_int 1) (const_int 1)
                                   (const_int 1) (const_int 1)
                                   (const_int 1) (const_int 1)
                                   (const_int 1) (const_int 1)]))
            (const_int 1))))]
  "TARGET_SSE2 && ix86_binary_operator_ok (PLUS, V16QImode, operands)"
  "pavgb\t{%2, %0|%0, %2}"
  [(set_attr "type" "sseiadd")
   (set_attr "mode" "TI")])

;; Unsigned rounding average of words; same shape as the byte version.
(define_insn "sse2_uavgv8hi3"
  [(set (match_operand:V8HI 0 "register_operand" "=x")
        (truncate:V8HI
          (lshiftrt:V8SI
            (plus:V8SI
              (plus:V8SI
                (zero_extend:V8SI
                  (match_operand:V8HI 1 "nonimmediate_operand" "%0"))
                (zero_extend:V8SI
                  (match_operand:V8HI 2 "nonimmediate_operand" "xm")))
              (const_vector:V8HI [(const_int 1) (const_int 1)
                                  (const_int 1) (const_int 1)
                                  (const_int 1) (const_int 1)
                                  (const_int 1) (const_int 1)]))
            (const_int 1))))]
  "TARGET_SSE2 && ix86_binary_operator_ok (PLUS, V8HImode, operands)"
  "pavgw\t{%2, %0|%0, %2}"
  [(set_attr "type" "sseiadd")
   (set_attr "mode" "TI")])

;; The correct representation for this is absolutely enormous, and
;; surely not generally useful.
;; psadbw, kept opaque as an unspec rather than spelled out in RTL.
(define_insn "sse2_psadbw"
  [(set (match_operand:V2DI 0 "register_operand" "=x")
        (unspec:V2DI [(match_operand:V16QI 1 "register_operand" "0")
                      (match_operand:V16QI 2 "nonimmediate_operand" "xm")]
                     UNSPEC_PSADBW))]
  "TARGET_SSE2"
  "psadbw\t{%2, %0|%0, %2}"
  [(set_attr "type" "sseiadd")
   (set_attr "mode" "TI")])

;; Move-mask patterns: an XMM register in, an SImode general register out.
;; All three share UNSPEC_MOVMSK and differ only in the input mode.
(define_insn "sse_movmskps"
  [(set (match_operand:SI 0 "register_operand" "=r")
        (unspec:SI [(match_operand:V4SF 1 "register_operand" "x")]
                   UNSPEC_MOVMSK))]
  "TARGET_SSE"
  "movmskps\t{%1, %0|%0, %1}"
  [(set_attr "type" "ssecvt")
   (set_attr "mode" "V4SF")])

(define_insn "sse2_movmskpd"
  [(set (match_operand:SI 0 "register_operand" "=r")
        (unspec:SI [(match_operand:V2DF 1 "register_operand" "x")]
                   UNSPEC_MOVMSK))]
  "TARGET_SSE2"
  "movmskpd\t{%1, %0|%0, %1}"
  [(set_attr "type" "ssecvt")
   (set_attr "mode" "V2DF")])

;; NOTE(review): the mode attribute is V2DF although the input is V16QI.
;; Left unchanged here -- confirm how i386.md derives prefix/length from
;; "mode" before altering it.
(define_insn "sse2_pmovmskb"
  [(set (match_operand:SI 0 "register_operand" "=r")
        (unspec:SI [(match_operand:V16QI 1 "register_operand" "x")]
                   UNSPEC_MOVMSK))]
  "TARGET_SSE2"
  "pmovmskb\t{%1, %0|%0, %1}"
  [(set_attr "type" "ssecvt")
   (set_attr "mode" "V2DF")])

;; Masked byte store.  The destination memory also appears as an input of
;; the unspec.  The two insns below match it with the address in the "D"
;; register class, in SImode for 32-bit and DImode for 64-bit targets.
(define_expand "sse2_maskmovdqu"
  [(set (match_operand:V16QI 0 "memory_operand" "")
        (unspec:V16QI [(match_operand:V16QI 1 "register_operand" "x")
                       (match_operand:V16QI 2 "register_operand" "x")
                       (match_dup 0)]
                      UNSPEC_MASKMOV))]
  "TARGET_SSE2"
  "")

(define_insn "*sse2_maskmovdqu"
  [(set (mem:V16QI (match_operand:SI 0 "register_operand" "D"))
        (unspec:V16QI [(match_operand:V16QI 1 "register_operand" "x")
                       (match_operand:V16QI 2 "register_operand" "x")
                       (mem:V16QI (match_dup 0))]
                      UNSPEC_MASKMOV))]
  "TARGET_SSE2 && !TARGET_64BIT"
  ;; @@@ check ordering of operands in intel/nonintel syntax
  "maskmovdqu\t{%2, %1|%1, %2}"
  [(set_attr "type" "ssecvt")
   (set_attr "mode" "TI")])

(define_insn "*sse2_maskmovdqu_rex64"
  [(set (mem:V16QI (match_operand:DI 0 "register_operand" "D"))
        (unspec:V16QI [(match_operand:V16QI 1 "register_operand" "x")
                       (match_operand:V16QI 2 "register_operand" "x")
                       (mem:V16QI (match_dup 0))]
                      UNSPEC_MASKMOV))]
  "TARGET_SSE2 && TARGET_64BIT"
  ;; @@@ check ordering of operands in intel/nonintel syntax
  "maskmovdqu\t{%2, %1|%1, %2}"
  [(set_attr "type" "ssecvt")
   (set_attr "mode" "TI")])

;; Load MXCSR from memory.
(define_insn "sse_ldmxcsr"
  [(unspec_volatile [(match_operand:SI 0 "memory_operand" "m")]
                    UNSPECV_LDMXCSR)]
  "TARGET_SSE"
  "ldmxcsr\t%0"
  [(set_attr "type" "sse")
   (set_attr "memory" "load")])

;; Store MXCSR to memory.
(define_insn "sse_stmxcsr"
  [(set (match_operand:SI 0 "memory_operand" "=m")
        (unspec_volatile:SI [(const_int 0)] UNSPECV_STMXCSR))]
  "TARGET_SSE"
  "stmxcsr\t%0"
  [(set_attr "type" "sse")
   (set_attr "memory" "store")])

;; Fences.  Each expander creates a volatile BLKmode MEM with a scratch
;; address as operand 0, and the pattern both reads and sets that MEM.
(define_expand "sse_sfence"
  [(set (match_dup 0)
        (unspec:BLK [(match_dup 0)] UNSPEC_SFENCE))]
  "TARGET_SSE || TARGET_3DNOW_A"
{
  operands[0] = gen_rtx_MEM (BLKmode, gen_rtx_SCRATCH (Pmode));
  MEM_VOLATILE_P (operands[0]) = 1;
})

(define_insn "*sse_sfence"
  [(set (match_operand:BLK 0 "" "")
        (unspec:BLK [(match_dup 0)] UNSPEC_SFENCE))]
  "TARGET_SSE || TARGET_3DNOW_A"
  "sfence"
  [(set_attr "type" "sse")
   (set_attr "memory" "unknown")])

;; Flush the cache line containing the address in operand 0.
(define_insn "sse2_clflush"
  [(unspec_volatile [(match_operand 0 "address_operand" "p")]
                    UNSPECV_CLFLUSH)]
  "TARGET_SSE2"
  "clflush\t%a0"
  [(set_attr "type" "sse")
   (set_attr "memory" "unknown")])

(define_expand "sse2_mfence"
  [(set (match_dup 0)
        (unspec:BLK [(match_dup 0)] UNSPEC_MFENCE))]
  "TARGET_SSE2"
{
  operands[0] = gen_rtx_MEM (BLKmode, gen_rtx_SCRATCH (Pmode));
  MEM_VOLATILE_P (operands[0]) = 1;
})

(define_insn "*sse2_mfence"
  [(set (match_operand:BLK 0 "" "")
        (unspec:BLK [(match_dup 0)] UNSPEC_MFENCE))]
  "TARGET_SSE2"
  "mfence"
  [(set_attr "type" "sse")
   (set_attr "memory" "unknown")])

(define_expand "sse2_lfence"
  [(set (match_dup 0)
        (unspec:BLK [(match_dup 0)] UNSPEC_LFENCE))]
  "TARGET_SSE2"
{
  operands[0] = gen_rtx_MEM (BLKmode, gen_rtx_SCRATCH (Pmode));
  MEM_VOLATILE_P (operands[0]) = 1;
})

(define_insn "*sse2_lfence"
  [(set (match_operand:BLK 0 "" "")
        (unspec:BLK [(match_dup 0)] UNSPEC_LFENCE))]
  "TARGET_SSE2"
  "lfence"
  [(set_attr "type" "sse")
   (set_attr "memory" "unknown")])

;; SSE3 monitor/mwait.  Operands are pinned to fixed registers through the
;; "a", "c" and "d" constraints.
(define_insn "sse3_mwait"
  [(unspec_volatile [(match_operand:SI 0 "register_operand" "a")
                     (match_operand:SI 1 "register_operand" "c")]
                    UNSPECV_MWAIT)]
  "TARGET_SSE3"
  "mwait\t%0, %1"
  [(set_attr "length" "3")])

(define_insn "sse3_monitor"
  [(unspec_volatile [(match_operand:SI 0 "register_operand" "a")
                     (match_operand:SI 1 "register_operand" "c")
                     (match_operand:SI 2 "register_operand" "d")]
                    UNSPECV_MONITOR)]
  "TARGET_SSE3"
  "monitor\t%0, %1, %2"
  [(set_attr "length" "3")])