//===-- X86InstrSSE.td - SSE Instruction Set ---------------*- tablegen -*-===//
//
//                     The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// This file describes the X86 SSE instruction set, defining the instructions,
// and properties of the instructions which are needed for code generation,
// machine code emission, and analysis.
//
//===----------------------------------------------------------------------===//

// Pair of itinerary classes for the register-register (rr) and
// register-memory (rm) forms of an instruction.
class OpndItins<InstrItinClass arg_rr, InstrItinClass arg_rm> {
  InstrItinClass rr = arg_rr;
  InstrItinClass rm = arg_rm;
}

// Itinerary pairs for the single-precision (s) and double-precision (d)
// flavors of an operation.
class SizeItins<OpndItins arg_s, OpndItins arg_d> {
  OpndItins s = arg_s;
  OpndItins d = arg_d;
}


// Itinerary triple for shifts: reg-reg, reg-mem and reg-imm forms.
class ShiftOpndItins<InstrItinClass arg_rr, InstrItinClass arg_rm,
                     InstrItinClass arg_ri> {
  InstrItinClass rr = arg_rr;
  InstrItinClass rm = arg_rm;
  InstrItinClass ri = arg_ri;
}


// scalar
def SSE_ALU_F32S : OpndItins<
  IIC_SSE_ALU_F32S_RR, IIC_SSE_ALU_F32S_RM
>;

def SSE_ALU_F64S : OpndItins<
  IIC_SSE_ALU_F64S_RR, IIC_SSE_ALU_F64S_RM
>;

def SSE_ALU_ITINS_S : SizeItins<
  SSE_ALU_F32S, SSE_ALU_F64S
>;

// Note: the RM form previously used the F64S itinerary by mistake.
def SSE_MUL_F32S : OpndItins<
  IIC_SSE_MUL_F32S_RR, IIC_SSE_MUL_F32S_RM
>;

def SSE_MUL_F64S : OpndItins<
  IIC_SSE_MUL_F64S_RR, IIC_SSE_MUL_F64S_RM
>;

def SSE_MUL_ITINS_S : SizeItins<
  SSE_MUL_F32S, SSE_MUL_F64S
>;

// Note: the RM form previously used the F64S itinerary by mistake.
def SSE_DIV_F32S : OpndItins<
  IIC_SSE_DIV_F32S_RR, IIC_SSE_DIV_F32S_RM
>;

def SSE_DIV_F64S : OpndItins<
  IIC_SSE_DIV_F64S_RR, IIC_SSE_DIV_F64S_RM
>;

def SSE_DIV_ITINS_S : SizeItins<
  SSE_DIV_F32S, SSE_DIV_F64S
>;

// parallel
def SSE_ALU_F32P : OpndItins<
  IIC_SSE_ALU_F32P_RR, IIC_SSE_ALU_F32P_RM
>;

def SSE_ALU_F64P : OpndItins<
  IIC_SSE_ALU_F64P_RR, IIC_SSE_ALU_F64P_RM
>;

def SSE_ALU_ITINS_P : SizeItins<
  SSE_ALU_F32P, SSE_ALU_F64P
>;

// Note: the RM form previously used the F64P itinerary by mistake.
def SSE_MUL_F32P : OpndItins<
  IIC_SSE_MUL_F32P_RR, IIC_SSE_MUL_F32P_RM
>;

def SSE_MUL_F64P : OpndItins<
  IIC_SSE_MUL_F64P_RR, IIC_SSE_MUL_F64P_RM
>;

def SSE_MUL_ITINS_P : SizeItins<
  SSE_MUL_F32P, SSE_MUL_F64P
>;

// Note: the RM form previously used the F64P itinerary by mistake.
def SSE_DIV_F32P : OpndItins<
  IIC_SSE_DIV_F32P_RR, IIC_SSE_DIV_F32P_RM
>;

def SSE_DIV_F64P : OpndItins<
  IIC_SSE_DIV_F64P_RR, IIC_SSE_DIV_F64P_RM
>;

def SSE_DIV_ITINS_P : SizeItins<
  SSE_DIV_F32P, SSE_DIV_F64P
>;

def SSE_BIT_ITINS_P : OpndItins<
  IIC_SSE_BIT_P_RR, IIC_SSE_BIT_P_RM
>;

def SSE_INTALU_ITINS_P : OpndItins<
  IIC_SSE_INTALU_P_RR, IIC_SSE_INTALU_P_RM
>;

def SSE_INTALUQ_ITINS_P : OpndItins<
  IIC_SSE_INTALUQ_P_RR, IIC_SSE_INTALUQ_P_RM
>;

def SSE_INTMUL_ITINS_P : OpndItins<
  IIC_SSE_INTMUL_P_RR, IIC_SSE_INTMUL_P_RM
>;

def SSE_INTSHIFT_ITINS_P : ShiftOpndItins<
  IIC_SSE_INTSH_P_RR, IIC_SSE_INTSH_P_RM, IIC_SSE_INTSH_P_RI
>;

def SSE_MOVA_ITINS : OpndItins<
  IIC_SSE_MOVA_P_RR, IIC_SSE_MOVA_P_RM
>;

def SSE_MOVU_ITINS : OpndItins<
  IIC_SSE_MOVU_P_RR, IIC_SSE_MOVU_P_RM
>;

//===----------------------------------------------------------------------===//
// SSE 1 & 2 Instructions Classes
//===----------------------------------------------------------------------===//

/// sse12_fp_scalar - SSE 1 & 2 scalar instructions class
multiclass sse12_fp_scalar<bits<8> opc, string OpcodeStr, SDNode OpNode,
                           RegisterClass RC, X86MemOperand x86memop,
                           OpndItins itins,
                           bit Is2Addr = 1> {
  let isCommutable = 1 in {
    def rr : SI<opc, MRMSrcReg, (outs RC:$dst), (ins RC:$src1, RC:$src2),
       !if(Is2Addr,
           !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"),
           !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")),
       [(set RC:$dst, (OpNode RC:$src1, RC:$src2))], itins.rr>;
  }
  def rm : SI<opc, MRMSrcMem, (outs RC:$dst), (ins RC:$src1, x86memop:$src2),
       !if(Is2Addr,
           !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"),
           !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")),
       [(set RC:$dst, (OpNode RC:$src1, (load addr:$src2)))], itins.rm>;
}

/// sse12_fp_scalar_int - SSE 1 & 2 scalar instructions intrinsics class
multiclass sse12_fp_scalar_int<bits<8> opc, string OpcodeStr, RegisterClass RC,
                               string asm, string SSEVer, string FPSizeStr,
                               Operand memopr, ComplexPattern mem_cpat,
                               OpndItins itins,
                               bit Is2Addr = 1> {
  // The intrinsic name is assembled from the SSE version and the FP size,
  // e.g. int_x86_sse2_add_sd.
  def rr_Int : SI<opc, MRMSrcReg, (outs RC:$dst), (ins RC:$src1, RC:$src2),
       !if(Is2Addr,
           !strconcat(asm, "\t{$src2, $dst|$dst, $src2}"),
           !strconcat(asm, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")),
       [(set RC:$dst, (!cast<Intrinsic>(
                 !strconcat("int_x86_sse", SSEVer, "_", OpcodeStr, FPSizeStr))
             RC:$src1, RC:$src2))], itins.rr>;
  def rm_Int : SI<opc, MRMSrcMem, (outs RC:$dst), (ins RC:$src1, memopr:$src2),
       !if(Is2Addr,
           !strconcat(asm, "\t{$src2, $dst|$dst, $src2}"),
           !strconcat(asm, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")),
       [(set RC:$dst, (!cast<Intrinsic>(!strconcat("int_x86_sse",
                                          SSEVer, "_", OpcodeStr, FPSizeStr))
             RC:$src1, mem_cpat:$src2))], itins.rm>;
}

/// sse12_fp_packed - SSE 1 & 2 packed instructions class
multiclass sse12_fp_packed<bits<8> opc, string OpcodeStr, SDNode OpNode,
                           RegisterClass RC, ValueType vt,
                           X86MemOperand x86memop, PatFrag mem_frag,
                           Domain d, OpndItins itins, bit Is2Addr = 1> {
  let isCommutable = 1 in
    def rr : PI<opc, MRMSrcReg, (outs RC:$dst), (ins RC:$src1, RC:$src2),
       !if(Is2Addr,
           !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"),
           !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")),
       [(set RC:$dst, (vt (OpNode RC:$src1, RC:$src2)))], itins.rr, d>;
  let mayLoad = 1 in
    def rm : PI<opc, MRMSrcMem, (outs RC:$dst), (ins RC:$src1, x86memop:$src2),
       !if(Is2Addr,
           !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"),
           !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")),
       [(set RC:$dst, (OpNode RC:$src1, (mem_frag addr:$src2)))],
       itins.rm, d>;
}

/// sse12_fp_packed_logical_rm - SSE 1 & 2 packed instructions class
// NOTE(review): the parameter name 'rr_hasSideEffects' reads inverted
// relative to its use — it is assigned directly to neverHasSideEffects,
// so passing 1 marks the rr form as side-effect-free. Kept as-is because
// callers depend on the current semantics; confirm before renaming.
multiclass sse12_fp_packed_logical_rm<bits<8> opc, RegisterClass RC, Domain d,
                                      string OpcodeStr, X86MemOperand x86memop,
                                      list<dag> pat_rr, list<dag> pat_rm,
                                      bit Is2Addr = 1,
                                      bit rr_hasSideEffects = 0> {
  let isCommutable = 1, neverHasSideEffects = rr_hasSideEffects in
    def rr : PI<opc, MRMSrcReg, (outs RC:$dst), (ins RC:$src1, RC:$src2),
       !if(Is2Addr,
           !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"),
           !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")),
       pat_rr, IIC_DEFAULT, d>;
  def rm : PI<opc, MRMSrcMem, (outs RC:$dst), (ins RC:$src1, x86memop:$src2),
       !if(Is2Addr,
           !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"),
           !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")),
       pat_rm, IIC_DEFAULT, d>;
}

/// sse12_fp_packed_int - SSE 1 & 2 packed instructions intrinsics class
multiclass sse12_fp_packed_int<bits<8> opc, string OpcodeStr, RegisterClass RC,
                               string asm, string SSEVer, string FPSizeStr,
                               X86MemOperand x86memop, PatFrag mem_frag,
                               Domain d, OpndItins itins, bit Is2Addr = 1> {
  def rr_Int : PI<opc, MRMSrcReg, (outs RC:$dst), (ins RC:$src1, RC:$src2),
       !if(Is2Addr,
           !strconcat(asm, "\t{$src2, $dst|$dst, $src2}"),
           !strconcat(asm, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")),
       [(set RC:$dst, (!cast<Intrinsic>(
                     !strconcat("int_x86_", SSEVer, "_", OpcodeStr, FPSizeStr))
             RC:$src1, RC:$src2))], IIC_DEFAULT, d>;
  def rm_Int : PI<opc, MRMSrcMem, (outs RC:$dst), (ins RC:$src1,x86memop:$src2),
       !if(Is2Addr,
           !strconcat(asm, "\t{$src2, $dst|$dst, $src2}"),
           !strconcat(asm, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")),
       [(set RC:$dst, (!cast<Intrinsic>(
                     !strconcat("int_x86_", SSEVer, "_", OpcodeStr, FPSizeStr))
             RC:$src1, (mem_frag addr:$src2)))], IIC_DEFAULT, d>;
}

//===----------------------------------------------------------------------===//
// Non-instruction patterns
//===----------------------------------------------------------------------===//

// A vector extract of the first f32/f64 position is a subregister copy
def : Pat<(f32 (vector_extract (v4f32 VR128:$src), (iPTR 0))),
          (COPY_TO_REGCLASS (v4f32 VR128:$src), FR32)>;
def : Pat<(f64 (vector_extract (v2f64 VR128:$src), (iPTR 0))),
          (COPY_TO_REGCLASS (v2f64 VR128:$src), FR64)>;

// A 128-bit subvector extract from the first 256-bit vector position
// is a subregister copy that needs no instruction.
def : Pat<(v4i32 (extract_subvector (v8i32 VR256:$src), (iPTR 0))),
          (v4i32 (EXTRACT_SUBREG (v8i32 VR256:$src), sub_xmm))>;
def : Pat<(v4f32 (extract_subvector (v8f32 VR256:$src), (iPTR 0))),
          (v4f32 (EXTRACT_SUBREG (v8f32 VR256:$src), sub_xmm))>;

def : Pat<(v2i64 (extract_subvector (v4i64 VR256:$src), (iPTR 0))),
          (v2i64 (EXTRACT_SUBREG (v4i64 VR256:$src), sub_xmm))>;
def : Pat<(v2f64 (extract_subvector (v4f64 VR256:$src), (iPTR 0))),
          (v2f64 (EXTRACT_SUBREG (v4f64 VR256:$src), sub_xmm))>;

def : Pat<(v8i16 (extract_subvector (v16i16 VR256:$src), (iPTR 0))),
          (v8i16 (EXTRACT_SUBREG (v16i16 VR256:$src), sub_xmm))>;
def : Pat<(v16i8 (extract_subvector (v32i8 VR256:$src), (iPTR 0))),
          (v16i8 (EXTRACT_SUBREG (v32i8 VR256:$src), sub_xmm))>;

// A 128-bit subvector insert to the first 256-bit vector position
// is a subregister copy that needs no instruction.
let AddedComplexity = 25 in { // to give priority over vinsertf128rm
def : Pat<(insert_subvector undef, (v2i64 VR128:$src), (iPTR 0)),
          (INSERT_SUBREG (v4i64 (IMPLICIT_DEF)), VR128:$src, sub_xmm)>;
def : Pat<(insert_subvector undef, (v2f64 VR128:$src), (iPTR 0)),
          (INSERT_SUBREG (v4f64 (IMPLICIT_DEF)), VR128:$src, sub_xmm)>;
def : Pat<(insert_subvector undef, (v4i32 VR128:$src), (iPTR 0)),
          (INSERT_SUBREG (v8i32 (IMPLICIT_DEF)), VR128:$src, sub_xmm)>;
def : Pat<(insert_subvector undef, (v4f32 VR128:$src), (iPTR 0)),
          (INSERT_SUBREG (v8f32 (IMPLICIT_DEF)), VR128:$src, sub_xmm)>;
def : Pat<(insert_subvector undef, (v8i16 VR128:$src), (iPTR 0)),
          (INSERT_SUBREG (v16i16 (IMPLICIT_DEF)), VR128:$src, sub_xmm)>;
def : Pat<(insert_subvector undef, (v16i8 VR128:$src), (iPTR 0)),
          (INSERT_SUBREG (v32i8 (IMPLICIT_DEF)), VR128:$src, sub_xmm)>;
}

// Implicitly promote a 32-bit scalar to a vector.
def : Pat<(v4f32 (scalar_to_vector FR32:$src)),
          (COPY_TO_REGCLASS FR32:$src, VR128)>;
def : Pat<(v8f32 (scalar_to_vector FR32:$src)),
          (COPY_TO_REGCLASS FR32:$src, VR128)>;
// Implicitly promote a 64-bit scalar to a vector.
def : Pat<(v2f64 (scalar_to_vector FR64:$src)),
          (COPY_TO_REGCLASS FR64:$src, VR128)>;
def : Pat<(v4f64 (scalar_to_vector FR64:$src)),
          (COPY_TO_REGCLASS FR64:$src, VR128)>;

// Bitcasts between 128-bit vector types. Return the original type since
// no instruction is needed for the conversion
let Predicates = [HasSSE2] in {
  def : Pat<(v2i64 (bitconvert (v4i32 VR128:$src))), (v2i64 VR128:$src)>;
  def : Pat<(v2i64 (bitconvert (v8i16 VR128:$src))), (v2i64 VR128:$src)>;
  def : Pat<(v2i64 (bitconvert (v16i8 VR128:$src))), (v2i64 VR128:$src)>;
  def : Pat<(v2i64 (bitconvert (v2f64 VR128:$src))), (v2i64 VR128:$src)>;
  def : Pat<(v2i64 (bitconvert (v4f32 VR128:$src))), (v2i64 VR128:$src)>;
  def : Pat<(v4i32 (bitconvert (v2i64 VR128:$src))), (v4i32 VR128:$src)>;
  def : Pat<(v4i32 (bitconvert (v8i16 VR128:$src))), (v4i32 VR128:$src)>;
  def : Pat<(v4i32 (bitconvert (v16i8 VR128:$src))), (v4i32 VR128:$src)>;
  def : Pat<(v4i32 (bitconvert (v2f64 VR128:$src))), (v4i32 VR128:$src)>;
  def : Pat<(v4i32 (bitconvert (v4f32 VR128:$src))), (v4i32 VR128:$src)>;
  def : Pat<(v8i16 (bitconvert (v2i64 VR128:$src))), (v8i16 VR128:$src)>;
  def : Pat<(v8i16 (bitconvert (v4i32 VR128:$src))), (v8i16 VR128:$src)>;
  def : Pat<(v8i16 (bitconvert (v16i8 VR128:$src))), (v8i16 VR128:$src)>;
  def : Pat<(v8i16 (bitconvert (v2f64 VR128:$src))), (v8i16 VR128:$src)>;
  def : Pat<(v8i16 (bitconvert (v4f32 VR128:$src))), (v8i16 VR128:$src)>;
  def : Pat<(v16i8 (bitconvert (v2i64 VR128:$src))), (v16i8 VR128:$src)>;
  def : Pat<(v16i8 (bitconvert (v4i32 VR128:$src))), (v16i8 VR128:$src)>;
  def : Pat<(v16i8 (bitconvert (v8i16 VR128:$src))), (v16i8 VR128:$src)>;
  def : Pat<(v16i8 (bitconvert (v2f64 VR128:$src))), (v16i8 VR128:$src)>;
  def : Pat<(v16i8 (bitconvert (v4f32 VR128:$src))), (v16i8 VR128:$src)>;
  def : Pat<(v4f32 (bitconvert (v2i64 VR128:$src))), (v4f32 VR128:$src)>;
  def : Pat<(v4f32 (bitconvert (v4i32 VR128:$src))), (v4f32 VR128:$src)>;
  def : Pat<(v4f32 (bitconvert (v8i16 VR128:$src))), (v4f32 VR128:$src)>;
  def : Pat<(v4f32 (bitconvert (v16i8 VR128:$src))), (v4f32 VR128:$src)>;
  def : Pat<(v4f32 (bitconvert (v2f64 VR128:$src))), (v4f32 VR128:$src)>;
  def : Pat<(v2f64 (bitconvert (v2i64 VR128:$src))), (v2f64 VR128:$src)>;
  def : Pat<(v2f64 (bitconvert (v4i32 VR128:$src))), (v2f64 VR128:$src)>;
  def : Pat<(v2f64 (bitconvert (v8i16 VR128:$src))), (v2f64 VR128:$src)>;
  def : Pat<(v2f64 (bitconvert (v16i8 VR128:$src))), (v2f64 VR128:$src)>;
  def : Pat<(v2f64 (bitconvert (v4f32 VR128:$src))), (v2f64 VR128:$src)>;
}

// Bitcasts between 256-bit vector types. Return the original type since
// no instruction is needed for the conversion
let Predicates = [HasAVX] in {
  def : Pat<(v4f64  (bitconvert (v8f32 VR256:$src))),  (v4f64 VR256:$src)>;
  def : Pat<(v4f64  (bitconvert (v8i32 VR256:$src))),  (v4f64 VR256:$src)>;
  def : Pat<(v4f64  (bitconvert (v4i64 VR256:$src))),  (v4f64 VR256:$src)>;
  def : Pat<(v4f64  (bitconvert (v16i16 VR256:$src))), (v4f64 VR256:$src)>;
  def : Pat<(v4f64  (bitconvert (v32i8 VR256:$src))),  (v4f64 VR256:$src)>;
  def : Pat<(v8f32  (bitconvert (v8i32 VR256:$src))),  (v8f32 VR256:$src)>;
  def : Pat<(v8f32  (bitconvert (v4i64 VR256:$src))),  (v8f32 VR256:$src)>;
  def : Pat<(v8f32  (bitconvert (v4f64 VR256:$src))),  (v8f32 VR256:$src)>;
  def : Pat<(v8f32  (bitconvert (v32i8 VR256:$src))),  (v8f32 VR256:$src)>;
  def : Pat<(v8f32  (bitconvert (v16i16 VR256:$src))), (v8f32 VR256:$src)>;
  def : Pat<(v4i64  (bitconvert (v8f32 VR256:$src))),  (v4i64 VR256:$src)>;
  def : Pat<(v4i64  (bitconvert (v8i32 VR256:$src))),  (v4i64 VR256:$src)>;
  def : Pat<(v4i64  (bitconvert (v4f64 VR256:$src))),  (v4i64 VR256:$src)>;
  def : Pat<(v4i64  (bitconvert (v32i8 VR256:$src))),  (v4i64 VR256:$src)>;
  def : Pat<(v4i64  (bitconvert (v16i16 VR256:$src))), (v4i64 VR256:$src)>;
  def : Pat<(v32i8  (bitconvert (v4f64 VR256:$src))),  (v32i8 VR256:$src)>;
  def : Pat<(v32i8  (bitconvert (v4i64 VR256:$src))),  (v32i8 VR256:$src)>;
  def : Pat<(v32i8  (bitconvert (v8f32 VR256:$src))),  (v32i8 VR256:$src)>;
  def : Pat<(v32i8  (bitconvert (v8i32 VR256:$src))),  (v32i8 VR256:$src)>;
  def : Pat<(v32i8  (bitconvert (v16i16 VR256:$src))), (v32i8 VR256:$src)>;
  def : Pat<(v8i32  (bitconvert (v32i8 VR256:$src))),  (v8i32 VR256:$src)>;
  def : Pat<(v8i32  (bitconvert (v16i16 VR256:$src))), (v8i32 VR256:$src)>;
  def : Pat<(v8i32  (bitconvert (v8f32 VR256:$src))),  (v8i32 VR256:$src)>;
  def : Pat<(v8i32  (bitconvert (v4i64 VR256:$src))),  (v8i32 VR256:$src)>;
  def : Pat<(v8i32  (bitconvert (v4f64 VR256:$src))),  (v8i32 VR256:$src)>;
  def : Pat<(v16i16 (bitconvert (v8f32 VR256:$src))),  (v16i16 VR256:$src)>;
  def : Pat<(v16i16 (bitconvert (v8i32 VR256:$src))),  (v16i16 VR256:$src)>;
  def : Pat<(v16i16 (bitconvert (v4i64 VR256:$src))),  (v16i16 VR256:$src)>;
  def : Pat<(v16i16 (bitconvert (v4f64 VR256:$src))),  (v16i16 VR256:$src)>;
  def : Pat<(v16i16 (bitconvert (v32i8 VR256:$src))),  (v16i16 VR256:$src)>;
}

// Alias instructions that map fld0 to xorps for sse or vxorps for avx.
// This is expanded by ExpandPostRAPseudos.
let isReMaterializable = 1, isAsCheapAsAMove = 1, canFoldAsLoad = 1,
    isPseudo = 1 in {
  def FsFLD0SS : I<0, Pseudo, (outs FR32:$dst), (ins), "",
                   [(set FR32:$dst, fp32imm0)]>, Requires<[HasSSE1]>;
  def FsFLD0SD : I<0, Pseudo, (outs FR64:$dst), (ins), "",
                   [(set FR64:$dst, fpimm0)]>, Requires<[HasSSE2]>;
}

//===----------------------------------------------------------------------===//
// AVX & SSE - Zero/One Vectors
//===----------------------------------------------------------------------===//

// Alias instruction that maps zero vector to pxor / xorp* for sse.
// This is expanded by ExpandPostRAPseudos to an xorps / vxorps, and then
// swizzled by ExecutionDepsFix to pxor.
// We set canFoldAsLoad because this can be converted to a constant-pool
// load of an all-zeros value if folding it would be beneficial.
let isReMaterializable = 1, isAsCheapAsAMove = 1, canFoldAsLoad = 1,
    isPseudo = 1 in {
def V_SET0 : I<0, Pseudo, (outs VR128:$dst), (ins), "",
               [(set VR128:$dst, (v4f32 immAllZerosV))]>;
}

// All 128-bit zero vectors of any element type map to the single V_SET0.
def : Pat<(v2f64 immAllZerosV), (V_SET0)>;
def : Pat<(v4i32 immAllZerosV), (V_SET0)>;
def : Pat<(v2i64 immAllZerosV), (V_SET0)>;
def : Pat<(v8i16 immAllZerosV), (V_SET0)>;
def : Pat<(v16i8 immAllZerosV), (V_SET0)>;


// The same as done above but for AVX.  The 256-bit AVX1 ISA doesn't support PI,
// and doesn't need it because on sandy bridge the register is set to zero
// at the rename stage without using any execution unit, so SET0PSY
// and SET0PDY can be used for vector int instructions without penalty
let isReMaterializable = 1, isAsCheapAsAMove = 1, canFoldAsLoad = 1,
    isPseudo = 1, Predicates = [HasAVX] in {
def AVX_SET0 : I<0, Pseudo, (outs VR256:$dst), (ins), "",
                 [(set VR256:$dst, (v8f32 immAllZerosV))]>;
}

let Predicates = [HasAVX] in
  def : Pat<(v4f64 immAllZerosV), (AVX_SET0)>;

// 256-bit integer zeros require AVX2 (integer ops on YMM).
let Predicates = [HasAVX2] in {
  def : Pat<(v4i64 immAllZerosV), (AVX_SET0)>;
  def : Pat<(v8i32 immAllZerosV), (AVX_SET0)>;
  def : Pat<(v16i16 immAllZerosV), (AVX_SET0)>;
  def : Pat<(v32i8 immAllZerosV), (AVX_SET0)>;
}

// AVX1 has no support for 256-bit integer instructions, but since the 128-bit
// VPXOR instruction writes zero to its upper part, it's safe to build zeros.
let Predicates = [HasAVX1Only] in {
def : Pat<(v32i8 immAllZerosV), (SUBREG_TO_REG (i8 0), (V_SET0), sub_xmm)>;
def : Pat<(bc_v32i8 (v8f32 immAllZerosV)),
          (SUBREG_TO_REG (i8 0), (V_SET0), sub_xmm)>;

def : Pat<(v16i16 immAllZerosV), (SUBREG_TO_REG (i16 0), (V_SET0), sub_xmm)>;
def : Pat<(bc_v16i16 (v8f32 immAllZerosV)),
          (SUBREG_TO_REG (i16 0), (V_SET0), sub_xmm)>;

def : Pat<(v8i32 immAllZerosV), (SUBREG_TO_REG (i32 0), (V_SET0), sub_xmm)>;
def : Pat<(bc_v8i32 (v8f32 immAllZerosV)),
          (SUBREG_TO_REG (i32 0), (V_SET0), sub_xmm)>;

def : Pat<(v4i64 immAllZerosV), (SUBREG_TO_REG (i64 0), (V_SET0), sub_xmm)>;
def : Pat<(bc_v4i64 (v8f32 immAllZerosV)),
          (SUBREG_TO_REG (i64 0), (V_SET0), sub_xmm)>;
}

// We set canFoldAsLoad because this can be converted to a constant-pool
// load of an all-ones value if folding it would be beneficial.
let isReMaterializable = 1, isAsCheapAsAMove = 1, canFoldAsLoad = 1,
    isPseudo = 1 in {
  def V_SETALLONES : I<0, Pseudo, (outs VR128:$dst), (ins), "",
                       [(set VR128:$dst, (v4i32 immAllOnesV))]>;
  let Predicates = [HasAVX2] in
  def AVX2_SETALLONES : I<0, Pseudo, (outs VR256:$dst), (ins), "",
                          [(set VR256:$dst, (v8i32 immAllOnesV))]>;
}


//===----------------------------------------------------------------------===//
// SSE 1 & 2 - Move FP Scalar Instructions
//
// Move Instructions. Register-to-register movss/movsd is not used for FR32/64
// register copies because it's a partial register update; FsMOVAPSrr/FsMOVAPDrr
// is used instead. Register-to-register movss/movsd is not modeled as an
// INSERT_SUBREG because INSERT_SUBREG requires that the insert be implementable
// in terms of a copy, and just mentioned, we don't use movss/movsd for copies.
459//===----------------------------------------------------------------------===// 460 461class sse12_move_rr<RegisterClass RC, SDNode OpNode, ValueType vt, string asm> : 462 SI<0x10, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src1, RC:$src2), asm, 463 [(set VR128:$dst, (vt (OpNode VR128:$src1, 464 (scalar_to_vector RC:$src2))))], 465 IIC_SSE_MOV_S_RR>; 466 467// Loading from memory automatically zeroing upper bits. 468class sse12_move_rm<RegisterClass RC, X86MemOperand x86memop, 469 PatFrag mem_pat, string OpcodeStr> : 470 SI<0x10, MRMSrcMem, (outs RC:$dst), (ins x86memop:$src), 471 !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"), 472 [(set RC:$dst, (mem_pat addr:$src))], 473 IIC_SSE_MOV_S_RM>; 474 475// AVX 476def VMOVSSrr : sse12_move_rr<FR32, X86Movss, v4f32, 477 "movss\t{$src2, $src1, $dst|$dst, $src1, $src2}">, XS, VEX_4V, 478 VEX_LIG; 479def VMOVSDrr : sse12_move_rr<FR64, X86Movsd, v2f64, 480 "movsd\t{$src2, $src1, $dst|$dst, $src1, $src2}">, XD, VEX_4V, 481 VEX_LIG; 482 483// For the disassembler 484let isCodeGenOnly = 1 in { 485 def VMOVSSrr_REV : SI<0x11, MRMDestReg, (outs VR128:$dst), 486 (ins VR128:$src1, FR32:$src2), 487 "movss\t{$src2, $src1, $dst|$dst, $src1, $src2}", [], 488 IIC_SSE_MOV_S_RR>, 489 XS, VEX_4V, VEX_LIG; 490 def VMOVSDrr_REV : SI<0x11, MRMDestReg, (outs VR128:$dst), 491 (ins VR128:$src1, FR64:$src2), 492 "movsd\t{$src2, $src1, $dst|$dst, $src1, $src2}", [], 493 IIC_SSE_MOV_S_RR>, 494 XD, VEX_4V, VEX_LIG; 495} 496 497let canFoldAsLoad = 1, isReMaterializable = 1 in { 498 def VMOVSSrm : sse12_move_rm<FR32, f32mem, loadf32, "movss">, XS, VEX, 499 VEX_LIG; 500 let AddedComplexity = 20 in 501 def VMOVSDrm : sse12_move_rm<FR64, f64mem, loadf64, "movsd">, XD, VEX, 502 VEX_LIG; 503} 504 505def VMOVSSmr : SI<0x11, MRMDestMem, (outs), (ins f32mem:$dst, FR32:$src), 506 "movss\t{$src, $dst|$dst, $src}", 507 [(store FR32:$src, addr:$dst)], IIC_SSE_MOV_S_MR>, 508 XS, VEX, VEX_LIG; 509def VMOVSDmr : SI<0x11, MRMDestMem, (outs), (ins f64mem:$dst, 
FR64:$src), 510 "movsd\t{$src, $dst|$dst, $src}", 511 [(store FR64:$src, addr:$dst)], IIC_SSE_MOV_S_MR>, 512 XD, VEX, VEX_LIG; 513 514// SSE1 & 2 515let Constraints = "$src1 = $dst" in { 516 def MOVSSrr : sse12_move_rr<FR32, X86Movss, v4f32, 517 "movss\t{$src2, $dst|$dst, $src2}">, XS; 518 def MOVSDrr : sse12_move_rr<FR64, X86Movsd, v2f64, 519 "movsd\t{$src2, $dst|$dst, $src2}">, XD; 520 521 // For the disassembler 522 let isCodeGenOnly = 1 in { 523 def MOVSSrr_REV : SI<0x11, MRMDestReg, (outs VR128:$dst), 524 (ins VR128:$src1, FR32:$src2), 525 "movss\t{$src2, $dst|$dst, $src2}", [], 526 IIC_SSE_MOV_S_RR>, XS; 527 def MOVSDrr_REV : SI<0x11, MRMDestReg, (outs VR128:$dst), 528 (ins VR128:$src1, FR64:$src2), 529 "movsd\t{$src2, $dst|$dst, $src2}", [], 530 IIC_SSE_MOV_S_RR>, XD; 531 } 532} 533 534let canFoldAsLoad = 1, isReMaterializable = 1 in { 535 def MOVSSrm : sse12_move_rm<FR32, f32mem, loadf32, "movss">, XS; 536 537 let AddedComplexity = 20 in 538 def MOVSDrm : sse12_move_rm<FR64, f64mem, loadf64, "movsd">, XD; 539} 540 541def MOVSSmr : SSI<0x11, MRMDestMem, (outs), (ins f32mem:$dst, FR32:$src), 542 "movss\t{$src, $dst|$dst, $src}", 543 [(store FR32:$src, addr:$dst)], IIC_SSE_MOV_S_MR>; 544def MOVSDmr : SDI<0x11, MRMDestMem, (outs), (ins f64mem:$dst, FR64:$src), 545 "movsd\t{$src, $dst|$dst, $src}", 546 [(store FR64:$src, addr:$dst)], IIC_SSE_MOV_S_MR>; 547 548// Patterns 549let Predicates = [HasAVX] in { 550 let AddedComplexity = 15 in { 551 // Move scalar to XMM zero-extended, zeroing a VR128 then do a 552 // MOVS{S,D} to the lower bits. 
553 def : Pat<(v4f32 (X86vzmovl (v4f32 (scalar_to_vector FR32:$src)))), 554 (VMOVSSrr (v4f32 (V_SET0)), FR32:$src)>; 555 def : Pat<(v4f32 (X86vzmovl (v4f32 VR128:$src))), 556 (VMOVSSrr (v4f32 (V_SET0)), (COPY_TO_REGCLASS VR128:$src, FR32))>; 557 def : Pat<(v4i32 (X86vzmovl (v4i32 VR128:$src))), 558 (VMOVSSrr (v4i32 (V_SET0)), (COPY_TO_REGCLASS VR128:$src, FR32))>; 559 def : Pat<(v2f64 (X86vzmovl (v2f64 (scalar_to_vector FR64:$src)))), 560 (VMOVSDrr (v2f64 (V_SET0)), FR64:$src)>; 561 562 // Move low f32 and clear high bits. 563 def : Pat<(v8f32 (X86vzmovl (v8f32 VR256:$src))), 564 (SUBREG_TO_REG (i32 0), 565 (VMOVSSrr (v4f32 (V_SET0)), 566 (EXTRACT_SUBREG (v8f32 VR256:$src), sub_xmm)), sub_xmm)>; 567 def : Pat<(v8i32 (X86vzmovl (v8i32 VR256:$src))), 568 (SUBREG_TO_REG (i32 0), 569 (VMOVSSrr (v4i32 (V_SET0)), 570 (EXTRACT_SUBREG (v8i32 VR256:$src), sub_xmm)), sub_xmm)>; 571 } 572 573 let AddedComplexity = 20 in { 574 // MOVSSrm zeros the high parts of the register; represent this 575 // with SUBREG_TO_REG. The AVX versions also write: DST[255:128] <- 0 576 def : Pat<(v4f32 (X86vzmovl (v4f32 (scalar_to_vector (loadf32 addr:$src))))), 577 (COPY_TO_REGCLASS (VMOVSSrm addr:$src), VR128)>; 578 def : Pat<(v4f32 (scalar_to_vector (loadf32 addr:$src))), 579 (COPY_TO_REGCLASS (VMOVSSrm addr:$src), VR128)>; 580 def : Pat<(v4f32 (X86vzmovl (loadv4f32 addr:$src))), 581 (COPY_TO_REGCLASS (VMOVSSrm addr:$src), VR128)>; 582 583 // MOVSDrm zeros the high parts of the register; represent this 584 // with SUBREG_TO_REG. 
The AVX versions also write: DST[255:128] <- 0 585 def : Pat<(v2f64 (X86vzmovl (v2f64 (scalar_to_vector (loadf64 addr:$src))))), 586 (COPY_TO_REGCLASS (VMOVSDrm addr:$src), VR128)>; 587 def : Pat<(v2f64 (scalar_to_vector (loadf64 addr:$src))), 588 (COPY_TO_REGCLASS (VMOVSDrm addr:$src), VR128)>; 589 def : Pat<(v2f64 (X86vzmovl (loadv2f64 addr:$src))), 590 (COPY_TO_REGCLASS (VMOVSDrm addr:$src), VR128)>; 591 def : Pat<(v2f64 (X86vzmovl (bc_v2f64 (loadv4f32 addr:$src)))), 592 (COPY_TO_REGCLASS (VMOVSDrm addr:$src), VR128)>; 593 def : Pat<(v2f64 (X86vzload addr:$src)), 594 (COPY_TO_REGCLASS (VMOVSDrm addr:$src), VR128)>; 595 596 // Represent the same patterns above but in the form they appear for 597 // 256-bit types 598 def : Pat<(v8i32 (X86vzmovl (insert_subvector undef, 599 (v4i32 (scalar_to_vector (loadi32 addr:$src))), (iPTR 0)))), 600 (SUBREG_TO_REG (i32 0), (VMOVSSrm addr:$src), sub_xmm)>; 601 def : Pat<(v8f32 (X86vzmovl (insert_subvector undef, 602 (v4f32 (scalar_to_vector (loadf32 addr:$src))), (iPTR 0)))), 603 (SUBREG_TO_REG (i32 0), (VMOVSSrm addr:$src), sub_xmm)>; 604 def : Pat<(v4f64 (X86vzmovl (insert_subvector undef, 605 (v2f64 (scalar_to_vector (loadf64 addr:$src))), (iPTR 0)))), 606 (SUBREG_TO_REG (i32 0), (VMOVSDrm addr:$src), sub_xmm)>; 607 } 608 def : Pat<(v8f32 (X86vzmovl (insert_subvector undef, 609 (v4f32 (scalar_to_vector FR32:$src)), (iPTR 0)))), 610 (SUBREG_TO_REG (i32 0), 611 (v4f32 (VMOVSSrr (v4f32 (V_SET0)), FR32:$src)), 612 sub_xmm)>; 613 def : Pat<(v4f64 (X86vzmovl (insert_subvector undef, 614 (v2f64 (scalar_to_vector FR64:$src)), (iPTR 0)))), 615 (SUBREG_TO_REG (i64 0), 616 (v2f64 (VMOVSDrr (v2f64 (V_SET0)), FR64:$src)), 617 sub_xmm)>; 618 def : Pat<(v4i64 (X86vzmovl (insert_subvector undef, 619 (v2i64 (scalar_to_vector (loadi64 addr:$src))), (iPTR 0)))), 620 (SUBREG_TO_REG (i64 0), (VMOVSDrm addr:$src), sub_xmm)>; 621 622 // Move low f64 and clear high bits. 
623 def : Pat<(v4f64 (X86vzmovl (v4f64 VR256:$src))), 624 (SUBREG_TO_REG (i32 0), 625 (VMOVSDrr (v2f64 (V_SET0)), 626 (EXTRACT_SUBREG (v4f64 VR256:$src), sub_xmm)), sub_xmm)>; 627 628 def : Pat<(v4i64 (X86vzmovl (v4i64 VR256:$src))), 629 (SUBREG_TO_REG (i32 0), 630 (VMOVSDrr (v2i64 (V_SET0)), 631 (EXTRACT_SUBREG (v4i64 VR256:$src), sub_xmm)), sub_xmm)>; 632 633 // Extract and store. 634 def : Pat<(store (f32 (vector_extract (v4f32 VR128:$src), (iPTR 0))), 635 addr:$dst), 636 (VMOVSSmr addr:$dst, (COPY_TO_REGCLASS (v4f32 VR128:$src), FR32))>; 637 def : Pat<(store (f64 (vector_extract (v2f64 VR128:$src), (iPTR 0))), 638 addr:$dst), 639 (VMOVSDmr addr:$dst, (COPY_TO_REGCLASS (v2f64 VR128:$src), FR64))>; 640 641 // Shuffle with VMOVSS 642 def : Pat<(v4i32 (X86Movss VR128:$src1, VR128:$src2)), 643 (VMOVSSrr (v4i32 VR128:$src1), 644 (COPY_TO_REGCLASS (v4i32 VR128:$src2), FR32))>; 645 def : Pat<(v4f32 (X86Movss VR128:$src1, VR128:$src2)), 646 (VMOVSSrr (v4f32 VR128:$src1), 647 (COPY_TO_REGCLASS (v4f32 VR128:$src2), FR32))>; 648 649 // 256-bit variants 650 def : Pat<(v8i32 (X86Movss VR256:$src1, VR256:$src2)), 651 (SUBREG_TO_REG (i32 0), 652 (VMOVSSrr (EXTRACT_SUBREG (v8i32 VR256:$src1), sub_xmm), 653 (EXTRACT_SUBREG (v8i32 VR256:$src2), sub_xmm)), 654 sub_xmm)>; 655 def : Pat<(v8f32 (X86Movss VR256:$src1, VR256:$src2)), 656 (SUBREG_TO_REG (i32 0), 657 (VMOVSSrr (EXTRACT_SUBREG (v8f32 VR256:$src1), sub_xmm), 658 (EXTRACT_SUBREG (v8f32 VR256:$src2), sub_xmm)), 659 sub_xmm)>; 660 661 // Shuffle with VMOVSD 662 def : Pat<(v2i64 (X86Movsd VR128:$src1, VR128:$src2)), 663 (VMOVSDrr VR128:$src1, (COPY_TO_REGCLASS VR128:$src2, FR64))>; 664 def : Pat<(v2f64 (X86Movsd VR128:$src1, VR128:$src2)), 665 (VMOVSDrr VR128:$src1, (COPY_TO_REGCLASS VR128:$src2, FR64))>; 666 def : Pat<(v4f32 (X86Movsd VR128:$src1, VR128:$src2)), 667 (VMOVSDrr VR128:$src1, (COPY_TO_REGCLASS VR128:$src2, FR64))>; 668 def : Pat<(v4i32 (X86Movsd VR128:$src1, VR128:$src2)), 669 (VMOVSDrr VR128:$src1, 
(COPY_TO_REGCLASS VR128:$src2, FR64))>; 670 671 // 256-bit variants 672 def : Pat<(v4i64 (X86Movsd VR256:$src1, VR256:$src2)), 673 (SUBREG_TO_REG (i32 0), 674 (VMOVSDrr (EXTRACT_SUBREG (v4i64 VR256:$src1), sub_xmm), 675 (EXTRACT_SUBREG (v4i64 VR256:$src2), sub_xmm)), 676 sub_xmm)>; 677 def : Pat<(v4f64 (X86Movsd VR256:$src1, VR256:$src2)), 678 (SUBREG_TO_REG (i32 0), 679 (VMOVSDrr (EXTRACT_SUBREG (v4f64 VR256:$src1), sub_xmm), 680 (EXTRACT_SUBREG (v4f64 VR256:$src2), sub_xmm)), 681 sub_xmm)>; 682 683 684 // FIXME: Instead of a X86Movlps there should be a X86Movsd here, the problem 685 // is during lowering, where it's not possible to recognize the fold cause 686 // it has two uses through a bitcast. One use disappears at isel time and the 687 // fold opportunity reappears. 688 def : Pat<(v2f64 (X86Movlpd VR128:$src1, VR128:$src2)), 689 (VMOVSDrr VR128:$src1, (COPY_TO_REGCLASS VR128:$src2, FR64))>; 690 def : Pat<(v2i64 (X86Movlpd VR128:$src1, VR128:$src2)), 691 (VMOVSDrr VR128:$src1, (COPY_TO_REGCLASS VR128:$src2, FR64))>; 692 def : Pat<(v4f32 (X86Movlps VR128:$src1, VR128:$src2)), 693 (VMOVSDrr VR128:$src1, (COPY_TO_REGCLASS VR128:$src2, FR64))>; 694 def : Pat<(v4i32 (X86Movlps VR128:$src1, VR128:$src2)), 695 (VMOVSDrr VR128:$src1, (COPY_TO_REGCLASS VR128:$src2, FR64))>; 696} 697 698let Predicates = [UseSSE1] in { 699 let AddedComplexity = 15 in { 700 // Move scalar to XMM zero-extended, zeroing a VR128 then do a 701 // MOVSS to the lower bits. 702 def : Pat<(v4f32 (X86vzmovl (v4f32 (scalar_to_vector FR32:$src)))), 703 (MOVSSrr (v4f32 (V_SET0)), FR32:$src)>; 704 def : Pat<(v4f32 (X86vzmovl (v4f32 VR128:$src))), 705 (MOVSSrr (v4f32 (V_SET0)), (COPY_TO_REGCLASS VR128:$src, FR32))>; 706 def : Pat<(v4i32 (X86vzmovl (v4i32 VR128:$src))), 707 (MOVSSrr (v4i32 (V_SET0)), (COPY_TO_REGCLASS VR128:$src, FR32))>; 708 } 709 710 let AddedComplexity = 20 in { 711 // MOVSSrm already zeros the high parts of the register. 
712 def : Pat<(v4f32 (X86vzmovl (v4f32 (scalar_to_vector (loadf32 addr:$src))))), 713 (COPY_TO_REGCLASS (MOVSSrm addr:$src), VR128)>; 714 def : Pat<(v4f32 (scalar_to_vector (loadf32 addr:$src))), 715 (COPY_TO_REGCLASS (MOVSSrm addr:$src), VR128)>; 716 def : Pat<(v4f32 (X86vzmovl (loadv4f32 addr:$src))), 717 (COPY_TO_REGCLASS (MOVSSrm addr:$src), VR128)>; 718 } 719 720 // Extract and store. 721 def : Pat<(store (f32 (vector_extract (v4f32 VR128:$src), (iPTR 0))), 722 addr:$dst), 723 (MOVSSmr addr:$dst, (COPY_TO_REGCLASS VR128:$src, FR32))>; 724 725 // Shuffle with MOVSS 726 def : Pat<(v4i32 (X86Movss VR128:$src1, VR128:$src2)), 727 (MOVSSrr VR128:$src1, (COPY_TO_REGCLASS VR128:$src2, FR32))>; 728 def : Pat<(v4f32 (X86Movss VR128:$src1, VR128:$src2)), 729 (MOVSSrr VR128:$src1, (COPY_TO_REGCLASS VR128:$src2, FR32))>; 730} 731 732let Predicates = [UseSSE2] in { 733 let AddedComplexity = 15 in { 734 // Move scalar to XMM zero-extended, zeroing a VR128 then do a 735 // MOVSD to the lower bits. 736 def : Pat<(v2f64 (X86vzmovl (v2f64 (scalar_to_vector FR64:$src)))), 737 (MOVSDrr (v2f64 (V_SET0)), FR64:$src)>; 738 } 739 740 let AddedComplexity = 20 in { 741 // MOVSDrm already zeros the high parts of the register. 742 def : Pat<(v2f64 (X86vzmovl (v2f64 (scalar_to_vector (loadf64 addr:$src))))), 743 (COPY_TO_REGCLASS (MOVSDrm addr:$src), VR128)>; 744 def : Pat<(v2f64 (scalar_to_vector (loadf64 addr:$src))), 745 (COPY_TO_REGCLASS (MOVSDrm addr:$src), VR128)>; 746 def : Pat<(v2f64 (X86vzmovl (loadv2f64 addr:$src))), 747 (COPY_TO_REGCLASS (MOVSDrm addr:$src), VR128)>; 748 def : Pat<(v2f64 (X86vzmovl (bc_v2f64 (loadv4f32 addr:$src)))), 749 (COPY_TO_REGCLASS (MOVSDrm addr:$src), VR128)>; 750 def : Pat<(v2f64 (X86vzload addr:$src)), 751 (COPY_TO_REGCLASS (MOVSDrm addr:$src), VR128)>; 752 } 753 754 // Extract and store. 
755 def : Pat<(store (f64 (vector_extract (v2f64 VR128:$src), (iPTR 0))), 756 addr:$dst), 757 (MOVSDmr addr:$dst, (COPY_TO_REGCLASS VR128:$src, FR64))>; 758 759 // Shuffle with MOVSD 760 def : Pat<(v2i64 (X86Movsd VR128:$src1, VR128:$src2)), 761 (MOVSDrr VR128:$src1, (COPY_TO_REGCLASS VR128:$src2, FR64))>; 762 def : Pat<(v2f64 (X86Movsd VR128:$src1, VR128:$src2)), 763 (MOVSDrr VR128:$src1, (COPY_TO_REGCLASS VR128:$src2, FR64))>; 764 def : Pat<(v4f32 (X86Movsd VR128:$src1, VR128:$src2)), 765 (MOVSDrr VR128:$src1, (COPY_TO_REGCLASS VR128:$src2, FR64))>; 766 def : Pat<(v4i32 (X86Movsd VR128:$src1, VR128:$src2)), 767 (MOVSDrr VR128:$src1, (COPY_TO_REGCLASS VR128:$src2, FR64))>; 768 769 // FIXME: Instead of a X86Movlps there should be a X86Movsd here, the problem 770 // is during lowering, where it's not possible to recognize the fold cause 771 // it has two uses through a bitcast. One use disappears at isel time and the 772 // fold opportunity reappears. 773 def : Pat<(v2f64 (X86Movlpd VR128:$src1, VR128:$src2)), 774 (MOVSDrr VR128:$src1, (COPY_TO_REGCLASS VR128:$src2, FR64))>; 775 def : Pat<(v2i64 (X86Movlpd VR128:$src1, VR128:$src2)), 776 (MOVSDrr VR128:$src1, (COPY_TO_REGCLASS VR128:$src2, FR64))>; 777 def : Pat<(v4f32 (X86Movlps VR128:$src1, VR128:$src2)), 778 (MOVSDrr VR128:$src1, (COPY_TO_REGCLASS VR128:$src2, FR64))>; 779 def : Pat<(v4i32 (X86Movlps VR128:$src1, VR128:$src2)), 780 (MOVSDrr VR128:$src1, (COPY_TO_REGCLASS VR128:$src2, FR64))>; 781} 782 783//===----------------------------------------------------------------------===// 784// SSE 1 & 2 - Move Aligned/Unaligned FP Instructions 785//===----------------------------------------------------------------------===// 786 787multiclass sse12_mov_packed<bits<8> opc, RegisterClass RC, 788 X86MemOperand x86memop, PatFrag ld_frag, 789 string asm, Domain d, 790 OpndItins itins, 791 bit IsReMaterializable = 1> { 792let neverHasSideEffects = 1 in 793 def rr : PI<opc, MRMSrcReg, (outs RC:$dst), (ins RC:$src), 794 
!strconcat(asm, "\t{$src, $dst|$dst, $src}"), [], itins.rr, d>; 795let canFoldAsLoad = 1, isReMaterializable = IsReMaterializable in 796 def rm : PI<opc, MRMSrcMem, (outs RC:$dst), (ins x86memop:$src), 797 !strconcat(asm, "\t{$src, $dst|$dst, $src}"), 798 [(set RC:$dst, (ld_frag addr:$src))], itins.rm, d>; 799} 800 801defm VMOVAPS : sse12_mov_packed<0x28, VR128, f128mem, alignedloadv4f32, 802 "movaps", SSEPackedSingle, SSE_MOVA_ITINS>, 803 TB, VEX; 804defm VMOVAPD : sse12_mov_packed<0x28, VR128, f128mem, alignedloadv2f64, 805 "movapd", SSEPackedDouble, SSE_MOVA_ITINS>, 806 TB, OpSize, VEX; 807defm VMOVUPS : sse12_mov_packed<0x10, VR128, f128mem, loadv4f32, 808 "movups", SSEPackedSingle, SSE_MOVU_ITINS>, 809 TB, VEX; 810defm VMOVUPD : sse12_mov_packed<0x10, VR128, f128mem, loadv2f64, 811 "movupd", SSEPackedDouble, SSE_MOVU_ITINS, 0>, 812 TB, OpSize, VEX; 813 814defm VMOVAPSY : sse12_mov_packed<0x28, VR256, f256mem, alignedloadv8f32, 815 "movaps", SSEPackedSingle, SSE_MOVA_ITINS>, 816 TB, VEX, VEX_L; 817defm VMOVAPDY : sse12_mov_packed<0x28, VR256, f256mem, alignedloadv4f64, 818 "movapd", SSEPackedDouble, SSE_MOVA_ITINS>, 819 TB, OpSize, VEX, VEX_L; 820defm VMOVUPSY : sse12_mov_packed<0x10, VR256, f256mem, loadv8f32, 821 "movups", SSEPackedSingle, SSE_MOVU_ITINS>, 822 TB, VEX, VEX_L; 823defm VMOVUPDY : sse12_mov_packed<0x10, VR256, f256mem, loadv4f64, 824 "movupd", SSEPackedDouble, SSE_MOVU_ITINS, 0>, 825 TB, OpSize, VEX, VEX_L; 826defm MOVAPS : sse12_mov_packed<0x28, VR128, f128mem, alignedloadv4f32, 827 "movaps", SSEPackedSingle, SSE_MOVA_ITINS>, 828 TB; 829defm MOVAPD : sse12_mov_packed<0x28, VR128, f128mem, alignedloadv2f64, 830 "movapd", SSEPackedDouble, SSE_MOVA_ITINS>, 831 TB, OpSize; 832defm MOVUPS : sse12_mov_packed<0x10, VR128, f128mem, loadv4f32, 833 "movups", SSEPackedSingle, SSE_MOVU_ITINS>, 834 TB; 835defm MOVUPD : sse12_mov_packed<0x10, VR128, f128mem, loadv2f64, 836 "movupd", SSEPackedDouble, SSE_MOVU_ITINS, 0>, 837 TB, OpSize; 838 839def VMOVAPSmr : 
VPSI<0x29, MRMDestMem, (outs), (ins f128mem:$dst, VR128:$src), 840 "movaps\t{$src, $dst|$dst, $src}", 841 [(alignedstore (v4f32 VR128:$src), addr:$dst)], 842 IIC_SSE_MOVA_P_MR>, VEX; 843def VMOVAPDmr : VPDI<0x29, MRMDestMem, (outs), (ins f128mem:$dst, VR128:$src), 844 "movapd\t{$src, $dst|$dst, $src}", 845 [(alignedstore (v2f64 VR128:$src), addr:$dst)], 846 IIC_SSE_MOVA_P_MR>, VEX; 847def VMOVUPSmr : VPSI<0x11, MRMDestMem, (outs), (ins f128mem:$dst, VR128:$src), 848 "movups\t{$src, $dst|$dst, $src}", 849 [(store (v4f32 VR128:$src), addr:$dst)], 850 IIC_SSE_MOVU_P_MR>, VEX; 851def VMOVUPDmr : VPDI<0x11, MRMDestMem, (outs), (ins f128mem:$dst, VR128:$src), 852 "movupd\t{$src, $dst|$dst, $src}", 853 [(store (v2f64 VR128:$src), addr:$dst)], 854 IIC_SSE_MOVU_P_MR>, VEX; 855def VMOVAPSYmr : VPSI<0x29, MRMDestMem, (outs), (ins f256mem:$dst, VR256:$src), 856 "movaps\t{$src, $dst|$dst, $src}", 857 [(alignedstore256 (v8f32 VR256:$src), addr:$dst)], 858 IIC_SSE_MOVA_P_MR>, VEX, VEX_L; 859def VMOVAPDYmr : VPDI<0x29, MRMDestMem, (outs), (ins f256mem:$dst, VR256:$src), 860 "movapd\t{$src, $dst|$dst, $src}", 861 [(alignedstore256 (v4f64 VR256:$src), addr:$dst)], 862 IIC_SSE_MOVA_P_MR>, VEX, VEX_L; 863def VMOVUPSYmr : VPSI<0x11, MRMDestMem, (outs), (ins f256mem:$dst, VR256:$src), 864 "movups\t{$src, $dst|$dst, $src}", 865 [(store (v8f32 VR256:$src), addr:$dst)], 866 IIC_SSE_MOVU_P_MR>, VEX, VEX_L; 867def VMOVUPDYmr : VPDI<0x11, MRMDestMem, (outs), (ins f256mem:$dst, VR256:$src), 868 "movupd\t{$src, $dst|$dst, $src}", 869 [(store (v4f64 VR256:$src), addr:$dst)], 870 IIC_SSE_MOVU_P_MR>, VEX, VEX_L; 871 872// For disassembler 873let isCodeGenOnly = 1 in { 874 def VMOVAPSrr_REV : VPSI<0x29, MRMDestReg, (outs VR128:$dst), 875 (ins VR128:$src), 876 "movaps\t{$src, $dst|$dst, $src}", [], 877 IIC_SSE_MOVA_P_RR>, VEX; 878 def VMOVAPDrr_REV : VPDI<0x29, MRMDestReg, (outs VR128:$dst), 879 (ins VR128:$src), 880 "movapd\t{$src, $dst|$dst, $src}", [], 881 IIC_SSE_MOVA_P_RR>, VEX; 882 def 
VMOVUPSrr_REV : VPSI<0x11, MRMDestReg, (outs VR128:$dst), 883 (ins VR128:$src), 884 "movups\t{$src, $dst|$dst, $src}", [], 885 IIC_SSE_MOVU_P_RR>, VEX; 886 def VMOVUPDrr_REV : VPDI<0x11, MRMDestReg, (outs VR128:$dst), 887 (ins VR128:$src), 888 "movupd\t{$src, $dst|$dst, $src}", [], 889 IIC_SSE_MOVU_P_RR>, VEX; 890 def VMOVAPSYrr_REV : VPSI<0x29, MRMDestReg, (outs VR256:$dst), 891 (ins VR256:$src), 892 "movaps\t{$src, $dst|$dst, $src}", [], 893 IIC_SSE_MOVA_P_RR>, VEX, VEX_L; 894 def VMOVAPDYrr_REV : VPDI<0x29, MRMDestReg, (outs VR256:$dst), 895 (ins VR256:$src), 896 "movapd\t{$src, $dst|$dst, $src}", [], 897 IIC_SSE_MOVA_P_RR>, VEX, VEX_L; 898 def VMOVUPSYrr_REV : VPSI<0x11, MRMDestReg, (outs VR256:$dst), 899 (ins VR256:$src), 900 "movups\t{$src, $dst|$dst, $src}", [], 901 IIC_SSE_MOVU_P_RR>, VEX, VEX_L; 902 def VMOVUPDYrr_REV : VPDI<0x11, MRMDestReg, (outs VR256:$dst), 903 (ins VR256:$src), 904 "movupd\t{$src, $dst|$dst, $src}", [], 905 IIC_SSE_MOVU_P_RR>, VEX, VEX_L; 906} 907 908let Predicates = [HasAVX] in { 909def : Pat<(v8i32 (X86vzmovl 910 (insert_subvector undef, (v4i32 VR128:$src), (iPTR 0)))), 911 (SUBREG_TO_REG (i32 0), (VMOVAPSrr VR128:$src), sub_xmm)>; 912def : Pat<(v4i64 (X86vzmovl 913 (insert_subvector undef, (v2i64 VR128:$src), (iPTR 0)))), 914 (SUBREG_TO_REG (i32 0), (VMOVAPSrr VR128:$src), sub_xmm)>; 915def : Pat<(v8f32 (X86vzmovl 916 (insert_subvector undef, (v4f32 VR128:$src), (iPTR 0)))), 917 (SUBREG_TO_REG (i32 0), (VMOVAPSrr VR128:$src), sub_xmm)>; 918def : Pat<(v4f64 (X86vzmovl 919 (insert_subvector undef, (v2f64 VR128:$src), (iPTR 0)))), 920 (SUBREG_TO_REG (i32 0), (VMOVAPSrr VR128:$src), sub_xmm)>; 921} 922 923 924def : Pat<(int_x86_avx_storeu_ps_256 addr:$dst, VR256:$src), 925 (VMOVUPSYmr addr:$dst, VR256:$src)>; 926def : Pat<(int_x86_avx_storeu_pd_256 addr:$dst, VR256:$src), 927 (VMOVUPDYmr addr:$dst, VR256:$src)>; 928 929def MOVAPSmr : PSI<0x29, MRMDestMem, (outs), (ins f128mem:$dst, VR128:$src), 930 "movaps\t{$src, $dst|$dst, $src}", 
931 [(alignedstore (v4f32 VR128:$src), addr:$dst)], 932 IIC_SSE_MOVA_P_MR>; 933def MOVAPDmr : PDI<0x29, MRMDestMem, (outs), (ins f128mem:$dst, VR128:$src), 934 "movapd\t{$src, $dst|$dst, $src}", 935 [(alignedstore (v2f64 VR128:$src), addr:$dst)], 936 IIC_SSE_MOVA_P_MR>; 937def MOVUPSmr : PSI<0x11, MRMDestMem, (outs), (ins f128mem:$dst, VR128:$src), 938 "movups\t{$src, $dst|$dst, $src}", 939 [(store (v4f32 VR128:$src), addr:$dst)], 940 IIC_SSE_MOVU_P_MR>; 941def MOVUPDmr : PDI<0x11, MRMDestMem, (outs), (ins f128mem:$dst, VR128:$src), 942 "movupd\t{$src, $dst|$dst, $src}", 943 [(store (v2f64 VR128:$src), addr:$dst)], 944 IIC_SSE_MOVU_P_MR>; 945 946// For disassembler 947let isCodeGenOnly = 1 in { 948 def MOVAPSrr_REV : PSI<0x29, MRMDestReg, (outs VR128:$dst), (ins VR128:$src), 949 "movaps\t{$src, $dst|$dst, $src}", [], 950 IIC_SSE_MOVA_P_RR>; 951 def MOVAPDrr_REV : PDI<0x29, MRMDestReg, (outs VR128:$dst), (ins VR128:$src), 952 "movapd\t{$src, $dst|$dst, $src}", [], 953 IIC_SSE_MOVA_P_RR>; 954 def MOVUPSrr_REV : PSI<0x11, MRMDestReg, (outs VR128:$dst), (ins VR128:$src), 955 "movups\t{$src, $dst|$dst, $src}", [], 956 IIC_SSE_MOVU_P_RR>; 957 def MOVUPDrr_REV : PDI<0x11, MRMDestReg, (outs VR128:$dst), (ins VR128:$src), 958 "movupd\t{$src, $dst|$dst, $src}", [], 959 IIC_SSE_MOVU_P_RR>; 960} 961 962let Predicates = [HasAVX] in { 963 def : Pat<(int_x86_sse_storeu_ps addr:$dst, VR128:$src), 964 (VMOVUPSmr addr:$dst, VR128:$src)>; 965 def : Pat<(int_x86_sse2_storeu_pd addr:$dst, VR128:$src), 966 (VMOVUPDmr addr:$dst, VR128:$src)>; 967} 968 969let Predicates = [UseSSE1] in 970 def : Pat<(int_x86_sse_storeu_ps addr:$dst, VR128:$src), 971 (MOVUPSmr addr:$dst, VR128:$src)>; 972let Predicates = [UseSSE2] in 973 def : Pat<(int_x86_sse2_storeu_pd addr:$dst, VR128:$src), 974 (MOVUPDmr addr:$dst, VR128:$src)>; 975 976// Use vmovaps/vmovups for AVX integer load/store. 
let Predicates = [HasAVX] in {
  // 128-bit load/store
  // Integer vectors are moved with the *PS forms: same bits, shorter
  // encoding than the MOVDQA/MOVDQU equivalents.
  def : Pat<(alignedloadv2i64 addr:$src),
            (VMOVAPSrm addr:$src)>;
  def : Pat<(loadv2i64 addr:$src),
            (VMOVUPSrm addr:$src)>;

  // Aligned 128-bit integer stores -> vmovaps.
  def : Pat<(alignedstore (v2i64 VR128:$src), addr:$dst),
            (VMOVAPSmr addr:$dst, VR128:$src)>;
  def : Pat<(alignedstore (v4i32 VR128:$src), addr:$dst),
            (VMOVAPSmr addr:$dst, VR128:$src)>;
  def : Pat<(alignedstore (v8i16 VR128:$src), addr:$dst),
            (VMOVAPSmr addr:$dst, VR128:$src)>;
  def : Pat<(alignedstore (v16i8 VR128:$src), addr:$dst),
            (VMOVAPSmr addr:$dst, VR128:$src)>;
  // Unaligned 128-bit integer stores -> vmovups.
  def : Pat<(store (v2i64 VR128:$src), addr:$dst),
            (VMOVUPSmr addr:$dst, VR128:$src)>;
  def : Pat<(store (v4i32 VR128:$src), addr:$dst),
            (VMOVUPSmr addr:$dst, VR128:$src)>;
  def : Pat<(store (v8i16 VR128:$src), addr:$dst),
            (VMOVUPSmr addr:$dst, VR128:$src)>;
  def : Pat<(store (v16i8 VR128:$src), addr:$dst),
            (VMOVUPSmr addr:$dst, VR128:$src)>;

  // 256-bit load/store
  def : Pat<(alignedloadv4i64 addr:$src),
            (VMOVAPSYrm addr:$src)>;
  def : Pat<(loadv4i64 addr:$src),
            (VMOVUPSYrm addr:$src)>;
  // Aligned 256-bit integer stores -> vmovaps (ymm).
  def : Pat<(alignedstore256 (v4i64 VR256:$src), addr:$dst),
            (VMOVAPSYmr addr:$dst, VR256:$src)>;
  def : Pat<(alignedstore256 (v8i32 VR256:$src), addr:$dst),
            (VMOVAPSYmr addr:$dst, VR256:$src)>;
  def : Pat<(alignedstore256 (v16i16 VR256:$src), addr:$dst),
            (VMOVAPSYmr addr:$dst, VR256:$src)>;
  def : Pat<(alignedstore256 (v32i8 VR256:$src), addr:$dst),
            (VMOVAPSYmr addr:$dst, VR256:$src)>;
  // Unaligned 256-bit integer stores -> vmovups (ymm).
  def : Pat<(store (v4i64 VR256:$src), addr:$dst),
            (VMOVUPSYmr addr:$dst, VR256:$src)>;
  def : Pat<(store (v8i32 VR256:$src), addr:$dst),
            (VMOVUPSYmr addr:$dst, VR256:$src)>;
  def : Pat<(store (v16i16 VR256:$src), addr:$dst),
            (VMOVUPSYmr addr:$dst, VR256:$src)>;
  def : Pat<(store (v32i8 VR256:$src), addr:$dst),
            (VMOVUPSYmr addr:$dst, VR256:$src)>;

  // Special patterns for storing
subvector extracts of lower 128-bits
  // Its cheaper to just use VMOVAPS/VMOVUPS instead of VEXTRACTF128mr
  // Aligned stores of the low xmm subvector use the aligned VMOVAP* forms.
  def : Pat<(alignedstore (v2f64 (extract_subvector
                                  (v4f64 VR256:$src), (iPTR 0))), addr:$dst),
            (VMOVAPDmr addr:$dst, (v2f64 (EXTRACT_SUBREG VR256:$src,sub_xmm)))>;
  def : Pat<(alignedstore (v4f32 (extract_subvector
                                  (v8f32 VR256:$src), (iPTR 0))), addr:$dst),
            (VMOVAPSmr addr:$dst, (v4f32 (EXTRACT_SUBREG VR256:$src,sub_xmm)))>;
  def : Pat<(alignedstore (v2i64 (extract_subvector
                                  (v4i64 VR256:$src), (iPTR 0))), addr:$dst),
            (VMOVAPDmr addr:$dst, (v2i64 (EXTRACT_SUBREG VR256:$src,sub_xmm)))>;
  def : Pat<(alignedstore (v4i32 (extract_subvector
                                  (v8i32 VR256:$src), (iPTR 0))), addr:$dst),
            (VMOVAPSmr addr:$dst, (v4i32 (EXTRACT_SUBREG VR256:$src,sub_xmm)))>;
  def : Pat<(alignedstore (v8i16 (extract_subvector
                                  (v16i16 VR256:$src), (iPTR 0))), addr:$dst),
            (VMOVAPSmr addr:$dst, (v8i16 (EXTRACT_SUBREG VR256:$src,sub_xmm)))>;
  def : Pat<(alignedstore (v16i8 (extract_subvector
                                  (v32i8 VR256:$src), (iPTR 0))), addr:$dst),
            (VMOVAPSmr addr:$dst, (v16i8 (EXTRACT_SUBREG VR256:$src,sub_xmm)))>;

  // Unaligned stores of the low xmm subvector must use the unaligned
  // VMOVUP* forms; an aligned VMOVAP* would raise #GP on a misaligned
  // address.
  def : Pat<(store (v2f64 (extract_subvector
                           (v4f64 VR256:$src), (iPTR 0))), addr:$dst),
            (VMOVUPDmr addr:$dst, (v2f64 (EXTRACT_SUBREG VR256:$src,sub_xmm)))>;
  def : Pat<(store (v4f32 (extract_subvector
                           (v8f32 VR256:$src), (iPTR 0))), addr:$dst),
            (VMOVUPSmr addr:$dst, (v4f32 (EXTRACT_SUBREG VR256:$src,sub_xmm)))>;
  def : Pat<(store (v2i64 (extract_subvector
                           (v4i64 VR256:$src), (iPTR 0))), addr:$dst),
            (VMOVUPDmr addr:$dst, (v2i64 (EXTRACT_SUBREG VR256:$src,sub_xmm)))>;
  def : Pat<(store (v4i32 (extract_subvector
                           (v8i32 VR256:$src), (iPTR 0))), addr:$dst),
            (VMOVUPSmr addr:$dst, (v4i32 (EXTRACT_SUBREG VR256:$src,sub_xmm)))>;
  // FIXED: this unaligned v8i16 store previously selected the aligned
  // VMOVAPSmr, which can fault when addr:$dst is not 16-byte aligned.
  def : Pat<(store (v8i16 (extract_subvector
                           (v16i16 VR256:$src), (iPTR 0))), addr:$dst),
            (VMOVUPSmr addr:$dst, (v8i16 (EXTRACT_SUBREG VR256:$src,sub_xmm)))>;
  def : Pat<(store (v16i8 (extract_subvector
                           (v32i8 VR256:$src), (iPTR 0))), addr:$dst),
            (VMOVUPSmr addr:$dst, (v16i8 (EXTRACT_SUBREG VR256:$src,sub_xmm)))>;
}

// Use movaps / movups for SSE integer load / store (one byte shorter).
// The instructions selected below are then converted to MOVDQA/MOVDQU
// during the SSE domain pass.
let Predicates = [UseSSE1] in {
  def : Pat<(alignedloadv2i64 addr:$src),
            (MOVAPSrm addr:$src)>;
  def : Pat<(loadv2i64 addr:$src),
            (MOVUPSrm addr:$src)>;

  // Aligned integer stores -> movaps; unaligned -> movups.
  def : Pat<(alignedstore (v2i64 VR128:$src), addr:$dst),
            (MOVAPSmr addr:$dst, VR128:$src)>;
  def : Pat<(alignedstore (v4i32 VR128:$src), addr:$dst),
            (MOVAPSmr addr:$dst, VR128:$src)>;
  def : Pat<(alignedstore (v8i16 VR128:$src), addr:$dst),
            (MOVAPSmr addr:$dst, VR128:$src)>;
  def : Pat<(alignedstore (v16i8 VR128:$src), addr:$dst),
            (MOVAPSmr addr:$dst, VR128:$src)>;
  def : Pat<(store (v2i64 VR128:$src), addr:$dst),
            (MOVUPSmr addr:$dst, VR128:$src)>;
  def : Pat<(store (v4i32 VR128:$src), addr:$dst),
            (MOVUPSmr addr:$dst, VR128:$src)>;
  def : Pat<(store (v8i16 VR128:$src), addr:$dst),
            (MOVUPSmr addr:$dst, VR128:$src)>;
  def : Pat<(store (v16i8 VR128:$src), addr:$dst),
            (MOVUPSmr addr:$dst, VR128:$src)>;
}

// Alias instruction to do FR32 or FR64 reg-to-reg copy using movaps. Upper
// bits are disregarded. FIXME: Set encoding to pseudo!
let neverHasSideEffects = 1 in {
def FsVMOVAPSrr : VPSI<0x28, MRMSrcReg, (outs FR32:$dst), (ins FR32:$src),
                       "movaps\t{$src, $dst|$dst, $src}", [],
                       IIC_SSE_MOVA_P_RR>, VEX;
def FsVMOVAPDrr : VPDI<0x28, MRMSrcReg, (outs FR64:$dst), (ins FR64:$src),
                       "movapd\t{$src, $dst|$dst, $src}", [],
                       IIC_SSE_MOVA_P_RR>, VEX;
def FsMOVAPSrr : PSI<0x28, MRMSrcReg, (outs FR32:$dst), (ins FR32:$src),
                     "movaps\t{$src, $dst|$dst, $src}", [],
                     IIC_SSE_MOVA_P_RR>;
def FsMOVAPDrr : PDI<0x28, MRMSrcReg, (outs FR64:$dst), (ins FR64:$src),
                     "movapd\t{$src, $dst|$dst, $src}", [],
                     IIC_SSE_MOVA_P_RR>;
}

// Alias instruction to load FR32 or FR64 from f128mem using movaps. Upper
// bits are disregarded. FIXME: Set encoding to pseudo!
let canFoldAsLoad = 1, isReMaterializable = 1 in {
let isCodeGenOnly = 1 in {
  def FsVMOVAPSrm : VPSI<0x28, MRMSrcMem, (outs FR32:$dst), (ins f128mem:$src),
                         "movaps\t{$src, $dst|$dst, $src}",
                         [(set FR32:$dst, (alignedloadfsf32 addr:$src))],
                         IIC_SSE_MOVA_P_RM>, VEX;
  def FsVMOVAPDrm : VPDI<0x28, MRMSrcMem, (outs FR64:$dst), (ins f128mem:$src),
                         "movapd\t{$src, $dst|$dst, $src}",
                         [(set FR64:$dst, (alignedloadfsf64 addr:$src))],
                         IIC_SSE_MOVA_P_RM>, VEX;
}
def FsMOVAPSrm : PSI<0x28, MRMSrcMem, (outs FR32:$dst), (ins f128mem:$src),
                     "movaps\t{$src, $dst|$dst, $src}",
                     [(set FR32:$dst, (alignedloadfsf32 addr:$src))],
                     IIC_SSE_MOVA_P_RM>;
def FsMOVAPDrm : PDI<0x28, MRMSrcMem, (outs FR64:$dst), (ins f128mem:$src),
                     "movapd\t{$src, $dst|$dst, $src}",
                     [(set FR64:$dst, (alignedloadfsf64 addr:$src))],
                     IIC_SSE_MOVA_P_RM>;
}

//===----------------------------------------------------------------------===//
// SSE 1 & 2 - Move Low packed FP Instructions
//===----------------------------------------------------------------------===//

// sse12_mov_hilo_packed - shared definition of the reg/mem forms of
// movlps/movlpd (opc 0x12) and movhps/movhpd (opc 0x16). The "s" (single)
// form folds the f64 load through a bitcast to v4f32; the "d" (double) form
// uses it directly as the low v2f64 element.
multiclass sse12_mov_hilo_packed<bits<8>opc, RegisterClass RC,
                                 SDNode psnode, SDNode pdnode, string base_opc,
                                 string asm_opr, InstrItinClass itin> {
  // CONSISTENCY FIX: PSrm previously hard-coded VR128 in outs/ins while its
  // pattern used RC, unlike PDrm. Use the RC parameter uniformly (behavior
  // is unchanged: all instantiations pass RC = VR128).
  def PSrm : PI<opc, MRMSrcMem,
                (outs RC:$dst), (ins RC:$src1, f64mem:$src2),
                !strconcat(base_opc, "s", asm_opr),
                [(set RC:$dst,
                  (psnode RC:$src1,
                   (bc_v4f32 (v2f64 (scalar_to_vector (loadf64 addr:$src2))))))],
                itin, SSEPackedSingle>, TB;

  def PDrm : PI<opc, MRMSrcMem,
                (outs RC:$dst), (ins RC:$src1, f64mem:$src2),
                !strconcat(base_opc, "d", asm_opr),
                [(set RC:$dst, (v2f64 (pdnode RC:$src1,
                                       (scalar_to_vector (loadf64 addr:$src2)))))],
                itin, SSEPackedDouble>, TB, OpSize;
}

let AddedComplexity = 20 in {
  defm VMOVL : sse12_mov_hilo_packed<0x12, VR128, X86Movlps, X86Movlpd, "movlp",
                                     "\t{$src2, $src1, $dst|$dst, $src1, $src2}",
                                     IIC_SSE_MOV_LH>, VEX_4V;
}
let Constraints = "$src1 = $dst", AddedComplexity = 20 in {
  defm MOVL : sse12_mov_hilo_packed<0x12, VR128, X86Movlps, X86Movlpd, "movlp",
                                    "\t{$src2, $dst|$dst, $src2}",
                                    IIC_SSE_MOV_LH>;
}

def VMOVLPSmr : VPSI<0x13, MRMDestMem, (outs), (ins f64mem:$dst, VR128:$src),
                     "movlps\t{$src, $dst|$dst, $src}",
                     [(store (f64 (vector_extract (bc_v2f64 (v4f32 VR128:$src)),
                                   (iPTR 0))), addr:$dst)],
                     IIC_SSE_MOV_LH>, VEX;
def VMOVLPDmr : VPDI<0x13, MRMDestMem, (outs), (ins f64mem:$dst, VR128:$src),
                     "movlpd\t{$src, $dst|$dst, $src}",
                     [(store (f64 (vector_extract (v2f64 VR128:$src),
                                   (iPTR 0))), addr:$dst)],
                     IIC_SSE_MOV_LH>, VEX;
def MOVLPSmr : PSI<0x13, MRMDestMem, (outs), (ins f64mem:$dst, VR128:$src),
                   "movlps\t{$src, $dst|$dst, $src}",
                   [(store (f64 (vector_extract (bc_v2f64 (v4f32 VR128:$src)),
                                 (iPTR 0))), addr:$dst)],
                   IIC_SSE_MOV_LH>;
def MOVLPDmr : PDI<0x13, MRMDestMem, (outs), (ins f64mem:$dst, VR128:$src),
                   "movlpd\t{$src, $dst|$dst, $src}",
                   [(store (f64 (vector_extract (v2f64 VR128:$src),
                                 (iPTR 0))), addr:$dst)],
IIC_SSE_MOV_LH>; 1185 1186let Predicates = [HasAVX] in { 1187 // Shuffle with VMOVLPS 1188 def : Pat<(v4f32 (X86Movlps VR128:$src1, (load addr:$src2))), 1189 (VMOVLPSrm VR128:$src1, addr:$src2)>; 1190 def : Pat<(v4i32 (X86Movlps VR128:$src1, (load addr:$src2))), 1191 (VMOVLPSrm VR128:$src1, addr:$src2)>; 1192 1193 // Shuffle with VMOVLPD 1194 def : Pat<(v2f64 (X86Movlpd VR128:$src1, (load addr:$src2))), 1195 (VMOVLPDrm VR128:$src1, addr:$src2)>; 1196 def : Pat<(v2i64 (X86Movlpd VR128:$src1, (load addr:$src2))), 1197 (VMOVLPDrm VR128:$src1, addr:$src2)>; 1198 1199 // Store patterns 1200 def : Pat<(store (v4f32 (X86Movlps (load addr:$src1), VR128:$src2)), 1201 addr:$src1), 1202 (VMOVLPSmr addr:$src1, VR128:$src2)>; 1203 def : Pat<(store (v4i32 (X86Movlps 1204 (bc_v4i32 (loadv2i64 addr:$src1)), VR128:$src2)), addr:$src1), 1205 (VMOVLPSmr addr:$src1, VR128:$src2)>; 1206 def : Pat<(store (v2f64 (X86Movlpd (load addr:$src1), VR128:$src2)), 1207 addr:$src1), 1208 (VMOVLPDmr addr:$src1, VR128:$src2)>; 1209 def : Pat<(store (v2i64 (X86Movlpd (load addr:$src1), VR128:$src2)), 1210 addr:$src1), 1211 (VMOVLPDmr addr:$src1, VR128:$src2)>; 1212} 1213 1214let Predicates = [UseSSE1] in { 1215 // (store (vector_shuffle (load addr), v2, <4, 5, 2, 3>), addr) using MOVLPS 1216 def : Pat<(store (i64 (vector_extract (bc_v2i64 (v4f32 VR128:$src2)), 1217 (iPTR 0))), addr:$src1), 1218 (MOVLPSmr addr:$src1, VR128:$src2)>; 1219 1220 // Shuffle with MOVLPS 1221 def : Pat<(v4f32 (X86Movlps VR128:$src1, (load addr:$src2))), 1222 (MOVLPSrm VR128:$src1, addr:$src2)>; 1223 def : Pat<(v4i32 (X86Movlps VR128:$src1, (load addr:$src2))), 1224 (MOVLPSrm VR128:$src1, addr:$src2)>; 1225 def : Pat<(X86Movlps VR128:$src1, 1226 (bc_v4f32 (v2i64 (scalar_to_vector (loadi64 addr:$src2))))), 1227 (MOVLPSrm VR128:$src1, addr:$src2)>; 1228 1229 // Store patterns 1230 def : Pat<(store (v4f32 (X86Movlps (load addr:$src1), VR128:$src2)), 1231 addr:$src1), 1232 (MOVLPSmr addr:$src1, VR128:$src2)>; 1233 def : 
Pat<(store (v4i32 (X86Movlps 1234 (bc_v4i32 (loadv2i64 addr:$src1)), VR128:$src2)), 1235 addr:$src1), 1236 (MOVLPSmr addr:$src1, VR128:$src2)>; 1237} 1238 1239let Predicates = [UseSSE2] in { 1240 // Shuffle with MOVLPD 1241 def : Pat<(v2f64 (X86Movlpd VR128:$src1, (load addr:$src2))), 1242 (MOVLPDrm VR128:$src1, addr:$src2)>; 1243 def : Pat<(v2i64 (X86Movlpd VR128:$src1, (load addr:$src2))), 1244 (MOVLPDrm VR128:$src1, addr:$src2)>; 1245 1246 // Store patterns 1247 def : Pat<(store (v2f64 (X86Movlpd (load addr:$src1), VR128:$src2)), 1248 addr:$src1), 1249 (MOVLPDmr addr:$src1, VR128:$src2)>; 1250 def : Pat<(store (v2i64 (X86Movlpd (load addr:$src1), VR128:$src2)), 1251 addr:$src1), 1252 (MOVLPDmr addr:$src1, VR128:$src2)>; 1253} 1254 1255//===----------------------------------------------------------------------===// 1256// SSE 1 & 2 - Move Hi packed FP Instructions 1257//===----------------------------------------------------------------------===// 1258 1259let AddedComplexity = 20 in { 1260 defm VMOVH : sse12_mov_hilo_packed<0x16, VR128, X86Movlhps, X86Movlhpd, "movhp", 1261 "\t{$src2, $src1, $dst|$dst, $src1, $src2}", 1262 IIC_SSE_MOV_LH>, VEX_4V; 1263} 1264let Constraints = "$src1 = $dst", AddedComplexity = 20 in { 1265 defm MOVH : sse12_mov_hilo_packed<0x16, VR128, X86Movlhps, X86Movlhpd, "movhp", 1266 "\t{$src2, $dst|$dst, $src2}", 1267 IIC_SSE_MOV_LH>; 1268} 1269 1270// v2f64 extract element 1 is always custom lowered to unpack high to low 1271// and extract element 0 so the non-store version isn't too horrible. 
// movhps/movhpd mem-store forms: store the high 64 bits of the source xmm.
// The high half is modeled as "unpckh with itself, then extract element 0"
// (see the comment above about v2f64 extract-element-1 lowering).
def VMOVHPSmr : VPSI<0x17, MRMDestMem, (outs), (ins f64mem:$dst, VR128:$src),
                     "movhps\t{$src, $dst|$dst, $src}",
                     [(store (f64 (vector_extract
                                   (X86Unpckh (bc_v2f64 (v4f32 VR128:$src)),
                                              (bc_v2f64 (v4f32 VR128:$src))),
                                   (iPTR 0))), addr:$dst)], IIC_SSE_MOV_LH>, VEX;
def VMOVHPDmr : VPDI<0x17, MRMDestMem, (outs), (ins f64mem:$dst, VR128:$src),
                     "movhpd\t{$src, $dst|$dst, $src}",
                     [(store (f64 (vector_extract
                                   (v2f64 (X86Unpckh VR128:$src, VR128:$src)),
                                   (iPTR 0))), addr:$dst)], IIC_SSE_MOV_LH>, VEX;
def MOVHPSmr : PSI<0x17, MRMDestMem, (outs), (ins f64mem:$dst, VR128:$src),
                   "movhps\t{$src, $dst|$dst, $src}",
                   [(store (f64 (vector_extract
                                 (X86Unpckh (bc_v2f64 (v4f32 VR128:$src)),
                                            (bc_v2f64 (v4f32 VR128:$src))),
                                 (iPTR 0))), addr:$dst)], IIC_SSE_MOV_LH>;
def MOVHPDmr : PDI<0x17, MRMDestMem, (outs), (ins f64mem:$dst, VR128:$src),
                   "movhpd\t{$src, $dst|$dst, $src}",
                   [(store (f64 (vector_extract
                                 (v2f64 (X86Unpckh VR128:$src, VR128:$src)),
                                 (iPTR 0))), addr:$dst)], IIC_SSE_MOV_LH>;

let Predicates = [HasAVX] in {
  // VMOVHPS patterns
  // Fold an i64 scalar load (or zero-extending vector load) being merged
  // into the high half via X86Movlhps.
  def : Pat<(X86Movlhps VR128:$src1,
                 (bc_v4f32 (v2i64 (scalar_to_vector (loadi64 addr:$src2))))),
            (VMOVHPSrm VR128:$src1, addr:$src2)>;
  def : Pat<(X86Movlhps VR128:$src1,
                 (bc_v4i32 (v2i64 (X86vzload addr:$src2)))),
            (VMOVHPSrm VR128:$src1, addr:$src2)>;

  // FIXME: Instead of X86Unpckl, there should be a X86Movlhpd here, the problem
  // is during lowering, where it's not possible to recognize the load fold
  // cause it has two uses through a bitcast. One use disappears at isel time
  // and the fold opportunity reappears.
1308 def : Pat<(v2f64 (X86Unpckl VR128:$src1, 1309 (scalar_to_vector (loadf64 addr:$src2)))), 1310 (VMOVHPDrm VR128:$src1, addr:$src2)>; 1311} 1312 1313let Predicates = [UseSSE1] in { 1314 // MOVHPS patterns 1315 def : Pat<(X86Movlhps VR128:$src1, 1316 (bc_v4f32 (v2i64 (scalar_to_vector (loadi64 addr:$src2))))), 1317 (MOVHPSrm VR128:$src1, addr:$src2)>; 1318 def : Pat<(X86Movlhps VR128:$src1, 1319 (bc_v4f32 (v2i64 (X86vzload addr:$src2)))), 1320 (MOVHPSrm VR128:$src1, addr:$src2)>; 1321} 1322 1323let Predicates = [UseSSE2] in { 1324 // FIXME: Instead of X86Unpckl, there should be a X86Movlhpd here, the problem 1325 // is during lowering, where it's not possible to recognize the load fold 1326 // cause it has two uses through a bitcast. One use disappears at isel time 1327 // and the fold opportunity reappears. 1328 def : Pat<(v2f64 (X86Unpckl VR128:$src1, 1329 (scalar_to_vector (loadf64 addr:$src2)))), 1330 (MOVHPDrm VR128:$src1, addr:$src2)>; 1331} 1332 1333//===----------------------------------------------------------------------===// 1334// SSE 1 & 2 - Move Low to High and High to Low packed FP Instructions 1335//===----------------------------------------------------------------------===// 1336 1337let AddedComplexity = 20 in { 1338 def VMOVLHPSrr : VPSI<0x16, MRMSrcReg, (outs VR128:$dst), 1339 (ins VR128:$src1, VR128:$src2), 1340 "movlhps\t{$src2, $src1, $dst|$dst, $src1, $src2}", 1341 [(set VR128:$dst, 1342 (v4f32 (X86Movlhps VR128:$src1, VR128:$src2)))], 1343 IIC_SSE_MOV_LH>, 1344 VEX_4V; 1345 def VMOVHLPSrr : VPSI<0x12, MRMSrcReg, (outs VR128:$dst), 1346 (ins VR128:$src1, VR128:$src2), 1347 "movhlps\t{$src2, $src1, $dst|$dst, $src1, $src2}", 1348 [(set VR128:$dst, 1349 (v4f32 (X86Movhlps VR128:$src1, VR128:$src2)))], 1350 IIC_SSE_MOV_LH>, 1351 VEX_4V; 1352} 1353let Constraints = "$src1 = $dst", AddedComplexity = 20 in { 1354 def MOVLHPSrr : PSI<0x16, MRMSrcReg, (outs VR128:$dst), 1355 (ins VR128:$src1, VR128:$src2), 1356 "movlhps\t{$src2, $dst|$dst, 
$src2}", 1357 [(set VR128:$dst, 1358 (v4f32 (X86Movlhps VR128:$src1, VR128:$src2)))], 1359 IIC_SSE_MOV_LH>; 1360 def MOVHLPSrr : PSI<0x12, MRMSrcReg, (outs VR128:$dst), 1361 (ins VR128:$src1, VR128:$src2), 1362 "movhlps\t{$src2, $dst|$dst, $src2}", 1363 [(set VR128:$dst, 1364 (v4f32 (X86Movhlps VR128:$src1, VR128:$src2)))], 1365 IIC_SSE_MOV_LH>; 1366} 1367 1368let Predicates = [HasAVX] in { 1369 // MOVLHPS patterns 1370 def : Pat<(v4i32 (X86Movlhps VR128:$src1, VR128:$src2)), 1371 (VMOVLHPSrr VR128:$src1, VR128:$src2)>; 1372 def : Pat<(v2i64 (X86Movlhps VR128:$src1, VR128:$src2)), 1373 (VMOVLHPSrr (v2i64 VR128:$src1), VR128:$src2)>; 1374 1375 // MOVHLPS patterns 1376 def : Pat<(v4i32 (X86Movhlps VR128:$src1, VR128:$src2)), 1377 (VMOVHLPSrr VR128:$src1, VR128:$src2)>; 1378} 1379 1380let Predicates = [UseSSE1] in { 1381 // MOVLHPS patterns 1382 def : Pat<(v4i32 (X86Movlhps VR128:$src1, VR128:$src2)), 1383 (MOVLHPSrr VR128:$src1, VR128:$src2)>; 1384 def : Pat<(v2i64 (X86Movlhps VR128:$src1, VR128:$src2)), 1385 (MOVLHPSrr (v2i64 VR128:$src1), VR128:$src2)>; 1386 1387 // MOVHLPS patterns 1388 def : Pat<(v4i32 (X86Movhlps VR128:$src1, VR128:$src2)), 1389 (MOVHLPSrr VR128:$src1, VR128:$src2)>; 1390} 1391 1392//===----------------------------------------------------------------------===// 1393// SSE 1 & 2 - Conversion Instructions 1394//===----------------------------------------------------------------------===// 1395 1396def SSE_CVT_PD : OpndItins< 1397 IIC_SSE_CVT_PD_RR, IIC_SSE_CVT_PD_RM 1398>; 1399 1400def SSE_CVT_PS : OpndItins< 1401 IIC_SSE_CVT_PS_RR, IIC_SSE_CVT_PS_RM 1402>; 1403 1404def SSE_CVT_Scalar : OpndItins< 1405 IIC_SSE_CVT_Scalar_RR, IIC_SSE_CVT_Scalar_RM 1406>; 1407 1408def SSE_CVT_SS2SI_32 : OpndItins< 1409 IIC_SSE_CVT_SS2SI32_RR, IIC_SSE_CVT_SS2SI32_RM 1410>; 1411 1412def SSE_CVT_SS2SI_64 : OpndItins< 1413 IIC_SSE_CVT_SS2SI64_RR, IIC_SSE_CVT_SS2SI64_RM 1414>; 1415 1416def SSE_CVT_SD2SI : OpndItins< 1417 IIC_SSE_CVT_SD2SI_RR, IIC_SSE_CVT_SD2SI_RM 1418>; 

// sse12_cvt_s - scalar conversions with a real ISel pattern (OpNode), in
// register and load-folded forms.
multiclass sse12_cvt_s<bits<8> opc, RegisterClass SrcRC, RegisterClass DstRC,
                       SDNode OpNode, X86MemOperand x86memop, PatFrag ld_frag,
                       string asm, OpndItins itins> {
  def rr : SI<opc, MRMSrcReg, (outs DstRC:$dst), (ins SrcRC:$src), asm,
              [(set DstRC:$dst, (OpNode SrcRC:$src))],
              itins.rr>;
  def rm : SI<opc, MRMSrcMem, (outs DstRC:$dst), (ins x86memop:$src), asm,
              [(set DstRC:$dst, (OpNode (ld_frag addr:$src)))],
              itins.rm>;
}

// sse12_cvt_p - packed conversions; defined with empty patterns (selection
// is done via separate Pat<>s elsewhere), so mark them side-effect free.
multiclass sse12_cvt_p<bits<8> opc, RegisterClass SrcRC, RegisterClass DstRC,
                       X86MemOperand x86memop, string asm, Domain d,
                       OpndItins itins> {
let neverHasSideEffects = 1 in {
  def rr : I<opc, MRMSrcReg, (outs DstRC:$dst), (ins SrcRC:$src), asm,
             [], itins.rr, d>;
  let mayLoad = 1 in
  def rm : I<opc, MRMSrcMem, (outs DstRC:$dst), (ins x86memop:$src), asm,
             [], itins.rm, d>;
}
}

// sse12_vcvt_avx - AVX three-operand scalar conversions: $src1 supplies the
// upper elements of the destination register. No patterns here either.
multiclass sse12_vcvt_avx<bits<8> opc, RegisterClass SrcRC, RegisterClass DstRC,
                          X86MemOperand x86memop, string asm> {
let neverHasSideEffects = 1 in {
  def rr : SI<opc, MRMSrcReg, (outs DstRC:$dst), (ins DstRC:$src1, SrcRC:$src),
              !strconcat(asm,"\t{$src, $src1, $dst|$dst, $src1, $src}"), []>;
  let mayLoad = 1 in
  def rm : SI<opc, MRMSrcMem, (outs DstRC:$dst),
              (ins DstRC:$src1, x86memop:$src),
              !strconcat(asm,"\t{$src, $src1, $dst|$dst, $src1, $src}"), []>;
} // neverHasSideEffects = 1
}

// AVX truncating scalar FP -> integer conversions (vcvttss2si/vcvttsd2si),
// 32- and 64-bit destination variants.
defm VCVTTSS2SI   : sse12_cvt_s<0x2C, FR32, GR32, fp_to_sint, f32mem, loadf32,
                                "cvttss2si\t{$src, $dst|$dst, $src}",
                                SSE_CVT_SS2SI_32>,
                                XS, VEX, VEX_LIG;
defm VCVTTSS2SI64 : sse12_cvt_s<0x2C, FR32, GR64, fp_to_sint, f32mem, loadf32,
                                "cvttss2si\t{$src, $dst|$dst, $src}",
                                SSE_CVT_SS2SI_64>,
                                XS, VEX, VEX_W, VEX_LIG;
defm VCVTTSD2SI   : sse12_cvt_s<0x2C, FR64, GR32, fp_to_sint, f64mem, loadf64,
                                "cvttsd2si\t{$src, $dst|$dst, $src}",
                                SSE_CVT_SD2SI>,
                                XD, VEX, VEX_LIG;
defm VCVTTSD2SI64 : sse12_cvt_s<0x2C, FR64, GR64, fp_to_sint, f64mem, loadf64,
                                "cvttsd2si\t{$src, $dst|$dst, $src}",
                                SSE_CVT_SD2SI>,
                                XD, VEX, VEX_W, VEX_LIG;

// Assembler aliases accepting explicit {l}/{q} size suffixes.
def : InstAlias<"vcvttss2si{l}\t{$src, $dst|$dst, $src}",
                (VCVTTSS2SIrr GR32:$dst, FR32:$src), 0>;
def : InstAlias<"vcvttss2si{l}\t{$src, $dst|$dst, $src}",
                (VCVTTSS2SIrm GR32:$dst, f32mem:$src), 0>;
def : InstAlias<"vcvttsd2si{l}\t{$src, $dst|$dst, $src}",
                (VCVTTSD2SIrr GR32:$dst, FR64:$src), 0>;
def : InstAlias<"vcvttsd2si{l}\t{$src, $dst|$dst, $src}",
                (VCVTTSD2SIrm GR32:$dst, f64mem:$src), 0>;
def : InstAlias<"vcvttss2si{q}\t{$src, $dst|$dst, $src}",
                (VCVTTSS2SI64rr GR64:$dst, FR32:$src), 0>;
def : InstAlias<"vcvttss2si{q}\t{$src, $dst|$dst, $src}",
                (VCVTTSS2SI64rm GR64:$dst, f32mem:$src), 0>;
def : InstAlias<"vcvttsd2si{q}\t{$src, $dst|$dst, $src}",
                (VCVTTSD2SI64rr GR64:$dst, FR64:$src), 0>;
def : InstAlias<"vcvttsd2si{q}\t{$src, $dst|$dst, $src}",
                (VCVTTSD2SI64rm GR64:$dst, f64mem:$src), 0>;

// The assembler can recognize rr 64-bit instructions by seeing a rxx
// register, but the same isn't true when only using memory operands,
// provide other assembly "l" and "q" forms to address this explicitly
// where appropriate to do so.
// AVX scalar int->fp conversions.  These are 3-operand (VEX_4V): the upper
// bits of the destination XMM come from $src1, so the compiler materializes
// $src1 with IMPLICIT_DEF in the patterns below.
defm VCVTSI2SS   : sse12_vcvt_avx<0x2A, GR32, FR32, i32mem, "cvtsi2ss{l}">,
                                  XS, VEX_4V, VEX_LIG;
defm VCVTSI2SS64 : sse12_vcvt_avx<0x2A, GR64, FR32, i64mem, "cvtsi2ss{q}">,
                                  XS, VEX_4V, VEX_W, VEX_LIG;
defm VCVTSI2SD   : sse12_vcvt_avx<0x2A, GR32, FR64, i32mem, "cvtsi2sd{l}">,
                                  XD, VEX_4V, VEX_LIG;
defm VCVTSI2SD64 : sse12_vcvt_avx<0x2A, GR64, FR64, i64mem, "cvtsi2sd{q}">,
                                  XD, VEX_4V, VEX_W, VEX_LIG;

// Assembler aliases: a plain "vcvtsi2ss" with a memory source defaults to the
// 32-bit integer form.
// NOTE(review): the alias result registers are written FR64 even for the
// single-precision cvtsi2ss form — looks like a copy/paste from the sd alias;
// verify against the assembler-matcher expectations.
def : InstAlias<"vcvtsi2ss\t{$src, $src1, $dst|$dst, $src1, $src}",
                (VCVTSI2SSrm FR64:$dst, FR64:$src1, i32mem:$src)>;
def : InstAlias<"vcvtsi2sd\t{$src, $src1, $dst|$dst, $src1, $src}",
                (VCVTSI2SDrm FR64:$dst, FR64:$src1, i32mem:$src)>;

// Codegen patterns selecting the AVX forms for sint_to_fp.
let Predicates = [HasAVX] in {
  def : Pat<(f32 (sint_to_fp (loadi32 addr:$src))),
            (VCVTSI2SSrm (f32 (IMPLICIT_DEF)), addr:$src)>;
  def : Pat<(f32 (sint_to_fp (loadi64 addr:$src))),
            (VCVTSI2SS64rm (f32 (IMPLICIT_DEF)), addr:$src)>;
  def : Pat<(f64 (sint_to_fp (loadi32 addr:$src))),
            (VCVTSI2SDrm (f64 (IMPLICIT_DEF)), addr:$src)>;
  def : Pat<(f64 (sint_to_fp (loadi64 addr:$src))),
            (VCVTSI2SD64rm (f64 (IMPLICIT_DEF)), addr:$src)>;

  def : Pat<(f32 (sint_to_fp GR32:$src)),
            (VCVTSI2SSrr (f32 (IMPLICIT_DEF)), GR32:$src)>;
  def : Pat<(f32 (sint_to_fp GR64:$src)),
            (VCVTSI2SS64rr (f32 (IMPLICIT_DEF)), GR64:$src)>;
  def : Pat<(f64 (sint_to_fp GR32:$src)),
            (VCVTSI2SDrr (f64 (IMPLICIT_DEF)), GR32:$src)>;
  def : Pat<(f64 (sint_to_fp GR64:$src)),
            (VCVTSI2SD64rr (f64 (IMPLICIT_DEF)), GR64:$src)>;
}

// SSE scalar conversions (2-operand, non-VEX encodings).
defm CVTTSS2SI : sse12_cvt_s<0x2C, FR32, GR32, fp_to_sint, f32mem, loadf32,
                      "cvttss2si\t{$src, $dst|$dst, $src}",
                      SSE_CVT_SS2SI_32>, XS;
defm CVTTSS2SI64 : sse12_cvt_s<0x2C, FR32, GR64, fp_to_sint, f32mem, loadf32,
                      "cvttss2si\t{$src, $dst|$dst, $src}",
                      SSE_CVT_SS2SI_64>, XS, REX_W;
defm CVTTSD2SI : sse12_cvt_s<0x2C, FR64, GR32, fp_to_sint, f64mem, loadf64,
                      "cvttsd2si\t{$src, $dst|$dst, $src}",
                      SSE_CVT_SD2SI>, XD;
defm CVTTSD2SI64 : sse12_cvt_s<0x2C, FR64, GR64, fp_to_sint, f64mem, loadf64,
                      "cvttsd2si\t{$src, $dst|$dst, $src}",
                      SSE_CVT_SD2SI>, XD, REX_W;
defm CVTSI2SS  : sse12_cvt_s<0x2A, GR32, FR32, sint_to_fp, i32mem, loadi32,
                      "cvtsi2ss{l}\t{$src, $dst|$dst, $src}",
                      SSE_CVT_Scalar>, XS;
defm CVTSI2SS64 : sse12_cvt_s<0x2A, GR64, FR32, sint_to_fp, i64mem, loadi64,
                      "cvtsi2ss{q}\t{$src, $dst|$dst, $src}",
                      SSE_CVT_Scalar>, XS, REX_W;
defm CVTSI2SD  : sse12_cvt_s<0x2A, GR32, FR64, sint_to_fp, i32mem, loadi32,
                      "cvtsi2sd{l}\t{$src, $dst|$dst, $src}",
                      SSE_CVT_Scalar>, XD;
defm CVTSI2SD64 : sse12_cvt_s<0x2A, GR64, FR64, sint_to_fp, i64mem, loadi64,
                      "cvtsi2sd{q}\t{$src, $dst|$dst, $src}",
                      SSE_CVT_Scalar>, XD, REX_W;

// Suffixed aliases ("l"/"q") so the assembler can resolve the operand size
// for the non-VEX truncating conversions; priority 0 keeps these out of the
// default disassembly.
def : InstAlias<"cvttss2si{l}\t{$src, $dst|$dst, $src}",
                (CVTTSS2SIrr GR32:$dst, FR32:$src), 0>;
def : InstAlias<"cvttss2si{l}\t{$src, $dst|$dst, $src}",
                (CVTTSS2SIrm GR32:$dst, f32mem:$src), 0>;
def : InstAlias<"cvttsd2si{l}\t{$src, $dst|$dst, $src}",
                (CVTTSD2SIrr GR32:$dst, FR64:$src), 0>;
def : InstAlias<"cvttsd2si{l}\t{$src, $dst|$dst, $src}",
                (CVTTSD2SIrm GR32:$dst, f64mem:$src), 0>;
def : InstAlias<"cvttss2si{q}\t{$src, $dst|$dst, $src}",
                (CVTTSS2SI64rr GR64:$dst, FR32:$src), 0>;
def : InstAlias<"cvttss2si{q}\t{$src, $dst|$dst, $src}",
                (CVTTSS2SI64rm GR64:$dst, f32mem:$src), 0>;
def : InstAlias<"cvttsd2si{q}\t{$src, $dst|$dst, $src}",
                (CVTTSD2SI64rr GR64:$dst, FR64:$src), 0>;
def : InstAlias<"cvttsd2si{q}\t{$src, $dst|$dst, $src}",
                (CVTTSD2SI64rm GR64:$dst, f64mem:$src), 0>;

// NOTE(review): as above, FR64:$dst for the cvtsi2ss alias looks wrong for a
// single-precision destination — confirm intended matcher behavior.
def : InstAlias<"cvtsi2ss\t{$src, $dst|$dst, $src}",
                (CVTSI2SSrm FR64:$dst, i32mem:$src)>;
def : InstAlias<"cvtsi2sd\t{$src, $dst|$dst, $src}",
                (CVTSI2SDrm FR64:$dst, i32mem:$src)>;

// Conversion Instructions Intrinsics - Match intrinsics which expect MM
// and/or XMM operand(s).

/// sse12_cvt_sint - two-operand conversion taking a full XMM/memory source
/// and producing a GPR (used by the cvt*2si intrinsic forms).
multiclass sse12_cvt_sint<bits<8> opc, RegisterClass SrcRC, RegisterClass DstRC,
                          Intrinsic Int, Operand memop, ComplexPattern mem_cpat,
                          string asm, OpndItins itins> {
  def rr : SI<opc, MRMSrcReg, (outs DstRC:$dst), (ins SrcRC:$src),
              !strconcat(asm, "\t{$src, $dst|$dst, $src}"),
              [(set DstRC:$dst, (Int SrcRC:$src))], itins.rr>;
  def rm : SI<opc, MRMSrcMem, (outs DstRC:$dst), (ins memop:$src),
              !strconcat(asm, "\t{$src, $dst|$dst, $src}"),
              [(set DstRC:$dst, (Int mem_cpat:$src))], itins.rm>;
}

/// sse12_cvt_sint_3addr - conversion whose destination register also supplies
/// the untouched upper bits ($src1); Is2Addr selects AT&T 2-operand syntax.
multiclass sse12_cvt_sint_3addr<bits<8> opc, RegisterClass SrcRC,
                    RegisterClass DstRC, Intrinsic Int, X86MemOperand x86memop,
                    PatFrag ld_frag, string asm, OpndItins itins,
                    bit Is2Addr = 1> {
  def rr : SI<opc, MRMSrcReg, (outs DstRC:$dst), (ins DstRC:$src1, SrcRC:$src2),
              !if(Is2Addr,
                  !strconcat(asm, "\t{$src2, $dst|$dst, $src2}"),
                  !strconcat(asm, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")),
              [(set DstRC:$dst, (Int DstRC:$src1, SrcRC:$src2))],
              itins.rr>;
  def rm : SI<opc, MRMSrcMem, (outs DstRC:$dst),
              (ins DstRC:$src1, x86memop:$src2),
              !if(Is2Addr,
                  !strconcat(asm, "\t{$src2, $dst|$dst, $src2}"),
                  !strconcat(asm, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")),
              [(set DstRC:$dst, (Int DstRC:$src1, (ld_frag addr:$src2)))],
              itins.rm>;
}

defm VCVTSD2SI : sse12_cvt_sint<0x2D, VR128, GR32,
                  int_x86_sse2_cvtsd2si, sdmem, sse_load_f64, "cvtsd2si",
                  SSE_CVT_SD2SI>, XD, VEX, VEX_LIG;
defm VCVTSD2SI64 : sse12_cvt_sint<0x2D, VR128, GR64,
                  int_x86_sse2_cvtsd2si64, sdmem, sse_load_f64, "cvtsd2si",
                  SSE_CVT_SD2SI>, XD, VEX, VEX_W, VEX_LIG;

defm CVTSD2SI : sse12_cvt_sint<0x2D, VR128, GR32, int_x86_sse2_cvtsd2si,
                 sdmem, sse_load_f64, "cvtsd2si", SSE_CVT_SD2SI>, XD;
defm CVTSD2SI64 : sse12_cvt_sint<0x2D, VR128, GR64, int_x86_sse2_cvtsd2si64,
                   sdmem, sse_load_f64, "cvtsd2si", SSE_CVT_SD2SI>, XD, REX_W;
// Intrinsic (whole-XMM) forms of the scalar int->fp conversions.
defm Int_VCVTSI2SS : sse12_cvt_sint_3addr<0x2A, GR32, VR128,
          int_x86_sse_cvtsi2ss, i32mem, loadi32, "cvtsi2ss{l}",
          SSE_CVT_Scalar, 0>, XS, VEX_4V;
defm Int_VCVTSI2SS64 : sse12_cvt_sint_3addr<0x2A, GR64, VR128,
          int_x86_sse_cvtsi642ss, i64mem, loadi64, "cvtsi2ss{q}",
          SSE_CVT_Scalar, 0>, XS, VEX_4V, VEX_W;
defm Int_VCVTSI2SD : sse12_cvt_sint_3addr<0x2A, GR32, VR128,
          int_x86_sse2_cvtsi2sd, i32mem, loadi32, "cvtsi2sd{l}",
          SSE_CVT_Scalar, 0>, XD, VEX_4V;
defm Int_VCVTSI2SD64 : sse12_cvt_sint_3addr<0x2A, GR64, VR128,
          int_x86_sse2_cvtsi642sd, i64mem, loadi64, "cvtsi2sd{q}",
          SSE_CVT_Scalar, 0>, XD, VEX_4V, VEX_W;

// Non-VEX forms are 2-address: the destination XMM is also the pass-through
// source.
let Constraints = "$src1 = $dst" in {
  defm Int_CVTSI2SS : sse12_cvt_sint_3addr<0x2A, GR32, VR128,
                        int_x86_sse_cvtsi2ss, i32mem, loadi32,
                        "cvtsi2ss{l}", SSE_CVT_Scalar>, XS;
  defm Int_CVTSI2SS64 : sse12_cvt_sint_3addr<0x2A, GR64, VR128,
                        int_x86_sse_cvtsi642ss, i64mem, loadi64,
                        "cvtsi2ss{q}", SSE_CVT_Scalar>, XS, REX_W;
  defm Int_CVTSI2SD : sse12_cvt_sint_3addr<0x2A, GR32, VR128,
                        int_x86_sse2_cvtsi2sd, i32mem, loadi32,
                        "cvtsi2sd{l}", SSE_CVT_Scalar>, XD;
  defm Int_CVTSI2SD64 : sse12_cvt_sint_3addr<0x2A, GR64, VR128,
                        int_x86_sse2_cvtsi642sd, i64mem, loadi64,
                        "cvtsi2sd{q}", SSE_CVT_Scalar>, XD, REX_W;
}

/// SSE 1 Only

// Aliases for intrinsics
defm Int_VCVTTSS2SI : sse12_cvt_sint<0x2C, VR128, GR32, int_x86_sse_cvttss2si,
                                     ssmem, sse_load_f32, "cvttss2si",
                                     SSE_CVT_SS2SI_32>, XS, VEX;
defm Int_VCVTTSS2SI64 : sse12_cvt_sint<0x2C, VR128, GR64,
                                     int_x86_sse_cvttss2si64, ssmem, sse_load_f32,
                                     "cvttss2si", SSE_CVT_SS2SI_64>,
                                     XS, VEX, VEX_W;
defm Int_VCVTTSD2SI : sse12_cvt_sint<0x2C, VR128, GR32, int_x86_sse2_cvttsd2si,
                                     sdmem, sse_load_f64, "cvttsd2si",
                                     SSE_CVT_SD2SI>, XD, VEX;
defm Int_VCVTTSD2SI64 : sse12_cvt_sint<0x2C, VR128, GR64,
                                     int_x86_sse2_cvttsd2si64, sdmem, sse_load_f64,
                                     "cvttsd2si", SSE_CVT_SD2SI>,
                                     XD, VEX, VEX_W;
defm Int_CVTTSS2SI : sse12_cvt_sint<0x2C, VR128, GR32, int_x86_sse_cvttss2si,
                                    ssmem, sse_load_f32, "cvttss2si",
                                    SSE_CVT_SS2SI_32>, XS;
defm Int_CVTTSS2SI64 : sse12_cvt_sint<0x2C, VR128, GR64,
                                    int_x86_sse_cvttss2si64, ssmem, sse_load_f32,
                                    "cvttss2si", SSE_CVT_SS2SI_64>, XS, REX_W;
defm Int_CVTTSD2SI : sse12_cvt_sint<0x2C, VR128, GR32, int_x86_sse2_cvttsd2si,
                                    sdmem, sse_load_f64, "cvttsd2si",
                                    SSE_CVT_SD2SI>, XD;
defm Int_CVTTSD2SI64 : sse12_cvt_sint<0x2C, VR128, GR64,
                                    int_x86_sse2_cvttsd2si64, sdmem, sse_load_f64,
                                    "cvttsd2si", SSE_CVT_SD2SI>, XD, REX_W;

defm VCVTSS2SI   : sse12_cvt_sint<0x2D, VR128, GR32, int_x86_sse_cvtss2si,
                                  ssmem, sse_load_f32, "cvtss2si",
                                  SSE_CVT_SS2SI_32>, XS, VEX, VEX_LIG;
defm VCVTSS2SI64 : sse12_cvt_sint<0x2D, VR128, GR64, int_x86_sse_cvtss2si64,
                                  ssmem, sse_load_f32, "cvtss2si",
                                  SSE_CVT_SS2SI_64>, XS, VEX, VEX_W, VEX_LIG;

defm CVTSS2SI : sse12_cvt_sint<0x2D, VR128, GR32, int_x86_sse_cvtss2si,
                               ssmem, sse_load_f32, "cvtss2si",
                               SSE_CVT_SS2SI_32>, XS;
defm CVTSS2SI64 : sse12_cvt_sint<0x2D, VR128, GR64, int_x86_sse_cvtss2si64,
                                 ssmem, sse_load_f32, "cvtss2si",
                                 SSE_CVT_SS2SI_64>, XS, REX_W;

defm VCVTDQ2PS   : sse12_cvt_p<0x5B, VR128, VR128, i128mem,
                               "vcvtdq2ps\t{$src, $dst|$dst, $src}",
                               SSEPackedSingle, SSE_CVT_PS>,
                               TB, VEX, Requires<[HasAVX]>;
defm VCVTDQ2PSY  : sse12_cvt_p<0x5B, VR256, VR256, i256mem,
                               "vcvtdq2ps\t{$src, $dst|$dst, $src}",
                               SSEPackedSingle, SSE_CVT_PS>,
                               TB, VEX, VEX_L, Requires<[HasAVX]>;

defm CVTDQ2PS : sse12_cvt_p<0x5B, VR128, VR128, i128mem,
                            "cvtdq2ps\t{$src, $dst|$dst, $src}",
                            SSEPackedSingle, SSE_CVT_PS>,
                            TB, Requires<[UseSSE2]>;

// Suffixed assembler aliases for the intrinsic cvt*2si forms; priority 0
// keeps them out of default disassembly.
def : InstAlias<"vcvtss2si{l}\t{$src, $dst|$dst, $src}",
                (VCVTSS2SIrr GR32:$dst, VR128:$src), 0>;
def : InstAlias<"vcvtss2si{l}\t{$src, $dst|$dst, $src}",
                (VCVTSS2SIrm GR32:$dst, ssmem:$src), 0>;
def : InstAlias<"vcvtsd2si{l}\t{$src, $dst|$dst, $src}",
                (VCVTSD2SIrr GR32:$dst, VR128:$src), 0>;
def : InstAlias<"vcvtsd2si{l}\t{$src, $dst|$dst, $src}",
                (VCVTSD2SIrm GR32:$dst, sdmem:$src), 0>;
def : InstAlias<"vcvtss2si{q}\t{$src, $dst|$dst, $src}",
                (VCVTSS2SI64rr GR64:$dst, VR128:$src), 0>;
def : InstAlias<"vcvtss2si{q}\t{$src, $dst|$dst, $src}",
                (VCVTSS2SI64rm GR64:$dst, ssmem:$src), 0>;
def : InstAlias<"vcvtsd2si{q}\t{$src, $dst|$dst, $src}",
                (VCVTSD2SI64rr GR64:$dst, VR128:$src), 0>;
def : InstAlias<"vcvtsd2si{q}\t{$src, $dst|$dst, $src}",
                (VCVTSD2SI64rm GR64:$dst, sdmem:$src), 0>;

def : InstAlias<"cvtss2si{l}\t{$src, $dst|$dst, $src}",
                (CVTSS2SIrr GR32:$dst, VR128:$src), 0>;
def : InstAlias<"cvtss2si{l}\t{$src, $dst|$dst, $src}",
                (CVTSS2SIrm GR32:$dst, ssmem:$src), 0>;
def : InstAlias<"cvtsd2si{l}\t{$src, $dst|$dst, $src}",
                (CVTSD2SIrr GR32:$dst, VR128:$src), 0>;
def : InstAlias<"cvtsd2si{l}\t{$src, $dst|$dst, $src}",
                (CVTSD2SIrm GR32:$dst, sdmem:$src), 0>;
def : InstAlias<"cvtss2si{q}\t{$src, $dst|$dst, $src}",
                (CVTSS2SI64rr GR64:$dst, VR128:$src), 0>;
def : InstAlias<"cvtss2si{q}\t{$src, $dst|$dst, $src}",
                (CVTSS2SI64rm GR64:$dst, ssmem:$src), 0>;
def : InstAlias<"cvtsd2si{q}\t{$src, $dst|$dst, $src}",
                (CVTSD2SI64rr GR64:$dst, VR128:$src), 0>;
// FIX: this alias was missing the trailing 0 emit-priority flag that every
// other alias in this list carries; without it the alias becomes the
// preferred printed form.
def : InstAlias<"cvtsd2si{q}\t{$src, $dst|$dst, $src}",
                (CVTSD2SI64rm GR64:$dst, sdmem:$src), 0>;

/// SSE 2 Only

// Convert scalar double to scalar single
let neverHasSideEffects = 1 in {
def VCVTSD2SSrr  : VSDI<0x5A, MRMSrcReg, (outs FR32:$dst),
                        (ins FR64:$src1, FR64:$src2),
                        "cvtsd2ss\t{$src2, $src1, $dst|$dst, $src1, $src2}", [],
                        IIC_SSE_CVT_Scalar_RR>, VEX_4V, VEX_LIG;
let mayLoad = 1 in
def VCVTSD2SSrm  : I<0x5A, MRMSrcMem, (outs FR32:$dst),
                     (ins FR64:$src1, f64mem:$src2),
                     "vcvtsd2ss\t{$src2, $src1, $dst|$dst, $src1, $src2}",
                     [], IIC_SSE_CVT_Scalar_RM>,
                     XD, Requires<[HasAVX, OptForSize]>, VEX_4V, VEX_LIG;
}

def : Pat<(f32 (fround FR64:$src)), (VCVTSD2SSrr FR64:$src, FR64:$src)>,
          Requires<[HasAVX]>;

def CVTSD2SSrr  : SDI<0x5A, MRMSrcReg, (outs FR32:$dst), (ins FR64:$src),
                      "cvtsd2ss\t{$src, $dst|$dst, $src}",
                      [(set FR32:$dst, (fround FR64:$src))],
                      IIC_SSE_CVT_Scalar_RR>;
def CVTSD2SSrm  : I<0x5A, MRMSrcMem, (outs FR32:$dst), (ins f64mem:$src),
                    "cvtsd2ss\t{$src, $dst|$dst, $src}",
                    [(set FR32:$dst, (fround (loadf64 addr:$src)))],
                    IIC_SSE_CVT_Scalar_RM>,
                    XD,
                    Requires<[UseSSE2, OptForSize]>;

def Int_VCVTSD2SSrr : I<0x5A, MRMSrcReg,
                        (outs VR128:$dst), (ins VR128:$src1, VR128:$src2),
                        "vcvtsd2ss\t{$src2, $src1, $dst|$dst, $src1, $src2}",
                        [(set VR128:$dst,
                          (int_x86_sse2_cvtsd2ss VR128:$src1, VR128:$src2))],
                        IIC_SSE_CVT_Scalar_RR>, XD, VEX_4V, Requires<[HasAVX]>;
// FIX: the rm form takes a memory operand (sdmem), so its format must be
// MRMSrcMem, not MRMSrcReg — matches the Int_VCVTSS2SDrm form below.
def Int_VCVTSD2SSrm : I<0x5A, MRMSrcMem,
                        (outs VR128:$dst), (ins VR128:$src1, sdmem:$src2),
                        "vcvtsd2ss\t{$src2, $src1, $dst|$dst, $src1, $src2}",
                        [(set VR128:$dst, (int_x86_sse2_cvtsd2ss
                                           VR128:$src1, sse_load_f64:$src2))],
                        IIC_SSE_CVT_Scalar_RM>, XD, VEX_4V, Requires<[HasAVX]>;

let Constraints = "$src1 = $dst" in {
def Int_CVTSD2SSrr : I<0x5A, MRMSrcReg,
                       (outs VR128:$dst), (ins VR128:$src1, VR128:$src2),
                       "cvtsd2ss\t{$src2, $src1, $dst|$dst, $src1, $src2}",
                       [(set VR128:$dst,
                         (int_x86_sse2_cvtsd2ss VR128:$src1, VR128:$src2))],
                       IIC_SSE_CVT_Scalar_RR>, XD, Requires<[UseSSE2]>;
// FIX: MRMSrcReg -> MRMSrcMem, same encoding bug as Int_VCVTSD2SSrm above.
def Int_CVTSD2SSrm : I<0x5A, MRMSrcMem,
                       (outs VR128:$dst), (ins VR128:$src1, sdmem:$src2),
                       "cvtsd2ss\t{$src2, $src1, $dst|$dst, $src1, $src2}",
                       [(set VR128:$dst, (int_x86_sse2_cvtsd2ss
                                          VR128:$src1, sse_load_f64:$src2))],
                       IIC_SSE_CVT_Scalar_RM>, XD, Requires<[UseSSE2]>;
}
// Convert scalar single to scalar double
// SSE2 instructions with XS prefix
// AVX forms take a pass-through $src1 for the upper XMM bits; both rr and rm
// are pattern-less here (patterns are supplied separately below), hence
// neverHasSideEffects.
let neverHasSideEffects = 1 in {
def VCVTSS2SDrr : I<0x5A, MRMSrcReg, (outs FR64:$dst),
                    (ins FR32:$src1, FR32:$src2),
                    "vcvtss2sd\t{$src2, $src1, $dst|$dst, $src1, $src2}",
                    [], IIC_SSE_CVT_Scalar_RR>,
                    XS, Requires<[HasAVX]>, VEX_4V, VEX_LIG;
let mayLoad = 1 in
def VCVTSS2SDrm : I<0x5A, MRMSrcMem, (outs FR64:$dst),
                    (ins FR32:$src1, f32mem:$src2),
                    "vcvtss2sd\t{$src2, $src1, $dst|$dst, $src1, $src2}",
                    [], IIC_SSE_CVT_Scalar_RM>,
                    XS, VEX_4V, VEX_LIG, Requires<[HasAVX, OptForSize]>;
}

// fextend/extload selection for the AVX forms.  The memory-folded form is
// only used when optimizing for size; for speed a separate scalar load plus
// register convert is preferred (avoids a partial-register dependency on the
// folded load).
def : Pat<(f64 (fextend FR32:$src)),
          (VCVTSS2SDrr FR32:$src, FR32:$src)>, Requires<[HasAVX]>;
def : Pat<(fextend (loadf32 addr:$src)),
          (VCVTSS2SDrm (f32 (IMPLICIT_DEF)), addr:$src)>, Requires<[HasAVX]>;

def : Pat<(extloadf32 addr:$src),
          (VCVTSS2SDrm (f32 (IMPLICIT_DEF)), addr:$src)>,
          Requires<[HasAVX, OptForSize]>;
def : Pat<(extloadf32 addr:$src),
          (VCVTSS2SDrr (f32 (IMPLICIT_DEF)), (VMOVSSrm addr:$src))>,
          Requires<[HasAVX, OptForSpeed]>;

def CVTSS2SDrr : I<0x5A, MRMSrcReg, (outs FR64:$dst), (ins FR32:$src),
                   "cvtss2sd\t{$src, $dst|$dst, $src}",
                   [(set FR64:$dst, (fextend FR32:$src))],
                   IIC_SSE_CVT_Scalar_RR>, XS,
                 Requires<[UseSSE2]>;
def CVTSS2SDrm : I<0x5A, MRMSrcMem, (outs FR64:$dst), (ins f32mem:$src),
                   "cvtss2sd\t{$src, $dst|$dst, $src}",
                   [(set FR64:$dst, (extloadf32 addr:$src))],
                   IIC_SSE_CVT_Scalar_RM>, XS,
                 Requires<[UseSSE2, OptForSize]>;

// extload f32 -> f64.  This matches load+fextend because we have a hack in
// the isel (PreprocessForFPConvert) that can introduce loads after dag
// combine.
// Since these loads aren't folded into the fextend, we have to match it
// explicitly here.
// SSE selection for load+fextend (see the PreprocessForFPConvert note above).
def : Pat<(fextend (loadf32 addr:$src)),
          (CVTSS2SDrm addr:$src)>, Requires<[UseSSE2]>;
def : Pat<(extloadf32 addr:$src),
          (CVTSS2SDrr (MOVSSrm addr:$src))>, Requires<[UseSSE2, OptForSpeed]>;

// Intrinsic (whole-XMM) forms of cvtss2sd.
def Int_VCVTSS2SDrr: I<0x5A, MRMSrcReg,
                      (outs VR128:$dst), (ins VR128:$src1, VR128:$src2),
                    "vcvtss2sd\t{$src2, $src1, $dst|$dst, $src1, $src2}",
                    [(set VR128:$dst,
                      (int_x86_sse2_cvtss2sd VR128:$src1, VR128:$src2))],
                    IIC_SSE_CVT_Scalar_RR>, XS, VEX_4V, Requires<[HasAVX]>;
def Int_VCVTSS2SDrm: I<0x5A, MRMSrcMem,
                      (outs VR128:$dst), (ins VR128:$src1, ssmem:$src2),
                    "vcvtss2sd\t{$src2, $src1, $dst|$dst, $src1, $src2}",
                    [(set VR128:$dst,
                      (int_x86_sse2_cvtss2sd VR128:$src1, sse_load_f32:$src2))],
                    IIC_SSE_CVT_Scalar_RM>, XS, VEX_4V, Requires<[HasAVX]>;
let Constraints = "$src1 = $dst" in { // SSE2 instructions with XS prefix
def Int_CVTSS2SDrr: I<0x5A, MRMSrcReg,
                      (outs VR128:$dst), (ins VR128:$src1, VR128:$src2),
                    "cvtss2sd\t{$src2, $dst|$dst, $src2}",
                    [(set VR128:$dst,
                      (int_x86_sse2_cvtss2sd VR128:$src1, VR128:$src2))],
                    IIC_SSE_CVT_Scalar_RR>, XS, Requires<[UseSSE2]>;
def Int_CVTSS2SDrm: I<0x5A, MRMSrcMem,
                      (outs VR128:$dst), (ins VR128:$src1, ssmem:$src2),
                    "cvtss2sd\t{$src2, $dst|$dst, $src2}",
                    [(set VR128:$dst,
                      (int_x86_sse2_cvtss2sd VR128:$src1, sse_load_f32:$src2))],
                    IIC_SSE_CVT_Scalar_RM>, XS, Requires<[UseSSE2]>;
}

// Convert packed single/double fp to doubleword
def VCVTPS2DQrr : VPDI<0x5B, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
                       "cvtps2dq\t{$src, $dst|$dst, $src}",
                       [(set VR128:$dst, (int_x86_sse2_cvtps2dq VR128:$src))],
                       IIC_SSE_CVT_PS_RR>, VEX;
def VCVTPS2DQrm : VPDI<0x5B, MRMSrcMem, (outs VR128:$dst), (ins f128mem:$src),
                       "cvtps2dq\t{$src, $dst|$dst, $src}",
                       [(set VR128:$dst,
                         (int_x86_sse2_cvtps2dq (memopv4f32 addr:$src)))],
                       IIC_SSE_CVT_PS_RM>, VEX;
def VCVTPS2DQYrr : VPDI<0x5B, MRMSrcReg, (outs VR256:$dst), (ins VR256:$src),
                        "cvtps2dq\t{$src, $dst|$dst, $src}",
                        [(set VR256:$dst,
                          (int_x86_avx_cvt_ps2dq_256 VR256:$src))],
                        IIC_SSE_CVT_PS_RR>, VEX, VEX_L;
def VCVTPS2DQYrm : VPDI<0x5B, MRMSrcMem, (outs VR256:$dst), (ins f256mem:$src),
                        "cvtps2dq\t{$src, $dst|$dst, $src}",
                        [(set VR256:$dst,
                          (int_x86_avx_cvt_ps2dq_256 (memopv8f32 addr:$src)))],
                        IIC_SSE_CVT_PS_RM>, VEX, VEX_L;
def CVTPS2DQrr : PDI<0x5B, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
                     "cvtps2dq\t{$src, $dst|$dst, $src}",
                     [(set VR128:$dst, (int_x86_sse2_cvtps2dq VR128:$src))],
                     IIC_SSE_CVT_PS_RR>;
def CVTPS2DQrm : PDI<0x5B, MRMSrcMem, (outs VR128:$dst), (ins f128mem:$src),
                     "cvtps2dq\t{$src, $dst|$dst, $src}",
                     [(set VR128:$dst,
                       (int_x86_sse2_cvtps2dq (memopv4f32 addr:$src)))],
                     IIC_SSE_CVT_PS_RM>;


// Convert Packed Double FP to Packed DW Integers
let Predicates = [HasAVX] in {
// The assembler can recognize rr 256-bit instructions by seeing a ymm
// register, but the same isn't true when using memory operands instead.
// Provide other assembly rr and rm forms to address this explicitly.
// NOTE(review): the VCVTPD2DQ* defs below carry no itinerary argument while
// their SSE counterparts (CVTPD2DQ*) do — presumably an omission; confirm
// whether IIC_SSE_CVT_PD_RR/RM should be added here.
def VCVTPD2DQrr  : SDI<0xE6, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
                       "vcvtpd2dq\t{$src, $dst|$dst, $src}",
                       [(set VR128:$dst, (int_x86_sse2_cvtpd2dq VR128:$src))]>,
                       VEX;

// XMM only
def : InstAlias<"vcvtpd2dqx\t{$src, $dst|$dst, $src}",
                (VCVTPD2DQrr VR128:$dst, VR128:$src)>;
def VCVTPD2DQXrm : SDI<0xE6, MRMSrcMem, (outs VR128:$dst), (ins f128mem:$src),
                       "vcvtpd2dqx\t{$src, $dst|$dst, $src}",
                       [(set VR128:$dst,
                         (int_x86_sse2_cvtpd2dq (memopv2f64 addr:$src)))]>, VEX;

// YMM only
def VCVTPD2DQYrr : SDI<0xE6, MRMSrcReg, (outs VR128:$dst), (ins VR256:$src),
                       "vcvtpd2dq{y}\t{$src, $dst|$dst, $src}",
                       [(set VR128:$dst,
                         (int_x86_avx_cvt_pd2dq_256 VR256:$src))]>, VEX, VEX_L;
def VCVTPD2DQYrm : SDI<0xE6, MRMSrcMem, (outs VR128:$dst), (ins f256mem:$src),
                       "vcvtpd2dq{y}\t{$src, $dst|$dst, $src}",
                       [(set VR128:$dst,
                         (int_x86_avx_cvt_pd2dq_256 (memopv4f64 addr:$src)))]>,
                       VEX, VEX_L;
def : InstAlias<"vcvtpd2dq\t{$src, $dst|$dst, $src}",
                (VCVTPD2DQYrr VR128:$dst, VR256:$src)>;
}

def CVTPD2DQrm  : SDI<0xE6, MRMSrcMem, (outs VR128:$dst), (ins f128mem:$src),
                      "cvtpd2dq\t{$src, $dst|$dst, $src}",
                      [(set VR128:$dst,
                        (int_x86_sse2_cvtpd2dq (memopv2f64 addr:$src)))],
                      IIC_SSE_CVT_PD_RM>;
def CVTPD2DQrr  : SDI<0xE6, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
                      "cvtpd2dq\t{$src, $dst|$dst, $src}",
                      [(set VR128:$dst, (int_x86_sse2_cvtpd2dq VR128:$src))],
                      IIC_SSE_CVT_PD_RR>;

// Convert with truncation packed single/double fp to doubleword
// SSE2 packed instructions with XS prefix
def VCVTTPS2DQrr : VS2SI<0x5B, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
                         "cvttps2dq\t{$src, $dst|$dst, $src}",
                         [(set VR128:$dst,
                           (int_x86_sse2_cvttps2dq VR128:$src))],
                         IIC_SSE_CVT_PS_RR>, VEX;
def VCVTTPS2DQrm : VS2SI<0x5B, MRMSrcMem, (outs VR128:$dst), (ins f128mem:$src),
                         "cvttps2dq\t{$src, $dst|$dst, $src}",
                         [(set VR128:$dst, (int_x86_sse2_cvttps2dq
                                            (memopv4f32 addr:$src)))],
                         IIC_SSE_CVT_PS_RM>, VEX;
def VCVTTPS2DQYrr : VS2SI<0x5B, MRMSrcReg, (outs VR256:$dst), (ins VR256:$src),
                          "cvttps2dq\t{$src, $dst|$dst, $src}",
                          [(set VR256:$dst,
                            (int_x86_avx_cvtt_ps2dq_256 VR256:$src))],
                          IIC_SSE_CVT_PS_RR>, VEX, VEX_L;
def VCVTTPS2DQYrm : VS2SI<0x5B, MRMSrcMem, (outs VR256:$dst), (ins f256mem:$src),
                          "cvttps2dq\t{$src, $dst|$dst, $src}",
                          [(set VR256:$dst, (int_x86_avx_cvtt_ps2dq_256
                                             (memopv8f32 addr:$src)))],
                          IIC_SSE_CVT_PS_RM>, VEX, VEX_L;

def CVTTPS2DQrr : S2SI<0x5B, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
                       "cvttps2dq\t{$src, $dst|$dst, $src}",
                       [(set VR128:$dst, (int_x86_sse2_cvttps2dq VR128:$src))],
                       IIC_SSE_CVT_PS_RR>;
def CVTTPS2DQrm : S2SI<0x5B, MRMSrcMem, (outs VR128:$dst), (ins f128mem:$src),
                       "cvttps2dq\t{$src, $dst|$dst, $src}",
                       [(set VR128:$dst,
                         (int_x86_sse2_cvttps2dq (memopv4f32 addr:$src)))],
                       IIC_SSE_CVT_PS_RM>;

// Codegen patterns: select the AVX forms for generic sint_to_fp/fp_to_sint
// nodes and for the cvtdq2ps intrinsic.
let Predicates = [HasAVX] in {
  def : Pat<(v4f32 (sint_to_fp (v4i32 VR128:$src))),
            (VCVTDQ2PSrr VR128:$src)>;
  def : Pat<(v4f32 (sint_to_fp (bc_v4i32 (memopv2i64 addr:$src)))),
            (VCVTDQ2PSrm addr:$src)>;

  def : Pat<(int_x86_sse2_cvtdq2ps VR128:$src),
            (VCVTDQ2PSrr VR128:$src)>;
  def : Pat<(int_x86_sse2_cvtdq2ps (bc_v4i32 (memopv2i64 addr:$src))),
            (VCVTDQ2PSrm addr:$src)>;

  def : Pat<(v4i32 (fp_to_sint (v4f32 VR128:$src))),
            (VCVTTPS2DQrr VR128:$src)>;
  def : Pat<(v4i32 (fp_to_sint (memopv4f32 addr:$src))),
            (VCVTTPS2DQrm addr:$src)>;

  def : Pat<(v8f32 (sint_to_fp (v8i32 VR256:$src))),
            (VCVTDQ2PSYrr VR256:$src)>;
  def : Pat<(v8f32 (sint_to_fp (bc_v8i32 (memopv4i64 addr:$src)))),
            (VCVTDQ2PSYrm addr:$src)>;

  def : Pat<(v8i32 (fp_to_sint (v8f32 VR256:$src))),
            (VCVTTPS2DQYrr VR256:$src)>;
  def : Pat<(v8i32 (fp_to_sint (memopv8f32 addr:$src))),
            (VCVTTPS2DQYrm addr:$src)>;
}

// Same selections for the non-VEX SSE2 forms.
let Predicates = [UseSSE2] in {
  def : Pat<(v4f32 (sint_to_fp (v4i32 VR128:$src))),
            (CVTDQ2PSrr VR128:$src)>;
  def : Pat<(v4f32 (sint_to_fp (bc_v4i32 (memopv2i64 addr:$src)))),
            (CVTDQ2PSrm addr:$src)>;

  def : Pat<(int_x86_sse2_cvtdq2ps VR128:$src),
            (CVTDQ2PSrr VR128:$src)>;
  def : Pat<(int_x86_sse2_cvtdq2ps (bc_v4i32 (memopv2i64 addr:$src))),
            (CVTDQ2PSrm addr:$src)>;

  def : Pat<(v4i32 (fp_to_sint (v4f32 VR128:$src))),
            (CVTTPS2DQrr VR128:$src)>;
  def : Pat<(v4i32 (fp_to_sint (memopv4f32 addr:$src))),
            (CVTTPS2DQrm addr:$src)>;
}

def VCVTTPD2DQrr : VPDI<0xE6, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
                        "cvttpd2dq\t{$src, $dst|$dst, $src}",
                        [(set VR128:$dst,
                          (int_x86_sse2_cvttpd2dq VR128:$src))],
                        IIC_SSE_CVT_PD_RR>, VEX;

// The assembler can recognize rr 256-bit instructions by seeing a ymm
// register, but the same isn't true when using memory operands instead.
// Provide other assembly rr and rm forms to address this explicitly.
// XMM only
def : InstAlias<"vcvttpd2dqx\t{$src, $dst|$dst, $src}",
                (VCVTTPD2DQrr VR128:$dst, VR128:$src)>;
def VCVTTPD2DQXrm : VPDI<0xE6, MRMSrcMem, (outs VR128:$dst), (ins f128mem:$src),
                         "cvttpd2dqx\t{$src, $dst|$dst, $src}",
                         [(set VR128:$dst, (int_x86_sse2_cvttpd2dq
                                            (memopv2f64 addr:$src)))],
                         IIC_SSE_CVT_PD_RM>, VEX;

// YMM only
def VCVTTPD2DQYrr : VPDI<0xE6, MRMSrcReg, (outs VR128:$dst), (ins VR256:$src),
                         "cvttpd2dq{y}\t{$src, $dst|$dst, $src}",
                         [(set VR128:$dst,
                           (int_x86_avx_cvtt_pd2dq_256 VR256:$src))],
                         IIC_SSE_CVT_PD_RR>, VEX, VEX_L;
def VCVTTPD2DQYrm : VPDI<0xE6, MRMSrcMem, (outs VR128:$dst), (ins f256mem:$src),
                         "cvttpd2dq{y}\t{$src, $dst|$dst, $src}",
                         [(set VR128:$dst,
                           (int_x86_avx_cvtt_pd2dq_256 (memopv4f64 addr:$src)))],
                         IIC_SSE_CVT_PD_RM>, VEX, VEX_L;
def : InstAlias<"vcvttpd2dq\t{$src, $dst|$dst, $src}",
                (VCVTTPD2DQYrr VR128:$dst, VR256:$src)>;

let Predicates = [HasAVX] in {
  def : Pat<(v4i32 (fp_to_sint (v4f64 VR256:$src))),
            (VCVTTPD2DQYrr VR256:$src)>;
  def : Pat<(v4i32 (fp_to_sint (memopv4f64 addr:$src))),
            (VCVTTPD2DQYrm addr:$src)>;
} // Predicates = [HasAVX]

def CVTTPD2DQrr : PDI<0xE6, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
                      "cvttpd2dq\t{$src, $dst|$dst, $src}",
                      [(set VR128:$dst, (int_x86_sse2_cvttpd2dq VR128:$src))],
                      IIC_SSE_CVT_PD_RR>;
def CVTTPD2DQrm : PDI<0xE6, MRMSrcMem, (outs VR128:$dst),(ins f128mem:$src),
                      "cvttpd2dq\t{$src, $dst|$dst, $src}",
                      [(set VR128:$dst, (int_x86_sse2_cvttpd2dq
                                         (memopv2f64 addr:$src)))],
                      IIC_SSE_CVT_PD_RM>;

// Convert packed single to packed double
let Predicates = [HasAVX] in {
  // SSE2 instructions without OpSize prefix
def VCVTPS2PDrr : I<0x5A, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
                    "vcvtps2pd\t{$src, $dst|$dst, $src}",
                    [(set VR128:$dst, (int_x86_sse2_cvtps2pd VR128:$src))],
                    IIC_SSE_CVT_PD_RR>, TB, VEX;
def VCVTPS2PDrm : I<0x5A, MRMSrcMem, (outs VR128:$dst), (ins f64mem:$src),
                    "vcvtps2pd\t{$src, $dst|$dst, $src}",
                    [(set VR128:$dst, (v2f64 (extloadv2f32 addr:$src)))],
                    IIC_SSE_CVT_PD_RM>, TB, VEX;
def VCVTPS2PDYrr : I<0x5A, MRMSrcReg, (outs VR256:$dst), (ins VR128:$src),
                     "vcvtps2pd\t{$src, $dst|$dst, $src}",
                     [(set VR256:$dst,
                       (int_x86_avx_cvt_ps2_pd_256 VR128:$src))],
                     IIC_SSE_CVT_PD_RR>, TB, VEX, VEX_L;
def VCVTPS2PDYrm : I<0x5A, MRMSrcMem, (outs VR256:$dst), (ins f128mem:$src),
                     "vcvtps2pd\t{$src, $dst|$dst, $src}",
                     [(set VR256:$dst,
                       (int_x86_avx_cvt_ps2_pd_256 (memopv4f32 addr:$src)))],
                     IIC_SSE_CVT_PD_RM>, TB, VEX, VEX_L;
}

let Predicates = [UseSSE2] in {
def CVTPS2PDrr : I<0x5A, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
                   "cvtps2pd\t{$src, $dst|$dst, $src}",
                   [(set VR128:$dst, (int_x86_sse2_cvtps2pd VR128:$src))],
                   IIC_SSE_CVT_PD_RR>, TB;
def CVTPS2PDrm : I<0x5A, MRMSrcMem, (outs VR128:$dst), (ins f64mem:$src),
                   "cvtps2pd\t{$src, $dst|$dst, $src}",
                   [(set VR128:$dst, (v2f64 (extloadv2f32 addr:$src)))],
                   IIC_SSE_CVT_PD_RM>, TB;
}

// Convert Packed DW Integers to Packed Double FP
let Predicates = [HasAVX] in {
let neverHasSideEffects = 1, mayLoad = 1 in
def VCVTDQ2PDrm  : S2SI<0xE6, MRMSrcMem, (outs VR128:$dst), (ins i64mem:$src),
                        "vcvtdq2pd\t{$src, $dst|$dst, $src}",
                        []>, VEX;
def VCVTDQ2PDrr  : S2SI<0xE6, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
                        "vcvtdq2pd\t{$src, $dst|$dst, $src}",
                        [(set VR128:$dst,
                          (int_x86_sse2_cvtdq2pd VR128:$src))]>, VEX;
def VCVTDQ2PDYrm : S2SI<0xE6, MRMSrcMem, (outs VR256:$dst), (ins i128mem:$src),
                        "vcvtdq2pd\t{$src, $dst|$dst, $src}",
                        [(set VR256:$dst,
                          (int_x86_avx_cvtdq2_pd_256
                           (bitconvert (memopv2i64 addr:$src))))]>, VEX, VEX_L;
def VCVTDQ2PDYrr : S2SI<0xE6, MRMSrcReg, (outs VR256:$dst), (ins VR128:$src),
                        "vcvtdq2pd\t{$src, $dst|$dst, $src}",
                        [(set VR256:$dst,
                          (int_x86_avx_cvtdq2_pd_256 VR128:$src))]>, VEX, VEX_L;
}

let neverHasSideEffects = 1, mayLoad = 1 in
// FIX: the rr/rm itineraries were swapped here (rm carried IIC_SSE_CVT_PD_RR
// and rr carried IIC_SSE_CVT_PD_RM); corrected to match every other rr/rm
// pair in this file.
def CVTDQ2PDrm  : S2SI<0xE6, MRMSrcMem, (outs VR128:$dst), (ins i64mem:$src),
                       "cvtdq2pd\t{$src, $dst|$dst, $src}", [],
                       IIC_SSE_CVT_PD_RM>;
def CVTDQ2PDrr  : S2SI<0xE6, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
                       "cvtdq2pd\t{$src, $dst|$dst, $src}",
                       [(set VR128:$dst, (int_x86_sse2_cvtdq2pd VR128:$src))],
                       IIC_SSE_CVT_PD_RR>;

// AVX 256-bit register conversion intrinsics
let Predicates = [HasAVX] in {
  def : Pat<(v4f64 (sint_to_fp (v4i32 VR128:$src))),
            (VCVTDQ2PDYrr VR128:$src)>;
  def : Pat<(v4f64 (sint_to_fp (bc_v4i32 (memopv2i64 addr:$src)))),
            (VCVTDQ2PDYrm addr:$src)>;
} // Predicates = [HasAVX]

// Convert packed double to packed single
// The assembler can recognize rr 256-bit instructions by seeing a ymm
// register, but the same isn't true when using memory operands instead.
// Provide other assembly rr and rm forms to address this explicitly.
// vcvtpd2ps: narrows two/four doubles to singles in the low half of the
// destination.  The "x"/"{y}" mnemonic variants exist so the assembler can
// pick the operand size when the source is in memory (see comment above).
def VCVTPD2PSrr : VPDI<0x5A, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
                       "cvtpd2ps\t{$src, $dst|$dst, $src}",
                       [(set VR128:$dst, (int_x86_sse2_cvtpd2ps VR128:$src))],
                       IIC_SSE_CVT_PD_RR>, VEX;

// XMM only
def : InstAlias<"vcvtpd2psx\t{$src, $dst|$dst, $src}",
                (VCVTPD2PSrr VR128:$dst, VR128:$src)>;
def VCVTPD2PSXrm : VPDI<0x5A, MRMSrcMem, (outs VR128:$dst), (ins f128mem:$src),
                        "cvtpd2psx\t{$src, $dst|$dst, $src}",
                        [(set VR128:$dst,
                          (int_x86_sse2_cvtpd2ps (memopv2f64 addr:$src)))],
                        IIC_SSE_CVT_PD_RM>, VEX;

// YMM only
def VCVTPD2PSYrr : VPDI<0x5A, MRMSrcReg, (outs VR128:$dst), (ins VR256:$src),
                        "cvtpd2ps{y}\t{$src, $dst|$dst, $src}",
                        [(set VR128:$dst,
                          (int_x86_avx_cvt_pd2_ps_256 VR256:$src))],
                        IIC_SSE_CVT_PD_RR>, VEX, VEX_L;
def VCVTPD2PSYrm : VPDI<0x5A, MRMSrcMem, (outs VR128:$dst), (ins f256mem:$src),
                        "cvtpd2ps{y}\t{$src, $dst|$dst, $src}",
                        [(set VR128:$dst,
                          (int_x86_avx_cvt_pd2_ps_256 (memopv4f64 addr:$src)))],
                        IIC_SSE_CVT_PD_RM>, VEX, VEX_L;
def : InstAlias<"vcvtpd2ps\t{$src, $dst|$dst, $src}",
                (VCVTPD2PSYrr VR128:$dst, VR256:$src)>;

def CVTPD2PSrr : PDI<0x5A, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
                     "cvtpd2ps\t{$src, $dst|$dst, $src}",
                     [(set VR128:$dst, (int_x86_sse2_cvtpd2ps VR128:$src))],
                     IIC_SSE_CVT_PD_RR>;
def CVTPD2PSrm : PDI<0x5A, MRMSrcMem, (outs VR128:$dst), (ins f128mem:$src),
                     "cvtpd2ps\t{$src, $dst|$dst, $src}",
                     [(set VR128:$dst,
                       (int_x86_sse2_cvtpd2ps (memopv2f64 addr:$src)))],
                     IIC_SSE_CVT_PD_RM>;


// AVX 256-bit register conversion intrinsics
// FIXME: Migrate SSE conversion intrinsics matching to use patterns as below
// whenever possible to avoid declaring two versions of each one.
// AVX pattern selections for the 256-bit conversion intrinsics and for the
// generic fround/fextend nodes on 128/256-bit vectors.
let Predicates = [HasAVX] in {
  def : Pat<(int_x86_avx_cvtdq2_ps_256 VR256:$src),
            (VCVTDQ2PSYrr VR256:$src)>;
  def : Pat<(int_x86_avx_cvtdq2_ps_256 (bitconvert (memopv4i64 addr:$src))),
            (VCVTDQ2PSYrm addr:$src)>;

  // Match fround and fextend for 128/256-bit conversions
  def : Pat<(v4f32 (fround (v4f64 VR256:$src))),
            (VCVTPD2PSYrr VR256:$src)>;
  def : Pat<(v4f32 (fround (loadv4f64 addr:$src))),
            (VCVTPD2PSYrm addr:$src)>;

  def : Pat<(v2f64 (X86vfpext (v4f32 VR128:$src))),
            (VCVTPS2PDrr VR128:$src)>;
  def : Pat<(v4f64 (fextend (v4f32 VR128:$src))),
            (VCVTPS2PDYrr VR128:$src)>;
  def : Pat<(v4f64 (extloadv4f32 addr:$src)),
            (VCVTPS2PDYrm addr:$src)>;
}

let Predicates = [UseSSE2] in {
  // Match fextend for 128 conversions
  def : Pat<(v2f64 (X86vfpext (v4f32 VR128:$src))),
            (CVTPS2PDrr VR128:$src)>;
}

//===----------------------------------------------------------------------===//
// SSE 1 & 2 - Compare Instructions
//===----------------------------------------------------------------------===//

// sse12_cmp_scalar - sse 1 & 2 compare scalar instructions
// Emits two pairs: the pattern-matched forms taking a CC condition-code
// operand, and "_alt" assembler-only forms taking a raw i8 immediate.
multiclass sse12_cmp_scalar<RegisterClass RC, X86MemOperand x86memop,
                            Operand CC, SDNode OpNode, ValueType VT,
                            PatFrag ld_frag, string asm, string asm_alt,
                            OpndItins itins> {
  def rr : SIi8<0xC2, MRMSrcReg,
                (outs RC:$dst), (ins RC:$src1, RC:$src2, CC:$cc), asm,
                [(set RC:$dst, (OpNode (VT RC:$src1), RC:$src2, imm:$cc))],
                itins.rr>;
  def rm : SIi8<0xC2, MRMSrcMem,
                (outs RC:$dst), (ins RC:$src1, x86memop:$src2, CC:$cc), asm,
                [(set RC:$dst, (OpNode (VT RC:$src1),
                                       (ld_frag addr:$src2), imm:$cc))],
                itins.rm>;

  // Accept explicit immediate argument form instead of comparison code.
  // NOTE(review): the alt forms hard-code IIC_SSE_ALU_F32S_* rather than
  // using itins.rr/itins.rm like the forms above — presumably intentional
  // since they are assembler-only, but worth confirming.
  let neverHasSideEffects = 1 in {
    def rr_alt : SIi8<0xC2, MRMSrcReg, (outs RC:$dst),
                      (ins RC:$src1, RC:$src2, i8imm:$cc), asm_alt, [],
                      IIC_SSE_ALU_F32S_RR>;
    let mayLoad = 1 in
    def rm_alt : SIi8<0xC2, MRMSrcMem, (outs RC:$dst),
                      (ins RC:$src1, x86memop:$src2, i8imm:$cc), asm_alt, [],
                      IIC_SSE_ALU_F32S_RM>;
  }
}

defm VCMPSS : sse12_cmp_scalar<FR32, f32mem, AVXCC, X86cmpss, f32, loadf32,
                 "cmp${cc}ss\t{$src2, $src1, $dst|$dst, $src1, $src2}",
                 "cmpss\t{$cc, $src2, $src1, $dst|$dst, $src1, $src2, $cc}",
                 SSE_ALU_F32S>,
                 XS, VEX_4V, VEX_LIG;
defm VCMPSD : sse12_cmp_scalar<FR64, f64mem, AVXCC, X86cmpsd, f64, loadf64,
                 "cmp${cc}sd\t{$src2, $src1, $dst|$dst, $src1, $src2}",
                 "cmpsd\t{$cc, $src2, $src1, $dst|$dst, $src1, $src2, $cc}",
                 SSE_ALU_F32S>, // same latency as 32 bit compare
                 XD, VEX_4V, VEX_LIG;

let Constraints = "$src1 = $dst" in {
  defm CMPSS : sse12_cmp_scalar<FR32, f32mem, SSECC, X86cmpss, f32, loadf32,
                  "cmp${cc}ss\t{$src2, $dst|$dst, $src2}",
                  "cmpss\t{$cc, $src2, $dst|$dst, $src2, $cc}", SSE_ALU_F32S>,
                  XS;
  defm CMPSD : sse12_cmp_scalar<FR64, f64mem, SSECC, X86cmpsd, f64, loadf64,
                  "cmp${cc}sd\t{$src2, $dst|$dst, $src2}",
                  "cmpsd\t{$cc, $src2, $dst|$dst, $src2, $cc}",
                  SSE_ALU_F32S>, // same latency as 32 bit compare
                  XD;
}

// sse12_cmp_scalar_int - intrinsic (whole-XMM) forms of cmpss/cmpsd.
multiclass sse12_cmp_scalar_int<X86MemOperand x86memop, Operand CC,
                                Intrinsic Int, string asm, OpndItins itins> {
  def rr : SIi8<0xC2, MRMSrcReg, (outs VR128:$dst),
                (ins VR128:$src1, VR128:$src, CC:$cc), asm,
                [(set VR128:$dst, (Int VR128:$src1,
                                       VR128:$src, imm:$cc))],
                itins.rr>;
  def rm : SIi8<0xC2, MRMSrcMem, (outs VR128:$dst),
                (ins VR128:$src1, x86memop:$src, CC:$cc), asm,
                [(set VR128:$dst, (Int VR128:$src1,
                                       (load addr:$src), imm:$cc))],
                itins.rm>;
}

// Aliases to match intrinsics which expect XMM operand(s).
2289defm Int_VCMPSS : sse12_cmp_scalar_int<f32mem, AVXCC, int_x86_sse_cmp_ss, 2290 "cmp${cc}ss\t{$src, $src1, $dst|$dst, $src1, $src}", 2291 SSE_ALU_F32S>, 2292 XS, VEX_4V; 2293defm Int_VCMPSD : sse12_cmp_scalar_int<f64mem, AVXCC, int_x86_sse2_cmp_sd, 2294 "cmp${cc}sd\t{$src, $src1, $dst|$dst, $src1, $src}", 2295 SSE_ALU_F32S>, // same latency as f32 2296 XD, VEX_4V; 2297let Constraints = "$src1 = $dst" in { 2298 defm Int_CMPSS : sse12_cmp_scalar_int<f32mem, SSECC, int_x86_sse_cmp_ss, 2299 "cmp${cc}ss\t{$src, $dst|$dst, $src}", 2300 SSE_ALU_F32S>, XS; 2301 defm Int_CMPSD : sse12_cmp_scalar_int<f64mem, SSECC, int_x86_sse2_cmp_sd, 2302 "cmp${cc}sd\t{$src, $dst|$dst, $src}", 2303 SSE_ALU_F32S>, // same latency as f32 2304 XD; 2305} 2306 2307 2308// sse12_ord_cmp - Unordered/Ordered scalar fp compare and set EFLAGS 2309multiclass sse12_ord_cmp<bits<8> opc, RegisterClass RC, SDNode OpNode, 2310 ValueType vt, X86MemOperand x86memop, 2311 PatFrag ld_frag, string OpcodeStr, Domain d> { 2312 def rr: PI<opc, MRMSrcReg, (outs), (ins RC:$src1, RC:$src2), 2313 !strconcat(OpcodeStr, "\t{$src2, $src1|$src1, $src2}"), 2314 [(set EFLAGS, (OpNode (vt RC:$src1), RC:$src2))], 2315 IIC_SSE_COMIS_RR, d>; 2316 def rm: PI<opc, MRMSrcMem, (outs), (ins RC:$src1, x86memop:$src2), 2317 !strconcat(OpcodeStr, "\t{$src2, $src1|$src1, $src2}"), 2318 [(set EFLAGS, (OpNode (vt RC:$src1), 2319 (ld_frag addr:$src2)))], 2320 IIC_SSE_COMIS_RM, d>; 2321} 2322 2323let Defs = [EFLAGS] in { 2324 defm VUCOMISS : sse12_ord_cmp<0x2E, FR32, X86cmp, f32, f32mem, loadf32, 2325 "ucomiss", SSEPackedSingle>, TB, VEX, VEX_LIG; 2326 defm VUCOMISD : sse12_ord_cmp<0x2E, FR64, X86cmp, f64, f64mem, loadf64, 2327 "ucomisd", SSEPackedDouble>, TB, OpSize, VEX, 2328 VEX_LIG; 2329 let Pattern = []<dag> in { 2330 defm VCOMISS : sse12_ord_cmp<0x2F, VR128, undef, v4f32, f128mem, load, 2331 "comiss", SSEPackedSingle>, TB, VEX, 2332 VEX_LIG; 2333 defm VCOMISD : sse12_ord_cmp<0x2F, VR128, undef, v2f64, f128mem, load, 2334 "comisd", 
SSEPackedDouble>, TB, OpSize, VEX, 2335 VEX_LIG; 2336 } 2337 2338 defm Int_VUCOMISS : sse12_ord_cmp<0x2E, VR128, X86ucomi, v4f32, f128mem, 2339 load, "ucomiss", SSEPackedSingle>, TB, VEX; 2340 defm Int_VUCOMISD : sse12_ord_cmp<0x2E, VR128, X86ucomi, v2f64, f128mem, 2341 load, "ucomisd", SSEPackedDouble>, TB, OpSize, VEX; 2342 2343 defm Int_VCOMISS : sse12_ord_cmp<0x2F, VR128, X86comi, v4f32, f128mem, 2344 load, "comiss", SSEPackedSingle>, TB, VEX; 2345 defm Int_VCOMISD : sse12_ord_cmp<0x2F, VR128, X86comi, v2f64, f128mem, 2346 load, "comisd", SSEPackedDouble>, TB, OpSize, VEX; 2347 defm UCOMISS : sse12_ord_cmp<0x2E, FR32, X86cmp, f32, f32mem, loadf32, 2348 "ucomiss", SSEPackedSingle>, TB; 2349 defm UCOMISD : sse12_ord_cmp<0x2E, FR64, X86cmp, f64, f64mem, loadf64, 2350 "ucomisd", SSEPackedDouble>, TB, OpSize; 2351 2352 let Pattern = []<dag> in { 2353 defm COMISS : sse12_ord_cmp<0x2F, VR128, undef, v4f32, f128mem, load, 2354 "comiss", SSEPackedSingle>, TB; 2355 defm COMISD : sse12_ord_cmp<0x2F, VR128, undef, v2f64, f128mem, load, 2356 "comisd", SSEPackedDouble>, TB, OpSize; 2357 } 2358 2359 defm Int_UCOMISS : sse12_ord_cmp<0x2E, VR128, X86ucomi, v4f32, f128mem, 2360 load, "ucomiss", SSEPackedSingle>, TB; 2361 defm Int_UCOMISD : sse12_ord_cmp<0x2E, VR128, X86ucomi, v2f64, f128mem, 2362 load, "ucomisd", SSEPackedDouble>, TB, OpSize; 2363 2364 defm Int_COMISS : sse12_ord_cmp<0x2F, VR128, X86comi, v4f32, f128mem, load, 2365 "comiss", SSEPackedSingle>, TB; 2366 defm Int_COMISD : sse12_ord_cmp<0x2F, VR128, X86comi, v2f64, f128mem, load, 2367 "comisd", SSEPackedDouble>, TB, OpSize; 2368} // Defs = [EFLAGS] 2369 2370// sse12_cmp_packed - sse 1 & 2 compare packed instructions 2371multiclass sse12_cmp_packed<RegisterClass RC, X86MemOperand x86memop, 2372 Operand CC, Intrinsic Int, string asm, 2373 string asm_alt, Domain d> { 2374 def rri : PIi8<0xC2, MRMSrcReg, 2375 (outs RC:$dst), (ins RC:$src1, RC:$src2, CC:$cc), asm, 2376 [(set RC:$dst, (Int RC:$src1, RC:$src2, imm:$cc))], 
2377 IIC_SSE_CMPP_RR, d>; 2378 def rmi : PIi8<0xC2, MRMSrcMem, 2379 (outs RC:$dst), (ins RC:$src1, x86memop:$src2, CC:$cc), asm, 2380 [(set RC:$dst, (Int RC:$src1, (memop addr:$src2), imm:$cc))], 2381 IIC_SSE_CMPP_RM, d>; 2382 2383 // Accept explicit immediate argument form instead of comparison code. 2384 let neverHasSideEffects = 1 in { 2385 def rri_alt : PIi8<0xC2, MRMSrcReg, 2386 (outs RC:$dst), (ins RC:$src1, RC:$src2, i8imm:$cc), 2387 asm_alt, [], IIC_SSE_CMPP_RR, d>; 2388 def rmi_alt : PIi8<0xC2, MRMSrcMem, 2389 (outs RC:$dst), (ins RC:$src1, x86memop:$src2, i8imm:$cc), 2390 asm_alt, [], IIC_SSE_CMPP_RM, d>; 2391 } 2392} 2393 2394defm VCMPPS : sse12_cmp_packed<VR128, f128mem, AVXCC, int_x86_sse_cmp_ps, 2395 "cmp${cc}ps\t{$src2, $src1, $dst|$dst, $src1, $src2}", 2396 "cmpps\t{$cc, $src2, $src1, $dst|$dst, $src1, $src2, $cc}", 2397 SSEPackedSingle>, TB, VEX_4V; 2398defm VCMPPD : sse12_cmp_packed<VR128, f128mem, AVXCC, int_x86_sse2_cmp_pd, 2399 "cmp${cc}pd\t{$src2, $src1, $dst|$dst, $src1, $src2}", 2400 "cmppd\t{$cc, $src2, $src1, $dst|$dst, $src1, $src2, $cc}", 2401 SSEPackedDouble>, TB, OpSize, VEX_4V; 2402defm VCMPPSY : sse12_cmp_packed<VR256, f256mem, AVXCC, int_x86_avx_cmp_ps_256, 2403 "cmp${cc}ps\t{$src2, $src1, $dst|$dst, $src1, $src2}", 2404 "cmpps\t{$cc, $src2, $src1, $dst|$dst, $src1, $src2, $cc}", 2405 SSEPackedSingle>, TB, VEX_4V, VEX_L; 2406defm VCMPPDY : sse12_cmp_packed<VR256, f256mem, AVXCC, int_x86_avx_cmp_pd_256, 2407 "cmp${cc}pd\t{$src2, $src1, $dst|$dst, $src1, $src2}", 2408 "cmppd\t{$cc, $src2, $src1, $dst|$dst, $src1, $src2, $cc}", 2409 SSEPackedDouble>, TB, OpSize, VEX_4V, VEX_L; 2410let Constraints = "$src1 = $dst" in { 2411 defm CMPPS : sse12_cmp_packed<VR128, f128mem, SSECC, int_x86_sse_cmp_ps, 2412 "cmp${cc}ps\t{$src2, $dst|$dst, $src2}", 2413 "cmpps\t{$cc, $src2, $dst|$dst, $src2, $cc}", 2414 SSEPackedSingle>, TB; 2415 defm CMPPD : sse12_cmp_packed<VR128, f128mem, SSECC, int_x86_sse2_cmp_pd, 2416 "cmp${cc}pd\t{$src2, $dst|$dst, 
$src2}", 2417 "cmppd\t{$cc, $src2, $dst|$dst, $src2, $cc}", 2418 SSEPackedDouble>, TB, OpSize; 2419} 2420 2421let Predicates = [HasAVX] in { 2422def : Pat<(v4i32 (X86cmpp (v4f32 VR128:$src1), VR128:$src2, imm:$cc)), 2423 (VCMPPSrri (v4f32 VR128:$src1), (v4f32 VR128:$src2), imm:$cc)>; 2424def : Pat<(v4i32 (X86cmpp (v4f32 VR128:$src1), (memop addr:$src2), imm:$cc)), 2425 (VCMPPSrmi (v4f32 VR128:$src1), addr:$src2, imm:$cc)>; 2426def : Pat<(v2i64 (X86cmpp (v2f64 VR128:$src1), VR128:$src2, imm:$cc)), 2427 (VCMPPDrri VR128:$src1, VR128:$src2, imm:$cc)>; 2428def : Pat<(v2i64 (X86cmpp (v2f64 VR128:$src1), (memop addr:$src2), imm:$cc)), 2429 (VCMPPDrmi VR128:$src1, addr:$src2, imm:$cc)>; 2430 2431def : Pat<(v8i32 (X86cmpp (v8f32 VR256:$src1), VR256:$src2, imm:$cc)), 2432 (VCMPPSYrri (v8f32 VR256:$src1), (v8f32 VR256:$src2), imm:$cc)>; 2433def : Pat<(v8i32 (X86cmpp (v8f32 VR256:$src1), (memop addr:$src2), imm:$cc)), 2434 (VCMPPSYrmi (v8f32 VR256:$src1), addr:$src2, imm:$cc)>; 2435def : Pat<(v4i64 (X86cmpp (v4f64 VR256:$src1), VR256:$src2, imm:$cc)), 2436 (VCMPPDYrri VR256:$src1, VR256:$src2, imm:$cc)>; 2437def : Pat<(v4i64 (X86cmpp (v4f64 VR256:$src1), (memop addr:$src2), imm:$cc)), 2438 (VCMPPDYrmi VR256:$src1, addr:$src2, imm:$cc)>; 2439} 2440 2441let Predicates = [UseSSE1] in { 2442def : Pat<(v4i32 (X86cmpp (v4f32 VR128:$src1), VR128:$src2, imm:$cc)), 2443 (CMPPSrri (v4f32 VR128:$src1), (v4f32 VR128:$src2), imm:$cc)>; 2444def : Pat<(v4i32 (X86cmpp (v4f32 VR128:$src1), (memop addr:$src2), imm:$cc)), 2445 (CMPPSrmi (v4f32 VR128:$src1), addr:$src2, imm:$cc)>; 2446} 2447 2448let Predicates = [UseSSE2] in { 2449def : Pat<(v2i64 (X86cmpp (v2f64 VR128:$src1), VR128:$src2, imm:$cc)), 2450 (CMPPDrri VR128:$src1, VR128:$src2, imm:$cc)>; 2451def : Pat<(v2i64 (X86cmpp (v2f64 VR128:$src1), (memop addr:$src2), imm:$cc)), 2452 (CMPPDrmi VR128:$src1, addr:$src2, imm:$cc)>; 2453} 2454 2455//===----------------------------------------------------------------------===// 2456// SSE 1 & 2 - 
Shuffle Instructions 2457//===----------------------------------------------------------------------===// 2458 2459/// sse12_shuffle - sse 1 & 2 shuffle instructions 2460multiclass sse12_shuffle<RegisterClass RC, X86MemOperand x86memop, 2461                          ValueType vt, string asm, PatFrag mem_frag, 2462                          Domain d, bit IsConvertibleToThreeAddress = 0> { 2463  def rmi : PIi8<0xC6, MRMSrcMem, (outs RC:$dst), 2464                    (ins RC:$src1, x86memop:$src2, i8imm:$src3), asm, 2465                    [(set RC:$dst, (vt (X86Shufp RC:$src1, (mem_frag addr:$src2), 2466                                        (i8 imm:$src3))))], IIC_SSE_SHUFP, d>; 2467  let isConvertibleToThreeAddress = IsConvertibleToThreeAddress in 2468    def rri : PIi8<0xC6, MRMSrcReg, (outs RC:$dst), 2469                    (ins RC:$src1, RC:$src2, i8imm:$src3), asm, 2470                    [(set RC:$dst, (vt (X86Shufp RC:$src1, RC:$src2, 2471                                        (i8 imm:$src3))))], IIC_SSE_SHUFP, d>; 2472} 2473 2474defm VSHUFPS  : sse12_shuffle<VR128, f128mem, v4f32, 2475           "shufps\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}", 2476           memopv4f32, SSEPackedSingle>, TB, VEX_4V; 2477defm VSHUFPSY : sse12_shuffle<VR256, f256mem, v8f32, 2478           "shufps\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}", 2479           memopv8f32, SSEPackedSingle>, TB, VEX_4V, VEX_L; 2480defm VSHUFPD  : sse12_shuffle<VR128, f128mem, v2f64, 2481           "shufpd\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}", 2482           memopv2f64, SSEPackedDouble>, TB, OpSize, VEX_4V; 2483defm VSHUFPDY : sse12_shuffle<VR256, f256mem, v4f64, 2484           "shufpd\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}", 2485           memopv4f64, SSEPackedDouble>, TB, OpSize, VEX_4V, VEX_L; 2486 2487let Constraints = "$src1 = $dst" in { 2488  defm SHUFPS : sse12_shuffle<VR128, f128mem, v4f32, 2489                    "shufps\t{$src3, $src2, $dst|$dst, $src2, $src3}", 2490                    memopv4f32, SSEPackedSingle, 1 /* cvt to pshufd */>, 2491                    TB; 2492  defm SHUFPD : sse12_shuffle<VR128, f128mem, v2f64, 2493                    "shufpd\t{$src3, $src2, $dst|$dst, $src2, $src3}", 2494                    memopv2f64, SSEPackedDouble, 1 /* cvt to pshufd */>, 2495                    TB, OpSize; 2496} 2497 2498let 
Predicates = [HasAVX] in { 2499 def : Pat<(v4i32 (X86Shufp VR128:$src1, 2500 (bc_v4i32 (memopv2i64 addr:$src2)), (i8 imm:$imm))), 2501 (VSHUFPSrmi VR128:$src1, addr:$src2, imm:$imm)>; 2502 def : Pat<(v4i32 (X86Shufp VR128:$src1, VR128:$src2, (i8 imm:$imm))), 2503 (VSHUFPSrri VR128:$src1, VR128:$src2, imm:$imm)>; 2504 2505 def : Pat<(v2i64 (X86Shufp VR128:$src1, 2506 (memopv2i64 addr:$src2), (i8 imm:$imm))), 2507 (VSHUFPDrmi VR128:$src1, addr:$src2, imm:$imm)>; 2508 def : Pat<(v2i64 (X86Shufp VR128:$src1, VR128:$src2, (i8 imm:$imm))), 2509 (VSHUFPDrri VR128:$src1, VR128:$src2, imm:$imm)>; 2510 2511 // 256-bit patterns 2512 def : Pat<(v8i32 (X86Shufp VR256:$src1, VR256:$src2, (i8 imm:$imm))), 2513 (VSHUFPSYrri VR256:$src1, VR256:$src2, imm:$imm)>; 2514 def : Pat<(v8i32 (X86Shufp VR256:$src1, 2515 (bc_v8i32 (memopv4i64 addr:$src2)), (i8 imm:$imm))), 2516 (VSHUFPSYrmi VR256:$src1, addr:$src2, imm:$imm)>; 2517 2518 def : Pat<(v4i64 (X86Shufp VR256:$src1, VR256:$src2, (i8 imm:$imm))), 2519 (VSHUFPDYrri VR256:$src1, VR256:$src2, imm:$imm)>; 2520 def : Pat<(v4i64 (X86Shufp VR256:$src1, 2521 (memopv4i64 addr:$src2), (i8 imm:$imm))), 2522 (VSHUFPDYrmi VR256:$src1, addr:$src2, imm:$imm)>; 2523} 2524 2525let Predicates = [UseSSE1] in { 2526 def : Pat<(v4i32 (X86Shufp VR128:$src1, 2527 (bc_v4i32 (memopv2i64 addr:$src2)), (i8 imm:$imm))), 2528 (SHUFPSrmi VR128:$src1, addr:$src2, imm:$imm)>; 2529 def : Pat<(v4i32 (X86Shufp VR128:$src1, VR128:$src2, (i8 imm:$imm))), 2530 (SHUFPSrri VR128:$src1, VR128:$src2, imm:$imm)>; 2531} 2532 2533let Predicates = [UseSSE2] in { 2534 // Generic SHUFPD patterns 2535 def : Pat<(v2i64 (X86Shufp VR128:$src1, 2536 (memopv2i64 addr:$src2), (i8 imm:$imm))), 2537 (SHUFPDrmi VR128:$src1, addr:$src2, imm:$imm)>; 2538 def : Pat<(v2i64 (X86Shufp VR128:$src1, VR128:$src2, (i8 imm:$imm))), 2539 (SHUFPDrri VR128:$src1, VR128:$src2, imm:$imm)>; 2540} 2541 2542//===----------------------------------------------------------------------===// 2543// SSE 1 & 2 - 
Unpack Instructions 2544//===----------------------------------------------------------------------===// 2545 2546/// sse12_unpack_interleave - sse 1 & 2 unpack and interleave 2547multiclass sse12_unpack_interleave<bits<8> opc, SDNode OpNode, ValueType vt, 2548 PatFrag mem_frag, RegisterClass RC, 2549 X86MemOperand x86memop, string asm, 2550 Domain d> { 2551 def rr : PI<opc, MRMSrcReg, 2552 (outs RC:$dst), (ins RC:$src1, RC:$src2), 2553 asm, [(set RC:$dst, 2554 (vt (OpNode RC:$src1, RC:$src2)))], 2555 IIC_SSE_UNPCK, d>; 2556 def rm : PI<opc, MRMSrcMem, 2557 (outs RC:$dst), (ins RC:$src1, x86memop:$src2), 2558 asm, [(set RC:$dst, 2559 (vt (OpNode RC:$src1, 2560 (mem_frag addr:$src2))))], 2561 IIC_SSE_UNPCK, d>; 2562} 2563 2564defm VUNPCKHPS: sse12_unpack_interleave<0x15, X86Unpckh, v4f32, memopv4f32, 2565 VR128, f128mem, "unpckhps\t{$src2, $src1, $dst|$dst, $src1, $src2}", 2566 SSEPackedSingle>, TB, VEX_4V; 2567defm VUNPCKHPD: sse12_unpack_interleave<0x15, X86Unpckh, v2f64, memopv2f64, 2568 VR128, f128mem, "unpckhpd\t{$src2, $src1, $dst|$dst, $src1, $src2}", 2569 SSEPackedDouble>, TB, OpSize, VEX_4V; 2570defm VUNPCKLPS: sse12_unpack_interleave<0x14, X86Unpckl, v4f32, memopv4f32, 2571 VR128, f128mem, "unpcklps\t{$src2, $src1, $dst|$dst, $src1, $src2}", 2572 SSEPackedSingle>, TB, VEX_4V; 2573defm VUNPCKLPD: sse12_unpack_interleave<0x14, X86Unpckl, v2f64, memopv2f64, 2574 VR128, f128mem, "unpcklpd\t{$src2, $src1, $dst|$dst, $src1, $src2}", 2575 SSEPackedDouble>, TB, OpSize, VEX_4V; 2576 2577defm VUNPCKHPSY: sse12_unpack_interleave<0x15, X86Unpckh, v8f32, memopv8f32, 2578 VR256, f256mem, "unpckhps\t{$src2, $src1, $dst|$dst, $src1, $src2}", 2579 SSEPackedSingle>, TB, VEX_4V, VEX_L; 2580defm VUNPCKHPDY: sse12_unpack_interleave<0x15, X86Unpckh, v4f64, memopv4f64, 2581 VR256, f256mem, "unpckhpd\t{$src2, $src1, $dst|$dst, $src1, $src2}", 2582 SSEPackedDouble>, TB, OpSize, VEX_4V, VEX_L; 2583defm VUNPCKLPSY: sse12_unpack_interleave<0x14, X86Unpckl, v8f32, memopv8f32, 2584 
VR256, f256mem, "unpcklps\t{$src2, $src1, $dst|$dst, $src1, $src2}", 2585 SSEPackedSingle>, TB, VEX_4V, VEX_L; 2586defm VUNPCKLPDY: sse12_unpack_interleave<0x14, X86Unpckl, v4f64, memopv4f64, 2587 VR256, f256mem, "unpcklpd\t{$src2, $src1, $dst|$dst, $src1, $src2}", 2588 SSEPackedDouble>, TB, OpSize, VEX_4V, VEX_L; 2589 2590let Constraints = "$src1 = $dst" in { 2591 defm UNPCKHPS: sse12_unpack_interleave<0x15, X86Unpckh, v4f32, memopv4f32, 2592 VR128, f128mem, "unpckhps\t{$src2, $dst|$dst, $src2}", 2593 SSEPackedSingle>, TB; 2594 defm UNPCKHPD: sse12_unpack_interleave<0x15, X86Unpckh, v2f64, memopv2f64, 2595 VR128, f128mem, "unpckhpd\t{$src2, $dst|$dst, $src2}", 2596 SSEPackedDouble>, TB, OpSize; 2597 defm UNPCKLPS: sse12_unpack_interleave<0x14, X86Unpckl, v4f32, memopv4f32, 2598 VR128, f128mem, "unpcklps\t{$src2, $dst|$dst, $src2}", 2599 SSEPackedSingle>, TB; 2600 defm UNPCKLPD: sse12_unpack_interleave<0x14, X86Unpckl, v2f64, memopv2f64, 2601 VR128, f128mem, "unpcklpd\t{$src2, $dst|$dst, $src2}", 2602 SSEPackedDouble>, TB, OpSize; 2603} // Constraints = "$src1 = $dst" 2604 2605let Predicates = [HasAVX1Only] in { 2606 def : Pat<(v8i32 (X86Unpckl VR256:$src1, (bc_v8i32 (memopv4i64 addr:$src2)))), 2607 (VUNPCKLPSYrm VR256:$src1, addr:$src2)>; 2608 def : Pat<(v8i32 (X86Unpckl VR256:$src1, VR256:$src2)), 2609 (VUNPCKLPSYrr VR256:$src1, VR256:$src2)>; 2610 def : Pat<(v8i32 (X86Unpckh VR256:$src1, (bc_v8i32 (memopv4i64 addr:$src2)))), 2611 (VUNPCKHPSYrm VR256:$src1, addr:$src2)>; 2612 def : Pat<(v8i32 (X86Unpckh VR256:$src1, VR256:$src2)), 2613 (VUNPCKHPSYrr VR256:$src1, VR256:$src2)>; 2614 2615 def : Pat<(v4i64 (X86Unpckl VR256:$src1, (memopv4i64 addr:$src2))), 2616 (VUNPCKLPDYrm VR256:$src1, addr:$src2)>; 2617 def : Pat<(v4i64 (X86Unpckl VR256:$src1, VR256:$src2)), 2618 (VUNPCKLPDYrr VR256:$src1, VR256:$src2)>; 2619 def : Pat<(v4i64 (X86Unpckh VR256:$src1, (memopv4i64 addr:$src2))), 2620 (VUNPCKHPDYrm VR256:$src1, addr:$src2)>; 2621 def : Pat<(v4i64 (X86Unpckh 
VR256:$src1, VR256:$src2)), 2622 (VUNPCKHPDYrr VR256:$src1, VR256:$src2)>; 2623} 2624 2625let Predicates = [HasAVX] in { 2626 // FIXME: Instead of X86Movddup, there should be a X86Unpckl here, the 2627 // problem is during lowering, where it's not possible to recognize the load 2628 // fold cause it has two uses through a bitcast. One use disappears at isel 2629 // time and the fold opportunity reappears. 2630 def : Pat<(v2f64 (X86Movddup VR128:$src)), 2631 (VUNPCKLPDrr VR128:$src, VR128:$src)>; 2632} 2633 2634let Predicates = [UseSSE2] in { 2635 // FIXME: Instead of X86Movddup, there should be a X86Unpckl here, the 2636 // problem is during lowering, where it's not possible to recognize the load 2637 // fold cause it has two uses through a bitcast. One use disappears at isel 2638 // time and the fold opportunity reappears. 2639 def : Pat<(v2f64 (X86Movddup VR128:$src)), 2640 (UNPCKLPDrr VR128:$src, VR128:$src)>; 2641} 2642 2643//===----------------------------------------------------------------------===// 2644// SSE 1 & 2 - Extract Floating-Point Sign mask 2645//===----------------------------------------------------------------------===// 2646 2647/// sse12_extr_sign_mask - sse 1 & 2 unpack and interleave 2648multiclass sse12_extr_sign_mask<RegisterClass RC, Intrinsic Int, string asm, 2649 Domain d> { 2650 def rr32 : PI<0x50, MRMSrcReg, (outs GR32:$dst), (ins RC:$src), 2651 !strconcat(asm, "\t{$src, $dst|$dst, $src}"), 2652 [(set GR32:$dst, (Int RC:$src))], IIC_SSE_MOVMSK, d>; 2653 def rr64 : PI<0x50, MRMSrcReg, (outs GR64:$dst), (ins RC:$src), 2654 !strconcat(asm, "\t{$src, $dst|$dst, $src}"), [], 2655 IIC_SSE_MOVMSK, d>, REX_W; 2656} 2657 2658let Predicates = [HasAVX] in { 2659 defm VMOVMSKPS : sse12_extr_sign_mask<VR128, int_x86_sse_movmsk_ps, 2660 "movmskps", SSEPackedSingle>, TB, VEX; 2661 defm VMOVMSKPD : sse12_extr_sign_mask<VR128, int_x86_sse2_movmsk_pd, 2662 "movmskpd", SSEPackedDouble>, TB, 2663 OpSize, VEX; 2664 defm VMOVMSKPSY : 
sse12_extr_sign_mask<VR256, int_x86_avx_movmsk_ps_256, 2665 "movmskps", SSEPackedSingle>, TB, 2666 VEX, VEX_L; 2667 defm VMOVMSKPDY : sse12_extr_sign_mask<VR256, int_x86_avx_movmsk_pd_256, 2668 "movmskpd", SSEPackedDouble>, TB, 2669 OpSize, VEX, VEX_L; 2670 2671 def : Pat<(i32 (X86fgetsign FR32:$src)), 2672 (VMOVMSKPSrr32 (COPY_TO_REGCLASS FR32:$src, VR128))>; 2673 def : Pat<(i64 (X86fgetsign FR32:$src)), 2674 (VMOVMSKPSrr64 (COPY_TO_REGCLASS FR32:$src, VR128))>; 2675 def : Pat<(i32 (X86fgetsign FR64:$src)), 2676 (VMOVMSKPDrr32 (COPY_TO_REGCLASS FR64:$src, VR128))>; 2677 def : Pat<(i64 (X86fgetsign FR64:$src)), 2678 (VMOVMSKPDrr64 (COPY_TO_REGCLASS FR64:$src, VR128))>; 2679 2680 // Assembler Only 2681 def VMOVMSKPSr64r : PI<0x50, MRMSrcReg, (outs GR64:$dst), (ins VR128:$src), 2682 "movmskps\t{$src, $dst|$dst, $src}", [], IIC_SSE_MOVMSK, 2683 SSEPackedSingle>, TB, VEX; 2684 def VMOVMSKPDr64r : PI<0x50, MRMSrcReg, (outs GR64:$dst), (ins VR128:$src), 2685 "movmskpd\t{$src, $dst|$dst, $src}", [], IIC_SSE_MOVMSK, 2686 SSEPackedDouble>, TB, 2687 OpSize, VEX; 2688 def VMOVMSKPSYr64r : PI<0x50, MRMSrcReg, (outs GR64:$dst), (ins VR256:$src), 2689 "movmskps\t{$src, $dst|$dst, $src}", [], IIC_SSE_MOVMSK, 2690 SSEPackedSingle>, TB, VEX, VEX_L; 2691 def VMOVMSKPDYr64r : PI<0x50, MRMSrcReg, (outs GR64:$dst), (ins VR256:$src), 2692 "movmskpd\t{$src, $dst|$dst, $src}", [], IIC_SSE_MOVMSK, 2693 SSEPackedDouble>, TB, 2694 OpSize, VEX, VEX_L; 2695} 2696 2697defm MOVMSKPS : sse12_extr_sign_mask<VR128, int_x86_sse_movmsk_ps, "movmskps", 2698 SSEPackedSingle>, TB; 2699defm MOVMSKPD : sse12_extr_sign_mask<VR128, int_x86_sse2_movmsk_pd, "movmskpd", 2700 SSEPackedDouble>, TB, OpSize; 2701 2702def : Pat<(i32 (X86fgetsign FR32:$src)), 2703 (MOVMSKPSrr32 (COPY_TO_REGCLASS FR32:$src, VR128))>, 2704 Requires<[UseSSE1]>; 2705def : Pat<(i64 (X86fgetsign FR32:$src)), 2706 (MOVMSKPSrr64 (COPY_TO_REGCLASS FR32:$src, VR128))>, 2707 Requires<[UseSSE1]>; 2708def : Pat<(i32 (X86fgetsign FR64:$src)), 
2709 (MOVMSKPDrr32 (COPY_TO_REGCLASS FR64:$src, VR128))>, 2710 Requires<[UseSSE2]>; 2711def : Pat<(i64 (X86fgetsign FR64:$src)), 2712 (MOVMSKPDrr64 (COPY_TO_REGCLASS FR64:$src, VR128))>, 2713 Requires<[UseSSE2]>; 2714 2715//===---------------------------------------------------------------------===// 2716// SSE2 - Packed Integer Logical Instructions 2717//===---------------------------------------------------------------------===// 2718 2719let ExeDomain = SSEPackedInt in { // SSE integer instructions 2720 2721/// PDI_binop_rm - Simple SSE2 binary operator. 2722multiclass PDI_binop_rm<bits<8> opc, string OpcodeStr, SDNode OpNode, 2723 ValueType OpVT, RegisterClass RC, PatFrag memop_frag, 2724 X86MemOperand x86memop, 2725 OpndItins itins, 2726 bit IsCommutable = 0, 2727 bit Is2Addr = 1> { 2728 let isCommutable = IsCommutable in 2729 def rr : PDI<opc, MRMSrcReg, (outs RC:$dst), 2730 (ins RC:$src1, RC:$src2), 2731 !if(Is2Addr, 2732 !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"), 2733 !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")), 2734 [(set RC:$dst, (OpVT (OpNode RC:$src1, RC:$src2)))], itins.rr>; 2735 def rm : PDI<opc, MRMSrcMem, (outs RC:$dst), 2736 (ins RC:$src1, x86memop:$src2), 2737 !if(Is2Addr, 2738 !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"), 2739 !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")), 2740 [(set RC:$dst, (OpVT (OpNode RC:$src1, 2741 (bitconvert (memop_frag addr:$src2)))))], 2742 itins.rm>; 2743} 2744} // ExeDomain = SSEPackedInt 2745 2746// These are ordered here for pattern ordering requirements with the fp versions 2747 2748let Predicates = [HasAVX] in { 2749defm VPAND : PDI_binop_rm<0xDB, "vpand", and, v2i64, VR128, memopv2i64, 2750 i128mem, SSE_BIT_ITINS_P, 1, 0>, VEX_4V; 2751defm VPOR : PDI_binop_rm<0xEB, "vpor" , or, v2i64, VR128, memopv2i64, 2752 i128mem, SSE_BIT_ITINS_P, 1, 0>, VEX_4V; 2753defm VPXOR : PDI_binop_rm<0xEF, "vpxor", xor, v2i64, VR128, memopv2i64, 2754 i128mem, 
SSE_BIT_ITINS_P, 1, 0>, VEX_4V; 2755defm VPANDN : PDI_binop_rm<0xDF, "vpandn", X86andnp, v2i64, VR128, memopv2i64, 2756 i128mem, SSE_BIT_ITINS_P, 0, 0>, VEX_4V; 2757} 2758 2759let Constraints = "$src1 = $dst" in { 2760defm PAND : PDI_binop_rm<0xDB, "pand", and, v2i64, VR128, memopv2i64, 2761 i128mem, SSE_BIT_ITINS_P, 1>; 2762defm POR : PDI_binop_rm<0xEB, "por" , or, v2i64, VR128, memopv2i64, 2763 i128mem, SSE_BIT_ITINS_P, 1>; 2764defm PXOR : PDI_binop_rm<0xEF, "pxor", xor, v2i64, VR128, memopv2i64, 2765 i128mem, SSE_BIT_ITINS_P, 1>; 2766defm PANDN : PDI_binop_rm<0xDF, "pandn", X86andnp, v2i64, VR128, memopv2i64, 2767 i128mem, SSE_BIT_ITINS_P, 0>; 2768} // Constraints = "$src1 = $dst" 2769 2770let Predicates = [HasAVX2] in { 2771defm VPANDY : PDI_binop_rm<0xDB, "vpand", and, v4i64, VR256, memopv4i64, 2772 i256mem, SSE_BIT_ITINS_P, 1, 0>, VEX_4V, VEX_L; 2773defm VPORY : PDI_binop_rm<0xEB, "vpor", or, v4i64, VR256, memopv4i64, 2774 i256mem, SSE_BIT_ITINS_P, 1, 0>, VEX_4V, VEX_L; 2775defm VPXORY : PDI_binop_rm<0xEF, "vpxor", xor, v4i64, VR256, memopv4i64, 2776 i256mem, SSE_BIT_ITINS_P, 1, 0>, VEX_4V, VEX_L; 2777defm VPANDNY : PDI_binop_rm<0xDF, "vpandn", X86andnp, v4i64, VR256, memopv4i64, 2778 i256mem, SSE_BIT_ITINS_P, 0, 0>, VEX_4V, VEX_L; 2779} 2780 2781//===----------------------------------------------------------------------===// 2782// SSE 1 & 2 - Logical Instructions 2783//===----------------------------------------------------------------------===// 2784 2785/// sse12_fp_alias_pack_logical - SSE 1 & 2 aliased packed FP logical ops 2786/// 2787multiclass sse12_fp_alias_pack_logical<bits<8> opc, string OpcodeStr, 2788 SDNode OpNode, OpndItins itins> { 2789 defm V#NAME#PS : sse12_fp_packed<opc, !strconcat(OpcodeStr, "ps"), OpNode, 2790 FR32, f32, f128mem, memopfsf32, SSEPackedSingle, itins, 0>, 2791 TB, VEX_4V; 2792 2793 defm V#NAME#PD : sse12_fp_packed<opc, !strconcat(OpcodeStr, "pd"), OpNode, 2794 FR64, f64, f128mem, memopfsf64, SSEPackedDouble, itins, 0>, 2795 
TB, OpSize, VEX_4V; 2796 2797 let Constraints = "$src1 = $dst" in { 2798 defm PS : sse12_fp_packed<opc, !strconcat(OpcodeStr, "ps"), OpNode, FR32, 2799 f32, f128mem, memopfsf32, SSEPackedSingle, itins>, 2800 TB; 2801 2802 defm PD : sse12_fp_packed<opc, !strconcat(OpcodeStr, "pd"), OpNode, FR64, 2803 f64, f128mem, memopfsf64, SSEPackedDouble, itins>, 2804 TB, OpSize; 2805 } 2806} 2807 2808// Alias bitwise logical operations using SSE logical ops on packed FP values. 2809defm FsAND : sse12_fp_alias_pack_logical<0x54, "and", X86fand, 2810 SSE_BIT_ITINS_P>; 2811defm FsOR : sse12_fp_alias_pack_logical<0x56, "or", X86for, 2812 SSE_BIT_ITINS_P>; 2813defm FsXOR : sse12_fp_alias_pack_logical<0x57, "xor", X86fxor, 2814 SSE_BIT_ITINS_P>; 2815 2816let neverHasSideEffects = 1, Pattern = []<dag>, isCommutable = 0 in 2817 defm FsANDN : sse12_fp_alias_pack_logical<0x55, "andn", undef, 2818 SSE_BIT_ITINS_P>; 2819 2820/// sse12_fp_packed_logical - SSE 1 & 2 packed FP logical ops 2821/// 2822multiclass sse12_fp_packed_logical<bits<8> opc, string OpcodeStr, 2823 SDNode OpNode> { 2824 // In AVX no need to add a pattern for 128-bit logical rr ps, because they 2825 // are all promoted to v2i64, and the patterns are covered by the int 2826 // version. This is needed in SSE only, because v2i64 isn't supported on 2827 // SSE1, but only on SSE2. 
2828 defm V#NAME#PS : sse12_fp_packed_logical_rm<opc, VR128, SSEPackedSingle, 2829 !strconcat(OpcodeStr, "ps"), f128mem, [], 2830 [(set VR128:$dst, (OpNode (bc_v2i64 (v4f32 VR128:$src1)), 2831 (memopv2i64 addr:$src2)))], 0, 1>, TB, VEX_4V; 2832 2833 defm V#NAME#PD : sse12_fp_packed_logical_rm<opc, VR128, SSEPackedDouble, 2834 !strconcat(OpcodeStr, "pd"), f128mem, 2835 [(set VR128:$dst, (OpNode (bc_v2i64 (v2f64 VR128:$src1)), 2836 (bc_v2i64 (v2f64 VR128:$src2))))], 2837 [(set VR128:$dst, (OpNode (bc_v2i64 (v2f64 VR128:$src1)), 2838 (memopv2i64 addr:$src2)))], 0>, 2839 TB, OpSize, VEX_4V; 2840 let Constraints = "$src1 = $dst" in { 2841 defm PS : sse12_fp_packed_logical_rm<opc, VR128, SSEPackedSingle, 2842 !strconcat(OpcodeStr, "ps"), f128mem, 2843 [(set VR128:$dst, (v2i64 (OpNode VR128:$src1, VR128:$src2)))], 2844 [(set VR128:$dst, (OpNode (bc_v2i64 (v4f32 VR128:$src1)), 2845 (memopv2i64 addr:$src2)))]>, TB; 2846 2847 defm PD : sse12_fp_packed_logical_rm<opc, VR128, SSEPackedDouble, 2848 !strconcat(OpcodeStr, "pd"), f128mem, 2849 [(set VR128:$dst, (OpNode (bc_v2i64 (v2f64 VR128:$src1)), 2850 (bc_v2i64 (v2f64 VR128:$src2))))], 2851 [(set VR128:$dst, (OpNode (bc_v2i64 (v2f64 VR128:$src1)), 2852 (memopv2i64 addr:$src2)))]>, TB, OpSize; 2853 } 2854} 2855 2856/// sse12_fp_packed_logical_y - AVX 256-bit SSE 1 & 2 logical ops forms 2857/// 2858multiclass sse12_fp_packed_logical_y<bits<8> opc, string OpcodeStr, 2859 SDNode OpNode> { 2860 defm PSY : sse12_fp_packed_logical_rm<opc, VR256, SSEPackedSingle, 2861 !strconcat(OpcodeStr, "ps"), f256mem, 2862 [(set VR256:$dst, (v4i64 (OpNode VR256:$src1, VR256:$src2)))], 2863 [(set VR256:$dst, (OpNode (bc_v4i64 (v8f32 VR256:$src1)), 2864 (memopv4i64 addr:$src2)))], 0>, TB, VEX_4V, VEX_L; 2865 2866 defm PDY : sse12_fp_packed_logical_rm<opc, VR256, SSEPackedDouble, 2867 !strconcat(OpcodeStr, "pd"), f256mem, 2868 [(set VR256:$dst, (OpNode (bc_v4i64 (v4f64 VR256:$src1)), 2869 (bc_v4i64 (v4f64 VR256:$src2))))], 2870 [(set VR256:$dst, 
(OpNode (bc_v4i64 (v4f64 VR256:$src1)), 2871 (memopv4i64 addr:$src2)))], 0>, 2872 TB, OpSize, VEX_4V, VEX_L; 2873} 2874 2875// AVX 256-bit packed logical ops forms 2876defm VAND : sse12_fp_packed_logical_y<0x54, "and", and>; 2877defm VOR : sse12_fp_packed_logical_y<0x56, "or", or>; 2878defm VXOR : sse12_fp_packed_logical_y<0x57, "xor", xor>; 2879defm VANDN : sse12_fp_packed_logical_y<0x55, "andn", X86andnp>; 2880 2881defm AND : sse12_fp_packed_logical<0x54, "and", and>; 2882defm OR : sse12_fp_packed_logical<0x56, "or", or>; 2883defm XOR : sse12_fp_packed_logical<0x57, "xor", xor>; 2884let isCommutable = 0 in 2885 defm ANDN : sse12_fp_packed_logical<0x55, "andn", X86andnp>; 2886 2887//===----------------------------------------------------------------------===// 2888// SSE 1 & 2 - Arithmetic Instructions 2889//===----------------------------------------------------------------------===// 2890 2891/// basic_sse12_fp_binop_xxx - SSE 1 & 2 binops come in both scalar and 2892/// vector forms. 2893/// 2894/// In addition, we also have a special variant of the scalar form here to 2895/// represent the associated intrinsic operation. This form is unlike the 2896/// plain scalar form, in that it takes an entire vector (instead of a scalar) 2897/// and leaves the top elements unmodified (therefore these cannot be commuted). 2898/// 2899/// These three forms can each be reg+reg or reg+mem. 
///

/// FIXME: once all 256-bit intrinsics are matched, cleanup and refactor those
/// classes below

/// basic_sse12_fp_binop_s - Scalar SS/SD forms of a binary FP op.
/// Instantiates sse12_fp_scalar twice: f32 (XS prefix) and f64 (XD prefix).
multiclass basic_sse12_fp_binop_s<bits<8> opc, string OpcodeStr, SDNode OpNode,
                                  SizeItins itins,
                                  bit Is2Addr = 1> {
  defm SS : sse12_fp_scalar<opc, !strconcat(OpcodeStr, "ss"),
                            OpNode, FR32, f32mem,
                            itins.s, Is2Addr>, XS;
  defm SD : sse12_fp_scalar<opc, !strconcat(OpcodeStr, "sd"),
                            OpNode, FR64, f64mem,
                            itins.d, Is2Addr>, XD;
}

/// basic_sse12_fp_binop_p - Packed 128-bit PS/PD forms of a binary FP op.
multiclass basic_sse12_fp_binop_p<bits<8> opc, string OpcodeStr, SDNode OpNode,
                                  SizeItins itins,
                                  bit Is2Addr = 1> {
  defm PS : sse12_fp_packed<opc, !strconcat(OpcodeStr, "ps"), OpNode, VR128,
              v4f32, f128mem, memopv4f32, SSEPackedSingle, itins.s, Is2Addr>,
              TB;
  defm PD : sse12_fp_packed<opc, !strconcat(OpcodeStr, "pd"), OpNode, VR128,
              v2f64, f128mem, memopv2f64, SSEPackedDouble, itins.d, Is2Addr>,
              TB, OpSize;
}

/// basic_sse12_fp_binop_p_y - AVX 256-bit packed PS/PD forms (always 3-addr).
multiclass basic_sse12_fp_binop_p_y<bits<8> opc, string OpcodeStr,
                                    SDNode OpNode,
                                    SizeItins itins> {
  defm PSY : sse12_fp_packed<opc, !strconcat(OpcodeStr, "ps"), OpNode, VR256,
                v8f32, f256mem, memopv8f32, SSEPackedSingle, itins.s, 0>,
                TB, VEX_L;
  defm PDY : sse12_fp_packed<opc, !strconcat(OpcodeStr, "pd"), OpNode, VR256,
                v4f64, f256mem, memopv4f64, SSEPackedDouble, itins.d, 0>,
                TB, OpSize, VEX_L;
}

/// basic_sse12_fp_binop_s_int - Intrinsic (whole-VR128) scalar SS/SD forms.
multiclass basic_sse12_fp_binop_s_int<bits<8> opc, string OpcodeStr,
                                      SizeItins itins,
                                      bit Is2Addr = 1> {
  defm SS : sse12_fp_scalar_int<opc, OpcodeStr, VR128,
     !strconcat(OpcodeStr, "ss"), "", "_ss", ssmem, sse_load_f32,
     itins.s, Is2Addr>, XS;
  defm SD : sse12_fp_scalar_int<opc, OpcodeStr, VR128,
     !strconcat(OpcodeStr, "sd"), "2", "_sd", sdmem, sse_load_f64,
     itins.d, Is2Addr>, XD;
}

/// basic_sse12_fp_binop_p_int - Intrinsic 128-bit packed PS/PD forms.
multiclass basic_sse12_fp_binop_p_int<bits<8> opc, string OpcodeStr,
                                      SizeItins itins,
                                      bit Is2Addr = 1> {
  defm PS : sse12_fp_packed_int<opc, OpcodeStr, VR128,
       !strconcat(OpcodeStr, "ps"), "sse", "_ps", f128mem, memopv4f32,
       SSEPackedSingle, itins.s, Is2Addr>,
       TB;

  defm PD : sse12_fp_packed_int<opc, OpcodeStr, VR128,
       !strconcat(OpcodeStr, "pd"), "sse2", "_pd", f128mem, memopv2f64,
       SSEPackedDouble, itins.d, Is2Addr>,
       TB, OpSize;
}

/// basic_sse12_fp_binop_p_y_int - Intrinsic AVX 256-bit packed PS/PD forms.
multiclass basic_sse12_fp_binop_p_y_int<bits<8> opc, string OpcodeStr,
                                        SizeItins itins> {
  defm PSY : sse12_fp_packed_int<opc, OpcodeStr, VR256,
       !strconcat(OpcodeStr, "ps"), "avx", "_ps_256", f256mem, memopv8f32,
       SSEPackedSingle, itins.s, 0>, TB, VEX_L;

  defm PDY : sse12_fp_packed_int<opc, OpcodeStr, VR256,
       !strconcat(OpcodeStr, "pd"), "avx", "_pd_256", f256mem, memopv4f64,
       SSEPackedDouble, itins.d, 0>, TB, OpSize, VEX_L;
}

// Binary Arithmetic instructions
defm VADD : basic_sse12_fp_binop_s<0x58, "add", fadd, SSE_ALU_ITINS_S, 0>,
            basic_sse12_fp_binop_s_int<0x58, "add", SSE_ALU_ITINS_S, 0>,
            VEX_4V, VEX_LIG;
defm VADD : basic_sse12_fp_binop_p<0x58, "add", fadd, SSE_ALU_ITINS_P, 0>,
            basic_sse12_fp_binop_p_y<0x58, "add", fadd, SSE_ALU_ITINS_P>,
            VEX_4V;
defm VMUL : basic_sse12_fp_binop_s<0x59, "mul", fmul, SSE_MUL_ITINS_S, 0>,
            basic_sse12_fp_binop_s_int<0x59, "mul", SSE_MUL_ITINS_S, 0>,
            VEX_4V, VEX_LIG;
defm VMUL : basic_sse12_fp_binop_p<0x59, "mul", fmul, SSE_MUL_ITINS_P, 0>,
            basic_sse12_fp_binop_p_y<0x59, "mul", fmul, SSE_MUL_ITINS_P>,
            VEX_4V;

let isCommutable = 0 in {
  defm VSUB : basic_sse12_fp_binop_s<0x5C, "sub", fsub, SSE_ALU_ITINS_S, 0>,
              basic_sse12_fp_binop_s_int<0x5C, "sub", SSE_ALU_ITINS_S, 0>,
              VEX_4V, VEX_LIG;
  defm VSUB : basic_sse12_fp_binop_p<0x5C, "sub", fsub, SSE_ALU_ITINS_P, 0>,
              basic_sse12_fp_binop_p_y<0x5C, "sub", fsub, SSE_ALU_ITINS_P>,
              VEX_4V;
  defm VDIV : basic_sse12_fp_binop_s<0x5E, "div", fdiv, SSE_DIV_ITINS_S, 0>,
              basic_sse12_fp_binop_s_int<0x5E, "div", SSE_DIV_ITINS_S, 0>,
              VEX_4V, VEX_LIG;
  // NOTE: use the DIV itineraries here, matching the scalar/256-bit VDIV
  // forms above and below (was SSE_ALU_ITINS_P, which understates latency).
  defm VDIV : basic_sse12_fp_binop_p<0x5E, "div", fdiv, SSE_DIV_ITINS_P, 0>,
              basic_sse12_fp_binop_p_y<0x5E, "div", fdiv, SSE_DIV_ITINS_P>,
              VEX_4V;
  defm VMAX : basic_sse12_fp_binop_s<0x5F, "max", X86fmax, SSE_ALU_ITINS_S, 0>,
              basic_sse12_fp_binop_s_int<0x5F, "max", SSE_ALU_ITINS_S, 0>,
              VEX_4V, VEX_LIG;
  defm VMAX : basic_sse12_fp_binop_p<0x5F, "max", X86fmax, SSE_ALU_ITINS_P, 0>,
              basic_sse12_fp_binop_p_int<0x5F, "max", SSE_ALU_ITINS_P, 0>,
              basic_sse12_fp_binop_p_y<0x5F, "max", X86fmax, SSE_ALU_ITINS_P>,
              basic_sse12_fp_binop_p_y_int<0x5F, "max", SSE_ALU_ITINS_P>,
              VEX_4V;
  defm VMIN : basic_sse12_fp_binop_s<0x5D, "min", X86fmin, SSE_ALU_ITINS_S, 0>,
              basic_sse12_fp_binop_s_int<0x5D, "min", SSE_ALU_ITINS_S, 0>,
              VEX_4V, VEX_LIG;
  defm VMIN : basic_sse12_fp_binop_p<0x5D, "min", X86fmin, SSE_ALU_ITINS_P, 0>,
              basic_sse12_fp_binop_p_int<0x5D, "min", SSE_ALU_ITINS_P, 0>,
              basic_sse12_fp_binop_p_y_int<0x5D, "min", SSE_ALU_ITINS_P>,
              basic_sse12_fp_binop_p_y<0x5D, "min", X86fmin, SSE_ALU_ITINS_P>,
              VEX_4V;
}

// Legacy SSE forms: destructive two-address encoding ($src1 is also $dst).
let Constraints = "$src1 = $dst" in {
  defm ADD : basic_sse12_fp_binop_s<0x58, "add", fadd, SSE_ALU_ITINS_S>,
             basic_sse12_fp_binop_p<0x58, "add", fadd, SSE_ALU_ITINS_P>,
             basic_sse12_fp_binop_s_int<0x58, "add", SSE_ALU_ITINS_S>;
  defm MUL : basic_sse12_fp_binop_s<0x59, "mul", fmul, SSE_MUL_ITINS_S>,
             basic_sse12_fp_binop_p<0x59, "mul", fmul, SSE_MUL_ITINS_P>,
             basic_sse12_fp_binop_s_int<0x59, "mul", SSE_MUL_ITINS_S>;

  let isCommutable = 0 in {
    defm SUB : basic_sse12_fp_binop_s<0x5C, "sub", fsub, SSE_ALU_ITINS_S>,
               basic_sse12_fp_binop_p<0x5C, "sub", fsub, SSE_ALU_ITINS_P>,
               basic_sse12_fp_binop_s_int<0x5C, "sub", SSE_ALU_ITINS_S>;
    defm DIV : basic_sse12_fp_binop_s<0x5E, "div", fdiv, SSE_DIV_ITINS_S>,
               basic_sse12_fp_binop_p<0x5E, "div", fdiv, SSE_DIV_ITINS_P>,
               basic_sse12_fp_binop_s_int<0x5E, "div", SSE_DIV_ITINS_S>;
    defm MAX : basic_sse12_fp_binop_s<0x5F, "max", X86fmax, SSE_ALU_ITINS_S>,
               basic_sse12_fp_binop_p<0x5F, "max", X86fmax, SSE_ALU_ITINS_P>,
               basic_sse12_fp_binop_s_int<0x5F, "max", SSE_ALU_ITINS_S>,
               basic_sse12_fp_binop_p_int<0x5F, "max", SSE_ALU_ITINS_P>;
    defm MIN : basic_sse12_fp_binop_s<0x5D, "min", X86fmin, SSE_ALU_ITINS_S>,
               basic_sse12_fp_binop_p<0x5D, "min", X86fmin, SSE_ALU_ITINS_P>,
               basic_sse12_fp_binop_s_int<0x5D, "min", SSE_ALU_ITINS_S>,
               basic_sse12_fp_binop_p_int<0x5D, "min", SSE_ALU_ITINS_P>;
  }
}

// Commutative variants of max/min (X86fmaxc/X86fminc) used only by isel;
// never emitted by the assembler/disassembler paths.
let isCodeGenOnly = 1 in {
  defm VMAXC: basic_sse12_fp_binop_s<0x5F, "max", X86fmaxc, SSE_ALU_ITINS_S, 0>,
          VEX_4V, VEX_LIG;
  defm VMAXC: basic_sse12_fp_binop_p<0x5F, "max", X86fmaxc, SSE_ALU_ITINS_P, 0>,
          basic_sse12_fp_binop_p_y<0x5F, "max", X86fmaxc, SSE_ALU_ITINS_P>, VEX_4V;
  defm VMINC: basic_sse12_fp_binop_s<0x5D, "min", X86fminc, SSE_ALU_ITINS_S, 0>,
          VEX_4V, VEX_LIG;
  defm VMINC: basic_sse12_fp_binop_p<0x5D, "min", X86fminc, SSE_ALU_ITINS_P, 0>,
          basic_sse12_fp_binop_p_y<0x5D, "min", X86fminc, SSE_ALU_ITINS_P>, VEX_4V;
  let Constraints = "$src1 = $dst" in {
    defm MAXC: basic_sse12_fp_binop_s<0x5F, "max", X86fmaxc, SSE_ALU_ITINS_S>,
               basic_sse12_fp_binop_p<0x5F, "max", X86fmaxc, SSE_ALU_ITINS_P>;
    defm MINC: basic_sse12_fp_binop_s<0x5D, "min", X86fminc, SSE_ALU_ITINS_S>,
               basic_sse12_fp_binop_p<0x5D, "min", X86fminc, SSE_ALU_ITINS_P>;
  }
}

/// Unop Arithmetic
/// In addition, we also have a special variant of the scalar form here to
/// represent the associated intrinsic operation.  This form is unlike the
/// plain scalar form, in that it takes an entire vector (instead of a
/// scalar) and leaves the top elements undefined.
///
/// And, we have a special variant form for a full-vector intrinsic form.
// Itineraries for the FP unary ops: packed vs. scalar sqrt, packed vs.
// scalar reciprocal approximation.
def SSE_SQRTP : OpndItins<
  IIC_SSE_SQRTP_RR, IIC_SSE_SQRTP_RM
>;

def SSE_SQRTS : OpndItins<
  IIC_SSE_SQRTS_RR, IIC_SSE_SQRTS_RM
>;

def SSE_RCPP : OpndItins<
  IIC_SSE_RCPP_RR, IIC_SSE_RCPP_RM
>;

def SSE_RCPS : OpndItins<
  IIC_SSE_RCPS_RR, IIC_SSE_RCPS_RM
>;

/// sse1_fp_unop_s - SSE1 unops in scalar form.
multiclass sse1_fp_unop_s<bits<8> opc, string OpcodeStr,
                          SDNode OpNode, Intrinsic F32Int, OpndItins itins> {
  def SSr : SSI<opc, MRMSrcReg, (outs FR32:$dst), (ins FR32:$src),
                !strconcat(OpcodeStr, "ss\t{$src, $dst|$dst, $src}"),
                [(set FR32:$dst, (OpNode FR32:$src))]>;
  // For scalar unary operations, fold a load into the operation
  // only in OptForSize mode. It eliminates an instruction, but it also
  // eliminates a whole-register clobber (the load), so it introduces a
  // partial register update condition.
  def SSm : I<opc, MRMSrcMem, (outs FR32:$dst), (ins f32mem:$src),
              !strconcat(OpcodeStr, "ss\t{$src, $dst|$dst, $src}"),
              [(set FR32:$dst, (OpNode (load addr:$src)))], itins.rm>, XS,
            Requires<[UseSSE1, OptForSize]>;
  // Intrinsic forms operate on the whole vector; top elements are undefined.
  def SSr_Int : SSI<opc, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
                    !strconcat(OpcodeStr, "ss\t{$src, $dst|$dst, $src}"),
                    [(set VR128:$dst, (F32Int VR128:$src))], itins.rr>;
  def SSm_Int : SSI<opc, MRMSrcMem, (outs VR128:$dst), (ins ssmem:$src),
                    !strconcat(OpcodeStr, "ss\t{$src, $dst|$dst, $src}"),
                    [(set VR128:$dst, (F32Int sse_load_f32:$src))], itins.rm>;
}

/// sse1_fp_unop_s_avx - AVX SSE1 unops in scalar form.
// These defs have no patterns; mark hasSideEffects = 0 so TableGen does not
// conservatively infer side effects (matches sse2_fp_unop_s_avx below).
let hasSideEffects = 0 in
multiclass sse1_fp_unop_s_avx<bits<8> opc, string OpcodeStr> {
  def SSr : SSI<opc, MRMSrcReg, (outs FR32:$dst), (ins FR32:$src1, FR32:$src2),
                !strconcat(OpcodeStr,
                           "ss\t{$src2, $src1, $dst|$dst, $src1, $src2}"), []>;
  let mayLoad = 1 in {
  def SSm : SSI<opc, MRMSrcMem, (outs FR32:$dst), (ins FR32:$src1,f32mem:$src2),
                !strconcat(OpcodeStr,
                           "ss\t{$src2, $src1, $dst|$dst, $src1, $src2}"), []>;
  def SSm_Int : SSI<opc, MRMSrcMem, (outs VR128:$dst),
                    (ins VR128:$src1, ssmem:$src2),
                    !strconcat(OpcodeStr,
                               "ss\t{$src2, $src1, $dst|$dst, $src1, $src2}"), []>;
  }
}

/// sse1_fp_unop_p - SSE1 unops in packed form.
multiclass sse1_fp_unop_p<bits<8> opc, string OpcodeStr, SDNode OpNode,
                          OpndItins itins> {
  def PSr : PSI<opc, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
              !strconcat(OpcodeStr, "ps\t{$src, $dst|$dst, $src}"),
              [(set VR128:$dst, (v4f32 (OpNode VR128:$src)))], itins.rr>;
  def PSm : PSI<opc, MRMSrcMem, (outs VR128:$dst), (ins f128mem:$src),
              !strconcat(OpcodeStr, "ps\t{$src, $dst|$dst, $src}"),
              [(set VR128:$dst, (OpNode (memopv4f32 addr:$src)))], itins.rm>;
}

/// sse1_fp_unop_p_y - AVX 256-bit SSE1 unops in packed form.
multiclass sse1_fp_unop_p_y<bits<8> opc, string OpcodeStr, SDNode OpNode,
                            OpndItins itins> {
  def PSYr : PSI<opc, MRMSrcReg, (outs VR256:$dst), (ins VR256:$src),
              !strconcat(OpcodeStr, "ps\t{$src, $dst|$dst, $src}"),
              [(set VR256:$dst, (v8f32 (OpNode VR256:$src)))],
              itins.rr>, VEX_L;
  def PSYm : PSI<opc, MRMSrcMem, (outs VR256:$dst), (ins f256mem:$src),
              !strconcat(OpcodeStr, "ps\t{$src, $dst|$dst, $src}"),
              [(set VR256:$dst, (OpNode (memopv8f32 addr:$src)))],
              itins.rm>, VEX_L;
}

/// sse1_fp_unop_p_int - SSE1 intrinsics unops in packed forms.
// Intrinsic packed unops: pattern is the intrinsic applied to the whole
// 128-bit register, so no element-type wrapper is needed.
multiclass sse1_fp_unop_p_int<bits<8> opc, string OpcodeStr,
                              Intrinsic V4F32Int, OpndItins itins> {
  def PSr_Int : PSI<opc, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
                    !strconcat(OpcodeStr, "ps\t{$src, $dst|$dst, $src}"),
                    [(set VR128:$dst, (V4F32Int VR128:$src))],
                    itins.rr>;
  def PSm_Int : PSI<opc, MRMSrcMem, (outs VR128:$dst), (ins f128mem:$src),
                    !strconcat(OpcodeStr, "ps\t{$src, $dst|$dst, $src}"),
                    [(set VR128:$dst, (V4F32Int (memopv4f32 addr:$src)))],
                    itins.rm>;
}

/// sse1_fp_unop_p_y_int - AVX 256-bit intrinsics unops in packed forms.
multiclass sse1_fp_unop_p_y_int<bits<8> opc, string OpcodeStr,
                                Intrinsic V4F32Int, OpndItins itins> {
  def PSYr_Int : PSI<opc, MRMSrcReg, (outs VR256:$dst), (ins VR256:$src),
                    !strconcat(OpcodeStr, "ps\t{$src, $dst|$dst, $src}"),
                    [(set VR256:$dst, (V4F32Int VR256:$src))],
                    itins.rr>, VEX_L;
  def PSYm_Int : PSI<opc, MRMSrcMem, (outs VR256:$dst), (ins f256mem:$src),
                    !strconcat(OpcodeStr, "ps\t{$src, $dst|$dst, $src}"),
                    [(set VR256:$dst, (V4F32Int (memopv8f32 addr:$src)))],
                    itins.rm>, VEX_L;
}

/// sse2_fp_unop_s - SSE2 unops in scalar form.
multiclass sse2_fp_unop_s<bits<8> opc, string OpcodeStr,
                          SDNode OpNode, Intrinsic F64Int, OpndItins itins> {
  def SDr : SDI<opc, MRMSrcReg, (outs FR64:$dst), (ins FR64:$src),
                !strconcat(OpcodeStr, "sd\t{$src, $dst|$dst, $src}"),
                [(set FR64:$dst, (OpNode FR64:$src))], itins.rr>;
  // See the comments in sse1_fp_unop_s for why this is OptForSize.
  def SDm : I<opc, MRMSrcMem, (outs FR64:$dst), (ins f64mem:$src),
              !strconcat(OpcodeStr, "sd\t{$src, $dst|$dst, $src}"),
              [(set FR64:$dst, (OpNode (load addr:$src)))], itins.rm>, XD,
            Requires<[UseSSE2, OptForSize]>;
  // Intrinsic forms operate on the whole vector; top element is undefined.
  def SDr_Int : SDI<opc, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
                    !strconcat(OpcodeStr, "sd\t{$src, $dst|$dst, $src}"),
                    [(set VR128:$dst, (F64Int VR128:$src))], itins.rr>;
  def SDm_Int : SDI<opc, MRMSrcMem, (outs VR128:$dst), (ins sdmem:$src),
                    !strconcat(OpcodeStr, "sd\t{$src, $dst|$dst, $src}"),
                    [(set VR128:$dst, (F64Int sse_load_f64:$src))], itins.rm>;
}

/// sse2_fp_unop_s_avx - AVX SSE2 unops in scalar form.
let hasSideEffects = 0 in
multiclass sse2_fp_unop_s_avx<bits<8> opc, string OpcodeStr> {
  def SDr : SDI<opc, MRMSrcReg, (outs FR64:$dst), (ins FR64:$src1, FR64:$src2),
                !strconcat(OpcodeStr,
                           "sd\t{$src2, $src1, $dst|$dst, $src1, $src2}"), []>;
  let mayLoad = 1 in {
  def SDm : SDI<opc, MRMSrcMem, (outs FR64:$dst), (ins FR64:$src1,f64mem:$src2),
                !strconcat(OpcodeStr,
                           "sd\t{$src2, $src1, $dst|$dst, $src1, $src2}"), []>;
  def SDm_Int : SDI<opc, MRMSrcMem, (outs VR128:$dst),
                    (ins VR128:$src1, sdmem:$src2),
                    !strconcat(OpcodeStr,
                               "sd\t{$src2, $src1, $dst|$dst, $src1, $src2}"), []>;
  }
}

/// sse2_fp_unop_p - SSE2 unops in vector forms.
multiclass sse2_fp_unop_p<bits<8> opc, string OpcodeStr,
                          SDNode OpNode, OpndItins itins> {
  def PDr : PDI<opc, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
                !strconcat(OpcodeStr, "pd\t{$src, $dst|$dst, $src}"),
                [(set VR128:$dst, (v2f64 (OpNode VR128:$src)))], itins.rr>;
  def PDm : PDI<opc, MRMSrcMem, (outs VR128:$dst), (ins f128mem:$src),
                !strconcat(OpcodeStr, "pd\t{$src, $dst|$dst, $src}"),
                [(set VR128:$dst, (OpNode (memopv2f64 addr:$src)))], itins.rm>;
}

/// sse2_fp_unop_p_y - AVX SSE2 256-bit unops in vector forms.
multiclass sse2_fp_unop_p_y<bits<8> opc, string OpcodeStr, SDNode OpNode,
                            OpndItins itins> {
  def PDYr : PDI<opc, MRMSrcReg, (outs VR256:$dst), (ins VR256:$src),
                !strconcat(OpcodeStr, "pd\t{$src, $dst|$dst, $src}"),
                [(set VR256:$dst, (v4f64 (OpNode VR256:$src)))],
                itins.rr>, VEX_L;
  def PDYm : PDI<opc, MRMSrcMem, (outs VR256:$dst), (ins f256mem:$src),
                !strconcat(OpcodeStr, "pd\t{$src, $dst|$dst, $src}"),
                [(set VR256:$dst, (OpNode (memopv4f64 addr:$src)))],
                itins.rm>, VEX_L;
}

/// sse2_fp_unop_p_int - SSE2 intrinsic unops in vector forms.
multiclass sse2_fp_unop_p_int<bits<8> opc, string OpcodeStr,
                              Intrinsic V2F64Int, OpndItins itins> {
  def PDr_Int : PDI<opc, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
                    !strconcat(OpcodeStr, "pd\t{$src, $dst|$dst, $src}"),
                    [(set VR128:$dst, (V2F64Int VR128:$src))],
                    itins.rr>;
  def PDm_Int : PDI<opc, MRMSrcMem, (outs VR128:$dst), (ins f128mem:$src),
                    !strconcat(OpcodeStr, "pd\t{$src, $dst|$dst, $src}"),
                    [(set VR128:$dst, (V2F64Int (memopv2f64 addr:$src)))],
                    itins.rm>;
}

/// sse2_fp_unop_p_y_int - AVX 256-bit intrinsic unops in vector forms.
multiclass sse2_fp_unop_p_y_int<bits<8> opc, string OpcodeStr,
                                Intrinsic V2F64Int, OpndItins itins> {
  def PDYr_Int : PDI<opc, MRMSrcReg, (outs VR256:$dst), (ins VR256:$src),
                    !strconcat(OpcodeStr, "pd\t{$src, $dst|$dst, $src}"),
                    [(set VR256:$dst, (V2F64Int VR256:$src))],
                    itins.rr>, VEX_L;
  def PDYm_Int : PDI<opc, MRMSrcMem, (outs VR256:$dst), (ins f256mem:$src),
                    !strconcat(OpcodeStr, "pd\t{$src, $dst|$dst, $src}"),
                    [(set VR256:$dst, (V2F64Int (memopv4f64 addr:$src)))],
                    itins.rm>, VEX_L;
}

// AVX (VEX-encoded) forms of sqrt / rsqrt / rcp.
let Predicates = [HasAVX] in {
  // Square root.
  defm VSQRT  : sse1_fp_unop_s_avx<0x51, "vsqrt">,
                sse2_fp_unop_s_avx<0x51, "vsqrt">, VEX_4V, VEX_LIG;

  defm VSQRT  : sse1_fp_unop_p<0x51, "vsqrt", fsqrt, SSE_SQRTP>,
                sse2_fp_unop_p<0x51, "vsqrt", fsqrt, SSE_SQRTP>,
                sse1_fp_unop_p_y<0x51, "vsqrt", fsqrt, SSE_SQRTP>,
                sse2_fp_unop_p_y<0x51, "vsqrt", fsqrt, SSE_SQRTP>,
                sse1_fp_unop_p_int<0x51, "vsqrt", int_x86_sse_sqrt_ps,
                                   SSE_SQRTP>,
                sse2_fp_unop_p_int<0x51, "vsqrt", int_x86_sse2_sqrt_pd,
                                   SSE_SQRTP>,
                sse1_fp_unop_p_y_int<0x51, "vsqrt", int_x86_avx_sqrt_ps_256,
                                     SSE_SQRTP>,
                sse2_fp_unop_p_y_int<0x51, "vsqrt", int_x86_avx_sqrt_pd_256,
                                     SSE_SQRTP>,
                VEX;

  // Reciprocal approximations. Note that these typically require refinement
  // in order to obtain suitable precision.
  defm VRSQRT : sse1_fp_unop_s_avx<0x52, "vrsqrt">, VEX_4V, VEX_LIG;
  defm VRSQRT : sse1_fp_unop_p<0x52, "vrsqrt", X86frsqrt, SSE_SQRTP>,
                sse1_fp_unop_p_y<0x52, "vrsqrt", X86frsqrt, SSE_SQRTP>,
                sse1_fp_unop_p_y_int<0x52, "vrsqrt", int_x86_avx_rsqrt_ps_256,
                                     SSE_SQRTP>,
                sse1_fp_unop_p_int<0x52, "vrsqrt", int_x86_sse_rsqrt_ps,
                                   SSE_SQRTP>, VEX;

  defm VRCP   : sse1_fp_unop_s_avx<0x53, "vrcp">, VEX_4V, VEX_LIG;
  defm VRCP   : sse1_fp_unop_p<0x53, "vrcp", X86frcp, SSE_RCPP>,
                sse1_fp_unop_p_y<0x53, "vrcp", X86frcp, SSE_RCPP>,
                sse1_fp_unop_p_y_int<0x53, "vrcp", int_x86_avx_rcp_ps_256,
                                     SSE_RCPP>,
                sse1_fp_unop_p_int<0x53, "vrcp", int_x86_sse_rcp_ps,
                                   SSE_RCPP>, VEX;
}

// Select the 3-operand AVX scalar forms for plain (non-intrinsic) scalar
// unops; the unused $src1 operand is tied off with IMPLICIT_DEF.
def : Pat<(f32 (fsqrt FR32:$src)),
          (VSQRTSSr (f32 (IMPLICIT_DEF)), FR32:$src)>, Requires<[HasAVX]>;
def : Pat<(f32 (fsqrt (load addr:$src))),
          (VSQRTSSm (f32 (IMPLICIT_DEF)), addr:$src)>,
          Requires<[HasAVX, OptForSize]>;
def : Pat<(f64 (fsqrt FR64:$src)),
          (VSQRTSDr (f64 (IMPLICIT_DEF)), FR64:$src)>, Requires<[HasAVX]>;
def : Pat<(f64 (fsqrt (load addr:$src))),
          (VSQRTSDm (f64 (IMPLICIT_DEF)), addr:$src)>,
          Requires<[HasAVX, OptForSize]>;

def : Pat<(f32 (X86frsqrt FR32:$src)),
          (VRSQRTSSr (f32 (IMPLICIT_DEF)), FR32:$src)>, Requires<[HasAVX]>;
def : Pat<(f32 (X86frsqrt (load addr:$src))),
          (VRSQRTSSm (f32 (IMPLICIT_DEF)), addr:$src)>,
          Requires<[HasAVX, OptForSize]>;

def : Pat<(f32 (X86frcp FR32:$src)),
          (VRCPSSr (f32 (IMPLICIT_DEF)), FR32:$src)>, Requires<[HasAVX]>;
def : Pat<(f32 (X86frcp (load addr:$src))),
          (VRCPSSm (f32 (IMPLICIT_DEF)), addr:$src)>,
          Requires<[HasAVX, OptForSize]>;

// Intrinsic scalar forms under AVX: move the operand through the scalar
// register class, run the scalar instruction, and move it back.
let Predicates = [HasAVX] in {
  def : Pat<(int_x86_sse_sqrt_ss VR128:$src),
            (COPY_TO_REGCLASS (VSQRTSSr (f32 (IMPLICIT_DEF)),
                                        (COPY_TO_REGCLASS VR128:$src, FR32)),
                              VR128)>;
  def : Pat<(int_x86_sse_sqrt_ss sse_load_f32:$src),
            (VSQRTSSm_Int (v4f32 (IMPLICIT_DEF)), sse_load_f32:$src)>;

  def : Pat<(int_x86_sse2_sqrt_sd VR128:$src),
            (COPY_TO_REGCLASS (VSQRTSDr (f64 (IMPLICIT_DEF)),
                                        (COPY_TO_REGCLASS VR128:$src, FR64)),
                              VR128)>;
  def : Pat<(int_x86_sse2_sqrt_sd sse_load_f64:$src),
            (VSQRTSDm_Int (v2f64 (IMPLICIT_DEF)), sse_load_f64:$src)>;

  def : Pat<(int_x86_sse_rsqrt_ss VR128:$src),
            (COPY_TO_REGCLASS (VRSQRTSSr (f32 (IMPLICIT_DEF)),
                                         (COPY_TO_REGCLASS VR128:$src, FR32)),
                              VR128)>;
  def : Pat<(int_x86_sse_rsqrt_ss sse_load_f32:$src),
            (VRSQRTSSm_Int (v4f32 (IMPLICIT_DEF)), sse_load_f32:$src)>;

  def : Pat<(int_x86_sse_rcp_ss VR128:$src),
            (COPY_TO_REGCLASS (VRCPSSr (f32 (IMPLICIT_DEF)),
                                       (COPY_TO_REGCLASS VR128:$src, FR32)),
                              VR128)>;
  def : Pat<(int_x86_sse_rcp_ss sse_load_f32:$src),
            (VRCPSSm_Int (v4f32 (IMPLICIT_DEF)), sse_load_f32:$src)>;
}

// Square root.
// Legacy SSE forms. Scalar forms use the scalar itineraries (SSE_SQRTS /
// SSE_RCPS); packed forms use the packed itineraries (SSE_SQRTP / SSE_RCPP),
// matching the AVX VSQRT/VRSQRT/VRCP definitions above.
defm SQRT  : sse1_fp_unop_s<0x51, "sqrt",  fsqrt, int_x86_sse_sqrt_ss,
                            SSE_SQRTS>,
             sse1_fp_unop_p<0x51, "sqrt",  fsqrt, SSE_SQRTP>,
             sse1_fp_unop_p_int<0x51, "sqrt",  int_x86_sse_sqrt_ps, SSE_SQRTP>,
             sse2_fp_unop_s<0x51, "sqrt",  fsqrt, int_x86_sse2_sqrt_sd,
                            SSE_SQRTS>,
             sse2_fp_unop_p<0x51, "sqrt",  fsqrt, SSE_SQRTP>,
             sse2_fp_unop_p_int<0x51, "sqrt", int_x86_sse2_sqrt_pd, SSE_SQRTP>;

// Reciprocal approximations. Note that these typically require refinement
// in order to obtain suitable precision.
defm RSQRT : sse1_fp_unop_s<0x52, "rsqrt", X86frsqrt, int_x86_sse_rsqrt_ss,
                            SSE_SQRTS>,
             sse1_fp_unop_p<0x52, "rsqrt", X86frsqrt, SSE_SQRTP>,
             sse1_fp_unop_p_int<0x52, "rsqrt", int_x86_sse_rsqrt_ps,
                                SSE_SQRTP>;
defm RCP   : sse1_fp_unop_s<0x53, "rcp", X86frcp, int_x86_sse_rcp_ss,
                            SSE_RCPS>,
             sse1_fp_unop_p<0x53, "rcp", X86frcp, SSE_RCPP>,
             sse1_fp_unop_p_int<0x53, "rcp", int_x86_sse_rcp_ps, SSE_RCPP>;

// There is no f64 version of the reciprocal approximation instructions.
3373 3374//===----------------------------------------------------------------------===// 3375// SSE 1 & 2 - Non-temporal stores 3376//===----------------------------------------------------------------------===// 3377 3378let AddedComplexity = 400 in { // Prefer non-temporal versions 3379 def VMOVNTPSmr : VPSI<0x2B, MRMDestMem, (outs), 3380 (ins f128mem:$dst, VR128:$src), 3381 "movntps\t{$src, $dst|$dst, $src}", 3382 [(alignednontemporalstore (v4f32 VR128:$src), 3383 addr:$dst)], 3384 IIC_SSE_MOVNT>, VEX; 3385 def VMOVNTPDmr : VPDI<0x2B, MRMDestMem, (outs), 3386 (ins f128mem:$dst, VR128:$src), 3387 "movntpd\t{$src, $dst|$dst, $src}", 3388 [(alignednontemporalstore (v2f64 VR128:$src), 3389 addr:$dst)], 3390 IIC_SSE_MOVNT>, VEX; 3391 3392 let ExeDomain = SSEPackedInt in 3393 def VMOVNTDQmr : VPDI<0xE7, MRMDestMem, (outs), 3394 (ins f128mem:$dst, VR128:$src), 3395 "movntdq\t{$src, $dst|$dst, $src}", 3396 [(alignednontemporalstore (v2i64 VR128:$src), 3397 addr:$dst)], 3398 IIC_SSE_MOVNT>, VEX; 3399 3400 def : Pat<(alignednontemporalstore (v2i64 VR128:$src), addr:$dst), 3401 (VMOVNTDQmr addr:$dst, VR128:$src)>, Requires<[HasAVX]>; 3402 3403 def VMOVNTPSYmr : VPSI<0x2B, MRMDestMem, (outs), 3404 (ins f256mem:$dst, VR256:$src), 3405 "movntps\t{$src, $dst|$dst, $src}", 3406 [(alignednontemporalstore (v8f32 VR256:$src), 3407 addr:$dst)], 3408 IIC_SSE_MOVNT>, VEX, VEX_L; 3409 def VMOVNTPDYmr : VPDI<0x2B, MRMDestMem, (outs), 3410 (ins f256mem:$dst, VR256:$src), 3411 "movntpd\t{$src, $dst|$dst, $src}", 3412 [(alignednontemporalstore (v4f64 VR256:$src), 3413 addr:$dst)], 3414 IIC_SSE_MOVNT>, VEX, VEX_L; 3415 let ExeDomain = SSEPackedInt in 3416 def VMOVNTDQYmr : VPDI<0xE7, MRMDestMem, (outs), 3417 (ins f256mem:$dst, VR256:$src), 3418 "movntdq\t{$src, $dst|$dst, $src}", 3419 [(alignednontemporalstore (v4i64 VR256:$src), 3420 addr:$dst)], 3421 IIC_SSE_MOVNT>, VEX, VEX_L; 3422} 3423 3424let AddedComplexity = 400 in { // Prefer non-temporal versions 3425def MOVNTPSmr : PSI<0x2B, 
MRMDestMem, (outs), (ins f128mem:$dst, VR128:$src), 3426 "movntps\t{$src, $dst|$dst, $src}", 3427 [(alignednontemporalstore (v4f32 VR128:$src), addr:$dst)], 3428 IIC_SSE_MOVNT>; 3429def MOVNTPDmr : PDI<0x2B, MRMDestMem, (outs), (ins f128mem:$dst, VR128:$src), 3430 "movntpd\t{$src, $dst|$dst, $src}", 3431 [(alignednontemporalstore(v2f64 VR128:$src), addr:$dst)], 3432 IIC_SSE_MOVNT>; 3433 3434let ExeDomain = SSEPackedInt in 3435def MOVNTDQmr : PDI<0xE7, MRMDestMem, (outs), (ins f128mem:$dst, VR128:$src), 3436 "movntdq\t{$src, $dst|$dst, $src}", 3437 [(alignednontemporalstore (v2i64 VR128:$src), addr:$dst)], 3438 IIC_SSE_MOVNT>; 3439 3440def : Pat<(alignednontemporalstore (v2i64 VR128:$src), addr:$dst), 3441 (MOVNTDQmr addr:$dst, VR128:$src)>, Requires<[UseSSE2]>; 3442 3443// There is no AVX form for instructions below this point 3444def MOVNTImr : I<0xC3, MRMDestMem, (outs), (ins i32mem:$dst, GR32:$src), 3445 "movnti{l}\t{$src, $dst|$dst, $src}", 3446 [(nontemporalstore (i32 GR32:$src), addr:$dst)], 3447 IIC_SSE_MOVNT>, 3448 TB, Requires<[HasSSE2]>; 3449def MOVNTI_64mr : RI<0xC3, MRMDestMem, (outs), (ins i64mem:$dst, GR64:$src), 3450 "movnti{q}\t{$src, $dst|$dst, $src}", 3451 [(nontemporalstore (i64 GR64:$src), addr:$dst)], 3452 IIC_SSE_MOVNT>, 3453 TB, Requires<[HasSSE2]>; 3454} 3455 3456//===----------------------------------------------------------------------===// 3457// SSE 1 & 2 - Prefetch and memory fence 3458//===----------------------------------------------------------------------===// 3459 3460// Prefetch intrinsic. 
// The prefetch intrinsic's locality argument (0..3) maps to the four hint
// instructions; the last operand (i32 1) selects the read variant.
let Predicates = [HasSSE1] in {
def PREFETCHT0   : I<0x18, MRM1m, (outs), (ins i8mem:$src),
                     "prefetcht0\t$src", [(prefetch addr:$src, imm, (i32 3), (i32 1))],
                     IIC_SSE_PREFETCH>, TB;
def PREFETCHT1   : I<0x18, MRM2m, (outs), (ins i8mem:$src),
                     "prefetcht1\t$src", [(prefetch addr:$src, imm, (i32 2), (i32 1))],
                     IIC_SSE_PREFETCH>, TB;
def PREFETCHT2   : I<0x18, MRM3m, (outs), (ins i8mem:$src),
                     "prefetcht2\t$src", [(prefetch addr:$src, imm, (i32 1), (i32 1))],
                     IIC_SSE_PREFETCH>, TB;
def PREFETCHNTA  : I<0x18, MRM0m, (outs), (ins i8mem:$src),
                     "prefetchnta\t$src", [(prefetch addr:$src, imm, (i32 0), (i32 1))],
                     IIC_SSE_PREFETCH>, TB;
}

// Flush cache
def CLFLUSH : I<0xAE, MRM7m, (outs), (ins i8mem:$src),
                "clflush\t$src", [(int_x86_sse2_clflush addr:$src)],
                IIC_SSE_PREFETCH>, TB, Requires<[HasSSE2]>;

// Pause. This "instruction" is encoded as "rep; nop", so even though it
// was introduced with SSE2, it's backward compatible.
def PAUSE : I<0x90, RawFrm, (outs), (ins), "pause", [], IIC_SSE_PAUSE>, REP;

// Load, store, and memory fence
def SFENCE : I<0xAE, MRM_F8, (outs), (ins),
               "sfence", [(int_x86_sse_sfence)], IIC_SSE_SFENCE>,
             TB, Requires<[HasSSE1]>;
def LFENCE : I<0xAE, MRM_E8, (outs), (ins),
               "lfence", [(int_x86_sse2_lfence)], IIC_SSE_LFENCE>,
             TB, Requires<[HasSSE2]>;
def MFENCE : I<0xAE, MRM_F0, (outs), (ins),
               "mfence", [(int_x86_sse2_mfence)], IIC_SSE_MFENCE>,
             TB, Requires<[HasSSE2]>;

def : Pat<(X86SFence), (SFENCE)>;
def : Pat<(X86LFence), (LFENCE)>;
def : Pat<(X86MFence), (MFENCE)>;

//===----------------------------------------------------------------------===//
// SSE 1 & 2 - Load/Store XCSR register
//===----------------------------------------------------------------------===//

def VLDMXCSR : VPSI<0xAE, MRM2m, (outs), (ins i32mem:$src),
                    "ldmxcsr\t$src", [(int_x86_sse_ldmxcsr addr:$src)],
                    IIC_SSE_LDMXCSR>, VEX;
def VSTMXCSR : VPSI<0xAE, MRM3m, (outs), (ins i32mem:$dst),
                    "stmxcsr\t$dst", [(int_x86_sse_stmxcsr addr:$dst)],
                    IIC_SSE_STMXCSR>, VEX;

def LDMXCSR : PSI<0xAE, MRM2m, (outs), (ins i32mem:$src),
                  "ldmxcsr\t$src", [(int_x86_sse_ldmxcsr addr:$src)],
                  IIC_SSE_LDMXCSR>;
def STMXCSR : PSI<0xAE, MRM3m, (outs), (ins i32mem:$dst),
                  "stmxcsr\t$dst", [(int_x86_sse_stmxcsr addr:$dst)],
                  IIC_SSE_STMXCSR>;

//===---------------------------------------------------------------------===//
// SSE2 - Move Aligned/Unaligned Packed Integer Instructions
//===---------------------------------------------------------------------===//

let ExeDomain = SSEPackedInt in { // SSE integer instructions

// Register-to-register moves carry no patterns; isel uses COPY instead.
let neverHasSideEffects = 1 in {
def VMOVDQArr  : VPDI<0x6F, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
                      "movdqa\t{$src, $dst|$dst, $src}", [], IIC_SSE_MOVA_P_RR>,
                      VEX;
def VMOVDQAYrr : VPDI<0x6F, MRMSrcReg, (outs VR256:$dst), (ins VR256:$src),
                      "movdqa\t{$src, $dst|$dst, $src}", [], IIC_SSE_MOVA_P_RR>,
                      VEX, VEX_L;
}
def VMOVDQUrr  : VSSI<0x6F, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
                      "movdqu\t{$src, $dst|$dst, $src}", [], IIC_SSE_MOVU_P_RR>,
                      VEX;
def VMOVDQUYrr : VSSI<0x6F, MRMSrcReg, (outs VR256:$dst), (ins VR256:$src),
                      "movdqu\t{$src, $dst|$dst, $src}", [], IIC_SSE_MOVU_P_RR>,
                      VEX, VEX_L;

// For Disassembler
// (0x7F encoding stores reg->reg in the reverse direction.)
let isCodeGenOnly = 1 in {
def VMOVDQArr_REV  : VPDI<0x7F, MRMDestReg, (outs VR128:$dst), (ins VR128:$src),
                          "movdqa\t{$src, $dst|$dst, $src}", [],
                          IIC_SSE_MOVA_P_RR>,
                          VEX;
def VMOVDQAYrr_REV : VPDI<0x7F, MRMDestReg, (outs VR256:$dst), (ins VR256:$src),
                          "movdqa\t{$src, $dst|$dst, $src}", [],
                          IIC_SSE_MOVA_P_RR>, VEX, VEX_L;
def VMOVDQUrr_REV  : VSSI<0x7F, MRMDestReg, (outs VR128:$dst), (ins VR128:$src),
                          "movdqu\t{$src, $dst|$dst, $src}", [],
                          IIC_SSE_MOVU_P_RR>,
                          VEX;
def VMOVDQUYrr_REV : VSSI<0x7F, MRMDestReg, (outs VR256:$dst), (ins VR256:$src),
                          "movdqu\t{$src, $dst|$dst, $src}", [],
                          IIC_SSE_MOVU_P_RR>, VEX, VEX_L;
}

let canFoldAsLoad = 1, mayLoad = 1 in {
def VMOVDQArm  : VPDI<0x6F, MRMSrcMem, (outs VR128:$dst), (ins i128mem:$src),
                      "movdqa\t{$src, $dst|$dst, $src}", [], IIC_SSE_MOVA_P_RM>,
                      VEX;
def VMOVDQAYrm : VPDI<0x6F, MRMSrcMem, (outs VR256:$dst), (ins i256mem:$src),
                      "movdqa\t{$src, $dst|$dst, $src}", [], IIC_SSE_MOVA_P_RM>,
                      VEX, VEX_L;
let Predicates = [HasAVX] in {
  def VMOVDQUrm  : I<0x6F, MRMSrcMem, (outs VR128:$dst), (ins i128mem:$src),
                     "vmovdqu\t{$src, $dst|$dst, $src}",[], IIC_SSE_MOVU_P_RM>,
                     XS, VEX;
  def VMOVDQUYrm : I<0x6F, MRMSrcMem, (outs VR256:$dst), (ins i256mem:$src),
                     "vmovdqu\t{$src, $dst|$dst, $src}",[], IIC_SSE_MOVU_P_RM>,
                     XS, VEX, VEX_L;
}
}

let mayStore = 1 in {
def VMOVDQAmr  : VPDI<0x7F, MRMDestMem, (outs),
                      (ins i128mem:$dst, VR128:$src),
                      "movdqa\t{$src, $dst|$dst, $src}", [], IIC_SSE_MOVA_P_MR>,
                      VEX;
def VMOVDQAYmr : VPDI<0x7F, MRMDestMem, (outs),
                      (ins i256mem:$dst, VR256:$src),
                      "movdqa\t{$src, $dst|$dst, $src}", [], IIC_SSE_MOVA_P_MR>,
                      VEX, VEX_L;
let Predicates = [HasAVX] in {
def VMOVDQUmr  : I<0x7F, MRMDestMem, (outs), (ins i128mem:$dst, VR128:$src),
                   "vmovdqu\t{$src, $dst|$dst, $src}",[], IIC_SSE_MOVU_P_MR>,
                   XS, VEX;
def VMOVDQUYmr : I<0x7F, MRMDestMem, (outs), (ins i256mem:$dst, VR256:$src),
                   "vmovdqu\t{$src, $dst|$dst, $src}",[], IIC_SSE_MOVU_P_MR>,
                   XS, VEX, VEX_L;
}
}

// Legacy SSE2 forms.
let neverHasSideEffects = 1 in
def MOVDQArr : PDI<0x6F, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
                   "movdqa\t{$src, $dst|$dst, $src}", [], IIC_SSE_MOVA_P_RR>;

def MOVDQUrr :   I<0x6F, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
                   "movdqu\t{$src, $dst|$dst, $src}",
                   [], IIC_SSE_MOVU_P_RR>, XS, Requires<[UseSSE2]>;

// For Disassembler
let isCodeGenOnly = 1 in {
def MOVDQArr_REV : PDI<0x7F, MRMDestReg, (outs VR128:$dst), (ins VR128:$src),
                       "movdqa\t{$src, $dst|$dst, $src}", [],
                       IIC_SSE_MOVA_P_RR>;

def MOVDQUrr_REV :   I<0x7F, MRMDestReg, (outs VR128:$dst), (ins VR128:$src),
                       "movdqu\t{$src, $dst|$dst, $src}",
                       [], IIC_SSE_MOVU_P_RR>, XS, Requires<[UseSSE2]>;
}

let canFoldAsLoad = 1, mayLoad = 1 in {
def MOVDQArm : PDI<0x6F, MRMSrcMem, (outs VR128:$dst), (ins i128mem:$src),
                   "movdqa\t{$src, $dst|$dst, $src}",
                   [/*(set VR128:$dst, (alignedloadv2i64 addr:$src))*/],
                   IIC_SSE_MOVA_P_RM>;
def MOVDQUrm :   I<0x6F, MRMSrcMem, (outs VR128:$dst), (ins i128mem:$src),
                   "movdqu\t{$src, $dst|$dst, $src}",
                   [/*(set VR128:$dst, (loadv2i64 addr:$src))*/],
                   IIC_SSE_MOVU_P_RM>,
                 XS, Requires<[UseSSE2]>;
}

let mayStore = 1 in {
def MOVDQAmr : PDI<0x7F, MRMDestMem, (outs), (ins i128mem:$dst, VR128:$src),
                   "movdqa\t{$src, $dst|$dst, $src}",
                   [/*(alignedstore (v2i64 VR128:$src), addr:$dst)*/],
                   IIC_SSE_MOVA_P_MR>;
def MOVDQUmr :   I<0x7F, MRMDestMem, (outs), (ins i128mem:$dst, VR128:$src),
                   "movdqu\t{$src, $dst|$dst, $src}",
                   [/*(store (v2i64 VR128:$src), addr:$dst)*/],
                   IIC_SSE_MOVU_P_MR>,
                 XS, Requires<[UseSSE2]>;
}

// Intrinsic forms of MOVDQU load and store
def VMOVDQUmr_Int : I<0x7F, MRMDestMem, (outs), (ins i128mem:$dst, VR128:$src),
                      "vmovdqu\t{$src, $dst|$dst, $src}",
                      [(int_x86_sse2_storeu_dq addr:$dst, VR128:$src)],
                      IIC_SSE_MOVU_P_MR>,
                    XS, VEX, Requires<[HasAVX]>;

def MOVDQUmr_Int :   I<0x7F, MRMDestMem, (outs), (ins i128mem:$dst, VR128:$src),
                       "movdqu\t{$src, $dst|$dst, $src}",
                       [(int_x86_sse2_storeu_dq addr:$dst, VR128:$src)],
                       IIC_SSE_MOVU_P_MR>,
                     XS, Requires<[UseSSE2]>;

} // ExeDomain = SSEPackedInt

let Predicates = [HasAVX] in {
  def : Pat<(int_x86_avx_storeu_dq_256 addr:$dst, VR256:$src),
            (VMOVDQUYmr addr:$dst, VR256:$src)>;
}

//===---------------------------------------------------------------------===//
// SSE2 - Packed Integer Arithmetic Instructions
//===---------------------------------------------------------------------===//

def SSE_PMADD : OpndItins<
  IIC_SSE_PMADD, IIC_SSE_PMADD
>;

let ExeDomain = SSEPackedInt in { // SSE integer instructions

/// PDI_binop_rm_int - Intrinsic-based SSE2 integer binop, rr and rm forms.
/// Is2Addr selects the destructive 2-address asm string vs. the AVX 3-address
/// one.
multiclass PDI_binop_rm_int<bits<8> opc, string OpcodeStr, Intrinsic IntId,
                            RegisterClass RC, PatFrag memop_frag,
                            X86MemOperand x86memop,
                            OpndItins itins,
                            bit IsCommutable = 0,
                            bit Is2Addr = 1> {
  let isCommutable = IsCommutable in
  def rr : PDI<opc, MRMSrcReg, (outs RC:$dst),
       (ins RC:$src1, RC:$src2),
       !if(Is2Addr,
           !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"),
           !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")),
       [(set RC:$dst, (IntId RC:$src1, RC:$src2))],
       itins.rr>;
  def rm : PDI<opc, MRMSrcMem, (outs RC:$dst),
       (ins RC:$src1, x86memop:$src2),
       !if(Is2Addr,
           !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"),
           !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")),
       [(set RC:$dst, (IntId RC:$src1, (bitconvert (memop_frag addr:$src2))))],
       itins.rm>;
}

/// PDI_binop_rmi - Vector shift: rr/rm forms take the count in an XMM
/// register (OpNode); the ri form takes an immediate count (OpNode2).
multiclass PDI_binop_rmi<bits<8> opc, bits<8> opc2, Format ImmForm,
                         string OpcodeStr, SDNode OpNode,
                         SDNode OpNode2, RegisterClass RC,
                         ValueType DstVT, ValueType SrcVT, PatFrag bc_frag,
                         ShiftOpndItins itins,
                         bit Is2Addr = 1> {
  // src2 is always 128-bit
  def rr : PDI<opc, MRMSrcReg, (outs RC:$dst),
       (ins RC:$src1, VR128:$src2),
       !if(Is2Addr,
           !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"),
           !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")),
       [(set RC:$dst, (DstVT (OpNode RC:$src1, (SrcVT VR128:$src2))))],
       itins.rr>;
  def rm : PDI<opc, MRMSrcMem, (outs RC:$dst),
       (ins RC:$src1, i128mem:$src2),
       !if(Is2Addr,
           !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"),
           !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")),
       [(set RC:$dst, (DstVT (OpNode RC:$src1,
                       (bc_frag (memopv2i64 addr:$src2)))))], itins.rm>;
  def ri : PDIi8<opc2, ImmForm, (outs RC:$dst),
       (ins RC:$src1, i32i8imm:$src2),
       !if(Is2Addr,
           !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"),
           !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")),
       [(set RC:$dst, (DstVT (OpNode2 RC:$src1, (i32 imm:$src2))))], itins.ri>;
}

/// PDI_binop_rm - Simple SSE2 binary operator with different src and dst types
multiclass PDI_binop_rm2<bits<8> opc, string OpcodeStr, SDNode OpNode,
                         ValueType DstVT, ValueType SrcVT, RegisterClass RC,
                         PatFrag memop_frag, X86MemOperand x86memop,
                         OpndItins itins,
                         bit IsCommutable = 0, bit Is2Addr = 1> {
  let isCommutable = IsCommutable in
  def rr : PDI<opc, MRMSrcReg, (outs RC:$dst),
       (ins RC:$src1, RC:$src2),
       !if(Is2Addr,
           !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"),
           !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")),
       [(set RC:$dst, (DstVT (OpNode (SrcVT RC:$src1), RC:$src2)))]>;
  def rm : PDI<opc, MRMSrcMem, (outs RC:$dst),
       (ins RC:$src1, x86memop:$src2),
       !if(Is2Addr,
           !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"),
           !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")),
       [(set RC:$dst, (DstVT (OpNode (SrcVT RC:$src1),
                       (bitconvert (memop_frag addr:$src2)))))]>;
}
} // ExeDomain = SSEPackedInt

// 128-bit Integer Arithmetic

// AVX 3-address (VEX_4V) forms; Is2Addr = 0 throughout.
let Predicates = [HasAVX] in {
defm VPADDB  : PDI_binop_rm<0xFC, "vpaddb", add, v16i8, VR128, memopv2i64,
                            i128mem, SSE_INTALU_ITINS_P, 1, 0 /*3addr*/>,
                            VEX_4V;
defm VPADDW  : PDI_binop_rm<0xFD, "vpaddw", add, v8i16, VR128, memopv2i64,
                            i128mem, SSE_INTALU_ITINS_P, 1, 0>, VEX_4V;
defm VPADDD  : PDI_binop_rm<0xFE, "vpaddd", add, v4i32, VR128, memopv2i64,
                            i128mem, SSE_INTALU_ITINS_P, 1, 0>, VEX_4V;
defm VPADDQ  : PDI_binop_rm<0xD4, "vpaddq", add, v2i64, VR128, memopv2i64,
                            i128mem, SSE_INTALUQ_ITINS_P, 1, 0>, VEX_4V;
defm VPMULLW : PDI_binop_rm<0xD5, "vpmullw", mul, v8i16, VR128, memopv2i64,
                            i128mem, SSE_INTMUL_ITINS_P, 1, 0>, VEX_4V;
defm VPSUBB  : PDI_binop_rm<0xF8, "vpsubb", sub, v16i8, VR128, memopv2i64,
                            i128mem, SSE_INTALU_ITINS_P, 0, 0>, VEX_4V;
defm VPSUBW  : PDI_binop_rm<0xF9, "vpsubw", sub, v8i16, VR128, memopv2i64,
                            i128mem, SSE_INTALU_ITINS_P, 0, 0>, VEX_4V;
defm VPSUBD  : PDI_binop_rm<0xFA, "vpsubd", sub, v4i32, VR128, memopv2i64,
                            i128mem, SSE_INTALU_ITINS_P, 0, 0>, VEX_4V;
defm VPSUBQ  : PDI_binop_rm<0xFB, "vpsubq", sub, v2i64, VR128, memopv2i64,
                            i128mem, SSE_INTALUQ_ITINS_P, 0, 0>, VEX_4V;
defm VPMULUDQ : PDI_binop_rm2<0xF4, "vpmuludq", X86pmuludq, v2i64, v4i32,
                              VR128,
                              memopv2i64, i128mem, SSE_INTMUL_ITINS_P, 1, 0>,
                              VEX_4V;

// Intrinsic forms
defm VPSUBSB  : PDI_binop_rm_int<0xE8, "vpsubsb" , int_x86_sse2_psubs_b,
                                 VR128, memopv2i64, i128mem,
                                 SSE_INTALU_ITINS_P, 0, 0>, VEX_4V;
defm VPSUBSW  : PDI_binop_rm_int<0xE9, "vpsubsw" , int_x86_sse2_psubs_w,
                                 VR128, memopv2i64, i128mem,
                                 SSE_INTALU_ITINS_P, 0, 0>, VEX_4V;
defm VPSUBUSB : PDI_binop_rm_int<0xD8, "vpsubusb", int_x86_sse2_psubus_b,
                                 VR128, memopv2i64, i128mem,
                                 SSE_INTALU_ITINS_P, 0, 0>, VEX_4V;
defm VPSUBUSW : PDI_binop_rm_int<0xD9, "vpsubusw", int_x86_sse2_psubus_w,
                                 VR128, memopv2i64, i128mem,
                                 SSE_INTALU_ITINS_P, 0, 0>, VEX_4V;
defm VPADDSB  : PDI_binop_rm_int<0xEC, "vpaddsb" , int_x86_sse2_padds_b,
                                 VR128, memopv2i64, i128mem,
                                 SSE_INTALU_ITINS_P, 1, 0>, VEX_4V;
defm VPADDSW  : PDI_binop_rm_int<0xED, "vpaddsw" , int_x86_sse2_padds_w,
                                 VR128, memopv2i64, i128mem,
                                 SSE_INTALU_ITINS_P, 1, 0>, VEX_4V;
defm VPADDUSB : PDI_binop_rm_int<0xDC, "vpaddusb", int_x86_sse2_paddus_b,
                                 VR128, memopv2i64, i128mem,
                                 SSE_INTALU_ITINS_P, 1, 0>, VEX_4V;
defm VPADDUSW : PDI_binop_rm_int<0xDD, "vpaddusw", int_x86_sse2_paddus_w,
                                 VR128, memopv2i64, i128mem,
                                 SSE_INTALU_ITINS_P, 1, 0>, VEX_4V;
defm VPMULHUW : PDI_binop_rm_int<0xE4, "vpmulhuw", int_x86_sse2_pmulhu_w,
                                 VR128, memopv2i64, i128mem,
                                 SSE_INTMUL_ITINS_P, 1, 0>, VEX_4V;
defm VPMULHW  : PDI_binop_rm_int<0xE5, "vpmulhw" , int_x86_sse2_pmulh_w,
                                 VR128, memopv2i64, i128mem,
                                 SSE_INTMUL_ITINS_P, 1, 0>, VEX_4V;
defm VPMADDWD : PDI_binop_rm_int<0xF5, "vpmaddwd", int_x86_sse2_pmadd_wd,
                                 VR128, memopv2i64, i128mem,
                                 SSE_PMADD, 1, 0>, VEX_4V;
defm VPAVGB   : PDI_binop_rm_int<0xE0, "vpavgb", int_x86_sse2_pavg_b,
                                 VR128, memopv2i64, i128mem,
                                 SSE_INTALU_ITINS_P, 1, 0>, VEX_4V;
defm VPAVGW   : PDI_binop_rm_int<0xE3, "vpavgw", int_x86_sse2_pavg_w,
                                 VR128, memopv2i64, i128mem,
SSE_INTALU_ITINS_P, 1, 0>, VEX_4V; 3806defm VPMINUB : PDI_binop_rm_int<0xDA, "vpminub", int_x86_sse2_pminu_b, 3807 VR128, memopv2i64, i128mem, 3808 SSE_INTALU_ITINS_P, 1, 0>, VEX_4V; 3809defm VPMINSW : PDI_binop_rm_int<0xEA, "vpminsw", int_x86_sse2_pmins_w, 3810 VR128, memopv2i64, i128mem, 3811 SSE_INTALU_ITINS_P, 1, 0>, VEX_4V; 3812defm VPMAXUB : PDI_binop_rm_int<0xDE, "vpmaxub", int_x86_sse2_pmaxu_b, 3813 VR128, memopv2i64, i128mem, 3814 SSE_INTALU_ITINS_P, 1, 0>, VEX_4V; 3815defm VPMAXSW : PDI_binop_rm_int<0xEE, "vpmaxsw", int_x86_sse2_pmaxs_w, 3816 VR128, memopv2i64, i128mem, 3817 SSE_INTALU_ITINS_P, 1, 0>, VEX_4V; 3818defm VPSADBW : PDI_binop_rm_int<0xF6, "vpsadbw", int_x86_sse2_psad_bw, 3819 VR128, memopv2i64, i128mem, 3820 SSE_INTALU_ITINS_P, 1, 0>, VEX_4V; 3821} 3822 3823let Predicates = [HasAVX2] in { 3824defm VPADDBY : PDI_binop_rm<0xFC, "vpaddb", add, v32i8, VR256, memopv4i64, 3825 i256mem, SSE_INTALU_ITINS_P, 1, 0>, VEX_4V, VEX_L; 3826defm VPADDWY : PDI_binop_rm<0xFD, "vpaddw", add, v16i16, VR256, memopv4i64, 3827 i256mem, SSE_INTALU_ITINS_P, 1, 0>, VEX_4V, VEX_L; 3828defm VPADDDY : PDI_binop_rm<0xFE, "vpaddd", add, v8i32, VR256, memopv4i64, 3829 i256mem, SSE_INTALU_ITINS_P, 1, 0>, VEX_4V, VEX_L; 3830defm VPADDQY : PDI_binop_rm<0xD4, "vpaddq", add, v4i64, VR256, memopv4i64, 3831 i256mem, SSE_INTALUQ_ITINS_P, 1, 0>, VEX_4V, VEX_L; 3832defm VPMULLWY : PDI_binop_rm<0xD5, "vpmullw", mul, v16i16, VR256, memopv4i64, 3833 i256mem, SSE_INTMUL_ITINS_P, 1, 0>, VEX_4V, VEX_L; 3834defm VPSUBBY : PDI_binop_rm<0xF8, "vpsubb", sub, v32i8, VR256, memopv4i64, 3835 i256mem, SSE_INTALU_ITINS_P, 0, 0>, VEX_4V, VEX_L; 3836defm VPSUBWY : PDI_binop_rm<0xF9, "vpsubw", sub, v16i16,VR256, memopv4i64, 3837 i256mem, SSE_INTALU_ITINS_P, 0, 0>, VEX_4V, VEX_L; 3838defm VPSUBDY : PDI_binop_rm<0xFA, "vpsubd", sub, v8i32, VR256, memopv4i64, 3839 i256mem, SSE_INTALU_ITINS_P, 0, 0>, VEX_4V, VEX_L; 3840defm VPSUBQY : PDI_binop_rm<0xFB, "vpsubq", sub, v4i64, VR256, memopv4i64, 3841 i256mem, 
SSE_INTALUQ_ITINS_P, 0, 0>, VEX_4V, VEX_L; 3842defm VPMULUDQY : PDI_binop_rm2<0xF4, "vpmuludq", X86pmuludq, v4i64, v8i32, 3843 VR256, memopv4i64, i256mem, 3844 SSE_INTMUL_ITINS_P, 1, 0>, VEX_4V, VEX_L; 3845 3846// Intrinsic forms 3847defm VPSUBSBY : PDI_binop_rm_int<0xE8, "vpsubsb" , int_x86_avx2_psubs_b, 3848 VR256, memopv4i64, i256mem, 3849 SSE_INTALU_ITINS_P, 0, 0>, VEX_4V, VEX_L; 3850defm VPSUBSWY : PDI_binop_rm_int<0xE9, "vpsubsw" , int_x86_avx2_psubs_w, 3851 VR256, memopv4i64, i256mem, 3852 SSE_INTALU_ITINS_P, 0, 0>, VEX_4V, VEX_L; 3853defm VPSUBUSBY : PDI_binop_rm_int<0xD8, "vpsubusb", int_x86_avx2_psubus_b, 3854 VR256, memopv4i64, i256mem, 3855 SSE_INTALU_ITINS_P, 0, 0>, VEX_4V, VEX_L; 3856defm VPSUBUSWY : PDI_binop_rm_int<0xD9, "vpsubusw", int_x86_avx2_psubus_w, 3857 VR256, memopv4i64, i256mem, 3858 SSE_INTALU_ITINS_P, 0, 0>, VEX_4V, VEX_L; 3859defm VPADDSBY : PDI_binop_rm_int<0xEC, "vpaddsb" , int_x86_avx2_padds_b, 3860 VR256, memopv4i64, i256mem, 3861 SSE_INTALU_ITINS_P, 1, 0>, VEX_4V, VEX_L; 3862defm VPADDSWY : PDI_binop_rm_int<0xED, "vpaddsw" , int_x86_avx2_padds_w, 3863 VR256, memopv4i64, i256mem, 3864 SSE_INTALU_ITINS_P, 1, 0>, VEX_4V, VEX_L; 3865defm VPADDUSBY : PDI_binop_rm_int<0xDC, "vpaddusb", int_x86_avx2_paddus_b, 3866 VR256, memopv4i64, i256mem, 3867 SSE_INTALU_ITINS_P, 1, 0>, VEX_4V, VEX_L; 3868defm VPADDUSWY : PDI_binop_rm_int<0xDD, "vpaddusw", int_x86_avx2_paddus_w, 3869 VR256, memopv4i64, i256mem, 3870 SSE_INTALU_ITINS_P, 1, 0>, VEX_4V, VEX_L; 3871defm VPMULHUWY : PDI_binop_rm_int<0xE4, "vpmulhuw", int_x86_avx2_pmulhu_w, 3872 VR256, memopv4i64, i256mem, 3873 SSE_INTMUL_ITINS_P, 1, 0>, VEX_4V, VEX_L; 3874defm VPMULHWY : PDI_binop_rm_int<0xE5, "vpmulhw" , int_x86_avx2_pmulh_w, 3875 VR256, memopv4i64, i256mem, 3876 SSE_INTMUL_ITINS_P, 1, 0>, VEX_4V, VEX_L; 3877defm VPMADDWDY : PDI_binop_rm_int<0xF5, "vpmaddwd", int_x86_avx2_pmadd_wd, 3878 VR256, memopv4i64, i256mem, 3879 SSE_PMADD, 1, 0>, VEX_4V, VEX_L; 3880defm VPAVGBY : 
PDI_binop_rm_int<0xE0, "vpavgb", int_x86_avx2_pavg_b, 3881 VR256, memopv4i64, i256mem, 3882 SSE_INTALU_ITINS_P, 1, 0>, VEX_4V, VEX_L; 3883defm VPAVGWY : PDI_binop_rm_int<0xE3, "vpavgw", int_x86_avx2_pavg_w, 3884 VR256, memopv4i64, i256mem, 3885 SSE_INTALU_ITINS_P, 1, 0>, VEX_4V, VEX_L; 3886defm VPMINUBY : PDI_binop_rm_int<0xDA, "vpminub", int_x86_avx2_pminu_b, 3887 VR256, memopv4i64, i256mem, 3888 SSE_INTALU_ITINS_P, 1, 0>, VEX_4V, VEX_L; 3889defm VPMINSWY : PDI_binop_rm_int<0xEA, "vpminsw", int_x86_avx2_pmins_w, 3890 VR256, memopv4i64, i256mem, 3891 SSE_INTALU_ITINS_P, 1, 0>, VEX_4V, VEX_L; 3892defm VPMAXUBY : PDI_binop_rm_int<0xDE, "vpmaxub", int_x86_avx2_pmaxu_b, 3893 VR256, memopv4i64, i256mem, 3894 SSE_INTALU_ITINS_P, 1, 0>, VEX_4V, VEX_L; 3895defm VPMAXSWY : PDI_binop_rm_int<0xEE, "vpmaxsw", int_x86_avx2_pmaxs_w, 3896 VR256, memopv4i64, i256mem, 3897 SSE_INTALU_ITINS_P, 1, 0>, VEX_4V, VEX_L; 3898defm VPSADBWY : PDI_binop_rm_int<0xF6, "vpsadbw", int_x86_avx2_psad_bw, 3899 VR256, memopv4i64, i256mem, 3900 SSE_INTALU_ITINS_P, 1, 0>, VEX_4V, VEX_L; 3901} 3902 3903let Constraints = "$src1 = $dst" in { 3904defm PADDB : PDI_binop_rm<0xFC, "paddb", add, v16i8, VR128, memopv2i64, 3905 i128mem, SSE_INTALU_ITINS_P, 1>; 3906defm PADDW : PDI_binop_rm<0xFD, "paddw", add, v8i16, VR128, memopv2i64, 3907 i128mem, SSE_INTALU_ITINS_P, 1>; 3908defm PADDD : PDI_binop_rm<0xFE, "paddd", add, v4i32, VR128, memopv2i64, 3909 i128mem, SSE_INTALU_ITINS_P, 1>; 3910defm PADDQ : PDI_binop_rm<0xD4, "paddq", add, v2i64, VR128, memopv2i64, 3911 i128mem, SSE_INTALUQ_ITINS_P, 1>; 3912defm PMULLW : PDI_binop_rm<0xD5, "pmullw", mul, v8i16, VR128, memopv2i64, 3913 i128mem, SSE_INTMUL_ITINS_P, 1>; 3914defm PSUBB : PDI_binop_rm<0xF8, "psubb", sub, v16i8, VR128, memopv2i64, 3915 i128mem, SSE_INTALU_ITINS_P>; 3916defm PSUBW : PDI_binop_rm<0xF9, "psubw", sub, v8i16, VR128, memopv2i64, 3917 i128mem, SSE_INTALU_ITINS_P>; 3918defm PSUBD : PDI_binop_rm<0xFA, "psubd", sub, v4i32, VR128, memopv2i64, 3919 
i128mem, SSE_INTALU_ITINS_P>; 3920defm PSUBQ : PDI_binop_rm<0xFB, "psubq", sub, v2i64, VR128, memopv2i64, 3921 i128mem, SSE_INTALUQ_ITINS_P>; 3922defm PMULUDQ : PDI_binop_rm2<0xF4, "pmuludq", X86pmuludq, v2i64, v4i32, VR128, 3923 memopv2i64, i128mem, SSE_INTMUL_ITINS_P, 1>; 3924 3925// Intrinsic forms 3926defm PSUBSB : PDI_binop_rm_int<0xE8, "psubsb" , int_x86_sse2_psubs_b, 3927 VR128, memopv2i64, i128mem, 3928 SSE_INTALU_ITINS_P>; 3929defm PSUBSW : PDI_binop_rm_int<0xE9, "psubsw" , int_x86_sse2_psubs_w, 3930 VR128, memopv2i64, i128mem, 3931 SSE_INTALU_ITINS_P>; 3932defm PSUBUSB : PDI_binop_rm_int<0xD8, "psubusb", int_x86_sse2_psubus_b, 3933 VR128, memopv2i64, i128mem, 3934 SSE_INTALU_ITINS_P>; 3935defm PSUBUSW : PDI_binop_rm_int<0xD9, "psubusw", int_x86_sse2_psubus_w, 3936 VR128, memopv2i64, i128mem, 3937 SSE_INTALU_ITINS_P>; 3938defm PADDSB : PDI_binop_rm_int<0xEC, "paddsb" , int_x86_sse2_padds_b, 3939 VR128, memopv2i64, i128mem, 3940 SSE_INTALU_ITINS_P, 1>; 3941defm PADDSW : PDI_binop_rm_int<0xED, "paddsw" , int_x86_sse2_padds_w, 3942 VR128, memopv2i64, i128mem, 3943 SSE_INTALU_ITINS_P, 1>; 3944defm PADDUSB : PDI_binop_rm_int<0xDC, "paddusb", int_x86_sse2_paddus_b, 3945 VR128, memopv2i64, i128mem, 3946 SSE_INTALU_ITINS_P, 1>; 3947defm PADDUSW : PDI_binop_rm_int<0xDD, "paddusw", int_x86_sse2_paddus_w, 3948 VR128, memopv2i64, i128mem, 3949 SSE_INTALU_ITINS_P, 1>; 3950defm PMULHUW : PDI_binop_rm_int<0xE4, "pmulhuw", int_x86_sse2_pmulhu_w, 3951 VR128, memopv2i64, i128mem, 3952 SSE_INTMUL_ITINS_P, 1>; 3953defm PMULHW : PDI_binop_rm_int<0xE5, "pmulhw" , int_x86_sse2_pmulh_w, 3954 VR128, memopv2i64, i128mem, 3955 SSE_INTMUL_ITINS_P, 1>; 3956defm PMADDWD : PDI_binop_rm_int<0xF5, "pmaddwd", int_x86_sse2_pmadd_wd, 3957 VR128, memopv2i64, i128mem, 3958 SSE_PMADD, 1>; 3959defm PAVGB : PDI_binop_rm_int<0xE0, "pavgb", int_x86_sse2_pavg_b, 3960 VR128, memopv2i64, i128mem, 3961 SSE_INTALU_ITINS_P, 1>; 3962defm PAVGW : PDI_binop_rm_int<0xE3, "pavgw", int_x86_sse2_pavg_w, 3963 
VR128, memopv2i64, i128mem, 3964 SSE_INTALU_ITINS_P, 1>; 3965defm PMINUB : PDI_binop_rm_int<0xDA, "pminub", int_x86_sse2_pminu_b, 3966 VR128, memopv2i64, i128mem, 3967 SSE_INTALU_ITINS_P, 1>; 3968defm PMINSW : PDI_binop_rm_int<0xEA, "pminsw", int_x86_sse2_pmins_w, 3969 VR128, memopv2i64, i128mem, 3970 SSE_INTALU_ITINS_P, 1>; 3971defm PMAXUB : PDI_binop_rm_int<0xDE, "pmaxub", int_x86_sse2_pmaxu_b, 3972 VR128, memopv2i64, i128mem, 3973 SSE_INTALU_ITINS_P, 1>; 3974defm PMAXSW : PDI_binop_rm_int<0xEE, "pmaxsw", int_x86_sse2_pmaxs_w, 3975 VR128, memopv2i64, i128mem, 3976 SSE_INTALU_ITINS_P, 1>; 3977defm PSADBW : PDI_binop_rm_int<0xF6, "psadbw", int_x86_sse2_psad_bw, 3978 VR128, memopv2i64, i128mem, 3979 SSE_INTALU_ITINS_P, 1>; 3980 3981} // Constraints = "$src1 = $dst" 3982 3983//===---------------------------------------------------------------------===// 3984// SSE2 - Packed Integer Logical Instructions 3985//===---------------------------------------------------------------------===// 3986 3987let Predicates = [HasAVX] in { 3988defm VPSLLW : PDI_binop_rmi<0xF1, 0x71, MRM6r, "vpsllw", X86vshl, X86vshli, 3989 VR128, v8i16, v8i16, bc_v8i16, 3990 SSE_INTSHIFT_ITINS_P, 0>, VEX_4V; 3991defm VPSLLD : PDI_binop_rmi<0xF2, 0x72, MRM6r, "vpslld", X86vshl, X86vshli, 3992 VR128, v4i32, v4i32, bc_v4i32, 3993 SSE_INTSHIFT_ITINS_P, 0>, VEX_4V; 3994defm VPSLLQ : PDI_binop_rmi<0xF3, 0x73, MRM6r, "vpsllq", X86vshl, X86vshli, 3995 VR128, v2i64, v2i64, bc_v2i64, 3996 SSE_INTSHIFT_ITINS_P, 0>, VEX_4V; 3997 3998defm VPSRLW : PDI_binop_rmi<0xD1, 0x71, MRM2r, "vpsrlw", X86vsrl, X86vsrli, 3999 VR128, v8i16, v8i16, bc_v8i16, 4000 SSE_INTSHIFT_ITINS_P, 0>, VEX_4V; 4001defm VPSRLD : PDI_binop_rmi<0xD2, 0x72, MRM2r, "vpsrld", X86vsrl, X86vsrli, 4002 VR128, v4i32, v4i32, bc_v4i32, 4003 SSE_INTSHIFT_ITINS_P, 0>, VEX_4V; 4004defm VPSRLQ : PDI_binop_rmi<0xD3, 0x73, MRM2r, "vpsrlq", X86vsrl, X86vsrli, 4005 VR128, v2i64, v2i64, bc_v2i64, 4006 SSE_INTSHIFT_ITINS_P, 0>, VEX_4V; 4007 4008defm VPSRAW : 
PDI_binop_rmi<0xE1, 0x71, MRM4r, "vpsraw", X86vsra, X86vsrai, 4009 VR128, v8i16, v8i16, bc_v8i16, 4010 SSE_INTSHIFT_ITINS_P, 0>, VEX_4V; 4011defm VPSRAD : PDI_binop_rmi<0xE2, 0x72, MRM4r, "vpsrad", X86vsra, X86vsrai, 4012 VR128, v4i32, v4i32, bc_v4i32, 4013 SSE_INTSHIFT_ITINS_P, 0>, VEX_4V; 4014 4015let ExeDomain = SSEPackedInt in { 4016 // 128-bit logical shifts. 4017 def VPSLLDQri : PDIi8<0x73, MRM7r, 4018 (outs VR128:$dst), (ins VR128:$src1, i32i8imm:$src2), 4019 "vpslldq\t{$src2, $src1, $dst|$dst, $src1, $src2}", 4020 [(set VR128:$dst, 4021 (int_x86_sse2_psll_dq_bs VR128:$src1, imm:$src2))]>, 4022 VEX_4V; 4023 def VPSRLDQri : PDIi8<0x73, MRM3r, 4024 (outs VR128:$dst), (ins VR128:$src1, i32i8imm:$src2), 4025 "vpsrldq\t{$src2, $src1, $dst|$dst, $src1, $src2}", 4026 [(set VR128:$dst, 4027 (int_x86_sse2_psrl_dq_bs VR128:$src1, imm:$src2))]>, 4028 VEX_4V; 4029 // PSRADQri doesn't exist in SSE[1-3]. 4030} 4031} // Predicates = [HasAVX] 4032 4033let Predicates = [HasAVX2] in { 4034defm VPSLLWY : PDI_binop_rmi<0xF1, 0x71, MRM6r, "vpsllw", X86vshl, X86vshli, 4035 VR256, v16i16, v8i16, bc_v8i16, 4036 SSE_INTSHIFT_ITINS_P, 0>, VEX_4V, VEX_L; 4037defm VPSLLDY : PDI_binop_rmi<0xF2, 0x72, MRM6r, "vpslld", X86vshl, X86vshli, 4038 VR256, v8i32, v4i32, bc_v4i32, 4039 SSE_INTSHIFT_ITINS_P, 0>, VEX_4V, VEX_L; 4040defm VPSLLQY : PDI_binop_rmi<0xF3, 0x73, MRM6r, "vpsllq", X86vshl, X86vshli, 4041 VR256, v4i64, v2i64, bc_v2i64, 4042 SSE_INTSHIFT_ITINS_P, 0>, VEX_4V, VEX_L; 4043 4044defm VPSRLWY : PDI_binop_rmi<0xD1, 0x71, MRM2r, "vpsrlw", X86vsrl, X86vsrli, 4045 VR256, v16i16, v8i16, bc_v8i16, 4046 SSE_INTSHIFT_ITINS_P, 0>, VEX_4V, VEX_L; 4047defm VPSRLDY : PDI_binop_rmi<0xD2, 0x72, MRM2r, "vpsrld", X86vsrl, X86vsrli, 4048 VR256, v8i32, v4i32, bc_v4i32, 4049 SSE_INTSHIFT_ITINS_P, 0>, VEX_4V, VEX_L; 4050defm VPSRLQY : PDI_binop_rmi<0xD3, 0x73, MRM2r, "vpsrlq", X86vsrl, X86vsrli, 4051 VR256, v4i64, v2i64, bc_v2i64, 4052 SSE_INTSHIFT_ITINS_P, 0>, VEX_4V, VEX_L; 4053 4054defm VPSRAWY : 
PDI_binop_rmi<0xE1, 0x71, MRM4r, "vpsraw", X86vsra, X86vsrai, 4055 VR256, v16i16, v8i16, bc_v8i16, 4056 SSE_INTSHIFT_ITINS_P, 0>, VEX_4V, VEX_L; 4057defm VPSRADY : PDI_binop_rmi<0xE2, 0x72, MRM4r, "vpsrad", X86vsra, X86vsrai, 4058 VR256, v8i32, v4i32, bc_v4i32, 4059 SSE_INTSHIFT_ITINS_P, 0>, VEX_4V, VEX_L; 4060 4061let ExeDomain = SSEPackedInt in { 4062 // 256-bit logical shifts. 4063 def VPSLLDQYri : PDIi8<0x73, MRM7r, 4064 (outs VR256:$dst), (ins VR256:$src1, i32i8imm:$src2), 4065 "vpslldq\t{$src2, $src1, $dst|$dst, $src1, $src2}", 4066 [(set VR256:$dst, 4067 (int_x86_avx2_psll_dq_bs VR256:$src1, imm:$src2))]>, 4068 VEX_4V, VEX_L; 4069 def VPSRLDQYri : PDIi8<0x73, MRM3r, 4070 (outs VR256:$dst), (ins VR256:$src1, i32i8imm:$src2), 4071 "vpsrldq\t{$src2, $src1, $dst|$dst, $src1, $src2}", 4072 [(set VR256:$dst, 4073 (int_x86_avx2_psrl_dq_bs VR256:$src1, imm:$src2))]>, 4074 VEX_4V, VEX_L; 4075 // PSRADQYri doesn't exist in SSE[1-3]. 4076} 4077} // Predicates = [HasAVX2] 4078 4079let Constraints = "$src1 = $dst" in { 4080defm PSLLW : PDI_binop_rmi<0xF1, 0x71, MRM6r, "psllw", X86vshl, X86vshli, 4081 VR128, v8i16, v8i16, bc_v8i16, 4082 SSE_INTSHIFT_ITINS_P>; 4083defm PSLLD : PDI_binop_rmi<0xF2, 0x72, MRM6r, "pslld", X86vshl, X86vshli, 4084 VR128, v4i32, v4i32, bc_v4i32, 4085 SSE_INTSHIFT_ITINS_P>; 4086defm PSLLQ : PDI_binop_rmi<0xF3, 0x73, MRM6r, "psllq", X86vshl, X86vshli, 4087 VR128, v2i64, v2i64, bc_v2i64, 4088 SSE_INTSHIFT_ITINS_P>; 4089 4090defm PSRLW : PDI_binop_rmi<0xD1, 0x71, MRM2r, "psrlw", X86vsrl, X86vsrli, 4091 VR128, v8i16, v8i16, bc_v8i16, 4092 SSE_INTSHIFT_ITINS_P>; 4093defm PSRLD : PDI_binop_rmi<0xD2, 0x72, MRM2r, "psrld", X86vsrl, X86vsrli, 4094 VR128, v4i32, v4i32, bc_v4i32, 4095 SSE_INTSHIFT_ITINS_P>; 4096defm PSRLQ : PDI_binop_rmi<0xD3, 0x73, MRM2r, "psrlq", X86vsrl, X86vsrli, 4097 VR128, v2i64, v2i64, bc_v2i64, 4098 SSE_INTSHIFT_ITINS_P>; 4099 4100defm PSRAW : PDI_binop_rmi<0xE1, 0x71, MRM4r, "psraw", X86vsra, X86vsrai, 4101 VR128, v8i16, v8i16, 
bc_v8i16, 4102 SSE_INTSHIFT_ITINS_P>; 4103defm PSRAD : PDI_binop_rmi<0xE2, 0x72, MRM4r, "psrad", X86vsra, X86vsrai, 4104 VR128, v4i32, v4i32, bc_v4i32, 4105 SSE_INTSHIFT_ITINS_P>; 4106 4107let ExeDomain = SSEPackedInt in { 4108 // 128-bit logical shifts. 4109 def PSLLDQri : PDIi8<0x73, MRM7r, 4110 (outs VR128:$dst), (ins VR128:$src1, i32i8imm:$src2), 4111 "pslldq\t{$src2, $dst|$dst, $src2}", 4112 [(set VR128:$dst, 4113 (int_x86_sse2_psll_dq_bs VR128:$src1, imm:$src2))]>; 4114 def PSRLDQri : PDIi8<0x73, MRM3r, 4115 (outs VR128:$dst), (ins VR128:$src1, i32i8imm:$src2), 4116 "psrldq\t{$src2, $dst|$dst, $src2}", 4117 [(set VR128:$dst, 4118 (int_x86_sse2_psrl_dq_bs VR128:$src1, imm:$src2))]>; 4119 // PSRADQri doesn't exist in SSE[1-3]. 4120} 4121} // Constraints = "$src1 = $dst" 4122 4123let Predicates = [HasAVX] in { 4124 def : Pat<(int_x86_sse2_psll_dq VR128:$src1, imm:$src2), 4125 (VPSLLDQri VR128:$src1, (BYTE_imm imm:$src2))>; 4126 def : Pat<(int_x86_sse2_psrl_dq VR128:$src1, imm:$src2), 4127 (VPSRLDQri VR128:$src1, (BYTE_imm imm:$src2))>; 4128 def : Pat<(v2f64 (X86fsrl VR128:$src1, i32immSExt8:$src2)), 4129 (VPSRLDQri VR128:$src1, (BYTE_imm imm:$src2))>; 4130 4131 // Shift up / down and insert zero's. 
4132 def : Pat<(v2i64 (X86vshldq VR128:$src, (i8 imm:$amt))), 4133 (VPSLLDQri VR128:$src, (BYTE_imm imm:$amt))>; 4134 def : Pat<(v2i64 (X86vshrdq VR128:$src, (i8 imm:$amt))), 4135 (VPSRLDQri VR128:$src, (BYTE_imm imm:$amt))>; 4136} 4137 4138let Predicates = [HasAVX2] in { 4139 def : Pat<(int_x86_avx2_psll_dq VR256:$src1, imm:$src2), 4140 (VPSLLDQYri VR256:$src1, (BYTE_imm imm:$src2))>; 4141 def : Pat<(int_x86_avx2_psrl_dq VR256:$src1, imm:$src2), 4142 (VPSRLDQYri VR256:$src1, (BYTE_imm imm:$src2))>; 4143} 4144 4145let Predicates = [UseSSE2] in { 4146 def : Pat<(int_x86_sse2_psll_dq VR128:$src1, imm:$src2), 4147 (PSLLDQri VR128:$src1, (BYTE_imm imm:$src2))>; 4148 def : Pat<(int_x86_sse2_psrl_dq VR128:$src1, imm:$src2), 4149 (PSRLDQri VR128:$src1, (BYTE_imm imm:$src2))>; 4150 def : Pat<(v2f64 (X86fsrl VR128:$src1, i32immSExt8:$src2)), 4151 (PSRLDQri VR128:$src1, (BYTE_imm imm:$src2))>; 4152 4153 // Shift up / down and insert zero's. 4154 def : Pat<(v2i64 (X86vshldq VR128:$src, (i8 imm:$amt))), 4155 (PSLLDQri VR128:$src, (BYTE_imm imm:$amt))>; 4156 def : Pat<(v2i64 (X86vshrdq VR128:$src, (i8 imm:$amt))), 4157 (PSRLDQri VR128:$src, (BYTE_imm imm:$amt))>; 4158} 4159 4160//===---------------------------------------------------------------------===// 4161// SSE2 - Packed Integer Comparison Instructions 4162//===---------------------------------------------------------------------===// 4163 4164let Predicates = [HasAVX] in { 4165 defm VPCMPEQB : PDI_binop_rm<0x74, "vpcmpeqb", X86pcmpeq, v16i8, 4166 VR128, memopv2i64, i128mem, 4167 SSE_INTALU_ITINS_P, 1, 0>, VEX_4V; 4168 defm VPCMPEQW : PDI_binop_rm<0x75, "vpcmpeqw", X86pcmpeq, v8i16, 4169 VR128, memopv2i64, i128mem, 4170 SSE_INTALU_ITINS_P, 1, 0>, VEX_4V; 4171 defm VPCMPEQD : PDI_binop_rm<0x76, "vpcmpeqd", X86pcmpeq, v4i32, 4172 VR128, memopv2i64, i128mem, 4173 SSE_INTALU_ITINS_P, 1, 0>, VEX_4V; 4174 defm VPCMPGTB : PDI_binop_rm<0x64, "vpcmpgtb", X86pcmpgt, v16i8, 4175 VR128, memopv2i64, i128mem, 4176 SSE_INTALU_ITINS_P, 
0, 0>, VEX_4V; 4177 defm VPCMPGTW : PDI_binop_rm<0x65, "vpcmpgtw", X86pcmpgt, v8i16, 4178 VR128, memopv2i64, i128mem, 4179 SSE_INTALU_ITINS_P, 0, 0>, VEX_4V; 4180 defm VPCMPGTD : PDI_binop_rm<0x66, "vpcmpgtd", X86pcmpgt, v4i32, 4181 VR128, memopv2i64, i128mem, 4182 SSE_INTALU_ITINS_P, 0, 0>, VEX_4V; 4183} 4184 4185let Predicates = [HasAVX2] in { 4186 defm VPCMPEQBY : PDI_binop_rm<0x74, "vpcmpeqb", X86pcmpeq, v32i8, 4187 VR256, memopv4i64, i256mem, 4188 SSE_INTALU_ITINS_P, 1, 0>, VEX_4V, VEX_L; 4189 defm VPCMPEQWY : PDI_binop_rm<0x75, "vpcmpeqw", X86pcmpeq, v16i16, 4190 VR256, memopv4i64, i256mem, 4191 SSE_INTALU_ITINS_P, 1, 0>, VEX_4V, VEX_L; 4192 defm VPCMPEQDY : PDI_binop_rm<0x76, "vpcmpeqd", X86pcmpeq, v8i32, 4193 VR256, memopv4i64, i256mem, 4194 SSE_INTALU_ITINS_P, 1, 0>, VEX_4V, VEX_L; 4195 defm VPCMPGTBY : PDI_binop_rm<0x64, "vpcmpgtb", X86pcmpgt, v32i8, 4196 VR256, memopv4i64, i256mem, 4197 SSE_INTALU_ITINS_P, 0, 0>, VEX_4V, VEX_L; 4198 defm VPCMPGTWY : PDI_binop_rm<0x65, "vpcmpgtw", X86pcmpgt, v16i16, 4199 VR256, memopv4i64, i256mem, 4200 SSE_INTALU_ITINS_P, 0, 0>, VEX_4V, VEX_L; 4201 defm VPCMPGTDY : PDI_binop_rm<0x66, "vpcmpgtd", X86pcmpgt, v8i32, 4202 VR256, memopv4i64, i256mem, 4203 SSE_INTALU_ITINS_P, 0, 0>, VEX_4V, VEX_L; 4204} 4205 4206let Constraints = "$src1 = $dst" in { 4207 defm PCMPEQB : PDI_binop_rm<0x74, "pcmpeqb", X86pcmpeq, v16i8, 4208 VR128, memopv2i64, i128mem, 4209 SSE_INTALU_ITINS_P, 1>; 4210 defm PCMPEQW : PDI_binop_rm<0x75, "pcmpeqw", X86pcmpeq, v8i16, 4211 VR128, memopv2i64, i128mem, 4212 SSE_INTALU_ITINS_P, 1>; 4213 defm PCMPEQD : PDI_binop_rm<0x76, "pcmpeqd", X86pcmpeq, v4i32, 4214 VR128, memopv2i64, i128mem, 4215 SSE_INTALU_ITINS_P, 1>; 4216 defm PCMPGTB : PDI_binop_rm<0x64, "pcmpgtb", X86pcmpgt, v16i8, 4217 VR128, memopv2i64, i128mem, 4218 SSE_INTALU_ITINS_P>; 4219 defm PCMPGTW : PDI_binop_rm<0x65, "pcmpgtw", X86pcmpgt, v8i16, 4220 VR128, memopv2i64, i128mem, 4221 SSE_INTALU_ITINS_P>; 4222 defm PCMPGTD : PDI_binop_rm<0x66, 
"pcmpgtd", X86pcmpgt, v4i32, 4223 VR128, memopv2i64, i128mem, 4224 SSE_INTALU_ITINS_P>; 4225} // Constraints = "$src1 = $dst" 4226 4227//===---------------------------------------------------------------------===// 4228// SSE2 - Packed Integer Pack Instructions 4229//===---------------------------------------------------------------------===// 4230 4231let Predicates = [HasAVX] in { 4232defm VPACKSSWB : PDI_binop_rm_int<0x63, "vpacksswb", int_x86_sse2_packsswb_128, 4233 VR128, memopv2i64, i128mem, 4234 SSE_INTALU_ITINS_P, 0, 0>, VEX_4V; 4235defm VPACKSSDW : PDI_binop_rm_int<0x6B, "vpackssdw", int_x86_sse2_packssdw_128, 4236 VR128, memopv2i64, i128mem, 4237 SSE_INTALU_ITINS_P, 0, 0>, VEX_4V; 4238defm VPACKUSWB : PDI_binop_rm_int<0x67, "vpackuswb", int_x86_sse2_packuswb_128, 4239 VR128, memopv2i64, i128mem, 4240 SSE_INTALU_ITINS_P, 0, 0>, VEX_4V; 4241} 4242 4243let Predicates = [HasAVX2] in { 4244defm VPACKSSWBY : PDI_binop_rm_int<0x63, "vpacksswb", int_x86_avx2_packsswb, 4245 VR256, memopv4i64, i256mem, 4246 SSE_INTALU_ITINS_P, 0, 0>, VEX_4V, VEX_L; 4247defm VPACKSSDWY : PDI_binop_rm_int<0x6B, "vpackssdw", int_x86_avx2_packssdw, 4248 VR256, memopv4i64, i256mem, 4249 SSE_INTALU_ITINS_P, 0, 0>, VEX_4V, VEX_L; 4250defm VPACKUSWBY : PDI_binop_rm_int<0x67, "vpackuswb", int_x86_avx2_packuswb, 4251 VR256, memopv4i64, i256mem, 4252 SSE_INTALU_ITINS_P, 0, 0>, VEX_4V, VEX_L; 4253} 4254 4255let Constraints = "$src1 = $dst" in { 4256defm PACKSSWB : PDI_binop_rm_int<0x63, "packsswb", int_x86_sse2_packsswb_128, 4257 VR128, memopv2i64, i128mem, 4258 SSE_INTALU_ITINS_P>; 4259defm PACKSSDW : PDI_binop_rm_int<0x6B, "packssdw", int_x86_sse2_packssdw_128, 4260 VR128, memopv2i64, i128mem, 4261 SSE_INTALU_ITINS_P>; 4262defm PACKUSWB : PDI_binop_rm_int<0x67, "packuswb", int_x86_sse2_packuswb_128, 4263 VR128, memopv2i64, i128mem, 4264 SSE_INTALU_ITINS_P>; 4265} // Constraints = "$src1 = $dst" 4266 4267//===---------------------------------------------------------------------===// 4268// 
SSE2 - Packed Integer Shuffle Instructions 4269//===---------------------------------------------------------------------===// 4270 4271let ExeDomain = SSEPackedInt in { 4272multiclass sse2_pshuffle<string OpcodeStr, ValueType vt, SDNode OpNode> { 4273def ri : Ii8<0x70, MRMSrcReg, 4274 (outs VR128:$dst), (ins VR128:$src1, i8imm:$src2), 4275 !strconcat(OpcodeStr, 4276 "\t{$src2, $src1, $dst|$dst, $src1, $src2}"), 4277 [(set VR128:$dst, (vt (OpNode VR128:$src1, (i8 imm:$src2))))], 4278 IIC_SSE_PSHUF>; 4279def mi : Ii8<0x70, MRMSrcMem, 4280 (outs VR128:$dst), (ins i128mem:$src1, i8imm:$src2), 4281 !strconcat(OpcodeStr, 4282 "\t{$src2, $src1, $dst|$dst, $src1, $src2}"), 4283 [(set VR128:$dst, 4284 (vt (OpNode (bitconvert (memopv2i64 addr:$src1)), 4285 (i8 imm:$src2))))], 4286 IIC_SSE_PSHUF>; 4287} 4288 4289multiclass sse2_pshuffle_y<string OpcodeStr, ValueType vt, SDNode OpNode> { 4290def Yri : Ii8<0x70, MRMSrcReg, 4291 (outs VR256:$dst), (ins VR256:$src1, i8imm:$src2), 4292 !strconcat(OpcodeStr, 4293 "\t{$src2, $src1, $dst|$dst, $src1, $src2}"), 4294 [(set VR256:$dst, (vt (OpNode VR256:$src1, (i8 imm:$src2))))]>; 4295def Ymi : Ii8<0x70, MRMSrcMem, 4296 (outs VR256:$dst), (ins i256mem:$src1, i8imm:$src2), 4297 !strconcat(OpcodeStr, 4298 "\t{$src2, $src1, $dst|$dst, $src1, $src2}"), 4299 [(set VR256:$dst, 4300 (vt (OpNode (bitconvert (memopv4i64 addr:$src1)), 4301 (i8 imm:$src2))))]>; 4302} 4303} // ExeDomain = SSEPackedInt 4304 4305let Predicates = [HasAVX] in { 4306 let AddedComplexity = 5 in 4307 defm VPSHUFD : sse2_pshuffle<"vpshufd", v4i32, X86PShufd>, TB, OpSize, VEX; 4308 4309 // SSE2 with ImmT == Imm8 and XS prefix. 4310 defm VPSHUFHW : sse2_pshuffle<"vpshufhw", v8i16, X86PShufhw>, XS, VEX; 4311 4312 // SSE2 with ImmT == Imm8 and XD prefix. 
4313 defm VPSHUFLW : sse2_pshuffle<"vpshuflw", v8i16, X86PShuflw>, XD, VEX; 4314 4315 def : Pat<(v4f32 (X86PShufd (memopv4f32 addr:$src1), (i8 imm:$imm))), 4316 (VPSHUFDmi addr:$src1, imm:$imm)>; 4317 def : Pat<(v4f32 (X86PShufd VR128:$src1, (i8 imm:$imm))), 4318 (VPSHUFDri VR128:$src1, imm:$imm)>; 4319} 4320 4321let Predicates = [HasAVX2] in { 4322 defm VPSHUFD : sse2_pshuffle_y<"vpshufd", v8i32, X86PShufd>, 4323 TB, OpSize, VEX,VEX_L; 4324 defm VPSHUFHW : sse2_pshuffle_y<"vpshufhw", v16i16, X86PShufhw>, 4325 XS, VEX, VEX_L; 4326 defm VPSHUFLW : sse2_pshuffle_y<"vpshuflw", v16i16, X86PShuflw>, 4327 XD, VEX, VEX_L; 4328} 4329 4330let Predicates = [UseSSE2] in { 4331 let AddedComplexity = 5 in 4332 defm PSHUFD : sse2_pshuffle<"pshufd", v4i32, X86PShufd>, TB, OpSize; 4333 4334 // SSE2 with ImmT == Imm8 and XS prefix. 4335 defm PSHUFHW : sse2_pshuffle<"pshufhw", v8i16, X86PShufhw>, XS; 4336 4337 // SSE2 with ImmT == Imm8 and XD prefix. 4338 defm PSHUFLW : sse2_pshuffle<"pshuflw", v8i16, X86PShuflw>, XD; 4339 4340 def : Pat<(v4f32 (X86PShufd (memopv4f32 addr:$src1), (i8 imm:$imm))), 4341 (PSHUFDmi addr:$src1, imm:$imm)>; 4342 def : Pat<(v4f32 (X86PShufd VR128:$src1, (i8 imm:$imm))), 4343 (PSHUFDri VR128:$src1, imm:$imm)>; 4344} 4345 4346//===---------------------------------------------------------------------===// 4347// SSE2 - Packed Integer Unpack Instructions 4348//===---------------------------------------------------------------------===// 4349 4350let ExeDomain = SSEPackedInt in { 4351multiclass sse2_unpack<bits<8> opc, string OpcodeStr, ValueType vt, 4352 SDNode OpNode, PatFrag bc_frag, bit Is2Addr = 1> { 4353 def rr : PDI<opc, MRMSrcReg, 4354 (outs VR128:$dst), (ins VR128:$src1, VR128:$src2), 4355 !if(Is2Addr, 4356 !strconcat(OpcodeStr,"\t{$src2, $dst|$dst, $src2}"), 4357 !strconcat(OpcodeStr,"\t{$src2, $src1, $dst|$dst, $src1, $src2}")), 4358 [(set VR128:$dst, (vt (OpNode VR128:$src1, VR128:$src2)))], 4359 IIC_SSE_UNPCK>; 4360 def rm : PDI<opc, MRMSrcMem, 
4361 (outs VR128:$dst), (ins VR128:$src1, i128mem:$src2), 4362 !if(Is2Addr, 4363 !strconcat(OpcodeStr,"\t{$src2, $dst|$dst, $src2}"), 4364 !strconcat(OpcodeStr,"\t{$src2, $src1, $dst|$dst, $src1, $src2}")), 4365 [(set VR128:$dst, (OpNode VR128:$src1, 4366 (bc_frag (memopv2i64 4367 addr:$src2))))], 4368 IIC_SSE_UNPCK>; 4369} 4370 4371multiclass sse2_unpack_y<bits<8> opc, string OpcodeStr, ValueType vt, 4372 SDNode OpNode, PatFrag bc_frag> { 4373 def Yrr : PDI<opc, MRMSrcReg, 4374 (outs VR256:$dst), (ins VR256:$src1, VR256:$src2), 4375 !strconcat(OpcodeStr,"\t{$src2, $src1, $dst|$dst, $src1, $src2}"), 4376 [(set VR256:$dst, (vt (OpNode VR256:$src1, VR256:$src2)))]>; 4377 def Yrm : PDI<opc, MRMSrcMem, 4378 (outs VR256:$dst), (ins VR256:$src1, i256mem:$src2), 4379 !strconcat(OpcodeStr,"\t{$src2, $src1, $dst|$dst, $src1, $src2}"), 4380 [(set VR256:$dst, (OpNode VR256:$src1, 4381 (bc_frag (memopv4i64 addr:$src2))))]>; 4382} 4383 4384let Predicates = [HasAVX] in { 4385 defm VPUNPCKLBW : sse2_unpack<0x60, "vpunpcklbw", v16i8, X86Unpckl, 4386 bc_v16i8, 0>, VEX_4V; 4387 defm VPUNPCKLWD : sse2_unpack<0x61, "vpunpcklwd", v8i16, X86Unpckl, 4388 bc_v8i16, 0>, VEX_4V; 4389 defm VPUNPCKLDQ : sse2_unpack<0x62, "vpunpckldq", v4i32, X86Unpckl, 4390 bc_v4i32, 0>, VEX_4V; 4391 defm VPUNPCKLQDQ : sse2_unpack<0x6C, "vpunpcklqdq", v2i64, X86Unpckl, 4392 bc_v2i64, 0>, VEX_4V; 4393 4394 defm VPUNPCKHBW : sse2_unpack<0x68, "vpunpckhbw", v16i8, X86Unpckh, 4395 bc_v16i8, 0>, VEX_4V; 4396 defm VPUNPCKHWD : sse2_unpack<0x69, "vpunpckhwd", v8i16, X86Unpckh, 4397 bc_v8i16, 0>, VEX_4V; 4398 defm VPUNPCKHDQ : sse2_unpack<0x6A, "vpunpckhdq", v4i32, X86Unpckh, 4399 bc_v4i32, 0>, VEX_4V; 4400 defm VPUNPCKHQDQ : sse2_unpack<0x6D, "vpunpckhqdq", v2i64, X86Unpckh, 4401 bc_v2i64, 0>, VEX_4V; 4402} 4403 4404let Predicates = [HasAVX2] in { 4405 defm VPUNPCKLBW : sse2_unpack_y<0x60, "vpunpcklbw", v32i8, X86Unpckl, 4406 bc_v32i8>, VEX_4V, VEX_L; 4407 defm VPUNPCKLWD : sse2_unpack_y<0x61, "vpunpcklwd", v16i16, 
X86Unpckl, 4408 bc_v16i16>, VEX_4V, VEX_L; 4409 defm VPUNPCKLDQ : sse2_unpack_y<0x62, "vpunpckldq", v8i32, X86Unpckl, 4410 bc_v8i32>, VEX_4V, VEX_L; 4411 defm VPUNPCKLQDQ : sse2_unpack_y<0x6C, "vpunpcklqdq", v4i64, X86Unpckl, 4412 bc_v4i64>, VEX_4V, VEX_L; 4413 4414 defm VPUNPCKHBW : sse2_unpack_y<0x68, "vpunpckhbw", v32i8, X86Unpckh, 4415 bc_v32i8>, VEX_4V, VEX_L; 4416 defm VPUNPCKHWD : sse2_unpack_y<0x69, "vpunpckhwd", v16i16, X86Unpckh, 4417 bc_v16i16>, VEX_4V, VEX_L; 4418 defm VPUNPCKHDQ : sse2_unpack_y<0x6A, "vpunpckhdq", v8i32, X86Unpckh, 4419 bc_v8i32>, VEX_4V, VEX_L; 4420 defm VPUNPCKHQDQ : sse2_unpack_y<0x6D, "vpunpckhqdq", v4i64, X86Unpckh, 4421 bc_v4i64>, VEX_4V, VEX_L; 4422} 4423 4424let Constraints = "$src1 = $dst" in { 4425 defm PUNPCKLBW : sse2_unpack<0x60, "punpcklbw", v16i8, X86Unpckl, 4426 bc_v16i8>; 4427 defm PUNPCKLWD : sse2_unpack<0x61, "punpcklwd", v8i16, X86Unpckl, 4428 bc_v8i16>; 4429 defm PUNPCKLDQ : sse2_unpack<0x62, "punpckldq", v4i32, X86Unpckl, 4430 bc_v4i32>; 4431 defm PUNPCKLQDQ : sse2_unpack<0x6C, "punpcklqdq", v2i64, X86Unpckl, 4432 bc_v2i64>; 4433 4434 defm PUNPCKHBW : sse2_unpack<0x68, "punpckhbw", v16i8, X86Unpckh, 4435 bc_v16i8>; 4436 defm PUNPCKHWD : sse2_unpack<0x69, "punpckhwd", v8i16, X86Unpckh, 4437 bc_v8i16>; 4438 defm PUNPCKHDQ : sse2_unpack<0x6A, "punpckhdq", v4i32, X86Unpckh, 4439 bc_v4i32>; 4440 defm PUNPCKHQDQ : sse2_unpack<0x6D, "punpckhqdq", v2i64, X86Unpckh, 4441 bc_v2i64>; 4442} 4443} // ExeDomain = SSEPackedInt 4444 4445//===---------------------------------------------------------------------===// 4446// SSE2 - Packed Integer Extract and Insert 4447//===---------------------------------------------------------------------===// 4448 4449let ExeDomain = SSEPackedInt in { 4450multiclass sse2_pinsrw<bit Is2Addr = 1> { 4451 def rri : Ii8<0xC4, MRMSrcReg, 4452 (outs VR128:$dst), (ins VR128:$src1, 4453 GR32:$src2, i32i8imm:$src3), 4454 !if(Is2Addr, 4455 "pinsrw\t{$src3, $src2, $dst|$dst, $src2, $src3}", 4456 
"vpinsrw\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}"), 4457 [(set VR128:$dst, 4458 (X86pinsrw VR128:$src1, GR32:$src2, imm:$src3))], IIC_SSE_PINSRW>; 4459 def rmi : Ii8<0xC4, MRMSrcMem, 4460 (outs VR128:$dst), (ins VR128:$src1, 4461 i16mem:$src2, i32i8imm:$src3), 4462 !if(Is2Addr, 4463 "pinsrw\t{$src3, $src2, $dst|$dst, $src2, $src3}", 4464 "vpinsrw\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}"), 4465 [(set VR128:$dst, 4466 (X86pinsrw VR128:$src1, (extloadi16 addr:$src2), 4467 imm:$src3))], IIC_SSE_PINSRW>; 4468} 4469 4470// Extract 4471let Predicates = [HasAVX] in 4472def VPEXTRWri : Ii8<0xC5, MRMSrcReg, 4473 (outs GR32:$dst), (ins VR128:$src1, i32i8imm:$src2), 4474 "vpextrw\t{$src2, $src1, $dst|$dst, $src1, $src2}", 4475 [(set GR32:$dst, (X86pextrw (v8i16 VR128:$src1), 4476 imm:$src2))]>, TB, OpSize, VEX; 4477def PEXTRWri : PDIi8<0xC5, MRMSrcReg, 4478 (outs GR32:$dst), (ins VR128:$src1, i32i8imm:$src2), 4479 "pextrw\t{$src2, $src1, $dst|$dst, $src1, $src2}", 4480 [(set GR32:$dst, (X86pextrw (v8i16 VR128:$src1), 4481 imm:$src2))], IIC_SSE_PEXTRW>; 4482 4483// Insert 4484let Predicates = [HasAVX] in { 4485 defm VPINSRW : sse2_pinsrw<0>, TB, OpSize, VEX_4V; 4486 def VPINSRWrr64i : Ii8<0xC4, MRMSrcReg, (outs VR128:$dst), 4487 (ins VR128:$src1, GR64:$src2, i32i8imm:$src3), 4488 "vpinsrw\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}", 4489 []>, TB, OpSize, VEX_4V; 4490} 4491 4492let Constraints = "$src1 = $dst" in 4493 defm PINSRW : sse2_pinsrw, TB, OpSize, Requires<[UseSSE2]>; 4494 4495} // ExeDomain = SSEPackedInt 4496 4497//===---------------------------------------------------------------------===// 4498// SSE2 - Packed Mask Creation 4499//===---------------------------------------------------------------------===// 4500 4501let ExeDomain = SSEPackedInt in { 4502 4503def VPMOVMSKBrr : VPDI<0xD7, MRMSrcReg, (outs GR32:$dst), (ins VR128:$src), 4504 "pmovmskb\t{$src, $dst|$dst, $src}", 4505 [(set GR32:$dst, (int_x86_sse2_pmovmskb_128 
VR128:$src))], 4506 IIC_SSE_MOVMSK>, VEX; 4507def VPMOVMSKBr64r : VPDI<0xD7, MRMSrcReg, (outs GR64:$dst), (ins VR128:$src), 4508 "pmovmskb\t{$src, $dst|$dst, $src}", [], IIC_SSE_MOVMSK>, VEX; 4509 4510let Predicates = [HasAVX2] in { 4511def VPMOVMSKBYrr : VPDI<0xD7, MRMSrcReg, (outs GR32:$dst), (ins VR256:$src), 4512 "pmovmskb\t{$src, $dst|$dst, $src}", 4513 [(set GR32:$dst, (int_x86_avx2_pmovmskb VR256:$src))]>, VEX, VEX_L; 4514def VPMOVMSKBYr64r : VPDI<0xD7, MRMSrcReg, (outs GR64:$dst), (ins VR256:$src), 4515 "pmovmskb\t{$src, $dst|$dst, $src}", []>, VEX, VEX_L; 4516} 4517 4518def PMOVMSKBrr : PDI<0xD7, MRMSrcReg, (outs GR32:$dst), (ins VR128:$src), 4519 "pmovmskb\t{$src, $dst|$dst, $src}", 4520 [(set GR32:$dst, (int_x86_sse2_pmovmskb_128 VR128:$src))], 4521 IIC_SSE_MOVMSK>; 4522 4523} // ExeDomain = SSEPackedInt 4524 4525//===---------------------------------------------------------------------===// 4526// SSE2 - Conditional Store 4527//===---------------------------------------------------------------------===// 4528 4529let ExeDomain = SSEPackedInt in { 4530 4531let Uses = [EDI] in 4532def VMASKMOVDQU : VPDI<0xF7, MRMSrcReg, (outs), 4533 (ins VR128:$src, VR128:$mask), 4534 "maskmovdqu\t{$mask, $src|$src, $mask}", 4535 [(int_x86_sse2_maskmov_dqu VR128:$src, VR128:$mask, EDI)], 4536 IIC_SSE_MASKMOV>, VEX; 4537let Uses = [RDI] in 4538def VMASKMOVDQU64 : VPDI<0xF7, MRMSrcReg, (outs), 4539 (ins VR128:$src, VR128:$mask), 4540 "maskmovdqu\t{$mask, $src|$src, $mask}", 4541 [(int_x86_sse2_maskmov_dqu VR128:$src, VR128:$mask, RDI)], 4542 IIC_SSE_MASKMOV>, VEX; 4543 4544let Uses = [EDI] in 4545def MASKMOVDQU : PDI<0xF7, MRMSrcReg, (outs), (ins VR128:$src, VR128:$mask), 4546 "maskmovdqu\t{$mask, $src|$src, $mask}", 4547 [(int_x86_sse2_maskmov_dqu VR128:$src, VR128:$mask, EDI)], 4548 IIC_SSE_MASKMOV>; 4549let Uses = [RDI] in 4550def MASKMOVDQU64 : PDI<0xF7, MRMSrcReg, (outs), (ins VR128:$src, VR128:$mask), 4551 "maskmovdqu\t{$mask, $src|$src, $mask}", 4552 
[(int_x86_sse2_maskmov_dqu VR128:$src, VR128:$mask, RDI)], 4553 IIC_SSE_MASKMOV>; 4554 4555} // ExeDomain = SSEPackedInt 4556 4557//===---------------------------------------------------------------------===// 4558// SSE2 - Move Doubleword 4559//===---------------------------------------------------------------------===// 4560 4561//===---------------------------------------------------------------------===// 4562// Move Int Doubleword to Packed Double Int 4563// 4564def VMOVDI2PDIrr : VPDI<0x6E, MRMSrcReg, (outs VR128:$dst), (ins GR32:$src), 4565 "movd\t{$src, $dst|$dst, $src}", 4566 [(set VR128:$dst, 4567 (v4i32 (scalar_to_vector GR32:$src)))], IIC_SSE_MOVDQ>, 4568 VEX; 4569def VMOVDI2PDIrm : VPDI<0x6E, MRMSrcMem, (outs VR128:$dst), (ins i32mem:$src), 4570 "movd\t{$src, $dst|$dst, $src}", 4571 [(set VR128:$dst, 4572 (v4i32 (scalar_to_vector (loadi32 addr:$src))))], 4573 IIC_SSE_MOVDQ>, 4574 VEX; 4575def VMOV64toPQIrr : VRPDI<0x6E, MRMSrcReg, (outs VR128:$dst), (ins GR64:$src), 4576 "mov{d|q}\t{$src, $dst|$dst, $src}", 4577 [(set VR128:$dst, 4578 (v2i64 (scalar_to_vector GR64:$src)))], 4579 IIC_SSE_MOVDQ>, VEX; 4580def VMOV64toSDrr : VRPDI<0x6E, MRMSrcReg, (outs FR64:$dst), (ins GR64:$src), 4581 "mov{d|q}\t{$src, $dst|$dst, $src}", 4582 [(set FR64:$dst, (bitconvert GR64:$src))], 4583 IIC_SSE_MOVDQ>, VEX; 4584 4585def MOVDI2PDIrr : PDI<0x6E, MRMSrcReg, (outs VR128:$dst), (ins GR32:$src), 4586 "movd\t{$src, $dst|$dst, $src}", 4587 [(set VR128:$dst, 4588 (v4i32 (scalar_to_vector GR32:$src)))], IIC_SSE_MOVDQ>; 4589def MOVDI2PDIrm : PDI<0x6E, MRMSrcMem, (outs VR128:$dst), (ins i32mem:$src), 4590 "movd\t{$src, $dst|$dst, $src}", 4591 [(set VR128:$dst, 4592 (v4i32 (scalar_to_vector (loadi32 addr:$src))))], 4593 IIC_SSE_MOVDQ>; 4594def MOV64toPQIrr : RPDI<0x6E, MRMSrcReg, (outs VR128:$dst), (ins GR64:$src), 4595 "mov{d|q}\t{$src, $dst|$dst, $src}", 4596 [(set VR128:$dst, 4597 (v2i64 (scalar_to_vector GR64:$src)))], 4598 IIC_SSE_MOVDQ>; 4599def MOV64toSDrr : RPDI<0x6E, 
MRMSrcReg, (outs FR64:$dst), (ins GR64:$src), 4600 "mov{d|q}\t{$src, $dst|$dst, $src}", 4601 [(set FR64:$dst, (bitconvert GR64:$src))], 4602 IIC_SSE_MOVDQ>; 4603 4604//===---------------------------------------------------------------------===// 4605// Move Int Doubleword to Single Scalar 4606// 4607def VMOVDI2SSrr : VPDI<0x6E, MRMSrcReg, (outs FR32:$dst), (ins GR32:$src), 4608 "movd\t{$src, $dst|$dst, $src}", 4609 [(set FR32:$dst, (bitconvert GR32:$src))], 4610 IIC_SSE_MOVDQ>, VEX; 4611 4612def VMOVDI2SSrm : VPDI<0x6E, MRMSrcMem, (outs FR32:$dst), (ins i32mem:$src), 4613 "movd\t{$src, $dst|$dst, $src}", 4614 [(set FR32:$dst, (bitconvert (loadi32 addr:$src)))], 4615 IIC_SSE_MOVDQ>, 4616 VEX; 4617def MOVDI2SSrr : PDI<0x6E, MRMSrcReg, (outs FR32:$dst), (ins GR32:$src), 4618 "movd\t{$src, $dst|$dst, $src}", 4619 [(set FR32:$dst, (bitconvert GR32:$src))], 4620 IIC_SSE_MOVDQ>; 4621 4622def MOVDI2SSrm : PDI<0x6E, MRMSrcMem, (outs FR32:$dst), (ins i32mem:$src), 4623 "movd\t{$src, $dst|$dst, $src}", 4624 [(set FR32:$dst, (bitconvert (loadi32 addr:$src)))], 4625 IIC_SSE_MOVDQ>; 4626 4627//===---------------------------------------------------------------------===// 4628// Move Packed Doubleword Int to Packed Double Int 4629// 4630def VMOVPDI2DIrr : VPDI<0x7E, MRMDestReg, (outs GR32:$dst), (ins VR128:$src), 4631 "movd\t{$src, $dst|$dst, $src}", 4632 [(set GR32:$dst, (vector_extract (v4i32 VR128:$src), 4633 (iPTR 0)))], IIC_SSE_MOVD_ToGP>, VEX; 4634def VMOVPDI2DImr : VPDI<0x7E, MRMDestMem, (outs), 4635 (ins i32mem:$dst, VR128:$src), 4636 "movd\t{$src, $dst|$dst, $src}", 4637 [(store (i32 (vector_extract (v4i32 VR128:$src), 4638 (iPTR 0))), addr:$dst)], IIC_SSE_MOVDQ>, 4639 VEX; 4640def MOVPDI2DIrr : PDI<0x7E, MRMDestReg, (outs GR32:$dst), (ins VR128:$src), 4641 "movd\t{$src, $dst|$dst, $src}", 4642 [(set GR32:$dst, (vector_extract (v4i32 VR128:$src), 4643 (iPTR 0)))], IIC_SSE_MOVD_ToGP>; 4644def MOVPDI2DImr : PDI<0x7E, MRMDestMem, (outs), (ins i32mem:$dst, VR128:$src), 4645 
"movd\t{$src, $dst|$dst, $src}", 4646 [(store (i32 (vector_extract (v4i32 VR128:$src), 4647 (iPTR 0))), addr:$dst)], 4648 IIC_SSE_MOVDQ>; 4649 4650//===---------------------------------------------------------------------===// 4651// Move Packed Doubleword Int first element to Doubleword Int 4652// 4653def VMOVPQIto64rr : I<0x7E, MRMDestReg, (outs GR64:$dst), (ins VR128:$src), 4654 "vmov{d|q}\t{$src, $dst|$dst, $src}", 4655 [(set GR64:$dst, (vector_extract (v2i64 VR128:$src), 4656 (iPTR 0)))], 4657 IIC_SSE_MOVD_ToGP>, 4658 TB, OpSize, VEX, VEX_W, Requires<[HasAVX, In64BitMode]>; 4659 4660def MOVPQIto64rr : RPDI<0x7E, MRMDestReg, (outs GR64:$dst), (ins VR128:$src), 4661 "mov{d|q}\t{$src, $dst|$dst, $src}", 4662 [(set GR64:$dst, (vector_extract (v2i64 VR128:$src), 4663 (iPTR 0)))], 4664 IIC_SSE_MOVD_ToGP>; 4665 4666//===---------------------------------------------------------------------===// 4667// Bitcast FR64 <-> GR64 4668// 4669let Predicates = [HasAVX] in 4670def VMOV64toSDrm : S2SI<0x7E, MRMSrcMem, (outs FR64:$dst), (ins i64mem:$src), 4671 "vmovq\t{$src, $dst|$dst, $src}", 4672 [(set FR64:$dst, (bitconvert (loadi64 addr:$src)))]>, 4673 VEX; 4674def VMOVSDto64rr : VRPDI<0x7E, MRMDestReg, (outs GR64:$dst), (ins FR64:$src), 4675 "mov{d|q}\t{$src, $dst|$dst, $src}", 4676 [(set GR64:$dst, (bitconvert FR64:$src))], 4677 IIC_SSE_MOVDQ>, VEX; 4678def VMOVSDto64mr : VRPDI<0x7E, MRMDestMem, (outs), (ins i64mem:$dst, FR64:$src), 4679 "movq\t{$src, $dst|$dst, $src}", 4680 [(store (i64 (bitconvert FR64:$src)), addr:$dst)], 4681 IIC_SSE_MOVDQ>, VEX; 4682 4683def MOV64toSDrm : S2SI<0x7E, MRMSrcMem, (outs FR64:$dst), (ins i64mem:$src), 4684 "movq\t{$src, $dst|$dst, $src}", 4685 [(set FR64:$dst, (bitconvert (loadi64 addr:$src)))], 4686 IIC_SSE_MOVDQ>; 4687def MOVSDto64rr : RPDI<0x7E, MRMDestReg, (outs GR64:$dst), (ins FR64:$src), 4688 "mov{d|q}\t{$src, $dst|$dst, $src}", 4689 [(set GR64:$dst, (bitconvert FR64:$src))], 4690 IIC_SSE_MOVD_ToGP>; 4691def MOVSDto64mr : RPDI<0x7E, 
MRMDestMem, (outs), (ins i64mem:$dst, FR64:$src), 4692 "movq\t{$src, $dst|$dst, $src}", 4693 [(store (i64 (bitconvert FR64:$src)), addr:$dst)], 4694 IIC_SSE_MOVDQ>; 4695 4696//===---------------------------------------------------------------------===// 4697// Move Scalar Single to Double Int 4698// 4699def VMOVSS2DIrr : VPDI<0x7E, MRMDestReg, (outs GR32:$dst), (ins FR32:$src), 4700 "movd\t{$src, $dst|$dst, $src}", 4701 [(set GR32:$dst, (bitconvert FR32:$src))], 4702 IIC_SSE_MOVD_ToGP>, VEX; 4703def VMOVSS2DImr : VPDI<0x7E, MRMDestMem, (outs), (ins i32mem:$dst, FR32:$src), 4704 "movd\t{$src, $dst|$dst, $src}", 4705 [(store (i32 (bitconvert FR32:$src)), addr:$dst)], 4706 IIC_SSE_MOVDQ>, VEX; 4707def MOVSS2DIrr : PDI<0x7E, MRMDestReg, (outs GR32:$dst), (ins FR32:$src), 4708 "movd\t{$src, $dst|$dst, $src}", 4709 [(set GR32:$dst, (bitconvert FR32:$src))], 4710 IIC_SSE_MOVD_ToGP>; 4711def MOVSS2DImr : PDI<0x7E, MRMDestMem, (outs), (ins i32mem:$dst, FR32:$src), 4712 "movd\t{$src, $dst|$dst, $src}", 4713 [(store (i32 (bitconvert FR32:$src)), addr:$dst)], 4714 IIC_SSE_MOVDQ>; 4715 4716//===---------------------------------------------------------------------===// 4717// Patterns and instructions to describe movd/movq to XMM register zero-extends 4718// 4719let AddedComplexity = 15 in { 4720def VMOVZDI2PDIrr : VPDI<0x6E, MRMSrcReg, (outs VR128:$dst), (ins GR32:$src), 4721 "movd\t{$src, $dst|$dst, $src}", 4722 [(set VR128:$dst, (v4i32 (X86vzmovl 4723 (v4i32 (scalar_to_vector GR32:$src)))))], 4724 IIC_SSE_MOVDQ>, VEX; 4725def VMOVZQI2PQIrr : VPDI<0x6E, MRMSrcReg, (outs VR128:$dst), (ins GR64:$src), 4726 "mov{d|q}\t{$src, $dst|$dst, $src}", // X86-64 only 4727 [(set VR128:$dst, (v2i64 (X86vzmovl 4728 (v2i64 (scalar_to_vector GR64:$src)))))], 4729 IIC_SSE_MOVDQ>, 4730 VEX, VEX_W; 4731} 4732let AddedComplexity = 15 in { 4733def MOVZDI2PDIrr : PDI<0x6E, MRMSrcReg, (outs VR128:$dst), (ins GR32:$src), 4734 "movd\t{$src, $dst|$dst, $src}", 4735 [(set VR128:$dst, (v4i32 (X86vzmovl 
4736 (v4i32 (scalar_to_vector GR32:$src)))))], 4737 IIC_SSE_MOVDQ>; 4738def MOVZQI2PQIrr : RPDI<0x6E, MRMSrcReg, (outs VR128:$dst), (ins GR64:$src), 4739 "mov{d|q}\t{$src, $dst|$dst, $src}", // X86-64 only 4740 [(set VR128:$dst, (v2i64 (X86vzmovl 4741 (v2i64 (scalar_to_vector GR64:$src)))))], 4742 IIC_SSE_MOVDQ>; 4743} 4744 4745let AddedComplexity = 20 in { 4746def VMOVZDI2PDIrm : VPDI<0x6E, MRMSrcMem, (outs VR128:$dst), (ins i32mem:$src), 4747 "movd\t{$src, $dst|$dst, $src}", 4748 [(set VR128:$dst, 4749 (v4i32 (X86vzmovl (v4i32 (scalar_to_vector 4750 (loadi32 addr:$src))))))], 4751 IIC_SSE_MOVDQ>, VEX; 4752def MOVZDI2PDIrm : PDI<0x6E, MRMSrcMem, (outs VR128:$dst), (ins i32mem:$src), 4753 "movd\t{$src, $dst|$dst, $src}", 4754 [(set VR128:$dst, 4755 (v4i32 (X86vzmovl (v4i32 (scalar_to_vector 4756 (loadi32 addr:$src))))))], 4757 IIC_SSE_MOVDQ>; 4758} 4759 4760let Predicates = [HasAVX] in { 4761 // AVX 128-bit movd/movq instruction write zeros in the high 128-bit part. 4762 let AddedComplexity = 20 in { 4763 def : Pat<(v4i32 (X86vzmovl (bc_v4i32 (loadv4f32 addr:$src)))), 4764 (VMOVZDI2PDIrm addr:$src)>; 4765 def : Pat<(v4i32 (X86vzmovl (bc_v4i32 (loadv2i64 addr:$src)))), 4766 (VMOVZDI2PDIrm addr:$src)>; 4767 } 4768 // Use regular 128-bit instructions to match 256-bit scalar_to_vec+zext. 
4769 def : Pat<(v8i32 (X86vzmovl (insert_subvector undef, 4770 (v4i32 (scalar_to_vector GR32:$src)),(iPTR 0)))), 4771 (SUBREG_TO_REG (i32 0), (VMOVZDI2PDIrr GR32:$src), sub_xmm)>; 4772 def : Pat<(v4i64 (X86vzmovl (insert_subvector undef, 4773 (v2i64 (scalar_to_vector GR64:$src)),(iPTR 0)))), 4774 (SUBREG_TO_REG (i64 0), (VMOVZQI2PQIrr GR64:$src), sub_xmm)>; 4775} 4776 4777let Predicates = [UseSSE2], AddedComplexity = 20 in { 4778 def : Pat<(v4i32 (X86vzmovl (bc_v4i32 (loadv4f32 addr:$src)))), 4779 (MOVZDI2PDIrm addr:$src)>; 4780 def : Pat<(v4i32 (X86vzmovl (bc_v4i32 (loadv2i64 addr:$src)))), 4781 (MOVZDI2PDIrm addr:$src)>; 4782} 4783 4784// These are the correct encodings of the instructions so that we know how to 4785// read correct assembly, even though we continue to emit the wrong ones for 4786// compatibility with Darwin's buggy assembler. 4787def : InstAlias<"movq\t{$src, $dst|$dst, $src}", 4788 (MOV64toPQIrr VR128:$dst, GR64:$src), 0>; 4789def : InstAlias<"movq\t{$src, $dst|$dst, $src}", 4790 (MOV64toSDrr FR64:$dst, GR64:$src), 0>; 4791def : InstAlias<"movq\t{$src, $dst|$dst, $src}", 4792 (MOVPQIto64rr GR64:$dst, VR128:$src), 0>; 4793def : InstAlias<"movq\t{$src, $dst|$dst, $src}", 4794 (MOVSDto64rr GR64:$dst, FR64:$src), 0>; 4795def : InstAlias<"movq\t{$src, $dst|$dst, $src}", 4796 (VMOVZQI2PQIrr VR128:$dst, GR64:$src), 0>; 4797def : InstAlias<"movq\t{$src, $dst|$dst, $src}", 4798 (MOVZQI2PQIrr VR128:$dst, GR64:$src), 0>; 4799 4800//===---------------------------------------------------------------------===// 4801// SSE2 - Move Quadword 4802//===---------------------------------------------------------------------===// 4803 4804//===---------------------------------------------------------------------===// 4805// Move Quadword Int to Packed Quadword Int 4806// 4807def VMOVQI2PQIrm : I<0x7E, MRMSrcMem, (outs VR128:$dst), (ins i64mem:$src), 4808 "vmovq\t{$src, $dst|$dst, $src}", 4809 [(set VR128:$dst, 4810 (v2i64 (scalar_to_vector (loadi64 addr:$src))))]>, 
XS, 4811 VEX, Requires<[HasAVX]>; 4812def MOVQI2PQIrm : I<0x7E, MRMSrcMem, (outs VR128:$dst), (ins i64mem:$src), 4813 "movq\t{$src, $dst|$dst, $src}", 4814 [(set VR128:$dst, 4815 (v2i64 (scalar_to_vector (loadi64 addr:$src))))], 4816 IIC_SSE_MOVDQ>, XS, 4817 Requires<[UseSSE2]>; // SSE2 instruction with XS Prefix 4818 4819//===---------------------------------------------------------------------===// 4820// Move Packed Quadword Int to Quadword Int 4821// 4822def VMOVPQI2QImr : VPDI<0xD6, MRMDestMem, (outs), (ins i64mem:$dst, VR128:$src), 4823 "movq\t{$src, $dst|$dst, $src}", 4824 [(store (i64 (vector_extract (v2i64 VR128:$src), 4825 (iPTR 0))), addr:$dst)], 4826 IIC_SSE_MOVDQ>, VEX; 4827def MOVPQI2QImr : PDI<0xD6, MRMDestMem, (outs), (ins i64mem:$dst, VR128:$src), 4828 "movq\t{$src, $dst|$dst, $src}", 4829 [(store (i64 (vector_extract (v2i64 VR128:$src), 4830 (iPTR 0))), addr:$dst)], 4831 IIC_SSE_MOVDQ>; 4832 4833//===---------------------------------------------------------------------===// 4834// Store / copy lower 64-bits of a XMM register. 
4835// 4836def VMOVLQ128mr : VPDI<0xD6, MRMDestMem, (outs), (ins i64mem:$dst, VR128:$src), 4837 "movq\t{$src, $dst|$dst, $src}", 4838 [(int_x86_sse2_storel_dq addr:$dst, VR128:$src)]>, VEX; 4839def MOVLQ128mr : PDI<0xD6, MRMDestMem, (outs), (ins i64mem:$dst, VR128:$src), 4840 "movq\t{$src, $dst|$dst, $src}", 4841 [(int_x86_sse2_storel_dq addr:$dst, VR128:$src)], 4842 IIC_SSE_MOVDQ>; 4843 4844let AddedComplexity = 20 in 4845def VMOVZQI2PQIrm : I<0x7E, MRMSrcMem, (outs VR128:$dst), (ins i64mem:$src), 4846 "vmovq\t{$src, $dst|$dst, $src}", 4847 [(set VR128:$dst, 4848 (v2i64 (X86vzmovl (v2i64 (scalar_to_vector 4849 (loadi64 addr:$src))))))], 4850 IIC_SSE_MOVDQ>, 4851 XS, VEX, Requires<[HasAVX]>; 4852 4853let AddedComplexity = 20 in 4854def MOVZQI2PQIrm : I<0x7E, MRMSrcMem, (outs VR128:$dst), (ins i64mem:$src), 4855 "movq\t{$src, $dst|$dst, $src}", 4856 [(set VR128:$dst, 4857 (v2i64 (X86vzmovl (v2i64 (scalar_to_vector 4858 (loadi64 addr:$src))))))], 4859 IIC_SSE_MOVDQ>, 4860 XS, Requires<[UseSSE2]>; 4861 4862let Predicates = [HasAVX], AddedComplexity = 20 in { 4863 def : Pat<(v2i64 (X86vzmovl (loadv2i64 addr:$src))), 4864 (VMOVZQI2PQIrm addr:$src)>; 4865 def : Pat<(v2i64 (X86vzmovl (bc_v2i64 (loadv4f32 addr:$src)))), 4866 (VMOVZQI2PQIrm addr:$src)>; 4867 def : Pat<(v2i64 (X86vzload addr:$src)), 4868 (VMOVZQI2PQIrm addr:$src)>; 4869} 4870 4871let Predicates = [UseSSE2], AddedComplexity = 20 in { 4872 def : Pat<(v2i64 (X86vzmovl (loadv2i64 addr:$src))), 4873 (MOVZQI2PQIrm addr:$src)>; 4874 def : Pat<(v2i64 (X86vzmovl (bc_v2i64 (loadv4f32 addr:$src)))), 4875 (MOVZQI2PQIrm addr:$src)>; 4876 def : Pat<(v2i64 (X86vzload addr:$src)), (MOVZQI2PQIrm addr:$src)>; 4877} 4878 4879let Predicates = [HasAVX] in { 4880def : Pat<(v4i64 (alignedX86vzload addr:$src)), 4881 (SUBREG_TO_REG (i32 0), (VMOVAPSrm addr:$src), sub_xmm)>; 4882def : Pat<(v4i64 (X86vzload addr:$src)), 4883 (SUBREG_TO_REG (i32 0), (VMOVUPSrm addr:$src), sub_xmm)>; 4884} 4885 
4886//===---------------------------------------------------------------------===// 4887// Moving from XMM to XMM and clear upper 64 bits. Note, there is a bug in 4888// IA32 document. movq xmm1, xmm2 does clear the high bits. 4889// 4890let AddedComplexity = 15 in 4891def VMOVZPQILo2PQIrr : I<0x7E, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src), 4892 "vmovq\t{$src, $dst|$dst, $src}", 4893 [(set VR128:$dst, (v2i64 (X86vzmovl (v2i64 VR128:$src))))], 4894 IIC_SSE_MOVQ_RR>, 4895 XS, VEX, Requires<[HasAVX]>; 4896let AddedComplexity = 15 in 4897def MOVZPQILo2PQIrr : I<0x7E, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src), 4898 "movq\t{$src, $dst|$dst, $src}", 4899 [(set VR128:$dst, (v2i64 (X86vzmovl (v2i64 VR128:$src))))], 4900 IIC_SSE_MOVQ_RR>, 4901 XS, Requires<[UseSSE2]>; 4902 4903let AddedComplexity = 20 in 4904def VMOVZPQILo2PQIrm : I<0x7E, MRMSrcMem, (outs VR128:$dst), (ins i128mem:$src), 4905 "vmovq\t{$src, $dst|$dst, $src}", 4906 [(set VR128:$dst, (v2i64 (X86vzmovl 4907 (loadv2i64 addr:$src))))], 4908 IIC_SSE_MOVDQ>, 4909 XS, VEX, Requires<[HasAVX]>; 4910let AddedComplexity = 20 in { 4911def MOVZPQILo2PQIrm : I<0x7E, MRMSrcMem, (outs VR128:$dst), (ins i128mem:$src), 4912 "movq\t{$src, $dst|$dst, $src}", 4913 [(set VR128:$dst, (v2i64 (X86vzmovl 4914 (loadv2i64 addr:$src))))], 4915 IIC_SSE_MOVDQ>, 4916 XS, Requires<[UseSSE2]>; 4917} 4918 4919let AddedComplexity = 20 in { 4920 let Predicates = [HasAVX] in { 4921 def : Pat<(v2i64 (X86vzmovl (loadv2i64 addr:$src))), 4922 (VMOVZPQILo2PQIrm addr:$src)>; 4923 def : Pat<(v2f64 (X86vzmovl (v2f64 VR128:$src))), 4924 (VMOVZPQILo2PQIrr VR128:$src)>; 4925 } 4926 let Predicates = [UseSSE2] in { 4927 def : Pat<(v2i64 (X86vzmovl (loadv2i64 addr:$src))), 4928 (MOVZPQILo2PQIrm addr:$src)>; 4929 def : Pat<(v2f64 (X86vzmovl (v2f64 VR128:$src))), 4930 (MOVZPQILo2PQIrr VR128:$src)>; 4931 } 4932} 4933 4934// Instructions to match in the assembler 4935def VMOVQs64rr : VPDI<0x6E, MRMSrcReg, (outs VR128:$dst), (ins GR64:$src), 4936 
"movq\t{$src, $dst|$dst, $src}", [], 4937 IIC_SSE_MOVDQ>, VEX, VEX_W; 4938def VMOVQd64rr : VPDI<0x7E, MRMDestReg, (outs GR64:$dst), (ins VR128:$src), 4939 "movq\t{$src, $dst|$dst, $src}", [], 4940 IIC_SSE_MOVDQ>, VEX, VEX_W; 4941// Recognize "movd" with GR64 destination, but encode as a "movq" 4942def VMOVQd64rr_alt : VPDI<0x7E, MRMDestReg, (outs GR64:$dst), (ins VR128:$src), 4943 "movd\t{$src, $dst|$dst, $src}", [], 4944 IIC_SSE_MOVDQ>, VEX, VEX_W; 4945 4946// Instructions for the disassembler 4947// xr = XMM register 4948// xm = mem64 4949 4950let Predicates = [HasAVX] in 4951def VMOVQxrxr: I<0x7E, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src), 4952 "vmovq\t{$src, $dst|$dst, $src}", []>, VEX, XS; 4953def MOVQxrxr : I<0x7E, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src), 4954 "movq\t{$src, $dst|$dst, $src}", [], IIC_SSE_MOVQ_RR>, XS; 4955 4956//===---------------------------------------------------------------------===// 4957// SSE3 - Replicate Single FP - MOVSHDUP and MOVSLDUP 4958//===---------------------------------------------------------------------===// 4959multiclass sse3_replicate_sfp<bits<8> op, SDNode OpNode, string OpcodeStr, 4960 ValueType vt, RegisterClass RC, PatFrag mem_frag, 4961 X86MemOperand x86memop> { 4962def rr : S3SI<op, MRMSrcReg, (outs RC:$dst), (ins RC:$src), 4963 !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"), 4964 [(set RC:$dst, (vt (OpNode RC:$src)))], 4965 IIC_SSE_MOV_LH>; 4966def rm : S3SI<op, MRMSrcMem, (outs RC:$dst), (ins x86memop:$src), 4967 !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"), 4968 [(set RC:$dst, (OpNode (mem_frag addr:$src)))], 4969 IIC_SSE_MOV_LH>; 4970} 4971 4972let Predicates = [HasAVX] in { 4973 defm VMOVSHDUP : sse3_replicate_sfp<0x16, X86Movshdup, "vmovshdup", 4974 v4f32, VR128, memopv4f32, f128mem>, VEX; 4975 defm VMOVSLDUP : sse3_replicate_sfp<0x12, X86Movsldup, "vmovsldup", 4976 v4f32, VR128, memopv4f32, f128mem>, VEX; 4977 defm VMOVSHDUPY : sse3_replicate_sfp<0x16, X86Movshdup, "vmovshdup", 4978 
v8f32, VR256, memopv8f32, f256mem>, VEX, VEX_L; 4979 defm VMOVSLDUPY : sse3_replicate_sfp<0x12, X86Movsldup, "vmovsldup", 4980 v8f32, VR256, memopv8f32, f256mem>, VEX, VEX_L; 4981} 4982defm MOVSHDUP : sse3_replicate_sfp<0x16, X86Movshdup, "movshdup", v4f32, VR128, 4983 memopv4f32, f128mem>; 4984defm MOVSLDUP : sse3_replicate_sfp<0x12, X86Movsldup, "movsldup", v4f32, VR128, 4985 memopv4f32, f128mem>; 4986 4987let Predicates = [HasAVX] in { 4988 def : Pat<(v4i32 (X86Movshdup VR128:$src)), 4989 (VMOVSHDUPrr VR128:$src)>; 4990 def : Pat<(v4i32 (X86Movshdup (bc_v4i32 (memopv2i64 addr:$src)))), 4991 (VMOVSHDUPrm addr:$src)>; 4992 def : Pat<(v4i32 (X86Movsldup VR128:$src)), 4993 (VMOVSLDUPrr VR128:$src)>; 4994 def : Pat<(v4i32 (X86Movsldup (bc_v4i32 (memopv2i64 addr:$src)))), 4995 (VMOVSLDUPrm addr:$src)>; 4996 def : Pat<(v8i32 (X86Movshdup VR256:$src)), 4997 (VMOVSHDUPYrr VR256:$src)>; 4998 def : Pat<(v8i32 (X86Movshdup (bc_v8i32 (memopv4i64 addr:$src)))), 4999 (VMOVSHDUPYrm addr:$src)>; 5000 def : Pat<(v8i32 (X86Movsldup VR256:$src)), 5001 (VMOVSLDUPYrr VR256:$src)>; 5002 def : Pat<(v8i32 (X86Movsldup (bc_v8i32 (memopv4i64 addr:$src)))), 5003 (VMOVSLDUPYrm addr:$src)>; 5004} 5005 5006let Predicates = [UseSSE3] in { 5007 def : Pat<(v4i32 (X86Movshdup VR128:$src)), 5008 (MOVSHDUPrr VR128:$src)>; 5009 def : Pat<(v4i32 (X86Movshdup (bc_v4i32 (memopv2i64 addr:$src)))), 5010 (MOVSHDUPrm addr:$src)>; 5011 def : Pat<(v4i32 (X86Movsldup VR128:$src)), 5012 (MOVSLDUPrr VR128:$src)>; 5013 def : Pat<(v4i32 (X86Movsldup (bc_v4i32 (memopv2i64 addr:$src)))), 5014 (MOVSLDUPrm addr:$src)>; 5015} 5016 5017//===---------------------------------------------------------------------===// 5018// SSE3 - Replicate Double FP - MOVDDUP 5019//===---------------------------------------------------------------------===// 5020 5021multiclass sse3_replicate_dfp<string OpcodeStr> { 5022let neverHasSideEffects = 1 in 5023def rr : S3DI<0x12, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src), 5024 
!strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
                    // Register form carries no pattern (selected via the
                    // X86Movddup Pat<> entries below).
                    [], IIC_SSE_MOV_LH>;
// Memory form: load one f64 and duplicate it into both v2f64 lanes.
def rm  : S3DI<0x12, MRMSrcMem, (outs VR128:$dst), (ins f64mem:$src),
                    !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
                    [(set VR128:$dst,
                      (v2f64 (X86Movddup
                              (scalar_to_vector (loadf64 addr:$src)))))],
                              IIC_SSE_MOV_LH>;
}

// FIXME: Merge with the class above once there are patterns for the ymm
// version.
multiclass sse3_replicate_dfp_y<string OpcodeStr> {
def rr  : S3DI<0x12, MRMSrcReg, (outs VR256:$dst), (ins VR256:$src),
                    !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
                    [(set VR256:$dst, (v4f64 (X86Movddup VR256:$src)))]>;
def rm  : S3DI<0x12, MRMSrcMem, (outs VR256:$dst), (ins f256mem:$src),
                    !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
                    [(set VR256:$dst,
                      (v4f64 (X86Movddup
                              (scalar_to_vector (loadf64 addr:$src)))))]>;
}

let Predicates = [HasAVX] in {
  defm VMOVDDUP  : sse3_replicate_dfp<"vmovddup">, VEX;
  defm VMOVDDUPY : sse3_replicate_dfp_y<"vmovddup">, VEX, VEX_L;
}

defm MOVDDUP : sse3_replicate_dfp<"movddup">;

// Select the AVX MOVDDUP forms for the various ways a duplicated f64 load
// can appear after legalization/bitcasting.
let Predicates = [HasAVX] in {
  def : Pat<(X86Movddup (memopv2f64 addr:$src)),
            (VMOVDDUPrm addr:$src)>, Requires<[HasAVX]>;
  def : Pat<(X86Movddup (bc_v2f64 (memopv4f32 addr:$src))),
            (VMOVDDUPrm addr:$src)>, Requires<[HasAVX]>;
  def : Pat<(X86Movddup (bc_v2f64 (memopv2i64 addr:$src))),
            (VMOVDDUPrm addr:$src)>, Requires<[HasAVX]>;
  def : Pat<(X86Movddup (bc_v2f64
                             (v2i64 (scalar_to_vector (loadi64 addr:$src))))),
            (VMOVDDUPrm addr:$src)>, Requires<[HasAVX]>;

  // 256-bit version
  def : Pat<(X86Movddup (memopv4f64 addr:$src)),
            (VMOVDDUPYrm addr:$src)>;
  def : Pat<(X86Movddup (memopv4i64 addr:$src)),
            (VMOVDDUPYrm addr:$src)>;
  def : Pat<(X86Movddup (v4i64 (scalar_to_vector (loadi64 addr:$src)))),
            (VMOVDDUPYrm addr:$src)>;
  def : Pat<(X86Movddup (v4i64 VR256:$src)),
(VMOVDDUPYrr VR256:$src)>; 5073} 5074 5075let Predicates = [UseSSE3] in { 5076 def : Pat<(X86Movddup (memopv2f64 addr:$src)), 5077 (MOVDDUPrm addr:$src)>; 5078 def : Pat<(X86Movddup (bc_v2f64 (memopv4f32 addr:$src))), 5079 (MOVDDUPrm addr:$src)>; 5080 def : Pat<(X86Movddup (bc_v2f64 (memopv2i64 addr:$src))), 5081 (MOVDDUPrm addr:$src)>; 5082 def : Pat<(X86Movddup (bc_v2f64 5083 (v2i64 (scalar_to_vector (loadi64 addr:$src))))), 5084 (MOVDDUPrm addr:$src)>; 5085} 5086 5087//===---------------------------------------------------------------------===// 5088// SSE3 - Move Unaligned Integer 5089//===---------------------------------------------------------------------===// 5090 5091let Predicates = [HasAVX] in { 5092 def VLDDQUrm : S3DI<0xF0, MRMSrcMem, (outs VR128:$dst), (ins i128mem:$src), 5093 "vlddqu\t{$src, $dst|$dst, $src}", 5094 [(set VR128:$dst, (int_x86_sse3_ldu_dq addr:$src))]>, VEX; 5095 def VLDDQUYrm : S3DI<0xF0, MRMSrcMem, (outs VR256:$dst), (ins i256mem:$src), 5096 "vlddqu\t{$src, $dst|$dst, $src}", 5097 [(set VR256:$dst, (int_x86_avx_ldu_dq_256 addr:$src))]>, 5098 VEX, VEX_L; 5099} 5100def LDDQUrm : S3DI<0xF0, MRMSrcMem, (outs VR128:$dst), (ins i128mem:$src), 5101 "lddqu\t{$src, $dst|$dst, $src}", 5102 [(set VR128:$dst, (int_x86_sse3_ldu_dq addr:$src))], 5103 IIC_SSE_LDDQU>; 5104 5105//===---------------------------------------------------------------------===// 5106// SSE3 - Arithmetic 5107//===---------------------------------------------------------------------===// 5108 5109multiclass sse3_addsub<Intrinsic Int, string OpcodeStr, RegisterClass RC, 5110 X86MemOperand x86memop, OpndItins itins, 5111 bit Is2Addr = 1> { 5112 def rr : I<0xD0, MRMSrcReg, 5113 (outs RC:$dst), (ins RC:$src1, RC:$src2), 5114 !if(Is2Addr, 5115 !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"), 5116 !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")), 5117 [(set RC:$dst, (Int RC:$src1, RC:$src2))], itins.rr>; 5118 def rm : I<0xD0, MRMSrcMem, 5119 (outs RC:$dst), 
(ins RC:$src1, x86memop:$src2),
       !if(Is2Addr,
           !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"),
           !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")),
       // Load form must use the load-form itinerary; itins.rr was a typo
       // (OpndItins declares rm specifically for the memory variant).
       [(set RC:$dst, (Int RC:$src1, (memop addr:$src2)))], itins.rm>;
}

let Predicates = [HasAVX] in {
  let ExeDomain = SSEPackedSingle in {
    defm VADDSUBPS : sse3_addsub<int_x86_sse3_addsub_ps, "vaddsubps", VR128,
                                 f128mem, SSE_ALU_F32P, 0>, TB, XD, VEX_4V;
    defm VADDSUBPSY : sse3_addsub<int_x86_avx_addsub_ps_256, "vaddsubps",
                                  VR256, f256mem, SSE_ALU_F32P, 0>, TB, XD,
                                  VEX_4V, VEX_L;
  }
  let ExeDomain = SSEPackedDouble in {
    defm VADDSUBPD : sse3_addsub<int_x86_sse3_addsub_pd, "vaddsubpd", VR128,
                                 f128mem, SSE_ALU_F64P, 0>, TB, OpSize, VEX_4V;
    defm VADDSUBPDY : sse3_addsub<int_x86_avx_addsub_pd_256, "vaddsubpd",
                                  VR256, f256mem, SSE_ALU_F64P, 0>, TB, OpSize,
                                  VEX_4V, VEX_L;
  }
}
let Constraints = "$src1 = $dst", Predicates = [UseSSE3] in {
  let ExeDomain = SSEPackedSingle in
  defm ADDSUBPS : sse3_addsub<int_x86_sse3_addsub_ps, "addsubps", VR128,
                              f128mem, SSE_ALU_F32P>, TB, XD;
  let ExeDomain = SSEPackedDouble in
  defm ADDSUBPD : sse3_addsub<int_x86_sse3_addsub_pd, "addsubpd", VR128,
                              f128mem, SSE_ALU_F64P>, TB, OpSize;
}

//===---------------------------------------------------------------------===//
// SSE3 Instructions
//===---------------------------------------------------------------------===//

// Horizontal ops
multiclass S3D_Int<bits<8> o, string OpcodeStr, ValueType vt, RegisterClass RC,
                   X86MemOperand x86memop, SDNode OpNode, bit Is2Addr = 1> {
  def rr : S3DI<o, MRMSrcReg, (outs RC:$dst), (ins RC:$src1, RC:$src2),
       !if(Is2Addr,
         !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"),
         !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")),
      [(set RC:$dst, (vt (OpNode RC:$src1, RC:$src2)))], IIC_SSE_HADDSUB_RR>;
5162 def rm : S3DI<o, MRMSrcMem, (outs RC:$dst), (ins RC:$src1, x86memop:$src2), 5163 !if(Is2Addr, 5164 !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"), 5165 !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")), 5166 [(set RC:$dst, (vt (OpNode RC:$src1, (memop addr:$src2))))], 5167 IIC_SSE_HADDSUB_RM>; 5168} 5169multiclass S3_Int<bits<8> o, string OpcodeStr, ValueType vt, RegisterClass RC, 5170 X86MemOperand x86memop, SDNode OpNode, bit Is2Addr = 1> { 5171 def rr : S3I<o, MRMSrcReg, (outs RC:$dst), (ins RC:$src1, RC:$src2), 5172 !if(Is2Addr, 5173 !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"), 5174 !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")), 5175 [(set RC:$dst, (vt (OpNode RC:$src1, RC:$src2)))], IIC_SSE_HADDSUB_RR>; 5176 5177 def rm : S3I<o, MRMSrcMem, (outs RC:$dst), (ins RC:$src1, x86memop:$src2), 5178 !if(Is2Addr, 5179 !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"), 5180 !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")), 5181 [(set RC:$dst, (vt (OpNode RC:$src1, (memop addr:$src2))))], 5182 IIC_SSE_HADDSUB_RM>; 5183} 5184 5185let Predicates = [HasAVX] in { 5186 let ExeDomain = SSEPackedSingle in { 5187 defm VHADDPS : S3D_Int<0x7C, "vhaddps", v4f32, VR128, f128mem, 5188 X86fhadd, 0>, VEX_4V; 5189 defm VHSUBPS : S3D_Int<0x7D, "vhsubps", v4f32, VR128, f128mem, 5190 X86fhsub, 0>, VEX_4V; 5191 defm VHADDPSY : S3D_Int<0x7C, "vhaddps", v8f32, VR256, f256mem, 5192 X86fhadd, 0>, VEX_4V, VEX_L; 5193 defm VHSUBPSY : S3D_Int<0x7D, "vhsubps", v8f32, VR256, f256mem, 5194 X86fhsub, 0>, VEX_4V, VEX_L; 5195 } 5196 let ExeDomain = SSEPackedDouble in { 5197 defm VHADDPD : S3_Int <0x7C, "vhaddpd", v2f64, VR128, f128mem, 5198 X86fhadd, 0>, VEX_4V; 5199 defm VHSUBPD : S3_Int <0x7D, "vhsubpd", v2f64, VR128, f128mem, 5200 X86fhsub, 0>, VEX_4V; 5201 defm VHADDPDY : S3_Int <0x7C, "vhaddpd", v4f64, VR256, f256mem, 5202 X86fhadd, 0>, VEX_4V, VEX_L; 5203 defm VHSUBPDY : S3_Int <0x7D, "vhsubpd", v4f64, VR256, 
f256mem, 5204 X86fhsub, 0>, VEX_4V, VEX_L; 5205 } 5206} 5207 5208let Constraints = "$src1 = $dst" in { 5209 let ExeDomain = SSEPackedSingle in { 5210 defm HADDPS : S3D_Int<0x7C, "haddps", v4f32, VR128, f128mem, X86fhadd>; 5211 defm HSUBPS : S3D_Int<0x7D, "hsubps", v4f32, VR128, f128mem, X86fhsub>; 5212 } 5213 let ExeDomain = SSEPackedDouble in { 5214 defm HADDPD : S3_Int<0x7C, "haddpd", v2f64, VR128, f128mem, X86fhadd>; 5215 defm HSUBPD : S3_Int<0x7D, "hsubpd", v2f64, VR128, f128mem, X86fhsub>; 5216 } 5217} 5218 5219//===---------------------------------------------------------------------===// 5220// SSSE3 - Packed Absolute Instructions 5221//===---------------------------------------------------------------------===// 5222 5223 5224/// SS3I_unop_rm_int - Simple SSSE3 unary op whose type can be v*{i8,i16,i32}. 5225multiclass SS3I_unop_rm_int<bits<8> opc, string OpcodeStr, 5226 Intrinsic IntId128> { 5227 def rr128 : SS38I<opc, MRMSrcReg, (outs VR128:$dst), 5228 (ins VR128:$src), 5229 !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"), 5230 [(set VR128:$dst, (IntId128 VR128:$src))], IIC_SSE_PABS_RR>, 5231 OpSize; 5232 5233 def rm128 : SS38I<opc, MRMSrcMem, (outs VR128:$dst), 5234 (ins i128mem:$src), 5235 !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"), 5236 [(set VR128:$dst, 5237 (IntId128 5238 (bitconvert (memopv2i64 addr:$src))))], IIC_SSE_PABS_RM>, 5239 OpSize; 5240} 5241 5242/// SS3I_unop_rm_int_y - Simple SSSE3 unary op whose type can be v*{i8,i16,i32}. 
multiclass SS3I_unop_rm_int_y<bits<8> opc, string OpcodeStr,
                              Intrinsic IntId256> {
  // Register form: 256-bit unary op expressed via the AVX2 intrinsic.
  def rr256 : SS38I<opc, MRMSrcReg, (outs VR256:$dst),
                    (ins VR256:$src),
                    !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
                    [(set VR256:$dst, (IntId256 VR256:$src))]>,
                    OpSize;

  // Memory form: the 256-bit load is matched as v4i64 and bitconverted to
  // the element type the intrinsic expects.
  def rm256 : SS38I<opc, MRMSrcMem, (outs VR256:$dst),
                    (ins i256mem:$src),
                    !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
                    [(set VR256:$dst,
                      (IntId256
                       (bitconvert (memopv4i64 addr:$src))))]>, OpSize;
}

// AVX: VEX-encoded 128-bit packed-absolute forms.
let Predicates = [HasAVX] in {
  defm VPABSB : SS3I_unop_rm_int<0x1C, "vpabsb",
                                 int_x86_ssse3_pabs_b_128>, VEX;
  defm VPABSW : SS3I_unop_rm_int<0x1D, "vpabsw",
                                 int_x86_ssse3_pabs_w_128>, VEX;
  defm VPABSD : SS3I_unop_rm_int<0x1E, "vpabsd",
                                 int_x86_ssse3_pabs_d_128>, VEX;
}

// AVX2: 256-bit forms.
let Predicates = [HasAVX2] in {
  defm VPABSB : SS3I_unop_rm_int_y<0x1C, "vpabsb",
                                   int_x86_avx2_pabs_b>, VEX, VEX_L;
  defm VPABSW : SS3I_unop_rm_int_y<0x1D, "vpabsw",
                                   int_x86_avx2_pabs_w>, VEX, VEX_L;
  defm VPABSD : SS3I_unop_rm_int_y<0x1E, "vpabsd",
                                   int_x86_avx2_pabs_d>, VEX, VEX_L;
}

// Legacy SSSE3-encoded forms.
defm PABSB : SS3I_unop_rm_int<0x1C, "pabsb",
                              int_x86_ssse3_pabs_b_128>;
defm PABSW : SS3I_unop_rm_int<0x1D, "pabsw",
                              int_x86_ssse3_pabs_w_128>;
defm PABSD : SS3I_unop_rm_int<0x1E, "pabsd",
                              int_x86_ssse3_pabs_d_128>;

//===---------------------------------------------------------------------===//
// SSSE3 - Packed Binary Operator Instructions
//===---------------------------------------------------------------------===//

// Itinerary (rr/rm) bundles for the SSSE3 binary operators defined below.
def SSE_PHADDSUBD : OpndItins<
  IIC_SSE_PHADDSUBD_RR, IIC_SSE_PHADDSUBD_RM
>;
def SSE_PHADDSUBSW : OpndItins<
  IIC_SSE_PHADDSUBSW_RR, IIC_SSE_PHADDSUBSW_RM
>;
def SSE_PHADDSUBW : OpndItins<
  IIC_SSE_PHADDSUBW_RR, IIC_SSE_PHADDSUBW_RM
>;
def SSE_PSHUFB : OpndItins<
  IIC_SSE_PSHUFB_RR, IIC_SSE_PSHUFB_RM
5299>; 5300def SSE_PSIGN : OpndItins< 5301 IIC_SSE_PSIGN_RR, IIC_SSE_PSIGN_RM 5302>; 5303def SSE_PMULHRSW : OpndItins< 5304 IIC_SSE_PMULHRSW, IIC_SSE_PMULHRSW 5305>; 5306 5307/// SS3I_binop_rm - Simple SSSE3 bin op 5308multiclass SS3I_binop_rm<bits<8> opc, string OpcodeStr, SDNode OpNode, 5309 ValueType OpVT, RegisterClass RC, PatFrag memop_frag, 5310 X86MemOperand x86memop, OpndItins itins, 5311 bit Is2Addr = 1> { 5312 let isCommutable = 1 in 5313 def rr : SS38I<opc, MRMSrcReg, (outs RC:$dst), 5314 (ins RC:$src1, RC:$src2), 5315 !if(Is2Addr, 5316 !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"), 5317 !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")), 5318 [(set RC:$dst, (OpVT (OpNode RC:$src1, RC:$src2)))], itins.rr>, 5319 OpSize; 5320 def rm : SS38I<opc, MRMSrcMem, (outs RC:$dst), 5321 (ins RC:$src1, x86memop:$src2), 5322 !if(Is2Addr, 5323 !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"), 5324 !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")), 5325 [(set RC:$dst, 5326 (OpVT (OpNode RC:$src1, 5327 (bitconvert (memop_frag addr:$src2)))))], itins.rm>, OpSize; 5328} 5329 5330/// SS3I_binop_rm_int - Simple SSSE3 bin op whose type can be v*{i8,i16,i32}. 
multiclass SS3I_binop_rm_int<bits<8> opc, string OpcodeStr,
                             Intrinsic IntId128, OpndItins itins,
                             bit Is2Addr = 1> {
  // Register form. The itins argument is now actually applied (itins.rr /
  // itins.rm below); previously it was accepted but silently dropped, so
  // callers' itineraries (SSE_PHADDSUBSW, SSE_PMADD, ...) never took effect.
  let isCommutable = 1 in
  def rr128 : SS38I<opc, MRMSrcReg, (outs VR128:$dst),
       (ins VR128:$src1, VR128:$src2),
       !if(Is2Addr,
         !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"),
         !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")),
       [(set VR128:$dst, (IntId128 VR128:$src1, VR128:$src2))], itins.rr>,
       OpSize;
  // Memory form: second operand loaded as v2i64 and bitconverted.
  def rm128 : SS38I<opc, MRMSrcMem, (outs VR128:$dst),
       (ins VR128:$src1, i128mem:$src2),
       !if(Is2Addr,
         !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"),
         !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")),
       [(set VR128:$dst,
         (IntId128 VR128:$src1,
          (bitconvert (memopv2i64 addr:$src2))))], itins.rm>, OpSize;
}

/// SS3I_binop_rm_int_y - 256-bit (AVX2) variant of the intrinsic-based
/// SSSE3 binary op; always three-operand (VEX), so no Is2Addr handling.
multiclass SS3I_binop_rm_int_y<bits<8> opc, string OpcodeStr,
                               Intrinsic IntId256> {
  let isCommutable = 1 in
  def rr256 : SS38I<opc, MRMSrcReg, (outs VR256:$dst),
       (ins VR256:$src1, VR256:$src2),
       !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
       [(set VR256:$dst, (IntId256 VR256:$src1, VR256:$src2))]>,
       OpSize;
  def rm256 : SS38I<opc, MRMSrcMem, (outs VR256:$dst),
       (ins VR256:$src1, i256mem:$src2),
       !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
       [(set VR256:$dst,
         (IntId256 VR256:$src1,
          (bitconvert (memopv4i64 addr:$src2))))]>, OpSize;
}

// AVX: VEX-encoded 128-bit SSSE3 binary ops.
let ImmT = NoImm, Predicates = [HasAVX] in {
let isCommutable = 0 in {
  defm VPHADDW : SS3I_binop_rm<0x01, "vphaddw", X86hadd, v8i16, VR128,
                               memopv2i64, i128mem,
                               SSE_PHADDSUBW, 0>, VEX_4V;
  defm VPHADDD : SS3I_binop_rm<0x02, "vphaddd", X86hadd, v4i32, VR128,
                               memopv2i64, i128mem,
                               SSE_PHADDSUBD, 0>, VEX_4V;
  defm VPHSUBW : SS3I_binop_rm<0x05, "vphsubw", X86hsub, v8i16, VR128,
                               memopv2i64, i128mem,
                               SSE_PHADDSUBW, 0>, VEX_4V;
  defm VPHSUBD :
SS3I_binop_rm<0x06, "vphsubd", X86hsub, v4i32, VR128,
                               memopv2i64, i128mem,
                               SSE_PHADDSUBD, 0>, VEX_4V;
  defm VPSIGNB : SS3I_binop_rm<0x08, "vpsignb", X86psign, v16i8, VR128,
                               memopv2i64, i128mem,
                               SSE_PSIGN, 0>, VEX_4V;
  defm VPSIGNW : SS3I_binop_rm<0x09, "vpsignw", X86psign, v8i16, VR128,
                               memopv2i64, i128mem,
                               SSE_PSIGN, 0>, VEX_4V;
  defm VPSIGND : SS3I_binop_rm<0x0A, "vpsignd", X86psign, v4i32, VR128,
                               memopv2i64, i128mem,
                               SSE_PSIGN, 0>, VEX_4V;
  defm VPSHUFB : SS3I_binop_rm<0x00, "vpshufb", X86pshufb, v16i8, VR128,
                               memopv2i64, i128mem,
                               SSE_PSHUFB, 0>, VEX_4V;
  defm VPHADDSW : SS3I_binop_rm_int<0x03, "vphaddsw",
                                    int_x86_ssse3_phadd_sw_128,
                                    SSE_PHADDSUBSW, 0>, VEX_4V;
  defm VPHSUBSW : SS3I_binop_rm_int<0x07, "vphsubsw",
                                    int_x86_ssse3_phsub_sw_128,
                                    SSE_PHADDSUBSW, 0>, VEX_4V;
  defm VPMADDUBSW : SS3I_binop_rm_int<0x04, "vpmaddubsw",
                                      int_x86_ssse3_pmadd_ub_sw_128,
                                      SSE_PMADD, 0>, VEX_4V;
}
defm VPMULHRSW : SS3I_binop_rm_int<0x0B, "vpmulhrsw",
                                   int_x86_ssse3_pmul_hr_sw_128,
                                   SSE_PMULHRSW, 0>, VEX_4V;
}

// AVX2: 256-bit SSSE3 binary ops. Itineraries now match the 128-bit forms
// above; previously every defm in this section reused SSE_PHADDSUBW
// regardless of the actual operation (phaddd/phsubd/psign/pshufb).
let ImmT = NoImm, Predicates = [HasAVX2] in {
let isCommutable = 0 in {
  defm VPHADDWY : SS3I_binop_rm<0x01, "vphaddw", X86hadd, v16i16, VR256,
                                memopv4i64, i256mem,
                                SSE_PHADDSUBW, 0>, VEX_4V, VEX_L;
  defm VPHADDDY : SS3I_binop_rm<0x02, "vphaddd", X86hadd, v8i32, VR256,
                                memopv4i64, i256mem,
                                SSE_PHADDSUBD, 0>, VEX_4V, VEX_L;
  defm VPHSUBWY : SS3I_binop_rm<0x05, "vphsubw", X86hsub, v16i16, VR256,
                                memopv4i64, i256mem,
                                SSE_PHADDSUBW, 0>, VEX_4V, VEX_L;
  defm VPHSUBDY : SS3I_binop_rm<0x06, "vphsubd", X86hsub, v8i32, VR256,
                                memopv4i64, i256mem,
                                SSE_PHADDSUBD, 0>, VEX_4V, VEX_L;
  defm VPSIGNBY : SS3I_binop_rm<0x08, "vpsignb", X86psign, v32i8, VR256,
                                memopv4i64, i256mem,
                                SSE_PSIGN, 0>, VEX_4V, VEX_L;
  defm VPSIGNWY : SS3I_binop_rm<0x09, "vpsignw", X86psign, v16i16, VR256,
                                memopv4i64, i256mem,
                                SSE_PSIGN, 0>, VEX_4V, VEX_L;
  defm VPSIGNDY : SS3I_binop_rm<0x0A, "vpsignd", X86psign, v8i32, VR256,
                                memopv4i64, i256mem,
                                SSE_PSIGN, 0>, VEX_4V, VEX_L;
  defm VPSHUFBY : SS3I_binop_rm<0x00, "vpshufb", X86pshufb, v32i8, VR256,
                                memopv4i64, i256mem,
                                SSE_PSHUFB, 0>, VEX_4V, VEX_L;
  defm VPHADDSW : SS3I_binop_rm_int_y<0x03, "vphaddsw",
                                      int_x86_avx2_phadd_sw>, VEX_4V, VEX_L;
  defm VPHSUBSW : SS3I_binop_rm_int_y<0x07, "vphsubsw",
                                      int_x86_avx2_phsub_sw>, VEX_4V, VEX_L;
  defm VPMADDUBSW : SS3I_binop_rm_int_y<0x04, "vpmaddubsw",
                                        int_x86_avx2_pmadd_ub_sw>, VEX_4V, VEX_L;
}
defm VPMULHRSW : SS3I_binop_rm_int_y<0x0B, "vpmulhrsw",
                                     int_x86_avx2_pmul_hr_sw>, VEX_4V, VEX_L;
}

// None of these have i8 immediate fields.
let ImmT = NoImm, Constraints = "$src1 = $dst" in {
let isCommutable = 0 in {
  defm PHADDW : SS3I_binop_rm<0x01, "phaddw", X86hadd, v8i16, VR128,
                              memopv2i64, i128mem, SSE_PHADDSUBW>;
  defm PHADDD : SS3I_binop_rm<0x02, "phaddd", X86hadd, v4i32, VR128,
                              memopv2i64, i128mem, SSE_PHADDSUBD>;
  defm PHSUBW : SS3I_binop_rm<0x05, "phsubw", X86hsub, v8i16, VR128,
                              memopv2i64, i128mem, SSE_PHADDSUBW>;
  defm PHSUBD : SS3I_binop_rm<0x06, "phsubd", X86hsub, v4i32, VR128,
                              memopv2i64, i128mem, SSE_PHADDSUBD>;
  defm PSIGNB : SS3I_binop_rm<0x08, "psignb", X86psign, v16i8, VR128,
                              memopv2i64, i128mem, SSE_PSIGN>;
  defm PSIGNW : SS3I_binop_rm<0x09, "psignw", X86psign, v8i16, VR128,
                              memopv2i64, i128mem, SSE_PSIGN>;
  defm PSIGND : SS3I_binop_rm<0x0A, "psignd", X86psign, v4i32, VR128,
                              memopv2i64, i128mem, SSE_PSIGN>;
  defm PSHUFB : SS3I_binop_rm<0x00, "pshufb", X86pshufb, v16i8, VR128,
                              memopv2i64, i128mem, SSE_PSHUFB>;
  defm PHADDSW : SS3I_binop_rm_int<0x03, "phaddsw",
                                   int_x86_ssse3_phadd_sw_128,
                                   SSE_PHADDSUBSW>;
  defm PHSUBSW : SS3I_binop_rm_int<0x07, "phsubsw",
int_x86_ssse3_phsub_sw_128, 5470 SSE_PHADDSUBSW>; 5471 defm PMADDUBSW : SS3I_binop_rm_int<0x04, "pmaddubsw", 5472 int_x86_ssse3_pmadd_ub_sw_128, SSE_PMADD>; 5473} 5474defm PMULHRSW : SS3I_binop_rm_int<0x0B, "pmulhrsw", 5475 int_x86_ssse3_pmul_hr_sw_128, 5476 SSE_PMULHRSW>; 5477} 5478 5479//===---------------------------------------------------------------------===// 5480// SSSE3 - Packed Align Instruction Patterns 5481//===---------------------------------------------------------------------===// 5482 5483multiclass ssse3_palign<string asm, bit Is2Addr = 1> { 5484 let neverHasSideEffects = 1 in { 5485 def R128rr : SS3AI<0x0F, MRMSrcReg, (outs VR128:$dst), 5486 (ins VR128:$src1, VR128:$src2, i8imm:$src3), 5487 !if(Is2Addr, 5488 !strconcat(asm, "\t{$src3, $src2, $dst|$dst, $src2, $src3}"), 5489 !strconcat(asm, 5490 "\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}")), 5491 [], IIC_SSE_PALIGNR>, OpSize; 5492 let mayLoad = 1 in 5493 def R128rm : SS3AI<0x0F, MRMSrcMem, (outs VR128:$dst), 5494 (ins VR128:$src1, i128mem:$src2, i8imm:$src3), 5495 !if(Is2Addr, 5496 !strconcat(asm, "\t{$src3, $src2, $dst|$dst, $src2, $src3}"), 5497 !strconcat(asm, 5498 "\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}")), 5499 [], IIC_SSE_PALIGNR>, OpSize; 5500 } 5501} 5502 5503multiclass ssse3_palign_y<string asm, bit Is2Addr = 1> { 5504 let neverHasSideEffects = 1 in { 5505 def R256rr : SS3AI<0x0F, MRMSrcReg, (outs VR256:$dst), 5506 (ins VR256:$src1, VR256:$src2, i8imm:$src3), 5507 !strconcat(asm, 5508 "\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}"), 5509 []>, OpSize; 5510 let mayLoad = 1 in 5511 def R256rm : SS3AI<0x0F, MRMSrcMem, (outs VR256:$dst), 5512 (ins VR256:$src1, i256mem:$src2, i8imm:$src3), 5513 !strconcat(asm, 5514 "\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}"), 5515 []>, OpSize; 5516 } 5517} 5518 5519let Predicates = [HasAVX] in 5520 defm VPALIGN : ssse3_palign<"vpalignr", 0>, VEX_4V; 5521let Predicates = [HasAVX2] in 5522 defm VPALIGN : 
ssse3_palign_y<"vpalignr", 0>, VEX_4V, VEX_L;
let Constraints = "$src1 = $dst", Predicates = [UseSSSE3] in
  defm PALIGN : ssse3_palign<"palignr">;

// Note: in all X86PAlign output patterns below, the two source operands are
// intentionally commuted relative to the DAG node.
let Predicates = [HasAVX2] in {
def : Pat<(v8i32 (X86PAlign VR256:$src1, VR256:$src2, (i8 imm:$imm))),
          (VPALIGNR256rr VR256:$src2, VR256:$src1, imm:$imm)>;
def : Pat<(v8f32 (X86PAlign VR256:$src1, VR256:$src2, (i8 imm:$imm))),
          (VPALIGNR256rr VR256:$src2, VR256:$src1, imm:$imm)>;
def : Pat<(v16i16 (X86PAlign VR256:$src1, VR256:$src2, (i8 imm:$imm))),
          (VPALIGNR256rr VR256:$src2, VR256:$src1, imm:$imm)>;
def : Pat<(v32i8 (X86PAlign VR256:$src1, VR256:$src2, (i8 imm:$imm))),
          (VPALIGNR256rr VR256:$src2, VR256:$src1, imm:$imm)>;
}

let Predicates = [HasAVX] in {
def : Pat<(v4i32 (X86PAlign VR128:$src1, VR128:$src2, (i8 imm:$imm))),
          (VPALIGNR128rr VR128:$src2, VR128:$src1, imm:$imm)>;
def : Pat<(v4f32 (X86PAlign VR128:$src1, VR128:$src2, (i8 imm:$imm))),
          (VPALIGNR128rr VR128:$src2, VR128:$src1, imm:$imm)>;
def : Pat<(v8i16 (X86PAlign VR128:$src1, VR128:$src2, (i8 imm:$imm))),
          (VPALIGNR128rr VR128:$src2, VR128:$src1, imm:$imm)>;
def : Pat<(v16i8 (X86PAlign VR128:$src1, VR128:$src2, (i8 imm:$imm))),
          (VPALIGNR128rr VR128:$src2, VR128:$src1, imm:$imm)>;
}

let Predicates = [UseSSSE3] in {
def : Pat<(v4i32 (X86PAlign VR128:$src1, VR128:$src2, (i8 imm:$imm))),
          (PALIGNR128rr VR128:$src2, VR128:$src1, imm:$imm)>;
def : Pat<(v4f32 (X86PAlign VR128:$src1, VR128:$src2, (i8 imm:$imm))),
          (PALIGNR128rr VR128:$src2, VR128:$src1, imm:$imm)>;
def : Pat<(v8i16 (X86PAlign VR128:$src1, VR128:$src2, (i8 imm:$imm))),
          (PALIGNR128rr VR128:$src2, VR128:$src1, imm:$imm)>;
def : Pat<(v16i8 (X86PAlign VR128:$src1, VR128:$src2, (i8 imm:$imm))),
          (PALIGNR128rr VR128:$src2, VR128:$src1, imm:$imm)>;
}

//===---------------------------------------------------------------------===//
// SSE3 - Thread synchronization
// (MONITOR/MWAIT are SSE3 instructions, not SSSE3 as this header used to say:
// all defs below are guarded by HasSSE3 and use int_x86_sse3_* intrinsics.)
//===---------------------------------------------------------------------===//

// Pseudo expanded by a custom inserter; carries explicit address/hint/ext
// operands instead of the implicit EAX/ECX/EDX of the real instruction.
let usesCustomInserter = 1 in {
def MONITOR : PseudoI<(outs), (ins i32mem:$src1, GR32:$src2, GR32:$src3),
                [(int_x86_sse3_monitor addr:$src1, GR32:$src2, GR32:$src3)]>,
                Requires<[HasSSE3]>;
}

// Real encodings; operands are implicit registers (see Uses lists).
let Uses = [EAX, ECX, EDX] in
def MONITORrrr : I<0x01, MRM_C8, (outs), (ins), "monitor", [], IIC_SSE_MONITOR>,
                 TB, Requires<[HasSSE3]>;
let Uses = [ECX, EAX] in
def MWAITrr : I<0x01, MRM_C9, (outs), (ins), "mwait",
              [(int_x86_sse3_mwait ECX, EAX)], IIC_SSE_MWAIT>,
              TB, Requires<[HasSSE3]>;

// Assembler aliases spelling out the implicit operands per mode.
def : InstAlias<"mwait %eax, %ecx", (MWAITrr)>, Requires<[In32BitMode]>;
def : InstAlias<"mwait %rax, %rcx", (MWAITrr)>, Requires<[In64BitMode]>;

def : InstAlias<"monitor %eax, %ecx, %edx", (MONITORrrr)>,
      Requires<[In32BitMode]>;
def : InstAlias<"monitor %rax, %rcx, %rdx", (MONITORrrr)>,
      Requires<[In64BitMode]>;

//===----------------------------------------------------------------------===//
// SSE4.1 - Packed Move with Sign/Zero Extend
//===----------------------------------------------------------------------===//

// 128-bit pmovsx/zx with a 64-bit memory source.
multiclass SS41I_binop_rm_int8<bits<8> opc, string OpcodeStr, Intrinsic IntId> {
  def rr : SS48I<opc, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
                 !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
                 [(set VR128:$dst, (IntId VR128:$src))]>, OpSize;

  def rm : SS48I<opc, MRMSrcMem, (outs VR128:$dst), (ins i64mem:$src),
                 !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
       [(set VR128:$dst,
         (IntId (bitconvert (v2i64 (scalar_to_vector (loadi64 addr:$src))))))]>,
       OpSize;
}

// 256-bit (AVX2) pmovsx/zx with a 128-bit source.
multiclass SS41I_binop_rm_int16_y<bits<8> opc, string OpcodeStr,
                                  Intrinsic IntId> {
  def Yrr : SS48I<opc, MRMSrcReg, (outs VR256:$dst), (ins VR128:$src),
                  !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
                  [(set VR256:$dst,
(IntId VR128:$src))]>, OpSize; 5606 5607 def Yrm : SS48I<opc, MRMSrcMem, (outs VR256:$dst), (ins i128mem:$src), 5608 !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"), 5609 [(set VR256:$dst, (IntId (load addr:$src)))]>, OpSize; 5610} 5611 5612let Predicates = [HasAVX] in { 5613defm VPMOVSXBW : SS41I_binop_rm_int8<0x20, "vpmovsxbw", int_x86_sse41_pmovsxbw>, 5614 VEX; 5615defm VPMOVSXWD : SS41I_binop_rm_int8<0x23, "vpmovsxwd", int_x86_sse41_pmovsxwd>, 5616 VEX; 5617defm VPMOVSXDQ : SS41I_binop_rm_int8<0x25, "vpmovsxdq", int_x86_sse41_pmovsxdq>, 5618 VEX; 5619defm VPMOVZXBW : SS41I_binop_rm_int8<0x30, "vpmovzxbw", int_x86_sse41_pmovzxbw>, 5620 VEX; 5621defm VPMOVZXWD : SS41I_binop_rm_int8<0x33, "vpmovzxwd", int_x86_sse41_pmovzxwd>, 5622 VEX; 5623defm VPMOVZXDQ : SS41I_binop_rm_int8<0x35, "vpmovzxdq", int_x86_sse41_pmovzxdq>, 5624 VEX; 5625} 5626 5627let Predicates = [HasAVX2] in { 5628defm VPMOVSXBW : SS41I_binop_rm_int16_y<0x20, "vpmovsxbw", 5629 int_x86_avx2_pmovsxbw>, VEX, VEX_L; 5630defm VPMOVSXWD : SS41I_binop_rm_int16_y<0x23, "vpmovsxwd", 5631 int_x86_avx2_pmovsxwd>, VEX, VEX_L; 5632defm VPMOVSXDQ : SS41I_binop_rm_int16_y<0x25, "vpmovsxdq", 5633 int_x86_avx2_pmovsxdq>, VEX, VEX_L; 5634defm VPMOVZXBW : SS41I_binop_rm_int16_y<0x30, "vpmovzxbw", 5635 int_x86_avx2_pmovzxbw>, VEX, VEX_L; 5636defm VPMOVZXWD : SS41I_binop_rm_int16_y<0x33, "vpmovzxwd", 5637 int_x86_avx2_pmovzxwd>, VEX, VEX_L; 5638defm VPMOVZXDQ : SS41I_binop_rm_int16_y<0x35, "vpmovzxdq", 5639 int_x86_avx2_pmovzxdq>, VEX, VEX_L; 5640} 5641 5642defm PMOVSXBW : SS41I_binop_rm_int8<0x20, "pmovsxbw", int_x86_sse41_pmovsxbw>; 5643defm PMOVSXWD : SS41I_binop_rm_int8<0x23, "pmovsxwd", int_x86_sse41_pmovsxwd>; 5644defm PMOVSXDQ : SS41I_binop_rm_int8<0x25, "pmovsxdq", int_x86_sse41_pmovsxdq>; 5645defm PMOVZXBW : SS41I_binop_rm_int8<0x30, "pmovzxbw", int_x86_sse41_pmovzxbw>; 5646defm PMOVZXWD : SS41I_binop_rm_int8<0x33, "pmovzxwd", int_x86_sse41_pmovzxwd>; 5647defm PMOVZXDQ : SS41I_binop_rm_int8<0x35, "pmovzxdq", 
int_x86_sse41_pmovzxdq>; 5648 5649let Predicates = [HasAVX] in { 5650 // Common patterns involving scalar load. 5651 def : Pat<(int_x86_sse41_pmovsxbw (vzmovl_v2i64 addr:$src)), 5652 (VPMOVSXBWrm addr:$src)>; 5653 def : Pat<(int_x86_sse41_pmovsxbw (vzload_v2i64 addr:$src)), 5654 (VPMOVSXBWrm addr:$src)>; 5655 def : Pat<(int_x86_sse41_pmovsxbw (bc_v16i8 (loadv2i64 addr:$src))), 5656 (VPMOVSXBWrm addr:$src)>; 5657 5658 def : Pat<(int_x86_sse41_pmovsxwd (vzmovl_v2i64 addr:$src)), 5659 (VPMOVSXWDrm addr:$src)>; 5660 def : Pat<(int_x86_sse41_pmovsxwd (vzload_v2i64 addr:$src)), 5661 (VPMOVSXWDrm addr:$src)>; 5662 def : Pat<(int_x86_sse41_pmovsxwd (bc_v8i16 (loadv2i64 addr:$src))), 5663 (VPMOVSXWDrm addr:$src)>; 5664 5665 def : Pat<(int_x86_sse41_pmovsxdq (vzmovl_v2i64 addr:$src)), 5666 (VPMOVSXDQrm addr:$src)>; 5667 def : Pat<(int_x86_sse41_pmovsxdq (vzload_v2i64 addr:$src)), 5668 (VPMOVSXDQrm addr:$src)>; 5669 def : Pat<(int_x86_sse41_pmovsxdq (bc_v4i32 (loadv2i64 addr:$src))), 5670 (VPMOVSXDQrm addr:$src)>; 5671 5672 def : Pat<(int_x86_sse41_pmovzxbw (vzmovl_v2i64 addr:$src)), 5673 (VPMOVZXBWrm addr:$src)>; 5674 def : Pat<(int_x86_sse41_pmovzxbw (vzload_v2i64 addr:$src)), 5675 (VPMOVZXBWrm addr:$src)>; 5676 def : Pat<(int_x86_sse41_pmovzxbw (bc_v16i8 (loadv2i64 addr:$src))), 5677 (VPMOVZXBWrm addr:$src)>; 5678 5679 def : Pat<(int_x86_sse41_pmovzxwd (vzmovl_v2i64 addr:$src)), 5680 (VPMOVZXWDrm addr:$src)>; 5681 def : Pat<(int_x86_sse41_pmovzxwd (vzload_v2i64 addr:$src)), 5682 (VPMOVZXWDrm addr:$src)>; 5683 def : Pat<(int_x86_sse41_pmovzxwd (bc_v8i16 (loadv2i64 addr:$src))), 5684 (VPMOVZXWDrm addr:$src)>; 5685 5686 def : Pat<(int_x86_sse41_pmovzxdq (vzmovl_v2i64 addr:$src)), 5687 (VPMOVZXDQrm addr:$src)>; 5688 def : Pat<(int_x86_sse41_pmovzxdq (vzload_v2i64 addr:$src)), 5689 (VPMOVZXDQrm addr:$src)>; 5690 def : Pat<(int_x86_sse41_pmovzxdq (bc_v4i32 (loadv2i64 addr:$src))), 5691 (VPMOVZXDQrm addr:$src)>; 5692} 5693 5694let Predicates = [UseSSE41] in { 5695 // Common 
patterns involving scalar load. 5696 def : Pat<(int_x86_sse41_pmovsxbw (vzmovl_v2i64 addr:$src)), 5697 (PMOVSXBWrm addr:$src)>; 5698 def : Pat<(int_x86_sse41_pmovsxbw (vzload_v2i64 addr:$src)), 5699 (PMOVSXBWrm addr:$src)>; 5700 def : Pat<(int_x86_sse41_pmovsxbw (bc_v16i8 (loadv2i64 addr:$src))), 5701 (PMOVSXBWrm addr:$src)>; 5702 5703 def : Pat<(int_x86_sse41_pmovsxwd (vzmovl_v2i64 addr:$src)), 5704 (PMOVSXWDrm addr:$src)>; 5705 def : Pat<(int_x86_sse41_pmovsxwd (vzload_v2i64 addr:$src)), 5706 (PMOVSXWDrm addr:$src)>; 5707 def : Pat<(int_x86_sse41_pmovsxwd (bc_v8i16 (loadv2i64 addr:$src))), 5708 (PMOVSXWDrm addr:$src)>; 5709 5710 def : Pat<(int_x86_sse41_pmovsxdq (vzmovl_v2i64 addr:$src)), 5711 (PMOVSXDQrm addr:$src)>; 5712 def : Pat<(int_x86_sse41_pmovsxdq (vzload_v2i64 addr:$src)), 5713 (PMOVSXDQrm addr:$src)>; 5714 def : Pat<(int_x86_sse41_pmovsxdq (bc_v4i32 (loadv2i64 addr:$src))), 5715 (PMOVSXDQrm addr:$src)>; 5716 5717 def : Pat<(int_x86_sse41_pmovzxbw (vzmovl_v2i64 addr:$src)), 5718 (PMOVZXBWrm addr:$src)>; 5719 def : Pat<(int_x86_sse41_pmovzxbw (vzload_v2i64 addr:$src)), 5720 (PMOVZXBWrm addr:$src)>; 5721 def : Pat<(int_x86_sse41_pmovzxbw (bc_v16i8 (loadv2i64 addr:$src))), 5722 (PMOVZXBWrm addr:$src)>; 5723 5724 def : Pat<(int_x86_sse41_pmovzxwd (vzmovl_v2i64 addr:$src)), 5725 (PMOVZXWDrm addr:$src)>; 5726 def : Pat<(int_x86_sse41_pmovzxwd (vzload_v2i64 addr:$src)), 5727 (PMOVZXWDrm addr:$src)>; 5728 def : Pat<(int_x86_sse41_pmovzxwd (bc_v8i16 (loadv2i64 addr:$src))), 5729 (PMOVZXWDrm addr:$src)>; 5730 5731 def : Pat<(int_x86_sse41_pmovzxdq (vzmovl_v2i64 addr:$src)), 5732 (PMOVZXDQrm addr:$src)>; 5733 def : Pat<(int_x86_sse41_pmovzxdq (vzload_v2i64 addr:$src)), 5734 (PMOVZXDQrm addr:$src)>; 5735 def : Pat<(int_x86_sse41_pmovzxdq (bc_v4i32 (loadv2i64 addr:$src))), 5736 (PMOVZXDQrm addr:$src)>; 5737} 5738 5739let Predicates = [HasAVX2] in { 5740 let AddedComplexity = 15 in { 5741 def : Pat<(v4i64 (X86vzmovly (v4i32 VR128:$src))), 5742 (VPMOVZXDQYrr 
VR128:$src)>; 5743 def : Pat<(v8i32 (X86vzmovly (v8i16 VR128:$src))), 5744 (VPMOVZXWDYrr VR128:$src)>; 5745 } 5746 5747 def : Pat<(v4i64 (X86vsmovl (v4i32 VR128:$src))), (VPMOVSXDQYrr VR128:$src)>; 5748 def : Pat<(v8i32 (X86vsmovl (v8i16 VR128:$src))), (VPMOVSXWDYrr VR128:$src)>; 5749} 5750 5751let Predicates = [HasAVX] in { 5752 def : Pat<(v2i64 (X86vsmovl (v4i32 VR128:$src))), (VPMOVSXDQrr VR128:$src)>; 5753 def : Pat<(v4i32 (X86vsmovl (v8i16 VR128:$src))), (VPMOVSXWDrr VR128:$src)>; 5754} 5755 5756let Predicates = [UseSSE41] in { 5757 def : Pat<(v2i64 (X86vsmovl (v4i32 VR128:$src))), (PMOVSXDQrr VR128:$src)>; 5758 def : Pat<(v4i32 (X86vsmovl (v8i16 VR128:$src))), (PMOVSXWDrr VR128:$src)>; 5759} 5760 5761 5762multiclass SS41I_binop_rm_int4<bits<8> opc, string OpcodeStr, Intrinsic IntId> { 5763 def rr : SS48I<opc, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src), 5764 !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"), 5765 [(set VR128:$dst, (IntId VR128:$src))]>, OpSize; 5766 5767 def rm : SS48I<opc, MRMSrcMem, (outs VR128:$dst), (ins i32mem:$src), 5768 !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"), 5769 [(set VR128:$dst, 5770 (IntId (bitconvert (v4i32 (scalar_to_vector (loadi32 addr:$src))))))]>, 5771 OpSize; 5772} 5773 5774multiclass SS41I_binop_rm_int8_y<bits<8> opc, string OpcodeStr, 5775 Intrinsic IntId> { 5776 def Yrr : SS48I<opc, MRMSrcReg, (outs VR256:$dst), (ins VR128:$src), 5777 !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"), 5778 [(set VR256:$dst, (IntId VR128:$src))]>, OpSize; 5779 5780 def Yrm : SS48I<opc, MRMSrcMem, (outs VR256:$dst), (ins i32mem:$src), 5781 !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"), 5782 [(set VR256:$dst, 5783 (IntId (bitconvert (v2i64 (scalar_to_vector (loadi64 addr:$src))))))]>, 5784 OpSize; 5785} 5786 5787let Predicates = [HasAVX] in { 5788defm VPMOVSXBD : SS41I_binop_rm_int4<0x21, "vpmovsxbd", int_x86_sse41_pmovsxbd>, 5789 VEX; 5790defm VPMOVSXWQ : SS41I_binop_rm_int4<0x24, "vpmovsxwq", int_x86_sse41_pmovsxwq>, 
5791 VEX; 5792defm VPMOVZXBD : SS41I_binop_rm_int4<0x31, "vpmovzxbd", int_x86_sse41_pmovzxbd>, 5793 VEX; 5794defm VPMOVZXWQ : SS41I_binop_rm_int4<0x34, "vpmovzxwq", int_x86_sse41_pmovzxwq>, 5795 VEX; 5796} 5797 5798let Predicates = [HasAVX2] in { 5799defm VPMOVSXBD : SS41I_binop_rm_int8_y<0x21, "vpmovsxbd", 5800 int_x86_avx2_pmovsxbd>, VEX, VEX_L; 5801defm VPMOVSXWQ : SS41I_binop_rm_int8_y<0x24, "vpmovsxwq", 5802 int_x86_avx2_pmovsxwq>, VEX, VEX_L; 5803defm VPMOVZXBD : SS41I_binop_rm_int8_y<0x31, "vpmovzxbd", 5804 int_x86_avx2_pmovzxbd>, VEX, VEX_L; 5805defm VPMOVZXWQ : SS41I_binop_rm_int8_y<0x34, "vpmovzxwq", 5806 int_x86_avx2_pmovzxwq>, VEX, VEX_L; 5807} 5808 5809defm PMOVSXBD : SS41I_binop_rm_int4<0x21, "pmovsxbd", int_x86_sse41_pmovsxbd>; 5810defm PMOVSXWQ : SS41I_binop_rm_int4<0x24, "pmovsxwq", int_x86_sse41_pmovsxwq>; 5811defm PMOVZXBD : SS41I_binop_rm_int4<0x31, "pmovzxbd", int_x86_sse41_pmovzxbd>; 5812defm PMOVZXWQ : SS41I_binop_rm_int4<0x34, "pmovzxwq", int_x86_sse41_pmovzxwq>; 5813 5814let Predicates = [HasAVX] in { 5815 // Common patterns involving scalar load 5816 def : Pat<(int_x86_sse41_pmovsxbd (vzmovl_v4i32 addr:$src)), 5817 (VPMOVSXBDrm addr:$src)>; 5818 def : Pat<(int_x86_sse41_pmovsxwq (vzmovl_v4i32 addr:$src)), 5819 (VPMOVSXWQrm addr:$src)>; 5820 5821 def : Pat<(int_x86_sse41_pmovzxbd (vzmovl_v4i32 addr:$src)), 5822 (VPMOVZXBDrm addr:$src)>; 5823 def : Pat<(int_x86_sse41_pmovzxwq (vzmovl_v4i32 addr:$src)), 5824 (VPMOVZXWQrm addr:$src)>; 5825} 5826 5827let Predicates = [UseSSE41] in { 5828 // Common patterns involving scalar load 5829 def : Pat<(int_x86_sse41_pmovsxbd (vzmovl_v4i32 addr:$src)), 5830 (PMOVSXBDrm addr:$src)>; 5831 def : Pat<(int_x86_sse41_pmovsxwq (vzmovl_v4i32 addr:$src)), 5832 (PMOVSXWQrm addr:$src)>; 5833 5834 def : Pat<(int_x86_sse41_pmovzxbd (vzmovl_v4i32 addr:$src)), 5835 (PMOVZXBDrm addr:$src)>; 5836 def : Pat<(int_x86_sse41_pmovzxwq (vzmovl_v4i32 addr:$src)), 5837 (PMOVZXWQrm addr:$src)>; 5838} 5839 5840multiclass 
SS41I_binop_rm_int2<bits<8> opc, string OpcodeStr, Intrinsic IntId> { 5841 def rr : SS48I<opc, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src), 5842 !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"), 5843 [(set VR128:$dst, (IntId VR128:$src))]>, OpSize; 5844 5845 // Expecting a i16 load any extended to i32 value. 5846 def rm : SS48I<opc, MRMSrcMem, (outs VR128:$dst), (ins i16mem:$src), 5847 !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"), 5848 [(set VR128:$dst, (IntId (bitconvert 5849 (v4i32 (scalar_to_vector (loadi16_anyext addr:$src))))))]>, 5850 OpSize; 5851} 5852 5853multiclass SS41I_binop_rm_int4_y<bits<8> opc, string OpcodeStr, 5854 Intrinsic IntId> { 5855 def Yrr : SS48I<opc, MRMSrcReg, (outs VR256:$dst), (ins VR128:$src), 5856 !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"), 5857 [(set VR256:$dst, (IntId VR128:$src))]>, OpSize; 5858 5859 // Expecting a i16 load any extended to i32 value. 5860 def Yrm : SS48I<opc, MRMSrcMem, (outs VR256:$dst), (ins i16mem:$src), 5861 !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"), 5862 [(set VR256:$dst, (IntId (bitconvert 5863 (v4i32 (scalar_to_vector (loadi32 addr:$src))))))]>, 5864 OpSize; 5865} 5866 5867let Predicates = [HasAVX] in { 5868defm VPMOVSXBQ : SS41I_binop_rm_int2<0x22, "vpmovsxbq", int_x86_sse41_pmovsxbq>, 5869 VEX; 5870defm VPMOVZXBQ : SS41I_binop_rm_int2<0x32, "vpmovzxbq", int_x86_sse41_pmovzxbq>, 5871 VEX; 5872} 5873let Predicates = [HasAVX2] in { 5874defm VPMOVSXBQ : SS41I_binop_rm_int4_y<0x22, "vpmovsxbq", 5875 int_x86_avx2_pmovsxbq>, VEX, VEX_L; 5876defm VPMOVZXBQ : SS41I_binop_rm_int4_y<0x32, "vpmovzxbq", 5877 int_x86_avx2_pmovzxbq>, VEX, VEX_L; 5878} 5879defm PMOVSXBQ : SS41I_binop_rm_int2<0x22, "pmovsxbq", int_x86_sse41_pmovsxbq>; 5880defm PMOVZXBQ : SS41I_binop_rm_int2<0x32, "pmovzxbq", int_x86_sse41_pmovzxbq>; 5881 5882let Predicates = [HasAVX] in { 5883 // Common patterns involving scalar load 5884 def : Pat<(int_x86_sse41_pmovsxbq 5885 (bitconvert (v4i32 (X86vzmovl 5886 (v4i32 
(scalar_to_vector (loadi32 addr:$src))))))),
            (VPMOVSXBQrm addr:$src)>;

  def : Pat<(int_x86_sse41_pmovzxbq
              (bitconvert (v4i32 (X86vzmovl
                           (v4i32 (scalar_to_vector (loadi32 addr:$src))))))),
            (VPMOVZXBQrm addr:$src)>;
}

let Predicates = [UseSSE41] in {
  // Common patterns involving scalar load
  def : Pat<(int_x86_sse41_pmovsxbq
              (bitconvert (v4i32 (X86vzmovl
                           (v4i32 (scalar_to_vector (loadi32 addr:$src))))))),
            (PMOVSXBQrm addr:$src)>;

  def : Pat<(int_x86_sse41_pmovzxbq
              (bitconvert (v4i32 (X86vzmovl
                           (v4i32 (scalar_to_vector (loadi32 addr:$src))))))),
            (PMOVZXBQrm addr:$src)>;
}

//===----------------------------------------------------------------------===//
// SSE4.1 - Extract Instructions
//===----------------------------------------------------------------------===//

/// SS41I_extract8 - SSE 4.1 extract 8 bits to 32 bit reg or 8 bit mem
multiclass SS41I_extract8<bits<8> opc, string OpcodeStr> {
  def rr : SS4AIi8<opc, MRMDestReg, (outs GR32:$dst),
                 (ins VR128:$src1, i32i8imm:$src2),
                 !strconcat(OpcodeStr,
                  "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
                 [(set GR32:$dst, (X86pextrb (v16i8 VR128:$src1), imm:$src2))]>,
                 OpSize;
  let neverHasSideEffects = 1, mayStore = 1 in
  def mr : SS4AIi8<opc, MRMDestMem, (outs),
                 (ins i8mem:$dst, VR128:$src1, i32i8imm:$src2),
                 !strconcat(OpcodeStr,
                  "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
                 []>, OpSize;
// FIXME:
// There's an AssertZext in the way of writing the store pattern
// (store (i8 (trunc (X86pextrb (v16i8 VR128:$src1), imm:$src2))), addr:$dst)
}

let Predicates = [HasAVX] in {
  defm VPEXTRB : SS41I_extract8<0x14, "vpextrb">, VEX;
  // GR64 destination form (register-only, no pattern).
  def VPEXTRBrr64 : SS4AIi8<0x14, MRMDestReg, (outs GR64:$dst),
         (ins VR128:$src1, i32i8imm:$src2),
         "vpextrb\t{$src2, $src1, $dst|$dst, $src1, $src2}", []>, OpSize, VEX;
}

defm PEXTRB : SS41I_extract8<0x14, "pextrb">;


/// SS41I_extract16 - SSE 4.1 extract 16 bits to memory destination
multiclass SS41I_extract16<bits<8> opc, string OpcodeStr> {
  let neverHasSideEffects = 1, mayStore = 1 in
  def mr : SS4AIi8<opc, MRMDestMem, (outs),
                 (ins i16mem:$dst, VR128:$src1, i32i8imm:$src2),
                 !strconcat(OpcodeStr,
                  "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
                 []>, OpSize;
// FIXME:
// There's an AssertZext in the way of writing the store pattern
// (store (i16 (trunc (X86pextrw (v16i8 VR128:$src1), imm:$src2))), addr:$dst)
}

let Predicates = [HasAVX] in
  defm VPEXTRW : SS41I_extract16<0x15, "vpextrw">, VEX;

defm PEXTRW : SS41I_extract16<0x15, "pextrw">;


/// SS41I_extract32 - SSE 4.1 extract 32 bits to int reg or memory destination
multiclass SS41I_extract32<bits<8> opc, string OpcodeStr> {
  def rr : SS4AIi8<opc, MRMDestReg, (outs GR32:$dst),
                 (ins VR128:$src1, i32i8imm:$src2),
                 !strconcat(OpcodeStr,
                  "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
                 [(set GR32:$dst,
                   (extractelt (v4i32 VR128:$src1), imm:$src2))]>, OpSize;
  def mr : SS4AIi8<opc, MRMDestMem, (outs),
                 (ins i32mem:$dst, VR128:$src1, i32i8imm:$src2),
                 !strconcat(OpcodeStr,
                  "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
                 [(store (extractelt (v4i32 VR128:$src1), imm:$src2),
                          addr:$dst)]>, OpSize;
}

let Predicates = [HasAVX] in
  defm VPEXTRD : SS41I_extract32<0x16, "vpextrd">, VEX;

defm PEXTRD : SS41I_extract32<0x16, "pextrd">;

/// SS41I_extract64 - SSE 4.1 extract 64 bits to int reg or memory destination
multiclass SS41I_extract64<bits<8> opc, string OpcodeStr> {
  def rr : SS4AIi8<opc, MRMDestReg, (outs GR64:$dst),
                 (ins VR128:$src1, i32i8imm:$src2),
                 !strconcat(OpcodeStr,
                  "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
                 [(set GR64:$dst,
                   (extractelt (v2i64 VR128:$src1), imm:$src2))]>, OpSize, REX_W;
  def mr : SS4AIi8<opc, MRMDestMem, (outs),
                 (ins i64mem:$dst, VR128:$src1, i32i8imm:$src2),
                 !strconcat(OpcodeStr,
                  "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
                 [(store (extractelt (v2i64 VR128:$src1), imm:$src2),
                          addr:$dst)]>, OpSize, REX_W;
}

let Predicates = [HasAVX] in
  defm VPEXTRQ : SS41I_extract64<0x16, "vpextrq">, VEX, VEX_W;

defm PEXTRQ : SS41I_extract64<0x16, "pextrq">;

/// SS41I_extractf32 - SSE 4.1 extract 32 bits fp value to int reg or memory
/// destination
multiclass SS41I_extractf32<bits<8> opc, string OpcodeStr> {
  def rr : SS4AIi8<opc, MRMDestReg, (outs GR32:$dst),
                 (ins VR128:$src1, i32i8imm:$src2),
                 !strconcat(OpcodeStr,
                  "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
                 [(set GR32:$dst,
                    (extractelt (bc_v4i32 (v4f32 VR128:$src1)), imm:$src2))]>,
                 OpSize;
  def mr : SS4AIi8<opc, MRMDestMem, (outs),
                 (ins f32mem:$dst, VR128:$src1, i32i8imm:$src2),
                 !strconcat(OpcodeStr,
                  "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
                 [(store (extractelt (bc_v4i32 (v4f32 VR128:$src1)), imm:$src2),
                          addr:$dst)]>, OpSize;
}

let ExeDomain = SSEPackedSingle in {
  let Predicates = [HasAVX] in {
    defm VEXTRACTPS : SS41I_extractf32<0x17, "vextractps">, VEX;
    def VEXTRACTPSrr64 : SS4AIi8<0x17, MRMDestReg, (outs GR64:$dst),
                    (ins VR128:$src1, i32i8imm:$src2),
                    "vextractps \t{$src2, $src1, $dst|$dst, $src1, $src2}",
                    []>, OpSize, VEX;
  }
  defm EXTRACTPS   : SS41I_extractf32<0x17, "extractps">;
}

// Also match an EXTRACTPS store when the store is done as f32 instead of i32.
// AVX form: reuse the i32-typed VEXTRACTPSmr for an f32 store of the same
// 32 bits.
def : Pat<(store (f32 (bitconvert (extractelt (bc_v4i32 (v4f32 VR128:$src1)),
                                              imm:$src2))),
                 addr:$dst),
          (VEXTRACTPSmr addr:$dst, VR128:$src1, imm:$src2)>,
          Requires<[HasAVX]>;
// SSE4.1 form of the same f32-store match.
def : Pat<(store (f32 (bitconvert (extractelt (bc_v4i32 (v4f32 VR128:$src1)),
                                              imm:$src2))),
                 addr:$dst),
          (EXTRACTPSmr addr:$dst, VR128:$src1, imm:$src2)>,
          Requires<[UseSSE41]>;

//===----------------------------------------------------------------------===//
// SSE4.1 - Insert Instructions
//===----------------------------------------------------------------------===//

/// SS41I_insert8 - SSE 4.1 pinsrb: insert the low byte of a GR32 (rr) or an
/// extending 8-bit load (rm) into the byte element selected by $src3.
multiclass SS41I_insert8<bits<8> opc, string asm, bit Is2Addr = 1> {
  def rr : SS4AIi8<opc, MRMSrcReg, (outs VR128:$dst),
      (ins VR128:$src1, GR32:$src2, i32i8imm:$src3),
      !if(Is2Addr,
        !strconcat(asm, "\t{$src3, $src2, $dst|$dst, $src2, $src3}"),
        !strconcat(asm,
                   "\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}")),
      [(set VR128:$dst,
        (X86pinsrb VR128:$src1, GR32:$src2, imm:$src3))]>, OpSize;
  def rm : SS4AIi8<opc, MRMSrcMem, (outs VR128:$dst),
      (ins VR128:$src1, i8mem:$src2, i32i8imm:$src3),
      !if(Is2Addr,
        !strconcat(asm, "\t{$src3, $src2, $dst|$dst, $src2, $src3}"),
        !strconcat(asm,
                   "\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}")),
      [(set VR128:$dst,
        (X86pinsrb VR128:$src1, (extloadi8 addr:$src2),
                   imm:$src3))]>, OpSize;
}

let Predicates = [HasAVX] in
  defm VPINSRB : SS41I_insert8<0x20, "vpinsrb", 0>, VEX_4V;
let Constraints = "$src1 = $dst" in
  defm PINSRB : SS41I_insert8<0x20, "pinsrb">;

/// SS41I_insert32 - SSE 4.1 pinsrd: insert a GR32 (rr) or a 32-bit load (rm)
/// into the v4i32 element selected by $src3.
multiclass SS41I_insert32<bits<8> opc, string asm, bit Is2Addr = 1> {
  def rr : SS4AIi8<opc, MRMSrcReg, (outs VR128:$dst),
      (ins VR128:$src1, GR32:$src2, i32i8imm:$src3),
      !if(Is2Addr,
        !strconcat(asm, "\t{$src3, $src2, $dst|$dst, $src2, $src3}"),
        !strconcat(asm,
                   "\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}")),
      [(set VR128:$dst,
        (v4i32 (insertelt VR128:$src1, GR32:$src2, imm:$src3)))]>,
      OpSize;
  def rm : SS4AIi8<opc, MRMSrcMem, (outs VR128:$dst),
      (ins VR128:$src1, i32mem:$src2, i32i8imm:$src3),
      !if(Is2Addr,
        !strconcat(asm, "\t{$src3, $src2, $dst|$dst, $src2, $src3}"),
        !strconcat(asm,
                   "\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}")),
      [(set VR128:$dst,
        (v4i32 (insertelt VR128:$src1, (loadi32 addr:$src2),
                          imm:$src3)))]>, OpSize;
}

let Predicates = [HasAVX] in
  defm VPINSRD : SS41I_insert32<0x22, "vpinsrd", 0>, VEX_4V;
let Constraints = "$src1 = $dst" in
  defm PINSRD : SS41I_insert32<0x22, "pinsrd">;

/// SS41I_insert64 - SSE 4.1 pinsrq: insert a GR64 (rr) or a 64-bit load (rm)
/// into the v2i64 element selected by $src3.  Same opcode as pinsrd; REX.W /
/// VEX.W distinguishes the 64-bit form.
multiclass SS41I_insert64<bits<8> opc, string asm, bit Is2Addr = 1> {
  def rr : SS4AIi8<opc, MRMSrcReg, (outs VR128:$dst),
      (ins VR128:$src1, GR64:$src2, i32i8imm:$src3),
      !if(Is2Addr,
        !strconcat(asm, "\t{$src3, $src2, $dst|$dst, $src2, $src3}"),
        !strconcat(asm,
                   "\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}")),
      [(set VR128:$dst,
        (v2i64 (insertelt VR128:$src1, GR64:$src2, imm:$src3)))]>,
      OpSize;
  def rm : SS4AIi8<opc, MRMSrcMem, (outs VR128:$dst),
      (ins VR128:$src1, i64mem:$src2, i32i8imm:$src3),
      !if(Is2Addr,
        !strconcat(asm, "\t{$src3, $src2, $dst|$dst, $src2, $src3}"),
        !strconcat(asm,
                   "\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}")),
      [(set VR128:$dst,
        (v2i64 (insertelt VR128:$src1, (loadi64 addr:$src2),
                          imm:$src3)))]>, OpSize;
}

let Predicates = [HasAVX] in
  defm VPINSRQ : SS41I_insert64<0x22, "vpinsrq", 0>, VEX_4V, VEX_W;
let Constraints = "$src1 = $dst" in
  defm PINSRQ : SS41I_insert64<0x22, "pinsrq">, REX_W;

// insertps has a few different modes, there's the first two here below which
// are optimized inserts that won't zero arbitrary elements in the destination
// vector. The next one matches the intrinsic and could zero arbitrary elements
// in the target vector.
multiclass SS41I_insertf32<bits<8> opc, string asm, bit Is2Addr = 1> {
  def rr : SS4AIi8<opc, MRMSrcReg, (outs VR128:$dst),
      (ins VR128:$src1, VR128:$src2, u32u8imm:$src3),
      !if(Is2Addr,
        !strconcat(asm, "\t{$src3, $src2, $dst|$dst, $src2, $src3}"),
        !strconcat(asm,
                   "\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}")),
      [(set VR128:$dst,
        (X86insrtps VR128:$src1, VR128:$src2, imm:$src3))]>,
      OpSize;
  def rm : SS4AIi8<opc, MRMSrcMem, (outs VR128:$dst),
      (ins VR128:$src1, f32mem:$src2, u32u8imm:$src3),
      !if(Is2Addr,
        !strconcat(asm, "\t{$src3, $src2, $dst|$dst, $src2, $src3}"),
        !strconcat(asm,
                   "\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}")),
      [(set VR128:$dst,
        (X86insrtps VR128:$src1,
                   (v4f32 (scalar_to_vector (loadf32 addr:$src2))),
                    imm:$src3))]>, OpSize;
}

let ExeDomain = SSEPackedSingle in {
  let Predicates = [HasAVX] in
    defm VINSERTPS : SS41I_insertf32<0x21, "vinsertps", 0>, VEX_4V;
  let Constraints = "$src1 = $dst" in
    defm INSERTPS : SS41I_insertf32<0x21, "insertps">;
}

//===----------------------------------------------------------------------===//
// SSE4.1 - Round Instructions
//===----------------------------------------------------------------------===//

/// sse41_fp_unop_rm - Packed roundps/roundpd intrinsic forms (reg and mem),
/// parameterized over register class / memory operand so the same multiclass
/// serves the 128-bit and 256-bit (AVX) variants.
multiclass sse41_fp_unop_rm<bits<8> opcps, bits<8> opcpd, string OpcodeStr,
                            X86MemOperand x86memop, RegisterClass RC,
                            PatFrag mem_frag32, PatFrag mem_frag64,
                            Intrinsic V4F32Int, Intrinsic V2F64Int> {
let ExeDomain = SSEPackedSingle in {
  // Vector intrinsic operation, reg
  def PSr : SS4AIi8<opcps, MRMSrcReg,
                    (outs RC:$dst), (ins RC:$src1, i32i8imm:$src2),
                    !strconcat(OpcodeStr,
                    "ps\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
                    [(set RC:$dst, (V4F32Int RC:$src1, imm:$src2))]>,
                    OpSize;

  // Vector intrinsic operation, mem
  def PSm : SS4AIi8<opcps, MRMSrcMem,
                    (outs RC:$dst), (ins x86memop:$src1, i32i8imm:$src2),
                    !strconcat(OpcodeStr,
                    "ps\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
                    [(set RC:$dst,
                          (V4F32Int (mem_frag32 addr:$src1),imm:$src2))]>,
                    OpSize;
} // ExeDomain = SSEPackedSingle

let ExeDomain = SSEPackedDouble in {
  // Vector intrinsic operation, reg
  def PDr : SS4AIi8<opcpd, MRMSrcReg,
                    (outs RC:$dst), (ins RC:$src1, i32i8imm:$src2),
                    !strconcat(OpcodeStr,
                    "pd\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
                    [(set RC:$dst, (V2F64Int RC:$src1, imm:$src2))]>,
                    OpSize;

  // Vector intrinsic operation, mem
  def PDm : SS4AIi8<opcpd, MRMSrcMem,
                    (outs RC:$dst), (ins x86memop:$src1, i32i8imm:$src2),
                    !strconcat(OpcodeStr,
                    "pd\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
                    [(set RC:$dst,
                          (V2F64Int (mem_frag64 addr:$src1),imm:$src2))]>,
                    OpSize;
} // ExeDomain = SSEPackedDouble
}

/// sse41_fp_binop_rm - Scalar roundss/roundsd.  The FR32/FR64 `SSr`/`SDr`
/// forms carry no pattern; they are selected explicitly by the scalar
/// rounding patterns below (with an IMPLICIT_DEF first operand).  The _Int
/// and mem forms match the SSE4.1 round intrinsics.
multiclass sse41_fp_binop_rm<bits<8> opcss, bits<8> opcsd,
                             string OpcodeStr,
                             Intrinsic F32Int,
                             Intrinsic F64Int, bit Is2Addr = 1> {
let ExeDomain = GenericDomain in {
  // Operation, reg.
  def SSr : SS4AIi8<opcss, MRMSrcReg,
      (outs FR32:$dst), (ins FR32:$src1, FR32:$src2, i32i8imm:$src3),
      !if(Is2Addr,
          !strconcat(OpcodeStr,
              "ss\t{$src3, $src2, $dst|$dst, $src2, $src3}"),
          !strconcat(OpcodeStr,
              "ss\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}")),
      []>, OpSize;

  // Intrinsic operation, reg.
  def SSr_Int : SS4AIi8<opcss, MRMSrcReg,
      (outs VR128:$dst), (ins VR128:$src1, VR128:$src2, i32i8imm:$src3),
      !if(Is2Addr,
          !strconcat(OpcodeStr,
              "ss\t{$src3, $src2, $dst|$dst, $src2, $src3}"),
          !strconcat(OpcodeStr,
              "ss\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}")),
      [(set VR128:$dst, (F32Int VR128:$src1, VR128:$src2, imm:$src3))]>,
      OpSize;

  // Intrinsic operation, mem.
  def SSm : SS4AIi8<opcss, MRMSrcMem,
      (outs VR128:$dst), (ins VR128:$src1, ssmem:$src2, i32i8imm:$src3),
      !if(Is2Addr,
          !strconcat(OpcodeStr,
              "ss\t{$src3, $src2, $dst|$dst, $src2, $src3}"),
          !strconcat(OpcodeStr,
              "ss\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}")),
      [(set VR128:$dst,
           (F32Int VR128:$src1, sse_load_f32:$src2, imm:$src3))]>,
      OpSize;

  // Operation, reg.
  def SDr : SS4AIi8<opcsd, MRMSrcReg,
      (outs FR64:$dst), (ins FR64:$src1, FR64:$src2, i32i8imm:$src3),
      !if(Is2Addr,
          !strconcat(OpcodeStr,
              "sd\t{$src3, $src2, $dst|$dst, $src2, $src3}"),
          !strconcat(OpcodeStr,
              "sd\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}")),
      []>, OpSize;

  // Intrinsic operation, reg.
  def SDr_Int : SS4AIi8<opcsd, MRMSrcReg,
      (outs VR128:$dst), (ins VR128:$src1, VR128:$src2, i32i8imm:$src3),
      !if(Is2Addr,
          !strconcat(OpcodeStr,
              "sd\t{$src3, $src2, $dst|$dst, $src2, $src3}"),
          !strconcat(OpcodeStr,
              "sd\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}")),
      [(set VR128:$dst, (F64Int VR128:$src1, VR128:$src2, imm:$src3))]>,
      OpSize;

  // Intrinsic operation, mem.
  def SDm : SS4AIi8<opcsd, MRMSrcMem,
      (outs VR128:$dst), (ins VR128:$src1, sdmem:$src2, i32i8imm:$src3),
      !if(Is2Addr,
          !strconcat(OpcodeStr,
              "sd\t{$src3, $src2, $dst|$dst, $src2, $src3}"),
          !strconcat(OpcodeStr,
              "sd\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}")),
      [(set VR128:$dst,
           (F64Int VR128:$src1, sse_load_f64:$src2, imm:$src3))]>,
      OpSize;
} // ExeDomain = GenericDomain
}

// FP round - roundss, roundps, roundsd, roundpd
//
// Rounding-control immediates used by the patterns below (the scalar and
// packed forms agree):
//   0x1 -> ffloor, 0x2 -> fceil, 0x3 -> ftrunc, 0x4 -> frint,
//   0xC -> fnearbyint (0x4 | 0x8; per the Intel SDM ROUND* immediate
//   encoding, bit 3 suppresses the precision exception).
let Predicates = [HasAVX] in {
  // Intrinsic form
  defm VROUND  : sse41_fp_unop_rm<0x08, 0x09, "vround", f128mem, VR128,
                                  memopv4f32, memopv2f64,
                                  int_x86_sse41_round_ps,
                                  int_x86_sse41_round_pd>, VEX;
  defm VROUNDY : sse41_fp_unop_rm<0x08, 0x09, "vround", f256mem, VR256,
                                  memopv8f32, memopv4f64,
                                  int_x86_avx_round_ps_256,
                                  int_x86_avx_round_pd_256>, VEX, VEX_L;
  defm VROUND  : sse41_fp_binop_rm<0x0A, 0x0B, "vround",
                                   int_x86_sse41_round_ss,
                                   int_x86_sse41_round_sd, 0>, VEX_4V, VEX_LIG;

  def : Pat<(ffloor FR32:$src),
            (VROUNDSSr (f32 (IMPLICIT_DEF)), FR32:$src, (i32 0x1))>;
  def : Pat<(f64 (ffloor FR64:$src)),
            (VROUNDSDr (f64 (IMPLICIT_DEF)), FR64:$src, (i32 0x1))>;
  def : Pat<(f32 (fnearbyint FR32:$src)),
            (VROUNDSSr (f32 (IMPLICIT_DEF)), FR32:$src, (i32 0xC))>;
  def : Pat<(f64 (fnearbyint FR64:$src)),
            (VROUNDSDr (f64 (IMPLICIT_DEF)), FR64:$src, (i32 0xC))>;
  def : Pat<(f32 (fceil FR32:$src)),
            (VROUNDSSr (f32 (IMPLICIT_DEF)), FR32:$src, (i32 0x2))>;
  def : Pat<(f64 (fceil FR64:$src)),
            (VROUNDSDr (f64 (IMPLICIT_DEF)), FR64:$src, (i32 0x2))>;
  def : Pat<(f32 (frint FR32:$src)),
            (VROUNDSSr (f32 (IMPLICIT_DEF)), FR32:$src, (i32 0x4))>;
  def : Pat<(f64 (frint FR64:$src)),
            (VROUNDSDr (f64 (IMPLICIT_DEF)), FR64:$src, (i32 0x4))>;
  def : Pat<(f32 (ftrunc FR32:$src)),
            (VROUNDSSr (f32 (IMPLICIT_DEF)), FR32:$src, (i32 0x3))>;
  def : Pat<(f64 (ftrunc FR64:$src)),
            (VROUNDSDr (f64 (IMPLICIT_DEF)), FR64:$src, (i32 0x3))>;

  def : Pat<(v4f32 (ffloor VR128:$src)),
            (VROUNDPSr VR128:$src, (i32 0x1))>;
  def : Pat<(v2f64 (ffloor VR128:$src)),
            (VROUNDPDr VR128:$src, (i32 0x1))>;
  def : Pat<(v8f32 (ffloor VR256:$src)),
            (VROUNDYPSr VR256:$src, (i32 0x1))>;
  def : Pat<(v4f64 (ffloor VR256:$src)),
            (VROUNDYPDr VR256:$src, (i32 0x1))>;
}

defm ROUND  : sse41_fp_unop_rm<0x08, 0x09, "round", f128mem, VR128,
                               memopv4f32, memopv2f64,
                               int_x86_sse41_round_ps, int_x86_sse41_round_pd>;
let Constraints = "$src1 = $dst" in
defm ROUND  : sse41_fp_binop_rm<0x0A, 0x0B, "round",
                                int_x86_sse41_round_ss, int_x86_sse41_round_sd>;

let Predicates = [UseSSE41] in {
  def : Pat<(ffloor FR32:$src),
            (ROUNDSSr (f32 (IMPLICIT_DEF)), FR32:$src, (i32 0x1))>;
  def : Pat<(f64 (ffloor FR64:$src)),
            (ROUNDSDr (f64 (IMPLICIT_DEF)), FR64:$src, (i32 0x1))>;
  def : Pat<(f32 (fnearbyint FR32:$src)),
            (ROUNDSSr (f32 (IMPLICIT_DEF)), FR32:$src, (i32 0xC))>;
  def : Pat<(f64 (fnearbyint FR64:$src)),
            (ROUNDSDr (f64 (IMPLICIT_DEF)), FR64:$src, (i32 0xC))>;
  def : Pat<(f32 (fceil FR32:$src)),
            (ROUNDSSr (f32 (IMPLICIT_DEF)), FR32:$src, (i32 0x2))>;
  def : Pat<(f64 (fceil FR64:$src)),
            (ROUNDSDr (f64 (IMPLICIT_DEF)), FR64:$src, (i32 0x2))>;
  def : Pat<(f32 (frint FR32:$src)),
            (ROUNDSSr (f32 (IMPLICIT_DEF)), FR32:$src, (i32 0x4))>;
  def : Pat<(f64 (frint FR64:$src)),
            (ROUNDSDr (f64 (IMPLICIT_DEF)), FR64:$src, (i32 0x4))>;
  def : Pat<(f32 (ftrunc FR32:$src)),
            (ROUNDSSr (f32 (IMPLICIT_DEF)), FR32:$src, (i32 0x3))>;
  def : Pat<(f64 (ftrunc FR64:$src)),
            (ROUNDSDr (f64 (IMPLICIT_DEF)), FR64:$src, (i32 0x3))>;

  def : Pat<(v4f32 (ffloor VR128:$src)),
            (ROUNDPSr VR128:$src, (i32 0x1))>;
  def : Pat<(v2f64 (ffloor VR128:$src)),
            (ROUNDPDr VR128:$src, (i32 0x1))>;
}

6359//===----------------------------------------------------------------------===// 6360// SSE4.1 - Packed Bit Test 6361//===----------------------------------------------------------------------===// 6362 6363// ptest instruction we'll lower to this in X86ISelLowering primarily from 6364// the intel intrinsic that corresponds to this. 6365let Defs = [EFLAGS], Predicates = [HasAVX] in { 6366def VPTESTrr : SS48I<0x17, MRMSrcReg, (outs), (ins VR128:$src1, VR128:$src2), 6367 "vptest\t{$src2, $src1|$src1, $src2}", 6368 [(set EFLAGS, (X86ptest VR128:$src1, (v2i64 VR128:$src2)))]>, 6369 OpSize, VEX; 6370def VPTESTrm : SS48I<0x17, MRMSrcMem, (outs), (ins VR128:$src1, f128mem:$src2), 6371 "vptest\t{$src2, $src1|$src1, $src2}", 6372 [(set EFLAGS,(X86ptest VR128:$src1, (memopv2i64 addr:$src2)))]>, 6373 OpSize, VEX; 6374 6375def VPTESTYrr : SS48I<0x17, MRMSrcReg, (outs), (ins VR256:$src1, VR256:$src2), 6376 "vptest\t{$src2, $src1|$src1, $src2}", 6377 [(set EFLAGS, (X86ptest VR256:$src1, (v4i64 VR256:$src2)))]>, 6378 OpSize, VEX, VEX_L; 6379def VPTESTYrm : SS48I<0x17, MRMSrcMem, (outs), (ins VR256:$src1, i256mem:$src2), 6380 "vptest\t{$src2, $src1|$src1, $src2}", 6381 [(set EFLAGS,(X86ptest VR256:$src1, (memopv4i64 addr:$src2)))]>, 6382 OpSize, VEX, VEX_L; 6383} 6384 6385let Defs = [EFLAGS] in { 6386def PTESTrr : SS48I<0x17, MRMSrcReg, (outs), (ins VR128:$src1, VR128:$src2), 6387 "ptest\t{$src2, $src1|$src1, $src2}", 6388 [(set EFLAGS, (X86ptest VR128:$src1, (v2i64 VR128:$src2)))]>, 6389 OpSize; 6390def PTESTrm : SS48I<0x17, MRMSrcMem, (outs), (ins VR128:$src1, f128mem:$src2), 6391 "ptest\t{$src2, $src1|$src1, $src2}", 6392 [(set EFLAGS, (X86ptest VR128:$src1, (memopv2i64 addr:$src2)))]>, 6393 OpSize; 6394} 6395 6396// The bit test instructions below are AVX only 6397multiclass avx_bittest<bits<8> opc, string OpcodeStr, RegisterClass RC, 6398 X86MemOperand x86memop, PatFrag mem_frag, ValueType vt> { 6399 def rr : SS48I<opc, MRMSrcReg, (outs), (ins RC:$src1, RC:$src2), 6400 
!strconcat(OpcodeStr, "\t{$src2, $src1|$src1, $src2}"), 6401 [(set EFLAGS, (X86testp RC:$src1, (vt RC:$src2)))]>, OpSize, VEX; 6402 def rm : SS48I<opc, MRMSrcMem, (outs), (ins RC:$src1, x86memop:$src2), 6403 !strconcat(OpcodeStr, "\t{$src2, $src1|$src1, $src2}"), 6404 [(set EFLAGS, (X86testp RC:$src1, (mem_frag addr:$src2)))]>, 6405 OpSize, VEX; 6406} 6407 6408let Defs = [EFLAGS], Predicates = [HasAVX] in { 6409let ExeDomain = SSEPackedSingle in { 6410defm VTESTPS : avx_bittest<0x0E, "vtestps", VR128, f128mem, memopv4f32, v4f32>; 6411defm VTESTPSY : avx_bittest<0x0E, "vtestps", VR256, f256mem, memopv8f32, v8f32>, 6412 VEX_L; 6413} 6414let ExeDomain = SSEPackedDouble in { 6415defm VTESTPD : avx_bittest<0x0F, "vtestpd", VR128, f128mem, memopv2f64, v2f64>; 6416defm VTESTPDY : avx_bittest<0x0F, "vtestpd", VR256, f256mem, memopv4f64, v4f64>, 6417 VEX_L; 6418} 6419} 6420 6421//===----------------------------------------------------------------------===// 6422// SSE4.1 - Misc Instructions 6423//===----------------------------------------------------------------------===// 6424 6425let Defs = [EFLAGS], Predicates = [HasPOPCNT] in { 6426 def POPCNT16rr : I<0xB8, MRMSrcReg, (outs GR16:$dst), (ins GR16:$src), 6427 "popcnt{w}\t{$src, $dst|$dst, $src}", 6428 [(set GR16:$dst, (ctpop GR16:$src)), (implicit EFLAGS)]>, 6429 OpSize, XS; 6430 def POPCNT16rm : I<0xB8, MRMSrcMem, (outs GR16:$dst), (ins i16mem:$src), 6431 "popcnt{w}\t{$src, $dst|$dst, $src}", 6432 [(set GR16:$dst, (ctpop (loadi16 addr:$src))), 6433 (implicit EFLAGS)]>, OpSize, XS; 6434 6435 def POPCNT32rr : I<0xB8, MRMSrcReg, (outs GR32:$dst), (ins GR32:$src), 6436 "popcnt{l}\t{$src, $dst|$dst, $src}", 6437 [(set GR32:$dst, (ctpop GR32:$src)), (implicit EFLAGS)]>, 6438 XS; 6439 def POPCNT32rm : I<0xB8, MRMSrcMem, (outs GR32:$dst), (ins i32mem:$src), 6440 "popcnt{l}\t{$src, $dst|$dst, $src}", 6441 [(set GR32:$dst, (ctpop (loadi32 addr:$src))), 6442 (implicit EFLAGS)]>, XS; 6443 6444 def POPCNT64rr : RI<0xB8, MRMSrcReg, 
(outs GR64:$dst), (ins GR64:$src), 6445 "popcnt{q}\t{$src, $dst|$dst, $src}", 6446 [(set GR64:$dst, (ctpop GR64:$src)), (implicit EFLAGS)]>, 6447 XS; 6448 def POPCNT64rm : RI<0xB8, MRMSrcMem, (outs GR64:$dst), (ins i64mem:$src), 6449 "popcnt{q}\t{$src, $dst|$dst, $src}", 6450 [(set GR64:$dst, (ctpop (loadi64 addr:$src))), 6451 (implicit EFLAGS)]>, XS; 6452} 6453 6454 6455 6456// SS41I_unop_rm_int_v16 - SSE 4.1 unary operator whose type is v8i16. 6457multiclass SS41I_unop_rm_int_v16<bits<8> opc, string OpcodeStr, 6458 Intrinsic IntId128> { 6459 def rr128 : SS48I<opc, MRMSrcReg, (outs VR128:$dst), 6460 (ins VR128:$src), 6461 !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"), 6462 [(set VR128:$dst, (IntId128 VR128:$src))]>, OpSize; 6463 def rm128 : SS48I<opc, MRMSrcMem, (outs VR128:$dst), 6464 (ins i128mem:$src), 6465 !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"), 6466 [(set VR128:$dst, 6467 (IntId128 6468 (bitconvert (memopv2i64 addr:$src))))]>, OpSize; 6469} 6470 6471let Predicates = [HasAVX] in 6472defm VPHMINPOSUW : SS41I_unop_rm_int_v16 <0x41, "vphminposuw", 6473 int_x86_sse41_phminposuw>, VEX; 6474defm PHMINPOSUW : SS41I_unop_rm_int_v16 <0x41, "phminposuw", 6475 int_x86_sse41_phminposuw>; 6476 6477/// SS41I_binop_rm_int - Simple SSE 4.1 binary operator 6478multiclass SS41I_binop_rm_int<bits<8> opc, string OpcodeStr, 6479 Intrinsic IntId128, bit Is2Addr = 1> { 6480 let isCommutable = 1 in 6481 def rr : SS48I<opc, MRMSrcReg, (outs VR128:$dst), 6482 (ins VR128:$src1, VR128:$src2), 6483 !if(Is2Addr, 6484 !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"), 6485 !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")), 6486 [(set VR128:$dst, (IntId128 VR128:$src1, VR128:$src2))]>, OpSize; 6487 def rm : SS48I<opc, MRMSrcMem, (outs VR128:$dst), 6488 (ins VR128:$src1, i128mem:$src2), 6489 !if(Is2Addr, 6490 !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"), 6491 !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")), 6492 [(set 
VR128:$dst, 6493 (IntId128 VR128:$src1, 6494 (bitconvert (memopv2i64 addr:$src2))))]>, OpSize; 6495} 6496 6497/// SS41I_binop_rm_int_y - Simple SSE 4.1 binary operator 6498multiclass SS41I_binop_rm_int_y<bits<8> opc, string OpcodeStr, 6499 Intrinsic IntId256> { 6500 let isCommutable = 1 in 6501 def Yrr : SS48I<opc, MRMSrcReg, (outs VR256:$dst), 6502 (ins VR256:$src1, VR256:$src2), 6503 !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"), 6504 [(set VR256:$dst, (IntId256 VR256:$src1, VR256:$src2))]>, OpSize; 6505 def Yrm : SS48I<opc, MRMSrcMem, (outs VR256:$dst), 6506 (ins VR256:$src1, i256mem:$src2), 6507 !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"), 6508 [(set VR256:$dst, 6509 (IntId256 VR256:$src1, 6510 (bitconvert (memopv4i64 addr:$src2))))]>, OpSize; 6511} 6512 6513let Predicates = [HasAVX] in { 6514 let isCommutable = 0 in 6515 defm VPACKUSDW : SS41I_binop_rm_int<0x2B, "vpackusdw", int_x86_sse41_packusdw, 6516 0>, VEX_4V; 6517 defm VPMINSB : SS41I_binop_rm_int<0x38, "vpminsb", int_x86_sse41_pminsb, 6518 0>, VEX_4V; 6519 defm VPMINSD : SS41I_binop_rm_int<0x39, "vpminsd", int_x86_sse41_pminsd, 6520 0>, VEX_4V; 6521 defm VPMINUD : SS41I_binop_rm_int<0x3B, "vpminud", int_x86_sse41_pminud, 6522 0>, VEX_4V; 6523 defm VPMINUW : SS41I_binop_rm_int<0x3A, "vpminuw", int_x86_sse41_pminuw, 6524 0>, VEX_4V; 6525 defm VPMAXSB : SS41I_binop_rm_int<0x3C, "vpmaxsb", int_x86_sse41_pmaxsb, 6526 0>, VEX_4V; 6527 defm VPMAXSD : SS41I_binop_rm_int<0x3D, "vpmaxsd", int_x86_sse41_pmaxsd, 6528 0>, VEX_4V; 6529 defm VPMAXUD : SS41I_binop_rm_int<0x3F, "vpmaxud", int_x86_sse41_pmaxud, 6530 0>, VEX_4V; 6531 defm VPMAXUW : SS41I_binop_rm_int<0x3E, "vpmaxuw", int_x86_sse41_pmaxuw, 6532 0>, VEX_4V; 6533 defm VPMULDQ : SS41I_binop_rm_int<0x28, "vpmuldq", int_x86_sse41_pmuldq, 6534 0>, VEX_4V; 6535} 6536 6537let Predicates = [HasAVX2] in { 6538 let isCommutable = 0 in 6539 defm VPACKUSDW : SS41I_binop_rm_int_y<0x2B, "vpackusdw", 6540 int_x86_avx2_packusdw>, 
VEX_4V, VEX_L; 6541 defm VPMINSB : SS41I_binop_rm_int_y<0x38, "vpminsb", 6542 int_x86_avx2_pmins_b>, VEX_4V, VEX_L; 6543 defm VPMINSD : SS41I_binop_rm_int_y<0x39, "vpminsd", 6544 int_x86_avx2_pmins_d>, VEX_4V, VEX_L; 6545 defm VPMINUD : SS41I_binop_rm_int_y<0x3B, "vpminud", 6546 int_x86_avx2_pminu_d>, VEX_4V, VEX_L; 6547 defm VPMINUW : SS41I_binop_rm_int_y<0x3A, "vpminuw", 6548 int_x86_avx2_pminu_w>, VEX_4V, VEX_L; 6549 defm VPMAXSB : SS41I_binop_rm_int_y<0x3C, "vpmaxsb", 6550 int_x86_avx2_pmaxs_b>, VEX_4V, VEX_L; 6551 defm VPMAXSD : SS41I_binop_rm_int_y<0x3D, "vpmaxsd", 6552 int_x86_avx2_pmaxs_d>, VEX_4V, VEX_L; 6553 defm VPMAXUD : SS41I_binop_rm_int_y<0x3F, "vpmaxud", 6554 int_x86_avx2_pmaxu_d>, VEX_4V, VEX_L; 6555 defm VPMAXUW : SS41I_binop_rm_int_y<0x3E, "vpmaxuw", 6556 int_x86_avx2_pmaxu_w>, VEX_4V, VEX_L; 6557 defm VPMULDQ : SS41I_binop_rm_int_y<0x28, "vpmuldq", 6558 int_x86_avx2_pmul_dq>, VEX_4V, VEX_L; 6559} 6560 6561let Constraints = "$src1 = $dst" in { 6562 let isCommutable = 0 in 6563 defm PACKUSDW : SS41I_binop_rm_int<0x2B, "packusdw", int_x86_sse41_packusdw>; 6564 defm PMINSB : SS41I_binop_rm_int<0x38, "pminsb", int_x86_sse41_pminsb>; 6565 defm PMINSD : SS41I_binop_rm_int<0x39, "pminsd", int_x86_sse41_pminsd>; 6566 defm PMINUD : SS41I_binop_rm_int<0x3B, "pminud", int_x86_sse41_pminud>; 6567 defm PMINUW : SS41I_binop_rm_int<0x3A, "pminuw", int_x86_sse41_pminuw>; 6568 defm PMAXSB : SS41I_binop_rm_int<0x3C, "pmaxsb", int_x86_sse41_pmaxsb>; 6569 defm PMAXSD : SS41I_binop_rm_int<0x3D, "pmaxsd", int_x86_sse41_pmaxsd>; 6570 defm PMAXUD : SS41I_binop_rm_int<0x3F, "pmaxud", int_x86_sse41_pmaxud>; 6571 defm PMAXUW : SS41I_binop_rm_int<0x3E, "pmaxuw", int_x86_sse41_pmaxuw>; 6572 defm PMULDQ : SS41I_binop_rm_int<0x28, "pmuldq", int_x86_sse41_pmuldq>; 6573} 6574 6575/// SS48I_binop_rm - Simple SSE41 binary operator. 
6576multiclass SS48I_binop_rm<bits<8> opc, string OpcodeStr, SDNode OpNode, 6577 ValueType OpVT, RegisterClass RC, PatFrag memop_frag, 6578 X86MemOperand x86memop, bit Is2Addr = 1> { 6579 let isCommutable = 1 in 6580 def rr : SS48I<opc, MRMSrcReg, (outs RC:$dst), 6581 (ins RC:$src1, RC:$src2), 6582 !if(Is2Addr, 6583 !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"), 6584 !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")), 6585 [(set RC:$dst, (OpVT (OpNode RC:$src1, RC:$src2)))]>, OpSize; 6586 def rm : SS48I<opc, MRMSrcMem, (outs RC:$dst), 6587 (ins RC:$src1, x86memop:$src2), 6588 !if(Is2Addr, 6589 !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"), 6590 !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")), 6591 [(set RC:$dst, 6592 (OpVT (OpNode RC:$src1, 6593 (bitconvert (memop_frag addr:$src2)))))]>, OpSize; 6594} 6595 6596let Predicates = [HasAVX] in { 6597 defm VPMULLD : SS48I_binop_rm<0x40, "vpmulld", mul, v4i32, VR128, 6598 memopv2i64, i128mem, 0>, VEX_4V; 6599 defm VPCMPEQQ : SS48I_binop_rm<0x29, "vpcmpeqq", X86pcmpeq, v2i64, VR128, 6600 memopv2i64, i128mem, 0>, VEX_4V; 6601} 6602let Predicates = [HasAVX2] in { 6603 defm VPMULLDY : SS48I_binop_rm<0x40, "vpmulld", mul, v8i32, VR256, 6604 memopv4i64, i256mem, 0>, VEX_4V, VEX_L; 6605 defm VPCMPEQQY : SS48I_binop_rm<0x29, "vpcmpeqq", X86pcmpeq, v4i64, VR256, 6606 memopv4i64, i256mem, 0>, VEX_4V, VEX_L; 6607} 6608 6609let Constraints = "$src1 = $dst" in { 6610 defm PMULLD : SS48I_binop_rm<0x40, "pmulld", mul, v4i32, VR128, 6611 memopv2i64, i128mem>; 6612 defm PCMPEQQ : SS48I_binop_rm<0x29, "pcmpeqq", X86pcmpeq, v2i64, VR128, 6613 memopv2i64, i128mem>; 6614} 6615 6616/// SS41I_binop_rmi_int - SSE 4.1 binary operator with 8-bit immediate 6617multiclass SS41I_binop_rmi_int<bits<8> opc, string OpcodeStr, 6618 Intrinsic IntId, RegisterClass RC, PatFrag memop_frag, 6619 X86MemOperand x86memop, bit Is2Addr = 1> { 6620 let isCommutable = 1 in 6621 def rri : SS4AIi8<opc, MRMSrcReg, 
(outs RC:$dst), 6622 (ins RC:$src1, RC:$src2, u32u8imm:$src3), 6623 !if(Is2Addr, 6624 !strconcat(OpcodeStr, 6625 "\t{$src3, $src2, $dst|$dst, $src2, $src3}"), 6626 !strconcat(OpcodeStr, 6627 "\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}")), 6628 [(set RC:$dst, (IntId RC:$src1, RC:$src2, imm:$src3))]>, 6629 OpSize; 6630 def rmi : SS4AIi8<opc, MRMSrcMem, (outs RC:$dst), 6631 (ins RC:$src1, x86memop:$src2, u32u8imm:$src3), 6632 !if(Is2Addr, 6633 !strconcat(OpcodeStr, 6634 "\t{$src3, $src2, $dst|$dst, $src2, $src3}"), 6635 !strconcat(OpcodeStr, 6636 "\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}")), 6637 [(set RC:$dst, 6638 (IntId RC:$src1, 6639 (bitconvert (memop_frag addr:$src2)), imm:$src3))]>, 6640 OpSize; 6641} 6642 6643let Predicates = [HasAVX] in { 6644 let isCommutable = 0 in { 6645 let ExeDomain = SSEPackedSingle in { 6646 defm VBLENDPS : SS41I_binop_rmi_int<0x0C, "vblendps", int_x86_sse41_blendps, 6647 VR128, memopv4f32, f128mem, 0>, VEX_4V; 6648 defm VBLENDPSY : SS41I_binop_rmi_int<0x0C, "vblendps", 6649 int_x86_avx_blend_ps_256, VR256, memopv8f32, 6650 f256mem, 0>, VEX_4V, VEX_L; 6651 } 6652 let ExeDomain = SSEPackedDouble in { 6653 defm VBLENDPD : SS41I_binop_rmi_int<0x0D, "vblendpd", int_x86_sse41_blendpd, 6654 VR128, memopv2f64, f128mem, 0>, VEX_4V; 6655 defm VBLENDPDY : SS41I_binop_rmi_int<0x0D, "vblendpd", 6656 int_x86_avx_blend_pd_256,VR256, memopv4f64, 6657 f256mem, 0>, VEX_4V, VEX_L; 6658 } 6659 defm VPBLENDW : SS41I_binop_rmi_int<0x0E, "vpblendw", int_x86_sse41_pblendw, 6660 VR128, memopv2i64, i128mem, 0>, VEX_4V; 6661 defm VMPSADBW : SS41I_binop_rmi_int<0x42, "vmpsadbw", int_x86_sse41_mpsadbw, 6662 VR128, memopv2i64, i128mem, 0>, VEX_4V; 6663 } 6664 let ExeDomain = SSEPackedSingle in 6665 defm VDPPS : SS41I_binop_rmi_int<0x40, "vdpps", int_x86_sse41_dpps, 6666 VR128, memopv4f32, f128mem, 0>, VEX_4V; 6667 let ExeDomain = SSEPackedDouble in 6668 defm VDPPD : SS41I_binop_rmi_int<0x41, "vdppd", int_x86_sse41_dppd, 6669 VR128, 
memopv2f64, f128mem, 0>, VEX_4V; 6670 let ExeDomain = SSEPackedSingle in 6671 defm VDPPSY : SS41I_binop_rmi_int<0x40, "vdpps", int_x86_avx_dp_ps_256, 6672 VR256, memopv8f32, i256mem, 0>, VEX_4V, VEX_L; 6673} 6674 6675let Predicates = [HasAVX2] in { 6676 let isCommutable = 0 in { 6677 defm VPBLENDWY : SS41I_binop_rmi_int<0x0E, "vpblendw", int_x86_avx2_pblendw, 6678 VR256, memopv4i64, i256mem, 0>, VEX_4V, VEX_L; 6679 defm VMPSADBWY : SS41I_binop_rmi_int<0x42, "vmpsadbw", int_x86_avx2_mpsadbw, 6680 VR256, memopv4i64, i256mem, 0>, VEX_4V, VEX_L; 6681 } 6682} 6683 6684let Constraints = "$src1 = $dst" in { 6685 let isCommutable = 0 in { 6686 let ExeDomain = SSEPackedSingle in 6687 defm BLENDPS : SS41I_binop_rmi_int<0x0C, "blendps", int_x86_sse41_blendps, 6688 VR128, memopv4f32, f128mem>; 6689 let ExeDomain = SSEPackedDouble in 6690 defm BLENDPD : SS41I_binop_rmi_int<0x0D, "blendpd", int_x86_sse41_blendpd, 6691 VR128, memopv2f64, f128mem>; 6692 defm PBLENDW : SS41I_binop_rmi_int<0x0E, "pblendw", int_x86_sse41_pblendw, 6693 VR128, memopv2i64, i128mem>; 6694 defm MPSADBW : SS41I_binop_rmi_int<0x42, "mpsadbw", int_x86_sse41_mpsadbw, 6695 VR128, memopv2i64, i128mem>; 6696 } 6697 let ExeDomain = SSEPackedSingle in 6698 defm DPPS : SS41I_binop_rmi_int<0x40, "dpps", int_x86_sse41_dpps, 6699 VR128, memopv4f32, f128mem>; 6700 let ExeDomain = SSEPackedDouble in 6701 defm DPPD : SS41I_binop_rmi_int<0x41, "dppd", int_x86_sse41_dppd, 6702 VR128, memopv2f64, f128mem>; 6703} 6704 6705/// SS41I_quaternary_int_avx - AVX SSE 4.1 with 4 operators 6706multiclass SS41I_quaternary_int_avx<bits<8> opc, string OpcodeStr, 6707 RegisterClass RC, X86MemOperand x86memop, 6708 PatFrag mem_frag, Intrinsic IntId> { 6709 def rr : Ii8<opc, MRMSrcReg, (outs RC:$dst), 6710 (ins RC:$src1, RC:$src2, RC:$src3), 6711 !strconcat(OpcodeStr, 6712 "\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}"), 6713 [(set RC:$dst, (IntId RC:$src1, RC:$src2, RC:$src3))], 6714 IIC_DEFAULT, SSEPackedInt>, OpSize, TA, 
VEX_4V, VEX_I8IMM;

  // Memory form: the third (mask) register is still encoded in the immediate
  // byte (VEX_I8IMM); the second operand comes from memory.
  def rm : Ii8<opc, MRMSrcMem, (outs RC:$dst),
                  (ins RC:$src1, x86memop:$src2, RC:$src3),
                  !strconcat(OpcodeStr,
                    "\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}"),
                  [(set RC:$dst,
                        (IntId RC:$src1, (bitconvert (mem_frag addr:$src2)),
                               RC:$src3))],
                  IIC_DEFAULT, SSEPackedInt>, OpSize, TA, VEX_4V, VEX_I8IMM;
}

let Predicates = [HasAVX] in {
let ExeDomain = SSEPackedDouble in {
defm VBLENDVPD  : SS41I_quaternary_int_avx<0x4B, "vblendvpd", VR128, f128mem,
                                           memopv2f64, int_x86_sse41_blendvpd>;
defm VBLENDVPDY : SS41I_quaternary_int_avx<0x4B, "vblendvpd", VR256, f256mem,
                                           memopv4f64,
                                           int_x86_avx_blendv_pd_256>, VEX_L;
} // ExeDomain = SSEPackedDouble
let ExeDomain = SSEPackedSingle in {
defm VBLENDVPS  : SS41I_quaternary_int_avx<0x4A, "vblendvps", VR128, f128mem,
                                           memopv4f32, int_x86_sse41_blendvps>;
defm VBLENDVPSY : SS41I_quaternary_int_avx<0x4A, "vblendvps", VR256, f256mem,
                                           memopv8f32,
                                           int_x86_avx_blendv_ps_256>, VEX_L;
} // ExeDomain = SSEPackedSingle
defm VPBLENDVB  : SS41I_quaternary_int_avx<0x4C, "vpblendvb", VR128, i128mem,
                                           memopv2i64, int_x86_sse41_pblendvb>;
}

let Predicates = [HasAVX2] in {
defm VPBLENDVBY : SS41I_quaternary_int_avx<0x4C, "vpblendvb", VR256, i256mem,
                                           memopv4i64, int_x86_avx2_pblendvb>,
                                           VEX_L;
}

// Map ISD::VSELECT onto the BLENDV instructions.  Note that the operands are
// commuted relative to the node: vselect(mask, src1, src2) becomes
// BLENDV src2, src1, mask in every pattern below.
let Predicates = [HasAVX] in {
  def : Pat<(v16i8 (vselect (v16i8 VR128:$mask), (v16i8 VR128:$src1),
                            (v16i8 VR128:$src2))),
            (VPBLENDVBrr VR128:$src2, VR128:$src1, VR128:$mask)>;
  def : Pat<(v4i32 (vselect (v4i32 VR128:$mask), (v4i32 VR128:$src1),
                            (v4i32 VR128:$src2))),
            (VBLENDVPSrr VR128:$src2, VR128:$src1, VR128:$mask)>;
  def : Pat<(v4f32 (vselect (v4i32 VR128:$mask), (v4f32 VR128:$src1),
                            (v4f32 VR128:$src2))),
            (VBLENDVPSrr VR128:$src2, VR128:$src1, VR128:$mask)>;
  def : Pat<(v2i64 (vselect (v2i64 VR128:$mask), (v2i64 VR128:$src1),
                            (v2i64 VR128:$src2))),
            (VBLENDVPDrr VR128:$src2, VR128:$src1, VR128:$mask)>;
  def : Pat<(v2f64 (vselect (v2i64 VR128:$mask), (v2f64 VR128:$src1),
                            (v2f64 VR128:$src2))),
            (VBLENDVPDrr VR128:$src2, VR128:$src1, VR128:$mask)>;
  def : Pat<(v8i32 (vselect (v8i32 VR256:$mask), (v8i32 VR256:$src1),
                            (v8i32 VR256:$src2))),
            (VBLENDVPSYrr VR256:$src2, VR256:$src1, VR256:$mask)>;
  def : Pat<(v8f32 (vselect (v8i32 VR256:$mask), (v8f32 VR256:$src1),
                            (v8f32 VR256:$src2))),
            (VBLENDVPSYrr VR256:$src2, VR256:$src1, VR256:$mask)>;
  def : Pat<(v4i64 (vselect (v4i64 VR256:$mask), (v4i64 VR256:$src1),
                            (v4i64 VR256:$src2))),
            (VBLENDVPDYrr VR256:$src2, VR256:$src1, VR256:$mask)>;
  def : Pat<(v4f64 (vselect (v4i64 VR256:$mask), (v4f64 VR256:$src1),
                            (v4f64 VR256:$src2))),
            (VBLENDVPDYrr VR256:$src2, VR256:$src1, VR256:$mask)>;

  // Immediate-mask blends; the X86Blend* nodes are likewise commuted.
  def : Pat<(v8f32 (X86Blendps (v8f32 VR256:$src1), (v8f32 VR256:$src2),
                               (imm:$mask))),
            (VBLENDPSYrri VR256:$src2, VR256:$src1, imm:$mask)>;
  def : Pat<(v4f64 (X86Blendpd (v4f64 VR256:$src1), (v4f64 VR256:$src2),
                               (imm:$mask))),
            (VBLENDPDYrri VR256:$src2, VR256:$src1, imm:$mask)>;

  def : Pat<(v8i16 (X86Blendpw (v8i16 VR128:$src1), (v8i16 VR128:$src2),
                               (imm:$mask))),
            (VPBLENDWrri VR128:$src2, VR128:$src1, imm:$mask)>;
  def : Pat<(v4f32 (X86Blendps (v4f32 VR128:$src1), (v4f32 VR128:$src2),
                               (imm:$mask))),
            (VBLENDPSrri VR128:$src2, VR128:$src1, imm:$mask)>;
  def : Pat<(v2f64 (X86Blendpd (v2f64 VR128:$src1), (v2f64 VR128:$src2),
                               (imm:$mask))),
            (VBLENDPDrri VR128:$src2, VR128:$src1, imm:$mask)>;
}

let Predicates = [HasAVX2] in {
  def : Pat<(v32i8 (vselect (v32i8 VR256:$mask), (v32i8 VR256:$src1),
                            (v32i8 VR256:$src2))),
            (VPBLENDVBYrr VR256:$src2, VR256:$src1, VR256:$mask)>;
  def : Pat<(v16i16 (X86Blendpw (v16i16 VR256:$src1), (v16i16 VR256:$src2),
                                (imm:$mask))),
            (VPBLENDWYrri VR256:$src2, VR256:$src1, imm:$mask)>;
}

/// SS41I_ternary_int - SSE 4.1 ternary operator.  The non-VEX blend forms
/// take their mask implicitly in XMM0 (see Uses below).
let Uses = [XMM0], Constraints = "$src1 = $dst" in {
  multiclass SS41I_ternary_int<bits<8> opc, string OpcodeStr, PatFrag mem_frag,
                               X86MemOperand x86memop, Intrinsic IntId> {
    def rr0 : SS48I<opc, MRMSrcReg, (outs VR128:$dst),
                    (ins VR128:$src1, VR128:$src2),
                    !strconcat(OpcodeStr,
                     "\t{$src2, $dst|$dst, $src2}"),
                    [(set VR128:$dst, (IntId VR128:$src1, VR128:$src2, XMM0))]>,
                    OpSize;

    def rm0 : SS48I<opc, MRMSrcMem, (outs VR128:$dst),
                    (ins VR128:$src1, x86memop:$src2),
                    !strconcat(OpcodeStr,
                     "\t{$src2, $dst|$dst, $src2}"),
                    [(set VR128:$dst,
                      (IntId VR128:$src1,
                       (bitconvert (mem_frag addr:$src2)), XMM0))]>, OpSize;
  }
}

let ExeDomain = SSEPackedDouble in
defm BLENDVPD : SS41I_ternary_int<0x15, "blendvpd", memopv2f64, f128mem,
                                  int_x86_sse41_blendvpd>;
let ExeDomain = SSEPackedSingle in
defm BLENDVPS : SS41I_ternary_int<0x14, "blendvps", memopv4f32, f128mem,
                                  int_x86_sse41_blendvps>;
defm PBLENDVB : SS41I_ternary_int<0x10, "pblendvb", memopv2i64, i128mem,
                                  int_x86_sse41_pblendvb>;

// Aliases with the implicit xmm0 argument
def : InstAlias<"blendvpd\t{%xmm0, $src2, $dst|$dst, $src2, %xmm0}",
                (BLENDVPDrr0 VR128:$dst, VR128:$src2)>;
def : InstAlias<"blendvpd\t{%xmm0, $src2, $dst|$dst, $src2, %xmm0}",
                (BLENDVPDrm0 VR128:$dst, f128mem:$src2)>;
def : InstAlias<"blendvps\t{%xmm0, $src2, $dst|$dst, $src2, %xmm0}",
                (BLENDVPSrr0 VR128:$dst, VR128:$src2)>;
def : InstAlias<"blendvps\t{%xmm0, $src2, $dst|$dst, $src2, %xmm0}",
                (BLENDVPSrm0 VR128:$dst, f128mem:$src2)>;
def : InstAlias<"pblendvb\t{%xmm0, $src2, $dst|$dst, $src2, %xmm0}",
                (PBLENDVBrr0 VR128:$dst, VR128:$src2)>;
def : InstAlias<"pblendvb\t{%xmm0, $src2, $dst|$dst, $src2, %xmm0}",
                (PBLENDVBrm0 VR128:$dst, i128mem:$src2)>;

// Same commuted vselect mapping as above, for the non-VEX forms; the mask
// must already be in XMM0.
let Predicates = [UseSSE41] in {
  def : Pat<(v16i8 (vselect (v16i8 XMM0), (v16i8 VR128:$src1),
                            (v16i8 VR128:$src2))),
            (PBLENDVBrr0 VR128:$src2, VR128:$src1)>;
  def : Pat<(v4i32 (vselect (v4i32 XMM0), (v4i32 VR128:$src1),
                            (v4i32 VR128:$src2))),
            (BLENDVPSrr0 VR128:$src2, VR128:$src1)>;
  def : Pat<(v4f32 (vselect (v4i32 XMM0), (v4f32 VR128:$src1),
                            (v4f32 VR128:$src2))),
            (BLENDVPSrr0 VR128:$src2, VR128:$src1)>;
  def : Pat<(v2i64 (vselect (v2i64 XMM0), (v2i64 VR128:$src1),
                            (v2i64 VR128:$src2))),
            (BLENDVPDrr0 VR128:$src2, VR128:$src1)>;
  def : Pat<(v2f64 (vselect (v2i64 XMM0), (v2f64 VR128:$src1),
                            (v2f64 VR128:$src2))),
            (BLENDVPDrr0 VR128:$src2, VR128:$src1)>;

  def : Pat<(v8i16 (X86Blendpw (v8i16 VR128:$src1), (v8i16 VR128:$src2),
                               (imm:$mask))),
            (PBLENDWrri VR128:$src2, VR128:$src1, imm:$mask)>;
  def : Pat<(v4f32 (X86Blendps (v4f32 VR128:$src1), (v4f32 VR128:$src2),
                               (imm:$mask))),
            (BLENDPSrri VR128:$src2, VR128:$src1, imm:$mask)>;
  def : Pat<(v2f64 (X86Blendpd (v2f64 VR128:$src1), (v2f64 VR128:$src2),
                               (imm:$mask))),
            (BLENDPDrri VR128:$src2, VR128:$src1, imm:$mask)>;

}

// MOVNTDQA - non-temporal aligned integer load.
let Predicates = [HasAVX] in
def VMOVNTDQArm : SS48I<0x2A, MRMSrcMem, (outs VR128:$dst),
                        (ins i128mem:$src),
                        "vmovntdqa\t{$src, $dst|$dst, $src}",
                        [(set VR128:$dst,
                          (int_x86_sse41_movntdqa addr:$src))]>,
                        OpSize, VEX;
let Predicates = [HasAVX2] in
def VMOVNTDQAYrm : SS48I<0x2A, MRMSrcMem, (outs VR256:$dst),
                         (ins i256mem:$src),
                         "vmovntdqa\t{$src, $dst|$dst, $src}",
                         [(set VR256:$dst,
                           (int_x86_avx2_movntdqa addr:$src))]>,
                         OpSize, VEX, VEX_L;
def MOVNTDQArm : SS48I<0x2A, MRMSrcMem, (outs VR128:$dst), (ins i128mem:$src),
                       "movntdqa\t{$src, $dst|$dst, $src}",
                       [(set VR128:$dst,
                         (int_x86_sse41_movntdqa addr:$src))]>,
                       OpSize;

//===----------------------------------------------------------------------===//
// SSE4.2 - Compare Instructions
//===----------------------------------------------------------------------===//

/// SS42I_binop_rm - Simple SSE 4.2 binary operator
multiclass SS42I_binop_rm<bits<8> opc, string OpcodeStr, SDNode OpNode,
                          ValueType OpVT, RegisterClass RC, PatFrag memop_frag,
                          X86MemOperand x86memop, bit Is2Addr = 1> {
  def rr : SS428I<opc, MRMSrcReg, (outs RC:$dst),
                  (ins RC:$src1, RC:$src2),
                  !if(Is2Addr,
                    !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"),
                    !strconcat(OpcodeStr,
                               "\t{$src2, $src1, $dst|$dst, $src1, $src2}")),
                  [(set RC:$dst, (OpVT (OpNode RC:$src1, RC:$src2)))]>,
                  OpSize;
  def rm : SS428I<opc, MRMSrcMem, (outs RC:$dst),
                  (ins RC:$src1, x86memop:$src2),
                  !if(Is2Addr,
                    !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"),
                    !strconcat(OpcodeStr,
                               "\t{$src2, $src1, $dst|$dst, $src1, $src2}")),
                  [(set RC:$dst,
                    (OpVT (OpNode RC:$src1, (memop_frag addr:$src2))))]>,
                  OpSize;
}

let Predicates = [HasAVX] in
  defm VPCMPGTQ : SS42I_binop_rm<0x37, "vpcmpgtq", X86pcmpgt, v2i64, VR128,
                                 memopv2i64, i128mem, 0>, VEX_4V;

let Predicates = [HasAVX2] in
  defm VPCMPGTQY : SS42I_binop_rm<0x37, "vpcmpgtq", X86pcmpgt, v4i64, VR256,
                                  memopv4i64, i256mem, 0>, VEX_4V, VEX_L;

let Constraints = "$src1 = $dst" in
  defm PCMPGTQ : SS42I_binop_rm<0x37, "pcmpgtq", X86pcmpgt, v2i64, VR128,
                                memopv2i64, i128mem>;

//===----------------------------------------------------------------------===//
// SSE4.2 - String/text Processing Instructions
//===----------------------------------------------------------------------===//

// Packed Compare Implicit Length Strings, Return Mask
multiclass pseudo_pcmpistrm<string asm> {
  def REG : PseudoI<(outs VR128:$dst),
                    (ins VR128:$src1, VR128:$src2, i8imm:$src3),
    [(set VR128:$dst, (int_x86_sse42_pcmpistrm128 VR128:$src1, VR128:$src2,
                                                  imm:$src3))]>;
  def MEM : PseudoI<(outs VR128:$dst),
                    (ins VR128:$src1, i128mem:$src2, i8imm:$src3),
    [(set VR128:$dst, (int_x86_sse42_pcmpistrm128
                       VR128:$src1, (load addr:$src2), imm:$src3))]>;
}

let Defs = [EFLAGS], usesCustomInserter = 1 in {
  defm VPCMPISTRM128 : pseudo_pcmpistrm<"#VPCMPISTRM128">, Requires<[HasAVX]>;
  defm PCMPISTRM128 : pseudo_pcmpistrm<"#PCMPISTRM128">, Requires<[UseSSE42]>;
}

let Defs = [XMM0, EFLAGS], neverHasSideEffects = 1, Predicates = [HasAVX] in {
  def VPCMPISTRM128rr : SS42AI<0x62, MRMSrcReg, (outs),
      (ins VR128:$src1, VR128:$src2, i8imm:$src3),
      "vpcmpistrm\t{$src3, $src2, $src1|$src1, $src2, $src3}", []>, OpSize,
      VEX;
  let mayLoad = 1 in
  def VPCMPISTRM128rm : SS42AI<0x62, MRMSrcMem, (outs),
      (ins VR128:$src1, i128mem:$src2, i8imm:$src3),
      "vpcmpistrm\t{$src3, $src2, $src1|$src1, $src2, $src3}", []>, OpSize,
      VEX;
}

let Defs = [XMM0, EFLAGS], neverHasSideEffects = 1 in {
  def PCMPISTRM128rr : SS42AI<0x62, MRMSrcReg, (outs),
      (ins VR128:$src1, VR128:$src2, i8imm:$src3),
      "pcmpistrm\t{$src3, $src2, $src1|$src1, $src2, $src3}", []>, OpSize;
  let mayLoad = 1 in
  def PCMPISTRM128rm : SS42AI<0x62, MRMSrcMem, (outs),
      (ins VR128:$src1, i128mem:$src2, i8imm:$src3),
      "pcmpistrm\t{$src3, $src2, $src1|$src1, $src2, $src3}", []>, OpSize;
}

// Packed Compare Explicit Length Strings, Return Mask
multiclass pseudo_pcmpestrm<string asm> {
  def REG : PseudoI<(outs VR128:$dst),
                    (ins VR128:$src1, VR128:$src3, i8imm:$src5),
    [(set VR128:$dst, (int_x86_sse42_pcmpestrm128
                       VR128:$src1, EAX, VR128:$src3, EDX, imm:$src5))]>;
  def MEM : PseudoI<(outs VR128:$dst),
                    (ins VR128:$src1, i128mem:$src3, i8imm:$src5),
    [(set VR128:$dst, (int_x86_sse42_pcmpestrm128
                       VR128:$src1, EAX, (load addr:$src3), EDX, imm:$src5))]>;
}

let Defs = [EFLAGS], Uses = [EAX, EDX], usesCustomInserter = 1 in {
  defm VPCMPESTRM128 : pseudo_pcmpestrm<"#VPCMPESTRM128">, Requires<[HasAVX]>;
  defm PCMPESTRM128 : pseudo_pcmpestrm<"#PCMPESTRM128">, Requires<[UseSSE42]>;
}

let Predicates = [HasAVX],
    Defs = [XMM0, EFLAGS], Uses = [EAX, EDX], neverHasSideEffects = 1 in {
  def VPCMPESTRM128rr : SS42AI<0x60, MRMSrcReg, (outs),
      (ins VR128:$src1, VR128:$src3, i8imm:$src5),
      "vpcmpestrm\t{$src5, $src3, $src1|$src1, $src3, $src5}", []>, OpSize,
      VEX;
  let mayLoad = 1 in
  def VPCMPESTRM128rm : SS42AI<0x60, MRMSrcMem, (outs),
      (ins VR128:$src1, i128mem:$src3, i8imm:$src5),
      "vpcmpestrm\t{$src5, $src3, $src1|$src1, $src3, $src5}", []>, OpSize,
      VEX;
}

let Defs = [XMM0, EFLAGS], Uses = [EAX, EDX], neverHasSideEffects = 1 in {
  def PCMPESTRM128rr : SS42AI<0x60, MRMSrcReg, (outs),
      (ins VR128:$src1, VR128:$src3, i8imm:$src5),
      "pcmpestrm\t{$src5, $src3, $src1|$src1, $src3, $src5}", []>, OpSize;
  let mayLoad = 1 in
  def PCMPESTRM128rm : SS42AI<0x60, MRMSrcMem, (outs),
      (ins VR128:$src1, i128mem:$src3, i8imm:$src5),
      "pcmpestrm\t{$src5, $src3, $src1|$src1, $src3, $src5}", []>, OpSize;
}

// Packed Compare Implicit Length Strings, Return Index
let Defs = [ECX, EFLAGS], neverHasSideEffects = 1 in {
  multiclass SS42AI_pcmpistri<string asm> {
    def rr : SS42AI<0x63, MRMSrcReg, (outs),
      (ins VR128:$src1, VR128:$src2, i8imm:$src3),
      !strconcat(asm, "\t{$src3, $src2, $src1|$src1, $src2, $src3}"),
      []>, OpSize;
    let mayLoad = 1 in
    def rm : SS42AI<0x63, MRMSrcMem, (outs),
      (ins VR128:$src1, i128mem:$src2, i8imm:$src3),
      !strconcat(asm, "\t{$src3, $src2, $src1|$src1, $src2, $src3}"),
      []>, OpSize;
  }
}

let Predicates = [HasAVX] in
defm VPCMPISTRI : SS42AI_pcmpistri<"vpcmpistri">, VEX;
defm PCMPISTRI  : SS42AI_pcmpistri<"pcmpistri">;

// Packed Compare Explicit Length Strings, Return Index
let Defs = [ECX, EFLAGS], Uses = [EAX, EDX], neverHasSideEffects = 1 in {
  multiclass SS42AI_pcmpestri<string asm> {
    def rr : SS42AI<0x61, MRMSrcReg, (outs),
      (ins VR128:$src1, VR128:$src3, i8imm:$src5),
      !strconcat(asm, "\t{$src5, $src3, $src1|$src1, $src3, $src5}"),
      []>, OpSize;
    let mayLoad = 1 in
    def rm : SS42AI<0x61, MRMSrcMem, (outs),
      (ins VR128:$src1, i128mem:$src3, i8imm:$src5),
      !strconcat(asm, "\t{$src5, $src3, $src1|$src1, $src3, $src5}"),
      []>, OpSize;
  }
}

let Predicates = [HasAVX] in
defm VPCMPESTRI : SS42AI_pcmpestri<"vpcmpestri">, VEX;
defm PCMPESTRI  : SS42AI_pcmpestri<"pcmpestri">;

//===----------------------------------------------------------------------===//
// SSE4.2 - CRC Instructions
//===----------------------------------------------------------------------===//

// No CRC instructions have AVX equivalents

// crc intrinsic instruction
// This set of instructions are only rm, the only difference is the size
// of r and m.
let Constraints = "$src1 = $dst" in {
  def CRC32r32m8  : SS42FI<0xF0, MRMSrcMem, (outs GR32:$dst),
                      (ins GR32:$src1, i8mem:$src2),
                      "crc32{b} \t{$src2, $src1|$src1, $src2}",
                       [(set GR32:$dst,
                         (int_x86_sse42_crc32_32_8 GR32:$src1,
                         (load addr:$src2)))]>;
  def CRC32r32r8  : SS42FI<0xF0, MRMSrcReg, (outs GR32:$dst),
                      (ins GR32:$src1, GR8:$src2),
                      "crc32{b} \t{$src2, $src1|$src1, $src2}",
                       [(set GR32:$dst,
                         (int_x86_sse42_crc32_32_8 GR32:$src1, GR8:$src2))]>;
  def CRC32r32m16 : SS42FI<0xF1, MRMSrcMem, (outs GR32:$dst),
                      (ins GR32:$src1, i16mem:$src2),
                      "crc32{w} \t{$src2, $src1|$src1, $src2}",
                       [(set GR32:$dst,
                         (int_x86_sse42_crc32_32_16 GR32:$src1,
                         (load addr:$src2)))]>,
                         OpSize;
  def CRC32r32r16 : SS42FI<0xF1, MRMSrcReg, (outs GR32:$dst),
                      (ins GR32:$src1, GR16:$src2),
                      "crc32{w} \t{$src2, $src1|$src1, $src2}",
                       [(set GR32:$dst,
                         (int_x86_sse42_crc32_32_16 GR32:$src1, GR16:$src2))]>,
                         OpSize;
  def CRC32r32m32 : SS42FI<0xF1, MRMSrcMem, (outs GR32:$dst),
                      (ins GR32:$src1, i32mem:$src2),
                      "crc32{l} \t{$src2, $src1|$src1, $src2}",
                       [(set GR32:$dst,
                         (int_x86_sse42_crc32_32_32 GR32:$src1,
                         (load addr:$src2)))]>;
  def CRC32r32r32 : SS42FI<0xF1, MRMSrcReg, (outs GR32:$dst),
                      (ins GR32:$src1, GR32:$src2),
                      "crc32{l} \t{$src2, $src1|$src1, $src2}",
                       [(set GR32:$dst,
                         (int_x86_sse42_crc32_32_32 GR32:$src1, GR32:$src2))]>;
  def CRC32r64m8  : SS42FI<0xF0, MRMSrcMem, (outs GR64:$dst),
                      (ins GR64:$src1, i8mem:$src2),
                      "crc32{b} \t{$src2, $src1|$src1, $src2}",
                       [(set GR64:$dst,
                         (int_x86_sse42_crc32_64_8 GR64:$src1,
                         (load addr:$src2)))]>,
                         REX_W;
  def CRC32r64r8  : SS42FI<0xF0, MRMSrcReg, (outs GR64:$dst),
                      (ins GR64:$src1, GR8:$src2),
                      "crc32{b} \t{$src2, $src1|$src1, $src2}",
                       [(set GR64:$dst,
                         (int_x86_sse42_crc32_64_8 GR64:$src1, GR8:$src2))]>,
                         REX_W;
  def CRC32r64m64 : SS42FI<0xF1, MRMSrcMem, (outs GR64:$dst),
                      (ins GR64:$src1, i64mem:$src2),
                      "crc32{q} \t{$src2, $src1|$src1, $src2}",
                       [(set GR64:$dst,
                         (int_x86_sse42_crc32_64_64 GR64:$src1,
                         (load addr:$src2)))]>,
                         REX_W;
  def CRC32r64r64 : SS42FI<0xF1, MRMSrcReg, (outs GR64:$dst),
                      (ins GR64:$src1, GR64:$src2),
                      "crc32{q} \t{$src2, $src1|$src1, $src2}",
                       [(set GR64:$dst,
                         (int_x86_sse42_crc32_64_64 GR64:$src1, GR64:$src2))]>,
                         REX_W;
}

//===----------------------------------------------------------------------===//
// AES-NI Instructions
//===----------------------------------------------------------------------===//

multiclass AESI_binop_rm_int<bits<8> opc, string OpcodeStr,
                             Intrinsic IntId128, bit Is2Addr = 1> {
  def rr : AES8I<opc, MRMSrcReg, (outs VR128:$dst),
                 (ins VR128:$src1, VR128:$src2),
                 !if(Is2Addr,
                   !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"),
                   !strconcat(OpcodeStr,
                              "\t{$src2, $src1, $dst|$dst, $src1, $src2}")),
                 [(set VR128:$dst, (IntId128 VR128:$src1, VR128:$src2))]>,
                 OpSize;
  def rm : AES8I<opc, MRMSrcMem, (outs VR128:$dst),
                 (ins VR128:$src1, i128mem:$src2),
                 !if(Is2Addr,
                   !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"),
                   !strconcat(OpcodeStr,
                              "\t{$src2, $src1, $dst|$dst, $src1, $src2}")),
                 [(set VR128:$dst,
                   (IntId128 VR128:$src1, (memopv2i64 addr:$src2)))]>, OpSize;
}

// Perform One Round of an AES Encryption/Decryption Flow
let Predicates = [HasAVX, HasAES] in {
  defm VAESENC     : AESI_binop_rm_int<0xDC, "vaesenc",
                                       int_x86_aesni_aesenc, 0>, VEX_4V;
  defm VAESENCLAST : AESI_binop_rm_int<0xDD, "vaesenclast",
                                       int_x86_aesni_aesenclast, 0>, VEX_4V;
  defm VAESDEC     : AESI_binop_rm_int<0xDE, "vaesdec",
                                       int_x86_aesni_aesdec, 0>, VEX_4V;
  defm VAESDECLAST : AESI_binop_rm_int<0xDF, "vaesdeclast",
                                       int_x86_aesni_aesdeclast, 0>, VEX_4V;
}

let Constraints = "$src1 = $dst" in {
  defm AESENC     : AESI_binop_rm_int<0xDC, "aesenc",
                                      int_x86_aesni_aesenc>;
  defm AESENCLAST : AESI_binop_rm_int<0xDD, "aesenclast",
                                      int_x86_aesni_aesenclast>;
  defm AESDEC     : AESI_binop_rm_int<0xDE, "aesdec",
                                      int_x86_aesni_aesdec>;
  defm AESDECLAST : AESI_binop_rm_int<0xDF, "aesdeclast",
                                      int_x86_aesni_aesdeclast>;
}

// Perform the AES InvMixColumn Transformation
let Predicates = [HasAVX, HasAES] in {
  def VAESIMCrr : AES8I<0xDB, MRMSrcReg, (outs VR128:$dst),
      (ins VR128:$src1),
      "vaesimc\t{$src1, $dst|$dst, $src1}",
      [(set VR128:$dst,
        (int_x86_aesni_aesimc VR128:$src1))]>,
      OpSize, VEX;
  def VAESIMCrm : AES8I<0xDB, MRMSrcMem, (outs VR128:$dst),
      (ins i128mem:$src1),
      "vaesimc\t{$src1, $dst|$dst, $src1}",
      [(set VR128:$dst, (int_x86_aesni_aesimc (memopv2i64 addr:$src1)))]>,
      OpSize, VEX;
}
def AESIMCrr : AES8I<0xDB, MRMSrcReg, (outs VR128:$dst),
    (ins VR128:$src1),
    "aesimc\t{$src1, $dst|$dst, $src1}",
    [(set VR128:$dst,
      (int_x86_aesni_aesimc VR128:$src1))]>,
    OpSize;
def AESIMCrm : AES8I<0xDB, MRMSrcMem, (outs VR128:$dst),
    (ins i128mem:$src1),
    "aesimc\t{$src1, $dst|$dst, $src1}",
    [(set VR128:$dst, (int_x86_aesni_aesimc (memopv2i64 addr:$src1)))]>,
    OpSize;

// AES Round Key Generation Assist
let Predicates = [HasAVX, HasAES] in {
  def VAESKEYGENASSIST128rr : AESAI<0xDF, MRMSrcReg, (outs VR128:$dst),
      (ins VR128:$src1, i8imm:$src2),
      "vaeskeygenassist\t{$src2, $src1, $dst|$dst, $src1, $src2}",
      [(set VR128:$dst,
        (int_x86_aesni_aeskeygenassist VR128:$src1, imm:$src2))]>,
      OpSize, VEX;
  def VAESKEYGENASSIST128rm : AESAI<0xDF, MRMSrcMem, (outs VR128:$dst),
      (ins i128mem:$src1, i8imm:$src2),
      "vaeskeygenassist\t{$src2, $src1, $dst|$dst, $src1, $src2}",
      [(set VR128:$dst,
        (int_x86_aesni_aeskeygenassist (memopv2i64 addr:$src1), imm:$src2))]>,
      OpSize, VEX;
}
def AESKEYGENASSIST128rr : AESAI<0xDF, MRMSrcReg, (outs VR128:$dst),
    (ins VR128:$src1, i8imm:$src2),
    "aeskeygenassist\t{$src2, $src1, $dst|$dst, $src1, $src2}",
    [(set VR128:$dst,
      (int_x86_aesni_aeskeygenassist VR128:$src1, imm:$src2))]>,
    OpSize;
def AESKEYGENASSIST128rm : AESAI<0xDF, MRMSrcMem, (outs VR128:$dst),
    (ins i128mem:$src1, i8imm:$src2),
    "aeskeygenassist\t{$src2, $src1, $dst|$dst, $src1, $src2}",
    [(set VR128:$dst,
      (int_x86_aesni_aeskeygenassist (memopv2i64 addr:$src1), imm:$src2))]>,
    OpSize;

//===----------------------------------------------------------------------===//
// PCLMUL Instructions
//===----------------------------------------------------------------------===//

// AVX carry-less Multiplication instructions
def VPCLMULQDQrr : AVXPCLMULIi8<0x44, MRMSrcReg, (outs VR128:$dst),
           (ins VR128:$src1, VR128:$src2, i8imm:$src3),
           "vpclmulqdq\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}",
           [(set VR128:$dst,
             (int_x86_pclmulqdq VR128:$src1, VR128:$src2, imm:$src3))]>;

def VPCLMULQDQrm : AVXPCLMULIi8<0x44, MRMSrcMem, (outs VR128:$dst),
           (ins VR128:$src1, i128mem:$src2, i8imm:$src3),
           "vpclmulqdq\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}",
           [(set VR128:$dst, (int_x86_pclmulqdq VR128:$src1,
                              (memopv2i64 addr:$src2), imm:$src3))]>;

// Carry-less Multiplication instructions
let Constraints = "$src1 = $dst" in {
def PCLMULQDQrr : PCLMULIi8<0x44, MRMSrcReg, (outs VR128:$dst),
           (ins VR128:$src1, VR128:$src2, i8imm:$src3),
           "pclmulqdq\t{$src3, $src2, $dst|$dst, $src2, $src3}",
           [(set VR128:$dst,
             (int_x86_pclmulqdq VR128:$src1, VR128:$src2, imm:$src3))]>;

def PCLMULQDQrm : PCLMULIi8<0x44, MRMSrcMem, (outs VR128:$dst),
           (ins VR128:$src1, i128mem:$src2, i8imm:$src3),
           "pclmulqdq\t{$src3, $src2, $dst|$dst, $src2, $src3}",
           [(set VR128:$dst, (int_x86_pclmulqdq VR128:$src1,
                              (memopv2i64 addr:$src2), imm:$src3))]>;
} // Constraints = "$src1 = $dst"


// Mnemonic aliases that spell out the quadword-half selection immediate,
// e.g. pclmulhqhqdq == pclmulqdq with imm 0x11.
multiclass pclmul_alias<string asm, int immop> {
  def : InstAlias<!strconcat("pclmul", asm, "dq {$src, $dst|$dst, $src}"),
                  (PCLMULQDQrr VR128:$dst, VR128:$src, immop)>;

  def : InstAlias<!strconcat("pclmul", asm, "dq {$src, $dst|$dst, $src}"),
                  (PCLMULQDQrm VR128:$dst, i128mem:$src, immop)>;

  def : InstAlias<!strconcat("vpclmul", asm,
                             "dq {$src2, $src1, $dst|$dst, $src1, $src2}"),
                  (VPCLMULQDQrr VR128:$dst, VR128:$src1, VR128:$src2, immop)>;

  def : InstAlias<!strconcat("vpclmul", asm,
                             "dq {$src2, $src1, $dst|$dst, $src1, $src2}"),
                  (VPCLMULQDQrm VR128:$dst, VR128:$src1, i128mem:$src2,
                                immop)>;
}
defm : pclmul_alias<"hqhq", 0x11>;
defm : pclmul_alias<"hqlq", 0x01>;
defm : pclmul_alias<"lqhq", 0x10>;
defm : pclmul_alias<"lqlq", 0x00>;

//===----------------------------------------------------------------------===//
// SSE4A Instructions
//===----------------------------------------------------------------------===//

let Predicates = [HasSSE4A] in {

let Constraints = "$src = $dst" in {
def EXTRQI : Ii8<0x78, MRM0r, (outs VR128:$dst),
                 (ins VR128:$src, i8imm:$len, i8imm:$idx),
                 "extrq\t{$idx, $len, $src|$src, $len, $idx}",
                 [(set VR128:$dst, (int_x86_sse4a_extrqi VR128:$src, imm:$len,
                                    imm:$idx))]>, TB, OpSize;
def EXTRQ  : I<0x79, MRMSrcReg, (outs VR128:$dst),
               (ins VR128:$src, VR128:$mask),
               "extrq\t{$mask, $src|$src, $mask}",
               [(set VR128:$dst, (int_x86_sse4a_extrq VR128:$src,
                                  VR128:$mask))]>, TB, OpSize;

def INSERTQI : Ii8<0x78, MRMSrcReg, (outs VR128:$dst),
                   (ins VR128:$src, VR128:$src2, i8imm:$len, i8imm:$idx),
                   "insertq\t{$idx, $len, $src2, $src|$src, $src2, $len, $idx}",
                   [(set VR128:$dst, (int_x86_sse4a_insertqi VR128:$src,
                                      VR128:$src2, imm:$len, imm:$idx))]>, XD;
def INSERTQ  : I<0x79, MRMSrcReg, (outs VR128:$dst),
                 (ins VR128:$src, VR128:$mask),
                 "insertq\t{$mask, $src|$src, $mask}",
                 [(set VR128:$dst, (int_x86_sse4a_insertq VR128:$src,
                                    VR128:$mask))]>, XD;
}

def MOVNTSS : I<0x2B, MRMDestMem, (outs), (ins f32mem:$dst, VR128:$src),
                "movntss\t{$src, $dst|$dst, $src}",
                [(int_x86_sse4a_movnt_ss addr:$dst, VR128:$src)]>, XS;

def MOVNTSD : I<0x2B, MRMDestMem, (outs), (ins f64mem:$dst, VR128:$src),
                "movntsd\t{$src, $dst|$dst, $src}",
                [(int_x86_sse4a_movnt_sd addr:$dst, VR128:$src)]>, XD;
}

//===----------------------------------------------------------------------===//
// AVX Instructions
//===----------------------------------------------------------------------===//

//===----------------------------------------------------------------------===//
// VBROADCAST - Load from memory and broadcast to all elements of the
// destination operand
//
class avx_broadcast<bits<8> opc, string OpcodeStr, RegisterClass RC,
                    X86MemOperand x86memop, Intrinsic Int> :
  AVX8I<opc, MRMSrcMem, (outs RC:$dst), (ins x86memop:$src),
        !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
        [(set RC:$dst, (Int addr:$src))]>, VEX;

// AVX2 adds register forms
class avx2_broadcast_reg<bits<8> opc, string OpcodeStr, RegisterClass RC,
                         Intrinsic Int> :
  AVX28I<opc, MRMSrcReg, (outs RC:$dst), (ins VR128:$src),
         !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
         [(set RC:$dst, (Int VR128:$src))]>, VEX;

let ExeDomain = SSEPackedSingle in {
  def VBROADCASTSSrm  : avx_broadcast<0x18, "vbroadcastss", VR128, f32mem,
                                      int_x86_avx_vbroadcast_ss>;
  def VBROADCASTSSYrm : avx_broadcast<0x18, "vbroadcastss", VR256, f32mem,
                                      int_x86_avx_vbroadcast_ss_256>, VEX_L;
}
let ExeDomain = SSEPackedDouble in
def VBROADCASTSDYrm  : avx_broadcast<0x19, "vbroadcastsd", VR256, f64mem,
                                     int_x86_avx_vbroadcast_sd_256>, VEX_L;
def VBROADCASTF128 : avx_broadcast<0x1A, "vbroadcastf128", VR256, f128mem,
                                   int_x86_avx_vbroadcastf128_pd_256>, VEX_L;

let ExeDomain = SSEPackedSingle in {
  def VBROADCASTSSrr  : avx2_broadcast_reg<0x18, "vbroadcastss", VR128,
                                           int_x86_avx2_vbroadcast_ss_ps>;
  def VBROADCASTSSYrr : avx2_broadcast_reg<0x18, "vbroadcastss", VR256,
                                           int_x86_avx2_vbroadcast_ss_ps_256>,
                                           VEX_L;
}
let ExeDomain = SSEPackedDouble in
def VBROADCASTSDYrr  : avx2_broadcast_reg<0x19, "vbroadcastsd", VR256,
                                          int_x86_avx2_vbroadcast_sd_pd_256>,
                                          VEX_L;

let Predicates = [HasAVX2] in
def VBROADCASTI128 : avx_broadcast<0x5A, "vbroadcasti128", VR256, i128mem,
                                   int_x86_avx2_vbroadcasti128>, VEX_L;

let Predicates = [HasAVX] in
def : Pat<(int_x86_avx_vbroadcastf128_ps_256 addr:$src),
          (VBROADCASTF128 addr:$src)>;


//===----------------------------------------------------------------------===//
// VINSERTF128 - Insert packed floating-point values
//
let neverHasSideEffects = 1, ExeDomain = SSEPackedSingle in {
def VINSERTF128rr : AVXAIi8<0x18, MRMSrcReg, (outs VR256:$dst),
          (ins VR256:$src1, VR128:$src2, i8imm:$src3),
          "vinsertf128\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}",
          []>, VEX_4V, VEX_L;
let mayLoad = 1 in
def VINSERTF128rm : AVXAIi8<0x18, MRMSrcMem, (outs VR256:$dst),
          (ins VR256:$src1, f128mem:$src2, i8imm:$src3),
          "vinsertf128\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}",
          []>, VEX_4V, VEX_L;
}

let Predicates = [HasAVX] in {
def : Pat<(vinsertf128_insert:$ins (v8f32 VR256:$src1), (v4f32 VR128:$src2),
                                   (iPTR imm)),
          (VINSERTF128rr VR256:$src1, VR128:$src2,
                         (INSERT_get_vinsertf128_imm VR256:$ins))>;
def : Pat<(vinsertf128_insert:$ins (v4f64 VR256:$src1), (v2f64 VR128:$src2),
                                   (iPTR imm)),
          (VINSERTF128rr VR256:$src1, VR128:$src2,
                         (INSERT_get_vinsertf128_imm VR256:$ins))>;

def : Pat<(vinsertf128_insert:$ins (v8f32 VR256:$src1), (memopv4f32 addr:$src2),
                                   (iPTR imm)),
          (VINSERTF128rm VR256:$src1, addr:$src2,
                         (INSERT_get_vinsertf128_imm VR256:$ins))>;
def : Pat<(vinsertf128_insert:$ins (v4f64 VR256:$src1), (memopv2f64 addr:$src2),
                                   (iPTR imm)),
          (VINSERTF128rm VR256:$src1, addr:$src2,
                         (INSERT_get_vinsertf128_imm VR256:$ins))>;
}

// Integer insertions go through VINSERTF128 only when AVX2's VINSERTI128 is
// unavailable.
let Predicates = [HasAVX1Only] in {
def : Pat<(vinsertf128_insert:$ins (v4i64 VR256:$src1), (v2i64 VR128:$src2),
                                   (iPTR imm)),
          (VINSERTF128rr VR256:$src1, VR128:$src2,
                         (INSERT_get_vinsertf128_imm VR256:$ins))>;
def : Pat<(vinsertf128_insert:$ins (v8i32 VR256:$src1), (v4i32 VR128:$src2),
                                   (iPTR imm)),
          (VINSERTF128rr VR256:$src1, VR128:$src2,
                         (INSERT_get_vinsertf128_imm VR256:$ins))>;
def : Pat<(vinsertf128_insert:$ins (v32i8 VR256:$src1), (v16i8 VR128:$src2),
                                   (iPTR imm)),
          (VINSERTF128rr VR256:$src1, VR128:$src2,
                         (INSERT_get_vinsertf128_imm VR256:$ins))>;
def : Pat<(vinsertf128_insert:$ins (v16i16 VR256:$src1), (v8i16 VR128:$src2),
                                   (iPTR imm)),
          (VINSERTF128rr VR256:$src1, VR128:$src2,
                         (INSERT_get_vinsertf128_imm VR256:$ins))>;

def : Pat<(vinsertf128_insert:$ins (v4i64 VR256:$src1), (memopv2i64 addr:$src2),
                                   (iPTR imm)),
          (VINSERTF128rm VR256:$src1, addr:$src2,
                         (INSERT_get_vinsertf128_imm VR256:$ins))>;
def : Pat<(vinsertf128_insert:$ins (v8i32 VR256:$src1),
                                   (bc_v4i32 (memopv2i64 addr:$src2)),
                                   (iPTR imm)),
          (VINSERTF128rm VR256:$src1, addr:$src2,
                         (INSERT_get_vinsertf128_imm VR256:$ins))>;
def : Pat<(vinsertf128_insert:$ins (v32i8 VR256:$src1),
                                   (bc_v16i8 (memopv2i64 addr:$src2)),
                                   (iPTR imm)),
          (VINSERTF128rm VR256:$src1, addr:$src2,
                         (INSERT_get_vinsertf128_imm VR256:$ins))>;
def : Pat<(vinsertf128_insert:$ins (v16i16 VR256:$src1),
                                   (bc_v8i16 (memopv2i64 addr:$src2)),
                                   (iPTR imm)),
          (VINSERTF128rm VR256:$src1, addr:$src2,
                         (INSERT_get_vinsertf128_imm VR256:$ins))>;
}

//===----------------------------------------------------------------------===//
// VEXTRACTF128 - Extract packed floating-point values
//
let neverHasSideEffects = 1, ExeDomain = SSEPackedSingle in {
def VEXTRACTF128rr : AVXAIi8<0x19, MRMDestReg, (outs VR128:$dst),
          (ins VR256:$src1, i8imm:$src2),
          "vextractf128\t{$src2, $src1, $dst|$dst, $src1, $src2}",
          []>, VEX, VEX_L;
let mayStore = 1 in
def VEXTRACTF128mr : AVXAIi8<0x19, MRMDestMem, (outs),
          (ins f128mem:$dst, VR256:$src1, i8imm:$src2),
          "vextractf128\t{$src2, $src1, $dst|$dst, $src1, $src2}",
          []>, VEX, VEX_L;
}

// AVX1 patterns
let Predicates = [HasAVX] in {
def : Pat<(vextractf128_extract:$ext VR256:$src1, (iPTR imm)),
          (v4f32 (VEXTRACTF128rr
                    (v8f32 VR256:$src1),
                    (EXTRACT_get_vextractf128_imm VR128:$ext)))>;
def : Pat<(vextractf128_extract:$ext VR256:$src1, (iPTR imm)),
          (v2f64 (VEXTRACTF128rr
                    (v4f64 VR256:$src1),
                    (EXTRACT_get_vextractf128_imm VR128:$ext)))>;

def : Pat<(alignedstore (v4f32 (vextractf128_extract:$ext (v8f32 VR256:$src1),
                                (iPTR imm))), addr:$dst),
          (VEXTRACTF128mr addr:$dst, VR256:$src1,
                          (EXTRACT_get_vextractf128_imm VR128:$ext))>;
def : Pat<(alignedstore (v2f64 (vextractf128_extract:$ext (v4f64 VR256:$src1),
                                (iPTR imm))), addr:$dst),
          (VEXTRACTF128mr addr:$dst, VR256:$src1,
                          (EXTRACT_get_vextractf128_imm VR128:$ext))>;
}

// Integer extractions use VEXTRACTF128 only when AVX2's VEXTRACTI128 is
// unavailable.
let Predicates = [HasAVX1Only] in {
def : Pat<(vextractf128_extract:$ext VR256:$src1, (iPTR imm)),
          (v2i64 (VEXTRACTF128rr
                    (v4i64 VR256:$src1),
                    (EXTRACT_get_vextractf128_imm VR128:$ext)))>;
def : Pat<(vextractf128_extract:$ext VR256:$src1, (iPTR imm)),
          (v4i32 (VEXTRACTF128rr
                    (v8i32 VR256:$src1),
                    (EXTRACT_get_vextractf128_imm VR128:$ext)))>;
def : Pat<(vextractf128_extract:$ext VR256:$src1, (iPTR imm)),
          (v8i16 (VEXTRACTF128rr
                    (v16i16 VR256:$src1),
                    (EXTRACT_get_vextractf128_imm VR128:$ext)))>;
def : Pat<(vextractf128_extract:$ext VR256:$src1, (iPTR imm)),
          (v16i8 (VEXTRACTF128rr
                    (v32i8 VR256:$src1),
                    (EXTRACT_get_vextractf128_imm VR128:$ext)))>;

def : Pat<(alignedstore (v2i64 (vextractf128_extract:$ext (v4i64 VR256:$src1),
                                (iPTR imm))), addr:$dst),
          (VEXTRACTF128mr addr:$dst, VR256:$src1,
                          (EXTRACT_get_vextractf128_imm VR128:$ext))>;
def : Pat<(alignedstore (v4i32 (vextractf128_extract:$ext (v8i32 VR256:$src1),
                                (iPTR imm))), addr:$dst),
          (VEXTRACTF128mr addr:$dst, VR256:$src1,
                          (EXTRACT_get_vextractf128_imm VR128:$ext))>;
def : Pat<(alignedstore (v8i16 (vextractf128_extract:$ext (v16i16 VR256:$src1),
                                (iPTR imm))), addr:$dst),
          (VEXTRACTF128mr addr:$dst, VR256:$src1,
                          (EXTRACT_get_vextractf128_imm VR128:$ext))>;
def : Pat<(alignedstore (v16i8 (vextractf128_extract:$ext (v32i8 VR256:$src1),
                                (iPTR imm))), addr:$dst),
          (VEXTRACTF128mr addr:$dst, VR256:$src1,
                          (EXTRACT_get_vextractf128_imm VR128:$ext))>;
}

//===----------------------------------------------------------------------===//
// VMASKMOV - Conditional SIMD Packed Loads and Stores
//
multiclass avx_movmask_rm<bits<8> opc_rm, bits<8> opc_mr, string OpcodeStr,
                          Intrinsic IntLd, Intrinsic IntLd256,
                          Intrinsic IntSt, Intrinsic IntSt256> {
  def rm  : AVX8I<opc_rm, MRMSrcMem, (outs VR128:$dst),
             (ins VR128:$src1, f128mem:$src2),
             !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
             [(set VR128:$dst, (IntLd addr:$src2, VR128:$src1))]>,
             VEX_4V;
  def Yrm : AVX8I<opc_rm, MRMSrcMem, (outs VR256:$dst),
             (ins VR256:$src1, f256mem:$src2),
             !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
             [(set VR256:$dst, (IntLd256 addr:$src2, VR256:$src1))]>,
             VEX_4V, VEX_L;
  def mr  : AVX8I<opc_mr, MRMDestMem, (outs),
             (ins f128mem:$dst, VR128:$src1, VR128:$src2),
             !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
             [(IntSt addr:$dst, VR128:$src1, VR128:$src2)]>, VEX_4V;
  def Ymr : AVX8I<opc_mr, MRMDestMem, (outs),
             (ins f256mem:$dst, VR256:$src1, VR256:$src2),
             !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
             [(IntSt256 addr:$dst, VR256:$src1, VR256:$src2)]>, VEX_4V, VEX_L;
}

let ExeDomain = SSEPackedSingle in
defm VMASKMOVPS : avx_movmask_rm<0x2C, 0x2E, "vmaskmovps",
                                 int_x86_avx_maskload_ps,
                                 int_x86_avx_maskload_ps_256,
                                 int_x86_avx_maskstore_ps,
                                 int_x86_avx_maskstore_ps_256>;
let ExeDomain = SSEPackedDouble in
defm VMASKMOVPD : avx_movmask_rm<0x2D, 0x2F, "vmaskmovpd",
                                 int_x86_avx_maskload_pd,
                                 int_x86_avx_maskload_pd_256,
                                 int_x86_avx_maskstore_pd,
                                 int_x86_avx_maskstore_pd_256>;

//===----------------------------------------------------------------------===//
// VPERMIL - Permute Single and Double Floating-Point Values
//
multiclass avx_permil<bits<8> opc_rm, bits<8> opc_rmi, string OpcodeStr,
                      RegisterClass RC, X86MemOperand x86memop_f,
                      X86MemOperand x86memop_i, PatFrag i_frag,
                      Intrinsic IntVar, ValueType vt> {
  def rr  : AVX8I<opc_rm, MRMSrcReg, (outs RC:$dst),
             (ins RC:$src1, RC:$src2),
             !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
             [(set RC:$dst, (IntVar RC:$src1, RC:$src2))]>, VEX_4V;
  def rm  : AVX8I<opc_rm, MRMSrcMem, (outs RC:$dst),
             (ins RC:$src1, x86memop_i:$src2),
             !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
             [(set RC:$dst, (IntVar RC:$src1,
                             (bitconvert (i_frag addr:$src2))))]>, VEX_4V;

  def ri  : AVXAIi8<opc_rmi, MRMSrcReg, (outs RC:$dst),
             (ins RC:$src1, i8imm:$src2),
             !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
             [(set RC:$dst, (vt (X86VPermilp RC:$src1, (i8 imm:$src2))))]>,
             VEX;
  def mi  : AVXAIi8<opc_rmi, MRMSrcMem, (outs RC:$dst),
             (ins x86memop_f:$src1, i8imm:$src2),
             !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
             [(set RC:$dst,
               (vt (X86VPermilp (memop addr:$src1), (i8 imm:$src2))))]>, VEX;
}

let ExeDomain = SSEPackedSingle in {
  defm VPERMILPS  : avx_permil<0x0C, 0x04, "vpermilps", VR128, f128mem,
                               i128mem, memopv2i64,
                               int_x86_avx_vpermilvar_ps, v4f32>;
  defm VPERMILPSY : avx_permil<0x0C, 0x04, "vpermilps", VR256, f256mem,
                               i256mem, memopv4i64,
                               int_x86_avx_vpermilvar_ps_256, v8f32>, VEX_L;
}
let ExeDomain = SSEPackedDouble in {
  defm VPERMILPD  : avx_permil<0x0D, 0x05, "vpermilpd", VR128, f128mem,
                               i128mem, memopv2i64,
                               int_x86_avx_vpermilvar_pd, v2f64>;
  defm VPERMILPDY : avx_permil<0x0D, 0x05, "vpermilpd", VR256, f256mem,
                               i256mem, memopv4i64,
                               int_x86_avx_vpermilvar_pd_256, v4f64>, VEX_L;
}

let Predicates = [HasAVX] in {
def : Pat<(v8i32 (X86VPermilp VR256:$src1, (i8 imm:$imm))),
          (VPERMILPSYri VR256:$src1, imm:$imm)>;
def : Pat<(v4i64 (X86VPermilp VR256:$src1, (i8 imm:$imm))),
          (VPERMILPDYri VR256:$src1, imm:$imm)>;
def : Pat<(v8i32 (X86VPermilp (bc_v8i32 (memopv4i64 addr:$src1)),
                              (i8 imm:$imm))),
          (VPERMILPSYmi addr:$src1, imm:$imm)>;
def : Pat<(v4i64 (X86VPermilp (memopv4i64 addr:$src1), (i8 imm:$imm))),
          (VPERMILPDYmi addr:$src1, imm:$imm)>;

def : Pat<(v2i64 (X86VPermilp VR128:$src1, (i8 imm:$imm))),
          (VPERMILPDri VR128:$src1, imm:$imm)>;
def : Pat<(v2i64 (X86VPermilp (memopv2i64 addr:$src1), (i8 imm:$imm))),
          (VPERMILPDmi addr:$src1, imm:$imm)>;
}

//===----------------------------------------------------------------------===//
// VPERM2F128 - Permute Floating-Point Values in 128-bit chunks
//
let ExeDomain = SSEPackedSingle in {
def VPERM2F128rr : AVXAIi8<0x06, MRMSrcReg, (outs VR256:$dst),
7609 (ins VR256:$src1, VR256:$src2, i8imm:$src3), 7610 "vperm2f128\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}", 7611 [(set VR256:$dst, (v8f32 (X86VPerm2x128 VR256:$src1, VR256:$src2, 7612 (i8 imm:$src3))))]>, VEX_4V, VEX_L; 7613def VPERM2F128rm : AVXAIi8<0x06, MRMSrcMem, (outs VR256:$dst), 7614 (ins VR256:$src1, f256mem:$src2, i8imm:$src3), 7615 "vperm2f128\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}", 7616 [(set VR256:$dst, (X86VPerm2x128 VR256:$src1, (memopv8f32 addr:$src2), 7617 (i8 imm:$src3)))]>, VEX_4V, VEX_L; 7618} 7619 7620let Predicates = [HasAVX] in { 7621def : Pat<(v4f64 (X86VPerm2x128 VR256:$src1, VR256:$src2, (i8 imm:$imm))), 7622 (VPERM2F128rr VR256:$src1, VR256:$src2, imm:$imm)>; 7623def : Pat<(v4f64 (X86VPerm2x128 VR256:$src1, 7624 (memopv4f64 addr:$src2), (i8 imm:$imm))), 7625 (VPERM2F128rm VR256:$src1, addr:$src2, imm:$imm)>; 7626} 7627 7628let Predicates = [HasAVX1Only] in { 7629def : Pat<(v8i32 (X86VPerm2x128 VR256:$src1, VR256:$src2, (i8 imm:$imm))), 7630 (VPERM2F128rr VR256:$src1, VR256:$src2, imm:$imm)>; 7631def : Pat<(v4i64 (X86VPerm2x128 VR256:$src1, VR256:$src2, (i8 imm:$imm))), 7632 (VPERM2F128rr VR256:$src1, VR256:$src2, imm:$imm)>; 7633def : Pat<(v32i8 (X86VPerm2x128 VR256:$src1, VR256:$src2, (i8 imm:$imm))), 7634 (VPERM2F128rr VR256:$src1, VR256:$src2, imm:$imm)>; 7635def : Pat<(v16i16 (X86VPerm2x128 VR256:$src1, VR256:$src2, (i8 imm:$imm))), 7636 (VPERM2F128rr VR256:$src1, VR256:$src2, imm:$imm)>; 7637 7638def : Pat<(v8i32 (X86VPerm2x128 VR256:$src1, 7639 (bc_v8i32 (memopv4i64 addr:$src2)), (i8 imm:$imm))), 7640 (VPERM2F128rm VR256:$src1, addr:$src2, imm:$imm)>; 7641def : Pat<(v4i64 (X86VPerm2x128 VR256:$src1, 7642 (memopv4i64 addr:$src2), (i8 imm:$imm))), 7643 (VPERM2F128rm VR256:$src1, addr:$src2, imm:$imm)>; 7644def : Pat<(v32i8 (X86VPerm2x128 VR256:$src1, 7645 (bc_v32i8 (memopv4i64 addr:$src2)), (i8 imm:$imm))), 7646 (VPERM2F128rm VR256:$src1, addr:$src2, imm:$imm)>; 7647def : Pat<(v16i16 (X86VPerm2x128 
VR256:$src1, 7648 (bc_v16i16 (memopv4i64 addr:$src2)), (i8 imm:$imm))), 7649 (VPERM2F128rm VR256:$src1, addr:$src2, imm:$imm)>; 7650} 7651 7652//===----------------------------------------------------------------------===// 7653// VZERO - Zero YMM registers 7654// 7655let Defs = [YMM0, YMM1, YMM2, YMM3, YMM4, YMM5, YMM6, YMM7, 7656 YMM8, YMM9, YMM10, YMM11, YMM12, YMM13, YMM14, YMM15] in { 7657 // Zero All YMM registers 7658 def VZEROALL : I<0x77, RawFrm, (outs), (ins), "vzeroall", 7659 [(int_x86_avx_vzeroall)]>, TB, VEX, VEX_L, Requires<[HasAVX]>; 7660 7661 // Zero Upper bits of YMM registers 7662 def VZEROUPPER : I<0x77, RawFrm, (outs), (ins), "vzeroupper", 7663 [(int_x86_avx_vzeroupper)]>, TB, VEX, Requires<[HasAVX]>; 7664} 7665 7666//===----------------------------------------------------------------------===// 7667// Half precision conversion instructions 7668//===----------------------------------------------------------------------===// 7669multiclass f16c_ph2ps<RegisterClass RC, X86MemOperand x86memop, Intrinsic Int> { 7670 def rr : I<0x13, MRMSrcReg, (outs RC:$dst), (ins VR128:$src), 7671 "vcvtph2ps\t{$src, $dst|$dst, $src}", 7672 [(set RC:$dst, (Int VR128:$src))]>, 7673 T8, OpSize, VEX; 7674 let neverHasSideEffects = 1, mayLoad = 1 in 7675 def rm : I<0x13, MRMSrcMem, (outs RC:$dst), (ins x86memop:$src), 7676 "vcvtph2ps\t{$src, $dst|$dst, $src}", []>, T8, OpSize, VEX; 7677} 7678 7679multiclass f16c_ps2ph<RegisterClass RC, X86MemOperand x86memop, Intrinsic Int> { 7680 def rr : Ii8<0x1D, MRMDestReg, (outs VR128:$dst), 7681 (ins RC:$src1, i32i8imm:$src2), 7682 "vcvtps2ph\t{$src2, $src1, $dst|$dst, $src1, $src2}", 7683 [(set VR128:$dst, (Int RC:$src1, imm:$src2))]>, 7684 TA, OpSize, VEX; 7685 let neverHasSideEffects = 1, mayStore = 1 in 7686 def mr : Ii8<0x1D, MRMDestMem, (outs), 7687 (ins x86memop:$dst, RC:$src1, i32i8imm:$src2), 7688 "vcvtps2ph\t{$src2, $src1, $dst|$dst, $src1, $src2}", []>, 7689 TA, OpSize, VEX; 7690} 7691 7692let Predicates = [HasAVX, 
HasF16C] in { 7693 defm VCVTPH2PS : f16c_ph2ps<VR128, f64mem, int_x86_vcvtph2ps_128>; 7694 defm VCVTPH2PSY : f16c_ph2ps<VR256, f128mem, int_x86_vcvtph2ps_256>, VEX_L; 7695 defm VCVTPS2PH : f16c_ps2ph<VR128, f64mem, int_x86_vcvtps2ph_128>; 7696 defm VCVTPS2PHY : f16c_ps2ph<VR256, f128mem, int_x86_vcvtps2ph_256>, VEX_L; 7697} 7698 7699//===----------------------------------------------------------------------===// 7700// AVX2 Instructions 7701//===----------------------------------------------------------------------===// 7702 7703/// AVX2_binop_rmi_int - AVX2 binary operator with 8-bit immediate 7704multiclass AVX2_binop_rmi_int<bits<8> opc, string OpcodeStr, 7705 Intrinsic IntId, RegisterClass RC, PatFrag memop_frag, 7706 X86MemOperand x86memop> { 7707 let isCommutable = 1 in 7708 def rri : AVX2AIi8<opc, MRMSrcReg, (outs RC:$dst), 7709 (ins RC:$src1, RC:$src2, u32u8imm:$src3), 7710 !strconcat(OpcodeStr, 7711 "\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}"), 7712 [(set RC:$dst, (IntId RC:$src1, RC:$src2, imm:$src3))]>, 7713 VEX_4V; 7714 def rmi : AVX2AIi8<opc, MRMSrcMem, (outs RC:$dst), 7715 (ins RC:$src1, x86memop:$src2, u32u8imm:$src3), 7716 !strconcat(OpcodeStr, 7717 "\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}"), 7718 [(set RC:$dst, 7719 (IntId RC:$src1, 7720 (bitconvert (memop_frag addr:$src2)), imm:$src3))]>, 7721 VEX_4V; 7722} 7723 7724let isCommutable = 0 in { 7725defm VPBLENDD : AVX2_binop_rmi_int<0x02, "vpblendd", int_x86_avx2_pblendd_128, 7726 VR128, memopv2i64, i128mem>; 7727defm VPBLENDDY : AVX2_binop_rmi_int<0x02, "vpblendd", int_x86_avx2_pblendd_256, 7728 VR256, memopv4i64, i256mem>, VEX_L; 7729} 7730 7731//===----------------------------------------------------------------------===// 7732// VPBROADCAST - Load from memory and broadcast to all elements of the 7733// destination operand 7734// 7735multiclass avx2_broadcast<bits<8> opc, string OpcodeStr, 7736 X86MemOperand x86memop, PatFrag ld_frag, 7737 Intrinsic Int128, Intrinsic 
Int256> { 7738 def rr : AVX28I<opc, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src), 7739 !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"), 7740 [(set VR128:$dst, (Int128 VR128:$src))]>, VEX; 7741 def rm : AVX28I<opc, MRMSrcMem, (outs VR128:$dst), (ins x86memop:$src), 7742 !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"), 7743 [(set VR128:$dst, 7744 (Int128 (scalar_to_vector (ld_frag addr:$src))))]>, VEX; 7745 def Yrr : AVX28I<opc, MRMSrcReg, (outs VR256:$dst), (ins VR128:$src), 7746 !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"), 7747 [(set VR256:$dst, (Int256 VR128:$src))]>, VEX, VEX_L; 7748 def Yrm : AVX28I<opc, MRMSrcMem, (outs VR256:$dst), (ins x86memop:$src), 7749 !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"), 7750 [(set VR256:$dst, 7751 (Int256 (scalar_to_vector (ld_frag addr:$src))))]>, 7752 VEX, VEX_L; 7753} 7754 7755defm VPBROADCASTB : avx2_broadcast<0x78, "vpbroadcastb", i8mem, loadi8, 7756 int_x86_avx2_pbroadcastb_128, 7757 int_x86_avx2_pbroadcastb_256>; 7758defm VPBROADCASTW : avx2_broadcast<0x79, "vpbroadcastw", i16mem, loadi16, 7759 int_x86_avx2_pbroadcastw_128, 7760 int_x86_avx2_pbroadcastw_256>; 7761defm VPBROADCASTD : avx2_broadcast<0x58, "vpbroadcastd", i32mem, loadi32, 7762 int_x86_avx2_pbroadcastd_128, 7763 int_x86_avx2_pbroadcastd_256>; 7764defm VPBROADCASTQ : avx2_broadcast<0x59, "vpbroadcastq", i64mem, loadi64, 7765 int_x86_avx2_pbroadcastq_128, 7766 int_x86_avx2_pbroadcastq_256>; 7767 7768let Predicates = [HasAVX2] in { 7769 def : Pat<(v16i8 (X86VBroadcast (loadi8 addr:$src))), 7770 (VPBROADCASTBrm addr:$src)>; 7771 def : Pat<(v32i8 (X86VBroadcast (loadi8 addr:$src))), 7772 (VPBROADCASTBYrm addr:$src)>; 7773 def : Pat<(v8i16 (X86VBroadcast (loadi16 addr:$src))), 7774 (VPBROADCASTWrm addr:$src)>; 7775 def : Pat<(v16i16 (X86VBroadcast (loadi16 addr:$src))), 7776 (VPBROADCASTWYrm addr:$src)>; 7777 def : Pat<(v4i32 (X86VBroadcast (loadi32 addr:$src))), 7778 (VPBROADCASTDrm addr:$src)>; 7779 def : Pat<(v8i32 (X86VBroadcast (loadi32 
addr:$src))), 7780 (VPBROADCASTDYrm addr:$src)>; 7781 def : Pat<(v2i64 (X86VBroadcast (loadi64 addr:$src))), 7782 (VPBROADCASTQrm addr:$src)>; 7783 def : Pat<(v4i64 (X86VBroadcast (loadi64 addr:$src))), 7784 (VPBROADCASTQYrm addr:$src)>; 7785 7786 def : Pat<(v16i8 (X86VBroadcast (v16i8 VR128:$src))), 7787 (VPBROADCASTBrr VR128:$src)>; 7788 def : Pat<(v32i8 (X86VBroadcast (v16i8 VR128:$src))), 7789 (VPBROADCASTBYrr VR128:$src)>; 7790 def : Pat<(v8i16 (X86VBroadcast (v8i16 VR128:$src))), 7791 (VPBROADCASTWrr VR128:$src)>; 7792 def : Pat<(v16i16 (X86VBroadcast (v8i16 VR128:$src))), 7793 (VPBROADCASTWYrr VR128:$src)>; 7794 def : Pat<(v4i32 (X86VBroadcast (v4i32 VR128:$src))), 7795 (VPBROADCASTDrr VR128:$src)>; 7796 def : Pat<(v8i32 (X86VBroadcast (v4i32 VR128:$src))), 7797 (VPBROADCASTDYrr VR128:$src)>; 7798 def : Pat<(v2i64 (X86VBroadcast (v2i64 VR128:$src))), 7799 (VPBROADCASTQrr VR128:$src)>; 7800 def : Pat<(v4i64 (X86VBroadcast (v2i64 VR128:$src))), 7801 (VPBROADCASTQYrr VR128:$src)>; 7802 def : Pat<(v4f32 (X86VBroadcast (v4f32 VR128:$src))), 7803 (VBROADCASTSSrr VR128:$src)>; 7804 def : Pat<(v8f32 (X86VBroadcast (v4f32 VR128:$src))), 7805 (VBROADCASTSSYrr VR128:$src)>; 7806 def : Pat<(v2f64 (X86VBroadcast (v2f64 VR128:$src))), 7807 (VPBROADCASTQrr VR128:$src)>; 7808 def : Pat<(v4f64 (X86VBroadcast (v2f64 VR128:$src))), 7809 (VBROADCASTSDYrr VR128:$src)>; 7810 7811 // Provide fallback in case the load node that is used in the patterns above 7812 // is used by additional users, which prevents the pattern selection. 
7813 let AddedComplexity = 20 in { 7814 def : Pat<(v4f32 (X86VBroadcast FR32:$src)), 7815 (VBROADCASTSSrr (COPY_TO_REGCLASS FR32:$src, VR128))>; 7816 def : Pat<(v8f32 (X86VBroadcast FR32:$src)), 7817 (VBROADCASTSSYrr (COPY_TO_REGCLASS FR32:$src, VR128))>; 7818 def : Pat<(v4f64 (X86VBroadcast FR64:$src)), 7819 (VBROADCASTSDYrr (COPY_TO_REGCLASS FR64:$src, VR128))>; 7820 7821 def : Pat<(v4i32 (X86VBroadcast GR32:$src)), 7822 (VBROADCASTSSrr (COPY_TO_REGCLASS GR32:$src, VR128))>; 7823 def : Pat<(v8i32 (X86VBroadcast GR32:$src)), 7824 (VBROADCASTSSYrr (COPY_TO_REGCLASS GR32:$src, VR128))>; 7825 def : Pat<(v4i64 (X86VBroadcast GR64:$src)), 7826 (VBROADCASTSDYrr (COPY_TO_REGCLASS GR64:$src, VR128))>; 7827 } 7828} 7829 7830// AVX1 broadcast patterns 7831let Predicates = [HasAVX1Only] in { 7832def : Pat<(v8i32 (X86VBroadcast (loadi32 addr:$src))), 7833 (VBROADCASTSSYrm addr:$src)>; 7834def : Pat<(v4i64 (X86VBroadcast (loadi64 addr:$src))), 7835 (VBROADCASTSDYrm addr:$src)>; 7836def : Pat<(v4i32 (X86VBroadcast (loadi32 addr:$src))), 7837 (VBROADCASTSSrm addr:$src)>; 7838} 7839 7840let Predicates = [HasAVX] in { 7841def : Pat<(v8f32 (X86VBroadcast (loadf32 addr:$src))), 7842 (VBROADCASTSSYrm addr:$src)>; 7843def : Pat<(v4f64 (X86VBroadcast (loadf64 addr:$src))), 7844 (VBROADCASTSDYrm addr:$src)>; 7845def : Pat<(v4f32 (X86VBroadcast (loadf32 addr:$src))), 7846 (VBROADCASTSSrm addr:$src)>; 7847 7848 // Provide fallback in case the load node that is used in the patterns above 7849 // is used by additional users, which prevents the pattern selection. 
7850 let AddedComplexity = 20 in { 7851 // 128bit broadcasts: 7852 def : Pat<(v4f32 (X86VBroadcast FR32:$src)), 7853 (VPSHUFDri (COPY_TO_REGCLASS FR32:$src, VR128), 0)>; 7854 def : Pat<(v8f32 (X86VBroadcast FR32:$src)), 7855 (VINSERTF128rr (INSERT_SUBREG (v8f32 (IMPLICIT_DEF)), 7856 (VPSHUFDri (COPY_TO_REGCLASS FR32:$src, VR128), 0), sub_xmm), 7857 (VPSHUFDri (COPY_TO_REGCLASS FR32:$src, VR128), 0), 1)>; 7858 def : Pat<(v4f64 (X86VBroadcast FR64:$src)), 7859 (VINSERTF128rr (INSERT_SUBREG (v4f64 (IMPLICIT_DEF)), 7860 (VPSHUFDri (COPY_TO_REGCLASS FR64:$src, VR128), 0x44), sub_xmm), 7861 (VPSHUFDri (COPY_TO_REGCLASS FR64:$src, VR128), 0x44), 1)>; 7862 7863 def : Pat<(v4i32 (X86VBroadcast GR32:$src)), 7864 (VPSHUFDri (COPY_TO_REGCLASS GR32:$src, VR128), 0)>; 7865 def : Pat<(v8i32 (X86VBroadcast GR32:$src)), 7866 (VINSERTF128rr (INSERT_SUBREG (v8i32 (IMPLICIT_DEF)), 7867 (VPSHUFDri (COPY_TO_REGCLASS GR32:$src, VR128), 0), sub_xmm), 7868 (VPSHUFDri (COPY_TO_REGCLASS GR32:$src, VR128), 0), 1)>; 7869 def : Pat<(v4i64 (X86VBroadcast GR64:$src)), 7870 (VINSERTF128rr (INSERT_SUBREG (v4i64 (IMPLICIT_DEF)), 7871 (VPSHUFDri (COPY_TO_REGCLASS GR64:$src, VR128), 0x44), sub_xmm), 7872 (VPSHUFDri (COPY_TO_REGCLASS GR64:$src, VR128), 0x44), 1)>; 7873 } 7874} 7875 7876//===----------------------------------------------------------------------===// 7877// VPERM - Permute instructions 7878// 7879 7880multiclass avx2_perm<bits<8> opc, string OpcodeStr, PatFrag mem_frag, 7881 ValueType OpVT> { 7882 def Yrr : AVX28I<opc, MRMSrcReg, (outs VR256:$dst), 7883 (ins VR256:$src1, VR256:$src2), 7884 !strconcat(OpcodeStr, 7885 "\t{$src2, $src1, $dst|$dst, $src1, $src2}"), 7886 [(set VR256:$dst, 7887 (OpVT (X86VPermv VR256:$src1, VR256:$src2)))]>, 7888 VEX_4V, VEX_L; 7889 def Yrm : AVX28I<opc, MRMSrcMem, (outs VR256:$dst), 7890 (ins VR256:$src1, i256mem:$src2), 7891 !strconcat(OpcodeStr, 7892 "\t{$src2, $src1, $dst|$dst, $src1, $src2}"), 7893 [(set VR256:$dst, 7894 (OpVT (X86VPermv VR256:$src1, 7895 
(bitconvert (mem_frag addr:$src2)))))]>, 7896 VEX_4V, VEX_L; 7897} 7898 7899defm VPERMD : avx2_perm<0x36, "vpermd", memopv4i64, v8i32>; 7900let ExeDomain = SSEPackedSingle in 7901defm VPERMPS : avx2_perm<0x16, "vpermps", memopv8f32, v8f32>; 7902 7903multiclass avx2_perm_imm<bits<8> opc, string OpcodeStr, PatFrag mem_frag, 7904 ValueType OpVT> { 7905 def Yri : AVX2AIi8<opc, MRMSrcReg, (outs VR256:$dst), 7906 (ins VR256:$src1, i8imm:$src2), 7907 !strconcat(OpcodeStr, 7908 "\t{$src2, $src1, $dst|$dst, $src1, $src2}"), 7909 [(set VR256:$dst, 7910 (OpVT (X86VPermi VR256:$src1, (i8 imm:$src2))))]>, 7911 VEX, VEX_L; 7912 def Ymi : AVX2AIi8<opc, MRMSrcMem, (outs VR256:$dst), 7913 (ins i256mem:$src1, i8imm:$src2), 7914 !strconcat(OpcodeStr, 7915 "\t{$src2, $src1, $dst|$dst, $src1, $src2}"), 7916 [(set VR256:$dst, 7917 (OpVT (X86VPermi (mem_frag addr:$src1), 7918 (i8 imm:$src2))))]>, VEX, VEX_L; 7919} 7920 7921defm VPERMQ : avx2_perm_imm<0x00, "vpermq", memopv4i64, v4i64>, VEX_W; 7922let ExeDomain = SSEPackedDouble in 7923defm VPERMPD : avx2_perm_imm<0x01, "vpermpd", memopv4f64, v4f64>, VEX_W; 7924 7925//===----------------------------------------------------------------------===// 7926// VPERM2I128 - Permute Floating-Point Values in 128-bit chunks 7927// 7928def VPERM2I128rr : AVX2AIi8<0x46, MRMSrcReg, (outs VR256:$dst), 7929 (ins VR256:$src1, VR256:$src2, i8imm:$src3), 7930 "vperm2i128\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}", 7931 [(set VR256:$dst, (v4i64 (X86VPerm2x128 VR256:$src1, VR256:$src2, 7932 (i8 imm:$src3))))]>, VEX_4V, VEX_L; 7933def VPERM2I128rm : AVX2AIi8<0x46, MRMSrcMem, (outs VR256:$dst), 7934 (ins VR256:$src1, f256mem:$src2, i8imm:$src3), 7935 "vperm2i128\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}", 7936 [(set VR256:$dst, (X86VPerm2x128 VR256:$src1, (memopv4i64 addr:$src2), 7937 (i8 imm:$src3)))]>, VEX_4V, VEX_L; 7938 7939let Predicates = [HasAVX2] in { 7940def : Pat<(v8i32 (X86VPerm2x128 VR256:$src1, VR256:$src2, (i8 imm:$imm))), 
7941 (VPERM2I128rr VR256:$src1, VR256:$src2, imm:$imm)>; 7942def : Pat<(v32i8 (X86VPerm2x128 VR256:$src1, VR256:$src2, (i8 imm:$imm))), 7943 (VPERM2I128rr VR256:$src1, VR256:$src2, imm:$imm)>; 7944def : Pat<(v16i16 (X86VPerm2x128 VR256:$src1, VR256:$src2, (i8 imm:$imm))), 7945 (VPERM2I128rr VR256:$src1, VR256:$src2, imm:$imm)>; 7946 7947def : Pat<(v32i8 (X86VPerm2x128 VR256:$src1, (bc_v32i8 (memopv4i64 addr:$src2)), 7948 (i8 imm:$imm))), 7949 (VPERM2I128rm VR256:$src1, addr:$src2, imm:$imm)>; 7950def : Pat<(v16i16 (X86VPerm2x128 VR256:$src1, 7951 (bc_v16i16 (memopv4i64 addr:$src2)), (i8 imm:$imm))), 7952 (VPERM2I128rm VR256:$src1, addr:$src2, imm:$imm)>; 7953def : Pat<(v8i32 (X86VPerm2x128 VR256:$src1, (bc_v8i32 (memopv4i64 addr:$src2)), 7954 (i8 imm:$imm))), 7955 (VPERM2I128rm VR256:$src1, addr:$src2, imm:$imm)>; 7956} 7957 7958 7959//===----------------------------------------------------------------------===// 7960// VINSERTI128 - Insert packed integer values 7961// 7962let neverHasSideEffects = 1 in { 7963def VINSERTI128rr : AVX2AIi8<0x38, MRMSrcReg, (outs VR256:$dst), 7964 (ins VR256:$src1, VR128:$src2, i8imm:$src3), 7965 "vinserti128\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}", 7966 []>, VEX_4V, VEX_L; 7967let mayLoad = 1 in 7968def VINSERTI128rm : AVX2AIi8<0x38, MRMSrcMem, (outs VR256:$dst), 7969 (ins VR256:$src1, i128mem:$src2, i8imm:$src3), 7970 "vinserti128\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}", 7971 []>, VEX_4V, VEX_L; 7972} 7973 7974let Predicates = [HasAVX2] in { 7975def : Pat<(vinsertf128_insert:$ins (v4i64 VR256:$src1), (v2i64 VR128:$src2), 7976 (iPTR imm)), 7977 (VINSERTI128rr VR256:$src1, VR128:$src2, 7978 (INSERT_get_vinsertf128_imm VR256:$ins))>; 7979def : Pat<(vinsertf128_insert:$ins (v8i32 VR256:$src1), (v4i32 VR128:$src2), 7980 (iPTR imm)), 7981 (VINSERTI128rr VR256:$src1, VR128:$src2, 7982 (INSERT_get_vinsertf128_imm VR256:$ins))>; 7983def : Pat<(vinsertf128_insert:$ins (v32i8 VR256:$src1), (v16i8 VR128:$src2), 
7984 (iPTR imm)), 7985 (VINSERTI128rr VR256:$src1, VR128:$src2, 7986 (INSERT_get_vinsertf128_imm VR256:$ins))>; 7987def : Pat<(vinsertf128_insert:$ins (v16i16 VR256:$src1), (v8i16 VR128:$src2), 7988 (iPTR imm)), 7989 (VINSERTI128rr VR256:$src1, VR128:$src2, 7990 (INSERT_get_vinsertf128_imm VR256:$ins))>; 7991 7992def : Pat<(vinsertf128_insert:$ins (v4i64 VR256:$src1), (memopv2i64 addr:$src2), 7993 (iPTR imm)), 7994 (VINSERTI128rm VR256:$src1, addr:$src2, 7995 (INSERT_get_vinsertf128_imm VR256:$ins))>; 7996def : Pat<(vinsertf128_insert:$ins (v8i32 VR256:$src1), 7997 (bc_v4i32 (memopv2i64 addr:$src2)), 7998 (iPTR imm)), 7999 (VINSERTI128rm VR256:$src1, addr:$src2, 8000 (INSERT_get_vinsertf128_imm VR256:$ins))>; 8001def : Pat<(vinsertf128_insert:$ins (v32i8 VR256:$src1), 8002 (bc_v16i8 (memopv2i64 addr:$src2)), 8003 (iPTR imm)), 8004 (VINSERTI128rm VR256:$src1, addr:$src2, 8005 (INSERT_get_vinsertf128_imm VR256:$ins))>; 8006def : Pat<(vinsertf128_insert:$ins (v16i16 VR256:$src1), 8007 (bc_v8i16 (memopv2i64 addr:$src2)), 8008 (iPTR imm)), 8009 (VINSERTI128rm VR256:$src1, addr:$src2, 8010 (INSERT_get_vinsertf128_imm VR256:$ins))>; 8011} 8012 8013//===----------------------------------------------------------------------===// 8014// VEXTRACTI128 - Extract packed integer values 8015// 8016def VEXTRACTI128rr : AVX2AIi8<0x39, MRMDestReg, (outs VR128:$dst), 8017 (ins VR256:$src1, i8imm:$src2), 8018 "vextracti128\t{$src2, $src1, $dst|$dst, $src1, $src2}", 8019 [(set VR128:$dst, 8020 (int_x86_avx2_vextracti128 VR256:$src1, imm:$src2))]>, 8021 VEX, VEX_L; 8022let neverHasSideEffects = 1, mayStore = 1 in 8023def VEXTRACTI128mr : AVX2AIi8<0x39, MRMDestMem, (outs), 8024 (ins i128mem:$dst, VR256:$src1, i8imm:$src2), 8025 "vextracti128\t{$src2, $src1, $dst|$dst, $src1, $src2}", []>, 8026 VEX, VEX_L; 8027 8028let Predicates = [HasAVX2] in { 8029def : Pat<(vextractf128_extract:$ext VR256:$src1, (iPTR imm)), 8030 (v2i64 (VEXTRACTI128rr 8031 (v4i64 VR256:$src1), 8032 
(EXTRACT_get_vextractf128_imm VR128:$ext)))>; 8033def : Pat<(vextractf128_extract:$ext VR256:$src1, (iPTR imm)), 8034 (v4i32 (VEXTRACTI128rr 8035 (v8i32 VR256:$src1), 8036 (EXTRACT_get_vextractf128_imm VR128:$ext)))>; 8037def : Pat<(vextractf128_extract:$ext VR256:$src1, (iPTR imm)), 8038 (v8i16 (VEXTRACTI128rr 8039 (v16i16 VR256:$src1), 8040 (EXTRACT_get_vextractf128_imm VR128:$ext)))>; 8041def : Pat<(vextractf128_extract:$ext VR256:$src1, (iPTR imm)), 8042 (v16i8 (VEXTRACTI128rr 8043 (v32i8 VR256:$src1), 8044 (EXTRACT_get_vextractf128_imm VR128:$ext)))>; 8045 8046def : Pat<(alignedstore (v2i64 (vextractf128_extract:$ext (v4i64 VR256:$src1), 8047 (iPTR imm))), addr:$dst), 8048 (VEXTRACTI128mr addr:$dst, VR256:$src1, 8049 (EXTRACT_get_vextractf128_imm VR128:$ext))>; 8050def : Pat<(alignedstore (v4i32 (vextractf128_extract:$ext (v8i32 VR256:$src1), 8051 (iPTR imm))), addr:$dst), 8052 (VEXTRACTI128mr addr:$dst, VR256:$src1, 8053 (EXTRACT_get_vextractf128_imm VR128:$ext))>; 8054def : Pat<(alignedstore (v8i16 (vextractf128_extract:$ext (v16i16 VR256:$src1), 8055 (iPTR imm))), addr:$dst), 8056 (VEXTRACTI128mr addr:$dst, VR256:$src1, 8057 (EXTRACT_get_vextractf128_imm VR128:$ext))>; 8058def : Pat<(alignedstore (v16i8 (vextractf128_extract:$ext (v32i8 VR256:$src1), 8059 (iPTR imm))), addr:$dst), 8060 (VEXTRACTI128mr addr:$dst, VR256:$src1, 8061 (EXTRACT_get_vextractf128_imm VR128:$ext))>; 8062} 8063 8064//===----------------------------------------------------------------------===// 8065// VPMASKMOV - Conditional SIMD Integer Packed Loads and Stores 8066// 8067multiclass avx2_pmovmask<string OpcodeStr, 8068 Intrinsic IntLd128, Intrinsic IntLd256, 8069 Intrinsic IntSt128, Intrinsic IntSt256> { 8070 def rm : AVX28I<0x8c, MRMSrcMem, (outs VR128:$dst), 8071 (ins VR128:$src1, i128mem:$src2), 8072 !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"), 8073 [(set VR128:$dst, (IntLd128 addr:$src2, VR128:$src1))]>, VEX_4V; 8074 def Yrm : AVX28I<0x8c, MRMSrcMem, (outs 
VR256:$dst), 8075 (ins VR256:$src1, i256mem:$src2), 8076 !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"), 8077 [(set VR256:$dst, (IntLd256 addr:$src2, VR256:$src1))]>, 8078 VEX_4V, VEX_L; 8079 def mr : AVX28I<0x8e, MRMDestMem, (outs), 8080 (ins i128mem:$dst, VR128:$src1, VR128:$src2), 8081 !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"), 8082 [(IntSt128 addr:$dst, VR128:$src1, VR128:$src2)]>, VEX_4V; 8083 def Ymr : AVX28I<0x8e, MRMDestMem, (outs), 8084 (ins i256mem:$dst, VR256:$src1, VR256:$src2), 8085 !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"), 8086 [(IntSt256 addr:$dst, VR256:$src1, VR256:$src2)]>, VEX_4V, VEX_L; 8087} 8088 8089defm VPMASKMOVD : avx2_pmovmask<"vpmaskmovd", 8090 int_x86_avx2_maskload_d, 8091 int_x86_avx2_maskload_d_256, 8092 int_x86_avx2_maskstore_d, 8093 int_x86_avx2_maskstore_d_256>; 8094defm VPMASKMOVQ : avx2_pmovmask<"vpmaskmovq", 8095 int_x86_avx2_maskload_q, 8096 int_x86_avx2_maskload_q_256, 8097 int_x86_avx2_maskstore_q, 8098 int_x86_avx2_maskstore_q_256>, VEX_W; 8099 8100 8101//===----------------------------------------------------------------------===// 8102// Variable Bit Shifts 8103// 8104multiclass avx2_var_shift<bits<8> opc, string OpcodeStr, SDNode OpNode, 8105 ValueType vt128, ValueType vt256> { 8106 def rr : AVX28I<opc, MRMSrcReg, (outs VR128:$dst), 8107 (ins VR128:$src1, VR128:$src2), 8108 !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"), 8109 [(set VR128:$dst, 8110 (vt128 (OpNode VR128:$src1, (vt128 VR128:$src2))))]>, 8111 VEX_4V; 8112 def rm : AVX28I<opc, MRMSrcMem, (outs VR128:$dst), 8113 (ins VR128:$src1, i128mem:$src2), 8114 !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"), 8115 [(set VR128:$dst, 8116 (vt128 (OpNode VR128:$src1, 8117 (vt128 (bitconvert (memopv2i64 addr:$src2))))))]>, 8118 VEX_4V; 8119 def Yrr : AVX28I<opc, MRMSrcReg, (outs VR256:$dst), 8120 (ins VR256:$src1, VR256:$src2), 8121 !strconcat(OpcodeStr, "\t{$src2, 
$src1, $dst|$dst, $src1, $src2}"), 8122 [(set VR256:$dst, 8123 (vt256 (OpNode VR256:$src1, (vt256 VR256:$src2))))]>, 8124 VEX_4V, VEX_L; 8125 def Yrm : AVX28I<opc, MRMSrcMem, (outs VR256:$dst), 8126 (ins VR256:$src1, i256mem:$src2), 8127 !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"), 8128 [(set VR256:$dst, 8129 (vt256 (OpNode VR256:$src1, 8130 (vt256 (bitconvert (memopv4i64 addr:$src2))))))]>, 8131 VEX_4V, VEX_L; 8132} 8133 8134defm VPSLLVD : avx2_var_shift<0x47, "vpsllvd", shl, v4i32, v8i32>; 8135defm VPSLLVQ : avx2_var_shift<0x47, "vpsllvq", shl, v2i64, v4i64>, VEX_W; 8136defm VPSRLVD : avx2_var_shift<0x45, "vpsrlvd", srl, v4i32, v8i32>; 8137defm VPSRLVQ : avx2_var_shift<0x45, "vpsrlvq", srl, v2i64, v4i64>, VEX_W; 8138defm VPSRAVD : avx2_var_shift<0x46, "vpsravd", sra, v4i32, v8i32>; 8139 8140//===----------------------------------------------------------------------===// 8141// VGATHER - GATHER Operations 8142multiclass avx2_gather<bits<8> opc, string OpcodeStr, RegisterClass RC256, 8143 X86MemOperand memop128, X86MemOperand memop256> { 8144 def rm : AVX28I<opc, MRMSrcMem, (outs VR128:$dst, VR128:$mask_wb), 8145 (ins VR128:$src1, memop128:$src2, VR128:$mask), 8146 !strconcat(OpcodeStr, 8147 "\t{$mask, $src2, $dst|$dst, $src2, $mask}"), 8148 []>, VEX_4VOp3; 8149 def Yrm : AVX28I<opc, MRMSrcMem, (outs RC256:$dst, RC256:$mask_wb), 8150 (ins RC256:$src1, memop256:$src2, RC256:$mask), 8151 !strconcat(OpcodeStr, 8152 "\t{$mask, $src2, $dst|$dst, $src2, $mask}"), 8153 []>, VEX_4VOp3, VEX_L; 8154} 8155 8156let mayLoad = 1, Constraints = "$src1 = $dst, $mask = $mask_wb" in { 8157 defm VGATHERDPD : avx2_gather<0x92, "vgatherdpd", VR256, vx64mem, vx64mem>, VEX_W; 8158 defm VGATHERQPD : avx2_gather<0x93, "vgatherqpd", VR256, vx64mem, vy64mem>, VEX_W; 8159 defm VGATHERDPS : avx2_gather<0x92, "vgatherdps", VR256, vx32mem, vy32mem>; 8160 defm VGATHERQPS : avx2_gather<0x93, "vgatherqps", VR128, vx32mem, vy32mem>; 8161 defm VPGATHERDQ : avx2_gather<0x90, 
"vpgatherdq", VR256, vx64mem, vx64mem>, VEX_W; 8162 defm VPGATHERQQ : avx2_gather<0x91, "vpgatherqq", VR256, vx64mem, vy64mem>, VEX_W; 8163 defm VPGATHERDD : avx2_gather<0x90, "vpgatherdd", VR256, vx32mem, vy32mem>; 8164 defm VPGATHERQD : avx2_gather<0x91, "vpgatherqd", VR128, vx32mem, vy32mem>; 8165} 8166