X86InstrFragmentsSIMD.td (210299) | X86InstrFragmentsSIMD.td (212904) |
---|---|
1//======- X86InstrFragmentsSIMD.td - x86 ISA -------------*- tablegen -*-=====// 2// 3// The LLVM Compiler Infrastructure 4// 5// This file is distributed under the University of Illinois Open Source 6// License. See LICENSE.TXT for details. 7// 8//===----------------------------------------------------------------------===// --- 103 unchanged lines hidden (view full) --- 112def X86pcmpeqd : SDNode<"X86ISD::PCMPEQD", SDTIntBinOp, [SDNPCommutative]>; 113def X86pcmpeqq : SDNode<"X86ISD::PCMPEQQ", SDTIntBinOp, [SDNPCommutative]>; 114def X86pcmpgtb : SDNode<"X86ISD::PCMPGTB", SDTIntBinOp>; 115def X86pcmpgtw : SDNode<"X86ISD::PCMPGTW", SDTIntBinOp>; 116def X86pcmpgtd : SDNode<"X86ISD::PCMPGTD", SDTIntBinOp>; 117def X86pcmpgtq : SDNode<"X86ISD::PCMPGTQ", SDTIntBinOp>; 118 119def SDTX86CmpPTest : SDTypeProfile<1, 2, [SDTCisVT<0, i32>, | 1//======- X86InstrFragmentsSIMD.td - x86 ISA -------------*- tablegen -*-=====// 2// 3// The LLVM Compiler Infrastructure 4// 5// This file is distributed under the University of Illinois Open Source 6// License. See LICENSE.TXT for details. 7// 8//===----------------------------------------------------------------------===// --- 103 unchanged lines hidden (view full) --- 112def X86pcmpeqd : SDNode<"X86ISD::PCMPEQD", SDTIntBinOp, [SDNPCommutative]>; 113def X86pcmpeqq : SDNode<"X86ISD::PCMPEQQ", SDTIntBinOp, [SDNPCommutative]>; 114def X86pcmpgtb : SDNode<"X86ISD::PCMPGTB", SDTIntBinOp>; 115def X86pcmpgtw : SDNode<"X86ISD::PCMPGTW", SDTIntBinOp>; 116def X86pcmpgtd : SDNode<"X86ISD::PCMPGTD", SDTIntBinOp>; 117def X86pcmpgtq : SDNode<"X86ISD::PCMPGTQ", SDTIntBinOp>; 118 119def SDTX86CmpPTest : SDTypeProfile<1, 2, [SDTCisVT<0, i32>, |
120 SDTCisVT<1, v4f32>, 121 SDTCisVT<2, v4f32>]>; | 120 SDTCisVec<1>, 121 SDTCisSameAs<2, 1>]>; |
122def X86ptest : SDNode<"X86ISD::PTEST", SDTX86CmpPTest>; | 122def X86ptest : SDNode<"X86ISD::PTEST", SDTX86CmpPTest>; |
123def X86testp : SDNode<"X86ISD::TESTP", SDTX86CmpPTest>; |
|
123 | 124 |
125// Specific shuffle nodes - At some point ISD::VECTOR_SHUFFLE will always get 126// translated into one of the target nodes below during lowering. 127// Note: this is a work in progress... 128def SDTShuff1Op : SDTypeProfile<1, 1, [SDTCisVec<0>, SDTCisSameAs<0,1>]>; 129def SDTShuff2Op : SDTypeProfile<1, 2, [SDTCisVec<0>, SDTCisSameAs<0,1>, 130 SDTCisSameAs<0,2>]>; 131 132def SDTShuff2OpI : SDTypeProfile<1, 2, [SDTCisVec<0>, 133 SDTCisSameAs<0,1>, SDTCisInt<2>]>; 134def SDTShuff3OpI : SDTypeProfile<1, 3, [SDTCisVec<0>, SDTCisSameAs<0,1>, 135 SDTCisSameAs<0,2>, SDTCisInt<3>]>; 136 137def SDTShuff2OpLdI : SDTypeProfile<1, 2, [SDTCisVec<0>, SDTCisPtrTy<1>, 138 SDTCisInt<2>]>; 139 140def X86PAlign : SDNode<"X86ISD::PALIGN", SDTShuff3OpI>; 141 142def X86PShufd : SDNode<"X86ISD::PSHUFD", SDTShuff2OpI>; 143def X86PShufhw : SDNode<"X86ISD::PSHUFHW", SDTShuff2OpI>; 144def X86PShuflw : SDNode<"X86ISD::PSHUFLW", SDTShuff2OpI>; 145 146def X86PShufhwLd : SDNode<"X86ISD::PSHUFHW_LD", SDTShuff2OpLdI>; 147def X86PShuflwLd : SDNode<"X86ISD::PSHUFLW_LD", SDTShuff2OpLdI>; 148 149def X86Shufpd : SDNode<"X86ISD::SHUFPD", SDTShuff3OpI>; 150def X86Shufps : SDNode<"X86ISD::SHUFPS", SDTShuff3OpI>; 151 152def X86Movddup : SDNode<"X86ISD::MOVDDUP", SDTShuff1Op>; 153def X86Movshdup : SDNode<"X86ISD::MOVSHDUP", SDTShuff1Op>; 154def X86Movsldup : SDNode<"X86ISD::MOVSLDUP", SDTShuff1Op>; 155 156def X86Movsd : SDNode<"X86ISD::MOVSD", SDTShuff2Op>; 157def X86Movss : SDNode<"X86ISD::MOVSS", SDTShuff2Op>; 158 159def X86Movlhps : SDNode<"X86ISD::MOVLHPS", SDTShuff2Op>; 160def X86Movlhpd : SDNode<"X86ISD::MOVLHPD", SDTShuff2Op>; 161def X86Movhlps : SDNode<"X86ISD::MOVHLPS", SDTShuff2Op>; 162def X86Movhlpd : SDNode<"X86ISD::MOVHLPD", SDTShuff2Op>; 163 164def X86Movlps : SDNode<"X86ISD::MOVLPS", SDTShuff2Op>; 165def X86Movlpd : SDNode<"X86ISD::MOVLPD", SDTShuff2Op>; 166 167def X86Unpcklps : SDNode<"X86ISD::UNPCKLPS", SDTShuff2Op>; 168def X86Unpcklpd : SDNode<"X86ISD::UNPCKLPD", SDTShuff2Op>; 169def X86Unpckhps : SDNode<"X86ISD::UNPCKHPS", SDTShuff2Op>; 170def X86Unpckhpd : SDNode<"X86ISD::UNPCKHPD", SDTShuff2Op>; 171 172def X86Punpcklbw : SDNode<"X86ISD::PUNPCKLBW", SDTShuff2Op>; 173def X86Punpcklwd : SDNode<"X86ISD::PUNPCKLWD", SDTShuff2Op>; 174def X86Punpckldq : SDNode<"X86ISD::PUNPCKLDQ", SDTShuff2Op>; 175def X86Punpcklqdq : SDNode<"X86ISD::PUNPCKLQDQ", SDTShuff2Op>; 176 177def X86Punpckhbw : SDNode<"X86ISD::PUNPCKHBW", SDTShuff2Op>; 178def X86Punpckhwd : SDNode<"X86ISD::PUNPCKHWD", SDTShuff2Op>; 179def X86Punpckhdq : SDNode<"X86ISD::PUNPCKHDQ", SDTShuff2Op>; 180def X86Punpckhqdq : SDNode<"X86ISD::PUNPCKHQDQ", SDTShuff2Op>; 181 |
|
124//===----------------------------------------------------------------------===// 125// SSE Complex Patterns 126//===----------------------------------------------------------------------===// 127 128// These are 'extloads' from a scalar to the low element of a vector, zeroing 129// the top elements. These are used for the SSE 'ss' and 'sd' instruction 130// forms. 131def sse_load_f32 : ComplexPattern<v4f32, 5, "SelectScalarSSELoad", [], --- 11 unchanged lines hidden (view full) --- 143 let MIOperandInfo = (ops ptr_rc, i8imm, ptr_rc_nosp, i32imm, i8imm); 144 let ParserMatchClass = X86MemAsmOperand; 145} 146 147//===----------------------------------------------------------------------===// 148// SSE pattern fragments 149//===----------------------------------------------------------------------===// 150 | 182//===----------------------------------------------------------------------===// 183// SSE Complex Patterns 184//===----------------------------------------------------------------------===// 185 186// These are 'extloads' from a scalar to the low element of a vector, zeroing 187// the top elements. These are used for the SSE 'ss' and 'sd' instruction 188// forms. 189def sse_load_f32 : ComplexPattern<v4f32, 5, "SelectScalarSSELoad", [], --- 11 unchanged lines hidden (view full) --- 201 let MIOperandInfo = (ops ptr_rc, i8imm, ptr_rc_nosp, i32imm, i8imm); 202 let ParserMatchClass = X86MemAsmOperand; 203} 204 205//===----------------------------------------------------------------------===// 206// SSE pattern fragments 207//===----------------------------------------------------------------------===// 208 |
209// 128-bit load pattern fragments |
|
151def loadv4f32 : PatFrag<(ops node:$ptr), (v4f32 (load node:$ptr))>; 152def loadv2f64 : PatFrag<(ops node:$ptr), (v2f64 (load node:$ptr))>; 153def loadv4i32 : PatFrag<(ops node:$ptr), (v4i32 (load node:$ptr))>; 154def loadv2i64 : PatFrag<(ops node:$ptr), (v2i64 (load node:$ptr))>; 155 | 210def loadv4f32 : PatFrag<(ops node:$ptr), (v4f32 (load node:$ptr))>; 211def loadv2f64 : PatFrag<(ops node:$ptr), (v2f64 (load node:$ptr))>; 212def loadv4i32 : PatFrag<(ops node:$ptr), (v4i32 (load node:$ptr))>; 213def loadv2i64 : PatFrag<(ops node:$ptr), (v2i64 (load node:$ptr))>; 214 |
156// FIXME: move this to a more appropriate place after all AVX is done. | 215// 256-bit load pattern fragments |
157def loadv8f32 : PatFrag<(ops node:$ptr), (v8f32 (load node:$ptr))>; 158def loadv4f64 : PatFrag<(ops node:$ptr), (v4f64 (load node:$ptr))>; 159def loadv8i32 : PatFrag<(ops node:$ptr), (v8i32 (load node:$ptr))>; 160def loadv4i64 : PatFrag<(ops node:$ptr), (v4i64 (load node:$ptr))>; 161 162// Like 'store', but always requires vector alignment. 163def alignedstore : PatFrag<(ops node:$val, node:$ptr), 164 (store node:$val, node:$ptr), [{ --- 4 unchanged lines hidden (view full) --- 169def alignedload : PatFrag<(ops node:$ptr), (load node:$ptr), [{ 170 return cast<LoadSDNode>(N)->getAlignment() >= 16; 171}]>; 172 173def alignedloadfsf32 : PatFrag<(ops node:$ptr), 174 (f32 (alignedload node:$ptr))>; 175def alignedloadfsf64 : PatFrag<(ops node:$ptr), 176 (f64 (alignedload node:$ptr))>; | 216def loadv8f32 : PatFrag<(ops node:$ptr), (v8f32 (load node:$ptr))>; 217def loadv4f64 : PatFrag<(ops node:$ptr), (v4f64 (load node:$ptr))>; 218def loadv8i32 : PatFrag<(ops node:$ptr), (v8i32 (load node:$ptr))>; 219def loadv4i64 : PatFrag<(ops node:$ptr), (v4i64 (load node:$ptr))>; 220 221// Like 'store', but always requires vector alignment. 222def alignedstore : PatFrag<(ops node:$val, node:$ptr), 223 (store node:$val, node:$ptr), [{ --- 4 unchanged lines hidden (view full) --- 228def alignedload : PatFrag<(ops node:$ptr), (load node:$ptr), [{ 229 return cast<LoadSDNode>(N)->getAlignment() >= 16; 230}]>; 231 232def alignedloadfsf32 : PatFrag<(ops node:$ptr), 233 (f32 (alignedload node:$ptr))>; 234def alignedloadfsf64 : PatFrag<(ops node:$ptr), 235 (f64 (alignedload node:$ptr))>; |
236 237// 128-bit aligned load pattern fragments |
|
177def alignedloadv4f32 : PatFrag<(ops node:$ptr), 178 (v4f32 (alignedload node:$ptr))>; 179def alignedloadv2f64 : PatFrag<(ops node:$ptr), 180 (v2f64 (alignedload node:$ptr))>; 181def alignedloadv4i32 : PatFrag<(ops node:$ptr), 182 (v4i32 (alignedload node:$ptr))>; 183def alignedloadv2i64 : PatFrag<(ops node:$ptr), 184 (v2i64 (alignedload node:$ptr))>; 185 | 238def alignedloadv4f32 : PatFrag<(ops node:$ptr), 239 (v4f32 (alignedload node:$ptr))>; 240def alignedloadv2f64 : PatFrag<(ops node:$ptr), 241 (v2f64 (alignedload node:$ptr))>; 242def alignedloadv4i32 : PatFrag<(ops node:$ptr), 243 (v4i32 (alignedload node:$ptr))>; 244def alignedloadv2i64 : PatFrag<(ops node:$ptr), 245 (v2i64 (alignedload node:$ptr))>; 246 |
186// FIXME: move this to a more appropriate place after all AVX is done. | 247// 256-bit aligned load pattern fragments |
187def alignedloadv8f32 : PatFrag<(ops node:$ptr), 188 (v8f32 (alignedload node:$ptr))>; 189def alignedloadv4f64 : PatFrag<(ops node:$ptr), 190 (v4f64 (alignedload node:$ptr))>; 191def alignedloadv8i32 : PatFrag<(ops node:$ptr), 192 (v8i32 (alignedload node:$ptr))>; 193def alignedloadv4i64 : PatFrag<(ops node:$ptr), 194 (v4i64 (alignedload node:$ptr))>; --- 6 unchanged lines hidden (view full) --- 201// Opteron 10h and later implement such a feature. 202def memop : PatFrag<(ops node:$ptr), (load node:$ptr), [{ 203 return Subtarget->hasVectorUAMem() 204 || cast<LoadSDNode>(N)->getAlignment() >= 16; 205}]>; 206 207def memopfsf32 : PatFrag<(ops node:$ptr), (f32 (memop node:$ptr))>; 208def memopfsf64 : PatFrag<(ops node:$ptr), (f64 (memop node:$ptr))>; | 248def alignedloadv8f32 : PatFrag<(ops node:$ptr), 249 (v8f32 (alignedload node:$ptr))>; 250def alignedloadv4f64 : PatFrag<(ops node:$ptr), 251 (v4f64 (alignedload node:$ptr))>; 252def alignedloadv8i32 : PatFrag<(ops node:$ptr), 253 (v8i32 (alignedload node:$ptr))>; 254def alignedloadv4i64 : PatFrag<(ops node:$ptr), 255 (v4i64 (alignedload node:$ptr))>; --- 6 unchanged lines hidden (view full) --- 262// Opteron 10h and later implement such a feature. 263def memop : PatFrag<(ops node:$ptr), (load node:$ptr), [{ 264 return Subtarget->hasVectorUAMem() 265 || cast<LoadSDNode>(N)->getAlignment() >= 16; 266}]>; 267 268def memopfsf32 : PatFrag<(ops node:$ptr), (f32 (memop node:$ptr))>; 269def memopfsf64 : PatFrag<(ops node:$ptr), (f64 (memop node:$ptr))>; |
270 271// 128-bit memop pattern fragments |
|
209def memopv4f32 : PatFrag<(ops node:$ptr), (v4f32 (memop node:$ptr))>; 210def memopv2f64 : PatFrag<(ops node:$ptr), (v2f64 (memop node:$ptr))>; 211def memopv4i32 : PatFrag<(ops node:$ptr), (v4i32 (memop node:$ptr))>; 212def memopv2i64 : PatFrag<(ops node:$ptr), (v2i64 (memop node:$ptr))>; 213def memopv16i8 : PatFrag<(ops node:$ptr), (v16i8 (memop node:$ptr))>; 214 | 272def memopv4f32 : PatFrag<(ops node:$ptr), (v4f32 (memop node:$ptr))>; 273def memopv2f64 : PatFrag<(ops node:$ptr), (v2f64 (memop node:$ptr))>; 274def memopv4i32 : PatFrag<(ops node:$ptr), (v4i32 (memop node:$ptr))>; 275def memopv2i64 : PatFrag<(ops node:$ptr), (v2i64 (memop node:$ptr))>; 276def memopv16i8 : PatFrag<(ops node:$ptr), (v16i8 (memop node:$ptr))>; 277 |
215// FIXME: move this to a more appropriate place after all AVX is done. | 278// 256-bit memop pattern fragments 279def memopv32i8 : PatFrag<(ops node:$ptr), (v32i8 (memop node:$ptr))>; |
216def memopv8f32 : PatFrag<(ops node:$ptr), (v8f32 (memop node:$ptr))>; 217def memopv4f64 : PatFrag<(ops node:$ptr), (v4f64 (memop node:$ptr))>; | 280def memopv8f32 : PatFrag<(ops node:$ptr), (v8f32 (memop node:$ptr))>; 281def memopv4f64 : PatFrag<(ops node:$ptr), (v4f64 (memop node:$ptr))>; |
282def memopv4i64 : PatFrag<(ops node:$ptr), (v4i64 (memop node:$ptr))>; 283def memopv8i32 : PatFrag<(ops node:$ptr), (v8i32 (memop node:$ptr))>; |
|
218 219// SSSE3 uses MMX registers for some instructions. They aren't aligned on a 220// 16-byte boundary. 221// FIXME: 8 byte alignment for mmx reads is not required 222def memop64 : PatFrag<(ops node:$ptr), (load node:$ptr), [{ 223 return cast<LoadSDNode>(N)->getAlignment() >= 8; 224}]>; 225 --- 23 unchanged lines hidden (view full) --- 249def unalignednontemporalstore : PatFrag<(ops node:$val, node:$ptr), 250 (st node:$val, node:$ptr), [{ 251 if (StoreSDNode *ST = dyn_cast<StoreSDNode>(N)) 252 return ST->isNonTemporal() && 253 ST->getAlignment() < 16; 254 return false; 255}]>; 256 | 284 285// SSSE3 uses MMX registers for some instructions. They aren't aligned on a 286// 16-byte boundary. 287// FIXME: 8 byte alignment for mmx reads is not required 288def memop64 : PatFrag<(ops node:$ptr), (load node:$ptr), [{ 289 return cast<LoadSDNode>(N)->getAlignment() >= 8; 290}]>; 291 --- 23 unchanged lines hidden (view full) --- 315def unalignednontemporalstore : PatFrag<(ops node:$val, node:$ptr), 316 (st node:$val, node:$ptr), [{ 317 if (StoreSDNode *ST = dyn_cast<StoreSDNode>(N)) 318 return ST->isNonTemporal() && 319 ST->getAlignment() < 16; 320 return false; 321}]>; 322 |
323// 128-bit bitconvert pattern fragments |
|
257def bc_v4f32 : PatFrag<(ops node:$in), (v4f32 (bitconvert node:$in))>; 258def bc_v2f64 : PatFrag<(ops node:$in), (v2f64 (bitconvert node:$in))>; 259def bc_v16i8 : PatFrag<(ops node:$in), (v16i8 (bitconvert node:$in))>; 260def bc_v8i16 : PatFrag<(ops node:$in), (v8i16 (bitconvert node:$in))>; 261def bc_v4i32 : PatFrag<(ops node:$in), (v4i32 (bitconvert node:$in))>; 262def bc_v2i64 : PatFrag<(ops node:$in), (v2i64 (bitconvert node:$in))>; 263 | 324def bc_v4f32 : PatFrag<(ops node:$in), (v4f32 (bitconvert node:$in))>; 325def bc_v2f64 : PatFrag<(ops node:$in), (v2f64 (bitconvert node:$in))>; 326def bc_v16i8 : PatFrag<(ops node:$in), (v16i8 (bitconvert node:$in))>; 327def bc_v8i16 : PatFrag<(ops node:$in), (v8i16 (bitconvert node:$in))>; 328def bc_v4i32 : PatFrag<(ops node:$in), (v4i32 (bitconvert node:$in))>; 329def bc_v2i64 : PatFrag<(ops node:$in), (v2i64 (bitconvert node:$in))>; 330 |
331// 256-bit bitconvert pattern fragments 332def bc_v8i32 : PatFrag<(ops node:$in), (v8i32 (bitconvert node:$in))>; 333 |
|
264def vzmovl_v2i64 : PatFrag<(ops node:$src), 265 (bitconvert (v2i64 (X86vzmovl 266 (v2i64 (scalar_to_vector (loadi64 node:$src))))))>; 267def vzmovl_v4i32 : PatFrag<(ops node:$src), 268 (bitconvert (v4i32 (X86vzmovl 269 (v4i32 (scalar_to_vector (loadi32 node:$src))))))>; 270 271def vzload_v2i64 : PatFrag<(ops node:$src), --- 127 unchanged lines hidden --- | 334def vzmovl_v2i64 : PatFrag<(ops node:$src), 335 (bitconvert (v2i64 (X86vzmovl 336 (v2i64 (scalar_to_vector (loadi64 node:$src))))))>; 337def vzmovl_v4i32 : PatFrag<(ops node:$src), 338 (bitconvert (v4i32 (X86vzmovl 339 (v4i32 (scalar_to_vector (loadi32 node:$src))))))>; 340 341def vzload_v2i64 : PatFrag<(ops node:$src), --- 127 unchanged lines hidden --- |