X86InstrFragmentsSIMD.td, r210299 → r212904 (deleted lines are marked '-', added lines '+')
//======- X86InstrFragmentsSIMD.td - x86 ISA -------------*- tablegen -*-=====//
//
//                     The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//

--- 103 unchanged lines hidden ---

def X86pcmpeqd  : SDNode<"X86ISD::PCMPEQD", SDTIntBinOp, [SDNPCommutative]>;
def X86pcmpeqq  : SDNode<"X86ISD::PCMPEQQ", SDTIntBinOp, [SDNPCommutative]>;
def X86pcmpgtb  : SDNode<"X86ISD::PCMPGTB", SDTIntBinOp>;
def X86pcmpgtw  : SDNode<"X86ISD::PCMPGTW", SDTIntBinOp>;
def X86pcmpgtd  : SDNode<"X86ISD::PCMPGTD", SDTIntBinOp>;
def X86pcmpgtq  : SDNode<"X86ISD::PCMPGTQ", SDTIntBinOp>;

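The X86pcmp* nodes model the SSE integer compares, which write an all-ones or
all-zero mask into each lane; note that only the PCMPEQ* equality forms are
tagged SDNPCommutative. As a source-level sketch of the operation being
modelled (standard SSE2 intrinsic; illustration only, not code from this file):

    #include <emmintrin.h>

    // Per-lane signed compare: each i32 lane of the result is 0xFFFFFFFF
    // where a > b and 0 otherwise -- the operation X86ISD::PCMPGTD models.
    // PCMPGT* is order-sensitive, hence no SDNPCommutative above.
    __m128i lanes_greater(__m128i a, __m128i b) {
      return _mm_cmpgt_epi32(a, b);  // PCMPGTD
    }
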
def SDTX86CmpPTest : SDTypeProfile<1, 2, [SDTCisVT<0, i32>,
-                                          SDTCisVT<1, v4f32>,
-                                          SDTCisVT<2, v4f32>]>;
+                                          SDTCisVec<1>,
+                                          SDTCisSameAs<2, 1>]>;
def X86ptest  : SDNode<"X86ISD::PTEST", SDTX86CmpPTest>;
+def X86testp : SDNode<"X86ISD::TESTP", SDTX86CmpPTest>;

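The revised profile drops the hard v4f32 requirement: SDTCisVec<1> admits any
vector operand and SDTCisSameAs<2, 1> only requires the two operands to match,
so one profile now serves both the integer PTEST node and the new
floating-point TESTP node (AVX VTESTPS/VTESTPD). A rough sketch of the
operation PTEST implements, using the SSE4.1 intrinsic (illustration only,
not code from this file):

    #include <smmintrin.h>  // SSE4.1

    // PTEST sets ZF when (v & mask) == 0; X86ISD::PTEST models this as a
    // node producing an i32 flag value from two same-typed vector inputs.
    int no_selected_bits(__m128i v, __m128i mask) {
      return _mm_testz_si128(v, mask);  // 1 iff (v & mask) == 0
    }
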
+// Specific shuffle nodes - At some point ISD::VECTOR_SHUFFLE will always get
+// translated into one of the target nodes below during lowering.
+// Note: this is a work in progress...
+def SDTShuff1Op : SDTypeProfile<1, 1, [SDTCisVec<0>, SDTCisSameAs<0,1>]>;
+def SDTShuff2Op : SDTypeProfile<1, 2, [SDTCisVec<0>, SDTCisSameAs<0,1>,
+                                       SDTCisSameAs<0,2>]>;
+
+def SDTShuff2OpI : SDTypeProfile<1, 2, [SDTCisVec<0>,
+                                        SDTCisSameAs<0,1>, SDTCisInt<2>]>;
+def SDTShuff3OpI : SDTypeProfile<1, 3, [SDTCisVec<0>, SDTCisSameAs<0,1>,
+                                        SDTCisSameAs<0,2>, SDTCisInt<3>]>;
+
+def SDTShuff2OpLdI : SDTypeProfile<1, 2, [SDTCisVec<0>, SDTCisPtrTy<1>,
+                                          SDTCisInt<2>]>;
+
+def X86PAlign : SDNode<"X86ISD::PALIGN", SDTShuff3OpI>;
+
+def X86PShufd  : SDNode<"X86ISD::PSHUFD", SDTShuff2OpI>;
+def X86PShufhw : SDNode<"X86ISD::PSHUFHW", SDTShuff2OpI>;
+def X86PShuflw : SDNode<"X86ISD::PSHUFLW", SDTShuff2OpI>;
+
+def X86PShufhwLd : SDNode<"X86ISD::PSHUFHW_LD", SDTShuff2OpLdI>;
+def X86PShuflwLd : SDNode<"X86ISD::PSHUFLW_LD", SDTShuff2OpLdI>;
+
+def X86Shufpd : SDNode<"X86ISD::SHUFPD", SDTShuff3OpI>;
+def X86Shufps : SDNode<"X86ISD::SHUFPS", SDTShuff3OpI>;
+
+def X86Movddup  : SDNode<"X86ISD::MOVDDUP", SDTShuff1Op>;
+def X86Movshdup : SDNode<"X86ISD::MOVSHDUP", SDTShuff1Op>;
+def X86Movsldup : SDNode<"X86ISD::MOVSLDUP", SDTShuff1Op>;
+
+def X86Movsd : SDNode<"X86ISD::MOVSD", SDTShuff2Op>;
+def X86Movss : SDNode<"X86ISD::MOVSS", SDTShuff2Op>;
+
+def X86Movlhps : SDNode<"X86ISD::MOVLHPS", SDTShuff2Op>;
+def X86Movlhpd : SDNode<"X86ISD::MOVLHPD", SDTShuff2Op>;
+def X86Movhlps : SDNode<"X86ISD::MOVHLPS", SDTShuff2Op>;
+def X86Movhlpd : SDNode<"X86ISD::MOVHLPD", SDTShuff2Op>;
+
+def X86Movlps : SDNode<"X86ISD::MOVLPS", SDTShuff2Op>;
+def X86Movlpd : SDNode<"X86ISD::MOVLPD", SDTShuff2Op>;
+
+def X86Unpcklps : SDNode<"X86ISD::UNPCKLPS", SDTShuff2Op>;
+def X86Unpcklpd : SDNode<"X86ISD::UNPCKLPD", SDTShuff2Op>;
+def X86Unpckhps : SDNode<"X86ISD::UNPCKHPS", SDTShuff2Op>;
+def X86Unpckhpd : SDNode<"X86ISD::UNPCKHPD", SDTShuff2Op>;
+
+def X86Punpcklbw  : SDNode<"X86ISD::PUNPCKLBW", SDTShuff2Op>;
+def X86Punpcklwd  : SDNode<"X86ISD::PUNPCKLWD", SDTShuff2Op>;
+def X86Punpckldq  : SDNode<"X86ISD::PUNPCKLDQ", SDTShuff2Op>;
+def X86Punpcklqdq : SDNode<"X86ISD::PUNPCKLQDQ", SDTShuff2Op>;
+
+def X86Punpckhbw  : SDNode<"X86ISD::PUNPCKHBW", SDTShuff2Op>;
+def X86Punpckhwd  : SDNode<"X86ISD::PUNPCKHWD", SDTShuff2Op>;
+def X86Punpckhdq  : SDNode<"X86ISD::PUNPCKHDQ", SDTShuff2Op>;
+def X86Punpckhqdq : SDNode<"X86ISD::PUNPCKHQDQ", SDTShuff2Op>;
+
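SDTShuff2OpI above describes shuffles taking one vector plus an integer
immediate, with PSHUFD as the canonical user; SDTShuff3OpI adds a second
vector input for SHUFPS/SHUFPD/PALIGNR. A source-level sketch of the
immediate-driven shuffle (SSE2 intrinsic; illustration only, not code from
this file):

    #include <emmintrin.h>

    // PSHUFD permutes the four i32 lanes under a byte immediate; here the
    // mask _MM_SHUFFLE(0,0,0,0) broadcasts lane 0. The mask byte is the
    // SDTCisInt<2> operand in SDTShuff2OpI.
    __m128i broadcast_lane0(__m128i v) {
      return _mm_shuffle_epi32(v, _MM_SHUFFLE(0, 0, 0, 0));
    }
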
//===----------------------------------------------------------------------===//
// SSE Complex Patterns
//===----------------------------------------------------------------------===//

// These are 'extloads' from a scalar to the low element of a vector, zeroing
// the top elements. These are used for the SSE 'ss' and 'sd' instruction
// forms.
def sse_load_f32 : ComplexPattern<v4f32, 5, "SelectScalarSSELoad", [],

--- 11 unchanged lines hidden ---

  let MIOperandInfo = (ops ptr_rc, i8imm, ptr_rc_nosp, i32imm, i8imm);
  let ParserMatchClass = X86MemAsmOperand;
}

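The sse_load_f32 complex pattern lets instruction selection fold a scalar load
into the 'ss' form of an instruction. A source-level sketch of the kind of
code this enables (illustration only, not code from this file):

    #include <xmmintrin.h>

    // ADDSS can take its second operand straight from memory: the scalar
    // load is folded into the instruction, and only the low f32 lane of
    // the result differs from `a`.
    __m128 add_scalar_from_memory(__m128 a, const float *p) {
      return _mm_add_ss(a, _mm_load_ss(p));
    }
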
//===----------------------------------------------------------------------===//
// SSE pattern fragments
//===----------------------------------------------------------------------===//

+// 128-bit load pattern fragments
def loadv4f32 : PatFrag<(ops node:$ptr), (v4f32 (load node:$ptr))>;
def loadv2f64 : PatFrag<(ops node:$ptr), (v2f64 (load node:$ptr))>;
def loadv4i32 : PatFrag<(ops node:$ptr), (v4i32 (load node:$ptr))>;
def loadv2i64 : PatFrag<(ops node:$ptr), (v2i64 (load node:$ptr))>;

-// FIXME: move this to a more appropriate place after all AVX is done.
+// 256-bit load pattern fragments
def loadv8f32 : PatFrag<(ops node:$ptr), (v8f32 (load node:$ptr))>;
def loadv4f64 : PatFrag<(ops node:$ptr), (v4f64 (load node:$ptr))>;
def loadv8i32 : PatFrag<(ops node:$ptr), (v8i32 (load node:$ptr))>;
def loadv4i64 : PatFrag<(ops node:$ptr), (v4i64 (load node:$ptr))>;

// Like 'store', but always requires vector alignment.
def alignedstore : PatFrag<(ops node:$val, node:$ptr),
                           (store node:$val, node:$ptr), [{

--- 4 unchanged lines hidden ---

def alignedload : PatFrag<(ops node:$ptr), (load node:$ptr), [{
  return cast<LoadSDNode>(N)->getAlignment() >= 16;
}]>;

def alignedloadfsf32 : PatFrag<(ops node:$ptr),
                               (f32 (alignedload node:$ptr))>;
def alignedloadfsf64 : PatFrag<(ops node:$ptr),
                               (f64 (alignedload node:$ptr))>;
+// 128-bit aligned load pattern fragments
def alignedloadv4f32 : PatFrag<(ops node:$ptr),
                               (v4f32 (alignedload node:$ptr))>;
def alignedloadv2f64 : PatFrag<(ops node:$ptr),
                               (v2f64 (alignedload node:$ptr))>;
def alignedloadv4i32 : PatFrag<(ops node:$ptr),
                               (v4i32 (alignedload node:$ptr))>;
def alignedloadv2i64 : PatFrag<(ops node:$ptr),
                               (v2i64 (alignedload node:$ptr))>;

-// FIXME: move this to a more appropriate place after all AVX is done.
+// 256-bit aligned load pattern fragments
def alignedloadv8f32 : PatFrag<(ops node:$ptr),
                               (v8f32 (alignedload node:$ptr))>;
def alignedloadv4f64 : PatFrag<(ops node:$ptr),
                               (v4f64 (alignedload node:$ptr))>;
def alignedloadv8i32 : PatFrag<(ops node:$ptr),
                               (v8i32 (alignedload node:$ptr))>;
def alignedloadv4i64 : PatFrag<(ops node:$ptr),
                               (v4i64 (alignedload node:$ptr))>;
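
The alignedload/alignedstore fragments only match accesses whose alignment is
known to be at least 16 bytes, mirroring the MOVAPS/MOVUPS split (the aligned
forms fault on misaligned addresses). A sketch of the aligned case at the
source level (not code from this file):

    #include <xmmintrin.h>

    // With 16-byte-aligned pointers the compiler may use MOVAPS, which is
    // what the aligned fragments above match during instruction selection.
    void copy_vec4(float *dst, const float *src) {  // both assumed 16B aligned
      __m128 v = _mm_load_ps(src);
      _mm_store_ps(dst, v);
    }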

--- 6 unchanged lines hidden ---

// Opteron 10h and later implement such a feature.
def memop : PatFrag<(ops node:$ptr), (load node:$ptr), [{
  return Subtarget->hasVectorUAMem()
      || cast<LoadSDNode>(N)->getAlignment() >= 16;
}]>;

def memopfsf32 : PatFrag<(ops node:$ptr), (f32 (memop node:$ptr))>;
def memopfsf64 : PatFrag<(ops node:$ptr), (f64 (memop node:$ptr))>;

+// 128-bit memop pattern fragments
def memopv4f32 : PatFrag<(ops node:$ptr), (v4f32 (memop node:$ptr))>;
def memopv2f64 : PatFrag<(ops node:$ptr), (v2f64 (memop node:$ptr))>;
def memopv4i32 : PatFrag<(ops node:$ptr), (v4i32 (memop node:$ptr))>;
def memopv2i64 : PatFrag<(ops node:$ptr), (v2i64 (memop node:$ptr))>;
def memopv16i8 : PatFrag<(ops node:$ptr), (v16i8 (memop node:$ptr))>;

-// FIXME: move this to a more appropriate place after all AVX is done.
+// 256-bit memop pattern fragments
+def memopv32i8 : PatFrag<(ops node:$ptr), (v32i8 (memop node:$ptr))>;
def memopv8f32 : PatFrag<(ops node:$ptr), (v8f32 (memop node:$ptr))>;
def memopv4f64 : PatFrag<(ops node:$ptr), (v4f64 (memop node:$ptr))>;
+def memopv4i64 : PatFrag<(ops node:$ptr), (v4i64 (memop node:$ptr))>;
+def memopv8i32 : PatFrag<(ops node:$ptr), (v8i32 (memop node:$ptr))>;

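The memop fragment folds a vector load into an instruction's memory operand
when it is safe: either the subtarget tolerates unaligned vector memory
(hasVectorUAMem, per the Opteron 10h note above) or the load is 16-byte
aligned. A sketch of a fold this permits (not code from this file):

    #include <xmmintrin.h>

    // With sufficient alignment the load can be folded, so ADDPS reads its
    // second operand directly from memory -- the shape memopv4f32 matches.
    __m128 add_from_memory(__m128 a, const float *p) {  // assumed 16B aligned
      return _mm_add_ps(a, _mm_load_ps(p));
    }
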
// SSSE3 uses MMX registers for some instructions. They aren't aligned on a
// 16-byte boundary.
// FIXME: 8 byte alignment for mmx reads is not required
def memop64 : PatFrag<(ops node:$ptr), (load node:$ptr), [{
  return cast<LoadSDNode>(N)->getAlignment() >= 8;
}]>;

--- 23 unchanged lines hidden ---

def unalignednontemporalstore : PatFrag<(ops node:$val, node:$ptr),
                                        (st node:$val, node:$ptr), [{
  if (StoreSDNode *ST = dyn_cast<StoreSDNode>(N))
    return ST->isNonTemporal() &&
           ST->getAlignment() < 16;
  return false;
}]>;

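Nontemporal stores (MOVNTPS and friends) bypass the cache, but the vector
forms require 16-byte alignment; the fragment above exists to catch
under-aligned nontemporal stores so they can be lowered safely instead. The
well-aligned case at the source level (sketch, not code from this file):

    #include <xmmintrin.h>

    // MOVNTPS: a cache-bypassing store that demands a 16B-aligned address.
    void stream_store(float *dst, __m128 v) {  // dst assumed 16B aligned
      _mm_stream_ps(dst, v);
    }
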
+// 128-bit bitconvert pattern fragments
def bc_v4f32 : PatFrag<(ops node:$in), (v4f32 (bitconvert node:$in))>;
def bc_v2f64 : PatFrag<(ops node:$in), (v2f64 (bitconvert node:$in))>;
def bc_v16i8 : PatFrag<(ops node:$in), (v16i8 (bitconvert node:$in))>;
def bc_v8i16 : PatFrag<(ops node:$in), (v8i16 (bitconvert node:$in))>;
def bc_v4i32 : PatFrag<(ops node:$in), (v4i32 (bitconvert node:$in))>;
def bc_v2i64 : PatFrag<(ops node:$in), (v2i64 (bitconvert node:$in))>;

+// 256-bit bitconvert pattern fragments
+def bc_v8i32 : PatFrag<(ops node:$in), (v8i32 (bitconvert node:$in))>;

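The bc_* fragments match a bitconvert: a pure reinterpretation of the bits
under a new vector type that emits no instruction. Its source-level analogue
(sketch, not code from this file):

    #include <emmintrin.h>

    // Retype four f32 lanes as four i32 lanes; no code is generated, the
    // register contents are simply viewed under the new type.
    __m128i as_int_lanes(__m128 v) {
      return _mm_castps_si128(v);
    }
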
def vzmovl_v2i64 : PatFrag<(ops node:$src),
                           (bitconvert (v2i64 (X86vzmovl
                            (v2i64 (scalar_to_vector (loadi64 node:$src))))))>;
def vzmovl_v4i32 : PatFrag<(ops node:$src),
                           (bitconvert (v4i32 (X86vzmovl
                            (v4i32 (scalar_to_vector (loadi32 node:$src))))))>;

def vzload_v2i64 : PatFrag<(ops node:$src),

--- 127 unchanged lines hidden ---