ARMInstrNEON.td revision 194710
1194710Sed//===- ARMInstrNEON.td - NEON support for ARM -----------------------------===//
2194710Sed//
3194710Sed//                     The LLVM Compiler Infrastructure
4194710Sed//
5194710Sed// This file is distributed under the University of Illinois Open Source
6194710Sed// License. See LICENSE.TXT for details.
7194710Sed//
8194710Sed//===----------------------------------------------------------------------===//
9194710Sed//
10194710Sed// This file describes the ARM NEON instruction set.
11194710Sed//
12194710Sed//===----------------------------------------------------------------------===//
13194710Sed
14194710Sed//===----------------------------------------------------------------------===//
15194710Sed// NEON-specific DAG Nodes.
16194710Sed//===----------------------------------------------------------------------===//
17194710Sed
// Type profile shared by the compare/test nodes below: 1 result and
// 2 operands.  The result (index 0) must be an integer type and the two
// operands (indices 1 and 2) must have the same type as each other.
18194710Seddef SDTARMVCMP    : SDTypeProfile<1, 2, [SDTCisInt<0>, SDTCisSameAs<1, 2>]>;
19194710Sed
// Each def binds a TableGen name to the ARMISD opcode named in its string;
// all six use the SDTARMVCMP profile.
20194710Seddef NEONvceq      : SDNode<"ARMISD::VCEQ", SDTARMVCMP>;
21194710Seddef NEONvcge      : SDNode<"ARMISD::VCGE", SDTARMVCMP>;
22194710Seddef NEONvcgeu     : SDNode<"ARMISD::VCGEU", SDTARMVCMP>;
23194710Seddef NEONvcgt      : SDNode<"ARMISD::VCGT", SDTARMVCMP>;
24194710Seddef NEONvcgtu     : SDNode<"ARMISD::VCGTU", SDTARMVCMP>;
25194710Seddef NEONvtst      : SDNode<"ARMISD::VTST", SDTARMVCMP>;
26194710Sed
27194710Sed// Types for vector shift by immediates.  The "SHX" version is for long and
28194710Sed// narrow operations where the source and destination vectors have different
29194710Sed// types.  The "SHINS" version is for shift and insert operations.
// SDTARMVSH: 1 result, 2 operands.  Result is an integer type, operand 1 has
// the same type as the result, operand 2 (the shift amount) is i32.
30194710Seddef SDTARMVSH     : SDTypeProfile<1, 2, [SDTCisInt<0>, SDTCisSameAs<0, 1>,
31194710Sed                                         SDTCisVT<2, i32>]>;
// SDTARMVSHX: like SDTARMVSH, but the result and operand 1 are only required
// to both be integer types; they are not tied to the same type.
32194710Seddef SDTARMVSHX    : SDTypeProfile<1, 2, [SDTCisInt<0>, SDTCisInt<1>,
33194710Sed                                         SDTCisVT<2, i32>]>;
// SDTARMVSHINS: 1 result, 3 operands.  Operands 1 and 2 both have the same
// integer type as the result; operand 3 is i32.
34194710Seddef SDTARMVSHINS  : SDTypeProfile<1, 3, [SDTCisInt<0>, SDTCisSameAs<0, 1>,
35194710Sed                                         SDTCisSameAs<0, 2>, SDTCisVT<3, i32>]>;
36194710Sed
// Shift-by-immediate nodes.  Nodes using SDTARMVSH keep the vector type;
// nodes using SDTARMVSHX (the VSHLL*/VSHRN forms) allow the result type to
// differ from the source type.
37194710Seddef NEONvshl      : SDNode<"ARMISD::VSHL", SDTARMVSH>;
38194710Seddef NEONvshrs     : SDNode<"ARMISD::VSHRs", SDTARMVSH>;
39194710Seddef NEONvshru     : SDNode<"ARMISD::VSHRu", SDTARMVSH>;
40194710Seddef NEONvshlls    : SDNode<"ARMISD::VSHLLs", SDTARMVSHX>;
41194710Seddef NEONvshllu    : SDNode<"ARMISD::VSHLLu", SDTARMVSHX>;
42194710Seddef NEONvshlli    : SDNode<"ARMISD::VSHLLi", SDTARMVSHX>;
43194710Seddef NEONvshrn     : SDNode<"ARMISD::VSHRN", SDTARMVSHX>;
44194710Sed
// VRSHR* nodes (presumably the rounding variants -- confirm against the
// ARMISD definitions); the "N" form uses the differing-type profile.
45194710Seddef NEONvrshrs    : SDNode<"ARMISD::VRSHRs", SDTARMVSH>;
46194710Seddef NEONvrshru    : SDNode<"ARMISD::VRSHRu", SDTARMVSH>;
47194710Seddef NEONvrshrn    : SDNode<"ARMISD::VRSHRN", SDTARMVSHX>;
48194710Sed
// VQSH* nodes (presumably the saturating variants -- confirm against the
// ARMISD definitions); the VQSHRN* forms use the differing-type profile.
49194710Seddef NEONvqshls    : SDNode<"ARMISD::VQSHLs", SDTARMVSH>;
50194710Seddef NEONvqshlu    : SDNode<"ARMISD::VQSHLu", SDTARMVSH>;
51194710Seddef NEONvqshlsu   : SDNode<"ARMISD::VQSHLsu", SDTARMVSH>;
52194710Seddef NEONvqshrns   : SDNode<"ARMISD::VQSHRNs", SDTARMVSHX>;
53194710Seddef NEONvqshrnu   : SDNode<"ARMISD::VQSHRNu", SDTARMVSHX>;
54194710Seddef NEONvqshrnsu  : SDNode<"ARMISD::VQSHRNsu", SDTARMVSHX>;
55194710Sed
// VQRSHRN* nodes; all use the differing-type profile.
56194710Seddef NEONvqrshrns  : SDNode<"ARMISD::VQRSHRNs", SDTARMVSHX>;
57194710Seddef NEONvqrshrnu  : SDNode<"ARMISD::VQRSHRNu", SDTARMVSHX>;
58194710Seddef NEONvqrshrnsu : SDNode<"ARMISD::VQRSHRNsu", SDTARMVSHX>;
59194710Sed
// Shift-and-insert nodes: two same-typed vector operands plus an i32 operand
// (SDTARMVSHINS).
60194710Seddef NEONvsli      : SDNode<"ARMISD::VSLI", SDTARMVSHINS>;
61194710Seddef NEONvsri      : SDNode<"ARMISD::VSRI", SDTARMVSHINS>;
62194710Sed
// Type profile for the get-lane nodes: 1 result of type i32, 2 operands.
// Operand 1 is an integer type and operand 2 is i32.
63194710Seddef SDTARMVGETLN  : SDTypeProfile<1, 2, [SDTCisVT<0, i32>, SDTCisInt<1>,
64194710Sed                                         SDTCisVT<2, i32>]>;
// The "u" and "s" nodes share the profile and differ only in the ARMISD
// opcode they name (presumably zero- vs. sign-extending -- TODO confirm).
65194710Seddef NEONvgetlaneu : SDNode<"ARMISD::VGETLANEu", SDTARMVGETLN>;
66194710Seddef NEONvgetlanes : SDNode<"ARMISD::VGETLANEs", SDTARMVGETLN>;
67194710Sed
// Node for ARMISD::VDUPLANEQ with an inline profile: 1 result, 2 operands.
// Only operand 2 is constrained (it must be i32); the result and operand 1
// types are left unconstrained by this profile.
68194710Seddef NEONvduplaneq : SDNode<"ARMISD::VDUPLANEQ",
69194710Sed                           SDTypeProfile<1, 2, [SDTCisVT<2, i32>]>>;
70194710Sed
71194710Sed//===----------------------------------------------------------------------===//
72194710Sed// NEON operand definitions
73194710Sed//===----------------------------------------------------------------------===//
74194710Sed
75194710Sed// addrmode_neonldstm := reg
76194710Sed//
77194710Sed/* TODO: Take advantage of vldm.
78194710Seddef addrmode_neonldstm : Operand<i32>,
79194710Sed                ComplexPattern<i32, 2, "SelectAddrModeNeonLdStM", []> {
80194710Sed  let PrintMethod = "printAddrNeonLdStMOperand";
81194710Sed  let MIOperandInfo = (ops GPR, i32imm);
82194710Sed}
83194710Sed*/
84194710Sed
85194710Sed//===----------------------------------------------------------------------===//
86194710Sed// NEON load / store instructions
87194710Sed//===----------------------------------------------------------------------===//
88194710Sed
89194710Sed/* TODO: Take advantage of vldm.
90194710Sedlet mayLoad = 1 in {
91194710Seddef VLDMD : NI<(outs),
92194710Sed               (ins addrmode_neonldstm:$addr, reglist:$dst1, variable_ops),
93194710Sed               "vldm${addr:submode} ${addr:base}, $dst1",
94194710Sed               []>;
95194710Sed
96194710Seddef VLDMS : NI<(outs),
97194710Sed               (ins addrmode_neonldstm:$addr, reglist:$dst1, variable_ops),
98194710Sed               "vldm${addr:submode} ${addr:base}, $dst1",
99194710Sed               []>;
100194710Sed}
101194710Sed*/
102194710Sed
103194710Sed// Use vldmia to load a Q register as a D register pair.
// VLDRQ: one QPR output, one GPR address input.  Selected for a v2f64 load
// from the address register and printed as "vldmia", with the destination
// emitted through the "dregpair" operand modifier.
104194710Seddef VLDRQ : NI<(outs QPR:$dst), (ins GPR:$addr),
105194710Sed               "vldmia $addr, ${dst:dregpair}",
106194710Sed               [(set QPR:$dst, (v2f64 (load GPR:$addr)))]>;
107194710Sed
108194710Sed// Use vstmia to store a Q register as a D register pair.
// VSTRQ: no outputs; inputs are the QPR value to store and a GPR address.
// Selected for a v2f64 store to the address register and printed as
// "vstmia", with the source emitted through the "dregpair" operand modifier.
109194710Seddef VSTRQ : NI<(outs), (ins QPR:$src, GPR:$addr),
110194710Sed               "vstmia $addr, ${src:dregpair}",
111194710Sed               [(store (v2f64 QPR:$src), GPR:$addr)]>;
112194710Sed
113194710Sed
114194710Sed//===----------------------------------------------------------------------===//
115194710Sed// NEON pattern fragments
116194710Sed//===----------------------------------------------------------------------===//
117194710Sed
118194710Sed// Extract D sub-registers of Q registers.
119194710Sed// (arm_dsubreg_0 is 5; arm_dsubreg_1 is 6)
// Transforms an immediate into the i32 target constant 5 + imm / 8.  With
// the sub-register numbering noted above, immediates 0-7 give 5 and 8-15
// give 6.  (Presumably the immediate is an i8 lane index within a Q
// register -- confirm against the patterns that use this transform.)
120194710Seddef SubReg_i8_reg  : SDNodeXForm<imm, [{
121194710Sed  return CurDAG->getTargetConstant(5 + N->getZExtValue() / 8, MVT::i32);
122194710Sed}]>;
// Transforms an immediate into the i32 target constant 5 + imm / 4, so
// immediates 0-3 give 5 and 4-7 give 6.  (Presumably the immediate is an
// i16 lane index within a Q register -- confirm against the users.)
123194710Seddef SubReg_i16_reg : SDNodeXForm<imm, [{
124194710Sed  return CurDAG->getTargetConstant(5 + N->getZExtValue() / 4, MVT::i32);
125194710Sed}]>;
// Transforms an immediate into the i32 target constant 5 + imm / 2, so
// immediates 0-1 give 5 and 2-3 give 6.  (Presumably the immediate is a
// 32-bit lane index within a Q register -- confirm against the users.)
126194710Seddef SubReg_i32_reg : SDNodeXForm<imm, [{
127194710Sed  return CurDAG->getTargetConstant(5 + N->getZExtValue() / 2, MVT::i32);
128194710Sed}]>;
// Transforms an immediate into the i32 target constant 5 + imm, with no
// division: immediate 0 gives 5 and immediate 1 gives 6.
129194710Seddef SubReg_f64_reg : SDNodeXForm<imm, [{
130194710Sed  return CurDAG->getTargetConstant(5 + N->getZExtValue(), MVT::i32);
131194710Sed}]>;
132194710Sed
133194710Sed// Translate lane numbers from Q registers to D subregs.
// Keeps the low three bits of the immediate (imm & 7) and returns them as
// an i32 target constant, i.e. a value in 0-7.
134194710Seddef SubReg_i8_lane  : SDNodeXForm<imm, [{
135194710Sed  return CurDAG->getTargetConstant(N->getZExtValue() & 7, MVT::i32);
136194710Sed}]>;
// Keeps the low two bits of the immediate (imm & 3) and returns them as an
// i32 target constant, i.e. a value in 0-3.
137194710Seddef SubReg_i16_lane : SDNodeXForm<imm, [{
138194710Sed  return CurDAG->getTargetConstant(N->getZExtValue() & 3, MVT::i32);
139194710Sed}]>;
// Keeps the low bit of the immediate (imm & 1) and returns it as an i32
// target constant, i.e. 0 or 1.
140194710Seddef SubReg_i32_lane : SDNodeXForm<imm, [{
141194710Sed  return CurDAG->getTargetConstant(N->getZExtValue() & 1, MVT::i32);
142194710Sed}]>;
143194710Sed
144194710Sed//===----------------------------------------------------------------------===//
145194710Sed// Instruction Classes
146194710Sed//===----------------------------------------------------------------------===//
147194710Sed
148194710Sed// Basic 2-register operations, both double- and quad-register.
// N2VD: D-register form.  Forwards the encoding fields to N2V with 0 in the
// sixth argument (the slot N2VNInt fills with op6), prints
// "<OpcodeStr>\t$dst, $src", and matches ResTy (OpNode (OpTy DPR:$src)).
149194710Sedclass N2VD<bits<2> op24_23, bits<2> op21_20, bits<2> op19_18,
150194710Sed           bits<2> op17_16, bits<5> op11_7, bit op4, string OpcodeStr,
151194710Sed           ValueType ResTy, ValueType OpTy, SDNode OpNode>
152194710Sed  : N2V<op24_23, op21_20, op19_18, op17_16, op11_7, 0, op4, (outs DPR:$dst),
153194710Sed        (ins DPR:$src), !strconcat(OpcodeStr, "\t$dst, $src"), "",
154194710Sed        [(set DPR:$dst, (ResTy (OpNode (OpTy DPR:$src))))]>;
// N2VQ: Q-register form.  Same shape as N2VD but passes 1 in the sixth N2V
// argument and uses QPR for both the destination and the source.
155194710Sedclass N2VQ<bits<2> op24_23, bits<2> op21_20, bits<2> op19_18,
156194710Sed           bits<2> op17_16, bits<5> op11_7, bit op4, string OpcodeStr,
157194710Sed           ValueType ResTy, ValueType OpTy, SDNode OpNode>
158194710Sed  : N2V<op24_23, op21_20, op19_18, op17_16, op11_7, 1, op4, (outs QPR:$dst),
159194710Sed        (ins QPR:$src), !strconcat(OpcodeStr, "\t$dst, $src"), "",
160194710Sed        [(set QPR:$dst, (ResTy (OpNode (OpTy QPR:$src))))]>;
161194710Sed
162194710Sed// Basic 2-register intrinsics, both double- and quad-register.
// N2VDInt: D-register form whose pattern applies an Intrinsic (IntOp)
// instead of an SDNode: ResTy (IntOp (OpTy DPR:$src)).  Passes 0 in the
// sixth N2V argument.
163194710Sedclass N2VDInt<bits<2> op24_23, bits<2> op21_20, bits<2> op19_18,
164194710Sed              bits<2> op17_16, bits<5> op11_7, bit op4, string OpcodeStr,
165194710Sed              ValueType ResTy, ValueType OpTy, Intrinsic IntOp>
166194710Sed  : N2V<op24_23, op21_20, op19_18, op17_16, op11_7, 0, op4, (outs DPR:$dst),
167194710Sed        (ins DPR:$src), !strconcat(OpcodeStr, "\t$dst, $src"), "",
168194710Sed        [(set DPR:$dst, (ResTy (IntOp (OpTy DPR:$src))))]>;
// N2VQInt: Q-register counterpart of N2VDInt.  Passes 1 in the sixth N2V
// argument and uses QPR for the destination and the source.
169194710Sedclass N2VQInt<bits<2> op24_23, bits<2> op21_20, bits<2> op19_18,
170194710Sed              bits<2> op17_16, bits<5> op11_7, bit op4, string OpcodeStr,
171194710Sed              ValueType ResTy, ValueType OpTy, Intrinsic IntOp>
172194710Sed  : N2V<op24_23, op21_20, op19_18, op17_16, op11_7, 1, op4, (outs QPR:$dst),
173194710Sed        (ins QPR:$src), !strconcat(OpcodeStr, "\t$dst, $src"), "",
174194710Sed        [(set QPR:$dst, (ResTy (IntOp (OpTy QPR:$src))))]>;
175194710Sed
176194710Sed// Narrow 2-register intrinsics.
// N2VNInt: Q-register source (type TyQ), D-register result (type TyD).
// Unlike the N2VD/N2VQ classes, op6 is a parameter here and is forwarded to
// N2V rather than being fixed to 0 or 1.
177194710Sedclass N2VNInt<bits<2> op24_23, bits<2> op21_20, bits<2> op19_18,
178194710Sed              bits<2> op17_16, bits<5> op11_7, bit op6, bit op4,
179194710Sed              string OpcodeStr, ValueType TyD, ValueType TyQ, Intrinsic IntOp>
180194710Sed  : N2V<op24_23, op21_20, op19_18, op17_16, op11_7, op6, op4, (outs DPR:$dst),
181194710Sed        (ins QPR:$src), !strconcat(OpcodeStr, "\t$dst, $src"), "",
182194710Sed        [(set DPR:$dst, (TyD (IntOp (TyQ QPR:$src))))]>;
183194710Sed
184194710Sed// Long 2-register intrinsics.  (This is currently only used for VMOVL and is
185194710Sed// derived from N2VImm instead of N2V because of the way the size is encoded.)
// N2VLInt: D-register source (type TyD), Q-register result (type TyQ).
// Built on N2VImm, so it takes op24/op23/op21_16/op11_8/op7/op6/op4 rather
// than the N2V field set; there is no immediate operand in the ins list.
186194710Sedclass N2VLInt<bit op24, bit op23, bits<6> op21_16, bits<4> op11_8, bit op7,
187194710Sed              bit op6, bit op4, string OpcodeStr, ValueType TyQ, ValueType TyD,
188194710Sed              Intrinsic IntOp>
189194710Sed  : N2VImm<op24, op23, op21_16, op11_8, op7, op6, op4, (outs QPR:$dst),
190194710Sed        (ins DPR:$src), !strconcat(OpcodeStr, "\t$dst, $src"), "",
191194710Sed        [(set QPR:$dst, (TyQ (IntOp (TyD DPR:$src))))]>;
192194710Sed
193194710Sed// Basic 3-register operations, both double- and quad-register.
// N3VD: two D-register sources, D-register result.  Passes 0 in the fifth
// N3V argument (N3VQ passes 1 there), prints "<OpcodeStr>\t$dst, $src1,
// $src2", and matches ResTy (OpNode (OpTy $src1), (OpTy $src2)).  The
// Commutable parameter is copied into isCommutable.
194194710Sedclass N3VD<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op4,
195194710Sed           string OpcodeStr, ValueType ResTy, ValueType OpTy,
196194710Sed           SDNode OpNode, bit Commutable>
197194710Sed  : N3V<op24, op23, op21_20, op11_8, 0, op4,
198194710Sed        (outs DPR:$dst), (ins DPR:$src1, DPR:$src2),
199194710Sed        !strconcat(OpcodeStr, "\t$dst, $src1, $src2"), "",
200194710Sed        [(set DPR:$dst, (ResTy (OpNode (OpTy DPR:$src1), (OpTy DPR:$src2))))]> {
201194710Sed  let isCommutable = Commutable;
202194710Sed}
// N3VQ: Q-register counterpart of N3VD.  Passes 1 in the fifth N3V argument
// and uses QPR for the destination and both sources.  The Commutable
// parameter is copied into isCommutable.
203194710Sedclass N3VQ<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op4,
204194710Sed           string OpcodeStr, ValueType ResTy, ValueType OpTy,
205194710Sed           SDNode OpNode, bit Commutable>
206194710Sed  : N3V<op24, op23, op21_20, op11_8, 1, op4,
207194710Sed        (outs QPR:$dst), (ins QPR:$src1, QPR:$src2),
208194710Sed        !strconcat(OpcodeStr, "\t$dst, $src1, $src2"), "",
209194710Sed        [(set QPR:$dst, (ResTy (OpNode (OpTy QPR:$src1), (OpTy QPR:$src2))))]> {
210194710Sed  let isCommutable = Commutable;
211194710Sed}
212194710Sed
213194710Sed// Basic 3-register intrinsics, both double- and quad-register.
// N3VDInt: same operands and assembly form as N3VD, but the pattern applies
// an Intrinsic (IntOp) to the two D-register sources.  Passes 0 in the
// fifth N3V argument; Commutable is copied into isCommutable.
214194710Sedclass N3VDInt<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op4,
215194710Sed              string OpcodeStr, ValueType ResTy, ValueType OpTy,
216194710Sed              Intrinsic IntOp, bit Commutable>
217194710Sed  : N3V<op24, op23, op21_20, op11_8, 0, op4,
218194710Sed        (outs DPR:$dst), (ins DPR:$src1, DPR:$src2),
219194710Sed        !strconcat(OpcodeStr, "\t$dst, $src1, $src2"), "",
220194710Sed        [(set DPR:$dst, (ResTy (IntOp (OpTy DPR:$src1), (OpTy DPR:$src2))))]> {
221194710Sed  let isCommutable = Commutable;
222194710Sed}
// N3VQInt: Q-register counterpart of N3VDInt.  Passes 1 in the fifth N3V
// argument and uses QPR for the destination and both sources; Commutable is
// copied into isCommutable.
223194710Sedclass N3VQInt<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op4,
224194710Sed              string OpcodeStr, ValueType ResTy, ValueType OpTy,
225194710Sed              Intrinsic IntOp, bit Commutable>
226194710Sed  : N3V<op24, op23, op21_20, op11_8, 1, op4,
227194710Sed        (outs QPR:$dst), (ins QPR:$src1, QPR:$src2),
228194710Sed        !strconcat(OpcodeStr, "\t$dst, $src1, $src2"), "",
229194710Sed        [(set QPR:$dst, (ResTy (IntOp (OpTy QPR:$src1), (OpTy QPR:$src2))))]> {
230194710Sed  let isCommutable = Commutable;
231194710Sed}
232194710Sed
233194710Sed// Multiply-Add/Sub operations, both double- and quad-register.
// N3VDMulOp: D-register form taking three sources.  The pattern is
// OpNode($src1, MulOp($src2, $src3)), all of type Ty.  $src1 is constrained
// to the destination register ("$src1 = $dst"), so only $dst, $src2 and
// $src3 appear in the assembly string.
234194710Sedclass N3VDMulOp<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op4,
235194710Sed                string OpcodeStr, ValueType Ty, SDNode MulOp, SDNode OpNode>
236194710Sed  : N3V<op24, op23, op21_20, op11_8, 0, op4,
237194710Sed        (outs DPR:$dst), (ins DPR:$src1, DPR:$src2, DPR:$src3),
238194710Sed        !strconcat(OpcodeStr, "\t$dst, $src2, $src3"), "$src1 = $dst",
239194710Sed        [(set DPR:$dst, (Ty (OpNode DPR:$src1,
240194710Sed                             (Ty (MulOp DPR:$src2, DPR:$src3)))))]>;
// N3VQMulOp: Q-register counterpart of N3VDMulOp.  Passes 1 in the fifth
// N3V argument; the pattern is OpNode($src1, MulOp($src2, $src3)) with
// $src1 constrained to $dst.
241194710Sedclass N3VQMulOp<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op4,
242194710Sed                string OpcodeStr, ValueType Ty, SDNode MulOp, SDNode OpNode>
243194710Sed  : N3V<op24, op23, op21_20, op11_8, 1, op4,
244194710Sed        (outs QPR:$dst), (ins QPR:$src1, QPR:$src2, QPR:$src3),
245194710Sed        !strconcat(OpcodeStr, "\t$dst, $src2, $src3"), "$src1 = $dst",
246194710Sed        [(set QPR:$dst, (Ty (OpNode QPR:$src1,
247194710Sed                             (Ty (MulOp QPR:$src2, QPR:$src3)))))]>;
248194710Sed
249194710Sed// Neon 3-argument intrinsics, both double- and quad-register.
250194710Sed// The destination register is also used as the first source operand register.
// N3VDInt3: D-register form.  IntOp receives all three sources, each cast
// to OpTy, and yields ResTy.  $src1 is constrained to $dst and is omitted
// from the printed operand list.
251194710Sedclass N3VDInt3<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op4,
252194710Sed               string OpcodeStr, ValueType ResTy, ValueType OpTy,
253194710Sed               Intrinsic IntOp>
254194710Sed  : N3V<op24, op23, op21_20, op11_8, 0, op4,
255194710Sed        (outs DPR:$dst), (ins DPR:$src1, DPR:$src2, DPR:$src3),
256194710Sed        !strconcat(OpcodeStr, "\t$dst, $src2, $src3"), "$src1 = $dst",
257194710Sed        [(set DPR:$dst, (ResTy (IntOp (OpTy DPR:$src1),
258194710Sed                                      (OpTy DPR:$src2), (OpTy DPR:$src3))))]>;
// N3VQInt3: Q-register counterpart of N3VDInt3.  Passes 1 in the fifth N3V
// argument; $src1 is constrained to $dst and is omitted from the printed
// operand list.
259194710Sedclass N3VQInt3<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op4,
260194710Sed               string OpcodeStr, ValueType ResTy, ValueType OpTy,
261194710Sed               Intrinsic IntOp>
262194710Sed  : N3V<op24, op23, op21_20, op11_8, 1, op4,
263194710Sed        (outs QPR:$dst), (ins QPR:$src1, QPR:$src2, QPR:$src3),
264194710Sed        !strconcat(OpcodeStr, "\t$dst, $src2, $src3"), "$src1 = $dst",
265194710Sed        [(set QPR:$dst, (ResTy (IntOp (OpTy QPR:$src1),
266194710Sed                                      (OpTy QPR:$src2), (OpTy QPR:$src3))))]>;
267194710Sed
268194710Sed// Neon Long 3-argument intrinsic.  The destination register is
269194710Sed// a quad-register and is also used as the first source operand register.
// N3VLInt3: $src1 and the result are Q registers of type TyQ; $src2 and
// $src3 are D registers of type TyD.  $src1 is constrained to $dst.  Note
// that the fifth N3V argument is 0 here even though the result is a Q
// register.
270194710Sedclass N3VLInt3<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op4,
271194710Sed               string OpcodeStr, ValueType TyQ, ValueType TyD, Intrinsic IntOp>
272194710Sed  : N3V<op24, op23, op21_20, op11_8, 0, op4,
273194710Sed        (outs QPR:$dst), (ins QPR:$src1, DPR:$src2, DPR:$src3),
274194710Sed        !strconcat(OpcodeStr, "\t$dst, $src2, $src3"), "$src1 = $dst",
275194710Sed        [(set QPR:$dst,
276194710Sed          (TyQ (IntOp (TyQ QPR:$src1), (TyD DPR:$src2), (TyD DPR:$src3))))]>;
277194710Sed
278194710Sed// Narrowing 3-register intrinsics.
// N3VNInt: two Q-register sources of type TyQ, one D-register result of
// type TyD.  Passes 0 in the fifth N3V argument; Commutable is copied into
// isCommutable.
279194710Sedclass N3VNInt<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op4,
280194710Sed              string OpcodeStr, ValueType TyD, ValueType TyQ,
281194710Sed              Intrinsic IntOp, bit Commutable>
282194710Sed  : N3V<op24, op23, op21_20, op11_8, 0, op4,
283194710Sed        (outs DPR:$dst), (ins QPR:$src1, QPR:$src2),
284194710Sed        !strconcat(OpcodeStr, "\t$dst, $src1, $src2"), "",
285194710Sed        [(set DPR:$dst, (TyD (IntOp (TyQ QPR:$src1), (TyQ QPR:$src2))))]> {
286194710Sed  let isCommutable = Commutable;
287194710Sed}
288194710Sed
289194710Sed// Long 3-register intrinsics.
// N3VLInt: two D-register sources of type TyD, one Q-register result of
// type TyQ.  Passes 0 in the fifth N3V argument; Commutable is copied into
// isCommutable.
290194710Sedclass N3VLInt<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op4,
291194710Sed              string OpcodeStr, ValueType TyQ, ValueType TyD,
292194710Sed              Intrinsic IntOp, bit Commutable>
293194710Sed  : N3V<op24, op23, op21_20, op11_8, 0, op4,
294194710Sed        (outs QPR:$dst), (ins DPR:$src1, DPR:$src2),
295194710Sed        !strconcat(OpcodeStr, "\t$dst, $src1, $src2"), "",
296194710Sed        [(set QPR:$dst, (TyQ (IntOp (TyD DPR:$src1), (TyD DPR:$src2))))]> {
297194710Sed  let isCommutable = Commutable;
298194710Sed}
299194710Sed
300194710Sed// Wide 3-register intrinsics.
// N3VWInt: mixed-width sources.  $src1 is a Q register of type TyQ, $src2
// is a D register of type TyD, and the result is a Q register of type TyQ.
// Passes 0 in the fifth N3V argument; Commutable is copied into
// isCommutable.
301194710Sedclass N3VWInt<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op4,
302194710Sed              string OpcodeStr, ValueType TyQ, ValueType TyD,
303194710Sed              Intrinsic IntOp, bit Commutable>
304194710Sed  : N3V<op24, op23, op21_20, op11_8, 0, op4,
305194710Sed        (outs QPR:$dst), (ins QPR:$src1, DPR:$src2),
306194710Sed        !strconcat(OpcodeStr, "\t$dst, $src1, $src2"), "",
307194710Sed        [(set QPR:$dst, (TyQ (IntOp (TyQ QPR:$src1), (TyD DPR:$src2))))]> {
308194710Sed  let isCommutable = Commutable;
309194710Sed}
310194710Sed
311194710Sed// Pairwise long 2-register intrinsics, both double- and quad-register.
// N2VDPLInt: D-register form with separate ResTy and OpTy so the result
// vector type can differ from the source vector type.  The record body is
// otherwise the same as N2VDInt.
312194710Sedclass N2VDPLInt<bits<2> op24_23, bits<2> op21_20, bits<2> op19_18,
313194710Sed                bits<2> op17_16, bits<5> op11_7, bit op4, string OpcodeStr,
314194710Sed                ValueType ResTy, ValueType OpTy, Intrinsic IntOp>
315194710Sed  : N2V<op24_23, op21_20, op19_18, op17_16, op11_7, 0, op4, (outs DPR:$dst),
316194710Sed        (ins DPR:$src), !strconcat(OpcodeStr, "\t$dst, $src"), "",
317194710Sed        [(set DPR:$dst, (ResTy (IntOp (OpTy DPR:$src))))]>;
// N2VQPLInt: Q-register counterpart of N2VDPLInt.  Passes 1 in the sixth
// N2V argument and uses QPR for the destination and the source.
318194710Sedclass N2VQPLInt<bits<2> op24_23, bits<2> op21_20, bits<2> op19_18,
319194710Sed                bits<2> op17_16, bits<5> op11_7, bit op4, string OpcodeStr,
320194710Sed                ValueType ResTy, ValueType OpTy, Intrinsic IntOp>
321194710Sed  : N2V<op24_23, op21_20, op19_18, op17_16, op11_7, 1, op4, (outs QPR:$dst),
322194710Sed        (ins QPR:$src), !strconcat(OpcodeStr, "\t$dst, $src"), "",
323194710Sed        [(set QPR:$dst, (ResTy (IntOp (OpTy QPR:$src))))]>;
324194710Sed
325194710Sed// Pairwise long 2-register accumulate intrinsics,
326194710Sed// both double- and quad-register.
327194710Sed// The destination register is also used as the first source operand register.
// N2VDPLInt2: D-register form.  IntOp takes $src1 as ResTy (the accumulator,
// constrained to $dst) and $src2 as OpTy; only $dst and $src2 are printed.
328194710Sedclass N2VDPLInt2<bits<2> op24_23, bits<2> op21_20, bits<2> op19_18,
329194710Sed                 bits<2> op17_16, bits<5> op11_7, bit op4, string OpcodeStr,
330194710Sed                 ValueType ResTy, ValueType OpTy, Intrinsic IntOp>
331194710Sed  : N2V<op24_23, op21_20, op19_18, op17_16, op11_7, 0, op4,
332194710Sed        (outs DPR:$dst), (ins DPR:$src1, DPR:$src2),
333194710Sed        !strconcat(OpcodeStr, "\t$dst, $src2"), "$src1 = $dst",
334194710Sed        [(set DPR:$dst, (ResTy (IntOp (ResTy DPR:$src1), (OpTy DPR:$src2))))]>;
// N2VQPLInt2: Q-register counterpart of N2VDPLInt2.  Passes 1 in the sixth
// N2V argument; $src1 (as ResTy) is constrained to $dst and only $dst and
// $src2 are printed.
335194710Sedclass N2VQPLInt2<bits<2> op24_23, bits<2> op21_20, bits<2> op19_18,
336194710Sed                 bits<2> op17_16, bits<5> op11_7, bit op4, string OpcodeStr,
337194710Sed                 ValueType ResTy, ValueType OpTy, Intrinsic IntOp>
338194710Sed  : N2V<op24_23, op21_20, op19_18, op17_16, op11_7, 1, op4,
339194710Sed        (outs QPR:$dst), (ins QPR:$src1, QPR:$src2),
340194710Sed        !strconcat(OpcodeStr, "\t$dst, $src2"), "$src1 = $dst",
341194710Sed        [(set QPR:$dst, (ResTy (IntOp (ResTy QPR:$src1), (OpTy QPR:$src2))))]>;
342194710Sed
343194710Sed// Shift by immediate,
344194710Sed// both double- and quad-register.
// N2VDSh: D-register form.  Takes a vector source plus an i32 immediate
// operand ($SIMM) and matches Ty (OpNode (Ty $src), (i32 imm)).  Passes 0
// in the sixth N2VImm argument (the slot N2VLSh fills with op6).
345194710Sedclass N2VDSh<bit op24, bit op23, bits<6> op21_16, bits<4> op11_8, bit op7,
346194710Sed             bit op4, string OpcodeStr, ValueType Ty, SDNode OpNode>
347194710Sed  : N2VImm<op24, op23, op21_16, op11_8, op7, 0, op4,
348194710Sed           (outs DPR:$dst), (ins DPR:$src, i32imm:$SIMM),
349194710Sed           !strconcat(OpcodeStr, "\t$dst, $src, $SIMM"), "",
350194710Sed           [(set DPR:$dst, (Ty (OpNode (Ty DPR:$src), (i32 imm:$SIMM))))]>;
// N2VQSh: Q-register counterpart of N2VDSh.  Passes 1 in the sixth N2VImm
// argument and uses QPR for the destination and the vector source.
351194710Sedclass N2VQSh<bit op24, bit op23, bits<6> op21_16, bits<4> op11_8, bit op7,
352194710Sed             bit op4, string OpcodeStr, ValueType Ty, SDNode OpNode>
353194710Sed  : N2VImm<op24, op23, op21_16, op11_8, op7, 1, op4,
354194710Sed           (outs QPR:$dst), (ins QPR:$src, i32imm:$SIMM),
355194710Sed           !strconcat(OpcodeStr, "\t$dst, $src, $SIMM"), "",
356194710Sed           [(set QPR:$dst, (Ty (OpNode (Ty QPR:$src), (i32 imm:$SIMM))))]>;
357194710Sed
358194710Sed// Long shift by immediate.
// N2VLSh: D-register source of type OpTy, Q-register result of type ResTy,
// plus an i32 immediate operand.  op6 is a parameter and is forwarded to
// N2VImm.
359194710Sedclass N2VLSh<bit op24, bit op23, bits<6> op21_16, bits<4> op11_8, bit op7,
360194710Sed             bit op6, bit op4, string OpcodeStr, ValueType ResTy,
361194710Sed             ValueType OpTy, SDNode OpNode>
362194710Sed  : N2VImm<op24, op23, op21_16, op11_8, op7, op6, op4,
363194710Sed           (outs QPR:$dst), (ins DPR:$src, i32imm:$SIMM),
364194710Sed           !strconcat(OpcodeStr, "\t$dst, $src, $SIMM"), "",
365194710Sed           [(set QPR:$dst, (ResTy (OpNode (OpTy DPR:$src),
366194710Sed                                          (i32 imm:$SIMM))))]>;
367194710Sed
368194710Sed// Narrow shift by immediate.
// N2VNSh: Q-register source of type OpTy, D-register result of type ResTy,
// plus an i32 immediate operand.  op6 is a parameter and is forwarded to
// N2VImm.
369194710Sedclass N2VNSh<bit op24, bit op23, bits<6> op21_16, bits<4> op11_8, bit op7,
370194710Sed             bit op6, bit op4, string OpcodeStr, ValueType ResTy,
371194710Sed             ValueType OpTy, SDNode OpNode>
372194710Sed  : N2VImm<op24, op23, op21_16, op11_8, op7, op6, op4,
373194710Sed           (outs DPR:$dst), (ins QPR:$src, i32imm:$SIMM),
374194710Sed           !strconcat(OpcodeStr, "\t$dst, $src, $SIMM"), "",
375194710Sed           [(set DPR:$dst, (ResTy (OpNode (OpTy QPR:$src),
376194710Sed                                          (i32 imm:$SIMM))))]>;
377194710Sed
378194710Sed// Shift right by immediate and accumulate,
379194710Sed// both double- and quad-register.
// N2VDShAdd: D-register form.  Matches add($src1, ShOp($src2, imm)) with all
// vector values of type Ty.  $src1 is the accumulator, constrained to $dst;
// only $dst, $src2 and $SIMM are printed.
380194710Sedclass N2VDShAdd<bit op24, bit op23, bits<6> op21_16, bits<4> op11_8, bit op7,
381194710Sed                bit op4, string OpcodeStr, ValueType Ty, SDNode ShOp>
382194710Sed  : N2VImm<op24, op23, op21_16, op11_8, op7, 0, op4,
383194710Sed           (outs DPR:$dst), (ins DPR:$src1, DPR:$src2, i32imm:$SIMM),
384194710Sed           !strconcat(OpcodeStr, "\t$dst, $src2, $SIMM"), "$src1 = $dst",
385194710Sed           [(set DPR:$dst, (Ty (add DPR:$src1,
386194710Sed                                (Ty (ShOp DPR:$src2, (i32 imm:$SIMM))))))]>;
// N2VQShAdd: Q-register counterpart of N2VDShAdd.  Passes 1 in the sixth
// N2VImm argument; matches add($src1, ShOp($src2, imm)) with $src1
// constrained to $dst.
387194710Sedclass N2VQShAdd<bit op24, bit op23, bits<6> op21_16, bits<4> op11_8, bit op7,
388194710Sed                bit op4, string OpcodeStr, ValueType Ty, SDNode ShOp>
389194710Sed  : N2VImm<op24, op23, op21_16, op11_8, op7, 1, op4,
390194710Sed           (outs QPR:$dst), (ins QPR:$src1, QPR:$src2, i32imm:$SIMM),
391194710Sed           !strconcat(OpcodeStr, "\t$dst, $src2, $SIMM"), "$src1 = $dst",
392194710Sed           [(set QPR:$dst, (Ty (add QPR:$src1,
393194710Sed                                (Ty (ShOp QPR:$src2, (i32 imm:$SIMM))))))]>;
394194710Sed
395194710Sed// Shift by immediate and insert,
396194710Sed// both double- and quad-register.
// N2VDShIns: D-register form.  ShOp is a three-operand node taking $src1,
// $src2 and the i32 immediate directly (no separate add).  $src1 is
// constrained to $dst; only $dst, $src2 and $SIMM are printed.
397194710Sedclass N2VDShIns<bit op24, bit op23, bits<6> op21_16, bits<4> op11_8, bit op7,
398194710Sed                bit op4, string OpcodeStr, ValueType Ty, SDNode ShOp>
399194710Sed  : N2VImm<op24, op23, op21_16, op11_8, op7, 0, op4,
400194710Sed           (outs DPR:$dst), (ins DPR:$src1, DPR:$src2, i32imm:$SIMM),
401194710Sed           !strconcat(OpcodeStr, "\t$dst, $src2, $SIMM"), "$src1 = $dst",
402194710Sed           [(set DPR:$dst, (Ty (ShOp DPR:$src1, DPR:$src2, (i32 imm:$SIMM))))]>;
// N2VQShIns: Q-register counterpart of N2VDShIns.  Passes 1 in the sixth
// N2VImm argument; ShOp takes $src1, $src2 and the immediate, with $src1
// constrained to $dst.
403194710Sedclass N2VQShIns<bit op24, bit op23, bits<6> op21_16, bits<4> op11_8, bit op7,
404194710Sed                bit op4, string OpcodeStr, ValueType Ty, SDNode ShOp>
405194710Sed  : N2VImm<op24, op23, op21_16, op11_8, op7, 1, op4,
406194710Sed           (outs QPR:$dst), (ins QPR:$src1, QPR:$src2, i32imm:$SIMM),
407194710Sed           !strconcat(OpcodeStr, "\t$dst, $src2, $SIMM"), "$src1 = $dst",
408194710Sed           [(set QPR:$dst, (Ty (ShOp QPR:$src1, QPR:$src2, (i32 imm:$SIMM))))]>;
409194710Sed
410194710Sed// Convert, with fractional bits immediate,
411194710Sed// both double- and quad-register.
// N2VCvtD: D-register form.  IntOp takes the source (as OpTy) and an i32
// immediate and yields ResTy.  Operands and assembly string have the same
// shape as N2VDSh, but the pattern uses an Intrinsic instead of an SDNode.
412194710Sedclass N2VCvtD<bit op24, bit op23, bits<6> op21_16, bits<4> op11_8, bit op7,
413194710Sed              bit op4, string OpcodeStr, ValueType ResTy, ValueType OpTy,
414194710Sed              Intrinsic IntOp>
415194710Sed  : N2VImm<op24, op23, op21_16, op11_8, op7, 0, op4,
416194710Sed           (outs DPR:$dst), (ins DPR:$src, i32imm:$SIMM),
417194710Sed           !strconcat(OpcodeStr, "\t$dst, $src, $SIMM"), "",
418194710Sed           [(set DPR:$dst, (ResTy (IntOp (OpTy DPR:$src), (i32 imm:$SIMM))))]>;
// N2VCvtQ: Q-register counterpart of N2VCvtD.  Passes 1 in the sixth N2VImm
// argument and uses QPR for the destination and the vector source.
419194710Sedclass N2VCvtQ<bit op24, bit op23, bits<6> op21_16, bits<4> op11_8, bit op7,
420194710Sed              bit op4, string OpcodeStr, ValueType ResTy, ValueType OpTy,
421194710Sed              Intrinsic IntOp>
422194710Sed  : N2VImm<op24, op23, op21_16, op11_8, op7, 1, op4,
423194710Sed           (outs QPR:$dst), (ins QPR:$src, i32imm:$SIMM),
424194710Sed           !strconcat(OpcodeStr, "\t$dst, $src, $SIMM"), "",
425194710Sed           [(set QPR:$dst, (ResTy (IntOp (OpTy QPR:$src), (i32 imm:$SIMM))))]>;
426194710Sed
427194710Sed//===----------------------------------------------------------------------===//
428194710Sed// Multiclasses
429194710Sed//===----------------------------------------------------------------------===//
430194710Sed
431194710Sed// Neon 3-register vector operations.
432194710Sed
433194710Sed// First with only element sizes of 8, 16 and 32 bits:
// N3V_QHS: instantiates N3VD and N3VQ for 8-, 16- and 32-bit elements.
// The op21_20 field is set to 0b00, 0b01 and 0b10 respectively, and the
// element width ("8", "16", "32") is appended to OpcodeStr.  Result and
// operand types are identical for every def.  Commutable defaults to 0.
434194710Sedmulticlass N3V_QHS<bit op24, bit op23, bits<4> op11_8, bit op4,
435194710Sed                   string OpcodeStr, SDNode OpNode, bit Commutable = 0> {
436194710Sed  // 64-bit vector types.
437194710Sed  def v8i8  : N3VD<op24, op23, 0b00, op11_8, op4, !strconcat(OpcodeStr, "8"),
438194710Sed                   v8i8, v8i8, OpNode, Commutable>;
439194710Sed  def v4i16 : N3VD<op24, op23, 0b01, op11_8, op4, !strconcat(OpcodeStr, "16"),
440194710Sed                   v4i16, v4i16, OpNode, Commutable>;
441194710Sed  def v2i32 : N3VD<op24, op23, 0b10, op11_8, op4, !strconcat(OpcodeStr, "32"),
442194710Sed                   v2i32, v2i32, OpNode, Commutable>;
443194710Sed
444194710Sed  // 128-bit vector types.
445194710Sed  def v16i8 : N3VQ<op24, op23, 0b00, op11_8, op4, !strconcat(OpcodeStr, "8"),
446194710Sed                   v16i8, v16i8, OpNode, Commutable>;
447194710Sed  def v8i16 : N3VQ<op24, op23, 0b01, op11_8, op4, !strconcat(OpcodeStr, "16"),
448194710Sed                   v8i16, v8i16, OpNode, Commutable>;
449194710Sed  def v4i32 : N3VQ<op24, op23, 0b10, op11_8, op4, !strconcat(OpcodeStr, "32"),
450194710Sed                   v4i32, v4i32, OpNode, Commutable>;
451194710Sed}
452194710Sed
453194710Sed// ....then also with element size 64 bits:
// N3V_QHSD: inherits all six defs from N3V_QHS and adds the 64-bit element
// forms (v1i64 on D registers, v2i64 on Q registers) with op21_20 = 0b11
// and a "64" suffix on OpcodeStr.
454194710Sedmulticlass N3V_QHSD<bit op24, bit op23, bits<4> op11_8, bit op4,
455194710Sed                    string OpcodeStr, SDNode OpNode, bit Commutable = 0>
456194710Sed  : N3V_QHS<op24, op23, op11_8, op4, OpcodeStr, OpNode, Commutable> {
457194710Sed  def v1i64 : N3VD<op24, op23, 0b11, op11_8, op4, !strconcat(OpcodeStr, "64"),
458194710Sed                   v1i64, v1i64, OpNode, Commutable>;
459194710Sed  def v2i64 : N3VQ<op24, op23, 0b11, op11_8, op4, !strconcat(OpcodeStr, "64"),
460194710Sed                   v2i64, v2i64, OpNode, Commutable>;
461194710Sed}
462194710Sed
463194710Sed
464194710Sed// Neon Narrowing 2-register vector intrinsics,
465194710Sed//   source operand element sizes of 16, 32 and 64 bits:
// N2VNInt_HSD: instantiates N2VNInt three times.  Each def is named after
// its D-register result type (v8i8, v4i16, v2i32), while the OpcodeStr
// suffix ("16", "32", "64") is the element width of the Q-register source
// type (v8i16, v4i32, v2i64).  op19_18 is set to 0b00, 0b01 and 0b10.
466194710Sedmulticlass N2VNInt_HSD<bits<2> op24_23, bits<2> op21_20, bits<2> op17_16,
467194710Sed                       bits<5> op11_7, bit op6, bit op4, string OpcodeStr,
468194710Sed                       Intrinsic IntOp> {
469194710Sed  def v8i8  : N2VNInt<op24_23, op21_20, 0b00, op17_16, op11_7, op6, op4,
470194710Sed                      !strconcat(OpcodeStr, "16"), v8i8, v8i16, IntOp>;
471194710Sed  def v4i16 : N2VNInt<op24_23, op21_20, 0b01, op17_16, op11_7, op6, op4,
472194710Sed                      !strconcat(OpcodeStr, "32"), v4i16, v4i32, IntOp>;
473194710Sed  def v2i32 : N2VNInt<op24_23, op21_20, 0b10, op17_16, op11_7, op6, op4,
474194710Sed                      !strconcat(OpcodeStr, "64"), v2i32, v2i64, IntOp>;
475194710Sed}
476194710Sed
477194710Sed
478194710Sed// Neon Lengthening 2-register vector intrinsic (currently specific to VMOVL).
479194710Sed//   source operand element sizes of 8, 16 and 32 bits:
// N2VLInt_QHS: instantiates N2VLInt three times.  Each def is named after
// its Q-register result type (v8i16, v4i32, v2i64), while the OpcodeStr
// suffix ("8", "16", "32") is the element width of the D-register source
// type (v8i8, v4i16, v2i32).  The 6-bit op21_16 field is a one-hot value
// per size: 0b001000, 0b010000, 0b100000.
480194710Sedmulticlass N2VLInt_QHS<bit op24, bit op23, bits<4> op11_8, bit op7, bit op6,
481194710Sed                       bit op4, string OpcodeStr, Intrinsic IntOp> {
482194710Sed  def v8i16 : N2VLInt<op24, op23, 0b001000, op11_8, op7, op6, op4,
483194710Sed                      !strconcat(OpcodeStr, "8"), v8i16, v8i8, IntOp>;
484194710Sed  def v4i32 : N2VLInt<op24, op23, 0b010000, op11_8, op7, op6, op4,
485194710Sed                      !strconcat(OpcodeStr, "16"), v4i32, v4i16, IntOp>;
486194710Sed  def v2i64 : N2VLInt<op24, op23, 0b100000, op11_8, op7, op6, op4,
487194710Sed                      !strconcat(OpcodeStr, "32"), v2i64, v2i32, IntOp>;
488194710Sed}
489194710Sed
490194710Sed
491194710Sed// Neon 3-register vector intrinsics.
492194710Sed
493194710Sed// First with only element sizes of 16 and 32 bits:
// N3VInt_HS: instantiates N3VDInt and N3VQInt for 16- and 32-bit elements
// only.  op21_20 is 0b01 and 0b10 respectively, and "16" / "32" is appended
// to OpcodeStr.  Result and operand types are identical for every def.
// Commutable defaults to 0.
494194710Sedmulticlass N3VInt_HS<bit op24, bit op23, bits<4> op11_8, bit op4,
495194710Sed                     string OpcodeStr, Intrinsic IntOp, bit Commutable = 0> {
496194710Sed  // 64-bit vector types.
497194710Sed  def v4i16 : N3VDInt<op24, op23, 0b01, op11_8, op4, !strconcat(OpcodeStr,"16"),
498194710Sed                      v4i16, v4i16, IntOp, Commutable>;
499194710Sed  def v2i32 : N3VDInt<op24, op23, 0b10, op11_8, op4, !strconcat(OpcodeStr,"32"),
500194710Sed                      v2i32, v2i32, IntOp, Commutable>;
501194710Sed
502194710Sed  // 128-bit vector types.
503194710Sed  def v8i16 : N3VQInt<op24, op23, 0b01, op11_8, op4, !strconcat(OpcodeStr,"16"),
504194710Sed                      v8i16, v8i16, IntOp, Commutable>;
505194710Sed  def v4i32 : N3VQInt<op24, op23, 0b10, op11_8, op4, !strconcat(OpcodeStr,"32"),
506194710Sed                      v4i32, v4i32, IntOp, Commutable>;
507194710Sed}
508194710Sed
509194710Sed// ....then also with element size of 8 bits:
// N3VInt_QHS: inherits the 16- and 32-bit defs from N3VInt_HS and adds the
// 8-bit element forms (v8i8 on D registers, v16i8 on Q registers) with
// op21_20 = 0b00 and an "8" suffix on OpcodeStr.
510194710Sedmulticlass N3VInt_QHS<bit op24, bit op23, bits<4> op11_8, bit op4,
511194710Sed                      string OpcodeStr, Intrinsic IntOp, bit Commutable = 0>
512194710Sed  : N3VInt_HS<op24, op23, op11_8, op4, OpcodeStr, IntOp, Commutable> {
513194710Sed  def v8i8  : N3VDInt<op24, op23, 0b00, op11_8, op4, !strconcat(OpcodeStr, "8"),
514194710Sed                      v8i8, v8i8, IntOp, Commutable>;
515194710Sed  def v16i8 : N3VQInt<op24, op23, 0b00, op11_8, op4, !strconcat(OpcodeStr, "8"),
516194710Sed                      v16i8, v16i8, IntOp, Commutable>;
517194710Sed}
518194710Sed
// Extends N3VInt_QHS with the 64-bit element size (size field 0b11), adding
// one 64-bit and one 128-bit record on top of the inherited ones.
multiclass N3VInt_QHSD<bit op24, bit op23, bits<4> op11_8, bit op4,
                       string OpcodeStr, Intrinsic IntOp,
                       bit Commutable = 0>
  : N3VInt_QHS<op24, op23, op11_8, op4, OpcodeStr, IntOp, Commutable> {
  // 64-bit vector type.
  def v1i64 : N3VDInt<op24, op23, 0b11, op11_8, op4,
                      !strconcat(OpcodeStr,"64"),
                      v1i64, v1i64, IntOp, Commutable>;
  // 128-bit vector type.
  def v2i64 : N3VQInt<op24, op23, 0b11, op11_8, op4,
                      !strconcat(OpcodeStr,"64"),
                      v2i64, v2i64, IntOp, Commutable>;
}
528194710Sed
529194710Sed
// Narrowing 3-register vector intrinsics.
// The opcode suffix names the wider element size ("16", "32", "64") while
// the record name and first type name the narrower vector; the size field
// runs 0b00, 0b01, 0b10 in that order.
multiclass N3VNInt_HSD<bit op24, bit op23, bits<4> op11_8, bit op4,
                       string OpcodeStr, Intrinsic IntOp,
                       bit Commutable = 0> {
  // Suffix "16": types v8i8 / v8i16.
  def v8i8  : N3VNInt<op24, op23, 0b00, op11_8, op4,
                      !strconcat(OpcodeStr,"16"),
                      v8i8, v8i16, IntOp, Commutable>;
  // Suffix "32": types v4i16 / v4i32.
  def v4i16 : N3VNInt<op24, op23, 0b01, op11_8, op4,
                      !strconcat(OpcodeStr,"32"),
                      v4i16, v4i32, IntOp, Commutable>;
  // Suffix "64": types v2i32 / v2i64.
  def v2i32 : N3VNInt<op24, op23, 0b10, op11_8, op4,
                      !strconcat(OpcodeStr,"64"),
                      v2i32, v2i64, IntOp, Commutable>;
}
541194710Sed
542194710Sed
543194710Sed// Neon Long 3-register vector intrinsics.
544194710Sed
// Base variant of the long 3-register intrinsics, covering only the "16"
// (size field 0b01) and "32" (size field 0b10) suffixes.
multiclass N3VLInt_HS<bit op24, bit op23, bits<4> op11_8, bit op4,
                      string OpcodeStr, Intrinsic IntOp,
                      bit Commutable = 0> {
  // Suffix "16": types v4i32 / v4i16.
  def v4i32 : N3VLInt<op24, op23, 0b01, op11_8, op4,
                      !strconcat(OpcodeStr,"16"),
                      v4i32, v4i16, IntOp, Commutable>;
  // Suffix "32": types v2i64 / v2i32.
  def v2i64 : N3VLInt<op24, op23, 0b10, op11_8, op4,
                      !strconcat(OpcodeStr,"32"),
                      v2i64, v2i32, IntOp, Commutable>;
}
553194710Sed
// Extends N3VLInt_HS with the "8" suffix (size field 0b00).
multiclass N3VLInt_QHS<bit op24, bit op23, bits<4> op11_8, bit op4,
                       string OpcodeStr, Intrinsic IntOp,
                       bit Commutable = 0>
  : N3VLInt_HS<op24, op23, op11_8, op4, OpcodeStr, IntOp, Commutable> {
  // Suffix "8": types v8i16 / v8i8.
  def v8i16 : N3VLInt<op24, op23, 0b00, op11_8, op4,
                      !strconcat(OpcodeStr, "8"),
                      v8i16, v8i8, IntOp, Commutable>;
}
561194710Sed
562194710Sed
// Wide 3-register vector intrinsics, one record per suffix "8", "16", "32".
// The size field runs 0b00, 0b01, 0b10 in that order; Commutable is passed
// through to each record and defaults to 0.
multiclass N3VWInt_QHS<bit op24, bit op23, bits<4> op11_8, bit op4,
                       string OpcodeStr, Intrinsic IntOp,
                       bit Commutable = 0> {
  // Suffix "8": types v8i16 / v8i8.
  def v8i16 : N3VWInt<op24, op23, 0b00, op11_8, op4,
                      !strconcat(OpcodeStr, "8"),
                      v8i16, v8i8, IntOp, Commutable>;
  // Suffix "16": types v4i32 / v4i16.
  def v4i32 : N3VWInt<op24, op23, 0b01, op11_8, op4,
                      !strconcat(OpcodeStr,"16"),
                      v4i32, v4i16, IntOp, Commutable>;
  // Suffix "32": types v2i64 / v2i32.
  def v2i64 : N3VWInt<op24, op23, 0b10, op11_8, op4,
                      !strconcat(OpcodeStr,"32"),
                      v2i64, v2i32, IntOp, Commutable>;
}
574194710Sed
575194710Sed
// Multiply-Op vector operations for the "8", "16" and "32" suffixes.
// Every record passes `mul` together with the caller's OpNode to
// N3VDMulOp (64-bit vectors) or N3VQMulOp (128-bit vectors).
multiclass N3VMulOp_QHS<bit op24, bit op23, bits<4> op11_8, bit op4,
                        string OpcodeStr, SDNode OpNode> {
  // 64-bit vector types.
  def v8i8  : N3VDMulOp<op24, op23, 0b00, op11_8, op4,
                        !strconcat(OpcodeStr, "8"),
                        v8i8, mul, OpNode>;
  def v4i16 : N3VDMulOp<op24, op23, 0b01, op11_8, op4,
                        !strconcat(OpcodeStr, "16"),
                        v4i16, mul, OpNode>;
  def v2i32 : N3VDMulOp<op24, op23, 0b10, op11_8, op4,
                        !strconcat(OpcodeStr, "32"),
                        v2i32, mul, OpNode>;

  // 128-bit vector types.
  def v16i8 : N3VQMulOp<op24, op23, 0b00, op11_8, op4,
                        !strconcat(OpcodeStr, "8"),
                        v16i8, mul, OpNode>;
  def v8i16 : N3VQMulOp<op24, op23, 0b01, op11_8, op4,
                        !strconcat(OpcodeStr, "16"),
                        v8i16, mul, OpNode>;
  def v4i32 : N3VQMulOp<op24, op23, 0b10, op11_8, op4,
                        !strconcat(OpcodeStr, "32"),
                        v4i32, mul, OpNode>;
}
596194710Sed
597194710Sed
// 3-argument intrinsics for the "8", "16" and "32" suffixes.
// Emits a 64-bit record (N3VDInt3) and a 128-bit record (N3VQInt3) per
// size; the size field is 0b00, 0b01, 0b10 respectively.
multiclass N3VInt3_QHS<bit op24, bit op23, bits<4> op11_8, bit op4,
                       string OpcodeStr, Intrinsic IntOp> {
  // 64-bit vector types.
  def v8i8  : N3VDInt3<op24, op23, 0b00, op11_8, op4,
                       !strconcat(OpcodeStr, "8"),
                       v8i8, v8i8, IntOp>;
  def v4i16 : N3VDInt3<op24, op23, 0b01, op11_8, op4,
                       !strconcat(OpcodeStr, "16"),
                       v4i16, v4i16, IntOp>;
  def v2i32 : N3VDInt3<op24, op23, 0b10, op11_8, op4,
                       !strconcat(OpcodeStr, "32"),
                       v2i32, v2i32, IntOp>;

  // 128-bit vector types.
  def v16i8 : N3VQInt3<op24, op23, 0b00, op11_8, op4,
                       !strconcat(OpcodeStr, "8"),
                       v16i8, v16i8, IntOp>;
  def v8i16 : N3VQInt3<op24, op23, 0b01, op11_8, op4,
                       !strconcat(OpcodeStr, "16"),
                       v8i16, v8i16, IntOp>;
  def v4i32 : N3VQInt3<op24, op23, 0b10, op11_8, op4,
                       !strconcat(OpcodeStr, "32"),
                       v4i32, v4i32, IntOp>;
}
618194710Sed
619194710Sed
620194710Sed// Neon Long 3-argument intrinsics.
621194710Sed
// Base variant of the long 3-argument intrinsics, covering only the "16"
// (size field 0b01) and "32" (size field 0b10) suffixes.
multiclass N3VLInt3_HS<bit op24, bit op23, bits<4> op11_8, bit op4,
                       string OpcodeStr, Intrinsic IntOp> {
  // Suffix "16": types v4i32 / v4i16.
  def v4i32 : N3VLInt3<op24, op23, 0b01, op11_8, op4,
                       !strconcat(OpcodeStr, "16"),
                       v4i32, v4i16, IntOp>;
  // Suffix "32": types v2i64 / v2i32.
  def v2i64 : N3VLInt3<op24, op23, 0b10, op11_8, op4,
                       !strconcat(OpcodeStr, "32"),
                       v2i64, v2i32, IntOp>;
}
630194710Sed
// ....then also with element size of 8 bits:
// Extends N3VLInt3_HS with the "8" suffix.  The size field must be 0b00 for
// this variant: 0b01 is already used by the inherited "16" (v4i32) record,
// and every other *_QHS multiclass in this file encodes the 8-bit size as
// 0b00 (see N3VLInt_QHS).
multiclass N3VLInt3_QHS<bit op24, bit op23, bits<4> op11_8, bit op4,
                        string OpcodeStr, Intrinsic IntOp>
  : N3VLInt3_HS<op24, op23, op11_8, op4, OpcodeStr, IntOp> {
  // Suffix "8": types v8i16 / v8i8.
  def v8i16 : N3VLInt3<op24, op23, 0b00, op11_8, op4,
                       !strconcat(OpcodeStr, "8"), v8i16, v8i8, IntOp>;
}
638194710Sed
639194710Sed
// 2-register vector intrinsics for the "8", "16" and "32" suffixes.
// The 2-bit size value (0b00, 0b01, 0b10) is passed between op21_20 and
// op17_16; one 64-bit (N2VDInt) and one 128-bit (N2VQInt) record is
// produced per size.
multiclass N2VInt_QHS<bits<2> op24_23, bits<2> op21_20, bits<2> op17_16,
                      bits<5> op11_7, bit op4,
                      string OpcodeStr, Intrinsic IntOp> {
  // 64-bit vector types.
  def v8i8  : N2VDInt<op24_23, op21_20, 0b00, op17_16, op11_7, op4,
                      !strconcat(OpcodeStr, "8"),
                      v8i8, v8i8, IntOp>;
  def v4i16 : N2VDInt<op24_23, op21_20, 0b01, op17_16, op11_7, op4,
                      !strconcat(OpcodeStr, "16"),
                      v4i16, v4i16, IntOp>;
  def v2i32 : N2VDInt<op24_23, op21_20, 0b10, op17_16, op11_7, op4,
                      !strconcat(OpcodeStr, "32"),
                      v2i32, v2i32, IntOp>;

  // 128-bit vector types.
  def v16i8 : N2VQInt<op24_23, op21_20, 0b00, op17_16, op11_7, op4,
                      !strconcat(OpcodeStr, "8"),
                      v16i8, v16i8, IntOp>;
  def v8i16 : N2VQInt<op24_23, op21_20, 0b01, op17_16, op11_7, op4,
                      !strconcat(OpcodeStr, "16"),
                      v8i16, v8i16, IntOp>;
  def v4i32 : N2VQInt<op24_23, op21_20, 0b10, op17_16, op11_7, op4,
                      !strconcat(OpcodeStr, "32"),
                      v4i32, v4i32, IntOp>;
}
661194710Sed
662194710Sed
// Pairwise long 2-register intrinsics for the "8", "16" and "32" suffixes.
// Records are named after the second (narrower-element) type; the first
// type passed has half as many elements of twice the width.
multiclass N2VPLInt_QHS<bits<2> op24_23, bits<2> op21_20, bits<2> op17_16,
                        bits<5> op11_7, bit op4,
                        string OpcodeStr, Intrinsic IntOp> {
  // 64-bit vector types.
  def v8i8  : N2VDPLInt<op24_23, op21_20, 0b00, op17_16, op11_7, op4,
                        !strconcat(OpcodeStr, "8"),
                        v4i16, v8i8, IntOp>;
  def v4i16 : N2VDPLInt<op24_23, op21_20, 0b01, op17_16, op11_7, op4,
                        !strconcat(OpcodeStr, "16"),
                        v2i32, v4i16, IntOp>;
  def v2i32 : N2VDPLInt<op24_23, op21_20, 0b10, op17_16, op11_7, op4,
                        !strconcat(OpcodeStr, "32"),
                        v1i64, v2i32, IntOp>;

  // 128-bit vector types.
  def v16i8 : N2VQPLInt<op24_23, op21_20, 0b00, op17_16, op11_7, op4,
                        !strconcat(OpcodeStr, "8"),
                        v8i16, v16i8, IntOp>;
  def v8i16 : N2VQPLInt<op24_23, op21_20, 0b01, op17_16, op11_7, op4,
                        !strconcat(OpcodeStr, "16"),
                        v4i32, v8i16, IntOp>;
  def v4i32 : N2VQPLInt<op24_23, op21_20, 0b10, op17_16, op11_7, op4,
                        !strconcat(OpcodeStr, "32"),
                        v2i64, v4i32, IntOp>;
}
684194710Sed
685194710Sed
// Pairwise long 2-register accumulate intrinsics for the "8", "16" and "32"
// suffixes.  Same shape as N2VPLInt_QHS, but built on the N2VDPLInt2 /
// N2VQPLInt2 classes.
multiclass N2VPLInt2_QHS<bits<2> op24_23, bits<2> op21_20, bits<2> op17_16,
                         bits<5> op11_7, bit op4,
                         string OpcodeStr, Intrinsic IntOp> {
  // 64-bit vector types.
  def v8i8  : N2VDPLInt2<op24_23, op21_20, 0b00, op17_16, op11_7, op4,
                         !strconcat(OpcodeStr, "8"),
                         v4i16, v8i8, IntOp>;
  def v4i16 : N2VDPLInt2<op24_23, op21_20, 0b01, op17_16, op11_7, op4,
                         !strconcat(OpcodeStr, "16"),
                         v2i32, v4i16, IntOp>;
  def v2i32 : N2VDPLInt2<op24_23, op21_20, 0b10, op17_16, op11_7, op4,
                         !strconcat(OpcodeStr, "32"),
                         v1i64, v2i32, IntOp>;

  // 128-bit vector types.
  def v16i8 : N2VQPLInt2<op24_23, op21_20, 0b00, op17_16, op11_7, op4,
                         !strconcat(OpcodeStr, "8"),
                         v8i16, v16i8, IntOp>;
  def v8i16 : N2VQPLInt2<op24_23, op21_20, 0b01, op17_16, op11_7, op4,
                         !strconcat(OpcodeStr, "16"),
                         v4i32, v8i16, IntOp>;
  def v4i32 : N2VQPLInt2<op24_23, op21_20, 0b10, op17_16, op11_7, op4,
                         !strconcat(OpcodeStr, "32"),
                         v2i64, v4i32, IntOp>;
}
707194710Sed
708194710Sed
// 2-register vector shift by immediate for the "8", "16", "32" and "64"
// suffixes.  The 6-bit field after op23 is 0b001000 / 0b010000 / 0b100000
// for the first three sizes with the following bit argument 0; the "64"
// records pass 0b000000 with that bit set to 1 instead.
multiclass N2VSh_QHSD<bit op24, bit op23, bits<4> op11_8, bit op4,
                      string OpcodeStr, SDNode OpNode> {
  // 64-bit vector types.
  def v8i8  : N2VDSh<op24, op23, 0b001000, op11_8, 0, op4,
                     !strconcat(OpcodeStr, "8"),
                     v8i8, OpNode>;
  def v4i16 : N2VDSh<op24, op23, 0b010000, op11_8, 0, op4,
                     !strconcat(OpcodeStr, "16"),
                     v4i16, OpNode>;
  def v2i32 : N2VDSh<op24, op23, 0b100000, op11_8, 0, op4,
                     !strconcat(OpcodeStr, "32"),
                     v2i32, OpNode>;
  def v1i64 : N2VDSh<op24, op23, 0b000000, op11_8, 1, op4,
                     !strconcat(OpcodeStr, "64"),
                     v1i64, OpNode>;

  // 128-bit vector types.
  def v16i8 : N2VQSh<op24, op23, 0b001000, op11_8, 0, op4,
                     !strconcat(OpcodeStr, "8"),
                     v16i8, OpNode>;
  def v8i16 : N2VQSh<op24, op23, 0b010000, op11_8, 0, op4,
                     !strconcat(OpcodeStr, "16"),
                     v8i16, OpNode>;
  def v4i32 : N2VQSh<op24, op23, 0b100000, op11_8, 0, op4,
                     !strconcat(OpcodeStr, "32"),
                     v4i32, OpNode>;
  def v2i64 : N2VQSh<op24, op23, 0b000000, op11_8, 1, op4,
                     !strconcat(OpcodeStr, "64"),
                     v2i64, OpNode>;
}
733194710Sed
734194710Sed
// Shift-Accumulate vector operations for the "8", "16", "32" and "64"
// suffixes.  Field values follow the same scheme as N2VSh_QHSD: the "64"
// records pass 0b000000 with the bit after op11_8 set to 1.
multiclass N2VShAdd_QHSD<bit op24, bit op23, bits<4> op11_8, bit op4,
                         string OpcodeStr, SDNode ShOp> {
  // 64-bit vector types.
  def v8i8  : N2VDShAdd<op24, op23, 0b001000, op11_8, 0, op4,
                        !strconcat(OpcodeStr, "8"),
                        v8i8, ShOp>;
  def v4i16 : N2VDShAdd<op24, op23, 0b010000, op11_8, 0, op4,
                        !strconcat(OpcodeStr, "16"),
                        v4i16, ShOp>;
  def v2i32 : N2VDShAdd<op24, op23, 0b100000, op11_8, 0, op4,
                        !strconcat(OpcodeStr, "32"),
                        v2i32, ShOp>;
  def v1i64 : N2VDShAdd<op24, op23, 0b000000, op11_8, 1, op4,
                        !strconcat(OpcodeStr, "64"),
                        v1i64, ShOp>;

  // 128-bit vector types.
  def v16i8 : N2VQShAdd<op24, op23, 0b001000, op11_8, 0, op4,
                        !strconcat(OpcodeStr, "8"),
                        v16i8, ShOp>;
  def v8i16 : N2VQShAdd<op24, op23, 0b010000, op11_8, 0, op4,
                        !strconcat(OpcodeStr, "16"),
                        v8i16, ShOp>;
  def v4i32 : N2VQShAdd<op24, op23, 0b100000, op11_8, 0, op4,
                        !strconcat(OpcodeStr, "32"),
                        v4i32, ShOp>;
  def v2i64 : N2VQShAdd<op24, op23, 0b000000, op11_8, 1, op4,
                        !strconcat(OpcodeStr, "64"),
                        v2i64, ShOp>;
}
759194710Sed
760194710Sed
// Shift-Insert vector operations for the "8", "16", "32" and "64" suffixes.
// Field values follow the same scheme as N2VSh_QHSD: the "64" records pass
// 0b000000 with the bit after op11_8 set to 1.
multiclass N2VShIns_QHSD<bit op24, bit op23, bits<4> op11_8, bit op4,
                         string OpcodeStr, SDNode ShOp> {
  // 64-bit vector types.
  def v8i8  : N2VDShIns<op24, op23, 0b001000, op11_8, 0, op4,
                        !strconcat(OpcodeStr, "8"),
                        v8i8, ShOp>;
  def v4i16 : N2VDShIns<op24, op23, 0b010000, op11_8, 0, op4,
                        !strconcat(OpcodeStr, "16"),
                        v4i16, ShOp>;
  def v2i32 : N2VDShIns<op24, op23, 0b100000, op11_8, 0, op4,
                        !strconcat(OpcodeStr, "32"),
                        v2i32, ShOp>;
  def v1i64 : N2VDShIns<op24, op23, 0b000000, op11_8, 1, op4,
                        !strconcat(OpcodeStr, "64"),
                        v1i64, ShOp>;

  // 128-bit vector types.
  def v16i8 : N2VQShIns<op24, op23, 0b001000, op11_8, 0, op4,
                        !strconcat(OpcodeStr, "8"),
                        v16i8, ShOp>;
  def v8i16 : N2VQShIns<op24, op23, 0b010000, op11_8, 0, op4,
                        !strconcat(OpcodeStr, "16"),
                        v8i16, ShOp>;
  def v4i32 : N2VQShIns<op24, op23, 0b100000, op11_8, 0, op4,
                        !strconcat(OpcodeStr, "32"),
                        v4i32, ShOp>;
  def v2i64 : N2VQShIns<op24, op23, 0b000000, op11_8, 1, op4,
                        !strconcat(OpcodeStr, "64"),
                        v2i64, ShOp>;
}
785194710Sed
786194710Sed//===----------------------------------------------------------------------===//
787194710Sed// Instruction Definitions.
788194710Sed//===----------------------------------------------------------------------===//
789194710Sed
// Vector Add Operations.
// The trailing 0/1 argument on each definition is the Commutable flag.

//   VADD     : Vector Add (integer and floating-point)
defm VADD : N3V_QHSD<0, 0, 0b1000, 0, "vadd.i", add, 1>;
def VADDfd : N3VD<0, 0, 0b00, 0b1101, 0, "vadd.f32",
                  v2f32, v2f32, fadd, 1>;
def VADDfq : N3VQ<0, 0, 0b00, 0b1101, 0, "vadd.f32",
                  v4f32, v4f32, fadd, 1>;

//   VADDL    : Vector Add Long (Q = D + D)
defm VADDLs : N3VLInt_QHS<0, 1, 0b0000, 0, "vaddl.s",
                          int_arm_neon_vaddls, 1>;
defm VADDLu : N3VLInt_QHS<1, 1, 0b0000, 0, "vaddl.u",
                          int_arm_neon_vaddlu, 1>;

//   VADDW    : Vector Add Wide (Q = Q + D)
defm VADDWs : N3VWInt_QHS<0, 1, 0b0001, 0, "vaddw.s",
                          int_arm_neon_vaddws, 0>;
defm VADDWu : N3VWInt_QHS<1, 1, 0b0001, 0, "vaddw.u",
                          int_arm_neon_vaddwu, 0>;

//   VHADD    : Vector Halving Add
defm VHADDs : N3VInt_QHS<0, 0, 0b0000, 0, "vhadd.s",
                         int_arm_neon_vhadds, 1>;
defm VHADDu : N3VInt_QHS<1, 0, 0b0000, 0, "vhadd.u",
                         int_arm_neon_vhaddu, 1>;

//   VRHADD   : Vector Rounding Halving Add
defm VRHADDs : N3VInt_QHS<0, 0, 0b0001, 0, "vrhadd.s",
                          int_arm_neon_vrhadds, 1>;
defm VRHADDu : N3VInt_QHS<1, 0, 0b0001, 0, "vrhadd.u",
                          int_arm_neon_vrhaddu, 1>;

//   VQADD    : Vector Saturating Add
defm VQADDs : N3VInt_QHSD<0, 0, 0b0000, 1, "vqadd.s",
                          int_arm_neon_vqadds, 1>;
defm VQADDu : N3VInt_QHSD<1, 0, 0b0000, 1, "vqadd.u",
                          int_arm_neon_vqaddu, 1>;

//   VADDHN   : Vector Add and Narrow Returning High Half (D = Q + Q)
defm VADDHN : N3VNInt_HSD<0, 1, 0b0100, 0, "vaddhn.i",
                          int_arm_neon_vaddhn, 1>;

//   VRADDHN  : Vector Rounding Add and Narrow Returning High Half (D = Q + Q)
defm VRADDHN : N3VNInt_HSD<1, 1, 0b0100, 0, "vraddhn.i",
                           int_arm_neon_vraddhn, 1>;
815194710Sed
// Vector Multiply Operations.
// Every definition in this group passes 1 as its final (Commutable) argument.

//   VMUL     : Vector Multiply (integer, polynomial and floating-point)
defm VMUL : N3V_QHS<0, 0, 0b1001, 1, "vmul.i", mul, 1>;
def VMULpd : N3VDInt<1, 0, 0b00, 0b1001, 1, "vmul.p8",
                     v8i8, v8i8, int_arm_neon_vmulp, 1>;
def VMULpq : N3VQInt<1, 0, 0b00, 0b1001, 1, "vmul.p8",
                     v16i8, v16i8, int_arm_neon_vmulp, 1>;
def VMULfd : N3VD<1, 0, 0b00, 0b1101, 1, "vmul.f32",
                  v2f32, v2f32, fmul, 1>;
def VMULfq : N3VQ<1, 0, 0b00, 0b1101, 1, "vmul.f32",
                  v4f32, v4f32, fmul, 1>;

//   VQDMULH  : Vector Saturating Doubling Multiply Returning High Half
defm VQDMULH : N3VInt_HS<0, 0, 0b1011, 0, "vqdmulh.s",
                         int_arm_neon_vqdmulh, 1>;

//   VQRDMULH : Vector Rounding Saturating Doubling Multiply Returning High Half
defm VQRDMULH : N3VInt_HS<1, 0, 0b1011, 0, "vqrdmulh.s",
                          int_arm_neon_vqrdmulh, 1>;

//   VMULL    : Vector Multiply Long (integer and polynomial) (Q = D * D)
defm VMULLs : N3VLInt_QHS<0, 1, 0b1100, 0, "vmull.s",
                          int_arm_neon_vmulls, 1>;
defm VMULLu : N3VLInt_QHS<1, 1, 0b1100, 0, "vmull.u",
                          int_arm_neon_vmullu, 1>;
def VMULLp : N3VLInt<0, 1, 0b00, 0b1110, 0, "vmull.p8",
                     v8i16, v8i8, int_arm_neon_vmullp, 1>;

//   VQDMULL  : Vector Saturating Doubling Multiply Long (Q = D * D)
defm VQDMULL : N3VLInt_HS<0, 1, 0b1101, 0, "vqdmull.s",
                          int_arm_neon_vqdmull, 1>;
837194710Sed
// Vector Multiply-Accumulate and Multiply-Subtract Operations.

//   VMLA     : Vector Multiply Accumulate (integer and floating-point)
defm VMLA     : N3VMulOp_QHS<0, 0, 0b1001, 0, "vmla.i", add>;
def  VMLAfd   : N3VDMulOp<0, 0, 0b00, 0b1101, 1, "vmla.f32", v2f32, fmul, fadd>;
def  VMLAfq   : N3VQMulOp<0, 0, 0b00, 0b1101, 1, "vmla.f32", v4f32, fmul, fadd>;
//   VMLAL    : Vector Multiply Accumulate Long (Q += D * D)
defm VMLALs   : N3VLInt3_QHS<0,1,0b1000,0, "vmlal.s", int_arm_neon_vmlals>;
defm VMLALu   : N3VLInt3_QHS<1,1,0b1000,0, "vmlal.u", int_arm_neon_vmlalu>;
//   VQDMLAL  : Vector Saturating Doubling Multiply Accumulate Long (Q += D * D)
defm VQDMLAL  : N3VLInt3_HS<0, 1, 0b1001, 0, "vqdmlal.s", int_arm_neon_vqdmlal>;
//   VMLS     : Vector Multiply Subtract (integer and floating-point)
// The integer VMLS encoding differs from integer VMLA only in bit 24, which
// must be 1 here; with op24 = 0 the two definitions would share one encoding.
defm VMLS     : N3VMulOp_QHS<1, 0, 0b1001, 0, "vmls.i", sub>;
// The floating-point forms are distinguished from VMLA by the 2-bit field
// after op23 (0b10 instead of 0b00).
def  VMLSfd   : N3VDMulOp<0, 0, 0b10, 0b1101, 1, "vmls.f32", v2f32, fmul, fsub>;
def  VMLSfq   : N3VQMulOp<0, 0, 0b10, 0b1101, 1, "vmls.f32", v4f32, fmul, fsub>;
//   VMLSL    : Vector Multiply Subtract Long (Q -= D * D)
defm VMLSLs   : N3VLInt3_QHS<0,1,0b1010,0, "vmlsl.s", int_arm_neon_vmlsls>;
defm VMLSLu   : N3VLInt3_QHS<1,1,0b1010,0, "vmlsl.u", int_arm_neon_vmlslu>;
//   VQDMLSL  : Vector Saturating Doubling Multiply Subtract Long (Q -= D * D)
defm VQDMLSL  : N3VLInt3_HS<0, 1, 0b1011, 0, "vqdmlsl.s", int_arm_neon_vqdmlsl>;
858194710Sed
// Vector Subtract Operations.
// Subtraction is not commutative, so every definition in this group passes
// 0 as its final (Commutable) argument.

//   VSUB     : Vector Subtract (integer and floating-point)
defm VSUB     : N3V_QHSD<1, 0, 0b1000, 0, "vsub.i", sub, 0>;
def  VSUBfd   : N3VD<0, 0, 0b10, 0b1101, 0, "vsub.f32", v2f32, v2f32, fsub, 0>;
def  VSUBfq   : N3VQ<0, 0, 0b10, 0b1101, 0, "vsub.f32", v4f32, v4f32, fsub, 0>;
//   VSUBL    : Vector Subtract Long (Q = D - D)
// Must not be marked commutable: swapping the two D operands would change
// the result.
defm VSUBLs   : N3VLInt_QHS<0,1,0b0010,0, "vsubl.s", int_arm_neon_vsubls, 0>;
defm VSUBLu   : N3VLInt_QHS<1,1,0b0010,0, "vsubl.u", int_arm_neon_vsublu, 0>;
//   VSUBW    : Vector Subtract Wide (Q = Q - D)
defm VSUBWs   : N3VWInt_QHS<0,1,0b0011,0, "vsubw.s", int_arm_neon_vsubws, 0>;
defm VSUBWu   : N3VWInt_QHS<1,1,0b0011,0, "vsubw.u", int_arm_neon_vsubwu, 0>;
//   VHSUB    : Vector Halving Subtract
defm VHSUBs   : N3VInt_QHS<0, 0, 0b0010, 0, "vhsub.s", int_arm_neon_vhsubs, 0>;
defm VHSUBu   : N3VInt_QHS<1, 0, 0b0010, 0, "vhsub.u", int_arm_neon_vhsubu, 0>;
//   VQSUB    : Vector Saturating Subtract
defm VQSUBs   : N3VInt_QHSD<0, 0, 0b0010, 1, "vqsub.s", int_arm_neon_vqsubs, 0>;
defm VQSUBu   : N3VInt_QHSD<1, 0, 0b0010, 1, "vqsub.u", int_arm_neon_vqsubu, 0>;
//   VSUBHN   : Vector Subtract and Narrow Returning High Half (D = Q - Q)
defm VSUBHN   : N3VNInt_HSD<0,1,0b0110,0, "vsubhn.i", int_arm_neon_vsubhn, 0>;
//   VRSUBHN  : Vector Rounding Subtract and Narrow Returning High Half (D=Q-Q)
defm VRSUBHN  : N3VNInt_HSD<1,1,0b0110,0, "vrsubhn.i", int_arm_neon_vrsubhn, 0>;
881194710Sed
// Vector Comparisons.
// The floating-point forms pass an integer vector type followed by the
// floating-point vector type.  Only VCEQ and VTST are flagged commutable.

//   VCEQ     : Vector Compare Equal
defm VCEQ : N3V_QHS<1, 0, 0b1000, 1, "vceq.i", NEONvceq, 1>;
def VCEQfd : N3VD<0, 0, 0b00, 0b1110, 0, "vceq.f32",
                  v2i32, v2f32, NEONvceq, 1>;
def VCEQfq : N3VQ<0, 0, 0b00, 0b1110, 0, "vceq.f32",
                  v4i32, v4f32, NEONvceq, 1>;

//   VCGE     : Vector Compare Greater Than or Equal
defm VCGEs : N3V_QHS<0, 0, 0b0011, 1, "vcge.s", NEONvcge, 0>;
defm VCGEu : N3V_QHS<1, 0, 0b0011, 1, "vcge.u", NEONvcgeu, 0>;
def VCGEfd : N3VD<1, 0, 0b00, 0b1110, 0, "vcge.f32",
                  v2i32, v2f32, NEONvcge, 0>;
def VCGEfq : N3VQ<1, 0, 0b00, 0b1110, 0, "vcge.f32",
                  v4i32, v4f32, NEONvcge, 0>;

//   VCGT     : Vector Compare Greater Than
defm VCGTs : N3V_QHS<0, 0, 0b0011, 0, "vcgt.s", NEONvcgt, 0>;
defm VCGTu : N3V_QHS<1, 0, 0b0011, 0, "vcgt.u", NEONvcgtu, 0>;
def VCGTfd : N3VD<1, 0, 0b10, 0b1110, 0, "vcgt.f32",
                  v2i32, v2f32, NEONvcgt, 0>;
def VCGTfq : N3VQ<1, 0, 0b10, 0b1110, 0, "vcgt.f32",
                  v4i32, v4f32, NEONvcgt, 0>;

//   VACGE    : Vector Absolute Compare Greater Than or Equal (aka VCAGE)
def VACGEd : N3VDInt<1, 0, 0b00, 0b1110, 1, "vacge.f32",
                     v2i32, v2f32, int_arm_neon_vacged, 0>;
def VACGEq : N3VQInt<1, 0, 0b00, 0b1110, 1, "vacge.f32",
                     v4i32, v4f32, int_arm_neon_vacgeq, 0>;

//   VACGT    : Vector Absolute Compare Greater Than (aka VCAGT)
def VACGTd : N3VDInt<1, 0, 0b10, 0b1110, 1, "vacgt.f32",
                     v2i32, v2f32, int_arm_neon_vacgtd, 0>;
def VACGTq : N3VQInt<1, 0, 0b10, 0b1110, 1, "vacgt.f32",
                     v4i32, v4f32, int_arm_neon_vacgtq, 0>;

//   VTST     : Vector Test Bits
defm VTST : N3V_QHS<0, 0, 0b1000, 1, "vtst.i", NEONvtst, 1>;
910194710Sed
// Vector Bitwise Operations.
// All patterns here are written against v2i32 (D registers) and v4i32
// (Q registers).

//   VAND     : Vector Bitwise AND
def VANDd : N3VD<0, 0, 0b00, 0b0001, 1, "vand",
                 v2i32, v2i32, and, 1>;
def VANDq : N3VQ<0, 0, 0b00, 0b0001, 1, "vand",
                 v4i32, v4i32, and, 1>;

//   VEOR     : Vector Bitwise Exclusive OR
def VEORd : N3VD<1, 0, 0b00, 0b0001, 1, "veor",
                 v2i32, v2i32, xor, 1>;
def VEORq : N3VQ<1, 0, 0b00, 0b0001, 1, "veor",
                 v4i32, v4i32, xor, 1>;

//   VORR     : Vector Bitwise OR
def VORRd : N3VD<0, 0, 0b10, 0b0001, 1, "vorr",
                 v2i32, v2i32, or, 1>;
def VORRq : N3VQ<0, 0, 0b10, 0b0001, 1, "vorr",
                 v4i32, v4i32, or, 1>;

//   VBIC     : Vector Bitwise Bit Clear (AND NOT)
//              Pattern: $src1 AND (NOT $src2).
def VBICd : N3V<0, 0, 0b01, 0b0001, 0, 1,
                (outs DPR:$dst), (ins DPR:$src1, DPR:$src2),
                "vbic\t$dst, $src1, $src2", "",
                [(set DPR:$dst,
                      (v2i32 (and DPR:$src1, (vnot DPR:$src2))))]>;
def VBICq : N3V<0, 0, 0b01, 0b0001, 1, 1,
                (outs QPR:$dst), (ins QPR:$src1, QPR:$src2),
                "vbic\t$dst, $src1, $src2", "",
                [(set QPR:$dst,
                      (v4i32 (and QPR:$src1, (vnot QPR:$src2))))]>;

//   VORN     : Vector Bitwise OR NOT
//              Pattern: $src1 OR (NOT $src2).
def VORNd : N3V<0, 0, 0b11, 0b0001, 0, 1,
                (outs DPR:$dst), (ins DPR:$src1, DPR:$src2),
                "vorn\t$dst, $src1, $src2", "",
                [(set DPR:$dst,
                      (v2i32 (or DPR:$src1, (vnot DPR:$src2))))]>;
def VORNq : N3V<0, 0, 0b11, 0b0001, 1, 1,
                (outs QPR:$dst), (ins QPR:$src1, QPR:$src2),
                "vorn\t$dst, $src1, $src2", "",
                [(set QPR:$dst,
                      (v4i32 (or QPR:$src1, (vnot QPR:$src2))))]>;

//   VMVN     : Vector Bitwise NOT
def VMVNd : N2V<0b11, 0b11, 0b00, 0b00, 0b01011, 0, 0,
                (outs DPR:$dst), (ins DPR:$src),
                "vmvn\t$dst, $src", "",
                [(set DPR:$dst, (v2i32 (vnot DPR:$src)))]>;
def VMVNq : N2V<0b11, 0b11, 0b00, 0b00, 0b01011, 1, 0,
                (outs QPR:$dst), (ins QPR:$src),
                "vmvn\t$dst, $src", "",
                [(set QPR:$dst, (v4i32 (vnot QPR:$src)))]>;
// Also select VMVN for the vnot_conv form of the NOT pattern.
def : Pat<(v2i32 (vnot_conv DPR:$src)),
          (VMVNd DPR:$src)>;
def : Pat<(v4i32 (vnot_conv QPR:$src)),
          (VMVNq QPR:$src)>;
950194710Sed
//   VBSL     : Vector Bitwise Select
//              $src1 is tied to $dst and acts as the mask: the pattern is
//              ($src2 AND $src1) OR ($src3 AND (NOT $src1)).
def VBSLd : N3V<1, 0, 0b01, 0b0001, 0, 1,
                (outs DPR:$dst),
                (ins DPR:$src1, DPR:$src2, DPR:$src3),
                "vbsl\t$dst, $src2, $src3", "$src1 = $dst",
                [(set DPR:$dst,
                      (v2i32 (or (and DPR:$src2, DPR:$src1),
                                 (and DPR:$src3, (vnot DPR:$src1)))))]>;
def VBSLq : N3V<1, 0, 0b01, 0b0001, 1, 1,
                (outs QPR:$dst),
                (ins QPR:$src1, QPR:$src2, QPR:$src3),
                "vbsl\t$dst, $src2, $src3", "$src1 = $dst",
                [(set QPR:$dst,
                      (v4i32 (or (and QPR:$src2, QPR:$src1),
                                 (and QPR:$src3, (vnot QPR:$src1)))))]>;
964194710Sed
965194710Sed//   VBIF     : Vector Bitwise Insert if False
966194710Sed//              like VBSL but with: "vbif\t$dst, $src3, $src1", "$src2 = $dst",
967194710Sed//   VBIT     : Vector Bitwise Insert if True
968194710Sed//              like VBSL but with: "vbit\t$dst, $src2, $src1", "$src3 = $dst",
969194710Sed// These are not yet implemented.  The TwoAddress pass will not go looking
970194710Sed// for equivalent operations with different register constraints; it just
971194710Sed// inserts copies.
972194710Sed
// Vector Absolute Differences.

//   VABD     : Vector Absolute Difference
defm VABDs    : N3VInt_QHS<0, 0, 0b0111, 0, "vabd.s", int_arm_neon_vabds, 0>;
defm VABDu    : N3VInt_QHS<1, 0, 0b0111, 0, "vabd.u", int_arm_neon_vabdu, 0>;
def  VABDfd   : N3VDInt<1, 0, 0b10, 0b1101, 0, "vabd.f32", v2f32, v2f32,
                        int_arm_neon_vabdf, 0>;
def  VABDfq   : N3VQInt<1, 0, 0b10, 0b1101, 0, "vabd.f32", v4f32, v4f32,
                        int_arm_neon_vabdf, 0>;

//   VABDL    : Vector Absolute Difference Long (Q = | D - D |)
defm VABDLs   : N3VLInt_QHS<0,1,0b0111,0, "vabdl.s", int_arm_neon_vabdls, 0>;
defm VABDLu   : N3VLInt_QHS<1,1,0b0111,0, "vabdl.u", int_arm_neon_vabdlu, 0>;

//   VABA     : Vector Absolute Difference and Accumulate
// VABA shares opcode bits 11-8 (0b0111) with VABD and is told apart from it
// by op4 = 1; op23 is 0 as for the other same-width operations.  Using
// <_, 1, 0b0101, 0> here would duplicate the VABAL encoding below.
defm VABAs    : N3VInt3_QHS<0,0,0b0111,1, "vaba.s", int_arm_neon_vabas>;
defm VABAu    : N3VInt3_QHS<1,0,0b0111,1, "vaba.u", int_arm_neon_vabau>;

//   VABAL    : Vector Absolute Difference and Accumulate Long (Q += | D - D |)
defm VABALs   : N3VLInt3_QHS<0,1,0b0101,0, "vabal.s", int_arm_neon_vabals>;
defm VABALu   : N3VLInt3_QHS<1,1,0b0101,0, "vabal.u", int_arm_neon_vabalu>;
994194710Sed
// Vector Maximum and Minimum.
// The integer forms of VMAX and VMIN differ only in op4 (0 vs. 1); the
// floating-point forms differ in the 2-bit field after op23 (0b00 vs. 0b10).
// All are flagged commutable.

//   VMAX     : Vector Maximum
defm VMAXs : N3VInt_QHS<0, 0, 0b0110, 0, "vmax.s",
                        int_arm_neon_vmaxs, 1>;
defm VMAXu : N3VInt_QHS<1, 0, 0b0110, 0, "vmax.u",
                        int_arm_neon_vmaxu, 1>;
def VMAXfd : N3VDInt<0, 0, 0b00, 0b1111, 0, "vmax.f32",
                     v2f32, v2f32, int_arm_neon_vmaxf, 1>;
def VMAXfq : N3VQInt<0, 0, 0b00, 0b1111, 0, "vmax.f32",
                     v4f32, v4f32, int_arm_neon_vmaxf, 1>;

//   VMIN     : Vector Minimum
defm VMINs : N3VInt_QHS<0, 0, 0b0110, 1, "vmin.s",
                        int_arm_neon_vmins, 1>;
defm VMINu : N3VInt_QHS<1, 0, 0b0110, 1, "vmin.u",
                        int_arm_neon_vminu, 1>;
def VMINfd : N3VDInt<0, 0, 0b10, 0b1111, 0, "vmin.f32",
                     v2f32, v2f32, int_arm_neon_vminf, 1>;
def VMINfq : N3VQInt<0, 0, 0b10, 0b1111, 0, "vmin.f32",
                     v4f32, v4f32, int_arm_neon_vminf, 1>;
1012194710Sed
1013194710Sed// Vector Pairwise Operations.
1014194710Sed
1015194710Sed//   VPADD    : Vector Pairwise Add
// Only N3VDInt (DPR-sized vector type) records are defined for VPADD.  The
// three integer records share int_arm_neon_vpaddi and differ in the size
// argument (0b00 / 0b01 / 0b10), the mnemonic and the vector type; the f32
// record uses int_arm_neon_vpaddf with a different opcode argument.
1016194710Seddef  VPADDi8  : N3VDInt<0, 0, 0b00, 0b1011, 1, "vpadd.i8", v8i8, v8i8,
1017194710Sed                        int_arm_neon_vpaddi, 0>;
1018194710Seddef  VPADDi16 : N3VDInt<0, 0, 0b01, 0b1011, 1, "vpadd.i16", v4i16, v4i16,
1019194710Sed                        int_arm_neon_vpaddi, 0>;
1020194710Seddef  VPADDi32 : N3VDInt<0, 0, 0b10, 0b1011, 1, "vpadd.i32", v2i32, v2i32,
1021194710Sed                        int_arm_neon_vpaddi, 0>;
1022194710Seddef  VPADDf   : N3VDInt<1, 0, 0b00, 0b1101, 0, "vpadd.f32", v2f32, v2f32,
1023194710Sed                        int_arm_neon_vpaddf, 0>;
1024194710Sed
1025194710Sed//   VPADDL   : Vector Pairwise Add Long
// Signed and unsigned forms differ in the low bit of the 5-bit opcode
// argument (0b00100 vs. 0b00101), the mnemonic stem and the intrinsic.
1026194710Seddefm VPADDLs  : N2VPLInt_QHS<0b11, 0b11, 0b00, 0b00100, 0, "vpaddl.s",
1027194710Sed                             int_arm_neon_vpaddls>;
1028194710Seddefm VPADDLu  : N2VPLInt_QHS<0b11, 0b11, 0b00, 0b00101, 0, "vpaddl.u",
1029194710Sed                             int_arm_neon_vpaddlu>;
1030194710Sed
1031194710Sed//   VPADAL   : Vector Pairwise Add and Accumulate Long
// Encoding: bits[11:8] = 0b0110 followed by the op bit (0 = signed,
// 1 = unsigned), so the 5-bit opcode argument is 0b01100 / 0b01101.
// VPADDL uses 0b0010 in bits[11:8] (0b00100 / 0b00101); the two instruction
// families must not share an opcode value or their encodings collide.
1032194710Seddefm VPADALs  : N2VPLInt2_QHS<0b11, 0b11, 0b00, 0b01100, 0, "vpadal.s",
1033194710Sed                              int_arm_neon_vpadals>;
1034194710Seddefm VPADALu  : N2VPLInt2_QHS<0b11, 0b11, 0b00, 0b01101, 0, "vpadal.u",
1035194710Sed                              int_arm_neon_vpadalu>;
1036194710Sed
1037194710Sed//   VPMAX    : Vector Pairwise Maximum
// Only N3VDInt records are defined for the pairwise max/min operations.
// Integer VPMAX and VPMIN share opcode argument 0b1010 and differ in the bit
// argument that follows (0 for VPMAX, 1 for VPMIN); signed vs. unsigned is
// selected by the first argument (0 vs. 1) together with the intrinsic.
// The f32 records share 0b1111 and differ in the third argument (0b00 for
// VPMAX, 0b10 for VPMIN).
1038194710Seddef  VPMAXs8  : N3VDInt<0, 0, 0b00, 0b1010, 0, "vpmax.s8", v8i8, v8i8,
1039194710Sed                        int_arm_neon_vpmaxs, 0>;
1040194710Seddef  VPMAXs16 : N3VDInt<0, 0, 0b01, 0b1010, 0, "vpmax.s16", v4i16, v4i16,
1041194710Sed                        int_arm_neon_vpmaxs, 0>;
1042194710Seddef  VPMAXs32 : N3VDInt<0, 0, 0b10, 0b1010, 0, "vpmax.s32", v2i32, v2i32,
1043194710Sed                        int_arm_neon_vpmaxs, 0>;
1044194710Seddef  VPMAXu8  : N3VDInt<1, 0, 0b00, 0b1010, 0, "vpmax.u8", v8i8, v8i8,
1045194710Sed                        int_arm_neon_vpmaxu, 0>;
1046194710Seddef  VPMAXu16 : N3VDInt<1, 0, 0b01, 0b1010, 0, "vpmax.u16", v4i16, v4i16,
1047194710Sed                        int_arm_neon_vpmaxu, 0>;
1048194710Seddef  VPMAXu32 : N3VDInt<1, 0, 0b10, 0b1010, 0, "vpmax.u32", v2i32, v2i32,
1049194710Sed                        int_arm_neon_vpmaxu, 0>;
1050194710Seddef  VPMAXf   : N3VDInt<1, 0, 0b00, 0b1111, 0, "vpmax.f32", v2f32, v2f32,
1051194710Sed                        int_arm_neon_vpmaxf, 0>;
1052194710Sed
1053194710Sed//   VPMIN    : Vector Pairwise Minimum
1054194710Seddef  VPMINs8  : N3VDInt<0, 0, 0b00, 0b1010, 1, "vpmin.s8", v8i8, v8i8,
1055194710Sed                        int_arm_neon_vpmins, 0>;
1056194710Seddef  VPMINs16 : N3VDInt<0, 0, 0b01, 0b1010, 1, "vpmin.s16", v4i16, v4i16,
1057194710Sed                        int_arm_neon_vpmins, 0>;
1058194710Seddef  VPMINs32 : N3VDInt<0, 0, 0b10, 0b1010, 1, "vpmin.s32", v2i32, v2i32,
1059194710Sed                        int_arm_neon_vpmins, 0>;
1060194710Seddef  VPMINu8  : N3VDInt<1, 0, 0b00, 0b1010, 1, "vpmin.u8", v8i8, v8i8,
1061194710Sed                        int_arm_neon_vpminu, 0>;
1062194710Seddef  VPMINu16 : N3VDInt<1, 0, 0b01, 0b1010, 1, "vpmin.u16", v4i16, v4i16,
1063194710Sed                        int_arm_neon_vpminu, 0>;
1064194710Seddef  VPMINu32 : N3VDInt<1, 0, 0b10, 0b1010, 1, "vpmin.u32", v2i32, v2i32,
1065194710Sed                        int_arm_neon_vpminu, 0>;
1066194710Seddef  VPMINf   : N3VDInt<1, 0, 0b10, 0b1111, 0, "vpmin.f32", v2f32, v2f32,
1067194710Sed                        int_arm_neon_vpminf, 0>;
1068194710Sed
1069194710Sed// Vector Reciprocal and Reciprocal Square Root Estimate and Step.
1070194710Sed
1071194710Sed//   VRECPE   : Vector Reciprocal Estimate
// The estimate instructions are two-operand records (N2VDInt / N2VQInt) with
// a ".u32" form on v2i32/v4i32 and a ".f32" form on v2f32/v4f32; the two
// forms use different intrinsics (e.g. int_arm_neon_vrecpe vs.
// int_arm_neon_vrecpef) and different 5-bit opcode arguments.
1072194710Seddef  VRECPEd  : N2VDInt<0b11, 0b11, 0b10, 0b11, 0b01000, 0, "vrecpe.u32",
1073194710Sed                        v2i32, v2i32, int_arm_neon_vrecpe>;
1074194710Seddef  VRECPEq  : N2VQInt<0b11, 0b11, 0b10, 0b11, 0b01000, 0, "vrecpe.u32",
1075194710Sed                        v4i32, v4i32, int_arm_neon_vrecpe>;
1076194710Seddef  VRECPEfd : N2VDInt<0b11, 0b11, 0b10, 0b11, 0b01010, 0, "vrecpe.f32",
1077194710Sed                        v2f32, v2f32, int_arm_neon_vrecpef>;
1078194710Seddef  VRECPEfq : N2VQInt<0b11, 0b11, 0b10, 0b11, 0b01010, 0, "vrecpe.f32",
1079194710Sed                        v4f32, v4f32, int_arm_neon_vrecpef>;
1080194710Sed
1081194710Sed//   VRECPS   : Vector Reciprocal Step
// The step instructions are three-operand f32-only records.  VRECPS and
// VRSQRTS share opcode argument 0b1111 and differ in the third argument
// (0b00 vs. 0b10).
1082194710Seddef  VRECPSfd : N3VDInt<0, 0, 0b00, 0b1111, 1, "vrecps.f32", v2f32, v2f32,
1083194710Sed                        int_arm_neon_vrecps, 1>;
1084194710Seddef  VRECPSfq : N3VQInt<0, 0, 0b00, 0b1111, 1, "vrecps.f32", v4f32, v4f32,
1085194710Sed                        int_arm_neon_vrecps, 1>;
1086194710Sed
1087194710Sed//   VRSQRTE  : Vector Reciprocal Square Root Estimate
1088194710Seddef  VRSQRTEd  : N2VDInt<0b11, 0b11, 0b10, 0b11, 0b01001, 0, "vrsqrte.u32",
1089194710Sed                        v2i32, v2i32, int_arm_neon_vrsqrte>;
1090194710Seddef  VRSQRTEq  : N2VQInt<0b11, 0b11, 0b10, 0b11, 0b01001, 0, "vrsqrte.u32",
1091194710Sed                        v4i32, v4i32, int_arm_neon_vrsqrte>;
1092194710Seddef  VRSQRTEfd : N2VDInt<0b11, 0b11, 0b10, 0b11, 0b01011, 0, "vrsqrte.f32",
1093194710Sed                        v2f32, v2f32, int_arm_neon_vrsqrtef>;
1094194710Seddef  VRSQRTEfq : N2VQInt<0b11, 0b11, 0b10, 0b11, 0b01011, 0, "vrsqrte.f32",
1095194710Sed                        v4f32, v4f32, int_arm_neon_vrsqrtef>;
1096194710Sed
1097194710Sed//   VRSQRTS  : Vector Reciprocal Square Root Step
1098194710Seddef VRSQRTSfd : N3VDInt<0, 0, 0b10, 0b1111, 1, "vrsqrts.f32", v2f32, v2f32,
1099194710Sed                        int_arm_neon_vrsqrts, 1>;
1100194710Seddef VRSQRTSfq : N3VQInt<0, 0, 0b10, 0b1111, 1, "vrsqrts.f32", v4f32, v4f32,
1101194710Sed                        int_arm_neon_vrsqrts, 1>;
1102194710Sed
1103194710Sed// Vector Shifts.
1104194710Sed
1105194710Sed//   VSHL     : Vector Shift
// Register-operand shifts select the vshifts/vshiftu intrinsics; the
// immediate forms below select the target-specific NEONvshl / NEONvshrs /
// NEONvshru nodes instead.
1106194710Seddefm VSHLs    : N3VInt_QHSD<0, 0, 0b0100, 0, "vshl.s", int_arm_neon_vshifts, 0>;
1107194710Seddefm VSHLu    : N3VInt_QHSD<1, 0, 0b0100, 0, "vshl.u", int_arm_neon_vshiftu, 0>;
1108194710Sed//   VSHL     : Vector Shift Left (Immediate)
// VSHL (immediate) is encoded with bits[11:8] = 0b0101.  It must not use
// 0b0111 with a leading 0: that is exactly the VQSHLsi (saturating shift left
// by immediate) encoding, and the two records would be indistinguishable.
1109194710Seddefm VSHLi    : N2VSh_QHSD<0, 1, 0b0101, 1, "vshl.i", NEONvshl>;
1110194710Sed//   VSHR     : Vector Shift Right (Immediate)
1111194710Seddefm VSHRs    : N2VSh_QHSD<0, 1, 0b0000, 1, "vshr.s", NEONvshrs>;
1112194710Seddefm VSHRu    : N2VSh_QHSD<1, 1, 0b0000, 1, "vshr.u", NEONvshru>;
1113194710Sed
1114194710Sed//   VSHLL    : Vector Shift Left Long
// Each N2VLSh record names a result type that is twice as wide per element
// as its operand type (v8i16 from v8i8, v4i32 from v4i16, v2i64 from v2i32).
// Signed and unsigned forms differ in the first argument and in the selected
// node (NEONvshlls vs. NEONvshllu).
1115194710Seddef  VSHLLs8  : N2VLSh<0, 1, 0b001000, 0b1010, 0, 0, 1, "vshll.s8",
1116194710Sed                       v8i16, v8i8, NEONvshlls>;
1117194710Seddef  VSHLLs16 : N2VLSh<0, 1, 0b010000, 0b1010, 0, 0, 1, "vshll.s16",
1118194710Sed                       v4i32, v4i16, NEONvshlls>;
1119194710Seddef  VSHLLs32 : N2VLSh<0, 1, 0b100000, 0b1010, 0, 0, 1, "vshll.s32",
1120194710Sed                       v2i64, v2i32, NEONvshlls>;
1121194710Seddef  VSHLLu8  : N2VLSh<1, 1, 0b001000, 0b1010, 0, 0, 1, "vshll.u8",
1122194710Sed                       v8i16, v8i8, NEONvshllu>;
1123194710Seddef  VSHLLu16 : N2VLSh<1, 1, 0b010000, 0b1010, 0, 0, 1, "vshll.u16",
1124194710Sed                       v4i32, v4i16, NEONvshllu>;
1125194710Seddef  VSHLLu32 : N2VLSh<1, 1, 0b100000, 0b1010, 0, 0, 1, "vshll.u32",
1126194710Sed                       v2i64, v2i32, NEONvshllu>;
1127194710Sed
1128194710Sed//   VSHLL    : Vector Shift Left Long (with maximum shift count)
// The ".i" forms use a different opcode layout (6-bit field 0b11xx10, opcode
// 0b0011) and select NEONvshlli rather than the signed/unsigned nodes.
1129194710Seddef  VSHLLi8  : N2VLSh<1, 1, 0b110010, 0b0011, 0, 0, 0, "vshll.i8",
1130194710Sed                       v8i16, v8i8, NEONvshlli>;
1131194710Seddef  VSHLLi16 : N2VLSh<1, 1, 0b110110, 0b0011, 0, 0, 0, "vshll.i16",
1132194710Sed                       v4i32, v4i16, NEONvshlli>;
1133194710Seddef  VSHLLi32 : N2VLSh<1, 1, 0b111010, 0b0011, 0, 0, 0, "vshll.i32",
1134194710Sed                       v2i64, v2i32, NEONvshlli>;
1135194710Sed
1136194710Sed//   VSHRN    : Vector Shift Right and Narrow
// Each N2VNSh record names a result type that is half as wide per element as
// its operand type; the mnemonic suffix gives the operand element width.
1137194710Seddef  VSHRN16  : N2VNSh<0, 1, 0b001000, 0b1000, 0, 0, 1, "vshrn.i16",
1138194710Sed                       v8i8, v8i16, NEONvshrn>;
1139194710Seddef  VSHRN32  : N2VNSh<0, 1, 0b010000, 0b1000, 0, 0, 1, "vshrn.i32",
1140194710Sed                       v4i16, v4i32, NEONvshrn>;
1141194710Seddef  VSHRN64  : N2VNSh<0, 1, 0b100000, 0b1000, 0, 0, 1, "vshrn.i64",
1142194710Sed                       v2i32, v2i64, NEONvshrn>;
1143194710Sed
1144194710Sed//   VRSHL    : Vector Rounding Shift
// Register-operand rounding shifts select the vrshifts/vrshiftu intrinsics;
// the immediate right shifts select the NEONvrshrs / NEONvrshru nodes.
1145194710Seddefm VRSHLs   : N3VInt_QHSD<0,0,0b0101,0, "vrshl.s", int_arm_neon_vrshifts, 0>;
1146194710Seddefm VRSHLu   : N3VInt_QHSD<1,0,0b0101,0, "vrshl.u", int_arm_neon_vrshiftu, 0>;
1147194710Sed//   VRSHR    : Vector Rounding Shift Right
1148194710Seddefm VRSHRs   : N2VSh_QHSD<0, 1, 0b0010, 1, "vrshr.s", NEONvrshrs>;
1149194710Seddefm VRSHRu   : N2VSh_QHSD<1, 1, 0b0010, 1, "vrshr.u", NEONvrshru>;
1150194710Sed
1151194710Sed//   VRSHRN   : Vector Rounding Shift Right and Narrow
// Same arguments as the VSHRN records except for the sixth argument (1 here,
// 0 for VSHRN), the mnemonic and the selected node (NEONvrshrn).
1152194710Seddef  VRSHRN16 : N2VNSh<0, 1, 0b001000, 0b1000, 0, 1, 1, "vrshrn.i16",
1153194710Sed                       v8i8, v8i16, NEONvrshrn>;
1154194710Seddef  VRSHRN32 : N2VNSh<0, 1, 0b010000, 0b1000, 0, 1, 1, "vrshrn.i32",
1155194710Sed                       v4i16, v4i32, NEONvrshrn>;
1156194710Seddef  VRSHRN64 : N2VNSh<0, 1, 0b100000, 0b1000, 0, 1, 1, "vrshrn.i64",
1157194710Sed                       v2i32, v2i64, NEONvrshrn>;
1158194710Sed
1159194710Sed//   VQSHL    : Vector Saturating Shift
// Register-operand saturating shifts select the vqshifts/vqshiftu
// intrinsics; the immediate forms select NEONvqshls / NEONvqshlu, and the
// "vqshlu.s" form selects NEONvqshlsu.
1160194710Seddefm VQSHLs   : N3VInt_QHSD<0,0,0b0100,1, "vqshl.s", int_arm_neon_vqshifts, 0>;
1161194710Seddefm VQSHLu   : N3VInt_QHSD<1,0,0b0100,1, "vqshl.u", int_arm_neon_vqshiftu, 0>;
1162194710Sed//   VQSHL    : Vector Saturating Shift Left (Immediate)
1163194710Seddefm VQSHLsi  : N2VSh_QHSD<0, 1, 0b0111, 1, "vqshl.s", NEONvqshls>;
1164194710Seddefm VQSHLui  : N2VSh_QHSD<1, 1, 0b0111, 1, "vqshl.u", NEONvqshlu>;
1165194710Sed//   VQSHLU   : Vector Saturating Shift Left (Immediate, Unsigned)
1166194710Seddefm VQSHLsu  : N2VSh_QHSD<1, 1, 0b0110, 1, "vqshlu.s", NEONvqshlsu>;
1167194710Sed
1168194710Sed//   VQSHRN   : Vector Saturating Shift Right and Narrow
// Signed records (first argument 0) select NEONvqshrns, unsigned records
// (first argument 1) select NEONvqshrnu; both use opcode argument 0b1001.
1169194710Seddef VQSHRNs16 : N2VNSh<0, 1, 0b001000, 0b1001, 0, 0, 1, "vqshrn.s16",
1170194710Sed                       v8i8, v8i16, NEONvqshrns>;
1171194710Seddef VQSHRNs32 : N2VNSh<0, 1, 0b010000, 0b1001, 0, 0, 1, "vqshrn.s32",
1172194710Sed                       v4i16, v4i32, NEONvqshrns>;
1173194710Seddef VQSHRNs64 : N2VNSh<0, 1, 0b100000, 0b1001, 0, 0, 1, "vqshrn.s64",
1174194710Sed                       v2i32, v2i64, NEONvqshrns>;
1175194710Seddef VQSHRNu16 : N2VNSh<1, 1, 0b001000, 0b1001, 0, 0, 1, "vqshrn.u16",
1176194710Sed                       v8i8, v8i16, NEONvqshrnu>;
1177194710Seddef VQSHRNu32 : N2VNSh<1, 1, 0b010000, 0b1001, 0, 0, 1, "vqshrn.u32",
1178194710Sed                       v4i16, v4i32, NEONvqshrnu>;
1179194710Seddef VQSHRNu64 : N2VNSh<1, 1, 0b100000, 0b1001, 0, 0, 1, "vqshrn.u64",
1180194710Sed                       v2i32, v2i64, NEONvqshrnu>;
1181194710Sed
1182194710Sed//   VQSHRUN  : Vector Saturating Shift Right and Narrow (Unsigned)
// These use first argument 1 with opcode argument 0b1000 (VQSHRN uses
// 0b1001) and select NEONvqshrnsu.
1183194710Seddef VQSHRUN16 : N2VNSh<1, 1, 0b001000, 0b1000, 0, 0, 1, "vqshrun.s16",
1184194710Sed                       v8i8, v8i16, NEONvqshrnsu>;
1185194710Seddef VQSHRUN32 : N2VNSh<1, 1, 0b010000, 0b1000, 0, 0, 1, "vqshrun.s32",
1186194710Sed                       v4i16, v4i32, NEONvqshrnsu>;
1187194710Seddef VQSHRUN64 : N2VNSh<1, 1, 0b100000, 0b1000, 0, 0, 1, "vqshrun.s64",
1188194710Sed                       v2i32, v2i64, NEONvqshrnsu>;
1189194710Sed
1190194710Sed//   VQRSHL   : Vector Saturating Rounding Shift
// Register-operand forms select the vqrshifts/vqrshiftu intrinsics.
1191194710Seddefm VQRSHLs  : N3VInt_QHSD<0, 0, 0b0101, 1, "vqrshl.s",
1192194710Sed                            int_arm_neon_vqrshifts, 0>;
1193194710Seddefm VQRSHLu  : N3VInt_QHSD<1, 0, 0b0101, 1, "vqrshl.u",
1194194710Sed                            int_arm_neon_vqrshiftu, 0>;
1195194710Sed
1196194710Sed//   VQRSHRN  : Vector Saturating Rounding Shift Right and Narrow
// Same arguments as the VQSHRN records except that the sixth argument is 1;
// signed records select NEONvqrshrns, unsigned records NEONvqrshrnu.
1197194710Seddef VQRSHRNs16: N2VNSh<0, 1, 0b001000, 0b1001, 0, 1, 1, "vqrshrn.s16",
1198194710Sed                       v8i8, v8i16, NEONvqrshrns>;
1199194710Seddef VQRSHRNs32: N2VNSh<0, 1, 0b010000, 0b1001, 0, 1, 1, "vqrshrn.s32",
1200194710Sed                       v4i16, v4i32, NEONvqrshrns>;
1201194710Seddef VQRSHRNs64: N2VNSh<0, 1, 0b100000, 0b1001, 0, 1, 1, "vqrshrn.s64",
1202194710Sed                       v2i32, v2i64, NEONvqrshrns>;
1203194710Seddef VQRSHRNu16: N2VNSh<1, 1, 0b001000, 0b1001, 0, 1, 1, "vqrshrn.u16",
1204194710Sed                       v8i8, v8i16, NEONvqrshrnu>;
1205194710Seddef VQRSHRNu32: N2VNSh<1, 1, 0b010000, 0b1001, 0, 1, 1, "vqrshrn.u32",
1206194710Sed                       v4i16, v4i32, NEONvqrshrnu>;
1207194710Seddef VQRSHRNu64: N2VNSh<1, 1, 0b100000, 0b1001, 0, 1, 1, "vqrshrn.u64",
1208194710Sed                       v2i32, v2i64, NEONvqrshrnu>;
1209194710Sed
1210194710Sed//   VQRSHRUN : Vector Saturating Rounding Shift Right and Narrow (Unsigned)
// Same arguments as the VQSHRUN records except that the sixth argument is 1;
// all three select NEONvqrshrnsu.
1211194710Seddef VQRSHRUN16: N2VNSh<1, 1, 0b001000, 0b1000, 0, 1, 1, "vqrshrun.s16",
1212194710Sed                       v8i8, v8i16, NEONvqrshrnsu>;
1213194710Seddef VQRSHRUN32: N2VNSh<1, 1, 0b010000, 0b1000, 0, 1, 1, "vqrshrun.s32",
1214194710Sed                       v4i16, v4i32, NEONvqrshrnsu>;
1215194710Seddef VQRSHRUN64: N2VNSh<1, 1, 0b100000, 0b1000, 0, 1, 1, "vqrshrun.s64",
1216194710Sed                       v2i32, v2i64, NEONvqrshrnsu>;
1217194710Sed
1218194710Sed//   VSRA     : Vector Shift Right and Accumulate
// The accumulate forms reuse the plain shift-right nodes (NEONvshrs /
// NEONvshru, NEONvrshrs / NEONvrshru) as the operation handed to
// N2VShAdd_QHSD.
// NOTE(review): the add of the accumulator is presumably expressed inside
// N2VShAdd_QHSD -- confirm at its definition.
1219194710Seddefm VSRAs    : N2VShAdd_QHSD<0, 1, 0b0001, 1, "vsra.s", NEONvshrs>;
1220194710Seddefm VSRAu    : N2VShAdd_QHSD<1, 1, 0b0001, 1, "vsra.u", NEONvshru>;
1221194710Sed//   VRSRA    : Vector Rounding Shift Right and Accumulate
1222194710Seddefm VRSRAs   : N2VShAdd_QHSD<0, 1, 0b0011, 1, "vrsra.s", NEONvrshrs>;
1223194710Seddefm VRSRAu   : N2VShAdd_QHSD<1, 1, 0b0011, 1, "vrsra.u", NEONvrshru>;
1224194710Sed
1225194710Sed//   VSLI     : Vector Shift Left and Insert
// The insert forms pass a mnemonic stem ending in "." ("vsli." / "vsri.") and
// select the NEONvsli / NEONvsri nodes; they differ in the opcode argument
// (0b0101 vs. 0b0100).
1226194710Seddefm VSLI     : N2VShIns_QHSD<1, 1, 0b0101, 1, "vsli.", NEONvsli>;
1227194710Sed//   VSRI     : Vector Shift Right and Insert
1228194710Seddefm VSRI     : N2VShIns_QHSD<1, 1, 0b0100, 1, "vsri.", NEONvsri>;
1229194710Sed
1230194710Sed// Vector Absolute and Saturating Absolute.
1231194710Sed
1232194710Sed//   VABS     : Vector Absolute Value
// Integer VABS goes through the N2VInt_QHS multiclass with int_arm_neon_vabs;
// the f32 forms are written out for v2f32 and v4f32 with int_arm_neon_vabsf
// and a different 5-bit opcode argument (0b01110 vs. 0b00110).
1233194710Seddefm VABS     : N2VInt_QHS<0b11, 0b11, 0b01, 0b00110, 0, "vabs.s",
1234194710Sed                           int_arm_neon_vabs>;
1235194710Seddef  VABSfd   : N2VDInt<0b11, 0b11, 0b10, 0b01, 0b01110, 0, "vabs.f32",
1236194710Sed                        v2f32, v2f32, int_arm_neon_vabsf>;
1237194710Seddef  VABSfq   : N2VQInt<0b11, 0b11, 0b10, 0b01, 0b01110, 0, "vabs.f32",
1238194710Sed                        v4f32, v4f32, int_arm_neon_vabsf>;
1239194710Sed
1240194710Sed//   VQABS    : Vector Saturating Absolute Value
// Integer-only; selects int_arm_neon_vqabs.
1241194710Seddefm VQABS    : N2VInt_QHS<0b11, 0b11, 0b00, 0b01110, 0, "vqabs.s",
1242194710Sed                           int_arm_neon_vqabs>;
1243194710Sed
1244194710Sed// Vector Negate.
1245194710Sed
// Integer negation has no dedicated node here: it is matched as a subtraction
// from an all-zeros vector.  vneg matches (sub immAllZerosV, x); vneg_conv
// matches the same shape with immAllZerosV_bc as the zero operand.
// NOTE(review): immAllZerosV_bc is presumably an all-zeros vector seen
// through a bitconvert -- confirm at its definition.
1246194710Seddef vneg      : PatFrag<(ops node:$in), (sub immAllZerosV, node:$in)>;
1247194710Seddef vneg_conv : PatFrag<(ops node:$in), (sub immAllZerosV_bc, node:$in)>;
1248194710Sed
// VNEGD / VNEGQ: integer negate on a DPR / QPR operand.
//   size      - 2-bit size argument forwarded to N2V.
//   OpcodeStr - mnemonic; "\t$dst, $src" is appended to form the asm string.
//   Ty        - vector type the vneg pattern result is given.
// The two classes differ only in the register class and in the sixth N2V
// argument (0 for VNEGD, 1 for VNEGQ).
1249194710Sedclass VNEGD<bits<2> size, string OpcodeStr, ValueType Ty>
1250194710Sed  : N2V<0b11, 0b11, size, 0b01, 0b00111, 0, 0, (outs DPR:$dst), (ins DPR:$src),
1251194710Sed        !strconcat(OpcodeStr, "\t$dst, $src"), "",
1252194710Sed        [(set DPR:$dst, (Ty (vneg DPR:$src)))]>;
1253194710Sedclass VNEGQ<bits<2> size, string OpcodeStr, ValueType Ty>
1254194710Sed  : N2V<0b11, 0b11, size, 0b01, 0b00111, 1, 0, (outs QPR:$dst), (ins QPR:$src),
1255194710Sed        !strconcat(OpcodeStr, "\t$dst, $src"), "",
1256194710Sed        [(set QPR:$dst, (Ty (vneg QPR:$src)))]>;
1257194710Sed
1258194710Sed//   VNEG     : Vector Negate
1259194710Seddef  VNEGs8d  : VNEGD<0b00, "vneg.s8", v8i8>;
1260194710Seddef  VNEGs16d : VNEGD<0b01, "vneg.s16", v4i16>;
1261194710Seddef  VNEGs32d : VNEGD<0b10, "vneg.s32", v2i32>;
1262194710Seddef  VNEGs8q  : VNEGQ<0b00, "vneg.s8", v16i8>;
1263194710Seddef  VNEGs16q : VNEGQ<0b01, "vneg.s16", v8i16>;
1264194710Seddef  VNEGs32q : VNEGQ<0b10, "vneg.s32", v4i32>;
1265194710Sed
1266194710Sed//   VNEG     : Vector Negate (floating-point)
// The f32 forms match the generic fneg node directly and use opcode argument
// 0b01111 (the integer classes above use 0b00111).
1267194710Seddef  VNEGf32d : N2V<0b11, 0b11, 0b10, 0b01, 0b01111, 0, 0,
1268194710Sed                    (outs DPR:$dst), (ins DPR:$src), "vneg.f32\t$dst, $src", "",
1269194710Sed                    [(set DPR:$dst, (v2f32 (fneg DPR:$src)))]>;
1270194710Seddef  VNEGf32q : N2V<0b11, 0b11, 0b10, 0b01, 0b01111, 1, 0,
1271194710Sed                    (outs QPR:$dst), (ins QPR:$src), "vneg.f32\t$dst, $src", "",
1272194710Sed                    [(set QPR:$dst, (v4f32 (fneg QPR:$src)))]>;
1273194710Sed
// Extra patterns so that the vneg_conv shape selects the same six integer
// VNEG instructions as the vneg shape used inside VNEGD / VNEGQ.
1274194710Seddef : Pat<(v8i8 (vneg_conv DPR:$src)), (VNEGs8d DPR:$src)>;
1275194710Seddef : Pat<(v4i16 (vneg_conv DPR:$src)), (VNEGs16d DPR:$src)>;
1276194710Seddef : Pat<(v2i32 (vneg_conv DPR:$src)), (VNEGs32d DPR:$src)>;
1277194710Seddef : Pat<(v16i8 (vneg_conv QPR:$src)), (VNEGs8q QPR:$src)>;
1278194710Seddef : Pat<(v8i16 (vneg_conv QPR:$src)), (VNEGs16q QPR:$src)>;
1279194710Seddef : Pat<(v4i32 (vneg_conv QPR:$src)), (VNEGs32q QPR:$src)>;
1280194710Sed
1281194710Sed//   VQNEG    : Vector Saturating Negate
// Unlike VNEG, the saturating form is selected from an intrinsic
// (int_arm_neon_vqneg) rather than from a sub-from-zero pattern.
1282194710Seddefm VQNEG    : N2VInt_QHS<0b11, 0b11, 0b00, 0b01111, 0, "vqneg.s",
1283194710Sed                           int_arm_neon_vqneg>;
1284194710Sed
1285194710Sed// Vector Bit Counting Operations.
1286194710Sed
1287194710Sed//   VCLS     : Vector Count Leading Sign Bits
// VCLS and VCLZ go through the N2VInt_QHS multiclass; VCNT is defined only
// for byte vectors (v8i8 and v16i8) and therefore written out explicitly
// with the fixed mnemonic "vcnt.8".
1288194710Seddefm VCLS     : N2VInt_QHS<0b11, 0b11, 0b00, 0b01000, 0, "vcls.s",
1289194710Sed                           int_arm_neon_vcls>;
1290194710Sed//   VCLZ     : Vector Count Leading Zeros
1291194710Seddefm VCLZ     : N2VInt_QHS<0b11, 0b11, 0b00, 0b01001, 0, "vclz.i",
1292194710Sed                           int_arm_neon_vclz>;
1293194710Sed//   VCNT     : Vector Count One Bits
1294194710Seddef  VCNTd    : N2VDInt<0b11, 0b11, 0b00, 0b00, 0b01010, 0, "vcnt.8",
1295194710Sed                        v8i8, v8i8, int_arm_neon_vcnt>;
1296194710Seddef  VCNTq    : N2VQInt<0b11, 0b11, 0b00, 0b00, 0b01010, 0, "vcnt.8",
1297194710Sed                        v16i8, v16i8, int_arm_neon_vcnt>;
1298194710Sed
1299194710Sed// Vector Move Operations.
1300194710Sed
1301194710Sed//   VMOV     : Vector Move (Register)
1302194710Sed
// Register-to-register moves for DPR and QPR.  Both records have an empty
// pattern list ([]), so nothing in this file selects them from a DAG
// pattern; they differ only in the register class and the fifth N3V argument.
// NOTE(review): presumably emitted by the target's copy-insertion code --
// confirm against the users of VMOVD / VMOVQ.
1303194710Seddef  VMOVD    : N3V<0, 0, 0b10, 0b0001, 0, 1, (outs DPR:$dst), (ins DPR:$src),
1304194710Sed                    "vmov\t$dst, $src", "", []>;
1305194710Seddef  VMOVQ    : N3V<0, 0, 0b10, 0b0001, 1, 1, (outs QPR:$dst), (ins QPR:$src),
1306194710Sed                    "vmov\t$dst, $src", "", []>;
1307194710Sed
1308194710Sed//   VMOV     : Vector Move (Immediate)
1309194710Sed
1310194710Sed// VMOV_get_imm8 xform function: convert build_vector to VMOV.i8 imm.
// Each size below comes as a pair:
//   VMOV_get_immN - SDNodeXForm returning ARM::getVMOVImm(N, k, *CurDAG).
//   vmovImmN      - PatLeaf on build_vector that accepts the node only when
//                   the same getVMOVImm call yields a non-null node, and
//                   attaches VMOV_get_immN as its transform.
// The four pairs differ only in k, the second getVMOVImm argument
// (1, 2, 4, 8 for the i8, i16, i32, i64 forms).
// NOTE(review): k is presumably the element size in bytes -- confirm against
// the declaration of ARM::getVMOVImm.
1311194710Seddef VMOV_get_imm8 : SDNodeXForm<build_vector, [{
1312194710Sed  return ARM::getVMOVImm(N, 1, *CurDAG);
1313194710Sed}]>;
1314194710Seddef vmovImm8 : PatLeaf<(build_vector), [{
1315194710Sed  return ARM::getVMOVImm(N, 1, *CurDAG).getNode() != 0;
1316194710Sed}], VMOV_get_imm8>;
1317194710Sed
1318194710Sed// VMOV_get_imm16 xform function: convert build_vector to VMOV.i16 imm.
1319194710Seddef VMOV_get_imm16 : SDNodeXForm<build_vector, [{
1320194710Sed  return ARM::getVMOVImm(N, 2, *CurDAG);
1321194710Sed}]>;
1322194710Seddef vmovImm16 : PatLeaf<(build_vector), [{
1323194710Sed  return ARM::getVMOVImm(N, 2, *CurDAG).getNode() != 0;
1324194710Sed}], VMOV_get_imm16>;
1325194710Sed
1326194710Sed// VMOV_get_imm32 xform function: convert build_vector to VMOV.i32 imm.
1327194710Seddef VMOV_get_imm32 : SDNodeXForm<build_vector, [{
1328194710Sed  return ARM::getVMOVImm(N, 4, *CurDAG);
1329194710Sed}]>;
1330194710Seddef vmovImm32 : PatLeaf<(build_vector), [{
1331194710Sed  return ARM::getVMOVImm(N, 4, *CurDAG).getNode() != 0;
1332194710Sed}], VMOV_get_imm32>;
1333194710Sed
1334194710Sed// VMOV_get_imm64 xform function: convert build_vector to VMOV.i64 imm.
1335194710Seddef VMOV_get_imm64 : SDNodeXForm<build_vector, [{
1336194710Sed  return ARM::getVMOVImm(N, 8, *CurDAG);
1337194710Sed}]>;
1338194710Seddef vmovImm64 : PatLeaf<(build_vector), [{
1339194710Sed  return ARM::getVMOVImm(N, 8, *CurDAG).getNode() != 0;
1340194710Sed}], VMOV_get_imm64>;
1341194710Sed
1342194710Sed// Note: Some of the cmode bits in the following VMOV instructions need to
1343194710Sed// be encoded based on the immed values.
1344194710Sed
// One record per (element size, register class) pair.  Each takes a single
// immediate operand $SIMM of the matching iNimm operand type and is selected
// when a build_vector of the given vector type satisfies the corresponding
// vmovImmN leaf.  Within a pair, the DPR and QPR records differ only in the
// fifth N1ModImm argument (0 vs. 1), the register class and the vector type.
1345194710Seddef VMOVv8i8  : N1ModImm<1, 0b000, 0b1110, 0, 0, 0, 1, (outs DPR:$dst),
1346194710Sed                         (ins i8imm:$SIMM), "vmov.i8\t$dst, $SIMM", "",
1347194710Sed                         [(set DPR:$dst, (v8i8 vmovImm8:$SIMM))]>;
1348194710Seddef VMOVv16i8 : N1ModImm<1, 0b000, 0b1110, 0, 1, 0, 1, (outs QPR:$dst),
1349194710Sed                         (ins i8imm:$SIMM), "vmov.i8\t$dst, $SIMM", "",
1350194710Sed                         [(set QPR:$dst, (v16i8 vmovImm8:$SIMM))]>;
1351194710Sed
1352194710Seddef VMOVv4i16 : N1ModImm<1, 0b000, 0b1000, 0, 0, 0, 1, (outs DPR:$dst),
1353194710Sed                         (ins i16imm:$SIMM), "vmov.i16\t$dst, $SIMM", "",
1354194710Sed                         [(set DPR:$dst, (v4i16 vmovImm16:$SIMM))]>;
1355194710Seddef VMOVv8i16 : N1ModImm<1, 0b000, 0b1000, 0, 1, 0, 1, (outs QPR:$dst),
1356194710Sed                         (ins i16imm:$SIMM), "vmov.i16\t$dst, $SIMM", "",
1357194710Sed                         [(set QPR:$dst, (v8i16 vmovImm16:$SIMM))]>;
1358194710Sed
1359194710Seddef VMOVv2i32 : N1ModImm<1, 0b000, 0b0000, 0, 0, 0, 1, (outs DPR:$dst),
1360194710Sed                         (ins i32imm:$SIMM), "vmov.i32\t$dst, $SIMM", "",
1361194710Sed                         [(set DPR:$dst, (v2i32 vmovImm32:$SIMM))]>;
1362194710Seddef VMOVv4i32 : N1ModImm<1, 0b000, 0b0000, 0, 1, 0, 1, (outs QPR:$dst),
1363194710Sed                         (ins i32imm:$SIMM), "vmov.i32\t$dst, $SIMM", "",
1364194710Sed                         [(set QPR:$dst, (v4i32 vmovImm32:$SIMM))]>;
1365194710Sed
// The i64 records share the third argument (0b1110) with the i8 records and
// differ from them in the sixth argument (1 vs. 0).
1366194710Seddef VMOVv1i64 : N1ModImm<1, 0b000, 0b1110, 0, 0, 1, 1, (outs DPR:$dst),
1367194710Sed                         (ins i64imm:$SIMM), "vmov.i64\t$dst, $SIMM", "",
1368194710Sed                         [(set DPR:$dst, (v1i64 vmovImm64:$SIMM))]>;
1369194710Seddef VMOVv2i64 : N1ModImm<1, 0b000, 0b1110, 0, 1, 1, 1, (outs QPR:$dst),
1370194710Sed                         (ins i64imm:$SIMM), "vmov.i64\t$dst, $SIMM", "",
1371194710Sed                         [(set QPR:$dst, (v2i64 vmovImm64:$SIMM))]>;
1372194710Sed
1373194710Sed//   VMOV     : Vector Get Lane (move scalar to ARM core register)
1374194710Sed
// D-register lane reads into a GPR.  The 8- and 16-bit forms come in signed
// (NEONvgetlanes) and unsigned (NEONvgetlaneu) variants; the 32-bit form
// matches the generic extractelt node and has a single ".32" variant.
1375194710Seddef VGETLNs8  : NVGetLane<0b11100101, 0b1011, 0b00,
1376194710Sed                          (outs GPR:$dst), (ins DPR:$src, i32imm:$lane),
1377194710Sed                          "vmov", ".s8\t$dst, $src[$lane]",
1378194710Sed                          [(set GPR:$dst, (NEONvgetlanes (v8i8 DPR:$src),
1379194710Sed                                           imm:$lane))]>;
1380194710Seddef VGETLNs16 : NVGetLane<0b11100001, 0b1011, 0b01,
1381194710Sed                          (outs GPR:$dst), (ins DPR:$src, i32imm:$lane),
1382194710Sed                          "vmov", ".s16\t$dst, $src[$lane]",
1383194710Sed                          [(set GPR:$dst, (NEONvgetlanes (v4i16 DPR:$src),
1384194710Sed                                           imm:$lane))]>;
1385194710Seddef VGETLNu8  : NVGetLane<0b11101101, 0b1011, 0b00,
1386194710Sed                          (outs GPR:$dst), (ins DPR:$src, i32imm:$lane),
1387194710Sed                          "vmov", ".u8\t$dst, $src[$lane]",
1388194710Sed                          [(set GPR:$dst, (NEONvgetlaneu (v8i8 DPR:$src),
1389194710Sed                                           imm:$lane))]>;
1390194710Seddef VGETLNu16 : NVGetLane<0b11101001, 0b1011, 0b01,
1391194710Sed                          (outs GPR:$dst), (ins DPR:$src, i32imm:$lane),
1392194710Sed                          "vmov", ".u16\t$dst, $src[$lane]",
1393194710Sed                          [(set GPR:$dst, (NEONvgetlaneu (v4i16 DPR:$src),
1394194710Sed                                           imm:$lane))]>;
1395194710Seddef VGETLNi32 : NVGetLane<0b11100001, 0b1011, 0b00,
1396194710Sed                          (outs GPR:$dst), (ins DPR:$src, i32imm:$lane),
1397194710Sed                          "vmov", ".32\t$dst, $src[$lane]",
1398194710Sed                          [(set GPR:$dst, (extractelt (v2i32 DPR:$src),
1399194710Sed                                           imm:$lane))]>;
1400194710Sed// def VGETLNf32: see FMRDH and FMRDL in ARMInstrVFP.td
// Q-register lane reads reuse the D-register instructions: the lane index is
// mapped by SubReg_*_reg to the subregister passed to EXTRACT_SUBREG, and by
// SubReg_*_lane to the lane operand given to the D-form instruction.
1401194710Seddef : Pat<(NEONvgetlanes (v16i8 QPR:$src), imm:$lane),
1402194710Sed          (VGETLNs8 (v8i8 (EXTRACT_SUBREG QPR:$src,
1403194710Sed                           (SubReg_i8_reg imm:$lane))),
1404194710Sed                     (SubReg_i8_lane imm:$lane))>;
1405194710Seddef : Pat<(NEONvgetlanes (v8i16 QPR:$src), imm:$lane),
1406194710Sed          (VGETLNs16 (v4i16 (EXTRACT_SUBREG QPR:$src,
1407194710Sed                             (SubReg_i16_reg imm:$lane))),
1408194710Sed                     (SubReg_i16_lane imm:$lane))>;
1409194710Seddef : Pat<(NEONvgetlaneu (v16i8 QPR:$src), imm:$lane),
1410194710Sed          (VGETLNu8 (v8i8 (EXTRACT_SUBREG QPR:$src,
1411194710Sed                           (SubReg_i8_reg imm:$lane))),
1412194710Sed                     (SubReg_i8_lane imm:$lane))>;
1413194710Seddef : Pat<(NEONvgetlaneu (v8i16 QPR:$src), imm:$lane),
1414194710Sed          (VGETLNu16 (v4i16 (EXTRACT_SUBREG QPR:$src,
1415194710Sed                             (SubReg_i16_reg imm:$lane))),
1416194710Sed                     (SubReg_i16_lane imm:$lane))>;
1417194710Seddef : Pat<(extractelt (v4i32 QPR:$src), imm:$lane),
1418194710Sed          (VGETLNi32 (v2i32 (EXTRACT_SUBREG QPR:$src,
1419194710Sed                             (SubReg_i32_reg imm:$lane))),
1420194710Sed                     (SubReg_i32_lane imm:$lane))>;
// A v2f64 element read needs no instruction: it is a plain EXTRACT_SUBREG.
// The equivalent v2i64 pattern is present but commented out.
1421194710Sed//def : Pat<(extractelt (v2i64 QPR:$src1), imm:$src2),
1422194710Sed//          (EXTRACT_SUBREG QPR:$src1, (SubReg_f64_reg imm:$src2))>;
1423194710Seddef : Pat<(extractelt (v2f64 QPR:$src1), imm:$src2),
1424194710Sed          (EXTRACT_SUBREG QPR:$src1, (SubReg_f64_reg imm:$src2))>;
1425194710Sed
1426194710Sed
1427194710Sed//   VMOV     : Vector Set Lane (move ARM core register to scalar)
1428194710Sed
// D-register lane writes from a GPR.  The input vector $src1 is tied to the
// result $dst by the Constraints string, so the instruction updates its
// destination in place.  The 8- and 16-bit forms match vector_insert; the
// 32-bit form matches insertelt.
1429194710Sedlet Constraints = "$src1 = $dst" in {
1430194710Seddef VSETLNi8  : NVSetLane<0b11100100, 0b1011, 0b00, (outs DPR:$dst),
1431194710Sed                          (ins DPR:$src1, GPR:$src2, i32imm:$lane),
1432194710Sed                          "vmov", ".8\t$dst[$lane], $src2",
1433194710Sed                          [(set DPR:$dst, (vector_insert (v8i8 DPR:$src1),
1434194710Sed                                           GPR:$src2, imm:$lane))]>;
1435194710Seddef VSETLNi16 : NVSetLane<0b11100000, 0b1011, 0b01, (outs DPR:$dst),
1436194710Sed                          (ins DPR:$src1, GPR:$src2, i32imm:$lane),
1437194710Sed                          "vmov", ".16\t$dst[$lane], $src2",
1438194710Sed                          [(set DPR:$dst, (vector_insert (v4i16 DPR:$src1),
1439194710Sed                                           GPR:$src2, imm:$lane))]>;
1440194710Seddef VSETLNi32 : NVSetLane<0b11100000, 0b1011, 0b00, (outs DPR:$dst),
1441194710Sed                          (ins DPR:$src1, GPR:$src2, i32imm:$lane),
1442194710Sed                          "vmov", ".32\t$dst[$lane], $src2",
1443194710Sed                          [(set DPR:$dst, (insertelt (v2i32 DPR:$src1),
1444194710Sed                                           GPR:$src2, imm:$lane))]>;
1445194710Sed}
// Q-register lane writes are built from the D-register instructions in three
// steps: EXTRACT_SUBREG the D half chosen by SubReg_*_reg, update the lane
// chosen by SubReg_*_lane with VSETLN*, then INSERT_SUBREG the updated half
// back into $src1 at the same subregister.
1446194710Seddef : Pat<(vector_insert (v16i8 QPR:$src1), GPR:$src2, imm:$lane),
1447194710Sed          (v16i8 (INSERT_SUBREG QPR:$src1, 
1448194710Sed                  (VSETLNi8 (v8i8 (EXTRACT_SUBREG QPR:$src1,
1449194710Sed                                   (SubReg_i8_reg imm:$lane))),
1450194710Sed                            GPR:$src2, (SubReg_i8_lane imm:$lane)),
1451194710Sed                  (SubReg_i8_reg imm:$lane)))>;
1452194710Seddef : Pat<(vector_insert (v8i16 QPR:$src1), GPR:$src2, imm:$lane),
1453194710Sed          (v8i16 (INSERT_SUBREG QPR:$src1, 
1454194710Sed                  (VSETLNi16 (v4i16 (EXTRACT_SUBREG QPR:$src1,
1455194710Sed                                     (SubReg_i16_reg imm:$lane))),
1456194710Sed                             GPR:$src2, (SubReg_i16_lane imm:$lane)),
1457194710Sed                  (SubReg_i16_reg imm:$lane)))>;
1458194710Seddef : Pat<(insertelt (v4i32 QPR:$src1), GPR:$src2, imm:$lane),
1459194710Sed          (v4i32 (INSERT_SUBREG QPR:$src1, 
1460194710Sed                  (VSETLNi32 (v2i32 (EXTRACT_SUBREG QPR:$src1,
1461194710Sed                                     (SubReg_i32_reg imm:$lane))),
1462194710Sed                             GPR:$src2, (SubReg_i32_lane imm:$lane)),
1463194710Sed                  (SubReg_i32_reg imm:$lane)))>;
1464194710Sed
// A v2f64 element write is a plain INSERT_SUBREG of the DPR value; the
// equivalent v2i64 pattern is present but commented out.
1465194710Sed//def : Pat<(v2i64 (insertelt QPR:$src1, DPR:$src2, imm:$src3)),
1466194710Sed//          (INSERT_SUBREG QPR:$src1, DPR:$src2, (SubReg_f64_reg imm:$src3))>;
1467194710Seddef : Pat<(v2f64 (insertelt QPR:$src1, DPR:$src2, imm:$src3)),
1468194710Sed          (INSERT_SUBREG QPR:$src1, DPR:$src2, (SubReg_f64_reg imm:$src3))>;
1469194710Sed
1470194710Sed//   VDUP     : Vector Duplicate (from ARM core register to all elements)
1471194710Sed
// splat_lo: matches a vector_shuffle whose ShuffleVectorSDNode reports
// isSplat() with a splat index of 0, i.e. every result element is a copy of
// element 0.
1472194710Seddef splat_lo : PatFrag<(ops node:$lhs, node:$rhs),
1473194710Sed                       (vector_shuffle node:$lhs, node:$rhs), [{
1474194710Sed  ShuffleVectorSDNode *SVOp = cast<ShuffleVectorSDNode>(N);
1475194710Sed  return SVOp->isSplat() && SVOp->getSplatIndex() == 0;
1476194710Sed}]>;
1477194710Sed
// VDUPD / VDUPQ: duplicate a GPR into every element of a DPR / QPR vector.
//   opcod1, opcod3 - encoding arguments forwarded to NVDup.
//   asmSize        - size suffix (".8", ".16", ".32"); "\t$dst, $src" is
//                    appended to form the operand string.
//   Ty             - result vector type.
// The pattern is a splat_lo of (scalar_to_vector GPR:$src) with an undef
// second shuffle operand.  The two classes differ only in register class.
1478194710Sedclass VDUPD<bits<8> opcod1, bits<2> opcod3, string asmSize, ValueType Ty>
1479194710Sed  : NVDup<opcod1, 0b1011, opcod3, (outs DPR:$dst), (ins GPR:$src),
1480194710Sed          "vdup", !strconcat(asmSize, "\t$dst, $src"),
1481194710Sed          [(set DPR:$dst, (Ty (splat_lo (scalar_to_vector GPR:$src), undef)))]>;
1482194710Sedclass VDUPQ<bits<8> opcod1, bits<2> opcod3, string asmSize, ValueType Ty>
1483194710Sed  : NVDup<opcod1, 0b1011, opcod3, (outs QPR:$dst), (ins GPR:$src),
1484194710Sed          "vdup", !strconcat(asmSize, "\t$dst, $src"),
1485194710Sed          [(set QPR:$dst, (Ty (splat_lo (scalar_to_vector GPR:$src), undef)))]>;
1486194710Sed
1487194710Seddef  VDUP8d   : VDUPD<0b11101100, 0b00, ".8", v8i8>;
1488194710Seddef  VDUP16d  : VDUPD<0b11101000, 0b01, ".16", v4i16>;
1489194710Seddef  VDUP32d  : VDUPD<0b11101000, 0b00, ".32", v2i32>;
1490194710Seddef  VDUP8q   : VDUPQ<0b11101110, 0b00, ".8", v16i8>;
1491194710Seddef  VDUP16q  : VDUPQ<0b11101010, 0b01, ".16", v8i16>;
1492194710Seddef  VDUP32q  : VDUPQ<0b11101010, 0b00, ".32", v4i32>;
1493194710Sed
// f32 splats use the same encoding arguments and ".32" mnemonic as
// VDUP32d / VDUP32q; the pattern differs in that the GPR is bitconverted to
// f32 before scalar_to_vector.
1494194710Seddef  VDUPfd   : NVDup<0b11101000, 0b1011, 0b00, (outs DPR:$dst), (ins GPR:$src),
1495194710Sed                      "vdup", ".32\t$dst, $src",
1496194710Sed                      [(set DPR:$dst, (v2f32 (splat_lo
1497194710Sed                                              (scalar_to_vector
1498194710Sed                                               (f32 (bitconvert GPR:$src))),
1499194710Sed                                              undef)))]>;
1500194710Seddef  VDUPfq   : NVDup<0b11101010, 0b1011, 0b00, (outs QPR:$dst), (ins GPR:$src),
1501194710Sed                      "vdup", ".32\t$dst, $src",
1502194710Sed                      [(set QPR:$dst, (v4f32 (splat_lo
1503194710Sed                                              (scalar_to_vector
1504194710Sed                                               (f32 (bitconvert GPR:$src))),
1505194710Sed                                              undef)))]>;
1506194710Sed
1507194710Sed//   VDUP     : Vector Duplicate Lane (from scalar to all elements)
1508194710Sed
// SHUFFLE_get_splat_lane - Node transform applied to a vector_shuffle node.
// It returns an i32 target constant whose value is the shuffle's splat index
// (ShuffleVectorSDNode::getSplatIndex()).
1509194710Seddef SHUFFLE_get_splat_lane : SDNodeXForm<vector_shuffle, [{
1510194710Sed  ShuffleVectorSDNode *SVOp = cast<ShuffleVectorSDNode>(N);
1511194710Sed  return CurDAG->getTargetConstant(SVOp->getSplatIndex(), MVT::i32);
1512194710Sed}]>;
1513194710Sed
// splat_lane - Pattern fragment over a two-operand vector_shuffle.  The C++
// predicate accepts any shuffle for which isSplat() is true, regardless of
// which element is splatted.  SHUFFLE_get_splat_lane is attached as the
// fragment's transform, so the splat index is available as an i32 target
// constant where the fragment is named in a pattern (e.g. "splat_lane:$lane").
1514194710Seddef splat_lane : PatFrag<(ops node:$lhs, node:$rhs),
1515194710Sed                         (vector_shuffle node:$lhs, node:$rhs), [{
1516194710Sed  ShuffleVectorSDNode *SVOp = cast<ShuffleVectorSDNode>(N);
1517194710Sed  return SVOp->isSplat();
1518194710Sed}], SHUFFLE_get_splat_lane>;
1519194710Sed
// VDUPLND - VDUP of one lane of a DPR source to a DPR destination.
//   op19_18, op17_16 : passed through to N2V as its third and fourth arguments.
//   OpcodeStr        : full mnemonic including size suffix (e.g. "vdup.8").
//   Ty               : result vector type of the selection pattern.
// The pattern matches a splat_lane shuffle of $src (second operand undef) and
// binds the fragment to $lane, which is printed as "$src[$lane]".
1520194710Sedclass VDUPLND<bits<2> op19_18, bits<2> op17_16, string OpcodeStr, ValueType Ty>
1521194710Sed  : N2V<0b11, 0b11, op19_18, op17_16, 0b11000, 0, 0,
1522194710Sed        (outs DPR:$dst), (ins DPR:$src, i32imm:$lane),
1523194710Sed        !strconcat(OpcodeStr, "\t$dst, $src[$lane]"), "",
1524194710Sed        [(set DPR:$dst, (Ty (splat_lane:$lane DPR:$src, undef)))]>;
1525194710Sed
1526194710Sed// vector_shuffle requires that the source and destination types match, so
1527194710Sed// VDUP to a 128-bit result uses a target-specific VDUPLANEQ node.
// VDUPLNQ - VDUP of one lane of a DPR source to a QPR destination.
//   op19_18, op17_16 : passed through to N2V as its third and fourth arguments.
//   OpcodeStr        : full mnemonic including size suffix (e.g. "vdup.8").
//   ResTy            : type of the QPR result.
//   OpTy             : type of the DPR source.
// The lane number is matched as an immediate operand of NEONvduplaneq and
// printed as "$src[$lane]".
1528194710Sedclass VDUPLNQ<bits<2> op19_18, bits<2> op17_16, string OpcodeStr,
1529194710Sed              ValueType ResTy, ValueType OpTy>
1530194710Sed  : N2V<0b11, 0b11, op19_18, op17_16, 0b11000, 1, 0,
1531194710Sed        (outs QPR:$dst), (ins DPR:$src, i32imm:$lane),
1532194710Sed        !strconcat(OpcodeStr, "\t$dst, $src[$lane]"), "",
1533194710Sed        [(set QPR:$dst, (ResTy (NEONvduplaneq (OpTy DPR:$src), imm:$lane)))]>;
1534194710Sed
// VDUP-lane definitions.  The "d" records derive from VDUPLND (DPR result)
// and the "q" records from VDUPLNQ (QPR result from a DPR source).
// The 32-bit integer and f32 records ("32d"/"fd" and "32q"/"fq") use the same
// opcode arguments and "vdup.32" mnemonic; they differ only in value type.
1535194710Seddef VDUPLN8d  : VDUPLND<0b00, 0b01, "vdup.8", v8i8>;
1536194710Seddef VDUPLN16d : VDUPLND<0b00, 0b10, "vdup.16", v4i16>;
1537194710Seddef VDUPLN32d : VDUPLND<0b01, 0b00, "vdup.32", v2i32>;
1538194710Seddef VDUPLNfd  : VDUPLND<0b01, 0b00, "vdup.32", v2f32>;
1539194710Seddef VDUPLN8q  : VDUPLNQ<0b00, 0b01, "vdup.8", v16i8, v8i8>;
1540194710Seddef VDUPLN16q : VDUPLNQ<0b00, 0b10, "vdup.16", v8i16, v4i16>;
1541194710Seddef VDUPLN32q : VDUPLNQ<0b01, 0b00, "vdup.32", v4i32, v2i32>;
1542194710Seddef VDUPLNfq  : VDUPLNQ<0b01, 0b00, "vdup.32", v4f32, v2f32>;
1543194710Sed
// Narrowing moves.  Each defm instantiates the N2VNInt_HSD multiclass with a
// mnemonic prefix and the NEON intrinsic that the generated records select.
1544194710Sed//   VMOVN    : Vector Narrowing Move
1545194710Seddefm VMOVN    : N2VNInt_HSD<0b11,0b11,0b10,0b00100,0,0, "vmovn.i",
1546194710Sed                            int_arm_neon_vmovn>;
1547194710Sed//   VQMOVN   : Vector Saturating Narrowing Move
// The signed and unsigned forms differ in the fifth argument (0 vs. 1), the
// mnemonic suffix and the intrinsic.
1548194710Seddefm VQMOVNs  : N2VNInt_HSD<0b11,0b11,0b10,0b00101,0,0, "vqmovn.s",
1549194710Sed                            int_arm_neon_vqmovns>;
1550194710Seddefm VQMOVNu  : N2VNInt_HSD<0b11,0b11,0b10,0b00101,1,0, "vqmovn.u",
1551194710Sed                            int_arm_neon_vqmovnu>;
// Note the mnemonic: the VQMOVNsu records print as "vqmovun.s" and select
// int_arm_neon_vqmovnsu.
1552194710Seddefm VQMOVNsu : N2VNInt_HSD<0b11,0b11,0b10,0b00100,1,0, "vqmovun.s",
1553194710Sed                            int_arm_neon_vqmovnsu>;
1554194710Sed//   VMOVL    : Vector Lengthening Move
// Both defm's instantiate the N2VLInt_QHS multiclass.  The signed and unsigned
// forms differ only in the first argument (0 vs. 1), the mnemonic suffix and
// the intrinsic (int_arm_neon_vmovls vs. int_arm_neon_vmovlu).
1555194710Seddefm VMOVLs   : N2VLInt_QHS<0,1,0b1010,0,0,1, "vmovl.s", int_arm_neon_vmovls>;
1556194710Seddefm VMOVLu   : N2VLInt_QHS<1,1,0b1010,0,0,1, "vmovl.u", int_arm_neon_vmovlu>;
1557194710Sed
1558194710Sed// Vector Conversions.
1559194710Sed
1560194710Sed//   VCVT     : Vector Convert Between Floating-Point and Integers
// Two-element forms built on N2VD: v2f32 -> v2i32 via fp_to_sint / fp_to_uint
// and v2i32 -> v2f32 via sint_to_fp / uint_to_fp.  For each record the two
// value types are listed result first, then source.
1561194710Seddef  VCVTf2sd : N2VD<0b11, 0b11, 0b10, 0b11, 0b01110, 0, "vcvt.s32.f32",
1562194710Sed                     v2i32, v2f32, fp_to_sint>;
1563194710Seddef  VCVTf2ud : N2VD<0b11, 0b11, 0b10, 0b11, 0b01111, 0, "vcvt.u32.f32",
1564194710Sed                     v2i32, v2f32, fp_to_uint>;
1565194710Seddef  VCVTs2fd : N2VD<0b11, 0b11, 0b10, 0b11, 0b01100, 0, "vcvt.f32.s32",
1566194710Sed                     v2f32, v2i32, sint_to_fp>;
1567194710Seddef  VCVTu2fd : N2VD<0b11, 0b11, 0b10, 0b11, 0b01101, 0, "vcvt.f32.u32",
1568194710Sed                     v2f32, v2i32, uint_to_fp>;
1569194710Sed
// Four-element float/integer VCVT forms built on N2VQ: v4f32 -> v4i32 via
// fp_to_sint / fp_to_uint and v4i32 -> v4f32 via sint_to_fp / uint_to_fp.
// The opcode arguments and mnemonics are the same as in the corresponding
// N2VD-based "d" records; only the base class and value types differ.
1570194710Seddef  VCVTf2sq : N2VQ<0b11, 0b11, 0b10, 0b11, 0b01110, 0, "vcvt.s32.f32",
1571194710Sed                     v4i32, v4f32, fp_to_sint>;
1572194710Seddef  VCVTf2uq : N2VQ<0b11, 0b11, 0b10, 0b11, 0b01111, 0, "vcvt.u32.f32",
1573194710Sed                     v4i32, v4f32, fp_to_uint>;
1574194710Seddef  VCVTs2fq : N2VQ<0b11, 0b11, 0b10, 0b11, 0b01100, 0, "vcvt.f32.s32",
1575194710Sed                     v4f32, v4i32, sint_to_fp>;
1576194710Seddef  VCVTu2fq : N2VQ<0b11, 0b11, 0b10, 0b11, 0b01101, 0, "vcvt.f32.u32",
1577194710Sed                     v4f32, v4i32, uint_to_fp>;
1578194710Sed
1579194710Sed//   VCVT     : Vector Convert Between Floating-Point and Fixed-Point.
1580194710Sed// Note: Some of the opcode bits in the following VCVT instructions need to
1581194710Sed// be encoded based on the immed values.
// Two-element fixed-point VCVT forms built on N2VCvtD, selected from the
// int_arm_neon_vcvt* intrinsics.  Within this group the signed and unsigned
// records differ in the first argument (0 vs. 1), and the float-to-fixed and
// fixed-to-float records differ in the fourth argument (0b1111 vs. 0b1110).
// The third argument is left as 0b000000 in every record.
1582194710Seddef VCVTf2xsd : N2VCvtD<0, 1, 0b000000, 0b1111, 0, 1, "vcvt.s32.f32",
1583194710Sed                        v2i32, v2f32, int_arm_neon_vcvtfp2fxs>;
1584194710Seddef VCVTf2xud : N2VCvtD<1, 1, 0b000000, 0b1111, 0, 1, "vcvt.u32.f32",
1585194710Sed                        v2i32, v2f32, int_arm_neon_vcvtfp2fxu>;
1586194710Seddef VCVTxs2fd : N2VCvtD<0, 1, 0b000000, 0b1110, 0, 1, "vcvt.f32.s32",
1587194710Sed                        v2f32, v2i32, int_arm_neon_vcvtfxs2fp>;
1588194710Seddef VCVTxu2fd : N2VCvtD<1, 1, 0b000000, 0b1110, 0, 1, "vcvt.f32.u32",
1589194710Sed                        v2f32, v2i32, int_arm_neon_vcvtfxu2fp>;
1590194710Sed
// Four-element fixed-point VCVT forms built on N2VCvtQ, selected from the
// int_arm_neon_vcvt* intrinsics.  Within this group the signed and unsigned
// records differ in the first argument (0 vs. 1), and the float-to-fixed and
// fixed-to-float records differ in the fourth argument (0b1111 vs. 0b1110).
// The third argument is left as 0b000000 in every record.
1591194710Seddef VCVTf2xsq : N2VCvtQ<0, 1, 0b000000, 0b1111, 0, 1, "vcvt.s32.f32",
1592194710Sed                        v4i32, v4f32, int_arm_neon_vcvtfp2fxs>;
1593194710Seddef VCVTf2xuq : N2VCvtQ<1, 1, 0b000000, 0b1111, 0, 1, "vcvt.u32.f32",
1594194710Sed                        v4i32, v4f32, int_arm_neon_vcvtfp2fxu>;
1595194710Seddef VCVTxs2fq : N2VCvtQ<0, 1, 0b000000, 0b1110, 0, 1, "vcvt.f32.s32",
1596194710Sed                        v4f32, v4i32, int_arm_neon_vcvtfxs2fp>;
1597194710Seddef VCVTxu2fq : N2VCvtQ<1, 1, 0b000000, 0b1110, 0, 1, "vcvt.f32.u32",
1598194710Sed                        v4f32, v4i32, int_arm_neon_vcvtfxu2fp>;
1599194710Sed
1600194710Sed//===----------------------------------------------------------------------===//
1601194710Sed// Non-Instruction Patterns
1602194710Sed//===----------------------------------------------------------------------===//
1603194710Sed
1604194710Sed// bit_convert
// Bitconverts between the types held in DPR (v1i64, v2i32, v4i16, v8i8, f64,
// v2f32) select to the same source register, retyped to the result type.
// There is one pattern for every ordered pair of distinct types (6 x 5 = 30),
// grouped by result type.
1605194710Seddef : Pat<(v1i64 (bitconvert (v2i32 DPR:$src))), (v1i64 DPR:$src)>;
1606194710Seddef : Pat<(v1i64 (bitconvert (v4i16 DPR:$src))), (v1i64 DPR:$src)>;
1607194710Seddef : Pat<(v1i64 (bitconvert (v8i8  DPR:$src))), (v1i64 DPR:$src)>;
1608194710Seddef : Pat<(v1i64 (bitconvert (f64   DPR:$src))), (v1i64 DPR:$src)>;
1609194710Seddef : Pat<(v1i64 (bitconvert (v2f32 DPR:$src))), (v1i64 DPR:$src)>;
1610194710Seddef : Pat<(v2i32 (bitconvert (v1i64 DPR:$src))), (v2i32 DPR:$src)>;
1611194710Seddef : Pat<(v2i32 (bitconvert (v4i16 DPR:$src))), (v2i32 DPR:$src)>;
1612194710Seddef : Pat<(v2i32 (bitconvert (v8i8  DPR:$src))), (v2i32 DPR:$src)>;
1613194710Seddef : Pat<(v2i32 (bitconvert (f64   DPR:$src))), (v2i32 DPR:$src)>;
1614194710Seddef : Pat<(v2i32 (bitconvert (v2f32 DPR:$src))), (v2i32 DPR:$src)>;
1615194710Seddef : Pat<(v4i16 (bitconvert (v1i64 DPR:$src))), (v4i16 DPR:$src)>;
1616194710Seddef : Pat<(v4i16 (bitconvert (v2i32 DPR:$src))), (v4i16 DPR:$src)>;
1617194710Seddef : Pat<(v4i16 (bitconvert (v8i8  DPR:$src))), (v4i16 DPR:$src)>;
1618194710Seddef : Pat<(v4i16 (bitconvert (f64   DPR:$src))), (v4i16 DPR:$src)>;
1619194710Seddef : Pat<(v4i16 (bitconvert (v2f32 DPR:$src))), (v4i16 DPR:$src)>;
1620194710Seddef : Pat<(v8i8  (bitconvert (v1i64 DPR:$src))), (v8i8  DPR:$src)>;
1621194710Seddef : Pat<(v8i8  (bitconvert (v2i32 DPR:$src))), (v8i8  DPR:$src)>;
1622194710Seddef : Pat<(v8i8  (bitconvert (v4i16 DPR:$src))), (v8i8  DPR:$src)>;
1623194710Seddef : Pat<(v8i8  (bitconvert (f64   DPR:$src))), (v8i8  DPR:$src)>;
1624194710Seddef : Pat<(v8i8  (bitconvert (v2f32 DPR:$src))), (v8i8  DPR:$src)>;
1625194710Seddef : Pat<(f64   (bitconvert (v1i64 DPR:$src))), (f64   DPR:$src)>;
1626194710Seddef : Pat<(f64   (bitconvert (v2i32 DPR:$src))), (f64   DPR:$src)>;
1627194710Seddef : Pat<(f64   (bitconvert (v4i16 DPR:$src))), (f64   DPR:$src)>;
1628194710Seddef : Pat<(f64   (bitconvert (v8i8  DPR:$src))), (f64   DPR:$src)>;
1629194710Seddef : Pat<(f64   (bitconvert (v2f32 DPR:$src))), (f64   DPR:$src)>;
1630194710Seddef : Pat<(v2f32 (bitconvert (f64   DPR:$src))), (v2f32 DPR:$src)>;
1631194710Seddef : Pat<(v2f32 (bitconvert (v1i64 DPR:$src))), (v2f32 DPR:$src)>;
1632194710Seddef : Pat<(v2f32 (bitconvert (v2i32 DPR:$src))), (v2f32 DPR:$src)>;
1633194710Seddef : Pat<(v2f32 (bitconvert (v4i16 DPR:$src))), (v2f32 DPR:$src)>;
1634194710Seddef : Pat<(v2f32 (bitconvert (v8i8  DPR:$src))), (v2f32 DPR:$src)>;
1635194710Sed
// Bitconverts between the types held in QPR (v2i64, v4i32, v8i16, v16i8,
// v4f32, v2f64) select to the same source register, retyped to the result
// type.  There is one pattern for every ordered pair of distinct types
// (6 x 5 = 30), grouped by result type.
1636194710Seddef : Pat<(v2i64 (bitconvert (v4i32 QPR:$src))), (v2i64 QPR:$src)>;
1637194710Seddef : Pat<(v2i64 (bitconvert (v8i16 QPR:$src))), (v2i64 QPR:$src)>;
1638194710Seddef : Pat<(v2i64 (bitconvert (v16i8 QPR:$src))), (v2i64 QPR:$src)>;
1639194710Seddef : Pat<(v2i64 (bitconvert (v2f64 QPR:$src))), (v2i64 QPR:$src)>;
1640194710Seddef : Pat<(v2i64 (bitconvert (v4f32 QPR:$src))), (v2i64 QPR:$src)>;
1641194710Seddef : Pat<(v4i32 (bitconvert (v2i64 QPR:$src))), (v4i32 QPR:$src)>;
1642194710Seddef : Pat<(v4i32 (bitconvert (v8i16 QPR:$src))), (v4i32 QPR:$src)>;
1643194710Seddef : Pat<(v4i32 (bitconvert (v16i8 QPR:$src))), (v4i32 QPR:$src)>;
1644194710Seddef : Pat<(v4i32 (bitconvert (v2f64 QPR:$src))), (v4i32 QPR:$src)>;
1645194710Seddef : Pat<(v4i32 (bitconvert (v4f32 QPR:$src))), (v4i32 QPR:$src)>;
1646194710Seddef : Pat<(v8i16 (bitconvert (v2i64 QPR:$src))), (v8i16 QPR:$src)>;
1647194710Seddef : Pat<(v8i16 (bitconvert (v4i32 QPR:$src))), (v8i16 QPR:$src)>;
1648194710Seddef : Pat<(v8i16 (bitconvert (v16i8 QPR:$src))), (v8i16 QPR:$src)>;
1649194710Seddef : Pat<(v8i16 (bitconvert (v2f64 QPR:$src))), (v8i16 QPR:$src)>;
1650194710Seddef : Pat<(v8i16 (bitconvert (v4f32 QPR:$src))), (v8i16 QPR:$src)>;
1651194710Seddef : Pat<(v16i8 (bitconvert (v2i64 QPR:$src))), (v16i8 QPR:$src)>;
1652194710Seddef : Pat<(v16i8 (bitconvert (v4i32 QPR:$src))), (v16i8 QPR:$src)>;
1653194710Seddef : Pat<(v16i8 (bitconvert (v8i16 QPR:$src))), (v16i8 QPR:$src)>;
1654194710Seddef : Pat<(v16i8 (bitconvert (v2f64 QPR:$src))), (v16i8 QPR:$src)>;
1655194710Seddef : Pat<(v16i8 (bitconvert (v4f32 QPR:$src))), (v16i8 QPR:$src)>;
1656194710Seddef : Pat<(v4f32 (bitconvert (v2i64 QPR:$src))), (v4f32 QPR:$src)>;
1657194710Seddef : Pat<(v4f32 (bitconvert (v4i32 QPR:$src))), (v4f32 QPR:$src)>;
1658194710Seddef : Pat<(v4f32 (bitconvert (v8i16 QPR:$src))), (v4f32 QPR:$src)>;
1659194710Seddef : Pat<(v4f32 (bitconvert (v16i8 QPR:$src))), (v4f32 QPR:$src)>;
1660194710Seddef : Pat<(v4f32 (bitconvert (v2f64 QPR:$src))), (v4f32 QPR:$src)>;
1661194710Seddef : Pat<(v2f64 (bitconvert (v2i64 QPR:$src))), (v2f64 QPR:$src)>;
1662194710Seddef : Pat<(v2f64 (bitconvert (v4i32 QPR:$src))), (v2f64 QPR:$src)>;
1663194710Seddef : Pat<(v2f64 (bitconvert (v8i16 QPR:$src))), (v2f64 QPR:$src)>;
1664194710Seddef : Pat<(v2f64 (bitconvert (v16i8 QPR:$src))), (v2f64 QPR:$src)>;
1665194710Seddef : Pat<(v2f64 (bitconvert (v4f32 QPR:$src))), (v2f64 QPR:$src)>;
1666